def instructionFetch(self):
    """Fetch the instruction at the current PC and pre-decode its fields.

    Populates the decode attributes (opcode, destination/source register
    indices, immediates, and resolved register expressions) that the
    per-instruction next-state functions consume.  Field boundaries come
    from the `instruction_format` module.
    """
    # Word-aligned fetch: drop the 2 byte-offset PC bits, extend to the
    # memory address width.
    self.inst = ila.load(
        self.mem,
        ila.zero_extend(self.pc[31:2], instruction_format.MEM_ADDRESS_BITS))
    self.opcode = self.inst[(instruction_format.OPCODE_BIT_TOP - 1):instruction_format.OPCODE_BIT_BOT]
    # Expression the ILA framework uses as the fetch result.
    self.fetch_expr = self.inst
    # Destination and three source register index fields.
    self.dest = self.inst[(instruction_format.DST_BIT_TOP - 1):instruction_format.DST_BIT_BOT]
    self.src1 = self.inst[(instruction_format.SRC0_BIT_TOP - 1):instruction_format.SRC0_BIT_BOT]
    self.src2 = self.inst[(instruction_format.SRC1_BIT_TOP - 1):instruction_format.SRC1_BIT_BOT]
    self.src3 = self.inst[(instruction_format.SRC2_BIT_TOP - 1):instruction_format.SRC2_BIT_BOT]
    # Base immediate is sign-extended to the PC width.
    self.baseImm = ila.sign_extend(
        self.inst[(instruction_format.BASE_BIT_TOP - 1):instruction_format.BASE_BIT_BOT],
        instruction_format.PC_BITS)
    # Branch predicate register index is encoded in the destination field.
    self.branchPred = self.dest
    self.predReg = self.indexIntoReg(self.branchPred)
    # Branch immediate is zero-extended to the PC width.
    self.branchImm = ila.zero_extend(
        self.inst[(instruction_format.IMM_BIT_TOP - 1):instruction_format.IMM_BIT_BOT],
        instruction_format.PC_BITS)
    # Resolve index fields to register-file expressions.
    self.sreg1 = self.indexIntoReg(self.src1)
    self.sreg2 = self.indexIntoReg(self.src2)
    self.sreg3 = self.indexIntoReg(self.src3)
    self.sregdest = self.indexIntoReg(self.dest)
def instructionFetch(self): self.instruction = self.model.load(self.mem, ila.zero_extend(self.pc, MEMORY_ADDRESS_BITS)) #TODO: How to fetch the instruction in GPU? self.opcode_SOPP = self.instruction[22:16] self.opcode_SOP2 = self.instruction[30:23] self.opcode_VOP2 = self.instruction[30:25] self.opcode_SMRD = self.instruction[26:22] self.opcode_SOP1 = self.instruction[15:8] self.opcode_SOPK = self.instruction[27:23] self.SOPPIdentifier = self.instruction[31:23] self.SOP2Identifier = self.instruction[31] self.VOP2Identifier = self.instruction[31] self.SMRDIdentifier = self.instruction[31:27] self.VOP3Identifier = self.instruction[31:26] self.SOPKIdentifier = self.instruction[31:28] self.isSOPK = (self.SOPKIdentifier == 0b1011) self.isSOPP = (self.SOPPIdentifier == 0b101111111) self.isSOP2 = (self.SOP2Identifier == 0b1) self.isVOP2 = (self.VOP2Identifier == 0b0) self.isSMRD = (self.SMRDIdentifier == 0b11000) self.isVOP3 = (self.VOP3Identifier == 0b110100) self.sdstSOP2 = self.instruction[22:16] self.ssrc1 = self.instruction[15:8] self.ssrc0 = self.instruction[7:0] self.vdst = self.instruction[24:17] self.vsrc1 = self.instruction[16:9] self.vsrc0 = self.instruction[8:0] #why whitepaper says its offset? self.sdstSMRD = self.instruction[21:15] self.sbase = self.instruction[14:9] self.imm = self.instruction[8] self.simm = self.instruction[15:0] self.extend_instruction = self.model.load(self.mem, ila.zero_extend(self.pc + 1, MEMORY_ADDRESS_BITS))
def writeBit(self, bitaddr, bitval):  # FIXME (carried over from original)
    """Write a single bit of the bit-addressable area via read-modify-write.

    The bit address is split into a byte address and a bit offset; the
    target byte is read, the selected bit masked out and replaced with
    `bitval`, and the byte written back.
    """
    in_high_region = bitaddr[7:7] == 1
    # High region: byte address is the bit address with the low 3 bits
    # cleared.  Low region: byte index offset by 32.
    byte_address = ila.ite(in_high_region,
                           ila.concat(bitaddr[7:3], self.model.const(0, 3)),
                           ila.zero_extend(bitaddr[7:3], 8) + 32)
    current_byte = self.readDirect(byte_address)
    shift = ila.zero_extend(bitaddr[2:0], 8)
    # Clear the target bit, then OR in the (shifted) new value.
    clear_mask = ~(self.model.const(1, 8) << shift)
    set_bits = ila.zero_extend(bitval, 8) << shift
    updated_byte = (clear_mask & current_byte) | set_bits
    return self.writeDirect(byte_address, updated_byte)
def instructionFetch(self):
    """Fetch and extract opcodes for two program counters (pc_a and pc_b).

    Presumably models two interleaved contexts/threads — confirm against
    the rest of the model.  Both fetches are word-aligned (PC bits [31:2]).
    """
    self.inst_a = ila.load(
        self.mem,
        ila.zero_extend(self.pc_a[31:2], instruction_format.MEM_ADDRESS_BITS))
    self.inst_b = ila.load(
        self.mem,
        ila.zero_extend(self.pc_b[31:2], instruction_format.MEM_ADDRESS_BITS))
    # Opcode field per instruction word, using the shared format constants.
    self.opcode_a = self.inst_a[(instruction_format.OPCODE_BIT_TOP - 1):instruction_format.OPCODE_BIT_BOT]
    self.opcode_b = self.inst_b[(instruction_format.OPCODE_BIT_TOP - 1):instruction_format.OPCODE_BIT_BOT]
def instructionFetch(self):
    """Fetch the word at PC and split it into opcode/register/branch fields."""
    # Word-aligned fetch: drop the 2 byte-offset bits of the PC.
    self.inst = ila.load(self.mem,
                         ila.zero_extend(self.pc[31:2], MEM_ADDRESS_BITS))
    self.opcode = self.inst[31:22]
    self.fetch_expr = self.inst
    # Destination plus three source register index fields.
    self.dest = self.inst[21:17]
    self.src1 = self.inst[16:12]
    self.src2 = self.inst[11:7]
    self.src3 = self.inst[6:2]
    # Branch target: low 22 instruction bits, zero-extended to PC width.
    self.branchPC = ila.zero_extend(self.inst[21:0], PC_BITS)
    # Resolve index fields to register-file expressions.
    self.sreg1 = self.indexIntoReg(self.src1)
    self.sreg2 = self.indexIntoReg(self.src2)
    self.sreg3 = self.indexIntoReg(self.src3)
    self.sregdest = self.indexIntoReg(self.dest)
def auxMull_i(self, dataA, dataB):
    """Signed multiply; returns the low SCALAR_REG_BITS bits of the product.

    Built as sign/magnitude: take absolute values, multiply at double
    width so nothing overflows, then negate the wide product when the
    operand signs differ and truncate to a single register width.
    """
    sign_a = dataA[31]
    sign_b = dataB[31]
    # The product is negative iff exactly one operand is negative.
    product_is_neg = sign_a ^ sign_b
    # Two's-complement negate to obtain operand magnitudes.
    mag_a = ila.ite(sign_a, (~dataA) + self.model.const(0b1, SCALAR_REG_BITS), dataA)
    mag_b = ila.ite(sign_b, (~dataB) + self.model.const(0b1, SCALAR_REG_BITS), dataB)
    # Widen before multiplying so the full product is representable.
    wide_a = ila.zero_extend(mag_a, 2 * SCALAR_REG_BITS)
    wide_b = ila.zero_extend(mag_b, 2 * SCALAR_REG_BITS)
    wide_product = wide_a * wide_b
    # Re-apply the sign, then keep only the low register-width slice.
    signed_product = ila.ite(product_is_neg, (~wide_product) + 1, wide_product)
    return signed_product[SCALAR_REG_BITS - 1:0]
def instructionFetch(self):
    """Fetch and decode using the *_BIT field-boundary constants."""
    self.inst = ila.load(self.mem,
                         ila.zero_extend(self.pc[31:2], MEM_ADDRESS_BITS))
    self.opcode = self.inst[(REG_BITS - 1):OPCODE_BIT]
    self.fetch_expr = self.inst
    # Destination plus three source register index fields.
    self.dest = self.inst[(OPCODE_BIT - 1):DST_BIT]
    self.src1 = self.inst[(DST_BIT - 1):SRC0_BIT]
    self.src2 = self.inst[(SRC0_BIT - 1):SRC1_BIT]
    self.src3 = self.inst[(SRC1_BIT - 1):SRC2_BIT]
    # Base immediate is sign-extended to PC width.
    self.baseImm = ila.sign_extend(self.inst[(BASE_BIT - 1):0], PC_BITS)
    # Branch predicate register index is encoded in the destination field.
    self.branchPred = self.dest
    self.predReg = self.indexIntoReg(self.branchPred)
    # Branch immediate is zero-extended to PC width.
    self.branchImm = ila.zero_extend(self.inst[(DST_BIT - 1):BASE_BIT], PC_BITS)
    # Resolve index fields to register-file expressions.
    self.sreg1 = self.indexIntoReg(self.src1)
    self.sreg2 = self.indexIntoReg(self.src2)
    self.sreg3 = self.indexIntoReg(self.src3)
    self.sregdest = self.indexIntoReg(self.dest)
def readBit(self, bitaddr):
    """Read one bit from the bit-addressable area.

    Splits the bit address into a byte address and a bit offset using the
    same mapping as writeBit, reads the byte, and selects the bit.
    """
    in_high_region = bitaddr[7:7] == 1
    # High region: byte address is the bit address with the low 3 bits
    # cleared.  Low region: byte index offset by 32.
    byte_address = ila.ite(in_high_region,
                           ila.concat(bitaddr[7:3], self.model.const(0, 3)),
                           ila.zero_extend(bitaddr[7:3], 8) + 32)
    offset = bitaddr[2:0]
    cell = self.readDirect(byte_address)
    return cell[offset]
def instructionFetch(self):
    """Fetch the word at PC and pre-decode all four instruction classes.

    Extracts fields for branch, reg-reg, immediate, and memory formats
    unconditionally; the is* flags (isBranch/isRegReg/isImmediate/isMem)
    select which interpretation applies.
    """
    # Word-aligned fetch.
    self.instruction = ila.load(
        self.mem, ila.zero_extend(self.pc[31:2], MEM_ADDRESS_BITS))
    # --- Branch format: top nibble 1111 ---
    self.isBranch = (self.instruction[31:28] == self.model.const(
        0b1111, 4))
    self.branchOP = self.instruction[27:25]
    self.branchOffsetA = self.instruction[24:5]
    self.branchSrc = self.instruction[4:0]
    self.branchOffsetB = self.instruction[24:0]
    # --- Reg-reg format: top bits 110 ---
    self.isRegReg = (self.instruction[31:29] == self.model.const(0b110, 3))
    self.rrType = self.instruction[28:26]
    self.rrOpcode = self.instruction[25:20]
    self.rrSrc2 = self.instruction[19:15]
    self.rrMask = self.instruction[14:10]
    self.rrDest = self.instruction[9:5]
    self.rrSrc1 = self.instruction[4:0]
    # --- Immediate format: top bit 0 ---
    self.isImmediate = (self.instruction[31] == self.model.const(0b0, 1))
    self.immType = self.instruction[30:29]
    self.immOpcode = self.instruction[28:24]
    self.immA = ila.zero_extend(self.instruction[23:15], SCALAR_REG_BITS)
    self.immB = ila.zero_extend(self.instruction[23:10], SCALAR_REG_BITS)
    self.immCup = self.instruction[23:10]
    self.immClow = self.instruction[4:0]
    self.immDest = self.instruction[9:5]
    self.immMask = self.instruction[14:10]
    # Unified immediate: type 0x -> B-form, 10 -> concat(Cup,Clow),
    # 11 -> A-form (final else also A-form, so 11 and fallback coincide).
    self.imm = ila.ite(
        self.immType[1] == self.model.const(0b0, 1),
        ila.zero_extend(self.immB, SCALAR_REG_BITS),
        ila.ite(
            self.immType == self.model.const(0b10, 2),
            ila.zero_extend(ila.concat(self.immCup, self.immClow),
                            SCALAR_REG_BITS),
            ila.ite(self.immType == self.model.const(0b11, 2),
                    ila.zero_extend(self.immA, SCALAR_REG_BITS),
                    ila.zero_extend(self.immA, SCALAR_REG_BITS))))
    # --- Memory format: top bits 10 ---
    self.isMem = (self.instruction[31:30] == self.model.const(0b10, 2))
    self.isLoad = self.instruction[29]
    self.memOpcode = self.instruction[28:25]
    self.memOffSetA = self.instruction[24:15]
    self.memOffSetB = self.instruction[24:10]
    self.memMask = self.instruction[14:10]
    self.memDest = self.instruction[9:5]
    self.memSrc = self.instruction[9:5]
    self.memPtr = self.instruction[4:0]
    # Offset width depends on memOpcode: 1000/1110 use the short A-form,
    # everything else the long B-form; both sign-extended.
    self.memOffSet = ila.ite(
        self.memOpcode == self.model.const(0b1000, 4),
        ila.sign_extend(self.memOffSetA, SCALAR_REG_BITS),
        ila.ite(self.memOpcode == self.model.const(0b1110, 4),
                ila.sign_extend(self.memOffSetA, SCALAR_REG_BITS),
                ila.sign_extend(self.memOffSetB, SCALAR_REG_BITS)))
    self.isMask = (
        ((self.rrType == self.model.const(0b010, 3)) |
         (self.rrType == self.model.const(0b101, 3))) & self.isRegReg
    ) #need rewrite
    self.dest = self.instruction[9:5]
def get_reg_choices(reg):
    """Build the synthesis choice tree for the next value of GPR x<reg>.

    Returns an ila.choice over: keeping the register unchanged (S/SB
    instructions), or — when rm.rd selects this register — one of the
    candidate ALU / upper-immediate / link / load results.  Reads the
    module-level model wrapper `rm` and helpers `zext`, `getSlice`, `bv`.
    """
    rs1_val = rm.indexIntoGPR(rm.rs1)
    rs2_val = rm.indexIntoGPR(rm.rs2)
    rd_val = rm.indexIntoGPR(rm.rd)  # NOTE(review): computed but unused below
    rs_val = ila.choice('rs_sel', rs1_val, rs2_val)  # NOTE(review): unused below
    # Shift amount: register low 5 bits vs. instruction bits [24:20].
    # ('shift_amout' typo kept — it is a runtime synthesis label.)
    shamt = ila.choice('shift_amout', rs2_val[4:0], rm.inst[24:20])
    # Second ALU operand: register, or zero-/sign-extended I-immediate.
    rs2_comb = ila.choice('rs2_or_immed', rs2_val,
                          ila.zero_extend(rm.immI, 32),
                          ila.sign_extend(rm.immI, 32))
    # Load path: word-aligned load, then slice the addressed sub-word.
    addr = rs1_val + rm.immI
    lw_val = ila.load(rm.mem, zext(addr[31:2]))
    load_val = getSlice(lw_val, addr[1:0])
    #load_dw = ila.loadblk(rm.mem, zext(addr[31:2]), 2 )
    return ila.choice(
        "x%d_next" % reg,
        [
            rm.generalRegList[
                reg],  # Remain the Same regardless of RD (i.e. S/SB instructions)
            ila.ite(
                rm.rd == reg,  # Is this the destination register?
                ila.choice(
                    "x%d" % reg,
                    [
                        rs1_val + rs2_comb,  # RS1 + RS2
                        rs1_val - rs2_comb,  # RS1 - RS2
                        rs1_val & rs2_comb,  # AND
                        rs1_val | rs2_comb,  # OR
                        rs1_val ^ rs2_comb,  # XOR
                        ila.ite(
                            ila.slt(rs1_val, rs2_comb),  # SLT
                            bv(1), bv(0)),
                        # SLT inverted (>=, signed)
                        ila.ite(ila.slt(rs1_val, rs2_comb), bv(0), bv(1)),
                        # SLTU (unsigned compare)
                        ila.ite(rs1_val < rs2_comb, bv(1), bv(0)),
                        rs1_val << zext(shamt),  # sll
                        rs1_val >> zext(shamt),  # srl
                        ila.ashr(rs1_val, zext(shamt)),  # sra
                        rm.immU,  # LUI
                        rm.immU + rm.pc,  # AUIPC
                        rm.pc + bv(4),  # JAL/JALR
                        load_val
                        #load_dw
                    ]),
                rm.generalRegList[reg])  # Remain the same
        ])
def InstFetch(self):
    """Fetch and decode one RV32 instruction at the current PC.

    Extracts the standard RISC-V register/function fields and builds all
    five immediate forms (I/S/B/U/J) extended to XLEN.
    """
    #self.inst = self.model.inp('inst',32)
    #self.fetch_expr = self.inst
    # Word-aligned fetch: PC bits [31:2] index 32-bit words in memory.
    inst = ila.load(self.mem, ila.zero_extend(self.pc[31:2], 32))
    #ila.zero_extend(self.pc[31:2], 32))
    self.inst = inst
    self.fetch_expr = self.inst
    # Standard RV32 base-encoding field positions.
    self.opcode = self.inst[6:0]
    self.rd = self.inst[11:7]
    self.rs1 = self.inst[19:15]
    self.rs2 = self.inst[24:20]
    self.funct3 = self.inst[14:12]
    self.funct7 = self.inst[31:25]
    self.funct12= self.inst[31:20]
    # Immediates per instruction format, extended to XLEN.
    self.immI = ila.sign_extend( inst[31:20], XLEN)
    self.immS = ila.sign_extend( ila.concat( [inst[31:25], inst[11:7]] ), XLEN )
    # B-type: bit order imm[12|10:5|4:1|11] with implicit low zero bit.
    self.immB = ila.sign_extend( ila.concat( [inst[31],inst[7], inst[30:25], inst[11:8], const(0,1) ] ) , XLEN )
    # U-type: upper 20 bits, low 12 zeroed.
    self.immU = ila.concat( [inst[31:12],const(0,12)] )
    # J-type: bit order imm[20|10:1|11|19:12] with implicit low zero bit.
    self.immJ = ila.sign_extend( ila.concat( [inst[31], inst[19:12], inst[20], inst[30:21], const(0,1) ] ) , XLEN)
    # CSR instructions reuse the I-immediate field as the CSR index.
    self.csr_index = self.inst[31:20]
def sreg_nxt(self, regNo):
    """Next-state expression for scalar register `regNo`.

    Selects among reg-reg ALU results, immediate ALU results, and a load
    path; defaults to keeping the register's current value whenever the
    instruction does not target `regNo`.
    """
    sreg1 = self.indexToSGPR(self.rrSrc1)
    sreg2 = self.indexToSGPR(self.rrSrc2)
    #load instruction
    addr = self.indexToSGPR(self.memPtr) + ila.sign_extend(
        self.memOffSet, SCALAR_REG_BITS)
    # NOTE(review): load_val is computed but never used below — the isLoad
    # branch yields the unchanged register in BOTH arms; looks unfinished.
    load_val = ila.load(self.mem,
                        ila.zero_extend(addr[31:2], MEM_ADDRESS_BITS))
    # NOTE(review): in both immediate branches MULH_I multiplies sreg1 by
    # sreg2 rather than by the immediate — possible copy-paste; confirm.
    return ila.ite(self.dest == regNo,
        ila.ite(self.isRegReg,
            # Reg-reg ALU group (rrType 000).
            ila.ite(self.rrType == self.model.const(0b000, 3),
                ila.ite(self.rrOpcode == NyEncoding.ADD_I, sreg1 + sreg2,
                ila.ite(self.rrOpcode == NyEncoding.SUB_I, sreg1 - sreg2,
                ila.ite(self.rrOpcode == NyEncoding.AND, sreg1 & sreg2,
                ila.ite(self.rrOpcode == NyEncoding.OR, sreg1 | sreg2,
                ila.ite(self.rrOpcode == NyEncoding.MULH_I, sreg1 * sreg2,
                    self.scalar_registers[regNo]))))),
                self.scalar_registers[regNo]),
            ila.ite(self.isImmediate,
                # Immediate type 00 uses the B-form immediate.
                ila.ite(self.immType == self.model.const(0b00, 2),
                    ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.ADD_I, sreg1 + self.immB,
                    ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.SUB_I, sreg1 - self.immB,
                    ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.AND, sreg1 & self.immB,
                    ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.OR, sreg1 | self.immB,
                    ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.MULH_I, sreg1 * sreg2,
                        self.scalar_registers[regNo]))))),
                    # Immediate type 10 uses the A-form immediate.
                    ila.ite(self.immType == self.model.const(0b10, 2),
                        ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.ADD_I, sreg1 + self.immA,
                        ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.SUB_I, sreg1 - self.immA,
                        ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.AND, sreg1 & self.immA,
                        ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.OR, sreg1 | self.immA,
                        ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.MULH_I, sreg1 * sreg2,
                            self.scalar_registers[regNo]))))),
                        self.scalar_registers[regNo])),
                ila.ite(self.isLoad == self.model.const(0b1, 1),
                    self.scalar_registers[regNo],
                    self.scalar_registers[regNo]))),
        self.scalar_registers[regNo])
def instFetch(self):
    """Fetch one instruction word per PC into fetch_list (two entries)."""
    self.fetch_list = []
    # NOTE(review): `pc` is not defined in this scope — presumably this
    # should be self.pc (a per-thread PC list); confirm against callers.
    # NOTE(review): ila.zero_extend is called with one argument here but
    # takes (expr, width) everywhere else in this file — likely missing
    # the target address width.
    self.fetch_list.append(self.imem[ila.zero_extend(pc[0][31:2])])
    self.fetch_list.append(self.imem[ila.zero_extend(pc[1][31:2])])
def perform_instruction(self, index, program_line, pc_target):
    """Translate one parsed PTX line into ILA next-state updates for thread `index`.

    Parameters:
        index        -- hardware-thread index selecting per-thread state lists.
        program_line -- tokenized PTX statement: [opcode, dest, src...].
        pc_target    -- mapping from branch-label names to target PC values.

    Side effects: advances self.current_pc by 4 for every handled line and
    updates the relevant next-state dictionaries/lists.

    Fixed in review: the source-operand summation previously re-added
    src_components[0] on every iteration instead of summing each component.
    """
    # Lines without operands are only logged.
    if len(program_line) < 2:
        self.debug_log[self.current_pc] = program_line
        return
    opcode = program_line[0]
    opcode_split = re.split(r'\.', opcode)
    opcode_name = opcode_split[0]
    if (index == 0):
        # Single-arg print with parens is identical in Python 2 and 3.
        print(program_line)
        print(self.current_pc)
    if (opcode_name != '@') & (opcode_name != 'bra'):
        self.next_state_finished.append(program_line[1])
        if opcode_name == 'bar':
            # Barrier: no destination update, just advance the PC.
            op_len = 0
            dest = self.aux_dest(program_line[0], [], index)
            self.current_pc += 4
            return
        elif opcode == 'ld.acq':
            # Acquire: latch the lock address and raise the mutex flag when
            # this thread's PC reaches the current line.
            lock_addr_name = program_line[1]
            lock_addr_reg = self.model.getreg(lock_addr_name + '_%d' % (index))
            lock_addr_type = ptx_declaration[lock_addr_name]
            op_len = int(lock_addr_type[2:])
            if op_len < instruction_format.LONG_REG_BITS:
                lock_addr_reg = ila.zero_extend(
                    lock_addr_reg, instruction_format.LONG_REG_BITS)
            self.mutex_guard_next_list[index] = ila.ite(
                self.pc_list[index] == self.current_pc, lock_addr_reg,
                self.mutex_guard_next_list[index])
            self.mutex_flag_next_list[index] = ila.ite(
                self.pc_list[index] == self.current_pc,
                self.model.const(0x1, 1), self.mutex_flag_next_list[index])
            self.current_pc += 4
            return
        elif opcode == 'st.rel':
            # NOTE(review): release sets the flag to 1, same as acquire —
            # looks suspicious (expected 0?); left unchanged, verify intent.
            self.mutex_flag_next_list[index] = ila.ite(
                self.pc_list[index] == self.current_pc,
                self.model.const(0x1, 1), self.mutex_flag_next_list[index])
            self.current_pc += 4
            return
        else:
            # Operand width from the opcode suffix (e.g. 'add.s32' -> 32);
            # cache-op suffixes (.ca/.cg) put the width one token earlier.
            if opcode_split[-1] == 'pred':
                op_len = 1
            elif opcode_split[-1] == 'ca' or opcode_split[-1] == 'cg':
                op_len = int(opcode_split[-2][1:])
            else:
                op_len = int(opcode_split[-1][1:])
            # Each source operand may be a '+'-joined sum of components.
            src_list = []
            for arg_pos in range(2, len(program_line)):
                src_str = program_line[arg_pos]
                src_components = re.split(r'\+', src_str)
                for comp_idx in range(len(src_components)):
                    src_components[comp_idx] = self.aux_imm(
                        src_components[comp_idx], index, op_len)
                # BUG FIX: sum every component (the original added
                # src_components[0] repeatedly).
                src_sum = src_components[0]
                for comp_idx in range(1, len(src_components)):
                    src_sum = src_sum + src_components[comp_idx]
                src_list.append(src_sum)
            dest = self.aux_dest(program_line[0], src_list, index)
            if not dest:
                # Unhandled opcode: log and skip.
                self.debug_log[self.current_pc] = program_line
                self.current_pc += 4
                return
            dest_str = program_line[1]
            if opcode.find('atom') != -1:
                dest_str = program_line[1]
                op_len = instruction_format.LONG_REG_BITS
            if opcode.find('ld') != -1:
                # Parameter and vector (v4) loads are not modeled.
                if opcode.find('param') != -1:
                    self.current_pc += 4
                    return
                if opcode.find('v4') != -1:
                    self.current_pc += 4
                    return
                dest_str = program_line[1]
                op_len = instruction_format.LONG_REG_BITS
            dest = self.adjust_dest(index, dest, dest_str, op_len)
            # Gate the destination update on this thread's PC matching.
            current_next_state = self.next_state_dict[dest_str + '_%d' % (index)]
            self.next_state_dict[dest_str + '_%d' % (index)] = ila.ite(
                self.pc_list[index] == self.current_pc, dest,
                current_next_state)
            self.current_pc += 4
            return
    else:
        # Control flow: predicated jump ('@') or unconditional branch ('bra').
        if (opcode_name == '@'):
            opcode_jmp_dest = program_line[3]
            pred_guard = self.pred_one
            pred_guard_reg = program_line[1]
            # A leading '!' inverts the predicate sense.
            if program_line[1][0] == '!':
                pred_guard = self.pred_zero
                pred_guard_reg = program_line[1][1:]
            opcode_pred = self.model.getreg(pred_guard_reg + '_%d' % (index))
            opcode_jmp_target = pc_target[opcode_jmp_dest]
            print(opcode)
            print(opcode_jmp_target)
            pc_jmp = ila.ite(
                opcode_pred == pred_guard,
                ila.const(opcode_jmp_target, instruction_format.PC_BITS),
                self.pc_list[index] + 4)
        elif (opcode_name == 'bra'):
            opcode_jmp_dest = program_line[1]
            opcode_jmp_target = pc_target[opcode_jmp_dest]
            print(opcode)
            print(opcode_jmp_target)
            pc_jmp = ila.const(opcode_jmp_target,
                               instruction_format.PC_BITS)
        self.pc_next_list[index] = ila.ite(
            self.pc_list[index] == self.current_pc, pc_jmp,
            self.pc_next_list[index])
        self.current_pc += 4
def buildILA(): #--------------------------- # define universal constant #--------------------------- K = 5 NUM_MOVIE_MAX = 100 NUM_HIDDEN_MAX = 100 NUM_VISIBLE_MAX = NUM_MOVIE_MAX * K DATAMEM_ADDR_WIDTH = int( log(NUM_VISIBLE_MAX + 1) / log(2)) + 1 # 9 # it is definitely not dividable, but need to check HIDDEN_UNIT_WIDTH = int( log(NUM_HIDDEN_MAX + 1) / log(2)) + 1 # 7 # it is definitely not dividable, but need to check VISIBLE_UNIT_WIDTH = int(log(NUM_VISIBLE_MAX + 1) / log(2)) + 1 # 9 EDGEMEM_ADDR_WIDTH = int( log((NUM_VISIBLE_MAX + 1) * (NUM_HIDDEN_MAX + 1)) / log(2)) + 1 # 16 POS_ADDR_WIDTH = EDGEMEM_ADDR_WIDTH NEG_ADDR_WIDTH = EDGEMEM_ADDR_WIDTH PREDICT_RESULT_WIDTH = int(log(NUM_MOVIE_MAX) / log(2)) + 1 # 7 KWIDTH = int(log(K) / log(2)) + 1 # 3 #--------------------------- # Model #--------------------------- rbm = ila.Abstraction('RBM') conf_done = rbm.inp('conf_done', 1) conf_num_hidden = rbm.inp('conf_num_hidden', 32) conf_num_visible = rbm.inp('conf_num_visible', 32) conf_num_users = rbm.inp('conf_num_users', 32) conf_num_loops = rbm.inp('conf_num_loops', 32) conf_num_testusers = rbm.inp('conf_num_testusers', 32) conf_num_movies = rbm.inp('conf_num_movies', 32) rst = rbm.inp('rst', 1) init_done = rbm.reg('init_done', 1) done = rbm.reg('done', 1) num_hidden = rbm.reg('num_hidden', 16) num_visible = rbm.reg('num_visible', 16) num_users = rbm.reg('num_users', 16) num_loops = rbm.reg('num_loops', 16) num_testusers = rbm.reg('num_testusers', 16) num_movies = rbm.reg('num_movies', 16) # DMA output rd_index = rbm.reg('rd_index', 32) rd_length = rbm.reg('rd_length', 32) rd_request = rbm.reg('rd_request', 1) rd_grant = rbm.inp('rd_grant', 1) data_in = rbm.inp('data_in', 32) # rd_cnt = rbm.reg('rd_cnt', 16) # i ureg #585 # DMA input wr_grant = rbm.inp('wr_grant', 1) wr_request = rbm.reg('wr_request', 1) wr_index = rbm.reg('wr_index', 32) wr_length = rbm.reg('wr_length', 32) data_out = rbm.reg('data_out', 32) # wr_cnt = rbm.reg('wr_cnt', 16) : u reg data = 
rbm.mem('data', DATAMEM_ADDR_WIDTH, 8) rbm.mem('predict_result', PREDICT_RESULT_WIDTH, 8) #------------------------------------- # Decoding Expressions #------------------------------------- rstInst = rst == 1 confDoneInst = (rst == 0) & (init_done == 0) & (conf_done == 1) rdGrantInst = (rd_request == 1) & (rd_grant == 1) wrGrantInst = (wr_request == 1) & (wr_grant == 1) decodeExpr = [rstInst, confDoneInst, rdGrantInst, wrGrantInst] #------------------------------------- # AUX Functions #------------------------------------- def const(v, w): return rbm.const(v, w) b0 = const(0, 1) b1 = const(1, 1) h0_8 = const(0, 8) h1_8 = const(1, 8) h0_4 = const(0, 4) h1_4 = const(1, 4) h2_4 = const(2, 4) h3_4 = const(3, 4) h4_4 = const(4, 4) h0_16 = const(0, 16) h1_16 = const(1, 16) h0_32 = const(0, 32) h0_64 = const(0, 64) #------------------------------------- # Init conditions #------------------------------------- rbm.set_init('init_done', b0) rbm.set_init('done', b0) rbm.set_init('num_hidden', h0_16) rbm.set_init('num_visible', h0_16) rbm.set_init('num_users', h0_16) rbm.set_init('num_loops', h0_16) rbm.set_init('num_testusers', h0_16) rbm.set_init('num_movies', h0_16) #------------------------------------- # Config #------------------------------------- # this means, once configured, unless reset, it cannot be reconfigured init_done_nxt = ila.ite(rstInst, b0, ila.ite(confDoneInst, b1, init_done)) num_hidden_nxt = ila.ite( rstInst, h0_16, ila.ite(confDoneInst, conf_num_hidden[15:0], num_hidden)) num_visible_nxt = ila.ite( rstInst, h0_16, ila.ite(confDoneInst, conf_num_visible[15:0], num_visible)) num_users_nxt = ila.ite( rstInst, h0_16, ila.ite(confDoneInst, conf_num_users[15:0], num_users)) num_loops_nxt = ila.ite( rstInst, h0_16, ila.ite(confDoneInst, conf_num_loops[15:0], num_loops)) num_testusers_nxt = ila.ite( rstInst, h0_16, ila.ite(confDoneInst, conf_num_testusers[15:0], num_testusers)) num_movies_nxt = ila.ite( rstInst, h0_16, ila.ite(confDoneInst, 
conf_num_movies[15:0], num_movies)) rbm.set_next('init_done', init_done_nxt) rbm.set_next('num_hidden', num_hidden_nxt) rbm.set_next('num_visible', num_visible_nxt) rbm.set_next('num_users', num_users_nxt) rbm.set_next('num_loops', num_loops_nxt) rbm.set_next('num_testusers', num_testusers_nxt) rbm.set_next('num_movies', num_movies_nxt) # INST-level w/r complete rbm_rd_complete = rbm.reg('rd_complete', 1) rbm_wr_complete = rbm.reg('wr_complete', 1) rbm.set_init('rd_complete', b0) rbm.set_init('wr_complete', b0) #------------------------------------ # Compute UABS #------------------------------------ uabs = rbm.add_microabstraction('compute', (init_done == 1) & (done == 0)) index = uabs.reg('index', 16) loop_count = uabs.reg('loop_count', 16) pc = uabs.reg('upc', 4) edges_mem = uabs.mem('edges', EDGEMEM_ADDR_WIDTH, 8) nlp = uabs.getreg('num_loops') nm = ila.zero_extend(uabs.getreg('num_movies'), 32) nu = uabs.getreg('num_users') ntu = uabs.getreg('num_testusers') out_rd_request = uabs.getreg('rd_request') out_rd_complete = uabs.getreg('rd_complete') out_rd_length = uabs.getreg('rd_length') out_rd_index = uabs.getreg('rd_index') train_input_done = uabs.reg('train_input_done', 1) predict_input_done = uabs.reg('predict_input_done', 1) uabs.set_init('upc', const(0, 4)) uabs.set_init('index', h0_16) uabs.set_init('loop_count', h0_16) uabs.set_init('train_input_done', b0) uabs.set_init('predict_input_done', b0) uabs.set_init('rd_complete', b0) ### computation micro_instructions StartRead = (pc == 0) WaitReadComplete = (pc == 1) & (out_rd_complete == 0) DecideTrainOrPredict = (pc == 1) & (out_rd_complete == 1) StartTrain = (pc == 2) & (train_input_done == 1) StartPredict = (pc == 2) & (predict_input_done == 1) Finish = (pc == 3) StartReadState = const(0, 4) WaitReadCompleteState = const(1, 4) StartTrainOrPredict = const(2, 4) FinishState = const(3, 4) decodeExpr = [ StartRead, WaitReadComplete, DecideTrainOrPredict, StartTrain, StartPredict, Finish ] out_rd_request_nxt = 
ila.ite(StartRead, b1, out_rd_request) out_rd_length_nxt = ila.ite(StartRead, 5 * nm, out_rd_length) out_rd_index_nxt = ila.ite(StartRead, ila.zero_extend(index, 32), out_rd_index) out_rd_complete_nxt = ila.ite( StartRead, b0, ila.ite(DecideTrainOrPredict, b0, out_rd_complete)) train_input_done_nxt = ila.ite(DecideTrainOrPredict, ila.ite(loop_count < nlp, b1, b0), train_input_done) predict_input_done_nxt = ila.ite(DecideTrainOrPredict, ila.ite(loop_count == nlp, b1, b0), predict_input_done) pc_nxt = ila.ite( StartRead, WaitReadCompleteState, ila.ite( WaitReadComplete, pc, ila.ite( DecideTrainOrPredict, StartTrainOrPredict, ila.ite( StartTrain, StartTrainOrPredict, # StartReadState, # actually should be updated by u2inst ila.ite( StartPredict, StartTrainOrPredict, # StartReadState, # actually should be updated by u2inst ila.ite( Finish, FinishState, pc # should never happen! )))))) # should be updated by u2inst index_nxt_dummy = ila.ite( StartTrain | StartPredict, ila.ite( (index == nu - 1) & (loop_count != nlp), h0_16, ila.ite( (index == ntu - 1) & (loop_count == nlp), index, # And it is not correct index + 1)), index) # not in use loop_count_nxt_dummy = ila.ite( StartTrain | StartPredict, ila.ite((index == nu - 1) & (loop_count != nlp), loop_count + 1, loop_count), loop_count) uabs.set_next('rd_request', out_rd_request_nxt) uabs.set_next('rd_length', out_rd_length_nxt) uabs.set_next('rd_index', out_rd_index_nxt) uabs.set_next('rd_complete', out_rd_complete_nxt) uabs.set_next('train_input_done', train_input_done_nxt) uabs.set_next('predict_input_done', predict_input_done_nxt) uabs.set_next('upc', pc_nxt) uabs.set_next('index', index) uabs.set_next('loop_count', loop_count) # this has to be updated by micro_inst # read_request is turned off by loaduabs # predict_input_done, train_input_done is turned off by uabs_train/predict #------------------------------------ # Load UABS #------------------------------------ # RBM interface # high-level interface rd_granted = 
rbm.reg( 'rd_granted', 1 ) # this is only used for maintaining the validity of load UABS, no other should use data_nxt = ila.ite(rdGrantInst, ila.store(data, const(0, DATAMEM_ADDR_WIDTH), data_in[7:0]), data) # data # rd_granted_nxt = ila.ite(rdGrantInst, b1, rd_granted) rbm.set_next('rd_granted', rd_granted_nxt) rbm.set_next('data', data_nxt) # one change is to move these into lower abstraction DMAload = rbm.add_microabstraction( 'DMAload', (rd_granted == 1)) # this is sub-instruction w_cnt = DMAload.reg('i', 16) dma_rd_request = DMAload.getreg('rd_request') dma_rd_length = DMAload.getreg('rd_length') dma_rd_index = DMAload.getreg('rd_index') state_update_data = DMAload.getmem('data') state_update_rd_request = dma_rd_request self_update_rd_granted = DMAload.getreg('rd_granted') more_read_in = w_cnt < dma_rd_length[15:0] last_cycle = w_cnt == dma_rd_length[15:0] DMAload.set_init('i', h1_16) # h0_16 ) DMAload.set_next('i', ila.ite(more_read_in, w_cnt + 1, w_cnt)) DMAload.set_next('rd_request', b0) # reset to 0 immediately DMAload.set_next('rd_granted', ila.ite(more_read_in, self_update_rd_granted, b0)) DMAload.set_next('rd_complete', ila.ite(more_read_in, b0, b1)) DMAload.set_next( 'data', ila.ite( more_read_in, ila.store(state_update_data, w_cnt[DATAMEM_ADDR_WIDTH - 1:0], data_in[7:0]), ila.ite( last_cycle, ila.store(state_update_data, dma_rd_length[DATAMEM_ADDR_WIDTH - 1:0], h1_8), state_update_data))) #------------------------------------ # Train UUABS #------------------------------------ TrainUabs = uabs.add_microabstraction('train', train_input_done == 1) sigmoid_func = TrainUabs.fun('sigmoid', 64, [16]) # DATA_sum_, 01_D rand_func = TrainUabs.fun('rand', 64, []) # generate random number to_int_exp = TrainUabs.fun('to_int_exp', 32, [16]) # divide_func = TrainUabs.fun( 'divide', 64, [32, 64]) # dp:32_32 / sum_of_pow2 64_64 = 64_1 hidden_unit = TrainUabs.mem('hidden_unit', HIDDEN_UNIT_WIDTH, 1) visible_unit = TrainUabs.mem('visible_unit', VISIBLE_UNIT_WIDTH, 1) 
visibleEnergy = TrainUabs.mem('visibleEnergies', KWIDTH, 16) pow2 = TrainUabs.mem('pow2', KWIDTH, 32) pos = TrainUabs.mem('pos', POS_ADDR_WIDTH, 1) #neg = TrainUabs.mem('neg', NEG_ADDR_WIDTH, 1 ) # not needed train_sum = TrainUabs.reg('train_sum', 16) train_max = TrainUabs.reg('train_max', 16) sumOfpow2 = TrainUabs.reg('sumOfpow2', 64) jstate = TrainUabs.reg('jstate', 16) inner_loop_pc = TrainUabs.reg('per_v_pc', 4) train_pc = TrainUabs.reg('train_upc', 4) # Re-evaluate v_cnt = TrainUabs.reg('train_v_cnt', 16) h_cnt = TrainUabs.reg('train_h_cnt', 16) train_input = TrainUabs.getmem('data') edges_input = TrainUabs.getmem('edges') nv = TrainUabs.getreg('num_visible') nh = TrainUabs.getreg('num_hidden') nu = TrainUabs.getreg('num_users') ntu = TrainUabs.getreg('num_testusers') nlp = TrainUabs.getreg('num_loops') SumEdge = train_pc == 0 SumEdgeState = const(0, 4) SumHidden = train_pc == 1 SumHiddenState = const(1, 4) StorePos = train_pc == 3 StorePosState = const(3, 4) EdgeUpdate = train_pc == 2 EdgeUpdateState = const(2, 4) TrainUabs.decode_exprs = [SumEdge, SumHidden, EdgeUpdate] #Begin v_cnt_init = const(0, 16) h_cnt_init = const(0, 16) pc_init = const(0, 4) #SumEdge: s0 edge_load_addr = (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt train_sum_s0_nxt = ila.ite(v_cnt == 0, const(0, 16), train_sum) + ila.ite( ila.load(train_input, v_cnt[DATAMEM_ADDR_WIDTH - 1:0]) == 1, fpconvert(ila.load(edges_input, edge_load_addr), FPedge, FPsum), const(0, 16)) v_cnt_s0_nxt = ila.ite(v_cnt == nv, h0_16, v_cnt + 1) h_cnt_s0_nxt = ila.ite((v_cnt == nv), ila.ite(h_cnt == nh - 1, h0_16, h_cnt + 1), h_cnt) # Here ^^^ is for transiting to next state hidden_update_s0_0 = ila.ite( ila.appfun(rand_func) < ila.appfun(sigmoid_func, train_sum_s0_nxt), b1, b0) hidden_update_s0_1 = ila.ite( v_cnt == nv, ila.store(hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0], hidden_update_s0_0), hidden_unit) hidden_update_s0_2 = ila.ite((v_cnt == nv) & (h_cnt == nh - 1), ila.store(hidden_update_s0_1, nh[HIDDEN_UNIT_WIDTH - 
1:0], b1), hidden_update_s0_1) train_pc_s0_nxt = ila.ite((v_cnt == nv) & (h_cnt == nh - 1), SumHiddenState, SumEdgeState) # Just like init jstate_s0_nxt = h0_16 inner_loop_pc_s0_nxt = h0_4 # add prefix : # train_sum_nxt = ila.ite(SumEdge, train_sum_s0_nxt, ila.ite(SumHidden, ... ) ) # SumHiddenK0-K4 : s1-s5 # pc:1 per_v_pc : 0 1 2 3 LastH = h_cnt == nh LastJ = jstate == K - 1 LastV = (v_cnt + K == nv) | (v_cnt + K >= NUM_VISIBLE_MAX) SumHiddenL0 = SumHidden & (inner_loop_pc == 0) SumHiddenL1 = SumHidden & (inner_loop_pc == 1) SumHiddenL2 = SumHidden & (inner_loop_pc == 2) SumHiddenL3 = SumHidden & (inner_loop_pc == 3) h_cnt_s1_s5_L0_nxt = ila.ite(LastH, h0_16, h_cnt + 1) jstate_s1_s5_L0_nxt = ila.ite(LastH, ila.ite(LastJ, h0_16, jstate + 1), jstate) inner_loop_pc_s1_s5_L0_nxt = ila.ite(LastJ & LastH, h1_4, inner_loop_pc) jstate_s1_s5_L1_nxt = ila.ite(LastJ, h0_16, jstate + 1) inner_loop_pc_s1_s5_L1_nxt = ila.ite(LastJ, h2_4, inner_loop_pc) jstate_s1_s5_L2_nxt = jstate_s1_s5_L1_nxt inner_loop_pc_s1_s5_L2_nxt = ila.ite(LastJ, h3_4, inner_loop_pc) jstate_s1_s5_L3_nxt = jstate_s1_s5_L2_nxt inner_loop_pc_s1_s5_L3_nxt = ila.ite( LastJ, ila.ite(LastV, h0_4, h0_4), # will choose to go back or not inner_loop_pc) def nextCondition(l0, l1, l2, l3, default): return ila.ite( SumHiddenL0, l0, ila.ite( SumHiddenL1, l1, ila.ite(SumHiddenL2, l2, ila.ite(SumHiddenL3, l3, default)))) h_cnt_s1_s5_nxt = nextCondition(h_cnt_s1_s5_L0_nxt, h_cnt, h_cnt, h_cnt, h_cnt) v_cnt_s1_s5_nxt = ila.ite(SumHiddenL3 & LastJ, ila.ite(LastV, h0_16, v_cnt + K), v_cnt) jstate_s1_s5_nxt = nextCondition(jstate_s1_s5_L0_nxt, jstate_s1_s5_L1_nxt, jstate_s1_s5_L2_nxt, jstate_s1_s5_L3_nxt, jstate) inner_loop_pc_s1_s5_nxt = nextCondition(inner_loop_pc_s1_s5_L0_nxt, inner_loop_pc_s1_s5_L1_nxt, inner_loop_pc_s1_s5_L2_nxt, inner_loop_pc_s1_s5_L3_nxt, inner_loop_pc) train_pc_s1_s5_nxt = ila.ite(SumHiddenL3 & LastJ & LastV, StorePosState, SumHiddenState) # L0 train_sum_s1_s5_L0_nxt = ila.ite(h_cnt == 0, h0_16, 
train_sum) + ila.ite( ila.load(hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0]) == 1, fpconvert(ila.load(edges_input, edge_load_addr), FPedge, FPsum), h0_16) _train_max_origin_L0 = ila.ite( jstate == 0, fpconst(-500, FPsum).ast, train_max) # make sure the first time we are comparing with init sum train_max_s1_s5_L0_nxt = ila.ite( LastH, ila.ite(ila.sgt(train_sum_s1_s5_L0_nxt, _train_max_origin_L0), train_sum_s1_s5_L0_nxt, _train_max_origin_L0), train_max) visibleEnergy_s1_s5_L0_nxt = ila.ite( LastH, ila.store(visibleEnergy, jstate[KWIDTH - 1:0], train_sum_s1_s5_L0_nxt), visibleEnergy) # L1 # sum3: 64_64 -> dp: 32_32 _31_sum = fpconst(31, FPsum).ast train_max_s1_s5_L1_nxt = ila.ite(jstate == 0, train_max - _31_sum, train_max) _st_val_L1 = ila.load(visibleEnergy, jstate[KWIDTH - 1:0]) - train_max_s1_s5_L1_nxt visibleEnergy_s1_s5_L1_nxt = ila.store(visibleEnergy, jstate[KWIDTH - 1:0], _st_val_L1) # L2 _pow2_new_val = ila.appfun(to_int_exp, ila.load(visibleEnergy, jstate[KWIDTH - 1:0])) _pow2_new_convert = fpconvert(_pow2_new_val, FPpow, FPsum3) sumOfpow2_s1_s5_L2_nxt = ila.ite(jstate == 0, h0_64, sumOfpow2) + _pow2_new_convert pow2_s1_s5_L2_nxt = ila.store(pow2, jstate[KWIDTH - 1:0], _pow2_new_val) # L3 _probs = ila.appfun(divide_func, [ila.load(pow2, jstate[KWIDTH - 1:0]), sumOfpow2]) _RAND = ila.appfun(rand_func) _visible_unit_new_val = ila.ite(_probs > _RAND, b1, b0) _vu_idx = v_cnt + jstate _visible_unit_s1_s5_L3_1 = ila.store(visible_unit, _vu_idx[VISIBLE_UNIT_WIDTH - 1:0], _visible_unit_new_val) visible_unit_s1_s5_L3_nxt = ila.ite( LastJ & LastV, ila.store(_visible_unit_s1_s5_L3_1, nv[VISIBLE_UNIT_WIDTH - 1:0], b1), _visible_unit_s1_s5_L3_1) # when exit visible unit should be made to store 1 at nv train_sum_s1_s5_nxt = nextCondition(train_sum_s1_s5_L0_nxt, train_sum, train_sum, train_sum, train_sum) train_max_s1_s5_nxt = nextCondition(train_max_s1_s5_L0_nxt, train_max_s1_s5_L1_nxt, train_max, train_max, train_max) visible_unit_s1_s5_nxt = nextCondition(visible_unit, 
visible_unit, visible_unit, visible_unit_s1_s5_L3_nxt, visible_unit) visibleEnergy_s1_s5_nxt = nextCondition(visibleEnergy_s1_s5_L0_nxt, visibleEnergy_s1_s5_L1_nxt, visibleEnergy, visibleEnergy, visibleEnergy) sumOfpow2_s1_s5_nxt = nextCondition(sumOfpow2, sumOfpow2, sumOfpow2_s1_s5_L2_nxt, sumOfpow2, sumOfpow2) pow2_s1_s5_nxt = nextCondition(pow2, pow2, pow2_s1_s5_L2_nxt, pow2, pow2) # before s6: store pos h_cnt_sp_nxt = ila.ite(h_cnt == nh, h0_16, h_cnt + 1) v_cnt_sp_nxt = ila.ite(h_cnt == nh, ila.ite(v_cnt == nv, h0_16, v_cnt + 1), v_cnt) _data_load = ila.load(train_input, v_cnt[VISIBLE_UNIT_WIDTH - 1:0]) _pos_sp_cond = (_data_load != 2) _pos_sp_val = ila.ite(_data_load != 0, b1, b0) & ila.load( hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0]) _pos_st_addr = (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt pos_sp_nxt = ila.store(pos, _pos_st_addr, _pos_sp_val) train_pc_sp_nxt = ila.ite((h_cnt == nh) & (v_cnt == nv), EdgeUpdateState, StorePosState) # update edge : s6 h_cnt_s6_nxt = ila.ite(h_cnt == nh, h0_16, h_cnt + 1) v_cnt_s6_nxt = ila.ite(h_cnt == nh, ila.ite(v_cnt == nv, v_cnt, v_cnt + 1), v_cnt) _pos_ld_addr = (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt train_pos = ila.load(pos, _pos_ld_addr) != 0 train_neg = (ila.load( hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0]) != 0) & (ila.load( visible_unit, v_cnt[VISIBLE_UNIT_WIDTH - 1:0]) != 0) edge_original = ila.load(edges_mem, (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt) edge_new = ila.ite((train_pos) & (~train_neg), edge_original + fpconst(LEARN_RATE, FPedge).ast, ila.ite((~train_pos) & (train_neg), edge_original - fpconst(LEARN_RATE, FPedge).ast, edge_original)) edge_s6_nxt = ila.store(edges_mem, (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt, edge_new) train_pc_s6_nxt = ila.ite((h_cnt == nh) & (v_cnt == nv), EdgeUpdateState, EdgeUpdateState) # no need to jump back itself, because the flag: train_input_done is turned back to zero # don't forget to set back signals in Uabs () train_done = TrainUabs.getreg('train_input_done') train_uabs_index = 
TrainUabs.getreg('index') train_uabs_loop_count = TrainUabs.getreg('loop_count') train_uabs_upc = TrainUabs.getreg('upc') # add prefix s6 !!! s6_complete = (h_cnt == nh) & (v_cnt == nv) index_nxt_s6_nxt = ila.ite( s6_complete, ila.ite((train_uabs_index == nu - 1) & (train_uabs_loop_count != nlp), h0_16, train_uabs_index + 1), train_uabs_index) # assert (train_uabs_index == ntu - 1) & (train_uabs_loop_count == nlp) should never happen loop_count_s6_nxt = ila.ite( s6_complete & (train_uabs_index == nu - 1) & (train_uabs_loop_count != nlp), train_uabs_loop_count + 1, train_uabs_loop_count) upc_s6_nxt = ila.ite(s6_complete, StartReadState, train_uabs_upc) train_input_done_s6_nxt_nxt = ila.ite(s6_complete, b0, train_done) # data -> hidden_unit -> visible_unit -> edge # data -> edge # add def TrainNext(e1, e2, e3, default): return ila.ite( SumEdge, e1, ila.ite(SumHidden, e2, ila.ite(EdgeUpdate, e3, default))) def TrainNextSP(e1, e2, e3, e4, default): return ila.ite( SumEdge, e1, ila.ite(SumHidden, e2, ila.ite(StorePos, e3, ila.ite(EdgeUpdate, e4, default)))) def TrainChoice5(name, e1, e2, e3, default): return ila.choice(name, e1, e2, e3, default) def TrainChoice4(name, e1, e2, default): return ila.choice(name, e1, e2, default) def TrainChoice3(name, e1, default): return ila.choice(name, e1, default) TrainUabs.set_init('train_upc', pc_init) TrainUabs.set_init('train_v_cnt', v_cnt_init) TrainUabs.set_init('train_h_cnt', h_cnt_init) TrainUabs.set_next( 'jstate', TrainNext(jstate_s0_nxt, jstate_s1_s5_nxt, jstate, jstate)) TrainUabs.set_next( 'train_sum', TrainNext(train_sum_s0_nxt, train_sum_s1_s5_nxt, train_sum, train_sum)) TrainUabs.set_next( 'train_v_cnt', TrainNextSP(v_cnt_s0_nxt, v_cnt_s1_s5_nxt, v_cnt_sp_nxt, v_cnt_s6_nxt, v_cnt)) TrainUabs.set_next( 'train_h_cnt', TrainNextSP(h_cnt_s0_nxt, h_cnt_s1_s5_nxt, h_cnt_sp_nxt, h_cnt_s6_nxt, h_cnt)) TrainUabs.set_next( 'train_upc', TrainNextSP(train_pc_s0_nxt, train_pc_s1_s5_nxt, train_pc_sp_nxt, train_pc_s6_nxt, train_pc)) 
TrainUabs.set_next( 'train_max', TrainNext(train_max, train_max_s1_s5_nxt, train_max, train_max)) TrainUabs.set_next( 'hidden_unit', TrainNext(hidden_update_s0_2, hidden_unit, hidden_unit, hidden_unit)) TrainUabs.set_next( 'visible_unit', TrainNext(visible_unit, visible_unit_s1_s5_nxt, visible_unit, visible_unit)) TrainUabs.set_next('edges', TrainNext(edges_mem, edges_mem, edge_s6_nxt, edges_mem)) TrainUabs.set_next( 'index', TrainNext(train_uabs_index, train_uabs_index, index_nxt_s6_nxt, train_uabs_index)) TrainUabs.set_next( 'loop_count', TrainNext(train_uabs_loop_count, train_uabs_loop_count, loop_count_s6_nxt, train_uabs_loop_count)) TrainUabs.set_next( 'upc', TrainNext(train_uabs_upc, train_uabs_upc, upc_s6_nxt, train_uabs_upc)) TrainUabs.set_next( 'train_input_done', TrainNext(train_done, train_done, train_input_done_s6_nxt_nxt, train_done)) # newly added TrainUabs.set_next( 'visibleEnergies', TrainNext(visibleEnergy, visibleEnergy_s1_s5_nxt, visibleEnergy, visibleEnergy)) TrainUabs.set_next( 'sumOfpow2', TrainNext(sumOfpow2, sumOfpow2_s1_s5_nxt, sumOfpow2, sumOfpow2)) TrainUabs.set_next('pow2', TrainNext(pow2, pow2_s1_s5_nxt, pow2, pow2)) TrainUabs.set_next('pos', ila.ite(StorePos, pos_sp_nxt, pos)) TrainUabs.set_next( 'per_v_pc', TrainNext(inner_loop_pc_s0_nxt, inner_loop_pc_s1_s5_nxt, inner_loop_pc, inner_loop_pc)) #------------------------------------ # Predict UUABS #------------------------------------ # data -> predict_result PredictUabs = uabs.add_microabstraction('predict', predict_input_done == 1) sigmoid_func = PredictUabs.fun('sigmoid', 64, [16]) # DATA_sum_, 01_D rand_func = PredictUabs.fun('rand', 64, []) # generate random number to_int_exp = PredictUabs.fun('to_int_exp', 32, [16]) # round_func = PredictUabs.fun('round', 8, [32]) # 05_D -> u8 divide_func = PredictUabs.fun( 'divide', 64, [32, 64]) # dp:32_32 / sum_of_pow2 64_64 = 64_1 hidden_unit = PredictUabs.mem('hidden_unit', HIDDEN_UNIT_WIDTH, 1) visibleEnergy = 
PredictUabs.mem('visibleEnergies', KWIDTH, 16) predict_result = PredictUabs.getmem('predict_result') predict_sum = PredictUabs.reg('predict_sum', 16) predict_max = PredictUabs.reg('predict_max', 16) sumOfpow2 = PredictUabs.reg('sumOfpow2', 64) pow2 = PredictUabs.mem('pow2', KWIDTH, 32) predict_vector = PredictUabs.mem('predict_vector', VISIBLE_UNIT_WIDTH, 1) inner_loop_pc = PredictUabs.reg('per_v_pc', 4) count = PredictUabs.reg('count', 8) jstate = PredictUabs.reg('jstate', 16) expectation = PredictUabs.reg('expectation', 32) prediction = PredictUabs.reg('prediction', 8) predict_pc = PredictUabs.reg('predict_upc', 4) # Re-evaluate v_cnt = PredictUabs.reg('predict_v_cnt', 16) h_cnt = PredictUabs.reg('predict_h_cnt', 16) predict_input = PredictUabs.getmem('data') edges_input = PredictUabs.getmem('edges') nv = PredictUabs.getreg('num_visible') nh = PredictUabs.getreg('num_hidden') nu = PredictUabs.getreg('num_users') ntu = PredictUabs.getreg('num_testusers') nlp = PredictUabs.getreg('num_loops') SumEdge = predict_pc == 0 SumEdgeState = const(0, 4) SumHidden = predict_pc == 1 SumHiddenState = const(1, 4) GenResult = predict_pc == 3 GenResultState = const(3, 4) WaitForWrite = predict_pc == 2 WaitForWriteState = const(2, 4) PredictUabs.decode_exprs = [SumEdge, SumHidden, WaitForWrite] #Begin v_cnt_init = const(0, 16) h_cnt_init = const(0, 16) pc_init = const(0, 4) #SumEdge: s0 edge_load_addr = (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt predict_sum_s0_nxt = ila.ite(v_cnt == 0, const( 0, 16), predict_sum) + ila.ite( ila.load(predict_input, v_cnt[DATAMEM_ADDR_WIDTH - 1:0]) == 1, fpconvert(ila.load(edges_input, edge_load_addr), FPedge, FPsum), const(0, 16)) v_cnt_s0_nxt = ila.ite(v_cnt == nv, h0_16, v_cnt + 1) h_cnt_s0_nxt = ila.ite((v_cnt == nv), ila.ite(h_cnt == nh - 1, h0_16, h_cnt + 1), h_cnt) # Here ^^^ is for transiting to next state hidden_update_s0_0 = ila.ite( fpconst(0.5, FP01_D).ast < ila.appfun(sigmoid_func, predict_sum_s0_nxt), b1, b0) hidden_update_s0_1 = ila.ite( 
v_cnt == nv, ila.store(hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0], hidden_update_s0_0), hidden_unit) hidden_update_s0_2 = ila.ite((v_cnt == nv) & (h_cnt == nh - 1), ila.store(hidden_update_s0_1, nh[HIDDEN_UNIT_WIDTH - 1:0], b1), hidden_update_s0_1) hidden_update_s0_next = hidden_update_s0_2 predict_pc_s0_nxt = ila.ite((v_cnt == nv) & (h_cnt == nh - 1), SumHiddenState, SumEdgeState) jstate_s0_nxt = h0_16 count_s0_nxt = ila.const(0, 8) inner_loop_pc_s0_nxt = h0_4 # add prefix : # predict_sum_nxt = ila.ite(SumEdge, predict_sum_s0_nxt, ila.ite(SumHidden, ... ) ) #----------------------------- # SumHiddensK0-K4 : s1-s5 # #----------------------------- LastH = h_cnt == nh LastJ = jstate == K - 1 LastV = (v_cnt + K == nv) | (v_cnt + K >= NUM_VISIBLE_MAX) SumHiddenL0 = SumHidden & (inner_loop_pc == 0) SumHiddenL1 = SumHidden & (inner_loop_pc == 1) SumHiddenL2 = SumHidden & (inner_loop_pc == 2) SumHiddenL3 = SumHidden & (inner_loop_pc == 3) SumHiddenL4 = SumHidden & (inner_loop_pc == 4) h_cnt_s1_s5_L0_nxt = ila.ite(LastH, h0_16, h_cnt + 1) jstate_s1_s5_L0_nxt = ila.ite(LastH, ila.ite(LastJ, h0_16, jstate + 1), jstate) inner_loop_pc_s1_s5_L0_nxt = ila.ite(LastJ & LastH, h1_4, inner_loop_pc) jstate_s1_s5_L1_nxt = ila.ite(LastJ, h0_16, jstate + 1) inner_loop_pc_s1_s5_L1_nxt = ila.ite(LastJ, h2_4, inner_loop_pc) jstate_s1_s5_L2_nxt = jstate_s1_s5_L1_nxt inner_loop_pc_s1_s5_L2_nxt = ila.ite(LastJ, h3_4, inner_loop_pc) jstate_s1_s5_L3_nxt = jstate_s1_s5_L2_nxt inner_loop_pc_s1_s5_L3_nxt = ila.ite(LastJ, h4_4, inner_loop_pc) jstate_s1_s5_L4_nxt = jstate_s1_s5_L3_nxt inner_loop_pc_s1_s5_L4_nxt = ila.ite( LastJ, ila.ite(LastV, h0_4, h0_4), # will choose to go back or not inner_loop_pc) def nextCondition(l0, l1, l2, l3, l4, default): return ila.ite( SumHiddenL0, l0, ila.ite( SumHiddenL1, l1, ila.ite( SumHiddenL2, l2, ila.ite(SumHiddenL3, l3, ila.ite(SumHiddenL4, l4, default))))) h_cnt_s1_s5_nxt = nextCondition(h_cnt_s1_s5_L0_nxt, h_cnt, h_cnt, h_cnt, h_cnt, h_cnt) v_cnt_s1_s5_nxt = 
ila.ite(SumHiddenL4 & LastJ, ila.ite(LastV, h0_16, v_cnt + K), v_cnt) jstate_s1_s5_nxt = nextCondition(jstate_s1_s5_L0_nxt, jstate_s1_s5_L1_nxt, jstate_s1_s5_L2_nxt, jstate_s1_s5_L3_nxt, jstate_s1_s5_L4_nxt, jstate) inner_loop_pc_s1_s5_nxt = nextCondition(inner_loop_pc_s1_s5_L0_nxt, inner_loop_pc_s1_s5_L1_nxt, inner_loop_pc_s1_s5_L2_nxt, inner_loop_pc_s1_s5_L3_nxt, inner_loop_pc_s1_s5_L4_nxt, inner_loop_pc) predict_pc_s1_s5_nxt = ila.ite(SumHiddenL4 & LastJ & LastV, GenResultState, SumHiddenState) # L0 predict_sum_s1_s5_L0_nxt = ila.ite( h_cnt == 0, h0_16, predict_sum) + ila.ite( ila.load(hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0]) == 1, fpconvert(ila.load(edges_input, edge_load_addr), FPedge, FPsum), h0_16) _predict_max_origin_L0 = ila.ite( jstate == 0, fpconst(-500, FPsum).ast, predict_max) # make sure the first time we are comparing with init sum predict_max_s1_s5_L0_nxt = ila.ite( LastH, ila.ite(ila.sgt(predict_sum_s1_s5_L0_nxt, _predict_max_origin_L0), predict_sum_s1_s5_L0_nxt, _predict_max_origin_L0), predict_max) visibleEnergy_s1_s5_L0_nxt = ila.ite( LastH, ila.store(visibleEnergy, jstate[KWIDTH - 1:0], predict_sum_s1_s5_L0_nxt), visibleEnergy) # L1 # sum3: 64_64 -> dp: 32_32 _31_sum = fpconst(31, FPsum).ast predict_max_s1_s5_L1_nxt = ila.ite(jstate == 0, predict_max - _31_sum, predict_max) _st_val_L1 = ila.load(visibleEnergy, jstate[KWIDTH - 1:0]) - predict_max_s1_s5_L1_nxt visibleEnergy_s1_s5_L1_nxt = ila.store(visibleEnergy, jstate[KWIDTH - 1:0], _st_val_L1) # L2 _pow2_new_val = ila.appfun(to_int_exp, ila.load(visibleEnergy, jstate[KWIDTH - 1:0])) _pow2_new_convert = fpconvert(_pow2_new_val, FPpow, FPsum3) sumOfpow2_s1_s5_L2_nxt = ila.ite(jstate == 0, h0_64, sumOfpow2) + _pow2_new_convert pow2_s1_s5_L2_nxt = ila.store(pow2, jstate[KWIDTH - 1:0], _pow2_new_val) # L3 _probs = ila.appfun(divide_func, [ila.load(pow2, jstate[KWIDTH - 1:0]), sumOfpow2]) _mul = fixpoint(_probs, FP01_D) * fixpoint(jstate, FPu16) expectation_s1_s5_L3_nxt = ila.ite(jstate == 0, 
h0_32, expectation) + _mul.toFormat(FP05_D) # L4 _prediction = ila.zero_extend(ila.appfun(round_func, [expectation]), 16) _pv_val = ila.ite(jstate == _prediction, b1, b0) _pv_idx = v_cnt + jstate _first_store = ila.store(predict_vector, _pv_idx[VISIBLE_UNIT_WIDTH - 1:0], _pv_val) predict_vector_s1_s5_L4_nxt = ila.ite( SumHiddenL4 & LastV & LastJ, ila.store(_first_store, nv[VISIBLE_UNIT_WIDTH - 1:0], b1), _first_store) predict_sum_s1_s5_nxt = nextCondition(predict_sum_s1_s5_L0_nxt, predict_sum, predict_sum, predict_sum, predict_sum, predict_sum) predict_max_s1_s5_nxt = nextCondition(predict_max_s1_s5_L0_nxt, predict_max_s1_s5_L1_nxt, predict_max, predict_max, predict_max, predict_max) visibleEnergy_s1_s5_nxt = nextCondition(visibleEnergy_s1_s5_L0_nxt, visibleEnergy_s1_s5_L1_nxt, visibleEnergy, visibleEnergy, visibleEnergy, visibleEnergy) sumOfpow2_s1_s5_nxt = nextCondition(sumOfpow2, sumOfpow2, sumOfpow2_s1_s5_L2_nxt, sumOfpow2, sumOfpow2, sumOfpow2) pow2_s1_s5_nxt = nextCondition(pow2, pow2, pow2_s1_s5_L2_nxt, pow2, pow2, pow2) expectation_s1_s5_nxt = ila.ite(SumHiddenL3, expectation_s1_s5_L3_nxt, expectation) predict_vector_s1_s5_nxt = ila.ite(SumHiddenL4, predict_vector_s1_s5_L4_nxt, predict_vector) count_s1_s5_nxt = ila.ite(SumHiddenL4 & LastV & LastJ, h0_8, count) # before s6: store pos LastV = (v_cnt + K == nv) | (v_cnt + K >= NUM_VISIBLE_MAX) LastJ = jstate == K - 1 v_cnt_sp_nxt = ila.ite(LastV, v_cnt + K, v_cnt + K) jstate_sp_nxt = ila.ite(LastJ, h0_16, jstate + 1) _prediction_old = ila.ite(jstate == 0, h0_8, prediction) _pv_idx = v_cnt + jstate _predict_result_sp_val = ila.load(predict_vector, _pv_idx[VISIBLE_UNIT_WIDTH - 1:0]) prediction_sp_nxt = ila.ite(_predict_result_sp_val == 1, (jstate + 1)[7:0], _prediction_old) count_sp_nxt = ila.ite(LastJ, count + 1, count) predict_result_sp_nxt = ila.ite( LastJ, ila.store(predict_result, count[PREDICT_RESULT_WIDTH - 1:0], prediction), predict_result) predict_pc_sp_nxt = ila.ite(LastV & LastJ, WaitForWriteState, 
GenResultState) wr_complete = PredictUabs.getreg('wr_complete') wr_req = PredictUabs.getreg('wr_request') wr_len = PredictUabs.getreg('wr_length') wr_idx = PredictUabs.getreg('wr_index') cur_idx = PredictUabs.getreg('index') # 32 exitLoop = LastV & LastJ wr_request_sp_nxt = ila.ite(exitLoop, b1, wr_req) wr_index_sp_nxt = ila.ite( exitLoop, ila.zero_extend(nm, 32) * ila.zero_extend(cur_idx, 32), wr_idx) wr_length_sp_nxt = ila.ite(exitLoop, ila.zero_extend(nm, 32), wr_len) wr_complete_sp_nxt = ila.ite(exitLoop, b0, wr_complete) # s6: #--------------------- # update edge : s6 #--------------------- FinishOneRound = (wr_req == 0) & (wr_complete == 1) predict_pc_s6_nxt = ila.ite(FinishOneRound, WaitForWriteState, WaitForWriteState) # its value does not matter because it will be terminated by predict_input_done # don't forget to set back signals in Uabs () predict_done = PredictUabs.getreg('predict_input_done') predict_uabs_index = PredictUabs.getreg('index') predict_uabs_loop_count = PredictUabs.getreg('loop_count') predict_uabs_upc = PredictUabs.getreg('upc') all_done = PredictUabs.getreg('done') # add prefix s6 !!! 
index_nxt_s6_nxt = ila.ite( FinishOneRound, ila.ite( (predict_uabs_index == ntu - 1) & (predict_uabs_loop_count == nlp), predict_uabs_index, predict_uabs_index + 1), predict_uabs_index) wr_complete_s6_nxt = ila.ite(FinishOneRound, b0, wr_complete) # assert (predict_uabs_index == nu - 1) & (predict_uabs_loop_count != nlp) should never happen #loop_count_s6_nxt = ila.ite( (predict_uabs_index == nu - 1) & (predict_uabs_loop_count != nlp) , predict_uabs_loop_count + 1, predict_uabs_loop_count ) upc_s6_nxt = ila.ite( FinishOneRound, ila.ite( (predict_uabs_index == ntu - 1) & (predict_uabs_loop_count == nlp), FinishState, StartReadState), predict_uabs_upc) predict_input_done_s6_nxt_nxt = ila.ite(FinishOneRound, b0, predict_done) all_done_s6_nxt = ila.ite( FinishOneRound & (predict_uabs_index == ntu - 1) & (predict_uabs_loop_count == nlp), b1, b0) # data -> hidden_unit -> visible_unit -> edge # data -> edge # add # add def predictNext(e1, e2, e3, default): return ila.ite( SumEdge, e1, ila.ite(SumHidden, e2, ila.ite(WaitForWrite, e3, default))) def predictNextSp(e1, e2, e3, e4, default): return ila.ite( SumEdge, e1, ila.ite(SumHidden, e2, ila.ite(GenResult, e3, ila.ite(WaitForWrite, e4, default)))) def ite(inst, e, default): return ila.ite(inst, e, default) PredictUabs.set_init('predict_upc', pc_init) PredictUabs.set_init('predict_v_cnt', v_cnt_init) PredictUabs.set_init('predict_h_cnt', h_cnt_init) PredictUabs.set_next( 'jstate', predictNextSp(jstate_s0_nxt, jstate_s1_s5_nxt, jstate_sp_nxt, jstate, jstate)) PredictUabs.set_next( 'predict_sum', predictNext(predict_sum_s0_nxt, predict_sum_s1_s5_nxt, predict_sum, predict_sum)) PredictUabs.set_next( 'predict_v_cnt', predictNextSp(v_cnt_s0_nxt, v_cnt_s1_s5_nxt, v_cnt_sp_nxt, v_cnt, v_cnt)) PredictUabs.set_next( 'predict_h_cnt', predictNext(h_cnt_s0_nxt, h_cnt_s1_s5_nxt, h_cnt, h_cnt)) PredictUabs.set_next( 'predict_upc', predictNextSp(predict_pc_s0_nxt, predict_pc_s1_s5_nxt, predict_pc_sp_nxt, predict_pc_s6_nxt, predict_pc)) 
PredictUabs.set_next( 'predict_max', predictNext(predict_max, predict_max_s1_s5_nxt, predict_max, predict_max)) PredictUabs.set_next( 'hidden_unit', predictNext(hidden_update_s0_2, hidden_unit, hidden_unit, hidden_unit)) PredictUabs.set_next( 'count', predictNextSp(count_s0_nxt, count_s1_s5_nxt, count_sp_nxt, count, count)) PredictUabs.set_next( 'per_v_pc', predictNext(inner_loop_pc_s0_nxt, inner_loop_pc_s1_s5_nxt, inner_loop_pc, inner_loop_pc)) PredictUabs.set_next( 'index', predictNext(predict_uabs_index, predict_uabs_index, index_nxt_s6_nxt, predict_uabs_index)) PredictUabs.set_next( 'upc', predictNext(predict_uabs_upc, predict_uabs_upc, upc_s6_nxt, predict_uabs_upc)) PredictUabs.set_next( 'predict_input_done', predictNext(predict_done, predict_done, predict_input_done_s6_nxt_nxt, predict_done)) PredictUabs.set_next( 'done', predictNext(all_done, all_done, all_done_s6_nxt, all_done)) PredictUabs.set_next( 'wr_request', predictNextSp(wr_req, wr_req, wr_request_sp_nxt, wr_req, wr_req)) PredictUabs.set_next( 'wr_length', predictNextSp(wr_len, wr_len, wr_length_sp_nxt, wr_len, wr_len)) PredictUabs.set_next( 'wr_index', predictNextSp(wr_idx, wr_idx, wr_index_sp_nxt, wr_idx, wr_idx)) PredictUabs.set_next( 'wr_complete', predictNextSp(wr_complete, wr_complete, wr_complete_sp_nxt, wr_complete_s6_nxt, wr_complete)) # newly added PredictUabs.set_next( 'visibleEnergies', predictNext(visibleEnergy, visibleEnergy_s1_s5_nxt, visibleEnergy, visibleEnergy)) PredictUabs.set_next( 'sumOfpow2', predictNext(sumOfpow2, sumOfpow2_s1_s5_nxt, sumOfpow2, sumOfpow2)) PredictUabs.set_next('pow2', predictNext(pow2, pow2_s1_s5_nxt, pow2, pow2)) PredictUabs.set_next( 'expectation', predictNext(expectation, expectation_s1_s5_nxt, expectation, expectation)) PredictUabs.set_next( 'predict_vector', predictNext(predict_vector, predict_vector_s1_s5_nxt, predict_vector, predict_vector)) PredictUabs.set_next('prediction', ite(GenResult, prediction_sp_nxt, prediction)) 
PredictUabs.set_next('predict_result', ite(GenResult, predict_result_sp_nxt, predict_result)) #------------------------------------ # Store UABS #------------------------------------ # store is triggered by inst as uabs? # wr_grant == 1 is an instruction wr_granted = rbm.reg('wr_granted', 1) rbm.set_next('wr_granted', ila.ite((wr_request & wr_grant) == 1, b1, wr_granted)) data_out_1st_set = ila.zero_extend( ila.load(predict_result, const(0, PREDICT_RESULT_WIDTH)), 32) rbm.set_next( 'data_out', ila.ite((wr_request & wr_grant) == 1, data_out_1st_set, data_out)) # This is a hard decision, # as we set_next, the reaction as we defined will be appear in the next cycle StoreUabs = rbm.add_microabstraction('store', wr_granted == 1) store_idx = StoreUabs.reg('i', 16) nm = StoreUabs.getreg('num_movies') wr_granted = StoreUabs.getreg('wr_granted') wr_request = StoreUabs.getreg('wr_request') wr_complete = StoreUabs.getreg('wr_complete') predict_result = StoreUabs.getmem('predict_result') StoreUabs.set_init('i', h1_16) StoreUabs.set_next('i', ila.ite(store_idx < nm, store_idx + 1, store_idx)) StoreUabs.set_next('wr_granted', ila.ite(store_idx < nm, wr_granted, b0)) StoreUabs.set_next('wr_request', ila.ite(store_idx == 0, b0, wr_request)) StoreUabs.set_next('wr_complete', ila.ite(store_idx < nm, wr_complete, b1)) data_out = StoreUabs.getreg('data_out') # possibly one cycle earlier StoreUabs.set_next( 'data_out', ila.zero_extend( ila.load(predict_result, store_idx[PREDICT_RESULT_WIDTH - 1:0]), 32)) #--------------------------- # Add no next # def keepNC(Abs, name): Abs.set_next(name, Abs.getreg(name)) def keepMemNC(Abs, name): Abs.set_next(name, Abs.getmem(name)) keepNC(rbm, 'done') keepNC(rbm, 'wr_request') keepNC(rbm, 'wr_index') keepNC(rbm, 'wr_length') keepNC(rbm, 'rd_index') keepNC(rbm, 'rd_length') keepNC(rbm, 'rd_request') keepMemNC(uabs, 'edges') keepNC(rbm, 'rd_complete') keepNC(rbm, 'wr_complete') return rbm
def auxMulh_u(self, dataA, dataB):
    """Unsigned multiply-high: return the upper SCALAR_REG_BITS bits of
    the (2 * SCALAR_REG_BITS)-wide product of dataA and dataB.

    Both operands are zero-extended (unsigned semantics) so the full
    double-width product is computed without overflow.
    """
    dataADoubleLength = ila.zero_extend(dataA, 2 * SCALAR_REG_BITS)
    dataBDoubleLength = ila.zero_extend(dataB, 2 * SCALAR_REG_BITS)
    resultDoubleLength = dataADoubleLength * dataBDoubleLength
    # Bug fix: a "mulh" must return the HIGH half of the double-width
    # product; the previous code sliced the LOW half ([31:0]) and also
    # hard-coded 32 bits instead of using SCALAR_REG_BITS.
    mulResult = resultDoubleLength[2 * SCALAR_REG_BITS - 1:SCALAR_REG_BITS]
    return mulResult
def createSHAILA(synstates, enable_ps):
    """Build the SHA accelerator ILA, synthesize the next-state functions
    named in *synstates*, export each synthesized AST, and generate a
    C++ simulator.

    synstates -- iterable of state names to synthesize (passed to
                 m.synthesize / m.get_next).
    enable_ps -- enables parameterized synthesis; also selects the
                 timing-file suffix ('en' vs 'dis').

    Side effects: writes 'sha-times-<suffix>.txt', 'asts/<state>_<suffix>'
    files, and a simulator under 'sim'.

    NOTE(review): Python 2 code (xrange, print >>, time.clock).
    """
    m = ila.Abstraction("sha")
    m.enable_parameterized_synthesis = enable_ps
    # I/O interface: this is where commands come from.
    cmd = m.inp('cmd', 2)
    cmdaddr = m.inp('cmdaddr', 16)
    cmddata = m.inp('cmddata', 8)
    # response.
    dataout = m.reg('dataout', 8)
    # internal arch state.
    state = m.reg('sha_state', 3)
    rdaddr = m.reg('sha_rdaddr', 16)
    wraddr = m.reg('sha_wraddr', 16)
    oplen = m.reg('sha_len', 16)
    # for the uinst.
    bytes_read = m.reg('sha_bytes_read', 16)
    rd_data = m.reg('sha_rd_data', 512)
    hs_data = m.reg('sha_hs_data', 160)
    xram = m.mem('XRAM', 16, 8)
    # Uninterpreted function standing in for the SHA compression step:
    # 512-bit block in, 160-bit digest state out.
    sha = m.fun('sha', 160, [512])
    # fetch is just looking at the input command.
    m.fetch_expr = ila.concat([state, cmd, cmdaddr, cmddata])
    m.fetch_valid = (cmd == 1) | (cmd == 2)
    # decode: one decode expression per (state, command, address) combination
    # over the accelerator's MMIO window 0xfe00-0xfe0f.
    rdcmds = [(state == i) & (cmd == 1) & (cmdaddr == addr)
              for addr in xrange(0xfe00, 0xfe10) for i in [0, 1, 2, 3, 4]]
    wrcmds = [(state == 0) & (cmd == 2) & (cmdaddr == addr)
              for addr in xrange(0xfe00, 0xfe10)]
    nopcmds = [(state == i) & (cmd != 1) & (cmdaddr == addr)
               for addr in xrange(0xfe00, 0xfe10) for i in [1, 2, 3, 4]]
    m.decode_exprs = rdcmds + wrcmds + nopcmds
    # read commands: dataout returns one byte of state/address/length,
    # chosen by synthesis via ila.choice.
    statebyte = ila.zero_extend(state, 8)
    rdaddrbyte = ila.readchunk('rd_addr', rdaddr, 8)
    wraddrbyte = ila.readchunk('wr_addr', wraddr, 8)
    oplenbyte = ila.readchunk('op_len', oplen, 8)
    dataoutnext = ila.choice(
        'dataout',
        [statebyte, rdaddrbyte, wraddrbyte, oplenbyte, m.const(0, 8)])
    m.set_next('dataout', dataoutnext)
    # write commands.
    def mb_reg_wr(name, reg):
        # multibyte register write: either write one byte chunk of the
        # 16-bit register from cmddata, or leave it unchanged.
        reg_wr = ila.writechunk('wr_' + name, reg, cmddata)
        reg_nxt = ila.choice('nxt_' + name, [reg_wr, reg])
        m.set_next(name, reg_nxt)
    mb_reg_wr('sha_rdaddr', rdaddr)
    mb_reg_wr('sha_wraddr', wraddr)
    mb_reg_wr('sha_len', oplen)
    # state: candidates are the five constants, a start-on-command test,
    # and a "keep reading until oplen consumed" test.
    state_next = ila.choice('state_next', [
        m.const(0, 3), m.const(1, 3), m.const(2, 3), m.const(3, 3),
        m.const(4, 3),
        ila.ite(cmddata == 1, m.const(1, 3), state),
        ila.ite(bytes_read < oplen, m.const(1, 3), m.const(4, 3))
    ])
    m.set_next('sha_state', state_next)
    # these are for the uinst
    # bytes_read
    #bytes_read_inc = ila.ite(bytes_read+64 <= oplen, bytes_read+64, oplen)
    bytes_read_inc = bytes_read + 64
    bytes_read_rst = ila.ite(cmddata == 1, m.const(0, 16), bytes_read)
    bytes_read_nxt = ila.choice(
        'bytes_read_nxt',
        [m.const(0, 16), bytes_read_inc, bytes_read_rst, bytes_read])
    m.set_next('sha_bytes_read', bytes_read_nxt)
    # rd_data: load a 64-byte block from XRAM; synthesis picks the
    # endianness (loadblk vs loadblk_big) or no change.
    rdblock_little = ila.loadblk(xram, rdaddr + bytes_read, 64)
    rdblock_big = ila.loadblk_big(xram, rdaddr + bytes_read, 64)
    # NOTE(review): ila.choice is called with varargs here but with a
    # list elsewhere in this file — confirm both forms are supported.
    rd_data_nxt = ila.choice('rd_data_nxt', rdblock_big, rdblock_little,
                             rd_data)
    m.set_next('sha_rd_data', rd_data_nxt)
    # hs_data: apply the uninterpreted sha function to the current block.
    sha_hs_data = ila.appfun(sha, [rd_data])
    hs_data_nxt = ila.choice('hs_data_nxt', sha_hs_data, hs_data)
    m.set_next('sha_hs_data', hs_data_nxt)
    # xram write: store the digest back, endianness chosen by synthesis.
    xram_w_sha_little = ila.storeblk(xram, wraddr, hs_data)
    xram_w_sha_big = ila.storeblk_big(xram, wraddr, hs_data)
    xram_nxt = ila.choice('xram_nxt', xram, xram_w_sha_little, xram_w_sha_big)
    m.set_next('XRAM', xram_nxt)
    suffix = 'en' if enable_ps else 'dis'
    timefile = open('sha-times-%s.txt' % suffix, 'wt')
    t_elapsed = 0
    # synthesis: the SHA() simulator is the oracle for each state.
    sim = lambda s: SHA().simulate(s)
    for s in synstates:
        st = time.clock()
        m.synthesize(s, sim)
        dt = time.clock() - st
        print >> timefile, '%s %.2f' % (s, dt)
        t_elapsed += dt
        ast = m.get_next(s)
        m.exportOne(ast, 'asts/%s_%s' % (s, suffix))
    print 'time: %.2f' % t_elapsed
    #m.generateSim('tmp/shasim.hpp')
    m.generateSimToDir('sim')
def vreg_nxt(self, regNo, laneNo):
    """Next-state expression for lane *laneNo* of vector register *regNo*.

    Builds one nested ila.ite tree: the lane keeps its old value unless
    this instruction writes register regNo (self.dest == regNo), in
    which case the new value is selected by instruction class (reg-reg
    vs. immediate), by rrType/immType (scalar vs. vector second operand,
    masked vs. unmasked), and by the ALU opcode.
    """
    # Scalar operands read from SGPRs.
    ssreg1 = self.indexToSGPR(self.rrSrc1)
    ssreg2 = self.indexToSGPR(self.rrSrc2)
    # Per-lane vector operands read from VGPRs at this lane.
    vsreg1 = self.indexToVGPR(self.rrSrc1, self.model.const(laneNo, SCALAR_REG_BITS))
    vsreg2 = self.indexToVGPR(self.rrSrc2, self.model.const(laneNo, SCALAR_REG_BITS))
    # Lane mask: bit laneNo decides whether a masked op updates this lane.
    mask = self.indexToSGPR(self.rrMask)
    #load instruction
    addr = self.indexToSGPR(self.memPtr) + ila.sign_extend(self.memOffSet, SCALAR_REG_BITS)
    load_val = ila.load(self.mem, ila.zero_extend(addr[31:2], PC_BITS))
    # NOTE(review): load_val is computed but never used, and the isLoad
    # branch below returns the unchanged register on BOTH sides — looks
    # like a placeholder for vector loads; confirm intended semantics.
    return ila.ite(self.dest == regNo,
        ila.ite(self.isRegReg,
            # rrType 001: vector OP scalar, unmasked.
            ila.ite(self.rrType == self.model.const(0b001, 3),
                ila.ite(self.rrOpcode == NyEncoding.ADD_I, vsreg1 + ssreg2,
                ila.ite(self.rrOpcode == NyEncoding.SUB_I, vsreg1 - ssreg2,
                ila.ite(self.rrOpcode == NyEncoding.AND, vsreg1 & ssreg2,
                ila.ite(self.rrOpcode == NyEncoding.OR, vsreg1 | ssreg2,
                # NOTE(review): MULH_I is modeled as plain '*' (low half)
                # here — compare with the auxMulh_u helper; confirm.
                ila.ite(self.rrOpcode == NyEncoding.MULH_I, vsreg1 * ssreg2,
                    self.vector_registers[regNo][laneNo]))))),
            # rrType 100: vector OP vector, unmasked.
            ila.ite(self.rrType == self.model.const(0b100, 3),
                ila.ite(self.rrOpcode == NyEncoding.ADD_I, vsreg1 + vsreg2,
                ila.ite(self.rrOpcode == NyEncoding.SUB_I, vsreg1 - vsreg2,
                ila.ite(self.rrOpcode == NyEncoding.AND, vsreg1 & vsreg2,
                ila.ite(self.rrOpcode == NyEncoding.OR, vsreg1 | vsreg2,
                ila.ite(self.rrOpcode == NyEncoding.MULH_I, vsreg1 * vsreg2,
                    self.vector_registers[regNo][laneNo]))))),
            # rrType 010: vector OP scalar, masked (lane skipped when mask bit is 0).
            ila.ite(self.rrType == self.model.const(0b010, 3),
                ila.ite(mask[laneNo] == self.model.const(0b0, 1),
                    self.vector_registers[regNo][laneNo],
                    ila.ite(self.rrOpcode == NyEncoding.ADD_I, vsreg1 + ssreg2,
                    ila.ite(self.rrOpcode == NyEncoding.SUB_I, vsreg1 - ssreg2,
                    ila.ite(self.rrOpcode == NyEncoding.AND, vsreg1 & ssreg2,
                    ila.ite(self.rrOpcode == NyEncoding.OR, vsreg1 | ssreg2,
                    ila.ite(self.rrOpcode == NyEncoding.MULH_I, vsreg1 * ssreg2,
                        self.vector_registers[regNo][laneNo])))))),
            # rrType 101: vector OP vector, masked.
            ila.ite(self.rrType == self.model.const(0b101, 3),
                ila.ite(mask[laneNo] == self.model.const(0b0, 1),
                    self.vector_registers[regNo][laneNo],
                    ila.ite(self.rrOpcode == NyEncoding.ADD_I, vsreg1 + vsreg2,
                    ila.ite(self.rrOpcode == NyEncoding.SUB_I, vsreg1 - vsreg2,
                    ila.ite(self.rrOpcode == NyEncoding.AND, vsreg1 & vsreg2,
                    ila.ite(self.rrOpcode == NyEncoding.OR, vsreg1 | vsreg2,
                    ila.ite(self.rrOpcode == NyEncoding.MULH_I, vsreg1 * vsreg2,
                        self.vector_registers[regNo][laneNo])))))),
                # Unknown rrType: lane unchanged.
                self.vector_registers[regNo][laneNo])))),
        ila.ite(self.isImmediate,
            # immType 01: vector OP immediate B, unmasked.
            ila.ite(self.immType == self.model.const(0b01, 2),
                ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.ADD_I, vsreg1 + self.immB,
                ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.SUB_I, vsreg1 - self.immB,
                ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.AND, vsreg1 & self.immB,
                ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.OR, vsreg1 | self.immB,
                ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.MULH_I, vsreg1 * self.immB,
                    self.vector_registers[regNo][laneNo]))))),
            # immType 11: vector OP immediate A, masked.
            ila.ite(self.immType == self.model.const(0b11, 2),
                ila.ite(mask[laneNo] == self.model.const(0b0, 1),
                    self.vector_registers[regNo][laneNo],
                    ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.ADD_I, vsreg1 + self.immA,
                    ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.SUB_I, vsreg1 - self.immA,
                    ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.AND, vsreg1 & self.immA,
                    ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.OR, vsreg1 | self.immA,
                    ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.MULH_I, vsreg1 * self.immA,
                        self.vector_registers[regNo][laneNo])))))),
                # Unknown immType: lane unchanged.
                self.vector_registers[regNo][laneNo]),
            ),
            # Neither reg-reg nor immediate: load path (currently a no-op
            # on both branches — see NOTE above).
            ila.ite(self.isLoad == self.model.const(0b1, 1),
                self.vector_registers[regNo][laneNo],
                self.vector_registers[regNo][laneNo])
        )),
        # Instruction does not target this register: lane unchanged.
        self.vector_registers[regNo][laneNo])
def createShaIla():
    """Construct and return the SHA accelerator ILA (no synthesis).

    Declares the command I/O interface, architectural state, child-ILA
    (micro-instruction) state, and the next-state expressions for every
    state variable, using ila.choice to leave implementation choices
    (byte selection, endianness, state encoding) to synthesis.
    """
    m = ila.Abstraction("sha")
    m.enable_parameterized_synthesis = 0
    # I/O interface
    cmd = m.inp('cmd', 2)
    cmdaddr = m.inp('cmdaddr', 16)
    cmddata = m.inp('cmddata', 8)
    # response
    dataout = m.reg('dataout', 8)
    # arch states
    state = m.reg('sha_state', 3)
    rdaddr = m.reg('sha_rdaddr', 16)
    wraddr = m.reg('sha_wraddr', 16)
    oplen = m.reg('sha_len', 16)
    xram = m.mem('XRAM', 16, 8)
    # child-ILA states
    bytes_read = m.reg('sha_bytes_read', 16)
    rd_data = m.reg('sha_rd_data', 512)
    hs_data = m.reg('sha_hs_data', 160)
    # Uninterpreted function for the SHA compression step:
    # 512-bit block in, 160-bit digest state out.
    sha = m.fun('sha', 160, [512])
    # fetch
    m.fetch_expr = ila.concat([state, cmd, cmdaddr, cmddata])
    m.fetch_valid = (cmd == 1) | (cmd == 2)
    # read commands: dataout returns one byte of state/address/length,
    # chosen by synthesis.
    statebyte = ila.zero_extend(state, 8)
    rdaddrbyte = ila.readchunk('rd_addr', rdaddr, 8)
    wraddrbyte = ila.readchunk('wr_addr', wraddr, 8)
    oplenbyte = ila.readchunk('op_len', oplen, 8)
    dataoutnext = ila.choice(
        'dataout',
        [statebyte, rdaddrbyte, wraddrbyte, oplenbyte, m.const(0, 8)])
    m.set_next('dataout', dataoutnext)
    # write commands.
    def mb_reg_wr(name, reg):
        # Multibyte register write: either write one byte chunk of the
        # 16-bit register from cmddata, or leave it unchanged.
        reg_wr = ila.writechunk('wr_' + name, reg, cmddata)
        reg_nxt = ila.choice('nxt_' + name, [reg_wr, reg])
        m.set_next(name, reg_nxt)
    mb_reg_wr('sha_rdaddr', rdaddr)
    mb_reg_wr('sha_wraddr', wraddr)
    mb_reg_wr('sha_len', oplen)
    # state: candidates are the five constants, the "keep reading until
    # oplen consumed" test, and a start-on-command test.
    state_choice = ila.choice('state_choice', [
        m.const(0, 3), m.const(1, 3), m.const(2, 3), m.const(3, 3),
        m.const(4, 3)
    ])
    rd_nxt = ila.ite(bytes_read < oplen, m.const(1, 3), m.const(4, 3))
    state_nxt = ila.choice('state_nxt', [
        rd_nxt, state_choice,
        ila.ite(cmddata == 1, m.const(1, 3), state), state
    ])
    m.set_next('sha_state', state_nxt)
    # bytes_read
    bytes_read_inc = bytes_read + 64
    bytes_read_rst = ila.ite(cmddata == 1, m.const(0, 16), bytes_read)
    bytes_read_nxt = ila.choice(
        'bytes_read_nxt',
        [m.const(0, 16), bytes_read_inc, bytes_read_rst, bytes_read])
    m.set_next('sha_bytes_read', bytes_read_nxt)
    # rd_data: load a 64-byte block from XRAM; endianness left to
    # synthesis (loadblk vs loadblk_big) or no change.
    rdblock_little = ila.loadblk(xram, rdaddr + bytes_read, 64)
    rdblock_big = ila.loadblk_big(xram, rdaddr + bytes_read, 64)
    rd_data_nxt = ila.choice('rd_data_nxt',
                             [rdblock_big, rdblock_little, rd_data])
    m.set_next('sha_rd_data', rd_data_nxt)
    # hs_data
    sha_hs_data = ila.appfun(sha, [rd_data])
    # NOTE(review): choice name 'sh_data_nxt' looks like a typo for
    # 'hs_data_nxt', and this call passes varargs while the others pass a
    # list — confirm both are accepted by ila.choice.
    hs_data_nxt = ila.choice('sh_data_nxt', sha_hs_data, hs_data)
    m.set_next('sha_hs_data', hs_data_nxt)
    # xram: store the digest back, endianness chosen by synthesis.
    xram_w_sha_little = ila.storeblk(xram, wraddr, hs_data)
    xram_w_sha_big = ila.storeblk_big(xram, wraddr, hs_data)
    xram_nxt = ila.choice('xram_nxt',
                          [xram_w_sha_little, xram_w_sha_big, xram])
    m.set_next('XRAM', xram_nxt)
    return m
def zext(self, v):
    """Zero-extend the bit-vector *v* to the architecture width XLEN."""
    widened = ila.zero_extend(v, XLEN)
    return widened
def sreg_nxt(self, regNo):
    """Next-state expression for scalar register *regNo*.

    The register is updated only when the instruction's destination field
    selects it; the update depends on the instruction class:

    * reg-reg (``isRegReg`` and ``rrType == 0b000``): ALU op on two
      register operands,
    * immediate (``isImmediate``): ALU op on a register and either
      ``immB`` (``immType == 0b00``) or ``immA`` (``immType == 0b10``),
    * load (``isLoad``): currently a no-op — ``load_val`` is computed but
      not yet wired into the result (preserved from the original).

    In every other case the register keeps its current value.

    Fix vs. original: the CLZ/CTZ cases of the immediate chain called
    ``ila.ite`` with four arguments (a spurious ``sreg1`` slipped in before
    the aux call); they now mirror the reg-reg chain.
    """
    def chain(cases, default):
        # Fold (condition, value) pairs into the nested ila.ite cascade the
        # original hand-written chain produced; first pair is outermost.
        expr = default
        for cond, val in reversed(cases):
            expr = ila.ite(cond, val, expr)
        return expr

    keep = self.scalar_registers[regNo]            # "no change" value
    ones = self.const(0xffff, SCALAR_REG_BITS)     # comparison-true result
    zero = self.const(0b0, SCALAR_REG_BITS)        # comparison-false result

    def flag(cond):
        # all-ones / zero result of a compare instruction
        return ila.ite(cond, ones, zero)

    sreg1 = self.indexToSGPR(self.rrSrc1)
    sreg2 = self.indexToSGPR(self.rrSrc2)
    # load instruction: word-aligned load from base + sign-extended offset.
    # NOTE(review): load_val is never used below (the isLoad branch keeps the
    # register on both sides) — preserved from the original; confirm intent.
    addr = self.indexToSGPR(self.memPtr) + ila.sign_extend(self.memOffSet, SCALAR_REG_BITS)
    load_val = ila.load(self.mem, ila.zero_extend(addr[31:2], MEM_ADDRESS_BITS))

    one = self.getConstOne()

    # --- reg-reg ALU chain ---
    rr = self.rrOpcode
    rr_cases = [
        (rr == NyEncoding.ADD_I, sreg1 + sreg2),
        (rr == NyEncoding.SUB_I, sreg1 - sreg2),
        (rr == NyEncoding.AND, sreg1 & sreg2),
        (rr == NyEncoding.OR, sreg1 | sreg2),
        (rr == NyEncoding.MULH_I, self.auxMull_i(sreg1, sreg2)),
        (rr == NyEncoding.MULH_U, self.auxMulh_u(sreg1, sreg2)),
        (rr == NyEncoding.ASHR, ila.ashr(sreg1, sreg2[4:0])),
        (rr == NyEncoding.SHR, sreg1 >> sreg2[4:0]),
        (rr == NyEncoding.SHL, sreg1 << sreg2[4:0]),
        (rr == NyEncoding.CLZ, self.aux_clz(sreg2)),
        (rr == NyEncoding.CTZ, self.aux_ctz(sreg2)),
        (rr == NyEncoding.MOVE, sreg2),
        (rr == NyEncoding.CMPEQ_I, flag(sreg1 == sreg2)),
        (rr == NyEncoding.CMPNE_I, flag(sreg1 != sreg2)),
        (rr == NyEncoding.CMPGT_I, flag(self.auxCmpgt_i(sreg1, sreg2) == one)),
        (rr == NyEncoding.CMPGE_I, flag(self.auxCmpge_i(sreg1, sreg2) == one)),
        (rr == NyEncoding.CMPLT_I, flag(self.auxCmplt_i(sreg1, sreg2) == one)),
        (rr == NyEncoding.CMPLE_I, flag(self.auxCmple_i(sreg1, sreg2) == one)),
        (rr == NyEncoding.CMPGT_U, flag(sreg1 > sreg2)),
        (rr == NyEncoding.CMPGE_U, ila.ite(sreg1 < sreg2, zero, ones)),
        (rr == NyEncoding.CMPLT_U, flag(sreg1 < sreg2)),
        (rr == NyEncoding.CMPLE_U, ila.ite(sreg1 > sreg2, zero, ones)),
    ]
    rr_result = ila.ite(self.rrType == self.model.const(0b000, 3),
                        chain(rr_cases, keep), keep)

    # --- immediate ALU chains ---
    imm_op = ila.zero_extend(self.immOpcode, 6)
    immB = self.immB
    immB_cases = [
        (imm_op == NyEncoding.ADD_I, sreg1 + immB),
        (imm_op == NyEncoding.SUB_I, sreg1 - immB),
        (imm_op == NyEncoding.AND, sreg1 & immB),
        (imm_op == NyEncoding.OR, sreg1 | immB),
        # NOTE(review): MULH_I->auxMull_i / MULL_I->auxMulh_u is inconsistent
        # with the reg-reg chain (MULH_U->auxMulh_u) — preserved; confirm.
        (imm_op == NyEncoding.MULH_I, self.auxMull_i(sreg1, immB)),
        (imm_op == NyEncoding.MULL_I, self.auxMulh_u(sreg1, immB)),
        (imm_op == NyEncoding.ASHR, ila.ashr(sreg1, immB[4:0])),
        (imm_op == NyEncoding.SHR, sreg1 >> immB[4:0]),
        (imm_op == NyEncoding.SHL, sreg1 << immB[4:0]),
        (imm_op == NyEncoding.CLZ, self.aux_clz(immB)),   # fixed: 4-arg ite
        (imm_op == NyEncoding.CTZ, self.aux_ctz(immB)),   # fixed: 4-arg ite
        (imm_op == NyEncoding.MOVE, immB),
        (imm_op == NyEncoding.CMPEQ_I, flag(sreg1 == immB)),
        (imm_op == NyEncoding.CMPNE_I, flag(sreg1 != immB)),
        (imm_op == NyEncoding.CMPGT_I, flag(self.auxCmpgt_i(sreg1, immB) == one)),
        (imm_op == NyEncoding.CMPGE_I, flag(self.auxCmpge_i(sreg1, immB) == one)),
        (imm_op == NyEncoding.CMPLT_I, flag(self.auxCmplt_i(sreg1, immB) == one)),
        (imm_op == NyEncoding.CMPLE_I, flag(self.auxCmple_i(sreg1, immB) == one)),
        (imm_op == NyEncoding.CMPGT_U, flag(sreg1 > immB)),
        (imm_op == NyEncoding.CMPGE_U, ila.ite(sreg1 < immB, zero, ones)),
        (imm_op == NyEncoding.CMPLT_U, flag(sreg1 < immB)),
        (imm_op == NyEncoding.CMPLE_U, ila.ite(sreg1 > immB, zero, ones)),
    ]
    immA = self.immA
    immA_cases = [
        (imm_op == NyEncoding.ADD_I, sreg1 + immA),
        (imm_op == NyEncoding.SUB_I, sreg1 - immA),
        (imm_op == NyEncoding.AND, sreg1 & immA),
        (imm_op == NyEncoding.OR, sreg1 | immA),
        # NOTE(review): original multiplies by sreg2, not immA — preserved.
        (imm_op == NyEncoding.MULH_I, sreg1 * sreg2),
    ]
    imm_result = ila.ite(self.immType == self.model.const(0b00, 2),
                         chain(immB_cases, keep),
                         ila.ite(self.immType == self.model.const(0b10, 2),
                                 chain(immA_cases, keep), keep))

    # --- load: register unchanged on both sides (original behavior) ---
    load_result = ila.ite(self.isLoad == self.model.const(0b1, 1), keep, keep)

    return ila.ite(self.dest == regNo,
                   ila.ite(self.isRegReg, rr_result,
                           ila.ite(self.isImmediate, imm_result, load_result)),
                   keep)
def main():
    """Self-test of the ILA expression API.

    Builds boolean and bit-vector expressions and checks expected
    equivalences with ``c.areEqual``.  Fix vs. original: the ``implies``
    check in the "imply" section called ``c.areEqual`` without ``assert``,
    silently discarding its result; it is now asserted like every other
    check in this function.
    """
    c = ila.Abstraction("test")
    # n = ila.Node()
    # test on boolOp
    top = c.bool(True)
    bot = c.bool(False)
    Band = (top & bot)  # 0
    Bor = (top | bot)  # 1
    Bxor = (Band ^ Bor)  # 1
    Bnor = ila.nor(Band, Bor)  # 0
    Bxnor = ila.xnor(Bxor, Bnor)  # 0
    Bnand = ila.nand(Bnor, Bxnor)  # 1
    nBnor = ~Bnor  # 1
    assert c.areEqual(nBnor, top)
    b1 = c.bit('b1')
    b2 = c.bit('b2')
    b3 = c.bit('b3')
    b4 = (b1 & b2) ^ b3
    b5 = ila.xnor(ila.nand(b1, b2), b3)
    assert c.areEqual(b4, b5)
    b6 = ila.ite(b1, b2, b3)
    b7 = (b1 & b2) | (~b1 & b3)
    assert c.areEqual(b6, b7)
    # test on bitvectorOp
    x = c.reg('x', 8)
    y = c.reg('y', 8)
    c0 = c.const(0, 8)
    c1 = c.const(1, 8)
    c2 = c.const(2, 8)
    c4 = c.const(4, 8)
    c6 = c.const(6, 8)
    c8 = c.const(8, 8)
    v1 = (x == c4)
    v2 = x << 1
    v3 = c4 << 1
    assert c.areEqual(c8, v3)
    assert c.areEqual(c8, (c4 + c4))
    assert c.areEqual(c4, (c8 - c4))
    assert c.areEqual(c8, (c4 * c2))
    assert c.areEqual(c4, (c8 / c2))
    v4 = ila.ite(v1, v2, v3)  # 8
    assert c.areEqual(v4, c8)
    assert c.areEqual(v4, v3)
    assert c.areEqual(v4, (4 + c4))
    assert c.areEqual(-c4, (c4 - 8))
    assert c.areEqual(v4, (2 * c4))
    assert c.areEqual(v4 >> 2, (v3 / c2) - 2)
    assert c.areEqual(c8 % 5, 7 % (1 << c2))
    assert c.areEqual((x < y) ^ (y <= x), (x == y) | (x != y))
    assert c.areEqual((x > y) | (x == y) | ~(x >= y), top)
    assert c.areEqual(~x ^ x, y ^ ~y)
    assert c.areEqual(~x, ila.nand(x, x))
    v5 = ~ila.nor(c2, c4)  # 00000110
    assert c.areEqual(~v5, ila.xnor(c4, c2))
    v6 = c2 - c4  # 11111110
    v7 = 3 - c8  # 11111011
    v8 = ~(c2 - 2)  # 11111111
    assert c.areEqual(v8, ~c0)
    assert c.areEqual(v8 - 1, v6)
    assert c.areEqual(c4 + c1, -v7)  # 00000101
    assert c.areEqual(ila.sdiv(c4, c2), c2)
    assert c.areEqual(ila.sdiv(-c4, c2), -c2)
    assert c.areEqual(ila.sdiv(v5, -4), -c1)
    assert c.areEqual(ila.srem(v5, -4), c2)  # -6 = -4 * 1 + -2 ??
    assert c.areEqual(ila.sdiv(-6, -c4), c1)
    assert c.areEqual(ila.srem(-v5, -c4), -c2)
    assert c.areEqual(x - ila.srem(x, y), ila.sdiv(x, y) * y)
    assert c.areEqual(x - x % y, (x / y) * y)
    assert c.areEqual(ila.ashr(v6, 1), v8)
    assert c.areEqual(ila.slt(v7, v6), top)
    s1 = c.const(1, 4)
    s2 = c.const(2, 4)
    v9 = ila.concat(s1, s2)  # 00010010
    v10 = (c1 << 4) + c2
    assert c.areEqual(v9, v10)
    v11 = ila.rrotate(v9, 2)  # 10000100
    v12 = ila.lrotate(v9, 6)
    assert c.areEqual(v11, v12)
    s3 = c.const(9, 4)
    v13 = v9[4:1]
    assert c.areEqual(s3, v13)
    v14 = x[3:0]
    v15 = y[7:4]
    v16 = ila.concat(v15, v14)
    v17 = ((x << 4) >> 4) + ((y >> 4) << 4)
    assert c.areEqual(v16, v17)
    # imply
    v18 = ila.slt(x, 5)
    v19 = ila.sge(x, 5)
    # fixed: the original discarded this check's result (no assert)
    assert c.areEqual(ila.implies(v18, ~v19), top)
    # nonzero & bool ite
    v20 = ila.ite(ila.nonzero(x), (x < 7), ~(x >= 7))
    assert c.areEqual(v20, (x != 7) & ~(x > 7))
    assert c.areEqual(ila.nonzero(c4), top)
    # add nonzero to ite
    assert c.areEqual(ila.ite(ila.nonzero(c2), top, bot), top)
    assert c.areEqual(ila.ite(ila.nonzero(c0), top, bot), bot)
    # zero/sign extend
    short = c4[3:0]
    recover = ila.zero_extend(short, 8)
    assert c.areEqual(recover, c4)
    longC4 = c.const(4, 16)
    nlongC4 = -longC4
    nshortC4 = -c4
    extNS4 = ila.sign_extend(nshortC4, 16)
    assert c.areEqual(nlongC4, extNS4)
    # extract/slice with var
    v21 = c0[3:0]
    v21r = ila.zero_extend(v21, 8)
    assert c.areEqual(c0, v21r)
    # v14 = x[3:0]
    v14ex = ila.zero_extend(v14, 8)
    v14re = (x << 4) >> 4
    assert c.areEqual(v14ex, v14re)
    # v15 = y[7:4]
    v15ex = ila.zero_extend(v15, 8)
    v15re = (y >> 4)
    assert c.areEqual(v15ex, v15re)
    # disabled variable-index extract tests (kept from the original)
    """
    v21 = ila.extractIV(x, 3, c0)
    v22 = ila.extractVI(y, c4+3, 4)
    assert c.areEqual(v14ex, v21)
    assert c.areEqual(v15ex << 4, v22 << 4)
    v23 = v21 + (v22 << 4)
    assert c.areEqual(v23, v16)
    v24 = ila.extractVV(c8, c8-1, c0)
    assert c.areEqual(v24, c8)
    v25 = ila.extractVV(x, c8-1, c4)
    v26 = ila.zero_extend(x[7:4], 8)
    assert c.areEqual(v25, v26)
    # slice one bit
    bv1 = c.const(1, 1)
    s1 = ila.get_bit(c1, c0)
    assert c.areEqual(bv1, s1)
    """
    z = x & x
    bx = y[x]
    bz = y[z]
    assert c.areEqual(bx, bz)
    bx = c8[x]
    by = c8[y]
    inv = ila.implies(x == y, bx == by)
    assert c.areEqual(inv, top)
    dum = ila.ite(b1, bx, by)
    shd = ila.implies(x == y, dum == bx)
    assert c.areEqual(shd, top)
    assert c.areEqual(c6[1], c6[2])
    assert c.areEqual(c6[4], c6[c0])
def createRsaIla():
    """Construct the ILA of the RSA accelerator.

    Declares the command interface and architectural state, an
    uninterpreted ``rsa`` function over the message register, and
    synthesis templates (``ila.choice``) for each next-state function.

    Returns the populated ``ila.Abstraction``.
    """
    model = ila.Abstraction('rsa')
    model.enable_parameterized_synthesis = 0

    # -- I/O interface --
    cmd = model.inp('cmd', 2)
    cmdaddr = model.inp('cmdaddr', 16)
    cmddata = model.inp('cmddata', 8)
    # response register
    dataout = model.reg('dataout', 8)

    # -- architectural state --
    state = model.reg('rsa_state', 2)
    addr = model.reg('rsa_addr', 16)
    rsa_M = model.reg('rsa_M', 2048)
    rsa_N = model.reg('rsa_N', 2048)
    rsa_E = model.reg('rsa_E', 2048)
    rsa_buff = model.reg('rsa_buff', 2048)
    byte_counter = model.reg('rsa_byte_counter', 8)
    xram = model.mem('XRAM', 16, 8)
    rsa = model.fun('rsa', 2048, [2048])  # uninterpreted modexp

    # -- fetch: current state plus the incoming command --
    model.fetch_expr = ila.concat([state, cmd, cmdaddr, cmddata])
    model.fetch_valid = (cmd == 1) | (cmd == 2)

    # -- read commands --
    state_byte = ila.zero_extend(state, 8)
    addr_byte = ila.readchunk('rsa_addr', addr, 8)
    model.set_next('dataout', ila.choice(
        'dataout', [state_byte, addr_byte, model.const(0, 8)]))

    # -- rsa_addr: multi-byte register write --
    addr_written = ila.writechunk('wr_addr', addr, cmddata)
    model.set_next('rsa_addr', ila.choice('nxt_addr', [addr_written, addr]))

    # -- rsa_state machine --
    any_state = ila.choice('state_choice', [
        model.const(0, 2), model.const(1, 2),
        model.const(2, 2), model.const(3, 2)
    ])
    after_write = ila.ite(byte_counter == 255, model.const(0, 2), model.const(3, 2))
    model.set_next('rsa_state', ila.choice('rsa_state_nxt', [
        after_write,
        any_state,
        ila.ite(cmddata == 1, model.const(1, 2), state),
        state
    ]))

    # -- byte_counter --
    incremented = byte_counter + 1
    reset = ila.ite(cmddata == 1, model.const(0, 8), byte_counter)
    model.set_next('rsa_byte_counter', ila.choice(
        'byte_counter_nxt', [incremented, reset, byte_counter]))

    # -- rsa_buff: result of the uninterpreted rsa over the message --
    operated = ila.appfun(rsa, [rsa_M])
    model.set_next('rsa_buff', ila.choice('rsa_buff_nxt', operated, rsa_buff))

    # -- rsa_M is never updated by the accelerator --
    model.set_next('rsa_M', rsa_M)

    # -- XRAM: write rsa_buff back one byte per step, big-end first --
    byte_cnt_16 = ila.zero_extend(byte_counter, 16)
    shift = ila.zero_extend((255 - byte_counter) * 8, 2048)
    out_byte = (rsa_buff >> shift)[7:0]
    store_byte = ila.store(xram, addr + byte_cnt_16, out_byte)
    model.set_next('XRAM', ila.choice('xram_nxt', [store_byte, xram]))

    return model
def nxtStateFunction(self):
    """Build next-state expressions for the scalar ALU destination register.

    Reads the decoded operand fields, then computes ``self.nxt_dst_sop2``,
    ``self.nxt_dst_sop1`` and ``self.nxt_dst_sopk`` as nested ``ila.ite``
    cascades keyed on the respective opcode field; every unhandled opcode
    leaves the destination unchanged.

    Fixes vs. original (which did not parse):
    * empty ite values for S_CBRANCH_G_FORK / S_CBRANCH_JOIN / S_MOVREL*
      (marked ``#TODO``) are filled with a keep-destination placeholder;
    * a misplaced ``)`` that pushed the SOP2 default outside the XOR_B64
      case; ``ile.ite`` -> ``ila.ite``; ``self,opcode_SOP1`` -> ``.``;
      ``ila.source_reg0_long`` -> ``self.source_reg0_long``; bare
      ``source_reg0`` / ``sign_extend`` given their ``self.`` / ``ila.``
      prefixes; the S_CMOVK_I32 parenthesization (``dst_reg`` had been
      passed into ``sign_extend``); the SOPK cascade's missing default;
    * S_LSHR_B32 shifted source0 by its *own* low bits — now by source1's,
      matching S_LSHR_B64 and the ISA.
    """
    m = self.model

    def ite_chain(cases, default):
        # Fold (condition, value) pairs into a nested ila.ite cascade,
        # first pair outermost — same shape as the hand-written original.
        expr = default
        for cond, val in reversed(cases):
            expr = ila.ite(cond, val, expr)
        return expr

    # ---- next-pc candidates (32/64-bit instruction) ----
    self.pc_nxt_32 = self.pc + m.const(0x1, PC_REG_BITS)
    self.pc_nxt_64 = self.pc + m.const(0x2, PC_REG_BITS)

    # ---- operand reads ----
    self.source_reg0 = m.indexIntoSGPR(self.ssrc0)
    self.source_reg1 = m.indexIntoSGPR(self.ssrc1)
    self.scc = m.indexIntoSGPR(0, False, True)           # scalar condition code
    self.exec = m.indexIntoSGPR(0, False, False, True)   # exec mask
    self.source_reg0_ext = m.indexIntoSGPR(self.ssrc0 + m.const(0x1))
    self.source_reg1_ext = m.indexIntoSGPR(self.ssrc1 + m.const(0x1))
    self.dst_reg = m.indexIntoSGPR(self.sdstSOP2)
    self.dst_reg_ext = m.indexIntoSGPR(self.sdstSOP2 + m.const(0x1))
    self.source_reg0_long = ila.concat(self.source_reg0_ext, self.source_reg0)
    self.source_reg1_long = ila.concat(self.source_reg1_ext, self.source_reg1)
    # NOTE(review): operand order differs from the source_*_long concats
    # above — confirm this asymmetry is intended.
    self.dst_reg_long = ila.concat(self.dst_reg, self.dst_reg_ext)
    # BFE width/offset fields packed inside source1
    self.source_reg2_bfe = self.source_reg1[20:16]
    self.source_reg1_bfe = self.source_reg1[4:0]
    self.source_reg2_bfe_long = self.source_reg1[22:16]
    self.source_reg1_bfe_long = self.source_reg1[5:0]

    r0, r1 = self.source_reg0, self.source_reg1
    l0, l1 = self.source_reg0_long, self.source_reg1_long
    dst, dstl = self.dst_reg, self.dst_reg_long
    E = Encoding

    def bfe32():
        # 32-bit bit-field extract: width source_reg2_bfe at offset
        # source_reg1_bfe (shift-left then shift-right to isolate it).
        return ila.ite(
            self.source_reg2_bfe == 0, m.const(0, SCALAR_REG_BITS),
            ila.ite((self.source_reg2_bfe + self.source_reg1_bfe) < 32,
                    (r0 << (SCALAR_REG_BITS - self.source_reg2_bfe -
                            self.source_reg1_bfe)) >> (32 - self.source_reg2_bfe),
                    r0 >> self.source_reg1_bfe))  # fixed: missing self.

    def bfe64():
        # NOTE(review): original extracts from the 32-bit source, not the
        # 64-bit pair — preserved; confirm.
        return (r0 >> self.source_reg1_bfe_long) & \
               ((1 << self.source_reg2_bfe_long) - 1)

    # ---- SOP2: two scalar sources ----
    op2 = self.opcode_SOP2
    sop2_cases = [
        (op2 == E.SOP2_S_ABSDIFF_I32_OPCODE, ila.ite(r0 > r1, r0 - r1, r1 - r0)),
        (op2 == E.SOP2_S_ADD_I32_OPCODE, r0 + r1),
        (op2 == E.SOP2_S_ADD_U32_OPCODE, r0 + r1),
        (op2 == E.SOP2_S_ADDC_U32_OPCODE, r0 + r1 + self.scc),
        (op2 == E.SOP2_S_AND_B32_OPCODE, r0 & r1),
        (op2 == E.SOP2_S_AND_B64_OPCODE, l0 & l1),
        (op2 == E.SOP2_S_ANDN2_B32_OPCODE, r0 & (~r1)),
        (op2 == E.SOP2_S_ANDN2_B64_OPCODE, l0 & (~l1)),
        (op2 == E.SOP2_S_ASHR_I32_OPCODE, ila.ashr(r0, r1[4:0])),
        (op2 == E.SOP2_S_ASHR_I64_OPCODE, ila.ashr(l0, l1[5:0])),
        (op2 == E.SOP2_S_BFE_I32_OPCODE, bfe32()),
        (op2 == E.SOP2_S_BFE_U32_OPCODE, bfe32()),
        (op2 == E.SOP2_S_BFE_I64_OPCODE, bfe64()),
        (op2 == E.SOP2_S_BFE_U64_OPCODE, bfe64()),
        (op2 == E.SOP2_S_BFM_B32_OPCODE, ((1 << r0[4:0]) - 1) << r1[4:0]),
        (op2 == E.SOP2_S_BFM_B64_OPCODE, ((1 << l0[5:0]) - 1) << l1[5:0]),
        # TODO: fork semantics not modeled yet; keep destination unchanged
        (op2 == E.SOP2_S_CBRANCH_G_FORK_OPCODE, dst),
        (op2 == E.SOP2_S_CSELECT_B32_OPCODE, ila.ite(self.scc, r0, r1)),
        (op2 == E.SOP2_S_CSELECT_B64_OPCODE, ila.ite(self.scc, l0, l1)),
        (op2 == E.SOP2_S_LSHL_B32_OPCODE, r0 << r1[4:0]),
        (op2 == E.SOP2_S_LSHL_B64_OPCODE, l0 << l1[5:0]),
        # fixed: original shifted r0 by its own low bits (r0 >> r0[4:0])
        (op2 == E.SOP2_S_LSHR_B32_OPCODE, r0 >> r1[4:0]),
        (op2 == E.SOP2_S_LSHR_B64_OPCODE, l0 >> l1[5:0]),
        (op2 == E.SOP2_S_MAX_I32_OPCODE, ila.ite(r0 > r1, r0, r1)),
        (op2 == E.SOP2_S_MAX_U32_OPCODE, ila.ite(r0 > r1, r0, r1)),
        (op2 == E.SOP2_S_MIN_I32_OPCODE, ila.ite(r0 < r1, r0, r1)),
        (op2 == E.SOP2_S_MIN_U32_OPCODE, ila.ite(r0 < r1, r0, r1)),
        (op2 == E.SOP2_S_MUL_I32_OPCODE, r0 * r1),
        (op2 == E.SOP2_S_NAND_B32_OPCODE, ~(r0 & r1)),
        (op2 == E.SOP2_S_NAND_B64_OPCODE, ~(l0 & l1)),
        (op2 == E.SOP2_S_NOR_B32_OPCODE, ~(r0 | r1)),
        (op2 == E.SOP2_S_NOR_B64_OPCODE, ~(l0 | l1)),
        (op2 == E.SOP2_S_OR_B32_OPCODE, r0 | r1),
        (op2 == E.SOP2_S_OR_B64_OPCODE, l0 | l1),
        (op2 == E.SOP2_S_ORN2_B32_OPCODE, r0 | (~r1)),
        (op2 == E.SOP2_S_ORN2_B64_OPCODE, l0 | (~l1)),
        (op2 == E.SOP2_S_SUB_I32_OPCODE, r0 - r1),
        (op2 == E.SOP2_S_SUB_U32_OPCODE, r0 - r1),
        (op2 == E.SOP2_S_SUBB_U32_OPCODE, r0 - r1 - self.scc),
        (op2 == E.SOP2_S_XNOR_B32_OPCODE, ~(r0 ^ r1)),
        (op2 == E.SOP2_S_XNOR_B64_OPCODE, ~(l0 ^ l1)),
        (op2 == E.SOP2_S_XOR_B32_OPCODE, r0 ^ r1),
        (op2 == E.SOP2_S_XOR_B64_OPCODE, l0 ^ l1),  # fixed: misplaced ')'
    ]
    self.nxt_dst_sop2 = ite_chain(sop2_cases, dst)

    # ---- SOP1: one scalar source ----
    op1 = self.opcode_SOP1
    sop1_cases = [
        (op1 == E.SOP1_S_ABS_I32_OPCODE, ila.ite(r0 > 0, r0, -r0)),  # fixed: 'ile'
        (op1 == E.SOP1_S_AND_SAVEEXEC_B64_OPCODE, self.exec),
        (op1 == E.SOP1_S_ANDN2_SAVEEXEC_B64_OPCODE, self.exec),
        (op1 == E.SOP1_S_BCNT0_I32_B32_OPCODE,
         aux_count(r0, m.const(0x1, 1), m.const(SCALAR_REG_BITS))),
        (op1 == E.SOP1_S_BCNT0_I32_B64_OPCODE,
         aux_count(r0, m.const(0x1, 1), m.const(SCALAR_REG_BITS_LONG))),
        (op1 == E.SOP1_S_BCNT1_I32_B32_OPCODE,
         aux_count(r0, m.const(0x1, 0), m.const(SCALAR_REG_BITS))),
        (op1 == E.SOP1_S_BCNT1_I32_B64_OPCODE,
         aux_count(r0, m.const(0x1, 0), m.const(SCALAR_REG_BITS_LONG))),
        (op1 == E.SOP1_S_BITSET0_B32_OPCODE, aux_bit_set_zero(dst, r0[4:0])),
        (op1 == E.SOP1_S_BITSET0_B64_OPCODE, aux_bit_set_zero(dstl, l0[5:0])),
        (op1 == E.SOP1_S_BITSET1_B32_OPCODE, aux_bit_set_one(dst, r0[4:0])),
        (op1 == E.SOP1_S_BITSET1_B64_OPCODE, aux_bit_set_one(dstl, l0[5:0])),
        (op1 == E.SOP1_S_BREV_B32_OPCODE, aux_bit_rev(r0, SCALAR_REG_BITS)),
        (op1 == E.SOP1_S_BREV_B64_OPCODE, aux_bit_rev(l0, SCALAR_REG_BITS_LONG)),
        # TODO: join semantics not modeled yet; keep destination unchanged
        (op1 == E.SOP1_S_CBRANCH_JOIN_OPCODE, dst),
        (op1 == E.SOP1_S_CMOV_B32_OPCODE, ila.ite(self.scc, r0, ~(r0 | dst))),
        (op1 == E.SOP1_S_CMOV_B64_OPCODE, ila.ite(self.scc, l0, ~(l0 | dstl))),
        (op1 == E.SOP1_S_FF0_I32_B32_OPCODE,
         aux_ff_bit(r0, SCALAR_REG_BITS, m.const(0x0, 1))),
        (op1 == E.SOP1_S_FF1_I32_B32_OPCODE,
         aux_ff_bit(r0, SCALAR_REG_BITS, m.const(0x1, 1))),
        (op1 == E.SOP1_S_FF0_I32_B64_OPCODE,
         aux_ff_bit(l0, SCALAR_REG_BITS_LONG, m.const(0x0, 1))),
        (op1 == E.SOP1_S_FF1_I32_B64_OPCODE,
         aux_ff_bit(l0, SCALAR_REG_BITS_LONG, m.const(0x1, 1))),
        (op1 == E.SOP1_S_FLBIT_I32_OPCODE, aux_ff_op_bit(r0, SCALAR_REG_BITS)),
        (op1 == E.SOP1_S_FLBIT_I32_I64_OPCODE,
         aux_ff_op_bit(l0, SCALAR_REG_BITS_LONG)),
        (op1 == E.SOP1_S_FLBIT_I32_B32_OPCODE,
         aux_ff_bit_m(r0, SCALAR_REG_BITS, m.const(0x1, 1))),
        (op1 == E.SOP1_S_FLBIT_I32_B64_OPCODE,
         aux_ff_bit_m(l0, SCALAR_REG_BITS_LONG, m.const(0x1, 1))),
        (op1 == E.SOP1_S_GETPC_B64_OPCODE,
         self.pc + m.const(0x4, SCALAR_REG_BITS_LONG)),
        (op1 == E.SOP1_S_MOV_B32_OPCODE, r0),
        (op1 == E.SOP1_S_MOV_B64_OPCODE, l0),
        # TODO: relative-move semantics not modeled yet (four cases below)
        (op1 == E.SOP1_S_MOVRELD_B32_OPCODE, dst),
        (op1 == E.SOP1_S_MOVRELD_B64_OPCODE, dst),
        (op1 == E.SOP1_S_MOVRELS_B32_OPCODE, dst),
        (op1 == E.SOP1_S_MOVRELS_B64_OPCODE, dst),
        (op1 == E.SOP1_S_NAND_SAVEEXEC_B64_OPCODE, self.exec),
        (op1 == E.SOP1_S_NOR_SAVEEXEC_B64_OPCODE, self.exec),
        (op1 == E.SOP1_S_NOT_B32_OPCODE, ~r0),
        (op1 == E.SOP1_S_NOT_B64_OPCODE, ~l0),
        (op1 == E.SOP1_S_OR_SAVEEXEC_B64_OPCODE, self.exec),
        (op1 == E.SOP1_S_ORN2_SAVEEXEC_B64_OPCODE, self.exec),
        (op1 == E.SOP1_S_QUADMASK_B32_OPCODE,
         ila.zero_extend(aux_quadmask(r0, SCALAR_REG_BITS))),
        (op1 == E.SOP1_S_QUADMASK_B64_OPCODE,
         ila.zero_extend(aux_quadmask(l0))),
        (op1 == E.SOP1_S_RFE_B64_OPCODE, l0),  # fixed: 'ila.source_reg0_long'
        (op1 == E.SOP1_S_WQM_B32_OPCODE,
         ila.zero_extend(aux_quadmask(r0, SCALAR_REG_BITS))),
        # NOTE(review): B64 variant uses the 32-bit source — preserved; confirm
        (op1 == E.SOP1_S_WQM_B64_OPCODE,
         ila.zero_extend(aux_quadmask(r0, SCALAR_REG_BITS))),
        (op1 == E.SOP1_S_SEXT_I32_I8_OPCODE, ila.sign_extend(r0[7:0])),
        (op1 == E.SOP1_S_SEXT_I32_I16_OPCODE, ila.sign_extend(r0[15:0])),
        (op1 == E.SOP1_S_SWAPPC_B64_OPCODE,
         self.pc + m.const(0x4, SCALAR_REG_BITS_LONG)),
        (op1 == E.SOP1_S_MOV_FED_B32_OPCODE, r0),
        (op1 == E.SOP1_S_XOR_SAVEEXEC_B64_OPCODE, self.exec),  # fixed: 'self,'
        (op1 == E.SOP1_S_XNOR_SAVEEXEC_B64_OPCODE, self.exec),
        (op1 == E.SOP1_S_SETPC_B64_OPCODE, dst),
    ]
    self.nxt_dst_sop1 = ite_chain(sop1_cases, dst)

    # ---- SOPK: 16-bit signed immediate ----
    opk = self.opcode_SOPK
    sopk_cases = [
        (opk == E.SOPK_S_MOVK_I32_OPCODE,
         ila.sign_extend(self.simm, SCALAR_REG_BITS)),
        # fixed: original parenthesization passed dst_reg into sign_extend;
        # when SCC is set the immediate is moved, otherwise dst is kept.
        (opk == E.SOPK_S_CMOVK_I32_OPCODE,
         ila.ite(self.scc != m.const(0x1, 1), dst,
                 ila.sign_extend(self.simm, SCALAR_REG_BITS))),
        (opk == E.SOPK_S_ADDK_I32_OPCODE,
         dst + ila.sign_extend(self.simm, SCALAR_REG_BITS)),  # fixed: ila. prefix
        (opk == E.SOPK_S_MULK_I32_OPCODE,
         dst * ila.sign_extend(self.simm, SCALAR_REG_BITS)),  # fixed: ila. prefix
    ]
    self.nxt_dst_sopk = ite_chain(sopk_cases, dst)  # fixed: missing default
def instructionFetch(self):
    """Fetch and decode the instruction word at the current pc.

    Loads the word addressed by ``pc[31:3]``, slices every instruction
    field out of it, and resolves each register index to its short- or
    long-register value.  All results are stored as attributes for the
    later next-state functions.
    """
    fmt = instruction_format

    # word-aligned fetch from instruction memory
    self.inst = ila.load(
        self.mem, ila.zero_extend(self.pc[31:3], fmt.MEM_ADDRESS_BITS))
    self.fetch_expr = self.inst

    def field(top, bot):
        # instruction bits [top-1 : bot]
        return self.inst[(top - 1):bot]

    self.opcode = field(fmt.OPCODE_BIT_TOP, fmt.OPCODE_BIT_BOT)
    self.dest = field(fmt.DST_BIT_TOP, fmt.DST_BIT_BOT)
    self.src1 = field(fmt.SRC0_BIT_TOP, fmt.SRC0_BIT_BOT)
    self.src2 = field(fmt.SRC1_BIT_TOP, fmt.SRC1_BIT_BOT)
    self.src3 = field(fmt.SRC2_BIT_TOP, fmt.SRC2_BIT_BOT)

    # sign-extended base displacement
    self.baseImm = ila.sign_extend(
        field(fmt.BASE_BIT_TOP, fmt.BASE_BIT_BOT), fmt.PC_BITS)

    # branch-predicate resolution stays disabled, as in the original:
    # self.branchPred = self.dest
    # (self.predReg, self.predReg_flag) = self.indexIntoReg(self.branchPred)

    def pc_sized_imm():
        # the same immediate field, zero-extended to PC width
        return ila.zero_extend(
            field(fmt.IMM_BIT_TOP, fmt.IMM_BIT_BOT), fmt.PC_BITS)

    self.branchImm = pc_sized_imm()
    self.ldImm = pc_sized_imm()
    self.stImm = pc_sized_imm()

    def width_flag(index):
        # select the long-register flag when the index falls inside the
        # long-register index range, the plain scalar flag otherwise
        in_long_range = ((index >= self.scalar_register_num) &
                         (index < self.register_total_num))
        return ila.ite(in_long_range,
                       self.long_scalar_register_flag,
                       self.scalar_register_flag)

    self.sreg1_flag = width_flag(self.src1)
    self.sreg2_flag = width_flag(self.src2)
    self.sreg3_flag = width_flag(self.src3)
    self.sregdest_flag = width_flag(self.dest)

    # both register banks are read for every operand …
    self.ssreg1 = self.indexIntoSReg(self.src1)
    self.ssreg2 = self.indexIntoSReg(self.src2)
    self.ssreg3 = self.indexIntoSReg(self.src3)
    self.ssregdest = self.indexIntoSReg(self.dest)
    self.lsreg1 = self.indexIntoLReg(self.src1)
    self.lsreg2 = self.indexIntoLReg(self.src2)
    self.lsreg3 = self.indexIntoLReg(self.src3)
    self.lsregdest = self.indexIntoLReg(self.dest)

    def resolved(flag, short_val, long_val):
        # … and the flag picks the short value or the low REG_BITS of the
        # long one
        return ila.ite(flag, short_val, long_val[fmt.REG_BITS - 1:0])

    self.sreg1 = resolved(self.sreg1_flag, self.ssreg1, self.lsreg1)
    self.sreg2 = resolved(self.sreg2_flag, self.ssreg2, self.lsreg2)
    self.sreg3 = resolved(self.sreg3_flag, self.ssreg3, self.lsreg3)
    self.sregdest = resolved(self.sregdest_flag, self.ssregdest, self.lsregdest)
def createAESILA(synstates, enable_ps):
    """Build the AES accelerator ILA and synthesize its next-state functions.

    synstates -- names of the states to synthesize (passed to m.synthesize)
    enable_ps -- value for enable_parameterized_synthesis; also selects the
                 'en'/'dis' suffix of the timing and AST output files.

    Side effects: writes 'aes-times-<en|dis>.txt', exports one AST per
    synthesized state under asts/, and generates a simulator under sim/.

    Fix vs. original: the timing file handle was opened and never closed;
    it is now managed by a ``with`` block.  (This chunk is Python-2 code —
    ``xrange``, ``print >>``, ``time.clock`` — and is kept that way.)
    """
    m = ila.Abstraction("aes")
    m.enable_parameterized_synthesis = enable_ps
    # I/O interface: this is where the commands come from.
    cmd = m.inp('cmd', 2)
    cmdaddr = m.inp('cmdaddr', 16)
    cmddata = m.inp('cmddata', 8)
    # response.
    dataout = m.reg('dataout', 8)
    # internal arch state.
    state = m.reg('aes_state', 2)
    opaddr = m.reg('aes_addr', 16)
    oplen = m.reg('aes_len', 16)
    keysel = m.reg('aes_keysel', 1)
    ctr = m.reg('aes_ctr', 128)
    key0 = m.reg('aes_key0', 128)
    key1 = m.reg('aes_key1', 128)
    # for the uinst.
    byte_cnt = m.reg('byte_cnt', 16)
    rd_data = m.reg('rd_data', 128)
    enc_data = m.reg('enc_data', 128)
    xram = m.mem('XRAM', 16, 8)
    aes = m.fun('aes', 128, [128, 128, 128])  # uninterpreted AES-CTR round

    # fetch is just looking at the input command.
    m.fetch_expr = ila.concat([state, cmd, cmdaddr, cmddata])
    m.fetch_valid = (cmd == 1) | (cmd == 2)

    # decode: reads in any state, writes only when idle, nops otherwise.
    rdcmds = [(state == i) & (cmd == 1) & (cmdaddr == addr)
              for addr in xrange(0xff00, 0xff40) for i in [0, 1, 2, 3]]
    wrcmds = [(state == 0) & (cmd == 2) & (cmdaddr == addr)
              for addr in xrange(0xff00, 0xff40)]
    nopcmds = [(state == i) & (cmd != 1) & (cmdaddr == addr)
               for addr in xrange(0xff00, 0xff40) for i in [1, 2, 3]]
    m.decode_exprs = rdcmds + wrcmds + nopcmds

    # read commands
    statebyte = ila.zero_extend(state, 8)
    opaddrbyte = ila.readchunk('rd_addr', opaddr, 8)
    oplenbyte = ila.readchunk('rd_len', oplen, 8)
    keyselbyte = ila.zero_extend(keysel, 8)
    ctrbyte = ila.readchunk('rd_ctr', ctr, 8)
    key0byte = ila.readchunk('rd_key0', key0, 8)
    key1byte = ila.readchunk('rd_key1', key1, 8)
    dataoutnext = ila.choice('dataout', [
        statebyte, opaddrbyte, oplenbyte, keyselbyte, ctrbyte, key0byte,
        key1byte, m.const(0, 8)
    ])
    m.set_next('dataout', dataoutnext)

    # write commands.
    def mb_reg_wr(name, reg):
        # multibyte register write.
        reg_wr = ila.writechunk('wr_' + name, reg, cmddata)
        reg_nxt = ila.choice('nxt_' + name, [reg_wr, reg])
        m.set_next(name, reg_nxt)

    mb_reg_wr('aes_addr', opaddr)
    mb_reg_wr('aes_len', oplen)
    mb_reg_wr('aes_ctr', ctr)
    mb_reg_wr('aes_key0', key0)
    mb_reg_wr('aes_key1', key1)

    # bit-level registers
    def bit_reg_wr(name, reg, sz):
        # bitwise register write
        assert reg.type.bitwidth == sz
        reg_wr = cmddata[sz - 1:0]
        reg_nxt = ila.choice('nxt_' + name, [reg_wr, reg])
        m.set_next(name, reg_nxt)

    bit_reg_wr('aes_keysel', keysel, 1)

    # state
    state_next = ila.choice('state_next', [
        m.const(0, 2), m.const(1, 2), m.const(2, 2), m.const(3, 2),
        ila.ite(cmddata == 1, m.const(1, 2), state),
        ila.ite(byte_cnt + 16 < oplen, m.const(1, 2), m.const(0, 2))
    ])
    m.set_next('aes_state', state_next)

    # these are for the uinst
    # byte_cnt
    byte_cnt_inc = byte_cnt + 16
    byte_cnt_rst = ila.ite(cmddata == 1, m.const(0, 16), byte_cnt)
    byte_cnt_nxt = ila.choice(
        'byte_cnt_nxt', [m.const(0, 16), byte_cnt_inc, byte_cnt_rst, byte_cnt])
    m.set_next('byte_cnt', byte_cnt_nxt)

    # rd_data
    rdblock = ila.loadblk(xram, opaddr + byte_cnt, 16)
    rd_data_nxt = ila.choice('rd_data_nxt', rdblock, rd_data)
    m.set_next('rd_data', rd_data_nxt)

    # enc_data
    aes_key = ila.ite(keysel == 0, key0, key1)
    aes_enc_data = ila.appfun(aes, [ctr, aes_key, rd_data])
    enc_data_nxt = ila.ite(state == 2, aes_enc_data, enc_data)
    m.set_next('enc_data', enc_data_nxt)

    # xram write
    xram_w_aes = ila.storeblk(xram, opaddr + byte_cnt, enc_data)
    xram_nxt = ila.choice('xram_nxt', xram, xram_w_aes)
    m.set_next('XRAM', xram_nxt)

    # synthesize, timing each state; the timing log is closed when done
    # (the original leaked this handle).
    suffix = 'en' if enable_ps else 'dis'
    sim = lambda s: AES().simulate(s)
    with open('aes-times-%s.txt' % suffix, 'wt') as timefile:
        for s in synstates:
            st = time.clock()
            m.synthesize(s, sim)
            t_elapsed = time.clock() - st
            print >> timefile, s
            print >> timefile, '%.2f' % (t_elapsed)
            ast = m.get_next(s)
            m.exportOne(ast, 'asts/%s_%s' % (s, suffix))
    m.generateSimToDir('sim')
def nextStateVALUFunction(self, threadNo):
    """Build the VOP2 (vector-ALU) next-state expression for one thread.

    Reads the operand fields decoded by instructionFetch (vsrc0, vsrc1,
    vdst, opcode_VOP2) and publishes, as attributes on `self`:
      * vsource_reg0/1 and their "+1" companions plus 64-bit _long concats
      * vdst_reg / vdst_reg_ext / vdst_reg_long
      * vcc           -- the vector condition-code register
      * nxt_dst_vop2  -- nested-ite expression giving the next value of
                         the destination VGPR for every modelled opcode.

    threadNo selects the lane whose VGPRs are indexed.

    Fixes over the original: `slef.` typo in V_CNDMASK_B32, a `.` typo
    (instead of `,`) after V_MUL_F32, `self.VCC` wrong case in
    V_SUBBREV_U32, V_SUBREV_I32 computed `src1 - src1`, and the branches
    that were left empty (a SyntaxError) now keep the destination
    unchanged and are marked TODO.
    """
    m = self.model
    # vsrc0 encodings >= 0x100 name a VGPR (biased by 0x100); smaller
    # values name an SGPR.
    self.vsource_reg0 = ila.ite(
        self.vsrc0 > 255,
        m.indexIntoVGPR(self.vsrc0 - m.const(0x100, VECTOR_SOURCE_BIT),
                        threadNo),
        m.indexIntoSGPR(self.vsrc0))
    self.vsource_reg1 = m.indexIntoVGPR(self.vsrc1)
    self.vcc = m.indexIntoVGPR(0, 0, True)
    # "+1" companion registers used for 64-bit (long) operands.
    self.vsource_reg0_ext = ila.ite(
        self.vsrc0 > 255,
        m.indexIntoVGPR(
            self.vsrc0 + m.const(0x1, VECTOR_SOURCE_BIT) -
            m.const(0x100, VECTOR_SOURCE_BIT), threadNo),
        m.indexIntoSGPR(self.vsrc0 + m.const(0x1, 1)))
    self.vsource_reg1_ext = m.indexIntoVGPR(
        self.vsrc1 + m.const(0x1, VECTOR_SOURCE_BIT - 1))
    self.vdst_reg = m.indexIntoVGPR(self.vdst)
    self.vdst_reg_ext = m.indexIntoVGPR(
        self.vdst + m.const(0x1, VECTOR_SOURCE_BIT - 1))
    self.vsource_reg0_long = ila.concat(self.vsource_reg0_ext,
                                        self.vsource_reg0)
    self.vsource_reg1_long = ila.concat(self.vsource_reg1_ext,
                                        self.vsource_reg1)
    self.vdst_reg_long = ila.concat(self.vdst_reg, self.vdst_reg_ext)

    # Short local aliases keep the opcode table readable.
    s0 = self.vsource_reg0
    s1 = self.vsource_reg1
    dst = self.vdst_reg
    carry = self.vcc[threadNo]
    mul24 = s0[23:0] * s1[23:0]  # shared 24x24-bit product

    # Opcode -> result expression, in the original chain's order.
    # Opcodes not modelled yet map to `dst` (destination unchanged).
    vop2_results = [
        (Encoding.VOP2_V_CNDMASK_B32, ila.ite(carry != 0, s1, s0)),
        (Encoding.VOP2_V_READLANE_B32, dst),   # TODO: lane read
        (Encoding.VOP2_V_WRITELANE_B32, dst),  # TODO: lane write
        (Encoding.VOP2_V_ADD_F32, s0 + s1),
        (Encoding.VOP2_V_SUB_F32, s0 - s1),
        (Encoding.VOP2_V_SUBREV_F32, s1 - s0),
        (Encoding.VOP2_V_MAC_LEGACY_F32, s0 * s1 + dst),
        (Encoding.VOP2_V_MUL_LEGACY_F32, s0 * s1),
        (Encoding.VOP2_V_MUL_F32, s0 * s1),
        (Encoding.VOP2_V_MUL_I32_I24,
         ila.sign_extend(mul24[30:0], VECTOR_REG_BITS)),
        (Encoding.VOP2_V_MUL_HI_I32_I24,
         ila.sign_extend(mul24[47:32], VECTOR_REG_BITS)),
        (Encoding.VOP2_V_MUL_U32_U24, mul24[31:0]),
        (Encoding.VOP2_V_MUL_HI_U32_U24,
         ila.zero_extend(mul24[47:32], VECTOR_REG_BITS)),
        (Encoding.VOP2_V_MIN_LEGACY_F32, ila.ite(s0 < s1, s0, s1)),
        (Encoding.VOP2_V_MAX_LEGACY_F32, ila.ite(s0 >= s1, s0, s1)),
        (Encoding.VOP2_V_MIN_F32, ila.ite(s0 < s1, s0, s1)),
        (Encoding.VOP2_V_MAX_F32, ila.ite(s0 > s1, s0, s1)),
        (Encoding.VOP2_V_MIN_I32, ila.ite(s0 < s1, s0, s1)),
        (Encoding.VOP2_V_MAX_I32, ila.ite(s0 > s1, s0, s1)),
        (Encoding.VOP2_V_MIN_U32, ila.ite(s0 < s1, s0, s1)),
        (Encoding.VOP2_V_MAX_U32, ila.ite(s0 > s1, s0, s1)),
        (Encoding.VOP2_V_LSHR_B32, s0 >> s1[4:0]),
        (Encoding.VOP2_V_LSHRREV_B32, s1 >> s0[4:0]),
        (Encoding.VOP2_V_ASHR_I32, ila.ashr(s0, s1[4:0])),
        (Encoding.VOP2_V_ASHRREV_I32, ila.ashr(s1, s0[4:0])),
        (Encoding.VOP2_V_LSHL_B32, s0 << s1[4:0]),
        (Encoding.VOP2_V_LSHLREV_B32, s1 << s0[4:0]),
        (Encoding.VOP2_V_AND_B32, s0 & s1),
        (Encoding.VOP2_V_OR_B32, s0 | s1),
        (Encoding.VOP2_V_XOR_B32, s0 ^ s1),
        (Encoding.VOP2_V_BFM_B32, ((1 << s0[4:0]) - 1) << s1[4:0]),
        (Encoding.VOP2_V_MAC_F32, s0 * s1 + dst),
        (Encoding.VOP2_V_MADMK_F32, dst),  # TODO: needs literal constant
        (Encoding.VOP2_V_MADAK_F32, dst),  # TODO: needs literal constant
        (Encoding.VOP2_V_BCNT_U32_B32,
         aux_count(s0, False, VECTOR_REG_BITS) + s1),
        (Encoding.VOP2_V_MBCNT_LO_U32_B32, dst),  # TODO: ThreadMask
        (Encoding.VOP2_V_MBCNT_HI_U32_B32, dst),  # TODO: ThreadMask
        (Encoding.VOP2_V_ADD_I32, s0 + s1),
        (Encoding.VOP2_V_SUB_I32, s0 - s1),
        # fixed: the original computed src1 - src1 here.
        (Encoding.VOP2_V_SUBREV_I32, s1 - s0),
        (Encoding.VOP2_V_ADDC_U32, s0 + s1 + carry),
        (Encoding.VOP2_V_SUBB_U32, s0 - s1 - carry),
        # fixed: the original referenced self.VCC (wrong attribute case).
        (Encoding.VOP2_V_SUBBREV_U32, s1 - s0 - carry),
        (Encoding.VOP2_V_LDEXP_F32, dst),           # TODO: EXP
        (Encoding.VOP2_V_CVT_PKACCUM_U8_F32, dst),  # TODO
        (Encoding.VOP2_V_CVT_PKNORM_I16_F32, dst),  # TODO
        (Encoding.VOP2_V_CVT_PKNORM_U16_F32, dst),  # TODO
        (Encoding.VOP2_V_CVT_PKRTZ_F16_F32, dst),   # TODO
        (Encoding.VOP2_V_CVT_PK_U16_U32, dst),      # TODO
        (Encoding.VOP2_V_CVT_PK_I16_I32, dst),      # TODO
    ]

    # Fold the table into a right-nested ite chain, matching the shape of
    # the original hand-written chain; unmatched opcodes keep `dst`.
    expr = dst
    for opcode, result in reversed(vop2_results):
        expr = ila.ite(self.opcode_VOP2 == opcode, result, expr)
    self.nxt_dst_vop2 = expr
def synthesize(state, enable_ps):
    """Build the 8051 microcontroller ILA and synthesize the next-state
    function of every architectural state named in `state`.

    state     : iterable of state names ('PC', 'ACC', 'IRAM', ...).
    enable_ps : bool, enables parameterized synthesis in the engine.

    Side effects: prints per-state timing and exports each synthesized
    AST under asts/. Duplicate ila.choice names below (e.g. the two
    'pc_topush_endianess' choices, and 'iram_indir' for both address and
    data) appear intentional: same-named choices share one selector, so
    the paired lists stay index-correlated.
    """
    uc = uc8051()
    # create nicknames
    pc, iram, sp = uc.pc, uc.iram, uc.sp
    op0, op1, op2 = uc.op0, uc.op1, uc.op2
    acc, b, dptr = uc.acc, uc.b, uc.dptr
    psw = uc.psw
    rx = uc.rx
    rom = uc.rom
    model = uc.model
    model.enable_parameterized_synthesis = enable_ps
    bv = model.const
    # fetch and decode: one decode expression per possible opcode byte.
    model.fetch_expr = uc.op0  # s/hand for uc.rom[uc.pc]
    model.decode_exprs = [uc.op0 == i for i in xrange(0x0, 0x100)]

    ########################### PC ##############################################
    def cjmp(name, cond):
        # Conditional jump template: taken target is PC-relative;
        # pc_rel1/pc_rel2 are bound later in this scope (closure, resolved
        # at call time).
        pc_taken = ila.choice(name + '_taken', pc_rel1, pc_rel2)
        pc_seq = ila.choice(name + '_seq', pc + 2, pc + 3)
        return ila.ite(cond, pc_taken, pc_seq)

    def jmppolarity(name):
        # Synthesis picks whether the jump fires on bit==0 or bit==1.
        return ila.inrange(name, bv(0, 1), bv(1, 1))

    # ajmp/acall: 11-bit in-page target; upper 5 bits come from PC+2.
    pc_ajmp_pg1 = (pc + 2)[15:11]
    pc_ajmp_pg2 = ila.inrange('ajmp_page', bv(0x0, 3), bv(0x7, 3))
    pc_ajmp_pg = ila.concat(pc_ajmp_pg1, pc_ajmp_pg2)
    pc_ajmp = ila.concat(pc_ajmp_pg, op1)
    # lcall/ljmp: 16-bit target, byte order left to synthesis.
    pc_ljmp = ila.choice('ljmp', [ila.concat(op2, op1), ila.concat(op1, op2)])
    # ret: pop return address; stack direction/order left to synthesis.
    pc_ret = ila.choice('pc_ret', [
        ila.concat(iram[sp - 1], iram[sp]),
        ila.concat(iram[sp], iram[sp - 1]),
        ila.concat(iram[sp], iram[sp + 1]),
        ila.concat(iram[sp + 1], iram[sp])
    ])
    # relative to pc: signed 8-bit displacement from op1 or op2.
    pc_rel1 = ila.choice('pc_rel1_base', [pc, pc + 1, pc + 2, pc + 3
                                          ]) + ila.sign_extend(op1, 16)
    pc_rel2 = ila.choice('pc_rel2_base', [pc, pc + 1, pc + 2, pc + 3
                                          ]) + ila.sign_extend(op2, 16)
    # sjmp
    pc_sjmp = ila.choice('sjmp', pc_rel1, pc_rel2)
    # jb/jnb: test a bit-addressable bit.
    jb_bitaddr = ila.choice('jb_bitaddr', [op1, op2])
    jb_bit = uc.readBit(jb_bitaddr)
    jx_polarity = jmppolarity('jx_polarity')
    pc_jb = cjmp('pc_jb', jb_bit == jx_polarity)
    # jc/jnc
    pc_jc = cjmp('pc_jc', uc.cy == jx_polarity)
    # jz/jnz
    acc_zero = acc == 0
    acc_nonzero = acc != 0
    jz_test = ila.choice('jz_test_polarity', acc_zero, acc_nonzero)
    pc_jz = cjmp('pc_jz', jz_test)
    # jmp @a+dptr
    pc_jmp = dptr + ila.zero_extend(acc, 16)
    # cjne: compare and jump if not equal.
    cjne_src1 = ila.choice('cjne_src1', [acc, iram[rx[0]], iram[rx[1]]] + rx)
    cjne_src2 = ila.choice(
        'cjne_src2',
        [op1, op2, uc.readDirect(ila.choice('cjne_iram_addr', [op1, op2]))])
    cjne_taken = cjne_src1 != cjne_src2
    pc_cjne = cjmp('pc_cjne', cjne_taken)
    # djnz: decrement and jump if the decremented value is non-zero
    # (tested here as source != 1 before the decrement).
    djnz_src = ila.choice(
        'djnz_src',
        [uc.readDirect(ila.choice('djnz_iram_src', [op1, op2]))] + rx)
    djnz_taken = djnz_src != 1
    pc_djnz = cjmp('pc_djnz', djnz_taken)
    pc_choices = [
        pc + 1, pc + 2, pc + 3, pc_ajmp, pc_ljmp, pc_ret, pc_sjmp, pc_jb,
        pc_jc, pc_jz, pc_jmp, pc_cjne, pc_djnz
    ]
    model.set_next('PC', ila.choice('pc', pc_choices))

    ########################### ACC ##############################################
    # various sources for ALU ops.
    acc_src2_dir_addr = ila.choice('acc_src2_dir_addr', [op1, op2])
    acc_src2_dir = ila.choice('acc_src2_dir',
                              [uc.readDirect(acc_src2_dir_addr)] + rx)
    acc_src2_indir_addr = ila.choice('acc_src2_indir_addr', [rx[0], rx[1]])
    acc_src2_indir = iram[acc_src2_indir_addr]
    src2_imm = ila.choice('src2_imm', [op1, op2])
    acc_src2 = ila.choice('acc_src2', [acc_src2_dir, acc_src2_indir, src2_imm])
    acc_rom_offset = ila.choice('acc_rom_offset', [dptr, pc + 1, pc + 2, pc + 3])
    # the decimal adjust instruction. this is a bit of mess.
    # first, deal with the lower nibble
    acc_add_6 = (uc.ac == 1) | (acc[3:0] > 9)
    acc_ext9 = ila.zero_extend(acc, 9)
    acc_da_stage1 = ila.ite(acc_add_6, acc_ext9 + 6, acc_ext9)
    acc_da_cy1 = acc_da_stage1[8:8]
    # and then the upper nibble
    acc_add_60 = ((acc_da_cy1 | uc.cy) == 1) | (acc_da_stage1[7:4] > 9)
    acc_da_stage2 = ila.ite(acc_add_60, acc_da_stage1 + 0x60, acc_da_stage1)
    acc_da = acc_da_stage2[7:0]
    # instructions which modify the accumulator.
    acc_rr = ila.rrotate(acc, 1)
    acc_rrc = ila.rrotate(ila.concat(acc, uc.cy), 1)[8:1]
    acc_rl = ila.lrotate(acc, 1)
    acc_rlc = ila.lrotate(ila.concat(uc.cy, acc), 1)[7:0]
    acc_inc = acc + 1
    acc_dec = acc - 1
    acc_add = acc + acc_src2
    acc_addc = acc + acc_src2 + ila.zero_extend(uc.cy, 8)
    acc_orl = acc | acc_src2
    acc_anl = acc & acc_src2
    acc_xrl = acc ^ acc_src2
    # sign_extend of the 1-bit carry gives 0 or -1, i.e. subtracts the
    # borrow when cy == 1.
    acc_subb = acc - acc_src2 + ila.sign_extend(uc.cy, 8)
    acc_mov = acc_src2
    acc_cpl = ~acc
    acc_clr = bv(0, 8)
    acc_rom = rom[ila.zero_extend(acc, 16) + acc_rom_offset]
    acc_swap = ila.concat(acc[3:0], acc[7:4])
    # div: 8051 leaves results undefined on divide-by-zero; model picks
    # fixed values (0xff / acc) for that case.
    acc_div = ila.ite(b == 0, bv(0xff, 8), acc / b)
    b_div = ila.ite(b == 0, acc, acc % b)
    # mul
    mul_result = ila.zero_extend(acc, 16) * ila.zero_extend(b, 16)
    acc_mul = mul_result[7:0]
    b_mul = mul_result[15:8]
    # xchg - dir
    xchg_src2_dir_addr = ila.choice('xchg_src2_dir_addr', [op1, op2] + uc.rxaddr)
    xchg_src2_dir = uc.readDirect(xchg_src2_dir_addr)
    acc_xchg_dir = xchg_src2_dir
    # xchg - indir (xchd swaps only the low nibble)
    xchg_src2_indir_addr = ila.choice('xchg_src2_indir_addr', [rx[0], rx[1]])
    xchg_src2_full_indir = iram[xchg_src2_indir_addr]
    xchg_src2_half_indir = ila.concat(acc[7:4], xchg_src2_full_indir[3:0])
    xchg_src2_indir = ila.choice('xchg_src2_indir',
                                 [xchg_src2_full_indir, xchg_src2_half_indir])
    acc_xchg_indir = xchg_src2_indir
    # final acc value.
    acc_next = ila.choice('acc_r_next', [
        acc_rr, acc_rl, acc_rrc, acc_rlc, acc_inc, acc_dec, acc_add, acc_addc,
        acc_orl, acc_anl, acc_xrl, acc_mov, acc_rom, acc_clr, acc_subb,
        acc_swap, acc_cpl, acc, acc_div, acc_mul, acc_da, acc_xchg_dir,
        acc_xchg_indir, uc.xram_data_in
    ])
    model.set_next('ACC', acc_next)

    ########################### IRAM ##############################################
    # instructions where the result is a direct iram address
    dir_src1_addr = ila.choice('dir_src1_addr', [op1, op2] + uc.rxaddr)
    dir_src1 = uc.readDirect(dir_src1_addr)
    dir_src2_iram_addr = ila.choice('dir_src2_iram_addr', [op1, op2] + uc.rxaddr)
    dir_src2_iram = uc.readDirect(dir_src2_iram_addr)
    dir_src2_indir_addr = ila.choice('dir_src2_indir_addr', [rx[0], rx[1]])
    dir_src2_indir = iram[dir_src2_indir_addr]
    dir_src2 = ila.choice('dir_src2',
                          [op1, op2, acc, dir_src2_iram, dir_src2_indir])
    dir_inc = dir_src1 + 1
    dir_dec = dir_src1 - 1
    dir_orl = dir_src1 | dir_src2
    dir_anl = dir_src1 & dir_src2
    dir_xrl = dir_src1 ^ dir_src2
    dir_mov = dir_src2
    dir_result = ila.choice(
        'dir_result', [dir_inc, dir_dec, dir_orl, dir_anl, dir_xrl, dir_mov])
    dir_addrs = [dir_src1_addr]
    dir_datas = [dir_result]
    # write a bit.
    bit_src1_addr = ila.choice('bit_src1_addr', [op1, op2])
    bit_src1 = uc.readBit(bit_src1_addr)
    wrbit_data = ila.choice(
        'wrbit_data', [uc.cy, ~uc.cy, bit_src1, ~bit_src1, bv(0, 1), bv(1, 1)])
    r_bit = uc.writeBit(bit_src1_addr, wrbit_data)
    # some instructions write their result to the carry flag; which is also the first operand.
    cy_orl = uc.cy | bit_src1
    cy_orlc = uc.cy | ~bit_src1
    cy_anl = uc.cy & bit_src1
    cy_anlc = uc.cy & ~bit_src1
    cy_mov = bit_src1
    cy_cpl_bit = ~bit_src1
    cy_cpl_c = ~uc.cy
    bit_cnst1 = bv(1, 1)
    bit_cnst0 = bv(0, 1)
    bit_cy = ila.choice('bit_cy', [
        cy_orl, cy_anl, cy_orlc, cy_anlc, cy_cpl_c, cy_mov, cy_cpl_bit,
        bit_cnst1, bit_cnst0
    ])
    # instructions where the result is an indirect iram address.
    src1_indir_addr = ila.choice('src1_indir_addr', [rx[0], rx[1]])
    src1_indir = iram[src1_indir_addr]
    src2_indir_dir_addr = ila.choice('src2_indir_dir_addr', [op1, op2])
    src2_indir_dir = uc.readDirect(src2_indir_dir_addr)
    src2_indir = ila.choice('src2_indir', [op1, op2, acc, src2_indir_dir])
    src1_indir_inc = src1_indir + 1
    src1_indir_dec = src1_indir - 1
    src1_indir_mov = src2_indir
    src1_indir_result = ila.choice(
        'src1_indir_result', [src1_indir_inc, src1_indir_dec, src1_indir_mov])
    indir_addrs = [src1_indir_addr]  # indirect write addr
    indir_datas = [src1_indir_result]  # and data.
    # calls: push the return address (order/endianness left to synthesis;
    # both choices share one name so they stay consistent).
    pc_topush = ila.choice('pc_topush', [pc + 1, pc + 2, pc + 3])
    pc_topush_lo = pc_topush[7:0]
    pc_topush_hi = pc_topush[15:8]
    pc_topush_0 = ila.choice('pc_topush_endianess', [pc_topush_lo, pc_topush_hi])
    pc_topush_1 = ila.choice('pc_topush_endianess', [pc_topush_hi, pc_topush_lo])
    pc_push_addr = ila.choice('pc_push_addr', [sp, sp + 1])
    iram_call = ila.store(
        ila.store(iram, pc_push_addr, pc_topush_0), pc_push_addr + 1,
        pc_topush_1)
    # push or pop instructions.
    stk_iram_addr = ila.choice('stk_iram_addr', [sp, sp + 1, sp - 1])
    stk_src_dir_addr = ila.choice('stk_src_dir_addr', [op1, op2])
    stk_src_dir = uc.readDirect(stk_src_dir_addr)
    stk_src = ila.choice('stk_src', [stk_src_dir, acc])
    sp_pushpop = ila.choice('sp_pushpop', sp + 1, sp - 1)
    indir_addrs.append(stk_iram_addr)
    indir_datas.append(stk_src)
    stk_data = ila.choice('stk_data', [iram[sp], iram[sp + 1], iram[sp - 1]])
    dir_addrs.append(stk_src_dir_addr)
    dir_datas.append(stk_data)
    r_pop = uc.writeDirect(stk_src_dir_addr, stk_data)
    # popping into SP itself (direct address 0x81) must use the popped
    # value, not the decremented pointer.
    sp_pop = ila.ite(stk_src_dir_addr == bv(0x81, 8), r_pop.sp, sp_pushpop)
    # exchanges; part of this implemented above in acc section.
    dir_addrs.append(xchg_src2_dir_addr)
    dir_datas.append(acc)
    xchg_src1_half_indir = ila.concat(xchg_src2_full_indir[7:4], acc[3:0])
    xchg_src1_indir = ila.choice('xchg_src1', [xchg_src1_half_indir, acc])
    indir_addrs.append(xchg_src2_indir_addr)
    indir_datas.append(xchg_src1_indir)
    # final indirect writes (same choice name couples addr and data).
    iram_indir = ila.store(iram, ila.choice('iram_indir', indir_addrs),
                           ila.choice('iram_indir', indir_datas))
    # final direct writes.
    assert len(dir_addrs) == len(dir_datas)
    r_dir = uc.writeDirect(
        ila.choice('iram_dir', dir_addrs), ila.choice('iram_dir', dir_datas))
    # set the next iram.
    iram_next = ila.choice(
        'iram_result', [iram, iram_indir, iram_call, r_dir.iram, r_bit.iram])
    model.set_next('IRAM', iram_next)

    ########################### PSW ##############################################
    cjne_cy = ila.ite(cjne_src1 < cjne_src2, bv(1, 1), bv(0, 1))
    # muldiv
    div_ov = ila.ite(b == 0, bv(1, 1), bv(0, 1))
    mul_ov = ila.ite(b_mul != 0, bv(1, 1), bv(0, 1))
    # da
    acc_da_cy2 = acc_da_stage2[8:8]
    acc_da_cy = acc_da_cy2 | acc_da_cy1 | uc.cy
    # alu flag computation (carry, aux-carry, overflow); the exact
    # extension/polarity variants are left as synthesis choices.
    alu_cy_in = ila.choice('alu_cy_in', [uc.cy, bv(0, 1)])
    alu_cy_5b = ila.choice(
        'alu_cy_5b',
        [ila.zero_extend(alu_cy_in, 5), ila.sign_extend(alu_cy_in, 5)])
    alu_src1_lo_5b = ila.zero_extend(acc[3:0], 5)
    alu_src2_lo_5b = ila.zero_extend(acc_src2[3:0], 5)
    alu_ac_add = (alu_src1_lo_5b + alu_src2_lo_5b + alu_cy_5b)[4:4]
    alu_ac_sub = ila.ite(alu_src1_lo_5b < (alu_src2_lo_5b + alu_cy_5b),
                         bv(1, 1), bv(0, 1))
    alu_ac = ila.choice('alu_ac', [alu_ac_add, alu_ac_sub])
    alu_src1_sext = ila.sign_extend(acc, 9)
    alu_src2_sext = ila.sign_extend(acc_src2, 9)
    alu_src1_zext = ila.zero_extend(acc, 9)
    alu_src2_zext = ila.zero_extend(acc_src2, 9)
    alu_cy_9b_sext = ila.sign_extend(alu_cy_in, 9)
    alu_cy_9b_zext = ila.zero_extend(alu_cy_in, 9)
    alu_cy_9b = ila.choice('alu_cy_9b', [alu_cy_9b_zext, alu_cy_9b_sext])
    alu_zext_9b_sum = alu_src1_zext + alu_src2_zext + alu_cy_9b
    alu_cy_add = alu_zext_9b_sum[8:8]
    alu_cy_sub1 = ila.ite(alu_src1_zext < (alu_src2_zext + alu_cy_9b),
                          bv(1, 1), bv(0, 1))
    alu_cy_sub2 = ila.ite(acc < (acc_src2 + ila.zero_extend(uc.cy, 8)),
                          bv(1, 1), bv(0, 1))
    alu_cy = ila.choice('alu_cy', [alu_cy_add, alu_cy_sub1, alu_cy_sub2])
    alu_ov_9b_src1 = ila.choice('alu_ov_9b_src1', [alu_src1_sext, alu_src1_zext])
    alu_ov_9b_src2 = ila.choice('alu_ov_9b_src2', [alu_src2_sext, alu_src2_zext])
    alu_9b_add = alu_ov_9b_src1 + alu_ov_9b_src2 + alu_cy_9b
    alu_9b_sub = alu_ov_9b_src1 - alu_ov_9b_src2 + alu_cy_9b
    alu_9b_res = ila.choice('alu_9b_res', [alu_9b_add, alu_9b_sub])
    alu_ov = ila.ite(alu_9b_res[8:8] != alu_9b_res[7:7], bv(1, 1), bv(0, 1))
    acc_cy = ila.choice('acc_cy', [uc.cy, acc[0:0], acc[7:7], alu_cy])
    acc_ac = ila.choice('acc_ac', [uc.ac, alu_ac])
    acc_ov = ila.choice('acc_ov', [uc.ov, alu_ov])
    # PSW layout: bit 7 = CY, bit 6 = AC, bit 2 = OV.
    psw_bit = ila.concat(bit_cy, psw[6:0])
    psw_cjne = ila.concat(cjne_cy, psw[6:0])
    psw_div = ila.concat(
        bv(0, 1), ila.concat(psw[6:3], ila.concat(div_ov, psw[1:0])))
    psw_mul = ila.concat(
        bv(0, 1), ila.concat(psw[6:3], ila.concat(mul_ov, psw[1:0])))
    psw_da = ila.concat(acc_da_cy, psw[6:0])
    psw_acc = ila.concat(
        acc_cy,
        ila.concat(acc_ac, ila.concat(psw[5:3], ila.concat(acc_ov, psw[1:0]))))
    psw_next = ila.choice('psw_next', [
        r_dir.psw, r_bit.psw, psw_cjne, psw_bit, psw_div, psw_mul, psw_da,
        psw_acc, psw
    ])
    model.set_next('PSW', psw_next)

    ########################### SP ##############################################
    sp_next = ila.choice('sp_next', [
        sp + 2, sp + 1, sp - 1, sp - 2, sp, sp_pop, r_pop.sp, r_dir.sp,
        r_bit.sp
    ])
    model.set_next('SP', sp_next)

    ########################### DPTR ##############################################
    mov_dptr = ila.choice(
        'mov_dptr', [ila.concat(op1, op2), ila.concat(op2, op1)])
    inc_dptr = dptr + 1
    dptr_n1 = ila.choice('next_dptr', [mov_dptr, inc_dptr, dptr])
    # NOTE(review): dptr_n1 is computed but never used below; dpl_n1/dph_n1
    # slice the *current* dptr. It looks like they should slice dptr_n1 —
    # confirm against the synthesis results before changing.
    dpl_n1 = dptr[7:0]
    dph_n1 = dptr[15:8]
    dpl_next = ila.choice('dpl_next', [dpl_n1, r_dir.dpl, r_bit.dpl, uc.dpl])
    dph_next = ila.choice('dph_next', [dph_n1, r_dir.dph, r_bit.dph, uc.dph])
    model.set_next('DPL', dpl_next)
    model.set_next('DPH', dph_next)

    ########################### B #################################################
    b_next = ila.choice('b_next', [b_mul, b_div, r_bit.b, r_dir.b, uc.b])
    model.set_next('B', b_next)

    ########################## XRAM ###############################################
    xram_addr_rx = ila.concat(
        bv(0, 8), ila.choice('lsb_xram_addr', [rx[0], rx[1]]))
    xram_addr_next = ila.choice(
        'xram_addr', [xram_addr_rx, dptr, uc.xram_addr, bv(0, 16)])
    model.set_next('XRAM_ADDR', xram_addr_next)
    xram_data_out_next = ila.choice('xram_data_out', [bv(0, 8), acc])
    model.set_next('XRAM_DATA_OUT', xram_data_out_next)

    ########################## SFRS ###############################################
    # Remaining special-function registers: written either via a bit
    # write, a direct write, or left unchanged.
    sfrs = [
        'p0', 'p1', 'p2', 'p3', 'pcon', 'tcon', 'tmod', 'tl0', 'th0', 'tl1',
        'th1', 'scon', 'sbuf', 'ie', 'ip'
    ]
    for s in sfrs:
        sfr_next = ila.choice(
            s + '_next', [getattr(r_bit, s), getattr(r_dir, s), getattr(uc, s)])
        model.set_next(s.upper(), sfr_next)

    # Synthesize each requested state against the eval8051 simulator.
    for s in state:
        print s
        st = time.clock()
        model.synthesize(s, eval8051)
        t_elapsed = time.clock() - st
        ast = model.get_next(s)
        print 'time: %.2f' % t_elapsed
        model.exportOne(ast, 'asts/%s_%s' % (s, 'en' if enable_ps else 'dis'))
def createAESILA(enable_ps): m = ila.Abstraction("aes") m.enable_parameterized_synthesis = enable_ps # I/O interface: this is where the commands come from. cmd = m.inp('cmd', 2) cmdaddr = m.inp('cmdaddr', 16) cmddata = m.inp('cmddata', 8) # response. dataout = m.reg('dataout', 8) # internal arch state. state = m.reg('aes_state', 2) opaddr = m.reg('aes_addr', 16) oplen = m.reg('aes_len', 16) keysel = m.reg('aes_keysel', 1) ctr = m.reg('aes_ctr', 128) key0 = m.reg('aes_key0', 128) key1 = m.reg('aes_key1', 128) # for the uinst. xram = m.mem('XRAM', 16, 8) aes = m.fun('aes', 128, [128, 128, 128]) # fetch is just looking at the input command. m.fetch_expr = ila.concat([state, cmd, cmdaddr, cmddata]) m.fetch_valid = (cmd == 1) | (cmd == 2) # decode rdcmds = [(state == i) & (cmd == 1) & (cmdaddr == addr) for addr in xrange(0xff00, 0xff40) for i in [0, 1, 2, 3]] wrcmds = [(state == 0) & (cmd == 2) & (cmdaddr == addr) for addr in xrange(0xff00, 0xff40)] nopcmds = [ ((state != 0) & (cmd != 1)) | ((state == 0) & (cmd != 1) & (cmd != 2)) ] m.decode_exprs = rdcmds + wrcmds + nopcmds # read commands statebyte = ila.zero_extend(state, 8) opaddrbyte = ila.readchunk('rd_addr', opaddr, 8) oplenbyte = ila.readchunk('rd_len', oplen, 8) keyselbyte = ila.zero_extend(keysel, 8) ctrbyte = ila.readchunk('rd_ctr', ctr, 8) key0byte = ila.readchunk('rd_key0', key0, 8) key1byte = ila.readchunk('rd_key1', key1, 8) dataoutnext = ila.choice('dataout', [ statebyte, opaddrbyte, oplenbyte, keyselbyte, ctrbyte, key0byte, key1byte, m.const(0, 8) ]) m.set_next('dataout', dataoutnext) # write commands. def mb_reg_wr(name, reg): # multibyte register write. 
reg_wr = ila.writechunk('wr_' + name, reg, cmddata) reg_nxt = ila.choice('nxt_' + name, [reg_wr, reg]) m.set_next(name, reg_nxt) mb_reg_wr('aes_addr', opaddr) mb_reg_wr('aes_len', oplen) mb_reg_wr('aes_ctr', ctr) mb_reg_wr('aes_key0', key0) mb_reg_wr('aes_key1', key1) # bit-level registers def bit_reg_wr(name, reg, sz): # bitwise register write assert reg.type.bitwidth == sz reg_wr = cmddata[sz - 1:0] reg_nxt = ila.choice('nxt_' + name, [reg_wr, reg]) m.set_next(name, reg_nxt) bit_reg_wr('aes_keysel', keysel, 1) # these are for the uinst um = m.add_microabstraction('aes_compute', state != 0) # read data rd_data = um.reg('rd_data', 128) enc_data = um.reg('enc_data', 128) byte_cnt = um.reg('byte_cnt', 4) oped_byte_cnt = um.reg('oped_byte_cnt', 16) blk_cnt = um.reg('blk_cnt', 16) um.set_init('byte_cnt', um.const(0, 4)) um.set_init('blk_cnt', um.const(0, 16)) um.set_init('oped_byte_cnt', um.const(0, 16)) uxram = m.getmem('XRAM') byte_cnt_16b = ila.zero_extend(byte_cnt, 16) um.fetch_expr = state um.decode_exprs = [(state == i) & (byte_cnt == j) for j in xrange(16) for i in [1, 2, 3]] usim = lambda s: AESmicro().simMicro(s) # byte_cnt byte_cnt_inc = byte_cnt + 1 byte_cnt_buf = ila.choice('byte_cnt_buf', [byte_cnt_inc, byte_cnt]) byte_cnt_nxt = ila.choice( 'byte_cnt_nxt', [byte_cnt_inc, m.const(0, 4), byte_cnt]) um.set_next('byte_cnt', byte_cnt_nxt) # oped_byte_cnt oped_byte_cnt_inc = oped_byte_cnt + 16 oped_byte_cnt_nxt = ila.choice( 'oped_byte_cnt_nxt', [m.const(0, 16), oped_byte_cnt, oped_byte_cnt_inc]) um.set_next('oped_byte_cnt', oped_byte_cnt_nxt) # blk_cnt blk_cnt_inc = blk_cnt + 16 more_blocks = (oped_byte_cnt_inc < oplen) blk_cnt_nxt = ila.choice('blk_cnt_nxt', [ m.const(0, 16), blk_cnt, blk_cnt_inc, ila.ite(more_blocks, blk_cnt_inc, blk_cnt) ]) um.set_next('blk_cnt', blk_cnt_nxt) # ustate ustate = um.getreg('aes_state') ustate_nxt = ila.choice('ustate_next', [ m.const(0, 2), m.const(1, 2), m.const(2, 2), m.const(3, 2), ustate, ila.ite(more_blocks, m.const(1, 2), 
m.const(0, 2)) ]) um.set_next('aes_state', ustate_nxt) # rd_data rdblock = ila.writechunk("rd_data_chunk", rd_data, ila.load(uxram, opaddr + blk_cnt + byte_cnt_16b)) rd_data_nxt = ila.choice('rd_data_nxt', rdblock, rd_data) um.set_next('rd_data', rd_data_nxt) # enc_data aes_key = ila.ite(keysel == 0, key0, key1) aes_enc_data = ila.appfun(aes, [ctr, aes_key, rd_data]) enc_data_nxt = ila.ite(state == 2, aes_enc_data, enc_data) um.set_next('enc_data', enc_data_nxt) #print um.get_next('enc_data') # xram write xram_w_data = ila.readchunk('enc_data_chunk', enc_data, 8) xram_w_addr = opaddr + blk_cnt + byte_cnt_16b xram_w_aes = ila.store(uxram, xram_w_addr, xram_w_data) xram_nxt = ila.choice('xram_nxt', uxram, xram_w_aes) um.set_next('XRAM', xram_nxt) suffix = 'en' if enable_ps else 'dis' timefile = open('aes-times-%s.txt' % suffix, 'wt') t_elapsed = 0 # micro-synthesis for s in [ 'XRAM', 'aes_state', 'byte_cnt', 'blk_cnt', 'oped_byte_cnt', 'rd_data' ]: t_elapsed = 0 st = time.clock() um.synthesize(s, usim) dt = time.clock() - st t_elapsed += dt print >> timefile, '%s %.2f' % ('u_' + s, dt) print '%s: %s' % (s, str(um.get_next(s))) ast = um.get_next(s) m.exportOne(ast, 'asts/u_%s_%s' % (s, suffix)) sim = lambda s: AESmacro().simMacro(s) # state state_next = ila.choice( 'state_next', [state, ila.ite(cmddata == 1, m.const(1, 2), state)]) m.set_next('aes_state', state_next) # xram m.set_next('XRAM', xram) # synthesize. 
for s in [ 'aes_state', 'aes_addr', 'aes_len', 'aes_keysel', 'aes_ctr', 'aes_key0', 'aes_key1', 'dataout' ]: st = time.clock() m.synthesize(s, sim) dt = time.clock() - st t_elapsed += dt print >> timefile, '%s %.2f' % (s, dt) ast = m.get_next(s) print '%s: %s' % (s, str(ast)) m.exportOne(ast, 'asts/%s_%s' % (s, suffix)) # connect to the uinst m.connect_microabstraction('aes_state', um) m.connect_microabstraction('XRAM', um) print 'total time: %.2f' % t_elapsed #print 'aes_state: %s' % str(m.get_next('aes_state')) #print 'XRAM: %s' % str(m.get_next('XRAM')) #m.generateSim('gen/aes_sim.hpp') m.generateSimToDir('sim')
def createAESILA(enable_ps):
    """Build the counter-mode AES ILA (macro command interface plus the
    'aes_compute' micro ILA) and return the pair (m, um) without
    synthesizing.  The '# change N' markers track the deltas from the
    earlier model revision: a micro-level counter register (uaes_ctr),
    its initialization from aes_ctr, an aes_time delay counter, and the
    timing-dependent state transition.

    enable_ps : bool, enables parameterized synthesis in the engine.
    """
    m = ila.Abstraction("aes")
    m.enable_parameterized_synthesis = enable_ps

    # I/O interface: this is where the commands come from.
    cmd = m.inp('cmd', 2)
    cmdaddr = m.inp('cmdaddr', 16)
    cmddata = m.inp('cmddata', 8)
    # internal arch state.
    state = m.reg('aes_state', 2)
    opaddr = m.reg('aes_addr', 16)
    oplen = m.reg('aes_len', 16)
    ctr = m.reg('aes_ctr', 128)
    key0 = m.reg('aes_key0', 128)
    # for the uinst.
    xram = m.mem('XRAM', 16, 8)
    aes = m.fun('aes', 128, [128, 128, 128])

    # fetch is just looking at the input command.
    m.fetch_expr = ila.concat(
        [cmd, cmdaddr, cmddata])  # actually, the equivelant instruction
    m.fetch_valid = (cmd == 2)  # when write to some addresses

    # decode: only writes to the register window 0xff00..0xff2f.
    wrcmds = [(cmd == 2) & (cmdaddr == addr)
              for addr in xrange(0xff00, 0xff30)]
    m.decode_exprs = wrcmds
    # the micro abstraction runs while the accelerator is busy.
    um = m.add_microabstraction('aes_compute', state != 0)

    # write commands.
    def mb_reg_wr(name, reg):
        # multibyte register write: one chunk from cmddata or unchanged.
        reg_wr = ila.writechunk('wr_' + name, reg, cmddata)
        reg_nxt = ila.choice('nxt_' + name, [reg_wr, reg])
        m.set_next(name, reg_nxt)

    mb_reg_wr('aes_addr', opaddr)
    mb_reg_wr('aes_len', oplen)
    mb_reg_wr('aes_ctr', ctr)
    mb_reg_wr('aes_key0', key0)

    # state: cmddata == 1 starts an operation.
    state_next = ila.choice(
        'state_next', [state, ila.ite(cmddata == 1, m.const(1, 2), state)])
    m.set_next('aes_state', state_next)
    # xram is untouched at the macro level.
    m.set_next('XRAM', xram)

    ################################
    # Micro-ILA
    ################################
    # read data
    rd_data = um.reg('rd_data', 128)
    enc_data = um.reg('enc_data', 128)
    byte_cnt = um.reg('byte_cnt', 4)
    oped_byte_cnt = um.reg('oped_byte_cnt', 16)
    blk_cnt = um.reg('blk_cnt', 16)
    aes_time = um.reg('aes_time', 5)
    uaes_ctr = um.reg('uaes_ctr', 128)  # change 1
    um.set_init('byte_cnt', um.const(0, 4))
    um.set_init('blk_cnt', um.const(0, 16))
    um.set_init('oped_byte_cnt', um.const(0, 16))
    um.set_init('aes_time', um.const(0, 5))
    um.set_init('uaes_ctr', m.getreg('aes_ctr'))  # change 2
    uxram = m.getmem('XRAM')
    byte_cnt_16b = ila.zero_extend(byte_cnt, 16)
    um.fetch_expr = state
    um.decode_exprs = [(state == i) & (byte_cnt == j) for j in xrange(16)
                       for i in [1, 2, 3]]  # decode expressions

    # byte_cnt: position within the current 16-byte block.
    byte_cnt_inc = byte_cnt + 1
    byte_cnt_nxt = ila.choice(
        'byte_cnt_nxt', [m.const(0, 4), byte_cnt_inc, byte_cnt])  # 0, +1, NC
    um.set_next('byte_cnt', byte_cnt_nxt)

    # oped_byte_cnt: total bytes processed so far.
    oped_byte_cnt_inc = oped_byte_cnt + 16
    oped_byte_cnt_nxt = ila.choice(
        'oped_byte_cnt_nxt',
        [m.const(0, 16), oped_byte_cnt_inc, oped_byte_cnt])  # 0, +16, NC
    um.set_next('oped_byte_cnt', oped_byte_cnt_nxt)

    # blk_cnt: byte offset of the current block within the operation.
    blk_cnt_inc = blk_cnt + 16
    more_blocks = (oped_byte_cnt_inc < oplen)
    blk_cnt_nxt = ila.choice('blk_cnt_nxt', [
        m.const(0, 16), blk_cnt, blk_cnt_inc,
        ila.ite(more_blocks, blk_cnt_inc, blk_cnt)
    ])
    um.set_next('blk_cnt', blk_cnt_nxt)

    # aes_time: 5-bit delay counter that saturates at 31 and gates the
    # encrypt -> writeback transition.
    aes_time_inc = aes_time + 1
    aes_time_ov = aes_time == m.const(31, 5)
    aes_time_nxt_c = ila.ite(aes_time_ov, aes_time, aes_time_inc)
    aes_time_nxt = ila.choice(
        "aes_timeC", m.const(0, 5), aes_time_nxt_c,
        ila.ite(more_blocks, m.const(0, 5), aes_time_nxt_c))
    aes_time_enough = aes_time > m.const(10, 5)
    um.set_next('aes_time', aes_time_nxt)

    # change 3: the micro counter either holds, advances by some bounded
    # increment while more blocks remain, or reloads from the macro ctr.
    um.set_next(
        'uaes_ctr',
        ila.choice(
            'uaes_ctr_nxt', uaes_ctr,
            ila.ite(
                more_blocks,
                uaes_ctr + ila.inrange('addvalue', um.const(1, 128),
                                       um.const(128, 128)), uaes_ctr), ctr))

    # ustate
    ustate = um.getreg('aes_state')
    ustate_nxt = ila.choice('ustate_next', [
        m.const(0, 2), m.const(1, 2), m.const(2, 2), m.const(3, 2), ustate,
        ila.ite(more_blocks, m.const(1, 2), m.const(0, 2)),
        ila.ite(aes_time_enough, m.const(3, 2), m.const(2, 2))
    ])  # change 4
    um.set_next('aes_state', ustate_nxt)

    # rd_data: pull one byte per micro step into the 128-bit buffer.
    rdblock = ila.writechunk("rd_data_chunk", rd_data,
                             ila.load(uxram, opaddr + blk_cnt + byte_cnt_16b))
    rd_data_nxt = ila.choice('rd_data_nxt', rdblock, rd_data)
    um.set_next('rd_data', rd_data_nxt)

    # enc_data: uninterpreted AES of (counter, key, data) in state 2; the
    # counter value is itself a synthesis choice.
    aes_key = key0
    aes_ctr = ila.choice('ctr', uaes_ctr, ctr + ila.zero_extend(blk_cnt, 128))
    aes_enc_data = ila.appfun(aes, [aes_ctr, aes_key, rd_data])
    enc_data_nxt = ila.ite(state == 2, aes_enc_data, enc_data)
    um.set_next('enc_data', enc_data_nxt)
    #print um.get_next('enc_data')

    # xram write: one ciphertext byte per micro step.
    xram_w_data = ila.readchunk('enc_data_chunk', enc_data, 8)
    xram_w_addr = opaddr + blk_cnt + byte_cnt_16b
    xram_w_aes = ila.store(uxram, xram_w_addr, xram_w_data)
    xram_nxt = ila.choice('xram_nxt', uxram, xram_w_aes)
    um.set_next('XRAM', xram_nxt)

    return m, um