def sliceSelectOne(modCase):
    """Build a right-nested ``ila.ite`` chain that picks one RAM slice.

    For each case index from ``modCase`` up to ``gb.RAM_size - 1`` the chain
    compares the free variable ``start`` with the case index and, on a match,
    yields ``ila.load(gb.RAM[seqs[case]], gb.RAM_x - gb.RAM_x_1)``.  The last
    case is returned unconditionally, so it also serves as the fall-through.

    ``seqs``, ``start`` and ``gb`` are taken from the enclosing scope.
    """
    idx = seqs[modCase]

    # Final case: nothing left to compare against, return its slice directly.
    if modCase == gb.RAM_size - 1:
        return ila.load(gb.RAM[idx], gb.RAM_x - gb.RAM_x_1)

    is_selected = start == modCase
    this_slice = ila.load(gb.RAM[idx], gb.RAM_x - gb.RAM_x_1)
    remaining = sliceSelectOne(modCase + 1)
    return ila.ite(is_selected, this_slice, remaining)
def instructionFetch(self):
    """Fetch two instruction words and slice out their opcode fields.

    Loads one word from ``self.mem`` for each of ``self.pc_a`` and
    ``self.pc_b`` (bits 31..2 of the PC, zero-extended to
    ``instruction_format.MEM_ADDRESS_BITS``), and stores the results in
    ``inst_a``/``inst_b`` and ``opcode_a``/``opcode_b``.
    """
    fmt = instruction_format

    def fetch(pc):
        # The two low PC bits are dropped before the address is widened.
        return ila.load(self.mem,
                        ila.zero_extend(pc[31:2], fmt.MEM_ADDRESS_BITS))

    self.inst_a = fetch(self.pc_a)
    self.inst_b = fetch(self.pc_b)

    opcode_hi = fmt.OPCODE_BIT_TOP - 1
    opcode_lo = fmt.OPCODE_BIT_BOT
    self.opcode_a = self.inst_a[opcode_hi:opcode_lo]
    self.opcode_b = self.inst_b[opcode_hi:opcode_lo]
def instructionFetch(self):
    """Fetch the instruction at ``self.pc`` and decode its fields.

    Sets ``inst``, ``opcode``, ``fetch_expr``, ``dest``, ``src1``..``src3``,
    ``baseImm`` (sign-extended), ``branchPred``, ``predReg``, ``branchImm``
    (zero-extended) and the register values ``sreg1``..``sreg3`` and
    ``sregdest`` looked up through ``self.indexIntoReg``.  All bit positions
    come from ``instruction_format``.
    """
    fmt = instruction_format

    def field(top, bot):
        # Fields are described by an exclusive top bit and an inclusive bottom bit.
        return self.inst[(top - 1):bot]

    word_addr = ila.zero_extend(self.pc[31:2], fmt.MEM_ADDRESS_BITS)
    self.inst = ila.load(self.mem, word_addr)

    self.opcode = field(fmt.OPCODE_BIT_TOP, fmt.OPCODE_BIT_BOT)
    self.fetch_expr = self.inst
    self.dest = field(fmt.DST_BIT_TOP, fmt.DST_BIT_BOT)
    self.src1 = field(fmt.SRC0_BIT_TOP, fmt.SRC0_BIT_BOT)
    self.src2 = field(fmt.SRC1_BIT_TOP, fmt.SRC1_BIT_BOT)
    self.src3 = field(fmt.SRC2_BIT_TOP, fmt.SRC2_BIT_BOT)
    self.baseImm = ila.sign_extend(
        field(fmt.BASE_BIT_TOP, fmt.BASE_BIT_BOT), fmt.PC_BITS)

    # The branch predicate register index shares the destination field.
    self.branchPred = self.dest
    self.predReg = self.indexIntoReg(self.branchPred)
    self.branchImm = ila.zero_extend(
        field(fmt.IMM_BIT_TOP, fmt.IMM_BIT_BOT), fmt.PC_BITS)

    self.sreg1 = self.indexIntoReg(self.src1)
    self.sreg2 = self.indexIntoReg(self.src2)
    self.sreg3 = self.indexIntoReg(self.src3)
    self.sregdest = self.indexIntoReg(self.dest)
def sreg_nxt(self, regNo):
    """Build the next-state expression for scalar register ``regNo``.

    The register keeps its current value unless ``self.dest == regNo``.  When
    it is the destination:

    * register-register format (``self.isRegReg``) with ``rrType == 0b000``
      applies ADD_I/SUB_I/AND/OR/MULH_I to ``sreg1`` and ``sreg2``;
    * immediate format (``self.isImmediate``) applies the same five
      operations to ``sreg1`` and ``self.immB`` (``immType == 0b00``) or
      ``self.immA`` (``immType == 0b10``);
    * anything else keeps the current value.  The ``isLoad`` selection is a
      placeholder whose two branches are identical.

    Fix over the previous revision: in the immediate format the MULH_I case
    multiplied by ``sreg2`` although every other immediate case (and the
    vector counterpart ``vreg_nxt``) uses the immediate.  ``sreg2`` is read
    through ``rrSrc2``, whose bits overlap the immediate field, so it is not
    a meaningful operand there.

    Args:
        regNo: index into ``self.scalar_registers``.

    Returns:
        The ILA expression for the register's next value.
    """
    sreg1 = self.indexToSGPR(self.rrSrc1)
    sreg2 = self.indexToSGPR(self.rrSrc2)

    # load instruction
    # NOTE(review): load_val is built but not consumed yet; the isLoad branch
    # below keeps the register unchanged either way.
    addr = self.indexToSGPR(self.memPtr) + ila.sign_extend(
        self.memOffSet, SCALAR_REG_BITS)
    load_val = ila.load(self.mem,
                        ila.zero_extend(addr[31:2], MEM_ADDRESS_BITS))

    keep = self.scalar_registers[regNo]
    const = self.model.const

    def alu(opcode, rhs):
        # Five-operation ALU on (sreg1, rhs); unknown opcodes keep the register.
        return ila.ite(opcode == NyEncoding.ADD_I, sreg1 + rhs,
               ila.ite(opcode == NyEncoding.SUB_I, sreg1 - rhs,
               ila.ite(opcode == NyEncoding.AND, sreg1 & rhs,
               ila.ite(opcode == NyEncoding.OR, sreg1 | rhs,
               ila.ite(opcode == NyEncoding.MULH_I, sreg1 * rhs,
                       keep)))))

    reg_reg = ila.ite(self.rrType == const(0b000, 3),
                      alu(self.rrOpcode, sreg2),
                      keep)

    # The immediate opcode field is narrower than the encodings; widen it to 6 bits.
    imm_opcode = ila.zero_extend(self.immOpcode, 6)
    immediate = ila.ite(self.immType == const(0b00, 2),
                        alu(imm_opcode, self.immB),
                        ila.ite(self.immType == const(0b10, 2),
                                alu(imm_opcode, self.immA),
                                keep))

    memory = ila.ite(self.isLoad == const(0b1, 1), keep, keep)

    return ila.ite(self.dest == regNo,
                   ila.ite(self.isRegReg,
                           reg_reg,
                           ila.ite(self.isImmediate, immediate, memory)),
                   keep)
def instructionFetch(self):
    """Fetch the instruction at ``self.pc`` and slice it for every format.

    The 32-bit word is decoded four ways at once (branch, register-register,
    immediate, memory); each format's fields are stored on ``self`` under the
    ``branch*``, ``rr*``, ``imm*`` and ``mem*`` names.  ``isBranch``,
    ``isRegReg``, ``isImmediate`` and ``isMem`` compare the top bits against
    their format tags.  ``dest`` is bits 9..5 regardless of format.
    """
    const = self.model.const
    width = SCALAR_REG_BITS

    self.instruction = ila.load(
        self.mem, ila.zero_extend(self.pc[31:2], MEM_ADDRESS_BITS))
    word = self.instruction

    # --- branch format -------------------------------------------------
    self.isBranch = (word[31:28] == const(0b1111, 4))
    self.branchOP = word[27:25]
    self.branchOffsetA = word[24:5]
    self.branchSrc = word[4:0]
    self.branchOffsetB = word[24:0]

    # --- register-register format --------------------------------------
    self.isRegReg = (word[31:29] == const(0b110, 3))
    self.rrType = word[28:26]
    self.rrOpcode = word[25:20]
    self.rrSrc2 = word[19:15]
    self.rrMask = word[14:10]
    self.rrDest = word[9:5]
    self.rrSrc1 = word[4:0]

    # --- immediate format ----------------------------------------------
    self.isImmediate = (word[31] == const(0b0, 1))
    self.immType = word[30:29]
    self.immOpcode = word[28:24]
    self.immA = ila.zero_extend(word[23:15], width)
    self.immB = ila.zero_extend(word[23:10], width)
    self.immCup = word[23:10]
    self.immClow = word[4:0]
    self.immDest = word[9:5]
    self.immMask = word[14:10]

    # Immediate value by immType: top type bit clear -> immB, 0b10 -> the
    # concatenation immCup:immClow, otherwise immA (the 0b11 test selects
    # between two identical immA values).
    type_hi_clear = self.immType[1] == const(0b0, 1)
    imm_b_value = ila.zero_extend(self.immB, width)
    type_is_10 = self.immType == const(0b10, 2)
    imm_c_value = ila.zero_extend(
        ila.concat(self.immCup, self.immClow), width)
    type_is_11 = self.immType == const(0b11, 2)
    imm_a_value = ila.zero_extend(self.immA, width)
    imm_a_fallback = ila.zero_extend(self.immA, width)
    self.imm = ila.ite(
        type_hi_clear, imm_b_value,
        ila.ite(type_is_10, imm_c_value,
                ila.ite(type_is_11, imm_a_value, imm_a_fallback)))

    # --- memory format -------------------------------------------------
    self.isMem = (word[31:30] == const(0b10, 2))
    self.isLoad = word[29]
    self.memOpcode = word[28:25]
    self.memOffSetA = word[24:15]
    self.memOffSetB = word[24:10]
    self.memMask = word[14:10]
    self.memDest = word[9:5]
    self.memSrc = word[9:5]
    self.memPtr = word[4:0]

    # Opcodes 0b1000 and 0b1110 take the shorter offset A; all others offset B.
    opcode_is_1000 = self.memOpcode == const(0b1000, 4)
    offset_for_1000 = ila.sign_extend(self.memOffSetA, width)
    opcode_is_1110 = self.memOpcode == const(0b1110, 4)
    offset_for_1110 = ila.sign_extend(self.memOffSetA, width)
    offset_default = ila.sign_extend(self.memOffSetB, width)
    self.memOffSet = ila.ite(
        opcode_is_1000, offset_for_1000,
        ila.ite(opcode_is_1110, offset_for_1110, offset_default))

    # --- mask / destination --------------------------------------------
    type_is_010 = self.rrType == const(0b010, 3)
    type_is_101 = self.rrType == const(0b101, 3)
    self.isMask = ((type_is_010 | type_is_101) & self.isRegReg)  # need rewrite
    self.dest = word[9:5]
def instructionFetch(self):
    """Fetch the instruction at ``self.pc`` and decode fixed-position fields.

    Layout used here: opcode 31..22, dest 21..17, src1 16..12, src2 11..7,
    src3 6..2; ``branchPC`` is bits 21..0 zero-extended to ``PC_BITS``.
    The four register indices are then resolved through
    ``self.indexIntoReg`` into ``sreg1``..``sreg3`` and ``sregdest``.
    """
    word_addr = ila.zero_extend(self.pc[31:2], MEM_ADDRESS_BITS)
    self.inst = ila.load(self.mem, word_addr)

    self.opcode = self.inst[31:22]
    self.fetch_expr = self.inst
    self.dest = self.inst[21:17]
    self.src1 = self.inst[16:12]
    self.src2 = self.inst[11:7]
    self.src3 = self.inst[6:2]
    self.branchPC = ila.zero_extend(self.inst[21:0], PC_BITS)

    # Resolve each register index to its value, in the original order.
    lookups = (
        ("sreg1", self.src1),
        ("sreg2", self.src2),
        ("sreg3", self.src3),
        ("sregdest", self.dest),
    )
    for attr_name, reg_index in lookups:
        setattr(self, attr_name, self.indexIntoReg(reg_index))
def get_reg_choices(reg):
    """Build the synthesis template for general register ``reg``.

    The returned ``ila.choice`` (named ``"x<reg>_next"``) lets synthesis pick
    between leaving the register untouched and, when ``rm.rd == reg``, one of
    the candidate results listed below (named ``"x<reg>"``).  Operands come
    from the module-level model ``rm``.
    """
    rs1_val = rm.indexIntoGPR(rm.rs1)
    rs2_val = rm.indexIntoGPR(rm.rs2)
    rd_val = rm.indexIntoGPR(rm.rd)

    rs_val = ila.choice('rs_sel', rs1_val, rs2_val)
    # Shift amount: low five bits of rs2, or instruction bits 24..20.
    shamt = ila.choice('shift_amout', rs2_val[4:0], rm.inst[24:20])
    # Second operand: rs2, or the I-immediate zero- or sign-extended to 32 bits.
    rs2_comb = ila.choice('rs2_or_immed',
                          rs2_val,
                          ila.zero_extend(rm.immI, 32),
                          ila.sign_extend(rm.immI, 32))

    # Word-addressed load; getSlice then uses the two low address bits.
    addr = rs1_val + rm.immI
    lw_val = ila.load(rm.mem, zext(addr[31:2]))
    load_val = getSlice(lw_val, addr[1:0])
    # load_dw = ila.loadblk(rm.mem, zext(addr[31:2]), 2 )

    next_name = "x%d_next" % reg
    # Remain the same regardless of RD (i.e. S/SB instructions).
    unchanged = rm.generalRegList[reg]
    # Is this the destination register?
    is_destination = rm.rd == reg
    written_name = "x%d" % reg

    candidates = [
        rs1_val + rs2_comb,  # RS1 + RS2
        rs1_val - rs2_comb,  # RS1 - RS2
        rs1_val & rs2_comb,  # AND
        rs1_val | rs2_comb,  # OR
        rs1_val ^ rs2_comb,  # XOR
        ila.ite(ila.slt(rs1_val, rs2_comb), bv(1), bv(0)),  # SLT
        ila.ite(ila.slt(rs1_val, rs2_comb), bv(0), bv(1)),  # inverse of SLT
        ila.ite(rs1_val < rs2_comb, bv(1), bv(0)),  # '<' compare (presumably unsigned; confirm)
        rs1_val << zext(shamt),  # sll
        rs1_val >> zext(shamt),  # srl
        ila.ashr(rs1_val, zext(shamt)),  # sra
        rm.immU,  # LUI
        rm.immU + rm.pc,  # AUIPC
        rm.pc + bv(4),  # JAL/JALR
        load_val,
    ]
    written = ila.choice(written_name, candidates)
    # When this is not the destination the register remains the same.
    guarded = ila.ite(is_destination, written, rm.generalRegList[reg])

    return ila.choice(next_name, [unchanged, guarded])
def instructionFetch(self):
    """Fetch the instruction at ``self.pc`` and decode contiguous fields.

    The fields are packed back to back from the top of the word: opcode,
    dest, src1, src2, src3, each bounded by the module-level ``*_BIT``
    constants.  ``baseImm`` is the low ``BASE_BIT`` bits sign-extended and
    ``branchImm`` is bits ``DST_BIT-1 .. BASE_BIT`` zero-extended, both to
    ``PC_BITS``.  Register values are resolved via ``self.indexIntoReg``.
    """
    def field(top, bot):
        # ``top`` is exclusive, ``bot`` inclusive.
        return self.inst[(top - 1):bot]

    word_addr = ila.zero_extend(self.pc[31:2], MEM_ADDRESS_BITS)
    self.inst = ila.load(self.mem, word_addr)

    self.opcode = field(REG_BITS, OPCODE_BIT)
    self.fetch_expr = self.inst
    self.dest = field(OPCODE_BIT, DST_BIT)
    self.src1 = field(DST_BIT, SRC0_BIT)
    self.src2 = field(SRC0_BIT, SRC1_BIT)
    self.src3 = field(SRC1_BIT, SRC2_BIT)
    self.baseImm = ila.sign_extend(field(BASE_BIT, 0), PC_BITS)

    # The branch predicate register index shares the destination field.
    self.branchPred = self.dest
    self.predReg = self.indexIntoReg(self.branchPred)
    self.branchImm = ila.zero_extend(field(DST_BIT, BASE_BIT), PC_BITS)

    self.sreg1 = self.indexIntoReg(self.src1)
    self.sreg2 = self.indexIntoReg(self.src2)
    self.sreg3 = self.indexIntoReg(self.src3)
    self.sregdest = self.indexIntoReg(self.dest)
def InstFetch(self):
    """Fetch the instruction at ``self.pc`` and decode its fields.

    Stores the raw word in ``inst``/``fetch_expr``, the register and function
    fields (``opcode``, ``rd``, ``rs1``, ``rs2``, ``funct3``, ``funct7``,
    ``funct12``, ``csr_index``) and the five immediates ``immI``, ``immS``,
    ``immB``, ``immU`` and ``immJ``.  All immediates except ``immU`` are
    sign-extended to ``XLEN``.
    """
    # Alternative kept for reference: take the instruction from a model input
    # (self.model.inp('inst', 32)) instead of loading it from memory.
    word = ila.load(self.mem, ila.zero_extend(self.pc[31:2], 32))
    self.inst = word
    self.fetch_expr = self.inst

    # Fixed-position fields.
    self.opcode = self.inst[6:0]
    self.rd = self.inst[11:7]
    self.rs1 = self.inst[19:15]
    self.rs2 = self.inst[24:20]
    self.funct3 = self.inst[14:12]
    self.funct7 = self.inst[31:25]
    self.funct12 = self.inst[31:20]

    # Immediates, assembled most-significant piece first.
    self.immI = ila.sign_extend(word[31:20], XLEN)

    s_parts = [word[31:25], word[11:7]]
    self.immS = ila.sign_extend(ila.concat(s_parts), XLEN)

    # B and J immediates carry an implicit zero as their lowest bit.
    b_parts = [word[31], word[7], word[30:25], word[11:8], const(0, 1)]
    self.immB = ila.sign_extend(ila.concat(b_parts), XLEN)

    u_parts = [word[31:12], const(0, 12)]
    self.immU = ila.concat(u_parts)

    j_parts = [word[31], word[19:12], word[20], word[30:21], const(0, 1)]
    self.immJ = ila.sign_extend(ila.concat(j_parts), XLEN)

    self.csr_index = self.inst[31:20]
def createAESILA(enable_ps): m = ila.Abstraction("aes") m.enable_parameterized_synthesis = enable_ps # I/O interface: this is where the commands come from. cmd = m.inp('cmd', 2) cmdaddr = m.inp('cmdaddr', 16) cmddata = m.inp('cmddata', 8) # response. dataout = m.reg('dataout', 8) # internal arch state. state = m.reg('aes_state', 2) opaddr = m.reg('aes_addr', 16) oplen = m.reg('aes_len', 16) keysel = m.reg('aes_keysel', 1) ctr = m.reg('aes_ctr', 128) key0 = m.reg('aes_key0', 128) key1 = m.reg('aes_key1', 128) # for the uinst. xram = m.mem('XRAM', 16, 8) aes = m.fun('aes', 128, [128, 128, 128]) # fetch is just looking at the input command. m.fetch_expr = ila.concat([state, cmd, cmdaddr, cmddata]) m.fetch_valid = (cmd == 1) | (cmd == 2) # decode rdcmds = [(state == i) & (cmd == 1) & (cmdaddr == addr) for addr in xrange(0xff00, 0xff40) for i in [0, 1, 2, 3]] wrcmds = [(state == 0) & (cmd == 2) & (cmdaddr == addr) for addr in xrange(0xff00, 0xff40)] nopcmds = [ ((state != 0) & (cmd != 1)) | ((state == 0) & (cmd != 1) & (cmd != 2)) ] m.decode_exprs = rdcmds + wrcmds + nopcmds # read commands statebyte = ila.zero_extend(state, 8) opaddrbyte = ila.readchunk('rd_addr', opaddr, 8) oplenbyte = ila.readchunk('rd_len', oplen, 8) keyselbyte = ila.zero_extend(keysel, 8) ctrbyte = ila.readchunk('rd_ctr', ctr, 8) key0byte = ila.readchunk('rd_key0', key0, 8) key1byte = ila.readchunk('rd_key1', key1, 8) dataoutnext = ila.choice('dataout', [ statebyte, opaddrbyte, oplenbyte, keyselbyte, ctrbyte, key0byte, key1byte, m.const(0, 8) ]) m.set_next('dataout', dataoutnext) # write commands. def mb_reg_wr(name, reg): # multibyte register write. 
reg_wr = ila.writechunk('wr_' + name, reg, cmddata) reg_nxt = ila.choice('nxt_' + name, [reg_wr, reg]) m.set_next(name, reg_nxt) mb_reg_wr('aes_addr', opaddr) mb_reg_wr('aes_len', oplen) mb_reg_wr('aes_ctr', ctr) mb_reg_wr('aes_key0', key0) mb_reg_wr('aes_key1', key1) # bit-level registers def bit_reg_wr(name, reg, sz): # bitwise register write assert reg.type.bitwidth == sz reg_wr = cmddata[sz - 1:0] reg_nxt = ila.choice('nxt_' + name, [reg_wr, reg]) m.set_next(name, reg_nxt) bit_reg_wr('aes_keysel', keysel, 1) # these are for the uinst um = m.add_microabstraction('aes_compute', state != 0) # read data rd_data = um.reg('rd_data', 128) enc_data = um.reg('enc_data', 128) byte_cnt = um.reg('byte_cnt', 4) oped_byte_cnt = um.reg('oped_byte_cnt', 16) blk_cnt = um.reg('blk_cnt', 16) um.set_init('byte_cnt', um.const(0, 4)) um.set_init('blk_cnt', um.const(0, 16)) um.set_init('oped_byte_cnt', um.const(0, 16)) uxram = m.getmem('XRAM') byte_cnt_16b = ila.zero_extend(byte_cnt, 16) um.fetch_expr = state um.decode_exprs = [(state == i) & (byte_cnt == j) for j in xrange(16) for i in [1, 2, 3]] usim = lambda s: AESmicro().simMicro(s) # byte_cnt byte_cnt_inc = byte_cnt + 1 byte_cnt_buf = ila.choice('byte_cnt_buf', [byte_cnt_inc, byte_cnt]) byte_cnt_nxt = ila.choice( 'byte_cnt_nxt', [byte_cnt_inc, m.const(0, 4), byte_cnt]) um.set_next('byte_cnt', byte_cnt_nxt) # oped_byte_cnt oped_byte_cnt_inc = oped_byte_cnt + 16 oped_byte_cnt_nxt = ila.choice( 'oped_byte_cnt_nxt', [m.const(0, 16), oped_byte_cnt, oped_byte_cnt_inc]) um.set_next('oped_byte_cnt', oped_byte_cnt_nxt) # blk_cnt blk_cnt_inc = blk_cnt + 16 more_blocks = (oped_byte_cnt_inc < oplen) blk_cnt_nxt = ila.choice('blk_cnt_nxt', [ m.const(0, 16), blk_cnt, blk_cnt_inc, ila.ite(more_blocks, blk_cnt_inc, blk_cnt) ]) um.set_next('blk_cnt', blk_cnt_nxt) # ustate ustate = um.getreg('aes_state') ustate_nxt = ila.choice('ustate_next', [ m.const(0, 2), m.const(1, 2), m.const(2, 2), m.const(3, 2), ustate, ila.ite(more_blocks, m.const(1, 2), 
m.const(0, 2)) ]) um.set_next('aes_state', ustate_nxt) # rd_data rdblock = ila.writechunk("rd_data_chunk", rd_data, ila.load(uxram, opaddr + blk_cnt + byte_cnt_16b)) rd_data_nxt = ila.choice('rd_data_nxt', rdblock, rd_data) um.set_next('rd_data', rd_data_nxt) # enc_data aes_key = ila.ite(keysel == 0, key0, key1) aes_enc_data = ila.appfun(aes, [ctr, aes_key, rd_data]) enc_data_nxt = ila.ite(state == 2, aes_enc_data, enc_data) um.set_next('enc_data', enc_data_nxt) #print um.get_next('enc_data') # xram write xram_w_data = ila.readchunk('enc_data_chunk', enc_data, 8) xram_w_addr = opaddr + blk_cnt + byte_cnt_16b xram_w_aes = ila.store(uxram, xram_w_addr, xram_w_data) xram_nxt = ila.choice('xram_nxt', uxram, xram_w_aes) um.set_next('XRAM', xram_nxt) suffix = 'en' if enable_ps else 'dis' timefile = open('aes-times-%s.txt' % suffix, 'wt') t_elapsed = 0 # micro-synthesis for s in [ 'XRAM', 'aes_state', 'byte_cnt', 'blk_cnt', 'oped_byte_cnt', 'rd_data' ]: t_elapsed = 0 st = time.clock() um.synthesize(s, usim) dt = time.clock() - st t_elapsed += dt print >> timefile, '%s %.2f' % ('u_' + s, dt) print '%s: %s' % (s, str(um.get_next(s))) ast = um.get_next(s) m.exportOne(ast, 'asts/u_%s_%s' % (s, suffix)) sim = lambda s: AESmacro().simMacro(s) # state state_next = ila.choice( 'state_next', [state, ila.ite(cmddata == 1, m.const(1, 2), state)]) m.set_next('aes_state', state_next) # xram m.set_next('XRAM', xram) # synthesize. 
for s in [ 'aes_state', 'aes_addr', 'aes_len', 'aes_keysel', 'aes_ctr', 'aes_key0', 'aes_key1', 'dataout' ]: st = time.clock() m.synthesize(s, sim) dt = time.clock() - st t_elapsed += dt print >> timefile, '%s %.2f' % (s, dt) ast = m.get_next(s) print '%s: %s' % (s, str(ast)) m.exportOne(ast, 'asts/%s_%s' % (s, suffix)) # connect to the uinst m.connect_microabstraction('aes_state', um) m.connect_microabstraction('XRAM', um) print 'total time: %.2f' % t_elapsed #print 'aes_state: %s' % str(m.get_next('aes_state')) #print 'XRAM: %s' % str(m.get_next('XRAM')) #m.generateSim('gen/aes_sim.hpp') m.generateSimToDir('sim')
def vreg_nxt(self, regNo, laneNo):
    """Build the next-state expression for one lane of a vector register.

    The lane keeps its current value unless ``self.dest == regNo``.  When it
    is the destination:

    * register-register format, by ``rrType``:
      ``0b001`` vector op scalar, ``0b100`` vector op vector,
      ``0b010`` / ``0b101`` the same two but only when the lane's bit in the
      mask register is set;
    * immediate format, by ``immType``: ``0b01`` vector op ``immB``,
      ``0b11`` vector op ``immA`` gated by the lane's mask bit;
    * otherwise the lane is unchanged (the ``isLoad`` selection has two
      identical branches).

    Every operation group supports ADD_I, SUB_I, AND, OR and MULH_I.

    Args:
        regNo: vector register index.
        laneNo: lane index within the register.

    Returns:
        The ILA expression for the lane's next value.
    """
    ssreg1 = self.indexToSGPR(self.rrSrc1)
    ssreg2 = self.indexToSGPR(self.rrSrc2)
    vsreg1 = self.indexToVGPR(self.rrSrc1,
                              self.model.const(laneNo, SCALAR_REG_BITS))
    vsreg2 = self.indexToVGPR(self.rrSrc2,
                              self.model.const(laneNo, SCALAR_REG_BITS))
    mask = self.indexToSGPR(self.rrMask)

    # load instruction (value is computed but not used by the expression below)
    addr = self.indexToSGPR(self.memPtr) + ila.sign_extend(
        self.memOffSet, SCALAR_REG_BITS)
    load_val = ila.load(self.mem, ila.zero_extend(addr[31:2], PC_BITS))

    def current():
        # Present value of this lane.
        return self.vector_registers[regNo][laneNo]

    def lane_masked_off():
        # True when the mask register's bit for this lane is zero.
        return mask[laneNo] == self.model.const(0b0, 1)

    def rr_opcode():
        return self.rrOpcode

    def imm_opcode():
        # The immediate opcode field is widened to 6 bits for each comparison.
        return ila.zero_extend(self.immOpcode, 6)

    def alu(opcode, rhs):
        # Five-operation ALU on (vsreg1, rhs); unknown opcodes keep the lane.
        return ila.ite(
            opcode() == NyEncoding.ADD_I, vsreg1 + rhs,
            ila.ite(
                opcode() == NyEncoding.SUB_I, vsreg1 - rhs,
                ila.ite(
                    opcode() == NyEncoding.AND, vsreg1 & rhs,
                    ila.ite(
                        opcode() == NyEncoding.OR, vsreg1 | rhs,
                        ila.ite(
                            opcode() == NyEncoding.MULH_I, vsreg1 * rhs,
                            current())))))

    def masked(opcode, rhs):
        # Same ALU, but a cleared mask bit leaves the lane untouched.
        return ila.ite(lane_masked_off(), current(), alu(opcode, rhs))

    is_destination = self.dest == regNo
    is_reg_reg = self.isRegReg

    reg_reg = ila.ite(
        self.rrType == self.model.const(0b001, 3),
        alu(rr_opcode, ssreg2),
        ila.ite(
            self.rrType == self.model.const(0b100, 3),
            alu(rr_opcode, vsreg2),
            ila.ite(
                self.rrType == self.model.const(0b010, 3),
                masked(rr_opcode, ssreg2),
                ila.ite(
                    self.rrType == self.model.const(0b101, 3),
                    masked(rr_opcode, vsreg2),
                    current()))))

    is_immediate = self.isImmediate
    immediate = ila.ite(
        self.immType == self.model.const(0b01, 2),
        alu(imm_opcode, self.immB),
        ila.ite(
            self.immType == self.model.const(0b11, 2),
            masked(imm_opcode, self.immA),
            current()))

    memory = ila.ite(self.isLoad == self.model.const(0b1, 1),
                     current(), current())

    return ila.ite(
        is_destination,
        ila.ite(is_reg_reg,
                reg_reg,
                ila.ite(is_immediate, immediate, memory)),
        current())
def sreg_nxt(self, regNo):
    """Build the next-state expression for scalar register ``regNo``.

    The register keeps its current value unless ``self.dest == regNo``.  When
    it is the destination:

    * register-register format with ``rrType == 0b000`` runs the full
      22-operation ALU on ``(sreg1, sreg2)``;
    * immediate format with ``immType == 0b00`` runs the same ALU on
      ``(sreg1, self.immB)`` (the sixth opcode is ``MULL_I`` here instead of
      ``MULH_U``); ``immType == 0b10`` runs the five basic operations on
      ``(sreg1, self.immA)``;
    * otherwise the register is unchanged (the ``isLoad`` selection has two
      identical branches).

    Comparison operations yield ``0xffff`` for true and ``0`` for false.

    Fixes over the previous revision:
      * the immediate CLZ and CTZ cases passed four arguments to
        ``ila.ite`` (a stray ``sreg1``); they now yield
        ``aux_clz(immB)`` / ``aux_ctz(immB)``, mirroring the
        register-register cases which operate on the second operand;
      * the ``immA`` MULH_I case multiplied by ``sreg2`` although ``rrSrc2``
        overlaps the immediate field; it now multiplies by ``immA``.

    Args:
        regNo: index into ``self.scalar_registers``.

    Returns:
        The ILA expression for the register's next value.
    """
    sreg1 = self.indexToSGPR(self.rrSrc1)
    sreg2 = self.indexToSGPR(self.rrSrc2)

    # load instruction
    # NOTE(review): load_val is built but not consumed yet; the isLoad branch
    # below keeps the register unchanged either way.
    addr = self.indexToSGPR(self.memPtr) + ila.sign_extend(
        self.memOffSet, SCALAR_REG_BITS)
    load_val = ila.load(self.mem,
                        ila.zero_extend(addr[31:2], MEM_ADDRESS_BITS))

    keep = self.scalar_registers[regNo]

    def select(cases, default):
        # Fold (condition, value) pairs into a right-nested ila.ite chain:
        # the first matching condition wins, ``default`` if none match.
        result = default
        for cond, value in reversed(cases):
            result = ila.ite(cond, value, result)
        return result

    def flag(cond):
        # 0xffff when cond holds, 0 otherwise.
        return ila.ite(cond,
                       self.const(0xffff, SCALAR_REG_BITS),
                       self.const(0b0, SCALAR_REG_BITS))

    def inverted_flag(cond):
        # 0 when cond holds, 0xffff otherwise.
        return ila.ite(cond,
                       self.const(0b0, SCALAR_REG_BITS),
                       self.const(0xffff, SCALAR_REG_BITS))

    def is_one(expr):
        # The signed-compare helpers are tested against getConstOne().
        return expr == self.getConstOne()

    def full_alu(opcode, rhs, mul_u_code):
        # Full ALU on (sreg1, rhs).  ``mul_u_code`` is the encoding that
        # selects auxMulh_u, which differs between the two formats.
        shamt = rhs[4:0]
        return select([
            (opcode == NyEncoding.ADD_I, sreg1 + rhs),
            (opcode == NyEncoding.SUB_I, sreg1 - rhs),
            (opcode == NyEncoding.AND, sreg1 & rhs),
            (opcode == NyEncoding.OR, sreg1 | rhs),
            (opcode == NyEncoding.MULH_I, self.auxMull_i(sreg1, rhs)),
            (opcode == mul_u_code, self.auxMulh_u(sreg1, rhs)),
            (opcode == NyEncoding.ASHR, ila.ashr(sreg1, shamt)),
            (opcode == NyEncoding.SHR, sreg1 >> shamt),
            (opcode == NyEncoding.SHL, sreg1 << shamt),
            (opcode == NyEncoding.CLZ, self.aux_clz(rhs)),
            (opcode == NyEncoding.CTZ, self.aux_ctz(rhs)),
            (opcode == NyEncoding.MOVE, rhs),
            (opcode == NyEncoding.CMPEQ_I, flag(sreg1 == rhs)),
            (opcode == NyEncoding.CMPNE_I, flag(sreg1 != rhs)),
            (opcode == NyEncoding.CMPGT_I,
             flag(is_one(self.auxCmpgt_i(sreg1, rhs)))),
            (opcode == NyEncoding.CMPGE_I,
             flag(is_one(self.auxCmpge_i(sreg1, rhs)))),
            (opcode == NyEncoding.CMPLT_I,
             flag(is_one(self.auxCmplt_i(sreg1, rhs)))),
            (opcode == NyEncoding.CMPLE_I,
             flag(is_one(self.auxCmple_i(sreg1, rhs)))),
            (opcode == NyEncoding.CMPGT_U, flag(sreg1 > rhs)),
            # >= and <= are expressed as the negation of < and >.
            (opcode == NyEncoding.CMPGE_U, inverted_flag(sreg1 < rhs)),
            (opcode == NyEncoding.CMPLT_U, flag(sreg1 < rhs)),
            (opcode == NyEncoding.CMPLE_U, inverted_flag(sreg1 > rhs)),
        ], keep)

    def basic_alu(opcode, rhs):
        # Five-operation ALU used by the immA immediate format.
        return select([
            (opcode == NyEncoding.ADD_I, sreg1 + rhs),
            (opcode == NyEncoding.SUB_I, sreg1 - rhs),
            (opcode == NyEncoding.AND, sreg1 & rhs),
            (opcode == NyEncoding.OR, sreg1 | rhs),
            (opcode == NyEncoding.MULH_I, sreg1 * rhs),
        ], keep)

    reg_reg = ila.ite(self.rrType == self.model.const(0b000, 3),
                      full_alu(self.rrOpcode, sreg2, NyEncoding.MULH_U),
                      keep)

    # The immediate opcode field is narrower than the encodings; widen it to 6 bits.
    imm_opcode = ila.zero_extend(self.immOpcode, 6)
    immediate = ila.ite(
        self.immType == self.model.const(0b00, 2),
        full_alu(imm_opcode, self.immB, NyEncoding.MULL_I),
        ila.ite(self.immType == self.model.const(0b10, 2),
                basic_alu(imm_opcode, self.immA),
                keep))

    memory = ila.ite(self.isLoad == self.model.const(0b1, 1), keep, keep)

    return ila.ite(self.dest == regNo,
                   ila.ite(self.isRegReg,
                           reg_reg,
                           ila.ite(self.isImmediate, immediate, memory)),
                   keep)
def createFifoILA():
    """Build the ``fifo`` ILA template, synthesize it and export simulators.

    Declares the command inputs, the FIFO state/status/counter registers and
    the two 8-bit-address, 8-bit-data memories, gives each a next-state
    template, installs the decode expressions, then synthesizes every
    variable listed in ``fifo().all_state`` through ``synth`` and writes
    ``tpm_export.cpp`` and ``tpm_export.c``.
    """
    m = ila.Abstraction("fifo")

    # ---------------------------------------------------------------
    # Inputs
    # ---------------------------------------------------------------
    cmd = m.inp("cmd", 3)
    cmdaddr = m.inp("cmdaddr", 64)
    cmddata = m.inp("cmddata", 8)

    # ---------------------------------------------------------------
    # Constants
    # ---------------------------------------------------------------
    ZERO = m.const(0x0, 8)
    ONE = m.const(0x1, 8)
    TWO = m.const(0x2, 8)
    THREE = m.const(0x3, 8)
    FOUR = m.const(0x4, 8)
    THIRTY = m.const(0x1e, 8)
    # Flags that the status register can output.
    STS_VALID = m.const(fifo_def.STS_VALID, 8)
    STS_DATA_AVAIL = m.const(fifo_def.STS_DATA_AVAIL, 8)
    STS_DATA_EXPECT = m.const(fifo_def.STS_DATA_EXPECT, 8)
    # Commands that can be written to the status register.
    STS_GO = m.const(fifo_def.STS_GO, 8)
    STS_COMMAND_READY = m.const(fifo_def.STS_COMMAND_READY, 8)

    # ---------------------------------------------------------------
    # State variables and their next-state templates
    # ---------------------------------------------------------------
    # FIFO state.
    fifo_state = m.reg("fifo_state", 8)
    state_options = [ZERO, ONE, TWO, THREE, FOUR]
    m.set_next("fifo_state", ila.choice("fifo_state_choice", state_options))

    # Status register.
    fifo_sts = m.reg("fifo_sts", 8)
    sts_options = [
        STS_VALID,
        STS_VALID | STS_DATA_AVAIL,
        STS_VALID | STS_DATA_EXPECT,
        ZERO,
    ]
    m.set_next("fifo_sts", ila.choice("fifo_sts_choice", sts_options))

    # Internal index into the FIFO: the amount written so far.
    fifo_in_amt = m.reg("fifo_in_amt", 8)
    in_amt_options = [fifo_in_amt, fifo_in_amt+1, ZERO]
    m.set_next("fifo_in_amt",
               ila.choice("fifo_in_amt_choice", in_amt_options))

    fifo_in_cmdsize = m.reg("fifo_in_cmdsize", 8)
    cmdsize_options = [fifo_in_cmdsize, cmddata, ZERO]
    m.set_next("fifo_in_cmdsize",
               ila.choice("fifo_in_cmdsize_choice", cmdsize_options))

    # The FIFO input memory: 256 entries of 8 bits.
    fifo_indata = m.mem("fifo_indata", 8, 8)
    indata_options = [
        fifo_indata,
        ila.store(fifo_indata, fifo_in_amt, cmddata),
    ]
    m.set_next("fifo_indata", ila.choice("fifo_indata", indata_options))

    # Internal index into the output FIFO.
    # TODO base this size off of list of command sizes
    fifo_out_amt = m.reg("fifo_out_amt", 8)
    out_amt_options = [fifo_out_amt, fifo_out_amt-1, ZERO, THIRTY]
    m.set_next("fifo_out_amt",
               ila.choice("fifo_out_amt_choice", out_amt_options))

    # The FIFO output memory: another 256 entries of 8 bits, never modified.
    fifo_outdata = m.mem("fifo_outdata", 8, 8)
    m.set_next("fifo_outdata", fifo_outdata)

    # dataout: what a read or write returns.
    dataout = m.reg("dataout", 8)
    dataout_options = [
        ZERO,
        fifo_outdata[fifo_out_amt],
        fifo_sts,
        fifo_def.FIFO_MAX_AMT - fifo_in_amt,
        fifo_out_amt,
    ]
    m.set_next("dataout", ila.choice("dataout_choice", dataout_options))

    # ---------------------------------------------------------------
    # Decode logic
    # ---------------------------------------------------------------
    # General information.
    addresses = [fifo_def.STS_ADDR, fifo_def.FIFO_ADDR, fifo_def.BURST_ADDR]
    commandData = [fifo_def.STS_COMMAND_READY, fifo_def.STS_GO]

    # Commands start and end.
    cmds = []
    for d in commandData:
        for a in range(2):
            for s in range(5):
                cmds.append((cmdaddr == fifo_def.STS_ADDR)
                            & (cmd == fifo_def.WR)
                            & (cmddata == d)
                            & (fifo_out_amt == a)
                            & (fifo_state == s))

    # Reading the in_cmdsize.
    cmdsize = []
    for d in commandData:
        cmdsize.append((cmdaddr == fifo_def.FIFO_ADDR)
                       & (fifo_state == fifo_def.FIFO_SENDING)
                       & (fifo_in_amt == 5)
                       & (cmd == fifo_def.WR)
                       & (cmddata == d))

    # Actual commands: GO written while accepting, with the full command
    # received and input bytes 6..9 equal to 0, 0, 0, 0x14.
    pcr_extend = [
        (cmdaddr == fifo_def.STS_ADDR)
        & (cmd == fifo_def.WR)
        & (cmddata == fifo_def.STS_GO)
        & (fifo_state == fifo_def.FIFO_ACCEPTING)
        & (fifo_in_amt == fifo_in_cmdsize)
        & (ila.load(fifo_indata, m.const(0x6, 8)) == 0)
        & (ila.load(fifo_indata, m.const(0x7, 8)) == 0)
        & (ila.load(fifo_indata, m.const(0x8, 8)) == 0)
        & (ila.load(fifo_indata, m.const(0x9, 8)) == 0x14)
    ]

    # General reading and writing in every state, for every address.
    general = []
    for a in addresses:
        for c in [0, 1, 2]:
            for s in range(5):
                general.append((cmdaddr == a) & (cmd == c) & (fifo_state == s))

    m.decode_exprs = general + cmds + pcr_extend + cmdsize

    # ---------------------------------------------------------------
    # Synthesize
    # ---------------------------------------------------------------
    f = fifo()

    def sim(s):
        return f.simulate(s)

    for var in f.all_state:
        synth(m, var, sim)

    m.generateSim('tpm_export.cpp')
    m.generateCbmcC('tpm_export.c')
def instructionFetch(self):
    """Fetch the instruction at ``self.pc`` and decode fields and operands.

    The word address is PC bits 31..3.  Besides the raw fields (``opcode``,
    ``dest``, ``src1``..``src3``) and the immediates (``baseImm``
    sign-extended; ``branchImm``, ``ldImm`` and ``stImm`` all zero-extended
    from the same IMM field), each register index gets:

    * a ``*_flag`` that is ``long_scalar_register_flag`` when the index lies
      in ``[scalar_register_num, register_total_num)`` and
      ``scalar_register_flag`` otherwise;
    * its value from the scalar file (``ss*``) and the long file (``ls*``);
    * a combined ``sreg*`` that takes the scalar value when the flag is set,
      else the low ``REG_BITS`` bits of the long value.
    """
    fmt = instruction_format

    def field(top, bot):
        # ``top`` is exclusive, ``bot`` inclusive.
        return self.inst[(top - 1):bot]

    def imm_field():
        return ila.zero_extend(field(fmt.IMM_BIT_TOP, fmt.IMM_BIT_BOT),
                               fmt.PC_BITS)

    def file_flag(index):
        in_long_range = ((index >= self.scalar_register_num)
                         & (index < self.register_total_num))
        return ila.ite(in_long_range,
                       self.long_scalar_register_flag,
                       self.scalar_register_flag)

    def low_word(long_value):
        return long_value[fmt.REG_BITS - 1:0]

    word_addr = ila.zero_extend(self.pc[31:3], fmt.MEM_ADDRESS_BITS)
    self.inst = ila.load(self.mem, word_addr)

    self.opcode = field(fmt.OPCODE_BIT_TOP, fmt.OPCODE_BIT_BOT)
    self.fetch_expr = self.inst
    self.dest = field(fmt.DST_BIT_TOP, fmt.DST_BIT_BOT)
    self.src1 = field(fmt.SRC0_BIT_TOP, fmt.SRC0_BIT_BOT)
    self.src2 = field(fmt.SRC1_BIT_TOP, fmt.SRC1_BIT_BOT)
    self.src3 = field(fmt.SRC2_BIT_TOP, fmt.SRC2_BIT_BOT)
    self.baseImm = ila.sign_extend(
        field(fmt.BASE_BIT_TOP, fmt.BASE_BIT_BOT), fmt.PC_BITS)
    # Branch predicate decoding is disabled here:
    #self.branchPred = self.dest
    #(self.predReg, self.predReg_flag) = self.indexIntoReg(self.branchPred)
    self.branchImm = imm_field()
    self.ldImm = imm_field()
    self.stImm = imm_field()

    # Which register file each index refers to.
    self.sreg1_flag = file_flag(self.src1)
    self.sreg2_flag = file_flag(self.src2)
    self.sreg3_flag = file_flag(self.src3)
    self.sregdest_flag = file_flag(self.dest)

    # Values from the scalar register file.
    self.ssreg1 = self.indexIntoSReg(self.src1)
    self.ssreg2 = self.indexIntoSReg(self.src2)
    self.ssreg3 = self.indexIntoSReg(self.src3)
    self.ssregdest = self.indexIntoSReg(self.dest)

    # Values from the long scalar register file.
    self.lsreg1 = self.indexIntoLReg(self.src1)
    self.lsreg2 = self.indexIntoLReg(self.src2)
    self.lsreg3 = self.indexIntoLReg(self.src3)
    self.lsregdest = self.indexIntoLReg(self.dest)

    # Combined operand: scalar value, or the low word of the long value.
    self.sreg1 = ila.ite(self.sreg1_flag, self.ssreg1, low_word(self.lsreg1))
    self.sreg2 = ila.ite(self.sreg2_flag, self.ssreg2, low_word(self.lsreg2))
    self.sreg3 = ila.ite(self.sreg3_flag, self.ssreg3, low_word(self.lsreg3))
    self.sregdest = ila.ite(self.sregdest_flag, self.ssregdest,
                            low_word(self.lsregdest))
# buildILA: build the ILA abstraction named 'RBM' and return it (`return rbm`).
#
# Sections, in the order they appear below:
#   * universal constants (K, NUM_MOVIE_MAX, NUM_HIDDEN_MAX, NUM_VISIBLE_MAX)
#     and the address/width constants derived as int(log(x) / log(2)) + 1;
#   * the top-level abstraction `rbm = ila.Abstraction('RBM')` with its
#     conf_* / rst / DMA inputs, its state registers and the 'data' and
#     'predict_result' memories;
#   * top-level decode expressions, constant helpers (b0, b1, h0_16, ...),
#     initial values and the configuration next-state functions;
#   * micro-abstraction 'compute' (guard: init_done == 1 and done == 0),
#     sequenced by the 4-bit 'upc' register (states 0..3);
#   * micro-abstraction 'DMAload' (guard: rd_granted == 1);
#   * micro-abstraction 'train', nested under 'compute'
#     (guard: train_input_done == 1), sequenced by 'train_upc' / 'per_v_pc';
#   * micro-abstraction 'predict', nested under 'compute'
#     (guard: predict_input_done == 1), sequenced by 'predict_upc' / 'per_v_pc';
#   * micro-abstraction 'store' (guard: wr_granted == 1);
#   * keepNC / keepMemNC, which set a state element's next value to its
#     current value.
#
# Names used here but not defined inside this function (they must come from
# module scope): ila, log, fpconvert, fpconst, fixpoint, FPedge, FPsum,
# FPsum3, FPpow, FP01_D, FP05_D, FPu16, LEARN_RATE.
#
# NOTE(review): the line breaks of this function have been collapsed. A '#'
# comments out the rest of its physical line, so statements that follow a
# comment on the same line (for example the assignments to rd_granted_nxt
# and to the train-phase divide_func) are not executed as written. Restore
# the one-statement-per-line layout before running -- TODO confirm against
# the upstream file.
# NOTE(review): assigned but never read in the visible code, for example:
# both decodeExpr lists, index_nxt_dummy, loop_count_nxt_dummy,
# _pos_sp_cond, hidden_update_s0_next, TrainChoice3/4/5.
# NOTE(review): TrainUabs.decode_exprs omits StorePos and
# PredictUabs.decode_exprs omits GenResult, although both conditions are
# used in next-state functions -- confirm whether that is intentional.
# NOTE(review): several ila.ite calls have identical then/else arms (e.g.
# ila.ite(LastV, h0_4, h0_4), train_pc_s6_nxt, predict_pc_s6_nxt, and the
# predict-phase v_cnt_sp_nxt); they look like placeholders -- verify.
def buildILA(): #--------------------------- # define universal constant #--------------------------- K = 5 NUM_MOVIE_MAX = 100 NUM_HIDDEN_MAX = 100 NUM_VISIBLE_MAX = NUM_MOVIE_MAX * K DATAMEM_ADDR_WIDTH = int( log(NUM_VISIBLE_MAX + 1) / log(2)) + 1 # 9 # it is definitely not dividable, but need to check HIDDEN_UNIT_WIDTH = int( log(NUM_HIDDEN_MAX + 1) / log(2)) + 1 # 7 # it is definitely not dividable, but need to check VISIBLE_UNIT_WIDTH = int(log(NUM_VISIBLE_MAX + 1) / log(2)) + 1 # 9 EDGEMEM_ADDR_WIDTH = int( log((NUM_VISIBLE_MAX + 1) * (NUM_HIDDEN_MAX + 1)) / log(2)) + 1 # 16 POS_ADDR_WIDTH = EDGEMEM_ADDR_WIDTH NEG_ADDR_WIDTH = EDGEMEM_ADDR_WIDTH PREDICT_RESULT_WIDTH = int(log(NUM_MOVIE_MAX) / log(2)) + 1 # 7 KWIDTH = int(log(K) / log(2)) + 1 # 3 #--------------------------- # Model #--------------------------- rbm = ila.Abstraction('RBM') conf_done = rbm.inp('conf_done', 1) conf_num_hidden = rbm.inp('conf_num_hidden', 32) conf_num_visible = rbm.inp('conf_num_visible', 32) conf_num_users = rbm.inp('conf_num_users', 32) conf_num_loops = rbm.inp('conf_num_loops', 32) conf_num_testusers = rbm.inp('conf_num_testusers', 32) conf_num_movies = rbm.inp('conf_num_movies', 32) rst = rbm.inp('rst', 1) init_done = rbm.reg('init_done', 1) done = rbm.reg('done', 1) num_hidden = rbm.reg('num_hidden', 16) num_visible = rbm.reg('num_visible', 16) num_users = rbm.reg('num_users', 16) num_loops = rbm.reg('num_loops', 16) num_testusers = rbm.reg('num_testusers', 16) num_movies = rbm.reg('num_movies', 16) # DMA output rd_index = rbm.reg('rd_index', 32) rd_length = rbm.reg('rd_length', 32) rd_request = rbm.reg('rd_request', 1) rd_grant = rbm.inp('rd_grant', 1) data_in = rbm.inp('data_in', 32) # rd_cnt = rbm.reg('rd_cnt', 16) # i ureg #585 # DMA input wr_grant = rbm.inp('wr_grant', 1) wr_request = rbm.reg('wr_request', 1) wr_index = rbm.reg('wr_index', 32) wr_length = rbm.reg('wr_length', 32) data_out = rbm.reg('data_out', 32) # wr_cnt = rbm.reg('wr_cnt', 16) : u reg data = 
rbm.mem('data', DATAMEM_ADDR_WIDTH, 8) rbm.mem('predict_result', PREDICT_RESULT_WIDTH, 8) #------------------------------------- # Decoding Expressions #------------------------------------- rstInst = rst == 1 confDoneInst = (rst == 0) & (init_done == 0) & (conf_done == 1) rdGrantInst = (rd_request == 1) & (rd_grant == 1) wrGrantInst = (wr_request == 1) & (wr_grant == 1) decodeExpr = [rstInst, confDoneInst, rdGrantInst, wrGrantInst] #------------------------------------- # AUX Functions #------------------------------------- def const(v, w): return rbm.const(v, w) b0 = const(0, 1) b1 = const(1, 1) h0_8 = const(0, 8) h1_8 = const(1, 8) h0_4 = const(0, 4) h1_4 = const(1, 4) h2_4 = const(2, 4) h3_4 = const(3, 4) h4_4 = const(4, 4) h0_16 = const(0, 16) h1_16 = const(1, 16) h0_32 = const(0, 32) h0_64 = const(0, 64) #------------------------------------- # Init conditions #------------------------------------- rbm.set_init('init_done', b0) rbm.set_init('done', b0) rbm.set_init('num_hidden', h0_16) rbm.set_init('num_visible', h0_16) rbm.set_init('num_users', h0_16) rbm.set_init('num_loops', h0_16) rbm.set_init('num_testusers', h0_16) rbm.set_init('num_movies', h0_16) #------------------------------------- # Config #------------------------------------- # this means, once configured, unless reset, it cannot be reconfigured init_done_nxt = ila.ite(rstInst, b0, ila.ite(confDoneInst, b1, init_done)) num_hidden_nxt = ila.ite( rstInst, h0_16, ila.ite(confDoneInst, conf_num_hidden[15:0], num_hidden)) num_visible_nxt = ila.ite( rstInst, h0_16, ila.ite(confDoneInst, conf_num_visible[15:0], num_visible)) num_users_nxt = ila.ite( rstInst, h0_16, ila.ite(confDoneInst, conf_num_users[15:0], num_users)) num_loops_nxt = ila.ite( rstInst, h0_16, ila.ite(confDoneInst, conf_num_loops[15:0], num_loops)) num_testusers_nxt = ila.ite( rstInst, h0_16, ila.ite(confDoneInst, conf_num_testusers[15:0], num_testusers)) num_movies_nxt = ila.ite( rstInst, h0_16, ila.ite(confDoneInst, 
conf_num_movies[15:0], num_movies)) rbm.set_next('init_done', init_done_nxt) rbm.set_next('num_hidden', num_hidden_nxt) rbm.set_next('num_visible', num_visible_nxt) rbm.set_next('num_users', num_users_nxt) rbm.set_next('num_loops', num_loops_nxt) rbm.set_next('num_testusers', num_testusers_nxt) rbm.set_next('num_movies', num_movies_nxt) # INST-level w/r complete rbm_rd_complete = rbm.reg('rd_complete', 1) rbm_wr_complete = rbm.reg('wr_complete', 1) rbm.set_init('rd_complete', b0) rbm.set_init('wr_complete', b0) #------------------------------------ # Compute UABS #------------------------------------ uabs = rbm.add_microabstraction('compute', (init_done == 1) & (done == 0)) index = uabs.reg('index', 16) loop_count = uabs.reg('loop_count', 16) pc = uabs.reg('upc', 4) edges_mem = uabs.mem('edges', EDGEMEM_ADDR_WIDTH, 8) nlp = uabs.getreg('num_loops') nm = ila.zero_extend(uabs.getreg('num_movies'), 32) nu = uabs.getreg('num_users') ntu = uabs.getreg('num_testusers') out_rd_request = uabs.getreg('rd_request') out_rd_complete = uabs.getreg('rd_complete') out_rd_length = uabs.getreg('rd_length') out_rd_index = uabs.getreg('rd_index') train_input_done = uabs.reg('train_input_done', 1) predict_input_done = uabs.reg('predict_input_done', 1) uabs.set_init('upc', const(0, 4)) uabs.set_init('index', h0_16) uabs.set_init('loop_count', h0_16) uabs.set_init('train_input_done', b0) uabs.set_init('predict_input_done', b0) uabs.set_init('rd_complete', b0) ### computation micro_instructions StartRead = (pc == 0) WaitReadComplete = (pc == 1) & (out_rd_complete == 0) DecideTrainOrPredict = (pc == 1) & (out_rd_complete == 1) StartTrain = (pc == 2) & (train_input_done == 1) StartPredict = (pc == 2) & (predict_input_done == 1) Finish = (pc == 3) StartReadState = const(0, 4) WaitReadCompleteState = const(1, 4) StartTrainOrPredict = const(2, 4) FinishState = const(3, 4) decodeExpr = [ StartRead, WaitReadComplete, DecideTrainOrPredict, StartTrain, StartPredict, Finish ] out_rd_request_nxt = 
ila.ite(StartRead, b1, out_rd_request) out_rd_length_nxt = ila.ite(StartRead, 5 * nm, out_rd_length) out_rd_index_nxt = ila.ite(StartRead, ila.zero_extend(index, 32), out_rd_index) out_rd_complete_nxt = ila.ite( StartRead, b0, ila.ite(DecideTrainOrPredict, b0, out_rd_complete)) train_input_done_nxt = ila.ite(DecideTrainOrPredict, ila.ite(loop_count < nlp, b1, b0), train_input_done) predict_input_done_nxt = ila.ite(DecideTrainOrPredict, ila.ite(loop_count == nlp, b1, b0), predict_input_done) pc_nxt = ila.ite( StartRead, WaitReadCompleteState, ila.ite( WaitReadComplete, pc, ila.ite( DecideTrainOrPredict, StartTrainOrPredict, ila.ite( StartTrain, StartTrainOrPredict, # StartReadState, # actually should be updated by u2inst ila.ite( StartPredict, StartTrainOrPredict, # StartReadState, # actually should be updated by u2inst ila.ite( Finish, FinishState, pc # should never happen! )))))) # should be updated by u2inst index_nxt_dummy = ila.ite( StartTrain | StartPredict, ila.ite( (index == nu - 1) & (loop_count != nlp), h0_16, ila.ite( (index == ntu - 1) & (loop_count == nlp), index, # And it is not correct index + 1)), index) # not in use loop_count_nxt_dummy = ila.ite( StartTrain | StartPredict, ila.ite((index == nu - 1) & (loop_count != nlp), loop_count + 1, loop_count), loop_count) uabs.set_next('rd_request', out_rd_request_nxt) uabs.set_next('rd_length', out_rd_length_nxt) uabs.set_next('rd_index', out_rd_index_nxt) uabs.set_next('rd_complete', out_rd_complete_nxt) uabs.set_next('train_input_done', train_input_done_nxt) uabs.set_next('predict_input_done', predict_input_done_nxt) uabs.set_next('upc', pc_nxt) uabs.set_next('index', index) uabs.set_next('loop_count', loop_count) # this has to be updated by micro_inst # read_request is turned off by loaduabs # predict_input_done, train_input_done is turned off by uabs_train/predict #------------------------------------ # Load UABS #------------------------------------ # RBM interface # high-level interface rd_granted = 
rbm.reg( 'rd_granted', 1 ) # this is only used for maintaining the validity of load UABS, no other should use data_nxt = ila.ite(rdGrantInst, ila.store(data, const(0, DATAMEM_ADDR_WIDTH), data_in[7:0]), data) # data # rd_granted_nxt = ila.ite(rdGrantInst, b1, rd_granted) rbm.set_next('rd_granted', rd_granted_nxt) rbm.set_next('data', data_nxt) # one change is to move these into lower abstraction DMAload = rbm.add_microabstraction( 'DMAload', (rd_granted == 1)) # this is sub-instruction w_cnt = DMAload.reg('i', 16) dma_rd_request = DMAload.getreg('rd_request') dma_rd_length = DMAload.getreg('rd_length') dma_rd_index = DMAload.getreg('rd_index') state_update_data = DMAload.getmem('data') state_update_rd_request = dma_rd_request self_update_rd_granted = DMAload.getreg('rd_granted') more_read_in = w_cnt < dma_rd_length[15:0] last_cycle = w_cnt == dma_rd_length[15:0] DMAload.set_init('i', h1_16) # h0_16 ) DMAload.set_next('i', ila.ite(more_read_in, w_cnt + 1, w_cnt)) DMAload.set_next('rd_request', b0) # reset to 0 immediately DMAload.set_next('rd_granted', ila.ite(more_read_in, self_update_rd_granted, b0)) DMAload.set_next('rd_complete', ila.ite(more_read_in, b0, b1)) DMAload.set_next( 'data', ila.ite( more_read_in, ila.store(state_update_data, w_cnt[DATAMEM_ADDR_WIDTH - 1:0], data_in[7:0]), ila.ite( last_cycle, ila.store(state_update_data, dma_rd_length[DATAMEM_ADDR_WIDTH - 1:0], h1_8), state_update_data))) #------------------------------------ # Train UUABS #------------------------------------ TrainUabs = uabs.add_microabstraction('train', train_input_done == 1) sigmoid_func = TrainUabs.fun('sigmoid', 64, [16]) # DATA_sum_, 01_D rand_func = TrainUabs.fun('rand', 64, []) # generate random number to_int_exp = TrainUabs.fun('to_int_exp', 32, [16]) # divide_func = TrainUabs.fun( 'divide', 64, [32, 64]) # dp:32_32 / sum_of_pow2 64_64 = 64_1 hidden_unit = TrainUabs.mem('hidden_unit', HIDDEN_UNIT_WIDTH, 1) visible_unit = TrainUabs.mem('visible_unit', VISIBLE_UNIT_WIDTH, 1) 
visibleEnergy = TrainUabs.mem('visibleEnergies', KWIDTH, 16) pow2 = TrainUabs.mem('pow2', KWIDTH, 32) pos = TrainUabs.mem('pos', POS_ADDR_WIDTH, 1) #neg = TrainUabs.mem('neg', NEG_ADDR_WIDTH, 1 ) # not needed train_sum = TrainUabs.reg('train_sum', 16) train_max = TrainUabs.reg('train_max', 16) sumOfpow2 = TrainUabs.reg('sumOfpow2', 64) jstate = TrainUabs.reg('jstate', 16) inner_loop_pc = TrainUabs.reg('per_v_pc', 4) train_pc = TrainUabs.reg('train_upc', 4) # Re-evaluate v_cnt = TrainUabs.reg('train_v_cnt', 16) h_cnt = TrainUabs.reg('train_h_cnt', 16) train_input = TrainUabs.getmem('data') edges_input = TrainUabs.getmem('edges') nv = TrainUabs.getreg('num_visible') nh = TrainUabs.getreg('num_hidden') nu = TrainUabs.getreg('num_users') ntu = TrainUabs.getreg('num_testusers') nlp = TrainUabs.getreg('num_loops') SumEdge = train_pc == 0 SumEdgeState = const(0, 4) SumHidden = train_pc == 1 SumHiddenState = const(1, 4) StorePos = train_pc == 3 StorePosState = const(3, 4) EdgeUpdate = train_pc == 2 EdgeUpdateState = const(2, 4) TrainUabs.decode_exprs = [SumEdge, SumHidden, EdgeUpdate] #Begin v_cnt_init = const(0, 16) h_cnt_init = const(0, 16) pc_init = const(0, 4) #SumEdge: s0 edge_load_addr = (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt train_sum_s0_nxt = ila.ite(v_cnt == 0, const(0, 16), train_sum) + ila.ite( ila.load(train_input, v_cnt[DATAMEM_ADDR_WIDTH - 1:0]) == 1, fpconvert(ila.load(edges_input, edge_load_addr), FPedge, FPsum), const(0, 16)) v_cnt_s0_nxt = ila.ite(v_cnt == nv, h0_16, v_cnt + 1) h_cnt_s0_nxt = ila.ite((v_cnt == nv), ila.ite(h_cnt == nh - 1, h0_16, h_cnt + 1), h_cnt) # Here ^^^ is for transiting to next state hidden_update_s0_0 = ila.ite( ila.appfun(rand_func) < ila.appfun(sigmoid_func, train_sum_s0_nxt), b1, b0) hidden_update_s0_1 = ila.ite( v_cnt == nv, ila.store(hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0], hidden_update_s0_0), hidden_unit) hidden_update_s0_2 = ila.ite((v_cnt == nv) & (h_cnt == nh - 1), ila.store(hidden_update_s0_1, nh[HIDDEN_UNIT_WIDTH - 
1:0], b1), hidden_update_s0_1) train_pc_s0_nxt = ila.ite((v_cnt == nv) & (h_cnt == nh - 1), SumHiddenState, SumEdgeState) # Just like init jstate_s0_nxt = h0_16 inner_loop_pc_s0_nxt = h0_4 # add prefix : # train_sum_nxt = ila.ite(SumEdge, train_sum_s0_nxt, ila.ite(SumHidden, ... ) ) # SumHiddenK0-K4 : s1-s5 # pc:1 per_v_pc : 0 1 2 3 LastH = h_cnt == nh LastJ = jstate == K - 1 LastV = (v_cnt + K == nv) | (v_cnt + K >= NUM_VISIBLE_MAX) SumHiddenL0 = SumHidden & (inner_loop_pc == 0) SumHiddenL1 = SumHidden & (inner_loop_pc == 1) SumHiddenL2 = SumHidden & (inner_loop_pc == 2) SumHiddenL3 = SumHidden & (inner_loop_pc == 3) h_cnt_s1_s5_L0_nxt = ila.ite(LastH, h0_16, h_cnt + 1) jstate_s1_s5_L0_nxt = ila.ite(LastH, ila.ite(LastJ, h0_16, jstate + 1), jstate) inner_loop_pc_s1_s5_L0_nxt = ila.ite(LastJ & LastH, h1_4, inner_loop_pc) jstate_s1_s5_L1_nxt = ila.ite(LastJ, h0_16, jstate + 1) inner_loop_pc_s1_s5_L1_nxt = ila.ite(LastJ, h2_4, inner_loop_pc) jstate_s1_s5_L2_nxt = jstate_s1_s5_L1_nxt inner_loop_pc_s1_s5_L2_nxt = ila.ite(LastJ, h3_4, inner_loop_pc) jstate_s1_s5_L3_nxt = jstate_s1_s5_L2_nxt inner_loop_pc_s1_s5_L3_nxt = ila.ite( LastJ, ila.ite(LastV, h0_4, h0_4), # will choose to go back or not inner_loop_pc) def nextCondition(l0, l1, l2, l3, default): return ila.ite( SumHiddenL0, l0, ila.ite( SumHiddenL1, l1, ila.ite(SumHiddenL2, l2, ila.ite(SumHiddenL3, l3, default)))) h_cnt_s1_s5_nxt = nextCondition(h_cnt_s1_s5_L0_nxt, h_cnt, h_cnt, h_cnt, h_cnt) v_cnt_s1_s5_nxt = ila.ite(SumHiddenL3 & LastJ, ila.ite(LastV, h0_16, v_cnt + K), v_cnt) jstate_s1_s5_nxt = nextCondition(jstate_s1_s5_L0_nxt, jstate_s1_s5_L1_nxt, jstate_s1_s5_L2_nxt, jstate_s1_s5_L3_nxt, jstate) inner_loop_pc_s1_s5_nxt = nextCondition(inner_loop_pc_s1_s5_L0_nxt, inner_loop_pc_s1_s5_L1_nxt, inner_loop_pc_s1_s5_L2_nxt, inner_loop_pc_s1_s5_L3_nxt, inner_loop_pc) train_pc_s1_s5_nxt = ila.ite(SumHiddenL3 & LastJ & LastV, StorePosState, SumHiddenState) # L0 train_sum_s1_s5_L0_nxt = ila.ite(h_cnt == 0, h0_16, 
train_sum) + ila.ite( ila.load(hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0]) == 1, fpconvert(ila.load(edges_input, edge_load_addr), FPedge, FPsum), h0_16) _train_max_origin_L0 = ila.ite( jstate == 0, fpconst(-500, FPsum).ast, train_max) # make sure the first time we are comparing with init sum train_max_s1_s5_L0_nxt = ila.ite( LastH, ila.ite(ila.sgt(train_sum_s1_s5_L0_nxt, _train_max_origin_L0), train_sum_s1_s5_L0_nxt, _train_max_origin_L0), train_max) visibleEnergy_s1_s5_L0_nxt = ila.ite( LastH, ila.store(visibleEnergy, jstate[KWIDTH - 1:0], train_sum_s1_s5_L0_nxt), visibleEnergy) # L1 # sum3: 64_64 -> dp: 32_32 _31_sum = fpconst(31, FPsum).ast train_max_s1_s5_L1_nxt = ila.ite(jstate == 0, train_max - _31_sum, train_max) _st_val_L1 = ila.load(visibleEnergy, jstate[KWIDTH - 1:0]) - train_max_s1_s5_L1_nxt visibleEnergy_s1_s5_L1_nxt = ila.store(visibleEnergy, jstate[KWIDTH - 1:0], _st_val_L1) # L2 _pow2_new_val = ila.appfun(to_int_exp, ila.load(visibleEnergy, jstate[KWIDTH - 1:0])) _pow2_new_convert = fpconvert(_pow2_new_val, FPpow, FPsum3) sumOfpow2_s1_s5_L2_nxt = ila.ite(jstate == 0, h0_64, sumOfpow2) + _pow2_new_convert pow2_s1_s5_L2_nxt = ila.store(pow2, jstate[KWIDTH - 1:0], _pow2_new_val) # L3 _probs = ila.appfun(divide_func, [ila.load(pow2, jstate[KWIDTH - 1:0]), sumOfpow2]) _RAND = ila.appfun(rand_func) _visible_unit_new_val = ila.ite(_probs > _RAND, b1, b0) _vu_idx = v_cnt + jstate _visible_unit_s1_s5_L3_1 = ila.store(visible_unit, _vu_idx[VISIBLE_UNIT_WIDTH - 1:0], _visible_unit_new_val) visible_unit_s1_s5_L3_nxt = ila.ite( LastJ & LastV, ila.store(_visible_unit_s1_s5_L3_1, nv[VISIBLE_UNIT_WIDTH - 1:0], b1), _visible_unit_s1_s5_L3_1) # when exit visible unit should be made to store 1 at nv train_sum_s1_s5_nxt = nextCondition(train_sum_s1_s5_L0_nxt, train_sum, train_sum, train_sum, train_sum) train_max_s1_s5_nxt = nextCondition(train_max_s1_s5_L0_nxt, train_max_s1_s5_L1_nxt, train_max, train_max, train_max) visible_unit_s1_s5_nxt = nextCondition(visible_unit, 
visible_unit, visible_unit, visible_unit_s1_s5_L3_nxt, visible_unit) visibleEnergy_s1_s5_nxt = nextCondition(visibleEnergy_s1_s5_L0_nxt, visibleEnergy_s1_s5_L1_nxt, visibleEnergy, visibleEnergy, visibleEnergy) sumOfpow2_s1_s5_nxt = nextCondition(sumOfpow2, sumOfpow2, sumOfpow2_s1_s5_L2_nxt, sumOfpow2, sumOfpow2) pow2_s1_s5_nxt = nextCondition(pow2, pow2, pow2_s1_s5_L2_nxt, pow2, pow2) # before s6: store pos h_cnt_sp_nxt = ila.ite(h_cnt == nh, h0_16, h_cnt + 1) v_cnt_sp_nxt = ila.ite(h_cnt == nh, ila.ite(v_cnt == nv, h0_16, v_cnt + 1), v_cnt) _data_load = ila.load(train_input, v_cnt[VISIBLE_UNIT_WIDTH - 1:0]) _pos_sp_cond = (_data_load != 2) _pos_sp_val = ila.ite(_data_load != 0, b1, b0) & ila.load( hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0]) _pos_st_addr = (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt pos_sp_nxt = ila.store(pos, _pos_st_addr, _pos_sp_val) train_pc_sp_nxt = ila.ite((h_cnt == nh) & (v_cnt == nv), EdgeUpdateState, StorePosState) # update edge : s6 h_cnt_s6_nxt = ila.ite(h_cnt == nh, h0_16, h_cnt + 1) v_cnt_s6_nxt = ila.ite(h_cnt == nh, ila.ite(v_cnt == nv, v_cnt, v_cnt + 1), v_cnt) _pos_ld_addr = (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt train_pos = ila.load(pos, _pos_ld_addr) != 0 train_neg = (ila.load( hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0]) != 0) & (ila.load( visible_unit, v_cnt[VISIBLE_UNIT_WIDTH - 1:0]) != 0) edge_original = ila.load(edges_mem, (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt) edge_new = ila.ite((train_pos) & (~train_neg), edge_original + fpconst(LEARN_RATE, FPedge).ast, ila.ite((~train_pos) & (train_neg), edge_original - fpconst(LEARN_RATE, FPedge).ast, edge_original)) edge_s6_nxt = ila.store(edges_mem, (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt, edge_new) train_pc_s6_nxt = ila.ite((h_cnt == nh) & (v_cnt == nv), EdgeUpdateState, EdgeUpdateState) # no need to jump back itself, because the flag: train_input_done is turned back to zero # don't forget to set back signals in Uabs () train_done = TrainUabs.getreg('train_input_done') train_uabs_index = 
TrainUabs.getreg('index') train_uabs_loop_count = TrainUabs.getreg('loop_count') train_uabs_upc = TrainUabs.getreg('upc') # add prefix s6 !!! s6_complete = (h_cnt == nh) & (v_cnt == nv) index_nxt_s6_nxt = ila.ite( s6_complete, ila.ite((train_uabs_index == nu - 1) & (train_uabs_loop_count != nlp), h0_16, train_uabs_index + 1), train_uabs_index) # assert (train_uabs_index == ntu - 1) & (train_uabs_loop_count == nlp) should never happen loop_count_s6_nxt = ila.ite( s6_complete & (train_uabs_index == nu - 1) & (train_uabs_loop_count != nlp), train_uabs_loop_count + 1, train_uabs_loop_count) upc_s6_nxt = ila.ite(s6_complete, StartReadState, train_uabs_upc) train_input_done_s6_nxt_nxt = ila.ite(s6_complete, b0, train_done) # data -> hidden_unit -> visible_unit -> edge # data -> edge # add def TrainNext(e1, e2, e3, default): return ila.ite( SumEdge, e1, ila.ite(SumHidden, e2, ila.ite(EdgeUpdate, e3, default))) def TrainNextSP(e1, e2, e3, e4, default): return ila.ite( SumEdge, e1, ila.ite(SumHidden, e2, ila.ite(StorePos, e3, ila.ite(EdgeUpdate, e4, default)))) def TrainChoice5(name, e1, e2, e3, default): return ila.choice(name, e1, e2, e3, default) def TrainChoice4(name, e1, e2, default): return ila.choice(name, e1, e2, default) def TrainChoice3(name, e1, default): return ila.choice(name, e1, default) TrainUabs.set_init('train_upc', pc_init) TrainUabs.set_init('train_v_cnt', v_cnt_init) TrainUabs.set_init('train_h_cnt', h_cnt_init) TrainUabs.set_next( 'jstate', TrainNext(jstate_s0_nxt, jstate_s1_s5_nxt, jstate, jstate)) TrainUabs.set_next( 'train_sum', TrainNext(train_sum_s0_nxt, train_sum_s1_s5_nxt, train_sum, train_sum)) TrainUabs.set_next( 'train_v_cnt', TrainNextSP(v_cnt_s0_nxt, v_cnt_s1_s5_nxt, v_cnt_sp_nxt, v_cnt_s6_nxt, v_cnt)) TrainUabs.set_next( 'train_h_cnt', TrainNextSP(h_cnt_s0_nxt, h_cnt_s1_s5_nxt, h_cnt_sp_nxt, h_cnt_s6_nxt, h_cnt)) TrainUabs.set_next( 'train_upc', TrainNextSP(train_pc_s0_nxt, train_pc_s1_s5_nxt, train_pc_sp_nxt, train_pc_s6_nxt, train_pc)) 
TrainUabs.set_next( 'train_max', TrainNext(train_max, train_max_s1_s5_nxt, train_max, train_max)) TrainUabs.set_next( 'hidden_unit', TrainNext(hidden_update_s0_2, hidden_unit, hidden_unit, hidden_unit)) TrainUabs.set_next( 'visible_unit', TrainNext(visible_unit, visible_unit_s1_s5_nxt, visible_unit, visible_unit)) TrainUabs.set_next('edges', TrainNext(edges_mem, edges_mem, edge_s6_nxt, edges_mem)) TrainUabs.set_next( 'index', TrainNext(train_uabs_index, train_uabs_index, index_nxt_s6_nxt, train_uabs_index)) TrainUabs.set_next( 'loop_count', TrainNext(train_uabs_loop_count, train_uabs_loop_count, loop_count_s6_nxt, train_uabs_loop_count)) TrainUabs.set_next( 'upc', TrainNext(train_uabs_upc, train_uabs_upc, upc_s6_nxt, train_uabs_upc)) TrainUabs.set_next( 'train_input_done', TrainNext(train_done, train_done, train_input_done_s6_nxt_nxt, train_done)) # newly added TrainUabs.set_next( 'visibleEnergies', TrainNext(visibleEnergy, visibleEnergy_s1_s5_nxt, visibleEnergy, visibleEnergy)) TrainUabs.set_next( 'sumOfpow2', TrainNext(sumOfpow2, sumOfpow2_s1_s5_nxt, sumOfpow2, sumOfpow2)) TrainUabs.set_next('pow2', TrainNext(pow2, pow2_s1_s5_nxt, pow2, pow2)) TrainUabs.set_next('pos', ila.ite(StorePos, pos_sp_nxt, pos)) TrainUabs.set_next( 'per_v_pc', TrainNext(inner_loop_pc_s0_nxt, inner_loop_pc_s1_s5_nxt, inner_loop_pc, inner_loop_pc)) #------------------------------------ # Predict UUABS #------------------------------------ # data -> predict_result PredictUabs = uabs.add_microabstraction('predict', predict_input_done == 1) sigmoid_func = PredictUabs.fun('sigmoid', 64, [16]) # DATA_sum_, 01_D rand_func = PredictUabs.fun('rand', 64, []) # generate random number to_int_exp = PredictUabs.fun('to_int_exp', 32, [16]) # round_func = PredictUabs.fun('round', 8, [32]) # 05_D -> u8 divide_func = PredictUabs.fun( 'divide', 64, [32, 64]) # dp:32_32 / sum_of_pow2 64_64 = 64_1 hidden_unit = PredictUabs.mem('hidden_unit', HIDDEN_UNIT_WIDTH, 1) visibleEnergy = 
PredictUabs.mem('visibleEnergies', KWIDTH, 16) predict_result = PredictUabs.getmem('predict_result') predict_sum = PredictUabs.reg('predict_sum', 16) predict_max = PredictUabs.reg('predict_max', 16) sumOfpow2 = PredictUabs.reg('sumOfpow2', 64) pow2 = PredictUabs.mem('pow2', KWIDTH, 32) predict_vector = PredictUabs.mem('predict_vector', VISIBLE_UNIT_WIDTH, 1) inner_loop_pc = PredictUabs.reg('per_v_pc', 4) count = PredictUabs.reg('count', 8) jstate = PredictUabs.reg('jstate', 16) expectation = PredictUabs.reg('expectation', 32) prediction = PredictUabs.reg('prediction', 8) predict_pc = PredictUabs.reg('predict_upc', 4) # Re-evaluate v_cnt = PredictUabs.reg('predict_v_cnt', 16) h_cnt = PredictUabs.reg('predict_h_cnt', 16) predict_input = PredictUabs.getmem('data') edges_input = PredictUabs.getmem('edges') nv = PredictUabs.getreg('num_visible') nh = PredictUabs.getreg('num_hidden') nu = PredictUabs.getreg('num_users') ntu = PredictUabs.getreg('num_testusers') nlp = PredictUabs.getreg('num_loops') SumEdge = predict_pc == 0 SumEdgeState = const(0, 4) SumHidden = predict_pc == 1 SumHiddenState = const(1, 4) GenResult = predict_pc == 3 GenResultState = const(3, 4) WaitForWrite = predict_pc == 2 WaitForWriteState = const(2, 4) PredictUabs.decode_exprs = [SumEdge, SumHidden, WaitForWrite] #Begin v_cnt_init = const(0, 16) h_cnt_init = const(0, 16) pc_init = const(0, 4) #SumEdge: s0 edge_load_addr = (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt predict_sum_s0_nxt = ila.ite(v_cnt == 0, const( 0, 16), predict_sum) + ila.ite( ila.load(predict_input, v_cnt[DATAMEM_ADDR_WIDTH - 1:0]) == 1, fpconvert(ila.load(edges_input, edge_load_addr), FPedge, FPsum), const(0, 16)) v_cnt_s0_nxt = ila.ite(v_cnt == nv, h0_16, v_cnt + 1) h_cnt_s0_nxt = ila.ite((v_cnt == nv), ila.ite(h_cnt == nh - 1, h0_16, h_cnt + 1), h_cnt) # Here ^^^ is for transiting to next state hidden_update_s0_0 = ila.ite( fpconst(0.5, FP01_D).ast < ila.appfun(sigmoid_func, predict_sum_s0_nxt), b1, b0) hidden_update_s0_1 = ila.ite( 
v_cnt == nv, ila.store(hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0], hidden_update_s0_0), hidden_unit) hidden_update_s0_2 = ila.ite((v_cnt == nv) & (h_cnt == nh - 1), ila.store(hidden_update_s0_1, nh[HIDDEN_UNIT_WIDTH - 1:0], b1), hidden_update_s0_1) hidden_update_s0_next = hidden_update_s0_2 predict_pc_s0_nxt = ila.ite((v_cnt == nv) & (h_cnt == nh - 1), SumHiddenState, SumEdgeState) jstate_s0_nxt = h0_16 count_s0_nxt = ila.const(0, 8) inner_loop_pc_s0_nxt = h0_4 # add prefix : # predict_sum_nxt = ila.ite(SumEdge, predict_sum_s0_nxt, ila.ite(SumHidden, ... ) ) #----------------------------- # SumHiddensK0-K4 : s1-s5 # #----------------------------- LastH = h_cnt == nh LastJ = jstate == K - 1 LastV = (v_cnt + K == nv) | (v_cnt + K >= NUM_VISIBLE_MAX) SumHiddenL0 = SumHidden & (inner_loop_pc == 0) SumHiddenL1 = SumHidden & (inner_loop_pc == 1) SumHiddenL2 = SumHidden & (inner_loop_pc == 2) SumHiddenL3 = SumHidden & (inner_loop_pc == 3) SumHiddenL4 = SumHidden & (inner_loop_pc == 4) h_cnt_s1_s5_L0_nxt = ila.ite(LastH, h0_16, h_cnt + 1) jstate_s1_s5_L0_nxt = ila.ite(LastH, ila.ite(LastJ, h0_16, jstate + 1), jstate) inner_loop_pc_s1_s5_L0_nxt = ila.ite(LastJ & LastH, h1_4, inner_loop_pc) jstate_s1_s5_L1_nxt = ila.ite(LastJ, h0_16, jstate + 1) inner_loop_pc_s1_s5_L1_nxt = ila.ite(LastJ, h2_4, inner_loop_pc) jstate_s1_s5_L2_nxt = jstate_s1_s5_L1_nxt inner_loop_pc_s1_s5_L2_nxt = ila.ite(LastJ, h3_4, inner_loop_pc) jstate_s1_s5_L3_nxt = jstate_s1_s5_L2_nxt inner_loop_pc_s1_s5_L3_nxt = ila.ite(LastJ, h4_4, inner_loop_pc) jstate_s1_s5_L4_nxt = jstate_s1_s5_L3_nxt inner_loop_pc_s1_s5_L4_nxt = ila.ite( LastJ, ila.ite(LastV, h0_4, h0_4), # will choose to go back or not inner_loop_pc) def nextCondition(l0, l1, l2, l3, l4, default): return ila.ite( SumHiddenL0, l0, ila.ite( SumHiddenL1, l1, ila.ite( SumHiddenL2, l2, ila.ite(SumHiddenL3, l3, ila.ite(SumHiddenL4, l4, default))))) h_cnt_s1_s5_nxt = nextCondition(h_cnt_s1_s5_L0_nxt, h_cnt, h_cnt, h_cnt, h_cnt, h_cnt) v_cnt_s1_s5_nxt = 
ila.ite(SumHiddenL4 & LastJ, ila.ite(LastV, h0_16, v_cnt + K), v_cnt) jstate_s1_s5_nxt = nextCondition(jstate_s1_s5_L0_nxt, jstate_s1_s5_L1_nxt, jstate_s1_s5_L2_nxt, jstate_s1_s5_L3_nxt, jstate_s1_s5_L4_nxt, jstate) inner_loop_pc_s1_s5_nxt = nextCondition(inner_loop_pc_s1_s5_L0_nxt, inner_loop_pc_s1_s5_L1_nxt, inner_loop_pc_s1_s5_L2_nxt, inner_loop_pc_s1_s5_L3_nxt, inner_loop_pc_s1_s5_L4_nxt, inner_loop_pc) predict_pc_s1_s5_nxt = ila.ite(SumHiddenL4 & LastJ & LastV, GenResultState, SumHiddenState) # L0 predict_sum_s1_s5_L0_nxt = ila.ite( h_cnt == 0, h0_16, predict_sum) + ila.ite( ila.load(hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0]) == 1, fpconvert(ila.load(edges_input, edge_load_addr), FPedge, FPsum), h0_16) _predict_max_origin_L0 = ila.ite( jstate == 0, fpconst(-500, FPsum).ast, predict_max) # make sure the first time we are comparing with init sum predict_max_s1_s5_L0_nxt = ila.ite( LastH, ila.ite(ila.sgt(predict_sum_s1_s5_L0_nxt, _predict_max_origin_L0), predict_sum_s1_s5_L0_nxt, _predict_max_origin_L0), predict_max) visibleEnergy_s1_s5_L0_nxt = ila.ite( LastH, ila.store(visibleEnergy, jstate[KWIDTH - 1:0], predict_sum_s1_s5_L0_nxt), visibleEnergy) # L1 # sum3: 64_64 -> dp: 32_32 _31_sum = fpconst(31, FPsum).ast predict_max_s1_s5_L1_nxt = ila.ite(jstate == 0, predict_max - _31_sum, predict_max) _st_val_L1 = ila.load(visibleEnergy, jstate[KWIDTH - 1:0]) - predict_max_s1_s5_L1_nxt visibleEnergy_s1_s5_L1_nxt = ila.store(visibleEnergy, jstate[KWIDTH - 1:0], _st_val_L1) # L2 _pow2_new_val = ila.appfun(to_int_exp, ila.load(visibleEnergy, jstate[KWIDTH - 1:0])) _pow2_new_convert = fpconvert(_pow2_new_val, FPpow, FPsum3) sumOfpow2_s1_s5_L2_nxt = ila.ite(jstate == 0, h0_64, sumOfpow2) + _pow2_new_convert pow2_s1_s5_L2_nxt = ila.store(pow2, jstate[KWIDTH - 1:0], _pow2_new_val) # L3 _probs = ila.appfun(divide_func, [ila.load(pow2, jstate[KWIDTH - 1:0]), sumOfpow2]) _mul = fixpoint(_probs, FP01_D) * fixpoint(jstate, FPu16) expectation_s1_s5_L3_nxt = ila.ite(jstate == 0, 
h0_32, expectation) + _mul.toFormat(FP05_D) # L4 _prediction = ila.zero_extend(ila.appfun(round_func, [expectation]), 16) _pv_val = ila.ite(jstate == _prediction, b1, b0) _pv_idx = v_cnt + jstate _first_store = ila.store(predict_vector, _pv_idx[VISIBLE_UNIT_WIDTH - 1:0], _pv_val) predict_vector_s1_s5_L4_nxt = ila.ite( SumHiddenL4 & LastV & LastJ, ila.store(_first_store, nv[VISIBLE_UNIT_WIDTH - 1:0], b1), _first_store) predict_sum_s1_s5_nxt = nextCondition(predict_sum_s1_s5_L0_nxt, predict_sum, predict_sum, predict_sum, predict_sum, predict_sum) predict_max_s1_s5_nxt = nextCondition(predict_max_s1_s5_L0_nxt, predict_max_s1_s5_L1_nxt, predict_max, predict_max, predict_max, predict_max) visibleEnergy_s1_s5_nxt = nextCondition(visibleEnergy_s1_s5_L0_nxt, visibleEnergy_s1_s5_L1_nxt, visibleEnergy, visibleEnergy, visibleEnergy, visibleEnergy) sumOfpow2_s1_s5_nxt = nextCondition(sumOfpow2, sumOfpow2, sumOfpow2_s1_s5_L2_nxt, sumOfpow2, sumOfpow2, sumOfpow2) pow2_s1_s5_nxt = nextCondition(pow2, pow2, pow2_s1_s5_L2_nxt, pow2, pow2, pow2) expectation_s1_s5_nxt = ila.ite(SumHiddenL3, expectation_s1_s5_L3_nxt, expectation) predict_vector_s1_s5_nxt = ila.ite(SumHiddenL4, predict_vector_s1_s5_L4_nxt, predict_vector) count_s1_s5_nxt = ila.ite(SumHiddenL4 & LastV & LastJ, h0_8, count) # before s6: store pos LastV = (v_cnt + K == nv) | (v_cnt + K >= NUM_VISIBLE_MAX) LastJ = jstate == K - 1 v_cnt_sp_nxt = ila.ite(LastV, v_cnt + K, v_cnt + K) jstate_sp_nxt = ila.ite(LastJ, h0_16, jstate + 1) _prediction_old = ila.ite(jstate == 0, h0_8, prediction) _pv_idx = v_cnt + jstate _predict_result_sp_val = ila.load(predict_vector, _pv_idx[VISIBLE_UNIT_WIDTH - 1:0]) prediction_sp_nxt = ila.ite(_predict_result_sp_val == 1, (jstate + 1)[7:0], _prediction_old) count_sp_nxt = ila.ite(LastJ, count + 1, count) predict_result_sp_nxt = ila.ite( LastJ, ila.store(predict_result, count[PREDICT_RESULT_WIDTH - 1:0], prediction), predict_result) predict_pc_sp_nxt = ila.ite(LastV & LastJ, WaitForWriteState, 
GenResultState) wr_complete = PredictUabs.getreg('wr_complete') wr_req = PredictUabs.getreg('wr_request') wr_len = PredictUabs.getreg('wr_length') wr_idx = PredictUabs.getreg('wr_index') cur_idx = PredictUabs.getreg('index') # 32 exitLoop = LastV & LastJ wr_request_sp_nxt = ila.ite(exitLoop, b1, wr_req) wr_index_sp_nxt = ila.ite( exitLoop, ila.zero_extend(nm, 32) * ila.zero_extend(cur_idx, 32), wr_idx) wr_length_sp_nxt = ila.ite(exitLoop, ila.zero_extend(nm, 32), wr_len) wr_complete_sp_nxt = ila.ite(exitLoop, b0, wr_complete) # s6: #--------------------- # update edge : s6 #--------------------- FinishOneRound = (wr_req == 0) & (wr_complete == 1) predict_pc_s6_nxt = ila.ite(FinishOneRound, WaitForWriteState, WaitForWriteState) # its value does not matter because it will be terminated by predict_input_done # don't forget to set back signals in Uabs () predict_done = PredictUabs.getreg('predict_input_done') predict_uabs_index = PredictUabs.getreg('index') predict_uabs_loop_count = PredictUabs.getreg('loop_count') predict_uabs_upc = PredictUabs.getreg('upc') all_done = PredictUabs.getreg('done') # add prefix s6 !!! 
index_nxt_s6_nxt = ila.ite( FinishOneRound, ila.ite( (predict_uabs_index == ntu - 1) & (predict_uabs_loop_count == nlp), predict_uabs_index, predict_uabs_index + 1), predict_uabs_index) wr_complete_s6_nxt = ila.ite(FinishOneRound, b0, wr_complete) # assert (predict_uabs_index == nu - 1) & (predict_uabs_loop_count != nlp) should never happen #loop_count_s6_nxt = ila.ite( (predict_uabs_index == nu - 1) & (predict_uabs_loop_count != nlp) , predict_uabs_loop_count + 1, predict_uabs_loop_count ) upc_s6_nxt = ila.ite( FinishOneRound, ila.ite( (predict_uabs_index == ntu - 1) & (predict_uabs_loop_count == nlp), FinishState, StartReadState), predict_uabs_upc) predict_input_done_s6_nxt_nxt = ila.ite(FinishOneRound, b0, predict_done) all_done_s6_nxt = ila.ite( FinishOneRound & (predict_uabs_index == ntu - 1) & (predict_uabs_loop_count == nlp), b1, b0) # data -> hidden_unit -> visible_unit -> edge # data -> edge # add # add def predictNext(e1, e2, e3, default): return ila.ite( SumEdge, e1, ila.ite(SumHidden, e2, ila.ite(WaitForWrite, e3, default))) def predictNextSp(e1, e2, e3, e4, default): return ila.ite( SumEdge, e1, ila.ite(SumHidden, e2, ila.ite(GenResult, e3, ila.ite(WaitForWrite, e4, default)))) def ite(inst, e, default): return ila.ite(inst, e, default) PredictUabs.set_init('predict_upc', pc_init) PredictUabs.set_init('predict_v_cnt', v_cnt_init) PredictUabs.set_init('predict_h_cnt', h_cnt_init) PredictUabs.set_next( 'jstate', predictNextSp(jstate_s0_nxt, jstate_s1_s5_nxt, jstate_sp_nxt, jstate, jstate)) PredictUabs.set_next( 'predict_sum', predictNext(predict_sum_s0_nxt, predict_sum_s1_s5_nxt, predict_sum, predict_sum)) PredictUabs.set_next( 'predict_v_cnt', predictNextSp(v_cnt_s0_nxt, v_cnt_s1_s5_nxt, v_cnt_sp_nxt, v_cnt, v_cnt)) PredictUabs.set_next( 'predict_h_cnt', predictNext(h_cnt_s0_nxt, h_cnt_s1_s5_nxt, h_cnt, h_cnt)) PredictUabs.set_next( 'predict_upc', predictNextSp(predict_pc_s0_nxt, predict_pc_s1_s5_nxt, predict_pc_sp_nxt, predict_pc_s6_nxt, predict_pc)) 
PredictUabs.set_next( 'predict_max', predictNext(predict_max, predict_max_s1_s5_nxt, predict_max, predict_max)) PredictUabs.set_next( 'hidden_unit', predictNext(hidden_update_s0_2, hidden_unit, hidden_unit, hidden_unit)) PredictUabs.set_next( 'count', predictNextSp(count_s0_nxt, count_s1_s5_nxt, count_sp_nxt, count, count)) PredictUabs.set_next( 'per_v_pc', predictNext(inner_loop_pc_s0_nxt, inner_loop_pc_s1_s5_nxt, inner_loop_pc, inner_loop_pc)) PredictUabs.set_next( 'index', predictNext(predict_uabs_index, predict_uabs_index, index_nxt_s6_nxt, predict_uabs_index)) PredictUabs.set_next( 'upc', predictNext(predict_uabs_upc, predict_uabs_upc, upc_s6_nxt, predict_uabs_upc)) PredictUabs.set_next( 'predict_input_done', predictNext(predict_done, predict_done, predict_input_done_s6_nxt_nxt, predict_done)) PredictUabs.set_next( 'done', predictNext(all_done, all_done, all_done_s6_nxt, all_done)) PredictUabs.set_next( 'wr_request', predictNextSp(wr_req, wr_req, wr_request_sp_nxt, wr_req, wr_req)) PredictUabs.set_next( 'wr_length', predictNextSp(wr_len, wr_len, wr_length_sp_nxt, wr_len, wr_len)) PredictUabs.set_next( 'wr_index', predictNextSp(wr_idx, wr_idx, wr_index_sp_nxt, wr_idx, wr_idx)) PredictUabs.set_next( 'wr_complete', predictNextSp(wr_complete, wr_complete, wr_complete_sp_nxt, wr_complete_s6_nxt, wr_complete)) # newly added PredictUabs.set_next( 'visibleEnergies', predictNext(visibleEnergy, visibleEnergy_s1_s5_nxt, visibleEnergy, visibleEnergy)) PredictUabs.set_next( 'sumOfpow2', predictNext(sumOfpow2, sumOfpow2_s1_s5_nxt, sumOfpow2, sumOfpow2)) PredictUabs.set_next('pow2', predictNext(pow2, pow2_s1_s5_nxt, pow2, pow2)) PredictUabs.set_next( 'expectation', predictNext(expectation, expectation_s1_s5_nxt, expectation, expectation)) PredictUabs.set_next( 'predict_vector', predictNext(predict_vector, predict_vector_s1_s5_nxt, predict_vector, predict_vector)) PredictUabs.set_next('prediction', ite(GenResult, prediction_sp_nxt, prediction)) 
PredictUabs.set_next('predict_result', ite(GenResult, predict_result_sp_nxt, predict_result)) #------------------------------------ # Store UABS #------------------------------------ # store is triggered by inst as uabs? # wr_grant == 1 is an instruction wr_granted = rbm.reg('wr_granted', 1) rbm.set_next('wr_granted', ila.ite((wr_request & wr_grant) == 1, b1, wr_granted)) data_out_1st_set = ila.zero_extend( ila.load(predict_result, const(0, PREDICT_RESULT_WIDTH)), 32) rbm.set_next( 'data_out', ila.ite((wr_request & wr_grant) == 1, data_out_1st_set, data_out)) # This is a hard decision, # as we set_next, the reaction as we defined will be appear in the next cycle StoreUabs = rbm.add_microabstraction('store', wr_granted == 1) store_idx = StoreUabs.reg('i', 16) nm = StoreUabs.getreg('num_movies') wr_granted = StoreUabs.getreg('wr_granted') wr_request = StoreUabs.getreg('wr_request') wr_complete = StoreUabs.getreg('wr_complete') predict_result = StoreUabs.getmem('predict_result') StoreUabs.set_init('i', h1_16) StoreUabs.set_next('i', ila.ite(store_idx < nm, store_idx + 1, store_idx)) StoreUabs.set_next('wr_granted', ila.ite(store_idx < nm, wr_granted, b0)) StoreUabs.set_next('wr_request', ila.ite(store_idx == 0, b0, wr_request)) StoreUabs.set_next('wr_complete', ila.ite(store_idx < nm, wr_complete, b1)) data_out = StoreUabs.getreg('data_out') # possibly one cycle earlier StoreUabs.set_next( 'data_out', ila.zero_extend( ila.load(predict_result, store_idx[PREDICT_RESULT_WIDTH - 1:0]), 32)) #--------------------------- # Add no next # def keepNC(Abs, name): Abs.set_next(name, Abs.getreg(name)) def keepMemNC(Abs, name): Abs.set_next(name, Abs.getmem(name)) keepNC(rbm, 'done') keepNC(rbm, 'wr_request') keepNC(rbm, 'wr_index') keepNC(rbm, 'wr_length') keepNC(rbm, 'rd_index') keepNC(rbm, 'rd_length') keepNC(rbm, 'rd_request') keepMemNC(uabs, 'edges') keepNC(rbm, 'rd_complete') keepNC(rbm, 'wr_complete') return rbm
def createAESILA(enable_ps):
    """Build the 'aes' ILA template and its 'aes_compute' micro-abstraction.

    The top-level abstraction has three inputs ('cmd', 'cmdaddr',
    'cmddata'), five registers ('aes_state', 'aes_addr', 'aes_len',
    'aes_ctr', 'aes_key0'), the memory 'XRAM' and the function 'aes'.
    Most next-state expressions are given as ``ila.choice`` candidate sets
    rather than as single fixed expressions.

    Args:
        enable_ps: value stored in ``enable_parameterized_synthesis`` of the
            top-level abstraction.

    Returns:
        A ``(top, micro)`` tuple: the top-level abstraction and the
        micro-abstraction that is attached to it under ``aes_state != 0``.
    """
    top = ila.Abstraction("aes")
    top.enable_parameterized_synthesis = enable_ps

    # ---- command interface (inputs) ----
    cmd = top.inp('cmd', 2)
    cmdaddr = top.inp('cmdaddr', 16)
    cmddata = top.inp('cmddata', 8)

    # ---- architectural state ----
    state = top.reg('aes_state', 2)
    opaddr = top.reg('aes_addr', 16)
    oplen = top.reg('aes_len', 16)
    ctr = top.reg('aes_ctr', 128)
    key0 = top.reg('aes_key0', 128)

    # Memory and function referenced by the micro-instructions below.
    xram = top.mem('XRAM', 16, 8)
    aes = top.fun('aes', 128, [128, 128, 128])

    # ---- fetch / decode of the top-level abstraction ----
    # The fetched "instruction" is the concatenation of the three inputs;
    # it is valid only when cmd == 2.
    top.fetch_expr = ila.concat([cmd, cmdaddr, cmddata])
    top.fetch_valid = (cmd == 2)

    # One decode expression per address in [0xff00, 0xff30).
    write_decodes = []
    for addr in xrange(0xff00, 0xff30):
        write_decodes.append((cmd == 2) & (cmdaddr == addr))
    top.decode_exprs = write_decodes

    micro = top.add_microabstraction('aes_compute', state != 0)

    # ---- top-level next-state functions ----
    # Multi-byte registers: either a chunk of the register is overwritten
    # with cmddata, or the register keeps its value.
    multibyte_regs = (
        ('aes_addr', opaddr),
        ('aes_len', oplen),
        ('aes_ctr', ctr),
        ('aes_key0', key0),
    )
    for reg_name, reg in multibyte_regs:
        chunk_write = ila.writechunk('wr_' + reg_name, reg, cmddata)
        top.set_next(reg_name,
                     ila.choice('nxt_' + reg_name, [chunk_write, reg]))

    # aes_state: unchanged, or set to 1 when cmddata == 1.
    state_on_cmd = ila.ite(cmddata == 1, top.const(1, 2), state)
    top.set_next('aes_state',
                 ila.choice('state_next', [state, state_on_cmd]))

    # XRAM is left unchanged by the top-level abstraction.
    top.set_next('XRAM', xram)

    ################################
    # Micro-ILA
    ################################
    rd_data = micro.reg('rd_data', 128)
    enc_data = micro.reg('enc_data', 128)
    byte_cnt = micro.reg('byte_cnt', 4)
    oped_byte_cnt = micro.reg('oped_byte_cnt', 16)
    blk_cnt = micro.reg('blk_cnt', 16)
    aes_time = micro.reg('aes_time', 5)
    uaes_ctr = micro.reg('uaes_ctr', 128)

    # Counters start at zero; uaes_ctr starts from the top-level aes_ctr.
    zero_initialised = (
        ('byte_cnt', 4),
        ('blk_cnt', 16),
        ('oped_byte_cnt', 16),
        ('aes_time', 5),
    )
    for counter_name, width in zero_initialised:
        micro.set_init(counter_name, micro.const(0, width))
    micro.set_init('uaes_ctr', top.getreg('aes_ctr'))

    uxram = top.getmem('XRAM')
    byte_cnt_16b = ila.zero_extend(byte_cnt, 16)

    # ---- fetch / decode of the micro-abstraction ----
    micro.fetch_expr = state
    # One decode expression per (byte_cnt, state) pair, byte_cnt outermost.
    micro_decodes = []
    for j in xrange(16):
        for i in [1, 2, 3]:
            micro_decodes.append((state == i) & (byte_cnt == j))
    micro.decode_exprs = micro_decodes

    # byte_cnt: 0, +1, or no change.
    byte_cnt_inc = byte_cnt + 1
    micro.set_next(
        'byte_cnt',
        ila.choice('byte_cnt_nxt',
                   [top.const(0, 4), byte_cnt_inc, byte_cnt]))

    # oped_byte_cnt: 0, +16, or no change.
    oped_byte_cnt_inc = oped_byte_cnt + 16
    micro.set_next(
        'oped_byte_cnt',
        ila.choice('oped_byte_cnt_nxt',
                   [top.const(0, 16), oped_byte_cnt_inc, oped_byte_cnt]))

    # blk_cnt: 0, no change, +16, or +16 only while more_blocks holds.
    blk_cnt_inc = blk_cnt + 16
    more_blocks = (oped_byte_cnt_inc < oplen)
    blk_cnt_candidates = [
        top.const(0, 16),
        blk_cnt,
        blk_cnt_inc,
        ila.ite(more_blocks, blk_cnt_inc, blk_cnt),
    ]
    micro.set_next('blk_cnt', ila.choice('blk_cnt_nxt', blk_cnt_candidates))

    # aes_time: increments until it reaches 31, then holds that value.
    aes_time_inc = aes_time + 1
    aes_time_ov = aes_time == top.const(31, 5)
    aes_time_nxt_c = ila.ite(aes_time_ov, aes_time, aes_time_inc)
    aes_time_nxt = ila.choice(
        "aes_timeC",
        top.const(0, 5),
        aes_time_nxt_c,
        ila.ite(more_blocks, top.const(0, 5), aes_time_nxt_c))
    aes_time_enough = aes_time > top.const(10, 5)
    micro.set_next('aes_time', aes_time_nxt)

    # uaes_ctr: no change, advance by a value picked from 1..128 while
    # more_blocks holds, or reload from the top-level ctr.
    ctr_step = ila.inrange('addvalue',
                           micro.const(1, 128), micro.const(128, 128))
    micro.set_next(
        'uaes_ctr',
        ila.choice('uaes_ctr_nxt',
                   uaes_ctr,
                   ila.ite(more_blocks, uaes_ctr + ctr_step, uaes_ctr),
                   ctr))

    # aes_state as seen from the micro-abstraction: any of the four
    # constants, no change, or one of two conditional transitions.
    ustate = micro.getreg('aes_state')
    ustate_candidates = [top.const(value, 2) for value in (0, 1, 2, 3)]
    ustate_candidates.append(ustate)
    ustate_candidates.append(
        ila.ite(more_blocks, top.const(1, 2), top.const(0, 2)))
    ustate_candidates.append(
        ila.ite(aes_time_enough, top.const(3, 2), top.const(2, 2)))
    micro.set_next('aes_state', ila.choice('ustate_next', ustate_candidates))

    # rd_data: a chunk is overwritten with the byte loaded from
    # XRAM[opaddr + blk_cnt + byte_cnt], or the register is unchanged.
    rd_addr = opaddr + blk_cnt + byte_cnt_16b
    rdblock = ila.writechunk("rd_data_chunk", rd_data,
                             ila.load(uxram, rd_addr))
    micro.set_next('rd_data', ila.choice('rd_data_nxt', rdblock, rd_data))

    # enc_data: result of applying 'aes' to (counter, key0, rd_data) when
    # state == 2, otherwise unchanged.
    aes_ctr = ila.choice('ctr', uaes_ctr,
                         ctr + ila.zero_extend(blk_cnt, 128))
    aes_enc_data = ila.appfun(aes, [aes_ctr, key0, rd_data])
    micro.set_next('enc_data', ila.ite(state == 2, aes_enc_data, enc_data))

    # XRAM: unchanged, or an 8-bit chunk of enc_data is stored at
    # opaddr + blk_cnt + byte_cnt.
    xram_w_data = ila.readchunk('enc_data_chunk', enc_data, 8)
    xram_w_addr = opaddr + blk_cnt + byte_cnt_16b
    xram_w_aes = ila.store(uxram, xram_w_addr, xram_w_data)
    micro.set_next('XRAM', ila.choice('xram_nxt', uxram, xram_w_aes))

    return top, micro