예제 #1
0
 def sliceSelectOne (modCase):
     """Build a nested if-then-else over the cases ``modCase .. gb.RAM_size - 1``.

     Each case loads from ``gb.RAM[seqs[case]]`` at address
     ``gb.RAM_x - gb.RAM_x_1``.  A case is selected when ``start`` equals its
     index; the last case is the unconditional fallback.
     """
     ramIndex = seqs[modCase]
     if modCase == gb.RAM_size - 1:
         # Last candidate: returned without comparing against `start`.
         return ila.load (gb.RAM[ramIndex], gb.RAM_x - gb.RAM_x_1)
     matchesStart = (start == modCase)
     thisCase = ila.load (gb.RAM[ramIndex], gb.RAM_x - gb.RAM_x_1)
     laterCases = sliceSelectOne (modCase + 1)
     return ila.ite (matchesStart, thisCase, laterCases)
예제 #2
0
 def instructionFetch(self):
     """Fetch the two instruction words and slice out their opcode fields.

     ``inst_a`` / ``inst_b`` are loaded from ``self.mem`` at the addresses
     formed by zero-extending ``pc_a[31:2]`` / ``pc_b[31:2]``; the opcode of
     each is the slice ``[OPCODE_BIT_TOP - 1 : OPCODE_BIT_BOT]``.
     """
     addr_a = ila.zero_extend(self.pc_a[31:2],
                              instruction_format.MEM_ADDRESS_BITS)
     self.inst_a = ila.load(self.mem, addr_a)

     addr_b = ila.zero_extend(self.pc_b[31:2],
                              instruction_format.MEM_ADDRESS_BITS)
     self.inst_b = ila.load(self.mem, addr_b)

     # Both opcodes use the same bit range of their respective words.
     opcode_hi = instruction_format.OPCODE_BIT_TOP - 1
     opcode_lo = instruction_format.OPCODE_BIT_BOT
     self.opcode_a = self.inst_a[opcode_hi:opcode_lo]
     self.opcode_b = self.inst_b[opcode_hi:opcode_lo]
예제 #3
0
파일: ptxILA.py 프로젝트: emzha/IMDb
 def instructionFetch(self):
     """Load the instruction at ``pc`` and decode its fields onto ``self``.

     Field boundaries come from ``instruction_format``; each field is the
     slice ``inst[TOP - 1 : BOT]``.  Register operands are also resolved to
     register values through ``indexIntoReg``.
     """
     fmt = instruction_format
     word_addr = ila.zero_extend(self.pc[31:2], fmt.MEM_ADDRESS_BITS)
     self.inst = ila.load(self.mem, word_addr)

     def bits(top, bot):
         # The code indexes with (top - 1) as the upper bound of the slice.
         return self.inst[(top - 1):bot]

     self.opcode = bits(fmt.OPCODE_BIT_TOP, fmt.OPCODE_BIT_BOT)
     self.fetch_expr = self.inst

     # Register index fields.
     self.dest = bits(fmt.DST_BIT_TOP, fmt.DST_BIT_BOT)
     self.src1 = bits(fmt.SRC0_BIT_TOP, fmt.SRC0_BIT_BOT)
     self.src2 = bits(fmt.SRC1_BIT_TOP, fmt.SRC1_BIT_BOT)
     self.src3 = bits(fmt.SRC2_BIT_TOP, fmt.SRC2_BIT_BOT)

     # Immediates, widened to PC_BITS (base: sign-extended, branch: zero-extended).
     self.baseImm = ila.sign_extend(
         bits(fmt.BASE_BIT_TOP, fmt.BASE_BIT_BOT), fmt.PC_BITS)
     self.branchPred = self.dest  # the predicate index reuses the dest field
     self.predReg = self.indexIntoReg(self.branchPred)
     self.branchImm = ila.zero_extend(
         bits(fmt.IMM_BIT_TOP, fmt.IMM_BIT_BOT), fmt.PC_BITS)

     # Register values selected by the index fields.
     self.sreg1 = self.indexIntoReg(self.src1)
     self.sreg2 = self.indexIntoReg(self.src2)
     self.sreg3 = self.indexIntoReg(self.src3)
     self.sregdest = self.indexIntoReg(self.dest)
예제 #4
0
    def sreg_nxt(self, regNo):
        """Return the next-state expression for scalar register ``regNo``.

        The register keeps its value unless ``self.dest == regNo``.  When it
        is the destination:

        * register-register format, ``rrType == 0b000``: ``sreg1 <op> sreg2``
        * immediate format, ``immType == 0b00``: ``sreg1 <op> immB``
        * immediate format, ``immType == 0b10``: ``sreg1 <op> immA``

        where ``<op>`` is one of ADD_I, SUB_I, AND, OR, MULH_I; any other
        opcode or type keeps the old value.

        Fix: the MULH_I case of both immediate forms used to multiply by
        ``sreg2`` (a register operand) instead of the immediate, unlike every
        other case in the same chains.
        """
        sreg1 = self.indexToSGPR(self.rrSrc1)
        sreg2 = self.indexToSGPR(self.rrSrc2)
        # load instruction
        # NOTE(review): load_val is built but never used; the load branch
        # below keeps the old register value on both sides -- presumably the
        # load path is not implemented yet.
        addr = self.indexToSGPR(self.memPtr) + ila.sign_extend(
            self.memOffSet, SCALAR_REG_BITS)
        load_val = ila.load(self.mem,
                            ila.zero_extend(addr[31:2], MEM_ADDRESS_BITS))

        held = self.scalar_registers[regNo]
        # The immediate opcode is widened to 6 bits before it is compared
        # against the NyEncoding constants.
        imm_opcode = ila.zero_extend(self.immOpcode, 6)

        def alu(opcode, rhs):
            # sreg1 <op> rhs selected by opcode; unknown opcodes hold the value.
            return ila.ite(opcode == NyEncoding.ADD_I, sreg1 + rhs,
                   ila.ite(opcode == NyEncoding.SUB_I, sreg1 - rhs,
                   ila.ite(opcode == NyEncoding.AND, sreg1 & rhs,
                   ila.ite(opcode == NyEncoding.OR, sreg1 | rhs,
                   ila.ite(opcode == NyEncoding.MULH_I, sreg1 * rhs,
                           held)))))

        reg_reg_next = ila.ite(self.rrType == self.model.const(0b000, 3),
                               alu(self.rrOpcode, sreg2),
                               held)
        imm_next = ila.ite(self.immType == self.model.const(0b00, 2),
                           alu(imm_opcode, self.immB),
                           ila.ite(self.immType == self.model.const(0b10, 2),
                                   alu(imm_opcode, self.immA),
                                   held))
        load_next = ila.ite(self.isLoad == self.model.const(0b1, 1),
                            held, held)

        return ila.ite(self.dest == regNo,
                       ila.ite(self.isRegReg,
                               reg_reg_next,
                               ila.ite(self.isImmediate, imm_next, load_next)),
                       held)
예제 #5
0
 def instructionFetch(self):
     """Load the instruction at ``pc`` and slice out the fields of every format.

     All fields are extracted unconditionally from the same word.  The
     ``isBranch`` / ``isRegReg`` / ``isImmediate`` / ``isMem`` predicates are
     comparisons on the top bits of the word.
     """
     const = self.model.const
     word = ila.load(
         self.mem, ila.zero_extend(self.pc[31:2], MEM_ADDRESS_BITS))
     self.instruction = word

     # --- branch fields ---
     self.isBranch = (word[31:28] == const(0b1111, 4))
     self.branchOP = word[27:25]
     self.branchOffsetA = word[24:5]
     self.branchSrc = word[4:0]
     self.branchOffsetB = word[24:0]

     # --- register-register fields ---
     self.isRegReg = (word[31:29] == const(0b110, 3))
     self.rrType = word[28:26]
     self.rrOpcode = word[25:20]
     self.rrSrc2 = word[19:15]
     self.rrMask = word[14:10]
     self.rrDest = word[9:5]
     self.rrSrc1 = word[4:0]

     # --- immediate fields ---
     self.isImmediate = (word[31] == const(0b0, 1))
     self.immType = word[30:29]
     self.immOpcode = word[28:24]
     self.immA = ila.zero_extend(word[23:15], SCALAR_REG_BITS)
     self.immB = ila.zero_extend(word[23:10], SCALAR_REG_BITS)
     self.immCup = word[23:10]
     self.immClow = word[4:0]
     self.immDest = word[9:5]
     self.immMask = word[14:10]

     # Immediate value chosen by immType.  Types 0b11 and "anything else"
     # both resolve to immA, exactly as in the nested expression this replaces.
     type_hi_clear = (self.immType[1] == const(0b0, 1))
     imm_from_b = ila.zero_extend(self.immB, SCALAR_REG_BITS)
     type_is_10 = (self.immType == const(0b10, 2))
     imm_from_c = ila.zero_extend(ila.concat(self.immCup, self.immClow),
                                  SCALAR_REG_BITS)
     type_is_11 = (self.immType == const(0b11, 2))
     imm_from_a = ila.zero_extend(self.immA, SCALAR_REG_BITS)
     imm_fallback = ila.zero_extend(self.immA, SCALAR_REG_BITS)
     self.imm = ila.ite(
         type_hi_clear, imm_from_b,
         ila.ite(type_is_10, imm_from_c,
                 ila.ite(type_is_11, imm_from_a, imm_fallback)))

     # --- memory fields ---
     self.isMem = (word[31:30] == const(0b10, 2))
     self.isLoad = word[29]
     self.memOpcode = word[28:25]
     self.memOffSetA = word[24:15]
     self.memOffSetB = word[24:10]
     self.memMask = word[14:10]
     self.memDest = word[9:5]
     self.memSrc = word[9:5]
     self.memPtr = word[4:0]

     # Opcodes 0b1000 and 0b1110 use offset A; every other opcode uses B.
     opcode_is_1000 = (self.memOpcode == const(0b1000, 4))
     offset_a_first = ila.sign_extend(self.memOffSetA, SCALAR_REG_BITS)
     opcode_is_1110 = (self.memOpcode == const(0b1110, 4))
     offset_a_second = ila.sign_extend(self.memOffSetA, SCALAR_REG_BITS)
     offset_b = ila.sign_extend(self.memOffSetB, SCALAR_REG_BITS)
     self.memOffSet = ila.ite(
         opcode_is_1000, offset_a_first,
         ila.ite(opcode_is_1110, offset_a_second, offset_b))

     # need rewrite (note carried over from the original author)
     masked_type = ((self.rrType == const(0b010, 3)) |
                    (self.rrType == const(0b101, 3)))
     self.isMask = (masked_type & self.isRegReg)

     self.dest = word[9:5]
예제 #6
0
파일: ptxILASample.py 프로젝트: emzha/IMDb
 def instructionFetch(self):
     """Load the instruction at ``pc`` and decode its fixed-position fields.

     Sets the opcode, destination/source register indices, the zero-extended
     branch target and the register values selected by those indices.
     """
     word_addr = ila.zero_extend(self.pc[31:2], MEM_ADDRESS_BITS)
     word = ila.load(self.mem, word_addr)
     self.inst = word

     self.opcode = word[31:22]
     self.fetch_expr = self.inst

     # Register index fields.
     self.dest = word[21:17]
     self.src1 = word[16:12]
     self.src2 = word[11:7]
     self.src3 = word[6:2]

     # The branch target is the low 22 bits, widened to PC_BITS.
     self.branchPC = ila.zero_extend(word[21:0], PC_BITS)

     # Register values selected by the index fields.
     self.sreg1 = self.indexIntoReg(self.src1)
     self.sreg2 = self.indexIntoReg(self.src2)
     self.sreg3 = self.indexIntoReg(self.src3)
     self.sregdest = self.indexIntoReg(self.dest)
예제 #7
0
    def get_reg_choices(reg):
        """Return the synthesis choice for the next value of register ``reg``.

        The outer choice is between leaving the register unchanged and
        updating it only when ``rm.rd == reg``.  The update is itself a
        choice among the candidate results listed below.
        """
        src1 = rm.indexIntoGPR(rm.rs1)
        src2 = rm.indexIntoGPR(rm.rs2)
        # rd_val and rs_val are not used further down; the calls are kept as
        # they were.
        rd_val = rm.indexIntoGPR(rm.rd)
        rs_val = ila.choice('rs_sel', src1, src2)
        shift_amt = ila.choice('shift_amout', src2[4:0], rm.inst[24:20])
        operand2 = ila.choice('rs2_or_immed', src2,
                              ila.zero_extend(rm.immI, 32),
                              ila.sign_extend(rm.immI, 32))

        # Loaded value: word at addr[31:2], then sliced by addr[1:0].
        mem_addr = src1 + rm.immI
        loaded_word = ila.load(rm.mem, zext(mem_addr[31:2]))
        loaded = getSlice(loaded_word, mem_addr[1:0])

        outer_name = "x%d_next" % reg
        # Remain the same regardless of RD (i.e. S/SB instructions)
        untouched = rm.generalRegList[reg]
        is_destination = (rm.rd == reg)  # Is this the destination register?
        inner_name = "x%d" % reg

        candidates = [
            src1 + operand2,  # RS1 + RS2
            src1 - operand2,  # RS1 - RS2
            src1 & operand2,  # AND
            src1 | operand2,  # OR
            src1 ^ operand2,  # XOR
            ila.ite(ila.slt(src1, operand2), bv(1), bv(0)),  # SLT
            ila.ite(ila.slt(src1, operand2), bv(0), bv(1)),
            ila.ite(src1 < operand2, bv(1), bv(0)),
            src1 << zext(shift_amt),  # sll
            src1 >> zext(shift_amt),  # srl
            ila.ashr(src1, zext(shift_amt)),  # sra
            rm.immU,  # LUI
            rm.immU + rm.pc,  # AUIPC
            rm.pc + bv(4),  # JAL/JALR
            loaded,
        ]
        written = ila.choice(inner_name, candidates)
        # Not the destination: remain the same.
        updated = ila.ite(is_destination, written, rm.generalRegList[reg])
        return ila.choice(outer_name, [untouched, updated])
예제 #8
0
 def instructionFetch(self):
     """Load the instruction at ``pc`` and decode it using the *_BIT constants.

     Each field runs from one boundary constant minus one down to the next
     boundary constant, so the fields are adjacent in the word.
     """
     word = ila.load(self.mem,
                     ila.zero_extend(self.pc[31:2], MEM_ADDRESS_BITS))
     self.inst = word

     opcode_top = REG_BITS - 1
     self.opcode = word[opcode_top:OPCODE_BIT]
     self.fetch_expr = self.inst

     # Register index fields, from the opcode boundary downwards.
     dest_top = OPCODE_BIT - 1
     self.dest = word[dest_top:DST_BIT]
     src1_top = DST_BIT - 1
     self.src1 = word[src1_top:SRC0_BIT]
     src2_top = SRC0_BIT - 1
     self.src2 = word[src2_top:SRC1_BIT]
     src3_top = SRC1_BIT - 1
     self.src3 = word[src3_top:SRC2_BIT]

     # Base immediate: bits below BASE_BIT, sign-extended to PC_BITS.
     base_raw = word[(BASE_BIT - 1):0]
     self.baseImm = ila.sign_extend(base_raw, PC_BITS)
     self.branchPred = self.dest  # the predicate index reuses the dest field
     self.predReg = self.indexIntoReg(self.branchPred)
     # Branch immediate: bits from DST_BIT - 1 down to BASE_BIT, zero-extended.
     branch_raw = word[(DST_BIT - 1):BASE_BIT]
     self.branchImm = ila.zero_extend(branch_raw, PC_BITS)

     # Register values selected by the index fields.
     self.sreg1 = self.indexIntoReg(self.src1)
     self.sreg2 = self.indexIntoReg(self.src2)
     self.sreg3 = self.indexIntoReg(self.src3)
     self.sregdest = self.indexIntoReg(self.dest)
예제 #9
0
파일: riscv_um.py 프로젝트: emzha/IMDb
 def InstFetch(self):
     """Load the 32-bit instruction at ``pc`` and decode its fields.

     The instruction is read from ``self.mem`` at the address given by
     ``pc[31:2]`` zero-extended to 32 bits.  Immediates are assembled from
     their instruction bit groups and sign-extended to XLEN, except ``immU``,
     which is ``inst[31:12]`` followed by twelve zero bits.
     """
     # An earlier variant took the instruction from a model input named
     # 'inst' instead of loading it from memory.
     word = ila.load(self.mem, ila.zero_extend(self.pc[31:2], 32))
     self.inst = word
     self.fetch_expr = self.inst

     # Fixed-position fields.
     self.opcode = word[6:0]
     self.rd = word[11:7]
     self.rs1 = word[19:15]
     self.rs2 = word[24:20]
     self.funct3 = word[14:12]
     self.funct7 = word[31:25]
     self.funct12 = word[31:20]

     # Immediates.
     self.immI = ila.sign_extend(word[31:20], XLEN)
     imm_s_bits = ila.concat([word[31:25], word[11:7]])
     self.immS = ila.sign_extend(imm_s_bits, XLEN)
     imm_b_bits = ila.concat(
         [word[31], word[7], word[30:25], word[11:8], const(0, 1)])
     self.immB = ila.sign_extend(imm_b_bits, XLEN)
     self.immU = ila.concat([word[31:12], const(0, 12)])
     imm_j_bits = ila.concat(
         [word[31], word[19:12], word[20], word[30:21], const(0, 1)])
     self.immJ = ila.sign_extend(imm_j_bits, XLEN)

     # Same bit range as funct12.
     self.csr_index = word[31:20]
예제 #10
0
파일: synthesize.py 프로젝트: emzha/IMDb
def createAESILA(enable_ps):
    m = ila.Abstraction("aes")
    m.enable_parameterized_synthesis = enable_ps

    # I/O interface: this is where the commands come from.
    cmd = m.inp('cmd', 2)
    cmdaddr = m.inp('cmdaddr', 16)
    cmddata = m.inp('cmddata', 8)
    # response.
    dataout = m.reg('dataout', 8)

    # internal arch state.
    state = m.reg('aes_state', 2)
    opaddr = m.reg('aes_addr', 16)
    oplen = m.reg('aes_len', 16)
    keysel = m.reg('aes_keysel', 1)
    ctr = m.reg('aes_ctr', 128)
    key0 = m.reg('aes_key0', 128)
    key1 = m.reg('aes_key1', 128)

    # for the uinst.
    xram = m.mem('XRAM', 16, 8)
    aes = m.fun('aes', 128, [128, 128, 128])

    # fetch is just looking at the input command.
    m.fetch_expr = ila.concat([state, cmd, cmdaddr, cmddata])
    m.fetch_valid = (cmd == 1) | (cmd == 2)

    # decode
    rdcmds = [(state == i) & (cmd == 1) & (cmdaddr == addr)
              for addr in xrange(0xff00, 0xff40) for i in [0, 1, 2, 3]]
    wrcmds = [(state == 0) & (cmd == 2) & (cmdaddr == addr)
              for addr in xrange(0xff00, 0xff40)]
    nopcmds = [
        ((state != 0) & (cmd != 1)) | ((state == 0) & (cmd != 1) & (cmd != 2))
    ]

    m.decode_exprs = rdcmds + wrcmds + nopcmds

    # read commands
    statebyte = ila.zero_extend(state, 8)
    opaddrbyte = ila.readchunk('rd_addr', opaddr, 8)
    oplenbyte = ila.readchunk('rd_len', oplen, 8)
    keyselbyte = ila.zero_extend(keysel, 8)
    ctrbyte = ila.readchunk('rd_ctr', ctr, 8)
    key0byte = ila.readchunk('rd_key0', key0, 8)
    key1byte = ila.readchunk('rd_key1', key1, 8)
    dataoutnext = ila.choice('dataout', [
        statebyte, opaddrbyte, oplenbyte, keyselbyte, ctrbyte, key0byte,
        key1byte,
        m.const(0, 8)
    ])
    m.set_next('dataout', dataoutnext)

    # write commands.
    def mb_reg_wr(name, reg):
        # multibyte register write.
        reg_wr = ila.writechunk('wr_' + name, reg, cmddata)
        reg_nxt = ila.choice('nxt_' + name, [reg_wr, reg])
        m.set_next(name, reg_nxt)

    mb_reg_wr('aes_addr', opaddr)
    mb_reg_wr('aes_len', oplen)
    mb_reg_wr('aes_ctr', ctr)
    mb_reg_wr('aes_key0', key0)
    mb_reg_wr('aes_key1', key1)

    # bit-level registers
    def bit_reg_wr(name, reg, sz):
        # bitwise register write
        assert reg.type.bitwidth == sz
        reg_wr = cmddata[sz - 1:0]
        reg_nxt = ila.choice('nxt_' + name, [reg_wr, reg])
        m.set_next(name, reg_nxt)

    bit_reg_wr('aes_keysel', keysel, 1)

    # these are for the uinst
    um = m.add_microabstraction('aes_compute', state != 0)

    # read data
    rd_data = um.reg('rd_data', 128)
    enc_data = um.reg('enc_data', 128)
    byte_cnt = um.reg('byte_cnt', 4)
    oped_byte_cnt = um.reg('oped_byte_cnt', 16)
    blk_cnt = um.reg('blk_cnt', 16)
    um.set_init('byte_cnt', um.const(0, 4))
    um.set_init('blk_cnt', um.const(0, 16))
    um.set_init('oped_byte_cnt', um.const(0, 16))
    uxram = m.getmem('XRAM')

    byte_cnt_16b = ila.zero_extend(byte_cnt, 16)

    um.fetch_expr = state
    um.decode_exprs = [(state == i) & (byte_cnt == j) for j in xrange(16)
                       for i in [1, 2, 3]]

    usim = lambda s: AESmicro().simMicro(s)

    # byte_cnt
    byte_cnt_inc = byte_cnt + 1
    byte_cnt_buf = ila.choice('byte_cnt_buf', [byte_cnt_inc, byte_cnt])
    byte_cnt_nxt = ila.choice(
        'byte_cnt_nxt', [byte_cnt_inc, m.const(0, 4), byte_cnt])
    um.set_next('byte_cnt', byte_cnt_nxt)

    # oped_byte_cnt
    oped_byte_cnt_inc = oped_byte_cnt + 16
    oped_byte_cnt_nxt = ila.choice(
        'oped_byte_cnt_nxt',
        [m.const(0, 16), oped_byte_cnt, oped_byte_cnt_inc])
    um.set_next('oped_byte_cnt', oped_byte_cnt_nxt)

    # blk_cnt
    blk_cnt_inc = blk_cnt + 16
    more_blocks = (oped_byte_cnt_inc < oplen)
    blk_cnt_nxt = ila.choice('blk_cnt_nxt', [
        m.const(0, 16), blk_cnt, blk_cnt_inc,
        ila.ite(more_blocks, blk_cnt_inc, blk_cnt)
    ])
    um.set_next('blk_cnt', blk_cnt_nxt)

    # ustate
    ustate = um.getreg('aes_state')
    ustate_nxt = ila.choice('ustate_next', [
        m.const(0, 2),
        m.const(1, 2),
        m.const(2, 2),
        m.const(3, 2), ustate,
        ila.ite(more_blocks, m.const(1, 2), m.const(0, 2))
    ])
    um.set_next('aes_state', ustate_nxt)

    # rd_data
    rdblock = ila.writechunk("rd_data_chunk", rd_data,
                             ila.load(uxram, opaddr + blk_cnt + byte_cnt_16b))
    rd_data_nxt = ila.choice('rd_data_nxt', rdblock, rd_data)
    um.set_next('rd_data', rd_data_nxt)

    # enc_data
    aes_key = ila.ite(keysel == 0, key0, key1)
    aes_enc_data = ila.appfun(aes, [ctr, aes_key, rd_data])
    enc_data_nxt = ila.ite(state == 2, aes_enc_data, enc_data)
    um.set_next('enc_data', enc_data_nxt)
    #print um.get_next('enc_data')

    # xram write
    xram_w_data = ila.readchunk('enc_data_chunk', enc_data, 8)
    xram_w_addr = opaddr + blk_cnt + byte_cnt_16b
    xram_w_aes = ila.store(uxram, xram_w_addr, xram_w_data)
    xram_nxt = ila.choice('xram_nxt', uxram, xram_w_aes)
    um.set_next('XRAM', xram_nxt)

    suffix = 'en' if enable_ps else 'dis'
    timefile = open('aes-times-%s.txt' % suffix, 'wt')

    t_elapsed = 0
    # micro-synthesis
    for s in [
            'XRAM', 'aes_state', 'byte_cnt', 'blk_cnt', 'oped_byte_cnt',
            'rd_data'
    ]:
        t_elapsed = 0
        st = time.clock()
        um.synthesize(s, usim)
        dt = time.clock() - st
        t_elapsed += dt
        print >> timefile, '%s %.2f' % ('u_' + s, dt)
        print '%s: %s' % (s, str(um.get_next(s)))
        ast = um.get_next(s)
        m.exportOne(ast, 'asts/u_%s_%s' % (s, suffix))

    sim = lambda s: AESmacro().simMacro(s)
    # state
    state_next = ila.choice(
        'state_next',
        [state, ila.ite(cmddata == 1, m.const(1, 2), state)])
    m.set_next('aes_state', state_next)

    # xram
    m.set_next('XRAM', xram)
    # synthesize.
    for s in [
            'aes_state', 'aes_addr', 'aes_len', 'aes_keysel', 'aes_ctr',
            'aes_key0', 'aes_key1', 'dataout'
    ]:
        st = time.clock()
        m.synthesize(s, sim)
        dt = time.clock() - st
        t_elapsed += dt
        print >> timefile, '%s %.2f' % (s, dt)

        ast = m.get_next(s)
        print '%s: %s' % (s, str(ast))
        m.exportOne(ast, 'asts/%s_%s' % (s, suffix))
    # connect to the uinst
    m.connect_microabstraction('aes_state', um)
    m.connect_microabstraction('XRAM', um)

    print 'total time: %.2f' % t_elapsed

    #print 'aes_state: %s' % str(m.get_next('aes_state'))
    #print 'XRAM: %s' % str(m.get_next('XRAM'))

    #m.generateSim('gen/aes_sim.hpp')
    m.generateSimToDir('sim')
예제 #11
0
	def vreg_nxt(self, regNo, laneNo):
		"""Return the next-state expression for lane ``laneNo`` of vector register ``regNo``.

		The lane keeps its value unless ``self.dest == regNo``.  When it is
		the destination, the result is ``vsreg1 <op> rhs`` with ``<op>`` one
		of ADD_I, SUB_I, AND, OR, MULH_I and ``rhs`` chosen by format/type:

		* reg-reg ``0b001``: scalar src2      * reg-reg ``0b100``: vector src2
		* reg-reg ``0b010``: scalar src2, masked
		* reg-reg ``0b101``: vector src2, masked
		* immediate ``0b01``: immB            * immediate ``0b11``: immA, masked

		"Masked" means the lane is held when bit ``laneNo`` of the mask
		register is 0.  Every other type or opcode holds the lane.
		"""
		# scalarA is not used below; the lookup is kept as it was.
		scalarA = self.indexToSGPR(self.rrSrc1)
		scalarB = self.indexToSGPR(self.rrSrc2)
		vectorA = self.indexToVGPR(self.rrSrc1, self.model.const(laneNo, SCALAR_REG_BITS))
		vectorB = self.indexToVGPR(self.rrSrc2, self.model.const(laneNo, SCALAR_REG_BITS))
		laneMask = self.indexToSGPR(self.rrMask)

		# load instruction (loadedWord is built but not used by the expression below)
		loadAddr = self.indexToSGPR(self.memPtr) + ila.sign_extend(self.memOffSet, SCALAR_REG_BITS)
		loadedWord = ila.load(self.mem, ila.zero_extend(loadAddr[31:2], PC_BITS))

		held = self.vector_registers[regNo][laneNo]
		# The immediate opcode is widened to 6 bits before each comparison.
		immOp = ila.zero_extend(self.immOpcode, 6)

		def alu(opcode, rhs):
			# vectorA <op> rhs selected by opcode; unknown opcodes hold the lane.
			return ila.ite(opcode == NyEncoding.ADD_I, vectorA + rhs,
				ila.ite(opcode == NyEncoding.SUB_I, vectorA - rhs,
				ila.ite(opcode == NyEncoding.AND, vectorA & rhs,
				ila.ite(opcode == NyEncoding.OR, vectorA | rhs,
				ila.ite(opcode == NyEncoding.MULH_I, vectorA * rhs,
					held)))))

		def masked(value):
			# Hold the lane when its mask bit is 0, otherwise take `value`.
			return ila.ite(laneMask[laneNo] == self.model.const(0b0, 1), held, value)

		regRegNext = ila.ite(self.rrType == self.model.const(0b001, 3),
			alu(self.rrOpcode, scalarB),
			ila.ite(self.rrType == self.model.const(0b100, 3),
				alu(self.rrOpcode, vectorB),
				ila.ite(self.rrType == self.model.const(0b010, 3),
					masked(alu(self.rrOpcode, scalarB)),
					ila.ite(self.rrType == self.model.const(0b101, 3),
						masked(alu(self.rrOpcode, vectorB)),
						held))))

		immNext = ila.ite(self.immType == self.model.const(0b01, 2),
			alu(immOp, self.immB),
			ila.ite(self.immType == self.model.const(0b11, 2),
				masked(alu(immOp, self.immA)),
				held))

		# Both branches hold the lane, as in the expression this replaces.
		loadNext = ila.ite(self.isLoad == self.model.const(0b1, 1), held, held)

		return ila.ite(self.dest == regNo,
			ila.ite(self.isRegReg,
				regRegNext,
				ila.ite(self.isImmediate, immNext, loadNext)),
			held)
예제 #12
0
	def sreg_nxt(self, regNo):
		"""Return the next-state expression for scalar register ``regNo``.

		The register keeps its value unless ``self.dest == regNo``.  When it
		is the destination:

		* register-register format, ``rrType == 0b000``: full ALU on
		  ``(sreg1, sreg2)``, selected by ``rrOpcode``
		* immediate format, ``immType == 0b00``: full ALU on ``(sreg1, immB)``
		* immediate format, ``immType == 0b10``: ADD_I/SUB_I/AND/OR/MULH_I on
		  ``(sreg1, immA)``

		Any other type or opcode keeps the old value.  Comparison opcodes
		yield ``const(0xffff)`` when true and ``const(0)`` when false.

		Fixes over the previous version:

		* the immediate CLZ and CTZ cases passed four arguments to
		  ``ila.ite`` (a stray ``sreg1``); they now mirror the
		  register-register form and apply ``aux_clz`` / ``aux_ctz`` to the
		  second operand;
		* the ``immA`` MULH_I case multiplied by ``sreg2`` instead of ``immA``.
		"""
		sreg1 = self.indexToSGPR(self.rrSrc1)
		sreg2 = self.indexToSGPR(self.rrSrc2)
		# load instruction
		# NOTE(review): load_val is built but never used; the load branch
		# below keeps the old register value on both sides -- presumably the
		# load path is not implemented yet.
		addr = self.indexToSGPR(self.memPtr) + ila.sign_extend(self.memOffSet, SCALAR_REG_BITS)
		load_val = ila.load(self.mem, ila.zero_extend(addr[31:2], MEM_ADDRESS_BITS))

		held = self.scalar_registers[regNo]
		cmp_true = self.const(0xffff, SCALAR_REG_BITS)
		cmp_false = self.const(0b0, SCALAR_REG_BITS)
		const_one = self.getConstOne()
		# The immediate opcode is widened to 6 bits before it is compared
		# against the NyEncoding constants.
		imm_opcode = ila.zero_extend(self.immOpcode, 6)

		def select(opcode, cases):
			# Fold (code, value) pairs into ite(opcode == code, value, ...)
			# with the first pair outermost and `held` as the final fallback.
			result = held
			for code, value in reversed(cases):
				result = ila.ite(opcode == code, value, result)
			return result

		def flag(cond):
			# Comparison result: cmp_true when cond holds, else cmp_false.
			return ila.ite(cond, cmp_true, cmp_false)

		def full_alu(rhs, mulhu_code):
			# mulhu_code is the opcode mapped to auxMulh_u: MULH_U in the
			# register-register chain, MULL_I in the immediate chain.
			# NOTE(review): MULH_I -> auxMull_i and MULL_I -> auxMulh_u are
			# kept exactly as before; the name pairing looks suspicious --
			# confirm against the encoding.
			return [
				(NyEncoding.ADD_I, sreg1 + rhs),
				(NyEncoding.SUB_I, sreg1 - rhs),
				(NyEncoding.AND, sreg1 & rhs),
				(NyEncoding.OR, sreg1 | rhs),
				(NyEncoding.MULH_I, self.auxMull_i(sreg1, rhs)),
				(mulhu_code, self.auxMulh_u(sreg1, rhs)),
				(NyEncoding.ASHR, ila.ashr(sreg1, rhs[4:0])),
				(NyEncoding.SHR, sreg1 >> rhs[4:0]),
				(NyEncoding.SHL, sreg1 << rhs[4:0]),
				(NyEncoding.CLZ, self.aux_clz(rhs)),
				(NyEncoding.CTZ, self.aux_ctz(rhs)),
				(NyEncoding.MOVE, rhs),
				(NyEncoding.CMPEQ_I, flag(sreg1 == rhs)),
				(NyEncoding.CMPNE_I, flag(sreg1 != rhs)),
				(NyEncoding.CMPGT_I, flag(self.auxCmpgt_i(sreg1, rhs) == const_one)),
				(NyEncoding.CMPGE_I, flag(self.auxCmpge_i(sreg1, rhs) == const_one)),
				(NyEncoding.CMPLT_I, flag(self.auxCmplt_i(sreg1, rhs) == const_one)),
				(NyEncoding.CMPLE_I, flag(self.auxCmple_i(sreg1, rhs) == const_one)),
				(NyEncoding.CMPGT_U, flag(sreg1 > rhs)),
				# >= and <= are written as the negation of < and >.
				(NyEncoding.CMPGE_U, ila.ite(sreg1 < rhs, cmp_false, cmp_true)),
				(NyEncoding.CMPLT_U, flag(sreg1 < rhs)),
				(NyEncoding.CMPLE_U, ila.ite(sreg1 > rhs, cmp_false, cmp_true)),
			]

		imm_a_cases = [
			(NyEncoding.ADD_I, sreg1 + self.immA),
			(NyEncoding.SUB_I, sreg1 - self.immA),
			(NyEncoding.AND, sreg1 & self.immA),
			(NyEncoding.OR, sreg1 | self.immA),
			(NyEncoding.MULH_I, sreg1 * self.immA),
		]

		reg_reg_next = ila.ite(self.rrType == self.model.const(0b000, 3),
			select(self.rrOpcode, full_alu(sreg2, NyEncoding.MULH_U)),
			held)
		imm_next = ila.ite(self.immType == self.model.const(0b00, 2),
			select(imm_opcode, full_alu(self.immB, NyEncoding.MULL_I)),
			ila.ite(self.immType == self.model.const(0b10, 2),
				select(imm_opcode, imm_a_cases),
				held))
		load_next = ila.ite(self.isLoad == self.model.const(0b1, 1), held, held)

		return ila.ite(self.dest == regNo,
			ila.ite(self.isRegReg,
				reg_reg_next,
				ila.ite(self.isImmediate, imm_next, load_next)),
			held)
예제 #13
0
def createFifoILA():
    """Build the "fifo" ILA template, synthesize it and export the result.

    The function declares the inputs, registers and memories of the model
    on a fresh ``ila.Abstraction("fifo")``, gives every state element a
    next-state expression (mostly ``ila.choice`` templates), installs the
    decode expressions, calls ``synth`` once per entry of
    ``fifo().all_state`` and finally calls ``generateSim`` and
    ``generateCbmcC`` with the file names 'tpm_export.cpp' and
    'tpm_export.c'.  Nothing is returned.
    """
    model = ila.Abstraction("fifo")

    def byte_const(value):
        # Every constant of this model is created 8 bits wide.
        return model.const(value, 8)

    # -------------------------------------------------------------
    # Inputs
    # -------------------------------------------------------------
    in_cmd = model.inp("cmd", 3)
    in_addr = model.inp("cmdaddr", 64)
    in_data = model.inp("cmddata", 8)

    # -------------------------------------------------------------
    # Constants
    # -------------------------------------------------------------
    zero = byte_const(0x0)
    one = byte_const(0x1)
    two = byte_const(0x2)
    three = byte_const(0x3)
    four = byte_const(0x4)
    thirty = byte_const(0x1e)

    # Flags that the status register can report.
    sts_valid = byte_const(fifo_def.STS_VALID)
    sts_data_avail = byte_const(fifo_def.STS_DATA_AVAIL)
    sts_data_expect = byte_const(fifo_def.STS_DATA_EXPECT)

    # Commands that can be written to the status register.  These two
    # constants are created but not referenced again in this function.
    sts_go = byte_const(fifo_def.STS_GO)
    sts_command_ready = byte_const(fifo_def.STS_COMMAND_READY)

    # -------------------------------------------------------------
    # State variables and their next-state templates
    # -------------------------------------------------------------

    # FIFO state: next value is one of the constants 0..4.
    state_reg = model.reg("fifo_state", 8)
    state_options = [zero, one, two, three, four]
    model.set_next("fifo_state",
                   ila.choice("fifo_state_choice", state_options))

    # Status register.
    status_reg = model.reg("fifo_sts", 8)
    status_options = [
        sts_valid,
        sts_valid | sts_data_avail,
        sts_valid | sts_data_expect,
        zero,
    ]
    model.set_next("fifo_sts",
                   ila.choice("fifo_sts_choice", status_options))

    # Write index into the input FIFO (amount written so far):
    # hold, increment, or clear.
    in_count = model.reg("fifo_in_amt", 8)
    in_count_options = [in_count, in_count + 1, zero]
    model.set_next("fifo_in_amt",
                   ila.choice("fifo_in_amt_choice", in_count_options))

    # Expected command size: hold, take the written data byte, or clear.
    in_cmdsize = model.reg("fifo_in_cmdsize", 8)
    in_cmdsize_options = [in_cmdsize, in_data, zero]
    model.set_next("fifo_in_cmdsize",
                   ila.choice("fifo_in_cmdsize_choice", in_cmdsize_options))

    # Input FIFO memory (declared with widths 8 and 8; the original note
    # describes it as 256 8-bit registers).  Either unchanged, or the data
    # byte stored at the current write index.
    in_mem = model.mem("fifo_indata", 8, 8)
    in_mem_options = [in_mem, ila.store(in_mem, in_count, in_data)]
    model.set_next("fifo_indata",
                   ila.choice("fifo_indata", in_mem_options))

    # Read index into the output FIFO.
    # TODO base this size off of list of command sizes
    out_count = model.reg("fifo_out_amt", 8)
    out_count_options = [out_count, out_count - 1, zero, thirty]
    model.set_next("fifo_out_amt",
                   ila.choice("fifo_out_amt_choice", out_count_options))

    # Output FIFO memory (same widths as the input one); never modified.
    out_mem = model.mem("fifo_outdata", 8, 8)
    model.set_next("fifo_outdata", out_mem)

    # dataout: the value returned by a read or write.
    dataout = model.reg("dataout", 8)
    dataout_options = [
        zero,
        out_mem[out_count],
        status_reg,
        fifo_def.FIFO_MAX_AMT - in_count,
        out_count,
    ]
    model.set_next("dataout",
                   ila.choice("dataout_choice", dataout_options))

    # -------------------------------------------------------------
    # Decode Logic
    # -------------------------------------------------------------
    addresses = [fifo_def.STS_ADDR, fifo_def.FIFO_ADDR, fifo_def.BURST_ADDR]
    command_bytes = [fifo_def.STS_COMMAND_READY, fifo_def.STS_GO]

    # Command start/end: a write of a command byte to the status address,
    # split by output amount (0 or 1) and by FIFO state (0..4).
    status_cmds = []
    for cmd_byte in command_bytes:
        for out_level in range(2):
            for state_val in range(5):
                status_cmds.append((in_addr == fifo_def.STS_ADDR)
                                   & (in_cmd == fifo_def.WR)
                                   & (in_data == cmd_byte)
                                   & (out_count == out_level)
                                   & (state_reg == state_val))

    # Write to the FIFO address in the sending state when exactly five
    # bytes have been written (the original labels this "reading the
    # in_cmdsize").
    cmdsize_cases = []
    for cmd_byte in command_bytes:
        cmdsize_cases.append((in_addr == fifo_def.FIFO_ADDR)
                             & (state_reg == fifo_def.FIFO_SENDING)
                             & (in_count == 5)
                             & (in_cmd == fifo_def.WR)
                             & (in_data == cmd_byte))

    # Actual command (named pcr_extend in the original): GO written to the
    # status address once the whole command is in the FIFO and bytes 6..9
    # of the input memory hold 0x00 0x00 0x00 0x14.
    pcr_extend_cond = ((in_addr == fifo_def.STS_ADDR)
                       & (in_cmd == fifo_def.WR)
                       & (in_data == fifo_def.STS_GO)
                       & (state_reg == fifo_def.FIFO_ACCEPTING)
                       & (in_count == in_cmdsize))
    for offset, expected in ((0x6, 0), (0x7, 0), (0x8, 0), (0x9, 0x14)):
        pcr_extend_cond = pcr_extend_cond & (
            ila.load(in_mem, byte_const(offset)) == expected)
    pcr_extend = [pcr_extend_cond]

    # Generic read/write in every state at every known address.
    general = []
    for addr in addresses:
        for op in [0, 1, 2]:
            for state_val in range(5):
                general.append((in_addr == addr)
                               & (in_cmd == op)
                               & (state_reg == state_val))

    model.decode_exprs = general + status_cmds + pcr_extend + cmdsize_cases

    # -------------------------------------------------------------
    # Synthesize
    # -------------------------------------------------------------
    simulator = fifo()

    def run_simulator(state_in):
        # Forward each query from the synthesizer to the reference model.
        return simulator.simulate(state_in)

    for var in simulator.all_state:
        synth(model, var, run_simulator)
    model.generateSim('tpm_export.cpp')
    model.generateCbmcC('tpm_export.c')
예제 #14
0
    def instructionFetch(self):
        """Fetch the current instruction and decode its fields and operands.

        The instruction word is loaded from ``self.mem`` at the address
        given by ``self.pc[31:3]`` zero-extended to ``MEM_ADDRESS_BITS``.
        Opcode, destination, three source fields and the immediates are
        sliced out of it, and for each of the four register fields the
        method builds the width flag, the two register-file lookups
        (``indexIntoSReg`` / ``indexIntoLReg``) and the selected operand.
        Everything is stored on ``self``; nothing is returned.
        """
        fmt = instruction_format

        def field(top, bot):
            # Instruction bits [top - 1 : bot].
            return self.inst[(top - 1):bot]

        def imm_to_pc_width():
            # A fresh zero-extension of the immediate field to PC width.
            return ila.zero_extend(field(fmt.IMM_BIT_TOP, fmt.IMM_BIT_BOT),
                                   fmt.PC_BITS)

        def width_flag(reg_idx):
            # long_scalar_register_flag when the index lies in
            # [scalar_register_num, register_total_num), otherwise
            # scalar_register_flag.
            in_upper_range = ((reg_idx >= self.scalar_register_num) &
                              (reg_idx < self.register_total_num))
            return ila.ite(in_upper_range, self.long_scalar_register_flag,
                           self.scalar_register_flag)

        def select_operand(flag, short_reg, long_reg):
            # ite over the flag between the short register value and the
            # low REG_BITS bits of the long register value.
            return ila.ite(flag, short_reg, long_reg[fmt.REG_BITS - 1:0])

        # --- fetch ---------------------------------------------------
        self.inst = ila.load(
            self.mem, ila.zero_extend(self.pc[31:3], fmt.MEM_ADDRESS_BITS))

        # --- plain bit fields ----------------------------------------
        self.opcode = field(fmt.OPCODE_BIT_TOP, fmt.OPCODE_BIT_BOT)
        self.fetch_expr = self.inst
        self.dest = field(fmt.DST_BIT_TOP, fmt.DST_BIT_BOT)
        self.src1 = field(fmt.SRC0_BIT_TOP, fmt.SRC0_BIT_BOT)
        self.src2 = field(fmt.SRC1_BIT_TOP, fmt.SRC1_BIT_BOT)
        self.src3 = field(fmt.SRC2_BIT_TOP, fmt.SRC2_BIT_BOT)

        # --- immediates ----------------------------------------------
        # The base immediate is sign-extended; the others are zero-extended.
        self.baseImm = ila.sign_extend(
            field(fmt.BASE_BIT_TOP, fmt.BASE_BIT_BOT), fmt.PC_BITS)
        #self.branchPred = self.dest
        #(self.predReg, self.predReg_flag) = self.indexIntoReg(self.branchPred)
        self.branchImm = imm_to_pc_width()
        self.ldImm = imm_to_pc_width()
        self.stImm = imm_to_pc_width()

        # --- register width flags ------------------------------------
        self.sreg1_flag = width_flag(self.src1)
        self.sreg2_flag = width_flag(self.src2)
        self.sreg3_flag = width_flag(self.src3)
        self.sregdest_flag = width_flag(self.dest)

        # --- lookups through indexIntoSReg ---------------------------
        self.ssreg1 = self.indexIntoSReg(self.src1)
        self.ssreg2 = self.indexIntoSReg(self.src2)
        self.ssreg3 = self.indexIntoSReg(self.src3)
        self.ssregdest = self.indexIntoSReg(self.dest)

        # --- lookups through indexIntoLReg ---------------------------
        self.lsreg1 = self.indexIntoLReg(self.src1)
        self.lsreg2 = self.indexIntoLReg(self.src2)
        self.lsreg3 = self.indexIntoLReg(self.src3)
        self.lsregdest = self.indexIntoLReg(self.dest)

        # --- final operands ------------------------------------------
        self.sreg1 = select_operand(self.sreg1_flag, self.ssreg1,
                                    self.lsreg1)
        self.sreg2 = select_operand(self.sreg2_flag, self.ssreg2,
                                    self.lsreg2)
        self.sreg3 = select_operand(self.sreg3_flag, self.ssreg3,
                                    self.lsreg3)
        self.sregdest = select_operand(self.sregdest_flag, self.ssregdest,
                                       self.lsregdest)
예제 #15
0
def buildILA():
    #---------------------------
    # define universal constant
    #---------------------------
    K = 5
    NUM_MOVIE_MAX = 100
    NUM_HIDDEN_MAX = 100
    NUM_VISIBLE_MAX = NUM_MOVIE_MAX * K
    DATAMEM_ADDR_WIDTH = int(
        log(NUM_VISIBLE_MAX + 1) /
        log(2)) + 1  # 9 # it is definitely not dividable, but need to check
    HIDDEN_UNIT_WIDTH = int(
        log(NUM_HIDDEN_MAX + 1) /
        log(2)) + 1  # 7 # it is definitely not dividable, but need to check
    VISIBLE_UNIT_WIDTH = int(log(NUM_VISIBLE_MAX + 1) / log(2)) + 1  # 9
    EDGEMEM_ADDR_WIDTH = int(
        log((NUM_VISIBLE_MAX + 1) * (NUM_HIDDEN_MAX + 1)) / log(2)) + 1  # 16
    POS_ADDR_WIDTH = EDGEMEM_ADDR_WIDTH
    NEG_ADDR_WIDTH = EDGEMEM_ADDR_WIDTH
    PREDICT_RESULT_WIDTH = int(log(NUM_MOVIE_MAX) / log(2)) + 1  # 7
    KWIDTH = int(log(K) / log(2)) + 1  # 3

    #---------------------------
    # Model
    #---------------------------

    rbm = ila.Abstraction('RBM')

    conf_done = rbm.inp('conf_done', 1)
    conf_num_hidden = rbm.inp('conf_num_hidden', 32)
    conf_num_visible = rbm.inp('conf_num_visible', 32)
    conf_num_users = rbm.inp('conf_num_users', 32)
    conf_num_loops = rbm.inp('conf_num_loops', 32)
    conf_num_testusers = rbm.inp('conf_num_testusers', 32)
    conf_num_movies = rbm.inp('conf_num_movies', 32)

    rst = rbm.inp('rst', 1)

    init_done = rbm.reg('init_done', 1)
    done = rbm.reg('done', 1)
    num_hidden = rbm.reg('num_hidden', 16)
    num_visible = rbm.reg('num_visible', 16)
    num_users = rbm.reg('num_users', 16)
    num_loops = rbm.reg('num_loops', 16)
    num_testusers = rbm.reg('num_testusers', 16)
    num_movies = rbm.reg('num_movies', 16)

    # DMA output
    rd_index = rbm.reg('rd_index', 32)
    rd_length = rbm.reg('rd_length', 32)
    rd_request = rbm.reg('rd_request', 1)
    rd_grant = rbm.inp('rd_grant', 1)
    data_in = rbm.inp('data_in', 32)
    # rd_cnt    = rbm.reg('rd_cnt', 16)  # i ureg  #585

    # DMA input
    wr_grant = rbm.inp('wr_grant', 1)
    wr_request = rbm.reg('wr_request', 1)
    wr_index = rbm.reg('wr_index', 32)
    wr_length = rbm.reg('wr_length', 32)
    data_out = rbm.reg('data_out', 32)
    # wr_cnt = rbm.reg('wr_cnt', 16) : u reg

    data = rbm.mem('data', DATAMEM_ADDR_WIDTH, 8)
    rbm.mem('predict_result', PREDICT_RESULT_WIDTH, 8)

    #-------------------------------------
    #  Decoding Expressions
    #-------------------------------------
    rstInst = rst == 1
    confDoneInst = (rst == 0) & (init_done == 0) & (conf_done == 1)
    rdGrantInst = (rd_request == 1) & (rd_grant == 1)
    wrGrantInst = (wr_request == 1) & (wr_grant == 1)
    decodeExpr = [rstInst, confDoneInst, rdGrantInst, wrGrantInst]

    #-------------------------------------
    #  AUX Functions
    #-------------------------------------
    def const(v, w):
        return rbm.const(v, w)

    b0 = const(0, 1)
    b1 = const(1, 1)
    h0_8 = const(0, 8)
    h1_8 = const(1, 8)
    h0_4 = const(0, 4)
    h1_4 = const(1, 4)
    h2_4 = const(2, 4)
    h3_4 = const(3, 4)
    h4_4 = const(4, 4)
    h0_16 = const(0, 16)
    h1_16 = const(1, 16)
    h0_32 = const(0, 32)
    h0_64 = const(0, 64)

    #-------------------------------------
    #  Init conditions
    #-------------------------------------

    rbm.set_init('init_done', b0)
    rbm.set_init('done', b0)
    rbm.set_init('num_hidden', h0_16)
    rbm.set_init('num_visible', h0_16)
    rbm.set_init('num_users', h0_16)
    rbm.set_init('num_loops', h0_16)
    rbm.set_init('num_testusers', h0_16)
    rbm.set_init('num_movies', h0_16)

    #-------------------------------------
    #  Config
    #-------------------------------------

    # this means, once configured, unless reset, it cannot be reconfigured
    init_done_nxt = ila.ite(rstInst, b0, ila.ite(confDoneInst, b1, init_done))
    num_hidden_nxt = ila.ite(
        rstInst, h0_16, ila.ite(confDoneInst, conf_num_hidden[15:0],
                                num_hidden))
    num_visible_nxt = ila.ite(
        rstInst, h0_16,
        ila.ite(confDoneInst, conf_num_visible[15:0], num_visible))
    num_users_nxt = ila.ite(
        rstInst, h0_16, ila.ite(confDoneInst, conf_num_users[15:0], num_users))
    num_loops_nxt = ila.ite(
        rstInst, h0_16, ila.ite(confDoneInst, conf_num_loops[15:0], num_loops))
    num_testusers_nxt = ila.ite(
        rstInst, h0_16,
        ila.ite(confDoneInst, conf_num_testusers[15:0], num_testusers))
    num_movies_nxt = ila.ite(
        rstInst, h0_16, ila.ite(confDoneInst, conf_num_movies[15:0],
                                num_movies))

    rbm.set_next('init_done', init_done_nxt)
    rbm.set_next('num_hidden', num_hidden_nxt)
    rbm.set_next('num_visible', num_visible_nxt)
    rbm.set_next('num_users', num_users_nxt)
    rbm.set_next('num_loops', num_loops_nxt)
    rbm.set_next('num_testusers', num_testusers_nxt)
    rbm.set_next('num_movies', num_movies_nxt)

    # INST-level w/r complete
    rbm_rd_complete = rbm.reg('rd_complete', 1)
    rbm_wr_complete = rbm.reg('wr_complete', 1)
    rbm.set_init('rd_complete', b0)
    rbm.set_init('wr_complete', b0)

    #------------------------------------
    #  Compute UABS
    #------------------------------------

    uabs = rbm.add_microabstraction('compute', (init_done == 1) & (done == 0))
    index = uabs.reg('index', 16)
    loop_count = uabs.reg('loop_count', 16)
    pc = uabs.reg('upc', 4)
    edges_mem = uabs.mem('edges', EDGEMEM_ADDR_WIDTH, 8)

    nlp = uabs.getreg('num_loops')
    nm = ila.zero_extend(uabs.getreg('num_movies'), 32)
    nu = uabs.getreg('num_users')
    ntu = uabs.getreg('num_testusers')
    out_rd_request = uabs.getreg('rd_request')
    out_rd_complete = uabs.getreg('rd_complete')
    out_rd_length = uabs.getreg('rd_length')
    out_rd_index = uabs.getreg('rd_index')

    train_input_done = uabs.reg('train_input_done', 1)
    predict_input_done = uabs.reg('predict_input_done', 1)

    uabs.set_init('upc', const(0, 4))
    uabs.set_init('index', h0_16)
    uabs.set_init('loop_count', h0_16)
    uabs.set_init('train_input_done', b0)
    uabs.set_init('predict_input_done', b0)
    uabs.set_init('rd_complete', b0)

    ###  computation micro_instructions

    StartRead = (pc == 0)
    WaitReadComplete = (pc == 1) & (out_rd_complete == 0)
    DecideTrainOrPredict = (pc == 1) & (out_rd_complete == 1)
    StartTrain = (pc == 2) & (train_input_done == 1)
    StartPredict = (pc == 2) & (predict_input_done == 1)
    Finish = (pc == 3)

    StartReadState = const(0, 4)
    WaitReadCompleteState = const(1, 4)
    StartTrainOrPredict = const(2, 4)
    FinishState = const(3, 4)

    decodeExpr = [
        StartRead, WaitReadComplete, DecideTrainOrPredict, StartTrain,
        StartPredict, Finish
    ]

    out_rd_request_nxt = ila.ite(StartRead, b1, out_rd_request)
    out_rd_length_nxt = ila.ite(StartRead, 5 * nm, out_rd_length)
    out_rd_index_nxt = ila.ite(StartRead, ila.zero_extend(index, 32),
                               out_rd_index)
    out_rd_complete_nxt = ila.ite(
        StartRead, b0, ila.ite(DecideTrainOrPredict, b0, out_rd_complete))

    train_input_done_nxt = ila.ite(DecideTrainOrPredict,
                                   ila.ite(loop_count < nlp, b1, b0),
                                   train_input_done)
    predict_input_done_nxt = ila.ite(DecideTrainOrPredict,
                                     ila.ite(loop_count == nlp, b1, b0),
                                     predict_input_done)

    pc_nxt = ila.ite(
        StartRead,
        WaitReadCompleteState,
        ila.ite(
            WaitReadComplete,
            pc,
            ila.ite(
                DecideTrainOrPredict,
                StartTrainOrPredict,
                ila.ite(
                    StartTrain,
                    StartTrainOrPredict,  # StartReadState, # actually should be updated by u2inst 
                    ila.ite(
                        StartPredict,
                        StartTrainOrPredict,  # StartReadState, # actually should be updated by u2inst 
                        ila.ite(
                            Finish,
                            FinishState,
                            pc  # should never happen!
                        ))))))

    # should be updated by u2inst
    index_nxt_dummy = ila.ite(
        StartTrain | StartPredict,
        ila.ite(
            (index == nu - 1) & (loop_count != nlp),
            h0_16,
            ila.ite(
                (index == ntu - 1) & (loop_count == nlp),
                index,  # And it is not correct
                index + 1)),
        index)
    # not in use
    loop_count_nxt_dummy = ila.ite(
        StartTrain | StartPredict,
        ila.ite((index == nu - 1) & (loop_count != nlp), loop_count + 1,
                loop_count), loop_count)

    uabs.set_next('rd_request', out_rd_request_nxt)
    uabs.set_next('rd_length', out_rd_length_nxt)
    uabs.set_next('rd_index', out_rd_index_nxt)
    uabs.set_next('rd_complete', out_rd_complete_nxt)
    uabs.set_next('train_input_done', train_input_done_nxt)
    uabs.set_next('predict_input_done', predict_input_done_nxt)
    uabs.set_next('upc', pc_nxt)
    uabs.set_next('index', index)
    uabs.set_next('loop_count', loop_count)
    # this has to be updated by micro_inst
    # read_request is turned off by loaduabs
    # predict_input_done, train_input_done is turned off by uabs_train/predict

    #------------------------------------
    #  Load UABS
    #------------------------------------
    # RBM interface
    # high-level interface
    rd_granted = rbm.reg(
        'rd_granted', 1
    )  # this is only used for maintaining the validity of load UABS, no other should use
    data_nxt = ila.ite(rdGrantInst,
                       ila.store(data, const(0, DATAMEM_ADDR_WIDTH),
                                 data_in[7:0]), data)  # data #
    rd_granted_nxt = ila.ite(rdGrantInst, b1, rd_granted)
    rbm.set_next('rd_granted', rd_granted_nxt)
    rbm.set_next('data', data_nxt)

    # one change is to move these into lower abstraction
    DMAload = rbm.add_microabstraction(
        'DMAload', (rd_granted == 1))  # this is sub-instruction
    w_cnt = DMAload.reg('i', 16)

    dma_rd_request = DMAload.getreg('rd_request')
    dma_rd_length = DMAload.getreg('rd_length')
    dma_rd_index = DMAload.getreg('rd_index')

    state_update_data = DMAload.getmem('data')
    state_update_rd_request = dma_rd_request
    self_update_rd_granted = DMAload.getreg('rd_granted')

    more_read_in = w_cnt < dma_rd_length[15:0]
    last_cycle = w_cnt == dma_rd_length[15:0]
    DMAload.set_init('i', h1_16)  # h0_16 )
    DMAload.set_next('i', ila.ite(more_read_in, w_cnt + 1, w_cnt))
    DMAload.set_next('rd_request', b0)  # reset to 0 immediately
    DMAload.set_next('rd_granted',
                     ila.ite(more_read_in, self_update_rd_granted, b0))
    DMAload.set_next('rd_complete', ila.ite(more_read_in, b0, b1))
    DMAload.set_next(
        'data',
        ila.ite(
            more_read_in,
            ila.store(state_update_data, w_cnt[DATAMEM_ADDR_WIDTH - 1:0],
                      data_in[7:0]),
            ila.ite(
                last_cycle,
                ila.store(state_update_data,
                          dma_rd_length[DATAMEM_ADDR_WIDTH - 1:0], h1_8),
                state_update_data)))

    #------------------------------------
    #  Train UUABS
    #------------------------------------

    TrainUabs = uabs.add_microabstraction('train', train_input_done == 1)

    sigmoid_func = TrainUabs.fun('sigmoid', 64, [16])  # DATA_sum_, 01_D
    rand_func = TrainUabs.fun('rand', 64, [])  # generate random number
    to_int_exp = TrainUabs.fun('to_int_exp', 32, [16])  #
    divide_func = TrainUabs.fun(
        'divide', 64, [32, 64])  # dp:32_32 / sum_of_pow2 64_64 = 64_1

    hidden_unit = TrainUabs.mem('hidden_unit', HIDDEN_UNIT_WIDTH, 1)
    visible_unit = TrainUabs.mem('visible_unit', VISIBLE_UNIT_WIDTH, 1)
    visibleEnergy = TrainUabs.mem('visibleEnergies', KWIDTH, 16)
    pow2 = TrainUabs.mem('pow2', KWIDTH, 32)
    pos = TrainUabs.mem('pos', POS_ADDR_WIDTH, 1)
    #neg          = TrainUabs.mem('neg', NEG_ADDR_WIDTH, 1 ) # not needed

    train_sum = TrainUabs.reg('train_sum', 16)
    train_max = TrainUabs.reg('train_max', 16)
    sumOfpow2 = TrainUabs.reg('sumOfpow2', 64)

    jstate = TrainUabs.reg('jstate', 16)
    inner_loop_pc = TrainUabs.reg('per_v_pc', 4)

    train_pc = TrainUabs.reg('train_upc', 4)  # Re-evaluate
    v_cnt = TrainUabs.reg('train_v_cnt', 16)
    h_cnt = TrainUabs.reg('train_h_cnt', 16)

    train_input = TrainUabs.getmem('data')
    edges_input = TrainUabs.getmem('edges')
    nv = TrainUabs.getreg('num_visible')
    nh = TrainUabs.getreg('num_hidden')
    nu = TrainUabs.getreg('num_users')
    ntu = TrainUabs.getreg('num_testusers')
    nlp = TrainUabs.getreg('num_loops')

    SumEdge = train_pc == 0
    SumEdgeState = const(0, 4)
    SumHidden = train_pc == 1
    SumHiddenState = const(1, 4)
    StorePos = train_pc == 3
    StorePosState = const(3, 4)
    EdgeUpdate = train_pc == 2
    EdgeUpdateState = const(2, 4)

    TrainUabs.decode_exprs = [SumEdge, SumHidden, EdgeUpdate]

    #Begin
    v_cnt_init = const(0, 16)
    h_cnt_init = const(0, 16)
    pc_init = const(0, 4)

    #SumEdge: s0
    edge_load_addr = (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt
    train_sum_s0_nxt = ila.ite(v_cnt == 0, const(0, 16), train_sum) + ila.ite(
        ila.load(train_input, v_cnt[DATAMEM_ADDR_WIDTH - 1:0]) == 1,
        fpconvert(ila.load(edges_input, edge_load_addr), FPedge, FPsum),
        const(0, 16))
    v_cnt_s0_nxt = ila.ite(v_cnt == nv, h0_16, v_cnt + 1)
    h_cnt_s0_nxt = ila.ite((v_cnt == nv),
                           ila.ite(h_cnt == nh - 1, h0_16, h_cnt + 1), h_cnt)
    #                                                    Here ^^^ is for transiting to next state
    hidden_update_s0_0 = ila.ite(
        ila.appfun(rand_func) < ila.appfun(sigmoid_func, train_sum_s0_nxt), b1,
        b0)
    hidden_update_s0_1 = ila.ite(
        v_cnt == nv,
        ila.store(hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0],
                  hidden_update_s0_0), hidden_unit)
    hidden_update_s0_2 = ila.ite((v_cnt == nv) & (h_cnt == nh - 1),
                                 ila.store(hidden_update_s0_1,
                                           nh[HIDDEN_UNIT_WIDTH - 1:0], b1),
                                 hidden_update_s0_1)
    train_pc_s0_nxt = ila.ite((v_cnt == nv) & (h_cnt == nh - 1),
                              SumHiddenState, SumEdgeState)
    # Just like init
    jstate_s0_nxt = h0_16
    inner_loop_pc_s0_nxt = h0_4

    # add prefix :
    # train_sum_nxt = ila.ite(SumEdge, train_sum_s0_nxt, ila.ite(SumHidden, ... ) )

    # SumHiddenK0-K4 : s1-s5

    # pc:1 per_v_pc : 0     1       2       3

    LastH = h_cnt == nh
    LastJ = jstate == K - 1
    LastV = (v_cnt + K == nv) | (v_cnt + K >= NUM_VISIBLE_MAX)
    SumHiddenL0 = SumHidden & (inner_loop_pc == 0)
    SumHiddenL1 = SumHidden & (inner_loop_pc == 1)
    SumHiddenL2 = SumHidden & (inner_loop_pc == 2)
    SumHiddenL3 = SumHidden & (inner_loop_pc == 3)

    h_cnt_s1_s5_L0_nxt = ila.ite(LastH, h0_16, h_cnt + 1)
    jstate_s1_s5_L0_nxt = ila.ite(LastH, ila.ite(LastJ, h0_16, jstate + 1),
                                  jstate)
    inner_loop_pc_s1_s5_L0_nxt = ila.ite(LastJ & LastH, h1_4, inner_loop_pc)

    jstate_s1_s5_L1_nxt = ila.ite(LastJ, h0_16, jstate + 1)
    inner_loop_pc_s1_s5_L1_nxt = ila.ite(LastJ, h2_4, inner_loop_pc)

    jstate_s1_s5_L2_nxt = jstate_s1_s5_L1_nxt
    inner_loop_pc_s1_s5_L2_nxt = ila.ite(LastJ, h3_4, inner_loop_pc)

    jstate_s1_s5_L3_nxt = jstate_s1_s5_L2_nxt
    inner_loop_pc_s1_s5_L3_nxt = ila.ite(
        LastJ,
        ila.ite(LastV, h0_4, h0_4),  # will choose to go back or not
        inner_loop_pc)

    def nextCondition(l0, l1, l2, l3, default):
        return ila.ite(
            SumHiddenL0, l0,
            ila.ite(
                SumHiddenL1, l1,
                ila.ite(SumHiddenL2, l2, ila.ite(SumHiddenL3, l3, default))))

    h_cnt_s1_s5_nxt = nextCondition(h_cnt_s1_s5_L0_nxt, h_cnt, h_cnt, h_cnt,
                                    h_cnt)
    v_cnt_s1_s5_nxt = ila.ite(SumHiddenL3 & LastJ,
                              ila.ite(LastV, h0_16, v_cnt + K), v_cnt)
    jstate_s1_s5_nxt = nextCondition(jstate_s1_s5_L0_nxt, jstate_s1_s5_L1_nxt,
                                     jstate_s1_s5_L2_nxt, jstate_s1_s5_L3_nxt,
                                     jstate)
    inner_loop_pc_s1_s5_nxt = nextCondition(inner_loop_pc_s1_s5_L0_nxt,
                                            inner_loop_pc_s1_s5_L1_nxt,
                                            inner_loop_pc_s1_s5_L2_nxt,
                                            inner_loop_pc_s1_s5_L3_nxt,
                                            inner_loop_pc)
    train_pc_s1_s5_nxt = ila.ite(SumHiddenL3 & LastJ & LastV, StorePosState,
                                 SumHiddenState)

    # L0
    train_sum_s1_s5_L0_nxt = ila.ite(h_cnt == 0, h0_16, train_sum) + ila.ite(
        ila.load(hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0]) == 1,
        fpconvert(ila.load(edges_input, edge_load_addr), FPedge, FPsum), h0_16)
    _train_max_origin_L0 = ila.ite(
        jstate == 0,
        fpconst(-500, FPsum).ast,
        train_max)  # make sure the first time we are comparing with init sum
    train_max_s1_s5_L0_nxt = ila.ite(
        LastH,
        ila.ite(ila.sgt(train_sum_s1_s5_L0_nxt, _train_max_origin_L0),
                train_sum_s1_s5_L0_nxt, _train_max_origin_L0), train_max)
    visibleEnergy_s1_s5_L0_nxt = ila.ite(
        LastH,
        ila.store(visibleEnergy, jstate[KWIDTH - 1:0], train_sum_s1_s5_L0_nxt),
        visibleEnergy)
    # L1
    # sum3: 64_64  ->   dp: 32_32
    _31_sum = fpconst(31, FPsum).ast
    train_max_s1_s5_L1_nxt = ila.ite(jstate == 0, train_max - _31_sum,
                                     train_max)
    _st_val_L1 = ila.load(visibleEnergy,
                          jstate[KWIDTH - 1:0]) - train_max_s1_s5_L1_nxt
    visibleEnergy_s1_s5_L1_nxt = ila.store(visibleEnergy, jstate[KWIDTH - 1:0],
                                           _st_val_L1)
    # L2
    _pow2_new_val = ila.appfun(to_int_exp,
                               ila.load(visibleEnergy, jstate[KWIDTH - 1:0]))
    _pow2_new_convert = fpconvert(_pow2_new_val, FPpow, FPsum3)
    sumOfpow2_s1_s5_L2_nxt = ila.ite(jstate == 0, h0_64,
                                     sumOfpow2) + _pow2_new_convert
    pow2_s1_s5_L2_nxt = ila.store(pow2, jstate[KWIDTH - 1:0], _pow2_new_val)
    # L3
    _probs = ila.appfun(divide_func,
                        [ila.load(pow2, jstate[KWIDTH - 1:0]), sumOfpow2])
    _RAND = ila.appfun(rand_func)
    _visible_unit_new_val = ila.ite(_probs > _RAND, b1, b0)
    _vu_idx = v_cnt + jstate
    _visible_unit_s1_s5_L3_1 = ila.store(visible_unit,
                                         _vu_idx[VISIBLE_UNIT_WIDTH - 1:0],
                                         _visible_unit_new_val)
    visible_unit_s1_s5_L3_nxt = ila.ite(
        LastJ & LastV,
        ila.store(_visible_unit_s1_s5_L3_1, nv[VISIBLE_UNIT_WIDTH - 1:0], b1),
        _visible_unit_s1_s5_L3_1)
    # when exit visible unit should be made to store 1 at nv

    train_sum_s1_s5_nxt = nextCondition(train_sum_s1_s5_L0_nxt, train_sum,
                                        train_sum, train_sum, train_sum)
    train_max_s1_s5_nxt = nextCondition(train_max_s1_s5_L0_nxt,
                                        train_max_s1_s5_L1_nxt, train_max,
                                        train_max, train_max)
    visible_unit_s1_s5_nxt = nextCondition(visible_unit, visible_unit,
                                           visible_unit,
                                           visible_unit_s1_s5_L3_nxt,
                                           visible_unit)
    visibleEnergy_s1_s5_nxt = nextCondition(visibleEnergy_s1_s5_L0_nxt,
                                            visibleEnergy_s1_s5_L1_nxt,
                                            visibleEnergy, visibleEnergy,
                                            visibleEnergy)
    sumOfpow2_s1_s5_nxt = nextCondition(sumOfpow2, sumOfpow2,
                                        sumOfpow2_s1_s5_L2_nxt, sumOfpow2,
                                        sumOfpow2)
    pow2_s1_s5_nxt = nextCondition(pow2, pow2, pow2_s1_s5_L2_nxt, pow2, pow2)

    # before s6: store pos

    h_cnt_sp_nxt = ila.ite(h_cnt == nh, h0_16, h_cnt + 1)
    v_cnt_sp_nxt = ila.ite(h_cnt == nh, ila.ite(v_cnt == nv, h0_16, v_cnt + 1),
                           v_cnt)
    _data_load = ila.load(train_input, v_cnt[VISIBLE_UNIT_WIDTH - 1:0])
    _pos_sp_cond = (_data_load != 2)
    _pos_sp_val = ila.ite(_data_load != 0, b1, b0) & ila.load(
        hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0])
    _pos_st_addr = (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt
    pos_sp_nxt = ila.store(pos, _pos_st_addr, _pos_sp_val)
    train_pc_sp_nxt = ila.ite((h_cnt == nh) & (v_cnt == nv), EdgeUpdateState,
                              StorePosState)

    # update edge : s6

    h_cnt_s6_nxt = ila.ite(h_cnt == nh, h0_16, h_cnt + 1)
    v_cnt_s6_nxt = ila.ite(h_cnt == nh, ila.ite(v_cnt == nv, v_cnt, v_cnt + 1),
                           v_cnt)

    _pos_ld_addr = (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt
    train_pos = ila.load(pos, _pos_ld_addr) != 0
    train_neg = (ila.load(
        hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0]) != 0) & (ila.load(
            visible_unit, v_cnt[VISIBLE_UNIT_WIDTH - 1:0]) != 0)
    edge_original = ila.load(edges_mem, (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt)
    edge_new = ila.ite((train_pos) & (~train_neg),
                       edge_original + fpconst(LEARN_RATE, FPedge).ast,
                       ila.ite((~train_pos) & (train_neg),
                               edge_original - fpconst(LEARN_RATE, FPedge).ast,
                               edge_original))
    edge_s6_nxt = ila.store(edges_mem, (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt,
                            edge_new)
    train_pc_s6_nxt = ila.ite((h_cnt == nh) & (v_cnt == nv), EdgeUpdateState,
                              EdgeUpdateState)
    # no need to jump back itself, because the flag: train_input_done is turned back to zero
    # don't forget to set back signals in Uabs ()

    train_done = TrainUabs.getreg('train_input_done')
    train_uabs_index = TrainUabs.getreg('index')
    train_uabs_loop_count = TrainUabs.getreg('loop_count')
    train_uabs_upc = TrainUabs.getreg('upc')

    # add prefix s6 !!!
    s6_complete = (h_cnt == nh) & (v_cnt == nv)
    index_nxt_s6_nxt = ila.ite(
        s6_complete,
        ila.ite((train_uabs_index == nu - 1) & (train_uabs_loop_count != nlp),
                h0_16, train_uabs_index + 1), train_uabs_index)

    # assert (train_uabs_index == ntu - 1) & (train_uabs_loop_count == nlp) should never happen

    loop_count_s6_nxt = ila.ite(
        s6_complete & (train_uabs_index == nu - 1) &
        (train_uabs_loop_count != nlp), train_uabs_loop_count + 1,
        train_uabs_loop_count)
    upc_s6_nxt = ila.ite(s6_complete, StartReadState, train_uabs_upc)
    train_input_done_s6_nxt_nxt = ila.ite(s6_complete, b0, train_done)

    # data -> hidden_unit -> visible_unit -> edge
    # data -> edge

    # add
    def TrainNext(e1, e2, e3, default):
        """Pick a next-state expression by training step, first match wins.

        Builds the ``ila.ite`` chain: ``e1`` if ``SumEdge``, else ``e2`` if
        ``SumHidden``, else ``e3`` if ``EdgeUpdate``, else ``default``.
        """
        # Built inside-out so the calls run in the same order as a nested
        # expression would evaluate them.
        edge_update_case = ila.ite(EdgeUpdate, e3, default)
        sum_hidden_case = ila.ite(SumHidden, e2, edge_update_case)
        return ila.ite(SumEdge, e1, sum_hidden_case)

    def TrainNextSP(e1, e2, e3, e4, default):
        """Pick a next-state expression by training step, including StorePos.

        Builds the ``ila.ite`` chain: ``e1`` if ``SumEdge``, else ``e2`` if
        ``SumHidden``, else ``e3`` if ``StorePos``, else ``e4`` if
        ``EdgeUpdate``, else ``default``.
        """
        cases = (
            (SumEdge, e1),
            (SumHidden, e2),
            (StorePos, e3),
            (EdgeUpdate, e4),
        )
        # Fold from the last case backwards so the first case ends up
        # outermost (highest priority).
        chosen = default
        for step_active, step_value in reversed(cases):
            chosen = ila.ite(step_active, step_value, chosen)
        return chosen

    def TrainChoice5(name, e1, e2, e3, default):
        """Forward ``name`` and the four candidate expressions to ``ila.choice``.

        The candidates are passed positionally in the order
        ``e1, e2, e3, default``.
        """
        candidates = (e1, e2, e3, default)
        return ila.choice(name, *candidates)

    def TrainChoice4(name, e1, e2, default):
        """Forward ``name`` and the three candidate expressions to ``ila.choice``.

        The candidates are passed positionally in the order
        ``e1, e2, default``.
        """
        candidates = (e1, e2, default)
        return ila.choice(name, *candidates)

    def TrainChoice3(name, e1, default):
        """Forward ``name`` and the two candidate expressions to ``ila.choice``.

        The candidates are passed positionally in the order ``e1, default``.
        """
        candidates = (e1, default)
        return ila.choice(name, *candidates)

    TrainUabs.set_init('train_upc', pc_init)
    TrainUabs.set_init('train_v_cnt', v_cnt_init)
    TrainUabs.set_init('train_h_cnt', h_cnt_init)

    TrainUabs.set_next(
        'jstate', TrainNext(jstate_s0_nxt, jstate_s1_s5_nxt, jstate, jstate))
    TrainUabs.set_next(
        'train_sum',
        TrainNext(train_sum_s0_nxt, train_sum_s1_s5_nxt, train_sum, train_sum))
    TrainUabs.set_next(
        'train_v_cnt',
        TrainNextSP(v_cnt_s0_nxt, v_cnt_s1_s5_nxt, v_cnt_sp_nxt, v_cnt_s6_nxt,
                    v_cnt))
    TrainUabs.set_next(
        'train_h_cnt',
        TrainNextSP(h_cnt_s0_nxt, h_cnt_s1_s5_nxt, h_cnt_sp_nxt, h_cnt_s6_nxt,
                    h_cnt))
    TrainUabs.set_next(
        'train_upc',
        TrainNextSP(train_pc_s0_nxt, train_pc_s1_s5_nxt, train_pc_sp_nxt,
                    train_pc_s6_nxt, train_pc))

    TrainUabs.set_next(
        'train_max',
        TrainNext(train_max, train_max_s1_s5_nxt, train_max, train_max))
    TrainUabs.set_next(
        'hidden_unit',
        TrainNext(hidden_update_s0_2, hidden_unit, hidden_unit, hidden_unit))
    TrainUabs.set_next(
        'visible_unit',
        TrainNext(visible_unit, visible_unit_s1_s5_nxt, visible_unit,
                  visible_unit))
    TrainUabs.set_next('edges',
                       TrainNext(edges_mem, edges_mem, edge_s6_nxt, edges_mem))
    TrainUabs.set_next(
        'index',
        TrainNext(train_uabs_index, train_uabs_index, index_nxt_s6_nxt,
                  train_uabs_index))
    TrainUabs.set_next(
        'loop_count',
        TrainNext(train_uabs_loop_count, train_uabs_loop_count,
                  loop_count_s6_nxt, train_uabs_loop_count))
    TrainUabs.set_next(
        'upc',
        TrainNext(train_uabs_upc, train_uabs_upc, upc_s6_nxt, train_uabs_upc))
    TrainUabs.set_next(
        'train_input_done',
        TrainNext(train_done, train_done, train_input_done_s6_nxt_nxt,
                  train_done))
    # newly added
    TrainUabs.set_next(
        'visibleEnergies',
        TrainNext(visibleEnergy, visibleEnergy_s1_s5_nxt, visibleEnergy,
                  visibleEnergy))
    TrainUabs.set_next(
        'sumOfpow2',
        TrainNext(sumOfpow2, sumOfpow2_s1_s5_nxt, sumOfpow2, sumOfpow2))
    TrainUabs.set_next('pow2', TrainNext(pow2, pow2_s1_s5_nxt, pow2, pow2))
    TrainUabs.set_next('pos', ila.ite(StorePos, pos_sp_nxt, pos))
    TrainUabs.set_next(
        'per_v_pc',
        TrainNext(inner_loop_pc_s0_nxt, inner_loop_pc_s1_s5_nxt, inner_loop_pc,
                  inner_loop_pc))

    #------------------------------------
    #  Predict UUABS
    #------------------------------------
    # data -> predict_result

    PredictUabs = uabs.add_microabstraction('predict', predict_input_done == 1)

    sigmoid_func = PredictUabs.fun('sigmoid', 64, [16])  # DATA_sum_, 01_D
    rand_func = PredictUabs.fun('rand', 64, [])  # generate random number
    to_int_exp = PredictUabs.fun('to_int_exp', 32, [16])  #
    round_func = PredictUabs.fun('round', 8, [32])  # 05_D -> u8
    divide_func = PredictUabs.fun(
        'divide', 64, [32, 64])  # dp:32_32 / sum_of_pow2 64_64 = 64_1

    hidden_unit = PredictUabs.mem('hidden_unit', HIDDEN_UNIT_WIDTH, 1)
    visibleEnergy = PredictUabs.mem('visibleEnergies', KWIDTH, 16)
    predict_result = PredictUabs.getmem('predict_result')
    predict_sum = PredictUabs.reg('predict_sum', 16)
    predict_max = PredictUabs.reg('predict_max', 16)
    sumOfpow2 = PredictUabs.reg('sumOfpow2', 64)
    pow2 = PredictUabs.mem('pow2', KWIDTH, 32)

    predict_vector = PredictUabs.mem('predict_vector', VISIBLE_UNIT_WIDTH, 1)
    inner_loop_pc = PredictUabs.reg('per_v_pc', 4)

    count = PredictUabs.reg('count', 8)
    jstate = PredictUabs.reg('jstate', 16)
    expectation = PredictUabs.reg('expectation', 32)
    prediction = PredictUabs.reg('prediction', 8)

    predict_pc = PredictUabs.reg('predict_upc', 4)  # Re-evaluate
    v_cnt = PredictUabs.reg('predict_v_cnt', 16)
    h_cnt = PredictUabs.reg('predict_h_cnt', 16)

    predict_input = PredictUabs.getmem('data')
    edges_input = PredictUabs.getmem('edges')
    nv = PredictUabs.getreg('num_visible')
    nh = PredictUabs.getreg('num_hidden')
    nu = PredictUabs.getreg('num_users')
    ntu = PredictUabs.getreg('num_testusers')
    nlp = PredictUabs.getreg('num_loops')

    SumEdge = predict_pc == 0
    SumEdgeState = const(0, 4)
    SumHidden = predict_pc == 1
    SumHiddenState = const(1, 4)
    GenResult = predict_pc == 3
    GenResultState = const(3, 4)
    WaitForWrite = predict_pc == 2
    WaitForWriteState = const(2, 4)

    PredictUabs.decode_exprs = [SumEdge, SumHidden, WaitForWrite]

    #Begin
    v_cnt_init = const(0, 16)
    h_cnt_init = const(0, 16)
    pc_init = const(0, 4)

    #SumEdge: s0
    edge_load_addr = (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt
    predict_sum_s0_nxt = ila.ite(v_cnt == 0, const(
        0, 16), predict_sum) + ila.ite(
            ila.load(predict_input, v_cnt[DATAMEM_ADDR_WIDTH - 1:0]) == 1,
            fpconvert(ila.load(edges_input, edge_load_addr), FPedge, FPsum),
            const(0, 16))
    v_cnt_s0_nxt = ila.ite(v_cnt == nv, h0_16, v_cnt + 1)
    h_cnt_s0_nxt = ila.ite((v_cnt == nv),
                           ila.ite(h_cnt == nh - 1, h0_16, h_cnt + 1), h_cnt)
    #                                                     Here ^^^ is for transiting to next state

    hidden_update_s0_0 = ila.ite(
        fpconst(0.5, FP01_D).ast < ila.appfun(sigmoid_func,
                                              predict_sum_s0_nxt), b1, b0)
    hidden_update_s0_1 = ila.ite(
        v_cnt == nv,
        ila.store(hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0],
                  hidden_update_s0_0), hidden_unit)
    hidden_update_s0_2 = ila.ite((v_cnt == nv) & (h_cnt == nh - 1),
                                 ila.store(hidden_update_s0_1,
                                           nh[HIDDEN_UNIT_WIDTH - 1:0], b1),
                                 hidden_update_s0_1)
    hidden_update_s0_next = hidden_update_s0_2
    predict_pc_s0_nxt = ila.ite((v_cnt == nv) & (h_cnt == nh - 1),
                                SumHiddenState, SumEdgeState)

    jstate_s0_nxt = h0_16
    count_s0_nxt = ila.const(0, 8)
    inner_loop_pc_s0_nxt = h0_4
    # add prefix :
    # predict_sum_nxt = ila.ite(SumEdge, predict_sum_s0_nxt, ila.ite(SumHidden, ... ) )

    #-----------------------------
    # SumHiddensK0-K4 : s1-s5
    #
    #-----------------------------

    LastH = h_cnt == nh
    LastJ = jstate == K - 1
    LastV = (v_cnt + K == nv) | (v_cnt + K >= NUM_VISIBLE_MAX)
    SumHiddenL0 = SumHidden & (inner_loop_pc == 0)
    SumHiddenL1 = SumHidden & (inner_loop_pc == 1)
    SumHiddenL2 = SumHidden & (inner_loop_pc == 2)
    SumHiddenL3 = SumHidden & (inner_loop_pc == 3)
    SumHiddenL4 = SumHidden & (inner_loop_pc == 4)

    h_cnt_s1_s5_L0_nxt = ila.ite(LastH, h0_16, h_cnt + 1)
    jstate_s1_s5_L0_nxt = ila.ite(LastH, ila.ite(LastJ, h0_16, jstate + 1),
                                  jstate)
    inner_loop_pc_s1_s5_L0_nxt = ila.ite(LastJ & LastH, h1_4, inner_loop_pc)

    jstate_s1_s5_L1_nxt = ila.ite(LastJ, h0_16, jstate + 1)
    inner_loop_pc_s1_s5_L1_nxt = ila.ite(LastJ, h2_4, inner_loop_pc)

    jstate_s1_s5_L2_nxt = jstate_s1_s5_L1_nxt
    inner_loop_pc_s1_s5_L2_nxt = ila.ite(LastJ, h3_4, inner_loop_pc)

    jstate_s1_s5_L3_nxt = jstate_s1_s5_L2_nxt
    inner_loop_pc_s1_s5_L3_nxt = ila.ite(LastJ, h4_4, inner_loop_pc)

    jstate_s1_s5_L4_nxt = jstate_s1_s5_L3_nxt
    inner_loop_pc_s1_s5_L4_nxt = ila.ite(
        LastJ,
        ila.ite(LastV, h0_4, h0_4),  # will choose to go back or not
        inner_loop_pc)

    def nextCondition(l0, l1, l2, l3, l4, default):
        """Pick an expression by the active SumHidden inner-loop step.

        Builds the ``ila.ite`` chain: ``l0`` if ``SumHiddenL0``, else ``l1``
        if ``SumHiddenL1``, and so on through ``l4`` / ``SumHiddenL4``,
        falling back to ``default`` when none of them holds.
        """
        cases = (
            (SumHiddenL0, l0),
            (SumHiddenL1, l1),
            (SumHiddenL2, l2),
            (SumHiddenL3, l3),
            (SumHiddenL4, l4),
        )
        # Fold from L4 back to L0 so that L0 is the outermost test.
        chosen = default
        for level_active, level_value in reversed(cases):
            chosen = ila.ite(level_active, level_value, chosen)
        return chosen

    h_cnt_s1_s5_nxt = nextCondition(h_cnt_s1_s5_L0_nxt, h_cnt, h_cnt, h_cnt,
                                    h_cnt, h_cnt)
    v_cnt_s1_s5_nxt = ila.ite(SumHiddenL4 & LastJ,
                              ila.ite(LastV, h0_16, v_cnt + K), v_cnt)
    jstate_s1_s5_nxt = nextCondition(jstate_s1_s5_L0_nxt, jstate_s1_s5_L1_nxt,
                                     jstate_s1_s5_L2_nxt, jstate_s1_s5_L3_nxt,
                                     jstate_s1_s5_L4_nxt, jstate)

    inner_loop_pc_s1_s5_nxt = nextCondition(inner_loop_pc_s1_s5_L0_nxt,
                                            inner_loop_pc_s1_s5_L1_nxt,
                                            inner_loop_pc_s1_s5_L2_nxt,
                                            inner_loop_pc_s1_s5_L3_nxt,
                                            inner_loop_pc_s1_s5_L4_nxt,
                                            inner_loop_pc)

    predict_pc_s1_s5_nxt = ila.ite(SumHiddenL4 & LastJ & LastV, GenResultState,
                                   SumHiddenState)

    # L0
    predict_sum_s1_s5_L0_nxt = ila.ite(
        h_cnt == 0, h0_16, predict_sum) + ila.ite(
            ila.load(hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0]) == 1,
            fpconvert(ila.load(edges_input, edge_load_addr), FPedge, FPsum),
            h0_16)
    _predict_max_origin_L0 = ila.ite(
        jstate == 0,
        fpconst(-500, FPsum).ast,
        predict_max)  # make sure the first time we are comparing with init sum
    predict_max_s1_s5_L0_nxt = ila.ite(
        LastH,
        ila.ite(ila.sgt(predict_sum_s1_s5_L0_nxt, _predict_max_origin_L0),
                predict_sum_s1_s5_L0_nxt, _predict_max_origin_L0), predict_max)
    visibleEnergy_s1_s5_L0_nxt = ila.ite(
        LastH,
        ila.store(visibleEnergy, jstate[KWIDTH - 1:0],
                  predict_sum_s1_s5_L0_nxt), visibleEnergy)
    # L1
    # sum3: 64_64  ->   dp: 32_32
    _31_sum = fpconst(31, FPsum).ast
    predict_max_s1_s5_L1_nxt = ila.ite(jstate == 0, predict_max - _31_sum,
                                       predict_max)
    _st_val_L1 = ila.load(visibleEnergy,
                          jstate[KWIDTH - 1:0]) - predict_max_s1_s5_L1_nxt
    visibleEnergy_s1_s5_L1_nxt = ila.store(visibleEnergy, jstate[KWIDTH - 1:0],
                                           _st_val_L1)
    # L2
    _pow2_new_val = ila.appfun(to_int_exp,
                               ila.load(visibleEnergy, jstate[KWIDTH - 1:0]))
    _pow2_new_convert = fpconvert(_pow2_new_val, FPpow, FPsum3)
    sumOfpow2_s1_s5_L2_nxt = ila.ite(jstate == 0, h0_64,
                                     sumOfpow2) + _pow2_new_convert
    pow2_s1_s5_L2_nxt = ila.store(pow2, jstate[KWIDTH - 1:0], _pow2_new_val)
    # L3
    _probs = ila.appfun(divide_func,
                        [ila.load(pow2, jstate[KWIDTH - 1:0]), sumOfpow2])
    _mul = fixpoint(_probs, FP01_D) * fixpoint(jstate, FPu16)
    expectation_s1_s5_L3_nxt = ila.ite(jstate == 0, h0_32,
                                       expectation) + _mul.toFormat(FP05_D)
    # L4
    _prediction = ila.zero_extend(ila.appfun(round_func, [expectation]), 16)
    _pv_val = ila.ite(jstate == _prediction, b1, b0)
    _pv_idx = v_cnt + jstate
    _first_store = ila.store(predict_vector, _pv_idx[VISIBLE_UNIT_WIDTH - 1:0],
                             _pv_val)
    predict_vector_s1_s5_L4_nxt = ila.ite(
        SumHiddenL4 & LastV & LastJ,
        ila.store(_first_store, nv[VISIBLE_UNIT_WIDTH - 1:0], b1),
        _first_store)

    predict_sum_s1_s5_nxt = nextCondition(predict_sum_s1_s5_L0_nxt,
                                          predict_sum, predict_sum,
                                          predict_sum, predict_sum,
                                          predict_sum)
    predict_max_s1_s5_nxt = nextCondition(predict_max_s1_s5_L0_nxt,
                                          predict_max_s1_s5_L1_nxt,
                                          predict_max, predict_max,
                                          predict_max, predict_max)
    visibleEnergy_s1_s5_nxt = nextCondition(visibleEnergy_s1_s5_L0_nxt,
                                            visibleEnergy_s1_s5_L1_nxt,
                                            visibleEnergy, visibleEnergy,
                                            visibleEnergy, visibleEnergy)
    sumOfpow2_s1_s5_nxt = nextCondition(sumOfpow2, sumOfpow2,
                                        sumOfpow2_s1_s5_L2_nxt, sumOfpow2,
                                        sumOfpow2, sumOfpow2)
    pow2_s1_s5_nxt = nextCondition(pow2, pow2, pow2_s1_s5_L2_nxt, pow2, pow2,
                                   pow2)
    expectation_s1_s5_nxt = ila.ite(SumHiddenL3, expectation_s1_s5_L3_nxt,
                                    expectation)
    predict_vector_s1_s5_nxt = ila.ite(SumHiddenL4,
                                       predict_vector_s1_s5_L4_nxt,
                                       predict_vector)
    count_s1_s5_nxt = ila.ite(SumHiddenL4 & LastV & LastJ, h0_8, count)

    # before s6: store pos
    LastV = (v_cnt + K == nv) | (v_cnt + K >= NUM_VISIBLE_MAX)
    LastJ = jstate == K - 1
    v_cnt_sp_nxt = ila.ite(LastV, v_cnt + K, v_cnt + K)
    jstate_sp_nxt = ila.ite(LastJ, h0_16, jstate + 1)

    _prediction_old = ila.ite(jstate == 0, h0_8, prediction)
    _pv_idx = v_cnt + jstate
    _predict_result_sp_val = ila.load(predict_vector,
                                      _pv_idx[VISIBLE_UNIT_WIDTH - 1:0])

    prediction_sp_nxt = ila.ite(_predict_result_sp_val == 1, (jstate + 1)[7:0],
                                _prediction_old)
    count_sp_nxt = ila.ite(LastJ, count + 1, count)
    predict_result_sp_nxt = ila.ite(
        LastJ,
        ila.store(predict_result, count[PREDICT_RESULT_WIDTH - 1:0],
                  prediction), predict_result)
    predict_pc_sp_nxt = ila.ite(LastV & LastJ, WaitForWriteState,
                                GenResultState)

    wr_complete = PredictUabs.getreg('wr_complete')
    wr_req = PredictUabs.getreg('wr_request')
    wr_len = PredictUabs.getreg('wr_length')
    wr_idx = PredictUabs.getreg('wr_index')
    cur_idx = PredictUabs.getreg('index')  # 32

    exitLoop = LastV & LastJ
    wr_request_sp_nxt = ila.ite(exitLoop, b1, wr_req)
    wr_index_sp_nxt = ila.ite(
        exitLoop,
        ila.zero_extend(nm, 32) * ila.zero_extend(cur_idx, 32), wr_idx)
    wr_length_sp_nxt = ila.ite(exitLoop, ila.zero_extend(nm, 32), wr_len)
    wr_complete_sp_nxt = ila.ite(exitLoop, b0, wr_complete)
    # s6:

    #---------------------
    # update edge : s6
    #---------------------

    FinishOneRound = (wr_req == 0) & (wr_complete == 1)

    predict_pc_s6_nxt = ila.ite(FinishOneRound, WaitForWriteState,
                                WaitForWriteState)
    # its value does not matter because it will be terminated by predict_input_done
    # don't forget to set back signals in Uabs ()

    predict_done = PredictUabs.getreg('predict_input_done')
    predict_uabs_index = PredictUabs.getreg('index')
    predict_uabs_loop_count = PredictUabs.getreg('loop_count')
    predict_uabs_upc = PredictUabs.getreg('upc')
    all_done = PredictUabs.getreg('done')

    # add prefix s6 !!!
    index_nxt_s6_nxt = ila.ite(
        FinishOneRound,
        ila.ite(
            (predict_uabs_index == ntu - 1) & (predict_uabs_loop_count == nlp),
            predict_uabs_index, predict_uabs_index + 1), predict_uabs_index)

    wr_complete_s6_nxt = ila.ite(FinishOneRound, b0, wr_complete)
    # assert (predict_uabs_index == nu - 1)  & (predict_uabs_loop_count != nlp) should never happen

    #loop_count_s6_nxt = ila.ite( (predict_uabs_index == nu - 1)  & (predict_uabs_loop_count != nlp) , predict_uabs_loop_count + 1, predict_uabs_loop_count )

    upc_s6_nxt = ila.ite(
        FinishOneRound,
        ila.ite(
            (predict_uabs_index == ntu - 1) & (predict_uabs_loop_count == nlp),
            FinishState, StartReadState), predict_uabs_upc)
    predict_input_done_s6_nxt_nxt = ila.ite(FinishOneRound, b0, predict_done)

    all_done_s6_nxt = ila.ite(
        FinishOneRound & (predict_uabs_index == ntu - 1) &
        (predict_uabs_loop_count == nlp), b1, b0)

    # data -> hidden_unit -> visible_unit -> edge
    # data -> edge

    # add

    # add
    def predictNext(e1, e2, e3, default):
        """Pick a next-state expression by prediction step, first match wins.

        Builds the ``ila.ite`` chain: ``e1`` if ``SumEdge``, else ``e2`` if
        ``SumHidden``, else ``e3`` if ``WaitForWrite``, else ``default``.
        """
        # Built inside-out so the calls run in the same order as a nested
        # expression would evaluate them.
        wait_for_write_case = ila.ite(WaitForWrite, e3, default)
        sum_hidden_case = ila.ite(SumHidden, e2, wait_for_write_case)
        return ila.ite(SumEdge, e1, sum_hidden_case)

    def predictNextSp(e1, e2, e3, e4, default):
        """Pick a next-state expression by prediction step, including GenResult.

        Builds the ``ila.ite`` chain: ``e1`` if ``SumEdge``, else ``e2`` if
        ``SumHidden``, else ``e3`` if ``GenResult``, else ``e4`` if
        ``WaitForWrite``, else ``default``.
        """
        cases = (
            (SumEdge, e1),
            (SumHidden, e2),
            (GenResult, e3),
            (WaitForWrite, e4),
        )
        # Fold from the last case backwards so the first case ends up
        # outermost (highest priority).
        chosen = default
        for step_active, step_value in reversed(cases):
            chosen = ila.ite(step_active, step_value, chosen)
        return chosen

    def ite(inst, e, default):
        """Thin wrapper: return ``ila.ite(inst, e, default)``."""
        selected = ila.ite(inst, e, default)
        return selected

    PredictUabs.set_init('predict_upc', pc_init)
    PredictUabs.set_init('predict_v_cnt', v_cnt_init)
    PredictUabs.set_init('predict_h_cnt', h_cnt_init)

    PredictUabs.set_next(
        'jstate',
        predictNextSp(jstate_s0_nxt, jstate_s1_s5_nxt, jstate_sp_nxt, jstate,
                      jstate))
    PredictUabs.set_next(
        'predict_sum',
        predictNext(predict_sum_s0_nxt, predict_sum_s1_s5_nxt, predict_sum,
                    predict_sum))
    PredictUabs.set_next(
        'predict_v_cnt',
        predictNextSp(v_cnt_s0_nxt, v_cnt_s1_s5_nxt, v_cnt_sp_nxt, v_cnt,
                      v_cnt))
    PredictUabs.set_next(
        'predict_h_cnt',
        predictNext(h_cnt_s0_nxt, h_cnt_s1_s5_nxt, h_cnt, h_cnt))
    PredictUabs.set_next(
        'predict_upc',
        predictNextSp(predict_pc_s0_nxt, predict_pc_s1_s5_nxt,
                      predict_pc_sp_nxt, predict_pc_s6_nxt, predict_pc))
    PredictUabs.set_next(
        'predict_max',
        predictNext(predict_max, predict_max_s1_s5_nxt, predict_max,
                    predict_max))
    PredictUabs.set_next(
        'hidden_unit',
        predictNext(hidden_update_s0_2, hidden_unit, hidden_unit, hidden_unit))
    PredictUabs.set_next(
        'count',
        predictNextSp(count_s0_nxt, count_s1_s5_nxt, count_sp_nxt, count,
                      count))
    PredictUabs.set_next(
        'per_v_pc',
        predictNext(inner_loop_pc_s0_nxt, inner_loop_pc_s1_s5_nxt,
                    inner_loop_pc, inner_loop_pc))

    PredictUabs.set_next(
        'index',
        predictNext(predict_uabs_index, predict_uabs_index, index_nxt_s6_nxt,
                    predict_uabs_index))
    PredictUabs.set_next(
        'upc',
        predictNext(predict_uabs_upc, predict_uabs_upc, upc_s6_nxt,
                    predict_uabs_upc))
    PredictUabs.set_next(
        'predict_input_done',
        predictNext(predict_done, predict_done, predict_input_done_s6_nxt_nxt,
                    predict_done))
    PredictUabs.set_next(
        'done', predictNext(all_done, all_done, all_done_s6_nxt, all_done))

    PredictUabs.set_next(
        'wr_request',
        predictNextSp(wr_req, wr_req, wr_request_sp_nxt, wr_req, wr_req))
    PredictUabs.set_next(
        'wr_length',
        predictNextSp(wr_len, wr_len, wr_length_sp_nxt, wr_len, wr_len))
    PredictUabs.set_next(
        'wr_index',
        predictNextSp(wr_idx, wr_idx, wr_index_sp_nxt, wr_idx, wr_idx))
    PredictUabs.set_next(
        'wr_complete',
        predictNextSp(wr_complete, wr_complete, wr_complete_sp_nxt,
                      wr_complete_s6_nxt, wr_complete))
    # newly added
    PredictUabs.set_next(
        'visibleEnergies',
        predictNext(visibleEnergy, visibleEnergy_s1_s5_nxt, visibleEnergy,
                    visibleEnergy))
    PredictUabs.set_next(
        'sumOfpow2',
        predictNext(sumOfpow2, sumOfpow2_s1_s5_nxt, sumOfpow2, sumOfpow2))
    PredictUabs.set_next('pow2', predictNext(pow2, pow2_s1_s5_nxt, pow2, pow2))
    PredictUabs.set_next(
        'expectation',
        predictNext(expectation, expectation_s1_s5_nxt, expectation,
                    expectation))
    PredictUabs.set_next(
        'predict_vector',
        predictNext(predict_vector, predict_vector_s1_s5_nxt, predict_vector,
                    predict_vector))
    PredictUabs.set_next('prediction',
                         ite(GenResult, prediction_sp_nxt, prediction))
    PredictUabs.set_next('predict_result',
                         ite(GenResult, predict_result_sp_nxt, predict_result))

    #------------------------------------
    #  Store UABS
    #------------------------------------
    # store is triggered by inst as uabs?

    # wr_grant == 1 is an instruction
    wr_granted = rbm.reg('wr_granted', 1)
    rbm.set_next('wr_granted',
                 ila.ite((wr_request & wr_grant) == 1, b1, wr_granted))
    data_out_1st_set = ila.zero_extend(
        ila.load(predict_result, const(0, PREDICT_RESULT_WIDTH)), 32)
    rbm.set_next(
        'data_out',
        ila.ite((wr_request & wr_grant) == 1, data_out_1st_set, data_out))
    # This is a hard decision:
    # because we use set_next, the reaction we define here will appear in the next cycle

    StoreUabs = rbm.add_microabstraction('store', wr_granted == 1)
    store_idx = StoreUabs.reg('i', 16)
    nm = StoreUabs.getreg('num_movies')
    wr_granted = StoreUabs.getreg('wr_granted')
    wr_request = StoreUabs.getreg('wr_request')
    wr_complete = StoreUabs.getreg('wr_complete')
    predict_result = StoreUabs.getmem('predict_result')

    StoreUabs.set_init('i', h1_16)
    StoreUabs.set_next('i', ila.ite(store_idx < nm, store_idx + 1, store_idx))
    StoreUabs.set_next('wr_granted', ila.ite(store_idx < nm, wr_granted, b0))
    StoreUabs.set_next('wr_request', ila.ite(store_idx == 0, b0, wr_request))
    StoreUabs.set_next('wr_complete', ila.ite(store_idx < nm, wr_complete, b1))
    data_out = StoreUabs.getreg('data_out')
    # possibly one cycle earlier
    StoreUabs.set_next(
        'data_out',
        ila.zero_extend(
            ila.load(predict_result, store_idx[PREDICT_RESULT_WIDTH - 1:0]),
            32))

    #---------------------------
    # Add no next
    #
    def keepNC(Abs, name):
        """Set the next value of register ``name`` in ``Abs`` to its current value.

        Reads the register with ``Abs.getreg(name)`` and passes it straight
        to ``Abs.set_next(name, ...)``.
        """
        current_value = Abs.getreg(name)
        Abs.set_next(name, current_value)

    def keepMemNC(Abs, name):
        """Set the next value of memory ``name`` in ``Abs`` to its current value.

        Reads the memory with ``Abs.getmem(name)`` and passes it straight to
        ``Abs.set_next(name, ...)``.
        """
        current_mem = Abs.getmem(name)
        Abs.set_next(name, current_mem)

    keepNC(rbm, 'done')
    keepNC(rbm, 'wr_request')
    keepNC(rbm, 'wr_index')
    keepNC(rbm, 'wr_length')
    keepNC(rbm, 'rd_index')
    keepNC(rbm, 'rd_length')
    keepNC(rbm, 'rd_request')

    keepMemNC(uabs, 'edges')

    keepNC(rbm, 'rd_complete')
    keepNC(rbm, 'wr_complete')

    return rbm
예제 #16
0
def createAESILA(enable_ps):
    """Build the "aes" abstraction and its "aes_compute" micro-abstraction.

    The top-level abstraction declares the command inputs (``cmd``,
    ``cmdaddr``, ``cmddata``), the architectural registers, the ``XRAM``
    memory and the ``aes`` function.  The micro-abstraction is created with
    the condition ``aes_state != 0`` and declares the per-block working
    registers.  Most next-state functions are given as ``ila.choice`` over
    candidate expressions.

    Args:
        enable_ps: value assigned to ``enable_parameterized_synthesis`` on
            the top-level abstraction.

    Returns:
        The tuple ``(top-level abstraction, micro-abstraction)``.
    """
    top = ila.Abstraction("aes")
    top.enable_parameterized_synthesis = enable_ps

    # Command interface inputs.
    cmd = top.inp('cmd', 2)
    cmd_addr = top.inp('cmdaddr', 16)
    cmd_data = top.inp('cmddata', 8)

    # Architectural registers.
    aes_state = top.reg('aes_state', 2)
    op_addr = top.reg('aes_addr', 16)
    op_len = top.reg('aes_len', 16)
    counter = top.reg('aes_ctr', 128)
    key = top.reg('aes_key0', 128)

    # Memory and function used by the micro-instructions.
    xram = top.mem('XRAM', 16, 8)
    aes_fn = top.fun('aes', 128, [128, 128, 128])

    # Fetch: the concatenated command inputs; valid when cmd == 2.
    top.fetch_expr = ila.concat([cmd, cmd_addr, cmd_data])
    top.fetch_valid = (cmd == 2)

    # Decode: one expression per command address in [0xff00, 0xff30).
    write_decodes = []
    for addr in xrange(0xff00, 0xff30):
        write_decodes.append((cmd == 2) & (cmd_addr == addr))
    top.decode_exprs = write_decodes

    micro = top.add_microabstraction('aes_compute', aes_state != 0)

    # Multi-byte registers: next value is either a chunk write of cmddata
    # or the unchanged register.
    multibyte_regs = (
        ('aes_addr', op_addr),
        ('aes_len', op_len),
        ('aes_ctr', counter),
        ('aes_key0', key),
    )
    for reg_name, reg in multibyte_regs:
        chunk_write = ila.writechunk('wr_' + reg_name, reg, cmd_data)
        reg_next = ila.choice('nxt_' + reg_name, [chunk_write, reg])
        top.set_next(reg_name, reg_next)

    # aes_state: unchanged, or set to 1 when cmddata == 1.
    start_or_hold = ila.ite(cmd_data == 1, top.const(1, 2), aes_state)
    top.set_next('aes_state',
                 ila.choice('state_next', [aes_state, start_or_hold]))

    # XRAM is unchanged at the top level.
    top.set_next('XRAM', xram)

    ################################
    #           Micro-ILA
    ################################

    # Working registers of the micro-abstraction.
    rd_data = micro.reg('rd_data', 128)
    enc_data = micro.reg('enc_data', 128)
    byte_cnt = micro.reg('byte_cnt', 4)
    oped_byte_cnt = micro.reg('oped_byte_cnt', 16)
    blk_cnt = micro.reg('blk_cnt', 16)
    aes_time = micro.reg('aes_time', 5)
    uaes_ctr = micro.reg('uaes_ctr', 128)

    # Counters start at zero; uaes_ctr starts from the top-level aes_ctr.
    zero_initialised = (
        ('byte_cnt', 4),
        ('blk_cnt', 16),
        ('oped_byte_cnt', 16),
        ('aes_time', 5),
    )
    for reg_name, width in zero_initialised:
        micro.set_init(reg_name, micro.const(0, width))
    micro.set_init('uaes_ctr', top.getreg('aes_ctr'))
    uxram = top.getmem('XRAM')

    byte_cnt_16b = ila.zero_extend(byte_cnt, 16)

    # Micro fetch/decode: one decode expression per (byte_cnt, state) pair,
    # byte_cnt varying slowest.
    micro.fetch_expr = aes_state
    micro_decodes = []
    for j in xrange(16):
        for i in [1, 2, 3]:
            micro_decodes.append((aes_state == i) & (byte_cnt == j))
    micro.decode_exprs = micro_decodes

    # byte_cnt: 0, +1, or no change.
    byte_cnt_inc = byte_cnt + 1
    micro.set_next(
        'byte_cnt',
        ila.choice('byte_cnt_nxt', [top.const(0, 4), byte_cnt_inc, byte_cnt]))

    # oped_byte_cnt: 0, +16, or no change.
    oped_byte_cnt_inc = oped_byte_cnt + 16
    micro.set_next(
        'oped_byte_cnt',
        ila.choice('oped_byte_cnt_nxt',
                   [top.const(0, 16), oped_byte_cnt_inc, oped_byte_cnt]))

    # blk_cnt: 0, no change, +16, or +16 only while more blocks remain.
    blk_cnt_inc = blk_cnt + 16
    more_blocks = (oped_byte_cnt_inc < op_len)
    blk_cnt_candidates = [
        top.const(0, 16),
        blk_cnt,
        blk_cnt_inc,
        ila.ite(more_blocks, blk_cnt_inc, blk_cnt),
    ]
    micro.set_next('blk_cnt', ila.choice('blk_cnt_nxt', blk_cnt_candidates))

    # aes_time: increments until it reaches 31, where it holds its value.
    aes_time_inc = aes_time + 1
    aes_time_at_max = aes_time == top.const(31, 5)
    aes_time_counting = ila.ite(aes_time_at_max, aes_time, aes_time_inc)
    aes_time_next = ila.choice(
        "aes_timeC",
        top.const(0, 5),
        aes_time_counting,
        ila.ite(more_blocks, top.const(0, 5), aes_time_counting))
    aes_time_enough = aes_time > top.const(10, 5)
    micro.set_next('aes_time', aes_time_next)

    # uaes_ctr: no change, advance by an amount in the 'addvalue' range
    # while more blocks remain, or reload from aes_ctr.
    ctr_step = ila.inrange('addvalue', micro.const(1, 128),
                           micro.const(128, 128))
    ctr_advanced = ila.ite(more_blocks, uaes_ctr + ctr_step, uaes_ctr)
    micro.set_next(
        'uaes_ctr',
        ila.choice('uaes_ctr_nxt', uaes_ctr, ctr_advanced, counter))

    # aes_state as seen from the micro-abstraction.
    ustate = micro.getreg('aes_state')
    ustate_candidates = [
        top.const(0, 2),
        top.const(1, 2),
        top.const(2, 2),
        top.const(3, 2),
        ustate,
        ila.ite(more_blocks, top.const(1, 2), top.const(0, 2)),
        ila.ite(aes_time_enough, top.const(3, 2), top.const(2, 2)),
    ]
    micro.set_next('aes_state', ila.choice('ustate_next', ustate_candidates))

    # rd_data: chunk-write the byte loaded from XRAM, or no change.
    xram_rd_addr = op_addr + blk_cnt + byte_cnt_16b
    xram_rd_byte = ila.load(uxram, xram_rd_addr)
    rd_block = ila.writechunk("rd_data_chunk", rd_data, xram_rd_byte)
    micro.set_next('rd_data', ila.choice('rd_data_nxt', rd_block, rd_data))

    # enc_data: result of the aes function when aes_state == 2, else held.
    aes_key = key
    aes_ctr = ila.choice('ctr', uaes_ctr,
                         counter + ila.zero_extend(blk_cnt, 128))
    aes_result = ila.appfun(aes_fn, [aes_ctr, aes_key, rd_data])
    micro.set_next('enc_data', ila.ite(aes_state == 2, aes_result, enc_data))

    # XRAM: no change, or store one 8-bit chunk of enc_data.
    xram_wr_byte = ila.readchunk('enc_data_chunk', enc_data, 8)
    xram_wr_addr = op_addr + blk_cnt + byte_cnt_16b
    xram_written = ila.store(uxram, xram_wr_addr, xram_wr_byte)
    micro.set_next('XRAM', ila.choice('xram_nxt', uxram, xram_written))

    return top, micro