Exemple #1
0
 def instructionFetch(self):
     """Load the instruction word at ``self.pc`` and slice out its fields.

     The word is read from ``self.mem`` at the address formed by bits
     [31:2] of ``self.pc``, zero-extended to
     ``instruction_format.MEM_ADDRESS_BITS``.  Every field boundary comes
     from ``instruction_format``; a field covers bits ``[TOP - 1 : BOT]``.

     Sets ``inst``, ``fetch_expr``, ``opcode``, ``dest``, ``src1``..``src3``,
     ``baseImm`` (sign-extended to ``PC_BITS``), ``branchImm`` (zero-extended
     to ``PC_BITS``), ``branchPred``/``predReg`` and the register values
     ``sreg1``..``sreg3``/``sregdest`` looked up through ``indexIntoReg``.
     """
     fmt = instruction_format

     def field(top, bot):
         # Bits [top - 1 : bot] of the fetched instruction word.
         return self.inst[(top - 1):bot]

     word_address = ila.zero_extend(self.pc[31:2], fmt.MEM_ADDRESS_BITS)
     self.inst = ila.load(self.mem, word_address)

     self.opcode = field(fmt.OPCODE_BIT_TOP, fmt.OPCODE_BIT_BOT)
     self.fetch_expr = self.inst

     # Register index fields.
     self.dest = field(fmt.DST_BIT_TOP, fmt.DST_BIT_BOT)
     self.src1 = field(fmt.SRC0_BIT_TOP, fmt.SRC0_BIT_BOT)
     self.src2 = field(fmt.SRC1_BIT_TOP, fmt.SRC1_BIT_BOT)
     self.src3 = field(fmt.SRC2_BIT_TOP, fmt.SRC2_BIT_BOT)

     # Immediates, widened to the program-counter width.
     self.baseImm = ila.sign_extend(
         field(fmt.BASE_BIT_TOP, fmt.BASE_BIT_BOT), fmt.PC_BITS)
     # The branch predicate register index shares the destination field.
     self.branchPred = self.dest
     self.predReg = self.indexIntoReg(self.branchPred)
     self.branchImm = ila.zero_extend(
         field(fmt.IMM_BIT_TOP, fmt.IMM_BIT_BOT), fmt.PC_BITS)

     # Register contents selected by the index fields above.
     self.sreg1 = self.indexIntoReg(self.src1)
     self.sreg2 = self.indexIntoReg(self.src2)
     self.sreg3 = self.indexIntoReg(self.src3)
     self.sregdest = self.indexIntoReg(self.dest)
Exemple #2
0
    def instructionFetch(self):
        """Fetch the instruction at ``self.pc`` and decode every format's fields.

        The word is loaded through ``self.model.load`` from ``self.mem`` at
        ``self.pc`` zero-extended to ``MEMORY_ADDRESS_BITS``.  All candidate
        opcode, identifier and operand slices are taken unconditionally; the
        ``is*`` attributes say which format the word matches.  The word at
        ``self.pc + 1`` is loaded into ``self.extend_instruction``.

        The indentation is spaces only: the previous mix of tabs and spaces
        is rejected by Python 3 (TabError).
        """
        # TODO: How to fetch the instruction in GPU?
        self.instruction = self.model.load(
            self.mem, ila.zero_extend(self.pc, MEMORY_ADDRESS_BITS))

        # Opcode field of each encoding format.
        self.opcode_SOPP = self.instruction[22:16]
        self.opcode_SOP2 = self.instruction[30:23]
        self.opcode_VOP2 = self.instruction[30:25]
        self.opcode_SMRD = self.instruction[26:22]
        self.opcode_SOP1 = self.instruction[15:8]
        self.opcode_SOPK = self.instruction[27:23]

        # Leading bits that identify each encoding format.
        self.SOPPIdentifier = self.instruction[31:23]
        self.SOP2Identifier = self.instruction[31]
        self.VOP2Identifier = self.instruction[31]
        self.SMRDIdentifier = self.instruction[31:27]
        self.VOP3Identifier = self.instruction[31:26]
        self.SOPKIdentifier = self.instruction[31:28]

        # NOTE(review): isSOP2 tests bit 31 only, so it is also true for
        # every other format whose identifier starts with 1 -- confirm that
        # callers check the more specific flags first.
        self.isSOPK = (self.SOPKIdentifier == 0b1011)
        self.isSOPP = (self.SOPPIdentifier == 0b101111111)
        self.isSOP2 = (self.SOP2Identifier == 0b1)
        self.isVOP2 = (self.VOP2Identifier == 0b0)
        self.isSMRD = (self.SMRDIdentifier == 0b11000)
        self.isVOP3 = (self.VOP3Identifier == 0b110100)

        # Operand fields.
        self.sdstSOP2 = self.instruction[22:16]
        self.ssrc1 = self.instruction[15:8]
        self.ssrc0 = self.instruction[7:0]
        self.vdst = self.instruction[24:17]
        self.vsrc1 = self.instruction[16:9]
        self.vsrc0 = self.instruction[8:0]  # why does the whitepaper call this an offset?
        self.sdstSMRD = self.instruction[21:15]
        self.sbase = self.instruction[14:9]
        self.imm = self.instruction[8]
        self.simm = self.instruction[15:0]

        # Word following the current instruction.
        self.extend_instruction = self.model.load(
            self.mem, ila.zero_extend(self.pc + 1, MEMORY_ADDRESS_BITS))
Exemple #3
0
 def writeBit(self, bitaddr, bitval):
     """Return the result of writing ``bitval`` to bit address ``bitaddr``.

     The byte holding the bit is read with ``readDirect``, the addressed
     bit is cleared and replaced by ``bitval``, and the new byte is handed
     to ``writeDirect``, whose result is returned.
     """
     # FIXME (carried over from the original; the concern is not recorded)
     high_bits = bitaddr[7:3]
     top_bit_set = bitaddr[7:7] == 1

     # Top bit set: byte address is the bit address with its low three bits
     # zeroed.  Otherwise: the upper five bits plus an offset of 32.
     aligned_addr = ila.concat(high_bits, self.model.const(0, 3))
     offset_addr = ila.zero_extend(high_bits, 8) + 32
     byteaddr = ila.ite(top_bit_set, aligned_addr, offset_addr)

     old_byte = self.readDirect(byteaddr)

     # Position of the bit inside the byte, widened so it can drive a shift.
     shift = ila.zero_extend(bitaddr[2:0], 8)
     keep_others = ~(self.model.const(1, 8) << shift)
     new_bit = ila.zero_extend(bitval, 8) << shift

     return self.writeDirect(byteaddr, (keep_others & old_byte) | new_bit)
Exemple #4
0
 def instructionFetch(self):
     """Fetch and decode the opcode for both program counters.

     ``self.pc_a`` and ``self.pc_b`` each address one word of ``self.mem``
     (bits [31:2], zero-extended to ``instruction_format.MEM_ADDRESS_BITS``).
     The loaded words are stored as ``inst_a``/``inst_b`` and their opcode
     fields as ``opcode_a``/``opcode_b``.
     """
     fmt = instruction_format

     def fetch(pc):
         # Load the word addressed by bits [31:2] of ``pc``.
         return ila.load(self.mem,
                         ila.zero_extend(pc[31:2], fmt.MEM_ADDRESS_BITS))

     def opcode_of(word):
         # Opcode field: bits [OPCODE_BIT_TOP - 1 : OPCODE_BIT_BOT].
         return word[(fmt.OPCODE_BIT_TOP - 1):fmt.OPCODE_BIT_BOT]

     self.inst_a = fetch(self.pc_a)
     self.inst_b = fetch(self.pc_b)
     self.opcode_a = opcode_of(self.inst_a)
     self.opcode_b = opcode_of(self.inst_b)
Exemple #5
0
 def instructionFetch(self):
     """Load the instruction word at ``self.pc`` and split its fixed fields.

     The word comes from ``self.mem`` at bits [31:2] of ``self.pc``
     (zero-extended to ``MEM_ADDRESS_BITS``).  Field positions are
     hard-coded: opcode [31:22], dest [21:17], src1 [16:12], src2 [11:7],
     src3 [6:2]; the branch target is bits [21:0] zero-extended to
     ``PC_BITS``.  Register values are looked up with ``indexIntoReg``.
     """
     word_address = ila.zero_extend(self.pc[31:2], MEM_ADDRESS_BITS)
     word = ila.load(self.mem, word_address)

     self.inst = word
     self.opcode = word[31:22]
     self.fetch_expr = word

     # Register index fields.
     self.dest = word[21:17]
     self.src1 = word[16:12]
     self.src2 = word[11:7]
     self.src3 = word[6:2]

     self.branchPC = ila.zero_extend(word[21:0], PC_BITS)

     # Register contents selected by the index fields.
     self.sreg1 = self.indexIntoReg(self.src1)
     self.sreg2 = self.indexIntoReg(self.src2)
     self.sreg3 = self.indexIntoReg(self.src3)
     self.sregdest = self.indexIntoReg(self.dest)
Exemple #6
0
 def auxMull_i(self, dataA, dataB):
     """Return the low ``SCALAR_REG_BITS`` bits of ``dataA * dataB``.

     Bit 31 of each operand is taken as its sign.  The operands are
     negated when that bit is set, widened to twice ``SCALAR_REG_BITS``,
     multiplied, and the product is negated again when exactly one operand
     had its sign bit set.

     The body is indented with spaces only: the previous mix of tabs and
     spaces gave two lines a different indentation level from the rest.
     """
     dataAIsNeg = dataA[31]
     dataBIsNeg = dataB[31]
     # First calculate whether the result is positive/negative.
     resultIsNeg = dataAIsNeg ^ dataBIsNeg
     # Two's-complement negation (~x + 1) of the operands flagged negative.
     absDataA = ila.ite(dataAIsNeg,
                        (~dataA) + self.model.const(0b1, SCALAR_REG_BITS),
                        dataA)
     absDataB = ila.ite(dataBIsNeg,
                        (~dataB) + self.model.const(0b1, SCALAR_REG_BITS),
                        dataB)
     # Zero-extend the data to multiply.
     absDataADoubleLength = ila.zero_extend(absDataA, 2 * SCALAR_REG_BITS)
     absDataBDoubleLength = ila.zero_extend(absDataB, 2 * SCALAR_REG_BITS)
     absResultDoubleLength = absDataADoubleLength * absDataBDoubleLength
     # Adjust the pos/neg of the result.
     resultDoubleLength = ila.ite(resultIsNeg,
                                  (~absResultDoubleLength) + 1,
                                  absResultDoubleLength)
     mulResult = resultDoubleLength[SCALAR_REG_BITS - 1:0]
     return mulResult
Exemple #7
0
 def instructionFetch(self):
     """Load the instruction word at ``self.pc`` and slice out its fields.

     Field boundaries are the module-level bit positions ``REG_BITS``,
     ``OPCODE_BIT``, ``DST_BIT``, ``SRC0_BIT``, ``SRC1_BIT``, ``SRC2_BIT``
     and ``BASE_BIT``; each field runs from one bit below its upper
     boundary down to its lower boundary.  ``baseImm`` is sign-extended and
     ``branchImm`` zero-extended to ``PC_BITS``.  Register values are looked
     up with ``indexIntoReg``.
     """
     word_address = ila.zero_extend(self.pc[31:2], MEM_ADDRESS_BITS)
     self.inst = ila.load(self.mem, word_address)

     def between(upper, lower):
         # Bits [upper - 1 : lower] of the fetched instruction word.
         return self.inst[(upper - 1):lower]

     self.opcode = between(REG_BITS, OPCODE_BIT)
     self.fetch_expr = self.inst

     # Register index fields, packed back to back below the opcode.
     self.dest = between(OPCODE_BIT, DST_BIT)
     self.src1 = between(DST_BIT, SRC0_BIT)
     self.src2 = between(SRC0_BIT, SRC1_BIT)
     self.src3 = between(SRC1_BIT, SRC2_BIT)

     self.baseImm = ila.sign_extend(between(BASE_BIT, 0), PC_BITS)
     # The branch predicate register index shares the destination field.
     self.branchPred = self.dest
     self.predReg = self.indexIntoReg(self.branchPred)
     self.branchImm = ila.zero_extend(between(DST_BIT, BASE_BIT), PC_BITS)

     # Register contents selected by the index fields.
     self.sreg1 = self.indexIntoReg(self.src1)
     self.sreg2 = self.indexIntoReg(self.src2)
     self.sreg3 = self.indexIntoReg(self.src3)
     self.sregdest = self.indexIntoReg(self.dest)
Exemple #8
0
 def readBit(self, bitaddr):
     """Return the bit at bit address ``bitaddr``.

     The containing byte is read with ``readDirect`` and indexed by the low
     three bits of ``bitaddr``.
     """
     high_bits = bitaddr[7:3]
     top_bit_set = bitaddr[7:7] == 1

     # Top bit set: byte address is the bit address with its low three bits
     # zeroed.  Otherwise: the upper five bits plus an offset of 32.
     aligned_addr = ila.concat(high_bits, self.model.const(0, 3))
     offset_addr = ila.zero_extend(high_bits, 8) + 32
     byteaddr = ila.ite(top_bit_set, aligned_addr, offset_addr)

     position = bitaddr[2:0]
     containing_byte = self.readDirect(byteaddr)
     return containing_byte[position]
Exemple #9
0
 def instructionFetch(self):
     """Load the instruction at ``self.pc`` and decode every format's fields.

     The word is read from ``self.mem`` at bits [31:2] of ``self.pc``
     (zero-extended to ``MEM_ADDRESS_BITS``).  Fields for the branch,
     register-register, immediate and memory formats are all sliced
     unconditionally; ``isBranch``, ``isRegReg``, ``isImmediate`` and
     ``isMem`` tell which format the leading bits match.
     """
     const = self.model.const

     def widen(expr):
         return ila.zero_extend(expr, SCALAR_REG_BITS)

     def widen_signed(expr):
         return ila.sign_extend(expr, SCALAR_REG_BITS)

     word = ila.load(self.mem,
                     ila.zero_extend(self.pc[31:2], MEM_ADDRESS_BITS))
     self.instruction = word

     # ---- branch format: leading bits 1111 ----
     self.isBranch = (word[31:28] == const(0b1111, 4))
     self.branchOP = word[27:25]
     self.branchOffsetA = word[24:5]
     self.branchSrc = word[4:0]
     self.branchOffsetB = word[24:0]

     # ---- register-register format: leading bits 110 ----
     self.isRegReg = (word[31:29] == const(0b110, 3))
     self.rrType = word[28:26]
     self.rrOpcode = word[25:20]
     self.rrSrc2 = word[19:15]
     self.rrMask = word[14:10]
     self.rrDest = word[9:5]
     self.rrSrc1 = word[4:0]

     # ---- immediate format: leading bit 0 ----
     self.isImmediate = (word[31] == const(0b0, 1))
     self.immType = word[30:29]
     self.immOpcode = word[28:24]
     self.immA = widen(word[23:15])
     self.immB = widen(word[23:10])
     self.immCup = word[23:10]
     self.immClow = word[4:0]
     self.immDest = word[9:5]
     self.immMask = word[14:10]

     # Immediate selection, built from the innermost alternative outwards.
     # Both arms of the 0b11 test yield immA, as in the original.
     type_11 = ila.ite(self.immType == const(0b11, 2),
                       widen(self.immA),
                       widen(self.immA))
     type_10 = ila.ite(self.immType == const(0b10, 2),
                       widen(ila.concat(self.immCup, self.immClow)),
                       type_11)
     self.imm = ila.ite(self.immType[1] == const(0b0, 1),
                        widen(self.immB),
                        type_10)

     # ---- memory format: leading bits 10 ----
     self.isMem = (word[31:30] == const(0b10, 2))
     self.isLoad = word[29]
     self.memOpcode = word[28:25]
     self.memOffSetA = word[24:15]
     self.memOffSetB = word[24:10]
     self.memMask = word[14:10]
     self.memDest = word[9:5]
     self.memSrc = word[9:5]
     self.memPtr = word[4:0]

     # Opcodes 0b1000 and 0b1110 use offset A; everything else offset B.
     offset_1110 = ila.ite(self.memOpcode == const(0b1110, 4),
                           widen_signed(self.memOffSetA),
                           widen_signed(self.memOffSetB))
     self.memOffSet = ila.ite(self.memOpcode == const(0b1000, 4),
                              widen_signed(self.memOffSetA),
                              offset_1110)

     # need rewrite (original author's note)
     masked_type = ((self.rrType == const(0b010, 3)) |
                    (self.rrType == const(0b101, 3)))
     self.isMask = (masked_type & self.isRegReg)

     self.dest = word[9:5]
Exemple #10
0
    def get_reg_choices(reg):
        """Build the choice expression for the next value of register ``reg``.

        The outer choice ``x<reg>_next`` picks between leaving the register
        unchanged and an ``ite`` that, when ``rm.rd == reg``, takes one of
        the candidate results in the inner choice ``x<reg>``.
        """
        src_a = rm.indexIntoGPR(rm.rs1)
        src_b = rm.indexIntoGPR(rm.rs2)
        rd_val = rm.indexIntoGPR(rm.rd)  # not used below
        rs_val = ila.choice('rs_sel', src_a, src_b)  # not used below
        shamt = ila.choice('shift_amout', src_b[4:0], rm.inst[24:20])
        operand_b = ila.choice('rs2_or_immed', src_b,
                               ila.zero_extend(rm.immI, 32),
                               ila.sign_extend(rm.immI, 32))

        # Load path: fetch the addressed word, then select within it using
        # the two low address bits.
        addr = src_a + rm.immI
        loaded_word = ila.load(rm.mem, zext(addr[31:2]))
        load_val = getSlice(loaded_word, addr[1:0])
        # A double-word load via ila.loadblk was left disabled by the author.

        unchanged = rm.generalRegList[reg]

        # Candidate results; the order is part of the choice definition.
        candidates = [
            src_a + operand_b,  # RS1 + RS2
            src_a - operand_b,  # RS1 - RS2
            src_a & operand_b,  # AND
            src_a | operand_b,  # OR
            src_a ^ operand_b,  # XOR
            ila.ite(ila.slt(src_a, operand_b), bv(1), bv(0)),  # SLT
            ila.ite(ila.slt(src_a, operand_b), bv(0), bv(1)),
            ila.ite(src_a < operand_b, bv(1), bv(0)),
            src_a << zext(shamt),  # sll
            src_a >> zext(shamt),  # srl
            ila.ashr(src_a, zext(shamt)),  # sra
            rm.immU,  # LUI
            rm.immU + rm.pc,  # AUIPC
            rm.pc + bv(4),  # JAL/JALR
            load_val,
        ]

        written = ila.ite(
            rm.rd == reg,  # Is this the destination register?
            ila.choice("x%d" % reg, candidates),
            rm.generalRegList[reg])  # Remain the same

        # First alternative: remain the same regardless of RD
        # (i.e. S/SB instructions).
        return ila.choice("x%d_next" % reg, [unchanged, written])
Exemple #11
0
 def InstFetch(self):
     """Load the instruction at ``self.pc`` and decode its fields.

     The word is read from ``self.mem`` at bits [31:2] of ``self.pc``,
     zero-extended to 32 bits.  The sign-extended immediates are widened to
     ``XLEN``; ``immU`` is the upper 20 bits followed by twelve zero bits.
     """
     word = ila.load(self.mem, ila.zero_extend(self.pc[31:2], 32))

     def signed(parts):
         # Concatenate the pieces (most significant first) and sign-extend.
         return ila.sign_extend(ila.concat(parts), XLEN)

     self.inst = word
     self.fetch_expr = self.inst

     # Fixed-position fields.
     self.opcode = self.inst[6:0]
     self.rd = self.inst[11:7]
     self.rs1 = self.inst[19:15]
     self.rs2 = self.inst[24:20]
     self.funct3 = self.inst[14:12]
     self.funct7 = self.inst[31:25]
     self.funct12 = self.inst[31:20]

     # Immediates, one per instruction format.
     self.immI = ila.sign_extend(word[31:20], XLEN)
     self.immS = signed([word[31:25], word[11:7]])
     self.immB = signed(
         [word[31], word[7], word[30:25], word[11:8], const(0, 1)])
     self.immU = ila.concat([word[31:12], const(0, 12)])
     self.immJ = signed(
         [word[31], word[19:12], word[20], word[30:21], const(0, 1)])

     self.csr_index = self.inst[31:20]
Exemple #12
0
    def sreg_nxt(self, regNo):
        """Return the next-state expression for scalar register ``regNo``.

        The register keeps its value unless ``self.dest == regNo``.  When it
        is the destination, the value depends on the instruction format:
        register-register (only ``rrType`` 0b000), immediate (``immType``
        0b00 uses ``immB``, 0b10 uses ``immA``), or otherwise the load arm.
        """
        const = self.model.const
        sreg1 = self.indexToSGPR(self.rrSrc1)
        sreg2 = self.indexToSGPR(self.rrSrc2)
        current = self.scalar_registers[regNo]

        # load instruction
        # NOTE(review): load_val is built but never used, and both arms of
        # the load test below keep the old value -- confirm whether the
        # load result was meant to be written here.
        addr = self.indexToSGPR(self.memPtr) + ila.sign_extend(
            self.memOffSet, SCALAR_REG_BITS)
        load_val = ila.load(self.mem,
                            ila.zero_extend(addr[31:2], MEM_ADDRESS_BITS))

        def alu_result(opcode_expr, operand):
            # Opcode-selected result; falls back to the current value.
            # MULH_I always multiplies the two source registers, as in the
            # original, even on the immediate path.
            cases = [
                (NyEncoding.ADD_I, sreg1 + operand),
                (NyEncoding.SUB_I, sreg1 - operand),
                (NyEncoding.AND, sreg1 & operand),
                (NyEncoding.OR, sreg1 | operand),
                (NyEncoding.MULH_I, sreg1 * sreg2),
            ]
            result = current
            for encoding, value in reversed(cases):
                result = ila.ite(opcode_expr == encoding, value, result)
            return result

        reg_reg_next = ila.ite(self.rrType == const(0b000, 3),
                               alu_result(self.rrOpcode, sreg2),
                               current)

        # The immediate opcode is widened to 6 bits before comparison.
        imm_opcode = ila.zero_extend(self.immOpcode, 6)
        imm_next = ila.ite(
            self.immType == const(0b00, 2),
            alu_result(imm_opcode, self.immB),
            ila.ite(self.immType == const(0b10, 2),
                    alu_result(imm_opcode, self.immA),
                    current))

        load_next = ila.ite(self.isLoad == const(0b1, 1), current, current)

        written = ila.ite(self.isRegReg,
                          reg_reg_next,
                          ila.ite(self.isImmediate, imm_next, load_next))
        return ila.ite(self.dest == regNo, written, current)
 def instFetch(self):
     """Fill ``self.fetch_list`` with the two instruction words read from
     ``self.imm`` -- see body: one entry per program counter ``pc[0]``,
     ``pc[1]``, each indexing ``self.imem`` by bits [31:2]."""
     # NOTE(review): ``pc`` is a free name here (not ``self.pc``) and
     # ``ila.zero_extend`` is called without a width -- confirm both
     # against the rest of the file.
     self.fetch_list = []
     for lane in (0, 1):
         word = self.imem[ila.zero_extend(pc[lane][31:2])]
         self.fetch_list.append(word)
Exemple #14
0
 def perform_instruction(self, index, program_line, pc_target):
     """Fold one parsed program line into thread ``index``'s next-state
     expressions.

     Args:
         index: thread number; used as the ``_<index>`` suffix of register
             names and as the position in the per-thread lists.
         program_line: token list; ``program_line[0]`` is the opcode and
             the remaining entries are its operands.
         pc_target: mapping from a branch label to its program counter.

     Every update is guarded by ``self.pc_list[index] == self.current_pc``.
     ``self.current_pc`` advances by 4 on every path except the one for
     lines with fewer than two tokens, which are only recorded in
     ``self.debug_log``.

     Fixes over the previous version: a source operand written as a sum
     (``a+b+c``) now adds each component instead of adding the first one
     repeatedly; ``print`` uses the call form, which behaves the same on
     Python 2 for a single argument.
     """
     if len(program_line) < 2:
         self.debug_log[self.current_pc] = program_line
         return

     opcode = program_line[0]
     opcode_split = re.split(r'\.', opcode)
     opcode_name = opcode_split[0]
     if index == 0:
         print(program_line)
         print(self.current_pc)

     if opcode_name not in ('@', 'bra'):
         self.next_state_finished.append(program_line[1])

         if opcode_name == 'bar':
             # The result is discarded; the call is kept in case aux_dest
             # does bookkeeping -- TODO confirm it is needed.
             self.aux_dest(program_line[0], [], index)
             self.current_pc += 4
             return

         if opcode == 'ld.acq':
             lock_addr_name = program_line[1]
             lock_addr_reg = self.model.getreg(lock_addr_name + '_%d' %
                                               index)
             # The declared type string carries the bit width after its
             # first two characters.
             lock_bits = int(ptx_declaration[lock_addr_name][2:])
             if lock_bits < instruction_format.LONG_REG_BITS:
                 lock_addr_reg = ila.zero_extend(
                     lock_addr_reg, instruction_format.LONG_REG_BITS)
             self.mutex_guard_next_list[index] = ila.ite(
                 self.pc_list[index] == self.current_pc, lock_addr_reg,
                 self.mutex_guard_next_list[index])
             self.mutex_flag_next_list[index] = ila.ite(
                 self.pc_list[index] == self.current_pc,
                 self.model.const(0x1, 1), self.mutex_flag_next_list[index])
             self.current_pc += 4
             return

         if opcode == 'st.rel':
             # NOTE(review): this writes the same flag value (1) as ld.acq
             # above -- confirm that is intended.
             self.mutex_flag_next_list[index] = ila.ite(
                 self.pc_list[index] == self.current_pc,
                 self.model.const(0x1, 1), self.mutex_flag_next_list[index])
             self.current_pc += 4
             return

         # Operand width: taken from the type suffix (first character
         # dropped), which sits one position earlier when the opcode ends
         # in 'ca' or 'cg'.
         suffix = opcode_split[-1]
         if suffix == 'pred':
             op_len = 1
         elif suffix in ('ca', 'cg'):
             op_len = int(opcode_split[-2][1:])
         else:
             op_len = int(suffix[1:])

         # Each source operand may be a '+'-separated sum of components.
         src_list = []
         for src_str in program_line[2:]:
             components = [
                 self.aux_imm(part, index, op_len)
                 for part in re.split(r'\+', src_str)
             ]
             src_sum = components[0]
             for component in components[1:]:
                 src_sum = src_sum + component
             src_list.append(src_sum)

         dest = self.aux_dest(program_line[0], src_list, index)
         if not dest:
             self.debug_log[self.current_pc] = program_line
             self.current_pc += 4
             return

         dest_str = program_line[1]
         if 'atom' in opcode:
             op_len = instruction_format.LONG_REG_BITS
         if 'ld' in opcode:
             # Loads whose opcode contains 'param' or 'v4' only advance
             # the pc.
             if 'param' in opcode or 'v4' in opcode:
                 self.current_pc += 4
                 return
             op_len = instruction_format.LONG_REG_BITS

         dest = self.adjust_dest(index, dest, dest_str, op_len)
         state_key = dest_str + '_%d' % index
         current_next_state = self.next_state_dict[state_key]
         self.next_state_dict[state_key] = ila.ite(
             self.pc_list[index] == self.current_pc, dest,
             current_next_state)
         self.current_pc += 4
         return

     # Control flow: '@' is a predicated jump, 'bra' an unconditional one.
     if opcode_name == '@':
         opcode_jmp_dest = program_line[3]
         pred_guard = self.pred_one
         pred_guard_reg = program_line[1]
         if program_line[1][0] == '!':
             # Negated predicate: compare against pred_zero instead.
             pred_guard = self.pred_zero
             pred_guard_reg = program_line[1][1:]
         opcode_pred = self.model.getreg(pred_guard_reg + '_%d' % index)
         opcode_jmp_target = pc_target[opcode_jmp_dest]
         print(opcode)
         print(opcode_jmp_target)
         pc_jmp = ila.ite(
             opcode_pred == pred_guard,
             ila.const(opcode_jmp_target, instruction_format.PC_BITS),
             self.pc_list[index] + 4)
     else:
         opcode_jmp_dest = program_line[1]
         opcode_jmp_target = pc_target[opcode_jmp_dest]
         print(opcode)
         print(opcode_jmp_target)
         pc_jmp = ila.const(opcode_jmp_target,
                            instruction_format.PC_BITS)

     self.pc_next_list[index] = ila.ite(
         self.pc_list[index] == self.current_pc, pc_jmp,
         self.pc_next_list[index])
     self.current_pc += 4
Exemple #15
0
def buildILA():
    #---------------------------
    # define universal constant
    #---------------------------
    K = 5
    NUM_MOVIE_MAX = 100
    NUM_HIDDEN_MAX = 100
    NUM_VISIBLE_MAX = NUM_MOVIE_MAX * K
    DATAMEM_ADDR_WIDTH = int(
        log(NUM_VISIBLE_MAX + 1) /
        log(2)) + 1  # 9 # it is definitely not dividable, but need to check
    HIDDEN_UNIT_WIDTH = int(
        log(NUM_HIDDEN_MAX + 1) /
        log(2)) + 1  # 7 # it is definitely not dividable, but need to check
    VISIBLE_UNIT_WIDTH = int(log(NUM_VISIBLE_MAX + 1) / log(2)) + 1  # 9
    EDGEMEM_ADDR_WIDTH = int(
        log((NUM_VISIBLE_MAX + 1) * (NUM_HIDDEN_MAX + 1)) / log(2)) + 1  # 16
    POS_ADDR_WIDTH = EDGEMEM_ADDR_WIDTH
    NEG_ADDR_WIDTH = EDGEMEM_ADDR_WIDTH
    PREDICT_RESULT_WIDTH = int(log(NUM_MOVIE_MAX) / log(2)) + 1  # 7
    KWIDTH = int(log(K) / log(2)) + 1  # 3

    #---------------------------
    # Model
    #---------------------------

    rbm = ila.Abstraction('RBM')

    conf_done = rbm.inp('conf_done', 1)
    conf_num_hidden = rbm.inp('conf_num_hidden', 32)
    conf_num_visible = rbm.inp('conf_num_visible', 32)
    conf_num_users = rbm.inp('conf_num_users', 32)
    conf_num_loops = rbm.inp('conf_num_loops', 32)
    conf_num_testusers = rbm.inp('conf_num_testusers', 32)
    conf_num_movies = rbm.inp('conf_num_movies', 32)

    rst = rbm.inp('rst', 1)

    init_done = rbm.reg('init_done', 1)
    done = rbm.reg('done', 1)
    num_hidden = rbm.reg('num_hidden', 16)
    num_visible = rbm.reg('num_visible', 16)
    num_users = rbm.reg('num_users', 16)
    num_loops = rbm.reg('num_loops', 16)
    num_testusers = rbm.reg('num_testusers', 16)
    num_movies = rbm.reg('num_movies', 16)

    # DMA output
    rd_index = rbm.reg('rd_index', 32)
    rd_length = rbm.reg('rd_length', 32)
    rd_request = rbm.reg('rd_request', 1)
    rd_grant = rbm.inp('rd_grant', 1)
    data_in = rbm.inp('data_in', 32)
    # rd_cnt    = rbm.reg('rd_cnt', 16)  # i ureg  #585

    # DMA input
    wr_grant = rbm.inp('wr_grant', 1)
    wr_request = rbm.reg('wr_request', 1)
    wr_index = rbm.reg('wr_index', 32)
    wr_length = rbm.reg('wr_length', 32)
    data_out = rbm.reg('data_out', 32)
    # wr_cnt = rbm.reg('wr_cnt', 16) : u reg

    data = rbm.mem('data', DATAMEM_ADDR_WIDTH, 8)
    rbm.mem('predict_result', PREDICT_RESULT_WIDTH, 8)

    #-------------------------------------
    #  Decoding Expressions
    #-------------------------------------
    rstInst = rst == 1
    confDoneInst = (rst == 0) & (init_done == 0) & (conf_done == 1)
    rdGrantInst = (rd_request == 1) & (rd_grant == 1)
    wrGrantInst = (wr_request == 1) & (wr_grant == 1)
    decodeExpr = [rstInst, confDoneInst, rdGrantInst, wrGrantInst]

    #-------------------------------------
    #  AUX Functions
    #-------------------------------------
    def const(v, w):
        return rbm.const(v, w)

    b0 = const(0, 1)
    b1 = const(1, 1)
    h0_8 = const(0, 8)
    h1_8 = const(1, 8)
    h0_4 = const(0, 4)
    h1_4 = const(1, 4)
    h2_4 = const(2, 4)
    h3_4 = const(3, 4)
    h4_4 = const(4, 4)
    h0_16 = const(0, 16)
    h1_16 = const(1, 16)
    h0_32 = const(0, 32)
    h0_64 = const(0, 64)

    #-------------------------------------
    #  Init conditions
    #-------------------------------------

    rbm.set_init('init_done', b0)
    rbm.set_init('done', b0)
    rbm.set_init('num_hidden', h0_16)
    rbm.set_init('num_visible', h0_16)
    rbm.set_init('num_users', h0_16)
    rbm.set_init('num_loops', h0_16)
    rbm.set_init('num_testusers', h0_16)
    rbm.set_init('num_movies', h0_16)

    #-------------------------------------
    #  Config
    #-------------------------------------

    # this means, once configured, unless reset, it cannot be reconfigured
    init_done_nxt = ila.ite(rstInst, b0, ila.ite(confDoneInst, b1, init_done))
    num_hidden_nxt = ila.ite(
        rstInst, h0_16, ila.ite(confDoneInst, conf_num_hidden[15:0],
                                num_hidden))
    num_visible_nxt = ila.ite(
        rstInst, h0_16,
        ila.ite(confDoneInst, conf_num_visible[15:0], num_visible))
    num_users_nxt = ila.ite(
        rstInst, h0_16, ila.ite(confDoneInst, conf_num_users[15:0], num_users))
    num_loops_nxt = ila.ite(
        rstInst, h0_16, ila.ite(confDoneInst, conf_num_loops[15:0], num_loops))
    num_testusers_nxt = ila.ite(
        rstInst, h0_16,
        ila.ite(confDoneInst, conf_num_testusers[15:0], num_testusers))
    num_movies_nxt = ila.ite(
        rstInst, h0_16, ila.ite(confDoneInst, conf_num_movies[15:0],
                                num_movies))

    rbm.set_next('init_done', init_done_nxt)
    rbm.set_next('num_hidden', num_hidden_nxt)
    rbm.set_next('num_visible', num_visible_nxt)
    rbm.set_next('num_users', num_users_nxt)
    rbm.set_next('num_loops', num_loops_nxt)
    rbm.set_next('num_testusers', num_testusers_nxt)
    rbm.set_next('num_movies', num_movies_nxt)

    # INST-level w/r complete
    rbm_rd_complete = rbm.reg('rd_complete', 1)
    rbm_wr_complete = rbm.reg('wr_complete', 1)
    rbm.set_init('rd_complete', b0)
    rbm.set_init('wr_complete', b0)

    #------------------------------------
    #  Compute UABS
    #------------------------------------

    uabs = rbm.add_microabstraction('compute', (init_done == 1) & (done == 0))
    index = uabs.reg('index', 16)
    loop_count = uabs.reg('loop_count', 16)
    pc = uabs.reg('upc', 4)
    edges_mem = uabs.mem('edges', EDGEMEM_ADDR_WIDTH, 8)

    nlp = uabs.getreg('num_loops')
    nm = ila.zero_extend(uabs.getreg('num_movies'), 32)
    nu = uabs.getreg('num_users')
    ntu = uabs.getreg('num_testusers')
    out_rd_request = uabs.getreg('rd_request')
    out_rd_complete = uabs.getreg('rd_complete')
    out_rd_length = uabs.getreg('rd_length')
    out_rd_index = uabs.getreg('rd_index')

    train_input_done = uabs.reg('train_input_done', 1)
    predict_input_done = uabs.reg('predict_input_done', 1)

    uabs.set_init('upc', const(0, 4))
    uabs.set_init('index', h0_16)
    uabs.set_init('loop_count', h0_16)
    uabs.set_init('train_input_done', b0)
    uabs.set_init('predict_input_done', b0)
    uabs.set_init('rd_complete', b0)

    ###  computation micro_instructions

    StartRead = (pc == 0)
    WaitReadComplete = (pc == 1) & (out_rd_complete == 0)
    DecideTrainOrPredict = (pc == 1) & (out_rd_complete == 1)
    StartTrain = (pc == 2) & (train_input_done == 1)
    StartPredict = (pc == 2) & (predict_input_done == 1)
    Finish = (pc == 3)

    StartReadState = const(0, 4)
    WaitReadCompleteState = const(1, 4)
    StartTrainOrPredict = const(2, 4)
    FinishState = const(3, 4)

    decodeExpr = [
        StartRead, WaitReadComplete, DecideTrainOrPredict, StartTrain,
        StartPredict, Finish
    ]

    out_rd_request_nxt = ila.ite(StartRead, b1, out_rd_request)
    out_rd_length_nxt = ila.ite(StartRead, 5 * nm, out_rd_length)
    out_rd_index_nxt = ila.ite(StartRead, ila.zero_extend(index, 32),
                               out_rd_index)
    out_rd_complete_nxt = ila.ite(
        StartRead, b0, ila.ite(DecideTrainOrPredict, b0, out_rd_complete))

    train_input_done_nxt = ila.ite(DecideTrainOrPredict,
                                   ila.ite(loop_count < nlp, b1, b0),
                                   train_input_done)
    predict_input_done_nxt = ila.ite(DecideTrainOrPredict,
                                     ila.ite(loop_count == nlp, b1, b0),
                                     predict_input_done)

    pc_nxt = ila.ite(
        StartRead,
        WaitReadCompleteState,
        ila.ite(
            WaitReadComplete,
            pc,
            ila.ite(
                DecideTrainOrPredict,
                StartTrainOrPredict,
                ila.ite(
                    StartTrain,
                    StartTrainOrPredict,  # StartReadState, # actually should be updated by u2inst 
                    ila.ite(
                        StartPredict,
                        StartTrainOrPredict,  # StartReadState, # actually should be updated by u2inst 
                        ila.ite(
                            Finish,
                            FinishState,
                            pc  # should never happen!
                        ))))))

    # should be updated by u2inst
    index_nxt_dummy = ila.ite(
        StartTrain | StartPredict,
        ila.ite(
            (index == nu - 1) & (loop_count != nlp),
            h0_16,
            ila.ite(
                (index == ntu - 1) & (loop_count == nlp),
                index,  # And it is not correct
                index + 1)),
        index)
    # not in use
    loop_count_nxt_dummy = ila.ite(
        StartTrain | StartPredict,
        ila.ite((index == nu - 1) & (loop_count != nlp), loop_count + 1,
                loop_count), loop_count)

    uabs.set_next('rd_request', out_rd_request_nxt)
    uabs.set_next('rd_length', out_rd_length_nxt)
    uabs.set_next('rd_index', out_rd_index_nxt)
    uabs.set_next('rd_complete', out_rd_complete_nxt)
    uabs.set_next('train_input_done', train_input_done_nxt)
    uabs.set_next('predict_input_done', predict_input_done_nxt)
    uabs.set_next('upc', pc_nxt)
    uabs.set_next('index', index)
    uabs.set_next('loop_count', loop_count)
    # this has to be updated by micro_inst
    # read_request is turned off by loaduabs
    # predict_input_done, train_input_done is turned off by uabs_train/predict

    #------------------------------------
    #  Load UABS
    #------------------------------------
    # RBM interface
    # high-level interface
    rd_granted = rbm.reg(
        'rd_granted', 1
    )  # this is only used for maintaining the validity of load UABS, no other should use
    data_nxt = ila.ite(rdGrantInst,
                       ila.store(data, const(0, DATAMEM_ADDR_WIDTH),
                                 data_in[7:0]), data)  # data #
    rd_granted_nxt = ila.ite(rdGrantInst, b1, rd_granted)
    rbm.set_next('rd_granted', rd_granted_nxt)
    rbm.set_next('data', data_nxt)

    # one change is to move these into lower abstraction
    DMAload = rbm.add_microabstraction(
        'DMAload', (rd_granted == 1))  # this is sub-instruction
    w_cnt = DMAload.reg('i', 16)

    dma_rd_request = DMAload.getreg('rd_request')
    dma_rd_length = DMAload.getreg('rd_length')
    dma_rd_index = DMAload.getreg('rd_index')

    state_update_data = DMAload.getmem('data')
    state_update_rd_request = dma_rd_request
    self_update_rd_granted = DMAload.getreg('rd_granted')

    more_read_in = w_cnt < dma_rd_length[15:0]
    last_cycle = w_cnt == dma_rd_length[15:0]
    DMAload.set_init('i', h1_16)  # h0_16 )
    DMAload.set_next('i', ila.ite(more_read_in, w_cnt + 1, w_cnt))
    DMAload.set_next('rd_request', b0)  # reset to 0 immediately
    DMAload.set_next('rd_granted',
                     ila.ite(more_read_in, self_update_rd_granted, b0))
    DMAload.set_next('rd_complete', ila.ite(more_read_in, b0, b1))
    DMAload.set_next(
        'data',
        ila.ite(
            more_read_in,
            ila.store(state_update_data, w_cnt[DATAMEM_ADDR_WIDTH - 1:0],
                      data_in[7:0]),
            ila.ite(
                last_cycle,
                ila.store(state_update_data,
                          dma_rd_length[DATAMEM_ADDR_WIDTH - 1:0], h1_8),
                state_update_data)))

    #------------------------------------
    #  Train UUABS
    #------------------------------------

    TrainUabs = uabs.add_microabstraction('train', train_input_done == 1)

    sigmoid_func = TrainUabs.fun('sigmoid', 64, [16])  # DATA_sum_, 01_D
    rand_func = TrainUabs.fun('rand', 64, [])  # generate random number
    to_int_exp = TrainUabs.fun('to_int_exp', 32, [16])  #
    divide_func = TrainUabs.fun(
        'divide', 64, [32, 64])  # dp:32_32 / sum_of_pow2 64_64 = 64_1

    hidden_unit = TrainUabs.mem('hidden_unit', HIDDEN_UNIT_WIDTH, 1)
    visible_unit = TrainUabs.mem('visible_unit', VISIBLE_UNIT_WIDTH, 1)
    visibleEnergy = TrainUabs.mem('visibleEnergies', KWIDTH, 16)
    pow2 = TrainUabs.mem('pow2', KWIDTH, 32)
    pos = TrainUabs.mem('pos', POS_ADDR_WIDTH, 1)
    #neg          = TrainUabs.mem('neg', NEG_ADDR_WIDTH, 1 ) # not needed

    train_sum = TrainUabs.reg('train_sum', 16)
    train_max = TrainUabs.reg('train_max', 16)
    sumOfpow2 = TrainUabs.reg('sumOfpow2', 64)

    jstate = TrainUabs.reg('jstate', 16)
    inner_loop_pc = TrainUabs.reg('per_v_pc', 4)

    train_pc = TrainUabs.reg('train_upc', 4)  # Re-evaluate
    v_cnt = TrainUabs.reg('train_v_cnt', 16)
    h_cnt = TrainUabs.reg('train_h_cnt', 16)

    train_input = TrainUabs.getmem('data')
    edges_input = TrainUabs.getmem('edges')
    nv = TrainUabs.getreg('num_visible')
    nh = TrainUabs.getreg('num_hidden')
    nu = TrainUabs.getreg('num_users')
    ntu = TrainUabs.getreg('num_testusers')
    nlp = TrainUabs.getreg('num_loops')

    SumEdge = train_pc == 0
    SumEdgeState = const(0, 4)
    SumHidden = train_pc == 1
    SumHiddenState = const(1, 4)
    StorePos = train_pc == 3
    StorePosState = const(3, 4)
    EdgeUpdate = train_pc == 2
    EdgeUpdateState = const(2, 4)

    TrainUabs.decode_exprs = [SumEdge, SumHidden, EdgeUpdate]

    #Begin
    v_cnt_init = const(0, 16)
    h_cnt_init = const(0, 16)
    pc_init = const(0, 4)

    #SumEdge: s0
    edge_load_addr = (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt
    train_sum_s0_nxt = ila.ite(v_cnt == 0, const(0, 16), train_sum) + ila.ite(
        ila.load(train_input, v_cnt[DATAMEM_ADDR_WIDTH - 1:0]) == 1,
        fpconvert(ila.load(edges_input, edge_load_addr), FPedge, FPsum),
        const(0, 16))
    v_cnt_s0_nxt = ila.ite(v_cnt == nv, h0_16, v_cnt + 1)
    h_cnt_s0_nxt = ila.ite((v_cnt == nv),
                           ila.ite(h_cnt == nh - 1, h0_16, h_cnt + 1), h_cnt)
    #                                                    Here ^^^ is for transiting to next state
    hidden_update_s0_0 = ila.ite(
        ila.appfun(rand_func) < ila.appfun(sigmoid_func, train_sum_s0_nxt), b1,
        b0)
    hidden_update_s0_1 = ila.ite(
        v_cnt == nv,
        ila.store(hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0],
                  hidden_update_s0_0), hidden_unit)
    hidden_update_s0_2 = ila.ite((v_cnt == nv) & (h_cnt == nh - 1),
                                 ila.store(hidden_update_s0_1,
                                           nh[HIDDEN_UNIT_WIDTH - 1:0], b1),
                                 hidden_update_s0_1)
    train_pc_s0_nxt = ila.ite((v_cnt == nv) & (h_cnt == nh - 1),
                              SumHiddenState, SumEdgeState)
    # Just like init
    jstate_s0_nxt = h0_16
    inner_loop_pc_s0_nxt = h0_4

    # add prefix :
    # train_sum_nxt = ila.ite(SumEdge, train_sum_s0_nxt, ila.ite(SumHidden, ... ) )

    # SumHiddenK0-K4 : s1-s5

    # pc:1 per_v_pc : 0     1       2       3

    LastH = h_cnt == nh
    LastJ = jstate == K - 1
    LastV = (v_cnt + K == nv) | (v_cnt + K >= NUM_VISIBLE_MAX)
    SumHiddenL0 = SumHidden & (inner_loop_pc == 0)
    SumHiddenL1 = SumHidden & (inner_loop_pc == 1)
    SumHiddenL2 = SumHidden & (inner_loop_pc == 2)
    SumHiddenL3 = SumHidden & (inner_loop_pc == 3)

    h_cnt_s1_s5_L0_nxt = ila.ite(LastH, h0_16, h_cnt + 1)
    jstate_s1_s5_L0_nxt = ila.ite(LastH, ila.ite(LastJ, h0_16, jstate + 1),
                                  jstate)
    inner_loop_pc_s1_s5_L0_nxt = ila.ite(LastJ & LastH, h1_4, inner_loop_pc)

    jstate_s1_s5_L1_nxt = ila.ite(LastJ, h0_16, jstate + 1)
    inner_loop_pc_s1_s5_L1_nxt = ila.ite(LastJ, h2_4, inner_loop_pc)

    jstate_s1_s5_L2_nxt = jstate_s1_s5_L1_nxt
    inner_loop_pc_s1_s5_L2_nxt = ila.ite(LastJ, h3_4, inner_loop_pc)

    jstate_s1_s5_L3_nxt = jstate_s1_s5_L2_nxt
    inner_loop_pc_s1_s5_L3_nxt = ila.ite(
        LastJ,
        ila.ite(LastV, h0_4, h0_4),  # will choose to go back or not
        inner_loop_pc)

    def nextCondition(l0, l1, l2, l3, default):
        """Select the next-state value for the active SumHidden sub-step.

        Builds a priority ite chain: `l0` under SumHiddenL0, `l1` under
        SumHiddenL1, `l2` under SumHiddenL2, `l3` under SumHiddenL3, and
        `default` when none of these conditions holds.
        """
        # Fold from the lowest-priority case outwards so the expression nests
        # L0 -> L1 -> L2 -> L3 -> default.
        chain = default
        for cond, value in ((SumHiddenL3, l3), (SumHiddenL2, l2),
                            (SumHiddenL1, l1), (SumHiddenL0, l0)):
            chain = ila.ite(cond, value, chain)
        return chain

    h_cnt_s1_s5_nxt = nextCondition(h_cnt_s1_s5_L0_nxt, h_cnt, h_cnt, h_cnt,
                                    h_cnt)
    v_cnt_s1_s5_nxt = ila.ite(SumHiddenL3 & LastJ,
                              ila.ite(LastV, h0_16, v_cnt + K), v_cnt)
    jstate_s1_s5_nxt = nextCondition(jstate_s1_s5_L0_nxt, jstate_s1_s5_L1_nxt,
                                     jstate_s1_s5_L2_nxt, jstate_s1_s5_L3_nxt,
                                     jstate)
    inner_loop_pc_s1_s5_nxt = nextCondition(inner_loop_pc_s1_s5_L0_nxt,
                                            inner_loop_pc_s1_s5_L1_nxt,
                                            inner_loop_pc_s1_s5_L2_nxt,
                                            inner_loop_pc_s1_s5_L3_nxt,
                                            inner_loop_pc)
    train_pc_s1_s5_nxt = ila.ite(SumHiddenL3 & LastJ & LastV, StorePosState,
                                 SumHiddenState)

    # L0
    train_sum_s1_s5_L0_nxt = ila.ite(h_cnt == 0, h0_16, train_sum) + ila.ite(
        ila.load(hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0]) == 1,
        fpconvert(ila.load(edges_input, edge_load_addr), FPedge, FPsum), h0_16)
    _train_max_origin_L0 = ila.ite(
        jstate == 0,
        fpconst(-500, FPsum).ast,
        train_max)  # make sure the first time we are comparing with init sum
    train_max_s1_s5_L0_nxt = ila.ite(
        LastH,
        ila.ite(ila.sgt(train_sum_s1_s5_L0_nxt, _train_max_origin_L0),
                train_sum_s1_s5_L0_nxt, _train_max_origin_L0), train_max)
    visibleEnergy_s1_s5_L0_nxt = ila.ite(
        LastH,
        ila.store(visibleEnergy, jstate[KWIDTH - 1:0], train_sum_s1_s5_L0_nxt),
        visibleEnergy)
    # L1
    # sum3: 64_64  ->   dp: 32_32
    _31_sum = fpconst(31, FPsum).ast
    train_max_s1_s5_L1_nxt = ila.ite(jstate == 0, train_max - _31_sum,
                                     train_max)
    _st_val_L1 = ila.load(visibleEnergy,
                          jstate[KWIDTH - 1:0]) - train_max_s1_s5_L1_nxt
    visibleEnergy_s1_s5_L1_nxt = ila.store(visibleEnergy, jstate[KWIDTH - 1:0],
                                           _st_val_L1)
    # L2
    _pow2_new_val = ila.appfun(to_int_exp,
                               ila.load(visibleEnergy, jstate[KWIDTH - 1:0]))
    _pow2_new_convert = fpconvert(_pow2_new_val, FPpow, FPsum3)
    sumOfpow2_s1_s5_L2_nxt = ila.ite(jstate == 0, h0_64,
                                     sumOfpow2) + _pow2_new_convert
    pow2_s1_s5_L2_nxt = ila.store(pow2, jstate[KWIDTH - 1:0], _pow2_new_val)
    # L3
    _probs = ila.appfun(divide_func,
                        [ila.load(pow2, jstate[KWIDTH - 1:0]), sumOfpow2])
    _RAND = ila.appfun(rand_func)
    _visible_unit_new_val = ila.ite(_probs > _RAND, b1, b0)
    _vu_idx = v_cnt + jstate
    _visible_unit_s1_s5_L3_1 = ila.store(visible_unit,
                                         _vu_idx[VISIBLE_UNIT_WIDTH - 1:0],
                                         _visible_unit_new_val)
    visible_unit_s1_s5_L3_nxt = ila.ite(
        LastJ & LastV,
        ila.store(_visible_unit_s1_s5_L3_1, nv[VISIBLE_UNIT_WIDTH - 1:0], b1),
        _visible_unit_s1_s5_L3_1)
    # when exit visible unit should be made to store 1 at nv

    train_sum_s1_s5_nxt = nextCondition(train_sum_s1_s5_L0_nxt, train_sum,
                                        train_sum, train_sum, train_sum)
    train_max_s1_s5_nxt = nextCondition(train_max_s1_s5_L0_nxt,
                                        train_max_s1_s5_L1_nxt, train_max,
                                        train_max, train_max)
    visible_unit_s1_s5_nxt = nextCondition(visible_unit, visible_unit,
                                           visible_unit,
                                           visible_unit_s1_s5_L3_nxt,
                                           visible_unit)
    visibleEnergy_s1_s5_nxt = nextCondition(visibleEnergy_s1_s5_L0_nxt,
                                            visibleEnergy_s1_s5_L1_nxt,
                                            visibleEnergy, visibleEnergy,
                                            visibleEnergy)
    sumOfpow2_s1_s5_nxt = nextCondition(sumOfpow2, sumOfpow2,
                                        sumOfpow2_s1_s5_L2_nxt, sumOfpow2,
                                        sumOfpow2)
    pow2_s1_s5_nxt = nextCondition(pow2, pow2, pow2_s1_s5_L2_nxt, pow2, pow2)

    # before s6: store pos

    h_cnt_sp_nxt = ila.ite(h_cnt == nh, h0_16, h_cnt + 1)
    v_cnt_sp_nxt = ila.ite(h_cnt == nh, ila.ite(v_cnt == nv, h0_16, v_cnt + 1),
                           v_cnt)
    _data_load = ila.load(train_input, v_cnt[VISIBLE_UNIT_WIDTH - 1:0])
    _pos_sp_cond = (_data_load != 2)
    _pos_sp_val = ila.ite(_data_load != 0, b1, b0) & ila.load(
        hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0])
    _pos_st_addr = (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt
    pos_sp_nxt = ila.store(pos, _pos_st_addr, _pos_sp_val)
    train_pc_sp_nxt = ila.ite((h_cnt == nh) & (v_cnt == nv), EdgeUpdateState,
                              StorePosState)

    # update edge : s6

    h_cnt_s6_nxt = ila.ite(h_cnt == nh, h0_16, h_cnt + 1)
    v_cnt_s6_nxt = ila.ite(h_cnt == nh, ila.ite(v_cnt == nv, v_cnt, v_cnt + 1),
                           v_cnt)

    _pos_ld_addr = (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt
    train_pos = ila.load(pos, _pos_ld_addr) != 0
    train_neg = (ila.load(
        hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0]) != 0) & (ila.load(
            visible_unit, v_cnt[VISIBLE_UNIT_WIDTH - 1:0]) != 0)
    edge_original = ila.load(edges_mem, (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt)
    edge_new = ila.ite((train_pos) & (~train_neg),
                       edge_original + fpconst(LEARN_RATE, FPedge).ast,
                       ila.ite((~train_pos) & (train_neg),
                               edge_original - fpconst(LEARN_RATE, FPedge).ast,
                               edge_original))
    edge_s6_nxt = ila.store(edges_mem, (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt,
                            edge_new)
    train_pc_s6_nxt = ila.ite((h_cnt == nh) & (v_cnt == nv), EdgeUpdateState,
                              EdgeUpdateState)
    # No need to jump back to this state: the train_input_done flag is set back to zero.
    # Don't forget to reset the signals in Uabs ()

    train_done = TrainUabs.getreg('train_input_done')
    train_uabs_index = TrainUabs.getreg('index')
    train_uabs_loop_count = TrainUabs.getreg('loop_count')
    train_uabs_upc = TrainUabs.getreg('upc')

    # add prefix s6 !!!
    s6_complete = (h_cnt == nh) & (v_cnt == nv)
    index_nxt_s6_nxt = ila.ite(
        s6_complete,
        ila.ite((train_uabs_index == nu - 1) & (train_uabs_loop_count != nlp),
                h0_16, train_uabs_index + 1), train_uabs_index)

    # assert (train_uabs_index == ntu - 1) & (train_uabs_loop_count == nlp) should never happen

    loop_count_s6_nxt = ila.ite(
        s6_complete & (train_uabs_index == nu - 1) &
        (train_uabs_loop_count != nlp), train_uabs_loop_count + 1,
        train_uabs_loop_count)
    upc_s6_nxt = ila.ite(s6_complete, StartReadState, train_uabs_upc)
    train_input_done_s6_nxt_nxt = ila.ite(s6_complete, b0, train_done)

    # data -> hidden_unit -> visible_unit -> edge
    # data -> edge

    # add
    def TrainNext(e1, e2, e3, default):
        """Mux a next-state value over the train micro-pc states.

        Yields `e1` when SumEdge holds, else `e2` when SumHidden holds, else
        `e3` when EdgeUpdate holds, else `default`.
        """
        on_edge_update = ila.ite(EdgeUpdate, e3, default)
        on_sum_hidden = ila.ite(SumHidden, e2, on_edge_update)
        return ila.ite(SumEdge, e1, on_sum_hidden)

    def TrainNextSP(e1, e2, e3, e4, default):
        """Like TrainNext, but with an extra StorePos case.

        Priority order: SumEdge -> `e1`, SumHidden -> `e2`, StorePos -> `e3`,
        EdgeUpdate -> `e4`, otherwise `default`.
        """
        on_edge_update = ila.ite(EdgeUpdate, e4, default)
        on_store_pos = ila.ite(StorePos, e3, on_edge_update)
        on_sum_hidden = ila.ite(SumHidden, e2, on_store_pos)
        return ila.ite(SumEdge, e1, on_sum_hidden)

    def TrainChoice5(name, e1, e2, e3, default):
        """Forward `name` and the four candidates e1, e2, e3, default to ila.choice."""
        candidates = (e1, e2, e3, default)
        return ila.choice(name, *candidates)

    def TrainChoice4(name, e1, e2, default):
        """Forward `name` and the three candidates e1, e2, default to ila.choice."""
        candidates = (e1, e2, default)
        return ila.choice(name, *candidates)

    def TrainChoice3(name, e1, default):
        """Forward `name` and the two candidates e1, default to ila.choice."""
        candidates = (e1, default)
        return ila.choice(name, *candidates)

    TrainUabs.set_init('train_upc', pc_init)
    TrainUabs.set_init('train_v_cnt', v_cnt_init)
    TrainUabs.set_init('train_h_cnt', h_cnt_init)

    TrainUabs.set_next(
        'jstate', TrainNext(jstate_s0_nxt, jstate_s1_s5_nxt, jstate, jstate))
    TrainUabs.set_next(
        'train_sum',
        TrainNext(train_sum_s0_nxt, train_sum_s1_s5_nxt, train_sum, train_sum))
    TrainUabs.set_next(
        'train_v_cnt',
        TrainNextSP(v_cnt_s0_nxt, v_cnt_s1_s5_nxt, v_cnt_sp_nxt, v_cnt_s6_nxt,
                    v_cnt))
    TrainUabs.set_next(
        'train_h_cnt',
        TrainNextSP(h_cnt_s0_nxt, h_cnt_s1_s5_nxt, h_cnt_sp_nxt, h_cnt_s6_nxt,
                    h_cnt))
    TrainUabs.set_next(
        'train_upc',
        TrainNextSP(train_pc_s0_nxt, train_pc_s1_s5_nxt, train_pc_sp_nxt,
                    train_pc_s6_nxt, train_pc))

    TrainUabs.set_next(
        'train_max',
        TrainNext(train_max, train_max_s1_s5_nxt, train_max, train_max))
    TrainUabs.set_next(
        'hidden_unit',
        TrainNext(hidden_update_s0_2, hidden_unit, hidden_unit, hidden_unit))
    TrainUabs.set_next(
        'visible_unit',
        TrainNext(visible_unit, visible_unit_s1_s5_nxt, visible_unit,
                  visible_unit))
    TrainUabs.set_next('edges',
                       TrainNext(edges_mem, edges_mem, edge_s6_nxt, edges_mem))
    TrainUabs.set_next(
        'index',
        TrainNext(train_uabs_index, train_uabs_index, index_nxt_s6_nxt,
                  train_uabs_index))
    TrainUabs.set_next(
        'loop_count',
        TrainNext(train_uabs_loop_count, train_uabs_loop_count,
                  loop_count_s6_nxt, train_uabs_loop_count))
    TrainUabs.set_next(
        'upc',
        TrainNext(train_uabs_upc, train_uabs_upc, upc_s6_nxt, train_uabs_upc))
    TrainUabs.set_next(
        'train_input_done',
        TrainNext(train_done, train_done, train_input_done_s6_nxt_nxt,
                  train_done))
    # newly added
    TrainUabs.set_next(
        'visibleEnergies',
        TrainNext(visibleEnergy, visibleEnergy_s1_s5_nxt, visibleEnergy,
                  visibleEnergy))
    TrainUabs.set_next(
        'sumOfpow2',
        TrainNext(sumOfpow2, sumOfpow2_s1_s5_nxt, sumOfpow2, sumOfpow2))
    TrainUabs.set_next('pow2', TrainNext(pow2, pow2_s1_s5_nxt, pow2, pow2))
    TrainUabs.set_next('pos', ila.ite(StorePos, pos_sp_nxt, pos))
    TrainUabs.set_next(
        'per_v_pc',
        TrainNext(inner_loop_pc_s0_nxt, inner_loop_pc_s1_s5_nxt, inner_loop_pc,
                  inner_loop_pc))

    #------------------------------------
    #  Predict UUABS
    #------------------------------------
    # data -> predict_result

    PredictUabs = uabs.add_microabstraction('predict', predict_input_done == 1)

    sigmoid_func = PredictUabs.fun('sigmoid', 64, [16])  # DATA_sum_, 01_D
    rand_func = PredictUabs.fun('rand', 64, [])  # generate random number
    to_int_exp = PredictUabs.fun('to_int_exp', 32, [16])  #
    round_func = PredictUabs.fun('round', 8, [32])  # 05_D -> u8
    divide_func = PredictUabs.fun(
        'divide', 64, [32, 64])  # dp:32_32 / sum_of_pow2 64_64 = 64_1

    hidden_unit = PredictUabs.mem('hidden_unit', HIDDEN_UNIT_WIDTH, 1)
    visibleEnergy = PredictUabs.mem('visibleEnergies', KWIDTH, 16)
    predict_result = PredictUabs.getmem('predict_result')
    predict_sum = PredictUabs.reg('predict_sum', 16)
    predict_max = PredictUabs.reg('predict_max', 16)
    sumOfpow2 = PredictUabs.reg('sumOfpow2', 64)
    pow2 = PredictUabs.mem('pow2', KWIDTH, 32)

    predict_vector = PredictUabs.mem('predict_vector', VISIBLE_UNIT_WIDTH, 1)
    inner_loop_pc = PredictUabs.reg('per_v_pc', 4)

    count = PredictUabs.reg('count', 8)
    jstate = PredictUabs.reg('jstate', 16)
    expectation = PredictUabs.reg('expectation', 32)
    prediction = PredictUabs.reg('prediction', 8)

    predict_pc = PredictUabs.reg('predict_upc', 4)  # Re-evaluate
    v_cnt = PredictUabs.reg('predict_v_cnt', 16)
    h_cnt = PredictUabs.reg('predict_h_cnt', 16)

    predict_input = PredictUabs.getmem('data')
    edges_input = PredictUabs.getmem('edges')
    nv = PredictUabs.getreg('num_visible')
    nh = PredictUabs.getreg('num_hidden')
    nu = PredictUabs.getreg('num_users')
    ntu = PredictUabs.getreg('num_testusers')
    nlp = PredictUabs.getreg('num_loops')

    SumEdge = predict_pc == 0
    SumEdgeState = const(0, 4)
    SumHidden = predict_pc == 1
    SumHiddenState = const(1, 4)
    GenResult = predict_pc == 3
    GenResultState = const(3, 4)
    WaitForWrite = predict_pc == 2
    WaitForWriteState = const(2, 4)

    PredictUabs.decode_exprs = [SumEdge, SumHidden, WaitForWrite]

    #Begin
    v_cnt_init = const(0, 16)
    h_cnt_init = const(0, 16)
    pc_init = const(0, 4)

    #SumEdge: s0
    edge_load_addr = (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt
    predict_sum_s0_nxt = ila.ite(v_cnt == 0, const(
        0, 16), predict_sum) + ila.ite(
            ila.load(predict_input, v_cnt[DATAMEM_ADDR_WIDTH - 1:0]) == 1,
            fpconvert(ila.load(edges_input, edge_load_addr), FPedge, FPsum),
            const(0, 16))
    v_cnt_s0_nxt = ila.ite(v_cnt == nv, h0_16, v_cnt + 1)
    h_cnt_s0_nxt = ila.ite((v_cnt == nv),
                           ila.ite(h_cnt == nh - 1, h0_16, h_cnt + 1), h_cnt)
    #                                                     Here ^^^ is for transiting to next state

    hidden_update_s0_0 = ila.ite(
        fpconst(0.5, FP01_D).ast < ila.appfun(sigmoid_func,
                                              predict_sum_s0_nxt), b1, b0)
    hidden_update_s0_1 = ila.ite(
        v_cnt == nv,
        ila.store(hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0],
                  hidden_update_s0_0), hidden_unit)
    hidden_update_s0_2 = ila.ite((v_cnt == nv) & (h_cnt == nh - 1),
                                 ila.store(hidden_update_s0_1,
                                           nh[HIDDEN_UNIT_WIDTH - 1:0], b1),
                                 hidden_update_s0_1)
    hidden_update_s0_next = hidden_update_s0_2
    predict_pc_s0_nxt = ila.ite((v_cnt == nv) & (h_cnt == nh - 1),
                                SumHiddenState, SumEdgeState)

    jstate_s0_nxt = h0_16
    count_s0_nxt = ila.const(0, 8)
    inner_loop_pc_s0_nxt = h0_4
    # add prefix :
    # predict_sum_nxt = ila.ite(SumEdge, predict_sum_s0_nxt, ila.ite(SumHidden, ... ) )

    #-----------------------------
    # SumHiddensK0-K4 : s1-s5
    #
    #-----------------------------

    LastH = h_cnt == nh
    LastJ = jstate == K - 1
    LastV = (v_cnt + K == nv) | (v_cnt + K >= NUM_VISIBLE_MAX)
    SumHiddenL0 = SumHidden & (inner_loop_pc == 0)
    SumHiddenL1 = SumHidden & (inner_loop_pc == 1)
    SumHiddenL2 = SumHidden & (inner_loop_pc == 2)
    SumHiddenL3 = SumHidden & (inner_loop_pc == 3)
    SumHiddenL4 = SumHidden & (inner_loop_pc == 4)

    h_cnt_s1_s5_L0_nxt = ila.ite(LastH, h0_16, h_cnt + 1)
    jstate_s1_s5_L0_nxt = ila.ite(LastH, ila.ite(LastJ, h0_16, jstate + 1),
                                  jstate)
    inner_loop_pc_s1_s5_L0_nxt = ila.ite(LastJ & LastH, h1_4, inner_loop_pc)

    jstate_s1_s5_L1_nxt = ila.ite(LastJ, h0_16, jstate + 1)
    inner_loop_pc_s1_s5_L1_nxt = ila.ite(LastJ, h2_4, inner_loop_pc)

    jstate_s1_s5_L2_nxt = jstate_s1_s5_L1_nxt
    inner_loop_pc_s1_s5_L2_nxt = ila.ite(LastJ, h3_4, inner_loop_pc)

    jstate_s1_s5_L3_nxt = jstate_s1_s5_L2_nxt
    inner_loop_pc_s1_s5_L3_nxt = ila.ite(LastJ, h4_4, inner_loop_pc)

    jstate_s1_s5_L4_nxt = jstate_s1_s5_L3_nxt
    inner_loop_pc_s1_s5_L4_nxt = ila.ite(
        LastJ,
        ila.ite(LastV, h0_4, h0_4),  # will choose to go back or not
        inner_loop_pc)

    def nextCondition(l0, l1, l2, l3, l4, default):
        """Select the next-state value for the active SumHidden sub-step.

        Builds a priority ite chain: `l0` under SumHiddenL0, `l1` under
        SumHiddenL1, `l2` under SumHiddenL2, `l3` under SumHiddenL3, `l4`
        under SumHiddenL4, and `default` when none of these conditions holds.
        """
        # Fold from the lowest-priority case outwards so the expression nests
        # L0 -> L1 -> L2 -> L3 -> L4 -> default.
        chain = default
        for cond, value in ((SumHiddenL4, l4), (SumHiddenL3, l3),
                            (SumHiddenL2, l2), (SumHiddenL1, l1),
                            (SumHiddenL0, l0)):
            chain = ila.ite(cond, value, chain)
        return chain

    h_cnt_s1_s5_nxt = nextCondition(h_cnt_s1_s5_L0_nxt, h_cnt, h_cnt, h_cnt,
                                    h_cnt, h_cnt)
    v_cnt_s1_s5_nxt = ila.ite(SumHiddenL4 & LastJ,
                              ila.ite(LastV, h0_16, v_cnt + K), v_cnt)
    jstate_s1_s5_nxt = nextCondition(jstate_s1_s5_L0_nxt, jstate_s1_s5_L1_nxt,
                                     jstate_s1_s5_L2_nxt, jstate_s1_s5_L3_nxt,
                                     jstate_s1_s5_L4_nxt, jstate)

    inner_loop_pc_s1_s5_nxt = nextCondition(inner_loop_pc_s1_s5_L0_nxt,
                                            inner_loop_pc_s1_s5_L1_nxt,
                                            inner_loop_pc_s1_s5_L2_nxt,
                                            inner_loop_pc_s1_s5_L3_nxt,
                                            inner_loop_pc_s1_s5_L4_nxt,
                                            inner_loop_pc)

    predict_pc_s1_s5_nxt = ila.ite(SumHiddenL4 & LastJ & LastV, GenResultState,
                                   SumHiddenState)

    # L0
    predict_sum_s1_s5_L0_nxt = ila.ite(
        h_cnt == 0, h0_16, predict_sum) + ila.ite(
            ila.load(hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0]) == 1,
            fpconvert(ila.load(edges_input, edge_load_addr), FPedge, FPsum),
            h0_16)
    _predict_max_origin_L0 = ila.ite(
        jstate == 0,
        fpconst(-500, FPsum).ast,
        predict_max)  # make sure the first time we are comparing with init sum
    predict_max_s1_s5_L0_nxt = ila.ite(
        LastH,
        ila.ite(ila.sgt(predict_sum_s1_s5_L0_nxt, _predict_max_origin_L0),
                predict_sum_s1_s5_L0_nxt, _predict_max_origin_L0), predict_max)
    visibleEnergy_s1_s5_L0_nxt = ila.ite(
        LastH,
        ila.store(visibleEnergy, jstate[KWIDTH - 1:0],
                  predict_sum_s1_s5_L0_nxt), visibleEnergy)
    # L1
    # sum3: 64_64  ->   dp: 32_32
    _31_sum = fpconst(31, FPsum).ast
    predict_max_s1_s5_L1_nxt = ila.ite(jstate == 0, predict_max - _31_sum,
                                       predict_max)
    _st_val_L1 = ila.load(visibleEnergy,
                          jstate[KWIDTH - 1:0]) - predict_max_s1_s5_L1_nxt
    visibleEnergy_s1_s5_L1_nxt = ila.store(visibleEnergy, jstate[KWIDTH - 1:0],
                                           _st_val_L1)
    # L2
    _pow2_new_val = ila.appfun(to_int_exp,
                               ila.load(visibleEnergy, jstate[KWIDTH - 1:0]))
    _pow2_new_convert = fpconvert(_pow2_new_val, FPpow, FPsum3)
    sumOfpow2_s1_s5_L2_nxt = ila.ite(jstate == 0, h0_64,
                                     sumOfpow2) + _pow2_new_convert
    pow2_s1_s5_L2_nxt = ila.store(pow2, jstate[KWIDTH - 1:0], _pow2_new_val)
    # L3
    _probs = ila.appfun(divide_func,
                        [ila.load(pow2, jstate[KWIDTH - 1:0]), sumOfpow2])
    _mul = fixpoint(_probs, FP01_D) * fixpoint(jstate, FPu16)
    expectation_s1_s5_L3_nxt = ila.ite(jstate == 0, h0_32,
                                       expectation) + _mul.toFormat(FP05_D)
    # L4
    _prediction = ila.zero_extend(ila.appfun(round_func, [expectation]), 16)
    _pv_val = ila.ite(jstate == _prediction, b1, b0)
    _pv_idx = v_cnt + jstate
    _first_store = ila.store(predict_vector, _pv_idx[VISIBLE_UNIT_WIDTH - 1:0],
                             _pv_val)
    predict_vector_s1_s5_L4_nxt = ila.ite(
        SumHiddenL4 & LastV & LastJ,
        ila.store(_first_store, nv[VISIBLE_UNIT_WIDTH - 1:0], b1),
        _first_store)

    predict_sum_s1_s5_nxt = nextCondition(predict_sum_s1_s5_L0_nxt,
                                          predict_sum, predict_sum,
                                          predict_sum, predict_sum,
                                          predict_sum)
    predict_max_s1_s5_nxt = nextCondition(predict_max_s1_s5_L0_nxt,
                                          predict_max_s1_s5_L1_nxt,
                                          predict_max, predict_max,
                                          predict_max, predict_max)
    visibleEnergy_s1_s5_nxt = nextCondition(visibleEnergy_s1_s5_L0_nxt,
                                            visibleEnergy_s1_s5_L1_nxt,
                                            visibleEnergy, visibleEnergy,
                                            visibleEnergy, visibleEnergy)
    sumOfpow2_s1_s5_nxt = nextCondition(sumOfpow2, sumOfpow2,
                                        sumOfpow2_s1_s5_L2_nxt, sumOfpow2,
                                        sumOfpow2, sumOfpow2)
    pow2_s1_s5_nxt = nextCondition(pow2, pow2, pow2_s1_s5_L2_nxt, pow2, pow2,
                                   pow2)
    expectation_s1_s5_nxt = ila.ite(SumHiddenL3, expectation_s1_s5_L3_nxt,
                                    expectation)
    predict_vector_s1_s5_nxt = ila.ite(SumHiddenL4,
                                       predict_vector_s1_s5_L4_nxt,
                                       predict_vector)
    count_s1_s5_nxt = ila.ite(SumHiddenL4 & LastV & LastJ, h0_8, count)

    # before s6: generate result (GenResult state; these values carry the `_sp` suffix)
    LastV = (v_cnt + K == nv) | (v_cnt + K >= NUM_VISIBLE_MAX)
    LastJ = jstate == K - 1
    v_cnt_sp_nxt = ila.ite(LastV, v_cnt + K, v_cnt + K)
    jstate_sp_nxt = ila.ite(LastJ, h0_16, jstate + 1)

    _prediction_old = ila.ite(jstate == 0, h0_8, prediction)
    _pv_idx = v_cnt + jstate
    _predict_result_sp_val = ila.load(predict_vector,
                                      _pv_idx[VISIBLE_UNIT_WIDTH - 1:0])

    prediction_sp_nxt = ila.ite(_predict_result_sp_val == 1, (jstate + 1)[7:0],
                                _prediction_old)
    count_sp_nxt = ila.ite(LastJ, count + 1, count)
    predict_result_sp_nxt = ila.ite(
        LastJ,
        ila.store(predict_result, count[PREDICT_RESULT_WIDTH - 1:0],
                  prediction), predict_result)
    predict_pc_sp_nxt = ila.ite(LastV & LastJ, WaitForWriteState,
                                GenResultState)

    wr_complete = PredictUabs.getreg('wr_complete')
    wr_req = PredictUabs.getreg('wr_request')
    wr_len = PredictUabs.getreg('wr_length')
    wr_idx = PredictUabs.getreg('wr_index')
    cur_idx = PredictUabs.getreg('index')  # 32

    exitLoop = LastV & LastJ
    wr_request_sp_nxt = ila.ite(exitLoop, b1, wr_req)
    wr_index_sp_nxt = ila.ite(
        exitLoop,
        ila.zero_extend(nm, 32) * ila.zero_extend(cur_idx, 32), wr_idx)
    wr_length_sp_nxt = ila.ite(exitLoop, ila.zero_extend(nm, 32), wr_len)
    wr_complete_sp_nxt = ila.ite(exitLoop, b0, wr_complete)
    # s6:

    #---------------------
    # wait for write : s6 (WaitForWrite state)
    #---------------------

    FinishOneRound = (wr_req == 0) & (wr_complete == 1)

    predict_pc_s6_nxt = ila.ite(FinishOneRound, WaitForWriteState,
                                WaitForWriteState)
    # its value does not matter because it will be terminated by predict_input_done
    # don't forget to set back signals in Uabs ()

    predict_done = PredictUabs.getreg('predict_input_done')
    predict_uabs_index = PredictUabs.getreg('index')
    predict_uabs_loop_count = PredictUabs.getreg('loop_count')
    predict_uabs_upc = PredictUabs.getreg('upc')
    all_done = PredictUabs.getreg('done')

    # add prefix s6 !!!
    index_nxt_s6_nxt = ila.ite(
        FinishOneRound,
        ila.ite(
            (predict_uabs_index == ntu - 1) & (predict_uabs_loop_count == nlp),
            predict_uabs_index, predict_uabs_index + 1), predict_uabs_index)

    wr_complete_s6_nxt = ila.ite(FinishOneRound, b0, wr_complete)
    # assert (predict_uabs_index == nu - 1)  & (predict_uabs_loop_count != nlp) should never happen

    #loop_count_s6_nxt = ila.ite( (predict_uabs_index == nu - 1)  & (predict_uabs_loop_count != nlp) , predict_uabs_loop_count + 1, predict_uabs_loop_count )

    upc_s6_nxt = ila.ite(
        FinishOneRound,
        ila.ite(
            (predict_uabs_index == ntu - 1) & (predict_uabs_loop_count == nlp),
            FinishState, StartReadState), predict_uabs_upc)
    predict_input_done_s6_nxt_nxt = ila.ite(FinishOneRound, b0, predict_done)

    all_done_s6_nxt = ila.ite(
        FinishOneRound & (predict_uabs_index == ntu - 1) &
        (predict_uabs_loop_count == nlp), b1, b0)

    # data -> hidden_unit -> visible_unit -> edge
    # data -> edge

    # add

    # add
    def predictNext(e1, e2, e3, default):
        """Mux a next-state value over the predict micro-pc states.

        Yields `e1` when SumEdge holds, else `e2` when SumHidden holds, else
        `e3` when WaitForWrite holds, else `default`.
        """
        on_wait_for_write = ila.ite(WaitForWrite, e3, default)
        on_sum_hidden = ila.ite(SumHidden, e2, on_wait_for_write)
        return ila.ite(SumEdge, e1, on_sum_hidden)

    def predictNextSp(e1, e2, e3, e4, default):
        """Like predictNext, but with an extra GenResult case.

        Priority order: SumEdge -> `e1`, SumHidden -> `e2`, GenResult -> `e3`,
        WaitForWrite -> `e4`, otherwise `default`.
        """
        on_wait_for_write = ila.ite(WaitForWrite, e4, default)
        on_gen_result = ila.ite(GenResult, e3, on_wait_for_write)
        on_sum_hidden = ila.ite(SumHidden, e2, on_gen_result)
        return ila.ite(SumEdge, e1, on_sum_hidden)

    def ite(inst, e, default):
        """Thin wrapper: pass (inst, e, default) unchanged to ila.ite."""
        selected = ila.ite(inst, e, default)
        return selected

    PredictUabs.set_init('predict_upc', pc_init)
    PredictUabs.set_init('predict_v_cnt', v_cnt_init)
    PredictUabs.set_init('predict_h_cnt', h_cnt_init)

    PredictUabs.set_next(
        'jstate',
        predictNextSp(jstate_s0_nxt, jstate_s1_s5_nxt, jstate_sp_nxt, jstate,
                      jstate))
    PredictUabs.set_next(
        'predict_sum',
        predictNext(predict_sum_s0_nxt, predict_sum_s1_s5_nxt, predict_sum,
                    predict_sum))
    PredictUabs.set_next(
        'predict_v_cnt',
        predictNextSp(v_cnt_s0_nxt, v_cnt_s1_s5_nxt, v_cnt_sp_nxt, v_cnt,
                      v_cnt))
    PredictUabs.set_next(
        'predict_h_cnt',
        predictNext(h_cnt_s0_nxt, h_cnt_s1_s5_nxt, h_cnt, h_cnt))
    PredictUabs.set_next(
        'predict_upc',
        predictNextSp(predict_pc_s0_nxt, predict_pc_s1_s5_nxt,
                      predict_pc_sp_nxt, predict_pc_s6_nxt, predict_pc))
    PredictUabs.set_next(
        'predict_max',
        predictNext(predict_max, predict_max_s1_s5_nxt, predict_max,
                    predict_max))
    PredictUabs.set_next(
        'hidden_unit',
        predictNext(hidden_update_s0_2, hidden_unit, hidden_unit, hidden_unit))
    PredictUabs.set_next(
        'count',
        predictNextSp(count_s0_nxt, count_s1_s5_nxt, count_sp_nxt, count,
                      count))
    PredictUabs.set_next(
        'per_v_pc',
        predictNext(inner_loop_pc_s0_nxt, inner_loop_pc_s1_s5_nxt,
                    inner_loop_pc, inner_loop_pc))

    PredictUabs.set_next(
        'index',
        predictNext(predict_uabs_index, predict_uabs_index, index_nxt_s6_nxt,
                    predict_uabs_index))
    PredictUabs.set_next(
        'upc',
        predictNext(predict_uabs_upc, predict_uabs_upc, upc_s6_nxt,
                    predict_uabs_upc))
    PredictUabs.set_next(
        'predict_input_done',
        predictNext(predict_done, predict_done, predict_input_done_s6_nxt_nxt,
                    predict_done))
    PredictUabs.set_next(
        'done', predictNext(all_done, all_done, all_done_s6_nxt, all_done))

    PredictUabs.set_next(
        'wr_request',
        predictNextSp(wr_req, wr_req, wr_request_sp_nxt, wr_req, wr_req))
    PredictUabs.set_next(
        'wr_length',
        predictNextSp(wr_len, wr_len, wr_length_sp_nxt, wr_len, wr_len))
    PredictUabs.set_next(
        'wr_index',
        predictNextSp(wr_idx, wr_idx, wr_index_sp_nxt, wr_idx, wr_idx))
    PredictUabs.set_next(
        'wr_complete',
        predictNextSp(wr_complete, wr_complete, wr_complete_sp_nxt,
                      wr_complete_s6_nxt, wr_complete))
    # newly added
    PredictUabs.set_next(
        'visibleEnergies',
        predictNext(visibleEnergy, visibleEnergy_s1_s5_nxt, visibleEnergy,
                    visibleEnergy))
    PredictUabs.set_next(
        'sumOfpow2',
        predictNext(sumOfpow2, sumOfpow2_s1_s5_nxt, sumOfpow2, sumOfpow2))
    PredictUabs.set_next('pow2', predictNext(pow2, pow2_s1_s5_nxt, pow2, pow2))
    PredictUabs.set_next(
        'expectation',
        predictNext(expectation, expectation_s1_s5_nxt, expectation,
                    expectation))
    PredictUabs.set_next(
        'predict_vector',
        predictNext(predict_vector, predict_vector_s1_s5_nxt, predict_vector,
                    predict_vector))
    PredictUabs.set_next('prediction',
                         ite(GenResult, prediction_sp_nxt, prediction))
    PredictUabs.set_next('predict_result',
                         ite(GenResult, predict_result_sp_nxt, predict_result))

    #------------------------------------
    #  Store UABS
    #------------------------------------
    # store is triggered by inst as uabs?

    # wr_grant == 1 is an instruction
    wr_granted = rbm.reg('wr_granted', 1)
    rbm.set_next('wr_granted',
                 ila.ite((wr_request & wr_grant) == 1, b1, wr_granted))
    data_out_1st_set = ila.zero_extend(
        ila.load(predict_result, const(0, PREDICT_RESULT_WIDTH)), 32)
    rbm.set_next(
        'data_out',
        ila.ite((wr_request & wr_grant) == 1, data_out_1st_set, data_out))
    # This is a hard decision:
    # because we use set_next, the reaction we define here will appear in the next cycle

    StoreUabs = rbm.add_microabstraction('store', wr_granted == 1)
    store_idx = StoreUabs.reg('i', 16)
    nm = StoreUabs.getreg('num_movies')
    wr_granted = StoreUabs.getreg('wr_granted')
    wr_request = StoreUabs.getreg('wr_request')
    wr_complete = StoreUabs.getreg('wr_complete')
    predict_result = StoreUabs.getmem('predict_result')

    StoreUabs.set_init('i', h1_16)
    StoreUabs.set_next('i', ila.ite(store_idx < nm, store_idx + 1, store_idx))
    StoreUabs.set_next('wr_granted', ila.ite(store_idx < nm, wr_granted, b0))
    StoreUabs.set_next('wr_request', ila.ite(store_idx == 0, b0, wr_request))
    StoreUabs.set_next('wr_complete', ila.ite(store_idx < nm, wr_complete, b1))
    data_out = StoreUabs.getreg('data_out')
    # possibly one cycle earlier
    StoreUabs.set_next(
        'data_out',
        ila.zero_extend(
            ila.load(predict_result, store_idx[PREDICT_RESULT_WIDTH - 1:0]),
            32))

    #---------------------------
    # Add no next
    #
    def keepNC(Abs, name):
        """Keep register `name` of `Abs` unchanged: its next value is its current value."""
        current = Abs.getreg(name)
        Abs.set_next(name, current)

    def keepMemNC(Abs, name):
        """Keep memory `name` of `Abs` unchanged: its next value is its current value."""
        current = Abs.getmem(name)
        Abs.set_next(name, current)

    keepNC(rbm, 'done')
    keepNC(rbm, 'wr_request')
    keepNC(rbm, 'wr_index')
    keepNC(rbm, 'wr_length')
    keepNC(rbm, 'rd_index')
    keepNC(rbm, 'rd_length')
    keepNC(rbm, 'rd_request')

    keepMemNC(uabs, 'edges')

    keepNC(rbm, 'rd_complete')
    keepNC(rbm, 'wr_complete')

    return rbm
Exemple #16
0
 def auxMulh_u(self, dataA, dataB):	#unsigned mul
 	"""Return the high half of the unsigned product dataA * dataB.

 	Both operands are zero-extended to 2 * SCALAR_REG_BITS bits and
 	multiplied at that width, so the full product is available. The
 	result is the upper SCALAR_REG_BITS bits of that product.

 	NOTE(review): assumes dataA and dataB are SCALAR_REG_BITS wide;
 	confirm against callers.
 	"""
 	dataADoubleLength = ila.zero_extend(dataA, 2 * SCALAR_REG_BITS)
 	dataBDoubleLength = ila.zero_extend(dataB, 2 * SCALAR_REG_BITS)
 	resultDoubleLength = dataADoubleLength * dataBDoubleLength
 	# "mulh" is multiply-high: take the upper half. The previous [31:0]
 	# slice returned the low half, which does not depend on the
 	# zero-extension at all and is identical for signed and unsigned.
 	mulResult = resultDoubleLength[2 * SCALAR_REG_BITS - 1:SCALAR_REG_BITS]
 	return mulResult
Exemple #17
0
def createSHAILA(synstates, enable_ps):
    """Build the SHA accelerator ILA template, synthesize it and emit a simulator.

    Args:
        synstates: iterable of state names. Each name is passed to
            ``m.synthesize`` and the resulting next-state AST is exported to
            ``asts/<state>_<suffix>``.
        enable_ps: value assigned to ``m.enable_parameterized_synthesis``;
            it also selects the ``en`` / ``dis`` suffix used in output names.

    Side effects:
        Writes per-state synthesis times to ``sha-times-<suffix>.txt``,
        exports one AST per state, prints the total synthesis time and
        generates the simulator into the ``sim`` directory.  Returns nothing.
    """
    m = ila.Abstraction("sha")
    m.enable_parameterized_synthesis = enable_ps

    # I/O interface: this is where commands come from.
    cmd = m.inp('cmd', 2)
    cmdaddr = m.inp('cmdaddr', 16)
    cmddata = m.inp('cmddata', 8)
    # response.
    dataout = m.reg('dataout', 8)

    # internal arch state.
    state = m.reg('sha_state', 3)
    rdaddr = m.reg('sha_rdaddr', 16)
    wraddr = m.reg('sha_wraddr', 16)
    oplen = m.reg('sha_len', 16)

    # for the uinst.
    bytes_read = m.reg('sha_bytes_read', 16)
    rd_data = m.reg('sha_rd_data', 512)
    hs_data = m.reg('sha_hs_data', 160)
    xram = m.mem('XRAM', 16, 8)
    sha = m.fun('sha', 160, [512])

    # fetch is just looking at the input command.
    m.fetch_expr = ila.concat([state, cmd, cmdaddr, cmddata])
    m.fetch_valid = (cmd == 1) | (cmd == 2)

    # decode: one expression per (state, command, address) combination in
    # the 0xfe00..0xfe0f register window.
    rdcmds = [(state == i) & (cmd == 1) & (cmdaddr == addr)
              for addr in xrange(0xfe00, 0xfe10) for i in [0, 1, 2, 3, 4]]
    wrcmds = [(state == 0) & (cmd == 2) & (cmdaddr == addr)
              for addr in xrange(0xfe00, 0xfe10)]
    nopcmds = [(state == i) & (cmd != 1) & (cmdaddr == addr)
               for addr in xrange(0xfe00, 0xfe10) for i in [1, 2, 3, 4]]
    m.decode_exprs = rdcmds + wrcmds + nopcmds

    # read commands.
    statebyte = ila.zero_extend(state, 8)
    rdaddrbyte = ila.readchunk('rd_addr', rdaddr, 8)
    wraddrbyte = ila.readchunk('wr_addr', wraddr, 8)
    oplenbyte = ila.readchunk('op_len', oplen, 8)
    dataoutnext = ila.choice(
        'dataout',
        [statebyte, rdaddrbyte, wraddrbyte, oplenbyte,
         m.const(0, 8)])
    m.set_next('dataout', dataoutnext)

    # write commands.
    def mb_reg_wr(name, reg):
        # multibyte register write.
        reg_wr = ila.writechunk('wr_' + name, reg, cmddata)
        reg_nxt = ila.choice('nxt_' + name, [reg_wr, reg])
        m.set_next(name, reg_nxt)

    mb_reg_wr('sha_rdaddr', rdaddr)
    mb_reg_wr('sha_wraddr', wraddr)
    mb_reg_wr('sha_len', oplen)

    # state
    state_next = ila.choice('state_next', [
        m.const(0, 3),
        m.const(1, 3),
        m.const(2, 3),
        m.const(3, 3),
        m.const(4, 3),
        ila.ite(cmddata == 1, m.const(1, 3), state),
        ila.ite(bytes_read < oplen, m.const(1, 3), m.const(4, 3))
    ])
    m.set_next('sha_state', state_next)

    # these are for the uinst
    # bytes_read
    #bytes_read_inc = ila.ite(bytes_read+64 <= oplen, bytes_read+64, oplen)
    bytes_read_inc = bytes_read + 64
    bytes_read_rst = ila.ite(cmddata == 1, m.const(0, 16), bytes_read)
    bytes_read_nxt = ila.choice(
        'bytes_read_nxt',
        [m.const(0, 16), bytes_read_inc, bytes_read_rst, bytes_read])
    m.set_next('sha_bytes_read', bytes_read_nxt)
    # rd_data
    rdblock_little = ila.loadblk(xram, rdaddr + bytes_read, 64)
    rdblock_big = ila.loadblk_big(xram, rdaddr + bytes_read, 64)
    rd_data_nxt = ila.choice('rd_data_nxt', rdblock_big, rdblock_little,
                             rd_data)
    m.set_next('sha_rd_data', rd_data_nxt)
    # hs_data
    sha_hs_data = ila.appfun(sha, [rd_data])
    hs_data_nxt = ila.choice('hs_data_nxt', sha_hs_data, hs_data)
    m.set_next('sha_hs_data', hs_data_nxt)
    # xram write
    xram_w_sha_little = ila.storeblk(xram, wraddr, hs_data)
    xram_w_sha_big = ila.storeblk_big(xram, wraddr, hs_data)
    xram_nxt = ila.choice('xram_nxt', xram, xram_w_sha_little, xram_w_sha_big)
    m.set_next('XRAM', xram_nxt)

    suffix = 'en' if enable_ps else 'dis'
    t_elapsed = 0
    # synthesis.
    sim = lambda s: SHA().simulate(s)
    # The timing log is managed by ``with`` so it is flushed and closed even
    # when synthesis or export raises part-way through the loop.
    with open('sha-times-%s.txt' % suffix, 'wt') as timefile:
        for s in synstates:
            st = time.clock()
            m.synthesize(s, sim)
            dt = time.clock() - st
            timefile.write('%s %.2f\n' % (s, dt))
            t_elapsed += dt

            ast = m.get_next(s)
            m.exportOne(ast, 'asts/%s_%s' % (s, suffix))

    print('time: %.2f' % t_elapsed)
    #m.generateSim('tmp/shasim.hpp')
    m.generateSimToDir('sim')
Exemple #18
0
	def vreg_nxt(self, regNo, laneNo):
		"""Build the next-state expression for lane ``laneNo`` of vector register ``regNo``.

		The register keeps its current value unless ``self.dest == regNo``.
		When it is the destination, the result depends on the instruction
		class flags (``isRegReg``, ``isImmediate``, ``isLoad``), on the
		format field (``rrType`` / ``immType``) and on the opcode.  In the
		masked formats a lane whose bit in the mask register is 0 keeps its
		current value.

		Args:
			regNo: index of the vector register; also compared with ``self.dest``.
			laneNo: lane index, used both to select the lane and the mask bit.

		Returns:
			The expression produced by the nested ``ila.ite`` calls below.
		"""
		ssreg1 = self.indexToSGPR(self.rrSrc1)
		ssreg2 = self.indexToSGPR(self.rrSrc2)
		vsreg1 = self.indexToVGPR(self.rrSrc1, self.model.const(laneNo, SCALAR_REG_BITS))
		vsreg2 = self.indexToVGPR(self.rrSrc2, self.model.const(laneNo, SCALAR_REG_BITS))
		mask = self.indexToSGPR(self.rrMask)

		# Load instruction: the address and loaded value are computed here,
		# but ``load_val`` is not referenced by the expression below.
		addr = self.indexToSGPR(self.memPtr) + ila.sign_extend(self.memOffSet, SCALAR_REG_BITS)
		load_val = ila.load(self.mem, ila.zero_extend(addr[31:2], PC_BITS))

		# NOTE(review): every MULH_I branch below uses a plain ``*`` -- confirm
		# that this matches the intended multiply semantics.
		return ila.ite(self.dest == regNo, 
				ila.ite(self.isRegReg,
					# rrType 0b001: vector lane (src1) combined with scalar src2.
					ila.ite(self.rrType == self.model.const(0b001, 3), 
						ila.ite(self.rrOpcode == NyEncoding.ADD_I, vsreg1 + ssreg2,
						ila.ite(self.rrOpcode == NyEncoding.SUB_I, vsreg1 - ssreg2,
						ila.ite(self.rrOpcode == NyEncoding.AND, vsreg1 & ssreg2, 
						ila.ite(self.rrOpcode == NyEncoding.OR, vsreg1 | ssreg2, 
						ila.ite(self.rrOpcode == NyEncoding.MULH_I, vsreg1 * ssreg2, self.vector_registers[regNo][laneNo]))))),
					# rrType 0b100: vector lane (src1) combined with vector lane (src2).
					ila.ite(self.rrType == self.model.const(0b100, 3),
						ila.ite(self.rrOpcode == NyEncoding.ADD_I, vsreg1 + vsreg2,
						ila.ite(self.rrOpcode == NyEncoding.SUB_I, vsreg1 - vsreg2,
						ila.ite(self.rrOpcode == NyEncoding.AND, vsreg1 & vsreg2, 
						ila.ite(self.rrOpcode == NyEncoding.OR, vsreg1 | vsreg2, 
						ila.ite(self.rrOpcode == NyEncoding.MULH_I, vsreg1 * vsreg2, self.vector_registers[regNo][laneNo]))))),
					# rrType 0b010: as 0b001, but a lane whose mask bit is 0 is left unchanged.
					ila.ite(self.rrType == self.model.const(0b010, 3),
						ila.ite(mask[laneNo] == self.model.const(0b0, 1), self.vector_registers[regNo][laneNo], 
							ila.ite(self.rrOpcode == NyEncoding.ADD_I, vsreg1 + ssreg2,
							ila.ite(self.rrOpcode == NyEncoding.SUB_I, vsreg1 - ssreg2,
							ila.ite(self.rrOpcode == NyEncoding.AND, vsreg1 & ssreg2, 
							ila.ite(self.rrOpcode == NyEncoding.OR, vsreg1 | ssreg2, 
							ila.ite(self.rrOpcode == NyEncoding.MULH_I, vsreg1 * ssreg2, self.vector_registers[regNo][laneNo])))))),
					# rrType 0b101: as 0b100, but a lane whose mask bit is 0 is left unchanged.
					ila.ite(self.rrType == self.model.const(0b101, 3),
							ila.ite(mask[laneNo] == self.model.const(0b0, 1), self.vector_registers[regNo][laneNo], 
							ila.ite(self.rrOpcode == NyEncoding.ADD_I, vsreg1 + vsreg2,
							ila.ite(self.rrOpcode == NyEncoding.SUB_I, vsreg1 - vsreg2,
							ila.ite(self.rrOpcode == NyEncoding.AND, vsreg1 & vsreg2, 
							ila.ite(self.rrOpcode == NyEncoding.OR, vsreg1 | vsreg2, 
							ila.ite(self.rrOpcode == NyEncoding.MULH_I, vsreg1 * vsreg2, self.vector_registers[regNo][laneNo])))))),
						# Any other rrType leaves the lane unchanged.
						self.vector_registers[regNo][laneNo])
					))),
				ila.ite(self.isImmediate,
					# immType 0b01: vector lane combined with immB (no mask).
					ila.ite(self.immType == self.model.const(0b01, 2),
						ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.ADD_I, vsreg1 + self.immB,
						ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.SUB_I, vsreg1 - self.immB,
						ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.AND, vsreg1 & self.immB,
						ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.OR, vsreg1 | self.immB,
						ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.MULH_I, vsreg1 * self.immB,
						self.vector_registers[regNo][laneNo]))))),
					# immType 0b11: vector lane combined with immA; mask bit 0 keeps the lane.
					ila.ite(self.immType == self.model.const(0b11, 2),
						ila.ite(mask[laneNo] == self.model.const(0b0, 1), self.vector_registers[regNo][laneNo], 
							ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.ADD_I, vsreg1 + self.immA, 
							ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.SUB_I, vsreg1 - self.immA,
							ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.AND, vsreg1 & self.immA,
							ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.OR, vsreg1 | self.immA,
							ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.MULH_I, vsreg1 * self.immA, 
							self.vector_registers[regNo][laneNo])))))),
						self.vector_registers[regNo][laneNo]),
					),
				# Both branches of the load case keep the current value, so
				# loads do not update the register in this expression.
				ila.ite(self.isLoad == self.model.const(0b1, 1), self.vector_registers[regNo][laneNo], self.vector_registers[regNo][laneNo])
				)),
			 self.vector_registers[regNo][laneNo])
Exemple #19
0
def createShaIla():
    """Assemble and return the synthesis template for the SHA accelerator ILA.

    The abstraction declares the command interface, the architectural and
    child-ILA state, the fetch expression, and a ``choice`` template for the
    next value of every state element.  Nothing is synthesized here; the
    ``ila.Abstraction`` object is handed back to the caller.
    """
    model = ila.Abstraction("sha")
    model.enable_parameterized_synthesis = 0

    # Command interface (inputs) and the response register.
    cmd = model.inp('cmd', 2)
    cmdaddr = model.inp('cmdaddr', 16)
    cmddata = model.inp('cmddata', 8)
    model.reg('dataout', 8)

    # Architectural state.
    state = model.reg('sha_state', 3)
    rdaddr = model.reg('sha_rdaddr', 16)
    wraddr = model.reg('sha_wraddr', 16)
    oplen = model.reg('sha_len', 16)
    xram = model.mem('XRAM', 16, 8)

    # State owned by the child ILA, plus the uninterpreted hash function.
    bytes_read = model.reg('sha_bytes_read', 16)
    rd_data = model.reg('sha_rd_data', 512)
    hs_data = model.reg('sha_hs_data', 160)
    sha = model.fun('sha', 160, [512])

    # Fetch: an instruction is the current state plus the incoming command.
    model.fetch_expr = ila.concat([state, cmd, cmdaddr, cmddata])
    model.fetch_valid = (cmd == 1) | (cmd == 2)

    # Response: one byte picked from the state, the multi-byte registers or 0.
    dataout_candidates = [
        ila.zero_extend(state, 8),
        ila.readchunk('rd_addr', rdaddr, 8),
        ila.readchunk('wr_addr', wraddr, 8),
        ila.readchunk('op_len', oplen, 8),
        model.const(0, 8),
    ]
    model.set_next('dataout', ila.choice('dataout', dataout_candidates))

    # Multi-byte registers: either one chunk is overwritten with cmddata or
    # the register keeps its value.
    writable = (('sha_rdaddr', rdaddr), ('sha_wraddr', wraddr),
                ('sha_len', oplen))
    for reg_name, reg in writable:
        chunk_written = ila.writechunk('wr_' + reg_name, reg, cmddata)
        model.set_next(reg_name,
                       ila.choice('nxt_' + reg_name, [chunk_written, reg]))

    # State machine.
    any_state = ila.choice('state_choice',
                           [model.const(value, 3) for value in range(5)])
    after_read = ila.ite(bytes_read < oplen, model.const(1, 3),
                         model.const(4, 3))
    on_start = ila.ite(cmddata == 1, model.const(1, 3), state)
    model.set_next(
        'sha_state',
        ila.choice('state_nxt', [after_read, any_state, on_start, state]))

    # Progress counter: cleared, advanced by one 64-byte block, reset on a
    # start command, or held.
    advanced = bytes_read + 64
    restarted = ila.ite(cmddata == 1, model.const(0, 16), bytes_read)
    model.set_next(
        'sha_bytes_read',
        ila.choice('bytes_read_nxt',
                   [model.const(0, 16), advanced, restarted, bytes_read]))

    # Input block: 64 bytes read from XRAM in either byte order, or held.
    block_le = ila.loadblk(xram, rdaddr + bytes_read, 64)
    block_be = ila.loadblk_big(xram, rdaddr + bytes_read, 64)
    model.set_next('sha_rd_data',
                   ila.choice('rd_data_nxt', [block_be, block_le, rd_data]))

    # Digest: the hash function applied to the input block, or held.
    digest = ila.appfun(sha, [rd_data])
    model.set_next('sha_hs_data', ila.choice('sh_data_nxt', digest, hs_data))

    # XRAM: the digest stored at wraddr in either byte order, or unchanged.
    stored_le = ila.storeblk(xram, wraddr, hs_data)
    stored_be = ila.storeblk_big(xram, wraddr, hs_data)
    model.set_next('XRAM',
                   ila.choice('xram_nxt', [stored_le, stored_be, xram]))

    return model
Exemple #20
0
 def zext(self, v):
     """Return ``v`` zero-extended to ``XLEN`` bits."""
     widened = ila.zero_extend(v, XLEN)
     return widened
Exemple #21
0
	def sreg_nxt(self, regNo):
		"""Build the next-state expression for scalar register ``regNo``.

		The register keeps its current value unless ``self.dest == regNo``.
		When it is the destination the result is selected by the instruction
		class (``isRegReg``, ``isImmediate``, ``isLoad``), the format field
		(``rrType`` / ``immType``) and the opcode.  Comparison opcodes yield
		the constant 0xffff when the comparison holds and 0 otherwise.

		Args:
			regNo: index of the scalar register; also compared with ``self.dest``.

		Returns:
			The expression produced by the nested ``ila.ite`` calls below.
		"""
		sreg1 = self.indexToSGPR(self.rrSrc1)
		sreg2 = self.indexToSGPR(self.rrSrc2)
		# Load instruction: the address and loaded value are computed here,
		# but ``load_val`` is not referenced by the expression below.
		addr = self.indexToSGPR(self.memPtr) + ila.sign_extend(self.memOffSet, SCALAR_REG_BITS)
		load_val = ila.load(self.mem, ila.zero_extend(addr[31:2], MEM_ADDRESS_BITS))

		# NOTE(review): MULH_I is paired with auxMull_i and (in the immediate
		# chain) MULL_I with auxMulh_u -- confirm the opcode/helper pairing.
		return ila.ite(self.dest == regNo,
					ila.ite(self.isRegReg,
						# rrType 0b000: scalar src1 combined with scalar src2.
						ila.ite(self.rrType == self.model.const(0b000, 3),
							ila.ite(self.rrOpcode == NyEncoding.ADD_I, sreg1 + sreg2,
							ila.ite(self.rrOpcode == NyEncoding.SUB_I, sreg1 - sreg2,
							ila.ite(self.rrOpcode == NyEncoding.AND, sreg1 & sreg2,
							ila.ite(self.rrOpcode == NyEncoding.OR, sreg1 | sreg2,
							ila.ite(self.rrOpcode == NyEncoding.MULH_I, self.auxMull_i(sreg1, sreg2),
							ila.ite(self.rrOpcode == NyEncoding.MULH_U, self.auxMulh_u(sreg1, sreg2),
							ila.ite(self.rrOpcode == NyEncoding.ASHR, ila.ashr(sreg1, sreg2[4:0]),
							ila.ite(self.rrOpcode == NyEncoding.SHR, sreg1 >> sreg2[4:0],
							ila.ite(self.rrOpcode == NyEncoding.SHL, sreg1 << sreg2[4:0],
							ila.ite(self.rrOpcode == NyEncoding.CLZ, self.aux_clz(sreg2),
							ila.ite(self.rrOpcode == NyEncoding.CTZ, self.aux_ctz(sreg2),
							ila.ite(self.rrOpcode == NyEncoding.MOVE, sreg2,
							ila.ite(self.rrOpcode == NyEncoding.CMPEQ_I, ila.ite(sreg1 == sreg2, self.const(0xffff, SCALAR_REG_BITS), self.const(0b0, SCALAR_REG_BITS)),
							ila.ite(self.rrOpcode == NyEncoding.CMPNE_I, ila.ite(sreg1 != sreg2, self.const(0xffff, SCALAR_REG_BITS), self.const(0b0, SCALAR_REG_BITS)),
							ila.ite(self.rrOpcode == NyEncoding.CMPGT_I, ila.ite(self.auxCmpgt_i(sreg1, sreg2) == self.getConstOne(), self.const(0xffff, SCALAR_REG_BITS), self.const(0b0, SCALAR_REG_BITS)),
							ila.ite(self.rrOpcode == NyEncoding.CMPGE_I, ila.ite(self.auxCmpge_i(sreg1, sreg2) == self.getConstOne(), self.const(0xffff, SCALAR_REG_BITS), self.const(0b0, SCALAR_REG_BITS)),
							ila.ite(self.rrOpcode == NyEncoding.CMPLT_I, ila.ite(self.auxCmplt_i(sreg1, sreg2) == self.getConstOne(), self.const(0xffff, SCALAR_REG_BITS), self.const(0b0, SCALAR_REG_BITS)),
							ila.ite(self.rrOpcode == NyEncoding.CMPLE_I, ila.ite(self.auxCmple_i(sreg1, sreg2) == self.getConstOne(), self.const(0xffff, SCALAR_REG_BITS), self.const(0b0, SCALAR_REG_BITS)),
							ila.ite(self.rrOpcode == NyEncoding.CMPGT_U, ila.ite(sreg1 > sreg2, self.const(0xffff, SCALAR_REG_BITS), self.const(0b0, SCALAR_REG_BITS)),
							ila.ite(self.rrOpcode == NyEncoding.CMPGE_U, ila.ite(sreg1 < sreg2, self.const(0b0, SCALAR_REG_BITS), self.const(0xffff, SCALAR_REG_BITS)),
							ila.ite(self.rrOpcode == NyEncoding.CMPLT_U, ila.ite(sreg1 < sreg2, self.const(0xffff, SCALAR_REG_BITS), self.const(0b0, SCALAR_REG_BITS)),
							ila.ite(self.rrOpcode == NyEncoding.CMPLE_U, ila.ite(sreg1 > sreg2, self.const(0b0, SCALAR_REG_BITS), self.const(0xffff,SCALAR_REG_BITS)),
								self.scalar_registers[regNo])))))))))))))))))))))),
							# Any other rrType leaves the register unchanged.
							self.scalar_registers[regNo]),
					ila.ite(self.isImmediate,
						# immType 0b00: scalar src1 combined with immB.
						ila.ite(self.immType == self.model.const(0b00, 2),
							ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.ADD_I, sreg1 + self.immB,
							ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.SUB_I, sreg1 - self.immB,
							ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.AND, sreg1 & self.immB,
							ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.OR, sreg1 | self.immB,
							ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.MULH_I, self.auxMull_i(sreg1, self.immB),
							ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.MULL_I, self.auxMulh_u(sreg1, self.immB),
							ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.ASHR, ila.ashr(sreg1, self.immB[4:0]),
							ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.SHR, sreg1 >> self.immB[4:0],
							ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.SHL, sreg1 << self.immB[4:0],
							# CLZ/CTZ act on the immediate operand, mirroring the
							# register-register chain; ila.ite takes exactly
							# (condition, then, else).
							ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.CLZ, self.aux_clz(self.immB),
							ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.CTZ, self.aux_ctz(self.immB),
							ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.MOVE, self.immB,
							ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.CMPEQ_I, ila.ite(sreg1 == self.immB, self.const(0xffff, SCALAR_REG_BITS), self.const(0b0, SCALAR_REG_BITS)),
							ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.CMPNE_I, ila.ite(sreg1 != self.immB, self.const(0xffff, SCALAR_REG_BITS), self.const(0b0, SCALAR_REG_BITS)),
							ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.CMPGT_I, ila.ite(self.auxCmpgt_i(sreg1, self.immB) == self.getConstOne(), self.const(0xffff, SCALAR_REG_BITS), self.const(0b0, SCALAR_REG_BITS)),
							ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.CMPGE_I, ila.ite(self.auxCmpge_i(sreg1, self.immB) == self.getConstOne(), self.const(0xffff, SCALAR_REG_BITS), self.const(0b0, SCALAR_REG_BITS)),
							ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.CMPLT_I, ila.ite(self.auxCmplt_i(sreg1, self.immB) == self.getConstOne(), self.const(0xffff, SCALAR_REG_BITS), self.const(0b0, SCALAR_REG_BITS)),
							ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.CMPLE_I, ila.ite(self.auxCmple_i(sreg1, self.immB) == self.getConstOne(), self.const(0xffff, SCALAR_REG_BITS), self.const(0b0, SCALAR_REG_BITS)),
							ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.CMPGT_U, ila.ite(sreg1 > self.immB, self.const(0xffff, SCALAR_REG_BITS), self.const(0b0, SCALAR_REG_BITS)),
							ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.CMPGE_U, ila.ite(sreg1 < self.immB, self.const(0b0, SCALAR_REG_BITS), self.const(0xffff, SCALAR_REG_BITS)),
							ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.CMPLT_U, ila.ite(sreg1 < self.immB, self.const(0xffff, SCALAR_REG_BITS), self.const(0b0, SCALAR_REG_BITS)),
							ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.CMPLE_U, ila.ite(sreg1 > self.immB, self.const(0b0, SCALAR_REG_BITS), self.const(0xffff,SCALAR_REG_BITS)),
							 self.scalar_registers[regNo])))))))))))))))))))))),
						# immType 0b10: scalar src1 combined with immA.
						ila.ite(self.immType == self.model.const(0b10, 2),
							ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.ADD_I, sreg1 + self.immA,
							ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.SUB_I, sreg1 - self.immA,
							ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.AND, sreg1 & self.immA,
							ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.OR, sreg1 | self.immA,
							# NOTE(review): this branch multiplies by sreg2 while its
							# siblings use immA -- confirm which operand is intended.
							ila.ite(ila.zero_extend(self.immOpcode, 6) == NyEncoding.MULH_I, sreg1 * sreg2, self.scalar_registers[regNo]))))),
							self.scalar_registers[regNo])),
					# Both branches of the load case keep the current value, so
					# loads do not update the register in this expression.
					ila.ite(self.isLoad == self.model.const(0b1, 1), self.scalar_registers[regNo], self.scalar_registers[regNo]))),
				self.scalar_registers[regNo])
Exemple #22
0
def main():
    """Self-check of the ``ila`` expression API.

    Builds boolean and bit-vector expressions on a throw-away abstraction and
    asserts, through ``Abstraction.areEqual``, that the expected identities
    hold.  Raises ``AssertionError`` on the first identity that does not.
    """
    c = ila.Abstraction("test")
#    n = ila.Node()

    # test on boolOp
    top = c.bool(True)
    bot = c.bool(False)
    Band = (top & bot)      # 0
    Bor = (top | bot)       # 1
    Bxor = (Band ^ Bor)     # 1
    Bnor = ila.nor(Band, Bor)       # 0
    Bxnor = ila.xnor(Bxor, Bnor)    # 0
    Bnand = ila.nand(Bnor, Bxnor)   # 1
    nBnor = ~Bnor           # 1
    assert c.areEqual(nBnor, top)

    b1 = c.bit('b1')
    b2 = c.bit('b2')
    b3 = c.bit('b3')
    b4 = (b1 & b2) ^ b3
    b5 = ila.xnor(ila.nand(b1, b2), b3)
    assert c.areEqual(b4, b5)

    b6 = ila.ite(b1, b2, b3)
    b7 = (b1 & b2) | (~b1 & b3)
    assert c.areEqual(b6, b7)

    # test on bitvectorOp
    x = c.reg('x', 8)
    y = c.reg('y', 8)
    c0 = c.const(0, 8)
    c1 = c.const(1, 8)
    c2 = c.const(2, 8)
    c4 = c.const(4, 8)
    c6 = c.const(6, 8)
    c8 = c.const(8, 8)
    v1 = (x == c4)
    v2 = x << 1
    v3 = c4 << 1

    assert c.areEqual(c8, v3)
    assert c.areEqual(c8, (c4 + c4))
    assert c.areEqual(c4, (c8 - c4))
    assert c.areEqual(c8, (c4 * c2))
    assert c.areEqual(c4, (c8 / c2))
    v4 = ila.ite(v1, v2, v3)    # 8
    assert c.areEqual(v4, c8)
    assert c.areEqual(v4, v3)
    assert c.areEqual(v4, (4 + c4))
    assert c.areEqual(-c4, (c4 - 8))
    assert c.areEqual(v4, (2 * c4))
    assert c.areEqual(v4 >> 2, (v3 / c2) - 2)
    assert c.areEqual(c8 % 5, 7 % (1 << c2))
    assert c.areEqual( (x < y) ^ (y <= x), (x == y) | (x != y) )
    assert c.areEqual( (x > y) | (x == y) | ~(x >= y), top )
    assert c.areEqual( ~x ^ x, y ^ ~y)
    assert c.areEqual( ~x, ila.nand(x, x) )
    v5 = ~ila.nor(c2, c4)   # 00000110
    assert c.areEqual( ~v5, ila.xnor(c4, c2))
    v6 = c2 - c4    # 11111110
    v7 = 3 - c8     # 11111011
    v8 = ~(c2 - 2)  # 11111111
    assert c.areEqual( v8, ~c0)
    assert c.areEqual( v8 - 1, v6)
    assert c.areEqual( c4 + c1, -v7) # 00000101
    assert c.areEqual( ila.sdiv(c4, c2), c2)
    assert c.areEqual( ila.sdiv(-c4, c2), -c2)
    assert c.areEqual( ila.sdiv(v5, -4), -c1)
    assert c.areEqual( ila.srem(v5, -4), c2)
    # -6  = -4 * 1 + -2  ??
    assert c.areEqual( ila.sdiv(-6, -c4), c1)
    assert c.areEqual( ila.srem(-v5, -c4), -c2)
    assert c.areEqual( x - ila.srem(x, y), ila.sdiv(x, y) * y )
    assert c.areEqual( x - x % y, (x / y) * y )
    assert c.areEqual( ila.ashr(v6, 1), v8)
    assert c.areEqual( ila.slt(v7, v6), top)

    s1 = c.const(1, 4)
    s2 = c.const(2, 4)
    v9 = ila.concat(s1, s2) # 00010010
    v10 = (c1 << 4) + c2
    assert c.areEqual(v9, v10)
    v11 = ila.rrotate(v9, 2) # 10000100
    v12 = ila.lrotate(v9, 6)
    assert c.areEqual(v11, v12)
    s3 = c.const(9, 4)
    v13 = v9[4:1]
    assert c.areEqual(s3, v13)
    v14 = x[3:0]
    v15 = y[7:4]
    v16 = ila.concat(v15, v14)
    v17 = ((x << 4) >> 4) + ((y >> 4) << 4)
    assert c.areEqual(v16, v17)

    # imply: signed "x < 5" must exclude signed "x >= 5".  The result is
    # asserted so that a failing check is actually reported.
    v18 = ila.slt(x, 5)
    v19 = ila.sge(x, 5)
    assert c.areEqual(ila.implies(v18, ~v19), top)

    #nonzero & bool ite
    v20 = ila.ite( ila.nonzero(x), (x<7), ~(x>=7) )
    assert c.areEqual(v20, (x!=7) & ~(x>7))
    assert c.areEqual(ila.nonzero(c4), top)

    #add nonzero to ite
    assert c.areEqual( ila.ite(ila.nonzero(c2), top, bot), top)
    assert c.areEqual( ila.ite(ila.nonzero(c0), top, bot), bot)

    # zero/sign extend
    short = c4[3:0]
    recover = ila.zero_extend(short, 8)
    assert c.areEqual(recover, c4)

    longC4 = c.const(4, 16)
    nlongC4 = -longC4
    nshortC4 = -c4
    extNS4 = ila.sign_extend(nshortC4, 16)
    assert c.areEqual(nlongC4, extNS4)

    # extract/slice with var
    v21 = c0[3:0]
    v21r = ila.zero_extend(v21, 8)
    assert c.areEqual(c0, v21r)
    # v14 = x[3:0]
    v14ex = ila.zero_extend(v14, 8)
    v14re = (x << 4) >> 4
    assert c.areEqual(v14ex, v14re)
    # v15 = y[7:4]
    v15ex = ila.zero_extend(v15, 8)
    v15re = (y >> 4)
    assert c.areEqual(v15ex, v15re)
    """
    v21 = ila.extractIV(x, 3, c0)
    v22 = ila.extractVI(y, c4+3, 4)
    assert c.areEqual(v14ex, v21)
    assert c.areEqual(v15ex << 4, v22 << 4)

    v23 = v21 + (v22 << 4)
    assert c.areEqual(v23, v16)
    
    v24 = ila.extractVV(c8, c8-1, c0)
    assert c.areEqual(v24, c8)
    v25 = ila.extractVV(x, c8-1, c4)
    v26 = ila.zero_extend(x[7:4], 8)
    assert c.areEqual(v25, v26)

    # slice one bit
    bv1 = c.const(1, 1)
    s1 = ila.get_bit(c1, c0)
    assert c.areEqual(bv1, s1)
    """

    # single-bit selection with an expression as the index
    z = x & x
    bx = y[x]
    bz = y[z]
    assert c.areEqual(bx, bz)
    bx = c8[x]
    by = c8[y]
    inv = ila.implies(x == y, bx == by)
    assert c.areEqual(inv, top)
    dum = ila.ite(b1, bx, by)
    shd = ila.implies(x == y, dum == bx)
    assert c.areEqual(shd, top)
    assert c.areEqual(c6[1], c6[2])
    assert c.areEqual(c6[4], c6[c0])
Exemple #23
0
def createRsaIla():
    """Build the synthesis template for the RSA accelerator abstraction.

    Declares the command inputs, the state registers, the XRAM memory and
    the uninterpreted ``rsa`` function on a fresh ``ila.Abstraction('rsa')``
    and installs next-state templates for ``dataout``, ``rsa_addr``,
    ``rsa_state``, ``rsa_byte_counter``, ``rsa_buff``, ``rsa_M`` and
    ``XRAM``.

    Returns:
        The populated abstraction object.
    """
    m = ila.Abstraction('rsa')
    m.enable_parameterized_synthesis = 0

    # I/O interface
    cmd = m.inp('cmd', 2)
    cmdaddr = m.inp('cmdaddr', 16)
    cmddata = m.inp('cmddata', 8)
    # response (referred to by name in set_next below)
    m.reg('dataout', 8)

    # states
    state = m.reg('rsa_state', 2)
    addr = m.reg('rsa_addr', 16)
    rsa_M = m.reg('rsa_M', 2048)
    # rsa_N / rsa_E are declared only; this function installs no
    # next-state template for them.
    m.reg('rsa_N', 2048)
    m.reg('rsa_E', 2048)
    rsa_buff = m.reg('rsa_buff', 2048)
    byte_counter = m.reg('rsa_byte_counter', 8)
    xram = m.mem('XRAM', 16, 8)
    rsa = m.fun('rsa', 2048, [2048])

    # fetch
    m.fetch_expr = ila.concat([state, cmd, cmdaddr, cmddata])
    m.fetch_valid = (cmd == 1) | (cmd == 2)

    # dataout: one of the state byte, a chunk of the address, or zero
    statebyte = ila.zero_extend(state, 8)
    wraddrbyte = ila.readchunk('rsa_addr', addr, 8)
    dataout_nxt = ila.choice('dataout', [statebyte, wraddrbyte, m.const(0, 8)])
    m.set_next('dataout', dataout_nxt)

    # rsa_addr
    addr_wr = ila.writechunk('wr_addr', addr, cmddata)
    addr_nxt = ila.choice('nxt_addr', [addr_wr, addr])
    m.set_next('rsa_addr', addr_nxt)

    # rsa_state
    state_choice = ila.choice(
        'state_choice',
        [m.const(0, 2),
         m.const(1, 2),
         m.const(2, 2),
         m.const(3, 2)])
    wr_nxt = ila.ite(byte_counter == 255, m.const(0, 2), m.const(3, 2))
    state_nxt = ila.choice('rsa_state_nxt', [
        wr_nxt, state_choice,
        ila.ite(cmddata == 1, m.const(1, 2), state), state
    ])
    m.set_next('rsa_state', state_nxt)

    # byte_counter
    byte_counter_inc = byte_counter + 1
    byte_counter_rst = ila.ite(cmddata == 1, m.const(0, 8), byte_counter)
    byte_counter_nxt = ila.choice(
        'byte_counter_nxt', [byte_counter_inc, byte_counter_rst, byte_counter])
    m.set_next('rsa_byte_counter', byte_counter_nxt)

    # buff
    rsa_buff_op = ila.appfun(rsa, [rsa_M])
    rsa_buff_nxt = ila.choice('rsa_buff_nxt', rsa_buff_op, rsa_buff)
    m.set_next('rsa_buff', rsa_buff_nxt)

    # rsa_M keeps its value
    m.set_next('rsa_M', rsa_M)

    # xram: write byte number (255 - byte_counter) of rsa_buff to
    # addr + byte_counter.
    byte_cnt_16 = ila.zero_extend(byte_counter, 16)
    # Widen the counter BEFORE computing the shift amount.  The largest
    # shift is 255 * 8 = 2040, which does not fit in the counter's 8 bits,
    # so doing the subtraction/multiplication at 8 bits and extending
    # afterwards would wrap and select the wrong byte.
    byte_cnt_wide = ila.zero_extend(byte_counter, 2048)
    sh = (255 - byte_cnt_wide) * 8
    xram_w_rsa_data_1 = (rsa_buff >> sh)[7:0]
    xram_w_rsa_lit = ila.store(xram, addr + byte_cnt_16, xram_w_rsa_data_1)
    xram_nxt = ila.choice('xram_nxt', [xram_w_rsa_lit, xram])
    m.set_next('XRAM', xram_nxt)

    return m
Exemple #24
0
    def nxtStateFunction(self):
        """Build next-state expressions for the PC and scalar destinations.

        Sets ``self.pc_nxt_32`` / ``self.pc_nxt_64`` (PC plus 1 / plus 2),
        looks up the source, destination, ``scc`` and ``exec`` registers
        through ``indexIntoSGPR``, and then assigns three opcode-selected
        ``ila.ite`` chains: ``self.nxt_dst_sop2``, ``self.nxt_dst_sop1`` and
        ``self.nxt_dst_sopk``.

        NOTE(review): this method looks unfinished -- several chain entries
        have no value expression yet (see the notes above each chain), so it
        is not expected to parse as written.
        """
######next state function for pc
        m = self.model
        self.pc_nxt_32 = self.pc + m.const(0x1, PC_REG_BITS)
        self.pc_nxt_64 = self.pc + m.const(0x2, PC_REG_BITS)

        self.source_reg0 = m.indexIntoSGPR(self.ssrc0)
        self.source_reg1 = m.indexIntoSGPR(self.ssrc1)
        # NOTE(review): the extra positional flags presumably select the SCC
        # and EXEC special registers -- confirm against indexIntoSGPR.
        self.scc = m.indexIntoSGPR(0, False, True)
        # NOTE(review): `exec` is a keyword in Python 2, so this attribute
        # name would be rejected there; confirm the target interpreter.
        self.exec = m.indexIntoSGPR(0, False, False, True)
        # "_ext" registers are the ones at index + 1; they are concatenated
        # with the base register below to form the "_long" values.
        self.source_reg0_ext = m.indexIntoSGPR(self.ssrc0 + m.const(0x1))
        self.source_reg1_ext = m.indexIntoSGPR(self.ssrc1 + m.const(0x1))
        self.dst_reg = m.indexIntoSGPR(self.sdstSOP2)
        self.dst_reg_ext = m.indexIntoSGPR(self.sdstSOP2 + m.const(0x1))
        self.source_reg0_long = ila.concat(self.source_reg0_ext, self.source_reg0)
        self.source_reg1_long = ila.concat(self.source_reg1_ext, self.source_reg1)
        # NOTE(review): operand order here (reg, ext) is the reverse of the
        # two source concatenations above (ext, reg) -- confirm intended.
        self.dst_reg_long = ila.concat(self.dst_reg, self.dst_reg_ext)
        # Bit fields of source_reg1 used by the BFE entries below.
        self.source_reg2_bfe = self.source_reg1[20:16]
        self.source_reg1_bfe = self.source_reg1[4:0]
        self.source_reg2_bfe_long = self.source_reg1[22:16]
        self.source_reg1_bfe_long = self.source_reg1[5:0]
        # SOP2 chain: one nested ite per opcode, falling back to dst_reg.
        # NOTE(review), observed in the entries below:
        #   - S_CBRANCH_G_FORK has an empty value argument (", ,").
        #   - both S_BFE_*32 fallbacks use bare `source_reg0` /
        #     `source_reg1_bfe` without `self.`.
        #   - S_LSHR_B32 shifts by source_reg0[4:0]; the other shift entries
        #     take the amount from source_reg1.
        #   - S_XOR_B64 closes its ite after two arguments.
        #   - 32-bit and "_long" results share one chain; confirm the
        #     widths are meant to agree.
        self.nxt_dst_sop2 = ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_ABSDIFF_I32_OPCODE, \
                                ila.ite(self.source_reg0 > self.source_reg1, self.source_reg0 - self.source_reg1, self.source_reg1 - self.source_reg0), \
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_ADD_I32_OPCODE, self.source_reg0 + self.source_reg1,\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_ADD_U32_OPCODE, self.source_reg0 + self.source_reg1,\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_ADDC_U32_OPCODE, self.source_reg0 + self.source_reg1 + self.scc,\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_AND_B32_OPCODE, self.source_reg0 & self.source_reg1,\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_AND_B64_OPCODE, self.source_reg0_long & self.source_reg1_long,\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_ANDN2_B32_OPCODE, self.source_reg0 & (~self.source_reg1),\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_ANDN2_B64_OPCODE, self.source_reg0_long & (~self.source_reg1_long),\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_ASHR_I32_OPCODE, ila.ashr(self.source_reg0, self.source_reg1[4:0]),\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_ASHR_I64_OPCODE, ila.ashr(self.source_reg0_long, self.source_reg1_long[5:0]),\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_BFE_I32_OPCODE, ila.ite(self.source_reg2_bfe == 0, m.const(0, SCALAR_REG_BITS), ila.ite((self.source_reg2_bfe + self.source_reg1_bfe) < 32, (self.source_reg0 << (SCALAR_REG_BITS - self.source_reg2_bfe - self.source_reg1_bfe)) >> (32 - self.source_reg2_bfe), source_reg0 >> source_reg1_bfe)),\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_BFE_U32_OPCODE, ila.ite(self.source_reg2_bfe == 0, m.const(0, SCALAR_REG_BITS), ila.ite((self.source_reg2_bfe + self.source_reg1_bfe) < 32, (self.source_reg0 << (SCALAR_REG_BITS - self.source_reg2_bfe - self.source_reg1_bfe)) >> (32 - self.source_reg2_bfe), source_reg0 >> source_reg1_bfe)),\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_BFE_I64_OPCODE, (self.source_reg0 >> self.source_reg1_bfe_long) & ((1 << self.source_reg2_bfe_long) - 1), \
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_BFE_U64_OPCODE, (self.source_reg0 >> self.source_reg1_bfe_long) & ((1 << self.source_reg2_bfe_long) - 1), \
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_BFM_B32_OPCODE, ((1 << self.source_reg0[4:0]) - 1) << self.source_reg1[4:0],\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_BFM_B64_OPCODE, ((1 << self.source_reg0_long[5:0]) - 1) << self.source_reg1_long[5:0],\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_CBRANCH_G_FORK_OPCODE, ,\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_CSELECT_B32_OPCODE, ila.ite(self.scc, self.source_reg0, self.source_reg1),\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_CSELECT_B64_OPCODE, ila.ite(self.scc, self.source_reg0_long, self.source_reg1_long),\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_LSHL_B32_OPCODE, self.source_reg0 << self.source_reg1[4:0],\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_LSHL_B64_OPCODE, self.source_reg0_long << self.source_reg1_long[5:0],\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_LSHR_B32_OPCODE, self.source_reg0 >> self.source_reg0[4:0],\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_LSHR_B64_OPCODE, self.source_reg0_long >> self.source_reg1_long[5:0],\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_MAX_I32_OPCODE, ila.ite(self.source_reg0 > self.source_reg1, self.source_reg0, self.source_reg1),\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_MAX_U32_OPCODE, ila.ite(self.source_reg0 > self.source_reg1, self.source_reg0, self.source_reg1),\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_MIN_I32_OPCODE, ila.ite(self.source_reg0 < self.source_reg1, self.source_reg0, self.source_reg1),\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_MIN_U32_OPCODE, ila.ite(self.source_reg0 < self.source_reg1, self.source_reg0, self.source_reg1),\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_MUL_I32_OPCODE, self.source_reg0 * self.source_reg1 ,\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_NAND_B32_OPCODE, ~(self.source_reg0 & self.source_reg1),\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_NAND_B64_OPCODE, ~(self.source_reg0_long & self.source_reg1_long),\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_NOR_B32_OPCODE, ~(self.source_reg0 | self.source_reg1),\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_NOR_B64_OPCODE, ~(self.source_reg0_long | self.source_reg1_long),\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_OR_B32_OPCODE, self.source_reg0 | self.source_reg1,\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_OR_B64_OPCODE, self.source_reg0_long | self.source_reg1_long,\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_ORN2_B32_OPCODE, self.source_reg0 | (~self.source_reg1),\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_ORN2_B64_OPCODE, self.source_reg0_long | (~self.source_reg1_long),\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_SUB_I32_OPCODE, self.source_reg0 - self.source_reg1 ,\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_SUB_U32_OPCODE, self.source_reg0 - self.source_reg1,\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_SUBB_U32_OPCODE, self.source_reg0 - self.source_reg1 - self.scc,\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_XNOR_B32_OPCODE, ~(self.source_reg0 ^ self.source_reg1),\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_XNOR_B64_OPCODE, ~(self.source_reg0_long ^ self.source_reg1_long),\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_XOR_B32_OPCODE, self.source_reg0 ^ self.source_reg1,\
                            ila.ite(self.opcode_SOP2 == Encoding.SOP2_S_XOR_B64_OPCODE, self.source_reg0_long ^ self.source_reg1_long),\
                            self.dst_reg\
                            )\
                            )\
                            )\
                            )\
                            )\
                            )\
                            )\
                            )\
                            )\
                            )\
                            )\
                            )\
                            )\
                            )\
                                )\
                                )\
                                )\
                                )\
                                )\
                                )\
                                )\
                                )\
                                )\
                                )\
                                )\
                                )\
                                )\
                                )\
                                )\
                                )\
                                )\
                                )\
                                )\
                                )\
                                )\
                                )\
                                )\
                                )\
                                )\
                                )\
                                )\
                                )\
    
        # SOP1 chain: same shape as the SOP2 chain, keyed on opcode_SOP1 and
        # falling back to dst_reg.  The aux_* helpers are defined elsewhere.
        # NOTE(review), observed in the entries below:
        #   - S_ABS_I32 calls `ile.ite` (not `ila.ite`).
        #   - S_CBRANCH_JOIN and the four S_MOVREL* entries carry only a
        #     "#TODO" where the value expression should be.
        #   - S_RFE_B64 reads `ila.source_reg0_long` (not `self.`).
        #   - S_XOR_SAVEEXEC_B64 tests `self,opcode_SOP1` (comma, not dot).
        #   - the zero_extend / sign_extend calls here pass no target width,
        #     unlike the calls in the SOPK chain.
        self.nxt_dst_sop1 = ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_ABS_I32_OPCODE, ile.ite(self.source_reg0 > 0, self.source_reg0, -self.source_reg0),\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_AND_SAVEEXEC_B64_OPCODE, self.exec,\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_ANDN2_SAVEEXEC_B64_OPCODE, self.exec, \
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_BCNT0_I32_B32_OPCODE, aux_count(self.source_reg0, m.const(0x1, 1), m.const(SCALAR_REG_BITS)),\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_BCNT0_I32_B64_OPCODE, aux_count(self.source_reg0, m.const(0x1, 1), m.const(SCALAR_REG_BITS_LONG)),\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_BCNT1_I32_B32_OPCODE, aux_count(self.source_reg0, m.const(0x1, 0), m.const(SCALAR_REG_BITS)),\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_BCNT1_I32_B64_OPCODE, aux_count(self.source_reg0, m.const(0x1, 0), m.const(SCALAR_REG_BITS_LONG)),\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_BITSET0_B32_OPCODE, aux_bit_set_zero(self.dst_reg, self.source_reg0[4:0]),\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_BITSET0_B64_OPCODE, aux_bit_set_zero(self.dst_reg_long, self.source_reg0_long[5:0]),\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_BITSET1_B32_OPCODE, aux_bit_set_one(self.dst_reg, self.source_reg0[4:0]),\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_BITSET1_B64_OPCODE, aux_bit_set_one(self.dst_reg_long, self.source_reg0_long[5:0]),\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_BREV_B32_OPCODE, aux_bit_rev(self.source_reg0, SCALAR_REG_BITS),\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_BREV_B64_OPCODE, aux_bit_rev(self.source_reg0_long, SCALAR_REG_BITS_LONG),\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_CBRANCH_JOIN_OPCODE, #TODO
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_CMOV_B32_OPCODE, ila.ite(self.scc, self.source_reg0, ~(self.source_reg0 | self.dst_reg)),\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_CMOV_B64_OPCODE, ila.ite(self.scc, self.source_reg0_long, ~(self.source_reg0_long | self.dst_reg_long)),\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_FF0_I32_B32_OPCODE, aux_ff_bit(self.source_reg0, SCALAR_REG_BITS, m.const(0x0, 1)),\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_FF1_I32_B32_OPCODE, aux_ff_bit(self.source_reg0, SCALAR_REG_BITS, m.const(0x1, 1)),\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_FF0_I32_B64_OPCODE, aux_ff_bit(self.source_reg0_long, SCALAR_REG_BITS_LONG, m.const(0x0, 1)),\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_FF1_I32_B64_OPCODE, aux_ff_bit(self.source_reg0_long, SCALAR_REG_BITS_LONG, m.const(0x1, 1)),\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_FLBIT_I32_OPCODE, aux_ff_op_bit(self.source_reg0, SCALAR_REG_BITS),\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_FLBIT_I32_I64_OPCODE, aux_ff_op_bit(self.source_reg0_long, SCALAR_REG_BITS_LONG),\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_FLBIT_I32_B32_OPCODE, aux_ff_bit_m(self.source_reg0, SCALAR_REG_BITS, m.const(0x1, 1)),\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_FLBIT_I32_B64_OPCODE, aux_ff_bit_m(self.source_reg0_long, SCALAR_REG_BITS_LONG, m.const(0x1, 1)),\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_GETPC_B64_OPCODE, self.pc + m.const(0x4, SCALAR_REG_BITS_LONG) ,\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_MOV_B32_OPCODE, self.source_reg0,\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_MOV_B64_OPCODE, self.source_reg0_long,\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_MOVRELD_B32_OPCODE, #TODO ,\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_MOVRELD_B64_OPCODE, #TODO ,\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_MOVRELS_B32_OPCODE, #TODO ,\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_MOVRELS_B64_OPCODE, #TODO ,\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_NAND_SAVEEXEC_B64_OPCODE, self.exec, \
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_NOR_SAVEEXEC_B64_OPCODE, self.exec,\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_NOT_B32_OPCODE, ~(self.source_reg0),\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_NOT_B64_OPCODE, ~(self.source_reg0_long),\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_OR_SAVEEXEC_B64_OPCODE, self.exec,\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_ORN2_SAVEEXEC_B64_OPCODE, self.exec ,\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_QUADMASK_B32_OPCODE, ila.zero_extend(aux_quadmask(self.source_reg0, SCALAR_REG_BITS)) ,\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_QUADMASK_B64_OPCODE, ila.zero_extend(aux_quadmask(self.source_reg0_long)),\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_RFE_B64_OPCODE, ila.source_reg0_long,\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_WQM_B32_OPCODE, ila.zero_extend(aux_quadmask(self.source_reg0, SCALAR_REG_BITS)) ,\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_WQM_B64_OPCODE, ila.zero_extend(aux_quadmask(self.source_reg0, SCALAR_REG_BITS)) ,\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_SEXT_I32_I8_OPCODE, ila.sign_extend(self.source_reg0[7:0]) ,\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_SEXT_I32_I16_OPCODE, ila.sign_extend(self.source_reg0[15:0]) ,\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_SWAPPC_B64_OPCODE, self.pc + m.const(0x4, SCALAR_REG_BITS_LONG) ,\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_MOV_FED_B32_OPCODE, self.source_reg0,\
                            ila.ite(self,opcode_SOP1 == Encoding.SOP1_S_XOR_SAVEEXEC_B64_OPCODE, self.exec,\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_XNOR_SAVEEXEC_B64_OPCODE, self.exec,\
                            ila.ite(self.opcode_SOP1 == Encoding.SOP1_S_SETPC_B64_OPCODE, self.dst_reg,\
                            self.dst_reg
                            )
                            )
                            )
                            )
                            )
                            )
                            )
                            )
                            )
                            )
                            )
                                )
                            )
                                )
                                )
                                )
                                )
                                )
                                )
                                )
                                )
                                )
                                )
                                )
                                )
                                )
                                )
                                )
                                )                                
                                )
                                )
                                )
                                )    
                                )
                                )
                                )
                                )
                                )
                                )
                                )
                                )
                                )
                                )
                                )
                                )
                                )
                                )
                                )
                                )

        # SOPK chain: immediate-operand opcodes keyed on opcode_SOPK.
        # NOTE(review), observed in the entries below:
        #   - S_CMOVK passes self.dst_reg as a third argument to
        #     sign_extend, leaving its ite with two arguments.
        #   - S_ADDK / S_MULK call bare `sign_extend` (no `ila.` prefix).
        #   - the innermost ite has no else value, unlike the SOP2 / SOP1
        #     chains which end in self.dst_reg.
        self.nxt_dst_sopk = ila.ite( self.opcode_SOPK == Encoding.SOPK_S_MOVK_I32_OPCODE, ila.sign_extend(self.simm, SCALAR_REG_BITS),\
                            ila.ite( self.opcode_SOPK == Encoding.SOPK_S_CMOVK_I32_OPCODE, ila.ite(self.scc != m.const(0x1, 1), ila.sign_extend(self.simm, SCALAR_REG_BITS, self.dst_reg)),\
                            ila.ite( self.opcode_SOPK == Encoding.SOPK_S_ADDK_I32_OPCODE, self.dst_reg + sign_extend(self.simm, SCALAR_REG_BITS),\
                            ila.ite( self.opcode_SOPK == Encoding.SOPK_S_MULK_I32_OPCODE, self.dst_reg * sign_extend(self.simm, SCALAR_REG_BITS),\
                            )
                            )
                            )
                            )
Exemple #25
0
    def instructionFetch(self):
        """Load the instruction at the current PC and decode its fields.

        Reads ``self.mem`` at ``self.pc[31:3]`` (zero-extended to the memory
        address width) and stores on ``self``: the raw word (``inst`` and
        ``fetch_expr``), ``opcode``, the register indices ``dest`` /
        ``src1`` / ``src2`` / ``src3``, the extended immediates
        (``baseImm``, ``branchImm``, ``ldImm``, ``stImm``), and for each of
        the four register operands a selector flag, the ``indexIntoSReg``
        and ``indexIntoLReg`` lookups, and the value chosen between them.
        """
        fmt = instruction_format

        def field(top, bot):
            # Bits [top-1 : bot] of the fetched instruction word.
            return self.inst[(top - 1):bot]

        def immediate():
            # Immediate field zero-extended to PC width (fresh node per call).
            return ila.zero_extend(field(fmt.IMM_BIT_TOP, fmt.IMM_BIT_BOT),
                                   fmt.PC_BITS)

        def selector_flag(index):
            # Long flag when scalar_register_num <= index < register_total_num,
            # scalar flag otherwise.
            in_upper_range = ((index >= self.scalar_register_num) &
                              (index < self.register_total_num))
            return ila.ite(in_upper_range, self.long_scalar_register_flag,
                           self.scalar_register_flag)

        def pick(flag, short_value, long_value):
            # Short value when the flag holds, else the low REG_BITS of the
            # long value.
            return ila.ite(flag, short_value, long_value[fmt.REG_BITS - 1:0])

        word_address = ila.zero_extend(self.pc[31:3], fmt.MEM_ADDRESS_BITS)
        self.inst = ila.load(self.mem, word_address)
        self.opcode = field(fmt.OPCODE_BIT_TOP, fmt.OPCODE_BIT_BOT)
        self.fetch_expr = self.inst

        # Register index fields.
        self.dest = field(fmt.DST_BIT_TOP, fmt.DST_BIT_BOT)
        self.src1 = field(fmt.SRC0_BIT_TOP, fmt.SRC0_BIT_BOT)
        self.src2 = field(fmt.SRC1_BIT_TOP, fmt.SRC1_BIT_BOT)
        self.src3 = field(fmt.SRC2_BIT_TOP, fmt.SRC2_BIT_BOT)

        # Immediates; predicate-register decoding (branchPred / predReg) is
        # intentionally not performed here.
        self.baseImm = ila.sign_extend(
            field(fmt.BASE_BIT_TOP, fmt.BASE_BIT_BOT), fmt.PC_BITS)
        self.branchImm = immediate()
        self.ldImm = immediate()
        self.stImm = immediate()

        # Per-operand selector flags.
        self.sreg1_flag = selector_flag(self.src1)
        self.sreg2_flag = selector_flag(self.src2)
        self.sreg3_flag = selector_flag(self.src3)
        self.sregdest_flag = selector_flag(self.dest)

        # Lookups through the S-register indexer.
        self.ssreg1 = self.indexIntoSReg(self.src1)
        self.ssreg2 = self.indexIntoSReg(self.src2)
        self.ssreg3 = self.indexIntoSReg(self.src3)
        self.ssregdest = self.indexIntoSReg(self.dest)

        # Lookups through the L-register indexer.
        self.lsreg1 = self.indexIntoLReg(self.src1)
        self.lsreg2 = self.indexIntoLReg(self.src2)
        self.lsreg3 = self.indexIntoLReg(self.src3)
        self.lsregdest = self.indexIntoLReg(self.dest)

        # Final operand values.
        self.sreg1 = pick(self.sreg1_flag, self.ssreg1, self.lsreg1)
        self.sreg2 = pick(self.sreg2_flag, self.ssreg2, self.lsreg2)
        self.sreg3 = pick(self.sreg3_flag, self.ssreg3, self.lsreg3)
        self.sregdest = pick(self.sregdest_flag, self.ssregdest,
                             self.lsregdest)
Exemple #26
0
def createAESILA(synstates, enable_ps):
    """Build the AES accelerator template, synthesize it and emit a simulator.

    Args:
        synstates: iterable of state names; each one is synthesized in turn
            and its resulting next-state AST is exported.
        enable_ps: value assigned to ``m.enable_parameterized_synthesis``;
            its truthiness also picks the ``'en'`` / ``'dis'`` suffix used
            in the output file names.

    Side effects:
        Writes per-state synthesis times to ``aes-times-<suffix>.txt``,
        exports each AST to ``asts/<state>_<suffix>`` and generates the
        simulator into the ``sim`` directory.  Returns nothing.

    Note:
        The body uses Python 2 constructs (``xrange``, ``print >>``), in
        line with the rest of this code.
    """
    m = ila.Abstraction("aes")
    m.enable_parameterized_synthesis = enable_ps

    # I/O interface: this is where the commands come from.
    cmd = m.inp('cmd', 2)
    cmdaddr = m.inp('cmdaddr', 16)
    cmddata = m.inp('cmddata', 8)
    # response (referred to by name in set_next below).
    m.reg('dataout', 8)

    # internal arch state.
    state = m.reg('aes_state', 2)
    opaddr = m.reg('aes_addr', 16)
    oplen = m.reg('aes_len', 16)
    keysel = m.reg('aes_keysel', 1)
    ctr = m.reg('aes_ctr', 128)
    key0 = m.reg('aes_key0', 128)
    key1 = m.reg('aes_key1', 128)

    # for the uinst.
    byte_cnt = m.reg('byte_cnt', 16)
    rd_data = m.reg('rd_data', 128)
    enc_data = m.reg('enc_data', 128)
    xram = m.mem('XRAM', 16, 8)
    aes = m.fun('aes', 128, [128, 128, 128])

    # fetch is just looking at the input command.
    m.fetch_expr = ila.concat([state, cmd, cmdaddr, cmddata])
    m.fetch_valid = (cmd == 1) | (cmd == 2)

    # decode: reads in any state, writes only in state 0, and non-read
    # commands in states 1-3, for every address in 0xff00..0xff3f.
    rdcmds = [(state == i) & (cmd == 1) & (cmdaddr == addr)
              for addr in xrange(0xff00, 0xff40) for i in [0, 1, 2, 3]]
    wrcmds = [(state == 0) & (cmd == 2) & (cmdaddr == addr)
              for addr in xrange(0xff00, 0xff40)]
    nopcmds = [(state == i) & (cmd != 1) & (cmdaddr == addr)
               for addr in xrange(0xff00, 0xff40) for i in [1, 2, 3]]
    m.decode_exprs = rdcmds + wrcmds + nopcmds

    # read commands
    statebyte = ila.zero_extend(state, 8)
    opaddrbyte = ila.readchunk('rd_addr', opaddr, 8)
    oplenbyte = ila.readchunk('rd_len', oplen, 8)
    keyselbyte = ila.zero_extend(keysel, 8)
    ctrbyte = ila.readchunk('rd_ctr', ctr, 8)
    key0byte = ila.readchunk('rd_key0', key0, 8)
    key1byte = ila.readchunk('rd_key1', key1, 8)
    dataoutnext = ila.choice('dataout', [
        statebyte, opaddrbyte, oplenbyte, keyselbyte, ctrbyte, key0byte,
        key1byte,
        m.const(0, 8)
    ])
    m.set_next('dataout', dataoutnext)

    # write commands.
    def mb_reg_wr(name, reg):
        # multibyte register write.
        reg_wr = ila.writechunk('wr_' + name, reg, cmddata)
        reg_nxt = ila.choice('nxt_' + name, [reg_wr, reg])
        m.set_next(name, reg_nxt)

    mb_reg_wr('aes_addr', opaddr)
    mb_reg_wr('aes_len', oplen)
    mb_reg_wr('aes_ctr', ctr)
    mb_reg_wr('aes_key0', key0)
    mb_reg_wr('aes_key1', key1)

    # bit-level registers
    def bit_reg_wr(name, reg, sz):
        # bitwise register write
        assert reg.type.bitwidth == sz
        reg_wr = cmddata[sz - 1:0]
        reg_nxt = ila.choice('nxt_' + name, [reg_wr, reg])
        m.set_next(name, reg_nxt)

    bit_reg_wr('aes_keysel', keysel, 1)

    # state
    state_next = ila.choice('state_next', [
        m.const(0, 2),
        m.const(1, 2),
        m.const(2, 2),
        m.const(3, 2),
        ila.ite(cmddata == 1, m.const(1, 2), state),
        ila.ite(byte_cnt + 16 < oplen, m.const(1, 2), m.const(0, 2))
    ])
    m.set_next('aes_state', state_next)

    # these are for the uinst
    # byte_cnt
    byte_cnt_inc = byte_cnt + 16
    byte_cnt_rst = ila.ite(cmddata == 1, m.const(0, 16), byte_cnt)
    byte_cnt_nxt = ila.choice(
        'byte_cnt_nxt', [m.const(0, 16), byte_cnt_inc, byte_cnt_rst, byte_cnt])
    m.set_next('byte_cnt', byte_cnt_nxt)
    # rd_data
    rdblock = ila.loadblk(xram, opaddr + byte_cnt, 16)
    rd_data_nxt = ila.choice('rd_data_nxt', rdblock, rd_data)
    m.set_next('rd_data', rd_data_nxt)
    # enc_data
    aes_key = ila.ite(keysel == 0, key0, key1)
    aes_enc_data = ila.appfun(aes, [ctr, aes_key, rd_data])
    enc_data_nxt = ila.ite(state == 2, aes_enc_data, enc_data)
    m.set_next('enc_data', enc_data_nxt)
    # xram write
    xram_w_aes = ila.storeblk(xram, opaddr + byte_cnt, enc_data)
    xram_nxt = ila.choice('xram_nxt', xram, xram_w_aes)
    m.set_next('XRAM', xram_nxt)

    # synthesize.
    suffix = 'en' if enable_ps else 'dis'

    def sim(s):
        # A fresh AES simulator instance is used for every query.
        return AES().simulate(s)

    # The context manager guarantees the timing log is flushed and closed,
    # even if synthesis of one of the states raises.
    with open('aes-times-%s.txt' % suffix, 'wt') as timefile:
        for s in synstates:
            st = time.clock()
            m.synthesize(s, sim)
            t_elapsed = time.clock() - st
            print >> timefile, s
            print >> timefile, '%.2f' % (t_elapsed)

            ast = m.get_next(s)
            m.exportOne(ast, 'asts/%s_%s' % (s, suffix))

    m.generateSimToDir('sim')
Exemple #27
0
    def nextStateVALUFunction(self, threadNo):
        m = self.model
        self.vsource_reg0 = ila.ite(self.vsrc0 > 255, m.indexIntoVGPR(self.vsrc0 - m.const(0x100, VECTOR_SOURCE_BIT), threadNo), m.indexIntoSGPR(self.vsrc0))
        self.vsource_reg1 = m.indexIntoVGPR(self.vsrc1)
        self.vcc = m.indexIntoVGPR(0, 0, True)
        self.vsource_reg0_ext = ila.ite(self.vsrc0 > 255, m.indexIntoVGPR(self.vsrc0 + m.const(0x1, VECTOR_SOURCE_BIT) - m.const(0x100, VECTOR_SOURCE_BIT), threadNo), m.indexIntoSGPR(self.vsrc0 + m.const(0x1,1)))
        self.vsource_reg1_ext = m.indexIntoVGPR(self.vsrc1 + m.const(0x1, VECTOR_SOURCE_BIT - 1))
        self.vdst_reg = m.indexIntoVGPR(self.vdst)
        self.vdst_reg_ext = m.indexIntoVGPR(self.vdst + m.const(0x1, VECTOR_SOURCE_BIT - 1))
        self.vsource_reg0_long = ila.concat(self.vsource_reg0_ext, self.vsource_reg0)
        self.vsource_reg1_long = ila.concat(self.vsource_reg1_ext, self.vsource_reg1)
        self.vdst_reg_long = ila.concat(self.vdst_reg, self.vdst_reg_ext)

        self.nxt_dst_vop2 = ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_CNDMASK_B32, ila.ite(self.vcc[threadNo] != 0, self.vsource_reg1, slef.vsource_reg0),\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_READLANE_B32, ,\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_WRITELANE_B32, , \
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_ADD_F32, self.vsource_reg0 + self.vsource_reg1,\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_SUB_F32, self.vsource_reg0 - self.vsource_reg1,\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_SUBREV_F32, self.vsource_reg1 - self.vsource_reg0 ,\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_MAC_LEGACY_F32, self.vsource_reg0 * self.vsource_reg1 + self.vdst_reg,\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_MUL_LEGACY_F32, self.vsource_reg0 * self.vsource_reg1,\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_MUL_F32, self.vsource_reg0 * self.vsource_reg1.\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_MUL_I32_I24, ila.sign_extend((self.vsource_reg0[23:0] * self.vsource_reg1[23:0])[30:0], VECTOR_REG_BITS),\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_MUL_HI_I32_I24, ila.sign_extend((self.vsource_reg0[23:0] * self.vsource_reg1[23:0])[47:32], VECTOR_REG_BITS),\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_MUL_U32_U24, (self.vsource_reg0[23:0] * self.vsource_reg1[23:0])[31:0],\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_MUL_HI_U32_U24, ila.zero_extend((self.vsource_reg0[23:0] * self.vsource_reg1[23:0])[47:32], VECTOR_REG_BITS),\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_MIN_LEGACY_F32, ila.ite(self.vsource_reg0 < self.vsource_reg1, self.vsource_reg0, self.vsource_reg1),\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_MAX_LEGACY_F32, ila.ite(self.vsource_reg0 >= self.vsource_reg1, self.vsource_reg0, self.vsource_reg1),\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_MIN_F32, ila.ite(self.vsource_reg0 < self.vsource_reg1, self.vsource_reg0, self.vsource_reg1),\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_MAX_F32, ila.ite(self.vsource_reg0 > self.vsource_reg1, self.vsource_reg0, self.vsource_reg1),\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_MIN_I32, ila.ite(self.vsource_reg0 < self.vsource_reg1, self.vsource_reg0, self.vsource_reg1),\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_MAX_I32, ila.ite(self.vsource_reg0 > self.vsource_reg1, self.vsource_reg0, self.vsource_reg1),\ 
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_MIN_U32, ila.ite(self.vsource_reg0 < self.vsource_reg1, self.vsource_reg0, self.vsource_reg1),\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_MAX_U32, ila.ite(self.vsource_reg0 > self.vsource_reg1, self.vsource_reg0, self.vsource_reg1),\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_LSHR_B32, self.vsource_reg0 >> self.vsource_reg1[4:0],\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_LSHRREV_B32, self.vsource_reg1 >> self.vsource_reg0[4:0],\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_ASHR_I32, ila.ashr(self.vsource_reg0, self.vsource_reg1[4:0]),\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_ASHRREV_I32, ila.ashr(self.vsource_reg1, self.vsource_reg0[4:0]),\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_LSHL_B32, self.vsource_reg0 << self.vsource_reg1[4:0],\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_LSHLREV_B32, self.vsource_reg1 << self.vsource_reg0[4:0],\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_AND_B32, self.vsource_reg0 & self.vsource_reg1,\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_OR_B32, self.vsource_reg0 | self.vsource_reg1,\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_XOR_B32, self.vsource_reg0 ^ self.vsource_reg1,\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_BFM_B32, ((1 << self.vsource_reg0[4:0]) - 1) << self.vsource_reg1[4:0],\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_MAC_F32, self.vsource_reg0 * self.vsource_reg1 + self.vdst_reg,\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_MADMK_F32, ,\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_MADAK_F32, ,\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_BCNT_U32_B32, aux_count(self.vsource_reg0, False ,VECTOR_REG_BITS) + self.vsource_reg1,\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_MBCNT_LO_U32_B32, ,\ #TODO: ThreadMask
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_MBCNT_HI_U32_B32, ,\ 
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_ADD_I32, self.vsource_reg0 + self.vsource_reg1 ,\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_SUB_I32, self.vsource_reg0 - self.vsource_reg1, \
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_SUBREV_I32, self.vsource_reg1 - self.vsource_reg1,\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_ADDC_U32, self.vsource_reg0 + self.vsource_reg1 + self.vcc[threadNo],\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_SUBB_U32, self.vsource_reg0 - self.vsource_reg1 - self.vcc[threadNo],\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_SUBBREV_U32, self.vsource_reg1 - self.vsource_reg0 - self.VCC[threadNo],\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_LDEXP_F32, ,\#TODO:EXP
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_CVT_PKACCUM_U8_F32, ,\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_CVT_PKNORM_I16_F32, ,\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_CVT_PKNORM_U16_F32, ,\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_CVT_PKRTZ_F16_F32, ,\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_CVT_PK_U16_U32, ,\
                            ila.ite( self.opcode_VOP2 == Encoding.VOP2_V_CVT_PK_I16_I32, ,\
                            )
                            )
                            )
                            )
                            )
                            )
                            )
                            )
                            )

                            )
                            )
                            )
                            )
                            )
                            )
                            )
                            )
                            )
                            )
                            )
                            )
                                )
                                )

                                )
                                )
                                )
                                )
                                )
                                )
                                )
                                )   
                                )
                                )
                                )
                                )
                                )
                                )
                                )
                                )
                                )
                                )
                                )
                                )
                                )
                                )
                                )
                            )
                            )
                            )
                            )
Exemple #28
0
def synthesize(state, enable_ps):
    """Build next-state synthesis templates for the 8051 model and synthesize.

    A fresh ``uc8051`` model is created, a template (built from ``ila.choice``
    / ``ila.inrange`` nodes) is registered with ``model.set_next`` for every
    architectural state element, and then each name in ``state`` is
    synthesized against the ``eval8051`` simulator.  The resulting AST of each
    synthesized element is exported to ``asts/<name>_<en|dis>``.

    Args:
        state: iterable of state-element names; each is passed to
            ``model.synthesize`` and ``model.get_next``.
        enable_ps: value assigned to
            ``model.enable_parameterized_synthesis``; also selects the
            ``en``/``dis`` suffix of the exported file names.
    """
    uc = uc8051()
    # create nicknames
    pc, iram, sp = uc.pc, uc.iram, uc.sp
    op0, op1, op2 = uc.op0, uc.op1, uc.op2
    acc, b, dptr = uc.acc, uc.b, uc.dptr
    psw = uc.psw
    rx = uc.rx
    rom = uc.rom
    model = uc.model
    model.enable_parameterized_synthesis = enable_ps
    bv = model.const

    # fetch and decode.
    model.fetch_expr = uc.op0  # s/hand for uc.rom[uc.pc]
    model.decode_exprs = [uc.op0 == i for i in xrange(0x0, 0x100)]

    ########################### PC ##############################################
    def cjmp(name, cond):
        # Conditional jump template.  pc_rel1/pc_rel2 are bound later in the
        # enclosing scope; cjmp is only called after they have been defined.
        pc_taken = ila.choice(name + '_taken', pc_rel1, pc_rel2)
        pc_seq = ila.choice(name + '_seq', pc + 2, pc + 3)
        return ila.ite(cond, pc_taken, pc_seq)

    def jmppolarity(name):
        # 1-bit value chosen from the range [0, 1].
        return ila.inrange(name, bv(0, 1), bv(1, 1))

    # ajmp/acall
    pc_ajmp_pg1 = (pc + 2)[15:11]
    pc_ajmp_pg2 = ila.inrange('ajmp_page', bv(0x0, 3), bv(0x7, 3))
    pc_ajmp_pg = ila.concat(pc_ajmp_pg1, pc_ajmp_pg2)
    pc_ajmp = ila.concat(pc_ajmp_pg, op1)
    # lcall/ljmp
    pc_ljmp = ila.choice('ljmp', [ila.concat(op2, op1), ila.concat(op1, op2)])
    # ret.
    pc_ret = ila.choice('pc_ret', [
        ila.concat(iram[sp - 1], iram[sp]),
        ila.concat(iram[sp], iram[sp - 1]),
        ila.concat(iram[sp], iram[sp + 1]),
        ila.concat(iram[sp + 1], iram[sp])
    ])
    # relative to pc
    pc_rel1 = ila.choice('pc_rel1_base', [pc, pc + 1, pc + 2, pc + 3
                                          ]) + ila.sign_extend(op1, 16)
    pc_rel2 = ila.choice('pc_rel2_base', [pc, pc + 1, pc + 2, pc + 3
                                          ]) + ila.sign_extend(op2, 16)
    # sjmp
    pc_sjmp = ila.choice('sjmp', pc_rel1, pc_rel2)
    # jb
    jb_bitaddr = ila.choice('jb_bitaddr', [op1, op2])
    jb_bit = uc.readBit(jb_bitaddr)
    jx_polarity = jmppolarity('jx_polarity')
    pc_jb = cjmp('pc_jb', jb_bit == jx_polarity)
    # jc
    pc_jc = cjmp('pc_jc', uc.cy == jx_polarity)
    # jz
    acc_zero = acc == 0
    acc_nonzero = acc != 0
    jz_test = ila.choice('jz_test_polarity', acc_zero, acc_nonzero)
    pc_jz = cjmp('pc_jz', jz_test)
    # jmp
    pc_jmp = dptr + ila.zero_extend(acc, 16)
    # cjne
    cjne_src1 = ila.choice('cjne_src1', [acc, iram[rx[0]], iram[rx[1]]] + rx)
    cjne_src2 = ila.choice(
        'cjne_src2',
        [op1, op2,
         uc.readDirect(ila.choice('cjne_iram_addr', [op1, op2]))])
    cjne_taken = cjne_src1 != cjne_src2
    pc_cjne = cjmp('pc_cjne', cjne_taken)
    # djnz
    djnz_src = ila.choice(
        'djnz_src',
        [uc.readDirect(ila.choice('djnz_iram_src', [op1, op2]))] + rx)
    djnz_taken = djnz_src != 1
    pc_djnz = cjmp('pc_djnz', djnz_taken)

    pc_choices = [
        pc + 1, pc + 2, pc + 3, pc_ajmp, pc_ljmp, pc_ret, pc_sjmp, pc_jb,
        pc_jc, pc_jz, pc_jmp, pc_cjne, pc_djnz
    ]
    model.set_next('PC', ila.choice('pc', pc_choices))

    ########################### ACC ##############################################
    # various sources for ALU ops.
    acc_src2_dir_addr = ila.choice('acc_src2_dir_addr', [op1, op2])
    acc_src2_dir = ila.choice('acc_src2_dir',
                              [uc.readDirect(acc_src2_dir_addr)] + rx)
    acc_src2_indir_addr = ila.choice('acc_src2_indir_addr', [rx[0], rx[1]])
    acc_src2_indir = iram[acc_src2_indir_addr]
    src2_imm = ila.choice('src2_imm', [op1, op2])
    acc_src2 = ila.choice('acc_src2', [acc_src2_dir, acc_src2_indir, src2_imm])
    acc_rom_offset = ila.choice('acc_rom_offset',
                                [dptr, pc + 1, pc + 2, pc + 3])
    # the decimal adjust instruction. this is a bit of mess.
    # first, deal with the lower nibble
    acc_add_6 = (uc.ac == 1) | (acc[3:0] > 9)
    acc_ext9 = ila.zero_extend(acc, 9)
    acc_da_stage1 = ila.ite(acc_add_6, acc_ext9 + 6, acc_ext9)
    acc_da_cy1 = acc_da_stage1[8:8]
    # and then the upper nibble
    acc_add_60 = ((acc_da_cy1 | uc.cy) == 1) | (acc_da_stage1[7:4] > 9)
    acc_da_stage2 = ila.ite(acc_add_60, acc_da_stage1 + 0x60, acc_da_stage1)
    acc_da = acc_da_stage2[7:0]
    # instructions which modify the accumulator.
    acc_rr = ila.rrotate(acc, 1)
    acc_rrc = ila.rrotate(ila.concat(acc, uc.cy), 1)[8:1]
    acc_rl = ila.lrotate(acc, 1)
    acc_rlc = ila.lrotate(ila.concat(uc.cy, acc), 1)[7:0]
    acc_inc = acc + 1
    acc_dec = acc - 1
    acc_add = acc + acc_src2
    acc_addc = acc + acc_src2 + ila.zero_extend(uc.cy, 8)
    acc_orl = acc | acc_src2
    acc_anl = acc & acc_src2
    acc_xrl = acc ^ acc_src2
    # sign-extending the 1-bit carry yields 0xff (i.e. -1) when it is set,
    # so adding it subtracts the borrow.
    acc_subb = acc - acc_src2 + ila.sign_extend(uc.cy, 8)
    acc_mov = acc_src2
    acc_cpl = ~acc
    acc_clr = bv(0, 8)
    acc_rom = rom[ila.zero_extend(acc, 16) + acc_rom_offset]
    acc_swap = ila.concat(acc[3:0], acc[7:4])
    # div.
    acc_div = ila.ite(b == 0, bv(0xff, 8), acc / b)
    b_div = ila.ite(b == 0, acc, acc % b)
    # mul
    mul_result = ila.zero_extend(acc, 16) * ila.zero_extend(b, 16)
    acc_mul = mul_result[7:0]
    b_mul = mul_result[15:8]
    # xchg - dir
    xchg_src2_dir_addr = ila.choice('xchg_src2_dir_addr',
                                    [op1, op2] + uc.rxaddr)
    xchg_src2_dir = uc.readDirect(xchg_src2_dir_addr)
    acc_xchg_dir = xchg_src2_dir
    # xchg - indir
    xchg_src2_indir_addr = ila.choice('xchg_src2_indir_addr', [rx[0], rx[1]])
    xchg_src2_full_indir = iram[xchg_src2_indir_addr]
    xchg_src2_half_indir = ila.concat(acc[7:4], xchg_src2_full_indir[3:0])
    xchg_src2_indir = ila.choice('xchg_src2_indir',
                                 [xchg_src2_full_indir, xchg_src2_half_indir])
    acc_xchg_indir = xchg_src2_indir
    # final acc value.
    acc_next = ila.choice('acc_r_next', [
        acc_rr, acc_rl, acc_rrc, acc_rlc, acc_inc, acc_dec, acc_add, acc_addc,
        acc_orl, acc_anl, acc_xrl, acc_mov, acc_rom, acc_clr, acc_subb,
        acc_swap, acc_cpl, acc, acc_div, acc_mul, acc_da, acc_xchg_dir,
        acc_xchg_indir, uc.xram_data_in
    ])
    model.set_next('ACC', acc_next)

    ########################### IRAM ##############################################
    # instructions where the result is a direct iram address
    dir_src1_addr = ila.choice('dir_src1_addr', [op1, op2] + uc.rxaddr)
    dir_src1 = uc.readDirect(dir_src1_addr)
    dir_src2_iram_addr = ila.choice('dir_src2_iram_addr',
                                    [op1, op2] + uc.rxaddr)
    dir_src2_iram = uc.readDirect(dir_src2_iram_addr)
    dir_src2_indir_addr = ila.choice('dir_src2_indir_addr', [rx[0], rx[1]])
    dir_src2_indir = iram[dir_src2_indir_addr]
    dir_src2 = ila.choice('dir_src2',
                          [op1, op2, acc, dir_src2_iram, dir_src2_indir])
    dir_inc = dir_src1 + 1
    dir_dec = dir_src1 - 1
    dir_orl = dir_src1 | dir_src2
    dir_anl = dir_src1 & dir_src2
    dir_xrl = dir_src1 ^ dir_src2
    dir_mov = dir_src2
    dir_result = ila.choice(
        'dir_result', [dir_inc, dir_dec, dir_orl, dir_anl, dir_xrl, dir_mov])
    # dir_addrs/dir_datas are parallel lists: entry i of one pairs with entry
    # i of the other (checked by the assert before the final direct write).
    dir_addrs = [dir_src1_addr]
    dir_datas = [dir_result]

    # write a bit.
    bit_src1_addr = ila.choice('bit_src1_addr', [op1, op2])
    bit_src1 = uc.readBit(bit_src1_addr)
    wrbit_data = ila.choice(
        'wrbit_data',
        [uc.cy, ~uc.cy, bit_src1, ~bit_src1,
         bv(0, 1), bv(1, 1)])
    r_bit = uc.writeBit(bit_src1_addr, wrbit_data)
    # some instructions write their result to the carry flag; which is also the first operand.
    cy_orl = uc.cy | bit_src1
    cy_orlc = uc.cy | ~bit_src1
    cy_anl = uc.cy & bit_src1
    cy_anlc = uc.cy & ~bit_src1
    cy_mov = bit_src1
    cy_cpl_bit = ~bit_src1
    cy_cpl_c = ~uc.cy
    bit_cnst1 = bv(1, 1)
    bit_cnst0 = bv(0, 1)
    bit_cy = ila.choice('bit_cy', [
        cy_orl, cy_anl, cy_orlc, cy_anlc, cy_cpl_c, cy_mov, cy_cpl_bit,
        bit_cnst1, bit_cnst0
    ])

    # instructions where the result is an indirect iram address.
    src1_indir_addr = ila.choice('src1_indir_addr', [rx[0], rx[1]])
    src1_indir = iram[src1_indir_addr]
    src2_indir_dir_addr = ila.choice('src2_indir_dir_addr', [op1, op2])
    src2_indir_dir = uc.readDirect(src2_indir_dir_addr)
    src2_indir = ila.choice('src2_indir', [op1, op2, acc, src2_indir_dir])
    src1_indir_inc = src1_indir + 1
    src1_indir_dec = src1_indir - 1
    src1_indir_mov = src2_indir
    src1_indir_result = ila.choice(
        'src1_indir_result', [src1_indir_inc, src1_indir_dec, src1_indir_mov])
    indir_addrs = [src1_indir_addr]  # indirect write addr
    indir_datas = [src1_indir_result]  # and data.
    # calls
    pc_topush = ila.choice('pc_topush', [pc + 1, pc + 2, pc + 3])
    pc_topush_lo = pc_topush[7:0]
    pc_topush_hi = pc_topush[15:8]
    # NOTE(review): both choices below deliberately reuse the name
    # 'pc_topush_endianess' -- presumably so one selector picks the matching
    # (lo, hi) / (hi, lo) pair; confirm against ila.choice semantics.
    pc_topush_0 = ila.choice('pc_topush_endianess',
                             [pc_topush_lo, pc_topush_hi])
    pc_topush_1 = ila.choice('pc_topush_endianess',
                             [pc_topush_hi, pc_topush_lo])
    pc_push_addr = ila.choice('pc_push_addr', [sp, sp + 1])
    iram_call = ila.store(ila.store(iram, pc_push_addr, pc_topush_0),
                          pc_push_addr + 1, pc_topush_1)

    # push or pop instructions.
    stk_iram_addr = ila.choice('stk_iram_addr', [sp, sp + 1, sp - 1])
    stk_src_dir_addr = ila.choice('stk_src_dir_addr', [op1, op2])
    stk_src_dir = uc.readDirect(stk_src_dir_addr)
    stk_src = ila.choice('stk_src', [stk_src_dir, acc])
    sp_pushpop = ila.choice('sp_pushpop', sp + 1, sp - 1)
    indir_addrs.append(stk_iram_addr)
    indir_datas.append(stk_src)

    stk_data = ila.choice('stk_data', [iram[sp], iram[sp + 1], iram[sp - 1]])
    dir_addrs.append(stk_src_dir_addr)
    dir_datas.append(stk_data)
    r_pop = uc.writeDirect(stk_src_dir_addr, stk_data)
    # when the direct address is 0x81 the SP comes from the write result
    # rather than from the push/pop increment.
    sp_pop = ila.ite(stk_src_dir_addr == bv(0x81, 8), r_pop.sp, sp_pushpop)

    # exchanges;  part of this implemented above in acc section.
    dir_addrs.append(xchg_src2_dir_addr)
    dir_datas.append(acc)
    xchg_src1_half_indir = ila.concat(xchg_src2_full_indir[7:4], acc[3:0])
    xchg_src1_indir = ila.choice('xchg_src1', [xchg_src1_half_indir, acc])
    indir_addrs.append(xchg_src2_indir_addr)
    indir_datas.append(xchg_src1_indir)

    # final indirect writes (address and data share the choice name
    # 'iram_indir', as in the original template).
    iram_indir = ila.store(iram, ila.choice('iram_indir', indir_addrs),
                           ila.choice('iram_indir', indir_datas))
    # final direct writes.
    assert len(dir_addrs) == len(dir_datas)
    r_dir = uc.writeDirect(ila.choice('iram_dir', dir_addrs),
                           ila.choice('iram_dir', dir_datas))

    # set the next iram.
    iram_next = ila.choice(
        'iram_result', [iram, iram_indir, iram_call, r_dir.iram, r_bit.iram])
    model.set_next('IRAM', iram_next)

    ########################### PSW ##############################################
    cjne_cy = ila.ite(cjne_src1 < cjne_src2, bv(1, 1), bv(0, 1))
    # muldiv
    div_ov = ila.ite(b == 0, bv(1, 1), bv(0, 1))
    mul_ov = ila.ite(b_mul != 0, bv(1, 1), bv(0, 1))
    # da
    acc_da_cy2 = acc_da_stage2[8:8]
    acc_da_cy = acc_da_cy2 | acc_da_cy1 | uc.cy
    # alu
    alu_cy_in = ila.choice('alu_cy_in', [uc.cy, bv(0, 1)])
    alu_cy_5b = ila.choice(
        'alu_cy_5b',
        [ila.zero_extend(alu_cy_in, 5),
         ila.sign_extend(alu_cy_in, 5)])
    alu_src1_lo_5b = ila.zero_extend(acc[3:0], 5)
    alu_src2_lo_5b = ila.zero_extend(acc_src2[3:0], 5)
    alu_ac_add = (alu_src1_lo_5b + alu_src2_lo_5b + alu_cy_5b)[4:4]
    alu_ac_sub = ila.ite(alu_src1_lo_5b < (alu_src2_lo_5b + alu_cy_5b),
                         bv(1, 1), bv(0, 1))
    alu_ac = ila.choice('alu_ac', [alu_ac_add, alu_ac_sub])
    alu_src1_sext = ila.sign_extend(acc, 9)
    alu_src2_sext = ila.sign_extend(acc_src2, 9)
    alu_src1_zext = ila.zero_extend(acc, 9)
    alu_src2_zext = ila.zero_extend(acc_src2, 9)
    alu_cy_9b_sext = ila.sign_extend(alu_cy_in, 9)
    alu_cy_9b_zext = ila.zero_extend(alu_cy_in, 9)
    alu_cy_9b = ila.choice('alu_cy_9b', [alu_cy_9b_zext, alu_cy_9b_sext])
    alu_zext_9b_sum = alu_src1_zext + alu_src2_zext + alu_cy_9b
    alu_cy_add = alu_zext_9b_sum[8:8]
    alu_cy_sub1 = ila.ite(alu_src1_zext < (alu_src2_zext + alu_cy_9b),
                          bv(1, 1), bv(0, 1))
    alu_cy_sub2 = ila.ite(acc < (acc_src2 + ila.zero_extend(uc.cy, 8)),
                          bv(1, 1), bv(0, 1))
    alu_cy = ila.choice('alu_cy', [alu_cy_add, alu_cy_sub1, alu_cy_sub2])
    alu_ov_9b_src1 = ila.choice('alu_ov_9b_src1',
                                [alu_src1_sext, alu_src1_zext])
    alu_ov_9b_src2 = ila.choice('alu_ov_9b_src2',
                                [alu_src2_sext, alu_src2_zext])
    alu_9b_add = alu_ov_9b_src1 + alu_ov_9b_src2 + alu_cy_9b
    alu_9b_sub = alu_ov_9b_src1 - alu_ov_9b_src2 + alu_cy_9b
    alu_9b_res = ila.choice('alu_9b_res', [alu_9b_add, alu_9b_sub])
    alu_ov = ila.ite(alu_9b_res[8:8] != alu_9b_res[7:7], bv(1, 1), bv(0, 1))
    acc_cy = ila.choice('acc_cy', [uc.cy, acc[0:0], acc[7:7], alu_cy])
    acc_ac = ila.choice('acc_ac', [uc.ac, alu_ac])
    acc_ov = ila.choice('acc_ov', [uc.ov, alu_ov])

    psw_bit = ila.concat(bit_cy, psw[6:0])
    psw_cjne = ila.concat(cjne_cy, psw[6:0])
    psw_div = ila.concat(bv(0, 1),
                         ila.concat(psw[6:3], ila.concat(div_ov, psw[1:0])))
    psw_mul = ila.concat(bv(0, 1),
                         ila.concat(psw[6:3], ila.concat(mul_ov, psw[1:0])))
    psw_da = ila.concat(acc_da_cy, psw[6:0])
    psw_acc = ila.concat(
        acc_cy,
        ila.concat(acc_ac, ila.concat(psw[5:3], ila.concat(acc_ov, psw[1:0]))))
    psw_next = ila.choice('psw_next', [
        r_dir.psw, r_bit.psw, psw_cjne, psw_bit, psw_div, psw_mul, psw_da,
        psw_acc, psw
    ])
    model.set_next('PSW', psw_next)

    ########################### SP ##############################################
    sp_next = ila.choice('sp_next', [
        sp + 2, sp + 1, sp - 1, sp - 2, sp, sp_pop, r_pop.sp, r_dir.sp,
        r_bit.sp
    ])
    model.set_next('SP', sp_next)

    ########################### DPTR ##############################################
    mov_dptr = ila.choice(
        'mov_dptr',
        [ila.concat(op1, op2), ila.concat(op2, op1)])
    inc_dptr = dptr + 1
    dptr_n1 = ila.choice('next_dptr', [mov_dptr, inc_dptr, dptr])
    # Slice the *candidate* next DPTR, not the current one: otherwise the
    # mov/inc alternatives above are never reachable from DPL/DPH.
    dpl_n1 = dptr_n1[7:0]
    dph_n1 = dptr_n1[15:8]
    dpl_next = ila.choice('dpl_next', [dpl_n1, r_dir.dpl, r_bit.dpl, uc.dpl])
    dph_next = ila.choice('dph_next', [dph_n1, r_dir.dph, r_bit.dph, uc.dph])
    model.set_next('DPL', dpl_next)
    model.set_next('DPH', dph_next)

    ########################### B #################################################
    b_next = ila.choice('b_next', [b_mul, b_div, r_bit.b, r_dir.b, uc.b])
    model.set_next('B', b_next)

    ########################## XRAM ###############################################
    xram_addr_rx = ila.concat(bv(0, 8),
                              ila.choice('lsb_xram_addr', [rx[0], rx[1]]))
    xram_addr_next = ila.choice('xram_addr',
                                [xram_addr_rx, dptr, uc.xram_addr,
                                 bv(0, 16)])
    model.set_next('XRAM_ADDR', xram_addr_next)
    xram_data_out_next = ila.choice('xram_data_out', [bv(0, 8), acc])
    model.set_next('XRAM_DATA_OUT', xram_data_out_next)

    ########################## SFRS ###############################################
    sfrs = [
        'p0', 'p1', 'p2', 'p3', 'pcon', 'tcon', 'tmod', 'tl0', 'th0', 'tl1',
        'th1', 'scon', 'sbuf', 'ie', 'ip'
    ]
    for s in sfrs:
        # each SFR is either written by a bit write, a direct write, or held.
        sfr_next = ila.choice(
            s + '_next',
            [getattr(r_bit, s),
             getattr(r_dir, s),
             getattr(uc, s)])
        model.set_next(s.upper(), sfr_next)

    # Synthesize every requested state element, timing each run and
    # exporting the resulting AST.
    for s in state:
        print(s)
        st = time.clock()
        model.synthesize(s, eval8051)
        t_elapsed = time.clock() - st
        ast = model.get_next(s)
        print('time: %.2f' % t_elapsed)
        model.exportOne(ast, 'asts/%s_%s' % (s, 'en' if enable_ps else 'dis'))
Exemple #29
0
def createAESILA(enable_ps):
    """Build, synthesize and export the 'aes' abstraction.

    Creates the ``aes`` abstraction and its ``aes_compute``
    micro-abstraction, registers next-state templates for their state
    elements, synthesizes them against the ``AESmicro`` / ``AESmacro``
    simulators, exports each synthesized AST under ``asts/``, records
    per-state synthesis times in ``aes-times-<en|dis>.txt`` and finally
    generates a simulator into the ``sim`` directory.

    Args:
        enable_ps: value assigned to ``m.enable_parameterized_synthesis``;
            also selects the ``en``/``dis`` suffix used in output file names.
    """
    m = ila.Abstraction("aes")
    m.enable_parameterized_synthesis = enable_ps

    # I/O interface: this is where the commands come from.
    cmd = m.inp('cmd', 2)
    cmdaddr = m.inp('cmdaddr', 16)
    cmddata = m.inp('cmddata', 8)
    # response.
    dataout = m.reg('dataout', 8)

    # internal arch state.
    state = m.reg('aes_state', 2)
    opaddr = m.reg('aes_addr', 16)
    oplen = m.reg('aes_len', 16)
    keysel = m.reg('aes_keysel', 1)
    ctr = m.reg('aes_ctr', 128)
    key0 = m.reg('aes_key0', 128)
    key1 = m.reg('aes_key1', 128)

    # for the uinst.
    xram = m.mem('XRAM', 16, 8)
    aes = m.fun('aes', 128, [128, 128, 128])

    # fetch is just looking at the input command.
    m.fetch_expr = ila.concat([state, cmd, cmdaddr, cmddata])
    m.fetch_valid = (cmd == 1) | (cmd == 2)

    # decode
    rdcmds = [(state == i) & (cmd == 1) & (cmdaddr == addr)
              for addr in xrange(0xff00, 0xff40) for i in [0, 1, 2, 3]]
    wrcmds = [(state == 0) & (cmd == 2) & (cmdaddr == addr)
              for addr in xrange(0xff00, 0xff40)]
    nopcmds = [
        ((state != 0) & (cmd != 1)) | ((state == 0) & (cmd != 1) & (cmd != 2))
    ]

    m.decode_exprs = rdcmds + wrcmds + nopcmds

    # read commands
    statebyte = ila.zero_extend(state, 8)
    opaddrbyte = ila.readchunk('rd_addr', opaddr, 8)
    oplenbyte = ila.readchunk('rd_len', oplen, 8)
    keyselbyte = ila.zero_extend(keysel, 8)
    ctrbyte = ila.readchunk('rd_ctr', ctr, 8)
    key0byte = ila.readchunk('rd_key0', key0, 8)
    key1byte = ila.readchunk('rd_key1', key1, 8)
    dataoutnext = ila.choice('dataout', [
        statebyte, opaddrbyte, oplenbyte, keyselbyte, ctrbyte, key0byte,
        key1byte,
        m.const(0, 8)
    ])
    m.set_next('dataout', dataoutnext)

    # write commands.
    def mb_reg_wr(name, reg):
        # multibyte register write.
        reg_wr = ila.writechunk('wr_' + name, reg, cmddata)
        reg_nxt = ila.choice('nxt_' + name, [reg_wr, reg])
        m.set_next(name, reg_nxt)

    mb_reg_wr('aes_addr', opaddr)
    mb_reg_wr('aes_len', oplen)
    mb_reg_wr('aes_ctr', ctr)
    mb_reg_wr('aes_key0', key0)
    mb_reg_wr('aes_key1', key1)

    # bit-level registers
    def bit_reg_wr(name, reg, sz):
        # bitwise register write
        assert reg.type.bitwidth == sz
        reg_wr = cmddata[sz - 1:0]
        reg_nxt = ila.choice('nxt_' + name, [reg_wr, reg])
        m.set_next(name, reg_nxt)

    bit_reg_wr('aes_keysel', keysel, 1)

    # these are for the uinst
    um = m.add_microabstraction('aes_compute', state != 0)

    # read data
    rd_data = um.reg('rd_data', 128)
    enc_data = um.reg('enc_data', 128)
    byte_cnt = um.reg('byte_cnt', 4)
    oped_byte_cnt = um.reg('oped_byte_cnt', 16)
    blk_cnt = um.reg('blk_cnt', 16)
    um.set_init('byte_cnt', um.const(0, 4))
    um.set_init('blk_cnt', um.const(0, 16))
    um.set_init('oped_byte_cnt', um.const(0, 16))
    uxram = m.getmem('XRAM')

    byte_cnt_16b = ila.zero_extend(byte_cnt, 16)

    um.fetch_expr = state
    um.decode_exprs = [(state == i) & (byte_cnt == j) for j in xrange(16)
                       for i in [1, 2, 3]]

    def usim(s):
        # simulator callback handed to um.synthesize.
        return AESmicro().simMicro(s)

    # byte_cnt
    byte_cnt_inc = byte_cnt + 1
    # NOTE(review): byte_cnt_buf is not referenced again in this function;
    # kept because it is unclear whether creating the choice has side effects.
    byte_cnt_buf = ila.choice('byte_cnt_buf', [byte_cnt_inc, byte_cnt])
    byte_cnt_nxt = ila.choice(
        'byte_cnt_nxt', [byte_cnt_inc, m.const(0, 4), byte_cnt])
    um.set_next('byte_cnt', byte_cnt_nxt)

    # oped_byte_cnt
    oped_byte_cnt_inc = oped_byte_cnt + 16
    oped_byte_cnt_nxt = ila.choice(
        'oped_byte_cnt_nxt',
        [m.const(0, 16), oped_byte_cnt, oped_byte_cnt_inc])
    um.set_next('oped_byte_cnt', oped_byte_cnt_nxt)

    # blk_cnt
    blk_cnt_inc = blk_cnt + 16
    more_blocks = (oped_byte_cnt_inc < oplen)
    blk_cnt_nxt = ila.choice('blk_cnt_nxt', [
        m.const(0, 16), blk_cnt, blk_cnt_inc,
        ila.ite(more_blocks, blk_cnt_inc, blk_cnt)
    ])
    um.set_next('blk_cnt', blk_cnt_nxt)

    # ustate
    ustate = um.getreg('aes_state')
    ustate_nxt = ila.choice('ustate_next', [
        m.const(0, 2),
        m.const(1, 2),
        m.const(2, 2),
        m.const(3, 2), ustate,
        ila.ite(more_blocks, m.const(1, 2), m.const(0, 2))
    ])
    um.set_next('aes_state', ustate_nxt)

    # rd_data
    rdblock = ila.writechunk("rd_data_chunk", rd_data,
                             ila.load(uxram, opaddr + blk_cnt + byte_cnt_16b))
    rd_data_nxt = ila.choice('rd_data_nxt', rdblock, rd_data)
    um.set_next('rd_data', rd_data_nxt)

    # enc_data
    aes_key = ila.ite(keysel == 0, key0, key1)
    aes_enc_data = ila.appfun(aes, [ctr, aes_key, rd_data])
    enc_data_nxt = ila.ite(state == 2, aes_enc_data, enc_data)
    um.set_next('enc_data', enc_data_nxt)
    #print um.get_next('enc_data')

    # xram write
    xram_w_data = ila.readchunk('enc_data_chunk', enc_data, 8)
    xram_w_addr = opaddr + blk_cnt + byte_cnt_16b
    xram_w_aes = ila.store(uxram, xram_w_addr, xram_w_data)
    xram_nxt = ila.choice('xram_nxt', uxram, xram_w_aes)
    um.set_next('XRAM', xram_nxt)

    suffix = 'en' if enable_ps else 'dis'

    # Accumulates across BOTH synthesis loops below; it must not be reset
    # per iteration or the reported total drops earlier measurements.
    t_elapsed = 0

    # The timing file is closed on every exit path, including exceptions
    # raised by synthesis.
    with open('aes-times-%s.txt' % suffix, 'wt') as timefile:
        # micro-synthesis
        for s in [
                'XRAM', 'aes_state', 'byte_cnt', 'blk_cnt', 'oped_byte_cnt',
                'rd_data'
        ]:
            st = time.clock()
            um.synthesize(s, usim)
            dt = time.clock() - st
            t_elapsed += dt
            timefile.write('%s %.2f\n' % ('u_' + s, dt))
            ast = um.get_next(s)
            print('%s: %s' % (s, str(ast)))
            m.exportOne(ast, 'asts/u_%s_%s' % (s, suffix))

        def sim(s):
            # simulator callback handed to m.synthesize.
            return AESmacro().simMacro(s)

        # state
        state_next = ila.choice(
            'state_next',
            [state, ila.ite(cmddata == 1, m.const(1, 2), state)])
        m.set_next('aes_state', state_next)

        # xram
        m.set_next('XRAM', xram)
        # synthesize.
        for s in [
                'aes_state', 'aes_addr', 'aes_len', 'aes_keysel', 'aes_ctr',
                'aes_key0', 'aes_key1', 'dataout'
        ]:
            st = time.clock()
            m.synthesize(s, sim)
            dt = time.clock() - st
            t_elapsed += dt
            timefile.write('%s %.2f\n' % (s, dt))

            ast = m.get_next(s)
            print('%s: %s' % (s, str(ast)))
            m.exportOne(ast, 'asts/%s_%s' % (s, suffix))

    # connect to the uinst
    m.connect_microabstraction('aes_state', um)
    m.connect_microabstraction('XRAM', um)

    print('total time: %.2f' % t_elapsed)

    #print 'aes_state: %s' % str(m.get_next('aes_state'))
    #print 'XRAM: %s' % str(m.get_next('XRAM'))

    #m.generateSim('gen/aes_sim.hpp')
    m.generateSimToDir('sim')
Exemple #30
0
def createAESILA(enable_ps):
    """Build the 'aes' abstraction and its 'aes_compute' micro-abstraction.

    The top-level abstraction gets the command inputs (cmd, cmdaddr,
    cmddata), the registers aes_state / aes_addr / aes_len / aes_ctr /
    aes_key0, the XRAM memory and the 'aes' function.  A micro-abstraction
    named 'aes_compute' is attached with the condition ``aes_state != 0``
    and receives its own registers.  Next-state expressions are mostly
    given as ``ila.choice`` templates over candidate expressions.

    Args:
        enable_ps: value stored in ``enable_parameterized_synthesis`` of the
            top-level abstraction.

    Returns:
        A tuple ``(abstraction, micro_abstraction)``.
    """
    top = ila.Abstraction("aes")
    top.enable_parameterized_synthesis = enable_ps

    # Command interface inputs.
    cmd = top.inp('cmd', 2)
    cmdaddr = top.inp('cmdaddr', 16)
    cmddata = top.inp('cmddata', 8)

    # Registers of the top-level abstraction.
    state = top.reg('aes_state', 2)
    addr_reg = top.reg('aes_addr', 16)
    len_reg = top.reg('aes_len', 16)
    ctr_reg = top.reg('aes_ctr', 128)
    key0_reg = top.reg('aes_key0', 128)

    # Memory and function symbol used by the micro-instructions.
    xram = top.mem('XRAM', 16, 8)
    aes_fn = top.fun('aes', 128, [128, 128, 128])

    # Fetch expression is the concatenated command; valid when cmd == 2.
    top.fetch_expr = ila.concat([cmd, cmdaddr, cmddata])
    top.fetch_valid = (cmd == 2)

    # One decode expression per command address in [0xff00, 0xff30).
    write_decodes = []
    for target in xrange(0xff00, 0xff30):
        write_decodes.append((cmd == 2) & (cmdaddr == target))
    top.decode_exprs = write_decodes

    micro = top.add_microabstraction('aes_compute', state != 0)

    def install_chunked_write(reg_name, reg_expr):
        # Next value: either a chunk of the register overwritten with
        # cmddata, or the register left unchanged.
        written = ila.writechunk('wr_' + reg_name, reg_expr, cmddata)
        top.set_next(reg_name,
                     ila.choice('nxt_' + reg_name, [written, reg_expr]))

    multibyte_regs = (
        ('aes_addr', addr_reg),
        ('aes_len', len_reg),
        ('aes_ctr', ctr_reg),
        ('aes_key0', key0_reg),
    )
    for reg_name, reg_expr in multibyte_regs:
        install_chunked_write(reg_name, reg_expr)

    # aes_state: unchanged, or set to 1 when cmddata == 1.
    to_one_or_hold = ila.ite(cmddata == 1, top.const(1, 2), state)
    top.set_next('aes_state',
                 ila.choice('state_next', [state, to_one_or_hold]))

    # XRAM is left unchanged by the top-level instructions.
    top.set_next('XRAM', xram)

    ################################
    #           Micro-ILA
    ################################

    # Registers owned by the micro-abstraction.
    rd_buf = micro.reg('rd_data', 128)
    enc_buf = micro.reg('enc_data', 128)
    byte_idx = micro.reg('byte_cnt', 4)
    done_bytes = micro.reg('oped_byte_cnt', 16)
    blk_off = micro.reg('blk_cnt', 16)
    timer = micro.reg('aes_time', 5)
    micro_ctr = micro.reg('uaes_ctr', 128)

    # Counters start at zero; uaes_ctr starts from the aes_ctr register.
    zero_initialised = (
        ('byte_cnt', 4),
        ('blk_cnt', 16),
        ('oped_byte_cnt', 16),
        ('aes_time', 5),
    )
    for reg_name, width in zero_initialised:
        micro.set_init(reg_name, micro.const(0, width))
    micro.set_init('uaes_ctr', top.getreg('aes_ctr'))
    micro_xram = top.getmem('XRAM')

    byte_idx_16 = ila.zero_extend(byte_idx, 16)

    # Micro fetch/decode: every (state, byte_cnt) pair for states 1..3.
    micro.fetch_expr = state
    micro_decodes = []
    for byte_val in xrange(16):
        for state_val in [1, 2, 3]:
            micro_decodes.append(
                (state == state_val) & (byte_idx == byte_val))
    micro.decode_exprs = micro_decodes

    # byte_cnt: reset to 0, increment by 1, or no change.
    byte_idx_plus1 = byte_idx + 1
    micro.set_next(
        'byte_cnt',
        ila.choice('byte_cnt_nxt',
                   [top.const(0, 4), byte_idx_plus1, byte_idx]))

    # oped_byte_cnt: reset to 0, increment by 16, or no change.
    done_bytes_plus16 = done_bytes + 16
    micro.set_next(
        'oped_byte_cnt',
        ila.choice('oped_byte_cnt_nxt',
                   [top.const(0, 16), done_bytes_plus16, done_bytes]))

    # blk_cnt: reset, hold, advance by 16, or advance only while the
    # incremented byte count is still below aes_len.
    blk_off_plus16 = blk_off + 16
    more_blocks = (done_bytes_plus16 < len_reg)
    blk_candidates = [
        top.const(0, 16),
        blk_off,
        blk_off_plus16,
        ila.ite(more_blocks, blk_off_plus16, blk_off),
    ]
    micro.set_next('blk_cnt', ila.choice('blk_cnt_nxt', blk_candidates))

    # aes_time: a counter whose increment saturates at 31.
    timer_plus1 = timer + 1
    timer_at_max = timer == top.const(31, 5)
    timer_saturating = ila.ite(timer_at_max, timer, timer_plus1)
    timer_next = ila.choice(
        "aes_timeC",
        top.const(0, 5),
        timer_saturating,
        ila.ite(more_blocks, top.const(0, 5), timer_saturating))
    timer_enough = timer > top.const(10, 5)
    micro.set_next('aes_time', timer_next)

    # uaes_ctr: hold, advance by a value in [1, 128] while more blocks
    # remain, or reload from aes_ctr.
    ctr_step = ila.inrange('addvalue', micro.const(1, 128),
                           micro.const(128, 128))
    ctr_advanced = ila.ite(more_blocks, micro_ctr + ctr_step, micro_ctr)
    micro.set_next(
        'uaes_ctr',
        ila.choice('uaes_ctr_nxt', micro_ctr, ctr_advanced, ctr_reg))

    # aes_state as updated by the micro-abstraction: any constant 0..3,
    # no change, or one of two conditional transitions.
    micro_state = micro.getreg('aes_state')
    state_candidates = [top.const(val, 2) for val in (0, 1, 2, 3)]
    state_candidates.append(micro_state)
    state_candidates.append(
        ila.ite(more_blocks, top.const(1, 2), top.const(0, 2)))
    state_candidates.append(
        ila.ite(timer_enough, top.const(3, 2), top.const(2, 2)))
    micro.set_next('aes_state',
                   ila.choice('ustate_next', state_candidates))

    # rd_data: write the value loaded from XRAM into a chunk, or hold.
    read_addr = addr_reg + blk_off + byte_idx_16
    loaded = ila.load(micro_xram, read_addr)
    rd_written = ila.writechunk("rd_data_chunk", rd_buf, loaded)
    micro.set_next('rd_data',
                   ila.choice('rd_data_nxt', rd_written, rd_buf))

    # enc_data: result of the 'aes' function when state == 2, else hold.
    ctr_plus_blk = ctr_reg + ila.zero_extend(blk_off, 128)
    ctr_operand = ila.choice('ctr', micro_ctr, ctr_plus_blk)
    encrypted = ila.appfun(aes_fn, [ctr_operand, key0_reg, rd_buf])
    micro.set_next('enc_data', ila.ite(state == 2, encrypted, enc_buf))

    # XRAM: store an 8-bit chunk of enc_data at the current address, or
    # leave the memory unchanged.
    out_chunk = ila.readchunk('enc_data_chunk', enc_buf, 8)
    write_addr = addr_reg + blk_off + byte_idx_16
    xram_stored = ila.store(micro_xram, write_addr, out_chunk)
    micro.set_next('XRAM',
                   ila.choice('xram_nxt', micro_xram, xram_stored))

    return top, micro