예제 #1
0
    def createStates(self):
        """Declare the model state and install its next-state functions.

        Steps, in order: reset the bookkeeping containers, declare the
        log/check monitor registers and two arbitration functions, call the
        PC/register/constant builders for indices 0 and 1, declare the
        barrier arbiter, per-thread barrier state and barrier counters,
        generate the next-state functions, and finally commit them through
        the ``set_next_*`` helpers.
        """
        # Bookkeeping containers.
        self.pc_list, self.pc_next_list = [], []  # two PCs and their next-state functions
        self.next_state_dict = {}  # next-state function lookup
        self.pred_registers = []
        self.scalar_registers = []
        self.long_scalar_registers = []

        # Monitor registers: log/check values plus a 1-bit enable for each.
        declare_reg = self.model.reg
        self.log_register = declare_reg('log_register',
                                        instruction_format.LONG_REG_BITS)
        self.check_register = declare_reg('check_register',
                                          instruction_format.LONG_REG_BITS)
        self.en_log_register = declare_reg('en_log_register', 1)
        self.en_check_register = declare_reg('en_check_register', 1)

        # Monitor next-state functions start out as "hold current value".
        self.log_register_next = self.log_register
        self.en_log_register_next = self.en_log_register
        self.check_register_next = self.check_register
        self.en_check_register_next = self.en_check_register

        # Two zero-argument, 1-bit functions and their applications.
        first_arb_fun = self.model.fun('arb_fun_0', 1, [])
        second_arb_fun = self.model.fun('arb_fun_1', 1, [])
        self.arb_fun_list = [first_arb_fun, second_arb_fun]
        self.arb_list = [
            ila.appfun(arb_fun, []) for arb_fun in self.arb_fun_list
        ]

        self.bar_inst = []
        self.bar_list = []

        for index in (0, 1):
            self.createPC(index)
        for index in (0, 1):
            self.createRegs(index)
        self.createConst()

        # Barrier arbiter: its next value is an application of bar_arb_fun.
        self.bar_arb_fun = self.model.fun('bar_arb_fun', 1, [])
        self.bar_arb = self.model.reg('bar_arb', 1)
        self.model.set_next('bar_arb', ila.appfun(self.bar_arb_fun, []))

        # One barrier-state register per thread, then the two counters.
        self.bar_state_list = [
            self.model.reg('bar_state_%d' % (i), self.bar_spec.BAR_STATE_BITS)
            for i in range(self.thread_num)
        ]
        self.bar_counter_enter = self.model.reg(
            'bar_counter_enter', self.bar_spec.BAR_COUNTER_ENTER_BITS)
        self.bar_counter_exit = self.model.reg(
            'bar_counter_exit', self.bar_spec.BAR_COUNTER_EXIT_BITS)

        for index in (0, 1):
            self.generate_next_state(index)
        self.createBar()

        self.createLog()
        self.createCheck()

        # Commit the generated next-state functions to the model.
        self.set_next_state()
        for index in (0, 1):
            self.set_next_pc(index)
        self.set_next_bar()
예제 #2
0
    def createStates(self):
        """Declare the model state and install its next-state functions.

        Steps, in order: reset the bookkeeping containers, declare the
        instruction memory, the log/check monitor registers (value, enable
        and 2-bit ``lsg`` variants) and two arbitration functions, call the
        PC/register/constant builders, fetch instructions, generate the
        next-state functions for indices 0 and 1 and commit them.
        """
        # Bookkeeping containers.
        self.pc_list, self.pc_next_list = [], []  # two PCs and their next-state functions
        self.imem = self.model.mem('imem', 32, 64)

        self.next_state_dict = {}  # next-state function lookup
        self.pred_registers = []
        self.scalar_registers = []
        self.long_scalar_registers = []

        # Monitor registers.
        declare_reg = self.model.reg
        self.log_register = declare_reg('log_register',
                                        instruction_format.LONG_REG_BITS)
        self.check_register = declare_reg('check_register',
                                          instruction_format.LONG_REG_BITS)
        self.en_log_register = declare_reg('en_log_register', 1)
        self.en_check_register = declare_reg('en_check_register', 1)
        self.lsg_log_register = declare_reg('lsg_log_register', 2)
        self.lsg_check_register = declare_reg('lsg_check_register', 2)

        # Monitor next-state functions start out as "hold current value".
        self.log_register_next = self.log_register
        self.en_log_register_next = self.en_log_register
        self.lsg_log_register_next = self.lsg_log_register
        self.check_register_next = self.check_register
        self.en_check_register_next = self.en_check_register
        self.lsg_check_register_next = self.lsg_check_register

        # Two zero-argument, 1-bit functions and their applications.
        first_arb_fun = self.model.fun('arb_fun_0', 1, [])
        second_arb_fun = self.model.fun('arb_fun_1', 1, [])
        self.arb_fun_list = [first_arb_fun, second_arb_fun]
        self.arb_list = [
            ila.appfun(arb_fun, []) for arb_fun in self.arb_fun_list
        ]

        # Barrier instruction containers (arrive / sync / aux).
        self.bar_arrive_inst = []
        self.bar_sync_inst = []
        self.bar_aux_inst = []
        self.bar_sync_list = []
        self.bar_arrive_list = []
        self.bar_aux_list = []

        self.createPC()
        for index in (0, 1):
            self.createRegs(index)
        self.createConst()
        self.bar_state_list = []

        self.instFetch()
        for index in (0, 1):
            self.generate_next_state(index)

        self.createLog()
        self.createCheck()

        # Commit the generated next-state functions to the model.
        self.set_next_state()
        for index in (0, 1):
            self.set_next_pc(index)
예제 #3
0
def test2():
    """Check one-step unrolled equality with uninterpreted functions.

    Register 'y' is given next value foo(x); after one unrolled step it must
    equal foo(x) and must not be provably equal to goo(x).
    """
    abstraction = ila.Abstraction('t1')
    reg_x = abstraction.reg('x', 8)
    reg_y = abstraction.reg('y', 8)
    foo = abstraction.fun('foo', 8, [8])
    goo = abstraction.fun('goo', 8, [8])

    # x holds its value; y becomes foo(x).
    abstraction.set_next('x', reg_x)
    abstraction.set_next('y', ila.appfun(foo, reg_x))

    assert abstraction.areEqualUnrolled(1, reg_y, ila.appfun(foo, reg_x))
    assert not abstraction.areEqualUnrolled(1, reg_y, ila.appfun(goo, reg_x))
예제 #4
0
 def createMonitor(self):
     """Declare the log/check monitor registers and the monitor arbiters.

     Creates the value and 1-bit enable registers for the log and check
     monitors, initialises their next-state attributes to the registers
     themselves, and declares two zero-argument 1-bit functions together
     with their applications.
     """
     declare_reg = self.model.reg
     self.log_register = declare_reg('log_register',
                                     instruction_format.LONG_REG_BITS)
     self.check_register = declare_reg('check_register',
                                       instruction_format.LONG_REG_BITS)
     self.en_log_register = declare_reg('en_log_register', 1)
     self.en_check_register = declare_reg('en_check_register', 1)

     # Next-state functions start out as "hold current value".
     self.log_register_next = self.log_register
     self.en_log_register_next = self.en_log_register
     self.check_register_next = self.check_register
     self.en_check_register_next = self.en_check_register

     first_fun = self.model.fun('monitor_arb_fun_0', 1, [])
     second_fun = self.model.fun('monitor_arb_fun_1', 1, [])
     self.monitor_arb_fun_list = [first_fun, second_fun]
     self.monitor_arb_list = [
         ila.appfun(arb_fun, []) for arb_fun in self.monitor_arb_fun_list
     ]
예제 #5
0
def main():
    """Exercise the Horn-clause export API on three abstractions.

    1. A dummy ILA from ``getDummyILA()``: whole-model and single-node export.
    2. A small abstraction with one uninterpreted function application.
    3. An ALU model imported from ``tmp/alu.txt``: an instruction and a child
       instruction are registered with the same next-state functions, a
       mapping is generated and the result is exported.

    Returns early (after printing a message) when the ALU file is missing.
    All three exports write to the same ``hornFile`` path.
    """
    hornFile = "tmp/horn_test_node.smt2"
    A = getDummyILA()
    ila.setloglevel(3, "")
    ila.enablelog("Horn")
    A.hornifyAll("tmp/horn_test_ILA.smt2")
    r2_nxt = A.get_next('r2')
    A.hornifyNode(r2_nxt, "r2_nxt")
    A.exportHornToFile(hornFile)

    m = ila.Abstraction("fun")
    x = m.reg('x', 8)
    y = m.reg('y', 16)
    f = m.fun('foo', 8, [8, 16])
    r = ila.appfun(f, x, y)
    m.hornifyBvAsInt(True)
    m.hornifyNode(r, "foo")
    m.exportHornToFile(hornFile)

    alu = ila.Abstraction("alu")
    alu.hornifyBvAsInt(True)
    aluFile = 'tmp/alu.txt'
    if not os.path.exists(aluFile):
        # Parenthesised form is valid as both a Python 2 statement and a
        # Python 3 function call.
        print('alu file not exist')
        return
    alu.importAll(aluFile)
    r0_nxt = alu.get_next('r0')
    r1_nxt = alu.get_next('r1')
    pc_nxt = alu.get_next('pc')
    rom_nxt = alu.get_next('rom')
    # Disabled alternative: per-node export of the four next-state functions.
    #   alu.hornifyNode(pc_nxt, "pc_nxt")
    #   alu.hornifyNode(r0_nxt, "r0_nxt")
    #   alu.hornifyNode(r1_nxt, "r1_nxt")
    #   alu.hornifyNode(rom_nxt, "rom_nxt")
    #   alu.exportHornToFile(hornFile)

    # The instruction and its child share the same next-state functions.
    next_functions = [
        ('pc', pc_nxt),
        ('r0', r0_nxt),
        ('r1', r1_nxt),
        ('rom', rom_nxt),
    ]

    alu.addHornInstr('alu_instr', alu.bool(True))
    for state_name, next_node in next_functions:
        alu.addHornNext('alu_instr', state_name, next_node)

    alu.addHornChild('alu_child', 'alu_instr', alu.bool(True))
    for state_name, next_node in next_functions:
        alu.addHornNext('alu_child', state_name, next_node)

    alu.generateHornMapping('Interleave')
    #alu.generateHornMapping ('Blocking')
    alu.exportHornToFile(hornFile)
예제 #6
0
파일: ueqAES.py 프로젝트: emzha/IMDb
def readPy():
    """Build and return the 'aes1' abstraction.

    Next-state functions for 'aes_state', 'byte_cnt', 'rd_data' and 'XRAM'
    are obtained from ``readpyast``; 'enc_data' is defined inline; the
    remaining registers keep their value.
    """
    model = ila.Abstraction("aes1")

    # Common state variables.
    state = model.reg('aes_state', 2)
    opaddr = model.reg('aes_addr', 16)
    oplen = model.reg('aes_len', 16)
    keysel = model.reg('aes_keysel', 1)
    ctr = model.reg('aes_ctr', 128)
    key0 = model.reg('aes_key0', 128)
    key1 = model.reg('aes_key1', 128)
    xram = model.mem('XRAM', 16, 8)
    aes = model.fun('aes', 128, [128, 128, 128])
    # Micro-instruction state variables.
    rd_data = model.reg('rd_data', 128)
    enc_data = model.reg('enc_data', 128)
    byte_cnt = model.reg('byte_cnt', 16)

    # aes_state: initial value 1, next value from readpyast.
    next_state = readpyast(model, 'aes_state')
    model.set_init('aes_state', model.const(1, 2))
    model.set_next('aes_state', next_state)

    # byte_cnt: next value from readpyast, initial value 0.
    model.set_next('byte_cnt', readpyast(model, 'byte_cnt'))
    model.set_init('byte_cnt', model.const(0, 16))

    # rd_data
    model.set_next('rd_data', readpyast(model, 'rd_data'))

    # enc_data: updated with aes(ctr, key, rd_data) when state == 2.
    selected_key = ila.ite(keysel == 0, key0, key1)
    encrypted = ila.appfun(aes, [ctr, selected_key, rd_data])
    model.set_next('enc_data', ila.ite(state == 2, encrypted, enc_data))

    # XRAM
    model.set_next('XRAM', readpyast(model, 'XRAM'))

    # Everything else holds its value.
    model.set_next('aes_addr', opaddr)
    model.set_next('aes_len', oplen)
    model.set_ipred('aes_len', (oplen != 0) & (oplen[3:0] == 0))
    for reg_name, reg_node in (('aes_keysel', keysel), ('aes_ctr', ctr),
                               ('aes_key0', key0), ('aes_key1', key1)):
        model.set_next(reg_name, reg_node)

    return model
예제 #7
0
def main():
    """Exercise function application, synthesis, export/import and sim generation.

    Asserts several equalities involving uninterpreted functions on the
    'test' abstraction, then sets a next-state function for 'x', round-trips
    the model through 'tmp/test_ila_export.txt' and generates a simulator
    header at 'tmp/test_ila_sim.hpp'.
    """
    model = ila.Abstraction("test")

    true_node = model.bool(True)
    false_node = model.bool(False)  # created but not referenced afterwards

    reg_x = model.reg('x', 8)
    reg_y = model.reg('y', 8)

    # Zero-argument function constrained to the range [10, 15].
    cnst_fun = model.fun('cnst', 8, [])
    cnst_app = ila.appfun(cnst_fun, [])
    forty = model.const(40, 8)
    model.add_assumption((cnst_app >= 10) & (cnst_app <= 15))
    candidate = ila.choice('val', cnst_app, forty)
    template = candidate + reg_x + reg_y

    def sim(d):
        # Reference behaviour handed to syn_elem.
        total = d['x'] + d['y'] + randint(11, 12)
        return {'res': total & 0xff}

    synthesized = model.syn_elem('res', template, sim)
    assert model.areEqual(synthesized, cnst_app + reg_x + reg_y)

    reg_z = model.reg('z', 16)
    zero8 = model.const(0, 8)  # created but not referenced afterwards
    one8 = model.const(1, 8)  # created but not referenced afterwards
    max8 = model.const(255, 8)

    # Equal arguments must give equal results for the same function.
    foo = model.fun('foo', 8, [8, 16])
    foo_xz = ila.appfun(foo, reg_x, reg_z)
    foo_yz = ila.appfun(foo, reg_y, reg_z)
    args_equal = reg_x == reg_y
    results_equal = foo_xz == foo_yz
    assert model.areEqual(ila.implies(args_equal, results_equal), true_node)

    assert model.areEqual(foo_xz <= max8, true_node)

    upper = model.const(128, 8)
    lower = model.const(120, 16)
    premise = ila.implies((reg_x < upper) & (reg_z > lower),
                          ila.appfun(foo, reg_x, reg_z) > upper)
    claim = ila.implies(premise & (reg_x == 125) & (reg_z == 125),
                        ila.appfun(foo, reg_x, reg_z) > upper)
    assert model.areEqual(claim, true_node)

    model.set_next('x', ila.appfun(foo, reg_y, reg_z))

    exportFile = 'tmp/test_ila_export.txt'
    model.exportAll(exportFile)
    model.importAll(exportFile)

    simFile = 'tmp/test_ila_sim.hpp'
    model.generateSim(simFile)
예제 #8
0
    def createStates(self):
        """Declare the model state and install its next-state functions.

        Steps, in order: reset the bookkeeping containers, call the
        PC/register/constant builders for indices 0 and 1, declare the
        barrier arbiter, per-thread barrier state and barrier counters,
        generate the next-state functions, and commit them through the
        ``set_next_*`` helpers.
        """
        # Bookkeeping containers.
        self.pc_list, self.pc_next_list = [], []
        #self.imem_list = []
        self.next_state_dict = {}
        self.pred_registers = []
        self.scalar_registers = []
        self.long_scalar_registers = []
        self.log_registers = []
        self.check_registers = []
        self.en_log_registers = []
        self.en_check_registers = []
        self.bar_inst = []
        self.bar_list = []

        for index in (0, 1):
            self.createPC(index)
        for index in (0, 1):
            self.createRegs(index)
        self.createConst()

        # Barrier arbiter: its next value is an application of bar_arb_fun.
        self.bar_arb_fun = self.model.fun('bar_arb_fun', 1, [])
        self.bar_arb = self.model.reg('bar_arb', 1)
        self.model.set_next('bar_arb', ila.appfun(self.bar_arb_fun, []))

        # One barrier-state register per thread, then the two counters.
        self.bar_state_list = [
            self.model.reg('bar_state_%d' % (i), self.bar_spec.BAR_STATE_BITS)
            for i in range(self.thread_num)
        ]
        self.bar_counter_enter = self.model.reg(
            'bar_counter_enter', self.bar_spec.BAR_COUNTER_ENTER_BITS)
        self.bar_counter_exit = self.model.reg(
            'bar_counter_exit', self.bar_spec.BAR_COUNTER_EXIT_BITS)

        for index in (0, 1):
            self.generate_next_state(index)
        self.createBar()

        self.createLog()
        self.createCheck()

        # Commit the generated next-state functions to the model.
        self.set_next_state()
        for index in (0, 1):
            self.set_next_pc(index)
        for index in (0, 1):
            self.set_next_bar(index)
예제 #9
0
 def createRegs(self):
     """Load the register book and declare the 1-bit 'arb' register.

     The pickled register-name collection at ``ptxILA.reg_book_file`` is
     loaded and the three barrier entries are removed from it. The 'arb'
     register is then declared with initial value 0 and a next value given
     by an application of the zero-argument function 'arb_fun'.

     Raises:
         ValueError: presumably, if a barrier name is missing from the
             book (assumes the book is a list -- TODO confirm).
     """
     self.scalar_registers_a = []
     self.scalar_registers_b = []
     # Use a context manager so the file handle is closed even if
     # unpickling fails (the original never closed it).
     # NOTE(review): pickle.load must only be used on trusted files.
     with open(ptxILA.reg_book_file) as reg_book_obj:
         reg_book = pickle.load(reg_book_obj)
     # Barrier state is not a per-thread scalar register.
     for barrier_name in ('bar_state', 'bar_counter_enter',
                          'bar_counter_exit'):
         reg_book.remove(barrier_name)
     # Disabled: per-name declaration of the '_a' / '_b' scalar registers.
     #   for reg_name in reg_book:
     #       self.scalar_registers_a.append(self.model.reg(reg_name + '_a', instruction_format.REG_BITS))
     #       self.scalar_registers_b.append(self.model.reg(reg_name + '_b', instruction_format.REG_BITS))
     self.arb_fun = self.model.fun('arb_fun', 1, [])
     self.arb = self.model.reg('arb', 1)
     self.model.set_next('arb', ila.appfun(self.arb_fun,
                                           []))  #Non-determined value
     self.arbA = ila.const(0x0, 1)
     self.arbB = ila.const(0x1, 1)
     self.model.set_init('arb', self.model.const(0x0, 1))
예제 #10
0
 def createBar(self):
     """Declare the barrier state, arbiter and counters.

     One barrier-state register is declared per thread. The next value of
     'bar_arb' is an application of 'bar_arb_fun' when threads 0 and 1 are
     either both in BAR_WAIT or both not in BAR_WAIT; otherwise it is 0 if
     thread 1 is in BAR_WAIT and 1 if not.
     """
     self.bar_state_list = [
         self.model.reg('bar_state_%d' % (i), self.bar_spec.BAR_STATE_BITS)
         for i in range(self.thread_num)
     ]
     self.bar_arb_fun = self.model.fun('bar_arb_fun', 1, [])
     self.bar_arb = self.model.reg('bar_arb', 1)

     both_waiting = (
         (self.bar_state_list[0] == self.bar_spec.BAR_WAIT) &
         (self.bar_state_list[1] == self.bar_spec.BAR_WAIT))
     none_waiting = (
         (self.bar_state_list[0] != self.bar_spec.BAR_WAIT) &
         (self.bar_state_list[1] != self.bar_spec.BAR_WAIT))
     arbitrary_pick = ila.appfun(self.bar_arb_fun, [])
     forced_pick = ila.ite(
         self.bar_state_list[1] == self.bar_spec.BAR_WAIT,
         self.model.const(0x0, 1), self.model.const(0x1, 1))
     self.bar_arb_next = ila.ite(both_waiting | none_waiting,
                                 arbitrary_pick, forced_pick)
     self.model.set_next('bar_arb', self.bar_arb_next)

     self.bar_counter_enter = self.model.reg(
         'bar_counter_enter', self.bar_spec.BAR_COUNTER_ENTER_BITS)
     self.bar_counter_exit = self.model.reg(
         'bar_counter_exit', self.bar_spec.BAR_COUNTER_EXIT_BITS)
예제 #11
0
    def createStates(self):
        """Declare the model state and install its next-state functions.

        Steps, in order: reset the bookkeeping containers, declare the
        log/check monitor registers and a single arbitration function, call
        the PC/register/constant builders for indices 0 and 1, generate the
        next-state functions and commit them.
        """
        # Bookkeeping containers.
        self.pc_list, self.pc_next_list = [], []
        #self.imem_list = []
        self.next_state_dict = {}
        self.pred_registers = []
        self.scalar_registers = []
        self.long_scalar_registers = []

        # Monitor registers: log/check values plus a 1-bit enable for each.
        declare_reg = self.model.reg
        self.log_register = declare_reg('log_register',
                                        instruction_format.LONG_REG_BITS)
        self.check_register = declare_reg('check_register',
                                          instruction_format.LONG_REG_BITS)
        self.en_log_register = declare_reg('en_log_register', 1)
        self.en_check_register = declare_reg('en_check_register', 1)

        # Monitor next-state functions start out as "hold current value".
        self.log_register_next = self.log_register
        self.en_log_register_next = self.en_log_register
        self.check_register_next = self.check_register
        self.en_check_register_next = self.en_check_register

        # Zero-argument, 1-bit function and its application.
        self.arb_fun = self.model.fun('arb_fun', 1, [])
        self.arb = ila.appfun(self.arb_fun, [])

        self.bar_inst = []
        self.bar_list = []

        for index in (0, 1):
            self.createPC(index)
        for index in (0, 1):
            self.createRegs(index)
        self.createConst()
        self.bar_state_list = []

        for index in (0, 1):
            self.generate_next_state(index)

        self.createLog()
        self.createCheck()

        # Commit the generated next-state functions to the model.
        self.set_next_state()
        for index in (0, 1):
            self.set_next_pc(index)
예제 #12
0
def WRU1(gb):
    """Fold this instruction into the ``gb.*_nxt`` next-state expressions.

    The instruction is decoded when arg_1_TREADY, arg_0_TREADY and st_ready
    all equal READY_FALSE. For every state element the existing
    ``gb.<name>_nxt`` expression is wrapped as
    ``ite(decode, <update for this instruction>, <previous expression>)``.

    Args:
        gb: model container exposing the state variables, constants and the
            accumulated ``*_nxt`` expressions that are read and rewritten.
    """
    READY_T, READY_F = gb.READY_TRUE, gb.READY_FALSE
    VALID_T, VALID_F = gb.VALID_TRUE, gb.VALID_FALSE
    DATA_SIZE = gb.DATA_SIZE

    decode = ((gb.arg_1_TREADY == READY_F) &
              (gb.arg_0_TREADY == READY_F) &
              (gb.st_ready == READY_F))

    endPixel = ((gb.RAM_x == gb.RAM_x_M - gb.RAM_x_1) &
                (gb.RAM_y == gb.RAM_y_M - gb.RAM_y_1))
    relPixel = (gb.RAM_x == gb.RAM_x_1) & (gb.RAM_y == gb.RAM_y_M)

    def position_condition():
        # Condition shared by the proc_in and arg_0_TVALID updates; a fresh
        # expression is built on every call.
        first_term = ((gb.RAM_x > gb.stencil_size - 1) &
                      (gb.RAM_y >= gb.RAM_size))
        second_term = ((gb.RAM_x == gb.RAM_x_1) &
                       (gb.RAM_y > gb.RAM_size))
        return first_term | second_term

    # next state functions for child-states
    def genRows(idx):
        # Concatenate the idx-th DATA_SIZE-wide slice of stencil[8]..stencil[0].
        low = gb.DATA_SIZE * idx
        high = low + DATA_SIZE - 1
        return ila.concat(
            [gb.stencil[k][high:low] for k in range(8, -1, -1)])

    stencil_rows = [
        genRows(i) for i in xrange(gb.stencil_size - 1, -1, -1)
    ]

    loaded_proc_in = ila.ite(position_condition(), ila.concat(stencil_rows),
                             gb.proc_in)
    proc_in_nxt = ila.ite(relPixel, gb.proc_in, loaded_proc_in)
    gb.proc_in_nxt = ila.ite(decode, proc_in_nxt, gb.proc_in_nxt)

    # next state functions for output ports
    gb.arg_1_TREADY_nxt = ila.ite(decode,
                                  ila.ite(endPixel, READY_F, READY_T),
                                  gb.arg_1_TREADY_nxt)

    valid_flag = ila.ite(position_condition(), VALID_T, VALID_F)
    arg_0_TVALID_nxt = ila.ite(relPixel, gb.arg_0_TVALID, valid_flag)
    gb.arg_0_TVALID_nxt = ila.ite(decode, arg_0_TVALID_nxt,
                                  gb.arg_0_TVALID_nxt)

    computed_data = ila.appfun(gb.fun, proc_in_nxt)
    arg_0_TDATA_nxt = ila.ite(relPixel, gb.arg_0_TDATA, computed_data)
    gb.arg_0_TDATA_nxt = ila.ite(decode, arg_0_TDATA_nxt, gb.arg_0_TDATA_nxt)

    # next state functions for internal arch-states: these hold their value
    for held in ('cur_pix', 'pre_pix', 'RAM_x', 'RAM_y', 'RAM_w'):
        nxt_name = held + '_nxt'
        setattr(gb, nxt_name,
                ila.ite(decode, getattr(gb, held), getattr(gb, nxt_name)))

    for i in xrange(gb.RAM_size):
        gb.RAM_nxt[i] = ila.ite(decode, gb.RAM[i], gb.RAM_nxt[i])

    # Every stencil entry but the last either holds or takes its successor.
    last = gb.stencil_size - 1
    for i in xrange(last):
        shifted = ila.ite(gb.RAM_y < gb.RAM_size, gb.stencil[i],
                          gb.stencil[i + 1])
        gb.stencil_nxt[i] = ila.ite(decode, shifted, gb.stencil_nxt[i])

    n = gb.stencil_size - 1
    gb.stencil_nxt[n] = ila.ite(decode, gb.stencil[n], gb.stencil_nxt[n])

    gb.st_ready_nxt = ila.ite(decode, READY_T, gb.st_ready_nxt)
예제 #13
0
def createAESILA(enable_ps):
    """Build the 'aes' abstraction and its 'aes_compute' micro-abstraction.

    Next-state functions are written as synthesis templates (``ila.choice``,
    ``ila.inrange``, ``ila.writechunk``) rather than as fixed expressions.

    Args:
        enable_ps: value stored in ``m.enable_parameterized_synthesis``.

    Returns:
        Tuple ``(m, um)``: the top-level abstraction and the
        micro-abstraction that is active while ``aes_state != 0``.
    """
    m = ila.Abstraction("aes")
    m.enable_parameterized_synthesis = enable_ps

    # I/O interface: this is where the commands come from.
    cmd = m.inp('cmd', 2)
    cmdaddr = m.inp('cmdaddr', 16)
    cmddata = m.inp('cmddata', 8)

    # internal arch state.
    state = m.reg('aes_state', 2)
    opaddr = m.reg('aes_addr', 16)
    oplen = m.reg('aes_len', 16)
    ctr = m.reg('aes_ctr', 128)
    key0 = m.reg('aes_key0', 128)

    # for the uinst.
    xram = m.mem('XRAM', 16, 8)
    aes = m.fun('aes', 128, [128, 128, 128])

    # fetch is just looking at the input command.
    m.fetch_expr = ila.concat([cmd, cmdaddr, cmddata
                               ])  # actually, the equivalent instruction
    m.fetch_valid = (cmd == 2)  # when write to some addresses

    # decode: one expression per address in 0xff00..0xff3f with cmd == 2
    wrcmds = [(cmd == 2) & (cmdaddr == addr)
              for addr in xrange(0xff00, 0xff40)]
    m.decode_exprs = wrcmds

    m.add_assumption((state == 0) | (oplen > 1))
    # The micro-abstraction is valid whenever the state is non-zero.
    um = m.add_microabstraction('aes_compute', (state != 0))

    # write commands.
    def mb_reg_wr(name, reg):
        # multibyte register write: next value is either a chunk write of
        # cmddata into the register or the unchanged register.
        reg_wr = ila.writechunk('wr_' + name, reg, cmddata)
        reg_nxt = ila.choice('nxt_' + name, [reg_wr, reg])
        m.set_next(name, reg_nxt)

    mb_reg_wr('aes_addr', opaddr)
    mb_reg_wr('aes_len', oplen)
    mb_reg_wr('aes_ctr', ctr)
    mb_reg_wr('aes_key0', key0)
    # state: hold, reset to 0, or go to 1 when cmddata == 1
    state_next = ila.choice(
        'state_next',
        [state,
         m.const(0, 2),
         ila.ite((cmddata == 1), m.const(1, 2), state)])
    m.set_next('aes_state', state_next)
    # xram is unchanged at the top level
    m.set_next('XRAM', xram)

    ################################
    #           Micro-ILA
    ################################

    # micro-architectural registers
    rd_data = um.reg('rd_data', 128)
    enc_data = um.reg('enc_data', 128)
    blk_cnt = um.reg('blk_cnt', 16)
    uaes_ctr = um.reg('uaes_ctr', 128)

    um.set_init('blk_cnt', um.const(0, 16))
    um.set_init('uaes_ctr', um.getreg('aes_ctr'))
    uxram = m.getmem('XRAM')

    um.fetch_expr = state
    um.decode_exprs = [(state == i) for i in [1, 2, 3]]  # READ/OPERATE/WRITE

    # blk_cnt: increment is a synthesized constant in the range [1, 32]
    blk_cnt_inc = blk_cnt + ila.inrange('blkcntrange', um.const(1, 16),
                                        um.const(32, 16))
    # candidate conditions for "more blocks remain"
    more_blocks = ila.choice('cond1', (blk_cnt_inc != oplen),
                             (oplen >= blk_cnt_inc), (oplen > blk_cnt_inc))
    blk_cnt_nxt = ila.choice('blk_cnt_nxt', [
        m.const(0, 16), blk_cnt, blk_cnt_inc,
        ila.ite(more_blocks, blk_cnt_inc, blk_cnt)
    ])
    um.set_next('blk_cnt', blk_cnt_nxt)

    # ustate: any constant state, hold, or 1/0 depending on more_blocks
    ustate = um.getreg('aes_state')
    ustate_nxt = ila.choice('ustate_next', [
        m.const(0, 2),
        m.const(1, 2),
        m.const(2, 2),
        m.const(3, 2), ustate,
        ila.ite(more_blocks, m.const(1, 2), m.const(0, 2))
    ])  # change 4
    um.set_next('aes_state', ustate_nxt)

    # rd_data: either a block loaded from XRAM at opaddr + blk_cnt or hold
    rdblock = ila.loadblk(uxram, opaddr + blk_cnt, 16)
    rd_data_nxt = ila.choice('rd_data_nxt', rdblock, rd_data)
    um.set_next('rd_data', rd_data_nxt)

    # enc_data: updated with aes(uaes_ctr, key0, rd_data) when state == 2
    aes_key = key0
    aes_enc_data = ila.appfun(aes, [uaes_ctr, aes_key, rd_data])
    enc_data_nxt = ila.ite(state == 2, aes_enc_data, enc_data)
    um.set_next('enc_data', enc_data_nxt)
    #print um.get_next('enc_data')

    # uaes_ctr: hold, or add a synthesized constant in the range [1, 128]
    uaes_ctr_nxt = ila.choice(
        'uaes_ctr_nxt', uaes_ctr, uaes_ctr +
        ila.inrange('uaes_ctr_nxt_range', m.const(1, 128), m.const(128, 128)))
    um.set_next('uaes_ctr', uaes_ctr_nxt)

    # xram write: hold, or store enc_data at opaddr + blk_cnt
    xram_w_addr = opaddr + blk_cnt
    xram_w_aes = ila.storeblk(uxram, xram_w_addr, enc_data)
    xram_nxt = ila.choice('xram_nxt', uxram, xram_w_aes)
    um.set_next('XRAM', xram_nxt)

    return m, um
예제 #14
0
파일: syn.py 프로젝트: emzha/IMDb
def createRsaIla():
    """Build and return the 'rsa' abstraction.

    Parameterized synthesis is disabled (set to 0). Most next-state
    functions are ``ila.choice`` templates over candidate expressions.
    No next-state function is set here for 'rsa_N' or 'rsa_E'.

    Returns:
        The constructed ``ila.Abstraction``.
    """
    m = ila.Abstraction('rsa')
    m.enable_parameterized_synthesis = 0

    # I/O interface
    cmd = m.inp('cmd', 2)
    cmdaddr = m.inp('cmdaddr', 16)
    cmddata = m.inp('cmddata', 8)
    # response
    dataout = m.reg('dataout', 8)

    # states
    state = m.reg('rsa_state', 2)
    addr = m.reg('rsa_addr', 16)
    rsa_M = m.reg('rsa_M', 2048)
    rsa_N = m.reg('rsa_N', 2048)
    rsa_E = m.reg('rsa_E', 2048)
    rsa_buff = m.reg('rsa_buff', 2048)
    byte_counter = m.reg('rsa_byte_counter', 8)
    xram = m.mem('XRAM', 16, 8)
    rsa = m.fun('rsa', 2048, [2048])

    # fetch: valid for cmd values 1 and 2
    m.fetch_expr = ila.concat([state, cmd, cmdaddr, cmddata])
    m.fetch_valid = (cmd == 1) | (cmd == 2)

    # dataout: the zero-extended state, a chunk of rsa_addr, or constant 0
    statebyte = ila.zero_extend(state, 8)
    wraddrbyte = ila.readchunk('rsa_addr', addr, 8)
    dataout_nxt = ila.choice('dataout', [statebyte, wraddrbyte, m.const(0, 8)])
    m.set_next('dataout', dataout_nxt)

    # rsa_addr: chunk write of cmddata or hold
    addr_wr = ila.writechunk('wr_addr', addr, cmddata)
    addr_nxt = ila.choice('nxt_addr', [addr_wr, addr])
    m.set_next('rsa_addr', addr_nxt)

    # rsa_state
    state_choice = ila.choice(
        'state_choice',
        [m.const(0, 2),
         m.const(1, 2),
         m.const(2, 2),
         m.const(3, 2)])
    # 0 once byte_counter reaches 255, otherwise 3
    wr_nxt = ila.ite(byte_counter == 255, m.const(0, 2), m.const(3, 2))
    state_nxt = ila.choice('rsa_state_nxt', [
        wr_nxt, state_choice,
        ila.ite(cmddata == 1, m.const(1, 2), state), state
    ])
    m.set_next('rsa_state', state_nxt)

    # byte_counter: increment, reset to 0 when cmddata == 1, or hold
    byte_counter_inc = byte_counter + 1
    byte_counter_rst = ila.ite(cmddata == 1, m.const(0, 8), byte_counter)
    byte_counter_nxt = ila.choice(
        'byte_counter_nxt', [byte_counter_inc, byte_counter_rst, byte_counter])
    m.set_next('rsa_byte_counter', byte_counter_nxt)

    # buff: result of rsa(rsa_M) or hold
    rsa_buff_op = ila.appfun(rsa, [rsa_M])
    rsa_buff_nxt = ila.choice('rsa_buff_nxt', rsa_buff_op, rsa_buff)
    m.set_next('rsa_buff', rsa_buff_nxt)

    # rsa_M holds its value
    m.set_next('rsa_M', rsa_M)

    # xram: store one byte of rsa_buff at addr + byte_counter, or hold
    #xram_w_rsa_lit = ila.storeblk (xram, addr, rsa_buff)
    #xram_w_rsa_big = ila.storeblk_big (xram, addr, rsa_buff)
    byte_cnt_16 = ila.zero_extend(byte_counter, 16)
    # NOTE(review): (255 - byte_counter) * 8 is computed on the 8-bit
    # byte_counter before zero-extension; confirm it cannot wrap as intended.
    sh = ila.zero_extend((255 - byte_counter) * 8, 2048)
    xram_w_rsa_data_1 = (rsa_buff >> sh)[7:0]
    #xram_w_rsa_data_2 = rsa_buff [255 - byte_cnt_16]
    xram_w_rsa_lit = ila.store(xram, addr + byte_cnt_16, xram_w_rsa_data_1)
    xram_nxt = ila.choice('xram_nxt', [xram_w_rsa_lit, xram])
    m.set_next('XRAM', xram_nxt)

    return m
예제 #15
0
def U4(gb):
    """Register this instruction's decode and fold it into ``gb.*_nxt``.

    For every state element the accumulated ``gb.<name>_nxt`` expression is
    wrapped as ``ite(decode, <update for this instruction>, <previous>)``.
    Elements whose update is simply their current value are held unchanged
    when this instruction is decoded.

    Args:
        gb: model container exposing the state variables, constants,
            ``addDecode`` and the accumulated ``*_nxt`` expressions.
    """
    VALID_T = gb.VALID_TRUE
    VALID_F = gb.VALID_FALSE
    FULL_T = gb.FULL_TRUE
    FULL_F = gb.FULL_FALSE
    EMPTY_T = gb.EMPTY_TRUE
    EMPTY_F = gb.EMPTY_FALSE
    IT_T = gb.gb_exit_it_T
    IT_F = gb.gb_exit_it_F

    ############################ decode ###################################
    # Fires when arg_0_TVALID is false and either gb_exit_it[0] is false
    # with a non-empty stencil stream, or gb_exit_it[0] is true while
    # gb_exit_it[7] is still false.
    decode = (gb.arg_0_TVALID == VALID_F) & \
             (((gb.gb_exit_it[0] == IT_F) & \
               (gb.stencil_stream_empty == EMPTY_F)) | \
              ((gb.gb_exit_it[0] == IT_T) & \
               (gb.gb_exit_it[7] == IT_F)))
    gb.addDecode(decode)

    ############################ next state functions #####################
    # arg_1_TREADY (held)
    arg_1_TREADY_nxt = gb.arg_1_TREADY
    gb.arg_1_TREADY_nxt = ila.ite(decode, arg_1_TREADY_nxt,
                                  gb.arg_1_TREADY_nxt)

    # arg_0_TVALID: true when gb_pp_it[7] is true and gb_exit_it[6] is false
    arg_0_TVALID_nxt = ila.ite ((gb.gb_pp_it[7] == IT_T) & \
                                (gb.gb_exit_it[6] == IT_F),
                                VALID_T, VALID_F)
    gb.arg_0_TVALID_nxt = ila.ite(decode, arg_0_TVALID_nxt,
                                  gb.arg_0_TVALID_nxt)

    # arg_0_TDATA: gb.fun applied to the last buffer entry when the stencil
    # stream is full, otherwise to entry 0
    in_stencil = ila.ite(gb.stencil_stream_full == FULL_T,
                         gb.stencil_stream_buff[gb.stencil_stream_size - 1],
                         gb.stencil_stream_buff[0])
    arg_0_TDATA_nxt = ila.appfun(gb.fun, in_stencil)
    gb.arg_0_TDATA_nxt = ila.ite(decode, arg_0_TDATA_nxt, gb.arg_0_TDATA_nxt)

    # 1-D buffer for input data (held)
    LB1D_in_nxt = gb.LB1D_in
    gb.LB1D_in_nxt = ila.ite(decode, LB1D_in_nxt, gb.LB1D_in_nxt)

    LB1D_uIn_nxt = gb.LB1D_uIn
    gb.LB1D_uIn_nxt = ila.ite(decode, LB1D_uIn_nxt, gb.LB1D_uIn_nxt)

    LB1D_buff_nxt = gb.LB1D_buff
    gb.LB1D_buff_nxt = ila.ite(decode, LB1D_buff_nxt, gb.LB1D_buff_nxt)

    # pixel position for input data (held)
    LB1D_p_cnt_nxt = gb.LB1D_p_cnt
    gb.LB1D_p_cnt_nxt = ila.ite(decode, LB1D_p_cnt_nxt, gb.LB1D_p_cnt_nxt)

    # in stream full (held)
    in_stream_full_nxt = gb.in_stream_full
    gb.in_stream_full_nxt = ila.ite(decode, in_stream_full_nxt,
                                    gb.in_stream_full_nxt)

    # in stream empty (held)
    in_stream_empty_nxt = gb.in_stream_empty
    gb.in_stream_empty_nxt = ila.ite(decode, in_stream_empty_nxt,
                                     gb.in_stream_empty_nxt)

    # in stream buffer (held)
    for i in xrange(0, gb.in_stream_size):
        in_stream_buff_nxt = gb.in_stream_buff[i]
        gb.in_stream_buff_nxt[i] = ila.ite(decode, in_stream_buff_nxt,
                                           gb.in_stream_buff_nxt[i])

    # LB2D proc x idx (held)
    LB2D_proc_x_nxt = gb.LB2D_proc_x
    gb.LB2D_proc_x_nxt = ila.ite(decode, LB2D_proc_x_nxt, gb.LB2D_proc_x_nxt)

    # LB2D proc y idx (held)
    LB2D_proc_y_nxt = gb.LB2D_proc_y
    gb.LB2D_proc_y_nxt = ila.ite(decode, LB2D_proc_y_nxt, gb.LB2D_proc_y_nxt)

    # LB2D proc w idx (held)
    LB2D_proc_w_nxt = gb.LB2D_proc_w
    gb.LB2D_proc_w_nxt = ila.ite(decode, LB2D_proc_w_nxt, gb.LB2D_proc_w_nxt)

    # LB2D proc buffer (held)
    for i in xrange(0, gb.LB2D_proc_size):
        LB2D_proc_nxt = gb.LB2D_proc[i]
        gb.LB2D_proc_nxt[i] = ila.ite(decode, LB2D_proc_nxt,
                                      gb.LB2D_proc_nxt[i])

    # slice stream full (held)
    slice_stream_full_nxt = gb.slice_stream_full
    gb.slice_stream_full_nxt = ila.ite(decode, slice_stream_full_nxt,
                                       gb.slice_stream_full_nxt)

    # slice stream empty (held)
    slice_stream_empty_nxt = gb.slice_stream_empty
    gb.slice_stream_empty_nxt = ila.ite(decode, slice_stream_empty_nxt,
                                        gb.slice_stream_empty_nxt)

    # slice stream buffer (held)
    for i in xrange(0, gb.slice_stream_size):
        slice_stream_buff_nxt = gb.slice_stream_buff[i]
        gb.slice_stream_buff_nxt[i] = ila.ite(decode, slice_stream_buff_nxt,
                                              gb.slice_stream_buff_nxt[i])

    # LB2D shift x idx (held)
    LB2D_shift_x_nxt = gb.LB2D_shift_x
    gb.LB2D_shift_x_nxt = ila.ite(decode, LB2D_shift_x_nxt,
                                  gb.LB2D_shift_x_nxt)

    # LB2D shift y idx (held)
    LB2D_shift_y_nxt = gb.LB2D_shift_y
    gb.LB2D_shift_y_nxt = ila.ite(decode, LB2D_shift_y_nxt,
                                  gb.LB2D_shift_y_nxt)

    # LB2D shift buffer (held)
    for i in xrange(0, gb.LB2D_shift_size):
        LB2D_shift_nxt = gb.LB2D_shift[i]
        gb.LB2D_shift_nxt[i] = ila.ite(decode, LB2D_shift_nxt,
                                       gb.LB2D_shift_nxt[i])

    # stencil_stream_full
    # NOTE(review): both branches are FULL_F, so the condition has no effect
    # and the result is always FULL_F -- confirm whether one branch was
    # meant to be a different value.
    stencil_stream_full_nxt = ila.ite(gb.gb_exit_it[0] == IT_T, FULL_F, FULL_F)
    gb.stencil_stream_full_nxt = ila.ite(decode, stencil_stream_full_nxt,
                                         gb.stencil_stream_full_nxt)

    # stencil_stream_empty: not empty if the stream was full, else empty
    stencil_stream_empty_nxt = ila.ite(gb.stencil_stream_full == FULL_T,
                                       EMPTY_F, EMPTY_T)
    gb.stencil_stream_empty_nxt = ila.ite(decode, stencil_stream_empty_nxt,
                                          gb.stencil_stream_empty_nxt)

    # stencil_stream_buff (held)
    for i in xrange(0, gb.stencil_stream_size):
        stencil_stream_buff_nxt = gb.stencil_stream_buff[i]
        gb.stencil_stream_buff_nxt[i] = ila.ite(decode,
                                                stencil_stream_buff_nxt,
                                                gb.stencil_stream_buff_nxt[i])

    # gb_p_cnt: incremented by gb_p_cnt_1 while below gb_p_cnt_M, then
    # pinned at gb_p_cnt_M
    gb_p_cnt_nxt = ila.ite(gb.gb_p_cnt < gb.gb_p_cnt_M,
                           gb.gb_p_cnt + gb.gb_p_cnt_1, gb.gb_p_cnt_M)
    gb.gb_p_cnt_nxt = ila.ite(decode, gb_p_cnt_nxt, gb.gb_p_cnt_nxt)

    # gb_pp_it: entry 0 becomes gb_pp_it_T; entry i takes the value of i - 1
    gb_pp_it_0_nxt = gb.gb_pp_it_T
    gb.gb_pp_it_nxt[0] = ila.ite(decode, gb_pp_it_0_nxt, gb.gb_pp_it_nxt[0])
    for i in xrange(1, gb.gb_pp_size):
        gb_pp_it_i_nxt = gb.gb_pp_it[i - 1]
        gb.gb_pp_it_nxt[i] = ila.ite(decode, gb_pp_it_i_nxt,
                                     gb.gb_pp_it_nxt[i])

    # gb_exit_it: entry 0 is true once gb_p_cnt equals gb_p_cnt_M; entry i
    # takes the value of i - 1
    gb_exit_it_0_nxt = ila.ite(gb.gb_p_cnt == gb.gb_p_cnt_M, gb.gb_exit_it_T,
                               gb.gb_exit_it_F)
    gb.gb_exit_it_nxt[0] = ila.ite(decode, gb_exit_it_0_nxt,
                                   gb.gb_exit_it_nxt[0])
    for i in xrange(1, gb.gb_exit_size):
        gb_exit_it_i_nxt = gb.gb_exit_it[i - 1]
        gb.gb_exit_it_nxt[i] = ila.ite(decode, gb_exit_it_i_nxt,
                                       gb.gb_exit_it_nxt[i])
예제 #16
0
def buildILA():
    #---------------------------
    # define universal constant
    #---------------------------
    K = 5
    NUM_MOVIE_MAX = 100
    NUM_HIDDEN_MAX = 100
    NUM_VISIBLE_MAX = NUM_MOVIE_MAX * K
    DATAMEM_ADDR_WIDTH = int(
        log(NUM_VISIBLE_MAX + 1) /
        log(2)) + 1  # 9 # it is definitely not dividable, but need to check
    HIDDEN_UNIT_WIDTH = int(
        log(NUM_HIDDEN_MAX + 1) /
        log(2)) + 1  # 7 # it is definitely not dividable, but need to check
    VISIBLE_UNIT_WIDTH = int(log(NUM_VISIBLE_MAX + 1) / log(2)) + 1  # 9
    EDGEMEM_ADDR_WIDTH = int(
        log((NUM_VISIBLE_MAX + 1) * (NUM_HIDDEN_MAX + 1)) / log(2)) + 1  # 16
    POS_ADDR_WIDTH = EDGEMEM_ADDR_WIDTH
    NEG_ADDR_WIDTH = EDGEMEM_ADDR_WIDTH
    PREDICT_RESULT_WIDTH = int(log(NUM_MOVIE_MAX) / log(2)) + 1  # 7
    KWIDTH = int(log(K) / log(2)) + 1  # 3

    #---------------------------
    # Model
    #---------------------------

    rbm = ila.Abstraction('RBM')

    conf_done = rbm.inp('conf_done', 1)
    conf_num_hidden = rbm.inp('conf_num_hidden', 32)
    conf_num_visible = rbm.inp('conf_num_visible', 32)
    conf_num_users = rbm.inp('conf_num_users', 32)
    conf_num_loops = rbm.inp('conf_num_loops', 32)
    conf_num_testusers = rbm.inp('conf_num_testusers', 32)
    conf_num_movies = rbm.inp('conf_num_movies', 32)

    rst = rbm.inp('rst', 1)

    init_done = rbm.reg('init_done', 1)
    done = rbm.reg('done', 1)
    num_hidden = rbm.reg('num_hidden', 16)
    num_visible = rbm.reg('num_visible', 16)
    num_users = rbm.reg('num_users', 16)
    num_loops = rbm.reg('num_loops', 16)
    num_testusers = rbm.reg('num_testusers', 16)
    num_movies = rbm.reg('num_movies', 16)

    # DMA output
    rd_index = rbm.reg('rd_index', 32)
    rd_length = rbm.reg('rd_length', 32)
    rd_request = rbm.reg('rd_request', 1)
    rd_grant = rbm.inp('rd_grant', 1)
    data_in = rbm.inp('data_in', 32)
    # rd_cnt    = rbm.reg('rd_cnt', 16)  # i ureg  #585

    # DMA input
    wr_grant = rbm.inp('wr_grant', 1)
    wr_request = rbm.reg('wr_request', 1)
    wr_index = rbm.reg('wr_index', 32)
    wr_length = rbm.reg('wr_length', 32)
    data_out = rbm.reg('data_out', 32)
    # wr_cnt = rbm.reg('wr_cnt', 16) : u reg

    data = rbm.mem('data', DATAMEM_ADDR_WIDTH, 8)
    rbm.mem('predict_result', PREDICT_RESULT_WIDTH, 8)

    #-------------------------------------
    #  Decoding Expressions
    #-------------------------------------
    rstInst = rst == 1
    confDoneInst = (rst == 0) & (init_done == 0) & (conf_done == 1)
    rdGrantInst = (rd_request == 1) & (rd_grant == 1)
    wrGrantInst = (wr_request == 1) & (wr_grant == 1)
    decodeExpr = [rstInst, confDoneInst, rdGrantInst, wrGrantInst]

    #-------------------------------------
    #  AUX Functions
    #-------------------------------------
    def const(v, w):
        """Create a constant through the RBM model.

        Both arguments are forwarded unchanged to ``rbm.const``.  Callers in
        this function pass the value as ``v`` and what appears to be the
        bit-width as ``w`` (e.g. ``const(0, 16)``).
        """
        constant = rbm.const(v, w)
        return constant

    b0 = const(0, 1)
    b1 = const(1, 1)
    h0_8 = const(0, 8)
    h1_8 = const(1, 8)
    h0_4 = const(0, 4)
    h1_4 = const(1, 4)
    h2_4 = const(2, 4)
    h3_4 = const(3, 4)
    h4_4 = const(4, 4)
    h0_16 = const(0, 16)
    h1_16 = const(1, 16)
    h0_32 = const(0, 32)
    h0_64 = const(0, 64)

    #-------------------------------------
    #  Init conditions
    #-------------------------------------

    rbm.set_init('init_done', b0)
    rbm.set_init('done', b0)
    rbm.set_init('num_hidden', h0_16)
    rbm.set_init('num_visible', h0_16)
    rbm.set_init('num_users', h0_16)
    rbm.set_init('num_loops', h0_16)
    rbm.set_init('num_testusers', h0_16)
    rbm.set_init('num_movies', h0_16)

    #-------------------------------------
    #  Config
    #-------------------------------------

    # this means, once configured, unless reset, it cannot be reconfigured
    init_done_nxt = ila.ite(rstInst, b0, ila.ite(confDoneInst, b1, init_done))
    num_hidden_nxt = ila.ite(
        rstInst, h0_16, ila.ite(confDoneInst, conf_num_hidden[15:0],
                                num_hidden))
    num_visible_nxt = ila.ite(
        rstInst, h0_16,
        ila.ite(confDoneInst, conf_num_visible[15:0], num_visible))
    num_users_nxt = ila.ite(
        rstInst, h0_16, ila.ite(confDoneInst, conf_num_users[15:0], num_users))
    num_loops_nxt = ila.ite(
        rstInst, h0_16, ila.ite(confDoneInst, conf_num_loops[15:0], num_loops))
    num_testusers_nxt = ila.ite(
        rstInst, h0_16,
        ila.ite(confDoneInst, conf_num_testusers[15:0], num_testusers))
    num_movies_nxt = ila.ite(
        rstInst, h0_16, ila.ite(confDoneInst, conf_num_movies[15:0],
                                num_movies))

    rbm.set_next('init_done', init_done_nxt)
    rbm.set_next('num_hidden', num_hidden_nxt)
    rbm.set_next('num_visible', num_visible_nxt)
    rbm.set_next('num_users', num_users_nxt)
    rbm.set_next('num_loops', num_loops_nxt)
    rbm.set_next('num_testusers', num_testusers_nxt)
    rbm.set_next('num_movies', num_movies_nxt)

    # INST-level w/r complete
    rbm_rd_complete = rbm.reg('rd_complete', 1)
    rbm_wr_complete = rbm.reg('wr_complete', 1)
    rbm.set_init('rd_complete', b0)
    rbm.set_init('wr_complete', b0)

    #------------------------------------
    #  Compute UABS
    #------------------------------------

    uabs = rbm.add_microabstraction('compute', (init_done == 1) & (done == 0))
    index = uabs.reg('index', 16)
    loop_count = uabs.reg('loop_count', 16)
    pc = uabs.reg('upc', 4)
    edges_mem = uabs.mem('edges', EDGEMEM_ADDR_WIDTH, 8)

    nlp = uabs.getreg('num_loops')
    nm = ila.zero_extend(uabs.getreg('num_movies'), 32)
    nu = uabs.getreg('num_users')
    ntu = uabs.getreg('num_testusers')
    out_rd_request = uabs.getreg('rd_request')
    out_rd_complete = uabs.getreg('rd_complete')
    out_rd_length = uabs.getreg('rd_length')
    out_rd_index = uabs.getreg('rd_index')

    train_input_done = uabs.reg('train_input_done', 1)
    predict_input_done = uabs.reg('predict_input_done', 1)

    uabs.set_init('upc', const(0, 4))
    uabs.set_init('index', h0_16)
    uabs.set_init('loop_count', h0_16)
    uabs.set_init('train_input_done', b0)
    uabs.set_init('predict_input_done', b0)
    uabs.set_init('rd_complete', b0)

    ###  computation micro_instructions

    StartRead = (pc == 0)
    WaitReadComplete = (pc == 1) & (out_rd_complete == 0)
    DecideTrainOrPredict = (pc == 1) & (out_rd_complete == 1)
    StartTrain = (pc == 2) & (train_input_done == 1)
    StartPredict = (pc == 2) & (predict_input_done == 1)
    Finish = (pc == 3)

    StartReadState = const(0, 4)
    WaitReadCompleteState = const(1, 4)
    StartTrainOrPredict = const(2, 4)
    FinishState = const(3, 4)

    decodeExpr = [
        StartRead, WaitReadComplete, DecideTrainOrPredict, StartTrain,
        StartPredict, Finish
    ]

    out_rd_request_nxt = ila.ite(StartRead, b1, out_rd_request)
    out_rd_length_nxt = ila.ite(StartRead, 5 * nm, out_rd_length)
    out_rd_index_nxt = ila.ite(StartRead, ila.zero_extend(index, 32),
                               out_rd_index)
    out_rd_complete_nxt = ila.ite(
        StartRead, b0, ila.ite(DecideTrainOrPredict, b0, out_rd_complete))

    train_input_done_nxt = ila.ite(DecideTrainOrPredict,
                                   ila.ite(loop_count < nlp, b1, b0),
                                   train_input_done)
    predict_input_done_nxt = ila.ite(DecideTrainOrPredict,
                                     ila.ite(loop_count == nlp, b1, b0),
                                     predict_input_done)

    pc_nxt = ila.ite(
        StartRead,
        WaitReadCompleteState,
        ila.ite(
            WaitReadComplete,
            pc,
            ila.ite(
                DecideTrainOrPredict,
                StartTrainOrPredict,
                ila.ite(
                    StartTrain,
                    StartTrainOrPredict,  # StartReadState, # actually should be updated by u2inst 
                    ila.ite(
                        StartPredict,
                        StartTrainOrPredict,  # StartReadState, # actually should be updated by u2inst 
                        ila.ite(
                            Finish,
                            FinishState,
                            pc  # should never happen!
                        ))))))

    # should be updated by u2inst
    index_nxt_dummy = ila.ite(
        StartTrain | StartPredict,
        ila.ite(
            (index == nu - 1) & (loop_count != nlp),
            h0_16,
            ila.ite(
                (index == ntu - 1) & (loop_count == nlp),
                index,  # And it is not correct
                index + 1)),
        index)
    # not in use
    loop_count_nxt_dummy = ila.ite(
        StartTrain | StartPredict,
        ila.ite((index == nu - 1) & (loop_count != nlp), loop_count + 1,
                loop_count), loop_count)

    uabs.set_next('rd_request', out_rd_request_nxt)
    uabs.set_next('rd_length', out_rd_length_nxt)
    uabs.set_next('rd_index', out_rd_index_nxt)
    uabs.set_next('rd_complete', out_rd_complete_nxt)
    uabs.set_next('train_input_done', train_input_done_nxt)
    uabs.set_next('predict_input_done', predict_input_done_nxt)
    uabs.set_next('upc', pc_nxt)
    uabs.set_next('index', index)
    uabs.set_next('loop_count', loop_count)
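    # index and loop_count are held unchanged here; they have to be updated
    # by the micro-instructions of the nested abstractions.
    # rd_request is turned off by the DMAload abstraction.
    # predict_input_done and train_input_done are turned off by the
    # train/predict abstractions.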

    #------------------------------------
    #  Load UABS
    #------------------------------------
    # RBM interface
    # high-level interface
    rd_granted = rbm.reg(
        'rd_granted', 1
    )  # this is only used for maintaining the validity of load UABS, no other should use
    data_nxt = ila.ite(rdGrantInst,
                       ila.store(data, const(0, DATAMEM_ADDR_WIDTH),
                                 data_in[7:0]), data)  # data #
    rd_granted_nxt = ila.ite(rdGrantInst, b1, rd_granted)
    rbm.set_next('rd_granted', rd_granted_nxt)
    rbm.set_next('data', data_nxt)

    # one change is to move these into lower abstraction
    DMAload = rbm.add_microabstraction(
        'DMAload', (rd_granted == 1))  # this is sub-instruction
    w_cnt = DMAload.reg('i', 16)

    dma_rd_request = DMAload.getreg('rd_request')
    dma_rd_length = DMAload.getreg('rd_length')
    dma_rd_index = DMAload.getreg('rd_index')

    state_update_data = DMAload.getmem('data')
    state_update_rd_request = dma_rd_request
    self_update_rd_granted = DMAload.getreg('rd_granted')

    more_read_in = w_cnt < dma_rd_length[15:0]
    last_cycle = w_cnt == dma_rd_length[15:0]
    DMAload.set_init('i', h1_16)  # h0_16 )
    DMAload.set_next('i', ila.ite(more_read_in, w_cnt + 1, w_cnt))
    DMAload.set_next('rd_request', b0)  # reset to 0 immediately
    DMAload.set_next('rd_granted',
                     ila.ite(more_read_in, self_update_rd_granted, b0))
    DMAload.set_next('rd_complete', ila.ite(more_read_in, b0, b1))
    DMAload.set_next(
        'data',
        ila.ite(
            more_read_in,
            ila.store(state_update_data, w_cnt[DATAMEM_ADDR_WIDTH - 1:0],
                      data_in[7:0]),
            ila.ite(
                last_cycle,
                ila.store(state_update_data,
                          dma_rd_length[DATAMEM_ADDR_WIDTH - 1:0], h1_8),
                state_update_data)))

    #------------------------------------
    #  Train UUABS
    #------------------------------------

    TrainUabs = uabs.add_microabstraction('train', train_input_done == 1)

    sigmoid_func = TrainUabs.fun('sigmoid', 64, [16])  # DATA_sum_, 01_D
    rand_func = TrainUabs.fun('rand', 64, [])  # generate random number
    to_int_exp = TrainUabs.fun('to_int_exp', 32, [16])  #
    divide_func = TrainUabs.fun(
        'divide', 64, [32, 64])  # dp:32_32 / sum_of_pow2 64_64 = 64_1

    hidden_unit = TrainUabs.mem('hidden_unit', HIDDEN_UNIT_WIDTH, 1)
    visible_unit = TrainUabs.mem('visible_unit', VISIBLE_UNIT_WIDTH, 1)
    visibleEnergy = TrainUabs.mem('visibleEnergies', KWIDTH, 16)
    pow2 = TrainUabs.mem('pow2', KWIDTH, 32)
    pos = TrainUabs.mem('pos', POS_ADDR_WIDTH, 1)
    #neg          = TrainUabs.mem('neg', NEG_ADDR_WIDTH, 1 ) # not needed

    train_sum = TrainUabs.reg('train_sum', 16)
    train_max = TrainUabs.reg('train_max', 16)
    sumOfpow2 = TrainUabs.reg('sumOfpow2', 64)

    jstate = TrainUabs.reg('jstate', 16)
    inner_loop_pc = TrainUabs.reg('per_v_pc', 4)

    train_pc = TrainUabs.reg('train_upc', 4)  # Re-evaluate
    v_cnt = TrainUabs.reg('train_v_cnt', 16)
    h_cnt = TrainUabs.reg('train_h_cnt', 16)

    train_input = TrainUabs.getmem('data')
    edges_input = TrainUabs.getmem('edges')
    nv = TrainUabs.getreg('num_visible')
    nh = TrainUabs.getreg('num_hidden')
    nu = TrainUabs.getreg('num_users')
    ntu = TrainUabs.getreg('num_testusers')
    nlp = TrainUabs.getreg('num_loops')

    SumEdge = train_pc == 0
    SumEdgeState = const(0, 4)
    SumHidden = train_pc == 1
    SumHiddenState = const(1, 4)
    StorePos = train_pc == 3
    StorePosState = const(3, 4)
    EdgeUpdate = train_pc == 2
    EdgeUpdateState = const(2, 4)

    TrainUabs.decode_exprs = [SumEdge, SumHidden, EdgeUpdate]

    #Begin
    v_cnt_init = const(0, 16)
    h_cnt_init = const(0, 16)
    pc_init = const(0, 4)

    #SumEdge: s0
    edge_load_addr = (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt
    train_sum_s0_nxt = ila.ite(v_cnt == 0, const(0, 16), train_sum) + ila.ite(
        ila.load(train_input, v_cnt[DATAMEM_ADDR_WIDTH - 1:0]) == 1,
        fpconvert(ila.load(edges_input, edge_load_addr), FPedge, FPsum),
        const(0, 16))
    v_cnt_s0_nxt = ila.ite(v_cnt == nv, h0_16, v_cnt + 1)
    h_cnt_s0_nxt = ila.ite((v_cnt == nv),
                           ila.ite(h_cnt == nh - 1, h0_16, h_cnt + 1), h_cnt)
    #                                                    Here ^^^ is for transiting to next state
    hidden_update_s0_0 = ila.ite(
        ila.appfun(rand_func) < ila.appfun(sigmoid_func, train_sum_s0_nxt), b1,
        b0)
    hidden_update_s0_1 = ila.ite(
        v_cnt == nv,
        ila.store(hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0],
                  hidden_update_s0_0), hidden_unit)
    hidden_update_s0_2 = ila.ite((v_cnt == nv) & (h_cnt == nh - 1),
                                 ila.store(hidden_update_s0_1,
                                           nh[HIDDEN_UNIT_WIDTH - 1:0], b1),
                                 hidden_update_s0_1)
    train_pc_s0_nxt = ila.ite((v_cnt == nv) & (h_cnt == nh - 1),
                              SumHiddenState, SumEdgeState)
    # Just like init
    jstate_s0_nxt = h0_16
    inner_loop_pc_s0_nxt = h0_4

    # add prefix :
    # train_sum_nxt = ila.ite(SumEdge, train_sum_s0_nxt, ila.ite(SumHidden, ... ) )

    # SumHiddenK0-K4 : s1-s5

    # pc:1 per_v_pc : 0     1       2       3

    LastH = h_cnt == nh
    LastJ = jstate == K - 1
    LastV = (v_cnt + K == nv) | (v_cnt + K >= NUM_VISIBLE_MAX)
    SumHiddenL0 = SumHidden & (inner_loop_pc == 0)
    SumHiddenL1 = SumHidden & (inner_loop_pc == 1)
    SumHiddenL2 = SumHidden & (inner_loop_pc == 2)
    SumHiddenL3 = SumHidden & (inner_loop_pc == 3)

    h_cnt_s1_s5_L0_nxt = ila.ite(LastH, h0_16, h_cnt + 1)
    jstate_s1_s5_L0_nxt = ila.ite(LastH, ila.ite(LastJ, h0_16, jstate + 1),
                                  jstate)
    inner_loop_pc_s1_s5_L0_nxt = ila.ite(LastJ & LastH, h1_4, inner_loop_pc)

    jstate_s1_s5_L1_nxt = ila.ite(LastJ, h0_16, jstate + 1)
    inner_loop_pc_s1_s5_L1_nxt = ila.ite(LastJ, h2_4, inner_loop_pc)

    jstate_s1_s5_L2_nxt = jstate_s1_s5_L1_nxt
    inner_loop_pc_s1_s5_L2_nxt = ila.ite(LastJ, h3_4, inner_loop_pc)

    jstate_s1_s5_L3_nxt = jstate_s1_s5_L2_nxt
    inner_loop_pc_s1_s5_L3_nxt = ila.ite(
        LastJ,
        ila.ite(LastV, h0_4, h0_4),  # will choose to go back or not
        inner_loop_pc)

    def nextCondition(l0, l1, l2, l3, default):
        """Pick the expression that belongs to the active SumHidden sub-step.

        Builds a priority chain of ``ila.ite`` nodes: ``l0`` is chosen when
        ``SumHiddenL0`` holds, otherwise ``l1`` for ``SumHiddenL1``, then
        ``l2`` for ``SumHiddenL2`` and ``l3`` for ``SumHiddenL3``.
        ``default`` is used when none of the four conditions holds.
        """
        # Fold from the innermost (lowest-priority) case outwards so the
        # resulting expression nests exactly like a hand-written ite chain.
        selected = default
        for guard, value in ((SumHiddenL3, l3), (SumHiddenL2, l2),
                             (SumHiddenL1, l1), (SumHiddenL0, l0)):
            selected = ila.ite(guard, value, selected)
        return selected

    h_cnt_s1_s5_nxt = nextCondition(h_cnt_s1_s5_L0_nxt, h_cnt, h_cnt, h_cnt,
                                    h_cnt)
    v_cnt_s1_s5_nxt = ila.ite(SumHiddenL3 & LastJ,
                              ila.ite(LastV, h0_16, v_cnt + K), v_cnt)
    jstate_s1_s5_nxt = nextCondition(jstate_s1_s5_L0_nxt, jstate_s1_s5_L1_nxt,
                                     jstate_s1_s5_L2_nxt, jstate_s1_s5_L3_nxt,
                                     jstate)
    inner_loop_pc_s1_s5_nxt = nextCondition(inner_loop_pc_s1_s5_L0_nxt,
                                            inner_loop_pc_s1_s5_L1_nxt,
                                            inner_loop_pc_s1_s5_L2_nxt,
                                            inner_loop_pc_s1_s5_L3_nxt,
                                            inner_loop_pc)
    train_pc_s1_s5_nxt = ila.ite(SumHiddenL3 & LastJ & LastV, StorePosState,
                                 SumHiddenState)

    # L0
    train_sum_s1_s5_L0_nxt = ila.ite(h_cnt == 0, h0_16, train_sum) + ila.ite(
        ila.load(hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0]) == 1,
        fpconvert(ila.load(edges_input, edge_load_addr), FPedge, FPsum), h0_16)
    _train_max_origin_L0 = ila.ite(
        jstate == 0,
        fpconst(-500, FPsum).ast,
        train_max)  # make sure the first time we are comparing with init sum
    train_max_s1_s5_L0_nxt = ila.ite(
        LastH,
        ila.ite(ila.sgt(train_sum_s1_s5_L0_nxt, _train_max_origin_L0),
                train_sum_s1_s5_L0_nxt, _train_max_origin_L0), train_max)
    visibleEnergy_s1_s5_L0_nxt = ila.ite(
        LastH,
        ila.store(visibleEnergy, jstate[KWIDTH - 1:0], train_sum_s1_s5_L0_nxt),
        visibleEnergy)
    # L1
    # sum3: 64_64  ->   dp: 32_32
    _31_sum = fpconst(31, FPsum).ast
    train_max_s1_s5_L1_nxt = ila.ite(jstate == 0, train_max - _31_sum,
                                     train_max)
    _st_val_L1 = ila.load(visibleEnergy,
                          jstate[KWIDTH - 1:0]) - train_max_s1_s5_L1_nxt
    visibleEnergy_s1_s5_L1_nxt = ila.store(visibleEnergy, jstate[KWIDTH - 1:0],
                                           _st_val_L1)
    # L2
    _pow2_new_val = ila.appfun(to_int_exp,
                               ila.load(visibleEnergy, jstate[KWIDTH - 1:0]))
    _pow2_new_convert = fpconvert(_pow2_new_val, FPpow, FPsum3)
    sumOfpow2_s1_s5_L2_nxt = ila.ite(jstate == 0, h0_64,
                                     sumOfpow2) + _pow2_new_convert
    pow2_s1_s5_L2_nxt = ila.store(pow2, jstate[KWIDTH - 1:0], _pow2_new_val)
    # L3
    _probs = ila.appfun(divide_func,
                        [ila.load(pow2, jstate[KWIDTH - 1:0]), sumOfpow2])
    _RAND = ila.appfun(rand_func)
    _visible_unit_new_val = ila.ite(_probs > _RAND, b1, b0)
    _vu_idx = v_cnt + jstate
    _visible_unit_s1_s5_L3_1 = ila.store(visible_unit,
                                         _vu_idx[VISIBLE_UNIT_WIDTH - 1:0],
                                         _visible_unit_new_val)
    visible_unit_s1_s5_L3_nxt = ila.ite(
        LastJ & LastV,
        ila.store(_visible_unit_s1_s5_L3_1, nv[VISIBLE_UNIT_WIDTH - 1:0], b1),
        _visible_unit_s1_s5_L3_1)
    # when exit visible unit should be made to store 1 at nv

    train_sum_s1_s5_nxt = nextCondition(train_sum_s1_s5_L0_nxt, train_sum,
                                        train_sum, train_sum, train_sum)
    train_max_s1_s5_nxt = nextCondition(train_max_s1_s5_L0_nxt,
                                        train_max_s1_s5_L1_nxt, train_max,
                                        train_max, train_max)
    visible_unit_s1_s5_nxt = nextCondition(visible_unit, visible_unit,
                                           visible_unit,
                                           visible_unit_s1_s5_L3_nxt,
                                           visible_unit)
    visibleEnergy_s1_s5_nxt = nextCondition(visibleEnergy_s1_s5_L0_nxt,
                                            visibleEnergy_s1_s5_L1_nxt,
                                            visibleEnergy, visibleEnergy,
                                            visibleEnergy)
    sumOfpow2_s1_s5_nxt = nextCondition(sumOfpow2, sumOfpow2,
                                        sumOfpow2_s1_s5_L2_nxt, sumOfpow2,
                                        sumOfpow2)
    pow2_s1_s5_nxt = nextCondition(pow2, pow2, pow2_s1_s5_L2_nxt, pow2, pow2)

    # before s6: store pos

    h_cnt_sp_nxt = ila.ite(h_cnt == nh, h0_16, h_cnt + 1)
    v_cnt_sp_nxt = ila.ite(h_cnt == nh, ila.ite(v_cnt == nv, h0_16, v_cnt + 1),
                           v_cnt)
    _data_load = ila.load(train_input, v_cnt[VISIBLE_UNIT_WIDTH - 1:0])
    _pos_sp_cond = (_data_load != 2)
    _pos_sp_val = ila.ite(_data_load != 0, b1, b0) & ila.load(
        hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0])
    _pos_st_addr = (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt
    pos_sp_nxt = ila.store(pos, _pos_st_addr, _pos_sp_val)
    train_pc_sp_nxt = ila.ite((h_cnt == nh) & (v_cnt == nv), EdgeUpdateState,
                              StorePosState)

    # update edge : s6

    h_cnt_s6_nxt = ila.ite(h_cnt == nh, h0_16, h_cnt + 1)
    v_cnt_s6_nxt = ila.ite(h_cnt == nh, ila.ite(v_cnt == nv, v_cnt, v_cnt + 1),
                           v_cnt)

    _pos_ld_addr = (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt
    train_pos = ila.load(pos, _pos_ld_addr) != 0
    train_neg = (ila.load(
        hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0]) != 0) & (ila.load(
            visible_unit, v_cnt[VISIBLE_UNIT_WIDTH - 1:0]) != 0)
    edge_original = ila.load(edges_mem, (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt)
    edge_new = ila.ite((train_pos) & (~train_neg),
                       edge_original + fpconst(LEARN_RATE, FPedge).ast,
                       ila.ite((~train_pos) & (train_neg),
                               edge_original - fpconst(LEARN_RATE, FPedge).ast,
                               edge_original))
    edge_s6_nxt = ila.store(edges_mem, (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt,
                            edge_new)
    train_pc_s6_nxt = ila.ite((h_cnt == nh) & (v_cnt == nv), EdgeUpdateState,
                              EdgeUpdateState)
    # No need to jump back to this state explicitly: the train_input_done
    # flag is reset to zero once the edge update completes.
    # Don't forget to reset the corresponding signals in the parent uabs.

    train_done = TrainUabs.getreg('train_input_done')
    train_uabs_index = TrainUabs.getreg('index')
    train_uabs_loop_count = TrainUabs.getreg('loop_count')
    train_uabs_upc = TrainUabs.getreg('upc')

    # add prefix s6 !!!
    s6_complete = (h_cnt == nh) & (v_cnt == nv)
    index_nxt_s6_nxt = ila.ite(
        s6_complete,
        ila.ite((train_uabs_index == nu - 1) & (train_uabs_loop_count != nlp),
                h0_16, train_uabs_index + 1), train_uabs_index)

    # assert (train_uabs_index == ntu - 1) & (train_uabs_loop_count == nlp) should never happen

    loop_count_s6_nxt = ila.ite(
        s6_complete & (train_uabs_index == nu - 1) &
        (train_uabs_loop_count != nlp), train_uabs_loop_count + 1,
        train_uabs_loop_count)
    upc_s6_nxt = ila.ite(s6_complete, StartReadState, train_uabs_upc)
    train_input_done_s6_nxt_nxt = ila.ite(s6_complete, b0, train_done)

    # data -> hidden_unit -> visible_unit -> edge
    # data -> edge

    # add
    def TrainNext(e1, e2, e3, default):
        """Select a next-state expression by training stage.

        Returns an ``ila.ite`` chain that yields ``e1`` under ``SumEdge``,
        else ``e2`` under ``SumHidden``, else ``e3`` under ``EdgeUpdate``,
        and ``default`` when none of these decode conditions holds.  The
        ``StorePos`` stage is not distinguished here and falls through to
        ``default``.
        """
        # Wrap from the lowest-priority stage outwards.
        chosen = default
        for stage, expr in ((EdgeUpdate, e3), (SumHidden, e2), (SumEdge, e1)):
            chosen = ila.ite(stage, expr, chosen)
        return chosen

    def TrainNextSP(e1, e2, e3, e4, default):
        """Select a next-state expression by training stage, StorePos included.

        Returns an ``ila.ite`` chain that yields ``e1`` under ``SumEdge``,
        else ``e2`` under ``SumHidden``, else ``e3`` under ``StorePos``, else
        ``e4`` under ``EdgeUpdate``, and ``default`` otherwise.
        """
        # Wrap from the lowest-priority stage outwards.
        chosen = default
        for stage, expr in ((EdgeUpdate, e4), (StorePos, e3),
                            (SumHidden, e2), (SumEdge, e1)):
            chosen = ila.ite(stage, expr, chosen)
        return chosen

    def TrainChoice5(name, e1, e2, e3, default):
        """Forward ``name`` and four candidate expressions to ``ila.choice``.

        Note that, despite the ``5`` in the name, four expressions are
        passed: ``e1``, ``e2``, ``e3`` and ``default``, in that order.
        """
        candidates = (e1, e2, e3, default)
        return ila.choice(name, *candidates)

    def TrainChoice4(name, e1, e2, default):
        """Forward ``name`` and three candidate expressions to ``ila.choice``.

        The expressions are passed in the order ``e1``, ``e2``, ``default``.
        """
        candidates = (e1, e2, default)
        return ila.choice(name, *candidates)

    def TrainChoice3(name, e1, default):
        """Forward ``name`` and two candidate expressions to ``ila.choice``.

        The expressions are passed in the order ``e1``, ``default``.
        """
        candidates = (e1, default)
        return ila.choice(name, *candidates)

    TrainUabs.set_init('train_upc', pc_init)
    TrainUabs.set_init('train_v_cnt', v_cnt_init)
    TrainUabs.set_init('train_h_cnt', h_cnt_init)

    TrainUabs.set_next(
        'jstate', TrainNext(jstate_s0_nxt, jstate_s1_s5_nxt, jstate, jstate))
    TrainUabs.set_next(
        'train_sum',
        TrainNext(train_sum_s0_nxt, train_sum_s1_s5_nxt, train_sum, train_sum))
    TrainUabs.set_next(
        'train_v_cnt',
        TrainNextSP(v_cnt_s0_nxt, v_cnt_s1_s5_nxt, v_cnt_sp_nxt, v_cnt_s6_nxt,
                    v_cnt))
    TrainUabs.set_next(
        'train_h_cnt',
        TrainNextSP(h_cnt_s0_nxt, h_cnt_s1_s5_nxt, h_cnt_sp_nxt, h_cnt_s6_nxt,
                    h_cnt))
    TrainUabs.set_next(
        'train_upc',
        TrainNextSP(train_pc_s0_nxt, train_pc_s1_s5_nxt, train_pc_sp_nxt,
                    train_pc_s6_nxt, train_pc))

    TrainUabs.set_next(
        'train_max',
        TrainNext(train_max, train_max_s1_s5_nxt, train_max, train_max))
    TrainUabs.set_next(
        'hidden_unit',
        TrainNext(hidden_update_s0_2, hidden_unit, hidden_unit, hidden_unit))
    TrainUabs.set_next(
        'visible_unit',
        TrainNext(visible_unit, visible_unit_s1_s5_nxt, visible_unit,
                  visible_unit))
    TrainUabs.set_next('edges',
                       TrainNext(edges_mem, edges_mem, edge_s6_nxt, edges_mem))
    TrainUabs.set_next(
        'index',
        TrainNext(train_uabs_index, train_uabs_index, index_nxt_s6_nxt,
                  train_uabs_index))
    TrainUabs.set_next(
        'loop_count',
        TrainNext(train_uabs_loop_count, train_uabs_loop_count,
                  loop_count_s6_nxt, train_uabs_loop_count))
    TrainUabs.set_next(
        'upc',
        TrainNext(train_uabs_upc, train_uabs_upc, upc_s6_nxt, train_uabs_upc))
    TrainUabs.set_next(
        'train_input_done',
        TrainNext(train_done, train_done, train_input_done_s6_nxt_nxt,
                  train_done))
    # newly added
    TrainUabs.set_next(
        'visibleEnergies',
        TrainNext(visibleEnergy, visibleEnergy_s1_s5_nxt, visibleEnergy,
                  visibleEnergy))
    TrainUabs.set_next(
        'sumOfpow2',
        TrainNext(sumOfpow2, sumOfpow2_s1_s5_nxt, sumOfpow2, sumOfpow2))
    TrainUabs.set_next('pow2', TrainNext(pow2, pow2_s1_s5_nxt, pow2, pow2))
    TrainUabs.set_next('pos', ila.ite(StorePos, pos_sp_nxt, pos))
    TrainUabs.set_next(
        'per_v_pc',
        TrainNext(inner_loop_pc_s0_nxt, inner_loop_pc_s1_s5_nxt, inner_loop_pc,
                  inner_loop_pc))

    #------------------------------------
    #  Predict UUABS
    #------------------------------------
    # data -> predict_result

    PredictUabs = uabs.add_microabstraction('predict', predict_input_done == 1)

    sigmoid_func = PredictUabs.fun('sigmoid', 64, [16])  # DATA_sum_, 01_D
    rand_func = PredictUabs.fun('rand', 64, [])  # generate random number
    to_int_exp = PredictUabs.fun('to_int_exp', 32, [16])  #
    round_func = PredictUabs.fun('round', 8, [32])  # 05_D -> u8
    divide_func = PredictUabs.fun(
        'divide', 64, [32, 64])  # dp:32_32 / sum_of_pow2 64_64 = 64_1

    hidden_unit = PredictUabs.mem('hidden_unit', HIDDEN_UNIT_WIDTH, 1)
    visibleEnergy = PredictUabs.mem('visibleEnergies', KWIDTH, 16)
    predict_result = PredictUabs.getmem('predict_result')
    predict_sum = PredictUabs.reg('predict_sum', 16)
    predict_max = PredictUabs.reg('predict_max', 16)
    sumOfpow2 = PredictUabs.reg('sumOfpow2', 64)
    pow2 = PredictUabs.mem('pow2', KWIDTH, 32)

    predict_vector = PredictUabs.mem('predict_vector', VISIBLE_UNIT_WIDTH, 1)
    inner_loop_pc = PredictUabs.reg('per_v_pc', 4)

    count = PredictUabs.reg('count', 8)
    jstate = PredictUabs.reg('jstate', 16)
    expectation = PredictUabs.reg('expectation', 32)
    prediction = PredictUabs.reg('prediction', 8)

    predict_pc = PredictUabs.reg('predict_upc', 4)  # Re-evaluate
    v_cnt = PredictUabs.reg('predict_v_cnt', 16)
    h_cnt = PredictUabs.reg('predict_h_cnt', 16)

    predict_input = PredictUabs.getmem('data')
    edges_input = PredictUabs.getmem('edges')
    nv = PredictUabs.getreg('num_visible')
    nh = PredictUabs.getreg('num_hidden')
    nu = PredictUabs.getreg('num_users')
    ntu = PredictUabs.getreg('num_testusers')
    nlp = PredictUabs.getreg('num_loops')

    SumEdge = predict_pc == 0
    SumEdgeState = const(0, 4)
    SumHidden = predict_pc == 1
    SumHiddenState = const(1, 4)
    GenResult = predict_pc == 3
    GenResultState = const(3, 4)
    WaitForWrite = predict_pc == 2
    WaitForWriteState = const(2, 4)

    PredictUabs.decode_exprs = [SumEdge, SumHidden, WaitForWrite]

    #Begin
    v_cnt_init = const(0, 16)
    h_cnt_init = const(0, 16)
    pc_init = const(0, 4)

    #SumEdge: s0
    edge_load_addr = (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt
    predict_sum_s0_nxt = ila.ite(v_cnt == 0, const(
        0, 16), predict_sum) + ila.ite(
            ila.load(predict_input, v_cnt[DATAMEM_ADDR_WIDTH - 1:0]) == 1,
            fpconvert(ila.load(edges_input, edge_load_addr), FPedge, FPsum),
            const(0, 16))
    v_cnt_s0_nxt = ila.ite(v_cnt == nv, h0_16, v_cnt + 1)
    h_cnt_s0_nxt = ila.ite((v_cnt == nv),
                           ila.ite(h_cnt == nh - 1, h0_16, h_cnt + 1), h_cnt)
    #                                                     Here ^^^ is for transiting to next state

    hidden_update_s0_0 = ila.ite(
        fpconst(0.5, FP01_D).ast < ila.appfun(sigmoid_func,
                                              predict_sum_s0_nxt), b1, b0)
    hidden_update_s0_1 = ila.ite(
        v_cnt == nv,
        ila.store(hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0],
                  hidden_update_s0_0), hidden_unit)
    hidden_update_s0_2 = ila.ite((v_cnt == nv) & (h_cnt == nh - 1),
                                 ila.store(hidden_update_s0_1,
                                           nh[HIDDEN_UNIT_WIDTH - 1:0], b1),
                                 hidden_update_s0_1)
    hidden_update_s0_next = hidden_update_s0_2
    predict_pc_s0_nxt = ila.ite((v_cnt == nv) & (h_cnt == nh - 1),
                                SumHiddenState, SumEdgeState)

    jstate_s0_nxt = h0_16
    count_s0_nxt = ila.const(0, 8)
    inner_loop_pc_s0_nxt = h0_4
    # add prefix :
    # predict_sum_nxt = ila.ite(SumEdge, predict_sum_s0_nxt, ila.ite(SumHidden, ... ) )

    #-----------------------------
    # SumHiddensK0-K4 : s1-s5
    #
    #-----------------------------

    LastH = h_cnt == nh
    LastJ = jstate == K - 1
    LastV = (v_cnt + K == nv) | (v_cnt + K >= NUM_VISIBLE_MAX)
    SumHiddenL0 = SumHidden & (inner_loop_pc == 0)
    SumHiddenL1 = SumHidden & (inner_loop_pc == 1)
    SumHiddenL2 = SumHidden & (inner_loop_pc == 2)
    SumHiddenL3 = SumHidden & (inner_loop_pc == 3)
    SumHiddenL4 = SumHidden & (inner_loop_pc == 4)

    h_cnt_s1_s5_L0_nxt = ila.ite(LastH, h0_16, h_cnt + 1)
    jstate_s1_s5_L0_nxt = ila.ite(LastH, ila.ite(LastJ, h0_16, jstate + 1),
                                  jstate)
    inner_loop_pc_s1_s5_L0_nxt = ila.ite(LastJ & LastH, h1_4, inner_loop_pc)

    jstate_s1_s5_L1_nxt = ila.ite(LastJ, h0_16, jstate + 1)
    inner_loop_pc_s1_s5_L1_nxt = ila.ite(LastJ, h2_4, inner_loop_pc)

    jstate_s1_s5_L2_nxt = jstate_s1_s5_L1_nxt
    inner_loop_pc_s1_s5_L2_nxt = ila.ite(LastJ, h3_4, inner_loop_pc)

    jstate_s1_s5_L3_nxt = jstate_s1_s5_L2_nxt
    inner_loop_pc_s1_s5_L3_nxt = ila.ite(LastJ, h4_4, inner_loop_pc)

    jstate_s1_s5_L4_nxt = jstate_s1_s5_L3_nxt
    inner_loop_pc_s1_s5_L4_nxt = ila.ite(
        LastJ,
        ila.ite(LastV, h0_4, h0_4),  # will choose to go back or not
        inner_loop_pc)

    def nextCondition(l0, l1, l2, l3, l4, default):
        """Pick the expression that belongs to the active SumHidden sub-step.

        Five-way variant used by the predict abstraction.  Builds a priority
        chain of ``ila.ite`` nodes: ``l0`` is chosen when ``SumHiddenL0``
        holds, otherwise ``l1`` for ``SumHiddenL1``, and so on through
        ``l4`` for ``SumHiddenL4``.  ``default`` is used when none of the
        five conditions holds.
        """
        # Fold from the innermost (lowest-priority) case outwards so the
        # resulting expression nests exactly like a hand-written ite chain.
        selected = default
        for guard, value in ((SumHiddenL4, l4), (SumHiddenL3, l3),
                             (SumHiddenL2, l2), (SumHiddenL1, l1),
                             (SumHiddenL0, l0)):
            selected = ila.ite(guard, value, selected)
        return selected

    h_cnt_s1_s5_nxt = nextCondition(h_cnt_s1_s5_L0_nxt, h_cnt, h_cnt, h_cnt,
                                    h_cnt, h_cnt)
    v_cnt_s1_s5_nxt = ila.ite(SumHiddenL4 & LastJ,
                              ila.ite(LastV, h0_16, v_cnt + K), v_cnt)
    jstate_s1_s5_nxt = nextCondition(jstate_s1_s5_L0_nxt, jstate_s1_s5_L1_nxt,
                                     jstate_s1_s5_L2_nxt, jstate_s1_s5_L3_nxt,
                                     jstate_s1_s5_L4_nxt, jstate)

    inner_loop_pc_s1_s5_nxt = nextCondition(inner_loop_pc_s1_s5_L0_nxt,
                                            inner_loop_pc_s1_s5_L1_nxt,
                                            inner_loop_pc_s1_s5_L2_nxt,
                                            inner_loop_pc_s1_s5_L3_nxt,
                                            inner_loop_pc_s1_s5_L4_nxt,
                                            inner_loop_pc)

    predict_pc_s1_s5_nxt = ila.ite(SumHiddenL4 & LastJ & LastV, GenResultState,
                                   SumHiddenState)

    # L0
    predict_sum_s1_s5_L0_nxt = ila.ite(
        h_cnt == 0, h0_16, predict_sum) + ila.ite(
            ila.load(hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0]) == 1,
            fpconvert(ila.load(edges_input, edge_load_addr), FPedge, FPsum),
            h0_16)
    _predict_max_origin_L0 = ila.ite(
        jstate == 0,
        fpconst(-500, FPsum).ast,
        predict_max)  # make sure the first time we are comparing with init sum
    predict_max_s1_s5_L0_nxt = ila.ite(
        LastH,
        ila.ite(ila.sgt(predict_sum_s1_s5_L0_nxt, _predict_max_origin_L0),
                predict_sum_s1_s5_L0_nxt, _predict_max_origin_L0), predict_max)
    visibleEnergy_s1_s5_L0_nxt = ila.ite(
        LastH,
        ila.store(visibleEnergy, jstate[KWIDTH - 1:0],
                  predict_sum_s1_s5_L0_nxt), visibleEnergy)
    # L1
    # sum3: 64_64  ->   dp: 32_32
    _31_sum = fpconst(31, FPsum).ast
    predict_max_s1_s5_L1_nxt = ila.ite(jstate == 0, predict_max - _31_sum,
                                       predict_max)
    _st_val_L1 = ila.load(visibleEnergy,
                          jstate[KWIDTH - 1:0]) - predict_max_s1_s5_L1_nxt
    visibleEnergy_s1_s5_L1_nxt = ila.store(visibleEnergy, jstate[KWIDTH - 1:0],
                                           _st_val_L1)
    # L2
    _pow2_new_val = ila.appfun(to_int_exp,
                               ila.load(visibleEnergy, jstate[KWIDTH - 1:0]))
    _pow2_new_convert = fpconvert(_pow2_new_val, FPpow, FPsum3)
    sumOfpow2_s1_s5_L2_nxt = ila.ite(jstate == 0, h0_64,
                                     sumOfpow2) + _pow2_new_convert
    pow2_s1_s5_L2_nxt = ila.store(pow2, jstate[KWIDTH - 1:0], _pow2_new_val)
    # L3
    _probs = ila.appfun(divide_func,
                        [ila.load(pow2, jstate[KWIDTH - 1:0]), sumOfpow2])
    _mul = fixpoint(_probs, FP01_D) * fixpoint(jstate, FPu16)
    expectation_s1_s5_L3_nxt = ila.ite(jstate == 0, h0_32,
                                       expectation) + _mul.toFormat(FP05_D)
    # L4
    _prediction = ila.zero_extend(ila.appfun(round_func, [expectation]), 16)
    _pv_val = ila.ite(jstate == _prediction, b1, b0)
    _pv_idx = v_cnt + jstate
    _first_store = ila.store(predict_vector, _pv_idx[VISIBLE_UNIT_WIDTH - 1:0],
                             _pv_val)
    predict_vector_s1_s5_L4_nxt = ila.ite(
        SumHiddenL4 & LastV & LastJ,
        ila.store(_first_store, nv[VISIBLE_UNIT_WIDTH - 1:0], b1),
        _first_store)

    predict_sum_s1_s5_nxt = nextCondition(predict_sum_s1_s5_L0_nxt,
                                          predict_sum, predict_sum,
                                          predict_sum, predict_sum,
                                          predict_sum)
    predict_max_s1_s5_nxt = nextCondition(predict_max_s1_s5_L0_nxt,
                                          predict_max_s1_s5_L1_nxt,
                                          predict_max, predict_max,
                                          predict_max, predict_max)
    visibleEnergy_s1_s5_nxt = nextCondition(visibleEnergy_s1_s5_L0_nxt,
                                            visibleEnergy_s1_s5_L1_nxt,
                                            visibleEnergy, visibleEnergy,
                                            visibleEnergy, visibleEnergy)
    sumOfpow2_s1_s5_nxt = nextCondition(sumOfpow2, sumOfpow2,
                                        sumOfpow2_s1_s5_L2_nxt, sumOfpow2,
                                        sumOfpow2, sumOfpow2)
    pow2_s1_s5_nxt = nextCondition(pow2, pow2, pow2_s1_s5_L2_nxt, pow2, pow2,
                                   pow2)
    expectation_s1_s5_nxt = ila.ite(SumHiddenL3, expectation_s1_s5_L3_nxt,
                                    expectation)
    predict_vector_s1_s5_nxt = ila.ite(SumHiddenL4,
                                       predict_vector_s1_s5_L4_nxt,
                                       predict_vector)
    count_s1_s5_nxt = ila.ite(SumHiddenL4 & LastV & LastJ, h0_8, count)

    # before s6: store pos
    LastV = (v_cnt + K == nv) | (v_cnt + K >= NUM_VISIBLE_MAX)
    LastJ = jstate == K - 1
    v_cnt_sp_nxt = ila.ite(LastV, v_cnt + K, v_cnt + K)
    jstate_sp_nxt = ila.ite(LastJ, h0_16, jstate + 1)

    _prediction_old = ila.ite(jstate == 0, h0_8, prediction)
    _pv_idx = v_cnt + jstate
    _predict_result_sp_val = ila.load(predict_vector,
                                      _pv_idx[VISIBLE_UNIT_WIDTH - 1:0])

    prediction_sp_nxt = ila.ite(_predict_result_sp_val == 1, (jstate + 1)[7:0],
                                _prediction_old)
    count_sp_nxt = ila.ite(LastJ, count + 1, count)
    predict_result_sp_nxt = ila.ite(
        LastJ,
        ila.store(predict_result, count[PREDICT_RESULT_WIDTH - 1:0],
                  prediction), predict_result)
    predict_pc_sp_nxt = ila.ite(LastV & LastJ, WaitForWriteState,
                                GenResultState)

    wr_complete = PredictUabs.getreg('wr_complete')
    wr_req = PredictUabs.getreg('wr_request')
    wr_len = PredictUabs.getreg('wr_length')
    wr_idx = PredictUabs.getreg('wr_index')
    cur_idx = PredictUabs.getreg('index')  # 32

    exitLoop = LastV & LastJ
    wr_request_sp_nxt = ila.ite(exitLoop, b1, wr_req)
    wr_index_sp_nxt = ila.ite(
        exitLoop,
        ila.zero_extend(nm, 32) * ila.zero_extend(cur_idx, 32), wr_idx)
    wr_length_sp_nxt = ila.ite(exitLoop, ila.zero_extend(nm, 32), wr_len)
    wr_complete_sp_nxt = ila.ite(exitLoop, b0, wr_complete)
    # s6:

    #---------------------
    # update edge : s6
    #---------------------

    FinishOneRound = (wr_req == 0) & (wr_complete == 1)

    predict_pc_s6_nxt = ila.ite(FinishOneRound, WaitForWriteState,
                                WaitForWriteState)
    # its value does not matter because it will be terminated by predict_input_done
    # don't forget to set back signals in Uabs ()

    predict_done = PredictUabs.getreg('predict_input_done')
    predict_uabs_index = PredictUabs.getreg('index')
    predict_uabs_loop_count = PredictUabs.getreg('loop_count')
    predict_uabs_upc = PredictUabs.getreg('upc')
    all_done = PredictUabs.getreg('done')

    # add prefix s6 !!!
    index_nxt_s6_nxt = ila.ite(
        FinishOneRound,
        ila.ite(
            (predict_uabs_index == ntu - 1) & (predict_uabs_loop_count == nlp),
            predict_uabs_index, predict_uabs_index + 1), predict_uabs_index)

    wr_complete_s6_nxt = ila.ite(FinishOneRound, b0, wr_complete)
    # assert (predict_uabs_index == nu - 1)  & (predict_uabs_loop_count != nlp) should never happen

    #loop_count_s6_nxt = ila.ite( (predict_uabs_index == nu - 1)  & (predict_uabs_loop_count != nlp) , predict_uabs_loop_count + 1, predict_uabs_loop_count )

    upc_s6_nxt = ila.ite(
        FinishOneRound,
        ila.ite(
            (predict_uabs_index == ntu - 1) & (predict_uabs_loop_count == nlp),
            FinishState, StartReadState), predict_uabs_upc)
    predict_input_done_s6_nxt_nxt = ila.ite(FinishOneRound, b0, predict_done)

    all_done_s6_nxt = ila.ite(
        FinishOneRound & (predict_uabs_index == ntu - 1) &
        (predict_uabs_loop_count == nlp), b1, b0)

    # data -> hidden_unit -> visible_unit -> edge
    # data -> edge

    # add

    # add
    def predictNext(e1, e2, e3, default):
        """Select among three candidates by SumEdge / SumHidden / WaitForWrite.

        Returns an ``ila.ite`` chain yielding ``e1`` when ``SumEdge`` holds,
        else ``e2`` when ``SumHidden`` holds, else ``e3`` when ``WaitForWrite``
        holds, else ``default``.
        """
        # Build the chain from the innermost (lowest-priority) choice outwards.
        on_wait = ila.ite(WaitForWrite, e3, default)
        on_hidden = ila.ite(SumHidden, e2, on_wait)
        return ila.ite(SumEdge, e1, on_hidden)

    def predictNextSp(e1, e2, e3, e4, default):
        """Like ``predictNext`` but with an extra ``GenResult`` case.

        Returns an ``ila.ite`` chain yielding ``e1`` when ``SumEdge`` holds,
        else ``e2`` when ``SumHidden`` holds, else ``e3`` when ``GenResult``
        holds, else ``e4`` when ``WaitForWrite`` holds, else ``default``.
        """
        # Build the chain from the innermost (lowest-priority) choice outwards.
        on_wait = ila.ite(WaitForWrite, e4, default)
        on_gen = ila.ite(GenResult, e3, on_wait)
        on_hidden = ila.ite(SumHidden, e2, on_gen)
        return ila.ite(SumEdge, e1, on_hidden)

    def ite(inst, e, default):
        """Return ``ila.ite(inst, e, default)``: ``e`` if ``inst`` holds, else ``default``."""
        selected = ila.ite(inst, e, default)
        return selected

    PredictUabs.set_init('predict_upc', pc_init)
    PredictUabs.set_init('predict_v_cnt', v_cnt_init)
    PredictUabs.set_init('predict_h_cnt', h_cnt_init)

    PredictUabs.set_next(
        'jstate',
        predictNextSp(jstate_s0_nxt, jstate_s1_s5_nxt, jstate_sp_nxt, jstate,
                      jstate))
    PredictUabs.set_next(
        'predict_sum',
        predictNext(predict_sum_s0_nxt, predict_sum_s1_s5_nxt, predict_sum,
                    predict_sum))
    PredictUabs.set_next(
        'predict_v_cnt',
        predictNextSp(v_cnt_s0_nxt, v_cnt_s1_s5_nxt, v_cnt_sp_nxt, v_cnt,
                      v_cnt))
    PredictUabs.set_next(
        'predict_h_cnt',
        predictNext(h_cnt_s0_nxt, h_cnt_s1_s5_nxt, h_cnt, h_cnt))
    PredictUabs.set_next(
        'predict_upc',
        predictNextSp(predict_pc_s0_nxt, predict_pc_s1_s5_nxt,
                      predict_pc_sp_nxt, predict_pc_s6_nxt, predict_pc))
    PredictUabs.set_next(
        'predict_max',
        predictNext(predict_max, predict_max_s1_s5_nxt, predict_max,
                    predict_max))
    PredictUabs.set_next(
        'hidden_unit',
        predictNext(hidden_update_s0_2, hidden_unit, hidden_unit, hidden_unit))
    PredictUabs.set_next(
        'count',
        predictNextSp(count_s0_nxt, count_s1_s5_nxt, count_sp_nxt, count,
                      count))
    PredictUabs.set_next(
        'per_v_pc',
        predictNext(inner_loop_pc_s0_nxt, inner_loop_pc_s1_s5_nxt,
                    inner_loop_pc, inner_loop_pc))

    PredictUabs.set_next(
        'index',
        predictNext(predict_uabs_index, predict_uabs_index, index_nxt_s6_nxt,
                    predict_uabs_index))
    PredictUabs.set_next(
        'upc',
        predictNext(predict_uabs_upc, predict_uabs_upc, upc_s6_nxt,
                    predict_uabs_upc))
    PredictUabs.set_next(
        'predict_input_done',
        predictNext(predict_done, predict_done, predict_input_done_s6_nxt_nxt,
                    predict_done))
    PredictUabs.set_next(
        'done', predictNext(all_done, all_done, all_done_s6_nxt, all_done))

    PredictUabs.set_next(
        'wr_request',
        predictNextSp(wr_req, wr_req, wr_request_sp_nxt, wr_req, wr_req))
    PredictUabs.set_next(
        'wr_length',
        predictNextSp(wr_len, wr_len, wr_length_sp_nxt, wr_len, wr_len))
    PredictUabs.set_next(
        'wr_index',
        predictNextSp(wr_idx, wr_idx, wr_index_sp_nxt, wr_idx, wr_idx))
    PredictUabs.set_next(
        'wr_complete',
        predictNextSp(wr_complete, wr_complete, wr_complete_sp_nxt,
                      wr_complete_s6_nxt, wr_complete))
    # newly added
    PredictUabs.set_next(
        'visibleEnergies',
        predictNext(visibleEnergy, visibleEnergy_s1_s5_nxt, visibleEnergy,
                    visibleEnergy))
    PredictUabs.set_next(
        'sumOfpow2',
        predictNext(sumOfpow2, sumOfpow2_s1_s5_nxt, sumOfpow2, sumOfpow2))
    PredictUabs.set_next('pow2', predictNext(pow2, pow2_s1_s5_nxt, pow2, pow2))
    PredictUabs.set_next(
        'expectation',
        predictNext(expectation, expectation_s1_s5_nxt, expectation,
                    expectation))
    PredictUabs.set_next(
        'predict_vector',
        predictNext(predict_vector, predict_vector_s1_s5_nxt, predict_vector,
                    predict_vector))
    PredictUabs.set_next('prediction',
                         ite(GenResult, prediction_sp_nxt, prediction))
    PredictUabs.set_next('predict_result',
                         ite(GenResult, predict_result_sp_nxt, predict_result))

    #------------------------------------
    #  Store UABS
    #------------------------------------
    # store is triggered by inst as uabs?

    # wr_grant == 1 is an instruction
    wr_granted = rbm.reg('wr_granted', 1)
    rbm.set_next('wr_granted',
                 ila.ite((wr_request & wr_grant) == 1, b1, wr_granted))
    data_out_1st_set = ila.zero_extend(
        ila.load(predict_result, const(0, PREDICT_RESULT_WIDTH)), 32)
    rbm.set_next(
        'data_out',
        ila.ite((wr_request & wr_grant) == 1, data_out_1st_set, data_out))
    # This is a hard decision:
    # because we use set_next, the reaction we define here will appear in the next cycle.

    StoreUabs = rbm.add_microabstraction('store', wr_granted == 1)
    store_idx = StoreUabs.reg('i', 16)
    nm = StoreUabs.getreg('num_movies')
    wr_granted = StoreUabs.getreg('wr_granted')
    wr_request = StoreUabs.getreg('wr_request')
    wr_complete = StoreUabs.getreg('wr_complete')
    predict_result = StoreUabs.getmem('predict_result')

    StoreUabs.set_init('i', h1_16)
    StoreUabs.set_next('i', ila.ite(store_idx < nm, store_idx + 1, store_idx))
    StoreUabs.set_next('wr_granted', ila.ite(store_idx < nm, wr_granted, b0))
    StoreUabs.set_next('wr_request', ila.ite(store_idx == 0, b0, wr_request))
    StoreUabs.set_next('wr_complete', ila.ite(store_idx < nm, wr_complete, b1))
    data_out = StoreUabs.getreg('data_out')
    # possibly one cycle earlier
    StoreUabs.set_next(
        'data_out',
        ila.zero_extend(
            ila.load(predict_result, store_idx[PREDICT_RESULT_WIDTH - 1:0]),
            32))

    #---------------------------
    # Add no next
    #
    def keepNC(Abs, name):
        """Make register ``name`` of ``Abs`` keep its value (next state = itself)."""
        current = Abs.getreg(name)
        Abs.set_next(name, current)

    def keepMemNC(Abs, name):
        """Make memory ``name`` of ``Abs`` keep its contents (next state = itself)."""
        current = Abs.getmem(name)
        Abs.set_next(name, current)

    keepNC(rbm, 'done')
    keepNC(rbm, 'wr_request')
    keepNC(rbm, 'wr_index')
    keepNC(rbm, 'wr_length')
    keepNC(rbm, 'rd_index')
    keepNC(rbm, 'rd_length')
    keepNC(rbm, 'rd_request')

    keepMemNC(uabs, 'edges')

    keepNC(rbm, 'rd_complete')
    keepNC(rbm, 'wr_complete')

    return rbm
예제 #17
0
파일: synthesize.py 프로젝트: emzha/IMDb
def createSHAILA(synstates, enable_ps):
    """Build the "sha" ILA synthesis template, synthesize it and export results.

    Args:
        synstates: Iterable of state names. Each one is handed to
            ``m.synthesize`` and its resulting next-state AST is exported.
        enable_ps: Assigned to ``m.enable_parameterized_synthesis``; its
            truthiness also picks the ``en``/``dis`` suffix of output files.

    Side effects:
        Writes one ``<state> <seconds>`` line per state to
        ``sha-times-<suffix>.txt``, exports each AST to
        ``asts/<state>_<suffix>``, prints the accumulated time and calls
        ``m.generateSimToDir('sim')``.
    """
    m = ila.Abstraction("sha")
    m.enable_parameterized_synthesis = enable_ps

    # I/O interface: this is where commands come from.
    cmd = m.inp('cmd', 2)
    cmdaddr = m.inp('cmdaddr', 16)
    cmddata = m.inp('cmddata', 8)
    # response.
    dataout = m.reg('dataout', 8)

    # internal arch state.
    state = m.reg('sha_state', 3)
    rdaddr = m.reg('sha_rdaddr', 16)
    wraddr = m.reg('sha_wraddr', 16)
    oplen = m.reg('sha_len', 16)

    # for the uinst.
    bytes_read = m.reg('sha_bytes_read', 16)
    rd_data = m.reg('sha_rd_data', 512)
    hs_data = m.reg('sha_hs_data', 160)
    xram = m.mem('XRAM', 16, 8)
    sha = m.fun('sha', 160, [512])

    # fetch is just looking at the input command.
    m.fetch_expr = ila.concat([state, cmd, cmdaddr, cmddata])
    m.fetch_valid = (cmd == 1) | (cmd == 2)

    # decode: one expression per (address, state) pair for reads and nops,
    # one per address for writes (writes are only decoded in state 0).
    rdcmds = [(state == i) & (cmd == 1) & (cmdaddr == addr)
              for addr in xrange(0xfe00, 0xfe10) for i in [0, 1, 2, 3, 4]]
    wrcmds = [(state == 0) & (cmd == 2) & (cmdaddr == addr)
              for addr in xrange(0xfe00, 0xfe10)]
    nopcmds = [(state == i) & (cmd != 1) & (cmdaddr == addr)
               for addr in xrange(0xfe00, 0xfe10) for i in [1, 2, 3, 4]]
    m.decode_exprs = rdcmds + wrcmds + nopcmds

    # read commands.
    statebyte = ila.zero_extend(state, 8)
    rdaddrbyte = ila.readchunk('rd_addr', rdaddr, 8)
    wraddrbyte = ila.readchunk('wr_addr', wraddr, 8)
    oplenbyte = ila.readchunk('op_len', oplen, 8)
    dataoutnext = ila.choice(
        'dataout',
        [statebyte, rdaddrbyte, wraddrbyte, oplenbyte,
         m.const(0, 8)])
    m.set_next('dataout', dataoutnext)

    # write commands.
    def mb_reg_wr(name, reg):
        # multibyte register write.
        reg_wr = ila.writechunk('wr_' + name, reg, cmddata)
        reg_nxt = ila.choice('nxt_' + name, [reg_wr, reg])
        m.set_next(name, reg_nxt)

    mb_reg_wr('sha_rdaddr', rdaddr)
    mb_reg_wr('sha_wraddr', wraddr)
    mb_reg_wr('sha_len', oplen)

    # state
    state_next = ila.choice('state_next', [
        m.const(0, 3),
        m.const(1, 3),
        m.const(2, 3),
        m.const(3, 3),
        m.const(4, 3),
        ila.ite(cmddata == 1, m.const(1, 3), state),
        ila.ite(bytes_read < oplen, m.const(1, 3), m.const(4, 3))
    ])
    m.set_next('sha_state', state_next)

    # these are for the uinst
    # bytes_read
    bytes_read_inc = bytes_read + 64
    bytes_read_rst = ila.ite(cmddata == 1, m.const(0, 16), bytes_read)
    bytes_read_nxt = ila.choice(
        'bytes_read_nxt',
        [m.const(0, 16), bytes_read_inc, bytes_read_rst, bytes_read])
    m.set_next('sha_bytes_read', bytes_read_nxt)
    # rd_data
    rdblock_little = ila.loadblk(xram, rdaddr + bytes_read, 64)
    rdblock_big = ila.loadblk_big(xram, rdaddr + bytes_read, 64)
    rd_data_nxt = ila.choice('rd_data_nxt', rdblock_big, rdblock_little,
                             rd_data)
    m.set_next('sha_rd_data', rd_data_nxt)
    # hs_data
    sha_hs_data = ila.appfun(sha, [rd_data])
    hs_data_nxt = ila.choice('hs_data_nxt', sha_hs_data, hs_data)
    m.set_next('sha_hs_data', hs_data_nxt)
    # xram write
    xram_w_sha_little = ila.storeblk(xram, wraddr, hs_data)
    xram_w_sha_big = ila.storeblk_big(xram, wraddr, hs_data)
    xram_nxt = ila.choice('xram_nxt', xram, xram_w_sha_little, xram_w_sha_big)
    m.set_next('XRAM', xram_nxt)

    suffix = 'en' if enable_ps else 'dis'
    t_elapsed = 0

    # synthesis.
    def sim(s):
        # A fresh simulator instance is created for every query.
        return SHA().simulate(s)

    # The context manager guarantees the timing log is flushed and closed,
    # even when synthesizing or exporting one of the states raises.
    with open('sha-times-%s.txt' % suffix, 'wt') as timefile:
        for s in synstates:
            # NOTE(review): time.clock() exists on Python 2 (which this file
            # targets, see xrange) but was removed in Python 3.8.
            st = time.clock()
            m.synthesize(s, sim)
            dt = time.clock() - st
            timefile.write('%s %.2f\n' % (s, dt))
            t_elapsed += dt

            ast = m.get_next(s)
            m.exportOne(ast, 'asts/%s_%s' % (s, suffix))

    print('time: %.2f' % t_elapsed)
    #m.generateSim('tmp/shasim.hpp')
    m.generateSimToDir('sim')
예제 #18
0
    def createStates(self):
        """Create the model's state variables and wire up their next-state logic.

        Resets the bookkeeping containers, registers the monitor (log/check)
        registers and the two mutex flag/guard pairs on ``self.model``, seeds
        every ``*_next`` attribute with the current value of its register,
        declares the zero-argument ``arb_*`` functions together with their
        applications, and then runs the remaining construction steps (PC,
        registers, constants, next-state generation, log/check creation and
        next-state binding).
        """
        model = self.model
        long_bits = instruction_format.LONG_REG_BITS

        self.pc_list = []  # two PCs
        self.pc_next_list = []  # next-state functions of the two PCs
        self.next_state_dict = {}  # next-state functions
        self.pred_registers = []
        self.scalar_registers = []
        self.long_scalar_registers = []

        # Monitor registers.
        self.log_register = model.reg('log_register', long_bits)
        self.check_register = model.reg('check_register', long_bits)
        self.en_log_register = model.reg('en_log_register', 1)
        self.en_check_register = model.reg('en_check_register', 1)
        self.lsg_log_register = model.reg('lsg_log_register', 2)
        self.lsg_check_register = model.reg('lsg_check_register', 2)
        self.log_atom_flag_register = model.reg(
            'log_atom_flag_register', 1)
        self.check_atom_flag_register = model.reg(
            'check_atom_flag_register', 1)
        self.mflag_log_register = model.reg('mflag_log_register', 1)
        self.mflag_check_register = model.reg('mflag_check_register', 1)
        self.mguard_log_register = model.reg(
            'mguard_log_register', long_bits)
        self.mguard_check_register = model.reg(
            'mguard_check_register', long_bits)

        # Two mutex flag/guard pairs; each next-state slot starts out as the
        # register itself.
        self.mutex_flag_list = []
        self.mutex_guard_list = []
        self.mutex_flag_next_list = []
        self.mutex_guard_next_list = []
        for idx in range(2):
            flag = model.reg('mutex_flag_%d' % (idx), 1)
            self.mutex_flag_list.append(flag)
            guard = model.reg('mutex_guard_%d' % (idx), long_bits)
            self.mutex_guard_list.append(guard)
            self.mutex_flag_next_list.append(flag)
            self.mutex_guard_next_list.append(guard)

        # Next-state functions for the monitors: conditions start as False,
        # values start as "unchanged".
        self.mflag_log_register_next_cond = ila.bool(False)
        self.mflag_check_register_next_cond = ila.bool(False)

        self.mguard_log_register_next = self.mguard_log_register
        self.log_register_next = self.log_register
        self.en_log_register_next = self.en_log_register
        self.lsg_log_register_next = self.lsg_log_register
        self.log_atom_flag_register_next = self.log_atom_flag_register

        self.mguard_check_register_next = self.mguard_check_register
        self.check_register_next = self.check_register
        self.en_check_register_next = self.en_check_register
        self.lsg_check_register_next = self.lsg_check_register
        self.check_atom_flag_register_next = self.check_atom_flag_register

        # 1-bit zero-argument functions and their applications.
        arb_fun_0 = model.fun('arb_fun_0', 1, [])
        arb_fun_1 = model.fun('arb_fun_1', 1, [])
        self.arb_fun_list = [arb_fun_0, arb_fun_1]
        self.arb_list = [ila.appfun(fn, []) for fn in self.arb_fun_list]

        # Same, but LONG_REG_BITS wide.
        arb_data_fun_0 = model.fun('arb_data_fun_0', long_bits, [])
        arb_data_fun_1 = model.fun('arb_data_fun_1', long_bits, [])
        self.arb_data_fun_list = [arb_data_fun_0, arb_data_fun_1]
        self.arb_data_list = [ila.appfun(fn) for fn in self.arb_data_fun_list]

        self.bar_arrive_inst = []
        self.bar_sync_inst = []
        self.bar_aux_inst = []
        self.bar_sync_list = []
        self.bar_arrive_list = []
        self.bar_aux_list = []

        self.createPC()
        for idx in (0, 1):
            self.createRegs(idx)
        self.createConst()
        self.bar_state_list = []

        for idx in (0, 1):
            self.generate_next_state(idx)

        self.createLog()
        self.createCheck()

        self.set_next_state()
        for idx in (0, 1):
            self.set_next_pc(idx)
예제 #19
0
def createAESILA(enable_ps):
    """Build the "aes" ILA template together with its "aes_compute" micro-ILA.

    The top-level abstraction ``m`` declares the command inputs, the
    architectural registers and XRAM, and next-state templates driven by
    write commands. The micro-abstraction ``um`` adds its own counters and
    data registers plus next-state templates for them and for XRAM.

    Args:
        enable_ps: value assigned to ``m.enable_parameterized_synthesis``.

    Returns:
        Tuple ``(m, um)``: the top-level abstraction and the
        micro-abstraction created from it.
    """
    m = ila.Abstraction("aes")
    m.enable_parameterized_synthesis = enable_ps

    # I/O interface: this is where the commands come from.
    cmd = m.inp('cmd', 2)
    cmdaddr = m.inp('cmdaddr', 16)
    cmddata = m.inp('cmddata', 8)

    # internal arch state.
    state = m.reg('aes_state', 2)
    opaddr = m.reg('aes_addr', 16)
    oplen = m.reg('aes_len', 16)
    ctr = m.reg('aes_ctr', 128)
    key0 = m.reg('aes_key0', 128)

    # for the uinst.
    xram = m.mem('XRAM', 16, 8)
    aes = m.fun('aes', 128, [128, 128, 128])

    # fetch is just looking at the input command.
    m.fetch_expr = ila.concat([cmd, cmdaddr, cmddata
                               ])  # actually, the equivalent instruction
    m.fetch_valid = (cmd == 2)  # only when writing to some address

    # decode
    wrcmds = [(cmd == 2) & (cmdaddr == addr)
              for addr in xrange(0xff00, 0xff30)]  # one per address 0xff00..0xff2f
    m.decode_exprs = wrcmds

    # Micro-abstraction created with the condition `state != 0`
    # (presumably the condition under which it is active -- TODO confirm).
    um = m.add_microabstraction('aes_compute', state != 0)

    # write commands.
    def mb_reg_wr(name, reg):
        # multibyte register write: next value is either the register with a
        # chunk overwritten by cmddata, or the register unchanged.
        reg_wr = ila.writechunk('wr_' + name, reg, cmddata)
        reg_nxt = ila.choice('nxt_' + name, [reg_wr, reg])
        m.set_next(name, reg_nxt)

    mb_reg_wr('aes_addr', opaddr)
    mb_reg_wr('aes_len', oplen)
    mb_reg_wr('aes_ctr', ctr)
    mb_reg_wr('aes_key0', key0)
    # state: unchanged, or set to 1 when cmddata == 1
    state_next = ila.choice(
        'state_next',
        [state, ila.ite(cmddata == 1, m.const(1, 2), state)])
    m.set_next('aes_state', state_next)
    # xram: unchanged at the top level
    m.set_next('XRAM', xram)

    ################################
    #           Micro-ILA
    ################################

    # read data
    rd_data = um.reg('rd_data', 128)
    enc_data = um.reg('enc_data', 128)
    byte_cnt = um.reg('byte_cnt', 4)
    oped_byte_cnt = um.reg('oped_byte_cnt', 16)
    blk_cnt = um.reg('blk_cnt', 16)
    aes_time = um.reg('aes_time', 5)
    uaes_ctr = um.reg('uaes_ctr', 128)  # change 1

    # Initial values: counters start at 0, uaes_ctr starts from aes_ctr.
    um.set_init('byte_cnt', um.const(0, 4))
    um.set_init('blk_cnt', um.const(0, 16))
    um.set_init('oped_byte_cnt', um.const(0, 16))
    um.set_init('aes_time', um.const(0, 5))
    um.set_init('uaes_ctr', m.getreg('aes_ctr'))  # change 2
    uxram = m.getmem('XRAM')

    byte_cnt_16b = ila.zero_extend(byte_cnt, 16)

    # NOTE(review): several constants below are built with m.const although
    # they are used in um next-state expressions -- confirm this is intended.
    um.fetch_expr = state
    um.decode_exprs = [(state == i) & (byte_cnt == j) for j in xrange(16)
                       for i in [1, 2, 3]]  # decode expressions
    # byte_cnt
    byte_cnt_inc = byte_cnt + 1
    byte_cnt_nxt = ila.choice(
        'byte_cnt_nxt', [m.const(0, 4), byte_cnt_inc, byte_cnt])  # 0, +1, NC
    um.set_next('byte_cnt', byte_cnt_nxt)

    # oped_byte_cnt
    oped_byte_cnt_inc = oped_byte_cnt + 16
    oped_byte_cnt_nxt = ila.choice(
        'oped_byte_cnt_nxt',
        [m.const(0, 16), oped_byte_cnt_inc, oped_byte_cnt])  # 0, +16, NC
    um.set_next('oped_byte_cnt', oped_byte_cnt_nxt)

    # blk_cnt: 0, NC, +16, or +16 only while more blocks remain
    blk_cnt_inc = blk_cnt + 16
    more_blocks = (oped_byte_cnt_inc < oplen)
    blk_cnt_nxt = ila.choice('blk_cnt_nxt', [
        m.const(0, 16), blk_cnt, blk_cnt_inc,
        ila.ite(more_blocks, blk_cnt_inc, blk_cnt)
    ])
    um.set_next('blk_cnt', blk_cnt_nxt)

    # aes_time: 5-bit counter that stops incrementing once it reaches 31
    aes_time_inc = aes_time + 1
    aes_time_ov = aes_time == m.const(31, 5)
    aes_time_nxt_c = ila.ite(aes_time_ov, aes_time, aes_time_inc)
    aes_time_nxt = ila.choice(
        "aes_timeC", m.const(0, 5), aes_time_nxt_c,
        ila.ite(more_blocks, m.const(0, 5), aes_time_nxt_c))
    aes_time_enough = aes_time > m.const(10, 5)
    um.set_next('aes_time', aes_time_nxt)

    # change 3
    # uaes_ctr: NC, incremented by a synthesized value in [1, 128] while more
    # blocks remain, or reloaded from ctr
    um.set_next(
        'uaes_ctr',
        ila.choice(
            'uaes_ctr_nxt', uaes_ctr,
            ila.ite(
                more_blocks, uaes_ctr +
                ila.inrange('addvalue', um.const(1, 128), um.const(128, 128)),
                uaes_ctr), ctr))

    # ustate
    ustate = um.getreg('aes_state')
    ustate_nxt = ila.choice('ustate_next', [
        m.const(0, 2),
        m.const(1, 2),
        m.const(2, 2),
        m.const(3, 2), ustate,
        ila.ite(more_blocks, m.const(1, 2), m.const(0, 2)),
        ila.ite(aes_time_enough, m.const(3, 2), m.const(2, 2))
    ])  # change 4
    um.set_next('aes_state', ustate_nxt)

    # rd_data: either unchanged or with one chunk replaced by a byte loaded
    # from XRAM
    rdblock = ila.writechunk("rd_data_chunk", rd_data,
                             ila.load(uxram,
                                      opaddr + blk_cnt + byte_cnt_16b))  # byte at opaddr + blk_cnt + byte_cnt
    rd_data_nxt = ila.choice('rd_data_nxt', rdblock, rd_data)
    um.set_next('rd_data', rd_data_nxt)

    # enc_data: updated with the aes function result only when state == 2
    aes_key = key0
    aes_ctr = ila.choice('ctr', uaes_ctr, ctr + ila.zero_extend(blk_cnt, 128))
    aes_enc_data = ila.appfun(aes, [aes_ctr, aes_key, rd_data])
    enc_data_nxt = ila.ite(state == 2, aes_enc_data, enc_data)
    um.set_next('enc_data', enc_data_nxt)
    #print um.get_next('enc_data')

    # xram write: either unchanged or one 8-bit chunk of enc_data stored at
    # opaddr + blk_cnt + byte_cnt
    xram_w_data = ila.readchunk('enc_data_chunk', enc_data, 8)
    xram_w_addr = opaddr + blk_cnt + byte_cnt_16b
    xram_w_aes = ila.store(uxram, xram_w_addr, xram_w_data)
    xram_nxt = ila.choice('xram_nxt', uxram, xram_w_aes)
    um.set_next('XRAM', xram_nxt)

    return m, um
예제 #20
0
def createShaIla():
    """Build and return the "sha" ILA synthesis template.

    Declares the command inputs, the architectural and child-ILA state, the
    fetch expression and fetch-valid condition, and a next-state template for
    every state variable. Parameterized synthesis is switched off.

    Returns:
        The populated ``ila.Abstraction`` object.
    """
    model = ila.Abstraction("sha")
    model.enable_parameterized_synthesis = 0

    # Command interface.
    cmd = model.inp('cmd', 2)
    cmd_addr = model.inp('cmdaddr', 16)
    cmd_data = model.inp('cmddata', 8)
    # Response register; only its registration is needed in this function.
    model.reg('dataout', 8)

    # Architectural state.
    cur_state = model.reg('sha_state', 3)
    rd_addr = model.reg('sha_rdaddr', 16)
    wr_addr = model.reg('sha_wraddr', 16)
    op_len = model.reg('sha_len', 16)
    ram = model.mem('XRAM', 16, 8)

    # Child-ILA state.
    n_read = model.reg('sha_bytes_read', 16)
    rd_block = model.reg('sha_rd_data', 512)
    hs_reg = model.reg('sha_hs_data', 160)
    sha_fn = model.fun('sha', 160, [512])

    # Fetch: valid only for command codes 1 and 2.
    model.fetch_expr = ila.concat([cur_state, cmd, cmd_addr, cmd_data])
    cmd_is_1 = cmd == 1
    cmd_is_2 = cmd == 2
    model.fetch_valid = cmd_is_1 | cmd_is_2

    # Read commands: candidate bytes for the response register.
    readable = [
        ila.zero_extend(cur_state, 8),
        ila.readchunk('rd_addr', rd_addr, 8),
        ila.readchunk('wr_addr', wr_addr, 8),
        ila.readchunk('op_len', op_len, 8),
        model.const(0, 8),
    ]
    model.set_next('dataout', ila.choice('dataout', readable))

    # Write commands: each multibyte register either has one chunk replaced
    # by cmddata or stays unchanged.
    writable = (
        ('sha_rdaddr', rd_addr),
        ('sha_wraddr', wr_addr),
        ('sha_len', op_len),
    )
    for reg_name, reg in writable:
        written = ila.writechunk('wr_' + reg_name, reg, cmd_data)
        model.set_next(reg_name, ila.choice('nxt_' + reg_name,
                                            [written, reg]))

    # State: any constant 0..4, the read-progress rule, the cmddata == 1
    # rule, or no change.
    state_consts = [model.const(code, 3) for code in range(5)]
    any_state = ila.choice('state_choice', state_consts)
    after_read = ila.ite(n_read < op_len, model.const(1, 3),
                         model.const(4, 3))
    on_cmddata_1 = ila.ite(cmd_data == 1, model.const(1, 3), cur_state)
    model.set_next(
        'sha_state',
        ila.choice('state_nxt',
                   [after_read, any_state, on_cmddata_1, cur_state]))

    # bytes_read: 0, +64, reset when cmddata == 1, or no change.
    advanced = n_read + 64
    restarted = ila.ite(cmd_data == 1, model.const(0, 16), n_read)
    model.set_next(
        'sha_bytes_read',
        ila.choice('bytes_read_nxt',
                   [model.const(0, 16), advanced, restarted, n_read]))

    # rd_data: a 64-unit block loaded from XRAM with either block loader,
    # or no change.
    block_little = ila.loadblk(ram, rd_addr + n_read, 64)
    block_big = ila.loadblk_big(ram, rd_addr + n_read, 64)
    model.set_next(
        'sha_rd_data',
        ila.choice('rd_data_nxt', [block_big, block_little, rd_block]))

    # hs_data: result of applying the sha function to rd_data, or no change.
    hashed = ila.appfun(sha_fn, [rd_block])
    model.set_next('sha_hs_data', ila.choice('sh_data_nxt', hashed, hs_reg))

    # XRAM: hs_data stored at wraddr with either block store, or no change.
    stored_little = ila.storeblk(ram, wr_addr, hs_reg)
    stored_big = ila.storeblk_big(ram, wr_addr, hs_reg)
    model.set_next(
        'XRAM', ila.choice('xram_nxt', [stored_little, stored_big, ram]))

    return model
예제 #21
0
파일: synthesize.py 프로젝트: emzha/IMDb
def createAESILA(enable_ps):
    m = ila.Abstraction("aes")
    m.enable_parameterized_synthesis = enable_ps

    # I/O interface: this is where the commands come from.
    cmd = m.inp('cmd', 2)
    cmdaddr = m.inp('cmdaddr', 16)
    cmddata = m.inp('cmddata', 8)
    # response.
    dataout = m.reg('dataout', 8)

    # internal arch state.
    state = m.reg('aes_state', 2)
    opaddr = m.reg('aes_addr', 16)
    oplen = m.reg('aes_len', 16)
    keysel = m.reg('aes_keysel', 1)
    ctr = m.reg('aes_ctr', 128)
    key0 = m.reg('aes_key0', 128)
    key1 = m.reg('aes_key1', 128)

    # for the uinst.
    xram = m.mem('XRAM', 16, 8)
    aes = m.fun('aes', 128, [128, 128, 128])

    # fetch is just looking at the input command.
    m.fetch_expr = ila.concat([state, cmd, cmdaddr, cmddata])
    m.fetch_valid = (cmd == 1) | (cmd == 2)

    # decode
    rdcmds = [(state == i) & (cmd == 1) & (cmdaddr == addr)
              for addr in xrange(0xff00, 0xff40) for i in [0, 1, 2, 3]]
    wrcmds = [(state == 0) & (cmd == 2) & (cmdaddr == addr)
              for addr in xrange(0xff00, 0xff40)]
    nopcmds = [
        ((state != 0) & (cmd != 1)) | ((state == 0) & (cmd != 1) & (cmd != 2))
    ]

    m.decode_exprs = rdcmds + wrcmds + nopcmds

    # read commands
    statebyte = ila.zero_extend(state, 8)
    opaddrbyte = ila.readchunk('rd_addr', opaddr, 8)
    oplenbyte = ila.readchunk('rd_len', oplen, 8)
    keyselbyte = ila.zero_extend(keysel, 8)
    ctrbyte = ila.readchunk('rd_ctr', ctr, 8)
    key0byte = ila.readchunk('rd_key0', key0, 8)
    key1byte = ila.readchunk('rd_key1', key1, 8)
    dataoutnext = ila.choice('dataout', [
        statebyte, opaddrbyte, oplenbyte, keyselbyte, ctrbyte, key0byte,
        key1byte,
        m.const(0, 8)
    ])
    m.set_next('dataout', dataoutnext)

    # write commands.
    def mb_reg_wr(name, reg):
        # multibyte register write.
        reg_wr = ila.writechunk('wr_' + name, reg, cmddata)
        reg_nxt = ila.choice('nxt_' + name, [reg_wr, reg])
        m.set_next(name, reg_nxt)

    mb_reg_wr('aes_addr', opaddr)
    mb_reg_wr('aes_len', oplen)
    mb_reg_wr('aes_ctr', ctr)
    mb_reg_wr('aes_key0', key0)
    mb_reg_wr('aes_key1', key1)

    # bit-level registers
    def bit_reg_wr(name, reg, sz):
        # bitwise register write
        assert reg.type.bitwidth == sz
        reg_wr = cmddata[sz - 1:0]
        reg_nxt = ila.choice('nxt_' + name, [reg_wr, reg])
        m.set_next(name, reg_nxt)

    bit_reg_wr('aes_keysel', keysel, 1)

    # these are for the uinst
    um = m.add_microabstraction('aes_compute', state != 0)

    # read data
    rd_data = um.reg('rd_data', 128)
    enc_data = um.reg('enc_data', 128)
    byte_cnt = um.reg('byte_cnt', 4)
    oped_byte_cnt = um.reg('oped_byte_cnt', 16)
    blk_cnt = um.reg('blk_cnt', 16)
    um.set_init('byte_cnt', um.const(0, 4))
    um.set_init('blk_cnt', um.const(0, 16))
    um.set_init('oped_byte_cnt', um.const(0, 16))
    uxram = m.getmem('XRAM')

    byte_cnt_16b = ila.zero_extend(byte_cnt, 16)

    um.fetch_expr = state
    um.decode_exprs = [(state == i) & (byte_cnt == j) for j in xrange(16)
                       for i in [1, 2, 3]]

    usim = lambda s: AESmicro().simMicro(s)

    # byte_cnt
    byte_cnt_inc = byte_cnt + 1
    byte_cnt_buf = ila.choice('byte_cnt_buf', [byte_cnt_inc, byte_cnt])
    byte_cnt_nxt = ila.choice(
        'byte_cnt_nxt', [byte_cnt_inc, m.const(0, 4), byte_cnt])
    um.set_next('byte_cnt', byte_cnt_nxt)

    # oped_byte_cnt
    oped_byte_cnt_inc = oped_byte_cnt + 16
    oped_byte_cnt_nxt = ila.choice(
        'oped_byte_cnt_nxt',
        [m.const(0, 16), oped_byte_cnt, oped_byte_cnt_inc])
    um.set_next('oped_byte_cnt', oped_byte_cnt_nxt)

    # blk_cnt
    blk_cnt_inc = blk_cnt + 16
    more_blocks = (oped_byte_cnt_inc < oplen)
    blk_cnt_nxt = ila.choice('blk_cnt_nxt', [
        m.const(0, 16), blk_cnt, blk_cnt_inc,
        ila.ite(more_blocks, blk_cnt_inc, blk_cnt)
    ])
    um.set_next('blk_cnt', blk_cnt_nxt)

    # ustate
    ustate = um.getreg('aes_state')
    ustate_nxt = ila.choice('ustate_next', [
        m.const(0, 2),
        m.const(1, 2),
        m.const(2, 2),
        m.const(3, 2), ustate,
        ila.ite(more_blocks, m.const(1, 2), m.const(0, 2))
    ])
    um.set_next('aes_state', ustate_nxt)

    # rd_data
    rdblock = ila.writechunk("rd_data_chunk", rd_data,
                             ila.load(uxram, opaddr + blk_cnt + byte_cnt_16b))
    rd_data_nxt = ila.choice('rd_data_nxt', rdblock, rd_data)
    um.set_next('rd_data', rd_data_nxt)

    # enc_data
    aes_key = ila.ite(keysel == 0, key0, key1)
    aes_enc_data = ila.appfun(aes, [ctr, aes_key, rd_data])
    enc_data_nxt = ila.ite(state == 2, aes_enc_data, enc_data)
    um.set_next('enc_data', enc_data_nxt)
    #print um.get_next('enc_data')

    # xram write
    xram_w_data = ila.readchunk('enc_data_chunk', enc_data, 8)
    xram_w_addr = opaddr + blk_cnt + byte_cnt_16b
    xram_w_aes = ila.store(uxram, xram_w_addr, xram_w_data)
    xram_nxt = ila.choice('xram_nxt', uxram, xram_w_aes)
    um.set_next('XRAM', xram_nxt)

    suffix = 'en' if enable_ps else 'dis'
    timefile = open('aes-times-%s.txt' % suffix, 'wt')

    t_elapsed = 0
    # micro-synthesis
    for s in [
            'XRAM', 'aes_state', 'byte_cnt', 'blk_cnt', 'oped_byte_cnt',
            'rd_data'
    ]:
        t_elapsed = 0
        st = time.clock()
        um.synthesize(s, usim)
        dt = time.clock() - st
        t_elapsed += dt
        print >> timefile, '%s %.2f' % ('u_' + s, dt)
        print '%s: %s' % (s, str(um.get_next(s)))
        ast = um.get_next(s)
        m.exportOne(ast, 'asts/u_%s_%s' % (s, suffix))

    sim = lambda s: AESmacro().simMacro(s)
    # state
    state_next = ila.choice(
        'state_next',
        [state, ila.ite(cmddata == 1, m.const(1, 2), state)])
    m.set_next('aes_state', state_next)

    # xram
    m.set_next('XRAM', xram)
    # synthesize.
    for s in [
            'aes_state', 'aes_addr', 'aes_len', 'aes_keysel', 'aes_ctr',
            'aes_key0', 'aes_key1', 'dataout'
    ]:
        st = time.clock()
        m.synthesize(s, sim)
        dt = time.clock() - st
        t_elapsed += dt
        print >> timefile, '%s %.2f' % (s, dt)

        ast = m.get_next(s)
        print '%s: %s' % (s, str(ast))
        m.exportOne(ast, 'asts/%s_%s' % (s, suffix))
    # connect to the uinst
    m.connect_microabstraction('aes_state', um)
    m.connect_microabstraction('XRAM', um)

    print 'total time: %.2f' % t_elapsed

    #print 'aes_state: %s' % str(m.get_next('aes_state'))
    #print 'XRAM: %s' % str(m.get_next('XRAM'))

    #m.generateSim('gen/aes_sim.hpp')
    m.generateSimToDir('sim')
예제 #22
0
파일: synthesize.py 프로젝트: emzha/IMDb
def createAESILA(synstates, enable_ps):
    """Build the flat "aes" ILA and synthesize the requested states.

    Unlike a micro-abstraction based model, the helper state used while the
    accelerator is running (byte_cnt, rd_data, enc_data) is declared directly
    on the top-level abstraction here.

    Side effects:
      * writes, for every synthesized state, its name and the synthesis time
        on two consecutive lines of 'aes-times-en.txt' or 'aes-times-dis.txt',
      * exports each synthesized AST under 'asts/',
      * generates a simulator into the 'sim' directory.

    Args:
        synstates: iterable of state names passed one by one to m.synthesize.
        enable_ps: value assigned to m.enable_parameterized_synthesis; its
            truthiness also picks the 'en' / 'dis' suffix of output files.

    Returns:
        None.
    """
    m = ila.Abstraction("aes")
    m.enable_parameterized_synthesis = enable_ps

    # I/O interface: this is where the commands come from.
    cmd = m.inp('cmd', 2)
    cmdaddr = m.inp('cmdaddr', 16)
    cmddata = m.inp('cmddata', 8)
    # response.
    dataout = m.reg('dataout', 8)

    # internal arch state.
    state = m.reg('aes_state', 2)
    opaddr = m.reg('aes_addr', 16)
    oplen = m.reg('aes_len', 16)
    keysel = m.reg('aes_keysel', 1)
    ctr = m.reg('aes_ctr', 128)
    key0 = m.reg('aes_key0', 128)
    key1 = m.reg('aes_key1', 128)

    # for the uinst.
    byte_cnt = m.reg('byte_cnt', 16)
    rd_data = m.reg('rd_data', 128)
    enc_data = m.reg('enc_data', 128)
    xram = m.mem('XRAM', 16, 8)
    aes = m.fun('aes', 128, [128, 128, 128])

    # fetch is just looking at the input command.
    m.fetch_expr = ila.concat([state, cmd, cmdaddr, cmddata])
    m.fetch_valid = (cmd == 1) | (cmd == 2)

    # decode
    # cmd == 1 is decoded in every state; cmd == 2 only while state == 0.
    # All three groups cover the address window 0xff00 .. 0xff3f.
    rdcmds = [(state == i) & (cmd == 1) & (cmdaddr == addr)
              for addr in xrange(0xff00, 0xff40) for i in [0, 1, 2, 3]]
    wrcmds = [(state == 0) & (cmd == 2) & (cmdaddr == addr)
              for addr in xrange(0xff00, 0xff40)]
    nopcmds = [(state == i) & (cmd != 1) & (cmdaddr == addr)
               for addr in xrange(0xff00, 0xff40) for i in [1, 2, 3]]
    m.decode_exprs = rdcmds + wrcmds + nopcmds

    # read commands
    statebyte = ila.zero_extend(state, 8)
    opaddrbyte = ila.readchunk('rd_addr', opaddr, 8)
    oplenbyte = ila.readchunk('rd_len', oplen, 8)
    keyselbyte = ila.zero_extend(keysel, 8)
    ctrbyte = ila.readchunk('rd_ctr', ctr, 8)
    key0byte = ila.readchunk('rd_key0', key0, 8)
    key1byte = ila.readchunk('rd_key1', key1, 8)
    dataoutnext = ila.choice('dataout', [
        statebyte, opaddrbyte, oplenbyte, keyselbyte, ctrbyte, key0byte,
        key1byte,
        m.const(0, 8)
    ])
    m.set_next('dataout', dataoutnext)

    # write commands.
    def mb_reg_wr(name, reg):
        # multibyte register write: either a chunk of reg is overwritten
        # with cmddata or reg keeps its value.
        reg_wr = ila.writechunk('wr_' + name, reg, cmddata)
        reg_nxt = ila.choice('nxt_' + name, [reg_wr, reg])
        m.set_next(name, reg_nxt)

    mb_reg_wr('aes_addr', opaddr)
    mb_reg_wr('aes_len', oplen)
    mb_reg_wr('aes_ctr', ctr)
    mb_reg_wr('aes_key0', key0)
    mb_reg_wr('aes_key1', key1)

    # bit-level registers
    def bit_reg_wr(name, reg, sz):
        # bitwise register write: the low sz bits of cmddata or the old value.
        assert reg.type.bitwidth == sz
        reg_wr = cmddata[sz - 1:0]
        reg_nxt = ila.choice('nxt_' + name, [reg_wr, reg])
        m.set_next(name, reg_nxt)

    bit_reg_wr('aes_keysel', keysel, 1)

    # state
    state_next = ila.choice('state_next', [
        m.const(0, 2),
        m.const(1, 2),
        m.const(2, 2),
        m.const(3, 2),
        ila.ite(cmddata == 1, m.const(1, 2), state),
        ila.ite(byte_cnt + 16 < oplen, m.const(1, 2), m.const(0, 2))
    ])
    m.set_next('aes_state', state_next)

    # these are for the uinst
    # byte_cnt
    byte_cnt_inc = byte_cnt + 16
    byte_cnt_rst = ila.ite(cmddata == 1, m.const(0, 16), byte_cnt)
    byte_cnt_nxt = ila.choice(
        'byte_cnt_nxt', [m.const(0, 16), byte_cnt_inc, byte_cnt_rst, byte_cnt])
    m.set_next('byte_cnt', byte_cnt_nxt)
    # rd_data
    rdblock = ila.loadblk(xram, opaddr + byte_cnt, 16)
    rd_data_nxt = ila.choice('rd_data_nxt', rdblock, rd_data)
    m.set_next('rd_data', rd_data_nxt)
    # enc_data
    aes_key = ila.ite(keysel == 0, key0, key1)
    aes_enc_data = ila.appfun(aes, [ctr, aes_key, rd_data])
    enc_data_nxt = ila.ite(state == 2, aes_enc_data, enc_data)
    m.set_next('enc_data', enc_data_nxt)
    # xram write
    xram_w_aes = ila.storeblk(xram, opaddr + byte_cnt, enc_data)
    xram_nxt = ila.choice('xram_nxt', xram, xram_w_aes)
    m.set_next('XRAM', xram_nxt)

    # synthesize.
    suffix = 'en' if enable_ps else 'dis'
    sim = lambda s: AES().simulate(s)
    # The timing log is closed deterministically, even if synthesis raises.
    with open('aes-times-%s.txt' % suffix, 'wt') as timefile:
        for s in synstates:
            st = time.clock()
            m.synthesize(s, sim)
            t_elapsed = time.clock() - st
            print >> timefile, s
            print >> timefile, '%.2f' % (t_elapsed)

            ast = m.get_next(s)
            m.exportOne(ast, 'asts/%s_%s' % (s, suffix))

    m.generateSimToDir('sim')