def createStates(self):
    """Declare the model state and attach the next-state functions.

    Initialises the bookkeeping containers, the log/check monitor
    registers, one arbitration function per thread and the barrier
    registers, then runs the create*/generate*/set_next* helpers for
    thread ids 0 and 1.
    """
    thread_ids = (0, 1)
    long_bits = instruction_format.LONG_REG_BITS

    self.pc_list = []  # Two pc
    self.pc_next_list = []  # Two pc's next state function
    #self.imem_list = []
    self.next_state_dict = {}  # For next state function
    self.pred_registers = []
    self.scalar_registers = []
    self.long_scalar_registers = []

    # Monitor registers: a value register and a one-bit enable for each of
    # the log and the check monitor.
    self.log_register = self.model.reg('log_register', long_bits)
    self.check_register = self.model.reg('check_register', long_bits)
    self.en_log_register = self.model.reg('en_log_register', 1)
    self.en_check_register = self.model.reg('en_check_register', 1)

    # next state functions for monitors; each starts out as "hold".
    self.log_register_next = self.log_register
    self.en_log_register_next = self.en_log_register
    self.check_register_next = self.check_register
    self.en_check_register_next = self.en_check_register

    # One zero-argument, one-bit function per thread and its application.
    self.arb_fun_list = [
        self.model.fun(fun_name, 1, [])
        for fun_name in ('arb_fun_0', 'arb_fun_1')
    ]
    self.arb_list = [ila.appfun(arb_fun, []) for arb_fun in self.arb_fun_list]

    self.bar_inst = []
    self.bar_list = []

    for tid in thread_ids:
        self.createPC(tid)
    for tid in thread_ids:
        self.createRegs(tid)
    self.createConst()

    # bar_arb takes the value of a zero-argument function on every step.
    self.bar_arb_fun = self.model.fun('bar_arb_fun', 1, [])
    self.bar_arb = self.model.reg('bar_arb', 1)
    self.model.set_next('bar_arb', ila.appfun(self.bar_arb_fun, []))

    # One barrier-state register per thread.
    self.bar_state_list = [
        self.model.reg('bar_state_%d' % (i), self.bar_spec.BAR_STATE_BITS)
        for i in range(self.thread_num)
    ]
    self.bar_counter_enter = self.model.reg(
        'bar_counter_enter', self.bar_spec.BAR_COUNTER_ENTER_BITS)
    self.bar_counter_exit = self.model.reg(
        'bar_counter_exit', self.bar_spec.BAR_COUNTER_EXIT_BITS)

    for tid in thread_ids:
        self.generate_next_state(tid)
    self.createBar()
    self.createLog()
    self.createCheck()
    self.set_next_state()
    for tid in thread_ids:
        self.set_next_pc(tid)
    self.set_next_bar()
def createStates(self):
    """Declare the model state and attach the next-state functions.

    Initialises the bookkeeping containers, the instruction memory, the
    log/check monitor registers (value, enable and ``lsg`` registers), one
    arbitration function per thread and the empty barrier lists, then runs
    the create*/instFetch/generate*/set_next* helpers.
    """
    thread_ids = (0, 1)
    long_bits = instruction_format.LONG_REG_BITS

    self.pc_list = []  # Two pc
    self.pc_next_list = []  # Two pc's next state function
    self.imem = self.model.mem('imem', 32, 64)
    self.next_state_dict = {}  # For next state function
    self.pred_registers = []
    self.scalar_registers = []
    self.long_scalar_registers = []

    # Monitor registers.
    self.log_register = self.model.reg('log_register', long_bits)
    self.check_register = self.model.reg('check_register', long_bits)
    self.en_log_register = self.model.reg('en_log_register', 1)
    self.en_check_register = self.model.reg('en_check_register', 1)
    self.lsg_log_register = self.model.reg('lsg_log_register', 2)
    self.lsg_check_register = self.model.reg('lsg_check_register', 2)

    # next state functions for monitors; each starts out as "hold".
    self.log_register_next = self.log_register
    self.en_log_register_next = self.en_log_register
    self.lsg_log_register_next = self.lsg_log_register
    self.check_register_next = self.check_register
    self.en_check_register_next = self.en_check_register
    self.lsg_check_register_next = self.lsg_check_register

    # One zero-argument, one-bit function per thread and its application.
    self.arb_fun_list = [
        self.model.fun(fun_name, 1, [])
        for fun_name in ('arb_fun_0', 'arb_fun_1')
    ]
    self.arb_list = [ila.appfun(arb_fun, []) for arb_fun in self.arb_fun_list]

    # Barrier bookkeeping, filled in elsewhere.
    self.bar_arrive_inst = []
    self.bar_sync_inst = []
    self.bar_aux_inst = []
    self.bar_sync_list = []
    self.bar_arrive_list = []
    self.bar_aux_list = []

    self.createPC()
    for tid in thread_ids:
        self.createRegs(tid)
    self.createConst()
    self.bar_state_list = []
    self.instFetch()
    for tid in thread_ids:
        self.generate_next_state(tid)
    self.createLog()
    self.createCheck()
    self.set_next_state()
    for tid in thread_ids:
        self.set_next_pc(tid)
def test2():
    """Check areEqualUnrolled on two distinct uninterpreted functions.

    Register ``y`` is updated with ``foo(x)``; after one unrolled step it
    must be reported equal to ``foo(x)`` and not equal to ``goo(x)``.
    """
    model = ila.Abstraction('t1')
    reg_x = model.reg('x', 8)
    reg_y = model.reg('y', 8)
    foo = model.fun('foo', 8, [8])
    goo = model.fun('goo', 8, [8])

    model.set_next('x', reg_x)
    model.set_next('y', ila.appfun(foo, reg_x))

    matches_foo = model.areEqualUnrolled(1, reg_y, ila.appfun(foo, reg_x))
    assert matches_foo
    matches_goo = model.areEqualUnrolled(1, reg_y, ila.appfun(goo, reg_x))
    assert not matches_goo
def createMonitor(self):
    """Create the log/check monitor registers and their arbitration functions.

    Each monitor gets a value register and a one-bit enable register; the
    matching ``*_next`` attributes start out as the register itself.
    """
    reg = self.model.reg
    long_bits = instruction_format.LONG_REG_BITS

    self.log_register = reg('log_register', long_bits)
    self.check_register = reg('check_register', long_bits)
    self.en_log_register = reg('en_log_register', 1)
    self.en_check_register = reg('en_check_register', 1)

    # Default next-state expressions: hold the current value.
    self.log_register_next = self.log_register
    self.en_log_register_next = self.en_log_register
    self.check_register_next = self.check_register
    self.en_check_register_next = self.en_check_register

    # Two zero-argument, one-bit functions and their applications.
    self.monitor_arb_fun_list = [
        self.model.fun(fun_name, 1, [])
        for fun_name in ('monitor_arb_fun_0', 'monitor_arb_fun_1')
    ]
    self.monitor_arb_list = [
        ila.appfun(arb_fun, []) for arb_fun in self.monitor_arb_fun_list
    ]
def main():
    """Exercise the Horn-clause export calls on three abstractions.

    Runs the export on the dummy ILA, on a single function-application
    node, and on the ALU model imported from ``tmp/alu.txt``.  Returns
    early (after printing a message) when the ALU file is missing.
    """
    iteAsNode = True  # assigned but not read anywhere in this function
    hornFile = "tmp/horn_test_node.smt2"

    # Dummy ILA: hornify everything, then one next-state node.
    A = getDummyILA()
    ila.setloglevel(3, "")
    ila.enablelog("Horn")
    A.hornifyAll("tmp/horn_test_ILA.smt2")
    A.hornifyNode(A.get_next('r2'), "r2_nxt")
    A.exportHornToFile(hornFile)

    # Stand-alone application of an uninterpreted function.
    m = ila.Abstraction("fun")
    x = m.reg('x', 8)
    y = m.reg('y', 16)
    f = m.fun('foo', 8, [8, 16])
    r = ila.appfun(f, x, y)
    m.hornifyBvAsInt(True)
    m.hornifyNode(r, "foo")
    m.exportHornToFile(hornFile)

    # ALU model imported from disk.
    alu = ila.Abstraction("alu")
    alu.hornifyBvAsInt(True)
    aluFile = 'tmp/alu.txt'
    if not os.path.exists(aluFile):
        print('alu file not exist')
        return
    alu.importAll(aluFile)

    r0_nxt = alu.get_next('r0')
    r1_nxt = alu.get_next('r1')
    pc_nxt = alu.get_next('pc')
    rom_nxt = alu.get_next('rom')
    """
    alu.hornifyNode(pc_nxt, "pc_nxt")
    alu.hornifyNode(r0_nxt, "r0_nxt")
    alu.hornifyNode(r1_nxt, "r1_nxt")
    alu.hornifyNode(rom_nxt, "rom_nxt")
    alu.exportHornToFile(hornFile)
    """
    next_states = [
        ('pc', pc_nxt),
        ('r0', r0_nxt),
        ('r1', r1_nxt),
        ('rom', rom_nxt),
    ]

    alu.addHornInstr('alu_instr', alu.bool(True))
    for state_name, nxt in next_states:
        alu.addHornNext('alu_instr', state_name, nxt)

    alu.addHornChild('alu_child', 'alu_instr', alu.bool(True))
    for state_name, nxt in next_states:
        alu.addHornNext('alu_child', state_name, nxt)

    alu.generateHornMapping('Interleave')
    #alu.generateHornMapping ('Blocking')
    alu.exportHornToFile(hornFile)
def readPy():
    """Build the ``aes1`` abstraction, loading some next-state functions via readpyast.

    The next-state expressions of ``aes_state``, ``byte_cnt``, ``rd_data``
    and ``XRAM`` come from ``readpyast``; ``enc_data`` is built inline and
    the remaining registers keep their value.

    Returns:
        The populated abstraction.
    """
    um = ila.Abstraction("aes1")

    # init the state var.
    # common state
    state = um.reg('aes_state', 2)
    opaddr = um.reg('aes_addr', 16)
    oplen = um.reg('aes_len', 16)
    keysel = um.reg('aes_keysel', 1)
    ctr = um.reg('aes_ctr', 128)
    key0 = um.reg('aes_key0', 128)
    key1 = um.reg('aes_key1', 128)
    um.mem('XRAM', 16, 8)
    aes = um.fun('aes', 128, [128, 128, 128])

    # uinst state
    rd_data = um.reg('rd_data', 128)
    enc_data = um.reg('enc_data', 128)
    um.reg('byte_cnt', 16)

    # state
    state_next = readpyast(um, 'aes_state')
    um.set_init('aes_state', um.const(1, 2))
    um.set_next('aes_state', state_next)

    # byte_cnt
    um.set_next('byte_cnt', readpyast(um, 'byte_cnt'))
    um.set_init('byte_cnt', um.const(0, 16))

    # rd_data
    um.set_next('rd_data', readpyast(um, 'rd_data'))

    # enc_data: only replaced by the aes output while aes_state == 2.
    aes_key = ila.ite(keysel == 0, key0, key1)
    aes_enc_data = ila.appfun(aes, [ctr, aes_key, rd_data])
    um.set_next('enc_data', ila.ite(state == 2, aes_enc_data, enc_data))

    # xram
    um.set_next('XRAM', readpyast(um, 'XRAM'))

    # the rest doesn't change.
    um.set_next('aes_addr', opaddr)
    um.set_next('aes_len', oplen)
    um.set_ipred('aes_len', (oplen != 0) & (oplen[3:0] == 0))
    for reg_name, reg in (('aes_keysel', keysel),
                          ('aes_ctr', ctr),
                          ('aes_key0', key0),
                          ('aes_key1', key1)):
        um.set_next(reg_name, reg)

    return um
def main():
    """Run synthesis, equality, export/import and simulator-generation checks.

    Synthesises ``res`` from a choice between an assumed-bounded function
    value and a constant, checks several equalities involving an
    uninterpreted function, then exports/imports the abstraction and
    generates a simulator file.
    """
    c = ila.Abstraction("test")

    top = c.bool(True)
    c.bool(False)
    x = c.reg('x', 8)
    y = c.reg('y', 8)

    # h1 is a zero-argument function value, assumed to lie in [10, 15].
    g = c.fun('cnst', 8, [])
    h1 = ila.appfun(g, [])
    h2 = c.const(40, 8)
    c.add_assumption((h1 >= 10) & (h1 <= 15))

    val = ila.choice('val', h1, h2)
    res = val + x + y

    def sim(d):
        # Reference behaviour used by syn_elem: x + y plus 11 or 12, 8 bits.
        return {'res': (d['x'] + d['y'] + randint(11, 12)) & 0xff}

    res_s = c.syn_elem('res', res, sim)
    assert c.areEqual(res_s, h1 + x + y)

    z = c.reg('z', 16)
    c.const(0, 8)
    c.const(1, 8)
    cmax = c.const(255, 8)

    f = c.fun('foo', 8, [8, 16])
    r = ila.appfun(f, x, z)
    t = ila.appfun(f, y, z)

    # Equal arguments imply equal function results.
    eq = x == y
    req = r == t
    assert c.areEqual(ila.implies(eq, req), top)
    assert c.areEqual(r <= cmax, top)

    up = c.const(128, 8)
    down = c.const(120, 16)
    con = ila.implies((x < up) & (z > down), ila.appfun(f, x, z) > up)
    test = ila.implies(con & (x == 125) & (z == 125),
                       ila.appfun(f, x, z) > up)
    assert c.areEqual(test, top)

    c.set_next('x', ila.appfun(f, y, z))

    exportFile = 'tmp/test_ila_export.txt'
    c.exportAll(exportFile)
    c.importAll(exportFile)

    simFile = 'tmp/test_ila_sim.hpp'
    c.generateSim(simFile)
def createStates(self):
    """Declare the model state and attach the next-state functions.

    Initialises the bookkeeping containers (including list-valued monitor
    containers) and the barrier registers, then runs the
    create*/generate*/set_next* helpers for thread ids 0 and 1.
    """
    thread_ids = (0, 1)

    self.pc_list = []
    self.pc_next_list = []
    #self.imem_list = []
    self.next_state_dict = {}
    self.pred_registers = []
    self.scalar_registers = []
    self.long_scalar_registers = []

    # Monitor containers start out empty.
    self.log_registers = []
    self.check_registers = []
    self.en_log_registers = []
    self.en_check_registers = []

    self.bar_inst = []
    self.bar_list = []

    for tid in thread_ids:
        self.createPC(tid)
    for tid in thread_ids:
        self.createRegs(tid)
    self.createConst()

    # bar_arb takes the value of a zero-argument function on every step.
    self.bar_arb_fun = self.model.fun('bar_arb_fun', 1, [])
    self.bar_arb = self.model.reg('bar_arb', 1)
    self.model.set_next('bar_arb', ila.appfun(self.bar_arb_fun, []))

    # One barrier-state register per thread.
    self.bar_state_list = [
        self.model.reg('bar_state_%d' % (i), self.bar_spec.BAR_STATE_BITS)
        for i in range(self.thread_num)
    ]
    self.bar_counter_enter = self.model.reg(
        'bar_counter_enter', self.bar_spec.BAR_COUNTER_ENTER_BITS)
    self.bar_counter_exit = self.model.reg(
        'bar_counter_exit', self.bar_spec.BAR_COUNTER_EXIT_BITS)

    for tid in thread_ids:
        self.generate_next_state(tid)
    self.createBar()
    self.createLog()
    self.createCheck()
    self.set_next_state()
    for tid in thread_ids:
        self.set_next_pc(tid)
    for tid in thread_ids:
        self.set_next_bar(tid)
def createRegs(self):
    """Load the register book and create the ``arb`` arbitration register.

    The register book is unpickled from ``ptxILA.reg_book_file`` and the
    three barrier entries are removed from it.  The per-register creation
    loop is currently disabled (kept below as a string block), so the two
    scalar register lists stay empty.

    Raises:
        ValueError: if one of the barrier names is missing from the book
            (raised by ``list.remove``).
    """
    self.scalar_registers_a = []
    self.scalar_registers_b = []

    # Use a context manager so the file handle is released even when
    # unpickling fails; the original left the file open.
    # NOTE(review): pickle.load can execute arbitrary code from the file,
    # so the register book must come from a trusted source.
    with open(ptxILA.reg_book_file) as reg_book_obj:
        reg_book = pickle.load(reg_book_obj)

    reg_book.remove('bar_state')
    reg_book.remove('bar_counter_enter')
    reg_book.remove('bar_counter_exit')
    '''
    for reg_name in reg_book:
        self.scalar_registers_a.append(self.model.reg(reg_name + '_a', instruction_format.REG_BITS))
        self.scalar_registers_b.append(self.model.reg(reg_name + '_b', instruction_format.REG_BITS))
    '''
    # arb is a one-bit register that starts at 0 and takes the value of a
    # zero-argument function on every step.
    self.arb_fun = self.model.fun('arb_fun', 1, [])
    self.arb = self.model.reg('arb', 1)
    self.model.set_next('arb', ila.appfun(self.arb_fun, []))  #Non-determined value
    self.arbA = ila.const(0x0, 1)
    self.arbB = ila.const(0x1, 1)
    self.model.set_init('arb', self.model.const(0x0, 1))
def createBar(self):
    """Create the barrier state registers, the barrier arbiter and the counters.

    The next value of ``bar_arb`` is the zero-argument function
    ``bar_arb_fun`` when threads 0 and 1 are either both in BAR_WAIT or
    both not in BAR_WAIT.  Otherwise it is 0 when thread 1 is the one in
    BAR_WAIT, and 1 when it is not.
    """
    bar_wait = self.bar_spec.BAR_WAIT

    # One barrier-state register per thread.
    self.bar_state_list = [
        self.model.reg('bar_state_%d' % (i), self.bar_spec.BAR_STATE_BITS)
        for i in range(self.thread_num)
    ]

    self.bar_arb_fun = self.model.fun('bar_arb_fun', 1, [])
    self.bar_arb = self.model.reg('bar_arb', 1)

    both_waiting = ((self.bar_state_list[0] == bar_wait) &
                    (self.bar_state_list[1] == bar_wait))
    none_waiting = ((self.bar_state_list[0] != bar_wait) &
                    (self.bar_state_list[1] != bar_wait))
    free_choice = ila.appfun(self.bar_arb_fun, [])
    forced_choice = ila.ite(self.bar_state_list[1] == bar_wait,
                            self.model.const(0x0, 1),
                            self.model.const(0x1, 1))
    self.bar_arb_next = ila.ite(both_waiting | none_waiting,
                                free_choice, forced_choice)
    self.model.set_next('bar_arb', self.bar_arb_next)

    self.bar_counter_enter = self.model.reg(
        'bar_counter_enter', self.bar_spec.BAR_COUNTER_ENTER_BITS)
    self.bar_counter_exit = self.model.reg(
        'bar_counter_exit', self.bar_spec.BAR_COUNTER_EXIT_BITS)
def createStates(self):
    """Declare the model state and attach the next-state functions.

    Initialises the bookkeeping containers, the log/check monitor
    registers and a single arbitration function, then runs the
    create*/generate*/set_next* helpers for thread ids 0 and 1.
    """
    thread_ids = (0, 1)
    long_bits = instruction_format.LONG_REG_BITS

    self.pc_list = []
    self.pc_next_list = []
    #self.imem_list = []
    self.next_state_dict = {}
    self.pred_registers = []
    self.scalar_registers = []
    self.long_scalar_registers = []

    # Monitor registers: a value register and a one-bit enable for each of
    # the log and the check monitor.
    self.log_register = self.model.reg('log_register', long_bits)
    self.check_register = self.model.reg('check_register', long_bits)
    self.en_log_register = self.model.reg('en_log_register', 1)
    self.en_check_register = self.model.reg('en_check_register', 1)

    # Default next-state expressions: hold the current value.
    self.log_register_next = self.log_register
    self.en_log_register_next = self.en_log_register
    self.check_register_next = self.check_register
    self.en_check_register_next = self.en_check_register

    # A single zero-argument, one-bit function and its application.
    self.arb_fun = self.model.fun('arb_fun', 1, [])
    self.arb = ila.appfun(self.arb_fun, [])

    self.bar_inst = []
    self.bar_list = []

    for tid in thread_ids:
        self.createPC(tid)
    for tid in thread_ids:
        self.createRegs(tid)
    self.createConst()
    self.bar_state_list = []
    for tid in thread_ids:
        self.generate_next_state(tid)
    self.createLog()
    self.createCheck()
    self.set_next_state()
    for tid in thread_ids:
        self.set_next_pc(tid)
def WRU1(gb):
    """Fold the WRU1 updates into the ``*_nxt`` expressions stored on ``gb``.

    The decode condition holds when ``arg_1_TREADY``, ``arg_0_TREADY`` and
    ``st_ready`` all equal ``READY_FALSE``.  Every ``gb.<state>_nxt`` is
    replaced by ``ite(decode, <update>, <previous gb.<state>_nxt>)``, so
    the previously stored expression still applies when decode is false.
    """
    ready_t = gb.READY_TRUE
    ready_f = gb.READY_FALSE
    valid_t = gb.VALID_TRUE
    valid_f = gb.VALID_FALSE
    data_size = gb.DATA_SIZE

    decode = ((gb.arg_1_TREADY == ready_f) &
              (gb.arg_0_TREADY == ready_f) &
              (gb.st_ready == ready_f))

    endPixel = ((gb.RAM_x == gb.RAM_x_M - gb.RAM_x_1) &
                (gb.RAM_y == gb.RAM_y_M - gb.RAM_y_1))
    relPixel = (gb.RAM_x == gb.RAM_x_1) & (gb.RAM_y == gb.RAM_y_M)

    def select(update, previous):
        # Apply `update` only when this instruction is decoded.
        return ila.ite(decode, update, previous)

    def window_cond():
        # Condition shared by the proc_in and arg_0_TVALID updates.
        return (((gb.RAM_x > gb.stencil_size - 1) &
                 (gb.RAM_y >= gb.RAM_size)) |
                ((gb.RAM_x == gb.RAM_x_1) &
                 (gb.RAM_y > gb.RAM_size)))

    # next state functions for child-states
    def genRows(idx):
        # Concatenate slice `idx` of stencil entries 8 down to 0.
        l = gb.DATA_SIZE * idx
        h = l + data_size - 1
        return ila.concat([gb.stencil[k][h:l] for k in xrange(8, -1, -1)])

    stencil_rows = [genRows(i) for i in xrange(gb.stencil_size - 1, -1, -1)]

    proc_in_nxt = ila.ite(window_cond(), ila.concat(stencil_rows), gb.proc_in)
    proc_in_nxt = ila.ite(relPixel, gb.proc_in, proc_in_nxt)
    gb.proc_in_nxt = select(proc_in_nxt, gb.proc_in_nxt)

    # next state functions for output ports
    gb.arg_1_TREADY_nxt = select(ila.ite(endPixel, ready_f, ready_t),
                                 gb.arg_1_TREADY_nxt)

    arg_0_TVALID_nxt = ila.ite(window_cond(), valid_t, valid_f)
    arg_0_TVALID_nxt = ila.ite(relPixel, gb.arg_0_TVALID, arg_0_TVALID_nxt)
    gb.arg_0_TVALID_nxt = select(arg_0_TVALID_nxt, gb.arg_0_TVALID_nxt)

    arg_0_TDATA_nxt = ila.appfun(gb.fun, proc_in_nxt)
    arg_0_TDATA_nxt = ila.ite(relPixel, gb.arg_0_TDATA, arg_0_TDATA_nxt)
    gb.arg_0_TDATA_nxt = select(arg_0_TDATA_nxt, gb.arg_0_TDATA_nxt)

    # next state functions for internal arch-states (all hold their value)
    gb.cur_pix_nxt = select(gb.cur_pix, gb.cur_pix_nxt)
    gb.pre_pix_nxt = select(gb.pre_pix, gb.pre_pix_nxt)
    gb.RAM_x_nxt = select(gb.RAM_x, gb.RAM_x_nxt)
    gb.RAM_y_nxt = select(gb.RAM_y, gb.RAM_y_nxt)
    gb.RAM_w_nxt = select(gb.RAM_w, gb.RAM_w_nxt)

    for i in xrange(0, gb.RAM_size):
        gb.RAM_nxt[i] = select(gb.RAM[i], gb.RAM_nxt[i])

    # Every stencil entry but the last holds while RAM_y < RAM_size and
    # otherwise takes the value of the entry after it.
    last = gb.stencil_size - 1
    for i in xrange(0, last):
        shifted = ila.ite(gb.RAM_y < gb.RAM_size,
                          gb.stencil[i], gb.stencil[i + 1])
        gb.stencil_nxt[i] = select(shifted, gb.stencil_nxt[i])

    # The last stencil entry holds its value.
    gb.stencil_nxt[last] = select(gb.stencil[last], gb.stencil_nxt[last])

    gb.st_ready_nxt = select(ready_t, gb.st_ready_nxt)
def createAESILA(enable_ps):
    """Build the AES abstraction and its ``aes_compute`` micro-abstraction.

    Args:
        enable_ps: value stored in ``m.enable_parameterized_synthesis``.

    Returns:
        A tuple ``(m, um)`` of the top-level abstraction and the
        micro-abstraction that is active while ``aes_state != 0``.
    """
    m = ila.Abstraction("aes")
    m.enable_parameterized_synthesis = enable_ps

    # I/O interface: this is where the commands come from.
    cmd = m.inp('cmd', 2)
    cmdaddr = m.inp('cmdaddr', 16)
    cmddata = m.inp('cmddata', 8)

    # internal arch state.
    state = m.reg('aes_state', 2)
    opaddr = m.reg('aes_addr', 16)
    oplen = m.reg('aes_len', 16)
    ctr = m.reg('aes_ctr', 128)
    key0 = m.reg('aes_key0', 128)

    # for the uinst.
    xram = m.mem('XRAM', 16, 8)
    aes = m.fun('aes', 128, [128, 128, 128])

    # fetch is just looking at the input command.
    # actually, the equivalent instruction
    m.fetch_expr = ila.concat([cmd, cmdaddr, cmddata])
    # when write to some addresses
    m.fetch_valid = (cmd == 2)

    # decode: one expression per address in 0xff00..0xff3f.
    m.decode_exprs = [(cmd == 2) & (cmdaddr == addr)
                      for addr in xrange(0xff00, 0xff40)]
    m.add_assumption((state == 0) | (oplen > 1))

    um = m.add_microabstraction('aes_compute', (state != 0))

    # write commands.
    def mb_reg_wr(name, reg):
        # multibyte register write: either a chunk write of cmddata or hold.
        written = ila.writechunk('wr_' + name, reg, cmddata)
        m.set_next(name, ila.choice('nxt_' + name, [written, reg]))

    for reg_name, reg in (('aes_addr', opaddr),
                          ('aes_len', oplen),
                          ('aes_ctr', ctr),
                          ('aes_key0', key0)):
        mb_reg_wr(reg_name, reg)

    # state
    state_options = [
        state,
        m.const(0, 2),
        ila.ite((cmddata == 1), m.const(1, 2), state),
    ]
    m.set_next('aes_state', ila.choice('state_next', state_options))

    # xram
    m.set_next('XRAM', xram)

    ################################
    #           Micro-ILA
    ################################
    # read data
    rd_data = um.reg('rd_data', 128)
    enc_data = um.reg('enc_data', 128)
    blk_cnt = um.reg('blk_cnt', 16)
    uaes_ctr = um.reg('uaes_ctr', 128)
    um.set_init('blk_cnt', um.const(0, 16))
    um.set_init('uaes_ctr', um.getreg('aes_ctr'))
    # NOTE(review): the memory is fetched through `m`, not `um` - confirm
    # this is intended.
    uxram = m.getmem('XRAM')

    um.fetch_expr = state
    # READ/OPERATE/WRITE
    um.decode_exprs = [(state == i) for i in [1, 2, 3]]

    # blk_cnt
    blk_step = ila.inrange('blkcntrange', um.const(1, 16), um.const(32, 16))
    blk_cnt_inc = blk_cnt + blk_step
    more_blocks = ila.choice('cond1',
                             (blk_cnt_inc != oplen),
                             (oplen >= blk_cnt_inc),
                             (oplen > blk_cnt_inc))
    blk_cnt_options = [
        m.const(0, 16),
        blk_cnt,
        blk_cnt_inc,
        ila.ite(more_blocks, blk_cnt_inc, blk_cnt),
    ]
    um.set_next('blk_cnt', ila.choice('blk_cnt_nxt', blk_cnt_options))

    # ustate
    ustate = um.getreg('aes_state')
    ustate_options = [m.const(v, 2) for v in (0, 1, 2, 3)]
    ustate_options.append(ustate)
    ustate_options.append(ila.ite(more_blocks, m.const(1, 2), m.const(0, 2)))
    ustate_nxt = ila.choice('ustate_next', ustate_options)
    # change 4
    um.set_next('aes_state', ustate_nxt)

    # rd_data
    rdblock = ila.loadblk(uxram, opaddr + blk_cnt, 16)
    um.set_next('rd_data', ila.choice('rd_data_nxt', rdblock, rd_data))

    # enc_data: only replaced by the aes output while aes_state == 2.
    aes_key = key0
    aes_enc_data = ila.appfun(aes, [uaes_ctr, aes_key, rd_data])
    um.set_next('enc_data', ila.ite(state == 2, aes_enc_data, enc_data))
    #print um.get_next('enc_data')

    # uaes_ctr
    ctr_step = ila.inrange('uaes_ctr_nxt_range',
                           m.const(1, 128), m.const(128, 128))
    uaes_ctr_nxt = ila.choice('uaes_ctr_nxt', uaes_ctr, uaes_ctr + ctr_step)
    um.set_next('uaes_ctr', uaes_ctr_nxt)

    # xram write
    xram_w_addr = opaddr + blk_cnt
    xram_w_aes = ila.storeblk(uxram, xram_w_addr, enc_data)
    um.set_next('XRAM', ila.choice('xram_nxt', uxram, xram_w_aes))

    return m, um
def createRsaIla():
    """Build the RSA abstraction with synthesis choices for each next state.

    Parameterized synthesis is disabled on the abstraction.

    Returns:
        The populated abstraction.
    """
    m = ila.Abstraction('rsa')
    m.enable_parameterized_synthesis = 0

    # I/O interface
    cmd = m.inp('cmd', 2)
    cmdaddr = m.inp('cmdaddr', 16)
    cmddata = m.inp('cmddata', 8)

    # response
    m.reg('dataout', 8)

    # states
    state = m.reg('rsa_state', 2)
    addr = m.reg('rsa_addr', 16)
    rsa_M = m.reg('rsa_M', 2048)
    m.reg('rsa_N', 2048)
    m.reg('rsa_E', 2048)
    rsa_buff = m.reg('rsa_buff', 2048)
    byte_counter = m.reg('rsa_byte_counter', 8)
    xram = m.mem('XRAM', 16, 8)
    rsa = m.fun('rsa', 2048, [2048])

    # fetch
    m.fetch_expr = ila.concat([state, cmd, cmdaddr, cmddata])
    m.fetch_valid = (cmd == 1) | (cmd == 2)

    # dataout: the state, a chunk of the address register, or zero.
    statebyte = ila.zero_extend(state, 8)
    wraddrbyte = ila.readchunk('rsa_addr', addr, 8)
    dataout_options = [statebyte, wraddrbyte, m.const(0, 8)]
    m.set_next('dataout', ila.choice('dataout', dataout_options))

    # rsa_addr
    addr_wr = ila.writechunk('wr_addr', addr, cmddata)
    m.set_next('rsa_addr', ila.choice('nxt_addr', [addr_wr, addr]))

    # rsa_state
    state_choice = ila.choice('state_choice',
                              [m.const(v, 2) for v in (0, 1, 2, 3)])
    wr_nxt = ila.ite(byte_counter == 255, m.const(0, 2), m.const(3, 2))
    state_options = [
        wr_nxt,
        state_choice,
        ila.ite(cmddata == 1, m.const(1, 2), state),
        state,
    ]
    m.set_next('rsa_state', ila.choice('rsa_state_nxt', state_options))

    # byte_counter
    byte_counter_inc = byte_counter + 1
    byte_counter_rst = ila.ite(cmddata == 1, m.const(0, 8), byte_counter)
    byte_counter_options = [byte_counter_inc, byte_counter_rst, byte_counter]
    m.set_next('rsa_byte_counter',
               ila.choice('byte_counter_nxt', byte_counter_options))

    # buff
    rsa_buff_op = ila.appfun(rsa, [rsa_M])
    m.set_next('rsa_buff', ila.choice('rsa_buff_nxt', rsa_buff_op, rsa_buff))

    # rsa_M
    m.set_next('rsa_M', rsa_M)

    # xram
    #xram_w_rsa_lit = ila.storeblk (xram, addr, rsa_buff)
    #xram_w_rsa_big = ila.storeblk_big (xram, addr, rsa_buff)
    byte_cnt_16 = ila.zero_extend(byte_counter, 16)
    # NOTE(review): byte_counter is declared 8 bits wide, so the product
    # below may wrap before it is zero-extended - confirm this is intended.
    sh = ila.zero_extend((255 - byte_counter) * 8, 2048)
    xram_w_rsa_data_1 = (rsa_buff >> sh)[7:0]
    #xram_w_rsa_data_2 = rsa_buff [255 - byte_cnt_16]
    xram_w_rsa_lit = ila.store(xram, addr + byte_cnt_16, xram_w_rsa_data_1)
    m.set_next('XRAM', ila.choice('xram_nxt', [xram_w_rsa_lit, xram]))

    return m
def U4(gb):
    """Register the U4 decode and fold its updates into ``gb``'s ``*_nxt`` state.

    The decode condition requires ``arg_0_TVALID == VALID_FALSE`` and
    either (``gb_exit_it[0]`` false and the stencil stream not empty) or
    (``gb_exit_it[0]`` true and ``gb_exit_it[7]`` false).  Every
    ``gb.<state>_nxt`` is replaced by
    ``ite(decode, <update>, <previous gb.<state>_nxt>)``.
    """
    valid_t = gb.VALID_TRUE
    valid_f = gb.VALID_FALSE
    full_t = gb.FULL_TRUE
    full_f = gb.FULL_FALSE
    empty_t = gb.EMPTY_TRUE
    empty_f = gb.EMPTY_FALSE
    it_t = gb.gb_exit_it_T
    it_f = gb.gb_exit_it_F

    ############################ decode ###################################
    decode = ((gb.arg_0_TVALID == valid_f) &
              (((gb.gb_exit_it[0] == it_f) &
                (gb.stencil_stream_empty == empty_f)) |
               ((gb.gb_exit_it[0] == it_t) &
                (gb.gb_exit_it[7] == it_f))))
    gb.addDecode(decode)

    def select(update, previous):
        # Apply `update` only when this instruction is decoded.
        return ila.ite(decode, update, previous)

    ############################ next state functions #####################
    # arg_1_TREADY (hold)
    gb.arg_1_TREADY_nxt = select(gb.arg_1_TREADY, gb.arg_1_TREADY_nxt)

    # arg_0_TVALID
    arg_0_TVALID_nxt = ila.ite((gb.gb_pp_it[7] == it_t) &
                               (gb.gb_exit_it[6] == it_f),
                               valid_t, valid_f)
    gb.arg_0_TVALID_nxt = select(arg_0_TVALID_nxt, gb.arg_0_TVALID_nxt)

    # arg_0_TDATA: gb.fun applied to the last buffer entry when the
    # stencil stream is full, otherwise to entry 0.
    in_stencil = ila.ite(gb.stencil_stream_full == full_t,
                         gb.stencil_stream_buff[gb.stencil_stream_size - 1],
                         gb.stencil_stream_buff[0])
    gb.arg_0_TDATA_nxt = select(ila.appfun(gb.fun, in_stencil),
                                gb.arg_0_TDATA_nxt)

    # 1-D buffer for input data (hold)
    gb.LB1D_in_nxt = select(gb.LB1D_in, gb.LB1D_in_nxt)
    gb.LB1D_uIn_nxt = select(gb.LB1D_uIn, gb.LB1D_uIn_nxt)
    gb.LB1D_buff_nxt = select(gb.LB1D_buff, gb.LB1D_buff_nxt)

    # pixel position for input data (hold)
    gb.LB1D_p_cnt_nxt = select(gb.LB1D_p_cnt, gb.LB1D_p_cnt_nxt)

    # in stream full / empty (hold)
    gb.in_stream_full_nxt = select(gb.in_stream_full, gb.in_stream_full_nxt)
    gb.in_stream_empty_nxt = select(gb.in_stream_empty, gb.in_stream_empty_nxt)

    # in stream buffer (hold)
    for i in xrange(0, gb.in_stream_size):
        gb.in_stream_buff_nxt[i] = select(gb.in_stream_buff[i],
                                          gb.in_stream_buff_nxt[i])

    # LB2D proc x / y / w idx (hold)
    gb.LB2D_proc_x_nxt = select(gb.LB2D_proc_x, gb.LB2D_proc_x_nxt)
    gb.LB2D_proc_y_nxt = select(gb.LB2D_proc_y, gb.LB2D_proc_y_nxt)
    gb.LB2D_proc_w_nxt = select(gb.LB2D_proc_w, gb.LB2D_proc_w_nxt)

    # LB2D proc buffer (hold)
    for i in xrange(0, gb.LB2D_proc_size):
        gb.LB2D_proc_nxt[i] = select(gb.LB2D_proc[i], gb.LB2D_proc_nxt[i])

    # slice stream full / empty (hold)
    gb.slice_stream_full_nxt = select(gb.slice_stream_full,
                                      gb.slice_stream_full_nxt)
    gb.slice_stream_empty_nxt = select(gb.slice_stream_empty,
                                       gb.slice_stream_empty_nxt)

    # slice stream buffer (hold)
    for i in xrange(0, gb.slice_stream_size):
        gb.slice_stream_buff_nxt[i] = select(gb.slice_stream_buff[i],
                                             gb.slice_stream_buff_nxt[i])

    # LB2D shift x / y idx (hold)
    gb.LB2D_shift_x_nxt = select(gb.LB2D_shift_x, gb.LB2D_shift_x_nxt)
    gb.LB2D_shift_y_nxt = select(gb.LB2D_shift_y, gb.LB2D_shift_y_nxt)

    # LB2D shift buffer (hold)
    for i in xrange(0, gb.LB2D_shift_size):
        gb.LB2D_shift_nxt[i] = select(gb.LB2D_shift[i], gb.LB2D_shift_nxt[i])

    # stencil_stream_full
    # NOTE(review): both branches are FULL_FALSE, so the condition has no
    # effect on the value - confirm this is intended.
    stencil_stream_full_nxt = ila.ite(gb.gb_exit_it[0] == it_t,
                                      full_f, full_f)
    gb.stencil_stream_full_nxt = select(stencil_stream_full_nxt,
                                        gb.stencil_stream_full_nxt)

    # stencil_stream_empty
    stencil_stream_empty_nxt = ila.ite(gb.stencil_stream_full == full_t,
                                       empty_f, empty_t)
    gb.stencil_stream_empty_nxt = select(stencil_stream_empty_nxt,
                                         gb.stencil_stream_empty_nxt)

    # stencil_stream_buff (hold)
    for i in xrange(0, gb.stencil_stream_size):
        gb.stencil_stream_buff_nxt[i] = select(gb.stencil_stream_buff[i],
                                               gb.stencil_stream_buff_nxt[i])

    # gb_p_cnt: add gb_p_cnt_1 while below gb_p_cnt_M, else gb_p_cnt_M.
    gb_p_cnt_nxt = ila.ite(gb.gb_p_cnt < gb.gb_p_cnt_M,
                           gb.gb_p_cnt + gb.gb_p_cnt_1,
                           gb.gb_p_cnt_M)
    gb.gb_p_cnt_nxt = select(gb_p_cnt_nxt, gb.gb_p_cnt_nxt)

    # gb_pp_it: entry 0 becomes gb_pp_it_T, entry i takes entry i - 1.
    gb.gb_pp_it_nxt[0] = select(gb.gb_pp_it_T, gb.gb_pp_it_nxt[0])
    for i in xrange(1, gb.gb_pp_size):
        gb.gb_pp_it_nxt[i] = select(gb.gb_pp_it[i - 1], gb.gb_pp_it_nxt[i])

    # gb_exit_it: entry 0 is true when gb_p_cnt equals gb_p_cnt_M,
    # entry i takes entry i - 1.
    gb_exit_it_0_nxt = ila.ite(gb.gb_p_cnt == gb.gb_p_cnt_M,
                               gb.gb_exit_it_T, gb.gb_exit_it_F)
    gb.gb_exit_it_nxt[0] = select(gb_exit_it_0_nxt, gb.gb_exit_it_nxt[0])
    for i in xrange(1, gb.gb_exit_size):
        gb.gb_exit_it_nxt[i] = select(gb.gb_exit_it[i - 1],
                                      gb.gb_exit_it_nxt[i])
def buildILA():
    """Build and return the ILA abstraction named 'RBM'.

    The function declares the top-level abstraction (configuration inputs,
    configuration registers, DMA read/write interface state, the 'data' and
    'predict_result' memories) and then attaches five micro-abstractions:

    * 'compute'  (child of RBM)       -- active while init_done == 1 and
      done == 0; sequences read / train-or-predict / finish via 'upc'.
    * 'DMAload'  (child of RBM)       -- active while rd_granted == 1; copies
      data_in bytes into the 'data' memory.
    * 'train'    (child of 'compute') -- active while train_input_done == 1.
    * 'predict'  (child of 'compute') -- active while predict_input_done == 1.
    * 'store'    (child of RBM)       -- active while wr_granted == 1; streams
      'predict_result' entries to data_out.

    The body relies on names that are not defined inside this function and
    must be provided by the enclosing module: ``ila``, ``log``,
    ``fpconvert``, ``fpconst``, ``fixpoint``, ``FPedge``, ``FPsum``,
    ``FPsum3``, ``FPpow``, ``FP01_D``, ``FP05_D``, ``FPu16`` and
    ``LEARN_RATE``.

    NOTE: many local names (nm, nu, ntu, nlp, nv, nh, v_cnt, h_cnt, jstate,
    LastH/LastJ/LastV, SumEdge, SumHidden, SumHiddenL*, nextCondition,
    hidden_unit, pow2, wr_granted, wr_request, wr_complete, predict_result,
    data_out, ...) are re-bound from one section to the next, and the nested
    helper functions read several of them at call time.  Statement order is
    therefore significant; do not reorder sections.

    Returns:
        The top-level abstraction object created by ``ila.Abstraction('RBM')``.
    """
    #---------------------------
    # define universal constant
    #---------------------------
    K = 5
    NUM_MOVIE_MAX = 100
    NUM_HIDDEN_MAX = 100
    NUM_VISIBLE_MAX = NUM_MOVIE_MAX * K
    # Address widths are computed as floor(log2(x)) + 1.
    DATAMEM_ADDR_WIDTH = int(
        log(NUM_VISIBLE_MAX + 1) /
        log(2)) + 1  # 9 # the argument is not a power of two here, but need to check
    HIDDEN_UNIT_WIDTH = int(
        log(NUM_HIDDEN_MAX + 1) /
        log(2)) + 1  # 7 # the argument is not a power of two here, but need to check
    VISIBLE_UNIT_WIDTH = int(log(NUM_VISIBLE_MAX + 1) / log(2)) + 1  # 9
    EDGEMEM_ADDR_WIDTH = int(
        log((NUM_VISIBLE_MAX + 1) * (NUM_HIDDEN_MAX + 1)) / log(2)) + 1  # 16
    POS_ADDR_WIDTH = EDGEMEM_ADDR_WIDTH
    # NOTE(review): NEG_ADDR_WIDTH is only referenced by a commented-out line below.
    NEG_ADDR_WIDTH = EDGEMEM_ADDR_WIDTH
    PREDICT_RESULT_WIDTH = int(log(NUM_MOVIE_MAX) / log(2)) + 1  # 7
    KWIDTH = int(log(K) / log(2)) + 1  # 3
    #---------------------------
    # Model
    #---------------------------
    rbm = ila.Abstraction('RBM')
    # Configuration inputs (32 bit each) and the 1-bit "configuration done" strobe.
    conf_done = rbm.inp('conf_done', 1)
    conf_num_hidden = rbm.inp('conf_num_hidden', 32)
    conf_num_visible = rbm.inp('conf_num_visible', 32)
    conf_num_users = rbm.inp('conf_num_users', 32)
    conf_num_loops = rbm.inp('conf_num_loops', 32)
    conf_num_testusers = rbm.inp('conf_num_testusers', 32)
    conf_num_movies = rbm.inp('conf_num_movies', 32)
    rst = rbm.inp('rst', 1)
    init_done = rbm.reg('init_done', 1)
    done = rbm.reg('done', 1)
    # Latched configuration; only the low 16 bits of each conf_* input are kept
    # (see the Config section below).
    num_hidden = rbm.reg('num_hidden', 16)
    num_visible = rbm.reg('num_visible', 16)
    num_users = rbm.reg('num_users', 16)
    num_loops = rbm.reg('num_loops', 16)
    num_testusers = rbm.reg('num_testusers', 16)
    num_movies = rbm.reg('num_movies', 16)
    # DMA output
    rd_index = rbm.reg('rd_index', 32)
    rd_length = rbm.reg('rd_length', 32)
    rd_request = rbm.reg('rd_request', 1)
    rd_grant = rbm.inp('rd_grant', 1)
    data_in = rbm.inp('data_in', 32)
    # rd_cnt = rbm.reg('rd_cnt', 16) # i ureg #585
    # DMA input
    wr_grant = rbm.inp('wr_grant', 1)
    wr_request = rbm.reg('wr_request', 1)
    wr_index = rbm.reg('wr_index', 32)
    wr_length = rbm.reg('wr_length', 32)
    data_out = rbm.reg('data_out', 32)
    # wr_cnt = rbm.reg('wr_cnt', 16) : u reg
    data = rbm.mem('data', DATAMEM_ADDR_WIDTH, 8)
    # The handle is not kept here; the memory is fetched by name later via
    # getmem('predict_result').
    rbm.mem('predict_result', PREDICT_RESULT_WIDTH, 8)
    #-------------------------------------
    # Decoding Expressions
    #-------------------------------------
    rstInst = rst == 1
    confDoneInst = (rst == 0) & (init_done == 0) & (conf_done == 1)
    rdGrantInst = (rd_request == 1) & (rd_grant == 1)
    wrGrantInst = (wr_request == 1) & (wr_grant == 1)
    # NOTE(review): decodeExpr is re-assigned in the Compute section and is not
    # read anywhere in this function -- confirm whether it should be attached
    # to rbm.
    decodeExpr = [rstInst, confDoneInst, rdGrantInst, wrGrantInst]

    #-------------------------------------
    # AUX Functions
    #-------------------------------------
    def const(v, w):
        # Shorthand for a constant of value v and width w created on rbm.
        return rbm.const(v, w)

    # Frequently used constants; the naming scheme is <b|h><value>[_<width>].
    b0 = const(0, 1)
    b1 = const(1, 1)
    h0_8 = const(0, 8)
    h1_8 = const(1, 8)
    h0_4 = const(0, 4)
    h1_4 = const(1, 4)
    h2_4 = const(2, 4)
    h3_4 = const(3, 4)
    h4_4 = const(4, 4)
    h0_16 = const(0, 16)
    h1_16 = const(1, 16)
    h0_32 = const(0, 32)
    h0_64 = const(0, 64)
    #-------------------------------------
    # Init conditions
    #-------------------------------------
    rbm.set_init('init_done', b0)
    rbm.set_init('done', b0)
    rbm.set_init('num_hidden', h0_16)
    rbm.set_init('num_visible', h0_16)
    rbm.set_init('num_users', h0_16)
    rbm.set_init('num_loops', h0_16)
    rbm.set_init('num_testusers', h0_16)
    rbm.set_init('num_movies', h0_16)
    #-------------------------------------
    # Config
    #-------------------------------------
    # Reset has priority and clears the register; otherwise confDoneInst
    # (which requires init_done == 0) latches the low 16 bits of the matching
    # conf_* input.  Once configured, it cannot be reconfigured unless reset.
    init_done_nxt = ila.ite(rstInst, b0, ila.ite(confDoneInst, b1, init_done))
    num_hidden_nxt = ila.ite(
        rstInst, h0_16,
        ila.ite(confDoneInst, conf_num_hidden[15:0], num_hidden))
    num_visible_nxt = ila.ite(
        rstInst, h0_16,
        ila.ite(confDoneInst, conf_num_visible[15:0], num_visible))
    num_users_nxt = ila.ite(
        rstInst, h0_16, ila.ite(confDoneInst, conf_num_users[15:0],
                                num_users))
    num_loops_nxt = ila.ite(
        rstInst, h0_16, ila.ite(confDoneInst, conf_num_loops[15:0],
                                num_loops))
    num_testusers_nxt = ila.ite(
        rstInst, h0_16,
        ila.ite(confDoneInst, conf_num_testusers[15:0], num_testusers))
    num_movies_nxt = ila.ite(
        rstInst, h0_16,
        ila.ite(confDoneInst, conf_num_movies[15:0], num_movies))
    rbm.set_next('init_done', init_done_nxt)
    rbm.set_next('num_hidden', num_hidden_nxt)
    rbm.set_next('num_visible', num_visible_nxt)
    rbm.set_next('num_users', num_users_nxt)
    rbm.set_next('num_loops', num_loops_nxt)
    rbm.set_next('num_testusers', num_testusers_nxt)
    rbm.set_next('num_movies', num_movies_nxt)
    # INST-level w/r complete
    # (the two Python handles below are not read again in this function; the
    # registers are accessed by name through getreg)
    rbm_rd_complete = rbm.reg('rd_complete', 1)
    rbm_wr_complete = rbm.reg('wr_complete', 1)
    rbm.set_init('rd_complete', b0)
    rbm.set_init('wr_complete', b0)
    #------------------------------------
    # Compute UABS
    #------------------------------------
    # Micro-abstraction that is valid while configured and not yet done.
    uabs = rbm.add_microabstraction('compute', (init_done == 1) & (done == 0))
    index = uabs.reg('index', 16)
    loop_count = uabs.reg('loop_count', 16)
    pc = uabs.reg('upc', 4)
    edges_mem = uabs.mem('edges', EDGEMEM_ADDR_WIDTH, 8)
    nlp = uabs.getreg('num_loops')
    # nm is the zero-extended (32-bit) num_movies here; it is re-bound to the
    # raw register in the Store section.
    nm = ila.zero_extend(uabs.getreg('num_movies'), 32)
    nu = uabs.getreg('num_users')
    ntu = uabs.getreg('num_testusers')
    out_rd_request = uabs.getreg('rd_request')
    out_rd_complete = uabs.getreg('rd_complete')
    out_rd_length = uabs.getreg('rd_length')
    out_rd_index = uabs.getreg('rd_index')
    train_input_done = uabs.reg('train_input_done', 1)
    predict_input_done = uabs.reg('predict_input_done', 1)
    uabs.set_init('upc', const(0, 4))
    uabs.set_init('index', h0_16)
    uabs.set_init('loop_count', h0_16)
    uabs.set_init('train_input_done', b0)
    uabs.set_init('predict_input_done', b0)
    uabs.set_init('rd_complete', b0)
    ### computation micro_instructions
    # Decode conditions on 'upc' (0: start read, 1: wait/decide,
    # 2: train or predict running, 3: finished).
    StartRead = (pc == 0)
    WaitReadComplete = (pc == 1) & (out_rd_complete == 0)
    DecideTrainOrPredict = (pc == 1) & (out_rd_complete == 1)
    StartTrain = (pc == 2) & (train_input_done == 1)
    StartPredict = (pc == 2) & (predict_input_done == 1)
    Finish = (pc == 3)
    # 'upc' encodings used as next-state values.
    StartReadState = const(0, 4)
    WaitReadCompleteState = const(1, 4)
    StartTrainOrPredict = const(2, 4)
    FinishState = const(3, 4)
    decodeExpr = [
        StartRead, WaitReadComplete, DecideTrainOrPredict, StartTrain,
        StartPredict, Finish
    ]
    # StartRead raises rd_request and programs the transfer: length is
    # 5 * num_movies (literal 5 -- presumably K; confirm) and the index is the
    # zero-extended current 'index'.
    out_rd_request_nxt = ila.ite(StartRead, b1, out_rd_request)
    out_rd_length_nxt = ila.ite(StartRead, 5 * nm, out_rd_length)
    out_rd_index_nxt = ila.ite(StartRead, ila.zero_extend(index, 32),
                               out_rd_index)
    out_rd_complete_nxt = ila.ite(
        StartRead, b0, ila.ite(DecideTrainOrPredict, b0, out_rd_complete))
    # After a completed read: train while loop_count < num_loops, predict when
    # loop_count == num_loops.
    train_input_done_nxt = ila.ite(DecideTrainOrPredict,
                                   ila.ite(loop_count < nlp, b1, b0),
                                   train_input_done)
    predict_input_done_nxt = ila.ite(DecideTrainOrPredict,
                                     ila.ite(loop_count == nlp, b1, b0),
                                     predict_input_done)
    # 'upc' stays at StartTrainOrPredict during train/predict at this level;
    # the 'train' and 'predict' micro-abstractions write 'upc' themselves when
    # they finish (see upc_s6_nxt in those sections).
    pc_nxt = ila.ite(
        StartRead,
        WaitReadCompleteState,
        ila.ite(
            WaitReadComplete,
            pc,
            ila.ite(
                DecideTrainOrPredict,
                StartTrainOrPredict,
                ila.ite(
                    StartTrain,
                    StartTrainOrPredict,  # StartReadState, # actually should be updated by u2inst
                    ila.ite(
                        StartPredict,
                        StartTrainOrPredict,  # StartReadState, # actually should be updated by u2inst
                        ila.ite(
                            Finish,
                            FinishState,
                            pc  # should never happen!
                        ))))))
    # should be updated by u2inst
    # NOTE: index_nxt_dummy and loop_count_nxt_dummy are built but never passed
    # to set_next; 'index' and 'loop_count' are held unchanged at this level
    # (see the set_next calls below).
    index_nxt_dummy = ila.ite(
        StartTrain | StartPredict,
        ila.ite(
            (index == nu - 1) & (loop_count != nlp),
            h0_16,
            ila.ite(
                (index == ntu - 1) & (loop_count == nlp),
                index,  # And it is not correct
                index + 1)),
        index)
    # not in use
    loop_count_nxt_dummy = ila.ite(
        StartTrain | StartPredict,
        ila.ite((index == nu - 1) & (loop_count != nlp), loop_count + 1,
                loop_count), loop_count)
    uabs.set_next('rd_request', out_rd_request_nxt)
    uabs.set_next('rd_length', out_rd_length_nxt)
    uabs.set_next('rd_index', out_rd_index_nxt)
    uabs.set_next('rd_complete', out_rd_complete_nxt)
    uabs.set_next('train_input_done', train_input_done_nxt)
    uabs.set_next('predict_input_done', predict_input_done_nxt)
    uabs.set_next('upc', pc_nxt)
    uabs.set_next('index', index)
    uabs.set_next('loop_count', loop_count)
    # this has to be updated by micro_inst
    # read_request is turned off by loaduabs
    # predict_input_done, train_input_done is turned off by uabs_train/predict
    #------------------------------------
    # Load UABS
    #------------------------------------
    # RBM interface
    # high-level interface
    rd_granted = rbm.reg(
        'rd_granted', 1
    )  # this is only used for maintaining the validity of load UABS, no other should use
    # On a read grant the top level stores data_in[7:0] at address 0 of 'data'
    # and sets rd_granted, which enables the DMAload micro-abstraction.
    data_nxt = ila.ite(rdGrantInst,
                       ila.store(data, const(0, DATAMEM_ADDR_WIDTH),
                                 data_in[7:0]), data)  # data #
    rd_granted_nxt = ila.ite(rdGrantInst, b1, rd_granted)
    rbm.set_next('rd_granted', rd_granted_nxt)
    rbm.set_next('data', data_nxt)
    # one change is to move these into lower abstraction
    DMAload = rbm.add_microabstraction(
        'DMAload', (rd_granted == 1))  # this is sub-instruction
    w_cnt = DMAload.reg('i', 16)
    dma_rd_request = DMAload.getreg('rd_request')
    dma_rd_length = DMAload.getreg('rd_length')
    # dma_rd_index and state_update_rd_request are not read again in this
    # function.
    dma_rd_index = DMAload.getreg('rd_index')
    state_update_data = DMAload.getmem('data')
    state_update_rd_request = dma_rd_request
    self_update_rd_granted = DMAload.getreg('rd_granted')
    # Only the low 16 bits of rd_length are compared against the 16-bit counter.
    more_read_in = w_cnt < dma_rd_length[15:0]
    last_cycle = w_cnt == dma_rd_length[15:0]
    # The counter starts at 1; address 0 was already written by the top-level
    # rdGrantInst update above.
    DMAload.set_init('i', h1_16)  # h0_16 )
    DMAload.set_next('i', ila.ite(more_read_in, w_cnt + 1, w_cnt))
    DMAload.set_next('rd_request', b0)  # reset to 0 immediately
    DMAload.set_next('rd_granted',
                     ila.ite(more_read_in, self_update_rd_granted, b0))
    DMAload.set_next('rd_complete', ila.ite(more_read_in, b0, b1))
    # While more data is expected, store data_in[7:0] at the counter address;
    # on the last cycle store the constant 1 at address rd_length.
    DMAload.set_next(
        'data',
        ila.ite(
            more_read_in,
            ila.store(state_update_data, w_cnt[DATAMEM_ADDR_WIDTH - 1:0],
                      data_in[7:0]),
            ila.ite(
                last_cycle,
                ila.store(state_update_data,
                          dma_rd_length[DATAMEM_ADDR_WIDTH - 1:0], h1_8),
                state_update_data)))
    #------------------------------------
    # Train UUABS
    #------------------------------------
    TrainUabs = uabs.add_microabstraction('train', train_input_done == 1)
    # Uninterpreted functions declared as (name, output width, [input widths]).
    sigmoid_func = TrainUabs.fun('sigmoid', 64, [16])  # DATA_sum_, 01_D
    rand_func = TrainUabs.fun('rand', 64, [])  # generate random number
    to_int_exp = TrainUabs.fun('to_int_exp', 32, [16])  #
    divide_func = TrainUabs.fun(
        'divide', 64, [32, 64])  # dp:32_32 / sum_of_pow2 64_64 = 64_1
    hidden_unit = TrainUabs.mem('hidden_unit', HIDDEN_UNIT_WIDTH, 1)
    visible_unit = TrainUabs.mem('visible_unit', VISIBLE_UNIT_WIDTH, 1)
    visibleEnergy = TrainUabs.mem('visibleEnergies', KWIDTH, 16)
    pow2 = TrainUabs.mem('pow2', KWIDTH, 32)
    pos = TrainUabs.mem('pos', POS_ADDR_WIDTH, 1)
    #neg = TrainUabs.mem('neg', NEG_ADDR_WIDTH, 1 ) # not needed
    train_sum = TrainUabs.reg('train_sum', 16)
    train_max = TrainUabs.reg('train_max', 16)
    sumOfpow2 = TrainUabs.reg('sumOfpow2', 64)
    jstate = TrainUabs.reg('jstate', 16)
    inner_loop_pc = TrainUabs.reg('per_v_pc', 4)
    train_pc = TrainUabs.reg('train_upc', 4)
    # Re-evaluate
    v_cnt = TrainUabs.reg('train_v_cnt', 16)
    h_cnt = TrainUabs.reg('train_h_cnt', 16)
    train_input = TrainUabs.getmem('data')
    edges_input = TrainUabs.getmem('edges')
    nv = TrainUabs.getreg('num_visible')
    nh = TrainUabs.getreg('num_hidden')
    nu = TrainUabs.getreg('num_users')
    ntu = TrainUabs.getreg('num_testusers')
    nlp = TrainUabs.getreg('num_loops')
    # Decode conditions and encodings for 'train_upc':
    # 0 SumEdge, 1 SumHidden, 3 StorePos, 2 EdgeUpdate.
    SumEdge = train_pc == 0
    SumEdgeState = const(0, 4)
    SumHidden = train_pc == 1
    SumHiddenState = const(1, 4)
    StorePos = train_pc == 3
    StorePosState = const(3, 4)
    EdgeUpdate = train_pc == 2
    EdgeUpdateState = const(2, 4)
    # NOTE(review): StorePos is not listed in decode_exprs -- confirm whether
    # that is intentional.
    TrainUabs.decode_exprs = [SumEdge, SumHidden, EdgeUpdate]
    #Begin
    v_cnt_init = const(0, 16)
    h_cnt_init = const(0, 16)
    pc_init = const(0, 4)
    #SumEdge: s0
    # Edge address: row v_cnt, column h_cnt, with a row stride of
    # NUM_HIDDEN_MAX + 1.
    edge_load_addr = (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt
    # Restart the sum when v_cnt == 0; add the converted edge value only when
    # data[v_cnt] == 1.
    train_sum_s0_nxt = ila.ite(v_cnt == 0, const(0, 16), train_sum) + ila.ite(
        ila.load(train_input, v_cnt[DATAMEM_ADDR_WIDTH - 1:0]) == 1,
        fpconvert(ila.load(edges_input, edge_load_addr), FPedge, FPsum),
        const(0, 16))
    v_cnt_s0_nxt = ila.ite(v_cnt == nv, h0_16, v_cnt + 1)
    h_cnt_s0_nxt = ila.ite((v_cnt == nv),
                           ila.ite(h_cnt == nh - 1, h0_16, h_cnt + 1), h_cnt)
    # The h_cnt wrap above (at nh - 1) is for transiting to the next state.
    # New hidden bit: 1 when rand() < sigmoid(sum).
    hidden_update_s0_0 = ila.ite(
        ila.appfun(rand_func) < ila.appfun(sigmoid_func, train_sum_s0_nxt), b1,
        b0)
    hidden_update_s0_1 = ila.ite(
        v_cnt == nv,
        ila.store(hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0],
                  hidden_update_s0_0), hidden_unit)
    # When leaving SumEdge, additionally store 1 at hidden_unit[nh].
    hidden_update_s0_2 = ila.ite((v_cnt == nv) & (h_cnt == nh - 1),
                                 ila.store(hidden_update_s0_1,
                                           nh[HIDDEN_UNIT_WIDTH - 1:0], b1),
                                 hidden_update_s0_1)
    train_pc_s0_nxt = ila.ite((v_cnt == nv) & (h_cnt == nh - 1),
                              SumHiddenState, SumEdgeState)
    # Just like init
    jstate_s0_nxt = h0_16
    inner_loop_pc_s0_nxt = h0_4
    # add prefix :
    # train_sum_nxt = ila.ite(SumEdge, train_sum_s0_nxt, ila.ite(SumHidden, ... ) )
    # SumHiddenK0-K4 : s1-s5
    # pc:1 per_v_pc : 0 1 2 3
    LastH = h_cnt == nh
    LastJ = jstate == K - 1
    LastV = (v_cnt + K == nv) | (v_cnt + K >= NUM_VISIBLE_MAX)
    # Sub-steps of SumHidden selected by 'per_v_pc'.
    SumHiddenL0 = SumHidden & (inner_loop_pc == 0)
    SumHiddenL1 = SumHidden & (inner_loop_pc == 1)
    SumHiddenL2 = SumHidden & (inner_loop_pc == 2)
    SumHiddenL3 = SumHidden & (inner_loop_pc == 3)
    h_cnt_s1_s5_L0_nxt = ila.ite(LastH, h0_16, h_cnt + 1)
    jstate_s1_s5_L0_nxt = ila.ite(LastH, ila.ite(LastJ, h0_16, jstate + 1),
                                  jstate)
    inner_loop_pc_s1_s5_L0_nxt = ila.ite(LastJ & LastH, h1_4, inner_loop_pc)
    jstate_s1_s5_L1_nxt = ila.ite(LastJ, h0_16, jstate + 1)
    inner_loop_pc_s1_s5_L1_nxt = ila.ite(LastJ, h2_4, inner_loop_pc)
    jstate_s1_s5_L2_nxt = jstate_s1_s5_L1_nxt
    inner_loop_pc_s1_s5_L2_nxt = ila.ite(LastJ, h3_4, inner_loop_pc)
    jstate_s1_s5_L3_nxt = jstate_s1_s5_L2_nxt
    # NOTE(review): both branches of the inner ite are h0_4, so LastV has no
    # effect on the value here.
    inner_loop_pc_s1_s5_L3_nxt = ila.ite(
        LastJ,
        ila.ite(LastV, h0_4, h0_4),  # will choose to go back or not
        inner_loop_pc)

    def nextCondition(l0, l1, l2, l3, default):
        # Select one of l0..l3 by the active SumHidden sub-step, else default.
        # SumHiddenL0..L3 are looked up when the function is called; both they
        # and this function are re-bound in the Predict section.
        return ila.ite(
            SumHiddenL0, l0,
            ila.ite(
                SumHiddenL1, l1,
                ila.ite(SumHiddenL2, l2, ila.ite(SumHiddenL3, l3, default))))

    h_cnt_s1_s5_nxt = nextCondition(h_cnt_s1_s5_L0_nxt, h_cnt, h_cnt, h_cnt,
                                    h_cnt)
    # v_cnt advances by K at the end of sub-step L3, wrapping to 0 on LastV.
    v_cnt_s1_s5_nxt = ila.ite(SumHiddenL3 & LastJ,
                              ila.ite(LastV, h0_16, v_cnt + K), v_cnt)
    jstate_s1_s5_nxt = nextCondition(jstate_s1_s5_L0_nxt, jstate_s1_s5_L1_nxt,
                                     jstate_s1_s5_L2_nxt, jstate_s1_s5_L3_nxt,
                                     jstate)
    inner_loop_pc_s1_s5_nxt = nextCondition(inner_loop_pc_s1_s5_L0_nxt,
                                            inner_loop_pc_s1_s5_L1_nxt,
                                            inner_loop_pc_s1_s5_L2_nxt,
                                            inner_loop_pc_s1_s5_L3_nxt,
                                            inner_loop_pc)
    train_pc_s1_s5_nxt = ila.ite(SumHiddenL3 & LastJ & LastV, StorePosState,
                                 SumHiddenState)
    # L0
    # Accumulate edges of hidden units that are 1; keep a running maximum
    # (signed compare) and store the finished sum into visibleEnergies[jstate].
    train_sum_s1_s5_L0_nxt = ila.ite(h_cnt == 0, h0_16, train_sum) + ila.ite(
        ila.load(hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0]) == 1,
        fpconvert(ila.load(edges_input, edge_load_addr), FPedge, FPsum), h0_16)
    _train_max_origin_L0 = ila.ite(
        jstate == 0, fpconst(-500, FPsum).ast, train_max
    )  # make sure the first time we are comparing with init sum
    train_max_s1_s5_L0_nxt = ila.ite(
        LastH,
        ila.ite(ila.sgt(train_sum_s1_s5_L0_nxt, _train_max_origin_L0),
                train_sum_s1_s5_L0_nxt, _train_max_origin_L0), train_max)
    visibleEnergy_s1_s5_L0_nxt = ila.ite(
        LastH,
        ila.store(visibleEnergy, jstate[KWIDTH - 1:0], train_sum_s1_s5_L0_nxt),
        visibleEnergy)
    # L1
    # sum3: 64_64 -> dp: 32_32
    # Subtract (max - 31) from each stored energy; the offset is applied to
    # train_max once, when jstate == 0.
    _31_sum = fpconst(31, FPsum).ast
    train_max_s1_s5_L1_nxt = ila.ite(jstate == 0, train_max - _31_sum,
                                     train_max)
    _st_val_L1 = ila.load(visibleEnergy,
                          jstate[KWIDTH - 1:0]) - train_max_s1_s5_L1_nxt
    visibleEnergy_s1_s5_L1_nxt = ila.store(visibleEnergy, jstate[KWIDTH - 1:0],
                                           _st_val_L1)
    # L2
    # pow2[jstate] = to_int_exp(visibleEnergies[jstate]); accumulate into
    # sumOfpow2 (restarted when jstate == 0).
    _pow2_new_val = ila.appfun(to_int_exp,
                               ila.load(visibleEnergy, jstate[KWIDTH - 1:0]))
    _pow2_new_convert = fpconvert(_pow2_new_val, FPpow, FPsum3)
    sumOfpow2_s1_s5_L2_nxt = ila.ite(jstate == 0, h0_64,
                                     sumOfpow2) + _pow2_new_convert
    pow2_s1_s5_L2_nxt = ila.store(pow2, jstate[KWIDTH - 1:0], _pow2_new_val)
    # L3
    # visible_unit[v_cnt + jstate] = 1 when divide(pow2[jstate], sumOfpow2)
    # exceeds rand().
    _probs = ila.appfun(divide_func,
                        [ila.load(pow2, jstate[KWIDTH - 1:0]), sumOfpow2])
    _RAND = ila.appfun(rand_func)
    _visible_unit_new_val = ila.ite(_probs > _RAND, b1, b0)
    _vu_idx = v_cnt + jstate
    _visible_unit_s1_s5_L3_1 = ila.store(visible_unit,
                                         _vu_idx[VISIBLE_UNIT_WIDTH - 1:0],
                                         _visible_unit_new_val)
    visible_unit_s1_s5_L3_nxt = ila.ite(
        LastJ & LastV,
        ila.store(_visible_unit_s1_s5_L3_1, nv[VISIBLE_UNIT_WIDTH - 1:0], b1),
        _visible_unit_s1_s5_L3_1)
    # when exit visible unit should be made to store 1 at nv
    train_sum_s1_s5_nxt = nextCondition(train_sum_s1_s5_L0_nxt, train_sum,
                                        train_sum, train_sum, train_sum)
    train_max_s1_s5_nxt = nextCondition(train_max_s1_s5_L0_nxt,
                                        train_max_s1_s5_L1_nxt, train_max,
                                        train_max, train_max)
    visible_unit_s1_s5_nxt = nextCondition(visible_unit, visible_unit,
                                           visible_unit,
                                           visible_unit_s1_s5_L3_nxt,
                                           visible_unit)
    visibleEnergy_s1_s5_nxt = nextCondition(visibleEnergy_s1_s5_L0_nxt,
                                            visibleEnergy_s1_s5_L1_nxt,
                                            visibleEnergy, visibleEnergy,
                                            visibleEnergy)
    sumOfpow2_s1_s5_nxt = nextCondition(sumOfpow2, sumOfpow2,
                                        sumOfpow2_s1_s5_L2_nxt, sumOfpow2,
                                        sumOfpow2)
    pow2_s1_s5_nxt = nextCondition(pow2, pow2, pow2_s1_s5_L2_nxt, pow2, pow2)
    # before s6: store pos
    h_cnt_sp_nxt = ila.ite(h_cnt == nh, h0_16, h_cnt + 1)
    v_cnt_sp_nxt = ila.ite(h_cnt == nh, ila.ite(v_cnt == nv, h0_16, v_cnt + 1),
                           v_cnt)
    _data_load = ila.load(train_input, v_cnt[VISIBLE_UNIT_WIDTH - 1:0])
    # NOTE(review): _pos_sp_cond is computed but never used.
    _pos_sp_cond = (_data_load != 2)
    # pos bit = (data[v_cnt] != 0) AND hidden_unit[h_cnt].
    _pos_sp_val = ila.ite(_data_load != 0, b1, b0) & ila.load(
        hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0])
    _pos_st_addr = (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt
    pos_sp_nxt = ila.store(pos, _pos_st_addr, _pos_sp_val)
    train_pc_sp_nxt = ila.ite((h_cnt == nh) & (v_cnt == nv), EdgeUpdateState,
                              StorePosState)
    # update edge : s6
    h_cnt_s6_nxt = ila.ite(h_cnt == nh, h0_16, h_cnt + 1)
    v_cnt_s6_nxt = ila.ite(h_cnt == nh, ila.ite(v_cnt == nv, v_cnt, v_cnt + 1),
                           v_cnt)
    _pos_ld_addr = (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt
    train_pos = ila.load(pos, _pos_ld_addr) != 0
    train_neg = (ila.load(
        hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0]) != 0) & (ila.load(
            visible_unit, v_cnt[VISIBLE_UNIT_WIDTH - 1:0]) != 0)
    edge_original = ila.load(edges_mem, (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt)
    # pos && !neg: edge += LEARN_RATE; !pos && neg: edge -= LEARN_RATE;
    # otherwise the edge is unchanged.
    edge_new = ila.ite((train_pos) & (~train_neg),
                       edge_original + fpconst(LEARN_RATE, FPedge).ast,
                       ila.ite((~train_pos) & (train_neg),
                               edge_original - fpconst(LEARN_RATE, FPedge).ast,
                               edge_original))
    edge_s6_nxt = ila.store(edges_mem, (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt,
                            edge_new)
    # Both branches are EdgeUpdateState, so the condition has no effect here.
    train_pc_s6_nxt = ila.ite((h_cnt == nh) & (v_cnt == nv), EdgeUpdateState,
                              EdgeUpdateState)
    # no need to jump back itself, because the flag: train_input_done is turned back to zero
    # don't forget to set back signals in Uabs ()
    train_done = TrainUabs.getreg('train_input_done')
    train_uabs_index = TrainUabs.getreg('index')
    train_uabs_loop_count = TrainUabs.getreg('loop_count')
    train_uabs_upc = TrainUabs.getreg('upc')
    # add prefix s6 !!!
    # When the edge update has covered the last (h, v) pair, hand control back
    # to 'compute': advance/wrap 'index', bump 'loop_count' on wrap, reset
    # 'upc' to StartReadState and clear train_input_done.
    s6_complete = (h_cnt == nh) & (v_cnt == nv)
    index_nxt_s6_nxt = ila.ite(
        s6_complete,
        ila.ite((train_uabs_index == nu - 1) & (train_uabs_loop_count != nlp),
                h0_16, train_uabs_index + 1), train_uabs_index)
    # assert (train_uabs_index == ntu - 1) & (train_uabs_loop_count == nlp) should never happen
    loop_count_s6_nxt = ila.ite(
        s6_complete & (train_uabs_index == nu - 1) &
        (train_uabs_loop_count != nlp), train_uabs_loop_count + 1,
        train_uabs_loop_count)
    upc_s6_nxt = ila.ite(s6_complete, StartReadState, train_uabs_upc)
    train_input_done_s6_nxt_nxt = ila.ite(s6_complete, b0, train_done)

    # data -> hidden_unit -> visible_unit -> edge
    # data -> edge
    # add
    def TrainNext(e1, e2, e3, default):
        # Select by train state: SumEdge -> e1, SumHidden -> e2,
        # EdgeUpdate -> e3, anything else (including StorePos) -> default.
        return ila.ite(
            SumEdge, e1,
            ila.ite(SumHidden, e2, ila.ite(EdgeUpdate, e3, default)))

    def TrainNextSP(e1, e2, e3, e4, default):
        # Same as TrainNext, with an extra StorePos -> e3 case before
        # EdgeUpdate -> e4.
        return ila.ite(
            SumEdge, e1,
            ila.ite(SumHidden, e2,
                    ila.ite(StorePos, e3, ila.ite(EdgeUpdate, e4, default))))

    # The TrainChoice* wrappers are not called anywhere in this function.
    def TrainChoice5(name, e1, e2, e3, default):
        return ila.choice(name, e1, e2, e3, default)

    def TrainChoice4(name, e1, e2, default):
        return ila.choice(name, e1, e2, default)

    def TrainChoice3(name, e1, default):
        return ila.choice(name, e1, default)

    TrainUabs.set_init('train_upc', pc_init)
    TrainUabs.set_init('train_v_cnt', v_cnt_init)
    TrainUabs.set_init('train_h_cnt', h_cnt_init)
    TrainUabs.set_next(
        'jstate', TrainNext(jstate_s0_nxt, jstate_s1_s5_nxt, jstate, jstate))
    TrainUabs.set_next(
        'train_sum',
        TrainNext(train_sum_s0_nxt, train_sum_s1_s5_nxt, train_sum, train_sum))
    TrainUabs.set_next(
        'train_v_cnt',
        TrainNextSP(v_cnt_s0_nxt, v_cnt_s1_s5_nxt, v_cnt_sp_nxt, v_cnt_s6_nxt,
                    v_cnt))
    TrainUabs.set_next(
        'train_h_cnt',
        TrainNextSP(h_cnt_s0_nxt, h_cnt_s1_s5_nxt, h_cnt_sp_nxt, h_cnt_s6_nxt,
                    h_cnt))
    TrainUabs.set_next(
        'train_upc',
        TrainNextSP(train_pc_s0_nxt, train_pc_s1_s5_nxt, train_pc_sp_nxt,
                    train_pc_s6_nxt, train_pc))
    TrainUabs.set_next(
        'train_max',
        TrainNext(train_max, train_max_s1_s5_nxt, train_max, train_max))
    TrainUabs.set_next(
        'hidden_unit',
        TrainNext(hidden_update_s0_2, hidden_unit, hidden_unit, hidden_unit))
    TrainUabs.set_next(
        'visible_unit',
        TrainNext(visible_unit, visible_unit_s1_s5_nxt, visible_unit,
                  visible_unit))
    TrainUabs.set_next('edges',
                       TrainNext(edges_mem, edges_mem, edge_s6_nxt, edges_mem))
    # State owned by the parent 'compute' abstraction is only written in the
    # EdgeUpdate state.
    TrainUabs.set_next(
        'index',
        TrainNext(train_uabs_index, train_uabs_index, index_nxt_s6_nxt,
                  train_uabs_index))
    TrainUabs.set_next(
        'loop_count',
        TrainNext(train_uabs_loop_count, train_uabs_loop_count,
                  loop_count_s6_nxt, train_uabs_loop_count))
    TrainUabs.set_next(
        'upc',
        TrainNext(train_uabs_upc, train_uabs_upc, upc_s6_nxt, train_uabs_upc))
    TrainUabs.set_next(
        'train_input_done',
        TrainNext(train_done, train_done, train_input_done_s6_nxt_nxt,
                  train_done))
    # newly added
    TrainUabs.set_next(
        'visibleEnergies',
        TrainNext(visibleEnergy, visibleEnergy_s1_s5_nxt, visibleEnergy,
                  visibleEnergy))
    TrainUabs.set_next(
        'sumOfpow2',
        TrainNext(sumOfpow2, sumOfpow2_s1_s5_nxt, sumOfpow2, sumOfpow2))
    TrainUabs.set_next('pow2', TrainNext(pow2, pow2_s1_s5_nxt, pow2, pow2))
    TrainUabs.set_next('pos', ila.ite(StorePos, pos_sp_nxt, pos))
    TrainUabs.set_next(
        'per_v_pc',
        TrainNext(inner_loop_pc_s0_nxt, inner_loop_pc_s1_s5_nxt, inner_loop_pc,
                  inner_loop_pc))
    #------------------------------------
    # Predict UUABS
    #------------------------------------
    # data -> predict_result
    # From here on many Train-section names are re-bound to Predict state.
    PredictUabs = uabs.add_microabstraction('predict', predict_input_done == 1)
    sigmoid_func = PredictUabs.fun('sigmoid', 64, [16])  # DATA_sum_, 01_D
    # rand_func is declared but not applied anywhere in the Predict section.
    rand_func = PredictUabs.fun('rand', 64, [])  # generate random number
    to_int_exp = PredictUabs.fun('to_int_exp', 32, [16])  #
    round_func = PredictUabs.fun('round', 8, [32])  # 05_D -> u8
    divide_func = PredictUabs.fun(
        'divide', 64, [32, 64])  # dp:32_32 / sum_of_pow2 64_64 = 64_1
    hidden_unit = PredictUabs.mem('hidden_unit', HIDDEN_UNIT_WIDTH, 1)
    visibleEnergy = PredictUabs.mem('visibleEnergies', KWIDTH, 16)
    predict_result = PredictUabs.getmem('predict_result')
    predict_sum = PredictUabs.reg('predict_sum', 16)
    predict_max = PredictUabs.reg('predict_max', 16)
    sumOfpow2 = PredictUabs.reg('sumOfpow2', 64)
    pow2 = PredictUabs.mem('pow2', KWIDTH, 32)
    predict_vector = PredictUabs.mem('predict_vector', VISIBLE_UNIT_WIDTH, 1)
    inner_loop_pc = PredictUabs.reg('per_v_pc', 4)
    count = PredictUabs.reg('count', 8)
    jstate = PredictUabs.reg('jstate', 16)
    expectation = PredictUabs.reg('expectation', 32)
    prediction = PredictUabs.reg('prediction', 8)
    predict_pc = PredictUabs.reg('predict_upc', 4)
    # Re-evaluate
    v_cnt = PredictUabs.reg('predict_v_cnt', 16)
    h_cnt = PredictUabs.reg('predict_h_cnt', 16)
    predict_input = PredictUabs.getmem('data')
    edges_input = PredictUabs.getmem('edges')
    nv = PredictUabs.getreg('num_visible')
    nh = PredictUabs.getreg('num_hidden')
    nu = PredictUabs.getreg('num_users')
    ntu = PredictUabs.getreg('num_testusers')
    nlp = PredictUabs.getreg('num_loops')
    # Decode conditions and encodings for 'predict_upc':
    # 0 SumEdge, 1 SumHidden, 3 GenResult, 2 WaitForWrite.
    SumEdge = predict_pc == 0
    SumEdgeState = const(0, 4)
    SumHidden = predict_pc == 1
    SumHiddenState = const(1, 4)
    GenResult = predict_pc == 3
    GenResultState = const(3, 4)
    WaitForWrite = predict_pc == 2
    WaitForWriteState = const(2, 4)
    # NOTE(review): GenResult is not listed in decode_exprs -- confirm whether
    # that is intentional.
    PredictUabs.decode_exprs = [SumEdge, SumHidden, WaitForWrite]
    #Begin
    v_cnt_init = const(0, 16)
    h_cnt_init = const(0, 16)
    pc_init = const(0, 4)
    #SumEdge: s0
    edge_load_addr = (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt
    predict_sum_s0_nxt = ila.ite(v_cnt == 0, const(
        0, 16), predict_sum) + ila.ite(
            ila.load(predict_input, v_cnt[DATAMEM_ADDR_WIDTH - 1:0]) == 1,
            fpconvert(ila.load(edges_input, edge_load_addr), FPedge, FPsum),
            const(0, 16))
    v_cnt_s0_nxt = ila.ite(v_cnt == nv, h0_16, v_cnt + 1)
    h_cnt_s0_nxt = ila.ite((v_cnt == nv),
                           ila.ite(h_cnt == nh - 1, h0_16, h_cnt + 1), h_cnt)
    # The h_cnt wrap above (at nh - 1) is for transiting to the next state.
    # Unlike the Train section, the hidden bit is thresholded against the
    # constant 0.5 instead of rand().
    hidden_update_s0_0 = ila.ite(
        fpconst(0.5, FP01_D).ast < ila.appfun(sigmoid_func,
                                              predict_sum_s0_nxt), b1, b0)
    hidden_update_s0_1 = ila.ite(
        v_cnt == nv,
        ila.store(hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0],
                  hidden_update_s0_0), hidden_unit)
    hidden_update_s0_2 = ila.ite((v_cnt == nv) & (h_cnt == nh - 1),
                                 ila.store(hidden_update_s0_1,
                                           nh[HIDDEN_UNIT_WIDTH - 1:0], b1),
                                 hidden_update_s0_1)
    # hidden_update_s0_next is not read again; set_next uses hidden_update_s0_2.
    hidden_update_s0_next = hidden_update_s0_2
    predict_pc_s0_nxt = ila.ite((v_cnt == nv) & (h_cnt == nh - 1),
                                SumHiddenState, SumEdgeState)
    jstate_s0_nxt = h0_16
    # NOTE(review): this calls ila.const directly, whereas every other constant
    # in this function goes through the local const() helper (rbm.const) --
    # confirm that ila.const exists and is equivalent.
    count_s0_nxt = ila.const(0, 8)
    inner_loop_pc_s0_nxt = h0_4
    # add prefix :
    # predict_sum_nxt = ila.ite(SumEdge, predict_sum_s0_nxt, ila.ite(SumHidden, ... ) )
    #-----------------------------
    # SumHiddensK0-K4 : s1-s5
    #
    #-----------------------------
    LastH = h_cnt == nh
    LastJ = jstate == K - 1
    LastV = (v_cnt + K == nv) | (v_cnt + K >= NUM_VISIBLE_MAX)
    # Predict has five SumHidden sub-steps (L0..L4), one more than Train.
    SumHiddenL0 = SumHidden & (inner_loop_pc == 0)
    SumHiddenL1 = SumHidden & (inner_loop_pc == 1)
    SumHiddenL2 = SumHidden & (inner_loop_pc == 2)
    SumHiddenL3 = SumHidden & (inner_loop_pc == 3)
    SumHiddenL4 = SumHidden & (inner_loop_pc == 4)
    h_cnt_s1_s5_L0_nxt = ila.ite(LastH, h0_16, h_cnt + 1)
    jstate_s1_s5_L0_nxt = ila.ite(LastH, ila.ite(LastJ, h0_16, jstate + 1),
                                  jstate)
    inner_loop_pc_s1_s5_L0_nxt = ila.ite(LastJ & LastH, h1_4, inner_loop_pc)
    jstate_s1_s5_L1_nxt = ila.ite(LastJ, h0_16, jstate + 1)
    inner_loop_pc_s1_s5_L1_nxt = ila.ite(LastJ, h2_4, inner_loop_pc)
    jstate_s1_s5_L2_nxt = jstate_s1_s5_L1_nxt
    inner_loop_pc_s1_s5_L2_nxt = ila.ite(LastJ, h3_4, inner_loop_pc)
    jstate_s1_s5_L3_nxt = jstate_s1_s5_L2_nxt
    inner_loop_pc_s1_s5_L3_nxt = ila.ite(LastJ, h4_4, inner_loop_pc)
    jstate_s1_s5_L4_nxt = jstate_s1_s5_L3_nxt
    # NOTE(review): both branches of the inner ite are h0_4, so LastV has no
    # effect on the value here.
    inner_loop_pc_s1_s5_L4_nxt = ila.ite(
        LastJ,
        ila.ite(LastV, h0_4, h0_4),  # will choose to go back or not
        inner_loop_pc)

    def nextCondition(l0, l1, l2, l3, l4, default):
        # Five-way variant that replaces the Train-section nextCondition from
        # this point on: select l0..l4 by the active sub-step, else default.
        return ila.ite(
            SumHiddenL0, l0,
            ila.ite(
                SumHiddenL1, l1,
                ila.ite(
                    SumHiddenL2, l2,
                    ila.ite(SumHiddenL3, l3, ila.ite(SumHiddenL4, l4,
                                                     default)))))

    h_cnt_s1_s5_nxt = nextCondition(h_cnt_s1_s5_L0_nxt, h_cnt, h_cnt, h_cnt,
                                    h_cnt, h_cnt)
    v_cnt_s1_s5_nxt = ila.ite(SumHiddenL4 & LastJ,
                              ila.ite(LastV, h0_16, v_cnt + K), v_cnt)
    jstate_s1_s5_nxt = nextCondition(jstate_s1_s5_L0_nxt, jstate_s1_s5_L1_nxt,
                                     jstate_s1_s5_L2_nxt, jstate_s1_s5_L3_nxt,
                                     jstate_s1_s5_L4_nxt, jstate)
    inner_loop_pc_s1_s5_nxt = nextCondition(inner_loop_pc_s1_s5_L0_nxt,
                                            inner_loop_pc_s1_s5_L1_nxt,
                                            inner_loop_pc_s1_s5_L2_nxt,
                                            inner_loop_pc_s1_s5_L3_nxt,
                                            inner_loop_pc_s1_s5_L4_nxt,
                                            inner_loop_pc)
    predict_pc_s1_s5_nxt = ila.ite(SumHiddenL4 & LastJ & LastV, GenResultState,
                                   SumHiddenState)
    # L0
    predict_sum_s1_s5_L0_nxt = ila.ite(
        h_cnt == 0, h0_16, predict_sum) + ila.ite(
            ila.load(hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0]) == 1,
            fpconvert(ila.load(edges_input, edge_load_addr), FPedge, FPsum),
            h0_16)
    _predict_max_origin_L0 = ila.ite(
        jstate == 0, fpconst(-500, FPsum).ast, predict_max
    )  # make sure the first time we are comparing with init sum
    predict_max_s1_s5_L0_nxt = ila.ite(
        LastH,
        ila.ite(ila.sgt(predict_sum_s1_s5_L0_nxt, _predict_max_origin_L0),
                predict_sum_s1_s5_L0_nxt, _predict_max_origin_L0), predict_max)
    visibleEnergy_s1_s5_L0_nxt = ila.ite(
        LastH,
        ila.store(visibleEnergy, jstate[KWIDTH - 1:0],
                  predict_sum_s1_s5_L0_nxt), visibleEnergy)
    # L1
    # sum3: 64_64 -> dp: 32_32
    _31_sum = fpconst(31, FPsum).ast
    predict_max_s1_s5_L1_nxt = ila.ite(jstate == 0, predict_max - _31_sum,
                                       predict_max)
    _st_val_L1 = ila.load(visibleEnergy,
                          jstate[KWIDTH - 1:0]) - predict_max_s1_s5_L1_nxt
    visibleEnergy_s1_s5_L1_nxt = ila.store(visibleEnergy, jstate[KWIDTH - 1:0],
                                           _st_val_L1)
    # L2
    _pow2_new_val = ila.appfun(to_int_exp,
                               ila.load(visibleEnergy, jstate[KWIDTH - 1:0]))
    _pow2_new_convert = fpconvert(_pow2_new_val, FPpow, FPsum3)
    sumOfpow2_s1_s5_L2_nxt = ila.ite(jstate == 0, h0_64,
                                     sumOfpow2) + _pow2_new_convert
    pow2_s1_s5_L2_nxt = ila.store(pow2, jstate[KWIDTH - 1:0], _pow2_new_val)
    # L3
    # expectation accumulates probs * jstate, restarted when jstate == 0.
    _probs = ila.appfun(divide_func,
                        [ila.load(pow2, jstate[KWIDTH - 1:0]), sumOfpow2])
    _mul = fixpoint(_probs, FP01_D) * fixpoint(jstate, FPu16)
    expectation_s1_s5_L3_nxt = ila.ite(jstate == 0, h0_32,
                                       expectation) + _mul.toFormat(FP05_D)
    # L4
    # predict_vector[v_cnt + jstate] = 1 only where jstate equals
    # round(expectation).
    _prediction = ila.zero_extend(ila.appfun(round_func, [expectation]), 16)
    _pv_val = ila.ite(jstate == _prediction, b1, b0)
    _pv_idx = v_cnt + jstate
    _first_store = ila.store(predict_vector, _pv_idx[VISIBLE_UNIT_WIDTH - 1:0],
                             _pv_val)
    predict_vector_s1_s5_L4_nxt = ila.ite(
        SumHiddenL4 & LastV & LastJ,
        ila.store(_first_store, nv[VISIBLE_UNIT_WIDTH - 1:0], b1),
        _first_store)
    predict_sum_s1_s5_nxt = nextCondition(predict_sum_s1_s5_L0_nxt,
                                          predict_sum, predict_sum,
                                          predict_sum, predict_sum,
                                          predict_sum)
    predict_max_s1_s5_nxt = nextCondition(predict_max_s1_s5_L0_nxt,
                                          predict_max_s1_s5_L1_nxt,
                                          predict_max, predict_max,
                                          predict_max, predict_max)
    visibleEnergy_s1_s5_nxt = nextCondition(visibleEnergy_s1_s5_L0_nxt,
                                            visibleEnergy_s1_s5_L1_nxt,
                                            visibleEnergy, visibleEnergy,
                                            visibleEnergy, visibleEnergy)
    sumOfpow2_s1_s5_nxt = nextCondition(sumOfpow2, sumOfpow2,
                                        sumOfpow2_s1_s5_L2_nxt, sumOfpow2,
                                        sumOfpow2, sumOfpow2)
    pow2_s1_s5_nxt = nextCondition(pow2, pow2, pow2_s1_s5_L2_nxt, pow2, pow2,
                                   pow2)
    expectation_s1_s5_nxt = ila.ite(SumHiddenL3, expectation_s1_s5_L3_nxt,
                                    expectation)
    predict_vector_s1_s5_nxt = ila.ite(SumHiddenL4,
                                       predict_vector_s1_s5_L4_nxt,
                                       predict_vector)
    count_s1_s5_nxt = ila.ite(SumHiddenL4 & LastV & LastJ, h0_8, count)
    # before s6: store pos
    # (LastV / LastJ are re-assigned with the same expressions as above)
    LastV = (v_cnt + K == nv) | (v_cnt + K >= NUM_VISIBLE_MAX)
    LastJ = jstate == K - 1
    # NOTE(review): both branches are v_cnt + K, and v_cnt advances regardless
    # of LastJ -- confirm this is the intended update.
    v_cnt_sp_nxt = ila.ite(LastV, v_cnt + K, v_cnt + K)
    jstate_sp_nxt = ila.ite(LastJ, h0_16, jstate + 1)
    _prediction_old = ila.ite(jstate == 0, h0_8, prediction)
    _pv_idx = v_cnt + jstate
    _predict_result_sp_val = ila.load(predict_vector,
                                      _pv_idx[VISIBLE_UNIT_WIDTH - 1:0])
    # prediction becomes jstate + 1 (low 8 bits) for the slot whose
    # predict_vector bit is 1.
    prediction_sp_nxt = ila.ite(_predict_result_sp_val == 1,
                                (jstate + 1)[7:0], _prediction_old)
    count_sp_nxt = ila.ite(LastJ, count + 1, count)
    predict_result_sp_nxt = ila.ite(
        LastJ,
        ila.store(predict_result, count[PREDICT_RESULT_WIDTH - 1:0],
                  prediction), predict_result)
    predict_pc_sp_nxt = ila.ite(LastV & LastJ, WaitForWriteState,
                                GenResultState)
    wr_complete = PredictUabs.getreg('wr_complete')
    wr_req = PredictUabs.getreg('wr_request')
    wr_len = PredictUabs.getreg('wr_length')
    wr_idx = PredictUabs.getreg('wr_index')
    cur_idx = PredictUabs.getreg('index')  # 32
    # On leaving GenResult, raise a write request of num_movies entries at
    # index num_movies * index and clear wr_complete.  nm is still the 32-bit
    # zero-extended value bound in the Compute section.
    exitLoop = LastV & LastJ
    wr_request_sp_nxt = ila.ite(exitLoop, b1, wr_req)
    wr_index_sp_nxt = ila.ite(
        exitLoop,
        ila.zero_extend(nm, 32) * ila.zero_extend(cur_idx, 32), wr_idx)
    wr_length_sp_nxt = ila.ite(exitLoop, ila.zero_extend(nm, 32), wr_len)
    wr_complete_sp_nxt = ila.ite(exitLoop, b0, wr_complete)
    # s6:
    #---------------------
    # update edge : s6
    #---------------------
    # (header kept from the Train section; in Predict this state waits for the
    # write-back to complete)
    FinishOneRound = (wr_req == 0) & (wr_complete == 1)
    # Both branches are WaitForWriteState.
    predict_pc_s6_nxt = ila.ite(FinishOneRound, WaitForWriteState,
                                WaitForWriteState)
    # its value does not matter because it will be terminated by predict_input_done
    # don't forget to set back signals in Uabs ()
    predict_done = PredictUabs.getreg('predict_input_done')
    predict_uabs_index = PredictUabs.getreg('index')
    predict_uabs_loop_count = PredictUabs.getreg('loop_count')
    predict_uabs_upc = PredictUabs.getreg('upc')
    all_done = PredictUabs.getreg('done')
    # add prefix s6 !!!
    # After a completed write: hold 'index' on the last test user, otherwise
    # advance it; 'upc' goes to FinishState on the last test user, otherwise
    # back to StartReadState.
    index_nxt_s6_nxt = ila.ite(
        FinishOneRound,
        ila.ite(
            (predict_uabs_index == ntu - 1) & (predict_uabs_loop_count == nlp),
            predict_uabs_index, predict_uabs_index + 1), predict_uabs_index)
    wr_complete_s6_nxt = ila.ite(FinishOneRound, b0, wr_complete)
    # assert (predict_uabs_index == nu - 1) & (predict_uabs_loop_count != nlp) should never happen
    #loop_count_s6_nxt = ila.ite( (predict_uabs_index == nu - 1) & (predict_uabs_loop_count != nlp) , predict_uabs_loop_count + 1, predict_uabs_loop_count )
    upc_s6_nxt = ila.ite(
        FinishOneRound,
        ila.ite(
            (predict_uabs_index == ntu - 1) & (predict_uabs_loop_count == nlp),
            FinishState, StartReadState), predict_uabs_upc)
    predict_input_done_s6_nxt_nxt = ila.ite(FinishOneRound, b0, predict_done)
    # 'done' is driven to 1 only on the final round; in every other
    # WaitForWrite step it is driven to 0 rather than held.
    all_done_s6_nxt = ila.ite(
        FinishOneRound & (predict_uabs_index == ntu - 1) &
        (predict_uabs_loop_count == nlp), b1, b0)

    # data -> hidden_unit -> visible_unit -> edge
    # data -> edge
    # add
    # add
    def predictNext(e1, e2, e3, default):
        # Select by predict state: SumEdge -> e1, SumHidden -> e2,
        # WaitForWrite -> e3, anything else (including GenResult) -> default.
        return ila.ite(
            SumEdge, e1,
            ila.ite(SumHidden, e2, ila.ite(WaitForWrite, e3, default)))

    def predictNextSp(e1, e2, e3, e4, default):
        # Same as predictNext, with an extra GenResult -> e3 case before
        # WaitForWrite -> e4.
        return ila.ite(
            SumEdge, e1,
            ila.ite(SumHidden, e2,
                    ila.ite(GenResult, e3, ila.ite(WaitForWrite, e4,
                                                   default))))

    def ite(inst, e, default):
        # Thin alias for ila.ite.
        return ila.ite(inst, e, default)

    PredictUabs.set_init('predict_upc', pc_init)
    PredictUabs.set_init('predict_v_cnt', v_cnt_init)
    PredictUabs.set_init('predict_h_cnt', h_cnt_init)
    PredictUabs.set_next(
        'jstate',
        predictNextSp(jstate_s0_nxt, jstate_s1_s5_nxt, jstate_sp_nxt, jstate,
                      jstate))
    PredictUabs.set_next(
        'predict_sum',
        predictNext(predict_sum_s0_nxt, predict_sum_s1_s5_nxt, predict_sum,
                    predict_sum))
    PredictUabs.set_next(
        'predict_v_cnt',
        predictNextSp(v_cnt_s0_nxt, v_cnt_s1_s5_nxt, v_cnt_sp_nxt, v_cnt,
                      v_cnt))
    PredictUabs.set_next(
        'predict_h_cnt',
        predictNext(h_cnt_s0_nxt, h_cnt_s1_s5_nxt, h_cnt, h_cnt))
    PredictUabs.set_next(
        'predict_upc',
        predictNextSp(predict_pc_s0_nxt, predict_pc_s1_s5_nxt,
                      predict_pc_sp_nxt, predict_pc_s6_nxt, predict_pc))
    PredictUabs.set_next(
        'predict_max',
        predictNext(predict_max, predict_max_s1_s5_nxt, predict_max,
                    predict_max))
    PredictUabs.set_next(
        'hidden_unit',
        predictNext(hidden_update_s0_2, hidden_unit, hidden_unit, hidden_unit))
    PredictUabs.set_next(
        'count',
        predictNextSp(count_s0_nxt, count_s1_s5_nxt, count_sp_nxt, count,
                      count))
    PredictUabs.set_next(
        'per_v_pc',
        predictNext(inner_loop_pc_s0_nxt, inner_loop_pc_s1_s5_nxt,
                    inner_loop_pc, inner_loop_pc))
    # State owned by 'compute' / the top level is only written in the
    # WaitForWrite (and, for the wr_* registers, GenResult) states.
    PredictUabs.set_next(
        'index',
        predictNext(predict_uabs_index, predict_uabs_index, index_nxt_s6_nxt,
                    predict_uabs_index))
    PredictUabs.set_next(
        'upc',
        predictNext(predict_uabs_upc, predict_uabs_upc, upc_s6_nxt,
                    predict_uabs_upc))
    PredictUabs.set_next(
        'predict_input_done',
        predictNext(predict_done, predict_done, predict_input_done_s6_nxt_nxt,
                    predict_done))
    PredictUabs.set_next(
        'done', predictNext(all_done, all_done, all_done_s6_nxt, all_done))
    PredictUabs.set_next(
        'wr_request',
        predictNextSp(wr_req, wr_req, wr_request_sp_nxt, wr_req, wr_req))
    PredictUabs.set_next(
        'wr_length',
        predictNextSp(wr_len, wr_len, wr_length_sp_nxt, wr_len, wr_len))
    PredictUabs.set_next(
        'wr_index',
        predictNextSp(wr_idx, wr_idx, wr_index_sp_nxt, wr_idx, wr_idx))
    PredictUabs.set_next(
        'wr_complete',
        predictNextSp(wr_complete, wr_complete, wr_complete_sp_nxt,
                      wr_complete_s6_nxt, wr_complete))
    # newly added
    PredictUabs.set_next(
        'visibleEnergies',
        predictNext(visibleEnergy, visibleEnergy_s1_s5_nxt, visibleEnergy,
                    visibleEnergy))
    PredictUabs.set_next(
        'sumOfpow2',
        predictNext(sumOfpow2, sumOfpow2_s1_s5_nxt, sumOfpow2, sumOfpow2))
    PredictUabs.set_next('pow2', predictNext(pow2, pow2_s1_s5_nxt, pow2, pow2))
    PredictUabs.set_next(
        'expectation',
        predictNext(expectation, expectation_s1_s5_nxt, expectation,
                    expectation))
    PredictUabs.set_next(
        'predict_vector',
        predictNext(predict_vector, predict_vector_s1_s5_nxt, predict_vector,
                    predict_vector))
    PredictUabs.set_next('prediction',
                         ite(GenResult, prediction_sp_nxt, prediction))
    PredictUabs.set_next('predict_result',
                         ite(GenResult, predict_result_sp_nxt, predict_result))
    #------------------------------------
    # Store UABS
    #------------------------------------
    # store is triggered by inst as uabs?
    # wr_grant == 1 is an instruction
    wr_granted = rbm.reg('wr_granted', 1)
    # At this point wr_request / wr_grant / data_out are still the top-level
    # handles, while predict_result is the handle obtained in the Predict
    # section.
    rbm.set_next('wr_granted',
                 ila.ite((wr_request & wr_grant) == 1, b1, wr_granted))
    # On grant, data_out is preloaded with predict_result[0] (zero-extended).
    data_out_1st_set = ila.zero_extend(
        ila.load(predict_result, const(0, PREDICT_RESULT_WIDTH)), 32)
    rbm.set_next(
        'data_out',
        ila.ite((wr_request & wr_grant) == 1, data_out_1st_set, data_out))
    # This is a hard decision,
    # as we set_next, the reaction as we defined will be appear in the next cycle
    StoreUabs = rbm.add_microabstraction('store', wr_granted == 1)
    store_idx = StoreUabs.reg('i', 16)
    # The names below are re-bound to the StoreUabs views of the same state;
    # nm is now the raw 16-bit num_movies register.
    nm = StoreUabs.getreg('num_movies')
    wr_granted = StoreUabs.getreg('wr_granted')
    wr_request = StoreUabs.getreg('wr_request')
    wr_complete = StoreUabs.getreg('wr_complete')
    predict_result = StoreUabs.getmem('predict_result')
    StoreUabs.set_init('i', h1_16)
    StoreUabs.set_next('i', ila.ite(store_idx < nm, store_idx + 1, store_idx))
    StoreUabs.set_next('wr_granted', ila.ite(store_idx < nm, wr_granted, b0))
    # NOTE(review): 'i' is initialised to 1 above and only ever incremented, so
    # confirm that store_idx == 0 can actually occur and clear wr_request.
    StoreUabs.set_next('wr_request', ila.ite(store_idx == 0, b0, wr_request))
    StoreUabs.set_next('wr_complete', ila.ite(store_idx < nm, wr_complete, b1))
    # data_out is re-bound here but the handle is not read afterwards.
    data_out = StoreUabs.getreg('data_out')
    # possibly one cycle earlier
    StoreUabs.set_next(
        'data_out',
        ila.zero_extend(
            ila.load(predict_result, store_idx[PREDICT_RESULT_WIDTH - 1:0]),
            32))

    #---------------------------
    # Add no next
    #
    # Explicit "hold value" next-state functions for state that the given
    # abstraction level itself never updates.
    def keepNC(Abs, name):
        # Register `name` of abstraction `Abs` keeps its current value.
        Abs.set_next(name, Abs.getreg(name))

    def keepMemNC(Abs, name):
        # Memory `name` of abstraction `Abs` keeps its current contents.
        Abs.set_next(name, Abs.getmem(name))

    keepNC(rbm, 'done')
    keepNC(rbm, 'wr_request')
    keepNC(rbm, 'wr_index')
    keepNC(rbm, 'wr_length')
    keepNC(rbm, 'rd_index')
    keepNC(rbm, 'rd_length')
    keepNC(rbm, 'rd_request')
    keepMemNC(uabs, 'edges')
    keepNC(rbm, 'rd_complete')
    keepNC(rbm, 'wr_complete')
    return rbm
def createSHAILA(synstates, enable_ps):
    """Build the "sha" ILA template, synthesize the requested states, export them.

    Args:
        synstates: iterable of state names. Each one is passed to
            ``m.synthesize`` and its next-state AST is exported under
            ``asts/``.
        enable_ps: stored in ``m.enable_parameterized_synthesis``; its
            truthiness also selects the ``en``/``dis`` suffix used in the
            output file names.

    Side effects:
        Writes one "<state> <seconds>" line per state to
        ``sha-times-<suffix>.txt``, exports one AST per state, prints the
        accumulated synthesis time and generates the simulator into ``sim``.
    """
    m = ila.Abstraction("sha")
    m.enable_parameterized_synthesis = enable_ps

    # I/O interface: this is where commands come from.
    cmd = m.inp('cmd', 2)
    cmdaddr = m.inp('cmdaddr', 16)
    cmddata = m.inp('cmddata', 8)
    # response.
    dataout = m.reg('dataout', 8)

    # internal arch state.
    state = m.reg('sha_state', 3)
    rdaddr = m.reg('sha_rdaddr', 16)
    wraddr = m.reg('sha_wraddr', 16)
    oplen = m.reg('sha_len', 16)

    # for the uinst.
    bytes_read = m.reg('sha_bytes_read', 16)
    rd_data = m.reg('sha_rd_data', 512)
    hs_data = m.reg('sha_hs_data', 160)
    xram = m.mem('XRAM', 16, 8)
    sha = m.fun('sha', 160, [512])

    # fetch is just looking at the input command.
    m.fetch_expr = ila.concat([state, cmd, cmdaddr, cmddata])
    m.fetch_valid = (cmd == 1) | (cmd == 2)

    # decode
    rdcmds = [(state == i) & (cmd == 1) & (cmdaddr == addr)
              for addr in xrange(0xfe00, 0xfe10) for i in [0, 1, 2, 3, 4]]
    wrcmds = [(state == 0) & (cmd == 2) & (cmdaddr == addr)
              for addr in xrange(0xfe00, 0xfe10)]
    nopcmds = [(state == i) & (cmd != 1) & (cmdaddr == addr)
               for addr in xrange(0xfe00, 0xfe10) for i in [1, 2, 3, 4]]
    m.decode_exprs = rdcmds + wrcmds + nopcmds

    # read commands.
    statebyte = ila.zero_extend(state, 8)
    rdaddrbyte = ila.readchunk('rd_addr', rdaddr, 8)
    wraddrbyte = ila.readchunk('wr_addr', wraddr, 8)
    oplenbyte = ila.readchunk('op_len', oplen, 8)
    dataoutnext = ila.choice(
        'dataout',
        [statebyte, rdaddrbyte, wraddrbyte, oplenbyte, m.const(0, 8)])
    m.set_next('dataout', dataoutnext)

    # write commands.
    def mb_reg_wr(name, reg):
        # multibyte register write: either one chunk takes cmddata or the
        # register keeps its value.
        reg_wr = ila.writechunk('wr_' + name, reg, cmddata)
        reg_nxt = ila.choice('nxt_' + name, [reg_wr, reg])
        m.set_next(name, reg_nxt)

    mb_reg_wr('sha_rdaddr', rdaddr)
    mb_reg_wr('sha_wraddr', wraddr)
    mb_reg_wr('sha_len', oplen)

    # state
    state_next = ila.choice('state_next', [
        m.const(0, 3),
        m.const(1, 3),
        m.const(2, 3),
        m.const(3, 3),
        m.const(4, 3),
        ila.ite(cmddata == 1, m.const(1, 3), state),
        ila.ite(bytes_read < oplen, m.const(1, 3), m.const(4, 3))
    ])
    m.set_next('sha_state', state_next)

    # these are for the uinst
    # bytes_read: the increment is a plain +64 (it is not clamped to oplen).
    bytes_read_inc = bytes_read + 64
    bytes_read_rst = ila.ite(cmddata == 1, m.const(0, 16), bytes_read)
    bytes_read_nxt = ila.choice(
        'bytes_read_nxt',
        [m.const(0, 16), bytes_read_inc, bytes_read_rst, bytes_read])
    m.set_next('sha_bytes_read', bytes_read_nxt)

    # rd_data
    rdblock_little = ila.loadblk(xram, rdaddr + bytes_read, 64)
    rdblock_big = ila.loadblk_big(xram, rdaddr + bytes_read, 64)
    rd_data_nxt = ila.choice('rd_data_nxt', rdblock_big, rdblock_little,
                             rd_data)
    m.set_next('sha_rd_data', rd_data_nxt)

    # hs_data
    sha_hs_data = ila.appfun(sha, [rd_data])
    hs_data_nxt = ila.choice('hs_data_nxt', sha_hs_data, hs_data)
    m.set_next('sha_hs_data', hs_data_nxt)

    # xram write
    xram_w_sha_little = ila.storeblk(xram, wraddr, hs_data)
    xram_w_sha_big = ila.storeblk_big(xram, wraddr, hs_data)
    xram_nxt = ila.choice('xram_nxt', xram, xram_w_sha_little, xram_w_sha_big)
    m.set_next('XRAM', xram_nxt)

    suffix = 'en' if enable_ps else 'dis'
    t_elapsed = 0

    def sim(s):
        # A fresh simulator instance is used for every query.
        return SHA().simulate(s)

    # synthesis. The timing file is held in a `with` block so it is flushed
    # and closed even when synthesis of some state raises.
    with open('sha-times-%s.txt' % suffix, 'wt') as timefile:
        for s in synstates:
            st = time.clock()
            m.synthesize(s, sim)
            dt = time.clock() - st
            timefile.write('%s %.2f\n' % (s, dt))
            t_elapsed += dt
            ast = m.get_next(s)
            m.exportOne(ast, 'asts/%s_%s' % (s, suffix))

    # Single-argument print(...) produces the same output under the Python 2
    # print statement and the print function.
    print('time: %.2f' % t_elapsed)
    m.generateSimToDir('sim')
def createStates(self):
    """Declare the model state kept on ``self`` and run the build helpers.

    Creates the monitor registers, the two mutex flag/guard register pairs
    and the arbitration functions on ``self.model``, seeds every
    ``*_next`` attribute with its "unchanged" value, and then invokes the
    create*/generate*/set_next* helpers in their fixed order.
    """
    model = self.model
    long_bits = instruction_format.LONG_REG_BITS

    self.pc_list = []  # Two pc
    self.pc_next_list = []  # Two pc's next state function
    self.next_state_dict = {}  # For next state function
    self.pred_registers = []
    self.scalar_registers = []
    self.long_scalar_registers = []

    # Monitor registers; the attribute on self and the register in the
    # model share the same name. Declaration order is preserved.
    monitor_registers = (
        ('log_register', long_bits),
        ('check_register', long_bits),
        ('en_log_register', 1),
        ('en_check_register', 1),
        ('lsg_log_register', 2),
        ('lsg_check_register', 2),
        ('log_atom_flag_register', 1),
        ('check_atom_flag_register', 1),
        ('mflag_log_register', 1),
        ('mflag_check_register', 1),
        ('mguard_log_register', long_bits),
        ('mguard_check_register', long_bits),
    )
    for reg_name, width in monitor_registers:
        setattr(self, reg_name, model.reg(reg_name, width))

    # One flag/guard register pair per index; the next-state lists start
    # out holding the registers themselves.
    self.mutex_flag_list = []
    self.mutex_guard_list = []
    self.mutex_flag_next_list = []
    self.mutex_guard_next_list = []
    for idx in range(2):
        flag = model.reg('mutex_flag_%d' % (idx), 1)
        guard = model.reg('mutex_guard_%d' % (idx), long_bits)
        self.mutex_flag_list.append(flag)
        self.mutex_guard_list.append(guard)
        self.mutex_flag_next_list.append(flag)
        self.mutex_guard_next_list.append(guard)

    # next state functions for monitors.
    self.mflag_log_register_next_cond = ila.bool(False)
    self.mflag_check_register_next_cond = ila.bool(False)
    for reg_name in ('mguard_log_register', 'mguard_check_register',
                     'log_register', 'en_log_register', 'lsg_log_register',
                     'check_register', 'en_check_register',
                     'lsg_check_register', 'log_atom_flag_register',
                     'check_atom_flag_register'):
        setattr(self, reg_name + '_next', getattr(self, reg_name))

    # Zero-argument functions: two 1-bit ones and two LONG_REG_BITS-wide ones.
    self.arb_fun_list = [
        model.fun(fun_name, 1, []) for fun_name in ('arb_fun_0', 'arb_fun_1')
    ]
    self.arb_list = [ila.appfun(fun, []) for fun in self.arb_fun_list]
    self.arb_data_fun_list = [
        model.fun(fun_name, long_bits, [])
        for fun_name in ('arb_data_fun_0', 'arb_data_fun_1')
    ]
    self.arb_data_list = [ila.appfun(fun) for fun in self.arb_data_fun_list]

    self.bar_arrive_inst = []
    self.bar_sync_inst = []
    self.bar_aux_inst = []
    self.bar_sync_list = []
    self.bar_arrive_list = []
    self.bar_aux_list = []

    self.createPC()
    for tid in (0, 1):
        self.createRegs(tid)
    self.createConst()
    self.bar_state_list = []
    for tid in (0, 1):
        self.generate_next_state(tid)
    self.createLog()
    self.createCheck()
    self.set_next_state()
    for tid in (0, 1):
        self.set_next_pc(tid)
def createAESILA(enable_ps):
    """Build the "aes" ILA template and its 'aes_compute' micro-abstraction.

    No synthesis is run here; the function only declares state and the
    candidate next-state expressions.

    Args:
        enable_ps: stored in ``m.enable_parameterized_synthesis``.

    Returns:
        The tuple ``(m, um)``: the top-level abstraction and the
        micro-abstraction that is valid while ``aes_state != 0``.
    """
    m = ila.Abstraction("aes")
    m.enable_parameterized_synthesis = enable_ps

    # I/O interface: this is where the commands come from.
    cmd = m.inp('cmd', 2)
    cmdaddr = m.inp('cmdaddr', 16)
    cmddata = m.inp('cmddata', 8)

    # internal arch state.
    state = m.reg('aes_state', 2)
    opaddr = m.reg('aes_addr', 16)
    oplen = m.reg('aes_len', 16)
    ctr = m.reg('aes_ctr', 128)
    key0 = m.reg('aes_key0', 128)

    # for the uinst.
    xram = m.mem('XRAM', 16, 8)
    aes = m.fun('aes', 128, [128, 128, 128])

    # fetch is just looking at the input command.
    m.fetch_expr = ila.concat([cmd, cmdaddr, cmddata
                               ])  # actually, the equivalent instruction
    m.fetch_valid = (cmd == 2)  # when write to some addresses

    # decode: one expression per written address in 0xff00..0xff2f.
    wrcmds = [(cmd == 2) & (cmdaddr == addr)
              for addr in xrange(0xff00, 0xff30)]
    # NOTE(review): the flattened source has a bare '#' in front of this
    # statement; it is kept as live code, like the identical pattern before
    # rd_data_nxt below -- confirm against the upstream file.
    m.decode_exprs = wrcmds

    um = m.add_microabstraction('aes_compute', state != 0)

    # write commands.
    def mb_reg_wr(name, reg):
        # multibyte register write: either one chunk of the register takes
        # cmddata or the register keeps its value.
        reg_wr = ila.writechunk('wr_' + name, reg, cmddata)
        reg_nxt = ila.choice('nxt_' + name, [reg_wr, reg])
        m.set_next(name, reg_nxt)

    mb_reg_wr('aes_addr', opaddr)
    mb_reg_wr('aes_len', oplen)
    mb_reg_wr('aes_ctr', ctr)
    mb_reg_wr('aes_key0', key0)

    # state: unchanged, or moved to 1 when cmddata == 1.
    state_next = ila.choice(
        'state_next', [state, ila.ite(cmddata == 1, m.const(1, 2), state)])
    m.set_next('aes_state', state_next)

    # xram: the top-level abstraction leaves the memory unchanged.
    m.set_next('XRAM', xram)

    ################################
    #           Micro-ILA
    ################################

    # read data
    rd_data = um.reg('rd_data', 128)
    enc_data = um.reg('enc_data', 128)
    byte_cnt = um.reg('byte_cnt', 4)
    oped_byte_cnt = um.reg('oped_byte_cnt', 16)
    blk_cnt = um.reg('blk_cnt', 16)
    aes_time = um.reg('aes_time', 5)
    uaes_ctr = um.reg('uaes_ctr', 128)  # change 1
    um.set_init('byte_cnt', um.const(0, 4))
    um.set_init('blk_cnt', um.const(0, 16))
    um.set_init('oped_byte_cnt', um.const(0, 16))
    um.set_init('aes_time', um.const(0, 5))
    # The micro-level counter starts from the top-level aes_ctr register.
    um.set_init('uaes_ctr', m.getreg('aes_ctr'))  # change 2
    uxram = m.getmem('XRAM')
    byte_cnt_16b = ila.zero_extend(byte_cnt, 16)

    um.fetch_expr = state
    um.decode_exprs = [(state == i) & (byte_cnt == j) for j in xrange(16)
                       for i in [1, 2, 3]]  # Decode expressions

    # byte_cnt
    byte_cnt_inc = byte_cnt + 1
    byte_cnt_nxt = ila.choice(
        'byte_cnt_nxt', [m.const(0, 4), byte_cnt_inc, byte_cnt])  # 0, +1, NC
    um.set_next('byte_cnt', byte_cnt_nxt)

    # oped_byte_cnt
    oped_byte_cnt_inc = oped_byte_cnt + 16
    oped_byte_cnt_nxt = ila.choice(
        'oped_byte_cnt_nxt',
        [m.const(0, 16), oped_byte_cnt_inc, oped_byte_cnt])  # 0, +16, NC
    um.set_next('oped_byte_cnt', oped_byte_cnt_nxt)

    # blk_cnt
    blk_cnt_inc = blk_cnt + 16
    more_blocks = (oped_byte_cnt_inc < oplen)
    blk_cnt_nxt = ila.choice('blk_cnt_nxt', [
        m.const(0, 16), blk_cnt, blk_cnt_inc,
        ila.ite(more_blocks, blk_cnt_inc, blk_cnt)
    ])
    um.set_next('blk_cnt', blk_cnt_nxt)

    # aes_time: a 5-bit counter that holds at 31 instead of incrementing.
    aes_time_inc = aes_time + 1
    aes_time_ov = aes_time == m.const(31, 5)
    aes_time_nxt_c = ila.ite(aes_time_ov, aes_time, aes_time_inc)
    aes_time_nxt = ila.choice(
        "aes_timeC", m.const(0, 5), aes_time_nxt_c,
        ila.ite(more_blocks, m.const(0, 5), aes_time_nxt_c))
    aes_time_enough = aes_time > m.const(10, 5)
    um.set_next('aes_time', aes_time_nxt)

    # change 3
    # uaes_ctr candidates: unchanged, advanced by a synthesized constant in
    # the 'addvalue' range when more blocks remain, or the top-level ctr.
    um.set_next(
        'uaes_ctr',
        ila.choice(
            'uaes_ctr_nxt', uaes_ctr,
            ila.ite(
                more_blocks, uaes_ctr + ila.inrange(
                    'addvalue', um.const(1, 128), um.const(128, 128)),
                uaes_ctr), ctr))

    # ustate
    ustate = um.getreg('aes_state')
    ustate_nxt = ila.choice('ustate_next', [
        m.const(0, 2),
        m.const(1, 2),
        m.const(2, 2),
        m.const(3, 2), ustate,
        ila.ite(more_blocks, m.const(1, 2), m.const(0, 2)),
        ila.ite(aes_time_enough, m.const(3, 2), m.const(2, 2))
    ])  # change 4
    um.set_next('aes_state', ustate_nxt)

    # rd_data: one chunk of rd_data is replaced by the byte loaded from
    # XRAM at opaddr + blk_cnt + byte_cnt.
    rdblock = ila.writechunk("rd_data_chunk", rd_data,
                             ila.load(uxram,
                                      opaddr + blk_cnt + byte_cnt_16b))
    rd_data_nxt = ila.choice('rd_data_nxt', rdblock, rd_data)
    um.set_next('rd_data', rd_data_nxt)

    # enc_data: only updated while state == 2.
    aes_key = key0
    aes_ctr = ila.choice('ctr', uaes_ctr,
                         ctr + ila.zero_extend(blk_cnt, 128))
    aes_enc_data = ila.appfun(aes, [aes_ctr, aes_key, rd_data])
    enc_data_nxt = ila.ite(state == 2, aes_enc_data, enc_data)
    um.set_next('enc_data', enc_data_nxt)
    #print um.get_next('enc_data')

    # xram write: one 8-bit chunk of enc_data goes to the address the
    # matching input byte was read from.
    xram_w_data = ila.readchunk('enc_data_chunk', enc_data, 8)
    xram_w_addr = opaddr + blk_cnt + byte_cnt_16b
    xram_w_aes = ila.store(uxram, xram_w_addr, xram_w_data)
    xram_nxt = ila.choice('xram_nxt', uxram, xram_w_aes)
    um.set_next('XRAM', xram_nxt)

    return m, um
def createShaIla():
    """Construct and return the "sha" ILA template.

    Only declares inputs, state and candidate next-state expressions (with
    parameterized synthesis switched off); no synthesis is performed.

    Returns:
        The populated ``ila.Abstraction``.
    """
    model = ila.Abstraction("sha")
    model.enable_parameterized_synthesis = 0

    # I/O interface
    cmd = model.inp('cmd', 2)
    cmdaddr = model.inp('cmdaddr', 16)
    cmddata = model.inp('cmddata', 8)

    # response
    model.reg('dataout', 8)

    # arch states
    state = model.reg('sha_state', 3)
    rdaddr = model.reg('sha_rdaddr', 16)
    wraddr = model.reg('sha_wraddr', 16)
    oplen = model.reg('sha_len', 16)
    xram = model.mem('XRAM', 16, 8)

    # child-ILA states
    bytes_read = model.reg('sha_bytes_read', 16)
    rd_data = model.reg('sha_rd_data', 512)
    hs_data = model.reg('sha_hs_data', 160)
    sha_fun = model.fun('sha', 160, [512])

    # fetch
    model.fetch_expr = ila.concat([state, cmd, cmdaddr, cmddata])
    model.fetch_valid = (cmd == 1) | (cmd == 2)

    # read commands: dataout becomes one of these candidates.
    read_candidates = [
        ila.zero_extend(state, 8),
        ila.readchunk('rd_addr', rdaddr, 8),
        ila.readchunk('wr_addr', wraddr, 8),
        ila.readchunk('op_len', oplen, 8),
        model.const(0, 8),
    ]
    model.set_next('dataout', ila.choice('dataout', read_candidates))

    # write commands: each multibyte register either has one chunk
    # overwritten by cmddata or keeps its value.
    writable = (('sha_rdaddr', rdaddr), ('sha_wraddr', wraddr),
                ('sha_len', oplen))
    for reg_name, reg in writable:
        chunk_written = ila.writechunk('wr_' + reg_name, reg, cmddata)
        model.set_next(reg_name,
                       ila.choice('nxt_' + reg_name, [chunk_written, reg]))

    # state
    any_state = ila.choice('state_choice',
                           [model.const(value, 3) for value in range(5)])
    after_read = ila.ite(bytes_read < oplen, model.const(1, 3),
                         model.const(4, 3))
    state_candidates = [
        after_read,
        any_state,
        ila.ite(cmddata == 1, model.const(1, 3), state),
        state,
    ]
    model.set_next('sha_state', ila.choice('state_nxt', state_candidates))

    # bytes_read
    advanced = bytes_read + 64
    cleared_on_start = ila.ite(cmddata == 1, model.const(0, 16), bytes_read)
    model.set_next(
        'sha_bytes_read',
        ila.choice('bytes_read_nxt',
                   [model.const(0, 16), advanced, cleared_on_start,
                    bytes_read]))

    # rd_data
    block_addr = rdaddr + bytes_read
    little_block = ila.loadblk(xram, block_addr, 64)
    big_block = ila.loadblk_big(xram, rdaddr + bytes_read, 64)
    model.set_next(
        'sha_rd_data',
        ila.choice('rd_data_nxt', [big_block, little_block, rd_data]))

    # hs_data
    digest = ila.appfun(sha_fun, [rd_data])
    model.set_next('sha_hs_data', ila.choice('sh_data_nxt', digest, hs_data))

    # xram
    stored_little = ila.storeblk(xram, wraddr, hs_data)
    stored_big = ila.storeblk_big(xram, wraddr, hs_data)
    model.set_next(
        'XRAM', ila.choice('xram_nxt', [stored_little, stored_big, xram]))

    return model
def createAESILA(enable_ps):
    """Build the "aes" ILA and its micro-abstraction, then synthesize both.

    The 'aes_compute' micro-abstraction is synthesized first, then the
    top-level states, and finally the micro-abstraction is connected to the
    top-level 'aes_state' and 'XRAM'.

    Args:
        enable_ps: stored in ``m.enable_parameterized_synthesis``; its
            truthiness also selects the ``en``/``dis`` suffix used in the
            output file names.

    Side effects:
        Writes one "<state> <seconds>" line per synthesized state to
        ``aes-times-<suffix>.txt`` (micro states are prefixed ``u_``),
        exports the ASTs under ``asts/``, prints every synthesized
        next-state function and the total time, and generates the simulator
        into ``sim``.
    """
    m = ila.Abstraction("aes")
    m.enable_parameterized_synthesis = enable_ps

    # I/O interface: this is where the commands come from.
    cmd = m.inp('cmd', 2)
    cmdaddr = m.inp('cmdaddr', 16)
    cmddata = m.inp('cmddata', 8)
    # response.
    dataout = m.reg('dataout', 8)

    # internal arch state.
    state = m.reg('aes_state', 2)
    opaddr = m.reg('aes_addr', 16)
    oplen = m.reg('aes_len', 16)
    keysel = m.reg('aes_keysel', 1)
    ctr = m.reg('aes_ctr', 128)
    key0 = m.reg('aes_key0', 128)
    key1 = m.reg('aes_key1', 128)

    # for the uinst.
    xram = m.mem('XRAM', 16, 8)
    aes = m.fun('aes', 128, [128, 128, 128])

    # fetch is just looking at the input command.
    m.fetch_expr = ila.concat([state, cmd, cmdaddr, cmddata])
    m.fetch_valid = (cmd == 1) | (cmd == 2)

    # decode
    rdcmds = [(state == i) & (cmd == 1) & (cmdaddr == addr)
              for addr in xrange(0xff00, 0xff40) for i in [0, 1, 2, 3]]
    wrcmds = [(state == 0) & (cmd == 2) & (cmdaddr == addr)
              for addr in xrange(0xff00, 0xff40)]
    nopcmds = [
        ((state != 0) & (cmd != 1)) | ((state == 0) & (cmd != 1) & (cmd != 2))
    ]
    m.decode_exprs = rdcmds + wrcmds + nopcmds

    # read commands
    statebyte = ila.zero_extend(state, 8)
    opaddrbyte = ila.readchunk('rd_addr', opaddr, 8)
    oplenbyte = ila.readchunk('rd_len', oplen, 8)
    keyselbyte = ila.zero_extend(keysel, 8)
    ctrbyte = ila.readchunk('rd_ctr', ctr, 8)
    key0byte = ila.readchunk('rd_key0', key0, 8)
    key1byte = ila.readchunk('rd_key1', key1, 8)
    dataoutnext = ila.choice('dataout', [
        statebyte, opaddrbyte, oplenbyte, keyselbyte, ctrbyte, key0byte,
        key1byte, m.const(0, 8)
    ])
    m.set_next('dataout', dataoutnext)

    # write commands.
    def mb_reg_wr(name, reg):
        # multibyte register write: either one chunk takes cmddata or the
        # register keeps its value.
        reg_wr = ila.writechunk('wr_' + name, reg, cmddata)
        reg_nxt = ila.choice('nxt_' + name, [reg_wr, reg])
        m.set_next(name, reg_nxt)

    mb_reg_wr('aes_addr', opaddr)
    mb_reg_wr('aes_len', oplen)
    mb_reg_wr('aes_ctr', ctr)
    mb_reg_wr('aes_key0', key0)
    mb_reg_wr('aes_key1', key1)

    # bit-level registers
    def bit_reg_wr(name, reg, sz):
        # bitwise register write: the low `sz` bits of cmddata, or unchanged.
        assert reg.type.bitwidth == sz
        reg_wr = cmddata[sz - 1:0]
        reg_nxt = ila.choice('nxt_' + name, [reg_wr, reg])
        m.set_next(name, reg_nxt)

    bit_reg_wr('aes_keysel', keysel, 1)

    # these are for the uinst
    um = m.add_microabstraction('aes_compute', state != 0)

    # read data
    rd_data = um.reg('rd_data', 128)
    enc_data = um.reg('enc_data', 128)
    byte_cnt = um.reg('byte_cnt', 4)
    oped_byte_cnt = um.reg('oped_byte_cnt', 16)
    blk_cnt = um.reg('blk_cnt', 16)
    um.set_init('byte_cnt', um.const(0, 4))
    um.set_init('blk_cnt', um.const(0, 16))
    um.set_init('oped_byte_cnt', um.const(0, 16))
    uxram = m.getmem('XRAM')
    byte_cnt_16b = ila.zero_extend(byte_cnt, 16)

    um.fetch_expr = state
    um.decode_exprs = [(state == i) & (byte_cnt == j) for j in xrange(16)
                       for i in [1, 2, 3]]

    def usim(s):
        # Simulator callback for the micro-abstraction.
        return AESmicro().simMicro(s)

    # byte_cnt
    byte_cnt_inc = byte_cnt + 1
    # NOTE(review): byte_cnt_buf is never referenced again; the call is kept
    # in case creating the named choice has an effect inside the library.
    byte_cnt_buf = ila.choice('byte_cnt_buf', [byte_cnt_inc, byte_cnt])
    byte_cnt_nxt = ila.choice(
        'byte_cnt_nxt', [byte_cnt_inc, m.const(0, 4), byte_cnt])
    um.set_next('byte_cnt', byte_cnt_nxt)

    # oped_byte_cnt
    oped_byte_cnt_inc = oped_byte_cnt + 16
    oped_byte_cnt_nxt = ila.choice(
        'oped_byte_cnt_nxt',
        [m.const(0, 16), oped_byte_cnt, oped_byte_cnt_inc])
    um.set_next('oped_byte_cnt', oped_byte_cnt_nxt)

    # blk_cnt
    blk_cnt_inc = blk_cnt + 16
    more_blocks = (oped_byte_cnt_inc < oplen)
    blk_cnt_nxt = ila.choice('blk_cnt_nxt', [
        m.const(0, 16), blk_cnt, blk_cnt_inc,
        ila.ite(more_blocks, blk_cnt_inc, blk_cnt)
    ])
    um.set_next('blk_cnt', blk_cnt_nxt)

    # ustate
    ustate = um.getreg('aes_state')
    ustate_nxt = ila.choice('ustate_next', [
        m.const(0, 2),
        m.const(1, 2),
        m.const(2, 2),
        m.const(3, 2), ustate,
        ila.ite(more_blocks, m.const(1, 2), m.const(0, 2))
    ])
    um.set_next('aes_state', ustate_nxt)

    # rd_data
    rdblock = ila.writechunk("rd_data_chunk", rd_data,
                             ila.load(uxram,
                                      opaddr + blk_cnt + byte_cnt_16b))
    rd_data_nxt = ila.choice('rd_data_nxt', rdblock, rd_data)
    um.set_next('rd_data', rd_data_nxt)

    # enc_data: only updated while state == 2.
    aes_key = ila.ite(keysel == 0, key0, key1)
    aes_enc_data = ila.appfun(aes, [ctr, aes_key, rd_data])
    enc_data_nxt = ila.ite(state == 2, aes_enc_data, enc_data)
    um.set_next('enc_data', enc_data_nxt)

    # xram write
    xram_w_data = ila.readchunk('enc_data_chunk', enc_data, 8)
    xram_w_addr = opaddr + blk_cnt + byte_cnt_16b
    xram_w_aes = ila.store(uxram, xram_w_addr, xram_w_data)
    xram_nxt = ila.choice('xram_nxt', uxram, xram_w_aes)
    um.set_next('XRAM', xram_nxt)

    suffix = 'en' if enable_ps else 'dis'
    t_elapsed = 0

    # The timing file is held in a `with` block so it is flushed and closed
    # even when synthesis of some state raises.
    with open('aes-times-%s.txt' % suffix, 'wt') as timefile:
        # micro-synthesis
        for s in [
                'XRAM', 'aes_state', 'byte_cnt', 'blk_cnt', 'oped_byte_cnt',
                'rd_data'
        ]:
            # t_elapsed is deliberately NOT reset per state: it has to
            # accumulate so the total printed at the end covers every
            # synthesized state.
            st = time.clock()
            um.synthesize(s, usim)
            dt = time.clock() - st
            t_elapsed += dt
            timefile.write('%s %.2f\n' % ('u_' + s, dt))
            ast = um.get_next(s)
            # Single-argument print(...) behaves the same under the
            # Python 2 print statement and the print function.
            print('%s: %s' % (s, str(ast)))
            m.exportOne(ast, 'asts/u_%s_%s' % (s, suffix))

        def sim(s):
            # Simulator callback for the top-level abstraction.
            return AESmacro().simMacro(s)

        # The top-level next-state templates for aes_state and XRAM are
        # installed only after the micro-abstraction has been synthesized.
        # state
        state_next = ila.choice(
            'state_next',
            [state, ila.ite(cmddata == 1, m.const(1, 2), state)])
        m.set_next('aes_state', state_next)
        # xram
        m.set_next('XRAM', xram)

        # synthesize.
        for s in [
                'aes_state', 'aes_addr', 'aes_len', 'aes_keysel', 'aes_ctr',
                'aes_key0', 'aes_key1', 'dataout'
        ]:
            st = time.clock()
            m.synthesize(s, sim)
            dt = time.clock() - st
            t_elapsed += dt
            timefile.write('%s %.2f\n' % (s, dt))
            ast = m.get_next(s)
            print('%s: %s' % (s, str(ast)))
            m.exportOne(ast, 'asts/%s_%s' % (s, suffix))

    # connect to the uinst
    m.connect_microabstraction('aes_state', um)
    m.connect_microabstraction('XRAM', um)

    print('total time: %.2f' % t_elapsed)
    m.generateSimToDir('sim')
def createAESILA(synstates, enable_ps):
    """Build the single-level "aes" ILA template and synthesize given states.

    Args:
        synstates: iterable of state names. Each one is passed to
            ``m.synthesize`` and its next-state AST is exported under
            ``asts/``.
        enable_ps: stored in ``m.enable_parameterized_synthesis``; its
            truthiness also selects the ``en``/``dis`` suffix used in the
            output file names.

    Side effects:
        For every state, writes the state name and its synthesis time (on
        two separate lines) to ``aes-times-<suffix>.txt``, exports the AST,
        and finally generates the simulator into ``sim``.
    """
    m = ila.Abstraction("aes")
    m.enable_parameterized_synthesis = enable_ps

    # I/O interface: this is where the commands come from.
    cmd = m.inp('cmd', 2)
    cmdaddr = m.inp('cmdaddr', 16)
    cmddata = m.inp('cmddata', 8)
    # response.
    dataout = m.reg('dataout', 8)

    # internal arch state.
    state = m.reg('aes_state', 2)
    opaddr = m.reg('aes_addr', 16)
    oplen = m.reg('aes_len', 16)
    keysel = m.reg('aes_keysel', 1)
    ctr = m.reg('aes_ctr', 128)
    key0 = m.reg('aes_key0', 128)
    key1 = m.reg('aes_key1', 128)

    # for the uinst.
    byte_cnt = m.reg('byte_cnt', 16)
    rd_data = m.reg('rd_data', 128)
    enc_data = m.reg('enc_data', 128)
    xram = m.mem('XRAM', 16, 8)
    aes = m.fun('aes', 128, [128, 128, 128])

    # fetch is just looking at the input command.
    m.fetch_expr = ila.concat([state, cmd, cmdaddr, cmddata])
    m.fetch_valid = (cmd == 1) | (cmd == 2)

    # decode
    rdcmds = [(state == i) & (cmd == 1) & (cmdaddr == addr)
              for addr in xrange(0xff00, 0xff40) for i in [0, 1, 2, 3]]
    wrcmds = [(state == 0) & (cmd == 2) & (cmdaddr == addr)
              for addr in xrange(0xff00, 0xff40)]
    nopcmds = [(state == i) & (cmd != 1) & (cmdaddr == addr)
               for addr in xrange(0xff00, 0xff40) for i in [1, 2, 3]]
    m.decode_exprs = rdcmds + wrcmds + nopcmds

    # read commands
    statebyte = ila.zero_extend(state, 8)
    opaddrbyte = ila.readchunk('rd_addr', opaddr, 8)
    oplenbyte = ila.readchunk('rd_len', oplen, 8)
    keyselbyte = ila.zero_extend(keysel, 8)
    ctrbyte = ila.readchunk('rd_ctr', ctr, 8)
    key0byte = ila.readchunk('rd_key0', key0, 8)
    key1byte = ila.readchunk('rd_key1', key1, 8)
    dataoutnext = ila.choice('dataout', [
        statebyte, opaddrbyte, oplenbyte, keyselbyte, ctrbyte, key0byte,
        key1byte, m.const(0, 8)
    ])
    m.set_next('dataout', dataoutnext)

    # write commands.
    def mb_reg_wr(name, reg):
        # multibyte register write: either one chunk takes cmddata or the
        # register keeps its value.
        reg_wr = ila.writechunk('wr_' + name, reg, cmddata)
        reg_nxt = ila.choice('nxt_' + name, [reg_wr, reg])
        m.set_next(name, reg_nxt)

    mb_reg_wr('aes_addr', opaddr)
    mb_reg_wr('aes_len', oplen)
    mb_reg_wr('aes_ctr', ctr)
    mb_reg_wr('aes_key0', key0)
    mb_reg_wr('aes_key1', key1)

    # bit-level registers
    def bit_reg_wr(name, reg, sz):
        # bitwise register write: the low `sz` bits of cmddata, or unchanged.
        assert reg.type.bitwidth == sz
        reg_wr = cmddata[sz - 1:0]
        reg_nxt = ila.choice('nxt_' + name, [reg_wr, reg])
        m.set_next(name, reg_nxt)

    bit_reg_wr('aes_keysel', keysel, 1)

    # state
    state_next = ila.choice('state_next', [
        m.const(0, 2),
        m.const(1, 2),
        m.const(2, 2),
        m.const(3, 2),
        ila.ite(cmddata == 1, m.const(1, 2), state),
        ila.ite(byte_cnt + 16 < oplen, m.const(1, 2), m.const(0, 2))
    ])
    m.set_next('aes_state', state_next)

    # these are for the uinst
    # byte_cnt
    byte_cnt_inc = byte_cnt + 16
    byte_cnt_rst = ila.ite(cmddata == 1, m.const(0, 16), byte_cnt)
    byte_cnt_nxt = ila.choice(
        'byte_cnt_nxt',
        [m.const(0, 16), byte_cnt_inc, byte_cnt_rst, byte_cnt])
    m.set_next('byte_cnt', byte_cnt_nxt)

    # rd_data
    rdblock = ila.loadblk(xram, opaddr + byte_cnt, 16)
    rd_data_nxt = ila.choice('rd_data_nxt', rdblock, rd_data)
    m.set_next('rd_data', rd_data_nxt)

    # enc_data: only updated while state == 2.
    aes_key = ila.ite(keysel == 0, key0, key1)
    aes_enc_data = ila.appfun(aes, [ctr, aes_key, rd_data])
    enc_data_nxt = ila.ite(state == 2, aes_enc_data, enc_data)
    m.set_next('enc_data', enc_data_nxt)

    # xram write
    xram_w_aes = ila.storeblk(xram, opaddr + byte_cnt, enc_data)
    xram_nxt = ila.choice('xram_nxt', xram, xram_w_aes)
    m.set_next('XRAM', xram_nxt)

    # synthesize.
    suffix = 'en' if enable_ps else 'dis'

    def sim(s):
        # A fresh simulator instance is used for every query.
        return AES().simulate(s)

    # The timing file is held in a `with` block so it is flushed and closed
    # even when synthesis of some state raises.
    with open('aes-times-%s.txt' % suffix, 'wt') as timefile:
        for s in synstates:
            st = time.clock()
            m.synthesize(s, sim)
            t_elapsed = time.clock() - st
            # State name and elapsed time go on separate lines.
            timefile.write('%s\n' % (s,))
            timefile.write('%.2f\n' % (t_elapsed))
            ast = m.get_next(s)
            m.exportOne(ast, 'asts/%s_%s' % (s, suffix))

    m.generateSimToDir('sim')