def dmem_nxt(self):
    """Build the next-state choice for the data memory.

    Side effects: stores the computed store address on ``self.st_addr``
    (``self.stImm`` shifted left by two bits) and the value to be written on
    ``self.store_value`` (taken from ``self.sregdest``).

    Returns:
        An ``ila.choice`` named ``'dmem_nxt'`` over two candidates: the
        current ``self.dmem`` and ``self.dmem`` with ``store_value`` stored
        at ``st_addr``.
    """
    self.st_addr = self.stImm << 0x2
    self.store_value = self.sregdest

    unchanged = self.dmem
    written = ila.store(self.dmem, self.st_addr, self.store_value)
    return ila.choice('dmem_nxt', [unchanged, written])
def writeDirect(self, addr, data):
    """Build the next-state expressions for a direct-addressed write.

    For the internal RAM and for every special-function register listed in
    the table below, an ``ila.ite`` expression is produced that evaluates to
    ``data`` (or to the updated RAM) when ``addr`` selects that location and
    to the current value on ``self`` otherwise.

    Args:
        addr: address expression; bit 7 equal to 0 selects ``iram``, the
            listed constants select the individual registers.
        data: value expression to write.

    Returns:
        The ``MemMapState`` namedtuple *class* (not an instance) with one
        attribute per field set to the corresponding next-state expression.
        This mirrors the original behaviour: callers read ``r.iram``,
        ``r.acc`` and so on from the returned object.
    """
    # FIXME (carried over from the original; it did not say what is pending)

    # Single source of truth for register name -> direct address.  The
    # original spelled the field list separately and was missing a comma
    # between 'pcon' and 'tcon', which silently produced one bogus field
    # named 'pcontcon' through implicit string-literal concatenation.
    sfr_map = [
        ('p0', 0x80),
        ('sp', 0x81),
        ('dpl', 0x82),
        ('dph', 0x83),
        ('pcon', 0x87),
        ('tcon', 0x88),
        ('tmod', 0x89),
        ('tl0', 0x8a),
        ('th0', 0x8c),
        ('tl1', 0x8b),
        ('th1', 0x8d),
        ('p1', 0x90),
        ('scon', 0x98),
        ('sbuf', 0x99),
        ('p2', 0xa0),
        ('ie', 0xa8),
        ('p3', 0xb0),
        ('ip', 0xb8),
        ('psw', 0xd0),
        ('acc', 0xe0),
        ('b', 0xf0),
    ]
    fields = ['iram'] + [name for name, _ in sfr_map]
    r = namedtuple('MemMapState', fields)

    # Addresses with the top bit clear go to the internal RAM.
    r.iram = ila.ite(addr[7:7] == 0,
                     ila.store(self.iram, addr, data),
                     self.iram)

    # Every other location is updated only when addr matches it exactly.
    for name, sfr_addr in sfr_map:
        setattr(r, name,
                ila.ite(addr == sfr_addr, data, getattr(self, name)))
    return r
def fulltable(tab):
    """Return ``tab`` with the first 1024 entries of ``innerTable`` stored.

    Entry ``k`` of ``innerTable`` is written as an 8-bit constant at the
    10-bit constant address ``k``; the stores are chained in ascending
    address order starting from ``tab``.
    """
    filled = tab
    for slot in range(1024):
        address = ila.const(slot, 10)
        byte = ila.const(innerTable[slot], 8)
        filled = ila.store(filled, address, byte)
    return filled
def get_mem_choices():
    """Build the next-state choice for ``rm.mem`` under a masked store.

    The store address is ``rs1 + immS``.  Bits 31:2 of it index the word in
    memory and bits 1:0, multiplied by 8, give the bit offset inside that
    word.  The value written merges the masked ``rs2`` data, shifted to that
    offset, with the bits of the existing word that lie outside the shifted
    mask.  The mask itself is a synthesis choice (``'store_mask'``) between
    ``0xff``, ``0xffff`` and ``0xffffffff``.

    Returns:
        An ``ila.choice`` named ``"mem_nxt"`` between the unchanged memory
        and the memory with the merged word stored.
    """
    rs1_val = rm.indexIntoGPR(rm.rs1)
    rs2_val = rm.indexIntoGPR(rm.rs2)
    mask = ila.choice('store_mask', [bv(0xff), bv(0xffff), bv(0xffffffff)])

    addr = rs1_val + rm.immS
    word_addr = zext(addr[31:2])

    # New data moved to its position inside the word.
    new_bits = (rs2_val & mask) << (8 * zext(addr[1:0]))
    # Bits of the old word that the store must leave untouched.
    kept_bits = (~(mask << (8 * zext(addr[1:0])))) & rm.mem[word_addr]
    store_value = new_bits | kept_bits

    no_change = rm.mem  # NC
    stored = ila.store(rm.mem, word_addr, store_value)
    return ila.choice("mem_nxt", [no_change, stored])
def createRsaIla():
    """Create and return the synthesis template for the 'rsa' abstraction.

    Declares the command interface (``cmd``, ``cmdaddr``, ``cmddata``), the
    state registers, the ``XRAM`` memory and a function named ``rsa``, then
    installs fetch expressions and a next-state template (mostly
    ``ila.choice`` nodes) for ``dataout``, ``rsa_addr``, ``rsa_state``,
    ``rsa_byte_counter``, ``rsa_buff``, ``rsa_M`` and ``XRAM``.

    Returns:
        The configured ``ila.Abstraction`` object.
    """
    m = ila.Abstraction('rsa')
    m.enable_parameterized_synthesis = 0

    # ---- I/O interface -------------------------------------------------
    cmd = m.inp('cmd', 2)
    cmdaddr = m.inp('cmdaddr', 16)
    cmddata = m.inp('cmddata', 8)
    # response register (declared only; referenced by name below)
    m.reg('dataout', 8)

    # ---- architectural state -------------------------------------------
    state = m.reg('rsa_state', 2)
    addr = m.reg('rsa_addr', 16)
    rsa_M = m.reg('rsa_M', 2048)
    m.reg('rsa_N', 2048)  # declared, no next-state set in this function
    m.reg('rsa_E', 2048)  # declared, no next-state set in this function
    rsa_buff = m.reg('rsa_buff', 2048)
    byte_counter = m.reg('rsa_byte_counter', 8)
    xram = m.mem('XRAM', 16, 8)
    rsa = m.fun('rsa', 2048, [2048])

    # ---- fetch ---------------------------------------------------------
    m.fetch_expr = ila.concat([state, cmd, cmdaddr, cmddata])
    m.fetch_valid = (cmd == 1) | (cmd == 2)

    # ---- dataout -------------------------------------------------------
    state_as_byte = ila.zero_extend(state, 8)
    addr_byte = ila.readchunk('rsa_addr', addr, 8)
    m.set_next(
        'dataout',
        ila.choice('dataout', [state_as_byte, addr_byte, m.const(0, 8)]))

    # ---- rsa_addr ------------------------------------------------------
    addr_written = ila.writechunk('wr_addr', addr, cmddata)
    m.set_next('rsa_addr', ila.choice('nxt_addr', [addr_written, addr]))

    # ---- rsa_state -----------------------------------------------------
    any_state = ila.choice('state_choice',
                           [m.const(value, 2) for value in range(4)])
    after_write = ila.ite(byte_counter == 255, m.const(0, 2), m.const(3, 2))
    start_on_one = ila.ite(cmddata == 1, m.const(1, 2), state)
    m.set_next(
        'rsa_state',
        ila.choice('rsa_state_nxt',
                   [after_write, any_state, start_on_one, state]))

    # ---- byte_counter --------------------------------------------------
    counter_plus_one = byte_counter + 1
    counter_reset = ila.ite(cmddata == 1, m.const(0, 8), byte_counter)
    m.set_next(
        'rsa_byte_counter',
        ila.choice('byte_counter_nxt',
                   [counter_plus_one, counter_reset, byte_counter]))

    # ---- rsa_buff ------------------------------------------------------
    rsa_result = ila.appfun(rsa, [rsa_M])
    m.set_next('rsa_buff', ila.choice('rsa_buff_nxt', rsa_result, rsa_buff))

    # ---- rsa_M (held) --------------------------------------------------
    m.set_next('rsa_M', rsa_M)

    # ---- XRAM ----------------------------------------------------------
    # Disabled alternatives kept from the original:
    #   xram_w_rsa_lit = ila.storeblk (xram, addr, rsa_buff)
    #   xram_w_rsa_big = ila.storeblk_big (xram, addr, rsa_buff)
    #   xram_w_rsa_data_2 = rsa_buff [255 - byte_cnt_16]
    counter_16 = ila.zero_extend(byte_counter, 16)
    # NOTE(review): (255 - byte_counter) * 8 is formed before the
    # zero-extension; if `*` keeps the 8-bit operand width the product
    # wraps for byte_counter < 224 -- confirm against the ila API.
    shift = ila.zero_extend((255 - byte_counter) * 8, 2048)
    selected_byte = (rsa_buff >> shift)[7:0]
    xram_written = ila.store(xram, addr + counter_16, selected_byte)
    m.set_next('XRAM', ila.choice('xram_nxt', [xram_written, xram]))

    return m
def WRU0 (gb):
    """Add the WRU0 update to every ``*_nxt`` expression held on ``gb``.

    A ``decode`` condition is built from the ready flags and the RAM x/y
    indices.  Each ``gb.<state>_nxt`` is then replaced by
    ``ila.ite(decode, <update>, <previous gb.<state>_nxt>)`` so the update
    applies only when ``decode`` holds and the previously accumulated
    next-state expression is used otherwise.  Nothing is returned; ``gb`` is
    modified in place.
    """
    # Several of these aliases are not used below; they are kept so that the
    # same attributes of gb are read as in the original.
    m = gb.abst
    READY_T = gb.READY_TRUE
    READY_F = gb.READY_FALSE
    VALID_T = gb.VALID_TRUE
    VALID_F = gb.VALID_FALSE
    DATA_SIZE = gb.DATA_SIZE

    decode = ((gb.arg_1_TREADY == READY_F) &
              (gb.arg_0_TREADY == READY_F) &
              (gb.st_ready == READY_T))
    decode = decode & ~((gb.RAM_x == gb.RAM_x_0) & (gb.RAM_y == gb.RAM_y_0))

    endPixel = (gb.RAM_x == gb.RAM_x_M) & (gb.RAM_y == gb.RAM_y_M - gb.RAM_y_1)
    accPixel = (gb.RAM_y < gb.RAM_size)

    def hold (name):
        # State `name` keeps its current value when decode holds.
        nxt_name = name + '_nxt'
        current = getattr (gb, name)
        setattr (gb, nxt_name,
                 ila.ite (decode, current, getattr (gb, nxt_name)))

    # ---- output ports --------------------------------------------------
    ready_if_acc = ila.ite (accPixel, READY_T, READY_F)
    gb.arg_1_TREADY_nxt = ila.ite (decode, ready_if_acc, gb.arg_1_TREADY_nxt)
    hold ('arg_0_TVALID')
    hold ('arg_0_TDATA')

    # ---- internal arch-states ------------------------------------------
    # most recent pixel
    hold ('cur_pix')

    # previous pixel (child-state) takes the current pixel
    shifted_pix = gb.cur_pix
    gb.pre_pix_nxt = ila.ite (decode, shifted_pix, gb.pre_pix_nxt)

    # x index (column) in the 2-D RAM
    x_step = ila.ite (gb.RAM_x == gb.RAM_x_M,
                      gb.RAM_x_1,
                      gb.RAM_x + gb.RAM_x_1)
    gb.RAM_x_nxt = ila.ite (decode, x_step, gb.RAM_x_nxt)

    # y index (row) in the 2-D RAM: advances (with wrap) at the last column
    y_step = ila.ite (gb.RAM_x == gb.RAM_x_M,
                      ila.ite (gb.RAM_y == gb.RAM_y_M,
                               gb.RAM_y_0,
                               gb.RAM_y + gb.RAM_y_1),
                      gb.RAM_y)
    gb.RAM_y_nxt = ila.ite (decode, y_step, gb.RAM_y_nxt)

    # w index (write) in the 2-D RAM: advances (with wrap) at the last column
    w_step = ila.ite (gb.RAM_x == gb.RAM_x_M,
                      ila.ite (gb.RAM_w == gb.RAM_w_M,
                               gb.RAM_w_0,
                               gb.RAM_w + gb.RAM_w_1),
                      gb.RAM_w)
    gb.RAM_w_nxt = ila.ite (decode, w_step, gb.RAM_w_nxt)

    # RAM banks: the bank selected by RAM_w receives the previous pixel
    in_byte = gb.pre_pix
    for bank in xrange (0, gb.RAM_size):
        bank_nxt = ila.ite (gb.RAM_w == bank,
                            ila.store (gb.RAM[bank],
                                       gb.RAM_x - gb.RAM_x_1,
                                       in_byte),
                            gb.RAM[bank])
        gb.RAM_nxt[bank] = ila.ite (decode, bank_nxt, gb.RAM_nxt[bank])

    # stencil slices except the last one are held
    for k in xrange (0, gb.stencil_size-1):
        kept_slice = gb.stencil[k]
        gb.stencil_nxt[k] = ila.ite (decode, kept_slice, gb.stencil_nxt[k])

    def sliceSelect (start, seqs):
        # ite-chain over `start`: case c loads from bank seqs[c]; the last
        # case is the unconditional default.
        def pick (case):
            bank = seqs[case]
            if case == gb.RAM_size - 1:
                return ila.load (gb.RAM[bank], gb.RAM_x - gb.RAM_x_1)
            return ila.ite (start == case,
                            ila.load (gb.RAM[bank], gb.RAM_x - gb.RAM_x_1),
                            pick (case + 1))
        return pick (0)

    def genSliceSeqs (start):
        # Bank indices start, start+1, ... taken modulo RAM_size.
        return [(start + off) % gb.RAM_size
                for off in xrange (0, gb.RAM_size)]

    slice_seqs = [genSliceSeqs (first) for first in xrange (0, gb.RAM_size)]

    slice_chunks = [in_byte] + [sliceSelect (gb.RAM_w, slice_seqs[pos])
                                for pos in xrange (7, -1, -1)]

    # last stencil slice: held while RAM_y < RAM_size, otherwise the
    # concatenation of the incoming byte and the selected RAM bytes
    n = gb.stencil_size - 1
    last_slice = ila.ite (gb.RAM_y < gb.RAM_size,
                          gb.stencil[n],
                          ila.concat (slice_chunks))
    gb.stencil_nxt[n] = ila.ite (decode, last_slice, gb.stencil_nxt[n])

    # stencil ready (child-state)
    st_ready_val = ila.ite (accPixel, READY_T, READY_F)
    gb.st_ready_nxt = ila.ite (decode, st_ready_val, gb.st_ready_nxt)

    # processing input (child-state) is held
    hold ('proc_in')
def U2(gb):
    """Add the U2 update to every ``*_nxt`` expression held on ``gb``.

    ``decode`` holds when the input stream is not empty and either the slice
    stream is not full or ``LB2D_proc_y`` is still below ``LB2D_proc_size``.
    Each ``gb.<state>_nxt`` is replaced by
    ``ila.ite(decode, <update>, <previous gb.<state>_nxt>)``.  Nothing is
    returned; ``gb`` is modified in place.
    """
    # Several of these aliases are not used below; they are kept so that the
    # same attributes of gb are read as in the original.
    m = gb.abst
    READY_T = gb.READY_TRUE
    READY_F = gb.READY_FALSE
    VALID_T = gb.VALID_TRUE
    VALID_F = gb.VALID_FALSE
    FULL_T = gb.FULL_TRUE
    FULL_F = gb.FULL_FALSE
    EMPTY_T = gb.EMPTY_TRUE
    EMPTY_F = gb.EMPTY_FALSE

    ############################ decode ###################################
    decode = ((gb.in_stream_empty == EMPTY_F) &
              ((gb.slice_stream_full == FULL_F) |
               (gb.LB2D_proc_y < gb.LB2D_proc_size)))

    def hold(name):
        # Scalar state `name` keeps its current value when decode holds.
        nxt_name = name + '_nxt'
        current = getattr(gb, name)
        setattr(gb, nxt_name,
                ila.ite(decode, current, getattr(gb, nxt_name)))

    def hold_each(name, count):
        # Same as hold(), for every element of an indexed state.
        nxt_name = name + '_nxt'
        for k in xrange(0, count):
            current = getattr(gb, name)[k]
            getattr(gb, nxt_name)[k] = ila.ite(decode, current,
                                               getattr(gb, nxt_name)[k])

    ############################ next state functions #####################
    # output ports
    hold('arg_1_TREADY')
    hold('arg_0_TVALID')
    hold('arg_0_TDATA')

    # 1-D buffer for input data
    hold('LB1D_in')
    hold('LB1D_uIn')
    hold('LB1D_buff')

    # pixel position for input data
    hold('LB1D_p_cnt')

    # in stream full: cleared
    gb.in_stream_full_nxt = ila.ite(decode, FULL_F, gb.in_stream_full_nxt)

    # in stream empty: not empty only if the stream was full
    empty_val = ila.ite(gb.in_stream_full == FULL_T, EMPTY_F, EMPTY_T)
    gb.in_stream_empty_nxt = ila.ite(decode, empty_val,
                                     gb.in_stream_empty_nxt)

    # in stream buffer
    hold_each('in_stream_buff', gb.in_stream_size)

    # LB2D proc x idx
    x_step = ila.ite(gb.LB2D_proc_x == gb.LB2D_proc_x_M,
                     gb.LB2D_proc_x_1,
                     gb.LB2D_proc_x + gb.LB2D_proc_x_1)
    gb.LB2D_proc_x_nxt = ila.ite(decode, x_step, gb.LB2D_proc_x_nxt)

    # LB2D proc y idx: advances (with wrap) at the last column
    y_step = ila.ite(gb.LB2D_proc_x == gb.LB2D_proc_x_M,
                     ila.ite(gb.LB2D_proc_y == gb.LB2D_proc_y_M,
                             gb.LB2D_proc_y_0,
                             gb.LB2D_proc_y + gb.LB2D_proc_y_1),
                     gb.LB2D_proc_y)
    gb.LB2D_proc_y_nxt = ila.ite(decode, y_step, gb.LB2D_proc_y_nxt)

    # LB2D proc w idx: advances (with wrap) at the last column
    w_step = ila.ite(gb.LB2D_proc_x == gb.LB2D_proc_x_M,
                     ila.ite(gb.LB2D_proc_w == gb.LB2D_proc_w_M,
                             gb.LB2D_proc_w_0,
                             gb.LB2D_proc_w + gb.LB2D_proc_w_1),
                     gb.LB2D_proc_w)
    gb.LB2D_proc_w_nxt = ila.ite(decode, w_step, gb.LB2D_proc_w_nxt)

    # LB2D proc buffer: the bank selected by LB2D_proc_w receives in_byte
    in_byte = ila.ite(gb.in_stream_full == FULL_T,
                      gb.in_stream_buff[gb.in_stream_size - 1],
                      gb.in_stream_buff[0])
    for bank in xrange(0, gb.LB2D_proc_size):
        bank_nxt = ila.ite(gb.LB2D_proc_w == bank,
                           ila.store(gb.LB2D_proc[bank],
                                     gb.LB2D_proc_x - gb.LB2D_proc_x_1,
                                     in_byte),
                           gb.LB2D_proc[bank])
        gb.LB2D_proc_nxt[bank] = ila.ite(decode, bank_nxt,
                                         gb.LB2D_proc_nxt[bank])

    # slice stream full
    full_val = ila.ite(gb.LB2D_proc_y < gb.LB2D_proc_size,
                       FULL_F,
                       ila.ite(gb.slice_stream_empty == EMPTY_T,
                               FULL_F, FULL_T))
    gb.slice_stream_full_nxt = ila.ite(decode, full_val,
                                       gb.slice_stream_full_nxt)

    # slice stream empty
    slice_empty_val = ila.ite(gb.LB2D_proc_y < gb.LB2D_proc_size,
                              EMPTY_T, EMPTY_F)
    gb.slice_stream_empty_nxt = ila.ite(decode, slice_empty_val,
                                        gb.slice_stream_empty_nxt)

    # slice stream buffer
    def sliceSelect(start, seqs):
        # ite-chain over `start`: case c loads from bank seqs[c]; the last
        # case is the unconditional default.
        assert (len(seqs) == gb.LB2D_proc_size)

        def pick(case):
            bank = seqs[case]
            if case == gb.LB2D_proc_size - 1:
                return ila.load(gb.LB2D_proc[bank],
                                gb.LB2D_proc_x - gb.LB2D_proc_x_1)
            return ila.ite(start == case,
                           ila.load(gb.LB2D_proc[bank],
                                    gb.LB2D_proc_x - gb.LB2D_proc_x_1),
                           pick(case + 1))

        return pick(0)

    def genSliceSeqs(start):
        # Bank indices start, start+1, ... taken modulo LB2D_proc_size.
        assert (start <= gb.LB2D_proc_size)
        return [(start + off) % gb.LB2D_proc_size
                for off in xrange(0, gb.LB2D_proc_size)]

    slice_seqs = [genSliceSeqs(first)
                  for first in xrange(0, gb.LB2D_proc_size)]
    # For a size of 8 the generated sequences are:
    #   slice_seq_7 = [7, 0, 1, 2, 3, 4, 5, 6]
    #   slice_seq_6 = [6, 7, 0, 1, 2, 3, 4, 5]
    #   slice_seq_5 = [5, 6, 7, 0, 1, 2, 3, 4]
    #   slice_seq_4 = [4, 5, 6, 7, 0, 1, 2, 3]
    #   slice_seq_3 = [3, 4, 5, 6, 7, 0, 1, 2]
    #   slice_seq_2 = [2, 3, 4, 5, 6, 7, 0, 1]
    #   slice_seq_1 = [1, 2, 3, 4, 5, 6, 7, 0]
    #   slice_seq_0 = [0, 1, 2, 3, 4, 5, 6, 7]

    slice_chunks = [in_byte] + [sliceSelect(gb.LB2D_proc_w, slice_seqs[pos])
                                for pos in xrange(7, -1, -1)]

    # slice_stream_buff: entry 0 takes the new slice, later entries shift
    head_val = ila.ite(gb.LB2D_proc_y < gb.LB2D_proc_size,
                       gb.slice_stream_buff[0],
                       ila.concat(slice_chunks))
    gb.slice_stream_buff_nxt[0] = ila.ite(decode, head_val,
                                          gb.slice_stream_buff_nxt[0])
    for k in xrange(1, gb.slice_stream_size):
        shifted = ila.ite(gb.LB2D_proc_y < gb.LB2D_proc_size,
                          gb.slice_stream_buff[k],
                          gb.slice_stream_buff[k - 1])
        gb.slice_stream_buff_nxt[k] = ila.ite(decode, shifted,
                                              gb.slice_stream_buff_nxt[k])

    # LB2D shift indices and buffer
    hold('LB2D_shift_x')
    hold('LB2D_shift_y')
    hold_each('LB2D_shift', gb.LB2D_shift_size)

    # stencil stream
    hold('stencil_stream_full')
    hold('stencil_stream_empty')
    hold_each('stencil_stream_buff', gb.stencil_stream_size)

    # gb_p_cnt, gb_pp_it, gb_exit_it
    hold('gb_p_cnt')
    hold_each('gb_pp_it', gb.gb_pp_size)
    hold_each('gb_exit_it', gb.gb_exit_size)
def main():
    """Build the 'test' abstraction, synthesize its elements and export it.

    Declares registers, bits and a small memory, sets the memory's
    next-state directly, synthesizes each remaining element from a choice
    template against the corresponding simulator function (``foo``, ``bar``,
    ``baz``, ...), asserts the synthesized result equals the expected
    expression, then generates Verilog into ``VerilogFile`` and runs
    ``testVerilog`` on it.
    """
    # ila.enablelog("Synthesizer")
    ila.enablelog("VerilogExport")
    ila.setloglevel(3,"")

    model = ila.Abstraction("test")
    r0 = model.reg('r0', 8)
    r1 = model.reg('r1', 8)
    a = model.bit('a')
    b = model.bit('b')

    # Output state, referenced by name in the set_next calls at the end.
    model.reg('Rsum',8)
    model.reg('Rdiff',8)
    model.bit('Rbaz')
    model.bit('Rshaz')
    model.reg('Rdaz',8)
    model.reg('Rrazmatazz',4)
    model.reg('Rjazz',8)

    # ---- memory: written directly, no synthesis --------------------------
    mem = model.mem('mem1',2,4)
    action1 = ila.store(mem,r0[1:0],r1[3:0])
    action2 = ila.store(mem,r1[1:0],r0[3:0])
    inner_store = ila.store(mem,r0[7:6],r1[7:4])
    action3 = ila.store(inner_store, r1[7:6], r0[7:4])
    final_action = ila.ite(a, action1, ila.ite(b, action2, action3))
    model.set_next('mem1',final_action)

    # ---- sum / diff -------------------------------------------------------
    ex = ila.choice("function", r0+r1, r0-r1, r0+r1+1)
    resfoo = model.syn_elem("sum", ex, foo)
    assert model.areEqual(resfoo, r0+r1)

    resbar = model.syn_elem("diff", ex, bar)
    assert model.areEqual(resbar, r0-r1)

    # ---- baz / shaz -------------------------------------------------------
    a1 = ila.choice("a1", a, ~a, a&b, a|b)
    b1 = ila.choice("b1", [b, ~b, a&b, a|b, a^b])
    a2 = ila.choice("a2", a, ~a)
    b2 = ila.choice("b2", b, ~b)
    y = (a1 & b1) | (a2 & b2)

    resbaz = model.syn_elem("baz", y, baz)
    assert model.areEqual(resbaz, a^b)

    resshaz = model.syn_elem("shaz", y, shaz)
    assert model.areEqual(resshaz, ~(a^b))

    # ---- daz --------------------------------------------------------------
    c = ila.inrange("cnst", model.const(0x00,8), model.const(0xff,8))
    z = ila.choice("func_z", r0+r1+c, r0+r1-c)
    resdaz = model.syn_elem("daz", z, daz)
    assert model.areEqual(resdaz, r0 + r1 + 0x44)

    # ---- razmatazz --------------------------------------------------------
    slc0 = ila.readslice("r0slice", r0, 4)
    slc1 = ila.readchunk("r1chunk", r1, 4)
    res = ila.choice('slice', slc0 + slc1, slc0 - slc1)
    resrmz = model.syn_elem("razmatazz", res, razmatazz)
    assert model.areEqual(resrmz, r0[3:0]+r1[7:4])

    # ---- jazz -------------------------------------------------------------
    nr0 = ila.writeslice("wr0slice", r0, slc0)
    resjazz = model.syn_elem("jazz", nr0, jazz)
    assert model.areEqual(resjazz, ila.concat(r0[3:0], r0[3:0]))

    # ---- install the synthesized next-state functions and export ---------
    synthesized = [
        ('Rsum', resfoo),
        ('Rdiff', resbar),
        ('Rbaz', resbaz),
        ('Rshaz', resshaz),
        ('Rdaz', resdaz),
        ('Rrazmatazz', resrmz),
        ('Rjazz', resjazz),
    ]
    for state_name, next_expr in synthesized:
        model.set_next(state_name, next_expr)

    model.generateVerilog(VerilogFile)
    testVerilog(VerilogFile)
def generate_next_state(self, run_index, index):
    """Build next-state expressions for one thread from the parsed program.

    Walks the module-level ``program`` (a sequence of token lists) and, for
    each recognised opcode (``ld``, ``cvta``, ``mov``, ``add``, ``mul``,
    ``st``), guards the instruction's effect with
    ``pc_list[run_index][index] == <pc of that instruction>``:

    * register-writing instructions update
      ``self.next_state_dict[run_index][<dest>_<run_index>_<index>]``;
    * ``st`` updates ``self.mem_next_state_list[run_index]`` and is further
      guarded by the arbiter choice for this thread.

    Registers that no instruction writes get ``set_next`` to themselves.

    Side effects: resets and fills ``self.mem_access_list`` with the pc of
    every ``ld``/``st`` encountered, and sets ``self.pc_max`` to the pc
    after the last recognised instruction.  Lines whose opcode is not
    recognised are printed and do not advance the pc.

    Args:
        run_index: index of the run; selects the memory, pc list, arbiter
            and next-state dictionary.
        index: index of the thread inside the run.

    Changes relative to the previous implementation:
        * ``st`` parsed its ``+imm`` offset by slicing ``addr`` (a name that
          is unbound, or a register left over from an earlier store) instead
          of the address string; it now slices the address string.
        * ``st`` fits the address register to ``MEM_ADDRESS_BITS`` the same
          way ``ld`` does (truncate, or sign-extend when narrower).
        * the ``instruction_book`` file is closed after being read.
        * register lookups whose result was never used were removed.
    """
    self.mem_access_list = []

    # The contents are not used below; the file is still opened and read so
    # that a missing file fails here exactly as before.
    with open('instruction_book', 'r') as instruction_book_obj:
        instruction_book_obj.readlines()

    suffix = '_%d_%d' % (run_index, index)
    addr_bits = instruction_format.MEM_ADDRESS_BITS
    data_bits = instruction_format.DMEM_BITS
    next_states = self.next_state_dict[run_index]
    next_state_finished = set()

    def declared_width(name):
        # Width is parsed from the declaration text after its first two
        # characters (int(decl[2:])).
        return int(ptx_declaration[name][2:])

    def fit_width(expr, width, target):
        # Sign-extend or truncate `expr` (of `width` bits) to `target` bits.
        if target > width:
            return ila.sign_extend(expr, target)
        if target < width:
            return expr[(target - 1):0]
        return expr

    def operand(token, target):
        # A declared register fitted to `target` bits, or an integer literal.
        if token in ptx_declaration:
            reg = self.model.getreg(token + suffix)
            return fit_width(reg, declared_width(token), target)
        return ila.const(int(token), target)

    def split_offset(text):
        # 'ss+2' -> ('ss', '2'); without '+' the offset defaults to '0'.
        base, plus, offset = text.partition('+')
        return base, (offset if plus else '0')

    def mem_address(reg, width):
        # Low MEM_ADDRESS_BITS of the register, sign-extended if narrower.
        if width >= addr_bits:
            return reg[(addr_bits - 1):0]
        return ila.sign_extend(reg, addr_bits)

    def fires_at(pc):
        # Condition: this thread's pc equals the instruction's pc.
        return self.pc_list[run_index][index] == pc

    def write_reg(name, pc, value):
        # Guarded update of one destination register's next-state.
        key = name + suffix
        next_states[key] = ila.ite(fires_at(pc), value, next_states[key])
        next_state_finished.add(name)

    # First pass: map labels ('name:') to the pc of the next instruction.
    # NOTE(review): pc_target is not consumed anywhere in this method.
    pc_target = {}
    current_pc = 0
    for program_line in program:
        if len(program_line) < 2:
            if len(program_line) == 0:
                continue
            if program_line[0][-1] == ':':
                pc_target[program_line[0][:-1]] = current_pc
        else:
            current_pc += 4

    # Second pass: emit the guarded updates.
    current_pc = 0
    for program_line in program:
        if len(program_line) < 2:
            continue
        opcode_split = program_line[0].split('.')
        opcode_name = opcode_split[0]
        current_pc_in = current_pc

        if opcode_name == 'ld':
            opcode_mem_type = opcode_split[1]
            self.mem_access_list.append(current_pc)
            dest_str = program_line[1]
            addr_str, imm_str = split_offset(program_line[2])
            dest_length = declared_width(dest_str)
            addr_length = declared_width(addr_str)
            addr_reg = self.model.getreg(addr_str + suffix)
            address = mem_address(addr_reg, addr_length)
            if opcode_mem_type == 'param':
                # Parameter loads are not modelled (the imem_list read was
                # disabled in the original).
                # NOTE(review): this skips the pc increment as well --
                # confirm that is intended.
                continue
            loaded = self.mem_list[run_index][
                address + ila.const(int(imm_str), addr_bits)]
            write_reg(dest_str, current_pc,
                      fit_width(loaded, data_bits, dest_length))
            current_pc += 4

        elif opcode_name == 'cvta':
            dest_str = program_line[1]
            src_str = program_line[2]
            dest_length = declared_width(dest_str)
            src_length = declared_width(src_str)
            src = self.model.getreg(src_str + suffix)
            write_reg(dest_str, current_pc,
                      fit_width(src, src_length, dest_length))
            current_pc += 4

        elif opcode_name == 'mov':
            dest_str = program_line[1]
            dest_length = declared_width(dest_str)
            write_reg(dest_str, current_pc,
                      operand(program_line[2], dest_length))
            current_pc += 4

        elif opcode_name in ('add', 'mul'):
            dest_str = program_line[1]
            dest_length = declared_width(dest_str)
            src0 = operand(program_line[2], dest_length)
            src1 = operand(program_line[3], dest_length)
            if opcode_name == 'add':
                result = src0 + src1
            else:
                result = src0 * src1
            write_reg(dest_str, current_pc, result)
            current_pc += 4

        elif opcode_name == 'st':
            self.mem_access_list.append(current_pc)
            src_str = program_line[2]
            # Fixed: the offset is taken from the address string.
            addr_str, imm_str = split_offset(program_line[1])
            addr_length = declared_width(addr_str)
            src = self.model.getreg(src_str + suffix)
            addr = self.model.getreg(addr_str + suffix)
            address = mem_address(addr, addr_length)
            granted = (self.arb_list[run_index] ==
                       self.arb_choice_list[run_index][index])
            self.mem_next_state_list[run_index] = ila.ite(
                fires_at(current_pc) & granted,
                ila.store(self.mem_list[run_index],
                          address + ila.const(int(imm_str), addr_bits),
                          src),
                self.mem_next_state_list[run_index])
            current_pc += 4

        if current_pc == current_pc_in:
            # Unrecognised opcode: report the line; the pc does not advance.
            print(program_line)

    # Registers never written by the program hold their value.
    for reg_name in ptx_declaration:
        if reg_name not in next_state_finished:
            reg = self.model.getreg(reg_name + suffix)
            self.model.set_next(reg_name + suffix, reg)
    self.pc_max = current_pc
def buildILA():
    """Build the ILA abstraction named 'RBM' and return it.

    The function declares the top-level inputs, registers and memories on
    an ``ila.Abstraction`` and then attaches five micro-abstractions to it:

    * ``compute`` (child of the top level) -- a small state machine on
      ``upc`` that issues a read request, waits for ``rd_complete`` and then
      raises ``train_input_done`` or ``predict_input_done``.
    * ``DMAload`` (child of the top level) -- copies ``data_in`` bytes into
      the ``data`` memory while ``rd_granted`` is set.
    * ``train`` (child of ``compute``) -- state machine on ``train_upc``.
    * ``predict`` (child of ``compute``) -- state machine on ``predict_upc``.
    * ``store`` (child of the top level) -- streams ``predict_result`` out
      through ``data_out`` while ``wr_granted`` is set.

    Several local names (``nu``, ``nlp``, ``hidden_unit``, ``jstate``,
    ``LastV``, ``nextCondition``, ``wr_complete``, ``nm`` ...) are rebound
    from one section to the next, so statement order matters.

    The body relies on names that are not defined in this function and must
    be provided by the enclosing module: ``ila``, ``log``, ``fpconvert``,
    ``fpconst``, ``fixpoint``, ``FPedge``, ``FPsum``, ``FPsum3``, ``FPpow``,
    ``FP01_D``, ``FP05_D``, ``FPu16`` and ``LEARN_RATE``.

    Returns:
        The top-level abstraction object created by ``ila.Abstraction('RBM')``.
    """
    #---------------------------
    # define universal constant
    #---------------------------
    K = 5
    NUM_MOVIE_MAX = 100
    NUM_HIDDEN_MAX = 100
    NUM_VISIBLE_MAX = NUM_MOVIE_MAX * K
    # Each width below is floor(log2(x)) + 1; the trailing number is the
    # value obtained with the constants above.
    DATAMEM_ADDR_WIDTH = int(
        log(NUM_VISIBLE_MAX + 1) /
        log(2)) + 1  # 9 # it is definitely not dividable, but need to check
    HIDDEN_UNIT_WIDTH = int(
        log(NUM_HIDDEN_MAX + 1) /
        log(2)) + 1  # 7 # it is definitely not dividable, but need to check
    VISIBLE_UNIT_WIDTH = int(log(NUM_VISIBLE_MAX + 1) / log(2)) + 1  # 9
    EDGEMEM_ADDR_WIDTH = int(
        log((NUM_VISIBLE_MAX + 1) * (NUM_HIDDEN_MAX + 1)) / log(2)) + 1  # 16
    POS_ADDR_WIDTH = EDGEMEM_ADDR_WIDTH
    # NOTE(review): NEG_ADDR_WIDTH is only referenced by the commented-out
    # 'neg' memory further down.
    NEG_ADDR_WIDTH = EDGEMEM_ADDR_WIDTH
    PREDICT_RESULT_WIDTH = int(log(NUM_MOVIE_MAX) / log(2)) + 1  # 7
    KWIDTH = int(log(K) / log(2)) + 1  # 3

    #---------------------------
    # Model
    #---------------------------
    rbm = ila.Abstraction('RBM')
    # Configuration inputs (32 bit each) and the 1-bit reset / conf_done.
    conf_done = rbm.inp('conf_done', 1)
    conf_num_hidden = rbm.inp('conf_num_hidden', 32)
    conf_num_visible = rbm.inp('conf_num_visible', 32)
    conf_num_users = rbm.inp('conf_num_users', 32)
    conf_num_loops = rbm.inp('conf_num_loops', 32)
    conf_num_testusers = rbm.inp('conf_num_testusers', 32)
    conf_num_movies = rbm.inp('conf_num_movies', 32)
    rst = rbm.inp('rst', 1)
    init_done = rbm.reg('init_done', 1)
    done = rbm.reg('done', 1)
    # 16-bit registers that latch the low half of the conf_* inputs.
    num_hidden = rbm.reg('num_hidden', 16)
    num_visible = rbm.reg('num_visible', 16)
    num_users = rbm.reg('num_users', 16)
    num_loops = rbm.reg('num_loops', 16)
    num_testusers = rbm.reg('num_testusers', 16)
    num_movies = rbm.reg('num_movies', 16)
    # DMA output
    rd_index = rbm.reg('rd_index', 32)
    rd_length = rbm.reg('rd_length', 32)
    rd_request = rbm.reg('rd_request', 1)
    rd_grant = rbm.inp('rd_grant', 1)
    data_in = rbm.inp('data_in', 32)
    # rd_cnt = rbm.reg('rd_cnt', 16) # i ureg #585
    # DMA input
    wr_grant = rbm.inp('wr_grant', 1)
    wr_request = rbm.reg('wr_request', 1)
    wr_index = rbm.reg('wr_index', 32)
    wr_length = rbm.reg('wr_length', 32)
    data_out = rbm.reg('data_out', 32)
    # wr_cnt = rbm.reg('wr_cnt', 16) : u reg
    data = rbm.mem('data', DATAMEM_ADDR_WIDTH, 8)
    # 'predict_result' is looked up by name (getmem) where it is needed.
    rbm.mem('predict_result', PREDICT_RESULT_WIDTH, 8)

    #-------------------------------------
    # Decoding Expressions
    #-------------------------------------
    rstInst = rst == 1
    confDoneInst = (rst == 0) & (init_done == 0) & (conf_done == 1)
    rdGrantInst = (rd_request == 1) & (rd_grant == 1)
    wrGrantInst = (wr_request == 1) & (wr_grant == 1)
    # NOTE(review): decodeExpr is rebound in the compute section below and
    # is never read or attached to 'rbm' inside this function -- confirm
    # whether it was meant to be assigned to rbm.decode_exprs.
    decodeExpr = [rstInst, confDoneInst, rdGrantInst, wrGrantInst]

    #-------------------------------------
    # AUX Functions
    #-------------------------------------
    def const(v, w):
        # Shorthand for a constant of value v and width w on the top level.
        return rbm.const(v, w)

    # Frequently used constants, named <value>_<width>.
    b0 = const(0, 1)
    b1 = const(1, 1)
    h0_8 = const(0, 8)
    h1_8 = const(1, 8)
    h0_4 = const(0, 4)
    h1_4 = const(1, 4)
    h2_4 = const(2, 4)
    h3_4 = const(3, 4)
    h4_4 = const(4, 4)
    h0_16 = const(0, 16)
    h1_16 = const(1, 16)
    h0_32 = const(0, 32)
    h0_64 = const(0, 64)

    #-------------------------------------
    # Init conditions
    #-------------------------------------
    rbm.set_init('init_done', b0)
    rbm.set_init('done', b0)
    rbm.set_init('num_hidden', h0_16)
    rbm.set_init('num_visible', h0_16)
    rbm.set_init('num_users', h0_16)
    rbm.set_init('num_loops', h0_16)
    rbm.set_init('num_testusers', h0_16)
    rbm.set_init('num_movies', h0_16)

    #-------------------------------------
    # Config
    #-------------------------------------
    # Reset clears every configuration register; confDoneInst (which needs
    # init_done == 0) latches the low 16 bits of the matching conf_* input.
    # this means, once configured, unless reset, it cannot be reconfigured
    init_done_nxt = ila.ite(rstInst, b0, ila.ite(confDoneInst, b1, init_done))
    num_hidden_nxt = ila.ite(
        rstInst, h0_16,
        ila.ite(confDoneInst, conf_num_hidden[15:0], num_hidden))
    num_visible_nxt = ila.ite(
        rstInst, h0_16,
        ila.ite(confDoneInst, conf_num_visible[15:0], num_visible))
    num_users_nxt = ila.ite(
        rstInst, h0_16,
        ila.ite(confDoneInst, conf_num_users[15:0], num_users))
    num_loops_nxt = ila.ite(
        rstInst, h0_16,
        ila.ite(confDoneInst, conf_num_loops[15:0], num_loops))
    num_testusers_nxt = ila.ite(
        rstInst, h0_16,
        ila.ite(confDoneInst, conf_num_testusers[15:0], num_testusers))
    num_movies_nxt = ila.ite(
        rstInst, h0_16,
        ila.ite(confDoneInst, conf_num_movies[15:0], num_movies))
    rbm.set_next('init_done', init_done_nxt)
    rbm.set_next('num_hidden', num_hidden_nxt)
    rbm.set_next('num_visible', num_visible_nxt)
    rbm.set_next('num_users', num_users_nxt)
    rbm.set_next('num_loops', num_loops_nxt)
    rbm.set_next('num_testusers', num_testusers_nxt)
    rbm.set_next('num_movies', num_movies_nxt)
    # INST-level w/r complete
    rbm_rd_complete = rbm.reg('rd_complete', 1)
    rbm_wr_complete = rbm.reg('wr_complete', 1)
    rbm.set_init('rd_complete', b0)
    rbm.set_init('wr_complete', b0)

    #------------------------------------
    # Compute UABS
    #------------------------------------
    # Second argument is presumably the condition under which the
    # micro-abstraction is active -- TODO confirm against the ila API.
    uabs = rbm.add_microabstraction('compute', (init_done == 1) & (done == 0))
    index = uabs.reg('index', 16)
    loop_count = uabs.reg('loop_count', 16)
    pc = uabs.reg('upc', 4)
    edges_mem = uabs.mem('edges', EDGEMEM_ADDR_WIDTH, 8)
    nlp = uabs.getreg('num_loops')
    # nm is widened to 32 bits here; it is reused in the predict section.
    nm = ila.zero_extend(uabs.getreg('num_movies'), 32)
    nu = uabs.getreg('num_users')
    ntu = uabs.getreg('num_testusers')
    out_rd_request = uabs.getreg('rd_request')
    out_rd_complete = uabs.getreg('rd_complete')
    out_rd_length = uabs.getreg('rd_length')
    out_rd_index = uabs.getreg('rd_index')
    train_input_done = uabs.reg('train_input_done', 1)
    predict_input_done = uabs.reg('predict_input_done', 1)
    uabs.set_init('upc', const(0, 4))
    uabs.set_init('index', h0_16)
    uabs.set_init('loop_count', h0_16)
    uabs.set_init('train_input_done', b0)
    uabs.set_init('predict_input_done', b0)
    uabs.set_init('rd_complete', b0)
    ### computation micro_instructions
    StartRead = (pc == 0)
    WaitReadComplete = (pc == 1) & (out_rd_complete == 0)
    DecideTrainOrPredict = (pc == 1) & (out_rd_complete == 1)
    StartTrain = (pc == 2) & (train_input_done == 1)
    StartPredict = (pc == 2) & (predict_input_done == 1)
    Finish = (pc == 3)
    # upc encodings matching the conditions above.
    StartReadState = const(0, 4)
    WaitReadCompleteState = const(1, 4)
    StartTrainOrPredict = const(2, 4)
    FinishState = const(3, 4)
    # NOTE(review): rebinds the top-level decodeExpr; not read afterwards in
    # this function and not assigned to uabs.decode_exprs -- confirm.
    decodeExpr = [
        StartRead, WaitReadComplete, DecideTrainOrPredict, StartTrain,
        StartPredict, Finish
    ]
    out_rd_request_nxt = ila.ite(StartRead, b1, out_rd_request)
    # NOTE(review): the literal 5 equals K above; presumably K was meant.
    out_rd_length_nxt = ila.ite(StartRead, 5 * nm, out_rd_length)
    out_rd_index_nxt = ila.ite(StartRead, ila.zero_extend(index, 32),
                               out_rd_index)
    out_rd_complete_nxt = ila.ite(
        StartRead, b0, ila.ite(DecideTrainOrPredict, b0, out_rd_complete))
    # Train while loop_count < num_loops, predict once they are equal.
    train_input_done_nxt = ila.ite(DecideTrainOrPredict,
                                   ila.ite(loop_count < nlp, b1, b0),
                                   train_input_done)
    predict_input_done_nxt = ila.ite(DecideTrainOrPredict,
                                     ila.ite(loop_count == nlp, b1, b0),
                                     predict_input_done)
    pc_nxt = ila.ite(
        StartRead,
        WaitReadCompleteState,
        ila.ite(
            WaitReadComplete,
            pc,
            ila.ite(
                DecideTrainOrPredict,
                StartTrainOrPredict,
                ila.ite(
                    StartTrain,
                    StartTrainOrPredict,  # StartReadState, # actually should be updated by u2inst
                    ila.ite(
                        StartPredict,
                        StartTrainOrPredict,  # StartReadState, # actually should be updated by u2inst
                        ila.ite(
                            Finish,
                            FinishState,
                            pc  # should never happen!
                        ))))))
    # should be updated by u2inst
    # NOTE(review): the two *_dummy expressions below are built but never
    # passed to set_next in this function.
    index_nxt_dummy = ila.ite(
        StartTrain | StartPredict,
        ila.ite(
            (index == nu - 1) & (loop_count != nlp),
            h0_16,
            ila.ite(
                (index == ntu - 1) & (loop_count == nlp),
                index,  # And it is not correct
                index + 1)),
        index)
    # not in use
    loop_count_nxt_dummy = ila.ite(
        StartTrain | StartPredict,
        ila.ite((index == nu - 1) & (loop_count != nlp), loop_count + 1,
                loop_count), loop_count)
    uabs.set_next('rd_request', out_rd_request_nxt)
    uabs.set_next('rd_length', out_rd_length_nxt)
    uabs.set_next('rd_index', out_rd_index_nxt)
    uabs.set_next('rd_complete', out_rd_complete_nxt)
    uabs.set_next('train_input_done', train_input_done_nxt)
    uabs.set_next('predict_input_done', predict_input_done_nxt)
    uabs.set_next('upc', pc_nxt)
    # index and loop_count are held at this level; the train / predict
    # micro-abstractions below provide the real updates.
    uabs.set_next('index', index)
    uabs.set_next('loop_count',
                  loop_count)  # this has to be updated by micro_inst
    # read_request is turned off by loaduabs
    # predict_input_done, train_input_done is turned off by uabs_train/predict

    #------------------------------------
    # Load UABS
    #------------------------------------
    # RBM interface
    # high-level interface
    rd_granted = rbm.reg(
        'rd_granted', 1
    )  # this is only used for maintaining the validity of load UABS, no other should use
    # On a granted read the top level stores the first byte at address 0.
    data_nxt = ila.ite(rdGrantInst,
                       ila.store(data, const(0, DATAMEM_ADDR_WIDTH),
                                 data_in[7:0]), data)  # data
    #
    rd_granted_nxt = ila.ite(rdGrantInst, b1, rd_granted)
    rbm.set_next('rd_granted', rd_granted_nxt)
    rbm.set_next('data', data_nxt)
    # one change is to move these into lower abstraction
    DMAload = rbm.add_microabstraction(
        'DMAload', (rd_granted == 1))  # this is sub-instruction
    w_cnt = DMAload.reg('i', 16)
    dma_rd_request = DMAload.getreg('rd_request')
    dma_rd_length = DMAload.getreg('rd_length')
    dma_rd_index = DMAload.getreg('rd_index')
    state_update_data = DMAload.getmem('data')
    state_update_rd_request = dma_rd_request
    self_update_rd_granted = DMAload.getreg('rd_granted')
    more_read_in = w_cnt < dma_rd_length[15:0]
    last_cycle = w_cnt == dma_rd_length[15:0]
    # The counter starts at 1 because address 0 is written by the top level.
    DMAload.set_init('i', h1_16)  # h0_16 )
    DMAload.set_next('i', ila.ite(more_read_in, w_cnt + 1, w_cnt))
    DMAload.set_next('rd_request', b0)  # reset to 0 immediately
    DMAload.set_next('rd_granted',
                     ila.ite(more_read_in, self_update_rd_granted, b0))
    DMAload.set_next('rd_complete', ila.ite(more_read_in, b0, b1))
    # While more data is expected, store data_in[7:0] at address i; on the
    # last cycle store the constant 1 at address rd_length instead.
    DMAload.set_next(
        'data',
        ila.ite(
            more_read_in,
            ila.store(state_update_data, w_cnt[DATAMEM_ADDR_WIDTH - 1:0],
                      data_in[7:0]),
            ila.ite(
                last_cycle,
                ila.store(state_update_data,
                          dma_rd_length[DATAMEM_ADDR_WIDTH - 1:0], h1_8),
                state_update_data)))

    #------------------------------------
    # Train UUABS
    #------------------------------------
    TrainUabs = uabs.add_microabstraction('train', train_input_done == 1)
    # Uninterpreted functions: (name, result width, [argument widths]) --
    # argument meaning assumed from the calls; TODO confirm.
    sigmoid_func = TrainUabs.fun('sigmoid', 64, [16])  # DATA_sum_, 01_D
    rand_func = TrainUabs.fun('rand', 64, [])  # generate random number
    to_int_exp = TrainUabs.fun('to_int_exp', 32, [16])  #
    divide_func = TrainUabs.fun(
        'divide', 64, [32, 64])  # dp:32_32 / sum_of_pow2 64_64 = 64_1
    hidden_unit = TrainUabs.mem('hidden_unit', HIDDEN_UNIT_WIDTH, 1)
    visible_unit = TrainUabs.mem('visible_unit', VISIBLE_UNIT_WIDTH, 1)
    visibleEnergy = TrainUabs.mem('visibleEnergies', KWIDTH, 16)
    pow2 = TrainUabs.mem('pow2', KWIDTH, 32)
    pos = TrainUabs.mem('pos', POS_ADDR_WIDTH, 1)
    #neg = TrainUabs.mem('neg', NEG_ADDR_WIDTH, 1 ) # not needed
    train_sum = TrainUabs.reg('train_sum', 16)
    train_max = TrainUabs.reg('train_max', 16)
    sumOfpow2 = TrainUabs.reg('sumOfpow2', 64)
    jstate = TrainUabs.reg('jstate', 16)
    inner_loop_pc = TrainUabs.reg('per_v_pc', 4)
    train_pc = TrainUabs.reg('train_upc', 4)
    # Re-evaluate
    v_cnt = TrainUabs.reg('train_v_cnt', 16)
    h_cnt = TrainUabs.reg('train_h_cnt', 16)
    train_input = TrainUabs.getmem('data')
    edges_input = TrainUabs.getmem('edges')
    # Re-fetch the configuration registers through TrainUabs; nu / ntu / nlp
    # rebind the names introduced in the compute section.
    nv = TrainUabs.getreg('num_visible')
    nh = TrainUabs.getreg('num_hidden')
    nu = TrainUabs.getreg('num_users')
    ntu = TrainUabs.getreg('num_testusers')
    nlp = TrainUabs.getreg('num_loops')
    # train_upc encodings: 0 SumEdge, 1 SumHidden, 3 StorePos, 2 EdgeUpdate.
    SumEdge = train_pc == 0
    SumEdgeState = const(0, 4)
    SumHidden = train_pc == 1
    SumHiddenState = const(1, 4)
    StorePos = train_pc == 3
    StorePosState = const(3, 4)
    EdgeUpdate = train_pc == 2
    EdgeUpdateState = const(2, 4)
    # NOTE(review): StorePos (train_pc == 3) is not listed here -- confirm
    # this is intended.
    TrainUabs.decode_exprs = [SumEdge, SumHidden, EdgeUpdate]
    #Begin
    v_cnt_init = const(0, 16)
    h_cnt_init = const(0, 16)
    pc_init = const(0, 4)
    #SumEdge: s0
    # Edge memory is addressed as (NUM_HIDDEN_MAX + 1) * v + h.
    edge_load_addr = (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt
    # Running sum, restarted when v_cnt == 0; an edge is added only when the
    # data byte at v_cnt equals 1.
    train_sum_s0_nxt = ila.ite(v_cnt == 0, const(0, 16), train_sum) + ila.ite(
        ila.load(train_input, v_cnt[DATAMEM_ADDR_WIDTH - 1:0]) == 1,
        fpconvert(ila.load(edges_input, edge_load_addr), FPedge, FPsum),
        const(0, 16))
    v_cnt_s0_nxt = ila.ite(v_cnt == nv, h0_16, v_cnt + 1)
    h_cnt_s0_nxt = ila.ite((v_cnt == nv),
                           ila.ite(h_cnt == nh - 1, h0_16, h_cnt + 1), h_cnt)
    # Here ^^^ is for transiting to next state
    hidden_update_s0_0 = ila.ite(
        ila.appfun(rand_func) < ila.appfun(sigmoid_func, train_sum_s0_nxt),
        b1, b0)
    hidden_update_s0_1 = ila.ite(
        v_cnt == nv,
        ila.store(hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0],
                  hidden_update_s0_0), hidden_unit)
    # On the very last step also write 1 at index nh.
    hidden_update_s0_2 = ila.ite((v_cnt == nv) & (h_cnt == nh - 1),
                                 ila.store(hidden_update_s0_1,
                                           nh[HIDDEN_UNIT_WIDTH - 1:0], b1),
                                 hidden_update_s0_1)
    train_pc_s0_nxt = ila.ite((v_cnt == nv) & (h_cnt == nh - 1),
                              SumHiddenState, SumEdgeState)
    # Just like init
    jstate_s0_nxt = h0_16
    inner_loop_pc_s0_nxt = h0_4
    # add prefix :
    # train_sum_nxt = ila.ite(SumEdge, train_sum_s0_nxt, ila.ite(SumHidden, ... ) )
    # SumHiddenK0-K4 : s1-s5
    # pc:1 per_v_pc : 0 1 2 3
    LastH = h_cnt == nh
    LastJ = jstate == K - 1
    LastV = (v_cnt + K == nv) | (v_cnt + K >= NUM_VISIBLE_MAX)
    # SumHidden is split into four sub-steps selected by per_v_pc.
    SumHiddenL0 = SumHidden & (inner_loop_pc == 0)
    SumHiddenL1 = SumHidden & (inner_loop_pc == 1)
    SumHiddenL2 = SumHidden & (inner_loop_pc == 2)
    SumHiddenL3 = SumHidden & (inner_loop_pc == 3)
    h_cnt_s1_s5_L0_nxt = ila.ite(LastH, h0_16, h_cnt + 1)
    jstate_s1_s5_L0_nxt = ila.ite(LastH, ila.ite(LastJ, h0_16, jstate + 1),
                                  jstate)
    inner_loop_pc_s1_s5_L0_nxt = ila.ite(LastJ & LastH, h1_4, inner_loop_pc)
    jstate_s1_s5_L1_nxt = ila.ite(LastJ, h0_16, jstate + 1)
    inner_loop_pc_s1_s5_L1_nxt = ila.ite(LastJ, h2_4, inner_loop_pc)
    jstate_s1_s5_L2_nxt = jstate_s1_s5_L1_nxt
    inner_loop_pc_s1_s5_L2_nxt = ila.ite(LastJ, h3_4, inner_loop_pc)
    jstate_s1_s5_L3_nxt = jstate_s1_s5_L2_nxt
    # NOTE(review): both branches of the inner ite are h0_4, so LastV has no
    # effect on this value.
    inner_loop_pc_s1_s5_L3_nxt = ila.ite(
        LastJ,
        ila.ite(LastV, h0_4, h0_4),  # will choose to go back or not
        inner_loop_pc)

    def nextCondition(l0, l1, l2, l3, default):
        # Select l0..l3 by the active SumHidden sub-step, else default.
        return ila.ite(
            SumHiddenL0, l0,
            ila.ite(
                SumHiddenL1, l1,
                ila.ite(SumHiddenL2, l2, ila.ite(SumHiddenL3, l3, default))))

    h_cnt_s1_s5_nxt = nextCondition(h_cnt_s1_s5_L0_nxt, h_cnt, h_cnt, h_cnt,
                                    h_cnt)
    v_cnt_s1_s5_nxt = ila.ite(SumHiddenL3 & LastJ,
                              ila.ite(LastV, h0_16, v_cnt + K), v_cnt)
    jstate_s1_s5_nxt = nextCondition(jstate_s1_s5_L0_nxt, jstate_s1_s5_L1_nxt,
                                     jstate_s1_s5_L2_nxt, jstate_s1_s5_L3_nxt,
                                     jstate)
    inner_loop_pc_s1_s5_nxt = nextCondition(inner_loop_pc_s1_s5_L0_nxt,
                                            inner_loop_pc_s1_s5_L1_nxt,
                                            inner_loop_pc_s1_s5_L2_nxt,
                                            inner_loop_pc_s1_s5_L3_nxt,
                                            inner_loop_pc)
    train_pc_s1_s5_nxt = ila.ite(SumHiddenL3 & LastJ & LastV, StorePosState,
                                 SumHiddenState)
    # L0
    # NOTE(review): edge_load_addr is the expression built for SumEdge; it is
    # reused here with the current v_cnt / h_cnt and no jstate offset.
    train_sum_s1_s5_L0_nxt = ila.ite(h_cnt == 0, h0_16, train_sum) + ila.ite(
        ila.load(hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0]) == 1,
        fpconvert(ila.load(edges_input, edge_load_addr), FPedge, FPsum), h0_16)
    _train_max_origin_L0 = ila.ite(
        jstate == 0,
        fpconst(-500, FPsum).ast,
        train_max)  # make sure the first time we are comparing with init sum
    # Signed maximum of the running maximum and the freshly completed sum.
    train_max_s1_s5_L0_nxt = ila.ite(
        LastH,
        ila.ite(ila.sgt(train_sum_s1_s5_L0_nxt, _train_max_origin_L0),
                train_sum_s1_s5_L0_nxt, _train_max_origin_L0), train_max)
    visibleEnergy_s1_s5_L0_nxt = ila.ite(
        LastH,
        ila.store(visibleEnergy, jstate[KWIDTH - 1:0], train_sum_s1_s5_L0_nxt),
        visibleEnergy)
    # L1
    # sum3: 64_64 -> dp: 32_32
    _31_sum = fpconst(31, FPsum).ast
    # 31 is subtracted from the maximum once (when jstate == 0); each stored
    # energy is then reduced by that adjusted maximum.
    train_max_s1_s5_L1_nxt = ila.ite(jstate == 0, train_max - _31_sum,
                                     train_max)
    _st_val_L1 = ila.load(visibleEnergy,
                          jstate[KWIDTH - 1:0]) - train_max_s1_s5_L1_nxt
    visibleEnergy_s1_s5_L1_nxt = ila.store(visibleEnergy, jstate[KWIDTH - 1:0],
                                           _st_val_L1)
    # L2
    _pow2_new_val = ila.appfun(to_int_exp,
                               ila.load(visibleEnergy, jstate[KWIDTH - 1:0]))
    _pow2_new_convert = fpconvert(_pow2_new_val, FPpow, FPsum3)
    sumOfpow2_s1_s5_L2_nxt = ila.ite(jstate == 0, h0_64,
                                     sumOfpow2) + _pow2_new_convert
    pow2_s1_s5_L2_nxt = ila.store(pow2, jstate[KWIDTH - 1:0], _pow2_new_val)
    # L3
    _probs = ila.appfun(divide_func,
                        [ila.load(pow2, jstate[KWIDTH - 1:0]), sumOfpow2])
    _RAND = ila.appfun(rand_func)
    _visible_unit_new_val = ila.ite(_probs > _RAND, b1, b0)
    _vu_idx = v_cnt + jstate
    _visible_unit_s1_s5_L3_1 = ila.store(visible_unit,
                                         _vu_idx[VISIBLE_UNIT_WIDTH - 1:0],
                                         _visible_unit_new_val)
    visible_unit_s1_s5_L3_nxt = ila.ite(
        LastJ & LastV,
        ila.store(_visible_unit_s1_s5_L3_1, nv[VISIBLE_UNIT_WIDTH - 1:0], b1),
        _visible_unit_s1_s5_L3_1)
    # when exit visible unit should be made to store 1 at nv
    train_sum_s1_s5_nxt = nextCondition(train_sum_s1_s5_L0_nxt, train_sum,
                                        train_sum, train_sum, train_sum)
    train_max_s1_s5_nxt = nextCondition(train_max_s1_s5_L0_nxt,
                                        train_max_s1_s5_L1_nxt, train_max,
                                        train_max, train_max)
    visible_unit_s1_s5_nxt = nextCondition(visible_unit, visible_unit,
                                           visible_unit,
                                           visible_unit_s1_s5_L3_nxt,
                                           visible_unit)
    visibleEnergy_s1_s5_nxt = nextCondition(visibleEnergy_s1_s5_L0_nxt,
                                            visibleEnergy_s1_s5_L1_nxt,
                                            visibleEnergy, visibleEnergy,
                                            visibleEnergy)
    sumOfpow2_s1_s5_nxt = nextCondition(sumOfpow2, sumOfpow2,
                                        sumOfpow2_s1_s5_L2_nxt, sumOfpow2,
                                        sumOfpow2)
    pow2_s1_s5_nxt = nextCondition(pow2, pow2, pow2_s1_s5_L2_nxt, pow2, pow2)
    # before s6: store pos
    h_cnt_sp_nxt = ila.ite(h_cnt == nh, h0_16, h_cnt + 1)
    v_cnt_sp_nxt = ila.ite(h_cnt == nh,
                           ila.ite(v_cnt == nv, h0_16, v_cnt + 1), v_cnt)
    _data_load = ila.load(train_input, v_cnt[VISIBLE_UNIT_WIDTH - 1:0])
    # NOTE(review): _pos_sp_cond is never used; pos_sp_nxt below stores
    # unconditionally -- confirm whether the store should be guarded by it.
    _pos_sp_cond = (_data_load != 2)
    _pos_sp_val = ila.ite(_data_load != 0, b1, b0) & ila.load(
        hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0])
    _pos_st_addr = (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt
    pos_sp_nxt = ila.store(pos, _pos_st_addr, _pos_sp_val)
    train_pc_sp_nxt = ila.ite((h_cnt == nh) & (v_cnt == nv), EdgeUpdateState,
                              StorePosState)
    # update edge : s6
    h_cnt_s6_nxt = ila.ite(h_cnt == nh, h0_16, h_cnt + 1)
    v_cnt_s6_nxt = ila.ite(h_cnt == nh,
                           ila.ite(v_cnt == nv, v_cnt, v_cnt + 1), v_cnt)
    _pos_ld_addr = (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt
    train_pos = ila.load(pos, _pos_ld_addr) != 0
    train_neg = (ila.load(
        hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0]) != 0) & (ila.load(
            visible_unit, v_cnt[VISIBLE_UNIT_WIDTH - 1:0]) != 0)
    edge_original = ila.load(edges_mem, (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt)
    # +LEARN_RATE when only pos is set, -LEARN_RATE when only neg is set,
    # unchanged otherwise.
    edge_new = ila.ite((train_pos) & (~train_neg),
                       edge_original + fpconst(LEARN_RATE, FPedge).ast,
                       ila.ite((~train_pos) & (train_neg),
                               edge_original - fpconst(LEARN_RATE, FPedge).ast,
                               edge_original))
    edge_s6_nxt = ila.store(edges_mem, (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt,
                            edge_new)
    # NOTE(review): both branches are EdgeUpdateState (see the original
    # remark below for the rationale).
    train_pc_s6_nxt = ila.ite((h_cnt == nh) & (v_cnt == nv), EdgeUpdateState,
                              EdgeUpdateState)
    # no need to jump back itself, because the flag: train_input_done is turned back to zero
    # don't forget to set back signals in Uabs ()
    train_done = TrainUabs.getreg('train_input_done')
    train_uabs_index = TrainUabs.getreg('index')
    train_uabs_loop_count = TrainUabs.getreg('loop_count')
    train_uabs_upc = TrainUabs.getreg('upc')
    # add prefix s6 !!!
    s6_complete = (h_cnt == nh) & (v_cnt == nv)
    # When the edge update finishes: advance 'index' (wrapping to 0 and
    # bumping 'loop_count' after the last user), send 'upc' back to
    # StartRead and clear train_input_done.
    index_nxt_s6_nxt = ila.ite(
        s6_complete,
        ila.ite((train_uabs_index == nu - 1) & (train_uabs_loop_count != nlp),
                h0_16, train_uabs_index + 1), train_uabs_index)
    # assert (train_uabs_index == ntu - 1) & (train_uabs_loop_count == nlp) should never happen
    loop_count_s6_nxt = ila.ite(
        s6_complete & (train_uabs_index == nu - 1) &
        (train_uabs_loop_count != nlp), train_uabs_loop_count + 1,
        train_uabs_loop_count)
    upc_s6_nxt = ila.ite(s6_complete, StartReadState, train_uabs_upc)
    train_input_done_s6_nxt_nxt = ila.ite(s6_complete, b0, train_done)

    # data -> hidden_unit -> visible_unit -> edge
    # data -> edge
    # add
    def TrainNext(e1, e2, e3, default):
        # Select by train state: SumEdge -> e1, SumHidden -> e2,
        # EdgeUpdate -> e3, anything else (including StorePos) -> default.
        return ila.ite(
            SumEdge, e1,
            ila.ite(SumHidden, e2, ila.ite(EdgeUpdate, e3, default)))

    def TrainNextSP(e1, e2, e3, e4, default):
        # As TrainNext, with an extra StorePos case (e3) before EdgeUpdate.
        return ila.ite(
            SumEdge, e1,
            ila.ite(SumHidden, e2,
                    ila.ite(StorePos, e3, ila.ite(EdgeUpdate, e4, default))))

    # NOTE(review): the three TrainChoice* helpers are not called anywhere in
    # this function.
    def TrainChoice5(name, e1, e2, e3, default):
        return ila.choice(name, e1, e2, e3, default)

    def TrainChoice4(name, e1, e2, default):
        return ila.choice(name, e1, e2, default)

    def TrainChoice3(name, e1, default):
        return ila.choice(name, e1, default)

    TrainUabs.set_init('train_upc', pc_init)
    TrainUabs.set_init('train_v_cnt', v_cnt_init)
    TrainUabs.set_init('train_h_cnt', h_cnt_init)
    TrainUabs.set_next(
        'jstate', TrainNext(jstate_s0_nxt, jstate_s1_s5_nxt, jstate, jstate))
    TrainUabs.set_next(
        'train_sum',
        TrainNext(train_sum_s0_nxt, train_sum_s1_s5_nxt, train_sum, train_sum))
    TrainUabs.set_next(
        'train_v_cnt',
        TrainNextSP(v_cnt_s0_nxt, v_cnt_s1_s5_nxt, v_cnt_sp_nxt, v_cnt_s6_nxt,
                    v_cnt))
    TrainUabs.set_next(
        'train_h_cnt',
        TrainNextSP(h_cnt_s0_nxt, h_cnt_s1_s5_nxt, h_cnt_sp_nxt, h_cnt_s6_nxt,
                    h_cnt))
    TrainUabs.set_next(
        'train_upc',
        TrainNextSP(train_pc_s0_nxt, train_pc_s1_s5_nxt, train_pc_sp_nxt,
                    train_pc_s6_nxt, train_pc))
    TrainUabs.set_next(
        'train_max',
        TrainNext(train_max, train_max_s1_s5_nxt, train_max, train_max))
    TrainUabs.set_next(
        'hidden_unit',
        TrainNext(hidden_update_s0_2, hidden_unit, hidden_unit, hidden_unit))
    TrainUabs.set_next(
        'visible_unit',
        TrainNext(visible_unit, visible_unit_s1_s5_nxt, visible_unit,
                  visible_unit))
    TrainUabs.set_next('edges',
                       TrainNext(edges_mem, edges_mem, edge_s6_nxt, edges_mem))
    # State owned by the parent 'compute' micro-abstraction; only the
    # EdgeUpdate step changes it.
    TrainUabs.set_next(
        'index',
        TrainNext(train_uabs_index, train_uabs_index, index_nxt_s6_nxt,
                  train_uabs_index))
    TrainUabs.set_next(
        'loop_count',
        TrainNext(train_uabs_loop_count, train_uabs_loop_count,
                  loop_count_s6_nxt, train_uabs_loop_count))
    TrainUabs.set_next(
        'upc',
        TrainNext(train_uabs_upc, train_uabs_upc, upc_s6_nxt, train_uabs_upc))
    TrainUabs.set_next(
        'train_input_done',
        TrainNext(train_done, train_done, train_input_done_s6_nxt_nxt,
                  train_done))
    # newly added
    TrainUabs.set_next(
        'visibleEnergies',
        TrainNext(visibleEnergy, visibleEnergy_s1_s5_nxt, visibleEnergy,
                  visibleEnergy))
    TrainUabs.set_next(
        'sumOfpow2',
        TrainNext(sumOfpow2, sumOfpow2_s1_s5_nxt, sumOfpow2, sumOfpow2))
    TrainUabs.set_next('pow2', TrainNext(pow2, pow2_s1_s5_nxt, pow2, pow2))
    TrainUabs.set_next('pos', ila.ite(StorePos, pos_sp_nxt, pos))
    TrainUabs.set_next(
        'per_v_pc',
        TrainNext(inner_loop_pc_s0_nxt, inner_loop_pc_s1_s5_nxt, inner_loop_pc,
                  inner_loop_pc))

    #------------------------------------
    # Predict UUABS
    #------------------------------------
    # data -> predict_result
    # From here on many names used in the train section (sigmoid_func,
    # hidden_unit, jstate, v_cnt, SumEdge, LastV, nextCondition, ...) are
    # rebound to PredictUabs state.
    PredictUabs = uabs.add_microabstraction('predict',
                                            predict_input_done == 1)
    sigmoid_func = PredictUabs.fun('sigmoid', 64, [16])  # DATA_sum_, 01_D
    rand_func = PredictUabs.fun('rand', 64, [])  # generate random number
    to_int_exp = PredictUabs.fun('to_int_exp', 32, [16])  #
    round_func = PredictUabs.fun('round', 8, [32])  # 05_D -> u8
    divide_func = PredictUabs.fun(
        'divide', 64, [32, 64])  # dp:32_32 / sum_of_pow2 64_64 = 64_1
    hidden_unit = PredictUabs.mem('hidden_unit', HIDDEN_UNIT_WIDTH, 1)
    visibleEnergy = PredictUabs.mem('visibleEnergies', KWIDTH, 16)
    predict_result = PredictUabs.getmem('predict_result')
    predict_sum = PredictUabs.reg('predict_sum', 16)
    predict_max = PredictUabs.reg('predict_max', 16)
    sumOfpow2 = PredictUabs.reg('sumOfpow2', 64)
    pow2 = PredictUabs.mem('pow2', KWIDTH, 32)
    predict_vector = PredictUabs.mem('predict_vector', VISIBLE_UNIT_WIDTH, 1)
    inner_loop_pc = PredictUabs.reg('per_v_pc', 4)
    count = PredictUabs.reg('count', 8)
    jstate = PredictUabs.reg('jstate', 16)
    expectation = PredictUabs.reg('expectation', 32)
    prediction = PredictUabs.reg('prediction', 8)
    predict_pc = PredictUabs.reg('predict_upc', 4)
    # Re-evaluate
    v_cnt = PredictUabs.reg('predict_v_cnt', 16)
    h_cnt = PredictUabs.reg('predict_h_cnt', 16)
    predict_input = PredictUabs.getmem('data')
    edges_input = PredictUabs.getmem('edges')
    nv = PredictUabs.getreg('num_visible')
    nh = PredictUabs.getreg('num_hidden')
    nu = PredictUabs.getreg('num_users')
    ntu = PredictUabs.getreg('num_testusers')
    nlp = PredictUabs.getreg('num_loops')
    # predict_upc encodings: 0 SumEdge, 1 SumHidden, 3 GenResult,
    # 2 WaitForWrite.
    SumEdge = predict_pc == 0
    SumEdgeState = const(0, 4)
    SumHidden = predict_pc == 1
    SumHiddenState = const(1, 4)
    GenResult = predict_pc == 3
    GenResultState = const(3, 4)
    WaitForWrite = predict_pc == 2
    WaitForWriteState = const(2, 4)
    # NOTE(review): GenResult (predict_pc == 3) is not listed here -- confirm
    # this is intended.
    PredictUabs.decode_exprs = [SumEdge, SumHidden, WaitForWrite]
    #Begin
    v_cnt_init = const(0, 16)
    h_cnt_init = const(0, 16)
    pc_init = const(0, 4)
    #SumEdge: s0
    edge_load_addr = (NUM_HIDDEN_MAX + 1) * v_cnt + h_cnt
    predict_sum_s0_nxt = ila.ite(v_cnt == 0, const(
        0, 16), predict_sum) + ila.ite(
            ila.load(predict_input, v_cnt[DATAMEM_ADDR_WIDTH - 1:0]) == 1,
            fpconvert(ila.load(edges_input, edge_load_addr), FPedge, FPsum),
            const(0, 16))
    v_cnt_s0_nxt = ila.ite(v_cnt == nv, h0_16, v_cnt + 1)
    h_cnt_s0_nxt = ila.ite((v_cnt == nv),
                           ila.ite(h_cnt == nh - 1, h0_16, h_cnt + 1), h_cnt)
    # Here ^^^ is for transiting to next state
    # Unlike the train section, the sigmoid output is compared against the
    # constant 0.5 rather than against rand().
    hidden_update_s0_0 = ila.ite(
        fpconst(0.5, FP01_D).ast < ila.appfun(sigmoid_func,
                                              predict_sum_s0_nxt), b1, b0)
    hidden_update_s0_1 = ila.ite(
        v_cnt == nv,
        ila.store(hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0],
                  hidden_update_s0_0), hidden_unit)
    hidden_update_s0_2 = ila.ite((v_cnt == nv) & (h_cnt == nh - 1),
                                 ila.store(hidden_update_s0_1,
                                           nh[HIDDEN_UNIT_WIDTH - 1:0], b1),
                                 hidden_update_s0_1)
    # NOTE(review): hidden_update_s0_next is not read later in this function;
    # set_next('hidden_unit', ...) uses hidden_update_s0_2 directly.
    hidden_update_s0_next = hidden_update_s0_2
    predict_pc_s0_nxt = ila.ite((v_cnt == nv) & (h_cnt == nh - 1),
                                SumHiddenState, SumEdgeState)
    jstate_s0_nxt = h0_16
    count_s0_nxt = ila.const(0, 8)
    inner_loop_pc_s0_nxt = h0_4
    # add prefix :
    # predict_sum_nxt = ila.ite(SumEdge, predict_sum_s0_nxt, ila.ite(SumHidden, ... ) )
    #-----------------------------
    # SumHiddensK0-K4 : s1-s5
    #
    #-----------------------------
    LastH = h_cnt == nh
    LastJ = jstate == K - 1
    LastV = (v_cnt + K == nv) | (v_cnt + K >= NUM_VISIBLE_MAX)
    # Five sub-steps here (per_v_pc 0..4), one more than in the train section.
    SumHiddenL0 = SumHidden & (inner_loop_pc == 0)
    SumHiddenL1 = SumHidden & (inner_loop_pc == 1)
    SumHiddenL2 = SumHidden & (inner_loop_pc == 2)
    SumHiddenL3 = SumHidden & (inner_loop_pc == 3)
    SumHiddenL4 = SumHidden & (inner_loop_pc == 4)
    h_cnt_s1_s5_L0_nxt = ila.ite(LastH, h0_16, h_cnt + 1)
    jstate_s1_s5_L0_nxt = ila.ite(LastH, ila.ite(LastJ, h0_16, jstate + 1),
                                  jstate)
    inner_loop_pc_s1_s5_L0_nxt = ila.ite(LastJ & LastH, h1_4, inner_loop_pc)
    jstate_s1_s5_L1_nxt = ila.ite(LastJ, h0_16, jstate + 1)
    inner_loop_pc_s1_s5_L1_nxt = ila.ite(LastJ, h2_4, inner_loop_pc)
    jstate_s1_s5_L2_nxt = jstate_s1_s5_L1_nxt
    inner_loop_pc_s1_s5_L2_nxt = ila.ite(LastJ, h3_4, inner_loop_pc)
    jstate_s1_s5_L3_nxt = jstate_s1_s5_L2_nxt
    inner_loop_pc_s1_s5_L3_nxt = ila.ite(LastJ, h4_4, inner_loop_pc)
    jstate_s1_s5_L4_nxt = jstate_s1_s5_L3_nxt
    # NOTE(review): both branches of the inner ite are h0_4, so LastV has no
    # effect on this value.
    inner_loop_pc_s1_s5_L4_nxt = ila.ite(
        LastJ,
        ila.ite(LastV, h0_4, h0_4),  # will choose to go back or not
        inner_loop_pc)

    def nextCondition(l0, l1, l2, l3, l4, default):
        # Select l0..l4 by the active SumHidden sub-step, else default.
        # (Rebinds the four-way helper defined in the train section.)
        return ila.ite(
            SumHiddenL0, l0,
            ila.ite(
                SumHiddenL1, l1,
                ila.ite(
                    SumHiddenL2, l2,
                    ila.ite(SumHiddenL3, l3, ila.ite(SumHiddenL4, l4,
                                                     default)))))

    h_cnt_s1_s5_nxt = nextCondition(h_cnt_s1_s5_L0_nxt, h_cnt, h_cnt, h_cnt,
                                    h_cnt, h_cnt)
    v_cnt_s1_s5_nxt = ila.ite(SumHiddenL4 & LastJ,
                              ila.ite(LastV, h0_16, v_cnt + K), v_cnt)
    jstate_s1_s5_nxt = nextCondition(jstate_s1_s5_L0_nxt, jstate_s1_s5_L1_nxt,
                                     jstate_s1_s5_L2_nxt, jstate_s1_s5_L3_nxt,
                                     jstate_s1_s5_L4_nxt, jstate)
    inner_loop_pc_s1_s5_nxt = nextCondition(inner_loop_pc_s1_s5_L0_nxt,
                                            inner_loop_pc_s1_s5_L1_nxt,
                                            inner_loop_pc_s1_s5_L2_nxt,
                                            inner_loop_pc_s1_s5_L3_nxt,
                                            inner_loop_pc_s1_s5_L4_nxt,
                                            inner_loop_pc)
    predict_pc_s1_s5_nxt = ila.ite(SumHiddenL4 & LastJ & LastV, GenResultState,
                                   SumHiddenState)
    # L0
    predict_sum_s1_s5_L0_nxt = ila.ite(
        h_cnt == 0, h0_16, predict_sum) + ila.ite(
            ila.load(hidden_unit, h_cnt[HIDDEN_UNIT_WIDTH - 1:0]) == 1,
            fpconvert(ila.load(edges_input, edge_load_addr), FPedge, FPsum),
            h0_16)
    _predict_max_origin_L0 = ila.ite(
        jstate == 0,
        fpconst(-500, FPsum).ast,
        predict_max)  # make sure the first time we are comparing with init sum
    predict_max_s1_s5_L0_nxt = ila.ite(
        LastH,
        ila.ite(ila.sgt(predict_sum_s1_s5_L0_nxt, _predict_max_origin_L0),
                predict_sum_s1_s5_L0_nxt, _predict_max_origin_L0), predict_max)
    visibleEnergy_s1_s5_L0_nxt = ila.ite(
        LastH,
        ila.store(visibleEnergy, jstate[KWIDTH - 1:0],
                  predict_sum_s1_s5_L0_nxt), visibleEnergy)
    # L1
    # sum3: 64_64 -> dp: 32_32
    _31_sum = fpconst(31, FPsum).ast
    predict_max_s1_s5_L1_nxt = ila.ite(jstate == 0, predict_max - _31_sum,
                                       predict_max)
    _st_val_L1 = ila.load(visibleEnergy,
                          jstate[KWIDTH - 1:0]) - predict_max_s1_s5_L1_nxt
    visibleEnergy_s1_s5_L1_nxt = ila.store(visibleEnergy, jstate[KWIDTH - 1:0],
                                           _st_val_L1)
    # L2
    _pow2_new_val = ila.appfun(to_int_exp,
                               ila.load(visibleEnergy, jstate[KWIDTH - 1:0]))
    _pow2_new_convert = fpconvert(_pow2_new_val, FPpow, FPsum3)
    sumOfpow2_s1_s5_L2_nxt = ila.ite(jstate == 0, h0_64,
                                     sumOfpow2) + _pow2_new_convert
    pow2_s1_s5_L2_nxt = ila.store(pow2, jstate[KWIDTH - 1:0], _pow2_new_val)
    # L3
    # Accumulate probs * jstate into 'expectation', restarting at jstate == 0.
    _probs = ila.appfun(divide_func,
                        [ila.load(pow2, jstate[KWIDTH - 1:0]), sumOfpow2])
    _mul = fixpoint(_probs, FP01_D) * fixpoint(jstate, FPu16)
    expectation_s1_s5_L3_nxt = ila.ite(jstate == 0, h0_32,
                                       expectation) + _mul.toFormat(FP05_D)
    # L4
    # Write 1 into predict_vector at the position whose jstate equals the
    # rounded expectation, 0 elsewhere.
    _prediction = ila.zero_extend(ila.appfun(round_func, [expectation]), 16)
    _pv_val = ila.ite(jstate == _prediction, b1, b0)
    _pv_idx = v_cnt + jstate
    _first_store = ila.store(predict_vector,
                             _pv_idx[VISIBLE_UNIT_WIDTH - 1:0], _pv_val)
    predict_vector_s1_s5_L4_nxt = ila.ite(
        SumHiddenL4 & LastV & LastJ,
        ila.store(_first_store, nv[VISIBLE_UNIT_WIDTH - 1:0], b1),
        _first_store)
    predict_sum_s1_s5_nxt = nextCondition(predict_sum_s1_s5_L0_nxt,
                                          predict_sum, predict_sum,
                                          predict_sum, predict_sum,
                                          predict_sum)
    predict_max_s1_s5_nxt = nextCondition(predict_max_s1_s5_L0_nxt,
                                          predict_max_s1_s5_L1_nxt,
                                          predict_max, predict_max,
                                          predict_max, predict_max)
    visibleEnergy_s1_s5_nxt = nextCondition(visibleEnergy_s1_s5_L0_nxt,
                                            visibleEnergy_s1_s5_L1_nxt,
                                            visibleEnergy, visibleEnergy,
                                            visibleEnergy, visibleEnergy)
    sumOfpow2_s1_s5_nxt = nextCondition(sumOfpow2, sumOfpow2,
                                        sumOfpow2_s1_s5_L2_nxt, sumOfpow2,
                                        sumOfpow2, sumOfpow2)
    pow2_s1_s5_nxt = nextCondition(pow2, pow2, pow2_s1_s5_L2_nxt, pow2, pow2,
                                   pow2)
    expectation_s1_s5_nxt = ila.ite(SumHiddenL3, expectation_s1_s5_L3_nxt,
                                    expectation)
    predict_vector_s1_s5_nxt = ila.ite(SumHiddenL4,
                                       predict_vector_s1_s5_L4_nxt,
                                       predict_vector)
    count_s1_s5_nxt = ila.ite(SumHiddenL4 & LastV & LastJ, h0_8, count)
    # before s6: store pos
    LastV = (v_cnt + K == nv) | (v_cnt + K >= NUM_VISIBLE_MAX)
    LastJ = jstate == K - 1
    # NOTE(review): both branches are v_cnt + K, and v_cnt advances on every
    # GenResult step regardless of LastJ -- confirm against the design.
    v_cnt_sp_nxt = ila.ite(LastV, v_cnt + K, v_cnt + K)
    jstate_sp_nxt = ila.ite(LastJ, h0_16, jstate + 1)
    _prediction_old = ila.ite(jstate == 0, h0_8, prediction)
    _pv_idx = v_cnt + jstate
    _predict_result_sp_val = ila.load(predict_vector,
                                      _pv_idx[VISIBLE_UNIT_WIDTH - 1:0])
    prediction_sp_nxt = ila.ite(_predict_result_sp_val == 1,
                                (jstate + 1)[7:0], _prediction_old)
    count_sp_nxt = ila.ite(LastJ, count + 1, count)
    # NOTE(review): stores the current 'prediction' register, not
    # prediction_sp_nxt computed in the same step -- confirm.
    predict_result_sp_nxt = ila.ite(
        LastJ,
        ila.store(predict_result, count[PREDICT_RESULT_WIDTH - 1:0],
                  prediction), predict_result)
    predict_pc_sp_nxt = ila.ite(LastV & LastJ, WaitForWriteState,
                                GenResultState)
    wr_complete = PredictUabs.getreg('wr_complete')
    wr_req = PredictUabs.getreg('wr_request')
    wr_len = PredictUabs.getreg('wr_length')
    wr_idx = PredictUabs.getreg('wr_index')
    cur_idx = PredictUabs.getreg('index')  # 32
    # Leaving GenResult raises the write request and programs its index and
    # length from num_movies.
    exitLoop = LastV & LastJ
    wr_request_sp_nxt = ila.ite(exitLoop, b1, wr_req)
    # nm is still the 32-bit value built in the compute section.
    wr_index_sp_nxt = ila.ite(
        exitLoop,
        ila.zero_extend(nm, 32) * ila.zero_extend(cur_idx, 32), wr_idx)
    wr_length_sp_nxt = ila.ite(exitLoop, ila.zero_extend(nm, 32), wr_len)
    wr_complete_sp_nxt = ila.ite(exitLoop, b0, wr_complete)
    # s6:
    #---------------------
    # update edge : s6
    #---------------------
    FinishOneRound = (wr_req == 0) & (wr_complete == 1)
    # NOTE(review): both branches are WaitForWriteState (see the original
    # remark below for the rationale).
    predict_pc_s6_nxt = ila.ite(FinishOneRound, WaitForWriteState,
                                WaitForWriteState)
    # its value does not matter because it will be terminated by predict_input_done
    # don't forget to set back signals in Uabs ()
    predict_done = PredictUabs.getreg('predict_input_done')
    predict_uabs_index = PredictUabs.getreg('index')
    predict_uabs_loop_count = PredictUabs.getreg('loop_count')
    predict_uabs_upc = PredictUabs.getreg('upc')
    all_done = PredictUabs.getreg('done')
    # add prefix s6 !!!
    # Once the write-back has completed: advance 'index' unless this was the
    # last test user, pick the next 'upc' (Finish or StartRead), clear
    # predict_input_done and set 'done' on the last test user.
    index_nxt_s6_nxt = ila.ite(
        FinishOneRound,
        ila.ite(
            (predict_uabs_index == ntu - 1) & (predict_uabs_loop_count == nlp),
            predict_uabs_index, predict_uabs_index + 1), predict_uabs_index)
    wr_complete_s6_nxt = ila.ite(FinishOneRound, b0, wr_complete)
    # assert (predict_uabs_index == nu - 1) & (predict_uabs_loop_count != nlp) should never happen
    #loop_count_s6_nxt = ila.ite( (predict_uabs_index == nu - 1) & (predict_uabs_loop_count != nlp) , predict_uabs_loop_count + 1, predict_uabs_loop_count )
    upc_s6_nxt = ila.ite(
        FinishOneRound,
        ila.ite(
            (predict_uabs_index == ntu - 1) & (predict_uabs_loop_count == nlp),
            FinishState, StartReadState), predict_uabs_upc)
    predict_input_done_s6_nxt_nxt = ila.ite(FinishOneRound, b0, predict_done)
    all_done_s6_nxt = ila.ite(
        FinishOneRound & (predict_uabs_index == ntu - 1) &
        (predict_uabs_loop_count == nlp), b1, b0)

    # data -> hidden_unit -> visible_unit -> edge
    # data -> edge
    # add
    # add
    def predictNext(e1, e2, e3, default):
        # Select by predict state: SumEdge -> e1, SumHidden -> e2,
        # WaitForWrite -> e3, anything else (including GenResult) -> default.
        return ila.ite(
            SumEdge, e1,
            ila.ite(SumHidden, e2, ila.ite(WaitForWrite, e3, default)))

    def predictNextSp(e1, e2, e3, e4, default):
        # As predictNext, with an extra GenResult case (e3) before
        # WaitForWrite.
        return ila.ite(
            SumEdge, e1,
            ila.ite(SumHidden, e2,
                    ila.ite(GenResult, e3, ila.ite(WaitForWrite, e4,
                                                   default))))

    def ite(inst, e, default):
        # Thin local alias for ila.ite.
        return ila.ite(inst, e, default)

    PredictUabs.set_init('predict_upc', pc_init)
    PredictUabs.set_init('predict_v_cnt', v_cnt_init)
    PredictUabs.set_init('predict_h_cnt', h_cnt_init)
    PredictUabs.set_next(
        'jstate',
        predictNextSp(jstate_s0_nxt, jstate_s1_s5_nxt, jstate_sp_nxt, jstate,
                      jstate))
    PredictUabs.set_next(
        'predict_sum',
        predictNext(predict_sum_s0_nxt, predict_sum_s1_s5_nxt, predict_sum,
                    predict_sum))
    PredictUabs.set_next(
        'predict_v_cnt',
        predictNextSp(v_cnt_s0_nxt, v_cnt_s1_s5_nxt, v_cnt_sp_nxt, v_cnt,
                      v_cnt))
    PredictUabs.set_next(
        'predict_h_cnt',
        predictNext(h_cnt_s0_nxt, h_cnt_s1_s5_nxt, h_cnt, h_cnt))
    PredictUabs.set_next(
        'predict_upc',
        predictNextSp(predict_pc_s0_nxt, predict_pc_s1_s5_nxt,
                      predict_pc_sp_nxt, predict_pc_s6_nxt, predict_pc))
    PredictUabs.set_next(
        'predict_max',
        predictNext(predict_max, predict_max_s1_s5_nxt, predict_max,
                    predict_max))
    PredictUabs.set_next(
        'hidden_unit',
        predictNext(hidden_update_s0_2, hidden_unit, hidden_unit, hidden_unit))
    PredictUabs.set_next(
        'count',
        predictNextSp(count_s0_nxt, count_s1_s5_nxt, count_sp_nxt, count,
                      count))
    PredictUabs.set_next(
        'per_v_pc',
        predictNext(inner_loop_pc_s0_nxt, inner_loop_pc_s1_s5_nxt,
                    inner_loop_pc, inner_loop_pc))
    # State owned by the parent 'compute' micro-abstraction and by the top
    # level; only the WaitForWrite step changes it.
    PredictUabs.set_next(
        'index',
        predictNext(predict_uabs_index, predict_uabs_index, index_nxt_s6_nxt,
                    predict_uabs_index))
    PredictUabs.set_next(
        'upc',
        predictNext(predict_uabs_upc, predict_uabs_upc, upc_s6_nxt,
                    predict_uabs_upc))
    PredictUabs.set_next(
        'predict_input_done',
        predictNext(predict_done, predict_done, predict_input_done_s6_nxt_nxt,
                    predict_done))
    PredictUabs.set_next(
        'done', predictNext(all_done, all_done, all_done_s6_nxt, all_done))
    PredictUabs.set_next(
        'wr_request',
        predictNextSp(wr_req, wr_req, wr_request_sp_nxt, wr_req, wr_req))
    PredictUabs.set_next(
        'wr_length',
        predictNextSp(wr_len, wr_len, wr_length_sp_nxt, wr_len, wr_len))
    PredictUabs.set_next(
        'wr_index',
        predictNextSp(wr_idx, wr_idx, wr_index_sp_nxt, wr_idx, wr_idx))
    PredictUabs.set_next(
        'wr_complete',
        predictNextSp(wr_complete, wr_complete, wr_complete_sp_nxt,
                      wr_complete_s6_nxt, wr_complete))
    # newly added
    PredictUabs.set_next(
        'visibleEnergies',
        predictNext(visibleEnergy, visibleEnergy_s1_s5_nxt, visibleEnergy,
                    visibleEnergy))
    PredictUabs.set_next(
        'sumOfpow2',
        predictNext(sumOfpow2, sumOfpow2_s1_s5_nxt, sumOfpow2, sumOfpow2))
    PredictUabs.set_next('pow2',
                         predictNext(pow2, pow2_s1_s5_nxt, pow2, pow2))
    PredictUabs.set_next(
        'expectation',
        predictNext(expectation, expectation_s1_s5_nxt, expectation,
                    expectation))
    PredictUabs.set_next(
        'predict_vector',
        predictNext(predict_vector, predict_vector_s1_s5_nxt, predict_vector,
                    predict_vector))
    PredictUabs.set_next('prediction',
                         ite(GenResult, prediction_sp_nxt, prediction))
    PredictUabs.set_next('predict_result',
                         ite(GenResult, predict_result_sp_nxt, predict_result))

    #------------------------------------
    # Store UABS
    #------------------------------------
    # store is triggered by inst as uabs?
    # wr_grant == 1 is an instruction
    # NOTE: wr_request here is still the top-level register bound at the
    # start of the function; predict_result is the PredictUabs handle.
    wr_granted = rbm.reg('wr_granted', 1)
    rbm.set_next('wr_granted',
                 ila.ite((wr_request & wr_grant) == 1, b1, wr_granted))
    # On a granted write the top level outputs predict_result[0].
    data_out_1st_set = ila.zero_extend(
        ila.load(predict_result, const(0, PREDICT_RESULT_WIDTH)), 32)
    rbm.set_next(
        'data_out',
        ila.ite((wr_request & wr_grant) == 1, data_out_1st_set, data_out))
    # This is a hard decision,
    # as we set_next, the reaction as we defined will be appear in the next cycle
    StoreUabs = rbm.add_microabstraction('store', wr_granted == 1)
    store_idx = StoreUabs.reg('i', 16)
    # From here nm is the 16-bit register (no zero-extension), and the
    # wr_* / predict_result names are rebound to StoreUabs handles.
    nm = StoreUabs.getreg('num_movies')
    wr_granted = StoreUabs.getreg('wr_granted')
    wr_request = StoreUabs.getreg('wr_request')
    wr_complete = StoreUabs.getreg('wr_complete')
    predict_result = StoreUabs.getmem('predict_result')
    StoreUabs.set_init('i', h1_16)
    StoreUabs.set_next('i', ila.ite(store_idx < nm, store_idx + 1, store_idx))
    StoreUabs.set_next('wr_granted', ila.ite(store_idx < nm, wr_granted, b0))
    # NOTE(review): 'i' is initialised to 1 and only incremented here, so
    # store_idx == 0 looks unreachable and wr_request would never be cleared
    # by this rule -- confirm (DMAload clears rd_request unconditionally).
    StoreUabs.set_next('wr_request', ila.ite(store_idx == 0, b0, wr_request))
    StoreUabs.set_next('wr_complete', ila.ite(store_idx < nm, wr_complete, b1))
    data_out = StoreUabs.getreg('data_out')
    # possibly one cycle earlier
    StoreUabs.set_next(
        'data_out',
        ila.zero_extend(
            ila.load(predict_result, store_idx[PREDICT_RESULT_WIDTH - 1:0]),
            32))

    #---------------------------
    # Add no next
    #
    def keepNC(Abs, name):
        # Hold register 'name' of abstraction Abs at its current value.
        Abs.set_next(name, Abs.getreg(name))

    def keepMemNC(Abs, name):
        # Hold memory 'name' of abstraction Abs at its current value.
        Abs.set_next(name, Abs.getmem(name))

    # These states have no top-level (or compute-level) update of their own;
    # they are changed only by the micro-abstractions above.
    keepNC(rbm, 'done')
    keepNC(rbm, 'wr_request')
    keepNC(rbm, 'wr_index')
    keepNC(rbm, 'wr_length')
    keepNC(rbm, 'rd_index')
    keepNC(rbm, 'rd_length')
    keepNC(rbm, 'rd_request')
    keepMemNC(uabs, 'edges')
    keepNC(rbm, 'rd_complete')
    keepNC(rbm, 'wr_complete')
    return rbm
def createFifoILA():
    """Assemble the "fifo" abstraction, synthesize its state and export it.

    Every next-state function except `fifo_outdata` (which is held
    unchanged) is an `ila.choice` template.  Each variable listed in
    `fifo().all_state` is then synthesized with `synth` against
    `fifo().simulate`, and the result is written to 'tpm_export.cpp' and
    'tpm_export.c'.  Nothing is returned.
    """
    m = ila.Abstraction("fifo")

    def byte(value):
        # 8-bit constant of this abstraction.
        return m.const(value, 8)

    # ---------------------------------------------------------------
    # Inputs
    # ---------------------------------------------------------------
    cmd = m.inp("cmd", 3)
    cmdaddr = m.inp("cmdaddr", 64)
    cmddata = m.inp("cmddata", 8)

    # ---------------------------------------------------------------
    # Constants
    # ---------------------------------------------------------------
    zero = byte(0x0)
    one = byte(0x1)
    two = byte(0x2)
    three = byte(0x3)
    four = byte(0x4)
    thirty = byte(0x1e)

    # Flags that the status register can report.
    sts_valid = byte(fifo_def.STS_VALID)
    sts_data_avail = byte(fifo_def.STS_DATA_AVAIL)
    sts_data_expect = byte(fifo_def.STS_DATA_EXPECT)

    # Commands that can be written to status.  These two constants are
    # created here but not referenced again in this function.
    byte(fifo_def.STS_GO)
    byte(fifo_def.STS_COMMAND_READY)

    # ---------------------------------------------------------------
    # State variables and their next-state templates
    # ---------------------------------------------------------------
    # FIFO state machine.
    fifo_state = m.reg("fifo_state", 8)
    state_candidates = [zero, one, two, three, four]
    m.set_next("fifo_state",
               ila.choice("fifo_state_choice", state_candidates))

    # Status register.
    fifo_sts = m.reg("fifo_sts", 8)
    sts_candidates = [
        sts_valid,
        sts_valid | sts_data_avail,
        sts_valid | sts_data_expect,
        zero,
    ]
    m.set_next("fifo_sts", ila.choice("fifo_sts_choice", sts_candidates))

    # Internal index into the input FIFO: amount written so far.
    fifo_in_amt = m.reg("fifo_in_amt", 8)
    in_amt_candidates = [fifo_in_amt, fifo_in_amt + 1, zero]
    m.set_next("fifo_in_amt",
               ila.choice("fifo_in_amt_choice", in_amt_candidates))

    fifo_in_cmdsize = m.reg("fifo_in_cmdsize", 8)
    cmdsize_candidates = [fifo_in_cmdsize, cmddata, zero]
    m.set_next("fifo_in_cmdsize",
               ila.choice("fifo_in_cmdsize_choice", cmdsize_candidates))

    # Input FIFO memory: 256 entries of 8 bits.
    fifo_indata = m.mem("fifo_indata", 8, 8)
    indata_candidates = [
        fifo_indata,
        ila.store(fifo_indata, fifo_in_amt, cmddata),
    ]
    m.set_next("fifo_indata", ila.choice("fifo_indata", indata_candidates))

    # Internal index into the output FIFO.
    # TODO base this size off of list of command sizes
    fifo_out_amt = m.reg("fifo_out_amt", 8)
    out_amt_candidates = [fifo_out_amt, fifo_out_amt - 1, zero, thirty]
    m.set_next("fifo_out_amt",
               ila.choice("fifo_out_amt_choice", out_amt_candidates))

    # Output FIFO memory: another 256 entries of 8 bits, never modified.
    fifo_outdata = m.mem("fifo_outdata", 8, 8)
    m.set_next("fifo_outdata", fifo_outdata)

    # Value returned by a read or write.
    dataout = m.reg("dataout", 8)
    dataout_candidates = [
        zero,
        fifo_outdata[fifo_out_amt],
        fifo_sts,
        fifo_def.FIFO_MAX_AMT - fifo_in_amt,
        fifo_out_amt,
    ]
    m.set_next("dataout", ila.choice("dataout_choice", dataout_candidates))

    # ---------------------------------------------------------------
    # Decode logic
    # ---------------------------------------------------------------
    addresses = [fifo_def.STS_ADDR, fifo_def.FIFO_ADDR, fifo_def.BURST_ADDR]
    commandData = [fifo_def.STS_COMMAND_READY, fifo_def.STS_GO]

    # Command start/end: status writes, split by written value, by
    # fifo_out_amt in {0, 1} and by FIFO state.
    cmds = []
    for data_value in commandData:
        for out_amt in range(2):
            for state_value in range(5):
                cmds.append(
                    (cmdaddr == fifo_def.STS_ADDR) & (cmd == fifo_def.WR) &
                    (cmddata == data_value) & (fifo_out_amt == out_amt) &
                    (fifo_state == state_value))

    # Reading the in_cmdsize: FIFO write while sending, with five bytes
    # already written.
    cmdsize = []
    for data_value in commandData:
        cmdsize.append(
            (cmdaddr == fifo_def.FIFO_ADDR) &
            (fifo_state == fifo_def.FIFO_SENDING) &
            (fifo_in_amt == 5) & (cmd == fifo_def.WR) &
            (cmddata == data_value))

    # Actual command: GO written once the whole command has been received
    # and bytes 6..9 of the input FIFO hold 00 00 00 14.
    pcr_extend_cond = (
        (cmdaddr == fifo_def.STS_ADDR) & (cmd == fifo_def.WR) &
        (cmddata == fifo_def.STS_GO) &
        (fifo_state == fifo_def.FIFO_ACCEPTING) &
        (fifo_in_amt == fifo_in_cmdsize) &
        (ila.load(fifo_indata, m.const(0x6, 8)) == 0) &
        (ila.load(fifo_indata, m.const(0x7, 8)) == 0) &
        (ila.load(fifo_indata, m.const(0x8, 8)) == 0) &
        (ila.load(fifo_indata, m.const(0x9, 8)) == 0x14))
    pcr_extend = [pcr_extend_cond]

    # General reads/writes: every address, command and state combination.
    general = []
    for address in addresses:
        for command in [0, 1, 2]:
            for state_value in range(5):
                general.append((cmdaddr == address) & (cmd == command) &
                               (fifo_state == state_value))

    m.decode_exprs = general + cmds + pcr_extend + cmdsize

    # ---------------------------------------------------------------
    # Synthesize
    # ---------------------------------------------------------------
    f = fifo()

    def sim(s):
        return f.simulate(s)

    for var in f.all_state:
        synth(m, var, sim)

    m.generateSim('tpm_export.cpp')
    m.generateCbmcC('tpm_export.c')
def model(num_regs, reg_size, paramsyn):
    """Synthesize the ALU abstraction and check it against `aluexpr`.

    Steps, in order:
      1. Build an "alu" abstraction with a ROM, a PC and `alu.NUM_REGS`
         registers whose next-state functions are `ila.choice` templates.
      2. Synthesize 'pc' alone and assert the result equals `pc + 1`.
      3. Synthesize everything else and assert each register's next-state
         function equals the corresponding entry returned by `aluexpr`.
      4. Add a small memory write/read-back (byte and 3-byte block) on top.
      5. Export the abstraction to 'tmp/test_ila_export.txt', import it
         into a fresh abstraction and repeat the register check.
      6. Generate the simulator into 'tmp/dir'.

    Args:
        num_regs: number of registers; must be a power of two (asserted).
        reg_size: register size forwarded to `alu_sim`.
        paramsyn: value assigned to `enable_parameterized_synthesis`.
    """
    reg_field_width = int(math.log(num_regs, 2))
    assert (1 << reg_field_width) == num_regs

    # create the alu.
    alu = alu_sim(reg_field_width, reg_size)

    alu_ila = ila.Abstraction("alu")
    alu_ila.enable_parameterized_synthesis = paramsyn

    # state elements.
    rom = alu_ila.mem('rom', alu.ROM_ADDR_WIDTH, alu.OPCODE_WIDTH)
    pc = alu_ila.reg('pc', alu.ROM_ADDR_WIDTH)
    opcode = rom[pc]
    regs = [alu_ila.reg('r%d' % i, alu.REG_SIZE)
            for i in xrange(alu.NUM_REGS)]

    # get the two sources, then every candidate ALU result built from them.
    rs = ila.choice('rs', regs)
    rt = ila.choice('rt', regs)
    alu_results = [rs + rt, rs - rt, rs & rt, rs | rt, ~rs, -rs, ~rt, -rt]

    # set next.
    alu_ila.set_next('pc', ila.choice('pc', pc + 1, pc + 2))
    # set rom next.
    alu_ila.set_next('rom', rom)
    # each register either takes one of the ALU results or keeps its value.
    for i in xrange(alu.NUM_REGS):
        ri_next = ila.choice('result%d' % i, alu_results + [regs[i]])
        alu_ila.set_next('r%d' % i, ri_next)

    # set the fetch expressions.
    alu_ila.fetch_expr = opcode
    # now set the decode expressions.
    alu_ila.decode_exprs = [opcode == i for i in xrange(alu.NUM_OPCODES)]

    # synthesize pc first.
    alu_ila.synthesize('pc', lambda s: alu.alusim(s))
    pc_next = alu_ila.get_next('pc')
    assert alu_ila.areEqual(pc_next, pc + 1)

    # now synthesize.
    st = time.clock()
    alu_ila.synthesize(lambda s: alu.alusim(s))
    et = time.clock()
    print('time for synthesis: %.3f' % (et - st))

    regs_next = aluexpr(rom, pc, regs)
    for i in xrange(alu.NUM_REGS):
        rn1 = alu_ila.get_next('r%d' % i)
        rn2 = regs_next[i]
        assert alu_ila.areEqual(rn1, rn2)

    # addr 8 bit, data 8 bit
    xram = alu_ila.mem('xram', 8, 8)
    wrrd = alu_ila.reg('wrrd', 8)
    data = alu_ila.const(0xfe, 8)
    addr = alu_ila.const(0x04, 8)
    xram = ila.store(xram, addr, data)
    wrrd_next = xram[addr]
    alu_ila.set_next('wrrd', wrrd_next)

    # same round trip with a 3-byte block store/load.
    wrrdblx = alu_ila.reg('wrrdblx', 24)
    datablx = alu_ila.const(0x0f00fe, 24)
    xram = ila.storeblk(xram, addr, datablx)
    wrrdblx_next = ila.loadblk(xram, addr, 3)
    alu_ila.set_next('wrrdblx', wrrdblx_next)

    #alu_ila.add_assumption(opcode == 0x80)
    #print alu_ila.syn_elem("r0", alu_ila.get_next('r0'), alusim)

    # Create the output tree before anything is written under tmp/: the
    # export file below lives in the parent of this directory.
    path = 'tmp/dir'
    if not os.path.exists(path):
        os.makedirs(path)

    expFile = "tmp/test_ila_export.txt"
    alu_ila.exportAll(expFile)

    # now import into a new abstraction and check.
    sysp = ila.Abstraction("alu")
    sysp.importAll(expFile)

    romp = sysp.getmem('rom')
    pcp = sysp.getreg('pc')
    regsp = [sysp.getreg('r%d' % i) for i in xrange(alu.NUM_REGS)]
    regs_next = aluexpr(romp, pcp, regsp)
    for i in xrange(alu.NUM_REGS):
        rn1 = sysp.get_next('r%d' % i)
        rn2 = regs_next[i]
        assert sysp.areEqual(rn1, rn2)

    #os.unlink(expFile)

    #simFile = "tmp/test_ila_sim.hpp"
    #alu_ila.generateSim(simFile)
    alu_ila.generateSimToDir(path)
def main():
    # Exercise memory support of the ila bindings: symbolic loads/stores,
    # MemValues constants, template synthesis over memories and block
    # store/load.  Every check is an assert; intermediate values are printed.
    sys = ila.Abstraction("test")
    iram = sys.mem('iram', 8, 8)
    addr = sys.reg('addr', 8)
    print iram, iram.type
    data = iram[addr]

    addrp = sys.reg('addrp', 8)
    datap = iram[addrp]

    t = sys.bool(True)
    f = sys.bool(False)

    # Loads from equal addresses return equal data.
    assert sys.areEqual((addr != addrp) | (data == datap), t)
    print data, data.type

    datap = data+1
    print datap, datap.type

    # Reading back a stored value yields that value.
    iramp = ila.store(iram, addr, data+1)
    print iramp, iramp.type

    assert sys.areEqual(iramp[addr], data+1)
    assert not sys.areEqual(data, data+1)

    # Concrete memory value: default 0xff, with 0x80..0x8f set to 0..0xf.
    m = ila.MemValues(8, 8, 0xff)
    print m

    for i in xrange(0x80, 0x90):
        m[i] = i-0x80
    print m

    for i in xrange(0x0, 0x100):
        if i >= 0x80 and i < 0x90:
            assert m[i] == i-0x80
        else:
            assert m[i] == 0xff
    print m

    # m1 is taken with default 0xff; m2 is taken after the default is
    # changed to 0, so the two constants agree on 0x80..0x8f only.
    m1 = sys.const(m)
    assert m.default == 0xff
    m.default = 0x0
    print m
    assert m[0] == 0
    print m.values

    m2 = sys.const(m)
    # assert not sys.areEqual(m1[addr], m2[addr])
    ante = ((addr >= 0x80) & (addr < 0x90))
    conseq = (m1[addr] == m2[addr])
    assert sys.areEqual(ila.implies(ante, conseq), t)
    assert not sys.areEqual(conseq, t)

    # Synthesize a bitvector template against `foo`, which is not defined
    # in this function (presumably a module-level simulator -- confirm).
    r1 = iram[addr]+1
    r2 = iram[addr]+iram[addr+1]
    r = ila.choice('r', r1, r2)
    print sys.syn_elem("foo", r, foo)

    def bar(d):
        # Simulator for the memory template below: returns a copy of
        # d['iram'] in which the entry at d['addr'] is incremented unless
        # the address is 0.
        print d
        ram = d['iram']
        ram_ = ila.MemValues(8, 8, ram.default)
        print ram
        print ram_
        for (ad, da) in ram.values:
            ram_[ad] = da
        addr = d['addr']
        print ram_, addr, ram[addr]
        if addr != 0:
            ram_[addr] = ram_[addr]+1
        print ram_
        return { "bar": ram_ }

    # Of the three candidates only r3 matches what bar() computes.
    r1 = ila.store(iram, addr, iram[addr]+1)
    r2 = ila.store(iram, addr, iram[addr]+2)
    r3 = ila.ite(addr != 0, r1, iram)
    rp = ila.choice('rp', r1, r2, r3)
    expr = sys.syn_elem("bar", rp, bar)
    assert sys.areEqual(expr, r3)

    ila.setloglevel(3, "")

    # Block store/load of a 32-bit value; the asserts below pin the byte
    # order (lowest address holds the least significant byte).
    data = sys.const(0xdeadbeef, 32)
    print data
    iramp = ila.storeblk(iram, addrp, data)
    d0 = iramp[addrp]
    d1 = iramp[addrp+1]
    d2 = iramp[addrp+2]
    d3 = iramp[addrp+3]
    datablk = ila.loadblk(iramp, addrp, 4)
    assert sys.areEqual(datablk, data)
    assert sys.areEqual(datablk, ila.concat([d3, d2, d1, d0]))
    assert sys.areEqual(ila.concat(d0, d1), sys.const(0xefbe, 16))
    assert sys.areEqual(ila.concat(d2, d3), sys.const(0xadde, 16))
def GenTLBFlush(tlb_pte_buf, proc):
    """Return `tlb_pte_buf` with slots 0..TLB_SIZE-1 overwritten by zero.

    Args:
        tlb_pte_buf: memory expression to start from.
        proc: abstraction used to create the constants.

    Returns:
        A chain of `ila.store` expressions, one per index in
        range(TLB_SIZE), each writing a 32-bit zero at a
        TLB_IDX_LEN-bit index.
    """
    cleared_entry = proc.const(0, 32)
    flushed = tlb_pte_buf
    for slot in range(TLB_SIZE):
        slot_index = proc.const(slot, TLB_IDX_LEN)
        flushed = ila.store(flushed, slot_index, cleared_entry)
    return flushed
def ssa_next_state(self):
    """Install next-state functions for the PC, memory and SSA registers.

    Walks the module-level `program` (a sequence of token lists; lines with
    fewer than two tokens are skipped).  Each recognised opcode -- 'ld',
    'cvta', 'mov', 'add', 'mul', 'st' -- occupies one 4-byte PC slot, and
    its destination is updated only when `self.pc` equals that slot.
    Operand widths come from `ssa_declaration`, whose values carry the bit
    width after a two-character prefix.  Lines with any other opcode are
    printed and consume no PC slot.

    Afterwards a 32-bit 'support' register is created with next state
    s3 + s1, and every declared register that no instruction wrote is set
    to keep its value.
    """
    self.model.set_next('pc', self.pc + 4)

    # The file contents are not used below; the read is kept so that a
    # missing 'instruction_book' still fails here.  The handle is now
    # closed deterministically.
    with open('instruction_book', 'r') as instruction_book_obj:
        instruction_book_obj.readlines()

    def declared_width(reg_name):
        # Bit width encoded after the two-character type prefix.
        return int(ssa_declaration[reg_name][2:])

    def fit_width(value, have_bits, want_bits):
        # Sign-extend or truncate `value` from have_bits to want_bits.
        if want_bits > have_bits:
            return ila.sign_extend(value, want_bits)
        if want_bits < have_bits:
            return value[(want_bits - 1):0]
        return value

    mem_addr_msb = instruction_format.MEM_ADDRESS_BITS - 1
    current_pc = 0
    next_state_finished = set()

    for program_line in program:
        if len(program_line) < 2:
            continue
        opcode_name = program_line[0].split('.')[0]
        at_this_pc = self.pc == current_pc
        handled = True

        if opcode_name == 'ld':
            dest_name = program_line[1]
            dest_length = declared_width(dest_name)
            dest_reg = self.model.getreg(dest_name)
            addr_reg = self.model.getreg(program_line[2])
            loaded = self.mem[addr_reg[mem_addr_msb:0]]
            loaded = fit_width(loaded, instruction_format.DMEM_BITS,
                               dest_length)
            self.model.set_next(dest_name,
                                ila.ite(at_this_pc, loaded, dest_reg))
            next_state_finished.add(dest_name)
        elif opcode_name in ('cvta', 'mov'):
            # Both copy the source into the destination, adjusting width.
            dest_name = program_line[1]
            dest_length = declared_width(dest_name)
            src_length = declared_width(program_line[2])
            dest = self.model.getreg(dest_name)
            src = self.model.getreg(program_line[2])
            src = fit_width(src, src_length, dest_length)
            self.model.set_next(dest_name, ila.ite(at_this_pc, src, dest))
            next_state_finished.add(dest_name)
        elif opcode_name == 'add':
            dest_name = program_line[1]
            dest = self.model.getreg(dest_name)
            src0 = self.model.getreg(program_line[2])
            src1 = self.model.getreg(program_line[3])
            dest_length = declared_width(dest_name)
            src0 = fit_width(src0, declared_width(program_line[2]),
                             dest_length)
            src1 = fit_width(src1, declared_width(program_line[3]),
                             dest_length)
            self.model.set_next(dest_name,
                                ila.ite(at_this_pc, src0 + src1, dest))
            next_state_finished.add(dest_name)
        elif opcode_name == 'mul':
            # The second operand is an immediate, not a register.
            dest_name = program_line[1]
            dest_length = declared_width(dest_name)
            src0_length = declared_width(program_line[2])
            dest = self.model.getreg(dest_name)
            src0 = self.model.getreg(program_line[2])
            src1 = ila.const(int(program_line[3]), dest_length)
            src0 = fit_width(src0, src0_length, dest_length)
            self.model.set_next(dest_name,
                                ila.ite(at_this_pc, src0 * src1, dest))
            next_state_finished.add(dest_name)
        elif opcode_name == 'st':
            addr = self.model.getreg(program_line[1])
            src = self.model.getreg(program_line[2])
            stored = ila.store(
                self.mem, addr[mem_addr_msb:0],
                src[(instruction_format.DMEM_BITS - 1):0])
            self.model.set_next('mem',
                                ila.ite(at_this_pc, stored, self.mem))
        else:
            handled = False

        if handled:
            current_pc += 4
        else:
            # Unrecognised opcode: report it and do not advance the slot.
            print(program_line)

    self.model.reg('support', 32)
    self.model.set_next(
        'support', self.model.getreg('s3') + self.model.getreg('s1'))

    # Registers never written by any instruction hold their value.
    for reg_name in ssa_declaration:
        if reg_name not in next_state_finished:
            reg = self.model.getreg(reg_name)
            self.model.set_next(reg_name, reg)
def synthesize(state, enable_ps):
    """Build the 8051 synthesis templates and synthesize the given state.

    For each architectural state element (PC, ACC, IRAM, PSW, SP, DPL/DPH,
    B, the XRAM interface and the SFRs) a next-state template is assembled
    from `ila.choice` candidates and installed with `model.set_next`.
    Each name in `state` is then synthesized against `eval8051`, timed,
    and its resulting AST exported to 'asts/<name>_<en|dis>'.

    Args:
        state: iterable of state names to synthesize.
        enable_ps: value assigned to `model.enable_parameterized_synthesis`;
            also selects the 'en'/'dis' suffix of the exported files.
    """
    uc = uc8051()
    # create nicknames
    pc, iram, sp = uc.pc, uc.iram, uc.sp
    op0, op1, op2 = uc.op0, uc.op1, uc.op2
    acc, b, dptr = uc.acc, uc.b, uc.dptr
    psw = uc.psw
    rx = uc.rx
    rom = uc.rom
    model = uc.model
    model.enable_parameterized_synthesis = enable_ps
    bv = model.const

    # fetch and decode.
    model.fetch_expr = uc.op0  # s/hand for uc.rom[uc.pc]
    model.decode_exprs = [uc.op0 == i for i in xrange(0x0, 0x100)]

    ########################### PC ##############################################
    def cjmp(name, cond):
        # Conditional jump: relative target when `cond` holds, otherwise
        # fall through.  pc_rel1/pc_rel2 are bound further down, before
        # the first call.
        pc_taken = ila.choice(name + '_taken', pc_rel1, pc_rel2)
        pc_seq = ila.choice(name + '_seq', pc + 2, pc + 3)
        return ila.ite(cond, pc_taken, pc_seq)

    def jmppolarity(name):
        # One-bit hole: the value a tested bit must have for the jump.
        return ila.inrange(name, bv(0, 1), bv(1, 1))

    # ajmp/acall
    pc_ajmp_pg1 = (pc + 2)[15:11]
    pc_ajmp_pg2 = ila.inrange('ajmp_page', bv(0x0, 3), bv(0x7, 3))
    pc_ajmp_pg = ila.concat(pc_ajmp_pg1, pc_ajmp_pg2)
    pc_ajmp = ila.concat(pc_ajmp_pg, op1)
    # lcall/ljmp
    pc_ljmp = ila.choice('ljmp',
                         [ila.concat(op2, op1), ila.concat(op1, op2)])
    # ret.
    pc_ret = ila.choice('pc_ret', [
        ila.concat(iram[sp - 1], iram[sp]),
        ila.concat(iram[sp], iram[sp - 1]),
        ila.concat(iram[sp], iram[sp + 1]),
        ila.concat(iram[sp + 1], iram[sp])
    ])
    # relative to pc
    pc_rel1 = ila.choice('pc_rel1_base', [pc, pc + 1, pc + 2, pc + 3
                                          ]) + ila.sign_extend(op1, 16)
    pc_rel2 = ila.choice('pc_rel2_base', [pc, pc + 1, pc + 2, pc + 3
                                          ]) + ila.sign_extend(op2, 16)
    # sjmp
    pc_sjmp = ila.choice('sjmp', pc_rel1, pc_rel2)
    # jb
    jb_bitaddr = ila.choice('jb_bitaddr', [op1, op2])
    jb_bit = uc.readBit(jb_bitaddr)
    jx_polarity = jmppolarity('jx_polarity')
    pc_jb = cjmp('pc_jb', jb_bit == jx_polarity)
    # jc
    pc_jc = cjmp('pc_jc', uc.cy == jx_polarity)
    # jz
    acc_zero = acc == 0
    acc_nonzero = acc != 0
    jz_test = ila.choice('jz_test_polarity', acc_zero, acc_nonzero)
    pc_jz = cjmp('pc_jz', jz_test)
    # jmp
    pc_jmp = dptr + ila.zero_extend(acc, 16)
    # cjne
    cjne_src1 = ila.choice('cjne_src1',
                           [acc, iram[rx[0]], iram[rx[1]]] + rx)
    cjne_src2 = ila.choice(
        'cjne_src2',
        [op1, op2,
         uc.readDirect(ila.choice('cjne_iram_addr', [op1, op2]))])
    cjne_taken = cjne_src1 != cjne_src2
    pc_cjne = cjmp('pc_cjne', cjne_taken)
    # djnz
    djnz_src = ila.choice(
        'djnz_src',
        [uc.readDirect(ila.choice('djnz_iram_src', [op1, op2]))] + rx)
    djnz_taken = djnz_src != 1
    pc_djnz = cjmp('pc_djnz', djnz_taken)

    pc_choices = [
        pc + 1, pc + 2, pc + 3, pc_ajmp, pc_ljmp, pc_ret, pc_sjmp, pc_jb,
        pc_jc, pc_jz, pc_jmp, pc_cjne, pc_djnz
    ]
    model.set_next('PC', ila.choice('pc', pc_choices))

    ########################### ACC ##############################################
    # various sources for ALU ops.
    acc_src2_dir_addr = ila.choice('acc_src2_dir_addr', [op1, op2])
    acc_src2_dir = ila.choice('acc_src2_dir',
                              [uc.readDirect(acc_src2_dir_addr)] + rx)
    acc_src2_indir_addr = ila.choice('acc_src2_indir_addr', [rx[0], rx[1]])
    acc_src2_indir = iram[acc_src2_indir_addr]
    src2_imm = ila.choice('src2_imm', [op1, op2])
    acc_src2 = ila.choice('acc_src2',
                          [acc_src2_dir, acc_src2_indir, src2_imm])
    acc_rom_offset = ila.choice('acc_rom_offset',
                                [dptr, pc + 1, pc + 2, pc + 3])

    # the decimal adjust instruction. this is a bit of mess.
    # first, deal with the lower nibble
    acc_add_6 = (uc.ac == 1) | (acc[3:0] > 9)
    acc_ext9 = ila.zero_extend(acc, 9)
    acc_da_stage1 = ila.ite(acc_add_6, acc_ext9 + 6, acc_ext9)
    acc_da_cy1 = acc_da_stage1[8:8]
    # and then the upper nibble
    acc_add_60 = ((acc_da_cy1 | uc.cy) == 1) | (acc_da_stage1[7:4] > 9)
    acc_da_stage2 = ila.ite(acc_add_60, acc_da_stage1 + 0x60,
                            acc_da_stage1)
    acc_da = acc_da_stage2[7:0]

    # instructions which modify the accumulator.
    acc_rr = ila.rrotate(acc, 1)
    acc_rrc = ila.rrotate(ila.concat(acc, uc.cy), 1)[8:1]
    acc_rl = ila.lrotate(acc, 1)
    acc_rlc = ila.lrotate(ila.concat(uc.cy, acc), 1)[7:0]
    acc_inc = acc + 1
    acc_dec = acc - 1
    acc_add = acc + acc_src2
    acc_addc = acc + acc_src2 + ila.zero_extend(uc.cy, 8)
    acc_orl = acc | acc_src2
    acc_anl = acc & acc_src2
    acc_xrl = acc ^ acc_src2
    # sign-extending the 1-bit carry gives 0x00 or 0xff, so adding it
    # subtracts the borrow.
    acc_subb = acc - acc_src2 + ila.sign_extend(uc.cy, 8)
    acc_mov = acc_src2
    acc_cpl = ~acc
    acc_clr = bv(0, 8)
    acc_rom = rom[ila.zero_extend(acc, 16) + acc_rom_offset]
    acc_swap = ila.concat(acc[3:0], acc[7:4])
    # div.
    acc_div = ila.ite(b == 0, bv(0xff, 8), acc / b)
    b_div = ila.ite(b == 0, acc, acc % b)
    # mul
    mul_result = ila.zero_extend(acc, 16) * ila.zero_extend(b, 16)
    acc_mul = mul_result[7:0]
    b_mul = mul_result[15:8]
    # xchg - dir
    xchg_src2_dir_addr = ila.choice('xchg_src2_dir_addr',
                                    [op1, op2] + uc.rxaddr)
    xchg_src2_dir = uc.readDirect(xchg_src2_dir_addr)
    acc_xchg_dir = xchg_src2_dir
    # xchg - indir
    xchg_src2_indir_addr = ila.choice('xchg_src2_indir_addr',
                                      [rx[0], rx[1]])
    xchg_src2_full_indir = iram[xchg_src2_indir_addr]
    xchg_src2_half_indir = ila.concat(acc[7:4], xchg_src2_full_indir[3:0])
    xchg_src2_indir = ila.choice(
        'xchg_src2_indir', [xchg_src2_full_indir, xchg_src2_half_indir])
    acc_xchg_indir = xchg_src2_indir
    # final acc value.
    acc_next = ila.choice('acc_r_next', [
        acc_rr, acc_rl, acc_rrc, acc_rlc, acc_inc, acc_dec, acc_add,
        acc_addc, acc_orl, acc_anl, acc_xrl, acc_mov, acc_rom, acc_clr,
        acc_subb, acc_swap, acc_cpl, acc, acc_div, acc_mul, acc_da,
        acc_xchg_dir, acc_xchg_indir, uc.xram_data_in
    ])
    model.set_next('ACC', acc_next)

    ########################### IRAM ##############################################
    # instructions where the result is a direct iram address
    dir_src1_addr = ila.choice('dir_src1_addr', [op1, op2] + uc.rxaddr)
    dir_src1 = uc.readDirect(dir_src1_addr)
    dir_src2_iram_addr = ila.choice('dir_src2_iram_addr',
                                    [op1, op2] + uc.rxaddr)
    dir_src2_iram = uc.readDirect(dir_src2_iram_addr)
    dir_src2_indir_addr = ila.choice('dir_src2_indir_addr', [rx[0], rx[1]])
    dir_src2_indir = iram[dir_src2_indir_addr]
    dir_src2 = ila.choice(
        'dir_src2', [op1, op2, acc, dir_src2_iram, dir_src2_indir])
    dir_inc = dir_src1 + 1
    dir_dec = dir_src1 - 1
    dir_orl = dir_src1 | dir_src2
    dir_anl = dir_src1 & dir_src2
    dir_xrl = dir_src1 ^ dir_src2
    dir_mov = dir_src2
    dir_result = ila.choice(
        'dir_result', [dir_inc, dir_dec, dir_orl, dir_anl, dir_xrl, dir_mov])
    # dir_addrs[i] / dir_datas[i] form one candidate direct write.
    dir_addrs = [dir_src1_addr]
    dir_datas = [dir_result]

    # write a bit.
    bit_src1_addr = ila.choice('bit_src1_addr', [op1, op2])
    bit_src1 = uc.readBit(bit_src1_addr)
    wrbit_data = ila.choice(
        'wrbit_data',
        [uc.cy, ~uc.cy, bit_src1, ~bit_src1, bv(0, 1), bv(1, 1)])
    r_bit = uc.writeBit(bit_src1_addr, wrbit_data)

    # some instructions write their result to the carry flag; which is also
    # the first operand.
    cy_orl = uc.cy | bit_src1
    cy_orlc = uc.cy | ~bit_src1
    cy_anl = uc.cy & bit_src1
    cy_anlc = uc.cy & ~bit_src1
    cy_mov = bit_src1
    cy_cpl_bit = ~bit_src1
    cy_cpl_c = ~uc.cy
    bit_cnst1 = bv(1, 1)
    bit_cnst0 = bv(0, 1)
    bit_cy = ila.choice('bit_cy', [
        cy_orl, cy_anl, cy_orlc, cy_anlc, cy_cpl_c, cy_mov, cy_cpl_bit,
        bit_cnst1, bit_cnst0
    ])

    # instructions where the result is an indirect iram address.
    src1_indir_addr = ila.choice('src1_indir_addr', [rx[0], rx[1]])
    src1_indir = iram[src1_indir_addr]
    src2_indir_dir_addr = ila.choice('src2_indir_dir_addr', [op1, op2])
    src2_indir_dir = uc.readDirect(src2_indir_dir_addr)
    src2_indir = ila.choice('src2_indir', [op1, op2, acc, src2_indir_dir])
    src1_indir_inc = src1_indir + 1
    src1_indir_dec = src1_indir - 1
    src1_indir_mov = src2_indir
    src1_indir_result = ila.choice(
        'src1_indir_result',
        [src1_indir_inc, src1_indir_dec, src1_indir_mov])
    indir_addrs = [src1_indir_addr]  # indirect write addr
    indir_datas = [src1_indir_result]  # and data.

    # calls
    pc_topush = ila.choice('pc_topush', [pc + 1, pc + 2, pc + 3])
    pc_topush_lo = pc_topush[7:0]
    pc_topush_hi = pc_topush[15:8]
    # Both halves use the same choice name with mirrored candidate lists.
    pc_topush_0 = ila.choice('pc_topush_endianess',
                             [pc_topush_lo, pc_topush_hi])
    pc_topush_1 = ila.choice('pc_topush_endianess',
                             [pc_topush_hi, pc_topush_lo])
    pc_push_addr = ila.choice('pc_push_addr', [sp, sp + 1])
    iram_call = ila.store(
        ila.store(iram, pc_push_addr, pc_topush_0), pc_push_addr + 1,
        pc_topush_1)

    # push or pop instructions.
    stk_iram_addr = ila.choice('stk_iram_addr', [sp, sp + 1, sp - 1])
    stk_src_dir_addr = ila.choice('stk_src_dir_addr', [op1, op2])
    stk_src_dir = uc.readDirect(stk_src_dir_addr)
    stk_src = ila.choice('stk_src', [stk_src_dir, acc])
    sp_pushpop = ila.choice('sp_pushpop', sp + 1, sp - 1)
    indir_addrs.append(stk_iram_addr)
    indir_datas.append(stk_src)
    stk_data = ila.choice('stk_data', [iram[sp], iram[sp + 1], iram[sp - 1]])
    dir_addrs.append(stk_src_dir_addr)
    dir_datas.append(stk_data)
    r_pop = uc.writeDirect(stk_src_dir_addr, stk_data)
    # A pop whose destination is direct address 0x81 takes the popped
    # value as the new SP instead of the incremented/decremented one.
    sp_pop = ila.ite(stk_src_dir_addr == bv(0x81, 8), r_pop.sp, sp_pushpop)

    # exchanges; part of this implemented above in acc section.
    dir_addrs.append(xchg_src2_dir_addr)
    dir_datas.append(acc)
    xchg_src1_half_indir = ila.concat(xchg_src2_full_indir[7:4], acc[3:0])
    xchg_src1_indir = ila.choice('xchg_src1', [xchg_src1_half_indir, acc])
    indir_addrs.append(xchg_src2_indir_addr)
    indir_datas.append(xchg_src1_indir)

    # final indirect writes.  Address and data share one choice name.
    iram_indir = ila.store(iram,
                           ila.choice('iram_indir', indir_addrs),
                           ila.choice('iram_indir', indir_datas))
    # final direct writes.
    assert len(dir_addrs) == len(dir_datas)
    r_dir = uc.writeDirect(
        ila.choice('iram_dir', dir_addrs), ila.choice('iram_dir', dir_datas))
    # set the next iram.
    iram_next = ila.choice(
        'iram_result', [iram, iram_indir, iram_call, r_dir.iram, r_bit.iram])
    model.set_next('IRAM', iram_next)

    ########################### PSW ##############################################
    cjne_cy = ila.ite(cjne_src1 < cjne_src2, bv(1, 1), bv(0, 1))
    # muldiv
    div_ov = ila.ite(b == 0, bv(1, 1), bv(0, 1))
    mul_ov = ila.ite(b_mul != 0, bv(1, 1), bv(0, 1))
    # da
    acc_da_cy2 = acc_da_stage2[8:8]
    acc_da_cy = acc_da_cy2 | acc_da_cy1 | uc.cy
    # alu
    alu_cy_in = ila.choice('alu_cy_in', [uc.cy, bv(0, 1)])
    alu_cy_5b = ila.choice(
        'alu_cy_5b',
        [ila.zero_extend(alu_cy_in, 5), ila.sign_extend(alu_cy_in, 5)])
    alu_src1_lo_5b = ila.zero_extend(acc[3:0], 5)
    alu_src2_lo_5b = ila.zero_extend(acc_src2[3:0], 5)
    alu_ac_add = (alu_src1_lo_5b + alu_src2_lo_5b + alu_cy_5b)[4:4]
    alu_ac_sub = ila.ite(alu_src1_lo_5b < (alu_src2_lo_5b + alu_cy_5b),
                         bv(1, 1), bv(0, 1))
    alu_ac = ila.choice('alu_ac', [alu_ac_add, alu_ac_sub])

    alu_src1_sext = ila.sign_extend(acc, 9)
    alu_src2_sext = ila.sign_extend(acc_src2, 9)
    alu_src1_zext = ila.zero_extend(acc, 9)
    alu_src2_zext = ila.zero_extend(acc_src2, 9)
    alu_cy_9b_sext = ila.sign_extend(alu_cy_in, 9)
    alu_cy_9b_zext = ila.zero_extend(alu_cy_in, 9)
    alu_cy_9b = ila.choice('alu_cy_9b', [alu_cy_9b_zext, alu_cy_9b_sext])
    alu_zext_9b_sum = alu_src1_zext + alu_src2_zext + alu_cy_9b
    alu_cy_add = alu_zext_9b_sum[8:8]
    alu_cy_sub1 = ila.ite(alu_src1_zext < (alu_src2_zext + alu_cy_9b),
                          bv(1, 1), bv(0, 1))
    alu_cy_sub2 = ila.ite(acc < (acc_src2 + ila.zero_extend(uc.cy, 8)),
                          bv(1, 1), bv(0, 1))
    alu_cy = ila.choice('alu_cy', [alu_cy_add, alu_cy_sub1, alu_cy_sub2])

    alu_ov_9b_src1 = ila.choice('alu_ov_9b_src1',
                                [alu_src1_sext, alu_src1_zext])
    alu_ov_9b_src2 = ila.choice('alu_ov_9b_src2',
                                [alu_src2_sext, alu_src2_zext])
    alu_9b_add = alu_ov_9b_src1 + alu_ov_9b_src2 + alu_cy_9b
    alu_9b_sub = alu_ov_9b_src1 - alu_ov_9b_src2 + alu_cy_9b
    alu_9b_res = ila.choice('alu_9b_res', [alu_9b_add, alu_9b_sub])
    alu_ov = ila.ite(alu_9b_res[8:8] != alu_9b_res[7:7], bv(1, 1), bv(0, 1))

    acc_cy = ila.choice('acc_cy', [uc.cy, acc[0:0], acc[7:7], alu_cy])
    acc_ac = ila.choice('acc_ac', [uc.ac, alu_ac])
    acc_ov = ila.choice('acc_ov', [uc.ov, alu_ov])

    # Candidate PSW values; each concat reassembles all eight bits.
    psw_bit = ila.concat(bit_cy, psw[6:0])
    psw_cjne = ila.concat(cjne_cy, psw[6:0])
    psw_div = ila.concat(
        bv(0, 1), ila.concat(psw[6:3], ila.concat(div_ov, psw[1:0])))
    psw_mul = ila.concat(
        bv(0, 1), ila.concat(psw[6:3], ila.concat(mul_ov, psw[1:0])))
    psw_da = ila.concat(acc_da_cy, psw[6:0])
    psw_acc = ila.concat(
        acc_cy,
        ila.concat(acc_ac,
                   ila.concat(psw[5:3], ila.concat(acc_ov, psw[1:0]))))
    psw_next = ila.choice('psw_next', [
        r_dir.psw, r_bit.psw, psw_cjne, psw_bit, psw_div, psw_mul, psw_da,
        psw_acc, psw
    ])
    model.set_next('PSW', psw_next)

    ########################### SP ##############################################
    sp_next = ila.choice('sp_next', [
        sp + 2, sp + 1, sp - 1, sp - 2, sp, sp_pop, r_pop.sp, r_dir.sp,
        r_bit.sp
    ])
    model.set_next('SP', sp_next)

    ########################### DPTR ##############################################
    mov_dptr = ila.choice(
        'mov_dptr', [ila.concat(op1, op2), ila.concat(op2, op1)])
    inc_dptr = dptr + 1
    dptr_n1 = ila.choice('next_dptr', [mov_dptr, inc_dptr, dptr])
    # DPL/DPH candidates are the halves of the *next* DPTR, so that the
    # mov_dptr and inc_dptr results are reachable by the synthesizer.
    dpl_n1 = dptr_n1[7:0]
    dph_n1 = dptr_n1[15:8]
    dpl_next = ila.choice('dpl_next',
                          [dpl_n1, r_dir.dpl, r_bit.dpl, uc.dpl])
    dph_next = ila.choice('dph_next',
                          [dph_n1, r_dir.dph, r_bit.dph, uc.dph])
    model.set_next('DPL', dpl_next)
    model.set_next('DPH', dph_next)

    ########################### B #################################################
    b_next = ila.choice('b_next', [b_mul, b_div, r_bit.b, r_dir.b, uc.b])
    model.set_next('B', b_next)

    ########################## XRAM ###############################################
    xram_addr_rx = ila.concat(
        bv(0, 8), ila.choice('lsb_xram_addr', [rx[0], rx[1]]))
    xram_addr_next = ila.choice(
        'xram_addr', [xram_addr_rx, dptr, uc.xram_addr, bv(0, 16)])
    model.set_next('XRAM_ADDR', xram_addr_next)
    xram_data_out_next = ila.choice('xram_data_out', [bv(0, 8), acc])
    model.set_next('XRAM_DATA_OUT', xram_data_out_next)

    ########################## SFRS ###############################################
    # Each SFR is either written by a bit write, by a direct write, or
    # keeps its value.
    sfrs = [
        'p0', 'p1', 'p2', 'p3', 'pcon', 'tcon', 'tmod', 'tl0', 'th0', 'tl1',
        'th1', 'scon', 'sbuf', 'ie', 'ip'
    ]
    for s in sfrs:
        sfr_next = ila.choice(
            s + '_next',
            [getattr(r_bit, s), getattr(r_dir, s), getattr(uc, s)])
        model.set_next(s.upper(), sfr_next)

    # Synthesize, time and export each requested state element.
    for s in state:
        print(s)
        st = time.clock()
        model.synthesize(s, eval8051)
        t_elapsed = time.clock() - st
        ast = model.get_next(s)
        print('time: %.2f' % t_elapsed)
        model.exportOne(ast, 'asts/%s_%s' % (s, 'en' if enable_ps else 'dis'))
def createAESILA(enable_ps):
    """Build, synthesize and export the AES accelerator ILA.

    Declares the "aes" abstraction (command inputs, architectural registers,
    the XRAM memory and the uninterpreted ``aes`` function) together with its
    ``aes_compute`` micro-abstraction, synthesizes the next-state functions
    against the ``AESmicro`` / ``AESmacro`` simulators, exports every
    synthesized AST under ``asts/`` and finally generates a simulator into
    the ``sim`` directory.

    Per-state synthesis times are written to ``aes-times-<suffix>.txt`` and
    the accumulated total is printed; ``<suffix>`` is ``en`` when
    ``enable_ps`` is truthy and ``dis`` otherwise.

    Args:
        enable_ps: Value assigned to ``m.enable_parameterized_synthesis``;
            also selects the ``en`` / ``dis`` suffix of the output files.
    """
    m = ila.Abstraction("aes")
    m.enable_parameterized_synthesis = enable_ps

    # I/O interface: this is where the commands come from.
    cmd = m.inp('cmd', 2)
    cmdaddr = m.inp('cmdaddr', 16)
    cmddata = m.inp('cmddata', 8)
    # response.
    dataout = m.reg('dataout', 8)

    # internal arch state.
    state = m.reg('aes_state', 2)
    opaddr = m.reg('aes_addr', 16)
    oplen = m.reg('aes_len', 16)
    keysel = m.reg('aes_keysel', 1)
    ctr = m.reg('aes_ctr', 128)
    key0 = m.reg('aes_key0', 128)
    key1 = m.reg('aes_key1', 128)

    # for the uinst.
    xram = m.mem('XRAM', 16, 8)
    aes = m.fun('aes', 128, [128, 128, 128])

    # fetch is just looking at the input command.
    m.fetch_expr = ila.concat([state, cmd, cmdaddr, cmddata])
    m.fetch_valid = (cmd == 1) | (cmd == 2)

    # decode: reads (cmd == 1) are decoded in every state, writes (cmd == 2)
    # only when state == 0; the last expression covers the remaining cases.
    rdcmds = [(state == i) & (cmd == 1) & (cmdaddr == addr)
              for addr in xrange(0xff00, 0xff40) for i in [0, 1, 2, 3]]
    wrcmds = [(state == 0) & (cmd == 2) & (cmdaddr == addr)
              for addr in xrange(0xff00, 0xff40)]
    nopcmds = [((state != 0) & (cmd != 1)) |
               ((state == 0) & (cmd != 1) & (cmd != 2))]
    m.decode_exprs = rdcmds + wrcmds + nopcmds

    # read commands
    statebyte = ila.zero_extend(state, 8)
    opaddrbyte = ila.readchunk('rd_addr', opaddr, 8)
    oplenbyte = ila.readchunk('rd_len', oplen, 8)
    keyselbyte = ila.zero_extend(keysel, 8)
    ctrbyte = ila.readchunk('rd_ctr', ctr, 8)
    key0byte = ila.readchunk('rd_key0', key0, 8)
    key1byte = ila.readchunk('rd_key1', key1, 8)
    dataoutnext = ila.choice('dataout', [
        statebyte, opaddrbyte, oplenbyte, keyselbyte, ctrbyte, key0byte,
        key1byte, m.const(0, 8)
    ])
    m.set_next('dataout', dataoutnext)

    # write commands.
    def mb_reg_wr(name, reg):
        # multibyte register write.
        reg_wr = ila.writechunk('wr_' + name, reg, cmddata)
        reg_nxt = ila.choice('nxt_' + name, [reg_wr, reg])
        m.set_next(name, reg_nxt)

    mb_reg_wr('aes_addr', opaddr)
    mb_reg_wr('aes_len', oplen)
    mb_reg_wr('aes_ctr', ctr)
    mb_reg_wr('aes_key0', key0)
    mb_reg_wr('aes_key1', key1)

    # bit-level registers
    def bit_reg_wr(name, reg, sz):
        # bitwise register write
        assert reg.type.bitwidth == sz
        reg_wr = cmddata[sz - 1:0]
        reg_nxt = ila.choice('nxt_' + name, [reg_wr, reg])
        m.set_next(name, reg_nxt)

    bit_reg_wr('aes_keysel', keysel, 1)

    # these are for the uinst
    um = m.add_microabstraction('aes_compute', state != 0)

    # read data
    rd_data = um.reg('rd_data', 128)
    enc_data = um.reg('enc_data', 128)
    byte_cnt = um.reg('byte_cnt', 4)
    oped_byte_cnt = um.reg('oped_byte_cnt', 16)
    blk_cnt = um.reg('blk_cnt', 16)
    um.set_init('byte_cnt', um.const(0, 4))
    um.set_init('blk_cnt', um.const(0, 16))
    um.set_init('oped_byte_cnt', um.const(0, 16))
    uxram = m.getmem('XRAM')
    byte_cnt_16b = ila.zero_extend(byte_cnt, 16)

    um.fetch_expr = state
    um.decode_exprs = [(state == i) & (byte_cnt == j)
                       for j in xrange(16) for i in [1, 2, 3]]

    usim = lambda s: AESmicro().simMicro(s)

    # byte_cnt
    byte_cnt_inc = byte_cnt + 1
    # NOTE(review): byte_cnt_buf is never used below; kept in case
    # ila.choice has side effects -- confirm and remove.
    byte_cnt_buf = ila.choice('byte_cnt_buf', [byte_cnt_inc, byte_cnt])
    byte_cnt_nxt = ila.choice(
        'byte_cnt_nxt', [byte_cnt_inc, m.const(0, 4), byte_cnt])
    um.set_next('byte_cnt', byte_cnt_nxt)

    # oped_byte_cnt
    oped_byte_cnt_inc = oped_byte_cnt + 16
    oped_byte_cnt_nxt = ila.choice(
        'oped_byte_cnt_nxt',
        [m.const(0, 16), oped_byte_cnt, oped_byte_cnt_inc])
    um.set_next('oped_byte_cnt', oped_byte_cnt_nxt)

    # blk_cnt
    blk_cnt_inc = blk_cnt + 16
    more_blocks = (oped_byte_cnt_inc < oplen)
    blk_cnt_nxt = ila.choice('blk_cnt_nxt', [
        m.const(0, 16), blk_cnt, blk_cnt_inc,
        ila.ite(more_blocks, blk_cnt_inc, blk_cnt)
    ])
    um.set_next('blk_cnt', blk_cnt_nxt)

    # ustate
    ustate = um.getreg('aes_state')
    ustate_nxt = ila.choice('ustate_next', [
        m.const(0, 2), m.const(1, 2), m.const(2, 2), m.const(3, 2), ustate,
        ila.ite(more_blocks, m.const(1, 2), m.const(0, 2))
    ])
    um.set_next('aes_state', ustate_nxt)

    # rd_data
    rdblock = ila.writechunk(
        "rd_data_chunk", rd_data,
        ila.load(uxram, opaddr + blk_cnt + byte_cnt_16b))
    rd_data_nxt = ila.choice('rd_data_nxt', rdblock, rd_data)
    um.set_next('rd_data', rd_data_nxt)

    # enc_data
    aes_key = ila.ite(keysel == 0, key0, key1)
    aes_enc_data = ila.appfun(aes, [ctr, aes_key, rd_data])
    enc_data_nxt = ila.ite(state == 2, aes_enc_data, enc_data)
    um.set_next('enc_data', enc_data_nxt)

    # xram write
    xram_w_data = ila.readchunk('enc_data_chunk', enc_data, 8)
    xram_w_addr = opaddr + blk_cnt + byte_cnt_16b
    xram_w_aes = ila.store(uxram, xram_w_addr, xram_w_data)
    xram_nxt = ila.choice('xram_nxt', uxram, xram_w_aes)
    um.set_next('XRAM', xram_nxt)

    suffix = 'en' if enable_ps else 'dis'
    # Accumulates the time of every synthesis call (micro and macro) so the
    # reported total really is the total; it must not be reset per state.
    t_elapsed = 0

    # The context manager guarantees the timing log is flushed and closed
    # even if a synthesis call raises.
    with open('aes-times-%s.txt' % suffix, 'wt') as timefile:
        # micro-synthesis
        for s in [
                'XRAM', 'aes_state', 'byte_cnt', 'blk_cnt', 'oped_byte_cnt',
                'rd_data'
        ]:
            st = time.clock()
            um.synthesize(s, usim)
            dt = time.clock() - st
            t_elapsed += dt
            timefile.write('%s %.2f\n' % ('u_' + s, dt))
            ast = um.get_next(s)
            print('%s: %s' % (s, str(ast)))
            m.exportOne(ast, 'asts/u_%s_%s' % (s, suffix))

        sim = lambda s: AESmacro().simMacro(s)

        # state
        state_next = ila.choice(
            'state_next',
            [state, ila.ite(cmddata == 1, m.const(1, 2), state)])
        m.set_next('aes_state', state_next)

        # xram: left unchanged at the macro level.
        m.set_next('XRAM', xram)

        # synthesize.
        for s in [
                'aes_state', 'aes_addr', 'aes_len', 'aes_keysel', 'aes_ctr',
                'aes_key0', 'aes_key1', 'dataout'
        ]:
            st = time.clock()
            m.synthesize(s, sim)
            dt = time.clock() - st
            t_elapsed += dt
            timefile.write('%s %.2f\n' % (s, dt))
            ast = m.get_next(s)
            print('%s: %s' % (s, str(ast)))
            m.exportOne(ast, 'asts/%s_%s' % (s, suffix))

    # connect to the uinst
    m.connect_microabstraction('aes_state', um)
    m.connect_microabstraction('XRAM', um)

    print('total time: %.2f' % t_elapsed)
    m.generateSimToDir('sim')
def createAESILA(enable_ps):
    """Construct the AES ILA template and its 'aes_compute' micro-ILA.

    Only declares state and next-state templates; nothing is synthesized
    here.

    Args:
        enable_ps: Value assigned to ``enable_parameterized_synthesis`` on
            the top-level abstraction.

    Returns:
        A ``(m, um)`` tuple: the "aes" abstraction and the micro-abstraction
        created from it.
    """
    m = ila.Abstraction("aes")
    m.enable_parameterized_synthesis = enable_ps

    # Command interface inputs.
    cmd = m.inp('cmd', 2)
    cmdaddr = m.inp('cmdaddr', 16)
    cmddata = m.inp('cmddata', 8)

    # Architectural registers.
    state = m.reg('aes_state', 2)
    op_addr = m.reg('aes_addr', 16)
    op_len = m.reg('aes_len', 16)
    counter = m.reg('aes_ctr', 128)
    key = m.reg('aes_key0', 128)

    # Memory and uninterpreted function used by the micro-instructions.
    xram = m.mem('XRAM', 16, 8)
    aes_fn = m.fun('aes', 128, [128, 128, 128])

    # The fetched "instruction" is the raw command; it is valid only for
    # cmd == 2.
    m.fetch_expr = ila.concat([cmd, cmdaddr, cmddata])
    m.fetch_valid = (cmd == 2)

    # One decode expression per command address in [0xff00, 0xff30).
    write_decodes = []
    for addr in xrange(0xff00, 0xff30):
        write_decodes.append((cmd == 2) & (cmdaddr == addr))
    m.decode_exprs = write_decodes

    um = m.add_microabstraction('aes_compute', state != 0)

    # Multi-byte registers: either take a chunk write of cmddata or hold.
    for reg_name, reg in [('aes_addr', op_addr), ('aes_len', op_len),
                          ('aes_ctr', counter), ('aes_key0', key)]:
        written = ila.writechunk('wr_' + reg_name, reg, cmddata)
        m.set_next(reg_name, ila.choice('nxt_' + reg_name, [written, reg]))

    # Top-level state: hold, or move to 1 when cmddata == 1.
    start_or_hold = ila.ite(cmddata == 1, m.const(1, 2), state)
    m.set_next('aes_state', ila.choice('state_next', [state, start_or_hold]))

    # XRAM is left unchanged at the top level.
    m.set_next('XRAM', xram)

    ################################
    #           Micro-ILA
    ################################
    rd_data = um.reg('rd_data', 128)
    enc_data = um.reg('enc_data', 128)
    byte_cnt = um.reg('byte_cnt', 4)
    oped_byte_cnt = um.reg('oped_byte_cnt', 16)
    blk_cnt = um.reg('blk_cnt', 16)
    aes_time = um.reg('aes_time', 5)
    uaes_ctr = um.reg('uaes_ctr', 128)

    um.set_init('byte_cnt', um.const(0, 4))
    um.set_init('blk_cnt', um.const(0, 16))
    um.set_init('oped_byte_cnt', um.const(0, 16))
    um.set_init('aes_time', um.const(0, 5))
    um.set_init('uaes_ctr', m.getreg('aes_ctr'))

    uxram = m.getmem('XRAM')
    byte_off = ila.zero_extend(byte_cnt, 16)

    um.fetch_expr = state
    # One decode expression per (byte_cnt value, non-idle state) pair.
    micro_decodes = []
    for j in xrange(16):
        for i in [1, 2, 3]:
            micro_decodes.append((state == i) & (byte_cnt == j))
    um.decode_exprs = micro_decodes

    # byte_cnt: 0, +1, or no change.
    byte_cnt_plus1 = byte_cnt + 1
    um.set_next(
        'byte_cnt',
        ila.choice('byte_cnt_nxt',
                   [m.const(0, 4), byte_cnt_plus1, byte_cnt]))

    # oped_byte_cnt: 0, +16, or no change.
    oped_plus16 = oped_byte_cnt + 16
    um.set_next(
        'oped_byte_cnt',
        ila.choice('oped_byte_cnt_nxt',
                   [m.const(0, 16), oped_plus16, oped_byte_cnt]))

    # blk_cnt: 0, no change, +16, or +16 only while more blocks remain.
    blk_plus16 = blk_cnt + 16
    more_blocks = (oped_plus16 < op_len)
    blk_candidates = [
        m.const(0, 16),
        blk_cnt,
        blk_plus16,
        ila.ite(more_blocks, blk_plus16, blk_cnt),
    ]
    um.set_next('blk_cnt', ila.choice('blk_cnt_nxt', blk_candidates))

    # aes_time: a counter that stops incrementing once it reaches 31.
    time_plus1 = aes_time + 1
    time_at_max = aes_time == m.const(31, 5)
    time_saturating = ila.ite(time_at_max, aes_time, time_plus1)
    aes_time_nxt = ila.choice(
        "aes_timeC",
        m.const(0, 5),
        time_saturating,
        ila.ite(more_blocks, m.const(0, 5), time_saturating))
    aes_time_enough = aes_time > m.const(10, 5)
    um.set_next('aes_time', aes_time_nxt)

    # uaes_ctr: hold, advance by a synthesized amount while more blocks
    # remain, or reload from the architectural counter.
    ctr_step = ila.inrange('addvalue', um.const(1, 128), um.const(128, 128))
    ctr_advance = ila.ite(more_blocks, uaes_ctr + ctr_step, uaes_ctr)
    um.set_next(
        'uaes_ctr',
        ila.choice('uaes_ctr_nxt', uaes_ctr, ctr_advance, counter))

    # Micro-level view of aes_state.
    ustate = um.getreg('aes_state')
    ustate_candidates = [m.const(v, 2) for v in (0, 1, 2, 3)]
    ustate_candidates.append(ustate)
    ustate_candidates.append(
        ila.ite(more_blocks, m.const(1, 2), m.const(0, 2)))
    ustate_candidates.append(
        ila.ite(aes_time_enough, m.const(3, 2), m.const(2, 2)))
    um.set_next('aes_state', ila.choice('ustate_next', ustate_candidates))

    # rd_data: write the byte loaded from XRAM into a chunk, or hold.
    loaded_byte = ila.load(uxram, op_addr + blk_cnt + byte_off)
    rd_written = ila.writechunk("rd_data_chunk", rd_data, loaded_byte)
    um.set_next('rd_data', ila.choice('rd_data_nxt', rd_written, rd_data))

    # enc_data: result of the aes function when state == 2, else hold.
    aes_key = key
    aes_ctr = ila.choice('ctr', uaes_ctr,
                         counter + ila.zero_extend(blk_cnt, 128))
    encrypted = ila.appfun(aes_fn, [aes_ctr, aes_key, rd_data])
    um.set_next('enc_data', ila.ite(state == 2, encrypted, enc_data))

    # XRAM: store one byte chunk of enc_data, or leave memory untouched.
    out_byte = ila.readchunk('enc_data_chunk', enc_data, 8)
    out_addr = op_addr + blk_cnt + byte_off
    xram_stored = ila.store(uxram, out_addr, out_byte)
    um.set_next('XRAM', ila.choice('xram_nxt', uxram, xram_stored))

    return m, um