def ExtractPAddr(pte, bigpage, vaddr):
    """Assemble a translated byte address from a page-table entry.

    Two candidate addresses are built and ``bigpage`` selects between them:

    * big page:   ``pte[31:20]`` followed by ``vaddr[21:12]`` and ``vaddr[11:0]``
    * small page: ``pte[31:10]`` followed by ``vaddr[11:0]``

    Only bits ``[31:0]`` of the selected value are returned.
    NOTE(review): the field positions look like an Sv32-style PTE layout --
    confirm against the caller.
    """
    # Big-page translation: upper PTE field + 22 untranslated address bits.
    big_translation = ila.concat(
        [pte[31:20], vaddr[21:12], vaddr[11:0]])  # byte address
    # Small-page translation: full PTE page-number field + 12-bit offset.
    small_translation = ila.concat(
        [pte[31:10], vaddr[11:0]])  # byte address
    chosen = ila.ite(bigpage, big_translation, small_translation)
    return chosen[31:0]
def table_lookup(b, tab, byte0):
    """Return a 4x4 grid of table reads indexed by ``b[0..3]``.

    Entry ``[i][j]`` is ``tab[concat([b[i], const(j, 2)])]``, i.e. the table
    is addressed with ``b[i]`` in the upper bits and the 2-bit value ``j``
    in the lower bits.  ``byte0`` is accepted but not used by this function.
    """
    return [
        [tab[ila.concat([b[row], ila.const(col, 2)])] for col in range(4)]
        for row in range(4)
    ]
def main():
    """Run a sequence of ``syn_elem`` checks on a small test abstraction.

    Each section builds a template expression, synthesizes it against a
    callback defined elsewhere in the file (``foo``, ``bar``, ``baz``, ...)
    and asserts that the synthesized result equals the expected expression.
    """
    abst = ila.Abstraction("test")
    r0 = abst.reg('r0', 8)
    r1 = abst.reg('r1', 8)
    a = abst.bit('a')
    b = abst.bit('b')

    # --- arithmetic template: one choice, two different oracles ---
    arith = ila.choice("function", r0 + r1, r0 - r1, r0 + r1 + 1)
    res_sum = abst.syn_elem("sum", arith, foo)
    assert abst.areEqual(res_sum, r0 + r1)
    res_diff = abst.syn_elem("diff", arith, bar)
    assert abst.areEqual(res_diff, r0 - r1)

    # --- boolean template: (a1 & b1) | (a2 & b2) ---
    a1 = ila.choice("a1", a, ~a, a & b, a | b)
    b1 = ila.choice("b1", [b, ~b, a & b, a | b, a ^ b])
    a2 = ila.choice("a2", a, ~a)
    b2 = ila.choice("b2", b, ~b)
    left_term = a1 & b1
    right_term = a2 & b2
    sum_of_products = left_term | right_term
    res_xor = abst.syn_elem("baz", sum_of_products, baz)
    assert abst.areEqual(res_xor, a ^ b)
    res_xnor = abst.syn_elem("shaz", sum_of_products, shaz)
    assert abst.areEqual(res_xnor, ~(a ^ b))

    # --- constant-in-range template ---
    c = ila.inrange("cnst", abst.const(0x00, 8), abst.const(0xff, 8))
    with_const = ila.choice("func_z", r0 + r1 + c, r0 + r1 - c)
    res_const = abst.syn_elem("daz", with_const, daz)
    assert abst.areEqual(res_const, r0 + r1 + 0x44)

    # --- read-slice / read-chunk templates ---
    slc0 = ila.readslice("r0slice", r0, 4)
    slc1 = ila.readchunk("r1chunk", r1, 4)
    slice_expr = ila.choice('slice', slc0 + slc1, slc0 - slc1)
    res_slice = abst.syn_elem("razmatazz", slice_expr, razmatazz)
    assert abst.areEqual(res_slice, r0[3:0] + r1[7:4])

    # --- write-slice / write-chunk templates ---
    # Both syntheses below use the element name "jazz" and the same oracle,
    # exactly as in the original code.
    written_slice = ila.writeslice("wr0slice", r0, slc0)
    res_wslice = abst.syn_elem("jazz", written_slice, jazz)
    assert abst.areEqual(res_wslice, ila.concat(r0[3:0], r0[3:0]))

    written_chunk = ila.writechunk("wr0chunk", r0, slc0)
    res_wchunk = abst.syn_elem("jazz", written_chunk, jazz)
    assert abst.areEqual(res_wchunk, ila.concat(r0[3:0], r0[3:0]))
def createILA():
    """Create the ``acc_regs`` abstraction with a template for ``acc_state``.

    Declares the command inputs, the architectural and micro-architectural
    state, the fetch expression/valid condition, and a next-state choice for
    ``acc_state`` only.  Returns the abstraction.
    """
    m = ila.Abstraction('acc_regs')

    # Input ports.
    cmd = m.inp('cmd', 2)
    cmdaddr = m.inp('cmdaddr', 16)
    cmddata = m.inp('cmddata', 8)

    # Architectural state.  Only ``acc_state`` is referenced below; the
    # others are declared so that they exist in the model.
    state = m.reg('acc_state', 3)
    m.reg('rd_addr', 16)
    m.reg('wr_addr', 16)
    m.reg('acc_len', 16)
    m.mem('XRAM', 16, 8)

    # Micro-architectural state.
    m.reg('bytes_read', 16)

    # Fetch function and fetch-valid function.
    m.fetch_expr = ila.concat([state, cmd, cmdaddr, cmddata])
    m.fetch_valid = (cmd == 1) | (cmd == 2)

    # acc_state may become any constant 0..4 or keep its current value.
    candidates = [m.const(value, 3) for value in range(5)]
    candidates.append(state)
    m.set_next('acc_state', ila.choice('state_nxt', candidates))

    # NOTE: next-state functions for rd_addr, wr_addr, acc_len and XRAM are
    # intentionally not specified here.
    return m
def buildPrivInst(self):
    """Set up the privileged-instruction predicates and the CSR update table.

    Publishes on ``self``:

    * ``isTrapReturn`` / ``isMRET`` / ``isSRET`` -- matches on the two fixed
      instruction words ``0x30200073`` and ``0x10200073``.
    * ``isEBreak`` -- SYSTEM opcode with the EBREAK ``funct12`` value.
    * ``trapRetPC`` -- ``mepc`` for MRET, otherwise ``sepc``.
    * ``trapRetPriv`` -- privilege level selected on trap return.
    * ``csr_W_inst_update`` -- dict mapping every CSR without a parent to
      its current value (i.e. an identity update).

    Original note: should handle x0-x31 update, should not touch pc/csrs.

    The original text of this block ended in a dangling, unterminated
    triple-quote, which would have turned everything following it into a
    string literal; it has been removed.
    """
    riscv = self.riscv
    inst = riscv.inst
    opcode = riscv.opcode
    funct12 = riscv.funct12
    CSRRegs = riscv.CSRRegs

    # --------------------
    # CSR decomposition
    # --------------------
    # Only status_MPP and status_SPP are consumed below; the remaining
    # mstatus slices are kept as named references to the field positions.
    status_SD = CSRRegs['mstatus'][XLEN - 1]
    status_VM = CSRRegs['mstatus'][28:24]
    status_MXR = CSRRegs['mstatus'][19]
    status_PUM = CSRRegs['mstatus'][18]
    status_MPRV = CSRRegs['mstatus'][17]
    status_MPP = CSRRegs['mstatus'][12:11]
    status_HPP = CSRRegs['mstatus'][10:9]
    status_SPP = CSRRegs['mstatus'][8]
    status_MPIE = CSRRegs['mstatus'][7]
    status_HPIE = CSRRegs['mstatus'][6]
    status_SPIE = CSRRegs['mstatus'][5]
    status_UPIE = CSRRegs['mstatus'][4]
    status_MIE = CSRRegs['mstatus'][3]
    status_HIE = CSRRegs['mstatus'][2]
    status_SIE = CSRRegs['mstatus'][1]
    status_UIE = CSRRegs['mstatus'][0]

    # The decode expressions are not used further in this block, but the
    # call (and the 8-way unpacking) is kept unchanged.
    (csrRWExpr, csrRSExpr, csrRCExpr, csrIExpr,
     EcallExpr, EbrkExpr, TrapRetExp, SfenceVMExpr) = self.PrivDecode()

    self.isTrapReturn = (inst == 0x10200073) | (inst == 0x30200073)
    self.isMRET = inst == 0x30200073
    self.isSRET = inst == 0x10200073
    self.isEBreak = ((opcode == RVEncoding.SYSTEM) &
                     (funct12 == RVEncoding.EBREAK))

    self.trapRetPC = ila.ite(self.isMRET, CSRRegs['mepc'], CSRRegs['sepc'])
    # MRET: use MPP, except that a HYPERVISOR value is replaced by
    # SUPERVISOR.  Otherwise: SPP prefixed with ``b0``.
    self.trapRetPriv = ila.ite(
        self.isMRET,
        ila.ite(status_MPP == RVEncoding.HYPERVISOR,
                const(RVEncoding.SUPERVISOR, 2),
                status_MPP),
        ila.concat([b0, status_SPP]))

    # gen CSR Inst Update List, add interrupt choice to the merge ones
    csr_W_inst_update = {}  # independent CSR name -> update function
    for csrName, CSRinf in riscv.CSRInfo.items():
        # OPT-OUT: CSRs that have a parent are skipped.
        if CSRinf.parent is not None:
            continue
        old = CSRRegs[csrName]
        csr_W_inst_update[csrName] = old

    self.csr_W_inst_update = csr_W_inst_update
def genRows(idx):
    """Concatenate element ``idx`` of every stencil entry, entry 8 first.

    Each element is ``gb.DATA_SIZE`` bits wide, so element ``idx`` occupies
    bits ``[idx * DATA_SIZE + DATA_SIZE - 1 : idx * DATA_SIZE]`` of each
    ``gb.stencil[k]``.  The slices of ``gb.stencil[8]`` down to
    ``gb.stencil[0]`` are concatenated in that order.

    The width is read from ``gb.DATA_SIZE`` for both bounds; the original
    mixed ``gb.DATA_SIZE`` with a bare ``DATA_SIZE`` name that is not
    defined in this block.
    """
    lo = gb.DATA_SIZE * idx
    hi = lo + gb.DATA_SIZE - 1
    return ila.concat([gb.stencil[k][hi:lo] for k in range(8, -1, -1)])
def readBit(self, bitaddr):
    """Read the single bit addressed by the 8-bit ``bitaddr``.

    The byte address is derived from ``bitaddr[7:3]``:

    * top bit set:   ``bitaddr[7:3]`` followed by three zero bits
    * top bit clear: ``zero_extend(bitaddr[7:3], 8) + 32``

    The byte is fetched through ``self.readDirect`` and indexed with
    ``bitaddr[2:0]``.
    """
    top_bit_set = bitaddr[7:7] == 1
    addr_when_set = ila.concat(bitaddr[7:3], self.model.const(0, 3))
    addr_when_clear = ila.zero_extend(bitaddr[7:3], 8) + 32
    target_addr = ila.ite(top_bit_set, addr_when_set, addr_when_clear)

    bit_select = bitaddr[2:0]
    containing_byte = self.readDirect(target_addr)
    return containing_byte[bit_select]
def InstFetch(self):
    """Load the instruction word at ``self.pc`` and split it into fields.

    The word is loaded from ``self.mem`` at index ``pc[31:2]`` (zero-extended
    to 32 bits) and is also used as the fetch expression.  Register indices,
    function codes and the I/S/B/U/J immediates are published on ``self``.
    """
    word_index = ila.zero_extend(self.pc[31:2], 32)
    inst = ila.load(self.mem, word_index)
    self.inst = inst
    self.fetch_expr = self.inst

    # Fixed-position fields.
    self.opcode = inst[6:0]
    self.rd = inst[11:7]
    self.rs1 = inst[19:15]
    self.rs2 = inst[24:20]
    self.funct3 = inst[14:12]
    self.funct7 = inst[31:25]
    self.funct12 = inst[31:20]

    # Immediates.  I/S/B/J are sign-extended to XLEN; U is the upper 20 bits
    # followed by 12 zero bits and is not extended here.
    self.immI = ila.sign_extend(inst[31:20], XLEN)

    s_bits = ila.concat([inst[31:25], inst[11:7]])
    self.immS = ila.sign_extend(s_bits, XLEN)

    b_bits = ila.concat(
        [inst[31], inst[7], inst[30:25], inst[11:8], const(0, 1)])
    self.immB = ila.sign_extend(b_bits, XLEN)

    self.immU = ila.concat([inst[31:12], const(0, 12)])

    j_bits = ila.concat(
        [inst[31], inst[19:12], inst[20], inst[30:21], const(0, 1)])
    self.immJ = ila.sign_extend(j_bits, XLEN)

    # CSR index occupies the same bits as funct12.
    self.csr_index = inst[31:20]
def genRows(idx):
    """Concatenate element ``idx`` of the input slice and the shift entries.

    Element ``idx`` is the ``gb.DATA_SIZE``-bit field starting at bit
    ``idx * gb.DATA_SIZE``.  The result is that field of ``in_slice``
    followed by the same field of ``gb.LB2D_shift[7]`` down to
    ``gb.LB2D_shift[0]``.
    """
    low = gb.DATA_SIZE * idx
    high = low + gb.DATA_SIZE - 1
    parts = [in_slice[high:low]]
    for k in range(7, -1, -1):
        parts.append(gb.LB2D_shift[k][high:low])
    return ila.concat(parts)
def instructionFetch(self):
    """Load the instruction at ``self.pc`` and publish its decoded fields.

    The word is loaded from ``self.mem`` at index ``pc[31:2]`` zero-extended
    to ``MEM_ADDRESS_BITS``.  Fields for the branch, register-register,
    immediate and memory formats are all extracted unconditionally; the
    ``is*`` predicates tell which format actually applies.
    """
    mkconst = self.model.const

    word_index = ila.zero_extend(self.pc[31:2], MEM_ADDRESS_BITS)
    self.instruction = ila.load(self.mem, word_index)
    instr = self.instruction

    # ---- branch format (top four bits all ones) ----
    self.isBranch = (instr[31:28] == mkconst(0b1111, 4))
    self.branchOP = instr[27:25]
    self.branchOffsetA = instr[24:5]
    self.branchSrc = instr[4:0]
    self.branchOffsetB = instr[24:0]

    # ---- register-register format (top three bits 110) ----
    self.isRegReg = (instr[31:29] == mkconst(0b110, 3))
    self.rrType = instr[28:26]
    self.rrOpcode = instr[25:20]
    self.rrSrc2 = instr[19:15]
    self.rrMask = instr[14:10]
    self.rrDest = instr[9:5]
    self.rrSrc1 = instr[4:0]

    # ---- immediate format (top bit 0) ----
    self.isImmediate = (instr[31] == mkconst(0b0, 1))
    self.immType = instr[30:29]
    self.immOpcode = instr[28:24]
    self.immA = ila.zero_extend(instr[23:15], SCALAR_REG_BITS)
    self.immB = ila.zero_extend(instr[23:10], SCALAR_REG_BITS)
    self.immCup = instr[23:10]
    self.immClow = instr[4:0]
    self.immDest = instr[9:5]
    self.immMask = instr[14:10]

    # Immediate selection, built innermost-first.  Both arms of the
    # innermost ite are immA, exactly as in the original expression.
    imm_type_3 = ila.ite(
        self.immType == mkconst(0b11, 2),
        ila.zero_extend(self.immA, SCALAR_REG_BITS),
        ila.zero_extend(self.immA, SCALAR_REG_BITS))
    imm_type_2 = ila.ite(
        self.immType == mkconst(0b10, 2),
        ila.zero_extend(ila.concat(self.immCup, self.immClow),
                        SCALAR_REG_BITS),
        imm_type_3)
    self.imm = ila.ite(
        self.immType[1] == mkconst(0b0, 1),
        ila.zero_extend(self.immB, SCALAR_REG_BITS),
        imm_type_2)

    # ---- memory format (top two bits 10) ----
    self.isMem = (instr[31:30] == mkconst(0b10, 2))
    self.isLoad = instr[29]
    self.memOpcode = instr[28:25]
    self.memOffSetA = instr[24:15]
    self.memOffSetB = instr[24:10]
    self.memMask = instr[14:10]
    self.memDest = instr[9:5]
    self.memSrc = instr[9:5]
    self.memPtr = instr[4:0]

    # Opcodes 0b1000 and 0b1110 use offset A; every other opcode uses B.
    offset_other = ila.ite(
        self.memOpcode == mkconst(0b1110, 4),
        ila.sign_extend(self.memOffSetA, SCALAR_REG_BITS),
        ila.sign_extend(self.memOffSetB, SCALAR_REG_BITS))
    self.memOffSet = ila.ite(
        self.memOpcode == mkconst(0b1000, 4),
        ila.sign_extend(self.memOffSetA, SCALAR_REG_BITS),
        offset_other)

    # Masked register-register forms: rrType 0b010 or 0b101.
    rr_type_is_masked = ((self.rrType == mkconst(0b010, 3)) |
                         (self.rrType == mkconst(0b101, 3)))
    self.isMask = (rr_type_is_masked & self.isRegReg)

    # need rewrite
    self.dest = instr[9:5]
def __init__(self):
    """Create the ``oc8051`` abstraction and derive commonly used views.

    After ``createInputs`` has declared the state, this sets up the three
    bytes at ``pc``, ``pc + 1`` and ``pc + 2`` in ROM, the combined opcode,
    ``dptr``, selected ``psw`` bits, and the addresses/values of the eight
    ``Rx`` registers of the bank selected by ``psw[4:3]``.
    """
    self.model = ila.Abstraction("oc8051")
    self.createInputs()

    # Three consecutive ROM bytes starting at pc; op2 ends up in the
    # uppermost position of the combined opcode.
    self.op0 = self.rom[self.pc]
    self.op1 = self.rom[self.pc + 1]
    self.op2 = self.rom[self.pc + 2]
    lower_two = ila.concat(self.op1, self.op0)
    self.opcode = ila.concat(self.op2, lower_two)

    self.dptr = ila.concat(self.dph, self.dpl)

    # Individual psw bits and the register-bank selector.
    self.cy = self.psw[7:7]
    self.ac = self.psw[6:6]
    self.ov = self.psw[2:2]
    self._Rbank = self.psw[4:3]

    # Address of Rn: three zero bits, the 2-bit bank, then the 3-bit n.
    rx_addresses = []
    for reg_no in xrange(8):
        bank_and_reg = ila.concat(self._Rbank, self.model.const(reg_no, 3))
        rx_addresses.append(
            ila.concat(self.model.const(0, 3), bank_and_reg))
    self.rxaddr = rx_addresses

    rx_values = []
    for address in self.rxaddr:
        rx_values.append(self.iram[address])
    self.rx = rx_values
def writeBit(self, bitaddr, bitval):
    """Write ``bitval`` to the bit addressed by the 8-bit ``bitaddr``.

    The containing byte is located from ``bitaddr[7:3]`` (see below), read
    through ``self.readDirect``, updated at position ``bitaddr[2:0]`` and
    written back through ``self.writeDirect``, whose result is returned.

    The original code carried a bare ``FIXME`` marker with no explanation.
    """
    # FIXME (carried over from the original; reason not recorded)
    top_bit_set = bitaddr[7:7] == 1
    addr_when_set = ila.concat(bitaddr[7:3], self.model.const(0, 3))
    addr_when_clear = ila.zero_extend(bitaddr[7:3], 8) + 32
    target_addr = ila.ite(top_bit_set, addr_when_set, addr_when_clear)

    current = self.readDirect(target_addr)
    shift = ila.zero_extend(bitaddr[2:0], 8)

    # Clear the addressed bit, then OR in the new value at that position.
    keep_mask = ~(self.model.const(1, 8) << shift)
    new_bit = ila.zero_extend(bitval, 8) << shift
    updated = (keep_mask & current) | new_bit
    return self.writeDirect(target_addr, updated)
def expand_key_128(inp, rcon, tab):
    """Compute one step of the key expansion on four 32-bit words.

    ``inp`` is split into ``k0..k3`` by ``slice128to32``.  ``rcon`` is XORed
    into the top byte of ``k0``; the words are then chained by XOR, and each
    result is XORed with ``S4`` applied to ``k3`` rotated by one byte.

    Original note: inp is 128 bits, rcon is 8 bits.
    Returns the four resulting words as a list ``[k0b, k1b, k2b, k3b]``.
    """
    k0, k1, k2, k3 = slice128to32(inp)

    # XOR the round constant into the top byte of k0, then chain.
    chained0 = ila.concat([k0[31:24] ^ rcon, k0[23:0]])
    chained1 = chained0 ^ k1
    chained2 = chained1 ^ k2
    chained3 = chained2 ^ k3

    # k3 with its top byte moved to the bottom, passed through S4.
    rotated_k3 = cat([k3[23:0], k3[31:24]])
    substituted = S4(rotated_k3, tab)

    return [word ^ substituted
            for word in (chained0, chained1, chained2, chained3)]
def createIla():
    """Create the ``sha`` abstraction with next-state templates.

    Declares the command inputs and architectural state, then installs
    synthesis templates for the three multi-byte registers, ``sha_state``
    and ``XRAM``.  Returns the abstraction.
    """
    m = ila.Abstraction('sha')
    m.enable_parameterized_synthesis = 0

    # Input ports.
    cmd = m.inp('cmd', 2)
    cmdaddr = m.inp('cmdaddr', 16)
    cmddata = m.inp('cmddata', 8)

    # Architectural state.
    state = m.reg('sha_state', 3)
    rd_addr = m.reg('sha_rdaddr', 16)
    wr_addr = m.reg('sha_wraddr', 16)
    oplen = m.reg('sha_len', 16)
    m.reg('sha_rd_data', 512)       # declared only; no template here
    hs_data = m.reg('sha_hs_data', 160)
    xram = m.mem('XRAM', 16, 8)
    m.fun('sha', 160, [512])        # declared only; not applied here

    # Fetch is just looking at the input command.
    m.fetch_expr = ila.concat([state, cmd, cmdaddr, cmddata])
    m.fetch_valid = (cmd == 1) | (cmd == 2)

    # Multi-byte register writes: either one chunk is overwritten with
    # cmddata or the register keeps its value.
    multibyte_regs = (
        ('sha_rdaddr', rd_addr),
        ('sha_wraddr', wr_addr),
        ('sha_len', oplen),
    )
    for reg_name, reg in multibyte_regs:
        chunk_written = ila.writechunk('wr_' + reg_name, reg, cmddata)
        m.set_next(reg_name,
                   ila.choice('nxt_' + reg_name, [chunk_written, reg]))

    # State (atomic): any constant 0..4 or unchanged.
    state_options = [m.const(value, 3) for value in range(5)] + [state]
    m.set_next('sha_state', ila.choice('state_nxt', state_options))

    # XRAM: when state == 0 and cmddata == 1, choose between leaving memory
    # alone and storing hs_data at wr_addr via storeblk or storeblk_big.
    stored_little = ila.storeblk(xram, wr_addr, hs_data)
    stored_big = ila.storeblk_big(xram, wr_addr, hs_data)
    xram_options = ila.choice('xram_nxt', xram, stored_little, stored_big)
    start_cmd = (state == 0) & (cmddata == 1)
    m.set_next('XRAM', ila.ite(start_cmd, xram_options, xram))

    return m
def modify():
    """Import ``ALU.ila``, complete its transition relation and re-export it.

    ``reg0``..``reg7`` are set to keep their current value, and ``input``
    is set to the next-state expression of ``output`` concatenated with
    itself.  The result is written to ``ALU_moore.ila``.
    """
    # Create the ILA container and import the whole ILA from file.
    m = ila.Abstraction('alu')
    m.importAll('ALU.ila')

    # Completely define transition relations for unspecified states.
    register_names = ['reg{}'.format(index) for index in xrange(0, 8)]
    for register_name in register_names:
        m.set_next(register_name, m.getreg(register_name))

    next_output = m.get_next('output')
    m.set_next('input', ila.concat(next_output, next_output))

    # Export the result.
    m.exportAll('ALU_moore.ila')
def createILA():
    """Create the ``acc_regs`` abstraction with state and counter templates.

    Declares inputs and state, assumes ``acc_len > 0``, and installs
    next-state templates for ``acc_state`` and ``bytes_read``.  Returns the
    abstraction.
    """
    m = ila.Abstraction('acc_regs')
    m.enable_parameterized_synthesis = 0

    # Input ports.
    cmd = m.inp('cmd', 2)
    cmdaddr = m.inp('cmdaddr', 16)
    cmddata = m.inp('cmddata', 8)

    # Architectural state.
    state = m.reg('acc_state', 3)
    m.reg('rd_addr', 16)            # declared only
    m.reg('wr_addr', 16)            # declared only
    oplen = m.reg('acc_len', 16)
    m.mem('XRAM', 16, 8)            # declared only
    bytes_read = m.reg('bytes_read', 16)

    # Fetch function and fetch-valid function.
    m.fetch_expr = ila.concat([state, cmd, cmdaddr, cmddata])
    m.fetch_valid = (cmd == 1) | (cmd == 2)

    m.add_assumption(oplen > 0)

    # acc_state: either "1 if cmddata == 1 else 0", or unchanged.
    start_or_idle = ila.ite(cmddata == 1, m.const(1, 3), m.const(0, 3))
    m.set_next('acc_state', ila.choice('state_nxt', start_or_idle, state))

    # bytes_read: zero, incremented, reset-on-start, or unchanged.
    incremented = bytes_read + 1
    reset_on_start = ila.ite(cmddata == 1, m.const(0, 16), bytes_read)
    counter_options = [m.const(0, 16), incremented, reset_on_start,
                       bytes_read]
    m.set_next('bytes_read', ila.choice('bytes_read_nxt', counter_options))

    return m
def Lto128(L):
    """Concatenate ``L[0]``, ``L[1]``, ``L[2]`` and ``L[3]``, in that order."""
    words = [L[position] for position in range(4)]
    return ila.concat(words)
def createAESILA(enable_ps):
    """Build, synthesize and export the AES ILA together with its micro-ILA.

    The function declares the macro abstraction ``aes`` and the
    micro-abstraction ``aes_compute`` (active while ``aes_state != 0``),
    synthesizes every next-state template against the ``AESmicro`` /
    ``AESmacro`` simulators, exports each result under ``asts/``, writes
    per-state synthesis times to ``aes-times-<suffix>.txt`` and finally
    generates a simulator into ``sim``.

    Args:
        enable_ps: assigned to ``enable_parameterized_synthesis``; a truthy
            value selects the file suffix ``'en'``, otherwise ``'dis'``.

    Changes relative to the original:
      * ``t_elapsed`` is initialised once.  The original reset it to zero on
        every micro-synthesis iteration, so the printed total left out all
        but the last micro-synthesis time.
      * The timing file is opened in a ``with`` block so it is always
        closed; the original never closed it.
    """
    m = ila.Abstraction("aes")
    m.enable_parameterized_synthesis = enable_ps

    # I/O interface: this is where the commands come from.
    cmd = m.inp('cmd', 2)
    cmdaddr = m.inp('cmdaddr', 16)
    cmddata = m.inp('cmddata', 8)
    # response.
    dataout = m.reg('dataout', 8)

    # internal arch state.
    state = m.reg('aes_state', 2)
    opaddr = m.reg('aes_addr', 16)
    oplen = m.reg('aes_len', 16)
    keysel = m.reg('aes_keysel', 1)
    ctr = m.reg('aes_ctr', 128)
    key0 = m.reg('aes_key0', 128)
    key1 = m.reg('aes_key1', 128)

    # for the uinst.
    xram = m.mem('XRAM', 16, 8)
    aes = m.fun('aes', 128, [128, 128, 128])

    # fetch is just looking at the input command.
    m.fetch_expr = ila.concat([state, cmd, cmdaddr, cmddata])
    m.fetch_valid = (cmd == 1) | (cmd == 2)

    # decode
    rdcmds = [(state == i) & (cmd == 1) & (cmdaddr == addr)
              for addr in xrange(0xff00, 0xff40) for i in [0, 1, 2, 3]]
    wrcmds = [(state == 0) & (cmd == 2) & (cmdaddr == addr)
              for addr in xrange(0xff00, 0xff40)]
    nopcmds = [
        ((state != 0) & (cmd != 1)) |
        ((state == 0) & (cmd != 1) & (cmd != 2))
    ]
    m.decode_exprs = rdcmds + wrcmds + nopcmds

    # read commands
    statebyte = ila.zero_extend(state, 8)
    opaddrbyte = ila.readchunk('rd_addr', opaddr, 8)
    oplenbyte = ila.readchunk('rd_len', oplen, 8)
    keyselbyte = ila.zero_extend(keysel, 8)
    ctrbyte = ila.readchunk('rd_ctr', ctr, 8)
    key0byte = ila.readchunk('rd_key0', key0, 8)
    key1byte = ila.readchunk('rd_key1', key1, 8)
    dataoutnext = ila.choice('dataout', [
        statebyte, opaddrbyte, oplenbyte, keyselbyte, ctrbyte, key0byte,
        key1byte, m.const(0, 8)
    ])
    m.set_next('dataout', dataoutnext)

    # write commands.
    def mb_reg_wr(name, reg):
        # multibyte register write.
        reg_wr = ila.writechunk('wr_' + name, reg, cmddata)
        reg_nxt = ila.choice('nxt_' + name, [reg_wr, reg])
        m.set_next(name, reg_nxt)

    mb_reg_wr('aes_addr', opaddr)
    mb_reg_wr('aes_len', oplen)
    mb_reg_wr('aes_ctr', ctr)
    mb_reg_wr('aes_key0', key0)
    mb_reg_wr('aes_key1', key1)

    # bit-level registers
    def bit_reg_wr(name, reg, sz):
        # bitwise register write
        assert reg.type.bitwidth == sz
        reg_wr = cmddata[sz - 1:0]
        reg_nxt = ila.choice('nxt_' + name, [reg_wr, reg])
        m.set_next(name, reg_nxt)

    bit_reg_wr('aes_keysel', keysel, 1)

    # these are for the uinst
    um = m.add_microabstraction('aes_compute', state != 0)

    # read data
    rd_data = um.reg('rd_data', 128)
    enc_data = um.reg('enc_data', 128)
    byte_cnt = um.reg('byte_cnt', 4)
    oped_byte_cnt = um.reg('oped_byte_cnt', 16)
    blk_cnt = um.reg('blk_cnt', 16)
    um.set_init('byte_cnt', um.const(0, 4))
    um.set_init('blk_cnt', um.const(0, 16))
    um.set_init('oped_byte_cnt', um.const(0, 16))
    uxram = m.getmem('XRAM')
    byte_cnt_16b = ila.zero_extend(byte_cnt, 16)

    um.fetch_expr = state
    um.decode_exprs = [(state == i) & (byte_cnt == j)
                       for j in xrange(16) for i in [1, 2, 3]]

    usim = lambda s: AESmicro().simMicro(s)

    # byte_cnt
    byte_cnt_inc = byte_cnt + 1
    # byte_cnt_buf is not referenced afterwards; the choice is still created
    # exactly as in the original.
    byte_cnt_buf = ila.choice('byte_cnt_buf', [byte_cnt_inc, byte_cnt])
    byte_cnt_nxt = ila.choice(
        'byte_cnt_nxt', [byte_cnt_inc, m.const(0, 4), byte_cnt])
    um.set_next('byte_cnt', byte_cnt_nxt)

    # oped_byte_cnt
    oped_byte_cnt_inc = oped_byte_cnt + 16
    oped_byte_cnt_nxt = ila.choice(
        'oped_byte_cnt_nxt',
        [m.const(0, 16), oped_byte_cnt, oped_byte_cnt_inc])
    um.set_next('oped_byte_cnt', oped_byte_cnt_nxt)

    # blk_cnt
    blk_cnt_inc = blk_cnt + 16
    more_blocks = (oped_byte_cnt_inc < oplen)
    blk_cnt_nxt = ila.choice('blk_cnt_nxt', [
        m.const(0, 16), blk_cnt, blk_cnt_inc,
        ila.ite(more_blocks, blk_cnt_inc, blk_cnt)
    ])
    um.set_next('blk_cnt', blk_cnt_nxt)

    # ustate
    ustate = um.getreg('aes_state')
    ustate_nxt = ila.choice('ustate_next', [
        m.const(0, 2), m.const(1, 2), m.const(2, 2), m.const(3, 2), ustate,
        ila.ite(more_blocks, m.const(1, 2), m.const(0, 2))
    ])
    um.set_next('aes_state', ustate_nxt)

    # rd_data
    rdblock = ila.writechunk(
        "rd_data_chunk", rd_data,
        ila.load(uxram, opaddr + blk_cnt + byte_cnt_16b))
    rd_data_nxt = ila.choice('rd_data_nxt', rdblock, rd_data)
    um.set_next('rd_data', rd_data_nxt)

    # enc_data
    aes_key = ila.ite(keysel == 0, key0, key1)
    aes_enc_data = ila.appfun(aes, [ctr, aes_key, rd_data])
    enc_data_nxt = ila.ite(state == 2, aes_enc_data, enc_data)
    um.set_next('enc_data', enc_data_nxt)

    # xram write
    xram_w_data = ila.readchunk('enc_data_chunk', enc_data, 8)
    xram_w_addr = opaddr + blk_cnt + byte_cnt_16b
    xram_w_aes = ila.store(uxram, xram_w_addr, xram_w_data)
    xram_nxt = ila.choice('xram_nxt', uxram, xram_w_aes)
    um.set_next('XRAM', xram_nxt)

    suffix = 'en' if enable_ps else 'dis'
    # Accumulated over BOTH synthesis loops below.
    t_elapsed = 0

    with open('aes-times-%s.txt' % suffix, 'wt') as timefile:
        # micro-synthesis
        for s in [
                'XRAM', 'aes_state', 'byte_cnt', 'blk_cnt', 'oped_byte_cnt',
                'rd_data'
        ]:
            st = time.clock()
            um.synthesize(s, usim)
            dt = time.clock() - st
            t_elapsed += dt

            # Same output as the original print statements, written in a
            # form that is valid in both Python 2 and Python 3.
            timefile.write('%s %.2f\n' % ('u_' + s, dt))
            ast = um.get_next(s)
            print('%s: %s' % (s, str(ast)))
            m.exportOne(ast, 'asts/u_%s_%s' % (s, suffix))

        sim = lambda s: AESmacro().simMacro(s)

        # state
        state_next = ila.choice(
            'state_next',
            [state, ila.ite(cmddata == 1, m.const(1, 2), state)])
        m.set_next('aes_state', state_next)
        # xram
        m.set_next('XRAM', xram)

        # synthesize.
        for s in [
                'aes_state', 'aes_addr', 'aes_len', 'aes_keysel', 'aes_ctr',
                'aes_key0', 'aes_key1', 'dataout'
        ]:
            st = time.clock()
            m.synthesize(s, sim)
            dt = time.clock() - st
            t_elapsed += dt

            timefile.write('%s %.2f\n' % (s, dt))
            ast = m.get_next(s)
            print('%s: %s' % (s, str(ast)))
            m.exportOne(ast, 'asts/%s_%s' % (s, suffix))

    # connect to the uinst
    m.connect_microabstraction('aes_state', um)
    m.connect_microabstraction('XRAM', um)

    print('total time: %.2f' % t_elapsed)
    m.generateSimToDir('sim')
def WRU1(gb):
    """Add the WRU1 instruction's updates to the ``*_nxt`` fields of ``gb``.

    The instruction is decoded when ``arg_1_TREADY``, ``arg_0_TREADY`` and
    ``st_ready`` are all READY_FALSE.  Every ``gb.X_nxt`` is wrapped as
    ``ite(decode, <this instruction's update>, <previous gb.X_nxt>)``, so
    updates contributed by other instructions are preserved.
    """
    READY_T = gb.READY_TRUE
    READY_F = gb.READY_FALSE
    VALID_T = gb.VALID_TRUE
    VALID_F = gb.VALID_FALSE
    DATA_SIZE = gb.DATA_SIZE

    decode = ((gb.arg_1_TREADY == READY_F) &
              (gb.arg_0_TREADY == READY_F) &
              (gb.st_ready == READY_F))

    endPixel = ((gb.RAM_x == gb.RAM_x_M - gb.RAM_x_1) &
                (gb.RAM_y == gb.RAM_y_M - gb.RAM_y_1))
    relPixel = (gb.RAM_x == gb.RAM_x_1) & (gb.RAM_y == gb.RAM_y_M)

    def when_decoded(update, previous):
        # Apply ``update`` only when this instruction is decoded.
        return ila.ite(decode, update, previous)

    def output_condition():
        # Condition under which a new stencil result is produced.  Built
        # afresh on each call, as the original spelled it out twice.
        return (((gb.RAM_x > gb.stencil_size - 1) &
                 (gb.RAM_y >= gb.RAM_size)) |
                ((gb.RAM_x == gb.RAM_x_1) &
                 (gb.RAM_y > gb.RAM_size)))

    # next state functions for child-states
    def genRows(idx):
        # Element ``idx`` of stencil entries 8..0, concatenated in that order.
        low = gb.DATA_SIZE * idx
        high = low + DATA_SIZE - 1
        return ila.concat(
            [gb.stencil[k][high:low] for k in (8, 7, 6, 5, 4, 3, 2, 1, 0)])

    stencil_rows = [genRows(i) for i in xrange(gb.stencil_size - 1, -1, -1)]

    proc_in_nxt = ila.ite(output_condition(),
                          ila.concat(stencil_rows), gb.proc_in)
    proc_in_nxt = ila.ite(relPixel, gb.proc_in, proc_in_nxt)
    gb.proc_in_nxt = when_decoded(proc_in_nxt, gb.proc_in_nxt)

    # next state functions for output ports
    arg_1_TREADY_nxt = ila.ite(endPixel, READY_F, READY_T)
    gb.arg_1_TREADY_nxt = when_decoded(arg_1_TREADY_nxt, gb.arg_1_TREADY_nxt)

    arg_0_TVALID_nxt = ila.ite(output_condition(), VALID_T, VALID_F)
    arg_0_TVALID_nxt = ila.ite(relPixel, gb.arg_0_TVALID, arg_0_TVALID_nxt)
    gb.arg_0_TVALID_nxt = when_decoded(arg_0_TVALID_nxt, gb.arg_0_TVALID_nxt)

    # The function is applied to the already relPixel-guarded proc_in_nxt.
    arg_0_TDATA_nxt = ila.appfun(gb.fun, proc_in_nxt)
    arg_0_TDATA_nxt = ila.ite(relPixel, gb.arg_0_TDATA, arg_0_TDATA_nxt)
    gb.arg_0_TDATA_nxt = when_decoded(arg_0_TDATA_nxt, gb.arg_0_TDATA_nxt)

    # next state functions for internal arch-states: all hold their value.
    gb.cur_pix_nxt = when_decoded(gb.cur_pix, gb.cur_pix_nxt)
    gb.pre_pix_nxt = when_decoded(gb.pre_pix, gb.pre_pix_nxt)
    gb.RAM_x_nxt = when_decoded(gb.RAM_x, gb.RAM_x_nxt)
    gb.RAM_y_nxt = when_decoded(gb.RAM_y, gb.RAM_y_nxt)
    gb.RAM_w_nxt = when_decoded(gb.RAM_w, gb.RAM_w_nxt)

    for k in xrange(0, gb.RAM_size):
        gb.RAM_nxt[k] = when_decoded(gb.RAM[k], gb.RAM_nxt[k])

    # Stencil entries below the last one shift by one once RAM_y has
    # reached RAM_size; before that they hold.
    for k in xrange(0, gb.stencil_size - 1):
        shifted = ila.ite(gb.RAM_y < gb.RAM_size,
                          gb.stencil[k], gb.stencil[k + 1])
        gb.stencil_nxt[k] = when_decoded(shifted, gb.stencil_nxt[k])

    # The last stencil entry holds its value.
    last = gb.stencil_size - 1
    gb.stencil_nxt[last] = when_decoded(gb.stencil[last],
                                        gb.stencil_nxt[last])

    # st_ready becomes READY_TRUE.
    gb.st_ready_nxt = when_decoded(READY_T, gb.st_ready_nxt)
def createAESILA(enable_ps):
    """Create the ``aes`` ILA and its ``aes_compute`` micro-ILA templates.

    Only next-state templates are installed here; no synthesis is run.

    Args:
        enable_ps: assigned to ``enable_parameterized_synthesis``.

    Returns:
        The pair ``(m, um)``: macro abstraction and micro-abstraction.
    """
    m = ila.Abstraction("aes")
    m.enable_parameterized_synthesis = enable_ps

    # I/O interface: this is where the commands come from.
    cmd = m.inp('cmd', 2)
    cmdaddr = m.inp('cmdaddr', 16)
    cmddata = m.inp('cmddata', 8)

    # Internal architectural state.
    state = m.reg('aes_state', 2)
    opaddr = m.reg('aes_addr', 16)
    oplen = m.reg('aes_len', 16)
    ctr = m.reg('aes_ctr', 128)
    key0 = m.reg('aes_key0', 128)

    # For the micro-instructions.
    xram = m.mem('XRAM', 16, 8)
    aes = m.fun('aes', 128, [128, 128, 128])

    # Fetch only looks at the input command (the equivalent instruction);
    # it is valid when writing to some address.
    m.fetch_expr = ila.concat([cmd, cmdaddr, cmddata])
    m.fetch_valid = (cmd == 2)

    # Decode: one expression per address in 0xff00..0xff3f.
    m.decode_exprs = [(cmd == 2) & (cmdaddr == addr)
                      for addr in xrange(0xff00, 0xff40)]

    m.add_assumption((state == 0) | (oplen > 1))
    um = m.add_microabstraction('aes_compute', (state != 0))

    # Multi-byte register writes: overwrite one chunk with cmddata, or hold.
    for reg_name, reg in (('aes_addr', opaddr), ('aes_len', oplen),
                          ('aes_ctr', ctr), ('aes_key0', key0)):
        chunk_written = ila.writechunk('wr_' + reg_name, reg, cmddata)
        m.set_next(reg_name,
                   ila.choice('nxt_' + reg_name, [chunk_written, reg]))

    # Macro state: hold, go to 0, or start (1) when cmddata == 1.
    start_if_requested = ila.ite((cmddata == 1), m.const(1, 2), state)
    m.set_next('aes_state',
               ila.choice('state_next',
                          [state, m.const(0, 2), start_if_requested]))

    # Macro XRAM is unchanged.
    m.set_next('XRAM', xram)

    ################################
    #           Micro-ILA
    ################################
    rd_data = um.reg('rd_data', 128)
    enc_data = um.reg('enc_data', 128)
    blk_cnt = um.reg('blk_cnt', 16)
    uaes_ctr = um.reg('uaes_ctr', 128)
    um.set_init('blk_cnt', um.const(0, 16))
    um.set_init('uaes_ctr', um.getreg('aes_ctr'))
    uxram = m.getmem('XRAM')

    um.fetch_expr = state
    um.decode_exprs = [(state == step) for step in [1, 2, 3]]  # READ/OPERATE/WRITE

    # blk_cnt: the increment is a synthesized constant in [1, 32].
    step_size = ila.inrange('blkcntrange', um.const(1, 16), um.const(32, 16))
    blk_cnt_inc = blk_cnt + step_size
    more_blocks = ila.choice('cond1',
                             (blk_cnt_inc != oplen),
                             (oplen >= blk_cnt_inc),
                             (oplen > blk_cnt_inc))
    blk_cnt_options = [
        m.const(0, 16), blk_cnt, blk_cnt_inc,
        ila.ite(more_blocks, blk_cnt_inc, blk_cnt),
    ]
    um.set_next('blk_cnt', ila.choice('blk_cnt_nxt', blk_cnt_options))

    # ustate
    ustate = um.getreg('aes_state')
    ustate_options = [m.const(value, 2) for value in (0, 1, 2, 3)]
    ustate_options.append(ustate)
    ustate_options.append(ila.ite(more_blocks, m.const(1, 2), m.const(0, 2)))
    um.set_next('aes_state', ila.choice('ustate_next', ustate_options))

    # rd_data: 16-byte block load from opaddr + blk_cnt, or hold.
    loaded_block = ila.loadblk(uxram, opaddr + blk_cnt, 16)
    um.set_next('rd_data', ila.choice('rd_data_nxt', loaded_block, rd_data))

    # enc_data: updated with aes(uaes_ctr, key0, rd_data) when state == 2.
    encrypted = ila.appfun(aes, [uaes_ctr, key0, rd_data])
    um.set_next('enc_data', ila.ite(state == 2, encrypted, enc_data))

    # uaes_ctr: hold, or add a synthesized constant in [1, 128].
    ctr_step = ila.inrange('uaes_ctr_nxt_range',
                           m.const(1, 128), m.const(128, 128))
    um.set_next('uaes_ctr',
                ila.choice('uaes_ctr_nxt', uaes_ctr, uaes_ctr + ctr_step))

    # xram write: hold, or store enc_data as a block at opaddr + blk_cnt.
    write_addr = opaddr + blk_cnt
    stored = ila.storeblk(uxram, write_addr, enc_data)
    um.set_next('XRAM', ila.choice('xram_nxt', uxram, stored))

    return m, um
def createRsaIla():
    """Create the ``rsa`` abstraction with next-state templates.

    Declares the command inputs and state, then installs templates for
    ``dataout``, ``rsa_addr``, ``rsa_state``, ``rsa_byte_counter``,
    ``rsa_buff``, ``rsa_M`` and ``XRAM``.  Returns the abstraction.

    Fix relative to the original: the XRAM write selects byte
    ``255 - byte_counter`` of ``rsa_buff`` by shifting right by eight times
    that index.  The original multiplied by 8 while the value was still
    8 bits wide and only then zero-extended it, so the shift amount wrapped
    modulo 256 and could never exceed 248.  The index is now widened to
    2048 bits before the multiplication.
    NOTE(review): this relies on ILA arithmetic keeping the operand width --
    confirm against the ILA API.
    """
    m = ila.Abstraction('rsa')
    m.enable_parameterized_synthesis = 0

    # I/O interface
    cmd = m.inp('cmd', 2)
    cmdaddr = m.inp('cmdaddr', 16)
    cmddata = m.inp('cmddata', 8)
    # response
    dataout = m.reg('dataout', 8)

    # states
    state = m.reg('rsa_state', 2)
    addr = m.reg('rsa_addr', 16)
    rsa_M = m.reg('rsa_M', 2048)
    rsa_N = m.reg('rsa_N', 2048)
    rsa_E = m.reg('rsa_E', 2048)
    rsa_buff = m.reg('rsa_buff', 2048)
    byte_counter = m.reg('rsa_byte_counter', 8)
    xram = m.mem('XRAM', 16, 8)
    rsa = m.fun('rsa', 2048, [2048])

    # fetch
    m.fetch_expr = ila.concat([state, cmd, cmdaddr, cmddata])
    m.fetch_valid = (cmd == 1) | (cmd == 2)

    # dataout: state byte, a chunk of the address, or zero.
    statebyte = ila.zero_extend(state, 8)
    wraddrbyte = ila.readchunk('rsa_addr', addr, 8)
    dataout_nxt = ila.choice('dataout',
                             [statebyte, wraddrbyte, m.const(0, 8)])
    m.set_next('dataout', dataout_nxt)

    # rsa_addr
    addr_wr = ila.writechunk('wr_addr', addr, cmddata)
    addr_nxt = ila.choice('nxt_addr', [addr_wr, addr])
    m.set_next('rsa_addr', addr_nxt)

    # rsa_state
    state_choice = ila.choice(
        'state_choice',
        [m.const(0, 2), m.const(1, 2), m.const(2, 2), m.const(3, 2)])
    wr_nxt = ila.ite(byte_counter == 255, m.const(0, 2), m.const(3, 2))
    state_nxt = ila.choice('rsa_state_nxt', [
        wr_nxt, state_choice,
        ila.ite(cmddata == 1, m.const(1, 2), state), state
    ])
    m.set_next('rsa_state', state_nxt)

    # byte_counter
    byte_counter_inc = byte_counter + 1
    byte_counter_rst = ila.ite(cmddata == 1, m.const(0, 8), byte_counter)
    byte_counter_nxt = ila.choice(
        'byte_counter_nxt',
        [byte_counter_inc, byte_counter_rst, byte_counter])
    m.set_next('rsa_byte_counter', byte_counter_nxt)

    # buff
    rsa_buff_op = ila.appfun(rsa, [rsa_M])
    rsa_buff_nxt = ila.choice('rsa_buff_nxt', rsa_buff_op, rsa_buff)
    m.set_next('rsa_buff', rsa_buff_nxt)

    # rsa_M
    m.set_next('rsa_M', rsa_M)

    # xram: write one byte of rsa_buff per step at addr + byte_counter.
    byte_cnt_16 = ila.zero_extend(byte_counter, 16)
    # Widen BEFORE scaling by 8 so shift amounts up to 2040 are
    # representable (see docstring).
    sh = ila.zero_extend(255 - byte_counter, 2048) * 8
    xram_w_rsa_data_1 = (rsa_buff >> sh)[7:0]
    xram_w_rsa_lit = ila.store(xram, addr + byte_cnt_16, xram_w_rsa_data_1)
    xram_nxt = ila.choice('xram_nxt', [xram_w_rsa_lit, xram])
    m.set_next('XRAM', xram_nxt)

    return m
def WRU0(gb):
    """Add the WRU0 instruction's updates to the ``*_nxt`` fields of ``gb``.

    The instruction is decoded when ``arg_1_TREADY`` and ``arg_0_TREADY``
    are READY_FALSE, ``st_ready`` is READY_TRUE, and the RAM position is not
    ``(RAM_x_0, RAM_y_0)``.  Every ``gb.X_nxt`` is wrapped as
    ``ite(decode, <this instruction's update>, <previous gb.X_nxt>)``.
    """
    # Several of these locals are not referenced below; they are kept
    # exactly as the original declared them.
    m = gb.abst
    READY_T = gb.READY_TRUE
    READY_F = gb.READY_FALSE
    VALID_T = gb.VALID_TRUE
    VALID_F = gb.VALID_FALSE
    DATA_SIZE = gb.DATA_SIZE

    decode = ((gb.arg_1_TREADY == READY_F) &
              (gb.arg_0_TREADY == READY_F) &
              (gb.st_ready == READY_T))
    decode = decode & ~((gb.RAM_x == gb.RAM_x_0) & (gb.RAM_y == gb.RAM_y_0))

    endPixel = ((gb.RAM_x == gb.RAM_x_M) &
                (gb.RAM_y == gb.RAM_y_M - gb.RAM_y_1))
    accPixel = (gb.RAM_y < gb.RAM_size)

    def when_decoded(update, previous):
        # Apply ``update`` only when this instruction is decoded.
        return ila.ite(decode, update, previous)

    # ---- next state functions for output ports ----
    gb.arg_1_TREADY_nxt = when_decoded(
        ila.ite(accPixel, READY_T, READY_F), gb.arg_1_TREADY_nxt)
    gb.arg_0_TVALID_nxt = when_decoded(gb.arg_0_TVALID, gb.arg_0_TVALID_nxt)
    gb.arg_0_TDATA_nxt = when_decoded(gb.arg_0_TDATA, gb.arg_0_TDATA_nxt)

    # ---- next state functions for internal arch-states ----
    # most recent pixel: holds
    gb.cur_pix_nxt = when_decoded(gb.cur_pix, gb.cur_pix_nxt)

    # previous pixel (child-state): takes the current pixel
    gb.pre_pix_nxt = when_decoded(gb.cur_pix, gb.pre_pix_nxt)

    # x index (column) in the 2-D RAM: restarts at RAM_x_1 after RAM_x_M
    at_row_end = gb.RAM_x == gb.RAM_x_M
    next_x = ila.ite(at_row_end, gb.RAM_x_1, gb.RAM_x + gb.RAM_x_1)
    gb.RAM_x_nxt = when_decoded(next_x, gb.RAM_x_nxt)

    # y index (row) in the 2-D RAM: advances at row end, wraps at RAM_y_M
    next_y = ila.ite(gb.RAM_x == gb.RAM_x_M,
                     ila.ite(gb.RAM_y == gb.RAM_y_M,
                             gb.RAM_y_0, gb.RAM_y + gb.RAM_y_1),
                     gb.RAM_y)
    gb.RAM_y_nxt = when_decoded(next_y, gb.RAM_y_nxt)

    # w index (write) in the 2-D RAM: advances at row end, wraps at RAM_w_M
    next_w = ila.ite(gb.RAM_x == gb.RAM_x_M,
                     ila.ite(gb.RAM_w == gb.RAM_w_M,
                             gb.RAM_w_0, gb.RAM_w + gb.RAM_w_1),
                     gb.RAM_w)
    gb.RAM_w_nxt = when_decoded(next_w, gb.RAM_w_nxt)

    # row buffers of the 2-D RAM: the buffer selected by RAM_w receives the
    # previous pixel at column RAM_x - RAM_x_1
    in_byte = gb.pre_pix
    for k in xrange(0, gb.RAM_size):
        written = ila.ite(
            gb.RAM_w == k,
            ila.store(gb.RAM[k], gb.RAM_x - gb.RAM_x_1, in_byte),
            gb.RAM[k])
        gb.RAM_nxt[k] = when_decoded(written, gb.RAM_nxt[k])

    # stencil entries below the last one hold their value
    for k in xrange(0, gb.stencil_size - 1):
        gb.stencil_nxt[k] = when_decoded(gb.stencil[k], gb.stencil_nxt[k])

    def sliceSelect(start, seqs):
        # Nested ite over ``start``: case c loads from RAM[seqs[c]] at
        # column RAM_x - RAM_x_1; the last case is the default.
        def sliceSelectOne(modCase):
            idx = seqs[modCase]
            if modCase == gb.RAM_size - 1:
                return ila.load(gb.RAM[idx], gb.RAM_x - gb.RAM_x_1)
            return ila.ite(start == modCase,
                           ila.load(gb.RAM[idx], gb.RAM_x - gb.RAM_x_1),
                           sliceSelectOne(modCase + 1))
        return sliceSelectOne(0)

    def genSliceSeqs(start):
        # RAM indices start, start + 1, ... taken modulo RAM_size.
        return [(start + offset) % gb.RAM_size
                for offset in xrange(0, gb.RAM_size)]

    slice_seqs = [genSliceSeqs(k) for k in xrange(0, gb.RAM_size)]

    # New column: the incoming byte first, then sequences 7 down to 0.
    slice_chunks = [in_byte]
    slice_chunks.extend(sliceSelect(gb.RAM_w, slice_seqs[k])
                        for k in xrange(7, -1, -1))

    # last stencil entry: holds while still accumulating rows, otherwise
    # takes the new column
    last = gb.stencil_size - 1
    next_last = ila.ite(gb.RAM_y < gb.RAM_size,
                        gb.stencil[last], ila.concat(slice_chunks))
    gb.stencil_nxt[last] = when_decoded(next_last, gb.stencil_nxt[last])

    # stencil ready (child-state)
    gb.st_ready_nxt = when_decoded(
        ila.ite(accPixel, READY_T, READY_F), gb.st_ready_nxt)

    # stencil input to the processing function (child-state): holds
    gb.proc_in_nxt = when_decoded(gb.proc_in, gb.proc_in_nxt)
def aux_bit_rev(self, src_reg, bits):
    """Return the low ``bits`` bits of ``src_reg`` in reversed bit order.

    Bit 0 of ``src_reg`` becomes the most significant bit of the result and
    bit ``bits - 1`` becomes the least significant bit.

    Fix relative to the original: the original started from
    ``src_reg[bits - 1]`` and appended bits ``bits - 2 .. 0`` on the low
    side, which rebuilds the bits in their original order (an identity, not
    a reversal).  The loop now starts at bit 0 and appends upwards.
    NOTE(review): this relies on ``ila.concat(a, b)`` placing ``a`` in the
    upper bits -- confirm against the ILA API.
    """
    result = src_reg[0]
    for i in range(1, bits):
        # Each appended bit lands below everything collected so far.
        result = ila.concat(result, src_reg[i])
    return result
def nextStateVALUFunction(self, threadNo):
    """Build the VOP2 destination next-state expression for one thread.

    Sets the operand attributes ``self.vsource_reg0/1`` (plus ``_ext`` and
    ``_long`` variants), ``self.vcc``, ``self.vdst_reg`` (plus ``_ext`` and
    ``_long``) and finally ``self.nxt_dst_vop2``, a nested ``ila.ite`` that
    selects a value by ``self.opcode_VOP2``.

    Changes relative to the previous text, which did not parse:
      * opcodes whose value was left empty are omitted from the table and
        fall through to the default (see the TODO list below);
      * the chain had no final else-value; it now defaults to
        ``self.vdst_reg``, mirroring the ``self.dst_reg`` default of the
        scalar chains.  NOTE(review): confirm this is the intended default;
      * typos fixed: ``slef`` -> ``self``, ``self.VCC`` -> ``self.vcc``,
        ``.`` used as an argument separator after V_MUL_F32;
      * V_SUBREV_I32 computed ``reg1 - reg1``; it now computes
        ``reg1 - reg0`` like V_SUBREV_F32;
      * ``vdst_reg_long`` is concatenated as (ext, base) like the source
        register pairs.

    Args:
        threadNo: thread index used for VGPR access and to pick the VCC bit.
    """
    m = self.model

    def select(opcode, cases, default):
        # Fold (encoding, value) pairs into nested ite; first pair outermost.
        result = default
        for encoding, value in reversed(cases):
            result = ila.ite(opcode == encoding, value, result)
        return result

    # ---- operand fetch ----
    # vsrc0 above 255 addresses a VGPR (offset by 0x100), otherwise an SGPR.
    self.vsource_reg0 = ila.ite(
        self.vsrc0 > 255,
        m.indexIntoVGPR(self.vsrc0 - m.const(0x100, VECTOR_SOURCE_BIT),
                        threadNo),
        m.indexIntoSGPR(self.vsrc0))
    self.vsource_reg1 = m.indexIntoVGPR(self.vsrc1)
    self.vcc = m.indexIntoVGPR(0, 0, True)
    # NOTE(review): the constant widths below (1 and VECTOR_SOURCE_BIT - 1)
    # are kept as written; confirm they match the index widths.
    self.vsource_reg0_ext = ila.ite(
        self.vsrc0 > 255,
        m.indexIntoVGPR(self.vsrc0 + m.const(0x1, VECTOR_SOURCE_BIT)
                        - m.const(0x100, VECTOR_SOURCE_BIT), threadNo),
        m.indexIntoSGPR(self.vsrc0 + m.const(0x1, 1)))
    self.vsource_reg1_ext = m.indexIntoVGPR(
        self.vsrc1 + m.const(0x1, VECTOR_SOURCE_BIT - 1))
    self.vdst_reg = m.indexIntoVGPR(self.vdst)
    self.vdst_reg_ext = m.indexIntoVGPR(
        self.vdst + m.const(0x1, VECTOR_SOURCE_BIT - 1))
    self.vsource_reg0_long = ila.concat(self.vsource_reg0_ext,
                                        self.vsource_reg0)
    self.vsource_reg1_long = ila.concat(self.vsource_reg1_ext,
                                        self.vsource_reg1)
    # Was concat(vdst_reg, vdst_reg_ext); ordered like the source pairs.
    self.vdst_reg_long = ila.concat(self.vdst_reg_ext, self.vdst_reg)

    s0 = self.vsource_reg0
    s1 = self.vsource_reg1
    dst = self.vdst_reg
    vcc_bit = self.vcc[threadNo]
    # NOTE(review): slices such as [47:32] are taken from this product as
    # before; confirm the product width actually covers them.
    prod24 = s0[23:0] * s1[23:0]
    E = Encoding

    # TODO: not modeled yet (fall through to the default):
    #   V_READLANE_B32, V_WRITELANE_B32, V_MADMK_F32, V_MADAK_F32,
    #   V_MBCNT_LO_U32_B32 (needs ThreadMask), V_MBCNT_HI_U32_B32,
    #   V_LDEXP_F32 (needs EXP), V_CVT_PKACCUM_U8_F32,
    #   V_CVT_PKNORM_I16_F32, V_CVT_PKNORM_U16_F32, V_CVT_PKRTZ_F16_F32,
    #   V_CVT_PK_U16_U32, V_CVT_PK_I16_I32
    # NOTE(review): the *_F32 entries use the plain bit-vector operators.
    # NOTE(review): aux_count is called as a bare name, as before; its
    # definition is not visible here.
    cases = [
        (E.VOP2_V_CNDMASK_B32, ila.ite(vcc_bit != 0, s1, s0)),
        (E.VOP2_V_ADD_F32, s0 + s1),
        (E.VOP2_V_SUB_F32, s0 - s1),
        (E.VOP2_V_SUBREV_F32, s1 - s0),
        (E.VOP2_V_MAC_LEGACY_F32, s0 * s1 + dst),
        (E.VOP2_V_MUL_LEGACY_F32, s0 * s1),
        (E.VOP2_V_MUL_F32, s0 * s1),
        (E.VOP2_V_MUL_I32_I24,
         ila.sign_extend(prod24[30:0], VECTOR_REG_BITS)),
        (E.VOP2_V_MUL_HI_I32_I24,
         ila.sign_extend(prod24[47:32], VECTOR_REG_BITS)),
        (E.VOP2_V_MUL_U32_U24, prod24[31:0]),
        (E.VOP2_V_MUL_HI_U32_U24,
         ila.zero_extend(prod24[47:32], VECTOR_REG_BITS)),
        (E.VOP2_V_MIN_LEGACY_F32, ila.ite(s0 < s1, s0, s1)),
        (E.VOP2_V_MAX_LEGACY_F32, ila.ite(s0 >= s1, s0, s1)),
        (E.VOP2_V_MIN_F32, ila.ite(s0 < s1, s0, s1)),
        (E.VOP2_V_MAX_F32, ila.ite(s0 > s1, s0, s1)),
        (E.VOP2_V_MIN_I32, ila.ite(s0 < s1, s0, s1)),
        (E.VOP2_V_MAX_I32, ila.ite(s0 > s1, s0, s1)),
        (E.VOP2_V_MIN_U32, ila.ite(s0 < s1, s0, s1)),
        (E.VOP2_V_MAX_U32, ila.ite(s0 > s1, s0, s1)),
        (E.VOP2_V_LSHR_B32, s0 >> s1[4:0]),
        (E.VOP2_V_LSHRREV_B32, s1 >> s0[4:0]),
        (E.VOP2_V_ASHR_I32, ila.ashr(s0, s1[4:0])),
        (E.VOP2_V_ASHRREV_I32, ila.ashr(s1, s0[4:0])),
        (E.VOP2_V_LSHL_B32, s0 << s1[4:0]),
        (E.VOP2_V_LSHLREV_B32, s1 << s0[4:0]),
        (E.VOP2_V_AND_B32, s0 & s1),
        (E.VOP2_V_OR_B32, s0 | s1),
        (E.VOP2_V_XOR_B32, s0 ^ s1),
        (E.VOP2_V_BFM_B32, ((1 << s0[4:0]) - 1) << s1[4:0]),
        (E.VOP2_V_MAC_F32, s0 * s1 + dst),
        (E.VOP2_V_BCNT_U32_B32,
         aux_count(s0, False, VECTOR_REG_BITS) + s1),
        (E.VOP2_V_ADD_I32, s0 + s1),
        (E.VOP2_V_SUB_I32, s0 - s1),
        (E.VOP2_V_SUBREV_I32, s1 - s0),
        (E.VOP2_V_ADDC_U32, s0 + s1 + vcc_bit),
        (E.VOP2_V_SUBB_U32, s0 - s1 - vcc_bit),
        (E.VOP2_V_SUBBREV_U32, s1 - s0 - vcc_bit),
    ]
    self.nxt_dst_vop2 = select(self.opcode_VOP2, cases, dst)
def nxtStateFunction(self):
    """Build the scalar-ALU next-state expressions (SOP2, SOP1, SOPK).

    Sets ``self.pc_nxt_32`` / ``self.pc_nxt_64``, the scalar operand
    attributes (``source_reg*``, ``scc``, ``exec``, ``dst_reg*``) and the
    three destination expressions ``self.nxt_dst_sop2``,
    ``self.nxt_dst_sop1`` and ``self.nxt_dst_sopk``.  Each destination is a
    nested ``ila.ite`` over the respective opcode field that falls back to
    ``self.dst_reg`` when no listed opcode matches.

    Changes relative to the previous text, which did not parse:
      * opcodes with an empty or ``#TODO`` value are omitted from the tables
        and fall through to ``self.dst_reg`` (listed in TODO comments);
      * the SOPK chain had no final else-value; it now uses ``self.dst_reg``
        like SOP2/SOP1.  NOTE(review): confirm;
      * typos fixed: ``ile.ite``, ``self,opcode_SOP1``,
        ``ila.source_reg0_long``, bare ``source_reg0`` /
        ``source_reg1_bfe`` / ``sign_extend``, misplaced ``)`` in
        S_XOR_B64 and S_CMOVK_I32;
      * S_LSHR_B32 shifted by ``source_reg0[4:0]``; it now shifts by
        ``source_reg1[4:0]`` like every other shift here;
      * extend calls that lacked a target width now pass
        ``SCALAR_REG_BITS`` / ``SCALAR_REG_BITS_LONG``, and the B64
        quad-mask call passes its bit count;
      * ``aux_bit_rev`` / ``aux_quadmask`` are declared with ``self`` in
        this file, so they are invoked through ``self``;
      * ``dst_reg_long`` is concatenated as (ext, base) like the source
        register pairs;
      * the ``exec`` attribute is written with ``setattr`` because
        ``self.exec`` is a syntax error under Python 2.
    """
    m = self.model

    def select(opcode, cases, default):
        # Fold (encoding, value) pairs into nested ite; first pair outermost.
        result = default
        for encoding, value in reversed(cases):
            result = ila.ite(opcode == encoding, value, result)
        return result

    # ---- next state function for pc ----
    self.pc_nxt_32 = self.pc + m.const(0x1, PC_REG_BITS)
    self.pc_nxt_64 = self.pc + m.const(0x2, PC_REG_BITS)

    # ---- operand fetch ----
    self.source_reg0 = m.indexIntoSGPR(self.ssrc0)
    self.source_reg1 = m.indexIntoSGPR(self.ssrc1)
    self.scc = m.indexIntoSGPR(0, False, True)
    exec_mask = m.indexIntoSGPR(0, False, False, True)
    setattr(self, 'exec', exec_mask)
    # NOTE(review): m.const is called without a width here, as before.
    self.source_reg0_ext = m.indexIntoSGPR(self.ssrc0 + m.const(0x1))
    self.source_reg1_ext = m.indexIntoSGPR(self.ssrc1 + m.const(0x1))
    self.dst_reg = m.indexIntoSGPR(self.sdstSOP2)
    self.dst_reg_ext = m.indexIntoSGPR(self.sdstSOP2 + m.const(0x1))
    self.source_reg0_long = ila.concat(self.source_reg0_ext,
                                       self.source_reg0)
    self.source_reg1_long = ila.concat(self.source_reg1_ext,
                                       self.source_reg1)
    # Was concat(dst_reg, dst_reg_ext); ordered like the source pairs.
    self.dst_reg_long = ila.concat(self.dst_reg_ext, self.dst_reg)
    self.source_reg2_bfe = self.source_reg1[20:16]
    self.source_reg1_bfe = self.source_reg1[4:0]
    self.source_reg2_bfe_long = self.source_reg1[22:16]
    self.source_reg1_bfe_long = self.source_reg1[5:0]

    s0 = self.source_reg0
    s1 = self.source_reg1
    s0l = self.source_reg0_long
    s1l = self.source_reg1_long
    dst = self.dst_reg
    dstl = self.dst_reg_long
    scc = self.scc
    off = self.source_reg1_bfe
    width = self.source_reg2_bfe
    E = Encoding

    # NOTE(review): 32-bit and 64-bit values share one ite chain in every
    # table below, exactly as before; confirm how mixed widths are meant to
    # be handled.

    # ---- SOP2 ----
    bfe32 = ila.ite(
        width == 0,
        m.const(0, SCALAR_REG_BITS),
        ila.ite((width + off) < 32,
                (s0 << (SCALAR_REG_BITS - width - off)) >> (32 - width),
                s0 >> off))
    # NOTE(review): the 64-bit field extract reads source_reg0, not
    # source_reg0_long, as before.
    bfe64 = ((s0 >> self.source_reg1_bfe_long)
             & ((1 << self.source_reg2_bfe_long) - 1))

    # TODO: not modeled yet (falls through to dst_reg): S_CBRANCH_G_FORK
    sop2_cases = [
        (E.SOP2_S_ABSDIFF_I32_OPCODE, ila.ite(s0 > s1, s0 - s1, s1 - s0)),
        (E.SOP2_S_ADD_I32_OPCODE, s0 + s1),
        (E.SOP2_S_ADD_U32_OPCODE, s0 + s1),
        (E.SOP2_S_ADDC_U32_OPCODE, s0 + s1 + scc),
        (E.SOP2_S_AND_B32_OPCODE, s0 & s1),
        (E.SOP2_S_AND_B64_OPCODE, s0l & s1l),
        (E.SOP2_S_ANDN2_B32_OPCODE, s0 & (~s1)),
        (E.SOP2_S_ANDN2_B64_OPCODE, s0l & (~s1l)),
        (E.SOP2_S_ASHR_I32_OPCODE, ila.ashr(s0, s1[4:0])),
        (E.SOP2_S_ASHR_I64_OPCODE, ila.ashr(s0l, s1l[5:0])),
        (E.SOP2_S_BFE_I32_OPCODE, bfe32),
        (E.SOP2_S_BFE_U32_OPCODE, bfe32),
        (E.SOP2_S_BFE_I64_OPCODE, bfe64),
        (E.SOP2_S_BFE_U64_OPCODE, bfe64),
        (E.SOP2_S_BFM_B32_OPCODE, ((1 << s0[4:0]) - 1) << s1[4:0]),
        (E.SOP2_S_BFM_B64_OPCODE, ((1 << s0l[5:0]) - 1) << s1l[5:0]),
        (E.SOP2_S_CSELECT_B32_OPCODE, ila.ite(scc, s0, s1)),
        (E.SOP2_S_CSELECT_B64_OPCODE, ila.ite(scc, s0l, s1l)),
        (E.SOP2_S_LSHL_B32_OPCODE, s0 << s1[4:0]),
        (E.SOP2_S_LSHL_B64_OPCODE, s0l << s1l[5:0]),
        (E.SOP2_S_LSHR_B32_OPCODE, s0 >> s1[4:0]),
        (E.SOP2_S_LSHR_B64_OPCODE, s0l >> s1l[5:0]),
        (E.SOP2_S_MAX_I32_OPCODE, ila.ite(s0 > s1, s0, s1)),
        (E.SOP2_S_MAX_U32_OPCODE, ila.ite(s0 > s1, s0, s1)),
        (E.SOP2_S_MIN_I32_OPCODE, ila.ite(s0 < s1, s0, s1)),
        (E.SOP2_S_MIN_U32_OPCODE, ila.ite(s0 < s1, s0, s1)),
        (E.SOP2_S_MUL_I32_OPCODE, s0 * s1),
        (E.SOP2_S_NAND_B32_OPCODE, ~(s0 & s1)),
        (E.SOP2_S_NAND_B64_OPCODE, ~(s0l & s1l)),
        (E.SOP2_S_NOR_B32_OPCODE, ~(s0 | s1)),
        (E.SOP2_S_NOR_B64_OPCODE, ~(s0l | s1l)),
        (E.SOP2_S_OR_B32_OPCODE, s0 | s1),
        (E.SOP2_S_OR_B64_OPCODE, s0l | s1l),
        (E.SOP2_S_ORN2_B32_OPCODE, s0 | (~s1)),
        (E.SOP2_S_ORN2_B64_OPCODE, s0l | (~s1l)),
        (E.SOP2_S_SUB_I32_OPCODE, s0 - s1),
        (E.SOP2_S_SUB_U32_OPCODE, s0 - s1),
        (E.SOP2_S_SUBB_U32_OPCODE, s0 - s1 - scc),
        (E.SOP2_S_XNOR_B32_OPCODE, ~(s0 ^ s1)),
        (E.SOP2_S_XNOR_B64_OPCODE, ~(s0l ^ s1l)),
        (E.SOP2_S_XOR_B32_OPCODE, s0 ^ s1),
        (E.SOP2_S_XOR_B64_OPCODE, s0l ^ s1l),
    ]
    self.nxt_dst_sop2 = select(self.opcode_SOP2, sop2_cases, dst)

    # ---- SOP1 ----
    one_bit = m.const(0x1, 1)
    zero_bit = m.const(0x0, 1)
    pc_plus_4 = self.pc + m.const(0x4, SCALAR_REG_BITS_LONG)
    quad32 = ila.zero_extend(self.aux_quadmask(s0, SCALAR_REG_BITS),
                             SCALAR_REG_BITS)
    quad64 = ila.zero_extend(self.aux_quadmask(s0l, SCALAR_REG_BITS_LONG),
                             SCALAR_REG_BITS_LONG)

    # TODO: not modeled yet (fall through to dst_reg): S_CBRANCH_JOIN,
    #   S_MOVRELD_B32, S_MOVRELD_B64, S_MOVRELS_B32, S_MOVRELS_B64
    # NOTE(review): aux_count, aux_bit_set_zero/one, aux_ff_bit,
    # aux_ff_op_bit and aux_ff_bit_m are called as bare names, as before;
    # their definitions are not visible here.
    # NOTE(review): the aux_count arguments (e.g. m.const(0x1, 0)) and the
    # else-branch of S_CMOV (~(src | dst)) are kept as written; confirm.
    # NOTE(review): S_WQM_* reuse the 32-bit quad-mask value, including the
    # B64 form, as before; confirm.
    sop1_cases = [
        (E.SOP1_S_ABS_I32_OPCODE, ila.ite(s0 > 0, s0, -s0)),
        (E.SOP1_S_AND_SAVEEXEC_B64_OPCODE, exec_mask),
        (E.SOP1_S_ANDN2_SAVEEXEC_B64_OPCODE, exec_mask),
        (E.SOP1_S_BCNT0_I32_B32_OPCODE,
         aux_count(s0, m.const(0x1, 1), m.const(SCALAR_REG_BITS))),
        (E.SOP1_S_BCNT0_I32_B64_OPCODE,
         aux_count(s0, m.const(0x1, 1), m.const(SCALAR_REG_BITS_LONG))),
        (E.SOP1_S_BCNT1_I32_B32_OPCODE,
         aux_count(s0, m.const(0x1, 0), m.const(SCALAR_REG_BITS))),
        (E.SOP1_S_BCNT1_I32_B64_OPCODE,
         aux_count(s0, m.const(0x1, 0), m.const(SCALAR_REG_BITS_LONG))),
        (E.SOP1_S_BITSET0_B32_OPCODE, aux_bit_set_zero(dst, s0[4:0])),
        (E.SOP1_S_BITSET0_B64_OPCODE, aux_bit_set_zero(dstl, s0l[5:0])),
        (E.SOP1_S_BITSET1_B32_OPCODE, aux_bit_set_one(dst, s0[4:0])),
        (E.SOP1_S_BITSET1_B64_OPCODE, aux_bit_set_one(dstl, s0l[5:0])),
        (E.SOP1_S_BREV_B32_OPCODE,
         self.aux_bit_rev(s0, SCALAR_REG_BITS)),
        (E.SOP1_S_BREV_B64_OPCODE,
         self.aux_bit_rev(s0l, SCALAR_REG_BITS_LONG)),
        (E.SOP1_S_CMOV_B32_OPCODE, ila.ite(scc, s0, ~(s0 | dst))),
        (E.SOP1_S_CMOV_B64_OPCODE, ila.ite(scc, s0l, ~(s0l | dstl))),
        (E.SOP1_S_FF0_I32_B32_OPCODE,
         aux_ff_bit(s0, SCALAR_REG_BITS, zero_bit)),
        (E.SOP1_S_FF1_I32_B32_OPCODE,
         aux_ff_bit(s0, SCALAR_REG_BITS, one_bit)),
        (E.SOP1_S_FF0_I32_B64_OPCODE,
         aux_ff_bit(s0l, SCALAR_REG_BITS_LONG, zero_bit)),
        (E.SOP1_S_FF1_I32_B64_OPCODE,
         aux_ff_bit(s0l, SCALAR_REG_BITS_LONG, one_bit)),
        (E.SOP1_S_FLBIT_I32_OPCODE, aux_ff_op_bit(s0, SCALAR_REG_BITS)),
        (E.SOP1_S_FLBIT_I32_I64_OPCODE,
         aux_ff_op_bit(s0l, SCALAR_REG_BITS_LONG)),
        (E.SOP1_S_FLBIT_I32_B32_OPCODE,
         aux_ff_bit_m(s0, SCALAR_REG_BITS, one_bit)),
        (E.SOP1_S_FLBIT_I32_B64_OPCODE,
         aux_ff_bit_m(s0l, SCALAR_REG_BITS_LONG, one_bit)),
        (E.SOP1_S_GETPC_B64_OPCODE, pc_plus_4),
        (E.SOP1_S_MOV_B32_OPCODE, s0),
        (E.SOP1_S_MOV_B64_OPCODE, s0l),
        (E.SOP1_S_NAND_SAVEEXEC_B64_OPCODE, exec_mask),
        (E.SOP1_S_NOR_SAVEEXEC_B64_OPCODE, exec_mask),
        (E.SOP1_S_NOT_B32_OPCODE, ~s0),
        (E.SOP1_S_NOT_B64_OPCODE, ~s0l),
        (E.SOP1_S_OR_SAVEEXEC_B64_OPCODE, exec_mask),
        (E.SOP1_S_ORN2_SAVEEXEC_B64_OPCODE, exec_mask),
        (E.SOP1_S_QUADMASK_B32_OPCODE, quad32),
        (E.SOP1_S_QUADMASK_B64_OPCODE, quad64),
        (E.SOP1_S_RFE_B64_OPCODE, s0l),
        (E.SOP1_S_WQM_B32_OPCODE, quad32),
        (E.SOP1_S_WQM_B64_OPCODE, quad32),
        (E.SOP1_S_SEXT_I32_I8_OPCODE,
         ila.sign_extend(s0[7:0], SCALAR_REG_BITS)),
        (E.SOP1_S_SEXT_I32_I16_OPCODE,
         ila.sign_extend(s0[15:0], SCALAR_REG_BITS)),
        (E.SOP1_S_SWAPPC_B64_OPCODE, pc_plus_4),
        (E.SOP1_S_MOV_FED_B32_OPCODE, s0),
        (E.SOP1_S_XOR_SAVEEXEC_B64_OPCODE, exec_mask),
        (E.SOP1_S_XNOR_SAVEEXEC_B64_OPCODE, exec_mask),
        (E.SOP1_S_SETPC_B64_OPCODE, dst),
    ]
    self.nxt_dst_sop1 = select(self.opcode_SOP1, sop1_cases, dst)

    # ---- SOPK ----
    simm_ext = ila.sign_extend(self.simm, SCALAR_REG_BITS)
    # NOTE(review): S_CMOVK keeps the original condition (scc != 1 selects
    # the immediate); confirm the polarity.
    sopk_cases = [
        (E.SOPK_S_MOVK_I32_OPCODE, simm_ext),
        (E.SOPK_S_CMOVK_I32_OPCODE,
         ila.ite(scc != m.const(0x1, 1), simm_ext, dst)),
        (E.SOPK_S_ADDK_I32_OPCODE, dst + simm_ext),
        (E.SOPK_S_MULK_I32_OPCODE, dst * simm_ext),
    ]
    self.nxt_dst_sopk = select(self.opcode_SOPK, sopk_cases, dst)
def U3(gb):
    """Chain the U3 instruction's next-state updates onto ``gb``.

    For every state ``X`` the instruction's next value is wrapped as
    ``ila.ite(decode, value, gb.X_nxt)`` and stored back into ``gb.X_nxt``
    (``gb.X_nxt[i]`` for list-valued state), so the previous content of
    ``gb.X_nxt`` remains the result whenever ``decode`` is false.

    U3 changes the slice-stream flags, the LB2D shift indices and shift
    buffer, and the stencil stream (flags and buffer); every other state
    listed below is passed through unchanged.

    Args:
        gb: model object providing the state variables, their ``*_nxt``
            accumulators and the constants read below.
            NOTE(review): its concrete type is defined outside this section.
    """
    FULL_T = gb.FULL_TRUE
    FULL_F = gb.FULL_FALSE
    EMPTY_T = gb.EMPTY_TRUE
    EMPTY_F = gb.EMPTY_FALSE

    ############################ decode ###################################
    # Fires when the slice stream is not empty and either the stencil
    # stream is not full or shift_x lies strictly between shift_x_0 and
    # shift_size.
    decode = ((gb.slice_stream_empty == EMPTY_F) &
              ((gb.stencil_stream_full == FULL_F) |
               ((gb.LB2D_shift_x < gb.LB2D_shift_size) &
                (gb.LB2D_shift_x > gb.LB2D_shift_x_0))))
    condLast = gb.LB2D_shift_x == gb.LB2D_shift_x_M

    def commit(name, value):
        # Guard `value` with decode and chain it onto gb.<name>_nxt.
        attr = name + '_nxt'
        setattr(gb, attr, ila.ite(decode, value, getattr(gb, attr)))

    def commit_at(name, idx, value):
        # Same as commit(), for one element of a list-valued state.
        nxt = getattr(gb, name + '_nxt')
        nxt[idx] = ila.ite(decode, value, nxt[idx])

    def hold(name):
        # State keeps its current value under this instruction.
        commit(name, getattr(gb, name))

    def hold_each(name, size):
        # List-valued state keeps all of its first `size` elements.
        current = getattr(gb, name)
        for i in xrange(0, size):
            commit_at(name, i, current[i])

    ############################ next state functions #####################
    # ports, 1-D buffer, input stream and LB2D proc state: unchanged
    for name in ('arg_1_TREADY', 'arg_0_TVALID', 'arg_0_TDATA',
                 'LB1D_in', 'LB1D_uIn', 'LB1D_buff', 'LB1D_p_cnt',
                 'in_stream_full', 'in_stream_empty'):
        hold(name)
    hold_each('in_stream_buff', gb.in_stream_size)
    for name in ('LB2D_proc_x', 'LB2D_proc_y', 'LB2D_proc_w'):
        hold(name)
    hold_each('LB2D_proc', gb.LB2D_proc_size)

    # slice stream: full flag is cleared; the stream becomes empty unless
    # it was full
    commit('slice_stream_full', FULL_F)
    commit('slice_stream_empty',
           ila.ite(gb.slice_stream_full == FULL_T, EMPTY_F, EMPTY_T))
    hold_each('slice_stream_buff', gb.slice_stream_size)

    # LB2D shift x idx: back to shift_x_0 after shift_x_M, else advance
    commit('LB2D_shift_x',
           ila.ite(condLast,
                   gb.LB2D_shift_x_0,
                   gb.LB2D_shift_x + gb.LB2D_shift_x_1))
    # LB2D shift y idx: advances once x has reached shift_x_M and stays at
    # shift_y_M once it gets there
    commit('LB2D_shift_y',
           ila.ite(gb.LB2D_shift_y < gb.LB2D_shift_y_M,
                   ila.ite(gb.LB2D_shift_x < gb.LB2D_shift_x_M,
                           gb.LB2D_shift_y,
                           gb.LB2D_shift_y + gb.LB2D_shift_y_1),
                   gb.LB2D_shift_y_M))

    # LB2D shift buffer: the incoming slice enters at the last position and
    # every other entry takes the value of its successor.
    in_slice = ila.ite(gb.slice_stream_full == FULL_T,
                       gb.slice_stream_buff[gb.slice_stream_size - 1],
                       gb.slice_stream_buff[0])
    commit_at('LB2D_shift', gb.LB2D_shift_size - 1, in_slice)
    for i in xrange(0, gb.LB2D_shift_size - 1):
        commit_at('LB2D_shift', i, gb.LB2D_shift[i + 1])

    passToStencil = ((gb.LB2D_shift_x >= gb.LB2D_shift_size) |
                     (gb.LB2D_shift_x == gb.LB2D_shift_x_0))

    # stencil stream flags change only when a stencil is passed on
    commit('stencil_stream_full',
           ila.ite(~passToStencil,
                   gb.stencil_stream_full,
                   ila.ite(gb.stencil_stream_empty == EMPTY_T,
                           FULL_F, FULL_T)))
    commit('stencil_stream_empty',
           ila.ite(~passToStencil, gb.stencil_stream_empty, EMPTY_F))

    def genRows(idx):
        # Chunk `idx` (DATA_SIZE bits wide) of the incoming slice followed
        # by the same chunk of every shift-buffer entry, last entry first.
        # The entries were spelled out as literal indices 7..0; iterating
        # over LB2D_shift_size yields the same list for a size of 8.
        l = gb.DATA_SIZE * idx
        h = l + gb.DATA_SIZE - 1
        chunks = [in_slice[h:l]]
        for k in xrange(gb.LB2D_shift_size - 1, -1, -1):
            chunks.append(gb.LB2D_shift[k][h:l])
        return ila.concat(chunks)

    # rows for chunk indices LB2D_shift_size down to 0 (size + 1 rows)
    stencil_rows = [genRows(gb.LB2D_shift_size)]
    for i in xrange(gb.LB2D_shift_size - 1, -1, -1):
        stencil_rows.append(genRows(i))

    commit_at('stencil_stream_buff', 0,
              ila.ite(passToStencil,
                      ila.concat(stencil_rows),
                      gb.stencil_stream_buff[0]))
    # NOTE(review): unlike entry 0, these entries fold passToStencil into
    # the guard, so with decode true and passToStencil false they fall
    # through to the previous gb.stencil_stream_buff_nxt[i] rather than to
    # gb.stencil_stream_buff[i].  Kept as written; confirm this is intended.
    for i in xrange(1, gb.stencil_stream_size):
        gb.stencil_stream_buff_nxt[i] = ila.ite(
            decode & passToStencil,
            gb.stencil_stream_buff[i - 1],
            gb.stencil_stream_buff_nxt[i])

    # gb_* state: unchanged
    hold('gb_p_cnt')
    hold_each('gb_pp_it', gb.gb_pp_size)
    hold_each('gb_exit_it', gb.gb_exit_size)
def U2(gb):
    """Chain the U2 instruction's next-state updates onto ``gb``.

    For every state ``X`` the instruction's next value is wrapped as
    ``ila.ite(decode, value, gb.X_nxt)`` and stored back into ``gb.X_nxt``
    (``gb.X_nxt[i]`` for list-valued state), so the previous content of
    ``gb.X_nxt`` remains the result whenever ``decode`` is false.

    U2 changes the input-stream flags, the LB2D proc indices and buffers,
    and the slice stream (flags and buffer); every other state listed below
    is passed through unchanged.

    Args:
        gb: model object providing the state variables, their ``*_nxt``
            accumulators and the constants read below.
            NOTE(review): its concrete type is defined outside this section.
    """
    FULL_T = gb.FULL_TRUE
    FULL_F = gb.FULL_FALSE
    EMPTY_T = gb.EMPTY_TRUE
    EMPTY_F = gb.EMPTY_FALSE

    ############################ decode ###################################
    # Fires when the input stream is not empty and either the slice stream
    # is not full or proc_y is still below proc_size.
    decode = ((gb.in_stream_empty == EMPTY_F) &
              ((gb.slice_stream_full == FULL_F) |
               (gb.LB2D_proc_y < gb.LB2D_proc_size)))

    def commit(name, value):
        # Guard `value` with decode and chain it onto gb.<name>_nxt.
        attr = name + '_nxt'
        setattr(gb, attr, ila.ite(decode, value, getattr(gb, attr)))

    def commit_at(name, idx, value):
        # Same as commit(), for one element of a list-valued state.
        nxt = getattr(gb, name + '_nxt')
        nxt[idx] = ila.ite(decode, value, nxt[idx])

    def hold(name):
        # State keeps its current value under this instruction.
        commit(name, getattr(gb, name))

    def hold_each(name, size):
        # List-valued state keeps all of its first `size` elements.
        current = getattr(gb, name)
        for i in xrange(0, size):
            commit_at(name, i, current[i])

    ############################ next state functions #####################
    # ports and 1-D buffer state: unchanged
    for name in ('arg_1_TREADY', 'arg_0_TVALID', 'arg_0_TDATA',
                 'LB1D_in', 'LB1D_uIn', 'LB1D_buff', 'LB1D_p_cnt'):
        hold(name)

    # in stream: full flag is cleared; the stream becomes empty unless it
    # was full
    commit('in_stream_full', FULL_F)
    commit('in_stream_empty',
           ila.ite(gb.in_stream_full == FULL_T, EMPTY_F, EMPTY_T))
    hold_each('in_stream_buff', gb.in_stream_size)

    # LB2D proc x idx: goes to proc_x_1 after proc_x_M, otherwise advances
    commit('LB2D_proc_x',
           ila.ite(gb.LB2D_proc_x == gb.LB2D_proc_x_M,
                   gb.LB2D_proc_x_1,
                   gb.LB2D_proc_x + gb.LB2D_proc_x_1))
    # LB2D proc y idx: advances only when x is at proc_x_M; wraps to y_0
    commit('LB2D_proc_y',
           ila.ite(gb.LB2D_proc_x == gb.LB2D_proc_x_M,
                   ila.ite(gb.LB2D_proc_y == gb.LB2D_proc_y_M,
                           gb.LB2D_proc_y_0,
                           gb.LB2D_proc_y + gb.LB2D_proc_y_1),
                   gb.LB2D_proc_y))
    # LB2D proc w idx: same stepping scheme as y
    commit('LB2D_proc_w',
           ila.ite(gb.LB2D_proc_x == gb.LB2D_proc_x_M,
                   ila.ite(gb.LB2D_proc_w == gb.LB2D_proc_w_M,
                           gb.LB2D_proc_w_0,
                           gb.LB2D_proc_w + gb.LB2D_proc_w_1),
                   gb.LB2D_proc_w))

    # LB2D proc buffer: only the buffer selected by proc_w stores the
    # incoming byte, at column proc_x - proc_x_1.
    in_byte = ila.ite(gb.in_stream_full == FULL_T,
                      gb.in_stream_buff[gb.in_stream_size - 1],
                      gb.in_stream_buff[0])
    column = gb.LB2D_proc_x - gb.LB2D_proc_x_1
    for i in xrange(0, gb.LB2D_proc_size):
        commit_at('LB2D_proc', i,
                  ila.ite(gb.LB2D_proc_w == i,
                          ila.store(gb.LB2D_proc[i], column, in_byte),
                          gb.LB2D_proc[i]))

    # slice stream flags: no slice is produced while proc_y < proc_size
    filling = gb.LB2D_proc_y < gb.LB2D_proc_size
    commit('slice_stream_full',
           ila.ite(filling,
                   FULL_F,
                   ila.ite(gb.slice_stream_empty == EMPTY_T,
                           FULL_F, FULL_T)))
    commit('slice_stream_empty', ila.ite(filling, EMPTY_T, EMPTY_F))

    def sliceSelect(start, seqs):
        # Mux on `start`: case k loads from gb.LB2D_proc[seqs[k]] at
        # `column`; the last case doubles as the default branch.
        assert (len(seqs) == gb.LB2D_proc_size)
        last = gb.LB2D_proc_size - 1
        picked = ila.load(gb.LB2D_proc[seqs[last]], column)
        for k in xrange(last - 1, -1, -1):
            picked = ila.ite(start == k,
                             ila.load(gb.LB2D_proc[seqs[k]], column),
                             picked)
        return picked

    def genSliceSeqs(start):
        # Buffer indices start, start+1, ... wrapped modulo proc_size,
        # e.g. start=6 with size 8 gives [6, 7, 0, 1, 2, 3, 4, 5].
        assert (start <= gb.LB2D_proc_size)
        return [(start + i) % gb.LB2D_proc_size
                for i in xrange(0, gb.LB2D_proc_size)]

    slice_seqs = [genSliceSeqs(i) for i in xrange(0, gb.LB2D_proc_size)]

    # Incoming byte first, then one selected byte per rotation, highest
    # rotation first.  The bound was a literal 7; proc_size - 1 is the same
    # value for the 8-buffer configuration and cannot index past slice_seqs.
    slice_chunks = [in_byte]
    for i in xrange(gb.LB2D_proc_size - 1, -1, -1):
        slice_chunks.append(sliceSelect(gb.LB2D_proc_w, slice_seqs[i]))

    # slice_stream_buff: unchanged while filling; otherwise the new slice
    # enters at 0 and every other entry takes its predecessor's value
    commit_at('slice_stream_buff', 0,
              ila.ite(filling,
                      gb.slice_stream_buff[0],
                      ila.concat(slice_chunks)))
    for i in xrange(1, gb.slice_stream_size):
        commit_at('slice_stream_buff', i,
                  ila.ite(filling,
                          gb.slice_stream_buff[i],
                          gb.slice_stream_buff[i - 1]))

    # LB2D shift state, stencil stream and gb_* state: unchanged
    hold('LB2D_shift_x')
    hold('LB2D_shift_y')
    hold_each('LB2D_shift', gb.LB2D_shift_size)
    hold('stencil_stream_full')
    hold('stencil_stream_empty')
    hold_each('stencil_stream_buff', gb.stencil_stream_size)
    hold('gb_p_cnt')
    hold_each('gb_pp_it', gb.gb_pp_size)
    hold_each('gb_exit_it', gb.gb_exit_size)
def cat(L):
    """Shorthand for ``ila.concat``: concatenate the expressions in ``L``.

    Args:
        L: the operands, passed through to ``ila.concat`` unchanged.

    Returns:
        Whatever ``ila.concat(L)`` returns.
    """
    joined = ila.concat(L)
    return joined
def aux_quadmask(self, src_reg, bits):
    """Reduce every group of four bits of ``src_reg`` to their OR.

    Result bit ``q`` is the OR of source bits ``4q .. 4q + 3``.  Quad 0 is
    the seed and each higher quad is concatenated above the bits collected
    so far, so the result is ``bits // 4`` bits wide.

    ``bits // 4`` replaces ``bits / 4``: the results are identical for
    integer ``bits`` under Python 2, but true division produces a float
    under Python 3 (or ``from __future__ import division``), which
    ``range`` rejects.

    Args:
        src_reg: indexable bit-vector expression; ``src_reg[i]`` is bit ``i``.
        bits: number of source bits to reduce (at least 4).

    Returns:
        The quad-reduced expression, quad 0 in the least significant bit.
    """
    def quad_or(base):
        # OR of the four bits starting at index `base`.
        return (src_reg[base] | src_reg[base + 1] |
                src_reg[base + 2] | src_reg[base + 3])

    result = quad_or(0)
    for quad in range(1, bits // 4):
        result = ila.concat(quad_or(quad * 4), result)
    return result
def createSHAILA(synstates, enable_ps):
    """Build the "sha" ILA template, synthesize it and generate a simulator.

    The function declares the command inputs, architectural registers, the
    XRAM memory and the uninterpreted ``sha`` function on a fresh
    ``ila.Abstraction``, installs the fetch/decode expressions and a
    next-state template for every state element, then synthesizes each
    requested state against the ``SHA`` simulator.

    Side effects:
        * writes one "<state> <seconds>" line per synthesized state to
          ``sha-times-en.txt`` or ``sha-times-dis.txt``;
        * exports each synthesized AST to ``asts/<state>_<suffix>``;
        * prints the accumulated synthesis time;
        * generates the simulator into the ``sim`` directory.

    Args:
        synstates: iterable of state names; each is passed to
            ``m.synthesize`` and ``m.get_next``.
        enable_ps: stored in ``m.enable_parameterized_synthesis``; also
            selects the ``'en'`` / ``'dis'`` file-name suffix.

    Returns:
        None.
    """
    m = ila.Abstraction("sha")
    m.enable_parameterized_synthesis = enable_ps

    # I/O interface: this is where commands come from.
    cmd = m.inp('cmd', 2)
    cmdaddr = m.inp('cmdaddr', 16)
    cmddata = m.inp('cmddata', 8)
    # response. Declared for its side effect only; it is referred to by name
    # in set_next('dataout', ...) below.
    m.reg('dataout', 8)

    # internal arch state.
    state = m.reg('sha_state', 3)
    rdaddr = m.reg('sha_rdaddr', 16)
    wraddr = m.reg('sha_wraddr', 16)
    oplen = m.reg('sha_len', 16)
    # for the uinst.
    bytes_read = m.reg('sha_bytes_read', 16)
    rd_data = m.reg('sha_rd_data', 512)
    hs_data = m.reg('sha_hs_data', 160)
    xram = m.mem('XRAM', 16, 8)
    sha = m.fun('sha', 160, [512])

    # fetch is just looking at the input command.
    m.fetch_expr = ila.concat([state, cmd, cmdaddr, cmddata])
    m.fetch_valid = (cmd == 1) | (cmd == 2)

    # decode: one expression per (address, state) pair over 0xfe00..0xfe0f.
    # cmd == 1 is treated as a read and cmd == 2 as a write; writes are only
    # decoded in state 0.
    rdcmds = [(state == i) & (cmd == 1) & (cmdaddr == addr)
              for addr in xrange(0xfe00, 0xfe10)
              for i in [0, 1, 2, 3, 4]]
    wrcmds = [(state == 0) & (cmd == 2) & (cmdaddr == addr)
              for addr in xrange(0xfe00, 0xfe10)]
    nopcmds = [(state == i) & (cmd != 1) & (cmdaddr == addr)
               for addr in xrange(0xfe00, 0xfe10)
               for i in [1, 2, 3, 4]]
    m.decode_exprs = rdcmds + wrcmds + nopcmds

    # read commands.
    statebyte = ila.zero_extend(state, 8)
    rdaddrbyte = ila.readchunk('rd_addr', rdaddr, 8)
    wraddrbyte = ila.readchunk('wr_addr', wraddr, 8)
    oplenbyte = ila.readchunk('op_len', oplen, 8)
    dataoutnext = ila.choice(
        'dataout',
        [statebyte, rdaddrbyte, wraddrbyte, oplenbyte, m.const(0, 8)])
    m.set_next('dataout', dataoutnext)

    # write commands.
    def mb_reg_wr(name, reg):
        # multibyte register write: either a chunk of `reg` is overwritten
        # with cmddata, or the register keeps its value.
        reg_wr = ila.writechunk('wr_' + name, reg, cmddata)
        reg_nxt = ila.choice('nxt_' + name, [reg_wr, reg])
        m.set_next(name, reg_nxt)

    mb_reg_wr('sha_rdaddr', rdaddr)
    mb_reg_wr('sha_wraddr', wraddr)
    mb_reg_wr('sha_len', oplen)

    # state
    state_next = ila.choice('state_next', [
        m.const(0, 3),
        m.const(1, 3),
        m.const(2, 3),
        m.const(3, 3),
        m.const(4, 3),
        ila.ite(cmddata == 1, m.const(1, 3), state),
        ila.ite(bytes_read < oplen, m.const(1, 3), m.const(4, 3)),
    ])
    m.set_next('sha_state', state_next)

    # these are for the uinst
    # bytes_read
    # (disabled alternative that clamps the increment at oplen:)
    #bytes_read_inc = ila.ite(bytes_read+64 <= oplen, bytes_read+64, oplen)
    bytes_read_inc = bytes_read + 64
    bytes_read_rst = ila.ite(cmddata == 1, m.const(0, 16), bytes_read)
    bytes_read_nxt = ila.choice(
        'bytes_read_nxt',
        [m.const(0, 16), bytes_read_inc, bytes_read_rst, bytes_read])
    m.set_next('sha_bytes_read', bytes_read_nxt)

    # rd_data: a 64-unit block load from XRAM in either byte order, or hold.
    rdblock_little = ila.loadblk(xram, rdaddr + bytes_read, 64)
    rdblock_big = ila.loadblk_big(xram, rdaddr + bytes_read, 64)
    rd_data_nxt = ila.choice(
        'rd_data_nxt', rdblock_big, rdblock_little, rd_data)
    m.set_next('sha_rd_data', rd_data_nxt)

    # hs_data: result of the uninterpreted sha function on rd_data, or hold.
    sha_hs_data = ila.appfun(sha, [rd_data])
    hs_data_nxt = ila.choice('hs_data_nxt', sha_hs_data, hs_data)
    m.set_next('sha_hs_data', hs_data_nxt)

    # xram write: unchanged, or hs_data stored at wraddr in either byte order.
    xram_w_sha_little = ila.storeblk(xram, wraddr, hs_data)
    xram_w_sha_big = ila.storeblk_big(xram, wraddr, hs_data)
    xram_nxt = ila.choice(
        'xram_nxt', xram, xram_w_sha_little, xram_w_sha_big)
    m.set_next('XRAM', xram_nxt)

    suffix = 'en' if enable_ps else 'dis'
    t_elapsed = 0

    def sim(s):
        # A fresh SHA simulator instance is created for every query.
        return SHA().simulate(s)

    # synthesis.
    # The timing log is opened in a `with` block so it is flushed and closed
    # even when synthesis raises; previously the handle was never closed.
    with open('sha-times-%s.txt' % suffix, 'wt') as timefile:
        for s in synstates:
            # NOTE: time.clock() exists on Python 2 (which this file targets)
            # but was removed in Python 3.8; a port must pick a replacement.
            st = time.clock()
            m.synthesize(s, sim)
            dt = time.clock() - st
            timefile.write('%s %.2f\n' % (s, dt))
            t_elapsed += dt
            ast = m.get_next(s)
            m.exportOne(ast, 'asts/%s_%s' % (s, suffix))

    # Parenthesized single-argument form prints identically on Python 2.
    print('time: %.2f' % t_elapsed)
    #m.generateSim('tmp/shasim.hpp')
    m.generateSimToDir('sim')