def TestBits():
    """
    Build the value 0xBEAF in the integer return register from Bits
    variables combined with shift, and, xor and or expressions, execute the
    program and check the returned value.
    """
    import corepy.arch.ppc.platform as env

    prgm = env.Program()
    code = prgm.get_stream()
    processor = env.Processor()
    ppc.set_active_code(code)

    # b, e, a and f each supply one hex digit of the expected 0xBEAF.
    b = Bits(0xB0)
    e = Bits(0xE0000)
    a = Bits(0xCA)
    f = Bits(0x5)
    x = Bits(0, reg=prgm.gp_return)

    nibble_mask = Bits(0xF)
    byte_shift = Bits(8)  # 8 bits
    nibble_shift = Bits(4)

    # (0xCA & 0xF) ^ 0x5 == 0xF
    f.v = (a & nibble_mask) ^ f
    x.v = (b << byte_shift) | (e >> byte_shift) | ((a & nibble_mask) << nibble_shift) | (f | nibble_mask)

    prgm.add(code)
    result = processor.execute(prgm)
    assert result == 0xBEAF
def TestRange():
    """
    Increment a counter once per iteration of three syn_range loops
    (7 + 11 + 3 iterations) and check that the executed stream returns 21.
    """
    code = synppc.InstructionStream()
    ppc.set_active_code(code)
    # code.add(ppc.Illegal())

    counter = vars.UnsignedWord(0)

    # Argument tuples for the three ranges, in the order they are emitted.
    range_specs = ((7,), (20, 31), (20, 26, 2))
    for spec in range_specs:
        for _ in syn_range(code, *spec):
            counter.v = counter + 1

    util.return_var(counter)
    counter.release_register(code)

    processor = synppc.Processor()
    result = processor.execute(code)
    # print 'should be 21:', result
    assert result == 21
def TestRange():
    """
    Increment a counter once per iteration of three syn_range loops
    (7 + 11 + 3 iterations) and check that the executed program returns 21.
    """
    prgm = synppc.Program()
    code = prgm.get_stream()
    prgm.add(code)
    ppc.set_active_code(code)

    counter = vars.UnsignedWord(0)

    # Argument tuples for the three ranges, in the order they are emitted.
    for spec in ((7,), (20, 31), (20, 26, 2)):
        for _ in syn_range(code, *spec):
            counter.v = counter + 1

    util.return_var(counter)
    # counter.release_register(code)

    processor = synppc.Processor()
    result = processor.execute(prgm)
    # print 'should be 21:', result
    assert result == 21
def TestVarIter():
    """
    Add 10 to every element of unsigned-int, signed-int, float and double
    arrays through var_iter, execute the program and hand each array to
    _array_check.
    """
    prgm = synppc.Program()
    code = prgm.get_stream()
    prgm.add(code)
    ppc.set_active_code(code)

    unsigned_words = array.array('I', range(4))
    for elem in var_iter(code, unsigned_words):
        elem.v = elem + 10

    signed_words = array.array('i', range(4))
    for elem in var_iter(code, signed_words):
        elem.v = elem + 10

    # TODO: Implement the rest of the integer types ('H', 'h', 'B', 'b'),
    # or have a clean way of upcasting to signed/unsigned int.

    singles = array.array('f', range(4))
    single_ten = vars.SingleFloat(10.0)
    for elem in var_iter(code, singles):
        elem.v = elem + single_ten

    doubles = array.array('d', range(4))
    double_ten = vars.DoubleFloat(10.0)
    for elem in var_iter(code, doubles):
        elem.v = elem + double_ten

    processor = synppc.Processor()
    processor.execute(prgm)

    for checked in (unsigned_words, signed_words, singles, doubles):
        _array_check(checked)
def TestVecIter():
    """
    Double every element of 16-entry arrays of each integer width and of
    floats by adding each vector to itself inside vector_iter loops, then
    check every array against 0, 2, ..., 30 with _array_check.
    """
    code = synppc.InstructionStream()
    ppc.set_active_code(code)
    # code.add(ppc.Illegal())

    # (array typecode, vmx add instruction) for the integer element types.
    integer_cases = (
        ('I', vmx.vadduws),
        ('i', vmx.vaddsws),
        ('H', vmx.vadduhs),
        ('h', vmx.vaddshs),
        ('B', vmx.vaddubs),
        ('b', vmx.vaddsbs),
    )

    processed = []
    for typecode, add_inst in integer_cases:
        values = array.array(typecode, range(16))
        for vec in vector_iter(code, values):
            vec.v = add_inst.ex(vec, vec)
        processed.append(values)

    # Never read afterwards; the construction is kept exactly where the
    # original test performs it.
    ften = vmx_vars.BitType(10.0)

    floats = array.array('f', range(16))
    for vec in vector_iter(code, floats):
        vec.v = vmx.vaddfp.ex(vec, vec)
    processed.append(floats)

    processor = synppc.Processor()
    processor.execute(code)

    expected = list(range(0, 32, 2))
    for values in processed:
        _array_check(values, expected)
def TestExternalStop():
    """
    Accumulate the sum of squares of a 25-element double array (values
    0..24) with two nested synthetic iterators whose stop values are taken
    from registers, and check that the floating point result is 4900.0.
    """
    prgm = synppc.Program()
    code = prgm.get_stream()
    prgm.add(code)
    ppc.set_active_code(code)

    # Data
    data = array.array('d', range(5 * 5))

    # Constants - read only
    n_rows = vars.SignedWord(5)
    n_cols = vars.SignedWord(5)
    addr = vars.SignedWord(data.buffer_info()[0])
    dbl_size = vars.SignedWord(synppc.WORD_SIZE * 2)
    row_bytes = vars.SignedWord(synppc.WORD_SIZE * 5 * 2)

    # Variables - read/write
    total = vars.DoubleFloat(0.0)
    elem = vars.DoubleFloat(0.0)
    offset = vars.SignedWord(0)

    # Iterators: both stop at a value held in a register
    row_iter = syn_iter(code, 0, mode=INC)
    row_iter.set_external_stop(n_rows.reg)

    col_ctr = syn_iter(code, 0, mode=CTR)
    col_ctr.set_external_stop(n_cols.reg)

    for row in row_iter:
        offset.v = vars.SignedWord.cast(row) * row_bytes

        # Note that the inner counter is unreadable since it's in the ctr
        # register
        for _ in col_ctr:
            # Load the next value in the matrix
            ppc.lfdx(elem, addr, offset)
            total.v = vars.fmadd(elem, elem, total)  # total += elem * elem
            offset.v = offset + dbl_size

    # code.add(ppc.Illegal())
    util.return_var(total)

    processor = synppc.Processor()
    result = processor.execute(prgm, mode='fp')
    # print 'Test external stop: ', result
    assert result == 4900.0
def TestVecIter():
    """
    Double every element of 16-entry extarrays of each integer width and of
    floats by adding each vector to itself inside vector_iter loops, then
    check every array against 0, 2, ..., 30 with _array_check.
    """
    prgm = synppc.Program()
    code = prgm.get_stream()
    prgm.add(code)
    ppc.set_active_code(code)

    # (extarray typecode, vmx add instruction) for the integer element types.
    integer_cases = (
        ('I', vmx.vadduws),
        ('i', vmx.vaddsws),
        ('H', vmx.vadduhs),
        ('h', vmx.vaddshs),
        ('B', vmx.vaddubs),
        ('b', vmx.vaddsbs),
    )

    processed = []
    for typecode, add_inst in integer_cases:
        values = extarray.extarray(typecode, range(16))
        for vec in vector_iter(code, values):
            vec.v = add_inst.ex(vec, vec)
        processed.append(values)

    # Never read afterwards; the construction is kept exactly where the
    # original test performs it.
    ften = vmx_vars.BitType(10.0)

    floats = extarray.extarray('f', range(16))
    for vec in vector_iter(code, floats):
        vec.v = vmx.vaddfp.ex(vec, vec)
    processed.append(floats)

    processor = synppc.Processor()
    processor.execute(prgm)

    expected = list(range(0, 32, 2))
    for values in processed:
        _array_check(values, expected)
def TestCodedCall():
    """
    Load the address of a one-element double array into the integer return
    register, load the double stored there into the floating point return
    register, and check that executing in 'fp' mode yields 3.14.
    """
    code = InstructionStream()
    processor = Processor()

    values = array.array("d", [3.14])
    values_addr = values.buffer_info()[0]

    load_word(code, code.gp_return, values_addr)

    ppc.set_active_code(code)
    ppc.lfd(code.fp_return, code.gp_return, 0)
    code.print_code()

    result = processor.execute(code, mode="fp", debug=True)
    assert result == 3.14
    print("float result: %s" % (result,))
def TestCodedCall():
    """
    Point the integer return register at a one-element double array, load
    that double into the floating point return register with lfd, and
    verify that an 'fp' mode execution returns 3.14.
    """
    code = InstructionStream()
    processor = Processor()

    source = array.array('d', [3.14])
    source_addr = source.buffer_info()[0]
    load_word(code, code.gp_return, source_addr)

    ppc.set_active_code(code)
    ppc.lfd(code.fp_return, code.gp_return, 0)
    code.print_code()

    fp_result = processor.execute(code, mode='fp', debug=True)
    assert fp_result == 3.14
    print('float result: %s' % (fp_result,))
def TestMemoryDesc():
    """
    Add 10 to each of the four unsigned words of an array, reaching it
    through a memory_desc built from its buffer address instead of the array
    object, then pass the array to _array_check.
    """
    code = synppc.InstructionStream()
    ppc.set_active_code(code)
    # code.add(ppc.Illegal())

    values = array.array('I', range(4))
    values_addr = values.buffer_info()[0]
    desc = memory_desc('I', values_addr, 4)

    for elem in var_iter(code, desc):
        elem.v = elem + 10

    processor = synppc.Processor()
    processor.execute(code)

    _array_check(values)
def TestExtended():
    """
    Define an extended instruction made of ten addi instructions and check
    that using it twice (once directly, once through its expression form)
    leaves 20 in the return register, both when instructions are added with
    code.add and when an active code stream is set.
    """

    class Add10(spe.ExtendedInstruction):
        # Instruction set module the extended instruction belongs to.
        isa_module = ppc

        def __init__(self, d, value):
            self.d = d
            self.value = value
            spe.ExtendedInstruction.__init__(self)

        def block(self):
            # Ten consecutive d = d + value instructions.
            for _ in range(10):
                ppc.addi(self.d, self.d, self.value)

    code = InstructionStream()
    processor = Processor()

    # Using code.add
    code.add(ppc.addi(code.gp_return, 0, 0))
    code.add(Add10(code.gp_return, 1))
    Add10.ex(1).eval(code, reg=code.gp_return)
    code.print_code()
    first_result = processor.execute(code)
    print(first_result)
    assert first_result == 20

    # Using active code
    code.reset()
    ppc.set_active_code(code)
    ppc.addi(code.gp_return, 0, 0)
    Add10(code.gp_return, 1)
    Add10.ex(1).eval(ppc.get_active_code(), reg=code.gp_return)
    code.print_code()
    second_result = processor.execute(code)
    print(second_result)
    assert second_result == 20
def TestExtended():
    """
    Exercise a locally defined extended instruction (ten addi instructions)
    twice per run, first adding instructions with code.add and then through
    the active code stream; each run must return 20.
    """

    class Add10(spe.ExtendedInstruction):
        # Instruction set module the extended instruction belongs to.
        isa_module = ppc

        def __init__(self, d, value):
            self.d = d
            self.value = value
            spe.ExtendedInstruction.__init__(self)

        def block(self):
            # Ten consecutive d = d + value instructions.
            for _ in range(10):
                ppc.addi(self.d, self.d, self.value)

    code = InstructionStream()
    processor = Processor()

    def run_and_check():
        # Dump the stream, execute it, print and verify the result.
        code.print_code()
        outcome = processor.execute(code)
        print(outcome)
        assert outcome == 20

    # Using code.add
    code.add(ppc.addi(code.gp_return, 0, 0))
    code.add(Add10(code.gp_return, 1))
    Add10.ex(1).eval(code, reg=code.gp_return)
    run_and_check()

    # Using active code
    code.reset()
    ppc.set_active_code(code)
    ppc.addi(code.gp_return, 0, 0)
    Add10(code.gp_return, 1)
    Add10.ex(1).eval(ppc.get_active_code(), reg=code.gp_return)
    run_and_check()
def TestMemoryDesc():
    """
    Add 10 to each of the four unsigned words of an array, reaching it
    through a memory_desc built from its buffer address instead of the array
    object, then pass the array to _array_check.
    """
    prgm = synppc.Program()
    code = prgm.get_stream()
    prgm.add(code)
    ppc.set_active_code(code)

    values = array.array('I', range(4))
    values_addr = values.buffer_info()[0]
    desc = memory_desc('I', values_addr, 4)

    for elem in var_iter(code, desc):
        elem.v = elem + 10

    processor = synppc.Processor()
    processor.execute(prgm)

    _array_check(values)
def synthesize(self, code, tB, N):
    """
    Extract a block from B and pack it for fast access.

    tB is transposed.

    code is made the active ppc code stream while the packing code is
    generated; tB is registered with it through code.add_storage.  code, tB
    and N are forwarded to self._init_constants.

    The previously active code stream is restored when synthesis finishes,
    including when one of the synthesis steps raises.
    """
    old_code = ppc.get_active_code()
    ppc.set_active_code(code)
    try:
        code.add_storage(tB)
        self._init_constants(code, tB, N)
        self._init_vars()
        self._load_params()
        self._pack_b(code)
    finally:
        # Never leave the module-level active code pointing at this stream.
        ppc.set_active_code(old_code)
def SimpleTest():
    """
    Just make sure things are working...

    Computes a small integer expression twice, first passing the code stream
    explicitly to every variable and then relying on the active code stream,
    and checks the executed results (54 and 42).
    """
    import corepy.arch.ppc.platform as env

    prgm = env.Program()
    code = prgm.get_stream()
    processor = env.Processor()
    prgm.add(code)

    # Without active code
    a = SignedWord(11, code)
    b = SignedWord(31, code, reg=code.prgm.acquire_register())
    c = SignedWord(code=code, reg=prgm.gp_return)

    byte_mask = Bits(0xFF, code)
    code.add(ppc.addi(prgm.gp_return, 0, 31))

    # c.v = a + SignedWord.cast(b & byte_mask) + 12
    c.v = a + (byte_mask & b) + 12

    explicit_result = processor.execute(prgm)
    assert explicit_result == (42 + 12)

    # With active code
    code.reset()
    ppc.set_active_code(code)

    a = SignedWord(11)
    b = SignedWord(31)
    c = SignedWord(reg=prgm.gp_return)

    byte_mask = Bits(0xFF)

    c.v = a + (b & byte_mask)

    ppc.set_active_code(None)
    active_result = processor.execute(prgm)
    # code.print_code()
    assert active_result == 42
def synthesize(self, prgm, tB, M, K, N, kc, nc, mr=1, nr=1):
    """
    Generate, into a stream obtained from prgm, a loop that packs a block of
    B and then runs the GEPB kernel on it.

    A SynGEPB (built with self.gepb_mode) and a SynPackB generator are set
    up; the loop is syn_iter(code, N, nc) and on every iteration the C
    address variable of the GEPB generator and the B address variable of the
    pack generator are advanced by nc * 8.

    M, K, N, kc, nc, mr and nr are forwarded to SynGEPB._init_constants;
    prgm, tB and N are forwarded to SynPackB._init_constants.

    The previously active code stream is restored when synthesis finishes,
    including when one of the synthesis steps raises.
    """
    code = prgm.get_stream()
    old_code = ppc.get_active_code()
    ppc.set_active_code(code)
    try:
        gepb = SynGEPB(self.gepb_mode)
        packb = SynPackB()

        gepb._init_constants(M, K, N, kc, nc, mr, nr, True)
        packb._init_constants(prgm, tB, N)

        gepb._init_vars()

        # Reuse the C/C_aux registers for B.  They are set in init pointers.
        packb._init_vars2(gepb.p_C, gepb.c[0][0], gepb.r_tB_addr)

        gepb._load_params()
        packb._load_params(pvB=7)

        # Python equivalent of the synthesized loop:
        #   for j in range(0, N * 8, nc * 8):
        for j in syn_iter(code, N, nc):
            # Pack B into tB -- tB1.transpose(B[k:k+kc, j:j+nc])
            packb.vN.v = N
            packb._pack_b(code)

            gepb._init_pointers()
            gepb._gepb(code)

            # Advance the C and B addresses to the next block column.
            gepb.r_C_addr.v = gepb.r_C_addr + nc * 8
            packb.vB.v = packb.vB + nc * 8
        # /end for j
    finally:
        # Never leave the module-level active code pointing at this stream.
        ppc.set_active_code(old_code)
def synthesize(self, prgm, tB, M, K, N, kc, nc, mr = 1, nr = 1):
    """
    Generate, into a stream obtained from prgm, a loop that packs a block of
    B and then runs the GEPB kernel on it.

    A SynGEPB (built with self.gepb_mode) and a SynPackB generator are set
    up; the loop is syn_iter(code, N, nc) and on every iteration the C
    address variable of the GEPB generator and the B address variable of the
    pack generator are advanced by nc * 8.

    M, K, N, kc, nc, mr and nr are forwarded to SynGEPB._init_constants;
    prgm, tB and N are forwarded to SynPackB._init_constants.

    The previously active code stream is restored when synthesis finishes,
    including when one of the synthesis steps raises.
    """
    code = prgm.get_stream()
    old_code = ppc.get_active_code()
    ppc.set_active_code(code)
    try:
        gepb = SynGEPB(self.gepb_mode)
        packb = SynPackB()

        gepb._init_constants(M, K, N, kc, nc, mr, nr, True)
        packb._init_constants(prgm, tB, N)

        gepb._init_vars()

        # Reuse the C/C_aux registers for B.  They are set in init pointers.
        packb._init_vars2(gepb.p_C, gepb.c[0][0], gepb.r_tB_addr)

        gepb._load_params()
        packb._load_params(pvB = 7)

        # Python equivalent of the synthesized loop:
        #   for j in range(0, N * 8, nc * 8):
        for j in syn_iter(code, N, nc):
            # Pack B into tB -- tB1.transpose(B[k:k+kc, j:j+nc])
            packb.vN.v = N
            packb._pack_b(code)

            gepb._init_pointers()
            gepb._gepb(code)

            # Advance the C and B addresses to the next block column.
            gepb.r_C_addr.v = gepb.r_C_addr + nc * 8
            packb.vB.v = packb.vB + nc * 8
        # /end for j
    finally:
        # Never leave the module-level active code pointing at this stream.
        ppc.set_active_code(old_code)
def synthesize(self, prgm, tB, N):
    """
    Extract a block from B and pack it for fast access.

    tB is transposed.

    The packing code is generated into a stream obtained from prgm, which is
    the active ppc code stream for the duration of the call; tB is
    registered with prgm through prgm.add_storage.  prgm, tB and N are
    forwarded to self._init_constants.

    The previously active code stream is restored when synthesis finishes,
    including when one of the synthesis steps raises.
    """
    code = prgm.get_stream()
    old_code = ppc.get_active_code()
    ppc.set_active_code(code)
    try:
        prgm.add_storage(tB)
        self._init_constants(prgm, tB, N)
        self._init_vars()
        self._load_params()
        self._pack_b(code)
    finally:
        # Never leave the module-level active code pointing at this stream.
        ppc.set_active_code(old_code)
def TestZipIter():
    """
    Walk three arrays in lockstep with zip_iter, storing element-wise sums
    into the third and counting iterations (expected: 16), then do the same
    element-wise add with vector iterators.  The arrays are printed; their
    contents are not verified yet.
    """
    code = synppc.InstructionStream()
    ppc.set_active_code(code)
    # code.add(ppc.Illegal())

    a = array.array('I', range(16, 32))
    b = array.array('I', range(32, 48))
    c = array.array('I', [0] * 16)

    count = vars.UnsignedWord(0)

    a_iter = var_iter(code, a)
    b_iter = var_iter(code, b)
    c_iter = var_iter(code, c, store_only=True)
    for lhs, rhs, out in zip_iter(code, a_iter, b_iter, c_iter):
        out.v = lhs + rhs
        count.v = count + 1

    av = vector_iter(code, array.array('I', range(16)))
    bv = vector_iter(code, array.array('I', range(16, 32)))
    cv = vector_iter(code, array.array('I', [0] * 16), store_only=True)
    for lhs, rhs, out in zip_iter(code, av, bv, cv):
        out.v = vmx.vadduws.ex(lhs, rhs)  # lhs + rhs

    util.return_var(count)

    processor = synppc.Processor()
    result = processor.execute(code)
    assert result == 16

    for shown in (a, b, c, av.data, bv.data, cv.data):
        print(shown)
    print('TODO: Finish checking TestZipIter values')
def synthesize(self, prgm, M, K, N, kc, nc, mr = 1, nr = 1, _transpose = False):
    """
    tA is M  x nc
    tB is nc x kc
    C  is M  x nc
    I is the current block column in C

    The kernel is generated into a stream obtained from prgm, which is the
    active ppc code stream for the duration of the call.  M, K, N, kc, nc,
    mr, nr and _transpose are forwarded to self._init_constants.

    The previously active code stream is restored when synthesis finishes,
    including when one of the synthesis steps raises.
    """
    code = prgm.get_stream()
    old_code = ppc.get_active_code()
    ppc.set_active_code(code)
    try:
        self._init_constants(M, K, N, kc, nc, mr, nr, _transpose)
        self._init_vars()
        self._load_params()
        self._init_pointers()
        self._gepb(code)
    finally:
        # Never leave the module-level active code pointing at this stream.
        ppc.set_active_code(old_code)
def synthesize(self, code, M, K, N, kc, nc, mr = 1, nr = 1, _transpose = False):
    """
    tA is M  x nc
    tB is nc x kc
    C  is M  x nc
    I is the current block column in C

    code is made the active ppc code stream while the kernel is generated.
    M, K, N, kc, nc, mr, nr and _transpose are forwarded to
    self._init_constants.

    The previously active code stream is restored when synthesis finishes,
    including when one of the synthesis steps raises.
    """
    old_code = ppc.get_active_code()
    ppc.set_active_code(code)
    try:
        self._init_constants(M, K, N, kc, nc, mr, nr, _transpose)
        self._init_vars()
        self._load_params()
        self._init_pointers()
        self._gepb(code)
    finally:
        # Never leave the module-level active code pointing at this stream.
        ppc.set_active_code(old_code)
def TestZipIter():
    """
    Walk three extarrays in lockstep with zip_iter, storing element-wise
    sums into the third and counting iterations (expected: 16), then do the
    same element-wise add with vector iterators.  The arrays are printed;
    their contents are not verified yet.
    """
    prgm = synppc.Program()
    code = prgm.get_stream()
    ppc.set_active_code(code)
    prgm.add(code)

    a = extarray.extarray('I', range(16, 32))
    b = extarray.extarray('I', range(32, 48))
    c = extarray.extarray('I', [0] * 16)

    count = vars.UnsignedWord(0)

    a_iter = var_iter(code, a)
    b_iter = var_iter(code, b)
    c_iter = var_iter(code, c, store_only=True)
    for lhs, rhs, out in zip_iter(code, a_iter, b_iter, c_iter):
        out.v = lhs + rhs
        count.v = count + 1

    av = vector_iter(code, extarray.extarray('I', range(16)))
    bv = vector_iter(code, extarray.extarray('I', range(16, 32)))
    cv = vector_iter(code, extarray.extarray('I', [0] * 16), store_only=True)
    for lhs, rhs, out in zip_iter(code, av, bv, cv):
        out.v = vmx.vadduws.ex(lhs, rhs)  # lhs + rhs

    util.return_var(count)

    processor = synppc.Processor()
    result = processor.execute(prgm, mode='int')
    assert result == 16

    for shown in (a, b, c, av.data, bv.data, cv.data):
        print(shown)
    print('TODO: Finish checking TestZipIter values')
def TestNestedIter():
    """
    Accumulate i + j + k over three nested syn_iter loops of five iterations
    each and check that the executed stream returns 750.
    """
    code = synppc.InstructionStream()
    ppc.set_active_code(code)
    # code.add(ppc.Illegal())

    total = vars.UnsignedWord(0)

    for outer in syn_iter(code, 5):
        for middle in syn_iter(code, 5):
            for inner in syn_iter(code, 5):
                total.v = total + outer + middle + inner

    util.return_var(total)
    total.release_register()

    processor = synppc.Processor()
    result = processor.execute(code)
    # print 'should be 750:', result
    assert result == 750
def TestNestedIter():
    """
    Accumulate i + j + k over three nested syn_iter loops of five iterations
    each and check that the executed program returns 750.
    """
    prgm = synppc.Program()
    code = prgm.get_stream()
    prgm.add(code)
    ppc.set_active_code(code)

    total = vars.UnsignedWord(0)

    for outer in syn_iter(code, 5):
        for middle in syn_iter(code, 5):
            for inner in syn_iter(code, 5):
                total.v = total + outer + middle + inner

    util.return_var(total)
    # total.release_register()

    processor = synppc.Processor()
    result = processor.execute(prgm)
    # print 'should be 750:', result
    assert result == 750
def TestBits():
    """
    Build the value 0xBEAF in the integer return register from Bits
    variables combined with shift, and, xor and or expressions, execute the
    stream and check the returned value.
    """
    from corepy.arch.ppc.platform import Processor, InstructionStream

    code = InstructionStream()
    processor = Processor()
    ppc.set_active_code(code)

    # b, e, a and f each supply one hex digit of the expected 0xBEAF.
    b = Bits(0xB0)
    e = Bits(0xE0000)
    a = Bits(0xCA)
    f = Bits(0x5)
    x = Bits(0, reg=code.gp_return)

    nibble_mask = Bits(0xF)
    byte_shift = Bits(8)  # 8 bits
    nibble_shift = Bits(4)

    # (0xCA & 0xF) ^ 0x5 == 0xF
    f.v = (a & nibble_mask) ^ f
    x.v = (b << byte_shift) | (e >> byte_shift) | ((a & nibble_mask) << nibble_shift) | (f | nibble_mask)

    result = processor.execute(code)
    assert result == 0xBEAF
def TestFloatingPoint(float_type):
    """
    Exercise arithmetic, fused multiply-add variants, and the four
    load/store addressing forms of a floating point variable type.

    float_type is the variable class under test; SingleFloat selects 4-byte
    'f' array elements, anything else 8-byte 'd' elements.  The executed
    program must return 0.0 and leave 11.0, 12.0, 13.0, 14.0 in the data
    array.
    """
    import corepy.arch.ppc.platform as env

    prgm = env.Program()
    code = prgm.get_stream()
    proc = env.Processor()
    ppc.set_active_code(code)

    x = float_type(1.0)
    y = float_type(2.0)
    z = float_type(3.0)

    a = float_type()
    b = float_type()
    c = float_type()
    d = float_type()

    # Set the size of the float based on whether its double or single
    # Initialize a data array based on float type as well.
    if float_type == SingleFloat:
        float_size, typecode = 4, 'f'
    else:
        float_size, typecode = 8, 'd'
    data = array.array(typecode, (1.0, 2.0, 3.0, 4.0))

    # Create some data
    addr = data.buffer_info()[0]

    # Load from addr
    a.load(addr)

    # Load from addr with idx in register
    offset = Bits(float_size)
    b.load(data.buffer_info()[0], offset)

    # Load from addr with constant idx
    c.load(data.buffer_info()[0], float_size * 2)

    # Load from addr with addr as a register
    reg_addr = Bits(addr)
    d.load(reg_addr)

    r = float_type(reg=prgm.fp_return)

    r.v = (x + y) / y

    r.v = fmadd(a, y, z + z) + fnmadd(a, y, z + z) + fmsub(x, y, z) + fnmsub(x, y, z)
    x.v = -x
    r.v = r + x - x + a + b - c + d - d

    # Store from addr
    a.v = 11.0
    a.store(addr)

    # Store from addr with idx in register
    offset = Bits(float_size)
    b.v = 12.0
    b.store(data.buffer_info()[0], offset)

    # Store from addr with constant idx
    c.v = 13.0
    c.store(data.buffer_info()[0], float_size * 2)

    # Store from addr with addr as a register
    d.v = 14.0
    reg_addr = UnsignedWord(addr)
    reg_addr.v = reg_addr + float_size * 3
    d.store(reg_addr)

    prgm.add(code)

    r = proc.execute(prgm, mode='fp')
    assert r == 0.0
    for index, stored in enumerate((11.0, 12.0, 13.0, 14.0)):
        assert data[index] == stored
def _set_active_code(self, code):
    """
    Make code the active ppc code stream and return whatever
    ppc.set_active_code returns.
    """
    outcome = ppc.set_active_code(code)
    return outcome
def TestLiterals():
    """
    Build vector literals from scalars and from sequences for byte, halfword
    and word types, cancel each against its signed counterpart, OR all
    differences together and store the result; every stored word must be 0.
    """
    import corepy.arch.ppc.platform as env

    prgm = env.Program()
    code = prgm.get_stream()
    prgm += code
    processor = env.Processor()

    ppc.set_active_code(code)
    vmx.set_active_code(code)

    # Never read afterwards; the construction is kept exactly where the
    # original test performs it.
    zero = Bits.cast(SignedByte(0))

    target = Bits()

    # Signed versions use splat, unsigned arrays
    b = Byte(2)
    sb = SignedByte(-2)
    vmx.vaddsbs(b, b, sb)

    h = Halfword(9999)
    sh = SignedHalfword(-9999)
    vmx.vaddshs(h, h, sh)

    w = Word(99999)
    sw = SignedWord(-99999)
    vmx.vaddsws(w, w, sw)

    # Combine the results (should be [0,0,0,0])
    vmx.vor(target, b, h)
    vmx.vor(target, target, w)

    # Array initializers
    half_values = [9999, 9998, 9997, 9996, 9995, 9994, 9993, 9992]
    word_values = [99999, 99998, 99997, 99996]

    b = Byte(range(16))
    sb = SignedByte(range(16))
    vmx.vsubsbs(b, b, sb)
    vmx.vor(target, target, b)

    # Each constructor gets its own list object, as in the original.
    h = Halfword(list(half_values))
    sh = SignedHalfword(list(half_values))
    vmx.vsubshs(h, h, sh)
    vmx.vor(target, target, h)

    w = Word(list(word_values))
    sw = SignedWord(list(word_values))
    vmx.vsubsws(w, w, sw)

    target.v = vmx.vor.ex(target, w)

    # Pre-filled with 42 so an unexecuted store cannot pass the check.
    result = extarray.extarray('I', [42, 42, 42, 42])
    r_addr = prgm.acquire_register()
    util.load_word(code, r_addr, result.buffer_info()[0])

    vmx.stvx(target, 0, r_addr)

    ppc.set_active_code(None)
    vmx.set_active_code(None)

    processor.execute(prgm)
    print(result)
    for stored_word in result:
        assert stored_word == 0
    # for i in result: print '%08X' % i,
    # print
def TestLiterals():
    """
    Create byte, halfword and word vector literals (scalar and sequence
    initialised), subtract or add away each against its signed counterpart,
    OR every difference into one vector and store it; all four stored words
    must be 0.
    """
    import corepy.arch.ppc.platform as env

    prgm = env.Program()
    code = prgm.get_stream()
    prgm += code
    processor = env.Processor()

    for isa in (ppc, vmx):
        isa.set_active_code(code)

    # Never read afterwards; the construction is kept exactly where the
    # original test performs it.
    zero = Bits.cast(SignedByte(0))

    target = Bits()

    # Signed versions use splat, unsigned arrays
    b = Byte(2)
    sb = SignedByte(-2)
    vmx.vaddsbs(b, b, sb)

    h = Halfword(9999)
    sh = SignedHalfword(-9999)
    vmx.vaddshs(h, h, sh)

    w = Word(99999)
    sw = SignedWord(-99999)
    vmx.vaddsws(w, w, sw)

    # Combine the results (should be [0,0,0,0])
    vmx.vor(target, b, h)
    vmx.vor(target, target, w)

    # Array initializers
    b = Byte(range(16))
    sb = SignedByte(range(16))
    vmx.vsubsbs(b, b, sb)
    vmx.vor(target, target, b)

    h = Halfword([9999, 9998, 9997, 9996, 9995, 9994, 9993, 9992])
    sh = SignedHalfword([9999, 9998, 9997, 9996, 9995, 9994, 9993, 9992])
    vmx.vsubshs(h, h, sh)
    vmx.vor(target, target, h)

    w = Word([99999, 99998, 99997, 99996])
    sw = SignedWord([99999, 99998, 99997, 99996])
    vmx.vsubsws(w, w, sw)

    target.v = vmx.vor.ex(target, w)

    # Pre-filled with 42 so an unexecuted store cannot pass the check.
    result = extarray.extarray('I', [42] * 4)
    result_addr = result.buffer_info()[0]
    r_addr = prgm.acquire_register()
    util.load_word(code, r_addr, result_addr)

    vmx.stvx(target, 0, r_addr)

    for isa in (ppc, vmx):
        isa.set_active_code(None)

    processor.execute(prgm)
    print(result)
    for stored_word in result:
        assert stored_word == 0
    # for i in result: print '%08X' % i,
    # print
# code is the current InstructionStream, where new code is added prgm = env.Program() code = prgm.get_stream() # proc is a platform-specific execution environemnt proc = env.Processor() # Setting the active code allows you call instructions directly # and automatically add them to the instruction stream. # # Add instruction without active code: # code.add(ppc.addi(...)) # # Add instruction wit active code: # ppc.addi(...) ppc.set_active_code(code) ppc.addi(prgm.gp_return, 0, 12) ppc.b(prgm.lbl_epilogue) prgm.add(code) prgm.print_code(pro=True, epi=True, binary=True) r = proc.execute(prgm, debug=True) print 'int result:', r assert(r == 12) code.reset() a = array.array('d', [3.14])
# code is the current Synthetic Programm code = env.InstructionStream() # proc is a platform-specific execution environemnt proc = env.Processor() # Setting the active code allows you call instructions directly # and automatically add them to the instruction stream. # # Add instruction without active code: # code.add(ppc.addi(...)) # # Add instruction wit active code: # ppc.addi(...) ppc.set_active_code(code) ppc.addi(code.gp_return, 0, 12) ppc.b(code.lbl_epilogue) code.cache_code() code.print_code(pro=True, epi=True, binary=True) r = proc.execute(code, debug=True) print 'int result:', r assert (r == 12) code.reset() a = array.array('d', [3.14])