def TestRange():
    """Exercise the three call forms of syn_range on a Program stream.

    A counter is incremented in the body of three synthesized loops and
    returned from the program; the executed result must be 21.
    """
    program = synppc.Program()
    code = program.get_stream()
    program.add(code)
    ppc.set_active_code(code)

    counter = vars.UnsignedWord(0)

    # (stop), (start, stop) and (start, stop, step) argument forms, in the
    # order they are synthesized.
    for range_args in ((7,), (20, 31), (20, 26, 2)):
        for _ in syn_range(code, *range_args):
            counter.v = counter + 1

    util.return_var(counter)
    # counter.release_register(code) is left disabled in this variant.

    processor = synppc.Processor()
    result = processor.execute(program)

    # print 'should be 21:', result
    assert result == 21
def TestIter():
    """Exercise syn_iter in its default, DEC and INC modes on a raw stream.

    The first three loops add 1 per iteration and the last adds the loop
    variable itself; the executed program must return 36.
    """
    code = synppc.InstructionStream()
    # code.add(ppc.Illegal())

    total = vars.SignedWord(0, code=code)

    # Default mode.
    for _ in syn_iter(code, 16, 4):
        total.v = total + 1

    # Explicit decrement mode.
    for _ in syn_iter(code, 16, 4, mode=DEC):
        total.v = total + 1

    # Explicit increment mode.
    for _ in syn_iter(code, 16, 4, mode=INC):
        total.v = total + 1

    # Increment mode again, this time accumulating the loop variable.
    for index in syn_iter(code, 16, 4, mode=INC):
        total.v = total + vars.SignedWord.cast(index)

    util.return_var(total)
    total.release_register(code)

    processor = synppc.Processor()
    result = processor.execute(code)

    # print 'should be 36:', result
    assert result == 36
def TestRange():
    """Exercise the three call forms of syn_range on a raw InstructionStream.

    Each synthesized loop body adds 1 to a counter; the counter is returned
    from the stream and must equal 21 after execution.
    """
    code = synppc.InstructionStream()
    ppc.set_active_code(code)
    # code.add(ppc.Illegal())

    counter = vars.UnsignedWord(0)

    # One-, two- and three-argument forms, synthesized in this order.
    for _ in syn_range(code, 7):
        counter.v = counter + 1
    for _ in syn_range(code, 20, 31):
        counter.v = counter + 1
    for _ in syn_range(code, 20, 26, 2):
        counter.v = counter + 1

    util.return_var(counter)
    counter.release_register(code)

    processor = synppc.Processor()
    result = processor.execute(code)

    # print 'should be 21:', result
    assert result == 21
def TestIter():
    """Exercise syn_iter in its default, DEC and INC modes on a Program.

    Three loops add 1 per iteration and a fourth adds the loop variable;
    the executed program must return 36.
    """
    program = synppc.Program()
    code = program.get_stream()
    program.add(code)

    total = vars.SignedWord(0, code=code)

    # Default mode.
    for _ in syn_iter(code, 16, 4):
        total.v = total + 1

    # Explicit decrement mode.
    for _ in syn_iter(code, 16, 4, mode=DEC):
        total.v = total + 1

    # Explicit increment mode.
    for _ in syn_iter(code, 16, 4, mode=INC):
        total.v = total + 1

    # Increment mode, accumulating the loop variable instead of a constant.
    for index in syn_iter(code, 16, 4, mode=INC):
        total.v = total + vars.SignedWord.cast(index)

    util.return_var(total)
    # total.release_register(code) is left disabled in this variant.

    processor = synppc.Processor()
    result = processor.execute(program)

    # print 'should be 36:', result
    assert result == 36
def TestBits():
    """Check Bits and/or/xor/shift operators by assembling 0xBEAF.

    The result variable is bound to the program's gp_return register, so the
    value of the final expression is what execute() returns.
    """
    import corepy.arch.ppc.platform as env

    program = env.Program()
    code = program.get_stream()
    processor = env.Processor()
    ppc.set_active_code(code)

    # Operands are created in a fixed order; do not reorder.
    src_b = Bits(0xB0)
    src_e = Bits(0xE0000)
    src_a = Bits(0xCA)
    src_f = Bits(0x5)
    result = Bits(0, reg=program.gp_return)
    nibble_mask = Bits(0xF)
    byte_shift = Bits(8)  # 8 bits
    nibble_shift = Bits(4)

    # (0xCA & 0xF) ^ 0x5 == 0xF
    src_f.v = (src_a & nibble_mask) ^ src_f

    # 0xB000 | 0x0E00 | 0x00A0 | 0x000F == 0xBEAF.  Kept as one expression
    # so the operations are emitted in the same order as before.
    result.v = ((src_b << byte_shift)
                | (src_e >> byte_shift)
                | ((src_a & nibble_mask) << nibble_shift)
                | (src_f | nibble_mask))

    program.add(code)
    returned = processor.execute(program)
    assert returned == 0xBEAF
def TestVarIter():
    """Add 10 in place to every element of word and float arrays via var_iter.

    Covers 'I', 'i', 'f' and 'd' arrays; each array is verified afterwards
    with _array_check using its default expectation.
    """
    program = synppc.Program()
    code = program.get_stream()
    program.add(code)
    ppc.set_active_code(code)

    unsigned_words = array.array('I', range(4))
    for elem in var_iter(code, unsigned_words):
        elem.v = elem + 10

    signed_words = array.array('i', range(4))
    for elem in var_iter(code, signed_words):
        elem.v = elem + 10

    # TODO: Implement the rest of the integer types ('H', 'h', 'B', 'b'),
    # or have a clean way of upcasting to signed/unsigned int.

    singles = array.array('f', range(4))
    single_ten = vars.SingleFloat(10.0)
    for elem in var_iter(code, singles):
        elem.v = elem + single_ten

    doubles = array.array('d', range(4))
    double_ten = vars.DoubleFloat(10.0)
    for elem in var_iter(code, doubles):
        elem.v = elem + double_ten

    processor = synppc.Processor()
    processor.execute(program)

    for checked in (unsigned_words, signed_words, singles, doubles):
        _array_check(checked)
def syn_gemm_pp(A, B, C, mc, kc, nc, mr=1, nr=1, gepb_mode=gepb_simple):
    """Run the synthetic GEPP kernel over column panels of A and time it.

    Parameters:
      A, B, C    -- Numeric arrays; C is M x N, A is sliced as A[:, k:k+kc]
                    and B is addressed by row offset k * N.
      mc         -- accepted for interface compatibility; not used here.
      kc, nc     -- panel sizes, clamped to K and N respectively.
      mr, nr     -- register-block sizes forwarded to the synthesizer.
      gepb_mode  -- mode passed to SynGEPP.

    Returns the wall-clock time in seconds spent in the panel loop
    (synthesis is excluded from the timed region).
    """
    cgepp = synppc.InstructionStream()
    proc = synppc.Processor()
    gepp = SynGEPP(gepb_mode)

    M, N = C.shape
    # A is sliced as A[:, k:k+kc] into an (M, kc) buffer below, so the
    # shared dimension K is A's column count.  Reading A.shape[0] gave M,
    # which is only correct for square A.
    K = A.shape[1]

    nc = min(nc, N)
    kc = min(kc, K)

    tA = Numeric.zeros((M, kc), typecode=Numeric.Float)
    tB = Numeric.zeros((nc, kc), typecode=Numeric.Float) + 14.0
    C_aux = Numeric.zeros((mr, nc), typecode=Numeric.Float)

    cgepp.set_debug(True)
    gepp.synthesize(cgepp, tB, M, K, N, kc, nc, mr, nr)
    cgepp.cache_code()
    # cgepp.print_code()

    B_addr = synppc.array_address(B)
    C_addr = synppc.array_address(C)

    pm = synppc.ExecParams()
    pm.p1 = synppc.array_address(tA)
    pm.p2 = synppc.array_address(tB)
    pm.p3 = C_addr
    pm.p4 = synppc.array_address(C_aux)

    start = time.time()

    for k in range(0, K, kc):
        # Pack A into tA
        tA[:, :] = A[:, k:k + kc]

        pm.p3 = C_addr
        # Byte address of row k of B (8 bytes per element).
        pm.p5 = B_addr + k * N * 8

        proc.execute(cgepp, params=pm)

    end = time.time()
    return end - start
def test_syn_pack_b():
    """Run the synthesized B-packing routine and compare against a reference.

    A 10x10 index matrix B is packed into a 5x5 buffer tB starting at flat
    element offset 30; the expected contents are rebuilt in Python as a
    transposed copy of that sub-block and handed to _validate.
    """
    # Create a 10x10 B array of indices: B[r, c] == r * 10 + c
    B = Numeric.zeros((10, 10), typecode=Numeric.Float)
    column_ids = Numeric.arange(10)
    for row in range(10):
        B[row, :] = column_ids + row * 10
    B.shape = (10, 10)

    # Create the packed array
    tB = Numeric.arange(25, typecode=Numeric.Float) * 0.0
    tB.shape = (5, 5)

    B_offset = 3 * 10 + 0

    K, N = B.shape
    nc, kc = tB.shape

    packer = SynPackB()
    code = synppc.InstructionStream()
    proc = synppc.Processor()
    params = synppc.ExecParams()

    packer.synthesize(code, tB, N)

    # Start address of the sub-block (8 bytes per element).
    params.p1 = synppc.array_address(B) + B_offset * 8
    proc.execute(code, params=params)

    # Validate: index B as a flat array while building the reference.
    B.shape = (K * N,)
    tB_valid = Numeric.arange(nc * kc, typecode=Numeric.Float) * 0.0

    for i in range(kc):
        row_start = B_offset + i * N
        for j in range(nc):
            tB_valid[j * kc + i] = B[row_start + j]

    tB_valid.shape = (nc, kc)
    B.shape = (K, N)

    # NOTE(review): nc is passed twice here; confirm whether the second
    # argument was meant to be kc.
    _validate('syn_pack_b', nc, nc, N, tB, tB_valid)
def TestVecIter():
    """Double every element of 16-entry arrays using vector_iter and VMX adds.

    Each array type is paired with a saturating add instruction and added to
    itself, so every array must end up holding 0, 2, 4, ..., 30.
    """
    code = synppc.InstructionStream()
    ppc.set_active_code(code)
    # code.add(ppc.Illegal())

    # (array typecode, saturating add), synthesized in this order.
    integer_cases = (
        ('I', vmx.vadduws),
        ('i', vmx.vaddsws),
        ('H', vmx.vadduhs),
        ('h', vmx.vaddshs),
        ('B', vmx.vaddubs),
        ('b', vmx.vaddsbs),
    )

    buffers = []
    for typecode, add_op in integer_cases:
        buf = array.array(typecode, range(16))
        buffers.append(buf)
        for vec in vector_iter(code, buf):
            vec.v = add_op.ex(vec, vec)

    # Never read afterwards; kept because it is constructed while the stream
    # is active -- TODO confirm whether it can be dropped.
    ften = vmx_vars.BitType(10.0)

    floats = array.array('f', range(16))
    buffers.append(floats)
    for vec in vector_iter(code, floats):
        vec.v = vmx.vaddfp.ex(vec, vec)

    processor = synppc.Processor()
    processor.execute(code)

    expected = [0, 2, 4, 6, 8, 10, 12, 14,
                16, 18, 20, 22, 24, 26, 28, 30]

    for buf in buffers:
        _array_check(buf, expected)
def TestExternalStop():
    """Drive nested syn_iter loops whose stop values come from registers.

    Walks a 5x5 array of doubles holding 0..24 and accumulates the sum of
    squares with fmadd; the program must return 4900.0 in 'fp' mode.
    """
    program = synppc.Program()
    code = program.get_stream()
    program.add(code)
    ppc.set_active_code(code)

    # Data
    matrix = array.array('d', range(5 * 5))

    # Constants - read only
    n_rows = vars.SignedWord(5)
    n_cols = vars.SignedWord(5)
    base_addr = vars.SignedWord(matrix.buffer_info()[0])
    dbl_size = vars.SignedWord(synppc.WORD_SIZE * 2)
    row_bytes = vars.SignedWord(synppc.WORD_SIZE * 5 * 2)

    # Variables - read/write
    total = vars.DoubleFloat(0.0)
    value = vars.DoubleFloat(0.0)
    offset = vars.SignedWord(0)

    # Iterators: the loop bounds are read from registers, not constants.
    row_iter = syn_iter(code, 0, mode=INC)
    row_iter.set_external_stop(n_rows.reg)

    col_ctr = syn_iter(code, 0, mode=CTR)
    col_ctr.set_external_stop(n_cols.reg)

    for row in row_iter:
        offset.v = vars.SignedWord.cast(row) * row_bytes

        # Note that the column counter is unreadable since it's in the ctr
        # register
        for _ in col_ctr:
            # Load the next value in the matrix
            ppc.lfdx(value, base_addr, offset)
            total.v = vars.fmadd(value, value, total)  # total += value*value
            offset.v = offset + dbl_size

    # code.add(ppc.Illegal())
    util.return_var(total)

    processor = synppc.Processor()
    result = processor.execute(program, mode='fp')

    # print 'Test external stop: ', result
    assert result == 4900.0
def TestMemoryDesc():
    """Iterate a memory_desc over an array's buffer and add 10 to each word.

    The descriptor is built from the address of a four-element 'I' array;
    the array itself is then checked with _array_check.
    """
    code = synppc.InstructionStream()
    ppc.set_active_code(code)
    # code.add(ppc.Illegal())

    words = array.array('I', range(4))
    buffer_addr = words.buffer_info()[0]
    desc = memory_desc('I', buffer_addr, 4)

    for elem in var_iter(code, desc):
        elem.v = elem + 10

    processor = synppc.Processor()
    processor.execute(code)

    _array_check(words)
def TestMemoryDesc():
    """Iterate a memory_desc over an array's buffer using the Program API.

    Adds 10 to each of four unsigned words through a descriptor built from
    the array's buffer address, then checks the array with _array_check.
    """
    program = synppc.Program()
    code = program.get_stream()
    program.add(code)
    ppc.set_active_code(code)

    words = array.array('I', range(4))
    buffer_addr = words.buffer_info()[0]
    desc = memory_desc('I', buffer_addr, 4)

    for elem in var_iter(code, desc):
        elem.v = elem + 10

    processor = synppc.Processor()
    processor.execute(program)

    _array_check(words)
def SimpleTest():
    """
    Just make sure things are working...

    Builds the same small expression twice: first passing the stream to
    every variable explicitly, then relying on the active-code mechanism.
    """
    import corepy.arch.ppc.platform as env

    program = env.Program()
    code = program.get_stream()
    processor = env.Processor()

    program.add(code)

    # --- Without active code: the stream is passed explicitly -------------
    lhs = SignedWord(11, code)
    rhs = SignedWord(31, code, reg=code.prgm.acquire_register())
    out = SignedWord(code=code, reg=program.gp_return)

    mask = Bits(0xFF, code)
    code.add(ppc.addi(program.gp_return, 0, 31))

    # out.v = lhs + SignedWord.cast(rhs & mask) + 12
    out.v = lhs + (mask & rhs) + 12

    result = processor.execute(program)
    assert result == (42 + 12)

    # --- With active code: variables pick up the stream implicitly --------
    code.reset()
    ppc.set_active_code(code)

    # The same names are rebound on purpose so the first-phase variables
    # are dropped exactly when they were before.
    lhs = SignedWord(11)
    rhs = SignedWord(31)
    out = SignedWord(reg=program.gp_return)

    mask = Bits(0xFF)

    out.v = lhs + (rhs & mask)

    ppc.set_active_code(None)
    result = processor.execute(program)
    # code.print_code()
    assert result == 42
def TestZipIter():
    """Walk several iterators in lock step with zip_iter.

    The scalar pass stores a[i] + b[i] into c and counts iterations (the
    count is returned and must be 16); the vector pass does the same add
    with vmx.vadduws.  The array contents are only printed, not verified.
    """
    code = synppc.InstructionStream()
    ppc.set_active_code(code)
    # code.add(ppc.Illegal())

    a = array.array('I', range(16, 32))
    b = array.array('I', range(32, 48))
    c = array.array('I', [0] * 16)

    count = vars.UnsignedWord(0)

    scalar_zip = zip_iter(code,
                          var_iter(code, a),
                          var_iter(code, b),
                          var_iter(code, c, store_only=True))
    for lhs, rhs, dst in scalar_zip:
        dst.v = lhs + rhs
        count.v = count + 1

    av = vector_iter(code, array.array('I', range(16)))
    bv = vector_iter(code, array.array('I', range(16, 32)))
    cv = vector_iter(code, array.array('I', [0] * 16), store_only=True)

    for lhs, rhs, dst in zip_iter(code, av, bv, cv):
        dst.v = vmx.vadduws.ex(lhs, rhs)  # lhs + rhs

    util.return_var(count)

    processor = synppc.Processor()
    result = processor.execute(code)

    assert result == 16

    # Single-argument print calls behave the same as the print statement.
    for shown in (a, b, c, av.data, bv.data, cv.data):
        print(shown)
    print('TODO: Finish checking TestZipIter values')
def TestNestedIter():
    """Nest three syn_iter loops and accumulate the sum of their indices.

    With three loops of 5 iterations each, the sum of outer + middle + inner
    over all 125 combinations is 750, which the program must return.
    """
    code = synppc.InstructionStream()
    ppc.set_active_code(code)
    # code.add(ppc.Illegal())

    total = vars.UnsignedWord(0)

    for outer in syn_iter(code, 5):
        for middle in syn_iter(code, 5):
            for inner in syn_iter(code, 5):
                total.v = total + outer + middle + inner

    util.return_var(total)
    total.release_register()

    processor = synppc.Processor()
    result = processor.execute(code)

    # print 'should be 750:', result
    assert result == 750
def test_syn_gepb():
    """Synthesize and run the GEPB kernel, then validate C against A * B.

    Uses a 128 x 32 x 32 problem with kc = k, nc = 32 and 4x4 register
    blocking; the reference product comes from Numeric.matrixmultiply.
    """
    gepb = SynGEPB()

    code = synppc.InstructionStream()
    code.set_debug(True)

    m, k, n = 128, 32, 32
    A, B, C = create_matrices(m, k, n)

    kc, nc = k, 32
    mr = nr = 4

    C_aux = Numeric.zeros((mr, nc), typecode=Numeric.Float)  # + 13.0

    A_addr = synppc.array_address(A)
    B_addr = synppc.array_address(B)
    C_addr = synppc.array_address(C)
    C_aux_addr = synppc.array_address(C_aux)

    # The array addresses are passed at execution time, not synthesis time.
    gepb.synthesize(code, m, k, n, kc, nc, mr, nr)
    # code.print_code()

    params = synppc.ExecParams()
    params.p1 = A_addr
    params.p2 = B_addr
    params.p3 = C_addr
    params.p4 = C_aux_addr

    processor = synppc.Processor()
    processor.execute(code, params=params)

    C_valid = Numeric.matrixmultiply(A, B)
    _validate('syn_gepb', m, n, k, C, C_valid)
def TestNestedIter():
    """Nest three syn_iter loops on a Program and sum their indices.

    With three loops of 5 iterations each, the sum of outer + middle + inner
    over all 125 combinations is 750, which the program must return.
    """
    program = synppc.Program()
    code = program.get_stream()
    program.add(code)
    ppc.set_active_code(code)

    total = vars.UnsignedWord(0)

    for outer in syn_iter(code, 5):
        for middle in syn_iter(code, 5):
            for inner in syn_iter(code, 5):
                total.v = total + outer + middle + inner

    util.return_var(total)
    # total.release_register() is left disabled in this variant.

    processor = synppc.Processor()
    result = processor.execute(program)

    # print 'should be 750:', result
    assert result == 750
def TestLiterals():
    """Check that scalar and array literals initialise vector types alike.

    Each unsigned/signed pair is combined so that it cancels to zero, all
    results are OR-ed into one accumulator, and the accumulator is stored to
    a four-word buffer that must then contain only zeros.
    """
    import corepy.arch.ppc.platform as env

    prgm = env.Program()
    code = prgm.get_stream()
    prgm += code
    processor = env.Processor()

    ppc.set_active_code(code)
    vmx.set_active_code(code)

    zero = Bits.cast(SignedByte(0))

    accum = Bits()

    # Signed versions use splat, unsigned arrays
    b = Byte(2)
    sb = SignedByte(-2)
    vmx.vaddsbs(b, b, sb)

    h = Halfword(9999)
    sh = SignedHalfword(-9999)
    vmx.vaddshs(h, h, sh)

    w = Word(99999)
    sw = SignedWord(-99999)
    vmx.vaddsws(w, w, sw)

    # Combine the results (should be [0,0,0,0])
    vmx.vor(accum, b, h)
    vmx.vor(accum, accum, w)

    # Array initializers.  The names are rebound on purpose so the scalar
    # variables above are dropped exactly when they were before.
    b = Byte(range(16))
    sb = SignedByte(range(16))
    vmx.vsubsbs(b, b, sb)
    vmx.vor(accum, accum, b)

    h = Halfword([9999, 9998, 9997, 9996, 9995, 9994, 9993, 9992])
    sh = SignedHalfword([9999, 9998, 9997, 9996, 9995, 9994, 9993, 9992])
    vmx.vsubshs(h, h, sh)
    vmx.vor(accum, accum, h)

    w = Word([99999, 99998, 99997, 99996])
    sw = SignedWord([99999, 99998, 99997, 99996])
    vmx.vsubsws(w, w, sw)
    accum.v = vmx.vor.ex(accum, w)

    # Pre-filled with a non-zero value so a missing store is detected.
    stored = extarray.extarray('I', [42, 42, 42, 42])
    r_addr = prgm.acquire_register()
    util.load_word(code, r_addr, stored.buffer_info()[0])

    vmx.stvx(accum, 0, r_addr)

    ppc.set_active_code(None)
    vmx.set_active_code(None)

    processor.execute(prgm)

    print(stored)
    for word in stored:
        assert word == 0
    # for word in stored: print '%08X' % word,
    # print
def syn_gemm(A, B, C, mc, kc, nc, mr=1, nr=1, gepb_mode=gepb_simple):
    """Run a blocked matrix multiply using synthetic pack-B and GEPB kernels.

    Parameters:
      A, B, C    -- Numeric arrays; C is M x N, A is sliced as A[:, k:k+kc]
                    and B is addressed by row offset k * N.
      mc         -- accepted for interface compatibility; not used here.
      kc, nc     -- panel sizes, clamped to K and N respectively.
      mr, nr     -- register-block sizes forwarded to the GEPB synthesizer.
      gepb_mode  -- mode passed to SynGEPB.

    Returns the wall-clock time in seconds spent in the blocked loops
    (synthesis is excluded from the timed region).
    """
    cgepb = synppc.InstructionStream()
    cpackb = synppc.InstructionStream()
    proc = synppc.Processor()

    gepb = SynGEPB(gepb_mode)
    packb = SynPackB()

    M, N = C.shape
    # A is sliced as A[:, k:k+kc] into an (M, kc) buffer below, so the
    # shared dimension K is A's column count.  Reading A.shape[0] gave M,
    # which is only correct for square A.
    K = A.shape[1]

    nc = min(nc, N)
    kc = min(kc, K)

    tA = Numeric.zeros((M, kc), typecode=Numeric.Float)
    tB = Numeric.zeros((nc, kc), typecode=Numeric.Float) + 14.0
    C_aux = Numeric.zeros((mr, nc), typecode=Numeric.Float)

    cgepb.set_debug(True)
    gepb.synthesize(cgepb, M, K, N, kc, nc, mr, nr, _transpose=True)
    cgepb.cache_code()
    # cgepb.print_code()

    cpackb.set_debug(True)
    packb.synthesize(cpackb, tB, N)
    cpackb.cache_code()
    # cpackb.print_code()

    B_addr = synppc.array_address(B)
    C_addr = synppc.array_address(C)

    pack_params = synppc.ExecParams()
    pm = synppc.ExecParams()

    pm.p1 = synppc.array_address(tA)
    pm.p2 = synppc.array_address(tB)
    pm.p3 = C_addr
    pm.p4 = synppc.array_address(C_aux)

    # Byte width of one nc-column panel (8 bytes per element).
    nc8 = nc * 8

    start = time.time()

    for k in range(0, K, kc):
        # Pack A into tA
        tA[:, :] = A[:, k:k + kc]

        # Restart at the first column panel of C and at row k of B.
        pm.p3 = C_addr
        pack_params.p1 = B_addr + k * N * 8

        for j in range(0, N, nc):
            # Pack B into tB; this replaces
            # tB[:,:] = Numeric.transpose(B[k:k+kc, j:j+nc])
            proc.execute(cpackb, params=pack_params)

            proc.execute(cgepb, params=pm)

            # Advance both pointers to the next column panel.
            pack_params.p1 += nc8
            pm.p3 += nc8

    end = time.time()
    return end - start
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import sys import array import corepy.arch.ppc.isa as ppc import corepy.arch.vmx.isa as vmx import corepy.arch.ppc.platform as env import corepy.arch.ppc.types.ppc_types as vars from corepy.arch.ppc.lib.util import load_word # code is the current Synthetic Programm code = env.InstructionStream() # proc is a platform-specific execution environemnt proc = env.Processor() # Setting the active code allows you call instructions directly # and automatically add them to the instruction stream. # # Add instruction without active code: # code.add(ppc.addi(...)) # # Add instruction wit active code: # ppc.addi(...) ppc.set_active_code(code) ppc.addi(code.gp_return, 0, 12) ppc.b(code.lbl_epilogue) code.cache_code()
def TestFloatingPoint(float_type):
    """Exercise arithmetic, load and store on a floating point variable type.

    float_type is the variable class under test; it is compared against
    SingleFloat to choose a 4-byte 'f' buffer, otherwise an 8-byte 'd'
    buffer is used.  The synthesized expression must evaluate to 0.0 and the
    four stores must leave 11.0, 12.0, 13.0 and 14.0 in the buffer.
    """
    import corepy.arch.ppc.platform as env

    prgm = env.Program()
    code = prgm.get_stream()
    proc = env.Processor()

    ppc.set_active_code(code)

    x = float_type(1.0)
    y = float_type(2.0)
    z = float_type(3.0)

    a = float_type()
    b = float_type()
    c = float_type()
    d = float_type()

    # Set the size of the float based on whether it is double or single,
    # and pick the matching array typecode for the data buffer.
    if float_type == SingleFloat:
        float_size, typecode = 4, 'f'
    else:
        float_size, typecode = 8, 'd'
    data = array.array(typecode, (1.0, 2.0, 3.0, 4.0))

    addr = data.buffer_info()[0]

    # Load from addr
    a.load(addr)

    # Load from addr with idx in register
    offset = Bits(float_size)
    b.load(addr, offset)

    # Load from addr with constant idx
    c.load(addr, float_size * 2)

    # Load from addr with addr as a register
    reg_addr = Bits(addr)
    d.load(reg_addr)

    ret = float_type(reg=prgm.fp_return)

    ret.v = (x + y) / y

    # Each fused op is paired with its negated form, so the sum is zero.
    ret.v = (fmadd(a, y, z + z) + fnmadd(a, y, z + z)
             + fmsub(x, y, z) + fnmsub(x, y, z))

    x.v = -x
    # With a, b, c = 1.0, 2.0, 3.0 loaded above: 0 + 1 + 2 - 3 == 0.
    ret.v = ret + x - x + a + b - c + d - d

    # Store from addr
    a.v = 11.0
    a.store(addr)

    # Store from addr with idx in register
    offset = Bits(float_size)
    b.v = 12.0
    b.store(addr, offset)

    # Store from addr with constant idx
    c.v = 13.0
    c.store(addr, float_size * 2)

    # Store from addr with addr as a register
    d.v = 14.0
    reg_addr = UnsignedWord(addr)
    reg_addr.v = reg_addr + float_size * 3
    d.store(reg_addr)

    prgm.add(code)
    result = proc.execute(prgm, mode='fp')

    assert result == 0.0
    for slot, stored in enumerate((11.0, 12.0, 13.0, 14.0)):
        assert data[slot] == stored