def TestRange():
    """Synthesize three syn_range loops and check the value the program returns.

    Every loop body emits a single increment of one shared unsigned counter.
    The loops use the argument forms (7), (20, 31) and (20, 26, 2); the
    synthesized program is expected to return 21 (7 + 11 + 3 trips, assuming
    syn_range follows the usual range() semantics).
    """
    code = synppc.InstructionStream()
    ppc.set_active_code(code)

    # Disabled: code.add(ppc.Illegal())

    counter = vars.UnsignedWord(0)

    # Loops are emitted in exactly this order: stop-only, start/stop,
    # start/stop/step.
    for range_args in ((7,), (20, 31), (20, 26, 2)):
        for _ in syn_range(code, *range_args):
            counter.v = counter + 1

    util.return_var(counter)
    counter.release_register(code)

    result = synppc.Processor().execute(code)

    # print 'should be 21:', result
    assert result == 21
def TestIter():
    """Exercise syn_iter in its default, DEC and INC modes.

    Three loops over syn_iter(code, 16, 4) each add 1 to an accumulator, and
    a fourth INC-mode loop adds the (cast) loop variable itself.  The
    synthesized program is expected to return 36 -- presumably 3 * 4 counted
    trips plus 0 + 4 + 8 + 12 from the last loop.
    """
    code = synppc.InstructionStream()

    # Disabled: code.add(ppc.Illegal())

    # The accumulator is bound to the stream explicitly; this test does not
    # call ppc.set_active_code().
    acc = vars.SignedWord(0, code=code)

    # Default mode.
    for _ in syn_iter(code, 16, 4):
        acc.v = acc + 1

    # Explicit decrementing mode.
    for _ in syn_iter(code, 16, 4, mode=DEC):
        acc.v = acc + 1

    # Incrementing mode, loop variable ignored.
    for _ in syn_iter(code, 16, 4, mode=INC):
        acc.v = acc + 1

    # Incrementing mode, loop variable folded into the accumulator.
    for index in syn_iter(code, 16, 4, mode=INC):
        acc.v = acc + vars.SignedWord.cast(index)

    util.return_var(acc)
    acc.release_register(code)

    result = synppc.Processor().execute(code)

    # print 'should be 36:', result
    assert result == 36
def TestVarIter():
    """Add 10 to every element of several arrays through var_iter.

    Covers the 'I', 'i', 'f' and 'd' array typecodes.  After the synthesized
    program has run, each array is handed to _array_check().
    """
    code = synppc.InstructionStream()
    ppc.set_active_code(code)

    # Disabled: code.add(ppc.Illegal())

    # Unsigned and signed words take an integer immediate.
    unsigned_words = array.array('I', range(4))
    for element in var_iter(code, unsigned_words):
        element.v = element + 10

    signed_words = array.array('i', range(4))
    for element in var_iter(code, signed_words):
        element.v = element + 10

    # TODO: Implement the rest of the integer types ('H', 'h', 'B', 'b'),
    # or have a clean way of upcasting to signed/unsigned int.  The
    # corresponding loops were disabled in the original test.

    # Floating point arrays add a variable holding 10.0; each constant is
    # created right before its loop, between the array and the iterator.
    singles = array.array('f', range(4))
    ten_single = vars.SingleFloat(10.0)
    for element in var_iter(code, singles):
        element.v = element + ten_single

    doubles = array.array('d', range(4))
    ten_double = vars.DoubleFloat(10.0)
    for element in var_iter(code, doubles):
        element.v = element + ten_double

    synppc.Processor().execute(code)

    for modified in (unsigned_words, signed_words, singles, doubles):
        _array_check(modified)
def syn_gemm_pp(A, B, C, mc, kc, nc, mr=1, nr=1, gepb_mode = gepb_simple):
    """Run the synthesized GEPP kernel over A in panels of kc columns; return the time.

    Parameters:
      A          -- M x K array; columns k..k+kc are copied into a packed
                    (M, kc) buffer for every panel.
      B          -- array whose rows hold N elements of 8 bytes; row k is
                    addressed as B_addr + k * N * 8.
      C          -- M x N result array (only its shape and address are used
                    here).
      mc         -- accepted for interface compatibility; not used by this
                    driver.
      kc, nc     -- panel sizes, clamped to K and N respectively.
      mr, nr     -- forwarded unchanged to SynGEPP.synthesize().
      gepb_mode  -- forwarded to the SynGEPP constructor.

    Returns the wall-clock time in seconds (time.time() difference) spent in
    the panel loop, excluding code synthesis.

    NOTE(review): the panel copy assigns A[:, k:k+kc] into an (M, kc) buffer,
    so K presumably has to be a multiple of kc -- confirm with callers.
    """
    cgepp = synppc.InstructionStream()
    proc = synppc.Processor()
    gepp = SynGEPP(gepb_mode)

    M, N = C.shape
    # The inner dimension is the number of COLUMNS of A (A is sliced as
    # A[:, k:k+kc] below).  The original read A.shape[0], which is M and only
    # coincides with K for square A.
    K = A.shape[1]

    nc = min(nc, N)
    kc = min(kc, K)

    tA = Numeric.zeros((M, kc), typecode = Numeric.Float)
    # tB starts out filled with 14.0 rather than zeros (kept from the
    # original; the reason is not visible in this function).
    tB = Numeric.zeros((nc, kc), typecode = Numeric.Float) + 14.0
    C_aux = Numeric.zeros((mr, nc), typecode=Numeric.Float)

    cgepp.set_debug(True)
    gepp.synthesize(cgepp, tB, M, K, N, kc, nc, mr, nr)
    cgepp.cache_code()
    # cgepp.print_code()

    B_addr = synppc.array_address(B)
    C_addr = synppc.array_address(C)

    pm = synppc.ExecParams()
    pm.p1 = synppc.array_address(tA)
    pm.p2 = synppc.array_address(tB)
    pm.p3 = C_addr
    pm.p4 = synppc.array_address(C_aux)

    start = time.time()

    for k in range(0, K, kc):
        # Pack A into tA
        tA[:,:] = A[:,k:k+kc]

        pm.p3 = C_addr
        # Start of row k of B: N elements per row, 8 bytes per element.
        pm.p5 = B_addr + k * N * 8

        proc.execute(cgepp, params = pm)

    end = time.time()

    return end - start
def test_syn_pack_b():
    """Run the synthesized B-packing routine and validate its output.

    A 10x10 matrix whose entries equal their own flat index is packed,
    starting at flat offset 30, into a 5x5 buffer.  The expected buffer is
    rebuilt in plain Python (element (j, i) of the packed array comes from
    flat index offset + i * N + j of B) and both are passed to _validate().
    """
    # Source matrix: entry (row, col) holds row * 10 + col.
    B = Numeric.zeros((10, 10), typecode = Numeric.Float)
    column_ids = Numeric.arange(10)
    for row in range(10):
        B[row,:] = column_ids + row * 10

    # Destination buffer for the packed block, initially all zero.
    tB = Numeric.arange(25, typecode = Numeric.Float) * 0.0
    tB.shape = (5,5)

    # Flat element offset of the first packed entry: row 3, column 0.
    B_offset = 3 * 10 + 0

    K, N = B.shape
    nc, kc = tB.shape

    packer = SynPackB()
    code = synppc.InstructionStream()
    proc = synppc.Processor()
    params = synppc.ExecParams()

    packer.synthesize(code, tB, N)

    # Offsets are converted to bytes with 8 bytes per element.
    params.p1 = synppc.array_address(B) + B_offset * 8
    proc.execute(code, params = params)

    # Validate: view B as a flat vector while building the reference.
    B.shape = (K * N,)
    tB_valid = Numeric.arange(nc*kc, typecode = Numeric.Float) * 0.0

    for i in range(kc):
        row_start = B_offset + i * N
        for j in range(nc):
            tB_valid[j * kc + i] = B[row_start + j]

    tB_valid.shape = (nc,kc)
    B.shape = (K, N)

    _validate('syn_pack_b', nc, nc, N, tB, tB_valid)
def TestVecIter():
    """Double every element of seven 16-element arrays through vector_iter.

    Each array holds range(16); the loop body adds the current vector to
    itself with the VMX add instruction chosen for that array's typecode.
    After execution every array is checked against [0, 2, ..., 30].
    """
    code = synppc.InstructionStream()
    ppc.set_active_code(code)

    # Disabled: code.add(ppc.Illegal())

    # (array typecode, VMX add instruction) in the order the loops are emitted.
    integer_cases = (
        ('I', vmx.vadduws),
        ('i', vmx.vaddsws),
        ('H', vmx.vadduhs),
        ('h', vmx.vaddshs),
        ('B', vmx.vaddubs),
        ('b', vmx.vaddsbs),
    )

    # Keeps every array referenced until the program has run and been checked.
    buffers = []

    for typecode, add_insn in integer_cases:
        values = array.array(typecode, range(16))
        for vec in vector_iter(code, values):
            vec.v = add_insn.ex(vec, vec)
        buffers.append(values)

    # Never read afterwards, but kept at this exact position.
    # NOTE(review): constructing it may emit instructions into the active
    # stream -- confirm before removing.
    ften = vmx_vars.BitType(10.0)

    floats = array.array('f', range(16))
    for vec in vector_iter(code, floats):
        vec.v = vmx.vaddfp.ex(vec, vec)
    buffers.append(floats)

    synppc.Processor().execute(code)

    expected = [2 * n for n in range(16)]

    for values in buffers:
        _array_check(values, expected)
def TestMemoryDesc():
    """Iterate over an array through a memory_desc instead of the array itself.

    A 4-element 'I' array is described by its typecode, buffer address and
    element count; var_iter over that descriptor adds 10 to each element and
    the array is then passed to _array_check().
    """
    code = synppc.InstructionStream()
    ppc.set_active_code(code)

    # Disabled: code.add(ppc.Illegal())

    values = array.array('I', range(4))

    # buffer_info() yields (address, element count); the count is 4 here.
    address, count = values.buffer_info()
    descriptor = memory_desc('I', address, count)

    for element in var_iter(code, descriptor):
        element.v = element + 10

    synppc.Processor().execute(code)

    _array_check(values)
def TestExternalStop():
    """Sum the squares of a 5x5 matrix with iterators that stop on a register.

    Both loops are created with a start of 0 and get their stop value from
    the register of a variable via set_external_stop().  The inner loop runs
    in CTR mode.  The data is range(25) as doubles, so the program must
    return 0^2 + 1^2 + ... + 24^2 == 4900.0.
    """
    code = synppc.InstructionStream()
    ppc.set_active_code(code)

    dim = 5
    double_bytes = synppc.WORD_SIZE * 2

    # Data
    data = array.array('d', range(dim * dim))

    # Constants - read only
    n_rows = vars.SignedWord(dim)
    n_cols = vars.SignedWord(dim)
    addr = vars.SignedWord(data.buffer_info()[0])
    dbl_size = vars.SignedWord(double_bytes)
    row_bytes = vars.SignedWord(double_bytes * dim)

    # Variables - read/write
    total = vars.DoubleFloat(0.0)
    value = vars.DoubleFloat(0.0)
    offset = vars.SignedWord(0)

    # Iterators
    row_iter = syn_iter(code, 0, mode=INC)
    row_iter.set_external_stop(n_rows.reg)

    col_ctr = syn_iter(code, 0, mode=CTR)
    col_ctr.set_external_stop(n_cols.reg)

    for row in row_iter:
        # Byte offset of the first element of this row.
        offset.v = vars.SignedWord.cast(row) * row_bytes

        # Note that the inner loop variable is unreadable since it's in the
        # ctr register.
        for _ in col_ctr:
            # Load the next value in the matrix
            ppc.lfdx(value, addr, offset)
            total.v = vars.fmadd(value, value, total)  # total += value * value
            offset.v = offset + dbl_size

    # Disabled: code.add(ppc.Illegal())
    util.return_var(total)

    result = synppc.Processor().execute(code, mode='fp')

    # print 'Test external stop: ', result
    assert result == 4900.0
def TestZipIter():
    """Walk several arrays in lockstep with zip_iter, scalar and vector.

    The scalar pass stores a[n] + b[n] into c[n] and counts its trips in a
    register variable; the program's return value must be 16.  The vector
    pass does the same addition with vmx.vadduws on vector iterators.  The
    resulting arrays are only printed, not checked.
    """
    code = synppc.InstructionStream()
    ppc.set_active_code(code)

    # Disabled: code.add(ppc.Illegal())

    a = array.array('I', range(16, 32))
    b = array.array('I', range(32, 48))
    c = array.array('I', [0] * 16)

    trips = vars.UnsignedWord(0)

    # Same construction order as passing them inline as arguments.
    a_iter = var_iter(code, a)
    b_iter = var_iter(code, b)
    c_iter = var_iter(code, c, store_only=True)

    for lhs, rhs, out in zip_iter(code, a_iter, b_iter, c_iter):
        out.v = lhs + rhs
        trips.v = trips + 1

    av = vector_iter(code, array.array('I', range(16)))
    bv = vector_iter(code, array.array('I', range(16, 32)))
    cv = vector_iter(code, array.array('I', [0] * 16), store_only=True)

    for lhs, rhs, out in zip_iter(code, av, bv, cv):
        out.v = vmx.vadduws.ex(lhs, rhs)  # lhs + rhs

    util.return_var(trips)

    result = synppc.Processor().execute(code)
    assert result == 16

    # Single-argument print written with parentheses; under Python 2 this
    # prints exactly what the bare print statement does.
    for shown in (a, b, c, av.data, bv.data, cv.data):
        print(shown)

    print('TODO: Finish checking TestZipIter values')
def TestNestedIter():
    """Nest three syn_iter(code, 5) loops and accumulate their loop variables.

    The innermost body adds all three loop variables to one accumulator; the
    synthesized program is expected to return 750.
    """
    code = synppc.InstructionStream()
    ppc.set_active_code(code)

    # Disabled: code.add(ppc.Illegal())

    acc = vars.UnsignedWord(0)

    for outer in syn_iter(code, 5):
        for middle in syn_iter(code, 5):
            for inner in syn_iter(code, 5):
                # Operand order is kept: ((acc + outer) + middle) + inner.
                acc.v = acc + outer + middle + inner

    util.return_var(acc)
    acc.release_register()

    result = synppc.Processor().execute(code)

    # print 'should be 750:', result
    assert result == 750
def test_syn_gepb():
    """Run the synthesized GEPB kernel once and validate C against A * B.

    Matrices come from create_matrices(128, 32, 32).  The kernel is
    synthesized with kc = k, nc = 32 and 4x4 register blocking, executed with
    the addresses of A, B, C and an (mr, nc) auxiliary buffer as parameters
    p1..p4, and the resulting C is compared with Numeric.matrixmultiply(A, B)
    via _validate().
    """
    kernel = SynGEPB()

    code = synppc.InstructionStream()
    code.set_debug(True)

    m, k, n = 128, 32, 32
    A, B, C = create_matrices(m, k, n)

    # Blocking parameters: the whole k extent in one panel, 4x4 micro-block.
    kc, nc = k, 32
    mr, nr = 4, 4

    C_aux = Numeric.zeros((mr, nc), typecode=Numeric.Float)  # + 13.0

    # Addresses are taken before synthesis, as in the original ordering.
    addresses = (
        synppc.array_address(A),
        synppc.array_address(B),
        synppc.array_address(C),
        synppc.array_address(C_aux),
    )

    kernel.synthesize(code, m, k, n, kc, nc, mr, nr)
    # code.print_code()

    params = synppc.ExecParams()
    params.p1, params.p2, params.p3, params.p4 = addresses

    synppc.Processor().execute(code, params = params)

    reference = Numeric.matrixmultiply(A, B)

    _validate('syn_gepb', m,n,k, C, reference)
def syn_gemm(A, B, C, mc, kc, nc, mr=1, nr=1, gepb_mode = gepb_simple):
    """Drive the synthesized pack-B and GEPB kernels over all blocks; return the time.

    For every panel of kc columns of A the panel is copied into a packed
    (M, kc) buffer; then, for every block of nc columns, the pack-B program
    and the GEPB program are executed back to back.

    Parameters:
      A          -- M x K array; sliced as A[:, k:k+kc].
      B          -- array whose rows hold N elements of 8 bytes; the block at
                    row k, column j is addressed as
                    B_addr + k * N * 8 + j * 8.
      C          -- M x N result array (only its shape and address are used
                    here).
      mc         -- accepted for interface compatibility; not used by this
                    driver.
      kc, nc     -- block sizes, clamped to K and N respectively.
      mr, nr     -- forwarded unchanged to SynGEPB.synthesize().
      gepb_mode  -- forwarded to the SynGEPB constructor.

    Returns the wall-clock time in seconds (time.time() difference) spent in
    the block loops, excluding code synthesis.

    NOTE(review): the panel copy assigns A[:, k:k+kc] into an (M, kc) buffer,
    so K presumably has to be a multiple of kc -- confirm with callers.
    """
    cgepb = synppc.InstructionStream()
    cpackb = synppc.InstructionStream()
    proc = synppc.Processor()

    gepb = SynGEPB(gepb_mode)
    packb = SynPackB()

    M, N = C.shape
    # The inner dimension is the number of COLUMNS of A (A is sliced as
    # A[:, k:k+kc] below).  The original read A.shape[0], which is M and only
    # coincides with K for square A.
    K = A.shape[1]

    nc = min(nc, N)
    kc = min(kc, K)

    tA = Numeric.zeros((M, kc), typecode = Numeric.Float)
    # tB starts out filled with 14.0 rather than zeros (kept from the
    # original; the reason is not visible in this function).
    tB = Numeric.zeros((nc, kc), typecode = Numeric.Float) + 14.0
    C_aux = Numeric.zeros((mr, nc), typecode=Numeric.Float)

    cgepb.set_debug(True)
    gepb.synthesize(cgepb, M, K, N, kc, nc, mr, nr, _transpose = True)
    cgepb.cache_code()
    # cgepb.print_code()

    cpackb.set_debug(True)
    packb.synthesize(cpackb, tB, N)
    cpackb.cache_code()
    # cpackb.print_code()

    B_addr = synppc.array_address(B)
    C_addr = synppc.array_address(C)

    pack_params = synppc.ExecParams()
    pm = synppc.ExecParams()

    pm.p1 = synppc.array_address(tA)
    pm.p2 = synppc.array_address(tB)
    pm.p3 = C_addr
    pm.p4 = synppc.array_address(C_aux)

    # Byte stride between consecutive column blocks (8 bytes per element).
    nc8 = nc * 8

    start = time.time()

    for k in range(0, K, kc):
        # Pack A into tA
        tA[:,:] = A[:,k:k+kc]

        # Every panel starts again at the first column block of C and at the
        # beginning of row k of B.
        pm.p3 = C_addr
        pack_params.p1 = B_addr + k * N * 8

        for j in range(0, N, nc):
            # Pack B into tB --
            # tB[:,:] = Numeric.transpose(B[k:k+kc, j:j+nc])
            proc.execute(cpackb, params = pack_params)

            proc.execute(cgepb, params = pm)

            # Advance both B and C by one block of nc columns.
            pack_params.p1 += nc8
            pm.p3 += nc8

    end = time.time()

    return end - start
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import sys import array import corepy.arch.ppc.isa as ppc import corepy.arch.vmx.isa as vmx import corepy.arch.ppc.platform as env import corepy.arch.ppc.types.ppc_types as vars from corepy.arch.ppc.lib.util import load_word # code is the current Synthetic Programm code = env.InstructionStream() # proc is a platform-specific execution environemnt proc = env.Processor() # Setting the active code allows you call instructions directly # and automatically add them to the instruction stream. # # Add instruction without active code: # code.add(ppc.addi(...)) # # Add instruction wit active code: # ppc.addi(...) ppc.set_active_code(code) ppc.addi(code.gp_return, 0, 12)