예제 #1
0
파일: gemm.py 프로젝트: tmaone/efi
def syn_gemm_pp(A, B, C, mc, kc, nc, mr=1, nr=1, gepb_mode = gepb_simple):
  """
  Time a panel-by-panel run of the synthesized SynGEPP kernel.

  One instruction stream is synthesized with SynGEPP.  A is then walked in
  column panels of width kc; each panel is copied into a scratch buffer and
  the stream is executed once per panel.

  Parameters:
    A, B, C    Numeric arrays.  C.shape supplies (M, N); A is sliced as
               A[:, k:k+kc], so it is read as an M x K matrix.
               NOTE(review): B is only passed by address; it is presumably
               K x N, row-major, with 8-byte elements -- confirm with callers.
    mc         Accepted for signature compatibility; not used here.
    kc, nc     Requested panel sizes, clamped to K and N respectively.
    mr, nr     Forwarded unchanged to SynGEPP.synthesize; mr also sets the
               row count of the auxiliary C buffer.
    gepb_mode  Forwarded to the SynGEPP constructor.

  Returns:
    Wall-clock seconds (difference of two time.time() calls) spent in the
    panel loop.  Synthesis and buffer allocation happen before the timer
    starts.
  """
  cgepp = synppc.InstructionStream()
  proc = synppc.Processor()

  gepp = SynGEPP(gepb_mode)

  M, N = C.shape
  # A is copied below as A[:, k:k+kc] into an (M, kc) buffer, so the inner
  # dimension K is A's column count.  (The earlier A.shape[0] is the row
  # count M and was only correct for square A.)
  K = A.shape[1]

  nc = min(nc, N)
  kc = min(kc, K)

  # Scratch buffers handed to the generated code by address.
  tA = Numeric.zeros((M, kc), typecode = Numeric.Float)
  # tB starts filled with 14.0 -- presumably a recognizable debug value.
  tB = Numeric.zeros((nc, kc), typecode = Numeric.Float) + 14.0
  C_aux = Numeric.zeros((mr, nc), typecode=Numeric.Float)

  cgepp.set_debug(True)
  gepp.synthesize(cgepp, tB, M, K, N, kc, nc, mr, nr)
  cgepp.cache_code()

  B_addr = synppc.array_address(B)
  C_addr = synppc.array_address(C)

  pm = synppc.ExecParams()
  pm.p1 = synppc.array_address(tA)
  pm.p2 = synppc.array_address(tB)
  pm.p3 = C_addr
  pm.p4 = synppc.array_address(C_aux)

  start = time.time()

  for k in range(0, K, kc):
    # Pack the current column panel of A into tA.
    # NOTE(review): when K is not a multiple of kc the last slice is narrower
    # than tA; that path is unchanged from the original and unverified.
    tA[:,:] = A[:,k:k+kc]

    pm.p3 = C_addr
    # Byte offset k * N * 8 from the start of B -- presumably row k of a
    # row-major, N-column array of 8-byte elements.
    pm.p5 = B_addr + k * N * 8
    proc.execute(cgepp, params = pm)

  end = time.time()

  return end - start
예제 #2
0
파일: gemm.py 프로젝트: tmaone/efi
def test_syn_pack_b():
  """
  Run the synthesized SynPackB code on a 10x10 index matrix and hand the
  result, together with a reference computed in Python, to _validate.

  The source matrix holds B[i, j] = i * 10 + j.  The generated code is given
  the address of element (3, 0) of B; the destination is a 5x5 buffer passed
  to SynPackB.synthesize.
  """
  K, N = 10, 10
  nc, kc = 5, 5

  # Source matrix of running indices: row r holds r*N .. r*N + N - 1.
  B = Numeric.zeros((K, N), typecode = Numeric.Float)
  column_ids = Numeric.arange(N)
  for row in range(K):
    B[row,:] = column_ids + row * N

  # Destination buffer for the packed block, initially all zeros.
  tB = Numeric.zeros((nc, kc), typecode = Numeric.Float)

  # Flat element offset of the block's first element: row 3, column 0.
  first_row, first_col = 3, 0
  B_offset = first_row * N + first_col

  code = synppc.InstructionStream()
  proc = synppc.Processor()
  params = synppc.ExecParams()

  pack_b = SynPackB()
  pack_b.synthesize(code, tB, N)

  # The offset is scaled by 8 -- presumably the byte size of a Numeric.Float.
  params.p1 = synppc.array_address(B) + B_offset * 8
  proc.execute(code, params = params)

  # Reference result: view B as flat while indexing, and fill a flat buffer
  # so that, once reshaped, tB_valid[j, i] == B[first_row + i, first_col + j].
  B.shape = (K * N,)
  tB_valid = Numeric.zeros((nc * kc,), typecode = Numeric.Float)
  for j in range(nc):
    for i in range(kc):
      tB_valid[j * kc + i] = B[B_offset + i * N + j]
  tB_valid.shape = (nc, kc)
  B.shape = (K, N)

  # NOTE(review): nc is passed twice, as in the original; kc may have been
  # intended for the second slot.  Harmless here because nc == kc.
  _validate('syn_pack_b', nc, nc, N, tB, tB_valid)
예제 #3
0
파일: gemm.py 프로젝트: tmaone/efi
def test_syn_gepb():
  """
  Run the synthesized SynGEPB kernel on matrices from create_matrices and
  hand C, together with Numeric.matrixmultiply(A, B), to _validate.

  Problem size is m, k, n = 128, 32, 32 with kc = k, nc = 32 and a 4x4
  (mr x nr) setting forwarded to SynGEPB.synthesize.
  """
  m, k, n = (128, 32, 32)
  kc, nc = k, 32
  mr = nr = 4

  A, B, C = create_matrices(m, k, n)
  # Auxiliary buffer whose address is handed to the kernel as p4.
  C_aux = Numeric.zeros((mr, nc), typecode=Numeric.Float)

  gepb = SynGEPB()
  code = synppc.InstructionStream()
  code.set_debug(True)
  gepb.synthesize(code, m, k, n, kc, nc, mr, nr)

  # The kernel receives the four array addresses through p1..p4.
  params = synppc.ExecParams()
  params.p1 = synppc.array_address(A)
  params.p2 = synppc.array_address(B)
  params.p3 = synppc.array_address(C)
  params.p4 = synppc.array_address(C_aux)

  proc = synppc.Processor()
  proc.execute(code, params = params)

  # Reference product computed by Numeric.
  C_valid = Numeric.matrixmultiply(A, B)

  _validate('syn_gepb', m,n,k, C, C_valid)
예제 #4
0
파일: gemm.py 프로젝트: tmaone/efi
def syn_gemm(A, B, C, mc, kc, nc, mr=1, nr=1, gepb_mode = gepb_simple):
  """
  Time a blocked matrix multiply driven by synthesized SynPackB and SynGEPB
  code.

  Two instruction streams are synthesized once: one from SynPackB (given the
  tB buffer and N) and one from SynGEPB.  A is walked in column panels of
  width kc; for each panel, the columns of C are walked in steps of nc, and
  both streams are executed once per step.

  Parameters:
    A, B, C    Numeric arrays.  C.shape supplies (M, N); A is sliced as
               A[:, k:k+kc], so it is read as an M x K matrix.
               NOTE(review): B and C are only passed by address; they are
               presumably row-major with 8-byte elements -- confirm with
               callers.
    mc         Accepted for signature compatibility; not used here.
    kc, nc     Requested block sizes, clamped to K and N respectively.
    mr, nr     Forwarded unchanged to SynGEPB.synthesize; mr also sets the
               row count of the auxiliary C buffer.
    gepb_mode  Forwarded to the SynGEPB constructor.

  Returns:
    Wall-clock seconds (difference of two time.time() calls) spent in the
    blocked loops.  Synthesis and buffer allocation happen before the timer
    starts.
  """
  cgepb = synppc.InstructionStream()
  cpackb = synppc.InstructionStream()
  proc = synppc.Processor()

  gepb = SynGEPB(gepb_mode)
  packb = SynPackB()

  M, N = C.shape
  # A is copied below as A[:, k:k+kc] into an (M, kc) buffer, so the inner
  # dimension K is A's column count.  (The earlier A.shape[0] is the row
  # count M and was only correct for square A.)
  K = A.shape[1]

  nc = min(nc, N)
  kc = min(kc, K)

  # Scratch buffers handed to the generated code by address.
  tA = Numeric.zeros((M, kc), typecode = Numeric.Float)
  # tB starts filled with 14.0 -- presumably a recognizable debug value.
  tB = Numeric.zeros((nc, kc), typecode = Numeric.Float) + 14.0
  C_aux = Numeric.zeros((mr, nc), typecode=Numeric.Float)

  cgepb.set_debug(True)
  gepb.synthesize(cgepb, M, K, N, kc, nc, mr, nr, _transpose = True)
  cgepb.cache_code()

  cpackb.set_debug(True)
  packb.synthesize(cpackb, tB, N)
  cpackb.cache_code()

  B_addr = synppc.array_address(B)
  C_addr = synppc.array_address(C)

  pack_params = synppc.ExecParams()
  pm = synppc.ExecParams()

  pm.p1 = synppc.array_address(tA)
  pm.p2 = synppc.array_address(tB)
  pm.p3 = C_addr
  pm.p4 = synppc.array_address(C_aux)

  # Byte stride between consecutive nc-wide column blocks (8 bytes/element).
  nc8 = nc * 8

  start = time.time()

  for k in range(0, K, kc):
    # Pack the current column panel of A into tA.
    # NOTE(review): when K is not a multiple of kc the last slice is narrower
    # than tA; that path is unchanged from the original and unverified.
    tA[:,:] = A[:,k:k+kc]

    # Restart at the first column block of C for every panel.
    pm.p3 = C_addr

    # Byte offset k * N * 8 from the start of B -- presumably row k.
    pack_params.p1 = B_addr + k * N * 8
    for j in range(0, N, nc):
      # Pack the current block of B into tB.  Per the original author's
      # note, this stands in for
      #   tB[:,:] = Numeric.transpose(B[k:k+kc, j:j+nc])
      proc.execute(cpackb, params = pack_params)

      proc.execute(cgepb, params = pm)

      # Step both the B source and the C destination to the next nc columns.
      pack_params.p1 += nc8
      pm.p3 += nc8

  end = time.time()

  return end - start