def test_accel(A, B, w_width, a_width):
    """Call the loaded tsim module on every pair of packed chunks of A and B.

    Both inputs are passed through ``pack_8``; every entry along the first
    axis of each packed result is wrapped in a one-element ``uint64`` TVM
    array. The module is then invoked once per (A chunk, B chunk) pair, with
    all invocations sharing a single one-element ``uint64`` accumulator.

    Args:
        A: Object with a ``shape``; ``A.shape[1]`` must equal ``B.shape[1]``.
        B: Object with a ``shape``.
        w_width: Factor multiplied by the A-chunk index in the shift amount.
        a_width: Factor multiplied by the B-chunk index in the shift amount.

    Returns:
        A tuple ``(accumulator[0], cycles)`` where ``cycles`` is the sum of
        the values returned by the module calls.
    """
    assert A.shape[1] == B.shape[1], "sliced shape not match"
    ctx = tvm.cpu(0)
    f = tsim.load_module()

    packed_lhs = pack_8(A)
    packed_rhs = pack_8(B)
    dtype = 'uint64'

    def wrap(chunk):
        # Each packed chunk travels as its own one-element device array.
        return tvm.nd.array(np.array([chunk]).astype(dtype), ctx)

    a_arr = [wrap(packed_lhs[n]) for n in range(packed_lhs.shape[0])]
    b_arr = [wrap(packed_rhs[n]) for n in range(packed_rhs.shape[0])]

    cycles = 0
    accum = tvm.nd.array(np.array([0]).astype("uint64"), ctx)
    for a_idx, a_nd in enumerate(a_arr):
        for b_idx, b_nd in enumerate(b_arr):
            shift = np.uint8(a_idx * w_width + b_idx * a_width)
            # Only the very first call resets the accumulator.
            is_first = a_idx == 0 and b_idx == 0
            reset = np.uint32(1) if is_first else np.uint32(0)
            cycles += f(a_nd, b_nd, shift, accum, reset)

    return (accum.asnumpy()[0], cycles)
def test_accel(A, B, i_width, w_width):
    """Call the loaded tsim module on every (row of A, slice of B) pair.

    Each ``A[i]`` is copied element by element into a 1-D buffer and each
    ``B[i]`` into a 2-D buffer, both using ``A.dtype``, and wrapped as TVM
    arrays. The module is invoked once per pair, with the B slice passed
    first, and every call shares one ``uint32`` accumulator of length
    ``A.shape[1]``.

    Args:
        A: Indexable as ``A[i][j]``; ``A.shape[1]`` must equal ``B.shape[2]``.
        B: Indexable as ``B[i][j][k]``.
        i_width: Factor multiplied by the A-row index in the shift amount.
        w_width: Factor multiplied by the B-slice index in the shift amount.

    Returns:
        A tuple ``(accumulator as a NumPy array, cycles)`` where ``cycles``
        is the sum of the values returned by the module calls.
    """
    assert A.shape[1] == B.shape[2], "sliced shape not match"
    dtype = A.dtype
    ctx = tvm.cpu(0)
    f = tsim.load_module()

    a_arr = []
    for row in range(A.shape[0]):
        vec = np.zeros(A.shape[1]).astype(dtype)
        for col in range(A.shape[1]):
            vec[col] = A[row][col]
        a_arr.append(tvm.nd.array(vec.astype(dtype), ctx))

    b_arr = []
    for blk in range(B.shape[0]):
        # transpose
        # NOTE(review): the buffer is shaped (B.shape[2], B.shape[1]) but the
        # copy uses the same (r, c) indices on both sides, so nothing is
        # actually swapped -- confirm intent for non-square slices.
        tile = np.zeros((B.shape[2], B.shape[1])).astype(dtype)
        for r in range(B.shape[2]):
            for c in range(B.shape[1]):
                tile[r][c] = B[blk][r][c]
        b_arr.append(tvm.nd.array(tile.astype(dtype), ctx))

    cycles = 0
    accum = tvm.nd.array(np.zeros(A.shape[1]).astype("uint32"), ctx)
    for a_idx, a_nd in enumerate(a_arr):
        for b_idx, b_nd in enumerate(b_arr):
            shift = np.uint8(a_idx * i_width + b_idx * w_width)
            # Only the very first call resets the accumulator.
            is_first = a_idx == 0 and b_idx == 0
            reset = np.uint32(1) if is_first else np.uint32(0)
            cycles += f(b_nd, a_nd, shift, accum, reset)

    return (accum.asnumpy(), cycles)
def test_accel():
    """Check that the loaded tsim module adds two random vectors elementwise.

    The vector length is ``1 << k`` for a random ``k`` drawn from
    ``np.random.randint(0, 5)``. Both operands hold random ``uint64`` values
    below 64, and the output buffer starts as zeros. After the call the
    output must equal the elementwise sum of the operands; on success a
    ``[PASS]`` line is printed.
    """
    rmax = 64
    dtype = "uint64"
    n = 1 << np.random.randint(0, 5)
    ctx = tvm.cpu(0)

    # Operands are drawn in this order so a seeded RNG gives the same data.
    lhs = tvm.nd.array(np.random.randint(rmax, size=n).astype(dtype), ctx)
    rhs = tvm.nd.array(np.random.randint(rmax, size=n).astype(dtype), ctx)
    out = tvm.nd.array(np.zeros(n).astype(dtype), ctx)

    accel = tsim.load_module()
    accel(lhs, rhs, out)

    msg = "n:{}".format(n)
    actual = out.asnumpy()
    expected = lhs.asnumpy() + rhs.asnumpy()
    np.testing.assert_equal(actual, expected, err_msg="[FAIL] " + msg)
    print("[PASS] " + msg)
def test_accel():
    """Check that the loaded tsim module adds a constant to a random vector.

    A length ``n`` from ``np.random.randint(1, 64)`` and a constant ``c``
    from ``np.random.randint(0, 64)`` are drawn. The input holds random
    ``uint64`` values below 64 and the output buffer starts as zeros. After
    the call the output must equal ``input + c``; on success a ``[PASS]``
    line with the module's return value (reported as cycles), ``n`` and ``c``
    is printed.
    """
    rmax = 64
    dtype = "uint64"

    # Draw order (n, c, data) is kept so a seeded RNG gives the same case.
    n = np.random.randint(1, rmax)
    c = np.random.randint(0, rmax)
    ctx = tvm.cpu(0)
    src = tvm.nd.array(np.random.randint(rmax, size=n).astype(dtype), ctx)
    dst = tvm.nd.array(np.zeros(n).astype(dtype), ctx)

    accel = tsim.load_module()
    cycles = accel(src, dst, c)

    msg = "cycles:{0:4} n:{1:2} c:{2:2}".format(cycles, n, c)
    actual = dst.asnumpy()
    expected = src.asnumpy() + c
    np.testing.assert_equal(actual, expected, err_msg="[FAIL] " + msg)
    print("[PASS] " + msg)
def test_accel(A, B, slice_width):
    """Call the loaded tsim module on every (row of A, row of B) pair.

    Each row of A and B is copied into its own 1-D buffer of ``A.dtype`` and
    wrapped as a TVM array. The module is invoked once per pair of rows, and
    every call shares a single one-element ``uint64`` accumulator.

    Leftover debug output and commented-out code from the original driver
    have been removed; the returned values are unchanged.

    Args:
        A: 2-D array indexable as ``A[i][j]``.
        B: 2-D array with the same ``shape`` as A.
        slice_width: Factor multiplied by ``i + j`` (the two row indices) to
            form the shift amount passed to the module.

    Returns:
        A tuple ``(accumulator[0], cycles)`` where ``cycles`` is the sum of
        the values returned by the module calls.
    """
    assert A.shape == B.shape, "sliced shape not match"
    dtype = A.dtype
    ctx = tvm.cpu(0)
    f = tsim.load_module()

    n_cols = A.shape[1]

    def row_to_nd(mat, row):
        # Copy one row element by element so B is cast to A's dtype as well.
        values = [mat[row][col] for col in range(n_cols)]
        return tvm.nd.array(np.array(values).astype(dtype), ctx)

    a_arr = [row_to_nd(A, row) for row in range(A.shape[0])]
    b_arr = [row_to_nd(B, row) for row in range(A.shape[0])]

    cycles = 0
    accum = tvm.nd.array(np.array([0]).astype("uint64"), ctx)
    for i, a_nd in enumerate(a_arr):
        for j, b_nd in enumerate(b_arr):
            shift = np.uint8((i + j) * slice_width)
            # Only the very first call resets the accumulator.
            reset = np.uint32(1) if (i == 0 and j == 0) else np.uint32(0)
            cycles += f(a_nd, b_nd, shift, accum, reset)

    return (accum.asnumpy()[0], cycles)