Ejemplo n.º 1
0
def test_matrix_multiply():
    """Compare the generated matrix-multiply kernel with NumPy.

    Four cases are exercised, one per combination of the two transpose
    flags passed to ``tvm_op.make_matrix_mul``. For each case random
    inputs are drawn from ``uniform(0, 10)``, the kernel writes into a
    zero-initialised output array, and the result must match ``np.dot``
    on the (optionally transposed) inputs with ``rtol=1e-5``.

    ``dtype``, ``ctx``, ``tgt`` and ``tgt_host`` are read from names
    defined outside this function.
    """
    # (shape of X, transpose X?, shape of Y, transpose Y?, shape of Z)
    cases = (
        ((500, 700), False, (700, 1000), False, (500, 1000)),
        ((1000, 500), False, (2000, 500), True, (1000, 2000)),
        ((500, 1000), True, (500, 2000), False, (1000, 2000)),
        ((500, 1000), True, (2000, 500), True, (1000, 2000)),
    )

    for x_shape, x_transposed, y_shape, y_transposed, z_shape in cases:
        lhs = np.random.uniform(0, 10, size=x_shape).astype(dtype)
        rhs = np.random.uniform(0, 10, size=y_shape).astype(dtype)
        out = np.zeros(z_shape).astype(dtype)
        dev_lhs = tvm.nd.array(lhs, ctx=ctx)
        dev_rhs = tvm.nd.array(rhs, ctx=ctx)
        dev_out = tvm.nd.array(out, ctx=ctx)

        kernel = tvm_op.make_matrix_mul(
            x_shape, x_transposed, y_shape, y_transposed,
            tgt, tgt_host, "matrix_mul")
        kernel(dev_lhs, dev_rhs, dev_out)
        computed = dev_out.asnumpy()

        # Build the NumPy reference with the same transposes the kernel
        # was asked to apply.
        ref_lhs = np.transpose(lhs) if x_transposed else lhs
        ref_rhs = np.transpose(rhs) if y_transposed else rhs
        np.testing.assert_allclose(np.dot(ref_lhs, ref_rhs), computed, rtol=1e-5)
def test_matrix_multiply():
    """Validate ``tvm_op.make_matrix_mul`` against ``np.dot``.

    Runs one check for every combination of the transpose-X and
    transpose-Y flags, using random ``uniform(0, 10)`` inputs and a
    relative tolerance of ``1e-5``.

    NOTE(review): a function with this same name and body appears
    earlier in this file; if both stay in one module, this later
    definition shadows the earlier one.
    """

    def check(shape_a, trans_a, shape_b, trans_b, shape_c):
        # Host-side inputs and a zero-filled output buffer.
        a = np.random.uniform(0, 10, size=shape_a).astype(dtype)
        b = np.random.uniform(0, 10, size=shape_b).astype(dtype)
        c = np.zeros(shape_c).astype(dtype)
        nd_a = tvm.nd.array(a, ctx=ctx)
        nd_b = tvm.nd.array(b, ctx=ctx)
        nd_c = tvm.nd.array(c, ctx=ctx)

        func = tvm_op.make_matrix_mul(shape_a, trans_a, shape_b, trans_b, tgt, tgt_host, "matrix_mul")
        func(nd_a, nd_b, nd_c)
        actual = nd_c.asnumpy()

        # Apply the same transposes on the NumPy side for the reference.
        if trans_a:
            a = np.transpose(a)
        if trans_b:
            b = np.transpose(b)
        np.testing.assert_allclose(np.dot(a, b), actual, rtol=1e-5)

    # X @ Y
    check((500, 700), False, (700, 1000), False, (500, 1000))
    # X @ Y^T
    check((1000, 500), False, (2000, 500), True, (1000, 2000))
    # X^T @ Y
    check((500, 1000), True, (500, 2000), False, (1000, 2000))
    # X^T @ Y^T
    check((500, 1000), True, (2000, 500), True, (1000, 2000))
def test_GEMM_performance():
    """Check and time the generated GEMM kernel, with NumPy as a baseline.

    Builds a non-transposed (M, K) x (K, N) float32 kernel through
    ``tvm_op.make_matrix_mul`` for the ``llvm -mcpu=core-avx2`` target,
    then:

    1. prints the average ``np.dot`` time over 100 ``timeit`` runs,
    2. asserts the kernel output matches ``np.dot`` with ``rtol=1e-5``,
    3. prints the mean time reported by the kernel's ``time_evaluator``.
    """
    pf_tgt_host = "llvm"
    pf_tgt = "llvm -mcpu=core-avx2"
    dtype = "float32"
    # Derive the context from this test's own target instead of the
    # module-level ``tgt`` so the benchmark does not depend on outer state.
    # NOTE(review): relies on tvm.context accepting a target string with
    # options (using its leading device name) - confirm for the TVM
    # version in use.
    pf_ctx = tvm.context(pf_tgt, 0)
    M = 1000
    N = 256
    K = 784
    transpose_A = False
    transpose_B = False
    gemm_func = tvm_op.make_matrix_mul(
        (M, K), transpose_A, (K, N), transpose_B, pf_tgt, pf_tgt_host, "GEMM")

    # Randomly generated tensors for testing. The output buffer is placed
    # on the same context as the inputs (it previously used the outer
    # ``ctx``).
    a = tvm.nd.array(np.random.rand(M, K).astype(dtype), pf_ctx)
    b = tvm.nd.array(np.random.rand(K, N).astype(dtype), pf_ctx)
    c = tvm.nd.array(np.zeros((M, N), dtype=dtype), pf_ctx)

    # NumPy baseline: timeit runs the statement in a fresh namespace, so
    # the operands are re-created by the setup string.
    np_repeat = 100
    np_running_time = timeit.timeit(setup='import numpy as np\n'
                                    'M = ' + str(M) + '\n'
                                    'K = ' + str(K) + '\n'
                                    'N = ' + str(N) + '\n'
                                    'dtype = "float32"\n'
                                    'a = np.random.rand(M, K).astype(dtype)\n'
                                    'b = np.random.rand(K, N).astype(dtype)\n',
                                    stmt='answer = np.dot(a, b)',
                                    number=np_repeat)
    print("Numpy running time: %f" % (np_running_time / np_repeat))

    # Reference result computed from the exact inputs given to the kernel.
    answer = np.dot(a.asnumpy(), b.asnumpy())

    gemm_func(a, b, c)
    tvm.testing.assert_allclose(c.asnumpy(), answer, rtol=1e-5)

    evaluator = gemm_func.time_evaluator(gemm_func.entry_name,
                                         pf_ctx,
                                         number=1)
    print('Baseline: %f' % evaluator(a, b, c).mean)
Ejemplo n.º 4
0
def test_matrix_multiply_time():
    """Verify a 512x2048 by 2048x4096 matrix multiply and print timings.

    The kernel from ``tvm_op.make_matrix_mul`` (no transposes) is first
    checked against ``np.dot`` with ``rtol=1e-5``. The function then
    prints the average ``numpy.dot`` time over 100 ``timeit`` runs and
    the mean time reported by the kernel's ``time_evaluator`` with
    ``number=10``.

    ``dtype``, ``ctx``, ``tgt`` and ``tgt_host`` are read from names
    defined outside this function.
    """
    M, K, N = 512, 2048, 4096
    lhs_shape, rhs_shape, out_shape = (M, K), (K, N), (M, N)

    lhs = np.random.uniform(0, 10, size=lhs_shape).astype(dtype)
    rhs = np.random.uniform(0, 10, size=rhs_shape).astype(dtype)
    out = np.zeros(out_shape).astype(dtype)
    dev_lhs = tvm.nd.array(lhs, ctx=ctx)
    dev_rhs = tvm.nd.array(rhs, ctx=ctx)
    dev_out = tvm.nd.array(out, ctx=ctx)

    kernel = tvm_op.make_matrix_mul(
        lhs_shape, False, rhs_shape, False, tgt, tgt_host, "matrix_mul")
    kernel(dev_lhs, dev_rhs, dev_out)
    computed = dev_out.asnumpy()
    np.testing.assert_allclose(np.dot(lhs, rhs), computed, rtol=1e-5)

    # NumPy baseline: timeit executes in its own namespace, so the setup
    # code rebuilds operands of the same sizes.
    runs = 100
    setup_lines = (
        'import numpy',
        'M = ' + str(M),
        'K = ' + str(K),
        'N = ' + str(N),
        'dtype = "float32"',
        'a = numpy.random.rand(M, K).astype(dtype)',
        'b = numpy.random.rand(K, N).astype(dtype)',
    )
    setup_code = ''.join(line + '\n' for line in setup_lines)
    numpy_total = timeit.timeit(
        setup=setup_code,
        stmt='answer = numpy.dot(a, b)',
        number=runs)
    print("Numpy running time: %f" % (numpy_total / runs))

    timer = kernel.time_evaluator(kernel.entry_name, ctx, number=10)
    print('Opt1: %f' % timer(dev_lhs, dev_rhs, dev_out).mean)