Example 1
def batch_matmul_cblas(cfg, x, y):
    """Computes batch matrix multiplication of `x` and `y` when `x` and `y` are
    data in batch.

    Parameters
    ----------
    cfg : ConfigSpace
        Autotvm tuning space config file
    x : tvm.Tensor
        3-D with shape [batch, M, K]
    y : tvm.Tensor
        3-D with shape [batch, N, K]

    Returns
    -------
    output : tvm.Tensor
        3-D with shape [batch, M, N]
    """
    assert len(x.shape) == 3 and len(y.shape) == 3, "only support 3-dim batch_matmul"
    XB, M, XK = get_const_tuple(x.shape)
    YB, N, YK = get_const_tuple(y.shape)
    assert XB == YB, "batch dimension doesn't match"
    assert XK == YK, "shapes of x and y are inconsistent"
    cfg.add_flop(XB * M * N * XK * 2)
    return cblas.batch_matmul(x, y, False, True)
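A minimal usage sketch (not from the original; the shapes, names, and the pre-te `tvm.placeholder` API are assumptions matching this example's era). It shows the convention the docstring implies: `y` is laid out as [batch, N, K], so it is passed with `transb=True` and the result comes out as [batch, M, N].

import tvm
from tvm.contrib import cblas

batch, M, N, K = 4, 16, 32, 8          # illustrative sizes
x = tvm.placeholder((batch, M, K), name="x")
y = tvm.placeholder((batch, N, K), name="y")
# transb=True: y holds N rows of length K, so BLAS sees y transposed.
out = cblas.batch_matmul(x, y, False, True)   # shape: [batch, M, N]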
Example 2
def verify_batch_matmul(batch, m, l, n, transa=False, transb=False,
                        iterative=False, dtype=tvm.float32):
    ashape = (batch, l, n) if transa else (batch, n, l)
    bshape = (batch, m, l) if transb else (batch, l, m)
    A = tvm.placeholder(ashape, name='A', dtype=dtype)
    B = tvm.placeholder(bshape, name='B', dtype=dtype)
    C = cblas.batch_matmul(A, B, transa, transb)
    D = tvm.compute(C.shape, lambda k, i, j: C[k, i, j], name="D")
    s = tvm.create_schedule(D.op)

    def get_numpy(a, b, transa, transb):
        # topi.testing.batch_matmul expects its second operand as
        # [batch, N, K], i.e. already transposed, hence the inverted check.
        if transa:
            a = a.transpose(0, 2, 1)
        if not transb:
            b = b.transpose(0, 2, 1)
        return topi.testing.batch_matmul(a, b)

    def verify(target="llvm"):
        if not tvm.module.enabled(target):
            print("skip because %s is not enabled..." % target)
            return
        if not tvm.get_global_func("tvm.contrib.cblas.matmul", True):
            print("skip because extern function is not available")
            return
        ctx = tvm.cpu(0)
        f = tvm.build(s, [A, B, D], target)
        a = tvm.nd.array(np.random.uniform(size=ashape).astype(A.dtype), ctx)
        b = tvm.nd.array(np.random.uniform(size=bshape).astype(B.dtype), ctx)
        d = tvm.nd.array(np.zeros((batch, n, m), dtype=D.dtype), ctx)
        f(a, b, d)
        tvm.testing.assert_allclose(
            d.asnumpy(), get_numpy(a.asnumpy(), b.asnumpy(), transa, transb), rtol=1e-5)
    verify()
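The transposes in `get_numpy` exist only to match `topi.testing.batch_matmul`'s expected layout. A sketch (not part of the test) of the same reference computed directly with numpy, whose `matmul` broadcasts over the batch axis:

import numpy as np

def numpy_reference(a, b, transa, transb):
    # op(A) @ op(B), mirroring cblas transa/transb semantics.
    if transa:
        a = a.transpose(0, 2, 1)
    if transb:
        b = b.transpose(0, 2, 1)
    return np.matmul(a, b)   # [batch, n, m]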
Example 3
def verify_batch_matmul(batch,
                        m,
                        l,
                        n,
                        lib,
                        transa=False,
                        transb=False,
                        iterative=False,
                        dtype="float32"):
    ashape = (batch, l, n) if transa else (batch, n, l)
    bshape = (batch, m, l) if transb else (batch, l, m)
    A = te.placeholder(ashape, name="A", dtype=dtype)
    B = te.placeholder(bshape, name="B", dtype=dtype)
    C = cblas.batch_matmul(A, B, transa, transb)
    D = te.compute(C.shape, lambda k, i, j: C[k, i, j], name="D")
    s = te.create_schedule(D.op)

    def get_numpy(a, b, transa, transb):
        if transa:
            a = a.transpose(0, 2, 1)
        if not transb:
            b = b.transpose(0, 2, 1)
        return tvm.topi.testing.batch_matmul(a, b)

    def compile(f, name="test_batch_matmul", ext=".so"):
        # Round-trip the built module through a shared library so the
        # C-codegen path is exercised end to end.
        path = name + ext
        f.export_library(path)
        mod = tvm.runtime.load_module(path)
        f = mod[name]
        return f

    def verify(target="llvm"):
        if not tvm.testing.device_enabled(target):
            print("skip because %s is not enabled..." % target)
            return
        if not tvm.get_global_func(lib.__name__ + ".matmul", True):
            print("skip because extern function is not available")
            return
        dev = tvm.cpu(0)
        name = "test_batch_matmul"
        f = tvm.build(s, [A, B, D], target, name=name)
        if target == "c":
            f = compile(f, name)
        a = tvm.nd.array(np.random.uniform(size=ashape).astype(A.dtype), dev)
        b = tvm.nd.array(np.random.uniform(size=bshape).astype(B.dtype), dev)
        d = tvm.nd.array(np.zeros((batch, n, m), dtype=D.dtype), dev)
        f(a, b, d)
        tvm.testing.assert_allclose(d.asnumpy(),
                                    get_numpy(a.asnumpy(), b.asnumpy(), transa,
                                              transb),
                                    rtol=1e-5)

    verify("llvm")
    verify("c")
Example 4
def batch_matmul_x86(x, y):
    """Computes batch matrix multiplication of `x` and `y` when `x` and `y` are
    data in batch.

    Parameters
    ----------
    x : tvm.Tensor
        3-D with shape [batch, M, K]

    y : tvm.Tensor
        3-D with shape [batch, N, K]

    Returns
    -------
    output : tvm.Tensor
        3-D with shape [batch, M, N]
    """
    target = tvm.target.current_target()
    if "cblas" in target.libs:
        return cblas.batch_matmul(x, y, False, True)
    return batch_matmul_default(x, y)
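A sketch of how the cblas branch is selected (assumed usage, matching the pre-0.7 target API this example relies on): the "-libs=cblas" flag puts "cblas" into `target.libs`.

with tvm.target.create("llvm -libs=cblas"):
    out = batch_matmul_x86(x, y)   # dispatches to cblas.batch_matmul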
Example 5
def _declaration_batch_matmul_nopack(cfg, x, y):
    """Computes batch matrix multiplication of `x` and `y` when `x` and `y` are
    data in batch.

    Parameters
    ----------
    cfg : ConfigSpace
        Autotvm tuning space config file
    x : tvm.Tensor
        3-D with shape [batch, M, K]
    y : tvm.Tensor
        3-D with shape [batch, N, K]

    Returns
    -------
    output : tvm.Tensor
        3-D with shape [batch, M, N]
    """
    target = tvm.target.current_target()
    if "cblas" in target.libs:
        return cblas.batch_matmul(x, y, False, True)

    assert len(x.shape) == 3 and len(y.shape) == 3, "only support 3-dim batch_matmul"
    XB, M, XK = get_const_tuple(x.shape)
    YB, N, YK = get_const_tuple(y.shape)
    assert XB == YB, "batch dimension doesn't match"
    assert XK == YK, "shapes of x and y are inconsistent"
    B = XB
    K = XK
    if cfg.is_fallback:
        _default_batch_matmul_nopack_config(cfg, M, N, K)

    k = tvm.reduce_axis((0, K), name='k')
    C = tvm.compute(
        (B, M, N),
        lambda b, i, j: tvm.sum(x[b, i, k] * y[b, j, k], axis=k),
        tag='batch_matmul')
    return C
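As a sanity check on the fallback compute (illustrative only; `x_np` and `y_np` are assumed numpy arrays of shape [B, M, K] and [B, N, K]), the reduction C[b, i, j] = sum over k of x[b, i, k] * y[b, j, k] is exactly this einsum:

import numpy as np

# Batched matmul with the second operand transposed on its last two axes.
c_ref = np.einsum("bik,bjk->bij", x_np, y_np)   # shape: [B, M, N]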