Example 1
    def check(device, dtype, m=32, n=32):
        ctx = tvm.context(device, 0)
        if not ctx.exist or not tvm.runtime.enabled(device):
            print("skip because", device, "is not enabled..")
            return
        if dtype == "float16" and not have_fp16(ctx.compute_version):
            print("Skip because gpu does not have fp16 support")
            return

        a = te.placeholder((m, n), name="a", dtype=dtype)
        b = te.placeholder((m, n), name="b", dtype=dtype)
        c = a + b
        d = a * b
        e = topi.elemwise_sum([c, d])
        g = topi.sum(e)
        with tvm.target.create(device):
            sg = topi.cuda.schedule_reduce(g)
            func = tvm.build(sg, [a, b, g], device)
            a_np = np.random.uniform(size=(m, n)).astype(a.dtype)
            b_np = np.random.uniform(size=(m, n)).astype(b.dtype)
            g_np = np.sum(np.add(a_np * b_np, a_np + b_np))
            a_nd = tvm.nd.array(a_np, ctx)
            b_nd = tvm.nd.array(b_np, ctx)
            g_nd = tvm.nd.array(np.zeros(g_np.shape, dtype=g_np.dtype), ctx)
            func(a_nd, b_nd, g_nd)
            tvm.testing.assert_allclose(g_nd.asnumpy(), g_np, rtol=1e-3)
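
The snippet above is a nested helper from a TVM test, so it assumes the enclosing module already imports numpy as np, tvm, te, topi, and have_fp16. A hypothetical driver loop (not part of the excerpt) might invoke it like this:

# Hypothetical driver (illustrative target list, not from the original test);
# the float16 case is skipped inside check() on GPUs without fp16 support.
for device in ["cuda", "opencl"]:
    for dtype in ["float16", "float32"]:
        check(device, dtype)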
Example 2
def verify_elemwise_sum(num_args, dtype):
    shape = (3, 5, 4)

    tvm_placeholders = []
    for i in range(num_args):
        tvm_placeholders.append(te.placeholder(shape, name="data" + str(i), dtype=dtype))
    esum = topi.elemwise_sum(tvm_placeholders)
    s = te.create_schedule([esum.op])

    @memoize("topi.tests.test_topi_elemwise_sum")
    def get_ref_data():
        np_nd = [np.random.uniform(0, 10, size=shape).astype(dtype) for i in range(num_args)]
        return np_nd

    np_nd = get_ref_data()

    def check_device(device):
        if not tvm.testing.device_enabled(device):
            print("Skip because %s is not enabled" % device)
            return

        ctx = tvm.context(device, 0)
        out = tvm.nd.array(np.zeros(shape, dtype=dtype), ctx)
        f = tvm.build(s, tvm_placeholders + [esum], device, name="elemwise_sum")
        tvm_nd = [tvm.nd.array(nd, ctx) for nd in np_nd] + [out]
        f(*tvm_nd)
        np_out = np.sum(np.array(np_nd), axis=0)
        tvm.testing.assert_allclose(out.asnumpy(), np_out, rtol=1e-5)

    for device in ["llvm"]:
        check_device(device)
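
A hedged usage sketch, with illustrative argument values that are not from the original test; the snippet assumes numpy as np, tvm, te, topi, and the memoize decorator are imported by the enclosing module:

verify_elemwise_sum(1, "float32")  # degenerate case: the "sum" of a single tensor
verify_elemwise_sum(5, "float32")  # five (3, 5, 4) inputs summed element-wise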
Example 3
def check(device, dtype, m=32, n=32):
    if not tvm.testing.device_enabled(device):
        print("Skipping", device)
        return
    dev = tvm.device(device, 0)
    a = te.placeholder((m, n), name="a", dtype=dtype)
    b = te.placeholder((m, n), name="b", dtype=dtype)
    c = a + b
    d = a * b
    e = topi.elemwise_sum([c, d])
    g = topi.sum(e)
    with tvm.target.Target(device):
        sg = topi.cuda.schedule_reduce(g)
        func = tvm.build(sg, [a, b, g], device)
        a_np = np.random.uniform(size=(m, n)).astype(a.dtype)
        b_np = np.random.uniform(size=(m, n)).astype(b.dtype)
        g_np = np.sum(np.add(a_np * b_np, a_np + b_np))
        a_nd = tvm.nd.array(a_np, dev)
        b_nd = tvm.nd.array(b_np, dev)
        g_nd = tvm.nd.array(np.zeros(g_np.shape, dtype=g_np.dtype), dev)
        func(a_nd, b_nd, g_nd)
        tvm.testing.assert_allclose(g_nd.asnumpy(), g_np, rtol=1e-3)
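
Example 3 is the same check as Example 1, written against the newer TVM API (tvm.testing.device_enabled, tvm.device, tvm.target.Target). A minimal sketch of the imports this snippet assumes from its enclosing test module:

import numpy as np
import tvm
import tvm.testing
from tvm import te, topi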
Example 4
c = a + b  # same as topi.broadcast_add
d = a * b  # same as topi.broadcast_mul

######################################################################
# Since the operators are overloaded with the same syntax, TOPI also handles broadcasting a primitive (`int`, `float`) against a tensor, e.g. :code:`d - 3.14`.
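# Purely for illustration (this line is not in the original script): the scalar
# 3.14 is broadcast against every element of d, just like topi.subtract(d, 3.14).
h = d - 3.14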

######################################################################
# Generic schedules and fusing operations
# ---------------------------------------
# Up to now, we have seen how TOPI can save us from writing explicit computations in the lower-level API,
# but the scheduling was still done manually, as before. TOPI also provides higher-level
# scheduling recipes tailored to a given context. For CUDA, for example,
# we can schedule the following series of operations ending with :code:`topi.sum` using only
# :code:`topi.cuda.schedule_reduce`
#
e = topi.elemwise_sum([c, d])
f = e / 2.0
g = topi.sum(f)
with tvm.target.cuda():
    sg = topi.cuda.schedule_reduce(g)
    print(tvm.lower(sg, [a, b], simple_mode=True))

######################################################################
# As you can see, the scheduled stages of computation have been accumulated, and we can examine them by
#
print(sg.stages)

######################################################################
# We can test the correctness by comparing with the :code:`numpy` result as follows
#
func = tvm.build(sg, [a, b, g], "cuda")
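# A hedged sketch of the rest of the check (not part of the excerpt above): x and y
# stand for whatever concrete sizes a and b were declared with earlier in the script,
# and tvm.device availability depends on the TVM version.
dev = tvm.device("cuda", 0)
a_np = np.random.uniform(size=(x, y)).astype(a.dtype)
b_np = np.random.uniform(size=(x, y)).astype(b.dtype)
# mirrors e = elemwise_sum([c, d]), f = e / 2.0, g = sum(f)
g_np = np.sum(np.add(a_np + b_np, a_np * b_np) / 2.0)
a_nd = tvm.nd.array(a_np, dev)
b_nd = tvm.nd.array(b_np, dev)
g_nd = tvm.nd.array(np.zeros(g_np.shape, dtype=g_np.dtype), dev)
func(a_nd, b_nd, g_nd)
tvm.testing.assert_allclose(g_nd.asnumpy(), g_np, rtol=1e-5)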