def check_device(device):
    if not tvm.testing.device_enabled(device):
        print("Skip because %s is not enabled" % device)
        return
    print("Running on target: %s" % device)
    with tvm.target.Target(device):
        fcompute, fschedule = tvm.topi.testing.dispatch(
            device, _conv2d_nhwc_implement)
        B = fcompute(A, W, stride, padding, dilation, dtype)
        s = fschedule([B])
    dev = tvm.device(device, 0)
    a = tvm.nd.array(a_np, dev)
    w = tvm.nd.array(w_np, dev)
    b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype),
                     dev)
    func = tvm.build(s, [A, W, B], device)
    func(a, w, b)
    tvm.testing.assert_allclose(b.numpy(), b_np, rtol=1e-5)
    def check_target(target, ir):
        C = te.extern(
            (n, ),
            [],
            lambda ins, outs: ir(outs[0]),
            name="collatz",
            dtype="int32",
        )
        s = te.create_schedule(C.op)

        with tvm.transform.PassContext(opt_level=3):
            func = tvm.build(s, [C], target)

        dev = tvm.device(target, 0)
        c = tvm.nd.array(np.zeros(n, dtype=C.dtype), dev)
        func(c)
        ref = np.array([collatz_ref(i) for i in range(n)])
        tvm.testing.assert_allclose(c.numpy(), ref)
Example #3
def profile_and_build_vm(
    mod,
    params,
    sm,
    tmp_dir="./tmp",
    lib_path="compile.so",
    vmcode_path="vmcode.ro",
    use_fast_math=False,
):
    mod = partition_for_cutlass(mod)
    mod, num_cutlass_partition = tune_cutlass_kernels(mod, sm, tmp_dir=tmp_dir)
    with tvm.transform.PassContext(opt_level=3):
        vm_exec = relay.vm.compile(mod, target="cuda", params=params)
    vm_exec = build_cutlass_kernels_vm(
        vm_exec, sm, tmp_dir, lib_path, vmcode_path, use_fast_math=use_fast_math
    )
    dev = tvm.device("cuda", 0)
    return VirtualMachine(vm_exec, dev), dev, num_cutlass_partition
Example #4
def test_out_of_bounds_llvm(index_a, index_b):
    n = te.size_var("n")
    A = te.placeholder((n, ), name="A")
    B = te.placeholder((n, ), name="B")
    C = te.compute(A.shape,
                   lambda i: A[i + index_a] + B[i + index_b],
                   name="C")
    s = te.create_schedule(C.op)
    tgt = "llvm"
    tgt_host = "llvm"
    stmt = tvm.lower(s, [A, B, C], simple_mode=True)
    print(stmt)
    fadd = tvm.build(s, [A, B, C], tgt, target_host=tgt_host, name="myadd")
    dev = tvm.device(tgt, 0)
    a = tvm.nd.array(np.random.uniform(size=1024).astype(A.dtype), dev)
    b = tvm.nd.array(np.random.uniform(size=1024).astype(B.dtype), dev)
    c = tvm.nd.array(np.zeros(1024, dtype=C.dtype), dev)
    fadd(a, b, c)
    def check_device(device):
        dev = tvm.device(device, 0)
        print("Running on target: %s" % device)
        with tvm.target.Target(device):
            if bgemm == "direct":
                fcompute, fschedule = tvm.topi.testing.dispatch(
                    device, _conv2d_nhwc_winograd_direct)
            elif bgemm == "tensorcore":
                fcompute, fschedule = tvm.topi.testing.dispatch(
                    device, _conv2d_nhwc_winograd_tensorcore)
            C = fcompute(A, W, stride, padding, dilation, "float32")
            if add_bias:
                C = topi.add(C, bias)
            if add_relu:
                C = topi.nn.relu(C)
            s = fschedule([C])

        a = tvm.nd.array(a_np, dev)
        w = tvm.nd.array(w_np, dev)
        b = tvm.nd.array(b_np, dev)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype),
                         dev)
        if add_bias:
            func = tvm.build(
                s,
                [A, W, bias, C],
                device,
                name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
                (batch, in_channel, in_size, num_filter, kernel, stride,
                 padding_sum, dilation),
            )
            func(a, w, b, c)
        else:
            func = tvm.build(
                s,
                [A, W, C],
                device,
                name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
                (batch, in_channel, in_size, num_filter, kernel, stride,
                 padding_sum, dilation),
            )
            func(a, w, c)

        tvm.testing.assert_allclose(c.numpy(), c_np, rtol=2e-3)
def test_dense_dense():
    M, N, K = 128, 128, 128
    data_shape = (M, K)
    weight_shape = (N, K)

    relay_mod = tvm.IRModule.from_expr(
        get_dense_dense(data_shape, weight_shape))

    # print(relay.transform.InferType()(relay_mod))

    data_np = np.random.randn(*data_shape).astype("float32")
    weight1_np = np.random.randn(*weight_shape).astype("float32")
    weight2_np = np.random.randn(*weight_shape).astype("float32")

    target = "llvm"
    params = {"weight1": weight1_np, "weight2": weight2_np}

    def schedule_fn(task, sch):
        if "nn_dense_nn_dense" in task.task_name:
            schedule_dense_dense(sch)
            return True
        return False

    database = apply_fixed_schedules(relay_mod, target, params, schedule_fn)

    with ApplyHistoryBest(database):
        with tvm.transform.PassContext(
                opt_level=3,
                config={"relay.backend.use_meta_schedule": True},
        ):
            lib = relay.build(relay_mod, target=target, params=params)

    dev = tvm.device(target, 0)

    runtime = tvm.contrib.graph_executor.GraphModule(lib["default"](dev))

    runtime.set_input("data", data_np)
    runtime.run()

    out = runtime.get_output(0).numpy()

    ref = get_ref(data_np, weight1_np, weight2_np)

    tvm.testing.assert_allclose(out, ref, atol=1e-4, rtol=1e-4)
    def check_device(device):
        dev = tvm.device(device, 0)
        if not tvm.testing.device_enabled(device):
            print("Skip because %s is not enabled" % device)
            return
        print("Running on target: %s" % device)
        with tvm.target.Target(device):
            fcompute, fschedule = tvm.topi.testing.dispatch(
                device, _conv2d_nchw_winograd_implement)
            C = fcompute(A, W, stride, padding, dilation, dtype)
            if add_bias:
                C = topi.add(C, bias)
            if add_relu:
                C = topi.nn.relu(C)
            s = fschedule([C])

        a = tvm.nd.array(a_np, dev)
        w = tvm.nd.array(w_np, dev)
        b = tvm.nd.array(b_np, dev)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype),
                         dev)
        if add_bias:
            func = tvm.build(
                s,
                [A, W, bias, C],
                device,
                name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
                (batch, in_channel, in_size, num_filter, kernel, stride,
                 padding_sum, dilation),
            )
            func(a, w, b, c)
        else:
            func = tvm.build(
                s,
                [A, W, C],
                device,
                name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
                (batch, in_channel, in_size, num_filter, kernel, stride,
                 padding_sum, dilation),
            )
            func(a, w, c)

        rtol = 1e-3
        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=rtol)
def test_nat_add():
    mod = tvm.IRModule()
    p = Prelude(mod)
    p.mod.import_from_std("nat.rly")
    nat, z, s = p.mod.get_type("nat")
    add = p.mod.get_global_var("nat_add")
    dev = tvm.device("llvm", 0)
    intrp = create_executor(mod=mod, device=dev, target="llvm")
    assert mod[add].checked_type == relay.FuncType([nat(), nat()], nat())
    assert count(p, intrp.evaluate(add(s(z()), s(z())))) == 2
    expr = add(s(z()), s(z()))
    f = relay.GlobalVar("f")
    mod[f] = relay.Function([], expr)
    mod = transform.InferType()(mod)
    mod = transform.ToBasicBlockNormalForm()(mod)
    opt_expr = mod["f"]
    assert count(p, intrp.evaluate(opt_expr.body)) == 2
    assert not Feature.fLet in detect_feature(mod[add])
    check_basic_block_normal_form(opt_expr)
Example #9
    def check_target(target):
        if not tvm.testing.device_enabled(target):
            print("Skip because %s is not enabled" % target)
            return
        dev = tvm.device(target, 0)
        print("Running on target: %s" % target)
        with tvm.target.Target(target):
            fcompute, fschedule = tvm.topi.testing.dispatch(
                target, _argsort_implement)
            out = fcompute(data, axis=axis, is_ascend=is_ascend)
            s = fschedule(out)

        tvm_data = tvm.nd.array(np_data, dev)
        tvm_out = tvm.nd.array(np.zeros(dshape, dtype=data_dtype), dev)
        f = tvm.build(s, [data, out], target)
        f(tvm_data, tvm_out)
        tvm.testing.assert_allclose(tvm_out.asnumpy(),
                                    np_indices.astype(data_dtype),
                                    rtol=1e0)
def test_check_and_update_host_consist_4():
    """Test `check_and_update_host_consist` by using TVM Objects"""
    cuda_device_type = tvm.device("cuda").device_type
    target = {cuda_device_type: Target(target="cuda", host="llvm")}
    host = None
    target_1, host_1 = Target.check_and_update_host_consist(target, host)
    assert isinstance(target_1, dict)
    assert target_1[cuda_device_type].kind.name == "cuda"
    assert target_1[cuda_device_type].host.kind.name == "llvm"
    assert host_1 is None

    target = {cuda_device_type: Target(tvm.runtime.container.String("cuda"))}
    host = Target(tvm.runtime.container.String("llvm"))
    target = tvm.runtime.convert(target)
    assert isinstance(target, tvm.ir.container.Map)
    target_2, host_2 = Target.check_and_update_host_consist(target, host)
    assert isinstance(target_2, dict)
    assert target_2[cuda_device_type].kind.name == "cuda"
    assert host_2.kind.name == "llvm"
Example #11
def build_and_run(inputs, func, target, target_host, *args, **kwargs):
    schedule, placeholders, binds = func(*args, **kwargs)

    func = tvm.build(
        schedule, placeholders, target=tvm.target.Target(target, host=target_host), binds=binds
    )
    dev = tvm.device(target)
    tensors = []
    for tensor in inputs:
        tensors.append(tvm.nd.array(tensor, dev))
    tensors.append(
        tvm.nd.array(
            numpy.zeros([i.value for i in placeholders[-1].shape], dtype=placeholders[-1].dtype),
            dev,
        )
    )
    func(*tensors)

    return tensors[-1].asnumpy()
Example #12
def check_device(device, host="llvm"):
    dev = tvm.device(device, 0)
    if not tvm.testing.device_enabled(device):
        print("skip because %s is not enabled.." % device)
        return
    freduce = tvm.build(
        s, args=[A, B], target=tvm.target.Target(device, host), name="myreduce"
    )
    # launch the kernel.
    n = 1028
    m = 129
    x = tvm.nd.array(np.random.uniform(size=(n, m)).astype(A.dtype), dev)
    y = tvm.nd.array(np.zeros(n, dtype=B.dtype), dev)
    freduce(x, y)
    npy = y.numpy()
    npy[:2] = 0
    res = np_reducer(x.numpy(), axis=1)
    res[:2] = 0
    tvm.testing.assert_allclose(npy, res, rtol=1e-4)
def test_double_splitting_with_indivisible_factors():
    m = 48
    dtype = "float32"
    A = te.placeholder((m,), name="A", dtype=dtype)
    C = te.compute((m,), lambda i: A[i], name="C")
    D = te.compute((m,), lambda i: C[i], name="D")

    s = te.create_schedule(D.op)
    co, ci = s[C].split(C.op.axis[0], factor=10)
    do, di = s[D].split(D.op.axis[0], 32)
    s[C].compute_at(s[D], do)

    target = "llvm"
    with tvm.transform.PassContext(config={"tir.LoopPartition": {"partition_const_loop": True}}):
        f = tvm.lower(s, [A, C, D], name="fadd1", simple_mode=False)
        func = tvm.build(f, target=target)

    top_produce = f["fadd1"].body
    assert not any(collect_visit(top_produce, lambda x: isinstance(x, tvm.tir.IfThenElse)))

    # check functional correctness of generated code
    dev = tvm.device(target, 0)
    a = tvm.nd.array(
        numpy.ones(
            m,
        ).astype(dtype),
        dev,
    )
    c = tvm.nd.array(
        numpy.zeros(
            m,
        ).astype(dtype),
        dev,
    )
    d = tvm.nd.array(
        numpy.zeros(
            m,
        ).astype(dtype),
        dev,
    )
    func(a, c, d)
    tvm.testing.assert_allclose(c.numpy(), a.numpy(), rtol=1e-5)
    tvm.testing.assert_allclose(d.numpy(), a.numpy(), rtol=1e-5)
Example #14
def profile_and_build(mod,
                      params,
                      sm,
                      tmp_dir="./tmp",
                      lib_path="compile.so",
                      use_fast_math=False):
    mod = partition_for_cutlass(mod)
    mod, num_cutlass_partition = tune_cutlass_kernels(
        mod, sm, profile_all=False, use_multiprocessing=False, tmp_dir=tmp_dir)
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(mod, target="cuda", params=params)
    lib = build_cutlass_kernels(lib,
                                sm,
                                tmp_dir,
                                lib_path,
                                use_fast_math=use_fast_math)
    dev = tvm.device("cuda", 0)
    rt_mod = tvm.contrib.graph_executor.GraphModule(lib["default"](dev))
    return rt_mod, dev, num_cutlass_partition
    def check_device(device):
        dev = tvm.device(device, 0)
        print("Running on target: %s" % device)
        with tvm.target.Target(device):
            fcompute, fschedule = tvm.topi.testing.dispatch(
                device, _conv3d_ndhwc_tensorcore_implement)
            C = fcompute(A, W, stride, padding, dilation, 1, "float16")
            if add_bias:
                C = topi.add(C, bias)
            if add_relu:
                C = topi.nn.relu(C)
            s = fschedule([C])

        a = tvm.nd.array(a_np, dev)
        w = tvm.nd.array(w_np, dev)
        b = tvm.nd.array(b_np, dev)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype),
                         dev)
        if add_bias:
            func = tvm.build(
                s,
                [A, W, bias, C],
                device,
                name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
                (batch, in_channel, in_size, num_filter, kernel, stride,
                 padding_sum, dilation),
            )
            func(a, w, b, c)
        else:
            func = tvm.build(
                s,
                [A, W, C],
                device,
                name="relu_%d_%d_%d_%d_%d_%d_%d_%d" %
                (batch, in_channel, in_size, num_filter, kernel, stride,
                 padding_sum, dilation),
            )
            func(a, w, c)

        # Tensorcores are very inaccurate, with large shapes, the accumulation
        # error is high especially away from 1. We disable atol as it is very
        # large for these numbers that are far away from 1.
        tvm.testing.assert_allclose(c.numpy(), c_np, atol=1e200, rtol=0.01)
Example #16
def _get_targets():
    target_str = os.environ.get("TVM_TEST_TARGETS", "")
    if len(target_str) == 0:
        target_str = DEFAULT_TEST_TARGETS
    targets = set()
    for dev in target_str.split(";"):
        if len(dev) == 0:
            continue
        target_kind = dev.split()[0]
        if tvm.runtime.enabled(target_kind) and tvm.device(target_kind, 0).exist:
            targets.add(dev)
    if len(targets) == 0:
        logging.warning(
            "None of the following targets are supported by this build of TVM: %s."
            " Try setting TVM_TEST_TARGETS to a supported target. Defaulting to llvm.",
            target_str,
        )
        return {"llvm"}
    return targets
Example #17
def test_gemm_tensorcore():
    """Test running gemm on tensorcore."""
    dev = tvm.device("cuda", 0)
    a_np = np.random.uniform(size=(1024, 1024)).astype("float16")
    b_np = np.random.uniform(size=(1024, 1024)).astype("float16")
    c_np = np.dot(a_np.astype("float32"), b_np.T.astype("float32"))
    buff_a = tvm.nd.array(a_np, dev)
    buff_b = tvm.nd.array(b_np, dev)
    buff_c = tvm.nd.array(np.zeros((1024, 1024), dtype="float32"), dev)
    myfunc = tvm.build(tensorcore_gemm, target="cuda", name="dense")
    myfunc(buff_a, buff_b, buff_c)
    tvm.testing.assert_allclose(buff_c.numpy(), c_np, rtol=1e-3)

    evaluator = myfunc.time_evaluator(myfunc.entry_name, dev, number=100)
    time_elapsed = evaluator(buff_a, buff_b, buff_c).mean
    num_flops = 2 * 1024 * 1024 * 1024
    gflops = num_flops / (time_elapsed * 1e3) / 1e6
    print("gemm with tensor core: %f ms" % (time_elapsed * 1e3))
    print("GFLOPS: %f" % gflops)
def check_device(device):
    if not tvm.testing.device_enabled(device):
        return
    dev = tvm.device(device, 0)
    s = te.create_schedule(D.op)
    for stage in [C, D]:
        xo, xi = s[stage].split(stage.op.axis[0], factor=4)
        s[stage].bind(xo, te.thread_axis("blockIdx.x"))
        s[stage].bind(xi, te.thread_axis("threadIdx.x"))
    f = tvm.build(s, [A, B, D], device)
    a_np = np.random.uniform(size=n).astype(A.dtype)
    a = tvm.nd.array(a_np, dev)
    b = tvm.nd.array(np.random.uniform(size=n).astype(B.dtype), dev)
    d = tvm.nd.array(np.zeros(n, dtype=D.dtype), dev)
    f(a, b, d)
    np.testing.assert_equal(
        d.numpy(),
        np.logical_and(a.numpy() > b.numpy(), a.numpy() > 1).astype("float32"),
    )
Example #19
def check_device(device):
    if not tvm.runtime.module.enabled(device):
        print("Skip because %s is not enabled" % device)
        return
    ctx = tvm.device(device, 0)
    a = tvm.nd.array(a_np, ctx)
    w = tvm.nd.array(w_np, ctx)
    b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype),
                     ctx)
    c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype),
                     ctx)
    with tvm.build_config(auto_unroll_max_step=128,
                          unroll_explicit=device == 'rocm'):
        func1 = tvm.build(s1, [A, W, B], device)
        func1(a, w, b)
        tvm.testing.assert_allclose(b.asnumpy(), b_np, rtol=1e-5)
        func2 = tvm.build(s2, [A, W, C], device)
        func2(a, w, c)
        tvm.testing.assert_allclose(c.asnumpy(), c_np, rtol=1e-5)
Example #20
        def check_device(device):
            dev = tvm.device(device, 0)
            if not tvm.testing.device_enabled(device):
                print("skip because %s is not enabled.." % device)
                return
            fadd = tvm.build(s, [A, B, C], device, name="myadd")

            # launch the kernel.
            n = 1024
            a = tvm.nd.array((np.random.uniform(size=n) * 256).astype(A.dtype),
                             dev)
            b = tvm.nd.array((np.random.uniform(size=n) * 256).astype(B.dtype),
                             dev)
            c = tvm.nd.array(np.zeros(n, dtype=C.dtype), dev)
            ftimer = fadd.time_evaluator(fadd.entry_name, dev, number=1)
            tcost = ftimer(a, b, c).mean
            tvm.testing.assert_allclose(c.asnumpy(),
                                        a.asnumpy() + b.asnumpy(),
                                        rtol=1e-6)
    def __init__(self, model, train_loader, num_classes, criterion,
                 lr: Union[float, Callable[[int], float]],  # lr(epoch,) -> lr
                 debug_mode=False, print_freq=1000, target='llvm', dtype='float64'):
        self.model = model
        self.train_loader = train_loader
        self.num_classes = num_classes
        self.criterion = criterion
        self.lr = lr if isinstance(lr, float) else lr(0)
        self._lr_func = lr if not isinstance(lr, float) else lambda epoch: lr

        self.debug_mode = debug_mode
        self.print_freq = print_freq
        self.target = target
        self.dtype = dtype
        self.ctx = tvm.device(target)

        self._build_func()
        self._allocate_buffers_for_endpoints()
        self._initialize_weights()
def test_opencl_ternary_expression():
    def check_if_then_else(dev, n, dtype):
        A = te.placeholder((n, ), name="A", dtype=dtype)
        true_value = tvm.tir.const(1, dtype=dtype)
        false_value = tvm.tir.const(3, dtype=dtype)
        max_lhs = tvm.tir.const(2, dtype=dtype)
        max_rhs = tvm.tir.if_then_else(A[0] > 0, true_value, false_value)
        C = te.compute((n, ), lambda i: tvm.te.max(max_lhs, max_rhs), name="C")
        s = te.create_schedule(C.op)
        s[C].bind(s[C].op.axis[0], te.thread_axis("threadIdx.x"))
        fun = tvm.build(s, [A, C], target)

        a = tvm.nd.empty((n, ), A.dtype, dev)
        c = tvm.nd.empty((n, ), A.dtype, dev)
        # Only need to test compiling here
        fun(a, c)

    def check_select(dev, n, dtype):
        A = te.placeholder((n, ), name="A", dtype=dtype)
        true_value = tvm.tir.const(1, dtype=dtype)
        false_value = tvm.tir.const(3, dtype=dtype)
        max_lhs = tvm.tir.const(2, dtype=dtype)
        max_rhs = tvm.tir.Select(A[0] > 0, true_value, false_value)
        C = te.compute((n, ), lambda i: tvm.te.max(max_lhs, max_rhs), name="C")
        s = te.create_schedule(C.op)
        s[C].bind(s[C].op.axis[0], te.thread_axis("threadIdx.x"))
        fun = tvm.build(s, [A, C], target)

        a = tvm.nd.empty((n, ), A.dtype, dev)
        c = tvm.nd.empty((n, ), A.dtype, dev)
        # Only need to test compiling here
        fun(a, c)

    dev = tvm.device(target, 0)

    check_if_then_else(dev, 1, "int8")
    check_if_then_else(dev, 1, "uint8")
    check_if_then_else(dev, 1, "int16")
    check_if_then_else(dev, 1, "uint16")
    check_select(dev, 1, "int8")
    check_select(dev, 1, "uint8")
    check_select(dev, 1, "int16")
    check_select(dev, 1, "uint16")
    def check_target(target, dev):
        dev = tvm.device(target, 0)
        with tvm.target.Target(target):
            fcompute, fschedule = tvm.topi.testing.dispatch(target, _conv1d_transpose_ncw_implement)
            B = fcompute(A, W, stride, padding, A.dtype, output_padding)
            C = topi.nn.relu(B)
            s1 = fschedule([B])
            s2 = fschedule([C])
        a = tvm.nd.array(a_np, dev)
        w = tvm.nd.array(w_np, dev)
        b = tvm.nd.array(np.zeros(get_const_tuple(B.shape), dtype=B.dtype), dev)
        c = tvm.nd.array(np.zeros(get_const_tuple(C.shape), dtype=C.dtype), dev)

        func1 = tvm.build(s1, [A, W, B], target)
        func2 = tvm.build(s2, [A, W, C], target)
        func1(a, w, b)
        func2(a, w, c)
        tvm.testing.assert_allclose(b.numpy(), b_np, rtol=1e-5)
        tvm.testing.assert_allclose(c.numpy(), c_np, rtol=1e-5)
Example #24
def test_stable_sort_by_key():
    size = 6
    keys = te.placeholder((size, ), name="keys", dtype="int32")
    values = te.placeholder((size, ), name="values", dtype="int32")

    keys_out, values_out = stable_sort_by_key_thrust(keys, values)

    for target in ["cuda", "rocm"]:
        if not tvm.testing.device_enabled(target):
            print("Skip because %s is not enabled" % target)
            continue

        with tvm.target.Target(target + " -libs=thrust") as tgt:
            if not thrust_check_func[target](
                    tgt, "tvm.contrib.thrust.stable_sort_by_key"):
                print("skip because thrust is not enabled...")
                return

            dev = tvm.device(target, 0)
            s = te.create_schedule([keys_out.op, values_out.op])
            f = tvm.build(s, [keys, values, keys_out, values_out], target)

            keys_np = np.array([1, 4, 2, 8, 2, 7], np.int32)
            values_np = np.random.randint(0, 10,
                                          size=(size, )).astype(np.int32)
            keys_np_out = np.zeros(keys_np.shape, np.int32)
            values_np_out = np.zeros(values_np.shape, np.int32)
            keys_in = tvm.nd.array(keys_np, dev)
            values_in = tvm.nd.array(values_np, dev)
            keys_out = tvm.nd.array(keys_np_out, dev)
            values_out = tvm.nd.array(values_np_out, dev)
            f(keys_in, values_in, keys_out, values_out)

            ref_keys_out = np.sort(keys_np)
            ref_values_out = np.array(
                [values_np[i] for i in np.argsort(keys_np)])
            tvm.testing.assert_allclose(keys_out.asnumpy(),
                                        ref_keys_out,
                                        rtol=1e-5)
            tvm.testing.assert_allclose(values_out.asnumpy(),
                                        ref_values_out,
                                        rtol=1e-5)
Example #25
def check_graph_runtime(target,
                        ref_res,
                        device,
                        func,
                        params,
                        config,
                        opt_level,
                        expected_index=None):
    with tvm.transform.PassContext(opt_level=opt_level, config=config):
        graph, lib, new_params = relay.build(func, target, params=params)
        contexts = [tvm.cpu(0), tvm.device(device)]
        graph_json = json.loads(graph)
        if "device_index" in graph_json["attrs"]:
            device_index = graph_json["attrs"]["device_index"][1]
            assert device_index == expected_index
        mod = graph_runtime.create(graph, lib, contexts)
        mod.set_input(**new_params)
        mod.run()
        res = mod.get_output(0).asnumpy()
        tvm.testing.assert_allclose(res, ref_res, rtol=1e-5, atol=1e-5)
Example #26
    def check_target(device):
        dev = tvm.device(device, 0)
        if not tvm.testing.device_enabled(device):
            print("skip because %s is not enabled.." % device)
            return
        fapi = tvm.lower(s, args=[A0, A1, B0, B1])
        fargmax = tvm.build(fapi, target=device, name="argmax")

        np_idx = np.repeat(np.arange(nn, dtype="int32").reshape(1, nn),
                           mm,
                           axis=0)
        np_val = np.random.uniform(size=(mm, nn)).astype("float32")
        np_res = np.argmax(np_val, axis=1)

        nd_idx = tvm.nd.array(np_idx, dev)
        nd_val = tvm.nd.array(np_val, dev)
        nd_res0 = tvm.nd.array(np.zeros(mm, dtype="int32"), dev)
        nd_res1 = tvm.nd.array(np.zeros(mm, dtype="float32"), dev)
        fargmax(nd_idx, nd_val, nd_res0, nd_res1)
        tvm.testing.assert_allclose(np_res, nd_res0.numpy())
Example #27
    def check_target(target, ir):
        if not tvm.testing.device_enabled(target):
            return

        C = te.extern(
            shape,
            [],
            lambda ins, outs: ir(outs[0]),
            name="mandel_ir",
            dtype="float32",
        )
        s = te.create_schedule(C.op)

        with tvm.transform.PassContext(opt_level=3):
            func = tvm.build(s, [C], target)

        dev = tvm.device(target, 0)
        c = tvm.nd.array(np.zeros(shape, dtype=C.dtype), dev)
        func(c)
        tvm.testing.assert_allclose(c.numpy(), ref, rtol=1e-5, atol=1e-5)
    def verify_with_input(sorted_sequence_np, values_np, right):
        sorted_sequence = te.placeholder(sorted_sequence_np.shape, dtype="float32")
        values = te.placeholder(values_np.shape, dtype="float32")
        out_dtype = "int32"
        implementations = get_implementations()
        fcompute, fschedule = tvm.topi.testing.dispatch(target, implementations)

        with tvm.target.Target(target):
            indices = fcompute(sorted_sequence, values, right, out_dtype)
            s = fschedule([indices])

        func = tvm.build(s, [sorted_sequence, values, indices], target=target)
        dev = tvm.device(target, 0)

        a = tvm.nd.array(sorted_sequence_np, dev)
        b = tvm.nd.array(values_np, dev)
        c = tvm.nd.array(np.zeros(values_np.shape, dtype=indices.dtype), dev)
        func(a, b, c)
        ref = searchsorted_ref(sorted_sequence_np, values_np, right, out_dtype)
        np.testing.assert_equal(c.numpy(), ref)
Example #29
    def check_device(device):
        dev = tvm.device(device, 0)
        if device == "cuda" and not tvm.contrib.nvcc.have_int8(
                dev.compute_version):
            print("Skip because int8 intrinsics are not available")
            return

        print("Running on target: %s" % device)
        with tvm.target.Target(device):
            D = topi.cuda.dense_int8(A, B, C if use_bias else None, out_dtype)
            D = topi.nn.relu(D)
            s = topi.cuda.schedule_dense_int8([D])
        a = tvm.nd.array(a_np, dev)
        b = tvm.nd.array(b_np, dev)
        c = tvm.nd.array(c_np, dev)
        d = tvm.nd.array(np.zeros(get_const_tuple(D.shape), dtype=out_dtype),
                         dev)
        f = tvm.build(s, [A, B, C, D], device, name="dense")
        f(a, b, c, d)
        tvm.testing.assert_allclose(d.numpy(), d_np, rtol=1e-5)
Example #30
def enabled_targets():
    """Get all enabled targets with associated contexts.

    In most cases, you should use :py:func:`tvm.testing.parametrize_targets` instead of
    this function.

    In this context, enabled means that TVM was built with support for this
    target and the target name appears in the TVM_TEST_TARGETS environment
    variable. If TVM_TEST_TARGETS is not set, it defaults to the
    DEFAULT_TEST_TARGETS variable defined in this module.

    If you use this function in a test, you **must** decorate the test with
    :py:func:`tvm.testing.uses_gpu` (otherwise it will never be run on the gpu).

    Returns
    -------
    targets: list
        A list of (target name, device) pairs, one for each enabled target
    """
    return [(tgt, tvm.device(tgt)) for tgt in _get_targets()]
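
A minimal usage sketch for enabled_targets(), following the TE-schedule style used throughout the examples above. The test name, shapes, and the simple add-one computation are hypothetical, and the GPU branch assumes that the target's keys include "gpu" for GPU backends.

import numpy as np
import tvm
import tvm.testing
from tvm import te


@tvm.testing.uses_gpu
def test_add_one_on_enabled_targets():
    n = 16
    A = te.placeholder((n,), name="A")
    B = te.compute((n,), lambda i: A[i] + 1.0, name="B")
    for target, dev in tvm.testing.enabled_targets():
        s = te.create_schedule(B.op)
        # GPU-style targets need the axis bound to block/thread indices.
        if "gpu" in tvm.target.Target(target).keys:
            bx, tx = s[B].split(B.op.axis[0], factor=4)
            s[B].bind(bx, te.thread_axis("blockIdx.x"))
            s[B].bind(tx, te.thread_axis("threadIdx.x"))
        f = tvm.build(s, [A, B], target)
        a = tvm.nd.array(np.random.uniform(size=n).astype(A.dtype), dev)
        b = tvm.nd.array(np.zeros(n, dtype=B.dtype), dev)
        f(a, b)
        tvm.testing.assert_allclose(b.numpy(), a.numpy() + 1.0, rtol=1e-5)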