Example #1
    def check_sharing():
        x = relay.var("x", shape=(1, 10))
        y = relay.var("y", shape=(1, 10))
        z = relay.add(x, y)
        func = relay.Function([x, y], z)

        x_in = np.ones((1, 10)).astype("float32")
        params = {"x": x_in}
        graph, lib, params = relay.build(func, target="llvm", params=params)

        mod_shared = graph_executor.create(graph, lib, tvm.cpu(0))
        mod_shared.load_params(runtime.save_param_dict(params))
        num_mods = 10
        mods = [
            graph_executor.create(graph, lib, tvm.cpu(0))
            for _ in range(num_mods)
        ]

        for mod in mods:
            mod.share_params(mod_shared, runtime.save_param_dict(params))

        a = np.random.uniform(size=(1, 10)).astype("float32")
        for mod in mods:
            mod.run(y=a)
            out = mod.get_output(0, tvm.nd.empty((1, 10)))
            np.testing.assert_equal(out.asnumpy(), x_in + a)

        # Explicitly delete the shared module and verify correctness.
        del mod_shared
        for mod in mods:
            mod.run(y=a)
            out = mod.get_output(0, tvm.nd.empty((1, 10)))
            np.testing.assert_equal(out.asnumpy(), x_in + a)
            del mod
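For reference, a minimal self-contained sketch of the build/create/run pattern shared by these examples, assuming a TVM version where relay.build still yields a (graph, lib, params) triple; the shapes and values are illustrative:

import numpy as np
import tvm
from tvm import relay
from tvm.contrib import graph_executor

# Build a trivial add-one function and run it on the CPU.
x = relay.var("x", shape=(1, 10), dtype="float32")
func = relay.Function([x], relay.add(x, relay.const(1.0, "float32")))
graph, lib, params = relay.build(tvm.IRModule.from_expr(func), target="llvm")
mod = graph_executor.create(graph, lib, tvm.cpu(0))
mod.set_input("x", np.zeros((1, 10), dtype="float32"))
mod.run()
out = mod.get_output(0).numpy()  # all ones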
Example #2
def qnn_dense_driver(test_configuration):
    in_dtype = test_configuration["dtype"]
    out_dtype = test_configuration["out_dtype"]
    quantized_data_name = "quantized_data"
    quantized_kernel_name = "quantized_kernel"
    expected_out_dtype = test_configuration["out_dtype"]
    bias_name = "bias"
    quantized_data = relay.var(quantized_data_name,
                               shape=test_configuration["input_shape"],
                               dtype=in_dtype)
    quantized_kernel = relay.var(quantized_kernel_name,
                                 shape=test_configuration["kernel_shape"],
                                 dtype=in_dtype)
    mod = relay.qnn.op.dense(
        quantized_data,
        quantized_kernel,
        relay.const(test_configuration["input_zero_point"], "int32"),
        relay.const(test_configuration["kernel_zero_point"], "int32"),
        relay.const(test_configuration["input_scale"], "float32"),
        relay.const(test_configuration["kernel_scale"], "float32"),
        test_configuration["units"],
    )
    if test_configuration[bias_name] is not None:
        bias = relay.var(bias_name,
                         shape=test_configuration["bias"].shape,
                         dtype=out_dtype)
        mod = relay.nn.bias_add(mod, bias)
    if test_configuration["requantize"] is not None:
        requantize_config = test_configuration["requantize"]
        mod = relay.qnn.op.requantize(
            mod,
            input_scale=relay.const(requantize_config["input_scale"],
                                    "float32"),
            input_zero_point=relay.const(0, "int32"),
            output_scale=relay.const(requantize_config["output_scale"],
                                     "float32"),
            output_zero_point=relay.const(
                requantize_config["output_zero_point"], "int32"),
            out_dtype=requantize_config["out_dtype"],
        )
        expected_out_dtype = requantize_config["out_dtype"]

    mod = relay.Function(relay.analysis.free_vars(mod), mod)
    mod = tvm.IRModule.from_expr(mod)
    mod = relay.transform.InferType()(mod)
    mod = relay.qnn.transform.CanonicalizeOps()(mod)
    with tvm.transform.PassContext(opt_level=2):
        graph, lib, params = relay.build(mod, "llvm", params=None)
        mod = graph_executor.create(graph, lib, device=tvm.cpu(0))
        mod.set_input(quantized_data_name,
                      test_configuration[quantized_data_name])
        mod.set_input(quantized_kernel_name,
                      test_configuration[quantized_kernel_name])
        if test_configuration[bias_name] is not None:
            mod.set_input(bias_name, test_configuration[bias_name])
        mod.set_input(**params)
        mod.run()
        res = mod.get_output(0).asnumpy()
        np.testing.assert_equal(res, test_configuration["output"])
        assert res.dtype == expected_out_dtype
Example #3
    def test_apply(relay_op, name, f_numpy, low, high, step, dtype="float32"):
        a_np = np.arange(low, high, step).astype(dtype).reshape((1, -1))
        b_np = f_numpy(a_np)

        x = relay.var("x", shape=a_np.shape, dtype=dtype)
        y = relay_op(x)
        func = relay.Function([x], y)
        mod = tvm.IRModule.from_expr(func)

        with tvm.transform.PassContext(opt_level=3,
                                       required_pass=["FastMath"]):
            graph, lib, params = relay.build(mod, target=target, params=None)

        # Check that the op targeted by fast math has been converted to a
        # fused function in the built graph.
        func_name = "fused_" + name
        # When there are multiple targets in tvm.testing.parametrize_targets,
        # the built function name carries a "_1" suffix.
        assert func_name in graph

        m = graph_executor.create(graph, lib, dev)
        # Set inputs
        m.set_input("x", tvm.nd.array(a_np, dev))
        m.set_input(**params)
        # Execute
        m.run()
        # Get outputs
        tvm_output = m.get_output(0)
        tvm.testing.assert_allclose(tvm_output.numpy(),
                                    b_np,
                                    rtol=1e-5,
                                    atol=1e-5)
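An illustrative invocation of the helper above, assuming relay.exp is one of the ops the FastMath pass rewrites (to fast_exp); the bounds roughly cover float32 exp's usable input range:

    test_apply(relay.exp, "fast_exp", np.exp, low=-88, high=88, step=0.01)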
Example #4
 def check_verify():
     mod = graph_executor.create(graph, mhost, dev)
     mod.set_input(**params)
     mod.run()
     out = mod.get_output(0, tvm.nd.empty(shape))
     np.testing.assert_equal(out.numpy(),
                             tensor_a + tensor_b - tensor_c + tensor_d)
Example #5
 def check_verify():
     mlib = tvm.build(s, [A, B], "llvm", name="myadd")
     mod = graph_executor.create(graph, mlib, tvm.cpu(0))
     a = np.random.uniform(size=(n, )).astype(A.dtype)
     mod.run(x=a)
     out = mod.get_output(0, tvm.nd.empty((n, )))
     np.testing.assert_equal(out.asnumpy(), a + 1)
Example #6
def quantize_test_driver(in_dtype, quant_args, axis, out_dtype, in_data,
                         verify_output_data):
    shape = in_data.shape
    input_data = relay.var("input_data", shape=shape, dtype=in_dtype)
    output_zero_point = relay.const(quant_args["out_zero_point"])
    output_scale = relay.const(quant_args["out_scale"])
    quantized_output = relay.qnn.op.quantize(
        input_data,
        output_scale=output_scale,
        output_zero_point=output_zero_point,
        axis=axis,
        out_dtype=out_dtype,
    )
    mod = relay.Function(relay.analysis.free_vars(quantized_output),
                         quantized_output)
    mod = tvm.IRModule.from_expr(mod)
    with tvm.transform.PassContext(opt_level=3):
        graph, lib, params = relay.build(mod, "llvm", params=None)
        rt_mod = graph_executor.create(graph, lib, device=tvm.cpu(0))
        rt_mod.set_input(input_data=in_data)
        rt_mod.set_input(**params)
        rt_mod.run()
        res = rt_mod.get_output(0).numpy()
        np.testing.assert_equal(res, verify_output_data)
        assert res.dtype == out_dtype
Example #7
def test_benchmark():
    mod, params = mlp.get_workload(1)
    lib = relay.build(mod, target="llvm", params=params)
    exe = graph_executor.create(lib.get_graph_json(), lib.lib, tvm.cpu())
    data = tvm.nd.array(np.random.rand(1, 1, 28, 28).astype("float32"))
    result = exe.benchmark(tvm.cpu(),
                           data=data,
                           func_name="run",
                           repeat=2,
                           number=1)
    assert result.mean == result.median
    assert result.mean > 0
    assert len(result.results) == 2

    with patch.object(
            tvm.runtime.module.Module,
            "time_evaluator",
            return_value=lambda: tvm.runtime.module.BenchmarkResult(
                [1, 2, 2, 5]),
    ) as method:
        result = exe.benchmark(tvm.cpu(),
                               data=data,
                               func_name="run",
                               repeat=2,
                               number=1)
        assert result.mean == 2.5
        assert result.median == 2.0
        assert result.max == 5
        assert result.min == 1
        assert result.std == 1.5
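The asserted statistics follow directly from the mocked samples [1, 2, 2, 5]; a quick check with numpy (BenchmarkResult reports the population standard deviation, i.e. np.std with the default ddof=0):

import numpy as np

samples = np.array([1, 2, 2, 5])
print(np.mean(samples), np.median(samples), np.std(samples))  # 2.5 2.0 1.5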
Example #8
def relay_micro_build(func, dev_config, params=None):
    """Create a graph executor module with a micro device context from a Relay function.

    Parameters
    ----------
    func : relay.Function
        function to compile

    dev_config : Dict[str, Any]
        MicroTVM config dict for the target device

    params : dict
        input parameters that do not change during inference

    Returns
    -------
    mod : tvm.runtime.Module
        graph executor module for the target device
    """
    with tvm.transform.PassContext(
        disabled_pass={"FuseOps"}, config={"tir.disable_vectorize": True}
    ):
        graph, c_mod, params = relay.build(func, target=TARGET, params=params)
    micro_mod = micro.create_micro_mod(c_mod, dev_config)
    ctx = tvm.micro_dev(0)
    mod = graph_executor.create(graph, micro_mod, ctx)
    mod.set_input(**params)
    return mod
Example #9
def test_benchmark_end_to_end_rpc():
    server = rpc.Server("127.0.0.1")
    remote = rpc.connect(server.host, server.port)

    mod, params = mlp.get_workload(1)
    lib = relay.build(mod, target="cuda", params=params)

    temp = utils.tempdir()
    path = temp.relpath("library.so")
    lib.export_library(path)
    remote.upload(path)
    rlib = remote.load_module("library.so")

    dev = remote.device("cuda")
    exe = graph_executor.create(lib.get_graph_json(), rlib, dev)

    data = tvm.nd.array(np.random.rand(1, 1, 28, 28).astype("float32"),
                        device=dev)
    result = exe.benchmark(dev,
                           data=data,
                           func_name="run",
                           repeat=2,
                           number=1,
                           end_to_end=True)
    assert result.mean > 0
    assert len(result.results) == 2
Example #10
def verify(mod, goldens):
    with tvm.transform.PassContext(opt_level=3):
        graph, lib, params = relay.build(mod, "llvm", params=None)
        golden_data, golden_output = goldens
        rt_mod = graph_executor.create(graph, lib, device=tvm.cpu(0))
        rt_mod.set_input("quantized_data", golden_data)
        rt_mod.set_input(**params)
        rt_mod.run()
        res = rt_mod.get_output(0).asnumpy()
        np.testing.assert_equal(res, golden_output)
Example #11
 def get_output(func, golden_inputs):
     with tvm.transform.PassContext(opt_level=2):
         golden_data, golden_weight = golden_inputs
         params = {"kernel": golden_weight}
         libs = relay.build(func, "llvm", params=params)
         mod = graph_executor.create(libs.graph_json, libs.lib, device=tvm.cpu(0))
         mod.set_input("data", golden_data)
         mod.set_input(**libs.params)
         mod.run()
         res = mod.get_output(0).numpy()
         return res
Example #12
def test_legacy_compatibility():
    mod, params = relay.testing.synthetic.get_workload()
    with relay.build_config(opt_level=3):
        graph, lib, graph_params = relay.build_module.build(mod, "llvm", params=params)
    data = np.random.uniform(-1, 1, size=input_shape(mod)).astype("float32")
    dev = tvm.cpu()
    module = graph_executor.create(graph, lib, dev)
    module.set_input("data", data)
    module.set_input(**graph_params)
    module.run()
    out = module.get_output(0).numpy()
    tvm.testing.assert_allclose(out, verify(data), atol=1e-5)
Example #13
    def check_device(device, target_device):
        if not tvm.runtime.enabled(target_device):
            print("Skip test because {} is not enabled.".format(target_device))
            return

        device_dev = tvm.device(device)
        graph = get_simplex_graph(host_dev.device_type, device_dev.device_type)
        shape = (4, )

        # Create module for add whose target is the device.
        tensor_a = te.placeholder(shape, name="A")
        tensor_b = te.placeholder(shape, name="B")
        elemwise_add = te.compute(shape,
                                  lambda *i: tensor_a(*i) + tensor_b(*i),
                                  name="elemwise_add")
        target = topi.cpp.TEST_create_target(device)
        schedule_add = topi.cpp.cuda.schedule_injective(target, [elemwise_add])
        lower_add = tvm.lower(schedule_add, [tensor_a, tensor_b, elemwise_add],
                              name="elemwise_add")

        # Insert a copy node. Neither compute nor schedule is required for it;
        # the copy is performed at runtime and simply moves data from the
        # input to the output.
        tensor_copy = te.placeholder(shape, name="__copy")

        # Create module for sub whose target is the host.
        tensor_c = te.placeholder(shape, name="C")
        elemwise_sub = te.compute(shape,
                                  lambda *i: tensor_copy(*i) - tensor_c(*i),
                                  name="elemwise_sub")
        schedule_sub = te.create_schedule(elemwise_sub.op)
        lower_sub = tvm.lower(schedule_sub,
                              [tensor_copy, tensor_c, elemwise_sub],
                              name="elemwise_sub")

        target_flist = {target_device: lower_add, target_host: lower_sub}
        target = tvm.target.Target(target, target_host)
        mhost = tvm.build(target_flist, target=target)
        dev = [host_dev, device_dev]
        mod = graph_executor.create(graph, mhost, dev)
        params = {}
        params["A"] = tensor_a = np.random.uniform(size=shape).astype(
            tensor_a.dtype)
        params["B"] = tensor_b = np.random.uniform(size=shape).astype(
            tensor_b.dtype)
        params["C"] = tensor_c = np.random.uniform(size=shape).astype(
            tensor_c.dtype)
        mod.set_input(**params)
        mod.run()
        out = mod.get_output(0, tvm.nd.empty(shape))
        np.testing.assert_equal(out.asnumpy(),
                                (tensor_a + tensor_b) - tensor_c)
Example #14
def test_benchmark_end_to_end(dev, target):
    mod, params = mlp.get_workload(1)
    lib = relay.build(mod, target=target, params=params)
    exe = graph_executor.create(lib.get_graph_json(), lib.lib, dev)
    data = tvm.nd.array(np.random.rand(1, 1, 28, 28).astype("float32"))
    result = exe.benchmark(dev,
                           data=data,
                           func_name="run",
                           repeat=2,
                           number=1,
                           end_to_end=True)
    assert result.mean > 0
    assert len(result.results) == 2
Example #15
 def check_load_module():
     temp = utils.tempdir()
     path_lib = temp.relpath("deploy.so")
     mhost.export_library(path_lib)
     with open(temp.relpath("deploy.json"), "w") as out_file:
         out_file.write(graph)
     loaded_lib = tvm.runtime.load_module(path_lib)
     with open(temp.relpath("deploy.json")) as in_file:
         loaded_graph = in_file.read()
     mod = graph_executor.create(loaded_graph, loaded_lib, dev)
     mod.set_input(**params)
     mod.run()
     out = mod.get_output(0, tvm.nd.empty(shape))
     np.testing.assert_equal(out.numpy(),
                             tensor_a + tensor_b - tensor_c + tensor_d)
Example #16
def test_build(build_dir):
    """Sanity check with the cat image we download."""
    with open(osp.join(build_dir, "deploy_graph.json")) as f:
        graph = f.read()
    lib = tvm.runtime.load_module(osp.join(build_dir, "deploy_lib.so"))
    with open(osp.join(build_dir, "deploy_param.params"), "rb") as f:
        params = bytearray(f.read())
    input_data = get_cat_image()
    dev = tvm.cpu()
    module = graph_executor.create(graph, lib, dev)
    module.load_params(params)
    module.run(data=input_data)
    out = module.get_output(0).numpy()
    top1 = np.argmax(out[0])
    synset = download_img_labels()
    print("TVM prediction top-1:", top1, synset[top1])
Example #17
def qnn_batch_matmul_driver(test_configuration):
    in_dtype = test_configuration["dtype"]
    out_dtype = test_configuration["out_dtype"]
    quantized_x_name = "quantized_x"
    quantized_y_name = "quantized_y"
    expected_out_dtype = test_configuration["out_dtype"]
    quantized_x = relay.var(quantized_x_name,
                            shape=test_configuration["x_shape"],
                            dtype=in_dtype)
    quantized_y = relay.var(quantized_y_name,
                            shape=test_configuration["y_shape"],
                            dtype=in_dtype)
    mod = relay.qnn.op.batch_matmul(
        quantized_x,
        quantized_y,
        relay.const(test_configuration["x_zero_point"], "int32"),
        relay.const(test_configuration["y_zero_point"], "int32"),
        relay.const(test_configuration["x_scale"], "float32"),
        relay.const(test_configuration["y_scale"], "float32"),
    )
    if test_configuration["requantize"] is not None:
        requantize_config = test_configuration["requantize"]
        mod = relay.qnn.op.requantize(
            mod,
            input_scale=relay.const(requantize_config["input_scale"],
                                    "float32"),
            input_zero_point=relay.const(0, "int32"),
            output_scale=relay.const(requantize_config["output_scale"],
                                     "float32"),
            output_zero_point=relay.const(
                requantize_config["output_zero_point"], "int32"),
            out_dtype=requantize_config["out_dtype"],
        )
        expected_out_dtype = requantize_config["out_dtype"]

    mod = relay.Function(relay.analysis.free_vars(mod), mod)
    mod = tvm.IRModule.from_expr(mod)
    mod = relay.transform.InferType()(mod)
    mod = relay.qnn.transform.CanonicalizeOps()(mod)
    with tvm.transform.PassContext(opt_level=2):
        graph, lib, params = relay.build(mod, "llvm", params=None)
        mod = graph_executor.create(graph, lib, device=tvm.cpu(0))
        mod.set_input(quantized_x_name, test_configuration[quantized_x_name])
        mod.set_input(quantized_y_name, test_configuration[quantized_y_name])
        mod.set_input(**params)
        mod.run()
        res = mod.get_output(0).numpy()
        np.testing.assert_equal(res, test_configuration["output"])
        assert res.dtype == expected_out_dtype
Example #18
 def check_remote(server):
     mlib = tvm.build(s, [A, B], "llvm", name="myadd")
     remote = rpc.connect(server.host, server.port)
     temp = utils.tempdir()
     dev = remote.cpu(0)
     path_dso = temp.relpath("dev_lib.so")
     mlib.export_library(path_dso)
     remote.upload(path_dso)
     mlib = remote.load_module("dev_lib.so")
     mod = graph_executor.create(graph, mlib, remote.cpu(0))
     a = np.random.uniform(size=(n, )).astype(A.dtype)
     mod.run(x=tvm.nd.array(a, dev))
     out = tvm.nd.empty((n, ), device=dev)
     out = mod.get_output(0, out)
     np.testing.assert_equal(out.numpy(), a + 1)
Example #19
    def load_tvm(self, export_dir):
        """Load tvm module from export directory"""
        self.export_dir = export_dir
        self.tvm_lib = load_module(os.path.join(export_dir, TVM_ASSETS[0]))
        with open(os.path.join(export_dir, TVM_ASSETS[1]),
                  "r",
                  encoding="utf8") as f:
            self.tvm_graph = f.read()
        with open(os.path.join(export_dir, TVM_ASSETS[2]), "rb") as f:
            self.tvm_params = relay.load_param_dict(f.read())

        self.tvm_module = graph_executor.create(self.tvm_graph,
                                                self.tvm_lib,
                                                device=self.dev)
        self.tvm_module.set_input(**self.tvm_params)
        return self.tvm_module
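A hedged sketch of the matching export step, assuming the same three-asset layout (compiled library, graph JSON, serialized params); the save_tvm helper and the asset file names are illustrative, not part of the original API:

import os
from tvm import runtime

def save_tvm(export_dir, graph_json, lib, params,
             assets=("mod.so", "graph.json", "params.bin")):
    # The compiled operators, graph structure, and weights are saved
    # separately, mirroring what load_tvm above reads back.
    lib.export_library(os.path.join(export_dir, assets[0]))
    with open(os.path.join(export_dir, assets[1]), "w", encoding="utf8") as f:
        f.write(graph_json)
    with open(os.path.join(export_dir, assets[2]), "wb") as f:
        f.write(runtime.save_param_dict(params))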
Example #20
def verify(data):
    if not tvm.runtime.enabled("llvm"):
        print("Skip because llvm is not enabled")
        return
    mod, params = relay.testing.synthetic.get_workload()
    with relay.build_config(opt_level=3):
        graph, lib, graph_params = relay.build_module.build(mod, "llvm", params=params)

    dev = tvm.cpu()
    module = graph_executor.create(graph, lib, dev)
    module.set_input("data", data)
    module.set_input(**graph_params)
    module.run()
    out = module.get_output(0).numpy()

    return out
Example #21
def verify_fused_batch_norm(shape):
    g = tf.Graph()
    with g.as_default():
        input_tensor = tf.placeholder(tf.float32, shape=shape, name="input")
        alpha = tf.constant(
            np.random.rand(shape[-1], ),
            dtype=tf.float32,
            name="alpha",
        )
        beta = tf.constant(
            np.random.rand(shape[-1], ),
            dtype=tf.float32,
            name="beta",
        )
        bn = tf.nn.fused_batch_norm(x=input_tensor,
                                    offset=beta,
                                    scale=alpha,
                                    name="bn")
        out = tf.identity(bn[0], name="output")
    data = np.random.rand(*shape)
    with tf.Session(graph=out.graph) as sess:
        sess.run([tf.global_variables_initializer()])
        tf_out = sess.run(out, feed_dict={input_tensor: data})
        constant_graph = graph_util.convert_variables_to_constants(
            sess, sess.graph_def, ["output"])

    for device in ["llvm"]:
        dev = tvm.device(device, 0)
        if not tvm.testing.device_enabled(device):
            print("Skip because %s is not enabled" % device)
            continue
        mod, params = relay.frontend.from_tensorflow(constant_graph,
                                                     outputs=["output"])
        with tvm.transform.PassContext(opt_level=3):
            graph, lib, params = relay.build(mod, target=device, params=params)
        from tvm.contrib import graph_executor

        m = graph_executor.create(graph, lib, dev)
        m.set_input(**params)
        m.set_input("input", data)
        m.run()
        tvm_out = m.get_output(0)
        tvm.testing.assert_allclose(tvm_out.numpy(),
                                    tf_out.astype(tvm_out.dtype),
                                    atol=1e-3,
                                    rtol=1e-3)
Example #22
def run_func(func, params, x):
    with tvm.transform.PassContext(opt_level=3):
        graph, lib, new_params = relay.build(func, "llvm", params=params)

    from tvm.contrib import graph_executor

    dev = tvm.cpu(0)
    dtype = "float32"
    m = graph_executor.create(graph, lib, dev)
    # set inputs
    m.set_input("data", tvm.nd.array(x.astype(dtype)))
    m.set_input(**new_params)
    # execute
    m.run()
    # get outputs
    tvm_output = m.get_output(0)
    return tvm_output.asnumpy()
Example #23
    def _build_tvm(self, debug_runtime=False):
        # compile kernels with history best records
        with autotvm.apply_history_best(self.log_file):
            with tvm.transform.PassContext(opt_level=3):
                self.tvm_graph, self.tvm_lib, self.tvm_params = relay.build(
                    self.mod, target=self.target, params=self.params)

        if not debug_runtime:
            self.tvm_module = graph_executor.create(self.tvm_graph,
                                                    self.tvm_lib,
                                                    device=self.dev)
        else:
            self.tvm_module = debug_executor.create(self.tvm_graph,
                                                    self.tvm_lib,
                                                    device=self.dev)
        self.tvm_module.set_input(**self.tvm_params)
        return self.tvm_module
Example #24
def test_fac_relay_build():
    # Check the default optimization pipeline
    shape_x = [1, 5, 5, 4]
    shape_w = [3, 3, 4, 1]

    x_np = np.random.randint(-128, 127, size=shape_x,
                             dtype="int8").astype("float32")
    w_np = np.random.randint(-128, 127, size=shape_w,
                             dtype="int8").astype("float32")

    weight = relay.const(w_np)
    data = relay.var("data", shape=shape_x, dtype="float32")
    op1 = relay.nn.space_to_batch_nd(data,
                                     block_shape=[2, 2],
                                     paddings=[[2, 3], [2, 3]])
    op2 = relay.nn.conv2d(
        op1,
        weight,
        padding=[0, 0, 0, 0],
        groups=4,
        channels=4,
        kernel_size=[3, 3],
        data_layout="NHWC",
        kernel_layout="HWOI",
    )
    expr = relay.nn.batch_to_space_nd(op2,
                                      block_shape=[2, 2],
                                      crops=[[0, 1], [0, 1]])

    mod_def = tvm.relay.transform.InferType()(tvm.IRModule.from_expr(expr))
    result_def = (relay.create_executor(
        "vm", mod=mod_def, device=tvm.cpu(),
        target="llvm").evaluate()(x_np).numpy())

    graph, lib, params = relay.build(mod_def, "llvm", params=None)
    rt_mod = graph_executor.create(graph, lib, device=tvm.cpu())
    rt_mod.set_input("data", x_np)
    rt_mod.set_input(**params)
    rt_mod.run()
    result_flat = rt_mod.get_output(0).numpy()

    assert "space_to_batch_nd" not in graph
    assert "conv2d" in graph
    assert "batch_to_space_nd" not in graph

    assert np.array_equal(result_def, result_flat)
Example #25
def test_with_params():
    x = relay.var("x", shape=(10, 5))
    y = relay.var("y", shape=(1, 5))
    z = relay.add(x, y)
    z = relay.exp(z)
    func = relay.Function([x, y], z)
    x_data = np.random.rand(10, 5).astype("float32")
    y_data = np.random.rand(1, 5).astype("float32")
    params = {"y": y_data}
    graph, lib, params = relay.build(tvm.IRModule.from_expr(func),
                                     "llvm",
                                     params=params)
    mod = graph_executor.create(graph, lib, device=tvm.cpu(0))
    mod.set_input(**params)
    mod.set_input(x=x_data)
    mod.run()
    res = mod.get_output(0).numpy()
    ref_res = np.exp(y_data + x_data)
    tvm.testing.assert_allclose(res, ref_res, atol=1e-5, rtol=1e-5)
Example #26
def test_tflite_output_multiplier_greater_than_one():
    with TempOpAttr("qnn.conv2d", "FTVMQnnLegalize", legalize_qnn_conv2d):

        # uint8 input
        data_shape = (2, 1, 2, 4)
        data_dtype = "uint8"
        kernel_shape = (3, 1, 2, 2)
        kernel_dtype = "uint8"
        ref_func, qnn_func = get_funcs(
            data_shape=data_shape,
            data_dtype=data_dtype,
            kernel_shape=kernel_shape,
            kernel_dtype=kernel_dtype,
            input_scale=1.0,
            kernel_scale=1.0,
            input_zero_point=128,
            kernel_zero_point=128,
            kernel_size=(2, 2),
            padding=(0, 0),
            strides=(2, 2),
            dilation=(1, 1),
            data_layout="NCHW",
            kernel_layout="OIHW",
            out_dtype="int32",
        )
        golden_data = 128 + np.array((1, 1, 1, 1, 2, 2, 2, 2, 1, 2, 3, 4, 1, 2, 3, 4)).reshape(
            data_shape
        ).astype("uint8")
        golden_weight = 128 + np.array((1, 2, 3, 4, -1, 1, -1, 1, -1, -1, 1, 1)).reshape(
            kernel_shape
        )
        golden_weight = golden_weight.astype("uint8")

        with tvm.transform.PassContext(opt_level=2):
            params = {"kernel": golden_weight}
            graph, lib, params = relay.build(qnn_func, "llvm", params=params)
            mod = graph_executor.create(graph, lib, device=tvm.cpu(0))
            mod.set_input("data", golden_data)
            mod.set_input(**params)
            mod.run()
            qnn_output = mod.get_output(0).numpy()
        golden_output = np.array((17, 17, 0, 0, 2, 2, 16, 36, 2, 2, 0, 0)).reshape(2, 3, 1, 2)
        np.testing.assert_equal(qnn_output, golden_output)
Example #27
def check_graph_executor(target,
                         ref_res,
                         device,
                         func,
                         params,
                         config,
                         opt_level,
                         expected_index=None):
    with tvm.transform.PassContext(opt_level=opt_level, config=config):
        graph, lib, new_params = relay.build(func, target, params=params)
        contexts = [tvm.cpu(0), tvm.device(device)]
        graph_json = json.loads(graph)
        if "device_index" in graph_json["attrs"]:
            device_index = graph_json["attrs"]["device_index"][1]
            assert device_index == expected_index
        mod = graph_executor.create(graph, lib, contexts)
        mod.set_input(**new_params)
        mod.run()
        res = mod.get_output(0).numpy()
        tvm.testing.assert_allclose(res, ref_res, rtol=1e-5, atol=1e-5)
Example #28
    def verify_graph_executor(remote, target, shape, dtype):
        x = relay.var("x")
        y = relay.const(1)
        z = relay.add(x, y)
        func = relay.Function([x], z)

        x_in = np.ones(shape).astype(dtype)
        params = {"x": x_in}
        graph, lib, params = relay.build(func, target=target, params=params)

        temp = utils.tempdir()
        path_dso = temp.relpath("dev_lib.o")
        lib.save(path_dso)
        remote.upload(path_dso)
        lib = remote.load_module("dev_lib.o")
        dev = remote.cpu(0)
        mod = graph_executor.create(graph, lib, dev)
        mod.load_params(runtime.save_param_dict(params))
        mod.run()
        out = mod.get_output(0, tvm.nd.empty(shape, dtype=dtype, device=dev))
        tvm.testing.assert_allclose(x_in + 1, out.numpy())
Example #29
def test_compile_fused_identity_cast():
    # a fused function that would be optimized to an identity
    x = relay.var("x", shape=[16], dtype="float32")
    y = relay.cast(x, "float32")
    func1 = relay.Function([x], y).with_attr("Primitive", 1)

    # a fused function with param pass-through
    x = relay.var("x", shape=[16], dtype="float32")
    y = relay.add(x, relay.const(3.14, "float32"))
    func2 = relay.Function([x], relay.Tuple([x, y])).with_attr("Primitive", 1)

    x_global = relay.var("xx", shape=[16], dtype="float32")
    tup = func2(x_global)
    y_global = func1(relay.TupleGetItem(tup, 0) + relay.TupleGetItem(tup, 1))

    mod = tvm.IRModule.from_expr(relay.Function([x_global], y_global))
    for target, device in tvm.testing.enabled_targets():
        with tvm.transform.PassContext(opt_level=2):
            graph, lib, _ = relay.build(mod, target=target)
            executor = graph_executor.create(graph, lib, device=device)
            executor.run()
Example #30
 def dequantize_test_driver(in_dtype, quant_args, in_data,
                            verify_output_data):
     shape = in_data.shape
     input_data = relay.var("input_data", shape=shape, dtype=in_dtype)
     min_range = quant_args["min_range"]
     max_range = quant_args["max_range"]
     dequantized_output = dequantize_mxnet_min_max(input_data,
                                                   min_range=min_range,
                                                   max_range=max_range,
                                                   in_dtype=in_dtype)
     mod = relay.Function(relay.analysis.free_vars(dequantized_output),
                          dequantized_output)
     mod = tvm.IRModule.from_expr(mod)
     with tvm.transform.PassContext(opt_level=3):
         graph, lib, params = relay.build(mod, "llvm", params=None)
         rt_mod = graph_executor.create(graph, lib, device=tvm.cpu(0))
         rt_mod.set_input(input_data=in_data)
         rt_mod.set_input(**params)
         rt_mod.run()
         res = rt_mod.get_output(0).asnumpy()
         assert np.allclose(res, verify_output_data)
         assert res.dtype == np.float32