Example No. 1
def generate_ref_data(mod, input_data, params=None, target="llvm"):
    """Generate reference data through executing the relay module"""
    with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}):
        lib = relay.build(mod, target=target, params=params)

    lib_name = "mod.so"
    temp = utils.tempdir()
    lib_path = temp.relpath(lib_name)
    lib.export_library(lib_path)
    lib = tvm.runtime.load_module(lib_path)
    grt_mod = graph_executor.GraphModule(lib["default"](tvm.cpu()))
    grt_mod.set_input(**input_data)
    grt_mod.run()
    output_count = grt_mod.get_num_outputs()
    out = [grt_mod.get_output(i).numpy() for i in range(output_count)]
    if isinstance(mod, tvm.relay.Function):
        main = mod
    else:
        main = mod["main"]
    if main.attrs is None or main.attrs["output_tensor_names"] is None:
        output_tensor_names = ["output" if i == 0 else f"output{i+1}" for i in range(output_count)]
    else:
        output_tensor_names = main.attrs["output_tensor_names"]

    return dict(zip(output_tensor_names, out))
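A minimal usage sketch (not part of the original snippet), assuming tvm, relay, numpy as np, and tvm.contrib's utils and graph_executor (used by the helper above) are already imported:

# Hedged sketch: a toy single-output module; with no output names attached,
# the helper above labels the result "output".
x = relay.var("x", shape=(1, 4), dtype="float32")
toy_mod = tvm.IRModule.from_expr(relay.Function([x], relay.add(x, relay.const(1.0, "float32"))))
inputs = {"x": np.zeros((1, 4), dtype="float32")}
ref_outputs = generate_ref_data(toy_mod, inputs)
print(ref_outputs["output"])  # expected: an array of ones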
Example No. 2
def _get_tvm_output(net, data, build_dtype="float32", states=None):
    """Compute TVM output"""
    dtype = "float32"
    mod, params = relay.frontend.from_darknet(net, data.shape, dtype)
    # verify that from_darknet creates a valid, parsable relay program
    mod = relay.transform.InferType()(mod)
    astext(mod)

    target = "llvm"
    shape_dict = {"data": data.shape}
    lib = relay.build(mod, target, params=params)

    # Execute on TVM
    dev = tvm.cpu(0)
    m = graph_executor.GraphModule(lib["default"](dev))
    # set inputs
    m.set_input("data", tvm.nd.array(data.astype(dtype)))
    if states:
        for name in states.keys():
            m.set_input(name, tvm.nd.array(states[name].astype(dtype)))
    m.run()
    # get outputs
    tvm_out = []
    for i in range(m.get_num_outputs()):
        tvm_out.append(m.get_output(i).numpy())
    return tvm_out
Example No. 3
def infer_value(input_val, params, mod=None):
    """A hack for getting the value of an expression by evaluating a
    portion of the relay graph. This is often needed for functions that
    whose output shape depends on the value of a tensor.
    """
    # Check that all free variables have associated parameters.
    assert all(var.name_hint in params.keys() for var in analysis.free_vars(
        input_val)), "All inputs to infer must be available in params."
    try:
        # TODO(kevinthesun): Use VM for all cases.
        # pylint: disable=import-outside-toplevel
        from tvm.contrib import graph_executor

        func = _function.Function(analysis.free_vars(input_val), input_val)
        with tvm.transform.PassContext(opt_level=0):
            lib = tvm.relay.build(func, target="llvm", params=params)
        dev = tvm.cpu(0)
        m = graph_executor.GraphModule(lib["default"](dev))
        m.run()
        return m.get_output(0)
    except Exception:
        if isinstance(mod, IRModule):
            mod["main"] = _function.Function(analysis.free_vars(input_val),
                                             input_val)
        else:
            mod = IRModule.from_expr(input_val)
        exc = tvm.relay.create_executor("debug",
                                        mod=mod,
                                        device=tvm.cpu(),
                                        target="llvm")
        inputs = []
        for param in mod["main"].params:
            inputs.append(params[param.name_hint])
        result = exc.evaluate()(*inputs)
        return result
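A hedged usage sketch (not from the original source), assuming tvm, relay, and numpy as np are imported and the frontend helpers used above are in scope:

# Hypothetical call: evaluate a shape-dependent expression down to a concrete tensor.
w = relay.var("w", shape=(2, 3), dtype="float32")
shape_expr = relay.shape_of(w)
params = {"w": tvm.nd.array(np.ones((2, 3), dtype="float32"))}
value = infer_value(shape_expr, params)  # NDArray holding [2, 3]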
Example No. 4
def test_cpu():
    if not tvm.testing.device_enabled("llvm"):
        print("Skip because llvm is not enabled")
        return
    mod, params = relay.testing.synthetic.get_workload()
    with relay.build_config(opt_level=3):
        compiled_graph_lib = relay.build_module.build(mod,
                                                      "llvm",
                                                      params=params)
    data = np.random.uniform(-1, 1, size=input_shape(mod)).astype("float32")
    # raw api
    dev = tvm.cpu()
    gmod = compiled_graph_lib["default"](dev)
    set_input = gmod["set_input"]
    run = gmod["run"]
    get_output = gmod["get_output"]
    set_input("data", tvm.nd.array(data))
    run()
    out = get_output(0).asnumpy()
    tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

    # graph executor wrapper
    gmod = graph_executor.GraphModule(compiled_graph_lib["default"](dev))
    gmod.set_input("data", data)
    gmod.run()
    out = gmod.get_output(0).asnumpy()
    tvm.testing.assert_allclose(out, verify(data), atol=1e-5)
Example No. 5
def test_graph_executor_remote_run(host, port):
    remote_session = rpc.connect(host, port)
    target = tvm.target.Target(target=f"llvm -mtriple={ARCH}-apple-darwin")
    device = remote_session.cpu(0)

    size = 100
    a = np.random.uniform(size=size).astype(DTYPE)
    b = np.random.uniform(size=size).astype(DTYPE)
    mod, params = get_add_relay_module(a, b)
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(mod,
                          target=target,
                          target_host=target,
                          params=params)

    path_dso = export_lib(lib)
    remote_session.upload(path_dso)
    lib = remote_session.load_module(DSO_NAME)

    gen_module = graph_executor.GraphModule(lib["default"](device))

    # Check set input
    gen_module.set_input("a", tvm.nd.array(a))
    gen_module.set_input("b", tvm.nd.array(b))
    tvm.testing.assert_allclose(gen_module.get_input(0).numpy(), a)
    tvm.testing.assert_allclose(gen_module.get_input(1).numpy(), b)

    # Check run
    gen_module.run()
    out = gen_module.get_output(0)
    tvm.testing.assert_allclose(out.numpy(), a + b)
Example No. 6
    def tune_and_evaluate():
        print("Begin tuning...")
        tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
        tune_option = auto_scheduler.TuningOptions(
            num_measure_trials=200,
            builder=auto_scheduler.LocalBuilder(build_func="ndk"),
            runner=auto_scheduler.RPCRunner(
                device_key,
                host=rpc_host,
                port=rpc_port,
                timeout=30,
                repeat=1,
                min_repeat_ms=200,
                enable_cpu_cache_flush=True,
            ),
            measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
        )

        tuner.tune(tune_option)

        # Compile with the history best
        print("Compile...")
        with auto_scheduler.ApplyHistoryBest(log_file):
            with tvm.transform.PassContext(
                    opt_level=3,
                    config={"relay.backend.use_auto_scheduler": True}):
                lib = relay.build(mod, target=target, params=params)

        # Export library
        tmp = tempdir()
        filename = "net.so"
        lib.export_library(tmp.relpath(filename), ndk.create_shared)

        # Upload module to device
        print("Upload...")
        remote = auto_scheduler.utils.request_remote(device_key,
                                                     rpc_host,
                                                     rpc_port,
                                                     timeout=10000)
        remote.upload(tmp.relpath(filename))
        rlib = remote.load_module(filename)

        # Create graph executor
        dev = remote.cpu()
        module = graph_executor.GraphModule(rlib["default"](dev))
        for key, value in shape_dict.items():
            data_tvm = tvm.nd.array(
                (np.random.uniform(size=value)).astype("float32"))
            module.set_input(key, data_tvm)

        # Evaluate
        print("Evaluate inference time cost...")
        ftimer = module.module.time_evaluator("run",
                                              dev,
                                              repeat=3,
                                              min_repeat_ms=500)
        prof_res = np.array(ftimer().results) * 1e3  # convert to millisecond
        print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
              (np.mean(prof_res), np.std(prof_res)))
Example No. 7
def run_tvm(lib):
    from tvm.contrib import graph_executor

    rt_mod = graph_executor.GraphModule(lib["default"](tvm.cpu(0)))
    rt_mod.set_input("input", data)
    rt_mod.run()
    tvm_res = rt_mod.get_output(0).numpy()
    tvm_pred = np.squeeze(tvm_res).argsort()[-5:][::-1]
    return tvm_pred, rt_mod
Example No. 8
 def verify(data):
     mod, params = relay.testing.synthetic.get_workload(input_shape=input_shape)
     with tvm.transform.PassContext(opt_level=3):
         lib = relay.build_module.build(mod, "llvm", params=params)
     dev = tvm.cpu()
     module = graph_executor.GraphModule(lib["default"](dev))
     module.set_input("data", data)
     module.run()
     out = module.get_output(0).asnumpy()
     return out
Example No. 9
 def get_tvm_output(xs, target, dev, dtype="float32"):
     shape_dict = {name: x.shape for (name, x) in zip(keras_model.input_names, xs)}
     mod, params = relay.frontend.from_keras(keras_model, shape_dict, layout=layout)
     with tvm.transform.PassContext(opt_level=2):
         lib = relay.build(mod, target, params=params)
     m = graph_executor.GraphModule(lib["default"](dev))
     for name, x in zip(keras_model.input_names, xs):
         m.set_input(name, tvm.nd.array(x.astype(dtype)))
     m.run()
     return [m.get_output(i).numpy() for i in range(m.get_num_outputs())]
Example No. 10
def evaluate_performance(lib, data_shape):
    # upload parameters to device
    dev = tvm.cpu()
    data_tvm = tvm.nd.array((np.random.uniform(size=data_shape)).astype(dtype))
    module = runtime.GraphModule(lib["default"](dev))
    module.set_input(input_name, data_tvm)

    # evaluate
    print("Evaluate inference time cost...")
    print(module.benchmark(dev, number=100, repeat=3))
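The helper reads module-level dtype, input_name, and a runtime alias for the graph executor. A hedged setup sketch with assumed values (tvm and relay imports assumed; not from the original source):

from tvm.contrib import graph_executor as runtime  # assumed alias used by the helper
from tvm.relay.testing import mlp

dtype = "float32"   # assumed globals referenced inside evaluate_performance
input_name = "data"
mod, params = mlp.get_workload(batch_size=1)  # toy network with a (1, 1, 28, 28) "data" input
with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target="llvm", params=params)
evaluate_performance(lib, data_shape=(1, 1, 28, 28))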
Example No. 11
def run(lib, dev):
    # Build TVM runtime
    m = graph_executor.GraphModule(lib["default"](dev))
    tvm_input = tvm.nd.array(x.asnumpy(), device=dev)
    m.set_input("data", tvm_input)
    # execute
    m.run()
    # get outputs
    class_IDs, scores, bounding_boxes = m.get_output(0), m.get_output(1), m.get_output(2)
    return class_IDs, scores, bounding_boxes
Example No. 12
    def verify_rpc_gpu_remove_package_params(obj_format):
        if not tvm.testing.device_enabled("cuda"):
            print("Skip because cuda is not enabled")
            return
        mod, params = relay.testing.synthetic.get_workload()
        with relay.build_config(opt_level=3):
            compiled_graph_lib = relay.build_module.build(mod,
                                                          "cuda",
                                                          params=params)

        from tvm.contrib import utils

        temp = utils.tempdir()
        if obj_format == ".so":
            file_name = "deploy_lib.so"
        else:
            assert obj_format == ".tar"
            file_name = "deploy_lib.tar"
        path_lib = temp.relpath(file_name)
        compiled_graph_lib_no_params = compiled_graph_lib["remove_params"]()
        compiled_graph_lib_no_params.export_library(path_lib)
        path_params = temp.relpath("deploy_param.params")
        with open(path_params, "wb") as fo:
            fo.write(runtime.save_param_dict(compiled_graph_lib.get_params()))

        from tvm import rpc

        remote = rpc.LocalSession()
        remote.upload(path_lib)
        loaded_lib = remote.load_module(path_lib)
        data = np.random.uniform(-1, 1,
                                 size=input_shape(mod)).astype("float32")
        dev = remote.cuda()

        # raw api
        gmod = loaded_lib["default"](dev)
        set_input = gmod["set_input"]
        run = gmod["run"]
        get_output = gmod["get_output"]
        load_params = gmod["load_params"]
        loaded_params = bytearray(open(path_params, "rb").read())
        set_input("data", tvm.nd.array(data, device=dev))
        load_params(loaded_params)
        run()
        out = get_output(0).numpy()
        tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

        # graph executor wrapper
        gmod = graph_executor.GraphModule(loaded_lib["default"](dev))
        loaded_params = bytearray(open(path_params, "rb").read())
        gmod.set_input("data", data)
        gmod.load_params(loaded_params)
        gmod.run()
        out = gmod.get_output(0).numpy()
        tvm.testing.assert_allclose(out, verify(data), atol=1e-5)
Example No. 13
def tune_and_evaluate(tuning_opt):
    # extract workloads from relay program
    print("Extract tasks...")
    mod, params, input_shape, _ = get_network(network, batch_size=1)
    tasks = autotvm.task.extract_from_program(
        mod["main"],
        target=target,
        params=params,
        ops=(relay.op.get("nn.conv2d"), ))

    # run tuning tasks
    print("Tuning...")
    tune_tasks(tasks, **tuning_opt)

    # compile kernels with history best records
    with autotvm.apply_history_best(log_file):
        print("Compile...")
        with tvm.transform.PassContext(opt_level=3):
            lib = relay.build_module.build(mod, target=target, params=params)

        # export library
        tmp = tempdir()
        if use_android:
            from tvm.contrib import ndk

            filename = "net.so"
            lib.export_library(tmp.relpath(filename), ndk.create_shared)
        else:
            filename = "net.tar"
            lib.export_library(tmp.relpath(filename))

        # upload module to device
        print("Upload...")
        remote = autotvm.measure.request_remote(device_key,
                                                "0.0.0.0",
                                                9190,
                                                timeout=10000)
        remote.upload(tmp.relpath(filename))
        rlib = remote.load_module(filename)

        # upload parameters to device
        dev = remote.device(str(target), 0)
        module = runtime.GraphModule(rlib["default"](dev))
        data_tvm = tvm.nd.array(
            (np.random.uniform(size=input_shape)).astype(dtype))
        #module.set_input("data", data_tvm)
        input_tensor = "input"
        module.set_input(input_tensor, data_tvm)
        # evaluate
        print("Evaluate inference time cost...")
        ftimer = module.module.time_evaluator("run", dev, number=1, repeat=10)
        prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
        print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
              (np.mean(prof_res), np.std(prof_res)))
Example No. 14
def compile_raw_onnx_model(onnx_model,
                           img_data,
                           target="llvm",
                           input_name="data"):
    shape_dict = {input_name: img_data.shape}
    mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)
    with tvm.transform.PassContext(opt_level=0):
        lib = relay.build(mod, target=target, params=params)
    dev = tvm.device(str(target), 0)
    module = graph_executor.GraphModule(lib["default"](dev))
    return module, params, target, mod
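A hedged usage sketch (hypothetical file name; the real model's input name and shape may differ), assuming onnx and numpy as np are available alongside the imports above:

import onnx

onnx_model = onnx.load("model.onnx")  # hypothetical path
img_data = np.random.rand(1, 3, 224, 224).astype("float32")  # assumed NCHW input
module, params, target, mod = compile_raw_onnx_model(onnx_model, img_data, input_name="data")
module.set_input("data", img_data)
module.run()
out = module.get_output(0).numpy()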
Example No. 15
def compile_tuned_onnx_model(tuning_option,
                             mod,
                             params,
                             transform,
                             target="llvm"):
    with tvm.autotvm.apply_history_best(tuning_option["tuning_records"]):
        with tvm.transform.PassContext(opt_level=3):
            lib = relay.build(mod, target=target, params=params)
    dev = tvm.device(str(target), 0)
    module = graph_executor.GraphModule(lib["default"](dev))
    return module
Example No. 16
def verify_model(func, input_data, rtol=1e-5, atol=1e-5):
    if not (isinstance(input_data, (tuple, list))):
        input_data = [input_data]

    input_spec = []
    input_names = []
    input_shape_dict = {}
    compiled_input = {}
    for idx, data in enumerate(input_data):
        input_name = "input{}".format(idx)
        input_spec.append(
            paddle.static.InputSpec(dtype=data.dtype,
                                    shape=data.shape,
                                    name=input_name))
        input_names.append(input_name)
        input_shape_dict[input_name] = data.shape
        if isinstance(data, np.ndarray):
            compiled_input[input_name] = data
        else:
            compiled_input[input_name] = data.numpy()

    baseline_model = get_paddle_model(func, input_spec)
    baseline_outputs = baseline_model(*[input[:] for input in input_data])

    # get paddle outputs
    if isinstance(baseline_outputs, (tuple, list)):
        baseline_outputs = tuple(out.numpy() for out in baseline_outputs)
    else:
        baseline_outputs = (baseline_outputs.numpy(), )

    mod, params = relay.frontend.from_paddle(baseline_model, input_shape_dict)
    params_num = min(len(input_names), len(mod["main"].params))
    compiled_names = []
    for arg in mod["main"].params[:params_num]:
        assert arg.name_hint in input_names or arg.name_hint in params
        if arg.name_hint in input_names:
            compiled_names.append(arg.name_hint)

    with tvm.transform.PassContext(opt_level=3):
        for target, dev in tvm.testing.enabled_targets():
            lib = relay.build(mod, target=target, params=params)
            gmod = graph_executor.GraphModule(lib["default"](dev))
            for name in compiled_names:
                gmod.set_input(name, compiled_input[name])
            gmod.run()

            for i, baseline_output in enumerate(baseline_outputs):
                compiled_output = gmod.get_output(i).numpy()

                assert_shapes_match(baseline_output, compiled_output)
                tvm.testing.assert_allclose(baseline_output,
                                            compiled_output,
                                            rtol=rtol,
                                            atol=atol)
Example No. 17
def tune_and_evaluate():
    print("Begin tuning...")
    tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
    tune_option = auto_scheduler.TuningOptions(
        num_measure_trials=200,  # change this to 20000 to achieve the best performance
        builder=auto_scheduler.LocalBuilder(
            build_func="ndk" if use_ndk else "default"),
        runner=auto_scheduler.RPCRunner(device_key,
                                        host="0.0.0.0",
                                        port=9190,
                                        repeat=3,
                                        timeout=50),
        measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
    )

    tuner.tune(tune_option)

    # Compile the whole network
    print("Compile...")
    with auto_scheduler.ApplyHistoryBest(log_file):
        with tvm.transform.PassContext(
                opt_level=3, config={"relay.backend.use_auto_scheduler":
                                     True}):
            lib = relay.build(mod, target, params=params)

    # Create graph executor
    print("=============== Request Remote ===============")
    from tvm.auto_scheduler.utils import request_remote

    remote = request_remote(device_key, "0.0.0.0", 9190)
    dev = remote.cl()
    from tvm.contrib import utils, ndk

    temp = utils.tempdir()
    filename = "deploy_lib.so"
    path_lib = temp.relpath(filename)
    lib.export_library(path_lib, ndk.create_shared)
    remote.upload(path_lib)
    loaded_lib = remote.load_module(filename)
    module = graph_executor.GraphModule(loaded_lib["default"](dev))
    data = (np.random.uniform(size=input_shape)).astype(dtype)
    data_tvm = tvm.nd.array(data)
    module.set_input("data", data_tvm)

    # Evaluate
    print("Evaluate inference time cost...")
    ftimer = module.module.time_evaluator("run",
                                          dev,
                                          repeat=3,
                                          min_repeat_ms=500)
    prof_res = np.array(ftimer().results) * 1e3  # convert to millisecond
    print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
          (np.mean(prof_res), np.std(prof_res)))
Example No. 18
def tune_and_evaluate():
    print("Begin tuning...")
    tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
    tune_option = auto_scheduler.TuningOptions(
        num_measure_trials=200,  # change this to 20000 to achieve the best performance
        builder=auto_scheduler.LocalBuilder(build_func="ndk" if use_ndk else "default"),
        runner=auto_scheduler.RPCRunner(
            device_key,
            host=rpc_host,
            port=rpc_port,
            timeout=30,
            repeat=1,
            min_repeat_ms=200,
            enable_cpu_cache_flush=True,
        ),
        measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
    )

    tuner.tune(tune_option)

    # Compile with the history best
    print("Compile...")
    with auto_scheduler.ApplyHistoryBest(log_file):
        with tvm.transform.PassContext(
            opt_level=3, config={"relay.backend.use_auto_scheduler": True}
        ):
            lib = relay.build(mod, target=target, params=params)

    # Export library
    tmp = tempdir()
    if use_ndk:
        from tvm.contrib import ndk

        filename = "net.so"
        lib.export_library(tmp.relpath(filename), ndk.create_shared)
    else:
        filename = "net.tar"
        lib.export_library(tmp.relpath(filename))

    # Upload module to device
    print("Upload...")
    remote = auto_scheduler.utils.request_remote(device_key, rpc_host, rpc_port, timeout=10000)
    remote.upload(tmp.relpath(filename))
    rlib = remote.load_module(filename)

    # Create graph executor
    dev = remote.cpu()
    module = graph_executor.GraphModule(rlib["default"](dev))
    data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))
    module.set_input("data", data_tvm)

    # Evaluate
    print("Evaluate inference time cost...")
    print(module.benchmark(dev, repeat=3, min_repeat_ms=500))
Example No. 19
def get_tvm_output(
    func, x, params, target, device, out_shape=(1, 1000), input_name="image", dtype="float32"
):
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(func, target, params=params)
    m = graph_executor.GraphModule(lib["default"](device))
    # set inputs
    m.set_input(input_name, tvm.nd.array(x.astype(dtype)))
    m.run()
    # get outputs
    out = m.get_output(0, tvm.nd.empty(out_shape, dtype))
    return out.asnumpy()
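A hedged sketch of calling the helper on a toy Relay function rather than the original model (out_shape overridden to match), assuming tvm, relay, and numpy as np are imported:

image = relay.var("image", shape=(1, 4), dtype="float32")
toy_func = relay.Function([image], relay.nn.softmax(image))
x = np.random.rand(1, 4).astype("float32")
res = get_tvm_output(toy_func, x, params={}, target="llvm",
                     device=tvm.cpu(0), out_shape=(1, 4))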
Example No. 20
def run_relay_graph(mod, params, shape_dict, target, dev):
    with relay.build_config(opt_level=3):
        lib = relay.build(mod, target=target, params=params)
    input_shape = shape_dict["input_1"]
    dummy_data = np.random.uniform(size=input_shape, low=0, high=input_shape[1]).astype("int32")

    m = graph_executor.GraphModule(lib["default"](dev))
    m.set_input(0, dummy_data)
    m.run()
    tvm_output = m.get_output(0)

    print(m.benchmark(dev, repeat=5, number=5))
    return tvm_output
Example No. 21
def evaluate_performance(lib, data_shape):
    # upload parameters to device
    dev = tvm.cpu()
    data_tvm = tvm.nd.array((np.random.uniform(size=data_shape)).astype(dtype))
    module = runtime.GraphModule(lib["default"](dev))
    module.set_input(input_name, data_tvm)

    # evaluate
    print("Evaluate inference time cost...")
    ftimer = module.module.time_evaluator("run", dev, number=100, repeat=3)
    prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
    print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
          (np.mean(prof_res), np.std(prof_res)))
Example No. 22
def build_and_run(
    mod,
    inputs,
    outputs,
    params,
    device,
    enable_clml=True,
    no_runs=1,
    config=None,
):
    """Build and run the relay module."""
    if config is None:
        config = {}

    try:
        libm = build_module(mod, device.target, device.target_host, params,
                            enable_clml)

        clml_modules = extract_clml_modules(libm)
        for mod in clml_modules:
            source = mod.get_source("json")
            codegen = json.loads(source)["nodes"]
            # remove input and const names as these cannot be predetermined
            for node in range(len(codegen)):
                if codegen[node]["op"] == "input" or codegen[node][
                        "op"] == "const":
                    codegen[node]["name"] = ""
            codegen_str = json.dumps(codegen, sort_keys=True, indent=2)

    except Exception as e:
        err_msg = "The module could not be built.\n"
        if config:
            err_msg += f"The test failed with the following parameters: {config}\n"
        err_msg += str(e)
        raise Exception(err_msg)

    lib = update_lib(libm, device.device, device.cross_compile)
    gen_module = graph_executor.GraphModule(lib["default"](
        device.device.cl(0)))
    gen_module.set_input(**inputs)
    out = []
    for _ in range(no_runs):
        gen_module.run()
        out.append([gen_module.get_output(i) for i in range(outputs)])
    time_f = gen_module.module.time_evaluator("run",
                                              device.device.cl(0),
                                              number=50)
    cost = time_f().mean
    print("%g secs/iteration\n" % cost)
    return out
Example No. 23
def run_tvm_graph(coreml_model,
                  target,
                  device,
                  input_data,
                  input_name,
                  output_shape,
                  output_dtype="float32"):
    """Generic function to compile on relay and execute on tvm"""
    if isinstance(input_data, list):
        shape_dict = {}
        dtype_dict = {}
        for i, e in enumerate(input_name):
            shape_dict[e] = input_data[i].shape
            dtype_dict[e] = input_data[i].dtype
    else:
        shape_dict = {input_name: input_data.shape}
        dtype_dict = {input_name: input_data.dtype}

    mod, params = relay.frontend.from_coreml(coreml_model, shape_dict)
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(mod, target, params=params)

    from tvm.contrib import graph_executor

    m = graph_executor.GraphModule(lib["default"](device))
    # set inputs
    if isinstance(input_data, list):
        for i, e in enumerate(input_name):
            m.set_input(
                e, tvm.nd.array(input_data[i].astype(input_data[i].dtype)))
    else:
        m.set_input(input_name,
                    tvm.nd.array(input_data.astype(input_data.dtype)))

    # execute
    m.run()
    # get outputs
    if isinstance(output_shape, list) and isinstance(output_dtype, list):
        tvm_output_list = []
        for i, s in enumerate(output_shape):
            tvm_output = m.get_output(i, tvm.nd.empty((s), output_dtype[i]))
            tvm_output_list.append(tvm_output.numpy())
        return tvm_output_list
    else:
        if not output_shape:
            tvm_output = m.get_output(0)
        else:
            tvm_output = m.get_output(
                0, tvm.nd.empty((output_shape), output_dtype))
        return tvm_output.numpy()
Example No. 24
def test_graph_executor_api():
    dname_0, dname_1 = "data_0", "data_1"
    data_0, data_1 = [
        relay.var(c, shape=(1, 1), dtype="float32")
        for c in [dname_0, dname_1]
    ]
    net = relay.add(data_0, data_1)
    func = relay.Function((data_0, data_1), net)

    lib = relay.build(tvm.IRModule.from_expr(func), "llvm")
    mod = graph_executor.GraphModule(lib["default"](tvm.cpu(0)))

    assert mod.get_input_index(dname_1) == 1
    assert mod.get_input_index(dname_0) == 0
    assert mod.get_input_index("Invalid") == -1
Example No. 25
def check_graph_executor(
    target, ref_res, device, func, params, config, opt_level, expected_index=None
):
    with tvm.transform.PassContext(opt_level=opt_level, config=config):
        graph_executor_factory = relay.build(func, target, params=params)

        contexts = [tvm.cpu(0), tvm.device(device)]
        graph_json = json.loads(graph_executor_factory.graph_json)
        if "device_index" in graph_json["attrs"]:
            device_index = graph_json["attrs"]["device_index"][1]
            assert device_index == expected_index
        mod = graph_executor.GraphModule(graph_executor_factory["default"](*contexts))
        mod.run()
        res = mod.get_output(0).numpy()
        tvm.testing.assert_allclose(res, ref_res, rtol=1e-5, atol=1e-5)
Example No. 26
def get_tvm_model(traced_model, X_ex):
    mod, params = relay.frontend.from_pytorch(traced_model, input_infos=[('input0', X_ex.shape)])

    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(mod, target=TARGET, params=params)

    dev = tvm.device(str(TARGET), 0)
    module = graph_executor.GraphModule(lib["default"](dev))

    module.set_input("input0", X_ex)
    module.run()  # just a test run to make sure it works

    # mod is the Relay IRModule and params are the converted model parameters; both are used downstream.
    # module is the graph executor wrapper used to run the compiled model.
    return mod, params, module
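A hedged usage sketch, assuming torch is available and that the module-level TARGET read by the helper is something like "llvm":

import torch

TARGET = "llvm"  # assumed value of the global read by get_tvm_model
torch_model = torch.nn.Linear(8, 2).eval()
X_ex = torch.randn(1, 8)
traced_model = torch.jit.trace(torch_model, X_ex)
mod, params, module = get_tvm_model(traced_model, X_ex)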
Example No. 27
def _run_tvm(data, proto_file, blob_file):
    """ Run caffe model by TVM according to .caffemodel and .prototxt"""
    init_net = pb.NetParameter()
    predict_net = pb.NetParameter()

    # load model
    with open(proto_file, "r") as f:
        text_format.Merge(f.read(), predict_net)
    # load blob
    with open(blob_file, "rb") as f:
        init_net.ParseFromString(f.read())

    shape_dict = dict()
    dtype_dict = dict()
    if isinstance(data, (tuple, list)):
        for idx, d in enumerate(data):
            shape_dict["data" + str(idx)] = d.shape
            dtype_dict["data" + str(idx)] = "float32"
    else:
        shape_dict = {"data": data.shape}
        dtype_dict = {"data": "float32"}

    mod, params = relay.frontend.from_caffe(init_net, predict_net, shape_dict,
                                            dtype_dict)

    target = "llvm"
    target_host = "llvm"

    dev = tvm.cpu(0)
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(mod,
                          target=target,
                          target_host=target_host,
                          params=params)
    dtype = "float32"
    m = graph_executor.GraphModule(lib["default"](dev))
    if isinstance(data, (tuple, list)):
        for idx, d in enumerate(data):
            m.set_input("data" + str(idx), tvm.nd.array(d.astype(dtype)))
    else:
        m.set_input("data", tvm.nd.array(data.astype(dtype)))
    # execute
    m.run()
    tvm_output = list()
    # get outputs
    for i in range(m.get_num_outputs()):
        tvm_output.append(m.get_output(i).asnumpy())
    return tvm_output
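A hedged invocation sketch with hypothetical file paths, assuming the Caffe protobuf bindings are imported as pb and text_format comes from google.protobuf, as the helper expects:

data = np.random.rand(1, 3, 224, 224).astype("float32")  # assumed NCHW input named "data"
outputs = _run_tvm(data, "deploy.prototxt", "deploy.caffemodel")  # hypothetical paths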
Example No. 28
    def verify_gpu_export(obj_format):
        if not tvm.testing.device_enabled("cuda"):
            print("Skip because cuda is not enabled")
            return
        mod, params = relay.testing.synthetic.get_workload()
        with relay.build_config(opt_level=3):
            compiled_graph_lib = relay.build_module.build(mod,
                                                          "cuda",
                                                          params=params)

        from tvm.contrib import utils

        temp = utils.tempdir()
        if obj_format == ".so":
            file_name = "deploy_lib.so"
        else:
            assert obj_format == ".tar"
            file_name = "deploy_lib.tar"
        path_lib = temp.relpath(file_name)
        compiled_graph_lib.export_library(path_lib)

        data = np.random.uniform(-1, 1,
                                 size=input_shape(mod)).astype("float32")

        # run the setup in a separate function, so the load_lib
        # can get destructed right away
        # test the robustness wrt to parent module destruction
        def setup_gmod():
            loaded_lib = tvm.runtime.load_module(path_lib)
            dev = tvm.cuda()
            return loaded_lib["default"](dev)

        gmod = setup_gmod()
        # raw api
        set_input = gmod["set_input"]
        run = gmod["run"]
        get_output = gmod["get_output"]
        set_input("data", tvm.nd.array(data))
        run()
        out = get_output(0).numpy()
        tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

        # graph executor wrapper
        gmod = graph_executor.GraphModule(setup_gmod())
        gmod.set_input("data", data)
        gmod.run()
        out = gmod.get_output(0).numpy()
        tvm.testing.assert_allclose(out, verify(data), atol=1e-5)
Example No. 29
def generate_ref_data(mod, input_data, params=None, target="llvm"):
    """Generate reference data through executing the relay module"""
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(mod, target=target, params=params)

    lib_name = "mod.so"
    temp = utils.tempdir()
    lib_path = temp.relpath(lib_name)
    lib.export_library(lib_path)
    lib = tvm.runtime.load_module(lib_path)
    grt_mod = graph_executor.GraphModule(lib["default"](tvm.cpu()))
    grt_mod.set_input(**input_data)
    grt_mod.run()
    output_count = grt_mod.get_num_outputs()
    out = [grt_mod.get_output(i).numpy() for i in range(output_count)]
    return out
Example No. 30
def run_func(func, params, x):
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(func, "llvm", params=params)

    from tvm.contrib import graph_executor

    dev = tvm.cpu(0)
    dtype = "float32"
    m = graph_executor.GraphModule(lib["default"](dev))
    # set inputs
    m.set_input("data", tvm.nd.array(x.astype(dtype)))
    # execute
    m.run()
    # get outputs
    tvm_output = m.get_output(0)
    return tvm_output.asnumpy()
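A hedged usage sketch on a toy Relay function with a single "data" input (not from the original source), assuming tvm, relay, and numpy as np are imported:

data_var = relay.var("data", shape=(1, 4), dtype="float32")
toy_func = relay.Function([data_var], relay.nn.relu(data_var))
x = np.random.randn(1, 4).astype("float32")
y = run_func(toy_func, params={}, x=x)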