Example #1
def benchmark(network, target, log_file):
    mod, params, input_shape, output_shape = get_network(network)
    # convert to NCHW
    desired_layouts = {'nn.conv2d': ['NCHW', 'default']}
    seq = tvm.transform.Sequential([
        relay.transform.RemoveUnusedFunctions(),
        relay.transform.ConvertLayout(desired_layouts)
    ])
    with tvm.transform.PassContext(opt_level=3):
        mod = seq(mod)

    if network in ["bert"]:
        with autotvm.apply_history_best(log_file):
            with tvm.transform.PassContext(opt_level=3):
                lib = relay.build_module.build(mod,
                                               target=target,
                                               params=params)
            # upload parameters to device
            ctx = tvm.context(str(target), 0)
            data_tvm = tvm.nd.array(
                (np.random.uniform(size=input_shape[0])).astype(dtype))
            token_types_tvm = tvm.nd.array(
                np.random.uniform(size=input_shape[1]).astype(dtype))
            valid_length_tvm = tvm.nd.array(
                np.random.uniform(size=input_shape[2]).astype(dtype))
            module = runtime.GraphModule(lib["default"](ctx))
            module.set_input(data0=data_tvm,
                             data1=token_types_tvm,
                             data2=valid_length_tvm)
    else:
        with autotvm.apply_history_best(log_file):
            with tvm.transform.PassContext(opt_level=3):
                lib = relay.build_module.build(mod,
                                               target=target,
                                               params=params)

            # upload parameters to device
            ctx = tvm.context(str(target), 0)
            data_tvm = tvm.nd.array(
                (np.random.uniform(size=input_shape)).astype(dtype))
            module = runtime.GraphModule(lib["default"](ctx))
            module.set_input(args.inputname, data_tvm)

    # evaluate
    print("Evaluate...")
    ftimer = module.module.time_evaluator("run",
                                          ctx,
                                          number=1,
                                          repeat=args.repeat)
    prof_res = np.array(
        ftimer().results) * 1000  # multiply by 1000 to convert to milliseconds
    print(
        "%-20s %-19s (%s)" %
        (network, "%.2f ms" % np.mean(prof_res), "%.2f ms" % np.std(prof_res)))
def benchmark(network, batch_size, dtype, target, log_prefix, repeat):
    layout = "NCHW"
    mod, params, input_name, input_shape, output_shape = get_network(
        network, batch_size, dtype, layout)

    if use_graph_tuner(network, batch_size, dtype, target):
        log_file = log_prefix + ".graph.log"
        history_best_context = autotvm.apply_graph_best(log_file)
    else:
        log_file = log_prefix + ".kernel.log"
        history_best_context = autotvm.apply_history_best(log_file)

    assert os.path.exists(
        log_file), "The log file '%s' does not exist." % log_file
    print("Use log file %s" % log_file)

    if network in ["bert"]:
        # Build module
        with history_best_context:
            with tvm.transform.PassContext(opt_level=3):
                lib = relay.build(mod, target=target, params=params)
        ctx = tvm.context(str(target), 0)
        module = runtime.GraphModule(lib["default"](ctx))

        # Feed input data
        seq_length = input_shape[0][1]
        data = np.random.uniform(size=input_shape[0])
        token_types = np.random.uniform(size=input_shape[1])
        valid_length = np.array([seq_length] * batch_size)
        module.set_input(data0=data, data1=token_types, data2=valid_length)
    else:
        # Build module
        with history_best_context:
            with tvm.transform.PassContext(opt_level=3):
                lib = relay.build(mod, target=target, params=params)
        ctx = tvm.context(str(target), 0)
        module = runtime.GraphModule(lib["default"](ctx))

        # Feed input data
        data = np.random.uniform(size=input_shape)
        module.set_input(input_name, data)

    # Evaluate
    ftimer = module.module.time_evaluator("run",
                                          ctx,
                                          min_repeat_ms=500,
                                          repeat=repeat)
    return np.array(ftimer().results)
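A minimal sketch of driving this benchmark; the network name, target string, and log prefix are placeholder assumptions, and the matching tuning logs are assumed to already exist on disk.
# hypothetical driver; "<log_prefix>.kernel.log" or "<log_prefix>.graph.log" must already exist
results = benchmark("resnet_50", batch_size=1, dtype="float32",
                    target="llvm -mcpu=core-avx2", log_prefix="resnet_50", repeat=3)
prof_res = results * 1000  # time_evaluator reports seconds; convert to milliseconds
print("%-20s %.2f ms (%.2f ms)" % ("resnet_50", np.mean(prof_res), np.std(prof_res)))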
Example #3
    def get_graph_runtime_output(mod,
                                 data,
                                 params,
                                 target,
                                 ctx,
                                 dtype="float32",
                                 number=2,
                                 repeat=20):
        with tvm.transform.PassContext(opt_level=3):
            lib = relay.build(mod, target, params=params)

        m = graph_runtime.GraphModule(lib["default"](ctx))
        # set inputs
        m.set_input("data", data)
        m.run()
        out = m.get_output(0, tvm.nd.empty(out_shape, dtype))

        if measure:
            print("Evaluate graph runtime inference cost of {} on "
                  "{}".format(model, repr(ctx)))
            ftimer = m.module.time_evaluator("run", ctx, number=number, repeat=repeat)
            # Measure in millisecond.
            prof_res = np.array(ftimer().results) * 1000
            print(
                "Mean graph runtime inference time (std dev): %.2f ms (%.2f ms)"
                % (np.mean(prof_res), np.std(prof_res)))

        return out.asnumpy()
Example #4
def test_gpu():
    mod, params = relay.testing.synthetic.get_workload()
    with relay.build_config(opt_level=3):
        complied_graph_lib = relay.build_module.build(mod,
                                                      "cuda",
                                                      params=params)
    data = np.random.uniform(-1, 1, size=input_shape(mod)).astype("float32")
    dev = tvm.gpu()

    # raw api
    gmod = complied_graph_lib["default"](dev)
    set_input = gmod["set_input"]
    run = gmod["run"]
    get_output = gmod["get_output"]
    set_input("data", tvm.nd.array(data))
    run()
    out = get_output(0).asnumpy()
    tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

    # graph runtime wrapper
    gmod = graph_runtime.GraphModule(complied_graph_lib["default"](dev))
    gmod.set_input("data", data)
    gmod.run()
    out = gmod.get_output(0).asnumpy()
    tvm.testing.assert_allclose(out, verify(data), atol=1e-5)
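test_gpu (and test_cpu below) rely on two helpers defined in the same test file: verify(data), shown later as Example #25, and input_shape(mod). A minimal sketch of the latter, assuming the workload builder has already run type inference on the module, could look like:
def input_shape(mod):
    # shape of the first parameter of the module's "main" function;
    # assumes checked_type is populated by the workload builder
    return [int(dim) for dim in mod["main"].params[0].checked_type.shape]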
Example #5
    def prepare(self, ctx_id):
        if self.use_tvm_build:
            print("Use TVM ArcFace Model")
            loaded_lib = tvm.runtime.load_module(self.param_file)
            self.model = graph_runtime.GraphModule(loaded_lib["default"](tvm.cpu()))

        else:
            if self.param_file:
                print("Use mxnet ArcFace Model")
                pos = self.param_file.rfind('-')
                prefix = self.param_file[0:pos]
                pos2 = self.param_file.rfind('.')
                epoch = int(self.param_file[pos+1:pos2])
                sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
                all_layers = sym.get_internals()
                sym = all_layers['fc1_output']
                if ctx_id >= 0:
                    ctx = mx.gpu(ctx_id)
                else:
                    ctx = mx.cpu()
                model = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
                data_shape = (1, 3) + self.image_size
                model.bind(data_shapes=[('data', data_shape)])
                model.set_params(arg_params, aux_params)
                # warmup
                data = mx.nd.zeros(shape=data_shape)
                db = mx.io.DataBatch(data=(data,))
                model.forward(db, is_train=False)
                embedding = model.get_outputs()[0].asnumpy()
                self.model = model
            else:
                pass
Example #6
    def run(mod, target):
        with relay.build_config(opt_level=3):
            lib = relay.build(mod, target=target, target_host=target_host, params=params)
        path_dso = temp.relpath("deploy.dylib")
        lib.export_library(path_dso, xcode.create_dylib, arch=arch, sdk=sdk)
        xcode.codesign(path_dso)

        # Start RPC test server that contains the compiled library.
        xcode.popen_test_rpc(proxy_host, proxy_port, key, destination=destination, libs=[path_dso])

        # connect to the proxy
        remote = rpc.connect(proxy_host, proxy_port, key=key)

        if target == "metal":
            ctx = remote.metal(0)
        else:
            ctx = remote.cpu(0)
        lib = remote.load_module("deploy.dylib")
        m = graph_runtime.GraphModule(lib["default"](ctx))

        m.set_input("data", tvm.nd.array(image, ctx))
        m.run()
        tvm_output = m.get_output(0)
        top1 = np.argmax(tvm_output.asnumpy()[0])
        print("TVM prediction top-1:", top1, synset[top1])

        # evaluate
        ftimer = m.module.time_evaluator("run", ctx, number=3, repeat=10)
        prof_res = np.array(ftimer().results) * 1000
        print("%-19s (%s)" % ("%.2f ms" % np.mean(prof_res), "%.2f ms" % np.std(prof_res)))
def test_cpu():
    if not tvm.testing.device_enabled("llvm"):
        print("Skip because llvm is not enabled")
        return
    mod, params = relay.testing.synthetic.get_workload()
    with relay.build_config(opt_level=3):
        complied_graph_lib = relay.build_module.build(mod, "llvm", params=params)
    data = np.random.uniform(-1, 1, size=input_shape(mod)).astype("float32")
    # raw api
    ctx = tvm.cpu()
    gmod = complied_graph_lib["default"](ctx)
    set_input = gmod["set_input"]
    run = gmod["run"]
    get_output = gmod["get_output"]
    set_input("data", tvm.nd.array(data))
    run()
    out = get_output(0).asnumpy()
    tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

    # graph runtime wrapper
    gmod = graph_runtime.GraphModule(complied_graph_lib["default"](ctx))
    gmod.set_input("data", data)
    gmod.run()
    out = gmod.get_output(0).asnumpy()
    tvm.testing.assert_allclose(out, verify(data), atol=1e-5)
Example #8
def _get_tvm_output(net, data, build_dtype="float32", states=None):
    """Compute TVM output"""
    dtype = "float32"
    mod, params = relay.frontend.from_darknet(net, data.shape, dtype)
    # verify that from_darknet creates a valid, parsable relay program
    mod = relay.transform.InferType()(mod)
    astext(mod)

    target = "llvm"
    shape_dict = {"data": data.shape}
    lib = relay.build(mod, target, params=params)

    # Execute on TVM
    ctx = tvm.cpu(0)
    m = graph_runtime.GraphModule(lib["default"](ctx))
    # set inputs
    m.set_input("data", tvm.nd.array(data.astype(dtype)))
    if states:
        for name in states.keys():
            m.set_input(name, tvm.nd.array(states[name].astype(dtype)))
    m.run()
    # get outputs
    tvm_out = []
    for i in range(m.get_num_outputs()):
        tvm_out.append(m.get_output(i).asnumpy())
    return tvm_out
Example #9
def tune_and_evaluate(tuning_opt):
    # extract workloads from relay program
    print("Extract tasks...")
    mod, params, data_shape, out_shape = get_network(model_name, batch_size)
    tasks = autotvm.task.extract_from_program(
        mod["main"],
        target=target,
        params=params,
        ops=(relay.op.get("nn.conv2d"), ))

    # run tuning tasks
    tune_kernels(tasks, **tuning_opt)
    tune_graph(mod["main"], data_shape, log_file, graph_opt_sch_file)

    # compile kernels with graph-level best records
    with autotvm.apply_graph_best(graph_opt_sch_file):
        print("Compile...")
        with tvm.transform.PassContext(opt_level=3):
            lib = relay.build_module.build(mod, target=target, params=params)

        # upload parameters to device
        dev = tvm.cpu()
        data_tvm = tvm.nd.array(
            (np.random.uniform(size=data_shape)).astype(dtype))
        module = runtime.GraphModule(lib["default"](dev))
        module.set_input(input_name, data_tvm)

        # evaluate
        print("Evaluate inference time cost...")
        ftimer = module.module.time_evaluator("run", dev, number=100, repeat=3)
        prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
        print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
              (np.mean(prof_res), np.std(prof_res)))
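The tuning_opt dictionary unpacked into tune_kernels is not shown here; one plausible shape for it, assuming tune_kernels accepts these keyword arguments, is:
# illustrative values; the keys must match tune_kernels' keyword arguments
tuning_option = {
    "log_filename": log_file,
    "tuner": "random",
    "early_stopping": None,
    "measure_option": autotvm.measure_option(
        builder=autotvm.LocalBuilder(),
        runner=autotvm.LocalRunner(number=10, repeat=1, min_repeat_ms=0),
    ),
}
tune_and_evaluate(tuning_option)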
Example #10
        def get_output(data, lib):
            dev = tvm.cpu()
            module = graph_runtime.GraphModule(lib["default"](dev))
            module.set_input("data", data)
            module.run()

            return module.get_output(0).asnumpy()
def test_gpu():
    if not tvm.runtime.enabled("cuda"):
        print("Skip because cuda is not enabled")
        return
    mod, params = relay.testing.resnet.get_workload(num_layers=18)
    with relay.build_config(opt_level=3):
        complied_graph_lib = relay.build_module.build(mod,
                                                      "cuda",
                                                      params=params)
    data = np.random.uniform(-1, 1, size=(1, 3, 224, 224)).astype("float32")
    ctx = tvm.gpu()

    # raw api
    gmod = complied_graph_lib['default'](ctx)
    set_input = gmod["set_input"]
    run = gmod["run"]
    get_output = gmod["get_output"]
    set_input("data", tvm.nd.array(data))
    run()
    out = get_output(0).asnumpy()
    tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

    # graph runtime wrapper
    gmod = graph_runtime.GraphModule(complied_graph_lib['default'](ctx))
    gmod.set_input("data", data)
    gmod.run()
    out = gmod.get_output(0).asnumpy()
    tvm.testing.assert_allclose(out, verify(data), atol=1e-5)
def tune_and_evaluate(tuning_opt):
    # extract workloads from relay program
    print("Extract tasks...")
    mod, params, input_shape, out_shape = get_network(network, batch_size=1)
    tasks = autotvm.task.extract_from_program(
        mod["main"],
        target=target,
        params=params,
        ops=(relay.op.get("nn.conv2d"), ))

    # run tuning tasks
    print("Tuning...")
    tune_tasks(tasks, **tuning_opt)

    # compile kernels with history best records
    with autotvm.apply_history_best(log_file):
        print("Compile...")
        with tvm.transform.PassContext(opt_level=3):
            lib = relay.build_module.build(mod, target=target, params=params)

        # load parameters
        ctx = tvm.context(str(target), 0)
        module = runtime.GraphModule(lib["default"](ctx))
        data_tvm = tvm.nd.array(
            (np.random.uniform(size=input_shape)).astype(dtype))
        module.set_input("data", data_tvm)

        # evaluate
        print("Evaluate inference time cost...")
        ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=600)
        prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
        print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
              (np.mean(prof_res), np.std(prof_res)))
Example #13
def verify_result(
    mod,
    map_inputs,
    out_shape,
    result,
    tol=1e-5,
    target="llvm",
    device=tvm.cpu(),
    params=None,
    dpu_target="DPUCADX8G",
    tvm_ops=0,
):
    """To check the result between reference and byoc vitis-ai flow"""

    lib = build_module(mod, target, params=params, dpu_target=dpu_target, tvm_ops=tvm_ops)
    lib = update_lib(lib)
    rt_mod = graph_runtime.GraphModule(lib["default"](tvm.cpu()))

    for name, data in map_inputs.items():
        rt_mod.set_input(name, data)
    if params:
        rt_mod.set_input(**params)
    rt_mod.run()

    out_shapes = out_shape if isinstance(out_shape, list) else [out_shape]
    results = result if isinstance(result, list) else [result]

    for idx, shape in enumerate(out_shapes):
        out = tvm.nd.empty(shape, device=device)
        out = rt_mod.get_output(idx, out)
        tvm.testing.assert_allclose(out.asnumpy(), results[idx], rtol=tol, atol=tol)
Example #14
def build_and_run(
    mod,
    inputs,
    outputs,
    params,
    device,
    enable_acl=True,
    no_runs=1,
    tvm_ops=0,
    acl_partitions=1,
    config=None,
):
    """Build and run the relay module."""
    if config is None:
        config = {}

    try:
        lib = build_module(mod, device.target, params, enable_acl, tvm_ops, acl_partitions)
    except Exception as e:
        err_msg = "The module could not be built.\n"
        if config:
            err_msg += f"The test failed with the following parameters: {config}\n"
        err_msg += str(e)
        raise Exception(err_msg)

    lib = update_lib(lib, device.device, device.cross_compile)
    gen_module = graph_runtime.GraphModule(lib["default"](device.device.cpu(0)))
    gen_module.set_input(**inputs)
    out = []
    for _ in range(no_runs):
        gen_module.run()
        out.append([gen_module.get_output(i) for i in range(outputs)])
    return out
Example #15
def infer_value(input_val, params, mod=None):
    """A hack for getting the value of an expression by evaluating a
    portion of the relay graph. This is often needed for functions whose
    output shape depends on the value of a tensor.
    """
    # Check that all free variables have associated parameters.
    assert all(
        var.name_hint in params.keys() for var in analysis.free_vars(input_val)
    ), "All inputs to infer must be available in params."
    try:
        # TODO(kevinthesun): Use VM for all cases.
        # pylint: disable=import-outside-toplevel
        from tvm.contrib import graph_runtime

        func = _function.Function(analysis.free_vars(input_val), input_val)
        with tvm.transform.PassContext(opt_level=0):
            lib = tvm.relay.build(func, target="llvm", params=params)
        ctx = tvm.cpu(0)
        m = graph_runtime.GraphModule(lib["default"](ctx))
        m.run()
        return m.get_output(0)
    except Exception:
        if isinstance(mod, IRModule):
            mod["main"] = _function.Function(analysis.free_vars(input_val), input_val)
        else:
            mod = IRModule.from_expr(input_val)
        exc = tvm.relay.create_executor("debug", mod=mod, ctx=tvm.cpu(), target="llvm")
        inputs = []
        for param in mod["main"].params:
            inputs.append(params[param.name_hint])
        result = exc.evaluate()(*inputs)
        return result
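A hedged usage sketch of infer_value on a toy expression whose only free variable is bound through params, so the graph-runtime path above is taken; the variable name and shapes are illustrative.
# toy expression; "x" is supplied via params, so the graph can be built and run directly
x = relay.var("x", shape=(2, 3), dtype="float32")
expr = relay.add(x, relay.const(1.0, "float32"))
val = infer_value(expr, {"x": tvm.nd.array(np.ones((2, 3), dtype="float32"))})
print(val.asnumpy())  # a 2x3 array of 2.0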
Example #16
def run_tvm(lib):
    from tvm.contrib import graph_runtime
    rt_mod = graph_runtime.GraphModule(lib['default'](tvm.cpu(0)))
    rt_mod.set_input('input', data)
    rt_mod.run()
    tvm_res = rt_mod.get_output(0).asnumpy()
    tvm_pred = np.squeeze(tvm_res).argsort()[-5:][::-1]
    return tvm_pred, rt_mod
def tune_and_evaluate(tuning_opt):
    # extract workloads from relay program
    print("Extract tasks...")
    mod, params, input_shape, _ = get_network(network, batch_size=1)
    tasks = autotvm.task.extract_from_program(
        mod["main"],
        target=target,
        target_host=target_host,
        params=params,
        ops=(relay.op.get("nn.conv2d"), ),
    )

    # run tuning tasks
    print("Tuning...")
    tune_tasks(tasks, **tuning_opt)

    # compile kernels with history best records
    with autotvm.apply_history_best(log_file):
        print("Compile...")
        with tvm.transform.PassContext(opt_level=3):
            lib = relay.build_module.build(mod,
                                           target=target,
                                           params=params,
                                           target_host=target_host)
        # export library
        tmp = tempdir()
        if use_android:
            from tvm.contrib import ndk

            filename = "net.so"
            lib.export_library(tmp.relpath(filename), ndk.create_shared)
        else:
            filename = "net.tar"
            lib.export_library(tmp.relpath(filename))

        # upload module to device
        print("Upload...")
        remote = autotvm.measure.request_remote(device_key,
                                                "0.0.0.0",
                                                9190,
                                                timeout=10000)
        remote.upload(tmp.relpath(filename))
        rlib = remote.load_module(filename)

        # upload parameters to device
        ctx = remote.context(str(target), 0)
        module = runtime.GraphModule(rlib["default"](ctx))
        data_tvm = tvm.nd.array(
            (np.random.uniform(size=input_shape)).astype(dtype))
        module.set_input("data", data_tvm)

        # evaluate
        print("Evaluate inference time cost...")
        ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=30)
        prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
        print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
              (np.mean(prof_res), np.std(prof_res)))
Example #18
def tune_and_evaluate():
    print("Begin tuning...")
    tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
    tune_option = auto_scheduler.TuningOptions(
        num_measure_trials=200,  # change this to 20000 to achieve the best performance
        builder=auto_scheduler.LocalBuilder(
            build_func="ndk" if use_ndk else "default"),
        runner=auto_scheduler.RPCRunner(device_key,
                                        host="0.0.0.0",
                                        port=9190,
                                        repeat=3,
                                        timeout=50),
        measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
    )

    tuner.tune(tune_option)

    # Compile the whole network
    print("Compile...")
    with auto_scheduler.ApplyHistoryBest(log_file):
        with tvm.transform.PassContext(
                opt_level=3, config={"relay.backend.use_auto_scheduler":
                                     True}):
            lib = relay.build(mod,
                              target=target,
                              target_host=target_host,
                              params=params)

    # Create graph runtime
    print("=============== Request Remote ===============")
    from tvm.auto_scheduler.utils import request_remote

    remote = request_remote(device_key, "0.0.0.0", 9190)
    ctx = remote.cl()
    from tvm.contrib import utils, ndk

    temp = utils.tempdir()
    filename = "deploy_lib.so"
    path_lib = temp.relpath(filename)
    lib.export_library(path_lib, ndk.create_shared)
    remote.upload(path_lib)
    loaded_lib = remote.load_module(filename)
    module = graph_runtime.GraphModule(loaded_lib["default"](ctx))
    data = (np.random.uniform(size=input_shape)).astype(dtype)
    data_tvm = tvm.nd.array(data)
    module.set_input("data", data_tvm)

    # Evaluate
    print("Evaluate inference time cost...")
    ftimer = module.module.time_evaluator("run",
                                          ctx,
                                          repeat=3,
                                          min_repeat_ms=500)
    prof_res = np.array(ftimer().results) * 1e3  # convert to millisecond
    print("Mean inference time (std dev): %.2f ms (%.2f ms)" %
          (np.mean(prof_res), np.std(prof_res)))
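The globals consumed above (tasks, task_weights, mod, params, target, input_shape, dtype, log_file, ...) come from the surrounding auto_scheduler script; a hedged sketch of the usual preparation, where get_network is that script's own helper and the target/model name are placeholders, is:
# placeholder target and model name; get_network is the tutorial's helper
target = tvm.target.Target("llvm")
mod, params, input_shape, output_shape = get_network("mobilenet", batch_size=1,
                                                     layout="NHWC", dtype="float32")
tasks, task_weights = auto_scheduler.extract_tasks(mod["main"], params, target)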
Example #19
def benchmark(network, batch_size, dtype, target, log_file, repeat):
    layout = "NHWC"
    mod, params, input_name, input_shape, output_shape = get_network(
        network, batch_size, dtype, layout
    )

    assert os.path.exists(log_file), "The log file '%s' does not exist." % log_file
    print("Use log file %s" % log_file)

    if network in ["bert"]:
        # Build module
        with auto_scheduler.ApplyHistoryBest(log_file):
            with tvm.transform.PassContext(
                opt_level=3, config={"relay.backend.use_auto_scheduler": True}
            ):
                lib = relay.build(mod, target=target, params=params)

        ctx = tvm.context(str(target), 0)
        module = runtime.GraphModule(lib["default"](ctx))

        # Feed input data
        seq_length = input_shape[0][1]
        data = np.random.uniform(size=input_shape[0])
        token_types = np.random.uniform(size=input_shape[1])
        valid_length = np.array([seq_length] * batch_size)
        module.set_input(data0=data, data1=token_types, data2=valid_length)
    else:
        # Build module
        with auto_scheduler.ApplyHistoryBest(log_file):
            with tvm.transform.PassContext(
                opt_level=3, config={"relay.backend.use_auto_scheduler": True}
            ):
                lib = relay.build(mod, target=target, params=params)
        ctx = tvm.context(str(target), 0)
        module = runtime.GraphModule(lib["default"](ctx))

        # Feed input data
        data = np.random.uniform(size=input_shape)
        module.set_input(input_name, data)

    # Evaluate
    ftimer = module.module.time_evaluator("run", ctx, min_repeat_ms=500, repeat=repeat)
    return np.array(ftimer().results)
Example #20
def run(lib, ctx):
    # Build TVM runtime
    m = graph_runtime.GraphModule(lib["default"](ctx))
    tvm_input = tvm.nd.array(x.asnumpy(), ctx=ctx)
    m.set_input("data", tvm_input)
    # execute
    m.run()
    # get outputs
    class_IDs, scores, bounding_boxs = m.get_output(0), m.get_output(1), m.get_output(2)
    return class_IDs, scores, bounding_boxs
Example #21
def get_tvm_output(xs, target, ctx, dtype="float32"):
    shape_dict = {name: x.shape for (name, x) in zip(keras_model.input_names, xs)}
    mod, params = relay.frontend.from_keras(keras_model, shape_dict, layout=layout)
    with tvm.transform.PassContext(opt_level=2):
        lib = relay.build(mod, target, params=params)
    m = graph_runtime.GraphModule(lib["default"](ctx))
    for name, x in zip(keras_model.input_names, xs):
        m.set_input(name, tvm.nd.array(x.astype(dtype)))
    m.run()
    return [m.get_output(i).asnumpy() for i in range(m.get_num_outputs())]
Example #22
    def verify_rpc_gpu_remove_package_params(obj_format):
        if not tvm.testing.device_enabled("cuda"):
            print("Skip because cuda is not enabled")
            return
        mod, params = relay.testing.synthetic.get_workload()
        with relay.build_config(opt_level=3):
            complied_graph_lib = relay.build_module.build(mod,
                                                          "cuda",
                                                          params=params)

        from tvm.contrib import utils

        temp = utils.tempdir()
        if obj_format == ".so":
            file_name = "deploy_lib.so"
        else:
            assert obj_format == ".tar"
            file_name = "deploy_lib.tar"
        path_lib = temp.relpath(file_name)
        complied_graph_lib_no_params = complied_graph_lib["remove_params"]()
        complied_graph_lib_no_params.export_library(path_lib)
        path_params = temp.relpath("deploy_param.params")
        with open(path_params, "wb") as fo:
            fo.write(runtime.save_param_dict(complied_graph_lib.get_params()))

        from tvm import rpc

        remote = rpc.LocalSession()
        remote.upload(path_lib)
        loaded_lib = remote.load_module(path_lib)
        data = np.random.uniform(-1, 1,
                                 size=input_shape(mod)).astype("float32")
        dev = remote.gpu()

        # raw api
        gmod = loaded_lib["default"](dev)
        set_input = gmod["set_input"]
        run = gmod["run"]
        get_output = gmod["get_output"]
        load_params = gmod["load_params"]
        loaded_params = bytearray(open(path_params, "rb").read())
        set_input("data", tvm.nd.array(data, device=dev))
        load_params(loaded_params)
        run()
        out = get_output(0).asnumpy()
        tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

        # graph runtime wrapper
        gmod = graph_runtime.GraphModule(loaded_lib["default"](dev))
        loaded_params = bytearray(open(path_params, "rb").read())
        gmod.set_input("data", data)
        gmod.load_params(loaded_params)
        gmod.run()
        out = gmod.get_output(0).asnumpy()
        tvm.testing.assert_allclose(out, verify(data), atol=1e-5)
Example #23
def tf2tvm_runner(model_name, batch_size=1, backend='cuda'):
    # tvm's cuda backend has issues with mobilenet
    if model_name == 'mobilenet' and backend == 'cuda':
        return None
    model, shape = util.tf_keras_model(model_name)
    # TODO: why does tvm need the reversed shape?
    shape = shape[::-1]
    data = np.random.rand(batch_size, *shape)
    # input_name has to match the model's input name;
    # use model.input_names[0] instead of 'input_1' so different models can be compiled in the same round
    # TODO: why can the same model be compiled with the cuda/llvm backends in the same process? (models for different backends don't affect each other?)
    input_name = model.input_names[0]
    shape_dict = {input_name: data.shape}
    mod, params = relay.frontend.from_keras(model, shape_dict)

    if backend == 'llvm':
        with tvm.transform.PassContext(opt_level=3):
            lib = relay.build(mod,
                              target='llvm',
                              target_host='llvm',
                              params=params)

        ctx = tvm.cpu()
        module = graph_runtime.GraphModule(lib["default"](ctx))
    else:
        with tvm.transform.PassContext(opt_level=3):
            # has to specify target to tvm.target.cuda(), 'cuda' doesn't work
            lib = relay.build(mod, target=tvm.target.cuda(), params=params)

        ctx = tvm.gpu()
        module = graph_runtime.GraphModule(lib["default"](ctx))

    # FIXME: why is float32 necessary here? it failed with float64
    dtype = "float32"
    data = tvm.nd.array(data.astype(dtype))

    def runner(data_size):
        for _ in range(data_size // batch_size):
            module.set_input(input_name, data)
            module.run()
            tvm_output = module.get_output(0)

    return runner
    def verify_rpc_gpu_remove_package_params(obj_format):
        if not tvm.runtime.enabled("cuda"):
            print("Skip because cuda is not enabled")
            return
        mod, params = relay.testing.resnet.get_workload(num_layers=18)
        with relay.build_config(opt_level=3):
            complied_graph_lib = relay.build_module.build(mod,
                                                          "cuda",
                                                          params=params)

        from tvm.contrib import util
        temp = util.tempdir()
        if obj_format == ".so":
            file_name = "deploy_lib.so"
        else:
            assert obj_format == ".tar"
            file_name = "deploy_lib.tar"
        path_lib = temp.relpath(file_name)
        complied_graph_lib_no_params = complied_graph_lib["remove_params"]()
        complied_graph_lib_no_params.export_library(path_lib)
        path_params = temp.relpath("deploy_param.params")
        with open(path_params, "wb") as fo:
            fo.write(relay.save_param_dict(complied_graph_lib.get_params()))

        from tvm import rpc
        server = rpc.Server("localhost", use_popen=True)
        remote = rpc.connect(server.host, server.port)
        remote.upload(path_lib)
        loaded_lib = remote.load_module(path_lib)
        data = np.random.uniform(-1, 1,
                                 size=(1, 3, 224, 224)).astype("float32")
        ctx = remote.gpu()

        # raw api
        gmod = loaded_lib['default'](ctx)
        set_input = gmod["set_input"]
        run = gmod["run"]
        get_output = gmod["get_output"]
        load_params = gmod["load_params"]
        loaded_params = bytearray(open(path_params, "rb").read())
        set_input("data", tvm.nd.array(data, ctx=ctx))
        load_params(loaded_params)
        run()
        out = get_output(0).asnumpy()
        tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

        # graph runtime wrapper
        gmod = graph_runtime.GraphModule(loaded_lib['default'](ctx))
        loaded_params = bytearray(open(path_params, "rb").read())
        gmod.set_input("data", data)
        gmod.load_params(loaded_params)
        gmod.run()
        out = gmod.get_output(0).asnumpy()
        tvm.testing.assert_allclose(out, verify(data), atol=1e-5)
Example #25
def verify(data):
    mod, params = relay.testing.synthetic.get_workload(
        input_shape=input_shape)
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build_module.build(mod, "llvm", params=params)
    ctx = tvm.cpu()
    module = graph_runtime.GraphModule(lib["default"](ctx))
    module.set_input("data", data)
    module.run()
    out = module.get_output(0).asnumpy()
    return out
Example #26
def torch2tvm_runner(model_name, batch_size=1, backend='cuda'):
    # TODO: add batch
    input_name = "input0"
    model, shape = util.torch_model(model_name)
    model.eval()
    data = torch.randn([batch_size] + shape, dtype=torch.float32)

    shape_list = [(input_name, data.shape)]
    scripted_model = torch.jit.trace(model, data).eval()
    mod, params = relay.frontend.from_pytorch(scripted_model, shape_list)

    # TODO: how opt_level affects performance
    opt_level = 3
    if backend == 'llvm':
        with tvm.transform.PassContext(opt_level=opt_level):
            lib = relay.build(mod,
                              target='llvm',
                              target_host='llvm',
                              params=params)

        ctx = tvm.cpu()
        module = graph_runtime.GraphModule(lib["default"](ctx))
    else:
        target = tvm.target.cuda()
        with tvm.transform.PassContext(opt_level=opt_level):
            lib = relay.build(mod, target, params=params)

        ctx = tvm.gpu()
        module = graph_runtime.GraphModule(lib["default"](ctx))

    data = tvm.nd.array(data)

    def runner(data_size):
        for _ in range(data_size // batch_size):
            data = torch.randn([batch_size] + shape, dtype=torch.float32)
            module.set_input(input_name, data)
            module.run()
            tvm_output = module.get_output(0)

    return runner
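A hypothetical driver for the factory above; util.torch_model's accepted names are not shown here, so "resnet18" is purely illustrative.
# "resnet18" is a placeholder name for util.torch_model
runner = torch2tvm_runner("resnet18", batch_size=1, backend="llvm")
runner(data_size=8)  # 8 forward passes in batches of 1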
Example #27
def onnx2tvm_runner(model_name, batch_size=1, backend='cuda'):
    model, shape = util.onnx_model(model_name)

    data = np.random.rand(batch_size, *shape)
    input_name = model.graph.input[0].name

    shape_dict = {input_name: tuple([batch_size, *shape])}
    mod, params = relay.frontend.from_onnx(model, shape_dict)

    # TODO: how opt_level affects performance
    opt_level = 3
    if backend == 'llvm':
        with tvm.transform.PassContext(opt_level=opt_level):
            lib = relay.build(mod,
                              target='llvm',
                              target_host='llvm',
                              params=params)

        ctx = tvm.cpu()
        module = graph_runtime.GraphModule(lib["default"](ctx))
        module.set_input(input_name, data)
    else:
        target = tvm.target.cuda()
        with tvm.transform.PassContext(opt_level=opt_level):
            lib = relay.build(mod, target, params=params)

        ctx = tvm.gpu()
        module = graph_runtime.GraphModule(lib["default"](ctx))
        module.set_input(input_name, data)

    dtype = "float32"
    data = tvm.nd.array(data.astype(dtype))

    def runner(data_size):
        for _ in range(data_size // batch_size):
            module.set_input(input_name, data)
            module.run()

    return runner
Example #28
def run_tvm_graph(coreml_model,
                  target,
                  ctx,
                  input_data,
                  input_name,
                  output_shape,
                  output_dtype="float32"):
    """ Generic function to compile on relay and execute on tvm """
    if isinstance(input_data, list):
        shape_dict = {}
        dtype_dict = {}
        for i, e in enumerate(input_name):
            shape_dict[e] = input_data[i].shape
            dtype_dict[e] = input_data[i].dtype
    else:
        shape_dict = {input_name: input_data.shape}
        dtype_dict = {input_name: input_data.dtype}

    mod, params = relay.frontend.from_coreml(coreml_model, shape_dict)
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(mod, target, params=params)

    from tvm.contrib import graph_runtime

    m = graph_runtime.GraphModule(lib["default"](ctx))
    # set inputs
    if isinstance(input_data, list):
        for i, e in enumerate(input_name):
            m.set_input(
                e, tvm.nd.array(input_data[i].astype(input_data[i].dtype)))
    else:
        m.set_input(input_name,
                    tvm.nd.array(input_data.astype(input_data.dtype)))

    # execute
    m.run()
    # get outputs
    if isinstance(output_shape, list) and isinstance(output_dtype, list):
        tvm_output_list = []
        for i, s in enumerate(output_shape):
            tvm_output = m.get_output(i, tvm.nd.empty((s), output_dtype[i]))
            tvm_output_list.append(tvm_output.asnumpy())
        return tvm_output_list
    else:
        if not output_shape:
            tvm_output = m.get_output(0)
        else:
            tvm_output = m.get_output(
                0, tvm.nd.empty((output_shape), output_dtype))
        return tvm_output.asnumpy()
Example #29
def evaluate(lib, ctx, name_n_data, dtype):
    # Setup runtime module.
    mod = runtime.GraphModule(lib["default"](ctx))
    for name, data in name_n_data.items():
        mod.set_input(name, data)

    # Evaluate performance.
    sys.stderr.write("Evaluate inference time cost...\n")
    ftimer = mod.module.time_evaluator("run",
                                       ctx,
                                       number=5,
                                       min_repeat_ms=1000)
    prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
    sys.stderr.write("Median inference time: %.2f ms\n" % np.median(prof_res))
Example #30
def run(lib, inputs, outputs, npu=True):
    # Export and load lib to confirm this works
    lib_name = "mod.so"
    temp = util.tempdir()
    lib_path = temp.relpath(lib_name)
    lib.export_library(lib_path)
    lib = tvm.runtime.load_module(lib_path)
    module = graph_runtime.GraphModule(lib["default"](tvm.cpu()))
    module.set_input(**inputs)
    module.run()
    out = [module.get_output(i) for i in range(outputs)]
    if not npu:
        inference_result(0, out)
    return out