# Common imports assumed by the snippets below; some functions also rely on
# frontend-specific modules (paddle, autotvm, auto_scheduler, ...) imported
# in their original files.
import numpy as np

import tvm
from tvm import relay
from tvm.contrib import graph_executor, utils


def generate_ref_data(mod, input_data, params=None, target="llvm"):
    """Generate reference data by executing the relay module."""
    with tvm.transform.PassContext(opt_level=3, config={"tir.disable_vectorize": True}):
        lib = relay.build(mod, target=target, params=params)

    lib_name = "mod.so"
    temp = utils.tempdir()
    lib_path = temp.relpath(lib_name)
    lib.export_library(lib_path)
    lib = tvm.runtime.load_module(lib_path)
    grt_mod = graph_executor.GraphModule(lib["default"](tvm.cpu()))
    grt_mod.set_input(**input_data)
    grt_mod.run()
    output_count = grt_mod.get_num_outputs()
    out = [grt_mod.get_output(i).numpy() for i in range(output_count)]

    if isinstance(mod, tvm.relay.Function):
        main = mod
    else:
        main = mod["main"]

    if main.attrs is None or main.attrs["output_tensor_names"] is None:
        output_tensor_names = ["output" if i == 0 else f"output{i+1}" for i in range(output_count)]
    else:
        output_tensor_names = main.attrs["output_tensor_names"]

    return dict(zip(output_tensor_names, out))
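# A minimal usage sketch for generate_ref_data, assuming a toy relay module
# with a single "data" input; the input and parameter names here are
# illustrative, not fixed by the helper itself.
def _example_generate_ref_data():
    data_var = relay.var("data", shape=(1, 4), dtype="float32")
    weight = relay.var("weight", shape=(1, 4), dtype="float32")
    func = relay.Function([data_var, weight], relay.add(data_var, weight))
    mod = tvm.IRModule.from_expr(func)
    params = {"weight": np.ones((1, 4), dtype="float32")}
    input_data = {"data": np.zeros((1, 4), dtype="float32")}
    # Single output and no output_tensor_names attr, so the key is "output".
    return generate_ref_data(mod, input_data, params=params)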
def _get_tvm_output(net, data, build_dtype="float32", states=None):
    """Compute TVM output for a darknet network."""
    dtype = "float32"
    mod, params = relay.frontend.from_darknet(net, data.shape, dtype)

    # Verify that from_darknet creates a valid, parsable relay program.
    mod = relay.transform.InferType()(mod)
    astext(mod)

    target = "llvm"
    lib = relay.build(mod, target, params=params)

    # Execute on TVM.
    dev = tvm.cpu(0)
    m = graph_executor.GraphModule(lib["default"](dev))
    # Set inputs.
    m.set_input("data", tvm.nd.array(data.astype(dtype)))
    if states:
        for name in states.keys():
            m.set_input(name, tvm.nd.array(states[name].astype(dtype)))
    m.run()
    # Get outputs.
    tvm_out = []
    for i in range(m.get_num_outputs()):
        tvm_out.append(m.get_output(i).numpy())
    return tvm_out
def infer_value(input_val, params, mod=None):
    """A hack for getting the value of an expression by evaluating a
    portion of the relay graph. This is often needed for functions whose
    output shape depends on the value of a tensor.
    """
    # Check that all free variables have associated parameters.
    assert all(
        var.name_hint in params.keys() for var in analysis.free_vars(input_val)
    ), "All inputs to infer must be available in params."
    try:
        # TODO(kevinthesun): Use VM for all cases.
        # pylint: disable=import-outside-toplevel
        from tvm.contrib import graph_executor

        # Binding the free variables through `params` bakes every input into
        # the compiled graph, so it can run without explicit set_input calls.
        func = _function.Function(analysis.free_vars(input_val), input_val)
        with tvm.transform.PassContext(opt_level=0):
            lib = tvm.relay.build(func, target="llvm", params=params)
        dev = tvm.cpu(0)
        m = graph_executor.GraphModule(lib["default"](dev))
        m.run()
        return m.get_output(0)
    except Exception:
        if isinstance(mod, IRModule):
            mod["main"] = _function.Function(analysis.free_vars(input_val), input_val)
        else:
            mod = IRModule.from_expr(input_val)
        exc = tvm.relay.create_executor("debug", mod=mod, device=tvm.cpu(), target="llvm")
        inputs = []
        for param in mod["main"].params:
            inputs.append(params[param.name_hint])
        result = exc.evaluate()(*inputs)
        return result
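# A minimal usage sketch for infer_value, assuming the surrounding module
# imports (analysis, _function, IRModule) come from tvm.relay as in the
# frontend code this helper belongs to. The expression folds a parameter
# through an add, so its value is computable at import time.
def _example_infer_value():
    x = relay.var("x", shape=(2, 2), dtype="float32")
    expr = relay.add(x, relay.const(1.0, "float32"))
    params = {"x": tvm.nd.array(np.zeros((2, 2), dtype="float32"))}
    # All free vars of `expr` are in `params`, so the fast graph path applies.
    return infer_value(expr, params)  # NDArray of all ones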
def test_cpu():
    if not tvm.testing.device_enabled("llvm"):
        print("Skip because llvm is not enabled")
        return
    mod, params = relay.testing.synthetic.get_workload()
    with relay.build_config(opt_level=3):
        compiled_graph_lib = relay.build_module.build(mod, "llvm", params=params)
    data = np.random.uniform(-1, 1, size=input_shape(mod)).astype("float32")

    # raw api
    dev = tvm.cpu()
    gmod = compiled_graph_lib["default"](dev)
    set_input = gmod["set_input"]
    run = gmod["run"]
    get_output = gmod["get_output"]
    set_input("data", tvm.nd.array(data))
    run()
    out = get_output(0).numpy()
    tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

    # graph executor wrapper
    gmod = graph_executor.GraphModule(compiled_graph_lib["default"](dev))
    gmod.set_input("data", data)
    gmod.run()
    out = gmod.get_output(0).numpy()
    tvm.testing.assert_allclose(out, verify(data), atol=1e-5)
def test_graph_executor_remote_run(host, port):
    remote_session = rpc.connect(host, port)
    target = tvm.target.Target(target=f"llvm -mtriple={ARCH}-apple-darwin")
    device = remote_session.cpu(0)

    size = 100
    a = np.random.uniform(size=size).astype(DTYPE)
    b = np.random.uniform(size=size).astype(DTYPE)
    mod, params = get_add_relay_module(a, b)
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(mod, target=target, target_host=target, params=params)

    path_dso = export_lib(lib)
    remote_session.upload(path_dso)
    lib = remote_session.load_module(DSO_NAME)

    gen_module = graph_executor.GraphModule(lib["default"](device))

    # Check set input
    gen_module.set_input("a", tvm.nd.array(a))
    gen_module.set_input("b", tvm.nd.array(b))
    tvm.testing.assert_allclose(gen_module.get_input(0).numpy(), a)
    tvm.testing.assert_allclose(gen_module.get_input(1).numpy(), b)

    # Check run
    gen_module.run()
    out = gen_module.get_output(0)
    tvm.testing.assert_allclose(out.numpy(), a + b)
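# The test above relies on helpers and constants defined elsewhere in its
# original file (ARCH, DTYPE, DSO_NAME, get_add_relay_module, export_lib).
# A plausible sketch of those definitions, for illustration only -- the real
# ones may differ (e.g. the export step may use an Xcode or NDK toolchain
# for cross-compilation):
ARCH = "arm64"  # assumption
DTYPE = "float32"  # assumption
DSO_NAME = "deploy.dylib"  # assumption
_TEMP = utils.tempdir()  # module scope, so the file outlives export_lib()


def get_add_relay_module(a, b):
    # Elementwise a + b over the given input shapes; no bound params.
    var_a = relay.var("a", shape=a.shape, dtype=DTYPE)
    var_b = relay.var("b", shape=b.shape, dtype=DTYPE)
    func = relay.Function([var_a, var_b], var_a + var_b)
    return tvm.IRModule.from_expr(func), {}


def export_lib(lib):
    # Export the compiled library to a path that can be uploaded over RPC.
    path_dso = _TEMP.relpath(DSO_NAME)
    lib.export_library(path_dso)
    return path_dso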
def tune_and_evaluate():
    print("Begin tuning...")
    tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
    tune_option = auto_scheduler.TuningOptions(
        num_measure_trials=200,
        builder=auto_scheduler.LocalBuilder(build_func="ndk"),
        runner=auto_scheduler.RPCRunner(
            device_key,
            host=rpc_host,
            port=rpc_port,
            timeout=30,
            repeat=1,
            min_repeat_ms=200,
            enable_cpu_cache_flush=True,
        ),
        measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
    )
    tuner.tune(tune_option)

    # Compile with the history best
    print("Compile...")
    with auto_scheduler.ApplyHistoryBest(log_file):
        with tvm.transform.PassContext(
            opt_level=3, config={"relay.backend.use_auto_scheduler": True}
        ):
            lib = relay.build(mod, target=target, params=params)

    # Export library
    tmp = tempdir()
    filename = "net.so"
    lib.export_library(tmp.relpath(filename), ndk.create_shared)

    # Upload module to device
    print("Upload...")
    remote = auto_scheduler.utils.request_remote(device_key, rpc_host, rpc_port, timeout=10000)
    remote.upload(tmp.relpath(filename))
    rlib = remote.load_module(filename)

    # Create graph executor
    dev = remote.cpu()
    module = graph_executor.GraphModule(rlib["default"](dev))
    for key, value in shape_dict.items():
        data_tvm = tvm.nd.array((np.random.uniform(size=value)).astype("float32"))
        module.set_input(key, data_tvm)

    # Evaluate
    print("Evaluate inference time cost...")
    ftimer = module.module.time_evaluator("run", dev, repeat=3, min_repeat_ms=500)
    prof_res = np.array(ftimer().results) * 1e3  # convert to millisecond
    print("Mean inference time (std dev): %.2f ms (%.2f ms)" % (np.mean(prof_res), np.std(prof_res)))
def run_tvm(lib):
    from tvm.contrib import graph_executor

    # `data` is the preprocessed input, expected at module scope.
    rt_mod = graph_executor.GraphModule(lib["default"](tvm.cpu(0)))
    rt_mod.set_input("input", data)
    rt_mod.run()
    tvm_res = rt_mod.get_output(0).numpy()
    # Indices of the top-5 scores, highest first.
    tvm_pred = np.squeeze(tvm_res).argsort()[-5:][::-1]
    return tvm_pred, rt_mod
def verify(data):
    mod, params = relay.testing.synthetic.get_workload(input_shape=input_shape)
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build_module.build(mod, "llvm", params=params)
    dev = tvm.cpu()
    module = graph_executor.GraphModule(lib["default"](dev))
    module.set_input("data", data)
    module.run()
    out = module.get_output(0).numpy()
    return out
def get_tvm_output(xs, target, dev, dtype="float32"):
    shape_dict = {name: x.shape for (name, x) in zip(keras_model.input_names, xs)}
    mod, params = relay.frontend.from_keras(keras_model, shape_dict, layout=layout)
    with tvm.transform.PassContext(opt_level=2):
        lib = relay.build(mod, target, params=params)
    m = graph_executor.GraphModule(lib["default"](dev))
    for name, x in zip(keras_model.input_names, xs):
        m.set_input(name, tvm.nd.array(x.astype(dtype)))
    m.run()
    return [m.get_output(i).numpy() for i in range(m.get_num_outputs())]
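# get_tvm_output above reads `keras_model` and `layout` from module scope.
# A usage sketch assuming TF 2.x-style Keras; the model, layout, and shapes
# are illustrative and the exact Keras API surface varies between versions:
def _example_keras_get_tvm_output():
    global keras_model, layout
    from tensorflow import keras

    keras_model = keras.Sequential(
        [keras.layers.InputLayer(input_shape=(4,)), keras.layers.Dense(2)]
    )
    layout = "NCHW"
    xs = [np.random.uniform(size=(1, 4)).astype("float32")]
    return get_tvm_output(xs, "llvm", tvm.cpu(0))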
def evaluate_performance(lib, data_shape):
    # upload parameters to device
    dev = tvm.cpu()
    data_tvm = tvm.nd.array((np.random.uniform(size=data_shape)).astype(dtype))
    module = runtime.GraphModule(lib["default"](dev))
    module.set_input(input_name, data_tvm)

    # evaluate
    print("Evaluate inference time cost...")
    print(module.benchmark(dev, number=100, repeat=3))
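# evaluate_performance assumes module-level `dtype`, `input_name`, and
# `runtime` (in the TVM tutorials this is usually
# `from tvm.contrib import graph_executor as runtime`). A usage sketch with
# mobilenet, setting the assumed globals:
def _example_evaluate_performance():
    global dtype, input_name
    dtype, input_name = "float32", "data"
    mod, params = relay.testing.mobilenet.get_workload(batch_size=1)
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(mod, "llvm", params=params)
    evaluate_performance(lib, data_shape=(1, 3, 224, 224))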
def run(lib, dev):
    # Build TVM runtime
    m = graph_executor.GraphModule(lib["default"](dev))
    tvm_input = tvm.nd.array(x.asnumpy(), device=dev)
    m.set_input("data", tvm_input)
    # execute
    m.run()
    # get outputs
    class_IDs, scores, bounding_boxes = m.get_output(0), m.get_output(1), m.get_output(2)
    return class_IDs, scores, bounding_boxes
def verify_rpc_gpu_remove_package_params(obj_format):
    if not tvm.testing.device_enabled("cuda"):
        print("Skip because cuda is not enabled")
        return
    mod, params = relay.testing.synthetic.get_workload()
    with relay.build_config(opt_level=3):
        compiled_graph_lib = relay.build_module.build(mod, "cuda", params=params)

    from tvm.contrib import utils

    temp = utils.tempdir()
    if obj_format == ".so":
        file_name = "deploy_lib.so"
    else:
        assert obj_format == ".tar"
        file_name = "deploy_lib.tar"
    path_lib = temp.relpath(file_name)
    compiled_graph_lib_no_params = compiled_graph_lib["remove_params"]()
    compiled_graph_lib_no_params.export_library(path_lib)
    path_params = temp.relpath("deploy_param.params")
    with open(path_params, "wb") as fo:
        fo.write(runtime.save_param_dict(compiled_graph_lib.get_params()))

    from tvm import rpc

    remote = rpc.LocalSession()
    remote.upload(path_lib)
    loaded_lib = remote.load_module(path_lib)
    data = np.random.uniform(-1, 1, size=input_shape(mod)).astype("float32")
    dev = remote.cuda()

    # raw api
    gmod = loaded_lib["default"](dev)
    set_input = gmod["set_input"]
    run = gmod["run"]
    get_output = gmod["get_output"]
    load_params = gmod["load_params"]
    loaded_params = bytearray(open(path_params, "rb").read())
    set_input("data", tvm.nd.array(data, device=dev))
    load_params(loaded_params)
    run()
    out = get_output(0).numpy()
    tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

    # graph executor wrapper
    gmod = graph_executor.GraphModule(loaded_lib["default"](dev))
    loaded_params = bytearray(open(path_params, "rb").read())
    gmod.set_input("data", data)
    gmod.load_params(loaded_params)
    gmod.run()
    out = gmod.get_output(0).numpy()
    tvm.testing.assert_allclose(out, verify(data), atol=1e-5)
def tune_and_evaluate(tuning_opt):
    # extract workloads from relay program
    print("Extract tasks...")
    mod, params, input_shape, _ = get_network(network, batch_size=1)
    tasks = autotvm.task.extract_from_program(
        mod["main"], target=target, params=params, ops=(relay.op.get("nn.conv2d"),)
    )

    # run tuning tasks
    print("Tuning...")
    tune_tasks(tasks, **tuning_opt)

    # compile kernels with history best records
    with autotvm.apply_history_best(log_file):
        print("Compile...")
        with tvm.transform.PassContext(opt_level=3):
            lib = relay.build_module.build(mod, target=target, params=params)

        # export library
        tmp = tempdir()
        if use_android:
            from tvm.contrib import ndk

            filename = "net.so"
            lib.export_library(tmp.relpath(filename), ndk.create_shared)
        else:
            filename = "net.tar"
            lib.export_library(tmp.relpath(filename))

        # upload module to device
        print("Upload...")
        remote = autotvm.measure.request_remote(device_key, "0.0.0.0", 9190, timeout=10000)
        remote.upload(tmp.relpath(filename))
        rlib = remote.load_module(filename)

        # upload parameters to device
        dev = remote.device(str(target), 0)
        module = runtime.GraphModule(rlib["default"](dev))
        data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))
        # The input tensor name depends on the frontend; this network uses "input".
        input_tensor = "input"
        module.set_input(input_tensor, data_tvm)

        # evaluate
        print("Evaluate inference time cost...")
        ftimer = module.module.time_evaluator("run", dev, number=1, repeat=10)
        prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
        print(
            "Mean inference time (std dev): %.2f ms (%.2f ms)"
            % (np.mean(prof_res), np.std(prof_res))
        )
def compile_raw_onnx_model(onnx_model, img_data, target="llvm", input_name="data"):
    shape_dict = {input_name: img_data.shape}
    mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)
    with tvm.transform.PassContext(opt_level=0):
        lib = relay.build(mod, target=target, params=params)
    dev = tvm.device(str(target), 0)
    module = graph_executor.GraphModule(lib["default"](dev))
    return module, params, target, mod
def compile_tuned_onnx_model(tuning_option, mod, params, transform, target="llvm"):
    with tvm.autotvm.apply_history_best(tuning_option["tuning_records"]):
        with tvm.transform.PassContext(opt_level=3):
            lib = relay.build(mod, target=target, params=params)
    dev = tvm.device(str(target), 0)
    module = graph_executor.GraphModule(lib["default"](dev))
    return module
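# How the two ONNX helpers above fit together, as a sketch. The model file,
# its input name, and the "tuning_records" path are assumptions modeled on
# the TVMC/autotvm tutorials, not fixed by the helpers themselves:
def _example_compile_onnx_pipeline():
    import onnx

    onnx_model = onnx.load("resnet50-v2-7.onnx")  # hypothetical path
    img_data = np.random.uniform(size=(1, 3, 224, 224)).astype("float32")
    module, params, target, mod = compile_raw_onnx_model(onnx_model, img_data)

    # After an autotvm tuning run has written records to this file:
    tuning_option = {"tuning_records": "resnet-50-v2-autotuning.json"}
    tuned_module = compile_tuned_onnx_model(tuning_option, mod, params, transform=None)

    tuned_module.set_input("data", img_data)
    tuned_module.run()
    return tuned_module.get_output(0).numpy()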
def verify_model(func, input_data, rtol=1e-5, atol=1e-5):
    if not isinstance(input_data, (tuple, list)):
        input_data = [input_data]

    input_spec = []
    input_names = []
    input_shape_dict = {}
    compiled_input = {}
    for idx, data in enumerate(input_data):
        input_name = "input{}".format(idx)
        input_spec.append(
            paddle.static.InputSpec(dtype=data.dtype, shape=data.shape, name=input_name)
        )
        input_names.append(input_name)
        input_shape_dict[input_name] = data.shape
        if isinstance(data, np.ndarray):
            compiled_input[input_name] = data
        else:
            compiled_input[input_name] = data.numpy()

    baseline_model = get_paddle_model(func, input_spec)
    baseline_outputs = baseline_model(*[t[:] for t in input_data])

    # get paddle outputs
    if isinstance(baseline_outputs, (tuple, list)):
        baseline_outputs = tuple(out.numpy() for out in baseline_outputs)
    else:
        baseline_outputs = (baseline_outputs.numpy(),)

    mod, params = relay.frontend.from_paddle(baseline_model, input_shape_dict)
    params_num = min(len(input_names), len(mod["main"].params))
    compiled_names = []
    for arg in mod["main"].params[:params_num]:
        assert arg.name_hint in input_names or arg.name_hint in params
        if arg.name_hint in input_names:
            compiled_names.append(arg.name_hint)

    with tvm.transform.PassContext(opt_level=3):
        for target, dev in tvm.testing.enabled_targets():
            lib = relay.build(mod, target=target, params=params)
            gmod = graph_executor.GraphModule(lib["default"](dev))
            for name in compiled_names:
                gmod.set_input(name, compiled_input[name])
            gmod.run()
            for i, baseline_output in enumerate(baseline_outputs):
                compiled_output = gmod.get_output(i).numpy()
                assert_shapes_match(baseline_output, compiled_output)
                tvm.testing.assert_allclose(baseline_output, compiled_output, rtol=rtol, atol=atol)
def tune_and_evaluate():
    print("Begin tuning...")
    tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
    tune_option = auto_scheduler.TuningOptions(
        num_measure_trials=200,  # change this to 20000 to achieve the best performance
        builder=auto_scheduler.LocalBuilder(build_func="ndk" if use_ndk else "default"),
        runner=auto_scheduler.RPCRunner(
            device_key, host="0.0.0.0", port=9190, repeat=3, timeout=50
        ),
        measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
    )
    tuner.tune(tune_option)

    # Compile the whole network
    print("Compile...")
    with auto_scheduler.ApplyHistoryBest(log_file):
        with tvm.transform.PassContext(
            opt_level=3, config={"relay.backend.use_auto_scheduler": True}
        ):
            lib = relay.build(mod, target, params=params)

    # Create graph executor
    print("=============== Request Remote ===============")
    from tvm.auto_scheduler.utils import request_remote

    remote = request_remote(device_key, "0.0.0.0", 9190)
    dev = remote.cl()
    from tvm.contrib import utils, ndk

    temp = utils.tempdir()
    filename = "deploy_lib.so"
    path_lib = temp.relpath(filename)
    lib.export_library(path_lib, ndk.create_shared)
    remote.upload(path_lib)
    loaded_lib = remote.load_module(filename)
    module = graph_executor.GraphModule(loaded_lib["default"](dev))
    data = (np.random.uniform(size=input_shape)).astype(dtype)
    data_tvm = tvm.nd.array(data)
    module.set_input("data", data_tvm)

    # Evaluate
    print("Evaluate inference time cost...")
    ftimer = module.module.time_evaluator("run", dev, repeat=3, min_repeat_ms=500)
    prof_res = np.array(ftimer().results) * 1e3  # convert to millisecond
    print("Mean inference time (std dev): %.2f ms (%.2f ms)" % (np.mean(prof_res), np.std(prof_res)))
def tune_and_evaluate():
    print("Begin tuning...")
    tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
    tune_option = auto_scheduler.TuningOptions(
        num_measure_trials=200,  # change this to 20000 to achieve the best performance
        builder=auto_scheduler.LocalBuilder(build_func="ndk" if use_ndk else "default"),
        runner=auto_scheduler.RPCRunner(
            device_key,
            host=rpc_host,
            port=rpc_port,
            timeout=30,
            repeat=1,
            min_repeat_ms=200,
            enable_cpu_cache_flush=True,
        ),
        measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
    )
    tuner.tune(tune_option)

    # Compile with the history best
    print("Compile...")
    with auto_scheduler.ApplyHistoryBest(log_file):
        with tvm.transform.PassContext(
            opt_level=3, config={"relay.backend.use_auto_scheduler": True}
        ):
            lib = relay.build(mod, target=target, params=params)

    # Export library
    tmp = tempdir()
    if use_ndk:
        from tvm.contrib import ndk

        filename = "net.so"
        lib.export_library(tmp.relpath(filename), ndk.create_shared)
    else:
        filename = "net.tar"
        lib.export_library(tmp.relpath(filename))

    # Upload module to device
    print("Upload...")
    remote = auto_scheduler.utils.request_remote(device_key, rpc_host, rpc_port, timeout=10000)
    remote.upload(tmp.relpath(filename))
    rlib = remote.load_module(filename)

    # Create graph executor
    dev = remote.cpu()
    module = graph_executor.GraphModule(rlib["default"](dev))
    data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))
    module.set_input("data", data_tvm)

    # Evaluate
    print("Evaluate inference time cost...")
    print(module.benchmark(dev, repeat=3, min_repeat_ms=500))
def get_tvm_output(
    func, x, params, target, device, out_shape=(1, 1000), input_name="image", dtype="float32"
):
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(func, target, params=params)
    m = graph_executor.GraphModule(lib["default"](device))
    # set inputs
    m.set_input(input_name, tvm.nd.array(x.astype(dtype)))
    m.run()
    # get outputs
    out = m.get_output(0, tvm.nd.empty(out_shape, dtype))
    return out.numpy()
def run_relay_graph(mod, params, shape_dict, target, dev):
    with relay.build_config(opt_level=3):
        lib = relay.build(mod, target=target, params=params)
    input_shape = shape_dict["input_1"]
    dummy_data = np.random.uniform(size=input_shape, low=0, high=input_shape[1]).astype("int32")

    m = graph_executor.GraphModule(lib["default"](dev))
    m.set_input(0, dummy_data)
    m.run()
    tvm_output = m.get_output(0)

    print(m.benchmark(dev, repeat=5, number=5))
    return tvm_output
def evaluate_performance(lib, data_shape):
    # upload parameters to device
    dev = tvm.cpu()
    data_tvm = tvm.nd.array((np.random.uniform(size=data_shape)).astype(dtype))
    module = runtime.GraphModule(lib["default"](dev))
    module.set_input(input_name, data_tvm)

    # evaluate
    print("Evaluate inference time cost...")
    ftimer = module.module.time_evaluator("run", dev, number=100, repeat=3)
    prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
    print("Mean inference time (std dev): %.2f ms (%.2f ms)" % (np.mean(prof_res), np.std(prof_res)))
def build_and_run(
    mod,
    inputs,
    outputs,
    params,
    device,
    enable_clml=True,
    no_runs=1,
    config=None,
):
    """Build and run the relay module."""
    if config is None:
        config = {}
    try:
        libm = build_module(mod, device.target, device.target_host, params, enable_clml)

        clml_modules = extract_clml_modules(libm)
        for mod in clml_modules:
            source = mod.get_source("json")
            codegen = json.loads(source)["nodes"]
            # remove input and const names as these cannot be predetermined
            for node in range(len(codegen)):
                if codegen[node]["op"] == "input" or codegen[node]["op"] == "const":
                    codegen[node]["name"] = ""
            codegen_str = json.dumps(codegen, sort_keys=True, indent=2)
    except Exception as e:
        err_msg = "The module could not be built.\n"
        if config:
            err_msg += f"The test failed with the following parameters: {config}\n"
        err_msg += str(e)
        raise Exception(err_msg)

    lib = update_lib(libm, device.device, device.cross_compile)
    gen_module = graph_executor.GraphModule(lib["default"](device.device.cl(0)))
    gen_module.set_input(**inputs)
    out = []
    for _ in range(no_runs):
        gen_module.run()
        out.append([gen_module.get_output(i) for i in range(outputs)])
    time_f = gen_module.module.time_evaluator("run", device.device.cl(0), number=50)
    cost = time_f().mean
    print("%g secs/iteration\n" % cost)
    return out
def run_tvm_graph(
    coreml_model, target, device, input_data, input_name, output_shape, output_dtype="float32"
):
    """Generic function to compile on relay and execute on tvm"""
    if isinstance(input_data, list):
        shape_dict = {}
        dtype_dict = {}
        for i, e in enumerate(input_name):
            shape_dict[e] = input_data[i].shape
            dtype_dict[e] = input_data[i].dtype
    else:
        shape_dict = {input_name: input_data.shape}
        dtype_dict = {input_name: input_data.dtype}
    mod, params = relay.frontend.from_coreml(coreml_model, shape_dict)
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(mod, target, params=params)

    from tvm.contrib import graph_executor

    m = graph_executor.GraphModule(lib["default"](device))
    # set inputs
    if isinstance(input_data, list):
        for i, e in enumerate(input_name):
            m.set_input(e, tvm.nd.array(input_data[i].astype(input_data[i].dtype)))
    else:
        m.set_input(input_name, tvm.nd.array(input_data.astype(input_data.dtype)))
    # execute
    m.run()
    # get outputs
    if isinstance(output_shape, list) and isinstance(output_dtype, list):
        tvm_output_list = []
        for i, s in enumerate(output_shape):
            tvm_output = m.get_output(i, tvm.nd.empty((s), output_dtype[i]))
            tvm_output_list.append(tvm_output.numpy())
        return tvm_output_list
    else:
        if not output_shape:
            tvm_output = m.get_output(0)
        else:
            tvm_output = m.get_output(0, tvm.nd.empty((output_shape), output_dtype))
        return tvm_output.numpy()
def test_graph_executor_api():
    dname_0, dname_1 = "data_0", "data_1"
    data_0, data_1 = [relay.var(c, shape=(1, 1), dtype="float32") for c in [dname_0, dname_1]]
    net = relay.add(data_0, data_1)
    func = relay.Function((data_0, data_1), net)

    lib = relay.build(tvm.IRModule.from_expr(func), "llvm")
    mod = graph_executor.GraphModule(lib["default"](tvm.cpu(0)))

    assert mod.get_input_index(dname_1) == 1
    assert mod.get_input_index(dname_0) == 0
    assert mod.get_input_index("Invalid") == -1
def check_graph_executor(
    target, ref_res, device, func, params, config, opt_level, expected_index=None
):
    with tvm.transform.PassContext(opt_level=opt_level, config=config):
        graph_executor_factory = relay.build(func, target, params=params)

        contexts = [tvm.cpu(0), tvm.device(device)]
        graph_json = json.loads(graph_executor_factory.graph_json)
        if "device_index" in graph_json["attrs"]:
            device_index = graph_json["attrs"]["device_index"][1]
            assert device_index == expected_index
        mod = graph_executor.GraphModule(graph_executor_factory["default"](*contexts))
        mod.run()
        res = mod.get_output(0).numpy()
        tvm.testing.assert_allclose(res, ref_res, rtol=1e-5, atol=1e-5)
def get_tvm_model(traced_model, X_ex):
    mod, params = relay.frontend.from_pytorch(traced_model, input_infos=[("input0", X_ex.shape)])
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(mod, target=TARGET, params=params)
    dev = tvm.device(str(TARGET), 0)
    module = graph_executor.GraphModule(lib["default"](dev))
    module.set_input("input0", X_ex)
    module.run()  # just a test run to make sure it works

    # mod is the relay IR module and params are the extracted weights, both
    # used downstream; module is the runnable graph executor.
    return mod, params, module
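# A usage sketch for get_tvm_model. TARGET is assumed to be a module-level
# constant in the original file; the torch model here is illustrative:
def _example_get_tvm_model():
    global TARGET
    TARGET = "llvm"
    import torch

    model = torch.nn.Linear(4, 2).eval()
    X_ex = torch.randn(1, 4)
    traced_model = torch.jit.trace(model, X_ex)
    mod, params, module = get_tvm_model(traced_model, X_ex)
    # get_tvm_model already ran the module once, so outputs are populated.
    return module.get_output(0).numpy()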
def _run_tvm(data, proto_file, blob_file):
    """Run caffe model by TVM according to .caffemodel and .prototxt"""
    init_net = pb.NetParameter()
    predict_net = pb.NetParameter()

    # load model
    with open(proto_file, "r") as f:
        text_format.Merge(f.read(), predict_net)
    # load blob
    with open(blob_file, "rb") as f:
        init_net.ParseFromString(f.read())

    shape_dict = dict()
    dtype_dict = dict()
    if isinstance(data, (tuple, list)):
        for idx, d in enumerate(data):
            shape_dict["data" + str(idx)] = d.shape
            dtype_dict["data" + str(idx)] = "float32"
    else:
        shape_dict = {"data": data.shape}
        dtype_dict = {"data": "float32"}

    mod, params = relay.frontend.from_caffe(init_net, predict_net, shape_dict, dtype_dict)

    target = "llvm"
    target_host = "llvm"
    dev = tvm.cpu(0)
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(mod, target=target, target_host=target_host, params=params)
    dtype = "float32"
    m = graph_executor.GraphModule(lib["default"](dev))
    if isinstance(data, (tuple, list)):
        for idx, d in enumerate(data):
            m.set_input("data" + str(idx), tvm.nd.array(d.astype(dtype)))
    else:
        m.set_input("data", tvm.nd.array(data.astype(dtype)))
    # execute
    m.run()
    # get outputs
    tvm_output = list()
    for i in range(m.get_num_outputs()):
        tvm_output.append(m.get_output(i).numpy())
    return tvm_output
def verify_gpu_export(obj_format):
    if not tvm.testing.device_enabled("cuda"):
        print("Skip because cuda is not enabled")
        return
    mod, params = relay.testing.synthetic.get_workload()
    with relay.build_config(opt_level=3):
        compiled_graph_lib = relay.build_module.build(mod, "cuda", params=params)

    from tvm.contrib import utils

    temp = utils.tempdir()
    if obj_format == ".so":
        file_name = "deploy_lib.so"
    else:
        assert obj_format == ".tar"
        file_name = "deploy_lib.tar"
    path_lib = temp.relpath(file_name)
    compiled_graph_lib.export_library(path_lib)

    data = np.random.uniform(-1, 1, size=input_shape(mod)).astype("float32")

    # Run the setup in a separate function, so the loaded library can get
    # destructed right away; this tests robustness with respect to parent
    # module destruction.
    def setup_gmod():
        loaded_lib = tvm.runtime.load_module(path_lib)
        dev = tvm.cuda()
        return loaded_lib["default"](dev)

    gmod = setup_gmod()
    # raw api
    set_input = gmod["set_input"]
    run = gmod["run"]
    get_output = gmod["get_output"]
    set_input("data", tvm.nd.array(data))
    run()
    out = get_output(0).numpy()
    tvm.testing.assert_allclose(out, verify(data), atol=1e-5)

    # graph executor wrapper
    gmod = graph_executor.GraphModule(setup_gmod())
    gmod.set_input("data", data)
    gmod.run()
    out = gmod.get_output(0).numpy()
    tvm.testing.assert_allclose(out, verify(data), atol=1e-5)
def generate_ref_data(mod, input_data, params=None, target="llvm"):
    """Generate reference data by executing the relay module; this variant
    returns the outputs as a plain list rather than a name-keyed dict."""
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(mod, target=target, params=params)

    lib_name = "mod.so"
    temp = utils.tempdir()
    lib_path = temp.relpath(lib_name)
    lib.export_library(lib_path)
    lib = tvm.runtime.load_module(lib_path)
    grt_mod = graph_executor.GraphModule(lib["default"](tvm.cpu()))
    grt_mod.set_input(**input_data)
    grt_mod.run()
    output_count = grt_mod.get_num_outputs()
    out = [grt_mod.get_output(i).numpy() for i in range(output_count)]
    return out
def run_func(func, params, x):
    with tvm.transform.PassContext(opt_level=3):
        lib = relay.build(func, "llvm", params=params)

    from tvm.contrib import graph_executor

    dev = tvm.cpu(0)
    dtype = "float32"
    m = graph_executor.GraphModule(lib["default"](dev))
    # set inputs
    m.set_input("data", tvm.nd.array(x.astype(dtype)))
    # execute
    m.run()
    # get outputs
    tvm_output = m.get_output(0)
    return tvm_output.numpy()
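# A minimal usage sketch for run_func with a toy relay function; the "data"
# input name matches what run_func hard-codes, and the weight values are
# illustrative:
def _example_run_func():
    data_var = relay.var("data", shape=(1, 4), dtype="float32")
    weight = relay.var("weight", shape=(1, 4), dtype="float32")
    func = relay.Function([data_var, weight], relay.multiply(data_var, weight))
    params = {"weight": np.full((1, 4), 2.0, dtype="float32")}
    x = np.ones((1, 4), dtype="float32")
    return run_func(func, params, x)  # array of 2.0s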