def check_result(mod, ref_mod, map_inputs, out_shape, tol=1e-5, target="llvm",
                 ctx=tvm.cpu(), params=None):
    """Check that ``mod`` reproduces the output of ``ref_mod``.

    ``ref_mod`` is built and executed with the graph runtime to obtain the
    reference output. ``mod`` is then executed through both the Relay VM and
    the graph runtime, and each result is compared against the reference.

    Parameters
    ----------
    mod
        Module under test; passed to ``relay.vm.compile`` and ``relay.build``.
    ref_mod
        Module used to produce the reference output via ``relay.build``.
    map_inputs : dict
        Input name -> data. Set on the graph runtime one by one and passed as
        keyword arguments to ``vm.run``.
    out_shape
        Shape used to allocate the buffer that receives output 0.
    tol : float
        Used as both ``rtol`` and ``atol`` in the comparison.
    target
        Build target forwarded to the compile calls.
    ctx
        Device context used to create the runtimes and output buffers.
    params
        Forwarded unchanged to the compile calls.

    Returns
    -------
    None
        On Windows a message is printed and nothing is checked.
    """
    if sys.platform == "win32":
        print("Skip test on Windows for now")
        return

    def run_graph_runtime(module):
        """Build ``module``, run it on the graph runtime, return output 0 as numpy."""
        # Clear the compile engine before every build, as each build in this
        # check does.
        compile_engine.get().clear()
        with tvm.transform.PassContext(opt_level=3):
            graph_json, lib, param = relay.build(module, target=target, params=params)
        rt_mod = tvm.contrib.graph_runtime.create(graph_json, lib, ctx)
        for name, data in map_inputs.items():
            rt_mod.set_input(name, data)
        rt_mod.set_input(**param)
        rt_mod.run()
        out = tvm.nd.empty(out_shape, ctx=ctx)
        out = rt_mod.get_output(0, out)
        return out.asnumpy()

    # Run the reference result
    ref_result = run_graph_runtime(ref_mod)

    def check_vm_result():
        """Run ``mod`` on the Relay VM and compare against the reference."""
        compile_engine.get().clear()
        with tvm.transform.PassContext(opt_level=3):
            exe = relay.vm.compile(mod, target=target, params=params)
        # Save and reload the executable so the serialized form is what runs.
        code, lib = exe.save()
        exe = runtime.vm.Executable.load_exec(code, lib)
        vm = runtime.vm.VirtualMachine(exe)
        vm.init(ctx)
        out = vm.run(**map_inputs)
        tvm.testing.assert_allclose(out.asnumpy(), ref_result, rtol=tol, atol=tol)

    def check_graph_runtime_result():
        """Run ``mod`` on the graph runtime and compare against the reference."""
        tvm.testing.assert_allclose(run_graph_runtime(mod), ref_result, rtol=tol, atol=tol)

    check_vm_result()
    check_graph_runtime_result()
def check_vm_result():
    """Run the module on the Relay VM and compare it with the reference.

    Reads ``mod``, ``target``, ``params``, ``ctx``, ``map_inputs``,
    ``ref_result`` and ``tol`` from the enclosing scope (not visible here).
    The compiled executable is saved and reloaded before it is executed.
    """
    compile_engine.get().clear()
    with relay.build_config(opt_level=3):
        compiled = relay.vm.compile(mod, target=target, params=params)

    # Round-trip through save/load so the serialized executable is what runs.
    bytecode, library = compiled.save()
    reloaded = runtime.vm.Executable.load_exec(bytecode, library)

    machine = runtime.vm.VirtualMachine(reloaded, ctx)
    actual = machine.run(**map_inputs)
    tvm.testing.assert_allclose(actual.asnumpy(), ref_result, rtol=tol, atol=tol)
def test_exec(mod, params, ref_mod, ref_params, out_shape):
    """Compare the partitioned ``mod`` against ``ref_mod`` on random input.

    A random float32 tensor of shape (1, 3, 224, 224) is evaluated on
    ``ref_mod`` with the graph executor to get the expected output. ``mod``
    is then partitioned with ``dnnl_patterns`` and handed to ``check_result``
    together with that expected output.
    """
    input_shape = (1, 3, 224, 224)
    input_data = np.random.randn(*input_shape).astype(np.float32)

    # Expected output from the reference module.
    reference_executor = relay.create_executor("graph", mod=ref_mod, ctx=tvm.cpu(0))
    expected = reference_executor.evaluate()(input_data, **ref_params)

    compile_engine.get().clear()
    partitioned = get_partitoned_mod(mod, params, dnnl_patterns)
    check_result(
        partitioned,
        {"data": input_data},
        out_shape,
        expected.asnumpy(),
        tol=1e-5,
        params=params,
    )
def check_vm_result():
    """Run the module on the Relay VM and compare every output.

    Reads ``mod``, ``target``, ``params``, ``ctx``, ``map_inputs``,
    ``result`` and ``tol`` from the enclosing scope (not visible here).
    The saved library is passed through ``update_lib`` before the executable
    is reloaded. A single output or expected value is treated as a
    one-element list; outputs and expected values are compared pairwise.
    """
    compile_engine.get().clear()
    with tvm.transform.PassContext(opt_level=3):
        compiled = relay.vm.compile(mod, target=target, params=params)

    bytecode, library = compiled.save()
    library = update_lib(library)
    reloaded = runtime.vm.Executable.load_exec(bytecode, library)
    machine = runtime.vm.VirtualMachine(reloaded, ctx)

    vm_outputs = machine.run(**map_inputs)
    if not isinstance(vm_outputs, runtime.container.ADT):
        vm_outputs = [vm_outputs]

    if isinstance(result, list):
        expected_outputs = result
    else:
        expected_outputs = [result]

    for actual, expected in zip(vm_outputs, expected_outputs):
        tvm.testing.assert_allclose(actual.asnumpy(), expected, rtol=tol, atol=tol)
def check_graph_runtime_result():
    """Run the module on the graph runtime and compare it with the reference.

    Reads ``mod``, ``target``, ``params``, ``ctx``, ``map_inputs``,
    ``out_shape``, ``ref_result`` and ``tol`` from the enclosing scope
    (not visible here). Only output 0 is checked.
    """
    compile_engine.get().clear()
    with relay.build_config(opt_level=3):
        graph_json, library, built_params = relay.build(mod, target=target, params=params)

    executor = tvm.contrib.graph_runtime.create(graph_json, library, ctx)
    for input_name in map_inputs:
        executor.set_input(input_name, map_inputs[input_name])
    executor.set_input(**built_params)
    executor.run()

    # Fetch output 0 into a buffer allocated with the expected shape.
    buffer = tvm.nd.empty(out_shape, ctx=ctx)
    actual = executor.get_output(0, buffer)
    tvm.testing.assert_allclose(actual.asnumpy(), ref_result, rtol=tol, atol=tol)
def tune_and_evaluate(mod, params, input_shape, dtype, measure_top_n, target, tuning_opt,
                      graph_log_file, best_results_file):
    """Tune a model with the ranking model and evaluate the performance.

    Parameters
    ----------
    mod
        Module whose ``"main"`` function is used for task extraction and tuning.
    params
        Model parameters passed to task extraction and to the build.
    input_shape
        Indexable pair: element 0 is used as the input name, element 1 as the
        input shape.
    dtype
        dtype of the random input data used for evaluation.
    measure_top_n
        Forwarded to ``tune_kernels``.
    target
        Build target; ``str(target)`` also selects the device context.
    tuning_opt : dict
        Expanded as keyword arguments to ``tune_kernels``; its
        ``'log_filename'`` entry is the kernel tuning log path.
    graph_log_file
        Graph tuning log path, or ``None`` to skip graph tuning.
    best_results_file
        Path the ``tune_kernels`` result is written to as JSON.

    Returns
    -------
    None
        Progress and the median inference time are written to ``sys.stderr``.
    """
    sys.stderr.write("Extract conv2d tasks...\n")
    tasks = autotvm.task.extract_from_program(mod["main"], target=target, params=params)

    # Run tuning tasks. Graph tuning only runs when a graph log path is given
    # and that log does not exist yet. The second argument to tune_kernels
    # differs between the two paths; its meaning is defined by tune_kernels.
    if graph_log_file is not None and not os.path.exists(graph_log_file):
        best_results = tune_kernels(tasks, True, measure_top_n, **tuning_opt)
        tune_graph(mod["main"], input_shape[1], target, tuning_opt['log_filename'],
                   graph_log_file)
    else:
        best_results = tune_kernels(tasks, False, measure_top_n, **tuning_opt)

    with open(best_results_file, 'w') as of:
        json.dump(best_results, of)

    # The dispatch context is process-global state. Restore it in `finally`
    # so that a failed build does not leave the override installed.
    dispatch_ctx = tvm.autotvm.task.DispatchContext.current
    try:
        if graph_log_file is not None and os.path.exists(graph_log_file):
            sys.stderr.write("Compile model with graph tuning...\n")
            tvm.autotvm.task.DispatchContext.current = autotvm.apply_graph_best(
                graph_log_file)
        elif os.path.exists(tuning_opt['log_filename']):
            sys.stderr.write("Compile model without graph tuning...\n")
            tvm.autotvm.task.DispatchContext.current = autotvm.apply_history_best(
                tuning_opt['log_filename'])
        else:
            sys.stderr.write("Compile model with fallback + tophub...\n")

        compile_engine.get().clear()
        with relay.build_config(opt_level=3):
            graph, lib, params = relay.build_module.build(mod, target=target, params=params)
    finally:
        tvm.autotvm.task.DispatchContext.current = dispatch_ctx

    # Load parameters.
    ctx = tvm.context(str(target), 0)
    module = runtime.create(graph, lib, ctx)
    data_tvm = tvm.nd.array(
        (np.random.uniform(size=input_shape[1])).astype(dtype))
    module.set_input(input_shape[0], data_tvm)
    module.set_input(**params)

    # Evaluate performance.
    sys.stderr.write("Evaluate inference time cost...\n")
    ftimer = module.module.time_evaluator("run", ctx, number=100, repeat=3)
    prof_res = np.array(ftimer().results) * 1000  # convert to millisecond
    sys.stderr.write("Median inference time: %.2f ms\n" % np.median(prof_res))
def __init__(self, mod, tgt) -> None:
    """Store the module and target and initialise empty bookkeeping state.

    ``engine`` is the object returned by ``compile_engine.get()``.
    ``bindings`` starts as a list holding one empty list and ``gv_map``
    starts as an empty dict.
    """
    super().__init__()
    self.mod, self.tgt = mod, tgt
    self.engine = compile_engine.get()
    self.bindings = [[]]
    self.gv_map = dict()
def generate_ref_data(mod, input_data, params=None, target="llvm"):
    """Generate reference data by executing the relay module.

    The module is built at opt level 3, exported to ``mod.so`` in a temporary
    directory, loaded back, and run with the graph executor on ``tvm.cpu()``.

    Parameters
    ----------
    mod
        Relay module to build.
    input_data : dict
        Expanded as keyword arguments to ``set_input``.
    params
        Forwarded to ``relay.build``.
    target
        Build target forwarded to ``relay.build``.

    Returns
    -------
    list
        One ``.numpy()`` result per output of the executed module.
    """
    compile_engine.get().clear()
    with tvm.transform.PassContext(opt_level=3):
        built = relay.build(mod, target=target, params=params)

    # Keep the temp-directory object referenced while the library is in use.
    workdir = utils.tempdir()
    shared_object = workdir.relpath("mod.so")
    built.export_library(shared_object)
    loaded = tvm.runtime.load_module(shared_object)

    executor = graph_executor.GraphModule(loaded["default"](tvm.cpu()))
    executor.set_input(**input_data)
    executor.run()

    outputs = []
    for index in range(executor.get_num_outputs()):
        outputs.append(executor.get_output(index).numpy())
    return outputs
def __init__(self, mod, target) -> None:
    """Store the module and target and reset the counters and variable map.

    ``engine`` is the object returned by ``compile_engine.get()``.
    ``fun_no`` and ``var_no`` both start at 0 and ``var_map`` starts as an
    empty dict.
    """
    super().__init__()
    self.mod, self.tgt = mod, target
    self.engine = compile_engine.get()
    self.fun_no = self.var_no = 0
    self.var_map = dict()
def test_extern_dnnl_mobilenet():
    """Check the DNNL-partitioned MobileNet against the unpartitioned model.

    Returns early with a message when the ``relay.ext.dnnl`` global function
    is not registered. Otherwise the MobileNet workload is annotated for
    ``dnnl``, its compiler regions are merged, and the graph is partitioned.
    The result is passed to ``check_result`` with the reference output for a
    random (1, 3, 224, 224) float32 input.
    """
    dnnl_codegen = tvm.get_global_func("relay.ext.dnnl", True)
    if not dnnl_codegen:
        print("skip because DNNL codegen is not available")
        return

    dtype = "float32"
    input_shape = (1, 3, 224, 224)
    ref_mod, params = relay.testing.mobilenet.get_workload(batch_size=1, dtype="float32")

    # Annotate -> merge -> partition, in that order.
    annotated = transform.AnnotateTarget(["dnnl"])(ref_mod)
    merged = transform.MergeCompilerRegions()(annotated)
    partitioned = transform.PartitionGraph()(merged)

    input_data = np.random.uniform(0, 1, input_shape).astype(dtype)

    reference_executor = relay.create_executor("graph", mod=ref_mod, ctx=tvm.cpu(0))
    expected = reference_executor.evaluate()(input_data, **params)

    compile_engine.get().clear()
    check_result(
        partitioned,
        {"data": input_data},
        (1, 1000),
        expected.asnumpy(),
        tol=1e-5,
        params=params,
    )
def check_graph_runtime_result():
    """Run the module on the graph runtime and compare every output.

    Reads ``mod``, ``target``, ``params``, ``ctx``, ``map_inputs``,
    ``out_shape``, ``result`` and ``tol`` from the enclosing scope (not
    visible here). The built library is passed through ``update_lib`` before
    the runtime is created. A single shape or expected value is treated as a
    one-element list; output ``i`` is compared with expected value ``i``.
    """
    compile_engine.get().clear()
    with tvm.transform.PassContext(opt_level=3):
        graph_json, library, built_params = relay.build(mod, target=target, params=params)
    library = update_lib(library)

    executor = tvm.contrib.graph_runtime.create(graph_json, library, ctx)
    for input_name in map_inputs:
        executor.set_input(input_name, map_inputs[input_name])
    executor.set_input(**built_params)
    executor.run()

    if isinstance(out_shape, list):
        shapes = out_shape
    else:
        shapes = [out_shape]

    if isinstance(result, list):
        expected_outputs = result
    else:
        expected_outputs = [result]

    for position, shape in enumerate(shapes):
        buffer = tvm.nd.empty(shape, ctx=ctx)
        actual = executor.get_output(position, buffer)
        tvm.testing.assert_allclose(
            actual.asnumpy(), expected_outputs[position], rtol=tol, atol=tol
        )
def tune_and_evaluate(mod, params, inputs, dtype, measure_top_n, target, tuning_opt,
                      ref_log_filename):
    """Tune a model with the ranking model and evaluate the performance.

    Three builds of ``mod`` are produced and evaluated: one using the tuning
    log in ``tuning_opt["log_filename"]``, a baseline under the dispatch
    context that was active on entry, and (if ``ref_log_filename`` exists)
    a reference build using that log.

    Parameters
    ----------
    mod
        Module to tune and build; ``mod["main"]`` is used for task extraction.
    params
        Model parameters passed to task extraction and to every build.
    inputs : dict
        Input name -> shape; random data of ``dtype`` is generated per entry.
    dtype
        dtype of the generated input data; also forwarded to ``evaluate``.
    measure_top_n
        Forwarded to ``tune_kernels``.
    target
        Build target; ``str(target)`` also selects the device context.
    tuning_opt : dict
        Expanded as keyword arguments to ``tune_kernels``; its
        ``"log_filename"`` entry is the tuning log path.
    ref_log_filename
        Path to an optional reference tuning log.

    Returns
    -------
    None
        Progress is written to ``sys.stderr``.
    """
    sys.stderr.write("Extract conv2d tasks...\n")
    tasks = autotvm.task.extract_from_program(mod["main"], target=target, params=params)

    tic = time.time()
    dispatch_ctx = tvm.autotvm.task.DispatchContext.current

    def build_with(dispatch):
        """Build ``mod`` with ``dispatch`` installed as the global dispatch context."""
        tvm.autotvm.task.DispatchContext.current = dispatch
        try:
            compile_engine.get().clear()
            with relay.build_config(opt_level=3):
                return relay.build_module.build(mod, target=target, params=params)
        finally:
            # Always restore the context captured on entry, even when the
            # build raises, so the override does not outlive this call.
            tvm.autotvm.task.DispatchContext.current = dispatch_ctx

    # Tuning is skipped when the log file already exists.
    if not os.path.exists(tuning_opt["log_filename"]):
        tune_kernels(tasks, measure_top_n, **tuning_opt)
    sys.stderr.write("Tuning time: %.2f mins\n" % ((time.time() - tic) / 60))

    sys.stderr.write("Compile new tuned model...\n")
    lib = build_with(autotvm.apply_history_best(tuning_opt["log_filename"]))

    sys.stderr.write("Compile baseline model...\n")
    base_lib = build_with(dispatch_ctx)

    ref_lib = None
    if os.path.exists(ref_log_filename):
        sys.stderr.write("Compile reference model...\n")
        ref_lib = build_with(autotvm.apply_history_best(ref_log_filename))

    ctx = tvm.context(str(target), 0)
    data_tvm = {
        name: tvm.nd.array((np.random.uniform(size=shape)).astype(dtype))
        for name, shape in inputs.items()
    }

    # Evaluate baseline module
    sys.stderr.write("Baseline\n")
    evaluate(base_lib, ctx, data_tvm, dtype)

    # Evaluate reference module
    if ref_lib:
        sys.stderr.write("Reference\n")
        evaluate(ref_lib, ctx, data_tvm, dtype)

    # Evaluate module
    evaluate(lib, ctx, data_tvm, dtype)