def tune_and_evaluate():
    print("Begin tuning...")
    tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
    tune_option = auto_scheduler.TuningOptions(
        num_measure_trials=200,  # change this to 20000 to achieve the best performance
        builder=auto_scheduler.LocalBuilder(build_func="ndk" if use_ndk else "default"),
        runner=auto_scheduler.RPCRunner(
            device_key,
            host=rpc_host,
            port=rpc_port,
            timeout=30,
            repeat=1,
            min_repeat_ms=200,
            enable_cpu_cache_flush=True,
        ),
        measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
    )
    tuner.tune(tune_option)

    # Compile with the history best
    print("Compile...")
    with auto_scheduler.ApplyHistoryBest(log_file):
        with tvm.transform.PassContext(
                opt_level=3, config={"relay.backend.use_auto_scheduler": True}):
            lib = relay.build(mod, target=target, params=params)

    # Export library
    tmp = tempdir()
    if use_ndk:
        from tvm.contrib import ndk
        filename = "net.so"
        lib.export_library(tmp.relpath(filename), ndk.create_shared)
    else:
        filename = "net.tar"
        lib.export_library(tmp.relpath(filename))

    # Upload module to device
    print("Upload...")
    remote = auto_scheduler.utils.request_remote(device_key, rpc_host, rpc_port, timeout=10000)
    remote.upload(tmp.relpath(filename))
    rlib = remote.load_module(filename)

    # Create graph executor
    dev = remote.cpu()
    module = graph_executor.GraphModule(rlib["default"](dev))
    data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))
    module.set_input("data", data_tvm)

    # Evaluate
    print("Evaluate inference time cost...")
    print(module.benchmark(dev, repeat=3, min_repeat_ms=500))
def _autoscheduler_test_helper(model, tmpdir_name, tasks_weights=None, early_stopping=1, tuning_records=None):
    tasks, weights = tasks_weights if tasks_weights else _get_tasks(model)
    log_file = os.path.join(tmpdir_name, "autoscheduler.json")

    tuning_options = auto_scheduler.TuningOptions(
        num_measure_trials=1,
        measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
        runner="local",
        builder="local",
        verbose=0,
        early_stopping=early_stopping,
    )

    tvmc.autotuner.schedule_tasks(tasks[:1], weights[:1], tuning_options, tuning_records)

    # testing whether the log file was produced
    assert path.exists(log_file), "autoscheduler log file should exist"

    with auto_scheduler.ApplyHistoryBest(log_file) as best:
        assert isinstance(
            best, auto_scheduler.dispatcher.ApplyHistoryBest
        ), "unable to load the best results of tuning"

    return log_file
def _autoscheduler_test_helper(model, tmpdir_name, early_stopping=1, prior_records=None):
    tvmc_model = tvmc.frontends.load_model(model)
    log_file = os.path.join(tmpdir_name, "autoscheduler.json")

    hardware_params = auto_scheduler.HardwareParams(num_cores=4, target="llvm")

    tvmc.tune(
        tvmc_model,
        target="llvm",
        tuning_records=log_file,
        prior_records=prior_records,
        early_stopping=early_stopping,
        enable_autoscheduler=True,
        trials=2,
        hardware_params=hardware_params,
    )

    # testing whether the log file was produced
    assert path.exists(log_file), "autoscheduler log file should exist"

    with auto_scheduler.ApplyHistoryBest(log_file) as best:
        assert isinstance(
            best, auto_scheduler.dispatcher.ApplyHistoryBest
        ), "unable to load the best results of tuning"

    return log_file
def local_auto_scheduler(self, repeat=1, min_repeat_ms=300, timeout=10, num_measure_trials=200):
    # extract tasks
    tasks, task_weights = auto_scheduler.extract_tasks(self.mod["main"], self.params, self.target)
    for idx, task in enumerate(tasks):
        logger.debug("========== Task %d (workload key: %s) ==========" % (idx, task.workload_key))
        logger.debug(task.compute_dag)

    # generate tuner
    tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
    logging.info("Begin tuning...")
    measure_ctx = auto_scheduler.LocalRPCMeasureContext(repeat=repeat, min_repeat_ms=min_repeat_ms, timeout=timeout)
    tune_option = auto_scheduler.TuningOptions(
        num_measure_trials=num_measure_trials,
        runner=measure_ctx.runner,
        measure_callbacks=[auto_scheduler.RecordToFile(self.log_file)],
    )
    tuner.tune(tune_option)

    # update self.lib
    with auto_scheduler.ApplyHistoryBest(self.log_file):
        with tvm.transform.PassContext(
                opt_level=3, config={"relay.backend.use_auto_scheduler": True}):
            self._lib = relay.build(self.mod, target=self.target, params=self.params)
    logger.info(f"load optimized library from {self.log_file}")
def test_tuning_cuda():
    auto_scheduler.enable_relay_integration()

    # Extract tasks
    mod, params = get_network("mlp")
    target = tvm.target.Target("cuda")
    tasks, task_weights = auto_scheduler.extract_tasks(mod["main"], params, target)
    objective = lambda costs: sum(c * w for c, w in zip(costs, task_weights))

    with tempfile.NamedTemporaryFile() as fp:
        log_file = fp.name

        # Tuning
        measure_ctx = auto_scheduler.LocalRPCMeasureContext(timeout=100)
        tuner = auto_scheduler.TaskScheduler(tasks, objective)
        tune_option = auto_scheduler.TuningOptions(
            num_measure_trials=2,
            num_measures_per_round=1,
            runner=measure_ctx.runner,
            measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
        )
        tuner.tune(tune_option, search_policy="sketch.random")
        del measure_ctx

        # Compile with the history best
        with auto_scheduler.ApplyHistoryBest(log_file):
            with tvm.transform.PassContext(opt_level=3):
                lib = relay.build(mod, target=target, params=params)

    # Todo(merrymercy): compile without any history to test the fallback mechanism

    auto_scheduler.enable_relay_integration(False)
def tune_network(network, target):
    # Extract tasks
    mod, params = get_network(network)
    target = tvm.target.Target(target)
    tasks, task_weights = auto_scheduler.extract_tasks(mod["main"], params, target)

    with tempfile.NamedTemporaryFile() as fp:
        log_file = fp.name

        # Tuning
        measure_ctx = auto_scheduler.LocalRPCMeasureContext(timeout=60)
        tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
        tune_option = auto_scheduler.TuningOptions(
            num_measure_trials=100,
            num_measures_per_round=2,
            early_stopping=1,
            runner=measure_ctx.runner,
            builder=auto_scheduler.LocalBuilder(timeout=60),
            measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
        )
        tuner.tune(tune_option, search_policy="sketch.random")
        del measure_ctx

        # Compile with the history best
        with auto_scheduler.ApplyHistoryBest(log_file):
            with tvm.transform.PassContext(
                    opt_level=3, config={"relay.backend.use_auto_scheduler": True}):
                lib = relay.build(mod, target=target, params=params)
def tune_and_evaluate():
    print("Begin tuning...")
    tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
    tune_option = auto_scheduler.TuningOptions(
        num_measure_trials=200,
        builder=auto_scheduler.LocalBuilder(build_func="ndk"),
        runner=auto_scheduler.RPCRunner(
            device_key,
            host=rpc_host,
            port=rpc_port,
            timeout=30,
            repeat=1,
            min_repeat_ms=200,
            enable_cpu_cache_flush=True,
        ),
        measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
    )
    tuner.tune(tune_option)

    # Compile with the history best
    print("Compile...")
    with auto_scheduler.ApplyHistoryBest(log_file):
        with tvm.transform.PassContext(
                opt_level=3, config={"relay.backend.use_auto_scheduler": True}):
            lib = relay.build(mod, target=target, params=params)

    # Export library
    tmp = tempdir()
    filename = "net.so"
    lib.export_library(tmp.relpath(filename), ndk.create_shared)

    # Upload module to device
    print("Upload...")
    remote = auto_scheduler.utils.request_remote(device_key, rpc_host, rpc_port, timeout=10000)
    remote.upload(tmp.relpath(filename))
    rlib = remote.load_module(filename)

    # Create graph executor
    dev = remote.cpu()
    module = graph_executor.GraphModule(rlib["default"](dev))
    for key, value in shape_dict.items():
        data_tvm = tvm.nd.array((np.random.uniform(size=value)).astype("float32"))
        module.set_input(key, data_tvm)

    # Evaluate
    print("Evaluate inference time cost...")
    ftimer = module.module.time_evaluator("run", dev, repeat=3, min_repeat_ms=500)
    prof_res = np.array(ftimer().results) * 1e3  # convert to millisecond
    print("Mean inference time (std dev): %.2f ms (%.2f ms)" % (np.mean(prof_res), np.std(prof_res)))
def tune_network(network, target):
    # Extract tasks
    mod, params = get_network(network)
    target = tvm.target.Target(target)
    tasks, task_weights = auto_scheduler.extract_tasks(mod["main"], params, target)

    with tempfile.NamedTemporaryFile() as fp:
        log_file = fp.name

        # Tuning
        measure_ctx = auto_scheduler.LocalRPCMeasureContext(timeout=60)
        tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
        tune_option = auto_scheduler.TuningOptions(
            num_measure_trials=100,
            num_measures_per_round=2,
            early_stopping=1,
            runner=measure_ctx.runner,
            builder=auto_scheduler.LocalBuilder(timeout=60),
            measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
        )
        tuner.tune(tune_option, search_policy="sketch.random")
        del measure_ctx

        # Compile with the history best
        with auto_scheduler.ApplyHistoryBest(log_file):
            with tvm.transform.PassContext(
                    opt_level=3, config={"relay.backend.use_auto_scheduler": True}):
                lib = relay.build(mod, target=target, params=params)

        # Compile without auto-scheduler and any other optimization for correctness check
        with tvm.transform.PassContext(opt_level=0):
            lib2 = relay.build(mod, target=target, params=params)

        # Check the correctness
        def get_output(data, lib):
            ctx = tvm.gpu()
            module = graph_runtime.GraphModule(lib["default"](ctx))
            module.set_input("data", data)
            module.run()
            return module.get_output(0).asnumpy()

        np.random.seed(0)
        if network == "mlp":
            data = np.random.uniform(size=(1, 32))
        elif network == "winograd-test":
            data = np.random.uniform(size=(1, 23, 40, 32))
        else:
            raise ValueError("Unknown network: " + network)

        actual_output = get_output(data, lib)
        expected_output = get_output(data, lib2)
        tvm.testing.assert_allclose(actual_output, expected_output, rtol=1e-4, atol=1e-4)
def tune_and_evaluate():
    print("Begin tuning...")
    tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
    tune_option = auto_scheduler.TuningOptions(
        num_measure_trials=200,  # change this to 20000 to achieve the best performance
        builder=auto_scheduler.LocalBuilder(build_func="ndk" if use_ndk else "default"),
        runner=auto_scheduler.RPCRunner(device_key, host="0.0.0.0", port=9190, repeat=3, timeout=50),
        measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
    )
    tuner.tune(tune_option)

    # Compile the whole network
    print("Compile...")
    with auto_scheduler.ApplyHistoryBest(log_file):
        with tvm.transform.PassContext(
                opt_level=3, config={"relay.backend.use_auto_scheduler": True}):
            lib = relay.build(mod, target=target, target_host=target_host, params=params)

    # Create graph runtime
    print("=============== Request Remote ===============")
    from tvm.auto_scheduler.utils import request_remote

    remote = request_remote(device_key, "0.0.0.0", 9190)
    ctx = remote.cl()

    from tvm.contrib import utils, ndk

    temp = utils.tempdir()
    filename = "deploy_lib.so"
    path_lib = temp.relpath(filename)
    lib.export_library(path_lib, ndk.create_shared)
    remote.upload(path_lib)
    loaded_lib = remote.load_module(filename)
    module = graph_runtime.GraphModule(loaded_lib["default"](ctx))
    data = (np.random.uniform(size=input_shape)).astype(dtype)
    data_tvm = tvm.nd.array(data)
    module.set_input("data", data_tvm)

    # Evaluate
    print("Evaluate inference time cost...")
    ftimer = module.module.time_evaluator("run", ctx, repeat=3, min_repeat_ms=500)
    prof_res = np.array(ftimer().results) * 1e3  # convert to millisecond
    print("Mean inference time (std dev): %.2f ms (%.2f ms)" % (np.mean(prof_res), np.std(prof_res)))
def benchmark(network, batch_size, dtype, target, log_file, repeat):
    layout = "NHWC"
    mod, params, input_name, input_shape, output_shape = get_network(
        network, batch_size, dtype, layout
    )

    assert os.path.exists(log_file), "The log file '%s' does not exist." % log_file
    print("Use log file %s" % log_file)

    if network in ["bert"]:
        # Build module
        with auto_scheduler.ApplyHistoryBest(log_file):
            with tvm.transform.PassContext(
                opt_level=3, config={"relay.backend.use_auto_scheduler": True}
            ):
                lib = relay.build(mod, target=target, params=params)
        ctx = tvm.context(str(target), 0)
        module = runtime.GraphModule(lib["default"](ctx))

        # Feed input data
        seq_length = input_shape[0][1]
        data = np.random.uniform(size=input_shape[0])
        token_types = np.random.uniform(size=input_shape[1])
        valid_length = np.array([seq_length] * batch_size)
        module.set_input(data0=data, data1=token_types, data2=valid_length)
    else:
        # Build module
        with auto_scheduler.ApplyHistoryBest(log_file):
            with tvm.transform.PassContext(
                opt_level=3, config={"relay.backend.use_auto_scheduler": True}
            ):
                lib = relay.build(mod, target=target, params=params)
        ctx = tvm.context(str(target), 0)
        module = runtime.GraphModule(lib["default"](ctx))

        # Feed input data
        data = np.random.uniform(size=input_shape)
        module.set_input(input_name, data)

    # Evaluate
    ftimer = module.module.time_evaluator("run", ctx, min_repeat_ms=500, repeat=repeat)
    return np.array(ftimer().results)
def benchmark(network, target, log_file):
    mod, params, input_shape, output_shape = get_network(network)

    if network == "bert":
        with auto_scheduler.ApplyHistoryBest(log_file):
            with tvm.transform.PassContext(
                    opt_level=3, config={"relay.backend.use_auto_scheduler": True}):
                lib = relay.build(mod, target=target, params=params)

        # upload parameters to device
        ctx = tvm.context(str(target), 0)
        data_tvm = tvm.nd.array((np.random.uniform(size=input_shape[0])).astype(dtype))
        token_types_tvm = tvm.nd.array(np.random.uniform(size=input_shape[1]).astype(dtype))
        valid_length_tvm = tvm.nd.array(np.random.uniform(size=input_shape[2]).astype(dtype))
        module = runtime.GraphModule(lib["default"](ctx))
        module.set_input(data0=data_tvm, data1=token_types_tvm, data2=valid_length_tvm)
    else:
        # convert to NHWC layout
        desired_layouts = {'nn.conv2d': ['NHWC', 'default']}
        seq = tvm.transform.Sequential([
            relay.transform.RemoveUnusedFunctions(),
            relay.transform.ConvertLayout(desired_layouts),
        ])
        with tvm.transform.PassContext(opt_level=3):
            mod = seq(mod)

        with auto_scheduler.ApplyHistoryBest(log_file):
            with tvm.transform.PassContext(
                    opt_level=3, config={"relay.backend.use_auto_scheduler": True}):
                lib = relay.build(mod, target=target, params=params)

        # upload parameters to device
        ctx = tvm.context(str(target), 0)
        data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))
        module = runtime.GraphModule(lib["default"](ctx))
        module.set_input(args.inputname, data_tvm)

    # evaluate
    print("Evaluate...")
    ftimer = module.module.time_evaluator("run", ctx, number=1, repeat=args.repeat)
    prof_res = np.array(ftimer().results) * 1000  # multiply 1000 for converting to millisecond
    print(
        "%-20s %-19s (%s)"
        % (network, "%.2f ms" % np.mean(prof_res), "%.2f ms" % np.std(prof_res))
    )
def tune_and_check(mod, data, weight):
    # Extract tasks from a relay program
    target = tvm.target.Target("llvm")
    tasks, task_weights = auto_scheduler.extract_tasks(mod, target=target, params={"weight": weight})

    with tempfile.NamedTemporaryFile() as fp:
        log_file = fp.name

        # Tune tasks
        tuner = auto_scheduler.TaskScheduler(tasks, task_weights, callbacks=[])
        tune_option = auto_scheduler.TuningOptions(
            num_measure_trials=1,
            num_measures_per_round=1,
            builder=auto_scheduler.LocalBuilder(timeout=60),
            measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
        )
        tuner.tune(tune_option, search_policy="sketch.random")

        # Compile
        with auto_scheduler.ApplyHistoryBest(log_file):
            with tvm.transform.PassContext(
                opt_level=3,
                config={"relay.backend.use_auto_scheduler": True},
            ):
                lib = relay.build(mod, target=target, params={"weight": weight})

        # Compile without auto-scheduler for correctness check
        with tvm.transform.PassContext(opt_level=0):
            lib2 = relay.build(mod, target=target, params={"weight": weight})

        def get_output(data, lib):
            dev = tvm.cpu()
            module = graph_executor.GraphModule(lib["default"](dev))
            module.set_input("data", data)
            module.run()
            return module.get_output(0).numpy()

        # Check correctness
        actual_output = get_output(data, lib)
        expected_output = get_output(data, lib2)
        tvm.testing.assert_allclose(actual_output, expected_output, rtol=1e-4, atol=2e-4)
def compile_and_run(disabled_pass={}):
    with auto_scheduler.ApplyHistoryBest(log_file):
        with tvm.transform.PassContext(
            opt_level=3,
            config={"relay.backend.use_auto_scheduler": True},
            disabled_pass=disabled_pass,
        ):
            lib = relay.build(mod, target=target, params={"weight": weight})

    ctx = tvm.cpu()
    module = graph_runtime.GraphModule(lib["default"](ctx))
    module.set_input("data", data)
    module.run()
    return module.get_output(0).asnumpy()
def lib(self):
    if getattr(self, '_lib', None) is None:
        if self.log_file is not None and os.path.exists(self.log_file):
            with auto_scheduler.ApplyHistoryBest(self.log_file):
                with tvm.transform.PassContext(
                        opt_level=3, config={"relay.backend.use_auto_scheduler": True}):
                    self._lib = relay.build(self.mod, target=self.target, params=self.params)
            logger.info(f"load optimized library from {self.log_file}")
        else:
            with tvm.transform.PassContext(
                    opt_level=3, config={"relay.backend.use_auto_scheduler": True}):
                self._lib = relay.build(self.mod, target=self.target, params=self.params)
            logger.info("load unoptimized library")
    return self._lib
def remote_auto_scheduler(self, device_key, rpc_host, rpc_port):
    # generate tasks
    tasks, task_weights = auto_scheduler.extract_tasks(self.mod["main"], self.params, self.target)
    for idx, task in enumerate(tasks):
        logger.debug("========== Task %d (workload key: %s) ==========" % (idx, task.workload_key))
        logger.debug(task.compute_dag)

    # generate tuner
    tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
    tune_option = auto_scheduler.TuningOptions(
        num_measure_trials=200,
        builder=auto_scheduler.LocalBuilder(),
        runner=auto_scheduler.RPCRunner(
            device_key,
            host=rpc_host,
            port=rpc_port,
            timeout=30,
            repeat=1,
            min_repeat_ms=200,
            enable_cpu_cache_flush=True,
        ),
        measure_callbacks=[auto_scheduler.RecordToFile(self.log_file)],
    )
    tuner.tune(tune_option)

    # update self.lib
    with auto_scheduler.ApplyHistoryBest(self.log_file):
        with tvm.transform.PassContext(
                opt_level=3, config={"relay.backend.use_auto_scheduler": True}):
            self._lib = relay.build(self.mod, target=self.target, params=self.params)
    logger.info(f"load optimized library from {self.log_file}")
def tune_network(network, target):
    auto_scheduler.enable_relay_integration()

    # Extract tasks
    mod, params = get_network(network)
    target = tvm.target.Target(target)
    tasks, task_weights = auto_scheduler.extract_tasks(mod["main"], params, target)

    with tempfile.NamedTemporaryFile() as fp:
        log_file = fp.name

        # Tuning
        measure_ctx = auto_scheduler.LocalRPCMeasureContext(timeout=60)
        tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
        tune_option = auto_scheduler.TuningOptions(
            num_measure_trials=100,
            num_measures_per_round=2,
            early_stopping=1,
            runner=measure_ctx.runner,
            builder=auto_scheduler.LocalBuilder(timeout=60),
            measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
        )
        tuner.tune(tune_option, search_policy="sketch.random")
        del measure_ctx

        # Compile with the history best
        with auto_scheduler.ApplyHistoryBest(log_file):
            with tvm.transform.PassContext(opt_level=3):
                lib = relay.build(mod, target=target, params=params)

    # Todo(merrymercy): when the cpu backend is upstreamed, do the following things:
    # 1. compile without history to test the fallback mechanism
    # 2. check the correctness of layout rewrite / winograd pre-transform

    auto_scheduler.enable_relay_integration(False)
#
# You can terminate the tuning earlier by forcibly killing this process.
# As long as you get at least one valid schedule for each task in the log file,
# you should be able to do the compilation (the section below).
#

#################################################################
# Compile and Evaluate
# --------------------
# After auto-tuning, we can compile the network with the best schedules we found.
# All measurement records are dumped into the log file during auto-tuning,
# so we can read the log file and load the best schedules.

# Compile with the history best
print("Compile...")
with auto_scheduler.ApplyHistoryBest(log_file):
    with tvm.transform.PassContext(
            opt_level=3, config={"relay.backend.use_auto_scheduler": True}):
        lib = relay.build(mod, target=target, params=params)

# Create graph runtime
ctx = tvm.context(str(target), 0)
module = graph_runtime.GraphModule(lib["default"](ctx))
data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))
module.set_input("data", data_tvm)

# Evaluate
print("Evaluate inference time cost...")
ftimer = module.module.time_evaluator("run", ctx, repeat=3, min_repeat_ms=500)
prof_res = np.array(ftimer().results) * 1e3  # convert to millisecond
print("Mean inference time (std dev): %.2f ms (%.2f ms)" % (np.mean(prof_res), np.std(prof_res)))
def _tvm_compile(fx_module, example_inputs, target=None, tuning_logfile=None, use_ansor_tuning=False):
    import tvm
    from tvm import relay, auto_scheduler
    from tvm.contrib import graph_executor
    import os

    # Find the target and device for TVM.
    dev = tvm.cpu(0)
    if target is None:
        raise ValueError("Setup the TVM target correctly.")
    elif isinstance(target, str):
        if "cuda" in target:
            dev = tvm.cuda(0)
        target = tvm.target.Target(target)
    elif isinstance(target, tvm.target.target.Target):
        if "cuda" in target.keys:
            dev = tvm.cuda(0)

    # JIT the model and pass it to Torchscript to Relay frontend parser. TVM
    # tutorials suggest tracing instead of scripting. The main reason is to
    # avoid Pythonic computation to show up in JIT module. However, with Python
    # key tracing, AOT Autograd leads to simpler graphs. Therefore, we use
    # scripting here to retrieve the JIT module.
    jit_mod = torch.jit.script(fx_module)
    shape_list = [(f"inp_{idx}", i.shape) for idx, i in enumerate(example_inputs)]
    mod, params = relay.frontend.from_pytorch(jit_mod, shape_list)

    # TVM Autotuning
    if use_ansor_tuning:
        tasks, task_weights = auto_scheduler.extract_tasks(mod["main"], params, target)
        if tuning_logfile is None:
            log_file = f"{time.time()}.json"
        else:
            log_file = f"{tuning_logfile}.json"
        if len(tasks) != 0:
            tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
            tune_option = auto_scheduler.TuningOptions(
                num_measure_trials=20000,
                measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
                # early_stopping=1000,
                # verbose=2,
            )
            tuner.tune(tune_option)
    elif tuning_logfile is not None:
        log_file = f"{tuning_logfile}.json"

    if use_ansor_tuning or tuning_logfile is not None:
        assert os.path.exists(log_file)
        with auto_scheduler.ApplyHistoryBest(log_file):
            with tvm.transform.PassContext(
                    opt_level=3, config={"relay.backend.use_auto_scheduler": True}):
                lib = relay.build(mod, target=target, params=params)
    else:
        with tvm.transform.PassContext(opt_level=3):
            lib = relay.build(mod, target=target, params=params)

    # Get a graph executor graph module
    m = graph_executor.GraphModule(lib["default"](dev))

    def exec_tvm(*args):
        for idx, arg in enumerate(args, 0):
            if arg.dim() != 0:
                m.set_input(
                    f"inp_{idx}",
                    tvm.nd.from_dlpack(torch.utils.dlpack.to_dlpack(arg.contiguous())),
                )
        m.run()
        outs = [
            torch.utils.dlpack.from_dlpack(m.get_output(i).to_dlpack())
            for i in range(m.get_num_outputs())
        ]
        return outs

    return exec_tvm
def compile_model(
    mod,
    params,
    target,
    dump_code=None,
    target_host=None,
    tuning_records=None,
    alter_layout=None,
    disabled_pass=None,
):
    """Compile a model from a supported framework into a TVM module.

    This function takes a union of the arguments of both frontends.load_model
    and compiler.compile_relay. The resulting TVM module can be executed using
    the graph executor.

    Parameters
    ----------
    mod: IRModule
        The relay module to be compiled.
    params: dict
        A dictionary containing the module's parameters.
    target : str
        The target for which to compile. Can be a plain string or a path.
    dump_code : list, optional
        Dump the generated code for the specified source types, on
        the requested target.
    target_host : str, optional
        The target of the host machine if host-side code
        needs to be generated.
    tuning_records: str, optional
        Path to the file produced by the tuning to be used during
        compilation.
    alter_layout: str, optional
        The layout to convert the graph to. Note, the convert layout
        pass doesn't currently guarantee the whole of the graph will
        be converted to the chosen layout.
    disabled_pass: str, optional
        Comma-separated list of passes which needs to be disabled
        during compilation

    Returns
    -------
    graph : str
        A JSON-serialized TVM execution graph.
    lib : tvm.module.Module
        A TVM module containing the compiled functions.
    params : dict
        The parameters (weights) for the TVM module.
    dumps : dict
        Dictionary containing the dumps specified.
    """
    dump_code = [x.strip() for x in dump_code.split(",")] if dump_code else None
    config = {}

    if alter_layout:
        mod = common.convert_graph_layout(mod, alter_layout)

    tvm_target, extra_targets = common.target_from_cli(target)
    target_host = tvm_target if not target_host else target_host
    tvm_target, target_host = Target.check_and_update_host_consist(tvm_target, target_host)

    for codegen_from_cli in extra_targets:
        codegen = composite_target.get_codegen_by_target(codegen_from_cli["name"])
        partition_function = codegen["pass_pipeline"]
        mod = partition_function(mod, params, **codegen_from_cli["opts"])
        if codegen["config_key"] is not None:
            config[codegen["config_key"]] = codegen_from_cli["opts"]

    if tuning_records and os.path.exists(tuning_records):
        logger.debug("tuning records file provided: %s", tuning_records)

        use_autoscheduler = True
        try:
            auto_scheduler.load_records(tuning_records)
        except tvm._ffi.base.TVMError:
            use_autoscheduler = False

        if use_autoscheduler:
            with auto_scheduler.ApplyHistoryBest(tuning_records):
                config["relay.backend.use_auto_scheduler"] = True
                with tvm.transform.PassContext(opt_level=3, config=config, disabled_pass=disabled_pass):
                    logger.debug("building relay graph with autoscheduler")
                    graph_module = relay.build(mod, target=target, params=params)
        else:
            with autotvm.apply_history_best(tuning_records):
                with tvm.transform.PassContext(opt_level=3, config=config, disabled_pass=disabled_pass):
                    logger.debug("building relay graph with tuning records")
                    graph_module = relay.build(mod, tvm_target, params=params)
    else:
        with tvm.transform.PassContext(opt_level=3, config=config, disabled_pass=disabled_pass):
            logger.debug("building relay graph (no tuning records provided)")
            graph_module = relay.build(mod, tvm_target, params=params)

    # Generate output dump files with sources
    dump_code = dump_code or []
    dumps = {}
    for source_type in dump_code:
        lib = graph_module.get_lib()
        # TODO lib.get_source call have inconsistent behavior for unsupported
        #      formats (@leandron).
        source = str(mod) if source_type == "relay" else lib.get_source(source_type)
        dumps[source_type] = source

    # TODO we need to update this return to use the updated graph module APIs
    #      as these getter functions will be deprecated in the next release (@leandron)
    return graph_module.get_json(), graph_module.get_lib(), graph_module.get_params(), dumps
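
# A minimal usage sketch for the compile_model variant above, assuming `mod` and
# `params` were already obtained from a Relay frontend (e.g. relay.frontend.from_onnx)
# and that an auto-scheduler log exists at the hypothetical path below.
graph_json, built_lib, built_params, dumps = compile_model(
    mod,
    params,
    target="llvm",
    tuning_records="autoscheduler_records.json",  # hypothetical Ansor tuning log
    dump_code="relay",
)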
def tune_and_evaluate():
    print("Begin tuning...")
    tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
    tune_option = auto_scheduler.TuningOptions(
        num_measure_trials=200,  # change this to 20000 to achieve the best performance
        runner=auto_scheduler.RPCRunner(
            device_key,
            host="0.0.0.0",
            port=9191,
            timeout=30,
            repeat=1,
            min_repeat_ms=200,
            enable_cpu_cache_flush=True,
        ),
        measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
    )
    tuner.tune(tune_option)

    # Compile with the history best
    print("Compile...")
    with auto_scheduler.ApplyHistoryBest(log_file):
        with tvm.transform.PassContext(
                opt_level=3, config={"relay.backend.use_auto_scheduler": True}):
            lib = relay.build(mod, target=target, params=params)

    # Export library
    tmp = tempdir()
    if use_ndk:
        from tvm.contrib import ndk
        filename = "net.so"
        lib.export_library(tmp.relpath(filename), ndk.create_shared)
    else:
        filename = "net.tar"
        lib.export_library(tmp.relpath(filename))

    # Upload module to device
    print("Upload...")
    remote = auto_scheduler.utils.request_remote(device_key, "0.0.0.0", 9191, timeout=10000)
    remote.upload(tmp.relpath(filename))
    rlib = remote.load_module(filename)

    # Create graph runtime
    dev = remote.cpu()
    module = graph_runtime.GraphModule(rlib["default"](dev))
    data_tvm = tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))
    module.set_input("data", data_tvm)

    # Evaluate
    print("Evaluate inference time cost...")
    ftimer = module.module.time_evaluator("run", dev, repeat=3, min_repeat_ms=500)
    prof_res = np.array(ftimer().results) * 1e3  # convert to millisecond
    print("Mean inference time (std dev): %.2f ms (%.2f ms)" % (np.mean(prof_res), np.std(prof_res)))
def onnx_compile(model_string,
                 model_path,
                 target,
                 target_host,
                 opt_level,
                 opset,
                 freeze_params,
                 input_shapes,
                 nhwc=False,
                 tuning_logfile="",
                 tuning_type=AUTO_TVM_TYPE):
    model = onnx.load_model_from_string(bytes(model_string))
    if model_path:
        base_dir = os.path.dirname(os.path.abspath(model_path))
        onnx.load_external_data_for_model(model, base_dir)

    # Collect only feed input names from all input names
    all_input_names = [node.name for node in model.graph.input]
    all_initializer = [node.name for node in model.graph.initializer]
    net_feed_input_names = list(set(all_input_names) - set(all_initializer))

    # Match names and input shapes
    all_input_mapping = [(name, shape) for (name, shape) in zip(all_input_names, input_shapes)]
    # Using an ordereddict maintains input ordering.
    shape_dict = collections.OrderedDict(all_input_mapping)
    # Get only feed input pairs
    feed_shape_dict = {}
    for name in net_feed_input_names:
        feed_shape_dict[name] = shape_dict[name]

    irmod, params = relay.frontend.from_onnx(model,
                                             feed_shape_dict,
                                             opset=opset,
                                             freeze_params=freeze_params)

    # TODO(vvchernov): replace prints by logger, but investigate ORT logging system for python before
    # Also see lines 91, 106
    # print("Build TVM graph executor")

    # Tuning file can be set by client through ep options
    if tuning_logfile == "":
        tuning_logfile = os.getenv("AUTOTVM_TUNING_LOG")
    if tuning_type == ANSOR_TYPE:
        if tuning_logfile:
            desired_layouts = {
                "nn.conv2d": ["NHWC", "default"],
                "nn.conv2d_transpose": ["NHWC", "default"],
                "nn.upsampling": ["NHWC", "default"],
                "vision.roi_align": ["NHWC", "default"],
            }
            # print("Use tuning file from ", ANSOR_TYPE, ": ", tuning_logfile)
            with auto_scheduler.ApplyHistoryBest(tuning_logfile):
                with tvm.transform.PassContext(opt_level=opt_level,
                                               config={"relay.backend.use_auto_scheduler": True}):
                    if nhwc:
                        irmod = relay.transform.InferType()(irmod)
                        model_nhwc = relay.transform.ConvertLayout(desired_layouts)(irmod)
                        model_nhwc = tvm.relay.transform.EliminateCommonSubexpr()(model_nhwc)
                        irmod = tvm.relay.transform.FoldConstant()(model_nhwc)
                    lib = relay.build(irmod, target=target, target_host=target_host)
        else:
            with tvm.transform.PassContext(opt_level=opt_level):
                lib = relay.build(irmod, target=target, target_host=target_host, params=params)
    elif tuning_type == AUTO_TVM_TYPE:
        with relay.build_config(opt_level=opt_level):
            if tuning_logfile:
                # print("Use tuning file from ", AUTO_TVM_TYPE, ": ", tuning_logfile)
                with autotvm.apply_history_best(tuning_logfile):
                    # XXX: do not pass parameters to relay.build otherwise they will be inlined into the module
                    lib = relay.build(irmod, target_host=target_host, target=target)
            else:
                lib = relay.build(irmod, target_host=target_host, target=target)
    else:
        # TODO(vvchernov): replace prints by logger, but investigate ORT logging system for python before
        # This print is kept because it reports an error
        print("ERROR: Tuning log type {} is unsupported. ".format(tuning_type),
              "Only {} and {} types are supported".format(ANSOR_TYPE, AUTO_TVM_TYPE))
        return None

    ctx = tvm.device(target, 0)
    m = graph_executor.GraphModule(lib["default"](ctx))
    return m.module
def main():
    log_file = os.path.join(ARGS.work_dir, f"{ARGS.model_name}.json")

    runner = auto_scheduler.RPCRunner(
        key=ARGS.rpc_key,
        host=ARGS.rpc_host,
        port=ARGS.rpc_port,
        n_parallel=cpu_count(logical=True),
        number=ARGS.number,
        repeat=ARGS.repeat,
        min_repeat_ms=ARGS.min_repeat_ms,
        enable_cpu_cache_flush=ARGS.cpu_flush,
        timeout=ARGS.rpc_config.session_timeout_sec,
    )

    if ARGS.target.kind.name == "llvm":
        hardware_params = auto_scheduler.HardwareParams(
            num_cores=int(ARGS.target.attrs["num-cores"]),
            target=ARGS.target,
        )
    elif ARGS.target.kind.name == "cuda":
        hardware_params = auto_scheduler.HardwareParams(
            num_cores=-1,
            vector_unit_bytes=16,
            cache_line_bytes=64,
            max_shared_memory_per_block=int(ARGS.target.attrs["max_shared_memory_per_block"]),
            max_threads_per_block=int(ARGS.target.attrs["max_threads_per_block"]),
            # The value `max_local_memory_per_block` is not used in AutoScheduler,
            # but is required by the API.
            max_local_memory_per_block=12345678,
            max_vthread_extent=8,
            warp_size=32,
        )
    else:
        raise NotImplementedError(f"Unsupported target {ARGS.target}")

    describe()
    print(f"Workload: {ARGS.model_name}")

    onnx_model = onnx.load(ARGS.onnx_path)
    shape_dict = {}
    for item in ARGS.input_shape:
        print(f"  input_name : {item['name']}")
        print(f"  input_shape: {item['shape']}")
        print(f"  input_dtype: {item['dtype']}")
        shape_dict[item["name"]] = item["shape"]
    mod, params = from_onnx(onnx_model, shape_dict, freeze_params=True)
    input_data = {
        item["name"]: generate_input_data(item["shape"], item["dtype"])
        for item in ARGS.input_shape
    }

    with ms.Profiler() as profiler:
        tasks, task_weights = auto_scheduler.extract_tasks(
            mod["main"],
            params,
            target=ARGS.target,
            hardware_params=hardware_params,
        )
        for idx, (task, task_weight) in enumerate(zip(tasks, task_weights)):
            print(f"==== Task {idx}: {task.desc} "
                  f"(weight {task_weight} key: {task.workload_key}) =====")
            print(task.compute_dag)

        if ARGS.num_trials > 0:
            tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
            tuner.tune(
                auto_scheduler.TuningOptions(
                    num_measure_trials=ARGS.num_trials,
                    runner=runner,
                    measure_callbacks=[
                        auto_scheduler.RecordToFile(log_file),
                    ],
                ),
                adaptive_training=ARGS.adaptive_training,
            )

        relay_build = {"graph": relay.build, "vm": relay.vm.compile}[ARGS.backend]
        with auto_scheduler.ApplyHistoryBest(log_file):
            with tvm.transform.PassContext(
                opt_level=3,
                config={"relay.backend.use_auto_scheduler": True},
            ):
                lib = relay_build(
                    mod,
                    target=ARGS.target,
                    params=params,
                )
    print("Tuning Time:")
    print(profiler.table())

    run_module_via_rpc(
        rpc_config=ARGS.rpc_config,
        lib=lib,
        dev_type=ARGS.target.kind.name,
        args=input_data,
        continuation=create_timer(ARGS.backend),
        backend=ARGS.backend,
    )
def onnx_compile(model_string,
                 model_path,
                 executor,
                 target,
                 target_host,
                 opt_level,
                 opset,
                 freeze_params,
                 input_shapes,
                 nhwc=False,
                 tuning_logfile="",
                 tuning_type=AUTO_TVM_TYPE):
    def get_tvm_executor(irmod, executor, target, params):
        if executor == "vm":
            log.info("Build TVM virtual machine")
            lib = vm.compile(
                copy.deepcopy(irmod),
                target,
                params=params,
            )
        elif executor == "graph":
            log.info("Build TVM graph executor")
            lib = relay.build(irmod, target=target, params=params)
        else:
            log.error("Executor type {} is unsupported. ".format(executor) +
                      "Only \"vm\" and \"graph\" types are supported")
            return None
        return lib

    model = onnx.load_model_from_string(bytes(model_string))
    if model_path:
        base_dir = os.path.dirname(os.path.abspath(model_path))
        onnx.load_external_data_for_model(model, base_dir)

    # Collect only feed input names from all input names
    all_input_names = [node.name for node in model.graph.input]
    all_initializer = [node.name for node in model.graph.initializer]
    net_feed_input_names = list(set(all_input_names) - set(all_initializer))

    # Match names and input shapes
    all_input_mapping = [(name, shape) for (name, shape) in zip(all_input_names, input_shapes)]
    # Using an ordereddict maintains input ordering.
    shape_dict = collections.OrderedDict(all_input_mapping)
    # Get only feed input pairs
    feed_shape_dict = {}
    for name in net_feed_input_names:
        feed_shape_dict[name] = shape_dict[name]

    irmod, params = relay.frontend.from_onnx(model,
                                             feed_shape_dict,
                                             opset=opset,
                                             freeze_params=freeze_params)
    irmod = relay.transform.DynamicToStatic()(irmod)

    # Tuning file can be set by client through ep options
    if tuning_logfile == "":
        tuning_logfile = os.getenv("AUTOTVM_TUNING_LOG")
    lib = None
    tvm_target = tvm.target.Target(target, host=target_host)
    if tuning_logfile:
        if tuning_type == ANSOR_TYPE:
            desired_layouts = {
                "nn.conv2d": ["NHWC", "default"],
                "nn.conv2d_transpose": ["NHWC", "default"],
                "nn.upsampling": ["NHWC", "default"],
                "vision.roi_align": ["NHWC", "default"],
            }
            log.info("Use tuning file from {}: {}".format(ANSOR_TYPE, tuning_logfile))
            with auto_scheduler.ApplyHistoryBest(tuning_logfile):
                with tvm.transform.PassContext(
                    opt_level=opt_level,
                    config={
                        "relay.backend.use_auto_scheduler": True,
                        "relay.FuseOps.max_depth": 30,
                    },
                ):
                    if nhwc:
                        seq = tvm.transform.Sequential(
                            [
                                relay.transform.InferType(),
                                relay.transform.ConvertLayout(desired_layouts),
                                relay.transform.EliminateCommonSubexpr(),
                                relay.transform.FoldConstant(),
                            ]
                        )
                        irmod = seq(irmod)
                    lib = get_tvm_executor(irmod, executor, tvm_target, params)
        elif tuning_type == AUTO_TVM_TYPE:
            with relay.build_config(opt_level=opt_level):
                log.info("Use tuning file from {}: {}".format(AUTO_TVM_TYPE, tuning_logfile))
                with autotvm.apply_history_best(tuning_logfile):
                    lib = get_tvm_executor(irmod, executor, tvm_target, params)
        else:
            log.error("Tuning log type {} is unsupported. ".format(tuning_type) +
                      "Only {} and {} types are supported".format(ANSOR_TYPE, AUTO_TVM_TYPE))
            return None
    else:
        with tvm.transform.PassContext(opt_level=opt_level):
            lib = get_tvm_executor(irmod, executor, tvm_target, params)

    if lib is None:
        return None

    ctx = tvm.device(target, 0)
    if executor == "vm":
        m = tvm.runtime.vm.VirtualMachine(lib, ctx)
    elif executor == "graph":
        m = graph_executor.GraphModule(lib["default"](ctx))
    else:
        print("ERROR: Executor type {} is unsupported. ".format(executor),
              "Only \"vm\" and \"graph\" types are supported")
        return None

    return m.module
def main():
    log_file = os.path.join(ARGS.work_dir, f"{ARGS.model_name}.json")

    runner = auto_scheduler.RPCRunner(
        key=ARGS.rpc_key,
        host=ARGS.rpc_host,
        port=ARGS.rpc_port,
        n_parallel=cpu_count(logical=True),
        number=ARGS.number,
        repeat=ARGS.repeat,
        min_repeat_ms=ARGS.min_repeat_ms,
        enable_cpu_cache_flush=ARGS.cpu_flush,
    )

    if ARGS.target.kind.name == "llvm":
        hardware_params = auto_scheduler.HardwareParams(
            num_cores=int(ARGS.target.attrs["num-cores"]),
            target=ARGS.target,
        )
    elif ARGS.target.kind.name == "cuda":
        hardware_params = auto_scheduler.HardwareParams(
            num_cores=-1,
            vector_unit_bytes=16,
            cache_line_bytes=64,
            max_shared_memory_per_block=int(ARGS.target.attrs["max_shared_memory_per_block"]),
            max_threads_per_block=int(ARGS.target.attrs["max_threads_per_block"]),
            # The value `max_local_memory_per_block` is not used in AutoScheduler,
            # but is required by the API.
            max_local_memory_per_block=12345678,
            max_vthread_extent=8,
            warp_size=32,
        )
    else:
        raise NotImplementedError(f"Unsupported target {ARGS.target}")

    describe()
    print(f"Workload: {ARGS.model_name}")

    onnx_model = onnx.load(ARGS.onnx_path)
    shape_dict = {}
    for item in ARGS.input_shape:
        print(f"  input_name: {item['name']}")
        print(f"  input_shape: {item['shape']}")
        print(f"  input_dtype: {item['dtype']}")
        shape_dict[item["name"]] = item["shape"]
    mod, params = from_onnx(onnx_model, shape_dict, freeze_params=True)

    tasks, task_weights = auto_scheduler.extract_tasks(
        mod["main"],
        params,
        target=ARGS.target,
        hardware_params=hardware_params,
    )
    for idx, (task, task_weight) in enumerate(zip(tasks, task_weights)):
        print(
            f"==== Task {idx}: {task.desc} (weight {task_weight} key: {task.workload_key}) ====="
        )
        print(task.compute_dag)

    tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
    tuner.tune(
        auto_scheduler.TuningOptions(
            num_measure_trials=ARGS.num_trials,
            runner=runner,
            measure_callbacks=[
                auto_scheduler.RecordToFile(log_file),
            ],
        )
    )

    with auto_scheduler.ApplyHistoryBest(log_file):
        with tvm.transform.PassContext(
            opt_level=3,
            config={"relay.backend.use_auto_scheduler": True},
        ):
            lib = relay.build(
                mod,
                target=ARGS.target,
                params=params,
            )
    graph, rt_mod, params = lib.graph_json, lib.lib, lib.params

    input_data = {}
    for item in ARGS.input_shape:
        input_name, input_shape, input_dtype = item["name"], item["shape"], item["dtype"]
        if input_dtype.startswith("float"):
            input_data[input_name] = np.random.uniform(size=input_shape).astype(input_dtype)
        else:
            input_data[input_name] = np.random.randint(
                low=0, high=10000, size=input_shape, dtype=input_dtype
            )

    def f_timer(rt_mod, dev, input_data):
        # pylint: disable=import-outside-toplevel
        from tvm.contrib.graph_executor import GraphModule
        # pylint: enable=import-outside-toplevel

        mod = GraphModule(rt_mod["default"](dev))
        for input_name, input_value in input_data.items():
            mod.set_input(input_name, input_value)
        ftimer = mod.module.time_evaluator(
            "run",
            dev,
            min_repeat_ms=500,
            repeat=3,
        )
        results = list(np.array(ftimer().results) * 1000.0)  # type: ignore
        print("Running time in time_evaluator: ", results)

    run_module_via_rpc(
        rpc_config=ARGS.rpc_config,
        lib=lib,
        dev_type=ARGS.target.kind.name,
        args=input_data,
        continuation=f_timer,
    )

    def f_per_layer(rt_mod, dev, input_data):
        # pylint: disable=import-outside-toplevel
        from tvm.contrib.debugger.debug_executor import create
        # pylint: enable=import-outside-toplevel

        mod = create(graph, rt_mod, dev)
        for input_name, input_value in input_data.items():
            mod.set_input(input_name, input_value)
        graph_nodes = [n["name"] for n in json.loads(graph)["nodes"]]
        graph_time = mod.run_individual(number=10, repeat=1, min_repeat_ms=5000)
        print("|graph_nodes| = ", len(graph_nodes))
        print("|graph_time| = ", len(graph_time))
        graph_nodes_time = {k: float(v) for k, v in zip(graph_nodes, graph_time)}
        for k, v in graph_nodes_time.items():
            print(f"{k} : {v:.3f}")

    run_module_via_rpc(
        rpc_config=ARGS.rpc_config,
        lib=rt_mod,
        dev_type=ARGS.target.kind.name,
        args=input_data,
        continuation=f_per_layer,
    )
def compile_model(
    tvmc_model: TVMCModel,
    target: str,
    opt_level: int = 3,
    executor: Optional[Executor] = Executor("graph"),
    runtime: Optional[Runtime] = Runtime("cpp"),
    tuning_records: Optional[str] = None,
    package_path: Optional[str] = None,
    cross: Optional[Union[str, Callable]] = None,
    cross_options: Optional[str] = None,
    output_format: str = "so",
    dump_code: Optional[List[str]] = None,
    target_host: Optional[str] = None,
    desired_layout: Optional[str] = None,
    disabled_pass: Optional[str] = None,
    pass_context_configs: Optional[List[str]] = None,
    additional_target_options: Optional[Dict[str, Dict[str, Any]]] = None,
):
    """Compile a model from a supported framework into a TVM module.

    This function takes a union of the arguments of both frontends.load_model
    and compiler.compile_relay. The resulting TVM module can be executed using
    the graph executor.

    Parameters
    ----------
    tvmc_model : TVMCModel
        The model object that should be compiled.
    target : str
        The target for which to compile. Can be a plain string or a path.
    opt_level : int
        The option that controls various sorts of optimizations.
    tuning_records : str
        A path to tuning records produced using tvmc.tune. When provided,
        compilation will use more optimized kernels leading to better results.
    package_path : str, optional
        The path to export the compiled model to. If not provided it will
        be saved in a temporary directory.
    cross : str or callable object, optional
        Function that performs the actual compilation
    cross_options : str, optional
        Command line options to be passed to the cross compiler.
    output_format : str
        What format to use when saving the function library. Must be one of "so" or "tar".
        When compiling for a remote device without a cross compiler, "tar" will likely
        work better.
    dump_code : list, optional
        Dump the generated code for the specified source types, on
        the requested target.
    target_host : str, optional
        The target of the host machine if host-side code
        needs to be generated.
    desired_layout: str, optional
        The layout to convert the graph to. Note, the convert layout
        pass doesn't currently guarantee the whole of the graph will
        be converted to the chosen layout.
    disabled_pass: str, optional
        Comma-separated list of passes which needs to be disabled
        during compilation
    pass_context_configs: list[str], optional
        List of strings containing a set of configurations to be passed to the
        PassContext.
    additional_target_options: Optional[Dict[str, Dict[str, Any]]]
        Additional target options in a dictionary to combine with initial Target arguments

    Returns
    -------
    compiled_model : TVMCPackage
        The compiled TVMCModel ready to be run.
    """
    mod, params = tvmc_model.mod, tvmc_model.params

    config = parse_configs(pass_context_configs)

    if desired_layout:
        mod = convert_graph_layout(mod, desired_layout)

    tvm_target, extra_targets = target_from_cli(target, additional_target_options)
    tvm_target, target_host = Target.check_and_update_host_consist(tvm_target, target_host)

    for codegen_from_cli in extra_targets:
        codegen = composite_target.get_codegen_by_target(codegen_from_cli["name"])
        partition_function = codegen["pass_pipeline"]

        if codegen["config_key"] is not None:
            config[codegen["config_key"]] = codegen_from_cli["opts"]
        with tvm.transform.PassContext(config=config):
            mod = partition_function(mod, params, **codegen_from_cli["opts"])

    if tuning_records and os.path.exists(tuning_records):
        logger.debug("tuning records file provided: %s", tuning_records)

        use_autoscheduler = True
        try:
            auto_scheduler.load_records(tuning_records)
        except tvm._ffi.base.TVMError:
            use_autoscheduler = False

        if use_autoscheduler:
            with auto_scheduler.ApplyHistoryBest(tuning_records):
                config["relay.backend.use_auto_scheduler"] = True
                with tvm.transform.PassContext(
                    opt_level=opt_level, config=config, disabled_pass=disabled_pass
                ):
                    logger.debug("building relay graph with autoscheduler")
                    graph_module = relay.build(
                        mod, target=tvm_target, executor=executor, runtime=runtime, params=params
                    )
        else:
            with autotvm.apply_history_best(tuning_records):
                with tvm.transform.PassContext(
                    opt_level=opt_level, config=config, disabled_pass=disabled_pass
                ):
                    logger.debug("building relay graph with tuning records")
                    graph_module = relay.build(
                        mod, target=tvm_target, executor=executor, runtime=runtime, params=params
                    )
    else:
        with tvm.transform.PassContext(
            opt_level=opt_level, config=config, disabled_pass=disabled_pass
        ):
            logger.debug("building relay graph (no tuning records provided)")
            graph_module = relay.build(
                mod, target=tvm_target, executor=executor, runtime=runtime, params=params
            )

    # Generate output dump files with sources
    if dump_code is None:
        dump_code = []
    if not isinstance(dump_code, list):
        dump_code = [dump_code]
    dumps = {}
    for source_type in dump_code:
        lib = graph_module.get_lib()
        # TODO lib.get_source call have inconsistent behavior for unsupported
        #      formats (@leandron).
        source = str(mod) if source_type == "relay" else lib.get_source(source_type)
        dumps[source_type] = source

    # Create a new tvmc model package object from the graph definition.
    package_path = tvmc_model.export_package(
        graph_module, package_path, cross, cross_options, output_format
    )

    # Write dumps to file.
    if dumps:
        save_dumps(package_path, dumps)

    return TVMCPackage(package_path)
def compile_model(
    path,
    target,
    dump_code=None,
    target_host=None,
    model_format=None,
    tuning_records=None,
    alter_layout=None,
    shape_dict=None,
):
    """Compile a model from a supported framework into a TVM module.

    This function takes a union of the arguments of both frontends.load_model
    and compiler.compile_relay. The resulting TVM module can be executed using
    the graph runtime.

    Parameters
    ----------
    path: str
        Path to a file
    target : str
        The target for which to compile. Can be a plain string or a path.
    dump_code : list, optional
        Dump the generated code for the specified source types, on
        the requested target.
    target_host : str, optional
        The target of the host machine if host-side code
        needs to be generated.
    model_format: str, optional
        A string representing a name of a frontend to be used
    tuning_records: str, optional
        Path to the file produced by the tuning to be used during
        compilation.
    alter_layout: str, optional
        The layout to convert the graph to. Note, the convert layout
        pass doesn't currently guarantee the whole of the graph will
        be converted to the chosen layout.
    shape_dict: dict, optional
        A mapping from input names to their shape. When present,
        the default shapes in the model will be overwritten.

    Returns
    -------
    graph : str
        A JSON-serialized TVM execution graph.
    lib : tvm.module.Module
        A TVM module containing the compiled functions.
    params : dict
        The parameters (weights) for the TVM module.
    dumps : dict
        Dictionary containing the dumps specified.
    """
    dump_code = [x.strip() for x in dump_code.split(",")] if dump_code else None

    mod, params = frontends.load_model(path, model_format, shape_dict)

    if alter_layout:
        mod = common.convert_graph_layout(mod, alter_layout)

    tvm_target = common.target_from_cli(target)
    target_host = tvm_target if not target_host else target_host

    if tuning_records and os.path.exists(tuning_records):
        logger.debug("tuning records file provided: %s", tuning_records)

        use_autoscheduler = True
        try:
            auto_scheduler.load_records(tuning_records)
        except tvm._ffi.base.TVMError:
            use_autoscheduler = False

        if use_autoscheduler:
            with auto_scheduler.ApplyHistoryBest(tuning_records):
                with tvm.transform.PassContext(
                        opt_level=3, config={"relay.backend.use_auto_scheduler": True}):
                    logger.debug("building relay graph with autoscheduler")
                    graph_module = relay.build(mod, target=target, params=params, target_host=target_host)
        else:
            with autotvm.apply_history_best(tuning_records):
                with tvm.transform.PassContext(opt_level=3):
                    logger.debug("building relay graph with tuning records")
                    graph_module = relay.build(mod, tvm_target, params=params, target_host=target_host)
    else:
        with tvm.transform.PassContext(opt_level=3):
            logger.debug("building relay graph (no tuning records provided)")
            graph_module = relay.build(mod, tvm_target, params=params, target_host=target_host)

    # Generate output dump files with sources
    dump_code = dump_code or []
    dumps = {}
    for source_type in dump_code:
        lib = graph_module.get_lib()
        # TODO lib.get_source call have inconsistent behavior for unsupported
        #      formats (@leandron).
        source = str(mod) if source_type == "relay" else lib.get_source(source_type)
        dumps[source_type] = source

    # TODO we need to update this return to use the updated graph module APIs
    #      as these getter functions will be deprecated in the next release (@leandron)
    return graph_module.get_json(), graph_module.get_lib(), graph_module.get_params(), dumps
def tune_network(network, target):
    # Extract tasks
    mod, params = get_network(network)
    target = tvm.target.Target(target)
    tasks, task_weights = auto_scheduler.extract_tasks(mod["main"], params, target)

    with tempfile.NamedTemporaryFile() as fp:
        log_file = fp.name

        # Tuning
        measure_ctx = auto_scheduler.LocalRPCMeasureContext(timeout=60, device=0)
        tuner = auto_scheduler.TaskScheduler(tasks, task_weights, callbacks=[])
        tune_option = auto_scheduler.TuningOptions(
            num_measure_trials=100,
            num_measures_per_round=2,
            early_stopping=1,
            runner=measure_ctx.runner,
            builder=auto_scheduler.LocalBuilder(timeout=60),
            measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
        )
        tuner.tune(tune_option, search_policy="sketch.random")
        del measure_ctx

        # Compile with the history best
        with auto_scheduler.ApplyHistoryBest(log_file):
            with tvm.transform.PassContext(
                opt_level=3, config={"relay.backend.use_auto_scheduler": True}
            ):
                lib = relay.build(mod, target=target, params=params)

        # Also test that multiple log files can be loaded.
        with auto_scheduler.ApplyHistoryBest([log_file, log_file]) as best:
            assert isinstance(
                best, auto_scheduler.dispatcher.ApplyHistoryBest
            ), "Unable to load multiple log files jointly."

        # Confirm iterables can be directly loaded.
        loaded_recs = auto_scheduler.dispatcher.load_records(log_file)
        with auto_scheduler.ApplyHistoryBest(iter(loaded_recs)) as best:
            assert isinstance(
                best, auto_scheduler.dispatcher.ApplyHistoryBest
            ), "Unable to ingest logs from an iterator."

        # Sample a schedule when missing
        with auto_scheduler.ApplyHistoryBestOrSample(None, num_measure=2):
            with tvm.transform.PassContext(
                opt_level=3, config={"relay.backend.use_auto_scheduler": True}
            ):
                lib2 = relay.build(mod, target=target, params=params)

        # Compile without auto-scheduler and any other optimization for correctness check
        with tvm.transform.PassContext(opt_level=0):
            ref_lib = relay.build(mod, target=target, params=params)

        # Check the correctness
        def get_output(data, lib):
            dev = tvm.cuda()
            module = graph_executor.GraphModule(lib["default"](dev))
            module.set_input("data", data)
            module.run()
            return module.get_output(0).numpy()

        np.random.seed(0)
        if network == "mlp":
            data = np.random.uniform(size=(1, 32))
        elif network == "winograd-test":
            data = np.random.uniform(size=(1, 23, 40, 32))
        else:
            raise ValueError("Unknown network: " + network)

        actual_output1 = get_output(data, lib)
        actual_output2 = get_output(data, lib2)
        expected_output = get_output(data, ref_lib)

        tvm.testing.assert_allclose(actual_output1, expected_output, rtol=1e-4, atol=1e-4)
        tvm.testing.assert_allclose(actual_output2, expected_output, rtol=1e-4, atol=1e-4)