def partition(
    self, mod: tvm.IRModule, params: Optional[Dict[str, tvm.runtime.NDArray]] = None
) -> tvm.IRModule:
    """Split the relay graph into parts supported and unsupported by the
    target hardware accelerator.

    Parameters
    ----------
    mod : tvm.IRModule
        The relay module to be partitioned.

    params : Optional[Dict[str, tvm.runtime.NDArray]]
        When non-empty, handed to ``bind_params_by_name`` together with
        ``mod["main"]``; the result replaces ``mod["main"]`` before any pass runs.

    Returns
    -------
    out : tvm.IRModule
        The partitioned relay module.
    """

    def registered(phase):
        # Entries of self._relay_passes are indexed as (phase, pass).
        return [entry[1] for entry in self._relay_passes if entry[0] == phase]

    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)

    # User passes registered for the pre-partitioning phase run first,
    # followed by the fixed partitioning pipeline.
    pipeline = registered(PassPhase.PRE_PARTITIONING)
    pipeline += [
        relay.transform.MergeComposite(self._pattern_table()),
        relay.transform.AnnotateTarget(self.target_name),
    ]
    if self.merge_compiler_regions:
        pipeline.append(relay.transform.MergeCompilerRegions())
    pipeline.append(relay.transform.PartitionGraph())
    pipeline += registered(PassPhase.POST_PARTITIONING_0)

    mod = tvm.transform.Sequential(pipeline)(mod)

    # Defunctionalize the partitioned functions to allow lowering
    for gvar, func in mod.functions.items():
        mod.update_func(gvar, relay.transform.Defunctionalization(func, mod))

    late_passes = registered(PassPhase.POST_PARTITIONING_1)
    return tvm.transform.Sequential(late_passes)(mod)
def get_multiple_input_relay_mod(tensor_type, shape, data_name0, data_name1):
    """Return an IRModule whose function adds two relay variables.

    Both variables are created with the given ``shape`` and ``tensor_type``
    (used as dtype); they are named ``data_name0`` and ``data_name1`` and
    become the function parameters in that order.
    """
    lhs = relay.var(data_name0, shape=shape, dtype=tensor_type)
    rhs = relay.var(data_name1, shape=shape, dtype=tensor_type)
    add_func = relay.Function([lhs, rhs], lhs + rhs)
    return IRModule.from_expr(add_func)
def __init__(
    self,
    relay_mod: tvm.IRModule,
    relay_param: Dict[str, tvm.runtime.NDArray] = None,
    plotter: Plotter = None,
    parser: VizParser = None,
):
    """Visit every global function of ``relay_mod`` and add its nodes to a graph.

    Parameters
    ----------
    relay_mod : tvm.IRModule
        Module whose global functions are traversed.

    relay_param : Dict[str, tvm.runtime.NDArray]
        Stored on the instance; an empty dict is used when None.

    plotter : Plotter
        Used to create one graph per global function; a ``TermPlotter`` is
        created when None.

    parser : VizParser
        Stored on the instance; a ``TermVizParser`` is created when None.
    """
    if plotter is None:
        plotter = TermPlotter()
    self._plotter = plotter

    if relay_param is None:
        relay_param = {}
    self._relay_param = relay_param

    if parser is None:
        parser = TermVizParser()
    self._parser = parser

    # List "main" ahead of every other function so that it is processed,
    # and therefore shown, first.
    all_names = [gv.name_hint for gv in relay_mod.get_global_vars()]
    graph_names = [n for n in all_names if n == "main"]
    graph_names += [n for n in all_names if n != "main"]

    node_to_id = {}

    def traverse_expr(node):
        # Assign the next sequential string ID the first time a node is seen.
        if node not in node_to_id:
            node_to_id[node] = str(len(node_to_id))

    for name in graph_names:
        # IDs restart from "0" for each function.
        node_to_id.clear()
        relay.analysis.post_order_visit(relay_mod[name], traverse_expr)
        self._add_nodes(self._plotter.create_graph(name), node_to_id)
def test_multi_targets():
    """Compile a function with an ``on_device(..., tvm.cpu())`` sub-expression
    for both llvm and cuda targets and check the VM result against numpy."""
    n = 10
    vec_shape = (n,)

    # Build an IRModule: x + on_device(y + z, cpu).
    x = relay.var("x", shape=vec_shape)
    y = relay.var("y", shape=vec_shape)
    z = relay.var("z", shape=vec_shape)
    cpu_part = relay.op.annotation.on_device(y + z, tvm.cpu())
    mod = IRModule.from_expr(relay.Function([x, y, z], x + cpu_part))

    # Compile to VMExecutable, with cuda configured as the fallback device type.
    pass_ctx = tvm.transform.PassContext(
        opt_level=3, config={"relay.fallback_device_type": tvm.cuda().device_type}
    )
    with pass_ctx:
        targets = {"cpu": tvm.target.Target("llvm"), "cuda": tvm.target.Target("cuda")}
        exe = relay.vm.compile(mod, target=targets)

    # Run
    vm = runtime.vm.VirtualMachine(exe, [tvm.cuda(), tvm.cpu()])
    x_data = np.random.rand(n).astype("float32")
    y_data = np.random.rand(n).astype("float32")
    z_data = np.random.rand(n).astype("float32")
    actual_result = vm.invoke("main", x_data, y_data, z_data)

    # Test
    expected_result = x_data + y_data + z_data
    tvm.testing.assert_allclose(actual_result.numpy(), expected_result)
def __init__(
    self,
    mod: Optional[IRModule] = None,
    *,
    target: Optional[Target] = None,
    space_generator: Optional["SpaceGenerator"] = None,
    search_strategy: Optional["SearchStrategy"] = None,
    sch_rules: Optional[List["ScheduleRule"]] = None,
    postprocs: Optional[List["Postproc"]] = None,
    mutator_probs: Optional[Dict["Mutator", float]] = None,
    task_name: str = "main",
    rand_state: int = -1,
    num_threads: Optional[int] = None,
):
    """Construct the underlying ``TuneContext`` object through the FFI.

    Parameters
    ----------
    mod : Optional[IRModule]
        The workload; a ``PrimFunc`` is accepted too and is wrapped with
        ``IRModule.from_expr`` first.
    target, space_generator, search_strategy, sch_rules, postprocs, mutator_probs
        Forwarded unchanged to the FFI constructor.
    task_name : str
        Forwarded unchanged; defaults to "main".
    rand_state : int
        Forwarded unchanged; defaults to -1.
    num_threads : Optional[int]
        Forwarded to the FFI constructor; ``cpu_count()`` is used when None.
    """
    # The two normalizations below are independent of each other.
    if num_threads is None:
        num_threads = cpu_count()
    if isinstance(mod, PrimFunc):
        mod = IRModule.from_expr(mod)

    ctor_args = (
        mod,
        target,
        space_generator,
        search_strategy,
        sch_rules,
        postprocs,
        mutator_probs,
        task_name,
        rand_state,
        num_threads,
    )
    self.__init_handle_by_constructor__(
        _ffi_api.TuneContext,  # type: ignore # pylint: disable=no-member
        *ctor_args,
    )
def prune_tensorrt_subgraphs(mod: tvm.IRModule) -> tvm.IRModule:
    """Inline back the "tensorrt" partitions that fail ``is_valid_subgraph``.

    A global function is selected when it has attributes, its "Compiler"
    attribute equals "tensorrt", and ``is_valid_subgraph(params, body)``
    returns a falsy value. The selected global vars are passed to
    ``relay.transform.InlineCompilerFunctionsBoundTo`` and the resulting
    pass is applied to ``mod``.

    NOTE(review): the criteria themselves (e.g. partitions without
    multiply-accumulates) presumably live in ``is_valid_subgraph`` -- confirm.
    """
    global_vars_to_inline = []
    for gv in mod.get_global_vars():
        func = mod[gv]
        # Skip functions that are not tensorrt partitions.
        if not func.attrs or func.attrs["Compiler"] != "tensorrt":
            continue
        # Keep partitions that are still valid as a whole.
        if is_valid_subgraph(func.params, func.body):
            continue
        global_vars_to_inline.append(gv)

    inline_pass = relay.transform.InlineCompilerFunctionsBoundTo(global_vars_to_inline)
    return inline_pass(mod)
def test_let_bound_constants():
    """Regression test for an ICHECK failure on ill-formed IR with let-bound
    constants: optimizing the module below must not raise."""
    x = relay.var("x", shape=(3,), dtype="int32")
    first_elem = relay.take(x, relay.const(0))
    one = relay.const(1)
    stacked = relay.stack((one, first_elem), axis=0)
    mod = IRModule.from_expr(relay.Function([x], stacked))

    VMCompiler().optimize(mod, target="llvm")
def test_get_input_index(target, dev):
    """Check that ``get_input_index`` maps parameter names to their position
    and returns -1 for an unknown name."""
    # Build a IRModule.
    data_0 = "d1"
    data_1 = "d2"
    x = relay.var(data_0, shape=(10,))
    y = relay.var(data_1, shape=(10,))
    mod = IRModule.from_expr(relay.Function([x, y], x + y))

    # Compile to VMExecutable.
    vm_factory = runtime.vm.VirtualMachine(vm.compile(mod, target=target), dev)

    assert vm_factory.get_input_index(data_1) == 1
    assert vm_factory.get_input_index(data_0) == 0
    assert vm_factory.get_input_index("invalid") == -1
def prune_tensorrt_subgraphs(mod: tvm.IRModule) -> tvm.IRModule:
    """Revert the "tensorrt" subgraphs that fail ``is_valid_subgraph``.

    Every global function whose "Compiler" attribute is "tensorrt" and for
    which ``is_valid_subgraph(params, body)`` is falsy is inlined back into
    "main"; ``RemoveUnusedFunctions`` is then run on the new module.
    """

    class SubgraphRemover(ExprMutator):
        """Mutator that replaces calls to the listed subgraphs with their
        bodies, so they are handled by TVM instead of the external codegen."""

        def __init__(
            self, subgraphs_to_remove: List[str], mod: tvm.IRModule, new_mod: tvm.IRModule
        ) -> None:
            ExprMutator.__init__(self)
            self.subgraphs_to_remove = subgraphs_to_remove
            self.mod = mod
            self.new_mod = new_mod

        def visit_call(self, call: relay.expr.Call) -> relay.expr.Expr:
            # Calls whose op is not a global var take the default path.
            if not isinstance(call.op, GlobalVar):
                return super().visit_call(call)

            name = call.op.name_hint
            # Bound up front: zero-argument super() is not reliable inside
            # comprehension scopes.
            visit_arg = super().visit

            if name in self.subgraphs_to_remove:
                # "Inline" the subgraph back into new main function.
                func = self.mod[name]
                var_map = {
                    param: visit_arg(arg) for arg, param in zip(call.args, func.params)
                }
                return relay.bind(func.body, var_map)

            if name == "main":
                return super().visit_call(call)

            # Call to a kept global function: rebuild it with visited arguments.
            return call.op(*[visit_arg(arg) for arg in call.args])

    # Collect the names of the invalid tensorrt subgraphs.
    subgraphs_to_remove: List[str] = []
    for gv in mod.get_global_vars():
        name = gv.name_hint
        func = mod[name]
        if (
            func.attrs
            and func.attrs["Compiler"] == "tensorrt"
            and not is_valid_subgraph(func.params, func.body)
        ):
            subgraphs_to_remove.append(name)

    # Create new pruned module
    new_mod = tvm.IRModule(mod.functions, mod.type_definitions)
    remover = SubgraphRemover(subgraphs_to_remove, mod, new_mod)
    new_mod["main"] = remover.visit(mod["main"])
    return transform.RemoveUnusedFunctions()(new_mod)
def create_module(functions=None):
    """Construct an IRModule from a mapping of functions.

    Parameters
    -----------
    functions: Optional[dict].
        Map of GlobalVar or str to PrimFunc

    Returns
    -------
    mod : IRModule
        An IRModule containing the passed definitions
    """
    mod = IRModule(functions=functions)
    return mod
def test_get_output_multiple(target, dev):
    """After a stateful invoke of a function returning a 2-tuple,
    ``get_outputs`` must yield both elements in order."""
    # Build a IRModule returning (x + x, x).
    x = relay.var("x", shape=(10,))
    result = relay.Tuple([x + x, x])
    mod = IRModule.from_expr(relay.Function([x], result))

    # Compile to VMExecutable.
    vm_exec = vm.compile(mod, target=target)
    vm_factory = runtime.vm.VirtualMachine(vm_exec, dev)

    inp = np.ones(10, dtype="float32")
    vm_factory.invoke_stateful("main", inp)
    outputs = vm_factory.get_outputs()

    assert len(outputs) == 2
    doubled, passthrough = outputs[0], outputs[1]
    np.testing.assert_allclose(doubled.numpy(), inp + inp)
    np.testing.assert_allclose(passthrough.numpy(), inp)
def test_vm_rpc():
    """
    This test checks to make sure you can export a VMExecutable,
    upload it to a remote machine using RPC and then execute it
    on the other machine.

    The RPC server is started as a separate process and is always
    terminated, even when a step of the test fails, so that a failing run
    does not leave a server process behind holding the port.
    """
    target = tvm.target.Target("llvm --host=llvm")

    # Build a IRModule.
    x = relay.var("x", shape=(10, 1))
    f = relay.Function([x], x + x)
    mod = IRModule.from_expr(f)

    # Compile to VMExecutable.
    vm_exec = vm.compile(mod, target=target)

    # Export to Disk
    temp = utils.tempdir()
    path = temp.relpath("vm_library.so")
    vm_exec.mod.export_library(path)

    # Use local rpc server for testing.
    # Server must use popen so it doesn't inherit the current process state. It
    # will crash otherwise.
    server = rpc.Server("localhost", port=9120, use_popen=True)
    try:
        # Wait before connecting to the freshly started server process.
        time.sleep(2)
        remote = rpc.connect(server.host, server.port, session_timeout=10)

        # Upload the serialized Executable.
        remote.upload(path)
        # Get a handle to remote Executable.
        rexec = remote.load_module("vm_library.so")

        ctx = remote.cpu()
        # Build a VM out of the executable and context.
        vm_factory = runtime.vm.VirtualMachine(rexec, ctx)
        np_input = np.random.uniform(size=(10, 1)).astype("float32")
        input_tensor = tvm.nd.array(np_input, ctx)
        # Invoke its "main" function.
        out = vm_factory.invoke("main", input_tensor)
        # Check the result.
        np.testing.assert_allclose(out.asnumpy(), np_input + np_input)

        # delete tensors before the server shuts down so we don't throw errors.
        del input_tensor
        del out
    finally:
        # Runs on failure as well: otherwise the popen server would outlive
        # the test.
        server.terminate()
def test_get_output_single():
    """After a stateful invoke of a single-output function, ``get_outputs``
    must return exactly one tensor holding the result."""
    target = tvm.target.Target("llvm")

    # Build a IRModule computing x + x.
    x = relay.var("x", shape=(10,))
    mod = IRModule.from_expr(relay.Function([x], x + x))

    # Compile to VMExecutable.
    vm_factory = runtime.vm.VirtualMachine(vm.compile(mod, target=target), tvm.cpu())

    inp = np.ones(10, dtype="float32")
    vm_factory.invoke_stateful("main", inp)
    outputs = vm_factory.get_outputs()

    assert len(outputs) == 1
    np.testing.assert_allclose(outputs[0].numpy(), inp + inp)
def test_error_c_interface():
    """Building with the AOT executor and ``interface-api`` set to "c" must
    raise a TVMError carrying the message matched below."""
    expected_message = re.escape(
        'Need unpacked-api == false (got: 0) and interface-api == "packed" (got: c) when '
        "targeting c++ runtime"
    )

    one_plus_one = relay.add(relay.const(1), relay.const(1))
    func = relay.Function([], one_plus_one)

    with pytest.raises(tvm.TVMError, match=expected_message):
        tvm.relay.build(
            IRModule.from_expr(func),
            target="llvm",
            executor=backend.Executor("aot", {"interface-api": "c"}),
        )
def test_tir_external_generation(check_result):
    """An add function tagged for the "example_target_hook" external target
    is expected to yield ``x - y`` when run through ``check_result``."""
    shape = (8,)
    x_data = np.random.randint(255, size=shape).astype("float32")
    y_data = np.random.randint(255, size=shape).astype("float32")
    inputs = {"x": x_data, "y": y_data}

    # Inner function: x0 + y0, tagged via set_external_func_attr.
    x0 = relay.var("x0", shape=shape, dtype="float32")
    y0 = relay.var("y0", shape=shape, dtype="float32")
    external_add = set_external_func_attr(
        relay.Function([x0, y0], x0 + y0),
        "example_target_hook",
        "replace_add_with_subtract",
    )

    # Outer expression: call the tagged function on fresh variables.
    x = relay.var("x", shape=(8,), dtype="float32")
    y = relay.var("y", shape=(8,), dtype="float32")
    mod = IRModule.from_expr(relay.Call(external_add, [x, y]))

    check_result(mod, inputs, (8,), x_data - y_data)
def test_type_args():
    """Type inference must populate ``type_args`` of a broadcasting add with
    the two argument tensor types."""
    lhs = relay.var("x", shape=(10, 10))
    rhs = relay.var("y", shape=(1, 10))
    total = relay.add(lhs, rhs)

    # InferTypeLocal does not support populating the type_args field
    mod = infer_mod(IRModule.from_expr(total))
    mod = infer_mod(mod, annotate_spans=False)
    ty_args = mod["main"].body.type_args
    assert len(ty_args) == 2

    for index in (0, 1):
        assert ty_args[index].dtype == "float32"

    # Expected shapes, in argument order.
    for index, expected_dims in ((0, (10, 10)), (1, (1, 10))):
        dims = ty_args[index].shape
        assert dims[0].value == expected_dims[0]
        assert dims[1].value == expected_dims[1]
def ir_module(input_module: type) -> IRModule:
    """Decorate a python class as tvm IRModule.

    Parameters
    ----------
    input_module : type
        The python class to be parsed.

    Returns
    -------
    output : IRModule
        An IRModule built from the class attributes that are ``BaseFunc``
        instances, keyed by attribute name.

    Raises
    ------
    TypeError
        If ``input_module`` is not a class.
    """
    if not inspect.isclass(input_module):
        raise TypeError("Only class definitions are supported.")

    func_dict = {}
    for name, member in input_module.__dict__.items():
        if isinstance(member, BaseFunc):
            func_dict[name] = member
    return IRModule(func_dict)
def test_error_c_interface():
    """Checks that building with the AOT executor and the "c" interface API
    raises a TVMError carrying the message matched below."""
    two = relay.add(relay.const(1), relay.const(1))
    func = relay.Function([], two)

    with pytest.raises(
        tvm.TVMError,
        match=re.escape(
            'Need unpacked-api == false (got: 0) and interface-api == "packed" (got: c) when '
            "targeting c++ runtime"
        ),
    ):
        tvm.relay.build(
            IRModule.from_expr(func),
            target="llvm",
            executor=backend.Executor("aot", {"interface-api": "c"}),
        )
def test_runtime_module_generation(check_result):
    """An add function tagged for the "example_target_hook" external target
    with ``tir_to_runtime`` set is expected to yield ``x * y`` when run
    through ``check_result``.

    NOTE(review): the product is presumably what the TIRToRuntime test hook
    emits -- confirm against the example target hook implementation.
    """
    shape = (8,)
    x_data = np.random.randint(255, size=shape).astype("float32")
    y_data = np.random.randint(255, size=shape).astype("float32")
    inputs = {"x": x_data, "y": y_data}

    x0 = relay.var("x0", shape=shape, dtype="float32")
    y0 = relay.var("y0", shape=shape, dtype="float32")
    external_func = set_external_func_attr(
        relay.Function([x0, y0], x0 + y0),
        "example_target_hook",
        "replace_add_with_subtract",
    )
    # Test hook to trigger TIRToRuntime code generation
    external_func = external_func.with_attr("tir_to_runtime", True)

    x = relay.var("x", shape=(8,), dtype="float32")
    y = relay.var("y", shape=(8,), dtype="float32")
    mod = IRModule.from_expr(relay.Call(external_func, [x, y]))

    check_result(mod, inputs, (8,), x_data * y_data)
def transform_Class(self, node):
    """Class definition visitor.

    A class may contain any number of function definitions and at most one
    top level assignment, which must be :code:`__tvm_meta__ = ...`. Each
    class is turned into one :code:`IRModule` mapping a :code:`GlobalVar`
    per function name to the transformed function.

    Example
    -------
    .. code-block:: python

        @tvm.script.ir_module
        class MyClass:
            __tvm_meta__ = {}
            def A():
                T.evaluate(0)
    """
    assignments = node.assignments

    if len(assignments) > 1:
        self.report_error(
            "Only a single top level `__tvm_meta__` is allowed",
            ast.Span.union([x.span for x in assignments[1:]]),
        )
    elif len(assignments) == 1:
        meta_assign = assignments[0]
        targets = meta_assign.lhs
        is_meta_assignment = (
            len(targets) == 1
            and isinstance(targets[0], ast.Var)
            and targets[0].id.name == "__tvm_meta__"
        )
        if not is_meta_assignment:
            self.report_error(
                "The only top level assignments allowed are `__tvm_meta__ = ...`",
                meta_assign.span,
            )
        meta = MetaUnparser().do_transform(meta_assign.rhs, self._diagnostic_context)
        self.init_meta(meta)

    functions = {}
    for name, func in node.funcs.items():
        functions[GlobalVar(name)] = self.transform(func)
    return IRModule(functions)
def test_vm_rpc():
    """
    Export a compiled VMExecutable to disk, upload it through an RPC
    session, load it on the remote side and run it there, checking the
    result against numpy.
    """
    target = tvm.target.Target("llvm --host=llvm")

    # Build a IRModule computing x + x.
    x = relay.var("x", shape=(10, 1))
    mod = IRModule.from_expr(relay.Function([x], x + x))

    # Compile to VMExecutable.
    vm_exec = vm.compile(mod, target=target)

    # Export to Disk
    library_name = "vm_library.so"
    path = utils.tempdir().relpath(library_name)
    vm_exec.mod.export_library(path)

    # Use LocalRPC for testing.
    remote = rpc.LocalSession()

    # Upload the serialized Executable, then get a handle to it.
    remote.upload(path)
    rexec = remote.load_module(library_name)

    # Build a VM out of the executable and context.
    ctx = remote.cpu()
    vm_factory = runtime.vm.VirtualMachine(rexec, ctx)

    np_input = np.random.uniform(size=(10, 1)).astype("float32")
    input_tensor = tvm.nd.array(np_input, ctx)

    # Invoke its "main" function.
    # NOTE(review): the tensor is passed wrapped in a list here -- confirm
    # this is the intended calling convention for ``invoke``.
    out = vm_factory.invoke("main", [input_tensor])

    # Check the result.
    np.testing.assert_allclose(out.asnumpy(), np_input + np_input)
def infer_expr(expr, annotate_spans=True):
    """Type-infer ``expr`` and return the inferred expression.

    The expression is wrapped into an IRModule, passed through
    ``infer_mod`` (with ``annotate_spans``) and then ``InferType``. When
    ``expr`` is a ``relay.Function`` the module's "main" function is
    returned; otherwise the body of "main" is returned.
    """
    mod = infer_mod(IRModule.from_expr(expr), annotate_spans)
    entry = transform.InferType()(mod)["main"]
    if isinstance(expr, relay.Function):
        return entry
    return entry.body
def get_one_input_relay_mod(tensor_type, shape, data_name):
    """Return an IRModule whose function applies ``relay.exp`` to one input.

    The input variable is named ``data_name`` and created with the given
    ``shape`` and ``tensor_type`` (used as dtype).
    """
    data = relay.var(data_name, shape=shape, dtype=tensor_type)
    exp_func = relay.Function([data], relay.exp(data))
    return IRModule.from_expr(exp_func)
def test_meta_schedule_dynamic_loop_extent():
    """Task extraction on an NHWC ``adaptive_avg_pool2d`` to (7, 7) over an
    8x8 input is expected to produce no tasks."""
    data = relay.var("a", shape=(1, 8, 8, 512), dtype="float32")
    pooled = relay.nn.adaptive_avg_pool2d(data, (7, 7), "NHWC")
    main_func = relay.Function([data], pooled)
    mod = IRModule({"main": main_func})

    extracted_tasks = ms.extract_task_from_relay(mod, target="llvm", params={})
    assert not extracted_tasks
def __init__(
    self,
    mod: Optional[IRModule] = None,
    *,
    target: Optional[Target] = None,
    space_generator: Union[None, "SCH_FN_TYPE", "ScheduleFn", "SpaceGenerator"] = None,
    search_strategy: Union[None, "SearchStrategy", "TuneConfig"] = None,
    sch_rules: Union[None, str, List["ScheduleRule"]] = None,
    postprocs: Union[None, str, List["Postproc"]] = None,
    mutator_probs: Union[None, str, Dict["Mutator", float]] = None,
    task_name: str = "main",
    logger: Optional[logging.Logger] = None,
    rand_state: int = -1,
    num_threads: Optional[int] = None,
):
    """Normalize the arguments, construct the underlying ``TuneContext``
    through the FFI and initialize it.

    Parameters
    ----------
    mod : Optional[IRModule]
        The workload; a ``PrimFunc`` is wrapped with ``IRModule.from_expr``.
    target : Optional[Target]
        Forwarded to the FFI constructor. Required whenever one of
        ``sch_rules``, ``postprocs`` or ``mutator_probs`` is "default".
    space_generator : Union[None, SCH_FN_TYPE, ScheduleFn, SpaceGenerator]
        Any callable is wrapped into a ``ScheduleFn``.
    search_strategy : Union[None, SearchStrategy, TuneConfig]
        A ``TuneConfig`` is replaced by the result of its ``create_strategy()``.
    sch_rules : Union[None, str, List[ScheduleRule]]
        The string "default" is resolved through ``default_config``.
    postprocs : Union[None, str, List[Postproc]]
        The string "default" is resolved through ``default_config``.
    mutator_probs : Union[None, str, Dict[Mutator, float]]
        The string "default" is resolved through ``default_config``.
    task_name : str
        Forwarded unchanged to the FFI constructor.
    logger : Optional[logging.Logger]
        Kept as ``self.logger`` (the module logger is used when None) and
        passed through ``make_logging_func`` to the FFI constructor.
    rand_state : int
        Forwarded unchanged to the FFI constructor.
    num_threads : Optional[int]
        ``cpu_count()`` is used when None.

    Raises
    ------
    ValueError
        If ``sch_rules``, ``postprocs`` or ``mutator_probs`` is a string
        other than "default", or is "default" while ``target`` is None.
    """
    # pylint: disable=import-outside-toplevel
    from . import default_config
    from .space_generator import ScheduleFn
    from .tune import TuneConfig

    # pylint: enable=import-outside-toplevel
    if isinstance(mod, PrimFunc):
        mod = IRModule.from_expr(mod)
    if callable(space_generator):
        space_generator = ScheduleFn(space_generator)
    if isinstance(search_strategy, TuneConfig):
        search_strategy = search_strategy.create_strategy()

    if isinstance(sch_rules, str):
        if sch_rules != "default":
            raise ValueError("sch_rules should be a list of ScheduleRule or 'default'")
        if target is None:
            raise ValueError("target is required when sch_rules is 'default'")
        sch_rules = default_config.schedule_rules(None, target)

    if isinstance(postprocs, str):
        if postprocs != "default":
            raise ValueError("postprocs should be a list of Postproc or 'default'")
        if target is None:
            raise ValueError("target is required when postprocs is 'default'")
        postprocs = default_config.postproc(None, target)

    if isinstance(mutator_probs, str):
        # Reject unknown strings here, like sch_rules and postprocs do,
        # instead of handing an arbitrary string to the FFI constructor.
        if mutator_probs != "default":
            raise ValueError(
                "mutator_probs should be a dict of Mutator to float or 'default'"
            )
        if target is None:
            raise ValueError("target is required when mutator_probs is 'default'")
        mutator_probs = default_config.mutator_probs(None, target)

    # Keep the caller's logger on the Python side; previously a supplied
    # logger was discarded and self.logger became None.
    self.logger = logger if logger is not None else logging.getLogger(__name__)

    if num_threads is None:
        num_threads = cpu_count()

    self.__init_handle_by_constructor__(
        _ffi_api.TuneContext,  # type: ignore # pylint: disable=no-member
        mod,
        target,
        space_generator,
        search_strategy,
        sch_rules,
        postprocs,
        mutator_probs,
        task_name,
        # Still built from the caller-supplied value (possibly None), as before.
        make_logging_func(logger),
        rand_state,
        num_threads,
    )
    _ffi_api.TuneContextInitialize(self)  # type: ignore # pylint: disable=no-member