Esempio n. 1
0
    def partition(
        self, mod: tvm.IRModule, params: Optional[Dict[str, tvm.runtime.NDArray]] = None
    ) -> tvm.IRModule:
        """Split the relay graph into the parts that are supported and the parts
        that are unsupported by the target hardware accelerator.

        Parameters
        ----------
        mod : tvm.IRModule
            The relay module to be partitioned.

        params : Optional[Dict[str, tvm.runtime.NDArray]]
            Optional parameters. When given (and non-empty) they are bound by
            name into ``mod["main"]`` before any pass runs.

        Returns
        -------
        out : tvm.IRModule
            The partitioned relay module.

        """

        def registered_passes(phase):
            # Entries of self._relay_passes are indexed as (phase, pass).
            return [entry[1] for entry in self._relay_passes if entry[0] == phase]

        if params:
            mod["main"] = bind_params_by_name(mod["main"], params)

        partitioning = [
            *registered_passes(PassPhase.PRE_PARTITIONING),
            relay.transform.MergeComposite(self._pattern_table()),
            relay.transform.AnnotateTarget(self.target_name),
        ]
        if self.merge_compiler_regions:
            partitioning.append(relay.transform.MergeCompilerRegions())
        partitioning.append(relay.transform.PartitionGraph())
        partitioning += registered_passes(PassPhase.POST_PARTITIONING_0)

        mod = tvm.transform.Sequential(partitioning)(mod)

        # Defunctionalize the partitioned functions to allow lowering
        for gvar, func in mod.functions.items():
            mod.update_func(gvar, relay.transform.Defunctionalization(func, mod))

        final_stage = tvm.transform.Sequential(
            registered_passes(PassPhase.POST_PARTITIONING_1)
        )
        return final_stage(mod)
Esempio n. 2
0
def get_multiple_input_relay_mod(tensor_type, shape, data_name0, data_name1):
    """Build an IRModule whose main function adds two inputs.

    Both inputs are relay variables with the given ``shape`` and dtype
    ``tensor_type``; they are named ``data_name0`` and ``data_name1``.
    """
    lhs = relay.var(data_name0, shape=shape, dtype=tensor_type)
    rhs = relay.var(data_name1, shape=shape, dtype=tensor_type)
    add_func = relay.Function([lhs, rhs], lhs + rhs)
    return IRModule.from_expr(add_func)
Esempio n. 3
0
    def __init__(
        self,
        relay_mod: tvm.IRModule,
        relay_param: Dict[str, tvm.runtime.NDArray] = None,
        plotter: Plotter = None,
        parser: VizParser = None,
    ):
        """Walk every global function of ``relay_mod`` and add its nodes to a graph.

        Parameters
        ----------
        relay_mod : tvm.IRModule
            Module whose global functions are visited.
        relay_param : Dict[str, tvm.runtime.NDArray]
            Stored on the instance; an empty dict is used when omitted.
        plotter : Plotter
            Creates one graph per global function; a ``TermPlotter`` is used
            when omitted.
        parser : VizParser
            Stored on the instance; a ``TermVizParser`` is used when omitted.
        """
        if plotter is None:
            plotter = TermPlotter()
        if relay_param is None:
            relay_param = {}
        if parser is None:
            parser = TermVizParser()
        self._plotter = plotter
        self._relay_param = relay_param
        self._parser = parser

        # "main" goes first so that it is shown on top; the remaining
        # functions keep the order reported by the module.
        all_names = [gvar.name_hint for gvar in relay_mod.get_global_vars()]
        ordered_names = [n for n in all_names if n == "main"]
        ordered_names += [n for n in all_names if n != "main"]

        ids_by_node = {}

        def assign_id(node):
            # First visit wins: the ID is the count of nodes seen so far, as a string.
            ids_by_node.setdefault(node, str(len(ids_by_node)))

        for graph_name in ordered_names:
            # IDs restart from "0" for every function; the same dict is reused.
            ids_by_node.clear()
            relay.analysis.post_order_visit(relay_mod[graph_name], assign_id)
            self._add_nodes(self._plotter.create_graph(graph_name), ids_by_node)
Esempio n. 4
0
def test_multi_targets():
    """Compile ``x + on_device(y + z, cpu)`` for cpu and cuda targets and check the VM result."""
    # Build an IRModule.
    size = 10
    x, y, z = (relay.var(name, shape=(size,)) for name in ("x", "y", "z"))
    cpu_part = relay.op.annotation.on_device(y + z, tvm.cpu())
    mod = IRModule.from_expr(relay.Function([x, y, z], x + cpu_part))

    # Compile to VMExecutable.
    pass_config = {"relay.fallback_device_type": tvm.cuda().device_type}
    with tvm.transform.PassContext(opt_level=3, config=pass_config):
        targets = {"cpu": tvm.target.Target("llvm"), "cuda": tvm.target.Target("cuda")}
        exe = relay.vm.compile(mod, target=targets)

    # Run
    machine = runtime.vm.VirtualMachine(exe, [tvm.cuda(), tvm.cpu()])
    x_data, y_data, z_data = (
        np.random.rand(size).astype("float32") for _ in range(3)
    )
    actual_result = machine.invoke("main", x_data, y_data, z_data)

    # Test
    tvm.testing.assert_allclose(actual_result.numpy(), x_data + y_data + z_data)
Esempio n. 5
0
    def __init__(
        self,
        mod: Optional[IRModule] = None,
        *,
        target: Optional[Target] = None,
        space_generator: Optional["SpaceGenerator"] = None,
        search_strategy: Optional["SearchStrategy"] = None,
        sch_rules: Optional[List["ScheduleRule"]] = None,
        postprocs: Optional[List["Postproc"]] = None,
        mutator_probs: Optional[Dict["Mutator", float]] = None,
        task_name: str = "main",
        rand_state: int = -1,
        num_threads: Optional[int] = None,
    ):
        """Create the context by forwarding all arguments to the FFI constructor.

        Two arguments are normalized first: a ``PrimFunc`` passed as ``mod`` is
        wrapped into an ``IRModule``, and a missing ``num_threads`` is replaced
        by ``cpu_count()``. Everything else is passed through unchanged.
        """
        if num_threads is None:
            num_threads = cpu_count()
        if isinstance(mod, PrimFunc):
            mod = IRModule.from_expr(mod)

        # Positional order expected by the FFI constructor.
        ctor_args = (
            mod,
            target,
            space_generator,
            search_strategy,
            sch_rules,
            postprocs,
            mutator_probs,
            task_name,
            rand_state,
            num_threads,
        )
        self.__init_handle_by_constructor__(
            _ffi_api.TuneContext,  # type: ignore # pylint: disable=no-member
            *ctor_args,
        )
Esempio n. 6
0
def prune_tensorrt_subgraphs(mod: tvm.IRModule) -> tvm.IRModule:
    """
    Un-partition the TensorRT partitions that ``is_valid_subgraph`` rejects.

    A global function is selected when it has attributes, its "Compiler"
    attribute is "tensorrt", and ``is_valid_subgraph`` returns a falsy value
    for its parameters and body. The selected functions are inlined back via
    ``InlineCompilerFunctionsBoundTo``.
    """
    global_vars_to_inline = []
    for gv in mod.get_global_vars():
        func = mod[gv]
        if not func.attrs:
            continue
        if func.attrs["Compiler"] != "tensorrt":
            continue
        if is_valid_subgraph(func.params, func.body):
            continue
        global_vars_to_inline.append(gv)

    inline_pass = relay.transform.InlineCompilerFunctionsBoundTo(global_vars_to_inline)
    return inline_pass(mod)
Esempio n. 7
0
def test_let_bound_constants():
    """This tests for an ICHECK failure for ill-formed IR with let-bound constants"""
    data = relay.var("x", shape=(3,), dtype="int32")
    first_elem = relay.take(data, relay.const(0))
    one = relay.const(1)

    stacked = relay.stack((one, first_elem), axis=0)
    mod = IRModule.from_expr(relay.Function([data], stacked))

    # Only optimization is exercised; it is expected to complete without raising.
    VMCompiler().optimize(mod, target="llvm")
Esempio n. 8
0
def test_get_input_index(target, dev):
    """Check that the VM reports input positions by name and -1 for an unknown name."""
    # Build a IRModule.
    input_names = ("d1", "d2")
    inputs = [relay.var(name, shape=(10,)) for name in input_names]
    mod = IRModule.from_expr(relay.Function(inputs, inputs[0] + inputs[1]))

    # Compile to VMExecutable.
    machine = runtime.vm.VirtualMachine(vm.compile(mod, target=target), dev)
    assert machine.get_input_index(input_names[1]) == 1
    assert machine.get_input_index(input_names[0]) == 0
    assert machine.get_input_index("invalid") == -1
Esempio n. 9
0
def prune_tensorrt_subgraphs(mod: tvm.IRModule) -> tvm.IRModule:
    """
    Remove the TensorRT subgraphs that ``is_valid_subgraph`` rejects.

    Rejected subgraphs are inlined back into "main", after which functions that
    are no longer used are dropped from the returned module.
    """

    class SubgraphRemover(ExprMutator):
        """
        Reverts subgraphs in subgraphs_to_remove back to TVM instead of using an external codegen.
        """

        def __init__(
            self, subgraphs_to_remove: List[str], mod: tvm.IRModule, new_mod: tvm.IRModule
        ) -> None:
            ExprMutator.__init__(self)
            self.subgraphs_to_remove = subgraphs_to_remove
            self.mod = mod
            self.new_mod = new_mod

        def visit_call(self, call: relay.expr.Call) -> relay.expr.Expr:
            callee = call.op
            if not isinstance(callee, GlobalVar):
                return super().visit_call(call)

            name = callee.name_hint
            if name in self.subgraphs_to_remove:
                # "Inline" the subgraph back into new main function: bind each
                # parameter of the subgraph to the (visited) call argument.
                func = self.mod[name]
                bindings = {}
                for arg, param in zip(call.args, func.params):
                    bindings[param] = super().visit(arg)
                return relay.bind(func.body, bindings)

            if name == "main":
                return super().visit_call(call)

            # Call to a subgraph that is kept: rebuild it with visited arguments.
            visited_args = []
            for arg in call.args:
                visited_args.append(super().visit(arg))
            return callee(*visited_args)

    # Collect the names of invalid TensorRT subgraphs.
    subgraphs_to_remove: List[str] = []
    for gvar in mod.get_global_vars():
        name = gvar.name_hint
        is_tensorrt = mod[name].attrs and mod[name].attrs["Compiler"] == "tensorrt"
        if is_tensorrt and not is_valid_subgraph(mod[name].params, mod[name].body):
            subgraphs_to_remove.append(name)

    # Create new pruned module
    new_mod = tvm.IRModule(mod.functions, mod.type_definitions)
    remover = SubgraphRemover(subgraphs_to_remove, mod, new_mod)
    new_mod["main"] = remover.visit(mod["main"])
    return transform.RemoveUnusedFunctions()(new_mod)
Esempio n. 10
0
def create_module(functions=None):
    """Construct a module from a collection of functions.

    Parameters
    ----------
    functions : Optional[dict]
        Map of GlobalVar or str to PrimFunc

    Returns
    -------
    mod : IRModule
        An IRModule containing the passed definitions
    """
    module = IRModule(functions=functions)
    return module
Esempio n. 11
0
def test_get_output_multiple(target, dev):
    """Run a function returning the tuple ``(x + x, x)`` statefully and check both outputs."""
    # Build a IRModule.
    x = relay.var("x", shape=(10,))
    result_tuple = relay.Tuple([x + x, x])
    mod = IRModule.from_expr(relay.Function([x], result_tuple))

    # Compile to VMExecutable.
    machine = runtime.vm.VirtualMachine(vm.compile(mod, target=target), dev)
    ones = np.ones(10, dtype="float32")
    machine.invoke_stateful("main", ones)
    outputs = machine.get_outputs()
    assert len(outputs) == 2
    for index, expected in enumerate((ones + ones, ones)):
        np.testing.assert_allclose(outputs[index].numpy(), expected)
Esempio n. 12
0
def test_vm_rpc():
    """
    This test checks to make sure you can export a VMExecutable,
    upload it to a remote machine using RPC and then execute it
    on the other machine.

    The RPC server is always terminated, even when an intermediate step or the
    final assertion fails, so a failing run does not leave it running.
    """
    target = tvm.target.Target("llvm --host=llvm")

    # Build a IRModule.
    x = relay.var("x", shape=(10, 1))
    f = relay.Function([x], x + x)
    mod = IRModule.from_expr(f)

    # Compile to VMExecutable.
    vm_exec = vm.compile(mod, target=target)

    # Export to Disk
    temp = utils.tempdir()
    path = temp.relpath("vm_library.so")
    vm_exec.mod.export_library(path)

    # Use local rpc server for testing.
    # Server must use popen so it doesn't inherit the current process state. It
    # will crash otherwise.
    server = rpc.Server("localhost", port=9120, use_popen=True)
    try:
        # Give the freshly spawned server time to start before connecting.
        time.sleep(2)
        remote = rpc.connect(server.host, server.port, session_timeout=10)

        # Upload the serialized Executable.
        remote.upload(path)
        # Get a handle to remote Executable.
        rexec = remote.load_module("vm_library.so")

        ctx = remote.cpu()
        # Build a VM out of the executable and context.
        vm_factory = runtime.vm.VirtualMachine(rexec, ctx)
        np_input = np.random.uniform(size=(10, 1)).astype("float32")
        input_tensor = tvm.nd.array(np_input, ctx)
        # Invoke its "main" function.
        out = vm_factory.invoke("main", input_tensor)
        # Check the result.
        np.testing.assert_allclose(out.asnumpy(), np_input + np_input)

        # delete tensors before the server shuts down so we don't throw errors.
        del input_tensor
        del out
    finally:
        server.terminate()
Esempio n. 13
0
def test_get_output_single():
    """Run ``x + x`` statefully on the CPU VM and check its single output."""
    llvm_target = tvm.target.Target("llvm")

    # Build a IRModule.
    x = relay.var("x", shape=(10,))
    mod = IRModule.from_expr(relay.Function([x], x + x))

    # Compile to VMExecutable.
    executable = vm.compile(mod, target=llvm_target)
    machine = runtime.vm.VirtualMachine(executable, tvm.cpu())
    ones = np.ones(10, dtype="float32")
    machine.invoke_stateful("main", ones)
    outputs = machine.get_outputs()
    assert len(outputs) == 1
    np.testing.assert_allclose(outputs[0].numpy(), ones + ones)
Esempio n. 14
0
def test_error_c_interface():
    """Checks that an error occurs when using the packed API in combination with C interface"""
    const_sum = relay.add(relay.const(1), relay.const(1))
    func = relay.Function([], const_sum)

    # The message is matched literally, hence re.escape.
    expected_message = re.escape(
        'Need unpacked-api == false (got: 0) and interface-api == "packed" (got: c) when '
        "targeting c++ runtime"
    )
    with pytest.raises(tvm.TVMError, match=expected_message):
        tvm.relay.build(
            IRModule.from_expr(func),
            target="llvm",
            executor=backend.Executor("aot", {"interface-api": "c"}),
        )
Esempio n. 15
0
def test_tir_external_generation(check_result):
    """Call an externally-annotated add function and expect a subtraction result.

    The inner function is tagged through ``set_external_func_attr`` with
    "example_target_hook" / "replace_add_with_subtract"; the expected output
    passed to ``check_result`` is ``x_data - y_data``.
    """
    shape = (8,)
    x_data, y_data = (
        np.random.randint(255, size=shape).astype("float32") for _ in range(2)
    )
    inputs = {"x": x_data, "y": y_data}

    # Inner function that is handed to the external target hook.
    inner_params = [
        relay.var(name, shape=shape, dtype="float32") for name in ("x0", "y0")
    ]
    external_func = relay.Function(inner_params, inner_params[0] + inner_params[1])
    external_func = set_external_func_attr(
        external_func, "example_target_hook", "replace_add_with_subtract"
    )

    # Outer expression: a call to the inner function on the real inputs.
    outer_args = [relay.var(name, shape=shape, dtype="float32") for name in ("x", "y")]
    mod = IRModule.from_expr(relay.Call(external_func, outer_args))

    check_result(mod, inputs, shape, x_data - y_data)
Esempio n. 16
0
def test_type_args():
    """Check the type arguments recorded on the call of a broadcasting add."""
    lhs = relay.var("x", shape=(10, 10))
    rhs = relay.var("y", shape=(1, 10))
    total = relay.add(lhs, rhs)

    # InferTypeLocal does not support populating the type_args field
    mod = infer_mod(IRModule.from_expr(total))
    mod = infer_mod(mod, annotate_spans=False)
    ty_args = mod["main"].body.type_args
    assert len(ty_args) == 2

    for arg_index in range(2):
        assert ty_args[arg_index].dtype == "float32"

    # Expected extents of the first two axes of each type argument.
    expected_extents = ((10, 10), (1, 10))
    for arg_index, extents in enumerate(expected_extents):
        arg_shape = ty_args[arg_index].shape
        for axis, extent in enumerate(extents):
            assert arg_shape[axis].value == extent
Esempio n. 17
0
def ir_module(input_module: type) -> IRModule:
    """Decorate a python class as tvm IRModule.

    Parameters
    ----------
    input_module : type
        The python class to be parsed.

    Returns
    -------
    output : IRModule
        The result IRModule, built from the ``BaseFunc`` entries found in the
        class ``__dict__`` (keyed by attribute name).

    Raises
    ------
    TypeError
        If ``input_module`` is not a class.
    """
    if not inspect.isclass(input_module):
        raise TypeError("Only class definitions are supported.")

    functions = {}
    for attr_name, attr_value in input_module.__dict__.items():
        if isinstance(attr_value, BaseFunc):
            functions[attr_name] = attr_value
    return IRModule(functions)
Esempio n. 18
0
def test_error_c_interface():
    """Checks that an error occurs when using the packed API in combination with C interface"""
    two = relay.add(relay.const(1), relay.const(1))
    func = relay.Function([], two)

    # The expected message is matched literally, hence re.escape.
    with pytest.raises(
        tvm.TVMError,
        match=re.escape(
            'Need unpacked-api == false (got: 0) and interface-api == "packed" (got: c) when '
            "targeting c++ runtime"
        ),
    ):
        tvm.relay.build(
            IRModule.from_expr(func),
            target="llvm",
            executor=backend.Executor("aot", {"interface-api": "c"}),
        )
Esempio n. 19
0
def test_runtime_module_generation(check_result):
    """Call an externally-annotated add function flagged with "tir_to_runtime".

    The expected output passed to ``check_result`` is ``x_data * y_data``.
    """
    shape = (8,)
    x_data, y_data = (
        np.random.randint(255, size=shape).astype("float32") for _ in range(2)
    )
    inputs = {"x": x_data, "y": y_data}

    inner_params = [
        relay.var(name, shape=shape, dtype="float32") for name in ("x0", "y0")
    ]
    external_func = relay.Function(inner_params, inner_params[0] + inner_params[1])
    external_func = set_external_func_attr(
        external_func, "example_target_hook", "replace_add_with_subtract"
    )
    # Test hook to trigger TIRToRuntime code generation
    external_func = external_func.with_attr("tir_to_runtime", True)

    outer_args = [relay.var(name, shape=shape, dtype="float32") for name in ("x", "y")]
    mod = IRModule.from_expr(relay.Call(external_func, outer_args))

    check_result(mod, inputs, shape, x_data * y_data)
Esempio n. 20
0
    def transform_Class(self, node):
        """Class definition visitor.

        A class can have multiple function definitions and a single
        :code:`__tvm_meta__` statement. Each class corresponds to a single
        :code:`IRModule`.

        Any other top level assignment, or more than one assignment, is
        reported through :code:`report_error`.

        Example
        -------
        .. code-block:: python

            @tvm.script.ir_module
            class MyClass:
                __tvm_meta__ = {}
                def A():
                    T.evaluate(0)
        """
        assignments = node.assignments
        if len(assignments) > 1:
            # Point the diagnostic at every assignment after the first one.
            self.report_error(
                "Only a single top level `__tvm_meta__` is allowed",
                ast.Span.union([stmt.span for stmt in assignments[1:]]),
            )
        elif len(assignments) == 1:
            meta_assign = assignments[0]
            targets = meta_assign.lhs
            is_meta_assignment = (
                len(targets) == 1
                and isinstance(targets[0], ast.Var)
                and targets[0].id.name == "__tvm_meta__"
            )
            if not is_meta_assignment:
                self.report_error(
                    "The only top level assignments allowed are `__tvm_meta__ = ...`",
                    meta_assign.span,
                )
            unparsed_meta = MetaUnparser().do_transform(
                meta_assign.rhs, self._diagnostic_context
            )
            self.init_meta(unparsed_meta)

        functions = {}
        for name, func in node.funcs.items():
            gvar = GlobalVar(name)
            functions[gvar] = self.transform(func)
        return IRModule(functions)
Esempio n. 21
0
def test_vm_rpc():
    """
    Export a VMExecutable to disk, upload it through a local RPC session,
    load it on the session side and execute it there, then compare the
    result with the locally computed ``input + input``.
    """
    llvm_target = tvm.target.Target("llvm --host=llvm")

    # Build a IRModule.
    x = relay.var("x", shape=(10, 1))
    mod = IRModule.from_expr(relay.Function([x], x + x))

    # Compile to VMExecutable.
    executable = vm.compile(mod, target=llvm_target)

    # Export to Disk
    workdir = utils.tempdir()
    library_path = workdir.relpath("vm_library.so")
    executable.mod.export_library(library_path)

    # Use LocalRPC for testing.
    session = rpc.LocalSession()

    # Upload the serialized Executable.
    session.upload(library_path)
    # Get a handle to remote Executable.
    remote_executable = session.load_module("vm_library.so")

    device = session.cpu()
    # Build a VM out of the executable and context.
    machine = runtime.vm.VirtualMachine(remote_executable, device)
    host_input = np.random.uniform(size=(10, 1)).astype("float32")
    device_input = tvm.nd.array(host_input, device)
    # Invoke its "main" function.
    result = machine.invoke("main", [device_input])
    # Check the result.
    np.testing.assert_allclose(result.asnumpy(), host_input + host_input)
Esempio n. 22
0
def infer_expr(expr, annotate_spans=True):
    """Type-infer ``expr`` by wrapping it in a module and return the typed result.

    Returns the module's "main" function when ``expr`` is a ``relay.Function``,
    otherwise the body of that function.
    """
    typed_mod = infer_mod(IRModule.from_expr(expr), annotate_spans)
    typed_mod = transform.InferType()(typed_mod)
    main_func = typed_mod["main"]
    if isinstance(expr, relay.Function):
        return main_func
    return main_func.body
Esempio n. 23
0
def get_one_input_relay_mod(tensor_type, shape, data_name):
    """Build an IRModule whose main function applies ``relay.exp`` to one input.

    The input is a relay variable named ``data_name`` with the given ``shape``
    and dtype ``tensor_type``.
    """
    data = relay.var(data_name, shape=shape, dtype=tensor_type)
    exp_func = relay.Function([data], relay.exp(data))
    return IRModule.from_expr(exp_func)
def test_meta_schedule_dynamic_loop_extent():
    """Task extraction on an NHWC adaptive_avg_pool2d module is expected to yield no tasks."""
    data = relay.var("a", shape=(1, 8, 8, 512), dtype="float32")
    pooled = relay.nn.adaptive_avg_pool2d(data, (7, 7), "NHWC")
    main_func = relay.Function([data], pooled)
    mod = IRModule({"main": main_func})
    tasks = ms.extract_task_from_relay(mod, target="llvm", params={})
    assert not tasks
Esempio n. 25
0
    def __init__(
        self,
        mod: Optional[IRModule] = None,
        *,
        target: Optional[Target] = None,
        space_generator: Union[None, "SCH_FN_TYPE", "ScheduleFn",
                               "SpaceGenerator"] = None,
        search_strategy: Union[None, "SearchStrategy", "TuneConfig"] = None,
        sch_rules: Union[None, str, List["ScheduleRule"]] = None,
        postprocs: Union[None, str, List["Postproc"]] = None,
        mutator_probs: Union[None, str, Dict["Mutator", float]] = None,
        task_name: str = "main",
        logger: Optional[logging.Logger] = None,
        rand_state: int = -1,
        num_threads: Optional[int] = None,
    ):
        """Normalize the arguments, build the context through the FFI and initialize it.

        Parameters
        ----------
        mod : Optional[IRModule]
            The workload; a ``PrimFunc`` is wrapped into an ``IRModule``.
        target : Optional[Target]
            The target; required whenever one of ``sch_rules``, ``postprocs``
            or ``mutator_probs`` is the string "default".
        space_generator : Union[None, SCH_FN_TYPE, ScheduleFn, SpaceGenerator]
            Any callable value is wrapped in ``ScheduleFn``.
        search_strategy : Union[None, SearchStrategy, TuneConfig]
            A ``TuneConfig`` is converted with ``create_strategy()``.
        sch_rules : Union[None, str, List[ScheduleRule]]
            Schedule rules, or "default" to take them from ``default_config``.
        postprocs : Union[None, str, List[Postproc]]
            Postprocessors, or "default" to take them from ``default_config``.
        mutator_probs : Union[None, str, Dict[Mutator, float]]
            Mutator probabilities, or "default" to take them from
            ``default_config``.
        task_name : str
            Forwarded unchanged to the FFI constructor.
        logger : Optional[logging.Logger]
            Kept as ``self.logger``; when omitted, the logger named after this
            module is used instead. The FFI side receives
            ``make_logging_func(logger)`` built from the argument as passed.
        rand_state : int
            Forwarded unchanged to the FFI constructor.
        num_threads : Optional[int]
            Defaults to ``cpu_count()`` when omitted.

        Raises
        ------
        ValueError
            If ``sch_rules``, ``postprocs`` or ``mutator_probs`` is a string
            other than "default", or is "default" while ``target`` is None.
        """
        # pylint: disable=import-outside-toplevel
        from . import default_config
        from .space_generator import ScheduleFn
        from .tune import TuneConfig

        # pylint: enable=import-outside-toplevel
        if isinstance(mod, PrimFunc):
            mod = IRModule.from_expr(mod)
        if callable(space_generator):
            space_generator = ScheduleFn(space_generator)
        if isinstance(search_strategy, TuneConfig):
            search_strategy = search_strategy.create_strategy()
        if isinstance(sch_rules, str):
            if sch_rules == "default":
                if target is None:
                    raise ValueError(
                        "target is required when sch_rules is 'default'")
                sch_rules = default_config.schedule_rules(None, target)
            else:
                raise ValueError(
                    "sch_rules should be a list of ScheduleRule or 'default'")
        if isinstance(postprocs, str):
            if postprocs == "default":
                if target is None:
                    raise ValueError(
                        "target is required when postprocs is 'default'")
                postprocs = default_config.postproc(None, target)
            else:
                raise ValueError(
                    "postprocs should be a list of Postproc or 'default'")
        if isinstance(mutator_probs, str):
            if mutator_probs == "default":
                if target is None:
                    raise ValueError(
                        "target is required when mutator_probs is 'default'")
                mutator_probs = default_config.mutator_probs(None, target)
            else:
                # Same contract as sch_rules / postprocs: reject unknown strings
                # here instead of handing them to the FFI constructor.
                raise ValueError(
                    "mutator_probs should be a dict of Mutator to float or 'default'")
        # Keep the caller's logger when one is given; fall back to the module
        # logger otherwise, so that self.logger is never None.
        if logger is None:
            self.logger = logging.getLogger(__name__)
        else:
            self.logger = logger
        if num_threads is None:
            num_threads = cpu_count()
        self.__init_handle_by_constructor__(
            _ffi_api.TuneContext,  # type: ignore # pylint: disable=no-member
            mod,
            target,
            space_generator,
            search_strategy,
            sch_rules,
            postprocs,
            mutator_probs,
            task_name,
            # Built from the argument as passed (may be None), as before.
            make_logging_func(logger),
            rand_state,
            num_threads,
        )
        _ffi_api.TuneContextInitialize(self)  # type: ignore # pylint: disable=no-member