Example #1
def partition_for_vitis_ai(mod, params=None, dpu=None, **opts):
    """Partition the Relay expression for offloading operators to Vitis AI DPU

    Parameters
    ----------
    mod : Module
        The module to run passes on.
    params : Optional[Dict[str, NDArray]]
        Constant input parameters.
    dpu : str
        The DPU identifier (e.g. DPUCZDX8G-zcu104, DPUCADF8H)

    Returns
    -------
    ret : Module
        The partitioned module.
    """

    if dpu is None:
        raise ValueError(
            "Please pass Vitis AI DPU identifier to the partitioning function")

    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)

    desired_layouts_in_partition = {
        "nn.conv2d": ["NHWC", "default"],
        "nn.upsampling": ["NHWC"],
        "image.resize2d": ["NHWC"],
    }
    desired_layouts_in_main = {
        "nn.conv2d": ["NCHW", "default"],
        "nn.upsampling": ["NCHW"],
        "image.resize2d": ["NCHW"],
    }
    seq = tvm.transform.Sequential([
        transform.RemoveUnusedFunctions(),
        transform.ConvertLayout(desired_layouts_in_partition),
        transform.FoldConstant(),
        transform.InferType(),
        VitisAIAnnotationPass("vitis_ai", dpu, params),
        transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
        transform.RemoveUnusedFunctions(),
        transform.ConvertLayout(desired_layouts_in_main),
        transform.FoldConstant(),
    ])

    with tvm.transform.PassContext(opt_level=3):
        return seq(mod)
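
A minimal usage sketch (hypothetical; the module and parameters are assumed to come from a Relay frontend import, and the DPU identifier is one of the examples from the docstring):

# mod, params are assumed to come from a Relay frontend, e.g. relay.frontend.from_mxnet.
mod = partition_for_vitis_ai(mod, params, dpu="DPUCZDX8G-zcu104")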
Example #2
def get_pass_order(use_patterns):
    """
    Get the pass ordering based on using predicates or patterns.

    Parameters
    ----------
    use_patterns: Bool
        True if pass needs to work with op patterns
    Returns
    ----------
    ret : Sequential
        Pass object
    """
    if use_patterns:
        return tvm.transform.Sequential([
            transform.InferType(),
            RemoveDropoutPass(),
            transform.RemoveUnusedFunctions(),
            transform.ConvertLayout({
                "nn.conv1d": ["NCW", "default"],
                "nn.conv2d": ["NCHW", "default"],
                "nn.conv3d": ["NCDHW", "default"],
                "nn.conv2d_transpose": ["NCHW", "default"],
            }),
            transform.FoldConstant(),
            transform.MergeComposite(pattern_table()),
            transform.AnnotateTarget("tensorrt"),
            transform.MergeCompilerRegions(),
            transform.PartitionGraph(),
            transform.InlineComposites("tensorrt"),
            transform.InferType(),
        ])
    return tvm.transform.Sequential([
        transform.InferType(),
        RemoveDropoutPass(),
        transform.RemoveUnusedFunctions(),
        transform.ConvertLayout({
            "nn.conv1d": ["NCW", "default"],
            "nn.conv2d": ["NCHW", "default"],
            "nn.conv3d": ["NCDHW", "default"],
            "nn.conv2d_transpose": ["NCHW", "default"],
        }),
        transform.FoldConstant(),
        transform.AnnotateTarget("tensorrt"),
        transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
        transform.InferType(),
    ])
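
A hedged usage sketch: the returned Sequential is an ordinary pass object, so it is applied under a PassContext (the opt_level and module below are assumptions):

seq = get_pass_order(use_patterns=True)
with tvm.transform.PassContext(opt_level=3):
    mod = seq(mod)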
Example #3
def partition_for_tensorrt(
    mod: tvm.IRModule,
    params: Optional[Dict[str, tvm.nd.NDArray]] = None,
    # CAUTION: Can't use default Target("tensorrt") here since the target kind is only available
    #          if is_tensorrt_compiler_enabled() == True.
    target: Optional[tvm.target.Target] = None,
) -> tvm.IRModule:
    """Partition all functions in mod to greedily offload supported operators to TensorRT.

    Parameters
    ----------
    mod : tvm.IRModule
        The module to partition.
    params : Optional[Dict[str, tvm.nd.NDArray]]
        Constant input parameters.
    target : Optional[tvm.target.Target]
        A target of kind "tensorrt" describing additional partitioning and compilation options.

    Returns
    -------
    partitioned_mod : tvm.IRModule
        The partitioned module.

    """
    assert is_tensorrt_compiler_enabled(), "Can only partition for TensorRT if it is enabled"
    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)
    if target is None:
        # Use a default target. The get_tensorrt_target() function will similarly create an
        # equivalent default target when compilation continues after partitioning.
        target = tvm.target.Target("tensorrt")

    seq = tvm.transform.Sequential([
        transform.InferType(),
        RemoveDropoutPass(),
        transform.RemoveUnusedFunctions(),
        transform.ConvertLayout({
            "nn.conv1d": ["NCW", "default"],
            "nn.conv2d": ["NCHW", "default"],
            "nn.conv3d": ["NCDHW", "default"],
            "nn.conv2d_transpose": ["NCHW", "default"],
        }),
        transform.FoldConstant(),
        transform.MergeComposite(pattern_table()),
        transform.AnnotateTarget("tensorrt"),
        transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
        transform.InferType(),
    ])
    with target:
        mod = seq(mod)
        mod = prune_tensorrt_subgraphs(mod)
    return mod
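
A hypothetical call site, assuming TVM was built with the TensorRT compiler enabled (otherwise the assert fires); passing target=None falls back to the default "tensorrt" target:

trt_target = tvm.target.Target("tensorrt")
partitioned_mod = partition_for_tensorrt(mod, params=params, target=trt_target)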
Example #4
def prune_tensorrt_subgraphs(mod: tvm.IRModule) -> tvm.IRModule:
    """
    Removes invalid subgraphs and those with no multiply-accumulates (if remove_no_max_subgraphs
    is set).
    """

    class SubgraphRemover(ExprMutator):
        """
        Reverts subgraphs in subgraphs_to_remove back to TVM instead of using an external codegen.
        """

        def __init__(
            self, subgraphs_to_remove: List[str], mod: tvm.IRModule, new_mod: tvm.IRModule
        ) -> None:
            ExprMutator.__init__(self)
            self.subgraphs_to_remove = subgraphs_to_remove
            self.mod = mod
            self.new_mod = new_mod

        def visit_call(self, call: relay.expr.Call) -> relay.expr.Expr:
            if isinstance(call.op, GlobalVar):
                name = call.op.name_hint
                if name in self.subgraphs_to_remove:
                    # "Inline" the subgraph back into new main function.
                    func = self.mod[name]
                    var_map = {}
                    for arg, param in zip(call.args, func.params):
                        var_map[param] = super().visit(arg)
                    new_body = relay.bind(func.body, var_map)
                    return new_body
                if name != "main":
                    args = []
                    for arg in call.args:
                        args.append(super().visit(arg))
                    return call.op(*args)
            return super().visit_call(call)

    subgraphs_to_remove: List[str] = []
    # Remove invalid subgraphs
    for subgraph in mod.get_global_vars():
        name = subgraph.name_hint
        if not mod[name].attrs or mod[name].attrs["Compiler"] != "tensorrt":
            continue
        if not is_valid_subgraph(mod[name].params, mod[name].body):
            subgraphs_to_remove.append(name)
    # Create new pruned module
    new_mod = tvm.IRModule(mod.functions, mod.type_definitions)
    new_mod["main"] = SubgraphRemover(subgraphs_to_remove, mod, new_mod).visit(mod["main"])
    new_mod = transform.RemoveUnusedFunctions()(new_mod)
    return new_mod
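
The "inline" step above hinges on relay.bind, which substitutes a function's parameters with the call's arguments. A standalone sketch of the same mechanism (the names and shape are illustrative):

x = relay.var("x", shape=(1, 8))
func = relay.Function([x], relay.nn.relu(x))
arg = relay.var("input", shape=(1, 8))
inlined = relay.bind(func.body, {x: arg})  # relu(x) becomes relu(input)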
Example #5
File: dnnl.py Project: were/tvm
def prune_dnnl_subgraphs(mod):
    """
    Removes invalid subgraphs, which does not contain compute intensive dnnl ops.
    """
    class SubgraphRemover(ExprMutator):
        """
        Reverts subgraphs in subgraphs_to_remove back to TVM instead of using an external codegen.
        """
        def __init__(self, subgraphs_to_remove, mod, new_mod):
            ExprMutator.__init__(self)
            self.subgraphs_to_remove = subgraphs_to_remove
            self.mod = mod
            self.new_mod = new_mod

        def visit_call(self, call):
            if isinstance(call.op, GlobalVar):
                name = call.op.name_hint
                if name in self.subgraphs_to_remove:
                    # "Inline" the subgraph back into new main function.
                    func = self.mod[name]
                    var_map = {}
                    for arg, param in zip(call.args, func.params):
                        var_map[param] = super().visit(arg)
                    new_body = relay.bind(func.body, var_map)
                    return new_body
                if name != "main":
                    args = []
                    for arg in call.args:
                        args.append(super().visit(arg))
                    return call.op(*args)
            return super().visit_call(call)

    subgraphs_to_remove = []
    # If there is at most one subgraph ("main" plus one partitioned function), do nothing.
    if len(mod.get_global_vars()) <= 2:
        return mod
    # Remove invalid subgraphs
    for subgraph in mod.get_global_vars():
        name = subgraph.name_hint
        if not mod[name].attrs or mod[name].attrs["Compiler"] != "dnnl":
            continue
        if not is_valid_subgraph(mod[name].body):
            subgraphs_to_remove.append(name)
    # Create new pruned module
    new_mod = tvm.IRModule(mod.functions, mod.type_definitions)
    new_mod["main"] = SubgraphRemover(subgraphs_to_remove, mod,
                                      new_mod).visit(mod["main"])
    new_mod = transform.RemoveUnusedFunctions()(new_mod)
    return new_mod
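
For context, a hedged sketch of where this pruning step would sit in a DNNL flow; the pass sequence mirrors the TensorRT examples above and is an assumption, not the full contents of dnnl.py:

# Assumed partitioning flow preceding the pruning step.
seq = tvm.transform.Sequential([
    transform.AnnotateTarget("dnnl"),
    transform.MergeCompilerRegions(),
    transform.PartitionGraph(),
])
mod = seq(mod)
mod = prune_dnnl_subgraphs(mod)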
Example #6
def partition_for_tensorrt(
    mod,
    params=None,
    version=None,
    use_implicit_batch=True,
    remove_no_mac_subgraphs=False,
    max_workspace_size=1 << 30,
):
    """Partition the graph greedily offloading supported operators to TensorRT.

    Parameters
    ----------
    mod : Module
        The module to run passes on.
    params : Optional[Dict[str, NDArray]]
        Constant input parameters.
    version : Optional[Tuple[int, int, int]]
        TensorRT version to target as tuple of (major, minor, patch). If TVM is compiled with
        USE_TENSORRT_RUNTIME=ON, the linked TensorRT version will be used instead.
    use_implicit_batch : Optional[bool]
        Use TensorRT implicit batch mode (default True). Setting to False enables explicit batch
        mode, which widens the set of supported operators to include those that modify the batch
        dimension, but may reduce performance for some models.
    remove_no_mac_subgraphs : Optional[bool]
        Removes subgraphs which have been partitioned for TensorRT if they do not have any
        multiply-accumulate operations. The removed subgraphs will go through TVM's standard
        compilation instead. Can improve performance.
    max_workspace_size : Optional[int]
        The maximum number of bytes of workspace each subgraph may use for TensorRT engine
        creation. See the TensorRT documentation for more info.

    Returns
    -------
    mod_and_config : Tuple[Module, Dict[str, Any]]
        A tuple of 1) annotated and partitioned module and 2) "relay.ext.tensorrt.options"
        configuration which should be given to PassContext when building.
    """
    config = {
        "use_implicit_batch": use_implicit_batch,
        "max_workspace_size": max_workspace_size,
        "remove_no_mac_subgraphs": remove_no_mac_subgraphs,
    }
    if version:
        assert isinstance(version, tuple) and len(version) == 3
        config["tensorrt_version"] = version
    else:
        linked_version = tuple(
            tvm.get_global_func("relay.op.get_tensorrt_version")())
        if not linked_version:
            logger.warning(
                "TVM was not built against TensorRT and no version was provided to "
                "partition_for_tensorrt. Defaulting to 6.0.1")
            linked_version = (6, 0, 1)
        config["tensorrt_version"] = linked_version

    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)
    seq = tvm.transform.Sequential([
        transform.InferType(),
        RemoveDropoutPass(),
        transform.RemoveUnusedFunctions(),
        transform.ConvertLayout({
            "nn.conv2d": ["NCHW", "default"],
            "nn.conv3d": ["NCDHW", "default"],
            "nn.conv2d_transpose": ["NCHW", "default"],
        }),
        transform.FoldConstant(),
        transform.AnnotateTarget("tensorrt"),
        transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
        transform.InferType(),
    ])
    with tvm.transform.PassContext(
            opt_level=3, config={"relay.ext.tensorrt.options": config}):
        mod = seq(mod)
        mod = prune_tensorrt_subgraphs(mod)
    return mod, config
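
Because this variant returns both the module and the config, the config must be fed back into the PassContext at build time, as the docstring notes. A hypothetical build step:

mod, config = partition_for_tensorrt(mod, params)
with tvm.transform.PassContext(opt_level=3,
                               config={"relay.ext.tensorrt.options": config}):
    lib = relay.build(mod, target="cuda", params=params)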
Example #7
###############################################################################
# PARTITION & BUILD
#
# Module pass to partition Relay for Vitis-AI acceleration. Targets can be
# dpuv1, dpuv2-zcu104 and dpuv2-zcu102.
# Afterwards, build the graph, lib and params using the standard TVM flow.
###############################################################################

tvm_target = 'llvm'
lib_kwargs = {}

mod, params = relay.frontend.from_mxnet(block, shape_dict)
mod = relay.transform.InferType()(mod)
mod["main"] = bind_params_by_name(mod["main"], params)
mod = transform.RemoveUnusedFunctions()(mod)

# For the edge target, we recommend converting the layout to NHWC for best performance
desired_layouts = {'nn.conv2d': ['NHWC', 'default']}
seq = tvm.transform.Sequential([
    relay.transform.RemoveUnusedFunctions(),
    relay.transform.ConvertLayout(desired_layouts),
    relay.transform.FoldConstant()
])
with tvm.transform.PassContext(opt_level=3):
    mod = seq(mod)

mod = partition_for_vitis_ai(mod, params, dpu=target)

# Convert convolutions that won't be executed on DPU back to NCHW
desired_layouts = {'nn.conv2d': ['NCHW', 'default']}
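
A sketch of applying that conversion, assuming it follows the same Sequential/PassContext pattern used earlier in this snippet:

seq = tvm.transform.Sequential([
    relay.transform.ConvertLayout(desired_layouts),
    relay.transform.FoldConstant()
])
with tvm.transform.PassContext(opt_level=3):
    mod = seq(mod)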