Example #1
def partition_for_ethosn78(mod, params=None, **opts):
    """Partition the graph greedily offloading supported
    operators to Arm Ethos-N NPU.

    Parameters
    ----------
    mod : Module
        The module to run passes on.
    params : Optional[Dict[str, NDArray]]
        Constant input parameters.

    Returns
    -------
    ret : Module
        Annotated and partitioned module.
    """
    if not opts or opts.get("variant", "").lower() != "ethos-n78":
        raise ValueError("When targeting Ethos(TM)-N78, -variant=Ethos-N78 should be set.")

    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)

    seq = tvm.transform.Sequential(
        [
            transform.InferType(),
            transform.MergeComposite(pattern_table()),
            transform.AnnotateTarget("ethos-n"),
            transform.MergeCompilerRegions(),
            transform.PartitionGraph(),
        ]
    )

    return seq(mod)
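A minimal usage sketch for the function above (the toy graph and the call are illustrative, not from the original source; assumes a TVM build with the Ethos-N codegen enabled):

# Hypothetical usage: partition a small module for the Ethos(TM)-N78 NPU.
# The graph below is a stand-in; real workloads would be quantized models.
import tvm
from tvm import relay

x = relay.var("x", shape=(1, 8, 8, 4), dtype="float32")
y = relay.var("y", shape=(1, 8, 8, 4), dtype="float32")
mod = tvm.IRModule.from_expr(relay.Function([x, y], relay.add(x, y)))

# The variant check at the top of the function requires this option.
partitioned = partition_for_ethosn78(mod, variant="Ethos-N78")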
Example #2
    def annotate(func, compiler):
        """
        An annotator for Core ML.
        """
        # Bind free variables to the constant values.
        bind_dict = {}
        for arg in func.params:
            name = arg.name_hint
            if name in params:
                bind_dict[arg] = relay.const(params[name])

        func = relay.bind(func, bind_dict)

        # Annotate the entire graph for Core ML
        mod = tvm.IRModule()
        mod["main"] = func

        seq = tvm.transform.Sequential([
            transform.SimplifyInference(),
            transform.FoldConstant(),
            transform.FoldScaleAxis(),
            transform.AnnotateTarget(compiler),
            transform.MergeCompilerRegions(),
            transform.PartitionGraph(),
        ])

        with relay.build_config(opt_level=3):
            mod = seq(mod)

        return mod
Example #3
def partition_for_dnnl(mod, params=None):
    """Partition the graph greedily offloading supported operators to DNNL.

    Parameters
    ----------
    mod : Module
        The module to run passes on.
    params : Optional[Dict[str, NDArray]]
        Constant input parameters.

    Returns
    -------
    mod : Module
        Annotated and partitioned module.
    """

    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)
    seq = tvm.transform.Sequential([
        transform.CanonicalizeOps(),
        transform.InferType(),
        transform.SimplifyInference(),
        transform.FoldConstant(),
        transform.FoldScaleAxis(),
        # fold consecutive add ops to simplify pattern `conv2d-bias_add-bn-relu`
        transform.SimplifyExpr(),
        transform.FoldConstant(),
        transform.MergeComposite(pattern_table()),
        transform.AnnotateTarget("dnnl"),
        transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
    ])
    with tvm.transform.PassContext(opt_level=3):
        mod = seq(mod)
    return mod
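A hedged usage sketch for the function above (the MobileNet workload helper is the same one used in the tests further down; assumes a TVM build with DNNL support and an "llvm" host target):

# Hypothetical usage: partition a MobileNet workload for DNNL, then build.
import tvm
from tvm import relay
from tvm.relay import testing

mod, params = testing.mobilenet.get_workload(batch_size=1, dtype="float32")
mod = partition_for_dnnl(mod, params)

with tvm.transform.PassContext(opt_level=3):
    lib = relay.build(mod, target="llvm", params=params)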
Example #4
def partition_for_vitis_ai(mod, params=None, dpu=None, **opts):
    """Partition the Relay expression for offloading operators to Vitis AI DPU

    Parameters
    ----------
    mod : Module
        The module to run passes on.
    params : Optional[Dict[str, NDArray]]
        Constant input parameters.
    dpu : str
        The DPU identifier (e.g. DPUCZDX8G-zcu104, DPUCADX8G)

    Returns
    -------
    ret : Module
        The annotated and partitioned module.
    """

    if dpu is None:
        raise ValueError(
            "Please pass Vitis AI DPU identifier to the partitioning function")

    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)

    seq = tvm.transform.Sequential([
        transform.InferType(),
        VitisAIAnnotationPass("vitis_ai", dpu, params),
        transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
    ])

    return seq(mod)
Example #5
def test_byoc_microtvm_multiple_subgraphs(merge_compiler_regions):
    """This is a test case to check BYOC capabilities of AOT with multiple sub graphs"""
    use_unpacked_api = False
    interface_api = "packed"
    test_runner = AOT_DEFAULT_RUNNER

    x = relay.var("x", shape=(10, 10))
    w0 = relay.var("w0", shape=(10, 10))
    w1 = relay.var("w1", shape=(10, 10))
    w2 = relay.var("w2", shape=(10, 10))
    w3 = relay.var("w3", shape=(10, 10))
    w4 = relay.var("w4", shape=(10, 10))
    w5 = relay.var("w5", shape=(10, 10))
    w6 = relay.var("w6", shape=(10, 10))
    w7 = relay.var("w7", shape=(10, 10))

    # C compiler
    z0 = relay.add(x, w0)
    p0 = relay.subtract(z0, w1)
    q0 = relay.multiply(p0, w2)

    z1 = relay.add(x, w3)
    p1 = relay.subtract(z1, w4)
    q1 = relay.multiply(p1, w5)

    # Other parts on TVM
    z2 = relay.add(x, w6)
    q2 = relay.subtract(z2, w7)

    r = relay.concatenate((q0, q1, q2), axis=0)
    f = relay.Function([x, w0, w1, w2, w3, w4, w5, w6, w7], r)
    mod = tvm.IRModule()
    ann = byoc.CcompilerAnnotator()
    mod["main"] = ann.visit(f)

    if merge_compiler_regions:
        mod = transform.MergeCompilerRegions()(mod)

    mod = tvm.relay.transform.PartitionGraph("mod_name")(mod)
    mod = tvm.relay.transform.InferType()(mod)

    x_data = np.random.rand(10, 10).astype("float32")
    w_data = []
    for _ in range(8):
        w_data.append(np.random.rand(10, 10).astype("float32"))

    map_inputs = OrderedDict([("x", x_data)] + [("w{}".format(i), w_data[i])
                                                for i in range(8)])
    output_list = generate_ref_data(mod, map_inputs)
    input_list = [map_inputs["x"]]
    input_list.extend([map_inputs["w{}".format(i)] for i in range(8)])
    compile_and_run(
        AOTTestModel(name="my_mod",
                     module=mod,
                     inputs=map_inputs,
                     outputs=output_list),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
Example #6
def test_constant_tuples():
    @reg.register("qnn.concatenate", "target.const_tuples")
    def add(attrs, args):  # pylint: disable=unused-variable
        return True

    def create_graph():
        a = relay.var('a', shape=(10, 10), dtype="uint8")
        b = relay.var('b', shape=(10, 10), dtype="uint8")
        a1 = relay.abs(a)

        zeroi = relay.const(1, "int32")
        zerof = relay.const(0, "float32")
        con = relay.qnn.op.concatenate((a1, b),
                                       input_scales=(zerof, zerof),
                                       input_zero_points=(zeroi, zeroi),
                                       output_scale=zerof,
                                       output_zero_point=zeroi,
                                       axis=1)

        f = relay.Function([a, b], con)
        mod = tvm.IRModule.from_expr(f)
        return mod

    seq = tvm.transform.Sequential([
        transform.AnnotateTarget("const_tuples"),
        transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
    ])

    partitioned = seq(create_graph())
    concat = partitioned["const_tuples_0"].body
    assert type(concat.args[1]) == relay.Tuple
    assert type(concat.args[2]) == relay.Tuple
    assert type(concat.args[3]) == relay.Constant
    assert type(concat.args[4]) == relay.Constant
Example #7
def partition_for_cmsisnn(mod, params=None, **opts):
    """Partition the graph greedily offloading supported
    operators on Cortex-M using CMSIS-NN

    Parameters
    ----------
    mod : Module
        The module to run passes on.
    params : Optional[Dict[str, NDArray]]
        Constant input parameters.

    Returns
    -------
    ret : Module
        Annotated and partitioned module.
    """
    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)

    seq = tvm.transform.Sequential(
        [
            transform.InferType(),
            transform.MergeComposite(pattern_table()),
            transform.AnnotateTarget("cmsisnn"),
            transform.MergeCompilerRegions(),
            transform.PartitionGraph(),
        ]
    )

    return seq(mod)
Example #8
def partition_for_clml(mod, params=None):
    """Partition the graph greedily offloading supported
    operators to CLML Library.

    Parameters
    ----------
    mod : Module
        The module to run passes on.
    params : Optional[Dict[str, NDArray]]
        Constant input parameters.

    Returns
    -------
    ret : Module
        Annotated and partitioned module.
    """

    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)

    seq = tvm.transform.Sequential(
        [
            transform.InferType(),
            transform.FoldConstant(),
            transform.MergeComposite(clml_pattern_table()),
            transform.AnnotateTarget("clml", False),
            transform.MergeCompilerRegions(),
            transform.PartitionGraph(),
        ]
    )

    result_mod = seq(mod)
    return result_mod
Example #9
def test_extern_dnnl_mobilenet():
    if not tvm.get_global_func("relay.ext.dnnl", True):
        print("skip because DNNL codegen is not available")
        return

    dtype = 'float32'
    ishape = (1, 3, 224, 224)
    mod, params = relay.testing.mobilenet.get_workload(batch_size=1,
                                                       dtype='float32')

    mod["main"] = bind_params_by_name(mod["main"], params)
    mod = transform.AnnotateTarget(["dnnl"])(mod)
    mod = transform.MergeCompilerRegions()(mod)
    mod = transform.PartitionGraph()(mod)
    i_data = np.random.uniform(0, 1, ishape).astype(dtype)

    ref_mod, params = relay.testing.mobilenet.get_workload(batch_size=1,
                                                           dtype='float32')
    ref_ex = relay.create_executor("graph", mod=ref_mod, ctx=tvm.cpu(0))
    ref_res = ref_ex.evaluate()(i_data, **params)

    check_result(mod, {"data": i_data}, (1, 1000),
                 ref_res.asnumpy(),
                 tol=1e-5,
                 params=params)
Example #10
def test_duplicate_outputs():
    target = "test_duplicate_outputs"

    @tvm.ir.register_op_attr("abs", "target." + target)
    def abs(attrs, args):  # pylint: disable=unused-variable
        return True

    def create_graph():
        data = relay.var("data", shape=(10, 10))
        x = relay.abs(data)
        out_1 = relay.nn.relu(x)
        out_2 = relay.tanh(x)
        out_3 = relay.log(x)
        out = relay.Tuple([out_1, out_2, out_3])
        func = relay.Function([data], out)
        return func

    def expected():
        mod = tvm.IRModule()

        # function 0
        f0_i0 = relay.var(target + "_0_i0", shape=(10, 10))
        f0_o0 = relay.abs(f0_i0)
        func0 = relay.Function([f0_i0], f0_o0)

        func0 = func0.with_attr("Primitive", tvm.tir.IntImm("int32", 1))
        func0 = func0.with_attr("Inline", tvm.tir.IntImm("int32", 1))
        func0 = func0.with_attr("Compiler", target)
        func0 = func0.with_attr("global_symbol", target + "_0")
        gv0 = relay.GlobalVar(target + "_0")
        mod[gv0] = func0
        mod = transform.InferType()(mod)

        # body
        data = relay.var("data", shape=(10, 10))
        function_out = gv0(data)
        out_1 = relay.nn.relu(function_out)
        out_2 = relay.tanh(function_out)
        out_3 = relay.log(function_out)
        out = relay.Tuple([out_1, out_2, out_3])
        func = relay.Function([data], out)
        mod["main"] = func
        mod = transform.InferType()(mod)
        return mod

    mod = tvm.IRModule()
    mod["main"] = create_graph()

    seq = tvm.transform.Sequential(
        [
            transform.AnnotateTarget(target),
            transform.MergeCompilerRegions(),
            transform.PartitionGraph(),
        ]
    )

    ref_mod = expected()
    partitioned = seq(mod)
    assert tvm.ir.structural_equal(partitioned, ref_mod, map_free_vars=True)
Example #11
    def test_different_output_region():
        mod = get_mod()
        mod = WhiteListAnnotator(["subtract", "log"], "ccompiler")(mod)
        mod = transform.MergeCompilerRegions()(mod)
        mod = transform.PartitionGraph()(mod)

        expected_mod = expected_different_output_region()
        assert tvm.ir.structural_equal(mod, expected_mod, map_free_vars=True)
Example #12
def get_pass_order(use_patterns):
    """
    Get the pass ordering based on using predicates or patterns.

    Parameters
    ----------
    use_patterns : bool
        True if the pass pipeline should use op patterns.

    Returns
    -------
    ret : Sequential
        Pass object
    """
    return (tvm.transform.Sequential([
        transform.InferType(),
        RemoveDropoutPass(),
        transform.RemoveUnusedFunctions(),
        transform.ConvertLayout({
            "nn.conv1d": ["NCW", "default"],
            "nn.conv2d": ["NCHW", "default"],
            "nn.conv3d": ["NCDHW", "default"],
            "nn.conv2d_transpose": ["NCHW", "default"],
        }),
        transform.FoldConstant(),
        transform.MergeComposite(pattern_table()),
        transform.AnnotateTarget("tensorrt"),
        transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
        transform.InlineComposites("tensorrt"),
        transform.InferType(),
    ]) if use_patterns else tvm.transform.Sequential([
        transform.InferType(),
        RemoveDropoutPass(),
        transform.RemoveUnusedFunctions(),
        transform.ConvertLayout({
            "nn.conv1d": ["NCW", "default"],
            "nn.conv2d": ["NCHW", "default"],
            "nn.conv3d": ["NCDHW", "default"],
            "nn.conv2d_transpose": ["NCHW", "default"],
        }),
        transform.FoldConstant(),
        transform.AnnotateTarget("tensorrt"),
        transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
        transform.InferType(),
    ]))
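A short usage sketch (the module `mod` is assumed to already exist):

# Hypothetical usage: choose the pattern-based ordering and apply it.
seq = get_pass_order(use_patterns=True)
with tvm.transform.PassContext(opt_level=3):
    partitioned_mod = seq(mod)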
Example #13
def test_byoc_microtvm(merge_compiler_regions):
    """This is a simple test to check BYOC capabilities of AOT - with and without merging compiler regions to test for https://github.com/apache/tvm/issues/9036"""
    use_unpacked_api = False
    interface_api = "packed"
    test_runner = AOTTestRunner(pass_config={"tir.usmp.enable": True})

    x = relay.var("x", shape=(10, 10))
    w0 = relay.var("w0", shape=(10, 10))
    w1 = relay.var("w1", shape=(10, 10))

    # z0 = x + w0
    x_ = compiler_begin(x, "ccompiler")
    w0_ = compiler_begin(w0, "ccompiler")
    z0_ = relay.add(x_, w0_)
    z0 = compiler_end(z0_, "ccompiler")

    # z1 = z0 + w1
    z0__ = compiler_begin(z0, "ccompiler")
    w1_ = compiler_begin(w1, "ccompiler")
    z1_ = relay.add(z0__, w1_)
    z1 = compiler_end(z1_, "ccompiler")

    # z2 = z0 + z1
    z2 = relay.add(z0, z1)

    f = relay.Function([x, w0, w1], z2)
    mod = tvm.IRModule()
    mod["main"] = f

    if merge_compiler_regions:
        mod = transform.MergeCompilerRegions()(mod)

    mod = transform.PartitionGraph("mod_name")(mod)
    mod = transform.InferType()(mod)

    x_data = [("x", np.random.rand(10, 10).astype("float32"))]
    w_data = [("w{}".format(i), np.random.rand(10, 10).astype("float32")) for i in range(2)]

    map_inputs = OrderedDict(x_data + w_data)
    output_list = generate_ref_data(mod, map_inputs)

    compiled_test_mods = compile_models(
        AOTTestModel(name="my_mod", module=mod, inputs=map_inputs, outputs=output_list),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
    )

    for compiled_model in compiled_test_mods:
        check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
Example #14
def partition_for_tensorrt(
    mod: tvm.IRModule,
    params: Optional[Dict[str, tvm.nd.NDArray]] = None,
    # CAUTION: Can't use default Target("tensorrt") here since the target kind is only available
    #          if is_tensorrt_compiler_enabled() == True.
    target: Optional[tvm.target.Target] = None,
) -> tvm.IRModule:
    """Partition all functions in mod to greedily offload supported operators to TensorRT.

    Parameters
    ----------
    mod : tvm.IRModule
        The module to partition.
    target : tvm.target.Target
        A target of kind "tensorrt" describing additional partitioning and compilation options.
    params : Optional[Dict[str, tvm.nd.NDArray]]
        Constant input parameters.

    Returns
    -------
    partitioned_mod : tvm.IRModule
        The partitioned module.

    """
    assert is_tensorrt_compiler_enabled(), "Can only partition for TensorRT if it is enabled"
    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)
    if target is None:
        # Use a default target. The get_tensorrt_target() function will similarly create an
        # equivalent default target when compilation continues after partitioning.
        target = tvm.target.Target("tensorrt")

    seq = tvm.transform.Sequential([
        transform.InferType(),
        RemoveDropoutPass(),
        transform.RemoveUnusedFunctions(),
        transform.ConvertLayout({
            "nn.conv1d": ["NCW", "default"],
            "nn.conv2d": ["NCHW", "default"],
            "nn.conv3d": ["NCDHW", "default"],
            "nn.conv2d_transpose": ["NCHW", "default"],
        }),
        transform.FoldConstant(),
        transform.MergeComposite(pattern_table()),
        transform.AnnotateTarget("tensorrt"),
        transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
        transform.InferType(),
    ])
    with target:
        mod = seq(mod)
        mod = prune_tensorrt_subgraphs(mod)
    return mod
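A hedged usage sketch (`mod` and `params` are assumed to exist; requires a TVM build where is_tensorrt_compiler_enabled() returns True):

# Hypothetical usage: partition with the default "tensorrt" target and
# inspect which global functions were offloaded.
partitioned = partition_for_tensorrt(mod, params=params)
for gvar in partitioned.get_global_vars():
    print(gvar.name_hint)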
Example #15
def test_partial_constant():
    """Test the subgraph with (const, var, const, var) arguments."""
    if not tvm.get_global_func("runtime.DNNLJSONRuntimeCreate", True):
        print("skip because DNNL codegen is not available")
        return

    dtype = "float32"
    ishape = (10, 10)

    in_1 = relay.var("in_1", shape=ishape, dtype=dtype)
    in_2 = relay.var("in_2", shape=ishape, dtype=dtype)
    in_3 = relay.var("in_3", shape=ishape, dtype=dtype)
    in_4 = relay.var("in_4", shape=ishape, dtype=dtype)

    add1 = relay.add(in_1, in_2)
    add2 = relay.add(add1, in_3)
    add3 = relay.add(add2, in_3)
    add4 = relay.add(add3, in_4)

    func = relay.Function([in_1, in_2, in_3, in_4], add4)
    ref_mod = tvm.IRModule.from_expr(func)
    ref_mod = relay.transform.InferType()(ref_mod)

    data1 = np.random.uniform(0, 1, ishape).astype(dtype)
    data3 = np.random.uniform(0, 1, ishape).astype(dtype)

    params = {
        "in_1": tvm.nd.array(data1, device=tvm.cpu(0)),
        "in_3": tvm.nd.array(data3, device=tvm.cpu(0)),
    }
    ref_mod["main"] = bind_params_by_name(ref_mod["main"], params)

    opt_pass = tvm.transform.Sequential([
        transform.InferType(),
        transform.SimplifyInference(),
        transform.FoldConstant(),
        transform.FoldScaleAxis(),
        transform.AnnotateTarget("dnnl"),
        transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
    ])

    with tvm.transform.PassContext(opt_level=3,
                                   disabled_pass=["AlterOpLayout"]):
        mod = opt_pass(ref_mod)

    data2 = np.random.uniform(0, 1, ishape).astype(dtype)
    data4 = np.random.uniform(0, 1, ishape).astype(dtype)
    check_result(mod,
                 ref_mod, {
                     "in_2": data2,
                     "in_4": data4
                 }, (10, 10),
                 tol=1e-5)
Example #16
def test_byoc_microtvm(merge_compiler_regions):
    """
    This is a simple test to check BYOC capabilities of AOT
    with and without merging compiler regions to test for https://github.com/apache/tvm/issues/9036
    """
    use_unpacked_api = False
    interface_api = "packed"
    test_runner = AOT_DEFAULT_RUNNER

    input_x = relay.var("x", shape=(10, 10))
    input_w0 = relay.var("w0", shape=(10, 10))
    input_w1 = relay.var("w1", shape=(10, 10))

    # z0 = x + w0
    marked_input_x = compiler_begin(input_x, "ccompiler")
    marked_input_w0 = compiler_begin(input_w0, "ccompiler")
    add_x_and_w0 = relay.add(marked_input_x, marked_input_w0)
    end_inner_add = compiler_end(add_x_and_w0, "ccompiler")

    # z1 = z0 + w1
    marked_inner_add = compiler_begin(end_inner_add, "ccompiler")
    marked_w1 = compiler_begin(input_w1, "ccompiler")
    add_nested_and_w1 = relay.add(marked_inner_add, marked_w1)
    end_outer_add = compiler_end(add_nested_and_w1, "ccompiler")

    # z2 = z0 + z1
    final_add = relay.add(end_inner_add, end_outer_add)

    relay_func = relay.Function([input_x, input_w0, input_w1], final_add)
    mod = tvm.IRModule()
    mod["main"] = relay_func

    if merge_compiler_regions:
        mod = transform.MergeCompilerRegions()(mod)

    mod = transform.PartitionGraph("mod_name")(mod)
    mod = transform.InferType()(mod)

    x_data = [("x", np.random.rand(10, 10).astype("float32"))]
    w_data = [("w{}".format(i), np.random.rand(10, 10).astype("float32"))
              for i in range(2)]

    map_inputs = OrderedDict(x_data + w_data)
    output_list = generate_ref_data(mod, map_inputs)
    compile_and_run(
        AOTTestModel(name="my_mod",
                     module=mod,
                     inputs=map_inputs,
                     outputs=output_list),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
Example #17
def test_byoc_microtvm(merge_compiler_regions):
    """This is a simple test to check BYOC capabilities of AOT - with and without merging compiler regions to test for https://github.com/apache/tvm/issues/9036"""
    use_unpacked_api = False
    interface_api = "packed"
    test_runner = AOT_DEFAULT_RUNNER

    x = relay.var("x", shape=(10, 10))
    w0 = relay.var("w0", shape=(10, 10))
    w1 = relay.var("w1", shape=(10, 10))

    # z0 = x + w0
    x_ = compiler_begin(x, "ccompiler")
    w0_ = compiler_begin(w0, "ccompiler")
    z0_ = relay.add(x_, w0_)
    z0 = compiler_end(z0_, "ccompiler")

    # z1 = z0 + w1
    z0__ = compiler_begin(z0, "ccompiler")
    w1_ = compiler_begin(w1, "ccompiler")
    z1_ = relay.add(z0__, w1_)
    z1 = compiler_end(z1_, "ccompiler")

    # z2 = z0 + z1
    z2 = relay.add(z0, z1)

    f = relay.Function([x, w0, w1], z2)
    mod = tvm.IRModule()
    mod["main"] = f

    if merge_compiler_regions:
        mod = transform.MergeCompilerRegions()(mod)

    mod = transform.PartitionGraph("mod_name")(mod)
    mod = transform.InferType()(mod)

    x_data = [("x", np.random.rand(10, 10).astype("float32"))]
    w_data = [("w{}".format(i), np.random.rand(10, 10).astype("float32"))
              for i in range(2)]

    map_inputs = OrderedDict(x_data + w_data)
    output_list = generate_ref_data(mod, map_inputs)
    compile_and_run(
        AOTTestModel(name="my_mod",
                     module=mod,
                     inputs=map_inputs,
                     outputs=output_list),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
Example #18
def partition_for_vitis_ai(mod, params=None, dpu=None, **opts):
    """Partition the Relay expression for offloading operators to Vitis AI DPU

    Parameters
    ----------
    mod : Module
        The module to run passes on.
    params : Optional[Dict[str, NDArray]]
        Constant input parameters.
    dpu : str
        The DPU identifier (e.g. DPUCZDX8G-zcu104, DPUCADF8H)

    Returns
    -------
    ret : Module
        The annotated and partitioned module.
    """

    if dpu is None:
        raise ValueError(
            "Please pass Vitis AI DPU identifier to the partitioning function")

    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)

    desired_layouts_in_partition = {
        "nn.conv2d": ["NHWC", "default"],
        "nn.upsampling": ["NHWC"],
        "image.resize2d": ["NHWC"],
    }
    desired_layouts_in_main = {
        "nn.conv2d": ["NCHW", "default"],
        "nn.upsampling": ["NCHW"],
        "image.resize2d": ["NCHW"],
    }
    seq = tvm.transform.Sequential([
        transform.RemoveUnusedFunctions(),
        transform.ConvertLayout(desired_layouts_in_partition),
        transform.FoldConstant(),
        transform.InferType(),
        VitisAIAnnotationPass("vitis_ai", dpu, params),
        transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
        transform.RemoveUnusedFunctions(),
        transform.ConvertLayout(desired_layouts_in_main),
        transform.FoldConstant(),
    ])

    with tvm.transform.PassContext(opt_level=3):
        return seq(mod)
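A usage sketch with the DPU identifier given in the docstring above (`mod` and `params` are assumed to exist):

# Hypothetical usage: partition for the ZCU104 edge DPU.
partitioned = partition_for_vitis_ai(mod, params=params, dpu="DPUCZDX8G-zcu104")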
Example #19
def test_mobilenet_dnnl():
    if not tvm.get_global_func("relay.ext.dnnl", True):
        print("skip because DNNL codegen is not available")
        return

    dtype = "float32"
    ishape = (1, 3, 224, 224)
    mod, params = relay.testing.mobilenet.get_workload(batch_size=1,
                                                       dtype="float32")

    mod = transform.AnnotateTarget(["dnnl"])(mod)
    mod = transform.MergeCompilerRegions()(mod)
    mod = transform.PartitionGraph()(mod)

    i_data = np.random.uniform(0, 1, ishape).astype(dtype)
Example #20
    def get_partitoned_mod(mod):
        remove_bn_pass = tvm.transform.Sequential([
            transform.InferType(),
            transform.SimplifyInference(),
            transform.FoldConstant(),
            transform.FoldScaleAxis(),
        ])
        byoc_pass = tvm.transform.Sequential([
            remove_bn_pass,
            transform.AnnotateTarget("dnnl"),
            transform.MergeCompilerRegions(),
            transform.PartitionGraph(),
        ])

        with tvm.transform.PassContext(opt_level=3,
                                       disabled_pass=["AlterOpLayout"]):
            return byoc_pass(mod)
Example #21
def build_module(
    mod,
    target,
    dpu_target="DPUCADX8G",
    params=None,
    enable_vitis_ai=True,
    tvm_ops=0,
    vitis_ai_partitions=1,
):
    """Build module for Vitis-AI codegen."""
    if isinstance(mod, tvm.relay.expr.Call):
        mod = tvm.IRModule.from_expr(mod)
    if params is None:
        params = {}

    mod = relay.transform.InferType()(mod)
    temp = utils.tempdir()
    export_rt_mod_file = temp.relpath("vitis_ai.rtmod")
    with tvm.transform.PassContext(
        opt_level=3, config={"relay.ext.vitis_ai.options.target": dpu_target,
                             'relay.ext.vitis_ai.options.export_runtime_module': export_rt_mod_file}
    ):
        if enable_vitis_ai:
            mod["main"] = bind_params_by_name(mod["main"], params)
            mod = annotation(mod, params, dpu_target)
            mod = transform.MergeCompilerRegions()(mod)
            mod = transform.PartitionGraph()(mod)
            tvm_op_count = get_cpu_op_count(mod)
            assert tvm_op_count == tvm_ops, "Got {} TVM operators, expected {}".format(
                tvm_op_count, tvm_ops
            )
            partition_count = 0
            for global_var in mod.get_global_vars():
                if "vitis_ai" in global_var.name_hint:
                    partition_count += 1

            assert (
                vitis_ai_partitions == partition_count
            ), "Got {} Vitis-AI partitions, expected {}".format(
                partition_count, vitis_ai_partitions
            )
        relay.backend.compile_engine.get().clear()
        return relay.build(mod, target, params=params)
Example #22
def partition_for_ethosn(mod, params=None, **opts):
    """Partition the graph greedily offloading supported
    operators to Arm Ethos-N NPU.

    Parameters
    ----------
    mod : Module
        The module to run passes on.
    params : Optional[Dict[str, NDArray]]
        Constant input parameters.

    Returns
    -------
    ret : annotated and partitioned module.
    """
    opts = opts or {}
    if "variant" not in opts:
        raise ValueError(
            "Please specify a variant in the target string, e.g. -variant=n78."
        )

    # -variant=ethos-n78 deprecated in favour of -variant=n78
    if opts["variant"].lower() == "ethos-n78":
        warnings.warn(
            "Please use '-variant=n78' instead of the deprecated "
            "'-variant=ethos-n78', which will be removed in TVM v0.9.",
            DeprecationWarning,
        )
    elif opts["variant"] != "n78":
        raise ValueError(
            "When targeting Ethos(TM)-N78, -variant=n78 should be set.")

    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)

    seq = tvm.transform.Sequential([
        transform.InferType(),
        transform.MergeComposite(pattern_table()),
        transform.AnnotateTarget("ethos-n"),
        transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
    ])

    return seq(mod)
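A usage sketch showing how the variant option reaches the function (`mod` is assumed to exist):

# Hypothetical usage: the -variant=n78 target-string option arrives here
# as a keyword argument and passes the checks above.
partitioned = partition_for_ethosn(mod, variant="n78")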
Example #23
def test_mobilenet_dnnl():
    if not tvm.get_global_func("relay.ext.dnnl", True):
        print("skip because DNNL codegen is not available")
        return

    dtype = "float32"
    ishape = (1, 3, 224, 224)
    mod, params = relay.testing.mobilenet.get_workload(batch_size=1,
                                                       dtype="float32")

    mod = transform.AnnotateTarget(["dnnl"])(mod)
    mod = transform.MergeCompilerRegions()(mod)
    mod = transform.PartitionGraph()(mod)

    i_data = np.random.uniform(0, 1, ishape).astype(dtype)
    data = get_calibration_data(mod, {"data": i_data, **params})

    # Check the number and orders
    check_data_size(mod, data)
Example #24
    def partition(dpu_target):
        data = relay.var("data", relay.TensorType((1, 3, 224, 224), "float32"))
        weight = relay.var("weight", relay.TensorType((16, 3, 3, 3),
                                                      "float32"))
        bn_gamma = relay.var("bn_gamma", relay.TensorType((16, ), "float32"))
        bn_beta = relay.var("bn_beta", relay.TensorType((16, ), "float32"))
        bn_mmean = relay.var("bn_mean", relay.TensorType((16, ), "float32"))
        bn_mvar = relay.var("bn_var", relay.TensorType((16, ), "float32"))

        conv = relay.nn.conv2d(data=data,
                               weight=weight,
                               kernel_size=(3, 3),
                               channels=16,
                               padding=(1, 1))
        bn_output = relay.nn.batch_norm(conv, bn_gamma, bn_beta, bn_mmean,
                                        bn_mvar)

        func = relay.Function(
            [data, weight, bn_gamma, bn_beta, bn_mmean, bn_mvar],
            bn_output.astuple())
        mod = tvm.IRModule()
        mod["main"] = func
        params = {}
        params["weight"] = np.random.rand(16, 3, 3, 3).astype("float32")
        params["bn_gamma"] = np.random.rand(16).astype("float32")
        params["bn_beta"] = np.random.rand(16).astype("float32")
        params["bn_mean"] = np.random.rand(16).astype("float32")
        params["bn_var"] = np.random.rand(16).astype("float32")
        mod = annotation(mod, params, dpu_target)

        opt_pass = tvm.transform.Sequential([
            transform.MergeCompilerRegions(),
            transform.PartitionGraph(),
        ])

        with tvm.transform.PassContext(opt_level=3):
            mod = opt_pass(mod)

        return mod
Example #25
def partition_for_ethosn77(mod, params=None, **opts):
    """Partition the graph greedily offloading supported
    operators to Arm Ethos-N NPU.

    Parameters
    ----------
    mod : Module
        The module to run passes on.
    params : Optional[Dict[str, NDArray]]
        Constant input parameters.

    Returns
    -------
    ret : Module
        Annotated and partitioned module.
    """
    if opts:
        tops = opts.get("tops", None)
        ple_ratio = opts.get("ple_ratio", None)
        sram_size = opts.get("sram_size", None)
        if tops or ple_ratio or sram_size:
            raise ValueError(
                "Setting tops, ple_ratio or sram_size has no effect when targeting Ethos(TM)-N77"
            )

    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)

    seq = tvm.transform.Sequential(
        [
            transform.InferType(),
            transform.MergeComposite(pattern_table()),
            transform.AnnotateTarget("ethos-n"),
            transform.MergeCompilerRegions(),
            transform.PartitionGraph(),
        ]
    )

    return seq(mod)
Example #26
def test_flatten_tuple_output():
    target = "test_flatten_tuple_output"

    @reg.register("split", "target." + target)
    def split(attrs, args):  # pylint: disable=unused-variable
        return True

    @reg.register("abs", "target." + target)
    def abs(attrs, args):  # pylint: disable=unused-variable
        return True

    def create_graph():
        a = relay.var('a', shape=(10, 10), dtype="uint8")

        a_split = relay.split(a, 2)
        a_split_0 = relay.TupleGetItem(a_split.astuple(), 0)
        a_split_0_abs = relay.abs(a_split_0)

        a_con = relay.concatenate(a_split, 0)
        a_split_0_relu = relay.nn.relu(a_split_0_abs)

        out = relay.Tuple((a_con, a_split_0_relu))
        f = relay.Function([a], out)
        mod = tvm.IRModule.from_expr(f)
        return mod

    def expected():
        mod = tvm.IRModule()

        # function 0
        f0_i0 = relay.var(target + "_0_i0", shape=(10, 10), dtype="uint8")
        a_split = relay.split(f0_i0, 2)
        a_split_0 = relay.TupleGetItem(a_split.astuple(), 0)
        a_split_1 = relay.TupleGetItem(a_split.astuple(), 1)
        a_split_abs_in = relay.TupleGetItem(a_split.astuple(), 0)
        abs = relay.abs(a_split_abs_in)
        tuple_out = relay.Tuple((a_split_0, a_split_1, abs))
        func0 = relay.Function([f0_i0], tuple_out)

        func0 = func0.with_attr("Primitive", tvm.tir.IntImm("int32", 1))
        func0 = func0.with_attr("Inline", tvm.tir.IntImm("int32", 1))
        func0 = func0.with_attr("Compiler", target)
        func0 = func0.with_attr("global_symbol", target + "_0")
        gv0 = relay.GlobalVar(target + "_0")
        mod[gv0] = func0

        # body
        data = relay.var('a', shape=(10, 10), dtype="uint8")
        f_out = gv0(data)
        f_out_0 = relay.TupleGetItem(f_out, 0)
        f_out_1 = relay.TupleGetItem(f_out, 1)
        tuple = relay.Tuple((f_out_0, f_out_1))
        concat = relay.concatenate(tuple, 0)
        f_out_2 = relay.TupleGetItem(f_out, 2)
        relu = relay.nn.relu(f_out_2)
        ret_tuple = relay.Tuple((concat, relu))
        mod["main"] = relay.Function([data], ret_tuple)
        return mod

    seq = tvm.transform.Sequential([
        transform.AnnotateTarget(target),
        transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
    ])

    partitioned = seq(create_graph())
    assert tvm.ir.structural_equal(partitioned, expected(), map_free_vars=True)
Example #27
def test_duplicate_merge_and_tuplegetitem():
    target = "test_duplicate_merge_and_tuplegetitem"

    @reg.register("nn.batch_norm", "target." + target)
    def batch_norm(attrs, args):  # pylint: disable=unused-variable
        return True

    @reg.register("nn.relu", "target." + target)
    def relu(attrs, args):  # pylint: disable=unused-variable
        return True

    def create_graph():
        data = relay.var('data', shape=(10, 10))
        bn_gamma = relay.var("bn_gamma")
        bn_beta = relay.var("bn_beta")
        bn_mmean = relay.var("bn_mean")
        bn_mvar = relay.var("bn_var")
        x = relay.nn.batch_norm(data, bn_gamma, bn_beta, bn_mmean, bn_mvar)
        out_1 = relay.nn.relu(x[0])
        bn_out_1 = x[1]
        out_2 = relay.tanh(bn_out_1)
        out_3 = relay.log(bn_out_1)
        out = relay.Tuple([out_1, out_2, out_3])
        func = relay.Function([data, bn_gamma, bn_beta, bn_mmean, bn_mvar],
                              out)
        return func

    def expected():
        mod = tvm.IRModule()

        # function 0
        f0_i0 = relay.var(target + "_0_i0", shape=(10, 10))
        f0_i1 = relay.var(target + "_0_i1")
        f0_i2 = relay.var(target + "_0_i2")
        f0_i3 = relay.var(target + "_0_i3")
        f0_i4 = relay.var(target + "_0_i4")
        f0_n0 = relay.nn.batch_norm(f0_i0, f0_i1, f0_i2, f0_i3, f0_i4)
        f0_n1 = f0_n0[1]
        f0_n2 = relay.nn.relu(f0_n0[0])
        f0_o0 = relay.Tuple([f0_n2, f0_n1])
        func0 = relay.Function([f0_i0, f0_i1, f0_i2, f0_i3, f0_i4], f0_o0)

        func0 = func0.with_attr("Primitive", tvm.tir.IntImm("int32", 1))
        func0 = func0.with_attr("Inline", tvm.tir.IntImm("int32", 1))
        func0 = func0.with_attr("Compiler", target)
        func0 = func0.with_attr("global_symbol", target + "_0")
        gv0 = relay.GlobalVar(target + "_0")
        mod[gv0] = func0

        # body
        data = relay.var('data', shape=(10, 10))
        bn_gamma = relay.var("bn_gamma")
        bn_beta = relay.var("bn_beta")
        bn_mmean = relay.var("bn_mean")
        bn_mvar = relay.var("bn_var")
        function_out = gv0(data, bn_gamma, bn_beta, bn_mmean, bn_mvar)
        get_out0 = relay.TupleGetItem(function_out, 0)
        get_out1 = relay.TupleGetItem(function_out, 1)
        out_2 = relay.tanh(get_out1)
        out_3 = relay.log(get_out1)
        out = relay.Tuple([get_out0, out_2, out_3])
        func = relay.Function([data, bn_gamma, bn_beta, bn_mmean, bn_mvar],
                              out)
        mod["main"] = func
        return mod

    mod = tvm.IRModule()
    mod["main"] = create_graph()

    seq = tvm.transform.Sequential([
        transform.AnnotateTarget(target),
        transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
    ])

    ref_mod = expected()
    partitioned = seq(mod)
    assert tvm.ir.structural_equal(partitioned, ref_mod, map_free_vars=True)
Example #28
def partition_for_dnnl(mod, params=None, alter_layout=True):
    """Partition the graph greedily offloading supported operators to DNNL.

    Parameters
    ----------
    mod : Module
        The module to run passes on.
    params : Optional[Dict[str, NDArray]]
        Constant input parameters.

    Returns
    -------
    mod : Module
        Annotated and partitioned module.
    """
    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)

    with TempOpAttr("nn.conv2d", "FTVMLegalize", dnnl.legalize_group_conv):
        with TempOpAttr("nn.conv2d_transpose", "FTVMLegalize",
                        dnnl.legalize_group_conv):
            seq = tvm.transform.Sequential([
                transform.CanonicalizeOps(),
                transform.InferType(),
                transform.SimplifyInference(),
                transform.FoldConstant(),
                transform.FoldScaleAxis(),
                # fold consecutive add ops to simplify pattern `conv2d-bias_add-bn-relu`
                transform.SimplifyExpr(),
                transform.FoldConstant(),
                # alter group conv /conv_transpose layout to `GOIHW` / `GIOHW`
                transform.Legalize(),
                transform.FoldConstant(),
            ])
            with tvm.transform.PassContext(opt_level=3):
                mod = seq(mod)
    if alter_layout:
        with TempOpAttr("nn.conv1d", "FTVMAlterOpLayout", dnnl.alter_conv):
            with TempOpAttr("nn.conv2d", "FTVMAlterOpLayout", dnnl.alter_conv):
                with TempOpAttr("nn.conv3d", "FTVMAlterOpLayout",
                                dnnl.alter_conv):
                    with TempOpAttr("nn.conv2d_transpose", "FTVMAlterOpLayout",
                                    dnnl.alter_conv_transpose):
                        with TempOpAttr("nn.conv3d_transpose",
                                        "FTVMAlterOpLayout",
                                        dnnl.alter_conv_transpose):
                            alter_layout_seq = tvm.transform.Sequential([
                                transform.AlterOpLayout(),
                                transform.FoldConstant(),
                            ])
                            with tvm.transform.PassContext(opt_level=3):
                                mod = alter_layout_seq(mod)

    byoc_seq = tvm.transform.Sequential([
        transform.MergeComposite(dnnl.pattern_table()),
        transform.AnnotateTarget("dnnl"),
        transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
    ])
    with tvm.transform.PassContext(opt_level=3):
        mod = byoc_seq(mod)
    return mod
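A brief usage note on the extra flag (`mod` and `params` are assumed to exist): disabling alter_layout skips the AlterOpLayout step when the incoming layouts already match what the DNNL patterns expect.

# Hypothetical usage: partition without altering convolution layouts.
partitioned = partition_for_dnnl(mod, params=params, alter_layout=False)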
Example #29
def partition_for_tensorrt(
    mod,
    params=None,
    version=None,
    use_implicit_batch=True,
    remove_no_mac_subgraphs=False,
    max_workspace_size=1 << 30,
):
    """Partition the graph greedily offloading supported operators to TensorRT.

    Parameters
    ----------
    mod : Module
        The module to run passes on.
    params : Optional[Dict[str, NDArray]]
        Constant input parameters.
    version : Optional[Tuple[int, int, int]]
        TensorRT version to target as tuple of (major, minor, patch). If TVM is compiled with
        USE_TENSORRT_RUNTIME=ON, the linked TensorRT version will be used instead.
    use_implicit_batch : Optional[bool]
        Use TensorRT implicit batch mode (default true). Setting to false will enable explicit batch
        mode which will widen supported operators to include those which modify the batch dimension,
        but may reduce performance for some models.
    remove_no_mac_subgraphs : Optional[bool]
        Removes subgraphs which have been partitioned for TensorRT if they do not have any
        multiply-accumulate operations. The removed subgraphs will go through TVM's standard
        compilation instead. Can improve performance.
    max_workspace_size : Optional[int]
        How many bytes of workspace size to allow each subgraph to use for TensorRT engine creation.
        See TensorRT documentation for more info.

    Returns
    -------
    mod_and_config : Tuple[Module, Dict[str, Any]]
        A tuple of 1) annotated and partitioned module and 2) "relay.ext.tensorrt.options"
        configuration which should be given to PassContext when building.
    """
    config = {
        "use_implicit_batch": use_implicit_batch,
        "max_workspace_size": max_workspace_size,
        "remove_no_mac_subgraphs": remove_no_mac_subgraphs,
    }
    if version:
        assert isinstance(version, tuple) and len(version) == 3
        config["tensorrt_version"] = version
    else:
        linked_version = tuple(
            tvm.get_global_func("relay.op.get_tensorrt_version")())
        if not linked_version:
            logger.warning(
                "TVM was not built against TensorRT and no version was provided to "
                "partition_for_tensorrt. Defaulting to 6.0.1")
            linked_version = (6, 0, 1)
        config["tensorrt_version"] = linked_version

    if params:
        mod["main"] = bind_params_by_name(mod["main"], params)
    seq = tvm.transform.Sequential([
        transform.InferType(),
        RemoveDropoutPass(),
        transform.RemoveUnusedFunctions(),
        transform.ConvertLayout({
            "nn.conv2d": ["NCHW", "default"],
            "nn.conv3d": ["NCDHW", "default"],
            "nn.conv2d_transpose": ["NCHW", "default"],
        }),
        transform.FoldConstant(),
        transform.AnnotateTarget("tensorrt"),
        transform.MergeCompilerRegions(),
        transform.PartitionGraph(),
        transform.InferType(),
    ])
    with tvm.transform.PassContext(
            opt_level=3, config={"relay.ext.tensorrt.options": config}):
        mod = seq(mod)
        mod = prune_tensorrt_subgraphs(mod)
    return mod, config
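A usage sketch that feeds the returned config back into the PassContext at build time, as the docstring requires (`mod`, `params`, and the "cuda" target are assumptions):

# Hypothetical usage: partition, then build with the TensorRT options
# registered in the PassContext, mirroring the pass context used above.
mod, config = partition_for_tensorrt(mod, params=params)
with tvm.transform.PassContext(opt_level=3,
                               config={"relay.ext.tensorrt.options": config}):
    lib = relay.build(mod, target="cuda", params=params)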
Example #30
def test_byoc_microtvm_multiple_subgraphs(merge_compiler_regions):
    """This is a test case to check BYOC capabilities of AOT with multiple sub graphs"""
    use_unpacked_api = False
    interface_api = "packed"
    test_runner = AOT_DEFAULT_RUNNER

    input_x = relay.var("x", shape=(10, 10))
    input_w0 = relay.var("w0", shape=(10, 10))
    input_w1 = relay.var("w1", shape=(10, 10))
    input_w2 = relay.var("w2", shape=(10, 10))
    input_w3 = relay.var("w3", shape=(10, 10))
    input_w4 = relay.var("w4", shape=(10, 10))
    input_w5 = relay.var("w5", shape=(10, 10))
    input_w6 = relay.var("w6", shape=(10, 10))
    input_w7 = relay.var("w7", shape=(10, 10))

    # C compiler
    ccompiler_add_1 = relay.add(input_x, input_w0)
    ccompiler_sub_1 = relay.subtract(ccompiler_add_1, input_w1)
    ccompiler_mul_1 = relay.multiply(ccompiler_sub_1, input_w2)

    ccompiler_add_2 = relay.add(input_x, input_w3)
    ccompiler_sub_2 = relay.subtract(ccompiler_add_2, input_w4)
    ccompiler_mul_2 = relay.multiply(ccompiler_sub_2, input_w5)

    # Other parts on TVM
    tvm_add = relay.add(input_x, input_w6)
    tvm_sub = relay.subtract(tvm_add, input_w7)

    concat_outputs = relay.concatenate(
        (ccompiler_mul_1, ccompiler_mul_2, tvm_sub), axis=0)
    relay_func = relay.Function(
        [
            input_x, input_w0, input_w1, input_w2, input_w3, input_w4,
            input_w5, input_w6, input_w7
        ],
        concat_outputs,
    )
    mod = tvm.IRModule()
    ann = byoc.CcompilerAnnotator()
    mod["main"] = ann.visit(relay_func)

    if merge_compiler_regions:
        mod = transform.MergeCompilerRegions()(mod)

    mod = tvm.relay.transform.PartitionGraph("mod_name")(mod)
    mod = tvm.relay.transform.InferType()(mod)

    x_data = np.random.rand(10, 10).astype("float32")
    w_data = []
    for _ in range(8):
        w_data.append(np.random.rand(10, 10).astype("float32"))

    map_inputs = OrderedDict([("x", x_data)] + [("w{}".format(i), w_data[i])
                                                for i in range(8)])
    output_list = generate_ref_data(mod, map_inputs)
    input_list = [map_inputs["x"]]
    input_list.extend([map_inputs["w{}".format(i)] for i in range(8)])
    compile_and_run(
        AOTTestModel(name="my_mod",
                     module=mod,
                     inputs=map_inputs,
                     outputs=output_list),
        test_runner,
        interface_api,
        use_unpacked_api,
    )