Example #1
def _compile(ext_func):
    """
    This is the main wrapper that accepts an external
    relay function and runs all the passes to lower it down
    to command stream
    Parameters
    ----------
    ext_func : tvm.relay.function.Function
        The partitioned relay function
    Returns
    -------
    cs : str
        An hex string of the bytes of command stream
    encoded_constants : str
        An hex string of the bytes that includes concat'd
        encoded weights, encoded biases and scales.
    scratch_size : int
        The size of the scratch buffer needed.
    """
    mod = tvm.IRModule()
    mod["main"] = ext_func
    mod = LegalizeEthosU()(mod)
    mod = relay.transform.InferType()(mod)
    # We are currently using the copy_constants scheduler. In the long run,
    # this should be a single intelligent and composite scheduler
    # that can perform scheduling based on user inputs such as
    # scratch memory size.
    tir_mod, params = lower_to_tir(mod["main"], copy_constants())
    cmms, encoded_constants, scratch_size = tir_to_cs_translator.translate(
        tir_mod, params)
    return cmms, encoded_constants, scratch_size
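A minimal sketch of driving this wrapper, assuming the make_ethosu_conv2d test helper used in the later examples and illustrative attribute values for the partitioned function:

# Hedged sketch: the helper and attribute values below are illustrative.
ifm = relay.var("ifm", shape=(1, 16, 16, 32), dtype="int8")
conv = make_ethosu_conv2d(ifm, 32, 8, (1, 1), (0, 0), (1, 1), (1, 1))
ext_func = relay.Function(relay.analysis.free_vars(conv), conv)
ext_func = ext_func.with_attr("global_symbol", "tvmgen_default_ethos_u_main_0")
ext_func = ext_func.with_attr("Compiler", "ethos-u")
cmms, encoded_constants, scratch_size = _compile(ext_func)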
Example #2
def relay_to_tir(mod: tvm.ir.IRModule) -> tvm.ir.IRModule:
    """
    This is the hook for python-based lowering of a Relay module which lowers NPU
    external functions to TIR.

    Parameters
    ----------
    mod : tvm.ir.IRModule
        This is the Relay module.

    Returns
    -------
    mod : tvm.ir.IRModule
        The Relay module with the NPU external functions lowered to scheduled TIR.
    """
    mod = OutlineCompilerFunctions("ethos-u")(mod)
    mod = LegalizeEthosU()(mod)
    mod = LUTsOptimizer()(mod)
    mod = relay.transform.InferType()(mod)
    mod = IdentityOptimizer()(mod)
    mod = LayoutOptimizer()(mod)
    mod = relay.transform.InferType()(mod)

    device_contexts = {
        gv: "ethos-u" for gv, func in mod.functions.items() if util.is_npu_func(func)
    }
    mod = mod.with_attr("device_contexts", device_contexts)

    # Use the cascader if it is enabled for the U55 accelerator, otherwise use
    # the copy_constants scheduler.
    if util.is_cascader_enabled():
        if util.get_accelerator_config() == "ethos-u65-256":
            raise ValueError(
                "Cascading is not supported for the U65 accelerator")

        workspace_memory_pools = mod.attrs["workspace_memory_pools"]

        if not workspace_memory_pools:
            raise ValueError(
                "Workspace memory pool needs to be provided for the U55 cascader"
            )
        if len(workspace_memory_pools.pools) != 1:
            raise ValueError(
                "Exactly one workspace pool needs to be provided for the U55 cascader"
            )

        memory_pressure = _calculate_memory_pressure(mod)
        sram = extract_memory_info(workspace_memory_pools.pools[0],
                                   memory_pressure)
        tir_mod = LowerToTIR(
            _ethos_u55_cascader(sram, util.is_striping_enabled()))(mod)
    else:
        tir_mod = LowerToTIR(copy_constants())(mod)

    return tir_mod
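A hedged usage sketch; partitioned_mod below is illustrative shorthand for a Relay module whose NPU regions have already been partitioned for the "ethos-u" compiler:

# Note: when the cascader is enabled, the module must also carry a
# "workspace_memory_pools" attribute containing exactly one pool.
tir_mod = relay_to_tir(partitioned_mod)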
Example #3
def test_schedule_diamond_graph():
    ifm_a = relay.var("IFM_A", shape=(1, 56, 56, 96), dtype="int8")
    conv_a = make_ethosu_conv2d(ifm_a, 96, 24, (1, 1), (0, 0), (1, 1), (1, 1))
    conv_b = make_ethosu_conv2d(conv_a, 24, 24, (1, 1), (0, 0), (1, 1), (1, 1))
    add = make_ethosu_binary_elementwise(conv_a, conv_b, 24, 24, "ADD", "int8")

    func = relay.Function(relay.analysis.free_vars(add), add)
    func = run_opt_pass(func, relay.transform.InferType())

    test_mod, _ = _lower_to_tir(func, copy_constants())
    reference_mod = DiamondGraphTir
    tvm.ir.assert_structural_equal(test_mod["main"], reference_mod["main"],
                                   True)
Example #4
def test_copy_luts():
    ifm_shape = (1, 33, 33, 11)
    ifm = relay.var("IFM", shape=ifm_shape, dtype="int8")
    lut = relay.const([i for i in range(256)], dtype="int8")
    conv = make_ethosu_conv2d(
        ifm, ifm_shape[3], 8, (3, 3), (0, 0), (1, 1), (1, 1), lut=lut, activation="TANH"
    )
    identity = make_ethosu_identity(conv, lut=lut, activation="TANH")
    func = relay.Function(relay.analysis.free_vars(identity), identity)
    func = run_opt_pass(func, relay.transform.InferType())

    func, const_dict = extract_constants(func)
    te_graph = lower_to_te(func)

    sch = te.create_schedule([te_graph.outputs[0].op])
    copy_constants()(te_graph, const_dict, sch)
    copy_luts()(te_graph, const_dict, sch)
    assert len(sch.stages) == 17
    assert ".global" in sch.stages[5].op.name
    assert ".global" in sch.stages[7].op.name
    assert ".local" in sch.stages[9].op.name
    assert ".local" in sch.stages[10].op.name
Example #5
def test_constant_as_input():
    """Test to check that constants specified as inputs aren't
    interpreted as an encoded constant."""
    def get_graph():
        dtype = "uint8"
        ifm = relay.var("ifm", shape=(1, 16, 16, 32), dtype=dtype)
        conv1 = make_ethosu_conv2d(
            ifm,
            32,
            16,
            (1, 1),
            (0, 0),
            (1, 1),
            (1, 1),
        )
        scalar = relay.const(np.ones((1, 1, 1, 1), dtype=dtype), dtype=dtype)
        add1 = make_ethosu_binary_elementwise(conv1,
                                              scalar,
                                              ifm_channels=32,
                                              ifm2_channels=1,
                                              operator_type="ADD",
                                              ofm_dtype=dtype)
        func = relay.Function(relay.analysis.free_vars(add1), add1)
        func = run_opt_pass(func, relay.transform.InferType())
        return func

    tir_mod, params = _lower_to_tir(get_graph(), copy_constants())

    # Check tile address for the scalar constant input hasn't been
    # overwritten.
    extern_calls = tir_mod["main"].body.body.body.body.body
    binary_elementwise = extern_calls[-1].value
    args = binary_elementwise.args

    reason = "Tile address overwritten"
    assert args[26] == 0, reason
    assert args[27] == 0, reason
    assert args[28] == 0, reason

    # More generally, check it compiles successfully to make sure
    # nothing else was overwritten.
    # With Target Hooks the TIR module needs a target attached
    # and needs to be lowered via the unpacked API.
    tir_mod["main"] = tir_mod["main"].with_attr("target",
                                                tvm.target.Target("ethos-u"))
    tir_mod = tvm.tir.transform.MakeUnpackedAPI()(tir_mod)
    tir_to_cs_translator.translate(tir_mod, params)
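If the argument indices checked above ever need to be re-derived, printing the lowered module is a quick way to locate the binary elementwise call and its argument list (a sketch, reusing the variables from this test):

# Dump the lowered TIR so the tile-address arguments (indices 26-28 above)
# of the binary elementwise call can be found by eye.
print(tir_mod.script(show_meta=True))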
Example #6
def test_copy_constants():
    ifm_a = relay.var("IFM_A", shape=(1, 26, 26, 32), dtype="int8")
    conv_a = make_ethosu_conv2d(ifm_a, 32, 8, (3, 3), (0, 0), (1, 1), (1, 1))
    conv_b = make_ethosu_conv2d(conv_a, 8, 4, (1, 1), (0, 0), (1, 1), (1, 1))
    func = relay.Function(relay.analysis.free_vars(conv_b), conv_b)
    func = run_opt_pass(func, relay.transform.InferType())

    func, const_dict = extract_constants(func)
    cached_func = lower_to_te(func)

    sch = te.create_schedule([cached_func.outputs[0].op])
    planner = copy_constants()
    planner(cached_func, const_dict, sch)
    assert len(sch.stages) == 23
    assert ".global" in sch.stages[6].op.name
    assert ".global" in sch.stages[8].op.name
    assert ".global" in sch.stages[17].op.name
    assert ".global" in sch.stages[19].op.name
Example #7
def relay_to_tir_func(ext_func: relay.Function) -> tvm.tir.PrimFunc:
    """
    This is the hook for python-based lowering of relay function
    that gets offloaded to the microNPU.

    Parameters
    ----------
    ext_func : relay.Function
        This is the partitioned relay function

    Returns
    -------
    primfunc : tir.PrimFunc
        This returns the scheduled PrimFunc
    """
    assert len(ext_func.params) == 1
    input_size = util.calculate_size_bytes(ext_func.params[0])
    output_size = util.calculate_size_bytes(ext_func.body)
    mod = tvm.IRModule()
    mod["main"] = ext_func
    mod = LegalizeEthosU()(mod)
    mod = LUTsOptimizer()(mod)
    mod = LayoutOptimizer()(mod)
    mod = relay.transform.InferType()(mod)
    # We are currently using the copy_constants scheduler. In the long run,
    # this should be a single intelligent and composite scheduler
    # that can perform scheduling based on user inputs such as
    # scratch memory size.
    tir_mod, const_dict = lower_to_tir(mod["main"], copy_constants())

    for idx in const_dict.keys():
        const_dict[idx] = tvm.nd.array(const_dict[idx])

    primfunc = tir_mod["main"]
    primfunc = primfunc.with_attr("global_symbol",
                                  ext_func.attrs["global_symbol"])
    primfunc = primfunc.with_attr("ethos-u.constants", const_dict)
    primfunc = primfunc.with_attr("ethos-u.input_size", input_size)
    primfunc = primfunc.with_attr("ethos-u.output_size", output_size)
    return primfunc
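A hedged sketch of inspecting the result, assuming ext_func is a single-input partitioned function such as the one sketched after Example #1:

primfunc = relay_to_tir_func(ext_func)
# The attributes attached above travel with the PrimFunc and can be read back.
print(primfunc.attrs["global_symbol"])
print(primfunc.attrs["ethos-u.input_size"], primfunc.attrs["ethos-u.output_size"])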
Example #8
def test_copy():
    def _get_func():
        data = relay.var("data", shape=(1, 16, 16, 32), dtype="int8")
        conv = make_ethosu_conv2d(
            data,
            32,
            8,
            (1, 1),
            (0, 0),
            (1, 1),
            (1, 1),
        )
        func = relay.Function(relay.analysis.free_vars(conv), conv)
        func = run_opt_pass(func, relay.transform.InferType())
        return func

    func = _get_func()
    mod, _ = lower_to_tir(func, cascader=copy_constants())

    script = mod.script(show_meta=True)
    test_mod = tvm.script.from_source(script)
    reference_mod = ReferenceModule
    tvm.ir.assert_structural_equal(test_mod["main"], reference_mod["main"], True)
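The script/from_source round trip in this test is a useful pattern in its own right; a small hedged helper that factors it out:

def assert_tir_roundtrip_equal(mod, reference_mod):
    """Round-trip a lowered TIR module through TVMScript and compare it
    structurally against a reference module."""
    script = mod.script(show_meta=True)
    roundtripped = tvm.script.from_source(script)
    tvm.ir.assert_structural_equal(roundtripped["main"], reference_mod["main"], True)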