def test_tflite_model_u2_usecase_two_models_with_a_single_external_pool(model_urls, usmp_algo):
    """Compile and run two TFLite models that share one external workspace pool.

    Both models are compiled with USMP enabled against a single pool named
    "my_memory_pool". The runner prologue declares that buffer once, sized with
    ``MAX(...)`` over the two values produced by
    ``_get_workspace_size_define_macro`` for "mod1" and "mod2".

    ``model_urls[0]`` and ``model_urls[1]`` each provide the two positional
    arguments handed to ``get_workload_official``; ``usmp_algo`` is stored under
    the "tir.usmp.algorithm" pass-config key.
    """
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing

    api_kind = "c"
    unpacked = True

    c_target = tvm.target.Target("c")
    shared_pool = PoolInfo("my_memory_pool", {c_target: PoolInfo.READ_WRITE_ACCESS})
    pools = WorkspaceMemoryPools([shared_pool])

    usmp_config = {"tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo}
    runner = AOTTestRunner(
        pass_config=usmp_config,
        prologue=f"""
        #define MAX(A, B) ((A > B) ? A : B)
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool[MAX({_get_workspace_size_define_macro("my_memory_pool", "mod1")},{_get_workspace_size_define_macro("my_memory_pool", "mod2")})];
        """,
    )

    # Fetch, import and compute reference outputs for each model in turn;
    # the AOTTestModel wrappers are only created once both are ready.
    prepared = []
    for position, label in enumerate(("mod1", "mod2")):
        tflite_file = tf_testing.get_workload_official(
            model_urls[position][0],
            model_urls[position][1],
        )
        relay_mod, feeds, weights = create_relay_module_and_inputs_from_tflite_file(tflite_file)
        expected = generate_ref_data(relay_mod, feeds, weights)
        prepared.append((label, relay_mod, feeds, expected, weights))

    compiled = compile_models(
        [
            AOTTestModel(
                name=label, module=relay_mod, inputs=feeds, outputs=expected, params=weights
            )
            for label, relay_mod, feeds, expected, weights in prepared
        ],
        interface_api=api_kind,
        use_unpacked_api=unpacked,
        pass_config=runner.pass_config,
        workspace_memory_pools=pools,
        target=c_target,
    )

    for entry in compiled:
        check_for_no_tvm_backendallocworkspace_calls(entry.executor_factory.lib)

    run_and_check(
        models=compiled,
        runner=runner,
        interface_api=api_kind,
    )
def _get_ethosu_workspace_size(mod, params, accel_type, pool_size,
                               enable_cascader, enable_striping):
    """Build ``mod`` with a single "SRAM" workspace pool and report its workspace size.

    The compilation settings come from ``_get_compilation_config``; ``pool_size``
    is used as the pool's ``size_hint_bytes``. The return value is the
    "workspace_size_bytes" field of the first "main" entry in the map built by
    ``mlf._build_function_memory_map``.
    """
    (host_target, npu_target, crt_runtime, aot_executor,
     build_config) = _get_compilation_config(accel_type, enable_cascader,
                                             enable_striping)

    sram_properties = PoolInfoProperties(
        size_hint_bytes=pool_size,
        read_bandwidth_bytes_per_cycle=16,
        write_bandwidth_bytes_per_cycle=16,
        target_burst_bytes={npu_target: 1},
    )
    sram_pool = WorkspacePoolInfo("SRAM", [host_target, npu_target],
                                  sram_properties)
    pools = WorkspaceMemoryPools([sram_pool])

    with tvm.transform.PassContext(opt_level=3, config=build_config):
        built = tvm.relay.build(
            mod,
            host_target,
            executor=aot_executor,
            runtime=crt_runtime,
            workspace_memory_pools=pools,
            params=params,
        )

    memory_map = mlf._build_function_memory_map(built.function_metadata)
    main_entry = memory_map["main"][0]
    return main_entry["workspace_size_bytes"]
Exemple #3
0
def test_tflite_model_u3_usecase_two_external_pools(model_url, usmp_algo):
    """Run a TFLite model through AOT with USMP using two application-side pools.

    "my_memory_pool_1" carries a size hint of 2500000 bytes, while
    "my_memory_pool_2" has none. Both buffers are declared in the runner
    prologue. ``model_url[0]`` and ``model_url[1]`` are forwarded to
    ``get_workload_official``; ``usmp_algo`` is stored under the
    "tir.usmp.algorithm" pass-config key.
    """
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing

    api_kind = "c"
    unpacked = True

    c_target = tvm.target.Target("c")
    hinted_pool = PoolInfo("my_memory_pool_1",
                           {c_target: PoolInfo.READ_WRITE_ACCESS},
                           size_hint_bytes=2500000)
    unhinted_pool = PoolInfo("my_memory_pool_2",
                             {c_target: PoolInfo.READ_WRITE_ACCESS})
    pools = WorkspaceMemoryPools([hinted_pool, unhinted_pool])

    usmp_config = {"tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo}
    runner = AOTTestRunner(
        pass_config=usmp_config,
        prologue=f"""
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool_1[{_get_workspace_size_define_macro("my_memory_pool_1")}];
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool_2[{_get_workspace_size_define_macro("my_memory_pool_2")}];
        """,
    )

    tflite_file = tf_testing.get_workload_official(model_url[0], model_url[1])
    relay_mod, feeds, weights = _get_relay_module_and_inputs_from_tflite_file(tflite_file)
    expected = generate_ref_data(relay_mod, feeds, weights)

    model_under_test = AOTTestModel(
        module=relay_mod,
        inputs=feeds,
        outputs=expected,
        params=weights,
    )
    compiled = compile_models(
        model_under_test,
        interface_api=api_kind,
        use_unpacked_api=unpacked,
        pass_config=runner.pass_config,
        workspace_memory_pools=pools,
        target=c_target,
    )

    for entry in compiled:
        check_for_no_tvm_backendallocworkspace_calls(entry.executor_factory.lib)

    run_and_check(models=compiled, runner=runner, interface_api=api_kind)
Exemple #4
0
def compare_ethosu_with_reference(
    mod,
    input_data,
    output_data,
    accel_type: str,
    output_tolerance=0,
    print_cmm=False,
    enable_cascader=None,
):
    """Build ``mod`` for Ethos-U with USMP enabled and verify the generated source.

    Parameters
    ----------
    mod, input_data, output_data
        Forwarded unchanged to ``build_source``.
    accel_type : str
        Passed to ``create_test_runner``; also decides the cascader default.
    output_tolerance
        Forwarded to ``build_source``.
    print_cmm
        When truthy, the command stream of the first compilation artifact is
        decoded from hex and handed to ``print_payload``.
    enable_cascader
        When ``None``, the cascader is enabled unless "u65" appears in
        ``accel_type``.
    """
    cascader_on = ("u65" not in accel_type) if enable_cascader is None else enable_cascader

    pool_name = "my_memory_pool"
    c_target = tvm.target.Target("c")
    npu_target = tvm.target.Target("ethos-u")
    pool_access = {
        c_target: PoolInfo.READ_WRITE_ACCESS,
        npu_target: PoolInfo.READ_WRITE_ACCESS,
    }
    shared_pool = PoolInfo(
        pool_name,
        pool_access,
        size_hint_bytes=2400000,
        read_bandwidth_bytes_per_cycle=16,
        write_bandwidth_bytes_per_cycle=16,
        target_burst_bytes={npu_target: 1},
    )
    pools = WorkspaceMemoryPools([shared_pool])

    runner = create_test_runner(
        accel_type,
        enable_usmp=True,
        enable_cascader=cascader_on,
        enable_striping=False,
        workspace_pools=pools,
    )
    built = build_source(
        mod,
        input_data,
        output_data,
        runner,
        workspace_pools=pools,
        output_tolerance=output_tolerance,
    )

    # Assumes only two runtime.Modules are created -- i.e. single offload module.
    # The lookup runs regardless of print_cmm, exactly as before.
    host_lib = built[0].executor_factory.lib
    ethosu_module = host_lib.imported_modules[0].imported_modules[0]

    # Optionally dump the command stream of the offloaded module.
    if print_cmm:
        fetch_artifacts = tvm._ffi.get_global_func(
            "runtime.module.ethos-u.get_artifacts")
        artifacts = fetch_artifacts(ethosu_module)
        command_stream = bytes.fromhex(artifacts[0].command_stream)
        print_payload(command_stream)

    # Verify generated C source
    verify_source(built, runner)
def test_multiple_memory_pools():
    """
    The cascader does not support multiple workspace memory
    pools. Build a max-pool graph with two workspace pools and
    check that a ValueError carrying the expected reason is raised.
    """
    np.random.seed(2)
    ifm_shape = (1, 80, 75, 3)

    (host_target, npu_target, crt_runtime, aot_executor,
     build_config) = _get_compilation_config("ethos-u55-256", True, True)

    # Two separately constructed but identically configured pools.
    pools = WorkspaceMemoryPools([
        WorkspacePoolInfo(
            "SRAM",
            [host_target, npu_target],
            PoolInfoProperties(
                size_hint_bytes=1,
                read_bandwidth_bytes_per_cycle=16,
                write_bandwidth_bytes_per_cycle=16,
                target_burst_bytes={npu_target: 1},
            ),
        )
        for _ in range(2)
    ])

    @tf.function
    def tf_graph(x):
        return tf.nn.max_pool(x, (3, 3), (1, 1), "SAME")

    _, tflite_graph = infra.get_tflite_graph(tf_graph, [ifm_shape])
    tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0)
    relay_module, params = relay.frontend.from_tflite(tflite_model)
    partitioned = partition_for_ethosu(relay_module, params)

    with pytest.raises(ValueError) as raised:
        with tvm.transform.PassContext(opt_level=3, config=build_config):
            tvm.relay.build(
                partitioned,
                host_target,
                executor=aot_executor,
                runtime=crt_runtime,
                workspace_memory_pools=pools,
                params=params,
            )

    expected_reason = "Exactly one workspace pool needs to be provided for the U55 cascader"
    on_error = "A ValueError was caught but its reason is not the expected one."
    actual_reason = str(raised.value)
    assert expected_reason in actual_reason, on_error
Exemple #6
0
def _get_ethosu_workspace_size(mod, params, accel_type, pool_size,
                               enable_cascader, enable_striping):
    """Build ``mod`` for Ethos-U with one "SRAM" pool and report its workspace size.

    The build uses the AOT executor on the "crt" runtime with USMP enabled and
    the "hill_climb" algorithm; ``pool_size`` becomes the pool's
    ``size_hint_bytes``. The return value is the "workspace_size_bytes" field
    of the first "main" entry in the map built by
    ``mlf._build_function_memory_map``.
    """
    usmp_on = True

    host_target = tvm.target.Target("c")
    npu_target = tvm.target.Target("ethos-u")
    crt_runtime = Runtime("crt")

    executor_options = {
        "workspace-byte-alignment": 16,
        "interface-api": "c",
        "unpacked-api": True,
    }
    aot_executor = Executor("aot", executor_options)

    ethosu_options = {
        "accelerator_config": accel_type,
        "enable_cascader": enable_cascader,
        "enable_striping": enable_striping,
    }
    build_config = {
        "tir.disable_vectorize": True,
        "relay.ext.ethos-u.options": ethosu_options,
        "tir.usmp.enable": usmp_on,
        "tir.usmp.algorithm": "hill_climb",
        # Storage rewrite is disabled whenever USMP is enabled.
        "tir.disable_storage_rewrite": usmp_on,
    }

    pool_access = {
        host_target: PoolInfo.READ_WRITE_ACCESS,
        npu_target: PoolInfo.READ_WRITE_ACCESS,
    }
    sram_pool = PoolInfo(
        "SRAM",
        pool_access,
        size_hint_bytes=pool_size,
        read_bandwidth_bytes_per_cycle=16,
        write_bandwidth_bytes_per_cycle=16,
        target_burst_bytes={npu_target: 1},
    )
    pools = WorkspaceMemoryPools([sram_pool])

    with tvm.transform.PassContext(opt_level=3, config=build_config):
        built = tvm.relay.build(
            mod,
            host_target,
            executor=aot_executor,
            runtime=crt_runtime,
            workspace_memory_pools=pools,
            params=params,
        )

    memory_map = mlf._build_function_memory_map(built.function_metadata)
    main_entry = memory_map["main"][0]
    return main_entry["workspace_size_bytes"]
def test_tflite_model_u3_usecase_single_external_pool(model_url, usmp_algo):
    """Run a TFLite model through AOT with USMP using one application-side pool.

    The pool "my_memory_pool" is declared in the runner prologue.
    ``model_url[0]`` and ``model_url[1]`` are forwarded to
    ``get_workload_official``; ``usmp_algo`` is stored under the
    "tir.usmp.algorithm" pass-config key.
    """
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing  # pylint: disable=import-outside-toplevel

    api_kind = "c"
    unpacked = True

    pool_name = "my_memory_pool"
    c_target = tvm.target.Target("c")
    external_pool = WorkspacePoolInfo(pool_name, [c_target])
    pools = WorkspaceMemoryPools([external_pool])

    usmp_config = {"tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo}
    runner = AOTTestRunner(
        pass_config=usmp_config,
        prologue=f"""
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t {pool_name}[{_get_workspace_size_define_macro(pool_name)}];
        """,
    )

    tflite_file = tf_testing.get_workload_official(model_url[0], model_url[1])
    relay_mod, feeds, weights = create_relay_module_and_inputs_from_tflite_file(tflite_file)
    expected = generate_ref_data(relay_mod, feeds, weights)

    model_under_test = AOTTestModel(
        module=relay_mod,
        inputs=feeds,
        outputs=expected,
        params=weights,
    )
    compiled = compile_models(
        model_under_test,
        interface_api=api_kind,
        use_unpacked_api=unpacked,
        pass_config=runner.pass_config,
        workspace_memory_pools=pools,
        target=c_target,
    )

    for entry in compiled:
        _check_for_no_tvm_backendallocworkspace_calls(entry.executor_factory.lib)

    run_and_check(models=compiled, runner=runner, interface_api=api_kind)
Exemple #8
0
def test_networks_with_usmp_and_cascader_wo_striping(accel_type, model_url,
                                                     workspace_size):
    """Build a network with USMP and the cascader (striping off) and check pool usage.

    After the generated source has been verified, the ``allocated_size`` of the
    first entry in the compiled model's ``pool_inputs`` must equal
    ``workspace_size``.
    """
    np.random.seed(23)

    pool_name = "my_memory_pool"
    c_target = tvm.target.Target("c")
    npu_target = tvm.target.Target("ethos-u")
    pool_properties = PoolInfoProperties(
        size_hint_bytes=2400000,
        read_bandwidth_bytes_per_cycle=16,
        write_bandwidth_bytes_per_cycle=16,
        target_burst_bytes={npu_target: 1},
    )
    shared_pool = WorkspacePoolInfo(pool_name, [c_target, npu_target],
                                    pool_properties)
    pools = WorkspaceMemoryPools([shared_pool])

    model_buffer = infra.get_tflite_model(model_url)
    input_data, output_data = infra.generate_ref_data_tflite(model_buffer)
    relay_mod, params = convert_to_relay(model_buffer)
    partitioned = partition_for_ethosu(relay_mod, params)

    runner = infra.create_test_runner(
        accel_type,
        enable_usmp=True,
        enable_cascader=True,
        enable_striping=False,
        workspace_pools=pools,
    )
    built = infra.build_source(
        partitioned,
        input_data,
        output_data,
        runner,
        workspace_pools=pools,
    )
    infra.verify_source(built, runner)

    codegen_metadata = built[0].executor_factory.executor_codegen_metadata
    pool_allocations = list(dict(codegen_metadata.pool_inputs).values())
    first_pool = pool_allocations[0]
    assert first_pool.allocated_size == workspace_size
def test_tflite_model_u4_usecase_two_external_pools(model_url, usmp_algo):
    """Run a TFLite model with USMP, two application-side pools and workspace I/O.

    "tir.usmp.use_workspace_io" is enabled. The runner prologue declares both
    pool buffers, fills the workspace-pools struct, obtains the inputs and
    outputs structs via the map_inputs / map_outputs calls, and memcpy's the
    first model input into place. ``model_url[0]`` and ``model_url[1]`` are
    forwarded to ``get_workload_official``; ``usmp_algo`` is stored under the
    "tir.usmp.algorithm" pass-config key.
    """
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing  # pylint: disable=import-outside-toplevel

    api_kind = "c"
    unpacked = True

    c_target = tvm.target.Target("c")
    hinted_pool = PoolInfo("my_memory_pool_1",
                           {c_target: PoolInfo.READ_WRITE_ACCESS},
                           size_hint_bytes=2500000)
    unhinted_pool = PoolInfo("my_memory_pool_2",
                             {c_target: PoolInfo.READ_WRITE_ACCESS})
    pools = WorkspaceMemoryPools([hinted_pool, unhinted_pool])

    tflite_file = tf_testing.get_workload_official(model_url[0], model_url[1])
    relay_mod, feeds, weights = create_relay_module_and_inputs_from_tflite_file(tflite_file)
    expected = generate_ref_data(relay_mod, feeds, weights)

    # Only the first input is copied into the workspace by the prologue.
    first_input = list(feeds.items())[0]
    input_name, input_array = first_input
    input_size_bytes = input_array.size * input_array.itemsize

    usmp_config = {
        "tir.usmp.enable": True,
        "tir.usmp.algorithm": usmp_algo,
        "tir.usmp.use_workspace_io": True,
    }
    runner = AOTTestRunner(
        pass_config=usmp_config,
        prologue=f"""
        #include <string.h>
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool_1[{_get_workspace_size_define_macro("my_memory_pool_1")}];
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool_2[{_get_workspace_size_define_macro("my_memory_pool_2")}];
        struct {_add_module_prefix("workspace_pools")} {_add_module_prefix("workspace_pools")} = {{
            .my_memory_pool_1 = my_memory_pool_1,
            .my_memory_pool_2 = my_memory_pool_2,
        }};
        struct {_add_module_prefix("inputs")} {_add_module_prefix("inputs")} = {_add_module_prefix("map_inputs")}(&{_add_module_prefix("workspace_pools")});
        memcpy({_add_module_prefix("inputs")}.{input_name}, tvmgen_default_input_data_input, {input_size_bytes});
        struct {_add_module_prefix("outputs")} {_add_module_prefix("outputs")} = {_add_module_prefix("map_outputs")}(&{_add_module_prefix("workspace_pools")});
        """,
    )

    model_under_test = AOTTestModel(
        module=relay_mod,
        inputs=feeds,
        outputs=expected,
        params=weights,
    )
    compiled = compile_models(
        model_under_test,
        interface_api=api_kind,
        use_unpacked_api=unpacked,
        pass_config=runner.pass_config,
        workspace_memory_pools=pools,
        target=c_target,
    )

    for entry in compiled:
        _check_for_no_tvm_backendallocworkspace_calls(entry.executor_factory.lib)

    run_and_check(
        models=compiled,
        runner=runner,
        interface_api=api_kind,
        use_workspace_io=True,
    )
Exemple #10
0
def test_tflite_model_u3_usecase_var_cons_ext_pools(model_url, usmp_algo):
    """Run a TFLite model with one external workspace pool and one external
    constant pool, both declared in the application prologue.

    The workspace pool "my_memory_pool_1" carries a size hint of 8500000
    bytes. ``model_url[0]`` and ``model_url[1]`` are forwarded to
    ``get_workload_official``; ``usmp_algo`` is stored under the
    "tir.usmp.algorithm" pass-config key.
    """
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing  # pylint: disable=import-outside-toplevel

    api_kind = "c"
    unpacked = True

    c_target = tvm.target.Target("c")
    workspace_pool = WorkspacePoolInfo(
        "my_memory_pool_1",
        [c_target],
        PoolInfoProperties(size_hint_bytes=8500000),
    )
    workspace_pools = WorkspaceMemoryPools([workspace_pool])

    constant_pool = ConstantPoolInfo("my_const_pool_1", [c_target], [])
    constant_pools = ConstantMemoryPools([constant_pool])

    usmp_config = {"tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo}
    runner = AOTTestRunner(
        pass_config=usmp_config,
        prologue=f"""
        __attribute__((section(".bss.noinit"), aligned(TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES)))
        static uint8_t my_memory_pool_1[{_get_workspace_size_define_macro("my_memory_pool_1")}];
        __attribute__((section(".rodata.tvm"), aligned(TVM_RUNTIME_CONST_ALLOC_ALIGNMENT_BYTES)))
        static uint8_t my_const_pool_1[{_get_constant_size_define_macro("my_const_pool_1")}] = {{ {_get_constant_data_define_macro("my_const_pool_1")} }};
        """,
    )

    tflite_file = tf_testing.get_workload_official(model_url[0], model_url[1])
    relay_mod, feeds, weights = create_relay_module_and_inputs_from_tflite_file(tflite_file)
    expected = generate_ref_data(relay_mod, feeds, weights)

    model_under_test = AOTTestModel(
        module=relay_mod,
        inputs=feeds,
        outputs=expected,
        params=weights,
    )
    compiled = compile_models(
        model_under_test,
        interface_api=api_kind,
        use_unpacked_api=unpacked,
        pass_config=runner.pass_config,
        workspace_memory_pools=workspace_pools,
        constant_memory_pools=constant_pools,
        target=c_target,
    )

    for entry in compiled:
        _check_for_no_tvm_backendallocworkspace_calls(entry.executor_factory.lib)

    run_and_check(models=compiled, runner=runner, interface_api=api_kind)
Exemple #11
0
def test_tflite_model_u3_usecase_conv2d_var_cons(usmp_algo):
    """Run a small conv2d/max_pool2d Relay program with an external workspace
    pool and an external constant pool declared in the application prologue.

    The "weight" parameter is filled with random integers and bound as a
    parameter, while "data" is an all-ones input. ``usmp_algo`` is stored
    under the "tir.usmp.algorithm" pass-config key.
    """

    relay_source = """\
        #[version = "0.0.5"]
        def @main(%data : Tensor[(1, 3, 64, 64), uint8], %weight : Tensor[(3, 3, 5, 5), int8]) {
            %1 = nn.conv2d(
                 %data,
                 %weight,
                 padding=[2, 2],
                 channels=3,
                 kernel_size=[5, 5],
                 data_layout="NCHW",
                 kernel_layout="OIHW",
                 out_dtype="int32");
            %2 = cast(nn.max_pool2d(%1, pool_size=[3, 3]), dtype="int8");
            %3 = nn.conv2d(
                 %2,
                 %weight,
                 padding=[2, 2],
                 channels=3,
                 kernel_size=[5, 5],
                 data_layout="NCHW",
                 kernel_layout="OIHW",
                 out_dtype="int32");
            %4 = nn.max_pool2d(%3, pool_size=[3, 3]);
            %4
        }
    """
    mod = tvm.parser.fromtext(relay_source)

    # Collect the shape and dtype of every parameter of @main, keyed by name.
    shape_dict = {}
    type_dict = {}
    for param in mod["main"].params:
        shape_dict[param.name_hint] = param.checked_type.concrete_shape
        type_dict[param.name_hint] = param.checked_type.dtype

    random_weights = np.random.randint(1, 255, shape_dict["weight"])
    weight_data = random_weights.astype(type_dict["weight"])
    input_data = np.ones(shape_dict["data"]).astype(type_dict["data"])
    weights = {"weight": weight_data}
    feeds = {"data": input_data}

    api_kind = "c"
    unpacked = True

    c_target = tvm.target.Target("c")
    workspace_pool = WorkspacePoolInfo(
        "my_memory_pool_1",
        [c_target],
        PoolInfoProperties(size_hint_bytes=8500000),
    )
    workspace_pools = WorkspaceMemoryPools([workspace_pool])

    constant_pool = ConstantPoolInfo("my_const_pool_1", [c_target], [])
    constant_pools = ConstantMemoryPools([constant_pool])

    usmp_config = {"tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo}
    runner = AOTTestRunner(
        pass_config=usmp_config,
        prologue=f"""
        __attribute__((section(".bss.noinit"), aligned(TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES)))
        static uint8_t my_memory_pool_1[{_get_workspace_size_define_macro("my_memory_pool_1")}];
        __attribute__((section(".rodata.tvm"), aligned(TVM_RUNTIME_CONST_ALLOC_ALIGNMENT_BYTES)))
        static uint8_t my_const_pool_1[{_get_constant_size_define_macro("my_const_pool_1")}] = {{ {_get_constant_data_define_macro("my_const_pool_1")} }};
        """,
    )

    expected = generate_ref_data(mod, feeds, weights)

    model_under_test = AOTTestModel(
        module=mod,
        inputs=feeds,
        outputs=expected,
        params=weights,
    )
    compiled = compile_models(
        model_under_test,
        interface_api=api_kind,
        use_unpacked_api=unpacked,
        pass_config=runner.pass_config,
        workspace_memory_pools=workspace_pools,
        constant_memory_pools=constant_pools,
        target=c_target,
    )

    for entry in compiled:
        _check_for_no_tvm_backendallocworkspace_calls(entry.executor_factory.lib)

    run_and_check(models=compiled, runner=runner, interface_api=api_kind)