Example 1
def _get_ethosu_workspace_size(mod, params, accel_type, pool_size,
                               enable_cascader, enable_striping):
    """Build `mod` with an SRAM workspace pool and return the workspace size
    (in bytes) reported for "main" in the Model Library Format memory map."""

    target, ethosu_target, runtime, executor, pass_config = _get_compilation_config(
        accel_type, enable_cascader, enable_striping)

    workspace_memory_pools = WorkspaceMemoryPools([
        WorkspacePoolInfo(
            "SRAM",
            [target, ethosu_target],
            PoolInfoProperties(
                size_hint_bytes=pool_size,
                read_bandwidth_bytes_per_cycle=16,
                write_bandwidth_bytes_per_cycle=16,
                target_burst_bytes={ethosu_target: 1},
            ),
        ),
    ])

    with tvm.transform.PassContext(opt_level=3, config=pass_config):
        lib = tvm.relay.build(
            mod,
            target,
            executor=executor,
            runtime=runtime,
            workspace_memory_pools=workspace_memory_pools,
            params=params,
        )

    mlf_memory_map = mlf._build_function_memory_map(lib.function_metadata)
    return mlf_memory_map["main"][0]["workspace_size_bytes"]
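These snippets are excerpts from the TVM AOT/microTVM test suite and rely on names imported at module level in the original files. The sketch below lists the imports they assume; the exact module paths, particularly for the AOT and Ethos-U test helpers, vary between TVM versions, so treat it as an approximation rather than a verbatim header. Note also that Example 1 builds the SRAM pool with WorkspacePoolInfo plus PoolInfoProperties, while Example 4 uses the older PoolInfo form with a per-target access dict; both classes live in tvm.ir.memory_pools.

import pytest
import numpy as np

import tvm
from tvm.relay.backend import Executor, Runtime
from tvm.ir.memory_pools import (PoolInfo, PoolInfoProperties,
                                 WorkspaceMemoryPools, WorkspacePoolInfo)
from tvm.micro import model_library_format as mlf

# The AOT helpers (AOTTestModel, AOTTestRunner, compile_models, run_and_check,
# generate_ref_data, create_relay_module_and_inputs_from_tflite_file) live in
# tvm.testing.aot in recent releases; older trees keep them in a local
# aot_test_utils module. The Ethos-U examples additionally assume the local
# `infra` test module and partition_for_ethosu from tvm.relay.op.contrib.ethosu.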
Example 2
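In the original suite this test is driven by a pytest parametrization over alignment/size pairs; a sketch of that decorator is shown here directly above the test, with placeholder byte counts rather than the values TVM actually pins.

@pytest.mark.parametrize(
    "workspace_byte_alignment, main_workspace_size",
    [
        (8, 17280),    # placeholder expected size
        (16, 17280),   # placeholder expected size
        (256, 17792),  # placeholder expected size
    ],
)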
def test_workspace_calculation(workspace_byte_alignment, main_workspace_size):
    """Checks that the "main" workspace size reported in Model Library Format
    matches the expected size for a given workspace byte alignment."""
    mod, params = tvm.relay.testing.synthetic.get_workload()
    target = "c"
    runtime = Runtime("crt")
    executor = Executor(
        "aot",
        {
            "workspace-byte-alignment": workspace_byte_alignment,
        },
    )
    with tvm.transform.PassContext(
            opt_level=3,
            config={
                "tir.disable_vectorize": True,
            },
    ):
        lib = tvm.relay.build(mod,
                              target,
                              executor=executor,
                              runtime=runtime,
                              params=params)

    mlf_memory_map = mlf._build_function_memory_map(lib.function_metadata)
    assert mlf_memory_map["main"][0]["workspace_size_bytes"] == main_workspace_size
Example 3
def test_tflite_model_u1_usecase(model_url, usmp_algo, workspace_size,
                                 constant_size):
    """
    This checks for ML models and the memory used by them
    when using USMP with different algorithms
    """
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing  # pylint: disable=import-outside-toplevel

    use_unpacked_api = True
    interface_api = "c"
    test_runner = AOTTestRunner(pass_config={
        "tir.usmp.enable": True,
        "tir.usmp.algorithm": usmp_algo
    })

    tflite_model_file = tf_testing.get_workload_official(
        model_url[0],
        model_url[1],
    )
    mod, inputs, params = create_relay_module_and_inputs_from_tflite_file(
        tflite_model_file)
    output_list = generate_ref_data(mod, inputs, params)

    compiled_test_mods = compile_models(
        AOTTestModel(module=mod,
                     inputs=inputs,
                     outputs=output_list,
                     params=params),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
    )

    for compiled_model in compiled_test_mods:
        _check_for_no_tvm_backendallocworkspace_calls(
            compiled_model.executor_factory.lib)

    # Check the workspace and constant sizes reported in Model Library Format.
    mlf_memory_map = mlf._build_function_memory_map(
        compiled_test_mods[0].executor_factory.function_metadata)
    assert mlf_memory_map["main"][0]["workspace_size_bytes"] == workspace_size
    assert mlf_memory_map["main"][0]["constants_size_bytes"] == constant_size
    # These sizes should match the pool sizes that will be codegen'd into the entry point.
    allocated_pool_info_size = sum(
        pool_info.allocated_size
        for pool_info in dict(compiled_test_mods[0].executor_factory.
                              executor_codegen_metadata.pool_inputs).values()
    )
    assert allocated_pool_info_size == workspace_size + constant_size

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
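This test is likewise parametrized in the original suite: `model_url` is a (download URL, file name) pair handed to `get_workload_official`, `usmp_algo` names the USMP placement algorithm, and the two sizes are per-model expectations. A hedged sketch of that parametrization follows, with a placeholder URL and byte counts; "hill_climb" is the algorithm also used in Example 4.

@pytest.mark.parametrize(
    "model_url, usmp_algo, workspace_size, constant_size",
    [
        (
            ("https://example.com/path/to/model_archive.zip",  # placeholder archive URL
             "model.tflite"),                                   # placeholder file in archive
            "hill_climb",   # USMP algorithm, as in Example 4
            0,              # placeholder expected workspace bytes
            0,              # placeholder expected constant bytes
        ),
    ],
)
def test_tflite_model_u1_usecase(model_url, usmp_algo, workspace_size, constant_size):
    ...  # body as shown in Example 3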
Example 4
def _get_ethosu_workspace_size(mod, params, accel_type, pool_size,
                               enable_cascader, enable_striping):
    """Build `mod` with USMP enabled and an SRAM workspace pool, then return
    the workspace size (in bytes) reported for "main" in Model Library Format."""
    enable_usmp = True

    target = tvm.target.Target("c")
    ethosu_target = tvm.target.Target("ethos-u")
    runtime = Runtime("crt")

    executor = Executor(
        "aot",
        {
            "workspace-byte-alignment": 16,
            "interface-api": "c",
            "unpacked-api": True,
        },
    )
    pass_config = {
        "tir.disable_vectorize": True,
        "relay.ext.ethos-u.options": {
            "accelerator_config": accel_type,
            "enable_cascader": enable_cascader,
            "enable_striping": enable_striping,
        },
        "tir.usmp.enable": enable_usmp,
        "tir.usmp.algorithm": "hill_climb",
        "tir.disable_storage_rewrite": enable_usmp,
    }

    workspace_memory_pools = WorkspaceMemoryPools([
        PoolInfo(
            "SRAM",
            {
                target: PoolInfo.READ_WRITE_ACCESS,
                ethosu_target: PoolInfo.READ_WRITE_ACCESS
            },
            size_hint_bytes=pool_size,
            read_bandwidth_bytes_per_cycle=16,
            write_bandwidth_bytes_per_cycle=16,
            target_burst_bytes={ethosu_target: 1},
        ),
    ])

    with tvm.transform.PassContext(opt_level=3, config=pass_config):
        lib = tvm.relay.build(
            mod,
            target,
            executor=executor,
            runtime=runtime,
            workspace_memory_pools=workspace_memory_pools,
            params=params,
        )

    mlf_memory_map = mlf._build_function_memory_map(lib.function_metadata)
    return mlf_memory_map["main"][0]["workspace_size_bytes"]
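A hedged usage sketch for this helper: `mod` and `params` must come from a model partitioned for Ethos-U (e.g. with `partition_for_ethosu`), and the accelerator string and pool size below are placeholders. It simply reports the SRAM workspace with the cascader off and on, so the two numbers can be compared against expected values.

def compare_cascader_effect(mod, params):
    """Hedged sketch: report the SRAM workspace with the cascader off and on."""
    kwargs = dict(accel_type="ethos-u55-256",   # placeholder accelerator config
                  pool_size=2 * 1024 * 1024)    # placeholder 2 MiB SRAM hint
    no_cascader = _get_ethosu_workspace_size(mod, params, enable_cascader=False,
                                             enable_striping=False, **kwargs)
    with_cascader = _get_ethosu_workspace_size(mod, params, enable_cascader=True,
                                               enable_striping=True, **kwargs)
    return no_cascader, with_cascader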
Example 5
def test_networks_without_usmp(accel_type, model_url, workspace_size):
    np.random.seed(23)
    tflite_model_buf = infra.get_tflite_model(model_url)
    input_data, output_data = infra.generate_ref_data_tflite(tflite_model_buf)
    mod, params = convert_to_relay(tflite_model_buf)
    mod = partition_for_ethosu(mod, params)
    test_runner = infra.create_test_runner(accel_type, enable_usmp=False)
    compiled_models = infra.build_source(mod, input_data, output_data,
                                         test_runner)
    mlf_memory_map = mlf._build_function_memory_map(
        compiled_models[0].executor_factory.function_metadata)
    assert mlf_memory_map["main"][0]["workspace_size_bytes"] == workspace_size
    infra.verify_source(compiled_models, test_runner)
Example 6
def test_workspace_calculation_cmsis_nn():
    """This tests cmsis_nn codegen for workspace calculation.
    This is tested specially because cmsis-nn codegen creates
    multiple PrimFuncs per offloaded relay function in a non
    -hierarchical manner."""
    pytest.importorskip("tflite")

    # pylint: disable=import-outside-toplevel
    from tvm.relay.op.contrib import cmsisnn
    from tvm.contrib.download import download_testdata

    # pylint: enable=import-outside-toplevel

    target = "c"
    runtime = Runtime("crt")
    executor = Executor(
        "aot",
        {
            "workspace-byte-alignment": 16,
            "interface-api": "c",
            "unpacked-api": True,
        },
    )

    base_url = ("https://github.com/ARM-software/ML-zoo/raw/"
                "48a22ee22325d15d2371a6df24eb7d67e21dcc97"
                "/models/keyword_spotting/cnn_small/tflite_int8")
    file_to_download = "cnn_s_quantized.tflite"
    file_saved = "cnn_s_quantized_15Dec2021.tflite"
    model_file = download_testdata("{}/{}".format(base_url, file_to_download),
                                   file_saved)
    mod, _, params = create_relay_module_and_inputs_from_tflite_file(
        model_file)
    mod = cmsisnn.partition_for_cmsisnn(mod, params)
    with tvm.transform.PassContext(
            opt_level=3,
            config={
                "tir.disable_vectorize": True,
            },
    ):
        lib = tvm.relay.build(mod,
                              target,
                              executor=executor,
                              runtime=runtime,
                              params=params)
    mlf_memory_map = mlf._build_function_memory_map(lib.function_metadata)
    assert mlf_memory_map["main"][0]["workspace_size_bytes"] == 14384
Example 7
def test_networks_without_usmp(accel_type, model_url, workspace_size,
                               tolerance):
    np.random.seed(23)
    tflite_model_file = tf_testing.get_workload_official(
        model_url[0], model_url[1])
    mod, input_data, params = create_relay_module_and_inputs_from_tflite_file(
        tflite_model_file)
    output_data = generate_ref_data(mod, input_data, params)
    mod = partition_for_ethosu(mod, params)
    compiled_models = infra.build_source(mod,
                                         input_data,
                                         output_data,
                                         accel_type,
                                         output_tolerance=tolerance,
                                         enable_usmp=False)
    mlf_memory_map = mlf._build_function_memory_map(
        compiled_models[0].executor_factory.function_metadata)
    assert mlf_memory_map["main"][0]["workspace_size_bytes"] == workspace_size
    infra.verify_source(compiled_models, accel_type, enable_usmp=False)