def test_tflite_model_u2_usecase_two_models_with_a_single_external_pool(model_urls, usmp_algo):
    """This checks for inference using a single large enough common pool"""
    pytest.importorskip("tflite")

    # Imported lazily so that test collection does not require TensorFlow.
    import tvm.relay.testing.tf as tf_testing

    use_unpacked_api = True
    interface_api = "c"
    target = tvm.target.Target("c")

    # A single read-write pool that both compiled modules will share.
    workspace_memory_pools = WorkspaceMemoryPools(
        [PoolInfo("my_memory_pool", {target: PoolInfo.READ_WRITE_ACCESS})]
    )
    # The static pool in the generated test harness is sized to the larger of
    # the two per-module workspace requirements via the size #define macros.
    test_runner = AOTTestRunner(
        pass_config={"tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo},
        prologue=f"""
        #define MAX(A, B) ((A > B) ? A : B)
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool[MAX({_get_workspace_size_define_macro("my_memory_pool", "mod1")},{_get_workspace_size_define_macro("my_memory_pool", "mod2")})];
        """,
    )

    # First model: download, convert to Relay and generate reference outputs.
    tflite_model_file1 = tf_testing.get_workload_official(
        model_urls[0][0],
        model_urls[0][1],
    )
    mod1, inputs1, params1 = create_relay_module_and_inputs_from_tflite_file(tflite_model_file1)
    output_list1 = generate_ref_data(mod1, inputs1, params1)

    # Second model, same treatment.
    tflite_model_file2 = tf_testing.get_workload_official(
        model_urls[1][0],
        model_urls[1][1],
    )
    mod2, inputs2, params2 = create_relay_module_and_inputs_from_tflite_file(tflite_model_file2)
    output_list2 = generate_ref_data(mod2, inputs2, params2)

    # Compile both models together so they are planned into the shared pool.
    compiled_test_mods = compile_models(
        [
            AOTTestModel(
                name="mod1", module=mod1, inputs=inputs1, outputs=output_list1, params=params1
            ),
            AOTTestModel(
                name="mod2", module=mod2, inputs=inputs2, outputs=output_list2, params=params2
            ),
        ],
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
        workspace_memory_pools=workspace_memory_pools,
        target=target,
    )

    # With USMP enabled no dynamic TVMBackendAllocWorkspace calls should remain.
    for compiled_model in compiled_test_mods:
        check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
def _get_ethosu_workspace_size(mod, params, accel_type, pool_size, enable_cascader, enable_striping):
    """Compile ``mod`` for the given Ethos-U accelerator configuration and
    return the workspace size in bytes reported for ``main`` in the MLF
    function-memory map."""
    # Derive target/runtime/executor/pass configuration for this accelerator.
    target, ethosu_target, runtime, executor, pass_config = _get_compilation_config(
        accel_type, enable_cascader, enable_striping
    )

    # Single "SRAM" pool visible to both the host target and the NPU target.
    sram_properties = PoolInfoProperties(
        size_hint_bytes=pool_size,
        read_bandwidth_bytes_per_cycle=16,
        write_bandwidth_bytes_per_cycle=16,
        target_burst_bytes={ethosu_target: 1},
    )
    sram_pool = WorkspacePoolInfo("SRAM", [target, ethosu_target], sram_properties)
    workspace_memory_pools = WorkspaceMemoryPools([sram_pool])

    with tvm.transform.PassContext(opt_level=3, config=pass_config):
        lib = tvm.relay.build(
            mod,
            target,
            executor=executor,
            runtime=runtime,
            workspace_memory_pools=workspace_memory_pools,
            params=params,
        )

    # Extract the planned workspace size for the entry function.
    memory_map = mlf._build_function_memory_map(lib.function_metadata)
    return memory_map["main"][0]["workspace_size_bytes"]
def test_tflite_model_u3_usecase_two_external_pools(model_url, usmp_algo):
    """This checks for inference using two external pools placed in the application"""
    pytest.importorskip("tflite")

    # Imported lazily so that test collection does not require TensorFlow.
    import tvm.relay.testing.tf as tf_testing

    use_unpacked_api = True
    interface_api = "c"
    target = tvm.target.Target("c")

    # Two read-write pools: the first capped with a size hint, the second left
    # unbounded so the planner can place the remainder there.
    workspace_memory_pools = WorkspaceMemoryPools(
        [
            PoolInfo(
                "my_memory_pool_1", {target: PoolInfo.READ_WRITE_ACCESS}, size_hint_bytes=2500000
            ),
            PoolInfo("my_memory_pool_2", {target: PoolInfo.READ_WRITE_ACCESS}),
        ]
    )
    # The application (test harness) provides both pools as static buffers,
    # sized via the generated size #define macros.
    test_runner = AOTTestRunner(
        pass_config={"tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo},
        prologue=f"""
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool_1[{_get_workspace_size_define_macro("my_memory_pool_1")}];
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool_2[{_get_workspace_size_define_macro("my_memory_pool_2")}];
        """,
    )

    tflite_model_file = tf_testing.get_workload_official(
        model_url[0],
        model_url[1],
    )
    mod, inputs, params = _get_relay_module_and_inputs_from_tflite_file(tflite_model_file)
    output_list = generate_ref_data(mod, inputs, params)

    compiled_test_mods = compile_models(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
        workspace_memory_pools=workspace_memory_pools,
        target=target,
    )

    # With USMP enabled no dynamic TVMBackendAllocWorkspace calls should remain.
    for compiled_model in compiled_test_mods:
        check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
def compare_ethosu_with_reference(
    mod,
    input_data,
    output_data,
    accel_type: str,
    output_tolerance=0,
    print_cmm=False,
    enable_cascader=None,
):
    """Build ``mod`` for the Ethos-U and verify the generated sources against
    the reference ``output_data``, optionally printing the command stream."""
    # Default: cascader on, except for the U65 accelerator family.
    if enable_cascader is None:
        enable_cascader = "u65" not in accel_type

    pool_name = "my_memory_pool"
    host_target = tvm.target.Target("c")
    ethosu_target = tvm.target.Target("ethos-u")

    # One pool, read-writable from both the host and the NPU.
    pool_access = {
        host_target: PoolInfo.READ_WRITE_ACCESS,
        ethosu_target: PoolInfo.READ_WRITE_ACCESS,
    }
    shared_pool = PoolInfo(
        pool_name,
        pool_access,
        size_hint_bytes=2400000,
        read_bandwidth_bytes_per_cycle=16,
        write_bandwidth_bytes_per_cycle=16,
        target_burst_bytes={ethosu_target: 1},
    )
    workspace_pools = WorkspaceMemoryPools([shared_pool])

    test_runner = create_test_runner(
        accel_type,
        enable_usmp=True,
        enable_cascader=enable_cascader,
        enable_striping=False,
        workspace_pools=workspace_pools,
    )
    compiled_models = build_source(
        mod,
        input_data,
        output_data,
        test_runner,
        workspace_pools=workspace_pools,
        output_tolerance=output_tolerance,
    )

    # Assumes only two runtime.Modules are created -- i.e. a single offload
    # module nested under the host library.
    host_lib = compiled_models[0].executor_factory.lib
    ethosu_module = host_lib.imported_modules[0].imported_modules[0]

    # Optionally dump the compiled command stream for debugging.
    if print_cmm:
        get_artifacts = tvm._ffi.get_global_func("runtime.module.ethos-u.get_artifacts")
        compilation_artifacts = get_artifacts(ethosu_module)
        cmms = bytes.fromhex(compilation_artifacts[0].command_stream)
        print_payload(cmms)

    verify_source(compiled_models, test_runner)
def test_multiple_memory_pools():
    """
    The cascader does not support multiple workspace memory pools.
    Check the correct error is thrown.
    """
    np.random.seed(2)
    ifm_shape = (1, 80, 75, 3)

    target, ethosu_target, runtime, executor, pass_config = _get_compilation_config(
        "ethos-u55-256", True, True
    )
    # Two pools are deliberately supplied (duplicated on purpose) so that
    # relay.build hits the cascader's "exactly one pool" validation.
    workspace_memory_pools = WorkspaceMemoryPools(
        [
            WorkspacePoolInfo(
                "SRAM",
                [target, ethosu_target],
                PoolInfoProperties(
                    size_hint_bytes=1,
                    read_bandwidth_bytes_per_cycle=16,
                    write_bandwidth_bytes_per_cycle=16,
                    target_burst_bytes={ethosu_target: 1},
                ),
            ),
            WorkspacePoolInfo(
                "SRAM",
                [target, ethosu_target],
                PoolInfoProperties(
                    size_hint_bytes=1,
                    read_bandwidth_bytes_per_cycle=16,
                    write_bandwidth_bytes_per_cycle=16,
                    target_burst_bytes={ethosu_target: 1},
                ),
            ),
        ]
    )

    # Minimal TF graph (a single max-pool) to drive the compilation flow.
    @tf.function
    def tf_graph(x):
        return tf.nn.max_pool(x, (3, 3), (1, 1), "SAME")

    _, tflite_graph = infra.get_tflite_graph(tf_graph, [ifm_shape])
    tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0)
    relay_module, params = relay.frontend.from_tflite(tflite_model)
    mod = partition_for_ethosu(relay_module, params)

    with pytest.raises(ValueError) as e:
        with tvm.transform.PassContext(opt_level=3, config=pass_config):
            tvm.relay.build(
                mod,
                target,
                executor=executor,
                runtime=runtime,
                workspace_memory_pools=workspace_memory_pools,
                params=params,
            )

    expected_reason = "Exactly one workspace pool needs to be provided for the U55 cascader"
    on_error = "A ValueError was caught but its reason is not the expected one."
    assert expected_reason in str(e.value), on_error
def _get_ethosu_workspace_size(mod, params, accel_type, pool_size, enable_cascader, enable_striping):
    """Compile ``mod`` for Ethos-U with an inline AOT/USMP configuration and
    return the workspace size in bytes reported for ``main`` in the MLF
    function-memory map."""
    enable_usmp = True
    target = tvm.target.Target("c")
    ethosu_target = tvm.target.Target("ethos-u")
    runtime = Runtime("crt")

    # Unpacked C-interface AOT executor, 16-byte workspace alignment.
    aot_options = {
        "workspace-byte-alignment": 16,
        "interface-api": "c",
        "unpacked-api": True,
    }
    executor = Executor("aot", aot_options)

    ethosu_options = {
        "accelerator_config": accel_type,
        "enable_cascader": enable_cascader,
        "enable_striping": enable_striping,
    }
    # Storage rewrite is disabled whenever USMP is on, since USMP takes over
    # the memory planning.
    pass_config = {
        "tir.disable_vectorize": True,
        "relay.ext.ethos-u.options": ethosu_options,
        "tir.usmp.enable": enable_usmp,
        "tir.usmp.algorithm": "hill_climb",
        "tir.disable_storage_rewrite": enable_usmp,
    }

    # A single "SRAM" pool shared by the host and the NPU targets.
    sram_access = {
        target: PoolInfo.READ_WRITE_ACCESS,
        ethosu_target: PoolInfo.READ_WRITE_ACCESS,
    }
    sram_pool = PoolInfo(
        "SRAM",
        sram_access,
        size_hint_bytes=pool_size,
        read_bandwidth_bytes_per_cycle=16,
        write_bandwidth_bytes_per_cycle=16,
        target_burst_bytes={ethosu_target: 1},
    )
    workspace_memory_pools = WorkspaceMemoryPools([sram_pool])

    with tvm.transform.PassContext(opt_level=3, config=pass_config):
        lib = tvm.relay.build(
            mod,
            target,
            executor=executor,
            runtime=runtime,
            workspace_memory_pools=workspace_memory_pools,
            params=params,
        )

    memory_map = mlf._build_function_memory_map(lib.function_metadata)
    return memory_map["main"][0]["workspace_size_bytes"]
def test_tflite_model_u3_usecase_single_external_pool(model_url, usmp_algo):
    """This checks for inference with USMP using external pool placed in the application"""
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing  # pylint: disable=import-outside-toplevel

    use_unpacked_api = True
    interface_api = "c"

    pool_name = "my_memory_pool"
    target = tvm.target.Target("c")
    # A single application-provided workspace pool for the host target.
    workspace_memory_pools = WorkspaceMemoryPools([WorkspacePoolInfo(pool_name, [target])])
    # The harness declares the pool as a static buffer sized via the
    # generated size #define macro.
    test_runner = AOTTestRunner(
        pass_config={"tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo},
        prologue=f"""
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t {pool_name}[{_get_workspace_size_define_macro(pool_name)}];
        """,
    )

    tflite_model_file = tf_testing.get_workload_official(
        model_url[0],
        model_url[1],
    )
    mod, inputs, params = create_relay_module_and_inputs_from_tflite_file(tflite_model_file)
    output_list = generate_ref_data(mod, inputs, params)

    compiled_test_mods = compile_models(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
        workspace_memory_pools=workspace_memory_pools,
        target=target,
    )

    # With USMP enabled no dynamic TVMBackendAllocWorkspace calls should remain.
    for compiled_model in compiled_test_mods:
        _check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
def test_networks_with_usmp_and_cascader_wo_striping(accel_type, model_url, workspace_size):
    """End-to-end network test with USMP and the cascader enabled but striping
    disabled; checks that the allocated pool size matches ``workspace_size``."""
    np.random.seed(23)

    pool_name = "my_memory_pool"
    host_target = tvm.target.Target("c")
    ethosu_target = tvm.target.Target("ethos-u")

    # A single pool shared by host and NPU, with fixed bandwidth properties.
    pool_properties = PoolInfoProperties(
        size_hint_bytes=2400000,
        read_bandwidth_bytes_per_cycle=16,
        write_bandwidth_bytes_per_cycle=16,
        target_burst_bytes={ethosu_target: 1},
    )
    workspace_pools = WorkspaceMemoryPools(
        [WorkspacePoolInfo(pool_name, [host_target, ethosu_target], pool_properties)]
    )

    # Fetch the model, produce reference data, convert and partition for the NPU.
    tflite_model_buf = infra.get_tflite_model(model_url)
    input_data, output_data = infra.generate_ref_data_tflite(tflite_model_buf)
    mod, params = convert_to_relay(tflite_model_buf)
    mod = partition_for_ethosu(mod, params)

    test_runner = infra.create_test_runner(
        accel_type,
        enable_usmp=True,
        enable_cascader=True,
        enable_striping=False,
        workspace_pools=workspace_pools,
    )
    compiled_models = infra.build_source(
        mod, input_data, output_data, test_runner, workspace_pools=workspace_pools
    )
    infra.verify_source(compiled_models, test_runner)

    # Only one pool was supplied, so its allocation record is the sole entry.
    codegen_metadata = compiled_models[0].executor_factory.executor_codegen_metadata
    pool_allocations = dict(codegen_metadata.pool_inputs)
    allocated_pool_info = list(pool_allocations.values())[0]
    assert allocated_pool_info.allocated_size == workspace_size
def test_tflite_model_u4_usecase_two_external_pools(model_url, usmp_algo):
    """This checks for inference with USMP using external pool placed in the application"""
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing  # pylint: disable=import-outside-toplevel

    use_unpacked_api = True
    interface_api = "c"
    target = tvm.target.Target("c")

    # Two read-write pools: the first capped with a size hint, the second left
    # unbounded for the remainder of the allocations.
    workspace_memory_pools = WorkspaceMemoryPools(
        [
            PoolInfo(
                "my_memory_pool_1", {target: PoolInfo.READ_WRITE_ACCESS}, size_hint_bytes=2500000
            ),
            PoolInfo("my_memory_pool_2", {target: PoolInfo.READ_WRITE_ACCESS}),
        ]
    )

    tflite_model_file = tf_testing.get_workload_official(
        model_url[0],
        model_url[1],
    )
    mod, inputs, params = create_relay_module_and_inputs_from_tflite_file(tflite_model_file)
    output_list = generate_ref_data(mod, inputs, params)

    # Size of the (single) input tensor in bytes, needed for the memcpy in
    # the harness prologue below.
    input_name, input_data = list(inputs.items())[0]
    input_size_bytes = input_data.size * input_data.itemsize

    # use_workspace_io places I/O tensors inside the workspace pools; the
    # prologue wires the two static pools into the generated workspace/input/
    # output structs and copies the input data into its pool-backed buffer.
    test_runner = AOTTestRunner(
        pass_config={
            "tir.usmp.enable": True,
            "tir.usmp.algorithm": usmp_algo,
            "tir.usmp.use_workspace_io": True,
        },
        prologue=f"""
        #include <string.h>

        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool_1[{_get_workspace_size_define_macro("my_memory_pool_1")}];
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool_2[{_get_workspace_size_define_macro("my_memory_pool_2")}];
        struct {_add_module_prefix("workspace_pools")} {_add_module_prefix("workspace_pools")} = {{
            .my_memory_pool_1 = my_memory_pool_1,
            .my_memory_pool_2 = my_memory_pool_2,
        }};
        struct {_add_module_prefix("inputs")} {_add_module_prefix("inputs")} = {_add_module_prefix("map_inputs")}(&{_add_module_prefix("workspace_pools")});
        memcpy({_add_module_prefix("inputs")}.{input_name}, tvmgen_default_input_data_input, {input_size_bytes});
        struct {_add_module_prefix("outputs")} {_add_module_prefix("outputs")} = {_add_module_prefix("map_outputs")}(&{_add_module_prefix("workspace_pools")});
        """,
    )
    compiled_test_mods = compile_models(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
        workspace_memory_pools=workspace_memory_pools,
        target=target,
    )

    # With USMP enabled no dynamic TVMBackendAllocWorkspace calls should remain.
    for compiled_model in compiled_test_mods:
        _check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
        use_workspace_io=True,
    )
def test_tflite_model_u3_usecase_var_cons_ext_pools(model_url, usmp_algo):
    """This checks for inference using one external workspace and one external constant
    pools placed in the application"""
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing  # pylint: disable=import-outside-toplevel

    use_unpacked_api = True
    interface_api = "c"
    target = tvm.target.Target("c")

    # One workspace pool (size-hinted) and one constant pool, both provided by
    # the application.
    workspace_mem_pools = WorkspaceMemoryPools(
        [
            WorkspacePoolInfo(
                "my_memory_pool_1", [target], PoolInfoProperties(size_hint_bytes=8500000)
            ),
        ]
    )
    constant_mem_pools = ConstantMemoryPools(
        [
            ConstantPoolInfo("my_const_pool_1", [target], []),
        ]
    )

    # The harness declares the workspace pool as uninitialised .bss and the
    # constant pool as .rodata initialised from the generated data macro.
    test_runner = AOTTestRunner(
        pass_config={"tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo},
        prologue=f"""
        __attribute__((section(".bss.noinit"), aligned(TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES)))
        static uint8_t my_memory_pool_1[{_get_workspace_size_define_macro("my_memory_pool_1")}];
        __attribute__((section(".rodata.tvm"), aligned(TVM_RUNTIME_CONST_ALLOC_ALIGNMENT_BYTES)))
        static uint8_t my_const_pool_1[{_get_constant_size_define_macro("my_const_pool_1")}] = {{ {_get_constant_data_define_macro("my_const_pool_1")} }};
        """,
    )

    tflite_model_file = tf_testing.get_workload_official(
        model_url[0],
        model_url[1],
    )
    mod, inputs, params = create_relay_module_and_inputs_from_tflite_file(tflite_model_file)
    output_list = generate_ref_data(mod, inputs, params)

    compiled_test_mods = compile_models(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
        workspace_memory_pools=workspace_mem_pools,
        constant_memory_pools=constant_mem_pools,
        target=target,
    )

    # With USMP enabled no dynamic TVMBackendAllocWorkspace calls should remain.
    for compiled_model in compiled_test_mods:
        _check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
def test_tflite_model_u3_usecase_conv2d_var_cons(usmp_algo):
    """This checks for inference using workspace and constant pools placed in the application"""
    # Small conv2d/max_pool2d network written directly in Relay text format.
    mod = tvm.parser.fromtext(
        """\
        #[version = "0.0.5"]
        def @main(%data : Tensor[(1, 3, 64, 64), uint8], %weight : Tensor[(3, 3, 5, 5), int8]) {
            %1 = nn.conv2d(
                 %data,
                 %weight,
                 padding=[2, 2],
                 channels=3,
                 kernel_size=[5, 5],
                 data_layout="NCHW",
                 kernel_layout="OIHW",
                 out_dtype="int32");
            %2 = cast(nn.max_pool2d(%1, pool_size=[3, 3]), dtype="int8");
            %3 = nn.conv2d(
                 %2,
                 %weight,
                 padding=[2, 2],
                 channels=3,
                 kernel_size=[5, 5],
                 data_layout="NCHW",
                 kernel_layout="OIHW",
                 out_dtype="int32");
            %4 = nn.max_pool2d(%3, pool_size=[3, 3]);
            %4
        }
        """
    )

    main_func = mod["main"]
    # Derive input shapes and dtypes from the parsed module's parameters.
    shape_dict = {p.name_hint: p.checked_type.concrete_shape for p in main_func.params}
    type_dict = {p.name_hint: p.checked_type.dtype for p in main_func.params}

    weight_data = np.random.randint(1, 255, shape_dict["weight"]).astype(type_dict["weight"])
    input_data = np.ones(shape_dict["data"]).astype(type_dict["data"])
    params = {"weight": weight_data}
    inputs = {"data": input_data}

    use_unpacked_api = True
    interface_api = "c"
    target = tvm.target.Target("c")

    # One workspace pool (size-hinted) and one constant pool, both provided by
    # the application.
    workspace_mem_pools = WorkspaceMemoryPools(
        [
            WorkspacePoolInfo(
                "my_memory_pool_1", [target], PoolInfoProperties(size_hint_bytes=8500000)
            ),
        ]
    )
    constant_mem_pools = ConstantMemoryPools(
        [
            ConstantPoolInfo("my_const_pool_1", [target], []),
        ]
    )

    # The harness declares the workspace pool as uninitialised .bss and the
    # constant pool as .rodata initialised from the generated data macro.
    test_runner = AOTTestRunner(
        pass_config={"tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo},
        prologue=f"""
        __attribute__((section(".bss.noinit"), aligned(TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES)))
        static uint8_t my_memory_pool_1[{_get_workspace_size_define_macro("my_memory_pool_1")}];
        __attribute__((section(".rodata.tvm"), aligned(TVM_RUNTIME_CONST_ALLOC_ALIGNMENT_BYTES)))
        static uint8_t my_const_pool_1[{_get_constant_size_define_macro("my_const_pool_1")}] = {{ {_get_constant_data_define_macro("my_const_pool_1")} }};
        """,
    )

    output_list = generate_ref_data(mod, inputs, params)
    compiled_test_mods = compile_models(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
        workspace_memory_pools=workspace_mem_pools,
        constant_memory_pools=constant_mem_pools,
        target=target,
    )

    # With USMP enabled no dynamic TVMBackendAllocWorkspace calls should remain.
    for compiled_model in compiled_test_mods:
        _check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )