def _get_ethosu_workspace_size(mod, params, accel_type, pool_size, enable_cascader, enable_striping):
    """Compile *mod* for an Ethos-U accelerator and return the main
    function's workspace size in bytes, as reported by the Model Library
    Format memory map.
    """
    host_target, npu_target, crt_runtime, aot_executor, build_config = _get_compilation_config(
        accel_type, enable_cascader, enable_striping
    )

    # A single SRAM pool shared by the host and NPU targets.
    sram_pool = WorkspacePoolInfo(
        "SRAM",
        [host_target, npu_target],
        PoolInfoProperties(
            size_hint_bytes=pool_size,
            read_bandwidth_bytes_per_cycle=16,
            write_bandwidth_bytes_per_cycle=16,
            target_burst_bytes={npu_target: 1},
        ),
    )

    with tvm.transform.PassContext(opt_level=3, config=build_config):
        built = tvm.relay.build(
            mod,
            host_target,
            executor=aot_executor,
            runtime=crt_runtime,
            workspace_memory_pools=WorkspaceMemoryPools([sram_pool]),
            params=params,
        )

    memory_map = mlf._build_function_memory_map(built.function_metadata)
    return memory_map["main"][0]["workspace_size_bytes"]
def test_workspace_calculation(workspace_byte_alignment, main_workspace_size):
    """Check that the AOT executor reports the expected main-function
    workspace size for a synthetic workload at a given byte alignment."""
    mod, params = tvm.relay.testing.synthetic.get_workload()

    crt_runtime = Runtime("crt")
    aot_executor = Executor(
        "aot",
        {"workspace-byte-alignment": workspace_byte_alignment},
    )

    with tvm.transform.PassContext(
        opt_level=3,
        config={"tir.disable_vectorize": True},
    ):
        built = tvm.relay.build(
            mod, "c", executor=aot_executor, runtime=crt_runtime, params=params
        )

    memory_map = mlf._build_function_memory_map(built.function_metadata)
    assert memory_map["main"][0]["workspace_size_bytes"] == main_workspace_size
def test_tflite_model_u1_usecase(model_url, usmp_algo, workspace_size, constant_size):
    """
    This checks for ML models and the memory used by them
    when using USMP with different algorithms
    """
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing  # pylint: disable=import-outside-toplevel

    use_unpacked_api = True
    interface_api = "c"
    test_runner = AOTTestRunner(
        pass_config={"tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo}
    )

    tflite_model_file = tf_testing.get_workload_official(
        model_url[0],
        model_url[1],
    )
    mod, inputs, params = create_relay_module_and_inputs_from_tflite_file(tflite_model_file)
    output_list = generate_ref_data(mod, inputs, params)

    compiled_test_mods = compile_models(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
    )

    # With USMP enabled, no TVMBackendAllocWorkspace calls should remain.
    for compiled_model in compiled_test_mods:
        _check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib)

    # Checking the workspace size reported in model library format
    mlf_memory_map = mlf._build_function_memory_map(
        compiled_test_mods[0].executor_factory.function_metadata
    )
    assert mlf_memory_map["main"][0]["workspace_size_bytes"] == workspace_size
    assert mlf_memory_map["main"][0]["constants_size_bytes"] == constant_size

    # That should match to workspace size that will be codegen'd to the entry point.
    # pool_inputs is convertible to a dict of pool-var -> AllocatedPoolInfo; sum the
    # allocated sizes directly from a generator (no intermediate lists needed).
    pool_inputs = dict(
        compiled_test_mods[0].executor_factory.executor_codegen_metadata.pool_inputs
    )
    allocated_pool_info_size = sum(
        pool_info.allocated_size for pool_info in pool_inputs.values()
    )
    assert allocated_pool_info_size == workspace_size + constant_size

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
def _get_ethosu_workspace_size(mod, params, accel_type, pool_size, enable_cascader, enable_striping):
    """Compile *mod* for an Ethos-U accelerator with USMP (hill-climb
    algorithm) enabled and return the main function's workspace size in
    bytes from the Model Library Format memory map."""
    enable_usmp = True

    host_target = tvm.target.Target("c")
    npu_target = tvm.target.Target("ethos-u")
    crt_runtime = Runtime("crt")
    aot_executor = Executor(
        "aot",
        {
            "workspace-byte-alignment": 16,
            "interface-api": "c",
            "unpacked-api": True,
        },
    )
    build_config = {
        "tir.disable_vectorize": True,
        "relay.ext.ethos-u.options": {
            "accelerator_config": accel_type,
            "enable_cascader": enable_cascader,
            "enable_striping": enable_striping,
        },
        "tir.usmp.enable": enable_usmp,
        "tir.usmp.algorithm": "hill_climb",
        # Storage rewrite is switched off whenever USMP plans the memory.
        "tir.disable_storage_rewrite": enable_usmp,
    }

    # One SRAM pool, read/write accessible from both the host and the NPU.
    sram_pool = PoolInfo(
        "SRAM",
        {
            host_target: PoolInfo.READ_WRITE_ACCESS,
            npu_target: PoolInfo.READ_WRITE_ACCESS,
        },
        size_hint_bytes=pool_size,
        read_bandwidth_bytes_per_cycle=16,
        write_bandwidth_bytes_per_cycle=16,
        target_burst_bytes={npu_target: 1},
    )

    with tvm.transform.PassContext(opt_level=3, config=build_config):
        built = tvm.relay.build(
            mod,
            host_target,
            executor=aot_executor,
            runtime=crt_runtime,
            workspace_memory_pools=WorkspaceMemoryPools([sram_pool]),
            params=params,
        )

    memory_map = mlf._build_function_memory_map(built.function_metadata)
    return memory_map["main"][0]["workspace_size_bytes"]
def test_networks_without_usmp(accel_type, model_url, workspace_size):
    """Compile a TFLite network for Ethos-U with USMP disabled, check the
    reported workspace size, and verify the generated sources."""
    np.random.seed(23)

    tflite_model_buf = infra.get_tflite_model(model_url)
    input_data, output_data = infra.generate_ref_data_tflite(tflite_model_buf)

    mod, params = convert_to_relay(tflite_model_buf)
    mod = partition_for_ethosu(mod, params)

    runner = infra.create_test_runner(accel_type, enable_usmp=False)
    compiled = infra.build_source(mod, input_data, output_data, runner)

    memory_map = mlf._build_function_memory_map(
        compiled[0].executor_factory.function_metadata
    )
    assert memory_map["main"][0]["workspace_size_bytes"] == workspace_size

    infra.verify_source(compiled, runner)
def test_workspace_calculation_cmsis_nn():
    """This tests cmsis_nn codegen for workspace calculation.
    This is tested specially because cmsis-nn codegen creates
    multiple PrimFuncs per offloaded relay function in a non
    -hierarchical manner."""
    pytest.importorskip("tflite")

    # pylint: disable=import-outside-toplevel
    from tvm.relay.op.contrib import cmsisnn
    from tvm.contrib.download import download_testdata

    # pylint: enable=import-outside-toplevel

    crt_runtime = Runtime("crt")
    aot_executor = Executor(
        "aot",
        {
            "workspace-byte-alignment": 16,
            "interface-api": "c",
            "unpacked-api": True,
        },
    )

    # Pinned commit of the ARM ML-zoo keyword-spotting model.
    base_url = (
        "https://github.com/ARM-software/ML-zoo/raw/"
        "48a22ee22325d15d2371a6df24eb7d67e21dcc97"
        "/models/keyword_spotting/cnn_small/tflite_int8"
    )
    file_to_download = "cnn_s_quantized.tflite"
    file_saved = "cnn_s_quantized_15Dec2021.tflite"
    model_file = download_testdata("{}/{}".format(base_url, file_to_download), file_saved)

    mod, _, params = create_relay_module_and_inputs_from_tflite_file(model_file)
    mod = cmsisnn.partition_for_cmsisnn(mod, params)

    with tvm.transform.PassContext(
        opt_level=3,
        config={"tir.disable_vectorize": True},
    ):
        built = tvm.relay.build(
            mod, "c", executor=aot_executor, runtime=crt_runtime, params=params
        )

    memory_map = mlf._build_function_memory_map(built.function_metadata)
    assert memory_map["main"][0]["workspace_size_bytes"] == 14384
def test_networks_without_usmp(accel_type, model_url, workspace_size, tolerance):
    """Compile an official TFLite workload for Ethos-U with USMP disabled,
    check the reported workspace size, and verify the generated sources
    against reference outputs within *tolerance*."""
    np.random.seed(23)

    tflite_model_file = tf_testing.get_workload_official(model_url[0], model_url[1])
    mod, input_data, params = create_relay_module_and_inputs_from_tflite_file(
        tflite_model_file
    )
    output_data = generate_ref_data(mod, input_data, params)

    mod = partition_for_ethosu(mod, params)
    compiled = infra.build_source(
        mod,
        input_data,
        output_data,
        accel_type,
        output_tolerance=tolerance,
        enable_usmp=False,
    )

    memory_map = mlf._build_function_memory_map(
        compiled[0].executor_factory.function_metadata
    )
    assert memory_map["main"][0]["workspace_size_bytes"] == workspace_size

    infra.verify_source(compiled, accel_type, enable_usmp=False)