def test_quant_mobilenet_tfl():
    """Compile and run quantized MobileNet v1 (TFLite) through the AOT flow.

    In AOT the output buffer supplied by the user is passed directly to the
    generated code, so a quantized network cannot share it with intermediate
    results: the output data type is int8 while the intermediate buffers are
    int32 or int16. Quantized MobileNet stresses this situation and verifies
    that output buffer sharing is disabled in AOT.
    """
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing

    runner = AOT_DEFAULT_RUNNER

    model_path = tf_testing.get_workload_official(
        "https://storage.googleapis.com/download.tensorflow.org/"
        "models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz",
        "mobilenet_v1_1.0_224_quant.tflite",
    )
    with open(model_path, "rb") as model_file:
        model_bytes = model_file.read()

    # np.random.randint treats `high` as exclusive, so samples lie in [0, 254].
    image = np.random.randint(0, high=255, size=(1, 224, 224, 3), dtype="uint8")

    mod, params = convert_to_relay(model_bytes)
    feed = {"input": image}
    expected = generate_ref_data(mod, feed, params)

    compile_and_run(
        AOTTestModel(module=mod, inputs=feed, outputs=expected, params=params),
        runner,
        "c",  # interface_api
        True,  # use_unpacked_api
    )
def test_tflite_model(model_url, usmp_algo, workspace_size):
    """Check the memory used by an ML model when USMP plans it with a given algorithm.

    The model is compiled with USMP enabled, every compiled module is checked
    for the absence of ``TVMBackendAllocWorkspace`` calls and for the expected
    workspace size, and finally the compiled model is run and checked against
    the reference outputs.

    Parameters
    ----------
    model_url
        Indexable pair; ``model_url[0]`` and ``model_url[1]`` are forwarded, in
        that order, to ``tf_testing.get_workload_official`` to obtain the
        ``.tflite`` file.
    usmp_algo
        Value stored under the ``tir.usmp.algorithm`` pass-config key.
    workspace_size
        Expected sum of the ``__tvm_main__`` workspace sizes of the compiled
        model.
    """
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing

    use_unpacked_api = True
    interface_api = "c"
    test_runner = AOTTestRunner(
        pass_config={"tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo}
    )

    tflite_model_file = tf_testing.get_workload_official(model_url[0], model_url[1])

    # Take input names, shapes and dtypes from the converted model instead of
    # hard-coding a single (1, 224, 224, 3) uint8 tensor named "input": this
    # test is parametrized over `model_url`, and the shared helper keeps the
    # `convert_to_relay` call identical to the other users in this file.
    mod, inputs, params = _get_relay_module_and_inputs_from_tflite_file(tflite_model_file)
    output_list = generate_ref_data(mod, inputs, params)

    compiled_test_mods = compile_models(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
    )

    for compiled_model in compiled_test_mods:
        check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib)

        # Checking the workspace size. Done per compiled model so the check
        # never relies on the loop variable surviving past the loop.
        main_metadata = compiled_model.executor_factory.function_metadata["__tvm_main__"]
        assert sum(main_metadata.workspace_sizes.values()) == workspace_size

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
def _random_input_data(shape, dtype):
    """Return a random ``numpy`` array of the given shape and dtype.

    Integer dtypes are sampled over their whole representable range and
    floating-point dtypes from ``[0, 1)``.

    Raises
    ------
    ValueError
        If ``dtype`` is neither an integer nor a floating-point dtype.
    """
    try:
        np_dtype = np.dtype(dtype)
    except TypeError as err:
        # Keep the exception type np.iinfo raised for unknown dtype strings.
        raise ValueError(f"Unsupported input dtype: {dtype!r}") from err

    if np.issubdtype(np_dtype, np.integer):
        limits = np.iinfo(np_dtype)
        # `high` is exclusive in np.random.randint; +1 makes the dtype's
        # maximum value reachable.
        return np.random.randint(limits.min, high=limits.max + 1, size=shape, dtype=np_dtype)

    if np.issubdtype(np_dtype, np.floating):
        return np.random.uniform(size=shape).astype(np_dtype)

    raise ValueError(f"Unsupported input dtype: {dtype!r}")


def _get_relay_module_and_inputs_from_tflite_file(tflite_model_file):
    """Load a TFLite model as a Relay module and create random inputs for it.

    Parameters
    ----------
    tflite_model_file
        Path of the ``.tflite`` file; it is opened in binary mode.

    Returns
    -------
    mod
        The module returned by ``convert_to_relay``.
    inputs : dict
        Maps the name hint of every ``mod["main"]`` parameter to a random
        ``numpy`` array with that parameter's annotated shape and dtype.
    params
        The parameters returned by ``convert_to_relay``.

    Raises
    ------
    ValueError
        If a parameter's dtype is neither integer nor floating point.
    """
    with open(tflite_model_file, "rb") as f:
        tflite_model_buf = f.read()
    mod, params = convert_to_relay(tflite_model_buf)

    inputs = {}
    for param in mod["main"].params:
        annotation = param.type_annotation
        data_shape = [int(dim) for dim in annotation.shape]
        inputs[str(param.name_hint)] = _random_input_data(data_shape, str(annotation.dtype))

    return mod, inputs, params