def test_tflite_model_u2_usecase_two_models_with_a_single_external_pool(model_urls, usmp_algo):
    """This checks for inference using a single large enough common pool"""
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing

    use_unpacked_api = True
    interface_api = "c"

    target = tvm.target.Target("c")
    workspace_memory_pools = WorkspaceMemoryPools(
        [PoolInfo("my_memory_pool", {target: PoolInfo.READ_WRITE_ACCESS})]
    )
    test_runner = AOTTestRunner(
        pass_config={"tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo},
        prologue=f"""
        #define MAX(A, B) ((A > B) ? A : B)
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool[MAX({_get_workspace_size_define_macro("my_memory_pool", "mod1")},{_get_workspace_size_define_macro("my_memory_pool", "mod2")})];
        """,
    )

    tflite_model_file1 = tf_testing.get_workload_official(
        model_urls[0][0],
        model_urls[0][1],
    )
    mod1, inputs1, params1 = create_relay_module_and_inputs_from_tflite_file(tflite_model_file1)
    output_list1 = generate_ref_data(mod1, inputs1, params1)

    tflite_model_file2 = tf_testing.get_workload_official(
        model_urls[1][0],
        model_urls[1][1],
    )
    mod2, inputs2, params2 = create_relay_module_and_inputs_from_tflite_file(tflite_model_file2)
    output_list2 = generate_ref_data(mod2, inputs2, params2)

    compiled_test_mods = compile_models(
        [
            AOTTestModel(
                name="mod1", module=mod1, inputs=inputs1, outputs=output_list1, params=params1
            ),
            AOTTestModel(
                name="mod2", module=mod2, inputs=inputs2, outputs=output_list2, params=params2
            ),
        ],
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
        workspace_memory_pools=workspace_memory_pools,
        target=target,
    )

    for compiled_model in compiled_test_mods:
        check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )

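# Illustrative sketch (not part of the original suite): the C prologue above sizes
# one shared pool as the MAX of the two models' workspace requirements, since the
# models run one at a time and can reuse the same memory. The byte counts below
# are made-up placeholders, not real model sizes.
def _example_shared_pool_size(mod1_workspace_bytes=120_000, mod2_workspace_bytes=95_000):
    """Return the smallest single pool that can serve either model on its own."""
    return max(mod1_workspace_bytes, mod2_workspace_bytes)
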
def test_multiple_models(interface_api, use_unpacked_api, test_runner):
    # Identity model without params
    x = relay.var("x", "float32")
    mod1 = relay.Function([x], x)
    one = np.array(1.0, "float32")
    inputs1 = {"x": one}
    output_list1 = generate_ref_data(mod1, inputs1)
    params1 = None

    # Convolution model
    RELAY_MODEL = """
#[version = "0.0.5"]
def @main(%data : Tensor[(1, 3, 64, 64), uint8], %weight : Tensor[(8, 3, 5, 5), int8]) {
    %1 = nn.conv2d(
        %data,
        %weight,
        padding=[2, 2],
        channels=8,
        kernel_size=[5, 5],
        data_layout="NCHW",
        kernel_layout="OIHW",
        out_dtype="int32");
    %1
}
"""
    mod2 = tvm.parser.fromtext(RELAY_MODEL)
    main_func = mod2["main"]
    shape_dict = {p.name_hint: p.checked_type.concrete_shape for p in main_func.params}
    type_dict = {p.name_hint: p.checked_type.dtype for p in main_func.params}

    weight_data = np.ones(shape_dict["weight"]).astype(type_dict["weight"])
    input_data = np.ones(shape_dict["data"]).astype(type_dict["data"])
    params2 = {"weight": weight_data}
    inputs2 = {"data": input_data}
    output_list2 = generate_ref_data(mod2, inputs2, params2)

    compile_and_run(
        [
            AOTTestModel(
                name="mod1",
                module=mod1,
                inputs=inputs1,
                outputs=output_list1,
                params=params1,
            ),
            AOTTestModel(
                name="mod2",
                module=mod2,
                inputs=inputs2,
                outputs=output_list2,
                params=params2,
            ),
        ],
        test_runner,
        interface_api,
        use_unpacked_api,
    )

def test_conv2d(enable_usmp, target_kind):
    RELAY_MODEL = textwrap.dedent(
        """\
        #[version = "0.0.5"]
        def @main(%data : Tensor[(1, 3, 64, 64), uint8], %weight : Tensor[(3, 3, 5, 5), int8]) {
            %1 = nn.conv2d(
                %data,
                %weight,
                padding=[2, 2],
                channels=3,
                kernel_size=[5, 5],
                data_layout="NCHW",
                kernel_layout="OIHW",
                out_dtype="int32");
            %2 = cast(nn.max_pool2d(%1, pool_size=[3, 3]), dtype="int8");
            %3 = nn.conv2d(
                %2,
                %weight,
                padding=[2, 2],
                channels=3,
                kernel_size=[5, 5],
                data_layout="NCHW",
                kernel_layout="OIHW",
                out_dtype="int32");
            %4 = nn.max_pool2d(%3, pool_size=[3, 3]);
            %4
        }
        """
    )
    ir_mod = tvm.parser.fromtext(RELAY_MODEL)
    main_func = ir_mod["main"]
    shape_dict = {p.name_hint: p.checked_type.concrete_shape for p in main_func.params}
    type_dict = {p.name_hint: p.checked_type.dtype for p in main_func.params}
    weight_data = np.ones(shape_dict["weight"]).astype(type_dict["weight"])
    input_data = np.ones(shape_dict["data"]).astype(type_dict["data"])
    params = {"weight": weight_data}
    inputs = {"data": input_data}
    ref_outputs = generate_ref_data(ir_mod, inputs, params)

    with tvm.transform.PassContext(
        opt_level=3, config={"tir.disable_vectorize": True, "tir.usmp.enable": enable_usmp}
    ):
        mod = tvm.relay.build(
            ir_mod,
            params=params,
            target=target_kind,
            executor=backend.Executor("aot", {"interface-api": "packed"}),
        )

    temp_dir = tvm.contrib.utils.TempDirectory()
    test_so_path = temp_dir / "test.so"
    mod.export_library(test_so_path, cc="gcc", options=["-std=c11"])
    loaded_mod = tvm.runtime.load_module(test_so_path)
    runner = tvm.runtime.executor.AotModule(loaded_mod["default"](tvm.cpu(0)))
    runner.set_input(**inputs)
    runner.run()
    assert (runner.get_output(0).asnumpy() == list(ref_outputs.values())[0]).all()

def test_mobilenet(enable_usmp, target_kind):
    ir_mod, params = testing.mobilenet.get_workload(batch_size=1)
    data_shape = [int(x) for x in ir_mod["main"].checked_type.arg_types[0].shape]
    data = np.random.uniform(size=data_shape).astype("float32")
    inputs = {"data": data}
    ref_outputs = generate_ref_data(ir_mod, inputs, params)

    with tvm.transform.PassContext(
        opt_level=3, config={"tir.disable_vectorize": True, "tir.usmp.enable": enable_usmp}
    ):
        mod = tvm.relay.build(
            ir_mod,
            params=params,
            target=target_kind,
            executor=backend.Executor("aot", {"interface-api": "packed"}),
        )

    temp_dir = tvm.contrib.utils.TempDirectory()
    test_so_path = temp_dir / "test.so"
    mod.export_library(test_so_path, cc="gcc", options=["-std=c11"])
    loaded_mod = tvm.runtime.load_module(test_so_path)
    runner = tvm.runtime.executor.AotModule(loaded_mod["default"](tvm.cpu(0)))
    runner.set_input(**inputs)
    runner.run()
    assert (runner.get_output(0).asnumpy() == list(ref_outputs.values())[0]).all()

def test_conv2d(interface_api, use_unpacked_api, test_runner, groups, weight_shape):
    """Test a subgraph with a single conv2d operator."""
    dtype = "float32"
    ishape = (1, 32, 14, 14)
    wshape = (32, weight_shape, 3, 3)

    data0 = relay.var("data", shape=ishape, dtype=dtype)
    weight0 = relay.var("weight", shape=wshape, dtype=dtype)
    out = relay.nn.conv2d(data0, weight0, kernel_size=(3, 3), padding=(1, 1), groups=groups)
    main_f = relay.Function([data0, weight0], out)
    mod = tvm.IRModule()
    mod["main"] = main_f
    mod = transform.InferType()(mod)

    i_data = np.random.uniform(0, 1, ishape).astype(dtype)
    w1_data = np.random.uniform(0, 1, wshape).astype(dtype)

    inputs = OrderedDict([("data", i_data), ("weight", w1_data)])
    output_list = generate_ref_data(mod, inputs)

    compile_and_run(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list),
        test_runner,
        interface_api,
        use_unpacked_api,
    )

def test_add_name_mangling_with_params(interface_api, use_unpacked_api, test_runner):
    x = relay.var("x", shape=(1, 10))
    y = relay.var("y", shape=(1, 10))
    z = relay.add(x, y)
    func = relay.Function([x, y], z)

    x_in = np.ones((1, 10)).astype("float32")
    y_in = np.random.uniform(size=(1, 10)).astype("float32")

    params = {"x": x_in}
    inputs = {"y": y_in}
    output_list = generate_ref_data(func, inputs, params)

    compile_and_run(
        AOTTestModel(
            name="my_mod",
            module=func,
            inputs=inputs,
            outputs=output_list,
            params=params,
        ),
        test_runner,
        interface_api,
        use_unpacked_api,
    )

def test_conv_with_params(interface_api, use_unpacked_api, test_runner):
    RELAY_MODEL = """
#[version = "0.0.5"]
def @main(%data : Tensor[(1, 3, 64, 64), uint8], %weight : Tensor[(8, 3, 5, 5), int8]) {
    %1 = nn.conv2d(
        %data,
        %weight,
        padding=[2, 2],
        channels=8,
        kernel_size=[5, 5],
        data_layout="NCHW",
        kernel_layout="OIHW",
        out_dtype="int32");
    %1
}
"""
    mod = tvm.parser.fromtext(RELAY_MODEL)
    main_func = mod["main"]
    shape_dict = {p.name_hint: p.checked_type.concrete_shape for p in main_func.params}
    type_dict = {p.name_hint: p.checked_type.dtype for p in main_func.params}

    weight_data = np.ones(shape_dict["weight"]).astype(type_dict["weight"])
    input_data = np.ones(shape_dict["data"]).astype(type_dict["data"])

    params = {"weight": weight_data}
    inputs = {"data": input_data}
    output_list = generate_ref_data(mod, inputs, params)

    compile_and_run(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params),
        test_runner,
        interface_api,
        use_unpacked_api,
    )

def test_byoc_microtvm_multiple_subgraphs(merge_compiler_regions):
    """This is a test case to check BYOC capabilities of AOT with multiple subgraphs"""
    use_unpacked_api = False
    interface_api = "packed"
    test_runner = AOT_DEFAULT_RUNNER

    x = relay.var("x", shape=(10, 10))
    w0 = relay.var("w0", shape=(10, 10))
    w1 = relay.var("w1", shape=(10, 10))
    w2 = relay.var("w2", shape=(10, 10))
    w3 = relay.var("w3", shape=(10, 10))
    w4 = relay.var("w4", shape=(10, 10))
    w5 = relay.var("w5", shape=(10, 10))
    w6 = relay.var("w6", shape=(10, 10))
    w7 = relay.var("w7", shape=(10, 10))

    # Subgraphs offloaded to the C compiler
    z0 = relay.add(x, w0)
    p0 = relay.subtract(z0, w1)
    q0 = relay.multiply(p0, w2)

    z1 = relay.add(x, w3)
    p1 = relay.subtract(z1, w4)
    q1 = relay.multiply(p1, w5)

    # Other parts on TVM
    z2 = relay.add(x, w6)
    q2 = relay.subtract(z2, w7)

    r = relay.concatenate((q0, q1, q2), axis=0)
    f = relay.Function([x, w0, w1, w2, w3, w4, w5, w6, w7], r)
    mod = tvm.IRModule()
    ann = byoc.CcompilerAnnotator()
    mod["main"] = ann.visit(f)

    if merge_compiler_regions:
        mod = transform.MergeCompilerRegions()(mod)

    mod = tvm.relay.transform.PartitionGraph("mod_name")(mod)
    mod = tvm.relay.transform.InferType()(mod)

    x_data = np.random.rand(10, 10).astype("float32")
    w_data = []
    for _ in range(8):
        w_data.append(np.random.rand(10, 10).astype("float32"))

    map_inputs = OrderedDict([("x", x_data)] + [("w{}".format(i), w_data[i]) for i in range(8)])
    output_list = generate_ref_data(mod, map_inputs)

    compile_and_run(
        AOTTestModel(name="my_mod", module=mod, inputs=map_inputs, outputs=output_list),
        test_runner,
        interface_api,
        use_unpacked_api,
    )

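# Note (added for clarity, not in the original suite): byoc.CcompilerAnnotator above
# wraps the add/subtract/multiply chains in compiler_begin/compiler_end annotations
# for the "ccompiler" external codegen, so PartitionGraph can split them out as
# separate BYOC subgraphs while the remaining operations stay on TVM's own codegen.
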
def test_mobilenet(debug_calculated_workspaces, workspace_byte_alignment):
    use_unpacked_api = True
    interface_api = "c"
    test_runner = AOT_DEFAULT_RUNNER

    # TODO(@Mousius) - Enable memory planning to take into account debug information
    debugging_memory_overhead = 1024 * 1024

    mod, params = testing.mobilenet.get_workload(batch_size=1)
    data_shape = [int(x) for x in mod["main"].checked_type.arg_types[0].shape]
    data = np.random.uniform(size=data_shape).astype("float32")
    inputs = {"data": data}
    output_list = generate_ref_data(mod, inputs, params)

    compile_and_run(
        AOTTestModel(
            module=mod,
            inputs=inputs,
            outputs=output_list,
            params=params,
            extra_memory_in_bytes=debugging_memory_overhead,
        ),
        test_runner,
        interface_api,
        use_unpacked_api,
        workspace_byte_alignment=workspace_byte_alignment,
        debug_calculated_workspaces=debug_calculated_workspaces,
    )

def test_quant_mobilenet_tfl():
    """Since in AOT we pass the output buffer directly from the user, output buffer
    sharing is not possible in quantized networks. This is because the output data
    type is int8 while the intermediate buffers are int32 or int16. We use quantized
    MobileNet to stress this situation and verify that output buffer sharing is
    disabled in AOT."""
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing

    use_unpacked_api = True
    interface_api = "c"
    test_runner = AOT_DEFAULT_RUNNER

    tflite_model_file = tf_testing.get_workload_official(
        "https://storage.googleapis.com/download.tensorflow.org/"
        "models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz",
        "mobilenet_v1_1.0_224_quant.tflite",
    )
    with open(tflite_model_file, "rb") as f:
        tflite_model_buf = f.read()
    data_shape = (1, 224, 224, 3)
    in_min, in_max = (0, 255)
    data = np.random.randint(in_min, high=in_max, size=data_shape, dtype="uint8")
    mod, params = convert_to_relay(tflite_model_buf)
    inputs = {"input": data}
    output_list = generate_ref_data(mod, inputs, params)

    compile_and_run(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params),
        test_runner,
        interface_api,
        use_unpacked_api,
    )

def test_transpose(interface_api, use_unpacked_api, test_runner):
    """Test that non-inplaceable operations (e.g., transpose) do not happen in-place."""
    dtype = "float32"
    x = relay.var("x", shape=(10, 5), dtype=dtype)
    y = relay.var("y", shape=(10, 5), dtype=dtype)
    t = relay.var("z", shape=(), dtype=dtype)
    a = relay.add(x, y)
    b = relay.transpose(a)
    z = relay.add(b, t)
    # Check result.
    func = relay.Function([x, y, t], z)
    x_data = np.random.rand(10, 5).astype(dtype)
    y_data = np.random.rand(10, 5).astype(dtype)
    t_data = np.random.uniform(size=()).astype(dtype)

    inputs = {"x": x_data, "y": y_data, "z": t_data}
    output_list = generate_ref_data(func, inputs)

    compile_and_run(
        AOTTestModel(module=IRModule.from_expr(func), inputs=inputs, outputs=output_list),
        test_runner,
        interface_api,
        use_unpacked_api,
        enable_op_fusion=False,
    )

def test_quant_mobilenet_tfl():
    """Since in AOT we pass the output buffer directly from the user, output buffer
    sharing is not possible in quantized networks. This is because the output data
    type is int8 while the intermediate buffers are int32 or int16. We use quantized
    MobileNet to stress this situation and verify that output buffer sharing is
    disabled in AOT."""
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing

    use_unpacked_api = True
    interface_api = "c"
    test_runner = AOT_DEFAULT_RUNNER

    tflite_model_file = tf_testing.get_workload_official(
        "https://storage.googleapis.com/download.tensorflow.org/"
        "models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz",
        "mobilenet_v1_1.0_224_quant.tflite",
    )
    mod, inputs, params = create_relay_module_and_inputs_from_tflite_file(tflite_model_file)
    output_list = generate_ref_data(mod, inputs, params)

    compile_and_run(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params),
        test_runner,
        interface_api,
        use_unpacked_api,
    )

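# Illustrative sketch (not part of the original suite): the docstring above argues
# that the int8 output buffer cannot be reused for int32/int16 intermediates. A
# quick element-size comparison shows why aliasing them would be unsafe: each
# intermediate element is wider than an output element.
def _example_buffer_sharing_conflict():
    import numpy as np

    output_itemsize = np.dtype("int8").itemsize  # 1 byte per element
    intermediate_itemsize = np.dtype("int32").itemsize  # 4 bytes per element
    # The intermediate buffer needs more space per element than the output provides.
    assert intermediate_itemsize > output_itemsize
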
def test_name_sanitiser_name_clash():
    """Test that two input tensors whose names clash once sanitized generate an error"""
    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_DEFAULT_RUNNER

    dtype = "float32"
    x = relay.var("input::-1", shape=(10, 5), dtype=dtype)
    # Next 2 input tensor names will clash once sanitized.
    y = relay.var("input::-2", shape=(10, 5), dtype=dtype)
    t = relay.var("input:--2", shape=(), dtype=dtype)
    a = relay.add(x, y)
    b = relay.transpose(a)
    z = relay.add(b, t)
    # Check result.
    func = relay.Function([x, y, t], z)
    x_data = np.random.rand(10, 5).astype(dtype)
    y_data = np.random.rand(10, 5).astype(dtype)
    t_data = np.random.uniform(size=()).astype(dtype)

    inputs = {"input::-1": x_data, "input::-2": y_data, "input:--2": t_data}
    output_list = generate_ref_data(func, inputs)

    with pytest.raises(TVMError, match="Sanitized input tensor name clash"):
        compile_and_run(
            AOTTestModel(module=IRModule.from_expr(func), inputs=inputs, outputs=output_list),
            test_runner,
            interface_api,
            use_unpacked_api,
            enable_op_fusion=False,
        )

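# Illustrative sketch (not part of the original suite): the clash above can be
# reproduced with a plausible sanitization scheme that replaces every run of
# non-identifier characters with a single underscore. The exact rule TVM applies
# may differ; this regex is an assumption for illustration only.
def _example_sanitize_clash():
    import re

    def sanitize(name):
        return re.sub(r"[^0-9a-zA-Z_]+", "_", name)

    # Both "input::-2" and "input:--2" collapse to "input_2" under this scheme,
    # which is the kind of collision the test expects to be reported.
    return sanitize("input::-2") == sanitize("input:--2") == "input_2"
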
def test_deprecated_target_arguments(capsys):
    """Tests we can still use relay.build with -executor, -runtime and -link-params"""
    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_DEFAULT_RUNNER

    x = relay.var("x", shape=(1, 10))
    y = relay.var("y", shape=(1, 10))
    z = relay.add(x, y)
    func = relay.Function([x, y], z)

    x_in = np.ones((1, 10)).astype("float32")
    y_in = np.random.uniform(size=(1, 10)).astype("float32")

    params = {"x": x_in}
    inputs = {"y": y_in}
    output_list = generate_ref_data(func, inputs, params)

    compile_and_run(
        AOTTestModel(
            module=IRModule.from_expr(func),
            inputs=inputs,
            outputs=output_list,
            params=params,
        ),
        test_runner,
        interface_api,
        use_unpacked_api,
        use_runtime_executor=False,
        target="c -executor=aot --link-params -runtime=c -interface-api=c --unpacked-api",
    )

def non_device_api_main_func():
    """Returns a factory that compiles a small multiply module and extracts the
    lowered __tvm_main__ entry point for inspection."""
    x = relay.var("x", shape=(10, 10))
    y = relay.var("y", shape=(1, 10))
    func = relay.Function([x, y], relay.multiply(x, y))

    x_data = np.random.rand(10, 10).astype("float32")
    y_data = np.random.rand(1, 10).astype("float32")

    inputs = OrderedDict([("x", x_data), ("y", y_data)])
    output_list = generate_ref_data(func, inputs)

    def compile_to_main_func(interface_api="c", use_unpacked_api=True):
        test_runner = AOT_DEFAULT_RUNNER
        compiled_models = compile_models(
            models=AOTTestModel(
                module=IRModule.from_expr(func),
                inputs=inputs,
                outputs=output_list,
            ),
            interface_api=interface_api,
            use_unpacked_api=use_unpacked_api,
            workspace_byte_alignment=16,
            pass_config=test_runner.pass_config,
        )
        main_ir_module = list(compiled_models[0].executor_factory.lowered_ir_mods.values())[0]
        main_func = main_ir_module["__tvm_main__"]
        return main_func

    return compile_to_main_func

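# Illustrative usage sketch (not part of the original suite): a test would
# typically materialize the factory above to inspect the lowered entry point,
# e.g. its parameter list. The attribute access below assumes __tvm_main__ is a
# tir.PrimFunc, which exposes .params.
def _example_main_func_usage():
    compile_to_main_func = non_device_api_main_func()
    main_func = compile_to_main_func(interface_api="c", use_unpacked_api=True)
    return main_func.params  # the lowered entry point's parameters
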
def test_conv2d(use_calculated_workspaces, interface_api, use_unpacked_api):
    """Test a subgraph with a single conv2d operator."""

    def conv2d_direct():
        dtype = "float32"
        ishape = (1, 32, 14, 14)
        w1shape = (32, 32, 3, 3)

        data0 = relay.var("data", shape=ishape, dtype=dtype)
        weight0 = relay.var("weight", shape=w1shape, dtype=dtype)
        out = relay.nn.conv2d(data0, weight0, kernel_size=(3, 3), padding=(1, 1))
        main_f = relay.Function([data0, weight0], out)
        mod = tvm.IRModule()
        mod["main"] = main_f
        mod = transform.InferType()(mod)

        i_data = np.random.uniform(0, 1, ishape).astype(dtype)
        w1_data = np.random.uniform(0, 1, w1shape).astype(dtype)

        inputs = OrderedDict([("data", i_data), ("weight", w1_data)])
        return mod, inputs, (1, 32, 14, 14)

    def group_conv2d():
        dtype = "float32"
        ishape = (1, 32, 14, 14)
        w2shape = (32, 1, 3, 3)

        data0 = relay.var("data", shape=ishape, dtype=dtype)
        weight0 = relay.var("weight", shape=w2shape, dtype=dtype)
        out = relay.nn.conv2d(data0, weight0, kernel_size=(3, 3), padding=(1, 1), groups=32)
        main_f = relay.Function([data0, weight0], out)
        mod = tvm.IRModule()
        mod["main"] = main_f
        mod = transform.InferType()(mod)

        i_data = np.random.uniform(0, 1, ishape).astype(dtype)
        w_data = np.random.uniform(0, 1, w2shape).astype(dtype)

        inputs = OrderedDict([("data", i_data), ("weight", w_data)])
        return mod, inputs, (1, 32, 14, 14)

    for mod, inputs, out_shape in [conv2d_direct(), group_conv2d()]:
        output_list = generate_ref_data(mod, inputs)
        compile_and_run(
            mod,
            inputs,
            output_list,
            interface_api,
            use_unpacked_api,
            use_calculated_workspaces,
        )

def test_tflite_model_u3_usecase_two_external_pools(model_url, usmp_algo):
    """This checks for inference using two external pools placed in the application"""
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing

    use_unpacked_api = True
    interface_api = "c"

    target = tvm.target.Target("c")
    workspace_memory_pools = WorkspaceMemoryPools(
        [
            PoolInfo(
                "my_memory_pool_1", {target: PoolInfo.READ_WRITE_ACCESS}, size_hint_bytes=2500000
            ),
            PoolInfo("my_memory_pool_2", {target: PoolInfo.READ_WRITE_ACCESS}),
        ]
    )
    test_runner = AOTTestRunner(
        pass_config={"tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo},
        prologue=f"""
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool_1[{_get_workspace_size_define_macro("my_memory_pool_1")}];
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool_2[{_get_workspace_size_define_macro("my_memory_pool_2")}];
        """,
    )

    tflite_model_file = tf_testing.get_workload_official(
        model_url[0],
        model_url[1],
    )
    mod, inputs, params = _get_relay_module_and_inputs_from_tflite_file(tflite_model_file)
    output_list = generate_ref_data(mod, inputs, params)

    compiled_test_mods = compile_models(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
        workspace_memory_pools=workspace_memory_pools,
        target=target,
    )

    for compiled_model in compiled_test_mods:
        check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )

def test_conv2d(interface_api, use_unpacked_api, test_runner, groups, weight_shape):
    """Test a subgraph with a single conv2d operator."""
    dtype = "float32"
    ishape = (1, 32, 14, 14)
    wshape = (32, weight_shape, 3, 3)
    pass_config = {"tir.usmp.enable": True}
    test_runner = AOTTestRunner(
        makefile=test_runner.makefile,
        prologue=test_runner.prologue,
        epilogue=test_runner.epilogue,
        includes=test_runner.includes,
        parameters=test_runner.parameters,
        pass_config=pass_config,
    )

    data0 = relay.var("data", shape=ishape, dtype=dtype)
    weight0 = relay.var("weight", shape=wshape, dtype=dtype)
    out = relay.nn.conv2d(data0, weight0, kernel_size=(3, 3), padding=(1, 1), groups=groups)
    main_f = relay.Function([data0, weight0], out)
    mod = tvm.IRModule()
    mod["main"] = main_f
    mod = transform.InferType()(mod)

    i_data = np.random.uniform(0, 1, ishape).astype(dtype)
    w1_data = np.random.uniform(0, 1, wshape).astype(dtype)

    inputs = OrderedDict([("data", i_data), ("weight", w1_data)])
    output_list = generate_ref_data(mod, inputs)

    compile_and_run(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
    compiled_test_mods = compile_models(
        models=AOTTestModel(module=mod, inputs=inputs, outputs=output_list),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
    )
    for compiled_model in compiled_test_mods:
        check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib)
    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )

def test_byoc_microtvm(merge_compiler_regions):
    """This is a simple test to check BYOC capabilities of AOT
    with and without merging compiler regions to test for
    https://github.com/apache/tvm/issues/9036"""
    use_unpacked_api = False
    interface_api = "packed"
    test_runner = AOTTestRunner(pass_config={"tir.usmp.enable": True})

    x = relay.var("x", shape=(10, 10))
    w0 = relay.var("w0", shape=(10, 10))
    w1 = relay.var("w1", shape=(10, 10))

    # z0 = x + w0
    x_ = compiler_begin(x, "ccompiler")
    w0_ = compiler_begin(w0, "ccompiler")
    z0_ = relay.add(x_, w0_)
    z0 = compiler_end(z0_, "ccompiler")

    # z1 = z0 + w1
    z0__ = compiler_begin(z0, "ccompiler")
    w1_ = compiler_begin(w1, "ccompiler")
    z1_ = relay.add(z0__, w1_)
    z1 = compiler_end(z1_, "ccompiler")

    # z2 = z0 + z1
    z2 = relay.add(z0, z1)

    f = relay.Function([x, w0, w1], z2)
    mod = tvm.IRModule()
    mod["main"] = f

    if merge_compiler_regions:
        mod = transform.MergeCompilerRegions()(mod)

    mod = transform.PartitionGraph("mod_name")(mod)
    mod = transform.InferType()(mod)

    x_data = [("x", np.random.rand(10, 10).astype("float32"))]
    w_data = [("w{}".format(i), np.random.rand(10, 10).astype("float32")) for i in range(2)]

    map_inputs = OrderedDict(x_data + w_data)
    output_list = generate_ref_data(mod, map_inputs)

    compiled_test_mods = compile_models(
        AOTTestModel(name="my_mod", module=mod, inputs=map_inputs, outputs=output_list),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
    )
    for compiled_model in compiled_test_mods:
        check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib)
    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )

def test_tuple_getitem(interface_api, use_unpacked_api, test_runner):
    func = relay.Function(
        [], relay.TupleGetItem(relay.Tuple([relay.const(1), relay.const(2)]), 0)
    )
    output_list = generate_ref_data(func, {})

    compile_and_run(
        AOTTestModel(module=IRModule.from_expr(func), inputs={}, outputs=output_list),
        test_runner,
        interface_api,
        use_unpacked_api,
    )

def test_add_const(interface_api, use_unpacked_api, test_runner):
    two = relay.add(relay.const(1), relay.const(1))
    func = relay.Function([], two)
    output_list = generate_ref_data(func, {})

    compile_and_run(
        AOTTestModel(module=IRModule.from_expr(func), inputs={}, outputs=output_list),
        test_runner,
        interface_api,
        use_unpacked_api,
    )

def test_tflite_model(model_url, usmp_algo, workspace_size):
    """This checks for ML models and the memory used by them when using USMP with different algorithms"""
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing

    use_unpacked_api = True
    interface_api = "c"
    test_runner = AOTTestRunner(
        pass_config={"tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo}
    )

    tflite_model_file = tf_testing.get_workload_official(
        model_url[0],
        model_url[1],
    )
    with open(tflite_model_file, "rb") as f:
        tflite_model_buf = f.read()
    data_shape = (1, 224, 224, 3)
    in_min, in_max = (0, 255)
    data = np.random.randint(in_min, high=in_max, size=data_shape, dtype="uint8")
    mod, params = convert_to_relay(tflite_model_buf, data, "input")
    inputs = {"input": data}
    output_list = generate_ref_data(mod, inputs, params)

    compiled_test_mods = compile_models(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
    )

    for compiled_model in compiled_test_mods:
        check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib)

    # Checking the workspace size
    assert (
        sum(
            compiled_model.executor_factory.function_metadata[
                "__tvm_main__"
            ].workspace_sizes.values()
        )
        == workspace_size
    )

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )

def test_id(interface_api, use_unpacked_api, test_runner):
    x = relay.var("x", "float32")
    ident = relay.Function([x], x)
    one = np.array(1.0, "float32")
    inputs = {"x": one}
    output_list = generate_ref_data(ident, inputs)

    compile_and_run(
        AOTTestModel(module=IRModule.from_expr(ident), inputs=inputs, outputs=output_list),
        test_runner,
        interface_api,
        use_unpacked_api,
    )

def test_subtract(interface_api, use_unpacked_api, test_runner):
    i = relay.var("i", shape=[], dtype="int32")
    sub = relay.subtract(i, relay.const(1, dtype="int32"))
    func = relay.Function([i], sub, ret_type=relay.TensorType([], "int32"))
    i_data = np.array(1, dtype="int32")
    inputs = {"i": i_data}
    output_list = generate_ref_data(func, inputs)

    compile_and_run(
        AOTTestModel(module=IRModule.from_expr(func), inputs=inputs, outputs=output_list),
        test_runner,
        interface_api,
        use_unpacked_api,
    )

def test_add_const(interface_api, use_unpacked_api, use_calculated_workspaces):
    two = relay.add(relay.const(1), relay.const(1))
    func = relay.Function([], two)
    output_list = generate_ref_data(func, {})
    inputs = {}

    compile_and_run(
        func,
        inputs,
        output_list,
        interface_api,
        use_unpacked_api,
        use_calculated_workspaces,
    )

def test_tflite_model_u1_usecase(model_url, usmp_algo, workspace_size):
    """This checks for ML models and the memory used by them when using USMP with different algorithms"""
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing

    use_unpacked_api = True
    interface_api = "c"
    test_runner = AOTTestRunner(
        pass_config={"tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo}
    )

    tflite_model_file = tf_testing.get_workload_official(
        model_url[0],
        model_url[1],
    )
    mod, inputs, params = create_relay_module_and_inputs_from_tflite_file(tflite_model_file)
    output_list = generate_ref_data(mod, inputs, params)

    compiled_test_mods = compile_models(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
    )

    for compiled_model in compiled_test_mods:
        check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib)

    # Checking the workspace size reported in model library format
    mlf_memory_map = mlf._build_function_memory_map(
        compiled_test_mods[0].executor_factory.function_metadata
    )
    assert mlf_memory_map["main"][0]["workspace_size_bytes"] == workspace_size

    # That should match the workspace size that will be codegen'd to the entry point.
    allocated_pool_info = list(
        dict(
            compiled_test_mods[0].executor_factory.executor_codegen_metadata.pool_inputs
        ).values()
    )[0]
    assert allocated_pool_info.allocated_size == workspace_size

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )

def test_byoc_microtvm(merge_compiler_regions):
    """This is a simple test to check BYOC capabilities of AOT
    with and without merging compiler regions to test for
    https://github.com/apache/tvm/issues/9036"""
    use_unpacked_api = False
    interface_api = "packed"
    test_runner = AOT_DEFAULT_RUNNER

    x = relay.var("x", shape=(10, 10))
    w0 = relay.var("w0", shape=(10, 10))
    w1 = relay.var("w1", shape=(10, 10))

    # z0 = x + w0
    x_ = compiler_begin(x, "ccompiler")
    w0_ = compiler_begin(w0, "ccompiler")
    z0_ = relay.add(x_, w0_)
    z0 = compiler_end(z0_, "ccompiler")

    # z1 = z0 + w1
    z0__ = compiler_begin(z0, "ccompiler")
    w1_ = compiler_begin(w1, "ccompiler")
    z1_ = relay.add(z0__, w1_)
    z1 = compiler_end(z1_, "ccompiler")

    # z2 = z0 + z1
    z2 = relay.add(z0, z1)

    f = relay.Function([x, w0, w1], z2)
    mod = tvm.IRModule()
    mod["main"] = f

    if merge_compiler_regions:
        mod = transform.MergeCompilerRegions()(mod)

    mod = transform.PartitionGraph("mod_name")(mod)
    mod = transform.InferType()(mod)

    x_data = [("x", np.random.rand(10, 10).astype("float32"))]
    w_data = [("w{}".format(i), np.random.rand(10, 10).astype("float32")) for i in range(2)]

    map_inputs = OrderedDict(x_data + w_data)
    output_list = generate_ref_data(mod, map_inputs)

    compile_and_run(
        AOTTestModel(name="my_mod", module=mod, inputs=map_inputs, outputs=output_list),
        test_runner,
        interface_api,
        use_unpacked_api,
    )

def test_id(interface_api, use_unpacked_api, use_calculated_workspaces):
    x = relay.var("x", "float32")
    ident = relay.Function([x], x)
    one = np.array(1.0, "float32")
    inputs = {"x": one}
    output_list = generate_ref_data(ident, inputs)

    compile_and_run(
        ident,
        inputs,
        output_list,
        interface_api,
        use_unpacked_api,
        use_calculated_workspaces,
    )

def test_subtract(interface_api, use_unpacked_api, use_calculated_workspaces):
    i = relay.var("i", shape=[], dtype="int32")
    sub = relay.subtract(i, relay.const(1, dtype="int32"))
    func = relay.Function([i], sub, ret_type=relay.TensorType([], "int32"))
    i_data = np.array(1, dtype="int32")
    inputs = {"i": i_data}
    output_list = generate_ref_data(func, inputs)

    compile_and_run(
        func,
        inputs,
        output_list,
        interface_api,
        use_unpacked_api,
        use_calculated_workspaces,
    )

def test_tuple_output(interface_api, use_unpacked_api, test_runner):
    x = relay.var("x", shape=(6, 9))
    y = relay.split(x, 3).astuple()
    a = relay.TupleGetItem(y, 0)
    b = relay.TupleGetItem(y, 1)
    out = relay.Tuple([a, b])
    func = relay.Function([x], out)
    x_data = np.random.rand(6, 9).astype("float32")
    inputs = {"x": x_data}
    output_list = generate_ref_data(func, inputs)

    compile_and_run(
        AOTTestModel(module=IRModule.from_expr(func), inputs=inputs, outputs=output_list),
        test_runner,
        interface_api,
        use_unpacked_api,
    )