def test_avgpool_1d(data_shape_ncw, pool_size, strides, padding):
    """Test a subgraph with a single avgpool_1d operator."""
    ishape = data_shape_ncw

    # Reference module computes the pooling in int32.
    input0 = relay.var("input", relay.TensorType(ishape, "int32"))
    out0 = relay.op.nn.avg_pool1d(input0, pool_size, layout="NCW", strides=strides, padding=padding)
    ref_mod = tvm.IRModule.from_expr(relay.Function([input0], out0))

    # Module under test runs the same pooling but in int16.
    input1 = relay.var("input", relay.TensorType(ishape, "int16"))
    out1 = relay.op.nn.avg_pool1d(input1, pool_size, layout="NCW", strides=strides, padding=padding)
    mod = tvm.IRModule.from_expr(relay.Function([input1], out1))

    # Small value range [-10, 10) so int16 and int32 results agree.
    input_data = np.random.randint(low=-10, high=10, size=ishape, dtype="int32")
    inputs = {"input": input_data}
    output_list = generate_ref_data(ref_mod, inputs)
    compile_and_run(
        AOTTestModel(
            module=mod, inputs={"input": input_data.astype(dtype="int16")}, outputs=output_list
        ),
        runner=AOT_CORSTONE300_RUNNER,
        interface_api="c",
        use_unpacked_api=True,
        target_opts={
            "-keys": "arm_cpu",
            "-mcpu": "cortex-m7",
        },
    )
def test_dense(M, K, N):
    """Test a subgraph with a single dense operator."""
    ishape = (M, K)
    wshape = (N, K)

    input0 = relay.var("input", relay.TensorType(ishape, "int8"))
    # batch_flatten collapses the input to 2-D before the dense op.
    dense_f = relay.op.nn.batch_flatten(input0)
    weight0 = relay.const(np.random.randint(low=-10, high=10, size=wshape, dtype="int8"))
    out = relay.op.nn.dense(dense_f, weight0, out_dtype="int32")
    mod = tvm.IRModule.from_expr(relay.Function([input0], out))

    inputs = {"input": np.random.randint(low=-128, high=127, size=ishape, dtype="int8")}
    # Reference output computed on the host from the same module.
    output_list = generate_ref_data(mod, inputs)
    compile_and_run(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list),
        runner=AOT_CORSTONE300_RUNNER,
        interface_api="c",
        use_unpacked_api=True,
        target_opts={
            "-keys": "arm_cpu",
            "-mcpu": "cortex-m7",
        },
    )
def test_maxpool_1d(data_shape_nwc, pool_size, strides, padding):
    """Test a subgraph with a single maxpool_1d operator."""
    shape = data_shape_nwc

    # Build the single-operator module.
    data_var = relay.var("input", relay.TensorType(shape, "int8"))
    pooled = relay.op.nn.max_pool1d(
        data_var, pool_size, layout="NWC", strides=strides, padding=padding
    )
    mod = tvm.IRModule.from_expr(relay.Function([data_var], pooled))

    # Random int8 input and host-side reference output.
    test_inputs = {"input": np.random.randint(low=-128, high=127, size=shape, dtype="int8")}
    expected = generate_ref_data(mod, test_inputs)

    compile_and_run(
        AOTTestModel(module=mod, inputs=test_inputs, outputs=expected),
        runner=AOT_CORSTONE300_RUNNER,
        interface_api="c",
        use_unpacked_api=True,
        target_opts={
            "-keys": "arm_cpu",
            "-mcpu": "cortex-m7",
        },
    )
def test_ethosu_requantize(accel_type, ifm_shape, ifm_scale, ifm_zp, ofm_scale, ofm_zp):
    """Compare an Ethos-U requantize against the CPU reference output."""
    np.random.seed(0)
    dtype = "int8"

    def build_module():
        # Single requantize op with the parameterized scales / zero points.
        ifm = relay.var("ifm", shape=ifm_shape, dtype="int8")
        requantize = relay.qnn.op.requantize(
            ifm,
            relay.const(ifm_scale, dtype="float32"),
            relay.const(ifm_zp, dtype="int32"),
            relay.const(ofm_scale, dtype="float32"),
            relay.const(ofm_zp, dtype="int32"),
        )
        return tvm.IRModule.from_expr(relay.Function([ifm], requantize))

    cpu_mod = build_module()
    input_data = {"ifm": np.random.randint(-128, high=127, size=ifm_shape, dtype=dtype)}
    output_data = generate_ref_data(cpu_mod, input_data)

    ethosu_mod = partition_for_ethosu(cpu_mod)
    _compare_ethosu_with_reference(ethosu_mod, input_data, output_data, accel_type)
def test_relay_reshape_codegen(ifm_shape, new_shape, accel_type):
    """Codegen test: reshape offloaded to Ethos-U; inspects the command stream."""
    # Create a "partitioned" Relay graph
    ifm0 = relay.var("ifm0", shape=ifm_shape, dtype="int8")
    reshape = relay.op.reshape(ifm0, newshape=new_shape)
    mod = infra.make_partitioned_function(reshape)

    data = np.random.randint(-128, high=127, size=ifm_shape, dtype="int8")

    # Generate a reference output using Relay reshape that doesn't get offloaded
    ref_mod = tvm.IRModule()
    ref_mod["main"] = relay.Function([ifm0], reshape)
    ref_mod = relay.transform.InferType()(ref_mod)

    out_data = generate_ref_data(ref_mod, {"ifm0": data})

    # NOTE(review): the reference is fed with key "ifm0" but the compiled model
    # with key "ifm" — presumably make_partitioned_function renames the input;
    # verify against its implementation.
    compiled_model = infra.build_source(
        mod,
        {"ifm": data},
        out_data,
        accel_type,
    )

    imported_modules = compiled_model[0].executor_factory.lib.imported_modules
    # Assumes only two runtime.Modules are created -- i.e. a single offload module.
    assert len(imported_modules) == 2
    ethosu_module = imported_modules[0]

    # Verify generated C source
    get_cs = tvm._ffi.get_global_func("runtime.module.ethos-u.getcs")
    cmms = get_cs(ethosu_module)
    cmms = bytes.fromhex(cmms)

    infra.print_payload(cmms)
    infra.verify_source(compiled_model, accel_type)
def test_forward_mobilenet_v1(accel_type):
    """Test the Mobilenet V1 TF Lite model."""
    np.random.seed(23)
    # Fetch the official quantized MobileNet V1 TFLite model.
    tflite_model_file = tf_testing.get_workload_official(
        "https://storage.googleapis.com/download.tensorflow.org/"
        "models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz",
        "mobilenet_v1_1.0_224_quant.tflite",
    )
    with open(tflite_model_file, "rb") as f:
        tflite_model_buf = f.read()

    input_tensor = "input"
    input_dtype = "uint8"
    input_shape = (1, 224, 224, 3)
    in_min, in_max = util.get_range_for_dtype_str(input_dtype)
    input_data = np.random.randint(in_min, high=in_max, size=input_shape, dtype=input_dtype)

    relay_mod, params = convert_to_relay(tflite_model_buf)
    input_data = {input_tensor: input_data}
    # CPU reference output to compare the Ethos-U build against.
    output_data = generate_ref_data(relay_mod, input_data)

    mod = partition_for_ethosu(relay_mod, params)
    compiled_models = infra.build_source(
        mod, input_data, output_data, accel_type, output_tolerance=10
    )
    infra.verify_source(compiled_models, accel_type)
def test_ethosu_left_shift_binary_elemwise(
    accel_type,
    ifm_shape,
    ifm2_shape,
):
    """Compare an Ethos-U left-shift elementwise op against the CPU reference."""
    np.random.seed(0)
    dtype = "int32"

    def build_module():
        lhs = relay.var("ifm", shape=ifm_shape, dtype=dtype)
        rhs = relay.var("ifm2", shape=ifm2_shape, dtype=dtype)
        shifted = relay.left_shift(lhs, rhs)
        return tvm.IRModule.from_expr(relay.Function([lhs, rhs], shifted))

    cpu_mod = build_module()

    # Reference inputs: full-range LHS; shift amounts kept in [0, 32).
    lo, hi = util.get_range_for_dtype_str(dtype)
    input_data = {
        "ifm": np.random.randint(lo, high=hi, size=ifm_shape, dtype=dtype),
        "ifm2": np.random.randint(0, high=32, size=ifm2_shape, dtype=dtype),
    }
    output_data = generate_ref_data(cpu_mod, input_data)

    ethosu_mod = partition_for_ethosu(cpu_mod)
    _compare_ethosu_with_reference(
        ethosu_mod, input_data, output_data, accel_type, output_tolerance=0
    )
def test_op_int8(zero_point, scale):
    """Validate an int8 operator offloaded to CMSIS-NN against the CPU reference."""
    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_USMP_CORSTONE300_RUNNER

    dtype = "int8"
    shape = [1, 16, 16, 3]
    model = make_model(shape, dtype, dtype, zero_point, scale)
    orig_mod = make_module(model)
    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod)

    # validate pattern matching
    assert_partitioned_function(orig_mod, cmsisnn_mod)

    # validate the output
    in_min, in_max = get_range_for_dtype_str(dtype)
    np.random.seed(0)
    input_data = np.random.randint(in_min, high=in_max, size=shape, dtype=dtype)
    inputs = {"in0": input_data}
    params = {}
    output_list = generate_ref_data(orig_mod["main"], inputs, params)
    compile_and_run(
        AOTTestModel(module=cmsisnn_mod, inputs=inputs, outputs=output_list, params=params),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
def test_cnn_small():
    """End-to-end test of the ARM ML-zoo cnn_small keyword-spotting model on CMSIS-NN."""
    # download the model
    base_url = "https://github.com/ARM-software/ML-zoo/raw/master/models/keyword_spotting/cnn_small/tflite_int8"
    file_to_download = "cnn_s_quantized.tflite"
    model_file = download_testdata("{}/{}".format(base_url, file_to_download), file_to_download)
    with open(model_file, "rb") as f:
        tflite_model_buf = f.read()

    input_shape = (1, 490)
    in_min, in_max = get_range_for_dtype_str("int8")
    # NOTE(review): int8-range integers are cast to float32 — presumably
    # convert_to_relay quantizes the input internally; confirm this is intended.
    input_data = np.random.randint(in_min, high=in_max, size=input_shape).astype(np.float32)

    orig_mod, params = convert_to_relay(tflite_model_buf, input_data, "input")
    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params)

    # validate CMSIS-NN output against CPU output
    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_CORSTONE300_RUNNER
    inputs = {"input": input_data}
    params = {}
    output_list = generate_ref_data(orig_mod["main"], inputs, params)
    compile_and_run(
        AOTTestModel(module=cmsisnn_mod, inputs=inputs, outputs=output_list, params=params),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
def test_elementwise_add_from_constant_scalar(accel_type, dtype):
    """Test qnn.add with a constant scalar operand offloaded to Ethos-U."""
    ifm_shape = (1, 4, 4, 8)

    def create_relay_graph():
        inp = relay.var("input", shape=ifm_shape, dtype=dtype)
        scalar = relay.const(np.ones((1, 1, 1, 1), dtype=dtype), dtype=dtype)
        # Positional qnn.add args: lhs, rhs, then (scale, zero_point) pairs
        # for lhs, rhs and the output — all identity quantization here.
        add = relay.qnn.op.add(
            inp,
            scalar,
            relay.const(1.0, dtype="float32"),
            relay.const(0, dtype="int32"),
            relay.const(1.0, dtype="float32"),
            relay.const(0, dtype="int32"),
            relay.const(1.0, dtype="float32"),
            relay.const(0, dtype="int32"),
        )
        return tvm.IRModule.from_expr(relay.Function(relay.analysis.free_vars(add), add))

    cpu_mod = create_relay_graph()
    ethosu_mod = partition_for_ethosu(cpu_mod)

    # Generate reference data
    input_data = {
        "input": np.random.randint(
            low=np.iinfo(dtype).min, high=np.iinfo(dtype).max, size=ifm_shape, dtype=dtype
        ),
    }
    output_data = generate_ref_data(cpu_mod, input_data)

    _compare_ethosu_with_reference(
        ethosu_mod, input_data, output_data, accel_type, output_tolerance=0
    )
def test_conv2d(data_shape_nhwc, kernel_size, num_filter, strides, padding, dilation, dtype):
    """Test a subgraph with a single conv2d operator."""
    ishape = data_shape_nhwc
    wshape = (*kernel_size, data_shape_nhwc[-1], num_filter)
    weight_data = np.random.randint(low=-10, high=10, size=wshape, dtype=dtype)

    # Reference module: HWIO kernel layout.
    input0 = relay.var("input", relay.TensorType(ishape, dtype))
    weight0 = relay.const(weight_data)
    out0 = relay.op.nn.conv2d(
        input0,
        weight0,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        dilation=(dilation, dilation),
        data_layout="NHWC",
        kernel_layout="HWIO",
        out_dtype="int32",
        out_layout="NHWC",
    )
    ref_mod = tvm.IRModule.from_expr(relay.Function([input0], out0))

    # Module under test: identical weights moved to HWOI layout
    # (np.moveaxis(weight_data, 2, -1) moves the I axis after O).
    input1 = relay.var("input", relay.TensorType(ishape, dtype))
    weight1 = relay.const(np.moveaxis(weight_data, 2, -1))
    out1 = relay.op.nn.conv2d(
        input1,
        weight1,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        dilation=(dilation, dilation),
        data_layout="NHWC",
        kernel_layout="HWOI",
        out_dtype="int32",
        out_layout="NHWC",
    )
    mod = tvm.IRModule.from_expr(relay.Function([input1], out1))

    inputs = {"input": np.random.randint(low=-128, high=127, size=ishape, dtype=dtype)}
    # Expected outputs come from the HWIO reference module.
    output_list = generate_ref_data(ref_mod, inputs)
    compile_and_run(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list),
        runner=AOT_CORSTONE300_RUNNER,
        interface_api="c",
        use_unpacked_api=True,
        target_opts={
            "-keys": "arm_cpu",
            "-mcpu": "cortex-m7",
        },
    )
def test_ethosu_section_name():
    """Check that Ethos-U constant data is emitted into the .rodata.tvm section."""

    def create_graph_single(input_tensor_name, input_tensor_shape, input_tensor_dtype):
        # Single qnn conv2d graph built with the relay_ir_builder helpers.
        c1_params = relay_ir_builder.QnnConv2DParams(input_tensor_dtype)
        c1_params.ifm.shape = input_tensor_shape
        c1_params.kernel.shape = (3, 3, c1_params.ifm.shape[3], 32)
        c1_params.kernel.sc = relay.const(np.random.rand(32) * 2, "float32")
        c1_params.strides = (1, 1)
        c1_params.pad = "VALID"
        c1_params.update_output_qnn_params(
            input_tensor_dtype, input_tensor_dtype, input_tensor_dtype
        )
        input0 = relay.var(input_tensor_name, shape=c1_params.ifm.shape, dtype=c1_params.ifm.dtype)
        c1, new_params = relay_ir_builder.create_qnn_conv2d(c1_params, input0)
        c1_params.ofm.shape = get_shape_expr(input0, c1)
        f = relay.Function([input0], c1)
        mod = tvm.IRModule()
        mod["main"] = f
        return mod, [c1_params]

    accel_type = "ethos-u55-256"
    relay_module, _ = create_graph_single("input", (1, 300, 300, 3), "int8")
    input_dtype = "int8"
    mod = partition_for_ethosu(relay_module)

    # Generate reference data
    in_min, in_max = util.get_range_for_dtype_str(input_dtype)
    input_data = {
        "input": np.random.randint(in_min, high=in_max, size=(1, 300, 300, 3), dtype=input_dtype)
    }
    output_data = generate_ref_data(relay_module, input_data)

    compiled_models = infra.build_source(
        mod, input_data, output_data, accel_type, output_tolerance=1
    )

    # Assumes only two runtime.Modules are created -- i.e. single offload module
    ethosu_module = compiled_models[0].executor_factory.lib.imported_modules[0].imported_modules[0]

    # Verify generated C source
    source = ethosu_module.get_source()
    assert (
        '__attribute__((section(".rodata.tvm"), aligned(16))) static int8_t tvmgen_default_ethos_u_main_0_cms_data_data'
        in source
    )
    assert (
        '__attribute__((section(".rodata.tvm"), aligned(16))) static int8_t tvmgen_default_ethos_u_main_0_weights'
        in source
    )
def test_op_int8(op, input_0_scale, input_0_zero_point, input_1_scale, input_1_zero_point):
    """Validate a quantized binary operator offloaded to CMSIS-NN."""
    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_CORSTONE300_RUNNER

    dtype = "int8"
    shape = [1, 16, 16, 3]
    model = make_model(
        op,
        shape,
        dtype,
        dtype,
        input_0_scale,
        input_0_zero_point,
        input_1_scale,
        input_1_zero_point,
    )
    orig_mod = make_module(model)
    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod)

    # validate pattern matching
    # Collect attrs of the global functions produced by partitioning.
    attrs = [
        cmsisnn_mod[var.name_hint].attrs
        for var in cmsisnn_mod.get_global_vars()
        if cmsisnn_mod[var.name_hint].attrs
    ]
    assert any(attrs), "At least one function with external attributes was expected."

    # At least one partitioned function must be tagged Compiler="cmsis-nn".
    compilers = [
        key == "Compiler" and value == "cmsis-nn"
        for attr in attrs
        for key, value in attr.items()
    ]
    assert any(compilers), "Module does not contain function for cmsisnn target."

    # Partitioning must not add or drop calls.
    assert count_num_calls(orig_mod) == count_num_calls(
        cmsisnn_mod
    ), "Number of calls changed during partitioning"

    # validate the output
    in_min, in_max = get_range_for_dtype_str(dtype)
    inputs = {
        "input_0": np.random.randint(in_min, high=in_max, size=shape, dtype=dtype),
        "input_1": np.random.randint(in_min, high=in_max, size=shape, dtype=dtype),
    }
    output_list = generate_ref_data(orig_mod["main"], inputs)
    compile_and_run(
        AOTTestModel(
            module=cmsisnn_mod,
            inputs=inputs,
            outputs=output_list,
            output_tolerance=1,
        ),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
def test_op_int8(
    in_shape,
    pool_size,
    strides,
    padding,
    relu_type,
    pool_type,
    zero_point,
    scale,
):
    """Validate a quantized pooling operator offloaded to CMSIS-NN.

    Builds the pooling model, checks that partitioning offloads it to
    CMSIS-NN, then compares AOT execution output against the CPU reference.
    """
    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_USMP_CORSTONE300_RUNNER

    dtype = "int8"
    model = make_model(
        pool_type,
        in_shape,
        pool_size,
        strides,
        padding,
        dtype,
        scale,
        zero_point,
        relu_type,
    )
    orig_mod = make_module(model)
    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod)

    # validate pattern matching
    assert_partitioned_function(orig_mod, cmsisnn_mod)

    # validate the output
    in_min, in_max = get_range_for_dtype_str(dtype)
    np.random.seed(0)
    inputs = {
        # Use the shared `dtype` instead of repeating the "int8" literal.
        "input": np.random.randint(in_min, high=in_max, size=in_shape, dtype=dtype),
    }
    output_list = generate_ref_data(orig_mod["main"], inputs)
    compile_and_run(
        AOTTestModel(
            module=cmsisnn_mod,
            inputs=inputs,
            outputs=output_list,
            params=None,
            output_tolerance=1,
        ),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
def test_relay_strided_slice_codegen(ifm_shape, begin, end, accel_type):
    """Compare an Ethos-U strided_slice against the CPU reference output."""

    def build_module():
        data = relay.var("ifm", shape=ifm_shape, dtype="int8")
        sliced = relay.op.strided_slice(data, begin, end)
        return tvm.IRModule.from_expr(relay.Function([data], sliced))

    cpu_mod = build_module()

    # Random int8 input and host-side reference output.
    input_data = {"ifm": np.random.randint(-128, high=127, size=ifm_shape, dtype="int8")}
    output_data = generate_ref_data(cpu_mod, input_data)

    ethosu_mod = _create_ethosu_partition(cpu_mod)
    _compare_ethosu_with_reference(ethosu_mod, input_data, output_data, accel_type)
def test_constant_input_int8(op, input_0, input_1):
    """Validate a CMSIS-NN binary operator where either operand may be a constant."""
    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_USMP_CORSTONE300_RUNNER

    dtype = "int8"
    shape = [1, 16, 16, 3]
    input_0_scale = 0.256
    input_0_zero_point = 33
    input_1_scale = 0.128
    input_1_zero_point = -24
    model = make_model(
        op,
        input_0,
        input_1,
        input_0_scale,
        input_0_zero_point,
        input_1_scale,
        input_1_zero_point,
    )
    orig_mod = make_module(model)
    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod)

    # validate pattern matching
    assert_partitioned_function(orig_mod, cmsisnn_mod)

    # validate the output
    # Only operands that are Relay variables get runtime data; constant
    # operands are already baked into the graph.
    in_min, in_max = get_range_for_dtype_str(dtype)
    inputs = {}
    if isinstance(input_0, tvm.relay.expr.Var):
        inputs.update(
            {"input_0": np.random.randint(in_min, high=in_max, size=shape, dtype=dtype)}
        )
    if isinstance(input_1, tvm.relay.expr.Var):
        inputs.update(
            {"input_1": np.random.randint(in_min, high=in_max, size=shape, dtype=dtype)}
        )
    output_list = generate_ref_data(orig_mod["main"], inputs)
    compile_and_run(
        AOTTestModel(
            module=cmsisnn_mod,
            inputs=inputs,
            outputs=output_list,
            output_tolerance=1,
        ),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
def test_binary_add_from_constant_scalar(accel_type):
    """Codegen test: qnn.add with a constant scalar operand offloaded to Ethos-U."""
    dtype = "uint8"
    ifm_shape = (1, 4, 4, 8)

    def create_relay_graph():
        inp = relay.var("input", shape=ifm_shape, dtype=dtype)
        scalar = relay.const(np.ones((1, 1, 1, 1), dtype=dtype), dtype=dtype)
        # Positional qnn.add args: lhs, rhs, then (scale, zero_point) pairs
        # for lhs, rhs and the output — all identity quantization here.
        add = relay.qnn.op.add(
            inp,
            scalar,
            relay.const(1.0, dtype="float32"),
            relay.const(0, dtype="int32"),
            relay.const(1.0, dtype="float32"),
            relay.const(0, dtype="int32"),
            relay.const(1.0, dtype="float32"),
            relay.const(0, dtype="int32"),
        )
        func = relay.Function(relay.analysis.free_vars(add), add)
        return tvm.IRModule.from_expr(func)

    mod = create_relay_graph()
    partitioned_mod = partition_for_ethosu(mod)

    # Generate reference data
    input_data = {"input": np.random.randint(low=0, high=255, size=ifm_shape, dtype=dtype)}
    output_data = generate_ref_data(mod, input_data)

    compiled_models = infra.build_source(
        partitioned_mod,
        input_data,
        output_data,
        accel_type,
        output_tolerance=0,
    )

    # Assumes only two runtime.Modules are created -- i.e. single offload module
    imported_modules = compiled_models[0].executor_factory.lib.imported_modules
    assert len(imported_modules) == 2
    ethosu_module = imported_modules[0]

    # Verify generated C source
    get_cs = tvm._ffi.get_global_func("runtime.module.ethos-u.getcs")
    cmms = get_cs(ethosu_module)
    cmms = bytes.fromhex(cmms)

    infra.print_payload(cmms)
    infra.verify_source(compiled_models, accel_type)
def test_op_int8(
    op, relu_type, input_0_scale, input_0_zero_point, input_1_scale, input_1_zero_point
):
    """Validate a quantized binary operator (with optional ReLU) on CMSIS-NN."""
    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_USMP_CORSTONE300_RUNNER

    dtype = "int8"
    shape = [1, 16, 16, 3]
    model = make_model(
        op,
        generate_variable("input_0"),
        generate_variable("input_1"),
        input_0_scale,
        input_0_zero_point,
        input_1_scale,
        input_1_zero_point,
        relu_type,
    )
    orig_mod = make_module(model)
    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod)

    # validate pattern matching
    assert_partitioned_function(orig_mod, cmsisnn_mod)

    # validate the output
    in_min, in_max = get_range_for_dtype_str(dtype)
    inputs = {
        "input_0": np.random.randint(in_min, high=in_max, size=shape, dtype=dtype),
        "input_1": np.random.randint(in_min, high=in_max, size=shape, dtype=dtype),
    }
    output_list = generate_ref_data(orig_mod["main"], inputs)
    compile_and_run(
        AOTTestModel(
            module=cmsisnn_mod,
            inputs=inputs,
            outputs=output_list,
            output_tolerance=1,
        ),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
def test_ethosu_left_shift_binary_elemwise(
    accel_type,
    ifm_shape,
    ifm2_shape,
):
    """Codegen test: left_shift offloaded to Ethos-U; inspects the command stream."""
    dtype = "int32"

    def create_model():
        ifm = relay.var("ifm", shape=ifm_shape, dtype=dtype)
        ifm2 = relay.var("ifm2", shape=ifm2_shape, dtype=dtype)
        c1 = relay.left_shift(ifm, ifm2)
        f = relay.Function([ifm, ifm2], c1)
        mod = tvm.IRModule()
        mod["main"] = f
        return mod

    relay_mod = create_model()
    mod = partition_for_ethosu(relay_mod)

    # Generate reference data
    in_min, in_max = util.get_range_for_dtype_str(dtype)
    input_data = {
        "ifm": np.random.randint(in_min, high=in_max, size=ifm_shape, dtype=dtype),
        # Shift amounts restricted to [0, 32) to stay within int32 width.
        "ifm2": np.random.randint(0, high=32, size=ifm2_shape, dtype=dtype),
    }
    output_data = generate_ref_data(relay_mod, input_data)

    compiled_models = infra.build_source(
        mod,
        input_data,
        output_data,
        accel_type,
    )

    # Assumes only two runtime.Modules are created -- i.e. single offload module
    imported_modules = compiled_models[0].executor_factory.lib.imported_modules
    assert len(imported_modules) == 2
    ethosu_module = imported_modules[0]

    # Verify generated C source
    get_cs = tvm._ffi.get_global_func("runtime.module.ethos-u.getcs")
    cmms = get_cs(ethosu_module)
    cmms = bytes.fromhex(cmms)

    infra.print_payload(cmms)
    infra.verify_source(compiled_models, accel_type)
def test_relay_reshape_codegen(ifm_shape, new_shape, accel_type):
    """Compare an Ethos-U reshape against the CPU reference output."""
    np.random.seed(0)

    def build_module():
        data = relay.var("ifm", shape=ifm_shape, dtype="int8")
        reshaped = relay.op.reshape(data, newshape=new_shape)
        return tvm.IRModule.from_expr(relay.Function([data], reshaped))

    cpu_mod = build_module()

    # Random int8 input and host-side reference output.
    input_data = {"ifm": np.random.randint(-128, high=127, size=ifm_shape, dtype="int8")}
    output_data = generate_ref_data(cpu_mod, input_data)

    ethosu_mod = _create_ethosu_partition(cpu_mod)
    _compare_ethosu_with_reference(ethosu_mod, input_data, output_data, accel_type)
def create_mod_from_relay():
    """Build a cast→mean→requantize Relay module plus reference input/output.

    NOTE(review): relies on ``ifm_shape``, ``dtype``, ``axis`` and
    ``keep_dims`` from an enclosing scope not visible in this chunk.
    """
    ifm = relay.var("input", shape=ifm_shape, dtype=dtype)
    cast = relay.cast(ifm, dtype="int32")
    mean = relay.mean(cast, axis=axis, keepdims=keep_dims)
    # Identity requantize: scale 1.0 and zero point 0 on both sides.
    requantize = relay.qnn.op.requantize(
        mean,
        input_scale=relay.const(1.0, dtype="float32"),
        input_zero_point=relay.const(0, dtype="int32"),
        output_scale=relay.const(1.0, dtype="float32"),
        output_zero_point=relay.const(0, dtype="int32"),
    )

    func = relay.Function(relay.analysis.free_vars(requantize), requantize)
    mod = tvm.IRModule.from_expr(func)

    input_data = {"input": np.random.randint(low=-127, high=128, size=ifm_shape, dtype=dtype)}
    output_data = generate_ref_data(mod, input_data)
    return mod, input_data, output_data
def test_networks_without_usmp(accel_type, model_url, workspace_size, tolerance):
    """Build a TFLite network without USMP and check its reported workspace size."""
    np.random.seed(23)
    model_path = tf_testing.get_workload_official(model_url[0], model_url[1])
    mod, input_data, params = create_relay_module_and_inputs_from_tflite_file(model_path)
    output_data = generate_ref_data(mod, input_data, params)

    mod = partition_for_ethosu(mod, params)
    compiled_models = infra.build_source(
        mod,
        input_data,
        output_data,
        accel_type,
        output_tolerance=tolerance,
        enable_usmp=False,
    )

    # Without USMP the per-function memory map carries the workspace requirement.
    mlf_memory_map = mlf._build_function_memory_map(
        compiled_models[0].executor_factory.function_metadata
    )
    assert mlf_memory_map["main"][0]["workspace_size_bytes"] == workspace_size

    infra.verify_source(compiled_models, accel_type, enable_usmp=False)
def test_networks_with_usmp(accel_type, model_url, workspace_size, tolerance):
    """Build a TFLite network with USMP and check the allocated pool size."""
    np.random.seed(23)
    model_path = tf_testing.get_workload_official(model_url[0], model_url[1])
    mod, input_data, params = create_relay_module_and_inputs_from_tflite_file(model_path)
    output_data = generate_ref_data(mod, input_data, params)

    mod = partition_for_ethosu(mod, params)
    compiled_models = infra.build_source(
        mod,
        input_data,
        output_data,
        accel_type,
        output_tolerance=tolerance,
        enable_usmp=True,
    )

    # With USMP a single memory pool is planned; verify its allocated size.
    pool_inputs = dict(
        compiled_models[0].executor_factory.executor_codegen_metadata.pool_inputs
    )
    allocated_pool_info = list(pool_inputs.values())[0]
    assert allocated_pool_info.allocated_size == workspace_size

    infra.verify_source(compiled_models, accel_type, enable_usmp=True)
def test_empty_function():
    """AOT-execute a CMSIS-NN module whose partitioned function is a trivial add."""
    # Plain Relay model: a single elementwise add.
    ORIGINAL_MODEL = """
#[version = "0.0.5"]
def @main(%data : Tensor[(16, 29), int8]) -> Tensor[(16, 29), int8] {
    add(%data, %data)
}
"""
    # Same computation with the add moved into an external cmsis-nn function.
    CMSISNN_MODEL = """
#[version = "0.0.5"]
def @tvmgen_default_cmsis_nn_main_1(%i1: Tensor[(16, 29), int8], Inline=1, Compiler="cmsis-nn", global_symbol="tvmgen_default_cmsis_nn_main_1", Primitive=1) -> Tensor[(16, 29), int8] {
    add(%i1, %i1)
}
def @main(%data : Tensor[(16, 29), int8]) -> Tensor[(16, 29), int8] {
    %1 = @tvmgen_default_cmsis_nn_main_1(%data) /* ty=Tensor[(16, 29), int8] */;
    %1
}
"""
    orig_mod = tvm.parser.fromtext(ORIGINAL_MODEL)
    cmsisnn_mod = tvm.parser.fromtext(CMSISNN_MODEL)
    params = {}

    # validate the output
    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_USMP_CORSTONE300_RUNNER
    dtype = "int8"
    in_min, in_max = get_range_for_dtype_str(dtype)
    rng = np.random.default_rng(12345)
    inputs = {"data": rng.integers(in_min, high=in_max, size=(16, 29), dtype=dtype)}
    outputs = generate_ref_data(orig_mod["main"], inputs, params)
    compile_and_run(
        AOTTestModel(
            module=cmsisnn_mod,
            inputs=inputs,
            outputs=outputs,
            params=params,
            output_tolerance=0,
        ),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
def test_cnn_small():
    """End-to-end test of the ARM ML-zoo cnn_small keyword-spotting model on CMSIS-NN."""
    # download the model (pinned to a specific ML-zoo revision)
    base_url = "https://github.com/ARM-software/ML-zoo/raw/48a22ee22325d15d2371a6df24eb7d67e21dcc97/models/keyword_spotting/cnn_small/tflite_int8"
    file_to_download = "cnn_s_quantized.tflite"
    file_saved = "cnn_s_quantized_15Dec2021.tflite"
    model_file = download_testdata("{}/{}".format(base_url, file_to_download), file_saved)
    with open(model_file, "rb") as f:
        tflite_model_buf = f.read()

    input_shape = (1, 490)
    dtype = "int8"
    in_min, in_max = get_range_for_dtype_str(dtype)
    rng = np.random.default_rng(12345)
    input_data = rng.integers(in_min, high=in_max, size=input_shape, dtype=dtype)

    orig_mod, params = convert_to_relay(tflite_model_buf, input_data, "input")
    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params)

    # validate CMSIS-NN output against CPU output
    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_CORSTONE300_RUNNER
    inputs = {"input": input_data}
    params = {}
    output_list = generate_ref_data(orig_mod["main"], inputs, params)
    compile_and_run(
        AOTTestModel(
            module=cmsisnn_mod,
            inputs=inputs,
            outputs=output_list,
            params=params,
            output_tolerance=1,
        ),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
def test_cnn_small():
    """End-to-end test of the ARM ML-zoo cnn_small keyword-spotting model on CMSIS-NN."""
    # download the model (pinned to a specific ML-zoo revision)
    base_url = "https://github.com/ARM-software/ML-zoo/raw/ee35139af86bdace5e502b09fe8b9da9cb1f06bb/models/keyword_spotting/cnn_small/tflite_int8"
    file_to_download = "cnn_s_quantized.tflite"
    model_file = download_testdata("{}/{}".format(base_url, file_to_download), file_to_download)
    with open(model_file, "rb") as f:
        tflite_model_buf = f.read()

    input_shape = (1, 490)
    rng = np.random.default_rng(12345)
    # NOTE(review): feeds float32 values in [0, 1) to a quantized (int8) model —
    # presumably convert_to_relay quantizes the input internally; confirm.
    input_data = rng.random(input_shape, dtype=np.float32)

    orig_mod, params = convert_to_relay(tflite_model_buf, input_data, "input")
    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params)

    # validate CMSIS-NN output against CPU output
    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_CORSTONE300_RUNNER
    inputs = {"input": input_data}
    params = {}
    output_list = generate_ref_data(orig_mod["main"], inputs, params)
    compile_and_run(
        AOTTestModel(
            module=cmsisnn_mod,
            inputs=inputs,
            outputs=output_list,
            params=params,
            output_tolerance=1,
        ),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
def test_depthwise_int8(
    ifm_shape,
    kernel_size,
    padding,
    strides,
    dilation,
    enable_bias,
    relu_type,
    input_zero_point,
    input_scale,
    kernel_scale,
    out_channels,
    depth_multiplier,
):
    """Validate a quantized depthwise conv2d offloaded to CMSIS-NN."""
    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_CORSTONE300_RUNNER

    dtype = "int8"
    groups = 1
    weight_format = "HWIO"
    kernel_h = kernel_size[0]
    kernel_w = kernel_size[1]
    kernel_shape = (kernel_h, kernel_w, ifm_shape[3] // groups, out_channels)
    kernel_zero_point = 0
    in_min, in_max = get_range_for_dtype_str(dtype)

    # Depthwise settings: groups equals the input channel count and the
    # kernel uses HWOI layout; these overwrite the regular-conv values above.
    groups = ifm_shape[3]
    weight_format = "HWOI"
    kernel_shape = (kernel_h, kernel_w, ifm_shape[3], depth_multiplier)
    out_channels = ifm_shape[3] * depth_multiplier
    # Cycle the given scales so there is one kernel scale per output channel.
    ks_len = len(kernel_scale)
    kernel_scale = [kernel_scale[i % ks_len] for i in range(out_channels)]

    output_scale, output_zero_point = get_conv2d_qnn_params(
        kernel_shape,
        input_scale,
        input_zero_point,
        kernel_scale,
        kernel_zero_point,
        dtype,
        dtype,
        dtype,
        True,
    )

    model, params = make_model(
        ifm_shape,
        kernel_shape,
        input_zero_point,
        input_scale,
        kernel_zero_point,
        kernel_scale,
        output_zero_point,
        output_scale,
        padding,
        strides,
        dilation,
        groups,
        dtype,
        dtype,
        out_channels,
        weight_format,
        enable_bias,
        relu_type,
    )
    orig_mod = make_module(model)
    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params)

    # validate pattern matching
    # Collect attrs of the global functions produced by partitioning.
    attrs = [
        cmsisnn_mod[var.name_hint].attrs
        for var in cmsisnn_mod.get_global_vars()
        if cmsisnn_mod[var.name_hint].attrs
    ]
    assert any(attrs), "At least one function with external attributes was expected."

    # At least one partitioned function must be tagged Compiler="cmsis-nn".
    compilers = [
        key == "Compiler" and value == "cmsis-nn"
        for attr in attrs
        for key, value in attr.items()
    ]
    assert any(compilers), "Module does not contain function for cmsis-nn target."

    # Partitioning must not add or drop calls.
    assert count_num_calls(orig_mod) == count_num_calls(
        cmsisnn_mod
    ), "Number of calls changed during partitioning"

    # validate the output
    rng = np.random.default_rng(12345)
    inputs = {"input": rng.integers(in_min, high=in_max, size=ifm_shape, dtype=dtype)}
    output_list = generate_ref_data(orig_mod["main"], inputs, params)
    compile_and_run(
        AOTTestModel(
            module=cmsisnn_mod,
            inputs=inputs,
            outputs=output_list,
            params=params,
            output_tolerance=1,
        ),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
def test_depthwise_int8(
    ifm_shape,
    kernel_size,
    padding,
    strides,
    dilation,
    enable_bias,
    relu_type,
    input_zero_point,
    input_scale,
    kernel_scale,
    out_channels,
    depth_multiplier,
):
    """Validate a quantized depthwise conv2d offloaded to CMSIS-NN."""
    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_CORSTONE300_RUNNER

    dtype = "int8"
    groups = 1
    weight_format = "HWIO"
    kernel_h = kernel_size[0]
    kernel_w = kernel_size[1]
    kernel_shape = (kernel_h, kernel_w, ifm_shape[3] // groups, out_channels)
    kernel_zero_point = 0
    in_min, in_max = get_range_for_dtype_str(dtype)

    # Depthwise settings: groups equals the input channel count and the
    # kernel uses HWOI layout; these overwrite the regular-conv values above.
    groups = ifm_shape[3]
    weight_format = "HWOI"
    kernel_shape = (kernel_h, kernel_w, ifm_shape[3], depth_multiplier)
    out_channels = ifm_shape[3] * depth_multiplier
    # Cycle the given scales so there is one kernel scale per output channel.
    ks_len = len(kernel_scale)
    kernel_scale = [kernel_scale[i % ks_len] for i in range(out_channels)]

    output_scale, output_zero_point = get_conv2d_qnn_params(
        kernel_shape,
        input_scale,
        input_zero_point,
        kernel_scale,
        kernel_zero_point,
        dtype,
        dtype,
        dtype,
        True,
    )

    model, params = make_model(
        ifm_shape,
        kernel_shape,
        input_zero_point,
        input_scale,
        kernel_zero_point,
        kernel_scale,
        output_zero_point,
        output_scale,
        padding,
        strides,
        dilation,
        groups,
        dtype,
        dtype,
        out_channels,
        weight_format,
        enable_bias,
        relu_type,
    )
    orig_mod = make_module(model)
    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params)

    # validate pattern matching
    assert_partitioned_function(orig_mod, cmsisnn_mod)

    # validate the output
    rng = np.random.default_rng(12345)
    inputs = {"input": rng.integers(in_min, high=in_max, size=ifm_shape, dtype=dtype)}
    output_list = generate_ref_data(orig_mod["main"], inputs, params)
    compile_and_run(
        AOTTestModel(
            module=cmsisnn_mod,
            inputs=inputs,
            outputs=output_list,
            params=params,
            output_tolerance=1,
        ),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
def test_op_int8(
    in_shape,
    enable_bias,
    input_zero_point,
    input_scale,
    kernel_scale,
    out_channels,
    relu_type,
):
    """Validate a quantized CMSIS-NN operator whose qnn params derive from a 1x1 conv2d."""
    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_USMP_CORSTONE300_RUNNER

    dtype = "int8"
    kernel_zero_point = 0
    kernel_shape = [out_channels, in_shape[1]]
    # Equivalent 1x1 conv2d kernel shape used only to derive quantization params.
    conv2d_kernel_shape = (1, 1, kernel_shape[0], kernel_shape[1])
    in_min, in_max = get_range_for_dtype_str(dtype)

    output_scale, output_zero_point = get_conv2d_qnn_params(
        conv2d_kernel_shape,
        input_scale,
        input_zero_point,
        kernel_scale,
        kernel_zero_point,
        dtype,
    )
    model, params = make_model(
        in_shape,
        kernel_shape,
        input_zero_point,
        kernel_zero_point,
        input_scale,
        kernel_scale,
        output_zero_point,
        output_scale,
        dtype,
        dtype,
        out_channels,
        enable_bias,
    )
    orig_mod = make_module(model)
    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params)

    # validate pattern matching
    assert_partitioned_function(orig_mod, cmsisnn_mod)

    # validate the output
    rng = np.random.default_rng(12345)
    inputs = {"input": rng.integers(in_min, high=in_max, size=in_shape, dtype=dtype)}
    output_list = generate_ref_data(orig_mod["main"], inputs, params)
    compile_and_run(
        AOTTestModel(
            module=cmsisnn_mod,
            inputs=inputs,
            outputs=output_list,
            params=params,
            output_tolerance=1,
        ),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
def test_ethosu_conv2d(accel_type):
    """Compile and run single/double/activation qnn conv2d graphs on Ethos-U."""

    def create_graph_single(input_tensor_name, input_tensor_shape, input_tensor_dtype):
        # One 3x3 conv with random per-channel kernel scales.
        c1_params = relay_ir_builder.QnnConv2DParams(input_tensor_dtype)
        c1_params.ifm.shape = input_tensor_shape
        c1_params.kernel.shape = (3, 3, c1_params.ifm.shape[3], 32)
        c1_params.kernel.sc = relay.const(np.random.rand(32) * 2, "float32")
        c1_params.strides = (1, 1)
        c1_params.pad = "VALID"
        c1_params.update_output_qnn_params(
            input_tensor_dtype, input_tensor_dtype, input_tensor_dtype
        )
        input0 = relay.var(input_tensor_name, shape=c1_params.ifm.shape, dtype=c1_params.ifm.dtype)
        c1, new_params = relay_ir_builder.create_qnn_conv2d(c1_params, input0)
        c1_params.ofm.shape = get_shape_expr(input0, c1)
        f = relay.Function([input0], c1)
        mod = tvm.IRModule()
        mod["main"] = f
        return mod, [c1_params]

    def create_graph_double(input_tensor_name, input_tensor_shape, input_tensor_dtype):
        # Two chained convs: 7x7 stride-2 VALID followed by 5x5 SAME.
        c1_params = relay_ir_builder.QnnConv2DParams(input_tensor_dtype)
        c1_params.ifm.shape = input_tensor_shape
        c1_params.kernel.shape = (7, 7, c1_params.ifm.shape[3], 8)
        c1_params.strides = (2, 2)
        c1_params.pad = "VALID"
        c1_params.update_output_qnn_params(
            input_tensor_dtype, input_tensor_dtype, input_tensor_dtype
        )
        input0 = relay.var(input_tensor_name, shape=c1_params.ifm.shape, dtype=c1_params.ifm.dtype)
        c1, new_params = relay_ir_builder.create_qnn_conv2d(c1_params, input0)
        c1_params.ofm.shape = get_shape_expr(input0, c1)
        c2_params = relay_ir_builder.QnnConv2DParams(input_tensor_dtype)
        c2_params.ifm.shape = c1_params.ofm.shape
        c2_params.kernel.shape = (5, 5, c2_params.ifm.shape[3], 16)
        c2_params.strides = (1, 1)
        c2_params.pad = "SAME"
        c2_params.update_output_qnn_params()
        c2, new_params = relay_ir_builder.create_qnn_conv2d(c2_params, c1)
        c2_params.ofm.shape = get_shape_expr(input0, c2)
        f = relay.Function([input0], c2)
        mod = tvm.IRModule()
        mod["main"] = f
        return mod, [c2_params, c1_params]

    def create_graph_activation(input_tensor_name, input_tensor_shape, input_tensor_dtype):
        # Same as the double graph but the first conv carries a CLIP activation.
        c1_params = relay_ir_builder.QnnConv2DParams(input_tensor_dtype)
        c1_params.ifm.shape = input_tensor_shape
        c1_params.kernel.shape = (7, 7, c1_params.ifm.shape[3], 8)
        c1_params.strides = (2, 2)
        c1_params.pad = "VALID"
        c1_params.activation = "CLIP"
        c1_params.clip_min = 90
        c1_params.clip_max = 110
        c1_params.update_output_qnn_params(
            input_tensor_dtype, input_tensor_dtype, input_tensor_dtype
        )
        input0 = relay.var(input_tensor_name, shape=c1_params.ifm.shape, dtype=c1_params.ifm.dtype)
        c1, new_params = relay_ir_builder.create_qnn_conv2d(c1_params, input0)
        c1_params.ofm.shape = get_shape_expr(input0, c1)
        c2_params = relay_ir_builder.QnnConv2DParams(input_tensor_dtype)
        c2_params.ifm.shape = c1_params.ofm.shape
        c2_params.kernel.shape = (5, 5, c2_params.ifm.shape[3], 16)
        c2_params.strides = (1, 1)
        c2_params.pad = "SAME"
        c2_params.update_output_qnn_params()
        c2, new_params = relay_ir_builder.create_qnn_conv2d(c2_params, c1)
        c2_params.ofm.shape = get_shape_expr(input0, c2)
        f = relay.Function([input0], c2)
        mod = tvm.IRModule()
        mod["main"] = f
        return mod, [c2_params, c1_params]

    test_cases = [
        (create_graph_single, ["input", (1, 300, 300, 3), "int8"]),
        (create_graph_double, ["input", (1, 128, 256, 4), "int8"]),
        (create_graph_activation, ["input", (1, 64, 100, 4), "int8"]),
    ]
    np.random.seed(42)
    for test_case in test_cases:
        relay_module, conv_params = test_case[0](*test_case[1])
        input_tensor, input_shape, input_dtype = test_case[1]
        mod = partition_for_ethosu(relay_module)

        # Generate reference data
        in_min, in_max = util.get_range_for_dtype_str(input_dtype)
        input_data = {
            input_tensor: np.random.randint(
                in_min, high=in_max, size=input_shape, dtype=input_dtype
            )
        }
        output_data = generate_ref_data(relay_module, input_data)

        compiled_models = infra.build_source(
            mod, input_data, output_data, accel_type, output_tolerance=1
        )

        # Assumes only two runtime.Modules are created -- i.e. single offload module
        ethosu_module = (
            compiled_models[0].executor_factory.lib.imported_modules[0].imported_modules[0]
        )

        # Verify generated C source
        get_artifacts = tvm._ffi.get_global_func("runtime.module.ethos-u.get_artifacts")
        compilation_artifacts = get_artifacts(ethosu_module)
        cmms = bytes.fromhex(compilation_artifacts[0].command_stream)
        infra.print_payload(cmms)
        infra.verify_source(compiled_models, accel_type)