Example #1
def test_aot_codegen_checks_returns():
    """This test checks whether AoT lowering creates calls that check the return value correctly"""
    x = relay.var("x", shape=(1, 10))
    y = relay.var("y", shape=(1, 10))
    z = relay.add(x, y)
    func = relay.Function([x, y], z)

    compiled_test_mods = compile_models(
        models=AOTTestModel(module=IRModule.from_expr(func),
                            inputs=None,
                            outputs=None),
        interface_api="c",
        use_unpacked_api=True,
    )
    source = compiled_test_mods[0].executor_factory.lib.imported_modules[
        0].get_source()

    main_ir_module = compiled_test_mods[
        0].executor_factory.lowered_ir_mods.items()[0][1]
    main_func = main_ir_module["__tvm_main__"]

    # Check operator call is wrapped properly
    assert (str(
        main_func.body[1]) == "tir.tvm_check_return(0, -1, tir.call_extern(" +
            '"tvmgen_default_fused_add",' +
            " x_buffer_var, y_buffer_var, output_buffer_var))\n")
    # TODO(Mousius) - Create a better place for C codegen tests
    assert (
        "if (tvmgen_default_fused_add(x_buffer_var, y_buffer_var, output_buffer_var) != 0 ) return -1;"
        in source)
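
The compile-then-inspect pattern above (pull the generated C source and the lowered TIR main function out of the executor factory) recurs throughout these examples. A minimal sketch that factors it into a helper, assuming AOTTestModel and compile_models are importable from tvm.testing.aot as in the TVM test suite:

from tvm.ir.module import IRModule
from tvm.testing.aot import AOTTestModel, compile_models

def compile_and_inspect(func):
    """Sketch: compile a Relay function, return (C source, TIR main func)."""
    mods = compile_models(
        models=AOTTestModel(module=IRModule.from_expr(func),
                            inputs=None,
                            outputs=None),
        interface_api="c",
        use_unpacked_api=True,
    )
    source = mods[0].executor_factory.lib.imported_modules[0].get_source()
    main_mod = list(mods[0].executor_factory.lowered_ir_mods.values())[0]
    return source, main_mod["__tvm_main__"]
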
Example #2
def test_tflite_model_u3_usecase_two_external_pools(model_url, usmp_algo):
    """This checks for inference using two external pools placed in the application"""
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing  # pylint: disable=import-outside-toplevel

    use_unpacked_api = True
    interface_api = "c"

    target = tvm.target.Target("c")
    workspace_memory_pools = WorkspaceMemoryPools([
        PoolInfo("my_memory_pool_1", {target: PoolInfo.READ_WRITE_ACCESS},
                 size_hint_bytes=2500000),
        PoolInfo("my_memory_pool_2", {target: PoolInfo.READ_WRITE_ACCESS}),
    ])
    test_runner = AOTTestRunner(
        pass_config={
            "tir.usmp.enable": True,
            "tir.usmp.algorithm": usmp_algo
        },
        prologue=f"""
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool_1[{_get_workspace_size_define_macro("my_memory_pool_1")}];
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool_2[{_get_workspace_size_define_macro("my_memory_pool_2")}];
        """,
    )

    tflite_model_file = tf_testing.get_workload_official(
        model_url[0],
        model_url[1],
    )
    mod, inputs, params = create_relay_module_and_inputs_from_tflite_file(
        tflite_model_file)
    output_list = generate_ref_data(mod, inputs, params)

    compiled_test_mods = compile_models(
        AOTTestModel(module=mod,
                     inputs=inputs,
                     outputs=output_list,
                     params=params),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
        workspace_memory_pools=workspace_memory_pools,
        target=target,
    )

    for compiled_model in compiled_test_mods:
        _check_for_no_tvm_backendallocworkspace_calls(
            compiled_model.executor_factory.lib)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
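
The model_url and usmp_algo arguments arrive via pytest parametrization that this listing has stripped. A hedged sketch of what the decorators might look like; "greedy_by_size" and "hill_climb" are real USMP algorithm names in TVM, but the pairing and the elided URL tuple are purely illustrative:

import pytest

@pytest.mark.parametrize("usmp_algo", ["greedy_by_size", "hill_climb"])  # illustrative
@pytest.mark.parametrize("model_url", [("<model-url>", "<archive-member>")])  # placeholders
def test_tflite_model_u3_usecase_two_external_pools(model_url, usmp_algo):
    ...  # body as above
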
Example #3
def test_tflite_model_u1_usecase(model_url, usmp_algo, workspace_size,
                                 constant_size):
    """
    This checks for ML models and the memory used by them
    when using USMP with different algorithms
    """
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing  # pylint: disable=import-outside-toplevel

    use_unpacked_api = True
    interface_api = "c"
    test_runner = AOTTestRunner(pass_config={
        "tir.usmp.enable": True,
        "tir.usmp.algorithm": usmp_algo
    })

    tflite_model_file = tf_testing.get_workload_official(
        model_url[0],
        model_url[1],
    )
    mod, inputs, params = create_relay_module_and_inputs_from_tflite_file(
        tflite_model_file)
    output_list = generate_ref_data(mod, inputs, params)

    compiled_test_mods = compile_models(
        AOTTestModel(module=mod,
                     inputs=inputs,
                     outputs=output_list,
                     params=params),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
    )

    for compiled_model in compiled_test_mods:
        _check_for_no_tvm_backendallocworkspace_calls(
            compiled_model.executor_factory.lib)

    # Checking the workspace size reported in model library format
    mlf_memory_map = mlf._build_function_memory_map(
        compiled_test_mods[0].executor_factory.function_metadata)
    assert mlf_memory_map["main"][0]["workspace_size_bytes"] == workspace_size
    assert mlf_memory_map["main"][0]["constants_size_bytes"] == constant_size
    # This should match the workspace size that will be codegen'd into the entry point.
    allocated_pool_info_size = sum([
        _.allocated_size for _ in list(
            dict(compiled_test_mods[0].executor_factory.
                 executor_codegen_metadata.pool_inputs).values())
    ])
    assert allocated_pool_info_size == workspace_size + constant_size

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
Example #4
def test_conv2d(interface_api, use_unpacked_api, test_runner, groups,
                weight_shape):
    """Test a subgraph with a single conv2d operator."""
    dtype = "float32"
    ishape = (1, 32, 14, 14)
    wshape = (32, weight_shape, 3, 3)
    pass_config = {"tir.usmp.enable": True}
    test_runner = AOTTestRunner(
        makefile=test_runner.makefile,
        prologue=test_runner.prologue,
        epilogue=test_runner.epilogue,
        includes=test_runner.includes,
        parameters=test_runner.parameters,
        pass_config=pass_config,
    )

    data0 = relay.var("data", shape=ishape, dtype=dtype)
    weight0 = relay.var("weight", shape=wshape, dtype=dtype)
    out = relay.nn.conv2d(data0,
                          weight0,
                          kernel_size=(3, 3),
                          padding=(1, 1),
                          groups=groups)
    main_f = relay.Function([data0, weight0], out)
    mod = tvm.IRModule()
    mod["main"] = main_f
    mod = transform.InferType()(mod)

    i_data = np.random.uniform(0, 1, ishape).astype(dtype)
    w1_data = np.random.uniform(0, 1, wshape).astype(dtype)

    inputs = OrderedDict([("data", i_data), ("weight", w1_data)])

    output_list = generate_ref_data(mod, inputs)
    compile_and_run(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
    compiled_test_mods = compile_models(
        models=AOTTestModel(module=mod, inputs=inputs, outputs=output_list),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
    )

    for compiled_model in compiled_test_mods:
        _check_for_no_tvm_backendallocworkspace_calls(
            compiled_model.executor_factory.lib)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
Example #5
def compile_to_main_func(interface_api="c", use_unpacked_api=True):
    test_runner = AOT_DEFAULT_RUNNER
    compiled_models = compile_models(
        models=AOTTestModel(
            module=IRModule.from_expr(func),
            inputs=inputs,
            outputs=output_list,
        ),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        workspace_byte_alignment=16,
        pass_config=test_runner.pass_config,
    )
    main_ir_module = list(
        compiled_models[0].executor_factory.lowered_ir_mods.values())[0]
    main_func = main_ir_module["__tvm_main__"]
    return main_func
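
Note that func, inputs, and output_list are free variables: in the original test this helper is nested inside a test function that defines them. A hypothetical enclosing test, reusing the compile-only pattern of Example #1 (the test name is invented for illustration):

def test_main_func_alignment():  # hypothetical name, not from the source
    x = relay.var("x", shape=(1, 10))
    func = relay.Function([x], relay.nn.relu(x))
    inputs, output_list = None, None  # compile-only, as in Example #1

    # ... compile_to_main_func as above, defined here so it closes over func ...
    main_func = compile_to_main_func()
    # e.g. inspect main_func.body for 16-byte-aligned allocate nodes
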
Example #6
def compile_to_main_func(interface_api="c", use_unpacked_api=True):
    test_runner = create_test_runner()
    compiled_models = compile_models(
        models=AOTTestModel(
            module=mod,
            inputs=input_data,
            outputs=output_data,
        ),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        workspace_byte_alignment=16,
        pass_config=test_runner.pass_config,
    )
    main_ir_module = compiled_models[
        0].executor_factory.lowered_ir_mods.items()[0][1]
    main_func = main_ir_module["__tvm_main__"]
    return main_func
Example #7
def test_aot_codegen_backend_alloc_workspace_calls():
    """This test checks whether AoT lowering creates TVMBackendAllocWorkspace calls"""

    # The %data and %weight shapes in the following primitive Relay should create
    # small tensors that would get lowered to stack allocations in the CPU PrimFuncs.
    # However, the AoT executor codegen should retain them as TVMBAW calls
    # pylint: disable=line-too-long
    relay_mod = tvm.parser.fromtext("""
        #[version = "0.0.5"]
        def @main(%data: Tensor[(1, 4, 4, 4), float32], %weight: Tensor[(4, 4, 3, 3), float32], src_layout="OIHW", dst_layout="OIHW4i4o") -> Tensor[(1, 4, 4, 4), float32] {
        %0 = fn (%p02: Tensor[(1, 4, 4, 4), float32], Primitive=1, hash="9332b3872fb5292c", src_layout="NCHW", dst_layout="NCHW4c") -> Tensor[(1, 1, 4, 4, 4), float32] {
            layout_transform(%p02, src_layout="NCHW", dst_layout="NCHW4c") /* ty=Tensor[(1, 1, 4, 4, 4), float32] */
        };
        %1 = fn (%p03: Tensor[(4, 4, 3, 3), float32], Primitive=1, hash="9f0b2b8a24a4dab3", src_layout="OIHW", dst_layout="OIHW4i4o") -> Tensor[(1, 1, 3, 3, 4, 4), float32] {
            layout_transform(%p03, src_layout="OIHW", dst_layout="OIHW4i4o") /* ty=Tensor[(1, 1, 3, 3, 4, 4), float32] */
        };
        %2 = %0(%data) /* ty=Tensor[(1, 1, 4, 4, 4), float32] */;
        %3 = %1(%weight) /* ty=Tensor[(1, 1, 3, 3, 4, 4), float32] */;
        %4 = fn (%p01: Tensor[(1, 1, 4, 4, 4), float32], %p1: Tensor[(1, 1, 3, 3, 4, 4), float32], out_layout="NCHW4c", kernel_layout="OIHW4i4o", Primitive=1, data_layout="NCHW4c") -> Tensor[(1, 1, 4, 4, 4), float32] {
            nn.contrib_conv2d_NCHWc(%p01, %p1, padding=[1, 1, 1, 1], channels=4, kernel_size=[3, 3], data_layout="NCHW4c", kernel_layout="OIHW4i4o", out_layout="NCHW4c") /* ty=Tensor[(1, 1, 4, 4, 4), float32] */
        };
        %5 = %4(%2, %3) /* ty=Tensor[(1, 1, 4, 4, 4), float32] */;
        %6 = fn (%p0: Tensor[(1, 1, 4, 4, 4), float32], Primitive=1, src_layout="NCHW4c", dst_layout="NCHW") -> Tensor[(1, 4, 4, 4), float32] {
            layout_transform(%p0, src_layout="NCHW4c", dst_layout="NCHW") /* ty=Tensor[(1, 4, 4, 4), float32] */
        };
        %6(%5) /* ty=Tensor[(1, 4, 4, 4), float32] */
        }
        """)
    # pylint: enable=line-too-long

    compiled_test_mods = compile_models(
        models=AOTTestModel(module=relay_mod, inputs=None, outputs=None),
        interface_api="c",
        use_unpacked_api=True,
    )
    source = compiled_test_mods[0].executor_factory.lib.imported_modules[
        0].get_source()
    # There should be three allocates created, one for each of the three
    # primitive Relay function calls in main for the above Relay snippet.
    assert source.count("TVMBackendAllocWorkspace") == 3
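
Many examples on this page call _check_for_no_tvm_backendallocworkspace_calls to assert the opposite condition. Its definition is not shown in this listing; a minimal sketch of such a check, assuming each generated C module is reachable through the library's imported_modules:

def _check_for_no_tvm_backendallocworkspace_calls(mod):
    """Sketch: assert no generated C source calls TVMBackendAllocWorkspace."""
    for imported in mod.imported_modules:
        if imported.type_key == "c":  # only C source modules expose get_source()
            assert "TVMBackendAllocWorkspace" not in imported.get_source()
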
Example #8
def build_source(
    module,
    inputs,
    outputs,
    test_runner,
    output_tolerance=0,
    workspace_pools=None,
):
    return compile_models(
        models=AOTTestModel(
            module=module,
            inputs=inputs,
            outputs=outputs,
            output_tolerance=output_tolerance,
            extra_memory_in_bytes=0,
        ),
        interface_api="c",
        use_unpacked_api=True,
        workspace_memory_pools=workspace_pools,
        workspace_byte_alignment=16,
        pass_config=test_runner.pass_config,
    )
Example #9
def build_source(
    module,
    inputs,
    outputs,
    accel="ethos-u55-256",
    output_tolerance=0,
    enable_usmp=True,
    enable_cascader=False,
):
    test_runner = create_test_runner(accel, enable_usmp, enable_cascader)
    return compile_models(
        models=AOTTestModel(
            module=module,
            inputs=inputs,
            outputs=outputs,
            output_tolerance=output_tolerance,
            extra_memory_in_bytes=0,
        ),
        interface_api="c",
        use_unpacked_api=True,
        workspace_byte_alignment=16,
        pass_config=test_runner.pass_config,
    )
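
A hedged usage sketch pairing this build_source with run_and_check, following the calling convention the other examples use; the accelerator string mirrors the default above, and create_test_runner is the same helper the function calls internally:

compiled = build_source(module, inputs, outputs, accel="ethos-u55-256")
run_and_check(
    models=compiled,
    runner=create_test_runner("ethos-u55-256", True, False),
    interface_api="c",
)
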
Example #10
def test_constants_alignment(constants_byte_alignment):
    """Test that constants_byte_alignment correctly sets constants byte alignment"""

    use_unpacked_api = True
    interface_api = "c"

    mod, params = testing.mobilenet.get_workload(batch_size=1)
    data_shape = [int(x) for x in mod["main"].checked_type.arg_types[0].shape]
    data = np.random.uniform(size=data_shape).astype("float32")
    inputs = {"data": data}
    output_list = generate_ref_data(mod, inputs, params)
    target = f"c -constants-byte-alignment={constants_byte_alignment}"
    compiled_test_mods = compile_models(
        AOTTestModel(module=mod,
                     inputs=inputs,
                     outputs=output_list,
                     params=params),
        interface_api,
        use_unpacked_api,
        target=tvm.target.Target(target, host=target),
    )
    source = compiled_test_mods[0].executor_factory.lib.imported_modules[
        0].get_source()
    assert f'__attribute__((section(".rodata.tvm"), aligned({constants_byte_alignment})))' in source
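
Here too, constants_byte_alignment comes from a stripped parametrize decorator; a sketch with illustrative alignment values:

import pytest

@pytest.mark.parametrize("constants_byte_alignment", [8, 16, 32])  # illustrative values
def test_constants_alignment(constants_byte_alignment):
    ...  # body as above
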
Example #11
def test_reshape_removal(padding):
    """Tests reshape is removed from the network"""
    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_USMP_CORSTONE300_RUNNER

    in_shape = (1, 28, 28, 12)
    pool_size = (3, 3)
    strides = (2, 2)
    relu_type = "NONE"
    zero_point, scale = (-34, 0.0256)

    max_pool = make_model(
        pool_op=relay.nn.max_pool2d,
        shape=in_shape,
        pool_size=pool_size,
        strides=strides,
        padding=padding,
        scale=scale,
        zero_point=zero_point,
        relu_type=relu_type,
    )
    new_shape = (1, 28, 28, 3) if padding == "VALID" else (1, 30, 30, 3)
    reshape = relay.reshape(max_pool, newshape=new_shape)

    model = make_model(
        pool_op=relay.nn.avg_pool2d,
        shape=new_shape,
        pool_size=pool_size,
        strides=strides,
        padding=padding,
        scale=scale,
        zero_point=zero_point,
        relu_type=relu_type,
        input_op=reshape,
    )
    orig_mod = make_module(model)

    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod)

    # validate pattern matching
    assert_partitioned_function(orig_mod, cmsisnn_mod)

    # generate reference output
    rng = np.random.default_rng(12345)
    in_min, in_max = get_range_for_dtype_str("int8")
    inputs = {
        "input": rng.integers(in_min, high=in_max, size=in_shape, dtype="int8")
    }
    output_list = generate_ref_data(orig_mod["main"], inputs, params=None)

    # validate presence of depthwise convolution
    compiled_models = compile_models(
        AOTTestModel(
            module=cmsisnn_mod,
            inputs=inputs,
            outputs=output_list,
            params=None,
            output_tolerance=1,
        ),
        interface_api,
        use_unpacked_api,
        pass_config=test_runner.pass_config,
    )

    main_mod = None
    for target, mod in compiled_models[
            0].executor_factory.lowered_ir_mods.items():
        if target.kind.name == "c":
            main_mod = mod

    # when padding="SAME", extra padding is introduced which causes Reshape to be fused with the
    # Pad. RemoveReshapes pass cannot remove a fused Reshape. Whereas padding="VALID" doesn't need
    # an extra Pad layer. In this case, the pass removes the Reshape from the graph.
    reshapes_present = any(
        ["reshape" in gv.name_hint for gv in main_mod.get_global_vars()])
    check_reshapes = reshapes_present if padding == "SAME" else not reshapes_present
    expected_reshapes = "a" if padding == "SAME" else "No"
    assert check_reshapes, "Expecting {} reshape layer(s).".format(
        expected_reshapes)

    # validate the output
    run_and_check(
        models=compiled_models,
        runner=test_runner,
        interface_api=interface_api,
    )
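
The padding parameter is also pytest-parametrized. Since the assertions above only distinguish "SAME" from "VALID", the stripped decorator was presumably equivalent to:

@pytest.mark.parametrize("padding", ["SAME", "VALID"])
def test_reshape_removal(padding):
    ...  # body as above
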
Example #12
def test_relay_conv2d_cmsisnn_depthwise_int8(
    padding,
    strides,
    dilation,
    relu_type,
    input_zero_point,
    input_scale,
    kernel_scale,
    depth_multiplier,
):
    """Tests QNN Depthwise int8 op via CMSIS-NN"""
    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_USMP_CORSTONE300_RUNNER

    dtype = "int8"
    in_min, in_max = get_range_for_dtype_str(dtype)

    ifm_shape = (1, 24, 24, 1)
    groups = ifm_shape[3]
    weight_format = "HWIO"
    (kernel_h, kernel_w) = (3, 3)
    kernel_shape = (kernel_h, kernel_w, ifm_shape[3], depth_multiplier)
    out_channels = ifm_shape[3] * depth_multiplier
    enable_bias = True
    ks_len = len(kernel_scale)
    kernel_zero_point = 0
    kernel_scale = [kernel_scale[i % ks_len] for i in range(out_channels)]

    output_scale, output_zero_point = get_conv2d_qnn_params(
        kernel_shape,
        input_scale,
        input_zero_point,
        kernel_scale,
        kernel_zero_point,
        dtype,
        dtype,
        dtype,
        True,
    )

    model, params = make_model(
        ifm_shape,
        kernel_shape,
        input_zero_point,
        input_scale,
        kernel_zero_point,
        kernel_scale,
        output_zero_point,
        output_scale,
        padding,
        strides,
        dilation,
        groups,
        dtype,
        dtype,
        out_channels,
        weight_format,
        enable_bias,
        relu_type,
    )
    orig_mod = make_module(model)
    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params)

    # validate pattern matching
    assert_partitioned_function(orig_mod, cmsisnn_mod)

    # generate reference output
    rng = np.random.default_rng(12345)
    inputs = {"input": rng.integers(in_min, high=in_max, size=ifm_shape, dtype=dtype)}
    output_list = generate_ref_data(orig_mod["main"], inputs, params)

    # validate presence of depthwise convolution
    compiled_models = compile_models(
        AOTTestModel(
            module=cmsisnn_mod,
            inputs=inputs,
            outputs=output_list,
            params=params,
            output_tolerance=1,
        ),
        interface_api,
        use_unpacked_api,
        pass_config=test_runner.pass_config,
    )

    cmsisnn_tir_mod = None
    for target, mod in compiled_models[0].executor_factory.lowered_ir_mods.items():
        if target.kind.name == "cmsis-nn":
            cmsisnn_tir_mod = mod

    cmsisnn_func = cmsisnn_tir_mod["tvmgen_default_cmsis_nn_main_0"]
    call_extern = None
    # When depth_multiplier != 1 a context buffer is initialized, which nests
    # the extern call one level deeper (the else branch below)
    if isinstance(cmsisnn_func.body, tvm.tir.stmt.Evaluate):
        call_extern = cmsisnn_func.body.value
    else:
        call_extern = cmsisnn_func.body.body.value
    assert (
        call_extern.args[0].value == "arm_depthwise_conv_wrapper_s8"
    ), "Relay Conv2D should be mapped to CMSIS-NN Depthwise Convolution."

    # validate the output
    run_and_check(
        models=compiled_models,
        runner=test_runner,
        interface_api=interface_api,
    )
Example #13
def test_conv2d_number_primfunc_args(
    padding,
    enable_bias,
    input_zero_point,
    input_scale,
    kernel_scale,
    out_channels,
):
    """Tests number of arguments in Conv2D primfunc"""
    interface_api = "c"
    use_unpacked_api = True

    ifm_shape = (1, 64, 100, 4)
    kernel_size = (3, 3)
    strides = (1, 1)
    dilation = (1, 1)
    dtype = "int8"
    groups = 1
    weight_format = "HWIO"
    kernel_h = kernel_size[0]
    kernel_w = kernel_size[1]
    kernel_shape = (kernel_h, kernel_w, ifm_shape[3] // groups, out_channels)
    kernel_zero_point = 0
    in_min, in_max = get_range_for_dtype_str(dtype)
    relu_type = "RELU"

    output_scale, output_zero_point = get_conv2d_qnn_params(
        kernel_shape,
        input_scale,
        input_zero_point,
        kernel_scale,
        kernel_zero_point,
        dtype,
        dtype,
        dtype,
    )

    model, params = make_model(
        ifm_shape,
        kernel_shape,
        input_zero_point,
        input_scale,
        kernel_zero_point,
        kernel_scale,
        output_zero_point,
        output_scale,
        padding,
        strides,
        dilation,
        groups,
        dtype,
        dtype,
        out_channels,
        weight_format,
        enable_bias,
        relu_type,
    )
    orig_mod = make_module(model)
    cmsisnn_mod = cmsisnn.partition_for_cmsisnn(orig_mod, params)

    # validate pattern matching
    assert_partitioned_function(orig_mod, cmsisnn_mod)

    # compile the model
    rng = np.random.default_rng(12345)
    inputs = {"input": rng.integers(in_min, high=in_max, size=ifm_shape, dtype=dtype)}
    output_list = generate_ref_data(orig_mod["main"], inputs, params)

    compiled_models = compile_models(
        AOTTestModel(module=cmsisnn_mod, inputs=inputs, outputs=output_list, params=params),
        interface_api,
        use_unpacked_api,
    )

    # validate number of TIR primfunc args
    expected_num_params = 6 if enable_bias else 5
    cmsisnn_tir_mod = None
    for target, mod in compiled_models[0].executor_factory.lowered_ir_mods.items():
        if target.kind.name == "cmsis-nn":
            cmsisnn_tir_mod = mod

    cmsisnn_func = cmsisnn_tir_mod["tvmgen_default_cmsis_nn_main_0"]
    assert (
        len(cmsisnn_func.params) == expected_num_params
    ), "Generated unexpected number of function arguments."
Example #14
def test_output_tensor_names():
    """Test that the output names generated match those in the model"""
    pytest.importorskip("tflite")

    import os
    import tensorflow as tf
    import tflite.Model

    ifm_shape = (1, 299, 299, 3)
    padding = "VALID"
    strides = (1, 1)
    dilation = (1, 1)
    kernel_shape = (3, 2)

    def create_tflite_graph_two_outs():
        """Create a model with 2 output tensors"""
        class Model(tf.Module):
            @tf.function
            def tf_function(self, x):
                # Use tf.nn API to create the model
                tf_strides = [1, strides[0], strides[1], 1]
                filter_shape = [kernel_shape[0], kernel_shape[1], 3, 3]
                filter1 = tf.constant(
                    np.arange(np.prod(filter_shape)).reshape(filter_shape),
                    dtype=tf.float32,
                )
                op = tf.nn.conv2d(
                    x,
                    filters=filter1,
                    strides=tf_strides,
                    padding=padding,
                    dilations=dilation,
                )
                op = tf.nn.relu(op)
                # Second convolution
                filter2 = tf.constant(
                    1000 +
                    np.arange(np.prod(filter_shape)).reshape(filter_shape),
                    dtype=tf.float32,
                )
                op2 = tf.nn.conv2d(
                    x,
                    filters=filter2,
                    strides=strides,
                    padding=padding,
                    data_format="NHWC",
                    dilations=dilation,
                )
                op2 = tf.nn.relu(op2)
                return op, op2

        model = Model()
        concrete_func = model.tf_function.get_concrete_function(
            tf.TensorSpec(ifm_shape, dtype=tf.float32))

        # Convert the model
        def representative_dataset():
            for _ in range(100):
                data = np.random.rand(*tuple(ifm_shape))
                yield [data.astype(np.float32)]

        converter = tf.lite.TFLiteConverter.from_concrete_functions(
            [concrete_func])
        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        converter.representative_dataset = representative_dataset
        converter.target_spec.supported_ops = [
            tf.lite.OpsSet.TFLITE_BUILTINS_INT8
        ]
        converter.inference_input_type = tf.int8
        converter.inference_output_type = tf.int8
        tflite_model = converter.convert()
        return tflite_model

    tflite_graph = create_tflite_graph_two_outs()
    tflite_model = tflite.Model.Model.GetRootAsModel(tflite_graph, 0)
    mod, params = relay.frontend.from_tflite(
        tflite_model,
        shape_dict={"input": ifm_shape},
        dtype_dict={"input": "int8"},
    )

    use_unpacked_api = True
    interface_api = "c"
    test_runner = AOT_DEFAULT_RUNNER

    in_min, in_max = (-128, 127)
    data = np.random.randint(in_min, high=in_max, size=ifm_shape, dtype="int8")
    input_name = mod["main"].params[0].name_hint
    inputs = {input_name: data}
    output_list = generate_ref_data(mod, inputs, params)
    compile_and_run(
        AOTTestModel(module=mod,
                     inputs=inputs,
                     outputs=output_list,
                     params=params),
        test_runner,
        interface_api,
        use_unpacked_api,
    )

    compiled_test_mods = compile_models(
        AOTTestModel(module=mod,
                     inputs=inputs,
                     outputs=output_list,
                     params=params),
        interface_api,
        use_unpacked_api,
    )

    # Check that the names of the output tensors occur in the source code
    source = compiled_test_mods[0].executor_factory.lib.get_source()
    for output_name in output_list.keys():
        assert output_name in source
Example #15
def test_packed_global_variables():
    """Check packed global variables in codegen output."""
    dtype = "float32"
    ishape = (1, 32, 14, 14)
    wshape = (32, 32, 3, 3)
    interface_api = "packed"
    use_unpacked_api = False

    data0 = relay.var("data", shape=ishape, dtype=dtype)
    weight0 = relay.var("weight", shape=wshape, dtype=dtype)
    out = relay.nn.conv2d(data0,
                          weight0,
                          kernel_size=(3, 3),
                          padding=(1, 1),
                          groups=1)
    main_f = relay.Function([data0, weight0], out)
    mod = tvm.IRModule()
    mod["main"] = main_f
    mod = transform.InferType()(mod)

    i_data = np.random.uniform(0, 1, ishape).astype(dtype)
    w1_data = np.random.uniform(0, 1, wshape).astype(dtype)

    inputs = OrderedDict([("data", i_data), ("weight", w1_data)])

    output_list = generate_ref_data(mod, inputs)
    compiled_models_list = compile_models(
        models=AOTTestModel(module=mod, inputs=inputs, outputs=output_list),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        workspace_byte_alignment=8,
        enable_op_fusion=True,
        pass_config=AOT_DEFAULT_RUNNER.pass_config,
        use_runtime_executor=True,
        target=tvm.target.Target("c"),
    )
    compiled_model = compiled_models_list[0]

    tmp_path = utils.tempdir()
    base_path = tmp_path.temp_dir

    model = compiled_model.model
    tar_file = os.path.join(base_path, f"{model.name}.tar")
    export_model_library_format(compiled_model.executor_factory, tar_file)
    t = tarfile.open(tar_file)
    t.extractall(base_path)

    file_list = []
    for path in (pathlib.Path(base_path) / "codegen" / "host" /
                 "src").iterdir():
        if path.is_file():
            file_list.append(path)
    assert len(file_list) > 0

    for path in file_list:
        with open(path, "r") as lib_f:
            lib1 = lib_f.readlines()

        tvmgen_names = []
        tvmgen_funcs = []
        for line in lib1:
            for item in line.split(" "):
                # Collect every token that starts with tvmgen_default
                if item.startswith("tvmgen_default"):
                    tvmgen_names.append(item)
                    # Extract function names, i.e. the text before "("
                    tvmgen_funcs += re.findall(r"(?<=).*(?=\()", item)
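                    # Note: the empty lookbehind "(?<=)" is a no-op, so the
                    # pattern simply captures everything before the first "(".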

        # Verify that no collected function name also appears with a "_packed" suffix
        for func in tvmgen_funcs:
            assert f"{func}_packed" not in tvmgen_names
Example #16
def test_tflite_model_u3_usecase_var_cons_ext_pools(model_url, usmp_algo):
    """This checks for inference using one external workspace and one external constant
    pools placed in the application"""
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing  # pylint: disable=import-outside-toplevel

    use_unpacked_api = True
    interface_api = "c"

    target = tvm.target.Target("c")
    workspace_mem_pools = WorkspaceMemoryPools([
        WorkspacePoolInfo("my_memory_pool_1", [target],
                          PoolInfoProperties(size_hint_bytes=8500000)),
    ])

    constant_mem_pools = ConstantMemoryPools([
        ConstantPoolInfo("my_const_pool_1", [target], []),
    ])

    test_runner = AOTTestRunner(
        pass_config={
            "tir.usmp.enable": True,
            "tir.usmp.algorithm": usmp_algo
        },
        prologue=f"""
        __attribute__((section(".bss.noinit"), aligned(TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES)))
        static uint8_t my_memory_pool_1[{_get_workspace_size_define_macro("my_memory_pool_1")}];
        __attribute__((section(".rodata.tvm"), aligned(TVM_RUNTIME_CONST_ALLOC_ALIGNMENT_BYTES)))
        static uint8_t my_const_pool_1[{_get_constant_size_define_macro("my_const_pool_1")}] = {{ {_get_constant_data_define_macro("my_const_pool_1")} }};
        """,
    )

    tflite_model_file = tf_testing.get_workload_official(
        model_url[0],
        model_url[1],
    )
    mod, inputs, params = create_relay_module_and_inputs_from_tflite_file(
        tflite_model_file)
    output_list = generate_ref_data(mod, inputs, params)

    compiled_test_mods = compile_models(
        AOTTestModel(module=mod,
                     inputs=inputs,
                     outputs=output_list,
                     params=params),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
        workspace_memory_pools=workspace_mem_pools,
        constant_memory_pools=constant_mem_pools,
        target=target,
    )

    for compiled_model in compiled_test_mods:
        _check_for_no_tvm_backendallocworkspace_calls(
            compiled_model.executor_factory.lib)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
Example #17
def test_byoc_microtvm(merge_compiler_regions):
    """
    A simple test of the BYOC capabilities of AOT, with and without merging
    compiler regions, covering https://github.com/apache/tvm/issues/9036
    """
    use_unpacked_api = False
    interface_api = "packed"
    test_runner = AOTTestRunner(pass_config={"tir.usmp.enable": True})

    input_x = relay.var("x", shape=(10, 10))
    input_w0 = relay.var("w0", shape=(10, 10))
    input_w1 = relay.var("w1", shape=(10, 10))

    # z0 = x + w0
    marked_input_x = compiler_begin(input_x, "ccompiler")
    marked_input_w0 = compiler_begin(input_w0, "ccompiler")
    add_x_and_w0 = relay.add(marked_input_x, marked_input_w0)
    end_inner_add = compiler_end(add_x_and_w0, "ccompiler")

    # z1 = z0 + w1
    marked_inner_add = compiler_begin(end_inner_add, "ccompiler")
    marked_w1 = compiler_begin(input_w1, "ccompiler")
    add_nested_and_w1 = relay.add(marked_inner_add, marked_w1)
    end_outer_add = compiler_end(add_nested_and_w1, "ccompiler")

    # z2 = z0 + z1
    final_add = relay.add(end_inner_add, end_outer_add)

    relay_func = relay.Function([input_x, input_w0, input_w1], final_add)
    mod = tvm.IRModule()
    mod["main"] = relay_func

    if merge_compiler_regions:
        mod = transform.MergeCompilerRegions()(mod)

    mod = transform.PartitionGraph("mod_name")(mod)
    mod = transform.InferType()(mod)

    x_data = [("x", np.random.rand(10, 10).astype("float32"))]
    w_data = [("w{}".format(i), np.random.rand(10, 10).astype("float32"))
              for i in range(2)]

    map_inputs = OrderedDict(x_data + w_data)
    output_list = generate_ref_data(mod, map_inputs)

    compiled_test_mods = compile_models(
        AOTTestModel(name="my_mod",
                     module=mod,
                     inputs=map_inputs,
                     outputs=output_list),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
    )

    for compiled_model in compiled_test_mods:
        _check_for_no_tvm_backendallocworkspace_calls(
            compiled_model.executor_factory.lib)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
Example #18
def test_tflite_model_u3_usecase_conv2d_var_cons(usmp_algo):
    """This checks for inference using workspace and constant pools placed in the application"""

    mod = tvm.parser.fromtext("""\
        #[version = "0.0.5"]
        def @main(%data : Tensor[(1, 3, 64, 64), uint8], %weight : Tensor[(3, 3, 5, 5), int8]) {
            %1 = nn.conv2d(
                 %data,
                 %weight,
                 padding=[2, 2],
                 channels=3,
                 kernel_size=[5, 5],
                 data_layout="NCHW",
                 kernel_layout="OIHW",
                 out_dtype="int32");
            %2 = cast(nn.max_pool2d(%1, pool_size=[3, 3]), dtype="int8");
            %3 = nn.conv2d(
                 %2,
                 %weight,
                 padding=[2, 2],
                 channels=3,
                 kernel_size=[5, 5],
                 data_layout="NCHW",
                 kernel_layout="OIHW",
                 out_dtype="int32");
            %4 = nn.max_pool2d(%3, pool_size=[3, 3]);
            %4
        }
    """)

    main_func = mod["main"]
    shape_dict = {
        p.name_hint: p.checked_type.concrete_shape
        for p in main_func.params
    }
    type_dict = {p.name_hint: p.checked_type.dtype for p in main_func.params}

    weight_data = np.random.randint(1, 255, shape_dict["weight"]).astype(
        type_dict["weight"])
    input_data = np.ones(shape_dict["data"]).astype(type_dict["data"])
    params = {"weight": weight_data}
    inputs = {"data": input_data}

    use_unpacked_api = True
    interface_api = "c"

    target = tvm.target.Target("c")
    workspace_mem_pools = WorkspaceMemoryPools([
        WorkspacePoolInfo("my_memory_pool_1", [target],
                          PoolInfoProperties(size_hint_bytes=8500000)),
    ])

    constant_mem_pools = ConstantMemoryPools([
        ConstantPoolInfo("my_const_pool_1", [target], []),
    ])

    test_runner = AOTTestRunner(
        pass_config={
            "tir.usmp.enable": True,
            "tir.usmp.algorithm": usmp_algo
        },
        prologue=f"""
        __attribute__((section(".bss.noinit"), aligned(TVM_RUNTIME_ALLOC_ALIGNMENT_BYTES)))
        static uint8_t my_memory_pool_1[{_get_workspace_size_define_macro("my_memory_pool_1")}];
        __attribute__((section(".rodata.tvm"), aligned(TVM_RUNTIME_CONST_ALLOC_ALIGNMENT_BYTES)))
        static uint8_t my_const_pool_1[{_get_constant_size_define_macro("my_const_pool_1")}] = {{ {_get_constant_data_define_macro("my_const_pool_1")} }};
        """,
    )

    output_list = generate_ref_data(mod, inputs, params)

    compiled_test_mods = compile_models(
        AOTTestModel(module=mod,
                     inputs=inputs,
                     outputs=output_list,
                     params=params),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
        workspace_memory_pools=workspace_mem_pools,
        constant_memory_pools=constant_mem_pools,
        target=target,
    )

    for compiled_model in compiled_test_mods:
        _check_for_no_tvm_backendallocworkspace_calls(
            compiled_model.executor_factory.lib)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
Example #19
def test_two_models_with_a_single_external_pool(model_urls, usmp_algo):
    """This checks for inference using a single large enough common pool"""
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing  # pylint: disable=import-outside-toplevel

    use_unpacked_api = True
    interface_api = "c"

    target = tvm.target.Target("c")
    workspace_memory_pools = WorkspaceMemoryPools(
        [PoolInfo("my_memory_pool", {target: PoolInfo.READ_WRITE_ACCESS})])
    test_runner = AOTTestRunner(
        pass_config={
            "tir.usmp.enable": True,
            "tir.usmp.algorithm": usmp_algo
        },
        prologue=f"""
        #define MAX(A, B) ((A > B) ? A : B)
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool[MAX({_get_workspace_size_define_macro("my_memory_pool", "mod1")},{_get_workspace_size_define_macro("my_memory_pool", "mod2")})];
        """,
    )

    tflite_model_file1 = tf_testing.get_workload_official(
        model_urls[0][0],
        model_urls[0][1],
    )
    mod1, inputs1, params1 = create_relay_module_and_inputs_from_tflite_file(
        tflite_model_file1)
    output_list1 = generate_ref_data(mod1, inputs1, params1)

    tflite_model_file2 = tf_testing.get_workload_official(
        model_urls[1][0],
        model_urls[1][1],
    )
    mod2, inputs2, params2 = create_relay_module_and_inputs_from_tflite_file(
        tflite_model_file2)
    output_list2 = generate_ref_data(mod2, inputs2, params2)

    compiled_test_mods = compile_models(
        [
            AOTTestModel(name="mod1",
                         module=mod1,
                         inputs=inputs1,
                         outputs=output_list1,
                         params=params1),
            AOTTestModel(name="mod2",
                         module=mod2,
                         inputs=inputs2,
                         outputs=output_list2,
                         params=params2),
        ],
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
        workspace_memory_pools=workspace_memory_pools,
        target=target,
    )

    for compiled_model in compiled_test_mods:
        _check_for_no_tvm_backendallocworkspace_calls(
            compiled_model.executor_factory.lib)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
Example #20
def test_tflite_model_u4_usecase_two_external_pools(model_url, usmp_algo):
    """This checks for inference with USMP using external pools placed in the application"""
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing  # pylint: disable=import-outside-toplevel

    use_unpacked_api = True
    interface_api = "c"

    target = tvm.target.Target("c")
    workspace_memory_pools = WorkspaceMemoryPools([
        PoolInfo("my_memory_pool_1", {target: PoolInfo.READ_WRITE_ACCESS},
                 size_hint_bytes=2500000),
        PoolInfo("my_memory_pool_2", {target: PoolInfo.READ_WRITE_ACCESS}),
    ])

    tflite_model_file = tf_testing.get_workload_official(
        model_url[0],
        model_url[1],
    )
    mod, inputs, params = create_relay_module_and_inputs_from_tflite_file(
        tflite_model_file)
    output_list = generate_ref_data(mod, inputs, params)

    input_name, input_data = list(inputs.items())[0]
    input_size_bytes = input_data.size * input_data.itemsize
    test_runner = AOTTestRunner(
        pass_config={
            "tir.usmp.enable": True,
            "tir.usmp.algorithm": usmp_algo,
            "tir.usmp.use_workspace_io": True,
        },
        prologue=f"""
        #include <string.h>
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool_1[{_get_workspace_size_define_macro("my_memory_pool_1")}];
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool_2[{_get_workspace_size_define_macro("my_memory_pool_2")}];
        struct {_add_module_prefix("workspace_pools")} {_add_module_prefix("workspace_pools")} = {{
            .my_memory_pool_1 = my_memory_pool_1,
            .my_memory_pool_2 = my_memory_pool_2,
        }};
        struct {_add_module_prefix("inputs")} {_add_module_prefix("inputs")} = {_add_module_prefix("map_inputs")}(&{_add_module_prefix("workspace_pools")});
        memcpy({_add_module_prefix("inputs")}.{input_name}, tvmgen_default_input_data_input, {input_size_bytes});
        struct {_add_module_prefix("outputs")} {_add_module_prefix("outputs")} = {_add_module_prefix("map_outputs")}(&{_add_module_prefix("workspace_pools")});
        """,
    )

    compiled_test_mods = compile_models(
        AOTTestModel(module=mod,
                     inputs=inputs,
                     outputs=output_list,
                     params=params),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
        workspace_memory_pools=workspace_memory_pools,
        target=target,
    )

    for compiled_model in compiled_test_mods:
        _check_for_no_tvm_backendallocworkspace_calls(
            compiled_model.executor_factory.lib)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
        use_workspace_io=True,
    )