def test_tflite_model_u2_usecase_two_models_with_a_single_external_pool(model_urls, usmp_algo):
    """This checks for inference using a single large enough common pool"""
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing

    use_unpacked_api = True
    interface_api = "c"

    target = tvm.target.Target("c")
    workspace_memory_pools = WorkspaceMemoryPools(
        [PoolInfo("my_memory_pool", {target: PoolInfo.READ_WRITE_ACCESS})]
    )
    test_runner = AOTTestRunner(
        pass_config={"tir.usmp.enable": True, "tir.usmp.algorithm": usmp_algo},
        prologue=f"""
        #define MAX(A, B) ((A > B) ? A : B)
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool[MAX({_get_workspace_size_define_macro("my_memory_pool", "mod1")},{_get_workspace_size_define_macro("my_memory_pool", "mod2")})];
        """,
    )

    tflite_model_file1 = tf_testing.get_workload_official(
        model_urls[0][0],
        model_urls[0][1],
    )
    mod1, inputs1, params1 = create_relay_module_and_inputs_from_tflite_file(tflite_model_file1)
    output_list1 = generate_ref_data(mod1, inputs1, params1)

    tflite_model_file2 = tf_testing.get_workload_official(
        model_urls[1][0],
        model_urls[1][1],
    )
    mod2, inputs2, params2 = create_relay_module_and_inputs_from_tflite_file(tflite_model_file2)
    output_list2 = generate_ref_data(mod2, inputs2, params2)

    compiled_test_mods = compile_models(
        [
            AOTTestModel(
                name="mod1", module=mod1, inputs=inputs1, outputs=output_list1, params=params1
            ),
            AOTTestModel(
                name="mod2", module=mod2, inputs=inputs2, outputs=output_list2, params=params2
            ),
        ],
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
        workspace_memory_pools=workspace_memory_pools,
        target=target,
    )

    for compiled_model in compiled_test_mods:
        check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
Beispiel #2
0
def test_multiple_models(interface_api, use_unpacked_api, test_runner):
    # Identity model without params
    x = relay.var("x", "float32")
    mod1 = relay.Function([x], x)
    one = np.array(1.0, "float32")
    inputs1 = {"x": one}
    output_list1 = generate_ref_data(mod1, inputs1)
    params1 = None

    # Convolution model
    RELAY_MODEL = """
#[version = "0.0.5"]
def @main(%data : Tensor[(1, 3, 64, 64), uint8], %weight : Tensor[(8, 3, 5, 5), int8]) {
    %1 = nn.conv2d(
         %data,
         %weight,
         padding=[2, 2],
         channels=8,
         kernel_size=[5, 5],
         data_layout="NCHW",
         kernel_layout="OIHW",
         out_dtype="int32");
  %1
}
"""
    mod2 = tvm.parser.fromtext(RELAY_MODEL)
    main_func = mod2["main"]
    shape_dict = {p.name_hint: p.checked_type.concrete_shape for p in main_func.params}
    type_dict = {p.name_hint: p.checked_type.dtype for p in main_func.params}

    weight_data = np.ones(shape_dict["weight"]).astype(type_dict["weight"])
    input_data = np.ones(shape_dict["data"]).astype(type_dict["data"])

    params2 = {"weight": weight_data}
    inputs2 = {"data": input_data}
    output_list2 = generate_ref_data(mod2, inputs2, params2)

    compile_and_run(
        [
            AOTTestModel(
                name="mod1",
                module=mod1,
                inputs=inputs1,
                outputs=output_list1,
                params=params1,
            ),
            AOTTestModel(
                name="mod2",
                module=mod2,
                inputs=inputs2,
                outputs=output_list2,
                params=params2,
            ),
        ],
        test_runner,
        interface_api,
        use_unpacked_api,
    )
Beispiel #3
0
def test_conv2d(enable_usmp, target_kind):
    RELAY_MODEL = textwrap.dedent(
        """\
        #[version = "0.0.5"]
        def @main(%data : Tensor[(1, 3, 64, 64), uint8], %weight : Tensor[(3, 3, 5, 5), int8]) {
            %1 = nn.conv2d(
                 %data,
                 %weight,
                 padding=[2, 2],
                 channels=3,
                 kernel_size=[5, 5],
                 data_layout="NCHW",
                 kernel_layout="OIHW",
                 out_dtype="int32");
            %2 = cast(nn.max_pool2d(%1, pool_size=[3, 3]), dtype="int8");
            %3 = nn.conv2d(
                 %2,
                 %weight,
                 padding=[2, 2],
                 channels=3,
                 kernel_size=[5, 5],
                 data_layout="NCHW",
                 kernel_layout="OIHW",
                 out_dtype="int32");
            %4 = nn.max_pool2d(%3, pool_size=[3, 3]);
            %4
        }
    """
    )
    ir_mod = tvm.parser.fromtext(RELAY_MODEL)

    main_func = ir_mod["main"]
    shape_dict = {p.name_hint: p.checked_type.concrete_shape for p in main_func.params}
    type_dict = {p.name_hint: p.checked_type.dtype for p in main_func.params}

    weight_data = np.ones(shape_dict["weight"]).astype(type_dict["weight"])
    input_data = np.ones(shape_dict["data"]).astype(type_dict["data"])

    params = {"weight": weight_data}
    inputs = {"data": input_data}
    ref_outputs = generate_ref_data(ir_mod, inputs, params)

    with tvm.transform.PassContext(
        opt_level=3, config={"tir.disable_vectorize": True, "tir.usmp.enable": enable_usmp}
    ):
        mod = tvm.relay.build(
            ir_mod,
            params=params,
            target=target_kind,
            executor=backend.Executor("aot", {"interface-api": "packed"}),
        )

    temp_dir = tvm.contrib.utils.TempDirectory()
    test_so_path = temp_dir / "test.so"
    mod.export_library(test_so_path, cc="gcc", options=["-std=c11"])
    loaded_mod = tvm.runtime.load_module(test_so_path)
    runner = tvm.runtime.executor.AotModule(loaded_mod["default"](tvm.cpu(0)))
    runner.set_input(**inputs)
    runner.run()
    assert (runner.get_output(0).asnumpy() == list(ref_outputs.values())[0]).all()
Beispiel #4
0
def test_mobilenet(enable_usmp, target_kind):
    ir_mod, params = testing.mobilenet.get_workload(batch_size=1)
    data_shape = [
        int(x) for x in ir_mod["main"].checked_type.arg_types[0].shape
    ]
    data = np.random.uniform(size=data_shape).astype("float32")
    inputs = {"data": data}
    ref_outputs = generate_ref_data(ir_mod, inputs, params)

    with tvm.transform.PassContext(opt_level=3,
                                   config={
                                       "tir.disable_vectorize": True,
                                       "tir.usmp.enable": enable_usmp
                                   }):
        mod = tvm.relay.build(
            ir_mod,
            params=params,
            target=target_kind,
            executor=backend.Executor("aot", {"interface-api": "packed"}),
        )

    temp_dir = tvm.contrib.utils.TempDirectory()
    test_so_path = temp_dir / "test.so"
    mod.export_library(test_so_path, cc="gcc", options=["-std=c11"])
    loaded_mod = tvm.runtime.load_module(test_so_path)
    runner = tvm.runtime.executor.AotModule(loaded_mod["default"](tvm.cpu(0)))
    runner.set_input(**inputs)
    runner.run()
    assert (runner.get_output(0).asnumpy() == list(
        ref_outputs.values())[0]).all()
Beispiel #5
0
def test_conv2d(interface_api, use_unpacked_api, test_runner, groups,
                weight_shape):
    """Test a subgraph with a single conv2d operator."""
    dtype = "float32"
    ishape = (1, 32, 14, 14)
    wshape = (32, weight_shape, 3, 3)

    data0 = relay.var("data", shape=ishape, dtype=dtype)
    weight0 = relay.var("weight", shape=wshape, dtype=dtype)
    out = relay.nn.conv2d(data0,
                          weight0,
                          kernel_size=(3, 3),
                          padding=(1, 1),
                          groups=groups)
    main_f = relay.Function([data0, weight0], out)
    mod = tvm.IRModule()
    mod["main"] = main_f
    mod = transform.InferType()(mod)

    i_data = np.random.uniform(0, 1, ishape).astype(dtype)
    w1_data = np.random.uniform(0, 1, wshape).astype(dtype)

    inputs = OrderedDict([("data", i_data), ("weight", w1_data)])

    output_list = generate_ref_data(mod, inputs)
    compile_and_run(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
Beispiel #6
0
def test_add_name_mangling_with_params(interface_api, use_unpacked_api, test_runner):
    x = relay.var("x", shape=(1, 10))
    y = relay.var("y", shape=(1, 10))
    z = relay.add(x, y)
    func = relay.Function([x, y], z)

    x_in = np.ones((1, 10)).astype("float32")
    y_in = np.random.uniform(size=(1, 10)).astype("float32")

    params = {"x": x_in}
    inputs = {"y": y_in}
    output_list = generate_ref_data(func, inputs, params)

    compile_and_run(
        AOTTestModel(
            name="my_mod",
            module=func,
            inputs=inputs,
            outputs=output_list,
            params=params,
        ),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
Beispiel #7
0
def test_conv_with_params(interface_api, use_unpacked_api, test_runner):
    RELAY_MODEL = """
#[version = "0.0.5"]
def @main(%data : Tensor[(1, 3, 64, 64), uint8], %weight : Tensor[(8, 3, 5, 5), int8]) {
    %1 = nn.conv2d(
         %data,
         %weight,
         padding=[2, 2],
         channels=8,
         kernel_size=[5, 5],
         data_layout="NCHW",
         kernel_layout="OIHW",
         out_dtype="int32");
  %1
}
"""
    mod = tvm.parser.fromtext(RELAY_MODEL)
    main_func = mod["main"]
    shape_dict = {p.name_hint: p.checked_type.concrete_shape for p in main_func.params}
    type_dict = {p.name_hint: p.checked_type.dtype for p in main_func.params}

    weight_data = np.ones(shape_dict["weight"]).astype(type_dict["weight"])
    input_data = np.ones(shape_dict["data"]).astype(type_dict["data"])

    params = {"weight": weight_data}
    inputs = {"data": input_data}
    output_list = generate_ref_data(mod, inputs, params)

    compile_and_run(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
Beispiel #8
0
def test_byoc_microtvm_multiple_subgraphs(merge_compiler_regions):
    """This is a test case to check BYOC capabilities of AOT with multiple sub graphs"""
    use_unpacked_api = False
    interface_api = "packed"
    test_runner = AOT_DEFAULT_RUNNER

    x = relay.var("x", shape=(10, 10))
    w0 = relay.var("w0", shape=(10, 10))
    w1 = relay.var("w1", shape=(10, 10))
    w2 = relay.var("w2", shape=(10, 10))
    w3 = relay.var("w3", shape=(10, 10))
    w4 = relay.var("w4", shape=(10, 10))
    w5 = relay.var("w5", shape=(10, 10))
    w6 = relay.var("w6", shape=(10, 10))
    w7 = relay.var("w7", shape=(10, 10))

    # C compiler
    z0 = relay.add(x, w0)
    p0 = relay.subtract(z0, w1)
    q0 = relay.multiply(p0, w2)

    z1 = relay.add(x, w3)
    p1 = relay.subtract(z1, w4)
    q1 = relay.multiply(p1, w5)

    # Other parts on TVM
    z2 = relay.add(x, w6)
    q2 = relay.subtract(z2, w7)

    r = relay.concatenate((q0, q1, q2), axis=0)
    f = relay.Function([x, w0, w1, w2, w3, w4, w5, w6, w7], r)
    mod = tvm.IRModule()
    ann = byoc.CcompilerAnnotator()
    mod["main"] = ann.visit(f)

    if merge_compiler_regions:
        mod = transform.MergeCompilerRegions()(mod)

    mod = tvm.relay.transform.PartitionGraph("mod_name")(mod)
    mod = tvm.relay.transform.InferType()(mod)

    x_data = np.random.rand(10, 10).astype("float32")
    w_data = []
    for _ in range(8):
        w_data.append(np.random.rand(10, 10).astype("float32"))

    map_inputs = OrderedDict([("x", x_data)] + [("w{}".format(i), w_data[i])
                                                for i in range(8)])
    output_list = generate_ref_data(mod, map_inputs)
    input_list = [map_inputs["x"]]
    input_list.extend([map_inputs["w{}".format(i)] for i in range(8)])
    compile_and_run(
        AOTTestModel(name="my_mod",
                     module=mod,
                     inputs=map_inputs,
                     outputs=output_list),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
Beispiel #9
0
def test_mobilenet(debug_calculated_workspaces, workspace_byte_alignment):
    use_unpacked_api = True
    interface_api = "c"
    test_runner = AOT_DEFAULT_RUNNER

    # TODO(@Mousius) - Enable memory planning to take into account debug information
    debugging_memory_overhead = 1024 * 1024

    mod, params = testing.mobilenet.get_workload(batch_size=1)
    data_shape = [int(x) for x in mod["main"].checked_type.arg_types[0].shape]
    data = np.random.uniform(size=data_shape).astype("float32")
    inputs = {"data": data}
    output_list = generate_ref_data(mod, inputs, params)
    compile_and_run(
        AOTTestModel(
            module=mod,
            inputs=inputs,
            outputs=output_list,
            params=params,
            extra_memory_in_bytes=debugging_memory_overhead,
        ),
        test_runner,
        interface_api,
        use_unpacked_api,
        workspace_byte_alignment=workspace_byte_alignment,
        debug_calculated_workspaces=debug_calculated_workspaces,
    )
Beispiel #10
0
def test_quant_mobilenet_tfl():
    """Since in AOT we pass directly the output buffer from the user, in quantized networks sharing the output buffers is not possible.
    This is because the output data type is int8 and the intermediate buffer are int32 or int16. We use mobilenet quantized to stress this
    situation and verify that the output buffer sharing is disabled in AOT."""
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing

    use_unpacked_api = True
    interface_api = "c"
    test_runner = AOT_DEFAULT_RUNNER

    tflite_model_file = tf_testing.get_workload_official(
        "https://storage.googleapis.com/download.tensorflow.org/"
        "models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz",
        "mobilenet_v1_1.0_224_quant.tflite",
    )
    with open(tflite_model_file, "rb") as f:
        tflite_model_buf = f.read()
    data_shape = (1, 224, 224, 3)
    in_min, in_max = (0, 255)
    data = np.random.randint(in_min, high=in_max, size=data_shape, dtype="uint8")
    mod, params = convert_to_relay(tflite_model_buf)
    inputs = {"input": data}
    output_list = generate_ref_data(mod, inputs, params)
    compile_and_run(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
Beispiel #11
0
def test_transpose(interface_api, use_unpacked_api, test_runner):
    """Test that non-inpleaceable operations (e.g., transpose) do not happen in-place."""

    dtype = "float32"
    x = relay.var("x", shape=(10, 5), dtype=dtype)
    y = relay.var("y", shape=(10, 5), dtype=dtype)
    t = relay.var("z", shape=(), dtype=dtype)
    a = relay.add(x, y)
    b = relay.transpose(a)
    z = relay.add(b, t)
    # Check result.
    func = relay.Function([x, y, t], z)
    x_data = np.random.rand(10, 5).astype(dtype)
    y_data = np.random.rand(10, 5).astype(dtype)
    t_data = np.random.uniform(size=()).astype(dtype)

    inputs = {"x": x_data, "y": y_data, "z": t_data}
    output_list = generate_ref_data(func, inputs)
    compile_and_run(
        AOTTestModel(module=IRModule.from_expr(func),
                     inputs=inputs,
                     outputs=output_list),
        test_runner,
        interface_api,
        use_unpacked_api,
        enable_op_fusion=False,
    )
Beispiel #12
0
def test_quant_mobilenet_tfl():
    """Since in AOT we pass directly the output buffer from the user, in quantized networks sharing the output buffers is not possible.
    This is because the output data type is int8 and the intermediate buffer are int32 or int16. We use mobilenet quantized to stress this
    situation and verify that the output buffer sharing is disabled in AOT."""
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing

    use_unpacked_api = True
    interface_api = "c"
    test_runner = AOT_DEFAULT_RUNNER

    tflite_model_file = tf_testing.get_workload_official(
        "https://storage.googleapis.com/download.tensorflow.org/"
        "models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz",
        "mobilenet_v1_1.0_224_quant.tflite",
    )
    mod, inputs, params = create_relay_module_and_inputs_from_tflite_file(
        tflite_model_file)
    output_list = generate_ref_data(mod, inputs, params)
    compile_and_run(
        AOTTestModel(module=mod,
                     inputs=inputs,
                     outputs=output_list,
                     params=params),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
Beispiel #13
0
def test_name_sanitiser_name_clash():
    """Test that 2 input tensors with names that clash once sanitized, generates an error"""

    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_DEFAULT_RUNNER

    dtype = "float32"
    x = relay.var("input::-1", shape=(10, 5), dtype=dtype)
    # Next 2 input tensor names will clash once sanitized.
    y = relay.var("input::-2", shape=(10, 5), dtype=dtype)
    t = relay.var("input:--2", shape=(), dtype=dtype)
    a = relay.add(x, y)
    b = relay.transpose(a)
    z = relay.add(b, t)
    # Check result.
    func = relay.Function([x, y, t], z)
    x_data = np.random.rand(10, 5).astype(dtype)
    y_data = np.random.rand(10, 5).astype(dtype)
    t_data = np.random.uniform(size=()).astype(dtype)

    inputs = {"input::-1": x_data, "input::-2": y_data, "input:--2": t_data}
    output_list = generate_ref_data(func, inputs)

    with pytest.raises(TVMError, match="Sanitized input tensor name clash"):
        compile_and_run(
            AOTTestModel(module=IRModule.from_expr(func),
                         inputs=inputs,
                         outputs=output_list),
            test_runner,
            interface_api,
            use_unpacked_api,
            enable_op_fusion=False,
        )
Beispiel #14
0
def test_deprecated_target_arguments(capsys):
    """Tests we can still use relay.build with -executor, -runtime and -link-params"""

    interface_api = "c"
    use_unpacked_api = True
    test_runner = AOT_DEFAULT_RUNNER

    x = relay.var("x", shape=(1, 10))
    y = relay.var("y", shape=(1, 10))
    z = relay.add(x, y)
    func = relay.Function([x, y], z)

    x_in = np.ones((1, 10)).astype("float32")
    y_in = np.random.uniform(size=(1, 10)).astype("float32")

    params = {"x": x_in}
    inputs = {"y": y_in}
    output_list = generate_ref_data(func, inputs, params)

    compile_and_run(
        AOTTestModel(module=IRModule.from_expr(func),
                     inputs=inputs,
                     outputs=output_list,
                     params=params),
        test_runner,
        interface_api,
        use_unpacked_api,
        use_runtime_executor=False,
        target=
        "c -executor=aot --link-params -runtime=c -interface-api=c --unpacked-api",
    )
Beispiel #15
0
def non_device_api_main_func():
    x = relay.var("x", shape=(10, 10))
    y = relay.var("y", shape=(1, 10))
    func = relay.Function([x, y], relay.multiply(x, y))
    x_data = np.random.rand(10, 10).astype("float32")
    y_data = np.random.rand(1, 10).astype("float32")

    inputs = OrderedDict([("x", x_data), ("y", y_data)])
    output_list = generate_ref_data(func, inputs)

    def compile_to_main_func(interface_api="c", use_unpacked_api=True):
        test_runner = AOT_DEFAULT_RUNNER
        compiled_models = compile_models(
            models=AOTTestModel(
                module=IRModule.from_expr(func),
                inputs=inputs,
                outputs=output_list,
            ),
            interface_api=interface_api,
            use_unpacked_api=use_unpacked_api,
            workspace_byte_alignment=16,
            pass_config=test_runner.pass_config,
        )
        main_ir_module = list(
            compiled_models[0].executor_factory.lowered_ir_mods.values())[0]
        main_func = main_ir_module["__tvm_main__"]
        return main_func

    return compile_to_main_func
Beispiel #16
0
def test_conv2d(use_calculated_workspaces, interface_api, use_unpacked_api):
    """Test a subgraph with a single conv2d operator."""
    def conv2d_direct():
        dtype = "float32"
        ishape = (1, 32, 14, 14)
        w1shape = (32, 32, 3, 3)

        data0 = relay.var("data", shape=ishape, dtype=dtype)
        weight0 = relay.var("weight", shape=w1shape, dtype=dtype)
        out = relay.nn.conv2d(data0,
                              weight0,
                              kernel_size=(3, 3),
                              padding=(1, 1))
        main_f = relay.Function([data0, weight0], out)
        mod = tvm.IRModule()
        mod["main"] = main_f
        mod = transform.InferType()(mod)

        i_data = np.random.uniform(0, 1, ishape).astype(dtype)
        w1_data = np.random.uniform(0, 1, w1shape).astype(dtype)

        inputs = OrderedDict([("data", i_data), ("weight", w1_data)])
        return mod, inputs, (1, 32, 14, 14)

    def group_conv2d():
        dtype = "float32"
        ishape = (1, 32, 14, 14)
        w2shape = (32, 1, 3, 3)

        data0 = relay.var("data", shape=(ishape), dtype=dtype)
        weight0 = relay.var("weight", shape=(w2shape), dtype=dtype)
        out = relay.nn.conv2d(data0,
                              weight0,
                              kernel_size=(3, 3),
                              padding=(1, 1),
                              groups=32)
        main_f = relay.Function([data0, weight0], out)
        mod = tvm.IRModule()
        mod["main"] = main_f
        mod = transform.InferType()(mod)

        i_data = np.random.uniform(0, 1, ishape).astype(dtype)
        w_data = np.random.uniform(0, 1, w2shape).astype(dtype)

        inputs = OrderedDict([("data", i_data), ("weight", w_data)])
        return mod, inputs, (1, 32, 14, 14)

    for mod, inputs, out_shape in [conv2d_direct(), group_conv2d()]:
        output_list = generate_ref_data(mod, inputs)
        compile_and_run(
            mod,
            inputs,
            output_list,
            interface_api,
            use_unpacked_api,
            use_calculated_workspaces,
        )
Beispiel #17
0
def test_tflite_model_u3_usecase_two_external_pools(model_url, usmp_algo):
    """This checks for inference using two external pools placed in the application"""
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing

    use_unpacked_api = True
    interface_api = "c"

    target = tvm.target.Target("c")
    workspace_memory_pools = WorkspaceMemoryPools([
        PoolInfo("my_memory_pool_1", {target: PoolInfo.READ_WRITE_ACCESS},
                 size_hint_bytes=2500000),
        PoolInfo("my_memory_pool_2", {target: PoolInfo.READ_WRITE_ACCESS}),
    ])
    test_runner = AOTTestRunner(
        pass_config={
            "tir.usmp.enable": True,
            "tir.usmp.algorithm": usmp_algo
        },
        prologue=f"""
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool_1[{_get_workspace_size_define_macro("my_memory_pool_1")}];
        __attribute__((section(".data.tvm"), aligned(16)))
        static uint8_t my_memory_pool_2[{_get_workspace_size_define_macro("my_memory_pool_2")}];
        """,
    )

    tflite_model_file = tf_testing.get_workload_official(
        model_url[0],
        model_url[1],
    )
    mod, inputs, params = _get_relay_module_and_inputs_from_tflite_file(
        tflite_model_file)
    output_list = generate_ref_data(mod, inputs, params)

    compiled_test_mods = compile_models(
        AOTTestModel(module=mod,
                     inputs=inputs,
                     outputs=output_list,
                     params=params),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
        workspace_memory_pools=workspace_memory_pools,
        target=target,
    )

    for compiled_model in compiled_test_mods:
        check_for_no_tvm_backendallocworkspace_calls(
            compiled_model.executor_factory.lib)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
Beispiel #18
0
def test_conv2d(interface_api, use_unpacked_api, test_runner, groups,
                weight_shape):
    """Test a subgraph with a single conv2d operator."""
    dtype = "float32"
    ishape = (1, 32, 14, 14)
    wshape = (32, weight_shape, 3, 3)
    pass_config = {"tir.usmp.enable": True}
    test_runner = AOTTestRunner(
        makefile=test_runner.makefile,
        prologue=test_runner.prologue,
        epilogue=test_runner.epilogue,
        includes=test_runner.includes,
        parameters=test_runner.parameters,
        pass_config=pass_config,
    )

    data0 = relay.var("data", shape=ishape, dtype=dtype)
    weight0 = relay.var("weight", shape=wshape, dtype=dtype)
    out = relay.nn.conv2d(data0,
                          weight0,
                          kernel_size=(3, 3),
                          padding=(1, 1),
                          groups=groups)
    main_f = relay.Function([data0, weight0], out)
    mod = tvm.IRModule()
    mod["main"] = main_f
    mod = transform.InferType()(mod)

    i_data = np.random.uniform(0, 1, ishape).astype(dtype)
    w1_data = np.random.uniform(0, 1, wshape).astype(dtype)

    inputs = OrderedDict([("data", i_data), ("weight", w1_data)])

    output_list = generate_ref_data(mod, inputs)
    compile_and_run(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
    compiled_test_mods = compile_models(
        models=AOTTestModel(module=mod, inputs=inputs, outputs=output_list),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
    )

    for compiled_model in compiled_test_mods:
        check_for_no_tvm_backendallocworkspace_calls(
            compiled_model.executor_factory.lib)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
def test_byoc_microtvm(merge_compiler_regions):
    """This is a simple test to check BYOC capabilities of AOT - with and without merging compiler regions to test for https://github.com/apache/tvm/issues/9036"""
    use_unpacked_api = False
    interface_api = "packed"
    test_runner = AOTTestRunner(pass_config={"tir.usmp.enable": True})

    x = relay.var("x", shape=(10, 10))
    w0 = relay.var("w0", shape=(10, 10))
    w1 = relay.var("w1", shape=(10, 10))

    # z0 = x + w0
    x_ = compiler_begin(x, "ccompiler")
    w0_ = compiler_begin(w0, "ccompiler")
    z0_ = relay.add(x_, w0_)
    z0 = compiler_end(z0_, "ccompiler")

    # z1 = z0 + w1
    z0__ = compiler_begin(z0, "ccompiler")
    w1_ = compiler_begin(w1, "ccompiler")
    z1_ = relay.add(z0__, w1_)
    z1 = compiler_end(z1_, "ccompiler")

    # z2 = z0 + z1
    z2 = relay.add(z0, z1)

    f = relay.Function([x, w0, w1], z2)
    mod = tvm.IRModule()
    mod["main"] = f

    if merge_compiler_regions:
        mod = transform.MergeCompilerRegions()(mod)

    mod = transform.PartitionGraph("mod_name")(mod)
    mod = transform.InferType()(mod)

    x_data = [("x", np.random.rand(10, 10).astype("float32"))]
    w_data = [("w{}".format(i), np.random.rand(10, 10).astype("float32")) for i in range(2)]

    map_inputs = OrderedDict(x_data + w_data)
    output_list = generate_ref_data(mod, map_inputs)

    compiled_test_mods = compile_models(
        AOTTestModel(name="my_mod", module=mod, inputs=map_inputs, outputs=output_list),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
    )

    for compiled_model in compiled_test_mods:
        check_for_no_tvm_backendallocworkspace_calls(compiled_model.executor_factory.lib)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
Beispiel #20
0
def test_tuple_getitem(interface_api, use_unpacked_api, test_runner):
    func = relay.Function([], relay.TupleGetItem(relay.Tuple([relay.const(1), relay.const(2)]), 0))
    output_list = generate_ref_data(func, {})

    compile_and_run(
        AOTTestModel(module=IRModule.from_expr(func), inputs={}, outputs=output_list),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
Beispiel #21
0
def test_add_const(interface_api, use_unpacked_api, test_runner):
    two = relay.add(relay.const(1), relay.const(1))
    func = relay.Function([], two)
    output_list = generate_ref_data(func, {})

    compile_and_run(
        AOTTestModel(module=IRModule.from_expr(func), inputs={}, outputs=output_list),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
def test_tflite_model(model_url, usmp_algo, workspace_size):
    """This checks for ML models and the memory used by them when using USMP with different algorithms"""
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing

    use_unpacked_api = True
    interface_api = "c"
    test_runner = AOTTestRunner(pass_config={
        "tir.usmp.enable": True,
        "tir.usmp.algorithm": usmp_algo
    })

    tflite_model_file = tf_testing.get_workload_official(
        model_url[0],
        model_url[1],
    )
    with open(tflite_model_file, "rb") as f:
        tflite_model_buf = f.read()
    data_shape = (1, 224, 224, 3)
    in_min, in_max = (0, 255)
    data = np.random.randint(in_min,
                             high=in_max,
                             size=data_shape,
                             dtype="uint8")
    mod, params = convert_to_relay(tflite_model_buf, data, "input")
    inputs = {"input": data}
    output_list = generate_ref_data(mod, inputs, params)

    compiled_test_mods = compile_models(
        AOTTestModel(module=mod,
                     inputs=inputs,
                     outputs=output_list,
                     params=params),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
    )

    for compiled_model in compiled_test_mods:
        check_for_no_tvm_backendallocworkspace_calls(
            compiled_model.executor_factory.lib)

    # Checking the workspace size
    assert (sum(
        compiled_model.executor_factory.function_metadata["__tvm_main__"].
        workspace_sizes.values()) == workspace_size)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
Beispiel #23
0
def test_id(interface_api, use_unpacked_api, test_runner):
    x = relay.var("x", "float32")
    ident = relay.Function([x], x)
    one = np.array(1.0, "float32")
    inputs = {"x": one}
    output_list = generate_ref_data(ident, inputs)

    compile_and_run(
        AOTTestModel(module=IRModule.from_expr(ident), inputs=inputs, outputs=output_list),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
Beispiel #24
0
def test_subtract(interface_api, use_unpacked_api, test_runner):
    i = relay.var("i", shape=[], dtype="int32")
    sub = relay.subtract(i, relay.const(1, dtype="int32"))
    func = relay.Function([i], sub, ret_type=relay.TensorType([], "int32"))
    i_data = np.array(1, dtype="int32")
    inputs = {"i": i_data}
    output_list = generate_ref_data(func, inputs)
    compile_and_run(
        AOTTestModel(module=IRModule.from_expr(func), inputs=inputs, outputs=output_list),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
Beispiel #25
0
def test_add_const(interface_api, use_unpacked_api, use_calculated_workspaces):
    two = relay.add(relay.const(1), relay.const(1))
    func = relay.Function([], two)
    output_list = generate_ref_data(func, {})
    inputs = {}

    compile_and_run(
        func,
        inputs,
        output_list,
        interface_api,
        use_unpacked_api,
        use_calculated_workspaces,
    )
Beispiel #26
0
def test_tflite_model_u1_usecase(model_url, usmp_algo, workspace_size):
    """This checks for ML models and the memory used by them when using USMP with different algorithms"""
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing

    use_unpacked_api = True
    interface_api = "c"
    test_runner = AOTTestRunner(pass_config={
        "tir.usmp.enable": True,
        "tir.usmp.algorithm": usmp_algo
    })

    tflite_model_file = tf_testing.get_workload_official(
        model_url[0],
        model_url[1],
    )
    mod, inputs, params = create_relay_module_and_inputs_from_tflite_file(
        tflite_model_file)
    output_list = generate_ref_data(mod, inputs, params)

    compiled_test_mods = compile_models(
        AOTTestModel(module=mod,
                     inputs=inputs,
                     outputs=output_list,
                     params=params),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
    )

    for compiled_model in compiled_test_mods:
        check_for_no_tvm_backendallocworkspace_calls(
            compiled_model.executor_factory.lib)

    # Checking the workspace size reported in model library format
    mlf_memory_map = mlf._build_function_memory_map(
        compiled_test_mods[0].executor_factory.function_metadata)
    assert mlf_memory_map["main"][0]["workspace_size_bytes"] == workspace_size
    # That should match to workspace size that will be codegen'd to the entry point.
    allocated_pool_info = list(
        dict(compiled_test_mods[0].executor_factory.executor_codegen_metadata.
             pool_inputs).values())[0]
    assert allocated_pool_info.allocated_size == workspace_size

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
Beispiel #27
0
def test_byoc_microtvm(merge_compiler_regions):
    """This is a simple test to check BYOC capabilities of AOT - with and without merging compiler regions to test for https://github.com/apache/tvm/issues/9036"""
    use_unpacked_api = False
    interface_api = "packed"
    test_runner = AOT_DEFAULT_RUNNER

    x = relay.var("x", shape=(10, 10))
    w0 = relay.var("w0", shape=(10, 10))
    w1 = relay.var("w1", shape=(10, 10))

    # z0 = x + w0
    x_ = compiler_begin(x, "ccompiler")
    w0_ = compiler_begin(w0, "ccompiler")
    z0_ = relay.add(x_, w0_)
    z0 = compiler_end(z0_, "ccompiler")

    # z1 = z0 + w1
    z0__ = compiler_begin(z0, "ccompiler")
    w1_ = compiler_begin(w1, "ccompiler")
    z1_ = relay.add(z0__, w1_)
    z1 = compiler_end(z1_, "ccompiler")

    # z2 = z0 + z1
    z2 = relay.add(z0, z1)

    f = relay.Function([x, w0, w1], z2)
    mod = tvm.IRModule()
    mod["main"] = f

    if merge_compiler_regions:
        mod = transform.MergeCompilerRegions()(mod)

    mod = transform.PartitionGraph("mod_name")(mod)
    mod = transform.InferType()(mod)

    x_data = [("x", np.random.rand(10, 10).astype("float32"))]
    w_data = [("w{}".format(i), np.random.rand(10, 10).astype("float32"))
              for i in range(2)]

    map_inputs = OrderedDict(x_data + w_data)
    output_list = generate_ref_data(mod, map_inputs)
    compile_and_run(
        AOTTestModel(name="my_mod",
                     module=mod,
                     inputs=map_inputs,
                     outputs=output_list),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
Beispiel #28
0
def test_id(interface_api, use_unpacked_api, use_calculated_workspaces):
    x = relay.var("x", "float32")
    ident = relay.Function([x], x)
    one = np.array(1.0, "float32")
    inputs = {"x": one}
    output_list = generate_ref_data(ident, inputs)

    compile_and_run(
        ident,
        inputs,
        output_list,
        interface_api,
        use_unpacked_api,
        use_calculated_workspaces,
    )
Beispiel #29
0
def test_subtract(interface_api, use_unpacked_api, use_calculated_workspaces):
    i = relay.var("i", shape=[], dtype="int32")
    sub = relay.subtract(i, relay.const(1, dtype="int32"))
    func = relay.Function([i], sub, ret_type=relay.TensorType([], "int32"))
    i_data = np.array(1, dtype="int32")
    inputs = {"i": i_data}
    output_list = generate_ref_data(func, inputs)
    compile_and_run(
        func,
        inputs,
        output_list,
        interface_api,
        use_unpacked_api,
        use_calculated_workspaces,
    )
Beispiel #30
0
def test_tuple_output(interface_api, use_unpacked_api, test_runner):
    x = relay.var("x", shape=(6, 9))
    y = relay.split(x, 3).astuple()
    a = relay.TupleGetItem(y, 0)
    b = relay.TupleGetItem(y, 1)
    out = relay.Tuple([a, b])
    func = relay.Function([x], out)
    x_data = np.random.rand(6, 9).astype("float32")
    inputs = {"x": x_data}
    output_list = generate_ref_data(func, inputs)
    compile_and_run(
        AOTTestModel(module=IRModule.from_expr(func), inputs=inputs, outputs=output_list),
        test_runner,
        interface_api,
        use_unpacked_api,
    )