Example #1
0
def test_quant_mobilenet_tfl():
    """Since in AOT we pass directly the output buffer from the user, in quantized networks sharing the output buffers is not possible.
    This is because the output data type is int8 and the intermediate buffer are int32 or int16. We use mobilenet quantized to stress this
    situation and verify that the output buffer sharing is disabled in AOT."""
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing

    use_unpacked_api = True
    interface_api = "c"
    test_runner = AOT_DEFAULT_RUNNER

    tflite_model_file = tf_testing.get_workload_official(
        "https://storage.googleapis.com/download.tensorflow.org/"
        "models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz",
        "mobilenet_v1_1.0_224_quant.tflite",
    )
    with open(tflite_model_file, "rb") as f:
        tflite_model_buf = f.read()
    data_shape = (1, 224, 224, 3)
    in_min, in_max = (0, 255)
    data = np.random.randint(in_min, high=in_max, size=data_shape, dtype="uint8")
    mod, params = convert_to_relay(tflite_model_buf)
    inputs = {"input": data}
    output_list = generate_ref_data(mod, inputs, params)
    compile_and_run(
        AOTTestModel(module=mod, inputs=inputs, outputs=output_list, params=params),
        test_runner,
        interface_api,
        use_unpacked_api,
    )
def test_tflite_model(model_url, usmp_algo, workspace_size):
    """This checks for ML models and the memory used by them when using USMP with different algorithms"""
    pytest.importorskip("tflite")

    import tvm.relay.testing.tf as tf_testing

    use_unpacked_api = True
    interface_api = "c"
    test_runner = AOTTestRunner(pass_config={
        "tir.usmp.enable": True,
        "tir.usmp.algorithm": usmp_algo
    })

    tflite_model_file = tf_testing.get_workload_official(
        model_url[0],
        model_url[1],
    )
    with open(tflite_model_file, "rb") as f:
        tflite_model_buf = f.read()
    data_shape = (1, 224, 224, 3)
    in_min, in_max = (0, 255)
    data = np.random.randint(in_min,
                             high=in_max,
                             size=data_shape,
                             dtype="uint8")
    mod, params = convert_to_relay(tflite_model_buf, data, "input")
    inputs = {"input": data}
    output_list = generate_ref_data(mod, inputs, params)

    compiled_test_mods = compile_models(
        AOTTestModel(module=mod,
                     inputs=inputs,
                     outputs=output_list,
                     params=params),
        interface_api=interface_api,
        use_unpacked_api=use_unpacked_api,
        pass_config=test_runner.pass_config,
    )

    for compiled_model in compiled_test_mods:
        check_for_no_tvm_backendallocworkspace_calls(
            compiled_model.executor_factory.lib)

    # Checking the workspace size
    assert (sum(
        compiled_model.executor_factory.function_metadata["__tvm_main__"].
        workspace_sizes.values()) == workspace_size)

    run_and_check(
        models=compiled_test_mods,
        runner=test_runner,
        interface_api=interface_api,
    )
Example #3
0
def _get_relay_module_and_inputs_from_tflite_file(tflite_model_file):
    with open(tflite_model_file, "rb") as f:
        tflite_model_buf = f.read()
    mod, params = convert_to_relay(tflite_model_buf)

    inputs = dict()
    for param in mod["main"].params:
        name = str(param.name_hint)
        data_shape = [int(i) for i in param.type_annotation.shape]
        dtype = str(param.type_annotation.dtype)
        in_min, in_max = (np.iinfo(dtype).min, np.iinfo(dtype).max)
        data = np.random.randint(in_min,
                                 high=in_max,
                                 size=data_shape,
                                 dtype=dtype)
        inputs[name] = data

    return mod, inputs, params