Example #1
0
 def update_output_qnn_params(self,
                              input_dtype="uint8",
                              kernel_dtype="uint8",
                              output_dtype="uint8"):
     """Recompute the output scale and zero point from the input/kernel quantization.

     The real-valued extremes of the input and of the kernel are combined with
     the number of products accumulated per output element (kernel H * kernel W
     * input channels) to bound the convolution result. That bound is then
     spread over the integer range of ``output_dtype``. The results are stored
     in ``self.ofm.sc`` (float32 constant) and ``self.ofm.zp`` (int32 constant).

     Parameters
     ----------
     input_dtype : str
         Dtype name whose maximum is used to bound the input values.
     kernel_dtype : str
         Dtype name whose maximum is used to bound the kernel values.
     output_dtype : str
         Dtype name whose integer range the output scale is derived from.
     """
     # NOTE(review): both lower bounds below are computed as scale * (0 - zp),
     # i.e. they take quantized value 0 as the smallest input rather than the
     # dtype minimum. Looks written with unsigned dtypes in mind -- confirm
     # this is intended for int8.
     _, dtype_max = get_range_for_dtype_str(input_dtype)
     input_max = self.ifm.sc.data.asnumpy() * (dtype_max -
                                               self.ifm.zp.data.asnumpy())
     input_min = -self.ifm.sc.data.asnumpy() * self.ifm.zp.data.asnumpy()

     # The kernel scale may hold several values, so reduce to a single
     # worst-case maximum and minimum.
     _, dtype_max = get_range_for_dtype_str(kernel_dtype)
     kernel_max = np.max(self.kernel.sc.data.asnumpy() *
                         (dtype_max - self.kernel.zp.data.asnumpy()))
     kernel_min = np.min(-self.kernel.sc.data.asnumpy() *
                         self.kernel.zp.data.asnumpy())

     kernel_h = self.kernel.get_dim_size("H")
     kernel_w = self.kernel.get_dim_size("W")
     channels = self.kernel.get_dim_size("I")

     # Every sign combination of kernel/input extremes is a candidate for the
     # largest or smallest accumulated value.
     output_limits = [
         kernel_max * kernel_h * kernel_w * channels * input_max,
         kernel_min * kernel_h * kernel_w * channels * input_max,
         kernel_min * kernel_h * kernel_w * channels * input_min,
         kernel_max * kernel_h * kernel_w * channels * input_min,
     ]
     output_max = max(output_limits)
     output_min = min(output_limits)

     # The output scale must be expressed in terms of the *output* integer
     # range; previously the input dtype was used here and ``output_dtype``
     # was silently ignored.
     dtype_min, dtype_max = get_range_for_dtype_str(output_dtype)
     self.ofm.sc = relay.const(
         (output_max - output_min) / (dtype_max - dtype_min), "float32")
     self.ofm.zp = relay.const(
         -int(output_min / self.ofm.sc.data.asnumpy()), "int32")
Example #2
0
    def __init__(self, dtype):
        """Set up conv2d parameters with randomly drawn quantization values.

        The input (``ifm``), ``kernel`` and output (``ofm``) tensors all use
        ``dtype``. Each gets a random int32 zero point drawn from the dtype's
        range and a random float32 scale in ``[0, 2)``. Strides and padding are
        left unset and no activation is applied.
        """
        self.ifm = TensorType()
        self.ofm = TensorType()
        self.kernel = TensorType()

        # Tensors are populated in ifm -> kernel -> ofm order, zero point before
        # scale, so the sequence of np.random draws is fixed for a given seed.
        layouts = (
            (self.ifm, "NHWC"),
            (self.kernel, "HWIO"),
            (self.ofm, "NHWC"),
        )
        for tensor, layout in layouts:
            tensor.dtype = dtype
            tensor.layout = layout
            lowest, highest = get_range_for_dtype_str(tensor.dtype)
            zero_point = np.random.randint(lowest, highest)
            tensor.zp = relay.const(zero_point, "int32")
            scale = np.random.random() * 2
            tensor.sc = relay.const(scale, "float32")

        # Convolution attributes; strides and pad are filled in by the caller.
        self.dilation = (1, 1)
        self.strides = None
        self.pad = None

        # No activation by default, so the clip bounds are placeholders.
        self.activation = "NONE"
        self.clip_min = 0
        self.clip_max = 0
Example #3
0
def test_ethosu_left_shift_binary_elemwise(
    accel_type,
    ifm_shape,
    ifm2_shape,
):
    """Compare an int32 ``left_shift`` offloaded to Ethos-U against the Relay reference.

    The reference output is produced from the unpartitioned module; the
    partitioned module is then checked against it for ``accel_type``.
    """
    np.random.seed(0)
    dtype = "int32"

    def create_model():
        # Two int32 inputs feeding a single element-wise left shift.
        operand = relay.var("ifm", shape=ifm_shape, dtype=dtype)
        shift = relay.var("ifm2", shape=ifm2_shape, dtype=dtype)
        shifted = relay.left_shift(operand, shift)
        func = relay.Function([operand, shift], shifted)
        return tvm.IRModule.from_expr(func)

    cpu_mod = create_model()

    # Reference inputs: the operand spans the dtype range, while the shift
    # amounts are drawn from [0, 32).
    low, high = util.get_range_for_dtype_str(dtype)
    operand_values = np.random.randint(low,
                                       high=high,
                                       size=ifm_shape,
                                       dtype=dtype)
    shift_values = np.random.randint(0, high=32, size=ifm2_shape, dtype=dtype)
    input_data = {"ifm": operand_values, "ifm2": shift_values}

    output_data = generate_ref_data(cpu_mod, input_data)
    ethosu_mod = partition_for_ethosu(cpu_mod)

    infra.compare_ethosu_with_reference(ethosu_mod, input_data, output_data,
                                        accel_type)
Example #4
0
def test_forward_mobilenet_v1(accel_type):
    """Build and verify the quantized Mobilenet V1 TF Lite model for ``accel_type``."""
    np.random.seed(23)

    model_path = tf_testing.get_workload_official(
        "https://storage.googleapis.com/download.tensorflow.org/"
        "models/mobilenet_v1_2018_08_02/mobilenet_v1_1.0_224_quant.tgz",
        "mobilenet_v1_1.0_224_quant.tflite",
    )
    with open(model_path, "rb") as model_file:
        model_bytes = model_file.read()

    # A single random uint8 image is used as the network input.
    input_name, input_dtype, input_shape = "input", "uint8", (1, 224, 224, 3)
    low, high = util.get_range_for_dtype_str(input_dtype)
    image = np.random.randint(low, high=high, size=input_shape, dtype=input_dtype)

    relay_mod, params = convert_to_relay(model_bytes)
    inputs = {input_name: image}
    # The reference output comes from the unpartitioned Relay module.
    expected = generate_ref_data(relay_mod, inputs)

    partitioned = partition_for_ethosu(relay_mod, params)
    compiled_models = infra.build_source(
        partitioned, inputs, expected, accel_type, output_tolerance=10
    )
    infra.verify_source(compiled_models, accel_type)
Example #5
0
def test_ethosu_section_name():
    """Check that generated Ethos-U data is placed in the ``.rodata.tvm`` section.

    A single int8 conv2d is built, partitioned and compiled for
    ``ethos-u55-256``. The generated source of the offloaded module is then
    searched for the section attribute on the command stream and weights
    arrays.
    """
    # Quantization parameters and input data are drawn from np.random; seed so
    # that a failure can be reproduced.
    np.random.seed(0)

    accel_type = "ethos-u55-256"
    # Single source of truth so the graph and its input data cannot disagree.
    input_name = "input"
    input_shape = (1, 300, 300, 3)
    input_dtype = "int8"

    def create_graph_single(input_tensor_name, input_tensor_shape,
                            input_tensor_dtype):
        """Return ``(module, [params])`` for a single 3x3 conv2d with 32 output channels."""
        c1_params = relay_ir_builder.QnnConv2DParams(input_tensor_dtype)
        c1_params.ifm.shape = input_tensor_shape
        c1_params.kernel.shape = (3, 3, c1_params.ifm.shape[3], 32)
        # One scale value per output channel.
        c1_params.kernel.sc = relay.const(np.random.rand(32) * 2, "float32")
        c1_params.strides = (1, 1)
        c1_params.pad = "VALID"
        c1_params.update_output_qnn_params(input_tensor_dtype,
                                           input_tensor_dtype,
                                           input_tensor_dtype)
        input0 = relay.var(input_tensor_name,
                           shape=c1_params.ifm.shape,
                           dtype=c1_params.ifm.dtype)
        c1, _ = relay_ir_builder.create_qnn_conv2d(c1_params, input0)
        c1_params.ofm.shape = get_shape_expr(input0, c1)

        f = relay.Function([input0], c1)
        mod = tvm.IRModule()
        mod["main"] = f
        return mod, [c1_params]

    relay_module, _ = create_graph_single(input_name, input_shape, input_dtype)
    mod = partition_for_ethosu(relay_module)

    # Generate reference data
    in_min, in_max = util.get_range_for_dtype_str(input_dtype)
    input_data = {
        input_name:
        np.random.randint(in_min,
                          high=in_max,
                          size=input_shape,
                          dtype=input_dtype)
    }
    output_data = generate_ref_data(relay_module, input_data)

    compiled_models = infra.build_source(mod,
                                         input_data,
                                         output_data,
                                         accel_type,
                                         output_tolerance=1)

    # Assumes only two runtime.Modules are created -- i.e. single offload module
    ethosu_module = compiled_models[0].executor_factory.lib.imported_modules[
        0].imported_modules[0]

    # Verify generated C source
    source = ethosu_module.get_source()
    assert (
        '__attribute__((section(".rodata.tvm"), aligned(16))) static int8_t tvmgen_default_ethos_u_main_0_cms_data_data'
        in source)
    assert (
        '__attribute__((section(".rodata.tvm"), aligned(16))) static int8_t tvmgen_default_ethos_u_main_0_weights'
        in source)
Example #6
0
def test_ethosu_right_shift_binary_elemwise(ifm_shape, ifm2_shape,
                                            reversed_operands, accel_type,
                                            ofm_dtype):
    """Compare the Ethos-U ``SHR`` binary elementwise op against a NumPy reference.

    The operator is created directly with
    ``infra.make_ethosu_binary_elementwise``, so the expected output is
    computed by hand as a rounding right shift instead of being produced by a
    Relay reference run.
    """
    np.random.seed(0)
    dtype = "int32"

    def create_model():
        ifm = relay.var("ifm", shape=ifm_shape, dtype=dtype)
        ifm2 = relay.var("ifm2", shape=ifm2_shape, dtype=dtype)
        shr_op = infra.make_ethosu_binary_elementwise(ifm, ifm2, ifm_shape[3],
                                                      ifm2_shape[3], "SHR",
                                                      ofm_dtype,
                                                      reversed_operands)
        return tvm.IRModule.from_expr(relay.Function([ifm, ifm2], shr_op))

    def generate_output_data(input_data):
        """Return a one-element list holding the flattened expected output."""
        lhs = input_data["ifm"]
        rhs = input_data["ifm2"]
        # Broadcast to a common shape, then swap when the operands are reversed
        # so that ``lhs`` is always the value being shifted.
        if reversed_operands:
            lhs = np.broadcast_to(lhs, ifm2_shape)
            lhs, rhs = rhs, lhs
        else:
            rhs = np.broadcast_to(rhs, ifm_shape)

        def rounding_right_shift(lhs, rhs):
            # Add half of the divisor before shifting so the result is
            # rounded rather than truncated.
            r = 1 << (rhs - 1)
            return (lhs + r) >> rhs

        return [
            np.array([
                rounding_right_shift(x[0], x[1])
                for x in zip(lhs.flat, rhs.flat)
            ]).astype(ofm_dtype)
        ]

    cpu_mod = create_model()

    # Generate reference data. The bounds are deliberately narrow: because
    # ``high`` is exclusive, every lhs element is 18 and every shift amount is 1.
    # (A previous full-dtype-range lookup was immediately overwritten and has
    # been dropped.)
    in_min, in_max = 18, 19
    lhs = np.random.randint(in_min, high=in_max, size=ifm_shape, dtype=dtype)
    rhs = np.random.randint(1, high=2, size=ifm2_shape, dtype=dtype)
    input_data = {
        "ifm": lhs,
        "ifm2": rhs,
    }
    output_data = {"output": generate_output_data(input_data)[0]}
    ethosu_mod = infra.create_ethosu_partition(cpu_mod)

    infra.compare_ethosu_with_reference(ethosu_mod, input_data, output_data,
                                        accel_type)
Example #7
0
def test_ethosu_left_shift_binary_elemwise(
    accel_type,
    ifm_shape,
    ifm2_shape,
):
    """Build an int32 ``left_shift`` for Ethos-U and verify the generated source.

    The reference output comes from the unpartitioned Relay module. The
    partitioned module is compiled with ``infra.build_source``, its command
    stream is printed, and the build is checked with ``infra.verify_source``.
    """
    # Input data is random; seed so that a failing run can be reproduced.
    np.random.seed(0)
    dtype = "int32"

    def create_model():
        ifm = relay.var("ifm", shape=ifm_shape, dtype=dtype)
        ifm2 = relay.var("ifm2", shape=ifm2_shape, dtype=dtype)
        c1 = relay.left_shift(ifm, ifm2)
        f = relay.Function([ifm, ifm2], c1)
        mod = tvm.IRModule()
        mod["main"] = f
        return mod

    relay_mod = create_model()
    mod = partition_for_ethosu(relay_mod)

    # Generate reference data: the shifted operand spans the dtype range, the
    # shift amounts are drawn from [0, 32).
    in_min, in_max = util.get_range_for_dtype_str(dtype)
    input_data = {
        "ifm": np.random.randint(in_min,
                                 high=in_max,
                                 size=ifm_shape,
                                 dtype=dtype),
        "ifm2": np.random.randint(0, high=32, size=ifm2_shape, dtype=dtype),
    }
    output_data = generate_ref_data(relay_mod, input_data)

    compiled_models = infra.build_source(
        mod,
        input_data,
        output_data,
        accel_type,
    )

    # Assumes only two runtime.Modules are created -- i.e. single offload module
    imported_modules = compiled_models[0].executor_factory.lib.imported_modules
    assert len(imported_modules) == 2
    ethosu_module = imported_modules[0]

    # Fetch the command stream (returned as a hex string) from the offloaded
    # module and print it before verifying the build.
    get_cs = tvm._ffi.get_global_func("runtime.module.ethos-u.getcs")
    cmms = get_cs(ethosu_module)
    cmms = bytes.fromhex(cmms)

    infra.print_payload(cmms)
    infra.verify_source(compiled_models, accel_type)
Example #8
0
def test_ethosu_conv2d(accel_type):
    """Build, run and verify three quantized conv2d graphs on Ethos-U.

    The graphs are a single conv2d, two chained conv2ds, and two chained
    conv2ds where the first one has a CLIP activation. Each one is partitioned,
    compiled against a Relay-generated reference, and its command stream is
    printed before the build is verified.
    """

    def new_conv_params(dtype, ifm_shape, kernel_hw, ofm_channels, strides,
                        pad):
        # Fresh randomly-quantized parameters with the geometry filled in.
        params = relay_ir_builder.QnnConv2DParams(dtype)
        params.ifm.shape = ifm_shape
        params.kernel.shape = (*kernel_hw, params.ifm.shape[3], ofm_channels)
        params.strides = strides
        params.pad = pad
        return params

    def emit_conv(params, data, graph_input):
        # Append the convolution to ``data`` and record its output shape.
        conv, _ = relay_ir_builder.create_qnn_conv2d(params, data)
        params.ofm.shape = get_shape_expr(graph_input, conv)
        return conv

    def as_module(graph_input, body):
        func = relay.Function([graph_input], body)
        module = tvm.IRModule()
        module["main"] = func
        return module

    def create_graph_single(input_tensor_name, input_tensor_shape,
                            input_tensor_dtype):
        params = new_conv_params(input_tensor_dtype, input_tensor_shape,
                                 (3, 3), 32, (1, 1), "VALID")
        # One kernel scale per output channel.
        params.kernel.sc = relay.const(np.random.rand(32) * 2, "float32")
        params.update_output_qnn_params(input_tensor_dtype,
                                        input_tensor_dtype,
                                        input_tensor_dtype)
        graph_input = relay.var(input_tensor_name,
                                shape=params.ifm.shape,
                                dtype=params.ifm.dtype)
        conv = emit_conv(params, graph_input, graph_input)
        return as_module(graph_input, conv), [params]

    def two_conv_graph(input_tensor_name, input_tensor_shape,
                       input_tensor_dtype, clip=None):
        # 7x7 stride-2 VALID conv followed by a 5x5 stride-1 SAME conv. When
        # ``clip`` is given, the first conv gets a CLIP activation with those
        # (min, max) bounds.
        first = new_conv_params(input_tensor_dtype, input_tensor_shape,
                                (7, 7), 8, (2, 2), "VALID")
        if clip is not None:
            first.activation = "CLIP"
            first.clip_min, first.clip_max = clip
        first.update_output_qnn_params(input_tensor_dtype,
                                       input_tensor_dtype,
                                       input_tensor_dtype)
        graph_input = relay.var(input_tensor_name,
                                shape=first.ifm.shape,
                                dtype=first.ifm.dtype)
        conv1 = emit_conv(first, graph_input, graph_input)

        second = new_conv_params(input_tensor_dtype, first.ofm.shape, (5, 5),
                                 16, (1, 1), "SAME")
        # NOTE(review): called with the default dtypes here although the
        # tensors use ``input_tensor_dtype`` -- confirm this is intended.
        second.update_output_qnn_params()
        conv2 = emit_conv(second, conv1, graph_input)

        return as_module(graph_input, conv2), [second, first]

    def create_graph_double(input_tensor_name, input_tensor_shape,
                            input_tensor_dtype):
        return two_conv_graph(input_tensor_name, input_tensor_shape,
                              input_tensor_dtype)

    def create_graph_activation(input_tensor_name, input_tensor_shape,
                                input_tensor_dtype):
        return two_conv_graph(input_tensor_name, input_tensor_shape,
                              input_tensor_dtype, clip=(90, 110))

    test_cases = [
        (create_graph_single, ["input", (1, 300, 300, 3), "int8"]),
        (create_graph_double, ["input", (1, 128, 256, 4), "int8"]),
        (create_graph_activation, ["input", (1, 64, 100, 4), "int8"]),
    ]
    # Seed once, before any graph is built, so that quantization parameters
    # and input data are reproducible across the whole sequence of cases.
    np.random.seed(42)
    for build_graph, graph_args in test_cases:
        relay_module, _ = build_graph(*graph_args)
        input_tensor, input_shape, input_dtype = graph_args
        mod = partition_for_ethosu(relay_module)

        # Reference data comes from the unpartitioned module.
        low, high = util.get_range_for_dtype_str(input_dtype)
        sample = np.random.randint(low,
                                   high=high,
                                   size=input_shape,
                                   dtype=input_dtype)
        input_data = {input_tensor: sample}
        output_data = generate_ref_data(relay_module, input_data)

        compiled_models = infra.build_source(mod,
                                             input_data,
                                             output_data,
                                             accel_type,
                                             output_tolerance=1)

        # Assumes only two runtime.Modules are created -- i.e. single offload module
        host_lib = compiled_models[0].executor_factory.lib
        ethosu_module = host_lib.imported_modules[0].imported_modules[0]

        # Pull the command stream (hex string) out of the compilation
        # artifacts and print it before verifying the build.
        get_artifacts = tvm._ffi.get_global_func(
            "runtime.module.ethos-u.get_artifacts")
        compilation_artifacts = get_artifacts(ethosu_module)
        cmms = bytes.fromhex(compilation_artifacts[0].command_stream)
        infra.print_payload(cmms)
        infra.verify_source(compiled_models, accel_type)
Example #9
0
def test_ethosu_right_shift_binary_elemwise(ifm_shape, ifm2_shape,
                                            reversed_operands, accel_type,
                                            ofm_dtype):
    """Build a hand-partitioned Ethos-U ``SHR`` op and verify the generated source.

    The module is assembled manually: an external function marked with
    ``Compiler="ethos-u"`` takes both inputs as one flat tensor, splits and
    reshapes them, and applies the shift. ``main`` flattens and concatenates
    its two inputs and calls that function. The expected output is computed
    in NumPy as a rounding right shift.
    """
    dtype = "int32"

    def create_model():
        ifm_count = int(np.prod(ifm_shape))
        ifm2_count = int(np.prod(ifm2_shape))

        # Create a "partitioned" Relay function
        ifms = relay.var("ifms", shape=[ifm_count + ifm2_count], dtype=dtype)
        split = relay.split(ifms, [ifm_count])
        ifm = relay.reshape(split[0], newshape=ifm_shape)
        ifm2 = relay.reshape(split[1], newshape=ifm2_shape)
        shr_op = infra.make_ethosu_binary_elementwise(ifm, ifm2, ifm_shape[3],
                                                      ifm2_shape[3], "SHR",
                                                      ofm_dtype,
                                                      reversed_operands)

        glb_ethosu = relay.GlobalVar("tvmgen_default_ethos_u_main_0")
        func = (relay.Function([ifms], shr_op).with_attr(
            "Inline", 1).with_attr("Compiler", "ethos-u").with_attr(
                "global_symbol",
                "tvmgen_default_ethos_u_main_0").with_attr("Primitive", 1))
        mod = tvm.IRModule()
        mod[glb_ethosu] = func
        mod = relay.transform.InferType()(mod)

        # Main: flatten both inputs into the single tensor the external
        # function expects.
        ifm = relay.var("ifm", shape=ifm_shape, dtype=dtype)
        ifm2 = relay.var("ifm2", shape=ifm2_shape, dtype=dtype)
        call = relay.Call(
            glb_ethosu,
            [
                relay.concatenate(
                    data=(
                        relay.reshape(ifm, newshape=ifm_count),
                        relay.reshape(ifm2, newshape=ifm2_count),
                    ),
                    axis=0,
                )
            ],
        )
        mod["main"] = relay.Function([ifm, ifm2], call)
        mod = relay.transform.InferType()(mod)
        return mod

    mod = create_model()

    # Generate reference data. The bounds are deliberately narrow: because
    # ``high`` is exclusive, every lhs element is 18 and every shift amount is 1.
    # (A previous full-dtype-range lookup was immediately overwritten and has
    # been dropped.)
    in_min, in_max = 18, 19
    lhs = np.random.randint(in_min, high=in_max, size=ifm_shape, dtype=dtype)
    rhs = np.random.randint(1, high=2, size=ifm2_shape, dtype=dtype)
    input_data = {
        "ifm": lhs,
        "ifm2": rhs,
    }

    # Broadcast to a common shape, then swap when the operands are reversed so
    # that ``lhs`` is always the value being shifted.
    if reversed_operands:
        lhs = np.broadcast_to(lhs, ifm2_shape)
        lhs, rhs = rhs, lhs
    else:
        rhs = np.broadcast_to(rhs, ifm_shape)

    def rounding_right_shift(lhs, rhs):
        # Add half of the divisor before shifting so the result is rounded
        # rather than truncated.
        r = 1 << (rhs - 1)
        return (lhs + r) >> rhs

    output_data = np.array([
        rounding_right_shift(x[0], x[1]) for x in zip(lhs.flat, rhs.flat)
    ]).astype(ofm_dtype)

    compiled_model = infra.build_source(mod, input_data, [output_data],
                                        accel_type)
    imported_modules = compiled_model[0].executor_factory.lib.imported_modules
    assert len(imported_modules) == 2
    ethosu_module = imported_modules[0]

    # Fetch the command stream (returned as a hex string) from the offloaded
    # module and print it before verifying the build.
    get_cs = tvm._ffi.get_global_func("runtime.module.ethos-u.getcs")
    cmms = get_cs(ethosu_module)
    cmms = bytes.fromhex(cmms)

    infra.print_payload(cmms)
    infra.verify_source(compiled_model, accel_type)