def test_translate_ethosu_depthwise_conv2d():
    def extract_ethosu_depthwise_conv2d_extern_call(mod):
        # There should only be a single function
        assert len(mod.functions.items()) == 1
        primfunc = mod.functions.items()[0][1]

        ethosu_depthwise_conv2d_calls = list()

        def populate_ethosu_depthwise_conv2d_calls(stmt):
            if (
                isinstance(stmt, tvm.tir.Call)
                and stmt.op.name == "tir.call_extern"
                and stmt.args[0] == "ethosu_depthwise_conv2d"
            ):
                ethosu_depthwise_conv2d_calls.append(stmt)

        stmt_functor.post_order_visit(primfunc.body, populate_ethosu_depthwise_conv2d_calls)
        return ethosu_depthwise_conv2d_calls[0]

    depthwise_conv2d_call = extract_ethosu_depthwise_conv2d_extern_call(SingleEthosuDepthwiseConv2D)
    npu_op, w_zero_point = tir_to_cs_translator.translate_ethosu_depthwise_conv2d(
        depthwise_conv2d_call
    )

    assert npu_op.ifm.data_type == vapi.NpuDataType.INT8
    assert npu_op.ifm.shape == vapi.NpuShape3D(8, 8, 3)
    assert npu_op.ifm.tiles.height_0 == vapi.NpuTileBox(8, 0, 8, [0, 0, 0, 0]).height_0
    assert npu_op.ifm.tiles.height_1 == vapi.NpuTileBox(8, 0, 8, [0, 0, 0, 0]).height_1
    assert npu_op.ifm.tiles.width_0 == vapi.NpuTileBox(8, 0, 8, [0, 0, 0, 0]).width_0
    assert npu_op.ifm.quantization == pytest.approx(vapi.NpuQuantization(0.6, 11))
    assert npu_op.ifm.layout == vapi.NpuLayout.NHWC
    assert npu_op.ifm.strides == vapi.NpuShape3D(24, 3, 1)
    # Compare OFM
    assert npu_op.ofm.data_type == vapi.NpuDataType.INT8
    assert npu_op.ofm.shape == vapi.NpuShape3D(6, 7, 3)
    assert npu_op.ofm.tiles.height_0 == vapi.NpuTileBox(6, 0, 8, [0, 0, 0, 0]).height_0
    assert npu_op.ofm.tiles.height_1 == vapi.NpuTileBox(6, 0, 7, [0, 0, 0, 0]).height_1
    assert npu_op.ofm.tiles.width_0 == vapi.NpuTileBox(6, 0, 7, [0, 0, 0, 0]).width_0
    assert npu_op.ofm.quantization == pytest.approx(vapi.NpuQuantization(0.26, 15))
    assert npu_op.ofm.layout == vapi.NpuLayout.NHWC
    assert npu_op.ofm.strides == vapi.NpuShape3D(21, 3, 1)
    # Compare kernel and padding
    assert (
        npu_op.kernel.__dict__
        == vapi.NpuKernel(w=2, h=3, stride_x=1, stride_y=1, dilation_x=1, dilation_y=1).__dict__
    )
    assert npu_op.padding == vapi.NpuPadding(top=0, left=0, bottom=0, right=0)
    # Compare activation
    assert npu_op.activation.op_type == vapi.NpuActivationOp.NONE_OR_RELU
    assert npu_op.activation.min == 0
    assert npu_op.activation.max == pytest.approx(23.4)
    # Compare ifm upscaling
    assert npu_op.ifm_upscale == vapi.NpuResamplingMode.NONE
    # Compare weight quantization parameters
    assert w_zero_point == 13
def _create_npu_feature_map(
        serial_feature_map: spec.SerialFeatureMap) -> vapi.NpuFeatureMap:
    """This is a helper function to capture a list
    of arguments to create Vela NpuFeatureMap object.
    """
    layout_map = {
        "NHWC": vapi.NpuLayout.NHWC,
        "NHCWB16": vapi.NpuLayout.NHCWB16
    }
    datatype_map = {
        "uint8": vapi.NpuDataType.UINT8,
        "int8": vapi.NpuDataType.INT8,
        "uint16": vapi.NpuDataType.UINT16,
        "int16": vapi.NpuDataType.INT16,
        "int32": vapi.NpuDataType.INT32,
    }
    layout = str(serial_feature_map.layout.value)
    data_type = str(serial_feature_map.data_type.value)
    date_type_bytes = np.iinfo(np.dtype(data_type)).bits // 8
    assert layout in layout_map.keys()
    assert data_type in datatype_map.keys()
    nfm = vapi.NpuFeatureMap()
    nfm.data_type = datatype_map[data_type]
    nfm.shape = vapi.NpuShape3D(
        int(serial_feature_map.height),
        int(serial_feature_map.width),
        int(serial_feature_map.channels),
    )
    nfm.tiles = vapi.NpuTileBox(
        int(serial_feature_map.tile_height_0),
        int(serial_feature_map.tile_height_1),
        int(serial_feature_map.tile_width_0),
        [
            serial_feature_map.tile_address_0,
            serial_feature_map.tile_address_1,
            serial_feature_map.tile_address_2,
            serial_feature_map.tile_address_3,
        ],
    )
    nfm.quantization = _create_npu_quantization(serial_feature_map.scale,
                                                serial_feature_map.zero_point)
    nfm.layout = layout_map[layout]
    nfm.strides = vapi.NpuShape3D(
        int(serial_feature_map.stride_h.value) * date_type_bytes,
        int(serial_feature_map.stride_w.value) * date_type_bytes,
        int(serial_feature_map.stride_c.value) * date_type_bytes,
    )
    return nfm
Exemple #3
0
def get_optimal_block_config(
        npu_op: vapi.NpuOperation,
        accel_config: vapi.NpuAccelerator) -> vapi.NpuShape3D:
    """
    "The NPU's unit of work is known as a block. It will fetch block(s) from Input
    Feature Map (IFM) and a compute block for Output Feature Map (OFM).
    Therefore, we need to pick an optimal block configuration considering bandwidth
    to bring IFM blocks and the number of OFM block computes need to happen
    to cover the OFM as indicated by the npu op.

    Parameters
    ----------
    npu_op : ethosu.vela.api.NpuOperation
        The NPU operation and its params
    accel_config : ethosu.vela.api.NpuAccelerator
        The NPU accelerator config

    Returns
    -------
    ethosu.vela.api.NpuShape3D :
        The optimal block config for the operator
    """
    options = tvm.transform.PassContext.current().config.get(
        "relay.ext.ethos-u.options", None)
    if options and options.dev_force_block_config:
        block_config = [
            int(v) for v in options.dev_force_block_config.split("x")
        ]
        return vapi.NpuShape3D(height=block_config[0],
                               width=block_config[1],
                               depth=block_config[2])
    all_valid_block_configs = vapi.npu_find_block_configs(npu_op, accel_config)
    return _get_optimal_block_config(all_valid_block_configs)
def _create_npu_block_config(serial_block_config: spec.SerialBlockConfig) -> vapi.NpuShape3D:
    """A helper function to convert a SerialBlockConfig into an NpuShape3D"""
    if serial_block_config.height * serial_block_config.width * serial_block_config.depth == 0:
        return None

    block_config = vapi.NpuShape3D(
        height=int(serial_block_config.height),
        width=int(serial_block_config.width),
        depth=int(serial_block_config.depth),
    )
    return block_config
Exemple #5
0
 def create_mock(test_vec):
     with patch("ethosu.vela.api.npu_encode_weights") as mock_enc_w:
         with patch("ethosu.vela.api.npu_find_block_configs") as mock_blk_cfg:
             mock_blk_cfg.return_value = [vapi.NpuShape3D(8, 8, 8)]
             ethosu_conv2d_calls = extract_ethosu_conv2d_extern_calls(test_vec["tir_module"])
             buffer_info = tirtocs.extract_buffer_info(
                 test_vec["tir_module"], test_vec["param_dict"]
             )
             for ethosu_conv2d_call in ethosu_conv2d_calls:
                 npu_op, _ = tirtocs.translate_ethosu_conv2d(ethosu_conv2d_call)
                 weights = buffer_info[npu_op.weights[0].address.buffer_var][0]
                 vela_api.encode_weights(ethosu_conv2d_call, weights, accel)
             return mock_enc_w
Exemple #6
0
def test_get_optimal_block_config():
    block_configs_cases = [
        {
            "test": [
                vapi.NpuShape3D(10, 20, 8),
                vapi.NpuShape3D(10, 30, 16),
                vapi.NpuShape3D(10, 40, 32),
            ],
            "ref": vapi.NpuShape3D(10, 40, 32),
        },
        {
            "test": [
                vapi.NpuShape3D(10, 20, 8),
                vapi.NpuShape3D(10, 50, 32),
                vapi.NpuShape3D(10, 40, 32),
            ],
            "ref": vapi.NpuShape3D(10, 50, 32),
        },
        {
            "test": [
                vapi.NpuShape3D(50, 50, 8),
                vapi.NpuShape3D(10, 30, 32),
                vapi.NpuShape3D(8, 8, 64),
            ],
            "ref": vapi.NpuShape3D(8, 8, 64),
        },
    ]

    for test_case in block_configs_cases:
        assert vela_api._get_optimal_block_config(test_case["test"]) == test_case["ref"]
def test_translate_ethosu_conv2d():
    test_cases = [
        {
            # Stimulus
            "tir_module": SingleEthosUConv2D,
            "param_dict": {
                1: np.random.randint(
                    np.iinfo("uint8").min, np.iinfo("uint8").max, [1, 1, 3, 16], "uint8"
                ),
                2: np.random.randint(np.iinfo("int32").min, np.iinfo("int32").max, [16], "int32"),
            },
            # Reference outputs
            "ref": [
                {
                    "ifm": {
                        "data_type": vapi.NpuDataType.UINT8,
                        "shape": vapi.NpuShape3D(8, 8, 3),
                        "tiles": vapi.NpuTileBox(8, 0, 8, [0, 0, 0, 0]),
                        "quantization": vapi.NpuQuantization(0.5, 10),
                        "layout": vapi.NpuLayout.NHWC,
                        "strides": vapi.NpuShape3D(24, 3, 1),
                    },
                    "ofm": {
                        "data_type": vapi.NpuDataType.UINT8,
                        "shape": vapi.NpuShape3D(8, 8, 16),
                        "tiles": vapi.NpuTileBox(8, 0, 8, [0, 0, 0, 0]),
                        "quantization": vapi.NpuQuantization(0.25, 14),
                        "layout": vapi.NpuLayout.NHWC,
                        "strides": vapi.NpuShape3D(128, 16, 1),
                    },
                    "kernel": vapi.NpuKernel(
                        w=1, h=1, stride_x=1, stride_y=1, dilation_x=1, dilation_y=1
                    ),
                    "padding": vapi.NpuPadding(top=0, left=0, bottom=0, right=0),
                    "activation": {
                        "op": vapi.NpuActivationOp.NONE_OR_RELU,
                        "min": -3.5,
                        "max": 60.25,
                    },
                    "ifm_upscale": vapi.NpuResamplingMode.NONE,
                    "w_zero_point": 12,
                }
            ],
        },
        {
            "tir_module": MultiEthosUConv2D,
            "param_dict": {
                1: np.random.randint(
                    np.iinfo("uint8").min, np.iinfo("uint8").max, [1, 1, 3, 32], "uint8"
                ),
                2: np.random.randint(np.iinfo("int32").min, np.iinfo("int32").max, [32], "int32"),
                3: np.random.randint(
                    np.iinfo("uint8").min, np.iinfo("uint8").max, [1, 1, 32, 8], "uint8"
                ),
                4: np.random.randint(np.iinfo("int32").min, np.iinfo("int32").max, [8], "int32"),
            },
            # Reference Outputs
            "ref": [
                {
                    "ifm": {
                        "data_type": vapi.NpuDataType.UINT8,
                        "shape": vapi.NpuShape3D(4, 8, 3),
                        "tiles": vapi.NpuTileBox(4, 0, 8, [0, 0, 0, 0]),
                        "quantization": vapi.NpuQuantization(0.5, 10),
                        "layout": vapi.NpuLayout.NHWC,
                        "strides": vapi.NpuShape3D(24, 3, 1),
                    },
                    "ofm": {
                        "data_type": vapi.NpuDataType.UINT8,
                        "shape": vapi.NpuShape3D(4, 8, 32),
                        "tiles": vapi.NpuTileBox(4, 0, 8, [0, 0, 0, 0]),
                        "quantization": vapi.NpuQuantization(0.25, 14),
                        "layout": vapi.NpuLayout.NHWC,
                        "strides": vapi.NpuShape3D(256, 32, 1),
                    },
                    "kernel": vapi.NpuKernel(
                        w=1, h=1, stride_x=1, stride_y=1, dilation_x=1, dilation_y=1
                    ),
                    "padding": vapi.NpuPadding(top=0, left=0, bottom=0, right=0),
                    "activation": {"op": None},
                    "ifm_upscale": vapi.NpuResamplingMode.NONE,
                    "w_zero_point": 12,
                },
                {
                    "ifm": {
                        "data_type": vapi.NpuDataType.UINT8,
                        "shape": vapi.NpuShape3D(4, 8, 32),
                        "tiles": vapi.NpuTileBox(4, 0, 8, [0, 0, 0, 0]),
                        "quantization": vapi.NpuQuantization(0.5, 10),
                        "layout": vapi.NpuLayout.NHWC,
                        "strides": vapi.NpuShape3D(256, 32, 1),
                    },
                    "ofm": {
                        "data_type": vapi.NpuDataType.UINT8,
                        "shape": vapi.NpuShape3D(4, 8, 8),
                        "tiles": vapi.NpuTileBox(4, 0, 8, [0, 0, 0, 0]),
                        "quantization": vapi.NpuQuantization(0.25, 14),
                        "layout": vapi.NpuLayout.NHWC,
                        "strides": vapi.NpuShape3D(64, 8, 1),
                    },
                    "kernel": vapi.NpuKernel(
                        w=1, h=1, stride_x=1, stride_y=1, dilation_x=1, dilation_y=1
                    ),
                    "padding": vapi.NpuPadding(top=0, left=0, bottom=0, right=0),
                    "activation": {
                        "op": vapi.NpuActivationOp.NONE_OR_RELU,
                        "min": -3.5,
                        "max": 60.25,
                    },
                    "ifm_upscale": vapi.NpuResamplingMode.NONE,
                    "w_zero_point": 12,
                },
                {
                    "ifm": {
                        "data_type": vapi.NpuDataType.UINT8,
                        "shape": vapi.NpuShape3D(4, 8, 3),
                        "tiles": vapi.NpuTileBox(4, 0, 8, [0, 0, 0, 0]),
                        "quantization": vapi.NpuQuantization(0.5, 10),
                        "layout": vapi.NpuLayout.NHWC,
                        "strides": vapi.NpuShape3D(24, 3, 1),
                    },
                    "ofm": {
                        "data_type": vapi.NpuDataType.UINT8,
                        "shape": vapi.NpuShape3D(4, 8, 32),
                        "tiles": vapi.NpuTileBox(4, 0, 8, [0, 0, 0, 0]),
                        "quantization": vapi.NpuQuantization(0.25, 14),
                        "layout": vapi.NpuLayout.NHWC,
                        "strides": vapi.NpuShape3D(256, 32, 1),
                    },
                    "kernel": vapi.NpuKernel(
                        w=1, h=1, stride_x=1, stride_y=1, dilation_x=1, dilation_y=1
                    ),
                    "padding": vapi.NpuPadding(top=0, left=0, bottom=0, right=0),
                    "activation": {
                        "op": vapi.NpuActivationOp.NONE_OR_RELU,
                        "min": -3.5,
                        "max": 60.25,
                    },
                    "ifm_upscale": vapi.NpuResamplingMode.NONE,
                    "w_zero_point": 12,
                },
                {
                    "ifm": {
                        "data_type": vapi.NpuDataType.UINT8,
                        "shape": vapi.NpuShape3D(4, 8, 32),
                        "tiles": vapi.NpuTileBox(4, 0, 8, [0, 0, 0, 0]),
                        "quantization": vapi.NpuQuantization(0.5, 10),
                        "layout": vapi.NpuLayout.NHWC,
                        "strides": vapi.NpuShape3D(256, 32, 1),
                    },
                    "ofm": {
                        "data_type": vapi.NpuDataType.UINT8,
                        "shape": vapi.NpuShape3D(4, 8, 8),
                        "tiles": vapi.NpuTileBox(4, 0, 8, [0, 0, 0, 0]),
                        "quantization": vapi.NpuQuantization(0.25, 14),
                        "layout": vapi.NpuLayout.NHWC,
                        "strides": vapi.NpuShape3D(64, 8, 1),
                    },
                    "kernel": vapi.NpuKernel(
                        w=1, h=1, stride_x=1, stride_y=1, dilation_x=1, dilation_y=1
                    ),
                    "padding": vapi.NpuPadding(top=0, left=0, bottom=0, right=0),
                    "activation": {
                        "op": vapi.NpuActivationOp.NONE_OR_RELU,
                        "min": -3.5,
                        "max": 60.25,
                    },
                    "ifm_upscale": vapi.NpuResamplingMode.NONE,
                    "w_zero_point": 12,
                },
            ],
        },
    ]

    def extract_ethosu_conv2d_extern_calls(mod):
        """This function will obtain all ethosu_conv2d
        calls from a NPU TIR module
        Parameters
        ----------
        mod : tvm.IRModule
            This is a NPU TIR Module

        Returns
        -------
        list
            of tvm.tir.Call objects
            that are tir extern calls
            for ethosu_conv2d
        """
        # There should only be a single function
        assert len(mod.functions.items()) == 1
        primfunc = mod.functions.items()[0][1]

        ethosu_conv2d_calls = list()

        def populate_ethosu_conv2d_calls(stmt):
            if (
                isinstance(stmt, tvm.tir.Call)
                and stmt.op.name == "tir.call_extern"
                and stmt.args[0] == "ethosu_conv2d"
            ):
                ethosu_conv2d_calls.append(stmt)

        stmt_functor.post_order_visit(primfunc.body, populate_ethosu_conv2d_calls)
        return ethosu_conv2d_calls

    for test_case in test_cases:
        ethosu_conv2d_calls = extract_ethosu_conv2d_extern_calls(test_case["tir_module"])
        for idx, ethosu_conv2d_call in enumerate(ethosu_conv2d_calls):
            ref = test_case["ref"][idx]
            npu_op, w_zero_point = tir_to_cs_translator.translate_ethosu_conv2d(ethosu_conv2d_call)
            # Compare IFM
            assert npu_op.ifm.data_type == ref["ifm"]["data_type"]
            assert npu_op.ifm.shape == ref["ifm"]["shape"]
            assert npu_op.ifm.tiles.height_0 == ref["ifm"]["tiles"].height_0
            assert npu_op.ifm.tiles.height_1 == ref["ifm"]["tiles"].height_1
            assert npu_op.ifm.tiles.width_0 == ref["ifm"]["tiles"].width_0
            assert npu_op.ifm.quantization == ref["ifm"]["quantization"]
            assert npu_op.ifm.layout == ref["ifm"]["layout"]
            assert npu_op.ifm.strides == ref["ifm"]["strides"]
            # Compare OFM
            assert npu_op.ofm.data_type == ref["ofm"]["data_type"]
            assert npu_op.ofm.shape == ref["ofm"]["shape"]
            assert npu_op.ofm.tiles.height_0 == ref["ofm"]["tiles"].height_0
            assert npu_op.ofm.tiles.height_1 == ref["ofm"]["tiles"].height_1
            assert npu_op.ofm.tiles.width_0 == ref["ofm"]["tiles"].width_0
            assert npu_op.ofm.quantization == ref["ofm"]["quantization"]
            assert npu_op.ofm.layout == ref["ofm"]["layout"]
            assert npu_op.ofm.strides == ref["ofm"]["strides"]
            # Compare kernel and padding
            assert npu_op.kernel.__dict__ == ref["kernel"].__dict__
            assert npu_op.padding == ref["padding"]
            # Compare activation
            if ref["activation"]["op"] is None:
                assert npu_op.activation is None
            else:
                assert npu_op.activation.op_type == ref["activation"]["op"]
                assert npu_op.activation.min == ref["activation"]["min"]
                assert npu_op.activation.max == ref["activation"]["max"]
            # Compare ifm upscaling
            assert npu_op.ifm_upscale == ref["ifm_upscale"]
            # Compare weight quantization parameters
            assert w_zero_point == ref["w_zero_point"]
Exemple #8
0
                vapi.NpuShape3D(10, 30, 32),
                vapi.NpuShape3D(8, 8, 64),
            ],
            "ref":
            vapi.NpuShape3D(8, 8, 64),
        },
    ]

    for test_case in block_configs_cases:
        assert vela_api._get_optimal_block_config(
            test_case["test"]) == test_case["ref"]


@pytest.mark.parametrize(
    "block_config_str, expected_block_config",
    [("4x4x8", vapi.NpuShape3D(4, 4, 8)),
     ("3x7x16", vapi.NpuShape3D(3, 7, 16))],
)
def test_force_block_config(block_config_str, expected_block_config):
    config = {
        "dev_force_block_config": block_config_str,
    }
    with tvm.transform.PassContext(
            config={"relay.ext.ethos-u.options": config}):
        block_config = vela_api.get_optimal_block_config(
            None, vapi.NpuAccelerator.Ethos_U55_128)
        assert block_config == expected_block_config


def test_compress_weights():
    test_vecs = [