Example #1
def test_small_graph():
    subgraph = TESubgraph([], None)
    part_a = InlinePart(
        subgraph,
        [
            Propagator(
                [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
                [0, 0],
            ),
            Propagator(
                [[0, 1, 0], [1, 0, 0], [0, 0, 1]],
                [-1, -1],
            ),
        ],
    )
    part_b = InlinePart(
        subgraph,
        [
            Propagator(
                [[1, 0, 0], [0, 1, 0], [0, 0, 1]],
                [0, 0],
            ),
        ],
    )
    tensor_1 = Tensor([10, 10], "uint8")
    tensor_2 = Tensor([9, 9], "uint8")
    tensor_3 = Tensor([10, 10], "uint8")
    tensor_4 = Tensor([10, 10], "uint8")

    part_a.set_input(0, tensor_1)
    part_a.set_input(1, tensor_2)
    part_a.set_output(tensor_3)
    tensor_1.add_consumer(part_a)
    tensor_2.add_consumer(part_a)
    tensor_3.add_producer(part_a)
    part_b.set_input(0, tensor_3)
    part_b.set_output(tensor_4)
    tensor_3.add_consumer(part_b)
    tensor_4.add_producer(part_b)

    assert part_a.input_tensors == [tensor_1, tensor_2]
    assert part_a.output_tensor == tensor_3
    assert part_b.input_tensors == [tensor_3]
    assert part_b.output_tensor == tensor_4

    assert tensor_1.producers == []
    assert tensor_1.consumers == [part_a]
    assert tensor_2.producers == []
    assert tensor_2.consumers == [part_a]
    assert tensor_3.producers == [part_a]
    assert tensor_3.consumers == [part_b]
    assert tensor_4.producers == [part_b]
    assert tensor_4.consumers == []

    graph = CascaderGraph([tensor_1, tensor_2], [tensor_4])
    assert graph.input_tensors == [tensor_1, tensor_2]
    assert graph.output_tensors == [tensor_4]
    assert graph.part_order == [part_b, part_a]
    for i, part in enumerate(graph.part_order):
        assert graph.get_part_id(part) == i
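For orientation, here is a minimal sketch in plain NumPy (an illustration, not the cascader API) of the arithmetic a Propagator encodes: the transform is an affine matrix applied to the output stripe shape in homogeneous form, so the transposing propagator in part_a above swaps the stripe axes on its way to the input.

import numpy as np

# Hypothetical [2, 4] output stripe in homogeneous form [height, width, 1]
transform = np.array([[0, 1, 0], [1, 0, 0], [0, 0, 1]])  # part_a's second propagator
output_stripe = np.array([2, 4, 1])
input_stripe = transform @ output_stripe
print(input_stripe[:-1])  # [4 2]: the input stripe is the transpose of the output stripe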
Example #2
def test_inline_part():
    subgraph = TESubgraph([], None)
    part = InlinePart(
        subgraph,
        [
            Propagator(
                [[0, 1, 0], [1, 0, 0], [0, 0, 1]],
                [0, 0],
            ),
        ],
    )
    output_stripe_config = StripeConfig([2, 4], [8, 8], [2, 4], [1, 2], [4, 2],
                                        [0, 0])
    input_stripe_config = StripeConfig([4, 2], [8, 8], [4, 2], [2, 1], [2, 4],
                                       [0, 0])

    assert part.input_tensors == [None]
    assert part.output_tensor is None
    assert len(part.propagators) == 1
    assert part.in_line
    assert part.get_stripe_align_hint() == [1, 1]
    performance_info = part.get_performance_info(output_stripe_config,
                                                 is_rolling=False)
    assert performance_info.compute_cycles == 0
    assert performance_info.read_bytes == [0]
    assert performance_info.write_bytes == 0
    input_stripe_configs = part.calculate_input_stripe_configs(
        output_stripe_config)
    assert len(input_stripe_configs) == 1
    assert input_stripe_configs[0] == input_stripe_config
Example #3
def test_propagator():
    transform = [
        [1, 0, 0, 0],
        [0, 1 / 2, 0, 0],
        [0, 0, -1, 0],
        [0, 0, 0, 1],
    ]
    offset = [-1, 1, 2]
    propagator = Propagator(
        transform=transform,
        offset=offset,
    )
    assert list(propagator.offset) == offset
    for i, row in enumerate(transform):
        for j, value in enumerate(row):
            assert isclose(propagator.transform[i][j], value)
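The element-by-element isclose loop can equivalently be written as a whole-matrix comparison; a hedged alternative for the end of the test above, assuming propagator.transform converts cleanly to a float array:

import numpy as np

# Same checks as above, vectorised (illustrative alternative only)
assert list(propagator.offset) == offset
assert np.allclose(np.array(propagator.transform, dtype=float), np.array(transform, dtype=float))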
Example #4
def match_ethosu_inline(output_tensor, device_config):
    """Match a Tensor Expression corresponding to an operator that will be inlined.

    If the Tensor Expression matches, an InlinePart will be created that models the
    matched Tensor Expression. Otherwise, None will be returned. This matcher is
    naive and assumes nothing about the compute of the Tensor Expression. Therefore,
    the resulting InlinePart will have full-tensor dependencies (i.e. each output
    element depends on every input element).

    Parameters
    ----------
    output_tensor : tvm.te.Tensor
        The tensor to attempt to match with.
    device_config : EthosuDeviceConfig
        Target device configuration

    Returns
    -------
    Union[None, InlinePart]
        The created InlinePart if there was a match, otherwise None.

    """
    if output_tensor.op.name not in INLINE_OPS:
        return None

    input_tensors = output_tensor.op.input_tensors
    propagators = []
    output_dims = len(output_tensor.shape)
    for input_tensor in input_tensors:
        input_dims = len(input_tensor.shape)
        transform_matrix = np.zeros((input_dims + 1, output_dims + 1))
        for i, axis in enumerate(input_tensor.shape):
            transform_matrix[i, output_dims] = int(axis)
        transform_matrix[input_dims, output_dims] = 1
        offset_vector = np.zeros(input_dims, dtype="int64")
        propagators.append(
            Propagator(
                transform_matrix.tolist(),
                offset_vector.tolist(),
            ))

    subgraph = TESubgraph(input_tensors, output_tensor)
    return InlinePart(
        subgraph,
        propagators,
    )
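To make the full-tensor dependency concrete, this sketch replays the matrix construction above for a hypothetical [4, 6] input feeding a 2-D output: the output-stripe columns are all zero, so any requested output stripe propagates to the whole input tensor.

import numpy as np

input_shape, output_dims = [4, 6], 2  # hypothetical shapes
matrix = np.zeros((len(input_shape) + 1, output_dims + 1))
for i, axis in enumerate(input_shape):
    matrix[i, output_dims] = axis
matrix[len(input_shape), output_dims] = 1
# matrix == [[0, 0, 4], [0, 0, 6], [0, 0, 1]]
print(matrix @ np.array([1, 1, 1]))  # [4. 6. 1.] whatever the output stripe is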
Example #5
def unary_elementwise_compute(
    ifm: te.Tensor,
    lut: te.Tensor,
    operator_type: str,
    ifm_scale: float,
    ifm_zero_point: int,
    ofm_scale: float,
    ofm_zero_point: int,
    ofm_channels: int,
    activation: str,
    clip_min: int,
    clip_max: int,
    rounding_mode: str,
    ifm_layout: str,
    ofm_layout: str,
) -> te.Tensor:
    """A compute operator representing the capabilities of unary_elementwise for the NPU.

    Parameters
    ----------
    ifm : te.Tensor
        The Input Feature Map tensor (IFM).
    lut : te.Tensor
        The look-up table values to use if activation = "LUT".
    operator_type: str
        The type of the unary elementwise operator.
            "ABS"
            "CLZ"
    ifm_scale : float
        The quantization scale for the Input Feature Map tensor.
    ifm_zero_point : int
        The quantization zero point for the Input Feature Map tensor.
    ofm_scale : float
        The quantization scale for the Output Feature Map tensor.
    ofm_zero_point : int
        The quantization zero point for the Output Feature Map tensor.
    ofm_channels : int
        The number of OFM channels.
    activation : str
        The activation function to use.
            "NONE" - no activation function.
            "CLIP" - clip the output between clip_min and clip_max.
            "TANH" - tanh activation function.
            "SIGMOID" - sigmoid activation function.
            "LUT" - use a look-up table to perform the activation function.
    clip_min : int
        The minimum clipping value if activation = "CLIP".
    clip_max : int
        The maximum clipping value if activation = "CLIP".
    rounding_mode : str
        The rounding mode to apply to the Output Feature Map tensor.
            "TFL" - Tensorflow Lite rounding scheme.
            "TRUNCATE" - Truncate towards zero.
            "NATURAL" - Round to nearest value, with x.5 rounded up towards +infinity.
    ifm_layout : str
        The layout of the Input Feature Map tensor. Can be "NHWC" or "NHCWB16".
    ofm_layout : str
        The layout of the Output Feature Map tensor. Can be "NHWC" or "NHCWB16".

    Returns
    -------
    te.Tensor
        The OFM tensor.

    """
    assert ifm.shape[0] == 1
    assert ifm_layout in {"NHWC", "NHCWB16"}
    assert ofm_layout in {"NHWC", "NHCWB16"}

    # Changing the ifm and ofm scale to conform with that expected by Vela API
    ofm_scale = ifm_scale / ofm_scale
    ifm_scale = 1.0

    # Compute operation for the IFM DMA pipeline
    dmaed_ifm = dma_ifm_compute(ifm, ifm_layout, ifm_zero_point, ifm_scale,
                                ofm_channels, (0, 0, 0, 0))

    # Unary elementwise compute operation
    ofm_height = dmaed_ifm.shape[1]
    ofm_width = dmaed_ifm.shape[2]

    unary_elementwise_attrs = {
        "op": "ethosu_unary_elementwise",
        "operator_type": operator_type,
        "activation": activation,
        "clip_min": clip_min,
        "clip_max": clip_max,
        "rounding_mode": rounding_mode,
    }

    def clz_imp(inp):
        # Assuming that it's a 32 bit int
        return 32 - te.log2(inp)

    operators = {"ABS": te.abs, "CLZ": clz_imp}

    unary_elementwise = te.compute(
        (1, ofm_height, ofm_width, ofm_channels),
        lambda nn, hh, ww, cc: operators[operator_type](
            dmaed_ifm(nn, hh, ww, cc).astype(ifm.dtype)
        ),
        name="ethosu_unary_elementwise",
        attrs=unary_elementwise_attrs,
    )

    nhwc_to_nhcwb16 = [
        [1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 0, 0, 1 / 16, 0],
        [0, 0, 1, 0, 0],
        [0, 0, 0, 0, 16],
        [0, 0, 0, 0, 1],
    ]
    nhcwb16_to_nhwc = [
        [1, 0, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0],
        [0, 0, 0, 1, 0, 0],
        [0, 0, 16, 0, 1, -16],
        [0, 0, 0, 0, 0, 1],
    ]
    ifm_matrix = [
        [1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 0, 1, 0, 0],
        [0, 0, 0, 1, 0],
        [0, 0, 0, 0, 1],
    ]
    if ofm_layout == "NHCWB16":
        ifm_matrix = np.matmul(ifm_matrix, nhcwb16_to_nhwc).tolist()
    if ifm_layout == "NHCWB16":
        ifm_matrix = np.matmul(nhwc_to_nhcwb16, ifm_matrix).tolist()

    ifm_propagator = Propagator(
        ifm_matrix,
        [0, 0, 0, 0] if ifm_layout == "NHWC" else [0, 0, 0, 0, 0],
    )
    propagator_attrs = {"ifm_propagator": ifm_propagator}

    # Compute operation for the OFM DMA pipeline
    return dma_ofm_compute(
        unary_elementwise,
        ofm_layout,
        ofm_zero_point,
        ofm_scale,
        ofm_channels,
        attrs=propagator_attrs,
    )
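The two layout matrices above recur in the compute operators that follow: they are affine maps between stripe shapes in homogeneous form. As a hedged aside, applying nhwc_to_nhcwb16 to a hypothetical [1, 8, 8, 32] NHWC stripe recovers its NHCWB16 equivalent:

import numpy as np

nhwc_to_nhcwb16 = np.array([
    [1, 0, 0, 0, 0],
    [0, 1, 0, 0, 0],
    [0, 0, 0, 1 / 16, 0],
    [0, 0, 1, 0, 0],
    [0, 0, 0, 0, 16],
    [0, 0, 0, 0, 1],
])
stripe_nhwc = np.array([1, 8, 8, 32, 1])  # [N, H, W, C, 1], hypothetical stripe
print(nhwc_to_nhcwb16 @ stripe_nhwc)  # [1. 8. 2. 8. 16. 1.]: C=32 -> 2 blocks of 16 channels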
Example #6
def conv2d_compute(
    ifm: te.Tensor,
    weight: te.Tensor,
    scale_bias: te.Tensor,
    lut: te.Tensor,
    ifm_scale: float,
    ifm_zero_point: int,
    weight_zero_point: int,
    ofm_scale: float,
    ofm_zero_point: int,
    strides: Tuple[int, int],
    padding: Tuple[int, int, int, int],
    dilation: Union[Tuple[int, int], List[int]],
    activation: str,
    clip_min: int,
    clip_max: int,
    rounding_mode: str,
    upscale: str,
    ifm_layout: str,
    ofm_layout: str,
) -> te.Tensor:
    """A compute operator representing the capabilities of a 2D convolution for the NPU.

    Parameters
    ----------
    ifm : te.Tensor
        The Input Feature Map tensor (IFM).
    weight : te.Tensor
        The weight tensor.
    scale_bias : te.Tensor
        The packed per-channel weight scale and bias tensor.
    lut : te.Tensor
        The look-up table of values to use if activation = "LUT".
    ifm_scale : float
        The quantization scale for the Input Feature Map tensor.
    ifm_zero_point : int
        The quantization zero point for the Input Feature Map tensor.
    weight_zero_point : int
        The quantization zero point for the weight tensor.
    ofm_scale : float
        The quantization scale for the Output Feature Map tensor.
    ofm_zero_point : int
        The quantization zero point for the Output Feature Map tensor.
    strides : tuple
        The 2 dimensional strides as (stride_height, stride_width).
    padding : tuple
        The 4 dimensional padding as (pad_top, pad_left, pad_bottom, pad_right).
    dilation : Union[Tuple[int, int], List[int]]
        The 2 dimensional dilation as (dilation_height, dilation_width).
    activation : str
        The activation function to use.
            "NONE" - no activation function.
            "CLIP" - clip the output between clip_min and clip_max.
            "TANH" - tanh activation function.
            "SIGMOID" - sigmoid activation function.
            "LUT" - use a look-up table to perform the activation function.
    clip_min : int
        The minimum clipping value if activation = "CLIP".
    clip_max : int
        The maximum clipping value if activation = "CLIP".
    rounding_mode : str
        The rounding mode to apply to the Output Feature Map tensor.
            "TFL" - Tensorflow Lite rounding scheme.
            "TRUNCATE" - Truncate towards zero.
            "NATURAL" - Round to nearest value, with x.5 rounded up towards +infinity.
    upscale : str
        The 2x2 upscaling mode to apply to the Input Feature Map tensor.
            "NONE" - no upscaling.
            "NEAREST" - upscale using nearest neighbour.
            "ZEROS" - upscale using zeros.
    ifm_layout : str
        The layout of the Input Feature Map tensor. Can be "NHWC" or "NHCWB16".
    ofm_layout : str
        The layout of the Output Feature Map tensor. Can be "NHWC" or "NHCWB16".

    Returns
    -------
    te.Tensor
        The OFM tensor.

    """
    assert ifm.shape[0] == 1
    assert ifm_layout in {"NHWC", "NHCWB16"}
    assert ofm_layout in {"NHWC", "NHCWB16"}

    padding = [int(v) for v in padding]
    stride_h, stride_w = [int(v) for v in strides]
    dilation_h, dilation_w = [int(v) for v in dilation]
    ofm_channels, kernel_h, kernel_w, ifm_channels = [
        int(v) for v in weight.shape
    ]
    upscale_factor = 2 if upscale != "NONE" else 1

    # Compute operation for the IFM DMA pipeline
    dmaed_ifm = dma_ifm_compute(
        ifm,
        ifm_layout,
        ifm_zero_point,
        ifm_scale,
        weight.shape[3],
        padding,
        upscale_factor,
    )

    # 2D Convolution compute operation
    dilated_kernel_h = (kernel_h - 1) * dilation_h + 1
    dilated_kernel_w = (kernel_w - 1) * dilation_w + 1
    ofm_height = (dmaed_ifm.shape[1] - dilated_kernel_h) // stride_h + 1
    ofm_width = (dmaed_ifm.shape[2] - dilated_kernel_w) // stride_w + 1
    rc = te.reduce_axis((0, ifm_channels), name="rc")
    rh = te.reduce_axis((0, kernel_h), name="ry")
    rw = te.reduce_axis((0, kernel_w), name="rx")

    conv2d_attrs = {
        "op": "ethosu_conv2d",
        "weight_zero_point": weight_zero_point,
        "activation": activation,
        "upscale": upscale,
        "clip_min": clip_min,
        "clip_max": clip_max,
        "rounding_mode": rounding_mode,
        "stride_h": stride_h,
        "stride_w": stride_w,
        "dilation_h": dilation_h,
        "dilation_w": dilation_w,
    }

    has_lut = activation in ("TANH", "LUT", "SIGMOID")

    # This is a trick to insert the LUT tensor into the TE graph if LUT is present
    lut_expr = (lut[0] + lut[255]).astype(ifm.dtype) if has_lut else 0

    # Add the LUT tensor to the attributes to be able to later tell which tensor is the LUT
    if has_lut:
        conv2d_attrs["lut"] = lut

    conv = te.compute(
        (1, ofm_height, ofm_width, ofm_channels),
        lambda nn, hh, ww, cc: te.sum(
            dmaed_ifm(
                nn, hh * stride_h + rh * dilation_h, ww * stride_w + rw * dilation_w, rc
            ).astype(ifm.dtype)
            * weight[cc, rh, rw, rc].astype(ifm.dtype)
            # This is a trick to load 10 elements of the scale_bias at once, not accurate maths
            + (scale_bias[cc, 0] * scale_bias[cc, 9] + lut_expr).astype(ifm.dtype),
            axis=[rh, rw, rc],
        ),
        name="ethosu_conv2d",
        attrs=conv2d_attrs,
    )

    nhwc_to_nhcwb16 = [
        [1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 0, 0, 1 / 16, 0],
        [0, 0, 1, 0, 0],
        [0, 0, 0, 0, 16],
        [0, 0, 0, 0, 1],
    ]
    nhcwb16_to_nhwc = [
        [1, 0, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0],
        [0, 0, 0, 1, 0, 0],
        [0, 0, 16, 0, 1, -16],
        [0, 0, 0, 0, 0, 1],
    ]
    ifm_matrix = [
        [1, 0, 0, 0, 0],
        [0, stride_h, 0, 0, (dilated_kernel_h - stride_h)],
        [0, 0, stride_w, 0, (dilated_kernel_w - stride_w)],
        [0, 0, 0, 0, ifm_channels],
        [0, 0, 0, 0, 1],
    ]
    weights_matrix = [
        [0, 0, 0, 1, 0],
        [0, 0, 0, 0, kernel_h],
        [0, 0, 0, 0, kernel_w],
        [0, 0, 0, 0, ifm_channels],
        [0, 0, 0, 0, 1],
    ]
    bias_matrix = [
        [0, 0, 0, 1, 0],
        [0, 0, 0, 0, 10],
        [0, 0, 0, 0, 1],
    ]
    if ofm_layout == "NHCWB16":
        ifm_matrix = np.matmul(ifm_matrix, nhcwb16_to_nhwc).tolist()
        weights_matrix = np.matmul(weights_matrix, nhcwb16_to_nhwc).tolist()
        bias_matrix = np.matmul(bias_matrix, nhcwb16_to_nhwc).tolist()
    if ifm_layout == "NHCWB16":
        ifm_matrix = np.matmul(nhwc_to_nhcwb16, ifm_matrix).tolist()
    ifm_propagator = Propagator(
        ifm_matrix,
        [0, -padding[0], -padding[1], 0]
        if ifm_layout == "NHWC" else [0, -padding[0], 0, -padding[1], 0],
    )
    weights_propagator = Propagator(
        weights_matrix,
        [0, 0, 0, 0],
    )
    bias_propagator = Propagator(
        bias_matrix,
        [0, 0],
    )
    propagator_attrs = {
        "ifm_propagator": ifm_propagator,
        "weights_propagator": weights_propagator,
        "bias_propagator": bias_propagator,
    }

    # Compute operation for the OFM DMA pipeline
    dma_ofm = dma_ofm_compute(conv,
                              ofm_layout,
                              ofm_zero_point,
                              ofm_scale,
                              ofm_channels,
                              attrs=propagator_attrs)
    return dma_ofm
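The stride and kernel terms in ifm_matrix encode the usual convolution footprint arithmetic: an OFM stripe of height h needs stride_h * h + (dilated_kernel_h - stride_h) IFM rows, and likewise for width. A worked check with hypothetical sizes:

# 3x3 kernel, stride 1, no dilation: a 4-row output stripe reads 6 input rows
stride_h, kernel_h, dilation_h = 1, 3, 1
dilated_kernel_h = (kernel_h - 1) * dilation_h + 1  # 3
ofm_stripe_h = 4
ifm_stripe_h = stride_h * ofm_stripe_h + (dilated_kernel_h - stride_h)
print(ifm_stripe_h)  # 6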
Example #7
def pooling_compute(
    ifm: te.Tensor,
    lut: te.Tensor,
    pooling_type: str,
    ifm_scale: float,
    ifm_zero_point: int,
    ofm_scale: float,
    ofm_zero_point: int,
    pool_shape: Tuple[int, int],
    ofm_channels: int,
    strides: Tuple[int, int],
    padding: Tuple[int, int, int, int],
    activation: str,
    clip_min: int,
    clip_max: int,
    rounding_mode: str,
    upscale: str,
    ifm_layout: str,
    ofm_layout: str,
) -> te.Tensor:
    """A compute operator representing the capabilities of pooling for the NPU.

    Parameters
    ----------
    ifm : te.Tensor
        The Input Feature Map tensor (IFM).
    lut : te.Tensor
        The look-up table of values to use if activation = "LUT".
    pooling_type : str
        The type of the pooling. "AVG" - average pool, "MAX" - max pool.
    ifm_scale : float
        The quantization scale for the Input Feature Map tensor.
    ifm_zero_point : int
        The quantization zero point for the Input Feature Map tensor.
    ofm_scale : float
        The quantization scale for the Output Feature Map tensor.
    ofm_zero_point : int
        The quantization zero point for the Output Feature Map tensor.
    pool_shape : Tuple[int, int]
        The 2 dimensional pool shape as (pool_shape_height, pool_shape_width).
    ofm_channels : int
        The number of the Output Feature Map channels
    strides : Tuple[int, int]
        The 2 dimensional strides as (stride_height, stride_width).
    padding : Tuple[int, int, int, int]
        The 4 dimensional padding as (pad_top, pad_left, pad_bottom, pad_right).
    activation : str
        The activation function to use.
            "NONE" - no activation function.
            "CLIP" - clip the output between clip_min and clip_max.
            "TANH" - tanh activation function.
            "SIGMOID" - sigmoid activation function.
            "LUT" - use a look-up table to perform the activation function.
    clip_min : int
        The minimum clipping value if activation = "CLIP".
    clip_max : int
        The maximum clipping value if activation = "CLIP".
    rounding_mode : str
        The rounding mode to apply to the Output Feature Map tensor.
            "TFL" - Tensorflow Lite rounding scheme.
            "TRUNCATE" - Truncate towards zero.
            "NATURAL" - Round to nearest value, with x.5 rounded up towards +infinity.
    upscale : str
        The 2x2 upscaling mode to apply to the Input Feature Map tensor.
            "NONE" - no upscaling.
            "NEAREST" - upscale using nearest neighbour.
            "ZEROS" - upscale using zeros.
    ifm_layout : str
        The layout of the Input Feature Map tensor. Can be "NHWC" or "NHCWB16".
    ofm_layout : str
        The layout of the Output Feature Map tensor. Can be "NHWC" or "NHCWB16".

    Returns
    -------
    te.Tensor
        The OFM tensor.
    """
    assert ifm.shape[0] == 1
    assert ifm_layout in {"NHWC", "NHCWB16"}
    assert ofm_layout in {"NHWC", "NHCWB16"}

    padding = [int(v) for v in padding]
    stride_h, stride_w = [int(v) for v in strides]
    pool_shape_h, pool_shape_w = [int(v) for v in pool_shape]

    # Compute operation for the IFM DMA pipeline
    dmaed_ifm = dma_ifm_compute(ifm, ifm_layout, ifm_zero_point, ifm_scale, ofm_channels, padding)

    # Pooling compute operation
    ofm_height = (dmaed_ifm.shape[1] - pool_shape_h) // stride_h + 1
    ofm_width = (dmaed_ifm.shape[2] - pool_shape_w) // stride_w + 1
    rh = te.reduce_axis((0, pool_shape_h), name="ry")
    rw = te.reduce_axis((0, pool_shape_w), name="rx")

    pooling_attrs = {
        "op": "ethosu_pooling",
        "pooling_type": pooling_type,
        "pool_shape_h": pool_shape_h,
        "pool_shape_w": pool_shape_w,
        "stride_h": stride_h,
        "stride_w": stride_w,
        "activation": activation,
        "clip_min": clip_min,
        "clip_max": clip_max,
        "rounding_mode": rounding_mode,
        "upscale": upscale,
    }

    has_lut = activation in ("TANH", "LUT", "SIGMOID")

    # This is a trick to insert the LUT tensor into the TE graph if LUT is present
    lut_expr = (lut[0] + lut[255]).astype(ifm.dtype) if has_lut else 0

    # Add the LUT tensor to the attributes to be able to later tell which tensor is the LUT
    if has_lut:
        pooling_attrs["lut"] = lut

    pooling = te.compute(
        (1, ofm_height, ofm_width, ofm_channels),
        lambda nn, hh, ww, cc: te.max(
            (dmaed_ifm(nn, hh * stride_h + rh, ww * stride_w + rw, cc) + lut_expr).astype(
                ifm.dtype
            ),
            axis=[rh, rw],
        ),
        name="ethosu_pooling",
        attrs=pooling_attrs,
    )

    nhwc_to_nhcwb16 = [
        [1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 0, 0, 1 / 16, 0],
        [0, 0, 1, 0, 0],
        [0, 0, 0, 0, 16],
        [0, 0, 0, 0, 1],
    ]
    nhcwb16_to_nhwc = [
        [1, 0, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0],
        [0, 0, 0, 1, 0, 0],
        [0, 0, 16, 0, 1, -16],
        [0, 0, 0, 0, 0, 1],
    ]
    ifm_matrix = [
        [1, 0, 0, 0, 0],
        [0, stride_h, 0, 0, (pool_shape_h - stride_h)],
        [0, 0, stride_w, 0, (pool_shape_w - stride_w)],
        [0, 0, 0, 1, 0],
        [0, 0, 0, 0, 1],
    ]
    if ofm_layout == "NHCWB16":
        ifm_matrix = np.matmul(ifm_matrix, nhcwb16_to_nhwc).tolist()
    if ifm_layout == "NHCWB16":
        ifm_matrix = np.matmul(nhwc_to_nhcwb16, ifm_matrix).tolist()
    ifm_propagator = Propagator(
        ifm_matrix,
        [0, -padding[0], -padding[1], 0]
        if ifm_layout == "NHWC"
        else [0, -padding[0], 0, -padding[1], 0],
    )
    propagator_attrs = {
        "ifm_propagator": ifm_propagator,
    }

    # Compute operation for the OFM DMA pipeline
    return dma_ofm_compute(
        pooling, ofm_layout, ofm_zero_point, ofm_scale, ofm_channels, attrs=propagator_attrs
    )
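Since dmaed_ifm already includes the padding, the ofm_height expression above reduces to the familiar pooling output-size formula. A worked example with hypothetical sizes:

# ofm_h = (ifm_h + pad_top + pad_bottom - pool_shape_h) // stride_h + 1
ifm_h, pad_top, pad_bottom = 8, 1, 1
pool_shape_h, stride_h = 2, 2
ofm_h = (ifm_h + pad_top + pad_bottom - pool_shape_h) // stride_h + 1
print(ofm_h)  # 5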
Example #8
def binary_elementwise_compute(
    ifm: te.Tensor,
    ifm2: te.Tensor,
    lut: te.Tensor,
    operator_type: str,
    ifm_scale: float,
    ifm_zero_point: int,
    ifm2_scale: float,
    ifm2_zero_point: int,
    ofm_scale: float,
    ofm_zero_point: int,
    ifm_channels: int,
    ifm2_channels: int,
    reversed_operands: bool,
    activation: str,
    clip_min: int,
    clip_max: int,
    rounding_mode: str,
    ifm_layout: str,
    ifm2_layout: str,
    ofm_layout: str,
    ofm_dtype: str,
) -> te.Tensor:
    """A compute operator representing the capabilities of binary_elementwise for the NPU.

    Parameters
    ----------
    ifm : te.Tensor
        The Input Feature Map tensor (IFM).
    ifm2 : te.Tensor
        The Input Feature Map tensor 2 (IFM2).
    lut : te.Tensor
        The look-up table values to use if activation = "LUT".
    operator_type: str
        The type of the binary elementwise operator.
            "ADD"
            "SUB"
            "MUL"
            "MIN"
            "MAX"
            "SHR"
            "SHL"
    ifm_scale : float
        The quantization scale for the Input Feature Map tensor.
    ifm_zero_point : int
        The quantization zero point for the Input Feature Map tensor.
    ifm2_scale : float
        The quantization scale for the Input Feature Map tensor 2.
    ifm2_zero_point : int
        The quantization zero point for the Input Feature Map tensor 2.
    ofm_scale : float
        The quantization scale for the Output Feature Map tensor.
    ofm_zero_point : int
        The quantization zero point for the Output Feature Map tensor.
    ifm_channels : int
        The number of the Input Feature Map channels.
    ifm2_channels : int
        The number of the Input Feature Map 2 channels.
    reversed_operands : bool
        True if IFM2 is the first operand and IFM is the second operand.
    activation : str
        The activation function to use.
            "NONE" - no activation function.
            "CLIP" - clip the output between clip_min and clip_max.
            "TANH" - tanh activation function.
            "SIGMOID" - sigmoid activation function.
            "LUT" - use a look-up table to perform the activation function.
        Available activations, by IFM data type:
            {int8, uint8}: "NONE", "CLIP", "TANH", "SIGMOID", "LUT"
            {int32}: "NONE"
    clip_min : int
        The minimum clipping value if activation = "CLIP".
    clip_max : int
        The maximum clipping value if activation = "CLIP".
    rounding_mode : str
        The rounding mode to apply to the Output Feature Map tensor.
            "TFL" - Tensorflow Lite rounding scheme.
            "TRUNCATE" - Truncate towards zero.
            "NATURAL" - Round to nearest value, with x.5 rounded up towards +infinity.
    ifm_layout : str
        The layout of the Input Feature Map tensor. Can be "NHWC" or "NHCWB16".
    ifm2_layout : str
        The layout of the Input Feature Map tensor 2. Can be "NHWC" or "NHCWB16".
    ofm_layout : str
        The layout of the Output Feature Map tensor. Can be "NHWC" or "NHCWB16".
    ofm_dtype: str
        The Output Feature Map tensor type.
        MUL, ADD, SUB {IFM}->{OFM}:
          {uint8, int8, int32} -> {uint8, int8, int32}, any pairing
        MAX, MIN:
          IFM and OFM must be of the same type, one of:
          {int8, uint8}
        SHR {IFM}->{OFM}:
          {int32}->{int8, uint8, int32}, any pairing
        SHL:
          {int32}->{int32} only

    Returns
    -------
    te.Tensor
        The Output Feature Map tensor.
    """
    assert ifm.shape[0] == 1
    assert ifm2.shape[0] == 1
    assert ifm_layout in {"NHWC", "NHCWB16"}
    assert ifm2_layout in {"NHWC", "NHCWB16"}
    assert ofm_layout in {"NHWC", "NHCWB16"}

    # Compute operation for the IFM DMA pipeline
    dmaed_ifm = dma_ifm_compute(
        ifm, ifm_layout, ifm_zero_point, ifm_scale, ifm_channels, (0, 0, 0, 0)
    )
    dmaed_ifm2 = dma_ifm_compute(
        ifm2, ifm2_layout, ifm2_zero_point, ifm2_scale, ifm2_channels, (0, 0, 0, 0)
    )

    # Binary elementwise compute operation
    ofm_height = dmaed_ifm.shape[1]
    ofm_width = dmaed_ifm.shape[2]

    binary_elementwise_attrs = {
        "op": "ethosu_binary_elementwise",
        "operator_type": operator_type,
        "reversed_operands": reversed_operands,
        "activation": activation,
        "clip_min": clip_min,
        "clip_max": clip_max,
        "rounding_mode": rounding_mode,
    }

    operators = {
        "ADD": operator.add,
        "SUB": operator.sub,
        "MUL": operator.mul,
        "MIN": te.min,
        "MAX": te.max,
        "SHR": operator.add,
        "SHL": operator.add,
    }
    broadcast = [value == 1 for value in dmaed_ifm2.shape]

    if reversed_operands:
        binary_elementwise = te.compute(
            (1, ofm_height, ofm_width, ifm_channels),
            lambda nn, hh, ww, cc: operators[operator_type](
                dmaed_ifm2(
                    0 if broadcast[0] else nn,
                    0 if broadcast[1] else hh,
                    0 if broadcast[2] else ww,
                    0 if broadcast[3] else cc,
                ).astype(ifm.dtype),
                dmaed_ifm(nn, hh, ww, cc).astype(ifm.dtype),
            ).astype(ofm_dtype),
            name="ethosu_binary_elementwise",
            attrs=binary_elementwise_attrs,
        )
    else:
        binary_elementwise = te.compute(
            (1, ofm_height, ofm_width, ifm_channels),
            lambda nn, hh, ww, cc: operators[operator_type](
                dmaed_ifm(nn, hh, ww, cc).astype(ifm.dtype),
                dmaed_ifm2(
                    0 if broadcast[0] else nn,
                    0 if broadcast[1] else hh,
                    0 if broadcast[2] else ww,
                    0 if broadcast[3] else cc,
                ).astype(ifm.dtype),
            ).astype(ofm_dtype),
            name="ethosu_binary_elementwise",
            attrs=binary_elementwise_attrs,
        )

    nhwc_to_nhcwb16 = [
        [1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 0, 0, 1 / 16, 0],
        [0, 0, 1, 0, 0],
        [0, 0, 0, 0, 16],
        [0, 0, 0, 0, 1],
    ]
    nhcwb16_to_nhwc = [
        [1, 0, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0],
        [0, 0, 0, 1, 0, 0],
        [0, 0, 16, 0, 1, -16],
        [0, 0, 0, 0, 0, 1],
    ]
    ifm_matrix = [
        [1, 0, 0, 0, 0],
        [0, 1, 0, 0, 0],
        [0, 0, 1, 0, 0],
        [0, 0, 0, 1, 0],
        [0, 0, 0, 0, 1],
    ]
    ifm2_matrix = [
        [1, 0, 0, 0, 0],
        [0, (1 - int(broadcast[1])), 0, 0, int(broadcast[1])],
        [0, 0, (1 - int(broadcast[2])), 0, int(broadcast[2])],
        [0, 0, 0, (1 - int(broadcast[3])), int(broadcast[3])],
        [0, 0, 0, 0, 1],
    ]
    if ofm_layout == "NHCWB16":
        ifm_matrix = np.matmul(ifm_matrix, nhcwb16_to_nhwc).tolist()
        ifm2_matrix = np.matmul(ifm2_matrix, nhcwb16_to_nhwc).tolist()
    if ifm_layout == "NHCWB16":
        ifm_matrix = np.matmul(nhwc_to_nhcwb16, ifm_matrix).tolist()
    if ifm2_layout == "NHCWB16":
        ifm2_matrix = np.matmul(nhwc_to_nhcwb16, ifm2_matrix).tolist()
    ifm_propagator = Propagator(
        ifm_matrix,
        [0, 0, 0, 0] if ifm_layout == "NHWC" else [0, 0, 0, 0, 0],
    )
    ifm2_propagator = Propagator(
        ifm2_matrix,
        [0, 0, 0, 0] if ifm2_layout == "NHWC" else [0, 0, 0, 0, 0],
    )
    propagator_attrs = {
        "ifm_propagator": ifm_propagator,
        "ifm2_propagator": ifm2_propagator,
    }

    # Compute operation for the OFM DMA pipeline
    return dma_ofm_compute(
        binary_elementwise,
        ofm_layout,
        ofm_zero_point,
        ofm_scale,
        ifm_channels,
        attrs=propagator_attrs,
    )
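The broadcast handling above collapses any broadcast axis of IFM2 to a stripe of size 1. A hedged NumPy sketch for a hypothetical IFM2 of shape [1, 1, 1, 8]:

import numpy as np

broadcast = [True, True, True, False]  # from a dmaed_ifm2 shape of [1, 1, 1, 8]
ifm2_matrix = np.array([
    [1, 0, 0, 0, 0],
    [0, 1 - int(broadcast[1]), 0, 0, int(broadcast[1])],
    [0, 0, 1 - int(broadcast[2]), 0, int(broadcast[2])],
    [0, 0, 0, 1 - int(broadcast[3]), int(broadcast[3])],
    [0, 0, 0, 0, 1],
])
out_stripe = np.array([1, 6, 6, 8, 1])  # [N, H, W, C, 1], hypothetical output stripe
print(ifm2_matrix @ out_stripe)  # [1. 1. 1. 8. 1.]: H and W collapse to single elements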
Example #9


@pytest.mark.parametrize(
    ["propagator", "input_stripe_config", "output_stripe_config"],
    [
        (
            Propagator(
                transform=[
                    [1, 0, 0, 0, 0],
                    [0, 1, 0, 0, 0],
                    [0, 0, 0, 1 / 16, 0],
                    [0, 0, 1, 0, 0],
                    [0, 0, 0, 0, 16],
                    [0, 0, 0, 0, 1],
                ],
                offset=[0, 0, 0, 0, 0],
            ),
            StripeConfig(
                shape=[1, 12, 14, 36],
                extent=[1, 24, 18, 72],
                strides=[1, 12, 14, 36],
                order=[1, 2, 3, 4],
                stripes=[1, 2, 2, 2],
                offset=[0, 0, 0, 0],
            ),
            StripeConfig(
                shape=[1, 12, 3, 14, 16],
Example #10
def identity_compute(
    ifm: te.Tensor,
    lut: te.Tensor,
    ifm_scale: float,
    ifm_zero_point: int,
    ofm_scale: float,
    ofm_zero_point: int,
    activation: str,
) -> te.Tensor:
    """A compute operator for the NPU identity operator.

    Parameters
    ----------
    ifm : te.Tensor
        The Input Feature Map tensor (IFM).
    lut : te.Tensor
        The look-up table values to use if activation is "LUT", "TANH" or "SIGMOID".
    ifm_scale : float
        The quantization scale for the Input Feature Map tensor.
    ifm_zero_point : int
        The quantization zero point for the Input Feature Map tensor.
    ofm_scale : float
        The quantization scale for the Output Feature Map tensor.
    ofm_zero_point : int
        The quantization zero point for the Output Feature Map tensor.
    activation : str
        The activation function to use.
            "NONE" - no activation function.
            "TANH" - tanh activation function.
            "SIGMOID" - sigmoid activation function.
            "LUT" - use a look-up table to perform the activation function.

    Returns
    -------
    te.Tensor
        The Output Feature Map tensor.
    """
    dmaed_ifm = read_compute(ifm, ifm_zero_point, ifm_scale)
    id_attrs = {"op": "ethosu_identity", "activation": activation}

    has_lut = activation in ("TANH", "LUT", "SIGMOID")

    # This is a trick to insert the LUT tensor into the TE graph if LUT is present
    lut_expr = (lut[0] + lut[255]).astype(ifm.dtype) if has_lut else 0

    # Add the LUT tensor to the attributes to be able to later tell which tensor is the LUT
    if has_lut:
        id_attrs["lut"] = lut

    identity = te.compute(
        ifm.shape,
        lambda *i: (dmaed_ifm(*i) + lut_expr).astype(ifm.dtype),
        name="ethosu_identity",
        attrs=id_attrs,
    )
    length = len(ifm.shape)
    ifm_matrix = np.identity(length + 1)
    offset = np.zeros(length, dtype="int64")
    ifm_propagator = Propagator(
        ifm_matrix,
        offset.tolist(),
    )
    propagator_attrs = {
        "ifm_propagator": ifm_propagator,
    }
    return write_compute(identity,
                         ofm_zero_point,
                         ofm_scale,
                         attrs=propagator_attrs)
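As a minimal check (plain NumPy, not the cascader API), the identity propagator built above maps stripes one-to-one: np.identity(length + 1) leaves any homogeneous stripe vector unchanged.

import numpy as np

length = 4  # hypothetical 4-D IFM
identity_matrix = np.identity(length + 1)
stripe = np.array([1, 8, 8, 16, 1])  # [N, H, W, C, 1]
assert (identity_matrix @ stripe == stripe).all()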