Example 1
    def callback(self, pre: tvm.relay.Expr, post: tvm.relay.Expr,
                 node_map: tvm.ir.container.Map) -> tvm.relay.Expr:
        params = ethosu_patterns.QnnDepthwiseConv2DParams(post.op.body)
        params.ifm.tensor = post.args[0]
        channels_map = {
            "NHWC": 3,
        }
        kernel_shape_map = {
            "HWOI": params.weights.shape[0:2],
        }

        weights_values = params.weights.values
        weights_values_ohwi = np.moveaxis(weights_values, [0, 1, 2, 3],
                                          [1, 2, 0, 3])

        activation = "NONE"
        # Activations requiring a LUT are not yet supported, so set it to an empty list
        lut = relay.const([], "int8")
        clip_min = 0
        clip_max = 0
        if params.activation:
            activation = ethosu_patterns.QnnDepthwiseConv2DParams.activation_map[
                params.activation.op.name]
            if activation == "CLIP":
                clip_min = int(params.activation.attrs.a_min)
                clip_max = int(params.activation.attrs.a_max)
        scale_bias = vela_api.pack_biases(
            biases=params.biases.tensor.data.asnumpy(),
            ifm_scale=params.ifm.q_params.scale_f32,
            ifm_dtype=np.dtype(params.ifm.dtype),
            weight_scales=params.weights.q_params.scale_f32,
            ofm_scale=params.ofm.q_params.scale_f32,
            is_activation_tanh_or_sigmoid=activation in ["TANH", "SIGMOID"],
        )

        ethosu_depthwise_conv2d = ethosu_ops.ethosu_depthwise_conv2d(
            post.args[0],  # IFM
            relay.const(weights_values_ohwi, params.weights.values.dtype),
            relay.const(scale_bias, "uint8"),
            lut,
            float(params.ifm.q_params.scale_f32),
            int(params.ifm.q_params.zero_point),
            int(params.weights.q_params.zero_point),
            float(params.ofm.q_params.scale_f32),
            int(params.ofm.q_params.zero_point),
            kernel_shape_map[str(params.weights.layout)],
            params.ofm.shape[channels_map[str(params.ofm.layout)]],
            strides=params.strides,
            padding=params.padding,
            dilation=params.dilation,
            activation=activation,
            clip_min=clip_min,
            clip_max=clip_max,
            upscale="NONE",
            ifm_layout=str(params.ifm.layout),
            ofm_layout=str(params.ofm.layout),
            ofm_dtype=str(params.ofm.dtype),
        )
        return ethosu_depthwise_conv2d
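
As a quick standalone check of the weight transform above, the following sketch (with an assumed 3x3 depthwise kernel shape, not taken from the source) shows how np.moveaxis turns the HWOI weights into the OHWI order the callback hands to ethosu_depthwise_conv2d:

import numpy as np

hwoi = np.zeros((3, 3, 8, 1), dtype=np.int8)          # assumed H, W, O, I depthwise kernel
ohwi = np.moveaxis(hwoi, [0, 1, 2, 3], [1, 2, 0, 3])  # H -> 1, W -> 2, O -> 0, I -> 3
print(ohwi.shape)                                     # (8, 3, 3, 1): O, H, W, I
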
Example 2
 def callback(self, pre: tvm.relay.Expr, post: tvm.relay.Expr,
              node_map: tvm.ir.container.Map) -> tvm.relay.Expr:
     params = ethosu_patterns.QnnConv2DParams(post.op.body)
     params.ifm.tensor = post.args[0]
     channels_map = {
         "NHWC": 3,
     }
     kernel_size_map = {
         "HWIO": params.weights.shape[0:2],
         "OHWI": params.weights.shape[1:3],
         "HWOI": params.weights.shape[0:2],
     }
     activation_map = {"clip": "CLIP"}
     weight_to_ohwi_transform_map = {"HWIO": [3, 0, 1, 2]}
     weights_values = params.weights.values
     weights_values_ohwi = np.transpose(
         weights_values,
         weight_to_ohwi_transform_map[str(params.weights.layout)])
     if params.activation:
         activation = activation_map[params.activation.op.name]
         clip_min = int(params.activation.attrs.a_min)
         clip_max = int(params.activation.attrs.a_max)
     else:
         activation = "NONE"
         clip_min = 0
         clip_max = 0
     scale_bias = vela_api.pack_biases(
         biases=params.biases.tensor.data.asnumpy(),
         ifm_scale=params.ifm.q_params.scale_f32,
         ifm_dtype=np.dtype(params.ifm.dtype),
         weight_scales=params.weights.q_params.scale_f32,
         ofm_scale=params.ofm.q_params.scale_f32,
         is_activation_tanh_or_sigmoid=activation in ["TANH", "SIGMOID"],
     )
     ethosu_conv2d = ethosu_ops.ethosu_conv2d(
         ifm=post.args[0],
         weight=relay.const(weights_values_ohwi,
                            params.weights.values.dtype),
         scale_bias=relay.const(scale_bias, "uint8"),
         lut=relay.const([], dtype="int8"),
         ifm_scale=float(params.ifm.q_params.scale_f32),
         ifm_zero_point=int(params.ifm.q_params.zero_point),
         weight_zero_point=int(params.weights.q_params.zero_point),
         ofm_scale=float(params.ofm.q_params.scale_f32),
         ofm_zero_point=int(params.ofm.q_params.zero_point),
         kernel_shape=kernel_size_map[str(params.weights.layout)],
         ofm_channels=params.ofm.shape[channels_map[str(
             params.ofm.layout)]],
         strides=params.strides,
         padding=params.padding,
         dilation=params.dilation,
         activation=activation,
         clip_min=clip_min,
         clip_max=clip_max,
         upscale="NONE",
         ifm_layout=str(params.ifm.layout),
         ofm_layout=str(params.ofm.layout),
     )
     return ethosu_conv2d
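
The HWIO-to-OHWI permutation used above can be verified in isolation; the sketch below uses a hypothetical kernel shape to show that np.transpose with [3, 0, 1, 2] produces the OHWI layout:

import numpy as np

hwio = np.zeros((3, 3, 16, 32), dtype=np.int8)  # assumed H, W, I, O kernel
ohwi = np.transpose(hwio, [3, 0, 1, 2])         # output axis k takes input axis [3, 0, 1, 2][k]
print(ohwi.shape)                               # (32, 3, 3, 16): O, H, W, I
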
Example 3
    def callback(self, pre, post, node_map):
        params = ethosu_patterns.FullyConnectedParams(post.op.body)
        params.ifm.tensor = post.args[0]

        # IFM reshapes
        ifm = post.args[0]
        if len(params.ifm.shape) != 4 or not (
                params.ifm.shape[1] == params.ifm.shape[2] == 1):
            ifm = relay.reshape(ifm, (1, 1, 1, params.ifm.shape[-1]))

        # Weight transformations
        weights_values = params.weights.values
        weights_values_ohwi = np.expand_dims(weights_values, axis=(1, 2))
        if params.activation:
            activation = "CLIP"
            clip_min = int(params.activation.attrs.a_min)
            clip_max = int(params.activation.attrs.a_max)
        else:
            activation = "NONE"
            clip_min = 0
            clip_max = 0
        bias_values = (params.biases.tensor.data.asnumpy()
                       if params.biases else np.zeros((params.ofm.shape[-1])))
        scale_bias = vela_api.pack_biases(
            biases=bias_values,
            ifm_scale=params.ifm.q_params.scale_f32,
            ifm_dtype=np.dtype(params.ifm.dtype),
            weight_scales=params.weights.q_params.scale_f32,
            ofm_scale=params.ofm.q_params.scale_f32,
            is_activation_tanh_or_sigmoid=False,
        )
        ethosu_fc = ethosu_ops.ethosu_conv2d(
            ifm=ifm,
            weight=relay.const(weights_values_ohwi,
                               params.weights.values.dtype),
            scale_bias=relay.const(scale_bias, "uint8"),
            lut=relay.const([], dtype="int8"),
            ifm_scale=float(params.ifm.q_params.scale_f32),
            ifm_zero_point=int(params.ifm.q_params.zero_point),
            weight_zero_point=int(params.weights.q_params.zero_point),
            ofm_scale=float(params.ofm.q_params.scale_f32),
            ofm_zero_point=int(params.ofm.q_params.zero_point),
            kernel_shape=[1, 1],
            ofm_channels=params.weights.shape[0],
            strides=(1, 1),
            padding=(0, 0, 0, 0),
            dilation=(1, 1),
            activation=activation,
            clip_min=clip_min,
            clip_max=clip_max,
            upscale="NONE",
            ifm_layout="NHWC",
            ofm_layout="NHWC",
        )

        if len(params.ofm.shape) != 4 or not (
                params.ofm.shape[1] == params.ofm.shape[2] == 1):
            ethosu_fc = relay.reshape(ethosu_fc, params.ofm.shape)
        return ethosu_fc
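
To make the reshaping above concrete, here is a minimal sketch (with assumed feature sizes) of how the fully connected layer is mapped onto a 1x1 NHWC convolution: the (out_features, in_features) weight matrix becomes an OHWI kernel with H = W = 1, and a flat IFM becomes a (1, 1, 1, C) tensor:

import numpy as np

weights = np.zeros((10, 64), dtype=np.int8)                  # assumed (out_features, in_features)
ohwi = np.expand_dims(weights, axis=(1, 2))                  # (10, 1, 1, 64): O, H, W, I, 1x1 kernel
ifm = np.zeros((1, 64), dtype=np.int8).reshape(1, 1, 1, 64)  # NHWC input with H = W = 1
print(ohwi.shape, ifm.shape)
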
Example 4
    def callback(self, pre: tvm.relay.Expr, post: tvm.relay.Expr,
                 node_map: tvm.ir.container.Map) -> tvm.relay.Expr:
        params = ethosu_patterns.QnnConv2DTransposeParams(post.op.body)
        params.ifm.tensor = post.args[0]

        ofm_shape = params.ofm.shape
        legalize_padding = params.legalize_padding

        weight_to_ohwi_transform_map = {"IOHW": [1, 2, 3, 0]}
        weights_values = params.weights.values
        weights_values_ohwi = np.transpose(
            weights_values,
            weight_to_ohwi_transform_map[str(params.weights.layout)])
        weights_values_ohwi = np.flip(weights_values_ohwi, (1, 2))
        weights = relay.const(weights_values_ohwi,
                              dtype=params.weights.values.dtype)

        bias_values = (params.biases.tensor.data.asnumpy()
                       if params.biases else np.zeros((params.ifm.shape[-1])))
        scale_bias = vela_api.pack_biases(
            biases=bias_values,
            ifm_scale=params.ifm.q_params.scale_f32,
            ifm_dtype=np.dtype(params.ifm.dtype),
            weight_scales=params.weights.q_params.scale_f32,
            ofm_scale=params.ofm.q_params.scale_f32,
            is_activation_tanh_or_sigmoid=False,
        )

        reduced_op = ethosu_ops.ethosu_conv2d(
            ifm=post.args[0],
            weight=weights,
            scale_bias=relay.const(scale_bias, "uint8"),
            lut=relay.const([], dtype="int8"),
            ifm_scale=float(params.ifm.q_params.scale_f32),
            ifm_zero_point=int(params.ifm.q_params.zero_point),
            weight_zero_point=int(params.weights.q_params.zero_point),
            ofm_scale=float(params.ofm.q_params.scale_f32),
            ofm_zero_point=int(params.ofm.q_params.zero_point),
            kernel_shape=params.kernel_shape,
            ofm_channels=int(ofm_shape[-1]),
            strides=(1, 1),
            padding=legalize_padding,
            dilation=params.dilation,
            ifm_layout=str(params.ifm.layout),
            ofm_layout=str(params.ofm.layout),
            upscale="ZEROS",
        )

        # Remove additional padding by 'cropping' back to expected size
        return relay.strided_slice(reduced_op, (0, 0, 0, 0), ofm_shape)
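
The kernel handling here differs from the plain convolution case in two ways: the weights start in IOHW, and they are rotated 180 degrees spatially before being used by a regular convolution with "ZEROS" upscaling. A small sketch with an assumed kernel shape:

import numpy as np

iohw = np.arange(2 * 4 * 3 * 3, dtype=np.int8).reshape(2, 4, 3, 3)  # assumed I, O, H, W kernel
ohwi = np.transpose(iohw, [1, 2, 3, 0])                             # (4, 3, 3, 2): O, H, W, I
ohwi_flipped = np.flip(ohwi, (1, 2))                                # 180-degree spatial rotation
print(ohwi_flipped.shape)
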
Example 5
 def create_mock(test_vec):
     with patch("ethosu.vela.api.npu_encode_bias") as mock_npu_encode_bias:
         mock_npu_encode_bias.return_value = bytearray(10)
         ifm_dtype = test_vec["ifm_dtype"]
         dtype_max = np.iinfo(ifm_dtype).max
         dtype_min = np.iinfo(ifm_dtype).min
         # tvm will always create biases in int32
         biases = np.random.randint(dtype_min, dtype_max, test_vec["bias_length"], np.int32)
         packed_biases = vela_api.pack_biases(
             biases=biases,
             ifm_scale=test_vec["ifm_scale"],
             ifm_dtype=test_vec["ifm_dtype"],
             weight_scales=test_vec["weight_scales"],
             ofm_scale=test_vec["ofm_scale"],
             is_activation_tanh_or_sigmoid=test_vec["is_activation_tanh_or_sigmoid"],
         )
         test_vec["bias_values"] = biases
         return mock_npu_encode_bias, packed_biases
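
A hedged usage sketch for the helper above: the test_vec keys mirror the ones create_mock reads, but the concrete values here are made up. Because ethosu.vela.api.npu_encode_bias is patched, packed_biases is assembled from the mocked 10-byte return value rather than from a real Vela encoding:

import numpy as np

test_vec = {
    "ifm_dtype": np.int8,
    "bias_length": 8,
    "ifm_scale": np.single(0.25),
    "weight_scales": np.array([np.single(0.1)] * 8),
    "ofm_scale": np.single(0.5),
    "is_activation_tanh_or_sigmoid": False,
}
mock_encode_bias, packed_biases = create_mock(test_vec)
mock_encode_bias.assert_called()          # the patched Vela call was exercised
assert len(test_vec["bias_values"]) == 8  # create_mock stores the generated biases back
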
Example 6
    def callback(self, pre: tvm.relay.Expr, post: tvm.relay.Expr,
                 node_map: tvm.ir.container.Map) -> tvm.relay.Expr:
        params = ethosu_patterns.MeanParams(post.op.body)
        params.ifm.tensor = post.args[0]

        ifm_shape = params.ifm.shape
        ofm_shape = params.ofm.shape
        lut = relay.const([], "int8")
        axis = params.axis
        reduced_op = params.ifm.tensor

        # Enforce 4d input
        if len(ifm_shape) < 4:
            axis = [x + 1 for x in axis]
            if len(ifm_shape) == 3:
                ifm_shape = [1, params.height, params.width, ifm_shape[2]]
            else:
                ifm_shape = [1, params.height, params.width, 1]
            reduced_op = relay.reshape(reduced_op, ifm_shape)

        filter_height = ifm_shape[1] if 1 in axis else 1
        filter_width = ifm_shape[2] if 2 in axis else 1
        in_channels = out_channels = ifm_shape[-1]

        # If the height is greater than max kernel height, reshape the input
        # from [filter_height, filter_width] to [1, (filter_height*filter_width)]
        # only in the case the axis is [1, 2].
        if axis == [1, 2] and filter_height > 64:
            ifm_shape = (ifm_shape[0], 1, filter_height * filter_width,
                         in_channels)
            filter_width = filter_height * filter_width
            filter_height = 1
            reduced_op = relay.reshape(reduced_op, ifm_shape)

        if axis == [1, 2] and params.keepdims:
            weight_scale = 1
            weight_values = np.ones(
                [out_channels, filter_height, filter_width, in_channels])
            scale_bias = vela_api.pack_biases(
                biases=np.zeros(ifm_shape[-1]),
                ifm_scale=params.ifm.q_params.scale_f32,
                ifm_dtype=np.dtype(params.ifm.dtype),
                weight_scales=np.array([weight_scale], dtype=np.float64),
                ofm_scale=params.ofm.q_params.scale_f32,
                is_activation_tanh_or_sigmoid=False,
            )

            reduced_op = ethosu_ops.ethosu_depthwise_conv2d(
                ifm=reduced_op,
                weight=relay.const(weight_values, params.ifm.dtype),
                scale_bias=relay.const(scale_bias, "uint8"),
                lut=lut,
                ifm_scale=float(params.ifm.q_params.scale_f32),
                ifm_zero_point=int(params.ifm.q_params.zero_point),
                weight_zero_point=0,
                ofm_scale=float(params.ofm.q_params.scale_f32),
                ofm_zero_point=int(params.ofm.q_params.zero_point),
                kernel_shape=(filter_height, filter_width),
                ofm_channels=out_channels,
                ofm_dtype="int16",
            )

            n = int(filter_height * filter_width)
            eps = 1 / (256 * (n + 1)) if n % 2 == 0 else 0

            scalar_tensor = relay.const(np.ones([1, 1, 1, 1], dtype="int16"),
                                        dtype="int16")

            reduced_op = ethosu_ops.ethosu_binary_elementwise(
                ifm=reduced_op,
                ifm2=scalar_tensor,
                lut=lut,
                operator_type="MUL",
                ifm_scale=float(params.ofm.q_params.scale_f32),
                ifm_zero_point=int(params.ofm.q_params.zero_point),
                ifm2_scale=1 / (n - eps),
                ifm2_zero_point=0,
                ofm_scale=float(params.ofm.q_params.scale_f32),
                ofm_zero_point=int(params.ofm.q_params.zero_point),
                ifm_channels=out_channels,
                ifm2_channels=out_channels,
                reversed_operands=False,
                ofm_dtype="int8",
                rounding_mode="NATURAL",
            )
        elif (params.ifm.q_params.scale_f32 == params.ofm.q_params.scale_f32
              and params.ifm.q_params.zero_point
              == params.ofm.q_params.zero_point):
            reduced_op = ethosu_ops.ethosu_pooling(
                ifm=reduced_op,
                lut=lut,
                pooling_type="AVG",
                ifm_scale=float(params.ifm.q_params.scale_f32),
                ifm_zero_point=0,
                ofm_scale=float(params.ofm.q_params.scale_f32),
                ofm_zero_point=0,
                pool_shape=(filter_height, filter_width),
                ofm_channels=out_channels,
                rounding_mode="TRUNCATE",
            )
        else:
            weight_scale = 1 / (filter_height * filter_width)
            weight_values = np.ones(
                [out_channels, filter_height, filter_width, in_channels])
            bias = -1 * int(
                params.ifm.q_params.zero_point) * filter_height * filter_width

            scale_bias = vela_api.pack_biases(
                biases=np.ones([ifm_shape[-1]]) * bias,
                ifm_scale=params.ifm.q_params.scale_f32,
                ifm_dtype=np.dtype(params.ifm.dtype),
                weight_scales=np.array([weight_scale], dtype=np.float64),
                ofm_scale=params.ofm.q_params.scale_f32,
                is_activation_tanh_or_sigmoid=False,
            )
            reduced_op = ethosu_ops.ethosu_depthwise_conv2d(
                ifm=reduced_op,
                weight=relay.const(weight_values, params.ifm.dtype),
                scale_bias=relay.const(scale_bias, "uint8"),
                lut=lut,
                ifm_scale=float(params.ifm.q_params.scale_f32),
                ifm_zero_point=0,
                weight_zero_point=0,
                ofm_scale=float(params.ofm.q_params.scale_f32),
                ofm_zero_point=int(params.ofm.q_params.zero_point),
                kernel_shape=(filter_height, filter_width),
                ofm_channels=out_channels,
                rounding_mode="NATURAL",
            )

        # Reshape to original ofm shape
        if len(ofm_shape) < 4:
            reduced_op = relay.reshape(reduced_op, ofm_shape)

        return reduced_op
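
The keepdims branch above decomposes the mean into two NPU operations: a depthwise convolution with all-ones weights that accumulates the reduction window into int16, followed by an elementwise MUL by roughly 1/n (with a small eps adjustment when n is even). A plain NumPy sketch of that arithmetic, using an assumed 8x8x4 input and ignoring quantization scales and zero points:

import numpy as np

ifm = np.random.randint(-128, 128, size=(1, 8, 8, 4)).astype(np.int8)
n = 8 * 8                                       # filter_height * filter_width
eps = 1 / (256 * (n + 1)) if n % 2 == 0 else 0  # same adjustment as in the callback

window_sum = ifm.astype(np.int32).sum(axis=(1, 2), keepdims=True)  # the all-ones depthwise stage
approx_mean = window_sum * (1 / (n - eps))                         # the binary MUL stage
exact_mean = ifm.astype(np.float64).mean(axis=(1, 2), keepdims=True)
print(np.max(np.abs(approx_mean - exact_mean)))                    # tiny residual from eps
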