Example #1
def _impl(inputs, _):
    # inputs[0]: quantized tensor; inputs[1], inputs[2]: input scale and
    # zero point, appended to the op's inputs by the frontend.
    assert len(inputs) == 3, "Input quant params not found in op inputs"
    inp_scale = _expr.const(inputs[1])
    inp_zero_point = _expr.const(inputs[2])
    return relay.qnn.op.dequantize(inputs[0], inp_scale, inp_zero_point)
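For intuition, qnn.dequantize applies the standard affine mapping x_fp32 = (x_q - zero_point) * scale. A minimal NumPy sketch of the same arithmetic (the helper name here is illustrative, not part of the snippet):

import numpy as np

def dequantize_numpy(x_q, scale, zero_point):
    # Affine dequantization: shift by the zero point, then rescale.
    return (x_q.astype(np.float32) - zero_point) * scale

x_q = np.array([0, 128, 255], dtype=np.uint8)
print(dequantize_numpy(x_q, scale=0.1, zero_point=128))  # approx. [-12.8  0.  12.7]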
Example #2
def _impl(inputs, _):
    # inputs[0]: float tensor; inputs[1], inputs[2]: output scale and zero point
    return relay.qnn.op.quantize(inputs[0],
                                 _expr.const(inputs[1]),
                                 _expr.const(inputs[2]),
                                 out_dtype="uint8",
                                 axis=1)
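The inverse direction matches the uint8 out_dtype chosen above: x_q = clip(round(x / scale) + zero_point, 0, 255). An illustrative NumPy sketch (the helper name is hypothetical):

import numpy as np

def quantize_numpy_uint8(x, scale, zero_point):
    # Affine quantization: rescale, round, shift by the zero point, saturate.
    q = np.round(x / scale) + zero_point
    return np.clip(q, 0, 255).astype(np.uint8)

x = np.array([-12.8, 0.0, 12.7], dtype=np.float32)
print(quantize_numpy_uint8(x, scale=0.1, zero_point=128))  # [  0 128 255]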
Example #3
def add_quant_params_to_outputs(outputs,
                                packed_param_map,
                                quant_params,
                                input_scales_for_bias,
                                keep_quantized_weight=False):
    """
    Add quant params to outputs so that they can be referenced by other
    ops later. Weights are quantized here.
    """
    for node_name, packed_param_name in packed_param_map.items():
        qparam = quant_params[packed_param_name]
        weight_scale = _get_numpy(qparam.scale)
        param_prefix = packed_param_name[:-len("._packed_params")]

        if keep_quantized_weight:
            qparam.weight_var = _expr.var(param_prefix + "_weight",
                                          shape=qparam.weight.shape,
                                          dtype="int8")
            qparam.weight = quantize_numpy(qparam.weight, weight_scale,
                                           _get_numpy(qparam.zero_point),
                                           np.int8)
            qweight = qparam.weight_var
        else:
            qparam.weight_var = _expr.var(param_prefix + "_weight",
                                          shape=qparam.weight.shape,
                                          dtype="float32")
            qweight = relay.qnn.op.quantize(qparam.weight_var,
                                            qparam.scale,
                                            qparam.zero_point,
                                            out_dtype="int8",
                                            axis=0)

        if qparam.bias is not None:
            float_bias_var = _expr.var(param_prefix + "_bias",
                                       shape=qparam.bias.shape,
                                       dtype="float32")
            if node_name not in input_scales_for_bias:
                # This case is for dynamic quantization, where the input activation scale is
                # unknown until runtime.
                qparam.bias_var = float_bias_var
                qbias = qparam.bias_var
            elif keep_quantized_weight:
                qparam.bias_var = _expr.var(param_prefix + "_bias",
                                            shape=qparam.bias.shape,
                                            dtype="int32")
                qparam.bias = quantize_numpy(
                    qparam.bias,
                    input_scales_for_bias[node_name] * weight_scale, 0,
                    np.int32)
                qbias = qparam.bias_var
            else:
                qparam.bias_var = float_bias_var
                qbias = relay.qnn.op.quantize(
                    qparam.bias_var,
                    _expr.const(input_scales_for_bias[node_name] *
                                weight_scale),
                    _expr.const(0, "int32"),
                    out_dtype="int32",
                    axis=0,
                )
        else:
            qbias = None

        quant_params[packed_param_name] = qparam

        params = [qweight, qparam.scale, qparam.zero_point, qbias]

        if isinstance(quant_params[packed_param_name], ConvPackedParam):
            params += [
                qparam.stride,
                qparam.padding,
                qparam.dilation,
                qparam.groups,
                qparam.output_padding,
            ]

        outputs[node_name] = params
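The quantize_numpy helper called above is not shown in this example. A plausible sketch, assuming affine quantization with support for the per-output-channel scale vector implied by axis=0 in the qnn.quantize call (this is an assumption, not the verified upstream helper):

import numpy as np

def quantize_numpy(arr, scale, zero_point, out_dtype):
    # A per-channel scale vector broadcasts over axis 0 (output channels).
    if np.ndim(scale) > 0:
        scale = np.reshape(scale, (arr.shape[0],) + (1,) * (arr.ndim - 1))
    q = np.round(arr / scale) + zero_point
    info = np.iinfo(out_dtype)
    return np.clip(q, info.min, info.max).astype(out_dtype)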
Example #4
    # `with_relu`, used in the return at the bottom, is supplied by the
    # enclosing converter factory that wraps this `_impl`.
    def _impl(inputs, _):
        # refer to src/ATen/native/quantized/cpu/qconv.cpp
        # inputs[0]: input tensor
        # inputs[1]: (weight, scale, zero_point, bias)
        # inputs[2-5]: stride, padding, dilation, groups
        # inputs[6]: output_scale
        # inputs[7]: output_zero_point
        # inputs[8]: input_scale (added manually by frontend)
        # inputs[9]: input_zero_point (added manually by frontend)
        weight = inputs[1][0]
        weight_scale = inputs[1][1]
        weight_zero_point = inputs[1][2]

        output_scale = _expr.const(inputs[6])
        output_zero_point = _expr.const(inputs[7])

        assert len(inputs) == 10, "Input quant params not found in op inputs"
        # These are manually added by add_input_quant_params_to_op_inputs above
        # In torch, they are retrieved from QTensor data structure at runtime
        input_scale = _expr.const(inputs[8])
        input_zero_point = _expr.const(inputs[9])

        strides = infer_shape(inputs[2])
        padding = infer_shape(inputs[3])
        dilation = infer_shape(inputs[4])
        groups = inputs[5]

        weight_shape = infer_shape(weight)
        kernel_size = (weight_shape[2], weight_shape[3])
        out_channels = weight_shape[0]

        if padding[0] != 0 or padding[1] != 0:
            pad_val = _get_scalar(input_zero_point)
            inp = _op.nn.pad(inputs[0],
                             pad_width=((0, 0), (0, 0), (padding[0],
                                                         padding[0]),
                                        (padding[1], padding[1])),
                             pad_value=float(pad_val))
        else:
            inp = inputs[0]

        # padding is (0, 0) because we did explicit pad op with
        # pad value being zero point above
        conv_out = relay.qnn.op.conv2d(inp,
                                       weight,
                                       input_zero_point,
                                       weight_zero_point,
                                       input_scale,
                                       weight_scale,
                                       kernel_size=kernel_size,
                                       dilation=dilation,
                                       strides=strides,
                                       padding=(0, 0),
                                       groups=groups,
                                       channels=out_channels)
        bias_var = inputs[1][3]

        return _do_bias_and_requantize(conv_out, bias_var, input_scale,
                                       weight_scale, output_scale,
                                       output_zero_point, with_relu)
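_do_bias_and_requantize is defined elsewhere in this module. A sketch of what such a helper plausibly does, assuming the bias is added to the int32 accumulator (whose scale is input_scale * weight_scale and zero point 0) before a qnn.requantize to the output quantization parameters; the ReLU-fusion detail is an assumption:

def _do_bias_and_requantize(output, bias, input_scale, weight_scale,
                            output_scale, output_zero_point, with_relu):
    # Bias add happens on the int32 accumulator, so the bias needs no
    # rescaling beyond being quantized with scale input_scale * weight_scale.
    if bias is not None:
        output = _op.nn.bias_add(output, bias)
    requantized = relay.qnn.op.requantize(
        output,
        input_scale * weight_scale,  # accumulator scale
        relay.const(0, "int32"),     # accumulator zero point
        output_scale,
        output_zero_point,
        out_dtype="int32",
        axis=1,
    )
    # A fused ReLU clips at the output zero point; finally saturate to uint8.
    clip_min = _get_scalar(output_zero_point) if with_relu else 0
    return _op.cast(_op.tensor.clip(requantized, clip_min, 255.0), "uint8")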
Example #5
def non_max_suppression(
    data,
    valid_count,
    indices,
    max_output_size=-1,
    iou_threshold=0.5,
    force_suppress=False,
    top_k=-1,
    coord_start=2,
    score_index=1,
    id_index=0,
    return_indices=True,
    invalid_to_bottom=False,
):
    """Non-maximum suppression operator for object detection.

    Parameters
    ----------
    data : relay.Expr
        3-D tensor with shape [batch_size, num_anchors, 6]
        or [batch_size, num_anchors, 5].
        The last dimension should be in format of
        [class_id, score, box_left, box_top, box_right, box_bottom]
        or [score, box_left, box_top, box_right, box_bottom]. It could
        be the second output out_tensor of get_valid_counts.

    valid_count : relay.Expr
        1-D tensor for valid number of boxes. It could be the output
        valid_count of get_valid_counts.

    indices : relay.Expr
        2-D tensor with shape [batch_size, num_anchors], represents
        the index of box in original data. It could be the third
        output out_indices of get_valid_counts. The values in the
        second dimension are like the output of arange(num_anchors)
        if get_valid_counts is not used before non_max_suppression.

    max_output_size : int or relay.Expr, optional
        Max number of output valid boxes for each instance.
        Return all valid boxes if the value of max_output_size is less than 0.

    iou_threshold : float or relay.Expr, optional
        Non-maximum suppression threshold.

    force_suppress : bool, optional
        Suppress all detections regardless of class_id.

    top_k : int, optional
        Keep maximum top k detections before nms, -1 for no limit.

    coord_start : int, optional
        The starting index of the consecutive 4 coordinates.

    score_index : int, optional
        Index of the scores/confidence of boxes.

    id_index : int, optional
        index of the class categories, -1 to disable.

    return_indices : bool, optional
        Whether to return box indices in input data.

    invalid_to_bottom : bool, optional
        Whether to move all invalid bounding boxes to the bottom
        (equivalently, all valid boxes to the top).

    Returns
    -------
    out : relay.Expr or relay.Tuple
        A relay.Expr if return_indices is False: a 3-D tensor with shape
        [batch_size, num_anchors, 6] or [batch_size, num_anchors, 5].
        If return_indices is True, a relay.Tuple of two 2-D tensors, with
        shape [batch_size, num_anchors] and [batch_size, num_valid_anchors]
        respectively.
    """
    if not isinstance(max_output_size, expr.Expr):
        max_output_size = expr.const(max_output_size, "int32")
    if not isinstance(iou_threshold, expr.Expr):
        iou_threshold = expr.const(iou_threshold, "float32")
    out = _make.non_max_suppression(
        data,
        valid_count,
        indices,
        max_output_size,
        iou_threshold,
        force_suppress,
        top_k,
        coord_start,
        score_index,
        id_index,
        return_indices,
        invalid_to_bottom,
    )
    if return_indices:
        return expr.TupleWrapper(out, 2)
    return out
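A minimal usage sketch, pairing this with get_valid_counts as the parameter docs above suggest (shapes and thresholds are illustrative):

from tvm import relay

# [batch, num_anchors, 6] boxes: (class_id, score, left, top, right, bottom)
boxes = relay.var("boxes", shape=(1, 128, 6), dtype="float32")
cv = relay.vision.get_valid_counts(boxes, score_threshold=0.05)
valid_count, data, indices = cv[0], cv[1], cv[2]
nms_out = relay.vision.non_max_suppression(
    data, valid_count, indices,
    max_output_size=-1, iou_threshold=0.5, return_indices=False)
func = relay.Function([boxes], nms_out)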
Example #6
def hardsigmoid(x):
    # relu6 clamps to [0, 6], so this computes clip(x + 3, 0, 6) / 6
    dtype = "float32"
    return relu6(x + _expr.const(3.0, dtype=dtype)) / _expr.const(
        6.0, dtype=dtype)
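The same computation in NumPy for intuition, since relu6(x + 3) / 6 is just the piecewise-linear clip(x + 3, 0, 6) / 6:

import numpy as np

def hardsigmoid_numpy(x):
    # hard sigmoid: 0 below x = -3, 1 above x = 3, linear in between
    return np.clip(x + 3.0, 0.0, 6.0) / 6.0

print(hardsigmoid_numpy(np.array([-4.0, -3.0, 0.0, 3.0, 4.0])))
# [0.  0.  0.5 1.  1. ]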