def _impl(inputs, _):
    """Convert a dequantize op into ``relay.qnn.op.dequantize``.

    ``inputs`` must hold exactly three entries: the tensor to dequantize,
    followed by its scale and zero point. The scale and zero point are
    wrapped into relay constants before being handed to the QNN op.
    The second positional argument is ignored.
    """
    assert len(inputs) == 3, "Input quant params not found in op inputs"
    quantized_data = inputs[0]
    scale_const = _expr.const(inputs[1])
    zero_point_const = _expr.const(inputs[2])
    return relay.qnn.op.dequantize(quantized_data, scale_const, zero_point_const)
def _impl(inputs, _):
    """Convert a quantize op into ``relay.qnn.op.quantize``.

    ``inputs[0]`` is the tensor to quantize; ``inputs[1]`` and ``inputs[2]``
    are the output scale and zero point, wrapped into relay constants.
    The result is always requested as ``uint8`` with ``axis=1``.
    The second positional argument is ignored.
    """
    data = inputs[0]
    out_scale = _expr.const(inputs[1])
    out_zero_point = _expr.const(inputs[2])
    return relay.qnn.op.quantize(
        data,
        out_scale,
        out_zero_point,
        out_dtype="uint8",
        axis=1,
    )
def add_quant_params_to_outputs(
    outputs, packed_param_map, quant_params, input_scales_for_bias, keep_quantized_weight=False
):
    """
    Register weight/bias expressions and quantization parameters per node.

    For every ``(node_name, packed_param_name)`` pair in ``packed_param_map``
    the matching entry of ``quant_params`` is updated in place with relay
    variables for its weight (and bias, if any), and ``outputs[node_name]`` is
    set to ``[qweight, scale, zero_point, qbias]``, extended with
    stride/padding/dilation/groups/output_padding for ``ConvPackedParam``.

    When ``keep_quantized_weight`` is true the weight (and, if an input scale
    is known for the node, the bias) is quantized eagerly with
    ``quantize_numpy`` and the variables are integer typed; otherwise the
    variables are float32 and a ``relay.qnn.op.quantize`` expression is
    emitted instead.
    """
    suffix_len = len("._packed_params")

    for node_name, packed_param_name in packed_param_map.items():
        qparam = quant_params[packed_param_name]
        weight_scale = _get_numpy(qparam.scale)
        param_prefix = packed_param_name[:-suffix_len]
        weight_name = param_prefix + "_weight"
        bias_name = param_prefix + "_bias"

        if not keep_quantized_weight:
            qparam.weight_var = _expr.var(
                weight_name, shape=qparam.weight.shape, dtype="float32"
            )
            qweight = relay.qnn.op.quantize(
                qparam.weight_var, qparam.scale, qparam.zero_point, out_dtype="int8", axis=0
            )
        else:
            # The variable is declared from the weight's shape before the
            # weight itself is replaced by its int8 quantized form.
            qparam.weight_var = _expr.var(weight_name, shape=qparam.weight.shape, dtype="int8")
            qparam.weight = quantize_numpy(
                qparam.weight, weight_scale, _get_numpy(qparam.zero_point), np.int8
            )
            qweight = qparam.weight_var

        qbias = None
        if qparam.bias is not None:
            float_bias_var = _expr.var(bias_name, shape=qparam.bias.shape, dtype="float32")
            if node_name not in input_scales_for_bias:
                # This case is for dynamic quantization, where the input
                # activation scale is unknown until runtime.
                qparam.bias_var = float_bias_var
                qbias = qparam.bias_var
            else:
                bias_scale = input_scales_for_bias[node_name] * weight_scale
                if keep_quantized_weight:
                    qparam.bias_var = _expr.var(
                        bias_name, shape=qparam.bias.shape, dtype="int32"
                    )
                    qparam.bias = quantize_numpy(qparam.bias, bias_scale, 0, np.int32)
                    qbias = qparam.bias_var
                else:
                    qparam.bias_var = float_bias_var
                    qbias = relay.qnn.op.quantize(
                        qparam.bias_var,
                        _expr.const(bias_scale),
                        _expr.const(0, "int32"),
                        out_dtype="int32",
                        axis=0,
                    )

        quant_params[packed_param_name] = qparam

        params = [qweight, qparam.scale, qparam.zero_point, qbias]
        if isinstance(quant_params[packed_param_name], ConvPackedParam):
            params.extend(
                [
                    qparam.stride,
                    qparam.padding,
                    qparam.dilation,
                    qparam.groups,
                    qparam.output_padding,
                ]
            )
        outputs[node_name] = params
def _impl(inputs, _):
    """Convert a quantized conv2d op into ``relay.qnn.op.conv2d`` + requantize.

    Expected layout of ``inputs`` (refer to
    src/ATen/native/quantized/cpu/qconv.cpp):

    - inputs[0]: input tensor
    - inputs[1]: (weight, scale, zero_point, bias)
    - inputs[2-5]: stride, padding, dilation, groups
    - inputs[6]: output_scale
    - inputs[7]: output_zero_point
    - inputs[8]: input_scale (added manually by frontend)
    - inputs[9]: input_zero_point (added manually by frontend)

    The second positional argument is ignored. ``with_relu`` is a free
    variable that must be supplied by the enclosing scope (not visible in
    this block); it is forwarded unchanged to ``_do_bias_and_requantize``.

    Raises
    ------
    AssertionError
        If ``inputs`` does not contain exactly 10 entries, i.e. the input
        quantization parameters were not appended by the frontend.
    """
    # Validate before touching inputs[6..9] so that a missing input quant
    # param is reported with this message instead of a bare IndexError.
    assert len(inputs) == 10, "Input quant params not found in op inputs"

    weight = inputs[1][0]
    weight_scale = inputs[1][1]
    weight_zero_point = inputs[1][2]
    bias_var = inputs[1][3]

    output_scale = _expr.const(inputs[6])
    output_zero_point = _expr.const(inputs[7])

    # These are manually added by add_input_quant_params_to_op_inputs
    # In torch, they are retrieved from QTensor data structure at runtime
    input_scale = _expr.const(inputs[8])
    input_zero_point = _expr.const(inputs[9])

    strides = infer_shape(inputs[2])
    padding = infer_shape(inputs[3])
    dilation = infer_shape(inputs[4])
    groups = inputs[5]

    weight_shape = infer_shape(weight)
    kernel_size = (weight_shape[2], weight_shape[3])
    out_channels = weight_shape[0]

    if padding[0] != 0 or padding[1] != 0:
        # Pad explicitly with the input zero point so the padded region
        # uses that value rather than a literal 0.
        pad_val = _get_scalar(input_zero_point)
        inp = _op.nn.pad(
            inputs[0],
            pad_width=((0, 0), (0, 0), (padding[0], padding[0]), (padding[1], padding[1])),
            pad_value=float(pad_val),
        )
    else:
        inp = inputs[0]

    # padding is (0, 0) because we did explicit pad op with
    # pad value being zero point above
    conv_out = relay.qnn.op.conv2d(
        inp,
        weight,
        input_zero_point,
        weight_zero_point,
        input_scale,
        weight_scale,
        kernel_size=kernel_size,
        dilation=dilation,
        strides=strides,
        padding=(0, 0),
        groups=groups,
        channels=out_channels,
    )

    return _do_bias_and_requantize(
        conv_out, bias_var, input_scale, weight_scale, output_scale, output_zero_point, with_relu
    )
def non_max_suppression(
    data,
    valid_count,
    indices,
    max_output_size=-1,
    iou_threshold=0.5,
    force_suppress=False,
    top_k=-1,
    coord_start=2,
    score_index=1,
    id_index=0,
    return_indices=True,
    invalid_to_bottom=False,
):
    """Non-maximum suppression operator for object detection.

    Parameters
    ----------
    data : relay.Expr
        3-D tensor with shape [batch_size, num_anchors, 6]
        or [batch_size, num_anchors, 5]. The last dimension is laid out as
        [class_id, score, box_left, box_top, box_right, box_bottom]
        or [score, box_left, box_top, box_right, box_bottom]. It may be
        the second output (out_tensor) of get_valid_counts.

    valid_count : relay.Expr
        1-D tensor holding the number of valid boxes. It may be the
        valid_count output of get_valid_counts.

    indices : relay.Expr
        2-D tensor with shape [batch_size, num_anchors] giving the index of
        each box in the original data. It may be the third output
        (out_indices) of get_valid_counts. When get_valid_counts is not used
        before non_max_suppression, the values along the second dimension
        look like arange(num_anchors).

    max_output_size : int or relay.Expr, optional
        Maximum number of valid output boxes per instance. A value below 0
        returns all valid boxes. A plain value is wrapped into an int32
        constant.

    iou_threshold : float or relay.Expr, optional
        Non-maximum suppression threshold. A plain value is wrapped into a
        float32 constant.

    force_suppress : bool, optional
        Suppress all detections regardless of class_id.

    top_k : int, optional
        Keep at most top k detections before nms, -1 for no limit.

    coord_start : int, optional
        Starting index of the 4 consecutive coordinates.

    score_index : int, optional
        Index of the score/confidence of boxes.

    id_index : int, optional
        Index of the class categories, -1 to disable.

    return_indices : bool, optional
        Whether to return box indices in input data.

    invalid_to_bottom : bool, optional
        Whether to move all valid bounding boxes to the top.

    Returns
    -------
    out : relay.Expr or relay.Tuple
        A relay.Expr if return_indices is disabled: a 3-D tensor
        with shape [batch_size, num_anchors, 6] or [batch_size, num_anchors, 5].
        If return_indices is True, a relay.Tuple of two 2-D tensors, with
        shape [batch_size, num_anchors] and [batch_size, num_valid_anchors]
        respectively.
    """
    # Scalars given as Python values are lifted to typed relay constants;
    # anything that is already an expression is passed through untouched.
    max_output_size = (
        max_output_size
        if isinstance(max_output_size, expr.Expr)
        else expr.const(max_output_size, "int32")
    )
    iou_threshold = (
        iou_threshold
        if isinstance(iou_threshold, expr.Expr)
        else expr.const(iou_threshold, "float32")
    )

    result = _make.non_max_suppression(
        data,
        valid_count,
        indices,
        max_output_size,
        iou_threshold,
        force_suppress,
        top_k,
        coord_start,
        score_index,
        id_index,
        return_indices,
        invalid_to_bottom,
    )

    if not return_indices:
        return result
    return expr.TupleWrapper(result, 2)
def hardsigmoid(x):
    """Build the expression ``relu6(x + 3) / 6``.

    Both literals are created as float32 constants.
    """
    const_dtype = "float32"
    offset = _expr.const(3.0, dtype=const_dtype)
    divisor = _expr.const(6.0, dtype=const_dtype)
    shifted = x + offset
    return relu6(shifted) / divisor