Exemple #1
0
    def test_per_channel_qtensor_creation(self):
        """Check that per-channel quantized tensors report their construction qparams.

        Two construction paths are exercised:

        * ``torch._empty_per_channel_affine_quantized`` for every combination
          of ``torch.qint8`` / ``torch.quint8`` with float and integer
          zero points;
        * ``torch._make_per_channel_quantized_tensor`` from a ``uint8``
          tensor, again with float and integer zero points.

        In both cases the scales, zero points and channel axis read back from
        the quantized tensor must match the values passed in.
        """
        n_channels = 10
        axis = 0
        scales = torch.rand(n_channels)
        int_zps = torch.randint(0, 10, size=(n_channels, ))
        float_zps = torch.randn(n_channels)
        zp_variants = [float_zps, int_zps]

        def check_qparams(qtensor, expected_zps):
            # TODO(#38095): Replace assertEqualIgnoreType. See issue #38095
            self.assertEqualIgnoreType(scales, qtensor.q_per_channel_scales())
            self.assertEqual(expected_zps, qtensor.q_per_channel_zero_points())
            self.assertEqual(axis, qtensor.q_per_channel_axis())

        # Path 1: allocate an empty per-channel quantized tensor.
        for qdtype in [torch.qint8, torch.quint8]:
            for zps in zp_variants:
                empty_q = torch._empty_per_channel_affine_quantized(
                    [n_channels],
                    scales=scales,
                    zero_points=zps,
                    axis=axis,
                    dtype=qdtype)
                check_qparams(empty_q, zps)

        # Path 2: wrap an existing uint8 tensor together with scales and
        # zero points.
        for zps in zp_variants:
            raw = torch.randint(0, 100, size=(n_channels, ), dtype=torch.uint8)
            wrapped_q = torch._make_per_channel_quantized_tensor(
                raw, scales, zps, axis)
            # The integer payload must survive the round trip unchanged.
            self.assertEqual(raw, wrapped_q.int_repr())
            check_qparams(wrapped_q, zps)
Exemple #2
0
def quantized_tensor_to_pytorch(tensor: torch.Tensor,
                                scale,
                                zp,
                                num_bits,
                                mode,
                                dest_dtype,
                                per_channel=False,
                                channel_dim=0):
    """
    Turn a CACP-quantized tensor into a PyTorch "native" quantized tensor.

    The quantization parameters are expected to come from one of:
      * quantization.symmetric_linear_quantization_params
      * quantization.asymmetric_linear_quantization_params

    and the tensor itself from one of:
      * quantization.linear_quantize
      * quantization.linear_quantize_clamp

    Args:
        tensor (torch.Tensor): The tensor quantized in CACP. Every element
            must already be integer-valued.
        scale (torch.Tensor): Scale factor calculated by CACP
        zp (torch.Tensor): Zero point calculated by CACP
        num_bits (int): Number of bits used for quantization in CACP
        mode (quantization.LinearQuantMode): The quantization mode used in CACP
        dest_dtype (torch.dtype): PyTorch quantized dtype to convert to.
            torch.quint8 and torch.qint8 are stored as uint8 / int8; any other
            value is handled as torch.qint32 (stored as int32).
        per_channel (bool): Flag indicating whether the tensor was quantized
            per-channel
        channel_dim (int): If per_channel is set, the dimension of the channel
            in the tensor

    Returns:
        PyTorch quantized tensor (dtype one of torch.quint8 / torch.qint8 / torch.qint32).
        A per-channel tensor is produced only when per_channel is set and
        scale has more than one entry along channel_dim; otherwise the result
        is a per-tensor quantized tensor.
    """
    assert torch.all(tensor == tensor.int()), 'Tensor does not appear to be quantized'

    pt_scale, pt_zp = qparams_to_pytorch(
        scale, zp, num_bits, mode, dest_dtype, reduce_range=False)

    # Offset between the converted zero point (reshaped like the original
    # one) and the original zero point; subtracting it re-bases the stored
    # integer values.
    zp_shift = -pt_zp.view(zp.shape) - zp

    # Plain integer dtype backing each quantized dtype; int32 is the fallback.
    storage_dtype = {
        torch.quint8: torch.uint8,
        torch.qint8: torch.int8,
    }.get(dest_dtype, torch.int32)
    shifted = (tensor - zp_shift).to(storage_dtype)

    if not per_channel or scale.shape[channel_dim] <= 1:
        return torch._make_per_tensor_quantized_tensor(shifted, pt_scale, pt_zp)
    return torch._make_per_channel_quantized_tensor(
        shifted, pt_scale, pt_zp, channel_dim)
Exemple #3
0
    def test_per_channel_qtensor_creation(self):
        """Check that per-channel ``quint8`` tensors report their construction qparams.

        The tensor is built once with
        ``torch._empty_per_channel_affine_quantized`` and once with
        ``torch._make_per_channel_quantized_tensor`` (from a ``uint8``
        tensor); each time the scales, zero points and channel axis read back
        must equal the inputs.
        """
        n_channels = 10
        axis = 0
        scales = torch.rand(n_channels)
        zero_points = torch.randint(0, 10, size=(n_channels,))

        def check_qparams(qtensor):
            self.assertEqual(scales, qtensor.q_per_channel_scales())
            self.assertEqual(zero_points, qtensor.q_per_channel_zero_points())
            self.assertEqual(axis, qtensor.q_per_channel_axis())

        # Path 1: allocate an empty per-channel quantized tensor.
        empty_q = torch._empty_per_channel_affine_quantized(
            [n_channels],
            scales=scales,
            zero_points=zero_points,
            axis=axis,
            dtype=torch.quint8)
        check_qparams(empty_q)

        # Path 2: wrap an existing uint8 tensor together with scales and
        # zero points.
        raw = torch.randint(0, 100, size=(n_channels,), dtype=torch.uint8)
        wrapped_q = torch._make_per_channel_quantized_tensor(
            raw, scales, zero_points, axis)
        # The integer payload must survive the round trip unchanged.
        self.assertEqual(raw, wrapped_q.int_repr())
        check_qparams(wrapped_q)
Exemple #4
0
def _clamp_weights(qweight, observer, scale, zp):
    """Clamp a quantized weight's integer values to the observer's quant range.

    Args:
        qweight: Quantized weight tensor. Its ``dtype`` is passed to
            ``_needs_weight_clamping`` and its ``int_repr()`` is the data
            that gets clamped.
        observer: Object passed to ``_needs_weight_clamping`` and
            ``_get_weight_observer``. The weight observer obtained from it
            supplies ``quant_min``, ``quant_max``, ``qscheme`` and, for
            per-channel schemes, ``ch_axis``.
        scale: Scale tensor. For per-tensor schemes ``scale.item()`` is used,
            so it must hold a single element there; for per-channel schemes
            it is passed through unchanged.
        zp: Zero-point tensor, treated the same way as ``scale``.

    Returns:
        ``qweight`` itself when ``_needs_weight_clamping`` reports that no
        clamping is required; otherwise a new quantized tensor built from the
        clamped integer values with the given ``scale`` and ``zp``.

    Raises:
        ValueError: If the observer's ``qscheme`` is neither a per-tensor nor
            a per-channel scheme handled below.
    """
    if not _needs_weight_clamping(observer, qweight.dtype):
        return qweight

    observer = _get_weight_observer(observer)
    min_, max_ = observer.quant_min, observer.quant_max

    # Doing this because can't use torch.ops.quantized.clamp() with per_channel qscheme yet.
    # Fetch the integer representation once and clamp it element-wise against
    # constant tensors of the same shape and dtype.
    qw_int = qweight.int_repr()
    lower_bound = torch.full_like(qw_int, min_)
    upper_bound = torch.full_like(qw_int, max_)
    qw_int = torch.minimum(torch.maximum(qw_int, lower_bound), upper_bound)

    if observer.qscheme in [torch.per_tensor_symmetric,
                            torch.per_tensor_affine]:
        qweight = torch._make_per_tensor_quantized_tensor(qw_int, scale.item(), zp.item())
    elif observer.qscheme in [torch.per_channel_symmetric,
                              torch.per_channel_affine,
                              torch.per_channel_affine_float_qparams]:
        qweight = torch._make_per_channel_quantized_tensor(qw_int, scale, zp, axis=observer.ch_axis)
    else:
        # qscheme is a torch.qscheme object, not a str: format it rather than
        # concatenating, which would raise TypeError instead of this error.
        raise ValueError(f"Unexpected qscheme {observer.qscheme}")
    return qweight