def test_per_channel_qtensor_creation(self): numel = 10 ch_axis = 0 scales = torch.rand(numel) zero_points_int = torch.randint(0, 10, size=(numel, )) zero_points_float = torch.randn(numel) for dtype, zero_points in itertools.product( [torch.qint8, torch.quint8], [zero_points_float, zero_points_int]): q = torch._empty_per_channel_affine_quantized( [numel], scales=scales, zero_points=zero_points, axis=ch_axis, dtype=dtype) # TODO(#38095): Replace assertEqualIgnoreType. See issue #38095 self.assertEqualIgnoreType(scales, q.q_per_channel_scales()) self.assertEqual(zero_points, q.q_per_channel_zero_points()) self.assertEqual(ch_axis, q.q_per_channel_axis()) # create Tensor from uint8_t Tensor, scales and zero_points for zero_points in [zero_points_float, zero_points_int]: int_tensor = torch.randint(0, 100, size=(numel, ), dtype=torch.uint8) q = torch._make_per_channel_quantized_tensor( int_tensor, scales, zero_points, ch_axis) self.assertEqual(int_tensor, q.int_repr()) # TODO(#38095): Replace assertEqualIgnoreType. See issue #38095 self.assertEqualIgnoreType(scales, q.q_per_channel_scales()) self.assertEqual(zero_points, q.q_per_channel_zero_points()) self.assertEqual(ch_axis, q.q_per_channel_axis())
def quantized_tensor_to_pytorch(tensor: torch.Tensor, scale, zp, num_bits, mode, dest_dtype,
                                per_channel=False, channel_dim=0):
    """
    Convert a tensor quantized with CACP quantization parameters into a
    PyTorch "native" quantized tensor.

    The quantization parameters are expected to come from either of:
      * quantization.symmetric_linear_quantization_params
      * quantization.asymmetric_linear_quantization_params
    and the tensor is expected to have been quantized with either of:
      * quantization.linear_quantize
      * quantization.linear_quantize_clamp

    Args:
        tensor (torch.Tensor): The tensor quantized in CACP. Must hold only
          integral values.
        scale (torch.Tensor): Scale factor calculated by CACP
        zp (torch.Tensor): Zero point calculated by CACP
        num_bits (int): Number of bits used for quantization in CACP
        mode (quantization.LinearQuantMode): The quantization mode used in CACP
        dest_dtype (torch.dtype): PyTorch quantized dtype to convert to.
          torch.quint8 and torch.qint8 are backed by uint8 / int8 values; any
          other value is handled as torch.qint32 (int32 values).
        per_channel (bool): Flag indicating whether the tensor was quantized
          per-channel
        channel_dim (int): If per_channel is set, the dimension of the channel
          in the tensor

    Returns:
        PyTorch quantized tensor. A per-channel tensor is produced only when
        per_channel is set and scale has more than one entry along
        channel_dim; otherwise a per-tensor quantized tensor is returned.

    Raises:
        AssertionError: If tensor contains non-integral values.
    """
    assert (tensor == tensor.int()).all(), 'Tensor does not appear to be quantized'

    pt_scale, pt_zp = qparams_to_pytorch(scale, zp, num_bits, mode, dest_dtype,
                                         reduce_range=False)

    # Plain integer dtype that backs the requested quantized dtype.
    storage_dtype = (
        torch.uint8 if dest_dtype == torch.quint8
        else torch.int8 if dest_dtype == torch.qint8
        else torch.int32  # dest_dtype == torch.qint32
    )

    # Shift the stored values by the difference between the two zero points.
    zp_shift = -pt_zp.view(zp.shape) - zp
    int_values = (tensor - zp_shift).to(storage_dtype)

    use_per_channel = per_channel and scale.shape[channel_dim] > 1
    if not use_per_channel:
        return torch._make_per_tensor_quantized_tensor(int_values, pt_scale, pt_zp)
    return torch._make_per_channel_quantized_tensor(int_values, pt_scale, pt_zp, channel_dim)
def test_per_channel_qtensor_creation(self): numel = 10 ch_axis = 0 scales = torch.rand(numel) zero_points = torch.randint(0, 10, size=(numel,)) q = torch._empty_per_channel_affine_quantized( [numel], scales=scales, zero_points=zero_points, axis=ch_axis, dtype=torch.quint8) self.assertEqual(scales, q.q_per_channel_scales()) self.assertEqual(zero_points, q.q_per_channel_zero_points()) self.assertEqual(ch_axis, q.q_per_channel_axis()) # create Tensor from uint8_t Tensor, scales and zero_points int_tensor = torch.randint(0, 100, size=(numel,), dtype=torch.uint8) q = torch._make_per_channel_quantized_tensor(int_tensor, scales, zero_points, ch_axis) self.assertEqual(int_tensor, q.int_repr()) self.assertEqual(scales, q.q_per_channel_scales()) self.assertEqual(zero_points, q.q_per_channel_zero_points()) self.assertEqual(ch_axis, q.q_per_channel_axis())
def _clamp_weights(qweight, observer, scale, zp):
    """Clamp a quantized weight's integer values to the observer's quant range.

    Args:
        qweight: Quantized weight tensor; its integer representation is clamped.
        observer: Object passed to ``_needs_weight_clamping`` and
            ``_get_weight_observer``; the resolved weight observer supplies
            ``quant_min``, ``quant_max``, ``qscheme`` and (for per-channel
            schemes) ``ch_axis``.
        scale: Scale tensor used to rebuild the quantized weight.
        zp: Zero-point tensor used to rebuild the quantized weight.

    Returns:
        ``qweight`` unchanged when no clamping is needed, otherwise a new
        quantized tensor whose integer values lie in
        ``[quant_min, quant_max]``.

    Raises:
        ValueError: If the observer's qscheme is neither a per-tensor nor a
            per-channel scheme.
    """
    if not _needs_weight_clamping(observer, qweight.dtype):
        return qweight

    observer = _get_weight_observer(observer)
    min_, max_ = observer.quant_min, observer.quant_max

    # Doing this because can't use torch.ops.quantized.clamp() with per_channel qscheme yet.
    # Fetch the integer representation once and reuse it for the bounds and the clamp.
    qw_int_repr = qweight.int_repr()
    qw_int_max = torch.clone(qw_int_repr).fill_(max_)
    qw_int_min = torch.clone(qw_int_repr).fill_(min_)
    qw_int = torch.minimum(torch.maximum(qw_int_repr, qw_int_min), qw_int_max)

    if observer.qscheme in [torch.per_tensor_symmetric, torch.per_tensor_affine]:
        qweight = torch._make_per_tensor_quantized_tensor(qw_int, scale.item(), zp.item())
    elif observer.qscheme in [torch.per_channel_symmetric,
                              torch.per_channel_affine,
                              torch.per_channel_affine_float_qparams]:
        qweight = torch._make_per_channel_quantized_tensor(
            qw_int, scale, zp, axis=observer.ch_axis)
    else:
        # qscheme is a torch.qscheme, not a str: "str + qscheme" would raise
        # TypeError and hide the intended ValueError, so format it explicitly.
        raise ValueError(f"Unexpected qscheme {observer.qscheme}")
    return qweight