def from_observed(cls, other):
    """Convert an observed (prepared) float module into its quantized form.

    Runs the standard ``torch.quantization.convert`` pass, then replaces the
    ``bias_k`` / ``bias_v`` float parameters (if present) with per-tensor
    quantized tensors, since ``convert`` does not quantize bare parameters.

    Args:
        other: the observed module produced by the prepare step.

    Returns:
        The converted module with ``bias_k``/``bias_v`` quantized to quint8.
    """
    converted = torch.quantization.convert(other, mapping=None, inplace=False,
                                           remove_qconfig=True,
                                           convert_custom_config_dict=None)
    # Remove the parameters for the bias_k and bias_v to quantize them.
    # TODO: This is a potential source of accuracy drop.
    #       quantized cat takes the scale and zp of the first
    #       element, which might lose the precision in the bias_k
    #       and the bias_v (which are cat'ed with k/v being first).
    if converted.bias_k is not None:
        bias_k = converted._parameters.pop('bias_k')
        sc, zp = torch._choose_qparams_per_tensor(bias_k, reduce_range=False)
        bias_k = torch.quantize_per_tensor(bias_k, sc, zp, torch.quint8)
        setattr(converted, 'bias_k', bias_k)  # noqa: B010
    if converted.bias_v is not None:
        bias_v = converted._parameters.pop('bias_v')
        # Bug fix: qparams must be chosen from bias_v itself; the original
        # code computed them from bias_k, quantizing bias_v with the wrong
        # scale/zero-point whenever the two tensors had different ranges.
        sc, zp = torch._choose_qparams_per_tensor(bias_v, reduce_range=False)
        bias_v = torch.quantize_per_tensor(bias_v, sc, zp, torch.quint8)
        setattr(converted, 'bias_v', bias_v)  # noqa: B010
    return converted
def test_choose_qparams(self, X, reduce_range):
    """Check that torch._choose_qparams_per_tensor matches the reference
    dynamic-qparams computation for quint8."""
    data, (scale, zero_point, torch_type) = X
    tensor = torch.from_numpy(data)
    ref_scale, ref_zp = _calculate_dynamic_qparams(
        tensor, torch.quint8, reduce_range=reduce_range)
    actual_scale, actual_zp = torch._choose_qparams_per_tensor(
        tensor, reduce_range)
    np.testing.assert_array_almost_equal(ref_scale, actual_scale, decimal=3)
    self.assertEqual(ref_zp, actual_zp)
def test_per_tensor_dynamic_quant_observers(self, X, reduce_range):
    """Check that MinMaxDynamicQuantObserver produces the same qparams as
    torch._choose_qparams_per_tensor."""
    data, (scale, zero_point, torch_type) = X
    tensor = torch.from_numpy(data)
    observer = MinMaxDynamicQuantObserver(dtype=torch.quint8,
                                          reduce_range=reduce_range)
    observer(tensor)  # record min/max statistics
    obs_scale, obs_zp = observer.calculate_qparams()
    ref_scale, ref_zp = torch._choose_qparams_per_tensor(tensor, reduce_range)
    self.assertEqual(ref_scale, obs_scale)
    self.assertEqual(ref_zp, obs_zp)