def test_observer_scriptable(self):
    obs_list = [
        MinMaxObserver(),
        MovingAverageMinMaxObserver(),
        MinMaxDynamicQuantObserver(),
    ]
    for obs in obs_list:
        scripted = torch.jit.script(obs)

        x = torch.rand(3, 4)
        obs(x)
        scripted(x)
        self.assertEqual(obs.calculate_qparams(), scripted.calculate_qparams())

        buf = io.BytesIO()
        torch.jit.save(scripted, buf)
        buf.seek(0)
        loaded = torch.jit.load(buf)
        self.assertEqual(obs.calculate_qparams(), loaded.calculate_qparams())

    # Check TensorListObserver
    from torch.quantization.observer import _MinMaxTensorListObserver
    obs = _MinMaxTensorListObserver()
    scripted = torch.jit.script(obs)
    x = [torch.rand(3, 4), torch.rand(4, 5)]
    obs(x)
    scripted(x)
    self.assertEqual(obs.calculate_qparams(), scripted.calculate_qparams())
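# For reference: calculate_qparams() returns a (scale, zero_point) pair of
# tensors, so each assertEqual above compares both values at once. A minimal
# sketch; the helper name is illustrative and not part of the test suite:
def _qparams_pair_sketch():
    obs = MinMaxObserver()
    obs(torch.tensor([0.0, 1.0]))
    scale, zero_point = obs.calculate_qparams()
    assert scale.numel() == 1 and zero_point.numel() == 1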
def test_per_tensor_observers(self, qdtype, qscheme, reduce_range):
    # reduce_range cannot be true for symmetric quantization with uint8
    if qdtype == torch.quint8 and qscheme == torch.per_tensor_symmetric:
        reduce_range = False
    ObserverList = [
        MinMaxObserver(dtype=qdtype, qscheme=qscheme, reduce_range=reduce_range),
        MovingAverageMinMaxObserver(averaging_constant=0.5,
                                    dtype=qdtype,
                                    qscheme=qscheme,
                                    reduce_range=reduce_range),
    ]
    for myobs in ObserverList:
        # calculate_qparams() should still return (with a warning) for
        # observers that have seen no data
        qparams = myobs.calculate_qparams()
        if type(myobs) == MinMaxObserver:
            x = torch.tensor([1.0, 2.0, 2.0, 3.0, 4.0, 5.0, 6.0])
            y = torch.tensor([4.0, 5.0, 5.0, 6.0, 7.0, 8.0])
        else:
            # Moving average of min/max for x and y matches the extreme
            # values of the x/y used for the minmax observer above
            x = torch.tensor([0.0, 2.0, 2.0, 3.0, 4.0, 5.0, 6.0])
            y = torch.tensor([2.0, 5.0, 5.0, 6.0, 7.0, 10.0])
        result = myobs(x)
        result = myobs(y)
        self.assertEqual(result, y)
        self.assertEqual(myobs.min_val, 1.0)
        self.assertEqual(myobs.max_val, 8.0)

        # Affine: scale = (max - min) / 255 = 8 / 255 ~= 0.0313725 (min is
        # clamped to include 0). Symmetric: scale = max(|min|, |max|) / 127.5
        # = 8 / 127.5 ~= 0.062745. reduce_range halves the quantized range
        # (255 -> 127 steps), hence the * 255 / 127 factor.
        qparams = myobs.calculate_qparams()
        if reduce_range:
            if qscheme == torch.per_tensor_symmetric:
                ref_scale = 0.062745 * 255 / 127
                ref_zero_point = 0 if qdtype is torch.qint8 else 128
            else:
                ref_scale = 0.0313725 * 255 / 127
                ref_zero_point = -64 if qdtype is torch.qint8 else 0
        else:
            if qscheme == torch.per_tensor_symmetric:
                ref_scale = 0.062745
                ref_zero_point = 0 if qdtype is torch.qint8 else 128
            else:
                ref_scale = 0.0313725
                ref_zero_point = -128 if qdtype is torch.qint8 else 0
        self.assertEqual(qparams[1].item(), ref_zero_point)
        self.assertAlmostEqual(qparams[0].item(), ref_scale, delta=1e-5)

        state_dict = myobs.state_dict()
        b = io.BytesIO()
        torch.save(state_dict, b)
        b.seek(0)
        loaded_dict = torch.load(b)
        for key in state_dict:
            self.assertEqual(state_dict[key], loaded_dict[key])
        loaded_obs = MinMaxObserver(dtype=qdtype, qscheme=qscheme,
                                    reduce_range=reduce_range)
        loaded_obs.load_state_dict(loaded_dict)
        loaded_qparams = loaded_obs.calculate_qparams()
        self.assertEqual(myobs.min_val, loaded_obs.min_val)
        self.assertEqual(myobs.max_val, loaded_obs.max_val)
        self.assertEqual(myobs.calculate_qparams(), loaded_obs.calculate_qparams())
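# Why the MovingAverageMinMaxObserver inputs above land on the same running
# min/max as the MinMaxObserver inputs: with averaging_constant c = 0.5 the
# observer updates each statistic with an exponential moving average,
# stat <- stat + c * (stat_new - stat). A minimal sketch; the helper below
# is illustrative, not PyTorch API:
def _moving_average_sketch():
    def ema(old, new, c=0.5):
        return old + c * (new - old)

    # x gives (min, max) = (0, 6); y gives (2, 10).
    assert ema(0.0, 2.0) == 1.0   # matches myobs.min_val == 1.0 above
    assert ema(6.0, 10.0) == 8.0  # matches myobs.max_val == 8.0 above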
def test_observer_qparams_respects_device_affinity(self):
    """
    Ensure that the scale and zero_point returned by the observer
    are on the same device as the input tensor.
    """
    observerList = [
        MinMaxObserver(),
        MovingAverageMinMaxObserver(),
        PerChannelMinMaxObserver(),
        MovingAveragePerChannelMinMaxObserver(),
    ]
    for obs in observerList:
        device = torch.device('cuda:1')
        x = torch.randn(1, 2, device=device)
        obs.to(device)
        result = obs(x)
        scale, zero_point = obs.calculate_qparams()

        self.assertEqual(x.device, scale.device)
        self.assertEqual(x.device, zero_point.device)
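# The device affinity checked above falls out of the observer design: the
# running min/max statistics are registered buffers, so Module.to(device)
# moves them with the module, and calculate_qparams() derives scale and
# zero_point from them on that device. A minimal sketch; it assumes a CUDA
# build, and the helper name is illustrative:
def _buffer_device_sketch():
    obs = MinMaxObserver()
    obs(torch.randn(4))             # statistics recorded on CPU
    obs.to(torch.device('cuda:0'))  # buffers move with the module
    scale, zero_point = obs.calculate_qparams()
    assert scale.device.type == 'cuda'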
    MinMaxObserver,
    MovingAverageMinMaxObserver,
    QConfig,
    float_qparams_weight_only_qconfig,
    get_default_qat_qconfig,
    get_default_qconfig,
)

# TensorFlow Lite quantization spec:
# https://www.tensorflow.org/lite/performance/quantization_spec?hl=en
# Activations: int8, asymmetric, per-tensor, [-128, 127] range.
# Weights: int8, symmetric, per-tensor, [-127, 127] range.
_TFLITE_QCONFIG = QConfig(
    activation=MovingAverageMinMaxObserver.with_args(
        dtype=torch.qint8,
        quant_min=-128,
        quant_max=127,
        qscheme=torch.per_tensor_affine,
    ),
    weight=MinMaxObserver.with_args(
        dtype=torch.qint8,
        quant_min=-127,
        quant_max=127,
        qscheme=torch.per_tensor_symmetric,
    ),
)

_TFLITE_QAT_QCONFIG = QConfig(
    activation=FakeQuantize.with_args(
        observer=MovingAverageMinMaxObserver,
        dtype=torch.qint8,
        quant_min=-128,
        quant_max=127,
        qscheme=torch.per_tensor_affine,
    ),