def test_minmax_observer(self, qdtype, qscheme, reduce_range): # reduce_range cannot be true for symmetric quantization with uint8 if qdtype == torch.quint8 and qscheme == torch.per_tensor_symmetric: reduce_range = False myobs = MinMaxObserver(dtype=qdtype, qscheme=qscheme, reduce_range=reduce_range) x = torch.tensor([1.0, 2.0, 2.0, 3.0, 4.0, 5.0, 6.0]) y = torch.tensor([4.0, 5.0, 5.0, 6.0, 7.0, 8.0]) result = myobs(x) result = myobs(y) self.assertEqual(result, y) self.assertEqual(myobs.min_val, 1.0) self.assertEqual(myobs.max_val, 8.0) qparams = myobs.calculate_qparams() if reduce_range: if qscheme == torch.per_tensor_symmetric: ref_scale = 0.062745 * 255 / 127 ref_zero_point = 0 if qdtype is torch.qint8 else 128 else: ref_scale = 0.0313725 * 255 / 127 ref_zero_point = -64 if qdtype is torch.qint8 else 0 else: if qscheme == torch.per_tensor_symmetric: ref_scale = 0.062745 ref_zero_point = 0 if qdtype is torch.qint8 else 128 else: ref_scale = 0.0313725 ref_zero_point = -128 if qdtype is torch.qint8 else 0 self.assertEqual(qparams[1].item(), ref_zero_point) self.assertAlmostEqual(qparams[0].item(), ref_scale, delta=1e-5)
def test_observer_scriptable(self):
    """Observers must agree with their scripted forms, including after a
    torch.jit save/load round trip through an in-memory buffer."""
    for obs in (MinMaxObserver(), MovingAverageMinMaxObserver(), MinMaxDynamicQuantObserver()):
        scripted = torch.jit.script(obs)
        sample = torch.rand(3, 4)
        obs(sample)
        scripted(sample)
        self.assertEqual(obs.calculate_qparams(), scripted.calculate_qparams())

        buf = io.BytesIO()
        torch.jit.save(scripted, buf)
        buf.seek(0)
        reloaded = torch.jit.load(buf)
        self.assertEqual(obs.calculate_qparams(), reloaded.calculate_qparams())

    # Check TensorListObserver: eager and scripted must agree on list input.
    from torch.quantization.observer import _MinMaxTensorListObserver
    obs = _MinMaxTensorListObserver()
    scripted = torch.jit.script(obs)
    tensors = [torch.rand(3, 4), torch.rand(4, 5)]
    obs(tensors)
    scripted(tensors)
    self.assertEqual(obs.calculate_qparams(), scripted.calculate_qparams())
def test_minmax_observer(self, qdtype, qscheme): myobs = MinMaxObserver(dtype=qdtype, qscheme=qscheme) x = torch.tensor([1.0, 2.0, 2.0, 3.0, 4.0, 5.0, 6.0]) y = torch.tensor([4.0, 5.0, 5.0, 6.0, 7.0, 8.0]) result = myobs(x) result = myobs(y) self.assertEqual(result, y) self.assertEqual(myobs.min_val, 1.0) self.assertEqual(myobs.max_val, 8.0) qparams = myobs.calculate_qparams() if qscheme == torch.per_tensor_symmetric: ref_scale = 0.062745 ref_zero_point = 0 if qdtype is torch.qint8 else 128 else: ref_scale = 0.0313725 ref_zero_point = -128 if qdtype is torch.qint8 else 0 self.assertEqual(qparams[1].item(), ref_zero_point) self.assertAlmostEqual(qparams[0].item(), ref_scale, delta=1e-5)
def test_per_tensor_observers(self, qdtype, qscheme, reduce_range): # reduce_range cannot be true for symmetric quantization with uint8 if qdtype == torch.quint8 and qscheme == torch.per_tensor_symmetric: reduce_range = False ObserverList = [MinMaxObserver(dtype=qdtype, qscheme=qscheme, reduce_range=reduce_range), MovingAverageMinMaxObserver(averaging_constant=0.5, dtype=qdtype, qscheme=qscheme, reduce_range=reduce_range)] for myobs in ObserverList: # Calculate Qparams should return with a warning for observers with no data qparams = myobs.calculate_qparams() if type(myobs) == MinMaxObserver: x = torch.tensor([1.0, 2.0, 2.0, 3.0, 4.0, 5.0, 6.0]) y = torch.tensor([4.0, 5.0, 5.0, 6.0, 7.0, 8.0]) else: # Moving average of min/max for x and y matches that of # extreme values for x/y used for minmax observer x = torch.tensor([0.0, 2.0, 2.0, 3.0, 4.0, 5.0, 6.0]) y = torch.tensor([2.0, 5.0, 5.0, 6.0, 7.0, 10.0]) result = myobs(x) result = myobs(y) self.assertEqual(result, y) self.assertEqual(myobs.min_val, 1.0) self.assertEqual(myobs.max_val, 8.0) qparams = myobs.calculate_qparams() if reduce_range: if qscheme == torch.per_tensor_symmetric: ref_scale = 0.062745 * 255 / 127 ref_zero_point = 0 if qdtype is torch.qint8 else 128 else: ref_scale = 0.0313725 * 255 / 127 ref_zero_point = -64 if qdtype is torch.qint8 else 0 else: if qscheme == torch.per_tensor_symmetric: ref_scale = 0.062745 ref_zero_point = 0 if qdtype is torch.qint8 else 128 else: ref_scale = 0.0313725 ref_zero_point = -128 if qdtype is torch.qint8 else 0 self.assertEqual(qparams[1].item(), ref_zero_point) self.assertAlmostEqual(qparams[0].item(), ref_scale, delta=1e-5) state_dict = myobs.state_dict() b = io.BytesIO() torch.save(state_dict, b) b.seek(0) loaded_dict = torch.load(b) for key in state_dict: self.assertEqual(state_dict[key], loaded_dict[key]) loaded_obs = MinMaxObserver(dtype=qdtype, qscheme=qscheme, reduce_range=reduce_range) loaded_obs.load_state_dict(loaded_dict) loaded_qparams = 
loaded_obs.calculate_qparams() self.assertEqual(myobs.min_val, loaded_obs.min_val) self.assertEqual(myobs.max_val, loaded_obs.max_val) self.assertEqual(myobs.calculate_qparams(), loaded_obs.calculate_qparams())
def test_tensor_list_observer(self):
    """_MinMaxTensorListObserver must match one MinMaxObserver per tensor."""
    from torch.quantization.observer import _MinMaxTensorListObserver

    tensors = [
        torch.tensor([1.0, 2.5, 3.5]),
        torch.tensor([2.0, 4.5, 3.5]),
        torch.tensor([4.0, 2.5, 3.5]),
    ]
    list_obs = _MinMaxTensorListObserver()
    list_obs(tensors)
    qparams = list_obs.calculate_qparams()

    # Reference: an independent MinMaxObserver per input tensor.
    references = []
    for t in tensors:
        single = MinMaxObserver()
        single(t)
        references.append((single.min_val, single.max_val, single.calculate_qparams()))

    for idx, (ref_min, ref_max, ref_qparams) in enumerate(references):
        self.assertEqual(list_obs.min_val[idx], ref_min)
        self.assertEqual(list_obs.max_val[idx], ref_max)
        self.assertEqual(qparams[0][idx], ref_qparams[0])
        self.assertEqual(qparams[1][idx], ref_qparams[1])
def test_observer_scriptable(self):
    """Scripted observers must report the same qparams as eager ones,
    before and after a torch.jit save/load round trip."""
    for obs in (MinMaxObserver(), MovingAverageMinMaxObserver(), MinMaxDynamicQuantObserver()):
        scripted = torch.jit.script(obs)
        sample = torch.rand(3, 4)
        obs(sample)
        scripted(sample)
        self.assertEqual(obs.calculate_qparams(), scripted.calculate_qparams())

        # Round-trip the scripted module through an in-memory buffer.
        buf = io.BytesIO()
        torch.jit.save(scripted, buf)
        buf.seek(0)
        reloaded = torch.jit.load(buf)
        self.assertEqual(obs.calculate_qparams(), reloaded.calculate_qparams())
def test_observer_qparams_respects_device_affinity(self):
    """
    Ensure that the scale and zero_point returned by the observer
    are on the same device as the input tensor.
    """
    # NOTE(review): hardcodes 'cuda:1' — presumably a multi-GPU skip guard
    # (decorator) exists at the class/caller level; confirm before running.
    device = torch.device('cuda:1')
    for obs in (MinMaxObserver(), MovingAverageMinMaxObserver(),
                PerChannelMinMaxObserver(), MovingAveragePerChannelMinMaxObserver()):
        data = torch.randn(1, 2, device=device)
        obs.to(device)
        obs(data)
        scale, zero_point = obs.calculate_qparams()
        self.assertEqual(data.device, scale.device)
        self.assertEqual(data.device, zero_point.device)
def test_minmax_observer(self, qdtype, qscheme, reduce_range): # reduce_range cannot be true for symmetric quantization with uint8 if qdtype == torch.quint8 and qscheme == torch.per_tensor_symmetric: reduce_range = False myobs = MinMaxObserver(dtype=qdtype, qscheme=qscheme, reduce_range=reduce_range) x = torch.tensor([1.0, 2.0, 2.0, 3.0, 4.0, 5.0, 6.0]) y = torch.tensor([4.0, 5.0, 5.0, 6.0, 7.0, 8.0]) result = myobs(x) result = myobs(y) self.assertEqual(result, y) self.assertEqual(myobs.min_val, 1.0) self.assertEqual(myobs.max_val, 8.0) qparams = myobs.calculate_qparams() if reduce_range: if qscheme == torch.per_tensor_symmetric: ref_scale = 0.062745 * 255 / 127 ref_zero_point = 0 if qdtype is torch.qint8 else 128 else: ref_scale = 0.0313725 * 255 / 127 ref_zero_point = -64 if qdtype is torch.qint8 else 0 else: if qscheme == torch.per_tensor_symmetric: ref_scale = 0.062745 ref_zero_point = 0 if qdtype is torch.qint8 else 128 else: ref_scale = 0.0313725 ref_zero_point = -128 if qdtype is torch.qint8 else 0 self.assertEqual(qparams[1].item(), ref_zero_point) self.assertAlmostEqual(qparams[0].item(), ref_scale, delta=1e-5) # Test for serializability state_dict = myobs.state_dict() b = io.BytesIO() torch.save(state_dict, b) b.seek(0) loaded_dict = torch.load(b) for key in state_dict: self.assertEqual(state_dict[key], loaded_dict[key]) loaded_obs = MinMaxObserver(dtype=qdtype, qscheme=qscheme, reduce_range=reduce_range) loaded_obs.load_state_dict(loaded_dict) loaded_qparams = loaded_obs.calculate_qparams() self.assertEqual(myobs.min_val, loaded_obs.min_val) self.assertEqual(myobs.max_val, loaded_obs.max_val) self.assertEqual(myobs.calculate_qparams(), loaded_obs.calculate_qparams())
def test_per_tensor_observer(self):
    """Smoke-test MinMaxObserver through the shared observer harness."""
    observer = MinMaxObserver()
    self._test_obs(observer, input_size=[5, 5], generate=False)
get_default_qconfig, ) # TensorFlow Lite Quantization Specs # https://www.tensorflow.org/lite/performance/quantization_spec?hl=en # For activations: int8 asymmetric per-tensor [-128, 127] range # For weights: int8 symmetric per-tensor [-127, 127] range _TFLITE_QCONFIG = QConfig( activation=MovingAverageMinMaxObserver.with_args( dtype=torch.qint8, quant_min=-128, quant_max=127, qscheme=torch.per_tensor_affine, ), weight=MinMaxObserver.with_args(dtype=torch.qint8, quant_min=-127, quant_max=127, qscheme=torch.per_tensor_symmetric), ) _TFLITE_QAT_QCONFIG = QConfig( activation=FakeQuantize.with_args( observer=MovingAverageMinMaxObserver, dtype=torch.qint8, quant_min=-128, quant_max=127, qscheme=torch.per_tensor_affine, ), weight=FakeQuantize.with_args(observer=MinMaxObserver, dtype=torch.qint8, quant_min=-127, quant_max=127, qscheme=torch.per_tensor_symmetric),