    def test_minmax_observer(self, qdtype, qscheme, reduce_range):
        # reduce_range cannot be true for symmetric quantization with uint8
        if qdtype == torch.quint8 and qscheme == torch.per_tensor_symmetric:
            reduce_range = False
        myobs = MinMaxObserver(dtype=qdtype, qscheme=qscheme, reduce_range=reduce_range)
        x = torch.tensor([1.0, 2.0, 2.0, 3.0, 4.0, 5.0, 6.0])
        y = torch.tensor([4.0, 5.0, 5.0, 6.0, 7.0, 8.0])
        result = myobs(x)
        result = myobs(y)
        self.assertEqual(result, y)
        self.assertEqual(myobs.min_val, 1.0)
        self.assertEqual(myobs.max_val, 8.0)
        qparams = myobs.calculate_qparams()
        if reduce_range:
            if qscheme == torch.per_tensor_symmetric:
                ref_scale = 0.062745 * 255 / 127
                ref_zero_point = 0 if qdtype is torch.qint8 else 128
            else:
                ref_scale = 0.0313725 * 255 / 127
                ref_zero_point = -64 if qdtype is torch.qint8 else 0
        else:
            if qscheme == torch.per_tensor_symmetric:
                ref_scale = 0.062745
                ref_zero_point = 0 if qdtype is torch.qint8 else 128
            else:
                ref_scale = 0.0313725
                ref_zero_point = -128 if qdtype is torch.qint8 else 0
        self.assertEqual(qparams[1].item(), ref_zero_point)
        self.assertAlmostEqual(qparams[0].item(), ref_scale, delta=1e-5)
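
For reference, the expected constants above fall straight out of the MinMaxObserver formulas: the observer passes its input through unchanged while tracking the running min/max, the tracked range [1.0, 8.0] is widened to include zero, and the scale is that range divided by the width of the quantized range. A minimal standalone sketch (plain Python, outside the test harness) that reproduces the affine number:

import torch
from torch.quantization.observer import MinMaxObserver

obs = MinMaxObserver(dtype=torch.quint8, qscheme=torch.per_tensor_affine)
obs(torch.tensor([1.0, 2.0, 2.0, 3.0, 4.0, 5.0, 6.0]))
obs(torch.tensor([4.0, 5.0, 5.0, 6.0, 7.0, 8.0]))
scale, zero_point = obs.calculate_qparams()

# Affine: [1, 8] is stretched to [0, 8], so
#   scale = (max - min) / (qmax - qmin) = 8 / 255 ~= 0.0313725, zero_point = 0.
# Symmetric: scale = max(|min|, |max|) / 127.5 = 8 / 127.5 ~= 0.062745.
# With reduce_range the 8-bit range shrinks to 7 bits, which is exactly the
# extra factor of 255 / 127 used in the test.
print(scale.item(), zero_point.item())
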
    def test_observer_scriptable(self):
        obs_list = [
            MinMaxObserver(),
            MovingAverageMinMaxObserver(),
            MinMaxDynamicQuantObserver()
        ]
        for obs in obs_list:
            scripted = torch.jit.script(obs)

            x = torch.rand(3, 4)
            obs(x)
            scripted(x)
            self.assertEqual(obs.calculate_qparams(),
                             scripted.calculate_qparams())

            buf = io.BytesIO()
            torch.jit.save(scripted, buf)
            buf.seek(0)
            loaded = torch.jit.load(buf)
            self.assertEqual(obs.calculate_qparams(),
                             loaded.calculate_qparams())

        # Check TensorListObserver
        from torch.quantization.observer import _MinMaxTensorListObserver
        obs = _MinMaxTensorListObserver()
        scripted = torch.jit.script(obs)
        x = [torch.rand(3, 4), torch.rand(4, 5)]
        obs(x)
        scripted(x)
        self.assertEqual(obs.calculate_qparams(), scripted.calculate_qparams())
    def test_minmax_observer(self, qdtype, qscheme):
        myobs = MinMaxObserver(dtype=qdtype, qscheme=qscheme)
        x = torch.tensor([1.0, 2.0, 2.0, 3.0, 4.0, 5.0, 6.0])
        y = torch.tensor([4.0, 5.0, 5.0, 6.0, 7.0, 8.0])
        result = myobs(x)
        result = myobs(y)
        self.assertEqual(result, y)
        self.assertEqual(myobs.min_val, 1.0)
        self.assertEqual(myobs.max_val, 8.0)
        qparams = myobs.calculate_qparams()
        if qscheme == torch.per_tensor_symmetric:
            ref_scale = 0.062745
            ref_zero_point = 0 if qdtype is torch.qint8 else 128
        else:
            ref_scale = 0.0313725
            ref_zero_point = -128 if qdtype is torch.qint8 else 0
        self.assertEqual(qparams[1].item(), ref_zero_point)
        self.assertAlmostEqual(qparams[0].item(), ref_scale, delta=1e-5)
Example #4
    def test_per_tensor_observers(self, qdtype, qscheme, reduce_range):
        # reduce_range cannot be true for symmetric quantization with uint8
        if qdtype == torch.quint8 and qscheme == torch.per_tensor_symmetric:
            reduce_range = False
        ObserverList = [MinMaxObserver(dtype=qdtype, qscheme=qscheme, reduce_range=reduce_range),
                        MovingAverageMinMaxObserver(averaging_constant=0.5,
                                                    dtype=qdtype,
                                                    qscheme=qscheme,
                                                    reduce_range=reduce_range)]
        for myobs in ObserverList:
            # Calculate Qparams should return with a warning for observers with no data
            qparams = myobs.calculate_qparams()
            if type(myobs) == MinMaxObserver:
                x = torch.tensor([1.0, 2.0, 2.0, 3.0, 4.0, 5.0, 6.0])
                y = torch.tensor([4.0, 5.0, 5.0, 6.0, 7.0, 8.0])
            else:
                # Moving average of min/max for x and y matches that of
                # extreme values for x/y used for minmax observer
                x = torch.tensor([0.0, 2.0, 2.0, 3.0, 4.0, 5.0, 6.0])
                y = torch.tensor([2.0, 5.0, 5.0, 6.0, 7.0, 10.0])

            result = myobs(x)
            result = myobs(y)
            self.assertEqual(result, y)
            self.assertEqual(myobs.min_val, 1.0)
            self.assertEqual(myobs.max_val, 8.0)
            qparams = myobs.calculate_qparams()
            if reduce_range:
                if qscheme == torch.per_tensor_symmetric:
                    ref_scale = 0.062745 * 255 / 127
                    ref_zero_point = 0 if qdtype is torch.qint8 else 128
                else:
                    ref_scale = 0.0313725 * 255 / 127
                    ref_zero_point = -64 if qdtype is torch.qint8 else 0
            else:
                if qscheme == torch.per_tensor_symmetric:
                    ref_scale = 0.062745
                    ref_zero_point = 0 if qdtype is torch.qint8 else 128
                else:
                    ref_scale = 0.0313725
                    ref_zero_point = -128 if qdtype is torch.qint8 else 0
            self.assertEqual(qparams[1].item(), ref_zero_point)
            self.assertAlmostEqual(qparams[0].item(), ref_scale, delta=1e-5)
            state_dict = myobs.state_dict()
            b = io.BytesIO()
            torch.save(state_dict, b)
            b.seek(0)
            loaded_dict = torch.load(b)
            for key in state_dict:
                self.assertEqual(state_dict[key], loaded_dict[key])
            loaded_obs = MinMaxObserver(dtype=qdtype, qscheme=qscheme, reduce_range=reduce_range)
            loaded_obs.load_state_dict(loaded_dict)
            loaded_qparams = loaded_obs.calculate_qparams()
            self.assertEqual(myobs.min_val, loaded_obs.min_val)
            self.assertEqual(myobs.max_val, loaded_obs.max_val)
            self.assertEqual(myobs.calculate_qparams(), loaded_obs.calculate_qparams())
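
The x/y tensors in the moving-average branch above are picked so that, with averaging_constant=0.5, the running min/max lands on the same (1.0, 8.0) pair the plain MinMaxObserver records. A hand-worked sketch of that update (mirroring the documented rule new = old + c * (current - old)):

c = 0.5

# First batch x = [0, 2, 2, 3, 4, 5, 6] initializes the running values directly.
running_min, running_max = 0.0, 6.0

# Second batch y = [2, 5, 5, 6, 7, 10] applies the exponential moving average.
running_min = running_min + c * (2.0 - running_min)   # 0.0 -> 1.0
running_max = running_max + c * (10.0 - running_max)  # 6.0 -> 8.0

assert (running_min, running_max) == (1.0, 8.0)
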
Example #5
    def test_tensor_list_observer(self):
        from torch.quantization.observer import _MinMaxTensorListObserver
        x = [torch.tensor([1.0, 2.5, 3.5]),
             torch.tensor([2.0, 4.5, 3.5]),
             torch.tensor([4.0, 2.5, 3.5])]
        obs = _MinMaxTensorListObserver()
        obs(x)
        qparams = obs.calculate_qparams()
        ref_min_val = []
        ref_max_val = []
        ref_qparams = []
        for i in x:
            obs_ref = MinMaxObserver()
            obs_ref(i)
            ref_min_val.append(obs_ref.min_val)
            ref_max_val.append(obs_ref.max_val)
            ref_qparams.append(obs_ref.calculate_qparams())
        for i in range(len(x)):
            self.assertEqual(obs.min_val[i], ref_min_val[i])
            self.assertEqual(obs.max_val[i], ref_max_val[i])
            self.assertEqual(qparams[0][i], ref_qparams[i][0])
            self.assertEqual(qparams[1][i], ref_qparams[i][1])
Example #6
    def test_observer_scriptable(self):
        obs_list = [MinMaxObserver(), MovingAverageMinMaxObserver(), MinMaxDynamicQuantObserver()]
        for obs in obs_list:
            scripted = torch.jit.script(obs)

            x = torch.rand(3, 4)
            obs(x)
            scripted(x)
            self.assertEqual(obs.calculate_qparams(), scripted.calculate_qparams())

            buf = io.BytesIO()
            torch.jit.save(scripted, buf)
            buf.seek(0)
            loaded = torch.jit.load(buf)
            self.assertEqual(obs.calculate_qparams(), loaded.calculate_qparams())
    def test_observer_qparams_respects_device_affinity(self):
        """
        Ensure that the scale and zero_point returned by the observer
        are on the same device as the input tensor.
        """
        observerList = [MinMaxObserver(),
                        MovingAverageMinMaxObserver(),
                        PerChannelMinMaxObserver(),
                        MovingAveragePerChannelMinMaxObserver()]
        for obs in observerList:
            device = torch.device('cuda:1')
            x = torch.randn(1, 2, device=device)
            obs.to(device)
            result = obs(x)
            scale, zero_point = obs.calculate_qparams()

            self.assertEqual(x.device, scale.device)
            self.assertEqual(x.device, zero_point.device)
    def test_minmax_observer(self, qdtype, qscheme, reduce_range):
        # reduce_range cannot be true for symmetric quantization with uint8
        if qdtype == torch.quint8 and qscheme == torch.per_tensor_symmetric:
            reduce_range = False
        myobs = MinMaxObserver(dtype=qdtype, qscheme=qscheme, reduce_range=reduce_range)
        x = torch.tensor([1.0, 2.0, 2.0, 3.0, 4.0, 5.0, 6.0])
        y = torch.tensor([4.0, 5.0, 5.0, 6.0, 7.0, 8.0])
        result = myobs(x)
        result = myobs(y)
        self.assertEqual(result, y)
        self.assertEqual(myobs.min_val, 1.0)
        self.assertEqual(myobs.max_val, 8.0)
        qparams = myobs.calculate_qparams()
        if reduce_range:
            if qscheme == torch.per_tensor_symmetric:
                ref_scale = 0.062745 * 255 / 127
                ref_zero_point = 0 if qdtype is torch.qint8 else 128
            else:
                ref_scale = 0.0313725 * 255 / 127
                ref_zero_point = -64 if qdtype is torch.qint8 else 0
        else:
            if qscheme == torch.per_tensor_symmetric:
                ref_scale = 0.062745
                ref_zero_point = 0 if qdtype is torch.qint8 else 128
            else:
                ref_scale = 0.0313725
                ref_zero_point = -128 if qdtype is torch.qint8 else 0
        self.assertEqual(qparams[1].item(), ref_zero_point)
        self.assertAlmostEqual(qparams[0].item(), ref_scale, delta=1e-5)

        # Test for serializability
        state_dict = myobs.state_dict()
        b = io.BytesIO()
        torch.save(state_dict, b)
        b.seek(0)
        loaded_dict = torch.load(b)
        for key in state_dict:
            self.assertEqual(state_dict[key], loaded_dict[key])
        loaded_obs = MinMaxObserver(dtype=qdtype, qscheme=qscheme, reduce_range=reduce_range)
        loaded_obs.load_state_dict(loaded_dict)
        loaded_qparams = loaded_obs.calculate_qparams()
        self.assertEqual(myobs.min_val, loaded_obs.min_val)
        self.assertEqual(myobs.max_val, loaded_obs.max_val)
        self.assertEqual(myobs.calculate_qparams(), loaded_obs.calculate_qparams())
    def test_per_tensor_observer(self):
        obs = MinMaxObserver()
        self._test_obs(obs, input_size=[5, 5], generate=False)
Example #10
import torch
from torch.quantization import (
    QConfig, MinMaxObserver, MovingAverageMinMaxObserver,
    FakeQuantize, get_default_qconfig,
)

# TensorFlow Lite Quantization Specs
# https://www.tensorflow.org/lite/performance/quantization_spec?hl=en
# For activations: int8 asymmetric per-tensor [-128, 127] range
# For weights: int8 symmetric per-tensor [-127, 127] range
_TFLITE_QCONFIG = QConfig(
    activation=MovingAverageMinMaxObserver.with_args(
        dtype=torch.qint8,
        quant_min=-128,
        quant_max=127,
        qscheme=torch.per_tensor_affine,
    ),
    weight=MinMaxObserver.with_args(dtype=torch.qint8,
                                    quant_min=-127,
                                    quant_max=127,
                                    qscheme=torch.per_tensor_symmetric),
)
_TFLITE_QAT_QCONFIG = QConfig(
    activation=FakeQuantize.with_args(
        observer=MovingAverageMinMaxObserver,
        dtype=torch.qint8,
        quant_min=-128,
        quant_max=127,
        qscheme=torch.per_tensor_affine,
    ),
    weight=FakeQuantize.with_args(observer=MinMaxObserver,
                                  dtype=torch.qint8,
                                  quant_min=-127,
                                  quant_max=127,
                                  qscheme=torch.per_tensor_symmetric),
)
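
A quick way to sanity-check these qconfigs (a minimal sketch, not part of the original snippet; the variable names below are only illustrative) is to call the observer factories they hold and inspect the qparams that come back:

import torch

# Each QConfig field is a factory; calling it builds a fresh observer module.
act_observer = _TFLITE_QCONFIG.activation()     # MovingAverageMinMaxObserver
weight_observer = _TFLITE_QCONFIG.weight()      # MinMaxObserver

act_observer(torch.randn(8, 16))
weight_observer(torch.randn(16, 16))

# Activations: asymmetric int8, so zero_point can land anywhere in [-128, 127].
print(act_observer.calculate_qparams())
# Weights: symmetric int8, so zero_point stays pinned at 0.
print(weight_observer.calculate_qparams())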