    def test_qtensor_permute(self):
        r = torch.rand(10, 30, 2, 2, dtype=torch.float) * 4 - 2
        scale = 0.02
        zero_point = 1
        qr = torch.quantize_linear(r, scale, zero_point, torch.qint8)
        qr = qr.transpose(0, 1)
        rqr = qr.dequantize()
        # compare transpose + dequantized result with original transposed result
        self.assertTrue(np.allclose(r.numpy().transpose([1, 0, 2, 3]), rqr.numpy(), atol=2 / scale))

        qr = torch.quantize_linear(r, scale, zero_point, torch.qint8)
        qr1 = qr.permute([1, 0, 2, 3])
        qr2 = qr.transpose(0, 1)
        # compare int representation after transformations
        self.assertEqual(qr1.int_repr(), qr2.int_repr())
        self.assertEqual(qr1.q_scale(), qr2.q_scale())
        self.assertEqual(qr1.q_zero_point(), qr2.q_zero_point())
        # compare dequantized result
        self.assertEqual(qr1.dequantize(), qr2.dequantize())
        # compare permuted + dequantized result with original transposed result
        self.assertTrue(np.allclose(qr2.dequantize().numpy(), r.numpy().transpose([1, 0, 2, 3]), atol=2 / scale))
        # make permuted result contiguous
        self.assertEqual(qr2.contiguous().int_repr(), qr2.int_repr())

        # change memory format
        qlast = qr.contiguous(memory_format=torch.channels_last)
        self.assertEqual(qr.stride(), list(reversed(sorted(qr.stride()))))
        self.assertNotEqual(qlast.stride(), list(reversed(sorted(qlast.stride()))))
        self.assertEqual(qr.int_repr(), qlast.int_repr())
        self.assertEqual(qr.q_scale(), qlast.q_scale())
        self.assertEqual(qr.q_zero_point(), qlast.q_zero_point())
        self.assertEqual(qlast.dequantize(), qr.dequantize())
Example #2
    def from_float(mod):
        r"""Create a quantized module from a float module or qparams_dict

        Args:
            mod (Module): a float module, either produced by torch.quantization
                          utilities or provided by the user
        """
        if hasattr(mod, 'weight_fake_quant'):
            # assert type(mod) == QATLinear, 'training mode nnq.Linear.from_float only works for nn.qat.Linear'
            weight_observer = mod.weight_fake_quant
        else:
            assert type(mod) == NNLinear, 'nnq.Linear.from_float only works for nn.Linear'
            assert hasattr(mod, 'qconfig'), 'Input float module must have qconfig defined'
            assert hasattr(mod, 'observer'), 'Input float module must have observer attached'
            weight_observer = mod.qconfig.weight()
            weight_observer(mod.weight)
        activation_observer = mod.observer
        act_scale, act_zp = activation_observer.calculate_qparams()
        wt_scale, wt_zp = weight_observer.calculate_qparams()
        bias_scale = (wt_scale * act_scale).float()
        qweight = torch.quantize_linear(mod.weight.float(), wt_scale, wt_zp.long().item(), torch.qint8)
        if mod.bias is not None:
            qbias = torch.quantize_linear(mod.bias.float(), bias_scale, 0, torch.qint32)
        else:
            qbias = None
        qlinear = Linear(mod.in_features, mod.out_features)
        qlinear.set_weight(qweight)
        qlinear.bias = qbias
        qlinear.scale = float(act_scale)
        qlinear.zero_point = int(act_zp)
        return qlinear
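For orientation, here is a minimal, hypothetical calibration-and-convert sketch that would satisfy the asserts in this `from_float` (the float module must carry a `qconfig` and a calibrated `observer` before conversion). The `default_qconfig` choice and the manual observer wiring are assumptions for illustration, not something the snippet above prescribes.

import torch
import torch.nn as nn
import torch.quantization as tq
from torch.nn.quantized import Linear  # assumed: the quantized Linear whose from_float is shown above

# Assumed setup: attach a qconfig and an activation observer to the float module,
# then run a few batches so the observer records activation statistics.
float_linear = nn.Linear(5, 10)
float_linear.qconfig = tq.default_qconfig
float_linear.observer = float_linear.qconfig.activation()
for _ in range(4):
    float_linear.observer(float_linear(torch.randn(8, 5)))

# Convert: from_float reads the observers to pick scales and zero points.
qlinear = Linear.from_float(float_linear)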
Example #3
    def from_float(mod):
        r"""Create a quantized module from a float module or qparams_dict

            Args: `mod` a float module, either produced by torch.quantization utilities
            or provided directly by the user
        """
        assert type(
            mod) == NNLinear, 'nnq.Linear.from_float only works for nn.Linear'
        assert hasattr(
            mod, 'qconfig'), 'Input float module must have qconfig defined'
        assert hasattr(
            mod, 'observer'), 'Input float module must have observer attached'
        activation_observer = mod.observer
        act_qparams = activation_observer.calculate_qparams()
        weight_observer = mod.qconfig.weight()
        weight_observer(mod.weight)
        wt_qparams = weight_observer.calculate_qparams()
        bias_scale = (wt_qparams[0] * act_qparams[0]).float()
        qweight = torch.quantize_linear(mod.weight.float(), wt_qparams[0],
                                        wt_qparams[1].long().item(),
                                        torch.qint8)
        qbias = torch.quantize_linear(mod.bias.float(), bias_scale, 0,
                                      torch.qint32)
        qlinear = Linear(mod.in_features, mod.out_features)
        qlinear._packed_weight = torch.ops.quantized.fbgemm_linear_prepack(
            qweight)
        qlinear.bias = qbias
        qlinear.out_scale = torch.tensor([act_qparams[0]])
        qlinear.out_zero_point = torch.tensor([act_qparams[1]])
        return qlinear
Example #4
    def test_qadd_relu_same_qparams(self):
        add_relu = torch.ops.quantized.add_relu
        add = torch.ops.quantized.add

        A = torch.arange(-25, 25, dtype=torch.float)
        B = torch.arange(-25, 25, dtype=torch.float)
        scale = 2.0
        zero_point = 127
        qA = torch.quantize_linear(A, scale=scale, zero_point=zero_point,
                                   dtype=torch.quint8)
        qB = torch.quantize_linear(B, scale=scale, zero_point=zero_point,
                                   dtype=torch.quint8)

        # Add ground truth
        C = (qA.dequantize() + qB.dequantize()).numpy()
        qC = _quantize(C, scale, zero_point)
        qC_hat = add(qA, qB, scale=scale, zero_point=zero_point)
        np.testing.assert_equal(qC, qC_hat.int_repr(),
                                "Quantized addition failed.")

        # Add + ReLU ground truth
        Crelu = C.copy()
        Crelu[C < 0] = 0
        qCrelu = _quantize(Crelu, scale, zero_point)
        qCrelu_hat = add_relu(qA, qB, scale=scale, zero_point=zero_point)
        np.testing.assert_equal(qCrelu, qCrelu_hat.int_repr(),
                                "Quantized addition with ReLU failed.")
Example #5
    def test_qadd_scalar_relu(self, A, b):
        import copy
        add_scalar = torch.ops.quantized.add_scalar
        add_scalar_relu = torch.ops.quantized.add_scalar_relu

        A, (scale, zero_point, dtype) = A
        A = A.astype(np.float32)
        qA = torch.quantize_linear(torch.from_numpy(A), scale, zero_point, dtype)

        C = qA.dequantize() + b
        C_relu = copy.deepcopy(C)
        C_relu[C_relu < 0] = 0

        C_ref = torch.quantize_linear(C, scale, zero_point, dtype)
        C_relu_ref = torch.quantize_linear(C_relu, scale, zero_point, dtype)

        C_hat = add_scalar(qA, b, scale=scale, zero_point=zero_point)
        C_relu_hat = add_scalar_relu(qA, b, scale=scale, zero_point=zero_point)

        self.assertEqual(C_ref, C_hat,
                         message="Scalar add results don't match:\
                         {} vs {}".format(C_ref, C_hat))
        self.assertEqual(C_relu_ref, C_relu_hat,
                         message="Scalar add relu results don't match:\
                         {} vs {}".format(C_relu_ref, C_relu_hat))
Example #6
    def init(self, N, IC, OC, H, W, G, kernel, stride, pad):
        scale = 1.0 / 255
        zero_point = 0
        X = torch.randn(N, IC, H, W, dtype=torch.float32)
        qX = torch.quantize_linear(X,
                                   scale=scale,
                                   zero_point=zero_point,
                                   dtype=torch.quint8)
        W = torch.randn(OC, IC // G, kernel, kernel, dtype=torch.float32)
        qW = torch.quantize_linear(W,
                                   scale=scale,
                                   zero_point=0,
                                   dtype=torch.qint8)

        self.input = qX
        self.qconv2d = nnq.Conv2d(IC,
                                  OC,
                                  kernel,
                                  stride=stride,
                                  padding=pad,
                                  groups=G)
        self.qconv2d.weight = qW
        self.qconv2d.scale = torch.tensor([scale], dtype=torch.double)
        self.qconv2d.zero_point = torch.tensor([zero_point], dtype=torch.int)
        self.set_module_name("QConv2d")
Example #7
    def test_qrelu(self, qparams):
        X = np.array([[-3, -2, 1, 2],
                      [0, 0, 0, 0],
                      [-5, -4, -3, -2],
                      [1, 2, 3, 4]], dtype=np.float32)
        scale, zero_point, torch_type = qparams

        Y = X.copy()
        Y[Y < 0] = 0
        qY = torch.quantize_linear(torch.from_numpy(Y), scale=scale,
                                   zero_point=zero_point, dtype=torch_type)
        X = torch.from_numpy(X)
        qX = torch.quantize_linear(X, scale=scale, zero_point=zero_point,
                                   dtype=torch_type)

        ops_under_test = {
            'native': torch.relu,
            'nn.functional': torch.nn.functional.relu,
        }

        for name, op in ops_under_test.items():
            qY_hat = op(qX)
            self.assertEqual(qY, qY_hat, message="{} relu failed".format(name))

        ops_under_test_inplace = {
            'inplace native': torch.relu_,
            'inplace nn.functional': torch.nn.functional.relu_,
        }

        for name, op_ in ops_under_test_inplace.items():
            qY_hat = qX.clone()
            op_(qY_hat)
            self.assertEqual(qY, qY_hat, message="{} relu failed".format(name))
Example #8
    def test_qtensor_permute(self):
        r = torch.rand(100, 30, dtype=torch.float) * 2 - 4
        scale = 2
        zero_point = 2
        qr = torch.quantize_linear(r, scale, zero_point, torch.qint8)
        qr = qr.transpose(0, 1)
        rqr = qr.dequantize()
        # compare transpose + dequantized result with original transposed result
        self.assertTrue(np.allclose(r.numpy().T, rqr.numpy(), atol=2 / scale))

        qr = torch.quantize_linear(r, scale, zero_point, torch.qint8)
        qr1 = qr.permute([1, 0])
        qr2 = qr.transpose(0, 1)
        # compare int representation after transformations
        self.assertTrue(torch.equal(qr1.int_repr(), qr2.int_repr()))
        self.assertTrue(qr1.q_scale() == qr2.q_scale())
        self.assertTrue(qr1.q_zero_point() == qr2.q_zero_point())
        # compare dequantized result
        self.assertTrue(
            np.array_equal(qr1.dequantize().numpy(),
                           qr2.dequantize().numpy()))
        # compare permuted + dequantized result with original transposed result
        self.assertTrue(
            np.allclose(qr2.dequantize().numpy(), r.numpy().T, atol=2 / scale))
        # make permuted result contiguous
        self.assertTrue(
            torch.equal(qr2.contiguous().int_repr(), qr2.int_repr()))
Example #9
    def from_float(mod):
        r"""Create a quantized module from a float module or qparams_dict

            Args: `mod` a float module, either produced by torch.quantization utilities
            or provided directly by the user
        """
        if hasattr(mod, 'weight_fake_quant'):
            # assert type(mod) == QATLinear, 'training mode nnq.Linear.from_float only works for nn.qat.Linear'
            weight_observer = mod.weight_fake_quant
        else:
            assert type(
                mod
            ) == NNLinear, 'nnq.Linear.from_float only works for nn.Linear'
            assert hasattr(
                mod, 'qconfig'), 'Input float module must have qconfig defined'
            assert hasattr(
                mod,
                'observer'), 'Input float module must have observer attached'
            weight_observer = mod.qconfig.weight()
            weight_observer(mod.weight)
        activation_observer = mod.observer
        act_scale, act_zp = activation_observer.calculate_qparams()
        wt_scale, wt_zp = weight_observer.calculate_qparams()
        bias_scale = (wt_scale * act_scale).float()
        qweight = torch.quantize_linear(mod.weight.float(), wt_scale,
                                        wt_zp.long().item(), torch.qint8)
        qbias = torch.quantize_linear(mod.bias.float(), bias_scale, 0,
                                      torch.qint32)
        qlinear = Linear(mod.in_features, mod.out_features)
        qlinear._packed_weight = torch.ops.quantized.fbgemm_linear_prepack(
            qweight)
        qlinear.bias = qbias
        qlinear.scale = torch.tensor([act_scale], dtype=torch.double)
        qlinear.zero_point = torch.tensor([act_zp], dtype=torch.long)
        return qlinear
Example #10
    def test_linear_api(self):
        """test API functionality for nn.quantized.linear"""
        in_features = 10
        out_features = 20
        batch_size = 5
        W = torch.rand(out_features, in_features).float()
        W_q = torch.quantize_linear(W, 0.1, 4, torch.qint8)
        W_pack = torch.ops.quantized.fbgemm_linear_prepack(W_q)
        X = torch.rand(batch_size, in_features).float()
        X_q = torch.quantize_linear(X, 0.2, 10, torch.quint8)
        B = torch.rand(out_features).float()
        B_q = torch.quantize_linear(B,
                                    W_q.q_scale() * X_q.q_scale(), 0,
                                    torch.qint32)
        out_scale = 0.5
        out_zero_point = 3
        qlinear = nnq.Linear(in_features, out_features)
        qlinear._packed_weight = W_pack
        qlinear.bias = B_q
        qlinear.out_scale = torch.tensor([out_scale])
        qlinear.out_zero_point = torch.tensor([out_zero_point])
        Z_q = qlinear(X_q)
        # Check if the module implementation matches calling the
        # ops directly
        Z_ref = torch.ops.quantized.fbgemm_linear(X_q, W_pack, B_q, out_scale,
                                                  out_zero_point)
        self.assertEqual(Z_ref, Z_q)

        # Test serialization of quantized Linear Module using state_dict
        model_dict = qlinear.state_dict()
        self.assertEqual(model_dict['weight'], W_q)
        self.assertEqual(model_dict['bias'], B_q)
        with tempfile.NamedTemporaryFile() as f:
            torch.save(model_dict, f)
            f.seek(0)
            loaded_dict = torch.load(f)
        for key in model_dict:
            self.assertEqual(model_dict[key], loaded_dict[key])
        loaded_qlinear = nnq.Linear(in_features, out_features)
        loaded_qlinear.load_state_dict(loaded_dict)

        linear_unpack = torch.ops.quantized.fbgemm_linear_unpack
        self.assertEqual(linear_unpack(qlinear._packed_weight),
                         linear_unpack(loaded_qlinear._packed_weight))
        self.assertEqual(qlinear.bias, loaded_qlinear.bias)
        self.assertEqual(qlinear.out_scale, loaded_qlinear.out_scale)
        self.assertEqual(qlinear.out_zero_point, loaded_qlinear.out_zero_point)
        self.assertTrue(dir(qlinear) == dir(loaded_qlinear))
        self.assertTrue(hasattr(qlinear, '_packed_weight'))
        self.assertTrue(hasattr(loaded_qlinear, '_packed_weight'))
        self.assertTrue(hasattr(qlinear, 'weight'))
        self.assertTrue(hasattr(loaded_qlinear, 'weight'))
        self.assertEqual(qlinear.weight, loaded_qlinear.weight)
        self.assertEqual(
            qlinear.weight,
            torch.ops.quantized.fbgemm_linear_unpack(qlinear._packed_weight))
        Z_q2 = qlinear(X_q)
        self.assertEqual(Z_q, Z_q2)
Example #11
    def test_cat(self, X, num, dim, relu):
        tensors_q = []
        tensors_ref = []
        X, (scale, zero_point, torch_type) = X
        assume(dim < X.ndim)
        X = torch.from_numpy(X)
        new_shape = np.array(X.shape)
        new_shape[dim] = 0
        for idx in range(num):
            tensors_q.append(
                torch.quantize_linear(X, scale, zero_point, torch_type))
            tensors_ref.append(X)
            new_shape[dim] += tensors_ref[-1].shape[dim]

        cat_ref = torch.cat(tensors_ref, dim=dim)
        cat_ref = torch.quantize_linear(cat_ref, scale, zero_point, torch_type)
        cat_ref = cat_ref.dequantize()

        if relu:
            cat_ref = F.relu(cat_ref)
            q_cat_op = torch.ops.quantized.cat_relu
            q_cat_out_op = torch.ops.quantized.cat_relu_out
        else:
            q_cat_op = torch.ops.quantized.cat
            q_cat_out_op = torch.ops.quantized.cat_out

        cat_q = q_cat_op(tensors_q,
                         dim=dim,
                         scale=scale,
                         zero_point=zero_point)
        cat_q = cat_q.dequantize()
        np.testing.assert_equal(cat_ref.numpy(), cat_q.numpy())

        cat_q_out = torch._empty_affine_quantized(list(new_shape),
                                                  scale=scale,
                                                  zero_point=zero_point,
                                                  dtype=torch_type)
        q_cat_out_op(tensors_q, dim=dim, out=cat_q_out)
        cat_q_out = cat_q_out.dequantize()
        np.testing.assert_equal(cat_ref.numpy(), cat_q_out.numpy())

        # Test the cat on per-channel quantized tensor.
        ch_axis = 1
        scales = torch.from_numpy(np.array([1.0] * X.shape[ch_axis]))
        scales = scales.to(torch.float64)
        zero_points = torch.from_numpy(np.array([0] * X.shape[ch_axis]))
        zero_points = zero_points.to(torch.long)
        tensors_q[0] = torch.quantize_linear_per_channel(X,
                                                         scales,
                                                         zero_points,
                                                         axis=[ch_axis],
                                                         dtype=torch_type)
        with self.assertRaisesRegex(RuntimeError, "supported.*cat"):
            cat_q = q_cat_op(tensors_q,
                             dim=ch_axis,
                             scale=scale,
                             zero_point=zero_point)
Example #12
    def test_conv_api(self):
        """Tests the correctness of the conv module.

        The correctness is defined against the functional implementation.
        """

        N, iC, H, W = 10, 10, 10, 3
        oC, g, kH, kW = 16, 1, 3, 3
        scale, zero_point = 1.0 / 255, 128

        X = torch.randn(N, iC, H, W, dtype=torch.float32)
        X = X.permute([0, 2, 3, 1]).contiguous()
        qX = torch.quantize_linear(X, scale=scale, zero_point=128, dtype=torch.quint8)

        w = torch.randn(oC, iC // g, kH, kW, dtype=torch.float32)
        w = w.permute([0, 2, 3, 1]).contiguous()
        qw = torch.quantize_linear(w, scale=scale, zero_point=0, dtype=torch.qint8)

        b = torch.randn(oC, dtype=torch.float32)
        qb = torch.quantize_linear(b, scale=1.0 / 1024, zero_point=0, dtype=torch.qint32)

        conv_under_test = Conv2d(in_channels=iC,
                                 out_channels=oC,
                                 kernel_size=(kH, kW),
                                 stride=1,
                                 padding=0,
                                 dilation=1,
                                 groups=g,
                                 bias=True,
                                 padding_mode='zeros')
        conv_under_test.weight = qw
        conv_under_test.bias = qb
        conv_under_test.scale = scale
        conv_under_test.zero_point = zero_point

        # Test members
        self.assertTrue(hasattr(conv_under_test, '_packed_weight'))
        self.assertTrue(hasattr(conv_under_test, '_scale'))
        self.assertTrue(hasattr(conv_under_test, '_zero_point'))

        # Test properties
        # self.assertEqual(qw, conv_under_test.weight)
        self.assertEqual(qb, conv_under_test.bias)
        self.assertEqual(scale, conv_under_test.scale)
        self.assertEqual(zero_point, conv_under_test.zero_point)

        # Test forward
        result_under_test = conv_under_test(qX)
        result_reference = qF.conv2d(qX, qw, bias=qb,
                                     scale=scale, zero_point=zero_point,
                                     stride=1, padding=0,
                                     dilation=1, groups=g,
                                     prepacked=False, dtype=torch.quint8)

        self.assertEqual(result_reference, result_under_test,
                         message="Tensors are not equal.")
Example #13
    def test_equal(self, X, X2, X_per_channel, X2_per_channel):
        X, X_params = X
        (scale, zero_point, torch_type) = X_params
        X2, X2_params = X2
        (scale2, zero_point2, torch_type2) = X2_params

        X = torch.from_numpy(X)
        if X_per_channel:
            X_scheme = 'per_channel'
            channels = X.shape[-1]
            qX = torch.quantize_linear_per_channel(
                X,
                scales=torch.tensor([scale] * channels),
                zero_points=torch.tensor([zero_point] * channels),
                dtype=torch_type,
                axis=[X.ndim - 1])
        else:
            X_scheme = 'per_tensor'
            qX = torch.quantize_linear(X,
                                       scale=scale,
                                       zero_point=zero_point,
                                       dtype=torch_type)
        X2 = torch.from_numpy(X2)
        if X2_per_channel:
            X2_scheme = 'per_channel'
            channels = X2.shape[-1]
            qX2 = torch.quantize_linear_per_channel(
                X2,
                scales=torch.tensor([scale2] * channels),
                zero_points=torch.tensor([zero_point2] * channels),
                dtype=torch_type2,
                axis=[X2.ndim - 1])
        else:
            X2_scheme = 'per_tensor'
            qX2 = torch.quantize_linear(X2,
                                        scale=scale2,
                                        zero_point=zero_point2,
                                        dtype=torch_type2)

        def equal_ref(X, params, X_scheme, X2, params2, X2_scheme):
            if X_scheme != X2_scheme:
                return False
            if params != params2:
                return False
            if X.shape != X2.shape:
                return False
            if (X != X2).any():
                return False
            return True

        self.assertEqual(
            qX.equal(qX),
            equal_ref(X, X_params, X_scheme, X, X_params, X_scheme))
        self.assertEqual(
            qX.equal(qX2),
            equal_ref(X, X_params, X_scheme, X2, X2_params, X2_scheme))
Example #14
    def from_float(cls, mod):
        r"""Creates a quantized module from a float module or qparams_dict.

        Args:
            mod (Module): a float module, either produced by torch.quantization
                          utilities or provided by the user
        """
        if hasattr(mod, 'weight_fake_quant'):
            # assert type(mod) == cls.__QAT_MODULE, ' nnq.' + cls.__name__ + '.from_float only works for ' + \
            #     cls.__QAT_MODULE.__name__
            if type(mod) == nniqat.ConvBn2d:
                mod.weight, mod.bias = \
                    fuse_conv_bn_weights(mod.weight, mod.bias, mod.running_mean,
                                         mod.running_var, mod.eps, mod.gamma, mod.beta)
            assert hasattr(
                mod,
                'observer'), 'Input QAT module must have observer attached'
            weight_observer = mod.weight_fake_quant
            activation_observer = mod.observer
        else:
            assert type(mod) == cls._FLOAT_MODULE, ' nnq.' + cls.__name__ + '.from_float only works for ' + \
                cls._FLOAT_MODULE.__name__
            assert hasattr(
                mod, 'qconfig'), 'Input float module must have qconfig defined'
            # workaround for sequential, ConvReLU2d should probably
            # inherit from Conv2d instead
            if type(mod) == nni.ConvReLU2d:
                activation_observer = mod[1].observer
                mod = mod[0]
            else:
                activation_observer = mod.observer
            weight_observer = mod.qconfig.weight()
            weight_observer(mod.weight)
        act_scale, act_zp = activation_observer.calculate_qparams()
        assert weight_observer.dtype == torch.qint8, 'Weight observer must have a dtype of qint8'
        wt_scale, wt_zp = weight_observer.calculate_qparams()
        # Scale bias to activation_scale/2^16, this quantizes bias
        # to about 24 bits of precision
        bias_scale = float(act_scale / (2**16))

        qweight = torch.quantize_linear(mod.weight.float(), float(wt_scale),
                                        int(wt_zp), torch.qint8)
        qconv = cls(mod.in_channels, mod.out_channels, mod.kernel_size,
                    mod.stride, mod.padding, mod.dilation, mod.groups,
                    mod.bias is not None, mod.padding_mode)
        qconv.set_weight(qweight)
        if mod.bias is not None:
            qbias = torch.quantize_linear(mod.bias.float(), bias_scale, 0,
                                          torch.qint32)
        else:
            qbias = None
        qconv.bias = qbias
        qconv.scale = float(act_scale)
        qconv.zero_point = int(act_zp)

        return qconv
Example #15
    def test_conv_api(self, use_bias):
        """Tests the correctness of the conv module.

        The correctness is defined against the functional implementation.
        """

        N, iC, H, W = 10, 10, 10, 3
        oC, g, kH, kW = 16, 1, 3, 3
        scale, zero_point = 1.0 / 255, 128
        stride = (1, 1)
        i_padding = (0, 0)
        dilation = (1, 1)

        X = torch.randn(N, iC, H, W, dtype=torch.float32)
        X = X.permute([0, 2, 3, 1]).contiguous()
        qX = torch.quantize_linear(X,
                                   scale=scale,
                                   zero_point=128,
                                   dtype=torch.quint8)

        w = torch.randn(oC, iC // g, kH, kW, dtype=torch.float32)

        qw = torch.quantize_linear(w,
                                   scale=scale,
                                   zero_point=0,
                                   dtype=torch.qint8)

        b = torch.randn(oC, dtype=torch.float32) if use_bias else None
        q_bias = torch.quantize_linear(
            b, scale=1.0 / 1024, zero_point=0,
            dtype=torch.qint32) if use_bias else None
        q_filters_ref = torch.ops.quantized.fbgemm_conv_prepack(
            qw.permute([0, 2, 3, 1]), stride, i_padding, dilation, g)

        requantized_bias = torch.quantize_linear(
            q_bias.dequantize(), scale * scale, 0,
            torch.qint32) if use_bias else None
        ref_result = torch.ops.quantized.fbgemm_conv2d(
            qX.permute([0, 2, 3, 1]), q_filters_ref, requantized_bias, stride,
            i_padding, dilation, g, scale, zero_point).permute([0, 3, 1, 2])

        q_result = torch.nn.quantized.functional.conv2d(qX,
                                                        qw,
                                                        bias=q_bias,
                                                        scale=scale,
                                                        zero_point=zero_point,
                                                        stride=stride,
                                                        padding=i_padding,
                                                        dilation=dilation,
                                                        groups=g,
                                                        dtype=torch.quint8)

        self.assertEqual(ref_result, q_result)
Example #16
    def test_qadd_relu_different_qparams(self):
        add_relu = torch.ops.quantized.add_relu
        add = torch.ops.quantized.add
        add_out = torch.ops.quantized.add_out
        add_relu_out = torch.ops.quantized.add_relu_out

        A = torch.arange(-25, 25, dtype=torch.float)
        B = torch.arange(-25, 25, dtype=torch.float)
        scale_A = 3.0
        zero_point_A = 7
        scale_B = 5.0
        zero_point_B = 127

        scale_C = 0.5
        zero_point_C = 5

        qA = torch.quantize_linear(A,
                                   scale=scale_A,
                                   zero_point=zero_point_A,
                                   dtype=torch.quint8)
        qB = torch.quantize_linear(B,
                                   scale=scale_B,
                                   zero_point=zero_point_B,
                                   dtype=torch.quint8)

        # Add ground truth
        C = (qA.dequantize() + qB.dequantize()).numpy()
        qC = _quantize(C, scale_C, zero_point_C)
        qC_hat = add(qA, qB, scale=scale_C, zero_point=zero_point_C)
        np.testing.assert_equal(qC, qC_hat.int_repr(),
                                "Quantized addition failed.")
        qC_out_hat = torch._empty_affine_quantized(qC.shape,
                                                   scale=scale_C,
                                                   zero_point=zero_point_C,
                                                   dtype=torch.quint8)
        add_out(qA, qB, out=qC_out_hat)
        self.assertEqual(qC_hat, qC_out_hat, message="Add.out failed")

        # Add + ReLU ground truth
        Crelu = C.copy()
        Crelu[C < 0] = 0
        qCrelu = _quantize(Crelu, scale_C, zero_point_C)
        qCrelu_hat = add_relu(qA, qB, scale=scale_C, zero_point=zero_point_C)
        np.testing.assert_equal(qCrelu, qCrelu_hat.int_repr(),
                                "Quantized addition with ReLU failed.")
        qCrelu_out_hat = torch._empty_affine_quantized(qCrelu.shape,
                                                       scale=scale_C,
                                                       zero_point=zero_point_C,
                                                       dtype=torch.quint8)
        add_relu_out(qA, qB, out=qCrelu_out_hat)
        self.assertEqual(qCrelu_hat,
                         qCrelu_out_hat,
                         message="AddReLU.out failed")
Example #17
    def test_qmul_relu_same_qparams(self):
        mul_relu = torch.ops.quantized.mul_relu
        mul = torch.ops.quantized.mul
        mul_out = torch.ops.quantized.mul_out
        mul_relu_out = torch.ops.quantized.mul_relu_out

        A = torch.arange(-25, 25, dtype=torch.float)
        B = torch.arange(-25, 25, dtype=torch.float)
        scale = 2.0
        zero_point = 127
        qA = torch.quantize_linear(A, scale=scale, zero_point=zero_point,
                                   dtype=torch.quint8)
        qB = torch.quantize_linear(B, scale=scale, zero_point=zero_point,
                                   dtype=torch.quint8)

        # mul ground truth
        C = (qA.dequantize() * qB.dequantize()).numpy()
        qC = _quantize(C, scale, zero_point)
        qC_hat = mul(qA, qB, scale=scale, zero_point=zero_point)
        np.testing.assert_equal(qC, qC_hat.int_repr(),
                                "Quantized mulition failed.")
        qC_out_hat = torch._empty_affine_quantized(qC.shape,
                                                   scale=scale,
                                                   zero_point=zero_point,
                                                   dtype=torch.quint8)
        mul_out(qA, qB, out=qC_out_hat)
        self.assertEqual(qC_hat, qC_out_hat, message="mul.out failed")

        # mul + ReLU ground truth
        Crelu = C.copy()
        Crelu[C < 0] = 0
        qCrelu = _quantize(Crelu, scale, zero_point)
        qCrelu_hat = mul_relu(qA, qB, scale=scale, zero_point=zero_point)
        np.testing.assert_equal(qCrelu, qCrelu_hat.int_repr(),
                                "Quantized mulition with ReLU failed.")
        qCrelu_out_hat = torch._empty_affine_quantized(qCrelu.shape,
                                                       scale=scale,
                                                       zero_point=zero_point,
                                                       dtype=torch.quint8)
        mul_relu_out(qA, qB, out=qCrelu_out_hat)
        self.assertEqual(qCrelu_hat, qCrelu_out_hat,
                         message="mulReLU.out failed")

        # Scalar multiplication
        mul = torch.ops.quantized.mul_scalar
        for b in B:
            C_ref = qA.dequantize().numpy() * b.item()
            qC = _quantize(C_ref, scale, zero_point)
            dqC = _dequantize(qC, scale, zero_point)
            qC_hat = mul(qA, b.item(), scale, zero_point)
            dqC_hat = qC_hat.dequantize()
            self.assertEqual(dqC, dqC_hat)
Example #18
    def test_qnnpack_relu(self, X):
        X, (scale, zero_point, torch_type) = X
        relu = torch.ops.quantized.qnnpack_relu

        X = torch.from_numpy(X)
        Y = X.clone()

        qX = torch.quantize_linear(X, scale=scale, zero_point=zero_point, dtype=torch_type)
        qY_hat = relu(qX)

        Y[Y < 0] = 0
        qY = torch.quantize_linear(Y, scale=scale, zero_point=zero_point, dtype=torch_type)
        self.assertEqual(qY, qY_hat)
Example #19
 def test_qtensor_dtypes(self):
     r = torch.rand(3, 2, dtype=torch.float) * 2 - 4
     scale = 2
     zero_point = 2
     qr = torch.quantize_linear(r, scale, zero_point, torch.qint8)
     rqr = qr.dequantize()
     self.assertTrue(np.allclose(r.numpy(), rqr.numpy(), atol=2 / scale))
     qr = torch.quantize_linear(r, scale, zero_point, torch.quint8)
     rqr = qr.dequantize()
     self.assertTrue(np.allclose(r.numpy(), rqr.numpy(), atol=2 / scale))
     qr = torch.quantize_linear(r, scale, zero_point, torch.qint32)
     rqr = qr.dequantize()
     self.assertTrue(np.allclose(r.numpy(), rqr.numpy(), atol=2 / scale))
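As a quick illustration of the round trip these dtype checks exercise, assuming the affine mapping q = round(x / scale) + zero_point and x' = (q - zero_point) * scale, a toy example (values chosen for illustration only) behaves like this:

import torch

r = torch.tensor([-3.0, -2.5, 0.0, 1.3])
scale, zero_point = 2.0, 2
qr = torch.quantize_linear(r, scale, zero_point, torch.qint8)
print(qr.int_repr())    # integer codes, e.g. tensor([0, 1, 2, 3], dtype=torch.int8)
print(qr.dequantize())  # values snapped to the grid, e.g. tensor([-4., -2., 0., 2.])
# The per-element reconstruction error is bounded by roughly scale / 2.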
Example #20
    def test_max_pool2d(self, X, kernel, stride, dilation, padding):
        X, (scale, zero_point, torch_type) = X
        # Check constraints
        assume(kernel // 2 >= padding)  # Kernel cannot be overhanging!
        iH, iW = X.shape[-2:]
        oH = self._pool_output_shape(iH, kernel, padding, stride, dilation)
        assume(oH > 0)
        oW = self._pool_output_shape(iW, kernel, padding, stride, dilation)
        assume(oW > 0)

        a = torch.from_numpy(X)
        a_pool = torch.nn.functional.max_pool2d(a,
                                                kernel_size=kernel,
                                                stride=stride,
                                                padding=padding,
                                                dilation=dilation)
        a_ref = torch.quantize_linear(a_pool,
                                      scale=scale,
                                      zero_point=zero_point,
                                      dtype=torch_type)
        a_ref = a_ref.dequantize()
        qa = torch.quantize_linear(a,
                                   scale=scale,
                                   zero_point=zero_point,
                                   dtype=torch_type)

        ops_under_test = {
            "torch": torch.max_pool2d,
            "nn.functional": torch.nn.functional.max_pool2d,
            "nn.quantized.functional": torch.nn.quantized.functional.max_pool2d
        }

        for name, op in ops_under_test.items():
            a_hat = op(qa,
                       kernel_size=kernel,
                       stride=stride,
                       padding=padding,
                       dilation=dilation)
            self.assertEqual(a_ref,
                             a_hat.dequantize(),
                             message="{} results are off".format(name))
        # Test the ops.quantized op separately, since it does not accept None for stride.
        a_hat = torch.ops.quantized.max_pool2d(
            qa,
            kernel_size=_pair(kernel),
            stride=_pair(kernel if stride is None else stride),
            padding=_pair(padding),
            dilation=_pair(dilation))
        self.assertEqual(a_ref,
                         a_hat.dequantize(),
                         message="ops.quantized.max_pool2d results are off")
Example #21
 def test_qtensor(self):
     num_elements = 10
     r = torch.ones(num_elements, dtype=torch.float)
     scale = 1.0
     zero_point = 2
     qr = torch.quantize_linear(r, scale, zero_point, torch.quint8)
     self.assertEqual(qr.q_scale(), scale)
     self.assertEqual(qr.q_zero_point(), zero_point)
     self.assertTrue(qr.is_quantized)
     self.assertFalse(r.is_quantized)
     self.assertEqual(qr.qscheme(), torch.per_tensor_affine)
     self.assertTrue(isinstance(qr.qscheme(), torch.qscheme))
     # slicing and int_repr
     int_repr = qr.int_repr()
     for num in int_repr:
         self.assertEqual(num, 3)
     for num in qr[2:].int_repr():
         self.assertEqual(num, 3)
     # dequantize
     rqr = qr.dequantize()
     for i in range(num_elements):
         self.assertEqual(r[i], rqr[i])
     # Scalar Tensor
     # item
     r = torch.ones(1, dtype=torch.float)
     qr = torch.quantize_linear(r, scale, zero_point, torch.quint8)
     self.assertEqual(qr.item(), 1)
     self.assertEqual(qr[0].item(), 1)
     # assignment
     self.assertTrue(qr[0].is_quantized)
     qr[0] = 11.3  # float assignment
     self.assertEqual(qr.item(), 11)
     x = torch.ones(1, dtype=torch.float) * 15.3
     # Copying from a float Tensor
     qr[:] = x
     self.assertEqual(qr.item(), 15)
     # we can also print a qtensor
     self.assertEqual(
         ' '.join(str(qr).split()),
         "tensor([15.], size=(1,), dtype=torch.quint8, " +
         "quantization_scheme=torch.per_tensor_affine, " +
         "scale=1.0, zero_point=2)")
     empty_r = torch.ones((0, 1), dtype=torch.float)
     empty_qr = torch.quantize_linear(empty_r, scale, zero_point,
                                      torch.quint8)
     self.assertEqual(
         ' '.join(str(empty_qr).split()),
         "tensor([], size=(0, 1), dtype=torch.quint8, " +
         "quantization_scheme=torch.per_tensor_affine, " +
         "scale=1.0, zero_point=2)")
Example #22
    def test_qrelu(self, Q):
        X, (scale, zero_point), (qmin, qmax), (torch_type, np_type) = Q
        relu = torch.ops.quantized.relu

        Y = X.copy()
        X = torch.from_numpy(X)

        qX = torch.quantize_linear(X, scale=scale, zero_point=zero_point,
                                   dtype=torch_type)
        qY_hat = relu(qX)

        Y[Y < 0] = 0
        qY = torch.quantize_linear(torch.from_numpy(Y), scale=scale, zero_point=zero_point, dtype=torch_type)
        self.assertEqual(qY.int_repr(), qY_hat.int_repr())
Example #23
    def test_max_pool2d(self, Q, kernel, stride, dilation, padding):
        import torch.nn.functional as F
        X, (scale, zero_point), (qmin, qmax), (torch_type, np_type) = Q

        # Check constraints
        assume(kernel // 2 >= padding)  # Kernel cannot be overhanging!
        iH, iW = X.shape[-2:]
        oH = self._pool_output_shape(iH, kernel, padding, stride, dilation)
        assume(oH > 0)
        oW = self._pool_output_shape(iW, kernel, padding, stride, dilation)
        assume(oW > 0)

        k = (kernel, kernel)
        s = (stride, stride)
        d = (dilation, dilation)
        p = (padding, padding)

        q_max_pool = torch.ops.quantized.max_pool2d

        a = torch.from_numpy(X)
        qa = torch.quantize_linear(a, scale=scale, zero_point=zero_point,
                                   dtype=torch_type)

        a_hat = qa.dequantize()
        a_pool = F.max_pool2d(a_hat, kernel_size=k, stride=s, padding=p,
                              dilation=d)

        qa_pool_hat = q_max_pool(qa, kernel_size=k, stride=s, padding=p,
                                 dilation=d)
        a_pool_hat = qa_pool_hat.dequantize()

        np.testing.assert_equal(a_pool.numpy(), a_pool_hat.numpy())
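Both max-pool tests rely on `self._pool_output_shape(...)`, which is defined elsewhere in the test class. A plausible reconstruction, assuming the standard floor-mode output-size formula used by `torch.nn.functional.max_pool2d`:

    def _pool_output_shape(self, input_size, kernel, padding, stride, dilation):
        # Hypothetical helper: floor((in + 2*pad - dilation*(kernel-1) - 1) / stride) + 1
        if stride is None:
            stride = kernel
        return (input_size + 2 * padding - dilation * (kernel - 1) - 1) // stride + 1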
Example #24
    def test_qconv_unpack(self, X, strideH, strideW, padH, padW):
        (inputs, filters, bias, groups) = X
        inputs, (inputs_scale, inputs_zero_point, inputs_qtype) = inputs
        filters, (filters_scale, filters_zero_point, filters_qtype) = filters
        bias, (bias_scale, bias_zero_point, bias_qtype) = bias

        qconv_prepack = torch.ops.quantized.fbgemm_conv_prepack
        qconv_unpack = torch.ops.quantized.fbgemm_conv_unpack

        # Orig tensor is assumed to be in K(C/G)RS format
        W = torch.from_numpy(filters).to(torch.float)
        # K(C/G)RS -> KRS(C/G)
        W_KRSC = W.permute([0, 2, 3, 1]).contiguous()
        W_q = torch.quantize_linear(W_KRSC, scale=filters_scale, zero_point=filters_zero_point, dtype=filters_qtype)

        # Pack weights using weight packing operator
        strides = [strideH, strideW]
        paddings = [padH, padW]
        dilations = [1, 1]
        W_packed = qconv_prepack(W_q, strides, paddings, dilations, groups)
        # Unpack weights using the weight unpacking operator (used for serialization)
        W_unpacked = qconv_unpack(W_packed)

        # Assert equal
        np.testing.assert_equal(W_q.int_repr().numpy(), W_unpacked.int_repr().numpy())
        np.testing.assert_equal(W_q.q_scale(), W_unpacked.q_scale())
        np.testing.assert_equal(W_q.q_zero_point(), W_unpacked.q_zero_point())
Example #25
    def test_adaptive_avg_pool2d(self, X, output_size_h, output_size_w):
        X, (scale, zero_point, torch_type) = X

        H, W = X.shape[-2:]
        assume(output_size_h <= H)
        assume(output_size_w <= W)
        if output_size_h == output_size_w:
            output_size = output_size_h
        else:
            output_size = (output_size_h, output_size_w)

        X = torch.from_numpy(X)
        qX = torch.quantize_linear(X, scale=scale, zero_point=zero_point,
                                   dtype=torch_type)

        # Run reference on int_repr + round to avoid double rounding error.
        X_ref = torch.nn.functional.adaptive_avg_pool2d(
            qX.int_repr().to(torch.float), output_size).round()

        ops_under_test = {
            "nn.functional": torch.nn.functional.adaptive_avg_pool2d,
            "nn.quantized.functional":
                torch.nn.quantized.functional.adaptive_avg_pool2d
        }

        error_message = r"Results are off for {}:\n\tExpected:\n{}\n\tGot:\n{}"

        for name, op in ops_under_test.items():
            qX_hat = op(qX, output_size=output_size)
            qX_repr = qX_hat.int_repr()
            self.assertEqual(X_ref, qX_repr,
                             message=error_message.format(name, X_ref, qX_repr))
Example #26
    def test_numerical_consistency_cuda(self):
        '''
        Comparing numerical consistency between CPU quantize/dequantize op and the CUDA fake quantize op
        '''
        np.random.seed(NP_RANDOM_SEED)
        fake_quantize_per_tensor_affine_forward = torch.ops.quantized.fake_quantize_per_tensor_affine_forward

        scale = 3
        zero_point = 2
        num_bits = 8
        X = np.random.rand(20, 20) * 125
        X_torch = torch.from_numpy(X).float()
        Y = torch.dequantize(
            torch.quantize_linear(X_torch, scale, zero_point, torch.qint8))
        Y_prime = fake_quantize_per_tensor_affine_forward(
            X=X_torch.cuda(),
            scale=scale,
            zero_point=zero_point,
            num_bits=num_bits,
            quant_delay=0,
            iter=0)
        tolerance = 1e-6
        np.testing.assert_allclose(Y,
                                   Y_prime.cpu(),
                                   rtol=tolerance,
                                   atol=tolerance)
Example #27
    def from_float(cls, mod):
        r"""Create a dynamic quantized module from a float module or qparams_dict

        Args:
            mod (Module): a float module, either produced by torch.quantization
                          utilities or provided by the user
        """
        assert type(
            mod
        ) == NNLinear, 'nn.quantized.dynamic.Linear.from_float only works for nn.Linear'
        assert hasattr(
            mod, 'qconfig'), 'Input float module must have qconfig defined'
        if mod.qconfig is not None and mod.qconfig.weight() is not None:
            weight_observer = mod.qconfig.weight()
        else:
            # We have the circular import issues if we import the qconfig in the beginning of this file:
            # https://github.com/pytorch/pytorch/pull/24231. The current workaround is to postpone the
            # import until we need it.
            from torch.quantization.QConfig import default_dynamic_qconfig
            weight_observer = default_dynamic_qconfig.weight()
        assert weight_observer.dtype == torch.qint8, 'Weight observer must have dtype torch.qint8'
        weight_observer(mod.weight)
        wt_scale, wt_zp = weight_observer.calculate_qparams()
        qweight = torch.quantize_linear(mod.weight.float(), float(wt_scale),
                                        int(wt_zp), torch.qint8)
        qlinear = Linear(mod.in_features, mod.out_features)
        qlinear.set_weight_bias(qweight, mod.bias)
        return qlinear
Example #28
                def process_weights(ihhh, layer, suffix):
                    weight_name = 'weight_{}_l{}{}'.format(ihhh, layer, suffix)
                    bias_name = 'bias_{}_l{}{}'.format(ihhh, layer, suffix)

                    weight = getattr(mod, weight_name)
                    bias = getattr(mod, bias_name)
                    # for each layer, for each direction we need to quantize and pack
                    # weights and pack parameters in this order:
                    #
                    #   w_ih, w_hh, b_ih, b_hh
                    weight_observer(weight)
                    wt_scale, wt_zp = weight_observer.calculate_qparams()
                    qweight = torch.quantize_linear(weight.float(),
                                                    float(wt_scale),
                                                    int(wt_zp), torch.qint8)
                    packed_weight = \
                        torch.ops.quantized.linear_prepack(qweight, bias)

                    params = [packed_weight, bias]
                    pos_names = ['w', 'b']
                    ret_name = [
                        '{}_{}_l{}{}'.format(name, ihhh, layer, suffix)
                        for name in pos_names
                    ]
                    quantized_weights.append(qweight)
                    packed_weights.append(ret_name[0])
                    return params, ret_name
Example #29
    def test_pool_api(self):
        """Tests the correctness of the pool module.

        The correctness is defined against the functional implementation.
        """
        N, C, H, W = 10, 10, 10, 3
        kwargs = {
            'kernel_size': 2,
            'stride': None,
            'padding': 0,
            'dilation': 1
        }

        scale, zero_point = 1.0 / 255, 128

        X = torch.randn(N, C, H, W, dtype=torch.float32)
        qX = torch.quantize_linear(X,
                                   scale=scale,
                                   zero_point=zero_point,
                                   dtype=torch.quint8)
        qX_expect = torch.nn.functional.max_pool2d(qX, **kwargs)

        pool_under_test = torch.nn.quantized.MaxPool2d(**kwargs)
        qX_hat = pool_under_test(qX)
        self.assertEqual(qX_expect, qX_hat)

        # JIT Testing
        self.checkScriptable(pool_under_test, list(zip([X], [qX_expect])))
Example #30
def linear(input, weight, bias=None, scale=None, zero_point=None):
    # type: (Tensor, Tensor, Optional[Tensor], Optional[float], Optional[int]) -> Tensor
    r"""
    Applies a linear transformation to the incoming quantized data:
    :math:`y = xA^T + b`.
    See :class:`~torch.nn.Linear`

    .. note::

      The current implementation packs the weights on every call, which incurs a
      performance penalty. To avoid this overhead, use :class:`~torch.nn.quantized.Linear`.

    Args:
      input (Tensor): Quantized input of type `torch.quint8`
      weight (Tensor): Quantized weight of type `torch.qint8`
      bias (Tensor): None or Quantized bias of type `torch.qint32`
      scale (double): output scale. If None, derived from the input scale
      zero_point (long): output zero point. If None, derived from the input zero_point

    Shape:
        - Input: :math:`(N, *, in\_features)` where `*` means any number of
          additional dimensions
        - Weight: :math:`(out\_features, in\_features)`
        - Bias: :math:`(out\_features)`
        - Output: :math:`(N, *, out\_features)`
    """
    if scale is None:
        scale = input.q_scale()
    if zero_point is None:
        zero_point = input.q_zero_point()
    _packed_weight = torch.ops.quantized.fbgemm_linear_prepack(weight)
    if bias is not None:
        bias = torch.quantize_linear(bias.dequantize(), weight.q_scale() * input.q_scale(), 0, torch.qint32)
    return torch.ops.quantized.fbgemm_linear(input, _packed_weight, bias, scale,
                                             zero_point)
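A short, hypothetical usage example of this functional form; the scales and zero points below mirror `test_linear_api` above and are illustrative only:

import torch
from torch.nn.quantized.functional import linear  # assumed import path for the function above

X_q = torch.quantize_linear(torch.rand(5, 10), 0.2, 10, torch.quint8)
W_q = torch.quantize_linear(torch.rand(20, 10), 0.1, 4, torch.qint8)
B_q = torch.quantize_linear(torch.rand(20), W_q.q_scale() * X_q.q_scale(), 0, torch.qint32)
Z_q = linear(X_q, W_q, bias=B_q, scale=0.5, zero_point=3)
print(Z_q.dequantize().shape)  # torch.Size([5, 20])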