Ejemplo n.º 1
0
    def test_per_channel_scale(self, verbose):
        """Compare TensorQuantizer per-channel quantization with the numpy reference.

        Args:
            verbose: When truthy, print the elements whose absolute error is at least 1.
        """
        x_np = np.random.rand(15, 15, 64, 128).astype('float32')
        x_torch = torch.Tensor(x_np).cuda()

        # Reduce over axes 1..3 with keepdims so amax keeps one entry per index of axis 0
        # (shape [K, 1, 1, 1]; the filter layout is assumed to be KCRS).
        # The 0.7 factor shrinks amax so that the clipping path is exercised too.
        amax_x_np = 0.7 * np.max(np.abs(x_np), axis=(1, 2, 3), keepdims=True)
        quant_x_np = test_utils.quant_np(x_np, amax_x_np)

        descriptor = tensor_quant.QuantDescriptor(num_bits=8, axis=(0), fake_quant=False, scale_amax=0.7)
        quantizer = tensor_quantizer.TensorQuantizer(descriptor)
        quantizer.cuda()
        quantized = quantizer(x_torch).cpu().numpy()

        # Exact equality is not asserted: PyTorch and numpy numerics differ, so results
        # may be off by one quantization step.
        error = np.abs(quantized - quant_x_np)
        np.testing.assert_array_less(error, 2)

        if verbose:
            mismatches = np.where(error >= 1)
            print("Mismatches:")
            print(" Original: ", x_np[mismatches])
            print(" numpy: ", quant_x_np[mismatches])
            print(" TensorQuantizer: ", quantized[mismatches])
Ejemplo n.º 2
0
    def test_per_channel_scale(self, verbose=False):
        """tensor_quant performs per channel quantization.

        Args:
            verbose: When truthy, print the elements where the torch result differs from
                the numpy reference by at least 1. Defaults to False so the test runs
                without any extra argument; pass True when calling it directly to debug.
        """
        x_np = np.random.rand(15, 15, 64, 128).astype('float32')
        x_torch = torch.Tensor(x_np).cuda()

        # Pytorch filter layout seems to be KCRS, reduce max to shape [K, 1, 1, 1] to test per channel scale
        # Shrink max a little, so that clip behavior is tested
        amax_x_np = 0.7 * np.max(np.abs(x_np), axis=(1, 2, 3), keepdims=True)

        # torch.max with a dim argument reduces a single axis and returns a (max, argmax)
        # tuple, so the reduction is applied one axis at a time. The absolute value is
        # taken first so the torch amax mirrors the numpy reference above.
        amax_x_torch = torch.abs(x_torch)
        for dim in (1, 2, 3):
            amax_x_torch = torch.max(amax_x_torch, dim=dim, keepdim=True)[0]
        amax_x_torch = 0.7 * amax_x_torch

        quant_x_np = test_utils.quant_np(x_np, amax_x_np)
        quant_x_torch, _ = tensor_quant.tensor_quant(x_torch, amax_x_torch)
        quant_x_torch_np = quant_x_torch.cpu().numpy()

        # Exact equality is not asserted: Pytorch numerics is not the same as numpy,
        # the results may be off by 1.
        error = np.abs(quant_x_torch_np - quant_x_np)
        np.testing.assert_array_less(error, 2)

        if verbose:
            mismatches = np.where(error >= 1)
            print("Mismatches:")
            print(" Original: ", x_np[mismatches])
            print(" numpy: ", quant_x_np[mismatches])
            print(" Pytorch: ", quant_x_torch_np[mismatches])
Ejemplo n.º 3
0
 def test_per_tensor_scale(self):
     """TensorQuantizer output equals the numpy reference that uses the global max as amax."""
     x_np = np.random.rand(1023)
     expected = test_utils.quant_np(x_np, np.max(np.abs(x_np)))

     # 8-bit quantizer with fake_quant disabled; no amax is passed to the descriptor.
     descriptor = tensor_quant.QuantDescriptor(num_bits=8, fake_quant=False)
     quantizer = tensor_quantizer.TensorQuantizer(descriptor)
     actual = quantizer(torch.Tensor(x_np))

     np.testing.assert_array_equal(actual.cpu().numpy(), expected)
Ejemplo n.º 4
0
    def test_cuda_ext_inplace(self):
        """cuda_ext.fake_tensor_quant_ rewrites its input to match the numpy reference.

        Runs once with float32 data (exact comparison) and once with float16 data
        (compared to two decimals).
        """
        # float32 pass
        data_np = np.random.rand(1023).astype('float32')
        data_torch = torch.Tensor(data_np).cuda()
        expected = test_utils.quant_np(data_np, np.max(np.abs(data_np)), fake=True)
        amax = torch.max(torch.abs(data_torch))
        # The tensor is checked after the call, i.e. the result is read from the input itself.
        cuda_ext.fake_tensor_quant_(data_torch, amax)
        np.testing.assert_array_equal(data_torch.cpu().numpy(), expected)

        # float16 pass
        data_np_fp16 = np.random.rand(1023).astype('float16')
        data_torch_fp16 = torch.Tensor(data_np_fp16).cuda().half()
        expected_fp16 = test_utils.quant_np(data_np_fp16, np.max(np.abs(data_np_fp16)), fake=True)
        amax_fp16 = torch.max(torch.abs(data_torch_fp16))
        cuda_ext.fake_tensor_quant_(data_torch_fp16, amax_fp16)
        np.testing.assert_array_almost_equal(data_torch_fp16.cpu().numpy(), expected_fp16, decimal=2)
Ejemplo n.º 5
0
 def test_per_tensor_scale(self):
     """tensor_quant matches numpy quantization.

     The default tensor type is switched to a CUDA float tensor for the duration of the
     test so that ``torch.Tensor(...)`` allocates on the GPU, and is always switched back
     to the CPU float tensor afterwards.
     """
     torch.set_default_tensor_type('torch.cuda.FloatTensor')  # Test on GPU
     try:
         x_np = np.random.rand(1023)
         x_torch = torch.Tensor(x_np)
         quant_x_np = test_utils.quant_np(x_np, np.max(np.abs(x_np)))
         quant_x_torch, _ = tensor_quant.tensor_quant(
             x_torch, torch.max(torch.abs(x_torch)))
         np.testing.assert_array_equal(quant_x_torch.cpu().numpy(), quant_x_np)
     finally:
         # Restore the CPU default even when the assertion (or anything above) raises;
         # otherwise the CUDA default would leak into every test that runs afterwards.
         torch.set_default_tensor_type('torch.FloatTensor')
Ejemplo n.º 6
0
 def test_unsigned(self):
     """fake_tensor_quant called with (8, True) agrees with the 9-bit numpy reference."""
     x_np = np.random.rand(1023).astype('float32')
     x_torch = torch.Tensor(x_np).cuda()

     # The numpy reference is requested with num_bits=9 while torch receives 8 plus a
     # True flag (presumably the unsigned switch -- confirm against fake_tensor_quant).
     amax_np = np.max(np.abs(x_np))
     expected = test_utils.quant_np(x_np, amax_np, num_bits=9, fake=True)

     amax_torch = torch.max(torch.abs(x_torch))
     actual = tensor_quant.fake_tensor_quant(x_torch, amax_torch, 8, True)

     np.testing.assert_array_almost_equal(actual.cpu().numpy(), expected)
Ejemplo n.º 7
0
 def test_per_tensor_scale(self):
     """fake_tensor_quant with the global max as amax agrees with the numpy reference."""
     x_np = np.random.rand(13).astype('float32')
     print(x_np)  # the random input is echoed to stdout
     x_torch = torch.Tensor(x_np).cuda()

     expected = test_utils.quant_np(x_np, np.max(np.abs(x_np)), fake=True)
     amax_torch = torch.max(torch.abs(x_torch))
     actual = tensor_quant.fake_tensor_quant(x_torch, amax_torch)

     np.testing.assert_array_almost_equal(actual.cpu().numpy(), expected)
Ejemplo n.º 8
0
 def test_learn_amax(self):
     """A quantizer built with learn_amax=True exposes `clip` and matches the numpy reference."""
     amax = 0.5
     x_np = np.random.rand(1023).astype(np.float32)
     expected = test_utils.quant_np(x_np, amax, fake=True)

     descriptor = tensor_quant.QuantDescriptor(num_bits=8, amax=amax, learn_amax=True)
     quantizer = tensor_quantizer.TensorQuantizer(descriptor)
     # learn_amax is expected to give the quantizer a `clip` attribute.
     assert hasattr(quantizer, 'clip')

     actual = quantizer(torch.Tensor(x_np))
     # detach() before numpy(): the output may be attached to the autograd graph.
     np.testing.assert_array_equal(actual.cpu().detach().numpy(), expected)
Ejemplo n.º 9
0
    def test_scale_amax(self):
        """Quantizing with scale_amax matches a numpy reference that uses amax * scale_amax."""
        amax = 0.5
        scale_amax = 0.9
        x_np = np.random.rand(1023).astype(np.float32)
        x_torch = torch.Tensor(x_np)
        expected = test_utils.quant_np(x_np, amax * scale_amax, fake=True)

        descriptor = tensor_quant.QuantDescriptor(num_bits=8, amax=amax, scale_amax=scale_amax)
        quantizer = tensor_quantizer.TensorQuantizer(descriptor)

        # Run the quantizer twice: there was a bug in the scale_amax logic that modified
        # amax on every call, so the second pass must give the same result as the first.
        for _ in range(2):
            actual = quantizer(x_torch)
            np.testing.assert_array_equal(actual.cpu().detach().numpy(), expected)
Ejemplo n.º 10
0
 def test_full_range(self):
     """fake_tensor_quant called with (8, True, False) agrees with the numpy reference
     built with num_bits=9 and narrow_range=False.
     """
     x_np = np.random.rand(1023).astype('float32')
     x_torch = torch.Tensor(x_np).cuda()

     expected = test_utils.quant_np(
         x_np, np.max(np.abs(x_np)), num_bits=9, fake=True, narrow_range=False)

     # Positional arguments after amax: 8, True, False (presumably num_bits, unsigned and
     # narrow_range -- confirm against fake_tensor_quant's signature).
     amax_torch = torch.max(torch.abs(x_torch))
     actual = tensor_quant.fake_tensor_quant(x_torch, amax_torch, 8, True, False)

     np.testing.assert_array_almost_equal(actual.cpu().numpy(), expected)
Ejemplo n.º 11
0
    def test_unsigned(self):
        """tensor_quant called with (8, True) matches the 9-bit numpy reference and raises
        TypeError when the input contains negative values.
        """
        x_np = np.random.rand(1023).astype('float32')
        expected = test_utils.quant_np(x_np, np.max(np.abs(x_np)), num_bits=9, fake=False)

        non_negative = torch.Tensor(x_np)
        # tensor_quant returns a pair; only the first element (the quantized tensor) is checked.
        actual, _ = tensor_quant.tensor_quant(non_negative, torch.max(torch.abs(non_negative)), 8, True)
        np.testing.assert_array_almost_equal(actual.cpu().numpy(), expected)

        # Normally distributed data: the same call is expected to raise the TypeError below.
        signed = torch.randn(3, 7)
        with pytest.raises(TypeError, match="Negative values encountered"):
            tensor_quant.tensor_quant(signed, torch.max(torch.abs(signed)), 8, True)