def test_unsigned(self):
    x_np = np.random.rand(1023).astype('float32')
    x_torch = torch.Tensor(x_np)
    # Unsigned 8-bit quantization spans the same positive integer range (255 levels) as
    # signed narrow-range 9-bit, so the numpy reference uses num_bits=9.
    quant_x_np = test_utils.quant_np(x_np, np.max(np.abs(x_np)), num_bits=9, fake=False)
    quant_x_torch, _ = tensor_quant.tensor_quant(x_torch, torch.max(torch.abs(x_torch)), 8, True)
    np.testing.assert_array_almost_equal(quant_x_torch.cpu().numpy(), quant_x_np)

    x_torch = torch.randn(3, 7)
    with pytest.raises(TypeError, match="Negative values encountered"):
        tensor_quant.tensor_quant(x_torch, torch.max(torch.abs(x_torch)), 8, True)
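# A minimal sketch of the numpy reference that test_utils.quant_np is assumed to compute,
# to make the comparisons above easier to follow. The name, signature, and defaults below
# are assumptions for illustration, not the actual helper: scale by (2**(num_bits-1) - 1) / amax,
# round, clip, and dequantize again only when fake=True.
def _quant_np_sketch(x, amax, num_bits=8, fake=True, narrow_range=True):
    intmax = 2.0**(num_bits - 1) - 1
    intmin = -intmax if narrow_range else -intmax - 1
    scale = intmax / amax                                 # broadcasts for per-channel amax
    quantized = np.clip(np.round(x * scale), intmin, intmax)
    return quantized / scale if fake else quantized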
def test_per_channel_scale(self):
    """ tensor_quant performs per channel quantization """
    x_np = np.random.rand(15, 15, 64, 128).astype('float32')
    x_torch = torch.Tensor(x_np).cuda()

    # Pytorch filter layout seems to be KCRS, reduce max to shape [K, 1, 1, 1] to test per channel scale.
    # Shrink amax a little so that clip behavior is tested.
    amax_x_np = 0.7 * np.max(np.abs(x_np), axis=(1, 2, 3), keepdims=True)
    # torch.max doesn't support reducing over multiple axes and returns a (max, argmax) tuple,
    # so the reduction has to be done with repeated torch.max calls.
    amax_x_torch = 0.7 * torch.max(
        torch.max(torch.max(x_torch, dim=1, keepdim=True)[0], dim=2, keepdim=True)[0],
        dim=3, keepdim=True)[0]

    quant_x_np = test_utils.quant_np(x_np, amax_x_np)
    quant_x_torch, _ = tensor_quant.tensor_quant(x_torch, amax_x_torch)

    # np.testing.assert_array_equal(quant_x_torch.cpu().numpy(), quant_x_np)
    # Pytorch numerics are not identical to numpy; results can be off by 1.
    np.testing.assert_array_less(np.abs(quant_x_torch.cpu().numpy() - quant_x_np), 2)
    if verbose:
        mismatches = np.where(np.abs(quant_x_torch.cpu().numpy() - quant_x_np) >= 1)
        print("Mismatches:")
        print(" Original: ", x_np[mismatches])
        print(" numpy: ", quant_x_np[mismatches])
        print(" Pytorch: ", quant_x_torch.cpu().numpy()[mismatches])
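# Aside (assumes PyTorch >= 1.7 is available): torch.amax reduces over several dimensions at
# once and returns only the values, so the nested torch.max reduction above could be written
# as a single call. Sketch only; the test itself keeps the more portable form.
def _per_channel_amax_sketch(x_torch):
    # Per-channel amax for a KCRS-layout tensor, reduced to shape [K, 1, 1, 1].
    return 0.7 * torch.amax(x_torch, dim=(1, 2, 3), keepdim=True)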
def test_clip_gradient(self):
    x = torch.randn(3, 7, requires_grad=True).cuda()
    x.retain_grad()
    amax = x.abs().max() / 2
    x_in_range = (-amax <= x) * (x <= amax)
    quant_x, _ = tensor_quant.tensor_quant(x, amax, 8)
    loss = torch.sum((quant_x - 0.5)**2)
    loss.backward()
    np.testing.assert_array_equal(x.grad.cpu().numpy() != 0, x_in_range.cpu().numpy())
def test_simple_run_no_fake(self):
    """Quantizer with fake_quant=False calls tensor_quant and sets the scale property"""
    x = torch.randn(3, 7).cuda()
    amax_x = torch.max(torch.abs(x))
    fn_quant_x, fn_scale = tensor_quant.tensor_quant(x, amax_x)
    quantizer = tensor_quantizer.TensorQuantizer(
        tensor_quant.QuantDescriptor(num_bits=8, fake_quant=False))
    module_quant_x = quantizer(x)
    module_scale = quantizer.scale
    np.testing.assert_array_equal(fn_quant_x.cpu().numpy(), module_quant_x.cpu().numpy())
    np.testing.assert_array_equal(fn_scale.cpu().numpy(), module_scale.cpu().numpy())
def test_per_tensor_scale(self):
    """ tensor_quant matches numpy quantization """
    torch.set_default_tensor_type('torch.cuda.FloatTensor')  # Test on GPU
    x_np = np.random.rand(1023)
    x_torch = torch.Tensor(x_np)
    quant_x_np = test_utils.quant_np(x_np, np.max(np.abs(x_np)))
    quant_x_torch, _ = tensor_quant.tensor_quant(x_torch, torch.max(torch.abs(x_torch)))
    np.testing.assert_array_equal(quant_x_torch.cpu().numpy(), quant_x_np)
    torch.set_default_tensor_type('torch.FloatTensor')
def test_backward(self):
    """ tensor_quant implements a straight-through estimator on the backward pass.

    Note: this does not work for integer output_dtype
    """
    x = torch.randn(3, 7, requires_grad=True).cuda()
    labels = torch.randint(6, (3,)).type(torch.LongTensor).cuda()
    quant_x, _ = tensor_quant.tensor_quant(x, x.abs().max(), 7)
    float_quant_x = quant_x.type(torch.FloatTensor).cuda()
    x.retain_grad()
    float_quant_x.retain_grad()
    criterion = torch.nn.CrossEntropyLoss().cuda()
    loss = criterion(float_quant_x, labels)
    loss.backward()
    np.testing.assert_array_equal(float_quant_x.grad.cpu().numpy(), x.grad.cpu().numpy())
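# Illustration of the straight-through estimator checked above: rounding has zero gradient
# almost everywhere, so the backward pass simply forwards the incoming gradient unchanged.
# This is a hedged sketch of the idea, not the library's TensorQuantFunction implementation.
class _RoundSTESketch(torch.autograd.Function):
    @staticmethod
    def forward(ctx, inputs):
        return torch.round(inputs)

    @staticmethod
    def backward(ctx, grad_outputs):
        # Straight-through: pretend the forward op was the identity.
        return grad_outputs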
def test_full_range(self):
    """ tensor_quant uses the full integer range when narrow_range=False """
    x_np = np.random.rand(1023).astype('float32')
    x_torch = torch.Tensor(x_np).cuda()
    amax = np.max(np.abs(x_np))
    quant_x_np = test_utils.quant_np(x_np, amax, num_bits=9, fake=False, narrow_range=False)
    quant_x_torch, _ = tensor_quant.tensor_quant(x_torch, torch.max(torch.abs(x_torch)), 8, True, False)
    np.testing.assert_array_almost_equal(quant_x_torch.cpu().numpy(), quant_x_np)
def _quant_forward(self, inputs):
    """Quantized forward pass."""
    if self._learn_amax:
        inputs = self.clip(inputs)
        amax = torch.max(-self.clip.clip_value_min, self.clip.clip_value_max).detach()
    else:
        amax = self._get_amax(inputs)

    if self._fake_quant:
        if not TensorQuantizer.use_fb_fake_quant:
            outputs = fake_tensor_quant(inputs, amax, self._num_bits, self._unsigned, self._narrow_range)
        else:
            outputs = self._fb_fake_quant(inputs, amax)
    else:
        outputs, self._scale = tensor_quant(inputs, amax, self._num_bits, self._unsigned)

    return outputs
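# Usage sketch for the dispatch above, modeled on the TensorQuantizer/QuantDescriptor usage
# in test_simple_run_no_fake. The helper name is hypothetical: with fake_quant=True the output
# stays in floating point (values snapped to the quantization grid via fake_tensor_quant);
# with fake_quant=False the output is integer-valued and the scale is recorded on the module.
def _quantizer_dispatch_sketch(x):
    fake_quantizer = tensor_quantizer.TensorQuantizer(
        tensor_quant.QuantDescriptor(num_bits=8, fake_quant=True))
    y_fake = fake_quantizer(x)   # floating-point, quantize-dequantize path

    int_quantizer = tensor_quantizer.TensorQuantizer(
        tensor_quant.QuantDescriptor(num_bits=8, fake_quant=False))
    y_int = int_quantizer(x)     # integer-valued outputs via tensor_quant
    return y_fake, y_int, int_quantizer.scale  # scale is set inside _quant_forward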
def test_simple_run(self):
    """ quantizer passes gradcheck """
    x = Parameter(torch.randn(2, 3, dtype=torch.float64).cuda()) * 100
    tensor_quant.tensor_quant(x, torch.max(torch.abs(x)), 7)
def test_overflow_fp16(self):
    x_torch = torch.randn(1023).cuda().half()
    with pytest.raises(ValueError, match="scale is too large for FP16"):
        quant_x_torch, scale = tensor_quant.tensor_quant(
            x_torch, torch.tensor(1e-4).cuda().half(), 8, False)
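# Worked arithmetic for the overflow case above, assuming the scale is computed as
# (2**(num_bits-1) - 1) / amax, consistent with the signed 8-bit tests in this file:
# 127 / 1e-4 = 1.27e6, which is far beyond the FP16 maximum of 65504, so the half-precision
# scale would overflow and tensor_quant raises instead of silently producing inf.
def _fp16_overflow_arithmetic_sketch():
    scale = (2**7 - 1) / 1e-4
    assert scale > torch.finfo(torch.float16).max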