def test_quantize_APoT_rand_k1(self): # generate random size of tensor2quantize between 1 -> 20 size = random.randint(1, 20) # generate tensor with random fp values between 0 -> 1000 tensor2quantize = 1000 * torch.rand(size, dtype=torch.float) apot_observer = APoTObserver(b=8, k=1) apot_observer(tensor2quantize) alpha, gamma, quantization_levels, level_indices = apot_observer.calculate_qparams( signed=False) # get apot quantized tensor result qtensor = quantize_APoT(tensor2quantize=tensor2quantize, alpha=alpha, gamma=gamma, quantization_levels=quantization_levels, level_indices=level_indices) # get uniform quantization quantized tensor result uniform_observer = MinMaxObserver() uniform_observer(tensor2quantize) scale, zero_point = uniform_observer.calculate_qparams() uniform_quantized = quantize_per_tensor(input=tensor2quantize, scale=scale, zero_point=zero_point, dtype=torch.quint8).int_repr() qtensor_data = qtensor.data.int() uniform_quantized_tensor = uniform_quantized.data.int() self.assertTrue(torch.equal(qtensor_data, uniform_quantized_tensor))
def test_calculate_qparams_3terms(self):
    """Validate gamma, level count, and index uniqueness for b=6, k=2 (n=3 terms)."""
    obs = APoTObserver(max_val=1.0, b=6, k=2)
    result = obs.calculate_qparams(signed=False)

    # gamma = 1 / sum_{i=0}^{n-1} 2^-i, with n = b // k = 3 terms
    expected_gamma = 1 / sum(2 ** (-i) for i in range(3))
    self.assertEqual(result[0], expected_gamma)

    # 2^b = 64 quantization levels
    self.assertEqual(int(len(result[1])), 2 ** 6)

    # one level index per quantization level
    self.assertEqual(int(len(result[2])), 64)

    # all level indices must be pairwise distinct
    idx_list = result[2].tolist()
    self.assertEqual(len(idx_list), len(set(idx_list)))
def test_calculate_qparams_signed(self):
    """Validate signed qparams for b=4, k=2: gamma, symmetric levels, unique indices."""
    obs = APoTObserver(max_val=1.0, b=4, k=2)
    result = obs.calculate_qparams(signed=True)

    # gamma = 1 / sum_{i=0}^{n-1} 2^-i, with n = b // k = 2 terms
    expected_gamma = 1 / sum(2 ** (-i) for i in range(2))
    self.assertEqual(result[0], expected_gamma)

    # the signed level set for b=4, k=2 has 49 entries
    level_list = result[1].tolist()
    self.assertEqual(len(level_list), 49)

    # every level's negation must also be a level (symmetry about zero)
    self.assertTrue(all(-lvl in level_list for lvl in level_list))

    # 49 level indices, all pairwise distinct
    index_list = result[2].tolist()
    self.assertEqual(len(index_list), 49)
    self.assertEqual(len(index_list), len(set(index_list)))
def test_int_repr(self): # generate tensor with random fp values tensor2quantize = tensor2quantize = torch.tensor( [0, 0.0215, 0.1692, 0.385, 1, 0.0391]) observer = APoTObserver(b=4, k=2) observer.forward(tensor2quantize) qparams = observer.calculate_qparams(signed=False) # get apot quantized tensor result qtensor = quantize_APoT(tensor2quantize=tensor2quantize, alpha=qparams[0], gamma=qparams[1], quantization_levels=qparams[2], level_indices=qparams[3]) qtensor_data = qtensor.int_repr().int() # expected qtensor values calculated based on # corresponding level_indices to nearest quantization level # for each fp value in tensor2quantize # e.g. # 0.0215 in tensor2quantize nearest 0.0208 in quantization_levels -> 3 in level_indices expected_qtensor_data = torch.tensor([0, 3, 8, 13, 5, 12], dtype=torch.int32) self.assertTrue(torch.equal(qtensor_data, expected_qtensor_data))
def test_calculate_qparams_k1(self): obs = APoTObserver(b=6, k=1) obs.min_val = torch.tensor([0.0]) obs.max_val = torch.tensor([1.0]) alpha, gamma, quantization_levels, level_indices = obs.calculate_qparams( signed=False) # calculate expected gamma value gamma_test = 0 for i in range(6): gamma_test += 2**(-i) gamma_test = 1 / gamma_test # check gamma value self.assertEqual(gamma, gamma_test) # check quantization levels size quantlevels_size_test = int(len(quantization_levels)) quantlevels_size = 2**6 self.assertEqual(quantlevels_size_test, quantlevels_size) # check level indices size levelindices_size_test = int(len(level_indices)) level_indices_size = 2**6 self.assertEqual(levelindices_size_test, level_indices_size) # check level indices unique values level_indices_test_list = level_indices.tolist() self.assertEqual(len(level_indices_test_list), len(set(level_indices_test_list)))
class APoTFakeQuantize(FakeQuantizeBase): alpha: Tensor gamma: Tensor quantization_levels: Tensor level_indices: Tensor def __init__(self, **observer_kwargs): super().__init__() self.activation_post_process = APoTObserver(**observer_kwargs) def calculate_qparams(self, signed: bool): # type: ignore[override] return self.activation_post_process.calculate_qparams(signed=signed) def forward(self, X: torch.Tensor, signed: bool): # type: ignore[override] if self.observer_enabled[0] == 1: self.activation_post_process.forward(X) self.alpha, self.gamma, self.quantization_levels, self.level_indices = \ self.activation_post_process.calculate_qparams(signed) if self.fake_quant_enabled[0] == 1: assert (self.alpha is not None and self.gamma is not None and self.quantization_levels is not None and self.level_indices is not None), "Must set qparams for fake quant" X = fake_quantize_function.apply(X, self.alpha, self.gamma, self.quantization_levels, self.level_indices) return X
def test_dequantize_dim(self): # make observer observer = APoTObserver(4, 2) # generate random size of tensor2quantize between 1 -> 20 size1 = random.randint(1, 20) size2 = random.randint(1, 20) size3 = random.randint(1, 20) # make tensor2quantize: random fp values between 0 -> 1000 tensor2quantize = 1000 * torch.rand( size1, size2, size3, dtype=torch.float) observer.forward(tensor2quantize) alpha, gamma, quantization_levels, level_indices = observer.calculate_qparams( signed=False) # make mock apot_tensor original_apot = quantize_APoT(tensor2quantize=tensor2quantize, alpha=alpha, gamma=gamma, quantization_levels=quantization_levels, level_indices=level_indices) # dequantize apot_tensor dequantize_result = dequantize_APoT(apot_tensor=original_apot) self.assertEqual(original_apot.data.size(), dequantize_result.size())
def test_quantize_APoT_k2(self): r""" given b = 4, k = 2, alpha = 1.0, we know: (from APoT paper example: https://arxiv.org/pdf/1909.13144.pdf) quantization_levels = tensor([0.0000, 0.0208, 0.0417, 0.0625, 0.0833, 0.1250, 0.1667, 0.1875, 0.2500, 0.3333, 0.3750, 0.5000, 0.6667, 0.6875, 0.7500, 1.0000]) level_indices = tensor([ 0, 3, 12, 15, 2, 14, 8, 11, 10, 1, 13, 9, 4, 7, 6, 5])) """ # generate tensor with random fp values tensor2quantize = torch.tensor([0, 0.0215, 0.1692, 0.385, 1, 0.0391]) observer = APoTObserver(b=4, k=2) observer.forward(tensor2quantize) alpha, gamma, quantization_levels, level_indices = observer.calculate_qparams(signed=False) # get apot quantized tensor result qtensor = quantize_APoT(tensor2quantize=tensor2quantize, alpha=alpha, gamma=gamma, quantization_levels=quantization_levels, level_indices=level_indices) qtensor_data = qtensor.data.int() # expected qtensor values calculated based on # corresponding level_indices to nearest quantization level # for each fp value in tensor2quantize # e.g. # 0.0215 in tensor2quantize nearest 0.0208 in quantization_levels -> 3 in level_indices expected_qtensor = torch.tensor([0, 3, 8, 13, 5, 12], dtype=torch.int32) self.assertTrue(torch.equal(qtensor_data, expected_qtensor))
def test_backward(self): input = torch.randn(20, dtype=torch.double, requires_grad=True) observer = APoTObserver(b=4, k=2) observer(input) alpha, gamma, quantization_levels, level_indices = observer.calculate_qparams(signed=False) test = gradcheck(fake_quantize_function.apply, (input, alpha, gamma, quantization_levels, level_indices), atol=1e-4)
def test_calculate_qparams_invalid(self): obs = APoTObserver(b=0, k=0) obs.min_val = torch.tensor([0.0]) obs.max_val = torch.tensor([0.0]) with self.assertRaises(AssertionError): alpha, gamma, quantization_levels, level_indices = obs.calculate_qparams( signed=False)
def __init__(self, b, k, max_val, signed, dtype=torch.quint8) -> None:
    """Initialize an APoT quantizer configuration.

    Args:
        b: total bit-width (must be a positive multiple of k)
        k: base bit-width per additive term
        max_val: observer clamp value used to derive quantization levels
        signed: whether the level set is symmetric about zero
        dtype: kept for interface compatibility; not read below

    Fix: the original asserted `k and k != 0`, where `k != 0` is already
    implied by the truthiness of a nonzero numeric `k`.
    """
    self.signed = signed

    # validate b and k: k must be nonzero and must divide b evenly
    assert k
    assert b % k == 0

    self.b = b
    self.k = k
    self.n = b // k  # number of additive power-of-two terms

    # derive quantization levels and level indices from an observer
    obs = APoTObserver(max_val=max_val, b=b, k=k)
    obs_result = obs.calculate_qparams(signed=signed)
    self.quantization_levels = obs_result[1]
    self.level_indices = obs_result[2]
def test_forward(self): obs = APoTObserver(b=4, k=2) X = torch.tensor([0.0, -100.23, -37.18, 3.42, 8.93, 9.21, 87.92]) X = obs.forward(X) alpha, gamma, quantization_levels, level_indices = obs.calculate_qparams( signed=True) min_val = torch.min(X) max_val = torch.max(X) expected_alpha = torch.max(-min_val, max_val) self.assertEqual(alpha, expected_alpha)
def test_fake_calc_qparams(self): apot_fake = APoTFakeQuantize(b=4, k=2) apot_fake.activation_post_process.min_val = torch.tensor([0.0]) apot_fake.activation_post_process.max_val = torch.tensor([1.0]) alpha, gamma, quantization_levels, level_indices = apot_fake.calculate_qparams(signed=False) observer = APoTObserver(b=4, k=2) observer.min_val = torch.tensor([0.0]) observer.max_val = torch.tensor([1.0]) qparams_expected = observer.calculate_qparams(signed=False) self.assertEqual(alpha, qparams_expected[0]) self.assertTrue(torch.equal(gamma, qparams_expected[1])) self.assertTrue(torch.equal(quantization_levels, qparams_expected[2])) self.assertTrue(torch.equal(level_indices, qparams_expected[3]))
def test_calculate_qparams_signed(self): obs = APoTObserver(b=4, k=2) obs.min_val = torch.tensor([0.0]) obs.max_val = torch.tensor([1.0]) alpha, gamma, quantization_levels, level_indices = obs.calculate_qparams( signed=True) alpha_test = torch.max(-obs.min_val, obs.max_val) # check alpha value self.assertEqual(alpha, alpha_test) # calculate expected gamma value gamma_test = 0 for i in range(2): gamma_test += 2**(-i) gamma_test = 1 / gamma_test # check gamma value self.assertEqual(gamma, gamma_test) # check quantization levels size quantlevels_size_test = int(len(quantization_levels)) self.assertEqual(quantlevels_size_test, 49) # check negatives of each element contained # in quantization levels quantlevels_test_list = quantization_levels.tolist() negatives_contained = True for ele in quantlevels_test_list: if not (-ele) in quantlevels_test_list: negatives_contained = False self.assertTrue(negatives_contained) # check level indices size levelindices_size_test = int(len(level_indices)) self.assertEqual(levelindices_size_test, 49) # check level indices unique elements level_indices_test_list = level_indices.tolist() self.assertEqual(len(level_indices_test_list), len(set(level_indices_test_list)))
def test_forward(self): # generate a tensor of size 20 with random values # between 0 -> 1000 to quantize -> dequantize X = 1000 * torch.rand(20) observer = APoTObserver(b=4, k=2) observer.forward(X) alpha, gamma, quantization_levels, level_indices = observer.calculate_qparams(signed=False) apot_fake = APoTFakeQuantize(b=4, k=2) apot_fake.enable_observer() apot_fake.enable_fake_quant() X_reduced_precision_fp = apot_fake.forward(torch.clone(X), False) # get X_expected by converting fp -> apot -> fp to simulate quantize -> dequantize X_to_apot = quantize_APoT(X, alpha, gamma, quantization_levels, level_indices) X_expected = dequantize_APoT(X_to_apot) self.assertTrue(torch.equal(X_reduced_precision_fp, X_expected))
def __init__(self, weight2quantize: torch.Tensor, b: int, k: int):
    """Quantize a 2-D weight tensor with APoT and cache it plus its transpose.

    Args:
        weight2quantize: 2-D floating-point weight tensor
        b: total bit-width (must be a multiple of k)
        k: base bit-width per additive term
    """
    assert weight2quantize.dim() == 2
    assert b % k == 0

    super().__init__()

    self.b = b
    self.k = k
    self.n = self.b // self.k  # number of additive power-of-two terms

    # observe the weights, then derive unsigned qparams
    observer = APoTObserver(b=self.b, k=self.k)
    observer(weight2quantize)
    self.alpha, self.gamma, self.quantization_levels, self.level_indices = \
        observer.calculate_qparams(signed=False)

    # store the quantized weight and its transpose for later matmul use
    quantized = quantize_APoT(weight2quantize, self.alpha, self.gamma,
                              self.quantization_levels, self.level_indices)
    self.weight = quantized.data
    self.weight_transposed = torch.transpose(self.weight, 0, 1)
def test_dequantize_quantize_rand_b6(self): # make observer observer = APoTObserver(12, 4) # generate random size of tensor2quantize between 1 -> 20 size = random.randint(1, 20) # make tensor2quantize: random fp values between 0 -> 1000 tensor2quantize = 1000 * torch.rand(size, dtype=torch.float) observer.forward(tensor2quantize) alpha, gamma, quantization_levels, level_indices = observer.calculate_qparams( signed=False) # make mock apot_tensor original_apot = quantize_APoT(tensor2quantize=tensor2quantize, alpha=alpha, gamma=gamma, quantization_levels=quantization_levels, level_indices=level_indices) original_input = torch.clone(original_apot.data).int() # dequantize apot_tensor dequantize_result = dequantize_APoT(apot_tensor=original_apot) # quantize apot_tensor final_apot = quantize_APoT(tensor2quantize=dequantize_result, alpha=alpha, gamma=gamma, quantization_levels=quantization_levels, level_indices=level_indices) result = final_apot.data.int() self.assertTrue(torch.equal(original_input, result))
def test_calculate_qparams(self):
    """The no-argument calculate_qparams overload must raise NotImplementedError."""
    empty = torch.Tensor()
    obs = APoTObserver(empty, empty, empty, 0, 0)
    with self.assertRaises(NotImplementedError):
        obs.calculate_qparams()
def test_calculate_qparams_invalid(self):
    """An observer built with b=0, k=0 must trip an assertion in calculate_qparams."""
    obs = APoTObserver(max_val=0.0, b=0, k=0)
    with self.assertRaises(AssertionError):
        obs.calculate_qparams(signed=False)
def __init__(self, **observer_kwargs):
    """Build the module and attach an APoTObserver.

    Args:
        **observer_kwargs: forwarded verbatim to the APoTObserver
            constructor (presumably b/k bit-width settings — confirm
            against APoTObserver's signature).
    """
    super().__init__()
    # the observer later supplies quantization parameters
    self.activation_post_process = APoTObserver(**observer_kwargs)