def test_qtensor_permute(self):
    r = torch.rand(10, 30, 2, 2, dtype=torch.float) * 4 - 2
    scale = 0.02
    zero_point = 1
    qr = torch.quantize_linear(r, scale, zero_point, torch.qint8)
    qr = qr.transpose(0, 1)
    rqr = qr.dequantize()
    # compare transpose + dequantized result with original transposed result
    self.assertTrue(np.allclose(r.numpy().transpose([1, 0, 2, 3]), rqr.numpy(), atol=2 / scale))

    qr = torch.quantize_linear(r, scale, zero_point, torch.qint8)
    qr1 = qr.permute([1, 0, 2, 3])
    qr2 = qr.transpose(0, 1)
    # compare int representation after transformations
    self.assertEqual(qr1.int_repr(), qr2.int_repr())
    self.assertEqual(qr1.q_scale(), qr2.q_scale())
    self.assertEqual(qr1.q_zero_point(), qr2.q_zero_point())
    # compare dequantized result
    self.assertEqual(qr1.dequantize(), qr2.dequantize())
    # compare permuted + dequantized result with original transposed result
    self.assertTrue(np.allclose(qr2.dequantize().numpy(),
                                r.numpy().transpose([1, 0, 2, 3]), atol=2 / scale))
    # make permuted result contiguous
    self.assertEqual(qr2.contiguous().int_repr(), qr2.int_repr())

    # change memory format
    qlast = qr.contiguous(memory_format=torch.channels_last)
    self.assertEqual(qr.stride(), list(reversed(sorted(qr.stride()))))
    self.assertNotEqual(qlast.stride(), list(reversed(sorted(qlast.stride()))))
    self.assertEqual(qr.int_repr(), qlast.int_repr())
    self.assertEqual(qr.q_scale(), qlast.q_scale())
    self.assertEqual(qr.q_zero_point(), qlast.q_zero_point())
    self.assertEqual(qlast.dequantize(), qr.dequantize())
def from_float(mod): r"""Create a quantized module from a float module or qparams_dict Args: mod (Module): a float module, either produced by torch.quantization utilities or provided by the user """ if hasattr(mod, 'weight_fake_quant'): # assert type(mod) == QATLinear, 'training mode nnq.Linear.from_float only works for nn.qat.Linear' weight_observer = mod.weight_fake_quant else: assert type(mod) == NNLinear, 'nnq.Linear.from_float only works for nn.Linear' assert hasattr(mod, 'qconfig'), 'Input float module must have qconfig defined' assert hasattr(mod, 'observer'), 'Input float module must have observer attached' weight_observer = mod.qconfig.weight() weight_observer(mod.weight) activation_observer = mod.observer act_scale, act_zp = activation_observer.calculate_qparams() wt_scale, wt_zp = weight_observer.calculate_qparams() bias_scale = (wt_scale * act_scale).float() qweight = torch.quantize_linear(mod.weight.float(), wt_scale, wt_zp.long().item(), torch.qint8) if mod.bias is not None: qbias = torch.quantize_linear(mod.bias.float(), bias_scale, 0, torch.qint32) else: qbias = None qlinear = Linear(mod.in_features, mod.out_features) qlinear.set_weight(qweight) qlinear.bias = qbias qlinear.scale = float(act_scale) qlinear.zero_point = int(act_zp) return qlinear
def from_float(mod): r"""Create a quantized module from a float module or qparams_dict Args: `mod` a float module, either produced by torch.quantization utilities or directly from user """ assert type( mod) == NNLinear, 'nnq.Linear.from_float only works for nn.Linear' assert hasattr( mod, 'qconfig'), 'Input float module must have qconfig defined' assert hasattr( mod, 'observer'), 'Input float module must have observer attached' activation_observer = mod.observer act_qparams = activation_observer.calculate_qparams() weight_observer = mod.qconfig.weight() weight_observer(mod.weight) wt_qparams = weight_observer.calculate_qparams() bias_scale = (wt_qparams[0] * act_qparams[0]).float() qweight = torch.quantize_linear(mod.weight.float(), wt_qparams[0], wt_qparams[1].long().item(), torch.qint8) qbias = torch.quantize_linear(mod.bias.float(), bias_scale, 0, torch.qint32) qlinear = Linear(mod.in_features, mod.out_features) qlinear._packed_weight = torch.ops.quantized.fbgemm_linear_prepack( qweight) qlinear.bias = qbias qlinear.out_scale = torch.tensor([act_qparams[0]]) qlinear.out_zero_point = torch.tensor([act_qparams[1]]) return qlinear
def test_qadd_relu_same_qparams(self):
    add_relu = torch.ops.quantized.add_relu
    add = torch.ops.quantized.add

    A = torch.arange(-25, 25, dtype=torch.float)
    B = torch.arange(-25, 25, dtype=torch.float)
    scale = 2.0
    zero_point = 127
    qA = torch.quantize_linear(A, scale=scale, zero_point=zero_point, dtype=torch.quint8)
    qB = torch.quantize_linear(B, scale=scale, zero_point=zero_point, dtype=torch.quint8)

    # Add ground truth
    C = (qA.dequantize() + qB.dequantize()).numpy()
    qC = _quantize(C, scale, zero_point)
    qC_hat = add(qA, qB, scale=scale, zero_point=zero_point)
    np.testing.assert_equal(qC, qC_hat.int_repr(),
                            "Quantized addition failed.")

    # Add + ReLU ground truth
    Crelu = C.copy()
    Crelu[C < 0] = 0
    qCrelu = _quantize(Crelu, scale, zero_point)
    qCrelu_hat = add_relu(qA, qB, scale=scale, zero_point=zero_point)
    np.testing.assert_equal(qCrelu, qCrelu_hat.int_repr(),
                            "Quantized addition with ReLU failed.")
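# The tests in this listing call _quantize (and later _dequantize) as NumPy
# reference helpers that are not defined here. Below is a minimal sketch of
# what such helpers could look like, assuming plain affine quantization with
# uint8 clamping bounds; the names and bounds are assumptions for illustration,
# not the original helpers.
import numpy as np

def _quantize(x, scale, zero_point, qmin=0, qmax=255):
    """Reference affine quantization: round(x / scale + zero_point), clamped."""
    qx = np.round(x / scale + zero_point)
    return np.clip(qx, qmin, qmax).astype(np.uint8)

def _dequantize(qx, scale, zero_point):
    """Reference affine dequantization: (qx - zero_point) * scale."""
    return (qx.astype(np.float32) - zero_point) * scale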
def test_qadd_scalar_relu(self, A, b):
    import copy
    add_scalar = torch.ops.quantized.add_scalar
    add_scalar_relu = torch.ops.quantized.add_scalar_relu

    A, (scale, zero_point, dtype) = A
    A = A.astype(np.float32)
    qA = torch.quantize_linear(torch.from_numpy(A), scale, zero_point, dtype)

    C = qA.dequantize() + b
    C_relu = copy.deepcopy(C)
    C_relu[C_relu < 0] = 0

    C_ref = torch.quantize_linear(C, scale, zero_point, dtype)
    C_relu_ref = torch.quantize_linear(C_relu, scale, zero_point, dtype)
    C_hat = add_scalar(qA, b, scale=scale, zero_point=zero_point)
    C_relu_hat = add_scalar_relu(qA, b, scale=scale, zero_point=zero_point)

    self.assertEqual(C_ref, C_hat,
                     message="Scalar add results don't match: {} vs {}".format(C_ref, C_hat))
    self.assertEqual(C_relu_ref, C_relu_hat,
                     message="Scalar add relu results don't match: {} vs {}".format(C_relu_ref, C_relu_hat))
def init(self, N, IC, OC, H, W, G, kernel, stride, pad):
    scale = 1.0 / 255
    zero_point = 0
    X = torch.randn(N, IC, H, W, dtype=torch.float32)
    qX = torch.quantize_linear(X, scale=scale, zero_point=zero_point, dtype=torch.quint8)
    W = torch.randn(OC, IC // G, kernel, kernel, dtype=torch.float32)
    qW = torch.quantize_linear(W, scale=scale, zero_point=0, dtype=torch.qint8)

    self.input = qX
    self.qconv2d = nnq.Conv2d(IC, OC, kernel, stride=stride, padding=pad, groups=G)
    self.qconv2d.weight = qW
    self.qconv2d.scale = torch.tensor([scale], dtype=torch.double)
    self.qconv2d.zero_point = torch.tensor([zero_point], dtype=torch.int)
    self.set_module_name("QConv2d")
def test_qrelu(self, qparams):
    X = np.array([[-3, -2, 1, 2],
                  [0, 0, 0, 0],
                  [-5, -4, -3, -2],
                  [1, 2, 3, 4]], dtype=np.float32)
    scale, zero_point, torch_type = qparams

    Y = X.copy()
    Y[Y < 0] = 0
    qY = torch.quantize_linear(torch.from_numpy(Y), scale=scale,
                               zero_point=zero_point, dtype=torch_type)

    X = torch.from_numpy(X)
    qX = torch.quantize_linear(X, scale=scale, zero_point=zero_point, dtype=torch_type)

    ops_under_test = {
        'native': torch.relu,
        'nn.functional': torch.nn.functional.relu,
    }

    for name, op in ops_under_test.items():
        qY_hat = op(qX)
        self.assertEqual(qY, qY_hat, message="{} relu failed".format(name))

    ops_under_test_inplace = {
        'inplace native': torch.relu_,
        'inplace nn.functional': torch.nn.functional.relu_,
    }

    for name, op_ in ops_under_test_inplace.items():
        qY_hat = qX.clone()
        op_(qY_hat)
        self.assertEqual(qY, qY_hat, message="{} relu failed".format(name))
def test_qtensor_permute(self):
    r = torch.rand(100, 30, dtype=torch.float) * 2 - 4
    scale = 2
    zero_point = 2
    qr = torch.quantize_linear(r, scale, zero_point, torch.qint8)
    qr = qr.transpose(0, 1)
    rqr = qr.dequantize()
    # compare transpose + dequantized result with original transposed result
    self.assertTrue(np.allclose(r.numpy().T, rqr.numpy(), atol=2 / scale))

    qr = torch.quantize_linear(r, scale, zero_point, torch.qint8)
    qr1 = qr.permute([1, 0])
    qr2 = qr.transpose(0, 1)
    # compare int representation after transformations
    self.assertTrue(torch.equal(qr1.int_repr(), qr2.int_repr()))
    self.assertTrue(qr1.q_scale() == qr2.q_scale())
    self.assertTrue(qr1.q_zero_point() == qr2.q_zero_point())
    # compare dequantized result
    self.assertTrue(np.array_equal(qr1.dequantize().numpy(), qr2.dequantize().numpy()))
    # compare permuted + dequantized result with original transposed result
    self.assertTrue(np.allclose(qr2.dequantize().numpy(), r.numpy().T, atol=2 / scale))
    # make permuted result contiguous
    self.assertTrue(torch.equal(qr2.contiguous().int_repr(), qr2.int_repr()))
def from_float(mod): r"""Create a quantized module from a float module or qparams_dict Args: `mod` a float module, either produced by torch.quantization utilities or directly from user """ if hasattr(mod, 'weight_fake_quant'): # assert type(mod) == QATLinear, 'training mode nnq.Linear.from_float only works for nn.qat.Linear' weight_observer = mod.weight_fake_quant else: assert type( mod ) == NNLinear, 'nnq.Linear.from_float only works for nn.Linear' assert hasattr( mod, 'qconfig'), 'Input float module must have qconfig defined' assert hasattr( mod, 'observer'), 'Input float module must have observer attached' weight_observer = mod.qconfig.weight() weight_observer(mod.weight) activation_observer = mod.observer act_scale, act_zp = activation_observer.calculate_qparams() wt_scale, wt_zp = weight_observer.calculate_qparams() bias_scale = (wt_scale * act_scale).float() qweight = torch.quantize_linear(mod.weight.float(), wt_scale, wt_zp.long().item(), torch.qint8) qbias = torch.quantize_linear(mod.bias.float(), bias_scale, 0, torch.qint32) qlinear = Linear(mod.in_features, mod.out_features) qlinear._packed_weight = torch.ops.quantized.fbgemm_linear_prepack( qweight) qlinear.bias = qbias qlinear.scale = torch.tensor([act_scale], dtype=torch.double) qlinear.zero_point = torch.tensor([act_zp], dtype=torch.long) return qlinear
def test_linear_api(self):
    """test API functionality for nn.quantized.linear"""
    in_features = 10
    out_features = 20
    batch_size = 5
    W = torch.rand(out_features, in_features).float()
    W_q = torch.quantize_linear(W, 0.1, 4, torch.qint8)
    W_pack = torch.ops.quantized.fbgemm_linear_prepack(W_q)
    X = torch.rand(batch_size, in_features).float()
    X_q = torch.quantize_linear(X, 0.2, 10, torch.quint8)
    B = torch.rand(out_features).float()
    B_q = torch.quantize_linear(B, W_q.q_scale() * X_q.q_scale(), 0, torch.qint32)
    out_scale = 0.5
    out_zero_point = 3
    qlinear = nnq.Linear(in_features, out_features)
    qlinear._packed_weight = W_pack
    qlinear.bias = B_q
    qlinear.out_scale = torch.tensor([out_scale])
    qlinear.out_zero_point = torch.tensor([out_zero_point])
    Z_q = qlinear(X_q)
    # Check if the module implementation matches calling the ops directly
    Z_ref = torch.ops.quantized.fbgemm_linear(X_q, W_pack, B_q, out_scale, out_zero_point)
    self.assertEqual(Z_ref, Z_q)

    # Test serialization of quantized Linear Module using state_dict
    model_dict = qlinear.state_dict()
    self.assertEqual(model_dict['weight'], W_q)
    self.assertEqual(model_dict['bias'], B_q)
    with tempfile.NamedTemporaryFile() as f:
        torch.save(model_dict, f)
        f.seek(0)
        loaded_dict = torch.load(f)
    for key in model_dict:
        self.assertEqual(model_dict[key], loaded_dict[key])
    loaded_qlinear = nnq.Linear(in_features, out_features)
    loaded_qlinear.load_state_dict(loaded_dict)

    linear_unpack = torch.ops.quantized.fbgemm_linear_unpack
    self.assertEqual(linear_unpack(qlinear._packed_weight),
                     linear_unpack(loaded_qlinear._packed_weight))
    self.assertEqual(qlinear.bias, loaded_qlinear.bias)
    self.assertEqual(qlinear.out_scale, loaded_qlinear.out_scale)
    self.assertEqual(qlinear.out_zero_point, loaded_qlinear.out_zero_point)
    self.assertTrue(dir(qlinear) == dir(loaded_qlinear))
    self.assertTrue(hasattr(qlinear, '_packed_weight'))
    self.assertTrue(hasattr(loaded_qlinear, '_packed_weight'))
    self.assertTrue(hasattr(qlinear, 'weight'))
    self.assertTrue(hasattr(loaded_qlinear, 'weight'))
    self.assertEqual(qlinear.weight, loaded_qlinear.weight)
    self.assertEqual(qlinear.weight,
                     torch.ops.quantized.fbgemm_linear_unpack(qlinear._packed_weight))
    Z_q2 = qlinear(X_q)
    self.assertEqual(Z_q, Z_q2)
def test_cat(self, X, num, dim, relu):
    tensors_q = []
    tensors_ref = []
    X, (scale, zero_point, torch_type) = X
    assume(dim < X.ndim)
    X = torch.from_numpy(X)
    new_shape = np.array(X.shape)
    new_shape[dim] = 0
    for idx in range(num):
        tensors_q.append(torch.quantize_linear(X, scale, zero_point, torch_type))
        tensors_ref.append(X)
        new_shape[dim] += tensors_ref[-1].shape[dim]

    cat_ref = torch.cat(tensors_ref, dim=dim)
    cat_ref = torch.quantize_linear(cat_ref, scale, zero_point, torch_type)
    cat_ref = cat_ref.dequantize()

    if relu:
        cat_ref = F.relu(cat_ref)
        q_cat_op = torch.ops.quantized.cat_relu
        q_cat_out_op = torch.ops.quantized.cat_relu_out
    else:
        q_cat_op = torch.ops.quantized.cat
        q_cat_out_op = torch.ops.quantized.cat_out

    cat_q = q_cat_op(tensors_q, dim=dim, scale=scale, zero_point=zero_point)
    cat_q = cat_q.dequantize()
    np.testing.assert_equal(cat_ref.numpy(), cat_q.numpy())

    cat_q_out = torch._empty_affine_quantized(
        list(new_shape), scale=scale, zero_point=zero_point, dtype=torch_type)
    q_cat_out_op(tensors_q, dim=dim, out=cat_q_out)
    cat_q_out = cat_q_out.dequantize()
    np.testing.assert_equal(cat_ref.numpy(), cat_q_out.numpy())

    # Test the cat on per-channel quantized tensor.
    ch_axis = 1
    scales = torch.from_numpy(np.array([1.0] * X.shape[ch_axis]))
    scales = scales.to(torch.float64)
    zero_points = torch.from_numpy(np.array([0] * X.shape[ch_axis]))
    zero_points = zero_points.to(torch.long)
    tensors_q[0] = torch.quantize_linear_per_channel(
        X, scales, zero_points, axis=[ch_axis], dtype=torch_type)
    with self.assertRaisesRegex(RuntimeError, "supported.*cat"):
        cat_q = q_cat_op(tensors_q, dim=ch_axis, scale=scale, zero_point=zero_point)
def test_conv_api(self):
    """Tests the correctness of the conv module.

    The correctness is defined against the functional implementation.
    """
    N, iC, H, W = 10, 10, 10, 3
    oC, g, kH, kW = 16, 1, 3, 3
    scale, zero_point = 1.0 / 255, 128

    X = torch.randn(N, iC, H, W, dtype=torch.float32)
    X = X.permute([0, 2, 3, 1]).contiguous()
    qX = torch.quantize_linear(X, scale=scale, zero_point=128, dtype=torch.quint8)

    w = torch.randn(oC, iC // g, kH, kW, dtype=torch.float32)
    w = w.permute([0, 2, 3, 1]).contiguous()
    qw = torch.quantize_linear(w, scale=scale, zero_point=0, dtype=torch.qint8)

    b = torch.randn(oC, dtype=torch.float32)
    qb = torch.quantize_linear(b, scale=1.0 / 1024, zero_point=0, dtype=torch.qint32)

    conv_under_test = Conv2d(in_channels=iC,
                             out_channels=oC,
                             kernel_size=(kH, kW),
                             stride=1,
                             padding=0,
                             dilation=1,
                             groups=g,
                             bias=True,
                             padding_mode='zeros')
    conv_under_test.weight = qw
    conv_under_test.bias = qb
    conv_under_test.scale = scale
    conv_under_test.zero_point = zero_point

    # Test members
    self.assertTrue(hasattr(conv_under_test, '_packed_weight'))
    self.assertTrue(hasattr(conv_under_test, '_scale'))
    self.assertTrue(hasattr(conv_under_test, '_zero_point'))

    # Test properties
    # self.assertEqual(qw, conv_under_test.weight)
    self.assertEqual(qb, conv_under_test.bias)
    self.assertEqual(scale, conv_under_test.scale)
    self.assertEqual(zero_point, conv_under_test.zero_point)

    # Test forward
    result_under_test = conv_under_test(qX)
    result_reference = qF.conv2d(qX, qw, bias=qb,
                                 scale=scale, zero_point=zero_point,
                                 stride=1, padding=0, dilation=1,
                                 groups=g, prepacked=False, dtype=torch.quint8)

    self.assertEqual(result_reference, result_under_test,
                     message="Tensors are not equal.")
def test_equal(self, X, X2, X_per_channel, X2_per_channel):
    X, X_params = X
    (scale, zero_point, torch_type) = X_params
    X2, X2_params = X2
    (scale2, zero_point2, torch_type2) = X2_params

    X = torch.from_numpy(X)
    if X_per_channel:
        X_scheme = 'per_channel'
        channels = X.shape[-1]
        qX = torch.quantize_linear_per_channel(
            X,
            scales=torch.tensor([scale] * channels),
            zero_points=torch.tensor([zero_point] * channels),
            dtype=torch_type,
            axis=[X.ndim - 1])
    else:
        X_scheme = 'per_tensor'
        qX = torch.quantize_linear(X, scale=scale, zero_point=zero_point, dtype=torch_type)
    X2 = torch.from_numpy(X2)
    if X2_per_channel:
        X2_scheme = 'per_channel'
        channels = X2.shape[-1]
        qX2 = torch.quantize_linear_per_channel(
            X2,
            scales=torch.tensor([scale2] * channels),
            zero_points=torch.tensor([zero_point2] * channels),
            dtype=torch_type2,
            axis=[X2.ndim - 1])
    else:
        X2_scheme = 'per_tensor'
        qX2 = torch.quantize_linear(X2, scale=scale2, zero_point=zero_point2, dtype=torch_type2)

    def equal_ref(X, params, X_scheme, X2, params2, X2_scheme):
        if X_scheme != X2_scheme:
            return False
        if params != params2:
            return False
        if X.shape != X2.shape:
            return False
        if (X != X2).any():
            return False
        return True

    self.assertEqual(qX.equal(qX),
                     equal_ref(X, X_params, X_scheme, X, X_params, X_scheme))
    self.assertEqual(qX.equal(qX2),
                     equal_ref(X, X_params, X_scheme, X2, X2_params, X2_scheme))
def from_float(cls, mod):
    r"""Creates a quantized module from a float module or qparams_dict.

    Args:
        mod (Module): a float module, either produced by torch.quantization
                      utilities or provided by the user
    """
    if hasattr(mod, 'weight_fake_quant'):
        # assert type(mod) == cls.__QAT_MODULE, ' nnq.' + cls.__name__ + '.from_float only works for ' + \
        #     cls.__QAT_MODULE.__name__
        if type(mod) == nniqat.ConvBn2d:
            mod.weight, mod.bias = \
                fuse_conv_bn_weights(mod.weight, mod.bias, mod.running_mean,
                                     mod.running_var, mod.eps, mod.gamma, mod.beta)
        assert hasattr(mod, 'observer'), 'Input QAT module must have observer attached'
        weight_observer = mod.weight_fake_quant
        activation_observer = mod.observer
    else:
        assert type(mod) == cls._FLOAT_MODULE, ' nnq.' + cls.__name__ + '.from_float only works for ' + \
            cls._FLOAT_MODULE.__name__
        assert hasattr(mod, 'qconfig'), 'Input float module must have qconfig defined'
        # workaround for sequential, ConvReLU2d should probably
        # inherit from Conv2d instead
        if type(mod) == nni.ConvReLU2d:
            activation_observer = mod[1].observer
            mod = mod[0]
        else:
            activation_observer = mod.observer
        weight_observer = mod.qconfig.weight()
        weight_observer(mod.weight)
    act_scale, act_zp = activation_observer.calculate_qparams()
    assert weight_observer.dtype == torch.qint8, 'Weight observer must have a dtype of qint8'
    wt_scale, wt_zp = weight_observer.calculate_qparams()
    # Scale bias to activation_scale/2^16, this quantizes bias
    # to about 24 bits of precision
    bias_scale = float(act_scale / (2**16))
    qweight = torch.quantize_linear(mod.weight.float(), float(wt_scale), int(wt_zp), torch.qint8)
    qconv = cls(mod.in_channels, mod.out_channels, mod.kernel_size,
                mod.stride, mod.padding, mod.dilation, mod.groups,
                mod.bias is not None, mod.padding_mode)
    qconv.set_weight(qweight)
    if mod.bias is not None:
        qbias = torch.quantize_linear(mod.bias.float(), bias_scale, 0, torch.qint32)
    else:
        qbias = None
    qconv.bias = qbias
    qconv.scale = float(act_scale)
    qconv.zero_point = int(act_zp)
    return qconv
def test_conv_api(self, use_bias):
    """Tests the correctness of the conv module.

    The correctness is defined against the functional implementation.
    """
    N, iC, H, W = 10, 10, 10, 3
    oC, g, kH, kW = 16, 1, 3, 3
    scale, zero_point = 1.0 / 255, 128
    stride = (1, 1)
    i_padding = (0, 0)
    dilation = (1, 1)

    X = torch.randn(N, iC, H, W, dtype=torch.float32)
    X = X.permute([0, 2, 3, 1]).contiguous()
    qX = torch.quantize_linear(X, scale=scale, zero_point=128, dtype=torch.quint8)

    w = torch.randn(oC, iC // g, kH, kW, dtype=torch.float32)
    qw = torch.quantize_linear(w, scale=scale, zero_point=0, dtype=torch.qint8)

    b = torch.randn(oC, dtype=torch.float32) if use_bias else None
    q_bias = torch.quantize_linear(b, scale=1.0 / 1024, zero_point=0,
                                   dtype=torch.qint32) if use_bias else None

    q_filters_ref = torch.ops.quantized.fbgemm_conv_prepack(
        qw.permute([0, 2, 3, 1]), stride, i_padding, dilation, g)
    requantized_bias = torch.quantize_linear(
        q_bias.dequantize(), scale * scale, 0, torch.qint32) if use_bias else None
    ref_result = torch.ops.quantized.fbgemm_conv2d(
        qX.permute([0, 2, 3, 1]), q_filters_ref, requantized_bias,
        stride, i_padding, dilation, g,
        scale, zero_point).permute([0, 3, 1, 2])

    q_result = torch.nn.quantized.functional.conv2d(
        qX, qw, bias=q_bias,
        scale=scale, zero_point=zero_point,
        stride=stride, padding=i_padding,
        dilation=dilation, groups=g,
        dtype=torch.quint8)

    self.assertEqual(ref_result, q_result)
def test_qadd_relu_different_qparams(self):
    add_relu = torch.ops.quantized.add_relu
    add = torch.ops.quantized.add
    add_out = torch.ops.quantized.add_out
    add_relu_out = torch.ops.quantized.add_relu_out

    A = torch.arange(-25, 25, dtype=torch.float)
    B = torch.arange(-25, 25, dtype=torch.float)
    scale_A = 3.0
    zero_point_A = 7
    scale_B = 5.0
    zero_point_B = 127

    scale_C = 0.5
    zero_point_C = 5

    qA = torch.quantize_linear(A, scale=scale_A, zero_point=zero_point_A, dtype=torch.quint8)
    qB = torch.quantize_linear(B, scale=scale_B, zero_point=zero_point_B, dtype=torch.quint8)

    # Add ground truth
    C = (qA.dequantize() + qB.dequantize()).numpy()
    qC = _quantize(C, scale_C, zero_point_C)
    qC_hat = add(qA, qB, scale=scale_C, zero_point=zero_point_C)
    np.testing.assert_equal(qC, qC_hat.int_repr(),
                            "Quantized addition failed.")
    qC_out_hat = torch._empty_affine_quantized(qC.shape,
                                               scale=scale_C,
                                               zero_point=zero_point_C,
                                               dtype=torch.quint8)
    add_out(qA, qB, out=qC_out_hat)
    self.assertEqual(qC_hat, qC_out_hat, message="Add.out failed")

    # Add + ReLU ground truth
    Crelu = C.copy()
    Crelu[C < 0] = 0
    qCrelu = _quantize(Crelu, scale_C, zero_point_C)
    qCrelu_hat = add_relu(qA, qB, scale=scale_C, zero_point=zero_point_C)
    np.testing.assert_equal(qCrelu, qCrelu_hat.int_repr(),
                            "Quantized addition with ReLU failed.")
    qCrelu_out_hat = torch._empty_affine_quantized(qCrelu.shape,
                                                   scale=scale_C,
                                                   zero_point=zero_point_C,
                                                   dtype=torch.quint8)
    add_relu_out(qA, qB, out=qCrelu_out_hat)
    self.assertEqual(qCrelu_hat, qCrelu_out_hat, message="AddReLU.out failed")
def test_qmul_relu_same_qparams(self):
    mul_relu = torch.ops.quantized.mul_relu
    mul = torch.ops.quantized.mul
    mul_out = torch.ops.quantized.mul_out
    mul_relu_out = torch.ops.quantized.mul_relu_out

    A = torch.arange(-25, 25, dtype=torch.float)
    B = torch.arange(-25, 25, dtype=torch.float)
    scale = 2.0
    zero_point = 127
    qA = torch.quantize_linear(A, scale=scale, zero_point=zero_point, dtype=torch.quint8)
    qB = torch.quantize_linear(B, scale=scale, zero_point=zero_point, dtype=torch.quint8)

    # mul ground truth
    C = (qA.dequantize() * qB.dequantize()).numpy()
    qC = _quantize(C, scale, zero_point)
    qC_hat = mul(qA, qB, scale=scale, zero_point=zero_point)
    np.testing.assert_equal(qC, qC_hat.int_repr(),
                            "Quantized multiplication failed.")
    qC_out_hat = torch._empty_affine_quantized(qC.shape,
                                               scale=scale,
                                               zero_point=zero_point,
                                               dtype=torch.quint8)
    mul_out(qA, qB, out=qC_out_hat)
    self.assertEqual(qC_hat, qC_out_hat, message="mul.out failed")

    # mul + ReLU ground truth
    Crelu = C.copy()
    Crelu[C < 0] = 0
    qCrelu = _quantize(Crelu, scale, zero_point)
    qCrelu_hat = mul_relu(qA, qB, scale=scale, zero_point=zero_point)
    np.testing.assert_equal(qCrelu, qCrelu_hat.int_repr(),
                            "Quantized multiplication with ReLU failed.")
    qCrelu_out_hat = torch._empty_affine_quantized(qCrelu.shape,
                                                   scale=scale,
                                                   zero_point=zero_point,
                                                   dtype=torch.quint8)
    mul_relu_out(qA, qB, out=qCrelu_out_hat)
    self.assertEqual(qCrelu_hat, qCrelu_out_hat, message="mulReLU.out failed")

    # Scalar multiplication
    mul = torch.ops.quantized.mul_scalar
    for b in B:
        C_ref = qA.dequantize().numpy() * b.item()
        qC = _quantize(C_ref, scale, zero_point)
        dqC = _dequantize(qC, scale, zero_point)
        qC_hat = mul(qA, b.item(), scale, zero_point)
        dqC_hat = qC_hat.dequantize()
        self.assertEqual(dqC, dqC_hat)
def test_qnnpack_relu(self, X):
    X, (scale, zero_point, torch_type) = X
    relu = torch.ops.quantized.qnnpack_relu

    X = torch.from_numpy(X)
    Y = X.clone()

    qX = torch.quantize_linear(X, scale=scale, zero_point=zero_point, dtype=torch_type)
    qY_hat = relu(qX)

    Y[Y < 0] = 0
    qY = torch.quantize_linear(Y, scale=scale, zero_point=zero_point, dtype=torch_type)
    self.assertEqual(qY, qY_hat)
def test_qtensor_dtypes(self):
    r = torch.rand(3, 2, dtype=torch.float) * 2 - 4
    scale = 2
    zero_point = 2
    qr = torch.quantize_linear(r, scale, zero_point, torch.qint8)
    rqr = qr.dequantize()
    self.assertTrue(np.allclose(r.numpy(), rqr.numpy(), atol=2 / scale))
    qr = torch.quantize_linear(r, scale, zero_point, torch.quint8)
    rqr = qr.dequantize()
    self.assertTrue(np.allclose(r.numpy(), rqr.numpy(), atol=2 / scale))
    qr = torch.quantize_linear(r, scale, zero_point, torch.qint32)
    rqr = qr.dequantize()
    self.assertTrue(np.allclose(r.numpy(), rqr.numpy(), atol=2 / scale))
def test_max_pool2d(self, X, kernel, stride, dilation, padding):
    X, (scale, zero_point, torch_type) = X
    # Check constraints
    assume(kernel // 2 >= padding)  # Kernel cannot be overhanging!
    iH, iW = X.shape[-2:]
    oH = self._pool_output_shape(iH, kernel, padding, stride, dilation)
    assume(oH > 0)
    oW = self._pool_output_shape(iW, kernel, padding, stride, dilation)
    assume(oW > 0)

    a = torch.from_numpy(X)
    a_pool = torch.nn.functional.max_pool2d(a, kernel_size=kernel,
                                            stride=stride,
                                            padding=padding,
                                            dilation=dilation)
    a_ref = torch.quantize_linear(a_pool, scale=scale,
                                  zero_point=zero_point, dtype=torch_type)
    a_ref = a_ref.dequantize()
    qa = torch.quantize_linear(a, scale=scale, zero_point=zero_point,
                               dtype=torch_type)

    ops_under_test = {
        "torch": torch.max_pool2d,
        "nn.functional": torch.nn.functional.max_pool2d,
        "nn.quantized.functional": torch.nn.quantized.functional.max_pool2d
    }

    for name, op in ops_under_test.items():
        a_hat = op(qa, kernel_size=kernel, stride=stride, padding=padding,
                   dilation=dilation)
        self.assertEqual(a_ref, a_hat.dequantize(),
                         message="{} results are off".format(name))

    # Test the ops.quantized op separately, because it does not handle a None stride.
    a_hat = torch.ops.quantized.max_pool2d(
        qa, kernel_size=_pair(kernel),
        stride=_pair(kernel if stride is None else stride),
        padding=_pair(padding), dilation=_pair(dilation))
    self.assertEqual(a_ref, a_hat.dequantize(),
                     message="ops.quantized.max_pool2d results are off")
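# Both max_pool2d tests call a self._pool_output_shape helper that is not shown
# in this listing. A minimal sketch of what such a helper could compute,
# assuming the standard (floor-mode) pooling output-size formula; the original
# implementation may differ in details such as ceil-mode handling.
def _pool_output_shape(self, input_size, kernel_size, padding, stride, dilation):
    """Output length of one spatial dimension after pooling."""
    if stride is None:
        stride = kernel_size
    return (input_size + 2 * padding - dilation * (kernel_size - 1) - 1) // stride + 1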
def test_qtensor(self):
    num_elements = 10
    r = torch.ones(num_elements, dtype=torch.float)
    scale = 1.0
    zero_point = 2
    qr = torch.quantize_linear(r, scale, zero_point, torch.quint8)
    self.assertEqual(qr.q_scale(), scale)
    self.assertEqual(qr.q_zero_point(), zero_point)
    self.assertTrue(qr.is_quantized)
    self.assertFalse(r.is_quantized)
    self.assertEqual(qr.qscheme(), torch.per_tensor_affine)
    self.assertTrue(isinstance(qr.qscheme(), torch.qscheme))
    # slicing and int_repr
    int_repr = qr.int_repr()
    for num in int_repr:
        self.assertEqual(num, 3)
    for num in qr[2:].int_repr():
        self.assertEqual(num, 3)
    # dequantize
    rqr = qr.dequantize()
    for i in range(num_elements):
        self.assertEqual(r[i], rqr[i])
    # Scalar Tensor
    # item
    r = torch.ones(1, dtype=torch.float)
    qr = torch.quantize_linear(r, scale, zero_point, torch.quint8)
    self.assertEqual(qr.item(), 1)
    self.assertEqual(qr[0].item(), 1)
    # assignment
    self.assertTrue(qr[0].is_quantized)
    qr[0] = 11.3  # float assignment
    self.assertEqual(qr.item(), 11)
    x = torch.ones(1, dtype=torch.float) * 15.3
    # Copying from a float Tensor
    qr[:] = x
    self.assertEqual(qr.item(), 15)
    # we can also print a qtensor
    self.assertEqual(' '.join(str(qr).split()),
                     "tensor([15.], size=(1,), dtype=torch.quint8, " +
                     "quantization_scheme=torch.per_tensor_affine, " +
                     "scale=1.0, zero_point=2)")
    empty_r = torch.ones((0, 1), dtype=torch.float)
    empty_qr = torch.quantize_linear(empty_r, scale, zero_point, torch.quint8)
    self.assertEqual(' '.join(str(empty_qr).split()),
                     "tensor([], size=(0, 1), dtype=torch.quint8, " +
                     "quantization_scheme=torch.per_tensor_affine, " +
                     "scale=1.0, zero_point=2)")
def test_qrelu(self, Q):
    X, (scale, zero_point), (qmin, qmax), (torch_type, np_type) = Q
    relu = torch.ops.quantized.relu

    Y = X.copy()
    X = torch.from_numpy(X)

    qX = torch.quantize_linear(X, scale=scale, zero_point=zero_point, dtype=torch_type)
    qY_hat = relu(qX)

    Y[Y < 0] = 0
    qY = torch.quantize_linear(torch.from_numpy(Y), scale=scale,
                               zero_point=zero_point, dtype=torch_type)
    self.assertEqual(qY.int_repr(), qY_hat.int_repr())
def test_max_pool2d(self, Q, kernel, stride, dilation, padding):
    import torch.nn.functional as F
    X, (scale, zero_point), (qmin, qmax), (torch_type, np_type) = Q
    # Check constraints
    assume(kernel // 2 >= padding)  # Kernel cannot be overhanging!
    iH, iW = X.shape[-2:]
    oH = self._pool_output_shape(iH, kernel, padding, stride, dilation)
    assume(oH > 0)
    oW = self._pool_output_shape(iW, kernel, padding, stride, dilation)
    assume(oW > 0)
    k = (kernel, kernel)
    s = (stride, stride)
    d = (dilation, dilation)
    p = (padding, padding)

    q_max_pool = torch.ops.quantized.max_pool2d

    a = torch.from_numpy(X)
    qa = torch.quantize_linear(a, scale=scale, zero_point=zero_point,
                               dtype=torch_type)

    a_hat = qa.dequantize()
    a_pool = F.max_pool2d(a_hat, kernel_size=k, stride=s, padding=p, dilation=d)
    qa_pool_hat = q_max_pool(qa, kernel_size=k, stride=s, padding=p, dilation=d)
    a_pool_hat = qa_pool_hat.dequantize()

    np.testing.assert_equal(a_pool.numpy(), a_pool_hat.numpy())
def test_qconv_unpack(self, X, strideH, strideW, padH, padW):
    (inputs, filters, bias, groups) = X
    inputs, (inputs_scale, inputs_zero_point, inputs_qtype) = inputs
    filters, (filters_scale, filters_zero_point, filters_qtype) = filters
    bias, (bias_scale, bias_zero_point, bias_qtype) = bias

    qconv_prepack = torch.ops.quantized.fbgemm_conv_prepack
    qconv_unpack = torch.ops.quantized.fbgemm_conv_unpack

    # Original tensor is assumed to be in K(C/G)RS format
    W = torch.from_numpy(filters).to(torch.float)
    # K(C/G)RS -> KRS(C/G)
    W_KRSC = W.permute([0, 2, 3, 1]).contiguous()

    W_q = torch.quantize_linear(W_KRSC, scale=filters_scale,
                                zero_point=filters_zero_point,
                                dtype=filters_qtype)

    # Pack weights using the weight packing operator
    strides = [strideH, strideW]
    paddings = [padH, padW]
    dilations = [1, 1]
    W_packed = qconv_prepack(W_q, strides, paddings, dilations, groups)
    # Unpack weights using the weight unpacking operator (used for serialization)
    W_unpacked = qconv_unpack(W_packed)

    # Assert equal
    np.testing.assert_equal(W_q.int_repr().numpy(), W_unpacked.int_repr().numpy())
    np.testing.assert_equal(W_q.q_scale(), W_unpacked.q_scale())
    np.testing.assert_equal(W_q.q_zero_point(), W_unpacked.q_zero_point())
def test_adaptive_avg_pool2d(self, X, output_size_h, output_size_w):
    X, (scale, zero_point, torch_type) = X
    H, W = X.shape[-2:]
    assume(output_size_h <= H)
    assume(output_size_w <= W)
    if output_size_h == output_size_w:
        output_size = output_size_h
    else:
        output_size = (output_size_h, output_size_w)

    X = torch.from_numpy(X)
    qX = torch.quantize_linear(X, scale=scale, zero_point=zero_point,
                               dtype=torch_type)

    # Run reference on int_repr + round to avoid double rounding error.
    X_ref = torch.nn.functional.adaptive_avg_pool2d(
        qX.int_repr().to(torch.float), output_size).round()

    ops_under_test = {
        "nn.functional": torch.nn.functional.adaptive_avg_pool2d,
        "nn.quantized.functional": torch.nn.quantized.functional.adaptive_avg_pool2d
    }

    error_message = r"Results are off for {}:\n\tExpected:\n{}\n\tGot:\n{}"

    for name, op in ops_under_test.items():
        qX_hat = op(qX, output_size=output_size)
        qX_repr = qX_hat.int_repr()
        self.assertEqual(X_ref, qX_repr,
                         message=error_message.format(name, X_ref, qX_repr))
def test_numerical_consistency_cuda(self):
    '''
    Comparing numerical consistency between CPU quantize/dequantize op and
    the CUDA fake quantize op
    '''
    np.random.seed(NP_RANDOM_SEED)
    fake_quantize_per_tensor_affine_forward = \
        torch.ops.quantized.fake_quantize_per_tensor_affine_forward

    scale = 3
    zero_point = 2
    num_bits = 8
    X = np.random.rand(20, 20) * 125
    X_torch = torch.from_numpy(X).float()
    Y = torch.dequantize(torch.quantize_linear(X_torch, scale, zero_point, torch.qint8))
    Y_prime = fake_quantize_per_tensor_affine_forward(
        X=X_torch.cuda(), scale=scale, zero_point=zero_point,
        num_bits=num_bits, quant_delay=0, iter=0)
    tolerance = 1e-6
    np.testing.assert_allclose(Y, Y_prime.cpu(), rtol=tolerance, atol=tolerance)
def from_float(cls, mod):
    r"""Create a dynamic quantized module from a float module or qparams_dict

    Args:
        mod (Module): a float module, either produced by torch.quantization
                      utilities or provided by the user
    """
    assert type(mod) == NNLinear, \
        'nn.quantized.dynamic.Linear.from_float only works for nn.Linear'
    assert hasattr(mod, 'qconfig'), 'Input float module must have qconfig defined'
    if mod.qconfig is not None and mod.qconfig.weight() is not None:
        weight_observer = mod.qconfig.weight()
    else:
        # We have circular import issues if we import the qconfig at the top of
        # this file: https://github.com/pytorch/pytorch/pull/24231. The current
        # workaround is to postpone the import until we need it.
        from torch.quantization.QConfig import default_dynamic_qconfig
        weight_observer = default_dynamic_qconfig.weight()
    assert weight_observer.dtype == torch.qint8, 'Weight observer must have dtype torch.qint8'
    weight_observer(mod.weight)
    wt_scale, wt_zp = weight_observer.calculate_qparams()
    qweight = torch.quantize_linear(mod.weight.float(), float(wt_scale), int(wt_zp), torch.qint8)
    qlinear = Linear(mod.in_features, mod.out_features)
    qlinear.set_weight_bias(qweight, mod.bias)
    return qlinear
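# A hypothetical usage sketch for the dynamic from_float above: attach a
# qconfig to a float nn.Linear and convert it. The module paths
# (torch.nn.quantized.dynamic, torch.quantization.QConfig) follow the PyTorch
# version these snippets target and should be treated as assumptions.
import torch
import torch.nn as nn
import torch.nn.quantized.dynamic as nnqd
from torch.quantization.QConfig import default_dynamic_qconfig

float_linear = nn.Linear(5, 10)
float_linear.qconfig = default_dynamic_qconfig     # weights observed as qint8
dq_linear = nnqd.Linear.from_float(float_linear)   # dynamic quantized module
out = dq_linear(torch.randn(2, 5))                 # activations stay float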
def process_weights(ihhh, layer, suffix):
    weight_name = 'weight_{}_l{}{}'.format(ihhh, layer, suffix)
    bias_name = 'bias_{}_l{}{}'.format(ihhh, layer, suffix)

    weight = getattr(mod, weight_name)
    bias = getattr(mod, bias_name)

    # for each layer, for each direction we need to quantize and pack
    # weights and pack parameters in this order:
    #
    #   w_ih, w_hh, b_ih, b_hh
    weight_observer(weight)
    wt_scale, wt_zp = weight_observer.calculate_qparams()
    qweight = torch.quantize_linear(weight.float(), float(wt_scale), int(wt_zp), torch.qint8)
    packed_weight = torch.ops.quantized.linear_prepack(qweight, bias)

    params = [packed_weight, bias]
    pos_names = ['w', 'b']
    ret_name = ['{}_{}_l{}{}'.format(name, ihhh, layer, suffix) for name in pos_names]
    quantized_weights.append(qweight)
    packed_weights.append(ret_name[0])
    return params, ret_name
def test_pool_api(self):
    """Tests the correctness of the pool module.

    The correctness is defined against the functional implementation.
    """
    N, C, H, W = 10, 10, 10, 3
    kwargs = {
        'kernel_size': 2,
        'stride': None,
        'padding': 0,
        'dilation': 1
    }

    scale, zero_point = 1.0 / 255, 128

    X = torch.randn(N, C, H, W, dtype=torch.float32)
    qX = torch.quantize_linear(X, scale=scale, zero_point=zero_point,
                               dtype=torch.quint8)
    qX_expect = torch.nn.functional.max_pool2d(qX, **kwargs)

    pool_under_test = torch.nn.quantized.MaxPool2d(**kwargs)
    qX_hat = pool_under_test(qX)
    self.assertEqual(qX_expect, qX_hat)

    # JIT testing
    self.checkScriptable(pool_under_test, list(zip([X], [qX_expect])))
def linear(input, weight, bias=None, scale=None, zero_point=None):
    # type: (Tensor, Tensor, Optional[Tensor], Optional[float], Optional[int]) -> Tensor
    r"""
    Applies a linear transformation to the incoming quantized data:
    :math:`y = xA^T + b`.
    See :class:`~torch.nn.Linear`

    .. note::

        The current implementation uses packed weights, which carries a
        performance penalty. If you want to avoid the overhead, use
        :class:`~torch.nn.quantized.Linear`.

    Args:
        input (Tensor): Quantized input of type `torch.quint8`
        weight (Tensor): Quantized weight of type `torch.qint8`
        bias (Tensor): None or Quantized bias of type `torch.qint32`
        scale (double): output scale. If None, derived from the input scale
        zero_point (long): output zero point. If None, derived from the input
            zero_point

    Shape:
        - Input: :math:`(N, *, in\_features)` where `*` means any number of
          additional dimensions
        - Weight: :math:`(out\_features, in\_features)`
        - Bias: :math:`(out\_features)`
        - Output: :math:`(N, *, out\_features)`
    """
    if scale is None:
        scale = input.q_scale()
    if zero_point is None:
        zero_point = input.q_zero_point()
    _packed_weight = torch.ops.quantized.fbgemm_linear_prepack(weight)
    if bias is not None:
        bias = torch.quantize_linear(bias.dequantize(),
                                     weight.q_scale() * input.q_scale(), 0,
                                     torch.qint32)
    return torch.ops.quantized.fbgemm_linear(input, _packed_weight, bias, scale, zero_point)
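# A brief usage sketch for the functional linear defined above, assuming quint8
# activations, qint8 weights, and a qint32 bias quantized with scale
# input_scale * weight_scale as documented; the concrete scales and zero points
# here are arbitrary illustrative values.
import torch
import torch.nn.quantized.functional as qF

x = torch.quantize_linear(torch.randn(4, 8), 0.2, 10, torch.quint8)
w = torch.quantize_linear(torch.randn(16, 8), 0.1, 0, torch.qint8)
b = torch.quantize_linear(torch.randn(16),
                          x.q_scale() * w.q_scale(), 0, torch.qint32)
y = qF.linear(x, w, bias=b, scale=0.5, zero_point=3)  # quantized (quint8) output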