def test_linear_api(self):
    """test API functionality for nn.quantized.linear"""
    in_features = 10
    out_features = 20
    batch_size = 5
    W = torch.rand(out_features, in_features).float()
    W_q = torch.quantize_linear(W, 0.1, 4, torch.qint8)
    W_pack = torch.ops.quantized.fbgemm_linear_prepack(W_q)
    X = torch.rand(batch_size, in_features).float()
    X_q = torch.quantize_linear(X, 0.2, 10, torch.quint8)
    B = torch.rand(out_features).float()
    B_q = torch.quantize_linear(B, W_q.q_scale() * X_q.q_scale(), 0,
                                torch.qint32)
    out_scale = 0.5
    out_zero_point = 3
    qlinear = nnq.Linear(in_features, out_features)
    qlinear._packed_weight = W_pack
    qlinear.bias = B_q
    qlinear.out_scale = torch.tensor([out_scale])
    qlinear.out_zero_point = torch.tensor([out_zero_point])
    Z_q = qlinear(X_q)
    # Check if the module implementation matches calling the
    # ops directly
    Z_ref = torch.ops.quantized.fbgemm_linear(X_q, W_pack, B_q,
                                              out_scale, out_zero_point)
    self.assertEqual(Z_ref, Z_q)

    # Test serialization of quantized Linear Module using state_dict
    model_dict = qlinear.state_dict()
    self.assertEqual(model_dict['weight'], W_q)
    self.assertEqual(model_dict['bias'], B_q)
    with tempfile.NamedTemporaryFile() as f:
        torch.save(model_dict, f)
        f.seek(0)
        loaded_dict = torch.load(f)
    for key in model_dict:
        self.assertEqual(model_dict[key], loaded_dict[key])
    loaded_qlinear = nnq.Linear(in_features, out_features)
    loaded_qlinear.load_state_dict(loaded_dict)

    linear_unpack = torch.ops.quantized.fbgemm_linear_unpack
    self.assertEqual(linear_unpack(qlinear._packed_weight),
                     linear_unpack(loaded_qlinear._packed_weight))
    self.assertEqual(qlinear.bias, loaded_qlinear.bias)
    self.assertEqual(qlinear.out_scale, loaded_qlinear.out_scale)
    self.assertEqual(qlinear.out_zero_point, loaded_qlinear.out_zero_point)
    self.assertTrue(dir(qlinear) == dir(loaded_qlinear))
    self.assertTrue(hasattr(qlinear, '_packed_weight'))
    self.assertTrue(hasattr(loaded_qlinear, '_packed_weight'))
    self.assertTrue(hasattr(qlinear, 'weight'))
    self.assertTrue(hasattr(loaded_qlinear, 'weight'))
    self.assertEqual(qlinear.weight, loaded_qlinear.weight)
    self.assertEqual(
        qlinear.weight,
        torch.ops.quantized.fbgemm_linear_unpack(qlinear._packed_weight))
    Z_q2 = qlinear(X_q)
    self.assertEqual(Z_q, Z_q2)
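
# The op-level check above compares the module against
# torch.ops.quantized.fbgemm_linear directly. For intuition, here is a
# float-domain sketch of what that op computes (dequantize -> float linear ->
# requantize). The helper name is ours, not a PyTorch API, and it matches the
# fbgemm kernel only up to rounding in the integer arithmetic.
def _reference_quantized_linear(X_q, W_q, B_q, out_scale, out_zero_point):
    import torch.nn.functional as F
    X, W = X_q.dequantize(), W_q.dequantize()
    B = B_q.dequantize() if B_q is not None else None
    Z = F.linear(X, W, B)
    # Requantize the float result with the requested output parameters.
    return torch.quantize_linear(Z, out_scale, out_zero_point, torch.quint8)
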
def test_conv_api(self, use_bias, use_fused):
    """Tests the correctness of the conv module.

    The correctness is defined against the functional implementation.
    """
    N, iC, H, W = 10, 10, 10, 3
    oC, g, kH, kW = 16, 1, 3, 3
    scale, zero_point = 1.0 / 255, 128

    X = torch.randn(N, iC, H, W, dtype=torch.float32)
    X = X.permute([0, 2, 3, 1]).contiguous()
    qX = torch.quantize_linear(X, scale=scale, zero_point=128,
                               dtype=torch.quint8)

    w = torch.randn(oC, iC // g, kH, kW, dtype=torch.float32)
    qw = torch.quantize_linear(w, scale=scale, zero_point=0,
                               dtype=torch.qint8)

    b = torch.randn(oC, dtype=torch.float32) if use_bias else None
    qb = torch.quantize_linear(b, scale=1.0 / 1024, zero_point=0,
                               dtype=torch.qint32) if use_bias else None

    if use_fused:
        conv_under_test = ConvReLU2d(in_channels=iC,
                                     out_channels=oC,
                                     kernel_size=(kH, kW),
                                     stride=1,
                                     padding=0,
                                     dilation=1,
                                     groups=g,
                                     bias=use_bias,
                                     padding_mode='zeros')
    else:
        conv_under_test = Conv2d(in_channels=iC,
                                 out_channels=oC,
                                 kernel_size=(kH, kW),
                                 stride=1,
                                 padding=0,
                                 dilation=1,
                                 groups=g,
                                 bias=use_bias,
                                 padding_mode='zeros')
    # Run the module with default-initialized parameters.
    # This tests that the constructor is correct.
    conv_under_test(qX)

    conv_under_test.set_weight(qw)
    conv_under_test.bias = qb
    conv_under_test.scale = scale
    conv_under_test.zero_point = zero_point

    # Test members
    self.assertTrue(hasattr(conv_under_test, '_packed_weight'))
    self.assertTrue(hasattr(conv_under_test, 'scale'))
    self.assertTrue(hasattr(conv_under_test, 'zero_point'))

    # Test properties
    self.assertEqual(qw, conv_under_test.weight())
    self.assertEqual(qb, conv_under_test.bias)
    self.assertEqual(scale, conv_under_test.scale)
    self.assertEqual(zero_point, conv_under_test.zero_point)

    # Test forward
    result_under_test = conv_under_test(qX)
    result_reference = qF.conv2d(qX, qw, bias=qb,
                                 scale=scale, zero_point=zero_point,
                                 stride=1, padding=0, dilation=1,
                                 groups=g, dtype=torch.quint8)
    if use_fused:
        # result_reference < zero_point doesn't work for qtensor yet
        # result_reference[result_reference < zero_point] = zero_point
        MB, OC, OH, OW = result_reference.size()
        for i in range(MB):
            for j in range(OC):
                for h in range(OH):
                    for w in range(OW):
                        if result_reference[i][j][h][w].int_repr() < zero_point:
                            # Assign 0.0, which quantizes to zero_point.
                            result_reference[i][j][h][w] = 0.
    self.assertEqual(result_reference, result_under_test,
                     message="Tensors are not equal.")

    # Test serialization of quantized Conv Module using state_dict
    model_dict = conv_under_test.state_dict()
    self.assertEqual(model_dict['weight'], qw)
    if use_bias:
        self.assertEqual(model_dict['bias'], qb)
    with tempfile.NamedTemporaryFile() as f:
        torch.save(model_dict, f)
        f.seek(0)
        loaded_dict = torch.load(f)
    for key in model_dict:
        self.assertEqual(loaded_dict[key], model_dict[key])
    if use_fused:
        loaded_conv_under_test = ConvReLU2d(in_channels=iC,
                                            out_channels=oC,
                                            kernel_size=(kH, kW),
                                            stride=1,
                                            padding=0,
                                            dilation=1,
                                            groups=g,
                                            bias=use_bias,
                                            padding_mode='zeros')
    else:
        loaded_conv_under_test = Conv2d(in_channels=iC,
                                        out_channels=oC,
                                        kernel_size=(kH, kW),
                                        stride=1,
                                        padding=0,
                                        dilation=1,
                                        groups=g,
                                        bias=use_bias,
                                        padding_mode='zeros')
    loaded_conv_under_test.load_state_dict(loaded_dict)
    self.assertEqual(loaded_conv_under_test.weight(),
                     conv_under_test.weight())
    if use_bias:
        self.assertEqual(loaded_conv_under_test.bias, conv_under_test.bias)
    self.assertEqual(loaded_conv_under_test.scale, conv_under_test.scale)
    self.assertEqual(loaded_conv_under_test.zero_point,
                     conv_under_test.zero_point)
    self.assertTrue(dir(loaded_conv_under_test) == dir(conv_under_test))
    self.assertTrue(hasattr(conv_under_test, '_packed_weight'))
    self.assertTrue(hasattr(loaded_conv_under_test, '_packed_weight'))
    self.assertTrue(hasattr(conv_under_test, 'weight'))
    self.assertTrue(hasattr(loaded_conv_under_test, 'weight'))
    self.assertEqual(loaded_conv_under_test.weight(),
                     conv_under_test.weight())
    self.assertEqual(loaded_conv_under_test.weight(), qw)
    loaded_result = loaded_conv_under_test(qX)
    self.assertEqual(loaded_result, result_reference)

    # Test serialization of the module directly
    with tempfile.NamedTemporaryFile() as f:
        torch.save(conv_under_test, f)
        f.seek(0)
        loaded_conv = torch.load(f)
    self.assertEqual(conv_under_test.bias, loaded_conv.bias)
    self.assertEqual(conv_under_test.scale, loaded_conv.scale)
    self.assertEqual(conv_under_test.zero_point, loaded_conv.zero_point)

    # JIT testing
    self.checkScriptable(conv_under_test,
                         list(zip([qX], [result_reference])),
                         check_save_load=True)

    # Test from_float
    float_conv = torch.nn.Conv2d(in_channels=iC,
                                 out_channels=oC,
                                 kernel_size=(kH, kW),
                                 stride=1,
                                 padding=0,
                                 dilation=1,
                                 groups=g,
                                 bias=use_bias,
                                 padding_mode='zeros').float()
    float_conv.qconfig = torch.quantization.default_qconfig
    torch.quantization.prepare(float_conv)
    float_conv(X.float())
    quantized_float_conv = torch.nn.Sequential(float_conv)
    torch.quantization.convert(quantized_float_conv)

    # Smoke test to make sure the module actually runs
    quantized_float_conv(qX)

    # Check that the bias is quantized based on the output scale
    if use_bias:
        qbias = torch.quantize_linear(float_conv.bias,
                                      quantized_float_conv[0].scale / 2**16,
                                      0, torch.qint32)
        self.assertEqual(quantized_float_conv[0].bias.dequantize(),
                         qbias.dequantize())
    # Smoke test extra_repr
    str(quantized_float_conv)
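
# The from_float section above exercises the eager-mode quantization flow:
# prepare() attaches observers, a calibration forward pass collects activation
# statistics, and convert() swaps float modules for quantized ones in place.
# A minimal standalone sketch of that flow; the module and input shapes here
# are arbitrary, chosen only for illustration:
def _demo_eager_quantization_flow():
    model = torch.nn.Sequential(torch.nn.Conv2d(2, 4, 3))
    model.qconfig = torch.quantization.default_qconfig
    torch.quantization.prepare(model)
    model(torch.randn(1, 2, 8, 8))  # calibration pass
    torch.quantization.convert(model)
    return model
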
def test_linear_api(self, batch_size, in_features, out_features, use_bias,
                    use_fused):
    """test API functionality for nn.quantized.linear and
    nn._intrinsic.quantized.linear_relu"""
    W = torch.rand(out_features, in_features).float()
    W_q = torch.quantize_linear(W, 0.1, 4, torch.qint8)
    W_pack = torch.ops.quantized.fbgemm_linear_prepack(W_q)
    X = torch.rand(batch_size, in_features).float()
    X_q = torch.quantize_linear(X, 0.2, 10, torch.quint8)
    B = torch.rand(out_features).float() if use_bias else None
    B_q = torch.quantize_linear(B, W_q.q_scale() * X_q.q_scale(), 0,
                                torch.qint32) if use_bias else None
    scale = 0.5
    zero_point = 3
    if use_fused:
        qlinear = nnq_fused.LinearReLU(in_features, out_features)
    else:
        qlinear = nnq.Linear(in_features, out_features)
    qlinear._packed_weight = W_pack
    qlinear.bias = B_q if use_bias else None
    qlinear.scale = torch.tensor([scale], dtype=torch.double)
    qlinear.zero_point = torch.tensor([zero_point], dtype=torch.long)
    Z_q = qlinear(X_q)
    # Check if the module implementation matches calling the
    # ops directly
    if use_fused:
        Z_ref = torch.ops.quantized.fbgemm_linear_relu(X_q, W_pack, B_q,
                                                       scale, zero_point)
    else:
        Z_ref = torch.ops.quantized.fbgemm_linear(X_q, W_pack, B_q,
                                                  scale, zero_point)
    self.assertEqual(Z_ref, Z_q)

    # Test serialization of quantized Linear Module using state_dict
    model_dict = qlinear.state_dict()
    self.assertEqual(model_dict['weight'], W_q)
    if use_bias:
        self.assertEqual(model_dict['bias'], B_q)
    with tempfile.NamedTemporaryFile() as f:
        torch.save(model_dict, f)
        f.seek(0)
        loaded_dict = torch.load(f)
    for key in model_dict:
        self.assertEqual(model_dict[key], loaded_dict[key])
    if use_fused:
        loaded_qlinear = nnq_fused.LinearReLU(in_features, out_features)
    else:
        loaded_qlinear = nnq.Linear(in_features, out_features)
    loaded_qlinear.load_state_dict(loaded_dict)

    linear_unpack = torch.ops.quantized.fbgemm_linear_unpack
    self.assertEqual(linear_unpack(qlinear._packed_weight),
                     linear_unpack(loaded_qlinear._packed_weight))
    if use_bias:
        self.assertEqual(qlinear.bias, loaded_qlinear.bias)
    self.assertEqual(qlinear.scale, loaded_qlinear.scale)
    self.assertEqual(qlinear.zero_point, loaded_qlinear.zero_point)
    self.assertTrue(dir(qlinear) == dir(loaded_qlinear))
    self.assertTrue(hasattr(qlinear, '_packed_weight'))
    self.assertTrue(hasattr(loaded_qlinear, '_packed_weight'))
    self.assertTrue(hasattr(qlinear, 'weight'))
    self.assertTrue(hasattr(loaded_qlinear, 'weight'))
    self.assertEqual(qlinear.weight, loaded_qlinear.weight)
    self.assertEqual(
        qlinear.weight,
        torch.ops.quantized.fbgemm_linear_unpack(qlinear._packed_weight))
    Z_q2 = qlinear(X_q)
    self.assertEqual(Z_q, Z_q2)
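
# Both linear tests lean on the fbgemm prepack/unpack pair: prepack stores the
# quantized weight in a kernel-specific layout, and unpack recovers a plain
# quantized tensor, which is why the serialization checks above compare
# unpacked weights rather than the packed objects. A quick round-trip sketch
# (shapes arbitrary, helper name ours):
def _demo_prepack_roundtrip():
    W_q = torch.quantize_linear(torch.rand(4, 8), 0.1, 4, torch.qint8)
    packed = torch.ops.quantized.fbgemm_linear_prepack(W_q)
    # Unpacking should recover the same values and quantization parameters.
    return torch.ops.quantized.fbgemm_linear_unpack(packed)
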
def test_linear_api(self, batch_size, in_features, out_features, use_bias,
                    use_fused):
    """test API functionality for nn.quantized.linear and
    nn._intrinsic.quantized.linear_relu"""
    W = torch.rand(out_features, in_features).float()
    W_q = torch.quantize_linear(W, 0.1, 4, torch.qint8)
    X = torch.rand(batch_size, in_features).float()
    X_q = torch.quantize_linear(X, 0.2, 10, torch.quint8)
    B = torch.rand(out_features).float() if use_bias else None
    B_q = torch.quantize_linear(B, W_q.q_scale() * X_q.q_scale(), 0,
                                torch.qint32) if use_bias else None
    scale = 0.5
    zero_point = 3
    if use_fused:
        qlinear = nnq_fused.LinearReLU(in_features, out_features)
    else:
        qlinear = nnq.Linear(in_features, out_features)
    qlinear.set_weight(W_q)
    # Simple round-trip test to ensure the weight()/set_weight() API works
    self.assertEqual(qlinear.weight(), W_q)
    W_pack = qlinear._packed_weight
    qlinear.bias = B_q if use_bias else None
    qlinear.scale = float(scale)
    qlinear.zero_point = int(zero_point)
    Z_q = qlinear(X_q)
    # Check if the module implementation matches calling the
    # ops directly
    if use_fused:
        Z_ref = torch.ops.quantized.fbgemm_linear_relu(X_q, W_pack, B_q,
                                                       scale, zero_point)
    else:
        Z_ref = torch.ops.quantized.fbgemm_linear(X_q, W_pack, B_q,
                                                  scale, zero_point)
    self.assertEqual(Z_ref, Z_q)

    # Test serialization of quantized Linear Module using state_dict
    model_dict = qlinear.state_dict()
    self.assertEqual(model_dict['weight'], W_q)
    if use_bias:
        self.assertEqual(model_dict['bias'], B_q)
    with tempfile.NamedTemporaryFile() as f:
        torch.save(model_dict, f)
        f.seek(0)
        loaded_dict = torch.load(f)
    for key in model_dict:
        self.assertEqual(model_dict[key], loaded_dict[key])
    if use_fused:
        loaded_qlinear = nnq_fused.LinearReLU(in_features, out_features)
    else:
        loaded_qlinear = nnq.Linear(in_features, out_features)
    loaded_qlinear.load_state_dict(loaded_dict)

    linear_unpack = torch.ops.quantized.fbgemm_linear_unpack
    self.assertEqual(linear_unpack(qlinear._packed_weight),
                     linear_unpack(loaded_qlinear._packed_weight))
    if use_bias:
        self.assertEqual(qlinear.bias, loaded_qlinear.bias)
    self.assertEqual(qlinear.scale, loaded_qlinear.scale)
    self.assertEqual(qlinear.zero_point, loaded_qlinear.zero_point)
    self.assertTrue(dir(qlinear) == dir(loaded_qlinear))
    self.assertTrue(hasattr(qlinear, '_packed_weight'))
    self.assertTrue(hasattr(loaded_qlinear, '_packed_weight'))
    self.assertTrue(hasattr(qlinear, 'weight'))
    self.assertTrue(hasattr(loaded_qlinear, 'weight'))
    self.assertEqual(qlinear.weight(), loaded_qlinear.weight())
    self.assertEqual(
        qlinear.weight(),
        torch.ops.quantized.fbgemm_linear_unpack(qlinear._packed_weight))
    Z_q2 = qlinear(X_q)
    self.assertEqual(Z_q, Z_q2)

    # Test serialization of the module directly
    with tempfile.NamedTemporaryFile() as f:
        torch.save(qlinear, f)
        f.seek(0)
        loaded = torch.load(f)
    # This check is disabled pending an issue in PyTorch serialization:
    # https://github.com/pytorch/pytorch/issues/24045
    # self.assertEqual(qlinear.weight(), loaded.weight())
    self.assertEqual(qlinear.bias, loaded.bias)
    self.assertEqual(qlinear.scale, loaded.scale)
    self.assertEqual(qlinear.zero_point, loaded.zero_point)

    # Test JIT
    self.checkScriptable(qlinear, zip([X_q], [Z_ref]), check_save_load=True)
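
# A note on the bias quantization used throughout these tests: the int32
# accumulator of a quantized matmul carries an effective scale of
# x_scale * w_scale, so the bias is quantized to exactly that scale (with
# zero_point 0) so it can be added in the integer domain. A one-line sketch
# of the relationship (helper name ours):
def _quantize_bias_for(X_q, W_q, B):
    return torch.quantize_linear(B, X_q.q_scale() * W_q.q_scale(), 0,
                                 torch.qint32)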