import io
import tempfile

import numpy as np
import torch
import torch.nn.functional as F
import torch.nn.quantized.dynamic as nnqd

from hypothesis import assume
from hypothesis import strategies as st
from hypothesis.extra import numpy as stnp
from hypothesis.searchstrategy import SearchStrategy

# Helpers such as `_get_valid_min_max`, `_calculate_dynamic_qparams`,
# `_ENFORCED_ZERO_POINT`, `_dequantize`, `avoid_vpmaddubsw_overflow_linear`,
# and `prepare_dynamic` are defined elsewhere in these test utilities.

@st.composite
def tensor(draw, shapes=None, elements=None, qparams=None):
    if isinstance(shapes, SearchStrategy):
        _shape = draw(shapes)
    else:
        _shape = draw(st.sampled_from(shapes))
    if qparams is None:
        if elements is None:
            elements = st.floats(-1e6, 1e6, allow_nan=False, width=32)
        X = draw(stnp.arrays(dtype=np.float32, elements=elements, shape=_shape))
        assume(not (np.isnan(X).any() or np.isinf(X).any()))
        return X, None
    qparams = draw(qparams)
    if elements is None:
        min_value, max_value = _get_valid_min_max(qparams)
        elements = st.floats(min_value, max_value, allow_infinity=False,
                             allow_nan=False, width=32)
    X = draw(stnp.arrays(dtype=np.float32, elements=elements, shape=_shape))
    # Recompute the scale and zero_points according to the X statistics.
    scale, zp = _calculate_dynamic_qparams(X, qparams[2])
    enforced_zp = _ENFORCED_ZERO_POINT.get(qparams[2], None)
    if enforced_zp is not None:
        zp = enforced_zp
    return X, (scale, zp, qparams[2])
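# Illustrative usage of the strategy above, as a commented sketch (not part of
# the original suite). It assumes a companion `qparams` strategy in these
# utilities that draws (scale, zero_point, torch_dtype) tuples, matching the
# tuple this strategy returns:
#
#   from hypothesis import given
#
#   @given(X=tensor(shapes=((4, 8), (2, 3, 5)), qparams=qparams()))
#   def test_roundtrip(X):
#       X, (scale, zp, torch_type) = X
#       X_q = torch.quantize_linear(torch.from_numpy(X), scale, zp, torch_type)
#       # Fake-quantize round trip is accurate to within one quantization step.
#       np.testing.assert_allclose(X, X_q.dequantize().numpy(), atol=scale)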
def test_qlinear(self, use_bias, use_relu):
    batch_size = 1
    input_channels = 2
    output_channels = 2

    qlinear_prepack = torch.ops.quantized.fbgemm_linear_prepack
    if use_relu:
        qlinear_dynamic = torch.ops.quantized.fbgemm_linear_relu_dynamic
    else:
        qlinear_dynamic = torch.ops.quantized.fbgemm_linear_dynamic

    X_fp32 = torch.tensor([[100, -150]], dtype=torch.float)
    W_fp32 = torch.tensor([[-150, 100], [100, -150]], dtype=torch.float)
    b_fp32 = torch.tensor([13, -20], dtype=torch.float) if use_bias else None

    W_scale, W_zp = _calculate_dynamic_qparams(W_fp32, torch.qint8)
    W_q = torch.quantize_linear(W_fp32, scale=W_scale, zero_point=W_zp,
                                dtype=torch.qint8)

    # Weight prepacking operator for dynamic quantized Linear
    W_prepack = qlinear_prepack(W_q)
    # Dynamic quantized Linear operator with prepacked weight
    Y_fp32 = qlinear_dynamic(X_fp32, W_prepack, b_fp32)

    Y_fp32_ref = F.linear(X_fp32, W_fp32, b_fp32)

    if use_relu:
        Y_fp32_ref[Y_fp32_ref < 0.0] = 0.0

    self.assertEqual(
        Y_fp32, Y_fp32_ref,
        message="torch.ops.quantized.fbgemm_linear_dynamic results are off")
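# For reference, the float math checked above works out to:
#   Y[0, 0] = 100 * (-150) + (-150) * 100    + 13 = -29987
#   Y[0, 1] = 100 * 100    + (-150) * (-150) - 20 =  32480
# With use_relu, the first entry clamps to 0.0. The two X entries are exactly
# the observed min and max, so the on-the-fly activation quantization is
# near-exact here, and the harness' assertEqual tolerance absorbs the residual.
#
# A minimal sketch of the "dynamic" part of the op, illustrative only and not
# FBGEMM's implementation (`_fake_dynamic_quant` is a hypothetical helper):
# activations are mapped to quint8 from their observed min/max, and the op
# returns a float result.
def _fake_dynamic_quant(x):
    # Widen the range to include 0 so the zero point is exactly representable.
    x_min = min(float(x.min()), 0.0)
    x_max = max(float(x.max()), 0.0)
    if x_max == x_min:
        return x.clone()
    scale = (x_max - x_min) / 255.0
    zero_point = int(min(max(round(-x_min / scale), 0), 255))
    x_q = torch.clamp(torch.round(x / scale) + zero_point, 0, 255)
    # Dequantize immediately: dynamic ops keep activations in float between ops.
    return (x_q - zero_point) * scale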
def test_linear_api(self, batch_size, in_features, out_features, use_bias,
                    use_default_observer):
    """test API functionality for nn.quantized.dynamic.Linear"""
    W = torch.rand(out_features, in_features).float()
    W_scale, W_zp = _calculate_dynamic_qparams(W, torch.qint8)
    W_q = torch.quantize_linear(W, W_scale, W_zp, torch.qint8)
    X = torch.rand(batch_size, in_features).float()
    B = torch.rand(out_features).float() if use_bias else None

    qlinear = nnqd.Linear(in_features, out_features)
    # Run module with default-initialized parameters.
    # This tests that the constructor is correct.
    qlinear(X)

    qlinear.set_weight(W_q)
    # Simple round-trip test to ensure weight()/set_weight() API
    self.assertEqual(qlinear.weight(), W_q)
    W_pack = qlinear._packed_weight
    qlinear.bias = B if use_bias else None

    Z_dq = qlinear(X)

    # Check if the module implementation matches calling the ops directly
    Z_ref = torch.ops.quantized.fbgemm_linear_dynamic(X, W_pack, B)
    self.assertEqual(Z_ref, Z_dq)

    # Test serialization of dynamic quantized Linear Module using state_dict
    model_dict = qlinear.state_dict()
    self.assertEqual(model_dict['weight'], W_q)
    if use_bias:
        self.assertEqual(model_dict['bias'], B)
    with tempfile.TemporaryFile() as f:
        torch.save(model_dict, f)
        f.seek(0)
        loaded_dict = torch.load(f)
    for key in model_dict:
        self.assertEqual(model_dict[key], loaded_dict[key])
    loaded_qlinear = nnqd.Linear(in_features, out_features)
    loaded_qlinear.load_state_dict(loaded_dict)

    linear_unpack = torch.ops.quantized.fbgemm_linear_unpack
    self.assertEqual(linear_unpack(qlinear._packed_weight),
                     linear_unpack(loaded_qlinear._packed_weight))
    if use_bias:
        self.assertEqual(qlinear.bias, loaded_qlinear.bias)
    self.assertTrue(dir(qlinear) == dir(loaded_qlinear))
    self.assertTrue(hasattr(qlinear, '_packed_weight'))
    self.assertTrue(hasattr(loaded_qlinear, '_packed_weight'))
    self.assertTrue(hasattr(qlinear, 'weight'))
    self.assertTrue(hasattr(loaded_qlinear, 'weight'))
    self.assertEqual(qlinear.weight(), loaded_qlinear.weight())
    self.assertEqual(
        qlinear.weight(),
        torch.ops.quantized.fbgemm_linear_unpack(qlinear._packed_weight))
    Z_dq2 = qlinear(X)
    self.assertEqual(Z_dq, Z_dq2)

    # test serialization of module directly
    with tempfile.TemporaryFile() as f:
        torch.save(qlinear, f)
        f.seek(0)
        loaded = torch.load(f)
    # This check is disabled pending an issue in PyTorch serialization:
    # https://github.com/pytorch/pytorch/issues/24045
    # self.assertEqual(qlinear.weight(), loaded.weight())
    self.assertEqual(qlinear.zero_point, loaded.zero_point)

    # Test JIT
    self.checkScriptable(qlinear, list(zip([X], [Z_ref])),
                         check_save_load=True)

    # Test from_float
    float_linear = torch.nn.Linear(in_features, out_features).float()
    if use_default_observer:
        float_linear.qconfig = torch.quantization.default_dynamic_qconfig
    prepare_dynamic(float_linear)
    float_linear(X.float())
    quantized_float_linear = nnqd.Linear.from_float(float_linear)

    # Smoke test to make sure the module actually runs
    quantized_float_linear(X)

    # Smoke test extra_repr
    str(quantized_float_linear)
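# Minimal end-to-end usage of the module exercised above, as a commented
# sketch built only from calls that already appear in this test:
#
#   fl = torch.nn.Linear(20, 30).float()
#   fl.qconfig = torch.quantization.default_dynamic_qconfig
#   prepare_dynamic(fl)
#   dql = nnqd.Linear.from_float(fl)
#   out = dql(torch.randn(128, 20))  # float in, float out; int8 weights inside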
def test_linear_api(self, batch_size, in_features, out_features, use_bias,
                    use_default_observer):
    """test API functionality for nn.quantized.dynamic.Linear"""
    W = torch.rand(out_features, in_features).float()
    W_scale, W_zp = _calculate_dynamic_qparams(W, torch.qint8)
    W_q = torch.quantize_per_tensor(W, W_scale, W_zp, torch.qint8)
    X = torch.rand(batch_size, in_features).float()
    B = torch.rand(out_features).float() if use_bias else None

    qlinear = nnqd.Linear(in_features, out_features)
    # Run module with default-initialized parameters.
    # This tests that the constructor is correct.
    qlinear.set_weight_bias(W_q, B)
    qlinear(X)

    # Simple round-trip test to ensure weight()/set_weight() API
    self.assertEqual(qlinear.weight(), W_q)
    W_pack = qlinear._packed_params
    Z_dq = qlinear(X)

    # Check if the module implementation matches calling the ops directly
    Z_ref = torch.ops.quantized.linear_dynamic(X, W_pack)
    self.assertEqual(Z_ref, Z_dq)

    # Test serialization of dynamic quantized Linear Module using state_dict
    model_dict = qlinear.state_dict()
    self.assertEqual(model_dict['weight'], W_q)
    if use_bias:
        self.assertEqual(model_dict['bias'], B)
    b = io.BytesIO()
    torch.save(model_dict, b)
    b.seek(0)
    loaded_dict = torch.load(b)
    for key in model_dict:
        self.assertEqual(model_dict[key], loaded_dict[key])
    loaded_qlinear = nnqd.Linear(in_features, out_features)
    loaded_qlinear.load_state_dict(loaded_dict)

    linear_unpack = torch.ops.quantized.linear_unpack
    self.assertEqual(linear_unpack(qlinear._packed_params),
                     linear_unpack(loaded_qlinear._packed_params))
    if use_bias:
        self.assertEqual(qlinear.bias(), loaded_qlinear.bias())
    self.assertTrue(dir(qlinear) == dir(loaded_qlinear))
    self.assertTrue(hasattr(qlinear, '_packed_params'))
    self.assertTrue(hasattr(loaded_qlinear, '_packed_params'))
    self.assertTrue(hasattr(qlinear, '_weight_bias'))
    self.assertTrue(hasattr(loaded_qlinear, '_weight_bias'))
    self.assertEqual(qlinear._weight_bias(), loaded_qlinear._weight_bias())
    self.assertEqual(
        qlinear._weight_bias(),
        torch.ops.quantized.linear_unpack(qlinear._packed_params))
    Z_dq2 = qlinear(X)
    self.assertEqual(Z_dq, Z_dq2)

    # The below check is meant to ensure that `torch.save` and `torch.load`
    # serialization works, however it is currently broken by the following:
    # https://github.com/pytorch/pytorch/issues/24045
    #
    # Instead, we currently check that the proper exception is thrown on save.
    # <start code>
    # b = io.BytesIO()
    # torch.save(qlinear, b)
    # b.seek(0)
    # loaded = torch.load(b)
    # self.assertEqual(qlinear.weight(), loaded.weight())
    # self.assertEqual(qlinear.zero_point, loaded.zero_point)
    # <end code>
    with self.assertRaisesRegex(
            RuntimeError, r'torch.save\(\) is not currently supported'):
        b = io.BytesIO()
        torch.save(qlinear, b)

    # Test JIT
    self.checkScriptable(qlinear, list(zip([X], [Z_ref])),
                         check_save_load=True)

    # Test from_float
    float_linear = torch.nn.Linear(in_features, out_features).float()
    if use_default_observer:
        float_linear.qconfig = torch.quantization.default_dynamic_qconfig
    prepare_dynamic(float_linear)
    float_linear(X.float())
    quantized_float_linear = nnqd.Linear.from_float(float_linear)

    # Smoke test to make sure the module actually runs
    quantized_float_linear(X)

    # Smoke test extra_repr
    str(quantized_float_linear)
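# Note the API migration this revision of the test exercises, relative to the
# fbgemm_-prefixed version above:
#   torch.quantize_linear                     -> torch.quantize_per_tensor
#   quantized.fbgemm_linear_dynamic(X, W, b)  -> quantized.linear_dynamic(X, W_pack)
#                                                (bias travels in the packed params)
#   quantized.fbgemm_linear_unpack            -> quantized.linear_unpack
#   qlinear._packed_weight                    -> qlinear._packed_params
#   qlinear.set_weight(W_q); qlinear.bias = B -> qlinear.set_weight_bias(W_q, B)
#   qlinear.bias (attribute)                  -> qlinear.bias() (accessor)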
def test_qlinear(self, batch_size, input_channels, output_channels,
                 use_bias, use_relu, use_multi_dim_input):
    qlinear_prepack = torch.ops.quantized.fbgemm_linear_prepack
    if use_relu:
        qlinear_dynamic = torch.ops.quantized.fbgemm_linear_relu_dynamic
    else:
        qlinear_dynamic = torch.ops.quantized.fbgemm_linear_dynamic

    if use_multi_dim_input:
        batch_size *= 3  # Test the multi-dim input tensor

    X_scale = 1.0
    X_zp = 0
    X_value_min = 0
    X_value_max = 255
    X_q0 = np.round(
        np.random.rand(batch_size, input_channels) *
        (X_value_max - X_value_min) + X_value_min).astype(np.uint8)
    X_q0[0, 0] = X_value_min
    X_q0[0, 1] = X_value_max

    W_scale = 1.0
    W_zp = 0
    W_value_min = -128
    W_value_max = 127
    W_q0 = np.round(
        np.random.rand(output_channels, input_channels) *
        (W_value_max - W_value_min) + W_value_min).astype(np.int8)
    W_q0[0, 0] = W_value_min
    W_q0[1, 0] = W_value_max

    b_value_min = -10
    b_value_max = 10
    b_q0 = np.round(
        np.random.rand(output_channels) *
        (b_value_max - b_value_min) + b_value_min).astype(np.int32) \
        if use_bias else None

    avoid_vpmaddubsw_overflow_linear(
        batch_size,
        input_channels,
        output_channels,
        X_q0,
        X_value_min,
        X_value_max,
        W_q0,
        W_value_min,
        W_value_max,
    )

    X_fp32 = torch.from_numpy(_dequantize(X_q0, X_scale, X_zp)).to(dtype=torch.float)
    W_fp32 = torch.from_numpy(_dequantize(W_q0, W_scale, W_zp)).to(dtype=torch.float)
    b_fp32 = torch.from_numpy(
        _dequantize(b_q0, X_scale * W_scale, 0)).to(dtype=torch.float) \
        if use_bias else None

    if use_multi_dim_input:
        X_fp32 = X_fp32.view(3, int(batch_size / 3), input_channels)

    W_scale, W_zp = _calculate_dynamic_qparams(W_fp32, torch.qint8)
    W_q = torch.quantize_linear(W_fp32, scale=W_scale, zero_point=W_zp,
                                dtype=torch.qint8)

    # Observe X_fp32 and determine X_scale and X_zero_point, this should match
    # internals of dynamic linear.
    X_scale, X_zp = _calculate_dynamic_qparams(X_fp32, torch.quint8)
    X_q = torch.quantize_linear(X_fp32, scale=X_scale, zero_point=X_zp,
                                dtype=torch.quint8)

    # Weight prepacking operator for dynamic quantized Linear
    W_prepack = qlinear_prepack(W_q)
    # Dynamic quantized Linear operator with prepacked weight
    Y_fp32 = qlinear_dynamic(X_q.dequantize(), W_prepack, b_fp32)
    # Y_fp32 = qlinear_dynamic(X_fp32, W_prepack, b_fp32)

    Y_fp32_ref = F.linear(X_q.dequantize(), W_q.dequantize(), b_fp32)
    # Y_fp32_ref = F.linear(X_fp32, W_fp32, b_fp32)

    # if use_multi_dim_input:
    #     Y_fp32_ref = Y_fp32_ref.view(3, int(batch_size / 3), output_channels)

    if use_relu:
        Y_fp32_ref[Y_fp32_ref < 0.0] = 0.0

    self.assertEqual(
        Y_fp32, Y_fp32_ref,
        message="torch.ops.quantized.fbgemm_linear_dynamic results are off")
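# Background on avoid_vpmaddubsw_overflow_linear: FBGEMM's AVX2 kernel uses
# the vpmaddubsw instruction, which multiplies pairs of uint8 activations by
# int8 weights and accumulates each adjacent pair into a saturating int16.
# The worst case, 255 * 127 + 255 * 127 = 64770, exceeds the int16 maximum of
# 32767, so the helper adjusts the randomly drawn X/W values until no adjacent
# pair of products can overflow; otherwise the float reference result would
# diverge from the saturated kernel output.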