class TestQuantizedOps(TestCase):
    """Tests for element-wise, pooling and concatenation quantized ops."""

    def _pool_output_shape(self, input_size, kernel_size, padding, stride,
                           dilation, ceiling_mode=False):
        """Computes the output shape given pooling parameters.

        Args:
            input_size: Size of the pooled dimension of the input.
            kernel_size: Size of the pooling window along that dimension.
            padding: Implicit padding applied on each side.
            stride: Step of the window; ``None`` means "same as kernel_size".
            dilation: Spacing between the elements of the window.
            ceiling_mode: If true, round the number of windows up instead
                of down.

        Returns:
            The number of pooling windows (output elements) along the
            dimension. The value may be non-positive for configurations
            in which the window does not fit; callers filter those out.
        """
        if stride is None:
            stride = kernel_size
        output_size = (
            (input_size + 2 * padding - dilation * (kernel_size - 1) - 1
             + (stride - 1 if ceiling_mode else 0)) // stride + 1)
        # In ceil mode the rounding may produce a last window that starts
        # entirely inside the right padding. Such a window is dropped, so the
        # size must be decremented (the previous code incremented it).
        if (ceiling_mode and
                (output_size - 1) * stride >= input_size + padding):
            output_size -= 1
        return output_size

    @given(X=hu.tensor(shapes=hu.array_shapes(1, 5, 1, 5),
                       qparams=hu.qparams()))
    def test_qrelu(self, X):
        """Tests the correctness of the quantized::relu op."""
        X, (scale, zero_point, torch_type) = X

        # Reference: apply ReLU in float, then quantize.
        Y = X.copy()
        Y[Y < 0] = 0
        qY = torch.quantize_linear(torch.from_numpy(Y), scale=scale,
                                   zero_point=zero_point, dtype=torch_type)
        X = torch.from_numpy(X)
        qX = torch.quantize_linear(X, scale=scale, zero_point=zero_point,
                                   dtype=torch_type)

        ops_under_test = {
            'ops.quantized': torch.ops.quantized.relu,
            'native': torch.relu,
            'nn.functional': torch.nn.functional.relu
        }

        for name, op in ops_under_test.items():
            qY_hat = op(qX)
            self.assertEqual(qY, qY_hat,
                             message="{} relu failed".format(name))

    def test_qadd_relu_same_qparams(self):
        """Tests the add and add_relu ops when all qparams are identical."""
        add_relu = torch.ops.quantized.add_relu
        add = torch.ops.quantized.add

        A = torch.arange(-25, 25, dtype=torch.float)
        B = torch.arange(-25, 25, dtype=torch.float)
        scale = 2.0
        zero_point = 127
        qA = torch.quantize_linear(A, scale=scale, zero_point=zero_point,
                                   dtype=torch.quint8)
        qB = torch.quantize_linear(B, scale=scale, zero_point=zero_point,
                                   dtype=torch.quint8)

        # Add ground truth
        C = (qA.dequantize() + qB.dequantize()).numpy()
        qC = _quantize(C, scale, zero_point)
        qC_hat = add(qA, qB, scale=scale, zero_point=zero_point)
        np.testing.assert_equal(qC, qC_hat.int_repr(),
                                "Quantized addition failed.")

        # Add + ReLU ground truth
        Crelu = C.copy()
        Crelu[C < 0] = 0
        qCrelu = _quantize(Crelu, scale, zero_point)
        qCrelu_hat = add_relu(qA, qB, scale=scale, zero_point=zero_point)
        np.testing.assert_equal(qCrelu, qCrelu_hat.int_repr(),
                                "Quantized addition with ReLU failed.")

    def test_qadd_relu_different_qparams(self):
        """Tests the add and add_relu ops when A, B and C use different qparams."""
        add_relu = torch.ops.quantized.add_relu
        add = torch.ops.quantized.add

        A = torch.arange(-25, 25, dtype=torch.float)
        B = torch.arange(-25, 25, dtype=torch.float)
        scale_A = 3.0
        zero_point_A = 7
        scale_B = 5.0
        zero_point_B = 127

        scale_C = 0.5
        zero_point_C = 5

        qA = torch.quantize_linear(A, scale=scale_A, zero_point=zero_point_A,
                                   dtype=torch.quint8)
        qB = torch.quantize_linear(B, scale=scale_B, zero_point=zero_point_B,
                                   dtype=torch.quint8)

        # Add ground truth
        C = (qA.dequantize() + qB.dequantize()).numpy()
        qC = _quantize(C, scale_C, zero_point_C)
        qC_hat = add(qA, qB, scale=scale_C, zero_point=zero_point_C)
        np.testing.assert_equal(qC, qC_hat.int_repr(),
                                "Quantized addition failed.")

        # Add + ReLU ground truth
        Crelu = C.copy()
        Crelu[C < 0] = 0
        qCrelu = _quantize(Crelu, scale_C, zero_point_C)
        qCrelu_hat = add_relu(qA, qB, scale=scale_C, zero_point=zero_point_C)
        np.testing.assert_equal(qCrelu, qCrelu_hat.int_repr(),
                                "Quantized addition with ReLU failed.")

    @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=3, max_dims=4,
                                              min_side=1, max_side=10),
                       qparams=hu.qparams()),
           kernel=st.sampled_from((3, 5, 7)),
           stride=st.sampled_from((None, 1, 2)),
           dilation=st.integers(1, 2),
           padding=st.integers(0, 2))
    def test_max_pool2d(self, X, kernel, stride, dilation, padding):
        """Tests max pool operation on quantized tensors."""
        X, (scale, zero_point, torch_type) = X
        # Check constraints
        assume(kernel // 2 >= padding)  # Kernel cannot be overhanging!
        iH, iW = X.shape[-2:]
        oH = self._pool_output_shape(iH, kernel, padding, stride, dilation)
        assume(oH > 0)
        oW = self._pool_output_shape(iW, kernel, padding, stride, dilation)
        assume(oW > 0)

        # Reference: pool in float, then round-trip through quantization.
        a = torch.from_numpy(X)
        a_pool = torch.nn.functional.max_pool2d(a, kernel_size=kernel,
                                                stride=stride,
                                                padding=padding,
                                                dilation=dilation)
        a_ref = torch.quantize_linear(a_pool, scale=scale,
                                      zero_point=zero_point, dtype=torch_type)
        a_ref = a_ref.dequantize()
        qa = torch.quantize_linear(a, scale=scale, zero_point=zero_point,
                                   dtype=torch_type)

        ops_under_test = {
            "torch": torch.max_pool2d,
            "nn.functional": torch.nn.functional.max_pool2d,
            "nn.quantized.functional": torch.nn.quantized.functional.max_pool2d
        }

        for name, op in ops_under_test.items():
            a_hat = op(qa, kernel_size=kernel, stride=stride, padding=padding,
                       dilation=dilation)
            self.assertEqual(a_ref, a_hat.dequantize(),
                             message="{} results are off".format(name))
        # Test the ops.quantized separately, because None is not treated.
        a_hat = torch.ops.quantized.max_pool2d(
            qa, kernel_size=_pair(kernel),
            stride=_pair(kernel if stride is None else stride),
            padding=_pair(padding), dilation=_pair(dilation))
        self.assertEqual(a_ref, a_hat.dequantize(),
                         message="ops.quantized.max_pool2d results are off")

    @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=3, max_dims=4,
                                              min_side=1, max_side=10),
                       qparams=hu.qparams()),
           output_size_h=st.integers(1, 10),
           output_size_w=st.integers(1, 10))
    def test_adaptive_avg_pool2d(self, X, output_size_h, output_size_w):
        """Tests adaptive average pooling on quantized tensors."""
        X, (scale, zero_point, torch_type) = X

        H, W = X.shape[-2:]
        assume(output_size_h <= H)
        assume(output_size_w <= W)
        # Exercise both the scalar and the tuple form of output_size.
        if output_size_h == output_size_w:
            output_size = output_size_h
        else:
            output_size = (output_size_h, output_size_w)

        X = torch.from_numpy(X)
        qX = torch.quantize_linear(X, scale=scale, zero_point=zero_point,
                                   dtype=torch_type)

        # Run reference on int_repr + round to avoid double rounding error.
        X_ref = torch.nn.functional.adaptive_avg_pool2d(
            qX.int_repr().to(torch.float), output_size).round()

        ops_under_test = {
            "nn.functional": torch.nn.functional.adaptive_avg_pool2d,
            "nn.quantized.functional":
                torch.nn.quantized.functional.adaptive_avg_pool2d
        }

        error_message = r"Results are off for {}:\n\tExpected:\n{}\n\tGot:\n{}"

        for name, op in ops_under_test.items():
            qX_hat = op(qX, output_size=output_size)
            qX_repr = qX_hat.int_repr()
            self.assertEqual(X_ref, qX_repr,
                             message=error_message.format(name, X_ref, qX_repr))

    @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=3, max_dims=4,
                                              min_side=1, max_side=10),
                       qparams=hu.qparams()),
           num=st.integers(1, 4),
           axis=st.integers(1, 4),
           relu=st.booleans())
    def test_cat(self, X, num, axis, relu):
        """Tests quantize concatenation (both fused and not)."""
        tensors_q = []
        tensors_ref = []
        X, (scale, zero_point, torch_type) = X
        assume(axis < X.ndim)
        X = torch.from_numpy(X)
        # Expected shape of the concatenation: same as X, except along `axis`
        # where the sizes of all the pieces are accumulated.
        new_shape = np.array(X.shape)
        new_shape[axis] = 0
        for _ in range(num):
            tensors_q.append(torch.quantize_linear(X, scale, zero_point,
                                                   torch_type))
            tensors_ref.append(X)
            new_shape[axis] += tensors_ref[-1].shape[axis]

        cat_ref = torch.cat(tensors_ref, axis=axis)
        cat_ref = torch.quantize_linear(cat_ref, scale, zero_point, torch_type)
        cat_ref = cat_ref.dequantize()

        if relu:
            cat_ref = F.relu(cat_ref)
            q_cat_op = torch.ops.quantized.cat_relu
            q_cat_out_op = torch.ops.quantized.cat_relu_out
        else:
            q_cat_op = torch.ops.quantized.cat
            q_cat_out_op = torch.ops.quantized.cat_out

        cat_q = q_cat_op(tensors_q, axis=axis, scale=scale,
                         zero_point=zero_point)
        cat_q = cat_q.dequantize()
        np.testing.assert_equal(cat_ref.numpy(), cat_q.numpy())

        cat_q_out = torch._empty_affine_quantized(
            list(new_shape), scale=scale,
            zero_point=zero_point, dtype=torch_type)
        q_cat_out_op(tensors_q, axis=axis, out=cat_q_out)
        cat_q_out = cat_q_out.dequantize()
        np.testing.assert_equal(cat_ref.numpy(), cat_q_out.numpy())

        # Test the cat on per-channel quantized tensor.
        ch_axis = 1
        scales = torch.from_numpy(np.array([1.0] * X.shape[ch_axis]))
        scales = scales.to(torch.float64)
        zero_points = torch.from_numpy(np.array([0] * X.shape[ch_axis]))
        zero_points = zero_points.to(torch.long)
        tensors_q[0] = torch.quantize_linear_per_channel(
            X, scales, zero_points, axis=[ch_axis], dtype=torch_type)
        with self.assertRaisesRegex(RuntimeError, "supported.*cat"):
            cat_q = q_cat_op(tensors_q, axis=axis, scale=scale,
                             zero_point=zero_point)
class TestQuantizedConv(unittest.TestCase):
    """Tests for the FBGEMM quantized convolution ops."""

    @given(batch_size=st.integers(1, 3),
           input_channels_per_group=st.sampled_from([2, 4, 5, 8, 16, 32]),
           height=st.integers(10, 16),
           width=st.integers(7, 14),
           output_channels_per_group=st.sampled_from([2, 4, 5, 8, 16, 32]),
           groups=st.integers(1, 3),
           kernel_h=st.integers(1, 7),
           kernel_w=st.integers(1, 7),
           stride_h=st.integers(1, 2),
           stride_w=st.integers(1, 2),
           pad_h=st.integers(0, 2),
           pad_w=st.integers(0, 2),
           dilation=st.integers(1, 1),
           X_scale=st.floats(0.2, 1.6),
           X_zero_point=st.integers(0, 4),
           W_scale=st.floats(0.2, 1.6),
           W_zero_point=st.integers(-5, 5),
           Y_scale=st.floats(0.2, 1.6),
           Y_zero_point=st.integers(0, 4),
           use_bias=st.booleans(),
           use_relu=st.booleans())
    def test_qconv(
            self,
            batch_size,
            input_channels_per_group,
            height,
            width,
            output_channels_per_group,
            groups,
            kernel_h,
            kernel_w,
            stride_h,
            stride_w,
            pad_h,
            pad_w,
            dilation,
            X_scale,
            X_zero_point,
            W_scale,
            W_zero_point,
            Y_scale,
            Y_zero_point,
            use_bias,
            use_relu
    ):
        """Tests the correctness of quantized convolution op.

        The quantized result is compared with the float ``torch.nn.Conv2d``
        result quantized with the output qparams, allowing a difference of
        one quantization step.
        """
        qconv = torch.ops.quantized.fbgemm_conv2d
        if use_relu:
            qconv = torch.ops.quantized.fbgemm_conv2d_relu
        qconv_prepack = torch.ops.quantized.fbgemm_conv_prepack

        # C
        input_channels = input_channels_per_group * groups
        # K
        output_channels = output_channels_per_group * groups

        dilation_h = dilation_w = dilation

        # For testing, we use small values for weights and for activations so
        # that no overflow occurs in vpmaddubsw instruction. If the overflow
        # occurs in qconv implementation and if there is no overflow in
        # reference we can't exactly match the results with reference.
        # Please see the comment in qconv implementation file
        # (aten/src/ATen/native/quantized/cpu/qconv.cpp) for more details.
        W_value_min = -5
        W_value_max = 5
        # the operator expects them in the format
        # (output_channels, input_channels/groups, kernel_h, kernel_w)
        W_init = torch.from_numpy(
            np.random.randint(
                W_value_min,
                W_value_max,
                # Integer division: a tensor dimension must be an exact int.
                (output_channels, input_channels // groups,
                 kernel_h, kernel_w)),
        )

        b_init = torch.from_numpy(np.random.randint(0, 10, (output_channels,)))

        stride = [stride_h, stride_w]
        pad = [pad_h, pad_w]
        dilation = [dilation_h, dilation_w]

        X_value_min = 0
        X_value_max = 4
        X_init = torch.from_numpy(np.random.randint(
            X_value_min, X_value_max,
            (batch_size, input_channels, height, width)))

        # Dequantized (float) views of the integer tensors drawn above.
        X = X_scale * (X_init - X_zero_point).to(dtype=torch.float)

        W = W_scale * (W_init - W_zero_point).to(dtype=torch.float)

        b = X_scale * W_scale * (b_init - 0).to(dtype=torch.float)

        # Existing floating point conv operator
        conv_op = torch.nn.Conv2d(input_channels,
                                  output_channels,
                                  (kernel_h, kernel_w),
                                  (stride_h, stride_w),
                                  (pad_h, pad_w),
                                  (dilation_h, dilation_w),
                                  groups)
        # assign weights
        conv_op.weight = torch.nn.Parameter(W, requires_grad=False)
        conv_op.bias = (torch.nn.Parameter(b, requires_grad=False)
                        if use_bias else None)
        result_ref = conv_op(X)
        if use_relu:
            relu = torch.nn.ReLU()
            result_ref = relu(result_ref)
        # quantize reference results for comparison
        result_ref_q = torch.quantize_linear(result_ref, scale=Y_scale,
                                             zero_point=Y_zero_point,
                                             dtype=torch.quint8)

        # reformat X_init and W_init in the required format by qconv operator
        # NCHW -> NHWC
        X_NHWC = X.permute([0, 2, 3, 1]).contiguous()
        # K(C/G)RS -> KRS(C/G)
        W_KRSC = W.permute([0, 2, 3, 1]).contiguous()

        X_q = torch.quantize_linear(X_NHWC, scale=X_scale,
                                    zero_point=X_zero_point,
                                    dtype=torch.quint8)
        W_q = torch.quantize_linear(W_KRSC, scale=W_scale,
                                    zero_point=W_zero_point,
                                    dtype=torch.qint8)
        b_q = torch.quantize_linear(b, scale=X_scale * W_scale, zero_point=0,
                                    dtype=torch.qint32) if use_bias else None

        W_prepack = qconv_prepack(W_q, stride, pad, dilation, groups)

        Y_q = qconv(
            X_q,
            W_prepack,
            b_q,
            stride,
            pad,
            dilation,
            groups,
            Y_scale,
            Y_zero_point,
        )

        # Back to NCHW format
        Y_q = Y_q.permute([0, 3, 1, 2]).contiguous()

        # Make sure the results match
        # assert_array_almost_equal compares using the following formula:
        #     abs(desired-actual) < 1.5 * 10**(-decimal)
        # (https://docs.scipy.org/doc/numpy/reference/generated/numpy.testing.assert_almost_equal.html)
        # We use decimal = 0 to ignore off-by-1 differences between reference
        # and test. Off-by-1 differences arise due to the order of round and
        # zero_point addition operation, i.e., if addition followed by round
        # is used by reference and round followed by addition is used by test,
        # the results may differ by 1.
        # For example, the result of round(2.5) + 1 is 3 while
        # round(2.5 + 1) is 4 assuming the rounding mode is
        # round-to-nearest, ties-to-even.
        np.testing.assert_array_almost_equal(result_ref_q.int_repr().numpy(),
                                             Y_q.int_repr().numpy(),
                                             decimal=0)

    @given(X=hu.tensor_conv2d(min_batch=1, max_batch=3,
                              min_in_channels=1, max_in_channels=7,
                              min_out_channels=1, max_out_channels=7,
                              H_range=(6, 12), W_range=(6, 12),
                              kH_range=(3, 5), kW_range=(3, 5),
                              max_groups=4,
                              qparams=[hu.qparams(dtypes=torch.quint8,
                                                  zero_point_min=0,
                                                  zero_point_max=0),
                                       hu.qparams(dtypes=torch.qint8,
                                                  zero_point_min=0,
                                                  zero_point_max=0),
                                       hu.qparams(dtypes=torch.qint32,
                                                  zero_point_min=0,
                                                  zero_point_max=0)]),
           strideH=st.integers(1, 3), strideW=st.integers(1, 3),
           padH=st.integers(1, 2), padW=st.integers(1, 2))
    def test_qconv_unpack(self, X, strideH, strideW, padH, padW):
        """Tests the correctness of the quantized::fbgemm_qconv_unpack op.

        Packs a quantized weight and checks that unpacking it returns the
        same integer values, scale and zero point.
        """
        # Only the filters and the group count are needed here; the input and
        # bias drawn by the strategy are ignored.
        (_, filters, _, groups) = X
        filters, (filters_scale, filters_zero_point, filters_qtype) = filters

        qconv_prepack = torch.ops.quantized.fbgemm_conv_prepack
        qconv_unpack = torch.ops.quantized.fbgemm_conv_unpack

        # Orig tensor is assumed to be in K(C/G)RS format
        W = torch.from_numpy(filters).to(torch.float)
        # K(C/G)RS -> KRS(C/G)
        W_KRSC = W.permute([0, 2, 3, 1]).contiguous()
        W_q = torch.quantize_linear(W_KRSC, scale=filters_scale,
                                    zero_point=filters_zero_point,
                                    dtype=filters_qtype)

        # Pack weights using weight packing operator
        strides = [strideH, strideW]
        paddings = [padH, padW]
        dilations = [1, 1]
        W_packed = qconv_prepack(W_q, strides, paddings, dilations, groups)
        # Unpack weights using the weight unpacking operator
        # (used for serialization)
        W_unpacked = qconv_unpack(W_packed)

        # Assert equal
        np.testing.assert_equal(W_q.int_repr().numpy(),
                                W_unpacked.int_repr().numpy())
        np.testing.assert_equal(W_q.q_scale(), W_unpacked.q_scale())
        np.testing.assert_equal(W_q.q_zero_point(), W_unpacked.q_zero_point())
class TestQNNPackOps(TestCase):
    """Tests for the QNNPACK-backed quantized ops."""

    @given(X=hu.tensor(shapes=hu.array_shapes(1, 5, 1, 5),
                       qparams=hu.qparams(dtypes=torch.quint8,
                                          zero_point_min=0,
                                          zero_point_max=0)))
    def test_qnnpack_relu(self, X):
        """Tests the correctness of the quantized::qnnpack_relu op."""
        X, (scale, zero_point, torch_type) = X
        relu = torch.ops.quantized.qnnpack_relu

        X = torch.from_numpy(X)
        Y = X.clone()

        qX = torch.quantize_linear(X, scale=scale, zero_point=zero_point,
                                   dtype=torch_type)
        qY_hat = relu(qX)

        # Reference: apply ReLU in float, then quantize.
        Y[Y < 0] = 0
        qY = torch.quantize_linear(Y, scale=scale, zero_point=zero_point,
                                   dtype=torch_type)
        self.assertEqual(qY, qY_hat)

    @given(output_channels=st.sampled_from([2, 4, 5, 8, 16, 32]),
           X=hu.tensor(shapes=hu.array_shapes(2, 3, 8, 15),
                       qparams=hu.qparams(dtypes=torch.quint8)))
    def test_qnnpack_linear(self, output_channels, X):
        """Tests the correctness of the quantized::qnnpack_linear op.

        The op output is checked against ``qlinear_ref`` and against the
        float ``F.linear`` result quantized with the output qparams.
        """
        # The qparams drawn by hypothesis are not used: fixed input qparams
        # are set below, and only the dtype is needed for the integer range.
        X, (_, _, torch_type) = X
        qmin = torch.iinfo(torch_type).min
        qmax = torch.iinfo(torch_type).max

        input_channels = X.shape[-1]

        qnnpack_linear = torch.ops.quantized.qnnpack_linear

        # NOTE(review): the factor is (qmin - qmax), i.e. negative; together
        # with the uint8 cast the values wrap around. This looks like it was
        # meant to be (qmax - qmin) -- confirm before changing, since it
        # alters the generated inputs.
        X_q0 = np.round(X * (qmin - qmax) + qmin).astype(np.uint8)

        W_scale = 0.4
        W_zp = 0
        W_value_min = 0
        W_value_max = 255
        W_q0 = np.round(
            np.random.rand(output_channels, input_channels)
            * (W_value_max - W_value_min)
            + W_value_min
        ).astype(np.uint8)

        b_value_min = -10
        b_value_max = 10
        b_q0 = np.round(
            np.random.rand(output_channels) * (b_value_max - b_value_min)
            + b_value_min
        ).astype(np.int32)

        X_scale = 10
        X_zp = 0
        X = torch.from_numpy(
            _dequantize(X_q0, X_scale, X_zp)).to(dtype=torch.float)
        W = torch.from_numpy(
            _dequantize(W_q0, W_scale, W_zp)).to(dtype=torch.float)
        b = torch.from_numpy(
            _dequantize(b_q0, X_scale * W_scale, 0)).to(dtype=torch.float)

        X_q = torch.quantize_linear(X, scale=X_scale, zero_point=X_zp,
                                    dtype=torch.quint8)
        W_q = torch.quantize_linear(W, scale=W_scale, zero_point=W_zp,
                                    dtype=torch.quint8)
        b_q = torch.quantize_linear(b, scale=X_scale * W_scale, zero_point=0,
                                    dtype=torch.qint32)

        Y_scale = 5.4  # This makes sure that the max output value does not exceed 255.
        Y_zp = 0

        # Reference quantized Linear operator
        Y_q_ref = qlinear_ref(X_q0, X_scale, X_zp, W_q0, W_scale, W_zp,
                              b_q0, Y_scale, Y_zp)
        Y_q_ref_float = _dequantize(Y_q_ref, Y_scale, Y_zp)

        # Quantized linear operator
        Y_q = qnnpack_linear(X_q, W_q, b_q, Y_scale, Y_zp)

        # Assert equal
        np.testing.assert_array_almost_equal(Y_q_ref_float,
                                             Y_q.dequantize().numpy(),
                                             decimal=4)

        # Reference quantized result from PyTorch Linear operator
        W_fp32 = W_q.dequantize().to(dtype=torch.float)
        X_fp32 = X_q.dequantize().to(dtype=torch.float)
        b_fp32 = b_q.dequantize().to(dtype=torch.float)
        Y_fp32_ref = F.linear(X_fp32, W_fp32, b_fp32)
        Y_fp32_ref = Y_fp32_ref.view(-1, output_channels)
        Y_q_ref2 = torch.quantize_linear(Y_fp32_ref, Y_scale, Y_zp,
                                         torch.quint8)

        # Assert equal
        np.testing.assert_array_almost_equal(Y_q_ref2.dequantize().numpy(),
                                             Y_q.dequantize().numpy(),
                                             decimal=4)
class TestQuantizedOps(TestCase):
    """Tests for element-wise, pooling, concatenation and equality quantized ops."""

    def _pool_output_shape(self, input_size, kernel_size, padding, stride,
                           dilation, ceiling_mode=False):
        """Computes the output shape given pooling parameters.

        Args:
            input_size: Size of the pooled dimension of the input.
            kernel_size: Size of the pooling window along that dimension.
            padding: Implicit padding applied on each side.
            stride: Step of the window; ``None`` means "same as kernel_size".
            dilation: Spacing between the elements of the window.
            ceiling_mode: If true, round the number of windows up instead
                of down.

        Returns:
            The number of pooling windows (output elements) along the
            dimension. The value may be non-positive for configurations
            in which the window does not fit; callers filter those out.
        """
        if stride is None:
            stride = kernel_size
        output_size = (
            (input_size + 2 * padding - dilation * (kernel_size - 1) - 1
             + (stride - 1 if ceiling_mode else 0)) // stride + 1)
        # In ceil mode the rounding may produce a last window that starts
        # entirely inside the right padding. Such a window is dropped, so the
        # size must be decremented (the previous code incremented it).
        if (ceiling_mode and
                (output_size - 1) * stride >= input_size + padding):
            output_size -= 1
        return output_size

    @given(qparams=hu.qparams())
    def test_qrelu(self, qparams):
        """Tests the correctness of the quantized::relu op.

        Covers both the out-of-place and the in-place variants.
        """
        X = np.array([[-3, -2, 1, 2],
                      [0, 0, 0, 0],
                      [-5, -4, -3, -2],
                      [1, 2, 3, 4]], dtype=np.float32)
        scale, zero_point, torch_type = qparams

        # Reference: apply ReLU in float, then quantize.
        Y = X.copy()
        Y[Y < 0] = 0
        qY = torch.quantize_linear(torch.from_numpy(Y), scale=scale,
                                   zero_point=zero_point, dtype=torch_type)
        X = torch.from_numpy(X)
        qX = torch.quantize_linear(X, scale=scale, zero_point=zero_point,
                                   dtype=torch_type)

        ops_under_test = {
            'native': torch.relu,
            'nn.functional': torch.nn.functional.relu,
        }

        for name, op in ops_under_test.items():
            qY_hat = op(qX)
            self.assertEqual(qY, qY_hat,
                             message="{} relu failed".format(name))

        ops_under_test_inplace = {
            'inplace native': torch.relu_,
            'inplace nn.functional': torch.nn.functional.relu_,
        }

        for name, op_ in ops_under_test_inplace.items():
            # Work on a copy so that every in-place op sees the same input.
            qY_hat = qX.clone()
            op_(qY_hat)
            self.assertEqual(qY, qY_hat,
                             message="{} relu failed".format(name))

    @given(X=hu.tensor(shapes=hu.array_shapes(1, 5, 1, 5),
                       qparams=hu.qparams()))
    def test_qrelu6(self, X):
        """Tests the correctness of the quantized::relu6 op."""
        X, (scale, zero_point, torch_type) = X

        # Reference: clamp to [0, 6] in float, then quantize.
        Y = X.copy()
        Y[Y < 0] = 0
        Y[Y > 6.0] = 6.0
        qY = torch.quantize_linear(torch.from_numpy(Y), scale=scale,
                                   zero_point=zero_point, dtype=torch_type)
        X = torch.from_numpy(X)
        qX = torch.quantize_linear(X, scale=scale, zero_point=zero_point,
                                   dtype=torch_type)

        ops_under_test = {
            'ops.quantized': torch.ops.quantized.relu6,
            'module': torch.nn.quantized.ReLU6(),
        }

        for name, op in ops_under_test.items():
            qY_hat = op(qX)
            self.assertEqual(qY, qY_hat,
                             message="{} relu6 failed".format(name))

    @given(A=hu.tensor(shapes=hu.array_shapes(1, 4, 1, 5),
                       elements=st.floats(-1e6, 1e6, allow_nan=False),
                       qparams=hu.qparams()),
           b=st.floats(-1e6, 1e6, allow_nan=False, allow_infinity=False))
    def test_qadd_scalar_relu(self, A, b):
        """Tests the correctness of the scalar addition."""
        import copy
        add_scalar = torch.ops.quantized.add_scalar
        add_scalar_relu = torch.ops.quantized.add_scalar_relu

        A, (scale, zero_point, dtype) = A
        A = A.astype(np.float32)
        qA = torch.quantize_linear(torch.from_numpy(A), scale, zero_point,
                                   dtype)

        # Reference: add in float on the dequantized input, then quantize.
        C = qA.dequantize() + b
        C_relu = copy.deepcopy(C)
        C_relu[C_relu < 0] = 0

        C_ref = torch.quantize_linear(C, scale, zero_point, dtype)
        C_relu_ref = torch.quantize_linear(C_relu, scale, zero_point, dtype)

        C_hat = add_scalar(qA, b, scale=scale, zero_point=zero_point)
        C_relu_hat = add_scalar_relu(qA, b, scale=scale,
                                     zero_point=zero_point)

        self.assertEqual(
            C_ref, C_hat,
            message="Scalar add results don't match: "
                    "{} vs {}".format(C_ref, C_hat))
        self.assertEqual(
            C_relu_ref, C_relu_hat,
            message="Scalar add relu results don't match: "
                    "{} vs {}".format(C_relu_ref, C_relu_hat))

    def test_qadd_relu_same_qparams(self):
        """Tests the add and add_relu ops when all qparams are identical."""
        add_relu = torch.ops.quantized.add_relu
        add = torch.ops.quantized.add
        add_out = torch.ops.quantized.add_out
        add_relu_out = torch.ops.quantized.add_relu_out

        A = torch.arange(-25, 25, dtype=torch.float)
        B = torch.arange(-25, 25, dtype=torch.float)
        scale = 2.0
        zero_point = 127
        qA = torch.quantize_linear(A, scale=scale, zero_point=zero_point,
                                   dtype=torch.quint8)
        qB = torch.quantize_linear(B, scale=scale, zero_point=zero_point,
                                   dtype=torch.quint8)

        # Add ground truth
        C = (qA.dequantize() + qB.dequantize()).numpy()
        qC = _quantize(C, scale, zero_point)
        qC_hat = add(qA, qB, scale=scale, zero_point=zero_point)
        np.testing.assert_equal(qC, qC_hat.int_repr(),
                                "Quantized addition failed.")
        qC_out_hat = torch._empty_affine_quantized(qC.shape,
                                                   scale=scale,
                                                   zero_point=zero_point,
                                                   dtype=torch.quint8)
        add_out(qA, qB, out=qC_out_hat)
        self.assertEqual(qC_hat, qC_out_hat, message="Add.out failed")

        # Add + ReLU ground truth
        Crelu = C.copy()
        Crelu[C < 0] = 0
        qCrelu = _quantize(Crelu, scale, zero_point)
        qCrelu_hat = add_relu(qA, qB, scale=scale, zero_point=zero_point)
        np.testing.assert_equal(qCrelu, qCrelu_hat.int_repr(),
                                "Quantized addition with ReLU failed.")
        qCrelu_out_hat = torch._empty_affine_quantized(qCrelu.shape,
                                                       scale=scale,
                                                       zero_point=zero_point,
                                                       dtype=torch.quint8)
        add_relu_out(qA, qB, out=qCrelu_out_hat)
        self.assertEqual(qCrelu_hat, qCrelu_out_hat,
                         message="AddReLU.out failed")

    def test_qadd_relu_different_qparams(self):
        """Tests the add and add_relu ops when A, B and C use different qparams."""
        add_relu = torch.ops.quantized.add_relu
        add = torch.ops.quantized.add
        add_out = torch.ops.quantized.add_out
        add_relu_out = torch.ops.quantized.add_relu_out

        A = torch.arange(-25, 25, dtype=torch.float)
        B = torch.arange(-25, 25, dtype=torch.float)
        scale_A = 3.0
        zero_point_A = 7
        scale_B = 5.0
        zero_point_B = 127

        scale_C = 0.5
        zero_point_C = 5

        qA = torch.quantize_linear(A, scale=scale_A, zero_point=zero_point_A,
                                   dtype=torch.quint8)
        qB = torch.quantize_linear(B, scale=scale_B, zero_point=zero_point_B,
                                   dtype=torch.quint8)

        # Add ground truth
        C = (qA.dequantize() + qB.dequantize()).numpy()
        qC = _quantize(C, scale_C, zero_point_C)
        qC_hat = add(qA, qB, scale=scale_C, zero_point=zero_point_C)
        np.testing.assert_equal(qC, qC_hat.int_repr(),
                                "Quantized addition failed.")
        qC_out_hat = torch._empty_affine_quantized(qC.shape,
                                                   scale=scale_C,
                                                   zero_point=zero_point_C,
                                                   dtype=torch.quint8)
        add_out(qA, qB, out=qC_out_hat)
        self.assertEqual(qC_hat, qC_out_hat, message="Add.out failed")

        # Add + ReLU ground truth
        Crelu = C.copy()
        Crelu[C < 0] = 0
        qCrelu = _quantize(Crelu, scale_C, zero_point_C)
        qCrelu_hat = add_relu(qA, qB, scale=scale_C, zero_point=zero_point_C)
        np.testing.assert_equal(qCrelu, qCrelu_hat.int_repr(),
                                "Quantized addition with ReLU failed.")
        qCrelu_out_hat = torch._empty_affine_quantized(qCrelu.shape,
                                                       scale=scale_C,
                                                       zero_point=zero_point_C,
                                                       dtype=torch.quint8)
        add_relu_out(qA, qB, out=qCrelu_out_hat)
        self.assertEqual(qCrelu_hat, qCrelu_out_hat,
                         message="AddReLU.out failed")

    def test_qmul_relu_same_qparams(self):
        """Tests the mul, mul_relu and mul_scalar ops with identical qparams."""
        mul_relu = torch.ops.quantized.mul_relu
        mul = torch.ops.quantized.mul
        mul_out = torch.ops.quantized.mul_out
        mul_relu_out = torch.ops.quantized.mul_relu_out

        A = torch.arange(-25, 25, dtype=torch.float)
        B = torch.arange(-25, 25, dtype=torch.float)
        scale = 2.0
        zero_point = 127
        qA = torch.quantize_linear(A, scale=scale, zero_point=zero_point,
                                   dtype=torch.quint8)
        qB = torch.quantize_linear(B, scale=scale, zero_point=zero_point,
                                   dtype=torch.quint8)

        # mul ground truth
        C = (qA.dequantize() * qB.dequantize()).numpy()
        qC = _quantize(C, scale, zero_point)
        qC_hat = mul(qA, qB, scale=scale, zero_point=zero_point)
        np.testing.assert_equal(qC, qC_hat.int_repr(),
                                "Quantized multiplication failed.")
        qC_out_hat = torch._empty_affine_quantized(qC.shape,
                                                   scale=scale,
                                                   zero_point=zero_point,
                                                   dtype=torch.quint8)
        mul_out(qA, qB, out=qC_out_hat)
        self.assertEqual(qC_hat, qC_out_hat, message="mul.out failed")

        # mul + ReLU ground truth
        Crelu = C.copy()
        Crelu[C < 0] = 0
        qCrelu = _quantize(Crelu, scale, zero_point)
        qCrelu_hat = mul_relu(qA, qB, scale=scale, zero_point=zero_point)
        np.testing.assert_equal(qCrelu, qCrelu_hat.int_repr(),
                                "Quantized multiplication with ReLU failed.")
        qCrelu_out_hat = torch._empty_affine_quantized(qCrelu.shape,
                                                       scale=scale,
                                                       zero_point=zero_point,
                                                       dtype=torch.quint8)
        mul_relu_out(qA, qB, out=qCrelu_out_hat)
        self.assertEqual(qCrelu_hat, qCrelu_out_hat,
                         message="mulReLU.out failed")

        # Scalar multiplication
        mul_scalar = torch.ops.quantized.mul_scalar
        for b in B:
            C_ref = qA.dequantize().numpy() * b.item()
            qC = _quantize(C_ref, scale, zero_point)
            dqC = _dequantize(qC, scale, zero_point)
            qC_hat = mul_scalar(qA, b.item(), scale, zero_point)
            dqC_hat = qC_hat.dequantize()
            self.assertEqual(dqC, dqC_hat)

    def test_qmul_relu_different_qparams(self):
        """Tests the mul and mul_relu ops when A, B and C use different qparams."""
        mul_relu = torch.ops.quantized.mul_relu
        mul = torch.ops.quantized.mul
        mul_out = torch.ops.quantized.mul_out
        mul_relu_out = torch.ops.quantized.mul_relu_out

        A = torch.arange(-25, 25, dtype=torch.float)
        B = torch.arange(-25, 25, dtype=torch.float)
        scale_A = 3.0
        zero_point_A = 7
        scale_B = 5.0
        zero_point_B = 127

        scale_C = 0.5
        zero_point_C = 5

        qA = torch.quantize_linear(A, scale=scale_A, zero_point=zero_point_A,
                                   dtype=torch.quint8)
        qB = torch.quantize_linear(B, scale=scale_B, zero_point=zero_point_B,
                                   dtype=torch.quint8)

        # mul ground truth
        C = (qA.dequantize() * qB.dequantize()).numpy()
        qC = _quantize(C, scale_C, zero_point_C)
        qC_hat = mul(qA, qB, scale=scale_C, zero_point=zero_point_C)
        np.testing.assert_equal(qC, qC_hat.int_repr(),
                                "Quantized multiplication failed.")
        qC_out_hat = torch._empty_affine_quantized(qC.shape,
                                                   scale=scale_C,
                                                   zero_point=zero_point_C,
                                                   dtype=torch.quint8)
        mul_out(qA, qB, out=qC_out_hat)
        self.assertEqual(qC_hat, qC_out_hat, message="mul.out failed")

        # mul + ReLU ground truth
        Crelu = C.copy()
        Crelu[C < 0] = 0
        qCrelu = _quantize(Crelu, scale_C, zero_point_C)
        qCrelu_hat = mul_relu(qA, qB, scale=scale_C, zero_point=zero_point_C)
        np.testing.assert_equal(qCrelu, qCrelu_hat.int_repr(),
                                "Quantized multiplication with ReLU failed.")
        qCrelu_out_hat = torch._empty_affine_quantized(qCrelu.shape,
                                                       scale=scale_C,
                                                       zero_point=zero_point_C,
                                                       dtype=torch.quint8)
        mul_relu_out(qA, qB, out=qCrelu_out_hat)
        self.assertEqual(qCrelu_hat, qCrelu_out_hat,
                         message="mulReLU.out failed")

    @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=3, max_dims=4,
                                              min_side=1, max_side=10),
                       qparams=hu.qparams()),
           kernel=st.sampled_from((3, 5, 7)),
           stride=st.sampled_from((None, 1, 2)),
           dilation=st.integers(1, 2),
           padding=st.integers(0, 2))
    def test_max_pool2d(self, X, kernel, stride, dilation, padding):
        """Tests max pool operation on quantized tensors."""
        X, (scale, zero_point, torch_type) = X
        # Check constraints
        assume(kernel // 2 >= padding)  # Kernel cannot be overhanging!
        iH, iW = X.shape[-2:]
        oH = self._pool_output_shape(iH, kernel, padding, stride, dilation)
        assume(oH > 0)
        oW = self._pool_output_shape(iW, kernel, padding, stride, dilation)
        assume(oW > 0)

        # Reference: pool in float, then round-trip through quantization.
        a = torch.from_numpy(X)
        a_pool = torch.nn.functional.max_pool2d(a, kernel_size=kernel,
                                                stride=stride,
                                                padding=padding,
                                                dilation=dilation)
        a_ref = torch.quantize_linear(a_pool, scale=scale,
                                      zero_point=zero_point, dtype=torch_type)
        a_ref = a_ref.dequantize()
        qa = torch.quantize_linear(a, scale=scale, zero_point=zero_point,
                                   dtype=torch_type)

        ops_under_test = {
            "torch": torch.max_pool2d,
            "nn.functional": torch.nn.functional.max_pool2d,
            "nn.quantized.functional": torch.nn.quantized.functional.max_pool2d
        }

        for name, op in ops_under_test.items():
            a_hat = op(qa, kernel_size=kernel, stride=stride, padding=padding,
                       dilation=dilation)
            self.assertEqual(a_ref, a_hat.dequantize(),
                             message="{} results are off".format(name))
        # Test the ops.quantized separately, because None is not treated.
        a_hat = torch.ops.quantized.max_pool2d(
            qa, kernel_size=_pair(kernel),
            stride=_pair(kernel if stride is None else stride),
            padding=_pair(padding), dilation=_pair(dilation))
        self.assertEqual(a_ref, a_hat.dequantize(),
                         message="ops.quantized.max_pool2d results are off")

    @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=3, max_dims=4,
                                              min_side=1, max_side=10),
                       qparams=hu.qparams()),
           output_size_h=st.integers(1, 10),
           output_size_w=st.integers(1, 10))
    def test_adaptive_avg_pool2d(self, X, output_size_h, output_size_w):
        """Tests adaptive average pooling on quantized tensors.

        Checks the integer values (within one unit) and that the output
        keeps the scale and zero point of the input.
        """
        X, (scale, zero_point, torch_type) = X

        H, W = X.shape[-2:]
        assume(output_size_h <= H)
        assume(output_size_w <= W)
        # Exercise both the scalar and the tuple form of output_size.
        if output_size_h == output_size_w:
            output_size = output_size_h
        else:
            output_size = (output_size_h, output_size_w)

        X = torch.from_numpy(X)
        qX = torch.quantize_linear(X, scale=scale, zero_point=zero_point,
                                   dtype=torch_type)

        # Run reference on int_repr + round to avoid double rounding error.
        X_ref = torch.nn.functional.adaptive_avg_pool2d(
            qX.int_repr().to(torch.float), output_size).round()

        ops_under_test = {
            "nn.functional": torch.nn.functional.adaptive_avg_pool2d,
            "nn.quantized.functional":
                torch.nn.quantized.functional.adaptive_avg_pool2d
        }

        error_message = r"Results are off for {}:\n\tExpected:\n{}\n\tGot:\n{}"

        for name, op in ops_under_test.items():
            qX_hat = op(qX, output_size=output_size)
            self.assertEqual(
                X_ref, qX_hat.int_repr(), prec=1.0,
                message=error_message.format(name, X_ref, qX_hat))
            self.assertEqual(
                scale, qX_hat.q_scale(),
                message=error_message.format(name + '.scale', scale,
                                             qX_hat.q_scale()))
            # Report the expected zero point (previously `scale` was
            # formatted into this message by mistake).
            self.assertEqual(
                zero_point, qX_hat.q_zero_point(),
                message=error_message.format(name + '.zero_point',
                                             zero_point,
                                             qX_hat.q_zero_point()))

    @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=3, max_dims=4,
                                              min_side=1, max_side=10),
                       qparams=hu.qparams()),
           num=st.integers(1, 4),
           dim=st.integers(1, 4),
           relu=st.booleans())
    def test_cat(self, X, num, dim, relu):
        """Tests quantize concatenation (both fused and not)."""
        tensors_q = []
        tensors_ref = []
        X, (scale, zero_point, torch_type) = X
        assume(dim < X.ndim)
        X = torch.from_numpy(X)
        # Expected shape of the concatenation: same as X, except along `dim`
        # where the sizes of all the pieces are accumulated.
        new_shape = np.array(X.shape)
        new_shape[dim] = 0
        for _ in range(num):
            tensors_q.append(torch.quantize_linear(X, scale, zero_point,
                                                   torch_type))
            tensors_ref.append(X)
            new_shape[dim] += tensors_ref[-1].shape[dim]

        cat_ref = torch.cat(tensors_ref, dim=dim)
        cat_ref = torch.quantize_linear(cat_ref, scale, zero_point, torch_type)
        cat_ref = cat_ref.dequantize()

        if relu:
            cat_ref = F.relu(cat_ref)
            q_cat_op = torch.ops.quantized.cat_relu
            q_cat_out_op = torch.ops.quantized.cat_relu_out
        else:
            q_cat_op = torch.ops.quantized.cat
            q_cat_out_op = torch.ops.quantized.cat_out

        cat_q = q_cat_op(tensors_q, dim=dim, scale=scale,
                         zero_point=zero_point)
        cat_q = cat_q.dequantize()
        np.testing.assert_equal(cat_ref.numpy(), cat_q.numpy())

        cat_q_out = torch._empty_affine_quantized(
            list(new_shape), scale=scale,
            zero_point=zero_point, dtype=torch_type)
        q_cat_out_op(tensors_q, dim=dim, out=cat_q_out)
        cat_q_out = cat_q_out.dequantize()
        np.testing.assert_equal(cat_ref.numpy(), cat_q_out.numpy())

        # Test the cat on per-channel quantized tensor.
        ch_axis = 1
        scales = torch.from_numpy(np.array([1.0] * X.shape[ch_axis]))
        scales = scales.to(torch.float64)
        zero_points = torch.from_numpy(np.array([0] * X.shape[ch_axis]))
        zero_points = zero_points.to(torch.long)
        tensors_q[0] = torch.quantize_linear_per_channel(
            X, scales, zero_points, axis=[ch_axis], dtype=torch_type)
        with self.assertRaisesRegex(RuntimeError, "supported.*cat"):
            cat_q = q_cat_op(tensors_q, dim=ch_axis, scale=scale,
                             zero_point=zero_point)

    @given(X=hu.tensor(shapes=hu.array_shapes(1, 5, 1, 5),
                       qparams=hu.qparams()),
           X2=hu.tensor(shapes=hu.array_shapes(1, 5, 1, 5),
                        qparams=hu.qparams()),
           X_per_channel=st.booleans(),
           X2_per_channel=st.booleans())
    def test_equal(self, X, X2, X_per_channel, X2_per_channel):
        """Tests the correctness of the quantized equal op."""
        X, X_params = X
        (scale, zero_point, torch_type) = X_params
        X2, X2_params = X2
        (scale2, zero_point2, torch_type2) = X2_params

        X = torch.from_numpy(X)
        if X_per_channel:
            X_scheme = 'per_channel'
            channels = X.shape[-1]
            qX = torch.quantize_linear_per_channel(
                X,
                scales=torch.tensor([scale] * channels),
                zero_points=torch.tensor([zero_point] * channels),
                dtype=torch_type,
                axis=[X.ndim - 1])
        else:
            X_scheme = 'per_tensor'
            qX = torch.quantize_linear(X, scale=scale, zero_point=zero_point,
                                       dtype=torch_type)
        X2 = torch.from_numpy(X2)
        if X2_per_channel:
            X2_scheme = 'per_channel'
            channels = X2.shape[-1]
            qX2 = torch.quantize_linear_per_channel(
                X2,
                scales=torch.tensor([scale2] * channels),
                zero_points=torch.tensor([zero_point2] * channels),
                dtype=torch_type2,
                axis=[X2.ndim - 1])
        else:
            X2_scheme = 'per_tensor'
            qX2 = torch.quantize_linear(X2, scale=scale2,
                                        zero_point=zero_point2,
                                        dtype=torch_type2)

        def equal_ref(X, params, X_scheme, X2, params2, X2_scheme):
            # Reference: equal only if the scheme, the qparams, the shape and
            # all the values agree. The checks are ordered so that the
            # element-wise comparison only runs on same-shaped tensors.
            if X_scheme != X2_scheme:
                return False
            if params != params2:
                return False
            if X.shape != X2.shape:
                return False
            if (X != X2).any():
                return False
            return True

        self.assertEqual(qX.equal(qX),
                         equal_ref(X, X_params, X_scheme,
                                   X, X_params, X_scheme))
        self.assertEqual(qX.equal(qX2),
                         equal_ref(X, X_params, X_scheme,
                                   X2, X2_params, X2_scheme))
class TestQuantizedLinear(unittest.TestCase):
    """Tests for the fbgemm-backed quantized linear operators."""

    @given(batch_size=st.integers(1, 4),
           input_channels=st.integers(16, 32),
           output_channels=st.integers(4, 8),
           use_bias=st.booleans(),
           use_relu=st.booleans())
    def test_qlinear(self, batch_size, input_channels, output_channels,
                     use_bias, use_relu):
        """Tests the correctness of the quantized linear and linear_relu op.

        The operator output is compared against ``qlinear_ref`` and against
        a float ``F.linear`` result quantized with the output qparams.
        """
        quantized_ops = torch.ops.quantized
        qlinear_prepack = quantized_ops.fbgemm_linear_prepack
        qlinear = (quantized_ops.fbgemm_linear_relu if use_relu
                   else quantized_ops.fbgemm_linear)

        def random_quantized(shape, low, high, np_dtype):
            # Uniform samples in [low, high], rounded and cast to np_dtype.
            return np.round(
                np.random.rand(*shape) * (high - low) + low
            ).astype(np_dtype)

        # Activation parameters and raw quantized values.
        X_scale, X_zp = 1.5, 5
        X_value_min, X_value_max = 0, 225
        X_q0 = random_quantized((batch_size, input_channels),
                                X_value_min, X_value_max, np.uint8)

        # Weight parameters and raw quantized values.
        W_scale, W_zp = 0.4, 2
        W_value_min, W_value_max = -128, 127
        W_q0 = random_quantized((output_channels, input_channels),
                                W_value_min, W_value_max, np.int8)

        # Bias values are only drawn when a bias is requested.
        b_value_min, b_value_max = -10, 10
        if use_bias:
            b_q0 = random_quantized((output_channels,),
                                    b_value_min, b_value_max, np.int32)
        else:
            b_q0 = None

        avoid_vpmaddubsw_overflow_linear(
            batch_size,
            input_channels,
            output_channels,
            X_q0,
            X_value_min,
            X_value_max,
            W_q0,
            W_value_min,
            W_value_max,
        )

        bias_scale = X_scale * W_scale
        X = torch.from_numpy(
            _dequantize(X_q0, X_scale, X_zp)).to(dtype=torch.float)
        W = torch.from_numpy(
            _dequantize(W_q0, W_scale, W_zp)).to(dtype=torch.float)
        if use_bias:
            b = torch.from_numpy(
                _dequantize(b_q0, bias_scale, 0)).to(dtype=torch.float)
        else:
            b = None

        X_q = torch.quantize_linear(X, scale=X_scale, zero_point=X_zp,
                                    dtype=torch.quint8)
        W_q = torch.quantize_linear(W, scale=W_scale, zero_point=W_zp,
                                    dtype=torch.qint8)
        if use_bias:
            b_q = torch.quantize_linear(b, scale=bias_scale, zero_point=0,
                                        dtype=torch.qint32)
        else:
            b_q = None

        # Compare X_scale * W_scale * input_channels * X_value_max * W_value_max with
        # Y_scale * 255 (max for uint8).
        Y_scale, Y_zp = 125.1234, 5

        # Reference quantized Linear operator
        expected_q = qlinear_ref(X_q0, X_scale, X_zp, W_q0, W_scale, W_zp,
                                 b_q0, Y_scale, Y_zp)
        if use_relu:
            # In the quantized domain ReLU clamps at the output zero point.
            expected_q[expected_q < Y_zp] = Y_zp

        # Prepack the weight, then run the quantized Linear operator on it.
        packed_weight = qlinear_prepack(W_q)
        Y_q = qlinear(X_q, packed_weight, b_q, Y_scale, Y_zp)
        actual_q = Y_q.int_repr().numpy()

        np.testing.assert_equal(expected_q, actual_q)

        # Reference quantized result from PyTorch Linear operator
        W_fp32 = W_q.dequantize().to(dtype=torch.float)
        X_fp32 = X_q.dequantize().to(dtype=torch.float)
        b_fp32 = None
        if use_bias:
            b_fp32 = b_q.dequantize().to(dtype=torch.float)
        Y_fp32_ref = F.linear(X_fp32, W_fp32, b_fp32)
        if use_relu:
            Y_fp32_ref[Y_fp32_ref < 0.0] = 0.0
        expected_q_from_float = torch.quantize_linear(
            Y_fp32_ref, Y_scale, Y_zp, torch.quint8)

        np.testing.assert_equal(expected_q_from_float.int_repr().numpy(),
                                Y_q.int_repr().numpy())

    @given(W=hu.tensor(shapes=hu.array_shapes(2, 2,),
                       qparams=hu.qparams(dtypes=torch.qint8)))
    def test_qlinear_unpack(self, W):
        """Tests the correctness of the quantized::fbgemm_linear_unpack op.

        A weight that is prepacked and then unpacked must keep its integer
        representation, scale and zero point.
        """
        weight, (W_scale, W_zp, torch_type) = W
        pack = torch.ops.quantized.fbgemm_linear_prepack
        unpack = torch.ops.quantized.fbgemm_linear_unpack

        W_q = torch.quantize_linear(torch.from_numpy(weight), scale=W_scale,
                                    zero_point=W_zp, dtype=torch_type)

        # Pack, then unpack again (the unpack op is used for serialization).
        W_q_origin = unpack(pack(W_q))

        np.testing.assert_equal(W_q.int_repr(),
                                W_q_origin.int_repr().numpy())
        np.testing.assert_equal(W_q.q_scale(), W_q_origin.q_scale())
        np.testing.assert_equal(W_q.q_zero_point(),
                                W_q_origin.q_zero_point())
class TestFakeQuantizePerTensorAffine(unittest.TestCase):
    """Tests for the per-tensor affine fake-quantize op and module."""
    # NOTE: Tests in this class are decorated with no_deadline
    # to prevent spurious failures due to cuda runtime initialization.

    def to_tensor(self, X, device):
        """Return ``X`` as a float32 tensor placed on ``device``."""
        target = torch.device(device)
        return torch.tensor(X).to(device=target, dtype=torch.float32)

    @no_deadline
    @given(device=st.sampled_from(
           ['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
           X=hu.tensor(shapes=hu.array_shapes(1, 5,),
                       qparams=hu.qparams(dtypes=torch.quint8)))
    def test_forward(self, device, X):
        r"""Tests the forward path of the FakeQuantizePerTensorAffine op.
        """
        np.random.seed(NP_RANDOM_SEED)
        values, (scale, zero_point, torch_type) = X
        limits = torch.iinfo(torch_type)
        quant_min, quant_max = limits.min, limits.max

        x = torch.tensor(values).to(dtype=torch.float, device=device)
        # The reference is evaluated on a CPU copy of the input.
        expected = _fake_quantize_per_tensor_affine_reference(
            x.cpu(), scale, zero_point, quant_min, quant_max)
        actual = torch.fake_quantize_per_tensor_affine(
            x, scale, zero_point, quant_min, quant_max)
        np.testing.assert_allclose(expected, actual.cpu(),
                                   rtol=tolerance, atol=tolerance)

    @no_deadline
    @given(device=st.sampled_from(
           ['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
           X=hu.tensor(shapes=hu.array_shapes(1, 5,),
                       qparams=hu.qparams(dtypes=torch.quint8)))
    def test_backward(self, device, X):
        r"""Tests the backward method. Note that this runs the reference
        quantization and thus the errors might be originating there.
        """
        np.random.seed(NP_RANDOM_SEED)
        values, (scale, zero_point, torch_type) = X
        limits = torch.iinfo(torch_type)
        quant_min, quant_max = limits.min, limits.max

        x = torch.tensor(values).to(dtype=torch.float, device=device)
        x.requires_grad_()
        # The reference forward is still executed; its result is not compared.
        _fake_quantize_per_tensor_affine_reference(
            x.cpu(), scale, zero_point, quant_min, quant_max)
        actual = torch.fake_quantize_per_tensor_affine(
            x, scale, zero_point, quant_min, quant_max)

        upstream_grad = torch.rand(x.shape, dtype=torch.float).to(device)
        expected_grad = _fake_quantize_per_tensor_affine_grad_reference(
            upstream_grad, x, scale, zero_point, quant_min, quant_max)
        actual.backward(upstream_grad)
        np.testing.assert_allclose(expected_grad.cpu(),
                                   x.grad.cpu().detach().numpy(),
                                   rtol=tolerance, atol=tolerance)

    @no_deadline
    @given(device=st.sampled_from(
           ['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
           X=hu.tensor(shapes=hu.array_shapes(1, 5,),
                       qparams=hu.qparams(dtypes=torch.quint8)))
    def test_numerical_consistency(self, device, X):
        r"""Comparing numerical consistency between CPU quantize/dequantize op and the CPU fake quantize op
        """
        np.random.seed(NP_RANDOM_SEED)
        values, (scale, zero_point, torch_type) = X
        limits = torch.iinfo(torch_type)
        quant_min, quant_max = limits.min, limits.max

        x = torch.tensor(values).to(dtype=torch.float, device=device)
        # quantize_linear and dequantize are only implemented in CPU
        quantized = torch.quantize_linear(x.cpu(), scale, zero_point,
                                          torch_type)
        expected = torch.dequantize(quantized)
        actual = torch.fake_quantize_per_tensor_affine(
            x, scale, zero_point, quant_min, quant_max)
        np.testing.assert_allclose(expected, actual.cpu(),
                                   rtol=tolerance, atol=tolerance)

    @no_deadline
    @given(device=st.sampled_from(
           ['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
           X=hu.tensor(shapes=hu.array_shapes(1, 5,),
                       qparams=hu.qparams(dtypes=torch.quint8)))
    def test_fq_module(self, device, X):
        """Checks the FakeQuantize module's forward and backward against the references."""
        np.random.seed(NP_RANDOM_SEED)
        values, (scale, zero_point, torch_type) = X
        limits = torch.iinfo(torch_type)
        quant_min, quant_max = limits.min, limits.max

        x = torch.tensor(values).to(dtype=torch.float, device=device)
        x.requires_grad_()
        fq_module = FakeQuantize(torch_type, torch.per_tensor_affine,
                                 quant_min, quant_max)
        actual = fq_module(x)
        assert fq_module.scale is not None
        assert fq_module.zero_point is not None

        # The reference uses the qparams held by the module after the call.
        expected = _fake_quantize_per_tensor_affine_reference(
            x, fq_module.scale, fq_module.zero_point, quant_min, quant_max)
        np.testing.assert_allclose(expected.cpu().detach().numpy(),
                                   actual.cpu().detach().numpy(),
                                   rtol=tolerance, atol=tolerance)

        # Test backward
        upstream_grad = torch.rand(x.shape, dtype=torch.float, device=device)
        actual.backward(upstream_grad)
        expected_grad = _fake_quantize_per_tensor_affine_grad_reference(
            upstream_grad, x, fq_module.scale, fq_module.zero_point,
            quant_min, quant_max)
        np.testing.assert_allclose(expected_grad.cpu(),
                                   x.grad.cpu().detach().numpy(),
                                   rtol=tolerance, atol=tolerance)
class FunctionalAPITest(TestCase):
    """Tests for the quantized functional API."""

    @given(X=hu.tensor_conv2d(min_batch=1, max_batch=3,
                              min_in_channels=1, max_in_channels=7,
                              min_out_channels=1, max_out_channels=7,
                              H_range=(6, 12), W_range=(6, 12),
                              kH_range=(3, 5), kW_range=(3, 5),
                              max_groups=4,
                              qparams=[
                                  hu.qparams(dtypes=torch.quint8,
                                             zero_point_min=0,
                                             zero_point_max=0),
                                  hu.qparams(dtypes=torch.qint8,
                                             zero_point_min=0,
                                             zero_point_max=0),
                                  hu.qparams(dtypes=torch.qint32,
                                             zero_point_min=0,
                                             zero_point_max=0)
                              ]),
           padH=st.integers(1, 3), padW=st.integers(1, 3),
           sH=st.integers(1, 3), sW=st.integers(1, 3),
           dH=st.integers(1, 2), dW=st.integers(1, 2),
           prepacked=st.booleans())
    def test_conv_api(self, X, padH, padW, sH, sW, dH, dW, prepacked):
        """Tests the correctness of the conv functional.

        The correctness is defined by the behavior being similar to the
        `quantized._ops` implementation.
        """
        # Unpack the generated data; each tensor carries its own qparams.
        (inputs, filters, bias, groups) = X
        inputs, (inputs_scale, inputs_zero_point, inputs_qtype) = inputs
        filters, (filters_scale, filters_zero_point, filters_qtype) = filters
        bias, (bias_scale, bias_zero_point, bias_qtype) = bias

        iH, iW = inputs.shape[2:]
        kH, kW = filters.shape[2:]

        # Discard examples whose padding or output size is not usable.
        assume(kH // 2 >= padH)
        assume(kW // 2 >= padW)
        oH = _conv_output_shape(iH, kH, padH, sH, dH)
        assume(oH > 0)
        oW = _conv_output_shape(iW, kW, padW, sW, dW)
        assume(oW > 0)

        inputs = torch.from_numpy(inputs).to(torch.float)
        filters = torch.from_numpy(filters).to(torch.float)
        bias = torch.from_numpy(bias).to(torch.float)

        stride = (sH, sW)
        i_padding = (padH, padW)
        dilation = (dH, dW)

        # Quantized inputs (channel dimension permuted to the last position).
        channels_last = [0, 2, 3, 1]
        q_inputs = torch.quantize_linear(
            inputs.permute(channels_last).contiguous(),
            inputs_scale, inputs_zero_point, inputs_qtype)
        q_filters = torch.quantize_linear(
            filters.permute(channels_last).contiguous(),
            filters_scale, filters_zero_point, filters_qtype)
        q_filters_ref = torch.ops.quantized.fbgemm_conv_prepack(
            q_filters, groups)
        q_bias = torch.quantize_linear(bias, bias_scale, bias_zero_point,
                                       bias_qtype)

        # Keyword arguments shared by every functional conv2d call below;
        # the output uses the input's scale, zero point and dtype.
        conv_kwargs = dict(bias=q_bias,
                           scale=inputs_scale,
                           zero_point=inputs_zero_point,
                           stride=stride, padding=i_padding,
                           dilation=dilation, groups=groups,
                           dtype=inputs_qtype)

        # Reference op
        ref_op = torch.ops.quantized.fbgemm_conv2d

        # Results check
        try:
            ref_result = ref_op(q_inputs, q_filters_ref, q_bias, stride,
                                i_padding, dilation, groups,
                                inputs_scale, inputs_zero_point)
        except RuntimeError as e:
            # The functional API must fail with the same leading message.
            first_line = str(e).split("\n")[0]
            e_msg = first_line.split("(")[0].strip()
            np.testing.assert_raises_regex(
                type(e), e_msg, qF.conv2d,
                q_inputs, q_filters_ref,
                prepacked=True, **conv_kwargs)
        else:
            if prepacked:
                q_filters = torch.ops.quantized.fbgemm_conv_prepack(
                    q_filters, groups)
            q_result = qF.conv2d(q_inputs, q_filters,
                                 prepacked=prepacked, **conv_kwargs)

            np.testing.assert_equal(ref_result.int_repr().numpy(),
                                    q_result.int_repr().numpy())
class TestQuantizedOps(TestCase):
    """Correctness tests for the quantized relu, add, max_pool2d and cat ops."""

    def _pool_output_shape(self, input_size, kernel_size, padding, stride,
                           dilation, ceiling_mode=False):
        """Computes the output shape given pooling parameters.

        Args:
            input_size: size of the pooled dimension.
            kernel_size: pooling window size along that dimension.
            padding: padding added on each side.
            stride: window step; ``None`` falls back to ``kernel_size``.
            dilation: spacing between window elements.
            ceiling_mode: when true, ``stride - 1`` is added before the
                floor division.

        Returns:
            The integer output size along the dimension.
        """
        if stride is None:
            stride = kernel_size
        output_size = (
            (input_size + 2 * padding - dilation * (kernel_size - 1) - 1
             + (stride - 1 if ceiling_mode else 0)) // stride + 1)
        if (padding > 0 and
                ((output_size - 1) * stride >= input_size + padding)):
            output_size += 1
        return output_size

    @given(X=hu.tensor(shapes=hu.array_shapes(1, 5, 1, 5),
                       qparams=hu.qparams()))
    def test_qrelu(self, X):
        """Tests the correctness of the quantized::relu op."""
        X, (scale, zero_point, torch_type) = X

        # Ground truth: clamp negatives in float, then quantize.
        Y = X.copy()
        Y[Y < 0] = 0
        qY = torch.quantize_linear(torch.from_numpy(Y), scale=scale,
                                   zero_point=zero_point, dtype=torch_type)
        X = torch.from_numpy(X)
        qX = torch.quantize_linear(X, scale=scale, zero_point=zero_point,
                                   dtype=torch_type)

        ops_under_test = {
            'ops.quantized': torch.ops.quantized.relu,
            'native': torch.relu,
            'nn.functional': torch.nn.functional.relu
        }

        for name, op in ops_under_test.items():
            qY_hat = op(qX)
            # The failure text must go through the `message` keyword; a
            # third positional argument is not the message slot.
            self.assertEqual(qY, qY_hat,
                             message="{} relu failed".format(name))

    def test_qadd_relu_same_qparams(self):
        """Tests the correctness of the add and add_relu op.

        Both inputs and the output share one scale and zero point.
        """
        add_relu = torch.ops.quantized.add_relu
        add = torch.ops.quantized.add

        A = torch.arange(-25, 25, dtype=torch.float)
        B = torch.arange(-25, 25, dtype=torch.float)
        scale = 2.0
        zero_point = 127
        qA = torch.quantize_linear(A, scale=scale, zero_point=zero_point,
                                   dtype=torch.quint8)
        qB = torch.quantize_linear(B, scale=scale, zero_point=zero_point,
                                   dtype=torch.quint8)

        # Add ground truth
        C = (qA.dequantize() + qB.dequantize()).numpy()
        qC = _quantize(C, scale, zero_point)
        qC_hat = add(qA, qB, scale=scale, zero_point=zero_point)
        np.testing.assert_equal(qC, qC_hat.int_repr(),
                                "Quantized addition failed.")

        # Add + ReLU ground truth
        Crelu = C.copy()
        Crelu[C < 0] = 0
        qCrelu = _quantize(Crelu, scale, zero_point)
        qCrelu_hat = add_relu(qA, qB, scale=scale, zero_point=zero_point)
        np.testing.assert_equal(qCrelu, qCrelu_hat.int_repr(),
                                "Quantized addition with ReLU failed.")

    def test_qadd_relu_different_qparams(self):
        """Tests the correctness of the add and add_relu op.

        The two inputs and the output each use their own scale and
        zero point.
        """
        add_relu = torch.ops.quantized.add_relu
        add = torch.ops.quantized.add

        A = torch.arange(-25, 25, dtype=torch.float)
        B = torch.arange(-25, 25, dtype=torch.float)
        scale_A = 3.0
        zero_point_A = 7
        scale_B = 5.0
        zero_point_B = 127

        scale_C = 0.5
        zero_point_C = 5

        qA = torch.quantize_linear(A, scale=scale_A, zero_point=zero_point_A,
                                   dtype=torch.quint8)
        qB = torch.quantize_linear(B, scale=scale_B, zero_point=zero_point_B,
                                   dtype=torch.quint8)

        # Add ground truth
        C = (qA.dequantize() + qB.dequantize()).numpy()
        qC = _quantize(C, scale_C, zero_point_C)
        qC_hat = add(qA, qB, scale=scale_C, zero_point=zero_point_C)
        np.testing.assert_equal(qC, qC_hat.int_repr(),
                                "Quantized addition failed.")

        # Add + ReLU ground truth
        Crelu = C.copy()
        Crelu[C < 0] = 0
        qCrelu = _quantize(Crelu, scale_C, zero_point_C)
        qCrelu_hat = add_relu(qA, qB, scale=scale_C, zero_point=zero_point_C)
        np.testing.assert_equal(qCrelu, qCrelu_hat.int_repr(),
                                "Quantized addition with ReLU failed.")

    @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=3, max_dims=4,
                                              min_side=1, max_side=10),
                       qparams=hu.qparams()),
           kernel=st.sampled_from((3, 5, 7)),
           stride=st.integers(1, 2),
           dilation=st.integers(1, 2),
           padding=st.integers(0, 2))
    def test_max_pool2d(self, X, kernel, stride, dilation, padding):
        """Tests max pool operation on quantized tensors."""
        X, (scale, zero_point, torch_type) = X
        # Check constraints
        assume(kernel // 2 >= padding)  # Kernel cannot be overhanging!
        iH, iW = X.shape[-2:]
        oH = self._pool_output_shape(iH, kernel, padding, stride, dilation)
        assume(oH > 0)
        oW = self._pool_output_shape(iW, kernel, padding, stride, dilation)
        assume(oW > 0)

        k = (kernel, kernel)
        s = (stride, stride)
        d = (dilation, dilation)
        p = (padding, padding)

        q_max_pool = torch.ops.quantized.max_pool2d

        a = torch.from_numpy(X)
        qa = torch.quantize_linear(a, scale=scale, zero_point=zero_point,
                                   dtype=torch_type)

        # Float pooling of the dequantized input is the reference.
        a_hat = qa.dequantize()
        a_pool = F.max_pool2d(a_hat, kernel_size=k, stride=s, padding=p,
                              dilation=d)

        qa_pool_hat = q_max_pool(qa, kernel_size=k, stride=s, padding=p,
                                 dilation=d)
        a_pool_hat = qa_pool_hat.dequantize()

        np.testing.assert_equal(a_pool.numpy(), a_pool_hat.numpy())

    @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=3, max_dims=4,
                                              min_side=1, max_side=10),
                       qparams=hu.qparams()),
           num=st.integers(1, 4),
           axis=st.integers(1, 4),
           relu=st.booleans())
    def test_cat(self, X, num, axis, relu):
        """Tests quantize concatenation (both fused and not)."""
        tensors_q = []
        tensors_ref = []
        X, (scale, zero_point, torch_type) = X
        assume(axis < X.ndim)
        X = torch.from_numpy(X)
        for _ in range(num):
            tensors_q.append(
                torch.quantize_linear(X, scale, zero_point, torch_type))
            tensors_ref.append(X)

        # Reference: float cat, then a quantize/dequantize round trip.
        cat_ref = torch.cat(tensors_ref, axis=axis)
        cat_ref = torch.quantize_linear(cat_ref, scale, zero_point,
                                        torch_type)
        cat_ref = cat_ref.dequantize()

        if relu:
            cat_ref = F.relu(cat_ref)
            q_cat_op = torch.ops.quantized.cat_relu
        else:
            q_cat_op = torch.ops.quantized.cat
        cat_q = q_cat_op(tensors_q, axis=axis, scale=scale,
                         zero_point=zero_point)
        cat_q = cat_q.dequantize()
        np.testing.assert_equal(cat_ref.numpy(), cat_q.numpy())

        # Test the cat on per-channel quantized tensor.
        ch_axis = 1
        # Explicit dtypes: the integer dtype numpy infers for a list of
        # Python ints depends on the platform.
        scales = torch.from_numpy(np.array([1.0] * X.shape[ch_axis]))
        scales = scales.to(torch.float64)
        zero_points = torch.from_numpy(np.array([0] * X.shape[ch_axis]))
        zero_points = zero_points.to(torch.long)
        tensors_q[0] = torch.quantize_linear_per_channel(
            X, scales, zero_points, axis=[ch_axis], dtype=torch_type)
        with self.assertRaisesRegex(RuntimeError, "supported.*cat"):
            cat_q = q_cat_op(tensors_q, axis=axis, scale=scale,
                             zero_point=zero_point)
class TestQuantizedConv(unittest.TestCase):
    """Tests for the fbgemm-backed quantized convolution operators."""

    @given(batch_size=st.integers(1, 3),
           input_channels_per_group=st.sampled_from([2, 4, 5, 8, 16, 32]),
           height=st.integers(10, 16),
           width=st.integers(7, 14),
           output_channels_per_group=st.sampled_from([2, 4, 5, 8, 16, 32]),
           groups=st.integers(1, 3),
           kernel_h=st.integers(1, 7),
           kernel_w=st.integers(1, 7),
           stride_h=st.integers(1, 2),
           stride_w=st.integers(1, 2),
           pad_h=st.integers(0, 2),
           pad_w=st.integers(0, 2),
           dilation=st.integers(1, 1),
           use_bias=st.booleans())
    def test_qconv(self, batch_size, input_channels_per_group, height, width,
                   output_channels_per_group, groups, kernel_h, kernel_w,
                   stride_h, stride_w, pad_h, pad_w, dilation, use_bias):
        """Tests the correctness of quantized convolution op.

        A float ``torch.nn.Conv2d`` run on integer-valued data is the
        reference; its output is requantized and compared with the
        integer representation of the quantized op's output.
        """
        qconv = torch.ops.quantized.fbgemm_conv2d
        qconv_prepack = torch.ops.quantized.fbgemm_conv_prepack

        input_channels = input_channels_per_group * groups    # C
        output_channels = output_channels_per_group * groups  # K
        kernel_hw = (kernel_h, kernel_w)
        dilation_h = dilation_w = dilation

        # For testing, we use small values for weights and for activations so that no overflow occurs
        # in vpmaddubsw instruction. If the overflow occurs in qconv implementation and if there is no overflow
        # in reference we can't exactly match the results with reference.
        # Please see the comment in qconv implementation file (aten/src/ATen/native/quantized/cpu/qconv.cpp)
        # for more details.
        W_value_min, W_value_max = -5, 5
        # the operator expects them in the format (output_channels, input_channels/groups, kernel_h, kernel_w)
        weight_shape = (output_channels, int(input_channels / groups),
                        kernel_h, kernel_w)
        W_init = torch.from_numpy(
            np.random.randint(W_value_min, W_value_max, weight_shape))
        b_init = torch.from_numpy(
            np.random.randint(0, 10, (output_channels, )))

        # Existing floating point conv operator
        conv_op = torch.nn.Conv2d(
            input_channels,
            output_channels,
            kernel_hw,
            (stride_h, stride_w),
            (pad_h, pad_w),
            (dilation_h, dilation_w),
            groups,
        )

        # Overwrite the module's parameters with the integer-valued data.
        conv_op.weight = torch.nn.Parameter(W_init.to(dtype=torch.float),
                                            requires_grad=False)
        if use_bias:
            conv_op.bias = torch.nn.Parameter(b_init.to(dtype=torch.float),
                                              requires_grad=False)
        else:
            conv_op.bias = None

        X_value_min, X_value_max = 0, 4
        X_init = torch.from_numpy(
            np.random.randint(X_value_min, X_value_max,
                              (batch_size, input_channels, height, width)))

        # run on an input tensor
        result_ref = conv_op(X_init.to(dtype=torch.float))

        # reformat X_init and W_init in the required format by conv operator
        to_channels_last = [0, 2, 3, 1]
        # NCHW -> NHWC
        X_NHWC = X_init.permute(to_channels_last).contiguous()
        # K(C/G)RS -> KRS(C/G)
        W_KRSC = W_init.permute(to_channels_last).contiguous()

        # Currently only 0 as zero point is supported.
        X_scale, X_zero_point = 1.5, 0
        W_scale, W_zero_point = 2.5, 0
        bias_scale = X_scale * W_scale

        X = X_scale * (X_NHWC - X_zero_point).to(dtype=torch.float)
        W = W_scale * (W_KRSC - W_zero_point).to(dtype=torch.float)
        b = bias_scale * (b_init - 0).to(dtype=torch.float)

        X_q = torch.quantize_linear(X, scale=X_scale,
                                    zero_point=X_zero_point,
                                    dtype=torch.quint8)
        W_q = torch.quantize_linear(W, scale=W_scale,
                                    zero_point=W_zero_point,
                                    dtype=torch.qint8)
        if use_bias:
            b_q = torch.quantize_linear(b, scale=bias_scale, zero_point=0,
                                        dtype=torch.qint32)
        else:
            b_q = None

        W_prepack = qconv_prepack(W_q, groups)
        Y_scale, Y_zero_point = 7.3, 5

        Y_q = qconv(
            X_q,
            W_prepack,
            b_q,
            [stride_h, stride_w],      # stride
            [pad_h, pad_w],            # padding
            [dilation_h, dilation_w],  # dilation
            groups,                    # groups
            Y_scale,
            Y_zero_point,
        )

        # Bring the reference into the same layout before requantizing.
        result_NHWK = result_ref.permute(to_channels_last)
        result_q = _requantize(result_NHWK.numpy(),
                               bias_scale / Y_scale,
                               Y_zero_point)

        # Make sure the results match
        np.testing.assert_equal(result_q, Y_q.int_repr().numpy())

    @given(W=hu.tensor(shapes=hu.array_shapes(4, 4,),
                       qparams=hu.qparams(dtypes=torch.qint8,
                                          zero_point_min=0,
                                          zero_point_max=0)))
    def test_qconv_unpack(self, W):
        """Tests the correctness of the quantized::fbgemm_qconv_unpack op.

        A weight that is prepacked and then unpacked must keep its integer
        representation, scale and zero point.
        """
        weight, (W_scale, W_zp, torch_type) = W
        pack = torch.ops.quantized.fbgemm_conv_prepack
        unpack = torch.ops.quantized.fbgemm_conv_unpack

        # Orig tensor is assumed to be in K(C/G)RS format
        weight = torch.from_numpy(weight)
        # K(C/G)RS -> KRS(C/G)
        W_KRSC = weight.permute([0, 2, 3, 1]).contiguous()
        W_q = torch.quantize_linear(W_KRSC, scale=W_scale, zero_point=W_zp,
                                    dtype=torch_type)

        # Pack with a single group, then unpack (used for serialization).
        W_unpacked = unpack(pack(W_q, 1))

        np.testing.assert_equal(W_q.int_repr().numpy(),
                                W_unpacked.int_repr().numpy())
        np.testing.assert_equal(W_q.q_scale(), W_unpacked.q_scale())
        np.testing.assert_equal(W_q.q_zero_point(),
                                W_unpacked.q_zero_point())
class TestQuantizedOps(TestCase):
    """Correctness tests for the quantized relu, add and max_pool2d ops."""

    def _pool_output_shape(self, input_size, kernel_size, padding, stride,
                           dilation, ceiling_mode=False):
        """Computes the output shape given pooling parameters.

        Args:
            input_size: size of the pooled dimension.
            kernel_size: pooling window size along that dimension.
            padding: padding added on each side.
            stride: window step; ``None`` falls back to ``kernel_size``.
            dilation: spacing between window elements.
            ceiling_mode: when true, ``stride - 1`` is added before the
                floor division.

        Returns:
            The integer output size along the dimension.
        """
        if stride is None:
            stride = kernel_size
        # Floor division is required: true division yields a fractional
        # size, which makes the `> 0` precondition in test_max_pool2d
        # accept shapes whose real output size is zero.
        output_size = (
            (input_size + 2 * padding - dilation * (kernel_size - 1) - 1
             + (stride - 1 if ceiling_mode else 0)) // stride + 1)
        if (padding > 0 and
                ((output_size - 1) * stride >= input_size + padding)):
            output_size += 1
        return output_size

    @given(X=hu.tensor(shapes=hu.array_shapes(1, 5, 1, 5),
                       qparams=hu.qparams()))
    def test_qrelu(self, X):
        """Tests the correctness of the quantized::relu op."""
        X, (scale, zero_point, torch_type) = X
        relu = torch.ops.quantized.relu

        Y = X.copy()
        X = torch.from_numpy(X)

        qX = torch.quantize_linear(X, scale=scale, zero_point=zero_point,
                                   dtype=torch_type)
        qY_hat = relu(qX)

        # Ground truth: clamp negatives in float, then quantize.
        Y[Y < 0] = 0
        qY_ref = torch.quantize_linear(torch.from_numpy(Y), scale=scale,
                                       zero_point=zero_point,
                                       dtype=torch_type)
        self.assertEqual(qY_ref, qY_hat)

    def test_qadd_relu_same_qparams(self):
        """Tests the correctness of the add and add_relu op.

        Both inputs and the output share one scale and zero point.
        """
        add_relu = torch.ops.quantized.add_relu
        add = torch.ops.quantized.add

        A = torch.arange(-25, 25, dtype=torch.float)
        B = torch.arange(-25, 25, dtype=torch.float)
        scale = 2.0
        zero_point = 127
        qA = torch.quantize_linear(A, scale=scale, zero_point=zero_point,
                                   dtype=torch.quint8)
        qB = torch.quantize_linear(B, scale=scale, zero_point=zero_point,
                                   dtype=torch.quint8)

        # Add ground truth
        C = (qA.dequantize() + qB.dequantize()).numpy()
        qC = _quantize(C, scale, zero_point)
        qC_hat = add(qA, qB, scale=scale, zero_point=zero_point)
        np.testing.assert_equal(qC, qC_hat.int_repr(),
                                "Quantized addition failed.")

        # Add + ReLU ground truth
        Crelu = C.copy()
        Crelu[C < 0] = 0
        qCrelu = _quantize(Crelu, scale, zero_point)
        qCrelu_hat = add_relu(qA, qB, scale=scale, zero_point=zero_point)
        np.testing.assert_equal(qCrelu, qCrelu_hat.int_repr(),
                                "Quantized addition with ReLU failed.")

    def test_qadd_relu_different_qparams(self):
        """Tests the correctness of the add and add_relu op.

        The two inputs and the output each use their own scale and
        zero point.
        """
        add_relu = torch.ops.quantized.add_relu
        add = torch.ops.quantized.add

        A = torch.arange(-25, 25, dtype=torch.float)
        B = torch.arange(-25, 25, dtype=torch.float)
        scale_A = 3.0
        zero_point_A = 7
        scale_B = 5.0
        zero_point_B = 127

        scale_C = 0.5
        zero_point_C = 5

        qA = torch.quantize_linear(A, scale=scale_A, zero_point=zero_point_A,
                                   dtype=torch.quint8)
        qB = torch.quantize_linear(B, scale=scale_B, zero_point=zero_point_B,
                                   dtype=torch.quint8)

        # Add ground truth
        C = (qA.dequantize() + qB.dequantize()).numpy()
        qC = _quantize(C, scale_C, zero_point_C)
        qC_hat = add(qA, qB, scale=scale_C, zero_point=zero_point_C)
        np.testing.assert_equal(qC, qC_hat.int_repr(),
                                "Quantized addition failed.")

        # Add + ReLU ground truth
        Crelu = C.copy()
        Crelu[C < 0] = 0
        qCrelu = _quantize(Crelu, scale_C, zero_point_C)
        qCrelu_hat = add_relu(qA, qB, scale=scale_C, zero_point=zero_point_C)
        np.testing.assert_equal(qCrelu, qCrelu_hat.int_repr(),
                                "Quantized addition with ReLU failed.")

    @given(X=hu.tensor(shapes=hu.array_shapes(min_dims=3, max_dims=4,
                                              min_side=1, max_side=10),
                       qparams=hu.qparams()),
           kernel=st.sampled_from((3, 5, 7)),
           stride=st.integers(1, 2),
           dilation=st.integers(1, 2),
           padding=st.integers(0, 2))
    def test_max_pool2d(self, X, kernel, stride, dilation, padding):
        """Tests max pool operation on quantized tensors."""
        X, (scale, zero_point, torch_type) = X
        # Check constraints
        assume(kernel // 2 >= padding)  # Kernel cannot be overhanging!
        iH, iW = X.shape[-2:]
        oH = self._pool_output_shape(iH, kernel, padding, stride, dilation)
        assume(oH > 0)
        oW = self._pool_output_shape(iW, kernel, padding, stride, dilation)
        assume(oW > 0)

        k = (kernel, kernel)
        s = (stride, stride)
        d = (dilation, dilation)
        p = (padding, padding)

        q_max_pool = torch.ops.quantized.max_pool2d

        a = torch.from_numpy(X)
        qa = torch.quantize_linear(a, scale=scale, zero_point=zero_point,
                                   dtype=torch_type)

        # Float pooling of the dequantized input is the reference.
        a_hat = qa.dequantize()
        a_pool = F.max_pool2d(a_hat, kernel_size=k, stride=s, padding=p,
                              dilation=d)

        qa_pool_hat = q_max_pool(qa, kernel_size=k, stride=s, padding=p,
                                 dilation=d)
        a_pool_hat = qa_pool_hat.dequantize()

        # Without this comparison the test exercises the op but checks
        # nothing about its output.
        np.testing.assert_equal(a_pool.numpy(), a_pool_hat.numpy())
class TestFakeQuantizePerTensor(TestCase):
    """Tests for the per-tensor fake-quantize op and the FakeQuantize module."""
    # NOTE: Tests in this class are decorated with no_deadline
    # to prevent spurious failures due to cuda runtime initialization.

    @no_deadline
    @given(device=st.sampled_from(
           ['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
           X=hu.tensor(shapes=hu.array_shapes(1, 5,),
                       qparams=hu.qparams(dtypes=torch.quint8)))
    def test_forward_per_tensor(self, device, X):
        r"""Tests the forward path of the FakeQuantizePerTensorAffine op.
        """
        np.random.seed(NP_RANDOM_SEED)
        values, (scale, zero_point, torch_type) = X
        limits = torch.iinfo(torch_type)
        quant_min, quant_max = limits.min, limits.max

        x = to_tensor(values, device)
        # The reference is evaluated on a CPU copy of the input.
        expected = _fake_quantize_per_tensor_affine_reference(
            x.cpu(), scale, zero_point, quant_min, quant_max)
        actual = torch.fake_quantize_per_tensor_affine(
            x, scale, zero_point, quant_min, quant_max)
        np.testing.assert_allclose(expected, actual.cpu(),
                                   rtol=tolerance, atol=tolerance)

    @no_deadline
    @given(device=st.sampled_from(
           ['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
           X=hu.tensor(shapes=hu.array_shapes(1, 5,),
                       qparams=hu.qparams(dtypes=torch.quint8)))
    def test_backward_per_tensor(self, device, X):
        r"""Tests the backward method.
        """
        np.random.seed(NP_RANDOM_SEED)
        values, (scale, zero_point, torch_type) = X
        limits = torch.iinfo(torch_type)
        quant_min, quant_max = limits.min, limits.max

        x = to_tensor(values, device)
        x.requires_grad_()
        # The reference forward is still executed; its result is not compared.
        _fake_quantize_per_tensor_affine_reference(
            x.cpu(), scale, zero_point, quant_min, quant_max)
        actual = torch.fake_quantize_per_tensor_affine(
            x, scale, zero_point, quant_min, quant_max)

        upstream_grad = torch.rand(x.shape, dtype=torch.float).to(device)
        expected_grad = _fake_quantize_per_tensor_affine_grad_reference(
            upstream_grad, x, scale, zero_point, quant_min, quant_max)
        actual.backward(upstream_grad)
        np.testing.assert_allclose(expected_grad.cpu(),
                                   x.grad.cpu().detach().numpy(),
                                   rtol=tolerance, atol=tolerance)

    @no_deadline
    @given(device=st.sampled_from(
           ['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
           X=hu.tensor(shapes=hu.array_shapes(1, 5,),
                       qparams=hu.qparams(dtypes=torch.quint8)))
    def test_numerical_consistency_per_tensor(self, device, X):
        r"""Comparing numerical consistency between CPU quantize/dequantize op and the CPU fake quantize op
        """
        np.random.seed(NP_RANDOM_SEED)
        values, (scale, zero_point, torch_type) = X
        limits = torch.iinfo(torch_type)
        quant_min, quant_max = limits.min, limits.max

        x = to_tensor(values, device)
        # quantize_per_tensor and dequantize are only implemented in CPU
        quantized = torch.quantize_per_tensor(x.cpu(), scale, zero_point,
                                              torch_type)
        expected = torch.dequantize(quantized)
        actual = torch.fake_quantize_per_tensor_affine(
            x, scale, zero_point, quant_min, quant_max)
        np.testing.assert_allclose(expected, actual.cpu(),
                                   rtol=tolerance, atol=tolerance)

    @no_deadline
    @given(
        device=st.sampled_from(
            ['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
        X=hu.tensor(shapes=hu.array_shapes(1, 5,),
                    qparams=hu.qparams(dtypes=[torch.quint8])),
    )
    def test_fq_module(self, device, X):
        """Checks the default fake-quant module's forward and backward against the references."""
        np.random.seed(NP_RANDOM_SEED)
        values, (scale, zero_point, torch_type) = X
        limits = torch.iinfo(torch_type)
        quant_min, quant_max = limits.min, limits.max

        x = to_tensor(values, device)
        x.requires_grad_()
        fq_module = torch.quantization.default_fake_quant().to(device)
        actual = fq_module(x)
        assert fq_module.scale is not None
        assert fq_module.zero_point is not None

        # The reference uses the qparams held by the module after the call.
        expected = _fake_quantize_per_tensor_affine_reference(
            x, fq_module.scale, fq_module.zero_point, quant_min, quant_max)
        np.testing.assert_allclose(expected.cpu().detach().numpy(),
                                   actual.cpu().detach().numpy(),
                                   rtol=tolerance, atol=tolerance)

        # Test backward
        upstream_grad = torch.rand(x.shape, dtype=torch.float, device=device)
        actual.backward(upstream_grad)
        expected_grad = _fake_quantize_per_tensor_affine_grad_reference(
            upstream_grad, x, fq_module.scale, fq_module.zero_point,
            quant_min, quant_max)
        np.testing.assert_allclose(expected_grad.cpu().numpy(),
                                   x.grad.cpu().detach().numpy(),
                                   rtol=tolerance, atol=tolerance)

    def test_fq_serializable(self):
        """Checks that a FakeQuantize state dict survives a save/load round trip."""
        observer = default_observer
        quant_min, quant_max = 0, 255
        fq_module = FakeQuantize(observer, quant_min, quant_max)
        sample = torch.tensor([-5, -3.5, -2, 0, 3, 5, 7],
                              dtype=torch.float32)
        # Run one forward pass before the state dict is read.
        fq_module(sample)

        state_dict = fq_module.state_dict()
        self.assertEqual(state_dict['scale'], 0.094488)
        self.assertEqual(state_dict['zero_point'], 53)

        # Round-trip the state dict through an in-memory buffer.
        buffer = io.BytesIO()
        torch.save(state_dict, buffer)
        buffer.seek(0)
        loaded_dict = torch.load(buffer)

        loaded_fq_module = FakeQuantize(observer, quant_min, quant_max)
        loaded_fq_module.load_state_dict(loaded_dict)
        for key, saved_value in state_dict.items():
            self.assertEqual(saved_value,
                             loaded_fq_module.state_dict()[key])

        self.assertEqual(loaded_fq_module.calculate_qparams(),
                         fq_module.calculate_qparams())

    def test_fake_quant_control(self):
        """Checks the enable/disable switches for fake quant and the observer."""
        torch.manual_seed(42)
        data = torch.rand(20, 10, dtype=torch.float32)
        fq_module = torch.quantization.default_fake_quant()

        # Output of fake quant is not identical to input
        out = fq_module(data)
        self.assertNotEqual(out, data)

        torch.quantization.disable_fake_quant(fq_module)
        data = torch.rand(20, 10, dtype=torch.float32)
        out = fq_module(data)
        # Fake quant is disabled,output is identical to input
        self.assertEqual(out, data)

        # Remember the current qparams to detect later changes.
        scale = fq_module.scale
        zero_point = fq_module.zero_point

        torch.quantization.disable_observer(fq_module)
        torch.quantization.enable_fake_quant(fq_module)
        data = 10.0 * torch.rand(20, 10, dtype=torch.float32) - 5.0
        out = fq_module(data)
        self.assertNotEqual(out, data)
        # Observer is disabled, scale and zero-point do not change
        self.assertEqual(fq_module.scale, scale)
        self.assertEqual(fq_module.zero_point, zero_point)

        torch.quantization.enable_observer(fq_module)
        out = fq_module(data)
        self.assertNotEqual(out, data)
        # Observer is enabled, scale and zero-point are different
        self.assertNotEqual(fq_module.scale, scale)
        self.assertNotEqual(fq_module.zero_point, zero_point)
class TestFakeQuantizePerChannel(TestCase):
    """Tests for per-channel fake quantization.

    Covers the torch.fake_quantize_per_channel_affine op (forward, backward,
    and a currently skipped consistency check against
    quantize_per_channel/dequantize) and the FakeQuantize module configured
    with default_per_channel_weight_observer.
    """

    # NOTE: The @given-driven tests in this class are decorated with
    # no_deadline to prevent spurious failures due to cuda runtime
    # initialization.
    @no_deadline
    @given(device=st.sampled_from(
        ['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
        X=hu.per_channel_tensor(shapes=hu.array_shapes(1, 5,),
                                qparams=hu.qparams(dtypes=torch.quint8)))
    def test_forward_per_channel(self, device, X):
        r"""Tests the forward path of the fake_quantize_per_channel_affine op.

        The op output on `device` is compared against the Python reference
        evaluated on CPU copies of the same inputs.
        """
        np.random.seed(NP_RANDOM_SEED)
        X, (scale, zero_point, axis, torch_type) = X
        quant_min = torch.iinfo(torch_type).min
        quant_max = torch.iinfo(torch_type).max

        X = to_tensor(X, device)
        scale = to_tensor(scale, device)
        zero_point = torch.tensor(zero_point).to(dtype=torch.int64,
                                                 device=device)
        Y = _fake_quantize_per_channel_affine_reference(
            X.cpu(), scale.cpu(), zero_point.cpu(), axis,
            quant_min, quant_max)
        Y_prime = torch.fake_quantize_per_channel_affine(
            X, scale, zero_point, axis, quant_min, quant_max)
        np.testing.assert_allclose(Y, Y_prime.cpu(),
                                   rtol=tolerance, atol=tolerance)

    @no_deadline
    @given(device=st.sampled_from(
        ['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
        X=hu.per_channel_tensor(shapes=hu.array_shapes(1, 5,),
                                qparams=hu.qparams(dtypes=torch.quint8)))
    def test_backward_per_channel(self, device, X):
        r"""Tests the backward path of the fake_quantize_per_channel_affine op.

        The gradient accumulated in X.grad is compared against the Python
        gradient reference for a random upstream gradient.
        """
        np.random.seed(NP_RANDOM_SEED)
        X, (scale, zero_point, axis, torch_type) = X
        quant_min = torch.iinfo(torch_type).min
        quant_max = torch.iinfo(torch_type).max

        X = to_tensor(X, device)
        scale = to_tensor(scale, device)
        zero_point = torch.tensor(zero_point).to(dtype=torch.int64,
                                                 device=device)
        X.requires_grad_()
        Y_prime = torch.fake_quantize_per_channel_affine(
            X, scale, zero_point, axis, quant_min, quant_max)
        dout = torch.rand(X.shape, dtype=torch.float).to(device)
        dX = _fake_quantize_per_channel_affine_grad_reference(
            dout, X, scale, zero_point, axis, quant_min, quant_max)
        Y_prime.backward(dout)
        np.testing.assert_allclose(dX.cpu().detach().numpy(),
                                   X.grad.cpu().detach().numpy(),
                                   rtol=tolerance, atol=tolerance)

    @no_deadline
    @given(device=st.sampled_from(
        ['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
        X=hu.per_channel_tensor(shapes=hu.array_shapes(1, 5,),
                                qparams=hu.qparams(dtypes=torch.quint8)))
    @unittest.skip("temporarily disable the test")
    def test_numerical_consistency_per_channel(self, device, X):
        r"""Compares quantize_per_channel + dequantize (run on CPU) with the
        fake_quantize_per_channel_affine op run on `device`.
        """
        np.random.seed(NP_RANDOM_SEED)
        X, (scale, zero_point, axis, torch_type) = X
        quant_min = torch.iinfo(torch_type).min
        quant_max = torch.iinfo(torch_type).max

        X = to_tensor(X, device)
        scale = to_tensor(scale, device)
        zero_point = torch.tensor(zero_point).to(dtype=torch.int64,
                                                 device=device)
        # The quantize_per_channel/dequantize reference path is fed CPU
        # tensors (the original note says these ops are only implemented
        # on CPU).
        Y = torch.dequantize(
            torch.quantize_per_channel(X.cpu(), scale.cpu(),
                                       zero_point.cpu(), axis, torch_type))
        Y_prime = torch.fake_quantize_per_channel_affine(
            X, scale, zero_point, axis, quant_min, quant_max)
        np.testing.assert_allclose(Y, Y_prime.cpu(),
                                   rtol=tolerance, atol=tolerance)

    @no_deadline
    @given(device=st.sampled_from(
        ['cpu', 'cuda'] if torch.cuda.is_available() else ['cpu']),
        X=hu.per_channel_tensor(shapes=hu.array_shapes(2, 5,),
                                qparams=hu.qparams(dtypes=torch.qint8)))
    def test_fq_module(self, device, X):
        """Tests forward and backward of a per-channel FakeQuantize module.

        The module is built with default_per_channel_weight_observer; its
        output and input gradient are compared against the Python references
        evaluated with the scale / zero_point the module holds after the
        forward pass.
        """
        np.random.seed(NP_RANDOM_SEED)
        # Only the axis and dtype of the generated qparams are used; the
        # scale / zero_point come from the module itself.
        X, (scale, zero_point, axis, torch_type) = X
        quant_min = torch.iinfo(torch_type).min
        quant_max = torch.iinfo(torch_type).max

        X = to_tensor(X, device)
        X.requires_grad_()
        fq_module = FakeQuantize(default_per_channel_weight_observer,
                                 quant_min, quant_max,
                                 ch_axis=axis).to(device)
        Y_prime = fq_module(X)
        # unittest assertions instead of bare `assert`, which is stripped
        # when Python runs with -O.
        self.assertIsNotNone(fq_module.scale)
        self.assertIsNotNone(fq_module.zero_point)
        Y = _fake_quantize_per_channel_affine_reference(
            X, fq_module.scale, fq_module.zero_point, axis,
            quant_min, quant_max)
        np.testing.assert_allclose(Y.cpu().detach().numpy(),
                                   Y_prime.cpu().detach().numpy(),
                                   rtol=tolerance, atol=tolerance)

        # Test backward
        dout = torch.rand(X.shape, dtype=torch.float, device=device)
        Y_prime.backward(dout)
        dX = _fake_quantize_per_channel_affine_grad_reference(
            dout, X, fq_module.scale, fq_module.zero_point, axis,
            quant_min, quant_max)
        # Detach before converting, matching test_backward_per_channel.
        np.testing.assert_allclose(dX.cpu().detach().numpy(),
                                   X.grad.cpu().detach().numpy(),
                                   rtol=tolerance, atol=tolerance)

    def test_fq_serializable(self):
        """Round-trips a per-channel FakeQuantize state_dict through
        torch.save/torch.load and checks every entry survives unchanged.
        """
        observer = default_per_channel_weight_observer
        quant_min = -128
        quant_max = 127
        fq_module = FakeQuantize(observer, quant_min, quant_max)
        X = torch.tensor(
            [[-5, -3.5, -2, 0, 3, 5, 7], [1, 3, 2, 5, 6.5, 8, 10]],
            dtype=torch.float32)
        # Only the side effect of the forward pass is needed here; the values
        # asserted below are read from the module's state afterwards.
        fq_module(X)
        state_dict = fq_module.state_dict()
        self.assertEqual(state_dict['scale'], [0.054902, 0.078431])
        self.assertEqual(state_dict['zero_point'], [0, 0])

        # Serialize to an in-memory buffer and read it back.
        b = io.BytesIO()
        torch.save(state_dict, b)
        b.seek(0)
        loaded_dict = torch.load(b)
        for key in state_dict:
            self.assertEqual(state_dict[key], loaded_dict[key])