class FunctionalAPITest(TestCase):
    """Compares `qF.conv2d` against the raw quantized convolution op."""

    @given(X=hu.tensor_conv2d(min_batch=1, max_batch=3,
                              min_in_channels=1, max_in_channels=7,
                              min_out_channels=1, max_out_channels=7,
                              H_range=(6, 12), W_range=(6, 12),
                              kH_range=(3, 5), kW_range=(3, 5),
                              max_groups=4,
                              qparams=[
                                  hu.qparams(dtypes=torch.quint8,
                                             zero_point_min=0,
                                             zero_point_max=0),
                                  hu.qparams(dtypes=torch.qint8,
                                             zero_point_min=0,
                                             zero_point_max=0),
                                  hu.qparams(dtypes=torch.qint32,
                                             zero_point_min=0,
                                             zero_point_max=0)
                              ]),
           padH=st.integers(1, 3), padW=st.integers(1, 3),
           sH=st.integers(1, 3), sW=st.integers(1, 3),
           dH=st.integers(1, 2), dW=st.integers(1, 2),
           prepacked=st.booleans())
    def test_conv_api(self, X, padH, padW, sH, sW, dH, dW, prepacked):
        """Tests the correctness of the conv functional.

        The correctness is defined by the behavior being similar to the
        `quantized._ops` implementation: either both calls produce the same
        integer representation, or `qF.conv2d` raises the same error type
        with the same leading message as the reference op.
        """
        # Function-local import so this block does not depend on the
        # module's import header.
        import re

        # Random inputs
        (inputs, filters, bias, groups) = X
        inputs, (inputs_scale, inputs_zero_point, inputs_qtype) = inputs
        filters, (filters_scale, filters_zero_point, filters_qtype) = filters
        bias, (bias_scale, bias_zero_point, bias_qtype) = bias

        # The output reuses the quantization parameters of the input.
        scale, zero_point = inputs_scale, inputs_zero_point
        torch_type = inputs_qtype

        iH, iW = inputs.shape[2:]
        kH, kW = filters.shape[2:]
        # Discard generated examples whose padding exceeds half the kernel
        # or whose output would be empty.
        assume(kH // 2 >= padH)
        assume(kW // 2 >= padW)
        oH = _conv_output_shape(iH, kH, padH, sH, dH)
        assume(oH > 0)
        oW = _conv_output_shape(iW, kW, padW, sW, dW)
        assume(oW > 0)

        inputs = torch.from_numpy(inputs).to(torch.float)
        filters = torch.from_numpy(filters).to(torch.float)
        bias = torch.from_numpy(bias).to(torch.float)

        stride = (sH, sW)
        i_padding = (padH, padW)
        dilation = (dH, dW)

        # Quantized inputs (channels moved to the last dimension)
        i_NHWC = inputs.permute([0, 2, 3, 1]).contiguous()
        w_RSCK = filters.permute([0, 2, 3, 1]).contiguous()

        q_inputs = torch.quantize_linear(i_NHWC, inputs_scale,
                                         inputs_zero_point, inputs_qtype)
        q_filters = torch.quantize_linear(w_RSCK, filters_scale,
                                          filters_zero_point, filters_qtype)
        # NOTE(review): TestQuantizedConv in this file calls
        # fbgemm_conv_prepack with (weight, stride, padding, dilation,
        # groups); only (weight, groups) is passed here -- confirm which
        # signature the op currently exposes.
        q_filters_ref = torch.ops.quantized.fbgemm_conv_prepack(
            q_filters, groups)
        q_bias = torch.quantize_linear(bias, bias_scale, bias_zero_point,
                                       bias_qtype)

        # Reference op
        ref_op = torch.ops.quantized.fbgemm_conv2d

        # Results check
        try:
            ref_result = ref_op(q_inputs, q_filters_ref, q_bias, stride,
                                i_padding, dilation, groups, scale,
                                zero_point)
        except RuntimeError as e:
            # Keep only the first line of the message, up to the first "(".
            e_msg = str(e).split("\n")[0].split("(")[0].strip()
            # The message is literal text, not a pattern: escape it so that
            # characters such as "[", "*" or "." are not read as regex
            # syntax.
            np.testing.assert_raises_regex(
                type(e), re.escape(e_msg), qF.conv2d,
                q_inputs, q_filters_ref, bias=q_bias,
                scale=scale, zero_point=zero_point,
                stride=stride, padding=i_padding, dilation=dilation,
                groups=groups, prepacked=True, dtype=torch_type)
        else:
            if prepacked:
                q_filters = torch.ops.quantized.fbgemm_conv_prepack(
                    q_filters, groups)
            q_result = qF.conv2d(q_inputs, q_filters, bias=q_bias,
                                 scale=scale, zero_point=zero_point,
                                 stride=stride, padding=i_padding,
                                 dilation=dilation, groups=groups,
                                 prepacked=prepacked, dtype=torch_type)

            np.testing.assert_equal(ref_result.int_repr().numpy(),
                                    q_result.int_repr().numpy())
class TestQuantizedConv(unittest.TestCase):
    """Tests the correctness of quantized convolution op."""

    # NOTE(review): a second class named TestQuantizedConv appears later in
    # this file. If both live in the same module, the later definition
    # replaces this one and these tests never run -- confirm which should
    # remain.

    @given(batch_size=st.integers(1, 3),
           input_channels_per_group=st.sampled_from([2, 4, 5, 8, 16, 32]),
           height=st.integers(10, 16),
           width=st.integers(7, 14),
           output_channels_per_group=st.sampled_from([2, 4, 5, 8, 16, 32]),
           groups=st.integers(1, 3),
           kernel_h=st.integers(1, 7),
           kernel_w=st.integers(1, 7),
           stride_h=st.integers(1, 2),
           stride_w=st.integers(1, 2),
           pad_h=st.integers(0, 2),
           pad_w=st.integers(0, 2),
           dilation=st.integers(1, 1),
           X_scale=st.floats(0.2, 1.6),
           X_zero_point=st.integers(0, 4),
           W_scale=st.floats(0.2, 1.6),
           W_zero_point=st.integers(-5, 5),
           Y_scale=st.floats(0.2, 1.6),
           Y_zero_point=st.integers(0, 4),
           use_bias=st.booleans(),
           use_relu=st.booleans())
    def test_qconv(
            self,
            batch_size,
            input_channels_per_group,
            height,
            width,
            output_channels_per_group,
            groups,
            kernel_h,
            kernel_w,
            stride_h,
            stride_w,
            pad_h,
            pad_w,
            dilation,
            X_scale,
            X_zero_point,
            W_scale,
            W_zero_point,
            Y_scale,
            Y_zero_point,
            use_bias,
            use_relu
    ):
        """Compares the quantized conv op with a quantized float reference.

        A float `torch.nn.Conv2d` (followed by ReLU when `use_relu` is set)
        is run on dequantized inputs and its output is quantized with
        `Y_scale` / `Y_zero_point`. The quantized op must match that
        reference to within an off-by-one difference.
        """
        qconv = torch.ops.quantized.fbgemm_conv2d
        if use_relu:
            qconv = torch.ops.quantized.fbgemm_conv2d_relu
        qconv_prepack = torch.ops.quantized.fbgemm_conv_prepack

        # C
        input_channels = input_channels_per_group * groups
        # K
        output_channels = output_channels_per_group * groups

        dilation_h = dilation_w = dilation

        # For testing, we use small values for weights and for activations so
        # that no overflow occurs in vpmaddubsw instruction. If the overflow
        # occurs in qconv implementation and if there is no overflow in
        # reference we can't exactly match the results with reference.
        # Please see the comment in qconv implementation file
        # (aten/src/ATen/native/quantized/cpu/qconv.cpp) for more details.
        W_value_min = -5
        W_value_max = 5
        # the operator expects them in the format
        # (output_channels, input_channels/groups, kernel_h, kernel_w)
        W_init = torch.from_numpy(
            np.random.randint(
                W_value_min,
                W_value_max,
                # Integer floor division keeps the shape entry an exact int.
                (output_channels, input_channels // groups,
                 kernel_h, kernel_w)),
        )
        b_init = torch.from_numpy(
            np.random.randint(0, 10, (output_channels,)))

        stride = [stride_h, stride_w]
        pad = [pad_h, pad_w]
        dilation = [dilation_h, dilation_w]

        X_value_min = 0
        X_value_max = 4
        X_init = torch.from_numpy(np.random.randint(
            X_value_min, X_value_max,
            (batch_size, input_channels, height, width)))

        # Dequantized (float) views of the integer tensors drawn above.
        X = X_scale * (X_init - X_zero_point).to(dtype=torch.float)
        W = W_scale * (W_init - W_zero_point).to(dtype=torch.float)
        # The bias uses scale X_scale * W_scale and a zero point of 0.
        b = X_scale * W_scale * (b_init - 0).to(dtype=torch.float)

        # Existing floating point conv operator
        conv_op = torch.nn.Conv2d(input_channels,
                                  output_channels,
                                  (kernel_h, kernel_w),
                                  (stride_h, stride_w),
                                  (pad_h, pad_w),
                                  (dilation_h, dilation_w),
                                  groups)
        # assign weights
        conv_op.weight = torch.nn.Parameter(W, requires_grad=False)
        conv_op.bias = torch.nn.Parameter(
            b, requires_grad=False) if use_bias else None

        result_ref = conv_op(X)
        if use_relu:
            relu = torch.nn.ReLU()
            result_ref = relu(result_ref)

        # quantize reference results for comparison
        result_ref_q = torch.quantize_linear(result_ref,
                                             scale=Y_scale,
                                             zero_point=Y_zero_point,
                                             dtype=torch.quint8)

        # reformat X_init and W_init in the required format by qconv operator
        # NCHW -> NHWC
        X_NHWC = X.permute([0, 2, 3, 1]).contiguous()
        # K(C/G)RS -> KRS(C/G)
        W_KRSC = W.permute([0, 2, 3, 1]).contiguous()

        X_q = torch.quantize_linear(X_NHWC, scale=X_scale,
                                    zero_point=X_zero_point,
                                    dtype=torch.quint8)
        W_q = torch.quantize_linear(W_KRSC, scale=W_scale,
                                    zero_point=W_zero_point,
                                    dtype=torch.qint8)
        b_q = torch.quantize_linear(
            b, scale=X_scale * W_scale, zero_point=0,
            dtype=torch.qint32) if use_bias else None

        W_prepack = qconv_prepack(W_q, stride, pad, dilation, groups)

        Y_q = qconv(
            X_q,
            W_prepack,
            b_q,
            stride,
            pad,
            dilation,
            groups,
            Y_scale,
            Y_zero_point,
        )

        # Back to NCHW format
        Y_q = Y_q.permute([0, 3, 1, 2]).contiguous()

        # Make sure the results match
        # assert_array_almost_equal compares using the following formula:
        #     abs(desired-actual) < 1.5 * 10**(-decimal)
        # (https://docs.scipy.org/doc/numpy/reference/generated/numpy.testing.assert_almost_equal.html)
        # We use decimal = 0 to ignore off-by-1 differences between reference
        # and test. Off-by-1 differences arise due to the order of round and
        # zero_point addition operation, i.e., if addition followed by round
        # is used by reference and round followed by addition is used by
        # test, the results may differ by 1.
        # For example, the result of round(2.5) + 1 is 3 while
        # round(2.5 + 1) is 4 assuming the rounding mode is
        # round-to-nearest, ties-to-even.
        np.testing.assert_array_almost_equal(
            result_ref_q.int_repr().numpy(),
            Y_q.int_repr().numpy(),
            decimal=0)

    @given(X=hu.tensor_conv2d(min_batch=1, max_batch=3,
                              min_in_channels=1, max_in_channels=7,
                              min_out_channels=1, max_out_channels=7,
                              H_range=(6, 12), W_range=(6, 12),
                              kH_range=(3, 5), kW_range=(3, 5),
                              max_groups=4,
                              qparams=[hu.qparams(dtypes=torch.quint8,
                                                  zero_point_min=0,
                                                  zero_point_max=0),
                                       hu.qparams(dtypes=torch.qint8,
                                                  zero_point_min=0,
                                                  zero_point_max=0),
                                       hu.qparams(dtypes=torch.qint32,
                                                  zero_point_min=0,
                                                  zero_point_max=0)]),
           strideH=st.integers(1, 3), strideW=st.integers(1, 3),
           padH=st.integers(1, 2), padW=st.integers(1, 2))
    def test_qconv_unpack(self, X, strideH, strideW, padH, padW):
        """Tests the correctness of the quantized::fbgemm_conv_unpack op.

        Packing a quantized weight and unpacking it again must give back
        the same integer representation, scale and zero point.
        """
        # Only the filters and the group count are used by this test.
        (_, filters, _, groups) = X
        filters, (filters_scale, filters_zero_point, filters_qtype) = filters

        qconv_prepack = torch.ops.quantized.fbgemm_conv_prepack
        qconv_unpack = torch.ops.quantized.fbgemm_conv_unpack

        # Orig tensor is assumed to be in K(C/G)RS format
        W = torch.from_numpy(filters).to(torch.float)
        # K(C/G)RS -> KRS(C/G)
        W_KRSC = W.permute([0, 2, 3, 1]).contiguous()
        W_q = torch.quantize_linear(W_KRSC, scale=filters_scale,
                                    zero_point=filters_zero_point,
                                    dtype=filters_qtype)

        # Pack weights using weight packing operator
        strides = [strideH, strideW]
        paddings = [padH, padW]
        dilations = [1, 1]
        W_packed = qconv_prepack(W_q, strides, paddings, dilations, groups)
        # Unpack weights using the weight unpacking operator
        # (used for serialization)
        W_unpacked = qconv_unpack(W_packed)

        # Assert equal
        np.testing.assert_equal(W_q.int_repr().numpy(),
                                W_unpacked.int_repr().numpy())
        np.testing.assert_equal(W_q.q_scale(), W_unpacked.q_scale())
        np.testing.assert_equal(W_q.q_zero_point(),
                                W_unpacked.q_zero_point())
class TestQuantizedConv(unittest.TestCase):
    """Tests the correctness of quantized convolution op."""

    # NOTE(review): another class named TestQuantizedConv appears earlier in
    # this file. If both live in the same module, this definition replaces
    # the earlier one -- confirm which should remain.

    @given(batch_size=st.integers(1, 3),
           input_channels_per_group=st.sampled_from([2, 4, 5, 8, 16, 32]),
           height=st.integers(10, 16),
           width=st.integers(7, 14),
           output_channels_per_group=st.sampled_from([2, 4, 5, 8, 16, 32]),
           groups=st.integers(1, 3),
           kernel_h=st.integers(1, 7),
           kernel_w=st.integers(1, 7),
           stride_h=st.integers(1, 2),
           stride_w=st.integers(1, 2),
           pad_h=st.integers(0, 2),
           pad_w=st.integers(0, 2),
           dilation=st.integers(1, 1),
           use_bias=st.booleans(),
           use_relu=st.booleans())
    def test_qconv(self, batch_size, input_channels_per_group, height, width,
                   output_channels_per_group, groups, kernel_h, kernel_w,
                   stride_h, stride_w, pad_h, pad_w, dilation, use_bias,
                   use_relu):
        """Compares the quantized conv op with a requantized reference.

        A float `torch.nn.Conv2d` is run on the raw integer-valued tensors
        and its output is passed through `_requantize` with multiplier
        `X_scale * W_scale / Y_scale`. The quantized op must reproduce that
        result exactly.
        """
        qconv = torch.ops.quantized.fbgemm_conv2d
        if use_relu:
            qconv = torch.ops.quantized.fbgemm_conv2d_relu
        qconv_prepack = torch.ops.quantized.fbgemm_conv_prepack

        # C
        input_channels = input_channels_per_group * groups
        # K
        output_channels = output_channels_per_group * groups

        dilation_h = dilation_w = dilation

        # For testing, we use small values for weights and for activations so
        # that no overflow occurs in vpmaddubsw instruction. If the overflow
        # occurs in qconv implementation and if there is no overflow in
        # reference we can't exactly match the results with reference.
        # Please see the comment in qconv implementation file
        # (aten/src/ATen/native/quantized/cpu/qconv.cpp) for more details.
        W_value_min = -5
        W_value_max = 5
        # the operator expects them in the format
        # (output_channels, input_channels/groups, kernel_h, kernel_w)
        W_init = torch.from_numpy(
            np.random.randint(
                W_value_min,
                W_value_max,
                # Integer floor division keeps the shape entry an exact int.
                (output_channels, input_channels // groups,
                 kernel_h, kernel_w)),
        )
        b_init = torch.from_numpy(
            np.random.randint(0, 10, (output_channels, )))

        # Existing floating point conv operator
        conv_op = torch.nn.Conv2d(
            input_channels,
            output_channels,
            (kernel_h, kernel_w),
            (stride_h, stride_w),
            (pad_h, pad_w),
            (dilation_h, dilation_w),
            groups,
        )

        # assign the weights
        conv_op.weight = torch.nn.Parameter(
            W_init.to(dtype=torch.float), requires_grad=False)
        conv_op.bias = torch.nn.Parameter(
            b_init.to(dtype=torch.float),
            requires_grad=False) if use_bias else None

        X_value_min = 0
        X_value_max = 4
        X_init = torch.from_numpy(
            np.random.randint(X_value_min, X_value_max,
                              (batch_size, input_channels, height, width)))

        # run on an input tensor
        result_ref = conv_op(X_init.to(dtype=torch.float))

        # reformat X_init and W_init in the required format by conv operator
        # NCHW -> NHWC
        X_NHWC = X_init.permute([0, 2, 3, 1]).contiguous()
        # K(C/G)RS -> KRS(C/G)
        W_KRSC = W_init.permute([0, 2, 3, 1]).contiguous()

        X_scale = 1.5
        # Currently only 0 as zero point is supported.
        X_zero_point = 0
        X = X_scale * (X_NHWC - X_zero_point).to(dtype=torch.float)

        W_scale = 2.5
        W_zero_point = 0
        W = W_scale * (W_KRSC - W_zero_point).to(dtype=torch.float)

        # The bias uses scale X_scale * W_scale and a zero point of 0.
        b = X_scale * W_scale * (b_init - 0).to(dtype=torch.float)

        X_q = torch.quantize_linear(X, scale=X_scale,
                                    zero_point=X_zero_point,
                                    dtype=torch.quint8)
        W_q = torch.quantize_linear(W, scale=W_scale,
                                    zero_point=W_zero_point,
                                    dtype=torch.qint8)
        b_q = torch.quantize_linear(
            b, scale=X_scale * W_scale, zero_point=0,
            dtype=torch.qint32) if use_bias else None

        # The same geometry is handed to both the prepack op and the conv op.
        strides = [stride_h, stride_w]
        paddings = [pad_h, pad_w]
        dilations = [dilation_h, dilation_w]

        W_prepack = qconv_prepack(W_q, strides, paddings, dilations, groups)

        Y_scale = 7.3
        Y_zero_point = 5

        Y_q = qconv(
            X_q,
            W_prepack,
            b_q,
            strides,
            paddings,
            dilations,
            groups,
            Y_scale,
            Y_zero_point,
        )

        result_NHWK = result_ref.permute([0, 2, 3, 1])
        result_q = _requantize(
            result_NHWK.numpy(), X_scale * W_scale / Y_scale, Y_zero_point)
        if use_relu:
            # Clamp the reference at the output zero point.
            result_q[result_q < Y_zero_point] = Y_zero_point

        # Make sure the results match
        np.testing.assert_equal(result_q, Y_q.int_repr().numpy())

    @given(X=hu.tensor_conv2d(min_batch=1, max_batch=3,
                              min_in_channels=1, max_in_channels=7,
                              min_out_channels=1, max_out_channels=7,
                              H_range=(6, 12), W_range=(6, 12),
                              kH_range=(3, 5), kW_range=(3, 5),
                              max_groups=4,
                              qparams=[
                                  hu.qparams(dtypes=torch.quint8,
                                             zero_point_min=0,
                                             zero_point_max=0),
                                  hu.qparams(dtypes=torch.qint8,
                                             zero_point_min=0,
                                             zero_point_max=0),
                                  hu.qparams(dtypes=torch.qint32,
                                             zero_point_min=0,
                                             zero_point_max=0)
                              ]),
           strideH=st.integers(1, 3), strideW=st.integers(1, 3),
           padH=st.integers(1, 2), padW=st.integers(1, 2))
    def test_qconv_unpack(self, X, strideH, strideW, padH, padW):
        """Tests the correctness of the quantized::fbgemm_conv_unpack op.

        Packing a quantized weight and unpacking it again must give back
        the same integer representation, scale and zero point.
        """
        # Only the filters and the group count are used by this test.
        (_, filters, _, groups) = X
        filters, (filters_scale, filters_zero_point, filters_qtype) = filters

        qconv_prepack = torch.ops.quantized.fbgemm_conv_prepack
        qconv_unpack = torch.ops.quantized.fbgemm_conv_unpack

        # Orig tensor is assumed to be in K(C/G)RS format
        W = torch.from_numpy(filters).to(torch.float)
        # K(C/G)RS -> KRS(C/G)
        W_KRSC = W.permute([0, 2, 3, 1]).contiguous()
        W_q = torch.quantize_linear(W_KRSC, scale=filters_scale,
                                    zero_point=filters_zero_point,
                                    dtype=filters_qtype)

        # Pack weights using weight packing operator
        strides = [strideH, strideW]
        paddings = [padH, padW]
        dilations = [1, 1]
        W_packed = qconv_prepack(W_q, strides, paddings, dilations, groups)
        # Unpack weights using the weight unpacking operator
        # (used for serialization)
        W_unpacked = qconv_unpack(W_packed)

        # Assert equal
        np.testing.assert_equal(W_q.int_repr().numpy(),
                                W_unpacked.int_repr().numpy())
        np.testing.assert_equal(W_q.q_scale(), W_unpacked.q_scale())
        np.testing.assert_equal(W_q.q_zero_point(),
                                W_unpacked.q_zero_point())