def test_qlinear_relu(self):
    qlinear_prepack = torch.ops.quantized.fbgemm_linear_prepack
    qlinear_relu = torch.ops.quantized.fbgemm_linear_relu

    batch_size = 4
    input_channels = 16
    output_channels = 8

    X_scale = 1.5
    X_zp = 5
    X_value_min = 0
    X_value_max = 225
    X_q0 = np.round(
        np.random.rand(batch_size, input_channels) *
        (X_value_max - X_value_min) + X_value_min
    ).astype(np.uint8)

    W_scale = 0.4
    W_zp = 2
    W_value_min = -128
    W_value_max = 127
    W_q0 = np.round(
        np.random.rand(output_channels, input_channels) *
        (W_value_max - W_value_min) + W_value_min
    ).astype(np.int8)

    b_value_min = -10
    b_value_max = 10
    b_q0 = np.round(
        np.random.rand(output_channels) *
        (b_value_max - b_value_min) + b_value_min
    ).astype(np.int32)

    avoid_vpmaddubsw_overflow_linear(
        batch_size,
        input_channels,
        output_channels,
        X_q0,
        X_value_min,
        X_value_max,
        W_q0,
        W_value_min,
        W_value_max,
    )

    X = torch.from_numpy(_dequantize(X_q0, X_scale, X_zp)).to(dtype=torch.float)
    W = torch.from_numpy(_dequantize(W_q0, W_scale, W_zp)).to(dtype=torch.float)
    b = torch.from_numpy(_dequantize(b_q0, X_scale * W_scale, 0)).to(dtype=torch.float)

    X_q = torch.quantize_linear(X, scale=X_scale, zero_point=X_zp, dtype=torch.quint8)
    W_q = torch.quantize_linear(W, scale=W_scale, zero_point=W_zp, dtype=torch.qint8)
    b_q = torch.quantize_linear(b, scale=X_scale * W_scale, zero_point=0, dtype=torch.qint32)

    # Compare X_scale * W_scale * input_channels * X_value_max * W_value_max with
    # Y_scale * 255 (max for uint8).
    Y_scale = 125.1234
    Y_zp = 5

    # Reference quantized Linear operator
    Y_q_ref = qlinear_ref(X_q0, X_scale, X_zp, W_q0, W_scale, W_zp, b_q0, Y_scale, Y_zp)
    Y_q_ref[Y_q_ref < Y_zp] = Y_zp

    # Weight prepacking operator for quantized Linear
    W_prepack = qlinear_prepack(W_q)
    # Quantized Linear operator with prepacked weight
    Y_q = qlinear_relu(X_q, W_prepack, b_q, Y_scale, Y_zp)

    # Y_q_ref_real = _dequantize(Y_q_ref, Y_scale, Y_zp)
    # Y_q_real = Y_q.dequantize()

    # Assert equal
    np.testing.assert_equal(Y_q_ref, Y_q.int_repr().numpy())

    # Reference quantized result from PyTorch Linear operator
    W_fp32 = W_q.dequantize().to(dtype=torch.float)
    X_fp32 = X_q.dequantize().to(dtype=torch.float)
    b_fp32 = b_q.dequantize().to(dtype=torch.float)
    Y_fp32_ref = F.linear(X_fp32, W_fp32, b_fp32)
    Y_fp32_ref[Y_fp32_ref < 0.0] = 0.0
    Y_q_ref2 = torch.quantize_linear(Y_fp32_ref, Y_scale, Y_zp, torch.quint8)
    # Assert equal
    np.testing.assert_equal(Y_q_ref2.int_repr().numpy(), Y_q.int_repr().numpy())
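
# Note: the helpers used by these tests (`_dequantize`, `qlinear_ref`,
# `avoid_vpmaddubsw_overflow_linear`) are defined elsewhere in this file.
# The sketch below is only an illustration of the math they are assumed to
# implement (per-tensor affine dequantization and a reference quantized
# linear with requantization); the names are hypothetical and the functions
# are not called by the tests.
def _affine_dequantize_sketch(q, scale, zero_point):
    # x ~= scale * (q - zero_point)
    return (q.astype(np.float32) - zero_point) * scale

def _qlinear_ref_sketch(X_q, X_scale, X_zp, W_q, W_scale, W_zp, b_q, Y_scale, Y_zp):
    # Accumulate the zero-point-corrected integer matmul in int32, add the
    # int32 bias (quantized with scale X_scale * W_scale, zero point 0),
    # then requantize the accumulator to uint8 with the output qparams.
    acc = np.matmul(X_q.astype(np.int32) - X_zp,
                    (W_q.astype(np.int32) - W_zp).T)
    if b_q is not None:
        acc += b_q
    Y = np.round(acc * (X_scale * W_scale / Y_scale)) + Y_zp
    return np.clip(Y, 0, 255).astype(np.uint8)
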
def test_qnnpack_linear(self, output_channels, Q):
    X, (X_scale, X_zp), (qmin, qmax), (torch_type, np_type) = Q

    input_channels = X.shape[X.ndim - 1]

    input_rows = 1
    for x in range(X.ndim - 1):
        input_rows *= X.shape[x]

    qnnpack_linear = torch.ops.quantized.qnnpack_linear

    # Map X onto the quantized range [qmin, qmax].
    X_q0 = np.round(
        X * (qmax - qmin) + qmin
    ).astype(np.uint8)

    W_scale = 0.4
    W_zp = 0
    W_value_min = 0
    W_value_max = 255
    W_q0 = np.round(
        np.random.rand(output_channels, input_channels) *
        (W_value_max - W_value_min) + W_value_min
    ).astype(np.uint8)

    b_value_min = -10
    b_value_max = 10
    b_q0 = np.round(
        np.random.rand(output_channels) *
        (b_value_max - b_value_min) + b_value_min
    ).astype(np.int32)

    # Use fixed quantization parameters for X instead of the generated ones.
    X_scale = 10
    X_zp = 0
    X = torch.from_numpy(_dequantize(X_q0, X_scale, X_zp)).to(dtype=torch.float)
    W = torch.from_numpy(_dequantize(W_q0, W_scale, W_zp)).to(dtype=torch.float)
    b = torch.from_numpy(_dequantize(b_q0, X_scale * W_scale, 0)).to(dtype=torch.float)

    X_q = torch.quantize_linear(X, scale=X_scale, zero_point=X_zp, dtype=torch.quint8)
    W_q = torch.quantize_linear(W, scale=W_scale, zero_point=W_zp, dtype=torch.quint8)
    b_q = torch.quantize_linear(b, scale=X_scale * W_scale, zero_point=0, dtype=torch.qint32)

    Y_scale = 5.4  # This makes sure that the max output value does not exceed 255.
    Y_zp = 0

    # Reference quantized Linear operator
    Y_q_ref = qlinear_ref(X_q0, X_scale, X_zp, W_q0, W_scale, W_zp, b_q0, Y_scale, Y_zp)
    Y_q_ref_float = _dequantize(Y_q_ref, Y_scale, Y_zp)

    # Quantized linear operator
    Y_q = qnnpack_linear(X_q, W_q, b_q, Y_scale, Y_zp)

    # Assert equal
    np.testing.assert_array_almost_equal(Y_q_ref_float, Y_q.dequantize().numpy(),
                                         decimal=4)

    # Reference quantized result from PyTorch Linear operator
    W_fp32 = W_q.dequantize().to(dtype=torch.float)
    X_fp32 = X_q.dequantize().to(dtype=torch.float)
    b_fp32 = b_q.dequantize().to(dtype=torch.float)
    Y_fp32_ref = F.linear(X_fp32, W_fp32, b_fp32)
    Y_fp32_ref = Y_fp32_ref.view(-1, output_channels)
    Y_q_ref2 = torch.quantize_linear(Y_fp32_ref, Y_scale, Y_zp, torch.quint8)
    # Assert equal
    np.testing.assert_array_almost_equal(Y_q_ref2.dequantize().numpy(),
                                         Y_q.dequantize().numpy(), decimal=4)