def _test_op(self, qmodule, subname=None, input_size=None,
             input_quantized=True, generate=False, prec=None,
             new_zipfile_serialization=False):
    r"""Test that quantized modules serialized previously can be loaded
    with current code, to make sure we don't break backward compatibility
    for the serialization of quantized modules.
    """
    input_file, state_dict_file, scripted_module_file, traced_module_file, \
        expected_file = get_filenames(self, subname)

    # only generate once.
    if generate and qengine_is_fbgemm():
        input_tensor = torch.rand(*input_size).float()
        if input_quantized:
            input_tensor = torch.quantize_per_tensor(
                input_tensor, 0.5, 2, torch.quint8)
        torch.save(input_tensor, input_file)
        # Temporary fix to use _use_new_zipfile_serialization until #38379 lands.
        torch.save(
            qmodule.state_dict(), state_dict_file,
            _use_new_zipfile_serialization=new_zipfile_serialization)
        torch.jit.save(torch.jit.script(qmodule), scripted_module_file)
        torch.jit.save(torch.jit.trace(qmodule, input_tensor),
                       traced_module_file)
        torch.save(qmodule(input_tensor), expected_file)

    input_tensor = torch.load(input_file)
    qmodule.load_state_dict(torch.load(state_dict_file))
    qmodule_scripted = torch.jit.load(scripted_module_file)
    qmodule_traced = torch.jit.load(traced_module_file)
    expected = torch.load(expected_file)
    self.assertEqual(qmodule(input_tensor), expected, atol=prec)
    self.assertEqual(qmodule_scripted(input_tensor), expected, atol=prec)
    self.assertEqual(qmodule_traced(input_tensor), expected, atol=prec)
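# _test_op assumes a get_filenames helper that maps a test id to the on-disk
# reference files. A minimal sketch of what it plausibly does, reconstructed
# from the inlined variant of _test_op later in this section; note that
# _test_op_graph and _test_package unpack seven values (two extra package /
# get_attr filenames), so the real helper's return arity may differ:
import os
import sys

def _get_filenames_sketch(testcase, subname=None):
    # Hypothetical reconstruction, not the actual helper.
    module_id = testcase.__class__.__module__
    munged_id = testcase.id()[len(module_id) + 1:]  # strip "<module>." prefix
    test_file = os.path.realpath(sys.modules[module_id].__file__)
    base_name = os.path.join(os.path.dirname(test_file), "serialized", munged_id)
    if subname:
        base_name += "_" + subname
    return (base_name + ".input.pt",
            base_name + ".state_dict.pt",
            base_name + ".scripted.pt",
            base_name + ".traced.pt",
            base_name + ".expected.pt")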
def test_sparse_qlinear_serdes(self):
    # Note: At the moment, for sparse kernels
    # fbgemm supports only static quantized sparse linear
    # qnnpack supports only dynamically quantized sparse linear
    # Hence we have two different tests.
    # fbgemm tests static flow, qnnpack tests dynamic.
    # Should be unified later on and tests should be fixed
    # appropriately.
    model_class = SparseQuantizedModel
    fqn_to_check = "linear"
    if qengine_is_fbgemm():
        sparse_mapping = tq.get_default_static_sparse_quant_module_mappings()
        ref_mapping = tq.get_default_static_quant_module_mappings()
        qconfig_dict = {nn.Linear: tq.get_default_qconfig("fbgemm")}
    elif qengine_is_qnnpack():
        sparse_mapping = tq.get_default_dynamic_sparse_quant_module_mappings()
        ref_mapping = tq.get_default_dynamic_quant_module_mappings()
        qconfig_dict = {nn.Linear: tq.qconfig.default_dynamic_qconfig}
    else:
        return

    _sparse_layer_test_helper(
        model_class=model_class,
        sparse_mapping=sparse_mapping,
        ref_mapping=ref_mapping,
        qconfig_dict=qconfig_dict,
        fqn_to_check=fqn_to_check,
        test_class=self,
        test_scripting=True,
    )
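# The helper-based test above assumes a SparseQuantizedModel with a submodule
# named "linear" (matching fqn_to_check); the fuller serdes test later in this
# section constructs it as SparseQuantizedModel(input_channels, output_channels)
# and touches model.linear directly. A hypothetical minimal definition
# consistent with both uses (not the actual class from the source):
import torch.nn as nn

class _SparseQuantizedModelSketch(nn.Module):
    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.linear = nn.Linear(in_channels, out_channels)

    def forward(self, x):
        return self.linear(x)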
def test_conv3d_relu(self):
    if qengine_is_fbgemm():
        module = nniq.ConvReLU3d(
            3, 3, kernel_size=3, stride=1, padding=0, dilation=1,
            groups=1, bias=True, padding_mode="zeros")
        self._test_op(module, input_size=[1, 3, 6, 6, 6], generate=False)
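# How the serialized reference files for a test like the one above could be
# regenerated (a hedged sketch, not part of the test suite): flip
# generate=True and run once where the fbgemm engine is available, since
# qengine_is_fbgemm() gates generation, then flip it back before committing.
#
#     torch.backends.quantized.engine = "fbgemm"
#     module = nniq.ConvReLU3d(3, 3, kernel_size=3, stride=1, padding=0,
#                              dilation=1, groups=1, bias=True,
#                              padding_mode="zeros")
#     self._test_op(module, input_size=[1, 3, 6, 6, 6], generate=True)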
def test_lstm(self):
    class LSTMModule(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.lstm = nnqd.LSTM(
                input_size=3, hidden_size=7, num_layers=1).to(dtype=torch.float)

        def forward(self, x):
            x = self.lstm(x)
            return x

    if qengine_is_fbgemm():
        mod = LSTMModule()
        self._test_op(mod, input_size=[4, 4, 3], input_quantized=False,
                      generate=False, new_zipfile_serialization=True)
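# Standalone sanity check of the dynamically quantized LSTM wrapped above
# (a sketch; nnqd.LSTM takes float input and, like nn.LSTM, returns the
# output sequence together with the (h_n, c_n) hidden-state tuple, which is
# why input_quantized=False is passed to _test_op):
#
#     lstm = nnqd.LSTM(input_size=3, hidden_size=7, num_layers=1)
#     x = torch.rand(4, 4, 3)    # (seq_len, batch, input_size), float
#     out, (h_n, c_n) = lstm(x)  # weights are quantized, activations stay float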
def _test_op_graph(self, qmodule, subname=None, input_size=None,
                   input_quantized=True, generate=False, prec=None,
                   new_zipfile_serialization=False):
    r"""
    Input: a floating point module

    If generate == True, traces and scripts the module and quantizes the
    results with PTQ, and saves the results.
    If generate == False, traces and scripts the module and quantizes the
    results with PTQ, and compares to saved results.
    """
    input_file, state_dict_file, scripted_module_file, traced_module_file, \
        expected_file, _package_file, _get_attr_targets_file = \
        get_filenames(self, subname)

    # only generate once.
    if generate and qengine_is_fbgemm():
        input_tensor = torch.rand(*input_size).float()
        torch.save(input_tensor, input_file)

        # convert to TorchScript
        scripted = torch.jit.script(qmodule)
        traced = torch.jit.trace(qmodule, input_tensor)

        # quantize
        def _eval_fn(model, data):
            model(data)

        qconfig_dict = {'': torch.ao.quantization.default_qconfig}
        scripted_q = torch.ao.quantization.quantize_jit(
            scripted, qconfig_dict, _eval_fn, [input_tensor])
        traced_q = torch.ao.quantization.quantize_jit(
            traced, qconfig_dict, _eval_fn, [input_tensor])

        torch.jit.save(scripted_q, scripted_module_file)
        torch.jit.save(traced_q, traced_module_file)
        torch.save(scripted_q(input_tensor), expected_file)

    input_tensor = torch.load(input_file)
    qmodule_scripted = torch.jit.load(scripted_module_file)
    qmodule_traced = torch.jit.load(traced_module_file)
    expected = torch.load(expected_file)
    self.assertEqual(qmodule_scripted(input_tensor), expected, atol=prec)
    self.assertEqual(qmodule_traced(input_tensor), expected, atol=prec)
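# Judging from the _eval_fn signature above, quantize_jit appears to invoke
# the calibration function as run_fn(model, *run_args), so _eval_fn sees the
# single tensor from [input_tensor]. A hedged sketch of calibrating over
# several batches instead (hypothetical helper and batch names, assuming the
# same calling convention):
#
#     def _calibrate(model, batches):
#         for data in batches:
#             model(data)
#
#     scripted_q = torch.ao.quantization.quantize_jit(
#         scripted, {'': torch.ao.quantization.default_qconfig},
#         _calibrate, [[batch_a, batch_b]])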
def test_linear_dynamic(self):
    module_qint8 = nnqd.Linear(3, 1, bias_=True, dtype=torch.qint8)
    self._test_op(module_qint8, "qint8", input_size=[1, 3],
                  input_quantized=False, generate=False)
    if qengine_is_fbgemm():
        module_float16 = nnqd.Linear(3, 1, bias_=True, dtype=torch.float16)
        self._test_op(module_float16, "float16", input_size=[1, 3],
                      input_quantized=False, generate=False)
def _test_op(self, qmodule, subname=None, input_size=None,
             input_quantized=True, generate=False, prec=None,
             new_zipfile_serialization=False):
    r"""Test that quantized modules serialized previously can be loaded
    with current code, to make sure we don't break backward compatibility
    for the serialization of quantized modules.
    """
    def remove_prefix(text, prefix):
        if text.startswith(prefix):
            return text[len(prefix):]
        return text

    # NB: we take __file__ from the module that defined the test
    # class, so we place the expect directory where the test script
    # lives, NOT where test/common_utils.py lives.
    module_id = self.__class__.__module__
    munged_id = remove_prefix(self.id(), module_id + ".")
    test_file = os.path.realpath(sys.modules[module_id].__file__)
    base_name = os.path.join(os.path.dirname(test_file), "serialized", munged_id)

    subname_output = ""
    if subname:
        base_name += "_" + subname
        subname_output = " ({})".format(subname)

    input_file = base_name + ".input.pt"
    state_dict_file = base_name + ".state_dict.pt"
    scripted_module_file = base_name + ".scripted.pt"
    traced_module_file = base_name + ".traced.pt"
    expected_file = base_name + ".expected.pt"

    # only generate once.
    if generate and qengine_is_fbgemm():
        input_tensor = torch.rand(*input_size).float()
        if input_quantized:
            input_tensor = torch.quantize_per_tensor(
                input_tensor, 0.5, 2, torch.quint8)
        torch.save(input_tensor, input_file)
        # Temporary fix to use _use_new_zipfile_serialization until #38379 lands.
        torch.save(
            qmodule.state_dict(), state_dict_file,
            _use_new_zipfile_serialization=new_zipfile_serialization)
        torch.jit.save(torch.jit.script(qmodule), scripted_module_file)
        torch.jit.save(torch.jit.trace(qmodule, input_tensor),
                       traced_module_file)
        torch.save(qmodule(input_tensor), expected_file)

    input_tensor = torch.load(input_file)
    qmodule.load_state_dict(torch.load(state_dict_file))
    qmodule_scripted = torch.jit.load(scripted_module_file)
    qmodule_traced = torch.jit.load(traced_module_file)
    expected = torch.load(expected_file)
    self.assertEqual(qmodule(input_tensor), expected, atol=prec)
    self.assertEqual(qmodule_scripted(input_tensor), expected, atol=prec)
    self.assertEqual(qmodule_traced(input_tensor), expected, atol=prec)
def _test_package(self, fp32_module, input_size, generate=False):
    """
    Verifies that files created in the past with torch.package
    work on today's FX graph mode quantization transforms.
    """
    input_file, state_dict_file, _scripted_module_file, _traced_module_file, \
        expected_file, package_file, get_attr_targets_file = \
        get_filenames(self, None)

    package_name = 'test'
    resource_name_model = 'test.pkl'

    def _do_quant_transforms(
        m: torch.nn.Module,
        input_tensor: torch.Tensor,
    ) -> torch.nn.Module:
        # do the quantization transforms and save result
        qconfig = torch.quantization.get_default_qconfig('fbgemm')
        mp = quantize_fx.prepare_fx(m, {'': qconfig})
        mp(input_tensor)
        mq = quantize_fx.convert_fx(mp)
        return mq

    def _get_get_attr_target_strings(m: GraphModule) -> Set[str]:
        results = set()
        for node in m.graph.nodes:
            if node.op == 'get_attr':
                results.add(node.target)
        return results

    if generate and qengine_is_fbgemm():
        input_tensor = torch.randn(*input_size)
        torch.save(input_tensor, input_file)

        # save the model with torch.package
        with torch.package.PackageExporter(package_file) as exp:
            exp.intern('torch.testing._internal.quantization_torch_package_models')
            exp.save_pickle(package_name, resource_name_model, fp32_module)

        # do the quantization transforms and save the result
        mq = _do_quant_transforms(fp32_module, input_tensor)
        get_attrs = _get_get_attr_target_strings(mq)
        torch.save(get_attrs, get_attr_targets_file)
        q_result = mq(input_tensor)
        torch.save(q_result, expected_file)

    # load input tensor
    input_tensor = torch.load(input_file)
    expected_output_tensor = torch.load(expected_file)
    expected_get_attrs = torch.load(get_attr_targets_file)

    # load model from package and verify output and get_attr targets match
    imp = torch.package.PackageImporter(package_file)
    m = imp.load_pickle(package_name, resource_name_model)
    mq = _do_quant_transforms(m, input_tensor)

    get_attrs = _get_get_attr_target_strings(mq)
    self.assertTrue(
        get_attrs == expected_get_attrs,
        f'get_attrs: expected {expected_get_attrs}, got {get_attrs}')
    output_tensor = mq(input_tensor)
    self.assertTrue(torch.allclose(output_tensor, expected_output_tensor))
def test_sparse_qlinear(self):
    batch_size = 12
    input_channels = 16
    output_channels = 4
    decimal_val = 4
    row_block_size = 1
    col_block_size = 4

    # The x86 implementation of sparse ops in qnnpack only supports the
    # 1x4 block pattern; the ARM kernels support both 1x4 and 8x1.
    # This distinction exists only because the x86 implementations exist
    # solely to enable testing of the integration path.
    # We do plan to add 8x1 as well so that testing does not have to
    # special-case like this. At the moment it is deprioritized due
    # to other higher-priority work.
    if qengine_is_qnnpack() and not (row_block_size == 1 and col_block_size == 4):
        return
    # ONEDNN does not support this yet
    if qengine_is_onednn():
        return

    dense_prepack = torch.ops.quantized.linear_prepack
    dense_qlinear = torch.ops.quantized.linear
    dense_qlinear_dynamic = torch.ops.quantized.linear_dynamic

    sparse_prepack = torch.ops.sparse.qlinear_prepack
    sparse_qlinear = torch.ops.sparse.qlinear
    sparse_qlinear_dynamic = torch.ops.sparse.qlinear_dynamic

    X_scale = 0.2
    X_zp = 2
    X_fp32 = torch.randn(batch_size, input_channels, dtype=torch.float32)
    float_bias = torch.randn(output_channels, dtype=torch.float32)

    W_scales = torch.rand(output_channels, dtype=torch.float32)
    W_zps = torch.zeros(output_channels, dtype=torch.int32)
    W_fp32 = torch.randn(output_channels, input_channels, dtype=torch.float32)

    with override_cpu_allocator_for_qnnpack(qengine_is_qnnpack()):
        X_q = torch.quantize_per_tensor(
            X_fp32, scale=X_scale, zero_point=X_zp, dtype=torch.quint8)

        for use_channelwise, dynamic_mode in product([True, False], [True, False]):
            if qengine_is_fbgemm() and dynamic_mode:
                logging.info("dynamic sparse qlinear is only available in qnnpack")
                continue
            if qengine_is_qnnpack() and not dynamic_mode:
                logging.info("static sparse qlinear is only available in fbgemm")
                continue
            if use_channelwise:
                W_q = torch.quantize_per_channel(
                    W_fp32, scales=W_scales, zero_points=W_zps,
                    axis=0, dtype=torch.qint8)
            else:
                W_q = torch.quantize_per_tensor(
                    W_fp32, scale=W_scales[0], zero_point=W_zps[0],
                    dtype=torch.qint8)

            Y_scale = 1.1234
            Y_zp = 5
            W_prepack_dense = dense_prepack(W_q, float_bias)
            W_prepack_sparse = sparse_prepack(
                W_q, float_bias, row_block_size, col_block_size)

            if dynamic_mode:
                Y = sparse_qlinear_dynamic(X_fp32, W_prepack_sparse)
                Y_ref = dense_qlinear_dynamic(X_fp32, W_prepack_dense)

                np.testing.assert_array_almost_equal(
                    Y_ref.numpy(), Y.numpy(), decimal=decimal_val)
            else:
                Y_q = sparse_qlinear(X_q, W_prepack_sparse, Y_scale, Y_zp)
                Y_q_ref = dense_qlinear(X_q, W_prepack_dense, Y_scale, Y_zp)

                np.testing.assert_array_almost_equal(
                    Y_q_ref.int_repr().numpy(), Y_q.int_repr().numpy(),
                    decimal=decimal_val)
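# The block-pattern comments above (1x4 for x86 qnnpack, 1x4 and 8x1 on ARM)
# describe how the prepacked weight is tiled; the test itself prepacks a dense
# random weight. A hypothetical helper (not in the source) for building a
# weight that is genuinely sparse at block granularity:
def _make_block_sparse(w, row_bs, col_bs, keep_prob=0.5):
    # Zero out whole (row_bs x col_bs) blocks; assumes the weight dims are
    # divisible by the block sizes (true above: 4x16 with 1x4 blocks).
    out_c, in_c = w.shape
    mask = (torch.rand(out_c // row_bs, in_c // col_bs) < keep_prob).float()
    mask = mask.repeat_interleave(row_bs, dim=0).repeat_interleave(col_bs, dim=1)
    return w * mask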
def test_sparse_qlinear_serdes(self):
    batch_size = 12
    input_channels = 4
    output_channels = 7
    model = self.SparseQuantizedModel(input_channels, output_channels)

    # For sparse kernels both the activation and weight ZP = 0
    X_scale = 0.2
    X_zp = 0
    W_scale = 1e-2
    W_zp = 0

    with override_cpu_allocator_for_qnnpack(qengine_is_qnnpack()):
        X_fp32 = torch.randn(batch_size, input_channels, dtype=torch.float32)
        float_bias = torch.randn(output_channels, dtype=torch.float32)

        X_q = torch.quantize_per_tensor(
            X_fp32, scale=X_scale, zero_point=X_zp, dtype=torch.quint8)
        X_fp32 = X_q.dequantize()

        W_fp32 = torch.randn(output_channels, input_channels, dtype=torch.float32)
        mask = torch.randint(0, 2, W_fp32.shape)
        W_fp32 *= mask
        W_q = torch.quantize_per_tensor(W_fp32, W_scale, W_zp, torch.qint8)

        model.linear.weight = nn.Parameter(W_q.dequantize())
        model.linear.sparse_params = {'sparse_block_shape': (1, 4)}
        model.eval()

        # Note: At the moment, for sparse kernels
        # fbgemm supports only static quantized sparse linear
        # qnnpack supports only dynamically quantized sparse linear
        # Hence we have two different tests.
        # fbgemm tests static flow, qnnpack tests dynamic.
        # Should be unified later on and tests should be fixed
        # appropriately.
        if qengine_is_fbgemm():
            model.qconfig = tq.get_default_qconfig('fbgemm')
            qmodel = copy.deepcopy(model)
            sqmodel = copy.deepcopy(model)

            tq.prepare(qmodel, inplace=True)
            tq.prepare(sqmodel, inplace=True)

            with torch.no_grad():
                qmodel(X_fp32)
                sqmodel(X_fp32)

            # Make sure the quantization parameters are computed the same way
            qparams = qmodel.linear.qconfig.weight().calculate_qparams()
            sqparams = sqmodel.linear.qconfig.weight().calculate_qparams()
            self.assertEqual(qparams, sqparams)

            # Make sure mapping of sparse kernels does not affect the non-sparse
            sparse_mapping = tq.get_default_static_quant_module_mappings()
            sparse_mapping[nn.Linear] = ao_nn_sq.Linear
            tq.convert(sqmodel, inplace=True, mapping=sparse_mapping)
            tq.convert(qmodel, inplace=True)

            assert isinstance(sqmodel.linear, ao_nn_sq.Linear), "Convert failed"
            assert isinstance(qmodel.linear, nn.quantized.Linear), "Mapping failed"

            scripted_sqmodel = torch.jit.script(sqmodel)
            scripted_sqmodel.eval()
            buffer = io.BytesIO()
            torch.jit.save(scripted_sqmodel, buffer)
            buffer.seek(0)
            sqmodel = torch.jit.load(buffer)

            # Make sure numerics are right
            Y_ref = qmodel(X_q)
            Y_hat = sqmodel(X_q)
            self.assertEqual(Y_ref.dequantize(), Y_hat.dequantize())

        elif qengine_is_qnnpack():
            qconfig = {nn.Linear: tq.qconfig.default_dynamic_qconfig}
            dqmodel = copy.deepcopy(model)
            sdqmodel = copy.deepcopy(model)

            tq.propagate_qconfig_(dqmodel, qconfig)
            tq.propagate_qconfig_(sdqmodel, qconfig)

            # Make sure the quantization parameters are computed the same way
            qparams = dqmodel.linear.qconfig.weight().calculate_qparams()
            sqparams = sdqmodel.linear.qconfig.weight().calculate_qparams()
            self.assertEqual(qparams, sqparams)

            # Make sure mapping of sparse kernels does not affect the non-sparse
            sparse_mapping = copy.deepcopy(
                tq.get_default_dynamic_quant_module_mappings())
            sparse_mapping[nn.Linear] = ao_nn_sq.dynamic.Linear
            with LinearBlockSparsePattern(1, 4):
                tq.convert(sdqmodel, inplace=True, mapping=sparse_mapping)
            tq.convert(
                dqmodel, mapping=tq.get_default_dynamic_quant_module_mappings(),
                inplace=True)

            assert isinstance(sdqmodel.linear, ao_nn_sq.dynamic.Linear), \
                "Convert failed"
            assert isinstance(dqmodel.linear, nn.quantized.dynamic.Linear), \
                "Mapping failed"

            scripted_sdqmodel = torch.jit.script(sdqmodel)
            scripted_sdqmodel.eval()
            buffer = io.BytesIO()
            torch.jit.save(scripted_sdqmodel, buffer)
            buffer.seek(0)
            sdqmodel = torch.jit.load(buffer)

            # Make sure numerics are right
            Y_ref = dqmodel(X_fp32)
            Y_hat = sdqmodel(X_fp32)
            self.assertEqual(Y_ref, Y_hat)