def test_convert_without_squash_mask(self):
    (
        mod,
        sparsifier,
        sparse_config,
    ) = _get_model_and_sparsifier_and_sparse_config(tq.get_default_qconfig("fbgemm"))
    sparsifier.prepare(mod, config=sparse_config)
    tq.prepare(mod, inplace=True)

    # check that the correct modules had parametrizations added and
    # that none were lost during prepare
    self.assertTrue(hasattr(mod[0], "parametrizations"))
    self.assertTrue(hasattr(mod[5], "parametrizations"))

    # check that the correct observers were inserted and that matching
    # occurred successfully
    self.assertTrue(hasattr(mod[5], "activation_post_process"))
    sparsifier.step()
    sparsity_level = _calculate_sparsity(mod[5].weight)
    mod(torch.randn(1, 4, 4, 4))
    tq.convert(mod, inplace=True)

    # check that the final module is the expected quantized module and that the model runs
    self.assertTrue(isinstance(mod[5], torch.nn.quantized.Linear))
    self.assertEqual(mod(torch.randn(1, 4, 4, 4)).shape, torch.Size([1, 4, 4, 4]))

    # check that the module was actually sparsified
    cur_sparsity = _calculate_sparsity(mod[5]._weight_bias()[0])
    self.assertGreaterAlmostEqual(cur_sparsity, sparsity_level)
    self.assertGreaterAlmostEqual(sparsity_level, sparse_config[0]["sparsity_level"])
    self.assertGreaterAlmostEqual(cur_sparsity, sparse_config[0]["sparsity_level"])
def test_fusion_sequential_model_eval(self):
    for qengine in supported_qengines:
        with override_quantized_engine(qengine):
            model = ModelWithSequentialFusion().eval()
            model.to(torch.float)
            fuse_modules(
                model,
                [['conv1', 'relu1'],
                 ['features.0.0', 'features.0.1', 'features.0.2'],
                 ['features.1.0', 'features.1.1', 'features.1.2'],
                 ['features.2.0', 'features.2.1', 'features.2.2'],
                 ['classifier.0', 'classifier.1']],
                inplace=True)
            self.assertEqual(type(model.conv1), nni.ConvReLU2d,
                             msg="Fused Conv + Relu: nni.ConvReLU2d")
            self.assertEqual(type(model.conv1[0]), nn.Conv2d,
                             msg="Fused Conv + Relu: Conv2d")
            self.assertEqual(type(model.conv1[1]), nn.ReLU,
                             msg="Fused Conv + Relu: Relu")
            self.assertEqual(type(model.relu1), nn.Identity,
                             msg="Fused Conv + Relu: Identity")
            for i in range(3):
                self.assertEqual(type(model.features[i][0]), nni.ConvReLU2d,
                                 msg="Fused submodule Conv + folded BN")
                self.assertEqual(type(model.features[i][1]), nn.Identity,
                                 msg="Fused submodule (skipped BN)")
                self.assertEqual(type(model.features[i][2]), nn.Identity,
                                 msg="Fused submodule (skipped ReLU)")
            self.assertEqual(type(model.classifier[0]), nni.LinearReLU)
            self.assertEqual(type(model.classifier[1]), nn.Identity)
            model.qconfig = torch.ao.quantization.get_default_qconfig(qengine)
            prepare(model, inplace=True)
            self.checkObservers(model)
            model(self.img_data_2d[0][0])
            convert(model, inplace=True)
            model(self.img_data_2d[1][0])
            self.checkModelWithSequentialQuantized(model)
def default_rcnn_prepare_for_quant_convert(self, cfg):
    if cfg.QUANTIZATION.EAGER_MODE:
        convert(self, inplace=True)
    else:
        assert not isinstance(self.backbone, FPN), "FPN is not supported in FX mode"
        self.backbone = convert_fx(
            self.backbone,
            convert_custom_config_dict={
                "preserved_attributes": ["size_divisibility", "padding_constraints"]
            },
        )
        self.proposal_generator.rpn_head.rpn_feature = convert_fx(
            self.proposal_generator.rpn_head.rpn_feature)
        self.proposal_generator.rpn_head.rpn_regressor.cls_logits = convert_fx(
            self.proposal_generator.rpn_head.rpn_regressor.cls_logits)
        self.proposal_generator.rpn_head.rpn_regressor.bbox_pred = convert_fx(
            self.proposal_generator.rpn_head.rpn_regressor.bbox_pred)
        self.roi_heads.box_head.roi_box_conv = convert_fx(
            self.roi_heads.box_head.roi_box_conv)
        self.roi_heads.box_head.avgpool = convert_fx(
            self.roi_heads.box_head.avgpool)
        self.roi_heads.box_predictor.cls_score = convert_fx(
            self.roi_heads.box_predictor.cls_score)
        self.roi_heads.box_predictor.bbox_pred = convert_fx(
            self.roi_heads.box_predictor.bbox_pred)
    return self
def test_batchnorm_relu_basic(self):
    """
    Basic test of the PyTorch 3D batchnorm + ReLU node on Glow.
    """

    class SimpleQuantizedBatchNormRelu(nn.Module):
        def __init__(self, w, b, m, v):
            super(SimpleQuantizedBatchNormRelu, self).__init__()
            self.bn = torch.nn.BatchNorm3d(4)
            self.relu = torch.nn.ReLU()
            self.bn.weight = torch.nn.Parameter(w)
            self.bn.bias = torch.nn.Parameter(b)
            self.bn.running_mean = m
            self.bn.running_var = v
            self.q = QuantStub()
            self.dq = DeQuantStub()

        def forward(self, x):
            qx = self.q(x)
            qy = self.bn(qx)
            qy_relu = self.relu(qy)
            y = self.dq(qy_relu)
            return y

    C = 4
    weight = torch.ones(C) + torch.rand(C) * 0.001
    bias = torch.rand(C) * 0.0001
    running_mean = torch.zeros(C)
    running_var = torch.ones(C)
    inputs = torch.randn((10, C, 2, 3, 4), requires_grad=False)
    model = SimpleQuantizedBatchNormRelu(weight, bias, running_mean, running_var)
    model.eval()
    model.qconfig = my_qconfig
    modules_to_fuse = [["bn", "relu"]]
    fuse_modules(model, modules_to_fuse, inplace=True)
    prepare(model, inplace=True)
    model.forward(inputs)
    convert(model, inplace=True)

    # Because quantization differs between PyTorch and Glow, we set eps large
    # enough. Batchnorm can introduce accuracy issues of up to ~1e-2 in some
    # rare cases, so to keep this test from being flaky, atol is set to 0.1
    # and rtol to 1e-5.
    utils.compare_tracing_methods(
        model,
        inputs,
        fusible_ops={"quantized::batch_norm3d_relu"},
        atol=1e-1,
        rtol=1e-5,
        skip_to_glow=True,
    )
def test_fixed_qparam_ops(self):
    class M(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.sigmoid = torch.nn.Sigmoid()
            self.hardsigmoid = torch.nn.Hardsigmoid()
            self.tanh = torch.nn.Tanh()
            self.quant = QuantStub()
            self.dequant = DeQuantStub()

        def forward(self, x):
            x = self.quant(x)
            x = self.sigmoid(x)
            x = self.hardsigmoid(x)
            x = self.tanh(x)
            x = self.dequant(x)
            return x

    m = M().train()
    m.qconfig = default_qat_qconfig
    m = prepare_qat(m)
    for attr in ['sigmoid', 'hardsigmoid', 'tanh']:
        self.assertEqual(type(getattr(m, attr).activation_post_process),
                         FixedQParamsFakeQuantize)
    data = torch.randn(1, 3, 2, 4)
    before_convert = m(data)
    m = convert(m)
    after_convert = m(data)
    self.assertEqual(before_convert, after_convert)
    # make sure activation post process is removed
    for attr in ['sigmoid', 'hardsigmoid', 'tanh']:
        # verify fake quant module is removed
        self.assertFalse(hasattr(getattr(m, attr), 'activation_post_process'))
        # verify that hooks are removed
        self.assertTrue(len(getattr(m, attr)._forward_hooks.items()) == 0)

    # make sure no fake quantize module is inserted for eval mode
    def checkNoFQModule(m):
        for attr in ['sigmoid', 'hardsigmoid', 'tanh']:
            self.assertFalse(hasattr(getattr(m, attr), "activation_post_process"))
            self.assertTrue(len(getattr(m, attr)._forward_hooks.items()) == 0)

    m = M().eval()
    m.qconfig = default_qconfig
    m = prepare(m)
    checkNoFQModule(m)
    m = convert(m)
    checkNoFQModule(m)
def test_fusion_before_s_prep(self):
    (
        mod,
        sparsifier,
        _,
    ) = self._get_model_and_sparsifier_and_sparse_config()
    tq.fuse_modules(mod, [["5", "6"]], inplace=True)

    # the original fqn is broken by fusion, but sparse prepare still works
    # if the correct post-fusion fqn is put in the config
    sparse_config = [
        {
            "tensor_fqn": "5.0.weight",
            "sparsity_level": 0.7,
            "sparse_block_shape": (1, 4),
            "zeros_per_block": 4,
        },
        {"tensor_fqn": "0.weight"},
    ]

    sparsifier.prepare(mod, config=sparse_config)
    mod[5].qconfig = tq.get_default_qconfig("fbgemm")
    tq.prepare(mod, inplace=True)

    # check that the correct modules had parametrizations added and
    # that none were lost during prepare
    self.assertTrue(hasattr(mod[0], "parametrizations"))
    self.assertTrue(hasattr(mod[5][0], "parametrizations"))

    # check that the correct observers were inserted and that matching
    # occurred successfully
    self.assertTrue(hasattr(mod[5], "activation_post_process"))
    sparsifier.step()
    sparsity_level = self._calculate_sparsity(mod[5][0].weight)
    mod(torch.randn(1, 4, 4, 4))
    tq.convert(mod, inplace=True)

    # check that the final module is the expected quantized module and that the model runs
    self.assertTrue(isinstance(mod[5], torch.nn.intrinsic.quantized.LinearReLU))
    self.assertEqual(mod(torch.randn(1, 4, 4, 4)).shape, torch.Size([1, 4, 4, 4]))

    # check that the module was actually sparsified
    cur_sparsity = self._calculate_sparsity(mod[5]._weight_bias()[0])
    self.assertGreaterAlmostEqual(cur_sparsity, sparsity_level)
    self.assertGreaterAlmostEqual(sparsity_level, sparse_config[0]["sparsity_level"])
    self.assertGreaterAlmostEqual(cur_sparsity, sparse_config[0]["sparsity_level"])
def test_compare_model_stub_functional_static(self):
    r"""Compare the output of a static quantized functional layer and its
    float shadow module.
    """
    qengine = torch.backends.quantized.engine
    model = ModelWithFunctionals().eval()
    model.qconfig = torch.ao.quantization.get_default_qconfig("fbgemm")
    q_model = prepare(model, inplace=False)
    q_model(self.img_data_2d[0][0])
    q_model = convert(q_model)
    module_swap_list = [nnq.FloatFunctional]
    ob_dict = compare_model_stub(
        model, q_model, module_swap_list, self.img_data_2d[0][0])
    self.assertEqual(len(ob_dict), 6)
    self.assertTrue(isinstance(q_model.mycat, Shadow))
    self.assertTrue(isinstance(q_model.myadd, Shadow))
    self.assertTrue(isinstance(q_model.mymul, Shadow))
    self.assertTrue(isinstance(q_model.myadd_relu, Shadow))
    self.assertTrue(isinstance(q_model.my_scalar_add, Shadow))
    self.assertTrue(isinstance(q_model.my_scalar_mul, Shadow))
    for k, v in ob_dict.items():
        self.assertTrue(len(v["float"]) == len(v["quantized"]))
        for i, val in enumerate(v["quantized"]):
            self.assertTrue(v["float"][i].shape == v["quantized"][i].shape)
def test_embedding_bag_linear(self):
    for qengine in supported_qengines:
        with override_quantized_engine(qengine):
            model = ManualEmbeddingBagLinear().train()
            model = prepare_qat(model)
            self.checkObservers(model)

            train_indices = [[torch.randint(0, 10, (12, 12)),
                              torch.randn((12, 1))] for _ in range(2)]
            eval_output = [[torch.randint(0, 10, (12, 1))]]

            test_only_train_fn(model, train_indices)
            # make sure no activation_post_process is inserted for EmbeddingBag
            self.assertFalse(hasattr(model, "activation_post_process"))
            model = convert(model)

            def checkQuantized(model):
                # make sure EmbeddingBag is now a quantized EmbeddingBag
                self.assertEqual(type(model.emb), nn.quantized.EmbeddingBag)
                # also check that Linear has been quantized
                self.assertEqual(type(model.linear), nnq.Linear)

                test_only_eval_fn(model, eval_output)
                self.checkScriptable(model, eval_output)
                self.checkNoQconfig(model)

            checkQuantized(model)

            model = ManualEmbeddingBagLinear()
            model = quantize_qat(model, test_only_train_fn, [train_indices])
            checkQuantized(model)
def test_dynamic_qat_linear(self):
    for qengine in supported_qengines:
        with override_quantized_engine(qengine):
            # Dynamic QAT without memoryless observers should fail
            with self.assertRaisesRegex(
                    ValueError,
                    "Dynamic QAT requires a memoryless observer." +
                    "This means a MovingAverage observer with averaging constant equal to 1"):
                model = ManualLinearDynamicQATModel(default_qat_qconfig)
                model = prepare_qat(model, mapping={torch.nn.Linear: nnqatd.Linear})

            model = ManualLinearDynamicQATModel()
            model = prepare_qat(model, mapping={torch.nn.Linear: nnqatd.Linear})
            self.assertEqual(type(model.fc1), nnqatd.Linear)
            self.assertEqual(type(model.fc2), nnqatd.Linear)
            self.checkObservers(model)
            test_only_train_fn(model, self.train_data)
            model = convert(model, mapping={nnqatd.Linear: nnqd.Linear})
            self.assertEqual(type(model.fc1), nnqd.Linear)
            self.assertEqual(type(model.fc2), nnqd.Linear)
            test_only_eval_fn(model, self.calib_data)
            self.checkScriptable(model, self.calib_data)
            self.checkNoQconfig(model)
def test_compare_model_outputs_functional_static(self):
    r"""Compare the output of a functional layer in the static quantized model
    against the corresponding output of the conv layer in the float model.
    """
    qengine = torch.backends.quantized.engine
    model = ModelWithFunctionals().eval()
    model.qconfig = torch.ao.quantization.get_default_qconfig("fbgemm")
    q_model = prepare(model, inplace=False)
    q_model(self.img_data_2d[0][0])
    q_model = convert(q_model)
    act_compare_dict = compare_model_outputs(model, q_model, self.img_data_2d[0][0])
    self.assertEqual(len(act_compare_dict), 7)
    expected_act_compare_dict_keys = {
        "mycat.stats",
        "myadd.stats",
        "mymul.stats",
        "myadd_relu.stats",
        "my_scalar_add.stats",
        "my_scalar_mul.stats",
        "quant.stats",
    }
    self.assertTrue(act_compare_dict.keys() == expected_act_compare_dict_keys)
    for k, v in act_compare_dict.items():
        self.assertTrue(len(v["float"]) == len(v["quantized"]))
        for i, val in enumerate(v["quantized"]):
            self.assertTrue(v["float"][i].shape == v["quantized"][i].shape)
def test_conv_linear(self):
    for qengine in supported_qengines:
        with override_quantized_engine(qengine):
            model = ManualConvLinearQATModel()

            model = prepare_qat(model)
            self.checkObservers(model)

            test_only_train_fn(model, self.img_data_2d_train)
            model = convert(model)

            def checkQuantized(model):
                self.assertEqual(type(model.conv), nnq.Conv2d)
                self.assertEqual(type(model.fc1), nnq.Linear)
                self.assertEqual(type(model.fc2), nnq.Linear)
                test_only_eval_fn(model, self.img_data_2d)
                self.checkScriptable(model, self.img_data_2d)
                self.checkNoQconfig(model)

            checkQuantized(model)

            model = ManualConvLinearQATModel()
            model = quantize_qat(model, test_only_train_fn, [self.img_data_2d_train])
            checkQuantized(model)
def test_embedding_bag_linear(self):
    for qengine in supported_qengines:
        with override_quantized_engine(qengine):
            model = ManualEmbeddingBagLinear().train()
            model = prepare_qat(model, mapping=get_embedding_qat_module_mappings())
            self.checkObservers(model)

            test_only_train_fn(model, self.embed_linear_data_train)
            # make sure no activation_post_process is inserted for EmbeddingBag
            self.assertFalse(hasattr(model, "activation_post_process"))
            # make sure the FakeQuant zero_points have the correct dtype
            self.assertEqual(model.emb.weight_fake_quant.zero_point.dtype,
                             torch.float32)
            self.assertEqual(model.linear.weight_fake_quant.zero_point.dtype,
                             torch.int32)
            model = convert(model,
                            mapping=get_embedding_static_quant_module_mappings())

            def checkQuantized(model):
                # make sure EmbeddingBag is now a quantized EmbeddingBag
                self.assertEqual(type(model.emb), nn.quantized.EmbeddingBag)
                # also check that Linear has been quantized
                self.assertEqual(type(model.linear), nnq.Linear)

                test_only_eval_fn(model, self.embed_data)
                self.checkScriptable(model, self.embed_data)
                self.checkNoQconfig(model)

            checkQuantized(model)

            model = ManualEmbeddingBagLinear()
def test_defused_embedding_bag_linear(self):
    for qengine in supported_qengines:
        with override_quantized_engine(qengine):
            model = DeFusedEmbeddingBagLinear().train()
            model = prepare_qat(model, mapping=get_embedding_qat_module_mappings())
            self.checkObservers(model)

            test_only_train_fn(model, self.embed_linear_data_train)
            # make sure activation_post_process is inserted after Linear
            self.assertEqual(type(model.linear.activation_post_process),
                             FusedMovingAvgObsFakeQuantize)
            # make sure Embedding has a noop for activation
            self.assertEqual(type(model.emb.activation_post_process), NoopObserver)
            model = convert(model,
                            mapping=get_embedding_static_quant_module_mappings())

            def checkQuantized(model):
                # make sure Embedding is now a QuantizedEmbedding
                self.assertEqual(type(model.emb), nn.quantized.Embedding)
                # make sure Linear is now a QuantizedLinear
                self.assertEqual(type(model.linear), nn.quantized.Linear)

                test_only_eval_fn(model, self.embed_data)
                self.checkScriptable(model, self.embed_data)
                self.checkNoQconfig(model)

            checkQuantized(model)
def test_fuse_module_train(self):
    model = ModelForFusion(default_qat_qconfig).train()
    # Test step-by-step fusion
    model = fuse_modules_qat(model, ['conv1', 'bn1', 'relu1'])
    model = fuse_modules_qat(model, ['sub1.conv', 'sub1.bn'])
    self.assertEqual(type(model.conv1), nni.ConvBnReLU2d,
                     msg="Fused Conv + BN + Relu first layer")
    self.assertEqual(type(model.bn1), torch.nn.Identity,
                     msg="Fused Conv + BN + Relu (skipped BN)")
    self.assertEqual(type(model.relu1), torch.nn.Identity,
                     msg="Fused Conv + BN + Relu (skipped Relu)")

    self.assertEqual(type(model.sub1.conv), nni.ConvBn2d,
                     msg="Fused submodule Conv + BN")
    self.assertEqual(type(model.sub1.bn), torch.nn.Identity,
                     msg="Fused submodule Conv + BN (skipped BN)")
    self.assertEqual(type(model.sub2.conv), torch.nn.Conv2d,
                     msg="Non-fused submodule Conv")
    self.assertEqual(type(model.sub2.relu), torch.nn.ReLU,
                     msg="Non-fused submodule ReLU")
    model = prepare_qat(model)
    self.checkObservers(model)

    def checkQAT(model):
        self.assertEqual(type(model.conv1), nniqat.ConvBnReLU2d)
        self.assertEqual(type(model.bn1), nn.Identity)
        self.assertEqual(type(model.relu1), nn.Identity)
        self.assertEqual(type(model.sub1.conv), nniqat.ConvBn2d)
        self.assertEqual(type(model.sub1.bn), nn.Identity)
        self.assertEqual(type(model.sub2.conv), nn.Conv2d)
        self.assertEqual(type(model.sub2.relu), nn.ReLU)

    checkQAT(model)
    test_only_train_fn(model, self.img_data_1d_train)
    model = convert(model)

    def checkQuantized(model):
        self.assertEqual(type(model.conv1), nniq.ConvReLU2d)
        self.assertEqual(type(model.bn1), nn.Identity)
        self.assertEqual(type(model.relu1), nn.Identity)
        self.assertEqual(type(model.sub1.conv), nnq.Conv2d)
        self.assertEqual(type(model.sub1.bn), nn.Identity)
        self.assertEqual(type(model.sub2.conv), nn.Conv2d)
        self.assertEqual(type(model.sub2.relu), nn.ReLU)
        test_only_eval_fn(model, self.img_data_1d)
        self.checkNoQconfig(model)

    with self.assertRaisesRegex(RuntimeError,
                                "Could not run 'aten::native_batch_norm' with arguments from the 'QuantizedCPU'"):
        checkQuantized(model)

    model = ModelForFusion(default_qat_qconfig).train()
    model = fuse_modules_qat(
        model, [['conv1', 'bn1', 'relu1'], ['sub1.conv', 'sub1.bn']])
    model = quantize_qat(model, test_only_train_fn, [self.img_data_1d_train])
    with self.assertRaisesRegex(RuntimeError,
                                "Could not run 'aten::native_batch_norm' with arguments from the 'QuantizedCPU'"):
        checkQuantized(model)
def test_train_save_load_eval(self):
    r"""Test the QAT flow of creating a model, running QAT, and saving the
    quantized state_dict.
    During eval, we first call prepare_qat and convert on the model, then
    load the state_dict and compare the results against the original model.
    """
    for qengine in supported_qengines:
        with override_quantized_engine(qengine):
            model = TwoLayerLinearModel()
            model = torch.ao.quantization.QuantWrapper(model)
            model.qconfig = torch.ao.quantization.get_default_qat_qconfig(qengine)
            model = prepare_qat(model)

            fq_state_dict = model.state_dict()

            test_only_train_fn(model, self.train_data)
            model = convert(model)

            quant_state_dict = model.state_dict()

            x = torch.rand(2, 5, dtype=torch.float)
            ref = model(x)

            # Create the model again for eval. Check the result using the quantized state_dict.
            model = TwoLayerLinearModel()
            model = torch.ao.quantization.QuantWrapper(model)
            model.qconfig = torch.ao.quantization.get_default_qat_qconfig(qengine)
            torch.ao.quantization.prepare_qat(model, inplace=True)
            new_state_dict = model.state_dict()

            # Check that the model after prepare_qat has the same state_dict as the original.
            self.assertEqual(set(fq_state_dict.keys()), set(new_state_dict.keys()))

            torch.ao.quantization.convert(model, inplace=True)
            model.eval()
            model.load_state_dict(quant_state_dict)
            out = model(x)
            self.assertEqual(ref, out)

            # Check that a model created using prepare has the same state_dict as the quantized state_dict.
            model = TwoLayerLinearModel()
            model.eval()
            model = torch.ao.quantization.QuantWrapper(model)
            model.qconfig = torch.ao.quantization.get_default_qconfig(qengine)
            torch.ao.quantization.prepare(model, inplace=True)
            torch.ao.quantization.convert(model, inplace=True)
            self.assertEqual(set(model.state_dict().keys()),
                             set(quant_state_dict.keys()))
            model.eval()
            model.load_state_dict(quant_state_dict)
            out = model(x)
            self.assertEqual(ref, out)
def test_linear_bn_workflow(self):
    qengine = torch.backends.quantized.engine
    m = nn.Sequential(
        QuantStub(),
        nn.Linear(4, 4),
        nn.BatchNorm1d(4),
    )
    data = torch.randn(4, 4)
    m.qconfig = torch.ao.quantization.get_default_qat_qconfig(qengine)
    m = torch.ao.quantization.fuse_modules_qat(m, [['1', '2']])
    mp = prepare_qat(m)
    mp(data)
    mq = convert(mp)
    self.assertTrue(type(mq[1]) == nnq.Linear)
    self.assertTrue(type(mq[2]) == nn.Identity)
def test_relu(self):
    class M(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.relu = nn.ReLU()

        def forward(self, x):
            x = self.relu(x)
            return x

    m = M().train()
    m.qconfig = default_qconfig
    m = prepare_qat(m)
    # make sure no activation_post_process is inserted for relu
    self.assertFalse(hasattr(m, "activation_post_process"))
    m = convert(m)
    # make sure the ReLU module is not changed
    self.assertEqual(type(m.relu), nn.ReLU)
def _test_activation_convert_numerics_impl(self, Act, data):
    class M(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.act = Act()
            self.quant = QuantStub()
            self.dequant = DeQuantStub()

        def forward(self, x):
            x = self.quant(x)
            x = self.act(x)
            x = self.dequant(x)
            return x

    m = M().train()
    m.qconfig = default_qat_qconfig
    m = prepare_qat(m)
    before_convert = m(data)
    m = convert(m)
    after_convert = m(data)
    self.assertEqual(before_convert, after_convert)
def test_dropout(self):
    for qengine in supported_qengines:
        with override_quantized_engine(qengine):
            model = ManualDropoutQATModel(qengine)

            model = prepare_qat(model)
            self.checkObservers(model)

            test_only_train_fn(model, self.train_data)
            model = convert(model)

            def checkQuantized(model):
                self.assertEqual(type(model.fc1), nnq.Linear)
                self.assertEqual(type(model.dropout), nnq.Dropout)
                test_only_eval_fn(model, self.calib_data)
                self.checkScriptable(model, self.calib_data)
                self.checkNoQconfig(model)

            checkQuantized(model)

            model = quantize_qat(ManualDropoutQATModel(qengine),
                                 test_only_train_fn, [self.train_data])
            checkQuantized(model)
def convert_predictor(
    cfg,
    pytorch_model,
    predictor_type,
    data_loader,
):
    if "int8" in predictor_type:
        if not cfg.QUANTIZATION.QAT.ENABLED:
            logger.info(
                "The model is not quantized during training, running post"
                " training quantization ...")
            pytorch_model = post_training_quantize(cfg, pytorch_model, data_loader)
            # only check that bn exists in ptq, as qat still has bn inside fused ops
            if fuse_utils.check_bn_exist(pytorch_model):
                logger.warning("Post training quantized model has bn inside fused ops")
        logger.info(f"Converting quantized model {cfg.QUANTIZATION.BACKEND}...")

        if hasattr(pytorch_model, "prepare_for_quant_convert"):
            pytorch_model = pytorch_model.prepare_for_quant_convert(cfg)
        else:
            # TODO(T93870381): move this to a default function
            if cfg.QUANTIZATION.EAGER_MODE:
                pytorch_model = convert(pytorch_model, inplace=False)
            else:  # FX graph mode quantization
                pytorch_model = convert_fx(pytorch_model)

        logger.info("Quantized Model:\n{}".format(pytorch_model))
    else:
        pytorch_model = fuse_utils.fuse_model(pytorch_model)
        logger.info("Fused Model:\n{}".format(pytorch_model))
        if fuse_utils.count_bn_exist(pytorch_model) > 0:
            logger.warning("BN existed in pytorch model after fusing.")
    return pytorch_model
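# Hedged sketch (illustration only, not part of convert_predictor): the minimal
# eager-mode post-training-quantization flow that the "int8" branch above relies
# on having happened by the time `convert` is called. The toy model, the
# function name `_example_eager_ptq_flow`, and the calibration tensor are
# assumptions for illustration; only the prepare -> calibrate -> convert
# sequence is the point.
def _example_eager_ptq_flow():
    import torch
    from torch.ao.quantization import (
        QuantStub, DeQuantStub, get_default_qconfig, prepare, convert)

    model = torch.nn.Sequential(
        QuantStub(),            # quantizes the float input
        torch.nn.Linear(4, 4),  # module to be quantized
        DeQuantStub(),          # dequantizes the output back to float
    ).eval()
    model.qconfig = get_default_qconfig("fbgemm")  # backend-specific qconfig
    prepare(model, inplace=True)                   # insert observers
    model(torch.randn(2, 4))                       # calibrate on sample data
    convert(model, inplace=True)                   # swap in quantized modules
    return model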
def test_fuse_module_eval(self):
    model = ModelForFusion(default_qconfig)
    model.eval()
    model = fuse_modules(
        model,
        [['conv3', 'bn3', 'relu4'],
         ['conv1', 'bn1', 'relu1'],
         ['conv2', 'relu2'],
         ['bn2', 'relu3'],
         ['sub1.conv', 'sub1.bn']])
    self.assertEqual(type(model.conv1), nni.ConvReLU2d,
                     msg="Fused Conv + BN + Relu first layer (BN is folded)")
    self.assertEqual(type(model.conv1[0]), nn.Conv2d,
                     msg="Fused Conv + BN + Relu (Conv + folded BN only)")
    self.assertEqual(type(model.conv1[1]), nn.ReLU,
                     msg="Fused Conv + BN + Relu second layer (Relu only)")
    self.assertEqual(type(model.bn1), nn.Identity,
                     msg="Fused Conv + BN + Relu second layer (Skipped BN)")
    self.assertEqual(type(model.relu1), nn.Identity,
                     msg="Fused Conv + BN + Relu second layer (Skipped Relu)")

    self.assertEqual(type(model.conv2), nni.ConvReLU3d,
                     msg="Fused Conv + BN + Relu first layer (BN is folded)")
    self.assertEqual(type(model.bn2), nni.BNReLU3d,
                     msg="Fused BN + Relu first layer (Relu is folded)")
    self.assertEqual(type(model.relu3), nn.Identity,
                     msg="Fused BN + Relu second layer (Skipped Relu)")
    self.assertEqual(type(model.conv2[0]), nn.Conv3d,
                     msg="Fused Conv + BN + Relu (Conv + folded BN only)")
    self.assertEqual(type(model.conv2[1]), nn.ReLU,
                     msg="Fused Conv + BN + Relu second layer (Relu only)")
    self.assertEqual(type(model.relu2), nn.Identity,
                     msg="Fused Conv + BN + Relu second layer (Skipped Relu)")

    self.assertEqual(type(model.conv3), nni.ConvReLU1d,
                     msg="Fused Conv + Relu for Conv1d (folded BN)")
    self.assertEqual(type(model.conv3[0]), nn.Conv1d,
                     msg="Fused Conv + Relu for Conv1d")
    self.assertEqual(type(model.conv3[1]), nn.ReLU,
                     msg="Fused Conv + Relu for Conv1d")
    self.assertEqual(type(model.bn3), nn.Identity,
                     msg="Fused Conv + BN + Relu for Conv1d (Skipped BN)")

    self.assertEqual(type(model.sub1.conv), nn.Conv2d,
                     msg="Fused submodule Conv + folded BN")
    self.assertEqual(type(model.sub1.bn), nn.Identity,
                     msg="Fused submodule (skipped BN)")
    self.assertEqual(type(model.sub2.conv), nn.Conv2d,
                     msg="Non-fused submodule Conv")
    self.assertEqual(type(model.sub2.relu), torch.nn.ReLU,
                     msg="Non-fused submodule ReLU")

    model = prepare(model)
    self.checkObservers(model)
    test_only_eval_fn(model, self.img_data_1d)
    model = convert(model)

    def checkQuantized(model):
        self.assertEqual(type(model.conv3), nniq.ConvReLU1d)
        self.assertEqual(type(model.conv1), nniq.ConvReLU2d)
        self.assertEqual(type(model.bn1), nn.Identity)
        self.assertEqual(type(model.relu1), nn.Identity)
        self.assertEqual(type(model.sub1.conv), nnq.Conv2d)
        self.assertEqual(type(model.sub1.bn), nn.Identity)
        self.assertEqual(type(model.sub2.conv), nn.Conv2d)
        self.assertEqual(type(model.sub2.relu), nn.ReLU)
        self.assertEqual(type(model.bn2), nniq.BNReLU3d)
        test_only_eval_fn(model, self.img_data_1d)
        self.checkNoQconfig(model)

    checkQuantized(model)

    model = ModelForFusion(default_qconfig).eval()
    model = fuse_modules(
        model,
        [['conv1', 'bn1', 'relu1'],
         ['conv2', 'relu2'],
         ['bn2', 'relu3'],
         ['sub1.conv', 'sub1.bn'],
         ['conv3', 'bn3', 'relu4']])
    model = quantize(model, test_only_eval_fn, [self.img_data_1d])
    checkQuantized(model)
def _squash_mask_calibrate_and_convert(model, sparsifier, input):
    sparsifier.step()
    sparsifier.squash_mask()
    model(input)
    tq.convert(model, inplace=True)
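# Hedged usage sketch (illustration only, not part of the original suite): how
# the helper above is typically driven. The setup reuses
# `_get_model_and_sparsifier_and_sparse_config` from the tests above and the
# function name `_example_squash_mask_flow` is a hypothetical label; the general
# flow is sparsifier.prepare + tq.prepare, then one sparsifier step, mask
# squash, calibration, and eager-mode convert via the helper.
def _example_squash_mask_flow():
    mod, sparsifier, sparse_config = _get_model_and_sparsifier_and_sparse_config(
        tq.get_default_qconfig("fbgemm"))
    sparsifier.prepare(mod, config=sparse_config)  # attach sparsity parametrizations
    tq.prepare(mod, inplace=True)                  # insert observers
    # step the sparsifier, squash the mask into the weights, calibrate, convert
    _squash_mask_calibrate_and_convert(mod, sparsifier, torch.randn(1, 4, 4, 4))
    return mod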
def test_sparse_qlinear_serdes(self):
    batch_size = 12
    input_channels = 4
    output_channels = 7
    model = self.SparseQuantizedModel(input_channels, output_channels)

    # For sparse kernels both the activation and weight ZP = 0
    X_scale = 0.2
    X_zp = 0
    W_scale = 1e-2
    W_zp = 0

    with override_cpu_allocator_for_qnnpack(qengine_is_qnnpack()):
        X_fp32 = torch.randn(batch_size, input_channels, dtype=torch.float32)
        float_bias = torch.randn(output_channels, dtype=torch.float32)

        X_q = torch.quantize_per_tensor(
            X_fp32, scale=X_scale, zero_point=X_zp, dtype=torch.quint8)
        X_fp32 = X_q.dequantize()

        W_fp32 = torch.randn(output_channels, input_channels, dtype=torch.float32)
        mask = torch.randint(0, 2, W_fp32.shape)
        W_fp32 *= mask
        W_q = torch.quantize_per_tensor(W_fp32, W_scale, W_zp, torch.qint8)

        model.linear.weight = nn.Parameter(W_q.dequantize())
        model.linear.sparse_params = {'sparse_block_shape': (1, 4)}
        model.eval()

        # Note: At the moment, for sparse kernels
        #   fbgemm supports only statically quantized sparse linear
        #   qnnpack supports only dynamically quantized sparse linear
        # Hence we have two different tests: fbgemm tests the static flow,
        # qnnpack tests the dynamic flow. They should be unified later on
        # and the tests fixed accordingly.
        if qengine_is_fbgemm():
            model.qconfig = tq.get_default_qconfig('fbgemm')
            qmodel = copy.deepcopy(model)
            sqmodel = copy.deepcopy(model)

            tq.prepare(qmodel, inplace=True)
            tq.prepare(sqmodel, inplace=True)

            with torch.no_grad():
                qmodel(X_fp32)
                sqmodel(X_fp32)

            # Make sure the quantization parameters are computed the same way
            qparams = qmodel.linear.qconfig.weight().calculate_qparams()
            sqparams = sqmodel.linear.qconfig.weight().calculate_qparams()
            self.assertEqual(qparams, sqparams)

            # Make sure the mapping of sparse kernels does not affect the non-sparse one
            sparse_mapping = tq.get_default_static_quant_module_mappings()
            sparse_mapping[nn.Linear] = ao_nn_sq.Linear
            tq.convert(sqmodel, inplace=True, mapping=sparse_mapping)
            tq.convert(qmodel, inplace=True)

            assert isinstance(sqmodel.linear, ao_nn_sq.Linear), "Convert failed"
            assert isinstance(qmodel.linear, nn.quantized.Linear), "Mapping failed"

            scripted_sqmodel = torch.jit.script(sqmodel)
            scripted_sqmodel.eval()
            buffer = io.BytesIO()
            torch.jit.save(scripted_sqmodel, buffer)
            buffer.seek(0)
            sqmodel = torch.jit.load(buffer)

            # Make sure the numerics are right
            Y_ref = qmodel(X_q)
            Y_hat = sqmodel(X_q)
            self.assertEqual(Y_ref.dequantize(), Y_hat.dequantize())

        elif qengine_is_qnnpack():
            qconfig = {nn.Linear: tq.qconfig.default_dynamic_qconfig}
            dqmodel = copy.deepcopy(model)
            sdqmodel = copy.deepcopy(model)

            tq.propagate_qconfig_(dqmodel, qconfig)
            tq.propagate_qconfig_(sdqmodel, qconfig)

            # Make sure the quantization parameters are computed the same way
            qparams = dqmodel.linear.qconfig.weight().calculate_qparams()
            sqparams = sdqmodel.linear.qconfig.weight().calculate_qparams()
            self.assertEqual(qparams, sqparams)

            # Make sure the mapping of sparse kernels does not affect the non-sparse one
            sparse_mapping = copy.deepcopy(
                tq.get_default_dynamic_quant_module_mappings())
            sparse_mapping[nn.Linear] = ao_nn_sq.dynamic.Linear
            with LinearBlockSparsePattern(1, 4):
                tq.convert(sdqmodel, inplace=True, mapping=sparse_mapping)
            tq.convert(
                dqmodel,
                mapping=tq.get_default_dynamic_quant_module_mappings(),
                inplace=True)

            assert isinstance(sdqmodel.linear, ao_nn_sq.dynamic.Linear), "Convert failed"
            assert isinstance(dqmodel.linear, nn.quantized.dynamic.Linear), "Mapping failed"

            scripted_sdqmodel = torch.jit.script(sdqmodel)
            scripted_sdqmodel.eval()
            buffer = io.BytesIO()
            torch.jit.save(scripted_sdqmodel, buffer)
            buffer.seek(0)
            sdqmodel = torch.jit.load(buffer)

            # Make sure the numerics are right
            Y_ref = dqmodel(X_fp32)
            Y_hat = sdqmodel(X_fp32)
            self.assertEqual(Y_ref, Y_hat)
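# Hedged sketch (illustration only): the core of the qnnpack/dynamic branch in
# the test above, reduced to the mapping-override step. `ao_nn_sq`,
# `LinearBlockSparsePattern`, and `tq` are used exactly as in the surrounding
# tests; the function name `_example_dynamic_sparse_convert` and the idea of
# passing in an arbitrary model with a `linear` submodule are assumptions.
def _example_dynamic_sparse_convert(model):
    qconfig_dict = {nn.Linear: tq.qconfig.default_dynamic_qconfig}
    tq.propagate_qconfig_(model, qconfig_dict)  # dynamic quant needs no calibration
    # swap nn.Linear to the sparse dynamic quantized Linear instead of the
    # default dynamic quantized Linear
    sparse_mapping = copy.deepcopy(tq.get_default_dynamic_quant_module_mappings())
    sparse_mapping[nn.Linear] = ao_nn_sq.dynamic.Linear
    with LinearBlockSparsePattern(1, 4):  # (row, col) block shape for the sparse kernel
        tq.convert(model, inplace=True, mapping=sparse_mapping)
    return model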
def _sparse_layer_test_helper(
    model_class,
    sparse_mapping,
    ref_mapping,
    qconfig_dict,
    fqn_to_check,
    test_class,
    test_scripting,
):
    # SET UP TEST PARAMETERS, INPUTS AND WEIGHTS
    # ------------------------------------------
    batch_size = 12
    input_channels = 4
    output_channels = 7
    model = model_class(input_channels, output_channels)

    # For sparse kernels the weight ZP = 0 (the activation ZP may be nonzero)
    X_scale = 0.2
    X_zp = 2
    W_scale = 1e-2
    W_zp = 0

    X_fp32 = torch.randn(batch_size, input_channels, dtype=torch.float32)
    float_bias = torch.randn(output_channels, dtype=torch.float32)

    # generate a weight which we'll insert into the model
    W_fp32 = torch.randn(output_channels, input_channels, dtype=torch.float32)
    mask = torch.randint(0, 2, W_fp32.shape)
    W_fp32 *= mask
    with override_cpu_allocator_for_qnnpack(qengine_is_qnnpack()):
        X_q = torch.quantize_per_tensor(
            X_fp32, scale=X_scale, zero_point=X_zp, dtype=torch.quint8)
        X_fp32 = X_q.dequantize()
        W_q = torch.quantize_per_tensor(W_fp32, W_scale, W_zp, torch.qint8)

        # PREPARE MODELS FOR QUANTIZATION
        # -------------------------------
        model.linear.weight = nn.Parameter(W_q.dequantize())
        model.eval()

        # Add `sparse_params` to the model. The test for correct
        # sparse_param addition is in the sparsifier tests
        model.linear.sparse_params = {"sparse_block_shape": (1, 4)}

        # generate model versions and apply qconfigs
        qmodel = copy.deepcopy(model)
        sqmodel = copy.deepcopy(model)
        tq.propagate_qconfig_(qmodel, qconfig_dict)
        tq.propagate_qconfig_(sqmodel, qconfig_dict)
        tq.prepare(qmodel, inplace=True)
        tq.prepare(sqmodel, inplace=True)

        # calibrate
        with torch.no_grad():
            qmodel(X_fp32)
            sqmodel(X_fp32)

        # ACTUAL TESTING BEGINS HERE
        # --------------------------

        # Make sure the quantization parameters are computed the same way
        qparams = qmodel.linear.qconfig.weight().calculate_qparams()
        sqparams = sqmodel.linear.qconfig.weight().calculate_qparams()
        test_class.assertEqual(qparams, sqparams)

        sqmodule_to_check = fqn_to_module(sqmodel, fqn_to_check)
        sqmodule_start_class = sqmodule_to_check.__class__
        sqmodule_expected_converted_class = sparse_mapping[sqmodule_start_class]

        qmodule_to_check = fqn_to_module(qmodel, fqn_to_check)
        qmodule_start_class = qmodule_to_check.__class__
        qmodule_expected_converted_class = ref_mapping[qmodule_start_class]

        # need to determine whether dynamic quantization is being performed,
        # since the input dtype will be different at the end
        is_dynamic = isinstance(qmodule_to_check.activation_post_process,
                                tq.PlaceholderObserver)

        tq.convert(sqmodel, inplace=True, mapping=sparse_mapping)
        tq.convert(qmodel, inplace=True, mapping=ref_mapping)

        # this code is a duplicate of the above since the references do not
        # update to the post-convert modules
        sqmodule_to_check = fqn_to_module(sqmodel, fqn_to_check)
        qmodule_to_check = fqn_to_module(qmodel, fqn_to_check)

        # check that the modules were converted as expected
        assert isinstance(sqmodule_to_check,
                          sqmodule_expected_converted_class), "Convert failed"
        assert isinstance(qmodule_to_check,
                          qmodule_expected_converted_class), "Mapping failed"

        row_block_size, col_block_size = sqmodel.linear._packed_params._weight_bias()[2:]
        assert row_block_size == 1 and col_block_size == 4

        # only run during serialization/deserialization tests;
        # makes sure script/save/load doesn't malform the sqmodel
        if test_scripting:
            scripted_sqmodel = torch.jit.script(sqmodel)
            scripted_sqmodel.eval()
            buffer = io.BytesIO()
            torch.jit.save(scripted_sqmodel, buffer)
            buffer.seek(0)
            sqmodel = torch.jit.load(buffer)

        # use the correct input dtype
        if is_dynamic:
            Y_ref = qmodel(X_fp32)
            Y_hat = sqmodel(X_fp32)
            test_class.assertEqual(Y_ref, Y_hat)
        else:
            Y_ref = qmodel(X_q)
            Y_hat = sqmodel(X_q)
            test_class.assertEqual(Y_ref.dequantize(), Y_hat.dequantize())