# Assumed imports for these snippets (paths follow current PyTorch; older
# releases expose the quantization helpers under `torch.quantization`).
# `TestModule`, `PostTrainingQuantization`, `mode`, and `_quantized_forward`
# are project-local. The test methods below belong to a unittest TestCase
# class whose definition is not shown.
import os
from copy import deepcopy as _deepcopy
from types import MethodType

import torch
from pytorch_lightning import LightningModule, Trainer, seed_everything
from torch.ao.quantization import default_dynamic_qconfig
from torch.ao.quantization.quantize_fx import convert_fx, prepare_fx


    def test_post_training_dynamic_quantization(self, root_dir):
        """Validates post-training dynamic quantization."""
        seed_everything(100)

        model = TestModule()
        num_epochs = 2
        dynamic_quant = PostTrainingQuantization(
            qconfig_dicts={"": {"": default_dynamic_qconfig}}
        )
        trainer = Trainer(
            default_root_dir=os.path.join(root_dir, "quantized"),
            enable_checkpointing=False,
            callbacks=[dynamic_quant],
            max_epochs=num_epochs,
            logger=False,
        )
        # This both trains the model and quantizes it.
        trainer.fit(model)

        self.assertIsNotNone(dynamic_quant.quantized)
        # Dynamic quantization does not require a calibration step.
        self.assertFalse(dynamic_quant.should_calibrate)

        test_in = torch.randn(12, 32)
        with mode(model, training=False) as m:
            base_out = m(test_in)
        with mode(dynamic_quant.quantized, training=False) as q:
            test_out = q(test_in)

        # While quantized/original won't be exact, they should be close.
        self.assertLess(
            torch.linalg.norm(test_out - base_out, dim=1).mean(),
            0.015,
            "RMSE should be less than 0.015 between quantized and original.",
        )
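
# The `mode` helper used in these tests is project-local. A minimal sketch of
# the assumed behavior (hypothetical, not the project's implementation): a
# context manager that puts the module into the requested train/eval state and
# restores the previous state on exit.
from contextlib import contextmanager

@contextmanager
def mode_sketch(module: torch.nn.Module, training: bool):
    was_training = module.training
    module.train(training)  # train(False) is equivalent to eval()
    try:
        yield module
    finally:
        module.train(was_training)  # restore the original mode
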
    def test_custom_post_training_static_quant(self, root_dir):
        """Tests that we can customize Post-Training static by skipping certain layers."""
        class _CustomStaticQuant(PostTrainingQuantization):
            """Only quantize TestModule.another_layer."""
            def prepare(self, model, configs, attrs):
                example_inputs = (torch.randn(1, 2),)
                model.another_layer = prepare_fx(
                    model.another_layer, configs[""], example_inputs
                )

                return model

            def convert(self, model, submodules, attrs):
                model.another_layer = convert_fx(model.another_layer)
                return model

        seed_everything(100)

        model = TestModule()
        num_epochs = 2
        static_quantization = _CustomStaticQuant()
        trainer = Trainer(
            default_root_dir=os.path.join(root_dir, "quantized"),
            enable_checkpointing=False,
            callbacks=[static_quantization],
            max_epochs=num_epochs,
            logger=False,
            num_sanity_val_steps=0,
        )
        trainer.fit(model)

        self.assertIsNotNone(static_quantization.quantized)

        test_in = torch.randn(12, 32)
        with mode(model, training=False) as m:
            base_out = m(test_in)
        with mode(static_quantization.quantized, training=False) as q:
            test_out = q(test_in)

        # While quantized/original won't be exact, they should be close.
        self.assertLess(
            torch.linalg.norm(test_out - base_out, dim=1).mean(),
            0.02,
            "RMSE should be less than 0.02 between quantized and original.",
        )
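
# `TestModule` is defined elsewhere in this test suite. A hypothetical sketch,
# consistent with the shapes used above (batches of 32 features; an
# `another_layer` taking 2 features, matching the (1, 2) example input passed
# to prepare_fx). Training plumbing (training_step, configure_optimizers,
# dataloaders) is omitted:
class TestModuleSketch(LightningModule):
    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(32, 2)
        self.another_layer = torch.nn.Linear(2, 2)

    def forward(self, x):
        return self.another_layer(self.layer(x))
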

# The `setup` hook below is from the quantization callback implementation
# itself (it prepares the model and patches its forward method).
    def setup(self, trainer: Trainer, pl_module: LightningModule,
              stage: str) -> None:
        """Override the model with a quantized-aware version on setup.

        This is the earliest place we can override this model which allows for
        appropriate behavior when restoring from checkpoints, as well as connecting
        to accelerators, etc.

        The model is only prepared once.
        """
        # Only prepare the model once.
        if hasattr(pl_module, "_prepared"):
            return

        with mode(pl_module, training=True) as train:
            pl_module._prepared = self.prepare(_deepcopy(train),
                                               configs=self.qconfig_dicts)
        pl_module.forward = MethodType(_quantized_forward, pl_module)
        self.prepared = pl_module._prepared
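
# `_quantized_forward` is also project-local. A sketch of the assumed behavior
# (not the actual implementation): route calls through the prepared copy so
# its observers see the data flowing through the module.
def quantized_forward_sketch(self, *args, **kwargs):
    # Delegate to the prepared (observer-instrumented or converted) module.
    return self._prepared(*args, **kwargs)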