def test_quantization(tmpdir, observe: str, fuse: bool, convert: bool):
    """Parity test for quant model.

    Trains a plain ``RegressionModel`` and a quantization-aware deep copy of it
    under the ``QuantizationAwareTraining`` callback, then checks that the
    quantized model scores close to the float baseline (MAPE within 0.45),
    shrinks after conversion, and survives checkpoint loading/resuming.
    """
    seed_everything(42)
    dm = RegressDataModule()
    # Fall back to CPU so the test still runs on machines without CUDA.
    accelerator = "gpu" if torch.cuda.is_available() else "cpu"
    trainer_args = dict(default_root_dir=tmpdir, max_epochs=7, accelerator=accelerator, devices=1)
    model = RegressionModel()
    # Deep copy so both the float and the QAT run start from identical weights.
    qmodel = copy.deepcopy(model)

    # Baseline: plain float training; record size and MAPE score for comparison.
    trainer = Trainer(**trainer_args)
    trainer.fit(model, datamodule=dm)
    org_size = get_model_size_mb(model)
    org_score = torch.mean(torch.tensor([mape(model(x), y) for x, y in dm.test_dataloader()]))

    # Fuse the (layer_i, layer_ia) pairs only when the `fuse` parametrization asks for it.
    fusing_layers = [(f"layer_{i}", f"layer_{i}a") for i in range(3)] if fuse else None
    qcb = QuantizationAwareTraining(
        observer_type=observe,
        modules_to_fuse=fusing_layers,
        quantize_on_fit_end=convert,
        observer_enabled_stages=("train", "validate"),
    )
    trainer = Trainer(callbacks=[qcb], **trainer_args)
    trainer.fit(qmodel, datamodule=dm)

    quant_calls = qcb._forward_calls
    # NOTE(review): this assertion compares the value with itself and can never
    # fail. It was presumably meant to verify the observer call count after some
    # later step (e.g. that plain inference below does not trigger observers,
    # given observer_enabled_stages excludes "test") — confirm the intent.
    assert quant_calls == qcb._forward_calls
    quant_score = torch.mean(torch.tensor([mape(qmodel(x), y) for x, y in dm.test_dataloader()]))
    # test that the test score is almost the same as with pure training
    assert torch.allclose(org_score, quant_score, atol=0.45)

    model_path = trainer.checkpoint_callback.best_model_path
    curr_epoch = trainer.current_epoch
    # Follow-up runs below are short and don't need checkpointing.
    trainer_args.update(dict(max_epochs=1, enable_checkpointing=False))
    if not convert:
        # The callback did not convert at fit end, so run one more QAT epoch
        # and convert manually before measuring the quantized size.
        trainer = Trainer(callbacks=[QuantizationAwareTraining()], **trainer_args)
        trainer.fit(qmodel, datamodule=dm)
        qmodel.eval()
        torch.quantization.convert(qmodel, inplace=True)

    quant_size = get_model_size_mb(qmodel)
    # test that the trained model is smaller than its initial float size
    size_ratio = quant_size / org_size
    assert size_ratio < 0.65

    # todo: make it work also with strict loading
    qmodel2 = RegressionModel.load_from_checkpoint(model_path, strict=False)
    quant2_score = torch.mean(torch.tensor([mape(qmodel2(x), y) for x, y in dm.test_dataloader()]))
    assert torch.allclose(org_score, quant2_score, atol=0.45)

    # test without and with QAT callback
    trainer_args.update(max_epochs=curr_epoch + 1)
    qmodel2 = RegressionModel()
    trainer = Trainer(callbacks=[QuantizationAwareTraining()], **trainer_args)
    trainer.fit(qmodel2, datamodule=dm, ckpt_path=model_path)
    quant2_score = torch.mean(torch.tensor([mape(qmodel2(x), y) for x, y in dm.test_dataloader()]))
    # test that the test score is almost the same as with pure training
    assert torch.allclose(org_score, quant2_score, atol=0.45)
def test_get_model_size_mb():
    """``get_model_size_mb`` reports a small ``BoringModel``'s serialized size in MB."""
    model = BoringModel()
    # Renamed from `size_bytes`: the helper returns megabytes, not bytes.
    size_mb = get_model_size_mb(model)
    # Serialized size varies slightly across Python/PyTorch versions, hence
    # the generous relative tolerance.
    assert math.isclose(size_mb, 0.001319, rel_tol=0.1)
def test_get_sparse_model_size_mb():
    """``get_model_size_mb`` also handles models holding sparse parameters."""

    class BoringSparseModel(BoringModel):
        # BoringModel variant whose layer is replaced by a sparse parameter.
        def __init__(self):
            super().__init__()
            self.layer = nn.Parameter(torch.ones(32).to_sparse())

    model = BoringSparseModel()
    # Renamed from `size_bytes`: the helper returns megabytes, not bytes
    # (consistent with test_get_model_size_mb).
    size_mb = get_model_size_mb(model)
    # Serialized size varies slightly across Python/PyTorch versions.
    assert math.isclose(size_mb, 0.001511, rel_tol=0.1)