def test_trainer_finetune(tmpdir):
    """Finetune a ``ClassificationTask`` on dummy data with the ``NoFreeze`` strategy."""
    classifier = DummyClassifier()
    train_loader = torch.utils.data.DataLoader(DummyDataset())
    val_loader = torch.utils.data.DataLoader(DummyDataset())
    task = ClassificationTask(classifier, loss_fn=F.nll_loss)
    trainer = Trainer(fast_dev_run=True, default_root_dir=tmpdir)
    trainer.finetune(task, train_loader, val_loader, strategy=NoFreeze())
def test_test(tmpdir):
    """Tests that the model can be tested on our ``DummyDataset``."""
    classifier = TemplateSKLearnClassifier(
        num_features=DummyDataset.num_features,
        num_classes=DummyDataset.num_classes,
    )
    loader = torch.utils.data.DataLoader(DummyDataset(), batch_size=4)
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.test(classifier, loader)
def test_from_argparse_args():
    """A ``Trainer`` built via ``from_argparse_args`` honours parsed CLI flags."""
    parser = Trainer.add_argparse_args(ArgumentParser())
    parsed = parser.parse_args(["--max_epochs=200"])
    trainer = Trainer.from_argparse_args(parsed)
    assert isinstance(trainer, Trainer)
    assert trainer.max_epochs == 200
def _test_learn2learning_training_strategies(gpus, accelerator, training_strategy, tmpdir):
    """Fit an ``ImageClassifier`` with the given learn2learn ``training_strategy``.

    Builds a tiny two-class image folder on disk, then runs a two-batch
    fast-dev-run fit with meta-learning strategy kwargs.
    """
    image_size = (96, 96)
    train_dir = Path(tmpdir / "train")
    train_dir.mkdir()
    (train_dir / "a").mkdir()
    (train_dir / "b").mkdir()

    pa_1 = train_dir / "a" / "1.png"
    pa_2 = train_dir / "a" / "2.png"
    pb_1 = train_dir / "b" / "1.png"
    pb_2 = train_dir / "b" / "2.png"
    for path in (pa_1, pa_2, pb_1, pb_2):
        _rand_image(image_size).save(path)

    n = 5
    dm = ImageClassificationData.from_files(
        train_files=[str(pa_1)] * n + [str(pa_2)] * n + [str(pb_1)] * n + [str(pb_2)] * n,
        train_targets=[0] * n + [1] * n + [2] * n + [3] * n,
        batch_size=1,
        num_workers=0,
        transform_kwargs=dict(image_size=image_size),
    )

    model = ImageClassifier(
        backbone="resnet18",
        training_strategy=training_strategy,
        training_strategy_kwargs={"ways": dm.num_classes, "shots": 4, "meta_batch_size": 4},
    )

    trainer = Trainer(fast_dev_run=2, gpus=gpus, accelerator=accelerator)
    trainer.fit(model, datamodule=dm)
def test_resolve_callbacks_multi_error(tmpdir):
    """A task declaring multiple finetune callbacks must be rejected."""
    classifier = DummyClassifier()
    trainer = Trainer(fast_dev_run=True, default_root_dir=tmpdir)
    task = MultiFinetuneClassificationTask(classifier, loss_fn=F.nll_loss)
    with pytest.raises(MisconfigurationException, match="should create a list with only 1 callback"):
        trainer._resolve_callbacks(task, None)
def test_classification_fiftyone(tmpdir):
    """Finetune an ``ImageClassifier`` from a FiftyOne dataset with labels set per-sample."""
    tmpdir = Path(tmpdir)
    (tmpdir / "a").mkdir()
    (tmpdir / "b").mkdir()
    _rand_image().save(tmpdir / "a_1.png")
    _rand_image().save(tmpdir / "b_1.png")

    train_images = [str(tmpdir / "a_1.png"), str(tmpdir / "b_1.png")]
    train_dataset = fo.Dataset.from_dir(str(tmpdir), dataset_type=fo.types.ImageDirectory)

    # Attach classification labels to each sample under the "test" field.
    for sample_path, label in zip(train_images, ("1", "2")):
        sample = train_dataset[sample_path]
        sample["test"] = fo.Classification(label=label)
        sample.save()

    data = ImageClassificationData.from_fiftyone(
        train_dataset=train_dataset,
        label_field="test",
        batch_size=2,
        num_workers=0,
        image_size=(64, 64),
    )

    model = ImageClassifier(num_classes=2, backbone="resnet18")
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.finetune(model, datamodule=data, strategy="freeze")
def test_default_strategies(tmpdir):
    """Smoke-test fitting an ``ImageClassifier`` with its default training strategy.

    Fix: the ``tmpdir`` fixture was previously unused, so trainer artifacts
    were written to the current working directory; it is now passed as
    ``default_root_dir``.
    """
    num_classes = 10
    ds = DummyDataset()
    model = ImageClassifier(num_classes, backbone="resnet50")

    trainer = Trainer(fast_dev_run=2, default_root_dir=tmpdir)
    trainer.fit(model, train_dataloader=DataLoader(ds))
def test_pointcloud_object_detection_data(tmpdir):
    """Fit and predict with a ``PointCloudObjectDetector`` on the micro KITTI set.

    A ``MockModel`` subclass asserts the batch structure seen during training.
    """
    seed_everything(52)

    download_data("https://pl-flash-data.s3.amazonaws.com/KITTI_micro.zip", tmpdir)
    dm = PointCloudObjectDetectorData.from_folders(
        train_folder=join(tmpdir, "KITTI_Micro", "Kitti", "train")
    )

    class MockModel(PointCloudObjectDetector):
        def training_step(self, batch, batch_idx: int):
            # Validate the collated batch shape/content rather than training.
            assert isinstance(batch, ObjectDetectBatchCollator)
            assert len(batch.point) == 2
            assert batch.point[0][1].shape == torch.Size([4])
            assert len(batch.bboxes) > 1
            assert batch.attr[0]["name"] in ("000000.bin", "000001.bin")
            assert batch.attr[1]["name"] in ("000000.bin", "000001.bin")

    num_classes = 19
    model = MockModel(backbone="pointpillars_kitti", num_classes=num_classes)
    trainer = Trainer(max_epochs=1, limit_train_batches=1, limit_val_batches=0)
    trainer.fit(model, dm)

    predict_path = join(tmpdir, "KITTI_Micro", "Kitti", "predict")
    model.eval()

    predictions = model.predict([join(predict_path, "scans/000000.bin")])
    assert predictions[0][DefaultDataKeys.INPUT].shape[1] == 4
    assert len(predictions[0][DefaultDataKeys.PREDS]) == 158
def test_resolve_callbacks_override_warning(tmpdir):
    """Overriding a task's default finetune callback emits a ``UserWarning``."""
    classifier = DummyClassifier()
    trainer = Trainer(fast_dev_run=True, default_root_dir=tmpdir)
    task = FinetuneClassificationTask(classifier, loss_fn=F.nll_loss)
    with pytest.warns(UserWarning, match="The model contains a default finetune callback"):
        trainer._resolve_callbacks(task, "test")
def test_datapipeline_transformations_overridden_by_task():
    """A task-level ``input_transform`` override wins over the datamodule transform.

    The datamodule is built with a transform that keeps images at 64x64; the
    task installs ``OverrideInputTransform`` (resize to 128), and the training
    and validation steps assert that the 128x128 batches arrive normalized.
    """

    # --- input definitions -------------------------------------------------
    class ImageInput(Input):
        def load_data(self, folder):
            # from folder -> return files paths
            return ["a.jpg", "b.jpg"]

        def load_sample(self, path):
            # from a file path, load the associated image
            return np.random.uniform(0, 1, (64, 64, 3))

    class ImageClassificationInputTransform(InputTransform):
        def per_sample_transform(self) -> Callable:
            return T.Compose([T.ToTensor()])

        def per_batch_transform_on_device(self) -> Callable:
            return T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

    class OverrideInputTransform(InputTransform):
        def per_sample_transform(self) -> Callable:
            return T.Compose([T.ToTensor(), T.Resize(128)])

    # --- task which overrides transforms using set_state -------------------
    class CustomModel(Task):
        def __init__(self):
            super().__init__(model=torch.nn.Linear(1, 1), loss_fn=torch.nn.MSELoss())
            # override default transform to resize images
            self.input_transform = OverrideInputTransform

        def training_step(self, batch, batch_idx):
            assert batch.shape == torch.Size([2, 3, 128, 128])
            assert torch.max(batch) <= 1.0
            assert torch.min(batch) >= 0.0

        def validation_step(self, batch, batch_idx):
            assert batch.shape == torch.Size([2, 3, 128, 128])
            assert torch.max(batch) <= 1.0
            assert torch.min(batch) >= 0.0

    datamodule = DataModule(
        ImageInput(RunningStage.TRAINING, [1]),
        ImageInput(RunningStage.VALIDATING, [1]),
        transform=ImageClassificationInputTransform(),
        batch_size=2,
        num_workers=0,
    )

    # --- run the trainer ----------------------------------------------------
    trainer = Trainer(
        max_epochs=1,
        limit_train_batches=2,
        limit_val_batches=1,
        num_sanity_val_steps=1,
    )
    trainer.fit(CustomModel(), datamodule=datamodule)
def test_task_fit(tmpdir: str):
    """Fit a ``ClassificationTask`` wrapping a tiny MLP for one fast-dev-run pass.

    Fix: ``nn.LogSoftmax()`` without ``dim`` relies on deprecated implicit-dim
    behavior and emits a ``UserWarning``; ``dim=-1`` is equivalent for the 2-D
    ``(N, 10)`` output produced here.
    """
    model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10), nn.LogSoftmax(dim=-1))
    train_dl = torch.utils.data.DataLoader(DummyDataset())
    val_dl = torch.utils.data.DataLoader(DummyDataset())
    task = ClassificationTask(model, loss_fn=F.nll_loss)
    trainer = Trainer(fast_dev_run=True, default_root_dir=tmpdir)
    trainer.fit(task, train_dl, val_dl)
def test_test(tmpdir):
    """Tests that the model can be tested on a pytorch geometric dataset."""
    tudataset = datasets.TUDataset(root=tmpdir, name="KKI")
    classifier = GraphClassifier(
        num_features=tudataset.num_features,
        num_classes=tudataset.num_classes,
    )
    classifier.data_pipeline = DataPipeline(preprocess=GraphClassificationPreprocess())
    loader = torch.utils.data.DataLoader(tudataset, batch_size=4)
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.test(classifier, loader)
def test_init_train(tmpdir):
    """Fit a ``TranslationTask`` for one fast-dev-run batch.

    Fix: the Windows early-exit previously did ``return True``; pytest warns
    about test functions returning non-None values, so use a bare ``return``.
    """
    if os.name == "nt":
        # TODO: huggingface stuff timing out on windows
        return
    model = TranslationTask(TEST_BACKBONE)
    train_dl = torch.utils.data.DataLoader(DummyDataset())
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.fit(model, train_dl)
def test_predict_sklearn():
    """Tests that we can generate predictions from a scikit-learn ``Bunch``."""
    bunch = datasets.load_iris()
    classifier = TemplateSKLearnClassifier(
        num_features=DummyDataset.num_features,
        num_classes=DummyDataset.num_classes,
    )
    datamodule = TemplateData.from_sklearn(predict_bunch=bunch, batch_size=1)
    predictions = Trainer().predict(classifier, datamodule=datamodule, output="classes")
    assert isinstance(predictions[0][0], int)
def test_resolve_callbacks_invalid_strategy(tmpdir):
    """Passing a non-``BaseFinetuning`` callback as a strategy must be rejected."""
    classifier = DummyClassifier()
    trainer = Trainer(fast_dev_run=True, default_root_dir=tmpdir)
    task = ClassificationTask(classifier, loss_fn=F.nll_loss)
    with pytest.raises(
        MisconfigurationException, match="should be a ``pytorch_lightning.callbacks.BaseFinetuning``"
    ):
        trainer._resolve_callbacks(task, EarlyStopping())
def test_predict_numpy():
    """Tests that we can generate predictions from a numpy array."""
    row = np.random.rand(1, DummyDataset.num_features)
    classifier = TemplateSKLearnClassifier(
        num_features=DummyDataset.num_features,
        num_classes=DummyDataset.num_classes,
    )
    datamodule = TemplateData.from_numpy(predict_data=row, batch_size=1)
    predictions = Trainer().predict(classifier, datamodule=datamodule, output="classes")
    assert isinstance(predictions[0][0], int)
def test_predict_numpy():
    """Predict segmentation labels for a single 64x64 numpy image."""
    img = np.ones((1, 3, 64, 64))
    model = SemanticSegmentation(2, backbone="mobilenetv3_large_100")
    datamodule = SemanticSegmentationData.from_numpy(predict_data=img, batch_size=1)
    out = Trainer().predict(model, datamodule=datamodule, output="labels")
    first = out[0][0]
    # Output is a 64x64 nested list of class labels.
    assert isinstance(first, list)
    assert len(first) == 64
    assert len(first[0]) == 64
def test_ort_callback_fails_no_model(tmpdir):
    """``ORTCallback`` must raise when the module does not wrap a single model."""
    model = BoringModel()
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, callbacks=ORTCallback())
    with pytest.raises(MisconfigurationException, match="Torch ORT requires to wrap a single model"):
        trainer.fit(
            model,
            train_dataloader=torch.utils.data.DataLoader(DummyDataset()),
            val_dataloaders=torch.utils.data.DataLoader(DummyDataset()),
        )
def test_classification(tmpdir):
    """Finetune an ``ImageClassifier`` from filepaths with a custom loader."""
    data = ImageClassificationData.from_filepaths(
        train_filepaths=["a", "b"],
        train_labels=[0, 1],
        train_transform=lambda x: x,
        loader=_dummy_image_loader,
        num_workers=0,
        batch_size=2,
    )
    model = ImageClassifier(2, backbone="resnet18")
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.finetune(model, datamodule=data, strategy="freeze")
def test_classification_json(tmpdir):
    """Fit a ``SpeechRecognition`` model from a JSON data file."""
    json_path = json_data(tmpdir)
    data = SpeechRecognitionData.from_json(
        "file",
        "text",
        train_file=json_path,
        num_workers=0,
        batch_size=2,
    )
    model = SpeechRecognition(backbone=TEST_BACKBONE)
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.fit(model, datamodule=data)
def test_multilabel(tmpdir):
    """Finetune a multi-label ``ImageClassifier`` and check prediction bounds."""
    num_classes = 4
    ds = DummyMultiLabelDataset(num_classes)
    model = ImageClassifier(
        num_classes,
        multi_label=True,
        serializer=Probabilities(multi_label=True),
    )
    loader = torch.utils.data.DataLoader(ds, batch_size=2)
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.finetune(model, loader, strategy="freeze_unfreeze")

    sample = ds[0]
    image, label = sample[DefaultDataKeys.INPUT], sample[DefaultDataKeys.TARGET]
    predictions = model.predict([{DefaultDataKeys.INPUT: image}])
    preds_tensor = torch.tensor(predictions)

    # Probabilities must lie in [0, 1], one per class.
    assert (preds_tensor > 1).sum() == 0
    assert (preds_tensor < 0).sum() == 0
    assert len(predictions[0]) == num_classes == len(label)
    assert len(torch.unique(label)) <= 2
def test_predict_dataset(tmpdir):
    """Tests that we can generate embeddings from a pytorch geometric dataset."""
    tudataset = datasets.TUDataset(root=tmpdir, name="KKI")
    backbone = GraphClassifier(
        num_features=tudataset.num_features,
        num_classes=tudataset.num_classes,
    ).backbone
    model = GraphEmbedder(backbone)
    datamodule = DataModule(
        predict_input=GraphClassificationDatasetInput(RunningStage.PREDICTING, tudataset),
        transform=GraphClassificationInputTransform,
        batch_size=4,
    )
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    embeddings = trainer.predict(model, datamodule=datamodule)
    assert isinstance(embeddings[0][0], torch.Tensor)
def test_classification(tmpdir):
    """Finetune an ``ImageClassifier`` from directory filepaths on disk."""
    tmpdir = Path(tmpdir)
    (tmpdir / "a").mkdir()
    (tmpdir / "b").mkdir()
    _rand_image().save(tmpdir / "a" / "a_1.png")
    _rand_image().save(tmpdir / "b" / "a_1.png")

    data = ImageClassificationData.from_filepaths(
        train_filepaths=[tmpdir / "a", tmpdir / "b"],
        train_labels=[0, 1],
        train_transform={"per_batch_transform": lambda x: x},
        num_workers=0,
        batch_size=2,
    )
    model = ImageClassifier(num_classes=2, backbone="resnet18")
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.finetune(model, datamodule=data, strategy="freeze")
def test_pointcloud_segmentation_data(tmpdir):
    """Fit and predict with ``PointCloudSegmentation`` on the SemanticKITTI micro set.

    A ``MockModel`` subclass asserts the structure of each training batch.
    """
    seed_everything(52)

    download_data("https://pl-flash-data.s3.amazonaws.com/SemanticKittiMicro.zip", tmpdir)
    datamodule = PointCloudSegmentationData.from_folders(
        train_folder=join(tmpdir, "SemanticKittiMicro", "train"),
        predict_folder=join(tmpdir, "SemanticKittiMicro", "predict"),
        batch_size=4,
    )

    class MockModel(PointCloudSegmentation):
        def training_step(self, batch, batch_idx: int):
            # Validate the multi-scale point-cloud batch layout.
            assert batch[DataKeys.INPUT]["xyz"][0].shape == torch.Size([2, 45056, 3])
            assert batch[DataKeys.INPUT]["xyz"][1].shape == torch.Size([2, 11264, 3])
            assert batch[DataKeys.INPUT]["xyz"][2].shape == torch.Size([2, 2816, 3])
            assert batch[DataKeys.INPUT]["xyz"][3].shape == torch.Size([2, 704, 3])
            assert batch[DataKeys.INPUT]["labels"].shape == torch.Size([2, 45056])
            assert batch[DataKeys.INPUT]["labels"].max() == 19
            assert batch[DataKeys.INPUT]["labels"].min() == 0
            assert batch[DataKeys.METADATA][0]["name"] in ("00_000000", "00_000001")
            assert batch[DataKeys.METADATA][1]["name"] in ("00_000000", "00_000001")

    num_classes = 19
    model = MockModel(backbone="randlanet", num_classes=num_classes)
    trainer = Trainer(max_epochs=1, limit_train_batches=1, limit_val_batches=0)
    trainer.fit(model, datamodule=datamodule)

    predictions = trainer.predict(model, datamodule=datamodule)[0]
    assert predictions[0][DataKeys.INPUT].shape == torch.Size([45056, 3])
    assert predictions[0][DataKeys.PREDS].shape == torch.Size([45056, 19])
    assert predictions[0][DataKeys.TARGET].shape == torch.Size([45056])
def test_saving_with_serializers(tmpdir):
    """Serializer state survives a checkpoint save/load round-trip."""
    checkpoint_file = os.path.join(tmpdir, 'tmp.ckpt')

    class CustomModel(Task):
        def __init__(self):
            super().__init__(model=torch.nn.Linear(1, 1), loss_fn=torch.nn.MSELoss())

    serializer = Labels(["a", "b"])
    model = CustomModel()
    trainer = Trainer(fast_dev_run=True)

    data_pipeline = DataPipeline(DefaultPreprocess(), serializer=serializer)
    data_pipeline.initialize()
    model.data_pipeline = data_pipeline
    assert isinstance(model.preprocess, DefaultPreprocess)

    values = torch.arange(10, dtype=torch.float)
    dummy_data = DataLoader(list(zip(values, values)))
    trainer.fit(model, train_dataloader=dummy_data)
    trainer.save_checkpoint(checkpoint_file)

    restored = CustomModel.load_from_checkpoint(checkpoint_file)
    assert isinstance(restored.preprocess._data_pipeline_state, DataPipelineState)
    # The classification label state must match what the serializer was built with.
    assert restored.preprocess._data_pipeline_state._state[ClassificationState] == ClassificationState(['a', 'b'])
def test_trainer_request_dataloaders(stage):
    """``request_dataloader`` triggers the stage hook for both call signatures.

    Both ``(stage, model)`` and ``(stage, model=model)`` must invoke the
    module's ``on_<stage>_dataloader`` hook (PL 1.5 and later).
    """

    class TestModel(BoringModel):
        recorded_on_dataloader_calls = {}

        def on_train_dataloader(self) -> None:
            self.recorded_on_dataloader_calls[RunningStage.TRAINING] = True

        def on_val_dataloader(self) -> None:
            self.recorded_on_dataloader_calls[RunningStage.VALIDATING] = True

        def on_test_dataloader(self) -> None:
            self.recorded_on_dataloader_calls[RunningStage.TESTING] = True

    trainer = Trainer()

    # Positional model argument.
    model = TestModel()
    trainer.request_dataloader(stage, model)
    assert model.recorded_on_dataloader_calls[stage]

    # Keyword model argument.
    model = TestModel()
    trainer.request_dataloader(stage, model=model)
    assert model.recorded_on_dataloader_calls[stage]
def test_classification(tmpdir):
    """Finetune an ``ImageClassifier`` from two generated image files."""
    tmpdir = Path(tmpdir)
    (tmpdir / "a").mkdir()
    (tmpdir / "b").mkdir()
    image_a = str(tmpdir / "a" / "a_1.png")
    image_b = str(tmpdir / "b" / "b_1.png")
    _rand_image().save(image_a)
    _rand_image().save(image_b)

    data = ImageClassificationData.from_files(
        train_files=[image_a, image_b],
        train_targets=[0, 1],
        num_workers=0,
        batch_size=2,
        image_size=(64, 64),
    )
    model = ImageClassifier(num_classes=2, backbone="resnet18")
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.finetune(model, datamodule=data, strategy="freeze")
def test_finetuning(tmpdir: str, strategy):
    """Valid strategies finetune; missing/unknown strategies raise."""
    train_loader = torch.utils.data.DataLoader(DummyDataset())
    val_loader = torch.utils.data.DataLoader(DummyDataset())
    task = ImageClassifier(10, backbone="resnet18")
    trainer = Trainer(fast_dev_run=True, default_root_dir=tmpdir)

    # "cls" is a sentinel for passing a callback instance directly.
    if strategy == "cls":
        strategy = NoFreeze()

    invalid = strategy is None or strategy == 'chocolat'
    if invalid:
        with pytest.raises(MisconfigurationException, match="strategy should be provided"):
            trainer.finetune(task, train_loader, val_loader, strategy=strategy)
    else:
        trainer.finetune(task, train_loader, val_loader, strategy=strategy)
def test_trainer_fit(tmpdir, callbacks, should_warn):
    """Fit warns (or not) depending on whether a finetuning callback is attached.

    Fix: ``nn.LogSoftmax()`` without ``dim`` relies on deprecated implicit-dim
    behavior and emits a ``UserWarning`` (which could interfere with
    ``pytest.warns``); ``dim=-1`` is equivalent for the 2-D ``(N, 10)`` output.
    """
    model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10), nn.LogSoftmax(dim=-1))
    train_dl = DataLoader(DummyDataset())
    val_dl = DataLoader(DummyDataset())
    task = ClassificationTask(model, loss_fn=F.nll_loss)
    trainer = Trainer(fast_dev_run=True, default_root_dir=tmpdir, callbacks=callbacks)

    if should_warn:
        with pytest.warns(UserWarning, match="trainer is using a fine-tuning callback"):
            trainer.fit(task, train_dl, val_dl)
    else:
        trainer.fit(task, train_dl, val_dl)
def test_transformations(tmpdir):
    """Input transforms are applied per stage and their hook-called flags are set.

    First checks dataset contents and collated batches with ``TestInputTransform``,
    then with ``TestInputTransform2``, and finally runs fit/test with a
    ``CustomModel`` to verify each stage's transform hooks fired.
    """
    datamodule = DataModule(
        TestInput(RunningStage.TRAINING, [1]),
        TestInput(RunningStage.VALIDATING, [1]),
        TestInput(RunningStage.TESTING, [1]),
        transform=TestInputTransform(),
        batch_size=2,
        num_workers=0,
    )

    # Raw dataset samples vs. collated/transformed train batch.
    assert datamodule.train_dataloader().dataset[0] == (0, 1, 2, 3)
    batch = next(iter(datamodule.train_dataloader()))
    assert torch.equal(batch, torch.tensor([[0, 1, 2, 3, 5], [0, 1, 2, 3, 5]]))

    assert datamodule.val_dataloader().dataset[0] == {"a": 0, "b": 1}
    assert datamodule.val_dataloader().dataset[1] == {"a": 1, "b": 2}
    batch = next(iter(datamodule.val_dataloader()))

    # Rebuild with a transform passed as a class rather than an instance.
    datamodule = DataModule(
        TestInput(RunningStage.TRAINING, [1]),
        TestInput(RunningStage.VALIDATING, [1]),
        TestInput(RunningStage.TESTING, [1]),
        transform=TestInputTransform2,
        batch_size=2,
        num_workers=0,
    )
    batch = next(iter(datamodule.val_dataloader()))
    assert torch.equal(batch["a"], torch.tensor([0, 1]))
    assert torch.equal(batch["b"], torch.tensor([1, 2]))

    model = CustomModel()
    trainer = Trainer(
        max_epochs=1,
        limit_train_batches=2,
        limit_val_batches=1,
        limit_test_batches=2,
        limit_predict_batches=2,
        num_sanity_val_steps=1,
    )
    trainer.fit(model, datamodule=datamodule)
    trainer.test(model, datamodule=datamodule)

    # Every stage's transform hooks should have been called.
    assert datamodule.input_transform.train_per_sample_transform_called
    assert datamodule.input_transform.train_collate_called
    assert datamodule.input_transform.train_per_batch_transform_on_device_called
    assert datamodule.input_transform.train_per_sample_transform_called
    assert datamodule.input_transform.val_collate_called
    assert datamodule.input_transform.val_per_batch_transform_on_device_called
    assert datamodule.input_transform.test_per_sample_transform_called