def _test_learn2learning_training_strategies(gpus, accelerator, training_strategy, tmpdir):
    """Smoke-test a learn2learn training strategy on a tiny synthetic image dataset."""
    image_size = (96, 96)

    # Build train/a and train/b, each holding two random images.
    root = Path(tmpdir / "train")
    root.mkdir()
    for class_dir in ("a", "b"):
        (root / class_dir).mkdir()
    # Order matters: a/1, a/2, b/1, b/2 map to targets 0..3 below.
    sample_paths = [root / class_dir / name for class_dir in ("a", "b") for name in ("1.png", "2.png")]
    for sample_path in sample_paths:
        _rand_image(image_size).save(sample_path)

    repeats = 5
    dm = ImageClassificationData.from_files(
        train_files=[str(p) for p in sample_paths for _ in range(repeats)],
        train_targets=[label for label in range(4) for _ in range(repeats)],
        batch_size=1,
        num_workers=0,
        transform_kwargs=dict(image_size=image_size),
    )

    model = ImageClassifier(
        backbone="resnet18",
        training_strategy=training_strategy,
        training_strategy_kwargs={"ways": dm.num_classes, "shots": 4, "meta_batch_size": 4},
    )

    trainer = Trainer(fast_dev_run=2, gpus=gpus, accelerator=accelerator)
    trainer.fit(model, datamodule=dm)
def test_saving_with_serializers(tmpdir):
    """Round-trip a checkpoint and confirm the serializer's label state is restored."""
    checkpoint_file = os.path.join(tmpdir, 'tmp.ckpt')

    class CustomModel(Task):
        def __init__(self):
            super().__init__(model=torch.nn.Linear(1, 1), loss_fn=torch.nn.MSELoss())

    serializer = Labels(["a", "b"])
    model = CustomModel()
    trainer = Trainer(fast_dev_run=True)

    # Attach an initialized pipeline carrying the serializer.
    pipeline = DataPipeline(DefaultPreprocess(), serializer=serializer)
    pipeline.initialize()
    model.data_pipeline = pipeline
    assert isinstance(model.preprocess, DefaultPreprocess)

    # (x, y) pairs with identical values so the MSE task can fit trivially.
    dummy_data = DataLoader(
        list(zip(torch.arange(10, dtype=torch.float), torch.arange(10, dtype=torch.float)))
    )
    trainer.fit(model, train_dataloader=dummy_data)
    trainer.save_checkpoint(checkpoint_file)

    # Reload and verify the classification state survived serialization.
    reloaded = CustomModel.load_from_checkpoint(checkpoint_file)
    assert isinstance(reloaded.preprocess._data_pipeline_state, DataPipelineState)
    assert reloaded.preprocess._data_pipeline_state._state[ClassificationState] == ClassificationState(['a', 'b'])
def test_default_strategies(tmpdir):
    """Fit an ``ImageClassifier`` with its default training strategy.

    Fix: ``tmpdir`` was accepted but never used, so the ``Trainer`` wrote its
    logs and checkpoints into the current working directory; it is now passed
    as ``default_root_dir``, consistent with the sibling tests in this suite.
    """
    num_classes = 10
    ds = DummyDataset()
    model = ImageClassifier(num_classes, backbone="resnet50")

    trainer = Trainer(fast_dev_run=2, default_root_dir=tmpdir)
    trainer.fit(model, train_dataloader=DataLoader(ds))
def test_pointcloud_object_detection_data(tmpdir):
    """Exercise the KITTI point-cloud object-detection pipeline on a micro dataset."""
    seed_everything(52)

    download_data("https://pl-flash-data.s3.amazonaws.com/KITTI_micro.zip", tmpdir)
    dm = PointCloudObjectDetectorData.from_folders(
        train_folder=join(tmpdir, "KITTI_Micro", "Kitti", "train"))

    class MockModel(PointCloudObjectDetector):
        def training_step(self, batch, batch_idx: int):
            # Sanity-check the collated batch instead of computing a loss.
            assert isinstance(batch, ObjectDetectBatchCollator)
            assert len(batch.point) == 2
            assert batch.point[0][1].shape == torch.Size([4])
            assert len(batch.bboxes) > 1
            for sample_index in (0, 1):
                assert batch.attr[sample_index]["name"] in ("000000.bin", "000001.bin")

    model = MockModel(backbone="pointpillars_kitti", num_classes=19)
    trainer = Trainer(max_epochs=1, limit_train_batches=1, limit_val_batches=0)
    trainer.fit(model, dm)

    predict_path = join(tmpdir, "KITTI_Micro", "Kitti", "predict")
    model.eval()

    predictions = model.predict([join(predict_path, "scans/000000.bin")])
    assert predictions[0][DefaultDataKeys.INPUT].shape[1] == 4
    assert len(predictions[0][DefaultDataKeys.PREDS]) == 158
def test_train(tmpdir):
    """Tests that the model can be trained on our ``DummyDataset``."""
    classifier = TemplateSKLearnClassifier(
        num_features=DummyDataset.num_features,
        num_classes=DummyDataset.num_classes,
    )
    loader = torch.utils.data.DataLoader(DummyDataset(), batch_size=4)
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.fit(classifier, loader)
def test_datapipeline_transformations_overridden_by_task():
    """A task-level input transform must take precedence over the datamodule's."""

    class ImageInput(Input):
        def load_data(self, folder):
            # From a folder, return the list of file paths.
            return ["a.jpg", "b.jpg"]

        def load_sample(self, path):
            # From a file path, load the associated (random, fake) image.
            return np.random.uniform(0, 1, (64, 64, 3))

    class ImageClassificationInputTransform(InputTransform):
        def per_sample_transform(self) -> Callable:
            return T.Compose([T.ToTensor()])

        def per_batch_transform_on_device(self) -> Callable:
            return T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

    class OverrideInputTransform(InputTransform):
        def per_sample_transform(self) -> Callable:
            return T.Compose([T.ToTensor(), T.Resize(128)])

    class CustomModel(Task):
        def __init__(self):
            super().__init__(model=torch.nn.Linear(1, 1), loss_fn=torch.nn.MSELoss())
            # Override the default transform so images are resized to 128x128.
            self.input_transform = OverrideInputTransform

        @staticmethod
        def _check_batch(batch):
            # The override's Resize(128) must have been applied to the batch.
            assert batch.shape == torch.Size([2, 3, 128, 128])
            assert torch.max(batch) <= 1.0
            assert torch.min(batch) >= 0.0

        def training_step(self, batch, batch_idx):
            self._check_batch(batch)

        def validation_step(self, batch, batch_idx):
            self._check_batch(batch)

    datamodule = DataModule(
        ImageInput(RunningStage.TRAINING, [1]),
        ImageInput(RunningStage.VALIDATING, [1]),
        transform=ImageClassificationInputTransform(),
        batch_size=2,
        num_workers=0,
    )

    trainer = Trainer(
        max_epochs=1,
        limit_train_batches=2,
        limit_val_batches=1,
        num_sanity_val_steps=1,
    )
    trainer.fit(CustomModel(), datamodule=datamodule)
def test_train(tmpdir):
    """Tests that the model can be trained on a pytorch geometric dataset."""
    dataset = datasets.TUDataset(root=tmpdir, name="KKI")
    classifier = GraphClassifier(
        num_features=dataset.num_features,
        num_classes=dataset.num_classes,
    )
    # Attach the graph-specific preprocessing pipeline before fitting.
    classifier.data_pipeline = DataPipeline(preprocess=GraphClassificationPreprocess())
    loader = torch.utils.data.DataLoader(dataset, batch_size=4)
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.fit(classifier, loader)
def test_task_fit(tmpdir: str):
    """Fit a simple classification task for one fast-dev-run step.

    Fix: ``nn.LogSoftmax()`` without an explicit ``dim`` is deprecated and
    emits a UserWarning at call time; the model output is ``(batch, 10)``,
    so the class dimension is 1 (matching the implicit choice for 2D input).
    """
    model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10), nn.LogSoftmax(dim=1))
    train_dl = torch.utils.data.DataLoader(DummyDataset())
    val_dl = torch.utils.data.DataLoader(DummyDataset())
    task = ClassificationTask(model, loss_fn=F.nll_loss)
    trainer = Trainer(fast_dev_run=True, default_root_dir=tmpdir)
    trainer.fit(task, train_dl, val_dl)
def test_init_train(tmpdir):
    """Smoke-test fitting a ``TranslationTask`` for one fast-dev-run step.

    Fix: the Windows short-circuit previously did ``return True``, which makes
    pytest emit a return-not-none warning (tests must return ``None``); a bare
    ``return`` skips the body without the warning.
    """
    if os.name == "nt":
        # TODO: huggingface stuff timing out on windows
        return
    model = TranslationTask(TEST_BACKBONE)
    train_dl = torch.utils.data.DataLoader(DummyDataset())
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.fit(model, train_dl)
def test_trainer_fit(tmpdir, callbacks, should_warn):
    """Fit with the given callbacks; expect a fine-tuning warning when flagged.

    Fix: ``nn.LogSoftmax()`` without an explicit ``dim`` is deprecated and
    emits a UserWarning; the model output is ``(batch, 10)`` so ``dim=1``
    makes the implicit behavior explicit and silences the warning.
    """
    model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10), nn.LogSoftmax(dim=1))
    train_dl = DataLoader(DummyDataset())
    val_dl = DataLoader(DummyDataset())
    task = ClassificationTask(model, loss_fn=F.nll_loss)
    trainer = Trainer(fast_dev_run=True, default_root_dir=tmpdir, callbacks=callbacks)
    if should_warn:
        # A fine-tuning callback on a plain ``fit`` call should warn the user.
        with pytest.warns(UserWarning, match="trainer is using a fine-tuning callback"):
            trainer.fit(task, train_dl, val_dl)
    else:
        trainer.fit(task, train_dl, val_dl)
def test_ort_callback_fails_no_model(tmpdir):
    """``ORTCallback`` must raise when the module lacks a single wrapped model."""
    module = BoringModel()
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, callbacks=ORTCallback())
    with pytest.raises(MisconfigurationException, match="Torch ORT requires to wrap a single model"):
        trainer.fit(
            module,
            train_dataloader=torch.utils.data.DataLoader(DummyDataset()),
            val_dataloaders=torch.utils.data.DataLoader(DummyDataset()),
        )
def test_classification_json(tmpdir):
    """Fit a speech-recognition model from a JSON-defined dataset."""
    json_path = json_data(tmpdir)
    datamodule = SpeechRecognitionData.from_json(
        "file",
        "text",
        train_file=json_path,
        num_workers=0,
        batch_size=2,
    )
    task = SpeechRecognition(backbone=TEST_BACKBONE)
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.fit(task, datamodule=datamodule)
def test_transformations(tmpdir):
    """End-to-end check that ``InputTransform`` hooks fire for each running stage.

    Fix: the post-fit assertions previously checked
    ``train_per_sample_transform_called`` twice (a copy-paste duplicate),
    so the validation per-sample hook was never actually verified; the
    duplicate now asserts ``val_per_sample_transform_called``.
    """
    transform = TestInputTransform()
    datamodule = DataModule(
        TestInput(RunningStage.TRAINING, [1]),
        TestInput(RunningStage.VALIDATING, [1]),
        TestInput(RunningStage.TESTING, [1]),
        transform=transform,
        batch_size=2,
        num_workers=0,
    )

    # Raw dataset items and one collated/transformed train batch.
    assert datamodule.train_dataloader().dataset[0] == (0, 1, 2, 3)
    batch = next(iter(datamodule.train_dataloader()))
    assert torch.equal(batch, torch.tensor([[0, 1, 2, 3, 5], [0, 1, 2, 3, 5]]))

    assert datamodule.val_dataloader().dataset[0] == {"a": 0, "b": 1}
    assert datamodule.val_dataloader().dataset[1] == {"a": 1, "b": 2}
    batch = next(iter(datamodule.val_dataloader()))

    # A transform passed as a class (not instance) must also work.
    datamodule = DataModule(
        TestInput(RunningStage.TRAINING, [1]),
        TestInput(RunningStage.VALIDATING, [1]),
        TestInput(RunningStage.TESTING, [1]),
        transform=TestInputTransform2,
        batch_size=2,
        num_workers=0,
    )
    batch = next(iter(datamodule.val_dataloader()))
    assert torch.equal(batch["a"], torch.tensor([0, 1]))
    assert torch.equal(batch["b"], torch.tensor([1, 2]))

    model = CustomModel()
    trainer = Trainer(
        max_epochs=1,
        limit_train_batches=2,
        limit_val_batches=1,
        limit_test_batches=2,
        limit_predict_batches=2,
        num_sanity_val_steps=1,
    )
    trainer.fit(model, datamodule=datamodule)
    trainer.test(model, datamodule=datamodule)

    assert datamodule.input_transform.train_per_sample_transform_called
    assert datamodule.input_transform.train_collate_called
    assert datamodule.input_transform.train_per_batch_transform_on_device_called
    # Fixed: was a duplicate of the train assertion above.
    # NOTE(review): assumes TestInputTransform records this flag like its
    # val_collate_called sibling — confirm against the fixture definition.
    assert datamodule.input_transform.val_per_sample_transform_called
    assert datamodule.input_transform.val_collate_called
    assert datamodule.input_transform.val_per_batch_transform_on_device_called
    assert datamodule.input_transform.test_per_sample_transform_called
def test_model(coco_instances, backbone, head):
    """Train and predict with an ``InstanceSegmentation`` model on tiny COCO data."""
    datamodule = InstanceSegmentationData.from_coco(
        train_folder=coco_instances.train_folder,
        train_ann_file=coco_instances.train_ann_file,
        predict_folder=coco_instances.predict_folder,
        transform_kwargs=dict(image_size=(128, 128)),
        batch_size=2,
    )

    # Background is always class 0, followed by the two labelled categories.
    assert datamodule.num_classes == 3
    assert datamodule.labels == ["background", "cat", "dog"]

    task = InstanceSegmentation(num_classes=datamodule.num_classes, backbone=backbone, head=head)
    trainer = Trainer(fast_dev_run=True)
    trainer.fit(task, datamodule=datamodule)
    trainer.predict(task, datamodule=datamodule)
def test_init_train_enable_ort(tmpdir):
    """With ``enable_ort=True`` the underlying model must be wrapped in ``ORTModule``."""

    class TestCallback(Callback):
        def on_train_start(self, trainer: Trainer, pl_module: LightningModule) -> None:
            # The ORT wrapper must be in place before training begins.
            assert isinstance(pl_module.model, ORTModule)

    task = TextClassifier(2, TEST_BACKBONE, enable_ort=True)
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True, callbacks=TestCallback())
    trainer.fit(
        task,
        train_dataloader=torch.utils.data.DataLoader(DummyDataset()),
        val_dataloaders=torch.utils.data.DataLoader(DummyDataset()),
    )
    trainer.test(task, test_dataloaders=torch.utils.data.DataLoader(DummyDataset()))
def test_pointcloud_segmentation_data(tmpdir):
    """Exercise the SemanticKITTI segmentation pipeline on a micro dataset."""
    seed_everything(52)

    download_data("https://pl-flash-data.s3.amazonaws.com/SemanticKittiMicro.zip", tmpdir)
    datamodule = PointCloudSegmentationData.from_folders(
        train_folder=join(tmpdir, "SemanticKittiMicro", "train"),
        predict_folder=join(tmpdir, "SemanticKittiMicro", "predict"),
        batch_size=4,
    )

    class MockModel(PointCloudSegmentation):
        def training_step(self, batch, batch_idx: int):
            # Validate the pyramid of progressively sub-sampled point clouds.
            expected_xyz_shapes = ([2, 45056, 3], [2, 11264, 3], [2, 2816, 3], [2, 704, 3])
            for level, shape in enumerate(expected_xyz_shapes):
                assert batch[DataKeys.INPUT]["xyz"][level].shape == torch.Size(shape)
            assert batch[DataKeys.INPUT]["labels"].shape == torch.Size([2, 45056])
            assert batch[DataKeys.INPUT]["labels"].max() == 19
            assert batch[DataKeys.INPUT]["labels"].min() == 0
            for sample_index in (0, 1):
                assert batch[DataKeys.METADATA][sample_index]["name"] in ("00_000000", "00_000001")

    model = MockModel(backbone="randlanet", num_classes=19)
    trainer = Trainer(max_epochs=1, limit_train_batches=1, limit_val_batches=0)
    trainer.fit(model, datamodule=datamodule)

    predictions = trainer.predict(model, datamodule=datamodule)[0]
    assert predictions[0][DataKeys.INPUT].shape == torch.Size([45056, 3])
    assert predictions[0][DataKeys.PREDS].shape == torch.Size([45056, 19])
    assert predictions[0][DataKeys.TARGET].shape == torch.Size([45056])
def test_not_trainable(tmpdir):
    """Tests that the model gives an error when training, validating, or testing."""
    tudataset = datasets.TUDataset(root=tmpdir, name="KKI")
    model = GraphEmbedder(GraphClassifier(num_features=1, num_classes=1).backbone)
    datamodule = DataModule(
        GraphClassificationDatasetInput(RunningStage.TRAINING, tudataset),
        GraphClassificationDatasetInput(RunningStage.VALIDATING, tudataset),
        GraphClassificationDatasetInput(RunningStage.TESTING, tudataset),
        transform=GraphClassificationInputTransform,
        batch_size=4,
    )
    trainer = Trainer(default_root_dir=tmpdir, num_sanity_val_steps=0)

    # Each stage must refuse to run with its own NotImplementedError message.
    stage_checks = [
        ("Training a `GraphEmbedder` is not supported.", trainer.fit),
        ("Validating a `GraphEmbedder` is not supported.", trainer.validate),
        ("Testing a `GraphEmbedder` is not supported.", trainer.test),
    ]
    for message, run_stage in stage_checks:
        with pytest.raises(NotImplementedError, match=message):
            run_stage(model, datamodule=datamodule)
def test_data_module():
    """End-to-end checks of ``DataModule``: per-stage on-device transforms,
    compatibility with plain LightningModules, and per-sample transforms."""
    seed_everything(42)

    # Stage-specific on-device batch transforms with distinct, recognizable offsets.
    def train_fn(data):
        return data - 100

    def val_fn(data):
        return data + 100

    def test_fn(data):
        return data - 1000

    def predict_fn(data):
        return data + 1000

    @dataclass
    class TestTransform(InputTransform):
        # Identity per-sample transform; only the on-device hooks do work.
        def per_sample_transform(self):
            def fn(x):
                return x
            return fn

        def train_per_batch_transform_on_device(self) -> Callable:
            return train_fn

        def val_per_batch_transform_on_device(self) -> Callable:
            return val_fn

        def test_per_batch_transform_on_device(self) -> Callable:
            return test_fn

        def predict_per_batch_transform_on_device(self) -> Callable:
            return predict_fn

    transform = TestTransform()
    assert transform._transform is not None

    # One Input per running stage, each over the values 0..9.
    train_dataset = Input(RunningStage.TRAINING, np.arange(10, dtype=np.float32))
    assert train_dataset.running_stage == RunningStage.TRAINING

    val_dataset = Input(RunningStage.VALIDATING, np.arange(10, dtype=np.float32))
    assert val_dataset.running_stage == RunningStage.VALIDATING

    test_dataset = Input(RunningStage.TESTING, np.arange(10, dtype=np.float32))
    assert test_dataset.running_stage == RunningStage.TESTING

    predict_dataset = Input(RunningStage.PREDICTING, np.arange(10, dtype=np.float32))
    assert predict_dataset.running_stage == RunningStage.PREDICTING

    dm = DataModule(
        train_input=train_dataset,
        val_input=val_dataset,
        test_input=test_dataset,
        predict_input=predict_dataset,
        transform=transform,
        batch_size=2,
    )

    # On the CPU side batches are untouched (10 items / batch_size 2 = 5 batches).
    assert len(dm.train_dataloader()) == 5
    batch = next(iter(dm.train_dataloader()))
    assert batch.shape == torch.Size([2])
    assert batch.min() >= 0 and batch.max() < 10

    assert len(dm.val_dataloader()) == 5
    batch = next(iter(dm.val_dataloader()))
    assert batch.shape == torch.Size([2])
    assert batch.min() >= 0 and batch.max() < 10

    class TestModel(Task):
        # Each step asserts that the matching on-device offset was applied.
        def training_step(self, batch, batch_idx):
            assert sum(batch < 0) == 2  # train: data - 100

        def validation_step(self, batch, batch_idx):
            assert sum(batch > 0) == 2  # val: data + 100

        def test_step(self, batch, batch_idx):
            assert sum(batch < 500) == 2  # test: data - 1000

        def predict_step(self, batch, *args, **kwargs):
            assert sum(batch > 500) == 2  # predict: data + 1000
            assert torch.equal(batch, torch.tensor([1000.0, 1001.0]))

        def on_train_dataloader(self) -> None:
            pass

        def on_val_dataloader(self) -> None:
            pass

        def on_test_dataloader(self, *_) -> None:
            pass

        def on_predict_dataloader(self) -> None:
            pass

        def on_predict_end(self) -> None:
            pass

        def on_fit_end(self) -> None:
            pass

    model = TestModel(torch.nn.Linear(1, 1))
    trainer = Trainer(fast_dev_run=True)
    trainer.fit(model, datamodule=dm)
    trainer.validate(model, datamodule=dm)
    trainer.test(model, datamodule=dm)
    trainer.predict(model, datamodule=dm)

    # Test that plain lightning module works with FlashDataModule
    class SampleBoringModel(BoringModel):
        def __init__(self):
            super().__init__()
            self.layer = torch.nn.Linear(2, 1)

    model = SampleBoringModel()
    trainer = Trainer(fast_dev_run=True)
    trainer.fit(model, datamodule=dm)
    trainer.validate(model, datamodule=dm)
    trainer.test(model, datamodule=dm)
    trainer.predict(model, datamodule=dm)

    # A DataModule built from a bare Input keeps the transform instance type.
    transform = TestTransform()
    input = Input(RunningStage.TRAINING)
    dm = DataModule(train_input=input, batch_size=1, transform=transform)
    assert isinstance(dm.input_transform, TestTransform)

    class RandomDataset(Dataset):
        # Fixed dataset of all-ones rows, so transform offsets are visible.
        def __init__(self, size: int, length: int):
            self.len = length
            self.data = torch.ones(length, size)

        def __getitem__(self, index):
            return self.data[index]

        def __len__(self):
            return self.len

    def _add_hundred(x):
        if isinstance(x, Dict):
            x["input"] += 100
        else:
            x += 100
        return x

    class TrainInputTransform(InputTransform):
        def _add_one(self, x):
            if isinstance(x, Dict):
                x["input"] += 1
            else:
                x += 1
            return x

        # Default (train/test) per-sample transform adds one ...
        def per_sample_transform(self) -> Callable:
            return self._add_one

        # ... while validation adds one hundred.
        def val_per_sample_transform(self) -> Callable:
            return _add_hundred

    dm = DataModule(
        train_input=DatasetInput(RunningStage.TRAINING, RandomDataset(64, 32)),
        val_input=DatasetInput(RunningStage.VALIDATING, RandomDataset(64, 32)),
        test_input=DatasetInput(RunningStage.TESTING, RandomDataset(64, 32)),
        batch_size=3,
        transform=TrainInputTransform(),
    )
    # Train/test use the +1 default; validation uses the +100 override (1 + 100).
    batch = next(iter(dm.train_dataloader()))
    assert batch["input"][0][0] == 2
    batch = next(iter(dm.val_dataloader()))
    assert batch["input"][0][0] == 101
    batch = next(iter(dm.test_dataloader()))
    assert batch["input"][0][0] == 2
def test_init_train(tmpdir):
    """Smoke-test that a ``SummarizationTask`` fits for one fast-dev-run step."""
    task = SummarizationTask(TEST_BACKBONE)
    loader = torch.utils.data.DataLoader(DummyDataset())
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.fit(task, loader)
# Required CLI arguments (``parser`` is created above this span).
parser.add_argument("--submission_path", type=str, required=True)
parser.add_argument("--test_data_path", type=str, required=True)
parser.add_argument("--best_model_path", type=str, required=True)
# Optional
parser.add_argument("--backbone", type=str, default="resnet18")
parser.add_argument("--learning_rate", type=float, default=0.01)
args = parser.parse_args()

# Train on the labelled folder, then persist the checkpoint.
train_datamodule = ImageClassificationData.from_folders(
    train_folder=args.train_data_path,
    batch_size=8,
)
model = ImageClassifier(train_datamodule.num_classes, backbone=args.backbone)
trainer = Trainer(fast_dev_run=True)
trainer.fit(model, datamodule=train_datamodule)
trainer.save_checkpoint(args.best_model_path)

# Predict on the unlabelled test folder with a fresh Trainer.
predict_datamodule = ImageClassificationData.from_folders(
    predict_folder=args.test_data_path,
    batch_size=8,
)
predictions = Trainer().predict(model, datamodule=predict_datamodule)

# Flatten per-batch predictions into one submission row per image.
rows = []
for batch in predictions:
    for prediction in batch:
        rows.append({
            "filename": os.path.basename(prediction["metadata"]["filepath"]),
            "label": torch.argmax(prediction["preds"]).item(),
        })
pd.DataFrame(rows).to_csv(args.submission_path, index=False)
def test_init_train(tmpdir):
    """Smoke-test that a ``SpeechRecognition`` task fits for one fast-dev-run step."""
    task = SpeechRecognition(backbone=TEST_BACKBONE)
    loader = torch.utils.data.DataLoader(DummyDataset())
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.fit(task, loader)
def test_init_train(tmpdir):
    """Smoke-test that a binary ``TextClassifier`` fits for one fast-dev-run step."""
    task = TextClassifier(2, TEST_BACKBONE)
    loader = torch.utils.data.DataLoader(DummyDataset())
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.fit(task, loader)