Example #1
def test_trainer_finetune(tmpdir):
    model = DummyClassifier()
    train_dl = torch.utils.data.DataLoader(DummyDataset())
    val_dl = torch.utils.data.DataLoader(DummyDataset())
    task = ClassificationTask(model, loss_fn=F.nll_loss)
    trainer = Trainer(fast_dev_run=True, default_root_dir=tmpdir)
    trainer.finetune(task, train_dl, val_dl, strategy=NoFreeze())
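``DummyDataset`` and ``DummyClassifier`` are defined elsewhere in the test suite. A minimal sketch consistent with how the examples here use them (28x28 inputs, 10 classes, log-probability outputs for ``F.nll_loss``); the exact shapes and dataset length are assumptions:

import torch
from torch import nn


class DummyDataset(torch.utils.data.Dataset):
    def __getitem__(self, index):
        # a random 28x28 "image" and an integer class label in [0, 10)
        return torch.rand(1, 28, 28), torch.randint(10, size=(1,)).item()

    def __len__(self):
        return 100


class DummyClassifier(nn.Module):
    def __init__(self):
        super().__init__()
        # log-probabilities so the classification tasks can train with F.nll_loss
        self.layers = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10), nn.LogSoftmax(dim=1))

    def forward(self, x):
        return self.layers(x)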
Example #2
def test_test(tmpdir):
    """Tests that the model can be tested on our ``DummyDataset``."""
    model = TemplateSKLearnClassifier(num_features=DummyDataset.num_features,
                                      num_classes=DummyDataset.num_classes)
    test_dl = torch.utils.data.DataLoader(DummyDataset(), batch_size=4)
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.test(model, test_dl)
Example #3
def test_from_argparse_args():
    parser = ArgumentParser()
    parser = Trainer.add_argparse_args(parser)
    args = parser.parse_args(["--max_epochs=200"])
    trainer = Trainer.from_argparse_args(args)
    assert trainer.max_epochs == 200
    assert isinstance(trainer, Trainer)
Example #4
def _test_learn2learning_training_strategies(gpus, accelerator, training_strategy, tmpdir):
    train_dir = Path(tmpdir / "train")
    train_dir.mkdir()

    (train_dir / "a").mkdir()
    (train_dir / "b").mkdir()

    pa_1 = train_dir / "a" / "1.png"
    pa_2 = train_dir / "a" / "2.png"
    pb_1 = train_dir / "b" / "1.png"
    pb_2 = train_dir / "b" / "2.png"

    image_size = (96, 96)
    _rand_image(image_size).save(pa_1)
    _rand_image(image_size).save(pa_2)
    _rand_image(image_size).save(pb_1)
    _rand_image(image_size).save(pb_2)

    n = 5

    dm = ImageClassificationData.from_files(
        train_files=[str(pa_1)] * n + [str(pa_2)] * n + [str(pb_1)] * n + [str(pb_2)] * n,
        train_targets=[0] * n + [1] * n + [2] * n + [3] * n,
        batch_size=1,
        num_workers=0,
        transform_kwargs=dict(image_size=image_size),
    )

    model = ImageClassifier(
        backbone="resnet18",
        training_strategy=training_strategy,
        training_strategy_kwargs={"ways": dm.num_classes, "shots": 4, "meta_batch_size": 4},
    )

    trainer = Trainer(fast_dev_run=2, gpus=gpus, accelerator=accelerator)
    trainer.fit(model, datamodule=dm)
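``_rand_image`` is also defined elsewhere in the suite. A minimal sketch that satisfies the calls in these examples (used both with and without an explicit size); the default size is an assumption:

import numpy as np
from PIL import Image


def _rand_image(size=(64, 64)):
    # random RGB image of the requested (height, width)
    return Image.fromarray(np.random.randint(0, 255, (*size, 3), dtype="uint8"))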
Example #5
def test_resolve_callbacks_multi_error(tmpdir):
    model = DummyClassifier()
    trainer = Trainer(fast_dev_run=True, default_root_dir=tmpdir)
    task = MultiFinetuneClassificationTask(model, loss_fn=F.nll_loss)
    with pytest.raises(MisconfigurationException,
                       match="should create a list with only 1 callback"):
        trainer._resolve_callbacks(task, None)
Example #6
def test_classification_fiftyone(tmpdir):
    tmpdir = Path(tmpdir)

    (tmpdir / "a").mkdir()
    (tmpdir / "b").mkdir()
    _rand_image().save(tmpdir / "a_1.png")
    _rand_image().save(tmpdir / "b_1.png")

    train_images = [
        str(tmpdir / "a_1.png"),
        str(tmpdir / "b_1.png"),
    ]

    train_dataset = fo.Dataset.from_dir(str(tmpdir),
                                        dataset_type=fo.types.ImageDirectory)
    s1 = train_dataset[train_images[0]]
    s2 = train_dataset[train_images[1]]
    s1["test"] = fo.Classification(label="1")
    s2["test"] = fo.Classification(label="2")
    s1.save()
    s2.save()

    data = ImageClassificationData.from_fiftyone(
        train_dataset=train_dataset,
        label_field="test",
        batch_size=2,
        num_workers=0,
        image_size=(64, 64),
    )

    model = ImageClassifier(num_classes=2, backbone="resnet18")
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.finetune(model, datamodule=data, strategy="freeze")
Example #7
def test_default_strategies(tmpdir):
    num_classes = 10
    ds = DummyDataset()
    model = ImageClassifier(num_classes, backbone="resnet50")

    trainer = Trainer(fast_dev_run=2)
    trainer.fit(model, train_dataloader=DataLoader(ds))
Example #8
def test_pointcloud_object_detection_data(tmpdir):

    seed_everything(52)

    download_data("https://pl-flash-data.s3.amazonaws.com/KITTI_micro.zip",
                  tmpdir)

    dm = PointCloudObjectDetectorData.from_folders(
        train_folder=join(tmpdir, "KITTI_Micro", "Kitti", "train"))

    class MockModel(PointCloudObjectDetector):
        def training_step(self, batch, batch_idx: int):
            assert isinstance(batch, ObjectDetectBatchCollator)
            assert len(batch.point) == 2
            assert batch.point[0][1].shape == torch.Size([4])
            assert len(batch.bboxes) > 1
            assert batch.attr[0]["name"] in ("000000.bin", "000001.bin")
            assert batch.attr[1]["name"] in ("000000.bin", "000001.bin")

    num_classes = 19
    model = MockModel(backbone="pointpillars_kitti", num_classes=num_classes)
    trainer = Trainer(max_epochs=1, limit_train_batches=1, limit_val_batches=0)
    trainer.fit(model, dm)

    predict_path = join(tmpdir, "KITTI_Micro", "Kitti", "predict")
    model.eval()

    predictions = model.predict([join(predict_path, "scans/000000.bin")])
    assert predictions[0][DefaultDataKeys.INPUT].shape[1] == 4
    assert len(predictions[0][DefaultDataKeys.PREDS]) == 158
Example #9
def test_resolve_callbacks_override_warning(tmpdir):
    model = DummyClassifier()
    trainer = Trainer(fast_dev_run=True, default_root_dir=tmpdir)
    task = FinetuneClassificationTask(model, loss_fn=F.nll_loss)
    with pytest.warns(UserWarning,
                      match="The model contains a default finetune callback"):
        trainer._resolve_callbacks(task, "test")
Example #10
def test_datapipeline_transformations_overridden_by_task():
    # define the input and its transforms
    class ImageInput(Input):
        def load_data(self, folder):
            # from folder -> return file paths
            return ["a.jpg", "b.jpg"]

        def load_sample(self, path):
            # from a file path, load the associated image
            return np.random.uniform(0, 1, (64, 64, 3))

    class ImageClassificationInputTransform(InputTransform):
        def per_sample_transform(self) -> Callable:
            return T.Compose([T.ToTensor()])

        def per_batch_transform_on_device(self) -> Callable:
            return T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

    class OverrideInputTransform(InputTransform):
        def per_sample_transform(self) -> Callable:
            return T.Compose([T.ToTensor(), T.Resize(128)])

    # define a task which overrides the default input transform
    class CustomModel(Task):
        def __init__(self):
            super().__init__(model=torch.nn.Linear(1, 1),
                             loss_fn=torch.nn.MSELoss())

            # override default transform to resize images
            self.input_transform = OverrideInputTransform

        def training_step(self, batch, batch_idx):
            assert batch.shape == torch.Size([2, 3, 128, 128])
            assert torch.max(batch) <= 1.0
            assert torch.min(batch) >= 0.0

        def validation_step(self, batch, batch_idx):
            assert batch.shape == torch.Size([2, 3, 128, 128])
            assert torch.max(batch) <= 1.0
            assert torch.min(batch) >= 0.0

    transform = ImageClassificationInputTransform()
    datamodule = DataModule(
        ImageInput(RunningStage.TRAINING, [1]),
        ImageInput(RunningStage.VALIDATING, [1]),
        transform=transform,
        batch_size=2,
        num_workers=0,
    )

    # call trainer
    model = CustomModel()
    trainer = Trainer(
        max_epochs=1,
        limit_train_batches=2,
        limit_val_batches=1,
        num_sanity_val_steps=1,
    )
    trainer.fit(model, datamodule=datamodule)
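For comparison, the same resize could be applied at the data level by passing ``OverrideInputTransform`` as the ``DataModule`` transform; the test instead sets it on the task to check that a task-level ``input_transform`` takes precedence. A sketch of the data-level variant, assuming the same constructor as above:

datamodule = DataModule(
    ImageInput(RunningStage.TRAINING, [1]),
    ImageInput(RunningStage.VALIDATING, [1]),
    transform=OverrideInputTransform,  # applied to every stage, not just this task
    batch_size=2,
    num_workers=0,
)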
Example #11
def test_task_fit(tmpdir: str):
    model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10),
                          nn.LogSoftmax(dim=1))
    train_dl = torch.utils.data.DataLoader(DummyDataset())
    val_dl = torch.utils.data.DataLoader(DummyDataset())
    task = ClassificationTask(model, loss_fn=F.nll_loss)
    trainer = Trainer(fast_dev_run=True, default_root_dir=tmpdir)
    trainer.fit(task, train_dl, val_dl)
Example #12
def test_test(tmpdir):
    """Tests that the model can be tested on a pytorch geometric dataset."""
    tudataset = datasets.TUDataset(root=tmpdir, name="KKI")
    model = GraphClassifier(num_features=tudataset.num_features, num_classes=tudataset.num_classes)
    model.data_pipeline = DataPipeline(preprocess=GraphClassificationPreprocess())
    test_dl = torch.utils.data.DataLoader(tudataset, batch_size=4)
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.test(model, test_dl)
Example #13
def test_init_train(tmpdir):
    if os.name == "nt":
        # TODO: huggingface stuff timing out on windows
        pytest.skip("huggingface stuff timing out on windows")
    model = TranslationTask(TEST_BACKBONE)
    train_dl = torch.utils.data.DataLoader(DummyDataset())
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.fit(model, train_dl)
Example #14
def test_predict_sklearn():
    """Tests that we can generate predictions from a scikit-learn ``Bunch``."""
    bunch = datasets.load_iris()
    model = TemplateSKLearnClassifier(num_features=DummyDataset.num_features,
                                      num_classes=DummyDataset.num_classes)
    datamodule = TemplateData.from_sklearn(predict_bunch=bunch, batch_size=1)
    trainer = Trainer()
    out = trainer.predict(model, datamodule=datamodule, output="classes")
    assert isinstance(out[0][0], int)
Example #15
def test_resolve_callbacks_invalid_strategy(tmpdir):
    model = DummyClassifier()
    trainer = Trainer(fast_dev_run=True, default_root_dir=tmpdir)
    task = ClassificationTask(model, loss_fn=F.nll_loss)
    with pytest.raises(
            MisconfigurationException,
            match="should be a ``pytorch_lightning.callbacks.BaseFinetuning``"
    ):
        trainer._resolve_callbacks(task, EarlyStopping())
Example #16
def test_predict_numpy():
    """Tests that we can generate predictions from a numpy array."""
    row = np.random.rand(1, DummyDataset.num_features)
    model = TemplateSKLearnClassifier(num_features=DummyDataset.num_features,
                                      num_classes=DummyDataset.num_classes)
    datamodule = TemplateData.from_numpy(predict_data=row, batch_size=1)
    trainer = Trainer()
    out = trainer.predict(model, datamodule=datamodule, output="classes")
    assert isinstance(out[0][0], int)
Example #17
def test_predict_numpy():
    img = np.ones((1, 3, 64, 64))
    model = SemanticSegmentation(2, backbone="mobilenetv3_large_100")
    datamodule = SemanticSegmentationData.from_numpy(predict_data=img,
                                                     batch_size=1)
    trainer = Trainer()
    out = trainer.predict(model, datamodule=datamodule, output="labels")
    assert isinstance(out[0][0], list)
    assert len(out[0][0]) == 64
    assert len(out[0][0][0]) == 64
Example #18
def test_ort_callback_fails_no_model(tmpdir):
    model = BoringModel()
    trainer = Trainer(default_root_dir=tmpdir,
                      fast_dev_run=True,
                      callbacks=ORTCallback())
    with pytest.raises(MisconfigurationException,
                       match="Torch ORT requires to wrap a single model"):
        trainer.fit(
            model,
            train_dataloader=torch.utils.data.DataLoader(DummyDataset()),
            val_dataloaders=torch.utils.data.DataLoader(DummyDataset()),
        )
Example #19
def test_classification(tmpdir):
    data = ImageClassificationData.from_filepaths(
        train_filepaths=["a", "b"],
        train_labels=[0, 1],
        train_transform=lambda x: x,
        loader=_dummy_image_loader,
        num_workers=0,
        batch_size=2,
    )
    model = ImageClassifier(2, backbone="resnet18")
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.finetune(model, datamodule=data, strategy="freeze")
Example #20
def test_classification_json(tmpdir):
    json_path = json_data(tmpdir)

    data = SpeechRecognitionData.from_json(
        "file",
        "text",
        train_file=json_path,
        num_workers=0,
        batch_size=2,
    )
    model = SpeechRecognition(backbone=TEST_BACKBONE)
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.fit(model, datamodule=data)
Example #21
def test_multilabel(tmpdir):

    num_classes = 4
    ds = DummyMultiLabelDataset(num_classes)
    model = ImageClassifier(num_classes, multi_label=True, serializer=Probabilities(multi_label=True))
    train_dl = torch.utils.data.DataLoader(ds, batch_size=2)
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.finetune(model, train_dl, strategy="freeze_unfreeze")
    image, label = ds[0][DefaultDataKeys.INPUT], ds[0][DefaultDataKeys.TARGET]
    predictions = model.predict([{DefaultDataKeys.INPUT: image}])
    assert (torch.tensor(predictions) > 1).sum() == 0
    assert (torch.tensor(predictions) < 0).sum() == 0
    assert len(predictions[0]) == num_classes == len(label)
    assert len(torch.unique(label)) <= 2
Example #22
def test_predict_dataset(tmpdir):
    """Tests that we can generate embeddings from a pytorch geometric dataset."""
    tudataset = datasets.TUDataset(root=tmpdir, name="KKI")
    model = GraphEmbedder(
        GraphClassifier(num_features=tudataset.num_features,
                        num_classes=tudataset.num_classes).backbone)
    datamodule = DataModule(
        predict_input=GraphClassificationDatasetInput(RunningStage.PREDICTING,
                                                      tudataset),
        transform=GraphClassificationInputTransform,
        batch_size=4,
    )
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    out = trainer.predict(model, datamodule=datamodule)
    assert isinstance(out[0][0], torch.Tensor)
Example #23
def test_classification(tmpdir):
    tmpdir = Path(tmpdir)

    (tmpdir / "a").mkdir()
    (tmpdir / "b").mkdir()
    _rand_image().save(tmpdir / "a" / "a_1.png")
    _rand_image().save(tmpdir / "b" / "b_1.png")

    data = ImageClassificationData.from_filepaths(
        train_filepaths=[tmpdir / "a", tmpdir / "b"],
        train_labels=[0, 1],
        train_transform={"per_batch_transform": lambda x: x},
        num_workers=0,
        batch_size=2,
    )
    model = ImageClassifier(num_classes=2, backbone="resnet18")
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.finetune(model, datamodule=data, strategy="freeze")
Example #24
def test_pointcloud_segmentation_data(tmpdir):

    seed_everything(52)

    download_data(
        "https://pl-flash-data.s3.amazonaws.com/SemanticKittiMicro.zip",
        tmpdir)

    datamodule = PointCloudSegmentationData.from_folders(
        train_folder=join(tmpdir, "SemanticKittiMicro", "train"),
        predict_folder=join(tmpdir, "SemanticKittiMicro", "predict"),
        batch_size=4,
    )

    class MockModel(PointCloudSegmentation):
        def training_step(self, batch, batch_idx: int):
            assert batch[DataKeys.INPUT]["xyz"][0].shape == torch.Size(
                [2, 45056, 3])
            assert batch[DataKeys.INPUT]["xyz"][1].shape == torch.Size(
                [2, 11264, 3])
            assert batch[DataKeys.INPUT]["xyz"][2].shape == torch.Size(
                [2, 2816, 3])
            assert batch[DataKeys.INPUT]["xyz"][3].shape == torch.Size(
                [2, 704, 3])
            assert batch[DataKeys.INPUT]["labels"].shape == torch.Size(
                [2, 45056])
            assert batch[DataKeys.INPUT]["labels"].max() == 19
            assert batch[DataKeys.INPUT]["labels"].min() == 0
            assert batch[DataKeys.METADATA][0]["name"] in ("00_000000",
                                                           "00_000001")
            assert batch[DataKeys.METADATA][1]["name"] in ("00_000000",
                                                           "00_000001")

    num_classes = 19
    model = MockModel(backbone="randlanet", num_classes=num_classes)
    trainer = Trainer(max_epochs=1, limit_train_batches=1, limit_val_batches=0)
    trainer.fit(model, datamodule=datamodule)

    predictions = trainer.predict(model, datamodule=datamodule)[0]
    assert predictions[0][DataKeys.INPUT].shape == torch.Size([45056, 3])
    assert predictions[0][DataKeys.PREDS].shape == torch.Size([45056, 19])
    assert predictions[0][DataKeys.TARGET].shape == torch.Size([45056])
Example #25
def test_saving_with_serializers(tmpdir):

    checkpoint_file = os.path.join(tmpdir, 'tmp.ckpt')

    class CustomModel(Task):
        def __init__(self):
            super().__init__(model=torch.nn.Linear(1, 1),
                             loss_fn=torch.nn.MSELoss())

    serializer = Labels(["a", "b"])
    model = CustomModel()
    trainer = Trainer(fast_dev_run=True)
    data_pipeline = DataPipeline(DefaultPreprocess(), serializer=serializer)
    data_pipeline.initialize()
    model.data_pipeline = data_pipeline
    assert isinstance(model.preprocess, DefaultPreprocess)
    dummy_data = DataLoader(
        list(
            zip(torch.arange(10, dtype=torch.float),
                torch.arange(10, dtype=torch.float))))
    trainer.fit(model, train_dataloader=dummy_data)
    trainer.save_checkpoint(checkpoint_file)
    model = CustomModel.load_from_checkpoint(checkpoint_file)
    assert isinstance(model.preprocess._data_pipeline_state, DataPipelineState)
    assert model.preprocess._data_pipeline_state._state[
        ClassificationState] == ClassificationState(['a', 'b'])
Example #26
def test_trainer_request_dataloaders(stage):
    """Test to ensure that ``request_dataloaders`` can take a combination of arguments, for PL 1.5 and later.

    (stage, model) -> calls module on_dataloader hook (stage, model=model) -> calls module on_dataloader hook
    """
    class TestModel(BoringModel):
        recorded_on_dataloader_calls = {}

        def on_train_dataloader(self) -> None:
            self.recorded_on_dataloader_calls[RunningStage.TRAINING] = True

        def on_val_dataloader(self) -> None:
            self.recorded_on_dataloader_calls[RunningStage.VALIDATING] = True

        def on_test_dataloader(self) -> None:
            self.recorded_on_dataloader_calls[RunningStage.TESTING] = True

    trainer = Trainer()

    model = TestModel()
    trainer.request_dataloader(stage, model)
    assert model.recorded_on_dataloader_calls[stage]

    model = TestModel()
    trainer.request_dataloader(stage, model=model)
    assert model.recorded_on_dataloader_calls[stage]
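The ``stage`` argument implies a parametrized test. A sketch of the decorator that would drive it; the exact set of stages is an assumption:

import pytest
from pytorch_lightning.trainer.states import RunningStage


@pytest.mark.parametrize(
    "stage",
    [RunningStage.TRAINING, RunningStage.VALIDATING, RunningStage.TESTING],
)
def test_trainer_request_dataloaders(stage):
    ...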
Example #27
def test_classification(tmpdir):
    tmpdir = Path(tmpdir)

    (tmpdir / "a").mkdir()
    (tmpdir / "b").mkdir()

    image_a = str(tmpdir / "a" / "a_1.png")
    image_b = str(tmpdir / "b" / "b_1.png")

    _rand_image().save(image_a)
    _rand_image().save(image_b)

    data = ImageClassificationData.from_files(
        train_files=[image_a, image_b],
        train_targets=[0, 1],
        num_workers=0,
        batch_size=2,
        image_size=(64, 64),
    )
    model = ImageClassifier(num_classes=2, backbone="resnet18")
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    trainer.finetune(model, datamodule=data, strategy="freeze")
Example #28
def test_finetuning(tmpdir: str, strategy):
    train_dl = torch.utils.data.DataLoader(DummyDataset())
    val_dl = torch.utils.data.DataLoader(DummyDataset())
    task = ImageClassifier(10, backbone="resnet18")
    trainer = Trainer(fast_dev_run=True, default_root_dir=tmpdir)
    if strategy == "cls":
        strategy = NoFreeze()
    if strategy == 'chocolat' or strategy is None:
        with pytest.raises(MisconfigurationException, match="strategy should be provided"):
            trainer.finetune(task, train_dl, val_dl, strategy=strategy)
    else:
        trainer.finetune(task, train_dl, val_dl, strategy=strategy)
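``strategy`` is likewise injected by pytest. A plausible parametrization covering the branches above ("cls", "chocolat", None); the named strategies are assumptions:

import pytest


@pytest.mark.parametrize(
    "strategy", ["no_freeze", "freeze", "freeze_unfreeze", "cls", "chocolat", None]
)
def test_finetuning(tmpdir: str, strategy):
    ...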
Example #29
def test_trainer_fit(tmpdir, callbacks, should_warn):
    model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10), nn.LogSoftmax(dim=1))
    train_dl = DataLoader(DummyDataset())
    val_dl = DataLoader(DummyDataset())
    task = ClassificationTask(model, loss_fn=F.nll_loss)
    trainer = Trainer(fast_dev_run=True, default_root_dir=tmpdir, callbacks=callbacks)

    if should_warn:
        with pytest.warns(UserWarning, match="trainer is using a fine-tuning callback"):
            trainer.fit(task, train_dl, val_dl)
    else:
        trainer.fit(task, train_dl, val_dl)
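``callbacks`` and ``should_warn`` also come from a parametrize decorator. A sketch, assuming that passing a finetuning callback such as ``NoFreeze`` to a plain ``fit`` call is what triggers the warning:

import pytest


@pytest.mark.parametrize("callbacks, should_warn", [([], False), ([NoFreeze()], True)])
def test_trainer_fit(tmpdir, callbacks, should_warn):
    ...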
Example #30
def test_transformations(tmpdir):

    transform = TestInputTransform()
    datamodule = DataModule(
        TestInput(RunningStage.TRAINING, [1]),
        TestInput(RunningStage.VALIDATING, [1]),
        TestInput(RunningStage.TESTING, [1]),
        transform=transform,
        batch_size=2,
        num_workers=0,
    )

    assert datamodule.train_dataloader().dataset[0] == (0, 1, 2, 3)
    batch = next(iter(datamodule.train_dataloader()))
    assert torch.equal(batch, torch.tensor([[0, 1, 2, 3, 5], [0, 1, 2, 3, 5]]))

    assert datamodule.val_dataloader().dataset[0] == {"a": 0, "b": 1}
    assert datamodule.val_dataloader().dataset[1] == {"a": 1, "b": 2}
    batch = next(iter(datamodule.val_dataloader()))

    datamodule = DataModule(
        TestInput(RunningStage.TRAINING, [1]),
        TestInput(RunningStage.VALIDATING, [1]),
        TestInput(RunningStage.TESTING, [1]),
        transform=TestInputTransform2,
        batch_size=2,
        num_workers=0,
    )
    batch = next(iter(datamodule.val_dataloader()))
    assert torch.equal(batch["a"], torch.tensor([0, 1]))
    assert torch.equal(batch["b"], torch.tensor([1, 2]))

    model = CustomModel()
    trainer = Trainer(
        max_epochs=1,
        limit_train_batches=2,
        limit_val_batches=1,
        limit_test_batches=2,
        limit_predict_batches=2,
        num_sanity_val_steps=1,
    )
    trainer.fit(model, datamodule=datamodule)
    trainer.test(model, datamodule=datamodule)

    assert datamodule.input_transform.train_per_sample_transform_called
    assert datamodule.input_transform.train_collate_called
    assert datamodule.input_transform.train_per_batch_transform_on_device_called
    assert datamodule.input_transform.val_per_sample_transform_called
    assert datamodule.input_transform.val_collate_called
    assert datamodule.input_transform.val_per_batch_transform_on_device_called
    assert datamodule.input_transform.test_per_sample_transform_called
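``TestInput`` and the transforms are defined elsewhere in the suite. A minimal ``Input`` sketch reconstructed from the assertions above; the per-stage ``*_load_data`` hook names follow Flash's stage-resolution convention and are assumptions here:

class TestInput(Input):
    def train_load_data(self, _):
        # two identical samples, so a batch of 2 collates to [(0, 1, 2, 3), (0, 1, 2, 3)]
        return [(0, 1, 2, 3), (0, 1, 2, 3)]

    def val_load_data(self, _):
        return [{"a": 0, "b": 1}, {"a": 1, "b": 2}]

    def test_load_data(self, _):
        return [(0, 1, 2, 3), (0, 1, 2, 3)]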