def test_task_datapipeline_save(tmpdir):
    model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10), nn.Softmax())
    train_dl = torch.utils.data.DataLoader(DummyDataset())
    task = ClassificationTask(model, loss_fn=F.nll_loss, postprocess=DummyPostprocess())

    # to check later
    task.postprocess.test = True

    # generate a checkpoint
    trainer = pl.Trainer(
        default_root_dir=tmpdir,
        limit_train_batches=1,
        max_epochs=1,
        progress_bar_refresh_rate=0,
        weights_summary=None,
        logger=False,
    )
    trainer.fit(task, train_dl)
    path = str(tmpdir / "model.ckpt")
    trainer.save_checkpoint(path)

    # load from file
    task = ClassificationTask.load_from_checkpoint(path, model=model)
    assert task.postprocess.test


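# The fixtures used throughout these tests (``DummyDataset``, ``DummyPostprocess``, etc.) are
# defined elsewhere in this file. As a rough, hypothetical sketch of what the tests assume
# (random 28x28 inputs with integer targets in [0, 10); ``DummyPostprocess`` is presumably a
# trivial Postprocess subclass that only carries the ``test`` attribute set above), a dataset
# like this would fit; the ``Sketch`` suffix avoids shadowing the real fixture:
class DummyDatasetSketch(torch.utils.data.Dataset):
    def __init__(self, num_samples: int = 9):
        self.num_samples = num_samples

    def __getitem__(self, index: int):
        # one 28x28 "image" and a class index in [0, 10)
        return torch.rand(1, 28, 28), torch.randint(10, size=(1,)).item()

    def __len__(self) -> int:
        return self.num_samples

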
def test_optimizers_and_schedulers(tmpdir, optim, sched, interval):
    model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10), nn.LogSoftmax())
    task = ClassificationTask(model, optimizer=optim, lr_scheduler=sched)
    train_dl = torch.utils.data.DataLoader(DummyDataset())

    if sched is None:
        optimizer = task.configure_optimizers()
        assert isinstance(optimizer, torch.optim.Adadelta)
    else:
        optimizer, scheduler = task.configure_optimizers()
        assert isinstance(optimizer[0], torch.optim.Adadelta)

        scheduler = scheduler[0]
        assert isinstance(scheduler["scheduler"], torch.optim.lr_scheduler.StepLR)
        assert scheduler["interval"] == interval

    # generate a checkpoint
    trainer = flash.Trainer(
        default_root_dir=tmpdir,
        limit_train_batches=10,
        max_epochs=1,
    )
    trainer.fit(task, train_dl)


def test_external_schedulers_provider_hf_transformers(tmpdir, optim, sched, use_datamodule, limit):
    model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10), nn.LogSoftmax())
    task = ClassificationTask(model, optimizer=deepcopy(optim), lr_scheduler=deepcopy(sched), loss_fn=F.nll_loss)

    if limit is not None:
        batch_count = limit if isinstance(limit, int) else int(limit * 10)
        trainer = flash.Trainer(max_epochs=1, limit_train_batches=limit)
    else:
        batch_count = 10
        trainer = flash.Trainer(max_epochs=1)

    ds = DummyDataset(num_samples=10)

    if use_datamodule:

        class TestDataModule(LightningDataModule):
            def train_dataloader(self):
                return DataLoader(ds)

        trainer.fit(task, datamodule=TestDataModule())
    else:
        trainer.fit(task, train_dataloader=DataLoader(ds))

    assert task.get_num_training_steps() == batch_count
    assert isinstance(trainer.optimizers[0], torch.optim.Adadelta)
    assert isinstance(trainer.lr_schedulers[0]["scheduler"], torch.optim.lr_scheduler.LambdaLR)


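# Worked example of the step-count bookkeeping above, mirrored as a small helper (a sketch
# with illustrative values): with the 10-sample dataset and the default batch size of 1, an
# integer limit keeps exactly that many batches, a float limit keeps that fraction of the 10
# batches, and no limit keeps all 10.
def _expected_batch_count_sketch(limit, num_batches=10):
    if limit is None:
        return num_batches  # e.g. limit=None  -> 10
    if isinstance(limit, int):
        return limit  # e.g. limit=5     -> 5
    return int(limit * num_batches)  # e.g. limit=0.5   -> 5

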
def test_task_predict_raises():
    with pytest.raises(AttributeError, match="`flash.Task.predict` has been removed."):
        model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10), nn.Softmax())
        task = ClassificationTask(model, loss_fn=F.nll_loss)
        task.predict("args", kwarg="test")


def test_classification_task_trainer_predict(tmpdir):
    model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
    task = ClassificationTask(model)
    ds = PredictDummyDataset(10)
    batch_size = 6
    predict_dl = task.process_predict_dataset(ds, batch_size=batch_size)
    trainer = pl.Trainer(default_root_dir=tmpdir)
    predictions = trainer.predict(task, predict_dl)
    assert len(list(chain.from_iterable(predictions))) == 10


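# ``PredictDummyDataset`` is another fixture defined elsewhere in this file. Based on how it
# is consumed here and in the second ``test_classification_task_trainer_predict`` below, a
# plausible (hypothetical) sketch is a dataset that yields inputs only, with a configurable
# length; again the ``Sketch`` suffix avoids shadowing the real fixture:
class PredictDummyDatasetSketch(torch.utils.data.Dataset):
    def __init__(self, num_samples: int = 9):
        self.num_samples = num_samples

    def __getitem__(self, index: int):
        # prediction needs inputs only, no targets
        return torch.rand(1, 28, 28)

    def __len__(self) -> int:
        return self.num_samples

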
def test_classificationtask_task_predict():
    model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10), nn.Softmax())
    task = ClassificationTask(model, preprocess=DefaultPreprocess())
    ds = DummyDataset()
    expected = list(range(10))

    # single item
    x0, _ = ds[0]
    pred0 = task.predict(x0)
    assert pred0[0] in expected

    # list
    x1, _ = ds[1]
    pred1 = task.predict([x0, x1])
    assert all(c in expected for c in pred1)
    assert pred0[0] == pred1[0]


def test_trainer_finetune(tmpdir):
    model = DummyClassifier()
    train_dl = torch.utils.data.DataLoader(DummyDataset())
    val_dl = torch.utils.data.DataLoader(DummyDataset())
    task = ClassificationTask(model, loss_fn=F.nll_loss)
    trainer = Trainer(fast_dev_run=True, default_root_dir=tmpdir)
    trainer.finetune(task, train_dl, val_dl, strategy=NoFreeze())


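# ``finetune`` also accepts the registered strategy name instead of a callback instance,
# which is the form ``test_resolve_callbacks_override_warning`` at the bottom of this file
# resolves ("no_freeze"). A minimal sketch of the equivalent call, assuming the same
# fixtures as the test above:
def _finetune_with_strategy_name_sketch(tmpdir):
    task = ClassificationTask(DummyClassifier(), loss_fn=F.nll_loss)
    trainer = Trainer(fast_dev_run=True, default_root_dir=tmpdir)
    trainer.finetune(task, torch.utils.data.DataLoader(DummyDataset()), strategy="no_freeze")

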
def test_classification_task_metrics():
    train_dataset = FixedDataset([0, 1])
    val_dataset = FixedDataset([1, 1])
    test_dataset = FixedDataset([0, 0])

    model = OnesModel()

    class CheckAccuracy(Callback):
        def on_train_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
            assert math.isclose(trainer.callback_metrics["train_accuracy_epoch"], 0.5)

        def on_validation_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
            assert math.isclose(trainer.callback_metrics["val_accuracy"], 1.0)

        def on_test_end(self, trainer: "pl.Trainer", pl_module: "pl.LightningModule") -> None:
            assert math.isclose(trainer.callback_metrics["test_accuracy"], 0.0)

    task = ClassificationTask(model)
    trainer = flash.Trainer(max_epochs=1, callbacks=CheckAccuracy(), gpus=torch.cuda.device_count())
    trainer.fit(task, train_dataloader=DataLoader(train_dataset), val_dataloaders=DataLoader(val_dataset))
    trainer.test(task, DataLoader(test_dataset))


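# The expected accuracies above follow from the two fixtures involved: ``FixedDataset``
# serves a fixed list of targets and ``OnesModel`` always predicts class 1, so train
# ([0, 1]) scores 0.5, val ([1, 1]) scores 1.0 and test ([0, 0]) scores 0.0. A rough,
# hypothetical sketch of those fixtures (the real definitions live elsewhere in the file):
class FixedDatasetSketch(torch.utils.data.Dataset):
    def __init__(self, targets):
        self.targets = targets

    def __getitem__(self, index: int):
        # the input is irrelevant here; only the target drives the accuracy metric
        return torch.rand(1, 28, 28), self.targets[index]

    def __len__(self) -> int:
        return len(self.targets)


class OnesModelSketch(nn.Module):
    def __init__(self):
        super().__init__()
        # a tiny learnable head so the optimizer has parameters and the loss can backprop
        self.layer = nn.Linear(28 * 28, 2)
        self.register_buffer("bias", torch.tensor([[0.0, 100.0]]))

    def forward(self, x):
        # the large fixed bias keeps the argmax at class 1 regardless of the input
        return self.layer(torch.flatten(x, 1)) * 1e-6 + self.bias

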
def test_task_fit(tmpdir: str):
    model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10), nn.LogSoftmax())
    train_dl = torch.utils.data.DataLoader(DummyDataset())
    val_dl = torch.utils.data.DataLoader(DummyDataset())
    task = ClassificationTask(model, loss_fn=F.nll_loss)
    trainer = Trainer(fast_dev_run=True, default_root_dir=tmpdir)
    trainer.fit(task, train_dl, val_dl)


def test_classificationtask_train(tmpdir: str, metrics: Any):
    model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10), nn.Softmax())
    train_dl = torch.utils.data.DataLoader(DummyDataset())
    val_dl = torch.utils.data.DataLoader(DummyDataset())
    task = ClassificationTask(model, loss_fn=F.nll_loss, metrics=metrics)
    trainer = pl.Trainer(fast_dev_run=True, default_root_dir=tmpdir)
    result = trainer.fit(task, train_dl, val_dl)
    result = trainer.test(task, val_dl)
    assert "test_nll_loss" in result[0]


def test_resolve_callbacks_invalid_strategy(tmpdir):
    model = DummyClassifier()
    trainer = Trainer(fast_dev_run=True, default_root_dir=tmpdir)
    task = ClassificationTask(model, loss_fn=F.nll_loss)
    with pytest.raises(MisconfigurationException, match="should be a ``pytorch_lightning.callbacks.BaseFinetuning``"):
        trainer._resolve_callbacks(task, EarlyStopping())


def test_external_optimizers_torch_optimizer(tmpdir, optim):
    model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10), nn.LogSoftmax())
    task = ClassificationTask(model, optimizer=optim, lr_scheduler=None, loss_fn=F.nll_loss)
    trainer = flash.Trainer(max_epochs=1, limit_train_batches=2, gpus=torch.cuda.device_count())
    ds = DummyDataset()
    trainer.fit(task, train_dataloader=DataLoader(ds))

    from torch_optimizer import Yogi

    optimizer = task.configure_optimizers()
    assert isinstance(optimizer, Yogi)


def test_trainer_fit(tmpdir, callbacks, should_warn):
    model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10), nn.LogSoftmax())
    train_dl = DataLoader(DummyDataset())
    val_dl = DataLoader(DummyDataset())
    task = ClassificationTask(model, loss_fn=F.nll_loss)
    trainer = Trainer(fast_dev_run=True, default_root_dir=tmpdir, callbacks=callbacks)

    if should_warn:
        with pytest.warns(UserWarning, match="trainer is using a fine-tuning callback"):
            trainer.fit(task, train_dl, val_dl)
    else:
        trainer.fit(task, train_dl, val_dl)


def test_optimizer_learning_rate():
    mock_optimizer = MagicMock()
    Task.optimizers(mock_optimizer, "test")

    model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10), nn.LogSoftmax())

    ClassificationTask(model, optimizer="test").configure_optimizers()
    mock_optimizer.assert_called_once_with(mock.ANY)

    mock_optimizer.reset_mock()

    ClassificationTask(model, optimizer="test", learning_rate=10).configure_optimizers()
    mock_optimizer.assert_called_once_with(mock.ANY, lr=10)

    mock_optimizer.reset_mock()

    with pytest.raises(TypeError, match="The `learning_rate` argument is required"):
        ClassificationTask(model, optimizer="sgd").configure_optimizers()


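# Registering a real optimizer works the same way as the ``MagicMock`` registration above.
# This is only a sketch: the "custom_sgd" name is hypothetical, and the call signature
# (model parameters first, ``lr`` passed as a keyword) is inferred from the
# ``assert_called_once_with`` checks in ``test_optimizer_learning_rate``:
def _register_custom_optimizer_sketch():
    def custom_sgd(parameters, lr=0.1):
        return torch.optim.SGD(parameters, lr=lr)

    Task.optimizers(custom_sgd, "custom_sgd")

    model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10), nn.LogSoftmax())
    optimizer = ClassificationTask(model, optimizer="custom_sgd", learning_rate=0.1).configure_optimizers()
    assert isinstance(optimizer, torch.optim.SGD)

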
def test_nested_tasks(tmpdir, task):
    model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10), nn.Softmax())
    train_dl = torch.utils.data.DataLoader(DummyDataset())
    val_dl = torch.utils.data.DataLoader(DummyDataset())
    child_task = ClassificationTask(model, loss_fn=F.nll_loss)
    parent_task = task(child_task)
    trainer = pl.Trainer(fast_dev_run=True, default_root_dir=tmpdir)
    trainer.fit(parent_task, train_dl, val_dl)
    result = trainer.test(parent_task, val_dl)
    assert "test_nll_loss" in result[0]


def test_classification_task_trainer_predict(tmpdir):
    model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
    task = ClassificationTask(model)
    ds = PredictDummyDataset()
    batch_size = 3
    predict_dl = torch.utils.data.DataLoader(ds, batch_size=batch_size)
    trainer = pl.Trainer(default_root_dir=tmpdir)
    predictions = trainer.predict(task, predict_dl)
    assert len(predictions) == len(ds) // batch_size
    for batch_pred in predictions:
        assert len(batch_pred) == batch_size
        assert all(y < 10 for y in batch_pred)


model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(28 * 28, 128),
    nn.ReLU(),
    nn.Linear(128, 10),
)

# 3. Load a dataset
dataset = datasets.MNIST(os.path.join(_PATH_ROOT, 'data'), download=False, transform=transforms.ToTensor())

# 4. Split the data randomly
train, val, test = random_split(dataset, [50000, 5000, 5000])  # type: ignore

# 5. Create the model
classifier = ClassificationTask(
    model,
    loss_fn=nn.functional.cross_entropy,
    optimizer=optim.Adam,
    learning_rate=10e-3,
)

# 6. Create the trainer
trainer = pl.Trainer(
    max_epochs=10,
    limit_train_batches=128,
    limit_val_batches=128,
)

# 7. Train the model
trainer.fit(classifier, DataLoader(train), DataLoader(val))

# 8. Test the model
results = trainer.test(classifier, test_dataloaders=DataLoader(test))


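# A sketch of one possible follow-on step (not part of the numbered snippet above): the
# trained task can be persisted and restored with the same checkpoint API exercised by
# ``test_task_datapipeline_save``; the "classifier.ckpt" filename is illustrative.
trainer.save_checkpoint("classifier.ckpt")
classifier = ClassificationTask.load_from_checkpoint("classifier.ckpt", model=model)

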
def test_errors_and_exceptions_optimizers_and_schedulers():
    model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10), nn.LogSoftmax())

    with pytest.raises(TypeError):
        task = ClassificationTask(model, optimizer=[1, 2, 3, 4], lr_scheduler=None)
        task.configure_optimizers()

    with pytest.raises(KeyError):
        task = ClassificationTask(model, optimizer="not_a_valid_key", lr_scheduler=None)
        task.configure_optimizers()

    with pytest.raises(TypeError):
        task = ClassificationTask(model, optimizer=(["not", "a", "valid", "type"], {"random_kwarg": 10}), lr_scheduler=None)
        task.configure_optimizers()

    with pytest.raises(TypeError):
        task = ClassificationTask(model, optimizer=("Adam", ["non", "dict", "type"]), lr_scheduler=None)
        task.configure_optimizers()

    with pytest.raises(KeyError):
        task = ClassificationTask(model, optimizer="Adam", lr_scheduler="not_a_valid_key")
        task.configure_optimizers()

    with pytest.raises(TypeError):
        task = ClassificationTask(model, optimizer="Adam", lr_scheduler=["not", "a", "valid", "type"])
        task.configure_optimizers()

    with pytest.raises(TypeError):
        task = ClassificationTask(model, optimizer="Adam", lr_scheduler=(["not", "a", "valid", "type"], {"random_kwarg": 10}))
        task.configure_optimizers()


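# For contrast with the failure cases above, two forms the error checks imply are valid: a
# registered key on its own, and a (key, kwargs-dict) tuple. A sketch only; the "Adam" key is
# taken from the error cases above and the ``amsgrad`` kwarg is illustrative:
def _valid_optimizer_forms_sketch():
    model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10), nn.LogSoftmax())

    # registry key only
    ClassificationTask(model, optimizer="Adam", lr_scheduler=None).configure_optimizers()

    # (registry key, kwargs dict) tuple -- the form the TypeError cases above are guarding
    ClassificationTask(model, optimizer=("Adam", {"amsgrad": True}), lr_scheduler=None).configure_optimizers()

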
def test_optimization(tmpdir):
    model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10), nn.LogSoftmax())
    optim = torch.optim.Adam(model.parameters())
    task = ClassificationTask(model, optimizer=optim, scheduler=None)

    optimizer = task.configure_optimizers()
    assert optimizer == optim

    task = ClassificationTask(model, optimizer=torch.optim.Adadelta, optimizer_kwargs={"eps": 0.5}, scheduler=None)
    optimizer = task.configure_optimizers()
    assert isinstance(optimizer, torch.optim.Adadelta)
    assert optimizer.defaults["eps"] == 0.5

    task = ClassificationTask(
        model,
        optimizer=torch.optim.Adadelta,
        scheduler=torch.optim.lr_scheduler.StepLR,
        scheduler_kwargs={"step_size": 1},
    )
    optimizer, scheduler = task.configure_optimizers()
    assert isinstance(optimizer[0], torch.optim.Adadelta)
    assert isinstance(scheduler[0], torch.optim.lr_scheduler.StepLR)

    optim = torch.optim.Adadelta(model.parameters())
    task = ClassificationTask(model, optimizer=optim, scheduler=torch.optim.lr_scheduler.StepLR(optim, step_size=1))
    optimizer, scheduler = task.configure_optimizers()
    assert isinstance(optimizer[0], torch.optim.Adadelta)
    assert isinstance(scheduler[0], torch.optim.lr_scheduler.StepLR)

    if _TEXT_AVAILABLE:
        from transformers.optimization import get_linear_schedule_with_warmup

        assert isinstance(task.available_schedulers(), list)

        optim = torch.optim.Adadelta(model.parameters())
        with pytest.raises(MisconfigurationException, match="The LightningModule isn't attached to the trainer yet."):
            task = ClassificationTask(model, optimizer=optim, scheduler="linear_schedule_with_warmup")
            optimizer, scheduler = task.configure_optimizers()

        task = ClassificationTask(
            model,
            optimizer=optim,
            scheduler="linear_schedule_with_warmup",
            scheduler_kwargs={"num_warmup_steps": 0.1},
            loss_fn=F.nll_loss,
        )
        trainer = flash.Trainer(max_epochs=1, limit_train_batches=2, gpus=torch.cuda.device_count())
        ds = DummyDataset()
        trainer.fit(task, train_dataloader=DataLoader(ds))
        optimizer, scheduler = task.configure_optimizers()
        assert isinstance(optimizer[0], torch.optim.Adadelta)
        assert isinstance(scheduler[0], torch.optim.lr_scheduler.LambdaLR)
        expected = get_linear_schedule_with_warmup.__name__
        assert scheduler[0].lr_lambdas[0].__qualname__.split(".")[0] == expected


def test_resolve_callbacks_override_warning(tmpdir):
    model = DummyClassifier()
    trainer = Trainer(fast_dev_run=True, default_root_dir=tmpdir)
    task = ClassificationTask(model, loss_fn=F.nll_loss)
    with pytest.warns(UserWarning, match="The model contains a default finetune callback"):
        trainer._resolve_callbacks(task, strategy="no_freeze")