# Imports assumed from the PyTorch Lightning test suite; exact module paths
# may differ between versions.
import os
from pathlib import Path
from unittest import mock

import pytest
import torch

import tests.base.develop_pipelines as tpipes
import tests.base.develop_utils as tutils
from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning.callbacks import EarlyStopping
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from tests.base import EvalModelTemplate
from tests.base.datamodules import TrialMNISTDataModule

# `CustomMNISTDataModule` (used below) is assumed to be defined alongside
# these tests; it is not part of this excerpt.


def test_warning_on_wrong_test_settings(tmpdir):
    """
    Test the following cases related to the test configuration of a model:
        * error if `test_dataloader()` is overridden but `test_step()` is not
        * if both `test_dataloader()` and `test_step()` are overridden,
          warn if `test_epoch_end()` is not defined
        * error if `test_step()` is overridden but `test_dataloader()` is not
    """
    tutils.reset_seed()
    hparams = tutils.get_default_hparams()
    trainer = Trainer(default_root_dir=tmpdir, max_epochs=1)

    # ----------------
    # if have test_dataloader should have test_step
    # ----------------
    with pytest.raises(MisconfigurationException):
        model = EvalModelTemplate(hparams)
        model.test_step = None
        trainer.fit(model)

    # ----------------
    # if have test_dataloader and test_step recommend test_epoch_end
    # ----------------
    with pytest.warns(RuntimeWarning):
        model = EvalModelTemplate(hparams)
        model.test_epoch_end = None
        trainer.test(model)

    # ----------------
    # if have test_step and NO test_dataloader passed in tell user to pass test_dataloader
    # ----------------
    with pytest.raises(MisconfigurationException):
        model = EvalModelTemplate(hparams)
        model.test_dataloader = lambda: None
        trainer.test(model)

    # ----------------
    # if have test_dataloader and NO test_step tell user to implement test_step
    # ----------------
    with pytest.raises(MisconfigurationException):
        model = EvalModelTemplate(hparams)
        model.test_dataloader = lambda: None
        model.test_step = None
        trainer.test(model, test_dataloaders=model.dataloader(train=False))

    # ----------------
    # if have test_dataloader and test_step but no test_epoch_end warn user
    # ----------------
    with pytest.warns(RuntimeWarning):
        model = EvalModelTemplate(hparams)
        model.test_dataloader = lambda: None
        model.test_epoch_end = None
        trainer.test(model, test_dataloaders=model.dataloader(train=False))
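
# TPU-only test: in the original suite this function carries TPU-availability
# markers that are not part of this excerpt. The guard below is a hypothetical
# stand-in based on the env var torch_xla uses:
@pytest.mark.skipif(not os.getenv('XRT_TPU_CONFIG'), reason='test requires a TPU machine')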
def test_result_obj_on_tpu(tmpdir):
    seed_everything(1234)

    batches = 5
    epochs = 2

    model = EvalModelTemplate()
    model.training_step = model.training_step_result_obj
    model.training_step_end = None
    model.training_epoch_end = None
    model.validation_step = model.validation_step_result_obj
    model.validation_step_end = None
    model.validation_epoch_end = None
    model.test_step = model.test_step_result_obj
    model.test_step_end = None
    model.test_epoch_end = None

    trainer_options = dict(
        default_root_dir=tmpdir,
        max_epochs=epochs,
        callbacks=[EarlyStopping()],
        log_every_n_steps=2,
        limit_train_batches=batches,
        weights_summary=None,
        tpu_cores=8,
    )

    tpipes.run_model_test(trainer_options, model, on_gpu=False, with_hpc=False)
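
# The fixture arguments below imply a `parametrize` decorator that is not
# part of this excerpt; the values here are illustrative only.
@pytest.mark.parametrize(
    ['limit_train_batches', 'limit_val_batches', 'limit_test_batches'],
    [(2, 2, 2), (10, 10, 10)],  # hypothetical values
)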
def test_dataloaders_with_limit_num_batches(tmpdir, limit_train_batches, limit_val_batches, limit_test_batches):
    """Verify num_batches for train, val & test dataloaders passed with batch limit as number"""
    model = EvalModelTemplate()
    model.val_dataloader = model.val_dataloader__multiple_mixed_length
    model.test_dataloader = model.test_dataloader__multiple_mixed_length
    model.validation_step = model.validation_step__multiple_dataloaders
    model.validation_epoch_end = model.validation_epoch_end__multiple_dataloaders
    model.test_step = model.test_step__multiple_dataloaders
    model.test_epoch_end = model.test_epoch_end__multiple_dataloaders

    # train, multiple val and multiple test dataloaders passed with a batch limit
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        limit_train_batches=limit_train_batches,
        limit_val_batches=limit_val_batches,
        limit_test_batches=limit_test_batches,
    )
    trainer.fit(model)
    assert trainer.num_training_batches == limit_train_batches
    assert trainer.num_val_batches == [limit_val_batches] * len(trainer.val_dataloaders)

    trainer.test(ckpt_path=None)
    assert trainer.num_test_batches == [limit_test_batches] * len(trainer.test_dataloaders)
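
# Older variant of `test_result_obj_on_tpu` above, using pre-1.0 Trainer
# argument names (`early_stop_callback`, `row_log_interval`). As above, a
# hypothetical TPU-availability guard stands in for the real markers:
@pytest.mark.skipif(not os.getenv('XRT_TPU_CONFIG'), reason='test requires a TPU machine')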
def test_result_obj_on_tpu(tmpdir):
    seed_everything(1234)
    os.environ['PL_DEV_DEBUG'] = '1'

    batches = 5
    epochs = 2

    model = EvalModelTemplate()
    model.training_step = model.training_step_result_obj
    model.training_step_end = None
    model.training_epoch_end = None
    model.validation_step = model.validation_step_result_obj
    model.validation_step_end = None
    model.validation_epoch_end = None
    model.test_step = model.test_step_result_obj
    model.test_step_end = None
    model.test_epoch_end = None

    trainer_options = dict(
        default_root_dir=tmpdir,
        max_epochs=epochs,
        early_stop_callback=True,
        row_log_interval=2,
        limit_train_batches=batches,
        weights_summary=None,
        tpu_cores=8,
    )

    tpipes.run_model_test(trainer_options, model, on_gpu=False, with_hpc=False)
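
# Percent-based counterpart of the limit test above; the fixture values are
# again assumed, not part of this excerpt.
@pytest.mark.parametrize(
    ['limit_train_batches', 'limit_val_batches', 'limit_test_batches'],
    [(0.1, 0.1, 0.1), (0.35, 0.35, 0.35)],  # hypothetical values
)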
def test_dataloaders_with_limit_percent_batches(tmpdir, limit_train_batches, limit_val_batches, limit_test_batches):
    """Verify num_batches for train, val & test dataloaders passed with batch limit in percent"""
    model = EvalModelTemplate()
    model.val_dataloader = model.val_dataloader__multiple_mixed_length
    model.test_dataloader = model.test_dataloader__multiple_mixed_length
    model.validation_step = model.validation_step__multiple_dataloaders
    model.validation_epoch_end = model.validation_epoch_end__multiple_dataloaders
    model.test_step = model.test_step__multiple_dataloaders
    model.test_epoch_end = model.test_epoch_end__multiple_dataloaders

    # train, multiple val and multiple test passed with percent_check
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        limit_train_batches=limit_train_batches,
        limit_val_batches=limit_val_batches,
        limit_test_batches=limit_test_batches,
    )
    trainer.fit(model)

    expected_train_batches = int(len(trainer.train_dataloader) * limit_train_batches)
    expected_val_batches = [
        int(len(dataloader) * limit_val_batches) for dataloader in trainer.val_dataloaders
    ]
    assert trainer.num_training_batches == expected_train_batches
    assert trainer.num_val_batches == expected_val_batches

    trainer.test(ckpt_path=None)
    expected_test_batches = [
        int(len(dataloader) * limit_test_batches) for dataloader in trainer.test_dataloaders
    ]
    assert trainer.num_test_batches == expected_test_batches

def test_dataloaders_with_fast_dev_run(tmpdir):
    """Verify num_batches for train, val & test dataloaders passed with fast_dev_run = True"""
    model = EvalModelTemplate()
    model.val_dataloader = model.val_dataloader__multiple_mixed_length
    model.test_dataloader = model.test_dataloader__multiple_mixed_length
    model.validation_step = model.validation_step__multiple_dataloaders
    model.validation_epoch_end = model.validation_epoch_end__multiple_dataloaders
    model.test_step = model.test_step__multiple_dataloaders
    model.test_epoch_end = model.test_epoch_end__multiple_dataloaders

    # train, multiple val and multiple test dataloaders passed with fast_dev_run = True
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=2,
        fast_dev_run=True,
    )
    assert trainer.max_epochs == 1
    assert trainer.num_sanity_val_steps == 0

    trainer.fit(model)
    assert not trainer.disable_validation
    assert trainer.num_training_batches == 1
    assert trainer.num_val_batches == [1] * len(trainer.val_dataloaders)

    trainer.test(ckpt_path=None)
    assert trainer.num_test_batches == [1] * len(trainer.test_dataloaders)

    # verify sanity check batches match as expected
    num_val_dataloaders = len(model.val_dataloader())
    assert trainer.dev_debugger.num_seen_sanity_check_batches == trainer.num_sanity_val_steps * num_val_dataloaders
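
# The `mock` and `ckpt_path` arguments below imply patch/parametrize
# decorators that are not part of this excerpt; a plausible reconstruction
# (treat the patch target and the values as assumptions):
@mock.patch('pytorch_lightning.trainer.data_loading.multiprocessing.cpu_count', return_value=4)
@pytest.mark.parametrize('ckpt_path', [None, 'best', 'specific'])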
def test_warning_with_few_workers_multi_loader(mock, tmpdir, ckpt_path):
    """ Test that a warning is raised if a dataloader with few workers is used """
    model = EvalModelTemplate()
    model.training_step = model.training_step__multiple_dataloaders
    model.validation_step = model.validation_step__multiple_dataloaders
    model.validation_epoch_end = model.validation_epoch_end__multiple_dataloaders
    model.test_step = model.test_step__multiple_dataloaders
    model.test_epoch_end = model.test_epoch_end__multiple_dataloaders

    val_dl = model.dataloader(train=False)
    val_dl.num_workers = 0

    train_dl = model.dataloader(train=False)
    train_dl.num_workers = 0

    train_multi_dl = {'a': train_dl, 'b': train_dl}
    val_multi_dl = [val_dl, val_dl]
    test_multi_dl = [train_dl, train_dl]

    fit_options = dict(train_dataloader=train_multi_dl, val_dataloaders=val_multi_dl)
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        limit_val_batches=0.1,
        limit_train_batches=0.2,
    )

    # fit model
    with pytest.warns(
        UserWarning,
        match='The dataloader, train dataloader, does not have many workers which may be a bottleneck.',
    ):
        trainer.fit(model, **fit_options)

    with pytest.warns(
        UserWarning,
        match='The dataloader, val dataloader 0, does not have many workers which may be a bottleneck.',
    ):
        trainer.fit(model, **fit_options)

    if ckpt_path == 'specific':
        ckpt_path = trainer.checkpoint_callback.best_model_path
    test_options = dict(test_dataloaders=test_multi_dl, ckpt_path=ckpt_path)
    with pytest.warns(
        UserWarning,
        match='The dataloader, test dataloader 0, does not have many workers which may be a bottleneck.',
    ):
        trainer.test(**test_options)
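
# Parametrized variant of `test_dataloaders_with_fast_dev_run` above. The body
# handles an invalid type ('temp'), a negative value, 1 (coerced to True) and
# larger ints, so hypothetical values along those lines:
@pytest.mark.parametrize('fast_dev_run', ['temp', -1, 1, 4])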
def test_dataloaders_with_fast_dev_run(tmpdir, fast_dev_run):
    """
    Verify num_batches for train, val & test dataloaders passed with fast_dev_run
    """
    model = EvalModelTemplate()
    model.val_dataloader = model.val_dataloader__multiple_mixed_length
    model.test_dataloader = model.test_dataloader__multiple_mixed_length
    model.validation_step = model.validation_step__multiple_dataloaders
    model.validation_epoch_end = model.validation_epoch_end__multiple_dataloaders
    model.test_step = model.test_step__multiple_dataloaders
    model.test_epoch_end = model.test_epoch_end__multiple_dataloaders

    trainer_options = dict(
        default_root_dir=tmpdir,
        max_epochs=2,
        fast_dev_run=fast_dev_run,
    )

    if fast_dev_run == 'temp':
        with pytest.raises(MisconfigurationException, match='either a bool or an int'):
            trainer = Trainer(**trainer_options)
    elif fast_dev_run == -1:
        with pytest.raises(MisconfigurationException, match='should be >= 0'):
            trainer = Trainer(**trainer_options)
    else:
        trainer = Trainer(**trainer_options)

        # fast_dev_run is set to True when it is 1
        if fast_dev_run == 1:
            fast_dev_run = True

        assert trainer.fast_dev_run is fast_dev_run

        if fast_dev_run is True:
            fast_dev_run = 1

        assert trainer.limit_train_batches == fast_dev_run
        assert trainer.limit_val_batches == fast_dev_run
        assert trainer.limit_test_batches == fast_dev_run
        assert trainer.num_sanity_val_steps == 0
        assert trainer.max_epochs == 1

        trainer.fit(model)
        assert not trainer.disable_validation
        assert trainer.num_training_batches == fast_dev_run
        assert trainer.num_val_batches == [fast_dev_run] * len(trainer.val_dataloaders)

        trainer.test(ckpt_path=None)
        assert trainer.num_test_batches == [fast_dev_run] * len(trainer.test_dataloaders)

        # verify sanity check batches match as expected
        num_val_dataloaders = len(model.val_dataloader())
        assert trainer.dev_debugger.num_seen_sanity_check_batches == trainer.num_sanity_val_steps * num_val_dataloaders
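
# Multi-GPU test (ddp_spawn over two devices); a standard availability guard
# that is not part of this excerpt:
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason='test requires multi-GPU machine')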
def test_result_obj_predictions_ddp_spawn(tmpdir):
    seed_everything(4321)

    distributed_backend = 'ddp_spawn'
    option = 0

    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'

    dm = TrialMNISTDataModule(tmpdir)

    prediction_file = Path('predictions.pt')

    model = EvalModelTemplate(learning_rate=0.005)
    model.test_option = option
    model.prediction_file = prediction_file.as_posix()
    model.test_step = model.test_step_result_preds
    model.test_step_end = None
    model.test_epoch_end = None
    model.test_end = None

    prediction_files = [Path('predictions_rank_0.pt'), Path('predictions_rank_1.pt')]
    for prediction_file in prediction_files:
        if prediction_file.exists():
            prediction_file.unlink()

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=3,
        weights_summary=None,
        deterministic=True,
        distributed_backend=distributed_backend,
        gpus=[0, 1],
    )

    # Prediction file shouldn't exist yet because we haven't done anything
    # assert not model.prediction_file.exists()

    result = trainer.fit(model, dm)
    assert result == 1

    result = trainer.test(datamodule=dm)
    result = result[0]
    assert result['test_loss'] < 0.6
    assert result['test_acc'] > 0.8

    dm.setup('test')

    # check prediction files now exist and together cover the full test set
    size = 0
    for prediction_file in prediction_files:
        assert prediction_file.exists()
        predictions = torch.load(prediction_file)
        size += len(predictions)
    assert size == len(dm.mnist_test)
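
# Dev-debugger-aware variant of `test_dataloaders_with_limit_num_batches`
# above. The `> 1e10` branch below suggests the original parametrization also
# included a very large limit; the values here are illustrative only.
@pytest.mark.parametrize(
    ['limit_train_batches', 'limit_val_batches', 'limit_test_batches'],
    [(2, 2, 2), (3, 3, 3)],  # hypothetical values
)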
def test_dataloaders_with_limit_num_batches(tmpdir, limit_train_batches, limit_val_batches, limit_test_batches):
    """Verify num_batches for train, val & test dataloaders passed with batch limit as number"""
    os.environ['PL_DEV_DEBUG'] = '1'

    model = EvalModelTemplate()
    model.val_dataloader = model.val_dataloader__multiple_mixed_length
    model.test_dataloader = model.test_dataloader__multiple_mixed_length
    model.validation_step = model.validation_step__multiple_dataloaders
    model.validation_epoch_end = model.validation_epoch_end__multiple_dataloaders
    model.test_step = model.test_step__multiple_dataloaders
    model.test_epoch_end = model.test_epoch_end__multiple_dataloaders

    # train, multiple val and multiple test dataloaders passed with a batch limit
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        limit_train_batches=limit_train_batches,
        limit_val_batches=limit_val_batches,
        limit_test_batches=limit_test_batches,
    )
    trainer.fit(model)

    # -------------------------------------------
    # MAKE SURE THE TRAINER SET THE CORRECT VALUES
    # -------------------------------------------
    assert trainer.num_training_batches == limit_train_batches
    assert trainer.num_val_batches == [limit_val_batches] * len(trainer.val_dataloaders)
    trainer.test(ckpt_path=None)

    # when the limit is greater than the number of test batches it should be the num in loaders
    test_dataloader_lengths = [len(x) for x in model.test_dataloader()]
    if limit_test_batches > 1e10:
        assert trainer.num_test_batches == test_dataloader_lengths
    else:
        assert trainer.num_test_batches == [limit_test_batches] * len(trainer.test_dataloaders)

    # -------------------------------------------
    # make sure we actually saw the expected num of batches
    # -------------------------------------------
    num_val_dataloaders = len(model.val_dataloader())
    num_test_dataloaders = len(model.test_dataloader())
    if limit_train_batches > 0:
        # make sure val batches are as expected
        assert len(trainer.dev_debugger.num_seen_val_check_batches) == num_val_dataloaders
        for dataloader_idx, num_batches in trainer.dev_debugger.num_seen_val_check_batches.items():
            assert num_batches == limit_val_batches

        # make sure test batches are as expected
        assert len(trainer.dev_debugger.num_seen_test_check_batches) == num_test_dataloaders
        for dataloader_idx, num_batches in trainer.dev_debugger.num_seen_test_check_batches.items():
            if limit_test_batches > 1e10:
                assert num_batches == test_dataloader_lengths[dataloader_idx]
            else:
                assert num_batches == limit_test_batches
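
# dp over two GPUs; a standard availability guard that is not part of this
# excerpt:
@pytest.mark.skipif(torch.cuda.device_count() < 2, reason='test requires multi-GPU machine')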
def test_val_step_full_loop_result_dp(tmpdir):
    # TODO: finish the full train, val, test loop with dp
    os.environ['PL_DEV_DEBUG'] = '1'

    batches = 10
    epochs = 3

    model = EvalModelTemplate()
    model.training_step = model.training_step_full_loop_result_obj_dp
    model.training_step_end = model.training_step_end_full_loop_result_obj_dp
    model.training_epoch_end = model.training_epoch_end_full_loop_result_obj_dp
    model.validation_step = model.eval_step_full_loop_result_obj_dp
    model.validation_step_end = model.eval_step_end_full_loop_result_obj_dp
    model.validation_epoch_end = model.eval_epoch_end_full_loop_result_obj_dp
    model.test_step = model.eval_step_full_loop_result_obj_dp
    model.test_step_end = model.eval_step_end_full_loop_result_obj_dp
    model.test_epoch_end = model.eval_epoch_end_full_loop_result_obj_dp

    trainer = Trainer(
        default_root_dir=tmpdir,
        distributed_backend='dp',
        gpus=[0, 1],
        max_epochs=epochs,
        early_stop_callback=True,
        log_every_n_steps=2,
        limit_train_batches=batches,
        weights_summary=None,
    )

    trainer.fit(model)

    results = trainer.test()

    # assert we returned all metrics requested
    assert len(results) == 1
    results = results[0]
    assert 'test_epoch_end_metric' in results

    # make sure we saw all the correct keys along all paths
    seen_keys = set()
    for metric in trainer.dev_debugger.logged_metrics:
        seen_keys.update(metric.keys())

    assert 'train_step_metric' in seen_keys
    assert 'train_step_end_metric' in seen_keys
    assert 'train_epoch_end_metric_epoch' in seen_keys

    assert 'validation_step_metric_step/epoch_0' in seen_keys
    assert 'validation_step_metric_epoch' in seen_keys
    assert 'validation_step_end_metric' in seen_keys
    assert 'validation_epoch_end_metric' in seen_keys

    assert 'test_step_metric_step/epoch_2' in seen_keys
    assert 'test_step_metric_epoch' in seen_keys
    assert 'test_step_end_metric' in seen_keys
    assert 'test_epoch_end_metric' in seen_keys

def test_fit_val_loader_only(tmpdir):
    model = EvalModelTemplate()
    train_dataloader = model.train_dataloader()
    val_dataloader = model.val_dataloader()

    model.train_dataloader = None
    model.val_dataloader = None
    model.test_dataloader = None

    model.test_step = None
    model.test_epoch_end = None

    trainer = Trainer(fast_dev_run=True, default_root_dir=tmpdir)
    trainer.fit(model, train_dataloader=train_dataloader, val_dataloaders=val_dataloader)

def test_full_loop_result_cpu(tmpdir):
    seed_everything(1234)
    os.environ['PL_DEV_DEBUG'] = '1'

    batches = 10
    epochs = 2

    model = EvalModelTemplate()
    model.training_step = model.training_step_result_obj
    model.training_step_end = None
    model.training_epoch_end = None
    model.validation_step = model.validation_step_result_obj
    model.validation_step_end = None
    model.validation_epoch_end = None
    model.test_step = model.test_step_result_obj
    model.test_step_end = None
    model.test_epoch_end = None

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=epochs,
        early_stop_callback=True,
        log_every_n_steps=2,
        limit_train_batches=batches,
        weights_summary=None,
    )

    trainer.fit(model)

    results = trainer.test()

    # assert we returned all metrics requested
    assert len(results) == 1
    results = results[0]
    assert results['test_loss'] < 0.3
    assert results['test_acc'] > 0.9
    assert len(results) == 2
    assert 'early_stop_on' not in results
    assert 'checkpoint_on' not in results

    results2 = trainer.test()[0]
    for k, v in results.items():
        assert results2[k] == v
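
# `test_option`, `do_train` and `gpus` are fixture parameters; the original
# parametrization is not in this excerpt, so the values below are illustrative
# only.
@pytest.mark.parametrize(['test_option', 'do_train', 'gpus'], [(0, True, 0), (1, False, 0)])  # hypothetical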
def test_result_obj_predictions(tmpdir, test_option, do_train, gpus):
    tutils.reset_seed()

    dm = TrialMNISTDataModule(tmpdir)
    prediction_file = Path(tmpdir) / 'predictions.pt'

    model = EvalModelTemplate()
    model.test_option = test_option
    model.prediction_file = prediction_file.as_posix()
    model.test_step = model.test_step_result_preds
    model.test_step_end = None
    model.test_epoch_end = None
    model.test_end = None

    if prediction_file.exists():
        prediction_file.unlink()

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=3,
        weights_summary=None,
        deterministic=True,
        gpus=gpus,
    )

    # Prediction file shouldn't exist yet because we haven't done anything
    assert not prediction_file.exists()

    if do_train:
        result = trainer.fit(model, dm)
        assert result == 1
        result = trainer.test(datamodule=dm)
        result = result[0]
        assert result['test_loss'] < 0.6
        assert result['test_acc'] > 0.8
    else:
        result = trainer.test(model, datamodule=dm)

    # check prediction file now exists and is of expected length
    assert prediction_file.exists()
    predictions = torch.load(prediction_file)
    assert len(predictions) == len(dm.mnist_test)
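
# Parametrized rewrite of `test_warning_with_few_workers_multi_loader` above;
# the patch target and the parametrized values are assumed, as before.
@mock.patch('pytorch_lightning.trainer.data_loading.multiprocessing.cpu_count', return_value=4)
@pytest.mark.parametrize('ckpt_path', [None, 'best', 'specific'])
@pytest.mark.parametrize('stage', ['train', 'val', 'test'])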
def test_warning_with_few_workers_multi_loader(_, tmpdir, ckpt_path, stage):
    """ Test that a warning is raised if a dataloader with few workers is used """
    model = EvalModelTemplate()
    model.training_step = model.training_step__multiple_dataloaders
    model.validation_step = model.validation_step__multiple_dataloaders
    model.validation_epoch_end = model.validation_epoch_end__multiple_dataloaders
    model.test_step = model.test_step__multiple_dataloaders
    model.test_epoch_end = model.test_epoch_end__multiple_dataloaders

    val_dl = model.dataloader(train=False)
    val_dl.num_workers = 0

    train_dl = model.dataloader(train=False)
    train_dl.num_workers = 0

    train_multi_dl = {'a': train_dl, 'b': train_dl}
    val_multi_dl = [val_dl, val_dl]
    test_multi_dl = [train_dl, train_dl]

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        limit_val_batches=0.1,
        limit_train_batches=0.2,
    )

    with pytest.warns(
        UserWarning,
        match=f'The dataloader, {stage} dataloader{" 0" if stage != "train" else ""}, does not have many workers',
    ):
        if stage == 'test':
            ckpt_path = trainer.checkpoint_callback.best_model_path if ckpt_path == 'specific' else ckpt_path
            trainer.test(model, test_dataloaders=test_multi_dl, ckpt_path=ckpt_path)
        else:
            trainer.fit(model, train_dataloader=train_multi_dl, val_dataloaders=val_multi_dl)

def test_dm_reload_dataloaders_every_epoch(tmpdir):
    """Test datamodule with the trainer argument reload_dataloaders_every_epoch set to True"""
    dm = CustomMNISTDataModule(tmpdir)

    model = EvalModelTemplate()
    model.validation_step = None
    model.validation_step_end = None
    model.validation_epoch_end = None
    model.test_step = None
    model.test_step_end = None
    model.test_epoch_end = None

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=2,
        limit_train_batches=0.01,
        reload_dataloaders_every_epoch=True,
    )
    trainer.fit(model, dm)

def test_train_loop_only(tmpdir):
    dm = TrialMNISTDataModule(tmpdir)

    model = EvalModelTemplate()
    model.validation_step = None
    model.validation_step_end = None
    model.validation_epoch_end = None
    model.test_step = None
    model.test_step_end = None
    model.test_epoch_end = None

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=3,
        weights_summary=None,
    )

    # fit model
    result = trainer.fit(model, dm)
    assert result == 1
    assert trainer.logger_connector.callback_metrics['loss'] < 0.6