# Imports reconstructed for these tests; the module paths follow the PyTorch
# Lightning test-suite layout of this era and are a best-effort assumption.
import glob
import os
from argparse import Namespace
from pathlib import Path

import pytest
import torch
from torch.utils.data import DataLoader, Subset

import tests.base.develop_pipelines as tpipes
import tests.base.develop_utils as tutils
from pytorch_lightning import Callback, Trainer
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.core.saving import load_hparams_from_tags_csv, load_hparams_from_yaml
from pytorch_lightning.trainer.states import TrainerState
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from tests.base import EvalModelTemplate


# skip marker reconstructed from the "at least 3 gpus" comment below
@pytest.mark.skipif(torch.cuda.device_count() < 3, reason='test requires multi-GPU machine')
def test_batch_size_smaller_than_num_gpus(tmpdir):
    # we need at least 3 gpus for this test
    num_gpus = 3
    batch_size = 3

    class CurrentTestModel(EvalModelTemplate):

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            # batch norm doesn't work with batch size 1, we replace it
            self.c_d1_bn = torch.nn.ReLU()

        def training_step(self, *args, **kwargs):
            output = super().training_step(*args, **kwargs)
            loss = output['loss']
            # we make sure to add some metrics to the output dict,
            # this is essential for this test
            output['progress_bar'] = {'train_loss': loss}
            return output

        def train_dataloader(self):
            dataloader = super().train_dataloader()
            # construct a dataset with a size that is not divisible by num_gpus
            # therefore the last batch will have a size < num_gpus
            size = num_gpus * batch_size + (num_gpus - 1)
            dataset = Subset(dataloader.dataset, range(size))
            dataloader = DataLoader(
                dataset,
                batch_size=self.batch_size,
                drop_last=False,
            )
            return dataloader

    hparams = EvalModelTemplate.get_default_hparams()
    hparams['batch_size'] = batch_size
    model = CurrentTestModel(**hparams)

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        limit_train_batches=0.1,
        limit_val_batches=0,
        gpus=num_gpus,
    )

    # we expect the reduction for the metrics also to happen on the last batch
    # where we will get fewer metrics than gpus
    trainer.fit(model)
    assert trainer.state == TrainerState.FINISHED, f"Training failed with {trainer.state}"


def test_lbfgs_cpu_model(tmpdir):
    """Test the LBFGS optimizer on a CPU model."""
    trainer_options = dict(
        default_root_dir=tmpdir,
        max_epochs=1,
        progress_bar_refresh_rate=0,
        weights_summary='top',
        limit_train_batches=0.2,
        limit_val_batches=0.2,
    )

    hparams = EvalModelTemplate.get_default_hparams()
    hparams.update(optimizer_name='lbfgs', learning_rate=0.004)
    model = EvalModelTemplate(**hparams)
    model.configure_optimizers = model.configure_optimizers__lbfgs

    tpipes.run_model_test_without_loggers(trainer_options, model, min_acc=0.25)


def test_loading_meta_tags(tmpdir):
    hparams = EvalModelTemplate.get_default_hparams()

    # save tags
    logger = tutils.get_default_logger(tmpdir)
    logger.log_hyperparams(Namespace(some_str='a_str', an_int=1, a_float=2.0))
    logger.log_hyperparams(hparams)
    logger.save()

    # load tags
    path_expt_dir = tutils.get_data_path(logger, path_dir=tmpdir)
    tags_path = os.path.join(path_expt_dir, 'meta_tags.csv')
    tags = load_hparams_from_tags_csv(tags_path)

    assert tags.batch_size == 32 and tags.hidden_dim == 1000


def test_trainer_arg_str(tmpdir):
    hparams = EvalModelTemplate.get_default_hparams()
    hparams.__dict__['my_fancy_lr'] = 1.0  # update with non-standard field
    model = EvalModelTemplate(hparams)

    before_lr = hparams.my_fancy_lr
    # logger file to get meta
    trainer = Trainer(
        default_save_path=tmpdir,
        max_epochs=1,
        auto_lr_find='my_fancy_lr',
    )

    trainer.fit(model)
    after_lr = model.hparams.my_fancy_lr

    assert before_lr != after_lr, \
        'Learning rate was not altered after running learning rate finder'


def test_suggestion_with_non_finite_values(tmpdir):
    """ Test that non-finite values do not alter results """
    hparams = EvalModelTemplate.get_default_hparams()
    model = EvalModelTemplate(hparams)

    # logger file to get meta
    trainer = Trainer(
        default_save_path=tmpdir,
        max_epochs=3,
    )

    lrfinder = trainer.lr_find(model)
    before_lr = lrfinder.suggestion()
    lrfinder.results['loss'][-1] = float('nan')
    after_lr = lrfinder.suggestion()

    assert before_lr == after_lr, \
        'Learning rate was altered because of non-finite loss values'


# markers reconstructed; the exact `scale_arg` values are an assumption
@pytest.mark.skipif(not torch.cuda.is_available(), reason='test requires GPU machine')
@pytest.mark.parametrize('scale_arg', ['power', 'binsearch', True])
def test_auto_scale_batch_size_trainer_arg(tmpdir, scale_arg):
    """ Test possible values for 'batch size auto scaling' Trainer argument. """
    tutils.reset_seed()
    hparams = EvalModelTemplate.get_default_hparams()
    model = EvalModelTemplate(**hparams)
    before_batch_size = hparams.get('batch_size')
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        auto_scale_batch_size=scale_arg,
        gpus=1,
    )
    trainer.tune(model)
    after_batch_size = model.batch_size
    assert before_batch_size != after_batch_size, \
        'Batch size was not altered after running auto scaling of batch size'

    assert not os.path.exists(tmpdir / 'scale_batch_size_temp_model.ckpt')


def test_disabled_validation(tmpdir):
    """Verify that `limit_val_batches=0` disables the validation loop unless `fast_dev_run=True`."""

    class CurrentModel(EvalModelTemplate):

        validation_step_invoked = False
        validation_epoch_end_invoked = False

        def validation_step(self, *args, **kwargs):
            self.validation_step_invoked = True
            return super().validation_step(*args, **kwargs)

        def validation_epoch_end(self, *args, **kwargs):
            self.validation_epoch_end_invoked = True
            return super().validation_epoch_end(*args, **kwargs)

    hparams = EvalModelTemplate.get_default_hparams()
    model = CurrentModel(**hparams)

    trainer_options = dict(
        default_root_dir=tmpdir,
        progress_bar_refresh_rate=0,
        max_epochs=2,
        limit_train_batches=0.4,
        limit_val_batches=0.0,
        fast_dev_run=False,
    )

    trainer = Trainer(**trainer_options)
    result = trainer.fit(model)

    # check that limit_val_batches=0 turns off validation
    assert result == 1, "training failed to complete"
    assert trainer.current_epoch == 1
    assert not model.validation_step_invoked, "`validation_step` should not run when `limit_val_batches=0`"
    assert not model.validation_epoch_end_invoked, "`validation_epoch_end` should not run when `limit_val_batches=0`"

    # check that limit_val_batches has no influence when fast_dev_run is turned on
    model = CurrentModel(**hparams)
    trainer_options.update(fast_dev_run=True)
    trainer = Trainer(**trainer_options)
    result = trainer.fit(model)

    assert result == 1, "training failed to complete"
    assert trainer.current_epoch == 0
    assert model.validation_step_invoked, "did not run `validation_step` with `fast_dev_run=True`"
    assert model.validation_epoch_end_invoked, "did not run `validation_epoch_end` with `fast_dev_run=True`"


def test_configure_optimizer_from_dict(tmpdir):
    """Tests that `configure_optimizers` may return a dictionary with only the `optimizer` field."""

    class CurrentModel(EvalModelTemplate):

        def configure_optimizers(self):
            config = {
                'optimizer': torch.optim.SGD(params=self.parameters(), lr=1e-03),
            }
            return config

    hparams = EvalModelTemplate.get_default_hparams()
    model = CurrentModel(hparams)

    # fit model
    trainer = Trainer(default_root_dir=tmpdir, max_epochs=1)
    result = trainer.fit(model)
    assert result == 1


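# Illustrative sketch (not part of the original tests): in addition to the
# 'optimizer'-only dict verified above, `configure_optimizers` may also return
# a dict with an 'lr_scheduler' entry; the SGD/StepLR combination here is an
# arbitrary assumption.
class DictWithSchedulerModel(EvalModelTemplate):

    def configure_optimizers(self):
        optimizer = torch.optim.SGD(params=self.parameters(), lr=1e-03)
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1)
        return {'optimizer': optimizer, 'lr_scheduler': scheduler}

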
def test_reduce_lr_on_plateau_scheduling_missing_monitor(tmpdir):
    hparams = EvalModelTemplate.get_default_hparams()
    model = EvalModelTemplate(**hparams)
    model.configure_optimizers = model.configure_optimizers__reduce_lr_on_plateau

    # fit model
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        limit_val_batches=0.1,
        limit_train_batches=0.2,
    )
    m = '.*ReduceLROnPlateau requires returning a dict from configure_optimizers.*'
    with pytest.raises(MisconfigurationException, match=m):
        trainer.fit(model)


def test_trainer_arg_bool(tmpdir):
    """ Test that setting trainer arg to bool works """
    hparams = EvalModelTemplate.get_default_hparams()
    model = EvalModelTemplate(**hparams)
    before_lr = hparams.get('learning_rate')

    # logger file to get meta
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=2,
        auto_lr_find=True,
    )

    trainer.fit(model)
    after_lr = model.learning_rate

    assert before_lr != after_lr, \
        'Learning rate was not altered after running learning rate finder'


def test_lbfgs_cpu_model(tmpdir):
    """Test the LBFGS optimizer on a CPU model (older Trainer flags)."""
    trainer_options = dict(
        default_root_dir=tmpdir,
        max_epochs=2,
        progress_bar_refresh_rate=0,
        weights_summary='top',
        train_percent_check=1.0,
        val_percent_check=0.2,
    )

    hparams = EvalModelTemplate.get_default_hparams()
    setattr(hparams, 'optimizer_name', 'lbfgs')
    setattr(hparams, 'learning_rate', 0.002)
    model = EvalModelTemplate(hparams)
    model.configure_optimizers = model.configure_optimizers__lbfgs

    tutils.run_model_test_without_loggers(trainer_options, model, min_acc=0.5)


def test_none_optimizer(tmpdir):
    hparams = EvalModelTemplate.get_default_hparams()
    model = EvalModelTemplate(**hparams)
    model.configure_optimizers = model.configure_optimizers__empty

    # fit model
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        limit_val_batches=0.1,
        limit_train_batches=0.2,
    )
    result = trainer.fit(model)

    # verify training completed
    assert result == 1


def test_loading_yaml(tmpdir):
    tutils.reset_seed()
    hparams = EvalModelTemplate.get_default_hparams()

    # save tags
    logger = tutils.get_default_logger(tmpdir)
    logger.log_hyperparams(Namespace(some_str='a_str', an_int=1, a_float=2.0))
    logger.log_hyperparams(hparams)
    logger.save()

    # load hparams
    path_expt_dir = tutils.get_data_path(logger, path_dir=tmpdir)
    hparams_path = os.path.join(path_expt_dir, 'hparams.yaml')
    tags = load_hparams_from_yaml(hparams_path)

    assert tags['batch_size'] == 32 and tags['hidden_dim'] == 1000


# parametrize reconstructed from the signature; the value sets are an assumption
@pytest.mark.parametrize('ckpt_path', [None, 'best', 'specific'])
@pytest.mark.parametrize('save_top_k', [-1, 0, 1, 2])
def test_test_checkpoint_path(tmpdir, ckpt_path, save_top_k):
    hparams = EvalModelTemplate.get_default_hparams()

    loaded_checkpoint_path = ''

    class TestBestModel(EvalModelTemplate):

        @classmethod
        def load_from_checkpoint(cls, checkpoint_path, *args, **kwargs):
            nonlocal loaded_checkpoint_path
            loaded_checkpoint_path = checkpoint_path
            return super().load_from_checkpoint(checkpoint_path, *args, **kwargs)

    model = TestBestModel(**hparams)
    trainer = Trainer(
        max_epochs=2,
        progress_bar_refresh_rate=0,
        default_root_dir=tmpdir,
        checkpoint_callback=ModelCheckpoint(save_top_k=save_top_k),
    )
    trainer.fit(model)
    if ckpt_path == 'best':
        # ckpt_path is 'best', meaning we load the best weights
        if save_top_k <= 0:
            with pytest.raises(MisconfigurationException, match='.*is not configured to save the best.*'):
                trainer.test(ckpt_path=ckpt_path)
        else:
            trainer.test(ckpt_path=ckpt_path)
            assert loaded_checkpoint_path == trainer.checkpoint_callback.best_model_path
    elif ckpt_path is None:
        # ckpt_path is None, meaning we don't load any checkpoints and
        # use the weights from the end of training
        trainer.test(ckpt_path=ckpt_path)
        assert loaded_checkpoint_path == ''
    else:
        # specific checkpoint, pick one from saved ones
        if save_top_k == 0:
            with pytest.raises(FileNotFoundError):
                trainer.test(ckpt_path='random.ckpt')
        else:
            ckpt_path = str(
                list((Path(tmpdir) / 'lightning_logs/version_0/checkpoints').iterdir())[0])
            trainer.test(ckpt_path=ckpt_path)
            assert loaded_checkpoint_path == ckpt_path


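# `CustomLogger`, used by `test_custom_logger` below, is defined elsewhere in
# the suite; this is a minimal sketch of what it presumably looks like. The
# recorded attribute names come from the assertions in the test; the rest of
# the implementation is an assumption.
from pytorch_lightning.loggers import LightningLoggerBase
from pytorch_lightning.utilities import rank_zero_only


class CustomLogger(LightningLoggerBase):

    def __init__(self):
        super().__init__()
        self.hparams_logged = None
        self.metrics_logged = {}
        self.finalized_status = ''

    @property
    def experiment(self):
        return 'test'

    @rank_zero_only
    def log_hyperparams(self, params):
        self.hparams_logged = params

    @rank_zero_only
    def log_metrics(self, metrics, step=None):
        self.metrics_logged = metrics

    @rank_zero_only
    def finalize(self, status):
        self.finalized_status = status

    @property
    def name(self):
        return 'custom'

    @property
    def version(self):
        return '0'

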
def test_custom_logger(tmpdir):
    hparams = EvalModelTemplate.get_default_hparams()
    model = EvalModelTemplate(**hparams)

    logger = CustomLogger()

    trainer = Trainer(
        max_epochs=1,
        limit_train_batches=0.05,
        logger=logger,
        default_root_dir=tmpdir,
    )
    result = trainer.fit(model)
    assert result == 1, "Training failed"
    assert logger.hparams_logged == hparams
    assert logger.metrics_logged != {}
    assert logger.finalized_status == "success"


def test_hparams_save_load(tmpdir):
    model = EvalModelTemplate(vars(EvalModelTemplate.get_default_hparams()))

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
    )

    # fit model
    result = trainer.fit(model)
    assert result == 1

    # try to load the model now
    pretrained_model = tutils.load_model_from_checkpoint(
        trainer.checkpoint_callback.dirpath,
        module_class=EvalModelTemplate,
    )
    assert pretrained_model


def test_none_optimizer(tmpdir):
    hparams = EvalModelTemplate.get_default_hparams()
    model = EvalModelTemplate(**hparams)
    model.configure_optimizers = model.configure_optimizers__empty

    # fit model
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        limit_val_batches=0.1,
        limit_train_batches=0.2,
    )
    trainer.fit(model)

    # verify training completed
    assert trainer.state == TrainerState.FINISHED, f"Training failed with {trainer.state}"


def test_load_model_from_checkpoint(tmpdir):
    """Verify test() on pretrained model."""
    hparams = EvalModelTemplate.get_default_hparams()
    model = EvalModelTemplate(**hparams)

    trainer_options = dict(
        progress_bar_refresh_rate=0,
        max_epochs=2,
        limit_train_batches=0.4,
        limit_val_batches=0.2,
        checkpoint_callback=ModelCheckpoint(tmpdir, save_top_k=-1),
        default_root_dir=tmpdir,
    )

    # fit model
    trainer = Trainer(**trainer_options)
    result = trainer.fit(model)
    trainer.test(ckpt_path=None)

    # correct result and ok accuracy
    assert result == 1, 'training failed to complete'

    # load last checkpoint
    last_checkpoint = sorted(glob.glob(os.path.join(trainer.checkpoint_callback.dirpath, "*.ckpt")))[-1]
    pretrained_model = EvalModelTemplate.load_from_checkpoint(last_checkpoint)

    # test that hparams loaded correctly
    for k, v in hparams.items():
        assert getattr(pretrained_model, k) == v

    # assert weights are the same
    for (old_name, old_p), (new_name, new_p) in zip(model.named_parameters(),
                                                    pretrained_model.named_parameters()):
        assert torch.all(torch.eq(old_p, new_p)), 'loaded weights are not the same as the saved weights'

    new_trainer = Trainer(**trainer_options)
    new_trainer.test(pretrained_model)

    # test we have good test accuracy
    tutils.assert_ok_model_acc(new_trainer)


def test_reduce_lr_on_plateau_scheduling(tmpdir):
    hparams = EvalModelTemplate.get_default_hparams()
    model = EvalModelTemplate(**hparams)
    model.configure_optimizers = model.configure_optimizers__reduce_lr_on_plateau

    # fit model
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        limit_val_batches=0.1,
        limit_train_batches=0.2,
    )
    results = trainer.fit(model)
    assert results == 1

    assert trainer.lr_schedulers[0] == dict(
        scheduler=trainer.lr_schedulers[0]['scheduler'],
        monitor='val_loss',
        interval='epoch',
        frequency=1,
        reduce_on_plateau=True,
    ), 'lr scheduler was not correctly converted to dict'


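# A minimal sketch of what `configure_optimizers__reduce_lr_on_plateau`
# presumably returns (the optimizer choice and learning rate are assumptions):
# a pair of lists that the Trainer then normalizes into the scheduler dict
# asserted above.
def _reduce_lr_on_plateau_optimizers(model):
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-03)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)
    return [optimizer], [scheduler]

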
# parametrize reconstructed from the signature; the values are an assumption
@pytest.mark.parametrize("optimizer", ["adagrad", "adam"])
def test_call_to_trainer_method(tmpdir, optimizer):
    """Test that directly calling the trainer method works"""
    hparams = EvalModelTemplate.get_default_hparams()
    model = EvalModelTemplate(**hparams)
    if optimizer == "adagrad":
        model.configure_optimizers = model.configure_optimizers__adagrad

    before_lr = hparams.get("learning_rate")
    # logger file to get meta
    trainer = Trainer(default_root_dir=tmpdir, max_epochs=2)

    lrfinder = trainer.tuner.lr_find(model, mode="linear")
    after_lr = lrfinder.suggestion()
    model.learning_rate = after_lr
    trainer.tune(model)

    assert before_lr != after_lr, "Learning rate was not altered after running learning rate finder"


# parametrize reconstructed from the signature; the values are an assumption
@pytest.mark.parametrize('scale_method', ['power', 'binsearch'])
def test_call_to_trainer_method(tmpdir, scale_method):
    """Test that calling the trainer method itself works."""
    tutils.reset_seed()

    hparams = EvalModelTemplate.get_default_hparams()
    model = EvalModelTemplate(**hparams)

    before_batch_size = hparams.get("batch_size")
    # logger file to get meta
    trainer = Trainer(default_root_dir=tmpdir, max_epochs=1)

    after_batch_size = trainer.tuner.scale_batch_size(model, mode=scale_method, max_trials=5)
    model.batch_size = after_batch_size
    trainer.fit(model)

    assert before_batch_size != after_batch_size, "Batch size was not altered after running auto scaling of batch size"


def test_wrong_train_setting(tmpdir):
    """
    * Test that an error is thrown when no `train_dataloader()` is defined
    * Test that an error is thrown when no `training_step()` is defined
    """
    tutils.reset_seed()
    hparams = EvalModelTemplate.get_default_hparams()
    trainer = Trainer(default_root_dir=tmpdir, max_epochs=1)

    with pytest.raises(MisconfigurationException):
        model = EvalModelTemplate(**hparams)
        model.train_dataloader = None
        trainer.fit(model)

    with pytest.raises(MisconfigurationException):
        model = EvalModelTemplate(**hparams)
        model.training_step = None
        trainer.fit(model)


def test_suggestion_parameters_work(tmpdir):
    """ Test that the `skip_begin` parameter influences the suggested learning rate """
    hparams = EvalModelTemplate.get_default_hparams()
    model = EvalModelTemplate(**hparams)

    # logger file to get meta
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=3,
    )

    lrfinder = trainer.tuner.lr_find(model)
    lr1 = lrfinder.suggestion(skip_begin=10)  # default
    lr2 = lrfinder.suggestion(skip_begin=80)  # way too high, should have an impact

    assert lr1 != lr2, \
        'Skipping parameter did not influence learning rate'


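# `StateSnapshotCallback`, used by `test_running_state_during_test` below, is
# defined elsewhere in the suite; a minimal sketch of what it presumably does
# (the constructor argument and attribute names are taken from the test, the
# hook dispatch is an assumption): record `trainer.state` when the configured
# hook fires.
class StateSnapshotCallback(Callback):

    def __init__(self, snapshot_method: str):
        super().__init__()
        assert snapshot_method in ['on_batch_start', 'on_test_batch_start']
        self.snapshot_method = snapshot_method
        self.trainer_state = None

    def on_batch_start(self, trainer, pl_module):
        if self.snapshot_method == 'on_batch_start':
            self.trainer_state = trainer.state

    def on_test_batch_start(self, trainer, pl_module, *args, **kwargs):
        if self.snapshot_method == 'on_test_batch_start':
            self.trainer_state = trainer.state

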
def test_running_state_during_test(tmpdir):
    """ Tests that state is set to RUNNING during test """
    hparams = EvalModelTemplate.get_default_hparams()
    model = EvalModelTemplate(**hparams)

    snapshot_callback = StateSnapshotCallback(snapshot_method='on_test_batch_start')

    trainer = Trainer(
        callbacks=[snapshot_callback],
        default_root_dir=tmpdir,
        fast_dev_run=True,
    )

    trainer.test(model)

    assert snapshot_callback.trainer_state == TrainerState.RUNNING


def test_test_loop_config(tmpdir):
    """ Warn when either the test loop or the test data is missing """
    hparams = EvalModelTemplate.get_default_hparams()
    trainer = Trainer(default_root_dir=tmpdir, max_epochs=1)

    # has test loop but no test data
    with pytest.warns(UserWarning):
        model = EvalModelTemplate(**hparams)
        model.test_dataloader = None
        trainer.test(model)

    # has test data but no test loop
    with pytest.warns(UserWarning):
        model = EvalModelTemplate(**hparams)
        model.test_step = None
        trainer.test(model, test_dataloaders=model.dataloader(train=False))


def test_logger_reset_correctly(tmpdir):
    """ Test that the learning rate finder does not alter the trainer's or the model's logger """
    tutils.reset_seed()

    hparams = EvalModelTemplate.get_default_hparams()
    model = EvalModelTemplate(hparams)

    trainer = Trainer(
        default_save_path=tmpdir,
        max_epochs=10,
        auto_lr_find=True,
    )
    logger1 = trainer.logger
    trainer.fit(model)
    logger2 = trainer.logger
    logger3 = model.logger

    assert logger1 == logger2, \
        'Learning rate finder altered the logger of trainer'
    assert logger2 == logger3, \
        'Learning rate finder altered the logger of model'


# parametrize reconstructed from the signature; the values are an assumption
@pytest.mark.parametrize("use_hparams", [False, True])
def test_trainer_arg_bool(tmpdir, use_hparams):
    """Test that setting trainer arg to bool works"""
    hparams = EvalModelTemplate.get_default_hparams()
    model = EvalModelTemplate(**hparams)
    before_lr = hparams.get("learning_rate")
    if use_hparams:
        del model.learning_rate
        model.configure_optimizers = model.configure_optimizers__lr_from_hparams

    # logger file to get meta
    trainer = Trainer(default_root_dir=tmpdir, max_epochs=2, auto_lr_find=True)
    trainer.tune(model)

    if use_hparams:
        after_lr = model.hparams.learning_rate
    else:
        after_lr = model.learning_rate

    assert before_lr != after_lr, "Learning rate was not altered after running learning rate finder"


# parametrize reconstructed from the signature; the `extra_params` values are an assumption
@pytest.mark.parametrize('extra_params', [dict(fast_dev_run=True), dict(max_steps=1)])
def test_interrupt_state_on_keyboard_interrupt(tmpdir, extra_params):
    """ Tests that state is set to INTERRUPTED on KeyboardInterrupt """
    hparams = EvalModelTemplate.get_default_hparams()
    model = EvalModelTemplate(**hparams)

    class InterruptCallback(Callback):

        def __init__(self):
            super().__init__()

        def on_batch_start(self, trainer, pl_module):
            raise KeyboardInterrupt

    trainer = Trainer(
        callbacks=[InterruptCallback()],
        default_root_dir=tmpdir,
        **extra_params,
    )

    trainer.fit(model)
    assert trainer.state == TrainerState.INTERRUPTED


# parametrize reconstructed from the signature; the values are an assumption
@pytest.mark.parametrize('scale_arg', [True, 'power', 'binsearch'])
def test_trainer_arg(tmpdir, scale_arg):
    """ Check that the `auto_scale_batch_size` trainer arg works. """
    tutils.reset_seed()

    hparams = EvalModelTemplate.get_default_hparams()
    model = EvalModelTemplate(hparams)

    before_batch_size = hparams.batch_size
    # logger file to get meta
    trainer = Trainer(
        default_save_path=tmpdir,
        max_epochs=1,
        auto_scale_batch_size=scale_arg,
    )

    trainer.fit(model)
    after_batch_size = model.hparams.batch_size

    assert before_batch_size != after_batch_size, \
        'Batch size was not altered after running auto scaling of batch size'


def test_val_loop_config(tmpdir):
    """ Warn when either the val loop or the val data is missing """
    tutils.reset_seed()
    hparams = EvalModelTemplate.get_default_hparams()
    trainer = Trainer(default_root_dir=tmpdir, max_epochs=1)

    # has val data but no val loop
    with pytest.warns(UserWarning):
        model = EvalModelTemplate(**hparams)
        model.validation_step = None
        trainer.fit(model)

    # has val loop but no val data
    with pytest.warns(UserWarning):
        model = EvalModelTemplate(**hparams)
        model.val_dataloader = None
        trainer.fit(model)