def test_multi_optimizer_with_scheduling_stepping(tmpdir):
    """Check that two schedulers attached to two optimizers both step as configured."""
    hparams = EvalModelTemplate.get_default_hparams()
    model = EvalModelTemplate(**hparams)
    model.configure_optimizers = model.configure_optimizers__multiple_schedulers

    # fit model
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        limit_val_batches=0.1,
        limit_train_batches=0.2,
    )
    results = trainer.fit(model)
    assert results == 1

    init_lr = hparams.get('learning_rate')
    adjusted_lr1 = [pg['lr'] for pg in trainer.optimizers[0].param_groups]
    adjusted_lr2 = [pg['lr'] for pg in trainer.optimizers[1].param_groups]

    assert len(trainer.lr_schedulers) == 2, \
        'all lr scheduler not initialized properly'

    assert all(lr == adjusted_lr1[0] for lr in adjusted_lr1), \
        'lr not equally adjusted for all param groups for optimizer 1'
    adjusted_lr1 = adjusted_lr1[0]

    assert all(lr == adjusted_lr2[0] for lr in adjusted_lr2), \
        'lr not equally adjusted for all param groups for optimizer 2'
    adjusted_lr2 = adjusted_lr2[0]

    # scheduler 1 steps once at the end of the single epoch
    assert init_lr * 0.1 ** 1 == adjusted_lr1, \
        'lr for optimizer 1 not adjusted correctly'
    # scheduler 2 steps on a 3-step interval; the net decay observed here is one
    # factor of 0.1 (NOTE(review): the original comment claimed 3 calls for 11
    # batches, which disagrees with the asserted single decay — confirm upstream)
    assert init_lr * 0.1 == adjusted_lr2, \
        'lr for optimizer 2 not adjusted correctly'
def test_trainer_arg_str(tmpdir, use_hparams):
    """ Test that setting trainer arg to string works """
    hparams = EvalModelTemplate.get_default_hparams()
    model = EvalModelTemplate(**hparams)
    # update with non-standard field
    model.my_fancy_lr = 1.0
    model.hparams['my_fancy_lr'] = 1.0
    lr_before = model.my_fancy_lr
    if use_hparams:
        # drop the attribute so the lr must come from hparams instead
        del model.my_fancy_lr
        model.configure_optimizers = model.configure_optimizers__lr_from_hparams

    # logger file to get meta
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=2,
        auto_lr_find='my_fancy_lr',
    )

    trainer.tune(model)
    lr_after = model.hparams.my_fancy_lr if use_hparams else model.my_fancy_lr

    assert lr_before != lr_after, \
        'Learning rate was not altered after running learning rate finder'
def test_optimizer_with_scheduling(tmpdir):
    """ Verify that learning rate scheduling is working """
    hparams = EvalModelTemplate.get_default_hparams()
    model = EvalModelTemplate(**hparams)
    model.configure_optimizers = model.configure_optimizers__single_scheduler

    # fit model
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        limit_val_batches=0.1,
        limit_train_batches=0.2,
    )
    results = trainer.fit(model)
    assert results == 1

    init_lr = hparams.get('learning_rate')
    adjusted_lr = [pg['lr'] for pg in trainer.optimizers[0].param_groups]

    # fix: "instread" -> "instead" in the assertion message
    assert len(trainer.lr_schedulers) == 1, \
        'lr scheduler not initialized properly, it has %i elements instead of 1' % len(trainer.lr_schedulers)

    # every param group of the single optimizer must have received the same lr
    assert all(a == adjusted_lr[0] for a in adjusted_lr), \
        'Lr not equally adjusted for all param groups'
    adjusted_lr = adjusted_lr[0]

    # the scheduler decays the lr by a factor of 0.1 after the single epoch
    assert init_lr * 0.1 == adjusted_lr, \
        'Lr not adjusted correctly, expected %f but got %f' % (init_lr * 0.1, adjusted_lr)
def test_lr_logger_multi_lrs(tmpdir):
    """ Test that learning rates are extracted and logged for multi lr schedulers. """
    tutils.reset_seed()

    model = EvalModelTemplate()
    model.configure_optimizers = model.configure_optimizers__multiple_schedulers

    lr_logger = LearningRateLogger()
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=2,
        limit_val_batches=0.1,
        limit_train_batches=0.5,
        callbacks=[lr_logger],
    )
    result = trainer.fit(model)
    assert result

    assert lr_logger.lrs, 'No learning rates logged'
    assert len(lr_logger.lrs) == len(trainer.lr_schedulers), \
        'Number of learning rates logged does not match number of lr schedulers'
    # one entry per scheduler, auto-named after the optimizer class
    assert all(name in ['lr-Adam', 'lr-Adam-1'] for name in lr_logger.lrs.keys()), \
        'Names of learning rates not set correctly'
    assert all(len(logged) == trainer.max_epochs for logged in lr_logger.lrs.values()), \
        'Length of logged learning rates exceeds the number of epochs'
def test_multi_optimizer_with_scheduling(tmpdir):
    """ Verify that learning rate scheduling is working """
    hparams = EvalModelTemplate.get_default_hparams()
    model = EvalModelTemplate(hparams)
    model.configure_optimizers = model.configure_optimizers__multiple_schedulers

    # fit model
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        val_percent_check=0.1,
        train_percent_check=0.2,
    )
    results = trainer.fit(model)
    assert results == 1

    init_lr = hparams.learning_rate
    adjusted_lr1 = [pg['lr'] for pg in trainer.optimizers[0].param_groups]
    adjusted_lr2 = [pg['lr'] for pg in trainer.optimizers[1].param_groups]

    # fix: the message claimed "instread of 1" although two schedulers are expected
    assert len(trainer.lr_schedulers) == 2, \
        'all lr scheduler not initialized properly, it has %i elements instead of 2' % len(trainer.lr_schedulers)

    assert all(a == adjusted_lr1[0] for a in adjusted_lr1), \
        'Lr not equally adjusted for all param groups for optimizer 1'
    adjusted_lr1 = adjusted_lr1[0]

    assert all(a == adjusted_lr2[0] for a in adjusted_lr2), \
        'Lr not equally adjusted for all param groups for optimizer 2'
    adjusted_lr2 = adjusted_lr2[0]

    # both schedulers decay their optimizer's lr by 0.1 over the single epoch
    assert init_lr * 0.1 == adjusted_lr1 and init_lr * 0.1 == adjusted_lr2, \
        'Lr not adjusted correctly, expected %f but got %f' % (init_lr * 0.1, adjusted_lr1)
def test_lr_monitor_multi_lrs(tmpdir, logging_interval):
    """ Test that learning rates are extracted and logged for multi lr schedulers. """
    tutils.reset_seed()

    model = EvalModelTemplate()
    model.configure_optimizers = model.configure_optimizers__multiple_schedulers

    lr_monitor = LearningRateMonitor(logging_interval=logging_interval)
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=2,
        limit_val_batches=0.1,
        limit_train_batches=0.5,
        callbacks=[lr_monitor],
    )
    result = trainer.fit(model)
    assert result

    assert lr_monitor.lrs, 'No learning rates logged'
    assert len(lr_monitor.lrs) == len(trainer.lr_schedulers), \
        'Number of learning rates logged does not match number of lr schedulers'
    assert all(name in ['lr-Adam', 'lr-Adam-1'] for name in lr_monitor.lrs.keys()), \
        'Names of learning rates not set correctly'

    # per-step logging yields one value per global step, per-epoch one per epoch
    if logging_interval == 'step':
        expected_number_logged = trainer.global_step
    elif logging_interval == 'epoch':
        expected_number_logged = trainer.max_epochs

    assert all(len(logged) == expected_number_logged for logged in lr_monitor.lrs.values()), \
        'Length of logged learning rates do not match the expected number'
def test_horovod_multi_optimizer_with_scheduling_stepping(tmpdir):
    """Check lr scheduling with two optimizers when the base lr is scaled by horovod world size."""
    hparams = EvalModelTemplate.get_default_hparams()
    model = EvalModelTemplate(**hparams)
    model.configure_optimizers = model.configure_optimizers__multiple_schedulers

    num_workers = 8
    # horovod scales the base lr by the number of workers
    init_lr = hparams.get('learning_rate') * num_workers

    with patch('pytorch_lightning.trainer.distrib_parts.hvd.size') as mock_hvd_size:
        mock_hvd_size.return_value = 8

        # fit model
        trainer = Trainer(
            default_root_dir=tmpdir,
            max_epochs=1,
            limit_val_batches=0.5,
            limit_train_batches=0.2,
            distributed_backend='horovod',
        )
        results = trainer.fit(model)
        assert results == 1

        adjusted_lr1 = [pg['lr'] for pg in trainer.optimizers[0].param_groups][0]
        adjusted_lr2 = [pg['lr'] for pg in trainer.optimizers[1].param_groups][0]

        # scheduler 1 decays once (gamma=0.1) at the end of the epoch
        assert pytest.approx(init_lr * 0.1) == adjusted_lr1

        # scheduler 2 steps on a 3-step interval (gamma=0.1); net decay observed is one factor of 0.1
        assert pytest.approx(init_lr * 0.1) == adjusted_lr2
def test_lr_monitor_single_lr(tmpdir):
    """ Test that learning rates are extracted and logged for single lr scheduler. """
    tutils.reset_seed()

    model = EvalModelTemplate()
    model.configure_optimizers = model.configure_optimizers__single_scheduler

    lr_monitor = LearningRateMonitor()
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=2,
        limit_val_batches=0.1,
        limit_train_batches=0.5,
        callbacks=[lr_monitor],
    )
    result = trainer.fit(model)
    assert result

    assert lr_monitor.lrs, 'No learning rates logged'
    # momentum logging is opt-in, so the default values must all be None
    assert all(value is None for value in lr_monitor.last_momentum_values.values()), \
        'Momentum should not be logged by default'
    assert len(lr_monitor.lrs) == len(trainer.lr_schedulers), \
        'Number of learning rates logged does not match number of lr schedulers'
    assert lr_monitor.lr_sch_names == list(lr_monitor.lrs.keys()) == ['lr-Adam'], \
        'Names of learning rates not set correctly'
def test_none_optimizer_warning():
    """A model whose configure_optimizers returns None should warn, not fail."""
    trainer = Trainer()
    model = EvalModelTemplate(tutils.get_default_hparams())
    model.configure_optimizers = lambda: None

    with pytest.warns(UserWarning, match='will run with no optimizer'):
        _, __, ___ = trainer.init_optimizers(model)
def test_unknown_configure_optimizers_raises(tmpdir):
    """ Test exception with an unsupported configure_optimizers return """
    model = EvalModelTemplate()
    # an int is not a valid optimizer configuration
    model.configure_optimizers = lambda: 1
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    with pytest.raises(MisconfigurationException, match="Unknown configuration for model optimizers"):
        trainer.fit(model)
def test_none_optimizer_warning():
    """An empty optimizer configuration should emit a warning, not fail."""
    trainer = Trainer()

    model = EvalModelTemplate()
    model.configure_optimizers = model.configure_optimizers__empty

    with pytest.warns(UserWarning, match='will run with no optimizer'):
        _, __, ___ = trainer.init_optimizers(model)
def test_wrong_configure_optimizers(tmpdir):
    """ Test that an error is thrown when no `configure_optimizers()` is defined """
    tutils.reset_seed()
    trainer = Trainer(default_root_dir=tmpdir, max_epochs=1)

    with pytest.raises(MisconfigurationException):
        model = EvalModelTemplate()
        # removing configure_optimizers entirely must be rejected
        model.configure_optimizers = None
        trainer.fit(model)
def test_configure_optimizers_with_frequency(tmpdir):
    """ Test that multiple optimizers work when corresponding frequency is set. """
    model = EvalModelTemplate()
    model.configure_optimizers = model.configure_optimizers__multiple_optimizers_frequency

    trainer = Trainer(default_root_dir=tmpdir, max_epochs=1)
    trainer.fit(model)
    # fit must run to completion
    assert trainer.state == TrainerState.FINISHED, f"Training failed with {trainer.state}"
def test_configure_optimizers_with_frequency(tmpdir):
    """ Test that multiple optimizers work when corresponding frequency is set. """
    model = EvalModelTemplate()
    model.configure_optimizers = model.configure_optimizers__multiple_optimizers_frequency

    trainer = Trainer(default_root_dir=tmpdir, max_epochs=1)
    # a truthy result signals a successful fit
    assert trainer.fit(model)
def test_error_on_more_than_1_optimizer(tmpdir):
    """ Check that error is thrown when more than 1 optimizer is passed """
    model = EvalModelTemplate()
    model.configure_optimizers = model.configure_optimizers__multiple_schedulers

    # logger file to get meta
    trainer = Trainer(default_save_path=tmpdir, max_epochs=1)

    # lr_find only supports a single optimizer
    with pytest.raises(MisconfigurationException):
        trainer.lr_find(model)
def test_lr_scheduler_with_no_actual_scheduler_raises(tmpdir):
    """ Test exception when lr_scheduler dict has no scheduler """
    model = EvalModelTemplate()
    # 'lr_scheduler' dict is missing the mandatory 'scheduler' key
    model.configure_optimizers = lambda: {
        'optimizer': torch.optim.Adam(model.parameters()),
        'lr_scheduler': {},
    }
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    with pytest.raises(MisconfigurationException, match='The lr scheduler dict must have the key "scheduler"'):
        trainer.fit(model)
def test_reducelronplateau_with_no_monitor_raises(tmpdir):
    """ Test exception when a ReduceLROnPlateau is used with no monitor """
    model = EvalModelTemplate()
    optimizer = torch.optim.Adam(model.parameters())
    # ReduceLROnPlateau returned bare, without the required monitor key
    model.configure_optimizers = lambda: ([optimizer], [torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)])
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    with pytest.raises(
        MisconfigurationException, match='`configure_optimizers` must include a monitor when a `ReduceLROnPlateau`'
    ):
        trainer.fit(model)
def test_backward_count_with_closure(torch_backward):
    """Using a closure (e.g. with LBFGS) should lead to no extra backward calls."""
    model = EvalModelTemplate()
    model.configure_optimizers = model.configure_optimizers__lbfgs

    # 5 steps -> exactly 5 backward calls
    trainer = Trainer(max_steps=5)
    trainer.fit(model)
    assert torch_backward.call_count == 5

    torch_backward.reset_mock()

    # gradient accumulation doubles the number of backward calls per step
    trainer = Trainer(max_steps=5, accumulate_grad_batches=2)
    trainer.fit(model)
    assert torch_backward.call_count == 10
def test_init_optimizers_during_testing(tmpdir):
    """ Test that optimizers is an empty list during testing. """
    model = EvalModelTemplate()
    model.configure_optimizers = model.configure_optimizers__multiple_schedulers

    trainer = Trainer(default_root_dir=tmpdir, limit_test_batches=10)
    trainer.test(model, ckpt_path=None)

    # testing must not instantiate any optimizer machinery
    assert len(trainer.lr_schedulers) == 0
    assert len(trainer.optimizers) == 0
    assert len(trainer.optimizer_frequencies) == 0
def test_lr_scheduler_strict(tmpdir):
    """ Test "strict" support in lr_scheduler dict """
    model = EvalModelTemplate()
    optimizer = torch.optim.Adam(model.parameters())
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)
    trainer = Trainer(default_root_dir=tmpdir, max_epochs=1)

    # strict=True: a missing monitored metric must raise
    model.configure_optimizers = lambda: {
        'optimizer': optimizer,
        'lr_scheduler': {'scheduler': scheduler, 'monitor': 'giraffe', 'strict': True},
    }
    with pytest.raises(
        MisconfigurationException,
        match=r'ReduceLROnPlateau conditioned on metric .* which is not available\. Available metrics are:',
    ):
        trainer.fit(model)

    # strict=False: the same situation only warns
    model.configure_optimizers = lambda: {
        'optimizer': optimizer,
        'lr_scheduler': {'scheduler': scheduler, 'monitor': 'giraffe', 'strict': False},
    }
    with pytest.warns(
        RuntimeWarning,
        match=r'ReduceLROnPlateau conditioned on metric .* which is not available but strict',
    ):
        trainer.fit(model)
def test_none_optimizer(tmpdir):
    """Fitting a model without any optimizer should still complete."""
    hparams = EvalModelTemplate.get_default_hparams()
    model = EvalModelTemplate(**hparams)
    model.configure_optimizers = model.configure_optimizers__empty

    # fit model
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        limit_val_batches=0.1,
        limit_train_batches=0.2,
    )
    result = trainer.fit(model)

    # verify training completed
    assert result == 1
def test_reducelronplateau_with_no_monitor_in_lr_scheduler_dict_raises(tmpdir):
    """ Test exception when lr_scheduler dict has a ReduceLROnPlateau with no monitor """
    model = EvalModelTemplate()
    optimizer = torch.optim.Adam(model.parameters())
    # dict form of the scheduler without the mandatory 'monitor' key
    model.configure_optimizers = lambda: {
        'optimizer': optimizer,
        'lr_scheduler': {'scheduler': torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer)},
    }
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    with pytest.raises(MisconfigurationException, match='must include a monitor when a `ReduceLROnPlateau`'):
        trainer.fit(model)
def test_optimizer_return_options():
    """Exercise every supported return shape of configure_optimizers."""
    trainer = Trainer()
    model = EvalModelTemplate()

    # two optimizers with one scheduler each, reused by the scenarios below
    opt_a = torch.optim.Adam(model.parameters(), lr=0.002)
    opt_b = torch.optim.SGD(model.parameters(), lr=0.002)
    scheduler_a = torch.optim.lr_scheduler.StepLR(opt_a, 10)
    scheduler_b = torch.optim.lr_scheduler.StepLR(opt_b, 10)

    # single optimizer
    model.configure_optimizers = lambda: opt_a
    optim, lr_sched, freq = trainer.init_optimizers(model)
    assert len(optim) == 1 and len(lr_sched) == 0 and len(freq) == 0

    # opt tuple
    model.configure_optimizers = lambda: (opt_a, opt_b)
    optim, lr_sched, freq = trainer.init_optimizers(model)
    assert len(optim) == 2 and optim[0] == opt_a and optim[1] == opt_b
    assert len(lr_sched) == 0 and len(freq) == 0

    # opt list
    model.configure_optimizers = lambda: [opt_a, opt_b]
    optim, lr_sched, freq = trainer.init_optimizers(model)
    assert len(optim) == 2 and optim[0] == opt_a and optim[1] == opt_b
    assert len(lr_sched) == 0 and len(freq) == 0

    # opt tuple of 2 lists
    model.configure_optimizers = lambda: ([opt_a], [scheduler_a])
    optim, lr_sched, freq = trainer.init_optimizers(model)
    assert len(optim) == 1 and len(lr_sched) == 1 and len(freq) == 0
    assert optim[0] == opt_a
    assert lr_sched[0] == dict(scheduler=scheduler_a, interval='epoch',
                               frequency=1, reduce_on_plateau=False, monitor='val_loss')

    # opt single dictionary
    model.configure_optimizers = lambda: {"optimizer": opt_a, "lr_scheduler": scheduler_a}
    optim, lr_sched, freq = trainer.init_optimizers(model)
    assert len(optim) == 1 and len(lr_sched) == 1 and len(freq) == 0
    assert optim[0] == opt_a
    assert lr_sched[0] == dict(scheduler=scheduler_a, interval='epoch',
                               frequency=1, reduce_on_plateau=False, monitor='val_loss')

    # opt multiple dictionaries with frequencies
    model.configure_optimizers = lambda: (
        {"optimizer": opt_a, "lr_scheduler": scheduler_a, "frequency": 1},
        {"optimizer": opt_b, "lr_scheduler": scheduler_b, "frequency": 5},
    )
    optim, lr_sched, freq = trainer.init_optimizers(model)
    assert len(optim) == 2 and len(lr_sched) == 2 and len(freq) == 2
    assert optim[0] == opt_a
    assert lr_sched[0] == dict(scheduler=scheduler_a, interval='epoch',
                               frequency=1, reduce_on_plateau=False, monitor='val_loss')
    assert freq == [1, 5]
def test_lbfgs_cpu_model(tmpdir):
    """Test each of the trainer options."""
    trainer_options = dict(
        default_root_dir=tmpdir,
        max_epochs=1,
        progress_bar_refresh_rate=0,
        weights_summary='top',
        limit_train_batches=0.2,
        limit_val_batches=0.2,
    )

    hparams = EvalModelTemplate.get_default_hparams()
    # swap in LBFGS with a matching learning rate
    hparams.update(optimizer_name='lbfgs', learning_rate=0.004)
    model = EvalModelTemplate(**hparams)
    model.configure_optimizers = model.configure_optimizers__lbfgs

    tpipes.run_model_test_without_loggers(trainer_options, model, min_acc=0.25)
def test_reduce_lr_on_plateau_scheduling_missing_monitor(tmpdir):
    """A ReduceLROnPlateau scheduler without a monitor must be rejected."""
    hparams = EvalModelTemplate.get_default_hparams()
    model = EvalModelTemplate(**hparams)
    model.configure_optimizers = model.configure_optimizers__reduce_lr_on_plateau

    # fit model
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        limit_val_batches=0.1,
        limit_train_batches=0.2,
    )
    m = '.*ReduceLROnPlateau requires returning a dict from configure_optimizers.*'
    with pytest.raises(MisconfigurationException, match=m):
        trainer.fit(model)
def test_none_optimizer(tmpdir):
    """Fitting with an empty optimizer configuration should still finish."""
    hparams = EvalModelTemplate.get_default_hparams()
    model = EvalModelTemplate(**hparams)
    model.configure_optimizers = model.configure_optimizers__empty

    # fit model
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        limit_val_batches=0.1,
        limit_train_batches=0.2,
    )
    trainer.fit(model)

    # verify training completed
    assert trainer.state == TrainerState.FINISHED, f"Training failed with {trainer.state}"
def test_lbfgs_cpu_model(tmpdir):
    """Test each of the trainer options."""
    trainer_options = dict(
        default_root_dir=tmpdir,
        max_epochs=2,
        progress_bar_refresh_rate=0,
        weights_summary='top',
        train_percent_check=1.0,
        val_percent_check=0.2,
    )

    hparams = EvalModelTemplate.get_default_hparams()
    # swap in LBFGS with a matching learning rate
    setattr(hparams, 'optimizer_name', 'lbfgs')
    setattr(hparams, 'learning_rate', 0.002)
    model = EvalModelTemplate(hparams)
    model.configure_optimizers = model.configure_optimizers__lbfgs

    tutils.run_model_test_without_loggers(trainer_options, model, min_acc=0.5)
def test_lr_scheduler_with_extra_keys_warns(tmpdir):
    """ Test warning when lr_scheduler dict has extra keys """
    model = EvalModelTemplate()
    optimizer = torch.optim.Adam(model.parameters())
    # 'foo' and 'bar' are not recognised lr_scheduler keys
    model.configure_optimizers = lambda: {
        'optimizer': optimizer,
        'lr_scheduler': {
            'scheduler': torch.optim.lr_scheduler.StepLR(optimizer, 1),
            'foo': 1,
            'bar': 2,
        },
    }
    trainer = Trainer(default_root_dir=tmpdir, fast_dev_run=True)
    with pytest.warns(RuntimeWarning, match=r'Found unsupported keys in the lr scheduler dict: \[.+\]'):
        trainer.fit(model)
def test_call_to_trainer_method(tmpdir, optimizer):
    """Test that directly calling the trainer method works"""
    hparams = EvalModelTemplate.get_default_hparams()
    model = EvalModelTemplate(**hparams)
    if optimizer == "adagrad":
        model.configure_optimizers = model.configure_optimizers__adagrad

    lr_before = hparams.get("learning_rate")

    # logger file to get meta
    trainer = Trainer(default_root_dir=tmpdir, max_epochs=2)

    lrfinder = trainer.tuner.lr_find(model, mode="linear")
    lr_after = lrfinder.suggestion()
    model.learning_rate = lr_after
    trainer.tune(model)

    assert lr_before != lr_after, "Learning rate was not altered after running learning rate finder"
def test_reduce_lr_on_plateau_scheduling(tmpdir):
    """Verify a ReduceLROnPlateau scheduler is converted to the canonical dict form."""
    hparams = EvalModelTemplate.get_default_hparams()
    model = EvalModelTemplate(**hparams)
    model.configure_optimizers = model.configure_optimizers__reduce_lr_on_plateau

    # fit model
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        limit_val_batches=0.1,
        limit_train_batches=0.2,
    )
    results = trainer.fit(model)
    assert results == 1

    # fix: "schduler" -> "scheduler" in the assertion message
    assert trainer.lr_schedulers[0] == \
        dict(scheduler=trainer.lr_schedulers[0]['scheduler'], monitor='val_loss',
             interval='epoch', frequency=1, reduce_on_plateau=True), \
        'lr scheduler was not correctly converted to dict'