def test_optimizer_return_options():
    """Check what `Trainer.init_optimizers` returns for each accepted input shape.

    Exercised inputs: a bare optimizer, a tuple of optimizers, a list of
    optimizers, and a ``([optimizers], [schedulers])`` pair.
    """
    tutils.reset_seed()

    trainer = Trainer()
    model, _ = tutils.get_default_model()

    adam = torch.optim.Adam(model.parameters(), lr=0.002)
    sgd = torch.optim.SGD(model.parameters(), lr=0.002)

    # single optimizer
    optimizers, schedulers = trainer.init_optimizers(adam)
    assert len(optimizers) == 1
    assert len(schedulers) == 0

    # tuple of optimizers first, then list of optimizers
    for pair in ((adam, sgd), [adam, sgd]):
        optimizers, schedulers = trainer.init_optimizers(pair)
        assert len(optimizers) == 2
        assert optimizers[0] == pair[0]
        assert optimizers[1] == pair[1]
        assert len(schedulers) == 0

    # tuple of lists: ([optimizers], [schedulers])
    step_lr = torch.optim.lr_scheduler.StepLR(adam, 10)
    paired = ([adam], [step_lr])
    optimizers, schedulers = trainer.init_optimizers(paired)
    assert len(optimizers) == 1
    assert len(schedulers) == 1

    expected_schedule = {
        'scheduler': step_lr,
        'interval': 'epoch',
        'frequency': 1,
        'reduce_on_plateau': False,
        'monitor': 'val_loss',
    }
    assert optimizers[0] == paired[0][0]
    assert schedulers[0] == expected_schedule
def test_adding_step_key(tmpdir):
    """Check that a "step" key returned in the "log" dict reaches the logger.

    Both epoch-end hooks bump a shared counter and return it under
    ``"step"``. The logger's ``log_metrics`` is wrapped so that whenever
    ``"val_acc"`` is among the metrics, the ``step`` argument must equal
    the current counter value.
    """
    logged_step = 0

    def _make_epoch_end(metric_name):
        # Build a hook that increments the shared counter and logs it as "step".
        def _epoch_end(outputs):
            nonlocal logged_step
            logged_step += 1
            return {"log": {"step": logged_step, metric_name: logged_step / 10}}

        return _epoch_end

    model, _ = tutils.get_default_model()
    model.validation_epoch_end = _make_epoch_end("val_acc")
    model.training_epoch_end = _make_epoch_end("train_acc")

    trainer = Trainer(
        max_epochs=4,
        default_root_dir=tmpdir,
        train_percent_check=0.001,
        val_percent_check=0.01,
        num_sanity_val_steps=0,
    )

    original_log_metrics = trainer.logger.log_metrics

    def _checked_log_metrics(metrics, step):
        # Only validation metrics are checked against the counter.
        if "val_acc" in metrics:
            assert step == logged_step
        return original_log_metrics(metrics, step)

    trainer.logger.log_metrics = _checked_log_metrics
    trainer.fit(model)
def test_optimizer_return_options():
    """Check `Trainer.init_optimizers(model)` for each `configure_optimizers` return shape.

    Exercised shapes: a bare optimizer, a tuple and a list of optimizers,
    a ``([optimizers], [schedulers])`` pair, a single dictionary, and a
    tuple of dictionaries carrying a ``"frequency"`` entry.
    """
    tutils.reset_seed()

    trainer = Trainer()
    model, _ = tutils.get_default_model()

    opt_a = torch.optim.Adam(model.parameters(), lr=0.002)
    opt_b = torch.optim.SGD(model.parameters(), lr=0.002)
    scheduler_a = torch.optim.lr_scheduler.StepLR(opt_a, 10)
    scheduler_b = torch.optim.lr_scheduler.StepLR(opt_b, 10)

    def _init_with(config):
        # Make the model hand back `config`, then let the trainer parse it.
        model.configure_optimizers = lambda: config
        return trainer.init_optimizers(model)

    def _expected_schedule(scheduler):
        # Scheduler description the test expects for a plain scheduler.
        return {
            'scheduler': scheduler,
            'interval': 'epoch',
            'frequency': 1,
            'reduce_on_plateau': False,
            'monitor': 'val_loss',
        }

    # single optimizer
    optimizers, schedulers, frequencies = _init_with(opt_a)
    assert len(optimizers) == 1
    assert len(schedulers) == 0
    assert len(frequencies) == 0

    # opt tuple
    optimizers, schedulers, frequencies = _init_with((opt_a, opt_b))
    assert len(optimizers) == 2
    assert optimizers[0] == opt_a
    assert optimizers[1] == opt_b
    assert len(schedulers) == 0
    assert len(frequencies) == 0

    # opt list
    optimizers, schedulers, frequencies = _init_with([opt_a, opt_b])
    assert len(optimizers) == 2
    assert optimizers[0] == opt_a
    assert optimizers[1] == opt_b
    assert len(schedulers) == 0
    assert len(frequencies) == 0

    # opt tuple of 2 lists
    optimizers, schedulers, frequencies = _init_with(([opt_a], [scheduler_a]))
    assert len(optimizers) == 1
    assert len(schedulers) == 1
    assert len(frequencies) == 0
    assert optimizers[0] == opt_a
    assert schedulers[0] == _expected_schedule(scheduler_a)

    # opt single dictionary
    optimizers, schedulers, frequencies = _init_with(
        {"optimizer": opt_a, "lr_scheduler": scheduler_a}
    )
    assert len(optimizers) == 1
    assert len(schedulers) == 1
    assert len(frequencies) == 0
    assert optimizers[0] == opt_a
    assert schedulers[0] == _expected_schedule(scheduler_a)

    # opt multiple dictionaries with frequencies
    optimizers, schedulers, frequencies = _init_with((
        {"optimizer": opt_a, "lr_scheduler": scheduler_a, "frequency": 1},
        {"optimizer": opt_b, "lr_scheduler": scheduler_b, "frequency": 5},
    ))
    assert len(optimizers) == 2
    assert len(schedulers) == 2
    assert len(frequencies) == 2
    assert optimizers[0] == opt_a
    assert schedulers[0] == _expected_schedule(scheduler_a)
    assert frequencies == [1, 5]
def test_none_optimizer_warning():
    """Expect a `UserWarning` when `configure_optimizers` returns ``None``."""
    tutils.reset_seed()

    trainer = Trainer()
    model, _ = tutils.get_default_model()

    def _no_optimizer():
        return None

    model.configure_optimizers = _no_optimizer

    with pytest.warns(UserWarning, match='will run with no optimizer'):
        # Unpacking also checks that three values come back.
        _optimizers, _schedulers, _frequencies = trainer.init_optimizers(model)
def _init_steps_model():
    """Private helper: build the default model with `train_percent_check=0.5`.

    Returns:
        A ``(model, trainer_options, num_train_samples)`` tuple, where
        ``trainer_options`` holds only ``train_percent_check`` and
        ``num_train_samples`` is ``floor(len(model.train_dataloader()) * 0.5)``.
    """
    tutils.reset_seed()
    model, _ = tutils.get_default_model()

    # use 50% of the train dataloader (the value is a fraction: 0.5, not 5%)
    train_percent = 0.5

    # train dataloader length scaled by that fraction, rounded down
    num_train_samples = math.floor(len(model.train_dataloader()) * train_percent)

    trainer_options = dict(train_percent_check=train_percent)
    return model, trainer_options, num_train_samples
def test_cpu_model(tmpdir):
    """Run the default model through `tutils.run_model_test` with ``on_gpu=False``."""
    tutils.reset_seed()

    trainer_options = {
        'default_root_dir': tmpdir,
        'progress_bar_refresh_rate': 0,
        'max_epochs': 1,
        'train_percent_check': 0.4,
        'val_percent_check': 0.4,
    }

    model, _ = tutils.get_default_model()
    tutils.run_model_test(trainer_options, model, on_gpu=False)
def run_test_from_config(trainer_options):
    """Train the default model with the given config and check the Horovod size.

    Args:
        trainer_options: mapping of trainer options; must contain
            ``'default_root_dir'``. A ``'checkpoint_callback'`` entry is
            written into it in place.
    """
    tutils.reset_seed()
    tutils.set_random_master_port()

    # checkpoints go to the run's root directory
    trainer_options['checkpoint_callback'] = ModelCheckpoint(
        trainer_options['default_root_dir']
    )

    model, _ = tutils.get_default_model()
    tutils.run_model_test(trainer_options, model, version=0, with_hpc=False)

    # Horovod should be initialized following training. If not, this will raise an exception.
    world_size = hvd.size()
    assert world_size == 2
def test_multi_gpu_none_backend(tmpdir):
    """Expect a `UserWarning` when ``gpus='-1'`` is used without a `distributed_backend`."""
    tutils.reset_seed()

    model, _ = tutils.get_default_model()
    trainer_options = {
        'default_root_dir': tmpdir,
        'progress_bar_refresh_rate': 0,
        'max_epochs': 1,
        'train_percent_check': 0.1,
        'val_percent_check': 0.1,
        'gpus': '-1',
    }

    with pytest.warns(UserWarning):
        tutils.run_model_test(trainer_options, model)
def test_single_gpu_model(tmpdir):
    """Run the default model through `tutils.run_model_test` with ``gpus=1``."""
    tutils.reset_seed()

    model, _ = tutils.get_default_model()
    trainer_options = {
        'default_root_dir': tmpdir,
        'progress_bar_refresh_rate': 0,
        'max_epochs': 1,
        'train_percent_check': 0.1,
        'val_percent_check': 0.1,
        'gpus': 1,
    }

    tutils.run_model_test(trainer_options, model)
def test_cpu_model(tmpdir):
    """Run the default model on CPU with the default test-tube logger attached."""
    tutils.reset_seed()

    logger = tutils.get_default_testtube_logger(tmpdir)
    trainer_options = {
        'default_save_path': tmpdir,
        'show_progress_bar': False,
        'logger': logger,
        'max_epochs': 1,
        'train_percent_check': 0.4,
        'val_percent_check': 0.4,
    }

    model, _ = tutils.get_default_model()
    tutils.run_model_test(trainer_options, model, on_gpu=False)
def test_cpu_model_with_amp(tmpdir):
    """Make sure requesting ``precision=16`` for a CPU run raises an error.

    The run must raise either `MisconfigurationException` or
    `ModuleNotFoundError`; completing without an exception fails the test.
    """
    tutils.reset_seed()

    trainer_options = dict(
        default_root_dir=tmpdir,
        progress_bar_refresh_rate=0,
        max_epochs=1,
        train_percent_check=0.4,
        val_percent_check=0.4,
        precision=16,
    )

    model, _ = tutils.get_default_model()

    with pytest.raises((MisconfigurationException, ModuleNotFoundError)):
        tutils.run_model_test(trainer_options, model, on_gpu=False)
def test_lbfgs_cpu_model(tmpdir):
    """Run the default model built with ``lbfgs=True`` and require ``min_acc=0.5``."""
    tutils.reset_seed()

    trainer_options = {
        'default_root_dir': tmpdir,
        'max_epochs': 2,
        'progress_bar_refresh_rate': 0,
        'weights_summary': 'top',
        'train_percent_check': 1.0,
        'val_percent_check': 0.2,
    }

    model, _ = tutils.get_default_model(lbfgs=True)
    tutils.run_model_test_no_loggers(trainer_options, model, min_acc=0.5)
def test_multi_gpu_model_ddp(tmpdir):
    """Run the default model with ``distributed_backend='ddp'`` on GPUs 0 and 1."""
    tutils.reset_seed()
    tutils.set_random_master_port()

    model, _ = tutils.get_default_model()
    trainer_options = {
        'default_save_path': tmpdir,
        'show_progress_bar': False,
        'max_epochs': 1,
        'train_percent_check': 0.4,
        'val_percent_check': 0.2,
        'gpus': [0, 1],
        'distributed_backend': 'ddp',
    }

    tutils.run_model_test(trainer_options, model)
def test_all_features_cpu_model(tmpdir):
    """Run the default model on CPU with several trainer options enabled together.

    Enabled options: gradient clipping, ``overfit_pct``, gradient-norm
    tracking and gradient accumulation.
    """
    tutils.reset_seed()

    trainer_options = {
        'default_root_dir': tmpdir,
        'gradient_clip_val': 1.0,
        'overfit_pct': 0.20,
        'track_grad_norm': 2,
        'progress_bar_refresh_rate': 0,
        'accumulate_grad_batches': 2,
        'max_epochs': 1,
        'train_percent_check': 0.4,
        'val_percent_check': 0.4,
    }

    model, _ = tutils.get_default_model()
    tutils.run_model_test(trainer_options, model, on_gpu=False)
def test_multi_cpu_model_ddp(tmpdir):
    """Run the default model with ``distributed_backend='ddp_cpu'`` and two processes."""
    tutils.reset_seed()
    tutils.set_random_master_port()

    model, _ = tutils.get_default_model()
    trainer_options = {
        'default_root_dir': tmpdir,
        'progress_bar_refresh_rate': 0,
        'max_epochs': 1,
        'train_percent_check': 0.4,
        'val_percent_check': 0.2,
        'gpus': None,
        'num_processes': 2,
        'distributed_backend': 'ddp_cpu',
    }

    tutils.run_model_test(trainer_options, model, on_gpu=False)
def test_amp_gpu_dp(tmpdir):
    """Fit the default model with ``distributed_backend='dp'`` and ``precision=16``."""
    tutils.reset_seed()

    model, _ = tutils.get_default_model()

    trainer = Trainer(
        default_save_path=tmpdir,
        max_epochs=1,
        gpus='0, 1',  # test init with gpu string
        distributed_backend='dp',
        precision=16,
    )

    fit_result = trainer.fit(model)
    assert fit_result == 1
def test_multi_gpu_model_ddp2(tmpdir):
    """Run the default model with ``distributed_backend='ddp2'`` and ``gpus=2``."""
    tutils.reset_seed()
    tutils.set_random_master_port()

    model, _ = tutils.get_default_model()
    trainer_options = {
        'default_save_path': tmpdir,
        'max_epochs': 1,
        'train_percent_check': 0.4,
        'val_percent_check': 0.2,
        'gpus': 2,
        'weights_summary': None,
        'distributed_backend': 'ddp2',
    }

    tutils.run_model_test(trainer_options, model)
def test_multi_gpu_model_dp(tmpdir):
    """Run the default model with ``distributed_backend='dp'``, then call the memory helper."""
    tutils.reset_seed()

    model, _ = tutils.get_default_model()
    trainer_options = {
        'default_save_path': tmpdir,
        'show_progress_bar': False,
        'distributed_backend': 'dp',
        'max_epochs': 1,
        'train_percent_check': 0.1,
        'val_percent_check': 0.1,
        'gpus': '-1',
    }

    tutils.run_model_test(trainer_options, model)

    # test memory helper functions
    memory.get_memory_profile('min_max')
def test_multi_gpu_none_backend(tmpdir):
    """Make sure when using multiple GPUs the user can't use `distributed_backend = None`.

    The run is expected to emit a `UserWarning`. The test is reported as
    skipped when `tutils.can_run_gpu_test()` returns a falsy value.
    """
    tutils.reset_seed()

    if not tutils.can_run_gpu_test():
        # A bare `return` would report this test as passed even though
        # nothing was exercised; skipping keeps the test report honest.
        pytest.skip('GPU test prerequisites not met (tutils.can_run_gpu_test)')

    model, _ = tutils.get_default_model()

    trainer_options = dict(
        default_save_path=tmpdir,
        show_progress_bar=False,
        max_epochs=1,
        train_percent_check=0.1,
        val_percent_check=0.1,
        gpus='-1',
    )

    with pytest.warns(UserWarning):
        tutils.run_model_test(trainer_options, model)
def test_single_gpu_model(tmpdir):
    """Run the default model with ``gpus=1``; warn and return early when CUDA is unavailable."""
    tutils.reset_seed()

    cuda_missing = not torch.cuda.is_available()
    if cuda_missing:
        warnings.warn(
            'test_single_gpu_model cannot run.'
            ' Rerun on a GPU node to run this test'
        )
        return

    model, _ = tutils.get_default_model()
    trainer_options = {
        'default_save_path': tmpdir,
        'show_progress_bar': False,
        'max_epochs': 1,
        'train_percent_check': 0.1,
        'val_percent_check': 0.1,
        'gpus': 1,
    }

    tutils.run_model_test(trainer_options, model)
def test_amp_single_gpu(tmpdir, backend):
    """Fit the default model on one GPU with ``precision=16`` and the given backend.

    Args:
        tmpdir: directory passed as ``default_root_dir``.
        backend: value passed as ``distributed_backend``.
    """
    tutils.reset_seed()

    model, _ = tutils.get_default_model()

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        gpus=1,
        distributed_backend=backend,
        precision=16,
    )

    fit_result = trainer.fit(model)
    assert fit_result == 1
def test_amp_multi_gpu(tmpdir, backend):
    """Fit the default model on GPUs ``'0, 1'`` with ``precision=16`` and the given backend.

    Args:
        tmpdir: directory passed as ``default_root_dir``.
        backend: value passed as ``distributed_backend``.
    """
    tutils.reset_seed()
    tutils.set_random_master_port()

    model, _ = tutils.get_default_model()

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        gpus='0, 1',  # test init with gpu string
        distributed_backend=backend,
        precision=16,
    )

    fit_result = trainer.fit(model)
    assert fit_result
def test_ddp_all_dataloaders_passed_to_fit(tmpdir):
    """Fit with ``distributed_backend='ddp'`` while passing the dataloaders to `fit()`."""
    tutils.reset_seed()
    tutils.set_random_master_port()

    model, _ = tutils.get_default_model()

    trainer_options = {
        'default_root_dir': tmpdir,
        'progress_bar_refresh_rate': 0,
        'max_epochs': 1,
        'train_percent_check': 0.4,
        'val_percent_check': 0.2,
        'gpus': [0, 1],
        'distributed_backend': 'ddp',
    }
    trainer = Trainer(**trainer_options)

    # dataloaders are handed to fit() rather than read from the model
    fit_kwargs = {
        'train_dataloader': model.train_dataloader(),
        'val_dataloaders': model.val_dataloader(),
    }
    fit_result = trainer.fit(model, **fit_kwargs)

    assert fit_result == 1, "DDP doesn't work with dataloaders passed to fit()."
def test_default_logger_callbacks_cpu_model(tmpdir):
    """Run the default model via `run_model_test_no_loggers`, then freeze and unfreeze it."""
    tutils.reset_seed()

    trainer_options = {
        'default_root_dir': tmpdir,
        'max_epochs': 1,
        'gradient_clip_val': 1.0,
        'overfit_pct': 0.20,
        'progress_bar_refresh_rate': 0,
        'train_percent_check': 0.01,
        'val_percent_check': 0.01,
    }

    model, _ = tutils.get_default_model()
    tutils.run_model_test_no_loggers(trainer_options, model)

    # test freeze on cpu
    model.freeze()
    model.unfreeze()
def test_ddp_all_dataloaders_passed_to_fit(tmpdir):
    """Fit with ``distributed_backend='ddp'`` while passing the dataloaders to `fit()`."""
    tutils.reset_seed()
    tutils.set_random_master_port()

    model, _ = tutils.get_default_model()

    # dataloaders are created before the trainer
    train_loader = model.train_dataloader()
    val_loader = model.val_dataloader()

    trainer = Trainer(
        default_save_path=tmpdir,
        show_progress_bar=False,
        max_epochs=1,
        train_percent_check=0.4,
        val_percent_check=0.2,
        gpus=[0, 1],
        distributed_backend='ddp',
    )
    fit_result = trainer.fit(
        model,
        train_dataloader=train_loader,
        val_dataloaders=val_loader,
    )

    assert fit_result == 1, "DDP doesn't work with dataloaders passed to fit()."
def test_early_stopping_cpu_model(tmpdir):
    """Run the default model on CPU with an `EarlyStopping` callback, then freeze/unfreeze."""
    tutils.reset_seed()

    early_stop = EarlyStopping(monitor='val_loss', min_delta=0.1)
    trainer_options = {
        'default_root_dir': tmpdir,
        'early_stop_callback': early_stop,
        'gradient_clip_val': 1.0,
        'overfit_pct': 0.20,
        'track_grad_norm': 2,
        'train_percent_check': 0.1,
        'val_percent_check': 0.1,
    }

    model, _ = tutils.get_default_model()
    tutils.run_model_test(trainer_options, model, on_gpu=False)

    # test freeze on cpu
    model.freeze()
    model.unfreeze()
def test_loggers_fit_test(tmpdir, monkeypatch, logger_class):
    """Verify the basic functionality of a logger class across `fit()` and `test()`.

    A subclass of ``logger_class`` records every ``log_metrics`` call; the
    recorded ``(step, sorted metric names)`` pairs are compared against
    the expected sequence.

    Args:
        tmpdir: directory handed to `_get_logger_args`.
        monkeypatch: pytest fixture used to replace `atexit.register`.
        logger_class: logger class under test.
    """
    tutils.reset_seed()

    # prevent comet logger from trying to print at exit, since
    # pytest's stdout/stderr redirection breaks it
    import atexit
    monkeypatch.setattr(atexit, 'register', lambda _: None)

    model, _ = tutils.get_default_model()

    class StoreHistoryLogger(logger_class):
        """Logger that also keeps every `(step, metrics)` pair it is given."""

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.history = []

        def log_metrics(self, metrics, step):
            super().log_metrics(metrics, step)
            self.history.append((step, metrics))

    logger = StoreHistoryLogger(**_get_logger_args(logger_class, tmpdir))

    trainer = Trainer(
        max_epochs=1,
        logger=logger,
        train_percent_check=0.2,
        val_percent_check=0.5,
        fast_dev_run=True,
    )
    trainer.fit(model)
    trainer.test()

    expected_history = [
        (0, ['val_acc', 'val_loss']),
        (0, ['train_some_val']),
        (1, ['test_acc', 'test_loss']),
    ]
    log_metric_names = [
        (step, sorted(metrics.keys())) for step, metrics in logger.history
    ]
    assert log_metric_names == expected_history
def test_multi_gpu_model(tmpdir, backend):
    """Fit the default model on GPUs 0 and 1 with the given backend, then call the memory helper.

    Args:
        tmpdir: directory passed as ``default_root_dir``.
        backend: value passed as ``distributed_backend``.
    """
    tutils.reset_seed()
    tutils.set_random_master_port()

    model, _ = tutils.get_default_model()

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        train_percent_check=0.4,
        val_percent_check=0.2,
        gpus=[0, 1],
        distributed_backend=backend,
    )

    fit_result = trainer.fit(model)
    assert fit_result

    # test memory helper functions
    memory.get_memory_profile('min_max')