def test_single_gpu_model(tmpdir):
    """Make sure single GPU works (DP mode)."""
    model = EvalModelTemplate()
    # Keep the run short: one epoch over 10% of the data, progress bar off.
    trainer_options = dict(
        default_root_dir=tmpdir,
        progress_bar_refresh_rate=0,
        max_epochs=1,
        train_percent_check=0.1,
        val_percent_check=0.1,
        gpus=1,
    )
    tutils.run_model_test(trainer_options, model)
def test_cpu_model(tmpdir):
    """Make sure model trains on CPU."""
    model = EvalModelTemplate()
    # Short CPU-only run: single epoch, 40% of train/val data, no progress bar.
    trainer_options = dict(
        default_root_dir=tmpdir,
        progress_bar_refresh_rate=0,
        max_epochs=1,
        train_percent_check=0.4,
        val_percent_check=0.4,
    )
    tutils.run_model_test(trainer_options, model, on_gpu=False)
def test_single_gpu_model(tmpdir, gpus):
    """Make sure single GPU works (DP mode)."""
    # `gpus` is injected by the test parametrization; everything else is a
    # minimal one-epoch run over 10% of the batches.
    trainer_options = dict(
        default_root_dir=tmpdir,
        progress_bar_refresh_rate=0,
        max_epochs=1,
        limit_train_batches=0.1,
        limit_val_batches=0.1,
        gpus=gpus,
    )
    model = EvalModelTemplate()
    tutils.run_model_test(trainer_options, model)
def test_multi_gpu_none_backend(tmpdir):
    """Make sure when using multiple GPUs the user can't use `distributed_backend = None`."""
    model = EvalModelTemplate()
    # gpus='-1' selects all available GPUs; with no backend set, Lightning
    # should fall back with a UserWarning rather than fail silently.
    trainer_options = dict(
        default_root_dir=tmpdir,
        progress_bar_refresh_rate=0,
        max_epochs=1,
        train_percent_check=0.1,
        val_percent_check=0.1,
        gpus='-1',
    )
    with pytest.warns(UserWarning):
        tutils.run_model_test(trainer_options, model)
def run_test_from_config(trainer_options):
    """Trains the default model with the given config."""
    tutils.reset_seed()
    tutils.set_random_master_port()

    # Checkpoints go under the run's root dir.
    checkpoint_dir = trainer_options['default_root_dir']
    trainer_options['checkpoint_callback'] = ModelCheckpoint(checkpoint_dir)

    model, hparams = tutils.get_default_model()
    tutils.run_model_test(trainer_options, model, version=0, with_hpc=False)

    # Horovod should be initialized following training. If not, this will raise an exception.
    assert hvd.size() == 2
def test_cpu_model_with_amp(tmpdir):
    """Requesting 16-bit precision on CPU must raise a configuration error."""
    trainer_options = dict(
        default_root_dir=tmpdir,
        progress_bar_refresh_rate=0,
        max_epochs=1,
        train_percent_check=0.4,
        val_percent_check=0.4,
        precision=16,
    )
    model = EvalModelTemplate(tutils.get_default_hparams())

    # AMP is unsupported on CPU: either Lightning rejects the config or the
    # apex import fails, depending on the environment.
    with pytest.raises((MisconfigurationException, ModuleNotFoundError)):
        tutils.run_model_test(trainer_options, model, on_gpu=False)
def test_amp_single_gpu(tmpdir):
    """Make sure DDP + AMP work."""
    tutils.reset_seed()

    model = LightningTestModel(tutils.get_default_hparams())
    # Single-GPU DDP run at 16-bit precision.
    trainer_options = dict(
        default_save_path=tmpdir,
        max_epochs=1,
        gpus=1,
        distributed_backend='ddp',
        precision=16,
    )
    tutils.run_model_test(trainer_options, model)
def test_multi_gpu_none_backend(tmpdir):
    """Make sure when using multiple GPUs the user can't use `distributed_backend = None`."""
    tutils.reset_seed()

    model, hparams = tutils.get_default_model()
    # gpus='-1' requests every available GPU without choosing a backend,
    # which should trigger a UserWarning.
    trainer_options = dict(
        default_save_path=tmpdir,
        show_progress_bar=False,
        max_epochs=1,
        train_percent_check=0.1,
        val_percent_check=0.1,
        gpus='-1',
    )
    with pytest.warns(UserWarning):
        tutils.run_model_test(trainer_options, model)
def test_all_features_cpu_model(tmpdir):
    """Test each of the trainer options."""
    # Exercise gradient clipping, overfit mode, grad-norm tracking and
    # gradient accumulation together in one short CPU run.
    trainer_options = dict(
        default_root_dir=tmpdir,
        gradient_clip_val=1.0,
        overfit_pct=0.20,
        track_grad_norm=2,
        progress_bar_refresh_rate=0,
        accumulate_grad_batches=2,
        max_epochs=1,
        train_percent_check=0.4,
        val_percent_check=0.4,
    )
    model = EvalModelTemplate()
    tutils.run_model_test(trainer_options, model, on_gpu=False)
def test_cpu_model(tmpdir):
    """Make sure model trains on CPU."""
    tutils.reset_seed()

    model, hparams = tutils.get_default_model()
    # One quiet epoch on CPU with the default test-tube logger.
    trainer_options = dict(
        default_save_path=tmpdir,
        show_progress_bar=False,
        logger=tutils.get_default_testtube_logger(tmpdir),
        max_epochs=1,
        train_percent_check=0.4,
        val_percent_check=0.4,
    )
    tutils.run_model_test(trainer_options, model, on_gpu=False)
def test_amp_gpu_ddp(tmpdir):
    """Make sure DDP + AMP work."""
    tutils.reset_seed()
    tutils.set_random_master_port()

    model = LightningTestModel(tutils.get_default_hparams())
    # Two-GPU DDP run at 16-bit precision.
    trainer_options = dict(
        default_root_dir=tmpdir,
        max_epochs=1,
        gpus=2,
        distributed_backend='ddp',
        precision=16,
    )
    tutils.run_model_test(trainer_options, model)
def test_multi_cpu_model_ddp(tmpdir):
    """Make sure DDP works."""
    tutils.set_random_master_port()

    model = EvalModelTemplate()
    # ddp_cpu: DDP semantics across 2 processes with no GPUs involved.
    trainer_options = dict(
        default_root_dir=tmpdir,
        progress_bar_refresh_rate=0,
        max_epochs=1,
        train_percent_check=0.4,
        val_percent_check=0.2,
        gpus=None,
        num_processes=2,
        distributed_backend='ddp_cpu',
    )
    tutils.run_model_test(trainer_options, model, on_gpu=False)
def test_multi_gpu_model_ddp2(tmpdir):
    """Make sure DDP2 works."""
    tutils.reset_seed()
    tutils.set_random_master_port()

    model, hparams = tutils.get_default_model()
    # DDP2 over 2 GPUs; weights summary disabled to keep output quiet.
    trainer_options = dict(
        default_save_path=tmpdir,
        max_epochs=1,
        train_percent_check=0.4,
        val_percent_check=0.2,
        gpus=2,
        weights_summary=None,
        distributed_backend='ddp2',
    )
    tutils.run_model_test(trainer_options, model)
def test_multi_gpu_model_ddp(tmpdir):
    """Make sure DDP works."""
    tutils.reset_seed()
    tutils.set_random_master_port()

    model, hparams = tutils.get_default_model()
    # Explicit GPU index list exercises the list form of the `gpus` flag.
    trainer_options = dict(
        default_save_path=tmpdir,
        show_progress_bar=False,
        max_epochs=1,
        train_percent_check=0.4,
        val_percent_check=0.2,
        gpus=[0, 1],
        distributed_backend='ddp',
    )
    tutils.run_model_test(trainer_options, model)
def test_cpu_model_with_amp(tmpdir):
    """Requesting 16-bit precision on CPU must raise a configuration error."""
    tutils.reset_seed()

    trainer_options = dict(
        default_save_path=tmpdir,
        show_progress_bar=False,
        logger=tutils.get_default_testtube_logger(tmpdir),
        max_epochs=1,
        train_percent_check=0.4,
        val_percent_check=0.4,
        precision=16,
    )
    model, hparams = tutils.get_default_model()

    # AMP is unsupported on CPU: either Lightning rejects the config or the
    # apex import fails, depending on the environment.
    with pytest.raises((MisconfigurationException, ModuleNotFoundError)):
        tutils.run_model_test(trainer_options, model, on_gpu=False)
def run_test_from_config(trainer_options):
    """Trains the default model with the given config.

    Runs under Horovod (launched with 2 workers — see the hvd.size()
    assertion) and, on GPU, additionally checks that the Trainer maps
    the root GPU to the local Horovod rank.

    NOTE(review): relies on a module-level ``args`` object (``args.on_gpu``),
    presumably parsed from the command line by the enclosing script — confirm.
    """
    set_random_master_port()

    # Checkpoint under the run's root dir.
    ckpt_path = trainer_options['default_root_dir']
    trainer_options.update(checkpoint_callback=ModelCheckpoint(ckpt_path))

    model = EvalModelTemplate()
    run_model_test(trainer_options, model, on_gpu=args.on_gpu, version=0, with_hpc=False)

    # Horovod should be initialized following training. If not, this will raise an exception.
    assert hvd.size() == 2

    if args.on_gpu:
        trainer = Trainer(gpus=1, distributed_backend='horovod', max_epochs=1)
        # Test the root_gpu property
        assert trainer.root_gpu == hvd.local_rank()
def test_multi_gpu_model_dp(tmpdir):
    """Make sure DP works."""
    tutils.reset_seed()

    model, hparams = tutils.get_default_model()
    # DP across every available GPU (gpus='-1').
    trainer_options = dict(
        default_save_path=tmpdir,
        show_progress_bar=False,
        distributed_backend='dp',
        max_epochs=1,
        train_percent_check=0.1,
        val_percent_check=0.1,
        gpus='-1',
    )
    tutils.run_model_test(trainer_options, model)

    # test memory helper functions
    memory.get_memory_profile('min_max')
def test_all_features_cpu_model(tmpdir):
    """Test each of the trainer options."""
    tutils.reset_seed()

    # Combine clipping, overfit mode, grad-norm tracking and gradient
    # accumulation in one quiet CPU epoch.
    trainer_options = dict(
        default_save_path=tmpdir,
        gradient_clip_val=1.0,
        overfit_pct=0.20,
        track_grad_norm=2,
        show_progress_bar=False,
        logger=tutils.get_default_testtube_logger(tmpdir),
        accumulate_grad_batches=2,
        max_epochs=1,
        train_percent_check=0.4,
        val_percent_check=0.4,
    )
    model, hparams = tutils.get_default_model()
    tutils.run_model_test(trainer_options, model, on_gpu=False)
def test_single_gpu_model(tmpdir):
    """Make sure single GPU works (DP mode)."""
    tutils.reset_seed()

    # Skip gracefully (with a warning) on CUDA-less machines.
    if not torch.cuda.is_available():
        warnings.warn('test_single_gpu_model cannot run.'
                      ' Rerun on a GPU node to run this test')
        return

    model, hparams = tutils.get_default_model()
    trainer_options = dict(
        default_save_path=tmpdir,
        show_progress_bar=False,
        max_epochs=1,
        train_percent_check=0.1,
        val_percent_check=0.1,
        gpus=1,
    )
    tutils.run_model_test(trainer_options, model)
def test_early_stopping_cpu_model(tmpdir):
    """Test each of the trainer options."""
    # Stop training once val_loss improves by less than 0.1.
    stopping = EarlyStopping(monitor='val_loss', min_delta=0.1)
    trainer_options = dict(
        default_root_dir=tmpdir,
        early_stop_callback=stopping,
        gradient_clip_val=1.0,
        overfit_pct=0.20,
        track_grad_norm=2,
        train_percent_check=0.1,
        val_percent_check=0.1,
    )

    model = EvalModelTemplate()
    tutils.run_model_test(trainer_options, model, on_gpu=False)

    # test freeze on cpu
    model.freeze()
    model.unfreeze()
def test_amp_gpu_ddp(tmpdir):
    """Make sure DDP + AMP work."""
    # Bail out early on machines without the required GPUs.
    if not tutils.can_run_gpu_test():
        return

    tutils.reset_seed()
    tutils.set_random_master_port()

    model = LightningTestModel(tutils.get_default_hparams())
    trainer_options = dict(
        default_save_path=tmpdir,
        show_progress_bar=True,
        max_epochs=1,
        gpus=2,
        distributed_backend='ddp',
        precision=16,
    )
    tutils.run_model_test(trainer_options, model)
def test_early_stopping_cpu_model(tmpdir):
    """Test each of the trainer options."""
    tutils.reset_seed()

    # Stop training once val_loss improves by less than 0.1.
    stopping = EarlyStopping(monitor='val_loss', min_delta=0.1)
    trainer_options = dict(
        default_save_path=tmpdir,
        early_stop_callback=stopping,
        gradient_clip_val=1.0,
        overfit_pct=0.20,
        track_grad_norm=2,
        logger=tutils.get_default_testtube_logger(tmpdir),
        train_percent_check=0.1,
        val_percent_check=0.1,
    )

    model, hparams = tutils.get_default_model()
    tutils.run_model_test(trainer_options, model, on_gpu=False)

    # test freeze on cpu
    model.freeze()
    model.unfreeze()