Example #1
def test_optimizer_return_options():
    tutils.reset_seed()

    trainer = Trainer()
    model, hparams = tutils.get_default_model()

    # single optimizer
    opt_a = torch.optim.Adam(model.parameters(), lr=0.002)
    opt_b = torch.optim.SGD(model.parameters(), lr=0.002)
    optim, lr_sched = trainer.init_optimizers(opt_a)
    assert len(optim) == 1 and len(lr_sched) == 0

    # opt tuple
    opts = (opt_a, opt_b)
    optim, lr_sched = trainer.init_optimizers(opts)
    assert len(optim) == 2 and optim[0] == opts[0] and optim[1] == opts[1]
    assert len(lr_sched) == 0

    # opt list
    opts = [opt_a, opt_b]
    optim, lr_sched = trainer.init_optimizers(opts)
    assert len(optim) == 2 and optim[0] == opts[0] and optim[1] == opts[1]
    assert len(lr_sched) == 0

    # opt tuple of lists
    scheduler = torch.optim.lr_scheduler.StepLR(opt_a, 10)
    opts = ([opt_a], [scheduler])
    optim, lr_sched = trainer.init_optimizers(opts)
    assert len(optim) == 1 and len(lr_sched) == 1
    assert optim[0] == opts[0][0]
    assert lr_sched[0] == dict(scheduler=scheduler, interval='epoch',
                               frequency=1, reduce_on_plateau=False,
                               monitor='val_loss')
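
This is the older two-value `init_optimizers` API (compare Example #3, where it also returns scheduler frequencies). The tuple-of-lists case at the end corresponds to a `configure_optimizers` shaped like the following minimal sketch; the module class and layer are illustrative, not taken from the test suite.

import torch
import pytorch_lightning as pl

class SchedulerModel(pl.LightningModule):  # hypothetical example module
    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(32, 2)

    def configure_optimizers(self):
        opt = torch.optim.Adam(self.parameters(), lr=0.002)
        sched = torch.optim.lr_scheduler.StepLR(opt, step_size=10)
        # tuple of lists: ([optimizers], [schedulers])
        return [opt], [sched]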
Example #2
def test_adding_step_key(tmpdir):
    logged_step = 0

    def _validation_epoch_end(outputs):
        nonlocal logged_step
        logged_step += 1
        return {"log": {"step": logged_step, "val_acc": logged_step / 10}}

    def _training_epoch_end(outputs):
        nonlocal logged_step
        logged_step += 1
        return {"log": {"step": logged_step, "train_acc": logged_step / 10}}

    def _log_metrics_decorator(log_metrics_fn):
        def decorated(metrics, step):
            if "val_acc" in metrics:
                assert step == logged_step
            return log_metrics_fn(metrics, step)

        return decorated

    model, hparams = tutils.get_default_model()
    model.validation_epoch_end = _validation_epoch_end
    model.training_epoch_end = _training_epoch_end
    trainer = Trainer(
        max_epochs=4,
        default_root_dir=tmpdir,
        train_percent_check=0.001,
        val_percent_check=0.01,
        num_sanity_val_steps=0,
    )
    trainer.logger.log_metrics = _log_metrics_decorator(
        trainer.logger.log_metrics)
    trainer.fit(model)
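
The decorator above checks that the `step` handed to the logger matches the value the model injected. The user-facing pattern being exercised: putting a "step" key inside the returned "log" dict overrides the global step the logger would otherwise receive. A minimal sketch (the module class is illustrative, not from the test suite):

import pytorch_lightning as pl

class StepKeyModel(pl.LightningModule):  # hypothetical example module
    def validation_epoch_end(self, outputs):
        custom_step = self.current_epoch + 1
        # "step" inside "log" replaces the trainer's global step in the logger call
        return {"log": {"step": custom_step, "val_acc": 0.9}}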
Example #3
def test_optimizer_return_options():
    tutils.reset_seed()

    trainer = Trainer()
    model, hparams = tutils.get_default_model()

    # single optimizer
    opt_a = torch.optim.Adam(model.parameters(), lr=0.002)
    opt_b = torch.optim.SGD(model.parameters(), lr=0.002)
    scheduler_a = torch.optim.lr_scheduler.StepLR(opt_a, 10)
    scheduler_b = torch.optim.lr_scheduler.StepLR(opt_b, 10)

    # single optimizer
    model.configure_optimizers = lambda: opt_a
    optim, lr_sched, freq = trainer.init_optimizers(model)
    assert len(optim) == 1 and len(lr_sched) == 0 and len(freq) == 0

    # opt tuple
    model.configure_optimizers = lambda: (opt_a, opt_b)
    optim, lr_sched, freq = trainer.init_optimizers(model)
    assert len(optim) == 2 and optim[0] == opt_a and optim[1] == opt_b
    assert len(lr_sched) == 0 and len(freq) == 0

    # opt list
    model.configure_optimizers = lambda: [opt_a, opt_b]
    optim, lr_sched, freq = trainer.init_optimizers(model)
    assert len(optim) == 2 and optim[0] == opt_a and optim[1] == opt_b
    assert len(lr_sched) == 0 and len(freq) == 0

    # opt tuple of 2 lists
    model.configure_optimizers = lambda: ([opt_a], [scheduler_a])
    optim, lr_sched, freq = trainer.init_optimizers(model)
    assert len(optim) == 1 and len(lr_sched) == 1 and len(freq) == 0
    assert optim[0] == opt_a
    assert lr_sched[0] == dict(scheduler=scheduler_a, interval='epoch',
                               frequency=1, reduce_on_plateau=False, monitor='val_loss')

    # opt single dictionary
    model.configure_optimizers = lambda: {"optimizer": opt_a, "lr_scheduler": scheduler_a}
    optim, lr_sched, freq = trainer.init_optimizers(model)
    assert len(optim) == 1 and len(lr_sched) == 1 and len(freq) == 0
    assert optim[0] == opt_a
    assert lr_sched[0] == dict(scheduler=scheduler_a, interval='epoch',
                               frequency=1, reduce_on_plateau=False, monitor='val_loss')

    # opt multiple dictionaries with frequencies
    model.configure_optimizers = lambda: (
        {"optimizer": opt_a, "lr_scheduler": scheduler_a, "frequency": 1},
        {"optimizer": opt_b, "lr_scheduler": scheduler_b, "frequency": 5},
    )
    optim, lr_sched, freq = trainer.init_optimizers(model)
    assert len(optim) == 2 and len(lr_sched) == 2 and len(freq) == 2
    assert optim[0] == opt_a
    assert lr_sched[0] == dict(scheduler=scheduler_a, interval='epoch',
                               frequency=1, reduce_on_plateau=False, monitor='val_loss')
    assert freq == [1, 5]
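
The dict entries asserted above are what `init_optimizers` produces after normalizing a `configure_optimizers` like the following sketch, filling in the default `interval`, `frequency`, `reduce_on_plateau`, and `monitor` fields for each scheduler. The class and layer are illustrative, not from the test suite.

import torch
import pytorch_lightning as pl

class MultiOptModel(pl.LightningModule):  # hypothetical example module
    def __init__(self):
        super().__init__()
        self.layer = torch.nn.Linear(32, 2)

    def configure_optimizers(self):
        opt_a = torch.optim.Adam(self.parameters(), lr=0.002)
        opt_b = torch.optim.SGD(self.parameters(), lr=0.002)
        sched_a = torch.optim.lr_scheduler.StepLR(opt_a, step_size=10)
        sched_b = torch.optim.lr_scheduler.StepLR(opt_b, step_size=10)
        # with frequencies, opt_a steps on 1 batch, then opt_b on the next 5
        return (
            {"optimizer": opt_a, "lr_scheduler": sched_a, "frequency": 1},
            {"optimizer": opt_b, "lr_scheduler": sched_b, "frequency": 5},
        )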
Example #4
def test_none_optimizer_warning():
    tutils.reset_seed()

    trainer = Trainer()
    model, hparams = tutils.get_default_model()

    model.configure_optimizers = lambda: None

    with pytest.warns(UserWarning, match='will run with no optimizer'):
        _, __, ___ = trainer.init_optimizers(model)
Example #5
def _init_steps_model():
    """private method for initializing a model with 5% train epochs"""
    tutils.reset_seed()
    model, _ = tutils.get_default_model()

    # limit each train epoch to 50% of the data
    train_percent = 0.5
    # number of train batches in one epoch
    num_train_samples = math.floor(
        len(model.train_dataloader()) * train_percent)

    trainer_options = dict(train_percent_check=train_percent)
    return model, trainer_options, num_train_samples
Example #6
def test_cpu_model(tmpdir):
    """Make sure model trains on CPU."""
    tutils.reset_seed()

    trainer_options = dict(default_root_dir=tmpdir,
                           progress_bar_refresh_rate=0,
                           max_epochs=1,
                           train_percent_check=0.4,
                           val_percent_check=0.4)

    model, hparams = tutils.get_default_model()

    tutils.run_model_test(trainer_options, model, on_gpu=False)
Example #7
def run_test_from_config(trainer_options):
    """Trains the default model with the given config."""
    tutils.reset_seed()
    tutils.set_random_master_port()

    ckpt_path = trainer_options['default_root_dir']
    trainer_options['checkpoint_callback'] = ModelCheckpoint(ckpt_path)

    model, hparams = tutils.get_default_model()
    tutils.run_model_test(trainer_options, model, version=0, with_hpc=False)

    # Horovod should be initialized following training. If not, this will raise an exception.
    assert hvd.size() == 2
Example #8
def test_multi_gpu_none_backend(tmpdir):
    """Make sure when using multiple GPUs the user can't use `distributed_backend = None`."""
    tutils.reset_seed()

    model, hparams = tutils.get_default_model()
    trainer_options = dict(default_root_dir=tmpdir,
                           progress_bar_refresh_rate=0,
                           max_epochs=1,
                           train_percent_check=0.1,
                           val_percent_check=0.1,
                           gpus='-1')

    with pytest.warns(UserWarning):
        tutils.run_model_test(trainer_options, model)
Example #9
def test_single_gpu_model(tmpdir):
    """Make sure single GPU works (DP mode)."""
    tutils.reset_seed()

    model, hparams = tutils.get_default_model()

    trainer_options = dict(default_root_dir=tmpdir,
                           progress_bar_refresh_rate=0,
                           max_epochs=1,
                           train_percent_check=0.1,
                           val_percent_check=0.1,
                           gpus=1)

    tutils.run_model_test(trainer_options, model)
Example #10
def test_cpu_model(tmpdir):
    """Make sure model trains on CPU."""
    tutils.reset_seed()

    trainer_options = dict(default_save_path=tmpdir,
                           show_progress_bar=False,
                           logger=tutils.get_default_testtube_logger(tmpdir),
                           max_epochs=1,
                           train_percent_check=0.4,
                           val_percent_check=0.4)

    model, hparams = tutils.get_default_model()

    tutils.run_model_test(trainer_options, model, on_gpu=False)
Example #11
def test_cpu_model_with_amp(tmpdir):
    """Make sure model trains on CPU."""
    tutils.reset_seed()

    trainer_options = dict(default_root_dir=tmpdir,
                           progress_bar_refresh_rate=0,
                           max_epochs=1,
                           train_percent_check=0.4,
                           val_percent_check=0.4,
                           precision=16)

    model, hparams = tutils.get_default_model()

    with pytest.raises((MisconfigurationException, ModuleNotFoundError)):
        tutils.run_model_test(trainer_options, model, on_gpu=False)
Example #12
def test_lbfgs_cpu_model(tmpdir):
    """Test each of the trainer options."""
    tutils.reset_seed()

    trainer_options = dict(
        default_root_dir=tmpdir,
        max_epochs=2,
        progress_bar_refresh_rate=0,
        weights_summary='top',
        train_percent_check=1.0,
        val_percent_check=0.2,
    )

    model, hparams = tutils.get_default_model(lbfgs=True)
    tutils.run_model_test_no_loggers(trainer_options, model, min_acc=0.5)
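
LBFGS is special-cased because `optimizer.step()` must be given a closure that re-evaluates the loss, possibly several times per step; Lightning builds and passes that closure internally, which is what this test exercises end to end. A plain-PyTorch sketch of the underlying mechanism (model and data are illustrative):

import torch
import torch.nn.functional as F

model = torch.nn.Linear(10, 1)
optimizer = torch.optim.LBFGS(model.parameters(), lr=0.1)
x, y = torch.randn(32, 10), torch.randn(32, 1)

def closure():
    # LBFGS may call this repeatedly within a single step
    optimizer.zero_grad()
    loss = F.mse_loss(model(x), y)
    loss.backward()
    return loss

optimizer.step(closure)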
Example #13
def test_multi_gpu_model_ddp(tmpdir):
    """Make sure DDP works."""

    tutils.reset_seed()
    tutils.set_random_master_port()

    model, hparams = tutils.get_default_model()
    trainer_options = dict(default_save_path=tmpdir,
                           show_progress_bar=False,
                           max_epochs=1,
                           train_percent_check=0.4,
                           val_percent_check=0.2,
                           gpus=[0, 1],
                           distributed_backend='ddp')

    tutils.run_model_test(trainer_options, model)
Example #14
def test_all_features_cpu_model(tmpdir):
    """Test each of the trainer options."""
    tutils.reset_seed()

    trainer_options = dict(default_root_dir=tmpdir,
                           gradient_clip_val=1.0,
                           overfit_pct=0.20,
                           track_grad_norm=2,
                           progress_bar_refresh_rate=0,
                           accumulate_grad_batches=2,
                           max_epochs=1,
                           train_percent_check=0.4,
                           val_percent_check=0.4)

    model, hparams = tutils.get_default_model()
    tutils.run_model_test(trainer_options, model, on_gpu=False)
Example #15
def test_multi_cpu_model_ddp(tmpdir):
    """Make sure DDP works."""
    tutils.reset_seed()
    tutils.set_random_master_port()

    model, hparams = tutils.get_default_model()
    trainer_options = dict(default_root_dir=tmpdir,
                           progress_bar_refresh_rate=0,
                           max_epochs=1,
                           train_percent_check=0.4,
                           val_percent_check=0.2,
                           gpus=None,
                           num_processes=2,
                           distributed_backend='ddp_cpu')

    tutils.run_model_test(trainer_options, model, on_gpu=False)
Example #16
def test_amp_gpu_dp(tmpdir):
    """Make sure DP + AMP work."""
    tutils.reset_seed()

    model, hparams = tutils.get_default_model()
    trainer_options = dict(
        default_save_path=tmpdir,
        max_epochs=1,
        gpus='0, 1',  # test init with gpu string
        distributed_backend='dp',
        precision=16)

    trainer = Trainer(**trainer_options)
    result = trainer.fit(model)

    assert result == 1
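
The `gpus='0, 1'` argument deliberately exercises Lightning's string parsing for device selection. Roughly equivalent ways to request the first two GPUs, as a sketch (all of them assume the devices are actually present):

from pytorch_lightning import Trainer

Trainer(gpus=2)        # the first two GPUs
Trainer(gpus=[0, 1])   # explicit device indices
Trainer(gpus='0, 1')   # index string, as in the test above
Trainer(gpus='-1')     # all available GPUs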
Example #17
def test_multi_gpu_model_ddp2(tmpdir):
    """Make sure DDP2 works."""

    tutils.reset_seed()
    tutils.set_random_master_port()

    model, hparams = tutils.get_default_model()
    trainer_options = dict(default_save_path=tmpdir,
                           max_epochs=1,
                           train_percent_check=0.4,
                           val_percent_check=0.2,
                           gpus=2,
                           weights_summary=None,
                           distributed_backend='ddp2')

    tutils.run_model_test(trainer_options, model)
Example #18
def test_multi_gpu_model_dp(tmpdir):
    """Make sure DP works."""
    tutils.reset_seed()

    model, hparams = tutils.get_default_model()
    trainer_options = dict(default_save_path=tmpdir,
                           show_progress_bar=False,
                           distributed_backend='dp',
                           max_epochs=1,
                           train_percent_check=0.1,
                           val_percent_check=0.1,
                           gpus='-1')

    tutils.run_model_test(trainer_options, model)

    # test memory helper functions
    memory.get_memory_profile('min_max')
Example #19
def test_multi_gpu_none_backend(tmpdir):
    """Make sure when using multiple GPUs the user can't use `distributed_backend = None`."""
    tutils.reset_seed()

    if not tutils.can_run_gpu_test():
        return

    model, hparams = tutils.get_default_model()
    trainer_options = dict(default_save_path=tmpdir,
                           show_progress_bar=False,
                           max_epochs=1,
                           train_percent_check=0.1,
                           val_percent_check=0.1,
                           gpus='-1')

    with pytest.warns(UserWarning):
        tutils.run_model_test(trainer_options, model)
Example #20
def test_single_gpu_model(tmpdir):
    """Make sure single GPU works (DP mode)."""
    tutils.reset_seed()

    if not torch.cuda.is_available():
        warnings.warn('test_single_gpu_model cannot run.'
                      ' Rerun on a GPU node to run this test')
        return
    model, hparams = tutils.get_default_model()

    trainer_options = dict(default_save_path=tmpdir,
                           show_progress_bar=False,
                           max_epochs=1,
                           train_percent_check=0.1,
                           val_percent_check=0.1,
                           gpus=1)

    tutils.run_model_test(trainer_options, model)
Example #21
def test_amp_single_gpu(tmpdir, backend):
    """Make sure DP/DDP + AMP work."""
    tutils.reset_seed()

    model, hparams = tutils.get_default_model()

    trainer_options = dict(default_root_dir=tmpdir,
                           max_epochs=1,
                           gpus=1,
                           distributed_backend=backend,
                           precision=16)

    # tutils.run_model_test(trainer_options, model)

    trainer = Trainer(**trainer_options)
    result = trainer.fit(model)

    assert result == 1
Example #22
def test_amp_multi_gpu(tmpdir, backend):
    """Make sure DP/DDP + AMP work."""
    tutils.reset_seed()
    tutils.set_random_master_port()

    model, hparams = tutils.get_default_model()

    trainer_options = dict(
        default_root_dir=tmpdir,
        max_epochs=1,
        # gpus=2,
        gpus='0, 1',  # test init with gpu string
        distributed_backend=backend,
        precision=16)

    # tutils.run_model_test(trainer_options, model)
    trainer = Trainer(**trainer_options)
    result = trainer.fit(model)
    assert result
Example #23
def test_ddp_all_dataloaders_passed_to_fit(tmpdir):
    """Make sure DDP works with dataloaders passed to fit()"""

    tutils.reset_seed()
    tutils.set_random_master_port()

    model, hparams = tutils.get_default_model()

    trainer = Trainer(default_root_dir=tmpdir,
                      progress_bar_refresh_rate=0,
                      max_epochs=1,
                      train_percent_check=0.4,
                      val_percent_check=0.2,
                      gpus=[0, 1],
                      distributed_backend='ddp')
    result = trainer.fit(model,
                         train_dataloader=model.train_dataloader(),
                         val_dataloaders=model.val_dataloader())
    assert result == 1, "DDP doesn't work with dataloaders passed to fit()."
Example #24
def test_default_logger_callbacks_cpu_model(tmpdir):
    """Test each of the trainer options."""
    tutils.reset_seed()

    trainer_options = dict(
        default_root_dir=tmpdir,
        max_epochs=1,
        gradient_clip_val=1.0,
        overfit_pct=0.20,
        progress_bar_refresh_rate=0,
        train_percent_check=0.01,
        val_percent_check=0.01,
    )

    model, hparams = tutils.get_default_model()
    tutils.run_model_test_no_loggers(trainer_options, model)

    # test freeze on cpu
    model.freeze()
    model.unfreeze()
Example #25
def test_ddp_all_dataloaders_passed_to_fit(tmpdir):
    """Make sure DDP works with dataloaders passed to fit()"""

    tutils.reset_seed()
    tutils.set_random_master_port()

    model, hparams = tutils.get_default_model()
    trainer_options = dict(default_save_path=tmpdir,
                           show_progress_bar=False,
                           max_epochs=1,
                           train_percent_check=0.4,
                           val_percent_check=0.2,
                           gpus=[0, 1],
                           distributed_backend='ddp')

    fit_options = dict(train_dataloader=model.train_dataloader(),
                       val_dataloaders=model.val_dataloader())

    trainer = Trainer(**trainer_options)
    result = trainer.fit(model, **fit_options)
    assert result == 1, "DDP doesn't work with dataloaders passed to fit()."
Example #26
def test_early_stopping_cpu_model(tmpdir):
    """Test each of the trainer options."""
    tutils.reset_seed()

    stopping = EarlyStopping(monitor='val_loss', min_delta=0.1)
    trainer_options = dict(
        default_root_dir=tmpdir,
        early_stop_callback=stopping,
        gradient_clip_val=1.0,
        overfit_pct=0.20,
        track_grad_norm=2,
        train_percent_check=0.1,
        val_percent_check=0.1,
    )

    model, hparams = tutils.get_default_model()
    tutils.run_model_test(trainer_options, model, on_gpu=False)

    # test freeze on cpu
    model.freeze()
    model.unfreeze()
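
The callback above sets only `monitor` and `min_delta`. For reference, a sketch of the commonly used `EarlyStopping` options; the `patience` and `mode` values below are illustrative choices, not taken from the test:

from pytorch_lightning.callbacks import EarlyStopping

stopping = EarlyStopping(
    monitor='val_loss',  # logged metric to watch
    min_delta=0.1,       # minimum change that counts as an improvement
    patience=3,          # epochs without improvement before stopping
    mode='min',          # lower monitored values are better
)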
Example #27
def test_loggers_fit_test(tmpdir, monkeypatch, logger_class):
    """Verify that basic functionality of all loggers."""
    tutils.reset_seed()

    # prevent comet logger from trying to print at exit, since
    # pytest's stdout/stderr redirection breaks it
    import atexit
    monkeypatch.setattr(atexit, 'register', lambda _: None)

    model, _ = tutils.get_default_model()

    class StoreHistoryLogger(logger_class):
        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.history = []

        def log_metrics(self, metrics, step):
            super().log_metrics(metrics, step)
            self.history.append((step, metrics))

    logger_args = _get_logger_args(logger_class, tmpdir)
    logger = StoreHistoryLogger(**logger_args)

    trainer = Trainer(
        max_epochs=1,
        logger=logger,
        train_percent_check=0.2,
        val_percent_check=0.5,
        fast_dev_run=True,
    )
    trainer.fit(model)

    trainer.test()

    log_metric_names = [(s, sorted(m.keys())) for s, m in logger.history]
    assert log_metric_names == [(0, ['val_acc', 'val_loss']),
                                (0, ['train_some_val']),
                                (1, ['test_acc', 'test_loss'])]
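
The subclass-and-record pattern works for any concrete logger class, not just the parametrized `logger_class`. A standalone sketch using `TensorBoardLogger` (the wrapper name is illustrative):

from pytorch_lightning.loggers import TensorBoardLogger

class HistoryLogger(TensorBoardLogger):  # hypothetical wrapper
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.history = []

    def log_metrics(self, metrics, step):
        super().log_metrics(metrics, step)
        self.history.append((step, metrics))

logger = HistoryLogger(save_dir='logs')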
Example #28
def test_multi_gpu_model(tmpdir, backend):
    """Make sure DDP works."""

    tutils.reset_seed()
    tutils.set_random_master_port()

    model, hparams = tutils.get_default_model()
    trainer_options = dict(
        default_root_dir=tmpdir,
        max_epochs=1,
        train_percent_check=0.4,
        val_percent_check=0.2,
        gpus=[0, 1],
        distributed_backend=backend,
    )

    # tutils.run_model_test(trainer_options, model)
    trainer = Trainer(**trainer_options)
    result = trainer.fit(model)
    assert result

    # test memory helper functions
    memory.get_memory_profile('min_max')
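
The final line smoke-tests the memory helper. A sketch of standalone use, assuming the `'min_max'` mode shown in the test and that NVIDIA GPUs (and `nvidia-smi`) are available, since the helper samples per-GPU memory usage:

from pytorch_lightning.core import memory

# 'min_max' reduces the per-GPU memory map to its smallest and largest entries
profile = memory.get_memory_profile('min_max')
print(profile)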