Code example #1
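All of the examples in this section are lifted from the PyTorch Lightning test suite and omit their imports. A minimal import block they assume might look like the sketch below; the module paths follow the 0.9/1.0-era layout of the Lightning repository and may differ in other releases.

import os
import re

import pytest

from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint
from pytorch_lightning.metrics import Metric
from pytorch_lightning.utilities.exceptions import MisconfigurationException

# helpers that live in the Lightning repository's tests/ package
import tests.base.develop_pipelines as tpipes
import tests.base.develop_utils as tutils
from tests.base import EvalModelTemplate
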
def test_loop_steps_only_dp(tmpdir):
    os.environ['PL_DEV_DEBUG'] = '1'

    batches = 10
    epochs = 3

    model = EvalModelTemplate()
    model.test_step = None
    model.training_step = model.training_step_result_obj_dp
    model.training_step_end = None
    model.training_epoch_end = None
    model.validation_step = model.validation_step_result_obj_dp
    model.validation_step_end = None
    model.validation_epoch_end = None
    model.test_dataloader = None

    trainer = Trainer(
        default_root_dir=tmpdir,
        distributed_backend='dp',
        gpus=[0, 1],
        max_epochs=epochs,
        early_stop_callback=True,
        log_every_n_steps=2,
        limit_train_batches=batches,
        weights_summary=None,
    )

    trainer.fit(model)

    assert model.training_step_called
    assert model.validation_step_called
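
The `*_result_obj_dp` hooks bound above are defined on `EvalModelTemplate`. As a rough sketch of the pattern they follow, assuming the 0.9-era `TrainResult` API (the loss computation and metric name here are illustrative, not the template's actual code):

import torch
from pytorch_lightning.core.step_result import TrainResult

def training_step_result_obj_dp(self, batch, batch_idx):
    x, y = batch
    y_hat = self(x)
    loss = torch.nn.functional.cross_entropy(y_hat, y)
    # TrainResult carries the loss to minimize plus any logged metrics
    result = TrainResult(minimize=loss)
    result.log('train_step_metric', loss)
    self.training_step_called = True  # the flag the test asserts on
    return result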
Code example #2
def test_ckpt_metric_names_results(tmpdir):
    model = EvalModelTemplate()
    model.training_step = model.training_step_result_obj
    model.training_step_end = None
    model.training_epoch_end = None

    model.validation_step = model.validation_step_result_obj
    model.validation_step_end = None
    model.validation_epoch_end = None

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        gradient_clip_val=1.0,
        overfit_batches=0.20,
        progress_bar_refresh_rate=0,
        limit_train_batches=0.01,
        limit_val_batches=0.01,
        checkpoint_callback=ModelCheckpoint(filepath=tmpdir + '/{val_loss:.2f}'),
    )

    trainer.fit(model)

    # make sure the checkpoint we saved has the metric in the name
    ckpts = os.listdir(tmpdir)
    ckpts = [x for x in ckpts if 'val_loss' in x]
    assert len(ckpts) == 1
    val = re.sub('[^0-9.]', '', ckpts[0])
    assert len(val) > 3
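
The final two asserts recover the metric value from the checkpoint filename. Assuming `ModelCheckpoint` expands the `'{val_loss:.2f}'` template into a name such as `val_loss=0.52.ckpt` (the exact value here is hypothetical), the check works like this:

import re

name = 'val_loss=0.52.ckpt'        # hypothetical checkpoint filename
val = re.sub('[^0-9.]', '', name)  # keep only digits and dots -> '0.52.'
assert len(val) > 3                # a formatted float survived in the name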
Code example #3
def test_result_obj_on_tpu(tmpdir):
    seed_everything(1234)

    batches = 5
    epochs = 2

    model = EvalModelTemplate()
    model.training_step = model.training_step_result_obj
    model.training_step_end = None
    model.training_epoch_end = None
    model.validation_step = model.validation_step_result_obj
    model.validation_step_end = None
    model.validation_epoch_end = None
    model.test_step = model.test_step_result_obj
    model.test_step_end = None
    model.test_epoch_end = None

    trainer_options = dict(default_root_dir=tmpdir,
                           max_epochs=epochs,
                           callbacks=[EarlyStopping()],
                           log_every_n_steps=2,
                           limit_train_batches=batches,
                           weights_summary=None,
                           tpu_cores=8)

    tpipes.run_model_test(trainer_options, model, on_gpu=False, with_hpc=False)
Code example #4
def test_result_obj_on_tpu(tmpdir):
    seed_everything(1234)
    os.environ['PL_DEV_DEBUG'] = '1'

    batches = 5
    epochs = 2

    model = EvalModelTemplate()
    model.training_step = model.training_step_result_obj
    model.training_step_end = None
    model.training_epoch_end = None
    model.validation_step = model.validation_step_result_obj
    model.validation_step_end = None
    model.validation_epoch_end = None
    model.test_step = model.test_step_result_obj
    model.test_step_end = None
    model.test_epoch_end = None

    trainer_options = dict(default_root_dir=tmpdir,
                           max_epochs=epochs,
                           early_stop_callback=True,
                           row_log_interval=2,
                           limit_train_batches=batches,
                           weights_summary=None,
                           tpu_cores=8)

    tpipes.run_model_test(trainer_options, model, on_gpu=False, with_hpc=False)
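
Code examples #3 and #4 appear to be the same test from two different Lightning releases: `row_log_interval` was renamed to `log_every_n_steps` and the boolean `early_stop_callback` flag was replaced by passing an `EarlyStopping` instance through `callbacks` around the 1.0 release, which is why both variants are shown.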
Code example #5
def test_warning_with_few_workers_multi_loader(mock, tmpdir, ckpt_path):
    """ Test that error is raised if dataloader with only a few workers is used """

    model = EvalModelTemplate()
    model.training_step = model.training_step__multiple_dataloaders
    model.validation_step = model.validation_step__multiple_dataloaders
    model.validation_epoch_end = model.validation_epoch_end__multiple_dataloaders
    model.test_step = model.test_step__multiple_dataloaders
    model.test_epoch_end = model.test_epoch_end__multiple_dataloaders

    # all loaders use num_workers=0, which should trigger the warning
    train_dl = model.dataloader(train=False)
    train_dl.num_workers = 0

    val_dl = model.dataloader(train=False)
    val_dl.num_workers = 0

    train_multi_dl = {'a': train_dl, 'b': train_dl}
    val_multi_dl = [val_dl, val_dl]
    test_multi_dl = [train_dl, train_dl]

    fit_options = dict(train_dataloader=train_multi_dl,
                       val_dataloaders=val_multi_dl)
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        limit_val_batches=0.1,
        limit_train_batches=0.2,
    )

    # fit model
    with pytest.warns(
        UserWarning,
        match='The dataloader, train dataloader, does not have many workers which may be a bottleneck.'
    ):
        trainer.fit(model, **fit_options)

    with pytest.warns(
        UserWarning,
        match='The dataloader, val dataloader 0, does not have many workers which may be a bottleneck.'
    ):
        trainer.fit(model, **fit_options)

    if ckpt_path == 'specific':
        ckpt_path = trainer.checkpoint_callback.best_model_path
    test_options = dict(test_dataloaders=test_multi_dl, ckpt_path=ckpt_path)
    with pytest.warns(
        UserWarning,
        match='The dataloader, test dataloader 0, does not have many workers which may be a bottleneck.'
    ):
        trainer.test(**test_options)
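
The `mock` and `ckpt_path` parameters imply decorators that were dropped from this snippet. One plausible reconstruction, assuming the test patches `cpu_count` so that Lightning believes more workers are available (the patch target and values are assumptions, not confirmed by the source):

from unittest.mock import patch

@pytest.mark.parametrize('ckpt_path', [None, 'best', 'specific'])
@patch('pytorch_lightning.trainer.data_loading.multiprocessing.cpu_count', return_value=4)
def test_warning_with_few_workers_multi_loader(mock, tmpdir, ckpt_path):
    ...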
Code example #6
def test_val_step_full_loop_result_dp(tmpdir):
    # TODO: finish the full train, val, test loop with dp
    os.environ['PL_DEV_DEBUG'] = '1'

    batches = 10
    epochs = 3

    model = EvalModelTemplate()
    model.training_step = model.training_step_full_loop_result_obj_dp
    model.training_step_end = model.training_step_end_full_loop_result_obj_dp
    model.training_epoch_end = model.training_epoch_end_full_loop_result_obj_dp
    model.validation_step = model.eval_step_full_loop_result_obj_dp
    model.validation_step_end = model.eval_step_end_full_loop_result_obj_dp
    model.validation_epoch_end = model.eval_epoch_end_full_loop_result_obj_dp
    model.test_step = model.eval_step_full_loop_result_obj_dp
    model.test_step_end = model.eval_step_end_full_loop_result_obj_dp
    model.test_epoch_end = model.eval_epoch_end_full_loop_result_obj_dp

    trainer = Trainer(
        default_root_dir=tmpdir,
        distributed_backend='dp',
        gpus=[0, 1],
        max_epochs=epochs,
        early_stop_callback=True,
        log_every_n_steps=2,
        limit_train_batches=batches,
        weights_summary=None,
    )

    trainer.fit(model)

    results = trainer.test()

    # assert we returned all metrics requested
    assert len(results) == 1
    results = results[0]
    assert 'test_epoch_end_metric' in results

    # make sure we saw all the correct keys along all paths
    seen_keys = set()
    for metric in trainer.dev_debugger.logged_metrics:
        seen_keys.update(metric.keys())

    assert 'train_step_metric' in seen_keys
    assert 'train_step_end_metric' in seen_keys
    assert 'train_epoch_end_metric_epoch' in seen_keys
    assert 'validation_step_metric_step/epoch_0' in seen_keys
    assert 'validation_step_metric_epoch' in seen_keys
    assert 'validation_step_end_metric' in seen_keys
    assert 'validation_epoch_end_metric' in seen_keys
    assert 'test_step_metric_step/epoch_2' in seen_keys
    assert 'test_step_metric_epoch' in seen_keys
    assert 'test_step_end_metric' in seen_keys
    assert 'test_epoch_end_metric' in seen_keys
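
The `_step`/`_epoch` suffixes and `/epoch_N` qualifiers in the asserted keys come from Result-object logging: a metric logged with both `on_step=True` and `on_epoch=True` is recorded once per step, tagged with the current epoch, and once as an epoch-level aggregate, so it surfaces under both key forms.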
Code example #7
def test_fit_multiple_train_loaders(tmpdir, multiple_trainloader_mode, num_training_batches):
    """Integration test for multple train loaders"""
    model = EvalModelTemplate()

    model.train_dataloader = model.train_dataloader__multiple_mapping
    # todo: add also `train_dataloader__multiple_sequence`
    model.training_step = model.training_step__multiple_dataloaders

    trainer = Trainer(
        max_epochs=1,
        default_root_dir=tmpdir,
        multiple_trainloader_mode=multiple_trainloader_mode,
    )

    assert 1 == trainer.fit(model)
    # verify the num_training_batches according to the multiple_trainloader_mode
    assert num_training_batches == trainer.num_training_batches
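
The `multiple_trainloader_mode` and `num_training_batches` arguments arrive via a parametrize decorator that the snippet omits. A hedged reconstruction (the batch counts are hypothetical and depend on the sizes of the template's loaders):

@pytest.mark.parametrize('multiple_trainloader_mode,num_training_batches', [
    ('min_size', 16),        # the shortest loader bounds the epoch
    ('max_size_cycle', 64),  # shorter loaders are cycled to match the longest
])
def test_fit_multiple_train_loaders(tmpdir, multiple_trainloader_mode, num_training_batches):
    ...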
Code example #8
def test_wrong_train_setting(tmpdir):
    """
    * Test that an error is thrown when no `train_dataloader()` is defined
    * Test that an error is thrown when no `training_step()` is defined
    """
    tutils.reset_seed()
    hparams = EvalModelTemplate.get_default_hparams()
    trainer = Trainer(default_root_dir=tmpdir, max_epochs=1)

    with pytest.raises(MisconfigurationException):
        model = EvalModelTemplate(**hparams)
        model.train_dataloader = None
        trainer.fit(model)

    with pytest.raises(MisconfigurationException):
        model = EvalModelTemplate(**hparams)
        model.training_step = None
        trainer.fit(model)
Code example #9
def test_full_loop_result_cpu(tmpdir):
    seed_everything(1234)
    os.environ['PL_DEV_DEBUG'] = '1'

    batches = 10
    epochs = 2

    model = EvalModelTemplate()
    model.training_step = model.training_step_result_obj
    model.training_step_end = None
    model.training_epoch_end = None
    model.validation_step = model.validation_step_result_obj
    model.validation_step_end = None
    model.validation_epoch_end = None
    model.test_step = model.test_step_result_obj
    model.test_step_end = None
    model.test_epoch_end = None

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=epochs,
        early_stop_callback=True,
        log_every_n_steps=2,
        limit_train_batches=batches,
        weights_summary=None,
    )

    trainer.fit(model)

    results = trainer.test()

    # assert we returned all metrics requested
    assert len(results) == 1
    results = results[0]
    assert results['test_loss'] < 0.3
    assert results['test_acc'] > 0.9
    assert len(results) == 2
    assert 'early_stop_on' not in results
    assert 'checkpoint_on' not in results

    results2 = trainer.test()[0]
    for k, v in results.items():
        assert results2[k] == v
Code example #10
def test_result_monitor_warnings(tmpdir):
    """
    Tests that we warn when the monitor key is changed and we use Results obj
    """
    model = EvalModelTemplate()
    model.test_step = None
    model.training_step = model.training_step_result_obj
    model.training_step_end = None
    model.training_epoch_end = None
    model.validation_step = model.validation_step_result_obj
    model.validation_step_end = None
    model.validation_epoch_end = None
    model.test_dataloader = None

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=2,
        early_stop_callback=True,
        log_every_n_steps=2,
        limit_train_batches=2,
        weights_summary=None,
        checkpoint_callback=ModelCheckpoint(monitor='not_checkpoint_on'))

    # the monitored key was changed but the metric is not found, so fit raises
    with pytest.raises(MisconfigurationException,
                       match="not found in the returned metrics"):
        trainer.fit(model)

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=2,
        log_every_n_steps=2,
        limit_train_batches=2,
        weights_summary=None,
        early_stop_callback=EarlyStopping(monitor='not_val_loss'))

    with pytest.raises(
        RuntimeError,
        match=r'.*Early stopping conditioned on metric `not_val_loss` which is not*'
    ):
        trainer.fit(model)
Code example #11
def test_warning_with_few_workers_multi_loader(_, tmpdir, ckpt_path, stage):
    """ Test that error is raised if dataloader with only a few workers is used """

    model = EvalModelTemplate()
    model.training_step = model.training_step__multiple_dataloaders
    model.validation_step = model.validation_step__multiple_dataloaders
    model.validation_epoch_end = model.validation_epoch_end__multiple_dataloaders
    model.test_step = model.test_step__multiple_dataloaders
    model.test_epoch_end = model.test_epoch_end__multiple_dataloaders

    val_dl = model.dataloader(train=False)
    val_dl.num_workers = 0

    train_dl = model.dataloader(train=False)
    train_dl.num_workers = 0

    train_multi_dl = {'a': train_dl, 'b': train_dl}
    val_multi_dl = [val_dl, val_dl]
    test_multi_dl = [train_dl, train_dl]

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        limit_val_batches=0.1,
        limit_train_batches=0.2,
    )

    with pytest.warns(
        UserWarning,
        match=f'The dataloader, {stage} dataloader{" 0" if stage != "train" else ""}, does not have many workers'
    ):
        if stage == 'test':
            ckpt_path = trainer.checkpoint_callback.best_model_path if ckpt_path == 'specific' else ckpt_path
            trainer.test(model,
                         test_dataloaders=test_multi_dl,
                         ckpt_path=ckpt_path)
        else:
            trainer.fit(model,
                        train_dataloader=train_multi_dl,
                        val_dataloaders=val_multi_dl)
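
This is a newer variant of code example #5 that additionally parametrizes the stage: on top of the decorators sketched after that example, something like `@pytest.mark.parametrize('stage', ['train', 'val', 'test'])` is presumably stacked, and the patched mock is received as `_`.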
Code example #12
def test_model_pickable(tmpdir, metric: Metric):
    """Make sure that metrics are pickable by including into a model and running in multi-gpu mode"""
    tutils.set_random_master_port()

    trainer_options = dict(
        default_root_dir=tmpdir,
        max_epochs=1,
        limit_train_batches=10,
        gpus=[0, 1],
        distributed_backend="ddp_spawn",
    )

    model = EvalModelTemplate()
    model.metric = metric()
    model.training_step = model.training_step__using_metrics

    trainer = Trainer(**trainer_options)
    result = trainer.fit(model)

    # correct result and ok accuracy
    assert result == 1, "ddp model failed to complete"
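
The `metric` argument is a parametrized metric class. Assuming the 1.0-era `pytorch_lightning.metrics` package, the missing decorator might look like the following (the particular metric classes are illustrative):

from pytorch_lightning.metrics import Accuracy

@pytest.mark.parametrize('metric', [Accuracy])
def test_model_pickable(tmpdir, metric: Metric):
    ...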
Code example #13
def test_eval_loop_return_none(tmpdir):
    """
    Tests that we warn when the monitor key is changed and we use Results obj
    """
    model = EvalModelTemplate()
    model.test_step = None
    model.training_step = model.training_step_result_obj
    model.training_step_end = None
    model.training_epoch_end = None
    model.validation_step = model.validation_step_result_obj
    model.validation_step_end = None
    model.validation_epoch_end = model.validation_epoch_end_return_none
    model.test_dataloader = None

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=2,
        log_every_n_steps=2,
        limit_train_batches=2,
        weights_summary=None,
    )
    trainer.fit(model)
Code example #14
def test_result_monitor_warnings(tmpdir):
    """
    Tests that we warn when the monitor key is changed and we use Results obj
    """
    model = EvalModelTemplate()
    model.test_step = None
    model.training_step = model.training_step_result_obj
    model.training_step_end = None
    model.training_epoch_end = None
    model.validation_step = model.validation_step_result_obj
    model.validation_step_end = None
    model.validation_epoch_end = None
    model.test_dataloader = None

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=2,
        early_stop_callback=True,
        row_log_interval=2,
        limit_train_batches=2,
        weights_summary=None,
        checkpoint_callback=ModelCheckpoint(monitor='not_val_loss')
    )

    with pytest.warns(UserWarning, match='key of ModelCheckpoint has no effect'):
        trainer.fit(model)

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=2,
        row_log_interval=2,
        limit_train_batches=2,
        weights_summary=None,
        early_stop_callback=EarlyStopping(monitor='not_val_loss')
    )

    with pytest.warns(UserWarning, match='key of EarlyStopping has no effect'):
        trainer.fit(model)
Code example #15
def test_full_train_loop_with_results_obj_dp(tmpdir):
    os.environ['PL_DEV_DEBUG'] = '1'

    batches = 10
    epochs = 3

    model = EvalModelTemplate()
    model.validation_step = None
    model.test_step = None
    model.training_step = model.training_step_full_loop_result_obj_dp
    model.training_step_end = model.training_step_end_full_loop_result_obj_dp
    model.training_epoch_end = model.training_epoch_end_full_loop_result_obj_dp
    model.val_dataloader = None
    model.test_dataloader = None

    trainer = Trainer(
        default_root_dir=tmpdir,
        distributed_backend='dp',
        gpus=[0, 1],
        max_epochs=epochs,
        early_stop_callback=True,
        log_every_n_steps=2,
        limit_train_batches=batches,
        weights_summary=None,
    )

    trainer.fit(model)

    # make sure we saw all the correct keys
    seen_keys = set()
    for metric in trainer.dev_debugger.logged_metrics:
        seen_keys.update(metric.keys())

    assert 'train_step_metric' in seen_keys
    assert 'train_step_end_metric' in seen_keys
    assert 'train_epoch_end_metric_epoch' in seen_keys
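
Several of these tests set `PL_DEV_DEBUG=1`. That flag enables Lightning's internal debugger, which records every logged metric so a test can assert on the keys afterwards. The recurring pattern, in isolation:

os.environ['PL_DEV_DEBUG'] = '1'  # must be set before the Trainer is created

trainer = Trainer(max_epochs=1)   # plus whatever options the test needs
trainer.fit(model)

# trainer.dev_debugger.logged_metrics is a list of dicts, one per log call
seen_keys = set()
for metric in trainer.dev_debugger.logged_metrics:
    seen_keys.update(metric.keys())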