コード例 #1
0
def test_fit_train_loader_only(tmpdir):
    """Fit a model whose only data source is a train loader passed to ``fit``.

    The dataloader hooks and the validation/test step and epoch-end hooks on
    the model are all set to ``None`` first, so the loader handed to
    ``trainer.fit`` is the only one left.
    """
    model = EvalModelTemplate()
    # Build the loader before the hook is removed from the model.
    loader = model.train_dataloader()

    disabled_hooks = (
        'train_dataloader',
        'val_dataloader',
        'test_dataloader',
        'validation_step',
        'validation_epoch_end',
        'test_step',
        'test_epoch_end',
    )
    for hook_name in disabled_hooks:
        setattr(model, hook_name, None)

    runner = Trainer(fast_dev_run=True, default_root_dir=tmpdir)
    runner.fit(model, train_dataloader=loader)
コード例 #2
0
def test_inf_val_dataloader(tmpdir, check_interval):
    """Train one epoch using the model's ``val_dataloader__infinite`` hook for validation.

    ``check_interval`` is forwarded to the trainer as ``val_check_interval``.
    """
    model = EvalModelTemplate()
    model.val_dataloader = model.val_dataloader__infinite

    trainer_kwargs = dict(
        default_root_dir=tmpdir,
        max_epochs=1,
        val_check_interval=check_interval,
    )
    fit_result = Trainer(**trainer_kwargs).fit(model)

    # the test treats a return value of 1 as "training completed"
    assert fit_result == 1
コード例 #3
0
def test_dataloaders_passed_to_fit(tmpdir):
    """Fit with ``tpu_cores=8`` while supplying train and val dataloaders directly to ``fit``."""
    model = EvalModelTemplate()
    trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, tpu_cores=8)

    # loaders are built from the model's own hooks and passed explicitly
    loaders = dict(
        train_dataloader=model.train_dataloader(),
        val_dataloaders=model.val_dataloader(),
    )
    outcome = trainer.fit(model, **loaders)

    assert outcome, "TPU doesn't work with dataloaders passed to fit()."
コード例 #4
0
def test_val_dataloader_not_implemented_error(tmpdir, check_interval):
    """Train with the model's ``val_dataloader__not_implemented_error`` hook as the val loader.

    The run is capped at 5 steps / 1 epoch, ``check_interval`` is forwarded as
    ``val_check_interval``, and the trainer must end in ``TrainerState.FINISHED``.
    """
    model = EvalModelTemplate()
    model.val_dataloader = model.val_dataloader__not_implemented_error

    trainer_kwargs = dict(
        default_root_dir=tmpdir,
        max_steps=5,
        max_epochs=1,
        val_check_interval=check_interval,
    )
    trainer = Trainer(**trainer_kwargs)
    trainer.fit(model)

    # the final trainer state is the completion signal here
    assert trainer.state == TrainerState.FINISHED, f"Training failed with {trainer.state}"
コード例 #5
0
def test_train_dataloader_not_implemented_error(tmpdir, check_interval):
    """Train with the ``*__not_implemented_error`` hooks for both the train and val loaders.

    The run is capped at 5 steps / 1 epoch and ``check_interval`` is forwarded
    as ``val_check_interval``; ``fit`` must return 1.
    """
    model = EvalModelTemplate()
    for split in ('train', 'val'):
        replacement = getattr(model, f'{split}_dataloader__not_implemented_error')
        setattr(model, f'{split}_dataloader', replacement)

    trainer_kwargs = dict(
        default_root_dir=tmpdir,
        max_steps=5,
        max_epochs=1,
        val_check_interval=check_interval,
    )
    fit_result = Trainer(**trainer_kwargs).fit(model)

    # the test treats a return value of 1 as "training completed"
    assert fit_result == 1
コード例 #6
0
def test_ddp_all_dataloaders_passed_to_fit(tmpdir):
    """Fit with the ``ddp_spawn`` backend on GPUs 0 and 1, passing train/val loaders to ``fit``."""
    tutils.set_random_master_port()

    model = EvalModelTemplate()
    loaders = {
        'train_dataloader': model.train_dataloader(),
        'val_dataloaders': model.val_dataloader(),
    }

    trainer_kwargs = {
        'default_root_dir': tmpdir,
        'progress_bar_refresh_rate': 0,
        'max_epochs': 1,
        'limit_train_batches': 0.2,
        'limit_val_batches': 0.2,
        'gpus': [0, 1],
        'distributed_backend': 'ddp_spawn',
    }
    trainer = Trainer(**trainer_kwargs)

    outcome = trainer.fit(model, **loaders)
    assert outcome == 1, "DDP doesn't work with dataloaders passed to fit()."
コード例 #7
0
def test_model_tpu_cores_8(tmpdir):
    """Run the shared model test pipeline with ``tpu_cores=8``."""
    model = EvalModelTemplate()
    # 8 cores needs a big dataset; the same loader factory serves train and val
    model.train_dataloader = _serial_train_loader
    model.val_dataloader = _serial_train_loader

    trainer_options = {
        'default_root_dir': tmpdir,
        'progress_bar_refresh_rate': 0,
        'max_epochs': 1,
        'tpu_cores': 8,
        'limit_train_batches': 0.4,
        'limit_val_batches': 0.4,
    }

    tpipes.run_model_test(trainer_options, model, on_gpu=False, with_hpc=False)
コード例 #8
0
def test_val_loop_config(tmpdir):
    """Expect a ``UserWarning`` from ``fit`` when half of the validation setup is missing.

    Two configurations are checked with the same trainer, each on a fresh
    model: ``validation_step`` set to ``None``, then ``val_dataloader`` set
    to ``None``.
    """
    tutils.reset_seed()
    hparams = EvalModelTemplate.get_default_hparams()
    trainer = Trainer(default_root_dir=tmpdir, max_epochs=1)

    # 1st pass: val dataloader present, validation_step removed
    # 2nd pass: validation_step present, val dataloader removed
    for missing_piece in ('validation_step', 'val_dataloader'):
        with pytest.warns(UserWarning):
            model = EvalModelTemplate(**hparams)
            setattr(model, missing_piece, None)
            trainer.fit(model)
コード例 #9
0
def test_base_tpu_model_8(tmpdir):
    """Run the shared model test pipeline with ``tpu_cores=8`` and a 15000-sample loader."""
    model = EvalModelTemplate()

    # 8 cores needs a big dataset
    def long_train_loader():
        """Return a batch-size-32 loader over a 15000-sample ``TrialMNIST`` subset."""
        mnist_subset = TrialMNIST(download=True, num_samples=15000, digits=(0, 1, 2, 5, 8))
        return DataLoader(mnist_subset, batch_size=32)

    # the same factory is used for both training and validation
    model.train_dataloader = long_train_loader
    model.val_dataloader = long_train_loader

    trainer_options = {
        'default_root_dir': tmpdir,
        'progress_bar_refresh_rate': 0,
        'max_epochs': 1,
        'tpu_cores': 8,
        'limit_train_batches': 0.4,
        'limit_val_batches': 0.4,
    }

    tpipes.run_model_test(trainer_options, model, on_gpu=False, with_hpc=False)
コード例 #10
0
def test_inf_dataloaders_with_limit_percent_batches(tmpdir, limit_train_batches, limit_val_batches, limit_test_batches):
    """Check trainer batch counts when the ``*__infinite`` loaders are combined with ``limit_*_batches``.

    For each split the trainer is expected to report 0 batches when the limit
    equals ``0.0`` and ``float('inf')`` otherwise.
    """
    model = EvalModelTemplate()
    for split in ('train', 'val', 'test'):
        setattr(model, f'{split}_dataloader', getattr(model, f'{split}_dataloader__infinite'))

    def expected_count(limit):
        """Map a batch limit to the batch count the trainer should report."""
        if limit == 0.0:
            return 0
        return float('inf')

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        limit_train_batches=limit_train_batches,
        limit_val_batches=limit_val_batches,
        limit_test_batches=limit_test_batches,
    )

    fit_result = trainer.fit(model)
    assert fit_result == 1
    assert trainer.num_training_batches == expected_count(limit_train_batches)
    assert trainer.num_val_batches[0] == expected_count(limit_val_batches)

    # test-split counts are only checked after an explicit test run
    trainer.test(ckpt_path=None)
    assert trainer.num_test_batches[0] == expected_count(limit_test_batches)
コード例 #11
0
def test_inf_dataloaders_with_limit_num_batches(tmpdir, limit_train_batches, limit_val_batches, limit_test_batches):
    """Check trainer batch counts when the ``*__infinite`` loaders are limited by a batch number.

    The trainer is expected to report exactly the given limit for the train
    split and for the first val/test loader.
    """
    model = EvalModelTemplate()
    for split in ('train', 'val', 'test'):
        setattr(model, f'{split}_dataloader', getattr(model, f'{split}_dataloader__infinite'))

    limits = dict(
        limit_train_batches=limit_train_batches,
        limit_val_batches=limit_val_batches,
        limit_test_batches=limit_test_batches,
    )
    trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, **limits)

    trainer.fit(model)
    assert trainer.state == TrainerState.FINISHED, f"Training failed with {trainer.state}"
    assert trainer.num_training_batches == limit_train_batches
    assert trainer.num_val_batches[0] == limit_val_batches

    # test-split counts are only checked after an explicit test run
    trainer.test(ckpt_path=None)
    assert trainer.num_test_batches[0] == limit_test_batches
コード例 #12
0
def test_benchmark_option(tmpdir):
    """Check that fitting with ``Trainer(benchmark=True)`` leaves ``torch.backends.cudnn.benchmark`` truthy."""
    model = EvalModelTemplate()
    model.val_dataloader = model.val_dataloader__multiple

    # precondition: the cudnn benchmark flag must be off before the trainer exists
    assert not torch.backends.cudnn.benchmark

    trainer_kwargs = dict(
        default_root_dir=tmpdir,
        max_epochs=1,
        benchmark=True,
    )
    fit_result = Trainer(**trainer_kwargs).fit(model)

    # the test treats a return value of 1 as "training completed"
    assert fit_result == 1

    # postcondition: the flag is on after the run
    assert torch.backends.cudnn.benchmark
コード例 #13
0
def test_base_tpu_16bit_model_8_cores(tmpdir):
    """Run the shared model test pipeline with ``precision=16`` and ``tpu_cores=8``.

    After the run the ``XLA_USE_BF16`` environment variable must equal ``'1'``.
    """
    model = EvalModelTemplate()

    # 8 cores needs a big dataset
    def long_train_loader():
        """Return a batch-size-32 loader over a 15000-sample ``TrialMNIST`` subset."""
        mnist_subset = TrialMNIST(download=True, num_samples=15000, digits=(0, 1, 2, 5, 8))
        return DataLoader(mnist_subset, batch_size=32)

    # the same factory is used for both training and validation
    model.train_dataloader = long_train_loader
    model.val_dataloader = long_train_loader

    trainer_options = {
        'default_root_dir': tmpdir,
        'precision': 16,
        'progress_bar_refresh_rate': 0,
        'max_epochs': 1,
        'tpu_cores': 8,
        'limit_train_batches': 0.4,
        'limit_val_batches': 0.4,
    }

    tpipes.run_model_test(trainer_options, model, on_gpu=False)

    bf16_flag = os.environ.get('XLA_USE_BF16')
    assert bf16_flag == '1', "XLA_USE_BF16 was not set in environment variables"
コード例 #14
0
def test_dataloaders_with_limit_num_batches(tmpdir, limit_train_batches, limit_val_batches, limit_test_batches):
    """Check trainer batch counts for multiple val/test loaders limited by a batch number.

    The model is switched to its ``*__multiple_mixed_length`` loaders and the
    matching ``*__multiple_dataloaders`` step / epoch-end hooks. Every loader
    is expected to be reported with exactly the given limit.
    """
    model = EvalModelTemplate()
    hook_sources = (
        ('val_dataloader', 'val_dataloader__multiple_mixed_length'),
        ('test_dataloader', 'test_dataloader__multiple_mixed_length'),
        ('validation_step', 'validation_step__multiple_dataloaders'),
        ('validation_epoch_end', 'validation_epoch_end__multiple_dataloaders'),
        ('test_step', 'test_step__multiple_dataloaders'),
        ('test_epoch_end', 'test_epoch_end__multiple_dataloaders'),
    )
    for hook_name, source_name in hook_sources:
        setattr(model, hook_name, getattr(model, source_name))

    # single train loader, multiple val and test loaders, limits given as counts
    limits = dict(
        limit_train_batches=limit_train_batches,
        limit_val_batches=limit_val_batches,
        limit_test_batches=limit_test_batches,
    )
    trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, **limits)

    trainer.fit(model)
    assert trainer.num_training_batches == limit_train_batches
    expected_val = [limit_val_batches] * len(trainer.val_dataloaders)
    assert trainer.num_val_batches == expected_val

    trainer.test(ckpt_path=None)
    expected_test = [limit_test_batches] * len(trainer.test_dataloaders)
    assert trainer.num_test_batches == expected_test
コード例 #15
0
def test_dataloaders_with_limit_percent_batches(tmpdir, limit_train_batches,
                                                limit_val_batches,
                                                limit_test_batches):
    """Check trainer batch counts for multiple val/test loaders limited by a fraction.

    The model is switched to its ``*__multiple_mixed_length`` loaders and the
    matching ``*__multiple_dataloaders`` step / epoch-end hooks. For each
    loader the expected count is ``int(len(loader) * limit)``.
    """
    model = EvalModelTemplate()
    hook_sources = (
        ('val_dataloader', 'val_dataloader__multiple_mixed_length'),
        ('test_dataloader', 'test_dataloader__multiple_mixed_length'),
        ('validation_step', 'validation_step__multiple_dataloaders'),
        ('validation_epoch_end', 'validation_epoch_end__multiple_dataloaders'),
        ('test_step', 'test_step__multiple_dataloaders'),
        ('test_epoch_end', 'test_epoch_end__multiple_dataloaders'),
    )
    for hook_name, source_name in hook_sources:
        setattr(model, hook_name, getattr(model, source_name))

    # single train loader, multiple val and test loaders, limits given as fractions
    limits = dict(
        limit_train_batches=limit_train_batches,
        limit_val_batches=limit_val_batches,
        limit_test_batches=limit_test_batches,
    )
    trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, **limits)
    trainer.fit(model)

    expected_train_batches = int(len(trainer.train_dataloader) * limit_train_batches)
    expected_val_batches = []
    for loader in trainer.val_dataloaders:
        expected_val_batches.append(int(len(loader) * limit_val_batches))
    assert trainer.num_training_batches == expected_train_batches
    assert trainer.num_val_batches == expected_val_batches

    # test loaders are only inspected after an explicit test run
    trainer.test(ckpt_path=None)
    expected_test_batches = []
    for loader in trainer.test_dataloaders:
        expected_test_batches.append(int(len(loader) * limit_test_batches))
    assert trainer.num_test_batches == expected_test_batches
コード例 #16
0
def test_full_train_loop_with_results_obj_dp(tmpdir):
    """Fit with the ``dp`` backend on GPUs 0 and 1 and check which metric keys were logged.

    The model uses its ``*_full_loop_result_obj_dp`` training hooks, with
    validation and test disabled. After the fit, the keys collected from
    ``trainer.dev_debugger.logged_metrics`` must contain the step, step-end
    and epoch-end metric names.
    """
    # NOTE(review): presumably this switches on dev_debugger recording — confirm.
    # The variable is not restored when the test finishes.
    os.environ['PL_DEV_DEBUG'] = '1'

    num_batches = 10
    num_epochs = 3

    model = EvalModelTemplate()
    # no validation / test step
    model.validation_step = None
    model.test_step = None
    # swap in the full-loop training hooks
    for hook_name in ('training_step', 'training_step_end', 'training_epoch_end'):
        setattr(model, hook_name, getattr(model, f'{hook_name}_full_loop_result_obj_dp'))
    # no validation / test data
    model.val_dataloader = None
    model.test_dataloader = None

    trainer_kwargs = dict(
        default_root_dir=tmpdir,
        distributed_backend='dp',
        gpus=[0, 1],
        max_epochs=num_epochs,
        early_stop_callback=True,
        log_every_n_steps=2,
        limit_train_batches=num_batches,
        weights_summary=None,
    )
    trainer = Trainer(**trainer_kwargs)
    trainer.fit(model)

    # gather every key that appeared in any logged metrics entry
    seen_keys = {
        key
        for logged in trainer.dev_debugger.logged_metrics
        for key in logged.keys()
    }

    assert 'train_step_metric' in seen_keys
    assert 'train_step_end_metric' in seen_keys
    assert 'train_epoch_end_metric_epoch' in seen_keys
コード例 #17
0
def test_overfit_batch_limits(tmpdir):
    """Check the batch counts and samplers a ``Trainer`` produces for ``overfit_batches``
    and ``limit_val_batches`` / ``limit_test_batches``.

    The test replaces the model's train/val/test loader hooks with lambdas
    returning fixed loaders (train shuffled, val/test unshuffled), then
    verifies that:

    * ``reset_train_dataloader`` with ``overfit_batches`` as an int or a float
      yields the expected ``num_training_batches`` and a first batch equal to
      the first batch of an unshuffled reference train loader;
    * ``_reset_eval_dataloader`` for ``'val'`` and ``'test'`` with
      ``overfit_batches`` yields the expected batch count, a
      ``SequentialSampler`` and that same first batch;
    * ``limit_val_batches`` / ``limit_test_batches`` as a float or an int yield
      the expected batch count.

    ``tmpdir`` is accepted but not used in the body.
    """
    # ------------------------------------------------------
    # Make sure shuffle is correct across loaders initially
    # ------------------------------------------------------
    model = EvalModelTemplate()
    # NOTE(review): return value is discarded here — the call looks redundant, confirm
    model.train_dataloader()

    # original train loader which should be replaced in all methods
    train_loader = model.train_dataloader()

    # the default train loader is shuffled; the val and test loaders are not
    assert isinstance(train_loader.sampler, RandomSampler)
    assert isinstance(model.val_dataloader().sampler, SequentialSampler)
    assert isinstance(model.test_dataloader().sampler, SequentialSampler)

    # ------------------------------------------------------
    # get the training loader and batch
    # ------------------------------------------------------
    # Create a reference train dataloader without shuffling.
    train_loader = DataLoader(model.train_dataloader().dataset, shuffle=False)
    # (xa, ya) is the reference first batch every later first batch is compared to
    (xa, ya) = next(iter(train_loader))
    # the loader actually installed on the model is shuffled
    train_loader = DataLoader(model.train_dataloader().dataset, shuffle=True)
    full_train_samples = len(train_loader)
    # expected batch count for the 0.11 fraction used below
    num_train_samples = int(0.11 * full_train_samples)

    # ------------------------------------------------------
    # set VAL and Test loaders
    # ------------------------------------------------------
    val_loader = DataLoader(model.val_dataloader().dataset, shuffle=False)
    test_loader = DataLoader(model.test_dataloader().dataset, shuffle=False)

    # set the model loaders
    model.train_dataloader = lambda: train_loader
    model.val_dataloader = lambda: val_loader
    model.test_dataloader = lambda: test_loader

    # ------------------------------------------------------
    # test train loader applies correct limits
    # ------------------------------------------------------
    # overfit_batches as int
    trainer = Trainer(overfit_batches=4)
    trainer.reset_train_dataloader(model)
    assert trainer.num_training_batches == 4

    # first batch must match the unshuffled reference batch
    (xb, yb) = next(iter(trainer.train_dataloader))
    assert torch.eq(xa, xb).all()
    assert torch.eq(ya, yb).all()

    # overfit_batches as fraction
    trainer = Trainer(overfit_batches=0.11)
    trainer.reset_train_dataloader(model)
    # The dataloader should have been overwritten with a Sequential sampler.
    assert trainer.train_dataloader is not train_loader
    assert trainer.num_training_batches == num_train_samples

    # first batch must match the unshuffled reference batch
    (xb, yb) = next(iter(trainer.train_dataloader))
    assert torch.eq(xa, xb).all()
    assert torch.eq(ya, yb).all()

    # ------------------------------------------------------
    # run tests for both val and test
    # ------------------------------------------------------
    for split in ['val', 'test']:

        # ------------------------------------------------------
        # test overfit_batches as percent
        # ------------------------------------------------------
        loader_num_batches, dataloaders = Trainer(
            overfit_batches=0.11)._reset_eval_dataloader(model, split)
        assert loader_num_batches[0] == num_train_samples

        # make sure we turned off shuffle for the user
        assert isinstance(dataloaders[0].sampler, SequentialSampler)

        # the eval loader's first batch is compared against the TRAIN reference batch
        (xb, yb) = next(iter(dataloaders[0]))
        assert torch.eq(xa, xb).all()
        assert torch.eq(ya, yb).all()

        # ------------------------------------------------------
        # test overfit_batches as int
        # ------------------------------------------------------
        loader_num_batches, dataloaders = Trainer(
            overfit_batches=1)._reset_eval_dataloader(model, split)
        assert loader_num_batches[0] == 1
        loader_num_batches, dataloaders = Trainer(
            overfit_batches=5)._reset_eval_dataloader(model, split)
        assert loader_num_batches[0] == 5

        # ------------------------------------------------------
        # test limit_xxx_batches as percent AND int
        # ------------------------------------------------------
        if split == 'val':
            loader_num_batches, dataloaders = Trainer(
                limit_val_batches=0.1)._reset_eval_dataloader(model, split)
            assert loader_num_batches[0] == int(0.1 * len(val_loader))

            loader_num_batches, dataloaders = Trainer(
                limit_val_batches=10)._reset_eval_dataloader(model, split)
            assert loader_num_batches[0] == 10
        else:
            loader_num_batches, dataloaders = Trainer(
                limit_test_batches=0.1)._reset_eval_dataloader(model, split)
            assert loader_num_batches[0] == int(0.1 * len(test_loader))

            loader_num_batches, dataloaders = Trainer(
                limit_test_batches=10)._reset_eval_dataloader(model, split)
            assert loader_num_batches[0] == 10
コード例 #18
0
def test_wrong_test_settigs(tmpdir):
    """Exercise error/warning behaviour for incomplete test-loop configurations.

    Using one shared trainer and a fresh model per case, in order:

    * ``test_step`` is ``None`` -> ``fit`` raises ``MisconfigurationException``
    * ``test_epoch_end`` is ``None`` -> ``test`` emits ``RuntimeWarning``
    * ``test_dataloader`` reset to the ``LightningModule`` default ->
      ``test`` raises ``MisconfigurationException``
    * default ``test_dataloader``, ``test_step`` is ``None``, loader passed to
      ``test`` -> ``MisconfigurationException``
    * default ``test_dataloader``, ``test_epoch_end`` is ``None``, loader
      passed to ``test`` -> ``RuntimeWarning``
    * train/val loaders and step attributes set to ``None``, loader passed to
      ``test`` -> runs without an expected error
    """
    hparams = EvalModelTemplate.get_default_hparams()
    trainer = Trainer(default_root_dir=tmpdir, max_epochs=1)

    def build_model(**overrides):
        """Create a fresh template model and apply the attribute overrides in the given order."""
        fresh = EvalModelTemplate(hparams)
        for attr_name, attr_value in overrides.items():
            setattr(fresh, attr_name, attr_value)
        return fresh

    default_test_loader_hook = LightningModule.test_dataloader

    # test_dataloader defined on the model but no test_step
    with pytest.raises(MisconfigurationException):
        model = build_model(test_step=None)
        trainer.fit(model)

    # test_dataloader and test_step present, test_epoch_end missing
    with pytest.warns(RuntimeWarning):
        model = build_model(test_epoch_end=None)
        trainer.test(model)

    # test_step present, but neither the model nor the caller supplies a test loader
    with pytest.raises(MisconfigurationException):
        model = build_model(test_dataloader=default_test_loader_hook)
        trainer.test(model)

    # test loader passed to test(), but test_step missing
    with pytest.raises(MisconfigurationException):
        model = build_model(test_dataloader=default_test_loader_hook, test_step=None)
        trainer.test(model, test_dataloaders=model.dataloader(train=False))

    # test loader passed to test() and test_step present, test_epoch_end missing
    with pytest.warns(RuntimeWarning):
        model = build_model(test_dataloader=default_test_loader_hook, test_epoch_end=None)
        trainer.test(model, test_dataloaders=model.dataloader(train=False))

    # testing only: the train/val pieces are set to None and test() must still run
    # NOTE(review): `train_step` / `val_step` are assigned as-is; the `test_step`
    # naming used above suggests `training_step` / `validation_step` may have
    # been intended — confirm against the LightningModule hook names.
    model = build_model(
        test_dataloader=default_test_loader_hook,
        train_dataloader=None,
        train_step=None,
        val_dataloader=None,
        val_step=None,
    )
    trainer.test(model, test_dataloaders=model.dataloader(train=False))
コード例 #19
0
def test_dataloaders_with_limit_num_batches(tmpdir, limit_train_batches,
                                            limit_val_batches,
                                            limit_test_batches):
    """Check configured and actually-seen batch counts for multiple loaders limited by number.

    The model is switched to its ``*__multiple_mixed_length`` loaders and the
    matching ``*__multiple_dataloaders`` step / epoch-end hooks. The trainer's
    ``num_*_batches`` values are compared with the limits, then the per-loader
    counts recorded by ``trainer.dev_debugger`` are compared as well. When
    ``limit_test_batches`` exceeds ``1e10`` the expected test counts are the
    loader lengths rather than the limit.
    """
    model = EvalModelTemplate()
    hook_sources = (
        ('val_dataloader', 'val_dataloader__multiple_mixed_length'),
        ('test_dataloader', 'test_dataloader__multiple_mixed_length'),
        ('validation_step', 'validation_step__multiple_dataloaders'),
        ('validation_epoch_end', 'validation_epoch_end__multiple_dataloaders'),
        ('test_step', 'test_step__multiple_dataloaders'),
        ('test_epoch_end', 'test_epoch_end__multiple_dataloaders'),
    )
    for hook_name, source_name in hook_sources:
        setattr(model, hook_name, getattr(model, source_name))

    # single train loader, multiple val and test loaders, limits given as counts
    limits = dict(
        limit_train_batches=limit_train_batches,
        limit_val_batches=limit_val_batches,
        limit_test_batches=limit_test_batches,
    )
    trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, **limits)
    trainer.fit(model)

    # --- values the trainer configured ---
    assert trainer.num_training_batches == limit_train_batches
    expected_val = [limit_val_batches] * len(trainer.val_dataloaders)
    assert trainer.num_val_batches == expected_val
    trainer.test(ckpt_path=None)

    # when the limit is greater than the number of test batches it should be the num in loaders
    test_dataloader_lengths = [len(loader) for loader in model.test_dataloader()]
    limit_exceeds_loaders = limit_test_batches > 1e10
    if limit_exceeds_loaders:
        expected_test = test_dataloader_lengths
    else:
        expected_test = [limit_test_batches] * len(trainer.test_dataloaders)
    assert trainer.num_test_batches == expected_test

    # --- values actually observed by the dev debugger ---
    num_val_dataloaders = len(model.val_dataloader())
    num_test_dataloaders = len(model.test_dataloader())
    if not limit_train_batches > 0:
        return

    # one entry per val loader, each equal to the limit
    seen_val = trainer.dev_debugger.num_seen_val_check_batches
    assert len(seen_val) == num_val_dataloaders
    for _loader_idx, seen_count in seen_val.items():
        assert seen_count == limit_val_batches

    # one entry per test loader, each equal to the limit or to the loader length
    seen_test = trainer.dev_debugger.num_seen_test_check_batches
    assert len(seen_test) == num_test_dataloaders
    for loader_idx, seen_count in seen_test.items():
        if limit_exceeds_loaders:
            assert seen_count == test_dataloader_lengths[loader_idx]
        else:
            assert seen_count == limit_test_batches