コード例 #1
0
def test_warning_with_iterable_dataset_and_len(tmpdir):
    """Warn the user when an `IterableDataset` also implements `__len__`."""
    model = EvalModelTemplate()
    wrapped_dataset = model.train_dataloader().dataset

    class IterableWithLen(IterableDataset):
        """Iterable dataset that (questionably) also reports a length."""

        def __iter__(self):
            return iter(wrapped_dataset)

        def __len__(self):
            return len(wrapped_dataset)

    dataloader = DataLoader(IterableWithLen(), batch_size=16)
    # Sanity check: the loader is seen as both sized and iterable-backed.
    assert _has_len(dataloader)
    assert _has_iterable_dataset(dataloader)

    trainer = Trainer(default_root_dir=tmpdir, max_steps=3)

    expected_warning = 'Your `IterableDataset` has `__len__` defined.'
    with pytest.warns(UserWarning, match=expected_warning):
        trainer.fit(model, train_dataloader=dataloader, val_dataloaders=[dataloader])
    with pytest.warns(UserWarning, match=expected_warning):
        trainer.test(model, test_dataloaders=[dataloader])
コード例 #2
0
def test_fit_train_loader_only(tmpdir):
    """fit() succeeds when only a train dataloader is supplied."""
    model = EvalModelTemplate()
    train_dataloader = model.train_dataloader()

    # Strip every dataloader hook and every non-training step/epoch hook.
    for hook in (
        'train_dataloader',
        'val_dataloader',
        'test_dataloader',
        'validation_step',
        'validation_epoch_end',
        'test_step',
        'test_epoch_end',
    ):
        setattr(model, hook, None)

    trainer = Trainer(fast_dev_run=True, default_root_dir=tmpdir)
    trainer.fit(model, train_dataloader=train_dataloader)
コード例 #3
0
def test_inf_dataloaders_with_limit_percent_batches(tmpdir,
                                                    limit_train_batches,
                                                    limit_val_batches,
                                                    limit_test_batches):
    """Infinite train/val/test dataloaders combined with percentage batch limits."""
    model = EvalModelTemplate()
    model.train_dataloader = model.train_dataloader__infinite
    model.val_dataloader = model.val_dataloader__infinite
    model.test_dataloader = model.test_dataloader__infinite

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        limit_train_batches=limit_train_batches,
        limit_val_batches=limit_val_batches,
        limit_test_batches=limit_test_batches,
    )

    def expected_batches(limit):
        # A 0% limit disables the loop entirely; any other fraction of an
        # infinite dataloader is still infinite.
        return 0 if limit == 0.0 else float('inf')

    trainer.fit(model)
    assert trainer.state == TrainerState.FINISHED, f"Training failed with {trainer.state}"
    assert trainer.num_training_batches == expected_batches(limit_train_batches)
    assert trainer.num_val_batches[0] == expected_batches(limit_val_batches)

    trainer.test(ckpt_path=None)
    assert trainer.num_test_batches[0] == expected_batches(limit_test_batches)
コード例 #4
0
def test_train_inf_dataloader_error(tmpdir):
    """An infinite train dataloader plus fractional `val_check_interval` must error."""
    model = EvalModelTemplate()
    model.train_dataloader = model.train_dataloader__infinite

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        val_check_interval=0.5,
    )
    with pytest.raises(MisconfigurationException, match='infinite DataLoader'):
        trainer.fit(model)
コード例 #5
0
def test_train_dataloader_not_implemented_error_failed(tmpdir):
    """A fractional `val_check_interval` is rejected for IterableDataset-style loaders."""
    model = EvalModelTemplate()
    model.train_dataloader = model.train_dataloader__not_implemented_error

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_steps=5,
        max_epochs=1,
        val_check_interval=0.5,
    )
    with pytest.raises(MisconfigurationException, match='using an IterableDataset'):
        trainer.fit(model)
コード例 #6
0
def test_dataloaders_passed_to_fit(tmpdir):
    """Dataloaders handed directly to fit() should work on TPU."""
    model = EvalModelTemplate()

    trainer = Trainer(default_root_dir=tmpdir, max_epochs=1, tpu_cores=8)
    result = trainer.fit(
        model,
        train_dataloader=model.train_dataloader(),
        val_dataloaders=model.val_dataloader(),
    )
    assert result, "TPU doesn't work with dataloaders passed to fit()."
コード例 #7
0
def test_train_dataloader_not_implemented_error(tmpdir, check_interval):
    """IterableDataset-style train/val loaders still train when limited by steps."""
    model = EvalModelTemplate()
    model.train_dataloader = model.train_dataloader__not_implemented_error
    model.val_dataloader = model.val_dataloader__not_implemented_error

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_steps=5,
        max_epochs=1,
        val_check_interval=check_interval,
    )
    trainer.fit(model)
    # verify training completed
    assert trainer.state == TrainerState.FINISHED, f"Training failed with {trainer.state}"
コード例 #8
0
def test_error_on_zero_len_dataloader(tmpdir):
    """A zero-length train dataloader must raise a ValueError."""
    model = EvalModelTemplate()
    model.train_dataloader = model.train_dataloader__zero_length

    # Construction + fit both stay inside the `raises` block, matching the
    # original test: the error may surface from either step.
    with pytest.raises(ValueError):
        trainer = Trainer(
            default_root_dir=tmpdir,
            max_epochs=1,
            test_percent_check=0.5,
        )
        trainer.fit(model)
コード例 #9
0
def test_not_implemented_error_train_dataloader(tmpdir, check_interval):
    """fit() completes with the `__not_implemented_error` train dataloader (IterableDataset-style)."""
    model = EvalModelTemplate()
    model.train_dataloader = model.train_dataloader__not_implemented_error

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        val_check_interval=check_interval,
    )
    # verify training completed
    assert trainer.fit(model) == 1
コード例 #10
0
def _init_steps_model():
    """Build a model plus trainer options that limit each epoch to 50% of the train data.

    Returns:
        tuple: ``(model, trainer_options, num_train_samples)`` where
        ``num_train_samples`` is the number of train batches remaining after
        the epoch is truncated to ``train_percent`` of the dataloader.
    """
    model = EvalModelTemplate()

    # Fraction of the training data to use per epoch.
    # NOTE: the original docstring claimed "5%", but 0.5 is 50%.
    train_percent = 0.5
    # `len(dataloader)` counts batches, so this is a batch count, not samples.
    num_train_samples = math.floor(len(model.train_dataloader()) * train_percent)

    trainer_options = dict(train_percent_check=train_percent)
    return model, trainer_options, num_train_samples
コード例 #11
0
def test_inf_train_dataloader(tmpdir, check_interval):
    """Test inf train data loader (e.g. IterableDataset)"""
    # The docstring originally sat AFTER this skip call, making it dead code
    # rather than a docstring; it now precedes the skip.
    pytest.skip('TODO: fix speed of this test')

    model = EvalModelTemplate()
    model.train_dataloader = model.train_dataloader__infinite

    trainer = Trainer(default_root_dir=tmpdir,
                      max_epochs=1,
                      val_check_interval=check_interval)
    result = trainer.fit(model)
    # verify training completed
    assert result == 1
コード例 #12
0
def test_inf_train_dataloader(tmpdir, check_interval):
    """Test inf train data loader (e.g. IterableDataset)"""
    model = EvalModelTemplate(tutils.get_default_hparams())
    model.train_dataloader = model.train_dataloader__infinite

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        # FIX: was `train_check_interval`, which is not a Trainer argument;
        # every sibling test feeds `check_interval` into `val_check_interval`.
        val_check_interval=check_interval,
    )
    result = trainer.fit(model)
    # verify training completed
    assert result == 1
コード例 #13
0
def test_dataloaders_load_every_epoch(tmpdir):
    """With `reload_dataloaders_every_epoch=True` the loaders are re-requested every epoch."""
    os.environ['PL_DEV_DEBUG'] = '1'

    model = EvalModelTemplate()
    train_loader = model.train_dataloader()
    model.train_dataloader = None
    val_loader = model.val_dataloader()
    model.val_dataloader = None
    test_loader = model.test_dataloader()
    model.test_dataloader = None

    trainer = Trainer(
        default_root_dir=tmpdir,
        limit_train_batches=0.3,
        limit_val_batches=0.3,
        reload_dataloaders_every_epoch=True,
        max_epochs=3,
    )
    trainer.fit(model, train_loader, val_loader)
    trainer.test(test_dataloaders=test_loader)

    debugger = trainer.dev_debugger
    assert len(debugger.val_dataloader_calls) == 4
    assert len(debugger.train_dataloader_calls) == 3
    assert len(debugger.test_dataloader_calls) == 1

    # The reload hooks must fire in exactly this order.
    expected_sequence = [
        'val_dataloader', 'train_dataloader',
        'val_dataloader', 'train_dataloader',
        'val_dataloader', 'train_dataloader',
        'val_dataloader', 'test_dataloader',
    ]
    for call, expected in zip(debugger.dataloader_sequence_calls, expected_sequence):
        assert call['name'] == expected
コード例 #14
0
def test_ddp_all_dataloaders_passed_to_fit(tmpdir):
    """Make sure DDP works with dataloaders passed to fit()"""
    tutils.set_random_master_port()

    model = EvalModelTemplate()

    trainer = Trainer(
        default_root_dir=tmpdir,
        progress_bar_refresh_rate=0,
        max_epochs=1,
        limit_train_batches=0.2,
        limit_val_batches=0.2,
        gpus=[0, 1],
        distributed_backend='ddp_spawn',
    )
    result = trainer.fit(
        model,
        train_dataloader=model.train_dataloader(),
        val_dataloaders=model.val_dataloader(),
    )
    assert result == 1, "DDP doesn't work with dataloaders passed to fit()."
コード例 #15
0
def test_model_tpu_cores_8(tmpdir):
    """Make sure model trains on TPU."""
    model = EvalModelTemplate()
    # 8 cores needs a big dataset
    model.train_dataloader = _serial_train_loader
    model.val_dataloader = _serial_train_loader

    trainer_options = dict(
        default_root_dir=tmpdir,
        progress_bar_refresh_rate=0,
        max_epochs=1,
        tpu_cores=8,
        limit_train_batches=0.4,
        limit_val_batches=0.4,
    )
    tpipes.run_model_test(trainer_options, model, on_gpu=False, with_hpc=False)
コード例 #16
0
def test_fit_multiple_train_loaders(tmpdir, multiple_trainloader_mode, num_training_batches):
    """Integration test for multiple train loaders."""
    model = EvalModelTemplate()
    model.train_dataloader = model.train_dataloader__multiple_mapping
    # todo: add also `train_dataloader__multiple_sequence`
    model.training_step = model.training_step__multiple_dataloaders

    trainer = Trainer(
        max_epochs=1,
        default_root_dir=tmpdir,
        multiple_trainloader_mode=multiple_trainloader_mode,
    )

    result = trainer.fit(model)
    assert result == 1
    # The batch count depends on how the multiple loaders are combined.
    assert trainer.num_training_batches == num_training_batches
コード例 #17
0
def test_wrong_train_setting(tmpdir):
    """
    * Test that an error is thrown when no `train_dataloader()` is defined
    * Test that an error is thrown when no `training_step()` is defined
    """
    tutils.reset_seed()
    hparams = EvalModelTemplate.get_default_hparams()
    trainer = Trainer(default_root_dir=tmpdir, max_epochs=1)

    # Missing train_dataloader -> misconfiguration.
    with pytest.raises(MisconfigurationException):
        broken = EvalModelTemplate(**hparams)
        broken.train_dataloader = None
        trainer.fit(broken)

    # Missing training_step -> misconfiguration.
    with pytest.raises(MisconfigurationException):
        broken = EvalModelTemplate(**hparams)
        broken.training_step = None
        trainer.fit(broken)
コード例 #18
0
def test_base_tpu_model_8(tmpdir):
    """Make sure model trains on TPU."""
    trainer_options = dict(
        default_root_dir=tmpdir,
        progress_bar_refresh_rate=0,
        max_epochs=1,
        tpu_cores=8,
        limit_train_batches=0.4,
        limit_val_batches=0.4,
    )

    model = EvalModelTemplate()

    def long_train_loader():
        # 8 cores needs a big dataset
        return DataLoader(
            TrialMNIST(download=True, num_samples=15000, digits=(0, 1, 2, 5, 8)),
            batch_size=32,
        )

    model.train_dataloader = long_train_loader
    model.val_dataloader = long_train_loader

    tpipes.run_model_test(trainer_options, model, on_gpu=False, with_hpc=False)
コード例 #19
0
def test_inf_dataloaders_with_limit_num_batches(tmpdir, limit_train_batches, limit_val_batches, limit_test_batches):
    """Infinite train/val/test dataloaders combined with absolute batch limits."""
    model = EvalModelTemplate()
    model.train_dataloader = model.train_dataloader__infinite
    model.val_dataloader = model.val_dataloader__infinite
    model.test_dataloader = model.test_dataloader__infinite

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        limit_train_batches=limit_train_batches,
        limit_val_batches=limit_val_batches,
        limit_test_batches=limit_test_batches,
    )

    assert trainer.fit(model)
    # With an absolute limit, exactly that many batches are scheduled.
    assert trainer.num_training_batches == limit_train_batches
    assert trainer.num_val_batches[0] == limit_val_batches

    trainer.test(ckpt_path=None)
    assert trainer.num_test_batches[0] == limit_test_batches
コード例 #20
0
def test_base_tpu_16bit_model_8_cores(tmpdir):
    """Make sure model trains on TPU."""
    trainer_options = dict(
        default_root_dir=tmpdir,
        precision=16,
        progress_bar_refresh_rate=0,
        max_epochs=1,
        tpu_cores=8,
        limit_train_batches=0.4,
        limit_val_batches=0.4,
    )

    model = EvalModelTemplate()

    def long_train_loader():
        # 8 cores needs a big dataset
        return DataLoader(
            TrialMNIST(download=True, num_samples=15000, digits=(0, 1, 2, 5, 8)),
            batch_size=32,
        )

    model.train_dataloader = long_train_loader
    model.val_dataloader = long_train_loader

    tpipes.run_model_test(trainer_options, model, on_gpu=False)
    # 16-bit on TPU should have switched XLA to bfloat16 via this env flag.
    assert os.environ.get('XLA_USE_BF16') == str(1), "XLA_USE_BF16 was not set in environment variables"
コード例 #21
0
def test_overfit_batch_limits(tmpdir):
    """Exercise `overfit_batches` and `limit_val/test_batches` end to end.

    Covers:
      * default samplers (random for train, sequential for val/test),
      * `overfit_batches` as an int and as a fraction, including the swap to
        a sequential sampler so repeated runs see identical batches,
      * `limit_val_batches` / `limit_test_batches` as a fraction and an int.
    """
    # ------------------------------------------------------
    # Make sure shuffle is correct across loaders initially
    # ------------------------------------------------------
    model = EvalModelTemplate()
    model.train_dataloader()

    # original train loader which should be replaced in all methods
    train_loader = model.train_dataloader()

    # make sure the val and tests are not shuffled
    assert isinstance(train_loader.sampler, RandomSampler)
    assert isinstance(model.val_dataloader().sampler, SequentialSampler)
    assert isinstance(model.test_dataloader().sampler, SequentialSampler)

    # ------------------------------------------------------
    # get the training loader and batch
    # ------------------------------------------------------
    # Create a reference train dataloader without shuffling.
    train_loader = DataLoader(model.train_dataloader().dataset, shuffle=False)
    # (xa, ya) is the deterministic first batch used as the ground truth below.
    (xa, ya) = next(iter(train_loader))
    train_loader = DataLoader(model.train_dataloader().dataset, shuffle=True)
    full_train_samples = len(train_loader)
    # 0.11 matches the fractional `overfit_batches` value used below.
    num_train_samples = int(0.11 * full_train_samples)

    # ------------------------------------------------------
    # set VAL and Test loaders
    # ------------------------------------------------------
    val_loader = DataLoader(model.val_dataloader().dataset, shuffle=False)
    test_loader = DataLoader(model.test_dataloader().dataset, shuffle=False)

    # set the model loaders
    model.train_dataloader = lambda: train_loader
    model.val_dataloader = lambda: val_loader
    model.test_dataloader = lambda: test_loader

    # ------------------------------------------------------
    # test train loader applies correct limits
    # ------------------------------------------------------
    trainer = Trainer(overfit_batches=4)
    trainer.reset_train_dataloader(model)
    assert trainer.num_training_batches == 4

    # make sure the loaders are the same
    (xb, yb) = next(iter(trainer.train_dataloader))
    assert torch.eq(xa, xb).all()
    assert torch.eq(ya, yb).all()

    trainer = Trainer(overfit_batches=0.11)
    trainer.reset_train_dataloader(model)
    # The dataloader should have been overwritten with a Sequential sampler.
    assert trainer.train_dataloader is not train_loader
    assert trainer.num_training_batches == num_train_samples

    # make sure the loaders are the same
    (xb, yb) = next(iter(trainer.train_dataloader))
    assert torch.eq(xa, xb).all()
    assert torch.eq(ya, yb).all()

    # ------------------------------------------------------
    # run tests for both val and test
    # ------------------------------------------------------
    for split in ['val', 'test']:

        # ------------------------------------------------------
        # test overfit_batches as percent
        # ------------------------------------------------------
        loader_num_batches, dataloaders = Trainer(
            overfit_batches=0.11)._reset_eval_dataloader(model, split)
        assert loader_num_batches[0] == num_train_samples

        # make sure we turned off shuffle for the user
        assert isinstance(dataloaders[0].sampler, SequentialSampler)

        # make sure the loaders are the same
        (xb, yb) = next(iter(dataloaders[0]))
        assert torch.eq(xa, xb).all()
        assert torch.eq(ya, yb).all()

        # ------------------------------------------------------
        # test overfit_batches as int
        # ------------------------------------------------------
        loader_num_batches, dataloaders = Trainer(
            overfit_batches=1)._reset_eval_dataloader(model, split)
        assert loader_num_batches[0] == 1
        loader_num_batches, dataloaders = Trainer(
            overfit_batches=5)._reset_eval_dataloader(model, split)
        assert loader_num_batches[0] == 5

        # ------------------------------------------------------
        # test limit_xxx_batches as percent AND int
        # ------------------------------------------------------
        if split == 'val':
            loader_num_batches, dataloaders = Trainer(
                limit_val_batches=0.1)._reset_eval_dataloader(model, split)
            assert loader_num_batches[0] == int(0.1 * len(val_loader))

            loader_num_batches, dataloaders = Trainer(
                limit_val_batches=10)._reset_eval_dataloader(model, split)
            assert loader_num_batches[0] == 10
        else:
            loader_num_batches, dataloaders = Trainer(
                limit_test_batches=0.1)._reset_eval_dataloader(model, split)
            assert loader_num_batches[0] == int(0.1 * len(test_loader))

            loader_num_batches, dataloaders = Trainer(
                limit_test_batches=10)._reset_eval_dataloader(model, split)
            assert loader_num_batches[0] == 10
コード例 #22
0
def test_wrong_test_settigs(tmpdir):
    """ Test the following cases related to test configuration of model:
        * error if `test_dataloader()` is overridden but `test_step()` is not
        * if both `test_dataloader()` and `test_step()` is overridden,
            throw warning if `test_epoch_end()` is not defined
        * error if `test_step()` is overridden but `test_dataloader()` is not

    NOTE(review): the function name misspells "settings" as "settigs"; renaming
    would change the collected test id, so it is left untouched here.
    """
    hparams = EvalModelTemplate.get_default_hparams()
    trainer = Trainer(default_root_dir=tmpdir, max_epochs=1)

    # ----------------
    # having test_dataloader without test_step is a misconfiguration
    # ----------------
    with pytest.raises(MisconfigurationException):
        model = EvalModelTemplate(hparams)
        model.test_step = None
        trainer.fit(model)

    # ----------------
    # having test_dataloader and test_step but no test_epoch_end only warns
    # ----------------
    with pytest.warns(RuntimeWarning):
        model = EvalModelTemplate(hparams)
        model.test_epoch_end = None
        trainer.test(model)

    # ----------------
    # having test_step but no test_dataloader: user must pass one to test()
    # ----------------
    with pytest.raises(MisconfigurationException):
        model = EvalModelTemplate(hparams)
        # reset to the base-class hook, i.e. "not overridden"
        model.test_dataloader = LightningModule.test_dataloader
        trainer.test(model)

    # ----------------
    # passing a loader to test() does not excuse a missing test_step
    # ----------------
    with pytest.raises(MisconfigurationException):
        model = EvalModelTemplate(hparams)
        model.test_dataloader = LightningModule.test_dataloader
        model.test_step = None
        trainer.test(model, test_dataloaders=model.dataloader(train=False))

    # ----------------
    # loader passed to test() with test_step but no test_epoch_end only warns
    # ----------------
    with pytest.warns(RuntimeWarning):
        model = EvalModelTemplate(hparams)
        model.test_dataloader = LightningModule.test_dataloader
        model.test_epoch_end = None
        trainer.test(model, test_dataloaders=model.dataloader(train=False))

    # ----------------
    # if we are just testing, no need for train_dataloader, train_step, val_dataloader, and val_step
    # ----------------
    model = EvalModelTemplate(hparams)
    model.test_dataloader = LightningModule.test_dataloader
    model.train_dataloader = None
    model.train_step = None
    model.val_dataloader = None
    model.val_step = None
    trainer.test(model, test_dataloaders=model.dataloader(train=False))