def test_fit_train_loader_only(tmpdir):
    """Fit succeeds when the only data source is the loader passed to ``fit()``."""
    model = EvalModelTemplate()
    loader = model.train_dataloader()

    # Strip every model-attached loader and val/test hook so the explicit
    # ``fit()`` argument is the sole source of training data.
    model.train_dataloader = None
    model.val_dataloader = None
    model.test_dataloader = None

    model.validation_step = None
    model.validation_epoch_end = None

    model.test_step = None
    model.test_epoch_end = None

    trainer = Trainer(fast_dev_run=True, default_root_dir=tmpdir)
    trainer.fit(model, train_dataloader=loader)
def test_inf_val_dataloader(tmpdir, check_interval):
    """Test inf val data loader (e.g. IterableDataset)"""
    model = EvalModelTemplate()
    # swap in a loader with no ``__len__`` to emulate an IterableDataset
    model.val_dataloader = model.val_dataloader__infinite

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        val_check_interval=check_interval,
    )
    fit_result = trainer.fit(model)

    # training must have completed despite the unsized val loader
    assert fit_result == 1
def test_dataloaders_passed_to_fit(tmpdir):
    """Test if dataloaders passed to trainer works on TPU"""
    model = EvalModelTemplate()

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        tpu_cores=8,
    )
    fit_result = trainer.fit(
        model,
        train_dataloader=model.train_dataloader(),
        val_dataloaders=model.val_dataloader(),
    )
    assert fit_result, "TPU doesn't work with dataloaders passed to fit()."
def test_val_dataloader_not_implemented_error(tmpdir, check_interval):
    """Test not_implemented_error data loader (e.g. IterableDataset)"""
    model = EvalModelTemplate()
    # val loader whose ``__len__`` raises NotImplementedError
    model.val_dataloader = model.val_dataloader__not_implemented_error

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_steps=5,
        max_epochs=1,
        val_check_interval=check_interval,
    )
    trainer.fit(model)

    # training must still have run to completion
    assert trainer.state == TrainerState.FINISHED, f"Training failed with {trainer.state}"
def test_train_dataloader_not_implemented_error(tmpdir, check_interval):
    """Test not_implemented_error train data loader (e.g. IterableDataset)"""
    model = EvalModelTemplate()
    # both loaders raise NotImplementedError from ``__len__``
    model.train_dataloader = model.train_dataloader__not_implemented_error
    model.val_dataloader = model.val_dataloader__not_implemented_error

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_steps=5,
        max_epochs=1,
        val_check_interval=check_interval,
    )
    fit_result = trainer.fit(model)

    # training must still have run to completion
    assert fit_result == 1
def test_ddp_all_dataloaders_passed_to_fit(tmpdir):
    """Make sure DDP works with dataloaders passed to fit()"""
    tutils.set_random_master_port()

    model = EvalModelTemplate()
    fit_options = dict(
        train_dataloader=model.train_dataloader(),
        val_dataloaders=model.val_dataloader(),
    )

    trainer = Trainer(
        default_root_dir=tmpdir,
        progress_bar_refresh_rate=0,
        max_epochs=1,
        limit_train_batches=0.2,
        limit_val_batches=0.2,
        gpus=[0, 1],
        distributed_backend='ddp_spawn',
    )
    fit_result = trainer.fit(model, **fit_options)
    assert fit_result == 1, "DDP doesn't work with dataloaders passed to fit()."
def test_model_tpu_cores_8(tmpdir):
    """Make sure model trains on TPU."""
    trainer_options = dict(
        default_root_dir=tmpdir,
        progress_bar_refresh_rate=0,
        max_epochs=1,
        tpu_cores=8,
        limit_train_batches=0.4,
        limit_val_batches=0.4,
    )

    model = EvalModelTemplate()
    # 8 cores needs a big dataset
    model.train_dataloader = _serial_train_loader
    model.val_dataloader = _serial_train_loader

    tpipes.run_model_test(trainer_options, model, on_gpu=False, with_hpc=False)
def test_val_loop_config(tmpdir):
    """When either val loop or val data are missing raise warning."""
    tutils.reset_seed()
    hparams = EvalModelTemplate.get_default_hparams()
    trainer = Trainer(default_root_dir=tmpdir, max_epochs=1)

    # val data present but no val loop -> expect a warning
    with pytest.warns(UserWarning):
        model = EvalModelTemplate(**hparams)
        model.validation_step = None
        trainer.fit(model)

    # val loop present but no val data -> expect a warning
    with pytest.warns(UserWarning):
        model = EvalModelTemplate(**hparams)
        model.val_dataloader = None
        trainer.fit(model)
def test_base_tpu_model_8(tmpdir):
    """Make sure model trains on TPU."""
    trainer_options = dict(
        default_root_dir=tmpdir,
        progress_bar_refresh_rate=0,
        max_epochs=1,
        tpu_cores=8,
        limit_train_batches=0.4,
        limit_val_batches=0.4,
    )

    model = EvalModelTemplate()

    # 8 cores needs a big dataset
    def long_train_loader():
        return DataLoader(
            TrialMNIST(download=True, num_samples=15000, digits=(0, 1, 2, 5, 8)),
            batch_size=32,
        )

    model.train_dataloader = long_train_loader
    model.val_dataloader = long_train_loader

    tpipes.run_model_test(trainer_options, model, on_gpu=False, with_hpc=False)
def test_inf_dataloaders_with_limit_percent_batches(tmpdir, limit_train_batches, limit_val_batches, limit_test_batches):
    """Verify inf train, val & test dataloaders (e.g. IterableDataset) passed with batch limit in percent"""
    model = EvalModelTemplate()
    model.train_dataloader = model.train_dataloader__infinite
    model.val_dataloader = model.val_dataloader__infinite
    model.test_dataloader = model.test_dataloader__infinite

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        limit_train_batches=limit_train_batches,
        limit_val_batches=limit_val_batches,
        limit_test_batches=limit_test_batches,
    )
    fit_result = trainer.fit(model)
    assert fit_result == 1

    # a fractional limit of 0.0 disables the loader entirely; any other
    # fraction of an unsized loader keeps it unbounded
    assert trainer.num_training_batches == (0 if limit_train_batches == 0.0 else float('inf'))
    assert trainer.num_val_batches[0] == (0 if limit_val_batches == 0.0 else float('inf'))

    trainer.test(ckpt_path=None)
    assert trainer.num_test_batches[0] == (0 if limit_test_batches == 0.0 else float('inf'))
def test_inf_dataloaders_with_limit_num_batches(tmpdir, limit_train_batches, limit_val_batches, limit_test_batches):
    """Verify inf train, val & test dataloaders (e.g. IterableDataset) passed with batch limit as number"""
    model = EvalModelTemplate()
    model.train_dataloader = model.train_dataloader__infinite
    model.val_dataloader = model.val_dataloader__infinite
    model.test_dataloader = model.test_dataloader__infinite

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        limit_train_batches=limit_train_batches,
        limit_val_batches=limit_val_batches,
        limit_test_batches=limit_test_batches,
    )
    trainer.fit(model)
    assert trainer.state == TrainerState.FINISHED, f"Training failed with {trainer.state}"

    # an integer limit applies verbatim, even to an unsized loader
    assert trainer.num_training_batches == limit_train_batches
    assert trainer.num_val_batches[0] == limit_val_batches

    trainer.test(ckpt_path=None)
    assert trainer.num_test_batches[0] == limit_test_batches
def test_benchmark_option(tmpdir):
    """Verify benchmark option."""
    model = EvalModelTemplate()
    model.val_dataloader = model.val_dataloader__multiple

    # benchmark mode must be off before the trainer runs
    assert not torch.backends.cudnn.benchmark

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        benchmark=True,
    )
    fit_result = trainer.fit(model)

    # verify training completed
    assert fit_result == 1

    # the trainer must have switched cudnn benchmark mode on
    assert torch.backends.cudnn.benchmark
def test_base_tpu_16bit_model_8_cores(tmpdir):
    """Make sure model trains on TPU."""
    trainer_options = dict(
        default_root_dir=tmpdir,
        precision=16,
        progress_bar_refresh_rate=0,
        max_epochs=1,
        tpu_cores=8,
        limit_train_batches=0.4,
        limit_val_batches=0.4,
    )

    model = EvalModelTemplate()

    # 8 cores needs a big dataset
    def long_train_loader():
        return DataLoader(
            TrialMNIST(download=True, num_samples=15000, digits=(0, 1, 2, 5, 8)),
            batch_size=32,
        )

    model.train_dataloader = long_train_loader
    model.val_dataloader = long_train_loader

    tpipes.run_model_test(trainer_options, model, on_gpu=False)
    # 16-bit precision on TPU is implemented via bfloat16
    assert os.environ.get('XLA_USE_BF16') == str(1), "XLA_USE_BF16 was not set in environment variables"
def test_dataloaders_with_limit_num_batches(tmpdir, limit_train_batches, limit_val_batches, limit_test_batches):
    """Verify num_batches for val & test dataloaders passed with batch limit as number"""
    # NOTE(review): an identically-named test is defined later in this file and
    # shadows this one at import time — consider renaming one of them.
    model = EvalModelTemplate()
    model.val_dataloader = model.val_dataloader__multiple_mixed_length
    model.test_dataloader = model.test_dataloader__multiple_mixed_length
    model.validation_step = model.validation_step__multiple_dataloaders
    model.validation_epoch_end = model.validation_epoch_end__multiple_dataloaders
    model.test_step = model.test_step__multiple_dataloaders
    model.test_epoch_end = model.test_epoch_end__multiple_dataloaders

    # train, multiple val and multiple test passed with percent_check
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        limit_train_batches=limit_train_batches,
        limit_val_batches=limit_val_batches,
        limit_test_batches=limit_test_batches,
    )
    trainer.fit(model)

    # an integer limit applies verbatim to the train loader and to every eval loader
    assert trainer.num_training_batches == limit_train_batches
    assert trainer.num_val_batches == [limit_val_batches] * len(trainer.val_dataloaders)

    trainer.test(ckpt_path=None)
    assert trainer.num_test_batches == [limit_test_batches] * len(trainer.test_dataloaders)
def test_dataloaders_with_limit_percent_batches(tmpdir, limit_train_batches, limit_val_batches, limit_test_batches):
    """Verify num_batches for train, val & test dataloaders passed with batch limit in percent"""
    model = EvalModelTemplate()
    model.val_dataloader = model.val_dataloader__multiple_mixed_length
    model.test_dataloader = model.test_dataloader__multiple_mixed_length
    model.validation_step = model.validation_step__multiple_dataloaders
    model.validation_epoch_end = model.validation_epoch_end__multiple_dataloaders
    model.test_step = model.test_step__multiple_dataloaders
    model.test_epoch_end = model.test_epoch_end__multiple_dataloaders

    # train, multiple val and multiple test passed with percent_check
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        limit_train_batches=limit_train_batches,
        limit_val_batches=limit_val_batches,
        limit_test_batches=limit_test_batches,
    )
    trainer.fit(model)

    # a fractional limit keeps floor(len * fraction) batches per loader
    expected_train_batches = int(len(trainer.train_dataloader) * limit_train_batches)
    expected_val_batches = [
        int(len(dataloader) * limit_val_batches)
        for dataloader in trainer.val_dataloaders
    ]
    assert trainer.num_training_batches == expected_train_batches
    assert trainer.num_val_batches == expected_val_batches

    trainer.test(ckpt_path=None)
    expected_test_batches = [
        int(len(dataloader) * limit_test_batches)
        for dataloader in trainer.test_dataloaders
    ]
    assert trainer.num_test_batches == expected_test_batches
def test_full_train_loop_with_results_obj_dp(tmpdir):
    """Run a full DP train loop using Result-object hooks and check the logged keys."""
    os.environ['PL_DEV_DEBUG'] = '1'

    batches = 10
    epochs = 3

    model = EvalModelTemplate()
    model.validation_step = None
    model.test_step = None
    # swap in the Result-object variants of the training hooks
    model.training_step = model.training_step_full_loop_result_obj_dp
    model.training_step_end = model.training_step_end_full_loop_result_obj_dp
    model.training_epoch_end = model.training_epoch_end_full_loop_result_obj_dp
    model.val_dataloader = None
    model.test_dataloader = None

    trainer = Trainer(
        default_root_dir=tmpdir,
        distributed_backend='dp',
        gpus=[0, 1],
        max_epochs=epochs,
        early_stop_callback=True,
        log_every_n_steps=2,
        limit_train_batches=batches,
        weights_summary=None,
    )
    trainer.fit(model)

    # collect every metric key the debugger saw and check the expected ones
    seen_keys = set()
    for metric in trainer.dev_debugger.logged_metrics:
        seen_keys.update(metric.keys())

    assert 'train_step_metric' in seen_keys
    assert 'train_step_end_metric' in seen_keys
    assert 'train_epoch_end_metric_epoch' in seen_keys
def test_overfit_batch_limits(tmpdir):
    # ------------------------------------------------------
    # Make sure shuffle is correct across loaders initially
    # ------------------------------------------------------
    model = EvalModelTemplate()
    model.train_dataloader()

    # original train loader which should be replaced in all methods
    train_loader = model.train_dataloader()

    # train loader is shuffled; val and test must not be
    assert isinstance(train_loader.sampler, RandomSampler)
    assert isinstance(model.val_dataloader().sampler, SequentialSampler)
    assert isinstance(model.test_dataloader().sampler, SequentialSampler)

    # ------------------------------------------------------
    # get the training loader and batch
    # ------------------------------------------------------
    # Grab the first batch from an unshuffled reference loader, then switch
    # back to a shuffled loader for the actual model.
    train_loader = DataLoader(model.train_dataloader().dataset, shuffle=False)
    (xa, ya) = next(iter(train_loader))
    train_loader = DataLoader(model.train_dataloader().dataset, shuffle=True)
    full_train_samples = len(train_loader)
    num_train_samples = int(0.11 * full_train_samples)

    # ------------------------------------------------------
    # set VAL and Test loaders
    # ------------------------------------------------------
    val_loader = DataLoader(model.val_dataloader().dataset, shuffle=False)
    test_loader = DataLoader(model.test_dataloader().dataset, shuffle=False)

    # set the model loaders
    model.train_dataloader = lambda: train_loader
    model.val_dataloader = lambda: val_loader
    model.test_dataloader = lambda: test_loader

    # ------------------------------------------------------
    # test train loader applies correct limits
    # ------------------------------------------------------
    trainer = Trainer(overfit_batches=4)
    trainer.reset_train_dataloader(model)
    assert trainer.num_training_batches == 4

    # overfit must always yield the same first batch as the unshuffled reference
    (xb, yb) = next(iter(trainer.train_dataloader))
    assert torch.eq(xa, xb).all()
    assert torch.eq(ya, yb).all()

    trainer = Trainer(overfit_batches=0.11)
    trainer.reset_train_dataloader(model)
    # The dataloader should have been overwritten with a Sequential sampler.
    assert trainer.train_dataloader is not train_loader
    assert trainer.num_training_batches == num_train_samples

    # make sure the loaders are the same
    (xb, yb) = next(iter(trainer.train_dataloader))
    assert torch.eq(xa, xb).all()
    assert torch.eq(ya, yb).all()

    # ------------------------------------------------------
    # run tests for both val and test
    # ------------------------------------------------------
    for split in ['val', 'test']:

        # ------------------------------------------------------
        # test overfit_batches as percent
        # ------------------------------------------------------
        loader_num_batches, dataloaders = Trainer(overfit_batches=0.11)._reset_eval_dataloader(model, split)
        assert loader_num_batches[0] == num_train_samples

        # make sure we turned off shuffle for the user
        assert isinstance(dataloaders[0].sampler, SequentialSampler)

        # make sure the loaders are the same
        (xb, yb) = next(iter(dataloaders[0]))
        assert torch.eq(xa, xb).all()
        assert torch.eq(ya, yb).all()

        # ------------------------------------------------------
        # test overfit_batches as int
        # ------------------------------------------------------
        loader_num_batches, dataloaders = Trainer(overfit_batches=1)._reset_eval_dataloader(model, split)
        assert loader_num_batches[0] == 1
        loader_num_batches, dataloaders = Trainer(overfit_batches=5)._reset_eval_dataloader(model, split)
        assert loader_num_batches[0] == 5

        # ------------------------------------------------------
        # test limit_xxx_batches as percent AND int
        # ------------------------------------------------------
        if split == 'val':
            loader_num_batches, dataloaders = Trainer(limit_val_batches=0.1)._reset_eval_dataloader(model, split)
            assert loader_num_batches[0] == int(0.1 * len(val_loader))

            loader_num_batches, dataloaders = Trainer(limit_val_batches=10)._reset_eval_dataloader(model, split)
            assert loader_num_batches[0] == 10
        else:
            loader_num_batches, dataloaders = Trainer(limit_test_batches=0.1)._reset_eval_dataloader(model, split)
            assert loader_num_batches[0] == int(0.1 * len(test_loader))

            loader_num_batches, dataloaders = Trainer(limit_test_batches=10)._reset_eval_dataloader(model, split)
            assert loader_num_batches[0] == 10
def test_wrong_test_settigs(tmpdir):
    """ Test the following cases related to test configuration of model:
        * error if `test_dataloader()` is overridden but `test_step()` is not
        * if both `test_dataloader()` and `test_step()` is overridden,
            throw warning if `test_epoch_end()` is not defined
        * error if `test_step()` is overridden but `test_dataloader()` is not
    """
    # NOTE(review): function name has a typo ("settigs"); kept as-is since the
    # identifier is how pytest collects it — rename in a dedicated change.
    hparams = EvalModelTemplate.get_default_hparams()
    trainer = Trainer(default_root_dir=tmpdir, max_epochs=1)

    # ----------------
    # if have test_dataloader should have test_step
    # ----------------
    with pytest.raises(MisconfigurationException):
        model = EvalModelTemplate(hparams)
        model.test_step = None
        trainer.fit(model)

    # ----------------
    # if have test_dataloader and test_step recommend test_epoch_end
    # ----------------
    with pytest.warns(RuntimeWarning):
        model = EvalModelTemplate(hparams)
        model.test_epoch_end = None
        trainer.test(model)

    # ----------------
    # if have test_step and NO test_dataloader passed in tell user to pass test_dataloader
    # ----------------
    with pytest.raises(MisconfigurationException):
        model = EvalModelTemplate(hparams)
        model.test_dataloader = LightningModule.test_dataloader
        trainer.test(model)

    # ----------------
    # if have test_dataloader and NO test_step tell user to implement test_step
    # ----------------
    with pytest.raises(MisconfigurationException):
        model = EvalModelTemplate(hparams)
        model.test_dataloader = LightningModule.test_dataloader
        model.test_step = None
        trainer.test(model, test_dataloaders=model.dataloader(train=False))

    # ----------------
    # if have test_dataloader and test_step but no test_epoch_end warn user
    # ----------------
    with pytest.warns(RuntimeWarning):
        model = EvalModelTemplate(hparams)
        model.test_dataloader = LightningModule.test_dataloader
        model.test_epoch_end = None
        trainer.test(model, test_dataloaders=model.dataloader(train=False))

    # ----------------
    # if we are just testing, no need for train_dataloader, train_step, val_dataloader, and val_step
    # ----------------
    model = EvalModelTemplate(hparams)
    model.test_dataloader = LightningModule.test_dataloader
    model.train_dataloader = None
    model.train_step = None
    model.val_dataloader = None
    model.val_step = None
    trainer.test(model, test_dataloaders=model.dataloader(train=False))
def test_dataloaders_with_limit_num_batches(tmpdir, limit_train_batches, limit_val_batches, limit_test_batches):
    """Verify num_batches for train, val & test dataloaders passed with batch limit as number"""
    # NOTE(review): this shadows an identically-named test defined earlier in
    # the file — only this definition is collected by pytest; consider renaming.
    model = EvalModelTemplate()
    model.val_dataloader = model.val_dataloader__multiple_mixed_length
    model.test_dataloader = model.test_dataloader__multiple_mixed_length
    model.validation_step = model.validation_step__multiple_dataloaders
    model.validation_epoch_end = model.validation_epoch_end__multiple_dataloaders
    model.test_step = model.test_step__multiple_dataloaders
    model.test_epoch_end = model.test_epoch_end__multiple_dataloaders

    # train, multiple val and multiple test passed with percent_check
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        limit_train_batches=limit_train_batches,
        limit_val_batches=limit_val_batches,
        limit_test_batches=limit_test_batches,
    )
    trainer.fit(model)

    # -------------------------------------------
    # MAKE SURE THE TRAINER SET THE CORRECT VALUES
    # -------------------------------------------
    assert trainer.num_training_batches == limit_train_batches
    assert trainer.num_val_batches == [limit_val_batches] * len(trainer.val_dataloaders)

    trainer.test(ckpt_path=None)

    # when the limit is greater than the number of test batches it should be the num in loaders
    test_dataloader_lengths = [len(x) for x in model.test_dataloader()]
    if limit_test_batches > 1e10:
        assert trainer.num_test_batches == test_dataloader_lengths
    else:
        assert trainer.num_test_batches == [limit_test_batches] * len(trainer.test_dataloaders)

    # -------------------------------------------
    # make sure we actually saw the expected num of batches
    # -------------------------------------------
    num_val_dataloaders = len(model.val_dataloader())
    num_test_dataloaders = len(model.test_dataloader())
    if limit_train_batches > 0:

        # make sure val batches are as expected
        assert len(trainer.dev_debugger.num_seen_val_check_batches) == num_val_dataloaders
        for dataloader_idx, num_batches in trainer.dev_debugger.num_seen_val_check_batches.items():
            assert num_batches == limit_val_batches

        # make sure test batches are as expected
        assert len(trainer.dev_debugger.num_seen_test_check_batches) == num_test_dataloaders
        for dataloader_idx, num_batches in trainer.dev_debugger.num_seen_test_check_batches.items():
            if limit_test_batches > 1e10:
                assert num_batches == test_dataloader_lengths[dataloader_idx]
            else:
                assert num_batches == limit_test_batches