def test_ckpt_metric_names_results(tmpdir):
    model = EvalModelTemplate()
    model.training_step = model.training_step_result_obj
    model.training_step_end = None
    model.training_epoch_end = None
    model.validation_step = model.validation_step_result_obj
    model.validation_step_end = None
    model.validation_epoch_end = None

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=1,
        gradient_clip_val=1.0,
        overfit_batches=0.20,
        progress_bar_refresh_rate=0,
        limit_train_batches=0.01,
        limit_val_batches=0.01,
        checkpoint_callback=ModelCheckpoint(filepath=tmpdir + '/{val_loss:.2f}'),
    )
    trainer.fit(model)

    # make sure the checkpoint we saved has the metric in the name
    ckpts = os.listdir(tmpdir)
    ckpts = [x for x in ckpts if 'val_loss' in x]
    assert len(ckpts) == 1
    val = re.sub('[^0-9.]', '', ckpts[0])
    assert len(val) > 3

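# For context: a minimal sketch of what the ``*_result_obj`` step overrides used
# throughout these tests are assumed to look like on EvalModelTemplate. It uses
# the TrainResult/EvalResult API from pytorch-lightning 0.9 (since removed);
# the bodies, ``_sketch_`` names, and logged keys below are illustrative
# assumptions, not the template's actual code.
import pytorch_lightning as pl


def _sketch_training_step_result_obj(self, batch, batch_idx):
    x, y = batch
    loss = self.loss(y, self(x))
    # TrainResult carries the loss to minimize plus any logged metrics
    result = pl.TrainResult(minimize=loss)
    result.log('train_loss', loss)
    return result


def _sketch_validation_step_result_obj(self, batch, batch_idx):
    x, y = batch
    loss = self.loss(y, self(x))
    # early_stop_on / checkpoint_on are the reserved keys that EarlyStopping
    # and ModelCheckpoint monitor by default when Result objects are returned
    result = pl.EvalResult(early_stop_on=loss, checkpoint_on=loss)
    result.log('val_loss', loss)
    return result
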
def test_loop_steps_only_dp(tmpdir):
    # dev_debugger only records logged metrics when PL_DEV_DEBUG is set
    os.environ['PL_DEV_DEBUG'] = '1'

    batches = 10
    epochs = 3

    model = EvalModelTemplate()
    model.test_step = None
    model.training_step = model.training_step_result_obj_dp
    model.training_step_end = None
    model.training_epoch_end = None
    model.validation_step = model.validation_step_result_obj_dp
    model.validation_step_end = None
    model.validation_epoch_end = None
    model.test_dataloader = None

    trainer = Trainer(
        default_root_dir=tmpdir,
        distributed_backend='dp',
        gpus=[0, 1],
        max_epochs=epochs,
        early_stop_callback=True,
        log_every_n_steps=2,
        limit_train_batches=batches,
        weights_summary=None,
    )
    trainer.fit(model)

    assert model.training_step_called
    assert model.validation_step_called

def test_result_obj_on_tpu(tmpdir):
    seed_everything(1234)

    batches = 5
    epochs = 2

    model = EvalModelTemplate()
    model.training_step = model.training_step_result_obj
    model.training_step_end = None
    model.training_epoch_end = None
    model.validation_step = model.validation_step_result_obj
    model.validation_step_end = None
    model.validation_epoch_end = None
    model.test_step = model.test_step_result_obj
    model.test_step_end = None
    model.test_epoch_end = None

    trainer_options = dict(
        default_root_dir=tmpdir,
        max_epochs=epochs,
        callbacks=[EarlyStopping()],
        log_every_n_steps=2,
        limit_train_batches=batches,
        weights_summary=None,
        tpu_cores=8,
    )

    tpipes.run_model_test(trainer_options, model, on_gpu=False, with_hpc=False)

def test_val_step_full_loop_result_dp(tmpdir):
    # TODO: finish the full train, val, test loop with dp
    os.environ['PL_DEV_DEBUG'] = '1'

    batches = 10
    epochs = 3

    model = EvalModelTemplate()
    model.training_step = model.training_step_full_loop_result_obj_dp
    model.training_step_end = model.training_step_end_full_loop_result_obj_dp
    model.training_epoch_end = model.training_epoch_end_full_loop_result_obj_dp
    model.validation_step = model.eval_step_full_loop_result_obj_dp
    model.validation_step_end = model.eval_step_end_full_loop_result_obj_dp
    model.validation_epoch_end = model.eval_epoch_end_full_loop_result_obj_dp
    model.test_step = model.eval_step_full_loop_result_obj_dp
    model.test_step_end = model.eval_step_end_full_loop_result_obj_dp
    model.test_epoch_end = model.eval_epoch_end_full_loop_result_obj_dp

    trainer = Trainer(
        default_root_dir=tmpdir,
        distributed_backend='dp',
        gpus=[0, 1],
        max_epochs=epochs,
        early_stop_callback=True,
        log_every_n_steps=2,
        limit_train_batches=batches,
        weights_summary=None,
    )
    trainer.fit(model)

    results = trainer.test()

    # assert we returned all metrics requested
    assert len(results) == 1
    results = results[0]
    assert 'test_epoch_end_metric' in results

    # make sure we saw all the correct keys along all paths
    seen_keys = set()
    for metric in trainer.dev_debugger.logged_metrics:
        seen_keys.update(metric.keys())

    assert 'train_step_metric' in seen_keys
    assert 'train_step_end_metric' in seen_keys
    assert 'train_epoch_end_metric_epoch' in seen_keys
    assert 'validation_step_metric_step/epoch_0' in seen_keys
    assert 'validation_step_metric_epoch' in seen_keys
    assert 'validation_step_end_metric' in seen_keys
    assert 'validation_epoch_end_metric' in seen_keys
    assert 'test_step_metric_step/epoch_2' in seen_keys
    assert 'test_step_metric_epoch' in seen_keys
    assert 'test_step_end_metric' in seen_keys
    assert 'test_epoch_end_metric' in seen_keys

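# A hedged sketch (an assumption, not the template's actual code) of why the
# dp tests here wire up the ``*_step_end`` hooks: under dp, training_step runs
# once per GPU on a sub-batch, and training_step_end then sees the Result
# holding the per-device tensors, so that hook is where the cross-device
# reduction happens before the backward pass.
def _sketch_training_step_end_full_loop_result_obj_dp(self, result):
    # reduce the per-GPU loss tensors to a single scalar
    result.minimize = result.minimize.mean()
    result.log('train_step_end_metric', result.minimize)
    return result
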
def test_full_loop_result_cpu(tmpdir):
    seed_everything(1234)
    os.environ['PL_DEV_DEBUG'] = '1'

    batches = 10
    epochs = 2

    model = EvalModelTemplate()
    model.training_step = model.training_step_result_obj
    model.training_step_end = None
    model.training_epoch_end = None
    model.validation_step = model.validation_step_result_obj
    model.validation_step_end = None
    model.validation_epoch_end = None
    model.test_step = model.test_step_result_obj
    model.test_step_end = None
    model.test_epoch_end = None

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=epochs,
        early_stop_callback=True,
        log_every_n_steps=2,
        limit_train_batches=batches,
        weights_summary=None,
    )
    trainer.fit(model)

    results = trainer.test()

    # assert we returned all metrics requested
    assert len(results) == 1
    results = results[0]
    assert results['test_loss'] < 0.3
    assert results['test_acc'] > 0.9
    assert len(results) == 2

    # the reserved monitor keys must not leak into the returned metrics
    assert 'early_stop_on' not in results
    assert 'checkpoint_on' not in results

    # a second test run must reproduce the same metrics deterministically
    results2 = trainer.test()[0]
    for k, v in results.items():
        assert results2[k] == v

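# A minimal sketch of the assumed ``test_step_result_obj`` behind the
# assertions above: it logs ``test_loss``/``test_acc`` through an EvalResult,
# while ``checkpoint_on``/``early_stop_on`` stay reserved keys that Lightning
# strips from the returned metrics. The body is an assumption, not the
# template's actual code.
def _sketch_test_step_result_obj(self, batch, batch_idx):
    x, y = batch
    y_hat = self(x)
    loss = self.loss(y, y_hat)
    acc = (y_hat.argmax(dim=-1) == y).float().mean()
    result = pl.EvalResult(checkpoint_on=loss)
    result.log('test_loss', loss)
    result.log('test_acc', acc)
    return result
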
def test_result_monitor_warnings(tmpdir):
    """ Tests that an error is raised when the monitor key is changed to one
    not found in the metrics produced by a Results obj """
    model = EvalModelTemplate()
    model.test_step = None
    model.training_step = model.training_step_result_obj
    model.training_step_end = None
    model.training_epoch_end = None
    model.validation_step = model.validation_step_result_obj
    model.validation_step_end = None
    model.validation_epoch_end = None
    model.test_dataloader = None

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=2,
        early_stop_callback=True,
        log_every_n_steps=2,
        limit_train_batches=2,
        weights_summary=None,
        checkpoint_callback=ModelCheckpoint(monitor='not_checkpoint_on'),
    )

    # the monitor key was changed but the metric is not found
    with pytest.raises(MisconfigurationException, match="not found in the returned metrics"):
        trainer.fit(model)

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=2,
        log_every_n_steps=2,
        limit_train_batches=2,
        weights_summary=None,
        early_stop_callback=EarlyStopping(monitor='not_val_loss'),
    )

    with pytest.raises(
        RuntimeError,
        match=r'.*Early stopping conditioned on metric `not_val_loss` which is not*',
    ):
        trainer.fit(model)

def test_eval_loop_return_none(tmpdir):
    """ Tests that the training loop completes when validation_epoch_end returns None """
    model = EvalModelTemplate()
    model.test_step = None
    model.training_step = model.training_step_result_obj
    model.training_step_end = None
    model.training_epoch_end = None
    model.validation_step = model.validation_step_result_obj
    model.validation_step_end = None
    model.validation_epoch_end = model.validation_epoch_end_return_none
    model.test_dataloader = None

    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=2,
        log_every_n_steps=2,
        limit_train_batches=2,
        weights_summary=None,
    )
    trainer.fit(model)

def test_full_train_loop_with_results_obj_dp(tmpdir):
    os.environ['PL_DEV_DEBUG'] = '1'

    batches = 10
    epochs = 3

    model = EvalModelTemplate()
    model.validation_step = None
    model.test_step = None
    model.training_step = model.training_step_full_loop_result_obj_dp
    model.training_step_end = model.training_step_end_full_loop_result_obj_dp
    model.training_epoch_end = model.training_epoch_end_full_loop_result_obj_dp
    model.val_dataloader = None
    model.test_dataloader = None

    trainer = Trainer(
        default_root_dir=tmpdir,
        distributed_backend='dp',
        gpus=[0, 1],
        max_epochs=epochs,
        early_stop_callback=True,
        log_every_n_steps=2,
        limit_train_batches=batches,
        weights_summary=None,
    )
    trainer.fit(model)

    # make sure we saw all the correct keys
    seen_keys = set()
    for metric in trainer.dev_debugger.logged_metrics:
        seen_keys.update(metric.keys())

    assert 'train_step_metric' in seen_keys
    assert 'train_step_end_metric' in seen_keys
    assert 'train_epoch_end_metric_epoch' in seen_keys