def test_validation_step_arbitrary_dict_return(tmpdir):
    """
    Check that ``validation_step`` may return an arbitrary dict.

    Only the validation step hook is active (step-end and epoch-end are set
    to ``None``). After fitting, ``run_evaluation`` is invoked manually and
    its two return values are inspected.
    """
    model = DeterministicModel()
    model.training_step = model.training_step_dict_return
    model.validation_step = model.validation_step_arbitary_dict_return
    model.validation_step_end = None
    model.validation_epoch_end = None

    trainer = Trainer(
        default_root_dir=tmpdir,
        weights_summary=None,
        limit_train_batches=2,
        limit_val_batches=2,
        max_epochs=2,
    )
    trainer.fit(model)

    # first value: results of the full loop
    # second value: output of _evaluate
    loop_metrics, step_outputs = trainer.run_evaluation(test_mode=False)
    assert len(loop_metrics) == 2
    assert len(step_outputs) == 2

    # both collected outputs must carry the same entries
    for key, expected in (('some', 171), ('value', 'a')):
        for idx in (0, 1):
            assert step_outputs[idx][key] == expected

    # only the step hook may have run
    assert model.validation_step_called
    assert not model.validation_step_end_called
    assert not model.validation_epoch_end_called
def test_val_step_step_end_no_return(tmpdir):
    """
    Check val step + val step end when the step-end hook returns nothing.

    The epoch-end hook is disabled. Both values returned by a manual
    ``run_evaluation`` call are expected to be empty.
    """
    model = DeterministicModel()
    model.training_step = model.training_step_dict_return
    model.validation_step = model.validation_step_dict_return
    model.validation_step_end = model.validation_step_end_no_return
    model.validation_epoch_end = None

    trainer = Trainer(
        default_root_dir=tmpdir,
        weights_summary=None,
        limit_train_batches=2,
        limit_val_batches=2,
        max_epochs=2,
    )
    trainer.fit(model)

    # first value: results of the full loop
    # second value: output of _evaluate
    loop_metrics, step_outputs = trainer.run_evaluation(test_mode=False)
    assert len(loop_metrics) == 0
    assert len(step_outputs) == 0

    # step and step-end hooks ran, epoch-end did not
    assert model.validation_step_called
    assert model.validation_step_end_called
    assert not model.validation_epoch_end_called
def test_validation_step_scalar_return(tmpdir):
    """
    Check that ``validation_step`` may return a scalar.

    Step-end and epoch-end hooks are disabled. The full-loop result is
    expected to be empty while each of the two step outputs equals 171.
    """
    model = DeterministicModel()
    model.training_step = model.training_step_dict_return
    model.validation_step = model.validation_step_scalar_return
    model.validation_step_end = None
    model.validation_epoch_end = None

    trainer = Trainer(
        default_root_dir=tmpdir,
        weights_summary=None,
        limit_train_batches=2,
        limit_val_batches=2,
        max_epochs=2,
    )
    trainer.fit(model)

    # first value: results of the full loop
    # second value: output of _evaluate
    loop_out, step_outputs = trainer.run_evaluation(test_mode=False)
    assert len(loop_out) == 0
    assert len(step_outputs) == 2
    assert all(step_outputs[idx] == 171 for idx in (0, 1))

    # only the step hook may have run
    assert model.validation_step_called
    assert not model.validation_step_end_called
    assert not model.validation_epoch_end_called
def test_validation_step_no_return(tmpdir):
    """
    Check that ``validation_step`` may return nothing at all.

    Runs the trainer with ``fast_dev_run=True``; both values returned by a
    manual ``run_evaluation`` call are expected to be empty.
    """
    model = DeterministicModel()
    model.training_step = model.training_step_dict_return
    model.validation_step = model.validation_step_no_return
    model.validation_step_end = None
    model.validation_epoch_end = None

    trainer = Trainer(
        default_root_dir=tmpdir,
        fast_dev_run=True,
        weights_summary=None,
    )
    trainer.fit(model)

    # first value: results of the full loop
    # second value: output of _evaluate
    loop_out, step_outputs = trainer.run_evaluation(test_mode=False)
    assert len(loop_out) == 0
    assert len(step_outputs) == 0

    # only the step hook may have run
    assert model.validation_step_called
    assert not model.validation_step_end_called
    assert not model.validation_epoch_end_called
def test_val_step_only_epoch_metrics(tmpdir):
    """
    Check how logged and progress-bar metrics are allocated when they are
    auto-reduced at epoch end.
    """
    # enable internal debugging actions
    os.environ['PL_DEV_DEBUG'] = '1'

    model = DeterministicModel()
    model.training_step = model.training_step_result_log_epoch_and_step_for_callbacks
    model.training_step_end = None
    model.training_epoch_end = None
    model.validation_step = model.validation_step_result_only_epoch_metrics
    model.validation_step_end = None
    model.validation_epoch_end = None

    batches = 3
    epochs = 3
    # NOTE(review): sibling tests in this file pass `log_every_n_steps`
    # instead of `row_log_interval` -- confirm which one this test should use.
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=epochs,
        row_log_interval=1,
        limit_train_batches=batches,
        weights_summary=None,
    )
    trainer.fit(model)

    # only the step hook may have run
    assert model.validation_step_called
    assert not model.validation_step_end_called
    assert not model.validation_epoch_end_called

    debugger = trainer.dev_debugger

    # no early stopping
    assert len(debugger.early_stopping_history) == 0

    # exactly one logged / pbar entry per epoch
    assert len(debugger.logged_metrics) == epochs
    assert len(debugger.pbar_added_metrics) == epochs

    # logged epoch metrics
    for logged in debugger.logged_metrics:
        assert 'no_val_no_pbar' not in logged
        assert 'val_step_pbar_acc' not in logged
        assert logged['val_step_log_acc'] == (12 + 13) / 2
        assert logged['val_step_log_pbar_acc'] == (13 + 14) / 2

    # progress-bar epoch metrics
    for shown in debugger.pbar_added_metrics:
        assert 'no_val_no_pbar' not in shown
        assert 'val_step_log_acc' not in shown
        assert shown['val_step_log_pbar_acc'] == (13 + 14) / 2
        assert shown['val_step_pbar_acc'] == (14 + 15) / 2

    # only 1 checkpoint expected since values didn't change after that
    assert len(debugger.checkpoint_callback_history) == 1

    # the last known metric must be correct
    last_metrics = trainer.logger_connector.callback_metrics
    assert last_metrics['val_checkpoint_on'] == 171
def test_val_step_epoch_end_result(tmpdir):
    """
    Check that val step + val epoch end works with EvalResult.
    """
    os.environ['PL_DEV_DEBUG'] = '1'

    model = DeterministicModel()
    model.training_step = model.training_step_result_log_epoch_and_step_for_callbacks
    model.training_step_end = None
    model.training_epoch_end = None
    model.validation_step = model.validation_step_for_epoch_end_result
    model.validation_step_end = None
    model.validation_epoch_end = model.validation_epoch_end_result

    batches = 3
    epochs = 3
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=epochs,
        log_every_n_steps=1,
        limit_train_batches=batches,
        limit_val_batches=batches,
        weights_summary=None,
    )
    trainer.fit(model)

    assert len(trainer.logger_connector.callback_metrics) == 6

    # step and epoch-end hooks ran, step-end did not
    assert model.validation_step_called
    assert not model.validation_step_end_called
    assert model.validation_epoch_end_called

    debugger = trainer.dev_debugger

    # no early stopping
    assert len(debugger.early_stopping_history) == 0

    # exactly one logged / pbar entry per epoch
    assert len(debugger.logged_metrics) == epochs
    assert len(debugger.pbar_added_metrics) == epochs

    # logged metrics
    for logged in debugger.logged_metrics:
        assert logged['val_epoch_end_metric'] == 189
        assert 'val_step_metric' in logged

    # progress-bar metrics
    for shown in debugger.pbar_added_metrics:
        assert shown['val_epoch_end_metric'] == 189
        assert 'val_step_metric' in shown

    # only 1 checkpoint expected since values didn't change after that
    assert len(debugger.checkpoint_callback_history) == 1

    # the last known metric must be correct
    assert trainer.logger_connector.callback_metrics['checkpoint_on'] == 189
def test_val_step_result_callbacks(tmpdir):
    """
    Check the configuration where only the val step is defined:

    - val step
    - no other val_xxx
    - train loop
    - callbacks coming from val loop (not train loop)
    """
    # enable internal debugging actions
    os.environ['PL_DEV_DEBUG'] = '1'

    model = DeterministicModel()
    model.training_step = model.training_step_result_log_epoch_and_step_for_callbacks
    model.training_step_end = None
    model.training_epoch_end = None
    model.validation_step = model.validation_step_result_callbacks
    model.validation_step_end = None
    model.validation_epoch_end = None

    batches = 3
    epochs = 300
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=epochs,
        early_stop_callback=True,
        log_every_n_steps=1,
        limit_train_batches=batches,
        weights_summary=None,
    )
    trainer.fit(model)

    # only the step hook may have run
    assert model.validation_step_called
    assert not model.validation_step_end_called
    assert not model.validation_epoch_end_called

    debugger = trainer.dev_debugger

    # early stopping must have happened after the requested num of steps;
    # if it used the train step for ES then it wouldn't be 5
    assert len(debugger.early_stopping_history) == 5

    # only 2 checkpoints expected
    assert len(debugger.checkpoint_callback_history) == 2

    # the last known metric must be correct
    last_metrics = trainer.logger_connector.callback_metrics
    assert last_metrics['checkpoint_on'] == 171 + 15

    # did not request any metrics to log (except the metrics saying which epoch we are on)
    assert len(trainer.logger_connector.progress_bar_metrics) == 0
    assert len(debugger.logged_metrics) == 0
def test_val_step_using_train_callbacks(tmpdir):
    """
    Early stopping conditioned in the train loop, checkpointing conditioned
    in the val loop.
    """
    # enable internal debugging actions
    os.environ['PL_DEV_DEBUG'] = '1'

    model = DeterministicModel()
    model.training_step = model.training_step_result_log_epoch_and_step_for_callbacks
    model.training_step_end = None
    model.training_epoch_end = None
    model.validation_step = model.validation_step_result_no_callbacks
    model.validation_step_end = None
    model.validation_epoch_end = None

    batches = 3
    epochs = 300
    expected_epochs = 10
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=epochs,
        early_stop_callback=True,
        log_every_n_steps=1,
        limit_train_batches=batches,
        weights_summary=None,
    )
    trainer.fit(model)

    # only the step hook may have run
    assert model.validation_step_called
    assert not model.validation_step_end_called
    assert not model.validation_epoch_end_called

    debugger = trainer.dev_debugger

    # early stopping was not conditioned in val loop, but instead in train loop
    assert len(debugger.early_stopping_history) == expected_epochs

    # only 2 checkpoints expected
    assert len(debugger.checkpoint_callback_history) == 2

    # the last known metric must be correct
    last_metrics = trainer.logger_connector.callback_metrics
    assert last_metrics['checkpoint_on'] == 171 + 20

    # did not request any metrics to log (except the metrics saying which epoch we are on)
    assert len(trainer.logger_connector.progress_bar_metrics) == 0
    assert len(debugger.logged_metrics) == 0
def test_val_step_only_step_metrics(tmpdir):
    """
    Check how logged and progress-bar metrics are allocated at every step
    when step-level metrics are requested.
    """
    # NOTE(review): a second function with this exact name is defined later
    # in this file. The later `def` rebinds the module-level name, so pytest
    # collects only that one and this body never runs -- one of the two
    # should be renamed or removed.

    # enable internal debugging actions
    os.environ['PL_DEV_DEBUG'] = '1'

    model = DeterministicModel()
    model.training_step = model.training_step_result_log_epoch_and_step_for_callbacks
    model.training_step_end = None
    model.training_epoch_end = None
    model.validation_step = model.validation_step_result_only_step_metrics
    model.validation_step_end = None
    model.validation_epoch_end = None

    batches = 3
    epochs = 3
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=epochs,
        log_every_n_steps=1,
        limit_train_batches=batches,
        limit_val_batches=batches,
        weights_summary=None,
    )
    trainer.fit(model)

    # only the step hook may have run
    assert model.validation_step_called
    assert not model.validation_step_end_called
    assert not model.validation_epoch_end_called

    debugger = trainer.dev_debugger

    # no early stopping
    assert len(debugger.early_stopping_history) == 0

    # exact number of logged / pbar entries
    assert len(debugger.logged_metrics) == epochs * batches
    assert len(debugger.pbar_added_metrics) == epochs * batches + (epochs)

    # only 1 checkpoint expected since values didn't change after that
    assert len(debugger.checkpoint_callback_history) == 1

    # the last known metric must be correct
    assert trainer.logger_connector.callback_metrics['checkpoint_on'] == 189
def test_val_step_step_end(tmpdir):
    """
    Check that val step + val step end work together.

    The epoch-end hook is disabled; the results of a manual
    ``run_evaluation`` call are inspected after fitting.
    """
    os.environ['PL_DEV_DEBUG'] = '0'

    model = DeterministicModel()
    model.training_step = model.training_step_dict_return
    model.validation_step = model.validation_step_dict_return
    model.validation_step_end = model.validation_step_end
    model.validation_epoch_end = None

    trainer = Trainer(
        default_root_dir=tmpdir,
        weights_summary=None,
        limit_train_batches=2,
        limit_val_batches=2,
        max_epochs=2,
    )
    trainer.fit(model)

    # first value: results of the full loop
    # second value: output of _evaluate
    loop_metrics, step_outputs = trainer.run_evaluation(test_mode=False)
    assert len(loop_metrics) == 1
    assert len(loop_metrics[0]) == 6

    first_loop_metrics = loop_metrics[0]
    assert first_loop_metrics['val_step_end'] == 1802

    assert len(step_outputs) == 2
    assert step_outputs[0]['log']['log_acc1'] == 12
    assert step_outputs[1]['log']['log_acc1'] == 13

    for key in ['val_loss', 'log', 'progress_bar']:
        for idx in (0, 1):
            assert key in step_outputs[idx]

    # ensure all the keys ended up as candidates for callbacks
    assert len(trainer.logger_connector.callback_metrics) in (8, 9)

    # step and step-end hooks ran, epoch-end did not
    assert model.validation_step_called
    assert model.validation_step_end_called
    assert not model.validation_epoch_end_called
def test_full_val_loop(tmpdir):
    """
    Check the full validation loop: val step + val step end + val epoch end.
    """
    model = DeterministicModel()
    model.training_step = model.training_step_dict_return
    model.validation_step = model.validation_step_dict_return
    model.validation_step_end = model.validation_step_end
    model.validation_epoch_end = model.validation_epoch_end

    trainer = Trainer(
        default_root_dir=tmpdir,
        weights_summary=None,
        limit_train_batches=2,
        limit_val_batches=3,
        num_sanity_val_steps=0,
        max_epochs=2,
    )
    trainer.fit(model)

    # first value: results of the full loop
    # second value: output of _evaluate
    loop_metrics, eval_outputs = trainer.run_evaluation()
    assert len(loop_metrics) == 1
    assert len(loop_metrics[0]) == 7
    assert len(eval_outputs) == 1

    epoch_output = eval_outputs[0]
    assert epoch_output['val_step_end'] == 1802
    assert epoch_output['val_epoch_end'] == 1233

    for key in ['val_loss', 'log', 'progress_bar']:
        assert key in epoch_output

    # ensure all the keys ended up as candidates for callbacks
    assert len(trainer.logger_connector.callback_metrics) in (9, 10)

    # all three validation hooks must have run
    assert model.validation_step_called
    assert model.validation_step_end_called
    assert model.validation_epoch_end_called
def test_validation_step_dict_return(tmpdir):
    """
    Check that ``validation_step`` may return a dict with all the expected
    keys and that they end up in the correct place.
    """
    model = DeterministicModel()
    model.training_step = model.training_step_dict_return
    model.validation_step = model.validation_step_dict_return
    model.validation_step_end = None
    model.validation_epoch_end = None

    trainer = Trainer(
        default_root_dir=tmpdir,
        weights_summary=None,
        limit_train_batches=2,
        limit_val_batches=2,
        max_epochs=2,
    )
    trainer.fit(model)

    # first value: results of the full loop
    # second value: output of _evaluate
    loop_metrics, step_outputs = trainer.run_evaluation()
    assert len(loop_metrics) == 1
    assert len(loop_metrics[0]) == 5
    assert len(step_outputs) == 2
    assert step_outputs[0]['log']['log_acc1'] == 12
    assert step_outputs[1]['log']['log_acc1'] == 13

    for key in ['val_loss', 'log', 'progress_bar']:
        for idx in (0, 1):
            assert key in step_outputs[idx]

    # ensure all the keys ended up as candidates for callbacks
    assert len(trainer.logger_connector.callback_metrics) in (7, 8)

    # only the step hook may have run
    assert model.validation_step_called
    assert not model.validation_step_end_called
    assert not model.validation_epoch_end_called
def test_val_step_epoch_step_metrics(tmpdir):
    """
    Check how logged and progress-bar metrics are allocated when both
    step-level and epoch-level values are requested.
    """
    # enable internal debugging actions
    os.environ['PL_DEV_DEBUG'] = '1'

    model = DeterministicModel()
    model.training_step = model.training_step_result_log_epoch_and_step_for_callbacks
    model.training_step_end = None
    model.training_epoch_end = None
    model.validation_step = model.validation_step_result_epoch_step_metrics
    model.validation_step_end = None
    model.validation_epoch_end = None

    batches = 3
    epochs = 3
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=epochs,
        log_every_n_steps=1,
        limit_train_batches=batches,
        limit_val_batches=batches,
        weights_summary=None,
    )
    trainer.fit(model)

    assert len(trainer.logger_connector.callback_metrics) == 11

    expected_metrics = {
        'early_stop_on',
        'checkpoint_on',
        'val_step_pbar_acc',
        'val_step_pbar_acc_epoch',
        'val_step_log_acc',
        'val_step_log_acc_epoch',
        'val_step_log_pbar_acc',
        'val_step_log_pbar_acc_epoch',
        'val_step_batch_idx',
        'val_step_batch_idx_epoch',
        'debug_epoch',
    }
    seen_metrics = set(trainer.logger_connector.callback_metrics)
    assert expected_metrics == seen_metrics

    # only the step hook may have run
    assert model.validation_step_called
    assert not model.validation_step_end_called
    assert not model.validation_epoch_end_called

    # no early stopping
    assert len(trainer.dev_debugger.early_stopping_history) == 0

    logged_history = trainer.dev_debugger.logged_metrics
    pbar_history = trainer.dev_debugger.pbar_added_metrics

    # exact number of entries: one per batch plus one per epoch
    assert len(logged_history) == epochs * batches + (epochs)
    assert len(pbar_history) == epochs * batches + (epochs)

    # each group of `batches` step entries is followed by one epoch entry
    group_size = batches + 1

    # logged metrics
    for start in range(0, len(logged_history), group_size):
        step_entries = logged_history[start:start + batches]
        epoch_entry = logged_history[start + batches]
        epoch = epoch_entry['epoch']

        # the metric must have been split per step
        for step_entry in step_entries:
            assert f'val_step_log_acc_step/epoch_{epoch}' in step_entry
            assert f'val_step_log_pbar_acc_step/epoch_{epoch}' in step_entry

        # the epoch split must be correct
        assert 'val_step_log_acc_epoch' in epoch_entry
        assert 'val_step_log_pbar_acc_epoch' in epoch_entry

    # progress-bar metrics
    for start in range(0, len(pbar_history), group_size):
        step_entries = pbar_history[start:start + batches]
        epoch_entry = pbar_history[start + batches]

        # the metric must have been split per step
        for step_entry in step_entries:
            assert 'val_step_pbar_acc_step' in step_entry
            assert 'val_step_log_pbar_acc_step' in step_entry

        # the epoch split must be correct
        assert 'val_step_pbar_acc_epoch' in epoch_entry
        assert 'val_step_log_pbar_acc_epoch' in epoch_entry

    # only 1 checkpoint expected since values didn't change after that
    assert len(trainer.dev_debugger.checkpoint_callback_history) == 1

    # the last known metric must be correct
    assert trainer.logger_connector.callback_metrics['checkpoint_on'] == 189
def test_val_step_only_step_metrics(tmpdir):
    """
    Make sure the logged + pbar metrics are allocated accordingly at every
    step when requested.

    The logged-metric history is walked with a running epoch counter because
    the per-step keys asserted below are suffixed with ``/epoch_{epoch}``.
    The progress-bar keys carry no such suffix, so that loop needs no counter.
    """
    # NOTE(review): an earlier function in this file has this exact name.
    # This later `def` rebinds the module-level name, so pytest collects only
    # this one -- one of the two should be renamed or removed.

    # enable internal debugging actions
    os.environ['PL_DEV_DEBUG'] = '1'

    model = DeterministicModel()
    model.training_step = model.training_step_result_log_epoch_and_step_for_callbacks
    model.training_step_end = None
    model.training_epoch_end = None
    model.validation_step = model.validation_step_result_only_step_metrics
    model.validation_step_end = None
    model.validation_epoch_end = None

    batches = 3
    epochs = 3
    # NOTE(review): sibling tests in this file pass `log_every_n_steps`
    # instead of `row_log_interval` -- confirm which one this test should use.
    trainer = Trainer(
        default_root_dir=tmpdir,
        max_epochs=epochs,
        row_log_interval=1,
        limit_train_batches=batches,
        limit_val_batches=batches,
        weights_summary=None,
    )
    trainer.fit(model)

    # make sure correct steps were called
    assert model.validation_step_called
    assert not model.validation_step_end_called
    assert not model.validation_epoch_end_called

    # no early stopping
    assert len(trainer.dev_debugger.early_stopping_history) == 0

    # make sure we logged the exact number of metrics
    assert len(trainer.dev_debugger.logged_metrics) == epochs * batches + (epochs)
    assert len(trainer.dev_debugger.pbar_added_metrics) == epochs * batches + (epochs)

    # make sure we logged the correct epoch metrics
    total_empty_epoch_metrics = 0
    epoch = 0
    for metric in trainer.dev_debugger.logged_metrics:
        # bump the counter first: the keys below use the updated epoch number
        if 'epoch' in metric:
            epoch += 1

        if len(metric) > 2:
            assert 'no_val_no_pbar' not in metric
            assert 'val_step_pbar_acc' not in metric
            assert metric[f'val_step_log_acc/epoch_{epoch}']
            assert metric[f'val_step_log_pbar_acc/epoch_{epoch}']
        else:
            total_empty_epoch_metrics += 1

    assert total_empty_epoch_metrics == 3

    # make sure we logged the correct epoch pbar metrics
    # (the epoch counter is not advanced here: nothing in this loop reads it)
    total_empty_epoch_metrics = 0
    for metric in trainer.dev_debugger.pbar_added_metrics:
        if len(metric) > 2:
            assert 'no_val_no_pbar' not in metric
            assert 'val_step_log_acc' not in metric
            assert metric['val_step_log_pbar_acc']
            assert metric['val_step_pbar_acc']
        else:
            total_empty_epoch_metrics += 1

    assert total_empty_epoch_metrics == 3

    # only 1 checkpoint expected since values didn't change after that
    assert len(trainer.dev_debugger.checkpoint_callback_history) == 1

    # make sure the last known metric is correct
    assert trainer.logger_connector.callback_metrics['val_checkpoint_on'] == 171