def test_bleu_requires_checkpoint_decoder():
    # Optimizing for BLEU requires a CheckpointDecoder; constructing the
    # monitor without one must fail with a descriptive error.
    with pytest.raises(utils.SockeyeError) as e, tempfile.TemporaryDirectory() as tmpdir:
        callback.TrainingMonitor(batch_size=1,
                                 output_folder=tmpdir,
                                 optimized_metric='bleu',
                                 cp_decoder=None)
    assert "bleu requires CheckpointDecoder" == str(e.value)

def test_callback(optimized_metric, initial_best, train_metrics, eval_metrics, improved_seq):
    with tempfile.TemporaryDirectory() as tmpdir:
        monitor = callback.TrainingMonitor(output_folder=tmpdir,
                                           optimized_metric=optimized_metric)
        assert monitor.optimized_metric == optimized_metric
        assert monitor.get_best_validation_score() == initial_best
        metrics_fname = os.path.join(tmpdir, C.METRICS_NAME)

        for checkpoint, (train_metric, eval_metric, expected_improved) in enumerate(
                zip(train_metrics, eval_metrics, improved_seq), 1):
            # Training metrics are recorded once per checkpoint under a '-train' suffix.
            monitor.checkpoint_callback(checkpoint, train_metric)
            assert len(monitor.metrics) == checkpoint
            assert monitor.metrics[-1] == {k + "-train": v for k, v in train_metric.items()}

            # Validation metrics are merged into the same record under a '-val' suffix,
            # and the monitor reports whether the optimized metric improved.
            improved, best_checkpoint = monitor.eval_end_callback(checkpoint, DummyMetric(eval_metric))
            assert {k + "-val" for k in eval_metric.keys()} <= monitor.metrics[-1].keys()
            assert improved == expected_improved

        # All recorded metrics are persisted to the metrics file in the output folder.
        assert os.path.exists(metrics_fname)
        metrics = utils.read_metrics_file(metrics_fname)
        _compare_metrics(metrics, monitor.metrics)
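
# The DummyMetric helper used above is defined elsewhere in this test module.
# As an illustration only, a minimal sketch of what eval_end_callback needs from
# such a helper, assuming it follows the mxnet EvalMetric.get_name_value()
# convention of returning (name, value) pairs, could look like the class below.
# The name _DummyMetricSketch is hypothetical and deliberately distinct so it
# does not shadow the real helper.
class _DummyMetricSketch:
    def __init__(self, metric_dict):
        # Plain dict of metric name -> value, e.g. {"perplexity": 42.0}.
        self.metric_dict = metric_dict

    def get_name_value(self):
        # Mirrors mx.metric.EvalMetric.get_name_value(): a list of (name, value) pairs.
        return list(self.metric_dict.items())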