def test_score_fn_rescale_with_baseline(preds, targets):
    """Check the functional BERTScore against the reference implementation when
    rescaling with a baseline over all layers."""
    expected = _parse_original_bert_score(
        original_bert_score(
            preds,
            targets,
            model_type=MODEL_NAME,
            lang="en",
            all_layers=True,
            idf=False,
            batch_size=3,
            rescale_with_baseline=True,
        )
    )
    actual = metrics_bert_score(
        preds,
        targets,
        model_name_or_path=MODEL_NAME,
        lang="en",
        all_layers=True,
        idf=False,
        batch_size=3,
        rescale_with_baseline=True,
    )
    for name in _METRICS:
        _assert_list(actual[name], expected[name])
def _test_score_ddp_fn(rank, world_size, preds, targets):
    """Worker body for the `test_score_ddp` test: compute the reference score
    and hand it to the DDP comparison helper."""
    expected = _parse_original_bert_score(
        original_bert_score(preds, targets, model_type=MODEL_NAME, num_layers=8, idf=False, batch_size=3)
    )
    _bert_score_ddp(rank, world_size, preds, targets, expected)
def test_score_fn_with_idf(preds, targets):
    """Check the functional BERTScore against the reference implementation with
    IDF rescaling enabled."""
    expected = _parse_original_bert_score(
        original_bert_score(preds, targets, model_type=MODEL_NAME, num_layers=12, idf=True, batch_size=3)
    )
    actual = metrics_bert_score(
        preds, targets, model_name_or_path=MODEL_NAME, num_layers=12, idf=True, batch_size=3
    )
    for name in _METRICS:
        _assert_list(actual[name], expected[name])
def test_score_fn(preds, targets):
    """Check the functional BERTScore against the reference implementation with
    default settings (fixed layer, no IDF)."""
    expected = _parse_original_bert_score(
        original_bert_score(preds, targets, model_type=MODEL_NAME, num_layers=8, idf=False, batch_size=3)
    )
    actual = metrics_bert_score(
        preds, targets, model_name_or_path=MODEL_NAME, num_layers=8, idf=False, batch_size=3
    )
    for name in _METRICS:
        _assert_list(actual[name], expected[name])
def test_score_all_layers_with_idf(preds, targets):
    """Check the modular BERTScore metric against the reference implementation
    using all layers and IDF rescaling."""
    expected = _parse_original_bert_score(
        original_bert_score(preds, targets, model_type=MODEL_NAME, all_layers=True, idf=True, batch_size=3)
    )
    metric = BERTScore(model_name_or_path=MODEL_NAME, all_layers=True, idf=True, batch_size=3)
    metric.update(preds=preds, target=targets)
    actual = metric.compute()
    for name in _METRICS:
        _assert_list(actual[name], expected[name])
def test_score(preds, targets):
    """Check the modular BERTScore metric against the reference implementation
    with default settings (fixed layer, no IDF)."""
    expected = _parse_original_bert_score(
        original_bert_score(preds, targets, model_type=MODEL_NAME, num_layers=8, idf=False, batch_size=3)
    )
    metric = BERTScore(model_name_or_path=MODEL_NAME, num_layers=8, idf=False, batch_size=3)
    metric.update(preds=preds, target=targets)
    actual = metric.compute()
    for name in _METRICS:
        _assert_list(actual[name], expected[name])
def test_accumulation(preds, targets):
    """Check that the modular BERTScore metric accumulates correctly over
    multiple `update` calls, matching a single reference run on the flattened
    inputs."""
    flat_preds = [item for batch in preds for item in batch]
    flat_targets = [item for batch in targets for item in batch]
    expected = _parse_original_bert_score(
        original_bert_score(
            flat_preds, flat_targets, model_type=MODEL_NAME, num_layers=8, idf=False, batch_size=3
        )
    )
    metric = BERTScore(model_name_or_path=MODEL_NAME, num_layers=8, idf=False, batch_size=3)
    # Feed the metric batch by batch so its internal accumulation is exercised.
    for pred_batch, target_batch in zip(preds, targets):
        metric.update(preds=pred_batch, target=target_batch)
    actual = metric.compute()
    for name in _METRICS:
        _assert_list(actual[name], expected[name])
def test_score_fn_all_layers(preds, targets, device):
    """Check the functional BERTScore (all layers) against the reference
    implementation on the requested device, skipping when CUDA is asked for
    but unavailable."""
    if device == "cuda" and not torch.cuda.is_available():
        pytest.skip("Test requires GPU support")
    expected = _parse_original_bert_score(
        original_bert_score(preds, targets, model_type=MODEL_NAME, all_layers=True, idf=False, batch_size=3)
    )
    actual = metrics_bert_score(
        preds,
        targets,
        model_name_or_path=MODEL_NAME,
        all_layers=True,
        idf=False,
        batch_size=3,
        device=device,
    )
    for name in _METRICS:
        _assert_list(actual[name], expected[name])