def test_corpus_fkgl():
    """Check ``corpus_fkgl`` on the 'turkcorpus_test_legacy' test set.

    The score is checked for the original sentences and then for the first
    set of reference sentences, each against a fixed expected value with an
    absolute tolerance of 0.1.
    """
    test_set = 'turkcorpus_test_legacy'
    tolerance = 1e-1

    # Original (source) sentences.
    orig_score = corpus_fkgl(get_orig_sents(test_set))
    assert orig_score == pytest.approx(9.9, abs=tolerance)

    # First reference set only.
    first_ref_score = corpus_fkgl(get_refs_sents(test_set)[0])
    assert first_ref_score == pytest.approx(8.2, abs=tolerance)
def test_corpus_sari():
    """Check ``sari.corpus_sari`` for the "ACCESS" system output.

    Uses the original and reference sentences of 'turkcorpus_test' and the
    "ACCESS" file found in that test set's system outputs directory.
    """
    test_set = 'turkcorpus_test'
    # Scores from MUSS https://arxiv.org/abs/2005.00352
    expected_sari = 41.381013

    sources = get_orig_sents(test_set)
    references = get_refs_sents(test_set)
    outputs_dir = get_system_outputs_dir(test_set)
    hypotheses = read_lines(outputs_dir / "ACCESS")

    score = sari.corpus_sari(sources, hypotheses, references)
    assert score == pytest.approx(expected_sari)
def test_corpus_samsa():
    """Check ``corpus_samsa`` on the 'qats_test' test set.

    The first reference set is scored against the original sentences,
    with lowercasing disabled and the 'moses' tokenizer selected.
    """
    test_set = 'qats_test'
    expected_samsa = 36.94996509406232

    sources = get_orig_sents(test_set)
    references = get_refs_sents(test_set)

    # Only the first reference set is passed in as the system output.
    score = corpus_samsa(
        sources,
        references[0],
        lowercase=False,
        tokenizer='moses',
    )
    assert score == pytest.approx(expected_samsa)
def test_corpus_sari_legacy():
    """Check ``sari.corpus_sari(..., legacy=True)`` for several system outputs.

    Original and reference sentences come from 'turkcorpus_test_legacy',
    while the system output files are looked up in the system outputs
    directory of 'turkcorpus_test'. Each output file is scored in turn and
    compared against its fixed expected value.
    """
    sources = get_orig_sents('turkcorpus_test_legacy')
    references = get_refs_sents('turkcorpus_test_legacy')
    outputs_dir = get_system_outputs_dir('turkcorpus_test')

    # (path relative to the outputs directory, expected legacy SARI score);
    # evaluated in this order, stopping at the first mismatch.
    expected_scores = (
        ("tok.low/Dress-Ls.tok.low", 37.266058818588216),
        ("tok.low/Dress.tok.low", 37.08210095744638),
        ("tok.low/EncDecA.tok.low", 35.65754396121206),
        ("tok.low/Hybrid.tok.low", 31.39665078989411),
        ("tok.low/PBMT-R.tok.low", 38.558843050332037),
        ("tok.low/SBMT-SARI.tok.low", 39.964857928109127),
    )

    for relative_path, expected in expected_scores:
        hypotheses = read_lines(outputs_dir / relative_path)
        score = sari.corpus_sari(sources, hypotheses, references, legacy=True)
        assert score == pytest.approx(expected)
def get_orig_and_refs_sents(test_set, orig_sents_path=None, refs_sents_paths=None):
    """Load the original sentences and their reference sentences.

    Args:
        test_set: Either the literal string 'custom', in which case the
            sentences are read from the given paths, or a test set
            identifier forwarded to ``get_orig_sents`` / ``get_refs_sents``.
        orig_sents_path: Path handed to ``read_lines`` for the original
            sentences. Required when ``test_set == 'custom'``.
        refs_sents_paths: Iterable of paths, or a single comma-separated
            string of paths, each handed to ``read_lines``. Required when
            ``test_set == 'custom'``.

    Returns:
        A ``(orig_sents, refs_sents)`` tuple, where ``refs_sents`` holds one
        collection of sentences per reference.

    Raises:
        AssertionError: If a required path is missing for a custom test
            set, or if any reference does not have the same number of
            entries as the originals.
    """
    if test_set == 'custom':
        assert orig_sents_path is not None, (
            'orig_sents_path is required when test_set is custom'
        )
        assert refs_sents_paths is not None, (
            'refs_sents_paths is required when test_set is custom'
        )
        # isinstance (rather than an exact type comparison) so that str
        # subclasses are split too instead of being iterated per character.
        if isinstance(refs_sents_paths, str):
            refs_sents_paths = refs_sents_paths.split(',')
        orig_sents = read_lines(orig_sents_path)
        refs_sents = [
            read_lines(ref_sents_path) for ref_sents_path in refs_sents_paths
        ]
    else:
        orig_sents = get_orig_sents(test_set)
        refs_sents = get_refs_sents(test_set)

    # Final check: every reference must align one-to-one with the originals.
    assert all(len(orig_sents) == len(ref_sents) for ref_sents in refs_sents), (
        f'Not same number of lines for test_set={test_set}, '
        f'orig_sents_path={orig_sents_path}, '
        f'refs_sents_paths={refs_sents_paths}'
    )
    return orig_sents, refs_sents
def get_orig_and_refs_sents(test_set, orig_sents_path=None, refs_sents_paths=None):
    """Load the original sentences and their reference sentences.

    Args:
        test_set: Either the literal string "custom", in which case the
            sentences are read from the given paths, or a test set
            identifier forwarded to ``get_orig_sents`` / ``get_refs_sents``.
        orig_sents_path: Path handed to ``read_lines`` for the original
            sentences. Required when ``test_set == "custom"``.
        refs_sents_paths: Iterable of paths, or a single comma-separated
            string of paths, each handed to ``read_lines``. Required when
            ``test_set == "custom"``.

    Returns:
        A ``(orig_sents, refs_sents)`` tuple, where ``refs_sents`` holds one
        collection of sentences per reference.

    Raises:
        AssertionError: If a required path is missing for a custom test
            set, or if any reference does not have the same number of
            entries as the originals.
    """
    if test_set == "custom":
        assert orig_sents_path is not None, (
            "orig_sents_path is required when test_set is custom"
        )
        assert refs_sents_paths is not None, (
            "refs_sents_paths is required when test_set is custom"
        )
        # isinstance (rather than an exact type comparison) so that str
        # subclasses are split too instead of being iterated per character.
        if isinstance(refs_sents_paths, str):
            refs_sents_paths = refs_sents_paths.split(",")
        orig_sents = read_lines(orig_sents_path)
        refs_sents = [
            read_lines(ref_sents_path) for ref_sents_path in refs_sents_paths
        ]
    else:
        orig_sents = get_orig_sents(test_set)
        refs_sents = get_refs_sents(test_set)

    # Final check: every reference must align one-to-one with the originals.
    # For a custom test set the message shows the already-split path list.
    assert all(len(orig_sents) == len(ref_sents) for ref_sents in refs_sents), (
        f"Not same number of lines for test_set={test_set}, "
        f"orig_sents_path={orig_sents_path}, "
        f"refs_sents_paths={refs_sents_paths}"
    )
    return orig_sents, refs_sents