Esempio n. 1
0
def test_get_scenes():
    """Check that the scenes extracted from a two-clause sentence match the expected token lists."""
    sentence = 'You are waiting for a train , this train will take you far away .'
    # Parse a single-text batch and keep only the first (and only) passage.
    passages = ucca_parse_texts([sentence])
    extracted = get_scenes_text(passages[0])
    assert extracted == [
        ['You', 'are', 'waiting', 'for', 'a', 'train'],
        ['this', 'train', 'will', 'take', 'you', 'far', 'away'],
    ]
Esempio n. 2
0
def get_samsa_sentence_scores(orig_sents: List[str], sys_sents: List[str], lowercase: bool = False, tokenizer: str = '13a',
                              verbose: bool = False):
    """Return one SAMSA score (multiplied by 100) per source/system sentence pair.

    Args:
        orig_sents: Source sentences; each is normalized, then UCCA-parsed.
        sys_sents: System outputs; each is normalized, then syntactically parsed.
        lowercase: Forwarded to ``utils_prep.normalize``.
        tokenizer: Forwarded to ``utils_prep.normalize``.
        verbose: Forwarded to the parsers; also enables the tqdm progress bar.

    Returns:
        A list with ``100. * compute_samsa(...)`` for every aligned triple of
        (source passage, source scenes, system parse). Alignment uses ``zip``,
        so the result is as long as the shortest of the three sequences.
    """
    print('Warning: SAMSA metric is long to compute (120 sentences ~ 4min), disable it if you need fast evaluation.')

    # Source side: normalize -> UCCA passages -> syntactic parse of each scene.
    normalized_sources = [utils_prep.normalize(source, lowercase, tokenizer) for source in orig_sents]
    source_passages = ucca_parse_texts(normalized_sources)
    source_scene_parses = syntactic_parse_ucca_scenes(
        source_passages,
        tokenize=False,
        sentence_split=False,
        verbose=verbose,
    )

    # System side: normalize -> syntactic parse (with sentence splitting).
    normalized_outputs = [utils_prep.normalize(candidate, lowercase, tokenizer) for candidate in sys_sents]
    output_parses = syntactic_parse_texts(
        normalized_outputs,
        tokenize=False,
        sentence_split=True,
        verbose=verbose,
    )

    aligned = zip(source_passages, source_scene_parses, output_parses)
    return [
        100. * compute_samsa(passage, scene_parses, output_parse)
        for passage, scene_parses, output_parse in tqdm(aligned, disable=(not verbose))
    ]
Esempio n. 3
0
def corpus_samsa(orig_sentences: List[str], sys_outputs: List[str], lowercase: bool = False, tokenizer: str = '13a',
                 verbose: bool = False):
    """Return the corpus-level SAMSA score: the mean per-pair score multiplied by 100.

    Args:
        orig_sentences: Source sentences; each is normalized, then UCCA-parsed.
        sys_outputs: System outputs; each is normalized, then syntactically parsed.
        lowercase: Forwarded to ``utils_prep.normalize``.
        tokenizer: Forwarded to ``utils_prep.normalize``.
        verbose: Forwarded to the syntactic parser; also enables a status
            message and the tqdm progress bar.

    Returns:
        ``100. * (sum of compute_samsa over aligned pairs) / len(orig_sentences)``,
        or ``0.0`` when ``orig_sentences`` is empty.
    """
    print('Warning: SAMSA metric is long to compute, disable it if you need fast evaluation.')

    # An empty corpus has no mean; return 0.0 instead of failing with
    # ZeroDivisionError at the final division (and skip the costly parsing).
    if not orig_sentences:
        return 0.0

    orig_sentences = [utils_prep.normalize(sent, lowercase, tokenizer) for sent in orig_sentences]
    orig_ucca_sents = ucca_parse_texts(orig_sentences)

    sys_outputs = [utils_prep.normalize(output, lowercase, tokenizer) for output in sys_outputs]
    sys_synt_outputs = syntactic_parse_texts(sys_outputs, tokenize=False, sentence_split=True, verbose=verbose)

    if verbose:
        print("Computing SAMSA score...")

    # Float start value keeps the accumulator a float even if no pair is scored.
    samsa_total = sum(
        (compute_samsa(orig_ucca, sys_synt)
         for orig_ucca, sys_synt in tqdm(zip(orig_ucca_sents, sys_synt_outputs), disable=(not verbose))),
        0.0,
    )

    # The denominator is the number of source sentences, as in the original implementation.
    return 100. * (samsa_total / len(orig_sentences))