def compute_ngram_scores(candidate: Sequence[Any], reference: Sequence[Any], n: int = 4) -> Score:
    """
    Count the ngrams shared by a candidate sequence and a reference sequence.

    Args:
        candidate: candidate sequence of items
        reference: reference sequence of items
        n: ngram order

    Returns:
        A ``Score`` whose ``match`` field is the number of ngram co-occurrences,
        and whose ``candidate`` / ``reference`` fields are the total ngram counts
        of the respective sequences.

    .. versionadded:: 0.4.5
    """
    candidate_ngrams = ngrams(candidate, n)
    reference_ngrams = ngrams(reference, n)

    # ngrams occurring in both sequences
    # (presumably a Counter intersection, i.e. per-ngram minimum count -- confirm against `ngrams`)
    shared_ngrams = candidate_ngrams & reference_ngrams

    match_total = sum(shared_ngrams.values())
    candidate_total = sum(candidate_ngrams.values())
    reference_total = sum(reference_ngrams.values())

    return Score(match=match_total, candidate=candidate_total, reference=reference_total)
def test_ngrams(sequence, n, expected_keys, expected_values):
    """Check that ``ngrams`` yields the expected counts and keys, in order."""
    counter = ngrams(sequence=sequence, n=n)

    # Counts are compared first, then the ngram keys themselves.
    actual_values = [*counter.values()]
    assert actual_values == expected_values

    actual_keys = [*counter.keys()]
    assert actual_keys == expected_keys