def rouge_n(candidate_tokens, reference_tokens_list, n):
    """
    Compute the ROUGE-N score (the N-gram co-occurrence statistic).

    candidate_tokens is a sequence of candidate tokens,
    reference_tokens_list is a list of sequences of reference tokens,
    and n is the length of the n-grams to compare.

    The n-grams of all references are pooled into one flat list, which is
    then scored against the candidate's n-grams by rouge_ngram; whatever
    rouge_ngram returns is returned unchanged.
    """
    # ROUGE-N works on n-grams built from tokens.
    cand_grams = n_gram_slices(candidate_tokens, n)
    pooled_ref_grams = [
        gram
        for ref_tokens in reference_tokens_list
        for gram in n_gram_slices(ref_tokens, n)
    ]
    return rouge_ngram(cand_grams, pooled_ref_grams)
def test_n_gram_slices_unigram(self):
    """With n=1, each fixture token is expected as its own 1-tuple."""
    actual = n_gram_slices(self.tokens, 1)
    expected = [
        ("a",),
        ("nice",),
        ("sentence",),
    ]
    self.assertEqual(actual, expected)
def test_n_gram_strings_empty(self):
    """Bigram slicing of an empty tuple is expected to give an empty list."""
    no_tokens = tuple()
    actual = n_gram_slices(no_tokens, 2)
    self.assertEqual(actual, [])
def test_n_gram_slices_padded_empty(self):
    """Padded bigram slicing of empty input is expected to give one all-pad bigram.

    Probably not a result anyone actually wants, but in a sense it is
    the correct one.
    """
    actual = n_gram_slices((), 2, pad_size=1)
    only_padding = [("_", "_")]
    self.assertEqual(actual, only_padding)
def test_n_gram_slices_bigram_padded(self):
    """With pad_size=1, the expected bigrams start and end with a "_" pad."""
    actual = n_gram_slices(self.tokens, 2, pad_size=1)
    expected = [
        ("_", "a"),
        ("a", "nice"),
        ("nice", "sentence"),
        ("sentence", "_"),
    ]
    self.assertEqual(actual, expected)
def test_n_gram_slices_unigram_padded(self):
    """Unigram slicing with pad_size=0 is expected to give plain unigrams."""
    # NOTE(review): despite "padded" in the name, pad_size is 0 here, so no
    # pad symbols are expected in the result -- confirm this is intentional.
    actual = n_gram_slices(self.tokens, 1, pad_size=0)
    expected = [
        ("a",),
        ("nice",),
        ("sentence",),
    ]
    self.assertEqual(actual, expected)
def test_n_gram_slices_empty(self):
    """Unpadded bigram slicing of empty input is expected to give no n-grams."""
    actual = n_gram_slices((), 2)
    nothing = []
    self.assertEqual(actual, nothing)
def test_n_gram_slices_trigram(self):
    """With n=3, the fixture tokens are expected to form a single trigram."""
    actual = n_gram_slices(self.tokens, 3)
    expected = [
        ("a", "nice", "sentence"),
    ]
    self.assertEqual(actual, expected)
def test_n_gram_slices_unigram(self):
    """Check that n=1 produces one single-element tuple per fixture token."""
    unigrams = n_gram_slices(self.tokens, 1)
    wanted = [("a",), ("nice",), ("sentence",)]
    self.assertEqual(unigrams, wanted)
def test_n_gram_slices_padded_empty(self):
    """Check that empty input with pad_size=1 gives a lone ("_", "_") bigram."""
    # Likely not what a caller wants, yet arguably the correct result.
    padded_only = n_gram_slices((), 2, pad_size=1)
    wanted = [("_", "_")]
    self.assertEqual(padded_only, wanted)
def test_n_gram_slices_bigram_padded(self):
    """Check the bigrams of the fixture tokens when padded by one "_" per side."""
    bigrams = n_gram_slices(self.tokens, 2, pad_size=1)
    wanted = [
        ("_", "a"),
        ("a", "nice"),
        ("nice", "sentence"),
        ("sentence", "_"),
    ]
    self.assertEqual(bigrams, wanted)
def test_n_gram_slices_unigram_padded(self):
    """Check that pad_size=0 leaves the unigram result free of pad symbols."""
    # NOTE(review): the test name says "padded" but pad_size=0 is passed;
    # verify that zero padding is the intended case.
    unigrams = n_gram_slices(self.tokens, 1, pad_size=0)
    wanted = [("a",), ("nice",), ("sentence",)]
    self.assertEqual(unigrams, wanted)
def test_n_gram_slices_trigram(self):
    """Check that n=3 yields exactly one trigram spanning the fixture tokens."""
    trigrams = n_gram_slices(self.tokens, 3)
    wanted = [("a", "nice", "sentence")]
    self.assertEqual(trigrams, wanted)