Esempio n. 1
0
def rouge_n(candidate_tokens, reference_tokens_list, n):
    """
    Return the ROUGE-N score (aka N-gram co-occurrence statistic).

    Parameters:
        candidate_tokens: sequence of candidate tokens.
        reference_tokens_list: list of sequences of reference tokens.
        n: length of the n-grams to compare.
    """
    # This is ROUGE-N computed directly on tokens.
    candidate_ngrams = n_gram_slices(candidate_tokens, n)
    # Pool the n-grams of every reference into one flat list.
    reference_ngrams = [ngram
                        for reference in reference_tokens_list
                        for ngram in n_gram_slices(reference, n)]
    return rouge_ngram(candidate_ngrams, reference_ngrams)
Esempio n. 2
0
def rouge_n(candidate_tokens, reference_tokens_list, n):
    """
    Compute the ROUGE-N score (the N-gram co-occurrence statistic)
    of a candidate against a set of references.

    candidate_tokens is a sequence of candidate tokens,
    reference_tokens_list is a list of reference token sequences,
    and n is the length of the n-grams.
    """
    # Token-level ROUGE-N: slice the candidate into n-grams ...
    cand_ngrams = n_gram_slices(candidate_tokens, n)

    # ... and accumulate the n-grams of all references together.
    pooled_ref_ngrams = []
    for ref_tokens in reference_tokens_list:
        pooled_ref_ngrams.extend(n_gram_slices(ref_tokens, n))

    return rouge_ngram(cand_ngrams, pooled_ref_ngrams)
Esempio n. 3
0
 def test_n_gram_slices_unigram(self):
     # Each token becomes its own 1-tuple.
     result = n_gram_slices(self.tokens, 1)
     expected = [("a",), ("nice",), ("sentence",)]
     self.assertEqual(result, expected)
Esempio n. 4
0
 def test_n_gram_strings_empty(self):
     # An empty token sequence yields no bigrams at all.
     self.assertEqual(n_gram_slices((), 2), [])
Esempio n. 5
0
 def test_n_gram_slices_padded_empty(self):
     # Padding an empty sequence produces one all-padding bigram.
     # Arguably surprising, but consistent with the padding rule.
     result = n_gram_slices((), 2, pad_size=1)
     self.assertEqual(result, [("_", "_")])
Esempio n. 6
0
 def test_n_gram_slices_bigram_padded(self):
     # One pad token per side adds a leading and a trailing bigram.
     expected = [
         ("_", "a"),
         ("a", "nice"),
         ("nice", "sentence"),
         ("sentence", "_"),
     ]
     self.assertEqual(n_gram_slices(self.tokens, 2, pad_size=1), expected)
Esempio n. 7
0
 def test_n_gram_slices_unigram_padded(self):
     # pad_size=0 is a no-op: identical to plain unigrams.
     result = n_gram_slices(self.tokens, 1, pad_size=0)
     self.assertEqual(result, [("a",), ("nice",), ("sentence",)])
Esempio n. 8
0
 def test_n_gram_slices_empty(self):
     # Bigrams of nothing: the result list is empty.
     empty_result = n_gram_slices((), 2)
     self.assertEqual(empty_result, [])
Esempio n. 9
0
 def test_n_gram_slices_trigram(self):
     # Three tokens admit exactly one trigram.
     self.assertEqual(n_gram_slices(self.tokens, 3),
                      [("a", "nice", "sentence")])
Esempio n. 10
0
 def test_n_gram_slices_unigram(self):
     # Unigrams: one single-element tuple per token.
     unigrams = n_gram_slices(self.tokens, 1)
     self.assertEqual(unigrams, [("a",), ("nice",), ("sentence",)])
Esempio n. 11
0
 def test_n_gram_strings_empty(self):
     # No input tokens means no n-grams.
     bigrams = n_gram_slices((), 2)
     self.assertEqual(bigrams, [])
Esempio n. 12
0
 def test_n_gram_slices_padded_empty(self):
     # With padding, even an empty input yields one bigram made
     # entirely of pad tokens — debatable, but formally correct.
     bigrams = n_gram_slices((), 2, pad_size=1)
     self.assertEqual(bigrams, [("_", "_")])
Esempio n. 13
0
 def test_n_gram_slices_bigram_padded(self):
     # A single pad on each side contributes an extra bigram at
     # the front and at the back of the sequence.
     bigrams = n_gram_slices(self.tokens, 2, pad_size=1)
     self.assertEqual(bigrams,
                      [("_", "a"),
                       ("a", "nice"),
                       ("nice", "sentence"),
                       ("sentence", "_")])
Esempio n. 14
0
 def test_n_gram_slices_unigram_padded(self):
     # Zero padding changes nothing: plain unigrams come back.
     expected = [("a",), ("nice",), ("sentence",)]
     self.assertEqual(n_gram_slices(self.tokens, 1, pad_size=0), expected)
Esempio n. 15
0
 def test_n_gram_slices_empty(self):
     # An empty sequence has no bigrams.
     self.assertEqual(n_gram_slices((), 2), [])
Esempio n. 16
0
 def test_n_gram_slices_trigram(self):
     # The three sample tokens form exactly one trigram.
     trigrams = n_gram_slices(self.tokens, 3)
     self.assertEqual(trigrams, [("a", "nice", "sentence")])