Exemple #1
0
def rouge_n(candidate_tokens, reference_tokens_list, n):
    """
    Compute the ROUGE-N score (the N-gram co-occurrence statistic).

    candidate_tokens: sequence of tokens from the candidate summary.
    reference_tokens_list: list of token sequences, one per reference.
    n: length of the n-grams to compare.
    """
    # Pool the n-grams of every reference into one flat list, then
    # score the candidate's n-grams against that pool.
    reference_ngrams = [
        ngram
        for tokens in reference_tokens_list
        for ngram in n_gram_slices(tokens, n)
    ]
    return rouge_ngram(n_gram_slices(candidate_tokens, n), reference_ngrams)
Exemple #2
0
def rouge_n(candidate_tokens, reference_tokens_list, n):
    """
    Return the ROUGE-N score (N-gram co-occurrence statistic) of a
    candidate against one or more references.

    candidate_tokens -- sequence of candidate tokens
    reference_tokens_list -- list of reference token sequences
    n -- length of the n-grams
    """
    # Token-level ROUGE-N: slice both sides into n-grams first.
    candidate_ngrams = n_gram_slices(candidate_tokens, n)
    reference_ngrams = []
    for reference in reference_tokens_list:
        reference_ngrams.extend(n_gram_slices(reference, n))
    return rouge_ngram(candidate_ngrams, reference_ngrams)
Exemple #3
0
 def test_n_gram_slices_unigram(self):
     """Unigrams of a 3-token sequence are three 1-tuples, in order."""
     expected = [("a",), ("nice",), ("sentence",)]
     self.assertEqual(n_gram_slices(self.tokens, 1), expected)
Exemple #4
0
 def test_n_gram_strings_empty(self):
     """An empty token sequence yields no bigrams at all."""
     result = n_gram_slices(tuple(), 2)
     self.assertEqual(result, [])
Exemple #5
0
 def test_n_gram_slices_padded_empty(self):
     """Padding an empty sequence still yields one all-pad bigram.

     This is probably not what you want, but in a sense it is correct.
     """
     result = n_gram_slices((), 2, pad_size=1)
     self.assertEqual(result, [("_", "_")])
Exemple #6
0
 def test_n_gram_slices_bigram_padded(self):
     """With pad_size=1, bigrams include leading and trailing pad tokens."""
     expected = [
         ("_", "a"),
         ("a", "nice"),
         ("nice", "sentence"),
         ("sentence", "_"),
     ]
     self.assertEqual(n_gram_slices(self.tokens, 2, pad_size=1), expected)
Exemple #7
0
 def test_n_gram_slices_unigram_padded(self):
     """pad_size=0 leaves the unigram result unchanged."""
     result = n_gram_slices(self.tokens, 1, pad_size=0)
     self.assertEqual(result, [("a",), ("nice",), ("sentence",)])
Exemple #8
0
 def test_n_gram_slices_empty(self):
     """Slicing an empty tuple into bigrams returns an empty list."""
     result = n_gram_slices((), 2)
     self.assertEqual(result, [])
Exemple #9
0
 def test_n_gram_slices_trigram(self):
     """A 3-token sequence has exactly one trigram: the whole sequence."""
     expected = [("a", "nice", "sentence")]
     self.assertEqual(n_gram_slices(self.tokens, 3), expected)
Exemple #10
0
 def test_n_gram_slices_unigram(self):
     """Each token of a 3-token sequence becomes its own 1-tuple."""
     ngrams = n_gram_slices(self.tokens, 1)
     self.assertEqual(ngrams, [("a",), ("nice",), ("sentence",)])
Exemple #11
0
 def test_n_gram_strings_empty(self):
     """No bigrams can be formed from an empty sequence."""
     ngrams = n_gram_slices(tuple(), 2)
     self.assertEqual(ngrams, [])
Exemple #12
0
 def test_n_gram_slices_padded_empty(self):
     """An empty sequence with pad_size=1 produces the bigram of pads.

     This is probably not what you want, but in a sense it is correct.
     """
     ngrams = n_gram_slices((), 2, pad_size=1)
     self.assertEqual(ngrams, [("_", "_")])
Exemple #13
0
 def test_n_gram_slices_bigram_padded(self):
     """Padded bigrams span from the leading pad to the trailing pad."""
     ngrams = n_gram_slices(self.tokens, 2, pad_size=1)
     self.assertEqual(
         ngrams,
         [("_", "a"), ("a", "nice"), ("nice", "sentence"), ("sentence", "_")],
     )
Exemple #14
0
 def test_n_gram_slices_unigram_padded(self):
     """A pad_size of zero is a no-op for unigram slicing."""
     ngrams = n_gram_slices(self.tokens, 1, pad_size=0)
     self.assertEqual(ngrams, [("a",), ("nice",), ("sentence",)])
Exemple #15
0
 def test_n_gram_slices_empty(self):
     """Bigram slicing of an empty tuple yields the empty list."""
     ngrams = n_gram_slices((), 2)
     self.assertEqual(ngrams, [])
Exemple #16
0
 def test_n_gram_slices_trigram(self):
     """The only trigram of three tokens is the full sequence itself."""
     ngrams = n_gram_slices(self.tokens, 3)
     self.assertEqual(ngrams, [("a", "nice", "sentence")])