def rouge_s(tokens1, tokens2, distance_limit=sys.maxsize, beta=1):
    """
    Return the ROUGE-S score for token sequences tokens1 and tokens2.

    distance_limit defines the maximal skip distance and is passed
    unchanged to skip_bigram_slices for both sequences. The default is
    sys.maxsize (larger than any practical sequence length); it replaces
    sys.maxint, which no longer exists on Python 3.

    beta determines the relative importance of recall in the F-measure
    and is passed unchanged to rouge_skip_bigram.
    """
    # implementation based on section 5 from Lin04
    skip_bigrams1 = skip_bigram_slices(tokens1, distance_limit)
    skip_bigrams2 = skip_bigram_slices(tokens2, distance_limit)
    return rouge_skip_bigram(skip_bigrams1, skip_bigrams2, beta)
def test_s_gram_slices_limit(self):
    # Skip-bigrams of a four-token sentence with the distance limit set to 1.
    words = "a really nice sentence".split()
    self.tokens = tuple(words)
    expected = [
        ("a", "really"),
        ("a", "nice"),
        # ("a", "sentence") is skipped -- presumably because two tokens lie
        # between the pair, which exceeds the limit of 1.
        ("really", "nice"),
        ("really", "sentence"),
        ("nice", "sentence"),
    ]
    actual = skip_bigram_slices(self.tokens, 1)
    self.assertEqual(actual, expected)
def test_s_gram_slices_limit(self):
    # With a distance limit of 1, the widest pair of the sentence is expected
    # to be missing from the result.
    self.tokens = tuple("a really nice sentence".split())
    limited = skip_bigram_slices(self.tokens, 1)
    wanted = [
        ("a", "really"),
        ("a", "nice"),
        # skips ("a", "sentence") -- presumably too far apart for limit 1
        ("really", "nice"),
        ("really", "sentence"),
        ("nice", "sentence"),
    ]
    self.assertEqual(limited, wanted)
def test_s_gram_slices(self):
    # Without an explicit distance limit, every ordered pair of the three
    # tokens is expected.
    words = "a nice sentence".split()
    self.tokens = tuple(words)
    expected = [
        ("a", "nice"),
        ("a", "sentence"),
        ("nice", "sentence"),
    ]
    actual = skip_bigram_slices(self.tokens)
    self.assertEqual(actual, expected)
def test_s_gram_slices_empty(self):
    # An empty token sequence is expected to yield an empty list.
    self.assertEqual(skip_bigram_slices([]), [])
def test_s_gram_slices(self):
    # Default (unrestricted) call on a three-token sentence: all three
    # ordered pairs are expected, in left-to-right order.
    self.tokens = tuple("a nice sentence".split())
    pairs = skip_bigram_slices(self.tokens)
    wanted = [("a", "nice"), ("a", "sentence"), ("nice", "sentence")]
    self.assertEqual(pairs, wanted)