def test_almost_correct(self):
    """
    Score a near-miss hypothesis against its reference and compare the
    result, formatted to twelve decimal places, with a fixed expected value.
    """
    reference = self.tokenize(
        "risk assessment has to be undertaken by those who are "
        "qualified and expert in that area - that is the scientists ."
    )
    # The hypothesis text deliberately starts with a space, as in the
    # original fixture.
    hypothesis = self.tokenize(
        " risk assessment must be made of those who are qualified "
        "and expertise in the sector - these are the scientists ."
    )

    chrf = CharacterFScorer('n=6,beta=3')
    chrf.set_reference(reference)

    # Compare on the formatted string so only the first twelve decimals
    # take part in the check.
    observed = '{0:.12f}'.format(chrf.score(hypothesis))
    self.assertEqual(observed, "0.652414427449")
def get(self, config_string):
    """
    Returns a scorer matching the metric and parameters defined in
    @param config_string.

    The configuration has the form "<METRIC> <arguments>": everything up to
    the first space is the metric name, the remainder (possibly empty) is
    passed unchanged to the scorer's constructor. Leading whitespace is
    ignored, so a fragment such as " METEOR meteor_language=fr" is accepted.

    Recognised metric names: SENTENCEBLEU, METEOR, BEER, CHRF.

    Example: ScorerProvider.get("SENTENCEBLEU n=4") returns a
    SentenceBleuScorer object constructed with the arguments "n=4".

    If the configuration starts with INTERPOLATE, the whole string is handed
    to si.ScorerInterpolator, which is expected to parse the metrics
    separated by `;`.

    Example: ScorerProvider.get("INTERPOLATE w=0.5,0.5; SENTENCEBLEU n=4;
    METEOR meteor_language=fr, meteor_path=/foo/bar/meteor") returns a
    ScorerInterpolator object built from that string (presumably scoring
    hypotheses as 0.5 * bleu_score + 0.5 * meteor_score; the weighting
    itself is implemented in ScorerInterpolator, not here).

    @param config_string: metric name, optionally followed by a space and
                          the metric's arguments.
    @return: the scorer object built for the requested metric.
    @raise NotImplementedError: if the metric name is not recognised.
    """
    # Leading whitespace would otherwise be parsed as an empty metric name
    # and rejected with a confusing "No such scorer: " message.
    config_string = config_string.lstrip()

    # interpolation: the interpolator receives the complete configuration
    if config_string.startswith("INTERPOLATE"):
        return si.ScorerInterpolator(config_string)

    # Split on the first space only; when there is no space the whole string
    # is the metric name and the arguments are empty.
    scorer, _, arguments = config_string.partition(" ")

    # An if-chain (rather than a lookup table) keeps each scorer class from
    # being resolved unless its branch is actually taken.
    if scorer == 'SENTENCEBLEU':
        return SentenceBleuScorer(arguments)
    if scorer == 'METEOR':
        return MeteorScorer(arguments)
    if scorer == 'BEER':
        return BeerScorer(arguments)
    if scorer == 'CHRF':
        return CharacterFScorer(arguments)
    # add other scorers here
    raise NotImplementedError("No such scorer: %s" % scorer)
def test_one_character(self):
    """
    A one-character hypothesis identical to its one-character reference
    must score exactly 1.0.
    """
    reference = self.tokenize("A")
    hypothesis = self.tokenize("A")

    chrf = CharacterFScorer('n=6,beta=3')
    chrf.set_reference(reference)

    observed = chrf.score(hypothesis)
    self.assertEqual(observed, 1.0)
def test_half_right(self):
    """
    Hypothesis "AA" scored against reference "AB" must yield exactly 0.25.
    """
    reference = self.tokenize("AB")
    hypothesis = self.tokenize("AA")

    chrf = CharacterFScorer('n=6,beta=3')
    chrf.set_reference(reference)

    observed = chrf.score(hypothesis)
    self.assertEqual(observed, 0.25)
def test_completely_different_segments(self):
    """
    A hypothesis sharing no characters with the reference ("BBBB" against
    "AAAAAA") must score exactly 0.0. This test configures the scorer with
    'n=3,beta=3'.
    """
    reference = self.tokenize("AAAAAA")
    hypothesis = self.tokenize("BBBB")

    chrf = CharacterFScorer('n=3,beta=3')
    chrf.set_reference(reference)

    observed = chrf.score(hypothesis)
    self.assertEqual(observed, 0.0)
def test_identical_segments(self):
    """
    Scoring a tokenized sentence against itself must yield exactly 1.0.
    """
    # The same tokenized object serves as both reference and hypothesis.
    sentence = self.tokenize(
        "Consistency is the last refuge of the unimaginative"
    )

    chrf = CharacterFScorer('n=6,beta=3')
    chrf.set_reference(sentence)

    observed = chrf.score(sentence)
    self.assertEqual(observed, 1.0)
def test_almost_correct(self):
    """
    Check the score of a hypothesis that partly overlaps its reference:
    the value, rendered with twelve decimals, must match the stored figure.
    """
    expected = "0.652414427449"

    ref_tokens = self.tokenize(
        "risk assessment has to be undertaken by those who are "
        "qualified and expert in that area - that is the scientists ."
    )
    # Note the leading space in the hypothesis text; it is part of the
    # fixture and is kept as-is.
    hyp_tokens = self.tokenize(
        " risk assessment must be made of those who are qualified "
        "and expertise in the sector - these are the scientists ."
    )

    scorer = CharacterFScorer('n=6,beta=3')
    scorer.set_reference(ref_tokens)
    raw_score = scorer.score(hyp_tokens)

    # Formatting before comparing limits the check to twelve decimals.
    self.assertEqual('{0:.12f}'.format(raw_score), expected)