예제 #1
0
 def test_almost_correct(self):
     # Score a hypothesis that only partially overlaps the reference and
     # compare the result, rounded to 12 decimal places, with a fixed value.
     reference = self.tokenize(
         "risk assessment has to be undertaken by those who are qualified and expert in that area - that is the scientists ."
     )
     hypothesis = self.tokenize(
         " risk assessment must be made of those who are qualified and expertise in the sector - these are the scientists ."
     )
     chrf = CharacterFScorer('n=6,beta=3')
     chrf.set_reference(reference)
     score = chrf.score(hypothesis)
     # Compare as fixed-precision strings rather than as raw floats.
     self.assertEqual(format(score, '.12f'), "0.652414427449")
예제 #2
0
    def get(self, config_string):
        """
        Build the scorer described by ``config_string``.

        The text before the first space is taken as the metric name; whatever
        follows that space (possibly nothing) is handed unchanged to the
        scorer's constructor as its argument string.

        Recognised metric names: SENTENCEBLEU, METEOR, BEER and CHRF.

        Example: get("SENTENCEBLEU n=4") returns SentenceBleuScorer("n=4").

        A config string that starts with ``INTERPOLATE`` is not split here;
        it is passed as a whole to ``si.ScorerInterpolator``.

        Example: get("INTERPOLATE w=0.5,0.5; SENTENCEBLEU n=4; METEOR meteor_language=fr, meteor_path=/foo/bar/meteor")
                 returns si.ScorerInterpolator(<that same string>).

        Raises NotImplementedError when the metric name is not recognised.
        """
        # Interpolated configurations are parsed by the interpolator itself.
        if config_string.startswith("INTERPOLATE"):
            return si.ScorerInterpolator(config_string)

        # partition() yields an empty argument string when there is no space,
        # so a bare metric name needs no special handling.
        metric_name, _, metric_args = config_string.partition(" ")

        if metric_name == 'SENTENCEBLEU':
            return SentenceBleuScorer(metric_args)
        if metric_name == 'METEOR':
            return MeteorScorer(metric_args)
        if metric_name == 'BEER':
            return BeerScorer(metric_args)
        if metric_name == 'CHRF':
            return CharacterFScorer(metric_args)
        # add other scorers here
        raise NotImplementedError("No such scorer: %s" % metric_name)
예제 #3
0
 def test_one_character(self):
     # Reference and hypothesis are the same single character "A";
     # the expected score is exactly 1.0.
     reference = self.tokenize("A")
     hypothesis = self.tokenize("A")
     chrf = CharacterFScorer('n=6,beta=3')
     chrf.set_reference(reference)
     score = chrf.score(hypothesis)
     self.assertEqual(score, 1.0)
예제 #4
0
 def test_half_right(self):
     # Hypothesis "AA" scored against reference "AB"; expected score is 0.25.
     reference = self.tokenize("AB")
     hypothesis = self.tokenize("AA")
     chrf = CharacterFScorer('n=6,beta=3')
     chrf.set_reference(reference)
     score = chrf.score(hypothesis)
     self.assertEqual(score, 0.25)
예제 #5
0
 def test_completely_different_segments(self):
     # Reference and hypothesis share no characters; expected score is 0.0.
     # Note this case configures the scorer with n=3 rather than n=6.
     reference = self.tokenize("AAAAAA")
     hypothesis = self.tokenize("BBBB")
     chrf = CharacterFScorer('n=3,beta=3')
     chrf.set_reference(reference)
     score = chrf.score(hypothesis)
     self.assertEqual(score, 0.0)
예제 #6
0
 def test_identical_segments(self):
     # The same tokenized segment serves as both reference and hypothesis;
     # the expected score is exactly 1.0.
     tokens = self.tokenize("Consistency is the last refuge of the unimaginative")
     chrf = CharacterFScorer('n=6,beta=3')
     chrf.set_reference(tokens)
     score = chrf.score(tokens)
     self.assertEqual(score, 1.0)
예제 #7
0
 def test_almost_correct(self):
     # Score a hypothesis that only partially overlaps the reference and
     # compare the result, rounded to 12 decimal places, with a fixed value.
     reference_text = "risk assessment has to be undertaken by those who are qualified and expert in that area - that is the scientists ."
     hypothesis_text = " risk assessment must be made of those who are qualified and expertise in the sector - these are the scientists ."
     reference = self.tokenize(reference_text)
     hypothesis = self.tokenize(hypothesis_text)
     chrf = CharacterFScorer('n=6,beta=3')
     chrf.set_reference(reference)
     # Compare as fixed-precision strings rather than as raw floats.
     formatted_score = format(chrf.score(hypothesis), '.12f')
     self.assertEqual(formatted_score, "0.652414427449")
예제 #8
0
 def test_one_character(self):
     # A one-character hypothesis identical to its reference must score 1.0.
     ref_tokens = self.tokenize("A")
     hyp_tokens = self.tokenize("A")
     chrf = CharacterFScorer('n=6,beta=3')
     chrf.set_reference(ref_tokens)
     result = chrf.score(hyp_tokens)
     self.assertEqual(result, 1.0)
예제 #9
0
 def test_half_right(self):
     # Reference is "AB", hypothesis is "AA"; the expected score is 0.25.
     ref_tokens = self.tokenize("AB")
     hyp_tokens = self.tokenize("AA")
     chrf = CharacterFScorer('n=6,beta=3')
     chrf.set_reference(ref_tokens)
     result = chrf.score(hyp_tokens)
     self.assertEqual(result, 0.25)
예제 #10
0
 def test_completely_different_segments(self):
     # "AAAAAA" versus "BBBB": no character in common, so 0.0 is expected.
     # This case configures the scorer with n=3 rather than n=6.
     ref_tokens = self.tokenize("AAAAAA")
     hyp_tokens = self.tokenize("BBBB")
     chrf = CharacterFScorer('n=3,beta=3')
     chrf.set_reference(ref_tokens)
     result = chrf.score(hyp_tokens)
     self.assertEqual(result, 0.0)
예제 #11
0
 def test_identical_segments(self):
     # Scoring a segment against itself is expected to give exactly 1.0.
     sentence = self.tokenize("Consistency is the last refuge of the unimaginative")
     chrf = CharacterFScorer('n=6,beta=3')
     chrf.set_reference(sentence)
     result = chrf.score(sentence)
     self.assertEqual(result, 1.0)