예제 #1
0
 def test_almost_correct(self):
     """Pin the score of a near-miss segment against its reference.

     The score is rendered with 12 decimal places and compared as a
     string, so differences beyond that precision are ignored.
     """
     reference = self.tokenize(
         "risk assessment has to be undertaken by those who are qualified and expert in that area - that is the scientists ."
     )
     # The leading space in this segment is part of the fixture.
     candidate = self.tokenize(
         " risk assessment must be made of those who are qualified and expertise in the sector - these are the scientists ."
     )
     scorer = CharacterFScorer('n=6,beta=3')
     scorer.set_reference(reference)
     score = scorer.score(candidate)
     rendered = '{0:.12f}'.format(score)
     self.assertEqual(rendered, "0.652414427449")
예제 #2
0
 def test_one_character(self):
     """Scoring "A" against the reference "A" must yield exactly 1.0."""
     reference = self.tokenize("A")
     candidate = self.tokenize("A")
     scorer = CharacterFScorer('n=6,beta=3')
     scorer.set_reference(reference)
     score = scorer.score(candidate)
     self.assertEqual(score, 1.0)
예제 #3
0
 def test_half_right(self):
     """Scoring "AA" against the reference "AB" must yield exactly 0.25."""
     reference = self.tokenize("AB")
     candidate = self.tokenize("AA")
     scorer = CharacterFScorer('n=6,beta=3')
     scorer.set_reference(reference)
     score = scorer.score(candidate)
     self.assertEqual(score, 0.25)
예제 #4
0
 def test_completely_different_segments(self):
     """Segments with no characters in common must score exactly 0.0.

     Unlike the sibling tests, the scorer here is configured with
     'n=3,beta=3'.
     """
     reference = self.tokenize("AAAAAA")
     candidate = self.tokenize("BBBB")
     scorer = CharacterFScorer('n=3,beta=3')
     scorer.set_reference(reference)
     score = scorer.score(candidate)
     self.assertEqual(score, 0.0)
예제 #5
0
 def test_identical_segments(self):
     """A segment scored against itself must yield exactly 1.0."""
     # The same tokenized object is used as both reference and candidate.
     tokens = self.tokenize("Consistency is the last refuge of the unimaginative")
     scorer = CharacterFScorer('n=6,beta=3')
     scorer.set_reference(tokens)
     score = scorer.score(tokens)
     self.assertEqual(score, 1.0)
예제 #6
0
 def test_almost_correct(self):
     """Check the 12-decimal score of a near-miss segment vs. its reference.

     The comparison is done on the formatted string, so only the first 12
     decimal places of the score are checked.
     """
     reference_text = "risk assessment has to be undertaken by those who are qualified and expert in that area - that is the scientists ."
     # The leading space in the candidate text is part of the fixture.
     candidate_text = " risk assessment must be made of those who are qualified and expertise in the sector - these are the scientists ."
     ref_tokens = self.tokenize(reference_text)
     cand_tokens = self.tokenize(candidate_text)
     scorer = CharacterFScorer('n=6,beta=3')
     scorer.set_reference(ref_tokens)
     observed = '{0:.12f}'.format(scorer.score(cand_tokens))
     self.assertEqual(observed, "0.652414427449")
예제 #7
0
 def test_one_character(self):
     """A one-character segment equal to its reference scores exactly 1.0."""
     ref_tokens = self.tokenize("A")
     cand_tokens = self.tokenize("A")
     scorer = CharacterFScorer('n=6,beta=3')
     scorer.set_reference(ref_tokens)
     observed = scorer.score(cand_tokens)
     self.assertEqual(observed, 1.0)
예제 #8
0
 def test_half_right(self):
     """The segment "AA" scores exactly 0.25 against the reference "AB"."""
     ref_tokens = self.tokenize("AB")
     cand_tokens = self.tokenize("AA")
     scorer = CharacterFScorer('n=6,beta=3')
     scorer.set_reference(ref_tokens)
     observed = scorer.score(cand_tokens)
     self.assertEqual(observed, 0.25)
예제 #9
0
 def test_completely_different_segments(self):
     """"BBBB" scores exactly 0.0 against the reference "AAAAAA".

     The scorer for this case is built from 'n=3,beta=3'.
     """
     ref_tokens = self.tokenize("AAAAAA")
     cand_tokens = self.tokenize("BBBB")
     scorer = CharacterFScorer('n=3,beta=3')
     scorer.set_reference(ref_tokens)
     observed = scorer.score(cand_tokens)
     self.assertEqual(observed, 0.0)
예제 #10
0
 def test_identical_segments(self):
     """Scoring a segment against itself as reference gives exactly 1.0."""
     text = "Consistency is the last refuge of the unimaginative"
     # One tokenized object serves as both the reference and the candidate.
     shared_tokens = self.tokenize(text)
     scorer = CharacterFScorer('n=6,beta=3')
     scorer.set_reference(shared_tokens)
     observed = scorer.score(shared_tokens)
     self.assertEqual(observed, 1.0)