def testWithScorer(self):
    """Check extractOne with the default scorer, a custom scorer, and a dict.

    The same query is matched three times: against a list with the default
    scorer, against the same list with ``fuzzywuzzy.QRatio``, and against a
    dict of choices with the default scorer.
    """
    candidate_list = [
        "new york mets vs chicago cubs",
        "chicago cubs at new york mets",
        "atlanta braves vs pittsbugh pirates",
        "new york yankees vs boston red sox",
    ]
    candidate_map = {
        1: "new york mets vs chicago cubs",
        2: "chicago cubs vs chicago white sox",
        3: "philladelphia phillies vs atlanta braves",
        4: "braves vs mets",
    }

    # This hypothetical example cares about word ordering, hence the
    # quick ratio scorer.
    needle = "new york mets at chicago cubs"
    ordering_scorer = fuzzywuzzy.QRatio

    # Without a custom scorer the "more complete" match,
    # candidate_list[1], is expected to win.
    default_hit = fuzzywuzzy.extractOne(needle, candidate_list)
    self.assertEqual(default_hit[0], candidate_list[1])

    # With the custom scorer the expected winner changes.
    scored_hit = fuzzywuzzy.extractOne(
        needle, candidate_list, scorer=ordering_scorer
    )
    self.assertEqual(scored_hit[0], candidate_list[0])

    # Dict choices: the first element of the result is the matched value.
    mapped_hit = fuzzywuzzy.extractOne(needle, candidate_map)
    self.assertEqual(mapped_hit[0], candidate_map[1])
def test_simplematch(self):
    """An identical sole candidate must score 100 with ratio and partial_ratio."""
    needle = 'a, b'
    candidates = ['a, b']
    expected = ('a, b', 100)

    # Run both lookups first, then verify, mirroring the call order the
    # test has always used.
    full_hit = fuzzywuzzy.extractOne(
        needle, candidates, scorer=fuzzywuzzy.ratio
    )
    partial_hit = fuzzywuzzy.extractOne(
        needle, candidates, scorer=fuzzywuzzy.partial_ratio
    )

    self.assertEqual(full_hit, expected)
    self.assertEqual(partial_hit, expected)
def testNullStrings(self):
    """extractOne must still pick the right string when choices contain None."""
    candidates = [
        None,
        "new york mets vs chicago cubs",
        "new york yankees vs boston red sox",
        None,
        None,
    ]
    needle = "new york mets at chicago cubs"

    top_hit = fuzzywuzzy.extractOne(needle, candidates)

    # The mets/cubs entry at index 1 is the expected winner.
    self.assertEqual(top_hit[0], candidates[1])
def testWithProcessor(self):
    """extractOne with a processor that pulls the name out of an event record.

    Each event is a list whose first element is the event name; the
    processor ``lambda event: event[0]`` selects that element. The query is
    an event record of the same shape, so the same processor can be applied
    to it as well.
    """
    events = [
        ["chicago cubs vs new york mets", "CitiField", "2011-05-11", "8pm"],
        ["new york yankees vs boston red sox", "Fenway Park", "2011-05-11", "8pm"],
        ["atlanta braves vs pittsburgh pirates", "PNC Park", "2011-05-11", "8pm"],
    ]
    # NOTE: no trailing comma after the list. A trailing comma would turn
    # the query into a one-element tuple wrapping the list, so ``event[0]``
    # would yield the whole record instead of the event-name string.
    query = ["new york mets vs chicago cubs", "CitiField", "2017-03-19", "8pm"]

    best = fuzzywuzzy.extractOne(query, events, processor=lambda event: event[0])
    self.assertEqual(best[0], events[0])
def testWithCutoff2(self):
    """A query identical to a choice must survive ``score_cutoff=100``.

    The result is expected to be a two-element ``(match, score)`` pair whose
    match is the very same object as ``choices[0]``.
    """
    choices = [
        "new york mets vs chicago cubs",
        "chicago cubs at new york mets",
        "atlanta braves vs pittsbugh pirates",
        "new york yankees vs boston red sox",
    ]
    query = "new york mets vs chicago cubs"

    # Only find 100-score cases.
    res = fuzzywuzzy.extractOne(query, choices, score_cutoff=100)

    # Dedicated assertions report the offending values on failure, unlike
    # assertTrue on a pre-evaluated boolean.
    self.assertIsNotNone(res)
    # Unpacking also checks that the result has exactly two elements; the
    # score itself is not inspected here.
    best_match, _ = res
    self.assertIs(best_match, choices[0])
def testWithCutoff(self):
    """extractOne must return None when no choice reaches the score cutoff."""
    choices = [
        "new york mets vs chicago cubs",
        "chicago cubs at new york mets",
        "atlanta braves vs pittsbugh pirates",
        "new york yankees vs boston red sox",
    ]
    query = "los angeles dodgers vs san francisco giants"

    # In this situation the queried event does not exist in the list.
    # We don't want to randomly match to something, so we use a
    # reasonable cutoff.
    best = fuzzywuzzy.extractOne(query, choices, score_cutoff=50)

    # assertIsNone shows the unexpected match in the failure message.
    self.assertIsNone(best)
def testGetBestChoice4(self):
    """The cubs/mets query must resolve to the first entry of baseball_strings."""
    # self.baseball_strings is set up elsewhere on the test case.
    top_hit = fuzzywuzzy.extractOne(
        "chicago cubs vs new york mets", self.baseball_strings
    )
    self.assertEqual(top_hit[0], self.baseball_strings[0])
def testGetBestChoice3(self):
    """The braves/phillies query must resolve to baseball_strings[2]."""
    # self.baseball_strings is set up elsewhere on the test case.
    top_hit = fuzzywuzzy.extractOne(
        "atlanta braves at philadelphia phillies", self.baseball_strings
    )
    self.assertEqual(top_hit[0], self.baseball_strings[2])
def testGetBestChoice1(self):
    """The mets/braves query must resolve to the literal "braves vs mets"."""
    # self.baseball_strings is set up elsewhere on the test case.
    top_hit = fuzzywuzzy.extractOne(
        "new york mets at atlanta braves", self.baseball_strings
    )
    self.assertEqual(top_hit[0], "braves vs mets")