def testWithScorer(self):
    """Check that a custom scorer changes which choice ``extractOne`` picks.

    Also checks that a mapping of choices is accepted and that the first
    element of the result is the matched value.
    """
    games = [
        "new york mets vs chicago cubs",
        "chicago cubs at new york mets",
        "atlanta braves vs pittsbugh pirates",
        "new york yankees vs boston red sox",
    ]
    games_by_id = {
        1: "new york mets vs chicago cubs",
        2: "chicago cubs vs chicago white sox",
        3: "philladelphia phillies vs atlanta braves",
        4: "braves vs mets",
    }
    query = "new york mets at chicago cubs"

    # With the default scorer the "more complete" match, games[1], wins.
    default_pick = process.extractOne(query, games)
    self.assertEqual(default_pick[0], games[1])

    # In this hypothetical example ordering matters, so the quick ratio is
    # used as a custom scorer; it selects games[0] instead.
    ordered_pick = process.extractOne(query, games, scorer=fuzz.QRatio)
    self.assertEqual(ordered_pick[0], games[0])

    # Choices may also be supplied as a dict.
    dict_pick = process.extractOne(query, games_by_id)
    self.assertEqual(dict_pick[0], games_by_id[1])
def test_simplematch(self):
    """An identical query and choice score 100 with both ratio scorers."""
    needle = 'a, b'
    haystack = ['a, b']
    expected = ('a, b', 100)

    # Evaluate both lookups first, then assert on them.
    full_match, partial_match = [
        process.extractOne(needle, haystack, scorer=scorer)
        for scorer in (fuzz.ratio, fuzz.partial_ratio)
    ]

    self.assertEqual(full_match, expected)
    self.assertEqual(partial_match, expected)
async def get_board_name(name: str) -> Optional[Tuple[str, float]]:
    """Resolve free-form user input to a board name by fuzzy matching.

    The hot-topic and new-topic alias lists are tried first, in that order.
    If the best alias in a list scores above 80, the canonical label for that
    list is returned together with the score. Otherwise the input is matched
    against the names of all boards returned by ``cc98_api.board_all2()``.

    Args:
        name: Text to match.

    Returns:
        ``(label, score)`` for an alias hit, otherwise whatever
        ``process.extractOne`` returns for the full board list.
    """
    # (canonical label, accepted aliases), checked in order.
    special_boards = (
        ("十大", ["十大热门话题", "十大热门", "十大", "98十大", "热门"]),
        ("新帖", ["新帖", "查看新帖", "查看最新", "最新帖子"]),
    )
    for label, aliases in special_boards:
        hit = process.extractOne(name, aliases)
        if hit and hit[1] > 80:
            return label, hit[1]

    # No confident alias match: fall back to the real board names.
    boards = await cc98_api.board_all2()
    board_names = [entry["name"] for entry in boards.values()]
    return process.extractOne(name, board_names)
def options_guard(select: str, options: List[str]) -> str:
    """Validate that ``select`` is one of ``options``.

    Args:
        select: The value chosen by the caller.
        options: The values that are allowed.

    Returns:
        ``select`` unchanged when it is contained in ``options``.

    Raises:
        ValueError: If ``select`` is not in ``options``. When a fuzzy match
            is available the message suggests the closest option.
    """
    if select in options:
        return select

    available = ', '.join([str(i) for i in options])

    # extractOne yields None when there is nothing to match against; indexing
    # that result would raise TypeError instead of the intended ValueError.
    suggestion = process.extractOne(select, options) if options else None
    if suggestion is None:
        raise ValueError(
            f"'{select}' is not available. "
            f"Available options are {available}")

    possible_select = suggestion[0]
    raise ValueError(
        f"'{select}' is not available, do you mean '{possible_select}'. "
        f"Available options are {available}")
def testWithProcessor(self):
    """Check ``extractOne`` with a processor that selects the event name.

    Each event is a list whose first element is the matchup string; the
    processor extracts that element so only the matchup is compared.
    """
    events = [
        ["chicago cubs vs new york mets", "CitiField", "2011-05-11", "8pm"],
        ["new york yankees vs boston red sox", "Fenway Park", "2011-05-11", "8pm"],
        ["atlanta braves vs pittsburgh pirates", "PNC Park", "2011-05-11", "8pm"],
    ]
    # The query has the same shape as an event. A trailing comma here would
    # wrap it in a 1-tuple, so the processor would return the whole list
    # instead of the matchup string.
    query = ["new york mets vs chicago cubs", "CitiField", "2017-03-19", "8pm"]

    best = process.extractOne(query, events, processor=lambda event: event[0])
    self.assertEqual(best[0], events[0])
def getFuzzyColor(color_name):
    """Return the entry of ``colors_list`` that best matches ``color_name``.

    The name is title-cased and compared with ``fuzz.token_sort_ratio``.

    Args:
        color_name: Color name to look up.

    Returns:
        The best-matching entry when its score is at least 85, otherwise
        ``None``.
    """
    # we want a 85% match for thefuzz
    fuzzyMatch = process.extractOne(color_name.title(), colors_list,
                                    scorer=fuzz.token_sort_ratio)
    # extractOne returns None when it has no candidate to offer (e.g. an
    # empty colors_list); treat that like any other "no good match".
    if fuzzyMatch is None:
        return None

    fuzz_color, percent_match = fuzzyMatch[0], fuzzyMatch[1]
    if percent_match >= 85:
        return fuzz_color
    return None
def testNullStrings(self):
    """``None`` entries among the choices do not prevent finding the match."""
    candidates = [
        None,
        "new york mets vs chicago cubs",
        "new york yankees vs boston red sox",
        None,
        None,
    ]

    top = process.extractOne("new york mets at chicago cubs", candidates)

    self.assertEqual(top[0], candidates[1])
def testWithCutoff2(self):
    """An exact match is still returned when ``score_cutoff`` is 100."""
    choices = [
        "new york mets vs chicago cubs",
        "chicago cubs at new york mets",
        "atlanta braves vs pittsbugh pirates",
        "new york yankees vs boston red sox"
    ]

    query = "new york mets vs chicago cubs"
    # Only find 100-score cases
    res = process.extractOne(query, choices, score_cutoff=100)
    # Dedicated assertions report the offending values on failure.
    self.assertIsNotNone(res)
    best_match, _score = res
    # The returned choice must be the very object taken from ``choices``.
    self.assertIs(best_match, choices[0])
def map_to_probe(self, payload, proto):
    """Find the known probe that most closely resembles ``payload``.

    Only the first ``min(len(payload), 50)`` items of the payload and of each
    probe are compared.

    Args:
        payload: Sliceable sequence to look up.
        proto: Key into ``self.probes`` and ``self.payloads``.

    Returns:
        ``(name, probe, self.payloads[proto][name])`` for the first probe
        whose prefix equals the closest match, or ``None`` when no probe
        matches (including when there are no probes for ``proto``).
    """
    probes = self.probes[proto]
    plen = min(len(payload), 50)
    needle = payload[0:plen]
    prefixes = [probe[0:plen] for probe in probes.values()]

    # With nothing to compare against there is no closest probe. Without this
    # guard the fuzzy fallback below would receive no choices and its result
    # could not be unpacked.
    if not prefixes:
        return None

    matches = difflib.get_close_matches(needle, prefixes, 1, cutoff=0)
    if matches:
        closest = matches[0]
    else:
        best = process.extractOne(needle, prefixes)
        if best is None:
            return None
        closest = best[0]

    for name, probe in probes.items():
        if probe[0:plen] == closest:
            return name, probe, self.payloads[proto][name]
    return None
def testWithCutoff(self):
    """No result is returned when every choice scores below the cutoff."""
    choices = [
        "new york mets vs chicago cubs",
        "chicago cubs at new york mets",
        "atlanta braves vs pittsbugh pirates",
        "new york yankees vs boston red sox"
    ]

    query = "los angeles dodgers vs san francisco giants"

    # in this situation, this is an event that does not exist in the list
    # we don't want to randomly match to something, so we use a reasonable cutoff
    best = process.extractOne(query, choices, score_cutoff=50)
    # assertIsNone shows the unexpected match if the check fails.
    self.assertIsNone(best)
def test_process_warning(capsys):
    """A query the processor reduces to an empty string logs a warning to stderr."""
    expected_err = ("WARNING:root:Applied processor reduces "
                    "input query to empty string, "
                    "all comparisons will have score 0. "
                    "[Query: ':::::::']\n")

    # The result itself is irrelevant; only the emitted warning is checked.
    process.extractOne(':::::::', [':::::::'])

    captured = capsys.readouterr()
    assert captured.err == expected_err
def testGetBestChoice4(self):
    """The best match for the cubs/mets query is ``baseball_strings[0]``."""
    candidates = self.baseball_strings
    top = process.extractOne("chicago cubs vs new york mets", candidates)
    self.assertEqual(top[0], candidates[0])
def testGetBestChoice3(self):
    """The best match for the braves/phillies query is ``baseball_strings[2]``."""
    candidates = self.baseball_strings
    top = process.extractOne("atlanta braves at philadelphia phillies", candidates)
    self.assertEqual(top[0], candidates[2])
def testGetBestChoice1(self):
    """The mets/braves query resolves to the short "braves vs mets" entry."""
    top = process.extractOne("new york mets at atlanta braves",
                             self.baseball_strings)
    self.assertEqual(top[0], "braves vs mets")
# -*- coding:utf-8 -*-
"""
Reference: https://github.com/seatgeek/thefuzz
"""
__author__ = "aaron.qiu"

from pprint import pprint

from thefuzz import fuzz
from thefuzz import process


def main():
    """Print the results of a few thefuzz scorer and ``process`` calls."""
    # Plain and partial ratios.
    pprint(fuzz.ratio("this is a test", "this is a test!"))
    pprint(fuzz.partial_ratio("this is a test", "this is a test!"))

    # Plain ratio versus token-sort ratio on reordered words.
    pprint(fuzz.ratio("fuzzy wuzzy was a bear", "wuzzy fuzzy was a bear"))
    pprint(
        fuzz.token_sort_ratio("fuzzy wuzzy was a bear",
                              "wuzzy fuzzy was a bear"))

    # Token-sort versus token-set ratio on a repeated word.
    pprint(fuzz.token_sort_ratio("fuzzy was a bear", "fuzzy fuzzy was a bear"))
    pprint(fuzz.token_set_ratio("fuzzy was a bear", "fuzzy fuzzy was a bear"))

    choices = [
        "Atlanta Falcons", "New York Jets", "New York Giants", "Dallas Cowboys"
    ]
    pprint(process.extract("new york jets", choices, limit=2))
    pprint(process.extractOne("cowboys", choices))

    # ``choices`` must be a collection of strings: a bare str would be
    # iterated one character at a time, so the path is wrapped in a list.
    songs = ["/data/soft"]
    pprint(process.extractOne("System of a down - Hypnotize - apache", songs))
    # Print this result as well; previously it was computed and discarded.
    pprint(
        process.extractOne("System of a down - Hypnotize - Heroin",
                           songs,
                           scorer=fuzz.token_sort_ratio))


if __name__ == '__main__':
    main()