Example #1
0
    def testWithScorer(self):
        """Check which choice extractOne picks with the default and a custom scorer.

        Runs the same query against a list of choices (default scorer, then
        ``fuzz.QRatio``) and against a dict of choices (default scorer).
        """
        candidates = [
            "new york mets vs chicago cubs",
            "chicago cubs at new york mets",
            "atlanta braves vs pittsbugh pirates",
            "new york yankees vs boston red sox",
        ]
        candidates_by_id = {
            1: "new york mets vs chicago cubs",
            2: "chicago cubs vs chicago white sox",
            3: "philladelphia phillies vs atlanta braves",
            4: "braves vs mets",
        }

        # Word order matters in this hypothetical scenario, hence the quick ratio.
        wanted = "new york mets at chicago cubs"
        ordering_scorer = fuzz.QRatio

        # The default scorer is expected to prefer the "more complete" match,
        # i.e. candidates[1].
        default_pick = process.extractOne(wanted, candidates)
        self.assertEqual(default_pick[0], candidates[1])

        # The custom scorer is expected to prefer candidates[0] instead.
        custom_pick = process.extractOne(wanted, candidates, scorer=ordering_scorer)
        self.assertEqual(custom_pick[0], candidates[0])

        # Choices may also be supplied as a dict; the matched value comes first.
        dict_pick = process.extractOne(wanted, candidates_by_id)
        self.assertEqual(dict_pick[0], candidates_by_id[1])
Example #2
0
    def test_simplematch(self):
        """An identical single choice must score 100 with ratio and partial_ratio."""
        needle = 'a, b'
        haystack = ['a, b']
        expected = ('a, b', 100)

        # Compute both results before asserting, one per scorer.
        full_match = process.extractOne(needle, haystack, scorer=fuzz.ratio)
        partial_match = process.extractOne(needle, haystack, scorer=fuzz.partial_ratio)

        self.assertEqual(full_match, expected)
        self.assertEqual(partial_match, expected)
Example #3
0
async def get_board_name(name: str) -> Optional[Tuple[str, float]]:
    """Resolve a user-supplied name to a known topic alias or a board name.

    The name is first fuzzy-matched against the "hot topics" aliases and then
    against the "new posts" aliases; a score above 80 in either group returns
    that group's canonical name together with the score.  Otherwise the board
    names fetched via ``cc98_api.board_all2()`` are searched and the result of
    ``process.extractOne`` is returned unchanged.
    """
    # (canonical name, accepted aliases), checked in this order.
    alias_groups = (
        ("十大", ["十大热门话题", "十大热门", "十大", "98十大", "热门"]),
        ("新帖", ["新帖", "查看新帖", "查看最新", "最新帖子"]),
    )
    for canonical, aliases in alias_groups:
        hit = process.extractOne(name, aliases)
        if hit and hit[1] > 80:
            return canonical, hit[1]

    # No alias was close enough: fall back to the real board names.
    boards = await cc98_api.board_all2()
    board_names = [entry["name"] for entry in boards.values()]
    return process.extractOne(name, board_names)
Example #4
0
def options_guard(select: str, options: List[str]):
    """Return ``select`` if it is one of ``options``, otherwise raise.

    Args:
        select: The value chosen by the caller.
        options: The allowed values.

    Returns:
        ``select`` unchanged when it is contained in ``options``.

    Raises:
        ValueError: If ``select`` is not in ``options``.  When ``options`` is
            non-empty the message suggests the closest fuzzy match.
    """
    if select in options:
        return select

    if not options:
        # With nothing to match against, extractOne returns None and
        # subscripting it would raise TypeError instead of the ValueError
        # callers expect.
        raise ValueError(
            f"'{select}' is not available, there are no available options.")

    possible_select = process.extractOne(select, options)[0]
    raise ValueError(
        f"'{select}' is not available, do you mean '{possible_select}'. "
        f"Available options are {', '.join([str(i) for i in options])}")
Example #5
0
    def testWithProcessor(self):
        """Check extractOne with a processor that extracts the event title.

        Each event is a list whose first element is the title; the processor
        selects that element so matching is done on titles only.
        """
        events = [
            ["chicago cubs vs new york mets", "CitiField", "2011-05-11", "8pm"],
            ["new york yankees vs boston red sox", "Fenway Park", "2011-05-11", "8pm"],
            ["atlanta braves vs pittsburgh pirates", "PNC Park", "2011-05-11", "8pm"],
        ]
        # No trailing comma here: the query must be an event list like the
        # choices, not a 1-tuple wrapping one, so that the processor yields
        # the title string.
        query = ["new york mets vs chicago cubs", "CitiField", "2017-03-19", "8pm"]

        best = process.extractOne(query, events, processor=lambda event: event[0])
        self.assertEqual(best[0], events[0])
Example #6
0
def getFuzzyColor(color_name):
    """Return the entry of ``colors_list`` that best matches ``color_name``.

    The name is title-cased and compared with ``fuzz.token_sort_ratio``.

    Returns:
        The best-matching color when its score is at least 85, otherwise
        ``None`` (also when there is nothing to match against).
    """
    # we want a 85% match for thefuzz
    fuzzy_match = process.extractOne(color_name.title(),
                                     colors_list,
                                     scorer=fuzz.token_sort_ratio)
    if fuzzy_match is None:
        # extractOne found no candidate at all (e.g. empty colors_list);
        # subscripting None would raise TypeError.
        return None

    fuzz_color, percent_match = fuzzy_match[0], fuzzy_match[1]
    return fuzz_color if percent_match >= 85 else None
Example #7
0
    def testNullStrings(self):
        """extractOne must still pick the right string when choices contain None."""
        candidates = [
            None,
            "new york mets vs chicago cubs",
            "new york yankees vs boston red sox",
            None,
            None,
        ]

        top_match = process.extractOne("new york mets at chicago cubs", candidates)

        # The mets/cubs entry (index 1) is the expected winner.
        self.assertEqual(top_match[0], candidates[1])
Example #8
0
    def testWithCutoff2(self):
        """With score_cutoff=100 an exact match must still be returned."""
        choices = [
            "new york mets vs chicago cubs",
            "chicago cubs at new york mets",
            "atlanta braves vs pittsbugh pirates",
            "new york yankees vs boston red sox"
        ]

        query = "new york mets vs chicago cubs"
        # Only find 100-score cases
        res = process.extractOne(query, choices, score_cutoff=100)
        # Dedicated assertions give a descriptive message on failure instead
        # of "False is not true".
        self.assertIsNotNone(res)
        best_match, _ = res
        self.assertIs(best_match, choices[0])
Example #9
0
	def map_to_probe(self, payload, proto):
		"""Find the known probe for ``proto`` whose prefix is closest to ``payload``.

		Only the first ``min(len(payload), 50)`` items of the payload and of each
		probe in ``self.probes[proto]`` are compared.

		Returns:
			A ``(name, probe, self.payloads[proto][name])`` tuple for the first
			probe whose prefix equals the closest match, or ``None`` when no
			probe is available or none matches.
		"""
		plen = min(len(payload), 50)
		prefix = payload[0:plen]
		candidates = [probe[0:plen] for probe in self.probes[proto].values()]

		if not candidates:
			# Nothing to compare against: both matchers would come back empty
			# and unpacking extractOne's None result would raise TypeError.
			return None

		matches = difflib.get_close_matches(prefix, candidates, 1, cutoff=0)
		if matches:
			closest = matches[0]
		else:
			# Fall back to the fuzzy matcher if difflib found nothing.
			best = process.extractOne(prefix, candidates)
			if best is None:
				return None
			closest = best[0]

		for name, probe in self.probes[proto].items():
			if probe[0:plen] == closest:
				return name, probe, self.payloads[proto][name]

		return None
Example #10
0
    def testWithCutoff(self):
        """A query with no sufficiently similar choice must yield None."""
        choices = [
            "new york mets vs chicago cubs",
            "chicago cubs at new york mets",
            "atlanta braves vs pittsbugh pirates",
            "new york yankees vs boston red sox"
        ]

        query = "los angeles dodgers vs san francisco giants"

        # in this situation, this is an event that does not exist in the list
        # we don't want to randomly match to something, so we use a reasonable cutoff

        best = process.extractOne(query, choices, score_cutoff=50)
        # assertIsNone reports the unexpected value on failure.
        self.assertIsNone(best)
Example #11
0
def test_process_warning(capsys):
    """A query that the processor reduces to an empty string must warn on stderr."""
    symbols_only = ':::::::'

    # The return value is irrelevant; only the logged warning is checked.
    process.extractOne(symbols_only, [':::::::'])

    captured = capsys.readouterr()

    expected_err = ("WARNING:root:Applied processor reduces "
                    "input query to empty string, "
                    "all comparisons will have score 0. "
                    "[Query: ':::::::']\n")

    assert captured.err == expected_err
Example #12
0
 def testGetBestChoice4(self):
     """The cubs-vs-mets query must select the first baseball string."""
     top_match = process.extractOne("chicago cubs vs new york mets",
                                    self.baseball_strings)
     matched_text = top_match[0]
     self.assertEqual(matched_text, self.baseball_strings[0])
Example #13
0
 def testGetBestChoice3(self):
     """The braves-at-phillies query must select the third baseball string."""
     top_match = process.extractOne("atlanta braves at philadelphia phillies",
                                    self.baseball_strings)
     matched_text = top_match[0]
     self.assertEqual(matched_text, self.baseball_strings[2])
Example #14
0
 def testGetBestChoice1(self):
     """The mets-at-braves query must select the "braves vs mets" string."""
     top_match = process.extractOne("new york mets at atlanta braves",
                                    self.baseball_strings)
     matched_text = top_match[0]
     self.assertEqual(matched_text, "braves vs mets")
Example #15
0
# -*- coding:utf-8 -*-
"""
Reference: https://github.com/seatgeek/thefuzz
"""
__author__ = "aaron.qiu"

from pprint import pprint
from thefuzz import fuzz
from thefuzz import process

if __name__ == '__main__':
    # Demo: print the result of each thefuzz scorer on sample string pairs.
    pprint(fuzz.ratio("this is a test", "this is a test!"))
    pprint(fuzz.partial_ratio("this is a test", "this is a test!"))
    pprint(fuzz.ratio("fuzzy wuzzy was a bear", "wuzzy fuzzy was a bear"))
    pprint(
        fuzz.token_sort_ratio("fuzzy wuzzy was a bear",
                              "wuzzy fuzzy was a bear"))
    pprint(fuzz.token_sort_ratio("fuzzy was a bear", "fuzzy fuzzy was a bear"))
    pprint(fuzz.token_set_ratio("fuzzy was a bear", "fuzzy fuzzy was a bear"))

    # Demo: pick the best matches out of a list of choices.
    choices = [
        "Atlanta Falcons", "New York Jets", "New York Giants", "Dallas Cowboys"
    ]
    pprint(process.extract("new york jets", choices, limit=2))
    pprint(process.extractOne("cowboys", choices))

    # Choices must be a collection of strings; a bare string is itself an
    # iterable of single characters, so wrap the path in a list.
    # NOTE(review): if "/data/soft" is meant to be a directory, list its
    # files here instead -- confirm the intent.
    songs = ["/data/soft"]
    pprint(process.extractOne("System of a down - Hypnotize - apache", songs))
    # Print this result as well; it was previously computed and discarded.
    pprint(
        process.extractOne("System of a down - Hypnotize - Heroin",
                           songs,
                           scorer=fuzz.token_sort_ratio))