Python extractOne Beispiele, rapidfuzz.process.extractOne Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: marvel_request.py Projekt: asengardeon/chatbot_marvel

def __fix_char_name(char_name: str):
    """Return the known hero name that most closely matches ``char_name``.

    The candidates come from ``__load_all_heroes_names()`` and are ranked with
    ``fuzz.WRatio``. The best result is logged at INFO level before its name
    is returned.
    """
    hero_names = __load_all_heroes_names()
    # Score the candidates once and reuse the result for both the log line and
    # the return value; the lookup used to be executed twice per call.
    best_match = process.extractOne(char_name, hero_names, scorer=fuzz.WRatio)
    logging.info(best_match)
    # Three-way unpacking keeps the original failure mode when nothing matched
    # and avoids shadowing the builtin ``id``.
    nome, _score, _index = best_match
    return nome

Beispiel #2

0

Datei anzeigen

def argument_parser(sc, arg1):
    """Resolve the raw argument ``arg1`` of sub-command ``sc``.

    * ``'dmg'``  -> the closest entry of ``dmg_bracket_list()``.
    * ``'rand'`` -> ``arg1`` unchanged when ``int(arg1)`` succeeds, else ``10``.
    * otherwise  -> the ``'name'`` of the first ``shortcut_obj`` entry when the
      sanitised, lower-cased argument is in ``shct_parse_list()``; failing
      that, the closest entry of ``arg_parse_list()``.
    """
    cleaned = sanitise_input(arg1)

    if sc == 'dmg':
        return process.extractOne(cleaned, dmg_bracket_list())[0]

    if sc == 'rand':
        try:
            int(arg1)
        except (ValueError, TypeError):
            # Not convertible to an integer: fall back to 10.
            return 10
        return arg1

    lowered = cleaned.lower()
    if lowered in shct_parse_list():
        return shortcut_obj(lowered)[0]['name']

    # A previously disabled alternative restricted the shortcut lookup to the
    # 'aura' / 'zen' / 'affinity' sub-commands with inputs of at most 4 chars.
    return process.extractOne(cleaned, arg_parse_list())[0]

Beispiel #3

0

Datei anzeigen

Datei: question_classifier_old.py Projekt: Sumsky21/bangumi_project

    def check_dict(self, question):
        """Work out which anime / staff names ``question`` refers to.

        Keywords are extracted from the question with jieba and each one is
        fuzzy-matched against ``self.anime_list`` and ``self.staff_list``.
        A match scoring at least 70 becomes a candidate; it is selected
        outright when its ``partial_ratio`` against the whole question
        exceeds 50. When nothing is selected outright, the user is asked on
        the console to confirm each candidate.

        Returns:
            tuple: ``(region_dict, selected)`` where ``region_dict`` maps every
            selected name to ``self.wdtype_dict.get(name)``.
        """
        txt_cut = "/".join(jieba.cut(cc.convert(question)))

        # The number of keywords requested is a third of the length of the
        # "/"-joined segmentation.
        top = int(len(txt_cut) / 3)
        keywords = jieba.analyse.extract_tags(question, topK=top)
        selected_anime = []
        selected_staff = []
        maybe = []
        for keyword in keywords:
            p_anime_list = process.extractOne(keyword, self.anime_list)
            p_staff_list = process.extractOne(keyword, self.staff_list)
            if p_anime_list[1] >= 70:
                maybe.append(p_anime_list[0])
                if fuzz.partial_ratio(p_anime_list[0], question) > 50:
                    selected_anime.append(p_anime_list[0])
            if p_staff_list[1] >= 70:
                maybe.append(p_staff_list[0])
                if fuzz.partial_ratio(p_staff_list[0], question) > 50:
                    selected_staff.append(p_staff_list[0])

        if not (selected_anime or selected_staff):
            # Ask about every candidate and keep the ones that were not
            # rejected. A new list is built instead of removing entries from
            # `maybe` while iterating over it, which used to skip the
            # candidate following each rejected one.
            selected = []
            for candidate in maybe:
                ask = "请问您说的是{0}吗？(是/否）\n".format(candidate)
                if input(ask) != '否':
                    selected.append(candidate)
        else:
            selected = selected_anime + selected_staff
        region_dict = {i: self.wdtype_dict.get(i) for i in selected}
        return region_dict, selected

Beispiel #4

0

Datei anzeigen

    def determine_query(self, query=None):
        '''Classify a recommendation query as "Genre", "Movie" or "Name".

        Returns None when no query is given. A query counts as a genre query
        when its first space-separated word (or 'sci-fi', if the query
        contains "sci" or "fi") is in ``self.genres``. Otherwise the query is
        fuzzy-matched against the person names and the movie titles, and the
        better-scoring side wins; a tie goes to "Name".
        '''
        if query is None:
            return None

        query = query.lower()
        # str.split(" ") always yields at least one item, so the first word
        # is always available.
        genre_candidate = query.split(" ")[0]
        if "sci" in query or "fi" in query:
            genre_candidate = 'sci-fi'
        if genre_candidate in self.genres:
            return "Genre"

        # Best person-name match and best movie-title match for the query.
        name_hit = process.extractOne(query, list(self.names['primaryName']))
        movie_hit = process.extractOne(query, self.movie_dict.keys())

        name_score = name_hit[1]
        movie_score = movie_hit[1]
        return "Movie" if movie_score > name_score else "Name"

Beispiel #5

0

Datei anzeigen

Datei: test_process.py Projekt: RemiBeaupreARA/RapidFuzz

    def testWithScorer(self):
        # extractOne must honour a custom scorer for both list and mapping
        # choices.
        choices = [
            "new york mets vs chicago cubs",
            "chicago cubs at new york mets",
            "atlanta braves vs pittsbugh pirates",
            "new york yankees vs boston red sox",
        ]
        # The same strings keyed 1..4.
        choices_mapping = dict(enumerate(choices, start=1))

        # In this hypothetical example word order matters, so QRatio is the
        # scorer of interest.
        query = "new york mets at chicago cubs"

        # With the default scorer the "more complete" match choices[1] wins.
        default_list_hit = process.extractOne(query, choices)
        self.assertEqual(default_list_hit[0], choices[1])
        default_map_hit = process.extractOne(query, choices_mapping)
        self.assertEqual(default_map_hit[0], choices_mapping[2])

        # With the custom scorer the first choice wins instead.
        custom_list_hit = process.extractOne(query, choices, scorer=fuzz.QRatio)
        self.assertEqual(custom_list_hit[0], choices[0])
        custom_map_hit = process.extractOne(query,
                                            choices_mapping,
                                            scorer=fuzz.QRatio)
        self.assertEqual(custom_map_hit[0], choices_mapping[1])

Beispiel #6

0

Datei anzeigen

 async def apex(self, ctx, *, arg1):
     # Command handler. With a single-token argument it sends an embed listing
     # the apexes available for the ship; with more tokens it fuzzy-matches an
     # apex rank from the argument and sends that apex's embed.
     rank_list = [row[0] for row in sql_rank_obj()]
     token_count = len(arg1.split(" "))
     # NOTE(review): this value is overwritten in both branches before it is
     # read; the call is kept in case sql_ship_obj() has side effects.
     s_obj = sql_ship_obj()

     # No apex rank given: show the list of available apexes.
     if token_count == 1:
         s_obj = ShipData(ctx, arg1).s_obj
         list_title = f"Apexes for {s_obj['name']}"
         colour = get_em_colour(s_obj['affinity'])
         listing = discord.Embed(title=list_title,
                                 description=ApexLister(ctx, arg1).embed_list,
                                 color=colour)
         # (Attaching the ship image to this embed is currently disabled.)
         await ctx.send(embed=listing)
         return

     # A rank was given: resolve it against the known ranks.
     a_obj = sql_apex_num_obj()
     s_obj = ShipData(ctx, arg1).s_obj
     apex_tier = process.extractOne(arg1, rank_list)[0]
     apex_obj = ApexData(ctx, s_obj['name'], apex_tier)
     colour = get_em_colour(s_obj['affinity'])
     embed = discord.Embed(title=apex_obj.embed_title,
                           color=colour,
                           description=apex_obj.embed_desc)
     # Use the image of every apex entry that belongs to this ship and rank
     # as thumbnail (a later match replaces an earlier one).
     for entry in a_obj:
         if entry['id'] == s_obj['number'] and entry['rank'] == apex_tier:
             image_key = f"{entry['id']}_apex_{entry['apex_num']}"
             embed.set_thumbnail(url=get_ship_image(image_key))
     await ctx.send(embed=embed)

Beispiel #7

0

Datei anzeigen

Datei: util.py Projekt: gjwgit/wajig

def find_best_match(misspelled, candidates):
    """Find the best matched word with <misspelled> in <candidates>.

    Candidates are scored with ``fuzz.ratio`` and must reach a score of 60.

    Returns:
        tuple: the first two fields of the best match, i.e. ``(word, score)``.
        When no candidate reaches the cutoff, ``(None, 0)`` is returned so the
        result keeps its two-element shape (this case used to raise
        ``TypeError`` because ``None`` was sliced).
    """
    best = fuzzprocess.extractOne(misspelled,
                                  candidates,
                                  scorer=fuzz.ratio,
                                  score_cutoff=60)
    if best is None:
        return None, 0
    return best[0:2]

Beispiel #8

0

Datei anzeigen

    async def queue(self, ctx: commands.Context, *, roles):
        """
        Puts you in a queue in the current channel for the specified roles.
        Roles are top, jungle, mid, bot, and support.

        Example usage:
            !queue support
            !queue mid bot
        """
        # The docstring above is left untouched on purpose: it is presumably
        # shown to users as the command's help text.
        player = await self.bot.get_player(ctx)

        # First, we check if the last game of the player is still ongoing.
        try:
            game, participant = player.get_last_game()
            if not game.winner:
                await ctx.send(
                    "Your last game looks to be ongoing. "
                    "Please use !won or !lost to inform the result if the game is over.",
                    delete_after=self.bot.short_notice_duration,
                )
                return
        # This happens if the player has not played a game yet as get_last returns None and can’t be unpacked
        except TypeError:
            pass

        session = get_session()
        try:
            # NOTE(review): the filter compares against the class attribute
            # `Player.discord_id`, not `player.discord_id` of the caller --
            # confirm this is intended.
            # `!= None` is deliberate: it builds a SQL expression, so
            # `is not None` must not be used here.
            queue_player = (session.query(QueuePlayer).filter(
                QueuePlayer.player_id == Player.discord_id).filter(
                    QueuePlayer.ready_check != None)).first()
        finally:
            # Release the session even when the query raises.
            session.close()

        if queue_player:
            await ctx.send(
                "It seems you are in a pre-game check. You will be able to queue again once it is over."
            )
            return

        # Keep every requested role that fuzzy-matches a known role with a
        # score of at least 80.
        clean_roles = set()
        for role in roles.split(" "):
            # Only the first two fields (match, score) are needed; the starred
            # target absorbs any extra field (e.g. the index newer rapidfuzz
            # versions append), which used to break the two-name unpacking.
            clean_role, score, *_ = process.extractOne(role, roles_list)
            if score >= 80:
                clean_roles.add(clean_role)

        if not clean_roles:
            await ctx.send(self.bot.role_not_understood,
                           delete_after=self.bot.warning_duration)
            return

        for role in clean_roles:
            self.add_player_to_queue(player, role, ctx.channel.id)

        for role in clean_roles:
            # Dirty code to get the emoji related to the letters
            await ctx.message.add_reaction(get_role_emoji(role))

        await self.matchmaking_process(ctx)

        await self.send_queue(ctx)

Beispiel #9

0

Datei anzeigen

Datei: stats_cog.py Projekt: fernanlukban/inhouse_bot

    async def ranking(self, ctx: commands.Context, role='all'):
        """
        Returns the top 20 players for the selected role.
        """
        # The docstring above is left untouched on purpose: it is presumably
        # shown to users as the command's help text.
        if role == 'all':
            clean_role = role
        else:
            # Only the first two fields (match, score) are needed; the starred
            # target absorbs any extra field (e.g. the index newer rapidfuzz
            # versions append), which used to break the two-name unpacking.
            clean_role, score, *_ = process.extractOne(role, roles_list)
            if score < 80:
                await ctx.send(self.bot.role_not_understood, delete_after=30)
                return

        # The 'Role' column is only meaningful when all roles are listed; it
        # is now left out otherwise instead of adding a column of None values.
        show_role = clean_role == 'all'
        header = ['Rank', 'Name', 'MMR', 'Games']
        if show_role:
            header.append('Role')
        table = [header]

        session = get_session()
        try:
            # Highest MMR first, ignoring ratings without any game.
            role_ranking = session.query(PlayerRating).order_by(
                -PlayerRating.mmr).filter(PlayerRating.games > 0)

            if not show_role:
                role_ranking = role_ranking.filter(
                    PlayerRating.role == clean_role)

            # All values are read while the session is still open.
            for rank, rating in enumerate(role_ranking.limit(20)):
                row = [
                    inflect_engine.ordinal(rank + 1), rating.player.name,
                    f'{rating.mmr:.1f}',
                    rating.get_games()
                ]
                if show_role:
                    row.append(rating.role)
                table.append(row)
        finally:
            # The session used to be left open.
            session.close()

        await ctx.send(f'Ranking for {clean_role} is:\n'
                       f'```{tabulate(table, headers="firstrow")}```')

Beispiel #10

0

Datei anzeigen

Datei: fun.py Projekt: Run1e/AceBot

	async def bill(self, ctx, *, query: str = None):
		'''Get a random Bill Wurtz video from his website, with optional search.'''

		async with ctx.typing():
			if query is None:
				# No search term: pick any cached title at random.
				picked = choice(list(self.bill_cache.keys()))
			elif query.startswith('latest'):
				picked = self.bill_latest
			else:
				# Fuzzy-match the search term against the cached titles.
				picked, score, _ = process.extractOne(query, self.bill_cache.keys())
				if score < 50:
					closest = picked.strip()
					raise commands.CommandError(
						"Couldn't match that search with certainty.\n"
						f"Closest match: '{closest}'"
					)

		# Each cache entry holds the link target and the date of the video.
		href, bill_date = self.bill_cache[picked]
		safe_title = disnake.utils.escape_markdown(picked)
		reply = f"{bill_date}: *{safe_title}* \n{BILL_WURTZ_URL + href}"

		await ctx.send(reply)

Beispiel #11

0

Datei anzeigen

Datei: fair_check.py Projekt: ACz-UniBi/FAIRsFAIR_fuji-tool

 def lookup_metadatastandard_by_name(self, value):
     """Return the community standard name closest to ``value``.

     Names in ``FAIRCheck.COMMUNITY_STANDARDS_NAMES`` are compared with
     ``fuzz.token_sort_ratio``; the best one is returned only when its score
     is strictly above 80, otherwise ``None``.
     """
     best = process.extractOne(value,
                               FAIRCheck.COMMUNITY_STANDARDS_NAMES,
                               scorer=fuzz.token_sort_ratio)
     return best[0] if best[1] > 80 else None

Beispiel #12

0

Datei anzeigen

Datei: discovery.py Projekt: marcelotrevisani/grayskull

def match_license(name: str) -> dict:
    """Match if the given license name matches any license present on
    spdx.org

    The name is stripped and any "license" word preceded by whitespace is
    removed (case-insensitively) before matching. Unless the first match
    scores exactly 100, the candidates are narrowed in up to three passes
    (see the inline comments). The chosen match and the remaining candidates
    are logged at INFO level.

    :param name: License name
    :return: Information of the license matched
    """
    all_licenses = get_all_licenses_from_spdx()
    # e.g. "MIT License" -> "MIT"
    name = re.sub(r"\s+license\s*", "", name.strip(), flags=re.IGNORECASE)

    # First pass: default scorer over every known license choice. Each result
    # is indexed as [0] = choice and [1] = score below.
    best_matches = process.extract(name, _get_all_license_choice(all_licenses))
    spdx_license = best_matches[0]
    if spdx_license[1] != 100:
        # Not a perfect hit: drop the "-only" variants and keep just the
        # choice strings (best_matches becomes a list of str here).
        best_matches = [
            lic[0] for lic in best_matches if not lic[0].endswith("-only")
        ]

        if best_matches:
            # Second pass: re-rank the remaining choices with token_set_ratio.
            best_matches = process.extract(name,
                                           best_matches,
                                           scorer=token_set_ratio)
            spdx_license = best_matches[0]
            # Keep only the choices scoring at least as high as the new first
            # result (again reduced to a list of str).
            best_matches = [
                lic[0] for lic in best_matches if lic[1] >= spdx_license[1]
            ]
            if len(best_matches) > 1:
                # Third pass: break the tie with token_sort_ratio.
                spdx_license = process.extractOne(name,
                                                  best_matches,
                                                  scorer=token_sort_ratio)

    # best_matches holds result tuples or plain strings, depending on which
    # branch ran above.
    log.info(f"Best match for license {name} was {spdx_license}.\n"
             f"Best matches: {best_matches}")

    return _get_license(spdx_license[0], all_licenses)

Beispiel #13

0

Datei anzeigen

    def result(self):
        """Fuzzy-match the normalized company name against the site's texts.

        The website title and the copyright statement are cleaned, split on
        the separator pattern and normalized with ``std_name(..., level=7)``;
        the normalized web domain is appended to those pieces. The best
        ``process.extractOne`` match for ``self.company_name_normalized`` is
        returned, or ``("", 0)`` when no company name is set.
        """
        splitter = r'[|\t\n\r\f\v]+| – '

        def _normalized_parts(raw_text):
            # Clean, split and normalize one text, skipping empty pieces.
            cleaned = self.text_cleaner(raw_text)
            return [
                std_name(piece, level=7)
                for piece in re.split(splitter, cleaned) if piece
            ]

        title_list = []
        if self.website_title:
            title_list = _normalized_parts(self.website_title)
        cr_list = []
        if self.copyright_statement:
            cr_list = _normalized_parts(self.copyright_statement)

        domain_part = [std_name(self.web_domain, level=7)]
        compare_list = title_list + cr_list + domain_part

        if self.company_name_normalized and compare_list:
            return process.extractOne(self.company_name_normalized,
                                      compare_list)
        return ("", 0)

Beispiel #14

0

Datei anzeigen

Datei: test_process.py Projekt: dish59742/RapidFuzz

    def testWithProcessor(self):
        """
        extractOne should accept any type as long as it is a string
        after preprocessing
        """
        mets_game = [
            "chicago cubs vs new york mets", "CitiField", "2011-05-11", "8pm"
        ]
        yankees_game = [
            "new york yankees vs boston red sox", "Fenway Park", "2011-05-11",
            "8pm"
        ]
        braves_game = [
            "atlanta braves vs pittsburgh pirates", "PNC Park", "2011-05-11",
            "8pm"
        ]
        events = [mets_game, yankees_game, braves_game]

        def title_of(event):
            # Only the first field of an event record is a comparable string.
            return event[0]

        # The query is itself an event record, so it must match itself.
        query = events[0]
        best = process.extractOne(query, events, processor=title_of)
        self.assertEqual(best[0], events[0])

Beispiel #15

0

Datei anzeigen

def edit_correct(word: str,
                 wordlist: Iterable[str],
                 max_dist: int = 2) -> Optional[str]:
    """Fix spelling mistakes in input word by
    computing the Levenshtein distance to a list of valid words
    sorted by decreasing priority. The closest word within
    maximum edit distance is returned. Ties are resolved by
    picking the highest priority word.

    Parameters
    ----------
    word
        The possibly misspelled word.
    wordlist
        Valid words, highest priority first.
    max_dist
        Cutoff handed to ``process.extractOne`` as ``score_cutoff``
        (annotated as ``int``; it was mistakenly annotated as ``str``).

    Returns
    -------
    The matching word, or ``None`` when no word is within the cutoff.

    Examples
    --------
    >>> edit_correct("kug", ("mug", "bug", "but"))
    'mug'
    >>> edit_correct("bug", ("mug", "bug", "but"))
    'bug'
    >>> edit_correct("friend", ("mug", "bug", "but"))
    """
    # An exact hit needs no fuzzy search.
    if word in wordlist:
        return word
    # extractOne returns the most similar word, in case of
    # ties, the first word is returned. Since words are in
    # decreasing order of priority, this will automatically return
    # the most relevant word
    res = process.extractOne(word,
                             wordlist,
                             scorer=string_metric.levenshtein,
                             score_cutoff=max_dist)
    # Extract the string from the result tuple (if a match was found).
    return None if res is None else res[0]

Beispiel #16

0

Datei anzeigen

def match_authors(name, allcontrib_names, allcontrib_login):
    """
    Use fuzzy string matching to match names of committers
    to the names or handles mentioned in the allcontributors file.

    Args:
        name: str; name of committer
        allcontrib_names: list; names in allcontributorsrc file
        allcontrib_login: list; logins in allcontributorsrc file

    Returns:
        tuple: ``([name, match], None)`` when a name or login scores at
        least 71 with ``fuzz.token_sort_ratio``, where ``match`` is the
        matched string; ``(None, name)`` when nothing matches.
    """

    # First, match the name. If no match, try Github login
    matching = process.extractOne(name,
                                  allcontrib_names,
                                  scorer=fuzz.token_sort_ratio,
                                  score_cutoff=71)
    if not matching:
        # No name matched, so check the Github handles. extractOne is used
        # here as well so that `matching[0]` below is the matched string in
        # both cases; the previous `process.extract` call returned a list,
        # which made `matching[0]` a whole result tuple.
        matching = process.extractOne(name,
                                      allcontrib_login,
                                      scorer=fuzz.token_sort_ratio,
                                      score_cutoff=71)
    if matching:
        return [name, matching[0]], None
    else:
        return None, name

Beispiel #17

0

Datei anzeigen

Datei: categoryPredictionApp.py Projekt: mjason37/course-category-prediction-app

def predictCategory(categoryToPredictFor):
    """Predict the broad category of a course from its closest known title.

    categoryToPredictFor = the course whose category is to be predicted
    return = a one-row dataframe holding the input course, the predicted
    category and the rounded match score.
    """
    # Best matching title; the result is read as [1] = similarity score and
    # [2] = position of the title in the dataframe.
    best = process.extractOne(categoryToPredictFor,
                              df.courseTitle.values,
                              scorer=fuzz.token_set_ratio)
    row_position = best[2]
    confidence = best[1]

    # Look the broad category up by row position.
    category = df.iloc[row_position].broadCategory1

    columns = {
        "categoryFor": categoryToPredictFor,
        "category": category,
        "matchConfidence": round(confidence)
    }
    return pd.DataFrame(columns, index=[0])

Beispiel #18

0

Datei anzeigen

 def _string_fuzzy_match(
     cls, match: typing.Any, choices: typing.Sequence[typing.Any], min_score: float
 ) -> typing.Optional[str]:
     """Return the choice closest to ``match``, or ``None`` when its score
     is below ``min_score``."""
     best, best_score, _ = process.extractOne(match, choices)
     return best if best_score >= min_score else None

Beispiel #19

0

Datei anzeigen

Datei: test_process.py Projekt: dish59742/RapidFuzz

    def testWithCutoffEdgeCases(self):
        # Boundary behaviour of extractOne at the two ends of the score range.
        choices = [
            "new york mets vs chicago cubs",
            "chicago cubs at new york mets",
            "atlanta braves vs pittsbugh pirates",
            "new york yankees vs boston red sox",
        ]

        # A cutoff of 100 still lets a perfect match through.
        perfect = process.extractOne("new york mets vs chicago cubs",
                                     choices,
                                     score_cutoff=100)
        self.assertIsNotNone(perfect)
        self.assertEqual(perfect[0], choices[0])

        # Without a cutoff, a match scoring 0 is returned rather than None.
        zero_scored = process.extractOne("", choices)
        self.assertIsNotNone(zero_scored)
        self.assertEqual(zero_scored[1], 0)

Beispiel #20

0

Datei anzeigen

Datei: test_process.py Projekt: dish59742/RapidFuzz

    def testWithCutoff(self):
        # A score_cutoff suppresses weak matches that would otherwise be
        # returned.
        choices = [
            "new york mets vs chicago cubs",
            "chicago cubs at new york mets",
            "atlanta braves vs pittsbugh pirates",
            "new york yankees vs boston red sox",
        ]

        # This event is not in the list at all.
        query = "los angeles dodgers vs san francisco giants"

        # With a reasonable cutoff nothing is matched at random ...
        with_cutoff = process.extractOne(query, choices, score_cutoff=50)
        self.assertIsNone(with_cutoff)

        # ... whereas without a cutoff something is always returned.
        without_cutoff = process.extractOne(query, choices)
        self.assertIsNotNone(without_cutoff)

Beispiel #21

0

Datei anzeigen

 def match(cls, query):
     """Return the cached value whose key best matches ``query``.

     The cache is populated first when it is empty. The case-folded query
     must match a key with a score of at least 90; otherwise ``None`` is
     returned.
     """
     if not cls.cache:
         cls.populate_cache()
     hit = process.extractOne(query.casefold(),
                              cls.cache.keys(),
                              score_cutoff=90)
     if not hit:
         return None
     return cls.cache[hit[0]]

Beispiel #22

0

Datei anzeigen

Datei: fair_check.py Projekt: ACz-UniBi/FAIRsFAIR_fuji-tool

 def lookup_metadatastandard_by_uri(self, value):
     """Return the community metadata standard URI closest to ``value``.

     URIs in ``FAIRCheck.COMMUNITY_METADATA_STANDARDS_URIS_LIST`` are
     compared with ``fuzz.token_sort_ratio``; the best one is returned only
     when its score is strictly above 90, otherwise ``None``.
     """
     best = process.extractOne(value,
                               FAIRCheck.COMMUNITY_METADATA_STANDARDS_URIS_LIST,
                               scorer=fuzz.token_sort_ratio)
     return best[0] if best[1] > 90 else None

Beispiel #23

0

Datei anzeigen

def get_closest_scheme(rta, scheme_name, rta_code=None, amc_code=None):
    """Return the primary key of the scheme whose name is closest to
    ``scheme_name``.

    Candidates come from ``scheme_lookup``.

    Raises:
        ValueError: when the lookup yields no scheme at all.
    """
    qs = scheme_lookup(rta, scheme_name, rta_code=rta_code, amc_code=amc_code)
    if not qs.count():
        raise ValueError("No schemes found")
    # Map every candidate name to its primary key.
    pk_by_name = dict(qs.values_list("name", "pk"))
    best_name = process.extractOne(scheme_name, pk_by_name.keys())[0]
    return pk_by_name[best_name]

Beispiel #24

0

Datei anzeigen

Datei: homebrew.py Projekt: lightning-bot/Lightning

 def get_match(self,
               word_list: list,
               word: str,
               score_cutoff: int = 60,
               partial=False) -> Optional[str]:
     """Fuzzy-match ``word`` against ``word_list``.

     ``fuzz.partial_ratio`` is used when ``partial`` is truthy, ``fuzz.ratio``
     otherwise. Returns the ``extractOne`` result, or ``None`` when nothing
     reaches ``score_cutoff``.
     """
     # NOTE(review): the whole extractOne result is returned, although the
     # annotation promises Optional[str] -- confirm which one callers expect.
     scorer = fuzz.partial_ratio if partial else fuzz.ratio
     result = process.extractOne(word,
                                 word_list,
                                 scorer=scorer,
                                 score_cutoff=score_cutoff)
     return result if result else None

Beispiel #25

0

Datei anzeigen

Datei: parser.py Projekt: Mohammad-Afaque/horrible-downloader

 def get_proper_title(self, title: str, min_threshold=0) -> str:
     """Return the key of ``self.shows`` that best matches ``title``.

     An empty string is returned when there is nothing to match against or
     when the best score does not exceed ``min_threshold``.
     """
     # because we're dealing with html, there will be character references.
     # there might be other references other than the ampersand.
     title = title.replace("&amp;", "&")
     match = process.extractOne(title, self.shows.keys())
     if match is None:
         # No candidate at all (e.g. no shows known): treat as "no match"
         # instead of failing while unpacking None.
         return ""
     # Only the first two fields (match, score) are needed; the starred
     # target absorbs any extra field (e.g. the index newer rapidfuzz
     # versions append), which used to break the two-name unpacking.
     proper_title, ratio, *_ = match
     # if the proper_title is too different than the title, return "".
     if ratio <= min_threshold:
         return ""
     return proper_title

Beispiel #26

0

Datei anzeigen

 def fuzzymatch(self, faq):
     """Return the stored answer for the known question closest to ``faq``.

     Questions are compared with ``fuzz.token_sort_ratio``. ``None`` is
     returned unless the best score is above 60; an accepted match is
     printed before its answer is looked up.
     """
     best = fuzzproc.extractOne(faq,
                                self.questions,
                                scorer=fuzz.token_sort_ratio)
     if not best[1] > 60:
         return None
     print(best)
     answer_key = self.faqs["questions"][best[0]]
     return self.answers[answer_key]

Beispiel #27

0

Datei anzeigen

def ship_search(find_this):
    """Return the ship name from ``get_ships()`` closest to ``find_this``."""
    # The match result carries the name first, followed by the ratio; only
    # the name is handed back.
    return process.extractOne(find_this, get_ships())[0]

Beispiel #28

0

Datei anzeigen

Datei: TemuSTS.py Projekt: TeMU-BSC/TemuSTS

def fuzzy(allterms, sentence, cutoff=93):
    """Return the best match for ``sentence`` among ``allterms``.

    No preprocessing is applied to the strings (``processor=None``). The
    ``extractOne`` result is returned, or ``None`` when no term reaches
    ``cutoff``.
    """
    best = process.extractOne(sentence,
                              allterms,
                              processor=None,
                              score_cutoff=cutoff)
    return best if best else None

Beispiel #29

0

Datei anzeigen

Datei: __init__.py Projekt: KelabDev/iswust_bot

async def _(session: NLPSession):
    # Natural-language handler: treats the first word of the message as a
    # possibly misspelled command. It is fuzzy-matched against the registered
    # command names first and against the aliases second. When a permitted
    # match scores above 66, the word is replaced by the match and an
    # IntentCommand for "switch" is returned; otherwise nothing is returned.
    raw_message: List[str] = session.event["raw_message"].split()
    if not raw_message:
        # Empty or whitespace-only message: there is no command word to match
        # (indexing below used to raise IndexError).
        return None
    # Assume the message is a command and take its first field.
    query_cmd = raw_message[0]

    fuzz_cmd = None
    confidence = None
    # Check the registered commands.
    commands_dct = CommandManager._commands
    choices = gen_commands_keys(commands_dct)
    # Fuzzy-match the query against the command names.
    result = process.extractOne(query_cmd, choices, scorer=fuzz.WRatio)
    if result:
        # Only the first two fields (match, score) are needed; the starred
        # target absorbs any extra field (e.g. the index newer rapidfuzz
        # versions append), which used to break the two-name unpacking.
        cmd_name, confidence, *_ = result
        # Commands are looked up by a one-element tuple of their name.
        _cmd = (cmd_name, )
        if commands_dct.get(_cmd) is not None:
            if check_permission(session.bot, session.event,
                                commands_dct[_cmd].permission):
                fuzz_cmd = cmd_name

    # No command was matched among the registered commands.
    if fuzz_cmd is None:
        # Check the aliases.
        aliases_dct = CommandManager._aliases  # type: Dict[str, Command]
        choices = set(aliases_dct.keys())
        # Fuzzy-match the query against the aliases.
        result = process.extractOne(query_cmd, choices, scorer=fuzz.WRatio)
        if result:
            alias, confidence, *_ = result
            if check_permission(session.bot, session.event,
                                aliases_dct[alias].permission):
                fuzz_cmd = alias

    if fuzz_cmd is not None and confidence is not None:
        logger.debug(f"query_cmd: {query_cmd}")
        logger.debug(f"fuzz cmd, confidence: {fuzz_cmd} {confidence}")
        if confidence - 66 > 0:
            # Substitute the corrected command word and hand the message over.
            raw_message[0] = fuzz_cmd
            return IntentCommand(
                confidence,
                "switch",
                current_arg=" ".join(raw_message),
            )

Beispiel #30

0

Datei anzeigen

Datei: 2.2.py Projekt: Sinrez/pyCoursera

def check_word():
    """Read a line from the console and print it with bad words censored.

    Every whitespace-separated word whose best ``fuzz.WRatio`` score against
    the bad-word list is above 50 is replaced throughout the line.
    """
    badwords = ['дурак', 'идиот', 'урод', 'хмырь','f**k', 'fack', 'фак']
    censored = input('Введите строку: ').strip()
    # The words are taken from the line as typed; replacements made during
    # the loop do not change which words are checked.
    for token in censored.split():
        best = process.extractOne(token, badwords, scorer = fuzz.WRatio)
        if best[1] > 50:
            censored = censored.replace(token, 'цензура')
    print(censored)