def __fix_char_name(char_name: str):
    """Return the known hero name that most closely matches ``char_name``.

    Candidate names come from ``__load_all_heroes_names()`` and are compared
    with the ``fuzz.WRatio`` scorer. The complete match result is logged at
    INFO level before the name is returned.

    Raises:
        TypeError: if the search yields no result (``extractOne`` returned
            ``None``), because the result cannot be unpacked.
    """
    hero_names = __load_all_heroes_names()
    # Run the fuzzy search once and reuse the result for both the log entry
    # and the return value instead of repeating the identical search.
    best_match = process.extractOne(char_name, hero_names, scorer=fuzz.WRatio)
    logging.info(best_match)
    # The result unpacks into three fields; only the first one is needed.
    # Throwaway names avoid shadowing the ``id`` builtin.
    matched_name, _score, _key = best_match
    return matched_name
def argument_parser(sc, arg1):
    """Resolve a raw command argument into the value the command should use.

    Args:
        sc: sub-command selector; ``'dmg'`` and ``'rand'`` get special handling.
        arg1: raw argument text supplied by the user.

    Returns:
        * ``sc == 'dmg'``: the best fuzzy match from ``dmg_bracket_list()``.
        * ``sc == 'rand'``: ``arg1`` unchanged when ``int(arg1)`` succeeds,
          otherwise ``10``.
        * otherwise: the ``'name'`` of the first ``shortcut_obj`` entry when
          the lower-cased, sanitised argument is in ``shct_parse_list()``,
          else the best fuzzy match from ``arg_parse_list()``.
    """
    cleaned = sanitise_input(arg1)

    if sc == 'dmg':
        return process.extractOne(cleaned, dmg_bracket_list())[0]

    if sc == 'rand':
        # Only validate that the argument converts to an int; the raw value
        # is what gets returned.
        try:
            int(arg1)
        except (ValueError, TypeError):
            return 10
        return arg1

    lowered = cleaned.lower()
    if lowered in shct_parse_list():
        return shortcut_obj(lowered)[0]['name']

    return process.extractOne(cleaned, arg_parse_list())[0]
def check_dict(self, question):
    """Pick the anime / staff names that ``question`` most likely refers to.

    Keywords are extracted from ``question`` with jieba and each keyword is
    fuzzy-matched against ``self.anime_list`` and ``self.staff_list``.
    A match scoring at least 70 becomes a candidate; a candidate whose
    ``fuzz.partial_ratio`` against the whole question exceeds 50 is selected
    directly. When nothing is selected directly, the user is asked on stdin
    to confirm every candidate.

    Args:
        question: the user's question text.

    Returns:
        tuple: ``(region_dict, selected)`` where ``selected`` is the list of
        chosen names and ``region_dict`` maps each chosen name to
        ``self.wdtype_dict.get(name)``.
    """
    txt_cut = "/".join(jieba.cut(cc.convert(question)))
    # The number of keywords requested scales with the segmented text length.
    top = int(len(txt_cut) / 3)
    keywords = jieba.analyse.extract_tags(question, topK=top)

    selected_anime = []
    selected_staff = []
    maybe = []
    for keyword in keywords:
        p_anime_list = process.extractOne(keyword, self.anime_list)
        p_staff_list = process.extractOne(keyword, self.staff_list)
        if p_anime_list[1] >= 70:
            maybe.append(p_anime_list[0])
            if fuzz.partial_ratio(p_anime_list[0], question) > 50:
                selected_anime.append(p_anime_list[0])
        if p_staff_list[1] >= 70:
            maybe.append(p_staff_list[0])
            if fuzz.partial_ratio(p_staff_list[0], question) > 50:
                selected_staff.append(p_staff_list[0])

    if not (selected_anime or selected_staff):
        # Ask about every candidate and keep the ones that are not rejected.
        # A new list is built rather than removing items from ``maybe`` while
        # looping over it, which would silently skip the candidate following
        # each rejected one.
        selected = []
        for candidate in maybe:
            ask = "请问您说的是{0}吗?(是/否)\n".format(candidate)
            if input(ask) != '否':
                selected.append(candidate)
    else:
        selected = selected_anime + selected_staff

    region_dict = {i: self.wdtype_dict.get(i) for i in selected}
    return region_dict, selected
def determine_query(self, query=None):
    '''Classify a recommendation query as "Genre", "Movie" or "Name".

    Returns None when no query is given. The query is lower-cased; if its
    first space-separated token (or 'sci-fi', when the query contains "sci"
    or "fi") is in self.genres the answer is "Genre". Otherwise the query is
    fuzzy-matched against the 'primaryName' entries of self.names and the
    keys of self.movie_dict, and "Movie" is returned only when the movie
    score is strictly higher than the name score.
    '''
    if query is None:
        return None

    query = query.lower()

    # str.split always yields at least one element, so a first token exists.
    first_token = query.split(" ")[0]
    if "sci" in query or "fi" in query:
        first_token = 'sci-fi'
    if first_token in self.genres:
        return "Genre"

    # Best person-name match, then best movie-title match.
    best_name = process.extractOne(query, list(self.names['primaryName']))
    best_movie = process.extractOne(query, self.movie_dict.keys())

    name_score = best_name[1]
    movie_score = best_movie[1]
    return "Movie" if movie_score > name_score else "Name"
def testWithScorer(self):
    """extractOne should honour a custom scorer for both lists and mappings."""
    choices = [
        "new york mets vs chicago cubs",
        "chicago cubs at new york mets",
        "atlanta braves vs pittsbugh pirates",
        "new york yankees vs boston red sox",
    ]
    # The same strings, keyed 1..4.
    choices_mapping = dict(enumerate(choices, start=1))

    # in this hypothetical example we care about ordering, so we use quick ratio
    query = "new york mets at chicago cubs"

    # first, as an example, the normal way would select the
    # "more 'complete' match of choices[1]"
    default_from_list = process.extractOne(query, choices)
    self.assertEqual(default_from_list[0], choices[1])
    default_from_mapping = process.extractOne(query, choices_mapping)
    self.assertEqual(default_from_mapping[0], choices_mapping[2])

    # now, use the custom scorer
    scored_from_list = process.extractOne(query, choices, scorer=fuzz.QRatio)
    self.assertEqual(scored_from_list[0], choices[0])
    scored_from_mapping = process.extractOne(query, choices_mapping, scorer=fuzz.QRatio)
    self.assertEqual(scored_from_mapping[0], choices_mapping[1])
async def apex(self, ctx, *, arg1):
    # Sends an embed about a ship's apexes. With a single-token argument the
    # embed lists the ship's available apexes; with more tokens the argument
    # is also fuzzy-matched against the known ranks and the embed describes
    # that specific apex.
    rank_list = [row[0] for row in sql_rank_obj()]
    tokens = arg1.split(" ")
    # NOTE(review): this value is replaced by ShipData(...).s_obj on both
    # paths below; the call is kept as-is in case it is relied on elsewhere.
    s_obj = sql_ship_obj()

    # No apex rank given: show the list of available apexes.
    if len(tokens) == 1:
        s_obj = ShipData(ctx, arg1).s_obj
        list_title = f"Apexes for {s_obj['name']}"
        colour = get_em_colour(s_obj['affinity'])
        embed = discord.Embed(
            title=list_title,
            description=ApexLister(ctx, arg1).embed_list,
            color=colour,
        )
        await ctx.send(embed=embed)
        return

    # A rank was given: describe that apex.
    a_obj = sql_apex_num_obj()
    s_obj = ShipData(ctx, arg1).s_obj
    apex_tier = process.extractOne(arg1, rank_list)[0]
    apex_obj = ApexData(ctx, s_obj['name'], apex_tier)
    colour = get_em_colour(s_obj['affinity'])
    embed = discord.Embed(
        title=apex_obj.embed_title,
        color=colour,
        description=apex_obj.embed_desc,
    )
    # Use the image of the apex entry matching this ship and rank as thumbnail.
    for entry in a_obj:
        if entry['id'] == s_obj['number'] and entry['rank'] == apex_tier:
            embed.set_thumbnail(
                url=get_ship_image(f"{entry['id']}_apex_{entry['apex_num']}"))
    await ctx.send(embed=embed)
def find_best_match(misspelled, candidates):
    """Find the best matched word with <misspelled> in <candidates>.

    Candidates are scored with ``fuzz.ratio`` and anything below the cutoff
    of 60 is ignored.

    Returns:
        The first two fields of the best match (the matched candidate and
        its score), or ``None`` when no candidate reaches the cutoff.
    """
    best = fuzzprocess.extractOne(misspelled,
                                  candidates,
                                  scorer=fuzz.ratio,
                                  score_cutoff=60)
    # With a score cutoff the search can come back empty; slicing ``None``
    # would raise TypeError, so report "no match" explicitly instead.
    if best is None:
        return None
    return best[0:2]
async def queue(self, ctx: commands.Context, *, roles):
    """
    Puts you in a queue in the current channel for the specified roles.
    Roles are top, jungle, mid, bot, and support.

    Example usage:
        !queue support
        !queue mid bot
    """
    player = await self.bot.get_player(ctx)

    # First, we check if the last game of the player is still ongoing.
    # Only the unpacking is guarded, so a TypeError raised while sending the
    # notice is no longer swallowed by accident.
    try:
        game, _participant = player.get_last_game()
    except TypeError:
        # This happens if the player has not played a game yet, as the call
        # then returns something that cannot be unpacked.
        pass
    else:
        if not game.winner:
            await ctx.send(
                "Your last game looks to be ongoing. "
                "Please use !won or !lost to inform the result if the game is over.",
                delete_after=self.bot.short_notice_duration,
            )
            return

    session = get_session()
    try:
        # Filter on the id of the *current* player. Comparing against the
        # class-level column ``Player.discord_id`` matched any player who was
        # in a ready-check, not just the caller.
        # NOTE(review): assumes ``player`` exposes ``discord_id`` - confirm.
        queue_player = (
            session.query(QueuePlayer)
            .filter(QueuePlayer.player_id == player.discord_id)
            .filter(QueuePlayer.ready_check != None)  # noqa: E711 (SQL IS NOT NULL)
        ).first()
    finally:
        # Release the session even when the query fails.
        session.close()

    if queue_player:
        await ctx.send(
            "It seems you are in a pre-game check. You will be able to queue again once it is over."
        )
        return

    # Keep only the role words that fuzzy-match a known role well enough.
    clean_roles = set()
    for role in roles.split(" "):
        clean_role, score = process.extractOne(role, roles_list)
        if score >= 80:
            clean_roles.add(clean_role)

    if not clean_roles:
        await ctx.send(self.bot.role_not_understood,
                       delete_after=self.bot.warning_duration)
        return

    for role in clean_roles:
        self.add_player_to_queue(player, role, ctx.channel.id)

    for role in clean_roles:
        # Dirty code to get the emoji related to the letters
        await ctx.message.add_reaction(get_role_emoji(role))

    await self.matchmaking_process(ctx)
    await self.send_queue(ctx)
async def ranking(self, ctx: commands.Context, role='all'):
    """
    Returns the top 20 players for the selected role.
    """
    if role == 'all':
        clean_role = role
    else:
        clean_role, score = process.extractOne(role, roles_list)
        if score < 80:
            await ctx.send(self.bot.role_not_understood, delete_after=30)
            return

    session = get_session()
    try:
        role_ranking = session.query(PlayerRating).order_by(
            -PlayerRating.mmr).filter(PlayerRating.games > 0)

        if clean_role != 'all':
            role_ranking = role_ranking.filter(PlayerRating.role == clean_role)

        # The trailing column holds the role only when ranking across all roles.
        table = [['Rank', 'Name', 'MMR', 'Games'] +
                 ['Role' if clean_role == 'all' else None]]

        for rank, rating in enumerate(role_ranking.limit(20)):
            table.append([
                inflect_engine.ordinal(rank + 1), rating.player.name,
                f'{rating.mmr:.1f}',
                rating.get_games()
            ] + [rating.role if clean_role == 'all' else None])

        # Render while the session is still open so every value read from
        # the rows is resolved before the session is released.
        rendered = tabulate(table, headers="firstrow")
    finally:
        # The session was previously never closed.
        session.close()

    await ctx.send(f'Ranking for {clean_role} is:\n'
                   f'```{rendered}```')
async def bill(self, ctx, *, query: str = None):
    '''Get a random Bill Wurtz video from his website, with optional search.'''
    async with ctx.typing():
        if query is None:
            # No search text: any cached title will do.
            titles = list(self.bill_cache.keys())
            picked = choice(titles)
        elif query.startswith('latest'):
            picked = self.bill_latest
        else:
            # Fuzzy search over the cached titles; the third field of the
            # result is not needed.
            picked, score, _ = process.extractOne(query, self.bill_cache.keys())
            if score < 50:
                raise commands.CommandError(
                    "Couldn't match that search with certainty.\n"
                    f"Closest match: '{picked.strip()}'"
                )

        href, bill_date = self.bill_cache[picked]
        reply = (
            f"{bill_date}: "
            f"*{disnake.utils.escape_markdown(picked)}* \n"
            f"{BILL_WURTZ_URL + href}"
        )
        await ctx.send(reply)
def lookup_metadatastandard_by_name(self, value):
    """Return the community standard name that best matches ``value``.

    ``value`` is fuzzy-matched against ``FAIRCheck.COMMUNITY_STANDARDS_NAMES``
    with ``fuzz.token_sort_ratio``.

    Returns:
        The best-matching name when its score is strictly above 80,
        otherwise ``None`` (also when the search produced no result at all).
    """
    # get standard name with the highest matching percentage using fuzzywuzzy
    highest = process.extractOne(value,
                                 FAIRCheck.COMMUNITY_STANDARDS_NAMES,
                                 scorer=fuzz.token_sort_ratio)
    # The search yields None when there is nothing to match against; treat
    # that as "not found" instead of failing on the subscript.
    if highest is not None and highest[1] > 80:
        return highest[0]
    return None
def match_license(name: str) -> dict:
    """Match if the given license name matches any license present on
    spdx.org

    The name is stripped and any " license" word (case-insensitive, with its
    surrounding whitespace) is removed before matching. Matching proceeds in
    stages: a default-scorer search over all license choices, then - unless
    the top score is exactly 100 - a ``token_set_ratio`` re-ranking of the
    candidates that do not end in "-only", and finally a ``token_sort_ratio``
    tie-break when several candidates share the top score.

    :param name: License name
    :return: Information of the license matched
    """
    all_licenses = get_all_licenses_from_spdx()
    name = re.sub(r"\s+license\s*", "", name.strip(), flags=re.IGNORECASE)

    # Stage 1: default scorer; the first entry is used as the best match.
    best_matches = process.extract(name, _get_all_license_choice(all_licenses))
    spdx_license = best_matches[0]
    if spdx_license[1] != 100:
        # Not a perfect score: keep only the candidate names, dropping the
        # ones that end in "-only".
        best_matches = [
            lic[0] for lic in best_matches if not lic[0].endswith("-only")
        ]

        if best_matches:
            # Stage 2: re-rank the remaining names with token_set_ratio.
            best_matches = process.extract(name,
                                           best_matches,
                                           scorer=token_set_ratio)
            spdx_license = best_matches[0]
            # Keep every name scoring at least as high as the new top match.
            best_matches = [
                lic[0] for lic in best_matches if lic[1] >= spdx_license[1]
            ]

            if len(best_matches) > 1:
                # Stage 3: break the tie with token_sort_ratio.
                spdx_license = process.extractOne(name,
                                                  best_matches,
                                                  scorer=token_sort_ratio)

    log.info(f"Best match for license {name} was {spdx_license}.\n"
             f"Best matches: {best_matches}")

    return _get_license(spdx_license[0], all_licenses)
def result(self):
    """Fuzzy-match the normalized company name against site-derived strings.

    The comparison list is built from the fragments of ``self.website_title``
    and ``self.copyright_statement`` (each cleaned with ``self.text_cleaner``,
    split on the separator pattern, stripped of empty pieces and normalized
    with ``std_name(..., level=7)``) plus the normalized ``self.web_domain``.

    Returns:
        The ``process.extractOne`` result for ``self.company_name_normalized``
        against that list, or ``("", 0)`` when the company name is empty.
    """
    splitter = r'[|\t\n\r\f\v]+| – '

    def _fragments(raw_text):
        # Clean, split on the separators, drop empty pieces, then normalize.
        pieces = re.split(splitter, self.text_cleaner(raw_text))
        return [std_name(piece, level=7) for piece in pieces if piece]

    title_list = _fragments(self.website_title) if self.website_title else []
    cr_list = (_fragments(self.copyright_statement)
               if self.copyright_statement else [])
    compare_list = title_list + cr_list + [std_name(self.web_domain, level=7)]

    if not (self.company_name_normalized and compare_list):
        return ("", 0)
    return process.extractOne(self.company_name_normalized, compare_list)
def testWithProcessor(self):
    """
    extractOne should accept any type as long as it is a string
    after preprocessing
    """
    cubs_mets = ["chicago cubs vs new york mets", "CitiField", "2011-05-11", "8pm"]
    yankees_sox = ["new york yankees vs boston red sox", "Fenway Park", "2011-05-11", "8pm"]
    braves_pirates = ["atlanta braves vs pittsburgh pirates", "PNC Park", "2011-05-11", "8pm"]
    events = [cubs_mets, yankees_sox, braves_pirates]

    # The query is itself an event record; the processor reduces every
    # record (query and choices alike) to its first field.
    query = events[0]
    best = process.extractOne(query, events, processor=lambda event: event[0])

    self.assertEqual(best[0], events[0])
def edit_correct(word: str, wordlist: Iterable[str], max_dist: int = 2) -> Optional[str]:
    """Fix spelling mistakes in the input word.

    The Levenshtein distance to a list of valid words, sorted by decreasing
    priority, is computed. The closest word within the maximum edit distance
    is returned. Ties are resolved by picking the highest priority word.

    Args:
        word: the word to correct.
        wordlist: valid words, highest priority first.
        max_dist: maximum edit distance, passed to the search as its score
            cutoff.

    Returns:
        ``word`` itself when it is already in ``wordlist``, otherwise the
        matched word, or ``None`` when the search finds nothing.

    Examples
    --------
    >>> edit_correct("kug", ("mug", "bug", "but"))
    'mug'
    >>> edit_correct("bug", ("mug", "bug", "but"))
    'bug'
    >>> edit_correct("friend", ("mug", "bug", "but"))
    """
    # Exact hits need no fuzzy search.
    if word in wordlist:
        return word

    # extractOne returns the most similar word, in case of
    # ties, the first word is returned. Since words are in
    # decreasing order of priority, this will automatically return
    # the most relevant word
    res = process.extractOne(word,
                             wordlist,
                             scorer=string_metric.levenshtein,
                             score_cutoff=max_dist)

    # Extract string from the output (if a match was found)
    if res is not None:
        res = res[0]
    return res
def match_authors(name, allcontrib_names, allcontrib_login):
    """
    Use fuzzy string matching to match names of committers to the names or
    handles mentioned in the allcontributors file.

    Args:
        name: str; name of committer
        allcontrib_names: list; names in allcontributorsrc file
        allcontrib_login: list; logins in allcontributorsrc file

    Returns:
        tuple: ``([name, matched_entry], None)`` when the committer matches a
        name or, failing that, a login with a ``token_sort_ratio`` score that
        reaches the cutoff of 71; ``(None, name)`` when nothing matches.
    """
    # First, match the name. If no match, try Github login
    matching = process.extractOne(name,
                                  allcontrib_names,
                                  scorer=fuzz.token_sort_ratio,
                                  score_cutoff=71)
    if not matching:
        # we likely haven't found a match yet, lets check Github handles.
        # extractOne (not extract) keeps the result shape identical to the
        # name lookup above, so matching[0] is the matched string in both
        # cases rather than a whole result tuple taken from a list.
        matching = process.extractOne(name,
                                      allcontrib_login,
                                      scorer=fuzz.token_sort_ratio,
                                      score_cutoff=71)

    if matching:
        return [name, matching[0]], None
    return None, name
def predictCategory(categoryToPredictFor):
    """Predict the broad category of a course from its closest known title.

    categoryToPredictFor = course we are trying to predict the category for.
    The course is fuzzy-matched (``fuzz.token_set_ratio``) against
    ``df.courseTitle``; the ``broadCategory1`` of the matched row is used as
    the prediction.

    return = a one-row dataframe with the input course ("categoryFor"), the
    predicted category ("category") and the rounded matching score
    ("matchConfidence").
    """
    # The result holds the best matching title, its score and its index.
    best = process.extractOne(categoryToPredictFor,
                              df.courseTitle.values,
                              scorer=fuzz.token_set_ratio)

    # Look the category up by the position of the matched title.
    row_index = best[2]
    category = df.iloc[row_index].broadCategory1
    confidence = best[1]

    record = {
        "categoryFor": categoryToPredictFor,
        "category": category,
        "matchConfidence": round(confidence),
    }
    return pd.DataFrame(record, index=[0])
def _string_fuzzy_match(
    cls, match: typing.Any, choices: typing.Sequence[typing.Any], min_score: float
) -> typing.Optional[str]:
    """Return the choice closest to ``match`` if it scores high enough.

    Args:
        match: value to look for.
        choices: candidates to compare against.
        min_score: minimum score (inclusive) the best candidate must reach.

    Returns:
        The best-matching choice, or ``None`` when its score is below
        ``min_score`` or the search produced no result at all.
    """
    result = process.extractOne(match, choices)
    # The search yields None when there is nothing to match against;
    # unpacking that would raise TypeError.
    if result is None:
        return None
    choice, score, _index = result
    return choice if score >= min_score else None
def testWithCutoffEdgeCases(self):
    """Cutoff boundaries: a 100 cutoff keeps exact hits, score 0 is not None."""
    choices = [
        "new york mets vs chicago cubs",
        "chicago cubs at new york mets",
        "atlanta braves vs pittsbugh pirates",
        "new york yankees vs boston red sox",
    ]
    query = "new york mets vs chicago cubs"

    # Only find 100-score cases
    exact = process.extractOne(query, choices, score_cutoff=100)
    self.assertIsNotNone(exact)
    self.assertEqual(exact[0], choices[0])

    # 0-score cases do not return None
    empty_query = process.extractOne("", choices)
    self.assertIsNotNone(empty_query)
    self.assertEqual(empty_query[1], 0)
def testWithCutoff(self):
    """A score cutoff suppresses weak matches that would otherwise be returned."""
    choices = [
        "new york mets vs chicago cubs",
        "chicago cubs at new york mets",
        "atlanta braves vs pittsbugh pirates",
        "new york yankees vs boston red sox",
    ]
    query = "los angeles dodgers vs san francisco giants"

    # in this situation, this is an event that does not exist in the list
    # we don't want to randomly match to something, so we use a reasonable cutoff
    with_cutoff = process.extractOne(query, choices, score_cutoff=50)
    self.assertIsNone(with_cutoff)

    # however if we had no cutoff, something would get returned
    without_cutoff = process.extractOne(query, choices)
    self.assertIsNotNone(without_cutoff)
def match(cls, query):
    """Return the cached entry whose key best matches ``query``.

    The cache is populated first when it is empty. The case-folded query is
    fuzzy-matched against the cache keys with a score cutoff of 90.

    Returns:
        The cached value for the matched key, or ``None`` when no key
        reaches the cutoff.
    """
    if not cls.cache:
        cls.populate_cache()

    folded = query.casefold()
    hit = process.extractOne(folded, cls.cache.keys(), score_cutoff=90)
    if not hit:
        return None

    matched_key = hit[0]
    return cls.cache[matched_key]
def lookup_metadatastandard_by_uri(self, value):
    """Return the community metadata standard URI that best matches ``value``.

    ``value`` is fuzzy-matched against
    ``FAIRCheck.COMMUNITY_METADATA_STANDARDS_URIS_LIST`` with
    ``fuzz.token_sort_ratio``.

    Returns:
        The best-matching URI when its score is strictly above 90,
        otherwise ``None`` (also when the search produced no result at all).
    """
    # get standard uri with the highest matching percentage using fuzzywuzzy
    highest = process.extractOne(value,
                                 FAIRCheck.COMMUNITY_METADATA_STANDARDS_URIS_LIST,
                                 scorer=fuzz.token_sort_ratio)
    # The search yields None when there is nothing to match against; treat
    # that as "not found" instead of failing on the subscript.
    if highest is not None and highest[1] > 90:
        return highest[0]
    return None
def get_closest_scheme(rta, scheme_name, rta_code=None, amc_code=None):
    """Return the primary key of the scheme whose name is closest to ``scheme_name``.

    Candidates come from ``scheme_lookup`` for the given RTA and optional
    codes; the closest candidate name is picked by fuzzy matching.

    Raises:
        ValueError: when the lookup returns no schemes.
    """
    candidates = scheme_lookup(rta, scheme_name, rta_code=rta_code, amc_code=amc_code)
    if candidates.count() == 0:
        raise ValueError("No schemes found")

    # Map scheme name -> primary key so the matched name can be resolved.
    pk_by_name = dict(candidates.values_list("name", "pk"))
    best = process.extractOne(scheme_name, pk_by_name.keys())
    closest_name = best[0]
    return pk_by_name[closest_name]
def get_match(self, word_list: list, word: str, score_cutoff: int = 60, partial=False) -> Optional[tuple]:
    """Fuzzy-match ``word`` against ``word_list``.

    Args:
        word_list: candidate words.
        word: the word to look for.
        score_cutoff: score threshold handed to the search.
        partial: use ``fuzz.partial_ratio`` when true, ``fuzz.ratio``
            otherwise.

    Returns:
        The complete ``process.extractOne`` result (not just the matched
        word), or ``None`` when nothing reaches the cutoff.
    """
    # Both modes differ only in the scorer, so select it once.
    scorer = fuzz.partial_ratio if partial else fuzz.ratio
    result = process.extractOne(word,
                                word_list,
                                scorer=scorer,
                                score_cutoff=score_cutoff)
    return result if result else None
def get_proper_title(self, title: str, min_threshold=0) -> str:
    """Return the known show title that best matches ``title``.

    Args:
        title: title text taken from HTML; it may contain character
            references.
        min_threshold: the match score must be strictly greater than this
            value for the match to be accepted.

    Returns:
        The best-matching key of ``self.shows``, or ``""`` when the match is
        too different from ``title`` or there is nothing to match against.
    """
    import html

    # because we're dealing with html, there will be character references.
    # html.unescape decodes all of them, not only the ampersand; the earlier
    # replace call swapped "&" for "&" and therefore changed nothing.
    title = html.unescape(title)

    best = process.extractOne(title, self.shows.keys())
    # The search yields None when there are no shows to compare against.
    if best is None:
        return ""
    proper_title, ratio = best

    # if the proper_title is too different than the title, return "".
    if ratio <= min_threshold:
        return ""
    return proper_title
def fuzzymatch(self, faq):
    """Return the stored answer for the question closest to ``faq``.

    ``faq`` is fuzzy-matched against ``self.questions`` with
    ``fuzz.token_sort_ratio``. When the best score is above 60 the match is
    printed and the answer is looked up through ``self.faqs["questions"]``
    and ``self.answers``; otherwise ``None`` is returned.
    """
    best = fuzzproc.extractOne(faq, self.questions, scorer=fuzz.token_sort_ratio)
    if best[1] > 60:
        print(best)
        answer_key = self.faqs["questions"][best[0]]
        return self.answers[answer_key]
    return None
def ship_search(find_this):
    """Return the ship name from ``get_ships()`` closest to ``find_this``."""
    # The search result carries the name together with its score; only the
    # name (first field) is handed back.
    return process.extractOne(find_this, get_ships())[0]
def fuzzy(allterms, sentence, cutoff=93):
    """Return the best match for ``sentence`` in ``allterms``, or ``None``.

    The search runs without a preprocessing step (``processor=None``) and
    with ``cutoff`` as its score cutoff. The complete search result is
    returned when there is one.
    """
    best = process.extractOne(sentence,
                              allterms,
                              processor=None,
                              score_cutoff=cutoff)
    return best if best else None
async def _(session: NLPSession): raw_message: List[str] = session.event["raw_message"].split() # 假设该消息为命令,取第一个字段 query_cmd = raw_message[0] fuzz_cmd = None confidence = None # 检查 commands commands_dct = CommandManager._commands choices = gen_commands_keys(commands_dct) # 模糊匹配命令与 commands result = process.extractOne(query_cmd, choices, scorer=fuzz.WRatio) if result: cmd_name, confidence = result _cmd = (cmd_name, ) if commands_dct.get(_cmd) is not None: if check_permission(session.bot, session.event, commands_dct[_cmd].permission): fuzz_cmd = cmd_name # 检查 commands 没有匹配到命令 if fuzz_cmd is None: # 检查 aliases aliases_dct = CommandManager._aliases # type: Dict[str, Command] choices = set(aliases_dct.keys()) # 模糊匹配命令与 aliases result = process.extractOne(query_cmd, choices, scorer=fuzz.WRatio) if result: alias, confidence = result if check_permission(session.bot, session.event, aliases_dct[alias].permission): fuzz_cmd = alias if fuzz_cmd is not None and confidence is not None: logger.debug(f"query_cmd: {query_cmd}") logger.debug(f"fuzz cmd, confidence: {fuzz_cmd} {confidence}") if confidence - 66 > 0: raw_message[0] = fuzz_cmd return IntentCommand( confidence, "switch", current_arg=" ".join(raw_message), )
def check_word():
    """Read a line from stdin, censor words resembling a bad word, print it.

    Every whitespace-separated word of the input is fuzzy-matched
    (``fuzz.WRatio``) against a fixed list of bad words; when the best score
    is above 50, every occurrence of that word in the text is replaced with
    the censor marker. The resulting text is printed.
    """
    badwords = ['дурак', 'идиот', 'урод', 'хмырь', 'f**k', 'fack', 'фак']
    text = input('Введите строку: ').strip()

    for word in text.split():
        best = process.extractOne(word, badwords, scorer=fuzz.WRatio)
        if best[1] > 50:
            text = text.replace(word, 'цензура')

    print(text)