def case_insensitive_close_matches(word, possibilities, n=3, cutoff=0.6, excpt=None):
    """Use SequenceMatcher to return list of the best "good enough" matches.

    word and every candidate are passed through ``supercasefold`` before
    being compared; the candidates are returned in their original spelling.

    word is a sequence for which close matches are desired (typically a
    string).

    possibilities is a list of sequences against which to match word
    (typically a list of strings).

    Optional arg n (default 3) is the maximum number of close matches to
    return.  n must be > 0.

    Optional arg cutoff (default 0.6) is a float in [0, 1].  Possibilities
    that don't score at least that similar to word are ignored.

    Optional arg excpt (default None) is an exception class (or any
    callable that takes a message and returns an exception).  When it is
    given the function never returns: it raises ``excpt(message)``, where
    message names word and, if there are any, the close matches found.

    The best (no more than n) matches among the possibilities are returned
    in a list, sorted by similarity score, most similar first.

    Raises ValueError if n or cutoff is out of range.

    >>> case_insensitive_close_matches("appel", ["ape", "apple", "peach", "puppy"])
    ['apple', 'ape']
    >>> import keyword as _keyword
    >>> case_insensitive_close_matches("wheel", _keyword.kwlist)
    ['while']
    >>> case_insensitive_close_matches("Apple", _keyword.kwlist)
    []
    >>> case_insensitive_close_matches("accept", _keyword.kwlist)
    ['except']
    """
    if not n > 0:
        raise ValueError("n must be > 0: %r" % (n,))
    if not 0.0 <= cutoff <= 1.0:
        raise ValueError("cutoff must be in [0.0, 1.0]: %r" % (cutoff,))

    matcher = _SequenceMatcher()
    matcher.set_seq2(supercasefold(word))

    scored = []
    for candidate in possibilities:
        matcher.set_seq1(supercasefold(candidate))
        # The two quick ratios are cheap upper bounds on ratio(); checking
        # them first skips the expensive computation for hopeless candidates.
        if (matcher.real_quick_ratio() >= cutoff
                and matcher.quick_ratio() >= cutoff):
            score = matcher.ratio()
            if score >= cutoff:
                # Keep the caller's original spelling, not the folded one.
                scored.append((score, candidate))

    # Best scorers first, scores stripped.
    best = [candidate for _score, candidate in _nlargest(n, scored)]

    if excpt is not None:
        if best:
            message = "'{}' not found, did you mean {}?".format(
                word, " or ".join("'{}'".format(match) for match in best))
        else:
            # Nothing to suggest: avoid the dangling "did you mean ?".
            message = "'{}' not found".format(word)
        raise excpt(message)
    return best
def case_insensitive_close_matches(word, possibilities, n=3, cutoff=0.6, excpt=None):
    """Get a list of the best "good enough" matches, ignoring case.

    `word` and each candidate are normalised with ``supercasefold`` before
    they are compared; candidates are returned in their original spelling.

    Parameters
    ----------
    word : str
        A base string for which close matches are desired.
    possibilities : Collection[str]
        Word list against which to match `word`.
    n : int, default 3
        Maximum number of close matches to return, must be > 0.
    cutoff : float, default 0.6
        A float in the range [0, 1]. Possibilities that don't score at
        least that similar to `word` are ignored.
    excpt : callable, optional
        Exception class (or any callable taking a message and returning an
        exception). When given, the function does not return; it raises
        ``excpt(message)`` where the message names `word` and, if any were
        found, the close matches.

    Returns
    -------
    list
        The best (no more than `n`) matches among the possibilities,
        sorted by similarity score, most similar first.

    Raises
    ------
    ValueError
        If `n` is not > 0 or `cutoff` is outside [0.0, 1.0].

    Examples
    --------
    >>> case_insensitive_close_matches("appel", ["ape", "apple", "peach", "puppy"])
    ['apple', 'ape']
    >>> import keyword
    >>> case_insensitive_close_matches("wheel", keyword.kwlist)
    ['while']
    >>> case_insensitive_close_matches("apples", keyword.kwlist)
    []
    >>> case_insensitive_close_matches("Accept", keyword.kwlist)
    ['except']
    >>> case_insensitive_close_matches("NonLocal", keyword.kwlist)
    ['nonlocal']
    """
    if not n > 0:
        raise ValueError("n must be > 0: %r" % (n,))
    if not 0.0 <= cutoff <= 1.0:
        raise ValueError("cutoff must be in [0.0, 1.0]: %r" % (cutoff,))

    matcher = _SequenceMatcher()
    matcher.set_seq2(supercasefold(word))

    scored = []
    for candidate in possibilities:
        matcher.set_seq1(supercasefold(candidate))
        # Cheap upper bounds first; only compute the full ratio when both
        # quick estimates already reach the cutoff.
        if (matcher.real_quick_ratio() >= cutoff
                and matcher.quick_ratio() >= cutoff):
            score = matcher.ratio()
            if score >= cutoff:
                # Store the un-folded candidate so callers get real entries.
                scored.append((score, candidate))

    # Move the best scorers to the head and drop the scores.
    best = [candidate for _score, candidate in _nlargest(n, scored)]

    if excpt is not None:
        if best:
            message = "'{}' not found, did you mean {}?".format(
                word, " or ".join("'{}'".format(match) for match in best))
        else:
            # No suggestion available: do not emit "did you mean ?".
            message = "'{}' not found".format(word)
        raise excpt(message)
    return best
def case_insensitive_close_matches(word, possibilities, n=3, cutoff=0.6, excpt=None):
    """Use SequenceMatcher to return list of the best "good enough" matches.

    Matching is done on the ``supercasefold``-ed forms of word and of each
    candidate, but the list returned holds the candidates as they were
    passed in.

    word is a sequence for which close matches are desired (typically a
    string).

    possibilities is a list of sequences against which to match word
    (typically a list of strings).

    Optional arg n (default 3) is the maximum number of close matches to
    return.  n must be > 0.

    Optional arg cutoff (default 0.6) is a float in [0, 1].  Possibilities
    that don't score at least that similar to word are ignored.

    Optional arg excpt (default None), when given, is called with a
    "not found" message (listing the close matches, if there are any) and
    the result is raised instead of returning the matches.

    The best (no more than n) matches among the possibilities are returned
    in a list, sorted by similarity score, most similar first.

    Raises ValueError if n is not > 0 or cutoff is outside [0.0, 1.0].

    >>> case_insensitive_close_matches("appel", ["ape", "apple", "peach", "puppy"])
    ['apple', 'ape']
    >>> import keyword as _keyword
    >>> case_insensitive_close_matches("wheel", _keyword.kwlist)
    ['while']
    >>> case_insensitive_close_matches("Apple", _keyword.kwlist)
    []
    >>> case_insensitive_close_matches("accept", _keyword.kwlist)
    ['except']
    """
    if not n > 0:
        raise ValueError("n must be > 0: %r" % (n,))
    if not 0.0 <= cutoff <= 1.0:
        raise ValueError("cutoff must be in [0.0, 1.0]: %r" % (cutoff,))

    matcher = _SequenceMatcher()
    matcher.set_seq2(supercasefold(word))

    scored = []
    for candidate in possibilities:
        matcher.set_seq1(supercasefold(candidate))
        # real_quick_ratio() and quick_ratio() are inexpensive upper bounds
        # on ratio(), so they filter out most candidates cheaply.
        if (matcher.real_quick_ratio() >= cutoff
                and matcher.quick_ratio() >= cutoff):
            score = matcher.ratio()
            if score >= cutoff:
                # Remember the original candidate, not its folded form.
                scored.append((score, candidate))

    # Highest scores first, then strip the scores.
    best = [candidate for _score, candidate in _nlargest(n, scored)]

    if excpt is not None:
        if best:
            message = "'{}' not found, did you mean {}?".format(
                word, " or ".join("'{}'".format(match) for match in best))
        else:
            # With no close match the suggestion clause would be empty.
            message = "'{}' not found".format(word)
        raise excpt(message)
    return best
def _standart(self, s1, s2):
    """Return the longest contiguous run of items common to s1 and s2.

    The result is a slice of ``s1`` (so it has the same type as ``s1``);
    it is empty when the two sequences share nothing.
    """
    # autojunk=False: by default SequenceMatcher drops "popular" items from
    # its index once the second sequence has 200+ items, which can make the
    # reported longest match too short or even empty for long inputs.
    matcher = _SequenceMatcher(a=s1, b=s2, autojunk=False)
    match = matcher.find_longest_match(0, len(s1), 0, len(s2))
    return s1[match.a:match.a + match.size]