def levenshtein(string1, string2):
    """Return the case-insensitive Levenshtein edit distance between two strings.

    ``None`` inputs are treated as empty strings.
    """
    left = "" if string1 is None else string1
    right = "" if string2 is None else string2
    return StringMatcher(seq1=left.lower(), seq2=right.lower()).distance()
# NOTE(review): this is an exact duplicate of the `levenshtein` defined just
# above — confirm whether one of the two definitions can be removed.
def levenshtein(string1, string2):
    """Case-insensitive Levenshtein distance; ``None`` counts as the empty string."""
    a = (string1 if string1 is not None else "").lower()
    b = (string2 if string2 is not None else "").lower()
    matcher = StringMatcher(seq1=a, seq2=b)
    return matcher.distance()
def get_match_score(phrase, words, min_distance=2):
    """Score how well *phrase* (an iterable of tokens) matches *words*.

    For every phrase token, each word within *min_distance* edits contributes
    its matched-character count (token length minus edit distance, floored at
    zero), normalised by the total character length of the phrase.

    Args:
        phrase: iterable of string tokens making up the phrase.
        words: iterable of candidate words to compare against.
        min_distance: maximum edit distance for a word to count as a match.

    Returns:
        A float score; 0 when the phrase has no characters at all (this case
        previously raised ``ZeroDivisionError``).
    """
    phrase_len = len(''.join(phrase))
    if phrase_len == 0:
        # Guard: an empty phrase (or one made only of empty tokens) has
        # nothing to match and would otherwise divide by zero below.
        return 0
    score = 0
    for token in phrase:
        matcher = StringMatcher(seq1=token)
        for word in words:
            matcher.set_seq2(word)
            match_distance = matcher.distance()
            if match_distance <= min_distance:
                score += max(0, len(token) - match_distance) / phrase_len
    return score
def get_loosly_matching_keyword(self, term):
    """Return the known keyword that most loosely matches *term*.

    Candidates are the keys sharing the highest number of tokens with
    *term*; ties are broken by the smallest Levenshtein distance to the
    raw term. Returns ``None`` when there are no keys at all.
    """
    term_tokens = self.tokenize_text(term)

    # First pass: keep the keys with the most tokens in common with term.
    best_overlap = 0
    candidates = []
    for key, key_tokens in self.tokenized_keys_.items():
        overlap = sum(1 for tok in term_tokens if tok in key_tokens)
        if overlap > best_overlap:
            best_overlap = overlap
            candidates = []
        if overlap == best_overlap:
            candidates.append(key)

    # Second pass: among the candidates, pick the closest by edit distance.
    best_key = None
    best_distance = 9999999
    for key in candidates:
        distance = StringMatcher(seq1=key, seq2=term).distance()
        if distance < best_distance:
            best_distance = distance
            best_key = key
    return best_key
def is_typo(word, word_from_dict):
    """Return truthy when *word* looks like a typo of *word_from_dict*.

    One edit always counts as a typo; two edits count only when the
    external helper ``fl`` also agrees (presumably an extra fuzzy /
    first-letter check — confirm against its definition).
    """
    matcher = StringMatcher(seq1=word, seq2=word_from_dict)
    distance = matcher.distance()
    if distance == 1:
        return True
    return distance == 2 and fl(word, word_from_dict)
def levenshtein_ratio(s1, s2):
    """Return a ``(ratio, distance)`` pair for *s1* vs *s2*.

    The similarity ratio is truncated (via the external ``truncate``
    helper) to 2 decimal places; the distance is the raw edit distance.
    """
    matcher = StringMatcher(None, s1, s2)
    truncated_ratio = truncate(matcher.ratio(), 2)
    return truncated_ratio, matcher.distance()