Ejemplo n.º 1
0
def levenshtein(string1, string2):
    """Return the case-insensitive edit distance between two strings.

    Args:
        string1: First string; ``None`` is treated as the empty string.
        string2: Second string; ``None`` is treated as the empty string.

    Returns:
        Whatever ``StringMatcher.distance()`` reports for the lower-cased
        inputs.
    """
    # Only None is replaced; any other value must provide .lower() itself.
    left = "" if string1 is None else string1
    right = "" if string2 is None else string2

    # Lower-case both sides so the comparison ignores letter case.
    return StringMatcher(seq1=left.lower(), seq2=right.lower()).distance()
Ejemplo n.º 2
0
def levenshtein(string1, string2):
    """Return the case-insensitive edit distance between two strings.

    Args:
        string1: First string; ``None`` is treated as the empty string.
        string2: Second string; ``None`` is treated as the empty string.

    Returns:
        Whatever ``StringMatcher.distance()`` reports for the lower-cased
        inputs.
    """
    # Only None is replaced; any other value must provide .lower() itself.
    left = "" if string1 is None else string1
    right = "" if string2 is None else string2

    # Lower-case both sides so the comparison ignores letter case.
    return StringMatcher(seq1=left.lower(), seq2=right.lower()).distance()
Ejemplo n.º 3
0
def get_match_score(phrase, words, min_distance=2):
    """Score how closely the tokens of ``phrase`` are matched by ``words``.

    Every (phrase token, word) pair whose edit distance is at most
    ``min_distance`` contributes ``max(0, len(token) - distance)`` divided by
    the total number of characters in ``phrase``.

    Args:
        phrase: Iterable of string tokens making up the phrase.
        words: Iterable of candidate strings to compare each token against.
        min_distance: Despite the name, this is the largest edit distance
            that still counts as a match.

    Returns:
        The accumulated score, or ``0`` when ``phrase`` contains no
        characters at all (empty phrase, or only empty tokens).
    """
    phrase_len = len(''.join(phrase))
    if phrase_len == 0:
        # No characters to match against: without this guard a phrase made
        # only of empty tokens would divide by zero below.
        return 0

    score = 0
    for p in phrase:
        # One matcher per phrase token; only the second sequence changes.
        matcher = StringMatcher(seq1=p)
        for w in words:
            matcher.set_seq2(w)
            match_distance = matcher.distance()
            if match_distance <= min_distance:
                # Clamp at zero so tokens shorter than the distance never
                # subtract from the score.
                score += max(0, len(p) - match_distance) / phrase_len
    return score
Ejemplo n.º 4
0
    def get_loosly_matching_keyword(self, term):
        """Return the stored key that most loosely matches ``term``.

        Keys are first ranked by how many tokens of ``term`` (as produced by
        ``self.tokenize_text``) appear in their entry of
        ``self.tokenized_keys_``. Among the keys tied for the highest count,
        the one with the smallest ``StringMatcher`` distance to ``term`` is
        returned; the earliest such key wins ties.

        Note that when no key shares a token with ``term``, every key ties at
        a count of zero, so all keys are compared by distance.

        Args:
            term: The text to look up.

        Returns:
            The best matching key, or ``None`` when there are no keys.
        """
        term_tokens = self.tokenize_text(term)

        # Pass 1: collect the keys sharing the most tokens with the term.
        best_overlap = 0
        candidates = []
        for key, key_tokens in self.tokenized_keys_.items():
            overlap = sum(1 for token in term_tokens if token in key_tokens)
            if overlap > best_overlap:
                # A strictly better overlap invalidates earlier candidates.
                best_overlap = overlap
                candidates = []
            if overlap == best_overlap:
                candidates.append(key)

        # Pass 2: break ties using the edit distance to the raw term.
        best_key = None
        best_distance = 9999999
        for candidate in candidates:
            candidate_distance = StringMatcher(seq1=candidate, seq2=term).distance()
            if candidate_distance >= best_distance:
                continue
            best_distance = candidate_distance
            best_key = candidate
        return best_key
Ejemplo n.º 5
0
def is_typo(word, word_from_dict):
    """Decide whether ``word`` looks like a typo of ``word_from_dict``.

    A distance of exactly 1 always counts as a typo. A distance of exactly 2
    counts only when ``fl(word, word_from_dict)`` is truthy, in which case
    the value returned by ``fl`` is passed through unchanged.

    Args:
        word: The word being checked.
        word_from_dict: The reference word to compare against.

    Returns:
        ``True`` for distance 1, the result of ``fl`` for distance 2, and
        ``False`` otherwise.
    """
    matcher = StringMatcher()
    matcher.set_seq1(word)
    matcher.set_seq2(word_from_dict)
    edit_distance = matcher.distance()

    if edit_distance == 1:
        return True
    # fl is only consulted for distance-2 pairs (short-circuit evaluation).
    return edit_distance == 2 and fl(word, word_from_dict)
def levenshtein_ratio(s1, s2):
    """Return the truncated similarity ratio and the distance of two strings.

    Args:
        s1: First sequence handed to ``StringMatcher``.
        s2: Second sequence handed to ``StringMatcher``.

    Returns:
        A tuple ``(ratio, distance)`` where ``ratio`` is
        ``truncate(matcher.ratio(), 2)`` and ``distance`` is
        ``matcher.distance()``.
    """
    # First positional argument is passed as None, as in the original call.
    matcher = StringMatcher(None, s1, s2)
    similarity = truncate(matcher.ratio(), 2)
    distance = matcher.distance()
    return similarity, distance