def test_damerau_levenshtein_distance(self):
    """Check the Damerau-Levenshtein distance computed for two fixed lines of verse."""
    first_line = "all haile whose solempne glorious concepcioun"
    second_line = "fresche floure in quhom the hevinlie dewe doun fell"

    metric = Levenshtein()
    dist = metric.Damerau_Levenshtein_Distance(first_line, second_line)

    # Expected value is pinned by the test itself.
    self.assertEqual(dist, 35)
def test_levenshtein_distance(self):
    """Check the Levenshtein distance computed for two fixed lines of verse."""
    first_line = "now grete glorious god through grace of himselven"
    second_line = "and the precious prayer of his pris moder"

    metric = Levenshtein()
    dist = metric.Levenshtein_Distance(first_line, second_line)

    # Expected value is pinned by the test itself.
    self.assertEqual(dist, 36)
def _calculate_ratios(self, list_a, list_b):
    """
    Calculate a matrix of string comparisons given two input lists.

    Each item of both lists is indexed with the key 'text', and also with
    'sanitized' when ``self.sanitize_input`` is truthy. The result has one
    row per item of ``list_a`` and, within each row, one ``Comparison`` per
    item of ``list_b``.

    :param list_a: list [object]
    :param list_b: list [object]
    :return: list [[Comparison]]
    """
    metric = Levenshtein()
    matrix = []

    for item_a in list_a:
        # One row of comparisons per entry of list_a
        row = []

        for item_b in list_b:
            # The ratio is computed on the sanitized strings when the
            # sanitize_input flag is set, otherwise on the original text.
            # The Comparison itself always stores the original text.
            ratio_key = 'sanitized' if self.sanitize_input else 'text'
            entry = Comparison(
                item_a['text'],
                item_b['text'],
                metric.ratio(item_a[ratio_key], item_b[ratio_key])
            )

            # Attach text metadata for a and/or b when it is set on this instance
            if self.text_ref_a:
                entry.set_ref_a(self.text_ref_a)
            if self.text_ref_b:
                entry.set_ref_b(self.text_ref_b)

            row.append(entry)

        matrix.append(row)

    return matrix
def compute_distance_matrix(proper_nouns: List[str]):
    """
    Build the pairwise distance matrix of a list of proper nouns.

    :param proper_nouns: Items of proper_nouns must be unique
    :return: square numpy array in which entry [i, j] holds the value of
        ``Levenshtein_Distance(proper_nouns[i], proper_nouns[j])``
    """
    metric = Levenshtein()
    size = len(proper_nouns)

    # The distances are meant to help regroup different forms of a lemma
    matrix = np.zeros((size, size))
    for row, left in enumerate(proper_nouns):
        for col, right in enumerate(proper_nouns):
            matrix[row, col] = metric.Levenshtein_Distance(left, right)
    return matrix
def test_distance_ratio(self):
    """Check the ratio returned by Levenshtein.ratio for two fixed strings."""
    first_line = "dique deaeque omnes, studium quibus arua tueri,"
    second_line = "dique deaeque omnes, quibus est tutela per agros,"

    metric = Levenshtein()
    ratio = metric.ratio(first_line, second_line)

    # Expected value is pinned by the test itself.
    self.assertEqual(ratio, 0.71)
def compare_levenshtein(self, other_text):
    """
    Compare this object's data with another text using Levenshtein.ratio.

    :param other_text: the text to compare ``self.data`` against
    :return: the value returned by ``Levenshtein().ratio(self.data, other_text)``
    """
    metric = Levenshtein()
    ratio = metric.ratio(self.data, other_text)
    return ratio