Ejemplo n.º 1
0
 def test_damerau_levenshtein_distance(self):
     """Check the Damerau-Levenshtein distance computed for two fixed phrases."""
     first_phrase = "all haile whose solempne glorious concepcioun"
     second_phrase = "fresche floure in quhom the hevinlie dewe doun fell"

     measurer = Levenshtein()
     observed = measurer.Damerau_Levenshtein_Distance(first_phrase, second_phrase)

     # The distance for this pair of phrases is pinned at 35.
     self.assertEqual(observed, 35)
Ejemplo n.º 2
0
 def test_levenshtein_distance(self):
     """Check the Levenshtein distance computed for two fixed phrases."""
     first_phrase = "now grete glorious god through grace of himselven"
     second_phrase = "and the precious prayer of his pris moder"

     measurer = Levenshtein()
     observed = measurer.Levenshtein_Distance(first_phrase, second_phrase)

     # The distance for this pair of phrases is pinned at 36.
     self.assertEqual(observed, 36)
Ejemplo n.º 3
0
    def _calculate_ratios(self, list_a, list_b):
        """
        Build a matrix of Comparison objects, one for each pairing of an
        entry of list_a with an entry of list_b.

        :param list_a: list of entries indexable by 'text' (and by 'sanitized'
            when self.sanitize_input is truthy); each entry yields one row
        :param list_b: list of entries with the same keys; each entry yields
            one column within every row
        :return: list [[Comparison]] where result[i][j] compares list_a[i]
            with list_b[j]
        """
        scorer = Levenshtein()
        comparisons = []

        for entry_a in list_a:
            # Each entry of list_a gets its own row, registered up front so a
            # row exists even when list_b is empty.
            row = []
            comparisons.append(row)

            for entry_b in list_b:
                # The ratio is computed on the sanitized strings when the
                # sanitize_input flag is set, otherwise on the original text.
                ratio_key = 'sanitized' if self.sanitize_input else 'text'

                # The Comparison itself always carries the original text of
                # both entries, whichever strings were scored.
                pairing = Comparison(
                    entry_a['text'],
                    entry_b['text'],
                    scorer.ratio(entry_a[ratio_key], entry_b[ratio_key]),
                )

                # Attach text metadata for either side when it is set on
                # this instance.
                if self.text_ref_a:
                    pairing.set_ref_a(self.text_ref_a)
                if self.text_ref_b:
                    pairing.set_ref_b(self.text_ref_b)

                row.append(pairing)

        return comparisons
Ejemplo n.º 4
0
def compute_distance_matrix(proper_nouns: List[str]):
    """
    Build the square matrix of pairwise Levenshtein distances.

    :param proper_nouns: Items of proper_nouns must be unique
    :return: numpy array of shape (len(proper_nouns), len(proper_nouns))
        whose cell [i, j] holds the value returned by Levenshtein_Distance
        for proper_nouns[i] and proper_nouns[j]
    """
    metric = Levenshtein()

    # One row and one column per noun; every cell is overwritten below.
    size = len(proper_nouns)
    matrix = np.zeros((size, size))

    for row, source in enumerate(proper_nouns):
        for col, target in enumerate(proper_nouns):
            matrix[row, col] = metric.Levenshtein_Distance(source, target)

    return matrix
Ejemplo n.º 5
0
 def test_distance_ratio(self):
     """Check the value returned by Levenshtein.ratio for two fixed strings."""
     first_line = "dique deaeque omnes, studium quibus arua tueri,"
     second_line = "dique deaeque omnes, quibus est tutela per agros,"

     scorer = Levenshtein()
     observed = scorer.ratio(first_line, second_line)

     # Exact comparison: the ratio for this pair is pinned at 0.71.
     self.assertEqual(observed, 0.71)
Ejemplo n.º 6
0
 def compare_levenshtein(self, other_text):
     """
     Score this object's data against another text with Levenshtein.ratio.

     :param other_text: value passed as the second argument to ratio()
     :return: the result of Levenshtein().ratio(self.data, other_text)
     """
     scorer = Levenshtein()
     return scorer.ratio(self.data, other_text)