Ejemplo n.º 1
0
    def distance(self, string_1, string_2, max_distance):
        """Return the distance between two strings, capped at max_distance.

        Cheap special cases are answered directly; everything else is
        delegated to ``self._distance`` or ``self._distance_max`` after the
        shared prefix/suffix has been measured by
        ``helpers.prefix_suffix_prep``.

        Parameters
        ----------
        string_1 : str
            One of the strings to compare.
        string_2 : str
            The other string to compare.
        max_distance : int
            The largest distance of interest; capped at ``2**31 - 1``.

        Returns
        -------
        int
            -1 when the strings cannot be within ``max_distance`` of each
            other (budget is non-positive and they differ, or the length
            gap alone exceeds the budget); 0 when the budget is
            non-positive and the strings are equal; otherwise the value
            produced by the helper / worker methods.
        """
        if string_1 is None or string_2 is None:
            return helpers.null_distance_results(
                string_1, string_2, max_distance
            )

        if max_distance <= 0:
            # With no edit budget only an exact match can succeed.
            if string_1 == string_2:
                return 0
            return -1

        limit = int(min(2 ** 31 - 1, max_distance))

        # Put the shorter string first; per the original note this lets the
        # main processing spend more of its time in the inner loop.
        if len(string_1) > len(string_2):
            shorter, longer = string_2, string_1
        else:
            shorter, longer = string_1, string_2
        if len(longer) - len(shorter) > limit:
            return -1

        # Measure what is left once the common prefix/suffix is ignored.
        len_short, len_long, offset = helpers.prefix_suffix_prep(
            shorter, longer
        )
        if len_short == 0:
            if len_long <= limit:
                return len_long
            return -1

        # Grow the reusable cost buffers when the current ones are too small.
        if len(self._base_char_1_costs) < len_long:
            self._base_char_1_costs = np.zeros(len_long, dtype=np.int32)
            self._base_prev_char_1_costs = np.zeros(len_long, dtype=np.int32)

        costs = self._base_char_1_costs
        prev_costs = self._base_prev_char_1_costs
        if limit < len_long:
            return self._distance_max(shorter, longer, len_short, len_long,
                                      offset, limit, costs, prev_costs)
        return self._distance(shorter, longer, len_short, len_long, offset,
                              costs, prev_costs)
Ejemplo n.º 2
0
    def distance(self, string_1: str, string_2: str, max_distance: int) -> int:
        """Computes the bounded Damerau-Levenshtein optimal string alignment
        edit distance between two strings.

        Args:
            string_1: One of the strings to compare.
            string_2: The other string to compare.
            max_distance: The largest distance that is of interest.

        Returns:
            -1 if the distance is greater than ``max_distance``, 0 if the
                strings are equivalent, otherwise a positive number that grows
                as the strings differ more.
        """
        either_missing = string_1 is None or string_2 is None
        if either_missing:
            return helpers.null_distance_results(string_1, string_2, max_distance)

        if max_distance <= 0:
            # Zero budget: the strings either match exactly or are out of range.
            if string_1 == string_2:
                return 0
            return -1

        upper_bound = int(min(2**31 - 1, max_distance))

        # Order the pair so the shorter string comes first; the original note
        # says this keeps more of the main processing in the inner loop.
        short, long_ = (
            (string_2, string_1)
            if len(string_1) > len(string_2)
            else (string_1, string_2)
        )
        if len(long_) - len(short) > upper_bound:
            return -1

        # Lengths that remain once the shared prefix/suffix is ignored.
        short_len, long_len, begin = helpers.prefix_suffix_prep(short, long_)
        if short_len == 0:
            if long_len <= upper_bound:
                return long_len
            return -1

        # Replace the reusable cost buffers when they are too small.
        if long_len > len(self._base_char_1_costs):
            self._base_char_1_costs = [0] * long_len
            self._base_prev_char_1_costs = [0] * long_len

        if upper_bound < long_len:
            return self._distance_max(
                short, long_, short_len, long_len, begin, upper_bound,
                self._base_char_1_costs, self._base_prev_char_1_costs,
            )
        return self._distance(
            short, long_, short_len, long_len, begin,
            self._base_char_1_costs, self._base_prev_char_1_costs,
        )
Ejemplo n.º 3
0
    def distance(self, string_1, string_2, max_distance):
        """Return the Levenshtein edit distance between two strings.

        Parameters
        ----------
        string_1 : str
            One of the strings to compare.
        string_2 : str
            The other string to compare.
        max_distance : int
            The maximum distance that is of interest.

        Returns
        -------
        int
            -1 if the distance is greater than ``max_distance``, 0 if
            the strings are equivalent, otherwise a positive number
            whose magnitude increases as difference between the strings
            increases.
        """
        if string_1 is None or string_2 is None:
            return helpers.null_distance_results(string_1, string_2,
                                                 max_distance)
        if max_distance <= 0:
            # no edits are allowed, so only an exact match is in range
            return 0 if string_1 == string_2 else -1
        # cap at 2**31 - 1 and coerce to a plain int
        max_distance = int(min(2**31 - 1, max_distance))
        # if strings of different lengths, ensure shorter string is in
        # string_1. This can result in a little faster speed by
        # spending more time spinning just the inner loop during the
        # main processing.
        if len(string_1) > len(string_2):
            string_2, string_1 = string_1, string_2
        # the length gap alone already exceeds the allowed distance
        if len(string_2) - len(string_1) > max_distance:
            return -1
        # identify common suffix and/or prefix that can be ignored
        len_1, len_2, start = helpers.prefix_suffix_prep(string_1, string_2)
        if len_1 == 0:
            return len_2 if len_2 <= max_distance else -1

        # replace the reusable cost buffer when it is too small
        if len_2 > len(self._base_char_1_costs):
            self._base_char_1_costs = np.zeros(len_2, dtype=np.int32)
        if max_distance < len_2:
            return self._distance_max(string_1, string_2, len_1, len_2, start,
                                      max_distance, self._base_char_1_costs)
        return self._distance(string_1, string_2, len_1, len_2, start,
                              self._base_char_1_costs)