Example #1
0
 def is_similar(self, other_person_address):
     """
     Decide whether this address and ``other_person_address`` look like the same address.

     Rules, applied in order:

       * if either side is not populated (``is_populated`` is falsy) they are never similar
       * if both sides carry the same ``id`` they are similar
       * otherwise they are similar only when this address's ``concat`` string is longer than
         20 characters and the two ``concat`` strings are fewer than 3 Damerau-Levenshtein
         edits apart

     Returns True or False.
     """
     if not self.is_populated or not other_person_address.is_populated:
         return False
     if self.id == other_person_address.id:
         return True

     # Damerau-Levenshtein is comparatively expensive, so rule out short strings with the cheap
     # length test first.  Short strings are excluded because a couple of edits can turn one
     # short address into an unrelated one.
     if len(self.concat) <= 20:
         return False

     return jellyfish.damerau_levenshtein_distance(self.concat, other_person_address.concat) < 3
Example #2
0
def names_conflict(name1, name2):
    """
    For comparing name parts.  We're just checking for conflicts.  The following examples should help:

      None does not conflict with a
      a does not conflict with a
      ambrose does not conflict with ambrose
      a does not conflict with ambrose

      t does conflict with ambrose
      am does conflict with ambrose

      ambrost does not conflict with ambrose # our fuzzy matching

    Again, we're just trying to ensure there are no conflicts between the names.  A single letter is considered an
    initial, which is why a single letter does not conflict with a full name starting with that letter, but two
    letters can.

    We've introduced a little fuzziness to the matching by allowing one Damerau-Levenshtein edit between names,
    provided both names are longer than three characters (obviously, "A" is one edit away from "Z", so the names
    need some length before an edit distance means anything).

    An empty string is not treated as missing: it matches only another empty string and conflicts with every
    non-empty name.

    Returns True when the names conflict, False otherwise.
    """
    # A missing part, or an exact match, can never be a conflict.
    if name1 is None or name2 is None or name1 == name2:
        return False

    # One side is an initial: it agrees with any name that starts with the same letter.  Slices are compared
    # instead of name[0] so that an empty string on the other side cannot raise IndexError.
    if (len(name1) == 1 or len(name2) == 1) and name1[:1] == name2[:1]:
        return False

    # Fuzzy match: tolerate a single edit, but only between names long enough for that to be meaningful.
    if len(name1) > 3 and len(name2) > 3 and jellyfish.damerau_levenshtein_distance(name1, name2) < 2:
        return False

    return True
Example #3
0
    def is_similar(self, other_person_name):
        """
        Decide whether this name and ``other_person_name`` could belong to the same person.

        Rules, applied in order:

          * a missing family name on either side means "not similar"
          * identical ``id`` values mean "similar"
          * family names that differ by more than one Damerau-Levenshtein edit mean "not similar"
          * if either side has neither a ``given`` nor an ``other`` name, the family match is enough
          * otherwise the given/other names must be free of conflicts (per ``common.names_conflict``),
            either compared straight (given-given, other-other) or crossed (given-other, other-given)

        Returns True or False.
        """
        theirs = other_person_name

        if self.family is None or theirs.family is None:
            return False
        if self.id == theirs.id:
            return True

        # The inequality test looks redundant, but Levenshtein is fairly inefficient; skipping the
        # algorithm whenever the family names are identical speeds things up a bit.
        if self.family != theirs.family:
            if jellyfish.damerau_levenshtein_distance(self.family, theirs.family) > 1:
                return False

        my_given, my_other = self.given, self.other
        their_given, their_other = theirs.given, theirs.other

        # Nothing beyond the family name to compare on one of the sides.
        if my_given is None and my_other is None:
            return True
        if their_given is None and their_other is None:
            return True

        # Straight pairing: given vs. given and other vs. other.  The two guards reject the layouts in
        # which each of those pairs would contain a None, i.e. nothing would really be compared
        # (assuming names_conflict treats None as non-conflicting -- confirm against common).
        straight_comparable = (not (my_given is None and their_other is None) and
                               not (my_other is None and their_given is None))
        if straight_comparable and not (common.names_conflict(my_given, their_given) or
                                        common.names_conflict(my_other, their_other)):
            return True

        # Crossed pairing: given vs. other and other vs. given, guarded the same way.
        crossed_comparable = (not (my_given is None and their_given is None) and
                              not (my_other is None and their_other is None))
        if crossed_comparable and not (common.names_conflict(my_given, their_other) or
                                       common.names_conflict(my_other, their_given)):
            return True

        return False