def is_similar(self, other_person_address):
    """Return True when this address and ``other_person_address`` look alike.

    The decision is made in this order:

    1. If either address reports ``is_populated`` as falsy, they are not
       considered similar.
    2. If both share the same ``id``, they are considered similar.
    3. Otherwise they are similar only when this address's ``concat`` string
       is longer than 20 characters and the Damerau-Levenshtein distance
       between the two ``concat`` strings is less than 3.

    Note that only ``self.concat`` is length-checked; the other address's
    ``concat`` is used for the distance computation alone.

    :param other_person_address: object exposing ``is_populated``, ``id``
        and ``concat`` attributes.
    :return: ``True`` if the addresses are considered similar, else ``False``.
    """
    if not self.is_populated or not other_person_address.is_populated:
        return False
    if self.id == other_person_address.id:
        return True

    # Evaluate the cheap length guard before the edit distance: short strings
    # can never match, so there is no point paying for the comparison.
    if len(self.concat) <= 20:
        return False

    return jellyfish.damerau_levenshtein_distance(
        self.concat, other_person_address.concat
    ) < 3
def names_conflict(name1, name2):
    """Return True when two name parts contradict each other.

    This only looks for conflicts; it does not assert that the names match.
    Examples:

        None     does not conflict with  a
        a        does not conflict with  a
        ambrose  does not conflict with  ambrose
        a        does not conflict with  ambrose   (initial)
        t        does     conflict with  ambrose
        am       does     conflict with  ambrose
        ambrost  does not conflict with  ambrose   (fuzzy match)

    A single letter is treated as an initial, which is why one letter does
    not conflict with a full name starting with that letter, while two
    letters can. A small amount of fuzziness is allowed: names that are one
    Damerau-Levenshtein edit apart do not conflict, provided both names are
    longer than three characters (otherwise "A" would match "Z").

    An empty string is compared like any other string: it conflicts with
    every name except another empty string.

    :param name1: first name part, or ``None`` if unknown.
    :param name2: second name part, or ``None`` if unknown.
    :return: ``True`` if the names conflict, ``False`` otherwise.
    """
    # A missing name, or two identical names, can never conflict.
    if name1 is None or name2 is None or name1 == name2:
        return False

    # One side is an initial: it is compatible only with a name starting with
    # the same letter. Slicing (rather than indexing) keeps an empty string on
    # the other side from raising IndexError. The fuzzy rule below cannot
    # apply here because it needs both names to be longer than three letters.
    if len(name1) == 1 or len(name2) == 1:
        return name1[:1] != name2[:1]

    # Fuzzy match: tolerate a single edit, but only between names long enough
    # for one edit to be a plausible typo.
    if len(name1) > 3 and len(name2) > 3:
        return jellyfish.damerau_levenshtein_distance(name1, name2) >= 2

    return True
def is_similar(self, other_person_name):
    """Return True when this name and ``other_person_name`` look alike.

    The checks run in this order:

    1. A missing ``family`` on either side means "not similar".
    2. Identical ``id`` values mean "similar".
    3. The family names must be equal or at most one Damerau-Levenshtein
       edit apart.
    4. If either side has neither ``given`` nor ``other``, the family name
       alone decides and the names are similar.
    5. Otherwise ``given``/``other`` are compared position by position, and
       then crosswise (given against other), using
       ``common.names_conflict``. Either arrangement being conflict-free is
       enough, as long as that arrangement does not pair each known name
       with a ``None``.

    :param other_person_name: object exposing ``id``, ``family``, ``given``
        and ``other`` attributes.
    :return: ``True`` if the names are considered similar, else ``False``.
    """
    if self.family is None or other_person_name.family is None:
        return False
    if self.id == other_person_name.id:
        return True

    # The equality test looks redundant next to the edit distance, but the
    # distance is comparatively expensive; short-circuiting on exact matches
    # avoids computing it in the common case.
    if (self.family != other_person_name.family
            and jellyfish.damerau_levenshtein_distance(
                self.family, other_person_name.family) > 1):
        return False

    my_given, my_other = self.given, self.other
    their_given, their_other = other_person_name.given, other_person_name.other

    mine_blank = my_given is None and my_other is None
    theirs_blank = their_given is None and their_other is None
    if mine_blank or theirs_blank:
        return True

    def compatible(left, right):
        return not common.names_conflict(left, right)

    # Position-by-position comparison (given/given, other/other). Skipped
    # when each pair would contain a None on one side.
    straight_informative = not (
        (my_given is None and their_other is None)
        or (my_other is None and their_given is None)
    )
    if (straight_informative
            and compatible(my_given, their_given)
            and compatible(my_other, their_other)):
        return True

    # Crosswise comparison (given/other, other/given), with the matching
    # guard against pairing every known name with a None.
    crossed_informative = not (
        (my_given is None and their_given is None)
        or (my_other is None and their_other is None)
    )
    return (crossed_informative
            and compatible(my_given, their_other)
            and compatible(my_other, their_given))