def test_compare_names(self):
    ''' Check the score and factor returned when comparing pairs of names '''
    # Different first name and different surname.
    points, weight = get_score_compare_names("Juan", "Fernandez", "Macias", "García")
    assert points + weight == 0.0

    # Same first name; the surnames differ only by an accent.
    points, weight = get_score_compare_names("Juan", "Gomez", "Juan", "Gómez")
    assert points == 4.0
    assert weight == 1.0

    # Compound name and surname against their first components only.
    points, weight = get_score_compare_names("Juan Antonio", "Gomez Perez", "Juan", "Gómez")
    assert points > 3.0
    assert weight > 0.8

    # The shared given name appears in a different position on each side.
    points, weight = get_score_compare_names("Juan José Fernando", "Gomez Perez", "Agustín Juan", "Gómez")
    assert points > 2.0
    assert weight > 0.1

    # Surname with and without the "de la" particle, compared with language="es".
    points, weight = get_score_compare_names("Juan", "de la Fuente", "Juan", "Fuente", language="es")
    assert points == 4.0
    assert weight == 1.0
def comparison_score(self, profile, data_language="en", name_convention="father_surname"):
    '''
    Compare this profile with another one and return a (score, factor) pair.

    The starting values come from get_score_compare_names; matching data
    adds to the score, while mismatching data scales the factor down.

    profile: the other profile to compare against.
    data_language: forwarded as "language" to the name comparison.
    name_convention: forwarded as "convention" to the name comparison.
    '''
    score, factor = get_score_compare_names(
        self.getName(),
        self.getSurname(),
        profile.getName(),
        profile.getSurname(),
        language=data_language,
        convention=name_convention)

    # Gender is only taken into account when both profiles provide one.
    if self.getGender() and profile.getGender():
        if self.getGender() != profile.getGender():
            factor *= 0.5
        else:
            score += 0.5

    # Compare the dates of every event type listed in MERGE_EVENTS.
    for kind in MERGE_EVENTS:
        mine = self.get_specific_event(kind)
        theirs = profile.get_specific_event(kind)
        # Both events must exist...
        if not (mine and theirs):
            continue
        # ...and both must carry some date to be comparable.
        if not (mine.is_any_date_available() and theirs.is_any_date_available()):
            continue
        date_score, date_factor = get_score_compare_dates(mine, theirs)
        score += date_score
        factor *= date_factor

    return score, factor
def comparison_score(self, profile, data_language="en", name_convention="father_surname"):
    '''
    Compare this profile with another one and return a (score, factor) pair.

    The starting values come from get_score_compare_names. Every further
    check adds its own score and multiplies its own factor into the result.

    profile: the other profile to compare against.
    data_language: forwarded as "language" to the name comparison.
    name_convention: forwarded as "convention" to the name comparison.
    '''
    score, factor = get_score_compare_names(self.getName(), self.getSurname(),
                                            profile.getName(), profile.getSurname(),
                                            language=data_language,
                                            convention=name_convention)

    # Comparing big differences in events: the events of each profile are
    # checked against the earliest/latest events of the other profile.
    score1, factor1 = score_factor_birth_and_death(
        self.get_earliest_event_in_event_form(),
        self.get_latest_event_in_event_form(),
        profile.getEvents())
    # Both bounds must come from "profile" here; the earlier code mixed the
    # earliest event of "profile" with the latest event of "self".
    score2, factor2 = score_factor_birth_and_death(
        profile.get_earliest_event_in_event_form(),
        profile.get_latest_event_in_event_form(),
        self.getEvents())
    score += score1 + score2
    factor = factor * factor1 * factor2

    # Comparing gender: only when both profiles provide one.
    if self.getGender() and profile.getGender():
        if self.getGender() == profile.getGender():
            score += 0.5
        elif (self.getGender() != "U") and (profile.getGender() != "U"):
            # A mismatch is only penalised when neither side is "U"
            # (presumably "unknown" - TODO confirm).
            factor = 0.1 * factor

    for event_name in MERGE_EVENTS:
        self_event = self.get_specific_event(event_name)
        other_event = profile.get_specific_event(event_name)
        if event_name == "marriage":
            # Marriages are returned as a sequence; only the first entry of
            # each profile is compared.
            self_event = self_event[0] if len(self_event) > 0 else None
            other_event = other_event[0] if len(other_event) > 0 else None
        if (self_event and other_event and self_event.is_any_date_available()
                and other_event.is_any_date_available()):
            score_temp, factor_temp = get_score_compare_dates(self_event, other_event)
            event_type = self_event.get_event_type()
            # A marriage comparison whose factor is below 1.0 is ignored, and
            # residence events never contribute to the result.
            penalising_marriage = (event_type == "marriage") and (factor_temp < 1.0)
            if (not penalising_marriage) and (event_type != "residence"):
                score += score_temp
                factor = factor * factor_temp
    return score, factor
def comparison_score(self, profile, data_language="en", name_convention="father_surname"):
    '''
    Compare the gen_data of this profile with the gen_data of another
    profile and return a (score, factor) pair.

    profile: the other profile to compare against.
    data_language: forwarded as "language" to the name comparison.
    name_convention: forwarded as "convention" to the name comparison.
    '''
    score, factor = get_score_compare_names(
        self.gen_data["name"],
        self.gen_data["surname"],
        profile.gen_data["name"],
        profile.gen_data["surname"],
        language=data_language,
        convention=name_convention)
    own = self.gen_data
    other = profile.gen_data

    # Gender is only compared when both profiles store one.
    if "gender" in own and "gender" in other:
        if own["gender"] != other["gender"]:
            factor *= 0.5
        else:
            score += 0.5

    # Compare every date listed in MERGE_DATES that both profiles store.
    for key in MERGE_DATES:
        if key not in own or key not in other:
            continue
        accuracy_key = "accuracy_" + key
        date_score, date_factor = get_score_compare_dates(
            own[key], own[accuracy_key], other[key], other[accuracy_key])
        score += date_score
        factor *= date_factor

    return score, factor
def test_compare_names(self):
    ''' Check the score and factor returned when comparing pairs of names '''
    # Spelling variants of the same first name, identical double surname.
    points, weight = get_score_compare_names(
        "Alejandra", "Martín Martín", "Alexandra", "Martín Martín", language="es")
    assert points * weight > 2.0
    points, weight = get_score_compare_names(
        "Matías", "Martín Martín", "Mathias", "Martín Martín", language="es")
    assert points * weight > 5.0

    # Different first name and different surname.
    points, weight = get_score_compare_names("Juan", "Fernandez", "Macias", "García")
    assert points * weight < 0.06

    # Same first name; the surnames differ only by an accent.
    points, weight = get_score_compare_names("Juan", "Gomez", "Juan", "Gómez")
    assert points == 4.0
    assert weight == 1.0

    # Compound name and surname against their first components only,
    # with the default language and then with language="es".
    points, weight = get_score_compare_names("Juan Antonio", "Gomez Perez", "Juan", "Gómez")
    assert points > 3.0
    assert weight > 0.8
    points, weight = get_score_compare_names(
        "Juan Antonio", "Gomez Perez", "Juan", "Gómez", language="es")
    assert points > 3.0
    assert weight > 0.8

    # The shared given name appears in a different position on each side.
    points, weight = get_score_compare_names(
        "Juan José Fernando", "Gomez Perez", "Agustín Juan", "Gómez")
    assert points > 2.0
    assert weight > 0.1

    # Surname with and without the "de la" particle.
    points, weight = get_score_compare_names(
        "Juan", "de la Fuente", "Juan", "Fuente", language="es")
    assert points == 4.0
    assert weight == 1.0

    # Same two surnames in swapped order.
    points, weight = get_score_compare_names(
        "Juan", "Gómez Martín", "Juan", "Martín Gómez", language="es")
    assert points > 2.0
    assert weight < 0.01

    # Same two surnames, with and without accents.
    points, weight = get_score_compare_names(
        "Juan", "Gómez Martín", "Juan", "Gomez Martin", language="es")
    assert points == 6.0
    assert weight == 1.0

    # A single surname against the second surname of the other person.
    points, weight = get_score_compare_names(
        "Juan", "Gómez", "Juan", "Pérez Gomez", language="es")
    assert points * weight < 2.0

    # Old-style spellings (ph/f, x/j) of the same name and surnames.
    points, weight = get_score_compare_names(
        "Josefa", "Arnanz Tejera", "Josepha", "Arnanz Texera", language="es")
    assert points > 5.5
    assert weight > 0.79

    # Same first name and second surname, different first surname.
    points, weight = get_score_compare_names(
        "Lorenza", "Enríquez López", "Lorenza", "Sancho López", language="es")
    # NOTE(review): this result is only printed, never asserted, so this
    # case cannot fail the test - confirm whether an assert was intended.
    print(points * weight < 1.01)
def comparison_score(self, profile, data_language="en", name_convention="father_surname"):
    '''
    Compare this profile with another one and return a (score, factor) pair.

    The starting values come from comparing the main name and surname of
    both profiles; they may be replaced by the best result found among all
    the alternative names. Every further check adds its own score and
    multiplies its own factor into the result.

    profile: the other profile to compare against.
    data_language: forwarded as "language" to the name helpers.
    name_convention: forwarded as "convention" to the name helpers.
    '''
    score, factor = get_score_compare_names(self.getName(), self.getSurname(),
                                            profile.getName(), profile.getSurname(),
                                            language=data_language,
                                            convention=name_convention)

    # We compare now all the potential names, in case that there is a
    # different convention.
    score_nicks = 0
    factor_nicks = 0
    for complete_name_self in self.get_all_names():
        name_self, surname_self, _ = get_name_surname_from_complete_name(
            complete_name_self, convention=name_convention, language=data_language)
        for complete_name_other in profile.get_all_names():
            name_other, surname_other, _ = get_name_surname_from_complete_name(
                complete_name_other, convention=name_convention, language=data_language)
            score_int, factor_int = get_score_compare_names(
                name_self, surname_self, name_other, surname_other,
                language=data_language, convention=name_convention)
            # We keep a pair only when it improves both the score and the
            # factor of the best pair found so far.
            if (score_int > score_nicks) and (factor_int > factor_nicks):
                score_nicks = score_int
                factor_nicks = factor_int

    # We only take the new score if bigger than the previous one, but we
    # include a penalty (we introduce preference to the score given by
    # formal names): the alternative score is halved.
    if (score_nicks > score) and (factor_nicks > factor) and (
            score_nicks * factor_nicks * 0.5 > score * factor):
        score = score_nicks * 0.5
        factor = factor_nicks

    # Comparing big differences in events: the events of each profile are
    # checked against the earliest/latest events of the other profile.
    score1, factor1 = score_factor_birth_and_death(
        self.get_earliest_event_in_event_form(),
        self.get_latest_event_in_event_form(),
        profile.getEvents())
    # Both bounds must come from "profile" here; the earlier code mixed the
    # earliest event of "profile" with the latest event of "self".
    score2, factor2 = score_factor_birth_and_death(
        profile.get_earliest_event_in_event_form(),
        profile.get_latest_event_in_event_form(),
        self.getEvents())

    # In this stage we add all obtained scores and factors.
    score += score1 + score2
    factor = factor * factor1 * factor2

    # Comparing gender: only when both profiles provide one.
    if self.getGender() and profile.getGender():
        if self.getGender() == profile.getGender():
            score += 0.5
        elif (self.getGender() != "U") and (profile.getGender() != "U"):
            # A mismatch is only penalised when neither side is "U"
            # (presumably "unknown" - TODO confirm).
            factor = 0.1 * factor

    for event_name in MERGE_EVENTS:
        self_event = self.get_specific_event(event_name)
        other_event = profile.get_specific_event(event_name)
        if event_name == "marriage":
            # Marriages are returned as a sequence; only the first entry of
            # each profile is compared.
            self_event = self_event[0] if len(self_event) > 0 else None
            other_event = other_event[0] if len(other_event) > 0 else None
        if (self_event and other_event and self_event.is_any_date_available()
                and other_event.is_any_date_available()):
            score_temp, factor_temp = get_score_compare_dates(self_event, other_event)
            event_type = self_event.get_event_type()
            # A marriage comparison whose factor is below 1.0 is ignored, and
            # residence events never contribute to the result.
            penalising_marriage = (event_type == "marriage") and (factor_temp < 1.0)
            if (not penalising_marriage) and (event_type != "residence"):
                score += score_temp
                factor = factor * factor_temp
    return score, factor