def test_compare_names(self):
    '''
    Check the (score, factor) pairs returned by get_score_compare_names
    for several name and surname combinations.
    '''
    # Name and surname both differ: score and factor must add up to zero.
    score, factor = get_score_compare_names(
        "Juan", "Fernandez", "Macias", "García")
    assert score + factor == 0.0

    # Surnames that differ only by an accent are expected to match fully.
    score, factor = get_score_compare_names(
        "Juan", "Gomez", "Juan", "Gómez")
    assert score == 4.0
    assert factor == 1.0

    # One side carries an extra given name and an extra surname.
    score, factor = get_score_compare_names(
        "Juan Antonio", "Gomez Perez", "Juan", "Gómez")
    assert score > 3.0
    assert factor > 0.8

    # Only one given name is shared and it sits in a different position.
    score, factor = get_score_compare_names(
        "Juan José Fernando", "Gomez Perez", "Agustín Juan", "Gómez")
    assert score > 2.0
    assert factor > 0.1

    # With language="es", "de la Fuente" is expected to equal "Fuente".
    score, factor = get_score_compare_names(
        "Juan", "de la Fuente", "Juan", "Fuente", language="es")
    assert score == 4.0
    assert factor == 1.0
Esempio n. 2
0
 def comparison_score(self,
                      profile,
                      data_language="en",
                      name_convention="father_surname"):
     '''
     Get the score value in comparison

     The name comparison supplies the initial (score, factor) pair, which is
     then adjusted by the gender of both profiles and by the dates of every
     event listed in MERGE_EVENTS. Partial scores are added and partial
     factors are multiplied.

     profile: the other profile to compare against.
     data_language: forwarded as "language" to get_score_compare_names.
     name_convention: forwarded as "convention" to get_score_compare_names.

     Returns the tuple (score, factor).
     '''
     total, weight = get_score_compare_names(
         self.getName(),
         self.getSurname(),
         profile.getName(),
         profile.getSurname(),
         language=data_language,
         convention=name_convention)
     # Gender is only taken into account when both profiles report one.
     if self.getGender() and profile.getGender():
         if self.getGender() == profile.getGender():
             total += 0.5
         else:
             # Different genders halve the factor.
             weight *= 0.5
     for event_name in MERGE_EVENTS:
         own_event = self.get_specific_event(event_name)
         their_event = profile.get_specific_event(event_name)
         # Both profiles need the event before any date can be compared.
         if not (own_event and their_event):
             continue
         if not (own_event.is_any_date_available()
                 and their_event.is_any_date_available()):
             continue
         date_score, date_factor = get_score_compare_dates(
             own_event, their_event)
         total += date_score
         weight *= date_factor
     return total, weight
 def comparison_score(self,
                      profile,
                      data_language="en",
                      name_convention="father_surname"):
     '''
     Get the score value in comparison

     Starts from the name comparison of both profiles and then adjusts the
     result with the birth/death range check in both directions, the gender
     and the dates of every event listed in MERGE_EVENTS. Partial scores are
     added and partial factors are multiplied.

     profile: the other profile to compare against; it must offer the same
         accessors that are used here on self.
     data_language: forwarded as "language" to get_score_compare_names.
     name_convention: forwarded as "convention" to get_score_compare_names.

     Returns the tuple (score, factor).
     '''
     score, factor = get_score_compare_names(self.getName(),
                                             self.getSurname(),
                                             profile.getName(),
                                             profile.getSurname(),
                                             language=data_language,
                                             convention=name_convention)
     # Comparing big differences in events: our earliest/latest events
     # against the events of the other profile...
     score1, factor1 = score_factor_birth_and_death(
         self.get_earliest_event_in_event_form(),
         self.get_latest_event_in_event_form(), profile.getEvents())
     # ...and the other profile's earliest/latest events against ours. Both
     # boundary events must come from the same profile; mixing the other
     # profile's earliest event with our latest one checks a range that
     # belongs to nobody.
     score2, factor2 = score_factor_birth_and_death(
         profile.get_earliest_event_in_event_form(),
         profile.get_latest_event_in_event_form(), self.getEvents())
     score += score1 + score2
     factor = factor * factor1 * factor2
     # Comparing gender: only when both profiles report one.
     if (self.getGender()) and (profile.getGender()):
         if self.getGender() == profile.getGender():
             score += 0.5
         elif (self.getGender() != "U") and (profile.getGender() != "U"):
             # A mismatch is only penalised when neither gender is "U".
             factor = 0.1 * factor
     for event_name in MERGE_EVENTS:
         self_event = self.get_specific_event(event_name)
         other_event = profile.get_specific_event(event_name)
         if event_name == "marriage":
             # Marriages are handled as a sequence; only the first entry of
             # each profile is compared.
             self_event = self_event[0] if len(self_event) > 0 else None
             other_event = other_event[0] if len(other_event) > 0 else None
         if not (self_event and other_event):
             continue
         if not (self_event.is_any_date_available()
                 and other_event.is_any_date_available()):
             continue
         score_temp, factor_temp = get_score_compare_dates(
             self_event, other_event)
         event_type = self_event.get_event_type()
         # A marriage whose date comparison lowers the factor is ignored,
         # and residence events never contribute.
         if event_type == "marriage" and factor_temp < 1.0:
             continue
         if event_type == "residence":
             continue
         score += score_temp
         factor = factor * factor_temp
     return score, factor
Esempio n. 4
0
 def comparison_score(self, profile, data_language="en", name_convention="father_surname"):
     '''
     Get the score value in comparison

     Works on the gen_data dictionaries of both profiles: the name comparison
     supplies the initial (score, factor) pair, which is then adjusted by the
     "gender" entry and by every date listed in MERGE_DATES that is present
     in both dictionaries. Partial scores are added and partial factors are
     multiplied.

     profile: the other profile to compare against.
     data_language: forwarded as "language" to get_score_compare_names.
     name_convention: forwarded as "convention" to get_score_compare_names.

     Returns the tuple (score, factor).
     '''
     own = self.gen_data
     other = profile.gen_data
     total, weight = get_score_compare_names(
         own["name"],
         own["surname"],
         other["name"],
         other["surname"],
         language=data_language,
         convention=name_convention)
     # Gender is only compared when both dictionaries carry the key.
     if "gender" in own and "gender" in other:
         if own["gender"] == other["gender"]:
             total += 0.5
         else:
             weight *= 0.5
     for date_id in MERGE_DATES:
         # A date is only compared when both profiles have it.
         if date_id not in own or date_id not in other:
             continue
         # Each date is stored together with an "accuracy_<date_id>" entry.
         accuracy_key = "accuracy_" + date_id
         date_score, date_factor = get_score_compare_dates(
             own[date_id],
             own[accuracy_key],
             other[date_id],
             other[accuracy_key])
         total += date_score
         weight *= date_factor
     return total, weight
 def test_compare_names(self):
     '''
     Check the (score, factor) pairs returned by get_score_compare_names
     for several name and surname combinations.
     '''
     # Spelling variants of the same given name with identical surnames.
     score, factor = get_score_compare_names(
         "Alejandra", "Martín Martín", "Alexandra", "Martín Martín",
         language="es")
     assert score * factor > 2.0
     score, factor = get_score_compare_names(
         "Matías", "Martín Martín", "Mathias", "Martín Martín",
         language="es")
     assert score * factor > 5.0

     # Name and surname both differ.
     score, factor = get_score_compare_names(
         "Juan", "Fernandez", "Macias", "García")
     assert score * factor < 0.06

     # Surnames that differ only by an accent.
     score, factor = get_score_compare_names(
         "Juan", "Gomez", "Juan", "Gómez")
     assert score == 4.0
     assert factor == 1.0

     # One side has an extra given name and surname, with and without the
     # explicit language.
     score, factor = get_score_compare_names(
         "Juan Antonio", "Gomez Perez", "Juan", "Gómez")
     assert score > 3.0
     assert factor > 0.8
     score, factor = get_score_compare_names(
         "Juan Antonio", "Gomez Perez", "Juan", "Gómez", language="es")
     assert score > 3.0
     assert factor > 0.8

     # Only one given name is shared and it sits in a different position.
     score, factor = get_score_compare_names(
         "Juan José Fernando", "Gomez Perez", "Agustín Juan", "Gómez")
     assert score > 2.0
     assert factor > 0.1

     # With language="es", "de la Fuente" is expected to equal "Fuente".
     score, factor = get_score_compare_names(
         "Juan", "de la Fuente", "Juan", "Fuente", language="es")
     assert score == 4.0
     assert factor == 1.0

     # Same two surnames in swapped order: the factor is expected to collapse.
     score, factor = get_score_compare_names(
         "Juan", "Gómez Martín", "Juan", "Martín Gómez", language="es")
     assert score > 2.0
     assert factor < 0.01

     # Same two surnames, differing only by accents.
     score, factor = get_score_compare_names(
         "Juan", "Gómez Martín", "Juan", "Gomez Martin", language="es")
     assert score == 6.0
     assert factor == 1.0

     # The single surname of one side is the second surname of the other.
     score, factor = get_score_compare_names(
         "Juan", "Gómez", "Juan", "Pérez Gomez", language="es")
     assert score * factor < 2.0

     # Spelling variants in both the given name and the second surname.
     score, factor = get_score_compare_names(
         "Josefa", "Arnanz Tejera", "Josepha", "Arnanz Texera",
         language="es")
     assert score > 5.5
     assert factor > 0.79

     score, factor = get_score_compare_names(
         "Lorenza", "Enríquez López", "Lorenza", "Sancho López",
         language="es")
     # NOTE(review): this result is only printed, not asserted, so the check
     # is never enforced - confirm whether an assert was intended.
     print(score * factor < 1.01)
 def comparison_score(self,
                      profile,
                      data_language="en",
                      name_convention="father_surname"):
     '''
     Get the score value in comparison

     Starts from the comparison of the formal name and surname of both
     profiles, optionally replaces it by the best comparison among all the
     names known for each profile (with a penalty), and then adjusts the
     result with the birth/death range check in both directions, the gender
     and the dates of every event listed in MERGE_EVENTS. Partial scores are
     added and partial factors are multiplied.

     profile: the other profile to compare against; it must offer the same
         accessors that are used here on self.
     data_language: forwarded as "language" to the name helpers.
     name_convention: forwarded as "convention" to the name helpers.

     Returns the tuple (score, factor).
     '''
     score, factor = get_score_compare_names(self.getName(),
                                             self.getSurname(),
                                             profile.getName(),
                                             profile.getSurname(),
                                             language=data_language,
                                             convention=name_convention)
     # We compare now all the potential names, in case that there is a
     # different convention
     score_nicks = 0
     factor_nicks = 0
     for complete_name_self in self.get_all_names():
         name_self, surname_self, _ = get_name_surname_from_complete_name(
             complete_name_self,
             convention=name_convention,
             language=data_language)
         for complete_name_other in profile.get_all_names():
             name_other, surname_other, _ = get_name_surname_from_complete_name(
                 complete_name_other,
                 convention=name_convention,
                 language=data_language)
             score_int, factor_int = get_score_compare_names(
                 name_self,
                 surname_self,
                 name_other,
                 surname_other,
                 language=data_language,
                 convention=name_convention)
             # A pair replaces the current best only when it improves both
             # the score and the factor.
             if (score_int > score_nicks) and (factor_int > factor_nicks):
                 score_nicks = score_int
                 factor_nicks = factor_int
     # We only take the new score if bigger than the previous one, but we
     # include a penalty (we introduce preference to the score given by
     # formal names): the alternative score is halved.
     if (score_nicks > score) and (factor_nicks > factor) and (
             score_nicks * factor_nicks * 0.5 > score * factor):
         score = score_nicks * 0.5
         factor = factor_nicks
     # Comparing big differences in events: our earliest/latest events
     # against the events of the other profile...
     score1, factor1 = score_factor_birth_and_death(
         self.get_earliest_event_in_event_form(),
         self.get_latest_event_in_event_form(), profile.getEvents())
     # ...and the other profile's earliest/latest events against ours. Both
     # boundary events must come from the same profile; mixing the other
     # profile's earliest event with our latest one checks a range that
     # belongs to nobody.
     score2, factor2 = score_factor_birth_and_death(
         profile.get_earliest_event_in_event_form(),
         profile.get_latest_event_in_event_form(), self.getEvents())
     # In this stage we add all obtained scores and factors.
     score += score1 + score2
     factor = factor * factor1 * factor2
     # Comparing gender: only when both profiles report one.
     if (self.getGender()) and (profile.getGender()):
         if self.getGender() == profile.getGender():
             score += 0.5
         elif (self.getGender() != "U") and (profile.getGender() != "U"):
             # A mismatch is only penalised when neither gender is "U".
             factor = 0.1 * factor
     for event_name in MERGE_EVENTS:
         self_event = self.get_specific_event(event_name)
         other_event = profile.get_specific_event(event_name)
         if event_name == "marriage":
             # Marriages are handled as a sequence; only the first entry of
             # each profile is compared.
             self_event = self_event[0] if len(self_event) > 0 else None
             other_event = other_event[0] if len(other_event) > 0 else None
         if not (self_event and other_event):
             continue
         if not (self_event.is_any_date_available()
                 and other_event.is_any_date_available()):
             continue
         score_temp, factor_temp = get_score_compare_dates(
             self_event, other_event)
         event_type = self_event.get_event_type()
         # A marriage whose date comparison lowers the factor is ignored,
         # and residence events never contribute.
         if event_type == "marriage" and factor_temp < 1.0:
             continue
         if event_type == "residence":
             continue
         score += score_temp
         factor = factor * factor_temp
     return score, factor