def test_wrong_inputs(self):
     '''
     Check that the given name is still extracted when the surnames inside the
     full name are not written like the candidate surnames (metaphone differences)
     '''
     #z/c swap together with an accent not present in every candidate
     two_names = get_name_from_fullname(
         "Petra Regalada Molpezérez Gómez",
         ['Molpecérez', 'Molpezerez'],
         ['Gómez', 'Gomez'])
     assert "Petra Regalada" == two_names

     #Accent placed on a different vowel than in the candidates
     single_name = get_name_from_fullname(
         "Segunda Molpécerez Gómez",
         ['Molpecérez', 'Molpezerez'],
         ['Gómez', 'Gomez'])
     assert "Segunda" == single_name

     #Secure the updated version of metaphone takes into account b/v
     spanish_name = get_name_from_fullname(
         "Francisco Vaca Gomez",
         ['Baca'],
         ['Gómez', 'Gomez'],
         language="es")
     assert "Francisco" == spanish_name
Esempio n. 2
0
    def test_get_name(self):
        '''
        Verify that the given name is isolated from a complete name once the
        candidate surnames of father and mother are provided
        '''
        #Surnames, the same two lists are reused for both reference full names
        father_candidates = [FATHER_SURNAME]
        mother_candidates = [MOTHER_SURNAME]
        for complete_name in (FULL_NAME, FULL_NAME_SPANISH):
            found = get_name_from_fullname(complete_name, father_candidates,
                                           mother_candidates)
            assert found == ACTUAL_NAME

        #Each case: complete name, father surnames, mother surnames, expected name
        spanish_cases = (
            ("Eusebio Masa Y Viloria", ['Masa'], ['Viloria'], "Eusebio"),
            #This one checks the bug of "de Bargas" issue
            ("Geronima De Bargas Albarez", ['Vargas'], ['Álvarez'],
             "Gerónima"),
            #The bug with the "de" particle
            ("Florentina Montemayor Leon", ['Montemayor'], ['de Leon'],
             "Florentina"),
            #Checking the bug of "Maria de la Cruz" captured as just Maria,
            #due to similarity phonetically of Cruz and Grazia
            ("Maria De La Cruz Garcia", ['García', 'Grazia'],
             ['Díez', 'Díaz'], "María de la Cruz"),
        )
        for complete_name, father_list, mother_list, expected in spanish_cases:
            found = get_name_from_fullname(complete_name, father_list,
                                           mother_list, language="es")
            assert found == expected
 def __get_profiles__(self):
     '''
     Build all the profiles described by the rows of the loaded excel sheet.

     The sheet is processed in four steps:
       1. The "father_full_name" and "mother_full_name" columns are scanned and the most
          repeated name and surname of each parent are used to create self.father_profile
          and self.mother_profile, as well as the surname given to every row profile.
       2. Each data row becomes a profile appended to self.profiles. A spouse found in the
          row is stored in self.related_profiles and the persons found in "other_full_names"
          in self.parents_profiles, both using the row counter as key.
       3. The stored profiles are completed: the birth place is taken from the baptism place
          when both dates are close enough, and spouses (and the persons of
          "other_full_names", handled as the parents of the spouse) receive web reference,
          gender, marriage data and surnames.
       4. Profiles for which merge_profile reports a merge are removed from self.profiles.

     Returns True if every gender and date could be stored, False as soon as one of the
     setters reports a failure.
     Raises ValueError if the sheet does not contain any father or mother full name.
     '''
     current_sheet = self.loaded_data[self.sheet_title]
     #Result of the function, turned into False as soon as one value is not accepted
     correct_introduction = True
     #The id number to be used, it grows by one with each data row
     id_profiles = 0
     #The header cells are the same for all rows, so they are only read once.
     #NOTE(review): max_column is excluded from the range, so the last column of the sheet is
     #never read, while rows do include max_row. Kept as it was, confirm if it is intended.
     headers = [(column_index, current_sheet.cell(row=self.initial_row, column=column_index).value)
                for column_index in range(column_index_from_string(self.initial_column), current_sheet.max_column)]
     data_rows = range(self.initial_row+1, current_sheet.max_row+1)
     #Number of repetitions of each candidate. Dictionaries keep the insertion order, so the
     #candidates stay sorted by first appearance and the first one wins in case of a tie.
     father_surname_count = {}
     mother_surname_count = {}
     father_name_count = {}
     mother_name_count = {}
     #We firstly detect the surnames of the parents of the profile, we cannot avoid the double
     #iteration
     for row in data_rows:
         for column_index, column_criteria in headers:
             if column_criteria not in ("father_full_name", "mother_full_name"):
                 continue
             cell_value = current_sheet.cell(row=row, column=column_index).value
             #If the cell_value is null we shall avoid continuing
             if cell_value is None:
                 continue
             name_data = get_name_surname_from_complete_name(cell_value, convention=self.naming_convention, language=self.language)
             #We have two surnames or one? With two of them only the first one is the candidate
             surname_cand = name_data[1]
             if name_data[2] == 2:
                 surname_cand = name_data[1].split()[0]
             if column_criteria == "father_full_name":
                 surname_count, name_count = father_surname_count, father_name_count
             else:
                 surname_count, name_count = mother_surname_count, mother_name_count
             surname_count[surname_cand] = surname_count.get(surname_cand, 0) + 1
             name_count[name_data[0]] = name_count.get(name_data[0], 0) + 1
     #The most repeated value is the selected one, max raises ValueError if nothing was found
     father_surname = max(father_surname_count, key=father_surname_count.get)
     mother_surname = max(mother_surname_count, key=mother_surname_count.get)
     father_name = max(father_name_count, key=father_name_count.get)
     mother_name = max(mother_name_count, key=mother_name_count.get)
     #All the surnames seen are kept, they are needed to split the full name of each profile
     potential_father_surname = list(father_surname_count)
     potential_mother_surname = list(mother_surname_count)
     self.father_profile = gen_profile(father_name, father_surname)
     self.mother_profile = gen_profile(mother_name, mother_surname)
     children_surname = get_children_surname(father_surname, mother_surname, self.naming_convention)
     #Now we read the complete file
     for row in data_rows:
         included_profile = gen_profile("TBD", children_surname)
         included_right = True
         for column_index, column_criteria in headers:
             cell_value = current_sheet.cell(row=row, column=column_index).value
             #We are ignoring all those cells that are empty.
             if not cell_value:
                 continue
             this_introduction = True
             #Ok, now we go one by one each of the different values
             if column_criteria == "gender":
                 this_introduction = included_profile.setCheckedGender(cell_value)
             elif column_criteria in LOCATION_EQUIVALENCE:
                 included_profile.setPlaces(LOCATION_EQUIVALENCE[column_criteria], cell_value, self.language)
             elif column_criteria == "person_url":
                 included_profile.setWebReference("https://familysearch.org/" +cell_value)
             elif column_criteria in date_fields:
                 #Notice that we shall detect if the given date is a year or a specific date
                 #we will make the different using "about" and using datetime in the background
                 if is_year(cell_value):
                     #The value is converted to text first, so a numeric cell does not fail
                     year_date = datetime.strptime(str(cell_value).replace(" ", ""), "%Y").date()
                     this_introduction = self.__include_a_date__(column_criteria, included_profile, year_date, "ABOUT")
                 else:
                     exact_date = datetime.strptime(cell_value, "%d %b %Y").date()
                     this_introduction = self.__include_a_date__(column_criteria, included_profile, exact_date, "EXACT")
             elif column_criteria == "full_name":
                 included_profile.set_name(get_name_from_fullname(cell_value, potential_father_surname, potential_mother_surname, language=self.language))
                 #In the case the name if not the same, we create it as nickname
                 if cell_value != included_profile.returnFullName():
                     included_profile.add_nickname(cell_value)
             elif column_criteria == "spouse_full_name":
                 #Here we create a new profile using the surname of the guy
                 names = get_name_surname_from_complete_name(cell_value, convention=self.naming_convention, language=self.language)
                 partner = gen_profile(names[0], names[1])
                 partner.set_id(id_profiles)
                 #If the surname is changed we shall include the previous surname in the nicknames
                 if cell_value != partner.returnFullName():
                     partner.add_nickname(cell_value)
                 #Now we link the profiles
                 included_profile.set_marriage_id_link(id_profiles)
                 self.related_profiles[id_profiles] = partner
             elif column_criteria == "other_full_names":
                 #The separator provided by family search is semicolumn
                 parents = cell_value.split(";")
                 both_parents = (len(parents) == 2)
                 #We obtain firstly the different names
                 first_name, first_surname, _ = get_name_surname_from_complete_name(parents[0], convention=self.naming_convention, language=self.language)
                 #Without a second person the data is unknown. Own variables are used here, in
                 #order not to inherit the values of a previous row or of the detected parents.
                 second_name, second_surname = NOT_KNOWN_VALUE, ""
                 if both_parents:
                     second_name, second_surname, _ = get_name_surname_from_complete_name(parents[1], convention=self.naming_convention, language=self.language)
                 #The algorithm provides an empty surname, we fill it with not known
                 if first_surname == "":
                     first_surname = NOT_KNOWN_VALUE
                 if second_surname == "":
                     second_surname = NOT_KNOWN_VALUE
                 #Create the standard profiles
                 father = gen_profile(first_name, first_surname)
                 mother = gen_profile(second_name, second_surname)
                 #If the surname is changed we shall include the previous surname in the nicknames
                 if parents[0] != father.returnFullName():
                     father.add_nickname(parents[0])
                 if both_parents and (parents[1] != mother.returnFullName()):
                     mother.add_nickname(parents[1])
                 #add gender
                 father.setCheckedGender("M")
                 mother.setCheckedGender("F")
                 self.parents_profiles[id_profiles] = [father, mother]
             elif column_criteria in ignored_fields:
                 pass
             else:
                 #The header might be empty or not a text, so it is converted before joining
                 logging.warning(COLUMN_NOT_FOUND + str(column_criteria))
             if not this_introduction:
                 included_right = False
         #This is a way to later on identify the link between the profiles
         id_profiles += 1
         if not included_right:
             correct_introduction = False
         self.profiles.append(included_profile)
     #Now we know the data we fix some with the proper logic
     for profile_obtained in self.profiles:
         #If the baptism and birth are close enough we assign the birth place to the baptism place
         birth_d = profile_obtained.gen_data.get("birth_date", None)
         bapt_d = profile_obtained.gen_data.get("baptism_date", None)
         if birth_d and bapt_d:
             difference = bapt_d - birth_d
             if abs(difference.days) < DIFFERNCE_BIRTH_BAPT:
                 place_birth = profile_obtained.gen_data.get("birth_place", {}).get("raw", None)
                 place_baptism = profile_obtained.gen_data.get("baptism_place", {}).get("raw", None)
                 if place_baptism and not place_birth:
                     profile_obtained.setPlaces("birth_place",get_location_standard(profile_obtained.gen_data["baptism_place"]), self.language)
         #Only the profiles linked to a spouse need further data
         id_of_marriage = profile_obtained.gen_data.get("marriage_link", None)
         if id_of_marriage not in self.related_profiles:
             continue
         partner = self.related_profiles[id_of_marriage]
         partner.setWebReference(profile_obtained.gen_data["web_ref"])
         #It is a partner so we add as opposite sex!
         partner.setCheckedGender(get_partner_gender(profile_obtained.gen_data["gender"]))
         #A spouse can be included in a row without marriage data, then there is nothing to copy
         if "marriage_date" in profile_obtained.gen_data:
             partner.setCheckedDate("marriage_date", profile_obtained.gen_data["marriage_date"], profile_obtained.gen_data["accuracy_marriage_date"])
         if "marriage_place" in profile_obtained.gen_data:
             partner.setPlaces("marriage_place", profile_obtained.gen_data["marriage_place"]["raw"], language=self.language)
         if id_of_marriage not in self.parents_profiles:
             continue
         father = self.parents_profiles[id_of_marriage][0]
         mother = self.parents_profiles[id_of_marriage][1]
         father.setWebReference(profile_obtained.gen_data["web_ref"])
         mother.setWebReference(profile_obtained.gen_data["web_ref"])
         surnames = get_splitted_name_from_complete_name(partner.gen_data["surname"], language=self.language)[0]
         spanish_naming = (self.naming_convention == "spanish_surname")
         if father.gen_data["surname"] == NOT_KNOWN_VALUE:
             #It might be the case that the surname is empty
             #Ok the data was not including the right data, but we know the surname
             if spanish_naming and len(surnames) != 0:
                 father.gen_data["surname"] = surnames[0]
             else:
                 father.gen_data["surname"] = partner.gen_data["surname"]
         if (mother.gen_data["surname"] == NOT_KNOWN_VALUE) and spanish_naming and (len(surnames) == 2):
             mother.gen_data["surname"] = surnames[1]
         if spanish_naming:
             #We need to ensure 2 surnames in spanish naming conventions
             if (mother.gen_data["surname"] not in partner.gen_data["surname"]) or (len(partner.gen_data["surname"].split()) == 1):
                 #In the case we have 2 surnames, we try to eliminate the second surnames.
                 partner_surname_data = get_splitted_name_from_complete_name(partner.gen_data["surname"], language=self.language)
                 mother_surname_data = get_splitted_name_from_complete_name(mother.gen_data["surname"], language=self.language)
                 #The name as it was read is kept as nickname before the surname is replaced
                 if len(partner.gen_data["nicknames"]) == 0:
                     partner.add_nickname(partner.returnFullName())
                 partner.gen_data["surname"] = " ".join([partner_surname_data[0][0], mother_surname_data[0][0]])
     #Finally, let's merge those profiles that are the same!
     indexes_to_remove = set()
     iterating_list = list(self.profiles)
     for i, base_profile in enumerate(iterating_list):
         #We are going one by one all the different profiles, skipping those already merged
         if i in indexes_to_remove:
             continue
         for j, other_prof in enumerate(iterating_list[i+1:], start=i+1):
             if base_profile.merge_profile(other_prof, language=self.language, convention=self.naming_convention):
                 indexes_to_remove.add(j)
     #Deleting from the end keeps valid the indexes still pending
     for deletion in sorted(indexes_to_remove, reverse=True):
         del self.profiles[deletion]
     return correct_introduction