def test_wrong_inputs(self):
    '''
    Names are still extracted when the surnames in the full name are spelled
    differently from the candidate surnames (metaphone differences)
    '''
    molpecerez_variants = ('Molpecérez', 'Molpezerez')
    gomez_variants = ('Gómez', 'Gomez')
    #Each case: expected name, full name, father surnames, mother surnames, extra options
    cases = (
        ("Petra Regalada", "Petra Regalada Molpezérez Gómez", molpecerez_variants, gomez_variants, {}),
        ("Segunda", "Segunda Molpécerez Gómez", molpecerez_variants, gomez_variants, {}),
        #Secure the updated version of metaphone takes into account b/v
        ("Francisco", "Francisco Vaca Gomez", ('Baca',), gomez_variants, {"language": "es"}),
    )
    for expected, full_name, father_surnames, mother_surnames, options in cases:
        #Fresh lists are handed over on every call, as the literal arguments did
        found = get_name_from_fullname(full_name, list(father_surnames), list(mother_surnames), **options)
        assert expected == found
def test_get_name(self):
    '''
    Check the given name obtained out of several complete names
    '''
    #Surnames, the same two lists are reused by the first two checks
    list_father = [FATHER_SURNAME]
    list_mother = [MOTHER_SURNAME]
    no_options = {}
    spanish = {"language": "es"}
    #Each check: full name, father surnames, mother surnames, extra options, expected name
    checks = [
        (FULL_NAME, list_father, list_mother, no_options, ACTUAL_NAME),
        (FULL_NAME_SPANISH, list_father, list_mother, no_options, ACTUAL_NAME),
        ("Eusebio Masa Y Viloria", ['Masa'], ['Viloria'], spanish, "Eusebio"),
        #This one checks the bug of "de Bargas" issue
        ("Geronima De Bargas Albarez", ['Vargas'], ['Álvarez'], spanish, "Gerónima"),
        #The bug with the "de" particle
        ("Florentina Montemayor Leon", ['Montemayor'], ['de Leon'], spanish, "Florentina"),
        #Checking the bug of "Maria de la Cruz" captured as just Maria, due to
        #similarity phonetically of Cruz and Grazia
        ("Maria De La Cruz Garcia", ['García', 'Grazia'], ['Díez', 'Díaz'], spanish, "María de la Cruz"),
    ]
    for full_name, father_surnames, mother_surnames, options, expected in checks:
        obtained = get_name_from_fullname(full_name, father_surnames, mother_surnames, **options)
        assert obtained == expected
def __get_profiles__(self):
    '''
    Read all the different profiles included inside the excel file.

    The sheet is scanned twice. The first pass only looks at the
    "father_full_name" and "mother_full_name" columns and keeps the most
    repeated name and surname of each parent; they are used to create
    self.father_profile, self.mother_profile and the surname given to every
    profile of the file. The second pass creates one profile per data row,
    filling it column by column, and stores the spouses in
    self.related_profiles and the parents found in "other_full_names" in
    self.parents_profiles, both indexed by the row counter.

    Once everything is read, missing birth places are taken from close enough
    baptisms, the partners and their parents are completed with the data of
    the profile they are linked to, and the profiles reported as merged by
    merge_profile are removed from self.profiles.

    Returns True when no gender or date setter reported a problem, False
    otherwise.

    Raises ValueError when the file does not provide any father or mother
    full name to deduce the parents from.
    '''
    #Function scope import, OrderedDict keeps the order of first appearance in any python version
    from collections import OrderedDict

    def most_repeated(counts, description):
        #max returns the first key reaching the highest count, so ties are
        #solved in favour of the candidate that appeared first in the file
        if not counts:
            raise ValueError("No " + description + " found in the input file")
        return max(counts, key=counts.get)

    current_sheet = self.loaded_data[self.sheet_title]
    parent_columns = ("father_full_name", "mother_full_name")
    #The id number to be used
    id_profiles = 0
    #Temporal variable checking the correct reading
    correct_introduction = True

    #The header row and the columns to scan are the same for every row, so they are read only once
    #NOTE(review): columns stop before max_column while rows include max_row; if max_column is
    #the index of the last used column the last column is never read - confirm it is intended
    first_column = column_index_from_string(self.initial_column)
    headers = [(column_index, current_sheet.cell(row=self.initial_row, column=column_index).value)
               for column_index in range(first_column, current_sheet.max_column)]
    data_rows = range(self.initial_row + 1, current_sheet.max_row + 1)

    #We firstly detect the names and surnames of the parents of the profile, counting
    #how many times each candidate appears, we cannot avoid the double iteration
    surname_counts = {criteria: OrderedDict() for criteria in parent_columns}
    name_counts = {criteria: OrderedDict() for criteria in parent_columns}
    for row in data_rows:
        for column_index, column_criteria in headers:
            if column_criteria not in parent_columns:
                continue
            cell_value = current_sheet.cell(row=row, column=column_index).value
            #If the cell_value is null we shall avoid continuing
            if cell_value is None:
                continue
            name_data = get_name_surname_from_complete_name(cell_value, convention=self.naming_convention, language=self.language)
            #We have two surnames or one? With two only the first word is kept
            surname_cand = name_data[1]
            if name_data[2] == 2:
                surname_cand = name_data[1].split()[0]
            surnames_seen = surname_counts[column_criteria]
            surnames_seen[surname_cand] = surnames_seen.get(surname_cand, 0) + 1
            names_seen = name_counts[column_criteria]
            names_seen[name_data[0]] = names_seen.get(name_data[0], 0) + 1

    father_surname = most_repeated(surname_counts["father_full_name"], "father surname")
    mother_surname = most_repeated(surname_counts["mother_full_name"], "mother surname")
    father_name = most_repeated(name_counts["father_full_name"], "father name")
    mother_name = most_repeated(name_counts["mother_full_name"], "mother name")
    #All the surname candidates, in order of first appearance, help later to split the full names
    potential_father_surname = list(surname_counts["father_full_name"])
    potential_mother_surname = list(surname_counts["mother_full_name"])

    self.father_profile = gen_profile(father_name, father_surname)
    self.mother_profile = gen_profile(mother_name, mother_surname)
    children_surname = get_children_surname(father_surname, mother_surname, self.naming_convention)

    #Now we read the complete file
    for row in data_rows:
        included_profile = gen_profile("TBD", children_surname)
        included_right = True
        for column_index, column_criteria in headers:
            cell_value = current_sheet.cell(row=row, column=column_index).value
            #We are ignoring all those cells that are empty.
            if not cell_value:
                continue
            this_introduction = True
            #Ok, now we go one by one each of the different values
            if column_criteria == "gender":
                this_introduction = included_profile.setCheckedGender(cell_value)
            elif column_criteria in LOCATION_EQUIVALENCE:
                included_profile.setPlaces(LOCATION_EQUIVALENCE[column_criteria], cell_value, self.language)
            elif column_criteria == "person_url":
                included_profile.setWebReference("https://familysearch.org/" + cell_value)
            elif column_criteria in date_fields:
                #Notice that we shall detect if the given date is a year or a specific date
                #we will make the different using "about" and using datetime in the background
                if is_year(cell_value):
                    date_read = datetime.strptime(str(cell_value.replace(" ", "")), "%Y").date()
                    this_introduction = self.__include_a_date__(column_criteria, included_profile, date_read, "ABOUT")
                else:
                    date_read = datetime.strptime(cell_value, "%d %b %Y").date()
                    this_introduction = self.__include_a_date__(column_criteria, included_profile, date_read, "EXACT")
            elif column_criteria == "full_name":
                included_profile.set_name(get_name_from_fullname(cell_value, potential_father_surname, potential_mother_surname, language=self.language))
                #In the case the name if not the same, we create it as nickname
                if cell_value != included_profile.returnFullName():
                    included_profile.add_nickname(cell_value)
            elif column_criteria == "spouse_full_name":
                #Here we create a new profile using the surname of the guy
                names = get_name_surname_from_complete_name(cell_value, convention=self.naming_convention, language=self.language)
                partner = gen_profile(names[0], names[1])
                partner.set_id(id_profiles)
                #If the surname is changed we shall include the previous surname in the nicknames
                if cell_value != partner.returnFullName():
                    partner.add_nickname(cell_value)
                #Now we link the profiles
                included_profile.set_marriage_id_link(id_profiles)
                self.related_profiles[id_profiles] = partner
            elif column_criteria == "other_full_names":
                #The separator provided by family search is semicolumn
                parents = cell_value.split(";")
                #We obtain firstly the different names
                father_name, father_surname, _ = get_name_surname_from_complete_name(parents[0], convention=self.naming_convention, language=self.language)
                #NOTE(review): when a single parent is listed mother_name and mother_surname keep the
                #value of a previous row (or the one deduced in the first pass) - confirm the default wanted
                if len(parents) == 2:
                    mother_name, mother_surname, _ = get_name_surname_from_complete_name(parents[1], convention=self.naming_convention, language=self.language)
                #The algorithm provides an empty surname, we fill it with not known
                if father_surname == "":
                    father_surname = NOT_KNOWN_VALUE
                if mother_surname == "":
                    mother_surname = NOT_KNOWN_VALUE
                #Create the standard profiles
                father = gen_profile(father_name, father_surname)
                mother = gen_profile(mother_name, mother_surname)
                #If the surname is changed we shall include the previous surname in the nicknames
                if parents[0] != father.returnFullName():
                    father.add_nickname(parents[0])
                if (len(parents) == 2) and (parents[1] != mother.returnFullName()):
                    mother.add_nickname(parents[1])
                #add gender
                father.setCheckedGender("M")
                mother.setCheckedGender("F")
                self.parents_profiles[id_profiles] = [father, mother]
            elif column_criteria in ignored_fields:
                pass
            else:
                #Lazy arguments log the same text and also accept an empty or non text header
                logging.warning("%s%s", COLUMN_NOT_FOUND, column_criteria)
            if not this_introduction:
                included_right = False
        #This is a way to later on identify the link between the profiles
        id_profiles += 1
        if not included_right:
            correct_introduction = False
        self.profiles.append(included_profile)

    #Now we know the data we fix some with the proper logic
    for profile_obtained in self.profiles:
        #If the baptism and birth are close enough we assign the birth place to the baptism place
        gen_data = profile_obtained.gen_data
        birth_d = gen_data.get("birth_date", None)
        bapt_d = gen_data.get("baptism_date", None)
        if birth_d and bapt_d and abs((bapt_d - birth_d).days) < DIFFERNCE_BIRTH_BAPT:
            place_birth = gen_data.get("birth_place", {}).get("raw", None)
            place_baptism = gen_data.get("baptism_place", {}).get("raw", None)
            if place_baptism and not place_birth:
                profile_obtained.setPlaces("birth_place", get_location_standard(profile_obtained.gen_data["baptism_place"]), self.language)
        if profile_obtained.gen_data.get("marriage_link", None) not in self.related_profiles:
            continue
        id_of_marriage = profile_obtained.gen_data["marriage_link"]
        partner = self.related_profiles[id_of_marriage]
        partner.setWebReference(profile_obtained.gen_data["web_ref"])
        #It is a partner so we add as opposite sex!
        partner.setCheckedGender(get_partner_gender(profile_obtained.gen_data["gender"]))
        partner.setCheckedDate("marriage_date", profile_obtained.gen_data["marriage_date"], profile_obtained.gen_data["accuracy_marriage_date"])
        partner.setPlaces("marriage_place", profile_obtained.gen_data["marriage_place"]["raw"], language=self.language)
        if id_of_marriage not in self.parents_profiles:
            continue
        #The parents stored under the same id are the ones read from "other_full_names" in that row
        father, mother = self.parents_profiles[id_of_marriage][0], self.parents_profiles[id_of_marriage][1]
        father.setWebReference(profile_obtained.gen_data["web_ref"])
        mother.setWebReference(profile_obtained.gen_data["web_ref"])
        spanish_convention = (self.naming_convention == "spanish_surname")
        surnames = get_splitted_name_from_complete_name(partner.gen_data["surname"], language=self.language)[0]
        if father.gen_data["surname"] == NOT_KNOWN_VALUE:
            #It might be the case that the surname is empty
            #Ok the data was not including the right data, but we know the surname
            if spanish_convention and len(surnames) != 0:
                father.gen_data["surname"] = surnames[0]
            else:
                father.gen_data["surname"] = partner.gen_data["surname"]
        if (mother.gen_data["surname"] == NOT_KNOWN_VALUE) and spanish_convention and (len(surnames) == 2):
            mother.gen_data["surname"] = surnames[1]
        if spanish_convention:
            #We need to ensure 2 surnames in spanish naming conventions
            if not (mother.gen_data["surname"] in partner.gen_data["surname"]) or (len(partner.gen_data["surname"].split()) == 1):
                #In the case we have 2 surnames, we try to eliminate the second surnames.
                partner_surname_data = get_splitted_name_from_complete_name(partner.gen_data["surname"], language=self.language)
                mother_surname_data = get_splitted_name_from_complete_name(mother.gen_data["surname"], language=self.language)
                #The name as read is kept as nickname before the surname is rebuilt
                if len(partner.gen_data["nicknames"]) == 0:
                    partner.add_nickname(partner.returnFullName())
                partner.gen_data["surname"] = " ".join([partner_surname_data[0][0], mother_surname_data[0][0]])

    #Finally, let's merge those profiles that are the same!
    indexes_to_remove = set()
    iterating_list = list(self.profiles)
    for i, profile in enumerate(iterating_list):
        #We are going one by one all the different profiles, skipping the ones already merged
        if i in indexes_to_remove:
            continue
        for j in range(i + 1, len(iterating_list)):
            merged = profile.merge_profile(iterating_list[j], language=self.language, convention=self.naming_convention)
            if merged:
                indexes_to_remove.add(j)
    #Deleting from the end keeps the remaining indexes valid
    for deletion in sorted(indexes_to_remove, reverse=True):
        del self.profiles[deletion]
    return correct_introduction