def test_name_splitted(self):
     '''
     Check that complete names are split into their individual components
     '''
     #Plain two word name, with extra whitespace in between and at the end
     simple_parts = get_splitted_name_from_complete_name("Juan  Martínez ")[0]
     for position, expected in enumerate(("Juan", "Martínez")):
         assert simple_parts[position] == expected

     #Spanish input with mixed capitalisation: the asserted result keeps the
     #particles together with their surname, in normalised case (and with an
     #accent that the input does not carry)
     compound_parts = get_splitted_name_from_complete_name("Juan   DE la MANcha del chaCON ", language="es")[0]
     for position, expected in enumerate(("Juan", "de la Mancha", "del Chacón")):
         assert compound_parts[position] == expected
Exemple #2
0
 def merge_profile(self, profile, language="en", convention="father_surname"):
     '''
     Merge into this profile the information stored in the given profile.

     The merge only takes place when the product of the two values returned
     by comparison_score is above 2.0. In that case every key of ALL_DATA
     present in the other profile is processed:
       - data missing in this profile is copied as is
       - name / surname are replaced when the other one splits in more parts
       - comments are concatenated, separated by a new line
       - keys in DATA_LISTS receive the values they do not contain yet
       - keys in EVENT_TYPE take the date of the other event when it is
         "EXACT" and the location parameters that are still undefined here

     Returns True if the information has been merged and False if the merge
     is not possible.
     '''
     score, factor = self.comparison_score(profile, data_language = language,  name_convention = convention)
     #Written as "not >" so a score that cannot be compared (e.g. NaN) is still rejected
     if not (score*factor > 2.0):
         return False
     #Ok, we consider the size big enough
     for key_data in ALL_DATA:
         new_value = profile.gen_data.get(key_data, None)
         if new_value is None:
             #Nothing to take from the other profile for this key
             continue
         if self.gen_data.get(key_data, None) is None:
             #So is a new data!
             self.gen_data[key_data] = new_value
             continue
         #We have data in both!
         if (key_data == "name"):
             name1 = get_splitted_name_from_complete_name(self.getName(), language=language)
             name2 = get_splitted_name_from_complete_name(profile.getName(), language=language)
             #Index 0 holds the list of parts (as in the surname branch below); comparing
             #the length of the whole result would never detect a more complete name
             if (len(name2[0]) > len(name1[0])): self.set_name(profile.getName())
         elif (key_data == "surname"):
             surname1 = get_splitted_name_from_complete_name(self.getSurname(), language=language)
             surname2 = get_splitted_name_from_complete_name(profile.getSurname(), language=language)
             if (len(surname2[0]) > len(surname1[0])): self.set_surname(profile.getSurname())
         elif (key_data == "comments"):
             self.gen_data["comments"] += "\n" + profile.gen_data["comments"]
         elif (key_data in DATA_LISTS):
             own_list = self.gen_data[key_data]
             for info in new_value:
                 if info not in own_list: own_list.append(info)
         elif (key_data in EVENT_TYPE):
             #Merging the data input
             event_new = new_value
             event_own = self.gen_data[key_data]
             if event_new.get_accuracy() == "EXACT":
                 event_own.setDate(event_new.get_year(),event_new.get_month(),event_new.get_day(),event_new.get_accuracy())
             new_location = event_new.get_location()
             for key_location in LOCATION_KEYS:
                 if new_location and (new_location.get(key_location, None) is not None):
                     own_location = event_own.get_location()
                     if own_location and (own_location.get(key_location, None) is None):
                         #The value is taken from the location that has just been checked,
                         #events are only accessed through get_location() in this method
                         event_own.setParameterInLocation(key_location, new_location[key_location])
     return True
Exemple #3
0
 def merge_profile(self, profile, language="en", convention="father_surname"):
     '''
     Merge into this profile the information stored in the given profile.

     The merge only takes place when the product of the two values returned
     by comparison_score is above 2.0. In that case every key of ALL_DATA
     present in the other profile is processed:
       - data missing in this profile is copied as is
       - name / surname are replaced when the other one splits in more parts
       - comments are concatenated, separated by a new line
       - keys in DATA_LISTS receive the values they do not contain yet
       - keys in MERGE_DATES take the date of the other profile when its
         accuracy is "EXACT"
       - keys in DATA_PLACES receive the location values still undefined here

     Returns True if the information has been merged and False if the merge
     is not possible.
     '''
     score, factor = self.comparison_score(profile, data_language = language,  name_convention = convention)
     #Written as "not >" so a score that cannot be compared (e.g. NaN) is still rejected
     if not (score*factor > 2.0):
         return False
     #Ok, we consider the size big enough
     for key_data in ALL_DATA:
         new_value = profile.gen_data.get(key_data, None)
         if new_value is None:
             #Nothing to take from the other profile for this key
             continue
         if self.gen_data.get(key_data, None) is None:
             #So is a new data!
             self.gen_data[key_data] = new_value
             continue
         #We have data in both!
         if (key_data == "name"):
             name1 = get_splitted_name_from_complete_name(self.gen_data["name"], language=language)
             name2 = get_splitted_name_from_complete_name(profile.gen_data["name"], language=language)
             #Index 0 holds the list of parts (as in the surname branch below); comparing
             #the length of the whole result would never detect a more complete name
             if (len(name2[0]) > len(name1[0])): self.gen_data["name"] = profile.gen_data["name"]
         elif (key_data == "surname"):
             surname1 = get_splitted_name_from_complete_name(self.gen_data["surname"], language=language)
             surname2 = get_splitted_name_from_complete_name(profile.gen_data["surname"], language=language)
             if (len(surname2[0]) > len(surname1[0])): self.gen_data["surname"] = profile.gen_data["surname"]
         elif (key_data == "comments"):
             self.gen_data["comments"] += "\n" + profile.gen_data["comments"]
         elif (key_data in DATA_LISTS):
             own_list = self.gen_data[key_data]
             for info in new_value:
                 if info not in own_list: own_list.append(info)
         elif (key_data in MERGE_DATES):
             #EVENT_DATA is indexed by the event name, i.e. the key without the "_date" suffix
             event_keys = EVENT_DATA[key_data.replace("_date","")]
             accuracy_key = event_keys["accuracy"]
             date_key = event_keys["date"]
             if profile.gen_data[accuracy_key] == "EXACT":
                 self.gen_data[accuracy_key] = "EXACT"
                 self.gen_data[date_key] = profile.gen_data[date_key]
         elif (key_data in DATA_PLACES):
             own_place = self.gen_data[key_data]
             for key_location in LOCATION_KEYS:
                 if new_value.get(key_location, None) is not None:
                     if own_place.get(key_location, None) is None:
                         own_place[key_location] = new_value[key_location]
     return True
 def __get_profiles__(self):
     '''
     This function will take all different profiles included inside the excel file

     Steps performed on the sheet self.sheet_title of self.loaded_data:
       1. The father_full_name / mother_full_name columns are scanned and the
          most repeated name and surname of each parent are used to create
          self.father_profile and self.mother_profile, as well as the surname
          given to every profile of the file.
       2. Every row below the header row (self.initial_row) is turned into a
          profile appended to self.profiles. Spouses are stored in
          self.related_profiles and the profiles created from the
          other_full_names column in self.parents_profiles, both indexed by
          the row counter.
       3. Some data is completed: the birth place is taken from the baptism
          place when both dates are close, and the data of partners and of
          their parents is filled in.
       4. Profiles accepted by merge_profile are merged and the duplicates
          are removed from self.profiles.

     Returns True if every gender and date found in the file was accepted by
     the profiles, False otherwise.
     Raises ValueError if the file does not contain any father_full_name or
     any mother_full_name value (no candidate to choose from).
     '''
     current_sheet = self.loaded_data[self.sheet_title]
     #The id number to be used
     id_profiles = 0
     #Temporal variable checking the correct reading
     correct_introduction = True
     #max_row and max_column are 1-based indexes of the last row/column with data, so
     #both need +1 to be included in a range; without it the last column was never read
     data_rows = range(self.initial_row+1, current_sheet.max_row+1)
     first_column = column_index_from_string(self.initial_column)
     #The header row does not change while reading, we only need to read it once
     headers = [(column_index, current_sheet.cell(row=self.initial_row, column=column_index).value)
                for column_index in range(first_column, current_sheet.max_column+1)]
     #Intermediate variables for potential parent surnames in the input file
     potential_father_surname = []
     potential_father_surname_repetitions = []
     potential_mother_surname = []
     potential_mother_surname_repetitions = []
     #Intermediate variables for potential parent names in the input file
     potential_father_name = []
     potential_father_name_repetitions = []
     potential_mother_name = []
     potential_mother_name_repetitions = []

     def count_candidate(candidate, candidates, repetitions):
         #Adds one appearance of candidate, candidates are kept in order of first appearance
         if candidate not in candidates:
             candidates.append(candidate)
             repetitions.append(1)
         else:
             repetitions[candidates.index(candidate)] += 1

     def most_repeated(candidates, repetitions):
         #First candidate with the highest count, ValueError if there is no candidate
         return candidates[repetitions.index(max(repetitions))]

     #We firstly detect the surnames of the parents of the profile,we cannot avoid the double
     #iteration
     for row in data_rows:
         for column_index, column_criteria in headers:
             if column_criteria not in ["father_full_name", "mother_full_name"]:
                 continue
             cell_value = current_sheet.cell(row=row, column=column_index).value
             #If the cell_value is null we shall avoid continuing
             if cell_value is None:
                 continue
             name_data = get_name_surname_from_complete_name(cell_value, convention=self.naming_convention, language=self.language)
             #We have two surnames or one?
             surname_cand = name_data[1]
             if (name_data[2] == 2):
                 surname_cand = name_data[1].split()[0]
             if (column_criteria == "father_full_name"):
                 count_candidate(surname_cand, potential_father_surname, potential_father_surname_repetitions)
                 count_candidate(name_data[0], potential_father_name, potential_father_name_repetitions)
             else:
                 count_candidate(surname_cand, potential_mother_surname, potential_mother_surname_repetitions)
                 count_candidate(name_data[0], potential_mother_name, potential_mother_name_repetitions)
     father_surname = most_repeated(potential_father_surname, potential_father_surname_repetitions)
     mother_surname = most_repeated(potential_mother_surname, potential_mother_surname_repetitions)
     father_name = most_repeated(potential_father_name, potential_father_name_repetitions)
     mother_name = most_repeated(potential_mother_name, potential_mother_name_repetitions)
     self.father_profile = gen_profile(father_name, father_surname)
     self.mother_profile = gen_profile(mother_name, mother_surname)
     children_surname = get_children_surname(father_surname, mother_surname, self.naming_convention)
     #Now we read the complete file
     for row in data_rows:
         included_profile = gen_profile("TBD", children_surname)
         included_right = True
         for column_index, column_criteria in headers:
             cell_value = current_sheet.cell(row=row, column=column_index).value
             #We are ignoring all those cells that are empty.
             if not cell_value:
                 continue
             this_introduction = True
             #Ok, now we go one by one each of the different values
             if (column_criteria == "gender"):
                 this_introduction = included_profile.setCheckedGender(cell_value)
             elif (column_criteria in LOCATION_EQUIVALENCE):
                 included_profile.setPlaces(LOCATION_EQUIVALENCE[column_criteria], cell_value, self.language)
             elif (column_criteria == "person_url"):
                 included_profile.setWebReference("https://familysearch.org/" +cell_value)
             elif (column_criteria in date_fields):
                 #Notice that we shall detect if the given date is a year or a specific date
                 #we will make the different using "about" and using datetime in the background
                 if (is_year(cell_value)):
                     #str() goes first: a year stored as a number in the sheet has no replace()
                     year_text = str(cell_value).replace(" ", "")
                     this_introduction = self.__include_a_date__(column_criteria, included_profile, datetime.strptime(year_text, "%Y").date(), "ABOUT")
                 else:
                     this_introduction = self.__include_a_date__(column_criteria, included_profile, datetime.strptime(cell_value, "%d %b %Y").date(), "EXACT")
             elif (column_criteria == "full_name"):
                 included_profile.set_name(get_name_from_fullname(cell_value,potential_father_surname, potential_mother_surname, language=self.language))
                 #In the case the name if not the same, we create it as nickname
                 if (cell_value != included_profile.returnFullName()): included_profile.add_nickname(cell_value)
             elif (column_criteria == "spouse_full_name"):
                 #Here we create a new profile using the surname of the guy
                 names = get_name_surname_from_complete_name(cell_value, convention=self.naming_convention, language=self.language)
                 partner = gen_profile(names[0], names[1])
                 partner.set_id(id_profiles)
                 #If the surname is changed we shall include the previous surname in the nicknames
                 if (cell_value != partner.returnFullName()): partner.add_nickname(cell_value)
                 #Now we link the profiles
                 included_profile.set_marriage_id_link(id_profiles)
                 self.related_profiles[id_profiles] = partner
             elif (column_criteria == "other_full_names"):
                 #The separator provided by family search is semicolumn
                 parents = cell_value.split(";")
                 #We obtain firstly the different names
                 other_father_name, other_father_surname, _ = get_name_surname_from_complete_name(parents[0], convention=self.naming_convention, language=self.language)
                 if (len(parents) == 2):
                     other_mother_name, other_mother_surname, _ = get_name_surname_from_complete_name(parents[1], convention=self.naming_convention, language=self.language)
                 else:
                     #No second name in the cell: the mother is unknown, we shall not reuse the
                     #values of a previous row or of the parents of the family
                     #NOTE(review): NOT_KNOWN_VALUE is assumed to be valid also as name - confirm
                     other_mother_name, other_mother_surname = NOT_KNOWN_VALUE, NOT_KNOWN_VALUE
                 #The algorithm provides an empty surname, we fill it with not known
                 if (other_father_surname == ""): other_father_surname = NOT_KNOWN_VALUE
                 if (other_mother_surname == ""): other_mother_surname = NOT_KNOWN_VALUE
                 #Create the standard profiles
                 father = gen_profile(other_father_name, other_father_surname)
                 mother = gen_profile(other_mother_name, other_mother_surname)
                 #If the surname is changed we shall include the previous surname in the nicknames
                 if (parents[0] != father.returnFullName()): father.add_nickname(parents[0])
                 if (len(parents) == 2) and (parents[1] != mother.returnFullName()): mother.add_nickname(parents[1])
                 #add gender
                 father.setCheckedGender("M")
                 mother.setCheckedGender("F")
                 self.parents_profiles[id_profiles] = [father, mother]
             elif (column_criteria in ignored_fields):
                 pass
             else:
                 #Formatted by logging so a header that is not a string does not break the warning
                 logging.warning("%s%s", COLUMN_NOT_FOUND, column_criteria)
             if (not this_introduction): included_right = False
         #This is a way to later on identify the link between the profiles
         id_profiles += 1
         if (not included_right): correct_introduction = False
         self.profiles.append(included_profile)
     #Now we know the data we fix some with the proper logic
     for profile_obtained in self.profiles:
         #If the baptism and birth are close enough we assign the birth place to the baptism place
         birth_d = profile_obtained.gen_data.get("birth_date", None)
         bapt_d = profile_obtained.gen_data.get("baptism_date", None)
         if birth_d and bapt_d:
             difference = bapt_d - birth_d
             if abs(difference.days) < DIFFERNCE_BIRTH_BAPT:
                 place_birth = profile_obtained.gen_data.get("birth_place", {}).get("raw", None)
                 place_baptism = profile_obtained.gen_data.get("baptism_place", {}).get("raw", None)
                 if place_baptism and not place_birth:
                     profile_obtained.setPlaces("birth_place",get_location_standard(profile_obtained.gen_data["baptism_place"]), self.language)
         if profile_obtained.gen_data.get("marriage_link", None) not in self.related_profiles:
             #No partner was created for this profile
             continue
         id_of_marriage = profile_obtained.gen_data["marriage_link"]
         partner = self.related_profiles[id_of_marriage]
         partner.setWebReference(profile_obtained.gen_data["web_ref"])
         #It is a partner so we add as opposite sex!
         partner.setCheckedGender(get_partner_gender(profile_obtained.gen_data["gender"]))
         partner.setCheckedDate("marriage_date", profile_obtained.gen_data["marriage_date"], profile_obtained.gen_data["accuracy_marriage_date"]  )
         partner.setPlaces("marriage_place", profile_obtained.gen_data["marriage_place"]["raw"], language=self.language )
         if id_of_marriage not in self.parents_profiles:
             continue
         father, mother = self.parents_profiles[id_of_marriage][0], self.parents_profiles[id_of_marriage][1]
         father.setWebReference(profile_obtained.gen_data["web_ref"])
         mother.setWebReference(profile_obtained.gen_data["web_ref"])
         surnames = get_splitted_name_from_complete_name(partner.gen_data["surname"], language=self.language)[0]
         if (father.gen_data["surname"] == NOT_KNOWN_VALUE):
             #It might be the case that the surname is empty
             #Ok the data was not including the right data, but we know the surname
             if (self.naming_convention == "spanish_surname" and len(surnames) != 0):
                 father.gen_data["surname"] = surnames[0]
             else:
                 father.gen_data["surname"] = partner.gen_data["surname"]
         if (mother.gen_data["surname"] == NOT_KNOWN_VALUE) and (self.naming_convention == "spanish_surname") and (len(surnames) == 2):
             mother.gen_data["surname"] = surnames[1]
         if (self.naming_convention == "spanish_surname"):
             #We need to ensure 2 surnames in spanish naming conventions
             if not (mother.gen_data["surname"] in partner.gen_data["surname"]) or (len(partner.gen_data["surname"].split()) == 1):
                 #In the case we have 2 surnames, we try to eliminate the second surnames.
                 partner_surname_data = get_splitted_name_from_complete_name(partner.gen_data["surname"], language=self.language)
                 mother_surname_data = get_splitted_name_from_complete_name(mother.gen_data["surname"], language=self.language)
                 if len(partner.gen_data["nicknames"]) == 0: partner.add_nickname(partner.returnFullName())
                 partner.gen_data["surname"] = " ".join([partner_surname_data[0][0], mother_surname_data[0][0]])
     #Finally, let's merge those profiles that are the same!
     indexes_to_remove = set()
     iterating_list = list(self.profiles)
     for i, current_profile in enumerate(iterating_list):
         #We are going one by one all the different profiles, those already merged are skipped
         if i in indexes_to_remove:
             continue
         for j, other_prof in enumerate(iterating_list[i+1:], i+1):
             if current_profile.merge_profile(other_prof, language=self.language, convention=self.naming_convention):
                 indexes_to_remove.add(j)
     #Deleting from the end keeps the remaining indexes valid
     for deletion in sorted(indexes_to_remove, reverse=True):
         del self.profiles[deletion]
     return correct_introduction