def test_parse_from_string():
    """Parse two in-memory GEDCOM snippets and check the parsed element tree.

    The same ``Parser`` instance is reused for both snippets; after each
    ``parse`` call the first root child is inspected.
    """

    def as_byte_lines(text):
        # The parser is fed a list of encoded lines, each terminated by '\n'.
        return [(line + '\n').encode('utf-8-sig') for line in text.splitlines()]

    individual_source = """0 @I5@ INDI
1 NAME First /Last/
1 SEX M
1 BIRT
2 DATE 1 JAN 1900
2 PLAC Kirkland, King, Washington, USA
3 MAP
4 LATI N47.680663
4 LONG W122.234319
"""

    parser = Parser()
    parser.parse(as_byte_lines(individual_source))
    individual = parser.get_root_child_elements()[0]

    assert isinstance(individual, IndividualElement)
    assert individual.get_tag() == 'INDI'
    assert individual.get_pointer() == '@I5@'

    individual_children = individual.get_child_elements()
    assert len(individual_children) == 3
    for position, expected_tag in enumerate(('NAME', 'SEX', 'BIRT')):
        assert individual_children[position].get_tag() == expected_tag

    family_source = """0 @F28@ FAM
1 HUSB @I80@
1 WIFE @I81@
1 CHIL @I9@
2 _FREL Natural
2 _MREL Natural
1 CHIL @I84@
2 _FREL Natural
2 _MREL Natural
1 CHIL @I85@
2 _FREL Natural
2 _MREL Natural
"""

    parser.parse(as_byte_lines(family_source))
    family = parser.get_root_child_elements()[0]

    assert family.get_tag() == 'FAM'
    assert family.get_pointer() == '@F28@'

    family_children = family.get_child_elements()
    assert len(family_children) == 5
    for position, expected_tag in enumerate(('HUSB', 'WIFE', 'CHIL')):
        assert family_children[position].get_tag() == expected_tag
    assert family_children[3].get_value() == '@I84@'
def setUp(self):
    """Parse the sample GEDCOM file and expose the results on ``self``.

    Stores the parser itself, its root element and the list of root child
    elements as ``self.parser``, ``self.root`` and ``self.child_elements``.
    """
    parser = Parser()
    parser.parse_file('tests/files/Musterstammbaum.ged')

    self.parser = parser
    self.root = parser.get_root_element()
    self.child_elements = parser.get_root_child_elements()
def parse(self, gedcom_file_path: str):
    """Parse the GEDCOM file at ``gedcom_file_path`` and dispatch its records.

    Every root child that is an ``IndividualElement`` is handed to
    ``self._parse_individual``; every ``FamilyElement`` is handed to
    ``self._parse_family``. All other root children are ignored.
    """
    gedcom_parser = Parser()
    gedcom_parser.parse_file(gedcom_file_path)

    for record in gedcom_parser.get_root_child_elements():
        if isinstance(record, IndividualElement):
            self._parse_individual(record)
            continue
        if isinstance(record, FamilyElement):
            self._parse_family(record)
def handle(self, *args, **kwargs):
    """Import people and families from a GEDCOM file and report run counters.

    The file name is read from ``kwargs["file name"]`` and is looked up
    inside the fixed ``gedcom_files`` directory. Person records are handled
    first so that family handling can look up the parents; afterwards
    ``add_person_family_values`` is called with ``self.child_family_dict``.
    The run counters are written to ``self.stdout`` and to ``ImportInfo.txt``
    in the current working directory.

    Raises:
        CommandError: if the file name does not end in ``.ged`` or the file
            does not exist in the expected directory.
    """
    filename = kwargs["file name"]

    # validate that the user gave file with extension ged
    if filename.suffix != ".ged":
        raise CommandError("Please specify GEDCOM file, ex: myGedcom.ged")

    # Check that the file is there
    # @@TODO: update to take the whole path (so it doesn't need to be saved in a particular folder)
    path = Path("mysite/familytree/management/commands/gedcom_files/")
    path_plus_file = path.joinpath(filename)
    if not path_plus_file.is_file():
        raise CommandError(
            "That gedcom file does not exist in the expected directory"
        )

    gedcom_parser = Parser()
    gedcom_parser.parse_file(path_plus_file)
    root_child_elements = gedcom_parser.get_root_child_elements()

    # Find/add person records
    for element in root_child_elements:
        if isinstance(element, IndividualElement):
            self.handle_person(element)

    # Find/add family records (after person records exist, so we can look up parents)
    # also save intermediate dictionary: CHIL INDI - family INDI
    for element in root_child_elements:
        if isinstance(element, FamilyElement):
            self.handle_family(element)

    # now that we've saved all the people and families, populate orig_family on people records
    self.add_person_family_values(self.child_family_dict)

    # gather run results
    counters = (
        ("gedcom_person_records", self.gedcom_person_records),
        ("gedcom_family_records", self.gedcom_family_records),
        ("person_added_count", self.person_added_count),
        ("person_skipped_count", self.person_skipped_count),
        ("family_added_count", self.family_added_count),
    )
    run_results = "".join(
        label + ": " + str(value) + "\n" for label, value in counters
    )

    # Display and log them
    self.stdout.write(self.style.SUCCESS("You passed filename: ") + str(filename))
    self.stdout.write(run_results)

    # The context manager guarantees the log file is flushed and closed; the
    # previous code only read the ``closed`` attribute and never closed it.
    with open("ImportInfo.txt", "w") as log_file:
        log_file.write(run_results)
def test_parse_file():
    """Parse the sample GEDCOM file and check element and individual counts.

    A fresh parser must have no root children; after parsing, the file must
    yield 34 root children, 20 of which are individuals, and the flat element
    list must contain the same 20 individuals.
    """

    def count_individuals(elements):
        return sum(1 for item in elements if isinstance(item, IndividualElement))

    gedcom_parser = Parser()
    assert len(gedcom_parser.get_root_child_elements()) == 0

    gedcom_parser.parse_file('tests/files/Musterstammbaum.ged')
    assert len(gedcom_parser.get_root_child_elements()) == 34

    root_child_individuals = count_individuals(
        gedcom_parser.get_root_child_elements())
    element_list_individuals = count_individuals(
        gedcom_parser.get_element_list())

    assert root_child_individuals == 20
    assert element_list_individuals == 20
def import_gedcom_file(self, gedcom_file_path):
    """Import the individuals and families of a GEDCOM file into the database.

    Individuals are parsed with ``self.parse_indi`` and families with
    ``self.parse_family``; each parsed family is expected to be a 6-tuple
    ``(husband, wife, married_date, place, children, note)`` whose person
    entries are GEDCOM pointers (an empty string meaning "no partner").
    A summary line is written to ``self.stdout`` when the import is done.

    Raises:
        Exception: if a child is listed as a child of more than one family.
    """
    gedcom_parser = Parser()
    gedcom_parser.parse_file(gedcom_file_path)

    # Parse all elements in the GEDCOM file, recording details from
    # individual and family elements.
    families = []
    # Lookup from gedcom individual pointer (e.g. "@I219") to api.Individual.
    individuals = {}
    for element in gedcom_parser.get_root_child_elements():
        if isinstance(element, IndividualElement):
            individuals[element.get_pointer()] = self.parse_indi(element)
        elif isinstance(element, FamilyElement):
            families.append(self.parse_family(element))

    # Note: in order to create relations in the DB, we need to commit the
    # Individuals to the DB first so they have valid PK's.
    for individual in individuals.values():
        individual.save()

    for husband, wife, married_date, place, children, note in families:
        family = Family(
            married_date=married_date,
            married_location=place,
            note=note,
        )
        family.save()

        # An empty pointer means the family has no such partner recorded.
        for partner in [pointer for pointer in (husband, wife) if pointer != '']:
            individuals[partner].partner_in_families.add(family)
            individuals[partner].save()
        family.save()

        for child in children:
            child_record = individuals[child]
            if child_record.child_in_family is not None:
                raise Exception("Can't handle child {} being a child of two families!".format(child))
            child_record.child_in_family = family
            child_record.save()

    self.stdout.write(self.style.SUCCESS('Successfully parsed {} individuals {} families'.format(
        len(individuals), len(families))))
FAM_TABLE = PrettyTable()

# Column headers of the individual and family report tables.
INDI_TABLE.field_names = [
    "ID",
    "Name",
    "Gender",
    "Birthday",
    "Age",
    "Alive",
    "Death",
    "Child",
    "Spouse",
]
FAM_TABLE.field_names = [
    "ID",
    "Married",
    "Divorced",
    "Husband ID",
    "Husband Name",
    "Wife ID",
    "Wife Name",
    "Children",
]

# Module-level parser shared by the code below; the file is parsed at import time.
gedcom_parser = Parser()
gedcom_parser.parse_file(file_path, False)  # Disable strict parsing
elements = gedcom_parser.get_element_list()
root_child_elements = gedcom_parser.get_root_child_elements()


def convertGedcomDate(datestring):
    """Parse a date string of the form ``"%d %b %Y"`` using ``dt.strptime``."""
    return dt.strptime(datestring, "%d %b %Y")


def processGedcom(file_path):
    """Helper function for reading GEDCOM files when unit testing.

    Re-parses ``file_path`` with the shared module-level parser (strict
    parsing disabled) and returns the first root child that is an
    ``IndividualElement``, or ``None`` when there is none.
    """
    gedcom_parser.parse_file(file_path, False)
    # The element list is requested as in the original code; its result is unused here.
    gedcom_parser.get_element_list()
    individuals = (
        candidate
        for candidate in gedcom_parser.get_root_child_elements()
        if isinstance(candidate, IndividualElement)
    )
    return next(individuals, None)
class GedcomManipulator:
    """Export the families of a GEDCOM file to a colour-coded workbook."""

    def __init__(self, file_path):
        """Parse ``file_path`` and cache the parser's root child elements."""
        self.gedcom_parser = Parser()
        self.gedcom_parser.parse_file(file_path, False)  # Disable strict parsing
        self.root_child_elements = self.gedcom_parser.get_root_child_elements()
        self.fullName = []

    @staticmethod
    def first_name(el):
        """Return the first component of ``el.get_name()`` joined into one string."""
        return ''.join(el.get_name()[0])

    def get_full_name(self, el):
        """Return the name chain of ``el`` as a list.

        The list starts with the first name of ``el``, followed by the first
        name of the first parent reported by the parser, that parent's first
        parent, and so on. The second component of ``el.get_name()`` is
        appended last when it is non-empty. The result is also stored in
        ``self.fullName``.
        """
        names = [self.first_name(el)]
        self.fullName = names

        current = el
        while current:
            parents = self.gedcom_parser.get_parents(current)
            if not parents:
                break
            # Only the first reported parent is followed.
            current = parents[0]
            names.append(self.first_name(current))

        surname = el.get_name()[1]
        if surname:
            names.append(surname)
        return names

    def write_csv(self, output_file_path):
        """Write one row per family member and save the workbook.

        Despite its name, this method builds a ``Workbook`` and saves it with
        ``Workbook.save`` (presumably openpyxl -- confirm against the file's
        imports). For every ``FAM`` record, each ``HUSB``/``WIFE`` child gets a
        row with the reversed name chain from ``get_full_name`` starting in
        column 2, and each ``CHIL`` child gets a row with only the first name
        in column 2. Other children of a family record (events etc.) are
        skipped. Rows start at 2 and one empty row separates families.

        Raises:
            ValueError: if a family member pointer does not match any record.
        """
        # The first root child is excluded from both lookup and iteration.
        records = self.root_child_elements[1:]

        # Pointer -> record lookup over the whole file. ``setdefault`` keeps the
        # first record for a pointer, matching the former ``list.index`` scan,
        # but without its fixed 2259-record window or its per-lookup linear cost.
        by_pointer = {}
        for record in records:
            by_pointer.setdefault(record.get_pointer(), record)

        partner_colours = {'HUSB': "66CCFF", 'WIFE': "FFCCFF"}

        wb = Workbook()
        ws = wb.active
        row = 2
        for record in records:
            if record.get_tag() != 'FAM':
                continue

            for child in record.get_child_elements():
                tag = child.get_tag()
                if tag != 'CHIL' and tag not in partner_colours:
                    # Not a reference to a person; there is no name to write.
                    continue

                member = by_pointer.get(child.get_value())
                if member is None:
                    raise ValueError(
                        "{!r} is not the pointer of any record".format(
                            child.get_value()))

                if tag == 'CHIL':
                    cell = ws.cell(row=row, column=2)
                    cell.value = self.first_name(member)
                    cell.fill = PatternFill("solid", fgColor="00CCCC")
                else:
                    name_chain = self.get_full_name(member)[::-1]
                    for column, value in enumerate(name_chain, start=2):
                        cell = ws.cell(row=row, column=column)
                        cell.value = value
                        cell.fill = PatternFill(
                            "solid", fgColor=partner_colours[tag])
                row += 1

            # Leave an empty row between two families.
            row += 1

        wb.save(output_file_path)
def run_checker(file_path):
    """Report couples in a GEDCOM file who share an ancestor.

    For every individual, the families returned for the spouse tag are
    inspected and each member returned for the wife tag is compared with the
    individual. When the two are different elements and have a common
    ancestor, one comma-separated row (both names, the ancestor's name and the
    generation level of the ancestor on either side) is emitted.

    Returns:
        A single string: a header row, the result rows (each terminated by
        ``<br />``) and a final ``Total count`` line.
    """
    # Initialize the parser and parse the file
    gedcom_parser = Parser()
    gedcom_parser.parse_file(file_path, False)
    root_child_elements = gedcom_parser.get_root_child_elements()

    def print_name(person):
        # First two components of get_name(), separated by a space.
        parts = person.get_name()
        return " ".join((parts[0], parts[1]))

    def get_ancestors(person, level=0):
        # (ancestor, level) pairs: the direct parents first, then each parent's
        # own ancestors one level further up.
        tagged_parents = [
            (parent, level)
            for parent in gedcom_parser.get_parents(person, "ALL")
        ]
        collected = list(tagged_parents)
        for parent, _ in tagged_parents:
            collected.extend(get_ancestors(parent, level + 1))
        return collected

    def are_related(person_one, person_two):
        # First common ancestor as (ancestor, level_one, level_two), else False.
        ancestors_one = get_ancestors(person_one)
        ancestors_two = get_ancestors(person_two)
        for candidate_one, level_one in ancestors_one:
            for candidate_two, level_two in ancestors_two:
                if candidate_one == candidate_two:
                    return (candidate_one, level_one, level_two)
        return False

    header = "Individual Name,Spouse Name,Shared Ancestor, # of Generations Removed from Individual, # of Generations Removed from Spouse<br />"
    rows = []

    # Iterate through all root child elements
    for individual in root_child_elements:
        # Only `IndividualElement`s offer `get_name` and can have spouse families.
        if not isinstance(individual, IndividualElement):
            continue

        spouse_families = gedcom_parser.get_families(
            individual, gedcom.tags.GEDCOM_TAG_FAMILY_SPOUSE)
        for family in spouse_families:
            wives = gedcom_parser.get_family_members(
                family, members_type=gedcom.tags.GEDCOM_TAG_WIFE)
            for spouse in wives:
                shared = are_related(individual, spouse)
                if individual != spouse and shared:
                    fields = [
                        print_name(individual),
                        print_name(spouse),
                        print_name(shared[0]),
                        str(shared[1]),
                        str(shared[2]),
                    ]
                    rows.append(",".join(fields) + "<br />")

    return header + "".join(rows) + 'Total count: ' + str(len(rows))
class GedcomManipulator(object):
    """Queries over a GEDCOM file: name list, cousins, Y-DNA line, branches.

    NOTE(review): ``__init__`` stores a ``Parser`` in ``self.gedcom`` while the
    query methods index it (``self.gedcom[_id]``) and expect records offering
    ``parents``, ``father``, ``['FAMC']``/``['FAMS']`` and ``as_individual()``.
    Confirm that the object held in ``self.gedcom`` supports both usages.
    """

    def __init__(self, filename):
        """Parse ``filename`` and prepare the lazily built name cache."""
        self.filename = filename
        self.gedcom = Parser()
        self.gedcom.parse_file(self.filename)
        self.names = None

    @staticmethod
    def _as_list(refs):
        """Normalise a FAMC/FAMS lookup (``None``, one ref or a list) to a list."""
        if refs is None:
            return []
        if isinstance(refs, list):
            return refs
        return [refs]

    @staticmethod
    def _collect_family(family_ref, outelements, queue):
        """Add one spouse-family reference and its members to a branch walk.

        The reference itself and the resolved husband and wife go into
        ``outelements``; the children are appended to ``queue``. Nothing is
        done when the reference does not resolve.
        """
        fam = family_ref.as_individual()
        if fam is None:
            return
        outelements.add(family_ref)
        if fam.husband is not None:
            outelements.add(fam.husband.as_individual())
        if fam.wife is not None:
            outelements.add(fam.wife.as_individual())
        for child in fam.children:
            queue.append(child.as_individual())

    @property
    def namelist(self):
        """List of ``(pointer, joined name)`` for every individual (cached)."""
        if self.names is None:
            self.names = [
                (element.get_pointer(), " ".join(element.get_name()))
                for element in self.gedcom.get_root_child_elements()
                if isinstance(element, IndividualElement)
            ]
        return self.names

    def get_cousins(self, _id, level=2):
        """Find all cousins of given distance.

        Walks ``level`` generations up from the record ``_id``, collects the
        siblings of the ancestors reached, then walks ``level`` generations
        back down through their children.

        Returns:
            The records reached on the way down (a set; a list only when the
            upward walk yields nothing to iterate and ``level`` is 0).
        """
        root = self.gedcom[_id]
        assert root is not None

        # Walk up `level` generations.
        generation = [root]
        for _ in range(level):
            generation = [
                parent for person in generation for parent in person.parents
            ]
        atlevel = max(level, 0)

        # Siblings of the ancestors found (the ancestors themselves excluded).
        siblings = set()
        for person in generation:
            for family_ref in self._as_list(person['FAMC']):
                for child in family_ref.as_individual().children:
                    sibling = child.as_individual()
                    if sibling.id != person.id:
                        siblings.add(sibling)

        # Walk the same number of generations back down.
        generation = siblings
        while atlevel > 0:
            descendants = set()
            for person in generation:
                for family_ref in self._as_list(person['FAMS']):
                    for child in family_ref.as_individual().children:
                        descendants.add(child.as_individual())
            generation = descendants
            atlevel -= 1
        return generation

    def get_ydna(self, _id):
        """Find all people that would/should have the same Y-DNA.

        Starting from the record ``_id``, follows every ``father`` link and
        every male child of the spouse families, and returns the set of
        records visited.
        """
        queue = [self.gedcom[_id]]
        outelements = set()
        while queue:
            cur = queue.pop(0)
            if cur is None or cur in outelements:
                continue

            fams = cur['FAMS']
            if cur.father:
                queue.append(cur.father)
            for family_ref in self._as_list(fams):
                for child in family_ref.as_individual().children:
                    person = child.as_individual()
                    if person.is_male:
                        queue.append(person)
            outelements.add(cur)
        return outelements

    def get_branch(self, _id, siblings=False, descendants=False, ancestors=True):
        """Collect the branch around the record ``_id`` into a new GEDCOM file.

        Args:
            _id: key of the starting record in ``self.gedcom``.
            siblings: also include the child family and queue its children.
            descendants: also include spouse families, both partners and
                queue the children.
            ancestors: include the child-family reference(s) and queue the
                parents.

        Returns:
            A ``gedcom.GedcomFile`` holding every collected element. The
            number of collected elements is printed before returning.
        """
        queue = [self.gedcom[_id]]
        outelements = set()
        while queue:
            cur = queue.pop(0)
            if cur is None or cur in outelements:
                continue

            famc = cur['FAMC']
            fams = cur['FAMS']

            if ancestors and famc:
                # famc may be a list of references; add them one by one
                # because a list itself cannot be a set member.
                outelements.update(self._as_list(famc))
                queue.extend(cur.parents)

            if siblings and famc:
                if isinstance(famc, list):
                    # NOTE(review): kept from the original -- a record with
                    # several child families is abandoned here, so it is
                    # neither expanded further nor added to the output.
                    continue
                fam = famc.as_individual()
                if fam is not None:
                    outelements.add(fam)
                    for child in fam.children:
                        queue.append(child.as_individual())

            if descendants and fams:
                if isinstance(fams, list):
                    # Each reference is added on its own; adding the list
                    # itself to the set raised TypeError.
                    for family_ref in fams:
                        self._collect_family(family_ref, outelements, queue)
                elif isinstance(fams, gedcom.Spouse):
                    self._collect_family(fams, outelements, queue)
                # Any other kind of FAMS value is ignored.

            outelements.add(cur)

        output = gedcom.GedcomFile()
        for element in outelements:
            output.add_element(element)
        print(len(outelements))
        return output