def getIndividual(self, individualIdString):
    """Build an Individual, with parents and families, from the database.

    The gedcom text stored for `individualIdString` is read from
    `wt_individuals`, parsed, and the first INDI record found is converted.

    Args:
        individualIdString: value matched against the `i_id` column.

    Returns:
        The populated Individual, or None when no row matches or the stored
        gedcom contains no INDI record.
    """
    cursor = self.mysql.connection.cursor()
    try:
        cursor.execute(
            '''SELECT i_gedcom FROM wt_individuals WHERE i_id = %s''',
            [individualIdString])
        rows = cursor.fetchall()
    finally:
        # Release the cursor even when the query fails.
        cursor.close()
    if not rows:
        return None
    gedcom = rows[0][0]  # Get the first row, gedcom column

    with BytesIO(bytes(gedcom, 'utf_8')) as gedcom_file:
        parser = GedcomReader(gedcom_file)
        # Only the first INDI record of the stored gedcom is used.
        individualRecord = next(iter(parser.records0('INDI')), None)
        if individualRecord is None:
            return None

        def findEventDate(eventTag):
            # Return the DATE sub-record of the given event, or None.
            eventRecord = individualRecord.sub_tag(eventTag)
            if not eventRecord:
                return None
            return eventRecord.sub_tag('DATE') or None

        individual = Individual(self.recordIdString(individualRecord),
                                individualRecord.name.format())
        individual.sexString = individualRecord.sex

        birthDateRecord = findEventDate('BIRT')
        if birthDateRecord:
            individual.bornString = birthDateRecord.value.fmt()
        deathDateRecord = findEventDate('DEAT')
        if deathDateRecord:
            # The death date used to be written to bornString, overwriting
            # the birth date.
            # NOTE(review): attribute name `diedString` is assumed by analogy
            # with `bornString` - confirm against the Individual class.
            individual.diedString = deathDateRecord.value.fmt()

        # TODO(atrookey): May be buggy, need to support multiple child families
        familyChildRecord = self.firstElement(
            self.getFamilyRecords(individualRecord, 'FAMC'))
        if familyChildRecord:
            # TODO(atrookey): May be buggy, need to support multiple wives.
            motherRecord = self.firstElement(
                self.getIndividualRecords(familyChildRecord, 'WIFE'))
            if motherRecord:
                individual.addMother(motherRecord.name.format(),
                                     motherRecord.xref_id.strip('@'))
            # TODO(atrookey): May be buggy, need to support multiple husbands.
            fatherRecord = self.firstElement(
                self.getIndividualRecords(familyChildRecord, 'HUSB'))
            if fatherRecord:
                individual.addFather(fatherRecord.name.format(),
                                     fatherRecord.xref_id.strip('@'))

        # One Family per FAMS pointer: the partner (if any) plus all children.
        for familySpouseRecord in self.getFamilyRecords(
                individualRecord, 'FAMS'):
            family = Family(self.recordIdString(familySpouseRecord))
            partnerRecord = self.getPartnerRecordForFamily(
                familySpouseRecord, individualRecord)
            if partnerRecord:
                family.addPartner(partnerRecord.name.format(),
                                  self.recordIdString(partnerRecord))
            for childRecord in self.getIndividualRecords(
                    familySpouseRecord, 'CHIL'):
                family.addChild(childRecord.name.format(),
                                childRecord.xref_id.strip('@'))
            individual.addFamily(family)
        return individual
def ged_reader(self):
    """Append one (individual, father, mother, sex) tuple per INDI record.

    Reads the GED file at `self.ged_path` and extends `self.data`. IDs are
    produced by `self.format_rin`; a parent that is missing, or has no
    xref_id, is stored as the string '0'.

    Raises:
        KeyError: if an individual has no SEX sub-record.
    """
    def rin_or_zero(relative):
        # '0' is the placeholder for an unknown parent.
        if relative is None or relative.xref_id is None:
            return '0'
        return self.format_rin(relative.xref_id)

    with GedcomReader(self.ged_path, encoding='utf-8') as parser:
        for person in parser.records0('INDI'):
            person_rin = self.format_rin(person.xref_id)
            father_rin = rin_or_zero(person.father)
            mother_rin = rin_or_zero(person.mother)
            # Index direct sub-records by tag; a repeated tag keeps the last.
            by_tag = {sub.tag: sub for sub in person.sub_records}
            self.data.append(
                (person_rin, father_rin, mother_rin, by_tag['SEX'].value))
def getRecords(self, sourceRecord, tag, sqlQuery, topLevelTag):
    """Resolve the pointers under `sourceRecord` through the database.

    Each direct sub-record of `sourceRecord` with the given `tag` holds a
    pointer. The pointer, stripped of '@', is the single parameter of
    `sqlQuery`; the gedcom text in the first column of the first returned
    row is parsed and its `topLevelTag` records are collected.

    Args:
        sourceRecord: record whose `sub_records` are scanned (may be None
            or empty there).
        tag: tag of the pointer sub-records to follow, e.g. 'FAMC'.
        sqlQuery: parameterized query taking the referenced id.
        topLevelTag: level-0 tag to extract from the fetched gedcom.

    Returns:
        List of parsed records in pointer order. Pointers without a value
        or without a matching database row are skipped.
    """
    # Record.sub_tags appears to be broken. Using this code instead. Source model.py
    # Pointers used, gedcom incomplete. Strip @ signs
    referenceIds = [
        subRecord.value.strip('@')
        for subRecord in (sourceRecord.sub_records or [])
        if subRecord.tag == tag and subRecord.value
    ]
    if not referenceIds:
        return []

    records = []
    cursor = self.mysql.connection.cursor()
    try:
        for referenceId in referenceIds:
            cursor.execute(sqlQuery, [referenceId])
            rows = cursor.fetchall()
            if not rows:
                # Dangling pointer: the referenced record is not stored.
                continue
            with BytesIO(bytes(rows[0][0], 'utf_8')) as gedcom_file:
                parser = GedcomReader(gedcom_file)
                records.extend(parser.records0(topLevelTag))
    finally:
        cursor.close()
    return records
def main(gedfile):
    """Print the first level-0 record seen for each class-name/tag pair.

    Every level-0 record of `gedfile` is visited. The first record of each
    distinct "<record class name>_<tag>" key is printed, followed by its
    sub-records via `print_subrecords`; later records with the same key
    are skipped.
    """
    already_printed = set()
    with GedcomReader(gedfile, encoding='utf-8') as rdr:
        for record in rdr.records0():
            key = type(record).__name__ + '_' + record.tag
            if key in already_printed:
                continue
            already_printed.add(key)
            print(f'\n0 {key}: {str(record)}')
            print_subrecords(record, depth=1)
def ged_reader(self):
    """Append an (individual ID, birth year) tuple per INDI record.

    Reads the GED file at `self.ged_path`. The birth year comes from
    passing the text of the BIRT/DATE value to `format_date_year`.
    Individuals whose year stays 'NA' (no BIRT, no DATE, or no parsed year)
    are not appended; they are counted, and the count is logged at the end.
    """
    with GedcomReader(self.ged_path, encoding='utf-8') as parser:
        missing_years = 0
        for person in parser.records0('INDI'):
            person_rin = self.format_rin(person.xref_id)
            by_tag = {sub.tag: sub for sub in person.sub_records}

            # Walk BIRT -> DATE; either level may be absent.
            birth_event = by_tag.get('BIRT')
            date_record = None
            if birth_event is not None:
                birth_parts = {sub.tag: sub for sub in birth_event.sub_records}
                date_record = birth_parts.get('DATE')

            year = 'NA'
            if date_record is not None:
                date_text = str(date_record.value)
                parsed_year = format_date_year(date_text)
                if parsed_year is None:
                    logging.info(
                        'Could not parse birth date of record %s: %s' %
                        (person_rin, date_text))
                else:
                    year = parsed_year

            if year == 'NA':
                missing_years += 1
                continue
            self.data.append((person_rin, year))
        logging.info('Number of records with NA birth year: %d' % missing_years)
def ged_reader(self):
    """Append an (individual ID, hashed personal ID) tuple per INDI record.

    Reads the GED file at `self.ged_path`. The REFN value is reformatted
    with `self.reformat_refn`, checked with `check_pid`, and then passed to
    `encrypt`. Individuals for which no hash is produced (hash stays 'NA')
    are not appended; they are counted, and the count is logged at the end.
    """
    with GedcomReader(self.ged_path, encoding='utf-8') as parser:
        unhashed = 0
        for person in parser.records0('INDI'):
            person_rin = self.format_rin(person.xref_id)
            by_tag = {sub.tag: sub for sub in person.sub_records}

            digest = 'NA'
            refn_record = by_tag.get('REFN')
            if refn_record is not None:
                refn_value = refn_record.value
                pid = self.reformat_refn(refn_value)
                # A None pid means the REFN could not be reformatted.
                if pid is not None:
                    if check_pid(pid):
                        digest = encrypt(pid)
                    else:
                        logging.warning(
                            'PID %s (corresponding to REFN %s) does not contain a proper date'
                            % (pid, refn_value))

            if digest == 'NA':
                unhashed += 1
                continue
            self.data.append((person_rin, digest))
        logging.info('Number of records with NA hash ID: %d' % unhashed)
# Script: read every INDI record of a GEDCOM file and build, by string
# concatenation, a JSON-like list of "familysite.familyroster.Individual"
# entries in the variable `json`.
# NOTE(review): this fragment appears to be cut off - the "fields" object and
# the surrounding list are never closed in the visible code.
from ged4py import GedcomReader
path = r"C:\Python\Python38\Django\familysite\ca1z66_78236416fprf45ca4e51z3.ged"
# First pass: collect the enumeration index of every individual.
# `people` is not used again in the visible code.
people = []
with GedcomReader(path) as parser:
    for i, indi in enumerate(parser.records0("INDI")):
        people.append(i)
# NOTE(review): this name would shadow the standard-library `json` module if
# that module were imported in this file.
json = ''
json += '[ \n'
# Second pass: emit one entry per individual.
with GedcomReader(path) as parser:
    for i, indi in enumerate(parser.records0("INDI")):
        # The last word of a multi-word given name is taken as the patronym.
        # NOTE(review): `patronym` stays unset (or keeps the previous
        # individual's value) when the given name is a single word.
        # NOTE(review): the extent of this `if` is inferred; confirm that only
        # the patronym assignment belongs to it.
        if len(indi.name.given.split(" ")) > 1:
            patronym = indi.name.given.split(" ")[-1]
        gedcom_id = indi.__dict__['xref_id']
        place_death = indi.sub_tag_value("DEAT/PLAC")
        place_birth = indi.sub_tag_value("BIRT/PLAC")
        name_maiden = indi.name.maiden
        name_last = indi.name.surname
        name_first = indi.name.first
        individual_notes = indi.sub_tag_value("NOTE")
        #individual_id
        gender = indi.sex
        date_death = indi.sub_tag_value("DEAT/DATE")
        date_birth = indi.sub_tag_value("BIRT/DATE")
        json += '\t{ \n'
        # NOTE(review): the literal already ends with ',' and another ',' is
        # appended, so the emitted line ends in ",," - not valid JSON.
        json += '\t\t"model" : "familysite.familyroster.Individual",' + ',\n'
        # Primary keys are 1-based.
        json += '\t\t"pk" : ' + f'{i+1}' + ',\n'
        json += '\t\t"fields" : {\n'
        # NOTE(review): values are interpolated without JSON escaping, so a
        # quote or backslash in a name would break the output.
        json += '\t\t\t"gedcom_id" : ' + f'"{gedcom_id}"' + ',\n'
        json += '\t\t\t"name_last" : ' + f'"{name_last}"' + ',\n'
        json += '\t\t\t"name_first" : ' + f'"{name_first}"' + ',\n'
ged_path = sys.argv[1] # Path to input GED file. csv_path = sys.argv[2] # Path to output CSV file. # List to store relevant fields of all records in. gen = list() count_none_rin = 0 def format_rin(rin): '''Extract RIN, as Gedcom represents RIN as e.g. @I1@.''' return rin[2:-1] # Initialize GED parser. with GedcomReader(ged_path, encoding='utf-8') as parser: # iterate over all INDI records for i, record in enumerate(parser.records0('INDI')): # Get individual RIN ID. ind_ref = int(format_rin(record.xref_id)) # Get the RIN ID of the individuals parents. # If the parent does not exist, set to 0. fa = record.father fa_ref = 0 if not fa is None: if fa.xref_id is not None: fa_ref = int(format_rin(fa.xref_id)) mo = record.mother
def create_humans(self) -> Dict[str, Human]:
    """Parse the GEDCOM file at `self.file_path` into a dict of humans.

    The parser's `records0` method itself (not its result) is handed to the
    private `__create_humans` helper, which runs while the file is open.
    """
    with GedcomReader(self.file_path) as parser:
        level0_records = parser.records0
        humans = self.__create_humans(level0_records)
    return humans