def div_before_death(fam):
    """US06: record an error on families divorced after a spouse's death.

    Args:
        fam: Mapping of family id to family record. Each record is read for
            ``divorced``, ``wife`` and ``husband``; a detected problem is
            assigned to the record's ``error_list``.

    Date fields holding ``'N/A'``, ``''`` or ``'N'`` (or nothing at all) are
    treated as missing and skipped.
    """
    missing = ('N/A', '', 'N')

    def parse(field):
        # A date field is either a sentinel string or a (date_text, line)
        # pair. Checking the field itself first avoids indexing into an
        # empty string, which used to raise IndexError.
        if not field or field in missing or field[0] in missing:
            return None
        return datetime.strptime(field[0], "%d %b %Y")

    for key, family in fam.items():
        divor_day = parse(family.divorced)
        if divor_day is None:
            continue

        wife_deathday = parse(family.wife[0].death)
        if wife_deathday is not None and wife_deathday < divor_day:
            new_error = GedcomError(
                ("ERROR", "FAMILY", "US06", family.divorced[1], key),
                f"divorce date {divor_day} after wife's death {wife_deathday}")
            family.error_list = new_error

        hus_deathday = parse(family.husband[0].death)
        if hus_deathday is not None and hus_deathday < divor_day:
            new_error = GedcomError(
                ("ERROR", "FAMILY", "US06", family.divorced[1], key),
                f"divorce date {divor_day} after husband's death {hus_deathday}")
            family.error_list = new_error
def birth_b_marriage_us02(fam):
    """US02: flag spouses whose birthday is not strictly before the marriage date.

    Each family in ``fam`` is read for ``husband``, ``wife`` and ``married``;
    a detected problem is assigned to the family's ``error_list``.
    """
    date_format = "%d %b %Y"
    for fam_id, record in fam.items():
        groom = record.husband[0]
        groom_birth = groom.birthday
        groom_born = datetime.strptime(groom_birth[0], date_format)
        bride = record.wife[0]
        bride_birth = bride.birthday
        bride_born = datetime.strptime(bride_birth[0], date_format)
        wedding = record.married
        wed_on = datetime.strptime(wedding[0], date_format)

        # The wife's label keeps its leading space: it is part of the message.
        spouses = (
            ("husband", groom, groom_birth, groom_born),
            (" wife", bride, bride_birth, bride_born),
        )
        for label, person, birth, born in spouses:
            if born < wed_on:
                continue
            record.error_list = GedcomError(
                ("ERROR", "FAMILY", "US02", birth[1], fam_id),
                f"{label} {person.indi_id[0]} birthday {birth[0]} "
                f"isn't before married date {wedding[0]}",
            )
def US05(fam):
    """US05: record an error when a marriage is dated after a spouse's death.

    Args:
        fam: Mapping of family id to family record. Each record is read for
            ``married``, ``husband``, ``wife`` and ``id``; a detected problem
            is assigned to the record's ``error_list``.

    Raises:
        ValueError: if a family's marriage date is ``'N/A'`` or ``''``.
    """
    absent = ('N/A', '')
    for key, value in fam.items():
        date_married = value.married
        if date_married in absent:
            raise ValueError(f"Lost:{value.id[0]} family marriage date lost")
        mar_date = datetime.strptime(date_married[0], "%d %b %Y")

        # Both spouses are checked independently. The wife used to sit in an
        # ``elif`` and was skipped whenever the husband's branch was taken.
        for role, person in (("husband", value.husband[0]),
                             ("wife", value.wife[0])):
            death = person.death
            if death in absent:
                continue
            death_date = datetime.strptime(death[0], "%d %b %Y")
            if death_date < mar_date:
                new_error = GedcomError(
                    ("Error", "FAMILY", "US05", date_married[1], key),
                    f"Married {date_married[0]} after {role}'s "
                    f"({person.indi_id[0]}) death on {death[0]}")
                value.error_list = new_error
def check_existence_in_family(role, type, indi, key, value, offset):
    """US26: check that a family member's individual record points back to the family.

    Args:
        role: Individual record filling the role; read for ``indi_id``.
        type: ``"husband"``, ``"wife"`` or ``"child"``; any other value only
            gets the existence check.
        indi: Mapping of individual id to individual record.
        key: Id of the family being checked.
        value: Family record; read for ``husband`` / ``wife`` line numbers,
            and a detected problem is assigned to ``value.error_list``.
        offset: Not used by this function; kept so existing callers still work.
    """
    member_id = role.indi_id[0]

    def role_line():
        # Line number reported for the role. The child line mirrors the
        # existing convention of "one past the wife's line".
        if type == "wife":
            return value.wife[1]
        if type == "child":
            return value.wife[1] + 1
        return value.husband[1]

    if member_id not in indi:
        # The line used to be read from ``role.husband``, but ``role`` is an
        # individual; the family record ``value`` is what carries these lines.
        new_error = GedcomError(
            ("Error", "Family", "US26", role_line(), key),
            f"Corresponding issue happened. {member_id} not found in individual list.")
        value.error_list = new_error
        return

    if type in ("husband", "wife"):
        links, noun = indi[member_id].spouse, "spouse"
    elif type == "child":
        links, noun = indi[member_id].child, "children"
    else:
        return

    if key not in [id_tuple[0] for id_tuple in links]:
        new_error = GedcomError(
            ("Error", "Family", "US26", role_line(), key),
            f"Corresponding issue happened. Family {key}'s {noun} "
            f"{member_id} has wrong family Id.")
        value.error_list = new_error
def check_existence_in_indi(family_id_list, type, fam, key, value):
    """US26: check that each family an individual references lists that individual.

    ``family_id_list`` holds ``(family_id, line)`` entries, ``type`` is
    ``"child"`` or ``"spouse"``, ``key`` is the individual's id, and a
    detected problem is assigned to ``value.error_list``.
    """
    referenced = [entry[0] for entry in family_id_list]
    for position, family_id in enumerate(referenced):
        if family_id not in fam:
            message = (f"Corresponding issue happened. {family_id} "
                       f"not found in family list.")
        elif type == "child" and key not in [
                kid.indi_id[0] for kid in fam[family_id].children]:
            message = (f"Corresponding issue happened. Family {family_id} "
                       f"does not have individual {key}.")
        elif type == "spouse" and not (
                key == fam[family_id].husband[0].indi_id[0]
                or key == fam[family_id].wife[0].indi_id[0]):
            message = (f"Corresponding issue happened. Family {family_id} "
                       f"does not have individual {key}.")
        else:
            continue
        value.error_list = GedcomError(
            ("Error", "Individual", "US26", family_id_list[position][1], key),
            message,
        )
def unique_family_24(fam):
    """US24: flag a spouse who appears in more than one family with the same marriage date.

    Marriage dates already seen are tracked per individual id; a repeat is
    assigned to the family's ``error_list``.
    """
    seen_dates = {}
    for fam_id, record in fam.items():
        wife_id = record.wife[0].indi_id[0]
        husband_id = record.husband[0].indi_id[0]
        for role, person_id in (("wife", wife_id), ("husband", husband_id)):
            if person_id not in seen_dates:
                seen_dates[person_id] = [record.married[0]]
                continue
            wedding = record.married[0]
            if wedding in seen_dates[person_id]:
                record.error_list = GedcomError(
                    ("ANOMALY", "FAMILY", "US24", record.married[1], fam_id),
                    f"{role} should have same marry date in different family",
                )
            else:
                seen_dates[person_id].append(wedding)
def birth_before_marriage(fam):
    """US08/US09: flag children born outside the marriage or after a parent's death.

    For every child of every family in ``fam`` this records:
      * US08 when the birth is on/before the marriage, or more than 270 days
        after the divorce;
      * US09 when the birth is after the mother's death, or more than 270
        days after the father's death.
    Problems are assigned to the family's ``error_list``; the mother's-death
    case is also printed.
    """
    date_format = "%d %b %Y"
    grace = timedelta(days=270)
    for fam_id, record in fam.items():
        wed_on = datetime.strptime(record.married[0], date_format)
        split_on = None
        if record.divorced != "N/A":
            split_on = datetime.strptime(record.divorced[0], date_format)

        for kid in record.children:
            born = datetime.strptime(kid.birthday[0], date_format)
            line = kid.birthday[1]
            kid_id = kid.indi_id[0]

            # Error tuple layout: (error_type, indi_or_fam, user_story,
            # error_line, entity_id).
            if born <= wed_on:
                record.error_list = GedcomError(
                    ("ANOMALY", "FAMILY", "US08", line, fam_id),
                    f"Child {kid_id} born {born} before marriage on {wed_on}",
                )
            elif split_on is not None and born > split_on + grace:
                record.error_list = GedcomError(
                    ("ANOMALY", "FAMILY", "US08", line, fam_id),
                    f"Child {kid_id} born {born} after divorce on {split_on}",
                )

            if record.wife[0].death != "N/A":
                mother_died = datetime.strptime(record.wife[0].death[0],
                                                date_format)
                if born > mother_died:
                    record.error_list = GedcomError(
                        ("ANOMALY", "FAMILY", "US09", line, fam_id),
                        f"Child {kid_id} born {born} after mother's death on {mother_died}",
                    )
                    print(
                        f"ANOMALY: FAMILY: US09: {line}: {fam_id}: "
                        f"Child {kid_id} born {born} after mother's death on {mother_died}"
                    )

            if record.husband[0].death != "N/A":
                father_died = datetime.strptime(record.husband[0].death[0],
                                                date_format)
                if born > father_died + grace:
                    record.error_list = GedcomError(
                        ("ANOMALY", "FAMILY", "US09", line, fam_id),
                        f"Child {kid_id} born {born} after daddy's death on {father_died}",
                    )
def corrent_gender_us21(fam):
    """US21: flag a husband recorded as 'F' or a wife recorded as 'M'.

    Problems are assigned to the family's ``error_list``.
    """
    for fam_id, record in fam.items():
        groom = record.husband[0]
        bride = record.wife[0]
        checks = (
            (groom, 'F', record.husband, "husband's gender should be male"),
            (bride, 'M', record.wife, "wife's gender should be female"),
        )
        for person, flagged, slot, message in checks:
            if person.gender[0] == flagged:
                record.error_list = GedcomError(
                    ("ANOMALY", "FAMILY", "US21", slot[1], fam_id),
                    message,
                )
def unique_first_names_in_families_us25(fam):
    """ US25: Unique first names in families

    Children are grouped by the first space-separated token of their name;
    any group with more than one child is assigned to the family's
    ``error_list``.
    """
    for fam_id, record in fam.items():
        if record.children == ['N/A']:
            continue

        # first name -> flat list [child_id, name_line, child_id, name_line, ...]
        by_first_name = defaultdict(list)
        for kid in record.children:
            kid_id = kid.indi_id[0]
            full_name = kid.name
            first_name = full_name[0].split(' ')[0]
            by_first_name[first_name].extend([kid_id, full_name[1]])

        for first_name, entries in by_first_name.items():
            if len(entries) == 2:
                continue  # exactly one child carries this first name
            namesakes = entries[::2]  # every other slot is a child id
            record.error_list = GedcomError(
                ("ANOMALY", "FAMILY", "US25", entries[1], fam_id),
                f"children {namesakes} of family {fam_id} have the same first name '{first_name}'",
            )
def multiple_siblings(fam):
    """US15: flag families that list more than 15 children.

    The error line is taken from the sixteenth child's ``indi_id`` and the
    error is assigned to the family's ``error_list``.
    """
    for fam_id, record in fam.items():
        if len(record.children) <= 15:
            continue
        sixteenth = record.children[15]
        record.error_list = GedcomError(
            ("ERROR", "FAMILY", "US15", sixteenth.indi_id[1], fam_id),
            "More than 15 siblings in one family.",
        )
def multiple_birth(fam):
    """US14: flag families where more than five siblings share a birthday.

    Args:
        fam: Mapping of family id to family record. Each record is read for
            ``children`` (each child for ``birthday``); a detected problem is
            assigned to the record's ``error_list``.

    The reported line is the birthday line of the fifth sibling in the
    offending group, the same child the all-same-birthday case reported
    before.
    """
    for key, family in fam.items():
        children = family.children
        if len(children) <= 5:
            continue

        # Group siblings by birth date. The old check required *every* child
        # to share one birthday, so a sextuplet plus one older sibling was
        # never reported.
        born_together = {}
        for child in children:
            born = datetime.strptime(child.birthday[0], "%d %b %Y")
            born_together.setdefault(born, []).append(child)

        for siblings in born_together.values():
            if len(siblings) > 5:
                new_error = GedcomError(
                    ("ERROR", "FAMILY", "US14", siblings[4].birthday[1], key),
                    "More than 5 siblings born at the same day.")
                family.error_list = new_error
def no_marriage_to_children_us17(fam):
    """ US17: no_marriage_to_children

    Flags a family whose wife is a child of the husband (in any family) or
    whose husband is a child of the wife (in any family). Problems are
    assigned to the family's ``error_list``.

    Args:
        fam: Mapping of family id to family record; each record is read for
            ``husband``, ``wife`` and ``children``.
    """
    # parent id -> ids of all children across every family of that parent.
    # Real lists replace the old 'N/A' string default, which turned the
    # membership test below into a substring test ('' / 'N' / 'A' matched).
    children_of_father = defaultdict(list)
    children_of_mother = defaultdict(list)
    for value in fam.values():
        if value.children == ['N/A']:
            continue
        child_ids = [child.indi_id[0] for child in value.children]
        children_of_father[value.husband[0].indi_id[0]].extend(child_ids)
        children_of_mother[value.wife[0].indi_id[0]].extend(child_ids)

    for key1, value1 in fam.items():
        husband_id = value1.husband[0]
        wife_id = value1.wife[0]
        # .get avoids inserting empty entries for childless spouses.
        if wife_id.indi_id[0] in children_of_father.get(husband_id.indi_id[0], ()):
            new_error = GedcomError(
                ("ERROR", "FAMILY", "US17", wife_id.indi_id[1], key1),
                f"father {husband_id.indi_id[0]} married to his child {wife_id.indi_id[0]}")
            value1.error_list = new_error
        if husband_id.indi_id[0] in children_of_mother.get(wife_id.indi_id[0], ()):
            new_error = GedcomError(
                ("ERROR", "FAMILY", "US17", husband_id.indi_id[1], key1),
                f"mother {wife_id.indi_id[0]} married to her child {husband_id.indi_id[0]}")
            value1.error_list = new_error
def less_than_150(indi):
    """US07: flag individuals whose age exceeds 150 years (counted as 150 * 365 days).

    A deceased individual is measured from birth to death, a living one from
    birth to today. Problems are assigned to the individual's ``error_list``.
    """
    date_format = "%d %b %Y"
    limit = timedelta(days=150 * 365)
    for indi_id, person in indi.items():
        born = datetime.strptime(person.birthday[0], date_format)
        died = ""
        if person.death != 'N/A':
            died = datetime.strptime(person.death[0], date_format)

        living = person.alive
        if living is False:
            if born + limit < died:
                person.error_list = GedcomError(
                    ("ERROR", "INDIVIDUAL", "US07", person.death[1], indi_id),
                    f"{person.name[0]}'s age should be less than 150 years.",
                )
        elif living is True and born < datetime.today() - limit:
            person.error_list = GedcomError(
                ("ERROR", "INDIVIDUAL", "US07", person.birthday[1], indi_id),
                f"{person.name[0]} should no more than 150 years old.",
            )
def parents_not_too_old_us12(fam):
    """ US12: parents_not_too_old

    Flags a father born 80 or more years (80 * 365.25 days) before a child,
    and a mother born 60 or more years before a child. Problems are assigned
    to the family's ``error_list``.

    Args:
        fam: Mapping of family id to family record; each record is read for
            ``husband``, ``wife`` and ``children``.
    """
    date_format = "%d %b %Y"
    for key, value in fam.items():
        if value.children == ['N/A']:
            continue

        id_h = value.husband[0]
        birth_h = id_h.birthday
        h_birth_date = datetime.strptime(birth_h[0], date_format)
        id_w = value.wife[0]
        birth_w = id_w.birthday
        w_birth_date = datetime.strptime(birth_w[0], date_format)

        for child in value.children:
            id_c = child.indi_id[0]
            birth_c = child.birthday
            c_birth_date = datetime.strptime(birth_c[0], date_format)

            # Use timedelta.days directly: parsing str(timedelta) breaks on a
            # zero-day difference, which renders as "0:00:00".
            father_gap_days = (c_birth_date - h_birth_date).days
            mother_gap_days = (c_birth_date - w_birth_date).days

            if father_gap_days >= 80 * 365.25:
                new_error = GedcomError(
                    ("ERROR", "FAMILY", "US12", birth_h[1], key),
                    f"father {id_h.indi_id[0]} {birth_h[0]} "
                    f"is not less than 80 years old than child {id_c} {birth_c[0]}")
                value.error_list = new_error
            if mother_gap_days >= 60 * 365.25:
                new_error = GedcomError(
                    ("ERROR", "FAMILY", "US12", birth_w[1], key),
                    f"mother {id_w.indi_id[0]} {birth_w[0]} "
                    f"is not less than 60 years old than child {id_c} {birth_c[0]}")
                value.error_list = new_error
def marry_after_14(fam):
    """US10: flag spouses married 14 years (14 * 365.25 days) or less after their birth.

    Problems are assigned to the family's ``error_list``.
    """
    date_format = "%d %b %Y"
    minimum_days = 14 * 365.25
    for fam_id, record in fam.items():
        wed_on = datetime.strptime(record.married[0], date_format)
        spouses = (record.husband[0], record.wife[0])
        births = [datetime.strptime(person.birthday[0], date_format)
                  for person in spouses]
        for person, born in zip(spouses, births):
            if (wed_on - born).days > minimum_days:
                continue
            record.error_list = GedcomError(
                ("ANOMALY", "FAMILY", "US10", record.married[1], fam_id),
                f"Child {person.indi_id[0]}'s marry date is {wed_on},"
                f"less than 14 years after {born} ",
            )
def date_before_current(indi, fam):
    """US01: flag marriage, divorce, birth and death dates later than ``today``.

    ``today`` is not defined in this function; it is resolved from the
    enclosing module at call time. Family problems are assigned to the
    family's ``error_list``, individual problems to the individual's.
    """
    date_format = "%d %b %Y"
    absent = ['N/A', '', 'N']

    for fam_id, record in fam.items():
        wed_on = datetime.strptime(record.married[0], date_format)
        if wed_on > today:
            record.error_list = GedcomError(
                ("ERROR", "FAMILY", "US01", record.married[1], fam_id),
                f"marry date {wed_on} before today",
            )
        # Indexing a bare 'N/A' string yields 'N', hence the extra sentinel.
        if record.divorced[0] in absent:
            continue
        split_on = datetime.strptime(record.divorced[0], date_format)
        if split_on > today:
            record.error_list = GedcomError(
                ("ERROR", "FAMILY", "US01", record.divorced[1], fam_id),
                f"divorce date {split_on} before today",
            )

    for indi_id, person in indi.items():
        born = datetime.strptime(person.birthday[0], date_format)
        if born > today:
            person.error_list = GedcomError(
                ("ERROR", "INDIVIDUAL", "US01", person.birthday[1], indi_id),
                f"birthday date {born} before today",
            )
        if person.death[0] in absent:
            continue
        died = datetime.strptime(person.death[0], date_format)
        if died > today:
            person.error_list = GedcomError(
                ("ERROR", "INDIVIDUAL", "US01", person.death[1], indi_id),
                f"death date {died} before today",
            )
def first_cousin_should_not_marry_us19(fam):
    """US19: flag couples whose parents come from a common (grandparent) family.

    For each spouse, the ids of the families in which that spouse's parents
    are children are collected; the couple is flagged when the husband's and
    wife's sets overlap. Problems are assigned to the family's ``error_list``.

    Args:
        fam: Mapping of family id to family record; each record is read for
            ``husband``, ``wife`` and ``id``, each individual for ``child``.
    """
    missing = ('N/A', '')

    def grandparent_families(person):
        """Return ids of the families in which *person*'s parents are children."""
        found = set()
        for parent_link in person.child:
            parent_family = fam.get(parent_link[0])
            if parent_family is None:
                continue  # dangling family reference; nothing to follow
            for parent in (parent_family.husband[0], parent_family.wife[0]):
                if parent.child in missing:
                    continue
                found.update(link[0] for link in parent.child)
        return found

    for key, value in fam.items():
        husband = value.husband[0]
        wife = value.wife[0]
        if husband.child in missing or wife.child in missing:
            continue

        # Sets are rebuilt per couple. The old code compared len(list) with a
        # set (never equal), so every couple reaching this point was flagged,
        # and the shared list carried ids over from earlier families.
        # NOTE: siblings also share grandparent families and are flagged here.
        if grandparent_families(husband) & grandparent_families(wife):
            new_error = GedcomError(
                ("Error", "FAMILY", "US19", value.id[1], key),
                f"First cousins marry one another happened in family {value.id[0]}")
            value.error_list = new_error
def sibi_not_marry(fam):
    """US18: flag couples whose two spouses share at least one parent.

    Only couples where both spouses have a ``child`` value other than 'N/A'
    are examined. When a shared parent is found, the module-level
    ``iserror`` is set to 1 and an error is assigned to the family's
    ``error_list``.
    """
    global iserror

    # Pass 1: remember, per couple, the families each spouse is a child of.
    couples = {}
    for fam_id, record in fam.items():
        husband = record.husband[0]
        wife = record.wife[0]
        if husband.child == 'N/A' or wife.child == 'N/A':
            continue
        couples[fam_id] = {
            'hus_fam': [link[0] for link in husband.child],
            'wife_fam': [link[0] for link in wife.child],
            'hus_par': set(),
            'wife_par': set(),
            'family': record,
        }

    # Pass 2: resolve those family ids to the parents' ``indi_id`` values.
    for fam_id, record in fam.items():
        for info in couples.values():
            if fam_id in info['hus_fam']:
                info['hus_par'].add(record.husband[0].indi_id)
                info['hus_par'].add(record.wife[0].indi_id)
            if fam_id in info['wife_fam']:
                info['wife_par'].add(record.husband[0].indi_id)
                info['wife_par'].add(record.wife[0].indi_id)

    # Pass 3: one error per couple with any parent in common.
    for fam_id, info in couples.items():
        if any(parent in info['hus_par'] for parent in info['wife_par']):
            iserror = 1
            info['family'].error_list = GedcomError(
                ("ANOMALY", "FAMILY", "US18", info['family'].id[1], fam_id),
                "Siblings should not marry one another",
            )
def US11(indi, fam):
    """US11: flag individuals whose marriages overlap in time (bigamy).

    Every pair of families in an individual's ``spouse`` list is compared;
    two marriages overlap unless one starts on or after the other's divorce.
    A marriage without a divorce date is treated as still open. Problems are
    assigned to the individual's ``error_list``.

    Args:
        indi: Mapping of individual id to individual record (read for
            ``spouse``, a list of ``(family_id, line)`` entries or a sentinel).
        fam: Mapping of family id to family record (read for ``married`` and
            ``divorced``).
    """
    date_format = "%d %b %Y"
    absent = ('N/A', '')

    def marriage_span(fam_id):
        """Return (married_field, start, end) for one family."""
        record = fam[fam_id]
        married = record.married
        start = datetime.strptime(married[0], date_format)
        divorced = record.divorced
        if divorced in absent:
            # Open-ended marriage. The previous hard-coded end date of
            # 21 MAR 2020 hid every overlap that began after that day.
            end = datetime.max
        else:
            end = datetime.strptime(divorced[0], date_format)
        return married, start, end

    for key, value in indi.items():
        lis_spouse = value.spouse
        if lis_spouse in absent or len(lis_spouse) < 2:
            continue

        # Parse each marriage once instead of once per compared pair.
        spans = [(link[0],) + marriage_span(link[0]) for link in lis_spouse]
        for i, (fam_id_1, married_1, start_1, end_1) in enumerate(spans):
            for fam_id_2, _, start_2, end_2 in spans[i + 1:]:
                if start_1 >= end_2 or start_2 >= end_1:
                    continue  # one marriage ended before the other began
                new_error = GedcomError(
                    ("Error", "INDIVIDUAL", "US11", married_1[1], key),
                    f"bigamy happened on family {fam_id_1} and family {fam_id_2}")
                value.error_list = new_error
def build_family(fam, indi_dict):
    """Build ``Family`` objects from parsed family records.

    Each record in ``fam`` is read for the "FAM", "MARR", "DIV", "HUSB",
    "WIFE" and "CHIL" entries. A repeated family id gets a US22 error
    assigned to the already-stored family instead of a new object.

    Returns:
        A ``defaultdict(Family)`` keyed by family id.
    """
    families = defaultdict(Family)
    for raw in fam:
        tag = raw["FAM"]
        if tag[0] in families:
            families[tag[0]].error_list = GedcomError(
                ("ERROR", "FAMILY", "US22", tag[1], tag[0]),
                f"Family ID {tag[0]} should be unique!",
            )
            continue

        marriage = raw["MARR"]
        divorce = raw["DIV"]
        # Spouses are stored as (individual object, source line) pairs, the
        # object being looked up in the individual dictionary.
        husband_tag = raw["HUSB"]
        husband_slot = (indi_dict[husband_tag[0]], husband_tag[1])
        wife_tag = raw["WIFE"]
        wife_slot = (indi_dict[wife_tag[0]], wife_tag[1])
        record = Family(tag, marriage, divorce, husband_slot, wife_slot)

        kids = raw["CHIL"]
        if kids != "N/A":
            for link in kids:
                # Children are handed over one at a time by assignment.
                record.children = indi_dict[link[0]]

        families[tag[0]] = record
    return families
def USO4(fam):
    """US04: flag families whose divorce date is earlier than the marriage date.

    Families without a divorce date are skipped. Problems are assigned to
    the family's ``error_list``.

    Raises:
        ValueError: if a divorced family has no marriage date.
    """
    date_format = "%d %b %Y"
    absent = ('N/A', '')
    for fam_id, record in fam.items():
        divorce = record.divorced
        wedding = record.married
        if divorce in absent:
            continue
        if wedding in absent:
            raise ValueError(f"Lost:{record.id[0]} family marriage date lost")

        split_on = datetime.strptime(divorce[0], date_format)
        wed_on = datetime.strptime(wedding[0], date_format)
        if split_on < wed_on:
            record.error_list = GedcomError(
                ("Error", "FAMILY", "US04", divorce[1], fam_id),
                f"Divorced {divorce[0]} before married {wedding[0]}",
            )
def unique_name_and_birthday_us23(indi):
    """US23: flag individuals that share both name and birth date with another.

    Args:
        indi: Mapping of individual id to individual record; each record is
            read for ``name`` and ``birthday``, and a detected problem is
            assigned to its ``error_list``.
    """
    # (name, birthday) -> every individual carrying that exact pair.
    sharers_by_identity = {}
    for key, value in indi.items():
        identity = (value.name[0], value.birthday[0])
        sharers_by_identity.setdefault(identity, []).append((key, value))

    for sharers in sharers_by_identity.values():
        if len(sharers) < 2:
            continue
        # Only members of the duplicated pair are reported. Matching on the
        # name alone also flagged namesakes born on a different day.
        for key, value in sharers:
            new_error = GedcomError(
                ("Error", "INDIVIDUAL", "US23", value.name[1], key),
                f"more than one individual with same name {value.name[0]} and birth date "
                + f"{value.birthday[0]} appear in the GED file")
            value.error_list = new_error
def birth_b_death_us03(indi):
    """ US03: birth before death

    Individuals whose ``death`` is 'N/A' are skipped; for the rest, a
    birthday that is not strictly before the death date is assigned to the
    individual's ``error_list``.
    """
    date_format = "%d %b %Y"
    for indi_id, person in indi.items():
        born = person.birthday
        died = person.death
        if died == 'N/A':
            continue
        died_on = datetime.strptime(died[0], date_format)
        born_on = datetime.strptime(born[0], date_format)
        if born_on < died_on:
            continue
        person.error_list = GedcomError(
            ("ERROR", "INDIVIDUAL", "US03", born[1], indi_id),
            f"{indi_id}'s birthday {born[0]} isn't before death date {died[0]}",
        )
def build_individual(indi):
    """Build ``Individual`` objects from parsed individual records.

    Each record in ``indi`` is read for the "INDI", "NAME", "SEX", "BIRTH",
    "DEAT", "FAMC" and "FAMS" entries. A repeated individual id gets a US22
    error assigned to the already-stored individual instead of a new object.

    The age passed to ``Individual`` is whole years (days / 365.25,
    truncated): an int measured up to now when "DEAT" is "N/A", otherwise
    the string ``"death age: <years>"`` measured up to the death date.

    Returns:
        A ``defaultdict(Individual)`` keyed by individual id.
    """
    indi_dict = defaultdict(Individual)
    for people in indi:
        indi_id = people["INDI"]
        if indi_id[0] in indi_dict:
            new_error = GedcomError(
                ("ERROR", "INDIVIDUAL", "US22", indi_id[1], indi_id[0]),
                f"Individual ID {indi_id[0]} should be unique!")
            indi_dict[indi_id[0]].error_list = new_error
            continue

        indi_birth_date = datetime.strptime(people["BIRTH"][0], "%d %b %Y")
        # timedelta.days is used directly: parsing str(timedelta) fails for
        # spans shorter than a day, which render without a "N days" prefix.
        if people["DEAT"] == "N/A":
            lived_days = (datetime.now() - indi_birth_date).days
            age = int(lived_days / 365.25)
        else:
            death_date = datetime.strptime(people["DEAT"][0], "%d %b %Y")
            lived_days = (death_date - indi_birth_date).days
            age = f"death age: {int(lived_days / 365.25)}"

        new_indi = Individual(indi_id, people["NAME"], people["SEX"],
                              people["BIRTH"], people["DEAT"],
                              people["FAMC"], people["FAMS"], age)
        indi_dict[indi_id[0]] = new_indi
    return indi_dict