def MarriageBeforeDivorce(row):
    """US04: flag a family whose divorce is dated before its marriage.

    Args:
        row: Family record indexable by 'ID', 'Married' and 'Divorced'.

    Returns:
        A list with one error message when the marriage date is later than
        the divorce date, otherwise an empty list.
    """
    # The comparison only makes sense when both fields hold date strings;
    # anything else (presumably NaN for "no date" -- confirm with the loader)
    # means there is nothing to check.
    if not (isinstance(row['Married'], str) and isinstance(row['Divorced'], str)):
        return []

    wed_on = convertStringToDatetime(row['Married'])
    split_on = convertStringToDatetime(row['Divorced'])
    if wed_on > split_on:
        pieces = [
            'ERROR: FAMILY: US04: ',
            str(row['ID']),
            ': Divorced: ',
            str(split_on.date()),
            ' occurs before Marriage: ',
            str(wed_on.date()),
        ]
        return [''.join(pieces)]
    return []
def BirthBeforeDeath(individualsDF):
    """US03: flag deceased individuals whose death is dated before their birth.

    Args:
        individualsDF: DataFrame of individuals with 'ID', 'Alive',
            'Birthday' and 'Death' columns.

    Returns:
        A list of error messages, one per offending individual, in table
        order.
    """
    findings = []
    for _, person in individualsDF.iterrows():
        # Living individuals have no death date to compare against.
        if not (person['Alive'] == False):
            continue

        born_on = convertStringToDatetime(person['Birthday'])
        died_on = convertStringToDatetime(person['Death'])
        if born_on > died_on:
            findings.append(''.join([
                'ERROR: INDIVIDUAL: US03: ',
                str(person['ID']),
                ' Died: ',
                str(died_on.date()),
                ' occurs before Birth: ',
                str(born_on.date()),
            ]))
    return findings
def siblings_spacing(individualsDF, familiesDF):
    """US13: flag sibling pairs born between 2 days and 8 months apart.

    Siblings are expected to be born either less than 2 days apart (twins)
    or more than 8 months apart.  The gap is measured as an absolute
    difference, so a pair is flagged regardless of which child is listed
    first in the family's 'Children' field.

    Args:
        individualsDF: DataFrame of individuals with a 'Birthday' column.
        familiesDF: DataFrame of families with 'ID' and 'Children' columns;
            'Children' is a string holding a Python list literal of IDs.

    Returns:
        A list of error messages, one per offending pair of siblings.
    """
    two_days = 2 * 24 * 60 * 60
    eight_months = 8 * 31 * 24 * 60 * 60  # months approximated as 31 days

    errors = []
    for _, famRow in familiesDF.iterrows():
        # A non-string 'Children' cell means the family has no child list.
        if not isinstance(famRow["Children"], str):
            continue
        childList = list(literal_eval(famRow["Children"]))
        if len(childList) < 2:
            continue  # a single child has no sibling to compare with

        # NOTE(review): individuals are looked up by position, assuming the
        # row at index N-1 holds the individual whose ID ends in N -- confirm
        # this holds for every input file.
        birthdays = [
            convertStringToDatetime(
                individualsDF.iloc[int(childId[1:]) - 1]["Birthday"])
            for childId in childList
        ]

        for first, childId in enumerate(childList):
            for second in range(first + 1, len(childList)):
                gap = abs(
                    (birthdays[first] - birthdays[second]).total_seconds())
                if two_days <= gap <= eight_months:
                    errors.append(
                        "ERROR: FAMILY: US13: {}: Birthday of siblings {} & {} are between 2 days and 8 months"
                        .format(famRow["ID"], childId, childList[second]))
    return errors
def birth_before_parent_marriage(fam_row, indi_df):
    """US08: flag children born before their parents' marriage.

    Args:
        fam_row: Family record indexable by 'ID', 'Married' and 'Children'.
            'Children' may be a collection of IDs or a string holding a
            Python list literal of IDs; a float (NaN) means no children.
        indi_df: DataFrame of individuals with 'ID' and 'Birthday' columns.

    Returns:
        A list of anomaly messages, one per child born before the marriage.
    """
    errors = []
    if not isinstance(fam_row["Married"], str):
        return errors  # no marriage date to compare against

    children = fam_row['Children']
    if children is None or isinstance(children, float):
        return errors
    if isinstance(children, str):
        # Parse the stringified list so membership is an exact ID match;
        # a substring test would let 'I1' match "['I10']".
        children = literal_eval(children)

    child_rows = [row for _, row in indi_df.iterrows()
                  if row['ID'] in children]
    if not child_rows:
        return errors

    marriage_dt = convertStringToDatetime(fam_row['Married'])
    for row in child_rows:
        birth_dt = convertStringToDatetime(row['Birthday'])
        if birth_dt < marriage_dt:
            errors.append(
                "ANOMOLY: FAMILY: US08: {}: Child {} born {} before marriage on {}"
                .format(fam_row['ID'], row['ID'], birth_dt.date(),
                        marriage_dt.date()))
    return errors
def birth_before_marriage_check(individuals2, families2):
    """US02: flag individuals whose marriage is dated before their birth.

    Every individual is compared against every family in which they appear
    as husband or wife and which has a string 'Married' value.

    Args:
        individuals2: DataFrame of individuals with 'ID' and 'Birthday'.
        families2: DataFrame of families with 'Husband ID', 'Wife ID' and
            'Married'.

    Returns:
        A list of error messages ordered by individual, then by family.
    """
    template = "ERROR: INDIVIDUAL: US02: {}: Married {} before born {}"
    problems = []
    for _, person in individuals2.iterrows():
        for _, family in families2.iterrows():
            is_spouse = (person["ID"] == family["Husband ID"]
                         or person["ID"] == family["Wife ID"])
            if not is_spouse:
                continue
            if not isinstance(family["Married"], str):
                continue  # family has no marriage date

            born_on = convertStringToDatetime(person['Birthday'])
            wed_on = convertStringToDatetime(family['Married'])
            if born_on > wed_on:
                problems.append(
                    template.format(person["ID"], wed_on.date(),
                                    born_on.date()))
    return problems
def marriage_after_fourteen(individuals2, families2):
    """US10: flag spouses who were younger than 14 on their wedding day.

    Age is computed from calendar dates (years elapsed, minus one if the
    wedding falls before that year's birthday).  Dividing elapsed seconds by
    a fixed 365-day year drifts by the leap days in between and lets
    weddings a few days before the 14th birthday pass unnoticed.

    Args:
        individuals2: DataFrame of individuals with 'ID' and 'Birthday'.
        families2: DataFrame of families with 'Husband ID', 'Wife ID' and
            'Married'.

    Returns:
        A list of error messages ordered by individual, then by family.
    """
    minimum_age = 14
    errors = []
    for _, row_indi in individuals2.iterrows():
        for _, row_fam in families2.iterrows():
            is_spouse = (row_indi["ID"] == row_fam["Husband ID"]
                         or row_indi["ID"] == row_fam["Wife ID"])
            if not is_spouse or not isinstance(row_fam["Married"], str):
                continue

            birth_dt = convertStringToDatetime(row_indi['Birthday'])
            marriage_dt = convertStringToDatetime(row_fam['Married'])

            # True (== 1) when the wedding precedes the birthday in its year.
            before_birthday = ((marriage_dt.month, marriage_dt.day)
                               < (birth_dt.month, birth_dt.day))
            age_at_marriage = (marriage_dt.year - birth_dt.year
                               - before_birthday)
            if age_at_marriage < minimum_age:
                errors.append(
                    "ERROR: INDIVIDUAL: US10: {}: Married {} before age 14"
                    .format(row_indi["ID"], marriage_dt.date()))
    return errors
def birth_before_current_check(row):
    """US01: flag an individual whose birthday lies in the future.

    Args:
        row: Individual record indexable by 'ID' and 'Birthday'.

    Returns:
        A list with one error message when the birthday is later than the
        current local time, otherwise an empty list.
    """
    right_now = datetime.datetime.now()
    born_on = convertStringToDatetime(row['Birthday'])
    if born_on > right_now:
        return [
            "ERROR: INDIVIDUAL: US01: {}: Birthday {} occurs in the future"
            .format(row['ID'], born_on.date())
        ]
    return []
def unique_child_name_and_birth(individuals, fam_row):
    """US25: flag families with children sharing both name and birthday.

    Args:
        individuals: DataFrame of individuals with 'ID', 'Name' and
            'Birthday' columns.
        fam_row: Family record indexable by 'ID' and 'Children'.  'Children'
            may be a collection of IDs or a string holding a Python list
            literal of IDs; a float (NaN) means no children.

    Returns:
        A list of anomaly messages, one per child that repeats the name and
        birthday of an earlier child of the same family.
    """
    errors = []
    children = fam_row['Children']
    if children is None or isinstance(children, float):
        return errors
    if isinstance(children, str):
        # Parse the stringified list so membership is an exact ID match;
        # a substring test would let 'I1' match "['I10']".
        children = literal_eval(children)

    fam_id = fam_row["ID"]
    # Track every (name, birthday) pair rather than one birthday per name,
    # so a duplicate is still caught after a same-name child with a
    # different birthday.
    seen = set()
    for _, row in individuals.iterrows():
        if row["ID"] not in children:
            continue
        if isinstance(row['Birthday'], float):
            continue  # no birthday recorded for this child
        identity = (row["Name"], convertStringToDatetime(row['Birthday']))
        if identity in seen:
            errors.append(
                "ANOMOLY: FAMILY: US25: {}: More than one child is not unique"
                .format(fam_id))
        else:
            seen.add(identity)
    return errors
def divorce_before_death_check(famRow, individualsDF):
    """US06: flag divorces dated after the death of either spouse.

    Args:
        famRow: Family record indexable by 'Divorced', 'Husband ID' and
            'Wife ID'.
        individualsDF: DataFrame of individuals with 'Alive' and 'Death'
            columns.

    Returns:
        A list of error messages: the husband's (if any) first, then the
        wife's (if any).
    """
    problems = []
    # Only families with a string 'Divorced' value have a divorce to check.
    if not isinstance(famRow["Divorced"], str):
        return problems

    divorced_on = convertStringToDatetime(famRow['Divorced'])
    for spouse_column in ("Husband ID", "Wife ID"):
        spouse_id = famRow[spouse_column]
        # The individual is fetched by position: the numeric part of the ID
        # minus one is used as the row offset.
        spouse = individualsDF.iloc[int(spouse_id[1:]) - 1]
        if spouse["Alive"] == False:
            died_on = convertStringToDatetime(spouse["Death"])
            if divorced_on > died_on:
                problems.append(
                    "ERROR: INDIVIDUAL: US06: {}: Death {} before Divorced {}"
                    .format(spouse_id, died_on.date(), divorced_on.date()))
    return problems
def no_more_than_5_kids_check(individualsDF, familiesDF):
    """US14: flag families with more than 5 children born at the same time.

    "At the same time" means within 2 days of each other.  The check finds
    the largest group of children whose births all fall inside one such
    window and reports the family when that group exceeds 5.  Counting
    *pairs* of close births instead would over-report: four same-day
    siblings already form six pairs.

    Args:
        individualsDF: DataFrame of individuals with a 'Birthday' column.
        familiesDF: DataFrame of families with 'ID' and 'Children' columns;
            'Children' is a string holding a Python list literal of IDs.

    Returns:
        A list of error messages, at most one per family.
    """
    max_simultaneous = 5
    errors = []
    for _, famRow in familiesDF.iterrows():
        if not isinstance(famRow["Children"], str):
            continue
        child_ids = literal_eval(famRow["Children"])
        if len(child_ids) <= max_simultaneous:
            continue  # too few children to ever exceed the limit

        # NOTE(review): individuals are looked up by position, assuming the
        # row at index N-1 holds the individual whose ID ends in N -- confirm
        # this holds for every input file.
        births = sorted(
            convertStringToDatetime(
                individualsDF.iloc[int(child_id[1:]) - 1]["Birthday"])
            for child_id in child_ids)

        # Sliding window over the sorted births: `start` trails `end` so
        # that every birth in the window is less than 2 days after
        # births[start].
        largest_group = 0
        start = 0
        for end, birth in enumerate(births):
            while (birth - births[start]).days >= 2:
                start += 1
            largest_group = max(largest_group, end - start + 1)

        if largest_group > max_simultaneous:
            errors.append(
                "ERROR: FAMILY: US14: {}: More than 5 children born at the same time (Within 2 days of eachother)"
                .format(famRow["ID"]))
    return errors
def get_parents_death(individuals2, row_fam):
    """Collect the parents' death dates and run the US09 child-birth check.

    Args:
        individuals2: DataFrame of individuals with 'ID' and 'Death'.
        row_fam: Family record indexable by 'ID', 'Children', 'Wife ID' and
            'Husband ID'.

    Returns:
        The result of ``birth_before_parents_death`` for this family, or an
        empty list when the family's 'Children' value is a float.
    """
    children = row_fam["Children"]
    # Each entry is [has_died, death_datetime].
    mother_death = [False, None]
    father_death = [False, None]

    for _, person in individuals2.iterrows():
        if not isinstance(person["Death"], str):
            continue  # no death recorded for this individual
        is_mother = person["ID"] == row_fam["Wife ID"]
        if not (is_mother or person["ID"] == row_fam["Husband ID"]):
            continue
        died_on = convertStringToDatetime(person['Death'])
        if is_mother:
            mother_death = [True, died_on]
        else:
            father_death = [True, died_on]

    if isinstance(children, float):
        return []
    return birth_before_parents_death(individuals2, row_fam["ID"],
                                      mother_death, father_death, children)
def birth_before_parents_death(individuals2, fam, mother_death, father_death, children):
    """US09: flag children born after a parent's death.

    A child must be born on or before the mother's death and no later than
    nine months after the father's death.

    Args:
        individuals2: DataFrame of individuals with 'ID' and 'Birthday'.
        fam: Family ID used in the messages.
        mother_death: ``[has_died, death_datetime]`` for the mother.
        father_death: ``[has_died, death_datetime]`` for the father.
        children: Collection of child IDs, or a string holding a Python list
            literal of IDs.

    Returns:
        A list of error messages, in individual-table order.
    """
    if isinstance(children, str):
        # Parse the stringified list so membership is an exact ID match;
        # a substring test would let 'I1' match "['I10']".
        children = literal_eval(children)

    # Only needed (and only built) when the father has a death date.
    nine_month = relativedelta(months=9) if father_death[0] else None

    errors = []
    for _, row_indi in individuals2.iterrows():
        if row_indi["ID"] not in children:
            continue
        birth_dt = convertStringToDatetime(row_indi['Birthday'])
        if mother_death[0] and birth_dt > mother_death[1]:
            # .date() must be *called*; formatting the bound method would
            # print its repr instead of the date.
            errors.append(
                "ERROR: FAMILY: US09: {}: Child {} born {} after mother's death on {}"
                .format(fam, row_indi["ID"], birth_dt.date(),
                        mother_death[1].date()))
        if father_death[0] and (birth_dt - nine_month) > father_death[1]:
            errors.append(
                "ERROR: FAMILY: US09: {}: Child {} born {} after more than nine months after father's death on {}"
                .format(fam, row_indi["ID"], birth_dt.date(),
                        father_death[1].date()))
    return errors
def NoBigamy(dataframe):
    """US11: report spouses who are in two marriages at the same time.

    Each family row is compared with every *other* row (matched by
    position, not by comparing cell values).  Two marriages that share a
    husband or a wife overlap when each one starts before the other has
    ended; a marriage without a string 'Divorced' value is treated as still
    ongoing.  Rows without a string 'Married' value are ignored, since there
    is no start date to compare.

    Args:
        dataframe: DataFrame of families with 'ID', 'Husband ID', 'Wife ID',
            'Married' and 'Divorced' columns.

    Returns:
        A list of error messages.  An overlapping pair is reported once for
        each of the two family rows involved; for a given pair of rows the
        shared-wife message precedes the shared-husband message.
    """
    template = ('ERROR: FAMILY: US11: {}: {} married to both {} and {}'
                ' at the same time')
    msg = []
    for pos, (_, row) in enumerate(dataframe.iterrows()):
        if not isinstance(row['Married'], str):
            continue

        # Read the IDs fresh for every row; a missing ID becomes None so it
        # can never match another row or leak in from a previous iteration.
        hus_id = row['Husband ID'] if isinstance(row['Husband ID'], str) else None
        wife_id = row['Wife ID'] if isinstance(row['Wife ID'], str) else None
        married_datetime1 = convertStringToDatetime(row['Married'])
        divorce_datetime1 = (convertStringToDatetime(row['Divorced'])
                             if isinstance(row['Divorced'], str) else None)

        for pos2, (_, row2) in enumerate(dataframe.iterrows()):
            if pos2 == pos:
                continue  # never compare a row with itself

            same_wife = wife_id is not None and wife_id == row2['Wife ID']
            same_husband = hus_id is not None and hus_id == row2['Husband ID']
            if not (same_wife or same_husband):
                continue
            if not isinstance(row2['Married'], str):
                continue

            married_datetime2 = convertStringToDatetime(row2['Married'])
            divorce_datetime2 = (convertStringToDatetime(row2['Divorced'])
                                 if isinstance(row2['Divorced'], str) else None)

            # The marriages do not overlap if either one ended on or before
            # the day the other began.
            first_over = (divorce_datetime1 is not None
                          and divorce_datetime1 <= married_datetime2)
            second_over = (divorce_datetime2 is not None
                           and divorce_datetime2 <= married_datetime1)
            if first_over or second_over:
                continue

            if same_wife:
                msg.append(template.format(row['ID'], wife_id, hus_id,
                                           row2['Husband ID']))
            if same_husband:
                msg.append(template.format(row['ID'], hus_id, wife_id,
                                           row2['Wife ID']))
    return msg