def parse_siblings(
        dir_data='/Users/josephrobinson//Dropbox/Families_In_The_Wild/Database/FIDs/',
        f_mids='mid.csv'):
    """
    Collect sibling pairs whose two members are not of the same gender.

    For each family, candidate pairs are the positions of the relationship
    matrix that hold 2 (the sibling RID). A candidate is discarded when both
    gender labels contain 'Male' or both contain 'Female'; every other
    candidate is kept.

    :param dir_data:    Directory containing folders of FIDs (e.g., F0001/, ..., F????/).
    :param f_mids:      CSV file name handed to db.load_mids as the member ID (MID) look-up table.
    :return:            List of db.Pair objects of kind 'siblings'; matrix indices are
                        shifted by one before being stored as mids.
    """
    kind = 'siblings'

    # One directory / one FID string per family.
    dirs_fid, fid_list = load_fids(dir_data)
    print("{} families are being processed".format(len(fid_list)))

    # MID look-up tables, indexed in the same order as fid_list.
    df_mids = db.load_mids(dirs_fid, f_csv=f_mids)

    siblings = []
    for idx, fid in enumerate(fid_list):
        family = df_mids[idx]
        rel_mat = db.parse_relationship_matrices(family)
        genders = list(family.Gender)

        hits = np.where(rel_mat == 2)
        if not helpers.check_npairs(len(hits[1]), kind, fid):
            continue

        candidates = db.get_unique_pairs(hits)
        kept = list(set(candidates))
        for pair in candidates:
            # Same-gender pairs (brothers or sisters) are not wanted here.
            same_gender = any(
                label in genders[pair[0]] and label in genders[pair[1]]
                for label in ('Male', 'Female'))
            if same_gender:
                print("Removing", pair)
                kept.remove(pair)

        for position, pair in enumerate(kept):
            print((position, pair))
            # Matrix indices are zero-based; stored member IDs are offset by one.
            mids = list(np.array(pair) + 1)
            siblings.append(db.Pair(mids=mids, fid=fid, kind=kind))
    return siblings
def parse_sisters(
        dir_data='/Users/josephrobinson//Dropbox/Families_In_The_Wild/Database/FIDs/',
        f_mids='mid.csv'):
    """
    Collect sister pairs from every family's member table.

    For each family the relationship matrix is passed through
    db.specify_gender with 'Female'; positions of the result that hold 2
    (the sibling RID) are then handed to db.set_pairs as sister pairs.

    :param dir_data:    Directory containing folders of FIDs (e.g., F0001/, ..., F????/).
    :param f_mids:      CSV file name handed to db.load_mids as the member ID (MID) look-up table.
    :return:            The list accumulated by db.set_pairs over all families.
    """
    kind = 'sisters'

    # One directory / one FID string per family.
    dirs_fid, fid_list = load_fids(dir_data)
    n_families = len(fid_list)
    print("{} families are being processed".format(n_families))

    # MID look-up tables, indexed in the same order as fid_list.
    df_mids = db.load_mids(dirs_fid, f_csv=f_mids)

    sisters = []
    for idx, fid in enumerate(fid_list):
        family = df_mids[idx]
        rel_mat = db.parse_relationship_matrices(family)
        genders = list(family.Gender)

        # NOTE(review): presumably clears every entry that involves a
        # non-female member -- confirm against db.specify_gender.
        female_only = db.specify_gender(rel_mat, genders, 'Female')

        rows_cols = np.where(female_only == 2)
        if helpers.check_npairs(len(rows_cols[1]), kind, fid):
            # db.set_pairs returns the updated collection of pairs.
            sisters = db.set_pairs(sisters, rows_cols, kind, fid)
    return sisters
def parse_grandparents(
        dir_data='/Users/josephrobinson//Dropbox/Families_In_The_Wild/Database/FIDs/',
        f_mids='mid.csv'):
    """
    Collect grandparent-grandchild pairs, split by the gender of each member.

    For each family, pairs are read from the positions of the relationship
    matrix that hold 6; the row index is treated as the grandparent and the
    column index as the grandchild. A member whose gender label contains
    'Female' is treated as grandmother / granddaughter, any other label as
    grandfather / grandson. Each pair is labelled with, and stored in the
    list named after, one of:

        gfgd  grandfather-granddaughter
        gfgs  grandfather-grandson
        gmgd  grandmother-granddaughter
        gmgs  grandmother-grandson

    :param dir_data:    Directory containing folders of FIDs (e.g., F0001/, ..., F????/).
    :param f_mids:      CSV file name handed to db.load_mids as the member ID (MID) look-up table.
    :return:            Tuple of four lists of db.Pair, in the order (gfgd, gfgs, gmgd, gmgs).
    """
    # One directory / one FID string per family.
    dirs_fid, fid_list = load_fids(dir_data)
    print("{} families are being processed".format(len(fid_list)))

    # MID look-up tables, indexed in the same order as fid_list.
    df_mids = db.load_mids(dirs_fid, f_csv=f_mids)

    # Keyed by pair type so a pair's label and its destination list cannot
    # drift apart.
    pairs = {'gfgd': [], 'gfgs': [], 'gmgd': [], 'gmgs': []}

    # NOTE(review): label forwarded to helpers.check_npairs unchanged from the
    # original code; it reads 'parent-child' although the pairs are
    # grandparent-grandchild -- confirm whether the helper depends on it.
    kind = 'parent-child'
    for i, fid in enumerate(fid_list):
        rel_mat = db.parse_relationship_matrices(df_mids[i])
        genders = list(df_mids[i].Gender)

        c_ids = np.where(rel_mat == 3)
        p_ids = np.where(rel_mat == 6)
        if len(c_ids[0]) != len(p_ids[0]):
            warn.warn("Number of children and parents are different.")

        if not helpers.check_npairs(len(c_ids[0]), kind, fid):
            continue

        for gp_idx, gc_idx in zip(p_ids[0], p_ids[1]):
            print((gp_idx, gc_idx))
            # Matrix indices are zero-based; stored member IDs are offset by one.
            gp_mid = gp_idx + 1
            gc_mid = gc_idx + 1

            gp_tag = 'gm' if 'Female' in genders[gp_idx] else 'gf'
            gc_tag = 'gd' if 'Female' in genders[gc_idx] else 'gs'
            pair_kind = gp_tag + gc_tag

            pairs[pair_kind].append(
                db.Pair(mids=(gp_mid, gc_mid), fid=fid, kind=pair_kind))

    return pairs['gfgd'], pairs['gfgs'], pairs['gmgd'], pairs['gmgs']
def tri_subjects(
        dir_data='/Users/josephrobinson//Dropbox/Families_In_The_Wild/Database/FIDs/',
        f_mids='mid.csv'):
    """
    Build (father, mother, child) triplets for every family.

    For each family, positions of the relationship matrix holding 1 are read
    as (child, parent) index pairs and positions holding 4 as the mirrored
    (parent, child) entries. Families where the two sets of entries do not
    mirror each other are skipped. The (child, parent) pairs are grouped per
    child by group_child_parents; only children with exactly two parents,
    one labelled 'Male' and one labelled 'Female', produce a triplet.

    Each triplet is a tuple of strings ``(father, mother, child)`` formatted
    as ``"<FID>/MID<n>"``, where n is the matrix index plus one.

    :param dir_data:    Directory containing folders of FIDs (e.g., F0001/, ..., F????/).
    :param f_mids:      CSV file name handed to db.load_mids as the member ID (MID) look-up table.
    :return:            Tuple (fmd, fms): triplets whose child is not labelled 'Male'
                        (father-mother-daughter), then triplets whose child is
                        labelled 'Male' (father-mother-son).
    """
    # One directory / one FID string per family.
    dirs_fid, fid_list = load_fids(dir_data)
    print("{} families are being processed".format(len(fid_list)))

    # MID look-up tables, indexed in the same order as fid_list.
    df_mids = db.load_mids(dirs_fid, f_csv=f_mids)

    fms = []
    fmd = []
    for i, fid in enumerate(fid_list):
        print(fid)
        rel_mat = db.parse_relationship_matrices(df_mids[i])
        genders = list(df_mids[i].Gender)

        c_ids = np.where(rel_mat == 1)
        p_ids = np.where(rel_mat == 4)

        # np.where returns one index array per matrix axis, so emptiness has
        # to be tested on an index array, not on the enclosing tuple.
        if len(c_ids[0]) == 0:
            logger.warning("No pair of parents for child in {}.".format(fid))
            continue

        # Every child->parent entry must be mirrored by a parent->child entry.
        if set(p_ids[1]) != set(c_ids[0]) or set(c_ids[1]) != set(p_ids[0]):
            logger.error("Unmatched pair in {}.".format(fid))
            continue

        ch_ids = list(zip(c_ids[0], c_ids[1]))
        # NOTE(review): assumed to map child index -> list of (child, parent)
        # index pairs -- confirm against group_child_parents.
        cp_pairs = group_child_parents(ch_ids)
        for cid, pids in cp_pairs.items():
            if len(pids) != 2:
                if len(pids) > 2:
                    logger.error("{} parents in {}. {}".format(
                        len(pids), fid, pids))
                continue

            first_parent, second_parent = pids[0][1], pids[1][1]
            try:
                p_genders = [genders[first_parent], genders[second_parent]]
            except IndexError:
                # Without both gender labels the triplet cannot be ordered;
                # skip rather than reuse labels from a previous child.
                logger.error("Parent index out of range in {}. {}".format(
                    fid, pids))
                continue

            # Order the parents as (father, mother); MIDs are index + 1.
            if "Male" in p_genders[0] and "Female" in p_genders[1]:
                pars_ids = (first_parent + 1, second_parent + 1)
            elif "Male" in p_genders[1] and "Female" in p_genders[0]:
                pars_ids = (second_parent + 1, first_parent + 1)
            else:
                logger.error("Parents of same gender in {}. {}".format(
                    fid, pids))
                continue

            cmid = "{}/MID{}".format(fid, cid + 1)
            fmid = "{}/MID{}".format(fid, pars_ids[0])
            mmid = "{}/MID{}".format(fid, pars_ids[1])
            if "Male" in genders[cid]:
                fms.append((fmid, mmid, cmid))
            else:
                fmd.append((fmid, mmid, cmid))
    return fmd, fms