예제 #1
0
def parse_siblings(dir_data, logger=None, f_mids='mid.csv'):
    """
    Parse mixed-gender sibling pairs (one brother, one sister) for every family.

    Sibling pairs are the entries of the relationship matrix equal to 2. Pairs in which both members are
    male (brothers) or both are female (sisters) are dropped, so only brother-sister pairs are returned.

    :param dir_data:        Directory containing folders of FIDs (e.g., F0001/, ..., F????/).
    :param logger:          Logger-like object used for status and error messages. When None, the
                            standard-library logger for this module is used.
    :param f_mids:          Name of the member ID LUT CSV file passed to db.load_mids.
    :return:                List of db.Pair objects (kind='siblings') holding 1-based member IDs.
    """
    if logger is None:
        # The default must not crash on the first log call.
        import logging
        logger = logging.getLogger(__name__)

    kind = 'siblings'
    # family directories
    dirs_fid, fid_list = load_fids(dir_data)

    logger.info("{} families are being processed".format(len(fid_list)))
    # Load MID LUT for all FIDs (indexed in the same order as fid_list).
    df_mids = db.load_mids(dirs_fid, f_csv=f_mids)

    siblings = []
    for i, fid in enumerate(fid_list):
        rel_mat = db.parse_relationship_matrices(df_mids[i])

        # NOTE(review): the 'm'/'f' substring tests below assume check_gender_label
        # returns normalized labels -- confirm against helpers.check_gender_label.
        success, genders = helpers.check_gender_label(list(df_mids[i].Gender))
        if not success:
            logger.error("Gender notation incorrect for {}".format(fid))

        # (row indices, column indices) of every matrix entry marked as sibling
        sibling_ids = np.where(rel_mat == 2)

        if not helpers.check_npairs(len(sibling_ids[1]), kind, fid):
            continue

        # dict.fromkeys de-duplicates while keeping first-seen order, so the output
        # order is deterministic and a repeated pair can never trigger a failed removal.
        for pair in dict.fromkeys(db.get_unique_pairs(sibling_ids)):
            first, second = genders[pair[0]], genders[pair[1]]
            # skip brother-brother and sister-sister pairs
            if ('m' in first and 'm' in second) or ('f' in first and 'f' in second):
                logger.debug("Removing {}".format(pair))
                continue

            # matrix indices are 0-based; member IDs are 1-based
            mids = list(np.array(pair) + 1)
            siblings.append(db.Pair(mids=mids, fid=fid, kind=kind))

    return siblings
예제 #2
0
def parse_sisters(dir_data, logger=None, f_mids='mid.csv'):
    """
    Parse sister pairs by referencing member ID LUT and relationship matrix.

    Siblings RID is 2 and sisters are female. Thus, these are the factors we use to identify pairs.

    :param dir_data:        Directory containing folders of FIDs (e.g., F0001/, ..., F????/).
    :param logger:          Logger-like object used for status and error messages. When None, the
                            standard-library logger for this module is used.
    :param f_mids:          Name of the member ID LUT CSV file passed to db.load_mids.
    :return:                The accumulated result of db.set_pairs over all families
                            (presumably a list of db.Pair objects -- confirm against db.set_pairs).
    """
    if logger is None:
        # The default must not crash on the first log call.
        import logging
        logger = logging.getLogger(__name__)

    kind = 'sisters'
    # family directories
    dirs_fid, fid_list = load_fids(dir_data)

    logger.info("{} families are being processed".format(len(fid_list)))
    # Load MID LUT for all FIDs (indexed in the same order as fid_list).
    df_mids = db.load_mids(dirs_fid, f_csv=f_mids)

    sisters = []
    for i, fid in enumerate(fid_list):
        rel_mat = db.parse_relationship_matrices(df_mids[i])

        success, genders = helpers.check_gender_label(list(df_mids[i].Gender))
        if not success:
            logger.error("Gender notation incorrect for {}".format(fid))

        # Restrict the matrix to female members.
        # NOTE(review): presumably zeroes rows/columns of non-'f' members -- confirm
        # against db.specify_gender.
        rel_mat = db.specify_gender(rel_mat, genders, 'f')

        # (row indices, column indices) of every remaining entry marked as sibling
        sister_ids = np.where(rel_mat == 2)

        if not helpers.check_npairs(len(sister_ids[1]), kind, fid):
            continue

        # add to list of sisters
        sisters = db.set_pairs(sisters, sister_ids, kind, fid)

    return sisters
예제 #3
0
def parse_grandparents(dir_data, logger=None, f_mids='mid.csv'):
    """
    Parse grandparent-grandchild pairs by referencing member ID LUT and relationship matrix.

    Pairs are taken from the relationship-matrix entries equal to 6; the row index is treated as the
    grandparent and the column index as the grandchild. Each pair is sorted into one of four groups by
    the gender of both members:

        gfgd: grandfather-granddaughter     gfgs: grandfather-grandson
        gmgd: grandmother-granddaughter     gmgs: grandmother-grandson

    :param dir_data:        Directory containing folders of FIDs (e.g., F0001/, ..., F????/).
    :param logger:          Logger-like object used for status and error messages. When None, the
                            standard-library logger for this module is used.
    :param f_mids:          Name of the member ID LUT CSV file passed to db.load_mids.
    :return:                Tuple of four lists of db.Pair objects: (gfgd, gfgs, gmgd, gmgs).
                            Member IDs in each pair are 1-based, ordered (grandparent, grandchild).
    """
    if logger is None:
        # The default must not crash on the first log call.
        import logging
        logger = logging.getLogger(__name__)

    # family directories
    dirs_fid, fid_list = load_fids(dir_data)

    logger.info("{} families are being processed".format(len(fid_list)))
    # Load MID LUT for all FIDs (indexed in the same order as fid_list).
    df_mids = db.load_mids(dirs_fid, f_csv=f_mids)

    # One bucket per (grandparent gender, grandchild gender) combination. The key is
    # also the Pair kind, so a list and the tags of its members cannot disagree.
    pairs = {'gfgd': [], 'gfgs': [], 'gmgd': [], 'gmgs': []}

    # NOTE(review): 'parent-child' is kept as the label handed to check_npairs because
    # its use there is not visible from here; it looks like it should describe
    # grandparents -- confirm against helpers.check_npairs.
    kind = 'parent-child'
    for i, fid in enumerate(fid_list):
        rel_mat = db.parse_relationship_matrices(df_mids[i])

        # NOTE(review): the 'f' substring tests below assume check_gender_label returns
        # normalized labels -- confirm against helpers.check_gender_label.
        success, genders = helpers.check_gender_label(list(df_mids[i].Gender))
        if not success:
            logger.error("Gender notation incorrect for {}".format(fid))

        # Entries equal to 3 and 6 are expected to mirror each other across the
        # diagonal (grandchild vs. grandparent direction), so their counts should match.
        c_ids = np.where(rel_mat == 3)
        p_ids = np.where(rel_mat == 6)
        if len(c_ids[0]) != len(p_ids[0]):
            logger.warning("Number of children and parents are different.")

        if not helpers.check_npairs(len(c_ids[0]), kind, fid):
            continue

        for gp_idx, gc_idx in zip(p_ids[0], p_ids[1]):
            logger.debug("{}".format((gp_idx, gc_idx)))
            # matrix indices are 0-based; member IDs are 1-based
            p_mid = gp_idx + 1
            c_mid = gc_idx + 1

            # 'f' marks a female member: grandmother / granddaughter
            tag = ('gm' if 'f' in genders[gp_idx] else 'gf') + \
                  ('gd' if 'f' in genders[gc_idx] else 'gs')
            pairs[tag].append(db.Pair(mids=(p_mid, c_mid), fid=fid, kind=tag))

    return pairs['gfgd'], pairs['gfgs'], pairs['gmgd'], pairs['gmgs']