def filter_twinsID_rfMRI():
    """
    Check whether every twin subject has all 4 rfMRI runs.

    Reads the twin pairs from 'twins_id.csv' under ``work_dir`` and the
    subject IDs listed one per line in the 'rfMRI_REST_id' file
    (presumably the subjects that have all 4 rfMRI runs -- confirm against
    whatever generates that file).

    If every ID in the 'twin1' and 'twin2' columns is in that list, only a
    message is printed. Otherwise ``h2.filter_twins_id`` is called with the
    ID set as ``limit_set`` and 'twins_id_rfMRI.csv' as ``trg_file``,
    followed by ``h2.count_twins_id`` on that same file.
    """
    import numpy as np
    import pandas as pd

    twins_file = pjoin(work_dir, 'twins_id.csv')
    subjs_file = pjoin(
        proj_dir, 'analysis/s2/1080_fROI/refined_with_Kevin/'
        'rfMRI/rfMRI_REST_id')
    trg_file = pjoin(work_dir, 'twins_id_rfMRI.csv')

    twins_df = pd.read_csv(twins_file)
    # Pool both columns so each individual twin is checked, not each pair.
    subjs_twin = set(np.concatenate([twins_df['twin1'], twins_df['twin2']]))

    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(subjs_file) as rf:
        subjs_id = {int(line) for line in rf.read().splitlines()}

    if subjs_twin.issubset(subjs_id):
        print('All twins have all 4 rfMRI runs')
    else:
        print("Filter twins which don't have all 4 rfMRI runs.")
        h2.filter_twins_id(data=twins_df,
                           limit_set=subjs_id,
                           trg_file=trg_file)
        h2.count_twins_id(trg_file)
def filter_twinsID_G1G2():
    """
    Check, per hemisphere, whether every twin subject is in G1 or G2.

    Reads the twin pairs from 'twins_id.csv' under ``work_dir``, the subject
    IDs listed one per line in 'analysis/s2/subject_id', and for each
    hemisphere ('lh', 'rh') the group-ID vector 'group_id_{hemi}.npy'.
    The group-ID vector is used as a mask over the subject ID array, so the
    two are assumed to have the same length and order (not checked here).

    For each hemisphere, if every ID in the 'twin1' and 'twin2' columns
    belongs to a subject whose group ID is 1 or 2, only a message is
    printed. Otherwise ``h2.filter_twins_id`` is called with those subject
    IDs as ``limit_set`` and 'twins_id_G1G2_{hemi}.csv' as ``trg_file``,
    followed by ``h2.count_twins_id`` on that same file.
    """
    import numpy as np
    import pandas as pd

    hemis = ('lh', 'rh')
    twins_file = pjoin(work_dir, 'twins_id.csv')
    gid_file = pjoin(
        proj_dir, 'analysis/s2/1080_fROI/refined_with_Kevin/'
        'grouping/group_id_{hemi}.npy')
    subjs_file = pjoin(proj_dir, 'analysis/s2/subject_id')
    trg_file = pjoin(work_dir, 'twins_id_G1G2_{hemi}.csv')

    twins_df = pd.read_csv(twins_file)
    # Pool both columns so each individual twin is checked, not each pair.
    subjs_twin = set(np.concatenate([twins_df['twin1'], twins_df['twin2']]))

    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(subjs_file) as rf:
        subjs_1080 = np.array([int(line) for line in rf.read().splitlines()])

    for hemi in hemis:
        gid_vec = np.load(gid_file.format(hemi=hemi))
        # Boolean mask selecting subjects assigned to group 1 or group 2.
        gid_idx_vec = np.logical_or(gid_vec == 1, gid_vec == 2)
        subjs_id = subjs_1080[gid_idx_vec]

        if subjs_twin.issubset(subjs_id):
            print('All twins are in G1 and G2')
        else:
            print("Filter twins who are not in G1 or G2.")
            trg_file_tmp = trg_file.format(hemi=hemi)
            # NOTE(review): limit_set is a NumPy array here, whereas the
            # sibling filters pass a set; kept as-is to preserve behavior.
            h2.filter_twins_id(data=twins_df,
                               limit_set=subjs_id,
                               trg_file=trg_file_tmp)
            h2.count_twins_id(trg_file_tmp)
def get_twinsID():
    """
    Build the twins ID table from the HCP restricted behavioral CSV.

    Passes the restricted CSV and the output path 'twins_id.csv' (under
    ``work_dir``) to ``h2.get_twins_id``, which is expected to select twins
    by 'ZygosityGT' and pair them by 'Family_ID', then calls
    ``h2.count_twins_id`` on the resulting file.
    """
    restricted_csv = '/nfs/m1/hcp/S1200_behavior_restricted.csv'
    twins_csv = pjoin(work_dir, 'twins_id.csv')

    # Generate the table first, then report counts from the written file.
    h2.get_twins_id(restricted_csv, twins_csv)
    h2.count_twins_id(twins_csv)
def filter_twinsID_1080():
    """
    Check whether every twin subject belongs to the 1080 subjects.

    Reads the twin pairs from 'twins_id.csv' under ``work_dir`` and the
    subject IDs listed one per line in 'analysis/s2/subject_id'.

    If every ID in the 'twin1' and 'twin2' columns is in that list, only a
    message is printed. Otherwise ``h2.filter_twins_id`` is called with the
    ID set as ``limit_set`` and 'twins_id_1080.csv' as ``trg_file``,
    followed by ``h2.count_twins_id`` on that same file.
    """
    import numpy as np
    import pandas as pd

    twins_file = pjoin(work_dir, 'twins_id.csv')
    subjs_file = pjoin(proj_dir, 'analysis/s2/subject_id')
    trg_file = pjoin(work_dir, 'twins_id_1080.csv')

    twins_df = pd.read_csv(twins_file)
    # Pool both columns so each individual twin is checked, not each pair.
    subjs_twin = set(np.concatenate([twins_df['twin1'], twins_df['twin2']]))

    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(subjs_file) as rf:
        subjs_id = {int(line) for line in rf.read().splitlines()}

    if subjs_twin.issubset(subjs_id):
        print('All twins is a subset of 1080 subjects.')
    else:
        print('Filter twins which are not in 1080 subjects.')
        h2.filter_twins_id(data=twins_df,
                           limit_set=subjs_id,
                           trg_file=trg_file)
        h2.count_twins_id(trg_file)