def filter_twinsID_rfMRI():
    """
    Check whether every twin subject has all 4 rfMRI runs.

    Reads the twin pairs from 'twins_id.csv' under work_dir (columns
    'twin1' and 'twin2') and the subject IDs listed, one integer per line,
    in the 'rfMRI_REST_id' file under proj_dir. If every twin ID appears in
    that list, only a message is printed. Otherwise the twin table is handed
    to h2.filter_twins_id with 'twins_id_rfMRI.csv' as the target file, and
    the result is summarized with h2.count_twins_id.
    """
    import numpy as np
    import pandas as pd

    twins_file = pjoin(work_dir, 'twins_id.csv')
    subjs_file = pjoin(
        proj_dir, 'analysis/s2/1080_fROI/refined_with_Kevin/'
        'rfMRI/rfMRI_REST_id')
    trg_file = pjoin(work_dir, 'twins_id_rfMRI.csv')

    twins_df = pd.read_csv(twins_file)
    subjs_twin = set(np.concatenate([twins_df['twin1'], twins_df['twin2']]))

    # Use a context manager so the file handle is closed deterministically;
    # blank lines are skipped instead of crashing int().
    with open(subjs_file) as rf:
        subjs_id = {int(line) for line in rf.read().splitlines()
                    if line.strip()}

    if subjs_twin.issubset(subjs_id):
        print('All twins have all 4 rfMRI runs')
    else:
        print("Filter twins which don't have all 4 rfMRI runs.")
        h2.filter_twins_id(data=twins_df, limit_set=subjs_id,
                           trg_file=trg_file)
        # NOTE(review): the counting step is assumed to belong to this
        # branch, since trg_file is only produced here -- confirm.
        h2.count_twins_id(trg_file)
def filter_twinsID_G1G2():
    """
    Check, per hemisphere, whether every twin subject is in G1 or G2.

    Reads the twin pairs from 'twins_id.csv' under work_dir (columns
    'twin1' and 'twin2'), the subject IDs from 'analysis/s2/subject_id'
    under proj_dir (one integer per line), and for each hemisphere
    ('lh', 'rh') the group-ID vector 'group_id_{hemi}.npy'. Subjects whose
    group ID is 1 or 2 form the allowed set. If all twins are in that set,
    only a message is printed. Otherwise the twin table is handed to
    h2.filter_twins_id with 'twins_id_G1G2_{hemi}.csv' as the target file,
    and the result is summarized with h2.count_twins_id.
    """
    import numpy as np
    import pandas as pd

    hemis = ('lh', 'rh')
    twins_file = pjoin(work_dir, 'twins_id.csv')
    gid_file = pjoin(
        proj_dir, 'analysis/s2/1080_fROI/refined_with_Kevin/'
        'grouping/group_id_{hemi}.npy')
    subjs_file = pjoin(proj_dir, 'analysis/s2/subject_id')
    trg_file = pjoin(work_dir, 'twins_id_G1G2_{hemi}.csv')

    twins_df = pd.read_csv(twins_file)
    subjs_twin = set(np.concatenate([twins_df['twin1'], twins_df['twin2']]))

    # Use a context manager so the file handle is closed deterministically;
    # blank lines are skipped instead of crashing int().
    with open(subjs_file) as rf:
        subjs_1080 = np.array(
            [int(line) for line in rf.read().splitlines() if line.strip()])

    for hemi in hemis:
        # The group-ID vector is used as a boolean mask over subjs_1080, so
        # it is assumed to be aligned with the subject list -- TODO confirm.
        gid_vec = np.load(gid_file.format(hemi=hemi))
        gid_idx_vec = np.logical_or(gid_vec == 1, gid_vec == 2)
        subjs_id = subjs_1080[gid_idx_vec]

        if subjs_twin.issubset(subjs_id):
            print('All twins are in G1 and G2')
        else:
            print("Filter twins who are not in G1 or G2.")
            trg_file_tmp = trg_file.format(hemi=hemi)
            h2.filter_twins_id(data=twins_df, limit_set=subjs_id,
                               trg_file=trg_file_tmp)
            # NOTE(review): the counting step is assumed to belong to this
            # branch, since trg_file_tmp is only defined here -- confirm.
            h2.count_twins_id(trg_file_tmp)
def get_twinsID():
    """
    Build the twins ID table from the HCP restricted behavioral file.

    Twins are selected according to 'ZygosityGT' and paired according to
    'Family_ID'; the extraction itself is delegated to h2.get_twins_id,
    which receives the restricted CSV and the output path 'twins_id.csv'
    under work_dir. The result is then summarized with h2.count_twins_id.
    """
    out_csv = pjoin(work_dir, 'twins_id.csv')
    restricted_csv = '/nfs/m1/hcp/S1200_behavior_restricted.csv'

    h2.get_twins_id(restricted_csv, out_csv)
    h2.count_twins_id(out_csv)
def filter_twinsID_1080():
    """
    Check whether the twin subjects are a subset of the 1080 subjects.

    Reads the twin pairs from 'twins_id.csv' under work_dir (columns
    'twin1' and 'twin2') and the subject IDs from 'analysis/s2/subject_id'
    under proj_dir (one integer per line). If every twin ID appears in that
    list, only a message is printed. Otherwise the twin table is handed to
    h2.filter_twins_id with 'twins_id_1080.csv' as the target file, and the
    result is summarized with h2.count_twins_id.
    """
    import numpy as np
    import pandas as pd

    twins_file = pjoin(work_dir, 'twins_id.csv')
    subjs_file = pjoin(proj_dir, 'analysis/s2/subject_id')
    trg_file = pjoin(work_dir, 'twins_id_1080.csv')

    twins_df = pd.read_csv(twins_file)
    subjs_twin = set(np.concatenate([twins_df['twin1'], twins_df['twin2']]))

    # Use a context manager so the file handle is closed deterministically;
    # blank lines are skipped instead of crashing int().
    with open(subjs_file) as rf:
        subjs_id = {int(line) for line in rf.read().splitlines()
                    if line.strip()}

    if subjs_twin.issubset(subjs_id):
        print('All twins is a subset of 1080 subjects.')
    else:
        print('Filter twins which are not in 1080 subjects.')
        h2.filter_twins_id(data=twins_df, limit_set=subjs_id,
                           trg_file=trg_file)
        # NOTE(review): the counting step is assumed to belong to this
        # branch, since trg_file is only produced here -- confirm.
        h2.count_twins_id(trg_file)