def pair_dist(rand_pair, sub_files, sub_data=None, reg_var=None, len_time=235, data_field='dtseries'):
    """Squared fMRI distance (and optional squared regressor difference) for one pair.

    Parameters
    ----------
    rand_pair : pair of subject indices (index into sub_files / sub_data / reg_var).
    sub_files : list of .mat filenames; read only when sub_data is empty.
    sub_data : optional preloaded Time x Vertex x Subject array.
    reg_var : optional per-subject regression variable.
    len_time : number of timepoints to keep from each series.
    data_field : field name to read from each .mat file.

    Returns
    -------
    fmri_diff : per-vertex SQUARED distance between the brainSync-ed pair.
    regvar_diff : squared reg_var difference — only returned when reg_var is non-empty.
    """
    # FIX: mutable default arguments ([]) replaced with None sentinels;
    # behavior is unchanged for all callers.
    sub_data = np.array([] if sub_data is None else sub_data)
    reg_var = [] if reg_var is None else reg_var
    if sub_data.size > 0:
        sub1_data = sub_data[:, :, rand_pair[0]]
        sub2_data = sub_data[:, :, rand_pair[1]]
    else:
        sub1_data = spio.loadmat(sub_files[rand_pair[0]])[data_field].T
        sub2_data = spio.loadmat(sub_files[rand_pair[1]])[data_field].T
    sub1_data, _, _ = normalizeData(sub1_data[:len_time, :])
    sub2_data, _, _ = normalizeData(sub2_data[:len_time, :])
    # Temporally align subject 2 to subject 1 before measuring distance.
    sub2_data, _ = brainSync(X=sub1_data, Y=sub2_data)
    fmri_diff = sp.sum((sub2_data - sub1_data)**2, axis=0)
    # Returns SQUARE of the distance
    if len(reg_var) > 0:
        regvar_diff = sp.square(reg_var[rand_pair[0]] - reg_var[rand_pair[1]])
        return fmri_diff, regvar_diff
    else:
        return fmri_diff
def corr_perm_test(X_pairs, Y_pairs, reg_var, num_sub, nperm=1000):
    """Max-statistic permutation test of vertex-wise correlation.

    X_pairs: num_pairs x vertices; Y_pairs: num_pairs-vector of squared
    score differences; reg_var: per-subject scores used to rebuild the
    pair differences under each permutation.  Returns
    (pval_max, pval_perm_fdr, pval_perm).
    """
    X_norm, _, _ = normalizeData(X_pairs)
    num_pairs = X_norm.shape[0]
    Y_norm, _, _ = normalizeData(Y_pairs[:, None])
    # With both factors normalized, the column-wise inner product is the
    # Pearson correlation at each vertex.
    rho_orig = np.sum(X_norm * Y_norm, axis=0)
    max_null = np.zeros(nperm)
    n_count = np.zeros(X_norm.shape[1])
    print('Permutation testing')
    for ind in tqdm(range(nperm)):
        null_pairs, _ = gen_rand_pairs(num_sub=num_sub, num_pairs=num_pairs)
        null_pairs = np.array(null_pairs)
        y_null = sp.square(reg_var[null_pairs[:, 0]] - reg_var[null_pairs[:, 1]])
        y_null, _, _ = normalizeData(y_null[:, None])
        rho_null = np.sum(X_norm * y_null, axis=0)
        max_null[ind] = np.amax(rho_null)
        n_count += np.float32(rho_null >= rho_orig)
    # Family-wise p-values against the max-statistic null distribution.
    pval_max = np.sum(rho_orig[:, None] <= max_null[None, :], axis=1) / nperm
    pval_perm = n_count / nperm
    _, pval_perm_fdr = fdrcorrection(pval_perm)
    return pval_max, pval_perm_fdr, pval_perm
def pair_dist_two_groups(rand_pair, sub_grp1_files, sub_grp2_files, sub_data1=None, sub_data2=None, len_time=235):
    """Squared fMRI pair distance between one subject from each of two groups.

    rand_pair[0] indexes group 1, rand_pair[1] indexes group 2.  Data are
    taken from the preloaded arrays when provided, otherwise read from the
    .mat file lists.  Returns the per-vertex SQUARED distance after
    brainSync alignment.
    """
    # FIX: mutable default arguments ([]) replaced with None sentinels.
    sub_data1 = np.array([] if sub_data1 is None else sub_data1)
    sub_data2 = np.array([] if sub_data2 is None else sub_data2)
    if sub_data1.size > 0:
        sub1_data = sub_data1[:, :, rand_pair[0]]
    else:
        sub1_data = spio.loadmat(sub_grp1_files[rand_pair[0]])['dtseries'].T
    sub1_data, _, _ = normalizeData(sub1_data[:len_time, :])
    if sub_data2.size > 0:
        sub2_data = sub_data2[:, :, rand_pair[1]]
    else:
        sub2_data = spio.loadmat(sub_grp2_files[rand_pair[1]])['dtseries'].T
    # BUG FIX: the original normalized sub1_data here, silently discarding
    # subject 2's data and comparing subject 1 against itself.
    sub2_data, _, _ = normalizeData(sub2_data[:len_time, :])
    sub2_data, _ = brainSync(X=sub1_data, Y=sub2_data)
    fmri_diff = sp.sum((sub2_data - sub1_data)**2, axis=0)
    # Returns SQUARE of the distance
    return fmri_diff
def pair_dist(rand_pair, sub_files, reg_var, len_time=235):
    """Squared fMRI distance and squared regressor difference for one pair.

    Loads both subjects from their .mat files, truncates to len_time
    frames, normalizes, syncs subject 2 onto subject 1, and returns the
    per-vertex squared distance plus the squared reg_var difference.
    """
    idx_a, idx_b = rand_pair[0], rand_pair[1]
    data_a = spio.loadmat(sub_files[idx_a])['dtseries'].T
    data_b = spio.loadmat(sub_files[idx_b])['dtseries'].T
    data_a, _, _ = normalizeData(data_a[:len_time, :])
    data_b, _, _ = normalizeData(data_b[:len_time, :])
    data_b, _ = brainSync(X=data_a, Y=data_b)
    fmri_diff = sp.sum((data_b - data_a)**2, axis=0)
    regvar_diff = sp.square(reg_var[idx_a] - reg_var[idx_b])
    return fmri_diff, regvar_diff
def sub2ctrl_dist(sub_file, ctrl_files, len_time=235):
    """Per-vertex squared distance from one subject to each control.

    Returns a Vertex x len(ctrl_files) matrix; each column is the squared
    distance to one control after brainSync alignment to the subject.
    """
    subj = spio.loadmat(sub_file)['dtseries'].T
    subj, _, _ = normalizeData(subj[:len_time, :])
    fmri_diff = sp.zeros((subj.shape[1], len(ctrl_files)))
    for col, ctrl_file in enumerate(tqdm(ctrl_files)):
        ctrl = spio.loadmat(ctrl_file)['dtseries'].T
        ctrl, _, _ = normalizeData(ctrl[:len_time, :])
        ctrl, _ = brainSync(X=subj, Y=ctrl)
        fmri_diff[:, col] = sp.sum((subj - ctrl)**2, axis=0)
    return fmri_diff
def get_connectivity(data, labels, label_ids):
    """Compute ROI-mean time series and their |correlation| adjacency matrix.

    data: Time x Vertex array, or a .mat filename with a 'dtseries' field.
    labels: per-vertex label array; label_ids: ROI ids to extract.
    Returns (conn, rtseries.T): num_rois x num_rois weighted adjacency
    (|Pearson r|, not binarized) and num_rois x num_time ROI series.
    """
    if type(data) == str:
        df = spio.loadmat(data)
        data = df['dtseries'].T
    num_time = data.shape[0]
    num_rois = len(label_ids)
    rtseries = np.zeros((num_time, num_rois))  # e.g. 171 x 16
    for i, roi_id in enumerate(label_ids):  # renamed: `id` shadowed the builtin
        idx = labels == roi_id
        rtseries[:, i] = np.mean(data[:, idx], axis=1)
    rtseries, _, _ = normalizeData(rtseries)
    conn = abs(np.corrcoef(rtseries.T))
    conn[~np.isfinite(conn)] = 0  # treat infinite/NaN edges as no connection
    # FIX: the original O(n^2) Python loop only (a) forced the diagonal to 1
    # and (b) mirrored an already-symmetric |corrcoef| matrix (a no-op);
    # np.fill_diagonal is the equivalent single call.
    np.fill_diagonal(conn, 1.0)
    return conn, rtseries.T
def dist2atlas_reg(bfp_path, ref_atlas, sub_files, reg_var, len_time=235):
    """Regression stats based on squared distance to an atlas.

    For each subject, the fMRI series is synced to ref_atlas and the
    per-vertex squared distance is correlated (Pearson) across subjects
    with |reg_var - mean(reg_var)|.  Returns uncorrected and
    FDR-corrected p-values; FDR is restricted to labeled vertices.
    """
    print('dist2atlas_reg, assume that the data is normalized')
    num_vert = ref_atlas.shape[1]
    num_sub = len(sub_files)
    # Take absolute value of difference from the mean
    # for the IQ measure
    reg_var = sp.absolute(reg_var - sp.mean(reg_var))
    diff = sp.zeros((num_vert, num_sub))
    # Compute distance to atlas
    for ind in tqdm(range(num_sub)):
        sub_data = spio.loadmat(sub_files[ind])['dtseries'].T
        sub_data, _, _ = normalizeData(sub_data[:len_time, :])
        Y2, _ = brainSync(X=ref_atlas, Y=sub_data)
        diff[:, ind] = sp.sum((Y2 - ref_atlas)**2, axis=0)
    corr_pval = sp.zeros(num_vert)
    for vrt in tqdm(range(num_vert)):
        _, corr_pval[vrt] = sp.stats.pearsonr(diff[vrt, :], reg_var)
    # NaN p-values (e.g. zero-variance vertices) are set to chance level.
    corr_pval[sp.isnan(corr_pval)] = .5
    lab = spio.loadmat(bfp_path + '/supp_data/USCBrain_grayord_labels.mat')
    labs = lab['labels'].squeeze()
    corr_pval_fdr = sp.zeros(num_vert)
    # FDR correction over labeled (labs > 0) vertices only.
    _, pv = fdrcorrection(corr_pval[labs > 0])
    corr_pval_fdr[labs > 0] = pv
    return corr_pval, corr_pval_fdr
def load_bfp_data(sub_fname, LenTime):
    """Load subjects' preprocessed fMRI into a Time x Vertex x Subject array.

    sub_fname: list of .mat filenames, each containing a Time x Vertex
    'dtseries' matrix.
    LenTime: number of timepoints in data; should be the same in all subjects.
    Returns a 3D matrix: Time x Vertex x Subjects.
    """
    count1 = 0
    subN = len(sub_fname)
    print('loading data for ' + str(subN) + ' subjects')
    pbar = tqdm(total=subN)
    for ind in range(subN):
        fname = sub_fname[ind]
        df = spio.loadmat(fname)
        data = df['dtseries'].T
        if int(data.shape[0]) != LenTime:
            # NOTE(review): this only warns and then proceeds; a scan with
            # FEWER than LenTime frames will still reach the assignment
            # below with a short array -- confirm upstream guarantees.
            print(sub_fname[ind] + ' has %d timepoints, while %d were expected' % (data.shape[0], LenTime))
        d, _, _ = normalizeData(data[:LenTime, ])
        if count1 == 0:
            # Allocate once the vertex count is known from the first subject.
            sub_data = sp.zeros((LenTime, d.shape[1], subN))
        sub_data[:, :, count1] = d
        count1 += 1
        pbar.update(1)
        if count1 == subN:
            break
    pbar.close()
    print('loaded data for ' + str(subN) + ' subjects')
    return sub_data
def get_connectivity(data, labels, label_ids):
    """ROI-mean connectivity: correlation matrix of normalized ROI series.

    data: Time x Vertex array, or a .mat filename with a 'dtseries' field.
    Returns the num_rois x num_rois Pearson correlation matrix with
    non-finite entries zeroed.
    """
    if type(data) == str:
        mat_contents = spio.loadmat(data)
        data = mat_contents['dtseries'].T
    roi_means = np.zeros((data.shape[0], len(label_ids)))
    for col, roi_id in enumerate(label_ids):
        roi_means[:, col] = np.mean(data[:, labels == roi_id], axis=1)
    roi_means, _, _ = normalizeData(roi_means)
    conn = np.corrcoef(roi_means.T)
    conn[~np.isfinite(conn)] = 0
    return conn
def randpairsdist_reg(bfp_path, sub_files, reg_var, num_pairs=1000, len_time=235):
    """Regression stats from squared distances between random subject pairs.

    DEPRECATED (see print below); superseded by pairsdist_regression.
    Correlates per-vertex squared pair distances with squared reg_var
    differences over num_pairs random pairs.
    """
    print('dist2atlas_reg, assume that the data is normalized')
    print('This function is deprecated!!!!!!!!!!')
    # Get the number of vertices from a file
    num_vert = spio.loadmat(sub_files[0])['dtseries'].shape[0]
    # Generate random pairs (with replacement -- a subject may pair with itself)
    rand_pairs = sp.random.choice(len(sub_files), (num_pairs, 2), replace=True)
    fmri_diff = sp.zeros((num_vert, num_pairs))
    regvar_diff = sp.zeros(num_pairs)
    print('Reading subjects')
    # Compute distance between members of each pair after brainSync.
    for ind in tqdm(range(num_pairs)):
        sub1_data = spio.loadmat(sub_files[rand_pairs[ind, 0]])['dtseries'].T
        sub2_data = spio.loadmat(sub_files[rand_pairs[ind, 1]])['dtseries'].T
        sub1_data, _, _ = normalizeData(sub1_data[:len_time, :])
        sub2_data, _, _ = normalizeData(sub2_data[:len_time, :])
        sub2_data, _ = brainSync(X=sub1_data, Y=sub2_data)
        fmri_diff[:, ind] = sp.sum((sub2_data - sub1_data)**2, axis=0)
        regvar_diff[ind] = sp.square(reg_var[rand_pairs[ind, 0]] - reg_var[rand_pairs[ind, 1]])
    corr_pval = sp.zeros(num_vert)
    for ind in tqdm(range(num_vert)):
        _, corr_pval[ind] = sp.stats.pearsonr(fmri_diff[ind, :], regvar_diff)
    # NaN p-values are set to chance level.
    corr_pval[sp.isnan(corr_pval)] = .5
    labs = spio.loadmat(bfp_path + '/supp_data/USCBrain_grayord_labels.mat'
                        )['labels'].squeeze()
    corr_pval_fdr = sp.zeros(num_vert)
    # FDR correction restricted to labeled vertices.
    _, corr_pval_fdr[labs > 0] = fdrcorrection(corr_pval[labs > 0])
    return corr_pval, corr_pval_fdr
def corr_pearson_fdr(X_pairs, Y_pairs, reg_var, num_sub, nperm=1000):
    """Vertex-wise Pearson p-values with FDR correction.

    X_pairs: n x vertices; Y_pairs: n-vector of scores.  The reg_var,
    num_sub and nperm arguments are not used by this variant; they are
    kept for signature parity with the permutation test.
    Returns (corr_pval_fdr, corr_pval).
    """
    X_norm, _, _ = normalizeData(X_pairs)
    y_norm, _, _ = normalizeData(Y_pairs[:, None])
    num_vert = X_norm.shape[1]
    corr_pval = np.zeros(num_vert)
    for v in tqdm(range(num_vert)):
        _, corr_pval[v] = sp.stats.pearsonr(X_norm[:, v], y_norm.squeeze())
    corr_pval[np.isnan(corr_pval)] = .5
    _, corr_pval_fdr = fdrcorrection(corr_pval)
    return corr_pval_fdr, corr_pval
def pairsdist_regression(bfp_path, sub_files, reg_var, num_perm=1000, num_pairs=0, len_time=235):
    """Regression stats based on squared distance between subject pairs.

    All subjects are loaded and normalized; for each pair (all
    combinations, or num_pairs random ones when num_pairs > 0) the
    per-vertex squared brainSync distance and squared reg_var difference
    are computed, then tested with corr_perm_test.  Returns uncorrected
    and FDR-corrected p-values (unlabeled vertices fixed at 0.5).
    """
    # Get the number of vertices from a file
    num_vert = spio.loadmat(sub_files[0])['dtseries'].shape[0]
    num_sub = len(sub_files)
    # Allocate memory for subject data
    sub_data = np.zeros(shape=(len_time, num_vert, num_sub))
    print('Reading subjects')
    for subno, filename in enumerate(tqdm(sub_files)):
        data = spio.loadmat(filename)['dtseries'].T
        sub_data[:, :, subno], _, _ = normalizeData(data[:len_time, :])
    pairs = list(itertools.combinations(range(num_sub), r=2))
    if num_pairs > 0:
        # Random subset of all pairs.
        rn = np.random.permutation(len(pairs))
        pairs = [pairs[i] for i in rn]
        pairs = pairs[:num_pairs]
    fmri_diff = sp.zeros((num_vert, len(pairs)))
    regvar_diff = sp.zeros(len(pairs))
    print('Computing pairwise differences')
    for pn, pair in enumerate(tqdm(pairs)):
        Y2, _ = brainSync(X=sub_data[:, :, pair[0]], Y=sub_data[:, :, pair[1]])
        fmri_diff[:, pn] = np.sum((Y2 - sub_data[:, :, pair[0]])**2, axis=0)
        regvar_diff[pn] = (reg_var[pair[0]] - reg_var[pair[1]])**2
    # BUG FIX: the original call corr_perm_test(X=..., Y=...) used keyword
    # names that corr_perm_test does not accept and ignored its 3-tuple
    # return.  Pass the real signature and keep the uncorrected per-vertex
    # permutation p-values for the masking/FDR step below.
    _, _, corr_pval = corr_perm_test(X_pairs=fmri_diff.T, Y_pairs=regvar_diff,
                                     reg_var=reg_var, num_sub=num_sub,
                                     nperm=num_perm)
    labs = spio.loadmat(
        bfp_path + '/supp_data/USCBrain_grayordinate_labels.mat')['labels'].squeeze()
    labs[sp.isnan(labs)] = 0
    # Unlabeled vertices carry no signal; fix them at chance level.
    corr_pval[labs == 0] = 0.5
    corr_pval_fdr = 0.5 * sp.ones(num_vert)
    _, corr_pval_fdr[labs > 0] = fdrcorrection(corr_pval[labs > 0])
    return corr_pval, corr_pval_fdr
def main():
    """Build a group brainSync atlas for the maryland_rao_v1 cohort.

    Reads epilepsy / non-epilepsy subject lists, loads available BFP
    grayordinate scans (171 timepoints), runs groupBrainSync on the
    non-epilepsy group and saves the group atlases to .npz files.
    """
    studydir = '/ImagePTE1/ajoshi/fitbir/preproc/maryland_rao_v1'
    epi_txt = '/ImagePTE1/ajoshi/fitbir/preproc/maryland_rao_v1_epilepsy_imgs.txt'
    nonepi_txt = '/ImagePTE1/ajoshi/fitbir/preproc/maryland_rao_v1_nonepilepsy_imgs_37.txt'
    with open(epi_txt) as f:
        epiIds = f.readlines()
    with open(nonepi_txt) as f:
        nonepiIds = f.readlines()
    epiIds = list(map(lambda x: x.strip(), epiIds))
    nonepiIds = list(map(lambda x: x.strip(), nonepiIds))
    epi_files = list()
    nonepi_files = list()
    # Keep only subjects whose BFP output file actually exists.
    for sub in epiIds:
        fname = os.path.join(studydir, sub, 'BFP', sub, 'func',
                             sub + '_rest_bold.32k.GOrd.mat')
        if os.path.isfile(fname):
            epi_files.append(fname)
    for sub in nonepiIds:
        fname = os.path.join(studydir, sub, 'BFP', sub, 'func',
                             sub + '_rest_bold.32k.GOrd.mat')
        if os.path.isfile(fname):
            nonepi_files.append(fname)
    epi_data = load_bfp_data(epi_files, 171)
    nonepi_data = load_bfp_data(nonepi_files, 171)
    t = time.time()
    X2, Os, Costdif, TotalError = groupBrainSync(nonepi_data)
    elapsed = time.time() - t
    np.savez('grp_atlas2.npz', X2=X2, Os=Os)
    # Group atlas = normalized mean of the synced series.
    atlas_data, _, _ = normalizeData(np.mean(X2, axis=1))
    np.savez('grp_atlas.npz', atlas_data=atlas_data)
    # BUG FIX: the original called vis_grayord_sigcorr(pval, rval, cf...)
    # with names never defined in this function -- a guaranteed NameError.
    # The visualization stays disabled until the stats step is restored:
    # pointwise_stats(epi_data, nonepi_data)
    # vis_grayord_sigcorr(pval, rval, cf.outname, cf.out_dir,
    #                     int(cf.smooth_iter), cf.save_surfaces,
    #                     cf.save_figures, 'True')
    print('done')
def lin_reg(bfp_path, ref_atlas, sub_files, reg_var, Vndim=235, Sndim=20, len_time=235):
    """Linear-regression stats on PCA-reduced, atlas-synced fMRI data.

    ref_atlas: Time x Vertex reference atlas.  Vndim: temporal PCA
    dimension; Sndim: per-vertex subject-space PCA dimension.  For each
    vertex, reg_var is regressed (OLS with intercept) on the reduced data
    and the model F-test p-value is recorded.  Returns
    (pval_linreg, pval_linreg_fdr); FDR restricted to labeled vertices.
    """
    num_vert = ref_atlas.shape[1]
    num_sub = len(sub_files)
    a = spio.loadmat(bfp_path + '/supp_data/USCBrain_grayord_labels.mat')
    labs = a['labels'].squeeze()
    labs[sp.isnan(labs)] = 0
    print('Computing PCA basis function from the atlas')
    pca = PCA(n_components=Vndim)
    pca.fit(ref_atlas.T)
    reduced_data = sp.zeros((Vndim, num_vert, num_sub))
    for ind in tqdm(range(num_sub)):
        sub_data = spio.loadmat(sub_files[ind])['dtseries'].T
        sub_data, _, _ = normalizeData(sub_data[:len_time, :])
        Y2, _ = brainSync(X=ref_atlas, Y=sub_data)
        if Vndim == len_time:
            # Full temporal dimension requested: skip the PCA projection.
            # NOTE(review): this branch stores the *unsynced* sub_data while
            # the projected branch uses the synced Y2 -- confirm intended.
            reduced_data[:, :, ind] = sub_data
        else:
            reduced_data[:, :, ind] = pca.transform(Y2.T).T
    pval_linreg = sp.zeros(num_vert)
    pca = PCA(n_components=Sndim)  # re-fit per vertex in the loop below
    for vrt in tqdm(range(num_vert)):
        X = reduced_data[:, vrt, :]
        if Sndim != num_sub:
            pca.fit(X.T)
            X = pca.transform(X.T).T
        X = sm.add_constant(X.T)
        est = sm.OLS(reg_var, X)
        pval_linreg[vrt] = est.fit().f_pvalue
    print('Regression is done')
    # NaN p-values are set to chance level.
    pval_linreg[sp.isnan(pval_linreg)] = .5
    pval_linreg_fdr = sp.zeros(num_vert)
    _, pv = fdrcorrection(pval_linreg[labs > 0])
    pval_linreg_fdr[labs > 0] = pv
    return pval_linreg, pval_linreg_fdr
def get_connectivity(data, labels, label_ids):
    """Augmented ROI connectivity: aug_times resampled ROI-mean series,
    their full and partial correlation matrices.

    NOTE(review): `aug_times` is read from an enclosing/global scope --
    confirm it is defined wherever this function is used.
    data: Time x Vertex array or a .mat filename with 'dtseries'.
    Returns (conn, partial_corrM, rtseries transposed to
    aug_times x num_rois x num_time).
    """
    # compute adj matrix
    if type(data) == str:
        df = spio.loadmat(data)
        data = df['dtseries'].T
    num_time = data.shape[0]
    num_rois = len(label_ids)
    rtseries = np.zeros((aug_times, num_time, num_rois))  # 171x16/ 95 /158
    partial_corrM = np.zeros((aug_times, num_rois, num_rois))
    conn = np.zeros((aug_times, num_rois, num_rois))
    for k in range(aug_times):
        for i, id in enumerate(label_ids):
            idx = labels == id
            data_within_roi = data[:, idx]
            num_voxels = data_within_roi.shape[1]
            if num_voxels < 3:
                # Too few voxels to subsample: use the plain ROI mean.
                rtseries[k, :, i] = np.mean(data_within_roi, axis=1)
            else:
                # Random third of the ROI's voxels per augmentation.
                # NOTE(review): range(1, num_voxels) never selects voxel 0 --
                # looks like an off-by-one; confirm whether range(num_voxels)
                # was intended.
                selected_voxels_id = sorted(
                    random.sample(range(1, num_voxels), num_voxels // 3))
                rtseries[k, :, i] = np.mean(data_within_roi[:, selected_voxels_id], axis=1)
        rtseries[k], _, _ = normalizeData(rtseries[k])
        partial_measure = ConnectivityMeasure(kind='partial correlation')
        partial_corrM[k] = partial_measure.fit_transform([rtseries[k]])[0]
        conn[k] = np.corrcoef(rtseries[k].T)
        # Define the infinite-value edges as no connection.
        conn[~np.isfinite(conn)] = 0
        # Force unit diagonal and mirror entries for augmentation k.
        for i in range(conn[k].shape[0]):
            conn[k, i, i] = 1.0
            for j in range(conn[k].shape[1]):
                conn[k, i, j] = conn[k, j, i]
    # The adjacency matrix here is not binary; correlation coefficients are
    # used directly as edge weights.
    return conn, partial_corrM, np.transpose(
        rtseries, (0, 2, 1))
def pair_dist_simulation(rand_pair, sub_files, sub_data=None, reg_var=None, len_time=235, roi=None):
    """Pair distance with simulated, reg_var-scaled noise injected in an ROI.

    rand_pair: pair of subject indices.  sub_files: per-subject .mat files
    (used when sub_data is empty).  sub_data: optional preloaded
    Time x Vertex x Subject array.  reg_var: per-subject clinical variable;
    noise amplitude grows with it.  roi: boolean mask (2-D, e.g. from a
    labels .mat) of vertices that receive the simulated noise.

    Returns fmri_diff (and regvar_diff when reg_var is non-empty).
    """
    # FIX: mutable default arguments ([]) replaced with None sentinels;
    # behavior for existing callers is unchanged.
    sub_data = np.array([] if sub_data is None else sub_data)
    reg_var = [] if reg_var is None else reg_var
    roi = [] if roi is None else roi
    # normalize the clinical variable
    reg_var_norm, _, _ = normalizeData(reg_var)
    roi_ind, _ = np.where(roi)
    # Noise std scales linearly with the (shifted) clinical variable.
    noise_data = (reg_var_norm - np.min(reg_var_norm)) * np.random.normal(
        size=(len(roi_ind), len_time, len(reg_var)))
    if sub_data.size > 0:
        sub1_data = sub_data[:, :, rand_pair[0]]
        sub2_data = sub_data[:, :, rand_pair[1]]
        sub1_data, _, _ = normalizeData(sub1_data[:len_time, :])
        sub2_data, _, _ = normalizeData(sub2_data[:len_time, :])
        # NOTE(review): here the noise is added to the whole array, while the
        # file-loading branch below injects it only at roi_ind -- confirm
        # which is intended.
        sub1_data += noise_data[:, :, rand_pair[0]]
        sub2_data += noise_data[:, :, rand_pair[1]]
        sub1_data, _, _ = normalizeData(sub1_data[:len_time, :])
        sub2_data, _, _ = normalizeData(sub2_data[:len_time, :])
    else:
        sub1_data = spio.loadmat(sub_files[rand_pair[0]])['dtseries'].T
        sub2_data = spio.loadmat(sub_files[rand_pair[1]])['dtseries'].T
        sub1_data, _, _ = normalizeData(sub1_data[:len_time, :])
        sub2_data, _, _ = normalizeData(sub2_data[:len_time, :])
        sub1_data[:len_time, roi_ind] += noise_data[:, :, rand_pair[0]].T
        sub2_data[:len_time, roi_ind] += noise_data[:, :, rand_pair[1]].T
        sub1_data, _, _ = normalizeData(sub1_data[:len_time, :])
        sub2_data, _, _ = normalizeData(sub2_data[:len_time, :])
    sub2_data, _ = brainSync(X=sub1_data, Y=sub2_data)
    fmri_diff = sp.sum((sub2_data - sub1_data)**2, axis=0)
    # Returns SQUARE of the distance
    if len(reg_var) > 0:
        regvar_diff = sp.square(reg_var[rand_pair[0]] - reg_var[rand_pair[1]])
        return fmri_diff, regvar_diff
    else:
        return fmri_diff
def load_bfp_dataT_dist2atlas(sub_fname, atlas_fname, LenTime, matchT):
    """Load each subject, brainSync to an atlas, and return distances to it.

    sub_fname: list of .mat filenames, each containing a Time x Vertex
    'dtseries' matrix.
    atlas_fname: .mat file with an 'atlas_data' (Time x Vertex) field.
    LenTime: number of timepoints in data; should be the same in all subjects.
    matchT: truthy -> zero-pad short scans up to LenTime; otherwise warn.
    Returns (subTest_diff, numT): Vertex x Subject distance matrix and the
    original per-subject timepoint counts.
    """
    count1 = 0
    subN = len(sub_fname)
    print('loading data for ' + str(subN) + ' subjects')
    pbar = tqdm(total=subN)
    numT = np.zeros(subN)
    atlas_data = spio.loadmat(atlas_fname)
    atlas = atlas_data['atlas_data']
    subTest_diff = np.zeros((atlas.shape[1], subN))
    for ind in range(subN):
        fname = sub_fname[ind]
        df = spio.loadmat(fname)
        data = df['dtseries'].T
        numT[ind] = data.shape[0]
        if int(data.shape[0]) != LenTime:
            if bool(matchT) == True:
                # Zero-pad the missing timepoints to reach LenTime.
                t = int(LenTime - numT[ind])
                v = data.shape[1]
                temp = np.zeros((t, v))
                data = np.concatenate((data, temp))
            else:
                print(sub_fname[ind] + ' does not have the correct number of timepoints')
        d, _, _ = normalizeData(data)
        # Sync to the atlas, then compute the per-vertex distance.
        syn_data, _ = brainSync(X=atlas, Y=d)
        subTest_diff[:, ind], _ = dist2atlas_sub(atlas, syn_data)
        count1 += 1
        pbar.update(1)
        if count1 == subN:
            break
    pbar.close()
    print('loaded data for ' + str(subN) + ' subjects')
    return subTest_diff, numT
def load_bfp_dataT(sub_fname, LenTime, matchT):
    """Load subjects' fMRI matrices into a Time x Vertex x Subject array.

    sub_fname: list of .mat filenames, each containing a Time x Vertex
    'dtseries' matrix.
    LenTime: number of timepoints in data; should be the same in all subjects.
    matchT: True (boolean) or 'True' (string) -> zero-pad short scans to
        LenTime; anything else only prints a warning on mismatch.
    Returns (sub_data, numT): the 3D data array and the original
    per-subject timepoint counts.
    """
    count1 = 0
    subN = len(sub_fname)
    print('loading data for ' + str(subN) + ' subjects')
    pbar = tqdm(total=subN)
    numT = np.zeros(subN)
    for ind in range(subN):
        fname = sub_fname[ind]
        df = spio.loadmat(fname)
        data = df['dtseries'].T
        numT[ind] = data.shape[0]
        if int(data.shape[0]) != LenTime:
            # FIX: the original tested matchT == 'True' and so silently
            # ignored a boolean True, unlike the companion loader
            # load_bfp_dataT_dist2atlas.  str(True) == 'True', so this
            # accepts both forms and stays backward-compatible.
            if str(matchT) == 'True':
                # Zero-pad the missing timepoints to reach LenTime.
                t = int(LenTime - numT[ind])
                v = data.shape[1]
                temp = np.zeros((t, v))
                data = np.concatenate((data, temp))
            else:
                print(sub_fname[ind] + ' does not have the correct number of timepoints')
        d, _, _ = normalizeData(data)
        if count1 == 0:
            # Allocate once the vertex count is known from the first subject.
            sub_data = np.zeros((LenTime, d.shape[1], subN))
        sub_data[:, :, count1] = d[:LenTime, ]
        count1 += 1
        pbar.update(1)
        if count1 == subN:
            break
    pbar.close()
    print('loaded data for ' + str(subN) + ' subjects')
    return sub_data, numT
def get_connectivity(fname, labels, label_ids):
    """Correlation matrix of normalized ROI-mean series read from a .mat file."""
    mat_contents = spio.loadmat(fname)
    timeseries = mat_contents['dtseries'].T
    n_frames = timeseries.shape[0]
    roi_means = np.zeros((n_frames, len(label_ids)))
    for col, roi_id in enumerate(label_ids):
        print(roi_id)
        roi_means[:, col] = np.mean(timeseries[:, labels == roi_id], axis=1)
    roi_means, _, _ = normalizeData(roi_means)
    return np.corrcoef(roi_means.T)
# --- Script fragment (Python 2 era): brainSync residual vs. scan length
# for one pair of HCP sessions of the same subject.
# NOTE(review): `lst`, `p_dir`, `msk` and `d1` are defined earlier in the
# original script and are not visible in this chunk.
sub = lst[0]
data = scipy.io.loadmat(
    os.path.join(p_dir, sub, sub + '.rfMRI_REST2_LR.\
reduce3.ftdata.NLM_11N_hvar_25.mat'))
LR_flag = msk['LR_flag']
# Boolean hemisphere mask over vertices.
LR_flag = np.squeeze(LR_flag) != 0
data = data['ftdata_NLM']
temp = data[LR_flag, :]
d2 = temp.T
ind = 0
IntV = range(10, 1200, 10)  # scan lengths (timepoints) to evaluate
rms = sp.zeros(len(IntV))
for len1 in IntV:
    sub_data1, _, _ = normalizeData(d1[:len1, :])
    sub_data2, _, _ = normalizeData(d2[:len1, :])
    # Drop near-constant vertices before syncing.
    s = sp.std(sub_data2, axis=0)
    sub_data1 = sub_data1[:, s > 1e-2]
    sub_data2 = sub_data2[:, s > 1e-2]
    sub_data2_sync, Rot = brainSync(X=sub_data1, Y=sub_data2)
    # Normalized residual of the sync (0 = perfect alignment).
    rms[ind] = sp.linalg.norm(sub_data2_sync - sub_data1) / sp.sqrt(
        sp.linalg.norm(sub_data2_sync)**2 + sp.linalg.norm(sub_data1)**2)
    ind += 1
    print len1, ':', rms[ind - 1]
plt.plot(IntV, rms)
plt.ylim(ymax=0.7, ymin=0.30)
plt.savefig('sync_vs_len_same_sub2.pdf')
plt.show()
# --- Script fragment: compare two rest scans of HCP subject 100307 before
# and after brainSync alignment.
# NOTE(review): `p_dir_ref`, `ref` and `lst` come from earlier (unseen)
# parts of the original script.
dfs_right_sm = readdfs(
    os.path.join(p_dir_ref, 'reference', ref + '.aparc\
.a2009s.32k_fs.reduce3.very_smooth.right.dfs'))
sub = lst[0]
# Earlier task-fMRI variant kept for reference:
# dat = scipy.io.loadmat('/big_disk/ajoshi/with_andrew/100307/100307.\
# tfMRI_MOTOR_LR.reduce3.ftdata.NLM_11N_hvar_5.mat')
#fmotor = dat['ftdata_NLM'].T
# fmotor,_,_=normalizeData(fmotor)
dat = spio.loadmat('/big_disk/ajoshi/with_andrew/100307/100307.\
rfMRI_REST1_RL.reduce3.ftdata.NLM_11N_hvar_5.mat')
fmotor = dat['ftdata_NLM'].T
fmotor = fmotor[:284, :]  # truncate to 284 frames
fmotor, _, _ = normalizeData(fmotor)
dat = spio.loadmat('/big_disk/ajoshi/with_andrew/100307/100307.\
rfMRI_REST1_LR.reduce3.ftdata.NLM_11N_hvar_5.mat')
frest = dat['ftdata_NLM'].T
frest = frest[:fmotor.shape[0], :]  # match the two scans' lengths
frest, _, _ = normalizeData(frest)
diffbefore = fmotor - frest
# Sync the RL scan onto the LR scan, then re-measure the difference.
fmotor, _ = brainSync(frest, fmotor)
diffafter = fmotor - frest
plt.imshow(sp.absolute(diffbefore), aspect='auto', clim=(0, 0.1))
os.path.join(p_dir_ref, 'reference', ref + '.aparc.a2009s.32k_fs.reduce3.very_smooth.left.dfs')) count1 = 0 roilist = [30, 72, 9, 47] #pc #ref=lst[11] datasub = scipy.io.loadmat( os.path.join(p_dir, sub, sub + '.rfMRI_REST2_RL.reduce3.ftdata.NLM_11N_hvar_25.mat')) dataref = scipy.io.loadmat( os.path.join(p_dir, ref, ref + '.rfMRI_REST1_RL.reduce3.ftdata.NLM_11N_hvar_25.mat')) LR_flag = msk['LR_flag'] LR_flag = np.squeeze(LR_flag) > 0 data = dataref['ftdata_NLM'] sub1, _, _ = normalizeData(data[LR_flag, :].T) #sub1 = sub1.T data = datasub['ftdata_NLM'] sub2, _, _ = normalizeData(data[LR_flag, :].T) #sub2 = sub2.T msk_small_region = np.in1d(dfs_left.labels, roilist) # msk_small_region = (dfs_left.labels == 30) | (dfs_left.labels == 72) | (dfs_left.labels == 9) | (dfs_left.labels == 47) # % motor d = sub1[:, msk_small_region] ref_mean_pc = sp.mean(d, axis=1) ref_mean_pc = ref_mean_pc - sp.mean(ref_mean_pc) ref_mean_pc = ref_mean_pc / (sp.std(ref_mean_pc)) d = sub2[:, msk_small_region]
BFPPATH = '/home/ajoshi/coding_ground/bfp/supp_data' # Read Reference parcellation refLeft = readdfs(os.path.join(BFPPATH, 'bci32kleft.dfs')) refRight = readdfs(os.path.join(BFPPATH, 'bci32kright.dfs')) # nullsubDir = '/deneb_disk/Beijing_Zhang_bfp/' lst = glob.glob(nullsubDir+'*LB40.mat') nsub = 50#len(lst) #ids={'sn8133','sn4055','tr4277','sn7915','sn5895','sn7602','sn6012','tr3170','sn6594','sn7256','sub05267','sub06880'}; vsub = sp.io.loadmat('/deneb_disk/from_Todd_Constable_Epilepsy_Processed\ /sn7915/func/sn7915_rest_bold.32k.GOrd_LB40.mat') vsub, _, _ = normalizeData(vsub['dtseries'].T) print("There are %d subjects" % nsub) print("Reading the subject data") # %% for ind1 in range(nsub): vrest = sp.io.loadmat(os.path.join(nullsubDir, lst[ind1])) vrest = vrest['dtseries'].T if ind1 == 0: vrest_subs = sp.zeros([vsub.shape[0], vrest.shape[1], nsub]) vrest = vrest[:vsub.shape[0], :] vrest_subs[:, :, ind1], _, _ = normalizeData(vrest) # print(ind1, end=' ') print ind1,
def randpair_groupdiff_ftest(sub_grp1_files, sub_grp2_files, num_pairs, len_time=255):
    """Group-difference F-test from within-group brainSync pair distances.

    For each group, random subject pairs are drawn and the per-vertex
    squared pair distances computed; half their mean serves as a
    within-group variance estimate, and the ratio is tested with an
    F distribution.  Returns (F, pval) per vertex.
    """
    print('Grp diff using f-test and brainsync')
    num_vert = spio.loadmat(sub_grp1_files[0])['dtseries'].shape[0]
    print('Generating random pairs from group 1')
    pairs_grp1, num_pairs1 = gen_rand_pairs(num_sub=len(sub_grp1_files),
                                            num_pairs=num_pairs)
    fmri_diff1 = sp.zeros((num_vert, num_pairs1))
    # Preload data. This is only slightly faster; better would be to load
    # on the fly and multiprocess.
    print('Reading data for group 1')
    sub_data1 = np.zeros((len_time, num_vert, len(sub_grp1_files)))
    for i, fname in enumerate(tqdm(sub_grp1_files)):
        sub1_data = spio.loadmat(fname)['dtseries'][:, :len_time].T
        sub_data1[:, :, i], _, _ = normalizeData(sub1_data)
    print('Compute differences in fMRI of random pairs from group 1')
    for i, rand_pair in enumerate(tqdm(pairs_grp1)):
        fmri_diff1[:, i] = pair_dist(rand_pair=rand_pair,
                                     sub_files=sub_grp1_files,
                                     sub_data=sub_data1,
                                     len_time=len_time)
    # Half the mean squared pair distance: within-group variance estimate.
    S1 = 0.5 * np.mean(fmri_diff1, axis=1)
    print('Generating random pairs from group 2')
    pairs_grp2, num_pairs2 = gen_rand_pairs(num_sub=len(sub_grp2_files),
                                            num_pairs=num_pairs)
    fmri_diff2 = sp.zeros((num_vert, num_pairs2))
    # Preload data for group 2
    print('Reading data for group 2')
    sub_data2 = np.zeros((len_time, num_vert, len(sub_grp2_files)))
    for i, fname in enumerate(tqdm(sub_grp2_files)):
        sub2_data = spio.loadmat(fname)['dtseries'][:, :len_time].T
        sub_data2[:, :, i], _, _ = normalizeData(sub2_data)
    print('Compute differences in fMRI of random pairs from group 2')
    for i, rand_pair in enumerate(tqdm(pairs_grp2)):
        fmri_diff2[:, i] = pair_dist(rand_pair=rand_pair,
                                     sub_files=sub_grp2_files,
                                     sub_data=sub_data2,
                                     len_time=len_time)
    S2 = 0.5 * np.mean(fmri_diff2, axis=1)
    # We will perform f-test test (modified in a pairwise stats)
    #
    n1 = sub_data1.shape[2] * len_time
    n2 = sub_data2.shape[2] * len_time
    F = S1 / (S2 + 1e-16)  # epsilon guards against divide-by-zero
    pval = 1 - ss.f.cdf(F, n1 - 1, n2 - 1)
    return F, pval
def kernel_regression_ftest_permutation(bfp_path, sub_files, reg_var,
                                        nperm=100, len_time=235, num_proc=4,
                                        fdr_test=False, simulation=False):
    """Kernel (ridge) regression of reg_var on pairwise fMRI distances,
    with a permutation-based F-test per vertex.

    For every subject pair, the per-vertex squared brainSync distance is
    computed (optionally with simulated ROI noise when simulation=True);
    at each vertex an RBF kernel over those distances drives a kernel
    ridge regression of reg_var, and its residual is compared against a
    permutation-null residual via an F-test.
    Returns (pval_kr_ftest, pval_kr_ftest_fdr).  fdr_test is unused.
    """
    if simulation:
        # added for simulation
        labs = spio.loadmat(
            '/ImagePTE1/ajoshi/code_farm/bfp/supp_data/USCLobes_grayordinate_labels.mat'
        )['labels']
        roi = (labs == 200)  # R. Parietal Lobe
    # Normalize the variable
    reg_var, _, _ = normalizeData(reg_var)
    # Get the number of vertices from a file
    num_vert = spio.loadmat(sub_files[0])['dtseries'].shape[0]
    num_sub = len(sub_files)
    pairs = np.array(list(itertools.combinations(range(num_sub), r=2)))
    num_pairs = len(pairs)
    fmri_diff = np.zeros((num_vert, num_pairs))
    regvar_diff = np.zeros(num_pairs)
    if simulation:
        pairdistfunc = partial(pair_dist_simulation, roi=roi)
    else:
        pairdistfunc = pair_dist
    if num_proc > 1:
        # Parallel pair-distance computation.
        pool = Pool(num_proc)
        results = pool.imap(
            partial(pairdistfunc,
                    sub_files=sub_files,
                    reg_var=reg_var,
                    len_time=len_time), pairs)
        ind = 0
        for res in results:
            fmri_diff[:, ind] = res[0]
            regvar_diff[ind] = res[1]
            ind += 1
    else:
        for ind in tqdm(range(len(pairs))):
            fmri_diff[:, ind], regvar_diff[ind] = pairdistfunc(
                sub_files=sub_files,
                reg_var=reg_var,
                len_time=len_time,
                rand_pair=pairs[ind])
    kr = KRR(kernel='precomputed', alpha=0.1)
    D = np.zeros((num_sub, num_sub))
    pval_kr_ftest = np.zeros(num_vert)
    # RBF bandwidth; value chosen by brute-force search (see
    # kernel_regression_choose_gamma).
    gamma = 2.6  #2 #5
    rho = np.zeros(num_vert)
    res = np.zeros(num_vert)
    null_res = np.zeros(num_vert)
    #reg_var = np.random.permutation(reg_var)
    for v in tqdm(range(num_vert)):
        # Build the symmetric subject x subject kernel from pair distances.
        D = np.zeros((num_sub, num_sub))
        D[pairs[:, 0], pairs[:, 1]] = fmri_diff[v, :]
        D = D + D.T  # make it symmetric
        D = np.exp(-gamma * D)
        #D = (2-D)/2
        # Do this in a split train test split
        kr = KRR(kernel='precomputed', alpha=0.1)
        kr.fit(D, reg_var)
        pred_v = kr.predict(D)
        rho[v] = np.corrcoef(pred_v, reg_var)[0, 1]
        res[v] = np.mean((pred_v - reg_var)**2)
        # Null residual: average fit error over permuted targets.
        for p in range(nperm):
            reg_var_null = np.random.permutation(reg_var)
            kr = KRR(kernel='precomputed', alpha=0.1)
            kr.fit(D, reg_var_null)
            pred_v_null = kr.predict(D)
            null_res[v] += np.sum((pred_v_null - reg_var_null)**2)
        null_res[v] = null_res[v] / (nperm * num_sub)
        #null_res[v] = np.mean(reg_var**2)
    print('Doing f test')
    for v in tqdm(range(num_vert)):
        Fstat = res[v] / null_res[v]
        pval_kr_ftest[v] = f.cdf(Fstat, num_sub - 1, num_sub - 1)
    _, pval_kr_ftest_fdr = fdrcorrection(pval_kr_ftest)
    return pval_kr_ftest, pval_kr_ftest_fdr
def kernel_regression_choose_gamma(bfp_path, sub_files, reg_var, nperm=1000,
                                   len_time=235, num_proc=4, fdr_test=False):
    """Grid-search a kernel-regression hyperparameter on simulated ROI data.

    Uses pair_dist_simulation over all subject pairs, then evaluates
    held-out correlation of KRR predictions over a grid and returns the
    grid value that maximizes mean validation correlation.
    NOTE(review): the loop variable is named `alpha` and is passed as the
    KRR regularization strength while `gamma` stays fixed at 2.6, yet the
    grid is called `gamma_values` and the winner is returned as the chosen
    gamma -- confirm whether alpha or gamma was meant to be searched.
    nperm and fdr_test are effectively unused (nperm is overwritten below).
    """
    # Normalize the variable
    reg_var, _, _ = normalizeData(reg_var)
    # Get the number of vertices from a file
    num_vert = spio.loadmat(sub_files[0])['dtseries'].shape[0]
    num_sub = len(sub_files)
    pairs = np.array(list(itertools.combinations(range(num_sub), r=2)))
    num_pairs = len(pairs)
    fmri_diff = np.zeros((num_vert, num_pairs))
    regvar_diff = np.zeros(num_pairs)
    # added for simulation
    labs = spio.loadmat(
        '/ImagePTE1/ajoshi/code_farm/bfp/supp_data/USCLobes_grayordinate_labels.mat')['labels']
    roi = (labs == 200)  # R. Parietal Lobe
    pairdistfunc = pair_dist_simulation
    if num_proc > 1:
        # Parallel pair-distance computation.
        pool = Pool(num_proc)
        results = pool.imap(
            partial(pairdistfunc,
                    sub_files=sub_files,
                    reg_var=reg_var,
                    len_time=len_time,
                    roi=roi), pairs)
        ind = 0
        for res in results:
            fmri_diff[:, ind] = res[0]
            regvar_diff[ind] = res[1]
            ind += 1
    else:
        for ind in tqdm(range(len(pairs))):
            fmri_diff[:, ind], regvar_diff[ind] = pairdistfunc(
                sub_files=sub_files,
                reg_var=reg_var,
                len_time=len_time,
                rand_pair=pairs[ind], roi=roi)
    kr = KRR(kernel='precomputed')  #, alpha=1.1)
    D = np.zeros((num_sub, num_sub))
    pval_kr = np.zeros(num_vert)
    # Overrides the nperm argument.
    nperm = 50
    rho = np.zeros(num_vert)
    num_sub_val = 5  # last 5 subjects held out for validation
    gamma_values = np.arange(1e-8, .2, .01)  #np.arange(1e-8,15,.1)
    rho_all = np.zeros(len(gamma_values))
    roi_ind, _ = np.where(roi)
    gamma = 2.6  # fixed RBF bandwidth (see NOTE in the docstring)
    for i, alpha in enumerate(gamma_values):
        # Evaluate every 5th ROI vertex only, for speed.
        for v in roi_ind[::5]:  #range(0,num_vert,100):
            D = np.zeros((num_sub, num_sub))
            D[pairs[:, 0], pairs[:, 1]] = fmri_diff[v, :]
            D = D + D.T  # make it symmetric
            D = np.exp(-gamma * D)
            # Do this in a split train test split
            D_train = D[:num_sub - num_sub_val, :num_sub - num_sub_val]
            kr = KRR(kernel='precomputed', alpha=alpha)
            kr.fit(D_train, reg_var[:num_sub - num_sub_val])
            D_val = D[num_sub - num_sub_val:, :num_sub - num_sub_val]
            pred_v = kr.predict(D_val)
            if np.var(pred_v) < 1e-6:
                # Degenerate (constant) prediction: correlation undefined.
                rho[v] = 0
            else:
                rho[v] = np.corrcoef(pred_v, reg_var[num_sub - num_sub_val:])[0, 1]
        rho_all[i] = np.mean(rho)
        print(alpha, np.mean(rho))
    print(np.argmax(rho_all), gamma_values[np.argmax(rho_all)])
    return gamma_values[np.argmax(rho_all)]
# --- Script fragment: explained-variance / PCA exploration of one HCP
# subject's resting and motor-task scans.
BFPPATH = '/big_disk/ajoshi/coding_ground/bfp'
BrainSuitePath = '/home/ajoshi/BrainSuite17a/svreg'
NCMP = 51  # number of PCA components
surfObj = readdfs(join(BFPPATH, 'supp_data', 'bci32kright.dfs'))
numVert = len(surfObj.vertices)
#sub1n='/big_disk/ajoshi/HCP100/HCP100/135932/MNINonLinear/Results/rfMRI_REST1_LR/rfMRI_REST1_LR_Atlas_hp2000_clean.dtseries.nii';
sub1n = '/big_disk/ajoshi/with_andrew/100307/100307.rfMRI_REST1_LR.reduce3.ftdata.hvar_0.mat'
sub1n_tsk = '/big_disk/ajoshi/with_andrew/100307/100307.tfMRI_MOTOR_LR.reduce3.ftdata.hvar_0.mat'
#sub1n = '/deneb_disk/HCP/196750/MNINonLinear/Results/rfMRI_REST1_LR/rfMRI_REST1_LR_Atlas_hp2000_clean.dtseries.nii'
#sub1n_tsk = '/deneb_disk/HCP/196750/MNINonLinear/Results/tfMRI_MOTOR_LR/tfMRI_MOTOR_LR_Atlas.dtseries.nii'
X = spio.loadmat(sub1n)
X = X['ftdata']
X, _, _ = normalizeData(X.T)  # normalized Time x Vertex rest data
#sub1 = nilearn.image.load_img(sub1n)
#sub1 = nib.load(sub1n)
#X = sub1.get_data().T
Xtsk = spio.loadmat(sub1n_tsk)
Xtsk = Xtsk['ftdata']
Xtsk, _, _ = normalizeData(Xtsk.T)  # normalized task data
#sub1tsk = nib.cifti2.cifti2.load(sub1n_tsk)
# %% Explained variance: singular-value spectrum of the temporal Gram matrix.
_, s, _ = np.linalg.svd(np.dot(X, X.T))
plt.figure()
plt.plot(s[:50])
if int(dx) == 3: adhdInattentive.append(sub) print(sub, dx, qc) normSubOrig = normSub #%% Read Normal Subjects normSub = normSub[:50] count1 = 0 for sub in normSub: fname = os.path.join(p_dir, sub + '_rest_bold.32k.GOrd.mat') df = spio.loadmat(fname) data = df['dtseries'].T d, _, _ = normalizeData(data) if count1 == 0: sub_data = sp.zeros((235, d.shape[1], len(normSub))) sub_data[:, :, count1] = d[:235, ] count1 += 1 print(count1, ) if count1 == 50: break #%% Create Average atlas by synchronizing everyones data to one subject atlas = 0 q = 3 nSub = len(normSub) for ind in range(nSub):
# --- Script fragment: accumulate a brainSync-ed group mean over NSUB HCP
# subjects, then run PCA on it.
# NOTE(review): NSUB, lst, p_dir, msk, dr, meanData and NCMP are defined
# earlier in the original script and are not visible here.
for ind in range(NSUB):
    sub = lst[ind]
    data = spio.loadmat(
        os.path.join(
            p_dir, sub, sub + '.rfMRI_REST1_LR.\
reduce3.ftdata.NLM_11N_hvar_25.mat'))
    LR_flag = msk['LR_flag']
    LR_flag = np.squeeze(LR_flag) != 0
    data = data['ftdata_NLM']
    # Manual normalization kept for reference:
    # temp = data[LR_flag, :]
    # m = np.mean(temp, 1)
    # temp = temp - m[:, None]
    # s = np.std(temp, 1)+1e-16
    # temp = temp/s[:, None]
    # temp = temp[:, :d1.shape[0]]
    d2, _, _ = normalizeData(data.T)
    # Sync each subject to the reference series dr before averaging.
    d2, _ = brainSync(dr, d2)
    meanData = meanData + d2
    print(ind, end=',')
# %% Do the PCA
np.savez('mean_data_filt.npz', meanData=meanData)
p = PCA(n_components=NCMP)
D = p.fit_transform(meanData.T).T
# %% Explained variance: singular values of the temporal Gram matrix.
_, s, _ = np.linalg.svd(np.dot(meanData, meanData.T))
plt.figure()
plt.plot(s[:50])
plt.title('sigma plot')
os.path.join(p_dir_ref, 'reference', ref + '.aparc.\ a2009s.32k_fs.reduce3.very_smooth.left.dfs')) nSub = 0 avgCorrL = 0 avgCorrR = 0 # Read all the data for sub in lst: data = scipy.io.loadmat( os.path.join( p_dir, sub, sub + '.rfMRI_REST1_LR.\ reduce3.ftdata.NLM_11N_hvar_25.mat')) data = data['ftdata_NLM'] sub1L, _, _ = normalizeData(data[~LR_flag, :].T) sub1R, _, _ = normalizeData(data[LR_flag, :].T) data = scipy.io.loadmat( os.path.join( p_dir, sub, sub + '.rfMRI_REST2_LR.\ reduce3.ftdata.NLM_11N_hvar_25.mat')) data = data['ftdata_NLM'] sub2L, _, _ = normalizeData(data[~LR_flag, :].T) sub2R, _, _ = normalizeData(data[LR_flag, :].T) _, R = brainSync(X=sub1L, Y=sub2L) avgCorrL += sp.sum(sub1L * sp.dot(R, sub2L), axis=0) avgCorrR += sp.sum(sub1R * sp.dot(R, sub2R), axis=0) nSub += 1 print nSub,