def plscorr_eval(train_fmri_ts, train_feat_ts, val_fmri_ts, val_feat_ts,
                 out_dir, mask_file):
    """Compute PLS correlation between brain activity and CNN activation.

    Fits a PLSCanonical model (10 components, chosen from earlier
    cross-validation runs) on the training data, persists it to `out_dir`,
    and writes out: a voxel-wise prediction-accuracy volume, predicted CNN
    features, canonical scores of the training data, and the model weights.
    """
    # Flatten the 4D CNN activation into (time, feature) matrices and
    # transpose fMRI data into (time, voxel) matrices.
    train_feat_ts = train_feat_ts.reshape(-1, train_feat_ts.shape[3]).T
    val_feat_ts = val_feat_ts.reshape(-1, val_feat_ts.shape[3]).T
    train_fmri_ts = train_fmri_ts.T
    val_fmri_ts = val_fmri_ts.T

    out_path = lambda fname: os.path.join(out_dir, fname)

    # Fit the model with the optimized component number and persist it;
    # reloading immediately exercises the stored copy.
    cc_num = 10
    plsca = PLSCanonical(n_components=cc_num)
    plsca.fit(train_feat_ts, train_fmri_ts)
    from sklearn.externals import joblib
    joblib.dump(plsca, out_path('plsca_model.pkl'))
    plsca = joblib.load(out_path('plsca_model.pkl'))

    # Voxel-wise correlation between measured and predicted fMRI responses,
    # written back into the brain volume defined by `mask_file`.
    pred_fmri_ts = plsca.predict(val_feat_ts)
    fmri_pred_r = corr2_coef(val_fmri_ts.T, pred_fmri_ts.T, mode='pair')
    mask = vutil.data_swap(mask_file)
    vxl_idx = np.nonzero(mask.flatten() == 1)[0]
    vol = np.zeros_like(mask.flatten(), dtype=np.float64)
    vol[vxl_idx] = fmri_pred_r
    vutil.save2nifti(vol.reshape(mask.shape), out_path('pred_fmri_r.nii.gz'))

    # Predict CNN features from validation fMRI responses.
    # NOTE(review): the (96, 14, 14, 540) shape is assumed to match the
    # caller's feature layout -- confirm against the feature extraction code.
    pred_feat_ts = pls_y_pred_x(plsca, val_fmri_ts)
    np.save(out_path('pred_feat.npy'),
            pred_feat_ts.T.reshape(96, 14, 14, 540))

    # Canonical scores of the training data for both views.
    feat_cc, fmri_cc = plsca.transform(train_feat_ts, train_fmri_ts)
    np.save(out_path('feat_cc.npy'), feat_cc)
    np.save(out_path('fmri_cc.npy'), fmri_cc)

    # Model weights, plus the fMRI data reconstructed per component.
    np.save(out_path('feat_weights.npy'),
            plsca.x_weights_.reshape(96, 14, 14, cc_num))
    np.save(out_path('fmri_weights.npy'), plsca.y_weights_)
    np.save(out_path('fmri_orig_ccs.npy'),
            get_pls_components(plsca.y_scores_, plsca.y_loadings_))
def reg_cca(train_fmri_ts, train_feat_ts, val_fmri_ts, val_feat_ts, out_dir,
            mask_file=r'/Users/sealhuang/brainDecoding/S1_mask.nii.gz'):
    """Conduct CCA between brain activity and CNN activation.

    Loads a pre-trained rcca model from `out_dir`
    (``CCA_results_<CCnum>.hdf5``) and exports prediction-performance maps,
    model weights, and feature/component correlations.

    Parameters
    ----------
    train_fmri_ts : array, (voxel, time) fMRI training time series.
    train_feat_ts : 4D CNN activation array whose last axis is time.
    val_fmri_ts, val_feat_ts : validation counterparts (only reshaped here;
        the stored model already contains the validation results).
    out_dir : directory holding the saved CCA results and receiving output.
    mask_file : brain-mask NIfTI file.  Defaults to the previously
        hard-coded path for backward compatibility; pass your own path
        on other machines.
    """
    # Flatten/transpose into (time, feature) and (time, voxel) matrices.
    train_feat_ts = train_feat_ts.reshape(-1, train_feat_ts.shape[3]).T
    val_feat_ts = val_feat_ts.reshape(-1, val_feat_ts.shape[3]).T
    train_fmri_ts = train_fmri_ts.T
    val_fmri_ts = val_fmri_ts.T

    #-- Model training is performed offline, e.g.:
    #   cca = rcca.CCA(kernelcca=True, reg=0.007743, numCC=CCnum)
    #   cca.train([train_feat_ts, train_fmri_ts])
    #   cca.validate([val_feat_ts, val_fmri_ts])
    #   cca.compute_ev([val_feat_ts, val_fmri_ts])
    #   cca.save(out_file)
    CCnum = 7
    out_file = os.path.join(out_dir, 'CCA_results_%s.hdf5' % (CCnum))

    #-- model exploring
    cca = rcca.CCA()
    cca.load(out_file)

    # Model prediction performance for both views.
    fmri_pred_r = cca.corrs[1]
    feat_pred_r = cca.corrs[0].reshape(96, 11, 11)
    vutil.plot_cca_fweights(feat_pred_r, out_dir,
                            'pred_feat_r_CC%s' % (CCnum))
    # Project voxel-wise prediction corrcoefs back into the brain volume.
    mask = vutil.data_swap(mask_file)
    vxl_idx = np.nonzero(mask.flatten() == 1)[0]
    tmp = np.zeros_like(mask.flatten(), dtype=np.float64)
    tmp[vxl_idx] = fmri_pred_r
    tmp = tmp.reshape(mask.shape)
    vutil.save2nifti(
        tmp, os.path.join(out_dir, 'pred_fmri_r_CC%s.nii.gz' % (CCnum)))

    # Model weights visualization.
    feat_weights = cca.ws[0]
    feat_weights = feat_weights.reshape(96, 11, 11, feat_weights.shape[1])
    fmri_weights = cca.ws[1]
    vutil.plot_cca_fweights(feat_weights, out_dir,
                            'feat_weight_CC%s' % (CCnum))
    vutil.save_cca_volweights(fmri_weights, mask_file, out_dir,
                              'cca_component')

    # Correlation between raw CNN features and each canonical component.
    feat_cc = cca.comps[0]
    parallel_corr2_coef(train_feat_ts.T, feat_cc.T,
                        os.path.join(out_dir, 'feat_cc_corr.npy'),
                        block_size=7, n_jobs=1)
    feat_cc_corr = np.load(os.path.join(out_dir, 'feat_cc_corr.npy'))
    # Use CCnum instead of a duplicated literal 7 so the reshape follows
    # the component count.
    feat_cc_corr = feat_cc_corr.reshape(96, 11, 11, CCnum)
    vutil.plot_cca_fweights(feat_cc_corr, out_dir, 'feat_cc_corr')
def inter_subj_cca(db_dir):
    """Inter-subject CCA to extract stimulus-related brain areas.

    For each subject, masked and z-scored training/validation fMRI time
    series are stacked as (time, voxel) matrices, then a cross-validated
    regularized kernel CCA is trained across subjects and its results are
    saved to ``inter_subj_cca_results.hdf5``.

    Parameters
    ----------
    db_dir : directory containing one ``v<subject>`` folder per subject,
        each with ``VoxelResponses.mat`` and ``<subject>_mask.nii.gz``.
    """
    subjects = ['S1', 'S2', 'S3']
    # z-score each voxel's time series
    zscore = lambda d: (d - d.mean(1, keepdims=True)) / d.std(
        1, keepdims=True)
    # training / validation stack data, one (time, voxel) matrix per subject
    tdata = []
    vdata = []
    for subj in subjects:
        subj_dir = os.path.join(db_dir, 'v%s' % subj)
        tf = tables.open_file(os.path.join(subj_dir, 'VoxelResponses.mat'))
        # FIX: close the pytables handle per subject instead of leaking
        # one open file per loop iteration.
        try:
            # Read /rt once and reuse it (it was previously loaded twice).
            train_fmri_ts = tf.get_node('/rt')[:]
            val_fmri_ts = tf.get_node('/rv')[:]
        finally:
            tf.close()
        # Keep only in-mask voxels whose training series contain no NaNs.
        fmri_s = train_fmri_ts.sum(axis=1)
        non_nan_idx = np.nonzero(np.logical_not(np.isnan(fmri_s)))[0]
        mask_file = os.path.join(subj_dir, '%s_mask.nii.gz' % (subj))
        mask = vutil.data_swap(mask_file).flatten()
        vxl_idx = np.nonzero(mask == 1)[0]
        vxl_idx = np.intersect1d(vxl_idx, non_nan_idx)
        #-- load fmri response
        train_ts = np.nan_to_num(zscore(np.nan_to_num(train_fmri_ts)))
        val_ts = np.nan_to_num(zscore(np.nan_to_num(val_fmri_ts)))
        # data.shape = (73728, 540/7200)
        # FIX: print() calls (portable) instead of Python-2 print statements.
        print(train_ts[vxl_idx].T.shape)
        print(val_ts[vxl_idx].T.shape)
        tdata.append(train_ts[vxl_idx].T)
        vdata.append(val_ts[vxl_idx].T)
    # Cross-validated CCA over a grid of regularizations and component
    # numbers, then persist the fitted results.
    regs = np.array(np.logspace(-4, 2, 10))
    numCCs = np.arange(3, 6)
    cca = rcca.CCACrossValidate(numCCs=numCCs, regs=regs)
    cca.train(tdata)
    cca.validate(vdata)
    cca.compute_ev(vdata)
    cca.save('inter_subj_cca_results.hdf5')
#roi_file = os.path.join(subj_dir, 'S%s_small_roi.nii.gz'%(subj_id)) #vutil.roi2nifti(tf, roi_file, mode='small') #-- get mean fmri responses #dataset = 'rt' #mean_file = os.path.join(subj_dir, 'S%s_mean_%s.nii.gz'%(subj_id, dataset)) #vutil.gen_mean_vol(tf, dataset, mean_file) #-- create mask train_fmri_ts = tf.get_node('/rt')[:] # data.shape = (73728, 7200) # get non-nan voxel indexs fmri_s = train_fmri_ts.sum(axis=1) non_nan_idx = np.nonzero(np.logical_not(np.isnan(fmri_s)))[0] # create mask full_mask_file = os.path.join(subj_dir, 'S%s_mask.nii.gz' % (subj_id)) full_mask = vutil.data_swap(full_mask_file).flatten() full_vxl_idx = np.nonzero(full_mask == 1)[0] full_vxl_idx = np.intersect1d(full_vxl_idx, non_nan_idx) if phrase == 'test': mask_file = os.path.join(subj_dir, 'S%s_small_roi.nii.gz' % (subj_id)) mask = vutil.data_swap(mask_file).flatten() mask[mask > 1] = 0 mask[mask > 0] = 1 vxl_idx = np.nonzero(mask == 1)[0] vxl_idx = np.intersect1d(vxl_idx, non_nan_idx) else: vxl_idx = full_vxl_idx #-- load fmri response train_fmri_ts = tf.get_node('/rt')[:] #val_fmri_ts = tf.get_node('/rv')[:]