def test_correlation_nans():
    """Check how compute_correlation reports a row that contains a NaN.

    A single NaN is planted in the first row of the first matrix.  With
    ``return_nans=False`` every correlation in that output row must be
    exactly zero; with ``return_nans=True`` every one must be NaN.  In both
    modes no other entry of the result may be affected.
    """
    n_rows_a, n_cols, n_rows_b = 5, 10, 6
    first = prng.rand(n_rows_a, n_cols).astype(np.float32)
    second = prng.rand(n_rows_b, n_cols).astype(np.float32)
    first[0, 0] = np.nan

    # NaNs suppressed: the contaminated row is reported as zeros.
    zero_mask = compute_correlation(first, second, return_nans=False) == 0
    assert zero_mask[0].all()
    assert np.count_nonzero(zero_mask) == n_rows_b

    # NaNs kept: the contaminated row is reported as NaNs.
    nan_mask = np.isnan(compute_correlation(first, second, return_nans=True))
    assert nan_mask[0].all()
    assert np.count_nonzero(nan_mask) == n_rows_b
def test_correlation_computation():
    """Compare compute_correlation against numpy.corrcoef.

    Two cases are checked with an absolute tolerance of 1e-5: a matrix
    correlated with itself, and two different matrices correlated with each
    other (the reference being the off-diagonal block of the correlation
    matrix of their row-wise concatenation).
    """
    n_rows_a, n_cols, n_rows_b = 5, 10, 6
    first = prng.rand(n_rows_a, n_cols).astype(np.float32)
    second = prng.rand(n_rows_b, n_cols).astype(np.float32)

    # Case 1: rows of one matrix against themselves.
    within = compute_correlation(first, first)
    within_reference = np.corrcoef(first)
    assert np.allclose(within, within_reference, atol=1e-5), (
        'high performance correlation computation does not provide correct '
        'correlation results within the same set')

    # Case 2: rows of the first matrix against rows of the second.
    between = compute_correlation(first, second)
    stacked = np.concatenate((first, second), axis=0)
    between_reference = np.corrcoef(stacked)[:n_rows_a, n_rows_a:]
    assert np.allclose(between, between_reference, atol=1e-5), (
        'high performance correlation computation does not provide correct '
        'correlation results between two sets')
def time_segment_matching_accuracy(data, win_size=6):
    """Score leave-one-subject-out time-segment matching.

    Every run of ``win_size`` consecutive time points is stacked into one
    long column (a "segment").  For each held-out subject, its z-scored
    segments are correlated with the z-scored segments summed over all the
    other subjects; a segment counts as matched when its best-correlated
    partner is the segment at the same position.  Partners that overlap the
    segment in time (but are not the segment itself) are excluded.

    Parameters
    ----------
    data : sequence of 2-D arrays
        One array per subject, each indexed as (feature, time sample).  The
        shape of the first array is used for all subjects.
    win_size : int, default 6
        Number of consecutive time points per segment.

    Returns
    -------
    accu : ndarray of length ``len(data)``
        Fraction of correctly matched segments for each held-out subject.
    """
    n_subjects = len(data)
    n_features, n_samples = data[0].shape
    n_segments = n_samples - win_size
    accu = np.zeros(shape=n_subjects)

    def stack_windows(subject_data):
        """Return the (n_features * win_size, n_segments) segment matrix."""
        stacked = np.zeros((n_features * win_size, n_segments), order='f')
        for shift in range(win_size):
            rows = slice(shift * n_features, (shift + 1) * n_features)
            stacked[rows, :] = subject_data[:, shift:(shift + n_segments)]
        return stacked

    # Sum over *all* subjects; the held-out subject is subtracted again below.
    all_subjects_sum = np.zeros((n_features * win_size, n_segments), order='f')
    for subject_data in data:
        all_subjects_sum += stack_windows(subject_data)

    # Pairs of distinct segments closer than win_size share time points and
    # must not be candidates for the match.
    positions = np.arange(n_segments)
    distance = np.abs(positions[:, None] - positions[None, :])
    overlapping = (distance < win_size) & (distance != 0)

    for held_out in range(n_subjects):
        held_out_segments = stack_windows(data[held_out])
        others = np.nan_to_num(
            stats.zscore(all_subjects_sum - held_out_segments,
                         axis=0, ddof=1))
        own = np.nan_to_num(stats.zscore(held_out_segments, axis=0, ddof=1))

        # Rows: held-out subject's segments; columns: the others' segments.
        corr_mtx = compute_correlation(own.T, others.T)
        corr_mtx[overlapping] = -np.inf

        best_match = np.argmax(corr_mtx, axis=1)
        n_correct = np.count_nonzero(best_match == positions)
        accu[held_out] = n_correct / float(n_segments)
    return accu
def get_conn_matrix(time_series, conn_model, NETWORK, ID, dir_path, thr):
    """Estimate a connectivity matrix and save it as a tab-delimited file.

    Parameters
    ----------
    time_series : array-like
        Data handed unchanged to the selected estimator (presumably
        time points by regions -- confirm against the caller).
    conn_model : str
        One of ``'corr'``, ``'corr_fast'``, ``'partcorr'``, ``'cov'`` or
        ``'sps'``.
    NETWORK : str or None
        Optional label inserted into the output file name; only used for
        the ``'cov'`` and ``'sps'`` models.
    ID : str
        Identifier used as the file-name prefix.
    dir_path : str
        Directory the matrix file is written to.
    thr : object
        Value converted with ``str`` and appended to the file name.

    Returns
    -------
    (conn_matrix, est_path) : tuple
        The estimated matrix and the path it was written to.

    Raises
    ------
    ValueError
        If ``conn_model`` is not one of the supported names.
    RuntimeError
        Re-raised from the accelerated correlation routine after the
        explanatory message has been printed.
    SystemExit
        If the graphical-lasso fit raises ``RuntimeError``.
    """
    if conn_model == 'corr':
        conn_measure = ConnectivityMeasure(kind='correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
        est_path = dir_path + '/' + ID + '_est_corr' + '_' + str(thr) + '.txt'
    elif conn_model == 'corr_fast':
        try:
            conn_matrix = compute_correlation(time_series, time_series)
        except RuntimeError:
            print('Cannot run accelerated correlation computation due to a missing dependency. You need brainiak installed!')
            # There is no matrix to save, so propagate the failure instead of
            # falling through to np.savetxt with unbound variables.
            raise
        est_path = dir_path + '/' + ID + '_est_corr_fast' + '_' + str(thr) + '.txt'
    elif conn_model == 'partcorr':
        conn_measure = ConnectivityMeasure(kind='partial correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
        est_path = dir_path + '/' + ID + '_est_part_corr' + '_' + str(thr) + '.txt'
    elif conn_model in ('cov', 'sps'):
        # Fit the cross-validated graphical lasso; both the covariance and
        # the sparse inverse covariance are read from the same estimator.
        estimator = GraphLassoCV()
        try:
            print("Fitting Lasso estimator...")
            estimator.fit(time_series)
        except RuntimeError:
            print('Unstable Lasso estimation--Attempting to re-run by first applying shrinkage...')
            print('Unstable Lasso estimation. Try again!')
            sys.exit()

        # NOTE(review): the 'cov' suffix has no leading underscore, so the
        # file name contains '_estcov'; kept as-is because other code may
        # rely on the existing names.
        suffix = '_sps_inv' if conn_model == 'sps' else 'cov'
        if NETWORK is not None:
            est_path = dir_path + '/' + ID + '_' + NETWORK + '_est%s' % suffix + '_' + str(thr) + '.txt'
        else:
            est_path = dir_path + '/' + ID + '_est%s' % suffix + '_' + str(thr) + '.txt'

        if conn_model == 'sps':
            # Negated precision matrix is used as the sparse connectivity.
            conn_matrix = -estimator.precision_
        else:
            conn_matrix = estimator.covariance_
    else:
        raise ValueError('Unsupported conn_model: %r' % (conn_model,))

    np.savetxt(est_path, conn_matrix, delimiter='\t')
    return (conn_matrix, est_path)
def isfc(D, collapse_subj=True):
    """Intersubject functional correlation

    For every subject, correlates the timecourse of each of its voxels with
    the timecourse of every voxel in the average of all *other* subjects.
    Each per-subject matrix is symmetrized.  By default the matrices are
    then averaged across subjects.

    Uses the high performance compute_correlation routine from fcma.util

    Parameters
    ----------
    D : voxel by time by subject ndarray
        fMRI data for which to compute ISFC

    collapse_subj : bool, default:True
        Whether to average across subjects before returning result

    Returns
    -------
    ISFC : voxel by voxel ndarray
        (or voxel by voxel by subject ndarray, if collapse_subj=False)
        pearson correlation between all pairs of voxels, across subjects
    """
    n_vox = D.shape[0]
    n_subj = D.shape[2]
    subject_index = np.arange(n_subj)
    ISFC = np.zeros((n_vox, n_vox, n_subj))

    for held_out in range(n_subj):
        others_mean = np.mean(D[:, :, subject_index != held_out], axis=2)
        # View into the output array: the correlation is stored first and
        # symmetrized afterwards, in the output array's own precision.
        layer = ISFC[:, :, held_out]
        layer[...] = compute_correlation(others_mean, D[:, :, held_out])
        layer[...] = (layer + layer.T) / 2

    return np.mean(ISFC, axis=2) if collapse_subj else ISFC
# Script fragment: per-epoch correlation matrices written to disk.
# NOTE(review): the leading ``sys.exit(1)`` looks like the tail of an
# argument check that starts before this fragment -- confirm before moving it.
# The remaining statements read the data directory, file extension, mask file
# and epoch file from sys.argv[1..4], load and mask the images, and derive the
# epoch list with generate_epochs_info.  For every epoch (label, subject id,
# start, end) the subject's data columns start:end are copied into a
# contiguous float32 array, correlated with themselves via
# compute_correlation, and saved with scipy.io.savemat under the key 'corr'
# in a file named '<label>_<sid>_<idx>'.
sys.exit(1) data_dir = sys.argv[1] extension = sys.argv[2] mask_file = sys.argv[3] epoch_file = sys.argv[4] images = dataset.load_images_from_dir(data_dir, extension) mask = dataset.load_boolean_mask(mask_file) conditions = dataset.load_labels(epoch_file) (raw_data, ) = image.multimask_images(images, (mask, )) epoch_info = generate_epochs_info(conditions) for idx, epoch in enumerate(epoch_info): label = epoch[0] sid = epoch[1] start = epoch[2] end = epoch[3] mat = raw_data[sid][:, start:end] mat = np.ascontiguousarray(mat, dtype=np.float32) logger.info( 'start to compute correlation for subject %d epoch %d with label %d' % (sid, idx, label)) corr = compute_correlation(mat, mat) mdict = {} mdict['corr'] = corr filename = str(label) + '_' + str(sid) + '_' + str(idx) logger.info('start to write the correlation matrix to disk as %s' % filename) scipy.io.savemat(filename, mdict)
def _fit_shrunk_graph_lasso(time_series):
    """Fit GraphLasso on shrunk covariances; return the estimator or None.

    Shrinkage values from 0.8 up to (but excluding) 0.99 are tried in steps
    of 0.01, and for each one the penalties 1e-8 ... 1e-1 in increasing
    order.  The first fit that does not raise FloatingPointError wins.
    """
    from sklearn.covariance import GraphLasso, empirical_covariance, shrunk_covariance
    emp_cov = empirical_covariance(time_series)
    alpha_range = 10.0**np.arange(-8, 0)
    for shrinkage in np.arange(0.8, 0.99, 0.01):
        shrunk_cov = shrunk_covariance(emp_cov, shrinkage=shrinkage)
        for alpha in alpha_range:
            candidate = GraphLasso(alpha)
            try:
                candidate.fit(shrunk_cov)
            except FloatingPointError:
                print("Failed at alpha=%s" % alpha)
            else:
                print("Calculated graph-lasso covariance matrix for alpha=%s"
                      % alpha)
                return candidate
    return None


def get_conn_matrix(time_series, conn_model):
    """Estimate a connectivity matrix with the requested model.

    Parameters
    ----------
    time_series : array-like
        Data handed unchanged to the selected estimator (presumably
        time points by regions -- confirm against the caller).
    conn_model : str
        One of ``'corr'``, ``'corr_fast'``, ``'partcorr'``, ``'tangent'``,
        ``'cov'``, ``'sps'``, ``'QuicGraphLasso'``, ``'QuicGraphLassoCV'``,
        ``'QuicGraphLassoEBIC'`` or ``'AdaptiveQuicGraphLasso'``.

    Returns
    -------
    conn_matrix : ndarray
        The estimated matrix.  For the precision-based models the negated
        precision matrix is returned.

    Raises
    ------
    ValueError
        If ``conn_model`` is not supported, or if the graphical lasso fails
        and no shrinkage/penalty combination can be fitted either.
    ImportError
        If ``'corr_fast'`` is requested but brainiak cannot be imported.
    RuntimeError
        Re-raised from the accelerated correlation routine after the
        explanatory message has been printed.

    Notes
    -----
    Calling this function sets the global warnings filter to "ignore".
    """
    supported_models = (
        'corr', 'corr_fast', 'partcorr', 'tangent', 'cov', 'sps',
        'QuicGraphLasso', 'QuicGraphLassoCV', 'QuicGraphLassoEBIC',
        'AdaptiveQuicGraphLasso',
    )
    # Reject unknown names up front instead of failing at ``return`` with an
    # unbound ``conn_matrix``.
    if conn_model not in supported_models:
        raise ValueError('Unsupported conn_model: %r' % (conn_model,))

    import warnings
    warnings.simplefilter("ignore")
    from nilearn.connectome import ConnectivityMeasure
    from sklearn.covariance import GraphLassoCV
    try:
        from brainiak.fcma.util import compute_correlation
    except ImportError:
        # brainiak is optional; only the 'corr_fast' model needs it.
        compute_correlation = None

    if conn_model == 'corr':
        # credit: nilearn
        print('\nComputing correlation matrix...\n')
        conn_measure = ConnectivityMeasure(kind='correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
    elif conn_model == 'corr_fast':
        # credit: brainiak
        missing_dependency = (
            'Cannot run accelerated correlation computation due to a missing dependency. You need brainiak installed!'
        )
        if compute_correlation is None:
            raise ImportError(missing_dependency)
        try:
            print('\nComputing accelerated fcma correlation matrix...\n')
            conn_matrix = compute_correlation(time_series, time_series)
        except RuntimeError:
            print(missing_dependency)
            # Nothing was computed, so let the caller see the failure.
            raise
    elif conn_model == 'partcorr':
        # credit: nilearn
        print('\nComputing partial correlation matrix...\n')
        conn_measure = ConnectivityMeasure(kind='partial correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
    elif conn_model == 'tangent':
        # credit: nilearn
        print('\nComputing tangent matrix...\n')
        conn_measure = ConnectivityMeasure(kind='tangent')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
    elif conn_model == 'cov' or conn_model == 'sps':
        # Fit estimator to matrix to get sparse matrix
        estimator = GraphLassoCV()
        used_shrinkage = False
        try:
            print('\nComputing covariance...\n')
            estimator.fit(time_series)
        except Exception:
            print(
                'Unstable Lasso estimation--Attempting to re-run by first applying shrinkage...'
            )
            try:
                estimator = _fit_shrunk_graph_lasso(time_series)
            except Exception:
                raise ValueError(
                    'Unstable Lasso estimation! Shrinkage failed.')
            if estimator is None:
                # Every shrinkage/penalty combination failed to converge.
                raise ValueError(
                    'Unstable Lasso estimation! Shrinkage failed.')
            used_shrinkage = True

        if conn_model == 'sps':
            if used_shrinkage:
                print(
                    '\nFetching shrunk precision matrix from covariance estimator...\n'
                )
            else:
                print(
                    '\nFetching precision matrix from covariance estimator...\n'
                )
            conn_matrix = -estimator.precision_
        else:
            print(
                '\nFetching covariance matrix from covariance estimator...\n'
            )
            conn_matrix = estimator.covariance_
    elif conn_model == 'QuicGraphLasso':
        from inverse_covariance import QuicGraphLasso
        # Compute the sparse inverse covariance via QuicGraphLasso
        # credit: skggm
        model = QuicGraphLasso(init_method='cov', lam=0.5, mode='default',
                               verbose=1)
        print('\nCalculating QuicGraphLasso precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'QuicGraphLassoCV':
        from inverse_covariance import QuicGraphLassoCV
        # Compute the sparse inverse covariance via QuicGraphLassoCV
        # credit: skggm
        model = QuicGraphLassoCV(init_method='cov', verbose=1)
        print(
            '\nCalculating QuicGraphLassoCV precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'QuicGraphLassoEBIC':
        from inverse_covariance import QuicGraphLassoEBIC
        # Compute the sparse inverse covariance via QuicGraphLassoEBIC
        # credit: skggm
        model = QuicGraphLassoEBIC(init_method='cov', verbose=1)
        print(
            '\nCalculating QuicGraphLassoEBIC precision matrix using skggm...\n'
        )
        model.fit(time_series)
        conn_matrix = -model.precision_
    else:
        # conn_model == 'AdaptiveQuicGraphLasso' (validated above)
        from inverse_covariance import AdaptiveGraphLasso, QuicGraphLassoEBIC
        # Compute the sparse inverse covariance via
        # AdaptiveGraphLasso + QuicGraphLassoEBIC + method='binary'
        # credit: skggm
        model = AdaptiveGraphLasso(
            estimator=QuicGraphLassoEBIC(init_method='cov', ),
            method='binary',
        )
        print(
            '\nCalculating AdaptiveQuicGraphLasso precision matrix using skggm...\n'
        )
        model.fit(time_series)
        conn_matrix = -model.estimator_.precision_

    return conn_matrix
# Command-line arguments: image file extension, mask file, epoch-label file.
extension, mask_file, epoch_file = sys.argv[2], sys.argv[3], sys.argv[4]

# Load the images, the boolean mask and the epoch labels, then mask the data.
images = dataset.load_images_from_dir(data_dir, extension)
mask = dataset.load_boolean_mask(mask_file)
conditions = dataset.load_labels(epoch_file)
[raw_data] = image.multimask_images(images, (mask,))
epoch_info = generate_epochs_info(conditions)

# One correlation matrix per epoch, saved under the key 'corr' in a file
# named '<label>_<sid>_<idx>'.
for idx, epoch in enumerate(epoch_info):
    label, sid, start, end = epoch[0], epoch[1], epoch[2], epoch[3]
    mat = np.ascontiguousarray(raw_data[sid][:, start:end],
                               dtype=np.float32)
    logger.info(
        'start to compute correlation for subject %d epoch %d with label %d'
        % (sid, idx, label)
    )
    corr = compute_correlation(mat, mat)
    mdict = {'corr': corr}
    filename = '_'.join([str(label), str(sid), str(idx)])
    logger.info(
        'start to write the correlation matrix to disk as %s' % filename
    )
    scipy.io.savemat(filename, mdict)
def isfc(D, collapse_subj=True, return_p=False, num_perm=1000,
         two_sided=False, random_state=0):
    """Intersubject functional correlation

    Computes the correlation between the timecourse of each voxel in each
    subject with the average of all other subjects' timecourses in *all*
    voxels. By default the result is averaged across subjects, unless
    collapse_subj is set to False. A null distribution can optionally be
    computed using phase randomization, to compute a p value for each
    voxel-to-voxel correlation. Each null dataset is obtained by phase
    randomizing the previous one.

    Uses the high performance compute_correlation routine from fcma.util

    Parameters
    ----------
    D : voxel by time by subject ndarray
        fMRI data for which to compute ISFC

    collapse_subj : bool, default:True
        Whether to average across subjects before returning result

    return_p : bool, default:False
        Whether to use phase randomization to compute a p value for each
        voxel pair

    num_perm : int, default:1000
        Number of null samples to use for computing p values

    two_sided : bool, default:False
        Whether the p value should be one-sided (testing only for being
        above the null) or two-sided (testing for both significantly
        positive and significantly negative values)

    random_state : RandomState or an int seed (0 by default)
        A random number generator instance to define the state of the
        random permutations generator.

    Returns
    -------
    ISFC : voxel by voxel ndarray
        (or voxel by voxel by subject ndarray, if collapse_subj=False)
        pearson correlation between all pairs of voxels, across subjects

    p : ndarray the same shape as ISFC (if return_p = True)
        p values for each ISFC value under the null distribution
    """
    n_vox = D.shape[0]
    n_subj = D.shape[2]
    n_perm = num_perm if return_p else 0

    # Last axis: index 0 holds the real data, 1..n_perm the null samples.
    ISFC = np.zeros((n_vox, n_vox, n_subj, n_perm + 1))

    for perm in range(n_perm + 1):
        # Loop across choice of leave-one-out subject
        for loo_subj in range(D.shape[2]):
            group = np.mean(D[:, :, np.arange(n_subj) != loo_subj], axis=2)
            subj = D[:, :, loo_subj]
            ISFC[:, :, loo_subj, perm] = compute_correlation(group, subj)

            # Symmetrize matrix
            ISFC[:, :, loo_subj, perm] = (ISFC[:, :, loo_subj, perm] +
                                          ISFC[:, :, loo_subj, perm].T) / 2

        # Randomize phases of D to create the next null dataset.  Skipped
        # after the final pass, where the result would never be used (this
        # is the only pass when return_p is False).
        if perm < n_perm:
            D = phase_randomize(D, random_state)

    if collapse_subj:
        ISFC = np.mean(ISFC, axis=2)

    if return_p:
        p = p_from_null(ISFC, two_sided)
        return ISFC[..., 0], p
    return ISFC[..., 0]
def isfc(D, collapse_subj=True, return_p=False, num_perm=1000,
         two_sided=False, random_state=0, float_type=np.float64):
    """Intersubject functional correlation

    Correlates the timecourse of each voxel in each subject with the
    timecourse of every voxel in the average of all other subjects, and
    symmetrizes each per-subject matrix. The matrices are averaged across
    subjects unless collapse_subj is False. When return_p is True, a null
    distribution is built by repeated phase randomization; only the maximum
    and minimum of each null sample are kept and passed to p_from_null.

    Uses the high performance compute_correlation routine from fcma.util

    Parameters
    ----------
    D : voxel by time by subject ndarray
        fMRI data for which to compute ISFC

    collapse_subj : bool, default:True
        Whether to average across subjects before returning result

    return_p : bool, default:False
        Whether to use phase randomization to compute a p value for each
        voxel pair

    num_perm : int, default:1000
        Number of null samples to use for computing p values

    two_sided : bool, default:False
        Whether the p value should be one-sided (testing only for being
        above the null) or two-sided (testing for both significantly
        positive and significantly negative values)

    random_state : RandomState or an int seed (0 by default)
        A random number generator instance to define the state of the
        random permutations generator.

    float_type : either float16, float32, or float64
        Depends on the required precision and available memory in the
        system. All the arrays generated during the execution will be
        cast to specified float type in order to save memory.

    Returns
    -------
    ISFC : voxel by voxel ndarray
        (or voxel by voxel by subject ndarray, if collapse_subj=False)
        pearson correlation between all pairs of voxels, across subjects

    p : ndarray the same shape as ISFC (if return_p = True)
        p values for each ISFC value under the null distribution
    """
    n_vox = D.shape[0]
    n_subj = D.shape[2]
    n_perm = num_perm * int(return_p)
    subject_index = np.arange(n_subj)

    def leave_one_out(dataset, held_out):
        """Symmetrized correlation of one subject with the mean of the rest."""
        others_mean = np.mean(dataset[:, :, subject_index != held_out],
                              axis=2)
        corr = compute_correlation(others_mean, dataset[:, :, held_out])
        corr = corr.astype(float_type)
        return (corr + corr.T) / 2

    # Extremes of each null sample, initialised to the opposite bound of a
    # correlation so that any observed value replaces them.
    max_null = -np.ones(n_perm, dtype=float_type)
    min_null = np.ones(n_perm, dtype=float_type)

    # ISFC on the real data.
    ISFC = np.zeros((n_vox, n_vox, n_subj), dtype=float_type)
    for loo_subj in range(D.shape[2]):
        ISFC[:, :, loo_subj] = leave_one_out(D, loo_subj)
    if collapse_subj:
        ISFC = np.mean(ISFC, axis=2)

    # Null samples: each one phase-randomizes the previous dataset.
    for perm in range(n_perm):
        D = phase_randomize(D, random_state)
        ISFC_null = np.zeros((n_vox, n_vox), dtype=float_type)
        for loo_subj in range(D.shape[2]):
            null_corr = leave_one_out(D, loo_subj)
            if not collapse_subj:
                # Track the extremes over the individual subject matrices.
                max_null[perm] = max(np.max(null_corr), max_null[perm])
                min_null[perm] = min(np.min(null_corr), min_null[perm])
            ISFC_null = ISFC_null + null_corr / n_subj
        if collapse_subj:
            # Track the extremes of the subject-averaged matrix instead.
            max_null[perm] = np.max(ISFC_null)
            min_null[perm] = np.min(ISFC_null)

    if not return_p:
        return ISFC
    p = p_from_null(ISFC, two_sided,
                    max_null_input=max_null,
                    min_null_input=min_null)
    return ISFC, p
def isfc(D, collapse_subj=True, return_p=False, num_perm=1000,
         two_sided=False, random_state=0, float_type=np.float64):
    """Intersubject functional correlation

    For each subject, the timecourse of every voxel is correlated with the
    timecourse of every voxel in the mean of the remaining subjects, and
    the resulting matrix is symmetrized. Unless collapse_subj is False the
    per-subject matrices are averaged. With return_p=True, phase
    randomization is applied repeatedly to build a null distribution whose
    per-sample maximum and minimum are handed to p_from_null.

    Uses the high performance compute_correlation routine from fcma.util

    Parameters
    ----------
    D : voxel by time by subject ndarray
        fMRI data for which to compute ISFC

    collapse_subj : bool, default:True
        Whether to average across subjects before returning result

    return_p : bool, default:False
        Whether to use phase randomization to compute a p value for each
        voxel pair

    num_perm : int, default:1000
        Number of null samples to use for computing p values

    two_sided : bool, default:False
        Whether the p value should be one-sided (testing only for being
        above the null) or two-sided (testing for both significantly
        positive and significantly negative values)

    random_state : RandomState or an int seed (0 by default)
        A random number generator instance to define the state of the
        random permutations generator.

    float_type : either float16, float32, or float64
        Depends on the required precision and available memory in the
        system. All the arrays generated during the execution will be
        cast to specified float type in order to save memory.

    Returns
    -------
    ISFC : voxel by voxel ndarray
        (or voxel by voxel by subject ndarray, if collapse_subj=False)
        pearson correlation between all pairs of voxels, across subjects

    p : ndarray the same shape as ISFC (if return_p = True)
        p values for each ISFC value under the null distribution
    """
    n_vox = D.shape[0]
    n_subj = D.shape[2]
    n_perm = num_perm * int(return_p)

    max_null = -np.ones(n_perm, dtype=float_type)
    min_null = np.ones(n_perm, dtype=float_type)

    # Observed ISFC, one symmetrized matrix per left-out subject.
    ISFC = np.zeros((n_vox, n_vox, n_subj), dtype=float_type)
    for left_out in range(D.shape[2]):
        keep = np.arange(n_subj) != left_out
        observed = compute_correlation(
            np.mean(D[:, :, keep], axis=2),
            D[:, :, left_out],
        ).astype(float_type)
        ISFC[:, :, left_out] = (observed + observed.T) / 2

    if collapse_subj:
        ISFC = np.mean(ISFC, axis=2)

    for null_idx in range(n_perm):
        # Randomize phases of D to create next null dataset
        D = phase_randomize(D, random_state)

        null_mean = np.zeros((n_vox, n_vox), dtype=float_type)
        for left_out in range(D.shape[2]):
            keep = np.arange(n_subj) != left_out
            null_corr = compute_correlation(
                np.mean(D[:, :, keep], axis=2),
                D[:, :, left_out],
            ).astype(float_type)
            null_corr = (null_corr + null_corr.T) / 2

            if collapse_subj:
                # Only the collapsed case reads the running subject mean.
                null_mean = null_mean + null_corr / n_subj
            else:
                max_null[null_idx] = max(np.max(null_corr),
                                         max_null[null_idx])
                min_null[null_idx] = min(np.min(null_corr),
                                         min_null[null_idx])

        if collapse_subj:
            max_null[null_idx] = np.max(null_mean)
            min_null[null_idx] = np.min(null_mean)

    if return_p:
        p = p_from_null(ISFC, two_sided,
                        max_null_input=max_null,
                        min_null_input=min_null)
        return ISFC, p
    return ISFC