def subject_connectivity(timeseries, subject, atlas_name, kind, save=True, save_path=root_folder):
    """Estimate one subject's connectivity matrix and optionally save it.

    Parameters
    ----------
    timeseries : timeseries table for the subject (timepoints x regions)
    subject : the subject short ID
    atlas_name : name of the atlas used
    kind : the kind of connectivity to be used: 'lasso', 'tangent',
        'partial correlation' or 'correlation'
    save : save the connectivity matrix to a ``.mat`` file
    save_path : folder that contains one sub-folder per subject

    Returns
    -------
    connectivity : connectivity matrix (regions x regions)

    Raises
    ------
    ValueError
        If ``kind`` is not one of the supported values.
    """
    print("Estimating %s matrix for subject %s" % (kind, subject))

    if kind == 'lasso':
        # Graph Lasso estimator
        covariance_estimator = GraphLassoCV(verbose=1)
        covariance_estimator.fit(timeseries)
        connectivity = covariance_estimator.covariance_
        print('Covariance matrix has shape {0}.'.format(connectivity.shape))
    elif kind in ['tangent', 'partial correlation', 'correlation']:
        conn_measure = connectome.ConnectivityMeasure(kind=kind)
        connectivity = conn_measure.fit_transform([timeseries])[0]
    else:
        # Fail with a clear message instead of hitting an unbound
        # ``connectivity`` further down.
        raise ValueError("Unknown connectivity kind: %r" % (kind,))

    if save:
        # File name: <subject>_<atlas>_<kind with underscores>.mat
        subject_file = os.path.join(
            save_path, subject,
            subject + '_' + atlas_name + '_' + kind.replace(' ', '_') + '.mat')
        sio.savemat(subject_file, {'connectivity': connectivity})

    return connectivity
def stockCluster(self, centers, data, selectStock):
    '''
    Cluster stocks with graph lasso + affinity propagation and print each
    cluster's member names.

    data like
           600010  600011
        0     1.7     2.1
        1     2.3     3.1
    selectStock like
        600010 600011

    ``centers`` is accepted but not used. Stock names are looked up in
    "stockList.xls", indexed by its ``ts_code`` column. Nothing is returned.
    '''
    lasso = GraphLassoCV()
    lasso.fit(data)
    _, labels = affinity_propagation(lasso.covariance_)

    # ``labels`` holds the cluster number assigned to every stock.
    cluster_count = max(labels) + 1
    print('Stock Clusters: {}'.format(cluster_count))

    name_table = pd.read_excel("stockList.xls").set_index('ts_code')
    codes = np.array(selectStock)
    for cluster_id in range(cluster_count):
        # Print stock names rather than bare codes so the output is readable.
        members = codes[labels == cluster_id].tolist()
        member_names = name_table.loc[members, :].name.tolist()
        print('Cluster: {}----> stocks: {}'.format(cluster_id, ','.join(member_names)))
def calculate_connectivity_matrix(in_data, extraction_method):
    '''
    Compute connectivity matrices from parcellated time series and pickle them.

    Meant to run after extract_parcellation_time_series().

    in_data: 2-D array; ``in_data.T`` is passed to ``np.corrcoef`` and
        ``in_data`` itself to ``GraphLassoCV.fit``.
    extraction_method: 'correlation' or 'sparse_inverse_covariance'.

    Returns ``(matrix, matrix_file)``: ``matrix`` is a dict of arrays keyed by
    matrix type, ``matrix_file`` is the path of ``matrix.pkl`` written to the
    current working directory.

    Raises ValueError for an unknown ``extraction_method``.
    '''
    # Imports stay function-local, as in the original.
    import os
    import pickle
    import numpy as np

    if extraction_method == 'correlation':
        matrix = {'correlation': np.corrcoef(in_data.T)}
    elif extraction_method == 'sparse_inverse_covariance':
        # Compute the sparse inverse covariance
        from sklearn.covariance import GraphLassoCV
        estimator = GraphLassoCV()
        estimator.fit(in_data)
        matrix = {
            'covariance': estimator.covariance_,
            'sparse_inverse_covariance': estimator.precision_,
        }
    else:
        raise ValueError('Unknown extraction method: %s' % extraction_method)

    matrix_file = os.path.join(os.getcwd(), 'matrix.pkl')
    # pickle writes bytes, so the file must be opened in binary mode
    # (text mode 'w' fails on Python 3).
    with open(matrix_file, 'wb') as f:
        pickle.dump(matrix, f)

    return matrix, matrix_file
def cluster(stock_dataset, selected_stocks, sz50_df2):
    """Cluster stocks, print the clusters and dump them to CSV files.

    A GraphLassoCV model is fitted on ``stock_dataset`` and its covariance is
    clustered with affinity propagation. The cluster centers are written to
    ``./cluster/center.csv`` and each cluster to ``./cluster/cluster <i>.csv``
    (columns ``code`` and ``name``). ``sz50_df2`` is a table indexed by stock
    code with a ``name`` column. Nothing is returned.
    """
    # Learn the graph structure from the data.
    lasso = GraphLassoCV()
    lasso.fit(stock_dataset)

    # Affinity-propagation clustering on the covariance; the first return
    # value holds the indices of the cluster centers.
    center_indices, labels = affinity_propagation(lasso.covariance_)

    # ``labels`` holds the cluster number assigned to every stock.
    cluster_count = max(labels) + 1
    print('Stock Clusters: {}'.format(cluster_count))

    # Codes and names of the cluster-center stocks, written to file.
    center_codes = [selected_stocks[idx] for idx in center_indices]
    center_names = sz50_df2.loc[center_codes, :].name.tolist()
    center_table = pd.DataFrame(np.column_stack((center_codes, center_names)),
                                columns=['code', 'name'])
    center_table.to_csv('./cluster/center.csv')

    codes = np.array(selected_stocks)
    for cluster_id in range(cluster_count):
        # Print stock names so the clusters are easy to inspect.
        members = codes[labels == cluster_id].tolist()
        member_names = sz50_df2.loc[members, :].name.tolist()
        print('Cluster: {}----> stocks: {}'.format(str(cluster_id), ','.join(member_names)))
        cluster_table = pd.DataFrame(np.column_stack((members, member_names)),
                                     columns=['code', 'name'])
        cluster_table.to_csv('./cluster/cluster ' + str(cluster_id) + '.csv')
def calculate_connectivity_matrix(in_data, extraction_method):
    '''
    Compute connectivity matrices from parcellated time series and pickle them.

    Meant to run after extract_parcellation_time_series().

    in_data: 2-D array; ``in_data.T`` is passed to ``np.corrcoef`` and
        ``in_data`` itself to ``GraphLassoCV.fit``.
    extraction_method: 'correlation' or 'sparse_inverse_covariance'.

    Returns ``(matrix, matrix_file)``: ``matrix`` is a dict of arrays keyed by
    matrix type, ``matrix_file`` is the path of ``matrix.pkl`` written to the
    current working directory.

    Raises ValueError for an unknown ``extraction_method``.
    '''
    # Imports stay function-local, as in the original.
    import os
    import pickle
    import numpy as np

    if extraction_method == 'correlation':
        matrix = {'correlation': np.corrcoef(in_data.T)}
    elif extraction_method == 'sparse_inverse_covariance':
        # Compute the sparse inverse covariance
        from sklearn.covariance import GraphLassoCV
        estimator = GraphLassoCV()
        estimator.fit(in_data)
        matrix = {'covariance': estimator.covariance_,
                  'sparse_inverse_covariance': estimator.precision_}
    else:
        raise ValueError('Unknown extraction method: %s' % extraction_method)

    matrix_file = os.path.join(os.getcwd(), 'matrix.pkl')
    # pickle writes bytes, so the file must be opened in binary mode
    # (text mode 'w' fails on Python 3).
    with open(matrix_file, 'wb') as f:
        pickle.dump(matrix, f)

    return matrix, matrix_file
def get_conn_matrix(time_series, conn_model, NETWORK, ID, dir_path, thr):
    """Estimate a connectivity matrix and save it as a tab-delimited text file.

    Parameters
    ----------
    time_series : array passed to the selected estimator.
    conn_model : 'corr', 'corr_fast', 'partcorr', 'cov' or 'sps'.
    NETWORK : network name inserted into the file name for 'cov'/'sps',
        or None to leave it out.
    ID : subject identifier used in the file name.
    dir_path : directory the matrix file is written to.
    thr : value appended (via ``str``) to the file name.

    Returns
    -------
    (conn_matrix, est_path) : the matrix and the path it was saved to.

    Raises
    ------
    ValueError
        If ``conn_model`` is not recognised.
    RuntimeError
        Re-raised from ``compute_correlation`` for 'corr_fast'.

    A ``RuntimeError`` during the graph-lasso fit terminates the process
    through ``sys.exit()``.
    """
    if conn_model == 'corr':
        conn_measure = ConnectivityMeasure(kind='correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
        est_path = dir_path + '/' + ID + '_est_corr' + '_' + str(thr) + '.txt'
    elif conn_model == 'corr_fast':
        try:
            conn_matrix = compute_correlation(time_series, time_series)
        except RuntimeError:
            print('Cannot run accelerated correlation computation due to a missing dependency. You need brainiak installed!')
            # Without a matrix there is nothing to save; propagate the error
            # rather than failing later on an unbound variable.
            raise
        est_path = dir_path + '/' + ID + '_est_corr_fast' + '_' + str(thr) + '.txt'
    elif conn_model == 'partcorr':
        conn_measure = ConnectivityMeasure(kind='partial correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
        est_path = dir_path + '/' + ID + '_est_part_corr' + '_' + str(thr) + '.txt'
    elif conn_model in ('cov', 'sps'):
        # Fit estimator to matrix to get sparse matrix
        estimator = GraphLassoCV()
        try:
            print("Fitting Lasso estimator...")
            estimator.fit(time_series)
        except RuntimeError:
            # The shrinkage-based retry announced here was never implemented
            # (it only existed as commented-out code), so we stop.
            print('Unstable Lasso estimation--Attempting to re-run by first applying shrinkage...')
            print('Unstable Lasso estimation. Try again!')
            sys.exit()

        # NOTE(review): the 'cov' suffix has no leading underscore, so the
        # file name contains "_estcov"; kept as-is so existing paths match.
        suffix = '_est%s' % ('_sps_inv' if conn_model == 'sps' else 'cov')
        if NETWORK is not None:
            est_path = dir_path + '/' + ID + '_' + NETWORK + suffix + '_' + str(thr) + '.txt'
        else:
            est_path = dir_path + '/' + ID + suffix + '_' + str(thr) + '.txt'

        # The fit succeeded if we get here, so the fitted attributes exist.
        if conn_model == 'sps':
            conn_matrix = -estimator.precision_
        else:
            conn_matrix = estimator.covariance_
    else:
        raise ValueError('Unknown connectivity model: %s' % conn_model)

    np.savetxt(est_path, conn_matrix, delimiter='\t')
    return (conn_matrix, est_path)
def graph_lasso(X, num_folds):
    """Fit scikit-learn's GraphLassoCV on ``X`` with ``cv=num_folds``.

    Prints the selected penalty and returns the fitted
    ``(covariance_, precision_, alpha_)``.
    """
    print("GraphLasso (sklearn)")
    estimator = GraphLassoCV(cv=num_folds)
    estimator.fit(X)
    selected_penalty = estimator.alpha_
    print(" lam_: {}".format(selected_penalty))
    return estimator.covariance_, estimator.precision_, selected_penalty
def _cov_selection(self, alphas=4, n_refinements=4, cv=None):
    """Fit a cross-validated graph lasso on ``self.ret``.

    The arguments are forwarded to ``GraphLassoCV`` (with
    ``assume_centered=True``). Returns ``(covariance_, precision_)``.
    """
    from sklearn.covariance import GraphLassoCV

    settings = dict(alphas=alphas, n_refinements=n_refinements, cv=cv,
                    assume_centered=True)
    estimator = GraphLassoCV(**settings)
    estimator.fit(self.ret)
    return estimator.covariance_, estimator.precision_
def graph_lasso(X, num_folds):
    '''Estimate inverse covariance via scikit-learn GraphLassoCV class.

    Fits ``GraphLassoCV(cv=num_folds)`` on ``X``, prints the selected
    penalty and returns ``(covariance_, precision_, alpha_)``.
    '''
    # Single-argument print() calls behave the same on Python 2 and 3;
    # the former print statements were a syntax error on Python 3.
    print('GraphLasso (sklearn)')
    model = GraphLassoCV(cv=num_folds)
    model.fit(X)
    print(' lam_: {}'.format(model.alpha_))
    return model.covariance_, model.precision_, model.alpha_
def _generate_structure_K(self, X):
    """Return the precision matrix of a GraphLassoCV fitted on ``X.T``.

    The selected ``alpha_`` is printed when the estimator exposes it.
    """
    estimator = GraphLassoCV(alphas=20)
    estimator.fit(X.T)
    precision = estimator.get_precision()
    if hasattr(estimator, 'alpha_'):
        print('alpha=', estimator.alpha_)
    return precision
def cal_connectome(fmri_ff, confound_ff, atlas_ff, outputjpg_ff,
                   metric='correlation', labelrange=None, label_or_map=0):
    """Compute a connectivity matrix for one fMRI run and save a plot of it.

    Parameters
    ----------
    fmri_ff : functional image passed to the masker.
    confound_ff : confounds passed to the masker.
    atlas_ff : atlas image; used as labels when ``label_or_map == 0``,
        otherwise as maps.
    outputjpg_ff : path the figure is saved to.
    metric : 'sparse inverse covariance' (negated GraphLassoCV precision) or
        any ``kind`` accepted by ``ConnectivityMeasure``, e.g. "correlation",
        "partial correlation", "tangent", "covariance", "precision".
    labelrange : column indices of the regions to keep; all when None.
    label_or_map : 0 selects ``NiftiLabelsMasker``, anything else
        ``NiftiMapsMasker``.

    Returns
    -------
    The connectivity matrix with its main diagonal set to 0 (the diagonal is
    zeroed in place for plotting). If the graph-lasso fit fails, an all-zero
    matrix is returned and the failure is printed.
    """
    if label_or_map == 0:
        masker = NiftiLabelsMasker(labels_img=atlas_ff, standardize=True,
                                   verbose=0)
    else:
        masker = NiftiMapsMasker(maps_img=atlas_ff, standardize=True,
                                 verbose=0)

    time_series_0 = masker.fit_transform(fmri_ff, confounds=confound_ff)
    if labelrange is None:
        labelrange = np.arange(time_series_0.shape[1])
    time_series = time_series_0[:, labelrange]

    if metric == 'sparse inverse covariance':
        try:
            estimator = GraphLassoCV()
            estimator.fit(time_series)
            correlation_matrix = -estimator.precision_
        except Exception as exc:
            # Best effort: keep the zero-matrix fallback, but report the
            # failure and no longer swallow KeyboardInterrupt/SystemExit.
            print('GraphLassoCV failed (%s); using an all-zero matrix' % exc)
            correlation_matrix = np.zeros(
                (time_series.shape[1], time_series.shape[1]))
    else:
        correlation_measure = ConnectivityMeasure(kind=metric)
        correlation_matrix = correlation_measure.fit_transform(
            [time_series])[0]

    # Plot the correlation matrix
    fig = plt.figure(figsize=(6, 5), dpi=100)
    plt.clf()
    # Mask the main diagonal for visualization:
    np.fill_diagonal(correlation_matrix, 0)
    plt.imshow(correlation_matrix, interpolation="nearest", cmap="RdBu_r",
               vmax=0.8, vmin=-0.8)
    plt.gca().yaxis.tick_right()
    plt.axis('off')
    plt.colorbar()
    plt.title(metric.title(), fontsize=12)
    plt.tight_layout()
    fig.savefig(outputjpg_ff, bbox_inches='tight')
    plt.close()

    return correlation_matrix
def __init__(self, original_matrix):
    '''
    Store the data as a matrix and fit a GraphLassoCV model on it.

    :param original_matrix: X, a list (matrix) of instances
    '''
    # X = [x(1), x(2), ..., x(len)], each with dim features
    self._X = np.matrix(original_matrix)
    shape = self._X.shape
    self._len = shape[0]
    self._dim = shape[1]

    estimator = GraphLassoCV()
    estimator.fit(self._X)
    self._glasso_covariance = estimator.covariance_
    self._glasso_precision = estimator.precision_
def run_clustering(methods, cases):
    """Cluster methods spectrally on a graph-lasso covariance and report.

    ``methods`` is a sequence whose items expose a label at index 0 and the
    true group at index 1. ``cases`` is the data GraphLassoCV is fitted on.
    Prints one line per method (group, assigned cluster, label) followed by
    the adjusted Rand score against the true groups. Nothing is returned.
    """
    true_method_groups = [m[1] for m in methods]

    edge_model = GraphLassoCV(alphas=4, n_refinements=5, n_jobs=3,
                              max_iter=100)
    edge_model.fit(cases)
    CV = edge_model.covariance_

    num_clusters = 3
    spectral = SpectralClustering(n_clusters=num_clusters,
                                  affinity='precomputed')
    spectral.fit(np.asarray(CV))

    # Single-argument print() calls behave the same on Python 2 and 3;
    # the former print statements were a syntax error on Python 3.
    for i, m in enumerate(methods):
        print("%s:%d\t%s" % (m[1], spectral.labels_[i], m[0]))
    print("Adj. Rand Score: %f" % adjusted_rand_score(spectral.labels_,
                                                      true_method_groups))
def corrcov(arr, typedat):
    """Return the matrix selected by ``typedat`` for the data in ``arr``.

    ``arr`` is transposed before ``np.cov``/``np.corrcoef`` and passed
    unchanged to ``partialcorr`` and ``GraphLassoCV.fit``.

    ``typedat`` is one of 'Partial correlation', 'GraphLassoCV covariance',
    'GraphLassoCV precision', 'Covariance' or 'Correlation'.

    Raises ValueError for any other ``typedat``.
    """
    if typedat == 'Partial correlation':
        return partialcorr(arr)

    if typedat in ('GraphLassoCV covariance', 'GraphLassoCV precision'):
        estimator = GraphLassoCV()
        estimator.fit(arr)
        if typedat == 'GraphLassoCV covariance':
            return estimator.covariance_
        return estimator.precision_

    if typedat == 'Covariance':
        return np.cov(arr.transpose())

    if typedat == 'Correlation':
        return np.corrcoef(arr.transpose())

    # Previously an unknown type fell through to ``return out`` with ``out``
    # never assigned.
    raise ValueError('Unknown matrix type: %r' % (typedat,))
class TestStatisticalPower(object):
    @pytest.mark.parametrize("params_in", [
        dict(
            model_selection_estimator=QuicGraphLassoCV(),
            n_trials=20,
            n_features=25,
        ),
        dict(
            model_selection_estimator=QuicGraphLassoEBIC(),
            n_trials=20,
            n_features=10,
            n_jobs=2,
        ),
        dict(
            model_selection_estimator=GraphLassoCV(),
            n_trials=20,
            n_features=20,
            penalty_='alpha_',
        ),
    ])
    def test_integration_statistical_power(self, params_in):
        '''
        Integration smoke test: checks the shapes of the outputs only,
        not the validity of the result.
        '''
        data = datasets.load_diabetes().data
        power = StatisticalPower(**params_in)
        power.fit(data)

        expected_k = 5
        expected_shape = (expected_k, power.n_grid_points)

        assert np.sum(power.results_.flat) > 0
        assert power.results_.shape == expected_shape
        assert len(power.ks_) == expected_k
        assert len(power.grid_) == power.n_grid_points
def _generate_structure_K(self, X):
    """Return the precision matrix of a GraphLassoCV fitted on ``X.T``.

    Prints the selected ``alpha_`` when available, and a notice when the
    support of the precision matrix (entries above 1e-10 in absolute value)
    is exactly the identity pattern.
    """
    estimator = GraphLassoCV(alphas=20)
    estimator.fit(X.T)
    precision = estimator.get_precision()

    if hasattr(estimator, 'alpha_'):
        print('alpha=', estimator.alpha_)

    support = np.abs(precision) > 1e-10
    identity_pattern = np.eye(support.shape[0], dtype=bool)
    if (support == identity_pattern).all():
        print('Got identity structure')

    return precision
def test_estimate_covariance(self):
    """Compare the diagonal from estimate_covariance with a direct
    GraphLassoCV fit on the same returns."""
    close_feature = {
        'name': 'close',
        'normalization': 'standard',
        'transformation': {'name': 'log-return'},
        'is_target': True,
        'local': False,
    }
    configuration = dict(
        feature_config_list=[close_feature],
        fill_limit=0,
        exchange_name='NYSE',
        features_ndays=9,
        features_resample_minutes=15,
        features_start_market_minute=60,
        prediction_frequency_ndays=1,
        prediction_market_minute=60,
        target_delta_ndays=1,
        target_market_minute=60,
        n_classification_bins=12,
        nassets=3,
        classify_per_series=False,
        normalise_per_series=False,
    )

    transformation = FinancialDataTransformation(configuration)
    _, data = self._prepare_data_for_test()

    method = "Ledoit"
    calendar = transformation.exchange_calendar
    n_days = transformation.features_ndays
    # FIXME (kept from the original): this is the only value that works now.
    interval = transformation.target_delta_ndays
    market_minute = transformation.target_market_minute

    estimated = estimate_covariance(data, n_days, market_minute, method,
                                    calendar, interval)

    returns = returns_minutes_after_market_open_data_frame(
        data['close'], calendar, market_minute)
    print(returns.shape)

    n_columns = returns.shape[1]
    n_rows_used = n_columns * DEFAULT_NUM_REALISATIONS_MULTIPLICATION_FACTOR
    recent_returns = returns.values[-n_rows_used:, :]

    reference_model = GraphLassoCV()
    reference_model.fit(recent_returns)
    reference_cov = reference_model.covariance_

    self.assertTrue(np.allclose(estimated.diagonal(),
                                reference_cov.diagonal()))
def __init__(self, n_components=2, n_iter=5, alpha=None):
    """Set up one GraphLassoCV model per component.

    n_components : number of components (and of per-component models).
    n_iter : stored as ``self.n_iter``.
    alpha : per-component values; defaults to 10 for every component.
    """
    self.n_components = n_components
    self.n_iter = n_iter
    self.min_covar = 1e-3
    # Identity test: ``alpha == None`` is evaluated element-wise for numpy
    # arrays and cannot be used as a condition.
    if alpha is None:
        self.alpha = [10 for _ in range(self.n_components)]
    else:
        self.alpha = alpha
    self.model = [GraphLassoCV() for _ in range(self.n_components)]
def group_connectivity(timeseries, subject_list, atlas_name, kind, save=True, save_path=root_folder):
    """Estimate connectivity matrices for a group and optionally save them.

    Parameters
    ----------
    timeseries : list of timeseries tables for subjects (timepoints x regions)
    subject_list : the subject short IDs list
    atlas_name : name of the atlas used
    kind : the kind of connectivity to be used: 'lasso', 'tangent',
        'partial correlation' or 'correlation'
    save : save each connectivity matrix to a ``.mat`` file
    save_path : folder that contains one sub-folder per subject

    Returns
    -------
    connectivity_matrices : one connectivity matrix (regions x regions) per
        subject; a list for 'lasso', otherwise whatever
        ``ConnectivityMeasure.fit_transform`` returns.

    Raises
    ------
    ValueError
        If ``kind`` is not one of the supported values.
    """
    if kind == 'lasso':
        # Graph Lasso estimator, refitted for every subject
        covariance_estimator = GraphLassoCV(verbose=1)
        connectivity_matrices = []
        for ts in timeseries:
            covariance_estimator.fit(ts)
            connectivity = covariance_estimator.covariance_
            connectivity_matrices.append(connectivity)
            print('Covariance matrix has shape {0}.'.format(
                connectivity.shape))
    elif kind in ['tangent', 'partial correlation', 'correlation']:
        conn_measure = connectome.ConnectivityMeasure(kind=kind)
        connectivity_matrices = conn_measure.fit_transform(timeseries)
    else:
        # Fail with a clear message instead of hitting an unbound
        # ``connectivity_matrices`` further down.
        raise ValueError("Unknown connectivity kind: %r" % (kind,))

    if save:
        for i, subject in enumerate(subject_list):
            # File name: <subject>_<atlas>_<kind with underscores>.mat
            subject_file = os.path.join(
                save_path, subject,
                subject + '_' + atlas_name + '_' + kind.replace(' ', '_') + '.mat')
            sio.savemat(subject_file,
                        {'connectivity': connectivity_matrices[i]})
            print("Saving connectivity matrix to %s" % subject_file)

    return connectivity_matrices
def main():
    """Fit a cross-validated graph lasso on filtered expression data.

    Loads the data through ``load_data()``, keeps rows whose minimum exceeds
    100, reorders the columns, log-transforms, and fits ``GraphLassoCV`` on
    ``expression.dot(expression.T)``. Prints the shape of that matrix and the
    sum of the row indices of the non-zero entries of the fitted covariance.
    """
    sample, genes, raw_expression, cov = load_data()
    expression = raw_expression[raw_expression.min(1) > 100]

    # reorder and filter data
    # rep1_cols = numpy.array((3,0,5)) # 8 is co culture
    # rep2_cols = numpy.array((4,2,7)) # 9 is MRC5
    expression = expression[:, (3, 4, 0, 2, 5, 7)]

    # log data
    expression = numpy.log10(expression + 1)[1:100, ]

    cov = expression.dot(expression.T)
    # print() with one argument behaves the same on Python 2 and 3.
    print(cov.shape)

    mo = GraphLassoCV(mode='lars', verbose=True, cv=KFold(3, 2), n_jobs=24)
    # fit() returns the estimator itself, not a matrix; read the fitted
    # covariance explicitly before inspecting its non-zero pattern.
    mo.fit(cov)
    sparse_cov = mo.covariance_
    print(numpy.nonzero(sparse_cov)[0].sum())

    return
def test_graph_lasso_cv(random_state=1):
    """Smoke-test GraphLassoCV in verbose mode and with explicit alphas."""
    # Draw a few samples from a sparse multivariate normal
    n_features, n_obs = 5, 6
    rng = check_random_state(random_state)
    precision = make_sparse_spd_matrix(n_features, alpha=.96,
                                       random_state=rng)
    covariance = linalg.inv(precision)
    samples = rng.multivariate_normal(np.zeros(n_features), covariance,
                                      size=n_obs)

    # Redirect stdout while exercising the verbose code path
    saved_stdout = sys.stdout
    try:
        sys.stdout = StringIO()
        # verbose must be very high for Parallel to print on stdout
        GraphLassoCV(verbose=100, alphas=5, tol=1e-1).fit(samples)
    finally:
        sys.stdout = saved_stdout

    # Smoke test with an explicit list of alphas
    GraphLassoCV(alphas=[0.8, 0.5], tol=1e-1, n_jobs=1).fit(samples)
def main():
    """Fit a cross-validated graph lasso on filtered expression data.

    Loads the data through ``load_data()``, keeps rows whose minimum exceeds
    100, reorders the columns, log-transforms, and fits ``GraphLassoCV`` on
    ``expression.dot(expression.T)``. Prints the shape of that matrix and the
    sum of the row indices of the non-zero entries of the fitted covariance.
    """
    sample, genes, raw_expression, cov = load_data()
    expression = raw_expression[raw_expression.min(1) > 100]

    # reorder and filter data
    # rep1_cols = numpy.array((3,0,5)) # 8 is co culture
    # rep2_cols = numpy.array((4,2,7)) # 9 is MRC5
    expression = expression[:, (3, 4, 0, 2, 5, 7)]

    # log data
    expression = numpy.log10(expression + 1)[1:100, ]

    cov = expression.dot(expression.T)
    # print() with one argument behaves the same on Python 2 and 3.
    print(cov.shape)

    mo = GraphLassoCV(mode='lars', verbose=True, cv=KFold(3, 2), n_jobs=24)
    # fit() returns the estimator itself, not a matrix; read the fitted
    # covariance explicitly before inspecting its non-zero pattern.
    mo.fit(cov)
    sparse_cov = mo.covariance_
    print(numpy.nonzero(sparse_cov)[0].sum())

    return
def get_BP4D_prescion_matrix(label_file_dir):
    """Estimate AU covariance/precision matrices from label files.

    Every file in ``label_file_dir`` is read as comma-separated text: the
    first line is a header whose columns after the first name the Action
    Units; each following line is one frame. For every frame a binary vector
    of length ``len(config.AU_SQUEEZE)`` is built, with a 1 at
    ``config.AU_SQUEEZE.inv[AU]`` for each AU in ``config.AU_ROI`` whose
    column equals 1. A ``GraphLassoCV`` model is fitted on all vectors.

    Returns ``{"prec": precision_, "cov": covariance_}``.
    """
    adaptive_AU_database("BP4D")
    model = GraphLassoCV(alphas=100, cv=10, max_iter=10, tol=1e-5,
                         verbose=True, mode="lars", assume_centered=False,
                         n_jobs=100)
    X = []
    for file_name in os.listdir(label_file_dir):  # each file is a video
        AU_column_idx = {}
        with open(label_file_dir + "/" + file_name, "r") as au_file_obj:
            for idx, line in enumerate(au_file_obj):
                stripped = line.strip()
                if not stripped:
                    # tolerate blank lines (e.g. at the end of the file)
                    continue
                # Strip before splitting so the last column does not keep
                # its trailing newline (matters for the header keys).
                fields = stripped.split(",")

                if idx == 0:  # header specifies the Action Units
                    for col_idx, AU in enumerate(fields[1:]):
                        AU_column_idx[AU] = col_idx + 1
                    continue

                au_labels = [AU for AU in config.AU_ROI.keys()
                             if int(fields[AU_column_idx[AU]]) == 1]
                AU_bin = np.zeros(len(config.AU_SQUEEZE))
                for AU in au_labels:
                    bin_idx = config.AU_SQUEEZE.inv[AU]
                    np.put(AU_bin, bin_idx, 1)
                X.append(AU_bin)

    X = np.array(X)
    print(X.shape)
    model.fit(X)
    cov_ = model.covariance_
    prec_ = model.precision_
    return {"prec": prec_, "cov": cov_}
def _parallelize_4D_func_loading(f, atlas, method):
    """Load one functional image and return its connectivity as a row vector.

    The image is masked with ``atlas`` (``NiftiLabelsMasker``); ``method`` is
    'corr' (correlation of the region signals) or 'invcorr' (GraphLassoCV
    precision). Only the strictly lower triangle is kept, flattened, and
    returned with a leading axis of length 1.

    Raises ValueError for any other ``method`` (after the image was masked).
    """
    image = nib.load(f)
    masker = NiftiLabelsMasker(labels_img=atlas, standardize=True,
                               resampling_target=None)
    signals = masker.fit_transform(image)

    if method not in ('corr', 'invcorr'):
        raise ValueError('Specify either corr or invcorr')

    if method == 'corr':
        matrix = np.corrcoef(signals.T)
    else:
        estimator = GraphLassoCV()
        estimator.fit(signals)
        matrix = estimator.precision_

    lower_rows, lower_cols = np.tril_indices(matrix.shape[0], k=-1)
    flattened = matrix[(lower_rows, lower_cols)].ravel()
    return flattened[np.newaxis, :]
def computePartialCorrelationsCV(coupling_data):
    """Estimate partial correlations with a cross-validated graph lasso.

    ``coupling_data`` is standardized IN PLACE (each column is centered and
    divided by its standard deviation), so the caller's array is modified.

    Returns ``(precision, partial_correlations, reg_alpha)``:
    the fitted precision matrix, the partial correlations
    ``rho_ij = -p_ij / sqrt(p_ii * p_jj)`` with the lower triangle and the
    diagonal set to zero, and the selected regularization ``alpha_``.
    """
    # standardize (in place)
    coupling_data -= coupling_data.mean(axis=0)
    coupling_data /= coupling_data.std(axis=0)

    estimator = GraphLassoCV(alphas=10)
    estimator.fit(coupling_data)
    prec = estimator.get_precision()
    reg_alpha = estimator.alpha_

    # partial correlations: rho_ij = - p_ij / sqrt(p_ii * p_jj)
    prec_diag = np.diag(prec)
    partial_correlations = -prec / np.sqrt(np.outer(prec_diag, prec_diag))

    # Set lower half (including the diagonal) to zero. The size comes from
    # the matrix itself; it used to be hard-coded to 400.
    partial_correlations[np.tril_indices(prec.shape[0])] = 0

    return prec, partial_correlations, reg_alpha
def compute_connectivity_subject(conn, func, masker):
    """ Returns connectivity of one fMRI for a given atlas

    conn : 'gl' (GraphLassoCV), 'lw' (LedoitWolf), 'oas' (OAS),
        'scov' (ShrunkCovariance), 'corr' or 'pcorr'.
    func : functional image passed to ``masker.fit_transform``.
    masker : masker producing the time series.

    Returns the strictly-lower-triangle entries of the covariance-like and
    precision-like matrices, as a pair of 1-D arrays.

    Raises ValueError for an unknown ``conn``.
    """
    ts = masker.fit_transform(func)

    if conn == 'corr' or conn == 'pcorr':
        # np.corrcoef treats rows as variables; transpose so the matrix is
        # indexed by the columns of ``ts``, matching ``ind`` below and the
        # estimators fitted on ``ts``.
        fc = Bunch(covariance_=np.corrcoef(ts.T), precision_=partial_corr(ts))
    else:
        if conn == 'gl':
            fc = GraphLassoCV(max_iter=1000)
        elif conn == 'lw':
            fc = LedoitWolf()
        elif conn == 'oas':
            fc = OAS()
        elif conn == 'scov':
            fc = ShrunkCovariance()
        else:
            raise ValueError('Unknown connectivity measure: %r' % (conn,))
        fc.fit(ts)

    ind = np.tril_indices(ts.shape[1], k=-1)
    return fc.covariance_[ind], fc.precision_[ind]
def get_DISFA_prescion_matrix(label_file_dir):
    """Estimate AU covariance/precision matrices from per-subject label files.

    ``label_file_dir`` holds one folder per subject; each file inside is read
    as ``frame,label`` lines for the AU encoded in its file name. Labels
    below 3 become 0, all others 1. One vector of length
    ``len(config.AU_SQUEEZE)`` is built per frame and a ``GraphLassoCV``
    model is fitted on all of them.

    Returns ``{"prec": precision_, "cov": covariance_}``.
    """
    adaptive_AU_database("DISFA")
    estimator = GraphLassoCV(alphas=100, cv=10, max_iter=100, tol=1e-5,
                             verbose=True, mode="lars", assume_centered=False,
                             n_jobs=100)
    samples = []
    for subject_name in os.listdir(label_file_dir):
        subject_dir = label_file_dir + os.sep + subject_name
        labels_by_frame = defaultdict(dict)

        for au_file in os.listdir(subject_dir):
            au_path = subject_dir + "/" + au_file
            au_code = au_file[au_file.rindex("_") + 3:au_file.rindex(".")]
            with open(au_path, "r") as handle:
                for row in handle:
                    frame, raw_label = row.strip().split(",")
                    # Surprisingly, labels < 3 are discarded (treated as 0),
                    # yet this still gave excellent results.
                    labels_by_frame[int(frame)][au_code] = (
                        0 if int(raw_label) < 3 else 1)

        for au_labels in labels_by_frame.values():
            vector = np.zeros(len(config.AU_SQUEEZE))
            for au_code, flag in au_labels.items():
                np.put(vector, config.AU_SQUEEZE.inv[au_code], flag)
            samples.append(vector)

    data = np.array(samples)
    print(data.shape)
    estimator.fit(data)
    return {"prec": estimator.precision_, "cov": estimator.covariance_}
def test_graph_lasso_cv(random_state=1):
    """Smoke-test GraphLassoCV's verbose mode with stdout captured."""
    # Draw a few samples from a sparse multivariate normal
    n_features = 5
    n_obs = 6
    rng = check_random_state(random_state)
    precision = make_sparse_spd_matrix(n_features, alpha=.96,
                                       random_state=rng)
    covariance = linalg.inv(precision)
    samples = rng.multivariate_normal(np.zeros(n_features), covariance,
                                      size=n_obs)

    # Redirect stdout while exercising the verbose code path
    stdout_backup = sys.stdout
    try:
        sys.stdout = StringIO()
        GraphLassoCV(verbose=10, alphas=3).fit(samples)
    finally:
        sys.stdout = stdout_backup
def test_deprecated_grid_scores(random_state=1):
    """Reading ``grid_scores`` must warn and equal ``grid_scores_``."""
    n_features, n_obs = 5, 6
    rng = check_random_state(random_state)
    precision = make_sparse_spd_matrix(n_features, alpha=.96,
                                       random_state=rng)
    covariance = linalg.inv(precision)
    samples = rng.multivariate_normal(np.zeros(n_features), covariance,
                                      size=n_obs)

    estimator = GraphLassoCV(alphas=[0.8, 0.5], tol=1e-1, n_jobs=1)
    estimator.fit(samples)

    expected_warning = ("Attribute grid_scores was deprecated in version "
                        "0.19 and will be removed in 0.21. Use "
                        "``grid_scores_`` instead")

    def read_deprecated_attribute():
        return estimator.grid_scores

    assert_warns_message(DeprecationWarning, expected_warning,
                         read_deprecated_attribute)
    assert_equal(estimator.grid_scores, estimator.grid_scores_)
def mgsparse(matrix, dimred=0, cutoff=1, eigtype='b'):
    '''
    Plot ROC curve for using sparse covariance and precision matrix for
    multivariate gaussian classifier

    Input:
        matrix = n-by-m pandas data frame, each row is one bacteria strain,
                 each column is one subject
        dimred = reduce dimensionality of covariance and inverse of
                 covariance to n < len(x) if n is specified, else no
                 reduction in dimensions
        cutoff = cutoff to top eigenvalues if specified, maybe less than n
                 (currently not used by this function)
        eigtype = pick n random(r), biggest(b) or smallest(s) eigenvalues to
                  construct matrix B, default is biggest(b)
                  (currently not used by this function)
    Output:
        auc = area under ROC curve
    '''
    # convert matrix from Pandas data frame to array
    m = matrix.values

    # The first 252 columns (subjects) are CD, the remaining ones control.
    con = m.T[252:]
    cd = m.T[:252]

    # get mean for each strain
    conmean = vmean(con.T)
    cdmean = vmean(cd.T)

    # sparse covariance and precision matrix for control
    conglasso = GraphLassoCV()
    conglasso.fit(con)
    concov = conglasso.covariance_
    concovinv = conglasso.precision_

    # sparse covariance and precision matrix for CD
    cdglasso = GraphLassoCV()
    cdglasso.fit(cd)
    cdcov = cdglasso.covariance_
    cdcovinv = cdglasso.precision_

    # Score both groups under both fitted models. These calls previously
    # referenced names that are not defined in this function (c24g,
    # sc24gcov, r, ...) and ignored everything computed above.
    listac = ndgaussianfitsparse(con, conmean, concov, concovinv, dimred=dimred)
    listbc = ndgaussianfitsparse(con, cdmean, cdcov, cdcovinv, dimred=dimred)
    listacd = ndgaussianfitsparse(cd, conmean, concov, concovinv, dimred=dimred)
    listbcd = ndgaussianfitsparse(cd, cdmean, cdcov, cdcovinv, dimred=dimred)

    # NOTE(review): 252 and 172 look like the CD / control subject counts;
    # confirm against ndaucsklearn before using other data sets.
    auc = ndaucsklearn(listac, listbc, listacd, listbcd, 252, 172, tol=2)
    return auc
class TestAverageError(object):
    @pytest.mark.parametrize("params_in", [
        dict(
            model_selection_estimator=QuicGraphLassoCV(),
            n_trials=20,
            n_features=25,
        ),
        dict(
            model_selection_estimator=QuicGraphLassoEBIC(),
            n_trials=20,
            n_features=10,
            n_jobs=2,
        ),
        dict(
            model_selection_estimator=GraphLassoCV(),
            n_trials=20,
            n_features=20,
            penalty_='alpha_',
        ),
    ])
    def test_integration_statistical_power(self, params_in):
        '''
        Integration smoke test: checks the shapes of the outputs only,
        not the validity of the result.
        '''
        data = datasets.load_diabetes().data
        ae = AverageError(**params_in)
        ae.fit(data)

        expected_k = 3
        grid_shape = (expected_k, ae.n_grid_points)

        # Every error table must be non-trivial and have one row per k.
        for attr in ('error_fro_', 'error_supp_', 'error_fp_', 'error_fn_'):
            errors = getattr(ae, attr)
            assert np.sum(errors.flat) > 0
            assert errors.shape == grid_shape

        assert len(ae.ks_) == expected_k
        assert len(ae.grid_) == ae.n_grid_points
def compute_connectivity_voxel(roi, voxel, conn):
    """ Returns connectivity of one voxel for a given roi

    roi, voxel : equally long 1-D signals.
    conn : 'gl' (GraphLassoCV), 'lw' (LedoitWolf), 'oas' (OAS),
        'scov' (ShrunkCovariance), 'corr' or 'pcorr'.

    Returns a scalar: the correlation between the two signals for
    'corr'/'pcorr', otherwise the estimated roi-voxel covariance.

    Raises ValueError for an unknown ``conn``.
    """
    # Two columns: column 0 is the roi signal, column 1 the voxel signal.
    ts = np.array([roi, voxel]).T

    if conn == 'corr' or conn == 'pcorr':
        # Columns are the variables, hence rowvar=False. The previous call
        # correlated the first two timepoints instead of the two signals.
        return np.corrcoef(ts, rowvar=False)[0, 1]

    if conn == 'gl':
        fc = GraphLassoCV(max_iter=1000)
    elif conn == 'lw':
        fc = LedoitWolf()
    elif conn == 'oas':
        fc = OAS()
    elif conn == 'scov':
        fc = ShrunkCovariance()
    else:
        raise ValueError('Unknown connectivity measure: %r' % (conn,))

    fc.fit(ts)
    # Off-diagonal entry = covariance between roi and voxel; [0, 0] was the
    # roi variance and did not depend on the voxel at all.
    return fc.covariance_[0, 1]
def set_optimal_shrinkage_amount(self, X, verbose=False):
    """Select the shrinkage by cross-validated graph lasso and store it.

    Parameters
    ----------
    X: array-like, shape = [n_samples, n_features]
      Training data, where n_samples is the number of samples
      and n_features is the number of features.
    verbose: accepted but not used.

    Side effect
    -----------
    ``self.shrinkage`` is set to the ``alpha_`` selected by ``GraphLassoCV``
    on the samples flagged by the ``raw_support_`` of a preliminary RMCDl1
    fit.

    Returns
    -------
    (cv_alphas_, cv_scores) of the fitted ``GraphLassoCV`` model.

    """
    sample_count, feature_count = X.shape
    total_variance = np.trace(empirical_covariance(X))
    initial_shrinkage = total_variance / float(sample_count * feature_count)

    # use L2 here? (was done during research work, changed for consistency)
    preliminary_fit = RMCDl1(shrinkage=initial_shrinkage).fit(X)
    retained_samples = X[preliminary_fit.raw_support_]
    lasso_cv = GraphLassoCV().fit(retained_samples)

    self.shrinkage = lasso_cv.alpha_
    return lasso_cv.cv_alphas_, lasso_cv.cv_scores
random_state=prng) cov = linalg.inv(prec) d = np.sqrt(np.diag(cov)) cov /= d cov /= d[:, np.newaxis] prec *= d prec *= d[:, np.newaxis] X = prng.multivariate_normal(np.zeros(n_features), cov, size=n_samples) X -= X.mean(axis=0) X /= X.std(axis=0) ############################################################################## # Estimate the covariance emp_cov = np.dot(X.T, X) / n_samples model = GraphLassoCV() model.fit(X) cov_ = model.covariance_ prec_ = model.precision_ lw_cov_, _ = ledoit_wolf(X) lw_prec_ = linalg.inv(lw_cov_) ############################################################################## # Plot the results pl.figure(figsize=(10, 6)) pl.subplots_adjust(left=0.02, right=0.98) # plot the covariances covs = [('Empirical', emp_cov), ('Ledoit-Wolf', lw_cov_), ('GraphLasso', cov_), ('True', cov)]
# Run group-sparse covariance on all subjects from nilearn.group_sparse_covariance import GroupSparseCovarianceCV gsc = GroupSparseCovarianceCV(max_iter=50, verbose=1) gsc.fit(subjects) for n in range(n_displayed): plt.subplot(n_displayed, 4, 4 * n + 2) plot_matrix(gsc.precisions_[..., n]) if n == 0: plt.title("group-sparse\n$\\alpha=%.2f$" % gsc.alpha_) # Fit one graph lasso per subject from sklearn.covariance import GraphLassoCV gl = GraphLassoCV(verbose=1) for n, subject in enumerate(subjects[:n_displayed]): gl.fit(subject) plt.subplot(n_displayed, 4, 4 * n + 3) plot_matrix(gl.precision_) if n == 0: plt.title("graph lasso") plt.ylabel("$\\alpha=%.2f$" % gl.alpha_) # Fit one graph lasso for all subjects at once import numpy as np gl.fit(np.concatenate(subjects))
# Project the resting-state maps onto the ICA networks and store the
# resulting time series.
nmm = NiftiMapsMasker(
    mask_img=mask_file, maps_img=icas_path, resampling_target='mask',
    standardize=True, detrend=True)
nmm.fit()
nmm.maps_img_.to_filename('dbg_ica_maps.nii.gz')

FS_netproj = nmm.transform(all_sub_rs_maps)
np.save('%i_nets_timeseries' % sub_id, FS_netproj)

# compute network sparse inverse covariance
from sklearn.covariance import GraphLassoCV
from nilearn.image import index_img
from nilearn import plotting

try:
    gsc_nets = GraphLassoCV(verbose=2, alphas=20)
    gsc_nets.fit(FS_netproj)

    np.save('%i_nets_cov' % sub_id, gsc_nets.covariance_)
    np.save('%i_nets_prec' % sub_id, gsc_nets.precision_)
except Exception as exc:
    # Still best effort (processing continues), but the failure is reported
    # and KeyboardInterrupt/SystemExit are no longer swallowed.
    print('Sparse inverse covariance failed for subject %i: %s'
          % (sub_id, exc))

###############################################################################
# dump region poolings
###############################################################################
from nilearn.image import resample_img

crad = ds.fetch_atlas_craddock_2012()
# atlas_nii = index_img(crad['scorr_mean'], 19)  # Craddock 200 region atlas
atlas_nii = index_img(crad['scorr_mean'], 9)  # Craddock 100 region atlas
EFA=True survey_HCA = get_EFA_HCA(all_results['survey'], EFA) survey_order = survey_HCA['reorder_vec'] task_HCA = get_EFA_HCA(all_results['task'], EFA) task_order = task_HCA['reorder_vec'] all_data = pd.concat([all_results['task'].data.iloc[:, task_order], all_results['survey'].data.iloc[:, survey_order]], axis=1) out, tuning = qgraph_cor(all_data, glasso=True, gamma=.5) # recreate with sklearn just to check data = scale(all_data) clf = GraphLassoCV() clf.fit(data) sklearn_covariance = clf.covariance_[np.tril_indices_from(clf.covariance_)] qgraph_covariance = out.values[np.tril_indices_from(out)] method_correlation = np.corrcoef(sklearn_covariance, qgraph_covariance)[0,1] assert method_correlation > .99 def add_attributes(g): g.vs['measurement'] = ['task']*len(task_order) + ['survey']*len(survey_order) task_clusters = task_HCA['labels'][task_order] survey_clusters = survey_HCA['labels'][survey_order] + max(task_clusters) g.vs['cluster'] = np.append(task_clusters, survey_clusters) save_loc = path.join(path.dirname(all_results['task'].get_output_dir()), 'graph_results')
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 12 10:16:16 2016

Fit a cross-validated graph lasso on the first 250 columns of one HCP node
time-series file.

@author: jonyoung
"""

import connectivity_utils as utils
import numpy as np
import scipy.linalg as la
from sklearn.covariance import GraphLassoCV, ledoit_wolf, GraphLasso
from sklearn.preprocessing import scale

connectivity_data = utils.load_hcp_matrix('/home/jonyoung/IoP_data/Data/HCP_PTN820/node_timeseries/3T_HCP820_MSMAll_d15_ts2/715950.txt')

# Single-argument print() calls behave the same on Python 2 and 3; the
# former print statements were a syntax error on Python 3.
print(connectivity_data)
print(np.shape(connectivity_data))
print(np.std(connectivity_data, axis=1))

# Keep the first 250 columns and standardize every row.
connectivity_data = connectivity_data[:, :250]
X = scale(connectivity_data, axis=1)

# The estimator receives the transpose, i.e. the rows of X become features.
model = GraphLassoCV(max_iter=1500, assume_centered=True)
model.fit(np.transpose(X))
timeseries = spheres_masker.fit_transform(fmri_filename, confounds=confounds_filename) ############################################################################### # Estimate correlations # --------------------- # # All starts with the estimation of the signals **covariance** matrix. Here the # number of ROIs exceeds the number of samples, print("time series has {0} samples".format(timeseries.shape[0])) ############################################################################### # in which situation the graphical lasso **sparse inverse covariance** # estimator captures well the covariance **structure**. from sklearn.covariance import GraphLassoCV covariance_estimator = GraphLassoCV(verbose=1) ############################################################################### # We just fit our regions signals into the `GraphLassoCV` object covariance_estimator.fit(timeseries) ############################################################################### # and get the ROI-to-ROI covariance matrix. matrix = covariance_estimator.covariance_ print("Covariance matrix has shape {0}.".format(matrix.shape)) ############################################################################### # Plot matrix and graph # --------------------- # # We use `matplotlib` plotting functions to visualize our correlation matrix
c0.append(temp_A[0]) c1.append(temp_A[1]) c2.append(temp_B) data = np.array([c0, c1, c2]) data = data.transpose() print data # emp_cov = empirical_covariance(data, assume_centered=False) # # print emp_cov model = GraphLassoCV() model.fit(data) cov_ = model.covariance_ prec_ = model.precision_ corr = np.corrcoef(data, rowvar=False) print corr # print cov_ # print prec_ threshold = 0.1
# Simulate a DCM data set and store it together with its generating parameters.
noise_sd = 2
data, data_conv = mk_dcm_dataset(timepoints, z, noise_sd)
numpy.savez(os.path.join(results_dir, 'dcmdata.npz'), data=data_conv, A=A, B=B, C=C, u=u, d=d, design=design)


# In[4]:

plt.subplot(211)
plt.plot(data_conv)
cc = numpy.corrcoef(data_conv.T)
# Single-argument print() calls behave the same on Python 2 and 3; the
# former print statements were a syntax error on Python 3.
print('correlation matrix')
print(cc)

from sklearn.covariance import GraphLassoCV
import matplotlib.colors

# Partial correlations derived from the graph-lasso precision matrix.
glasso = GraphLassoCV()
glasso.fit(data_conv)
from pcor_from_precision import pcor_from_precision
pcor = pcor_from_precision(glasso.precision_)
print('partial r^2 matrix')
print(pcor**2)

# Side by side: A, B, the correlation matrix and the squared partial
# correlations, all on a common [-1, 1] colour scale.
plt.figure(figsize=(10, 5))
plt.subplot(141)
plt.imshow(A, interpolation='nearest', norm=matplotlib.colors.Normalize(vmin=-1, vmax=1))
plt.subplot(142)
plt.imshow(B, interpolation='nearest', norm=matplotlib.colors.Normalize(vmin=-1, vmax=1))
plt.subplot(143)
plt.imshow(cc, interpolation='nearest', norm=matplotlib.colors.Normalize(vmin=-1, vmax=1))
plt.subplot(144)
plt.imshow(pcor**2, interpolation='nearest', norm=matplotlib.colors.Normalize(vmin=-1, vmax=1))
def GraphicLasso(X):
    """Fit a cross-validated graphical lasso on X and return its precision.

    Parameters
    ----------
    X : array-like
        Data handed unchanged to ``GraphLassoCV.fit``
        (presumably samples x features -- confirm against the caller).

    Returns
    -------
    The ``precision_`` attribute of the fitted ``GraphLassoCV`` estimator.
    """
    model = GraphLassoCV()
    model.fit(X)
    # Only the precision matrix is returned; the covariance is not needed.
    return model.precision_
##############################################################################
# Extract time series
# --------------------
from nilearn.input_data import NiftiMapsMasker
masker = NiftiMapsMasker(maps_img=atlas_filename, standardize=True,
                         memory='nilearn_cache', verbose=5)

time_series = masker.fit_transform(data.func[0],
                                   confounds=data.confounds)

##############################################################################
# Compute the sparse inverse covariance
# --------------------------------------

# NOTE: scikit-learn 0.20 renamed ``GraphLassoCV`` to ``GraphicalLassoCV`` and
# 0.22 removed the old name. Try the new name first and alias it so the rest
# of the script keeps working unchanged on both old and new releases.
try:
    from sklearn.covariance import GraphicalLassoCV as GraphLassoCV
except ImportError:  # scikit-learn < 0.20
    from sklearn.covariance import GraphLassoCV

estimator = GraphLassoCV()
estimator.fit(time_series)

##############################################################################
# Display the connectome matrix
# ------------------------------
from matplotlib import pyplot as plt

# Display the covariance
plt.figure(figsize=(10, 10))

# The covariance can be found at estimator.covariance_
plt.imshow(estimator.covariance_, interpolation="nearest",
           vmax=1, vmin=-1, cmap=plt.cm.RdBu_r)
# And display the labels
# Split the STOCKS mapping into parallel arrays of its keys and values.
symbols, names = np.array(list(STOCKS.items())).T

start = datetime(2014, 1, 1, 0, 0, 0, 0, pytz.utc)
end = datetime(2016, 1, 1, 0, 0, 0, 0, pytz.utc)

quotes = [quotes_historical_yahoo(symbol, start, end, asobject=True)
          for symbol in symbols]

# ``np.float`` was just an alias of the builtin ``float`` and was removed in
# NumPy 1.24; the builtin gives the same dtype on every NumPy version.
qopen = np.array([q.open for q in quotes]).astype(float)
qclose = np.array([q.close for q in quotes]).astype(float)
variation = qclose - qopen  # per day variation in price for each symbol

X = variation.T
X /= X.std(axis=0)  # standardize to use correlations rather than covariance

# estimate inverse covariance
graph = GraphLassoCV()
graph.fit(X)
gl_cov = graph.covariance_
gl_prec = graph.precision_
gl_alphas = graph.cv_alphas_
# scikit-learn 0.19 deprecated ``grid_scores`` in favour of ``grid_scores_``
# and later dropped the old spelling; prefer the new one, fall back to the old.
try:
    gl_scores = np.mean(graph.grid_scores_, axis=1)
except AttributeError:  # scikit-learn < 0.19
    gl_scores = np.mean(graph.grid_scores, axis=1)

plt.figure()
sns.heatmap(gl_prec)

# Mean score (averaged over axis 1 of the grid scores) for each tested alpha.
plt.figure()
plt.plot(gl_alphas, gl_scores, marker='o', color='b', lw=2.0,
         label='GraphLassoCV')
plt.title("Graph Lasso Alpha Selection")
plt.xlabel("alpha")
plt.ylabel("score")
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 12 10:16:16 2016

@author: jonyoung
"""

import connectivity_utils as utils
import numpy as np
import scipy.linalg as la
from sklearn.covariance import ledoit_wolf
from sklearn.preprocessing import scale

# NOTE: scikit-learn 0.20 renamed ``GraphLasso``/``GraphLassoCV`` to
# ``GraphicalLasso``/``GraphicalLassoCV`` and 0.22 removed the old names.
# Alias the new classes so the original names stay bound on every release.
try:
    from sklearn.covariance import GraphicalLasso as GraphLasso
    from sklearn.covariance import GraphicalLassoCV as GraphLassoCV
except ImportError:  # scikit-learn < 0.20
    from sklearn.covariance import GraphLasso, GraphLassoCV

connectivity_data = utils.load_hcp_matrix(
    '/home/jonyoung/IoP_data/Data/HCP_PTN820/node_timeseries/3T_HCP820_MSMAll_d15_ts2/715950.txt'
)

# Single-argument print() calls print the same on Python 2 and Python 3.
print(connectivity_data)
print(np.shape(connectivity_data))
print(np.std(connectivity_data, axis=1))

# Keep only the first 250 columns, then standardise along axis 1.
connectivity_data = connectivity_data[:, :250]
X = scale(connectivity_data, axis=1)

# The estimator is fitted on the transpose of the scaled matrix;
# assume_centered=True tells it not to centre the data again.
model = GraphLassoCV(max_iter=1500, assume_centered=True)
model.fit(np.transpose(X))
############################################################################
# Graphical LASSO on Yeo's clusters extracted from sepideh's data
############################################################################
import numpy as np
import pylab as pl
from sklearn.covariance import OAS

# NOTE: scikit-learn 0.20 renamed ``GraphLassoCV`` to ``GraphicalLassoCV`` and
# 0.22 removed the old name. Try the new name first and alias it so the rest
# of the script keeps working unchanged on both old and new releases.
try:
    from sklearn.covariance import GraphicalLassoCV as GraphLassoCV
except ImportError:  # scikit-learn < 0.20
    from sklearn.covariance import GraphLassoCV

tc = tc_roi

# Cross-validated graphical lasso: 3 alphas per grid, 3 grid refinements,
# 2 parallel jobs, verbose output.
glasso = GraphLassoCV(verbose=1, n_refinements=3, alphas=3, n_jobs=2)
glasso.fit(tc)

cov_ = glasso.covariance_
prec_ = glasso.precision_