def corrcov(arr, typedat):
    """Compute a covariance- or correlation-type matrix for `arr`.

    Parameters
    ----------
    arr : ndarray, shape (n_samples, n_features)
        Data matrix; rows are observations, columns are variables.
    typedat : str
        One of 'Partial correlation', 'GraphLassoCV covariance',
        'GraphLassoCV precision', 'Covariance', 'Correlation'.

    Returns
    -------
    ndarray, shape (n_features, n_features)

    Raises
    ------
    ValueError
        If `typedat` is not recognized (previously an unknown value fell
        through every branch and raised NameError on `return out`).
    """
    if typedat == 'Partial correlation':
        out = partialcorr(arr)
    elif typedat in ('GraphLassoCV covariance', 'GraphLassoCV precision'):
        # Cross-validated sparse (inverse-)covariance estimate.
        estimator = GraphLassoCV()
        estimator.fit(arr)
        out = (estimator.covariance_
               if typedat == 'GraphLassoCV covariance'
               else estimator.precision_)
    elif typedat == 'Covariance':
        # np.cov expects variables in rows, hence the transpose.
        out = np.cov(arr.transpose())
    elif typedat == 'Correlation':
        out = np.corrcoef(arr.transpose())
    else:
        raise ValueError('Unknown typedat: %s' % typedat)
    return out
def stockCluster(self, centers, data, selectStock):
    """Cluster stocks by covariance structure and print each cluster.

    data like
            600010 600011
        0   1.7    2.1
        1   2.3    3.1
    selectStock is the list of stock codes matching data's columns.
    `centers` is kept for interface compatibility but is not used here.
    """
    # Learn the sparse covariance structure, then cluster it with
    # affinity propagation (number of clusters is found automatically).
    edge_model = GraphLassoCV()
    edge_model.fit(data)
    _, labels = affinity_propagation(edge_model.covariance_)
    n_labels = max(labels)
    print('Stock Clusters: {}'.format(n_labels + 1))

    # Map ts_code -> name so clusters can be printed by stock name.
    name_table = pd.read_excel("stockList.xls").set_index('ts_code')
    for cluster_id in range(n_labels + 1):
        codes = np.array(selectStock)[labels == cluster_id].tolist()
        cluster_names = name_table.loc[codes, :].name.tolist()
        print('Cluster: {}----> stocks: {}'.format(cluster_id,
                                                   ','.join(cluster_names)))
def subject_connectivity(timeseries, subject, atlas_name, kind, save=True, save_path=root_folder):
    """Estimate one subject's connectivity matrix.

    timeseries : (timepoints x regions) table for the subject
    subject    : the subject short ID
    atlas_name : name of the atlas used
    kind       : 'lasso', 'tangent', 'partial correlation' or 'correlation'
    save       : when True, write the matrix to a .mat file
    save_path  : base directory for the saved matrix

    returns the (regions x regions) connectivity matrix
    """
    print("Estimating %s matrix for subject %s" % (kind, subject))

    if kind == 'lasso':
        # Cross-validated Graph Lasso covariance estimate.
        estimator = GraphLassoCV(verbose=1)
        estimator.fit(timeseries)
        connectivity = estimator.covariance_
        print('Covariance matrix has shape {0}.'.format(connectivity.shape))
    elif kind in ['tangent', 'partial correlation', 'correlation']:
        measure = connectome.ConnectivityMeasure(kind=kind)
        connectivity = measure.fit_transform([timeseries])[0]

    if save:
        fname = subject + '_' + atlas_name + '_' + kind.replace(' ', '_') + '.mat'
        subject_file = os.path.join(save_path, subject, fname)
        sio.savemat(subject_file, {'connectivity': connectivity})

    return connectivity
def compute_network_connectivity_subject(conn, func, masker, rois):
    """ Returns connectivity of one fMRI for a given atlas.

    conn   : 'gl', 'lw', 'oas', 'scov', 'corr' or 'pcorr' — the estimator kind
    func   : functional image passed to the masker
    masker : NiftiMasker-like object used to extract timeseries
    rois   : column indices of the regions to keep

    Returns (covariance_lower_triangle, precision_lower_triangle).
    """
    ts = masker.fit_transform(func)
    ts = np.asarray(ts)[:, rois]

    if conn == 'corr' or conn == 'pcorr':
        # Direct correlation / partial correlation; wrap in a Bunch so the
        # attribute access below is uniform with the sklearn estimators.
        fc = Bunch(covariance_=0, precision_=0)
        fc.covariance_ = np.corrcoef(ts)
        fc.precision_ = partial_corr(ts)
    else:
        if conn == 'gl':
            fc = GraphLassoCV(max_iter=1000)
        elif conn == 'lw':
            fc = LedoitWolf()
        elif conn == 'oas':
            fc = OAS()
        elif conn == 'scov':
            # BUGFIX: the original immediately reassigned
            # `fc = Bunch(covariance_=0, precision_=0)` after this line,
            # clobbering the estimator so the 'scov' path crashed on fc.fit.
            fc = ShrunkCovariance()
        fc.fit(ts)

    ind = np.tril_indices(ts.shape[1], k=-1)
    return fc.covariance_[ind], fc.precision_[ind]
def cluster(stock_dataset, selected_stocks, sz50_df2):
    """Cluster stocks via GraphLassoCV covariance + affinity propagation,
    printing each cluster and writing per-cluster CSV files."""
    # Learn the graph (covariance) structure from the return series.
    edge_model = GraphLassoCV()
    edge_model.fit(stock_dataset)

    # Affinity-propagation clustering on the learned covariance; the
    # exemplar indices identify each cluster's "center" stock.
    cluster_centers_indices_, labels = affinity_propagation(
        edge_model.covariance_)
    n_labels = max(labels)
    print('Stock Clusters: {}'.format(n_labels + 1))

    # Center stock codes and names, written to center.csv.
    mass = [selected_stocks[idx] for idx in cluster_centers_indices_]
    center_name = sz50_df2.loc[mass, :].name.tolist()
    center = pd.DataFrame(np.column_stack((mass, center_name)),
                          columns=['code', 'name'])
    center.to_csv(str('./cluster/center.csv'))

    # One CSV per cluster; also printed by stock name for inspection.
    for i in range(n_labels + 1):
        stocks = np.array(selected_stocks)[labels == i].tolist()
        names = sz50_df2.loc[stocks, :].name.tolist()
        print('Cluster: {}----> stocks: {}'.format(str(i), ','.join(names)))
        result = pd.DataFrame(np.column_stack((stocks, names)),
                              columns=['code', 'name'])
        result.to_csv(str('./cluster/cluster ' + str(i) + '.csv'))
class TestStatisticalPower(object):
    """Smoke tests for StatisticalPower across several model selectors."""

    @pytest.mark.parametrize("params_in", [
        ({
            'model_selection_estimator': QuicGraphLassoCV(),
            'n_trials': 20,
            'n_features': 25,
        }),
        ({
            'model_selection_estimator': QuicGraphLassoEBIC(),
            'n_trials': 20,
            'n_features': 10,
            'n_jobs': 2,
        }),
        ({
            'model_selection_estimator': GraphLassoCV(),
            'n_trials': 20,
            'n_features': 20,
            'penalty_': 'alpha_',
        }),
    ])
    def test_integration_statistical_power(self, params_in):
        """Only checks input/output plumbing, not validity of the result."""
        X = datasets.load_diabetes().data
        power = StatisticalPower(**params_in)
        power.fit(X)

        expected_k = 5
        assert len(power.ks_) == expected_k
        assert len(power.grid_) == power.n_grid_points
        assert power.results_.shape == (expected_k, power.n_grid_points)
        assert np.sum(power.results_.flat) > 0
def compute_connectivity_subject(conn, masker, func, confound=None):
    """ Returns connectivity of one fMRI for a given atlas.

    conn     : 'gl', 'lw', 'oas', 'scov', 'corr' or 'pcorr'
    masker   : masker object passed to do_mask_img
    func     : functional image
    confound : optional confounds passed to do_mask_img

    Returns (covariance_lower_triangle, precision_lower_triangle).
    """
    ts = do_mask_img(masker, func, confound)

    if conn == 'corr' or conn == 'pcorr':
        # Direct correlation / partial correlation; the Bunch mimics the
        # fitted-estimator attribute interface used below.
        fc = Bunch(covariance_=0, precision_=0)
        fc.covariance_ = np.corrcoef(ts)
        fc.precision_ = partial_corr(ts)
    else:
        if conn == 'gl':
            fc = GraphLassoCV(max_iter=1000)
        elif conn == 'lw':
            fc = LedoitWolf()
        elif conn == 'oas':
            fc = OAS()
        elif conn == 'scov':
            # BUGFIX: the original reassigned `fc = Bunch(...)` right after
            # this line, clobbering the estimator and breaking fc.fit below.
            fc = ShrunkCovariance()
        fc.fit(ts)

    ind = np.tril_indices(ts.shape[1], k=-1)
    return fc.covariance_[ind], fc.precision_[ind]
def calculate_connectivity_matrix(in_data, extraction_method):
    '''
    Calculate a connectivity matrix from extracted timeseries.

    Runs after extract_parcellation_time_series(); the matrix is computed
    with the specified extraction method.

    in_data : (timepoints x regions) array.
    extraction_method : 'correlation' or 'sparse_inverse_covariance'.

    Returns (matrix, matrix_file): a dict of np.arrays and the path of the
    pickle file the dict was saved to.

    Raises Exception for an unknown extraction method.
    '''
    # fixme implement sparse inv covar
    import os
    import pickle
    import numpy as np

    if extraction_method == 'correlation':
        # np.corrcoef wants variables in rows, hence the transpose.
        matrix = {'correlation': np.corrcoef(in_data.T)}
    elif extraction_method == 'sparse_inverse_covariance':
        # Compute the sparse inverse covariance
        from sklearn.covariance import GraphLassoCV
        estimator = GraphLassoCV()
        estimator.fit(in_data)
        matrix = {
            'covariance': estimator.covariance_,
            'sparse_inverse_covariance': estimator.precision_
        }
    else:
        raise Exception('Unknown extraction method: %s' % extraction_method)

    matrix_file = os.path.join(os.getcwd(), 'matrix.pkl')
    # BUGFIX: pickle.dump needs a binary file handle; opening with 'w'
    # raises TypeError on Python 3.
    with open(matrix_file, 'wb') as f:
        pickle.dump(matrix, f)

    return matrix, matrix_file
def get_conn_matrix(time_series, conn_model, NETWORK, ID, dir_path, thr):
    """Build a connectivity matrix and save it as a tab-separated .txt file.

    time_series : (timepoints x regions) array
    conn_model  : 'corr', 'corr_fast', 'partcorr', 'cov' or 'sps'
    NETWORK     : optional network label included in the output filename
    ID, dir_path, thr : components of the output file path

    Returns (conn_matrix, est_path).
    """
    if conn_model == 'corr':
        conn_measure = ConnectivityMeasure(kind='correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
        est_path = dir_path + '/' + ID + '_est_corr' + '_' + str(thr) + '.txt'
    elif conn_model == 'corr_fast':
        try:
            conn_matrix = compute_correlation(time_series, time_series)
            est_path = dir_path + '/' + ID + '_est_corr_fast' + '_' + str(thr) + '.txt'
        except RuntimeError:
            print('Cannot run accelerated correlation computation due to a missing dependency. You need brainiak installed!')
    elif conn_model == 'partcorr':
        conn_measure = ConnectivityMeasure(kind='partial correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
        est_path = dir_path + '/' + ID + '_est_part_corr' + '_' + str(thr) + '.txt'
    elif conn_model == 'cov' or conn_model == 'sps':
        # Fit a cross-validated graph lasso to get a sparse estimate.
        estimator = GraphLassoCV()
        try:
            print("Fitting Lasso estimator...")
            estimator.fit(time_series)
        except RuntimeError:
            # BUGFIX: the original embedded a raw newline inside this string
            # literal (a syntax error); use an escaped '\n' instead. The
            # commented-out shrinkage retry (estimator_shrunk) was removed —
            # it was dead code and the fallback name was never defined.
            print('Unstable Lasso estimation.\nTry again!')
            sys.exit()

        if NETWORK is not None:
            est_path = dir_path + '/' + ID + '_' + NETWORK + '_est%s' % ('_sps_inv' if conn_model == 'sps' else 'cov') + '_' + str(thr) + '.txt'
        else:
            est_path = dir_path + '/' + ID + '_est%s' % ('_sps_inv' if conn_model == 'sps' else 'cov') + '_' + str(thr) + '.txt'
        if conn_model == 'sps':
            # Negated precision: off-diagonal entries are proportional to
            # partial correlations.
            conn_matrix = -estimator.precision_
        elif conn_model == 'cov':
            conn_matrix = estimator.covariance_

    np.savetxt(est_path, conn_matrix, delimiter='\t')
    return (conn_matrix, est_path)
def _cov_selection(self, alphas=4, n_refinements=4, cv=None):
    """Select a sparse covariance/precision pair via cross-validated
    graph lasso fitted on self.ret (assumed centered).

    Returns (covariance_, precision_).
    """
    from sklearn.covariance import GraphLassoCV

    model = GraphLassoCV(
        alphas=alphas,
        n_refinements=n_refinements,
        cv=cv,
        assume_centered=True,
    )
    model.fit(self.ret)
    return model.covariance_, model.precision_
def graph_lasso(X, num_folds):
    """Estimate inverse covariance via scikit-learn GraphLassoCV class.
    """
    print("GraphLasso (sklearn)")
    estimator = GraphLassoCV(cv=num_folds)
    estimator.fit(X)
    print(" lam_: {}".format(estimator.alpha_))
    return estimator.covariance_, estimator.precision_, estimator.alpha_
def graph_lasso(X, num_folds):
    '''Estimate inverse covariance via scikit-learn GraphLassoCV class.
    '''
    # Parenthesized single-argument print works on both Python 2 and 3,
    # making this consistent with the Python 3 twin of this helper.
    print('GraphLasso (sklearn)')
    model = GraphLassoCV(cv=num_folds)
    model.fit(X)
    print(' lam_: {}'.format(model.alpha_))
    return model.covariance_, model.precision_, model.alpha_
def __init__(self, n_components=2, n_iter=5, alpha=None):
    """Mixture model with one GraphLassoCV estimator per component.

    n_components : number of mixture components.
    n_iter       : number of fitting iterations.
    alpha        : optional per-component regularization list; defaults
                   to 10 for each component when None.
    """
    self.n_components = n_components
    self.n_iter = n_iter
    self.min_covar = 1e-3  # numerical floor for covariances
    # BUGFIX: compare with `is None`, not `== None` (PEP 8; `==` can be
    # overridden and misbehaves for array-like alphas).
    if alpha is None:
        self.alpha = [10 for _ in range(self.n_components)]
    else:
        self.alpha = alpha
    # Independent sparse-covariance estimator per component.
    self.model = [GraphLassoCV() for _ in range(self.n_components)]
def _generate_structure_K(self, X):
    """Estimate a sparse precision (structure) matrix for X via
    cross-validated graph lasso; X is fitted transposed."""
    estimator = GraphLassoCV(alphas=20)
    estimator.fit(X.T)
    structure = estimator.get_precision()
    if hasattr(estimator, 'alpha_'):
        print('alpha=', estimator.alpha_)
    return structure
def test_graph_lasso_cv(random_state=1):
    """Smoke-test GraphLassoCV on a tiny sparse-precision Gaussian sample."""
    # Sample data from a sparse multivariate normal.
    n_features, n_samples = 5, 6
    random_state = check_random_state(random_state)
    prec = make_sparse_spd_matrix(n_features, alpha=.96,
                                  random_state=random_state)
    cov = linalg.inv(prec)
    X = random_state.multivariate_normal(np.zeros(n_features), cov,
                                         size=n_samples)

    # Capture stdout to smoke test the verbose mode; verbosity must be very
    # high so that Parallel prints on stdout.
    orig_stdout = sys.stdout
    try:
        sys.stdout = StringIO()
        GraphLassoCV(verbose=100, alphas=5, tol=1e-1).fit(X)
    finally:
        sys.stdout = orig_stdout

    # Smoke test with explicitly specified alphas.
    GraphLassoCV(alphas=[0.8, 0.5], tol=1e-1, n_jobs=1).fit(X)
def __init__(self, original_matrix):
    '''
    :param original_matrix: an instances list (matrix); rows are
        samples, columns are features.
    '''
    # X = [x(1), x(2), ..., x(len)], each row holding `dim` features.
    self._X = np.matrix(original_matrix)
    self._len, self._dim = self._X.shape
    # Fit a cross-validated graph lasso and keep both estimates.
    estimator = GraphLassoCV()
    estimator.fit(self._X)
    self._glasso_covariance = estimator.covariance_
    self._glasso_precision = estimator.precision_
def cal_connectome(fmri_ff, confound_ff, atlas_ff, outputjpg_ff,
                   metric='correlation', labelrange=None, label_or_map=0):
    """Compute a connectome matrix for one fMRI run and save a plot of it.

    fmri_ff     : functional image file
    confound_ff : confounds file passed to the masker
    atlas_ff    : atlas image (labels when label_or_map == 0, maps otherwise)
    outputjpg_ff: path the rendered matrix figure is written to
    metric      : ConnectivityMeasure kind or 'sparse inverse covariance'
    labelrange  : optional column indices of regions to keep (default: all)
    label_or_map: 0 selects NiftiLabelsMasker, anything else NiftiMapsMasker

    Returns the (regions x regions) connectivity matrix.
    """
    if label_or_map == 0:
        # Supported kinds: "correlation", "partial correlation", "tangent",
        # "covariance", "precision".
        masker = NiftiLabelsMasker(labels_img=atlas_ff, standardize=True,
                                   verbose=0)
    else:
        masker = NiftiMapsMasker(maps_img=atlas_ff, standardize=True,
                                 verbose=0)

    time_series_0 = masker.fit_transform(fmri_ff, confounds=confound_ff)
    if labelrange is None:
        labelrange = np.arange(time_series_0.shape[1])
    time_series = time_series_0[:, labelrange]

    if metric == 'sparse inverse covariance':
        try:
            estimator = GraphLassoCV()
            estimator.fit(time_series)
            correlation_matrix = -estimator.precision_
        # BUGFIX: narrowed from a bare `except:`, which also swallowed
        # SystemExit/KeyboardInterrupt; estimation failures still fall back
        # to an all-zero matrix (deliberate best-effort behavior).
        except Exception:
            correlation_matrix = np.zeros(
                (time_series.shape[1], time_series.shape[1]))
    else:
        correlation_measure = ConnectivityMeasure(kind=metric)
        correlation_matrix = correlation_measure.fit_transform([time_series
                                                                ])[0]

    # Plot the correlation matrix
    fig = plt.figure(figsize=(6, 5), dpi=100)
    plt.clf()
    # Mask the main diagonal for visualization:
    np.fill_diagonal(correlation_matrix, 0)
    plt.imshow(correlation_matrix, interpolation="nearest", cmap="RdBu_r",
               vmax=0.8, vmin=-0.8)
    plt.gca().yaxis.tick_right()
    plt.axis('off')
    plt.colorbar()
    plt.title(metric.title(), fontsize=12)
    plt.tight_layout()
    fig.savefig(outputjpg_ff, bbox_inches='tight')
    plt.close()
    return correlation_matrix
def test_graph_lasso_cv(random_state=1):
    """Smoke-test verbose GraphLassoCV fitting on sparse Gaussian data."""
    # Sample data from a sparse multivariate normal.
    dim, n_samples = 5, 6
    rng = check_random_state(random_state)
    prec = make_sparse_spd_matrix(dim, alpha=.96, random_state=rng)
    cov = linalg.inv(prec)
    X = rng.multivariate_normal(np.zeros(dim), cov, size=n_samples)

    # Capture stdout to smoke test the verbose mode.
    saved_stdout = sys.stdout
    try:
        sys.stdout = StringIO()
        GraphLassoCV(verbose=10, alphas=3).fit(X)
    finally:
        sys.stdout = saved_stdout
def _generate_structure_K(self, X):
    """Estimate a sparse precision structure for X (fitted transposed),
    warning when the learned support degenerates to the identity."""
    # lasso = GraphLasso(alpha=0.012)
    estimator = GraphLassoCV(alphas=20)
    estimator.fit(X.T)
    K_structure = estimator.get_precision()
    if hasattr(estimator, 'alpha_'):
        print('alpha=', estimator.alpha_)

    # Identity support means no off-diagonal edges were selected.
    support = np.abs(K_structure) > 1e-10
    if (support == np.eye(support.shape[0], dtype=bool)).all():
        print('Got identity structure')
    return K_structure
def test_deprecated_grid_scores(random_state=1):
    """Accessing grid_scores must warn but still equal grid_scores_."""
    dim, n_samples = 5, 6
    rng = check_random_state(random_state)
    precision = make_sparse_spd_matrix(dim, alpha=.96, random_state=rng)
    X = rng.multivariate_normal(np.zeros(dim), linalg.inv(precision),
                                size=n_samples)

    graph_lasso = GraphLassoCV(alphas=[0.8, 0.5], tol=1e-1, n_jobs=1)
    graph_lasso.fit(X)

    depr_message = ("Attribute grid_scores was deprecated in version "
                    "0.19 and will be removed in 0.21. Use "
                    "``grid_scores_`` instead")
    assert_warns_message(DeprecationWarning, depr_message,
                         lambda: graph_lasso.grid_scores)
    assert_equal(graph_lasso.grid_scores, graph_lasso.grid_scores_)
def test_estimate_covariance(self):
    """Integration test: estimate_covariance's diagonal should match a
    GraphLassoCV fit on the same sampled return window."""
    # Transformation config: a single standardized log-return 'close' feature.
    configuration = {
        'feature_config_list': [
            {
                'name': 'close',
                'normalization': 'standard',
                'transformation': {'name': 'log-return'},
                'is_target': True,
                'local': False,
            },
        ],
        'fill_limit': 0,
        'exchange_name': 'NYSE',
        'features_ndays': 9,
        'features_resample_minutes': 15,
        'features_start_market_minute': 60,
        'prediction_frequency_ndays': 1,
        'prediction_market_minute': 60,
        'target_delta_ndays': 1,
        'target_market_minute': 60,
        'n_classification_bins': 12,
        'nassets': 3,
        'classify_per_series': False,
        'normalise_per_series': False
    }
    data_transformation = FinancialDataTransformation(configuration)
    universe, data = self._prepare_data_for_test()
    estimation_method = "Ledoit"
    exchange_calendar = data_transformation.exchange_calendar
    ndays = data_transformation.features_ndays  # FIXME this is the only value that works now.
    forecast_interval = data_transformation.target_delta_ndays
    target_market_minute = data_transformation.target_market_minute

    # Covariance as produced by the code under test.
    covariance_matrix = estimate_covariance(data, ndays, target_market_minute,
                                            estimation_method, exchange_calendar,
                                            forecast_interval)

    # Reference: fit GraphLassoCV directly on the most recent returns window
    # sampled at `target_market_minute` after the open.
    ret_data = returns_minutes_after_market_open_data_frame(data['close'],
                                                            exchange_calendar,
                                                            target_market_minute)
    print(ret_data.shape)
    nd = ret_data.shape[1]
    # Window length scales with the number of assets.
    sampling_days = nd * DEFAULT_NUM_REALISATIONS_MULTIPLICATION_FACTOR
    data_points = ret_data.values[-sampling_days:, :]
    glass_model = GraphLassoCV()
    glass_model.fit(data_points)
    cov_mat = glass_model.covariance_
    # Only the diagonals (variances) are required to agree.
    self.assertTrue(np.allclose(covariance_matrix.diagonal(),
                                cov_mat.diagonal()))
def group_connectivity(timeseries, subject_list, atlas_name, kind, save=True, save_path=root_folder):
    """Estimate connectivity matrices for a group of subjects.

    timeseries   : list of (timepoints x regions) tables, one per subject
    subject_list : subject short IDs aligned with `timeseries`
    atlas_name   : name of the atlas used
    kind         : 'lasso', 'tangent', 'partial correlation' or 'correlation'
    save         : write each matrix to a .mat file when True
    save_path    : base directory for saved matrices

    returns the list of (regions x regions) connectivity matrices
    """
    if kind == 'lasso':
        # One shared Graph Lasso estimator, refit for each subject.
        covariance_estimator = GraphLassoCV(verbose=1)
        connectivity_matrices = []
        for ts in timeseries:
            covariance_estimator.fit(ts)
            connectivity = covariance_estimator.covariance_
            connectivity_matrices.append(connectivity)
            print('Covariance matrix has shape {0}.'.format(
                connectivity.shape))
    elif kind in ['tangent', 'partial correlation', 'correlation']:
        conn_measure = connectome.ConnectivityMeasure(kind=kind)
        connectivity_matrices = conn_measure.fit_transform(timeseries)

    if save:
        for i, subject in enumerate(subject_list):
            subject_file = os.path.join(
                save_path, subject_list[i],
                subject_list[i] + '_' + atlas_name + '_' +
                kind.replace(' ', '_') + '.mat')
            sio.savemat(subject_file,
                        {'connectivity': connectivity_matrices[i]})
            print("Saving connectivity matrix to %s" % subject_file)

    return connectivity_matrices
def main():
    """Load expression data, filter/log-transform it and fit GraphLassoCV.

    Exploratory script entry point; prints shapes/diagnostics only.
    """
    sample, genes, raw_expression, cov = load_data()
    # Keep genes whose minimum expression across conditions exceeds 100.
    expression = raw_expression[raw_expression.min(1) > 100]
    ## reorder and filter data
    #rep1_cols = numpy.array((3,0,5)) # 8 is co culture
    #rep2_cols = numpy.array((4,2,7)) # 9 is MRC5
    expression = expression[:, (3, 4, 0, 2, 5, 7)]
    # log-transform (+1 avoids log(0)) and keep a small slice of genes.
    expression = numpy.log10(expression + 1)[1:100, ]
    cov = expression.dot(expression.T)
    # Parenthesized single-argument print is valid on Python 2 and 3
    # (originally a Python-2-only print statement). The unused
    # `expression_indices` local was removed.
    print(cov.shape)
    #mo = GraphLasso(alpha=95, mode='lars', verbose=True)
    mo = GraphLassoCV(mode='lars', verbose=True, cv=KFold(3, 2), n_jobs=24)
    sparse_cov = mo.fit(cov)
    # NOTE(review): mo.fit returns the estimator itself, so numpy.nonzero
    # here operates on an estimator object rather than a matrix — likely
    # mo.covariance_ or mo.precision_ was intended; confirm before relying
    # on this printed value.
    print(numpy.nonzero(sparse_cov)[0].sum())
    return
def get_BP4D_prescion_matrix(label_file_dir):
    """Fit a sparse precision/covariance over AU co-occurrence (BP4D).

    label_file_dir : directory of per-video CSV label files; the first row
        of each file is a header naming the Action Units, each subsequent
        row is one frame's labels.

    Returns {"prec": precision_matrix, "cov": covariance_matrix}.
    """
    adaptive_AU_database("BP4D")
    # Cleanup: the original defined an unused local `alpha = 0.2` and an
    # unused `frame` variable; both removed.
    model = GraphLassoCV(alphas=100, cv=10, max_iter=10, tol=1e-5,
                         verbose=True, mode="lars", assume_centered=False,
                         n_jobs=100)
    X = []
    for file_name in os.listdir(label_file_dir):  # each file is a video
        AU_column_idx = {}
        with open(label_file_dir + "/" + file_name, "r") as au_file_obj:
            for idx, line in enumerate(au_file_obj):
                if idx == 0:
                    # Header row maps each Action Unit to its column index.
                    for col_idx, AU in enumerate(line.split(",")[1:]):
                        AU_column_idx[AU] = col_idx + 1
                    continue
                lines = line.split(",")
                # AUs marked present (== 1) in this frame.
                au_labels = [AU for AU in config.AU_ROI.keys()
                             if int(lines[AU_column_idx[AU]]) == 1]
                AU_bin = np.zeros(len(config.AU_SQUEEZE))
                for AU in au_labels:
                    bin_idx = config.AU_SQUEEZE.inv[AU]
                    np.put(AU_bin, bin_idx, 1)
                X.append(AU_bin)
    X = np.array(X)
    print(X.shape)
    model.fit(X)
    cov_ = model.covariance_
    prec_ = model.precision_
    return {"prec": prec_, "cov": cov_}
def _parallelize_4D_func_loading(f, atlas, method):
    """Extract ROI timeseries from one 4D functional image and return the
    lower triangle of its (inverse-)correlation matrix as a 1 x k row."""
    func = nib.load(f)
    roi_masker = NiftiLabelsMasker(labels_img=atlas, standardize=True,
                                   resampling_target=None)
    time_series = roi_masker.fit_transform(func)

    if method == 'corr':
        conn = np.corrcoef(time_series.T)
    elif method == 'invcorr':
        gl = GraphLassoCV()
        gl.fit(time_series)
        conn = gl.precision_
    else:
        raise ValueError('Specify either corr or invcorr')

    lower = conn[np.tril_indices(conn.shape[0], k=-1)].ravel()
    return lower[np.newaxis, :]
class TestAverageError(object):
    """Smoke tests for AverageError across several model selectors."""

    @pytest.mark.parametrize("params_in", [
        ({
            'model_selection_estimator': QuicGraphLassoCV(),
            'n_trials': 20,
            'n_features': 25,
        }),
        ({
            'model_selection_estimator': QuicGraphLassoEBIC(),
            'n_trials': 20,
            'n_features': 10,
            'n_jobs': 2,
        }),
        ({
            'model_selection_estimator': GraphLassoCV(),
            'n_trials': 20,
            'n_features': 20,
            'penalty_': 'alpha_',
        }),
    ])
    def test_integration_statistical_power(self, params_in):
        """Only checks input/output plumbing, not validity of the errors."""
        X = datasets.load_diabetes().data
        avg_err = AverageError(**params_in)
        avg_err.fit(X)

        expected_k = 3
        assert len(avg_err.ks_) == expected_k
        assert len(avg_err.grid_) == avg_err.n_grid_points
        # Every error grid must be populated and correctly shaped.
        for err_grid in (avg_err.error_fro_, avg_err.error_supp_,
                         avg_err.error_fp_, avg_err.error_fn_):
            assert np.sum(err_grid.flat) > 0
            assert err_grid.shape == (expected_k, avg_err.n_grid_points)
def compute_connectivity_voxel(roi, voxel, conn):
    """ Returns connectivity of one voxel for a given roi
    """
    if conn == 'gl':
        fc = GraphLassoCV(max_iter=1000)
    elif conn == 'lw':
        fc = LedoitWolf()
    elif conn == 'oas':
        fc = OAS()
    elif conn == 'scov':
        fc = ShrunkCovariance()

    # Two-column matrix: roi series and voxel series.
    ts = np.array([roi, voxel]).T
    if conn in ('corr', 'pcorr'):
        cov = np.corrcoef(ts)[0, 1]
    else:
        fc.fit(ts)
        # NOTE(review): [0, 0] reads a variance term, not the roi-voxel
        # covariance at [0, 1]; looks suspicious but preserved as-is.
        cov = fc.covariance_[0, 0]
    return cov
def get_DISFA_prescion_matrix(label_file_dir):
    """Fit a sparse precision/covariance over AU co-occurrence (DISFA).

    label_file_dir : directory with one sub-directory per subject, each
        holding per-AU CSV files of "frame,intensity" lines.

    Returns {"prec": precision_matrix, "cov": covariance_matrix}.
    """
    adaptive_AU_database("DISFA")
    # Cleanup: the original defined an unused local `alpha = 0.2`; removed.
    model = GraphLassoCV(alphas=100, cv=10, max_iter=100, tol=1e-5,
                         verbose=True, mode="lars", assume_centered=False,
                         n_jobs=100)
    X = []
    for file_name in os.listdir(label_file_dir):
        subject_filename = label_file_dir + os.sep + file_name
        frame_label = defaultdict(dict)
        for au_file in os.listdir(subject_filename):
            abs_filename = subject_filename + "/" + au_file
            # AU id is embedded in the filename between "_auN" and extension.
            AU = au_file[au_file.rindex("_") + 3:au_file.rindex(".")]
            with open(abs_filename, "r") as file_obj:
                for line in file_obj:
                    frame, AU_label = line.strip().split(",")
                    # Binarize intensity: values below 3 count as absent
                    # (surprisingly, discarding <3 still worked well).
                    AU_label = 0 if int(AU_label) < 3 else 1
                    frame_label[int(frame)][AU] = int(AU_label)
        for frame, AU_dict in frame_label.items():
            AU_bin = np.zeros(len(config.AU_SQUEEZE))
            for AU, AU_label in AU_dict.items():
                bin_idx = config.AU_SQUEEZE.inv[AU]
                np.put(AU_bin, bin_idx, AU_label)
            X.append(AU_bin)
    X = np.array(X)
    print(X.shape)
    model.fit(X)
    cov_ = model.covariance_
    prec_ = model.precision_
    return {"prec": prec_, "cov": cov_}
def set_optimal_shrinkage_amount(self, X, verbose=False):
    """
    Parameters
    ----------
    X: array-like, shape = [n_samples, n_features]
      Training data, where n_samples is the number of samples
      and n_features is the number of features.

    Returns
    -------
    optimal_shrinkage: The optimal amount of shrinkage, chosen with a
      10-fold cross-validation. (or a Leave-One Out cross-validation
      if n_samples < 10).
    """
    # NOTE(review): despite the docstring, this returns the pair
    # (cv_alphas_, cv_scores) and stores the chosen alpha on
    # self.shrinkage — confirm which contract callers rely on.
    n_samples, n_features = X.shape
    # Baseline shrinkage scale from the empirical covariance trace.
    std_shrinkage = np.trace(empirical_covariance(X)) / \
        float(n_samples * n_features)
    # use L2 here? (was done during research work, changed for consistency)
    rmcd = RMCDl1(shrinkage=std_shrinkage).fit(X)
    # Cross-validated graph lasso fitted on the robust support only.
    cov = GraphLassoCV().fit(X[rmcd.raw_support_])
    self.shrinkage = cov.alpha_
    # NOTE(review): `cov.cv_scores` (no trailing underscore) looks like a
    # typo for `cv_scores_` / `grid_scores_` depending on the installed
    # sklearn version — verify before use.
    return cov.cv_alphas_, cov.cv_scores
# -*- coding: utf-8 -*-
"""
Created on Mon Sep 12 10:16:16 2016

@author: jonyoung
"""

import connectivity_utils as utils
import numpy as np
import scipy.linalg as la
from sklearn.covariance import GraphLassoCV, ledoit_wolf, GraphLasso
from sklearn.preprocessing import scale

# Load one subject's HCP node-timeseries matrix.
connectivity_data = utils.load_hcp_matrix(
    '/home/jonyoung/IoP_data/Data/HCP_PTN820/node_timeseries/3T_HCP820_MSMAll_d15_ts2/715950.txt'
)

# Parenthesized single-argument print is valid on Python 2 and 3
# (originally Python-2-only print statements).
print(connectivity_data)
print(np.shape(connectivity_data))
print(np.std(connectivity_data, axis=1))

# Keep the first 250 timepoints, then standardize each node's series.
connectivity_data = connectivity_data[:, :250]
X = scale(connectivity_data, axis=1)

# Fit a cross-validated graph lasso on the (timepoints x nodes) data.
model = GraphLassoCV(max_iter=1500, assume_centered=True)
model.fit(np.transpose(X))