def cross_val_score_GLasso(data, fold=5, alpha=0.01):
    """Average held-out scores of a graphical lasso over contiguous folds.

    The rows of ``data`` are cut into ``fold`` consecutive chunks of
    ``int(n / fold)`` rows.  Each chunk is held out in turn, a
    ``GraphicalLasso(alpha=alpha)`` is fitted on the remaining rows, and the
    fitted precision matrix is scored against the sample covariance of the
    held-out chunk.  Rows beyond ``fold * int(n / fold)`` are never held out.

    Returns a dict with the fold-averaged ``'log_lik'``, ``'AIC'`` and
    ``'non_zero'`` values.
    """
    n_samples = data.shape[0]
    fold_size = int(n_samples / fold)
    every_row = np.arange(0, n_samples)
    totals = {'log_lik': 0, 'AIC': 0, 'non_zero': 0}

    for k in range(fold):
        held_out = np.arange(k * fold_size, (k + 1) * fold_size)
        kept = np.delete(every_row, held_out)

        held_out_cov = sample_cov(data[held_out, :])

        estimator = GraphicalLasso(alpha=alpha)
        estimator.fit(data[kept, :])
        precision = estimator.precision_

        # Each fold contributes 1/fold of its score, so the totals are means.
        totals['log_lik'] += log_likelihood(held_out_cov, precision) / fold
        totals['AIC'] += AIC(held_out_cov, precision, n_samples - fold_size) / fold
        totals['non_zero'] += L0_penal(precision) / fold

    return totals
def glasso_results(data_grid, K, K_obs, ells, alpha):
    """Fit one graphical lasso per slice of ``data_grid`` and summarise the fit.

    ``data_grid`` is iterated along its last axis (via ``transpose(2, 0, 1)``);
    the same estimator object is refitted on every slice.  ``K_obs`` and
    ``ells`` are accepted but not used here.

    Returns a dict holding timing, the maximum iteration count, the precision
    error against ``K``, the likelihood score, the estimator itself, and every
    entry produced by ``utils.structure_error``.
    """
    estimator = GLsk(alpha=alpha, mode='cd', assume_centered=False,
                     max_iter=500)

    started = time.time()
    n_iters, fitted = [], []
    for block in data_grid.transpose(2, 0, 1):
        estimator.fit(block)
        n_iters.append(estimator.n_iter_)
        fitted.append(estimator.precision_)
    elapsed = time.time() - started

    max_iterations = np.max(n_iters)
    fitted = np.array(fitted)

    structure = utils.structure_error(K, fitted)
    precision_error = utils.error_norm(K, fitted, upper_triangular=True)
    likelihood = likelihood_score(data_grid.transpose(2, 0, 1), fitted)

    summary = {
        'n_dim_obs': K.shape[1],
        'time': elapsed,
        'iterations': max_iterations,
        'MSE_precision': precision_error,
        # The plain graphical lasso has no observed/latent decomposition,
        # so these slots are left empty.
        'MSE_observed': None,
        'MSE_latent': None,
        'mean_rank_error': None,
        'likelihood': likelihood,
        'note': None,
        'estimator': estimator,
    }
    return dict(summary, **structure)
def test_gowl_vs_glasso_duality_gap_3(self):
    """
    Fit GOWL and the graphical lasso on samples drawn from a block-structured
    precision matrix (p = 10, seed 680), then plot and evaluate both fits.

    Original author's note: Duality Gap goes negative in this case. Should
    that happen?
    """
    # Seed the global NumPy RNG; everything random below draws from it, so the
    # statement order in this test matters.
    np.random.seed(680)
    p = 10
    # NOTE(review): block indices are 0, 1 and 3 (2 is skipped) -- confirm
    # this is intentional.
    blocks = [
        Block(dim=p, idx=0, block_min_size=2, block_max_size=6, block_value=0.9),
        Block(dim=p, idx=1, block_min_size=2, block_max_size=6, block_value=-0.9),
        Block(dim=p, idx=3, block_min_size=2, block_max_size=6, block_value=-0.5),
    ]
    theta_star, blocks, theta_blocks = generate_theta_star_gowl(p=p, alpha=0.5, noise=0.1, blocks=blocks)
    lam1 = 0.001  # controls sparsity
    lam2 = 0.01  # encourages equality of coefficients
    # NOTE(review): `rho` is computed but never used in this test.
    rho = oscar_weights(lam1, lam2, (p ** 2 - p) / 2)
    theta_star = theta_star[0]
    sigma = np.linalg.inv(theta_star)
    n = 100
    # Draw n samples with covariance inv(theta_star), then standardize them.
    X = np.random.multivariate_normal(np.zeros(p), sigma, n)
    X = standardize(X)
    S = np.cov(X.T)
    # GOWL is started from the inverse of the sample covariance.
    theta_0 = np.linalg.inv(S)
    model = GOWLModel(X, S, theta_0, lam1, lam2, 'backtracking', max_iters=100000)
    model.fit()
    theta_gowl = model.theta_hat
    gl = GraphicalLasso(max_iter=200)
    # NOTE(review): the estimator is fitted on the covariance matrix S, not on
    # the samples X -- confirm this is what the GraphicalLasso in scope expects.
    gl.fit(S)
    theta_glasso = gl.get_precision()
    print('Non zero entries in precision matrix {}'.format(np.count_nonzero(theta_gowl)))
    plot_multiple_theta_matrices_2d([theta_blocks, theta_star, theta_glasso, theta_gowl], [f"Blocks: {len(blocks)}", 'True Theta', 'GLASSO', 'GOWL'])
    _fit_evaluations(theta_star, theta_glasso, 3, 'GLASSO')
    _fit_evaluations(theta_star, theta_gowl, 3, 'GOWL')
    # Cluster each estimate (and the block template as reference) with K=4.
    y_hat_gowl = spectral_clustering(theta=theta_gowl, K=4)
    y_hat_glasso = spectral_clustering(theta=theta_glasso, K=4)
    y_true = spectral_clustering(theta=theta_blocks, K=4).flatten()
    _cluster_evaluations(y_true, y_hat_gowl, 'GOWL')
    _cluster_evaluations(y_true, y_hat_glasso, 'GLASSO')
def get_optimal_cov_estimator(time_series):
    """Fit a sparse covariance estimator, relaxing the problem on failure.

    Attempts, in order:

    1. ``GraphicalLassoCV(cv=5, assume_centered=True)``.
    2. The same estimator with ``max_iter=200`` and tolerances
       0.1, 0.01, 0.001, 0.0001.
    3. A plain ``GraphicalLasso`` fitted on a shrunk empirical covariance,
       scanning shrinkage values from ``np.arange(0.8, 0.99, 0.01)`` and
       alphas ``10**-8 .. 10**-1``.

    Parameters
    ----------
    time_series : array-like
        Data passed unchanged to the scikit-learn estimators.

    Returns
    -------
    estimator or None
        The first estimator whose ``fit`` succeeded, or ``None`` when every
        attempt failed.

    Notes
    -----
    Only ``Exception`` subclasses are treated as fit failures, so
    ``KeyboardInterrupt`` and ``SystemExit`` propagate to the caller instead
    of silently triggering the next fallback.
    """
    from sklearn.covariance import GraphicalLassoCV

    estimator = GraphicalLassoCV(cv=5, assume_centered=True)
    print("\nSearching for best Lasso estimator...\n")
    try:
        estimator.fit(time_series)
        return estimator
    except Exception:
        failures = 0
        print("\nModel did not converge on first attempt. "
              "Varying tolerance...\n")
        while not hasattr(estimator, 'covariance_') and \
                not hasattr(estimator, 'precision_') and failures < 3:
            for tol in [0.1, 0.01, 0.001, 0.0001]:
                print(f"Tolerance={tol}")
                estimator = GraphicalLassoCV(cv=5, max_iter=200, tol=tol,
                                             assume_centered=True)
                try:
                    estimator.fit(time_series)
                    return estimator
                except Exception:
                    failures += 1
                    continue

    if not hasattr(estimator, 'covariance_') and not hasattr(
            estimator, 'precision_'):
        print("Unstable Lasso estimation. Applying shrinkage to empirical "
              "covariance...")
        from sklearn.covariance import (
            GraphicalLasso,
            empirical_covariance,
            shrunk_covariance,
        )
        try:
            emp_cov = empirical_covariance(time_series, assume_centered=True)
            # The alpha grid does not depend on the shrinkage level.
            alpha_range = 10.0**np.arange(-8, 0)
            for shrinkage in np.arange(0.8, 0.99, 0.01):
                print(f"Shrinkage={shrinkage}:")
                shrunk_cov = shrunk_covariance(emp_cov, shrinkage=shrinkage)
                for alpha in alpha_range:
                    print(f"Auto-tuning alpha={alpha}...")
                    estimator_shrunk = GraphicalLasso(alpha,
                                                      assume_centered=True)
                    try:
                        estimator_shrunk.fit(shrunk_cov)
                        return estimator_shrunk
                    except Exception:
                        continue
        except Exception:
            return None
        # Every (shrinkage, alpha) combination failed.
        return None
    else:
        return estimator
def predict(self, data: pd.DataFrame, alpha: float = 0.01, max_iter: int = 2000, **kwargs) -> nx.Graph:
    """Estimate a graph over the columns of ``data`` with the graphical lasso.

    A ``GraphicalLasso(alpha=alpha, max_iter=max_iter)`` is fitted on
    ``data.values``; the estimated precision matrix is turned into a
    ``networkx.DiGraph`` whose integer nodes are relabelled with the column
    names of ``data``.  Extra keyword arguments are accepted and ignored.
    """
    estimator = GraphicalLasso(alpha=alpha, max_iter=max_iter)
    estimator.fit(data.values)

    precision_graph = nx.DiGraph(estimator.get_precision())
    # Position in the precision matrix -> column name.
    node_names = dict(enumerate(data.columns))
    return nx.relabel_nodes(precision_graph, node_names)
def test_SGL_scikit():
    """Check the single graphical lasso ADMM solver against scikit-learn.

    Both ADMM runs (default stopping rule, and the 'kkt' rule that is
    expected to hit ``max_iter``) must agree with scikit-learn's precision
    estimate to 3 decimals.
    """
    dim = 10
    n_samples = 100
    reg = 0.01

    Sigma, _ = generate_precision_matrix(p=dim, M=2, style='erdos',
                                         gamma=2.8, prob=0.1, scale=False,
                                         nxseed=None)
    # sample from multivar_norm(Sigma)
    S, samples = sample_covariance_matrix(Sigma, n_samples)

    sklearn_solver = GraphicalLasso(alpha=reg, tol=1e-6, max_iter=500,
                                    verbose=False)
    # scikit-learn expects samples in rows, hence the transpose
    expected = sklearn_solver.fit(samples.T).precision_

    start_point = np.eye(dim)

    admm_default, _ = ADMM_SGL(S, reg, start_point, tol=1e-7, rtol=1e-5,
                               verbose=True, latent=False)

    # run into max_iter
    admm_kkt, _ = ADMM_SGL(S, reg, start_point, stopping_criterion='kkt',
                           tol=1e-20, max_iter=200, verbose=True,
                           latent=False)

    assert_array_almost_equal(expected, admm_default['Theta'], 3)
    assert_array_almost_equal(expected, admm_kkt['Theta'], 3)
def _fit(self, X):
    """Fit the graphical lasso on ``X`` and cluster with affinity propagation.

    Stores the fitted estimator in ``self.estimator_`` and the second item
    returned by ``affinity_propagation`` in ``self.labels_``.  Returns ``self``.
    """
    glasso = GraphicalLasso(
        alpha=self.alpha,
        assume_centered=self.assume_centered,
        enet_tol=self.enet_tol,
        max_iter=self.max_iter,
        mode=self.mode,
        tol=self.tol,
    )
    self.estimator_ = glasso.fit(X)

    # `partial_corrcoef_` is read only after `estimator_` has been set.
    _centers, cluster_labels = affinity_propagation(
        self.partial_corrcoef_, **self._apcluster_params
    )
    self.labels_ = cluster_labels

    return self
def wrapper():
    """Run the graphical lasso for one snakemake job and write its outputs.

    Reads the CSV named by ``snakemake.input["data"]``, fits a
    ``GraphicalLasso`` configured from the job's wildcards, thresholds the
    absolute precision matrix into an integer adjacency matrix, and writes
    the elapsed time and the adjacency matrix to disk.

    ``start`` (a ``time.perf_counter()`` reading) and ``filename`` (the
    adjacency-matrix output path) are not assigned here and must exist in the
    enclosing module scope.
    """

    def _flag(value):
        # Wildcards are strings, and bool("False") is True; treat the usual
        # "false" spellings as False and anything else as True.
        return str(value).strip().lower() not in (
            "", "0", "false", "no", "off", "none")

    wildcards = snakemake.wildcards

    seed = int(wildcards["replicate"])
    np.random.seed(seed)

    data = snakemake.input["data"]
    df = pd.read_csv(data)
    X = df.values

    cov = GraphicalLasso(alpha=float(wildcards["alpha"]),
                         mode=wildcards["mode"],
                         tol=float(wildcards["tol"]),
                         enet_tol=float(wildcards["enet_tol"]),
                         max_iter=int(wildcards["max_iter"]),
                         verbose=_flag(wildcards["verbose"]),
                         assume_centered=_flag(
                             wildcards["assume_centered"])).fit(X)

    # 1 where |precision| (rounded to 3 decimals) exceeds the threshold, then
    # subtract the identity to clear the diagonal.
    # NOTE(review): a diagonal entry at or below the threshold becomes -1 --
    # confirm that cannot happen for the thresholds in use.
    adjmat = ((np.around(np.abs(cov.precision_), decimals=3) > float(
        wildcards["precmat_threshold"])) * 1 -
        np.identity(X.shape[1])).astype(int)

    tottime = time.perf_counter() - start
    time_filename = snakemake.output["time"]
    np.savetxt(time_filename, [tottime])

    dfadj = pd.DataFrame(adjmat)
    dfadj.columns = df.columns
    dfadj.to_csv(filename, index=False)
def main():
    """Demo script: sample from a 16-dimensional Gaussian, fit graphical
    lasso models on the sample, and plot model-selection curves.

    Runs three stages: a ``GraphicalLassoCV`` fit with its cross-validation
    curve, a default ``GraphicalLasso`` fit, and a manual alpha sweep scored
    with ``cross_val_score_GLasso``.  Everything is printed or plotted;
    nothing is returned.
    """
    mean = torch.tensor(np.ones(16), dtype=torch.float32)
    diag = torch.tensor(np.ones(16), dtype=torch.float32)
    population = Gaussian_Distribution(mean=mean, diag=diag, sub=0.3, type='chain', slash=1)
    truth = population.invcov.numpy()
    n = 1000
    # NOTE(review): `d` is not used below.
    d = population.dim
    print(truth)
    # Samples are drawn before the NumPy seed below is set.
    dist, sample, _, S = population.generate(n, numpy_like=True)
    #print(S)
    #print(np.array(sample))
    print(sample_mean(np.array(sample)))
    print(sample_cov(np.array(sample)))
    # NOTE(review): `R` is only referenced by the commented-out print.
    R = np.linalg.inv(S)
    #print(R)
    #print(sample)
    np.random.seed(0)

    # --- Stage 1: cross-validated graphical lasso -------------------------
    model = GraphicalLassoCV()
    model.fit(np.array(sample))
    cov_ = model.covariance_
    prec_ = model.precision_
    heatmap(prec_)
    plt.figure(figsize=(4, 3))
    plt.axes([.2, .15, .75, .7])
    # NOTE(review): `grid_scores_` may not exist on every scikit-learn
    # version -- confirm against the installed release.
    plt.plot(model.cv_alphas_, np.mean(model.grid_scores_, axis=1), 'o-')
    plt.axvline(model.alpha_, color='.5')
    plt.title('Model selection')
    plt.ylabel('Cross-validation score')
    plt.xlabel('alpha')
    plt.show()
    print(model.cv_alphas_, model.grid_scores_)

    # --- Stage 2: graphical lasso with default settings -------------------
    model = GraphicalLasso()
    # Fitted on `sample` as returned by generate(), not on np.array(sample).
    model.fit(sample)
    heatmap(model.precision_, 0.055)

    # --- Stage 3: manual alpha sweep --------------------------------------
    score = dict()
    score['log_lik'] = []
    score['AIC'] = []
    # Fine grid on [0, 0.1) followed by a coarse grid on [0.11, 0.3);
    # the first alpha is exactly 0.
    alpha_list = np.hstack((np.arange(0, 0.1, 0.001), np.arange(0.11, 0.3, 0.01)))
    data = np.array(sample)
    for alpha in alpha_list:
        out_dict = cross_val_score_GLasso(data, alpha=alpha)
        score['log_lik'].append(out_dict['log_lik'])
        score['AIC'].append(out_dict['AIC'])
    plt.plot(alpha_list, score['log_lik'], 'o-')
    plt.show()
    plt.plot(alpha_list, score['AIC'])
    plt.show()
def fit(self):
    """Fit ``self.estimator`` on the flattened samples in ``self.data``.

    Does nothing when ``self.is_fitted`` is already true.  Otherwise every
    element of every list in ``self.data`` is flattened with ``reshape(-1)``
    and a ``GraphicalLasso`` is fitted on the result, trying the ten alphas
    of ``np.logspace(-1, 5, 10)`` in increasing order until one succeeds.

    Raises
    ------
    Exception
        The error raised by the last attempted alpha, if all of them fail.
    """
    if self.is_fitted:
        return

    all_x = [
        elem.reshape(-1) for a_list in self.data.values() for elem in a_list
    ]

    # Kept in its own variable: the name bound by `except ... as` is deleted
    # when the handler exits, so it cannot be re-raised after the loop.
    last_error = None
    for alpha in np.logspace(-1, 5, 10):
        try:
            self.estimator = GraphicalLasso(assume_centered=False,
                                            alpha=alpha)
            self.estimator.fit(all_x)
        except Exception as exc:
            logger.error(f"Graphical lasso failed with alpha={alpha}")
            last_error = exc
        else:
            self.is_fitted = True
            return

    raise last_error
def predict(self, data, alpha=0.01, max_iter=2000, **kwargs):
    """Predict the graph skeleton with the graphical lasso.

    Args:
        data (pandas.DataFrame): observational data
        alpha (float): regularization parameter
        max_iter (int): maximum number of iterations
        **kwargs: accepted for interface compatibility; not used.

    Returns:
        networkx.Graph: graph built (as a ``networkx.DiGraph``) from the
        estimated precision matrix, with nodes named after ``data.columns``.
    """
    estimator = GraphicalLasso(alpha=alpha, max_iter=max_iter)
    estimator.fit(data.values)

    skeleton = nx.DiGraph(estimator.get_precision())
    # Matrix position -> column name.
    labels = dict(enumerate(data.columns))
    return nx.relabel_nodes(skeleton, labels)
def test_graphical_lasso(random_state=0):
    """Check ``graphical_lasso`` and the ``GraphicalLasso`` estimator.

    Verifies that the 'cd' and 'lars' modes agree, that costs decrease for
    alpha > 0, that the estimator matches the function at alpha = 0.25, and
    that ``assume_centered`` makes no difference on centered data.
    """
    # Sample data from a sparse multivariate normal
    dim = 20
    n_samples = 100
    random_state = check_random_state(random_state)
    prec = make_sparse_spd_matrix(dim, alpha=.95, random_state=random_state)
    cov = linalg.inv(prec)
    X = random_state.multivariate_normal(np.zeros(dim), cov, size=n_samples)
    emp_cov = empirical_covariance(X)

    for alpha in (0., .1, .25):
        covs = dict()
        icovs = dict()
        for method in ('cd', 'lars'):
            cov_, icov_, costs = graphical_lasso(emp_cov, return_costs=True, alpha=alpha, mode=method)
            covs[method] = cov_
            icovs[method] = icov_
            # Transposing splits the per-iteration pairs into two series.
            costs, dual_gap = np.array(costs).T
            # Check that the costs always decrease (doesn't hold if alpha == 0)
            if not alpha == 0:
                assert_array_less(np.diff(costs), 0)
        # Check that the 2 approaches give similar results
        assert_array_almost_equal(covs['cd'], covs['lars'], decimal=4)
        assert_array_almost_equal(icovs['cd'], icovs['lars'], decimal=4)

    # Smoke test the estimator
    # (`covs` still holds the results of the last loop pass, alpha = .25)
    model = GraphicalLasso(alpha=.25).fit(X)
    model.score(X)
    assert_array_almost_equal(model.covariance_, covs['cd'], decimal=4)
    assert_array_almost_equal(model.covariance_, covs['lars'], decimal=4)

    # For a centered matrix, assume_centered could be chosen True or False
    # Check that this returns indeed the same result for centered data
    Z = X - X.mean(0)
    precs = list()
    for assume_centered in (False, True):
        prec_ = GraphicalLasso(
            assume_centered=assume_centered).fit(Z).precision_
        precs.append(prec_)
    assert_array_almost_equal(precs[0], precs[1])
def test_graphical_lasso(random_state=0):
    """Exercise ``graphical_lasso`` in both solver modes and the estimator.

    For alpha in (0, 0.1, 0.25) the 'cd' and 'lars' results must match to
    4 decimals and, for alpha > 0, the cost sequence must strictly decrease.
    The ``GraphicalLasso`` estimator is then compared with the function
    output, and ``assume_centered`` is checked on mean-centered data.
    """
    # Sample data from a sparse multivariate normal
    dim = 20
    n_samples = 100
    random_state = check_random_state(random_state)
    prec = make_sparse_spd_matrix(dim, alpha=.95, random_state=random_state)
    cov = linalg.inv(prec)
    X = random_state.multivariate_normal(np.zeros(dim), cov, size=n_samples)
    emp_cov = empirical_covariance(X)

    for alpha in (0., .1, .25):
        # Results for this alpha, keyed by solver mode.
        covs = dict()
        icovs = dict()
        for method in ('cd', 'lars'):
            cov_, icov_, costs = graphical_lasso(emp_cov, return_costs=True, alpha=alpha, mode=method)
            covs[method] = cov_
            icovs[method] = icov_
            # `dual_gap` is unpacked alongside the costs but not checked.
            costs, dual_gap = np.array(costs).T
            # Check that the costs always decrease (doesn't hold if alpha == 0)
            if not alpha == 0:
                assert_array_less(np.diff(costs), 0)
        # Check that the 2 approaches give similar results
        assert_array_almost_equal(covs['cd'], covs['lars'], decimal=4)
        assert_array_almost_equal(icovs['cd'], icovs['lars'], decimal=4)

    # Smoke test the estimator
    # The comparison relies on the loop above ending with alpha = .25.
    model = GraphicalLasso(alpha=.25).fit(X)
    model.score(X)
    assert_array_almost_equal(model.covariance_, covs['cd'], decimal=4)
    assert_array_almost_equal(model.covariance_, covs['lars'], decimal=4)

    # For a centered matrix, assume_centered could be chosen True or False
    # Check that this returns indeed the same result for centered data
    Z = X - X.mean(0)
    precs = list()
    for assume_centered in (False, True):
        prec_ = GraphicalLasso(
            assume_centered=assume_centered).fit(Z).precision_
        precs.append(prec_)
    assert_array_almost_equal(precs[0], precs[1])
def get_mean_cov(x, y):
    """Fit one graphical lasso per integer label and stack the results.

    Labels ``0 .. max(y)`` are visited in order; for each one a fresh
    ``GraphicalLasso`` is fitted on the rows of ``x`` whose label matches.

    Returns ``(ms, ps)``: the stacked ``location_`` vectors and the stacked
    ``precision_`` matrices, one entry per label.
    """
    means, precisions = [], []
    for label in range(y.astype(int).max() + 1):
        estimator = GraphicalLasso()
        estimator.fit(x[(y == label).astype(bool)])
        precisions.append(estimator.precision_)
        means.append(estimator.location_)
    return np.stack(means), np.stack(precisions)
def glasso(subsamples, alpha, precision_tol=1e-4, glasso_params={}):
    """Estimate one graph per subsample with scikit-learn's graphical lasso.

    Parameters:
        - subsamples (np.array): 3-dimensional array; iterating over its
          first axis yields the subsamples, and the last axis has length p
        - alpha (float): regularization level; the estimator is created with
          ``alpha=1 / alpha``
        - precision_tol (float): an off-diagonal precision entry counts as an
          edge when its absolute value exceeds this
        - glasso_params (dict): extra keyword arguments for GraphicalLasso

    Returns:
        - estimates (np.array): integer 0/1 array of shape (N, p, p) holding
          the adjacency matrix estimated for each subsample
    """
    n_subsamples, _, p = subsamples.shape
    off_diagonal = np.zeros((n_subsamples, p, p))

    estimator = GraphicalLasso(alpha=1 / alpha, **glasso_params)
    for j, sample in enumerate(subsamples):
        fitted = estimator.fit(sample).precision_
        # Zero the diagonal so self-loops never show up as edges.
        off_diagonal[j, :, :] = fitted - np.diag(np.diag(fitted))

    return (np.abs(off_diagonal) > precision_tol).astype(int)
def get_cov_estimator(cov_type): if cov_type == 'LW': model = LedoitWolf() elif cov_type == 'OAS': model = OAS() elif cov_type == 'MCD': model = MinCovDet() elif cov_type[:2] == 'SC': shrinkage = float(cov_type.split('_')[1]) model = ShrunkCovariance(shrinkage=shrinkage) elif cov_type[:2] == 'GL': alpha = float(cov_type.split('_')[1]) model = GraphicalLasso(alpha=alpha) return model
def get_mean_cov(x, y):
    """Per-class, per-mixture-component graphical lasso estimates.

    The rows with label 1 are processed first, then the rows with label 0.
    Each group is split into 3 components with a randomly initialised
    full-covariance ``GaussianMixture``, and a ``GraphicalLasso`` is fitted
    on every component.

    Returns ``(ms, ps)``: six stacked ``location_`` vectors and six stacked
    ``precision_`` matrices (three for label 1 followed by three for label 0).
    """
    ms_list = []
    ps_list = []

    for label in (1, 0):
        group = x[(y == label).astype(bool)]

        mixture = GaussianMixture(n_components=3,
                                  init_params='random',
                                  covariance_type='full')
        component_of = mixture.fit_predict(group)

        # One estimator per class, refitted for each component.
        estimator = GraphicalLasso()
        for component in range(3):
            members = group[np.where(component_of == component)]
            estimator.fit(members)
            ps_list.append(estimator.precision_)
            ms_list.append(estimator.location_)

    return np.stack(ms_list), np.stack(ps_list)
def get_mean_cov3(x, y):
    """Per-class, per-cluster graphical lasso estimates using k-means.

    The rows with label 1 are processed first, then the rows with label 0.
    Each group is split into 3 clusters with
    ``KMeans(n_clusters=3, random_state=0, algorithm='elkan')`` and a
    ``GraphicalLasso`` is fitted on every cluster.

    Returns ``(ms, ps)``: six stacked ``location_`` vectors and six stacked
    ``precision_`` matrices (three for label 1 followed by three for label 0).
    """
    ms_list = []
    ps_list = []

    for label in (1, 0):
        group = x[(y == label).astype(bool)]

        clustering = KMeans(n_clusters=3, random_state=0,
                            algorithm='elkan').fit(group)
        cluster_of = clustering.labels_

        # One estimator per class, refitted for each cluster.
        estimator = GraphicalLasso()
        for cluster in range(3):
            members = group[np.where(cluster_of == cluster)]
            estimator.fit(members)
            ps_list.append(estimator.precision_)
            ms_list.append(estimator.location_)

    return np.stack(ms_list), np.stack(ps_list)
def get_mean_cov(x, y):
    """Graphical lasso estimates for the two classes of a binary problem.

    A single ``GraphicalLasso`` is fitted first on the rows labelled 1 and
    then refitted on the rows labelled 0.

    Returns ``(ms, ps)``: the stacked ``location_`` vectors and stacked
    ``precision_`` matrices, with the label-1 entry first.
    """
    estimator = GraphicalLasso()
    means, precisions = [], []
    for label in (1, 0):
        estimator.fit(x[(y == label).astype(bool)])
        precisions.append(estimator.precision_)
        means.append(estimator.location_)
    return np.stack(means), np.stack(precisions)
def compute_covariance(dataset):
    """
    Estimate covariance and precision matrices from a dataset.

    Depending on the number of samples, uses either the EmpiricalCovariance
    or the GraphicalLasso estimator from scikit-learn.

    Input:
        dataset: ndarray
            Dataset

    Outputs:
        covariance: ndarray
            Estimated covariance matrix
        precision: ndarray
            Estimated precision matrix (i.e. pseudo-inverse of covariance)

    Raises:
        ValueError
            If the GraphicalLasso fit fails or emits a warning; the message
            is the original error text followed by a hint about the number
            of reference trajectories, and the original error is chained as
            the cause.
    """
    # Warnings are promoted to exceptions only while estimating, so the
    # caller's warning filters are restored when this function exits.
    with warnings.catch_warnings():
        warnings.filterwarnings("error")

        if nb_samples_is_sufficient(dataset):
            cov = EmpiricalCovariance().fit(dataset)
            return cov.covariance_, cov.precision_

        try:
            cov = GraphicalLasso(mode='cd').fit(dataset)
            return cov.covariance_, cov.precision_
        except Exception as e:
            lasso_error = str(e)
            raise ValueError(lasso_error +
                             '\nNumber of reference trajectories not '
                             'sufficiently large to estimate covariance '
                             'and precision matrices.') from e
def helper_graphical_lasso(X, theta_true, tf_names=[]):
    """Fit a graphical lasso on ``X`` and report how well it recovers
    ``theta_true``.

    The estimator is ``GraphicalLassoCV`` when ``args.mode == 'cv'`` and
    otherwise a ``GraphicalLasso`` configured from ``args.alpha_l1`` and
    ``args.mode``.  When ``args.USE_TF_NAMES == 'yes'`` and ``tf_names`` is
    non-empty, the precision matrix is passed through ``postprocess_tf``
    before scoring.

    Prints the metrics and returns them as a list.
    """
    use_cv = args.mode == 'cv'
    estimator = GraphicalLassoCV() if use_cv else GraphicalLasso(
        alpha=args.alpha_l1,
        mode=args.mode,
        tol=1e-7,
        enet_tol=1e-6,
        max_iter=100,
        verbose=False,
        assume_centered=False,
    )
    estimator.fit(X)
    theta_pred = estimator.precision_

    if args.USE_TF_NAMES == 'yes' and len(tf_names) > 0:
        theta_pred = postprocess_tf(theta_pred, tf_names)

    scores = report_metrics(np.array(theta_true), theta_pred)
    print(
        'GLASSO: FDR, TPR, FPR, SHD, nnz_true, nnz_pred, precision, recall, Fb, aupr, auc'
    )
    print('GLASSO: TEST: Recovery of true theta: ', *np.around(scores, 3))
    return list(scores)
def _fit_shrunk_graphical_lasso(time_series):
    """Fit a GraphicalLasso on a shrunk empirical covariance; return the
    first estimator whose fit succeeds, or None if none does."""
    from sklearn.covariance import GraphicalLasso, empirical_covariance, shrunk_covariance

    emp_cov = empirical_covariance(time_series)
    alpha_range = 10.0 ** np.arange(-8, 0)
    for shrinkage in np.arange(0.8, 0.99, 0.01):
        shrunk_cov = shrunk_covariance(emp_cov, shrinkage=shrinkage)
        for alpha in alpha_range:
            candidate = GraphicalLasso(alpha)
            try:
                candidate.fit(shrunk_cov)
            except Exception:
                print(f"Covariance estimation failed with shrinkage at alpha={alpha}")
                continue
            print(f"Retrying covariance matrix estimate with alpha={alpha}")
            # Stop at the first fit that works, so a later failing attempt
            # can never replace it with an unfitted estimator.
            return candidate
    return None


def get_conn_matrix(time_series, conn_model, dir_path, node_size, smooth, dens_thresh, network, ID, roi,
                    min_span_tree, disp_filt, parc, prune, atlas, uatlas, labels, coords, norm, binary,
                    hpass, extract_strategy):
    """
    Computes a functional connectivity matrix based on a node-extracted
    time-series array. Includes a library of routines across Nilearn,
    scikit-learn, and skggm packages, among others.

    Parameters
    ----------
    time_series : array
        2D m x n array consisting of the time-series signal for each ROI node
        where m = number of scans and n = number of ROI's.
    conn_model : str
        Connectivity estimation model (e.g. corr for correlation, cov for
        covariance, sps for precision covariance, partcorr for partial
        correlation). The skggm models are selected with
        'QuicGraphicalLasso', 'QuicGraphicalLassoCV',
        'QuicGraphicalLassoEBIC' and 'AdaptiveQuicGraphicalLasso'.
    dir_path : str
        Path to directory containing subject derivative data for given run.
    node_size : int
        Spherical centroid node size in the case that coordinate-based
        centroids are used as ROI's.
    smooth : int
        Smoothing width (mm fwhm) to apply to time-series when extracting
        signal from ROI's.
    dens_thresh : bool
        Indicates whether a target graph density is to be used as the basis
        for thresholding.
    network : str
        Resting-state network based on Yeo-7 and Yeo-17 naming
        (e.g. 'Default') used to filter nodes in the study of brain subgraphs.
    ID : str
        A subject id or other unique identifier.
    roi : str
        File path to binarized/boolean region-of-interest Nifti1Image file.
    min_span_tree : bool
        Indicates whether local thresholding from the Minimum Spanning Tree
        should be used.
    disp_filt : bool
        Indicates whether local thresholding using a disparity filter and
        'backbone network' should be used.
    parc : bool
        Indicates whether to use parcels instead of coordinates as ROI nodes.
    prune : bool
        Indicates whether to prune final graph of disconnected nodes/isolates.
    atlas : str
        Name of atlas parcellation used.
    uatlas : str
        File path to atlas parcellation Nifti1Image in MNI template space.
    labels : list
        List of string labels corresponding to ROI nodes.
    coords : list
        List of (x, y, z) tuples corresponding to a coordinate atlas used or
        which represent the center-of-mass of each parcellation node.
    norm : int
        Indicates method of normalizing resulting graph.
    binary : bool
        Indicates whether to binarize resulting graph edges to form an
        unweighted graph.
    hpass : bool
        High-pass filter values (Hz) to apply to node-extracted time-series.
    extract_strategy : str
        The name of a valid function used to reduce the time-series region
        extraction.

    Returns
    -------
    conn_matrix : array
        Adjacency matrix stored as an m x n array of nodes and edges. It is
        symmetrised by taking the element-wise maximum of the estimate and
        its transpose; precision-based models return the negated precision.
    conn_model, dir_path, node_size, smooth, dens_thresh, network, ID, roi, \
min_span_tree, disp_filt, parc, prune, atlas, uatlas
        The corresponding inputs, passed through unchanged.
    labels : numpy.ndarray
        ``labels`` converted with ``np.array``.
    coords : numpy.ndarray
        ``coords`` converted with ``np.array``.
    norm, binary, hpass, extract_strategy
        The corresponding inputs, passed through unchanged.

    Raises
    ------
    ValueError
        If ``conn_model`` is not one of the supported names.
    RuntimeError
        If covariance estimation fails even after the shrinkage fallback, or
        if the resulting matrix is empty or 1-dimensional.
    ImportError
        If an skggm model is requested but skggm is not installed.

    References
    ----------
    .. [1] Varoquaux, G., & Craddock, R. C. (2013). Learning and comparing
      functional connectomes across subjects. NeuroImage.
      https://doi.org/10.1016/j.neuroimage.2013.04.007
    .. [2] Jason Laska, Manjari Narayan, 2017. skggm 0.2.7:
      A scikit-learn compatible package for Gaussian and related Graphical
      Models. doi:10.5281/zenodo.830033

    """
    from nilearn.connectome import ConnectivityMeasure
    from sklearn.covariance import GraphicalLassoCV

    covariance_names = ('cov', 'covariance', 'covar')
    precision_names = ('sps', 'sparse', 'precision')

    conn_matrix = None
    if conn_model in ('corr', 'cor', 'correlation'):
        # credit: nilearn
        print('\nComputing correlation matrix...\n')
        conn_measure = ConnectivityMeasure(kind='correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
    elif conn_model in ('partcorr', 'parcorr', 'partialcorrelation'):
        # credit: nilearn
        print('\nComputing partial correlation matrix...\n')
        conn_measure = ConnectivityMeasure(kind='partial correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
    elif conn_model in covariance_names or conn_model in precision_names:
        # Fit estimator to matrix to get sparse matrix
        estimator_shrunk = None
        estimator = GraphicalLassoCV(cv=5)
        try:
            print('\nComputing covariance...\n')
            estimator.fit(time_series)
        except Exception:
            print('Unstable Lasso estimation--Attempting to re-run by first applying shrinkage...')
            try:
                estimator_shrunk = _fit_shrunk_graphical_lasso(time_series)
            except ValueError:
                print('Unstable Lasso estimation! Shrinkage failed. A different connectivity model may be needed.')

            # The cross-validated fit failed, so only a successfully fitted
            # shrunk estimator can supply a matrix.
            if estimator_shrunk is None:
                raise RuntimeError('\nERROR: Covariance estimation failed.')

        if conn_model in precision_names:
            if estimator_shrunk is None:
                print('\nFetching precision matrix from covariance estimator...\n')
                conn_matrix = -estimator.precision_
            else:
                print('\nFetching shrunk precision matrix from covariance estimator...\n')
                conn_matrix = -estimator_shrunk.precision_
        elif conn_model in covariance_names:
            if estimator_shrunk is None:
                print('\nFetching covariance matrix from covariance estimator...\n')
                conn_matrix = estimator.covariance_
            else:
                conn_matrix = estimator_shrunk.covariance_
    elif conn_model == 'QuicGraphicalLasso':
        try:
            from inverse_covariance import QuicGraphicalLasso
        except ImportError:
            print('Cannot run QuicGraphLasso. Skggm not installed!')
            raise
        # Compute the sparse inverse covariance via QuicGraphLasso
        # credit: skggm
        model = QuicGraphicalLasso(
            init_method='cov',
            lam=0.5,
            mode='default',
            verbose=1)
        print('\nCalculating QuicGraphLasso precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'QuicGraphicalLassoCV':
        try:
            from inverse_covariance import QuicGraphicalLassoCV
        except ImportError:
            print('Cannot run QuicGraphLassoCV. Skggm not installed!')
            raise
        # Compute the sparse inverse covariance via QuicGraphLassoCV
        # credit: skggm
        model = QuicGraphicalLassoCV(
            init_method='cov',
            verbose=1)
        print('\nCalculating QuicGraphLassoCV precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'QuicGraphicalLassoEBIC':
        try:
            from inverse_covariance import QuicGraphicalLassoEBIC
        except ImportError:
            print('Cannot run QuicGraphLassoEBIC. Skggm not installed!')
            raise
        # Compute the sparse inverse covariance via QuicGraphLassoEBIC
        # credit: skggm
        model = QuicGraphicalLassoEBIC(
            init_method='cov',
            verbose=1)
        print('\nCalculating QuicGraphLassoEBIC precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'AdaptiveQuicGraphicalLasso':
        try:
            from inverse_covariance import AdaptiveQuicGraphicalLasso, QuicGraphicalLassoEBIC
        except ImportError:
            print('Cannot run AdaptiveGraphLasso. Skggm not installed!')
            raise
        # Compute the sparse inverse covariance via
        # AdaptiveGraphLasso + QuicGraphLassoEBIC + method='binary'
        # credit: skggm
        model = AdaptiveQuicGraphicalLasso(
            estimator=QuicGraphicalLassoEBIC(
                init_method='cov',
            ),
            method='binary',
        )
        print('\nCalculating AdaptiveQuicGraphLasso precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.estimator_.precision_
    else:
        raise ValueError('\nERROR! No connectivity model specified at runtime. Select a valid estimator using the '
                         '-mod flag.')

    # Enforce symmetry
    conn_matrix = np.maximum(conn_matrix, conn_matrix.T)

    if conn_matrix.shape < (2, 2):
        raise RuntimeError('\nERROR! Matrix estimation selection yielded an empty or 1-dimensional graph. '
                           'Check time-series for errors or try using a different atlas')

    coords = np.array(coords)
    labels = np.array(labels)

    del time_series

    return (conn_matrix, conn_model, dir_path, node_size, smooth, dens_thresh, network, ID, roi, min_span_tree,
            disp_filt, parc, prune, atlas, uatlas, labels, coords, norm, binary, hpass, extract_strategy)
import pandas as pd
import time


def _wildcard_flag(value):
    """Interpret a snakemake wildcard string as a boolean.

    Wildcards are strings, and bool("False") is True; the usual "false"
    spellings map to False and anything else to True.
    """
    return str(value).strip().lower() not in (
        "", "0", "false", "no", "off", "none")


# Seed NumPy from the replicate number of this job.
seed = int(snakemake.wildcards["replicate"])
np.random.seed(seed)

data = snakemake.input["data"]
filename = snakemake.output["adjmat"]

df = pd.read_csv(data)
X = df.values

# Time the fit and the thresholding below.
start = time.perf_counter()
cov = GraphicalLasso(alpha=float(snakemake.wildcards["alpha"]),
                     mode=snakemake.wildcards["mode"],
                     tol=float(snakemake.wildcards["tol"]),
                     enet_tol=float(snakemake.wildcards["enet_tol"]),
                     max_iter=int(snakemake.wildcards["max_iter"]),
                     verbose=_wildcard_flag(snakemake.wildcards["verbose"]),
                     assume_centered=_wildcard_flag(
                         snakemake.wildcards["assume_centered"])).fit(X)

# 1 where |precision| (rounded to 3 decimals) exceeds the threshold, then
# subtract the identity to clear the diagonal.
adjmat = ((np.around(np.abs(cov.precision_), decimals=3) > float(
    snakemake.wildcards["precmat_threshold"])) * 1 -
    np.identity(X.shape[1])).astype(int)

tottime = time.perf_counter() - start
time_filename = snakemake.output["time"]
np.savetxt(time_filename, [tottime])

dfadj = pd.DataFrame(adjmat)
dfadj.columns = df.columns
Omega_0 = Omega_sol.copy() Theta_0 = Theta_sol.copy() AIC[g1, g2] = aic(S, Theta_sol, n.mean()) BIC[g1, g2] = ebic(S, Theta_sol, n.mean(), gamma=0.1) ix = np.unravel_index(np.nanargmin(BIC), BIC.shape) ix2 = np.unravel_index(np.nanargmin(AIC), AIC.shape) lambda1 = L1[ix] lambda2 = L2[ix] print("Optimal lambda values: (l1,l2) = ", (lambda1, lambda2)) #%% singleGL = GraphicalLasso(alpha=1.5 * lambda1, tol=1e-2, max_iter=4000, verbose=True) res = np.zeros((K, p, p)) for k in np.arange(K): #model = quic.fit(S[k,:,:], verbose = 1) model = singleGL.fit(samples[k, :, :]) res[k, :, :] = model.precision_ results['GLASSO'] = {'Theta': res} #%% start = time() sol, info = ADMM_MGL(S, lambda1, lambda2, reg, Omega_0, rho = 1, max_iter = 100, \ eps_admm = 1e-5, verbose = True, measure = True)
def block_glasso(data, eps=1e-8, COLLECT=True):
    """Run scikit-learn's graphical lasso (lars mode) on one problem instance.

    ``data`` is a pair ``(theta_true, X)``.  The estimator is configured from
    ``args.rho`` (L1 penalty) and ``args.MAX_EPOCH`` (iteration cap) and
    fitted on ``X``; the estimated precision matrix is then compared with
    ``theta_true``.  ``eps`` is accepted but not used.

    Returns a pair:
      * a list ``[fdr, tpr, fpr, shd, nnz, nnz_true, ps, cond_theta_pred,
        cond_theta_true, num_itr, rho_obtained]``
      * ``res_conv``: when ``COLLECT`` is true, a one-element list holding
        ``[cv_loss, obj_pred, obj_true, cv_loss_off_diag]``; otherwise empty.
    """
    criterion = nn.MSELoss()  # input, target
    theta_true, X = data

    # Estimate the covariance
    print('Using the lars method')
    S = np.dot(X.T, X) / args.M
    model = GraphicalLasso(
        alpha=args.rho,
        mode='lars',
        tol=1e-7,
        enet_tol=1e-6,
        max_iter=args.MAX_EPOCH,
        verbose=True,
        assume_centered=True,
    )
    model.fit(X)
    cov_ = model.covariance_
    theta_pred = model.precision_

    # Structure-recovery metrics and conditioning of both matrices
    fdr, tpr, fpr, shd, nnz, nnz_true, ps = metrics.report_metrics(
        theta_true, theta_pred)
    cond_theta_pred = np.linalg.cond(theta_pred)
    cond_theta_true = np.linalg.cond(theta_true)
    num_itr = model.n_iter_
    rho_obtained = args.rho  # the L1 penalty parameter
    print('Accuracy metrics: fdr ', fdr, ' tpr ', tpr, ' fpr ', fpr, ' shd ',
          shd, ' nnz ', nnz, ' nnz_true ', nnz_true, ' sign_match ', ps,
          ' pred_cond ', cond_theta_pred, ' true_cond ', cond_theta_true,
          'total itr: ', num_itr, ' penalty_rho: ', rho_obtained)

    # Getting the NMSE and objective value
    res_conv = []  # results of convergence
    theta_true = convert_to_torch(theta_true, TESTING_FLAG=True)
    theta_pred = convert_to_torch(theta_pred, TESTING_FLAG=True)
    S = convert_to_torch(S, TESTING_FLAG=True)
    obj_true = get_obj_val(theta_true, S)

    if COLLECT:
        # Diagonal-only copies, used to compare the off-diagonal parts.
        pred_diag = torch.diag_embed(
            torch.diagonal(theta_pred, offset=0, dim1=-2, dim2=-1))
        true_diag = torch.diag_embed(
            torch.diagonal(theta_true, offset=0, dim1=-2, dim2=-1))

        cv_loss = get_convergence_loss(theta_pred, theta_true)
        cv_loss_off_diag = get_convergence_loss(theta_pred - pred_diag,
                                                theta_true - true_diag)
        obj_pred = get_obj_val(theta_pred, S)
        res_conv.append([cv_loss, obj_pred, obj_true, cv_loss_off_diag])

    summary = [
        fdr, tpr, fpr, shd, nnz, nnz_true, ps, cond_theta_pred,
        cond_theta_true, num_itr, rho_obtained
    ]
    return summary, res_conv  # result of convergence
def get_conn_matrix(time_series, conn_model, dir_path, node_size, smooth,
                    dens_thresh, network, ID, roi, min_span_tree, disp_filt,
                    parc, prune, atlas_select, uatlas_select, label_names,
                    coords, c_boot, norm, binary):
    """Estimate a connectivity matrix from ``time_series`` with ``conn_model``.

    Supported ``conn_model`` values:

    * ``'corr'``, ``'cor'``, ``'correlation'`` -- nilearn correlation.
    * ``'partcorr'``, ``'parcorr'``, ``'partialcorrelation'`` -- nilearn
      partial correlation.
    * ``'cov'``, ``'covariance'``, ``'covar'`` -- covariance from
      ``GraphicalLassoCV``.
    * ``'sps'``, ``'sparse'``, ``'precision'`` -- negated precision from
      ``GraphicalLassoCV``.
    * ``'QuicGraphicalLasso'``, ``'QuicGraphLassoCV'``,
      ``'QuicGraphicalLassoEBIC'``, ``'AdaptiveQuicGraphLasso'`` -- negated
      precision from the corresponding skggm estimator.

    When the ``GraphicalLassoCV`` fit fails, the function retries with
    ``GraphicalLasso`` on shrunk empirical covariances over a grid of
    shrinkage and alpha values and uses the most recent successful fit.

    Returns
    -------
    tuple
        ``conn_matrix`` followed by every input except ``time_series``, in the
        original order; ``label_names`` and ``coords`` are converted to numpy
        arrays.

    Raises
    ------
    ValueError
        If ``conn_model`` is not one of the supported names.
    RuntimeError
        If covariance estimation fails for every attempt, or the resulting
        matrix shape compares lower than ``(2, 2)``.
    ImportError
        If a skggm-based model is requested and skggm is not installed.
    """
    conn_matrix = None
    if conn_model in ('corr', 'cor', 'correlation'):
        # Imported lazily so unrelated models do not require nilearn.
        from nilearn.connectome import ConnectivityMeasure
        # credit: nilearn
        print('\nComputing correlation matrix...\n')
        conn_measure = ConnectivityMeasure(kind='correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
    elif conn_model in ('partcorr', 'parcorr', 'partialcorrelation'):
        from nilearn.connectome import ConnectivityMeasure
        # credit: nilearn
        print('\nComputing partial correlation matrix...\n')
        conn_measure = ConnectivityMeasure(kind='partial correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
    elif conn_model in ('cov', 'covariance', 'covar', 'sps', 'sparse',
                        'precision'):
        from sklearn.covariance import GraphicalLassoCV
        # Fit estimator to matrix to get sparse matrix
        estimator_shrunk = None
        estimator = GraphicalLassoCV(cv=5)
        try:
            print('\nComputing covariance...\n')
            estimator.fit(time_series)
        except Exception:
            # Drop the unfitted estimator so the failure check below works.
            estimator = None
            print(
                'Unstable Lasso estimation--Attempting to re-run by first applying shrinkage...'
            )
            try:
                from sklearn.covariance import GraphicalLasso, empirical_covariance, shrunk_covariance
                emp_cov = empirical_covariance(time_series)
                for i in np.arange(0.8, 0.99, 0.01):
                    shrunk_cov = shrunk_covariance(emp_cov, shrinkage=i)
                    alphaRange = 10.0**np.arange(-8, 0)
                    for alpha in alphaRange:
                        try:
                            candidate = GraphicalLasso(alpha)
                            candidate.fit(shrunk_cov)
                        except Exception:
                            print(
                                "Covariance estimation failed with shrinkage at alpha=%s"
                                % alpha)
                            continue
                        print(
                            "Retrying covariance matrix estimate with alpha=%s"
                            % alpha)
                        # Only a successfully fitted estimator is retained.
                        estimator_shrunk = candidate
                        break
            except ValueError:
                print(
                    'Unstable Lasso estimation! Shrinkage failed. A different connectivity model may be needed.'
                )

        if estimator is None and estimator_shrunk is None:
            raise RuntimeError('\nERROR: Covariance estimation failed.')

        if conn_model in ('sps', 'sparse', 'precision'):
            if estimator_shrunk is None:
                print(
                    '\nFetching precision matrix from covariance estimator...\n'
                )
                conn_matrix = -estimator.precision_
            else:
                print(
                    '\nFetching shrunk precision matrix from covariance estimator...\n'
                )
                conn_matrix = -estimator_shrunk.precision_
        elif conn_model in ('cov', 'covariance', 'covar'):
            if estimator_shrunk is None:
                print(
                    '\nFetching covariance matrix from covariance estimator...\n'
                )
                conn_matrix = estimator.covariance_
            else:
                conn_matrix = estimator_shrunk.covariance_
    elif conn_model == 'QuicGraphicalLasso':
        try:
            from inverse_covariance import QuicGraphicalLasso
        except ImportError:
            print('Cannot run QuicGraphLasso. Skggm not installed!')
            # Re-raise: continuing would only fail later with a NameError.
            raise
        # Compute the sparse inverse covariance via QuicGraphLasso
        # credit: skggm
        model = QuicGraphicalLasso(init_method='cov',
                                   lam=0.5,
                                   mode='default',
                                   verbose=1)
        print('\nCalculating QuicGraphLasso precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'QuicGraphLassoCV':
        try:
            from inverse_covariance import QuicGraphicalLassoCV
        except ImportError:
            print('Cannot run QuicGraphLassoCV. Skggm not installed!')
            raise
        # Compute the sparse inverse covariance via QuicGraphLassoCV
        # credit: skggm
        model = QuicGraphicalLassoCV(init_method='cov', verbose=1)
        print(
            '\nCalculating QuicGraphLassoCV precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'QuicGraphicalLassoEBIC':
        try:
            from inverse_covariance import QuicGraphicalLassoEBIC
        except ImportError:
            print('Cannot run QuicGraphLassoEBIC. Skggm not installed!')
            raise
        # Compute the sparse inverse covariance via QuicGraphLassoEBIC
        # credit: skggm
        model = QuicGraphicalLassoEBIC(init_method='cov', verbose=1)
        print(
            '\nCalculating QuicGraphLassoEBIC precision matrix using skggm...\n'
        )
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'AdaptiveQuicGraphLasso':
        try:
            from inverse_covariance import AdaptiveQuicGraphicalLasso, QuicGraphicalLassoEBIC
        except ImportError:
            print('Cannot run AdaptiveGraphLasso. Skggm not installed!')
            raise
        # Compute the sparse inverse covariance via
        # AdaptiveGraphLasso + QuicGraphLassoEBIC + method='binary'
        # credit: skggm
        model = AdaptiveQuicGraphicalLasso(
            estimator=QuicGraphicalLassoEBIC(init_method='cov', ),
            method='binary',
        )
        print(
            '\nCalculating AdaptiveQuicGraphLasso precision matrix using skggm...\n'
        )
        model.fit(time_series)
        conn_matrix = -model.estimator_.precision_
    else:
        raise ValueError(
            '\nERROR! No connectivity model specified at runtime. Select a valid estimator using the '
            '-mod flag.')

    # Tuple comparison: rejects shapes that sort below (2, 2).
    if conn_matrix.shape < (2, 2):
        raise RuntimeError(
            '\nERROR! Matrix estimation selection yielded an empty or 1-dimensional graph. '
            'Check time-series for errors or try using a different atlas')

    coords = np.array(coords)
    label_names = np.array(label_names)
    return conn_matrix, conn_model, dir_path, node_size, smooth, dens_thresh, network, ID, roi, min_span_tree, disp_filt, parc, prune, atlas_select, uatlas_select, label_names, coords, c_boot, norm, binary
# NOTE(review): this assignment looks like the tail of a loop over `l` whose
# header precedes this chunk -- confirm its indentation against the full file.
GFPR[l] = FPR[gix]

plot_gamma_influence(gammas, GTPR, GFPR, save=False)

#%%
# solve single GLASSO

# Penalty grid: 15 log-spaced values, scaled by 2.
ALPHA = 2 * np.logspace(start=-3, stop=-1, num=15, base=10)

FPR_GL = np.zeros(len(ALPHA))
TPR_GL = np.zeros(len(ALPHA))
DFPR_GL = np.zeros(len(ALPHA))
DTPR_GL = np.zeros(len(ALPHA))

for a in np.arange(len(ALPHA)):
    singleGL = GraphicalLasso(alpha=ALPHA[a],
                              tol=1e-6,
                              max_iter=200,
                              verbose=False)
    singleGL_sol = np.zeros((K, p, p))

    # Fit each of the K instances independently with the same penalty.
    for k in np.arange(K):
        model = singleGL.fit(sample[k, :, :].T)
        singleGL_sol[k, :, :] = model.precision_

    # Evaluate once per alpha and reuse the result for all four rates
    # (previously the identical call was repeated four times).
    rates_GL = discovery_rate(singleGL_sol, Theta)
    TPR_GL[a] = rates_GL['TPR']
    FPR_GL[a] = rates_GL['FPR']
    DTPR_GL[a] = rates_GL['TPR_DIFF']
    DFPR_GL[a] = rates_GL['FPR_DIFF']

#%%
# solve again for optimal (l1, l2)
l1opt = L1[ix]
# Neighbourhood selection: one fit per regularisation value in `ls`; the
# accuracy triple against the reference matrix K is appended to the
# neighbourhood_selection_* lists (created before this chunk).
for l in ls:
    ns = nitk.NeighbourhoodSelection(l)
    ns.fit(X)
    tpr, fpr, prec = nitk.methods.calculate_matrix_accuracy(
        K, ns.precision_)
    neighbourhood_selection_tpr.append(tpr)
    neighbourhood_selection_fpr.append(fpr)
    neighbourhood_selection_precision.append(prec)

# Graphical lasso over the same grid of values.
glasso_tpr = []
glasso_fpr = []
glasso_precision = []
for l in ls:
    try:
        gl = GraphicalLasso(l)
        gl.fit(X)
        tpr, fpr, prec = nitk.methods.calculate_matrix_accuracy(
            K, gl.precision_)
        glasso_tpr.append(tpr)
        glasso_fpr.append(fpr)
        glasso_precision.append(prec)
    except FloatingPointError as e:
        # A failed value of `l` is only printed and adds no entry, so the
        # glasso_* lists can end up shorter than `ls`.
        print(e)

# SPACE estimator sweep.
space_tpr = []
space_fpr = []
space_precision = []
for l in ls:
    # NOTE(review): the rest of this loop body is outside the visible chunk.
    s = nitk.SPACE(l)
for _ in range(self.max_iter): X = self.update_X(Y, Z, cov) Y = self.soft_threshold(X + Z, self.alpha / self.rho) Z = Z + self.alpha * (X - Y) self.cov = np.linalg.inv(Y) self.precision = Y return self if __name__ == "__main__": A = load_boston().data A = sp.stats.zscore(A, axis=0) # ---sklearn--- model = GraphicalLasso(alpha=0.4, verbose=True) model.fit(A) cov = np.cov(A.T) cov_ = model.covariance_ pre_ = model.precision_ model = GraphicalLassoADMM() res = model.fit(A) #print(res.precision) #print(cov_) # 普通の共分散行列 plt.imshow(cov, interpolation='nearest', vmin=0, vmax=1, cmap='jet') plt.colorbar() plt.figure()
def fit(self, X, y):
    """Fit the QDA to the training data.

    For every class label found in ``y`` the class mean and a covariance
    estimate are computed; the estimator is chosen by ``self.method``:

    * ``None`` -- sample covariance spectrum plus ``self.tol``.
    * ``'reg'`` -- sample covariance spectrum plus the per-class ``rho_``.
    * ``'fr'``, ``'kl'``, ``'mean'``, ``'freg'``, ``'kl_new'``, ``'nonpara'``
      -- Ledoit-Wolf estimate on standardised data, rescaled back.
    * ``'sparse'`` -- ``GraphicalLasso`` with ``alpha=rho_``; on failure falls
      back to ``graphical_lasso`` on a regularised sample covariance.
    * ``'wass'`` -- spectrum transformed with a multiplier found by bisection.

    Parameters
    ----------
    X : array-like
        Training samples, validated together with ``y`` by ``check_X_y``.
    y : array-like
        Class labels.

    Returns
    -------
    self
        With ``labels_``, ``n_samples_``, ``n_class_``, ``n_features_``,
        ``rho_``, ``priors_`` and the per-class lists ``mean_``,
        ``covariance_``, ``cov_sqrt_``, ``prec_``, ``prec_sqrt_``,
        ``logdet_``, ``rotations_``, ``scalings_`` populated.

    Raises
    ------
    ValueError
        If ``self.method`` or ``self.rule`` is not a supported value.
    """
    methods = [
        None, 'nonpara', "fr", "kl", "mean", "wass", "reg", "freg", "sparse",
        "kl_new"
    ]
    rules = ["qda", "da", "fda"]
    if self.method not in methods:
        raise ValueError("method must be in {}; got (method={})".format(
            methods, self.method))
    if self.rule not in rules:
        raise ValueError("rule must be in {}; got (rule={})".format(
            rules, self.rule))

    X, y = check_X_y(X, y)
    self.labels_, self.n_samples_ = np.unique(y, return_counts=True)
    self.n_class_ = self.labels_.size
    n_samples, self.n_features_ = X.shape

    # Per-class radius: rho == -1 selects a chi-square-quantile default,
    # otherwise a scalar rho is broadcast to every class.
    self.rho_ = np.array([self.rho]).ravel()
    if self.rho == -1:
        chi_quantile = chi2.ppf(
            0.5, self.n_features_ * (self.n_features_ + 3) / 2)
        self.rho_ = chi_quantile * np.ones(self.n_class_) / self.n_samples_
    else:
        if self.rho_.size == 1:
            self.rho_ = self.rho_[0] * np.ones(self.n_class_)
        # NOTE(review): nesting of this check under `else` is inferred from
        # the `else: if ...` shape of the original -- confirm.
        if self.adaptive:
            self.rho_ *= np.sqrt(self.n_features_)

    if self.priors is None:
        self.priors_ = self.n_samples_ / n_samples
    else:
        self.priors_ = self.priors

    self.mean_ = []
    self.covariance_ = []
    self.cov_sqrt_ = []
    self.prec_ = []
    self.prec_sqrt_ = []
    self.logdet_ = []
    self.rotations_ = []
    self.scalings_ = []

    for n_c, label in enumerate(self.labels_):
        mask = (y == label)
        X_c = X[mask, :]
        X_c_mean = np.mean(X_c, 0)
        X_c_bar = X_c - X_c_mean
        # Spectrum of the sample covariance via SVD of the centred data.
        U, s, Vt = np.linalg.svd(X_c_bar, full_matrices=False)
        s2 = (s**2) / (len(X_c_bar) - 1)
        self.mean_.append(X_c_mean)

        if self.method == 'reg':
            s2 += self.rho_[n_c]
            inv_s2 = 1 / s2
        elif self.method in [
                'fr', 'kl', 'mean', 'freg', 'kl_new', 'nonpara'
        ]:
            sc = StandardScaler()
            X_c_ = sc.fit_transform(X_c)
            cov_c = ledoit_wolf(X_c_)[0]
            # Undo the standardisation on the covariance estimate.
            cov_c = sc.scale_[:, np.newaxis] * cov_c * sc.scale_[
                np.newaxis, :]
            s2, V = np.linalg.eigh(cov_c)
            s2 = np.abs(s2)
            inv_s2 = 1 / s2
            Vt = V.T
        elif self.method == 'sparse':
            try:
                # Fixed: the attribute is `covariance_`; the former
                # `covariance__` always raised and forced the fallback.
                cov_c = GraphicalLasso(
                    alpha=self.rho_[n_c]).fit(X_c_bar).covariance_
            except Exception:
                # Best-effort fallback on a regularised sample covariance.
                tol = self.tol * 1e6
                cov_c = graphical_lasso(
                    np.dot(((1 - tol) * s2 + tol) * Vt.T, Vt),
                    self.rho_[n_c])[0]
            s2, V = np.linalg.eigh(cov_c)
            s2 = np.abs(s2)
            inv_s2 = 1 / s2
            Vt = V.T
        elif self.method == 'wass':

            def f(gamma):
                return (gamma * (self.rho_[n_c]**2 - 0.5 * np.sum(s2)) -
                        self.n_features_ + 0.5 * (np.sum(
                            np.sqrt((gamma**2) * (s2**2) + 4 * s2 * gamma))))

            # Bisection on gamma between lb and ub.
            lb = 0
            gamma_0 = 0
            ub = np.sum(np.sqrt(1 / (s2 + self.tol))) / self.rho_[n_c]
            f_ub = f(ub)
            for bsect in range(100):
                gamma_0 = 0.5 * (ub + lb)
                f_gamma_0 = f(gamma_0)
                if f_ub * f_gamma_0 > 0:
                    ub = gamma_0
                    f_ub = f_gamma_0
                else:
                    lb = gamma_0
                # NOTE(review): placed at loop level (standard bisection);
                # confirm against the unmangled source.
                if abs(ub - lb) < self.tol:
                    break
            inv_s2 = gamma_0 * (1 - 2 / (1 + np.sqrt(1 + 4 /
                                                     (gamma_0 *
                                                      (s2 + self.tol)))))
            s2 = 1 / (inv_s2 + self.tol)
        else:
            s2 += self.tol
            inv_s2 = 1 / s2

        # Rebuild the matrices from the (possibly transformed) spectrum.
        self.covariance_.append(np.dot(s2 * Vt.T, Vt))
        self.cov_sqrt_.append(np.dot(np.sqrt(s2) * Vt.T, Vt))
        self.prec_.append(np.dot(inv_s2 * Vt.T, Vt))
        self.prec_sqrt_.append(np.dot(np.sqrt(inv_s2) * Vt.T, Vt))
        self.logdet_.append(np.log(s2).sum())
        self.rotations_.append(Vt)
        self.scalings_.append(s2)
    return self
def get_covariance(data,
                   method,
                   lambda_val='CV',
                   do_scale=False,
                   n_cv_folds=None):
    """Estimate a regularised covariance matrix from ``data``.

    ``data`` is transposed before every fit, so it is expected with the
    estimator's feature axis first.

    Parameters
    ----------
    data : array-like
        Input matrix; transposed before being passed to the estimator.
    method : str
        ``'QUIC'`` (skggm estimators) or ``'graphLasso'`` (scikit-learn).
    lambda_val : str or float, optional
        ``'CV'`` for cross-validated selection, ``'EBIC'`` (QUIC only), or a
        float strictly between 0 and 1 used as the fixed penalty.
    do_scale : bool, optional
        When true, ``scale(data, axis=1)`` is applied first.
    n_cv_folds : optional
        Passed as ``cv`` to the cross-validated estimators.

    Returns
    -------
    The estimator's ``covariance_``, or ``-1`` when the arguments are invalid
    or a handled graphLasso error occurred (a message is printed).
    """
    # default cov if it is not calculated properly
    cov = -1

    # scale timecourse
    if do_scale:
        data = scale(data, axis=1)

    if method == 'QUIC':
        if lambda_val == 'CV':
            model = QuicGraphicalLassoCV(cv=n_cv_folds)
            model.fit(np.transpose(data))
            return model.covariance_
        elif lambda_val == 'EBIC':
            model = QuicGraphicalLassoEBIC()
            model.fit(np.transpose(data))
            return model.covariance_
        elif isinstance(lambda_val, float) and 0 < lambda_val < 1:
            model = QuicGraphicalLasso(lam=lambda_val)
            # Fixed: fit on the transposed data, as every other branch does
            # (the untransposed fit gave a covariance over the wrong axis).
            model.fit(np.transpose(data))
            return model.covariance_
        else:
            print('Error in QUIC covariance:')
            print(
                'lambda_val must be a float between 0 and 1, "CV" to find the best value by cross-validation, or "EBIC" to use extended Bayesian information criterion for model selection.'
            )

    elif method == 'graphLasso':
        # transpose data as graphLasso likes it this way round
        data = np.transpose(data)

        # Either use the supplied regularisation parameter or find the best
        # one by cross validation (scikit-learn implementations).
        if lambda_val == 'CV':
            try:
                model = GraphicalLassoCV(max_iter=1500,
                                         cv=n_cv_folds,
                                         assume_centered=True)
                model.fit(data)
                cov = model.covariance_
            except Exception:
                # Best effort: report and fall through to the -1 default.
                print(
                    'An error in cross validated graphLasso calculation occured.'
                )
        elif isinstance(lambda_val, float) and 0 < lambda_val < 1:
            try:
                model = GraphicalLasso(alpha=lambda_val,
                                       mode='cd',
                                       tol=0.0001,
                                       max_iter=1500,
                                       verbose=False)
                model.fit(data)
                cov = model.covariance_
            # Fixed: `except (FloatingPointError, e):` looked up the
            # undefined name `e` and raised NameError while handling.
            except FloatingPointError as e:
                print(
                    'A floating point error in cross validated graphLasso calculation occured.'
                )
                print(e)
        else:
            print('Error in graphLasso covariance:')
            print(
                'lambda_val must be a float between 0 and 1, or "CV" to find the best value by cross-validation'
            )

    else:
        print('Method must be one of "graphLasso" or "QUIC".')

    return cov