def heat_map(file_path, X, headers, cmap=sns.color_palette("Blues")):
    """Draw the graphical-lasso correlation matrix of ``X`` as a heat map.

    A default ``GraphLasso`` is fitted to ``X``; its estimated covariance is
    rescaled to a correlation matrix, rendered with seaborn, written to
    ``file_path`` and finally displayed.

    :param file_path: destination handed to ``plt.savefig``
    :param X: data handed to ``GraphLasso.fit``
    :param headers: tick labels used on both axes
    :param cmap: colour map forwarded to ``sns.heatmap``
    """
    covariance = GraphLasso().fit(X).covariance_

    # D^-1/2 * Cov * D^-1/2 turns the covariance into a correlation matrix.
    inv_std = np.diag(1. / np.sqrt(np.diag(covariance)))
    correlation = inv_std.dot(covariance).dot(inv_std)

    figure, _ = plt.subplots()
    figure.set_size_inches(10, 8)

    axes = sns.heatmap(
        correlation,
        cmap=cmap,
        square=True,
        xticklabels=1,
        yticklabels=1,
        linewidths=.5,
    )
    axes.set_yticklabels(headers, rotation=0, fontsize=12)
    axes.set_xticklabels(headers, rotation=90, fontsize=12)

    plt.subplots_adjust(bottom=0.4, left=0.2)
    sns.despine(left=True, bottom=True)
    plt.tight_layout()

    plt.savefig(file_path)
    plt.show()
def save_network_graph_sequence(data, alpha_seq, labels, filename):
    """Draw one graphical-lasso network per alpha and save them in one figure.

    For every value in ``alpha_seq`` a ``GraphLasso`` is fitted to ``data``;
    the resulting precision matrix is turned into a graph and drawn with a
    circular layout in its own subplot (two subplots per row).

    :param data: data handed to ``GraphLasso.fit``
    :param alpha_seq: sequence of regularisation values; its length must be
        even, otherwise a message is printed and nothing is drawn
    :param labels: node labels, in node order
    :param filename: destination of the saved figure
    :return: None
    """
    n = len(alpha_seq)
    if n % 2 != 0:
        # Parenthesised single-argument print works on Python 2 and 3 alike.
        print("make alpha an even number please.")
        return

    labels = dict(enumerate(labels))
    fig = plt.figure()
    for i, alpha in enumerate(alpha_seq):
        # Floor division: add_subplot needs an integer row count, and
        # ``n / 2`` is a float on Python 3.
        ax = fig.add_subplot(n // 2, 2, i + 1)
        gl = GraphLasso(alpha=alpha)
        gl.fit(data)
        D = nx.Graph(gl.precision_)
        nx.draw_circular(D, scale=4, node_size=150, ax=ax,
                         with_labels=True, labels=labels, font_size=6)
        ax.set_title(r"$\alpha$ = %.2e" % alpha)
    fig.savefig(filename)
def _fit(self, X):
    """Fit the graphical lasso on ``X`` and cluster the features.

    The fitted ``GraphLasso`` is stored in ``estimator_``; the labels
    returned by ``affinity_propagation`` on ``partial_corrcoef_`` are stored
    in ``labels_``. Returns ``self``.
    """
    glasso = GraphLasso(
        alpha=self.alpha,
        assume_centered=self.assume_centered,
        enet_tol=self.enet_tol,
        max_iter=self.max_iter,
        mode=self.mode,
        tol=self.tol,
    )
    self.estimator_ = glasso.fit(X)

    # Only the labels are kept; the first returned element is discarded.
    _centers, feature_labels = affinity_propagation(
        self.partial_corrcoef_, **self._apcluster_params
    )
    self.labels_ = feature_labels
    return self
def precisionCol(cleandata, k):
    """Select the columns most strongly tied to ``'Y'`` in the precision matrix.

    A LARS-mode ``GraphLasso`` is fitted to ``cleandata``. The precision
    matrix is labelled with the frame's column names and written to
    ``precision.csv`` in the current working directory. The ``k`` entries of
    the ``'Y'`` column with the largest absolute value are then selected.

    :param cleandata: DataFrame containing a column named ``'Y'``
    :param k: number of largest-magnitude entries to keep (``'Y'`` included)
    :return: index of the selected column names with ``'Y'`` removed
    :raises KeyError: if ``'Y'`` is not among the ``k`` selected entries
        (raised by ``Index.drop``)
    """
    model = GraphLasso(mode='lars')
    model.fit(cleandata)

    pre_ = pd.DataFrame(model.get_precision(),
                        index=cleandata.columns,
                        columns=cleandata.columns)
    pre_.to_csv("precision.csv")

    # Series.sort() (in-place) no longer exists in pandas; sort_values()
    # returns the same ascending order as a new Series.
    test = pre_['Y'].abs().sort_values()
    test = test[-k:]
    return test.index.drop('Y')
def create_skeleton_from_data(self, data, **kwargs):
    """Estimate the precision matrix of ``data`` with the graphical lasso.

    :param data: raw data df
    :param kwargs: optional hyper-parameters: ``alpha`` (default 0.01) and
        ``max_iter`` (default 2000), both forwarded to ``GraphLasso``
    :return: the precision matrix returned by ``GraphLasso.get_precision``
    """
    alpha = kwargs.get('alpha', 0.01)
    max_iter = kwargs.get('max_iter', 2000)
    edge_model = GraphLasso(alpha=alpha, max_iter=max_iter)
    # DataFrame.as_matrix() was removed from pandas; .values yields the same
    # ndarray.
    edge_model.fit(data.values)
    return edge_model.get_precision()
def get_other_precision(A):
    """Return the graphical-lasso precision matrix estimated from ``A``.

    Reference implementation based on scikit-learn; per the original author
    it is meant to be replaced by in-house code with the same input/output.

    :param A: the aggregated sentiment matrix, an array of
        (n_samples, n_features)
    :return: the precision matrix from ``GraphLasso.get_precision``
    """
    # Reference on sklearn's graph lasso:
    # http://scikit-learn.org/stable/modules/generated/sklearn.covariance.GraphLasso.html
    from sklearn.covariance import GraphLasso

    # alpha = regularization parameter: the higher alpha, the more
    # regularization, the sparser the inverse covariance.
    estimator = GraphLasso(alpha=1e-5)
    estimator.fit(A)
    return estimator.get_precision()
def myglasso(data, lam=0.5):
    """Fit a graphical lasso and derive the partial-correlation matrix.

    :param data: data handed to ``GraphLasso.fit``
    :param lam: regularisation strength passed as ``alpha``
    :return: tuple ``(part, prec, cov)`` where ``prec`` and ``cov`` are the
        fitted precision and covariance matrices and
        ``part[i, j] = -prec[i, j] / sqrt(prec[i, i] * prec[j, j])``
        (so the diagonal of ``part`` is -1)
    """
    model = GraphLasso(alpha=lam)
    model.fit(data)
    cov = model.covariance_
    prec = model.precision_

    # Vectorised form of the element-wise formula above. outer(d, d)[i, j] is
    # exactly prec[i, i] * prec[j, j], so the result matches the explicit
    # double loop bit for bit.
    diag = np.diag(prec)
    part = -prec / np.sqrt(np.outer(diag, diag))
    return part, prec, cov
def predict(self, data, **kwargs):
    """Build a graph from the graphical-lasso precision matrix of ``data``.

    :param data: raw data df
    :param kwargs: optional ``alpha`` (default 0.01) and ``max_iter``
        (default 2000), both forwarded to ``GraphLasso``
    :return: ``networkx.DiGraph`` built from the precision matrix, with
        nodes relabelled to the column names of ``data``
    """
    edge_model = GraphLasso(
        alpha=kwargs.get('alpha', 0.01),
        max_iter=kwargs.get('max_iter', 2000),
    )
    edge_model.fit(data.values)

    graph = nx.DiGraph(edge_model.get_precision())
    # Positional node ids -> column names.
    column_of_node = dict(enumerate(data.columns))
    return nx.relabel_nodes(graph, column_of_node)
def predict(self, data, alpha=0.01, max_iter=2000, **kwargs):
    """Predict the graph skeleton with the graphical lasso.

    Args:
        data (pandas.DataFrame): observational data
        alpha (float): regularization parameter
        max_iter (int): maximum number of iterations
        **kwargs: accepted but not used by this method

    Returns:
        networkx.DiGraph: graph built from the estimated precision matrix,
        with nodes relabelled to the column names of ``data``
    """
    estimator = GraphLasso(alpha=alpha, max_iter=max_iter)
    estimator.fit(data.values)

    skeleton = nx.DiGraph(estimator.get_precision())
    # Positional node ids -> column names.
    names = dict(enumerate(data.columns))
    return nx.relabel_nodes(skeleton, names)
def test_graph_lasso(random_state=0):
    """Check ``graph_lasso`` cost monotonicity and smoke-test ``GraphLasso``.

    Data are drawn from a multivariate normal whose precision matrix is a
    random sparse SPD matrix; ``graph_lasso`` is run on the empirical
    covariance and compared with the ``GraphLasso`` estimator.
    """
    # Sample data from a sparse multivariate normal
    dim = 20
    n_samples = 100
    random_state = check_random_state(random_state)
    prec = make_sparse_spd_matrix(dim, alpha=.95, random_state=random_state)
    cov = linalg.inv(prec)
    X = random_state.multivariate_normal(np.zeros(dim), cov, size=n_samples)
    emp_cov = empirical_covariance(X)
    for alpha in (.1, .01):
        covs = dict()
        for method in ('cd', 'lars'):
            # NOTE(review): neither loop variable reaches this call -- alpha
            # is hard-coded to .1 and ``mode=method`` is not passed, so both
            # "methods" run the same computation and the cd/lars comparison
            # below is vacuous. Passing ``alpha=alpha, mode=method`` would
            # also require the smoke test's alpha to match the last loop
            # value, and possibly a looser tolerance -- verify by running.
            cov_, _, costs = graph_lasso(emp_cov, alpha=.1, return_costs=True)
            covs[method] = cov_
            # Each cost entry is unpacked into two columns; only the first
            # (``costs``) is checked.
            costs, dual_gap = np.array(costs).T
            # Check that the costs always decrease
            assert_array_less(np.diff(costs), 0)
        # Check that the 2 approaches give similar results
        assert_array_almost_equal(covs['cd'], covs['lars'])
    # Smoke test the estimator
    model = GraphLasso(alpha=.1).fit(X)
    assert_array_almost_equal(model.covariance_, covs['cd'])
def __init__(self, n_components=2, n_iter=100, alpha=None):
    """Set up one ``GraphLasso`` estimator per mixture component.

    :param n_components: number of components (and of estimators created)
    :param n_iter: stored as ``self.n_iter``
    :param alpha: per-component regularisation values, indexable by
        component number; defaults to 10 for every component
    """
    self.n_components = n_components
    self.n_iter = n_iter
    self.min_covar = 1e-3
    # Identity test: ``alpha == None`` is evaluated element-wise for numpy
    # arrays and then fails inside ``if`` with an ambiguous truth value.
    if alpha is None:
        self.alpha = [10 for _ in range(self.n_components)]
    else:
        self.alpha = alpha
    self.model = [
        GraphLasso(alpha=self.alpha[k], assume_centered=False, tol=1e-4)
        for k in range(self.n_components)
    ]
def test_graph_lasso(random_state=0):
    """Compare the 'cd' and 'lars' solvers of ``graph_lasso`` and the estimator.

    Both solvers are run on the empirical covariance of data sampled from a
    sparse Gaussian model for several alphas; their covariance and precision
    estimates must agree to 3 decimals. The ``GraphLasso`` estimator is then
    smoke-tested against the last alpha's results, and ``assume_centered``
    is checked to make no difference on centred data.
    """
    # Sample data from a sparse multivariate normal
    n_features = 20
    n_draws = 100
    rng = check_random_state(random_state)
    true_prec = make_sparse_spd_matrix(n_features, alpha=.95, random_state=rng)
    true_cov = linalg.inv(true_prec)
    X = rng.multivariate_normal(np.zeros(n_features), true_cov, size=n_draws)
    emp_cov = empirical_covariance(X)

    for alpha in (0., .1, .25):
        covs = {}
        icovs = {}
        for method in ('cd', 'lars'):
            covs[method], icovs[method], cost_pairs = graph_lasso(
                emp_cov, alpha=alpha, mode=method, return_costs=True)
            objective, _dual_gap = np.array(cost_pairs).T
            # Check that the costs always decrease (doesn't hold if alpha == 0)
            if alpha != 0:
                assert_array_less(np.diff(objective), 0)
        # Check that the 2 approaches give similar results
        assert_array_almost_equal(covs['cd'], covs['lars'], decimal=3)
        assert_array_almost_equal(icovs['cd'], icovs['lars'], decimal=3)

    # Smoke test the estimator (covs holds the results of the last alpha, .25)
    model = GraphLasso(alpha=.25).fit(X)
    model.score(X)
    for method in ('cd', 'lars'):
        assert_array_almost_equal(model.covariance_, covs[method], decimal=3)

    # For a centered matrix, assume_centered could be chosen True or False
    # Check that this returns indeed the same result for centered data
    Z = X - X.mean(0)
    precs = [
        GraphLasso(assume_centered=flag).fit(Z).precision_
        for flag in (False, True)
    ]
    assert_array_almost_equal(precs[0], precs[1])
def test_graph_lasso(random_state=0):
    """Compare the 'cd' and 'lars' solvers of ``graph_lasso`` and the estimator.

    Both solvers are run on the empirical covariance of data sampled from a
    sparse Gaussian model for several alphas; their covariance and precision
    estimates must agree to 4 decimals. The ``GraphLasso`` estimator is then
    smoke-tested against the last alpha's results, and ``assume_centered``
    is checked to make no difference on centred data.
    """
    # Sample data from a sparse multivariate normal
    n_features = 20
    n_draws = 100
    rng = check_random_state(random_state)
    true_prec = make_sparse_spd_matrix(n_features, alpha=.95, random_state=rng)
    true_cov = linalg.inv(true_prec)
    X = rng.multivariate_normal(np.zeros(n_features), true_cov, size=n_draws)
    emp_cov = empirical_covariance(X)

    for alpha in (0., .1, .25):
        covs = {}
        icovs = {}
        for method in ('cd', 'lars'):
            covs[method], icovs[method], cost_pairs = graph_lasso(
                emp_cov, alpha=alpha, mode=method, return_costs=True)
            objective, _dual_gap = np.array(cost_pairs).T
            # Check that the costs always decrease (doesn't hold if alpha == 0)
            if alpha != 0:
                assert_array_less(np.diff(objective), 0)
        # Check that the 2 approaches give similar results
        assert_array_almost_equal(covs['cd'], covs['lars'], decimal=4)
        assert_array_almost_equal(icovs['cd'], icovs['lars'], decimal=4)

    # Smoke test the estimator (covs holds the results of the last alpha, .25)
    model = GraphLasso(alpha=.25).fit(X)
    model.score(X)
    for method in ('cd', 'lars'):
        assert_array_almost_equal(model.covariance_, covs[method], decimal=4)

    # For a centered matrix, assume_centered could be chosen True or False
    # Check that this returns indeed the same result for centered data
    Z = X - X.mean(0)
    precs = [
        GraphLasso(assume_centered=flag).fit(Z).precision_
        for flag in (False, True)
    ]
    assert_array_almost_equal(precs[0], precs[1])
def computePartialCorrelations(coupling_data, reg_alpha):
    """Estimate a sparse precision matrix and its partial correlations.

    :param coupling_data: data handed to ``GraphLasso.fit`` (not
        standardised here)
    :param reg_alpha: regularisation strength passed as ``alpha``
    :return: tuple ``(precision, partial_correlations)``;
        ``partial_correlations`` holds ``-p_ij / sqrt(p_ii * p_jj)`` in its
        strict upper triangle and zeros on and below the diagonal
    """
    # Sparse inverse covariance matrix estimation
    estimator = GraphLasso(alpha=reg_alpha, assume_centered=False,
                           mode='cd', max_iter=500)
    estimator.fit(coupling_data)
    print("Sparse inverse covariance matrix was estiamted with {0} iterations.".format(estimator.n_iter_))
    print("\t\t\t and by using the parameters: ", estimator.get_params())

    prec = estimator.get_precision()
    # Diagonal of the precision matrix
    prec_diag = np.diag(prec)

    # Partial correlations are proportional to the precision entries:
    # rho_ij = - p_ij / sqrt(p_ii * p_jj)
    partial_correlations = -prec / np.sqrt(np.outer(prec_diag, prec_diag))

    # Zero the lower half (diagonal included). The indices follow the actual
    # matrix size rather than a hard-coded 400, which only worked for exactly
    # 400 features.
    partial_correlations[np.tril_indices_from(partial_correlations)] = 0
    return prec, partial_correlations
def fit(self, X):
    """Estimate the mean and a sparse precision matrix from ``X``.

    The regularisation strength is chosen as follows:

    * if ``self.alpha_`` is truthy it is used directly;
    * else if ``self.method_ == 'cv'`` it is selected by ``GraphLassoCV``;
    * else if ``self.method_ == 'bic'`` alphas from 0.01 to 0.49 in steps of
      0.01 are tried and the one with the lowest ``self.bic`` score wins
      (each score, or ``None`` for a failed fit, is appended to
      ``self.bic_scores``).

    Sets ``mean_``, ``precision_`` and, for 'cv'/'bic', ``alpha_``.
    ``precision_`` ends up ``None`` if every fit of the BIC sweep failed.

    :param X: training data; the mean is taken over axis 0
    :raises NotImplementedError: for any other ``method_`` when no alpha is
        preset
    """
    self.mean_ = np.mean(X, axis=0)
    if self.alpha_:
        gl = GraphLasso(self.alpha_, max_iter=100000)
        gl.fit(X)
        self.precision_ = gl.precision_
    elif self.method_ == 'cv':
        gl = GraphLassoCV(verbose=self.verbose)
        gl.fit(X)
        self.alpha_ = gl.alpha_
        self.precision_ = gl.precision_
    elif self.method_ == 'bic':
        min_score = np.inf
        min_precision = None
        alphas = np.arange(0.01, 0.5, 0.01)
        for a in alphas:
            if self.verbose:
                print("[GMRF] Alpha = {}".format(a))
            gl = GraphLasso(a, max_iter=100000)
            try:
                gl.fit(X)
                # self.bic() is evaluated on the candidate, so precision_
                # must be set before scoring.
                self.precision_ = gl.precision_
                score = self.bic(X, gamma=0.0)
                self.bic_scores.append(score)
                if score <= min_score:
                    min_score = score
                    self.alpha_ = a
                    min_precision = np.copy(self.precision_)
            except Exception:
                # Best effort: a failing alpha is recorded and skipped.
                # ``Exception`` (not a bare except) lets KeyboardInterrupt and
                # SystemExit abort the sweep.
                self.bic_scores.append(None)
        self.precision_ = min_precision
    else:
        raise NotImplementedError(self.method_ + " is not a valid method, use 'cv' or 'bic'")
def save_network_graph_sequence(data, alpha_seq, labels, filename):
    """Draw one graphical-lasso network per alpha and save them in one figure.

    For every value in ``alpha_seq`` a ``GraphLasso`` is fitted to ``data``;
    the resulting precision matrix is turned into a graph and drawn with a
    circular layout in its own subplot (two subplots per row).

    :param data: data handed to ``GraphLasso.fit``
    :param alpha_seq: sequence of regularisation values; its length must be
        even, otherwise a message is printed and nothing is drawn
    :param labels: node labels, in node order
    :param filename: destination of the saved figure
    :return: None
    """
    n = len(alpha_seq)
    if n % 2 != 0:
        # Parenthesised single-argument print works on Python 2 and 3 alike.
        print("make alpha an even number please.")
        return

    labels = dict(enumerate(labels))
    fig = plt.figure()
    for i, alpha in enumerate(alpha_seq):
        # Floor division: add_subplot needs an integer row count, and
        # ``n / 2`` is a float on Python 3.
        ax = fig.add_subplot(n // 2, 2, i + 1)
        gl = GraphLasso(alpha=alpha)
        gl.fit(data)
        D = nx.Graph(gl.precision_)
        nx.draw_circular(D, scale=4, node_size=150, ax=ax,
                         with_labels=True, labels=labels, font_size=6)
        ax.set_title(r"$\alpha$ = %.2e" % alpha)
    fig.savefig(filename)
def _init_para(self, X, y):
    """Initialise the parameters and add one ``theta`` entry per label edge.

    The base-class parameters are extended with a ``'theta'`` parameter
    holding one value per edge between columns of ``y``. With
    ``self.sparsity == 0`` every column pair is an edge; otherwise edges are
    the non-zero off-diagonal entries of a ``GraphLasso(alpha=0.1)``
    precision matrix fitted on ``y``. The edge list is also stored on
    ``self.edges``.

    Returns the ``(p0, shape)`` pair obtained from ``BASE._init_para``,
    with ``p0['theta']`` added.
    """
    p0, shape = BASE._init_para(self, X, y)

    if self.sparsity == 0:
        # combinations() over a range already yields ascending (low, high)
        # pairs.
        edges = [
            [low, high]
            for low, high in itertools.combinations(range(y.shape[1]), 2)
        ]
    else:
        glasso = GraphLasso(alpha=0.1)
        glasso.fit(y)
        adjacency = glasso.get_precision() != 0
        n_rows, n_cols = adjacency.shape[0], adjacency.shape[1]
        # Upper triangle only, so each undirected edge appears once.
        edges = [
            [row, col]
            for row in range(n_rows)
            for col in range(row + 1, n_cols)
            if adjacency[row, col]
        ]

    # NOTE(review): the int8 cast looks like it would wrap node indices
    # above 127 -- confirm the expected number of label columns.
    self.edges = T.shared(np.array(edges).T).astype('int8')

    if self.verbose:
        print('(edges)', edges)
        print('(y) shape:', y.shape, 'labels:', np.unique(y))
        print('(X) shape:', X.shape, 'std:', np.std(X))

    # One initial value per edge: a scalar when the copula is shared,
    # otherwise a (shape[1], shape[1]) matrix.
    theta = [
        0.01 if self.shared_copula else np.ones((shape[1], shape[1])) * 0.01
        for _ in edges
    ]
    p0['theta'] = tespo.parameter(theta, const=False)
    return p0, shape
def __init__(self, n_components, n_iter=5, alpha=None):
    """Set up one verbose ``GraphLasso`` estimator per internal component.

    :param n_components: requested number of components; two extra
        components are added internally
        (``self.n_components = n_components + 2``)
    :param n_iter: stored as ``self.n_iter``
    :param alpha: per-component regularisation values, indexable by
        component number over all ``n_components + 2`` components; defaults
        to 10 for every component
    """
    self.n_components = n_components + 2
    self.n_iter = n_iter
    self.min_covar = 1e-3
    self.tresh = 1e-3
    self.lambd = 0.5
    # Identity test: ``alpha == None`` is evaluated element-wise for numpy
    # arrays and then fails inside ``if`` with an ambiguous truth value.
    if alpha is None:
        self.alpha = [10 for _ in range(self.n_components)]
    else:
        self.alpha = alpha
    self.model = [
        GraphLasso(alpha=self.alpha[k], assume_centered=True, tol=1e-4,
                   verbose=True)
        for k in range(self.n_components)
    ]
class SparseStructureLearning(BaseOutlierDetector):
    """Outlier detector using sparse structure learning.

    Parameters
    ----------
    alpha : float, default 0.01
        Regularization parameter.

    assume_centered : bool, default False
        If True, data are not centered before computation.

    contamination : float, default 0.1
        Proportion of outliers in the data set. Used to define the threshold.

    enet_tol : float, default 1e-04
        Tolerance for the elastic net solver used to calculate the descent
        direction. This parameter controls the accuracy of the search
        direction for a given column update, not of the overall parameter
        estimate. Only used for mode='cd'.

    max_iter : integer, default 100
        Maximum number of iterations.

    mode : str, default 'cd'
        Lasso solver to use: coordinate descent or LARS.

    tol : float, default 1e-04
        Tolerance to declare convergence.

    apcluster_params : dict, default None
        Additional parameters passed to
        ``sklearn.cluster.affinity_propagation``.

    Attributes
    ----------
    anomaly_score_ : array-like of shape (n_samples,)
        Anomaly score for each training data.

    threshold_ : float
        Threshold.

    covariance_ : array-like of shape (n_features, n_features)
        Estimated covariance matrix.

    graphical_model_ : networkx Graph
        GGM.

    isolates_ : array-like of shape (n_isolates,)
        Indices of isolates.

    labels_ : array-like of shape (n_features,)
        Label of each feature.

    location_ : array-like of shape (n_features,)
        Estimated location.

    n_iter_ : int
        Number of iterations run.

    partial_corrcoef_ : array-like of shape (n_features, n_features)
        Partial correlation coefficient matrix.

    precision_ : array-like of shape (n_features, n_features)
        Estimated pseudo inverse matrix.

    References
    ----------
    .. [#ide09] Ide, T., Lozano, C., Abe, N., and Liu, Y.,
        "Proximity-based anomaly detection using sparse structure learning,"
        In Proceedings of SDM, pp. 97-108, 2009.

    Examples
    --------
    >>> import numpy as np
    >>> from kenchi.outlier_detection import SparseStructureLearning
    >>> X = np.array([
    ...     [0., 0.], [1., 1.], [2., 0.], [3., -1.], [4., 0.],
    ...     [5., 1.], [6., 0.], [7., -1.], [8., 0.], [1000., 1.]
    ... ])
    >>> det = SparseStructureLearning()
    >>> det.fit_predict(X)
    array([ 1,  1,  1,  1,  1,  1,  1,  1,  1, -1])
    """

    @property
    def _apcluster_params(self):
        # Normalise the "no extra parameters" case to an empty dict so it can
        # always be splatted into affinity_propagation.
        if self.apcluster_params is None:
            return dict()
        else:
            return self.apcluster_params

    @property
    def covariance_(self):
        return self.estimator_.covariance_

    @property
    def graphical_model_(self):
        import networkx as nx

        # Only the strict lower triangle is used, so each pair of features
        # contributes at most one edge and the diagonal yields no self-loops.
        adjacency = np.tril(self.partial_corrcoef_, k=-1)

        # ``from_numpy_matrix`` was removed in networkx 3.0; prefer its
        # replacement and fall back only on releases that predate it.
        build_graph = getattr(nx, 'from_numpy_array', None)
        if build_graph is None:
            build_graph = nx.from_numpy_matrix

        return build_graph(adjacency)

    @property
    def isolates_(self):
        import networkx as nx

        return np.array(list(nx.isolates(self.graphical_model_)))

    @property
    def location_(self):
        return self.estimator_.location_

    @property
    def n_iter_(self):
        return self.estimator_.n_iter_

    @property
    def partial_corrcoef_(self):
        n_features, _ = self.precision_.shape
        diag = np.diag(self.precision_)[np.newaxis]
        # rho_ij = -p_ij / sqrt(p_ii * p_jj)
        partial_corrcoef = -self.precision_ / np.sqrt(diag.T @ diag)
        # A stride of n_features + 1 over the flat view walks the diagonal.
        partial_corrcoef.flat[::n_features + 1] = 1.

        return partial_corrcoef

    @property
    def precision_(self):
        return self.estimator_.precision_

    def __init__(
        self, alpha=0.01, assume_centered=False, contamination=0.1,
        enet_tol=1e-04, max_iter=100, mode='cd', tol=1e-04,
        apcluster_params=None
    ):
        super().__init__(contamination=contamination)

        self.alpha = alpha
        self.apcluster_params = apcluster_params
        self.assume_centered = assume_centered
        self.enet_tol = enet_tol
        self.max_iter = max_iter
        self.mode = mode
        self.tol = tol

    def _check_is_fitted(self):
        super()._check_is_fitted()

        check_is_fitted(
            self, [
                'covariance_', 'labels_', 'location_', 'n_iter_',
                'partial_corrcoef_', 'precision_'
            ]
        )

    def _fit(self, X):
        # Fit the graphical lasso, then cluster the features on the
        # resulting partial correlations; only the labels are kept.
        self.estimator_ = GraphLasso(
            alpha=self.alpha,
            assume_centered=self.assume_centered,
            enet_tol=self.enet_tol,
            max_iter=self.max_iter,
            mode=self.mode,
            tol=self.tol
        ).fit(X)

        _, self.labels_ = affinity_propagation(
            self.partial_corrcoef_, **self._apcluster_params
        )

        return self

    def _anomaly_score(self, X):
        return self.estimator_.mahalanobis(X)

    def featurewise_anomaly_score(self, X):
        """Compute the feature-wise anomaly scores for each sample.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Data.

        Returns
        -------
        anomaly_score : array-like of shape (n_samples, n_features)
            Feature-wise anomaly scores for each sample.
        """

        self._check_is_fitted()

        X = self._check_array(X, estimator=self)

        return 0.5 * np.log(
            2. * np.pi / np.diag(self.precision_)
        ) + 0.5 / np.diag(self.precision_) * (
            (X - self.location_) @ self.precision_
        )**2

    def score(self, X, y=None):
        """Compute the mean log-likelihood of the given data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Data.

        y : ignored

        Returns
        -------
        score : float
            Mean log-likelihood of the given data.
        """

        self._check_is_fitted()

        X = self._check_array(X, estimator=self)

        return self.estimator_.score(X)

    def plot_graphical_model(self, **kwargs):
        """Plot the Gaussian Graphical Model (GGM).

        Parameters
        ----------
        ax : matplotlib Axes, default None
            Target axes instance.

        figsize : tuple, default None
            Tuple denoting figure size of the plot.

        filename : str, default None
            If provided, save the current figure.

        random_state : int, RandomState instance, default None
            Seed of the pseudo random number generator.

        title : string, default 'GGM (n_clusters, n_features, n_isolates)'
            Axes title. To disable, pass None.

        **kwargs : dict
            Other keywords passed to ``nx.draw_networkx``.

        Returns
        -------
        ax : matplotlib Axes
            Axes on which the plot was drawn.
        """

        self._check_is_fitted()

        n_clusters = np.max(self.labels_) + 1
        n_isolates, = self.isolates_.shape
        title = (
            f'GGM ('
            f'n_clusters={n_clusters}, '
            f'n_features={self.n_features_}, '
            f'n_isolates={n_isolates}'
            f')'
        )
        kwargs['G'] = self.graphical_model_

        kwargs.setdefault('node_color', self.labels_)
        kwargs.setdefault('title', title)

        # Inside the method body this name resolves to the module-level
        # plotting helper, not to this method.
        return plot_graphical_model(**kwargs)

    def plot_partial_corrcoef(self, **kwargs):
        """Plot the partial correlation coefficient matrix.

        Parameters
        ----------
        ax : matplotlib Axes, default None
            Target axes instance.

        cbar : bool, default True.
            If True, draw a colorbar.

        figsize : tuple, default None
            Tuple denoting figure size of the plot.

        filename : str, default None
            If provided, save the current figure.

        title : string, default 'Partial correlation'
            Axes title. To disable, pass None.

        **kwargs : dict
            Other keywords passed to ``ax.pcolormesh``.

        Returns
        -------
        ax : matplotlib Axes
            Axes on which the plot was drawn.
        """

        self._check_is_fitted()

        kwargs['partial_corrcoef'] = self.partial_corrcoef_

        # Inside the method body this name resolves to the module-level
        # plotting helper, not to this method.
        return plot_partial_corrcoef(**kwargs)
def get_conn_matrix(time_series, conn_model):
    """Estimate a connectivity matrix from ``time_series``.

    Parameters
    ----------
    time_series : array-like
        Passed unchanged to the selected estimator (presumably shaped
        (n_samples, n_features) -- TODO confirm against callers).
    conn_model : str
        One of 'corr', 'corr_fast', 'partcorr', 'tangent', 'cov', 'sps',
        'QuicGraphLasso', 'QuicGraphLassoCV', 'QuicGraphLassoEBIC' or
        'AdaptiveQuicGraphLasso'.

    Returns
    -------
    conn_matrix
        The matrix produced by the selected estimator. For 'sps' and the
        skggm models this is the negated precision matrix.

    Raises
    ------
    ImportError
        If 'corr_fast' is requested and brainiak is not installed.
    ValueError
        If the shrinkage fallback for 'cov'/'sps' fails, or if no matrix was
        produced (unknown ``conn_model`` or failed 'corr_fast' run).
    """
    import warnings
    # NOTE(review): this silences warnings process-wide and is never undone.
    warnings.simplefilter("ignore")
    from nilearn.connectome import ConnectivityMeasure
    from sklearn.covariance import GraphLassoCV
    try:
        from brainiak.fcma.util import compute_correlation
    except ImportError:
        # Remembered explicitly so 'corr_fast' can report the missing
        # dependency instead of failing on an unbound name.
        compute_correlation = None

    conn_matrix = None
    if conn_model == 'corr':
        # credit: nilearn
        print('\nComputing correlation matrix...\n')
        conn_measure = ConnectivityMeasure(kind='correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
    elif conn_model == 'corr_fast':
        # credit: brainiak
        if compute_correlation is None:
            raise ImportError(
                'Cannot run accelerated correlation computation due to a missing dependency. You need brainiak installed!'
            )
        try:
            print('\nComputing accelerated fcma correlation matrix...\n')
            conn_matrix = compute_correlation(time_series, time_series)
        except RuntimeError:
            print(
                'Cannot run accelerated correlation computation due to a missing dependency. You need brainiak installed!'
            )
    elif conn_model == 'partcorr':
        # credit: nilearn
        print('\nComputing partial correlation matrix...\n')
        conn_measure = ConnectivityMeasure(kind='partial correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
    elif conn_model == 'tangent':
        # credit: nilearn
        print('\nComputing tangent matrix...\n')
        conn_measure = ConnectivityMeasure(kind='tangent')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
    elif conn_model == 'cov' or conn_model == 'sps':
        # Fit estimator to matrix to get sparse matrix
        estimator = GraphLassoCV()
        # Stays None unless the shrinkage fallback produces a fitted model.
        estimator_shrunk = None
        try:
            print('\nComputing covariance...\n')
            estimator.fit(time_series)
        except Exception:
            print(
                'Unstable Lasso estimation--Attempting to re-run by first applying shrinkage...'
            )
            try:
                from sklearn.covariance import GraphLasso, empirical_covariance, shrunk_covariance
                emp_cov = empirical_covariance(time_series)
                for i in np.arange(0.8, 0.99, 0.01):
                    shrunk_cov = shrunk_covariance(emp_cov, shrinkage=i)
                    alphaRange = 10.0**np.arange(-8, 0)
                    for alpha in alphaRange:
                        candidate = GraphLasso(alpha)
                        try:
                            # NOTE(review): the shrunk covariance matrix is
                            # passed to fit() as if it were sample data --
                            # kept as in the original, confirm intent.
                            candidate.fit(shrunk_cov)
                        except FloatingPointError:
                            print("Failed at alpha=%s" % alpha)
                            continue
                        print(
                            "Calculated graph-lasso covariance matrix for alpha=%s"
                            % alpha)
                        # Published only after a successful fit, so an
                        # unfitted estimator can never leak out of the loop.
                        estimator_shrunk = candidate
                        break
                    if estimator_shrunk is not None:
                        break
            except Exception:
                raise ValueError(
                    'Unstable Lasso estimation! Shrinkage failed.')
            if estimator_shrunk is None:
                # Every shrinkage/alpha combination failed to converge.
                raise ValueError(
                    'Unstable Lasso estimation! Shrinkage failed.')

        fitted = estimator if estimator_shrunk is None else estimator_shrunk
        if conn_model == 'sps':
            if estimator_shrunk is None:
                print(
                    '\nFetching precision matrix from covariance estimator...\n'
                )
            else:
                print(
                    '\nFetching shrunk precision matrix from covariance estimator...\n'
                )
            conn_matrix = -fitted.precision_
        elif conn_model == 'cov':
            print(
                '\nFetching covariance matrix from covariance estimator...\n'
            )
            conn_matrix = fitted.covariance_
    elif conn_model == 'QuicGraphLasso':
        from inverse_covariance import QuicGraphLasso
        # Compute the sparse inverse covariance via QuicGraphLasso
        # credit: skggm
        model = QuicGraphLasso(init_method='cov', lam=0.5, mode='default',
                               verbose=1)
        print('\nCalculating QuicGraphLasso precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'QuicGraphLassoCV':
        from inverse_covariance import QuicGraphLassoCV
        # Compute the sparse inverse covariance via QuicGraphLassoCV
        # credit: skggm
        model = QuicGraphLassoCV(init_method='cov', verbose=1)
        print(
            '\nCalculating QuicGraphLassoCV precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'QuicGraphLassoEBIC':
        from inverse_covariance import QuicGraphLassoEBIC
        # Compute the sparse inverse covariance via QuicGraphLassoEBIC
        # credit: skggm
        model = QuicGraphLassoEBIC(init_method='cov', verbose=1)
        print(
            '\nCalculating QuicGraphLassoEBIC precision matrix using skggm...\n'
        )
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'AdaptiveQuicGraphLasso':
        from inverse_covariance import AdaptiveGraphLasso, QuicGraphLassoEBIC
        # Compute the sparse inverse covariance via
        # AdaptiveGraphLasso + QuicGraphLassoEBIC + method='binary'
        # credit: skggm
        model = AdaptiveGraphLasso(
            estimator=QuicGraphLassoEBIC(init_method='cov', ),
            method='binary',
        )
        print(
            '\nCalculating AdaptiveQuicGraphLasso precision matrix using skggm...\n'
        )
        model.fit(time_series)
        conn_matrix = -model.estimator_.precision_

    if conn_matrix is None:
        # Previously this surfaced as an UnboundLocalError at the return.
        raise ValueError(
            'No connectivity matrix could be computed for conn_model=%r'
            % (conn_model,))
    return conn_matrix
def get_conn_matrix(time_series, conn_model, dir_path, node_size, smooth,
                    dens_thresh, network, ID, mask, min_span_tree, disp_filt,
                    parc, prune, atlas_select, uatlas_select, label_names,
                    coords, vox_array):
    """Estimate a connectivity matrix and pass the pipeline arguments through.

    Parameters
    ----------
    time_series : array-like
        Passed unchanged to the selected estimator (presumably shaped
        (n_samples, n_features) -- TODO confirm against callers).
    conn_model : str
        One of 'corr', 'partcorr', 'cov', 'sps', 'QuicGraphicalLasso',
        'QuicGraphLassoCV', 'QuicGraphicalLassoEBIC' or
        'AdaptiveQuicGraphLasso'. Any other value leaves ``conn_matrix`` as
        ``None``.
    label_names, coords : sequence
        Converted to numpy arrays before being returned.
    vox_array
        Currently unused.
    dir_path, node_size, smooth, dens_thresh, network, ID, mask,
    min_span_tree, disp_filt, parc, prune, atlas_select, uatlas_select
        Returned unchanged.

    Returns
    -------
    tuple
        ``(conn_matrix, conn_model, dir_path, node_size, smooth, dens_thresh,
        network, ID, mask, min_span_tree, disp_filt, parc, prune,
        atlas_select, uatlas_select, label_names, coords)``. For 'sps' and
        the skggm models ``conn_matrix`` is the negated precision matrix.

    Raises
    ------
    RuntimeError
        If both the cross-validated fit and the shrinkage fallback fail for
        'cov'/'sps'.
    ImportError
        If a skggm model is requested and ``inverse_covariance`` cannot be
        imported.
    """
    from nilearn.connectome import ConnectivityMeasure
    from sklearn.covariance import GraphLassoCV

    conn_matrix = None
    if conn_model == 'corr':
        # credit: nilearn
        print('\nComputing correlation matrix...\n')
        conn_measure = ConnectivityMeasure(kind='correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
    elif conn_model == 'partcorr':
        # credit: nilearn
        print('\nComputing partial correlation matrix...\n')
        conn_measure = ConnectivityMeasure(kind='partial correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
    elif conn_model == 'cov' or conn_model == 'sps':
        # Fit estimator to matrix to get sparse matrix
        estimator_shrunk = None
        estimator = GraphLassoCV()
        try:
            print('\nComputing covariance...\n')
            estimator.fit(time_series)
        except Exception:
            print(
                'Unstable Lasso estimation--Attempting to re-run by first applying shrinkage...'
            )
            # The cross-validated fit is unusable; dropping it makes the
            # failure check below effective.
            estimator = None
            try:
                from sklearn.covariance import GraphLasso, empirical_covariance, shrunk_covariance
                emp_cov = empirical_covariance(time_series)
                # NOTE(review): as in the original, every shrinkage value is
                # tried and the last successful fit wins; stopping at the
                # first success may have been intended -- confirm.
                for i in np.arange(0.8, 0.99, 0.01):
                    shrunk_cov = shrunk_covariance(emp_cov, shrinkage=i)
                    alphaRange = 10.0**np.arange(-8, 0)
                    for alpha in alphaRange:
                        candidate = GraphLasso(alpha)
                        try:
                            candidate.fit(shrunk_cov)
                        except Exception:
                            print(
                                "Covariance estimation failed with shrinkage at alpha=%s"
                                % alpha)
                            continue
                        print(
                            "Retrying covariance matrix estimate with alpha=%s"
                            % alpha)
                        # Published only after a successful fit, so an
                        # unfitted estimator can never leak out of the loop.
                        estimator_shrunk = candidate
                        break
            except ValueError:
                print(
                    'Unstable Lasso estimation! Shrinkage failed. A different connectivity model may be needed.'
                )

        if estimator is None and estimator_shrunk is None:
            raise RuntimeError('ERROR: Covariance estimation failed.')

        if conn_model == 'sps':
            if estimator_shrunk is None:
                print(
                    '\nFetching precision matrix from covariance estimator...\n'
                )
                conn_matrix = -estimator.precision_
            else:
                print(
                    '\nFetching shrunk precision matrix from covariance estimator...\n'
                )
                conn_matrix = -estimator_shrunk.precision_
        elif conn_model == 'cov':
            if estimator_shrunk is None:
                print(
                    '\nFetching covariance matrix from covariance estimator...\n'
                )
                conn_matrix = estimator.covariance_
            else:
                conn_matrix = estimator_shrunk.covariance_
    elif conn_model == 'QuicGraphicalLasso':
        try:
            from inverse_covariance import QuicGraphicalLasso
        except ImportError:
            print('Cannot run QuicGraphLasso. Skggm not installed!')
            # Re-raise: continuing would only fail on the unbound class name.
            raise
        # Compute the sparse inverse covariance via QuicGraphLasso
        # credit: skggm
        model = QuicGraphicalLasso(init_method='cov', lam=0.5, mode='default',
                                   verbose=1)
        print('\nCalculating QuicGraphLasso precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'QuicGraphLassoCV':
        try:
            from inverse_covariance import QuicGraphicalLassoCV
        except ImportError:
            print('Cannot run QuicGraphLassoCV. Skggm not installed!')
            raise
        # Compute the sparse inverse covariance via QuicGraphLassoCV
        # credit: skggm
        model = QuicGraphicalLassoCV(init_method='cov', verbose=1)
        print(
            '\nCalculating QuicGraphLassoCV precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'QuicGraphicalLassoEBIC':
        try:
            from inverse_covariance import QuicGraphicalLassoEBIC
        except ImportError:
            print('Cannot run QuicGraphLassoEBIC. Skggm not installed!')
            raise
        # Compute the sparse inverse covariance via QuicGraphLassoEBIC
        # credit: skggm
        model = QuicGraphicalLassoEBIC(init_method='cov', verbose=1)
        print(
            '\nCalculating QuicGraphLassoEBIC precision matrix using skggm...\n'
        )
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'AdaptiveQuicGraphLasso':
        try:
            from inverse_covariance import AdaptiveQuicGraphicalLasso, QuicGraphicalLassoEBIC
        except ImportError:
            print('Cannot run AdaptiveGraphLasso. Skggm not installed!')
            raise
        # Compute the sparse inverse covariance via
        # AdaptiveGraphLasso + QuicGraphLassoEBIC + method='binary'
        # credit: skggm
        model = AdaptiveQuicGraphicalLasso(
            estimator=QuicGraphicalLassoEBIC(init_method='cov', ),
            method='binary',
        )
        print(
            '\nCalculating AdaptiveQuicGraphLasso precision matrix using skggm...\n'
        )
        model.fit(time_series)
        conn_matrix = -model.estimator_.precision_

    # NOTE: weighting the matrix by the voxel count of each parcel
    # (vox_array) is not implemented; vox_array is currently unused.

    coords = np.array(coords)
    label_names = np.array(label_names)
    return conn_matrix, conn_model, dir_path, node_size, smooth, dens_thresh, network, ID, mask, min_span_tree, disp_filt, parc, prune, atlas_select, uatlas_select, label_names, coords
# Reconstruction error of the 750-point sample under PPCA
# (sample_750 and re_750points_ppca are defined earlier in the script).
re_error_750_ppca = getReconstructionError(sample_750, re_750points_ppca.T)
drawReconstructionError(re_error_750_ppca)
plt.title('reconstruction error of 750 points of PPCA')
plt.show()

# Reconstruct the 250 points: rows 750..999 of `sample` are projected with
# W_ppca, mapped back with getPPCAInverseTransform, and their reconstruction
# error is plotted.
sample_250 = sample[750:1000]
re_250points_ppca = W_ppca.dot(sample_250.T)
re_250points_ppca = getPPCAInverseTransform(re_250points_ppca.T)
re_error_250_ppca = getReconstructionError(sample_250, re_250points_ppca.T)
drawReconstructionError(re_error_250_ppca)
plt.title('reconstruction error of 250 points of PPCA')
plt.show()

#==============================================================================
# Problem 2.9: graphical lasso (alpha = 0.01) fitted on the 750-point sample;
# the estimated covariance and precision matrices are plotted in turn.
#==============================================================================
from sklearn.covariance import GraphLasso
gl = GraphLasso(0.01)
gl.fit(sample_750)

# Estimated covariance matrix
cov_gl = gl.covariance_
covarianceMatrix(cov_gl)
plt.title('convariance matrix of GL')
plt.show()

# Estimated precision (inverse covariance) matrix, drawn with the same helper
prec_gl = gl.get_precision()
covarianceMatrix(prec_gl)
plt.title('inverse convariance matrix of GL')
plt.show()
def _fit_shrunk_graph_lasso(time_series):
    """Fit GraphLasso on a shrunk empirical covariance; return the first success.

    Shrinkage values 0.80, 0.81, ... and alphas 1e-8 ... 1e-1 are tried in
    that order. The first estimator whose fit completes without a
    RuntimeWarning or FloatingPointError is returned; None if every
    combination fails.

    Relies on a module-level ``np`` (numpy), like the rest of this file.
    """
    import warnings
    from sklearn.covariance import GraphLasso, empirical_covariance, shrunk_covariance

    emp_cov = empirical_covariance(time_series)
    alpha_range = 10.0 ** np.arange(-8, 0)
    for shrinkage in np.arange(0.8, 0.99, 0.01):
        shrunk_cov = shrunk_covariance(emp_cov, shrinkage=shrinkage)
        for alpha in alpha_range:
            print("Retrying covariance matrix estimate with alpha=%s" % alpha)
            candidate = GraphLasso(alpha)
            try:
                # Warnings are not exceptions by default; escalate them so an
                # unstable fit is actually detected here.
                with warnings.catch_warnings():
                    warnings.simplefilter('error', RuntimeWarning)
                    # NOTE(review): this fits on the shrunk covariance matrix
                    # itself (as in the original code), not on the raw
                    # samples -- confirm this is intended.
                    candidate.fit(shrunk_cov)
            except (RuntimeWarning, FloatingPointError):
                print("Covariance estimation failed with shrinkage at alpha=%s" % alpha)
                continue
            return candidate
    return None


def get_conn_matrix(time_series, conn_model):
    """Estimate a connectivity matrix from ``time_series`` using ``conn_model``.

    Parameters
    ----------
    time_series : array-like
        Data handed unchanged to the selected estimator's ``fit`` /
        ``fit_transform`` (presumably samples x nodes -- TODO confirm).
    conn_model : str
        One of 'corr', 'partcorr', 'cov', 'sps', 'QuicGraphLasso',
        'QuicGraphLassoCV', 'QuicGraphLassoEBIC', 'AdaptiveQuicGraphLasso'.

    Returns
    -------
    conn_matrix : ndarray or None
        Correlation / partial correlation (nilearn), covariance ('cov'), or
        the negated precision matrix (all sparse-inverse models). None when
        ``conn_model`` matches none of the names above.

    Raises
    ------
    RuntimeError
        For 'cov' / 'sps', when both GraphLassoCV and the shrinkage fallback
        fail to produce an estimate.
    """
    import warnings
    from nilearn.connectome import ConnectivityMeasure
    from sklearn.covariance import GraphLassoCV

    conn_matrix = None
    if conn_model == 'corr':
        # credit: nilearn
        print('\nComputing correlation matrix...\n')
        conn_measure = ConnectivityMeasure(kind='correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
    elif conn_model == 'partcorr':
        # credit: nilearn
        print('\nComputing partial correlation matrix...\n')
        conn_measure = ConnectivityMeasure(kind='partial correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
    elif conn_model == 'cov' or conn_model == 'sps':
        # Fit estimator to matrix to get sparse matrix
        estimator = GraphLassoCV()
        estimator_shrunk = None
        primary_ok = True
        try:
            print('\nComputing covariance...\n')
            # Escalate RuntimeWarning so the handler below can fire; sklearn
            # signals an ill-conditioned system with FloatingPointError.
            with warnings.catch_warnings():
                warnings.simplefilter('error', RuntimeWarning)
                estimator.fit(time_series)
        except (RuntimeWarning, FloatingPointError):
            primary_ok = False
            print('Unstable Lasso estimation--Attempting to re-run by first applying shrinkage...')
            try:
                estimator_shrunk = _fit_shrunk_graph_lasso(time_series)
            except ValueError:
                print('Unstable Lasso estimation! Shrinkage failed. A different connectivity model may be needed.')

        # The unfitted GraphLassoCV object is never None, so track success
        # explicitly instead of testing the object.
        if not primary_ok and estimator_shrunk is None:
            raise RuntimeError('ERROR: Covariance estimation failed.')

        if conn_model == 'sps':
            if estimator_shrunk is None:
                print('\nFetching precision matrix from covariance estimator...\n')
                conn_matrix = -estimator.precision_
            else:
                print('\nFetching shrunk precision matrix from covariance estimator...\n')
                conn_matrix = -estimator_shrunk.precision_
        elif conn_model == 'cov':
            if estimator_shrunk is None:
                print('\nFetching covariance matrix from covariance estimator...\n')
                conn_matrix = estimator.covariance_
            else:
                conn_matrix = estimator_shrunk.covariance_
    elif conn_model == 'QuicGraphLasso':
        from inverse_covariance import QuicGraphLasso
        # Compute the sparse inverse covariance via QuicGraphLasso
        # credit: skggm
        model = QuicGraphLasso(
            init_method='cov',
            lam=0.5,
            mode='default',
            verbose=1)
        print('\nCalculating QuicGraphLasso precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'QuicGraphLassoCV':
        from inverse_covariance import QuicGraphLassoCV
        # Compute the sparse inverse covariance via QuicGraphLassoCV
        # credit: skggm
        model = QuicGraphLassoCV(
            init_method='cov',
            verbose=1)
        print('\nCalculating QuicGraphLassoCV precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'QuicGraphLassoEBIC':
        from inverse_covariance import QuicGraphLassoEBIC
        # Compute the sparse inverse covariance via QuicGraphLassoEBIC
        # credit: skggm
        model = QuicGraphLassoEBIC(
            init_method='cov',
            verbose=1)
        print('\nCalculating QuicGraphLassoEBIC precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'AdaptiveQuicGraphLasso':
        from inverse_covariance import AdaptiveGraphLasso, QuicGraphLassoEBIC
        # Compute the sparse inverse covariance via
        # AdaptiveGraphLasso + QuicGraphLassoEBIC + method='binary'
        # credit: skggm
        model = AdaptiveGraphLasso(
            estimator=QuicGraphLassoEBIC(
                init_method='cov',
            ),
            method='binary',
        )
        print('\nCalculating AdaptiveQuicGraphLasso precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.estimator_.precision_

    return conn_matrix
# Load the remaining yeast-comparison tables (none of the files has a header row).
sb_expression = pd.read_table(
    "https://homes.cs.washington.edu/~suinlee/cse527/notes/yeast-comparison/sbay-expression.txt",
    header=None)
conserved_gene = pd.read_table(
    "https://homes.cs.washington.edu/~suinlee/cse527/notes/yeast-comparison/conserved-genes.txt",
    header=None)
sc_experiment = pd.read_table(
    "https://homes.cs.washington.edu/~suinlee/cse527/notes/yeast-comparison/scer-experiments.txt",
    header=None)
sb_experiment = pd.read_table(
    "https://homes.cs.washington.edu/~suinlee/cse527/notes/yeast-comparison/sbay-experiments.txt",
    header=None)

#%%
# Data normalization: centre and scale every column of the transposed table.
# (An earlier variant used normalize(sb_data, axis=1) instead.)
sb_data = sb_expression.values.T
means = sb_data.mean(axis=0)
stds = sb_data.std(axis=0)
sb_normdata = (sb_data - means) / stds

#%%
# Time a GraphLasso fit and persist the resulting precision matrix.
# NOTE(review): the fit uses the raw transposed data, not sb_normdata --
# confirm that is intended.
GL_sb = GraphLasso(alpha=1)
tic = time.time()
GL_sb.fit(sb_data)
toc = time.time()
time1 = toc - tic
print(time1)

perc_sb = GL_sb.precision_
np.save('perc_sb.npy', perc_sb)
NCols = len(ProteinNames)

# Import the data and convert to a numpy array with one column per protein.
# The context manager closes the file handle once the text has been read.
with open(os.path.join('data', 'sachsCtsHTF.txt'), 'r') as data_file:
    X = [float(x) for x in data_file.read().split()]
X = np.array(X).reshape(-1, NCols)
X -= X.mean(axis=0).reshape(1, -1)
# same as http://www-stat.stanford.edu/~tibs/ElemStatLearn/datasets/sachs.info
X /= np.sqrt(1000)

# Regularization parameters
Lambs = [36, 27, 7, 0]
for lam in Lambs:
    GL = GraphLasso(lam)
    GL.fit(X)
    prec = GL.precision_

    # Form graph: one node per protein, one edge per non-zero entry in the
    # strict lower triangle of the precision matrix.
    G = nx.Graph()
    G.add_nodes_from(ProteinNames)
    for i in range(NCols):
        for j in range(i):
            if prec[i, j] != 0:
                G.add_edge(ProteinNames[i], ProteinNames[j])

    ttl = 'lambda {}, nedges {}'.format(lam, len(G.edges))
    print(ttl)
def estimatePrecisionFromSet(data, alpha):
    """Fit GraphLasso(alpha=alpha) on ``data`` and return its precision matrix.

    The fitted ``precision_`` is wrapped with ``sp.csc_matrix`` (``sp`` is
    presumably scipy.sparse -- confirm against the file's imports).
    """
    estimator = GraphLasso(alpha=alpha).fit(data)
    return sp.csc_matrix(estimator.precision_)
def estimatePrecisionFromFile(file, dims, sparsity):
    """Read a CSV with pandas, fit a default GraphLasso, return its precision.

    ``file`` is handed to ``pd.read_csv``; the fitted ``precision_`` is
    wrapped with ``sp.csc_matrix`` before being returned.

    NOTE(review): ``dims`` and ``sparsity`` are accepted but never used; the
    estimator always runs with GraphLasso's default settings.
    """
    frame = pd.read_csv(file)
    estimator = GraphLasso()
    estimator.fit(frame)
    precision = estimator.precision_
    return sp.csc_matrix(precision)
# compute the empirical covariance matrix
C_emp = X.dot(X.T) / float(N)
print('Empirical Cov:')
print(C_emp)

# neighborhood selection
nhs = NeighborSelect(EDPP(), ProximalGradientSolver(),
                     path_lb=0.2, path_steps=5, path_scale='log')
Cb = nhs.fit(np.ascontiguousarray(X))
print(Cb)

# graphical lasso on the transposed data, for comparison
glasso = GraphLasso(alpha=0.005, tol=0.0001, max_iter=1000, verbose=False)
glasso.fit(X.T)
C = glasso.get_precision()
print(glasso.error_norm(COV))
# NOTE(review): C holds the precision matrix (get_precision()), although the
# label printed below says "Cov" -- confirm which one is meant.
print('GraphLasso Cov:')
print(C)

# plot some example network
plt.figure()
plt.subplot(2, len(Cb), 1)
plt.title('Cov')
plt.pcolor(COV)
plt.subplot(2, len(Cb), 2)
plt.title('Emp. Cov')
plt.pcolor(C_emp)
'VLO', 'BAC', 'K', 'PFE', 'XRX', 'AIG', 'PEP', 'KO', 'PG', 'MCD', 'WMT', 'JPM', 'C', 'WFC', 'GE', 'T', 'VZ', 'IBM', 'MSFT', 'GOOG', 'AAPL', 'RIMM', "^DJA", "CSCO", "YHOO", "ORCL", "SNDK", "DELL", "NVDA", "EBAY", "WIN", "WFM", "WHR", "WU", "WAG", "VMC", "UTX", "UNP", "USB", "TSN", "TMO", "TXT", "TXN", "TSO", "SYY", "SBUX", "SWK", "LUV", "CMCSA", "AMD", "S", "INTC", "VXX", "^GSPC" ] start_data = datetime.datetime(2010, 01, 03) symbols, data = get_data(symbols, start_data) close_data = np.concatenate( [absolute_daily_returns(data[ts])[:, None] for ts in symbols], axis=1) alpha = 0.47 print "alpha: ", alpha gl = GraphLasso(alpha) nclose_data = scale(close_data) gl.fit(nclose_data) #remove the SP500 cov_sp = gl.covariance_[:, :-1].T[:, :-1] prec_sp = gl.precision_[:, :-1].T[:, :-1] def community_cluster(cov_sp, symbols): G = nx.Graph(cov_sp) partition = community.best_partition(G) for i in set(partition.values()): print "Community: ", i members = [ symbols[node] for node in partition.keys() if partition[node] == i
def grangercausalitytests(x, mxlg, autolag=None, alpha=0.0001, max_iter=1000, addconst=True, verbose=True):
    """Fit a graphical lasso on the lagged design matrix built from ``x``.

    Despite its name, this function does not run the F / chi-square Granger
    non-causality tests: it builds the lag matrix of ``x``, appends a
    constant column, fits ``GraphLasso`` on the result and returns the
    estimated covariance.

    Parameters
    ----------
    x : array, 2d
        Time-series data; converted with ``np.asarray`` and passed to
        ``lagmat2ds``.
    mxlg : int
        Maximum lag used to build the lag matrix.
    autolag :
        Accepted for interface compatibility; currently unused.
    alpha : float
        Regularization parameter handed to ``GraphLasso``.
    max_iter : int
        Maximum number of iterations handed to ``GraphLasso``.
    addconst : bool
        Must be True; a constant column is appended to the lag matrix.
    verbose : bool
        Print a short header and the number of lags if true.

    Returns
    -------
    result : ndarray
        ``covariance_`` of the fitted ``GraphLasso`` estimator.

    Raises
    ------
    ValueError
        If ``x`` has no more than ``3 * mxlg + int(addconst)`` observations.
    NotImplementedError
        If ``addconst`` is False.

    Notes
    -----
    TODO: convert to class and attach results properly

    Granger causality means that past values of a second series x2 have a
    statistically significant effect on the current value of a first series
    x1, taking past values of x1 into account as regressors.

    References
    ----------
    http://en.wikipedia.org/wiki/Granger_causality
    Greene: Econometric Analysis
    """
    x = np.asarray(x)
    if x.shape[0] <= 3 * mxlg + int(addconst):
        raise ValueError(
            "Insufficient observations. Maximum allowable "
            "lag is {0}".format(int((x.shape[0] - int(addconst)) / 3) - 1))

    if verbose:
        print('\nGranger Causality')
        print('number of lags (no zero)', mxlg)

    # create lagmat of both time series
    dta = lagmat2ds(x, mxlg, trim='both', dropex=1)

    # add constant
    if not addconst:
        raise NotImplementedError('Not Implemented')
    dtajoint = add_constant(dta[:, :], prepend=False)

    # Imported next to its only use so input validation above does not
    # depend on scikit-learn being importable.
    from sklearn.covariance import GraphLasso

    # Run the graphical lasso on all variables, honouring the caller's
    # max_iter instead of a hard-coded iteration cap.
    lassoreg = GraphLasso(alpha=alpha, max_iter=max_iter)
    lassoreg.fit(dtajoint)
    return lassoreg.covariance_