def est_connectivity(X, gm="Glasso", assume_centered=False):
    """
    Fit a sparse inverse-covariance estimator on ``X`` and return its results.

    Parameters
    ----------
    X :
        Data handed unchanged to the selected estimator's ``fit`` method.
    gm : str, default="Glasso"
        Which estimator to use:

        * ``"QuicGlasso-CV"``  -> ``QuicGraphicalLassoCV(cv=5)``
        * ``"QuicGlasso-BIC"`` -> ``QuicGraphicalLassoEBIC(gamma=0)``
        * anything else        -> ``GraphicalLassoCV(cv=5)``
    assume_centered : bool, default=False
        Forwarded to ``GraphicalLassoCV`` only; the two QUIC estimators do
        not receive it.

    Returns
    -------
    tuple
        ``(covariance, precision, regularization)`` taken from the fitted
        estimator. The third item is ``lam_`` for the QUIC estimators and
        ``alpha_`` for ``GraphicalLassoCV``.
    """
    if gm == "QuicGlasso-CV":
        estimator = QuicGraphicalLassoCV(cv=5)
        estimator.fit(X)
        return estimator.covariance_, estimator.precision_, estimator.lam_

    if gm == "QuicGlasso-BIC":
        estimator = QuicGraphicalLassoEBIC(gamma=0)
        estimator.fit(X)
        return estimator.covariance_, estimator.precision_, estimator.lam_

    # Default: Glasso (also reached for any unrecognized ``gm`` value).
    estimator = GraphicalLassoCV(assume_centered=assume_centered, cv=5)
    estimator.fit(X)
    return estimator.covariance_, estimator.get_precision(), estimator.alpha_
def glasso(data, alphas=5, n_jobs=None, mode='cd'):
    """
    Estimates the graph with graphical lasso, finding the best alpha based on
    cross validation.

    The data is standardized with ``StandardScaler`` before the model is fit.

    Parameters
    ----------
    data: numpy ndarray
        The input data to reconstruct/estimate a graph on. Features as
        columns and observations as rows.
    alphas: int or array-like of shape (n_alphas,), dtype=float, default=5
        Non-negative. If an integer is given, it fixes the number of points
        on the grids of alpha to be used. If a list is given, it gives the
        grid to be used.
    n_jobs: int or None, default=None
        Forwarded to ``GraphicalLassoCV``.
    mode: str, default='cd'
        Solver selection, forwarded to ``GraphicalLassoCV``
        (scikit-learn documents ``'cd'`` and ``'lars'``).

    Returns
    -------
    adjacency matrix : the estimated adjacency matrix. Integer array with 1
        wherever the estimated precision matrix is nonzero and 0 elsewhere;
        the diagonal is always 0.
    """
    standardized = StandardScaler().fit_transform(data)

    # ``mode`` used to be accepted but never handed to the estimator, so a
    # caller asking for a non-default solver silently got the default one.
    estimator = GraphicalLassoCV(alphas=alphas, n_jobs=n_jobs, mode=mode)
    estimator.fit(standardized)
    precision_matrix = estimator.get_precision()

    # Any nonzero precision entry is treated as an edge.
    adjacency_matrix = precision_matrix.astype(bool).astype(int)
    # Drop self-loops.
    adjacency_matrix[np.diag_indices_from(adjacency_matrix)] = 0
    return adjacency_matrix
def sparce_invcov(self, df, cols=None, style="GraphLassoCV", param=0.2,
                  layout="circular", center=None, figsize=(7, 7)):
    """
    Sparse covariance matrix estimation.

    Fits a covariance model on the normalized data, reduces its precision
    matrix to a sparsity pattern, then plots that pattern as a network graph
    and as a heatmap.

    cols: columns to calculate. If None, takes all numerical columns
    style: GraphLassoCV or LedoitWolf (any value other than "GraphLassoCV"
        selects LedoitWolf)
    param: Parameter to pass to fitting algorithm.
        If GraphLasso, =alpha; if LedoitWolf, =threshold on the absolute
        value of the precision entries
    layout: choose between "circular", "spring", "shell"
        (any other value raises KeyError)
    center: Put a certain colname in the center of the graph
    figsize: figure size of the network plot

    Returns the sparsity pattern as a DataFrame indexed by column name:
    1 on the diagonal, -1 for pairs kept as connected, 0 elsewhere.
    """
    # Remove NA, normalize
    new_df = Utility().normalize(df).dropna()
    # Identity check: ``cols == None`` is evaluated element-wise for a pandas
    # Index / ndarray and then fails in the ``if`` with an ambiguous truth value.
    if cols is None:
        cols = df._get_numeric_data().columns
    data = new_df[cols]

    if style == "GraphLassoCV":
        # The alpha grid holds only ``param``, so CV cannot pick another value.
        model = GraphicalLassoCV(alphas=[param, param], cv=10, max_iter=5000)
        model.fit(data)
        connected = model.precision_ != 0
    else:
        # Style == LedoitWolf
        model = LedoitWolf()
        model.fit(data)
        # Compute the precision matrix once instead of on every use.
        connected = np.abs(model.get_precision()) > param

    sparce_mat = np.zeros(connected.shape)
    sparce_mat[connected] = -1
    np.fill_diagonal(sparce_mat, 1)
    sparce_mat = pd.DataFrame(sparce_mat,
                              index=data.columns,
                              columns=data.columns)

    # NetworkX Graph
    fig, graph_ax = plt.subplots(figsize=figsize)
    G = nx.from_pandas_adjacency(sparce_mat)
    layout_funcs = {
        "circular": nx.drawing.circular_layout,
        "shell": nx.drawing.shell_layout,
        "spring": nx.drawing.spring_layout,
    }
    pos = layout_funcs[layout](G, scale=2)
    # Only re-position an actual node; otherwise ``pos`` would gain a stray
    # entry (e.g. a ``None`` key when no center is requested).
    if center in G:
        pos[center] = np.array([0, 0])

    # Node area scales with label length; the center node is drawn 3x larger.
    node_size = [
        len(node) * 1500 if node == center else len(node) * 500
        for node in G.nodes()
    ]
    nodes = nx.draw_networkx_nodes(G, pos, node_shape='o',
                                   node_color='mintcream',
                                   node_size=node_size, ax=graph_ax)
    nodes.set_edgecolor('k')
    nx.draw_networkx_edges(G, pos, edge_color='r', width=2.0, alpha=0.8,
                           ax=graph_ax)
    nx.draw_networkx_labels(G, pos, font_weight='bold', font_size=10,
                            ax=graph_ax)
    graph_ax.axis('off')
    fig.tight_layout()

    # Display precision matrix as heatmap
    heat_fig, heat_ax = plt.subplots(figsize=(5, 5))
    sns.heatmap(sparce_mat, vmax=1, vmin=-1, linewidth=0.1,
                cmap=plt.cm.RdBu_r, cbar=False, ax=heat_ax)
    # NOTE(review): explicit y-limits look like a workaround for heatmap rows
    # being clipped by some matplotlib versions -- confirm before removing.
    heat_ax.set_ylim(sparce_mat.T.shape[0] - 1e-9, -1e-9)
    heat_ax.set_title('Sparse Inverse Covariance')
    plt.show()

    return sparce_mat