Exemple #1
0
def est_connectivity(X, gm="Glasso", assume_centered=False):
    """Fit a sparse graphical model to ``X`` and return its estimates.

    Parameters
    ----------
    X : array-like
        Data passed unchanged to the selected estimator's ``fit``.
    gm : str, default="Glasso"
        ``"QuicGlasso-CV"`` selects ``QuicGraphicalLassoCV(cv=5)`` and
        ``"QuicGlasso-BIC"`` selects ``QuicGraphicalLassoEBIC(gamma=0)``.
        Any other value falls back to ``GraphicalLassoCV(cv=5)``.
    assume_centered : bool, default=False
        Forwarded to ``GraphicalLassoCV`` only; the two QUIC estimators are
        constructed without it.

    Returns
    -------
    tuple
        ``(covariance, precision, penalty)``. ``penalty`` is the fitted
        estimator's ``lam_`` for the QUIC variants and ``alpha_`` for the
        fallback.
    """
    # Step 1: pick a QUIC estimator when one was requested by name.
    if gm == "QuicGlasso-CV":
        quic_model = QuicGraphicalLassoCV(cv=5)
    elif gm == "QuicGlasso-BIC":
        quic_model = QuicGraphicalLassoEBIC(gamma=0)
    else:
        quic_model = None

    # Step 2: both QUIC estimators expose the same fitted attributes.
    if quic_model is not None:
        quic_model.fit(X)
        return quic_model.covariance_, quic_model.precision_, quic_model.lam_

    # Fallback (default): cross-validated graphical lasso.
    fitted = GraphicalLassoCV(assume_centered=assume_centered, cv=5).fit(X)
    return fitted.covariance_, fitted.get_precision(), fitted.alpha_
Exemple #2
0
def glasso(data, alphas=5, n_jobs=None, mode='cd'):
    """
    Estimate a graph with the graphical lasso, choosing alpha by cross-validation.

    The data are standardized with ``StandardScaler`` before fitting. The
    adjacency matrix is the non-zero pattern of the fitted precision matrix
    with the diagonal cleared.

    Parameters
    ----------
    data: numpy ndarray
        The input data to reconstruct/estimate a graph on. Features as columns and observations as rows.
    alphas: int or array-like of shape (n_alphas,), dtype=float, default=5
        Non-negative. If an integer is given, it fixes the number of points on the grids of alpha to be used.
        If a list is given, it gives the grid to be used.
    n_jobs: int or None, default=None
        Forwarded to ``GraphicalLassoCV``.
    mode: str, default='cd'
        Forwarded to ``GraphicalLassoCV`` as its ``mode`` argument. It was previously accepted
        here but never passed on, so any non-default value was silently ignored.

    Returns
    -------
    adjacency_matrix : numpy ndarray of int
        Square matrix holding 1 where the off-diagonal precision entry is non-zero and 0 elsewhere;
        the diagonal is always 0.
    """
    standardized = StandardScaler().fit_transform(data)
    model = GraphicalLassoCV(alphas=alphas, n_jobs=n_jobs, mode=mode).fit(standardized)
    precision_matrix = model.get_precision()
    # Only the sparsity pattern matters: any non-zero partial correlation is an edge.
    adjacency_matrix = (precision_matrix != 0).astype(int)
    # A variable is trivially "connected" to itself; drop those self-loops.
    np.fill_diagonal(adjacency_matrix, 0)
    return adjacency_matrix
Exemple #3
0
    def sparce_invcov(self,
                      df,
                      cols=None,
                      style="GraphLassoCV",
                      param=0.2,
                      layout="circular",
                      center=None,
                      figsize=(7, 7)):
        """
        Sparse covariance matrix estimation.
        Plot the sparse precision matrix as a network graph and as a heatmap.

        df: input DataFrame. It is passed through ``Utility().normalize`` and
            rows containing NA are dropped before fitting.
        cols: columns to calculate. If None, takes all numerical columns
            (``df._get_numeric_data().columns``).
        style: "GraphLassoCV" fits ``GraphicalLassoCV``; any other value fits
            ``LedoitWolf``.
        param: Parameter to pass to fitting algorithm. If GraphLasso, =alpha;
            if LedoitWolf, =threshold on the absolute precision value.
        layout: choose between "circular", "spring", "shell". Any other value
            raises ``KeyError``.
        center: Put a certain colname in the center of the graph. Ignored when
            None or when it is not one of the plotted columns.
        figsize: size of the network-graph figure. The heatmap figure is
            always (5, 5).

        Returns a DataFrame indexed and labelled by the selected columns, with
        1 on the diagonal, -1 where the precision entry is kept, 0 elsewhere.
        Side effect: creates two matplotlib figures and calls ``plt.show()``.
        """
        new_df = Utility().normalize(df).dropna()  # Remove NA, normalize
        # Identity test on purpose: ``cols == None`` compares element-wise when
        # cols is an Index/ndarray, and the resulting array cannot be used in `if`.
        if cols is None:
            cols = df._get_numeric_data().columns
        data = new_df[cols]

        if style == "GraphLassoCV":
            # The alpha grid contains only `param`, so the CV estimator has a
            # single candidate value to choose from.
            model = GraphicalLassoCV(alphas=[param, param],
                                     cv=10,
                                     max_iter=5000)
            model.fit(data)
            precision = model.precision_
            kept = precision != 0
        else:  # Style == LedoitWolf
            model = LedoitWolf()
            model.fit(data)
            precision = model.get_precision()
            kept = np.abs(precision) > param

        # Encode the structure: -1 for a retained entry, 1 on the diagonal.
        sparce_mat = np.zeros(np.shape(precision))
        sparce_mat[kept] = -1
        np.fill_diagonal(sparce_mat, 1)
        sparce_mat = pd.DataFrame(sparce_mat,
                                  index=data.columns,
                                  columns=data.columns)

        # NetworkX Graph
        plt.subplots(figsize=figsize)
        G = nx.from_pandas_adjacency(sparce_mat)

        layout_functions = {
            "circular": nx.drawing.circular_layout,
            "shell": nx.drawing.shell_layout,
            "spring": nx.drawing.spring_layout,
        }
        pos = layout_functions[layout](G, scale=2)
        # Only move a node that is actually in the graph; otherwise a stray
        # key (e.g. None) would be added to the layout dict.
        if center is not None and center in G:
            pos[center] = np.array([0, 0])

        # Size scales with the label length; the centred node is three times
        # larger. str() keeps this working for non-string column labels.
        node_size = [
            len(str(node)) * (1500 if node == center else 500)
            for node in G.nodes()
        ]
        nodes = nx.draw_networkx_nodes(G,
                                       pos,
                                       node_shape='o',
                                       node_color='mintcream',
                                       node_size=node_size)
        nodes.set_edgecolor('k')
        nx.draw_networkx_edges(G, pos, edge_color='r', width=2.0, alpha=0.8)
        nx.draw_networkx_labels(G, pos, font_weight='bold', font_size=10)
        plt.axis('off')
        plt.tight_layout()

        # Display precision matrix as heatmap
        _, heat_ax = plt.subplots(figsize=(5, 5))
        sns.heatmap(sparce_mat,
                    vmax=1,
                    vmin=-1,
                    linewidth=0.1,
                    cmap=plt.cm.RdBu_r,
                    cbar=False,
                    ax=heat_ax)
        # NOTE(review): explicit y-limits presumably work around heatmap rows
        # being clipped by some matplotlib releases - confirm it is still needed.
        heat_ax.set_ylim(sparce_mat.shape[0] - 1e-9, -1e-9)
        plt.title('Sparse Inverse Covariance')
        plt.show()

        return sparce_mat