Example #1
def main():
    mean = torch.tensor(np.ones(16), dtype=torch.float32)
    diag = torch.tensor(np.ones(16), dtype=torch.float32)

    population = Gaussian_Distribution(mean=mean,
                                       diag=diag,
                                       sub=0.3,
                                       type='chain',
                                       slash=1)
    truth = population.invcov.numpy()
    n = 1000
    d = population.dim

    print(truth)
    dist, sample, _, S = population.generate(n, numpy_like=True)
    #print(S)
    #print(np.array(sample))
    print(sample_mean(np.array(sample)))
    print(sample_cov(np.array(sample)))

    R = np.linalg.inv(S)
    #print(R)
    #print(sample)
    np.random.seed(0)
    model = GraphicalLassoCV()
    model.fit(np.array(sample))
    cov_ = model.covariance_
    prec_ = model.precision_

    heatmap(prec_)

    plt.figure(figsize=(4, 3))
    plt.axes([.2, .15, .75, .7])
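    # NOTE (version assumption): cv_alphas_ and grid_scores_ exist only in older
    # scikit-learn releases; newer versions expose this data via cv_results_.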
    plt.plot(model.cv_alphas_, np.mean(model.grid_scores_, axis=1), 'o-')
    plt.axvline(model.alpha_, color='.5')
    plt.title('Model selection')
    plt.ylabel('Cross-validation score')
    plt.xlabel('alpha')

    plt.show()
    print(model.cv_alphas_, model.grid_scores_)

    model = GraphicalLasso()
    model.fit(sample)
    heatmap(model.precision_, 0.055)

    score = dict()
    score['log_lik'] = []
    score['AIC'] = []
    alpha_list = np.hstack((np.arange(0, 0.1,
                                      0.001), np.arange(0.11, 0.3, 0.01)))
    data = np.array(sample)
    for alpha in alpha_list:
        out_dict = cross_val_score_GLasso(data, alpha=alpha)
        score['log_lik'].append(out_dict['log_lik'])
        score['AIC'].append(out_dict['AIC'])
    plt.plot(alpha_list, score['log_lik'], 'o-')
    plt.show()
    plt.plot(alpha_list, score['AIC'])
    plt.show()
def lasso(xs):
    '''Use GraphicalLassoCV.

    Parameters
    ----------
    xs : array_like
        N samples of X.

    Returns
    -------
    C : array_like
        Covariance matrix estimate.

    Notes
    -----
    This implementation uses cross-validation to choose the
    regularization weight for the Lasso.  Graphical Lasso estimates a
    sparse inverse covariance matrix, so the sparsity assumption is
    additional information that need not follow from the covariance
    matrix being Toeplitz...
    '''
    model = GraphicalLassoCV(cv=3)
    model.fit(xs)
    C = model.covariance_
    return C
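A minimal usage sketch for lasso() above, assuming numpy and the GraphicalLassoCV import are in scope as in the snippet; the shapes and seed are illustrative, not from the original:

import numpy as np

rng = np.random.RandomState(0)
# 200 draws of a 5-dimensional standard Gaussian
xs = rng.multivariate_normal(np.zeros(5), np.eye(5), size=200)
C = lasso(xs)        # cross-validated sparse covariance estimate
print(C.shape)       # (5, 5)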
Example #3
    def create_prior_from_samples(self, samples):
        from sklearn.covariance import GraphicalLassoCV
        from numpy import asarray, linalg
        model = GraphicalLassoCV()
        model.fit(asarray(samples))
        return th_Mahalanobis(
            asarray(samples).mean(axis=0), linalg.cholesky(model.precision_),
            self.prefix)
Example #4
def get_optimal_cov_estimator(time_series):
    from sklearn.covariance import GraphicalLassoCV

    estimator = GraphicalLassoCV(cv=5, assume_centered=True)
    print("\nSearching for best Lasso estimator...\n")
    try:
        estimator.fit(time_series)
        return estimator
    except BaseException:
        ix = 0
        print("\nModel did not converge on first attempt. "
              "Varying tolerance...\n")
        while not hasattr(estimator, 'covariance_') and \
            not hasattr(estimator, 'precision_') and ix < 3:
            for tol in [0.1, 0.01, 0.001, 0.0001]:
                print(f"Tolerance={tol}")
                estimator = GraphicalLassoCV(cv=5,
                                             max_iter=200,
                                             tol=tol,
                                             assume_centered=True)
                try:
                    estimator.fit(time_series)
                    return estimator
                except BaseException:
                    ix += 1
                    continue

    if not hasattr(estimator, 'covariance_') and not hasattr(
            estimator, 'precision_'):
        print("Unstable Lasso estimation. Applying shrinkage to empirical "
              "covariance...")
        from sklearn.covariance import (
            GraphicalLasso,
            empirical_covariance,
            shrunk_covariance,
        )
        try:
            emp_cov = empirical_covariance(time_series, assume_centered=True)
            for i in np.arange(0.8, 0.99, 0.01):
                print(f"Shrinkage={i}:")
                shrunk_cov = shrunk_covariance(emp_cov, shrinkage=i)
                alphaRange = 10.0**np.arange(-8, 0)
                for alpha in alphaRange:
                    print(f"Auto-tuning alpha={alpha}...")
                    estimator_shrunk = GraphicalLasso(alpha,
                                                      assume_centered=True)
                    try:
                        estimator_shrunk.fit(shrunk_cov)
                        return estimator_shrunk
                    except BaseException:
                        continue
        except BaseException:
            return None
    else:
        return estimator
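A hedged usage sketch for the fallback logic above; the synthetic time series is illustrative and assumes numpy is imported as np, as the snippet itself requires:

rng = np.random.RandomState(42)
time_series = rng.randn(120, 10)          # 120 time points, 10 regions
estimator = get_optimal_cov_estimator(time_series)
if estimator is not None:
    print(estimator.precision_.shape)     # (10, 10) precision matrix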
Example #5
    def _infer_network(self, data):
        """
        Infer the network.

        Args:
            data (pd.DataFrame): data to be used for the inference.
        """
        entities = data.columns
        model = GraphicalLassoCV(**self.parameters)
        model.fit(data.values)
        self.graph = Graph(adjacency=pd.DataFrame(
            from_precision_matrix_partial_correlations(model.precision_),
            index=entities,
            columns=entities))
        logger.debug('inferred with {}'.format(self.method))
def getConnectome(imgPath=None,
                  atlasPath=None,
                  viewInBrowser=False,
                  displayCovMatrix=False):
    """
    Gets the connectome of a functional MRI scan
    imgPath -> absolute or relative path to the .nii file
    atlasPath -> download path for the reference MSDL atlas
    viewInBrowser (optional, default=False) -> if True, opens up an interactive viewer in the browser
    displayCovMatrix (optional, default=False) -> display the inverse covariance matrix
    Returns a tuple of shape (estimator, atlas)
    """
    # Download the reference atlas
    atlas = datasets.fetch_atlas_msdl(data_dir=atlasPath)
    # Loading atlas image stored in 'maps'
    atlasFilename = atlas['maps']
    # Get the time series for the fMRI scan
    masker = NiftiMapsMasker(maps_img=atlasFilename,
                             standardize=True,
                             memory='nilearn_cache',
                             verbose=5)
    timeSeries = masker.fit_transform(imgPath)
    # Compute the connectome using sparse inverse covariance
    estimator = GraphicalLassoCV()
    estimator.fit(timeSeries)
    if (displayCovMatrix):
        labels = atlas['labels']
        plotting.plot_matrix(estimator.covariance_,
                             labels=labels,
                             figure=(9, 7),
                             vmax=1,
                             vmin=-1,
                             title='Covariance')
        plotting.plot_matrix(estimator.precision_,
                             labels=labels,
                             figure=(9, 7),
                             vmax=1,
                             vmin=-1,
                             title='Inverse covariance (Precision)')
        #covPlot.get_figure().savefig('Covariance.png')
        # precPlot.get_figure().savefig('Inverse Covariance.png')
    if (viewInBrowser):
        coords = atlas.region_coords
        view = plotting.view_connectome(-estimator.precision_, coords, '60.0%')
        #view.save_as_html(file_name='Connectome Test.html')
        view.open_in_browser()
    return (estimator, atlas)
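A usage sketch under stated assumptions: 'func.nii' and './atlas' are hypothetical paths, and the nilearn imports used inside the function are assumed to be in scope:

estimator, atlas = getConnectome(imgPath='func.nii',
                                 atlasPath='./atlas',
                                 displayCovMatrix=True)
print(estimator.covariance_.shape)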
Example #7
def helper_graphical_lasso(X, theta_true, tf_names=[]):
    # Estimate the covariance
    if args.mode == 'cv':
        model = GraphicalLassoCV()
    else:
        model = GraphicalLasso(alpha=args.alpha_l1,
                               mode=args.mode,
                               tol=1e-7,
                               enet_tol=1e-6,
                               max_iter=100,
                               verbose=False,
                               assume_centered=False)
    model.fit(X)
    #    cov_ = model.covariance_
    prec_ = model.precision_
    if args.USE_TF_NAMES == 'yes' and len(tf_names) != 0:
        prec_ = postprocess_tf(prec_, tf_names)
    recovery_metrics = report_metrics(np.array(theta_true), prec_)
    print(
        'GLASSO: FDR, TPR, FPR, SHD, nnz_true, nnz_pred, precision, recall, Fb, aupr, auc'
    )
    print('GLASSO: TEST: Recovery of true theta: ',
          *np.around(recovery_metrics, 3))
    return list(recovery_metrics)
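A sketch of how the helper above might be driven: `args` is a module-level namespace in the original, so it is stubbed here with argparse.Namespace and hypothetical values, and report_metrics is assumed importable from the same project:

import argparse
import numpy as np

args = argparse.Namespace(mode='cv', alpha_l1=0.1, USE_TF_NAMES='no')
rng = np.random.RandomState(0)
X = rng.randn(200, 8)              # 200 samples, 8 variables
theta_true = np.eye(8)             # placeholder ground-truth precision
metrics = helper_graphical_lasso(X, theta_true)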
cov = linalg.inv(prec)
d = np.sqrt(np.diag(cov))
cov /= d
cov /= d[:, np.newaxis]
prec *= d
prec *= d[:, np.newaxis]
X = prng.multivariate_normal(np.zeros(n_features), cov, size=n_samples)
X -= X.mean(axis=0)
X /= X.std(axis=0)

# #############################################################################
# Estimate the covariance
emp_cov = np.dot(X.T, X) / n_samples

model = GraphicalLassoCV(cv=5)
model.fit(X)
cov_ = model.covariance_
prec_ = model.precision_

lw_cov_, _ = ledoit_wolf(X)
lw_prec_ = linalg.inv(lw_cov_)

# #############################################################################
# Plot the results
plt.figure(figsize=(10, 6))
plt.subplots_adjust(left=0.02, right=0.98)

# plot the covariances
covs = [('Empirical', emp_cov), ('Ledoit-Wolf', lw_cov_),
        ('GraphicalLassoCV', cov_), ('True', cov)]
vmax = cov_.max()
print('time series has {0} samples'.format(timeseries.shape[0]))

###############################################################################
# in which situation the graphical lasso **sparse inverse covariance**
# estimator captures well the covariance **structure**.
try:
    from sklearn.covariance import GraphicalLassoCV
except ImportError:
    # for scikit-learn < v0.20.0
    from sklearn.covariance import GraphLassoCV as GraphicalLassoCV

covariance_estimator = GraphicalLassoCV(cv=3, verbose=1)

###############################################################################
# We just fit our regions signals into the `GraphicalLassoCV` object
covariance_estimator.fit(timeseries)

###############################################################################
# and get the ROI-to-ROI covariance matrix.
matrix = covariance_estimator.covariance_
print('Covariance matrix has shape {0}.'.format(matrix.shape))

###############################################################################
# Plot matrix, graph, and strength
# --------------------------------
#
# We use :func:`nilearn.plotting.plot_matrix` to visualize our correlation matrix
# and display the graph of connections with :func:`nilearn.plotting.plot_connectome`.
from nilearn import plotting

plotting.plot_matrix(matrix,
def main():
    mean = torch.tensor(np.ones(16), dtype=torch.float32)
    diag = torch.tensor(np.ones(16), dtype=torch.float32)

    population = Gaussian_Distribution(mean=mean,
                                       diag=diag,
                                       sub=0.25,
                                       type='chain',
                                       slash=1)
    truth = population.invcov.numpy()
    n = 1000
    d = population.dim

    print(truth)
    dist, sample, _, S = population.generate(n, numpy_like=True)

    # #############################################################################
    # Generate the data
    n_samples = 60
    n_features = 20

    prng = np.random.RandomState(1)
    prec = make_sparse_spd_matrix(n_features,
                                  alpha=.98,
                                  smallest_coef=.4,
                                  largest_coef=.7,
                                  random_state=prng)
    cov = linalg.inv(prec)
    d = np.sqrt(np.diag(cov))
    cov /= d
    cov /= d[:, np.newaxis]
    prec *= d
    prec *= d[:, np.newaxis]
    X = prng.multivariate_normal(np.zeros(n_features), cov, size=n_samples)
    X -= X.mean(axis=0)
    X /= X.std(axis=0)

    #prec = population.invcov
    # #############################################################################
    # Estimate the covariance
    emp_cov = np.dot(X.T, X) / n_samples

    model = GraphicalLassoCV()
    model.fit(sample)
    cov_ = model.covariance_
    prec_ = model.precision_

    lw_cov_, _ = ledoit_wolf(X)
    lw_prec_ = linalg.inv(lw_cov_)

    # #############################################################################
    # Plot the results
    plt.figure(figsize=(10, 6))
    plt.subplots_adjust(left=0.02, right=0.98)

    # plot the covariances
    covs = [('Empirical', emp_cov), ('Ledoit-Wolf', lw_cov_),
            ('GraphicalLassoCV', cov_), ('True', cov)]
    vmax = cov_.max()
    for i, (name, this_cov) in enumerate(covs):
        plt.subplot(2, 4, i + 1)
        plt.imshow(this_cov,
                   interpolation='nearest',
                   vmin=-vmax,
                   vmax=vmax,
                   cmap=plt.cm.RdBu_r)
        plt.xticks(())
        plt.yticks(())
        plt.title('%s covariance' % name)

    # plot the precisions
    precs = [('Empirical', linalg.inv(emp_cov)), ('Ledoit-Wolf', lw_prec_),
             ('GraphicalLasso', prec_), ('True', prec)]
    vmax = .9 * prec_.max()
    for i, (name, this_prec) in enumerate(precs):
        ax = plt.subplot(2, 4, i + 5)
        plt.imshow(np.ma.masked_equal(this_prec, 0),
                   interpolation='nearest',
                   vmin=-vmax,
                   vmax=vmax,
                   cmap=plt.cm.RdBu_r)
        plt.xticks(())
        plt.yticks(())
        plt.title('%s precision' % name)
        if hasattr(ax, 'set_facecolor'):
            ax.set_facecolor('.7')
        else:
            ax.set_axis_bgcolor('.7')

    # plot the model selection metric
    plt.figure(figsize=(4, 3))
    plt.axes([.2, .15, .75, .7])
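    # as in Example #1, cv_alphas_ and grid_scores_ come from an older
    # scikit-learn; newer releases expose the same data through cv_results_.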
    plt.plot(model.cv_alphas_, np.mean(model.grid_scores_, axis=1), 'o-')
    plt.axvline(model.alpha_, color='.5')
    plt.title('Model selection')
    plt.ylabel('Cross-validation score')
    plt.xlabel('alpha')

    plt.show()
Example #11
                         vmax=max_precision,
                         colorbar=False)
    if n == 0:
        plt.title("group-sparse\n$\\alpha=%.2f$" % gsc.alpha_)

# Fit one graph lasso per subject
try:
    from sklearn.covariance import GraphicalLassoCV
except ImportError:
    # for scikit-learn < v0.20.0
    from sklearn.covariance import GraphLassoCV as GraphicalLassoCV

gl = GraphicalLassoCV(verbose=1)

for n, subject in enumerate(subjects[:n_displayed]):
    gl.fit(subject)

    ax = plt.subplot(n_displayed, 4, 4 * n + 3)
    max_precision = gl.precision_.max()
    plotting.plot_matrix(gl.precision_,
                         axes=ax,
                         vmin=-max_precision,
                         vmax=max_precision,
                         colorbar=False)
    if n == 0:
        plt.title("graph lasso")
    plt.ylabel("$\\alpha=%.2f$" % gl.alpha_)

# Fit one graph lasso for all subjects at once
import numpy as np
gl.fit(np.concatenate(subjects))
Example #12
    def sparce_invcov(self,
                      df,
                      cols=None,
                      style="GraphLassoCV",
                      param=0.2,
                      layout="circular",
                      center=None,
                      figsize=(7, 7)):
        """
        cols: columns to calculate. If None, takes all numerical columns
        style: GraphLassoCV or LedoitWolf
        param: Parameter to pass to the fitting algorithm: alpha for GraphLassoCV, threshold for LedoitWolf
        layout: choose between "circular", "spring", "shell"
        center: put a certain column name in the center of the graph

        Sparse covariance matrix estimation.
        Plots the sparse precision matrix.
        """
        new_df = Utility().normalize(df).dropna()  # Remove NA, normalize
        if cols is None:
            cols = df._get_numeric_data().columns
        data = new_df[cols]
        if style == "GraphLassoCV":
            model = GraphicalLassoCV(alphas=[param, param],
                                     cv=10,
                                     max_iter=5000)
            model.fit(data)
            sparce_mat = np.zeros(np.shape(model.precision_))
            sparce_mat[model.precision_ != 0] = -1
            np.fill_diagonal(sparce_mat, 1)
        else:  # Style == LedoitWolf
            model = LedoitWolf()
            model.fit(data)
            sparce_mat = np.zeros(np.shape(model.get_precision()))
            sparce_mat[np.abs(model.get_precision()) > param] = -1
        np.fill_diagonal(sparce_mat, 1)
        sparce_mat = pd.DataFrame(sparce_mat,
                                  index=data.columns,
                                  columns=data.columns)

        # NetworkX Graph
        fig, ax = plt.subplots(figsize=figsize)
        G = nx.from_pandas_adjacency(sparce_mat)

        pos = {
            "circular": nx.drawing.circular_layout,
            "shell": nx.drawing.shell_layout,
            "spring": nx.drawing.spring_layout,
        }[layout](G, scale=2)
        if center is not None:
            pos[center] = np.array([0, 0])
        # same fill color for every node; the center node is emphasized by size below
        node_color = ['mintcream' for _ in G.nodes]
        node_size = [
            len(node) * 1500 if node == center else len(node) * 500
            for node in G.nodes()
        ]
        nodes = nx.draw_networkx_nodes(G,
                                       pos,
                                       node_shape='o',
                                       node_color=node_color,
                                       node_size=node_size)
        nodes.set_edgecolor('k')
        nx.draw_networkx_edges(G, pos, edge_color='r', width=2.0, alpha=0.8)
        nx.draw_networkx_labels(G, pos, font_weight='bold', font_size=10)
        plt.axis('off')
        plt.tight_layout()

        # Display precision matrix as heatmap
        fig, ax = plt.subplots(figsize=(5, 5))
        sns.heatmap(sparce_mat,
                    vmax=1,
                    vmin=-1,
                    linewidth=0.1,
                    cmap=plt.cm.RdBu_r,
                    cbar=False)
        ax.set_ylim(sparce_mat.T.shape[0] - 1e-9, -1e-9)
        plt.title('Sparse Inverse Covariance')
        plt.show()

        return sparce_mat
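A usage sketch for the method above; `eda` is a hypothetical instance of the class this method belongs to, and the Utility().normalize helper plus the networkx/seaborn imports from the snippet are assumed available:

import numpy as np
import pandas as pd

rng = np.random.RandomState(0)
df = pd.DataFrame(rng.randn(300, 4), columns=list('abcd'))
adj = eda.sparce_invcov(df, style="GraphLassoCV", param=0.2, layout="circular")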
Example #13
def Bayes_fghorse(data, p, nBurnin=1e3, nIter=10e3):
    ## blockwise Frobenius norm for the precision matrix
    def F_norm(Theta, p, M):
        matx = np.kron(np.diag(np.ones(p, dtype=np.float32)),
                       np.ones(M, dtype=np.float32)).transpose()
        matx = tf.constant(matx, dtype=tf.float32)
        Theta_F = tf.linalg.matmul(tf.linalg.matmul(matx,
                                                    tf.math.square(Theta),
                                                    transpose_a=True,
                                                    a_is_sparse=True),
                                   matx,
                                   b_is_sparse=True)
        return tf.math.sqrt(Theta_F)

    def sampleLambda(Theta_F, Nu, tau_sq):
        gamma_lambda = tfd.Gamma(
            (1 + M**2) / 2,
            tf.math.divide(1, Nu) +
            tf.math.scalar_mul(1 / (2 * tau_sq), tf.math.square(Theta_F)))
        return tf.math.divide(1, gamma_lambda.sample(1)[0, :, :])

    def sampleNu(Lambda):
        Nu_gamma = tfd.Gamma(1, 1 + tf.math.divide(1, Lambda))
        return tf.math.divide(1, Nu_gamma.sample(1)[0, :, :])

    def permut(Mat, mat_p):
        return tf.linalg.matmul(tf.linalg.matmul(mat_p, Mat), mat_p)

    def parti(Mat, j):
        # helper for partitioning a matrix
        exclude_row = tf.concat([Mat[:j, :], Mat[(j + 1):, :]], axis=0)
        Mat11 = tf.concat([exclude_row[:, :j], exclude_row[:, (j + 1):]],
                          axis=1)
        Mat12 = tf.concat([Mat[:, j][:j], Mat[:, j][(j + 1):]], axis=0)
        Mat21 = Mat12
        Mat22 = Mat[j, j]
        return (Mat11, Mat12, Mat21, Mat22)

    def is_pos_def(x):
        return np.all(np.linalg.eigvals(x) > 0)

    N = data.shape[0]
    M = int(data.shape[1] / p)

    ## center the data
    data = data - np.mean(data, axis=0).reshape((1, p * M))

    S = np.matmul(data.transpose(), data)
    S = tf.constant(S)

    ### Use glasso with CV to get initial values:
    glasso_model = GraphicalLassoCV(cv=5)
    glasso_model.fit(data)

    #initial values
    Theta = glasso_model.precision_  ##you can also use identity as initial

    Theta = tf.constant(Theta, dtype=tf.float32)
    Theta_F = F_norm(Theta, p, M)
    tau_sq = 1
    zeta = 1
    Nu = tf.ones([p, p], dtype=tf.float32)
    Lambda = sampleLambda(Theta_F, Nu, tau_sq)
    lambda_ = 1  ##diagonal
    Nu = sampleNu(Lambda)

    samples = []

    for it in tqdm(range(-int(nBurnin), int(nIter) + 1, 1)):
        for i in range(p):
            ## create a permutation matrix to exchange the ith and pth blocks
            m = np.diag(np.ones(p)).astype(np.float32)
            m[:, [i, p - 1]] = m[:, [p - 1, i]]
            m1 = tf.linalg.LinearOperatorFullMatrix(m)
            m2 = tf.linalg.LinearOperatorFullMatrix(
                np.diag(np.ones(M, dtype=np.float32)))
            mat_p = tf.linalg.LinearOperatorKronecker([m1, m2]).to_dense()

            #exchange the ith and pth node
            Theta_ = permut(Theta, mat_p)
            S_ = permut(S, mat_p)

            m1 = tf.linalg.LinearOperatorFullMatrix(Lambda)
            m2 = tf.linalg.LinearOperatorFullMatrix(
                np.ones([M, M], dtype=np.float32))
            Lambda_mat = tf.linalg.LinearOperatorKronecker([m1, m2]).to_dense()
            Lambda_ = permut(Lambda_mat, mat_p)

            m1 = tf.linalg.LinearOperatorFullMatrix(Nu)
            m2 = tf.linalg.LinearOperatorFullMatrix(
                np.ones([M, M], dtype=np.float32))
            Nu_mat = tf.linalg.LinearOperatorKronecker([m1, m2]).to_dense()
            Nu_ = permut(Nu_mat, mat_p)

            for j_ in range(int(M)):
                j = (p - 1) * M + j_
                ##partition matrices:
                (Theta11, Theta12, Theta21, Theta22) = parti(Theta_, j)
                Theta11_inv = tf.linalg.inv(Theta11)[:(p - 1) * M, :(p - 1) *
                                                     M]

                (S11, S12, S21, S22) = parti(S_, j)
                (Lambda11, Lambda12, Lambda21, Lambda22) = parti(Lambda_, j)
                (Nu11, Nu12, Nu21, Nu22) = parti(Nu_, j)

                gamma = np.random.gamma(shape=N / 2 + 1,
                                        scale=2 / (S22 + lambda_**2))

                Ell = tf.linalg.cholesky(
                    (S22 + lambda_**2) * Theta11_inv +
                    tf.linalg.diag(1 / (tau_sq * Lambda12[:(p - 1) * M])))
                temp1 = tf.linalg.solve(
                    Ell, tf.expand_dims(-1 * S21[:(p - 1) * M], axis=1))
                mu = tf.linalg.solve(tf.transpose(Ell), temp1)

                vee = tf.linalg.solve(
                    tf.transpose(Ell),
                    tf.expand_dims(tf.constant(
                        np.random.normal(size=mu.shape[0]), dtype=np.float32),
                                   axis=1))
                beta = mu + vee

                aa = np.zeros(M, dtype=np.float32)
                aa[j_] = gamma + tf.math.reduce_sum(
                    beta * tf.linalg.matmul(Theta11_inv, beta))
                aa = tf.constant(aa)
                temp = tf.concat([beta, tf.expand_dims(aa, axis=1)], axis=0)
                ##update jth column and jth row of Theta_
                Theta_ = tf.concat([Theta_[:, :j], temp, Theta_[:, j + 1:]],
                                   axis=1)
                Theta_ = tf.concat(
                    [Theta_[:j, :],
                     tf.transpose(temp), Theta_[j + 1:, :]],
                    axis=0)
            Theta = permut(Theta_, mat_p)

        ##update F_norm
        Theta_F = F_norm(Theta, p, M)

        #update Lambda
        Lambda = sampleLambda(Theta_F, Nu, tau_sq)

        #update Nu:
        Nu = sampleNu(Lambda)

        #update tau
        up_sum = tf.math.reduce_sum(
            tf.linalg.set_diag(
                tf.linalg.band_part(
                    tf.math.divide(tf.math.square(Theta_F), Lambda), 0, -1),
                np.zeros(p, dtype=np.float32)))
        scale_tau = 1 / (1 / zeta + up_sum.numpy() / 2)
        tau_sq = 1 / np.random.gamma(shape=(M**2 * p * (p - 1) + 2) / 4,
                                     scale=scale_tau)
        ##update zeta
        zeta = 1 / np.random.gamma(shape=1, scale=1 / (1 + 1 / tau_sq))

        if it > 0:
            samples.append(Theta)

    samples = np.stack(samples, axis=0)
    return samples
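A minimal smoke-test invocation, assuming TensorFlow 2.x eager mode, tfd = tensorflow_probability.distributions, and tqdm are in scope as the snippet requires; the data shape must be N x (p*M), and the small iteration counts are purely illustrative:

rng = np.random.RandomState(0)
data = rng.randn(150, 4 * 3)          # N=150 observations, p=4 blocks of size M=3
draws = Bayes_fghorse(data, p=4, nBurnin=50, nIter=200)
print(draws.shape)                    # (200, 12, 12) posterior samples of Theta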
Example #14
def main():
    mean = torch.tensor(np.zeros(32), dtype=torch.float32)
    diag = torch.tensor(np.ones(32), dtype=torch.float32)
    X = torch.eye(32, dtype=torch.float32)
    X[5, 10] = X[10, 5] = X[19, 20] = X[20, 19] = X[1, 31] = X[31, 1] = -0.5

    population = Gaussian_Distribution(mean=mean,
                                       diag=diag,
                                       sub=-0.2,
                                       type='DIY',
                                       slash=1,
                                       prec=X)
    truth = population.invcov.numpy()
    n = 400
    p = population.dim

    print(truth)
    #heatmap(truth)

    data = pd.read_csv("chain.csv")
    sample = data.values[1:, 1:]
    p_sample = sample
    #p_emp_cov = sample_cov(p_sample)
    sample = z_score(sample)
    emp_cov = sample_cov(sample)

    model = ProxGrad_l0()
    alpha = 0.05
    prec = model.fit_FISTA(emp_cov, alpha)
    heatmap(prec)
    print('nonzero:', L0_penal(prec))

    score = dict()
    score['log_lik'] = []
    score['AIC'] = []
    score['non_zero'] = []
    alpha_list = np.hstack((np.arange(1e-5, 0.1,
                                      0.002), np.arange(0.1, 0.3, 0.01)))
    #data = np.array(sample)
    for alpha in alpha_list:
        out_dict = cross_val_score_ProxGrad_l0(sample,
                                               alpha=alpha,
                                               type='FISTA')
        score['log_lik'].append(out_dict['log_lik'])
        score['AIC'].append(out_dict['AIC'])
        score['non_zero'].append(out_dict['non_zero'])
    plt.plot(alpha_list, score['log_lik'])
    plt.show()
    plt.plot(alpha_list, score['AIC'])
    plt.show()
    plt.plot(alpha_list, score['non_zero'])
    plt.show()

    model = ProxGrad_l0()
    l = len(alpha_list)
    alpha = 0
    log_lik = -1e12
    for i in range(0, l):
        if score['log_lik'][i] > log_lik:
            alpha = alpha_list[i]
            log_lik = score['log_lik'][i]
    print(alpha)
    prec = model.fit_FISTA(emp_cov, alpha)
    heatmap(prec)
    print('nonzero:', L0_penal(prec))

    alpha = 0
    aic = 1e12
    for i in range(0, l):
        if score['AIC'][i] < aic:
            alpha = alpha_list[i]
            aic = score['AIC'][i]
    print(alpha)
    prec = model.fit_FISTA(emp_cov, alpha)
    heatmap(prec)
    print('nonzero:', L0_penal(prec))

    model = GraphicalLassoCV(tol=1e-8)
    model.fit(sample)
    heatmap(model.precision_)
    print('nonzero:', L0_penal(model.precision_))
Example #15
    ns.fit(X)
    tpr, fpr, prec = nitk.methods.calculate_matrix_accuracy(K, ns.precision_)
    ns_f1[i] = nitk.methods.calculate_f1_score(tpr, prec)

    te = nitk.ThresholdEstimatorCV()
    te.fit(X)
    tpr, fpr, prec = nitk.methods.calculate_matrix_accuracy(K, te.covariance_)
    ts_f1[i] = nitk.methods.calculate_f1_score(tpr, prec)

    sc = nitk.SCIOColumnwiseCV()
    sc.fit(X)
    tpr, fpr, prec = nitk.methods.calculate_matrix_accuracy(K, sc.precision_)
    sc_f1[i] = nitk.methods.calculate_f1_score(tpr, prec)

    gl = GraphicalLassoCV()
    gl.fit(X)
    tpr, fpr, prec = nitk.methods.calculate_matrix_accuracy(K, gl.precision_)
    gl_f1[i] = nitk.methods.calculate_f1_score(tpr, prec)

    sli = nitk.ScaledLassoInference()
    sli.fit(X)
    tpr, fpr, prec = nitk.methods.calculate_matrix_accuracy(K, sli.precision_)
    sli_f1[i] = nitk.methods.calculate_f1_score(tpr, prec)

    cli = nitk.CLIMECV()
    cli.fit(X)
    tpr, fpr, prec = nitk.methods.calculate_matrix_accuracy(K, cli.precision_)
    cli_f1[i] = nitk.methods.calculate_f1_score(tpr, prec)
print("Graphical Lasso & %s & %s & %6.3f $\pm$ %6.3f" %
      (p, n, np.mean(gl_f1), np.std(gl_f1)))
print("Neighbourhood Selection Columnwise & %s & %s & %6.3f $\pm$ %6.3f" %
def main():
    # 'Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'
    df = pd.read_csv(pt.join(DATA_ROOT, '1999_2018_complete.csv'))

    # drop malaysia and venezuela
    df.drop(['malaysia', 'venezuela'], axis=1, inplace=True)
    # print(df.columns[2:])

    data = df.values[:, 2:]
    data = preprocessing.scale(data, axis=1)
    print(data.shape)

    # plt.plot(data[:, 6], c='b', label='Japan')
    # plt.plot(data[:, 13], c='g', label='Sri Lanka')
    # plt.legend()
    # plt.show()

    # Estimate the covariance
    emp_cov = np.dot(data.T, data) / data.shape[0]

    for count in range(20):
        temp_data = data[count * 242:(count + 1) * 242]

        # GraphicalLasso
        model = GraphicalLassoCV()
        model.fit(temp_data)
        cov_ = model.covariance_
        prec_ = model.precision_

        # print(model.alpha_)
        # print(model.cv_alphas_)
        # print(model.grid_scores_)
        # print(model.n_iter_)

        # Ledoit-Wolf
        lw_cov_, _ = ledoit_wolf(data)
        lw_prec_ = linalg.inv(lw_cov_)

        # #############################################################################
        # Plot the results
        # plt.figure(figsize=(8, 6))
        # plt.subplots_adjust(left=0.02, right=0.98)
        #
        # # plot the covariances
        # covs = [('Empirical', emp_cov), ('Ledoit-Wolf',
        #                                  lw_cov_), ('GraphicalLassoCV', cov_)]
        # vmax = cov_.max()
        # for i, (name, this_cov) in enumerate(covs):
        #     plt.subplot(2, 3, i + 1)
        #     plt.imshow(this_cov, interpolation='nearest', vmin=-vmax, vmax=vmax,
        #                cmap=plt.cm.RdBu_r)
        #     plt.xticks(())
        #     plt.yticks(())
        #     plt.title('%s covariance' % name)
        #
        # # plot the precisions
        # precs = [('Empirical', linalg.inv(emp_cov)), ('Ledoit-Wolf', lw_prec_),
        #          ('GraphicalLasso', prec_)]
        # vmax = .9 * prec_.max()
        # for i, (name, this_prec) in enumerate(precs):
        #     ax = plt.subplot(2, 3, i + 4)
        #     plt.imshow(np.ma.masked_equal(this_prec, 0),
        #                interpolation='nearest', vmin=-vmax, vmax=vmax,
        #                cmap=plt.cm.RdBu_r)
        #     plt.xticks(())
        #     plt.yticks(())
        #     plt.title('%s precision' % name)
        #     if hasattr(ax, 'set_facecolor'):
        #         ax.set_facecolor('.7')
        #     else:
        #         ax.set_axis_bgcolor('.7')
        # plt.show()
        # print(prec_)
        name = 'GraphicalLasso'
        this_prec = prec_
        vmax = .9 * prec_.max()
        plt.figure()
        ax = plt.subplot(1, 1, 1)
        plt.imshow(np.ma.masked_equal(this_prec, 0),
                   interpolation='nearest',
                   vmin=-vmax,
                   vmax=vmax,
                   cmap=plt.cm.RdBu_r)
        plt.xticks(())
        plt.yticks(())
        plt.title('year: %d' % (1999 + count))
        if hasattr(ax, 'set_facecolor'):
            ax.set_facecolor('.7')
        else:
            ax.set_axis_bgcolor('.7')
        plt.show()
Example #17
ret_data, vol_data, comp_list = load_data.read_data()

# Take first 1000 rows (days) as training set and rest as test set
train_index = 1000
X_train = ret_data.iloc[0:train_index, :]
X_test = ret_data.iloc[train_index:, :]
#X = (X - X.mean())/X.std()

num_firms = ret_data.shape[1]
print("Number of firms: ", num_firms)

cov, corr = X_train.cov(), X_train.corr()
#print(cov)

model = GraphicalLassoCV(cv=5)
model.fit(X_train)
cov_ = model.covariance_
prec_ = model.precision_

print("What are the alphs in CV: ", model.cv_alphas_)
print("Optimal lambda parameter for graphical LASSO: ", model.alpha_)

#lw_cov_, _ = ledoit_wolf(X_train)
#lw_prec_ = linalg.inv(lw_cov_)

# print("Lasso Covariance Matrix:\n", cov_)
prec_ = model.precision_
# print("Lasso Inverse Covariance Matrix:\n", prec_)

# print("Empirical Covariance Matrix:\n", cov)
# Find inverse covariance matrix of empirical covariance
        print(f"gradients: {gradients}")

        # update alpha
        alpha_grad = np.mean(gradients)
        diff = np.linalg.norm(alpha_grad)
        curr_alpha = gradient_update(curr_alpha, alpha_grad)
        curr_alpha = max(curr_alpha, 0)

        print(alpha_grad, curr_alpha)

    # s = prob(samples, new_alpha)
    return curr_alpha


if __name__ == '__main__':
    import causaldag as cd
    from sklearn.covariance import GraphicalLassoCV

    d = cd.rand.directed_erdos(5, .5)
    g = cd.rand.rand_weights(d)
    samples = g.sample(100)

    cvgm_alpha = cvgm_glasso(samples, 1)
    print(cvgm_alpha)

    glcv = GraphicalLassoCV(alphas=[.1, .2, .3, .4, .5, .6, .7, .8, .9], cv=10)
    glcv.fit(samples)
    print(glcv.alpha_)


Example #19
# (603, 41): from this we can see that 603 trading days of data were obtained,
# and that 41 stocks were selected.
stock_dataset, selected_stocks = preprocess_data(batch_K_data, min_K_num=1100)
print(stock_dataset.shape)
"""
    其他的9只股票因为不满足最小交易日的要求而被删除
    这603个交易日是所有41只股票都在交易
    都没有停牌的数据。
"""
# This is the list of stocks actually used
print(selected_stocks)

# Cluster these 41 stocks
edge_model = GraphicalLassoCV()
edge_model.fit(stock_dataset)
_, labels = affinity_propagation(edge_model.covariance_)
n_labels = max(labels)
"""
    labels里面是每只股票对应的类别标号  
"""
print('Stock Clusters: {}'.format(n_labels + 1))
"""
    10
    即得到10个类别
"""
sz50_df2 = sz50_df.set_index('code')
# print(sz50_df2)
for i in range(n_labels + 1):
    # print('Cluster: {}----> stocks: {}'.format(i,','.join(np.array(selected_stocks)[labels==i])))
    """
Example #20
##############################################################################
# Computing group-sparse precision matrices
# ------------------------------------------
from nilearn.connectome import GroupSparseCovarianceCV
gsc = GroupSparseCovarianceCV(verbose=2)
gsc.fit(subject_time_series)

try:
    from sklearn.covariance import GraphicalLassoCV
except ImportError:
    # for scikit-learn < v0.20.0
    from sklearn.covariance import GraphLassoCV as GraphicalLassoCV

gl = GraphicalLassoCV(verbose=2)
gl.fit(np.concatenate(subject_time_series))

##############################################################################
# Displaying results
# -------------------
atlas_img = msdl_atlas_dataset.maps
atlas_region_coords = plotting.find_probabilistic_atlas_cut_coords(atlas_img)
labels = msdl_atlas_dataset.labels

plotting.plot_connectome(gl.covariance_,
                         atlas_region_coords,
                         edge_threshold='90%',
                         title="Covariance",
                         display_mode="lzr")
plotting.plot_connectome(-gl.precision_,
                         atlas_region_coords,
from sklearn.preprocessing import StandardScaler

# loading the data
infile = np.load('DataDynamicConn/Leiden_sub39335_Rt2_K200.npz')
ts = infile['ts']
nodes = infile['nodes']

# correlation matrix
R = np.corrcoef(ts, rowvar=False)

# scaling the input data
ts_std = StandardScaler().fit_transform(ts)

# ICOV
glasso = GraphicalLassoCV(cv=5)
glasso.fit(ts_std)
ICOV = glasso.covariance_

# visual inspection
# zeroing the main diagonal
for i in range(R.shape[0]):
    R[i, i] = 0
    ICOV[i, i] = 0

plt.figure(figsize=[6, 3])
plt.subplot(121)
plt.imshow(R, interpolation='nearest', vmin=-1, vmax=1, cmap=plt.cm.rainbow)
plt.xticks(())
plt.yticks(())
plt.title('Correlation')
Example #22
def get_conn_matrix(time_series, conn_model, dir_path, node_size, smooth,
                    dens_thresh, network, ID, roi, min_span_tree, disp_filt,
                    parc, prune, atlas_select, uatlas_select, label_names,
                    coords, c_boot, norm, binary):
    from nilearn.connectome import ConnectivityMeasure
    from sklearn.covariance import GraphicalLassoCV

    conn_matrix = None
    if conn_model == 'corr' or conn_model == 'cor' or conn_model == 'correlation':
        # credit: nilearn
        print('\nComputing correlation matrix...\n')
        conn_measure = ConnectivityMeasure(kind='correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
    elif conn_model == 'partcorr' or conn_model == 'parcorr' or conn_model == 'partialcorrelation':
        # credit: nilearn
        print('\nComputing partial correlation matrix...\n')
        conn_measure = ConnectivityMeasure(kind='partial correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
    elif conn_model == 'cov' or conn_model == 'covariance' or conn_model == 'covar' or conn_model == 'sps' or conn_model == 'sparse' or conn_model == 'precision':
        # Fit estimator to matrix to get sparse matrix
        estimator_shrunk = None
        estimator = GraphicalLassoCV(cv=5)
        try:
            print('\nComputing covariance...\n')
            estimator.fit(time_series)
        except:
            print(
                'Unstable Lasso estimation--Attempting to re-run by first applying shrinkage...'
            )
            try:
                from sklearn.covariance import GraphicalLasso, empirical_covariance, shrunk_covariance
                emp_cov = empirical_covariance(time_series)
                for i in np.arange(0.8, 0.99, 0.01):
                    shrunk_cov = shrunk_covariance(emp_cov, shrinkage=i)
                    alphaRange = 10.0**np.arange(-8, 0)
                    for alpha in alphaRange:
                        try:
                            estimator_shrunk = GraphicalLasso(alpha)
                            estimator_shrunk.fit(shrunk_cov)
                            print(
                                "Retrying covariance matrix estimate with alpha=%s"
                                % alpha)
                            if estimator_shrunk is None:
                                pass
                            else:
                                break
                        except:
                            print(
                                "Covariance estimation failed with shrinkage at alpha=%s"
                                % alpha)
                            continue
            except ValueError:
                print(
                    'Unstable Lasso estimation! Shrinkage failed. A different connectivity model may be needed.'
                )
        if estimator is None and estimator_shrunk is None:
            raise RuntimeError('\nERROR: Covariance estimation failed.')
        if conn_model == 'sps' or conn_model == 'sparse' or conn_model == 'precision':
            if estimator_shrunk is None:
                print(
                    '\nFetching precision matrix from covariance estimator...\n'
                )
                conn_matrix = -estimator.precision_
            else:
                print(
                    '\nFetching shrunk precision matrix from covariance estimator...\n'
                )
                conn_matrix = -estimator_shrunk.precision_
        elif conn_model == 'cov' or conn_model == 'covariance' or conn_model == 'covar':
            if estimator_shrunk is None:
                print(
                    '\nFetching covariance matrix from covariance estimator...\n'
                )
                conn_matrix = estimator.covariance_
            else:
                conn_matrix = estimator_shrunk.covariance_
    elif conn_model == 'QuicGraphicalLasso':
        try:
            from inverse_covariance import QuicGraphicalLasso
        except ImportError:
            print('Cannot run QuicGraphLasso. Skggm not installed!')

        # Compute the sparse inverse covariance via QuicGraphLasso
        # credit: skggm
        model = QuicGraphicalLasso(init_method='cov',
                                   lam=0.5,
                                   mode='default',
                                   verbose=1)
        print('\nCalculating QuicGraphLasso precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'QuicGraphLassoCV':
        try:
            from inverse_covariance import QuicGraphicalLassoCV
        except ImportError:
            print('Cannot run QuicGraphLassoCV. Skggm not installed!')

        # Compute the sparse inverse covariance via QuicGraphLassoCV
        # credit: skggm
        model = QuicGraphicalLassoCV(init_method='cov', verbose=1)
        print(
            '\nCalculating QuicGraphLassoCV precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'QuicGraphicalLassoEBIC':
        try:
            from inverse_covariance import QuicGraphicalLassoEBIC
        except ImportError:
            print('Cannot run QuicGraphLassoEBIC. Skggm not installed!')

        # Compute the sparse inverse covariance via QuicGraphLassoEBIC
        # credit: skggm
        model = QuicGraphicalLassoEBIC(init_method='cov', verbose=1)
        print(
            '\nCalculating QuicGraphLassoEBIC precision matrix using skggm...\n'
        )
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'AdaptiveQuicGraphLasso':
        try:
            from inverse_covariance import AdaptiveQuicGraphicalLasso, QuicGraphicalLassoEBIC
        except ImportError:
            print('Cannot run AdaptiveGraphLasso. Skggm not installed!')

        # Compute the sparse inverse covariance via
        # AdaptiveGraphLasso + QuicGraphLassoEBIC + method='binary'
        # credit: skggm
        model = AdaptiveQuicGraphicalLasso(
            estimator=QuicGraphicalLassoEBIC(init_method='cov', ),
            method='binary',
        )
        print(
            '\nCalculating AdaptiveQuicGraphLasso precision matrix using skggm...\n'
        )
        model.fit(time_series)
        conn_matrix = -model.estimator_.precision_
    else:
        raise ValueError(
            '\nERROR! No connectivity model specified at runtime. Select a valid estimator using the '
            '-mod flag.')

    if conn_matrix.shape < (2, 2):
        raise RuntimeError(
            '\nERROR! Matrix estimation selection yielded an empty or 1-dimensional graph. '
            'Check time-series for errors or try using a different atlas')

    coords = np.array(coords)
    label_names = np.array(label_names)
    return conn_matrix, conn_model, dir_path, node_size, smooth, dens_thresh, network, ID, roi, min_span_tree, disp_filt, parc, prune, atlas_select, uatlas_select, label_names, coords, c_boot, norm, binary
                         memory='nilearn_cache',
                         verbose=5)

time_series = masker.fit_transform(data.func[0], confounds=data.confounds)

##############################################################################
# Compute the sparse inverse covariance
# --------------------------------------
try:
    from sklearn.covariance import GraphicalLassoCV
except ImportError:
    # for scikit-learn < v0.20.0
    from sklearn.covariance import GraphLassoCV as GraphicalLassoCV

estimator = GraphicalLassoCV()
estimator.fit(time_series)

##############################################################################
# Display the connectome matrix
# ------------------------------
from nilearn import plotting
# Display the covariance

# The covariance can be found at estimator.covariance_
plotting.plot_matrix(estimator.covariance_,
                     labels=labels,
                     figure=(9, 7),
                     vmax=1,
                     vmin=-1,
                     title='Covariance')
Example #24
cov = linalg.inv(prec)
d = np.sqrt(np.diag(cov))
cov /= d
cov /= d[:, np.newaxis]
prec *= d
prec *= d[:, np.newaxis]
X = prng.multivariate_normal(np.zeros(n_features), cov, size=n_samples)
X -= X.mean(axis=0)
X /= X.std(axis=0)

# #############################################################################
# Estimate the covariance
emp_cov = np.dot(X.T, X) / n_samples

model = GraphicalLassoCV()
model.fit(X)
cov_ = model.covariance_
prec_ = model.precision_

lw_cov_, _ = ledoit_wolf(X)
lw_prec_ = linalg.inv(lw_cov_)

# #############################################################################
# Plot the results
plt.figure(figsize=(10, 6))
plt.subplots_adjust(left=0.02, right=0.98)

# plot the covariances
covs = [
    ("Empirical", emp_cov),
    ("Ledoit-Wolf", lw_cov_),
Example #25
def Bayes_fglasso(data,
                  p,
                  regular_parm=None,
                  lambda_shape=1,
                  lambda_rate=0.01,
                  nBurnin=1e3,
                  nIter=10e3):

    ## blockwise Frobenius norm for the precision matrix
    def F_norm(Theta, p, M):
        matx = np.kron(np.diag(np.ones(p, dtype=np.float32)),
                       np.ones(M, dtype=np.float32)).transpose()
        matx = tf.constant(matx, dtype=tf.float32)
        Theta_F = tf.linalg.matmul(tf.linalg.matmul(matx,
                                                    tf.math.square(Theta),
                                                    transpose_a=True,
                                                    a_is_sparse=True),
                                   matx,
                                   b_is_sparse=True)
        return tf.math.sqrt(Theta_F)

    def sampleLambda(Theta, Tau_sq):
        shape_new = lambda_shape + p * M + (M**2 + 1) * p * (p - 1) / 4
        rate_new = lambda_rate + tf.math.reduce_sum(
            tf.linalg.diag_part(Theta)) / 2 + tf.math.reduce_sum(
                tf.linalg.set_diag(
                    tf.linalg.band_part(Tau_sq, 0, -1)[0, :, :],
                    np.zeros(p))) / 2
        lamb_sq = np.random.gamma(shape=shape_new, scale=1 / rate_new)
        return lamb_sq

    def sampleTau(Theta_F, regular_parm):
        if tf.math.reduce_min(Theta_F) < 1e-6:
            Theta_F = Theta_F + 1e-6
        tau_ = tfd.InverseGaussian(loc=tf.math.divide(regular_parm, Theta_F),
                                   concentration=regular_parm**2).sample(1)
        return tf.math.divide(1, tau_)

    def permut(Mat, mat_p):
        return tf.linalg.matmul(tf.linalg.matmul(mat_p, Mat), mat_p)

    def parti(Mat, j):
        # helper for partitioning a matrix
        exclude_row = tf.concat([Mat[:j, :], Mat[(j + 1):, :]], axis=0)
        Mat11 = tf.concat([exclude_row[:, :j], exclude_row[:, (j + 1):]],
                          axis=1)
        Mat12 = tf.concat([Mat[:, j][:j], Mat[:, j][(j + 1):]], axis=0)
        Mat21 = Mat12
        Mat22 = Mat[j, j]
        return (Mat11, Mat12, Mat21, Mat22)

    # def is_pos_def(x):
    #     return np.all(np.linalg.eigvals(x) > 0)
    N = data.shape[0]
    M = int(data.shape[1] / p)

    ## when no regularization parameter is supplied, treat lambda as unknown and
    ## sample it each iteration; initializing from the prior mean is an
    ## assumption added here, not part of the original snippet
    estimate_lambda = regular_parm is None
    if estimate_lambda:
        regular_parm = np.sqrt(lambda_shape / lambda_rate)

    ## center the data
    data = data - np.mean(data, axis=0).reshape((1, p * M))

    S = np.matmul(data.transpose(), data)
    S = tf.constant(S)

    ### Use glasso with CV to get initial values:
    glasso_model = GraphicalLassoCV(
        cv=5)  ## you can also use identity as initial
    glasso_model.fit(data)

    #initial values
    Theta = glasso_model.precision_
    Theta = tf.constant(Theta, dtype=tf.float32)

    Theta_F = F_norm(Theta, p, M)

    Tau_sq = sampleTau(Theta_F, regular_parm)

    o1 = tf.linalg.LinearOperatorFullMatrix(Tau_sq[0, :, :])
    o2 = tf.linalg.LinearOperatorFullMatrix(np.ones([M, M], dtype=np.float32))

    Tau = tf.linalg.LinearOperatorKronecker([o1, o2]).to_dense()

    samples = []
    lambda_sq = []

    for it in tqdm(range(-int(nBurnin), int(nIter) + 1, 1)):
        for i in range(p):
            ## create a permutation matrix to exchange the ith and pth blocks
            m = np.diag(np.ones(p)).astype(np.float32)
            m[:, [i, p - 1]] = m[:, [p - 1, i]]
            m1 = tf.linalg.LinearOperatorFullMatrix(m)
            m2 = tf.linalg.LinearOperatorFullMatrix(
                np.diag(np.ones(M, dtype=np.float32)))
            mat_p = tf.linalg.LinearOperatorKronecker([m1, m2]).to_dense()

            #exchange the ith and pth node
            Theta_ = permut(Theta, mat_p)
            S_ = permut(S, mat_p)
            Tau_ = permut(Tau, mat_p)

            ##for every principal component
            for j_ in range(int(M)):
                j = (p - 1) * M + j_
                ##partition matrices:
                (Theta11, Theta12, Theta21, Theta22) = parti(Theta_, j)
                Theta11_inv = tf.linalg.inv(Theta11)[:(p - 1) * M, :(p - 1) *
                                                     M]

                (S11, S12, S21, S22) = parti(S_, j)
                (Tau11, Tau12, Tau21, Tau22) = parti(Tau_, j)

                gamma = np.random.gamma(shape=N / 2 + 1,
                                        scale=2 / (S22 + regular_parm**2))
                Ell = tf.linalg.cholesky(
                    (S22 + regular_parm**2) * Theta11_inv +
                    tf.linalg.diag(1 / Tau12[:(p - 1) * M]))
                temp1 = tf.linalg.solve(
                    Ell, tf.expand_dims(-1 * S21[:(p - 1) * M], axis=1))
                mu = tf.linalg.solve(tf.transpose(Ell), temp1)

                vee = tf.linalg.solve(
                    tf.transpose(Ell),
                    tf.expand_dims(tf.constant(
                        np.random.normal(size=mu.shape[0]), dtype=np.float32),
                                   axis=1))
                beta = mu + vee

                aa = np.zeros(M, dtype=np.float32)
                aa[j_] = gamma + tf.math.reduce_sum(
                    beta * tf.linalg.matmul(Theta11_inv, beta))
                aa = tf.constant(aa)
                temp = tf.concat([beta, tf.expand_dims(aa, axis=1)], axis=0)

                ##update jth column and jth row of Theta_
                Theta_ = tf.concat([Theta_[:, :j], temp, Theta_[:, j + 1:]],
                                   axis=1)
                Theta_ = tf.concat(
                    [Theta_[:j, :],
                     tf.transpose(temp), Theta_[j + 1:, :]],
                    axis=0)
            Theta = permut(Theta_, mat_p)

        #update Tau
        Tau_sq = sampleTau(Theta_F, regular_parm)
        o1 = tf.linalg.LinearOperatorFullMatrix(Tau_sq[0, :, :])
        o2 = tf.linalg.LinearOperatorFullMatrix(
            np.ones([M, M], dtype=np.float32))
        Tau_o = tf.linalg.LinearOperatorKronecker([o1, o2])
        Tau = Tau_o.to_dense()

        ##update Theta_F
        Theta_F = F_norm(Theta, p, M)

        ## update lambda (only when it is being estimated); sampleLambda returns
        ## lambda squared, so take the square root to recover the parameter
        if estimate_lambda:
            regular_parm = np.sqrt(sampleLambda(Theta, Tau_sq))
        #Store:
        if it > 0:
            samples.append(Theta)
            if estimate_lambda:
                lambda_sq.append(regular_parm**2)

    samples = tf.stack(samples, axis=0)
    return (samples, lambda_sq)
Example #26
def get_conn_matrix(time_series, conn_model, dir_path, node_size, smooth, dens_thresh, network, ID, roi, min_span_tree,
                    disp_filt, parc, prune, atlas, uatlas, labels, coords, norm, binary,
                    hpass, extract_strategy):
    """
    Computes a functional connectivity matrix based on a node-extracted time-series array.
    Includes a library of routines across Nilearn, scikit-learn, and skggm packages, among others.

    Parameters
    ----------
    time_series : array
        2D m x n array consisting of the time-series signal for each ROI node where m = number of scans and
        n = number of ROI's.
    conn_model : str
       Connectivity estimation model (e.g. corr for correlation, cov for covariance, sps for precision covariance,
       partcorr for partial correlation). sps type is used by default.
    dir_path : str
        Path to directory containing subject derivative data for given run.
    node_size : int
        Spherical centroid node size in the case that coordinate-based centroids
        are used as ROI's.
    smooth : int
        Smoothing width (mm fwhm) to apply to time-series when extracting signal from ROI's.
    dens_thresh : bool
        Indicates whether a target graph density is to be used as the basis for
        thresholding.
    network : str
        Resting-state network based on Yeo-7 and Yeo-17 naming (e.g. 'Default') used to filter nodes in the study of
        brain subgraphs.
    ID : str
        A subject id or other unique identifier.
    roi : str
        File path to binarized/boolean region-of-interest Nifti1Image file.
    min_span_tree : bool
        Indicates whether local thresholding from the Minimum Spanning Tree
        should be used.
    disp_filt : bool
        Indicates whether local thresholding using a disparity filter and
        'backbone network' should be used.
    parc : bool
        Indicates whether to use parcels instead of coordinates as ROI nodes.
    prune : bool
        Indicates whether to prune final graph of disconnected nodes/isolates.
    atlas : str
        Name of atlas parcellation used.
    uatlas : str
        File path to atlas parcellation Nifti1Image in MNI template space.
    labels : list
        List of string labels corresponding to ROI nodes.
    coords : list
        List of (x, y, z) tuples corresponding to a coordinate atlas used or
        which represent the center-of-mass of each parcellation node.
    norm : int
        Indicates method of normalizing resulting graph.
    binary : bool
        Indicates whether to binarize resulting graph edges to form an
        unweighted graph.
    hpass : bool
        High-pass filter values (Hz) to apply to node-extracted time-series.
    extract_strategy : str 
        The name of a valid function used to reduce the time-series region extraction.

    Returns
    -------
    conn_matrix : array
        Adjacency matrix stored as an m x n array of nodes and edges.
    conn_model : str
       Connectivity estimation model (e.g. corr for correlation, cov for covariance, sps for precision covariance,
       partcorr for partial correlation). sps type is used by default.
    dir_path : str
        Path to directory containing subject derivative data for given run.
    node_size : int
        Spherical centroid node size in the case that coordinate-based centroids
        are used as ROI's for tracking.
    smooth : int
        Smoothing width (mm fwhm) to apply to time-series when extracting signal from ROI's.
    dens_thresh : bool
        Indicates whether a target graph density is to be used as the basis for
        thresholding.
    network : str
        Resting-state network based on Yeo-7 and Yeo-17 naming (e.g. 'Default') used to filter nodes in the study of
        brain subgraphs.
    ID : str
        A subject id or other unique identifier.
    roi : str
        File path to binarized/boolean region-of-interest Nifti1Image file.
    min_span_tree : bool
        Indicates whether local thresholding from the Minimum Spanning Tree
        should be used.
    disp_filt : bool
        Indicates whether local thresholding using a disparity filter and
        'backbone network' should be used.
    parc : bool
        Indicates whether to use parcels instead of coordinates as ROI nodes.
    prune : bool
        Indicates whether to prune final graph of disconnected nodes/isolates.
    atlas : str
        Name of atlas parcellation used.
    uatlas : str
        File path to atlas parcellation Nifti1Image in MNI template space.
    labels : list
        List of string labels corresponding to graph nodes.
    coords : list
        List of (x, y, z) tuples corresponding to a coordinate atlas used or
        which represent the center-of-mass of each parcellation node.
    norm : int
        Indicates method of normalizing resulting graph.
    binary : bool
        Indicates whether to binarize resulting graph edges to form an
        unweighted graph.
    hpass : bool
        High-pass filter values (Hz) to apply to node-extracted time-series.
    extract_strategy : str 
        The name of a valid function used to reduce the time-series region extraction.

    References
    ----------
    .. [1] Varoquaux, G., & Craddock, R. C. (2013). Learning and comparing functional connectomes
      across subjects. NeuroImage. https://doi.org/10.1016/j.neuroimage.2013.04.007
    .. [2] Laska, J., & Narayan, M. (2017). skggm 0.2.7: A scikit-learn compatible package
      for Gaussian and related Graphical Models. https://doi.org/10.5281/zenodo.830033

    """
    from nilearn.connectome import ConnectivityMeasure
    from sklearn.covariance import GraphicalLassoCV

    conn_matrix = None
    if conn_model in ('corr', 'cor', 'correlation'):
        # credit: nilearn
        print('\nComputing correlation matrix...\n')
        conn_measure = ConnectivityMeasure(kind='correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
    elif conn_model in ('partcorr', 'parcorr', 'partialcorrelation'):
        # credit: nilearn
        print('\nComputing partial correlation matrix...\n')
        conn_measure = ConnectivityMeasure(kind='partial correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
    elif conn_model in ('cov', 'covariance', 'covar', 'sps', 'sparse', 'precision'):
        # Fit estimator to matrix to get sparse matrix
        estimator_shrunk = None
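        # GraphicalLassoCV selects the l1 penalty (alpha) by cross-validation
        # (cv=5) over the time-series samples.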
        estimator = GraphicalLassoCV(cv=5)
        try:
            print('\nComputing covariance...\n')
            estimator.fit(time_series)
        except BaseException:
            print('Unstable Lasso estimation. Attempting to re-run by first applying shrinkage...')
            try:
                from sklearn.covariance import GraphicalLasso, empirical_covariance, shrunk_covariance
                emp_cov = empirical_covariance(time_series)
                for i in np.arange(0.8, 0.99, 0.01):
                    # Shrinking the empirical covariance toward a scaled
                    # identity improves its conditioning before the
                    # Graphical Lasso retry.
                    shrunk_cov = shrunk_covariance(emp_cov, shrinkage=i)
                    alphaRange = 10.0 ** np.arange(-8, 0)
                    for alpha in alphaRange:
                        try:
                            print(f"Retrying covariance matrix estimate with alpha={alpha}")
                            estimator_shrunk = GraphicalLasso(alpha)
                            estimator_shrunk.fit(shrunk_cov)
                            break
                        except BaseException:
                            print(f"Covariance estimation failed with shrinkage at alpha={alpha}")
                            estimator_shrunk = None
                            continue
                    if estimator_shrunk is not None:
                        break
            except ValueError:
                print('Unstable Lasso estimation! Shrinkage failed. A different connectivity model may be needed.')
        if not hasattr(estimator, 'covariance_') and estimator_shrunk is None:
            raise RuntimeError('\nERROR: Covariance estimation failed.')
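        # Note: the off-diagonal entries of a precision (inverse covariance)
        # matrix are proportional to the *negated* partial correlations,
        # hence the sign flip on precision_ below.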
        if conn_model in ('sps', 'sparse', 'precision'):
            if estimator_shrunk is None:
                print('\nFetching precision matrix from covariance estimator...\n')
                conn_matrix = -estimator.precision_
            else:
                print('\nFetching shrunk precision matrix from covariance estimator...\n')
                conn_matrix = -estimator_shrunk.precision_
        elif conn_model in ('cov', 'covariance', 'covar'):
            if estimator_shrunk is None:
                print('\nFetching covariance matrix from covariance estimator...\n')
                conn_matrix = estimator.covariance_
            else:
                conn_matrix = estimator_shrunk.covariance_
    elif conn_model == 'QuicGraphicalLasso':

        try:
            from inverse_covariance import QuicGraphicalLasso
        except ImportError:
            print('Cannot run QuicGraphLasso. Skggm not installed!')
            raise

        # Compute the sparse inverse covariance via QuicGraphLasso
        # credit: skggm
        model = QuicGraphicalLasso(
            init_method='cov',
            lam=0.5,
            mode='default',
            verbose=1)
        print('\nCalculating QuicGraphLasso precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'QuicGraphicalLassoCV':
        try:
            from inverse_covariance import QuicGraphicalLassoCV
        except ImportError:
            print('Cannot run QuicGraphLassoCV. Skggm not installed!')
            raise

        # Compute the sparse inverse covariance via QuicGraphLassoCV
        # credit: skggm
        model = QuicGraphicalLassoCV(
            init_method='cov',
            verbose=1)
        print('\nCalculating QuicGraphLassoCV precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'QuicGraphicalLassoEBIC':
        try:
            from inverse_covariance import QuicGraphicalLassoEBIC
        except ImportError:
            print('Cannot run QuicGraphLassoEBIC. Skggm not installed!')
            raise

        # Compute the sparse inverse covariance via QuicGraphLassoEBIC
        # credit: skggm
        model = QuicGraphicalLassoEBIC(
            init_method='cov',
            verbose=1)
        print('\nCalculating QuicGraphLassoEBIC precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'AdaptiveQuicGraphicalLasso':
        try:
            from inverse_covariance import AdaptiveQuicGraphicalLasso, QuicGraphicalLassoEBIC
        except ImportError:
            print('Cannot run AdaptiveQuicGraphLasso. Skggm not installed!')
            raise

        # Compute the sparse inverse covariance via
        # AdaptiveGraphLasso + QuicGraphLassoEBIC + method='binary'
        # credit: skggm
        model = AdaptiveQuicGraphicalLasso(
            estimator=QuicGraphicalLassoEBIC(
                init_method='cov',
            ),
            method='binary',
        )
        print('\nCalculating AdaptiveQuicGraphLasso precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.estimator_.precision_
    else:
        raise ValueError('\nERROR! No connectivity model specified at runtime. Select a valid estimator using the '
                         '-mod flag.')

    # Enforce symmetry
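    # (np.maximum keeps the larger of each mirrored pair of entries, which
    # also repairs small numerical asymmetries left by the estimators)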
    conn_matrix = np.maximum(conn_matrix, conn_matrix.T)

    if conn_matrix.shape < (2, 2):
        raise RuntimeError('\nERROR! Matrix estimation selection yielded an empty or 1-dimensional graph. '
                           'Check the time-series for errors, or try using a different atlas.')

    coords = np.array(coords)
    labels = np.array(labels)

    del time_series

    return (conn_matrix, conn_model, dir_path, node_size, smooth, dens_thresh, network, ID, roi, min_span_tree,
            disp_filt, parc, prune, atlas, uatlas, labels, coords, norm, binary, hpass, extract_strategy)
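
For context, here is a self-contained sketch of the sparse-precision ('sps') branch above on synthetic data; the array shape and the random input are illustrative assumptions, not taken from the source:

import numpy as np
from sklearn.covariance import GraphicalLassoCV

# Synthetic (n_timepoints, n_rois) time-series standing in for node-extracted
# fMRI signal.
rng = np.random.default_rng(0)
time_series = rng.standard_normal((200, 8))

estimator = GraphicalLassoCV(cv=5)
estimator.fit(time_series)

# As in the 'sps' branch: negate the precision matrix so stronger conditional
# dependencies get larger positive edge weights, then enforce symmetry.
conn_matrix = -estimator.precision_
conn_matrix = np.maximum(conn_matrix, conn_matrix.T)
print(conn_matrix.shape)  # (8, 8)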
Beispiel #27
0
import numpy as np
from sklearn.cluster import affinity_propagation
from sklearn.covariance import GraphicalLassoCV

# (table, get_kdata, preprocess_data and preprocess_data2 are defined
# elsewhere in the source)
stock_list = table.col_values(colx=1, start_rowx=1, end_rowx=33)
stock_list = [str(i) for i in stock_list]

batch_K_data = get_kdata(stock_list, start='2013-09-01', end='2018-09-01')  # last five years of data
# print(batch_K_data.info())

stock_dataset, selected_stocks = preprocess_data(batch_K_data, min_K_num=1100)
stock_dataset2, selected_stocks2 = preprocess_data2(batch_K_data, min_K_num=1100)
print("The selected stocks are: ", selected_stocks)  # the list of stocks actually used

# Learn the graph structure from the correlations
edge_model1 = GraphicalLassoCV(cv=3)
edge_model2 = GraphicalLassoCV(cv=3)
edge_model1.fit(stock_dataset)
edge_model2.fit(stock_dataset2)

# Cluster the GraphicalLassoCV covariance structure with affinity propagation
_, labels1 = affinity_propagation(edge_model1.covariance_)
_, labels2 = affinity_propagation(edge_model2.covariance_)
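# Note: sklearn.cluster.affinity_propagation takes a precomputed similarity
# matrix and returns (cluster_centers_indices, labels); the estimated
# covariances between stocks serve as the similarities here.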

n_labels = max(labels1)
print('Stock Clusters: {}'.format(n_labels + 1))  # e.g. 10 clusters
sz50_df2 = stock_list
# print(sz50_df2)
for i in range(n_labels + 1):
    print('Cluster: {}----> stocks: {}'.format(i, ','.join(np.array(selected_stocks)[labels1 == i])))  # stock codes only, not stock names
    stocks = np.array(selected_stocks)[labels1 == i].tolist()
    # names = sz50_df2.loc[stocks, :].name.tolist()
    # print('Cluster: {}----> stocks: {}'.format(i, ','.join(names)))
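
The commented-out lines above hint at mapping each cluster's stock codes back to display names. A minimal sketch, assuming sz50_df2 were a pandas DataFrame indexed by stock code with a 'name' column (that table is a hypothetical stand-in, not defined in this example):

import pandas as pd

# Hypothetical lookup table: index = stock code, 'name' = display name.
sz50_df2 = pd.DataFrame({'name': ['Pudong Development Bank', 'Minsheng Bank']},
                        index=['600000', '600016'])

stocks = ['600000', '600016']  # one cluster's codes, as produced in the loop
names = sz50_df2.loc[stocks, :].name.tolist()
print('Cluster: 0----> stocks: {}'.format(','.join(names)))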