Ejemplo n.º 1
0
def heat_map(file_path, X, headers, cmap=sns.color_palette("Blues")):
    """Plot the graphical-lasso correlation matrix of ``X`` as a heat map.

    A ``GraphLasso`` with default settings is fitted to ``X``; its estimated
    covariance is rescaled into a correlation matrix, drawn with seaborn,
    saved to ``file_path`` and finally displayed.

    Args:
        file_path: Destination handed to ``plt.savefig``.
        X: Data passed unchanged to ``GraphLasso.fit``.
        headers: Tick labels applied to both axes.
        cmap: Colour map forwarded to ``sns.heatmap``.
    """
    estimator = GraphLasso()
    estimator.fit(X)
    covariance = estimator.covariance_

    # D^-1/2 * Cov * D^-1/2 turns the covariance into a correlation matrix.
    inv_std = np.diag(1. / np.sqrt(np.diag(covariance)))
    correlation = inv_std.dot(covariance).dot(inv_std)

    figure, _ = plt.subplots()
    # 10 x 8 inch canvas
    figure.set_size_inches(10, 8)

    heatmap_options = dict(cmap=cmap,
                           square=True,
                           xticklabels=1,
                           yticklabels=1,
                           linewidths=.5)
    axes = sns.heatmap(correlation, **heatmap_options)
    axes.set_yticklabels(headers, rotation=0, fontsize=12)
    axes.set_xticklabels(headers, rotation=90, fontsize=12)

    plt.subplots_adjust(bottom=0.4, left=0.2)
    sns.despine(left=True, bottom=True)
    plt.tight_layout()

    plt.savefig(file_path)
    plt.show()
Ejemplo n.º 2
0
def save_network_graph_sequence(data, alpha_seq, labels, filename):
    """Draw one graphical-lasso network per alpha and save the grid to a file.

    For every regularisation value in ``alpha_seq`` a ``GraphLasso`` is fitted
    to ``data``; its precision matrix is turned into a graph and drawn in a
    circular layout on its own subplot (two subplots per row).

    Args:
        data: Data passed unchanged to ``GraphLasso.fit``.
        alpha_seq: Sequence of regularisation values; its length must be even.
        labels: Node labels, matched to node indices by position.
        filename: Destination handed to ``plt.savefig``.

    Returns:
        None. When ``alpha_seq`` has odd length a message is printed and
        nothing is drawn or saved.
    """
    n = len(alpha_seq)
    if n % 2 != 0:
        # Parenthesised form prints the same text on Python 2 and Python 3.
        print("make alpha an even number please.")
        return

    node_labels = dict(enumerate(labels))
    fig = plt.figure()
    for position, alpha in enumerate(alpha_seq, start=1):
        # Floor division keeps the row count an integer on Python 3 as well.
        ax = fig.add_subplot(n // 2, 2, position)
        gl = GraphLasso(alpha=alpha)
        gl.fit(data)

        graph = nx.Graph(gl.precision_)
        nx.draw_circular(graph,
                         scale=4,
                         node_size=150,
                         ax=ax,
                         with_labels=True,
                         labels=node_labels,
                         font_size=6)
        ax.set_title(r"$\alpha$ = %.2e" % alpha)

    plt.savefig(filename)
Ejemplo n.º 3
0
    def _fit(self, X):
        """Fit the graphical lasso on ``X`` and cluster the features.

        The fitted ``GraphLasso`` is stored as ``estimator_``; the labels
        returned by ``affinity_propagation`` on ``partial_corrcoef_`` are
        stored as ``labels_``.

        Returns
        -------
        self
        """
        glasso = GraphLasso(
            alpha=self.alpha,
            assume_centered=self.assume_centered,
            enet_tol=self.enet_tol,
            max_iter=self.max_iter,
            mode=self.mode,
            tol=self.tol,
        )
        self.estimator_ = glasso.fit(X)

        # Only the labels are kept; the first returned item is discarded.
        _unused, feature_labels = affinity_propagation(
            self.partial_corrcoef_, **self._apcluster_params
        )
        self.labels_ = feature_labels

        return self
Ejemplo n.º 4
0
def precisionCol(cleandata, k):
    """Return the columns most strongly tied to ``'Y'`` in the precision matrix.

    A ``GraphLasso`` (LARS mode) is fitted to ``cleandata``. The resulting
    precision matrix is labelled with the frame's columns and written to
    ``precision.csv`` in the current working directory. The absolute values
    of the ``'Y'`` column are then sorted in ascending order and the last
    ``k`` entries are kept.

    Args:
        cleandata: DataFrame that contains a column named ``'Y'``.
        k: Number of largest entries to keep before ``'Y'`` is removed.

    Returns:
        Index of the retained column labels, without ``'Y'``.

    Raises:
        KeyError: From ``Index.drop`` when ``'Y'`` is not among the ``k``
            largest entries.
    """
    model = GraphLasso(mode='lars')
    model.fit(cleandata)

    columns = cleandata.columns
    pre_ = pd.DataFrame(model.get_precision(), index=columns, columns=columns)
    pre_.to_csv("precision.csv")

    # ``Series.sort()`` no longer exists in pandas; ``sort_values()`` returns
    # the ascending-sorted copy the in-place call used to produce.
    strength = abs(pre_['Y']).sort_values()
    strongest = strength[-k:]
    return strongest.index.drop('Y')
Ejemplo n.º 5
0
    def create_skeleton_from_data(self, data, **kwargs):
        """Estimate a graph skeleton as the graphical-lasso precision matrix.

        :param data: raw data df
        :param kwargs: optional ``alpha`` (default 0.01) and ``max_iter``
            (default 2000), both forwarded to ``GraphLasso``
        :return: the matrix returned by ``GraphLasso.get_precision()``
        """
        alpha = kwargs.get('alpha', 0.01)
        max_iter = kwargs.get('max_iter', 2000)
        edge_model = GraphLasso(alpha=alpha, max_iter=max_iter)
        # ``DataFrame.as_matrix()`` was removed from pandas; ``.values`` gives
        # the same array and is what the sibling ``predict`` methods use.
        edge_model.fit(data.values)
        return edge_model.get_precision()
Ejemplo n.º 6
0
def get_other_precision(A, alpha=1e-5):
    """Return the graphical-lasso precision matrix estimated from ``A``.

    Reference implementation built on scikit-learn; per the original note,
    the project's own algorithm is meant to replace it with the same
    input/output.

    Args:
        A: The aggregated sentiment matrix, an array of
            ``(n_samples, n_features)``.
        alpha: Regularization parameter. The higher alpha, the more
            regularization and the sparser the inverse covariance.

    Returns:
        The matrix returned by the fitted estimator's ``get_precision()``.
    """
    # scikit-learn renamed GraphLasso to GraphicalLasso (0.20) and later
    # removed the old name; try the current name first, then the legacy one.
    try:
        from sklearn.covariance import GraphicalLasso
    except ImportError:
        from sklearn.covariance import GraphLasso as GraphicalLasso

    graph_lasso = GraphicalLasso(alpha=alpha)
    graph_lasso.fit(A)
    return graph_lasso.get_precision()
Ejemplo n.º 7
0
def myglasso(data, lam=0.5):
    """Fit a graphical lasso and derive the scaled, negated precision matrix.

    Args:
        data: Two-dimensional data exposing ``.shape``; passed to
            ``GraphLasso.fit``.
        lam: Regularisation strength handed to ``GraphLasso`` as ``alpha``.

    Returns:
        Tuple ``(part, prec, cov)`` where ``prec`` and ``cov`` are the fitted
        precision and covariance and
        ``part[i, j] = -prec[i, j] / sqrt(prec[i, i] * prec[j, j])``.
    """
    estimator = GraphLasso(alpha=lam)
    estimator.fit(data)
    cov = estimator.covariance_
    prec = estimator.precision_

    _, n_features = data.shape
    diagonal = np.diag(prec)
    # Outer product supplies prec[i, i] * prec[j, j] for every (i, j) pair.
    part = np.zeros((n_features, n_features))
    part[:, :] = -prec / np.sqrt(np.outer(diagonal, diagonal))
    return part, prec, cov
Ejemplo n.º 8
0
    def predict(self, data, **kwargs):
        """Build a directed graph from the graphical-lasso precision matrix.

        :param data: raw data df
        :param kwargs: optional ``alpha`` (default 0.01) and ``max_iter``
            (default 2000), both forwarded to ``GraphLasso``
        :return: an ``nx.DiGraph`` built from the precision matrix, with
            nodes relabelled from positions to ``data.columns``
        """
        glasso = GraphLasso(alpha=kwargs.get('alpha', 0.01),
                            max_iter=kwargs.get('max_iter', 2000))
        glasso.fit(data.values)

        digraph = nx.DiGraph(glasso.get_precision())
        position_to_column = dict(enumerate(data.columns))
        return nx.relabel_nodes(digraph, position_to_column)
Ejemplo n.º 9
0
    def predict(self, data, alpha=0.01, max_iter=2000, **kwargs):
        """ Predict the graph skeleton.

        Args:
            data (pandas.DataFrame): observational data
            alpha (float): regularization parameter
            max_iter (int): maximum number of iterations
            **kwargs: accepted but not used by this method

        Returns:
            The result of ``nx.relabel_nodes`` on an ``nx.DiGraph`` built
            from the precision matrix, with nodes named after
            ``data.columns``.
        """
        glasso = GraphLasso(alpha=alpha, max_iter=max_iter)
        glasso.fit(data.values)

        digraph = nx.DiGraph(glasso.get_precision())
        position_to_column = dict(enumerate(data.columns))
        return nx.relabel_nodes(digraph, position_to_column)
Ejemplo n.º 10
0
def test_graph_lasso(random_state=0):
    """Check ``graph_lasso`` cost monotonicity and smoke-test ``GraphLasso``."""
    # Draw samples from a Gaussian whose precision matrix is sparse
    n_features = 20
    n_draws = 100
    rng = check_random_state(random_state)
    true_precision = make_sparse_spd_matrix(n_features, alpha=.95,
                                            random_state=rng)
    true_covariance = linalg.inv(true_precision)
    X = rng.multivariate_normal(np.zeros(n_features), true_covariance,
                                size=n_draws)
    emp_cov = empirical_covariance(X)

    # NOTE(review): neither ``alpha`` nor ``method`` is forwarded to
    # ``graph_lasso`` below -- every call uses alpha=.1 and the default mode,
    # so the 'cd' and 'lars' entries come from identical calls. Confirm
    # whether ``alpha=alpha, mode=method`` was intended.
    for alpha in (.1, .01):
        covs = {}
        for method in ('cd', 'lars'):
            estimated_cov, _, cost_trace = graph_lasso(emp_cov, alpha=.1,
                                                       return_costs=True)
            covs[method] = estimated_cov
            objective, dual_gap = np.array(cost_trace).T
            # The objective must strictly decrease between iterations
            assert_array_less(np.diff(objective), 0)
        # Both dictionary entries are expected to agree
        assert_array_almost_equal(covs['cd'], covs['lars'])

    # Smoke test the estimator against the last computed covariance
    fitted = GraphLasso(alpha=.1).fit(X)
    assert_array_almost_equal(fitted.covariance_, covs['cd'])
Ejemplo n.º 11
0
 def __init__(self, n_components=2, n_iter=100, alpha=None):
     """Store the settings and create one ``GraphLasso`` per component.

     Args:
         n_components: Number of components; one estimator is built for each.
         n_iter: Stored as ``self.n_iter``.
         alpha: Per-component regularisation values, indexable by component
             number. ``None`` selects 10 for every component.
     """
     self.n_components = n_components
     self.n_iter = n_iter
     self.min_covar = 1e-3
     # Identity test: ``alpha == None`` is evaluated element-wise when an
     # array is passed and cannot be used as a plain condition.
     if alpha is None:
         alpha = [10] * self.n_components
     self.alpha = alpha
     self.model = [
         GraphLasso(alpha=self.alpha[k], assume_centered=False, tol=1e-4)
         for k in range(self.n_components)
     ]
def test_graph_lasso(random_state=0):
    """Compare the 'cd' and 'lars' solvers and smoke-test ``GraphLasso``."""
    # Draw samples from a Gaussian whose precision matrix is sparse
    n_features = 20
    n_draws = 100
    rng = check_random_state(random_state)
    true_precision = make_sparse_spd_matrix(n_features, alpha=.95,
                                            random_state=rng)
    true_covariance = linalg.inv(true_precision)
    X = rng.multivariate_normal(np.zeros(n_features), true_covariance,
                                size=n_draws)
    emp_cov = empirical_covariance(X)

    for alpha in (0., .1, .25):
        covs = {}
        icovs = {}
        for method in ('cd', 'lars'):
            solution = graph_lasso(emp_cov,
                                   alpha=alpha,
                                   mode=method,
                                   return_costs=True)
            covs[method], icovs[method], cost_trace = solution
            objective, dual_gap = np.array(cost_trace).T
            # Strict decrease of the objective is not expected for alpha == 0
            if alpha != 0:
                assert_array_less(np.diff(objective), 0)
        # Both solvers should land on similar estimates
        for estimates in (covs, icovs):
            assert_array_almost_equal(estimates['cd'], estimates['lars'],
                                      decimal=3)

    # Smoke test the estimator against the results of the last alpha
    fitted = GraphLasso(alpha=.25).fit(X)
    fitted.score(X)
    for method in ('cd', 'lars'):
        assert_array_almost_equal(fitted.covariance_, covs[method], decimal=3)

    # On already-centered data, assume_centered=True and False must agree
    Z = X - X.mean(0)
    precs = [
        GraphLasso(assume_centered=flag).fit(Z).precision_
        for flag in (False, True)
    ]
    assert_array_almost_equal(precs[0], precs[1])
Ejemplo n.º 13
0
def test_graph_lasso(random_state=0):
    """Compare the 'cd' and 'lars' solvers and smoke-test ``GraphLasso``."""
    # Draw samples from a Gaussian whose precision matrix is sparse
    n_features = 20
    n_draws = 100
    rng = check_random_state(random_state)
    true_precision = make_sparse_spd_matrix(n_features, alpha=.95,
                                            random_state=rng)
    true_covariance = linalg.inv(true_precision)
    X = rng.multivariate_normal(np.zeros(n_features), true_covariance,
                                size=n_draws)
    emp_cov = empirical_covariance(X)

    for alpha in (0., .1, .25):
        covs = {}
        icovs = {}
        for method in ('cd', 'lars'):
            solution = graph_lasso(emp_cov, alpha=alpha, mode=method,
                                   return_costs=True)
            covs[method], icovs[method], cost_trace = solution
            objective, dual_gap = np.array(cost_trace).T
            # Strict decrease of the objective is not expected for alpha == 0
            if alpha != 0:
                assert_array_less(np.diff(objective), 0)
        # Both solvers should land on similar estimates
        for estimates in (covs, icovs):
            assert_array_almost_equal(estimates['cd'], estimates['lars'],
                                      decimal=4)

    # Smoke test the estimator against the results of the last alpha
    fitted = GraphLasso(alpha=.25).fit(X)
    fitted.score(X)
    for method in ('cd', 'lars'):
        assert_array_almost_equal(fitted.covariance_, covs[method], decimal=4)

    # On already-centered data, assume_centered=True and False must agree
    Z = X - X.mean(0)
    precs = [
        GraphLasso(assume_centered=flag).fit(Z).precision_
        for flag in (False, True)
    ]
    assert_array_almost_equal(precs[0], precs[1])
def computePartialCorrelations(coupling_data, reg_alpha):
    """Estimate a sparse precision matrix and its partial correlations.

    A ``GraphLasso`` (coordinate descent, at most 500 iterations) is fitted
    to ``coupling_data``. Partial correlations are derived from the precision
    matrix as ``rho_ij = -p_ij / sqrt(p_ii * p_jj)``; the diagonal and the
    lower triangle of the result are then set to zero.

    Args:
        coupling_data: Data passed unchanged to ``GraphLasso.fit``.
        reg_alpha: Regularisation strength handed to ``GraphLasso``.

    Returns:
        Tuple ``(precision, partial_correlations)``.
    """
    # sparse inverse covariance matrix estimation
    estimator = GraphLasso(alpha=reg_alpha, assume_centered=False, mode='cd', max_iter=500)
    estimator.fit(coupling_data)

    print("Sparse inverse covariance matrix was estiamted with {0} iterations.".format(estimator.n_iter_))
    print("\t\t\t and by using the parameters: ", estimator.get_params())
    prec = estimator.get_precision()

    # diagonal of precision matrix
    prec_diag = np.diag(prec)

    # obtain partial correlations (proportional to prec matrix entries):
    # rho_ij = - p_ij / sqrt(p_ii * p_jj)
    partial_correlations = -prec / np.sqrt(np.outer(prec_diag, prec_diag))

    # Keep only the strict upper triangle. The index set is sized from the
    # matrix itself instead of a hard-coded 400, so any number of features
    # is handled.
    partial_correlations[np.tril_indices(prec.shape[0])] = 0

    return estimator.get_precision(), partial_correlations
Ejemplo n.º 15
0
    def fit(self, X):
        """Estimate the mean and the sparse precision matrix of ``X``.

        The regularisation strength is chosen as follows:

        * if ``self.alpha_`` is truthy, a ``GraphLasso`` with that alpha is
          fitted directly;
        * if ``self.method_ == 'cv'``, ``GraphLassoCV`` selects alpha;
        * if ``self.method_ == 'bic'``, alphas from 0.01 to 0.49 (step 0.01)
          are tried and the one with the lowest ``self.bic`` score is kept.
          Every score (or ``None`` for a failed fit) is appended to
          ``self.bic_scores``.

        Sets ``mean_``, ``precision_`` and, for 'cv'/'bic', ``alpha_``.
        ``precision_`` ends up ``None`` when every 'bic' candidate failed.

        Raises:
            NotImplementedError: If no alpha is set and ``self.method_`` is
                neither ``'cv'`` nor ``'bic'``.
        """
        self.mean_ = np.mean(X, axis=0)

        if self.alpha_:
            gl = GraphLasso(self.alpha_, max_iter=100000)

            gl.fit(X)
            self.precision_ = gl.precision_

        elif self.method_ == 'cv':
            gl = GraphLassoCV(verbose=self.verbose)
            gl.fit(X)
            self.alpha_ = gl.alpha_
            self.precision_ = gl.precision_

        elif self.method_ == 'bic':
            min_score = np.inf
            min_precision = None
            alphas = np.arange(0.01, 0.5, 0.01)

            for a in alphas:
                if self.verbose:
                    print("[GMRF] Alpha = {}".format(a))

                gl = GraphLasso(a, max_iter=100000)

                try:
                    gl.fit(X)
                    # ``self.bic`` is scored with the candidate precision in place.
                    self.precision_ = gl.precision_
                    score = self.bic(X, gamma=0.0)
                except Exception:
                    # A failed candidate is recorded and skipped; unlike a bare
                    # ``except:``, KeyboardInterrupt/SystemExit still propagate.
                    self.bic_scores.append(None)
                    continue

                self.bic_scores.append(score)

                if score <= min_score:
                    min_score = score
                    self.alpha_ = a
                    min_precision = np.copy(self.precision_)

            self.precision_ = min_precision

        else:
            # ``format`` keeps the intended error even when ``method_`` is not
            # a string (concatenation would raise TypeError instead).
            raise NotImplementedError(
                "{} is not a valid method, use 'cv' or 'bic'".format(
                    self.method_))
def save_network_graph_sequence(data, alpha_seq, labels, filename):
    """Draw one graphical-lasso network per alpha and save the grid to a file.

    For every regularisation value in ``alpha_seq`` a ``GraphLasso`` is fitted
    to ``data``; its precision matrix is turned into a graph and drawn in a
    circular layout on its own subplot (two subplots per row).

    Args:
        data: Data passed unchanged to ``GraphLasso.fit``.
        alpha_seq: Sequence of regularisation values; its length must be even.
        labels: Node labels, matched to node indices by position.
        filename: Destination handed to ``plt.savefig``.

    Returns:
        None. When ``alpha_seq`` has odd length a message is printed and
        nothing is drawn or saved.
    """
    n = len(alpha_seq)
    if n % 2 != 0:
        # Parenthesised form prints the same text on Python 2 and Python 3.
        print("make alpha an even number please.")
        return

    node_labels = dict(enumerate(labels))
    fig = plt.figure()
    for position, alpha in enumerate(alpha_seq, start=1):
        # Floor division keeps the row count an integer on Python 3 as well.
        ax = fig.add_subplot(n // 2, 2, position)
        gl = GraphLasso(alpha=alpha)
        gl.fit(data)

        graph = nx.Graph(gl.precision_)
        nx.draw_circular(graph, scale=4, node_size=150, ax=ax,
                         with_labels=True, labels=node_labels, font_size=6)
        ax.set_title(r"$\alpha$ = %.2e" % alpha)

    plt.savefig(filename)
Ejemplo n.º 17
0
    def _init_para(self, X, y):
        '''Extend the base parameters with an edge list and per-edge ``theta``.

        Edges are pairs of column indices of ``y``. With ``self.sparsity == 0``
        every unordered pair is used; otherwise only the pairs whose entry in
        a ``GraphLasso(alpha=0.1)`` precision matrix fitted on ``y`` is
        non-zero. The edge array is stored in ``self.edges`` and one initial
        ``theta`` value per edge is added to the parameters under
        ``'theta'``.

        Returns the ``(p0, shape)`` pair obtained from ``BASE._init_para``,
        with ``p0['theta']`` set.
        '''
        p0, shape = BASE._init_para(self, X, y)

        if self.sparsity == 0:
            # Dense graph: all unordered pairs of columns of y.
            edges = [[min(pair), max(pair)]
                     for pair in itertools.combinations(range(y.shape[1]), 2)]
        else:
            # Sparse graph: keep pairs with a non-zero precision entry.
            glasso = GraphLasso(alpha=0.1)
            glasso.fit(y)
            adjacency = glasso.get_precision() != 0
            edges = [[row, col]
                     for row in range(adjacency.shape[0])
                     for col in range(row + 1, adjacency.shape[1])
                     if not adjacency[row, col] == 0]

        self.edges = T.shared(np.array(edges).T).astype('int8')

        if self.verbose:
            print('(edges)',edges)
            print('(y) shape:',y.shape,'labels:',np.unique(y))
            print('(X) shape:',X.shape,'std:',np.std(X))

        # One starting value per edge: a scalar when the copula is shared,
        # otherwise a (shape[1], shape[1]) matrix filled with 0.01.
        theta = [
            0.01 if self.shared_copula
            else np.ones((shape[1], shape[1])) * 0.01
            for _ in edges
        ]

        p0['theta'] = tespo.parameter(theta, const=False)
        return p0, shape
 def __init__(self, n_components, n_iter=5, alpha=None):
     """Store the settings and create one ``GraphLasso`` per component.

     Args:
         n_components: Requested number of components; two more than this
             are stored in ``self.n_components`` and built.
         n_iter: Stored as ``self.n_iter``.
         alpha: Per-component regularisation values, indexable by component
             number (``n_components + 2`` entries are read). ``None``
             selects 10 for every component.
     """
     self.n_components = n_components + 2
     self.n_iter = n_iter
     self.min_covar = 1e-3
     self.tresh = 1e-3
     self.lambd = 0.5
     # Identity test: ``alpha == None`` is evaluated element-wise when an
     # array is passed and cannot be used as a plain condition.
     if alpha is None:
         alpha = [10] * self.n_components
     self.alpha = alpha
     self.model = [
         GraphLasso(alpha=self.alpha[k],
                    assume_centered=True,
                    tol=1e-4,
                    verbose=True) for k in range(self.n_components)
     ]
Ejemplo n.º 19
0
class SparseStructureLearning(BaseOutlierDetector):
    """Outlier detector using sparse structure learning.

    Parameters
    ----------
    alpha : float, default 0.01
        Regularization parameter.

    assume_centered : bool, default False
        If True, data are not centered before computation.

    contamination : float, default 0.1
        Proportion of outliers in the data set. Used to define the threshold.

    enet_tol : float, default 1e-04
        Tolerance for the elastic net solver used to calculate the descent
        direction. This parameter controls the accuracy of the search direction
        for a given column update, not of the overall parameter estimate. Only
        used for mode='cd'.

    max_iter : integer, default 100
        Maximum number of iterations.

    mode : str, default 'cd'
        Lasso solver to use: coordinate descent or LARS.

    tol : float, default 1e-04
        Tolerance to declare convergence.

    apcluster_params : dict, default None
        Additional parameters passed to
        ``sklearn.cluster.affinity_propagation``.

    Attributes
    ----------
    anomaly_score_ : array-like of shape (n_samples,)
        Anomaly score for each training data.

    threshold_ : float
        Threshold.

    covariance_ : array-like of shape (n_features, n_features)
        Estimated covariance matrix.

    graphical_model_ : networkx Graph
        GGM.

    isolates_ : array-like of shape (n_isolates,)
        Indices of isolates.

    labels_ : array-like of shape (n_features,)
        Label of each feature.

    location_ : array-like of shape (n_features,)
        Estimated location.

    n_iter_ : int
        Number of iterations run.

    partial_corrcoef_ : array-like of shape (n_features, n_features)
        Partial correlation coefficient matrix.

    precision_ : array-like of shape (n_features, n_features)
        Estimated pseudo inverse matrix.

    References
    ----------
    .. [#ide09] Ide, T., Lozano, C., Abe, N., and Liu, Y.,
        "Proximity-based anomaly detection using sparse structure learning,"
        In Proceedings of SDM, pp. 97-108, 2009.

    Examples
    --------
    >>> import numpy as np
    >>> from kenchi.outlier_detection import SparseStructureLearning
    >>> X = np.array([
    ...     [0., 0.], [1., 1.], [2., 0.], [3., -1.], [4., 0.],
    ...     [5., 1.], [6., 0.], [7., -1.], [8., 0.], [1000., 1.]
    ... ])
    >>> det = SparseStructureLearning()
    >>> det.fit_predict(X)
    array([ 1,  1,  1,  1,  1,  1,  1,  1,  1, -1])
    """
    @property
    def _apcluster_params(self):
        # Normalise the ``None`` default into an empty keyword dict.
        if self.apcluster_params is None:
            return dict()
        else:
            return self.apcluster_params

    @property
    def covariance_(self):
        return self.estimator_.covariance_

    @property
    def graphical_model_(self):
        import networkx as nx

        # ``from_numpy_matrix`` was removed in networkx 3.0. Prefer the
        # array-based constructor and fall back to the old name only on
        # releases that do not provide it.
        from_array = getattr(nx, 'from_numpy_array', None)
        if from_array is None:
            from_array = nx.from_numpy_matrix

        # Only the strict lower triangle is used, so the diagonal adds no
        # self-loops.
        return from_array(np.tril(self.partial_corrcoef_, k=-1))

    @property
    def isolates_(self):
        import networkx as nx

        return np.array(list(nx.isolates(self.graphical_model_)))

    @property
    def location_(self):
        return self.estimator_.location_

    @property
    def n_iter_(self):
        return self.estimator_.n_iter_

    @property
    def partial_corrcoef_(self):
        n_features, _ = self.precision_.shape
        diag = np.diag(self.precision_)[np.newaxis]
        # -p_ij / sqrt(p_ii * p_jj); ``diag.T @ diag`` is the outer product.
        partial_corrcoef = -self.precision_ / np.sqrt(diag.T @ diag)
        # Overwrite the diagonal entries with 1.
        partial_corrcoef.flat[::n_features + 1] = 1.

        return partial_corrcoef

    @property
    def precision_(self):
        return self.estimator_.precision_

    def __init__(self,
                 alpha=0.01,
                 assume_centered=False,
                 contamination=0.1,
                 enet_tol=1e-04,
                 max_iter=100,
                 mode='cd',
                 tol=1e-04,
                 apcluster_params=None):
        super().__init__(contamination=contamination)

        self.alpha = alpha
        self.apcluster_params = apcluster_params
        self.assume_centered = assume_centered
        self.enet_tol = enet_tol
        self.max_iter = max_iter
        self.mode = mode
        self.tol = tol

    def _check_is_fitted(self):
        super()._check_is_fitted()

        check_is_fitted(self, [
            'covariance_', 'labels_', 'location_', 'n_iter_',
            'partial_corrcoef_', 'precision_'
        ])

    def _fit(self, X):
        self.estimator_ = GraphLasso(alpha=self.alpha,
                                     assume_centered=self.assume_centered,
                                     enet_tol=self.enet_tol,
                                     max_iter=self.max_iter,
                                     mode=self.mode,
                                     tol=self.tol).fit(X)

        # Cluster the features on the partial correlations; only the labels
        # are kept.
        _, self.labels_ = affinity_propagation(self.partial_corrcoef_,
                                               **self._apcluster_params)

        return self

    def _anomaly_score(self, X):
        return self.estimator_.mahalanobis(X)

    def featurewise_anomaly_score(self, X):
        """Compute the feature-wise anomaly scores for each sample.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Data.

        Returns
        -------
        anomaly_score : array-like of shape (n_samples, n_features)
            Feature-wise anomaly scores for each sample.
        """

        self._check_is_fitted()

        X = self._check_array(X, estimator=self)

        return 0.5 * np.log(2. * np.pi / np.diag(
            self.precision_)) + 0.5 / np.diag(self.precision_) * (
                (X - self.location_) @ self.precision_)**2

    def score(self, X, y=None):
        """Compute the mean log-likelihood of the given data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Data.

        y : ignored

        Returns
        -------
        score : float
            Mean log-likelihood of the given data.
        """

        self._check_is_fitted()

        X = self._check_array(X, estimator=self)

        return self.estimator_.score(X)

    def plot_graphical_model(self, **kwargs):
        """Plot the Gaussian Graphical Model (GGM).

        Parameters
        ----------
        ax : matplotlib Axes, default None
            Target axes instance.

        figsize : tuple, default None
            Tuple denoting figure size of the plot.

        filename : str, default None
            If provided, save the current figure.

        random_state : int, RandomState instance, default None
            Seed of the pseudo random number generator.

        title : string, default 'GGM (n_clusters, n_features, n_isolates)'
            Axes title. To disable, pass None.

        **kwargs : dict
            Other keywords passed to ``nx.draw_networkx``.

        Returns
        -------
        ax : matplotlib Axes
            Axes on which the plot was drawn.
        """

        self._check_is_fitted()

        n_clusters = np.max(self.labels_) + 1
        n_isolates, = self.isolates_.shape
        title = (f'GGM ('
                 f'n_clusters={n_clusters}, '
                 f'n_features={self.n_features_}, '
                 f'n_isolates={n_isolates}'
                 f')')
        # The graph is always taken from the fitted model; node colours and
        # title are only defaults the caller may override.
        kwargs['G'] = self.graphical_model_

        kwargs.setdefault('node_color', self.labels_)
        kwargs.setdefault('title', title)

        return plot_graphical_model(**kwargs)

    def plot_partial_corrcoef(self, **kwargs):
        """Plot the partial correlation coefficient matrix.

        Parameters
        ----------
        ax : matplotlib Axes, default None
            Target axes instance.

        cbar : bool, default True.
            If True, to draw a colorbar.

        figsize : tuple, default None
            Tuple denoting figure size of the plot.

        filename : str, default None
            If provided, save the current figure.

        title : string, default 'Partial correlation'
            Axes title. To disable, pass None.

        **kwargs : dict
            Other keywords passed to ``ax.pcolormesh``.

        Returns
        -------
        ax : matplotlib Axes
            Axes on which the plot was drawn.
        """

        self._check_is_fitted()

        kwargs['partial_corrcoef'] = self.partial_corrcoef_

        return plot_partial_corrcoef(**kwargs)
Ejemplo n.º 20
0
def get_conn_matrix(time_series, conn_model):
    """Estimate a connectivity matrix from a time series with the chosen model.

    Args:
        time_series: Data handed to the selected estimator (presumably
            ``(n_samples, n_features)`` -- confirm against the caller).
        conn_model: One of ``'corr'``, ``'corr_fast'``, ``'partcorr'``,
            ``'tangent'``, ``'cov'``, ``'sps'``, ``'QuicGraphLasso'``,
            ``'QuicGraphLassoCV'``, ``'QuicGraphLassoEBIC'`` or
            ``'AdaptiveQuicGraphLasso'``.

    Returns:
        The estimated matrix. ``None`` is returned when ``conn_model`` is not
        one of the names above, or when the ``'corr_fast'`` computation is
        unavailable or fails with ``RuntimeError``.

    Raises:
        ValueError: If ``GraphLassoCV`` fails for ``'cov'``/``'sps'`` and the
            shrinkage fallback cannot produce an estimate either.
    """
    import warnings
    # NOTE: this silences warnings process-wide, not only inside this call.
    warnings.simplefilter("ignore")
    from nilearn.connectome import ConnectivityMeasure
    # scikit-learn renamed the GraphLasso* estimators to GraphicalLasso* and
    # later removed the old names; try the current names first.
    try:
        from sklearn.covariance import GraphicalLassoCV as GraphLassoCV
    except ImportError:
        from sklearn.covariance import GraphLassoCV
    try:
        from brainiak.fcma.util import compute_correlation
    except ImportError:
        # Remember that the optional dependency is missing so that the
        # 'corr_fast' branch can report it instead of hitting a NameError.
        compute_correlation = None

    conn_matrix = None
    if conn_model == 'corr':
        # credit: nilearn
        print('\nComputing correlation matrix...\n')
        conn_measure = ConnectivityMeasure(kind='correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
    elif conn_model == 'corr_fast':
        # credit: brainiak
        print('\nComputing accelerated fcma correlation matrix...\n')
        try:
            if compute_correlation is None:
                raise RuntimeError('brainiak is not installed')
            conn_matrix = compute_correlation(time_series, time_series)
        except RuntimeError:
            print(
                'Cannot run accelerated correlation computation due to a missing dependency. You need brainiak installed!'
            )
    elif conn_model == 'partcorr':
        # credit: nilearn
        print('\nComputing partial correlation matrix...\n')
        conn_measure = ConnectivityMeasure(kind='partial correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
    elif conn_model == 'tangent':
        # credit: nilearn
        print('\nComputing tangent matrix...\n')
        conn_measure = ConnectivityMeasure(kind='tangent')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
    elif conn_model == 'cov' or conn_model == 'sps':
        # Fit estimator to matrix to get sparse matrix
        estimator = GraphLassoCV()
        # Stays None unless the cross-validated fit fails and the shrinkage
        # fallback below succeeds.
        estimator_shrunk = None
        try:
            print('\nComputing covariance...\n')
            estimator.fit(time_series)
        except Exception:
            print(
                'Unstable Lasso estimation--Attempting to re-run by first applying shrinkage...'
            )
            try:
                try:
                    from sklearn.covariance import GraphicalLasso as GraphLasso
                except ImportError:
                    from sklearn.covariance import GraphLasso
                from sklearn.covariance import empirical_covariance, shrunk_covariance
                emp_cov = empirical_covariance(time_series)
                alphaRange = 10.0**np.arange(-8, 0)
                for i in np.arange(0.8, 0.99, 0.01):
                    shrunk_cov = shrunk_covariance(emp_cov, shrinkage=i)
                    for alpha in alphaRange:
                        candidate = GraphLasso(alpha)
                        try:
                            # NOTE(review): ``fit`` receives the shrunk
                            # covariance as if it were raw samples; confirm
                            # this is intended.
                            candidate.fit(shrunk_cov)
                        except FloatingPointError:
                            print("Failed at alpha=%s" % alpha)
                            continue
                        print(
                            "Calculated graph-lasso covariance matrix for alpha=%s"
                            % alpha)
                        # Only a successfully fitted estimator is kept, so the
                        # search moves on to the next shrinkage value when
                        # every alpha fails.
                        estimator_shrunk = candidate
                        break
                    if estimator_shrunk is not None:
                        break
            except Exception:
                raise ValueError(
                    'Unstable Lasso estimation! Shrinkage failed.')
            if estimator_shrunk is None:
                raise ValueError(
                    'Unstable Lasso estimation! Shrinkage failed.')

        if conn_model == 'sps':
            print(
                '\nFetching precision matrix from covariance estimator...\n'
            )
            if estimator_shrunk is None:
                conn_matrix = -estimator.precision_
            else:
                print(
                    '\nFetching shrunk precision matrix from covariance estimator...\n'
                )
                conn_matrix = -estimator_shrunk.precision_
        else:
            print(
                '\nFetching covariance matrix from covariance estimator...\n'
            )
            if estimator_shrunk is None:
                conn_matrix = estimator.covariance_
            else:
                conn_matrix = estimator_shrunk.covariance_
    elif conn_model == 'QuicGraphLasso':
        from inverse_covariance import QuicGraphLasso
        # Compute the sparse inverse covariance via QuicGraphLasso
        # credit: skggm
        model = QuicGraphLasso(init_method='cov',
                               lam=0.5,
                               mode='default',
                               verbose=1)
        print('\nCalculating QuicGraphLasso precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.precision_

    elif conn_model == 'QuicGraphLassoCV':
        from inverse_covariance import QuicGraphLassoCV
        # Compute the sparse inverse covariance via QuicGraphLassoCV
        # credit: skggm
        model = QuicGraphLassoCV(init_method='cov', verbose=1)
        print(
            '\nCalculating QuicGraphLassoCV precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.precision_

    elif conn_model == 'QuicGraphLassoEBIC':
        from inverse_covariance import QuicGraphLassoEBIC
        # Compute the sparse inverse covariance via QuicGraphLassoEBIC
        # credit: skggm
        model = QuicGraphLassoEBIC(init_method='cov', verbose=1)
        print(
            '\nCalculating QuicGraphLassoEBIC precision matrix using skggm...\n'
        )
        model.fit(time_series)
        conn_matrix = -model.precision_

    elif conn_model == 'AdaptiveQuicGraphLasso':
        from inverse_covariance import AdaptiveGraphLasso, QuicGraphLassoEBIC
        # Compute the sparse inverse covariance via
        # AdaptiveGraphLasso + QuicGraphLassoEBIC + method='binary'
        # credit: skggm
        model = AdaptiveGraphLasso(
            estimator=QuicGraphLassoEBIC(init_method='cov', ),
            method='binary',
        )
        print(
            '\nCalculating AdaptiveQuicGraphLasso precision matrix using skggm...\n'
        )
        model.fit(time_series)
        conn_matrix = -model.estimator_.precision_

    return conn_matrix
Ejemplo n.º 21
0
def get_conn_matrix(time_series, conn_model, dir_path, node_size, smooth,
                    dens_thresh, network, ID, mask, min_span_tree, disp_filt,
                    parc, prune, atlas_select, uatlas_select, label_names,
                    coords, vox_array):
    """Estimate a connectivity matrix from ``time_series`` using ``conn_model``.

    Supported ``conn_model`` values:

    * ``'corr'`` / ``'partcorr'`` -- nilearn ``ConnectivityMeasure`` with kind
      ``'correlation'`` / ``'partial correlation'``.
    * ``'cov'`` / ``'sps'`` -- covariance / negated precision matrix from
      scikit-learn's ``GraphLassoCV``. If that fit raises, ``GraphLasso`` is
      retried on shrunk versions of the empirical covariance.
    * ``'QuicGraphicalLasso'``, ``'QuicGraphLassoCV'``,
      ``'QuicGraphicalLassoEBIC'``, ``'AdaptiveQuicGraphLasso'`` -- negated
      precision matrix from the matching skggm (``inverse_covariance``)
      estimator.

    Any other value leaves the matrix as ``None``.

    Third-party packages are imported inside the branch that uses them, so
    only the packages required by the selected model need to be installed.

    Parameters
    ----------
    time_series : array-like
        Data handed to the estimators' ``fit`` / ``fit_transform``
        (presumably samples x nodes -- TODO confirm against the caller).
    conn_model : str
        Name of the connectivity model (see above).
    dir_path, node_size, smooth, dens_thresh, network, ID, mask,
    min_span_tree, disp_filt, parc, prune, atlas_select, uatlas_select :
        Not used here; returned unchanged.
    label_names, coords :
        Returned after conversion with ``np.array``.
    vox_array :
        Unused; only referenced by the commented-out parcel weighting below.

    Returns
    -------
    tuple
        ``(conn_matrix, conn_model, dir_path, node_size, smooth, dens_thresh,
        network, ID, mask, min_span_tree, disp_filt, parc, prune,
        atlas_select, uatlas_select, label_names, coords)``.

    Raises
    ------
    ImportError
        If a skggm model is requested but ``inverse_covariance`` cannot be
        imported.
    RuntimeError
        If, for ``'cov'`` / ``'sps'``, neither the cross-validated fit nor any
        shrinkage retry produced a fitted estimator.
    """
    conn_matrix = None
    if conn_model == 'corr':
        from nilearn.connectome import ConnectivityMeasure
        # credit: nilearn
        print('\nComputing correlation matrix...\n')
        conn_measure = ConnectivityMeasure(kind='correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
    elif conn_model == 'partcorr':
        from nilearn.connectome import ConnectivityMeasure
        # credit: nilearn
        print('\nComputing partial correlation matrix...\n')
        conn_measure = ConnectivityMeasure(kind='partial correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
    elif conn_model == 'cov' or conn_model == 'sps':
        from sklearn.covariance import GraphLassoCV
        # Fit estimator to matrix to get sparse matrix
        estimator_shrunk = None
        estimator = GraphLassoCV()
        try:
            print('\nComputing covariance...\n')
            estimator.fit(time_series)
        except Exception:
            # Forget the unfitted estimator so the failure check below can
            # actually detect that nothing was fitted.
            estimator = None
            print(
                'Unstable Lasso estimation--Attempting to re-run by first applying shrinkage...'
            )
            try:
                from sklearn.covariance import GraphLasso, empirical_covariance, shrunk_covariance
                emp_cov = empirical_covariance(time_series)
                alpha_range = 10.0**np.arange(-8, 0)
                # NOTE(review): a successful fit only ends the alpha loop; the
                # shrinkage loop keeps going, so the last shrinkage level that
                # fits wins. Kept as-is to preserve results -- confirm intent.
                for shrinkage in np.arange(0.8, 0.99, 0.01):
                    shrunk_cov = shrunk_covariance(emp_cov,
                                                   shrinkage=shrinkage)
                    for alpha in alpha_range:
                        candidate = GraphLasso(alpha)
                        try:
                            candidate.fit(shrunk_cov)
                        except Exception:
                            print(
                                "Covariance estimation failed with shrinkage at alpha=%s"
                                % alpha)
                            continue
                        print(
                            "Retrying covariance matrix estimate with alpha=%s"
                            % alpha)
                        # Only a successfully fitted estimator is recorded.
                        estimator_shrunk = candidate
                        break
            except ValueError:
                print(
                    'Unstable Lasso estimation! Shrinkage failed. A different connectivity model may be needed.'
                )
        if estimator is None and estimator_shrunk is None:
            raise RuntimeError('ERROR: Covariance estimation failed.')
        if conn_model == 'sps':
            if estimator_shrunk is None:
                print(
                    '\nFetching precision matrix from covariance estimator...\n'
                )
                conn_matrix = -estimator.precision_
            else:
                print(
                    '\nFetching shrunk precision matrix from covariance estimator...\n'
                )
                conn_matrix = -estimator_shrunk.precision_
        elif conn_model == 'cov':
            if estimator_shrunk is None:
                print(
                    '\nFetching covariance matrix from covariance estimator...\n'
                )
                conn_matrix = estimator.covariance_
            else:
                conn_matrix = estimator_shrunk.covariance_
    elif conn_model == 'QuicGraphicalLasso':
        try:
            from inverse_covariance import QuicGraphicalLasso
        except ImportError:
            print('Cannot run QuicGraphLasso. Skggm not installed!')
            # Re-raise: continuing would only fail later with a NameError.
            raise

        # Compute the sparse inverse covariance via QuicGraphLasso
        # credit: skggm
        model = QuicGraphicalLasso(init_method='cov',
                                   lam=0.5,
                                   mode='default',
                                   verbose=1)
        print('\nCalculating QuicGraphLasso precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'QuicGraphLassoCV':
        try:
            from inverse_covariance import QuicGraphicalLassoCV
        except ImportError:
            print('Cannot run QuicGraphLassoCV. Skggm not installed!')
            raise

        # Compute the sparse inverse covariance via QuicGraphLassoCV
        # credit: skggm
        model = QuicGraphicalLassoCV(init_method='cov', verbose=1)
        print(
            '\nCalculating QuicGraphLassoCV precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'QuicGraphicalLassoEBIC':
        try:
            from inverse_covariance import QuicGraphicalLassoEBIC
        except ImportError:
            print('Cannot run QuicGraphLassoEBIC. Skggm not installed!')
            raise

        # Compute the sparse inverse covariance via QuicGraphLassoEBIC
        # credit: skggm
        model = QuicGraphicalLassoEBIC(init_method='cov', verbose=1)
        print(
            '\nCalculating QuicGraphLassoEBIC precision matrix using skggm...\n'
        )
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'AdaptiveQuicGraphLasso':
        try:
            from inverse_covariance import AdaptiveQuicGraphicalLasso, QuicGraphicalLassoEBIC
        except ImportError:
            print('Cannot run AdaptiveGraphLasso. Skggm not installed!')
            raise

        # Compute the sparse inverse covariance via
        # AdaptiveGraphLasso + QuicGraphLassoEBIC + method='binary'
        # credit: skggm
        model = AdaptiveQuicGraphicalLasso(
            estimator=QuicGraphicalLassoEBIC(init_method='cov', ),
            method='binary',
        )
        print(
            '\nCalculating AdaptiveQuicGraphLasso precision matrix using skggm...\n'
        )
        model.fit(time_series)
        conn_matrix = -model.estimator_.precision_

    # Weight resulting matrix by voxels in each label if using parcels as nodes
    # if parc is True:
    #     norm_parcels = (vox_array - min(vox_array)) / (max(vox_array) - min(vox_array))
    #     conn_matrix_norm = normalize(conn_matrix)
    #     conn_matrix = norm_parcels * conn_matrix_norm

    coords = np.array(coords)
    label_names = np.array(label_names)
    return conn_matrix, conn_model, dir_path, node_size, smooth, dens_thresh, network, ID, mask, min_span_tree, disp_filt, parc, prune, atlas_select, uatlas_select, label_names, coords
Ejemplo n.º 22
0
# Reconstruction error of the PPCA reconstruction for the 750-point set
# (sample_750 and re_750points_ppca are built earlier in the script).
re_error_750_ppca = getReconstructionError(sample_750, re_750points_ppca.T)
drawReconstructionError(re_error_750_ppca)
plt.title('reconstruction error of 750 points of PPCA')
plt.show()

# Reconstruct the 250 points taken from rows 750..999 of `sample`:
# project with W_ppca, map back with getPPCAInverseTransform, then plot the
# reconstruction error.
sample_250 = sample[750:1000]
re_250points_ppca = W_ppca.dot(sample_250.T)
re_250points_ppca = getPPCAInverseTransform(re_250points_ppca.T)
re_error_250_ppca = getReconstructionError(sample_250, re_250points_ppca.T)
drawReconstructionError(re_error_250_ppca)
plt.title('reconstruction error of 250 points of PPCA')
plt.show()

#==============================================================================
#problem 2.9
#==============================================================================
# Fit a graphical lasso (first positional argument 0.01) on the 750-point set.
from sklearn.covariance import GraphLasso
gl = GraphLasso(0.01)
gl.fit(sample_750)

# Estimated covariance matrix, passed to the covarianceMatrix helper
# (presumably a plotting helper, since a title and plt.show() follow --
# confirm against its definition).
cov_gl = gl.covariance_
covarianceMatrix(cov_gl)
plt.title('convariance matrix of GL')
plt.show()

# Estimated precision (inverse covariance) matrix, displayed the same way.
prec_gl = gl.get_precision()
covarianceMatrix(prec_gl)
plt.title('inverse  convariance matrix of GL')
plt.show()
Ejemplo n.º 23
0
def get_conn_matrix(time_series, conn_model):
    """Estimate a connectivity matrix from ``time_series`` using ``conn_model``.

    Supported ``conn_model`` values:

    * ``'corr'`` / ``'partcorr'`` -- nilearn ``ConnectivityMeasure`` with kind
      ``'correlation'`` / ``'partial correlation'``.
    * ``'cov'`` / ``'sps'`` -- covariance / negated precision matrix from
      scikit-learn's ``GraphLassoCV``. If that fit is numerically unstable,
      ``GraphLasso`` is retried on shrunk versions of the empirical covariance.
    * ``'QuicGraphLasso'``, ``'QuicGraphLassoCV'``, ``'QuicGraphLassoEBIC'``,
      ``'AdaptiveQuicGraphLasso'`` -- negated precision matrix from the
      matching skggm (``inverse_covariance``) estimator.

    Third-party packages are imported inside the branch that uses them, so
    only the packages required by the selected model need to be installed.

    Parameters
    ----------
    time_series : array-like
        Data handed to the estimators' ``fit`` / ``fit_transform``
        (presumably samples x nodes -- TODO confirm against the caller).
    conn_model : str
        Name of the connectivity model (see above).

    Returns
    -------
    The estimated matrix, or ``None`` when ``conn_model`` is not recognised.

    Raises
    ------
    RuntimeError
        If, for ``'cov'`` / ``'sps'``, neither the cross-validated fit nor any
        shrinkage retry produced a fitted estimator.
    """
    # scikit-learn reports an ill-conditioned graphical-lasso problem with
    # FloatingPointError; RuntimeWarning only shows up as an exception when
    # warnings are escalated to errors. Treat both as "unstable estimation".
    unstable_fit_errors = (RuntimeWarning, FloatingPointError)

    conn_matrix = None
    if conn_model == 'corr':
        from nilearn.connectome import ConnectivityMeasure
        # credit: nilearn
        print('\nComputing correlation matrix...\n')
        conn_measure = ConnectivityMeasure(kind='correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
    elif conn_model == 'partcorr':
        from nilearn.connectome import ConnectivityMeasure
        # credit: nilearn
        print('\nComputing partial correlation matrix...\n')
        conn_measure = ConnectivityMeasure(kind='partial correlation')
        conn_matrix = conn_measure.fit_transform([time_series])[0]
    elif conn_model == 'cov' or conn_model == 'sps':
        from sklearn.covariance import GraphLassoCV
        # Fit estimator to matrix to get sparse matrix
        estimator_shrunk = None
        estimator = GraphLassoCV()
        try:
            print('\nComputing covariance...\n')
            estimator.fit(time_series)
        except unstable_fit_errors:
            # Forget the unfitted estimator so the failure check below can
            # actually detect that nothing was fitted.
            estimator = None
            print('Unstable Lasso estimation--Attempting to re-run by first applying shrinkage...')
            try:
                from sklearn.covariance import GraphLasso, empirical_covariance, shrunk_covariance
                emp_cov = empirical_covariance(time_series)
                alpha_range = 10.0 ** np.arange(-8, 0)
                # NOTE(review): a successful fit only ends the alpha loop; the
                # shrinkage loop keeps going, so the last shrinkage level that
                # fits wins. Kept as-is to preserve results -- confirm intent.
                for shrinkage in np.arange(0.8, 0.99, 0.01):
                    shrunk_cov = shrunk_covariance(emp_cov, shrinkage=shrinkage)
                    for alpha in alpha_range:
                        candidate = GraphLasso(alpha)
                        try:
                            candidate.fit(shrunk_cov)
                        except unstable_fit_errors:
                            print("Covariance estimation failed with shrinkage at alpha=%s" % alpha)
                            continue
                        print("Retrying covariance matrix estimate with alpha=%s" % alpha)
                        # Only a successfully fitted estimator is recorded.
                        estimator_shrunk = candidate
                        break
            except ValueError:
                print('Unstable Lasso estimation! Shrinkage failed. A different connectivity model may be needed.')
        if estimator is None and estimator_shrunk is None:
            raise RuntimeError('ERROR: Covariance estimation failed.')
        if conn_model == 'sps':
            if estimator_shrunk is None:
                print('\nFetching precision matrix from covariance estimator...\n')
                conn_matrix = -estimator.precision_
            else:
                print('\nFetching shrunk precision matrix from covariance estimator...\n')
                conn_matrix = -estimator_shrunk.precision_
        elif conn_model == 'cov':
            if estimator_shrunk is None:
                print('\nFetching covariance matrix from covariance estimator...\n')
                conn_matrix = estimator.covariance_
            else:
                conn_matrix = estimator_shrunk.covariance_
    elif conn_model == 'QuicGraphLasso':
        from inverse_covariance import QuicGraphLasso
        # Compute the sparse inverse covariance via QuicGraphLasso
        # credit: skggm
        model = QuicGraphLasso(
            init_method='cov',
            lam=0.5,
            mode='default',
            verbose=1)
        print('\nCalculating QuicGraphLasso precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'QuicGraphLassoCV':
        from inverse_covariance import QuicGraphLassoCV
        # Compute the sparse inverse covariance via QuicGraphLassoCV
        # credit: skggm
        model = QuicGraphLassoCV(
            init_method='cov',
            verbose=1)
        print('\nCalculating QuicGraphLassoCV precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'QuicGraphLassoEBIC':
        from inverse_covariance import QuicGraphLassoEBIC
        # Compute the sparse inverse covariance via QuicGraphLassoEBIC
        # credit: skggm
        model = QuicGraphLassoEBIC(
            init_method='cov',
            verbose=1)
        print('\nCalculating QuicGraphLassoEBIC precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.precision_
    elif conn_model == 'AdaptiveQuicGraphLasso':
        from inverse_covariance import AdaptiveGraphLasso, QuicGraphLassoEBIC
        # Compute the sparse inverse covariance via
        # AdaptiveGraphLasso + QuicGraphLassoEBIC + method='binary'
        # credit: skggm
        model = AdaptiveGraphLasso(
                estimator=QuicGraphLassoEBIC(
                    init_method='cov',
                ),
                method='binary',
            )
        print('\nCalculating AdaptiveQuicGraphLasso precision matrix using skggm...\n')
        model.fit(time_series)
        conn_matrix = -model.estimator_.precision_

    return conn_matrix
Ejemplo n.º 24
0
# Load the yeast-comparison tables (no header row) straight from the course
# web site; each call performs a network request.
sb_expression = pd.read_table(
    "https://homes.cs.washington.edu/~suinlee/cse527/notes/yeast-comparison/sbay-expression.txt",
    header=None)
conserved_gene = pd.read_table(
    "https://homes.cs.washington.edu/~suinlee/cse527/notes/yeast-comparison/conserved-genes.txt",
    header=None)
sc_experiment = pd.read_table(
    "https://homes.cs.washington.edu/~suinlee/cse527/notes/yeast-comparison/scer-experiments.txt",
    header=None)
sb_experiment = pd.read_table(
    "https://homes.cs.washington.edu/~suinlee/cse527/notes/yeast-comparison/sbay-experiments.txt",
    header=None)

#%%
# Data normalization: transpose the expression table, then standardise each
# column of the transposed array (subtract its mean, divide by its std).
sb_data = sb_expression.values.T
#sb_normdata = normalize(sb_data, axis=1)
means = np.mean(sb_data, axis=0)
stds = np.std(sb_data, axis=0)
sb_normdata = np.divide(np.subtract(sb_data, means), stds)
#%%
# Graphical lasso with alpha=1.
GL_sb = GraphLasso(alpha=1)

# Time the fit.
# NOTE(review): the model is fit on the raw transposed expression values, not
# on sb_normdata computed above -- confirm this is intended.
tic = time.time()
GL_sb.fit(sb_expression.values.T)
toc = time.time()
time1 = toc - tic
print(time1)

# Persist the estimated precision matrix to 'perc_sb.npy'.
perc_sb = GL_sb.precision_
np.save('perc_sb.npy', perc_sb)
Ejemplo n.º 25
0
# One column per protein; ProteinNames is defined earlier in the script.
NCols = len(ProteinNames)

#Import the data and convert to a numpy array
# Read through a context manager so the file handle is closed as soon as the
# contents are in memory (the bare open(...).read() never closed it).
with open(os.path.join('data', 'sachsCtsHTF.txt'), 'r') as data_file:
    X = data_file.read().split()
X = [float(x) for x in X]
X = np.array(X).reshape(-1, NCols)
# Centre every column, then rescale.
X -= X.mean(axis=0).reshape(1, -1)
X /= np.sqrt(
    1000
)  #same as http://www-stat.stanford.edu/~tibs/ElemStatLearn/datasets/sachs.info

#Regularization parameters
Lambs = [36, 27, 7, 0]

for lam in Lambs:
    # Fit a graphical lasso with `lam` as its first positional argument.
    GL = GraphLasso(lam)
    GL.fit(X)

    prec = GL.precision_

    #Form graph
    G = nx.Graph()
    G.add_nodes_from(ProteinNames)

    # Connect two proteins whenever their precision-matrix entry is nonzero;
    # only the strict lower triangle is scanned, so each pair is visited once.
    for i in range(NCols):
        for j in range(i):
            if prec[i, j] != 0:
                G.add_edges_from([(ProteinNames[i], ProteinNames[j])])

    ttl = 'lambda {}, nedges {}'.format(lam, len(G.edges))
    print(ttl)
Ejemplo n.º 26
0
def estimatePrecisionFromSet(data, alpha):
    """Fit a ``GraphLasso`` with the given ``alpha`` on ``data`` and return
    the estimated precision matrix wrapped in ``sp.csc_matrix``."""
    estimator = GraphLasso(alpha=alpha)
    estimator.fit(data)
    precision = estimator.precision_
    return sp.csc_matrix(precision)
Ejemplo n.º 27
0
def estimatePrecisionFromFile(file, dims, sparsity):
    """Read ``file`` with ``pd.read_csv``, fit a default ``GraphLasso`` on it
    and return the estimated precision matrix wrapped in ``sp.csc_matrix``.

    ``dims`` and ``sparsity`` are accepted but not used.
    """
    frame = pd.read_csv(file)
    estimator = GraphLasso()
    estimator.fit(frame)
    precision = estimator.precision_
    return sp.csc_matrix(precision)
    # compute the empirical covariance matrix
    C_emp = X.dot(X.T) / float(N)
    print('Empirical Cov:')
    print C_emp

    # neighborhood selection
    nhs = NeighborSelect(EDPP(),
                         ProximalGradientSolver(),
                         path_lb=0.2,
                         path_steps=5,
                         path_scale='log')
    Cb = nhs.fit(np.ascontiguousarray(X))
    print Cb

    glasso = GraphLasso(alpha=0.005, tol=0.0001, max_iter=1000, verbose=False)
    glasso.fit(X.T)
    C = glasso.get_precision()
    print glasso.error_norm(COV)

    print('GraphLasso Cov:')
    print C

    # plot some example network
    plt.figure()
    plt.subplot(2, len(Cb), 1)
    plt.title('Cov')
    plt.pcolor(COV)
    plt.subplot(2, len(Cb), 2)
    plt.title('Emp. Cov')
    plt.pcolor(C_emp)
Ejemplo n.º 29
0
    'VLO', 'BAC', 'K', 'PFE', 'XRX', 'AIG', 'PEP', 'KO', 'PG', 'MCD', 'WMT',
    'JPM', 'C', 'WFC', 'GE', 'T', 'VZ', 'IBM', 'MSFT', 'GOOG', 'AAPL', 'RIMM',
    "^DJA", "CSCO", "YHOO", "ORCL", "SNDK", "DELL", "NVDA", "EBAY", "WIN",
    "WFM", "WHR", "WU", "WAG", "VMC", "UTX", "UNP", "USB", "TSN", "TMO", "TXT",
    "TXN", "TSO", "SYY", "SBUX", "SWK", "LUV", "CMCSA", "AMD", "S", "INTC",
    "VXX", "^GSPC"
]

# Plain decimal literals: zero-padded forms such as 01 / 03 are a syntax error
# on Python 3 (and are read as octal on Python 2, where they equal 1 and 3).
start_data = datetime.datetime(2010, 1, 3)
symbols, data = get_data(symbols, start_data)
# One column of absolute daily returns per symbol.
close_data = np.concatenate(
    [absolute_daily_returns(data[ts])[:, None] for ts in symbols], axis=1)
alpha = 0.47
# Single pre-formatted string so the output ("alpha:  0.47", two spaces) is
# identical under the Python 2 print statement and the Python 3 function.
print("alpha:  %s" % alpha)

# Fit the graphical lasso on the data returned by scale().
gl = GraphLasso(alpha)
nclose_data = scale(close_data)
gl.fit(nclose_data)

#remove the SP500
# Drops the last row and the last column of each matrix (presumably the
# "^GSPC" entry, assuming get_data keeps the symbol order -- confirm).
cov_sp = gl.covariance_[:, :-1].T[:, :-1]
prec_sp = gl.precision_[:, :-1].T[:, :-1]


def community_cluster(cov_sp, symbols):
    G = nx.Graph(cov_sp)
    partition = community.best_partition(G)
    for i in set(partition.values()):
        print "Community: ", i
        members = [
            symbols[node] for node in partition.keys() if partition[node] == i
Ejemplo n.º 30
0
def grangercausalitytests(x,
                          mxlg,
                          autolag=None,
                          alpha=0.0001,
                          max_iter=1000,
                          addconst=True,
                          verbose=True):
    """Fit a graphical lasso on the lagged design matrix built from ``x``.

    Despite its name (kept for existing callers), this function does not run
    the classical Granger F / chi-square tests. It builds the lag matrix of
    ``x`` with ``lagmat2ds``, appends a constant column with ``add_constant``,
    fits ``sklearn.covariance.GraphLasso`` on the result and returns the
    estimated covariance matrix.

    Parameters
    ----------
    x : array_like
        Input data, converted with ``np.asarray``. It must have more than
        ``3 * mxlg + int(addconst)`` rows. (Presumably a 2-D array with one
        time series per column -- TODO confirm against ``lagmat2ds``.)
    mxlg : int
        Number of lags passed to ``lagmat2ds``.
    autolag : optional
        Accepted for interface compatibility; currently unused.
    alpha : float
        Regularization parameter passed to ``GraphLasso``.
    max_iter : int
        Maximum number of iterations passed to ``GraphLasso``.
    addconst : bool
        Whether to append a constant column. Only ``True`` is implemented.
    verbose : bool
        Print a short header and the number of lags if true.

    Returns
    -------
    ndarray
        ``covariance_`` of the fitted ``GraphLasso`` estimator.

    Raises
    ------
    ValueError
        If ``x`` has too few rows for the requested number of lags.
    NotImplementedError
        If ``addconst`` is false.

    Notes
    -----
    TODO: convert to class and attach results properly

    Background on Granger causality: a series x2 is said to Granger cause x1
    when past values of x2 have a statistically significant effect on the
    current value of x1, taking past values of x1 into account as regressors.

    References
    ----------
    http://en.wikipedia.org/wiki/Granger_causality
    Greene: Econometric Analysis

    """
    x = np.asarray(x)

    if x.shape[0] <= 3 * mxlg + int(addconst):
        raise ValueError(
            "Insufficient observations. Maximum allowable "
            "lag is {0}".format(int((x.shape[0] - int(addconst)) / 3) - 1))
    if verbose:
        print('\nGranger Causality')
        print('number of lags (no zero)', mxlg)

    # create lagmat of both time series
    dta = lagmat2ds(x, mxlg, trim='both', dropex=1)

    #add constant
    if not addconst:
        raise NotImplementedError('Not Implemented')
    dtajoint = add_constant(dta[:, :], prepend=False)

    # Imported here so input validation above works without scikit-learn.
    from sklearn.covariance import GraphLasso

    # Run Lasso on all variables; honour the caller's max_iter instead of a
    # hard-coded iteration cap.
    lassoreg = GraphLasso(alpha=alpha, max_iter=max_iter)
    lassoreg.fit(dtajoint)
    result = lassoreg.covariance_
    #non_zeros = [i for i, x in enumerate(result) if x != 0]
    #non_zero_vars = set([(i+1)/(mxlg+1) for i in non_zeros])
    return result