Code example #1
import numpy as np
from sklearn.covariance import empirical_covariance, log_likelihood


def likelihood_score(X, precision_):
    """Sum the Gaussian log-likelihood of each test matrix in the stack X
    under the corresponding precision matrix in precision_."""
    # compute the empirical covariance of each (centered) test matrix
    location_ = X.mean(1).reshape(X.shape[0], 1, X.shape[2])
    test_cov = np.array(
        [empirical_covariance(x, assume_centered=True) for x in X - location_])

    res = sum(log_likelihood(S, K) for S, K in zip(test_cov, precision_))

    return res
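A minimal usage sketch (not from the original source), assuming X is a 3-D stack of test matrices and precision_ a matching stack of precision estimates; the identity matrices here are only placeholders:

import numpy as np

rng = np.random.RandomState(0)
# hypothetical batch of 4 test matrices, each with 50 samples of 3 features
X = rng.standard_normal((4, 50, 3))
# identity precision matrices stand in for fitted estimates
precision_ = np.array([np.eye(3)] * 4)

print(likelihood_score(X, precision_))  # summed Gaussian log-likelihood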
Code example #2
import numpy as np
from sklearn.covariance import log_likelihood


def _objective(mle, precision_, alpha):
    """Evaluation of the graphical-lasso objective function

    The objective function consists of a shifted, scaled version of the
    normalized log-likelihood (i.e. its empirical mean over the samples) plus
    a penalisation term that promotes sparsity.
    """
    p = precision_.shape[0]
    cost = -2. * log_likelihood(mle, precision_) + p * np.log(2 * np.pi)
    cost += alpha * (np.abs(precision_).sum() -
                     np.abs(np.diag(precision_)).sum())
    return cost
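A small sanity-check sketch (assuming the imports above and a recent scikit-learn): fit GraphicalLasso on toy data and evaluate the penalized objective on its precision estimate.

import numpy as np
from sklearn.covariance import GraphicalLasso, empirical_covariance
from sklearn.datasets import make_spd_matrix

rng = np.random.RandomState(0)
true_cov = make_spd_matrix(5, random_state=0)
X = rng.multivariate_normal(np.zeros(5), true_cov, size=200)

mle = empirical_covariance(X)
model = GraphicalLasso(alpha=0.1).fit(X)
# penalized negative log-likelihood of the fitted precision matrix
print(_objective(mle, model.precision_, alpha=0.1))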
Code example #3
    def score(self, X, y):
        """Computes the log-likelihood of a Gaussian data set with
        `self.covariance_` as an estimator of its covariance matrix.

        Parameters
        ----------
        X : array-like, shape = (n_samples, n_features)
            Test data of which we compute the likelihood, where n_samples is
            the number of samples and n_features is the number of features.
            X is assumed to be drawn from the same distribution as
            the data used in fit (including centering).

        y : array-like, shape = (n_samples,)
            Class label of each sample.

        Returns
        -------
        res : float
            The likelihood of the data set with `self.covariance_` as an
            estimator of its covariance matrix.

        """
        # Covariance does not make sense for a single feature
        X, y = check_X_y(X,
                         y,
                         accept_sparse=False,
                         dtype=np.float64,
                         order="C",
                         ensure_min_features=2,
                         estimator=self)

        # compute empirical covariance of the test set
        test_cov = np.array([
            empirical_covariance(X[y == cl] - self.location_[i],
                                 assume_centered=True)
            for i, cl in enumerate(self.classes_)
        ])

        res = sum(X[y == cl].shape[0] * log_likelihood(S, K) for S, K, cl in
                  zip(test_cov, self.get_observed_precision(), self.classes_))

        return res
Code example #4
    def score(self, X_test, y=None):
        """Computes the log-likelihood of a Gaussian data set with
        `self.covariance_` as an estimator of its covariance matrix.

        Parameters
        ----------
        X_test : array-like, shape = [n_samples, n_features]
            Test data of which we compute the likelihood, where n_samples is
            the number of samples and n_features is the number of features.
            X_test is assumed to be drawn from the same distribution as
            the data used in fit (including centering).

        y : Ignored, present for API consistency.

        Returns
        -------
        res : float
            The likelihood of the data set with `self.covariance_` as an
            estimator of its covariance matrix.

        """
        if not self.bypass_transpose:
            X_test = X_test.transpose(2, 0, 1)  # put time as first dimension
        # compute empirical covariance of the test set
        test_cov = np.array([
            empirical_covariance(x, assume_centered=True)
            for x in X_test - self.location_
        ])

        res = sum(
            log_likelihood(S, K)
            for S, K in zip(test_cov, self.get_observed_precision()))

        # ALLA  MATLAB1
        # ranks = [np.linalg.matrix_rank(L) for L in self.latent_]
        # scores_ranks = np.square(ranks-np.sqrt(L.shape[1]))

        return res  # - np.sum(scores_ranks)
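For comparison, scikit-learn's built-in covariance estimators expose the same kind of held-out log-likelihood through their score method; a minimal sketch with GraphicalLasso:

import numpy as np
from sklearn.covariance import GraphicalLasso

rng = np.random.RandomState(42)
X_train = rng.standard_normal((200, 4))
X_test = rng.standard_normal((100, 4))

model = GraphicalLasso(alpha=0.2).fit(X_train)
# Gaussian log-likelihood of the held-out data under the fitted model
print(model.score(X_test))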
Code example #5
    def set_optimal_shrinkage_amount(self, X, method="cv", verbose=False):
        """Set optimal shrinkage amount according to chosen method.

        /!\ Could be rewritten with GridSearchCV.

        Parameters
        ----------
        X: array-like, shape = [n_samples, n_features]
          Training data, where n_samples is the number of samples
          and n_features is the number of features.
        method: float or str in {"cv", "lw", "oas"},
          The method used to set the shrinkage. If a floating value is provided
          that value is used. Otherwise, the selection is made according to
          the selected method.
          "cv" (default): 10-fold cross-validation.
                          (or Leave-One Out cross-validation if n_samples < 10)
          "lw": Ledoit-Wolf criterion
          "oas": OAS criterion
        verbose: bool,
          Verbose mode or not.

        Returns
        -------
        optimal_shrinkage: float,
          The optimal amount of shrinkage.

        """
        n_samples, n_features = X.shape
        if isinstance(method, str):
            std_shrinkage = np.trace(empirical_covariance(X)) / \
                (n_features * n_samples)
            self.std_shrinkage = std_shrinkage
        if method == "cv":
            from sklearn.covariance import log_likelihood
            n_samples, n_features = X.shape
            shrinkage_range = np.concatenate(
                ([0.], 10.**np.arange(-n_samples / n_features, -1, 0.5),
                 np.arange(0.05, 1., 0.05), np.arange(1., 20., 1.),
                 np.arange(20., 100, 5.), 10.**np.arange(2, 7, 0.5)))
            # get a "pure" active set with a standard shrinkage
            active_set_estimator = RMCDl2(shrinkage=std_shrinkage)
            active_set_estimator.fit(X)
            active_set = np.where(active_set_estimator.support_)[0]
            # split this active set in ten parts
            active_set = active_set[np.random.permutation(active_set.size)]
            if active_set.size >= 10:
                # ten fold cross-validation
                n_folds = 10
                fold_size = active_set.size // 10
            else:
                n_folds = active_set.size
                fold_size = 1
            log_likelihoods = np.zeros((shrinkage_range.size, n_folds))
            if verbose:
                print "*** Cross-validation"
            for trial in range(n_folds):
                if verbose:
                    print(trial / float(n_folds))
                # define train and test sets
                train_set_indices = np.concatenate(
                    (np.arange(0, fold_size * trial),
                     np.arange(fold_size * (trial + 1), n_folds * fold_size)))
                train_set = X[active_set[train_set_indices]]
                test_set = X[active_set[np.arange(fold_size * trial,
                                                  fold_size * (trial + 1))]]
                # learn location and covariance estimates from train set
                # for several amounts of shrinkage
                for i, shrinkage in enumerate(shrinkage_range):
                    location = test_set.mean(0)
                    cov = empirical_covariance(train_set)
                    cov.flat[::(n_features + 1)] += shrinkage * std_shrinkage
                    # compute test data likelihood
                    log_likelihoods[i, trial] = log_likelihood(
                        empirical_covariance(test_set - location,
                                             assume_centered=True), pinvh(cov))
            optimal_shrinkage = shrinkage_range[np.argmax(
                log_likelihoods.mean(1))]
            self.shrinkage = optimal_shrinkage * std_shrinkage
            self.shrinkage_cst = optimal_shrinkage
            if verbose:
                print "optimal shrinkage: %g (%g x lambda(= %g))" \
                    % (self.shrinkage, optimal_shrinkage, std_shrinkage)
            self.log_likelihoods = log_likelihoods
            self.shrinkage_range = shrinkage_range

            return shrinkage_range, log_likelihoods
        elif method == "oas":
            from sklearn.covariance import OAS
            rmcd = self.__class__(shrinkage=std_shrinkage)
            support = rmcd.fit(X).support_
            oas = OAS().fit(X[support])
            if oas.shrinkage_ == 1:
                self.shrinkage_cst = np.inf
            else:
                self.shrinkage_cst = oas.shrinkage_ / (1. - oas.shrinkage_)
            self.shrinkage = self.shrinkage_cst * std_shrinkage * n_features
        elif method == "lw":
            from sklearn.covariance import LedoitWolf
            rmcd = RMCDl2(self, h=self.h, shrinkage=std_shrinkage)
            support = rmcd.fit(X).support_
            lw = LedoitWolf().fit(X[support])
            if lw.shrinkage_ == 1:
                self.shrinkage_cst = np.inf
            else:
                self.shrinkage_cst = lw.shrinkage_ / (1. - lw.shrinkage_)
            self.shrinkage = self.shrinkage_cst * std_shrinkage * n_features
        else:
            pass
        return
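As the docstring itself notes, the cross-validated selection could be expressed with GridSearchCV. A minimal sketch of that idea, using sklearn's ShrunkCovariance rather than the RMCDl2 estimator above:

import numpy as np
from sklearn.covariance import ShrunkCovariance
from sklearn.model_selection import GridSearchCV

rng = np.random.RandomState(0)
X = rng.standard_normal((60, 5))

# grid of shrinkage amounts; ShrunkCovariance.score is the Gaussian log-likelihood
param_grid = {"shrinkage": np.logspace(-2, 0, 20)}
search = GridSearchCV(ShrunkCovariance(), param_grid, cv=5).fit(X)
print(search.best_params_["shrinkage"])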
Code example #6
X_train = np.dot(base_X_train, coloring_matrix)
X_test = np.dot(base_X_test, coloring_matrix)

# #############################################################################
# Compute the likelihood on test data

# spanning a range of possible shrinkage coefficient values
shrinkages = np.logspace(-2, 0, 30)
negative_logliks = [-ShrunkCovariance(shrinkage=s).fit(X_train).score(X_test)
                    for s in shrinkages]

# under the ground-truth model, which we would not have access to in real
# settings
real_cov = np.dot(coloring_matrix.T, coloring_matrix)
emp_cov = empirical_covariance(X_train)
loglik_real = -log_likelihood(emp_cov, linalg.inv(real_cov))

# #############################################################################
# Compare different approaches to setting the parameter

# GridSearch for an optimal shrinkage coefficient
tuned_parameters = [{'shrinkage': shrinkages}]
cv = GridSearchCV(ShrunkCovariance(), tuned_parameters, cv=5)
cv.fit(X_train)

# Ledoit-Wolf optimal shrinkage coefficient estimate
lw = LedoitWolf()
loglik_lw = lw.fit(X_train).score(X_test)

# OAS coefficient estimate
oa = OAS()
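The excerpt stops after constructing the OAS estimator; presumably the original script fits and scores it the same way as the Ledoit-Wolf estimator above (a sketch, with loglik_oa as an illustrative name):

loglik_oa = oa.fit(X_train).score(X_test)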
Code example #7
def graphical_lasso(emp_cov,
                    alpha,
                    cov_init=None,
                    mode='cd',
                    tol=1e-4,
                    enet_tol=1e-4,
                    max_iter=100,
                    verbose=False,
                    return_costs=False,
                    eps=np.finfo(np.float64).eps,
                    return_n_iter=False,
                    isRLA=False):
    """l1-penalized covariance estimator

    Read more in the :ref:`User Guide <sparse_inverse_covariance>`.

    Parameters
    ----------
    emp_cov : 2D ndarray, shape (n_features, n_features)
        Empirical covariance from which to compute the covariance estimate.

    alpha : positive float
        The regularization parameter: the higher alpha, the more
        regularization, the sparser the inverse covariance.

    cov_init : 2D array (n_features, n_features), optional
        The initial guess for the covariance.

    mode : {'cd', 'lars'}
        The Lasso solver to use: coordinate descent or LARS. Use LARS for
        very sparse underlying graphs, where p > n. Elsewhere prefer cd
        which is more numerically stable.

    tol : positive float, optional
        The tolerance to declare convergence: if the dual gap goes below
        this value, iterations are stopped.

    enet_tol : positive float, optional
        The tolerance for the elastic net solver used to calculate the descent
        direction. This parameter controls the accuracy of the search direction
        for a given column update, not of the overall parameter estimate. Only
        used for mode='cd'.

    max_iter : integer, optional
        The maximum number of iterations.

    verbose : boolean, optional
        If verbose is True, the objective function and dual gap are
        printed at each iteration.

    return_costs : boolean, optional
        If return_costs is True, the objective function and dual gap
        at each iteration are returned.

    eps : float, optional
        The machine-precision regularization in the computation of the
        Cholesky diagonal factors. Increase this for very ill-conditioned
        systems.

    return_n_iter : bool, optional
        Whether or not to return the number of iterations.

    Returns
    -------
    covariance : 2D ndarray, shape (n_features, n_features)
        The estimated covariance matrix.

    precision : 2D ndarray, shape (n_features, n_features)
        The estimated (sparse) precision matrix.

    costs : list of (objective, dual_gap) pairs
        The list of values of the objective function and the dual gap at
        each iteration. Returned only if return_costs is True.

    n_iter : int
        Number of iterations. Returned only if `return_n_iter` is set to True.

    See Also
    --------
    GraphicalLasso, GraphicalLassoCV

    Notes
    -----
    The algorithm employed to solve this problem is the GLasso algorithm,
    from the Friedman 2008 Biostatistics paper. It is the same algorithm
    as in the R `glasso` package.

    One possible difference with the `glasso` R package is that the
    diagonal coefficients are not penalized.

    """
    _, n_features = emp_cov.shape
    if alpha == 0:
        if return_costs:
            precision_ = linalg.inv(emp_cov)
            cost = -2. * log_likelihood(emp_cov, precision_)
            cost += n_features * np.log(2 * np.pi)
            d_gap = np.sum(emp_cov * precision_) - n_features
            if return_n_iter:
                return emp_cov, precision_, (cost, d_gap), 0
            else:
                return emp_cov, precision_, (cost, d_gap)
        else:
            if return_n_iter:
                return emp_cov, linalg.inv(emp_cov), 0
            else:
                return emp_cov, linalg.inv(emp_cov)
    if cov_init is None:
        covariance_ = emp_cov.copy()
    else:
        covariance_ = cov_init.copy()
    # As a trivial regularization (Tikhonov like), we scale down the
    # off-diagonal coefficients of our starting point: this is needed, as
    # in the cross-validation the cov_init can easily be
    # ill-conditioned, and the CV loop blows up. Besides, this takes a
    # conservative standpoint on the initial conditions, and it tends to
    # make the convergence go faster.
    covariance_ *= 0.95
    diagonal = emp_cov.flat[::n_features + 1]
    covariance_.flat[::n_features + 1] = diagonal
    precision_ = linalg.pinvh(covariance_)

    indices = np.arange(n_features)
    costs = list()
    # The different l1 regression solvers have different numerical errors
    if mode == 'cd':
        errors = dict(over='raise', invalid='ignore')
    else:
        errors = dict(invalid='raise')
    try:
        # be robust to the max_iter=0 edge case, see:
        # https://github.com/scikit-learn/scikit-learn/issues/4134
        d_gap = np.inf
        # set a sub_covariance buffer
        sub_covariance = np.copy(covariance_[1:, 1:], order='C')
        for i in range(max_iter):
            for idx in range(n_features):
                # To keep the contiguous matrix `sub_covariance` equal to
                # covariance_[indices != idx].T[indices != idx]
                # we only need to update 1 row and 1 column when idx changes
                if idx > 0:
                    di = idx - 1
                    sub_covariance[di] = covariance_[di][indices != idx]
                    sub_covariance[:, di] = covariance_[:, di][indices != idx]
                else:
                    sub_covariance[:] = covariance_[1:, 1:]
                row = emp_cov[idx, indices != idx]
                with np.errstate(**errors):
                    if mode == 'cd':
                        # Use coordinate descent
                        coefs = -(precision_[indices != idx, idx] /
                                  (precision_[idx, idx] + 1000 * eps))
                        # TODO: swap in RLA LASSO with the following enet_coordinate_descent_gram() function!
                        if isRLA:
                            coefs = SLRviaRP(sub_covariance,
                                             np.matmul(
                                                 fractional_matrix_power(
                                                     sub_covariance, -0.5),
                                                 row),
                                             0.01,
                                             0.05,
                                             30,
                                             1.1,
                                             100,
                                             gamma=0)
                        else:
                            coefs, _, _, _ = enet_coordinate_descent_gram(
                                coefs, alpha, 0, sub_covariance, row,
                                row, max_iter, enet_tol,
                                check_random_state(None), False)
                    else:
                        # Use LARS
                        _, _, coefs = lars_path_gram(Xy=row,
                                                     Gram=sub_covariance,
                                                     n_samples=row.size,
                                                     alpha_min=alpha /
                                                     (n_features - 1),
                                                     copy_Gram=True,
                                                     eps=eps,
                                                     method='lars',
                                                     return_path=False)
                # Update the precision matrix
                precision_[idx, idx] = (
                    1. / (covariance_[idx, idx] -
                          np.dot(covariance_[indices != idx, idx], coefs)))
                precision_[indices != idx,
                           idx] = (-precision_[idx, idx] * coefs)
                precision_[idx,
                           indices != idx] = (-precision_[idx, idx] * coefs)
                coefs = np.dot(sub_covariance, coefs)
                covariance_[idx, indices != idx] = coefs
                covariance_[indices != idx, idx] = coefs
            if not np.isfinite(precision_.sum()):
                raise FloatingPointError('The system is too ill-conditioned '
                                         'for this solver')
            d_gap = _dual_gap(emp_cov, precision_, alpha)
            cost = _objective(emp_cov, precision_, alpha)
            if verbose:
                print('[graphical_lasso] Iteration '
                      '% 3i, cost % 3.2e, dual gap %.3e' % (i, cost, d_gap))
            if return_costs:
                costs.append((cost, d_gap))
            if np.abs(d_gap) < tol:
                break
            if not np.isfinite(cost) and i > 0:
                raise FloatingPointError('Non SPD result: the system is '
                                         'too ill-conditioned for this solver')
        else:
            warnings.warn(
                'graphical_lasso: did not converge after '
                '%i iteration: dual gap: %.3e' % (max_iter, d_gap),
                ConvergenceWarning)
    except FloatingPointError as e:
        e.args = (e.args[0] +
                  '. The system is too ill-conditioned for this solver', )
        raise e

    if return_costs:
        if return_n_iter:
            return covariance_, precision_, costs, i + 1
        else:
            return covariance_, precision_, costs
    else:
        if return_n_iter:
            return covariance_, precision_, i + 1
        else:
            return covariance_, precision_
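A minimal usage sketch of the same entry point through scikit-learn's public graphical_lasso (the isRLA branch and the SLRviaRP helper are specific to this modified version and are not exercised here):

import numpy as np
from sklearn.covariance import empirical_covariance, graphical_lasso
from sklearn.datasets import make_sparse_spd_matrix

# toy data drawn from a known sparse precision matrix
prec = make_sparse_spd_matrix(6, alpha=0.9, random_state=0)
rng = np.random.RandomState(0)
X = rng.multivariate_normal(np.zeros(6), np.linalg.inv(prec), size=300)

emp_cov = empirical_covariance(X)
cov_, prec_ = graphical_lasso(emp_cov, alpha=0.05)
print(np.sum(np.abs(prec_) > 1e-8))  # number of non-zero entries in the precision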