Example #1
def log_likelihood(test_series, cov, maps, residues):
    """ Return the log likelihood of test_series under the model
        described by cov, maps, and residues.
    """
    # try:
    # This makes heavy use of the matrix inversion lemma
    #test_series = np.concatenate(test_series, axis=0)
    n_samples = test_series.shape[0]
    white_test_series = test_series / residues
    residues_fit = np.sum(white_test_series**2)
    white_test_series /= residues
    white_projection = np.dot(white_test_series, maps)
    del white_test_series
    prec_maps = linalg.inv(cov)
    prec_maps += np.dot(maps.T / residues**2, maps)
    residues_fit -= np.trace(
        np.dot(np.dot(white_projection.T, white_projection),
               linalg.inv(prec_maps)))
    del white_projection
    white_maps = maps / residues[:, np.newaxis]
    prec_maps += np.dot(white_maps.T, white_maps)
    del white_maps
    det = fast_logdet(prec_maps)
    del prec_maps
    return (-residues_fit / n_samples - fast_logdet(cov) - det -
            2 * np.sum(np.log(residues)))
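These snippets are excerpted from larger modules, so their imports are not shown. As a hedged baseline (an assumption about the original files, not an exact copy of any of them), the names used most often on this page resolve as follows; note that fast_logdet returns -inf when the determinant is not strictly positive:

# Common import sketch for the examples on this page (assumed, not taken
# verbatim from any of the original projects).
import numpy as np
from math import log
from scipy import linalg
from scipy.linalg import pinvh
from scipy.special import multigammaln
from sklearn.covariance import empirical_covariance
from sklearn.utils.extmath import fast_logdet, squared_norm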
def objectiveFLGL(emp_cov, K, R, T, H, U, mu, eta, rho):
    res = -fast_logdet(R) + np.sum(R * emp_cov)
    res += rho / 2. * squared_norm(R - T + U + np.linalg.multi_dot(
        (K.T, linalg.pinvh(H), K)))
    res += mu * l1_od_norm(H)
    res += eta * l1_od_norm(T)
    return res
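objectiveFLGL also relies on an l1_od_norm helper that is not shown on this page. Below is a minimal sketch consistent with how it is used here (the l1 norm of the off-diagonal entries of a square matrix); the helper in the original project may differ:

import numpy as np

def l1_od_norm(A):
    # Sum of absolute values of the off-diagonal entries (assumed behaviour).
    return np.abs(A).sum() - np.abs(np.diag(A)).sum()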
Example #4
    def score_samples(self, X):
        """Return the log-likelihood of each sample

        See. "Pattern Recognition and Machine Learning"
        by C. Bishop, 12.2.1 p. 574
        or http://www.miketipping.com/papers/met-mppca.pdf

        Parameters
        ----------
        X: array, shape(n_samples, n_features)
            The data.

        Returns
        -------
        ll: array, shape (n_samples,)
            Log-likelihood of each sample under the current model
        """
        X = array2d(X)
        Xr = X - self.mean_
        n_features = X.shape[1]
        log_like = np.zeros(X.shape[0])
        precision = self.get_precision()
        log_like = -.5 * (Xr * (np.dot(Xr, precision))).sum(axis=1)
        log_like -= .5 * (n_features * log(2. * np.pi)
                          - fast_logdet(precision))
        return log_like
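This method is essentially the body of scikit-learn's PCA.score_samples, so the same per-sample log-likelihood can be obtained from a fitted PCA model; a small sketch on random data (shapes are illustrative):

import numpy as np
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
X = rng.randn(100, 5)

pca = PCA(n_components=2).fit(X)
ll = pca.score_samples(X)  # per-sample log-likelihood, shape (100,)
print(ll.shape, ll.mean())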
    def bound(self, doc, lamda=None, nu2=None):
        """
        Estimate the variational bound of a document

        """
        if lamda is None:
            lamda = self.lamda

        if nu2 is None:
            nu2 = self.nu2

        N = sum([cnt for _, cnt in doc])  # nb of words in document

        bound = 0.0

        # E[log p(\eta | \mu, \Sigma)] + H(q(\eta | \lamda, \nu)) + sum_n,i { \phi_{n,i}*log(\phi_{n,i}) }
        bound = - np.sum(np.diag(nu2) * self.sigma_inverse) + fast_logdet(self.sigma_inverse)
        
        bound -= (lamda - self.mu).transpose().dot(self.sigma_inverse).dot(lamda - self.mu)
        bound += np.sum(np.log(nu2)) + self.num_topics  # TODO safe_log
        
        bound /= 2
        # print "first term %f for doc %s" %(bound, doc)

        # \sum_n { E[log p(z_n | \eta)] - sum_i {\lamda_i * \phi_{n, i}}
        sum_exp = np.exp(lamda + 0.5 * nu2).sum()
        bound -= (N * (sum_exp / self.zeta - 1. + np.log(self.zeta)))

        # print "second term %f for doc %s" %(bound, doc)

        # E[log p(w_n | z_n, \beta)] - sum_n,i { \phi_{n,i}*log(\phi_{n,i}) }
        bound += sum(c * (self.phi[n] * (lamda + np.log(self.beta[:, n]) - np.log(self.phi[n]))).sum()
            for (n, c) in doc)

        return bound
Example #6
def log_likelihood(covariance, precision):
    """Computes the log-likelihood between the covariance and precision
    estimate.

    Parameters
    ----------
    covariance : 2D ndarray (n_features, n_features)
        Maximum Likelihood Estimator of covariance

    precision : 2D ndarray (n_features, n_features)
        The precision matrix of the covariance model to be tested

    Returns
    -------
    log-likelihood
    """
    assert covariance.shape == precision.shape
    dim, _ = precision.shape
    log_likelihood_ = (
        -np.sum(covariance * precision)
        + fast_logdet(precision)
        - dim * np.log(2 * np.pi)
    )
    log_likelihood_ /= 2.
    return log_likelihood_
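A quick way to exercise the function above, assuming the import sketch near the top of the page: evaluate it with the sample covariance of white-noise data and the identity as the precision model.

import numpy as np
from sklearn.covariance import empirical_covariance

rng = np.random.RandomState(0)
X = rng.randn(500, 4)
S = empirical_covariance(X)

# With the identity precision the expression reduces to
# 0.5 * (-trace(S) - dim * log(2 * pi)).
print(log_likelihood(S, np.eye(4)))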
Example #7
    def score(self, X, y=None):
        """Return a score associated to new data

        Parameters
        ----------
        X: array of shape(n_samples, n_features)
            The data to test

        Returns
        -------
        ll: array of shape (n_samples),
            log-likelihood of each row of X under the current model
        """
        Xr = X - self.mean_
        n_features = X.shape[1]
        log_like = np.zeros(X.shape[0])
        if (self.precision_ is None) and (self.covariance_ is None):
            XrP, ldet = self.dot_precision(X=Xr, logdet=True)

        else:
            if self.precision_ is None:
                self.precision_ = linalg.inv(self.covariance_)
            XrP = np.dot(Xr, self.precision_)
            ldet = fast_logdet(self.covariance_)

        log_like = -.5 * (Xr * XrP).sum(axis=1)
        log_like -= .5 * (ldet
                          + n_features * log(2. * np.pi))
        return log_like
    def log_likelihood(self, X):
        """Equivalent to scipy.

        from scipy.stats import invwishart
        invwishart.logpdf(X, nu, S)
        """
        nu = self.nu
        n_dim = X.shape[0]

        logp = nu * fast_logdet(self.S)
        logp -= np.sum(self.S * linalg.pinvh(X))
        logp -= (nu + n_dim + 1) * fast_logdet(X)
        logp -= nu * n_dim * np.log(2)
        logp -= 2 * multigammaln(0.5 * nu, n_dim)
        logp /= 2.0
        return logp
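The equivalence claimed in the docstring can be checked numerically. A standalone sketch of the same formula (with nu and S passed explicitly instead of read from the class) compared against scipy:

import numpy as np
from scipy import linalg
from scipy.special import multigammaln
from scipy.stats import invwishart
from sklearn.utils.extmath import fast_logdet

def invwishart_logpdf(X, nu, S):
    # Same expression as the method above, written without the class.
    n_dim = X.shape[0]
    logp = nu * fast_logdet(S)
    logp -= np.sum(S * linalg.pinvh(X))
    logp -= (nu + n_dim + 1) * fast_logdet(X)
    logp -= nu * n_dim * np.log(2)
    logp -= 2 * multigammaln(0.5 * nu, n_dim)
    return logp / 2.0

rng = np.random.RandomState(0)
A = rng.randn(3, 3)
X = A @ A.T + 3 * np.eye(3)  # a positive-definite argument
S = np.eye(3)
nu = 5                       # must exceed n_dim - 1
print(np.isclose(invwishart_logpdf(X, nu, S), invwishart.logpdf(X, nu, S)))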
Example #9
    def score_samples(self, X):
        """Return the log-likelihood of each sample.

        See. "Pattern Recognition and Machine Learning"
        by C. Bishop, 12.2.1 p. 574
        or http://www.miketipping.com/papers/met-mppca.pdf

        Parameters
        ----------
        X : array, shape(n_samples, n_features)
            The data.

        Returns
        -------
        ll : array, shape (n_samples,)
            Log-likelihood of each sample under the current model
        """
        check_is_fitted(self, "mean_")

        # X = check_array(X)
        Xr = X - self.mean_
        n_features = X.shape[1]
        precision = self.get_precision()  # [n_features, n_features]
        log_like = -.5 * (Xr * (da.dot(Xr, precision))).sum(axis=1)
        log_like -= .5 * (n_features * da.log(2. * np.pi) -
                          fast_logdet(precision))
        return log_like
Example #10
    def score(self, X, y=None):
        """Return a score associated to new data

        Parameters
        ----------
        X: array of shape(n_samples, n_features)
            The data to test

        Returns
        -------
        ll: array of shape (n_samples),
            log-likelihood of each row of X under the current model
        """
        Xr = X - self.mean_
        n_features = X.shape[1]
        log_like = np.zeros(X.shape[0])
        if (self.precision_ is None) and (self.covariance_ is None):
            XrP, ldet = self.dot_precision(X=Xr, logdet=True)

        else:
            if self.precision_ is None:
                self.precision_ = linalg.inv(self.covariance_)
            XrP = np.dot(Xr, self.precision_)
            ldet = fast_logdet(self.covariance_)

        log_like = -.5 * (Xr * XrP).sum(axis=1)
        log_like -= .5 * (ldet + n_features * log(2. * np.pi))
        return log_like
Example #11
def _ridge_smooth_fun_grad(theta, X, y, D, verbose, other):

    nv = theta[0]
    alphas = theta[1:]

    XX, Xy, yy, fit_intercept = other

    N, p = X.shape
    nD = D.shape[2]
    I = np.eye(p)

    # Prior covariance matrix for current parameter setting
    Cprior, invC = _ridge_smooth_inverse(D,
                                         alphas,
                                         fit_intercept=fit_intercept)

    # Posterior covariance and mean
    SS = linalg.pinv(XX / nv + invC)
    mu = np.dot(SS, Xy) / nv

    # Compute log-evidence
    term1 = .5 * (extmath.fast_logdet(2 * np.pi * SS) -
                  p * np.log(2 * np.pi * nv) - p * np.log(2 * np.pi) -
                  1. / extmath.fast_logdet(invC))
    term2 = -.5 * (yy / nv - np.dot(Xy.T, np.dot(SS, Xy)) / nv**2)
    logE = term1 + term2

    # Derivative with respect to covariance hyperparameters
    dAlphas = np.zeros((nD, ))
    for i in range(nD):
        A = Cprior - SS - np.outer(mu, mu)
        dAlphas[i] = .5 * np.trace(np.dot(A, D[:, :, i]))

    # Gradient with respect to the noise variance
    SSinvC = np.dot(SS, invC)
    rss = yy - 2 * np.dot(mu.T, Xy) + np.dot(mu.T, XX).dot(mu)
    dNsevar = -N / nv + np.trace(I - SSinvC) / nv + rss / nv**2

    dEE = np.append(dNsevar.item(), dAlphas)

    if verbose:
        ss = ("-logE: %0.3f | nv: %0.3f | alphas: (" % (-logE, nv))
        for alpha in alphas:
            ss += ("%0.3g, " % alpha)
        print(ss[:-2] + ")")

    return -logE, -dEE
    def log_likelihood(self, X):
        """Equivalent to scipy.

        from scipy.stats import wishart
        wishart.logpdf(X, nu, S)
        """
        nu = self.nu
        n_dim = X.shape[0]
        inv_S = self.inv_S

        logp = (nu - n_dim - 1) * fast_logdet(X)
        logp -= np.sum(X * inv_S)
        logp -= nu * n_dim * np.log(2)
        logp -= 2 * multigammaln(0.5 * nu, n_dim)
        logp -= nu * fast_logdet(self.S)
        logp /= 2.0
        return logp
Example #13
    def dot_precision(self, X, logdet=False):
        """Compute the dot product of a matrix X by the data
        precision matrix with the generative model.

        Returns
        -------
        Y   : array, shape=(n_samples, n_features)
          =X*precision
        """
        n_features = self.components_.shape[1]

        # handle corner cases first
        if self.n_components_ == 0:

            if logdet:
                #import pdb;pdb.set_trace()
                if np.isscalar(self.noise_variance_):
                    logdet_cov = np.log(self.noise_variance_)*X.shape[1]
                else:
                    assert self.noise_variance_.shape[0] == X.shape[1], "self.noise_variance_.shape[0] == X.shape[1]"
                    logdet_cov = np.log(self.noise_variance_).sum()
                return X / self.noise_variance_, logdet_cov
            else:
                return X / self.noise_variance_
        if self.n_components_ == n_features:
            covariance = self.get_covariance()
            if logdet:
                return X.dot(linalg.inv(covariance)), fast_logdet(covariance)
            else:
                return X.dot(linalg.inv(covariance))
        # Get precision using matrix inversion lemma
        components_ = self.components_
        exp_var = self.explained_variance_
        if self.whiten:
            components_ = components_ * np.sqrt(exp_var[:, np.newaxis])
        exp_var_diff = np.maximum(exp_var - self.noise_variance_, 0.)
        Xprecision = (1.0 / exp_var_diff) + (1.0 / self.noise_variance_)
        if logdet:
            #import pdb;pdb.set_trace()
            if np.isscalar(self.noise_variance_):
                logdet_cov = np.log(self.noise_variance_)*X.shape[1]
            else:
                assert self.noise_variance_.shape[0] == X.shape[1], "self.noise_variance_.shape[0] == X.shape[1]"
                logdet_cov = np.log(self.noise_variance_).sum()
            logdet_cov += np.log(((1.0 / exp_var_diff) + (1.0 / self.noise_variance_))).sum()
            logdet_cov += np.log(exp_var_diff).sum()

        Xprecision *= (self.noise_variance_ * self.noise_variance_)
        Xprecision = (X.dot(components_.T/(-Xprecision))).dot(components_)
        Xprecision += X/self.noise_variance_
        #cprecision=((1.0 / exp_var_diff) + (1.0 / self.noise_variance_))*(-self.noise_variance_ * self.noise_variance_)
        #cprecision = (components_.T/cprecision).dot(components_)
        #cprecision.flat[::len(cprecision) + 1] += 1. / self.noise_variance_
        #Xcprecision = X.dot(cprecision)
        if logdet:
            return Xprecision, logdet_cov
        else:
            return Xprecision
Example #14
def log_likelihood_full(test_series, full_cov):
    """ Return the log likelihood of test_series under the model
        described by full_cov.
    """
    # Without the matrix inversion lemma
    n_samples = test_series.shape[0]
    return -fast_logdet(full_cov) - 1. / n_samples * \
        np.trace(np.dot(np.dot(test_series, linalg.inv(full_cov)),
                        test_series.T))
Example #16
 def objective_function(self, data, location, covariance):
     """Objective function minimized at each step of the MCD algorithm.
     """
     precision = pinvh(covariance)
     det = fast_logdet(precision)
     trace = np.trace(
         np.dot(empirical_covariance(data - location, assume_centered=True),
                precision))
     pen = self.shrinkage * np.trace(precision)
     return -det + trace + pen
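The same objective can be evaluated outside the estimator, with the shrinkage weight as a plain variable. A sketch on random data (shrinkage stands in for self.shrinkage and is an assumed value):

import numpy as np
from scipy.linalg import pinvh
from sklearn.covariance import empirical_covariance
from sklearn.utils.extmath import fast_logdet

rng = np.random.RandomState(0)
data = rng.randn(200, 4)
location = data.mean(axis=0)
covariance = empirical_covariance(data)
shrinkage = 0.1  # assumed value of self.shrinkage

precision = pinvh(covariance)
obj = (-fast_logdet(precision)
       + np.trace(np.dot(empirical_covariance(data - location,
                                               assume_centered=True),
                         precision))
       + shrinkage * np.trace(precision))
print(obj)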
Example #17
 def objective_function(self, data, location, covariance):
     """Objective function minimized at each step of the MCD algorithm.
     """
     precision = pinvh(covariance)
     det = fast_logdet(precision)
     trace = np.trace(
         np.dot(empirical_covariance(data - location, assume_centered=True),
                precision))
     pen = self.shrinkage * np.trace(precision)
     return -det + trace + pen
Example #18
    def _score_samples(self, X, session=None):
        check_is_fitted(self, "mean_")

        X = check_array(X)
        Xr = X - self.mean_
        n_features = X.shape[1]
        precision = self.get_precision().fetch(session=session)
        log_like = -0.5 * (Xr * (mt.dot(Xr, precision))).sum(axis=1)
        log_like -= 0.5 * (n_features * log(2.0 * mt.pi) - fast_logdet(precision))
        return log_like
Example #19
def ebic(covariance, precision, n_samples, n_features, gamma=0):
    '''
    Extended Bayesian Information Criteria for model selection.

    When using path mode, use this as an alternative to cross-validation for
    finding lambda.

    See:
        "Extended Bayesian Information Criteria for Gaussian Graphical Models"
        R. Foygel and M. Drton, NIPS 2010

    Parameters
    ----------
    covariance : 2D ndarray (n_features, n_features)
        Maximum Likelihood Estimator of covariance (sample covariance)

    precision : 2D ndarray (n_features, n_features)
        The precision matrix of the model to be tested

    n_samples :  int
        Number of examples.

    n_features : int
        Dimension of an example.

    gamma : (float) \in (0, 1)
        Choice of gamma=0 leads to classical BIC
        Positive gamma leads to stronger penalization of large graphs.

    Returns
    -------
    ebic score (float). The caller should minimize this score.
    '''
    l_theta = -np.sum(covariance * precision) + fast_logdet(precision)
    l_theta *= n_features / 2.

    # if something goes wrong with fast_logdet, return a large value
    if np.isinf(l_theta) or np.isnan(l_theta):
        return 1e10

    mask = np.abs(precision.flat) > np.finfo(precision.dtype).eps
    precision_nnz = (np.sum(mask) - n_features) / 2.0  # lower off diagonal tri

    return (
        -2.0 * l_theta +
        precision_nnz * np.log(n_samples) +
        4.0 * precision_nnz * np.log(n_features) * gamma
    )
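A typical call, assuming the function above is in scope and using scikit-learn's GraphicalLasso as one possible source of the precision estimate (the estimator choice is illustrative):

import numpy as np
from sklearn.covariance import GraphicalLasso, empirical_covariance

rng = np.random.RandomState(0)
X = rng.randn(200, 5)

emp_cov = empirical_covariance(X)
model = GraphicalLasso(alpha=0.1).fit(X)

# Lower EBIC is better; gamma > 0 penalizes denser graphs more strongly.
print(ebic(emp_cov, model.precision_, n_samples=X.shape[0],
           n_features=X.shape[1], gamma=0.5))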
Example #20
 def decision_function(self, X, raw_values=True):
     """
     """
     n_features = self.cov_.shape[0]
     prec_ = linalg.pinv(self.cov_)
     dist = np.zeros((X.shape[0], self.support.shape[0]))
     for i, x in enumerate(X):
         for j, t in enumerate(self.support):
             dist[i, j] = distance.mahalanobis(x, t, prec_)
     a = fast_logdet(self.cov_)
     density = np.log(np.ravel(np.exp(-.5 * dist).mean(1))) \
         - 0.5 * a - (.5 * n_features) * np.log(2. * np.pi)
     return -density
    def _decision_function2(self, X):
        check_is_fitted(self, "classes_")

        X = check_array(X)
        precisions = self.get_observed_precision()
        norm2 = []
        for i in range(len(self.classes_)):
            Xm = X - self.means_[i]
            # X2 = np.dot(Xm, R * (S ** (-0.5)))
            X2 = np.linalg.multi_dot((Xm, precisions[i], Xm.T))
            norm2.append(np.diag(X2))
        norm2 = np.array(norm2).T  # shape = [len(X), n_classes]
        u = np.asarray([-fast_logdet(s) for s in precisions])
        return -0.5 * (norm2 + u) + np.log(self.priors_)
 def _gaussian_likelihood(self, S_test, prec):
     """
     Estimates the likelihood of the neighbourhood selection
     using the Gaussian log-likelihood model
     Parameters
     ----------
     S_test : array_like
         n by p matrix - data matrix of test data
     prec : array_like
         p by p matrix - estimated precision matrix 
     """
     p = S_test.shape[0]
     log_likelihood_ = -fast_logdet(prec) + np.trace(S_test @ prec)
     log_likelihood_ -= p * np.log(2 * np.pi)
     return log_likelihood_
Example #24
def test_bayesian_ridge_score_values():
    """Check value of score on toy example.

    Compute log marginal likelihood with equation (36) in Sparse Bayesian
    Learning and the Relevance Vector Machine (Tipping, 2001):

    - 0.5 * (log |Id/alpha + X.X^T/lambda| +
             y^T.(Id/alpha + X.X^T/lambda).y + n * log(2 * pi))
    + lambda_1 * log(lambda) - lambda_2 * lambda
    + alpha_1 * log(alpha) - alpha_2 * alpha

    and check equality with the score computed during training.
    """

    X, y = diabetes.data, diabetes.target
    n_samples = X.shape[0]
    # check with initial values of alpha and lambda (see code for the values)
    eps = np.finfo(np.float64).eps
    alpha_ = 1.0 / (np.var(y) + eps)
    lambda_ = 1.0

    # value of the parameters of the Gamma hyperpriors
    alpha_1 = 0.1
    alpha_2 = 0.1
    lambda_1 = 0.1
    lambda_2 = 0.1

    # compute score using formula of docstring
    score = lambda_1 * log(lambda_) - lambda_2 * lambda_
    score += alpha_1 * log(alpha_) - alpha_2 * alpha_
    M = 1.0 / alpha_ * np.eye(n_samples) + 1.0 / lambda_ * np.dot(X, X.T)
    M_inv = pinvh(M)
    score += -0.5 * (fast_logdet(M) + np.dot(y.T, np.dot(M_inv, y)) +
                     n_samples * log(2 * np.pi))

    # compute score with BayesianRidge
    clf = BayesianRidge(
        alpha_1=alpha_1,
        alpha_2=alpha_2,
        lambda_1=lambda_1,
        lambda_2=lambda_2,
        n_iter=1,
        fit_intercept=False,
        compute_score=True,
    )
    clf.fit(X, y)

    assert_almost_equal(clf.scores_[0], score, decimal=9)
Example #25
def pison_correction(n, p):
    """

    """
    repeat = 100
    pth_roots = np.zeros(repeat)
    for i in range(repeat):
        print(i)
        data = np.dot(np.random.randn(n, p), np.eye(p))
        mcd = MCD(h=None).fit(data)
        covariance = mcd.raw_covariance_
        pth_roots[i] = np.exp(fast_logdet(covariance))

    res_inv = (1. / repeat) * np.sum(pth_roots ** (1. / p))

    return 1. / res_inv
Example #26
def pison_correction(n, p):
    """

    """
    repeat = 100
    pth_roots = np.zeros(repeat)
    for i in range(repeat):
        print(i)
        data = np.dot(np.random.randn(n, p), np.eye(p))
        mcd = MCD(h=None).fit(data)
        covariance = mcd.raw_covariance_
        pth_roots[i] = np.exp(fast_logdet(covariance))

    res_inv = (1. / repeat) * np.sum(pth_roots**(1. / p))

    return 1. / res_inv
Example #27
    def likelihood(self, S, theta):
        """
        Likelihood function for a Gaussian model
        Parameters
        ----------
        S : array_like
            p by p matrix - Covariance matrix of problem
        theta : array_like
            estimated precision matrix 
 
        Returns
        -------
        float - Gaussian loglikelihood of the estimated model
        """
        p = S.shape[0]
        log_likelihood_ = -fast_logdet(theta) + np.trace(S @ theta)
        log_likelihood_ -= p * np.log(2 * np.pi)
        return log_likelihood_
def test_gaussian_mixture_aic_bic():
    # Test the aic and bic criteria
    rng = np.random.RandomState(0)
    n_samples, n_features, n_components = 50, 3, 2
    X = rng.randn(n_samples, n_features)
    # standard gaussian entropy
    sgh = 0.5 * (fast_logdet(np.cov(X.T, bias=1)) +
                 n_features * (1 + np.log(2 * np.pi)))
    for cv_type in COVARIANCE_TYPE:
        g = GaussianMixture(
            n_components=n_components, covariance_type=cv_type,
            random_state=rng, max_iter=200)
        g.fit(X)
        aic = 2 * n_samples * sgh + 2 * g._n_parameters()
        bic = (2 * n_samples * sgh +
               np.log(n_samples) * g._n_parameters())
        bound = n_features / np.sqrt(n_samples)
        assert (g.aic(X) - aic) / n_samples < bound
        assert (g.bic(X) - bic) / n_samples < bound
Example #29
def test_gaussian_mixture_aic_bic():
    # Test the aic and bic criteria
    rng = np.random.RandomState(0)
    n_samples, n_features, n_components = 50, 3, 2
    X = rng.randn(n_samples, n_features)
    # standard gaussian entropy
    sgh = 0.5 * (fast_logdet(np.cov(X.T, bias=1)) + n_features *
                 (1 + np.log(2 * np.pi)))
    for cv_type in COVARIANCE_TYPE:
        g = GaussianMixture(n_components=n_components,
                            covariance_type=cv_type,
                            random_state=rng,
                            max_iter=200)
        g.fit(X)
        aic = 2 * n_samples * sgh + 2 * g._n_parameters()
        bic = (2 * n_samples * sgh + np.log(n_samples) * g._n_parameters())
        bound = n_features / np.sqrt(n_samples)
        assert_true((g.aic(X) - aic) / n_samples < bound)
        assert_true((g.bic(X) - bic) / n_samples < bound)
Example #30
def test_bayesian_ridge_score_values():
    """Check value of score on toy example.

    Compute log marginal likelihood with equation (36) in Sparse Bayesian
    Learning and the Relevance Vector Machine (Tipping, 2001):

    - 0.5 * (log |Id/alpha + X.X^T/lambda| +
             y^T.(Id/alpha + X.X^T/lambda).y + n * log(2 * pi))
    + lambda_1 * log(lambda) - lambda_2 * lambda
    + alpha_1 * log(alpha) - alpha_2 * alpha

    and check equality with the score computed during training.
    """

    X, y = diabetes.data, diabetes.target
    n_samples = X.shape[0]
    # check with initial values of alpha and lambda (see code for the values)
    eps = np.finfo(np.float64).eps
    alpha_ = 1. / (np.var(y) + eps)
    lambda_ = 1.

    # value of the parameters of the Gamma hyperpriors
    alpha_1 = 0.1
    alpha_2 = 0.1
    lambda_1 = 0.1
    lambda_2 = 0.1

    # compute score using formula of docstring
    score = lambda_1 * log(lambda_) - lambda_2 * lambda_
    score += alpha_1 * log(alpha_) - alpha_2 * alpha_
    M = 1. / alpha_ * np.eye(n_samples) + 1. / lambda_ * np.dot(X, X.T)
    M_inv = pinvh(M)
    score += - 0.5 * (fast_logdet(M) + np.dot(y.T, np.dot(M_inv, y)) +
                      n_samples * log(2 * np.pi))

    # compute score with BayesianRidge
    clf = BayesianRidge(alpha_1=alpha_1, alpha_2=alpha_2,
                        lambda_1=lambda_1, lambda_2=lambda_2,
                        n_iter=1, fit_intercept=False, compute_score=True)
    clf.fit(X, y)

    assert_almost_equal(clf.scores_[0], score, decimal=9)
Example #31
def _ridge_evidence_fun_grad(theta, X, y, verbose, other):

    nv = theta[0]
    alpha = theta[1]

    XX, Xy, yy = other

    N, p = X.shape
    I = np.eye(p)

    # Prior covariance matrix for current parameter setting
    Cprior = 1. / alpha * I
    Cprior[0, 0] = 0

    invCprior = alpha * I
    invCprior[0, 0] = 0

    # Posterior covariance and mean
    SS = linalg.pinv(XX / nv + invCprior)
    mu = np.dot(SS, Xy) / nv

    # (1) Compute log-evidence
    term1 = .5 * (fast_logdet(2 * np.pi * SS) - p * np.log(2 * np.pi / alpha) -
                  p * np.log(2 * np.pi * nv))
    term2 = -.5 * (yy / nv - np.dot(Xy.T, np.dot(SS, Xy)) / nv**2)
    logE = term1 + term2

    # Gradient with respect to the ridge parameter
    #    dAlpha = .5 * np.trace(1./alpha*I + SS + np.outer(mu, mu))
    dAlpha = p / (2 * alpha) - .5 * np.sum(mu * mu) - .5 * np.trace(SS)

    # Gradient with respect to the noise variance
    SSinvC = np.dot(SS, invCprior)
    rss = yy - 2 * np.dot(mu.T, Xy) + np.dot(mu.T, XX).dot(mu)
    dNsevar = -N / nv + np.trace(I - SSinvC) / nv + rss / nv**2

    dEE = np.array([dNsevar.item(), dAlpha])

    if verbose:
        print("-logE: %0.3f | nv: %0.3f | alpha: %0.3f" % (-logE, nv, alpha))

    return -logE, -dEE
Example #32
    def score(self, X, y=None):
        """Return a score associated to new data

        Parameters
        ----------
        X: array of shape(n_samples, n_features)
            The data to test

        Returns
        -------
        ll: array of shape (n_samples),
            log-likelihood of each row of X under the current model
        """
        Xr = X - self.mean_
        n_features = X.shape[1]
        log_like = np.zeros(X.shape[0])
        self.precision_ = linalg.inv(self.covariance_)
        log_like = -.5 * (Xr * (np.dot(Xr, self.precision_))).sum(axis=1)
        log_like -= .5 * (fast_logdet(self.covariance_) +
                          n_features * log(2. * np.pi))
        return log_like
Example #33
def samplewise_log_likelihood(X, mean, precision):
    """Return the log-likelihood of each sample.
        See - http://www.miketipping.com/papers/met-mppca.pdf
        code adapted from https://github.com/scikit-learn/scikit-learn/blob/ed5e127b/sklearn/decomposition/pca.py#L516
        
        Parameters
        ----------
            X : array, shape(n_samples, n_features), the sample data
            mean: float, the mean of the current model 
            precision: array, shape(n_features, n_features), precision matrix of the current model
        
        Returns
        -------
            ll : array, shape (n_samples,) : Log-likelihood of each sample under the current model
    """
    Xr = X - mean
    n_features = X.shape[1]
    log_like = np.zeros(X.shape[0])
    log_like = -.5 * (Xr * (np.dot(Xr, precision))).sum(axis=1)
    log_like -= .5 * (n_features * log(2. * np.pi) - fast_logdet(precision))
    return log_like.reshape(-1, 1)
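A minimal call, assuming numpy, log from the math module and fast_logdet are imported as in the sketch near the top of the page:

import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(50, 3)

# Standard-normal model: zero mean, identity precision.
ll = samplewise_log_likelihood(X, mean=np.zeros(3), precision=np.eye(3))
print(ll.shape)  # (50, 1) because of the final reshape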
Example #34
def covsel(x, p, nonZero, C):
    """Objective and gradient for MLE of precision given empirical covariance.

    nonZero is a list of non-zero upper triangle precision matrix entries.
    Based on sparse GGM estimation code by Mark Schmidt.
    """
    X = np.zeros((p, p))
    X[nonZero] = x  # fill the diagonal and upper triangle
    X += np.triu(X, 1).T  # fill the lower triangle

    # Fast Way to compute -logdet(X) + tr(X*C)
    # f = -2*sum(log(diag(R))) + sum(sum(C.*X)) + (lambda/2)*sum(X(:).^2);
    f = -fast_logdet(X) + np.sum(C * X)

    if f < np.inf:
        g = C - linalg.pinvh(X)
        g += np.tril(g, -1).T  # add contribution from lower to upper triangle
        g = g[nonZero]
    else:
        g = 0
    return f, g
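covsel packs the diagonal and upper triangle of the precision matrix into a flat vector, which makes it directly usable with gradient-based optimizers. A sketch that only evaluates the objective and gradient at the identity precision, for a random empirical covariance (all names below are illustrative):

import numpy as np
from sklearn.covariance import empirical_covariance

rng = np.random.RandomState(0)
p = 4
C = empirical_covariance(rng.randn(100, p))

nonZero = np.triu_indices(p)  # estimate every diagonal and upper-triangle entry
x0 = np.eye(p)[nonZero]       # start from the identity precision

f, g = covsel(x0, p, nonZero, C)
print(f, g.shape)             # objective value and flattened gradient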
Example #35
    def score(self, X, y=None):
        """Return a score associated to new data

        Parameters
        ----------
        X: array of shape(n_samples, n_features)
            The data to test

        Returns
        -------
        ll: array of shape (n_samples),
            log-likelihood of each row of X under the current model
        """
        Xr = X - self.mean_
        n_features = X.shape[1]
        log_like = np.zeros(X.shape[0])
        self.precision_ = linalg.inv(self.covariance_)
        log_like = -.5 * (Xr * (np.dot(Xr, self.precision_))).sum(axis=1)
        log_like -= .5 * (fast_logdet(self.covariance_)
                          + n_features * log(2. * np.pi))
        return log_like
Example #36
    def score_samples(self, X):
        """Compute the log-likelihood of each sample
        Parameters
        ----------
        X: array, shape (n_samples, n_features)
            The data
        Returns
        -------
        ll: array, shape (n_samples,)
            Log-likelihood of each sample under the current model
        """
        check_is_fitted(self, 'components_')

        Xr = X - self.mean_
        precision = self.get_precision()
        n_features = X.shape[1]
        log_like = np.zeros(X.shape[0])
        log_like = -.5 * (Xr * (np.dot(Xr, precision))).sum(axis=1)
        log_like -= .5 * (n_features * log(2. * np.pi) -
                          fast_logdet(precision))
        return log_like
Example #37
def kl_loss(covariance, precision):
    """Computes the KL divergence between precision estimate and 
    reference covariance.
    
    The loss is computed as:

        0.5 * (Trace(Theta_1 * Sigma_0) - logdet(Theta_1 * Sigma_0) - dim(Sigma))

    Parameters
    ----------
    covariance : 2D ndarray (n_features, n_features)
        Maximum Likelihood Estimator of covariance
    
    precision : 2D ndarray (n_features, n_features)
        The precision matrix of the covariance model to be tested
    
    Returns
    -------
    KL-divergence 
    """
    assert covariance.shape == precision.shape
    dim, _ = precision.shape
    logdet_p_dot_c = fast_logdet(np.dot(precision, covariance))
    return 0.5 * (np.sum(precision * covariance) - logdet_p_dot_c - dim)
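As a quick check, assuming the imports sketched near the top of the page: when the precision is exactly the inverse of the covariance, the divergence is zero.

import numpy as np
from scipy.linalg import pinvh
from sklearn.covariance import empirical_covariance

rng = np.random.RandomState(0)
cov = empirical_covariance(rng.randn(300, 4))

print(kl_loss(cov, pinvh(cov)))  # ~0.0 up to numerical error
print(kl_loss(cov, np.eye(4)))   # positive for a mismatched model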
Example #38
def _c_step(X,
            n_support,
            random_state,
            remaining_iterations=30,
            initial_estimates=None,
            verbose=False,
            cov_computation_method=empirical_covariance):
    n_samples, n_features = X.shape
    dist = np.inf

    # Initialisation
    support = np.zeros(n_samples, dtype=bool)
    if initial_estimates is None:
        # compute initial robust estimates from a random subset
        support[random_state.permutation(n_samples)[:n_support]] = True
    else:
        # get initial robust estimates from the function parameters
        location = initial_estimates[0]
        covariance = initial_estimates[1]
        # run a special iteration for that case (to get an initial support)
        precision = linalg.pinvh(covariance)
        X_centered = X - location
        dist = (np.dot(X_centered, precision) * X_centered).sum(1)
        # compute new estimates
        support[np.argsort(dist)[:n_support]] = True

    X_support = X[support]
    location = X_support.mean(0)
    covariance = cov_computation_method(X_support)

    # Iterative procedure for Minimum Covariance Determinant computation
    det = fast_logdet(covariance)
    # If the data already has singular covariance, calculate the precision,
    # as the loop below will not be entered.
    if np.isinf(det):
        precision = linalg.pinvh(covariance)

    previous_det = np.inf
    while (det < previous_det and remaining_iterations > 0
           and not np.isinf(det)):
        # save old estimates values
        previous_location = location
        previous_covariance = covariance
        previous_det = det
        previous_support = support
        # compute a new support from the full data set mahalanobis distances
        precision = linalg.pinvh(covariance)
        X_centered = X - location
        dist = (np.dot(X_centered, precision) * X_centered).sum(axis=1)
        # compute new estimates
        support = np.zeros(n_samples, dtype=bool)
        support[np.argsort(dist)[:n_support]] = True
        X_support = X[support]
        location = X_support.mean(axis=0)
        covariance = cov_computation_method(X_support)
        det = fast_logdet(covariance)
        # update remaining iterations for early stopping
        remaining_iterations -= 1

    previous_dist = dist
    dist = (np.dot(X - location, precision) * (X - location)).sum(axis=1)
    # Check if best fit already found (det => 0, logdet => -inf)
    if np.isinf(det):
        results = location, covariance, det, support, dist
    # Check convergence
    if np.allclose(det, previous_det):
        # c_step procedure converged
        if verbose:
            print("Optimal couple (location, covariance) found before"
                  " ending iterations (%d left)" % (remaining_iterations))
        results = location, covariance, det, support, dist
    elif det > previous_det:
        # determinant has increased (should not happen)
        warnings.warn(
            "Determinant has increased; this should not happen: "
            "log(det) > log(previous_det) (%.15f > %.15f). "
            "You may want to try with a higher value of "
            "support_fraction (current value: %.3f)." %
            (det, previous_det, n_support / n_samples), RuntimeWarning)
        results = previous_location, previous_covariance, \
            previous_det, previous_support, previous_dist

    # Check early stopping
    if remaining_iterations == 0:
        if verbose:
            print('Maximum number of iterations reached')
        results = location, covariance, det, support, dist

    return results
Example #39
    def dot_precision(self, X, logdet=False):
        """Compute the dot product of a matrix X by the data
        precision matrix with the generative model.

        Returns
        -------
        Y   : array, shape=(n_samples, n_features)
          =X*precision
        """
        n_features = self.components_.shape[1]

        # handle corner cases first
        if self.n_components_ == 0:

            if logdet:
                #import pdb;pdb.set_trace()
                if np.isscalar(self.noise_variance_):
                    logdet_cov = np.log(self.noise_variance_) * X.shape[1]
                else:
                    assert self.noise_variance_.shape[0] == X.shape[
                        1], "self.noise_variance_.shape[0] == X.shape[1]"
                    logdet_cov = np.log(self.noise_variance_).sum()
                return X / self.noise_variance_, logdet_cov
            else:
                return X / self.noise_variance_
        if self.n_components_ == n_features:
            covariance = self.get_covariance()
            if logdet:
                return X.dot(linalg.inv(covariance)), fast_logdet(covariance)
            else:
                return X.dot(linalg.inv(covariance))
        # Get precision using matrix inversion lemma
        components_ = self.components_
        exp_var = self.explained_variance_
        if self.whiten:
            components_ = components_ * np.sqrt(exp_var[:, np.newaxis])
        exp_var_diff = np.maximum(exp_var - self.noise_variance_, 0.)
        Xprecision = (1.0 / exp_var_diff) + (1.0 / self.noise_variance_)
        if logdet:
            #import pdb;pdb.set_trace()
            if np.isscalar(self.noise_variance_):
                logdet_cov = np.log(self.noise_variance_) * X.shape[1]
            else:
                assert self.noise_variance_.shape[0] == X.shape[
                    1], "self.noise_variance_.shape[0] == X.shape[1]"
                logdet_cov = np.log(self.noise_variance_).sum()
            logdet_cov += np.log(
                ((1.0 / exp_var_diff) + (1.0 / self.noise_variance_))).sum()
            logdet_cov += np.log(exp_var_diff).sum()

        Xprecision *= (self.noise_variance_ * self.noise_variance_)
        Xprecision = (X.dot(components_.T / (-Xprecision))).dot(components_)
        Xprecision += X / self.noise_variance_
        #cprecision=((1.0 / exp_var_diff) + (1.0 / self.noise_variance_))*(-self.noise_variance_ * self.noise_variance_)
        #cprecision = (components_.T/cprecision).dot(components_)
        #cprecision.flat[::len(cprecision) + 1] += 1. / self.noise_variance_
        #Xcprecision = X.dot(cprecision)
        if logdet:
            return Xprecision, logdet_cov
        else:
            return Xprecision
Example #40
 def objective_function(self, data, location, covariance):
     """
     """
     det = fast_logdet(covariance)
     return det
Example #41
def c_step(X, h, objective_function, initial_estimates, verbose=False,
           cov_computation_method=empirical_covariance):
    """C_step procedure described in [1] aiming at computing the MCD

    Parameters
    ----------
    X: array-like, shape (n_samples, n_features)
      Data set in which we look for the h observations whose scatter matrix
      has minimum determinant
    h: int, > n_samples / 2
      Number of observations to compute the robust estimates of location
      and covariance from.
    remaining_iterations: int
      Number of iterations to perform.
      According to Rousseeuw [1], two iterations are sufficient to get close
      to the minimum, and we never need more than 30 to reach convergence.
    initial_estimates: 2-tuple
      Initial estimates of location and shape from which to run the c_step
      procedure:
      - initial_estimates[0]: an initial location estimate
      - initial_estimates[1]: an initial covariance estimate
    verbose: boolean
      Verbose mode

    Returns
    -------
    location: array-like, shape (n_features,)
      Robust location estimates
    covariance: array-like, shape (n_features, n_features)
      Robust covariance estimates
    support: array-like, shape (n_samples,)
      A mask for the `h` observations whose scatter matrix has minimum
      determinant

    Notes
    -----
    References:
    [1] A Fast Algorithm for the Minimum Covariance Determinant Estimator,
        1999, American Statistical Association and the American Society
        for Quality, TECHNOMETRICS

    """
    n_samples, n_features = X.shape
    n_iter = 30
    remaining_iterations = 30

    # Get initial robust estimates from the function parameters
    location = initial_estimates[0]
    covariance = initial_estimates[1]
    # run a special iteration for that case (to get an initial support)
    precision = pinvh(covariance)
    X_centered = X - location
    dist = (np.dot(X_centered, precision) * X_centered).sum(1)
    # compute new estimates
    support = np.zeros(n_samples).astype(bool)
    support[np.argsort(dist)[:h]] = True
    location = X[support].mean(0)
    covariance = cov_computation_method(X[support])
    previous_obj = np.inf

    # Iterative procedure for Minimum Covariance Determinant computation
    obj = objective_function(X[support], location, covariance)
    while (obj < previous_obj) and (remaining_iterations > 0):
        # save old estimates values
        previous_location = location
        previous_covariance = covariance
        previous_obj = obj
        previous_support = support
        # compute a new support from the full data set mahalanobis distances
        precision = pinvh(covariance)
        X_centered = X - location
        dist = (np.dot(X_centered, precision) * X_centered).sum(1)
        # compute new estimates
        support = np.zeros(n_samples).astype(bool)
        support[np.argsort(dist)[:h]] = True
        location = X[support].mean(axis=0)
        covariance = cov_computation_method(X[support])
        obj = objective_function(X[support], location, covariance)
        # update remaining iterations for early stopping
        remaining_iterations -= 1

    # Catch computation errors
    if np.isinf(obj):
        raise ValueError(
            "Singular covariance matrix. "
            "Please check that the covariance matrix corresponding "
            "to the dataset is full rank and that MCD is used with "
            "Gaussian-distributed data (or at least data drawn from a "
            "unimodal, symetric distribution.")
    # Check convergence
    if np.allclose(obj, previous_obj):
        # c_step procedure converged
        if verbose:
            print "Optimal couple (location, covariance) found before" \
                "ending iterations (%d left)" % (remaining_iterations)
        results = location, covariance, obj, support
    elif obj > previous_obj:
        # objective function has increased (should not happen)
        current_iter = n_iter - remaining_iterations
        warnings.warn("Warning! obj > previous_obj (%.15f > %.15f, iter=%d)" \
                          % (obj, previous_obj, current_iter), RuntimeWarning)
        results = previous_location, previous_covariance, \
            previous_obj, previous_support

    # Check early stopping
    if remaining_iterations == 0:
        if verbose:
            print('Maximum number of iterations reached')
        obj = fast_logdet(covariance)
        results = location, covariance, obj, support

    return results
Example #42
    def _lca(self, X, max_iter=100, regularization=0, tol=1e-10):
        """
        """
        n_samples, n_features = X.shape
        if regularization == np.inf:
            # Use identity matrix if Ledoit-Wolf shrinkage == 1
            print "/!\ use identity matrix"
            coeff = np.trace(empirical_covariance(X)) / float(n_features)
            self.cov_ = coeff * np.eye(n_features)
            prec_ = self.cov_
            # learn the kernel
            dist = np.zeros((n_samples, self.support.shape[0]))
            for i, x in enumerate(X):
                for j, t in enumerate(self.support):
                    dist[i, j] = distance.mahalanobis(x, t, prec_)
            self.kernel = np.exp(-.5 * dist)
            # decompose the kernel
            U, D, V = linalg.svd(self.kernel)
            self.U = U
            self.D = D
            return self.cov_

        # LCA algorithm starts
        cov_gauss = empirical_covariance(X)
        cov_gauss.flat[::n_features + 1] += regularization
        # EM loop
        # The last iteration is there to compute the final log-likelihood
        mean_loglike = -np.inf
        for l in range(max_iter + 1):
            xax = np.dot(X, np.dot(linalg.pinv(cov_gauss), X.T))
            dxax = np.diag(xax).reshape((-1, 1))

            logK = -.5 * (dxax + dxax.T - 2. * xax)
            # each datapoint cannot use itself
            logK.flat[::n_samples + 1] = -np.inf
            K = np.exp(logK)

            loglik1 = -.5 * fast_logdet(cov_gauss)
            loglik2 = np.log(np.sum(K)) - np.log(n_samples - 1)
            loglik3 = -n_features / (2. * np.log(2. * np.pi))
            loglike = loglik1 + loglik2 + loglik3
            old_mean_loglike = mean_loglike
            mean_loglike = np.mean(loglike)
            if self.verbose:
                print "\tIteration %d, loglike = %g" % (l, mean_loglike)

            if l < max_iter:
                if mean_loglike - old_mean_loglike < tol:
                    #print "Convergence reached (iteration %d)" % l
                    break
                # row-normalize the responsibilities
                B = K / np.sum(K, 1)
                Bsum = np.sum(B, 0) + np.sum(B, 1)
                cov_gauss = np.dot(X.T, np.dot(np.diag(Bsum) - B - B.T, X)) \
                    / float(n_samples)
                cov_gauss.flat[::n_features + 1] += regularization
        self.responsibilities = K
        self.cov_ = cov_gauss

        # learn the kernel for further decision/prediction
        prec_ = linalg.pinv(self.cov_)
        dist = np.zeros((n_samples, self.support.shape[0]))
        for i, x in enumerate(X):
            for j, t in enumerate(self.support):
                dist[i, j] = distance.mahalanobis(x, t, prec_)
        self.kernel = np.exp(-.5 * dist)
        # decompose the kernel
        U, D, V = linalg.svd(self.kernel)
        self.U = U
        self.D = D

        return cov_gauss
Example #43
def log_likelihood_t(emp_cov, precision):
    """Gaussian log-likelihood without constant term in time"""
    score = 0
    for e, p in zip(emp_cov, precision):
        score += fast_logdet(p) - np.sum(e * p)
    return score
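The time-varying variant simply sums the per-time-point score over matched lists of covariances and precisions; a minimal sketch with two time points, assuming the imports near the top of the page:

import numpy as np
from sklearn.covariance import empirical_covariance

rng = np.random.RandomState(0)
emp_cov = [empirical_covariance(rng.randn(100, 3)) for _ in range(2)]
precision = [np.eye(3) for _ in range(2)]

print(log_likelihood_t(emp_cov, precision))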
Example #44
def group_sparse_scores(precisions,
                        n_samples,
                        emp_covs,
                        alpha,
                        duality_gap=False,
                        debug=False):
    """Compute scores used by group_sparse_covariance.

    The log-likelihood of a given list of empirical covariances /
    precisions.

    Parameters
    ----------
    precisions : numpy.ndarray, shape (n_features, n_features, n_subjects)
        estimated precisions.

    n_samples : array-like, shape (n_subjects,)
        number of samples used in estimating each subject in "precisions".
        n_samples.sum() must be equal to 1.

    emp_covs : numpy.ndarray, shape (n_features, n_features, n_subjects)
        empirical covariance matrix

    alpha : float
        regularization parameter

    duality_gap : bool, optional
        if True, also returns a duality gap upper bound.

    debug : bool, optional
        if True, some consistency checks are performed to help solving
        numerical problems

    Returns
    -------
    log_lik : float
        log-likelihood of precisions on the given covariances. This is the
        opposite of the loss function, without the regularization term

    objective : float
        value of objective function. This is the value minimized by
        group_sparse_covariance()

    duality_gap : float
        duality gap upper bound. The returned bound is tight: it vanishes for
        the optimal precision matrices
    """

    n_features, _, n_subjects = emp_covs.shape

    log_lik = 0
    for k in range(n_subjects):
        log_lik_k = -np.sum(emp_covs[..., k] * precisions[..., k])
        log_lik_k += fast_logdet(precisions[..., k])
        log_lik += n_samples[k] * log_lik_k

    l2 = np.sqrt((precisions**2).sum(axis=-1))
    l12 = l2.sum() - np.diag(l2).sum()  # Do not count diagonal terms
    objective = alpha * l12 - log_lik
    ret = (log_lik, objective)

    # Compute duality gap if requested
    if duality_gap is True:
        A = np.empty(precisions.shape, dtype=float, order="F")
        for k in range(n_subjects):
            # TODO: can be computed more efficiently using W_inv. See
            # Friedman, Jerome, Trevor Hastie, and Robert Tibshirani.
            # 'Sparse Inverse Covariance Estimation with the Graphical Lasso'.
            # Biostatistics 9, no. 3 (1 July 2008): 432-441.
            precisions_inv = scipy.linalg.inv(precisions[..., k])
            if debug:
                assert is_spd(precisions_inv)

            A[..., k] = n_samples[k] * (precisions_inv - emp_covs[..., k])

            if debug:
                np.testing.assert_almost_equal(A[..., k], A[..., k].T)

        # Project A on the set of feasible points
        alpha_max = np.sqrt((A**2).sum(axis=-1))
        mask = alpha_max > alpha
        for k in range(A.shape[-1]):
            A[mask, k] *= alpha / alpha_max[mask]
            # Set zeros on diagonals. Essential to get an always positive
            # duality gap.
            A[..., k].flat[::A.shape[0] + 1] = 0

        alpha_max = np.sqrt((A**2).sum(axis=-1)).max()
        dual_obj = 0  # dual objective
        for k in range(n_subjects):
            B = emp_covs[..., k] + A[..., k] / n_samples[k]
            dual_obj += n_samples[k] * (n_features + fast_logdet(B))

        # The previous computation can lead to a non-feasible point, because
        # one of the Bs may not be positive definite.
        # Use another value in this case, that ensure positive definiteness
        # of B. The upper bound on the duality gap is not tight in the
        # following, but is smaller than infinity, which is better in any case.
        if not np.isfinite(dual_obj):
            for k in range(n_subjects):
                A[..., k] = -n_samples[k] * emp_covs[..., k]
                A[..., k].flat[::A.shape[0] + 1] = 0
            alpha_max = np.sqrt((A**2).sum(axis=-1)).max()
            # the second value (0.05 is arbitrary: positive in ]0,1[)
            gamma = min((alpha / alpha_max, 0.05))
            dual_obj = 0
            for k in range(n_subjects):
                # add gamma on the diagonal
                B = ((1. - gamma) * emp_covs[..., k] +
                     gamma * np.eye(emp_covs.shape[0]))
                dual_obj += n_samples[k] * (n_features + fast_logdet(B))

        gap = objective - dual_obj
        ret = ret + (gap, )
    return ret