def likelihood_score(X, precision_):
    # compute empirical covariance of the test set
    location_ = X.mean(1).reshape(X.shape[0], 1, X.shape[2])
    test_cov = np.array(
        [empirical_covariance(x, assume_centered=True)
         for x in X - location_])
    res = sum(log_likelihood(S, K) for S, K in zip(test_cov, precision_))
    return res
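# Minimal usage sketch for likelihood_score (illustrative only): X is assumed
# to be a 3D array of shape (n_subjects, n_samples, n_features), with one
# precision matrix per subject; the toy data and variable names below are not
# part of the original code.
import numpy as np
from scipy import linalg
from sklearn.covariance import empirical_covariance

rng = np.random.RandomState(0)
X_demo = rng.randn(3, 100, 4)   # 3 subjects, 100 samples, 4 features
precisions_demo = [linalg.pinvh(empirical_covariance(x)) for x in X_demo]
print(likelihood_score(X_demo, precisions_demo))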
def _objective(mle, precision_, alpha):
    """Evaluation of the graphical-lasso objective function.

    The objective function is made of a shifted, scaled version of the
    normalized log-likelihood (i.e. its empirical mean over the samples)
    and a penalisation term to promote sparsity.
    """
    p = precision_.shape[0]
    cost = -2. * log_likelihood(mle, precision_) + p * np.log(2 * np.pi)
    cost += alpha * (np.abs(precision_).sum()
                     - np.abs(np.diag(precision_)).sum())
    return cost
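# Quick sanity check (illustrative, not from the original code): with alpha=0
# the cost reduces to the shifted negative log-likelihood, and a positive
# alpha adds an l1 penalty on the off-diagonal precision entries only.
import numpy as np
from sklearn.covariance import empirical_covariance

rng = np.random.RandomState(0)
X_check = rng.randn(200, 4)
mle = empirical_covariance(X_check)
prec = np.linalg.pinv(mle)

base = _objective(mle, prec, alpha=0.)
penalised = _objective(mle, prec, alpha=0.1)
off_diag_l1 = np.abs(prec).sum() - np.abs(np.diag(prec)).sum()
assert np.isclose(penalised - base, 0.1 * off_diag_l1)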
def score(self, X, y):
    """Compute the log-likelihood of a Gaussian data set with
    `self.covariance_` as an estimator of its covariance matrix.

    Parameters
    ----------
    X : array-like, shape = (n_samples, n_features)
        Test data of which we compute the likelihood, where n_samples is
        the number of samples and n_features is the number of features.
        X is assumed to be drawn from the same distribution as the data
        used in fit (including centering).

    y : array-like, shape = (n_samples,)
        Class of samples.

    Returns
    -------
    res : float
        The likelihood of the data set with `self.covariance_` as an
        estimator of its covariance matrix.
    """
    # Covariance does not make sense for a single feature
    X, y = check_X_y(X, y, accept_sparse=False, dtype=np.float64,
                     order="C", ensure_min_features=2, estimator=self)

    # compute empirical covariance of the test set, one block per class
    test_cov = np.array([
        empirical_covariance(X[y == cl] - self.location_[i],
                             assume_centered=True)
        for i, cl in enumerate(self.classes_)
    ])
    res = sum(X[y == cl].shape[0] * log_likelihood(S, K)
              for S, K, cl in zip(test_cov, self.get_observed_precision(),
                                  self.classes_))
    return res
def score(self, X_test, y=None):
    """Compute the log-likelihood of a Gaussian data set with
    `self.covariance_` as an estimator of its covariance matrix.

    Parameters
    ----------
    X_test : array-like, shape = (n_samples, n_features)
        Test data of which we compute the likelihood, where n_samples is
        the number of samples and n_features is the number of features.
        X_test is assumed to be drawn from the same distribution as the
        data used in fit (including centering).

    y : not used, present for API consistency.

    Returns
    -------
    res : float
        The likelihood of the data set with `self.covariance_` as an
        estimator of its covariance matrix.
    """
    if not self.bypass_transpose:
        # put time as the first dimension
        X_test = X_test.transpose(2, 0, 1)
    # compute empirical covariance of the test set
    test_cov = np.array([
        empirical_covariance(x, assume_centered=True)
        for x in X_test - self.location_
    ])
    res = sum(
        log_likelihood(S, K)
        for S, K in zip(test_cov, self.get_observed_precision()))
    # ALLA MATLAB1
    # ranks = [np.linalg.matrix_rank(L) for L in self.latent_]
    # scores_ranks = np.square(ranks - np.sqrt(L.shape[1]))
    return res  # - np.sum(scores_ranks)
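# The core of both `score` variants above is the same pairing: an empirical
# covariance of held-out data evaluated against a fitted precision via
# log_likelihood. A standalone sketch with plain NumPy/scikit-learn (no
# estimator class assumed; variable names illustrative):
import numpy as np
from scipy import linalg
from sklearn.covariance import empirical_covariance, log_likelihood

rng = np.random.RandomState(42)
X_tr, X_te = rng.randn(200, 5), rng.randn(80, 5)

# "fit": location and precision estimated on the training set
location = X_tr.mean(0)
precision = linalg.pinvh(
    empirical_covariance(X_tr - location, assume_centered=True))
# "score": log-likelihood of the test-set covariance under that precision
test_cov = empirical_covariance(X_te - location, assume_centered=True)
print(log_likelihood(test_cov, precision))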
def set_optimal_shrinkage_amount(self, X, method="cv", verbose=False):
    """Set the optimal shrinkage amount according to the chosen method.

    Note: this could be rewritten with GridSearchCV.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        Training data, where n_samples is the number of samples and
        n_features is the number of features.

    method : float or str in {"cv", "lw", "oas"},
        The method used to set the shrinkage. If a floating value is
        provided, that value is used. Otherwise, the selection is made
        according to the selected method.
        "cv" (default): 10-fold cross-validation
                        (or Leave-One-Out cross-validation if
                        n_samples < 10)
        "lw": Ledoit-Wolf criterion
        "oas": OAS criterion

    verbose : bool,
        Verbose mode or not.

    Returns
    -------
    shrinkage_range, log_likelihoods :
        The explored shrinkage grid and the per-fold log-likelihoods when
        method="cv"; the other methods store the optimal amount of
        shrinkage in `self.shrinkage` and return None.

    """
    n_samples, n_features = X.shape
    if isinstance(method, str):
        std_shrinkage = np.trace(empirical_covariance(X)) / \
            (n_features * n_samples)
        self.std_shrinkage = std_shrinkage
    if method == "cv":
        from scipy.linalg import pinvh
        from sklearn.covariance import log_likelihood
        n_samples, n_features = X.shape
        shrinkage_range = np.concatenate(
            ([0.], 10. ** np.arange(-n_samples / n_features, -1, 0.5),
             np.arange(0.05, 1., 0.05), np.arange(1., 20., 1.),
             np.arange(20., 100, 5.), 10. ** np.arange(2, 7, 0.5)))
        # get a "pure" active set with a standard shrinkage
        active_set_estimator = RMCDl2(shrinkage=std_shrinkage)
        active_set_estimator.fit(X)
        active_set = np.where(active_set_estimator.support_)[0]
        # split this active set in ten parts
        active_set = active_set[np.random.permutation(active_set.size)]
        if active_set.size >= 10:
            # ten-fold cross-validation
            n_folds = 10
            fold_size = active_set.size // 10
        else:
            n_folds = active_set.size
            fold_size = 1
        log_likelihoods = np.zeros((shrinkage_range.size, n_folds))
        if verbose:
            print("*** Cross-validation")
        for trial in range(n_folds):
            if verbose:
                print(trial / float(n_folds))
            # define train and test sets
            train_set_indices = np.concatenate(
                (np.arange(0, fold_size * trial),
                 np.arange(fold_size * (trial + 1), n_folds * fold_size)))
            train_set = X[active_set[train_set_indices]]
            test_set = X[active_set[np.arange(
                fold_size * trial, fold_size * (trial + 1))]]
            # learn location and covariance estimates from the train set
            # for several amounts of shrinkage
            for i, shrinkage in enumerate(shrinkage_range):
                location = test_set.mean(0)
                cov = empirical_covariance(train_set)
                cov.flat[::(n_features + 1)] += shrinkage * std_shrinkage
                # compute test data likelihood
                log_likelihoods[i, trial] = log_likelihood(
                    empirical_covariance(test_set - location,
                                         assume_centered=True),
                    pinvh(cov))
        optimal_shrinkage = shrinkage_range[
            np.argmax(log_likelihoods.mean(1))]
        self.shrinkage = optimal_shrinkage * std_shrinkage
        self.shrinkage_cst = optimal_shrinkage
        if verbose:
            print("optimal shrinkage: %g (%g x lambda(= %g))"
                  % (self.shrinkage, optimal_shrinkage, std_shrinkage))
        self.log_likelihoods = log_likelihoods
        self.shrinkage_range = shrinkage_range
        return shrinkage_range, log_likelihoods
    elif method == "oas":
        from sklearn.covariance import OAS
        rmcd = RMCDl2(shrinkage=std_shrinkage)
        support = rmcd.fit(X).support_
        oas = OAS().fit(X[support])
        if oas.shrinkage_ == 1:
            self.shrinkage_cst = np.inf
        else:
            self.shrinkage_cst = oas.shrinkage_ / (1. - oas.shrinkage_)
        self.shrinkage = self.shrinkage_cst * std_shrinkage * n_features
    elif method == "lw":
        from sklearn.covariance import LedoitWolf
        rmcd = RMCDl2(h=self.h, shrinkage=std_shrinkage)
        support = rmcd.fit(X).support_
        lw = LedoitWolf().fit(X[support])
        if lw.shrinkage_ == 1:
            self.shrinkage_cst = np.inf
        else:
            self.shrinkage_cst = lw.shrinkage_ / (1. - lw.shrinkage_)
        self.shrinkage = self.shrinkage_cst * std_shrinkage * n_features
    else:
        pass
    return
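# Hedged usage sketch for the shrinkage selection above. The RMCDl2 estimator
# and its default constructor are assumed from the references in the method
# body; they are not defined in this excerpt.
import numpy as np

rng = np.random.RandomState(0)
X_demo = rng.randn(500, 10)

est = RMCDl2()
# "cv" returns the explored grid and the per-fold log-likelihoods;
# "lw"/"oas" only set est.shrinkage and est.shrinkage_cst
shrinkage_range, log_likelihoods = est.set_optimal_shrinkage_amount(
    X_demo, method="cv", verbose=True)
print(est.shrinkage, est.shrinkage_cst)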
X_train = np.dot(base_X_train, coloring_matrix)
X_test = np.dot(base_X_test, coloring_matrix)

# #############################################################################
# Compute the likelihood on test data

# spanning a range of possible shrinkage coefficient values
shrinkages = np.logspace(-2, 0, 30)
negative_logliks = [-ShrunkCovariance(shrinkage=s).fit(X_train).score(X_test)
                    for s in shrinkages]

# under the ground-truth model, which we would not have access to in real
# settings
real_cov = np.dot(coloring_matrix.T, coloring_matrix)
emp_cov = empirical_covariance(X_train)
loglik_real = -log_likelihood(emp_cov, linalg.inv(real_cov))

# #############################################################################
# Compare different approaches to setting the parameter

# GridSearch for an optimal shrinkage coefficient
tuned_parameters = [{'shrinkage': shrinkages}]
cv = GridSearchCV(ShrunkCovariance(), tuned_parameters, cv=5)
cv.fit(X_train)

# Ledoit-Wolf optimal shrinkage coefficient estimate
lw = LedoitWolf()
loglik_lw = lw.fit(X_train).score(X_test)

# OAS coefficient estimate
oa = OAS()
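# The excerpt stops right after constructing the OAS estimator; presumably it
# is scored like the Ledoit-Wolf estimator above. The following line is an
# assumed continuation, not part of the original excerpt.
loglik_oa = oa.fit(X_train).score(X_test)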
def graphical_lasso(emp_cov, alpha, cov_init=None, mode='cd', tol=1e-4,
                    enet_tol=1e-4, max_iter=100, verbose=False,
                    return_costs=False, eps=np.finfo(np.float64).eps,
                    return_n_iter=False, isRLA=False):
    """l1-penalized covariance estimator.

    Read more in the :ref:`User Guide <sparse_inverse_covariance>`.

    Parameters
    ----------
    emp_cov : 2D ndarray, shape (n_features, n_features)
        Empirical covariance from which to compute the covariance estimate.

    alpha : positive float
        The regularization parameter: the higher alpha, the more
        regularization, the sparser the inverse covariance.

    cov_init : 2D array (n_features, n_features), optional
        The initial guess for the covariance.

    mode : {'cd', 'lars'}
        The Lasso solver to use: coordinate descent or LARS. Use LARS for
        very sparse underlying graphs, where p > n. Elsewhere prefer cd,
        which is more numerically stable.

    tol : positive float, optional
        The tolerance to declare convergence: if the dual gap goes below
        this value, iterations are stopped.

    enet_tol : positive float, optional
        The tolerance for the elastic net solver used to calculate the
        descent direction. This parameter controls the accuracy of the
        search direction for a given column update, not of the overall
        parameter estimate. Only used for mode='cd'.

    max_iter : integer, optional
        The maximum number of iterations.

    verbose : boolean, optional
        If verbose is True, the objective function and dual gap are
        printed at each iteration.

    return_costs : boolean, optional
        If return_costs is True, the objective function and dual gap at
        each iteration are returned.

    eps : float, optional
        The machine-precision regularization in the computation of the
        Cholesky diagonal factors. Increase this for very ill-conditioned
        systems.

    return_n_iter : bool, optional
        Whether or not to return the number of iterations.

    isRLA : bool, optional
        If True, the per-column Lasso problem is solved with the
        randomized (RLA) solver `SLRviaRP` instead of coordinate descent.

    Returns
    -------
    covariance : 2D ndarray, shape (n_features, n_features)
        The estimated covariance matrix.

    precision : 2D ndarray, shape (n_features, n_features)
        The estimated (sparse) precision matrix.

    costs : list of (objective, dual_gap) pairs
        The list of values of the objective function and the dual gap at
        each iteration. Returned only if return_costs is True.

    n_iter : int
        Number of iterations. Returned only if `return_n_iter` is set to
        True.

    See Also
    --------
    GraphicalLasso, GraphicalLassoCV

    Notes
    -----
    The algorithm employed to solve this problem is the GLasso algorithm,
    from the Friedman 2008 Biostatistics paper. It is the same algorithm
    as in the R `glasso` package.

    One possible difference with the `glasso` R package is that the
    diagonal coefficients are not penalized.
    """
    _, n_features = emp_cov.shape
    if alpha == 0:
        if return_costs:
            precision_ = linalg.inv(emp_cov)
            cost = -2. * log_likelihood(emp_cov, precision_)
            cost += n_features * np.log(2 * np.pi)
            d_gap = np.sum(emp_cov * precision_) - n_features
            if return_n_iter:
                return emp_cov, precision_, (cost, d_gap), 0
            else:
                return emp_cov, precision_, (cost, d_gap)
        else:
            if return_n_iter:
                return emp_cov, linalg.inv(emp_cov), 0
            else:
                return emp_cov, linalg.inv(emp_cov)
    if cov_init is None:
        covariance_ = emp_cov.copy()
    else:
        covariance_ = cov_init.copy()
    # As a trivial regularization (Tikhonov like), we scale down the
    # off-diagonal coefficients of our starting point: this is needed, as
    # in the cross-validation the cov_init can easily be ill-conditioned,
    # and the CV loop blows up. Besides, this takes a conservative
    # stand-point on the initial conditions, and it tends to make the
    # convergence go faster.
    covariance_ *= 0.95
    diagonal = emp_cov.flat[::n_features + 1]
    covariance_.flat[::n_features + 1] = diagonal
    precision_ = linalg.pinvh(covariance_)

    indices = np.arange(n_features)
    costs = list()
    # The different l1 regression solvers have different numerical errors
    if mode == 'cd':
        errors = dict(over='raise', invalid='ignore')
    else:
        errors = dict(invalid='raise')
    try:
        # be robust to the max_iter=0 edge case, see:
        # https://github.com/scikit-learn/scikit-learn/issues/4134
        d_gap = np.inf
        # set a sub_covariance buffer
        sub_covariance = np.copy(covariance_[1:, 1:], order='C')
        for i in range(max_iter):
            for idx in range(n_features):
                # To keep the contiguous matrix `sub_covariance` equal to
                # covariance_[indices != idx].T[indices != idx]
                # we only need to update 1 column and 1 line when idx changes
                if idx > 0:
                    di = idx - 1
                    sub_covariance[di] = covariance_[di][indices != idx]
                    sub_covariance[:, di] = covariance_[:, di][indices != idx]
                else:
                    sub_covariance[:] = covariance_[1:, 1:]
                row = emp_cov[idx, indices != idx]
                with np.errstate(**errors):
                    if mode == 'cd':
                        # Use coordinate descent
                        coefs = -(precision_[indices != idx, idx]
                                  / (precision_[idx, idx] + 1000 * eps))
                        # TODO: swap in RLA LASSO with the following
                        # enet_coordinate_descent_gram() function!
                        if isRLA:
                            coefs = SLRviaRP(
                                sub_covariance,
                                np.matmul(
                                    fractional_matrix_power(
                                        sub_covariance, -0.5), row),
                                0.01, 0.05, 30, 1.1, 100, gamma=0)
                        else:
                            coefs, _, _, _ = enet_coordinate_descent_gram(
                                coefs, alpha, 0, sub_covariance,
                                row, row, max_iter, enet_tol,
                                check_random_state(None), False)
                    else:
                        # Use LARS
                        _, _, coefs = lars_path_gram(
                            Xy=row, Gram=sub_covariance, n_samples=row.size,
                            alpha_min=alpha / (n_features - 1),
                            copy_Gram=True, eps=eps, method='lars',
                            return_path=False)
                # Update the precision matrix
                precision_[idx, idx] = (
                    1. / (covariance_[idx, idx]
                          - np.dot(covariance_[indices != idx, idx], coefs)))
                precision_[indices != idx, idx] = (- precision_[idx, idx]
                                                   * coefs)
                precision_[idx, indices != idx] = (- precision_[idx, idx]
                                                   * coefs)
                coefs = np.dot(sub_covariance, coefs)
                covariance_[idx, indices != idx] = coefs
                covariance_[indices != idx, idx] = coefs
            if not np.isfinite(precision_.sum()):
                raise FloatingPointError('The system is too ill-conditioned '
                                         'for this solver')
            d_gap = _dual_gap(emp_cov, precision_, alpha)
            cost = _objective(emp_cov, precision_, alpha)
            if verbose:
                print('[graphical_lasso] Iteration '
                      '% 3i, cost % 3.2e, dual gap %.3e'
                      % (i, cost, d_gap))
            if return_costs:
                costs.append((cost, d_gap))
            if np.abs(d_gap) < tol:
                break
            if not np.isfinite(cost) and i > 0:
                raise FloatingPointError('Non SPD result: the system is '
                                         'too ill-conditioned for this solver')
        else:
            warnings.warn('graphical_lasso: did not converge after '
                          '%i iteration: dual gap: %.3e'
                          % (max_iter, d_gap), ConvergenceWarning)
    except FloatingPointError as e:
        e.args = (e.args[0]
                  + '. The system is too ill-conditioned for this solver',)
        raise e

    if return_costs:
        if return_n_iter:
            return covariance_, precision_, costs, i + 1
        else:
            return covariance_, precision_, costs
    else:
        if return_n_iter:
            return covariance_, precision_, i + 1
        else:
            return covariance_, precision_
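# Minimal usage sketch for graphical_lasso above (toy data and the alpha value
# are illustrative only; isRLA is left at its default, so this exercises the
# standard coordinate-descent GLasso path).
import numpy as np
from sklearn.covariance import empirical_covariance

rng = np.random.RandomState(0)
X_toy = rng.randn(60, 20)
emp_cov_toy = empirical_covariance(X_toy)

cov_est, prec_est, n_iter = graphical_lasso(
    emp_cov_toy, alpha=0.2, mode='cd', max_iter=100, return_n_iter=True)
print(n_iter, np.count_nonzero(prec_est))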