def test_lars_path_gram_equivalent(method, return_path): _assert_same_lars_path_result( linear_model.lars_path_gram( Xy=Xy, Gram=G, n_samples=n_samples, method=method, return_path=return_path ), linear_model.lars_path(X, y, Gram=G, method=method, return_path=return_path), )
def test_lars_path_gram_equivalent(method, return_path): _assert_same_lars_path_result( linear_model.lars_path_gram( Xy=Xy, Gram=G, n_samples=n_samples, method=method, return_path=return_path), linear_model.lars_path( X, y, Gram=G, method=method, return_path=return_path))
def _fit_sufficient(self, xtx, xty, wess) -> None: self.alpha, _, coefs = lars_path_gram( Xy=xty[:, 0], Gram=xtx, n_samples=wess, max_iter=self.max_iter, alpha_min=self.min_corr, method="lasso", ) self.beta = coefs[:, [-1]]
def compute_bench(samples_range, features_range): it = 0 results = defaultdict(lambda: []) max_it = len(samples_range) * len(features_range) for n_samples in samples_range: for n_features in features_range: it += 1 print('====================') print('Iteration %03d of %03d' % (it, max_it)) print('====================') dataset_kwargs = { 'n_samples': n_samples, 'n_features': n_features, 'n_informative': n_features // 10, 'effective_rank': min(n_samples, n_features) / 10, #'effective_rank': None, 'bias': 0.0, } print("n_samples: %d" % n_samples) print("n_features: %d" % n_features) X, y = make_regression(**dataset_kwargs) gc.collect() print("benchmarking lars_path (with Gram):", end='') sys.stdout.flush() tstart = time() G = np.dot(X.T, X) # precomputed Gram matrix Xy = np.dot(X.T, y) lars_path_gram(Xy=Xy, Gram=G, n_samples=y.size, method='lasso') delta = time() - tstart print("%0.3fs" % delta) results['lars_path (with Gram)'].append(delta) gc.collect() print("benchmarking lars_path (without Gram):", end='') sys.stdout.flush() tstart = time() lars_path(X, y, method='lasso') delta = time() - tstart print("%0.3fs" % delta) results['lars_path (without Gram)'].append(delta) gc.collect() print("benchmarking lasso_path (with Gram):", end='') sys.stdout.flush() tstart = time() lasso_path(X, y, precompute=True) delta = time() - tstart print("%0.3fs" % delta) results['lasso_path (with Gram)'].append(delta) gc.collect() print("benchmarking lasso_path (without Gram):", end='') sys.stdout.flush() tstart = time() lasso_path(X, y, precompute=False) delta = time() - tstart print("%0.3fs" % delta) results['lasso_path (without Gram)'].append(delta) return results
def compute_bench(samples_range, features_range): it = 0 results = dict() lars = np.empty((len(features_range), len(samples_range))) lars_gram = lars.copy() omp = lars.copy() omp_gram = lars.copy() max_it = len(samples_range) * len(features_range) for i_s, n_samples in enumerate(samples_range): for i_f, n_features in enumerate(features_range): it += 1 n_informative = n_features / 10 print('====================') print('Iteration %03d of %03d' % (it, max_it)) print('====================') # dataset_kwargs = { # 'n_train_samples': n_samples, # 'n_test_samples': 2, # 'n_features': n_features, # 'n_informative': n_informative, # 'effective_rank': min(n_samples, n_features) / 10, # #'effective_rank': None, # 'bias': 0.0, # } dataset_kwargs = { 'n_samples': 1, 'n_components': n_features, 'n_features': n_samples, 'n_nonzero_coefs': n_informative, 'random_state': 0 } print("n_samples: %d" % n_samples) print("n_features: %d" % n_features) y, X, _ = make_sparse_coded_signal(**dataset_kwargs) X = np.asfortranarray(X) gc.collect() print("benchmarking lars_path (with Gram):", end='') sys.stdout.flush() tstart = time() G = np.dot(X.T, X) # precomputed Gram matrix Xy = np.dot(X.T, y) lars_path_gram(Xy=Xy, Gram=G, n_samples=y.size, max_iter=n_informative) delta = time() - tstart print("%0.3fs" % delta) lars_gram[i_f, i_s] = delta gc.collect() print("benchmarking lars_path (without Gram):", end='') sys.stdout.flush() tstart = time() lars_path(X, y, Gram=None, max_iter=n_informative) delta = time() - tstart print("%0.3fs" % delta) lars[i_f, i_s] = delta gc.collect() print("benchmarking orthogonal_mp (with Gram):", end='') sys.stdout.flush() tstart = time() orthogonal_mp(X, y, precompute=True, n_nonzero_coefs=n_informative) delta = time() - tstart print("%0.3fs" % delta) omp_gram[i_f, i_s] = delta gc.collect() print("benchmarking orthogonal_mp (without Gram):", end='') sys.stdout.flush() tstart = time() orthogonal_mp(X, y, precompute=False, n_nonzero_coefs=n_informative) delta = time() - tstart print("%0.3fs" % delta) omp[i_f, i_s] = delta results['time(LARS) / time(OMP)\n (w/ Gram)'] = (lars_gram / omp_gram) results['time(LARS) / time(OMP)\n (w/o Gram)'] = (lars / omp) return results
def compute_bench(samples_range, features_range): it = 0 results = dict() lars = np.empty((len(features_range), len(samples_range))) lars_gram = lars.copy() omp = lars.copy() omp_gram = lars.copy() max_it = len(samples_range) * len(features_range) for i_s, n_samples in enumerate(samples_range): for i_f, n_features in enumerate(features_range): it += 1 n_informative = n_features / 10 print('====================') print('Iteration %03d of %03d' % (it, max_it)) print('====================') # dataset_kwargs = { # 'n_train_samples': n_samples, # 'n_test_samples': 2, # 'n_features': n_features, # 'n_informative': n_informative, # 'effective_rank': min(n_samples, n_features) / 10, # #'effective_rank': None, # 'bias': 0.0, # } dataset_kwargs = { 'n_samples': 1, 'n_components': n_features, 'n_features': n_samples, 'n_nonzero_coefs': n_informative, 'random_state': 0 } print("n_samples: %d" % n_samples) print("n_features: %d" % n_features) y, X, _ = make_sparse_coded_signal(**dataset_kwargs) X = np.asfortranarray(X) gc.collect() print("benchmarking lars_path (with Gram):", end='') sys.stdout.flush() tstart = time() G = np.dot(X.T, X) # precomputed Gram matrix Xy = np.dot(X.T, y) lars_path_gram(Xy=Xy, Gram=G, n_samples=y.size, max_iter=n_informative) delta = time() - tstart print("%0.3fs" % delta) lars_gram[i_f, i_s] = delta gc.collect() print("benchmarking lars_path (without Gram):", end='') sys.stdout.flush() tstart = time() lars_path(X, y, Gram=None, max_iter=n_informative) delta = time() - tstart print("%0.3fs" % delta) lars[i_f, i_s] = delta gc.collect() print("benchmarking orthogonal_mp (with Gram):", end='') sys.stdout.flush() tstart = time() orthogonal_mp(X, y, precompute=True, n_nonzero_coefs=n_informative) delta = time() - tstart print("%0.3fs" % delta) omp_gram[i_f, i_s] = delta gc.collect() print("benchmarking orthogonal_mp (without Gram):", end='') sys.stdout.flush() tstart = time() orthogonal_mp(X, y, precompute=False, n_nonzero_coefs=n_informative) delta = time() - tstart print("%0.3fs" % delta) omp[i_f, i_s] = delta results['time(LARS) / time(OMP)\n (w/ Gram)'] = (lars_gram / omp_gram) results['time(LARS) / time(OMP)\n (w/o Gram)'] = (lars / omp) return results
def graphical_lasso(emp_cov, alpha, cov_init=None, mode='cd', tol=1e-4, enet_tol=1e-4, max_iter=100, verbose=False, return_costs=False, eps=np.finfo(np.float64).eps, return_n_iter=False, isRLA=False): """l1-penalized covariance estimator Read more in the :ref:`User Guide <sparse_inverse_covariance>`. Parameters ---------- emp_cov : 2D ndarray, shape (n_features, n_features) Empirical covariance from which to compute the covariance estimate. alpha : positive float The regularization parameter: the higher alpha, the more regularization, the sparser the inverse covariance. cov_init : 2D array (n_features, n_features), optional The initial guess for the covariance. mode : {'cd', 'lars'} The Lasso solver to use: coordinate descent or LARS. Use LARS for very sparse underlying graphs, where p > n. Elsewhere prefer cd which is more numerically stable. tol : positive float, optional The tolerance to declare convergence: if the dual gap goes below this value, iterations are stopped. enet_tol : positive float, optional The tolerance for the elastic net solver used to calculate the descent direction. This parameter controls the accuracy of the search direction for a given column update, not of the overall parameter estimate. Only used for mode='cd'. max_iter : integer, optional The maximum number of iterations. verbose : boolean, optional If verbose is True, the objective function and dual gap are printed at each iteration. return_costs : boolean, optional If return_costs is True, the objective function and dual gap at each iteration are returned. eps : float, optional The machine-precision regularization in the computation of the Cholesky diagonal factors. Increase this for very ill-conditioned systems. return_n_iter : bool, optional Whether or not to return the number of iterations. Returns ------- covariance : 2D ndarray, shape (n_features, n_features) The estimated covariance matrix. precision : 2D ndarray, shape (n_features, n_features) The estimated (sparse) precision matrix. costs : list of (objective, dual_gap) pairs The list of values of the objective function and the dual gap at each iteration. Returned only if return_costs is True. n_iter : int Number of iterations. Returned only if `return_n_iter` is set to True. See Also -------- GraphicalLasso, GraphicalLassoCV Notes ----- The algorithm employed to solve this problem is the GLasso algorithm, from the Friedman 2008 Biostatistics paper. It is the same algorithm as in the R `glasso` package. One possible difference with the `glasso` R package is that the diagonal coefficients are not penalized. """ _, n_features = emp_cov.shape if alpha == 0: if return_costs: precision_ = linalg.inv(emp_cov) cost = -2. * log_likelihood(emp_cov, precision_) cost += n_features * np.log(2 * np.pi) d_gap = np.sum(emp_cov * precision_) - n_features if return_n_iter: return emp_cov, precision_, (cost, d_gap), 0 else: return emp_cov, precision_, (cost, d_gap) else: if return_n_iter: return emp_cov, linalg.inv(emp_cov), 0 else: return emp_cov, linalg.inv(emp_cov) if cov_init is None: covariance_ = emp_cov.copy() else: covariance_ = cov_init.copy() # As a trivial regularization (Tikhonov like), we scale down the # off-diagonal coefficients of our starting point: This is needed, as # in the cross-validation the cov_init can easily be # ill-conditioned, and the CV loop blows. Beside, this takes # conservative stand-point on the initial conditions, and it tends to # make the convergence go faster. covariance_ *= 0.95 diagonal = emp_cov.flat[::n_features + 1] covariance_.flat[::n_features + 1] = diagonal precision_ = linalg.pinvh(covariance_) indices = np.arange(n_features) costs = list() # The different l1 regression solver have different numerical errors if mode == 'cd': errors = dict(over='raise', invalid='ignore') else: errors = dict(invalid='raise') try: # be robust to the max_iter=0 edge case, see: # https://github.com/scikit-learn/scikit-learn/issues/4134 d_gap = np.inf # set a sub_covariance buffer sub_covariance = np.copy(covariance_[1:, 1:], order='C') for i in range(max_iter): for idx in range(n_features): # To keep the contiguous matrix `sub_covariance` equal to # covariance_[indices != idx].T[indices != idx] # we only need to update 1 column and 1 line when idx changes if idx > 0: di = idx - 1 sub_covariance[di] = covariance_[di][indices != idx] sub_covariance[:, di] = covariance_[:, di][indices != idx] else: sub_covariance[:] = covariance_[1:, 1:] row = emp_cov[idx, indices != idx] with np.errstate(**errors): if mode == 'cd': # Use coordinate descent coefs = -(precision_[indices != idx, idx] / (precision_[idx, idx] + 1000 * eps)) # TODO: swap in RLA LASSO with the following enet_coordinate_descent_gram() function! if isRLA: coefs = SLRviaRP(sub_covariance, np.matmul( fractional_matrix_power( sub_covariance, -0.5), row), 0.01, 0.05, 30, 1.1, 100, gamma=0) else: coefs, _, _, _ = enet_coordinate_descent_gram( coefs, alpha, 0, sub_covariance, row, row, max_iter, enet_tol, check_random_state(None), False) else: # Use LARS _, _, coefs = lars_path_gram(Xy=row, Gram=sub_covariance, n_samples=row.size, alpha_min=alpha / (n_features - 1), copy_Gram=True, eps=eps, method='lars', return_path=False) # Update the precision matrix precision_[idx, idx] = ( 1. / (covariance_[idx, idx] - np.dot(covariance_[indices != idx, idx], coefs))) precision_[indices != idx, idx] = (-precision_[idx, idx] * coefs) precision_[idx, indices != idx] = (-precision_[idx, idx] * coefs) coefs = np.dot(sub_covariance, coefs) covariance_[idx, indices != idx] = coefs covariance_[indices != idx, idx] = coefs if not np.isfinite(precision_.sum()): raise FloatingPointError('The system is too ill-conditioned ' 'for this solver') d_gap = _dual_gap(emp_cov, precision_, alpha) cost = _objective(emp_cov, precision_, alpha) if verbose: print('[graphical_lasso] Iteration ' '% 3i, cost % 3.2e, dual gap %.3e' % (i, cost, d_gap)) if return_costs: costs.append((cost, d_gap)) if np.abs(d_gap) < tol: break if not np.isfinite(cost) and i > 0: raise FloatingPointError('Non SPD result: the system is ' 'too ill-conditioned for this solver') else: warnings.warn( 'graphical_lasso: did not converge after ' '%i iteration: dual gap: %.3e' % (max_iter, d_gap), ConvergenceWarning) except FloatingPointError as e: e.args = (e.args[0] + '. The system is too ill-conditioned for this solver', ) raise e if return_costs: if return_n_iter: return covariance_, precision_, costs, i + 1 else: return covariance_, precision_, costs else: if return_n_iter: return covariance_, precision_, i + 1 else: return covariance_, precision_