def test_correlated():
    """Check ``make_correlated_data`` on noiseless data, SNR, and validation.

    Three properties are verified:

    * with ``snr=np.inf`` the returned target equals ``X @ w_true``;
    * with a user-supplied ``w_true``, the ratio between the norm of the
      signal ``X @ w_true`` and the norm of the residual ``y - X @ w_true``
      equals the requested ``snr``;
    * ``corr`` and ``density`` values of 1.01 raise ``ValueError``.
    """
    X, y, w_true = make_correlated_data(snr=np.inf)
    np.testing.assert_allclose(y, X @ w_true)

    snr = 5
    w_true = np.ones(50)
    # w_true must be handed to the generator: otherwise the data is built from
    # a different coefficient vector and the ratio below is meaningless.
    X, y, _ = make_correlated_data(
        n_features=w_true.shape[0], w_true=w_true, snr=snr)
    # Compare snr to the signal/noise ratio. The third positional argument of
    # assert_allclose is rtol, so the two norms must be divided, not passed
    # as separate arguments.
    np.testing.assert_allclose(
        snr, norm(X @ w_true) / norm(y - X @ w_true))

    np.testing.assert_raises(ValueError, make_correlated_data, corr=1.01)
    np.testing.assert_raises(ValueError, make_correlated_data, density=1.01)
def plot_varying_sigma(corr, density, snr, max_iter=100):
    """Plot iterative regularization against the Lasso path on simulated data.

    Relies on the module-level ``n_samples`` and ``n_features``. A 0/1
    coefficient vector with ``int(density * n_features)`` non-zero entries is
    drawn, data is simulated with ``make_correlated_data`` and split into
    train/test parts. The left column shows, per stored ``dual_primal``
    iterate, the support F1 score and the test MSE normalised by the mean of
    ``y_test ** 2``; the right column shows the same quantities along a Lasso
    path computed with ``celer_path``.

    Parameters
    ----------
    corr, snr :
        Forwarded to ``make_correlated_data``.
    density : float
        Fraction of features with a non-zero true coefficient.
    max_iter : int
        Forwarded to ``dual_primal``.

    Returns
    -------
    fig : the created matplotlib figure.
    """
    np.random.seed(0)
    # ground truth: zeros everywhere except ones on a random support
    n_nonzero = int(density * n_features)
    support = np.random.choice(n_features, size=n_nonzero, replace=False)
    coef_true = np.zeros(n_features)
    coef_true[support] = 1
    X_full, y_full, coef_true = make_correlated_data(
        n_samples=int(n_samples * 4 / 3.), n_features=n_features,
        w_true=coef_true, corr=corr, snr=snr, random_state=0)
    X, X_test, y, y_test = train_test_split(X_full, y_full, test_size=0.25)
    print('Starting computation for this setting')

    step_ratio = 10 * datadriven_ratio(X, y)
    _, _, _, iterates = dual_primal(
        X, y, step_ratio=step_ratio, rho=0.99, ret_all=True,
        max_iter=max_iter, f_store=1)

    fig, axes = plt.subplots(
        2, 2, sharey='row', sharex='col', figsize=(4.2, 3.5),
        constrained_layout=True)
    (ax_f1_cp, ax_f1_lasso), (ax_mse_cp, ax_mse_lasso) = axes

    true_support = coef_true != 0
    test_power = np.mean(y_test ** 2)

    # first column: one point per stored iterate
    f1_per_iter = []
    mse_per_iter = []
    for w in iterates:
        f1_per_iter.append(f1_score(w != 0, true_support))
        mse_per_iter.append(mean_squared_error(y_test, X_test @ w))
    ax_f1_cp.plot(f1_per_iter)
    ax_mse_cp.plot(np.array(mse_per_iter) / test_power)

    ax_f1_cp.set_ylim(0, 1)
    ax_f1_cp.set_ylabel('F1 score')
    ax_mse_cp.set_ylabel("pred MSE left out")
    ax_mse_cp.set_xlabel("CP iteration")
    ax_f1_cp.set_title('Iterative regularization')

    # last column: Lasso results
    alpha_max = norm(X.T @ y, ord=np.inf) / len(y)
    alphas = alpha_max * np.geomspace(1, 1e-3)
    lasso_coefs = celer_path(X, y, 'lasso', alphas=alphas)[1].T
    lasso_f1 = [f1_score(coef != 0, true_support) for coef in lasso_coefs]
    lasso_mse = np.array(
        [mean_squared_error(y_test, X_test @ coef) for coef in lasso_coefs]
    ) / test_power
    ax_f1_lasso.semilogx(alphas, lasso_f1)
    ax_mse_lasso.semilogx(alphas, lasso_mse)

    ax_mse_lasso.set_xlabel(r'$\lambda$')
    ax_f1_lasso.set_title("Lasso path")
    # the columns share their x axis, so reversing one reverses the column
    ax_f1_lasso.invert_xaxis()
    plt.show(block=False)
    return fig
def test_cd_warm_start():
    """Check that warm-starting ``cd`` and ``ista`` resumes where they stopped.

    For each solver, the third return value of a 20-iteration run must match
    the concatenation of the third return values of a 10-iteration run and of
    a further 10-iteration run started from the first run's coefficients.
    """
    X, y, _ = make_correlated_data(30, 50, random_state=12)
    alpha = np.max(np.abs(X.T @ y)) / 100

    for solver in (cd, ista):
        # one shot: 20 iterations from scratch
        _, _, E_full = solver(X, y, alpha, max_iter=20, f_store=1)
        # two stages: 10 iterations, then 10 more from the resulting iterate
        w_half, _, E_first = solver(X, y, alpha, max_iter=10, f_store=1)
        _, _, E_second = solver(
            X, y, alpha, w_init=w_half, max_iter=10, f_store=1)
        E_two_stage = np.hstack([E_first, E_second])
        np.testing.assert_allclose(E_full, E_two_stage)
def test_dual_primal(solver):
    """Check that ``solver`` returns a primal/dual pair meeting the asserted
    optimality conditions.

    The test asserts that ``X @ w`` matches ``y`` up to a relative error of
    1e-9, and that ``-X.T @ theta`` behaves like a subgradient of the L1 norm
    at ``w``: equal to ``sign(w)`` on the support, and strictly below
    ``1 - 1e-9`` in absolute value outside of it.
    """
    np.random.seed(0)
    X, y, _ = make_correlated_data(20, 30, random_state=0)
    w, theta, _ = solver(X, y, max_iter=100_000, ret_all=False)

    # feasibility
    relative_residual = norm(X @ w - y) / norm(y)
    np.testing.assert_array_less(relative_residual, 1e-9)

    # -X.T @ theta should be subgradient of L1 norm at w
    active = w != 0
    X_active = X[:, active]
    X_inactive = X[:, ~active]
    assert_allclose(-X_active.T @ theta, np.sign(w[active]))
    assert_array_less(np.abs(X_inactive.T @ theta), 1. - 1e-9)
def test_rw_cvg():
    """Check ``reweighted`` against ``Lasso`` and for stalled final values.

    With ``n_adapt=5`` and the default penalty derivative, the returned
    coefficients must match a ``Lasso`` fit using ``alpha / len(y)``. For both
    the default call and the ``deriv_MCP`` call, the last two entries of the
    returned sequence ``E`` (presumably objective values), normalised by the
    first entry, must agree up to 5e-4.
    """
    tol = 5e-4

    def assert_last_values_stalled(E):
        # last two recorded values, relative to the initial one, must coincide
        np.testing.assert_allclose(E[-1] / E[0], E[-2] / E[0], atol=tol)

    X, y, _ = make_correlated_data(20, 40, random_state=0)
    alpha = np.max(np.abs(X.T @ y)) / 5

    w_rw, E_rw = reweighted(X, y, alpha, max_iter=1000, n_adapt=5)
    lasso = Lasso(fit_intercept=False, alpha=alpha / len(y)).fit(X, y)
    np.testing.assert_allclose(w_rw, lasso.coef_, atol=tol)
    assert_last_values_stalled(E_rw)

    _, E_mcp = reweighted(X, y, alpha, deriv_MCP)
    assert_last_values_stalled(E_mcp)
def test_cd_ista_fista():
    """Check that ``cd``, ``ista`` and ``fista`` agree with ``Lasso``.

    Each solver is run on the same simulated problem (100 iterations for
    ``cd``, 1000 for ``ista`` and ``fista``) and its coefficients must match
    those of a ``Lasso`` fit with ``alpha / len(y)`` up to 5e-4.
    """
    np.random.seed(0)
    X, y, _ = make_correlated_data(20, 40, random_state=0)
    alpha = np.max(np.abs(X.T @ y)) / 5

    w_cd, _, _ = cd(X, y, alpha, max_iter=100)
    # the reference estimator takes the penalty divided by the sample count
    reference = Lasso(fit_intercept=False, alpha=alpha / len(y)).fit(X, y)
    np.testing.assert_allclose(w_cd, reference.coef_, atol=5e-4)

    for proximal_solver in (ista, fista):
        w_prox, _, _ = proximal_solver(X, y, alpha, max_iter=1_000)
        np.testing.assert_allclose(w_prox, reference.coef_, atol=5e-4)
from celer.datasets import make_correlated_data
from celer.plot_utils import configure_plt

# NOTE(review): `np`, `plt` and `GroupLassoCV` are used below but not imported
# in this fragment -- presumably imported earlier in the file; confirm.

print(__doc__)
configure_plt(fontsize=16)

# Generating X, y, and true regression coefs with 4 groups of 5 non-zero values

n_samples, n_features = 100, 50

# Non-zero coefficients sit in the index ranges 0-4, 10-14, 30-34 and 45-49;
# the third range has negative sign.
w_true = np.zeros(n_features)
w_true[:5] = 1
w_true[10:15] = 1
w_true[30:35] = -1
w_true[45:] = 1
X, y, w_true = make_correlated_data(
    n_samples, n_features, w_true=w_true, snr=5, random_state=0)

###############################################################################
# Get group Lasso's optimal alpha for prediction by cross validation

groups = 5  # groups are contiguous and of size 5
# irregular groups are also supported (see the GroupLassoCV documentation)
group_lasso = GroupLassoCV(groups=groups)
group_lasso.fit(X, y)

print("Estimated regularization parameter alpha: %s" % group_lasso.alpha_)

# Dotted lines: `mse_path_` plotted against `alphas_` (presumably one curve
# per fold); solid black line: its mean over the last axis.
fig = plt.figure(figsize=(6, 3), constrained_layout=True)
plt.semilogx(group_lasso.alphas_, group_lasso.mse_path_, ':')
plt.semilogx(group_lasso.alphas_, group_lasso.mse_path_.mean(axis=-1), 'k',
             label='Average across the folds', linewidth=2)
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score

from celer.datasets import make_correlated_data
from celer.plot_utils import configure_plt
from iterreg.sparse import dual_primal

configure_plt()

# data for the experiment:
n_samples = 200
n_features = 500

X, y, w_true = make_correlated_data(
    n_samples=n_samples, n_features=n_features,
    corr=0.2, density=0.1, snr=10, random_state=0)

###############################################################################
# In the L1 case, the Chambolle-Pock algorithm converges to the noisy Basis
# Pursuit solution, which has ``min(n_samples, n_features)`` nonzero entries.
# The true coefficients may be much sparser. It is thus important that the
# Chambolle-Pock iterates do not reach the sparsity level of their limit too
# fast, as this would lead to many false positives in the support.
# A remedy is to pick a small enough dual stepsize, :math:`\sigma`, so that
# the dual variable theta grows slowly, and the primal iterates remain sparse
# in the early iterations.
# With the default stepsizes tau = sigma = 0.99 / ||X||, the iterates become
# dense very fast. If sigma is too small, the iterates stay 0 for too long.
def plot_varying_sigma(corr, density, snr, steps, max_iter=100, rho=0.99):
    """Plot the effect of the ``dual_primal`` step on simulated sparse data.

    Relies on the module-level ``n_samples`` and ``n_features``. A 0/1
    coefficient vector with ``int(density * n_features)`` non-zero entries is
    drawn, data is simulated with ``make_correlated_data`` and split into
    train/test parts. For every value in ``steps`` the stored ``dual_primal``
    iterates are summarised in the left column (support F1 score, support
    size, distance to the true coefficients, test MSE). The right column shows
    the same four quantities along a Lasso path computed with ``celer_path``.

    Parameters
    ----------
    corr, snr :
        Forwarded to ``make_correlated_data``.
    density : float
        Fraction of features with a non-zero true coefficient.
    steps : iterable
        Values passed one by one as ``step`` to ``dual_primal``; each also
        labels its curve in the legend.
    max_iter : int
        Forwarded to ``dual_primal``.
    rho : float
        Forwarded to ``dual_primal``.
    """
    np.random.seed(0)
    # true coefficient vector has entries equal to 0 or 1
    supp = np.random.choice(n_features, size=int(density * n_features),
                            replace=False)
    w_true = np.zeros(n_features)
    w_true[supp] = 1
    X_, y_, w_true = make_correlated_data(
        n_samples=int(n_samples * 4 / 3.), n_features=n_features,
        w_true=w_true, corr=corr, snr=snr, random_state=0)

    # no random_state here: the split draws from the global NumPy RNG that
    # was seeded at the top of the function
    X, X_test, y, y_test = train_test_split(X_, y_, test_size=0.25)

    print('Starting computation for this setting')
    fig, axarr = plt.subplots(4, 2, sharey='row', sharex='col',
                              figsize=(7, 5), constrained_layout=True)

    fig.suptitle(r"Correlation=%.1f, $||w^*||_0$= %s, snr=%s" %
                 (corr, (w_true != 0).sum(), snr))

    # first column: one curve per step value
    for step in steps:
        _, _, _, all_w = dual_primal(
            X, y, step=step, rho=rho, ret_all=True, max_iter=max_iter,
            f_store=1)
        scores = [f1_score(w != 0, w_true != 0) for w in all_w]
        supp_size = np.sum(all_w != 0, axis=1)
        mses = [mean_squared_error(y_test, X_test @ w) for w in all_w]

        axarr[0, 0].plot(scores, label=r"$\sigma=1 /%d ||X||$" % step)
        axarr[1, 0].semilogy(supp_size)
        axarr[2, 0].plot(norm(all_w - w_true, axis=1))
        axarr[3, 0].plot(mses)

    axarr[0, 0].set_ylim(0, 1)
    axarr[0, 0].set_ylabel('F1 score for support')
    axarr[1, 0].set_ylabel(r"$||w_k||_0$")
    axarr[2, 0].set_ylabel(r'$\Vert w_k - w^*\Vert$')
    axarr[3, 0].set_ylabel("pred MSE left out")
    # the x label belongs on the bottom row, like the lambda label of the
    # right column; inner rows of a shared column show no tick labels
    axarr[-1, 0].set_xlabel("CP iteration")
    axarr[0, 0].legend(loc='lower right', fontsize=10)
    axarr[0, 0].set_title('Iterative regularization')

    # last column: Lasso results
    alphas = norm(X.T @ y, ord=np.inf) / len(y) * np.geomspace(1, 1e-3)

    coefs = celer_path(X, y, 'lasso', alphas=alphas)[1].T
    axarr[0, 1].semilogx(
        alphas, [f1_score(coef != 0, w_true != 0) for coef in coefs])
    axarr[1, 1].semilogx(alphas, [np.sum(coef != 0) for coef in coefs])
    axarr[2, 1].semilogx(alphas, [norm(coef - w_true) for coef in coefs])
    axarr[3, 1].semilogx(
        alphas, [mean_squared_error(y_test, X_test @ coef) for coef in coefs])

    axarr[3, 1].set_xlabel(r'$\lambda$')
    axarr[0, 1].set_title("Lasso path")

    # The column shares its x axis, so one inversion reverses every row.
    # Flipping the limits once per row only works for an odd row count.
    axarr[0, 1].invert_xaxis()

    plt.show(block=False)
def test_BP():
    """Check where the smallest recorded MSE of ``SparseIterReg`` is located.

    After fitting with ``memory=30`` and ``f_test=1``, the index of the
    minimum of ``clf.mses`` must be exactly ``memory + 1`` entries before the
    end of the list.
    """
    np.random.seed(0)
    X, y, _ = make_correlated_data(200, 300, random_state=0)
    model = SparseIterReg(verbose=True, f_test=1, memory=30).fit(X, y)

    best_index = np.argmin(model.mses)
    expected_index = len(model.mses) - model.memory - 1
    np.testing.assert_equal(best_index, expected_index)
from sklearn.model_selection import train_test_split
from sklearn import linear_model

from celer.datasets import make_correlated_data

from sparse_ho.models import Lasso
from sparse_ho.criterion import HeldOutMSE
from sparse_ho import ImplicitForward
from sparse_ho.utils import Monitor
from sparse_ho import grad_search
from sparse_ho.optimizers import LineSearch

# NOTE(review): `np` is used below but not imported in this fragment --
# presumably imported earlier in the file; confirm.

# Simulated regression problem.
n_samples, n_features, corr, snr = 200, 70, 0.1, 5
X, y, _ = make_correlated_data(
    n_samples, n_features, corr=corr, snr=snr, random_state=42)

# Keep only the first part of the split; the held-out part is discarded.
# No random_state is passed, so this split is not seeded here.
X, _, y, _ = train_test_split(X, y)
n_samples = X.shape[0]
# First half of the remaining rows for training, second half for validation.
idx_train = np.arange(0, n_samples // 2)
idx_val = np.arange(n_samples // 2, n_samples)

# n_samples is rebound to the size of the training half.
n_samples = len(y[idx_train])
# Largest absolute correlation between a feature and the training target,
# scaled by the training size (presumably the smallest penalty for which the
# Lasso solution is all zeros -- confirm against the model's scaling).
alpha_max = np.max(np.abs(X[idx_train, :].T.dot(
    y[idx_train]))) / len(idx_train)
# Starting value of the penalty: a tenth of alpha_max.
alpha0 = alpha_max / 10

tol = 1e-7
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold

from celer import Lasso, LassoCV
from celer.datasets import make_correlated_data

from sparse_ho.models import WeightedLasso
from sparse_ho.criterion import HeldOutMSE, CrossVal
from sparse_ho import ImplicitForward
from sparse_ho.utils import Monitor
from sparse_ho.ho import grad_search
from sparse_ho.optimizers import GradientDescent

# NOTE(review): `np` and `train_test_split` are used below but not imported
# in this fragment -- presumably imported earlier in the file; confirm.

##############################################################################
# Dataset creation
X, y, w_true = make_correlated_data(
    n_samples=100, n_features=1000, random_state=0, snr=5)

##############################################################################
# Hold out a third of the samples for testing, then split the remaining rows
# into a first half for training and a second half for validation.
X, X_test, y, y_test = train_test_split(X, y, test_size=0.333, random_state=0)

n_samples, n_features = X.shape
idx_train = np.arange(0, n_samples // 2)
idx_val = np.arange(n_samples // 2, n_samples)
##############################################################################

##############################################################################
# Max penalty value
alpha_max = np.max(np.abs(X[idx_train, :].T @ y[idx_train])) / len(idx_train)
n_alphas = 30
# Geometric grid of 30 penalties from alpha_max down to alpha_max / 1000.
alphas = np.geomspace(alpha_max, alpha_max / 1_000, n_alphas)
##############################################################################
# dataset = "rcv1" dataset = 'simu' ############################################################################## # Load some data # dataset = 'rcv1' dataset = 'simu' if dataset == 'rcv1': X, y = fetch_libsvm('rcv1.binary') y -= y.mean() y /= np.linalg.norm(y) else: X, y, _ = make_correlated_data(n_samples=200, n_features=400, snr=5, random_state=0) n_samples = X.shape[0] idx_train = np.arange(0, n_samples // 2) idx_val = np.arange(n_samples // 2, n_samples) print("Starting path computation...") alpha_max = np.max(np.abs(X[idx_train, :].T @ y[idx_train])) / len(idx_train) alpha_min = 1e-4 * alpha_max n_grid = 15 alphas_l1 = np.geomspace(alpha_max, alpha_min, n_grid) alphas_l2 = np.geomspace(alpha_max, alpha_min, n_grid)