Example #1
def test_correlated():
    X, y, w_true = make_correlated_data(snr=np.inf)
    np.testing.assert_allclose(y, X @ w_true)

    snr = 5
    w_true = np.ones(50)
    X, y, _ = make_correlated_data(
        n_features=w_true.shape[0], w_true=w_true, snr=snr)
    np.testing.assert_allclose(
        snr, norm(X @ w_true) / norm(y - X @ w_true))

    np.testing.assert_raises(ValueError, make_correlated_data, corr=1.01)
    np.testing.assert_raises(ValueError, make_correlated_data, density=1.01)
Example #2
def plot_varying_sigma(corr, density, snr, max_iter=100):
    np.random.seed(0)
    # true coefficient vector has entries equal to 0 or 1
    supp = np.random.choice(n_features, size=int(density * n_features),
                            replace=False)
    w_true = np.zeros(n_features)
    w_true[supp] = 1
    X_, y_, w_true = make_correlated_data(
        n_samples=int(n_samples * 4 / 3.), n_features=n_features,
        w_true=w_true,
        corr=corr, snr=snr, random_state=0)

    X, X_test, y, y_test = train_test_split(X_, y_, test_size=0.25)

    print('Starting computation for this setting')

    ratio = 10 * datadriven_ratio(X, y)
    _, _, _, all_w = dual_primal(
        X, y, step_ratio=ratio, rho=0.99, ret_all=True,
        max_iter=max_iter,
        f_store=1)

    fig, axarr = plt.subplots(2, 2, sharey='row', sharex='col',
                              figsize=(4.2, 3.5), constrained_layout=True)

    scores = [f1_score(w != 0, w_true != 0) for w in all_w]
    mses = np.array([mean_squared_error(y_test, X_test @ w) for w in all_w])

    axarr[0, 0].plot(scores)
    axarr[1, 0].plot(mses / np.mean(y_test ** 2))

    axarr[0, 0].set_ylim(0, 1)
    axarr[0, 0].set_ylabel('F1 score')
    axarr[1, 0].set_ylabel("pred MSE left out")
    axarr[-1, 0].set_xlabel("CP iteration")
    axarr[0, 0].set_title('Iterative regularization')

    # last column: Lasso results
    alphas = norm(X.T @ y, ord=np.inf) / len(y) * np.geomspace(1, 1e-3)

    coefs = celer_path(X, y, 'lasso', alphas=alphas)[1].T
    axarr[0, 1].semilogx(
        alphas, [f1_score(coef != 0, w_true != 0) for coef in coefs])
    axarr[1, 1].semilogx(
        alphas,
        np.array([mean_squared_error(y_test, X_test @ coef) for coef in coefs])
        / np.mean(y_test ** 2))

    axarr[-1, 1].set_xlabel(r'$\lambda$')
    axarr[0, 1].set_title("Lasso path")

    axarr[0, 1].invert_xaxis()

    plt.show(block=False)
    return fig
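
A hypothetical invocation; the function reads ``n_samples`` and ``n_features`` as module-level globals, and ``dual_primal``, ``datadriven_ratio``, ``celer_path`` and the plotting imports must be in scope:

n_samples, n_features = 150, 500  # hypothetical problem size
fig = plot_varying_sigma(corr=0.5, density=0.1, snr=10, max_iter=200)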
Example #3
def test_cd_warm_start():
    X, y, _ = make_correlated_data(30, 50, random_state=12)
    alpha = np.max(np.abs(X.T @ y)) / 100

    # running 20 iterations should match running 10, then 10 more with warm start:
    for algo in [cd, ista]:
        w, _, E = algo(X, y, alpha, max_iter=20, f_store=1)

        w, _, E1 = algo(X, y, alpha, max_iter=10, f_store=1)
        w, _, E2 = algo(X, y, alpha, w_init=w, max_iter=10, f_store=1)
        np.testing.assert_allclose(E, np.hstack([E1, E2]))
Example #4
def test_dual_primal(solver):
    np.random.seed(0)
    X, y, _ = make_correlated_data(20, 30, random_state=0)
    w, theta, _ = solver(X, y, max_iter=100_000, ret_all=False)

    # feasibility
    np.testing.assert_array_less(norm(X @ w - y) / norm(y), 1e-9)
    # -X.T @ theta should be subgradient of L1 norm at w
    supp = w != 0
    np.testing.assert_allclose(-X[:, supp].T @ theta, np.sign(w[supp]))
    np.testing.assert_array_less(np.abs(X[:, ~supp].T @ theta), 1. - 1e-9)
Example #5
def test_rw_cvg():
    X, y, _ = make_correlated_data(20, 40, random_state=0)
    alpha = np.max(np.abs(X.T @ y)) / 5
    w, E = reweighted(X, y, alpha, max_iter=1000, n_adapt=5)
    clf = Lasso(fit_intercept=False, alpha=alpha / len(y)).fit(X, y)

    np.testing.assert_allclose(w, clf.coef_, atol=5e-4)

    np.testing.assert_allclose(E[-1] / E[0], E[-2] / E[0], atol=5e-4)

    # also check convergence with the non-convex MCP reweighting
    w, E = reweighted(X, y, alpha, deriv_MCP)

    np.testing.assert_allclose(E[-1] / E[0], E[-2] / E[0], atol=5e-4)
Example #6
def test_cd_ista_fista():
    np.random.seed(0)
    X, y, _ = make_correlated_data(20, 40, random_state=0)
    alpha = np.max(np.abs(X.T @ y)) / 5
    w, _, _ = cd(X, y, alpha, max_iter=100)
    clf = Lasso(fit_intercept=False, alpha=alpha / len(y)).fit(X, y)

    np.testing.assert_allclose(w, clf.coef_, atol=5e-4)

    w, _, _ = ista(X, y, alpha, max_iter=1_000)
    np.testing.assert_allclose(w, clf.coef_, atol=5e-4)

    w, _, _ = fista(X, y, alpha, max_iter=1_000)
    np.testing.assert_allclose(w, clf.coef_, atol=5e-4)
Example #7
import numpy as np
import matplotlib.pyplot as plt

from celer import GroupLassoCV
from celer.datasets import make_correlated_data
from celer.plot_utils import configure_plt

print(__doc__)
configure_plt(fontsize=16)

# Generate X, y and the true regression coefficients, with 4 groups of 5 non-zero values

n_samples, n_features = 100, 50

w_true = np.zeros(n_features)
w_true[:5] = 1
w_true[10:15] = 1
w_true[30:35] = -1
w_true[45:] = 1
X, y, w_true = make_correlated_data(
    n_samples, n_features, w_true=w_true, snr=5, random_state=0)

###############################################################################
# Get the group Lasso's optimal alpha for prediction by cross-validation

groups = 5  # groups are contiguous and of size 5
# irregular group structures are also supported
group_lasso = GroupLassoCV(groups=groups)
group_lasso.fit(X, y)

print("Estimated regularization parameter alpha: %s" % group_lasso.alpha_)

fig = plt.figure(figsize=(6, 3), constrained_layout=True)
plt.semilogx(group_lasso.alphas_, group_lasso.mse_path_, ':')
plt.semilogx(group_lasso.alphas_, group_lasso.mse_path_.mean(axis=-1), 'k',
             label='Average across the folds', linewidth=2)
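
The example stops before the figure is finished; a plausible completion with standard matplotlib calls (label wording is an assumption):

plt.axvline(group_lasso.alpha_, linestyle='--', color='k',
            label='CV estimate of alpha')  # mark the selected penalty
plt.xlabel(r'$\alpha$')
plt.ylabel('Mean squared prediction error')
plt.legend()
plt.show(block=False)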
Example #8
import matplotlib.pyplot as plt
from sklearn.metrics import f1_score
from celer.datasets import make_correlated_data
from celer.plot_utils import configure_plt

from iterreg.sparse import dual_primal

configure_plt()

# data for the experiment:
n_samples = 200
n_features = 500

X, y, w_true = make_correlated_data(n_samples=n_samples,
                                    n_features=n_features,
                                    corr=0.2,
                                    density=0.1,
                                    snr=10,
                                    random_state=0)

###############################################################################
# In the L1 case, the Chambolle-Pock algorithm converges to the noisy Basis
# Pursuit solution, which has ``min(n_samples, n_features)`` non-zero entries.
# The true coefficients may be much sparser, so it is important that the
# Chambolle-Pock iterates do not reach the sparsity pattern of their limit
# too fast, as this would introduce many false positives in the support.
# A remedy is to pick a small enough dual stepsize :math:`\sigma`, so that
# the dual variable :math:`\theta` grows slowly and the primal iterates
# remain sparse in the early iterations.
# With the default stepsizes :math:`\tau = \sigma = 0.99 / \Vert X \Vert`,
# the iterates become dense very fast; if :math:`\sigma` is too small, the
# iterates stay at 0 for too long.
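
A minimal sketch of this effect, assuming the ``dual_primal`` signature used in Example #9 below, where ``step`` scales the dual stepsize as :math:`\sigma = 1 / (\text{step} \cdot \Vert X \Vert)`:

import numpy as np

for step in [1, 10, 100]:
    _, _, _, all_w = dual_primal(X, y, step=step, rho=0.99,
                                 ret_all=True, max_iter=100, f_store=1)
    # support sizes along iterations: larger `step` (smaller sigma) keeps
    # the iterates sparse for longer
    print(step, np.sum(all_w != 0, axis=1)[::20])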
Example #9
def plot_varying_sigma(corr, density, snr, steps, max_iter=100, rho=0.99):
    np.random.seed(0)
    # true coefficient vector has entries equal to 0 or 1
    supp = np.random.choice(n_features,
                            size=int(density * n_features),
                            replace=False)
    w_true = np.zeros(n_features)
    w_true[supp] = 1
    X_, y_, w_true = make_correlated_data(n_samples=int(n_samples * 4 / 3.),
                                          n_features=n_features,
                                          w_true=w_true,
                                          corr=corr,
                                          snr=snr,
                                          random_state=0)

    X, X_test, y, y_test = train_test_split(X_, y_, test_size=0.25)

    print('Starting computation for this setting')
    fig, axarr = plt.subplots(4,
                              2,
                              sharey='row',
                              sharex='col',
                              figsize=(7, 5),
                              constrained_layout=True)

    fig.suptitle(r"Correlation=%.1f, $||w^*||_0$= %s, snr=%s" %
                 (corr, (w_true != 0).sum(), snr))

    for i, step in enumerate(steps):
        _, _, _, all_w = dual_primal(X,
                                     y,
                                     step=step,
                                     rho=rho,
                                     ret_all=True,
                                     max_iter=max_iter,
                                     f_store=1)
        scores = [f1_score(w != 0, w_true != 0) for w in all_w]
        supp_size = np.sum(all_w != 0, axis=1)
        mses = [mean_squared_error(y_test, X_test @ w) for w in all_w]

        axarr[0, 0].plot(scores, label=r"$\sigma = 1/(%d \, ||X||)$" % step)
        axarr[1, 0].semilogy(supp_size)
        axarr[2, 0].plot(norm(all_w - w_true, axis=1))
        axarr[3, 0].plot(mses)

    axarr[0, 0].set_ylim(0, 1)
    axarr[0, 0].set_ylabel('F1 score for support')
    axarr[1, 0].set_ylabel(r"$||w_k||_0$")
    axarr[2, 0].set_ylabel(r'$\Vert w_k - w^*\Vert$')
    axarr[-1, 0].set_xlabel("CP iteration")
    axarr[3, 0].set_ylabel("pred MSE left out")
    axarr[0, 0].legend(loc='lower right', fontsize=10)
    axarr[0, 0].set_title('Iterative regularization')

    # last column: Lasso results
    alphas = norm(X.T @ y, ord=np.inf) / len(y) * np.geomspace(1, 1e-3)

    coefs = celer_path(X, y, 'lasso', alphas=alphas)[1].T
    axarr[0, 1].semilogx(alphas,
                         [f1_score(coef != 0, w_true != 0) for coef in coefs])
    axarr[1, 1].semilogx(alphas, [np.sum(coef != 0) for coef in coefs])
    axarr[2, 1].semilogx(alphas, [norm(coef - w_true) for coef in coefs])
    axarr[3, 1].semilogx(
        alphas, [mean_squared_error(y_test, X_test @ coef) for coef in coefs])

    axarr[3, 1].set_xlabel(r'$\lambda$')
    axarr[0, 1].set_title("Lasso path")

    # x-axes are shared per column, so inverting one axis flips the whole column
    axarr[0, 1].invert_xaxis()

    plt.show(block=False)
Example #10
def test_BP():
    np.random.seed(0)
    X, y, _ = make_correlated_data(200, 300, random_state=0)
    clf = SparseIterReg(verbose=True, f_test=1, memory=30).fit(X, y)
    np.testing.assert_equal(np.argmin(clf.mses),
                            len(clf.mses) - clf.memory - 1)
Example #11
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn import linear_model
from celer.datasets import make_correlated_data

from sparse_ho.models import Lasso
from sparse_ho.criterion import HeldOutMSE
from sparse_ho import ImplicitForward
from sparse_ho.utils import Monitor
from sparse_ho import grad_search
from sparse_ho.optimizers import LineSearch

n_samples, n_features, corr, snr = 200, 70, 0.1, 5

X, y, _ = make_correlated_data(n_samples,
                               n_features,
                               corr=corr,
                               snr=snr,
                               random_state=42)

X, _, y, _ = train_test_split(X, y)

n_samples = X.shape[0]
idx_train = np.arange(0, n_samples // 2)
idx_val = np.arange(n_samples // 2, n_samples)

n_samples = len(y[idx_train])
alpha_max = np.max(np.abs(X[idx_train, :].T.dot(
    y[idx_train]))) / len(idx_train)
alpha0 = alpha_max / 10

tol = 1e-7
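
The snippet sets up the data and the initial penalty but stops before the search itself; a hedged sketch of the continuation, following the pattern of sparse_ho's documented examples (exact signatures may differ between versions):

estimator = linear_model.Lasso(fit_intercept=False, warm_start=True)
model = Lasso(estimator=estimator)  # sparse_ho model wrapping the sklearn solver
criterion = HeldOutMSE(idx_train, idx_val)  # held-out validation objective
algo = ImplicitForward()  # hypergradient via implicit differentiation
monitor = Monitor()  # records objective values and timings
optimizer = LineSearch(n_outer=10, tol=tol)
grad_search(algo, criterion, model, optimizer, X, y, alpha0, monitor)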
Example #12
import numpy as np

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold, train_test_split
from celer import Lasso, LassoCV
from celer.datasets import make_correlated_data

from sparse_ho.models import WeightedLasso
from sparse_ho.criterion import HeldOutMSE, CrossVal
from sparse_ho import ImplicitForward
from sparse_ho.utils import Monitor
from sparse_ho.ho import grad_search
from sparse_ho.optimizers import GradientDescent


##############################################################################
# Dataset creation
X, y, w_true = make_correlated_data(
    n_samples=100, n_features=1000, random_state=0, snr=5)

##############################################################################
X, X_test, y, y_test = train_test_split(X, y, test_size=0.333, random_state=0)

n_samples, n_features = X.shape
idx_train = np.arange(0, n_samples // 2)
idx_val = np.arange(n_samples // 2, n_samples)
##############################################################################

##############################################################################
# Max penalty value
alpha_max = np.max(np.abs(X[idx_train, :].T @ y[idx_train])) / len(idx_train)
n_alphas = 30
alphas = np.geomspace(alpha_max, alpha_max / 1_000, n_alphas)
##############################################################################
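
The imports suggest the example goes on to tune one penalty per feature by gradient descent; a hedged sketch of that continuation in the same sparse_ho pattern (names and exact signatures are assumptions):

estimator = Lasso(fit_intercept=False, warm_start=True)  # celer solver
model = WeightedLasso(estimator=estimator)  # one alpha per feature
sub_criterion = HeldOutMSE(None, None)
criterion = CrossVal(sub_criterion, cv=KFold(n_splits=5, shuffle=True,
                                             random_state=0))
algo = ImplicitForward()
monitor = Monitor()
optimizer = GradientDescent(n_outer=20, tol=1e-7, verbose=True)
alpha0 = alpha_max / 10 * np.ones(n_features)  # initial per-feature penalties
grad_search(algo, criterion, model, optimizer, X, y, alpha0, monitor)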
Example #13
# dataset = "rcv1"
dataset = 'simu'

##############################################################################
# Load some data

# dataset = 'rcv1'
dataset = 'simu'

if dataset == 'rcv1':
    X, y = fetch_libsvm('rcv1.binary')
    y -= y.mean()
    y /= np.linalg.norm(y)
else:
    X, y, _ = make_correlated_data(n_samples=200,
                                   n_features=400,
                                   snr=5,
                                   random_state=0)

n_samples = X.shape[0]
idx_train = np.arange(0, n_samples // 2)
idx_val = np.arange(n_samples // 2, n_samples)

print("Starting path computation...")
alpha_max = np.max(np.abs(X[idx_train, :].T @ y[idx_train])) / len(idx_train)

alpha_min = 1e-4 * alpha_max

n_grid = 15
alphas_l1 = np.geomspace(alpha_max, alpha_min, n_grid)
alphas_l2 = np.geomspace(alpha_max, alpha_min, n_grid)
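
The two grids suggest a joint search over the L1 and L2 penalties of an elastic net; a minimal sketch of evaluating the grid on the held-out half (the loop is an assumption, not part of the original example; the mapping to sklearn's alpha/l1_ratio parametrization is spelled out in the comments):

from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_squared_error

mse_val = np.zeros((n_grid, n_grid))
for i, a1 in enumerate(alphas_l1):
    for j, a2 in enumerate(alphas_l2):
        # sklearn's ElasticNet penalty is alpha * (l1_ratio * ||w||_1
        # + 0.5 * (1 - l1_ratio) * ||w||_2^2), so a1 ||w||_1 + 0.5 a2 ||w||_2^2
        # corresponds to alpha = a1 + a2 and l1_ratio = a1 / (a1 + a2)
        enet = ElasticNet(alpha=a1 + a2, l1_ratio=a1 / (a1 + a2),
                          fit_intercept=False)
        enet.fit(X[idx_train], y[idx_train])
        mse_val[i, j] = mean_squared_error(y[idx_val], X[idx_val] @ enet.coef_)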