Exemple #1
0
def test_weights():
    """Check the weighted Lasso path against the path on rescaled features.

    The path computed with ``weights=weights`` on ``X`` is compared with the
    unweighted path computed on ``X / weights``: the alphas must agree and
    the coefficients must agree once the second set is divided by the
    weights. Both sets of gaps must be below ``tol``.
    """
    X, y = build_dataset(n_samples=30, n_features=50, sparse_X=1)

    np.random.seed(0)
    n_features = X.shape[1]
    weights = np.abs(np.random.randn(n_features))

    tol = 1e-12
    path_kwargs = dict(n_alphas=10, tol=tol)

    # Path solved with the weights handed to the solver.
    alphas_w, coefs_w, gaps_w = celer_path(
        X, y, "lasso", weights=weights, verbose=1, **path_kwargs)

    # Path solved without weights, on the column-rescaled design.
    X_rescaled = X / weights[None, :]
    alphas_r, coefs_r, gaps_r = celer_path(
        X_rescaled, y, "lasso", **path_kwargs)

    np.testing.assert_allclose(alphas_w, alphas_r)
    np.testing.assert_allclose(
        coefs_w, coefs_r / weights[:, None], atol=1e-5, rtol=1e-3)
    for gaps in (gaps_w, gaps_r):
        np.testing.assert_array_less(gaps, tol)
Exemple #2
0
def test_weights():
    """Check weighted Lasso (path and estimator) against rescaled features.

    Three things are verified:
    * ``celer_path`` with ``weights`` matches ``celer_path`` on
      ``X / weights`` (alphas equal, coefficients equal after dividing the
      latter by the weights, gaps below the scaled tolerance);
    * the ``Lasso`` estimator shows the same equivalence at a single alpha;
    * fitting after setting one weight to zero raises ``ValueError``.
    """
    X, y = build_dataset(n_samples=30, n_features=50, sparse_X=1)

    np.random.seed(0)
    weights = np.abs(np.random.randn(X.shape[1]))

    tol = 1e-14
    path_kwargs = dict(n_alphas=10, tol=tol)
    alphas_w, coefs_w, gaps_w = celer_path(
        X, y, "lasso", weights=weights, verbose=1, **path_kwargs)
    alphas_r, coefs_r, gaps_r = celer_path(
        X / weights[None, :], y, "lasso", **path_kwargs)

    assert_allclose(alphas_w, alphas_r)
    assert_allclose(coefs_w, coefs_r / weights[:, None], atol=1e-4, rtol=1e-3)
    # Each gap array is checked against tol * norm(y)**2 / len(y).
    assert_array_less(gaps_w, tol * norm(y)**2 / len(y))
    assert_array_less(gaps_r, tol * norm(y)**2 / len(y))

    # Same equivalence, this time through the estimator interface.
    alpha = 0.001
    weighted = Lasso(
        alpha=alpha, weights=weights, fit_intercept=False).fit(X, y)
    rescaled = Lasso(alpha=alpha, fit_intercept=False).fit(X / weights, y)
    assert_allclose(weighted.coef_, rescaled.coef_ / weights)

    # weights must be > 0
    weighted.weights[0] = 0.
    np.testing.assert_raises(ValueError, weighted.fit, X=X, y=y)
Exemple #3
0
def test_celer_path(sparse_X, alphas, pb):
    """Check that every gap on the path is below the scaled tolerance.

    For ``pb == "logreg"`` the targets are replaced by their sign and the
    bound is ``tol * len(y) * log(2)``; otherwise the bound is
    ``tol * norm(y)**2 / len(y)``. When ``alphas`` is not None it is
    replaced by a 10-point log-spaced grid from ``alpha_max`` down to
    ``alpha_max / 100``. The iteration counts are also checked to be >= 1.
    """
    X, y = build_dataset(n_samples=30, n_features=50, sparse_X=sparse_X)
    tol = 1e-6

    is_logreg = pb == "logreg"
    if is_logreg:
        y = np.sign(y)
    if is_logreg:
        gap_bound = tol * len(y) * np.log(2)
    else:
        gap_bound = tol * norm(y)**2 / len(y)

    if alphas is not None:
        alpha_max = np.max(np.abs(X.T.dot(y))) / X.shape[0]
        alphas = alpha_max * np.logspace(0, -2, 10)

    solver_kwargs = dict(
        alphas=alphas,
        tol=tol,
        return_thetas=True,
        verbose=1,
        return_n_iter=True,
    )
    alphas, _, gaps, _, n_iters = celer_path(X, y, pb, **solver_kwargs)

    assert_array_less(gaps, gap_bound)
    # hack because array_less wants strict inequality
    assert_array_less(0.99, n_iters)
Exemple #4
0
def test_infinite_weights(pb):
    """Check that features given an infinite weight get zero coefficients.

    One tenth of the features, chosen at random, receive ``np.inf`` as
    weight. After solving for a single alpha, the gap must be below the
    problem-specific bound and the coefficients at those indices must be
    exactly zero.
    """
    n_samples, n_features = 50, 100
    X, y = build_dataset(n_samples, n_features)
    is_logreg = pb == "logreg"
    if is_logreg:
        y = np.sign(y)

    # Order of the random calls matters for reproducibility.
    np.random.seed(1)
    weights = np.abs(np.random.randn(n_features))
    inf_indices = np.random.choice(
        n_features, size=n_features // 10, replace=False)
    weights[inf_indices] = np.inf

    alpha = norm(X.T @ y / weights, ord=np.inf) / n_samples / 100

    tol = 1e-8
    _, coefs, dual_gaps = celer_path(
        X, y, pb=pb, alphas=[alpha], weights=weights, tol=tol)

    if is_logreg:
        gap_bound = tol * n_samples * np.log(2)
    else:
        gap_bound = tol * norm(y)**2 / 2.
    assert_array_less(dual_gaps[0], gap_bound)

    assert_array_equal(coefs[inf_indices], 0)
Exemple #5
0
def test_celer_path_logreg(solver):
    """Compare the celer sparse logistic regression path with liblinear.

    Parameters
    ----------
    solver : str
        ``"celer-pn"`` enables ``use_PN`` in ``celer_path``; any other value
        leaves it disabled. The reference path is always computed by
        ``_logistic_regression_path`` with ``solver='liblinear'``.

    The test checks that the gaps are below ``tol``, that both solvers
    select exactly the same support, and that the coefficients agree up to
    the given tolerances.
    """
    X, y = build_dataset(n_samples=60, n_features=100, sparse_X=True)
    y = np.sign(y)
    alpha_max = norm(X.T.dot(y), ord=np.inf) / 2
    alphas = alpha_max * np.geomspace(1, 1e-2, 10)

    tol = 1e-8
    # Reference solution; its Cs grid is the elementwise inverse of alphas.
    # Only the coefficients are needed from the returned triple.
    coefs, _, _ = _logistic_regression_path(X,
                                            y,
                                            Cs=1. / alphas,
                                            fit_intercept=False,
                                            penalty='l1',
                                            solver='liblinear',
                                            tol=tol)

    _, coefs_c, gaps = celer_path(X,
                                  y,
                                  "logreg",
                                  alphas=alphas,
                                  tol=tol,
                                  verbose=1,
                                  use_PN=(solver == "celer-pn"))

    np.testing.assert_array_less(gaps, tol)
    # The supports are boolean masks: they must match exactly, so use an
    # exact equality assertion instead of a floating-point closeness check.
    np.testing.assert_array_equal(coefs != 0, coefs_c.T != 0)
    np.testing.assert_allclose(coefs, coefs_c.T, atol=1e-5, rtol=1e-3)
Exemple #6
0
def test_celer_single_alpha(sparse_X, pb):
    """Solve for one alpha (``alpha_max / 10``) and check gaps < ``tol``.

    For ``pb == "logreg"`` the targets are replaced by their sign first.
    """
    X, y = build_dataset(n_samples=20, n_features=100, sparse_X=sparse_X)
    if pb == "logreg":
        y = np.sign(y)

    n_samples = X.shape[0]
    alpha_max = norm(X.T.dot(y), ord=np.inf) / n_samples
    single_alpha = [alpha_max / 10.]

    tol = 1e-6
    _, _, gaps = celer_path(X, y, pb, alphas=single_alpha, tol=tol)
    np.testing.assert_array_less(gaps, tol)
Exemple #7
0
def test_celer_path_sparse():
    """Run ``celer_path`` on a CSC-converted design and check gaps < ``tol``.

    The alpha grid has 10 log-spaced values from ``alpha_max`` down to
    ``alpha_max / 100``, where ``alpha_max = max(|X.T y|)``.
    """
    X, y, _, _ = build_dataset(n_samples=50, n_features=50, n_targets=1)
    X = sparse.csc_matrix(X)

    correlations = np.abs(X.T.dot(y))
    alpha_max = np.max(correlations)
    alphas = alpha_max * np.logspace(0, -2, 10)

    tol = 1e-6
    _, _, gaps = celer_path(X, y, alphas=alphas, tol=tol, verbose=1)
    np.testing.assert_array_less(gaps, tol)
Exemple #8
0
def test_celer_path_vs_lasso_path(sparse_X, prune):
    """Test that celer_path matches sklearn lasso_path.

    Both solvers receive the same ``eps``, ``n_alphas`` and ``tol``; the
    resulting alphas and coefficients are compared.
    """
    X, y, _, _ = build_dataset(n_samples=30, n_features=50, sparse_X=sparse_X)

    shared = {'eps': 1e-2, 'n_alphas': 10, 'tol': 1e-14}

    celer_alphas, celer_coefs, _ = celer_path(
        X, y, return_thetas=False, verbose=1, prune=prune, **shared)
    ref_alphas, ref_coefs, _ = lasso_path(X, y, verbose=False, **shared)

    np.testing.assert_allclose(celer_alphas, ref_alphas)
    np.testing.assert_allclose(celer_coefs, ref_coefs, rtol=1e-05, atol=1e-6)
Exemple #9
0
def test_celer_single_alpha(sparse_X, pb):
    """Solve for one alpha (``alpha_max / 10``) and check the scaled gap.

    The bound is ``tol * log(2) * len(y)`` for ``pb == "logreg"`` (after
    replacing the targets by their sign) and ``tol * norm(y)**2 / len(y)``
    otherwise.
    """
    X, y = build_dataset(n_samples=20, n_features=100, sparse_X=sparse_X)
    tol = 1e-6

    if pb != "logreg":
        gap_bound = tol * norm(y)**2 / len(y)
    else:
        y = np.sign(y)
        gap_bound = tol * np.log(2) * len(y)

    n_samples = X.shape[0]
    alpha_max = norm(X.T.dot(y), ord=np.inf) / n_samples
    single_alpha = [alpha_max / 10.]
    _, _, gaps = celer_path(X, y, pb, alphas=single_alpha, tol=tol)
    assert_array_less(gaps, gap_bound)
Exemple #10
0
def test_zero_column(sparse_X):
    """Check the solver on a design whose first 20 columns are zeroed out.

    After solving for ``alpha_max / 10`` the gaps must be below ``tol`` and
    the coefficient vector must have one entry per column of ``X``.
    """
    X, y = build_dataset(n_samples=60, n_features=50, sparse_X=sparse_X)
    n_zero_columns = 20

    # Pick the view holding the entries to erase, then zero it in place.
    # The sparse branch assumes column-compressed storage, where
    # indptr[k] marks the end of the first k columns -- TODO confirm.
    if sparse_X:
        to_erase = X.data[:X.indptr[n_zero_columns]]
    else:
        to_erase = X[:, :n_zero_columns]
    to_erase.fill(0.)

    n_samples = X.shape[0]
    alpha_max = norm(X.T.dot(y), ord=np.inf) / n_samples
    tol = 1e-6
    _, coefs, gaps = celer_path(
        X, y, "lasso", alphas=[alpha_max / 10.], tol=tol, p0=50, prune=0)

    solution = coefs.T[0]
    np.testing.assert_array_less(gaps, tol)
    np.testing.assert_equal(solution.shape[0], X.shape[1])
Exemple #11
0
def test_celer_path(sparse_X, alphas, positive):
    """Check that all gaps on the path are below ``tol``.

    When ``alphas`` is not None it is replaced by a 10-point log-spaced
    grid from ``alpha_max`` down to ``alpha_max / 100``. The iteration
    counts returned by the solver are also checked to be >= 1.
    """
    X, y, _, _ = build_dataset(n_samples=30, n_features=50, sparse_X=sparse_X)

    if alphas is not None:
        alpha_max = np.max(np.abs(X.T.dot(y))) / X.shape[0]
        alphas = alpha_max * np.logspace(0, -2, 10)

    tol = 1e-6
    solver_kwargs = dict(
        alphas=alphas,
        tol=tol,
        return_thetas=True,
        verbose=False,
        verbose_inner=False,
        positive=positive,
        return_n_iter=True,
    )
    alphas, _, gaps, _, n_iters = celer_path(X, y, **solver_kwargs)

    np.testing.assert_array_less(gaps, tol)
    # hack because array_less wants strict inequality
    np.testing.assert_array_less(0.99, n_iters)
Exemple #12
0
def test_celer_path_vs_lasso_path(sparse_X, prune):
    """Test that celer_path matches sklearn lasso_path.

    Both solvers share ``eps``, ``n_alphas`` and ``tol``; celer is capped at
    ``max_iter=30`` and lasso_path at ``max_iter=10000``. The alphas and
    coefficients are compared, and the celer gaps must be below ``tol``.
    """
    X, y = build_dataset(n_samples=30, n_features=50, sparse_X=sparse_X)

    tol = 1e-12
    shared = {'eps': 1e-3, 'n_alphas': 10, 'tol': tol}

    celer_alphas, celer_coefs, celer_gaps = celer_path(
        X, y, "lasso", return_thetas=False, verbose=1, prune=prune,
        max_iter=30, **shared)
    ref_alphas, ref_coefs, _ = lasso_path(
        X, y, verbose=False, max_iter=10000, **shared)

    np.testing.assert_allclose(celer_alphas, ref_alphas)
    np.testing.assert_array_less(celer_gaps, tol)
    np.testing.assert_allclose(celer_coefs, ref_coefs, rtol=1e-03, atol=1e-5)
def test_celer_path(sparse_X):
    """Check that all gaps on a 10-point path are below ``tol``.

    The grid is log-spaced from ``alpha_max`` down to ``alpha_max / 100``,
    with ``alpha_max = max(|X.T y|) / n_samples``.
    """
    X, y, _, _ = build_dataset(n_samples=30, n_features=50, sparse_X=sparse_X)

    alpha_max = np.max(np.abs(X.T.dot(y))) / X.shape[0]
    alphas = alpha_max * np.logspace(0, -2, 10)

    tol = 1e-6
    solver_kwargs = dict(
        alphas=alphas,
        tol=tol,
        return_thetas=True,
        verbose=False,
        verbose_inner=False,
    )
    alphas, _, gaps, _ = celer_path(X, y, **solver_kwargs)
    np.testing.assert_array_less(gaps, tol)
Exemple #14
0
# Script fragment: time celer_path over a grid of alphas for several
# stopping tolerances, then draw the timings as a bar chart.
# X, y and alpha_max are expected to be defined before this point.

# 11 values geometrically spaced from alpha_max down to alpha_max / 10.
n_alphas = 11
alphas = alpha_max * np.geomspace(1, 0.1, n_alphas)

###############################################################################
# Run Celer on a grid of regularization parameters, for various tolerances:
tols = [1e-2, 1e-4, 1e-6]
# One row (Celer), one column per tolerance: elapsed wall-clock seconds.
results = np.zeros([1, len(tols)])
# One row per tolerance, one column per alpha: third output of celer_path.
gaps = np.zeros((len(tols), len(alphas)))

print("Starting path computation...")
for tol_ix, tol in enumerate(tols):
    # Only the solver call sits between the two time.time() readings.
    t0 = time.time()
    res = celer_path(X,
                     y,
                     'lasso',
                     alphas=alphas,
                     tol=tol,
                     prune=True,
                     verbose=1)
    results[0, tol_ix] = time.time() - t0
    # coefs is overwritten at each tolerance; gaps keeps one row per tol.
    _, coefs, gaps[tol_ix] = res

# NOTE(review): labels is not used in the visible part of this script.
labels = [r"\sc{Celer}"]
figsize = (4, 3.5)

# Transpose so that each tolerance becomes a row (one bar per tolerance).
df = pd.DataFrame(results.T, columns=["Celer"])
df.index = [str(tol) for tol in tols]
df.plot.bar(rot=0, figsize=figsize)
plt.xlabel("stopping tolerance")
plt.ylabel("path computation time (s)")
plt.tight_layout()
Exemple #15
0
# Script fragment: time celer_path for several stopping tolerances and
# plot the timings with plot_path_hist.
# X, y, alpha_max and n_alphas are expected to be defined before this point.

# Log-spaced grid from alpha_max down to alpha_max / 100.
alphas = alpha_max * np.logspace(0, -2, n_alphas)

# Solver options forwarded unchanged to every celer_path call below.
gap_freq = 10
prune = 1
verbose = 0
verbose_inner = 0

tols = [1e-2, 1e-4, 1e-6]
# One row (Celer), one column per tolerance: elapsed wall-clock seconds.
results = np.zeros([1, len(tols)])
for tol_ix, tol in enumerate(tols):
    # Only the solver call sits between the two time.time() readings.
    t0 = time.time()
    res = celer_path(X,
                     y,
                     alphas=alphas,
                     max_iter=100,
                     gap_freq=gap_freq,
                     p0=100,
                     verbose=verbose,
                     verbose_inner=verbose_inner,
                     tol=tol,
                     prune=prune,
                     return_thetas=True)
    results[0, tol_ix] = time.time() - t0
    # With return_thetas=True the result is unpacked into four values;
    # these names are overwritten at each tolerance.
    _, coefs, gaps, thetas = res
    betas = coefs.T

labels = [r"\sc{Celer}"]
figsize = (7, 4)
fig = plot_path_hist(results, labels, tols, figsize, ylim=None)
plt.show()
Exemple #16
0
# Script fragment: compare the running time of celer_path and lasso_path
# for several stopping tolerances.
# X, y, alpha_max, n_alphas and n_samples are expected to be defined
# before this point.

# Log-spaced grid from alpha_max down to alpha_max / 100.
alphas = alpha_max * np.logspace(0, -2, n_alphas)

# Solver options forwarded unchanged to every celer_path call below.
gap_freq = 10
prune = 1
verbose = 0
verbose_inner = 0
tols = [1e-2, 1e-4, 1e-6, 1e-8]
# Row 0: celer_path timings; row 1: lasso_path timings; one column per tol.
results = np.zeros([2, len(tols)])
for tol_ix, tol in enumerate(tols):
    # Only the solver call sits between the two time.time() readings.
    t0 = time.time()
    res = celer_path(X,
                     y,
                     alphas,
                     max_iter=100,
                     gap_freq=gap_freq,
                     max_epochs_inner=50000,
                     p0=100,
                     verbose=verbose,
                     verbose_inner=verbose_inner,
                     tol=tol,
                     prune=prune)
    results[0, tol_ix] = time.time() - t0
    print('Celer time: %.2f s' % results[0, tol_ix])
    # NOTE(review): the three outputs are named betas, thetas, gaps here;
    # confirm this matches the celer_path version in use.
    betas, thetas, gaps = res

    t0 = time.time()
    # NOTE(review): alphas are divided by n_samples for lasso_path only,
    # presumably because the two solvers scale the objective differently
    # -- confirm.
    _, coefs, dual_gaps = lasso_path(X, y, tol=tol, alphas=alphas / n_samples)
    results[1, tol_ix] = time.time() - t0
    coefs = coefs.T

# One label per row of results.
labels = [r"\sc{CELER}", "scikit-learn"]
Exemple #17
0
# Script fragment: compare the running time of celer_path and lasso_path
# on a 100-point path for several stopping tolerances.
# X, y and n_samples are expected to be defined before this point.

# Center y and scale it to unit standard deviation, in place.
y -= np.mean(y)
y /= np.std(y)

print("Starting path computation...")
alpha_max = np.max(np.abs(X.T.dot(y))) / n_samples

# 100 values geometrically spaced from alpha_max down to alpha_max / 100.
n_alphas = 100
alphas = alpha_max * np.geomspace(1, 0.01, n_alphas)

tols = [1e-2, 1e-4, 1e-6, 1e-8]
# Row 0: celer_path timings; row 1: lasso_path timings; one column per tol.
results = np.zeros([2, len(tols)])
for tol_ix, tol in enumerate(tols):
    # Only the solver call sits between the two time.time() readings.
    t0 = time.time()
    _, coefs, gaps = celer_path(X,
                                y,
                                pb='lasso',
                                alphas=alphas,
                                tol=tol,
                                prune=True)
    results[0, tol_ix] = time.time() - t0
    print('Celer time: %.2f s' % results[0, tol_ix])

    t0 = time.time()
    # Same alphas grid as above; coefs is overwritten by this result.
    _, coefs, dual_gaps = lasso_path(X, y, tol=tol, alphas=alphas)
    results[1, tol_ix] = time.time() - t0

# One label per row of results.
labels = [r"\sc{Celer}", "scikit-learn"]
figsize = (7.1, 4.3)
fig = plot_path_hist(results, labels, tols, figsize, ylim=None)
plt.show()