Example #1
def test_weights():
    sparse_X = True
    X, y = build_dataset(n_samples=30, n_features=50, sparse_X=sparse_X)

    np.random.seed(0)
    weights = np.abs(np.random.randn(X.shape[1]))

    tol = 1e-12
    params = {'n_alphas': 10, 'tol': tol}
    alphas1, coefs1, gaps1 = celer_path(X,
                                        y,
                                        "lasso",
                                        weights=weights,
                                        verbose=1,
                                        **params)

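    # the weighted Lasso on X is equivalent to the plain Lasso on X / weights,
    # with coefficients rescaled by the weights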
    alphas2, coefs2, gaps2 = celer_path(X / weights[None, :], y, "lasso",
                                        **params)

    np.testing.assert_allclose(alphas1, alphas2)
    np.testing.assert_allclose(coefs1,
                               coefs2 / weights[:, None],
                               atol=1e-5,
                               rtol=1e-3)
    np.testing.assert_array_less(gaps1, tol)
    np.testing.assert_array_less(gaps2, tol)
Example #2
def test_weights():
    sparse_X = True
    X, y = build_dataset(n_samples=30, n_features=50, sparse_X=sparse_X)

    np.random.seed(0)
    weights = np.abs(np.random.randn(X.shape[1]))

    tol = 1e-14
    params = {'n_alphas': 10, 'tol': tol}
    alphas1, coefs1, gaps1 = celer_path(X,
                                        y,
                                        "lasso",
                                        weights=weights,
                                        verbose=1,
                                        **params)

    alphas2, coefs2, gaps2 = celer_path(X / weights[None, :], y, "lasso",
                                        **params)

    assert_allclose(alphas1, alphas2)
    assert_allclose(coefs1, coefs2 / weights[:, None], atol=1e-4, rtol=1e-3)
    assert_array_less(gaps1, tol * norm(y)**2 / len(y))
    assert_array_less(gaps2, tol * norm(y)**2 / len(y))

    alpha = 0.001
    clf1 = Lasso(alpha=alpha, weights=weights, fit_intercept=False).fit(X, y)
    clf2 = Lasso(alpha=alpha, fit_intercept=False).fit(X / weights, y)

    assert_allclose(clf1.coef_, clf2.coef_ / weights)

    # weights must be > 0
    clf1.weights[0] = 0.
    np.testing.assert_raises(ValueError, clf1.fit, X=X, y=y)
Example #3
def test_group_lasso_lasso(sparse_X, fit_intercept, normalize):
    # check that group Lasso with groups of size 1 gives Lasso
    n_features = 1000
    X, y = build_dataset(n_samples=100,
                         n_features=n_features,
                         sparse_X=sparse_X)[:2]
    alpha_max = norm(X.T @ y, ord=np.inf) / len(y)
    alpha = alpha_max / 10
    clf = Lasso(alpha,
                tol=1e-12,
                fit_intercept=fit_intercept,
                normalize=normalize,
                verbose=0)
    clf.fit(X, y)
    # take groups of size 1:
    clf1 = GroupLasso(alpha=alpha,
                      groups=1,
                      tol=1e-12,
                      fit_intercept=fit_intercept,
                      normalize=normalize,
                      verbose=0)
    clf1.fit(X, y)

    np.testing.assert_allclose(clf1.coef_, clf.coef_, atol=1e-6)
    np.testing.assert_allclose(clf1.intercept_, clf.intercept_, rtol=1e-4)
Example #4
def test_weights_group_lasso():
    n_samples, n_features = 30, 50
    X, y = build_dataset(n_samples, n_features, sparse_X=True)

    groups = 5
    n_groups = n_features // groups
    np.random.seed(0)
    weights = np.abs(np.random.randn(n_groups))

    tol = 1e-14
    params = {'n_alphas': 10, 'tol': tol, 'verbose': 1}
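    # repeat each group weight so that there is one weight per feature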
    augmented_weights = np.repeat(weights, groups)

    alphas1, coefs1, gaps1 = celer_path(
        X, y, "grouplasso", groups=groups, weights=weights,
        eps=1e-2, **params)
    alphas2, coefs2, gaps2 = celer_path(
        X.multiply(1 / augmented_weights[None, :]), y, "grouplasso",
        groups=groups, eps=1e-2, **params)

    assert_allclose(alphas1, alphas2)
    assert_allclose(
        coefs1, coefs2 / augmented_weights[:, None], rtol=1e-3)
    assert_array_less(gaps1, tol * norm(y) ** 2 / len(y))
    assert_array_less(gaps2, tol * norm(y) ** 2 / len(y))
Example #5
def test_LogisticRegression(sparse_X):
    np.random.seed(1409)
    X, y = build_dataset(
        n_samples=30, n_features=60, sparse_X=sparse_X)
    y = np.sign(y)
    alpha_max = norm(X.T.dot(y), ord=np.inf) / 2
    C = 30. / alpha_max

    tol = 1e-8
    clf1 = LogisticRegression(C=C, tol=tol, verbose=2)
    clf1.fit(X, y)

    clf2 = sklearn_Logreg(
        C=C, penalty='l1', solver='liblinear', fit_intercept=False, tol=tol)
    clf2.fit(X, y)
    np.testing.assert_allclose(clf1.coef_, clf2.coef_, rtol=1e-3, atol=1e-5)

    # check_estimator uses float32 data, so increase tol to avoid precision issues
    clf1.tol = 1e-4
    check_estimator(clf1)

    # multinomial test: a looser tolerance is needed
    # for the results to be comparable
    y = np.random.choice(4, len(y))
    clf3 = LogisticRegression(C=C, tol=tol, verbose=2)
    clf3.fit(X, y)

    clf4 = sklearn_Logreg(
        C=C, penalty='l1', solver='liblinear', fit_intercept=False, tol=tol)
    clf4.fit(X, y)
    np.testing.assert_allclose(clf3.coef_, clf4.coef_, rtol=1e-3, atol=1e-3)

    clf3.tol = 1e-3
    check_estimator(clf3)
Example #6
def test_celer_path(sparse_X, alphas, pb):
    """Test Lasso path convergence."""
    X, y = build_dataset(n_samples=30, n_features=50, sparse_X=sparse_X)
    tol = 1e-6
    if pb == "logreg":
        y = np.sign(y)
        tol_scaled = tol * len(y) * np.log(2)
    else:
        tol_scaled = tol * norm(y)**2 / len(y)
    n_samples = X.shape[0]
    if alphas is not None:
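        # replace the provided alphas with an explicit geometric grid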
        alpha_max = np.max(np.abs(X.T.dot(y))) / n_samples
        n_alphas = 10
        alphas = alpha_max * np.logspace(0, -2, n_alphas)

    alphas, _, gaps, _, n_iters = celer_path(X,
                                             y,
                                             pb,
                                             alphas=alphas,
                                             tol=tol,
                                             return_thetas=True,
                                             verbose=1,
                                             return_n_iter=True)
    assert_array_less(gaps, tol_scaled)
    # assert_array_less is strict, so 0.99 checks that n_iters >= 1
    assert_array_less(0.99, n_iters)
Example #7
def test_celer_path_logreg(solver):
    X, y = build_dataset(n_samples=60, n_features=100, sparse_X=True)
    y = np.sign(y)
    alpha_max = norm(X.T.dot(y), ord=np.inf) / 2
    alphas = alpha_max * np.geomspace(1, 1e-2, 10)

    tol = 1e-8
    coefs, Cs, n_iters = _logistic_regression_path(X,
                                                   y,
                                                   Cs=1. / alphas,
                                                   fit_intercept=False,
                                                   penalty='l1',
                                                   solver='liblinear',
                                                   tol=tol)

    _, coefs_c, gaps = celer_path(X,
                                  y,
                                  "logreg",
                                  alphas=alphas,
                                  tol=tol,
                                  verbose=1,
                                  use_PN=(solver == "celer-pn"))

    np.testing.assert_array_less(gaps, tol)
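    # both solvers should recover the same support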
    np.testing.assert_allclose(coefs != 0, coefs_c.T != 0)
    np.testing.assert_allclose(coefs, coefs_c.T, atol=1e-5, rtol=1e-3)
Example #8
def test_MultiTaskLassoCV():
    """Test that our MultitaskLassoCV behaves like sklearn's."""
    X, y = build_dataset(n_samples=30, n_features=50, n_targets=3)

    params = dict(eps=1e-2, n_alphas=10, tol=1e-10, cv=2, n_jobs=1,
                  fit_intercept=False, verbose=2)

    clf = MultiTaskLassoCV(**params)
    clf.fit(X, y)

    clf2 = sklearn_MultiTaskLassoCV(**params)
    clf2.max_iter = 10000  # increase max_iter because of the low tol
    clf2.fit(X, y)

    np.testing.assert_allclose(clf.mse_path_, clf2.mse_path_,
                               atol=1e-4, rtol=1e-04)
    np.testing.assert_allclose(clf.alpha_, clf2.alpha_,
                               atol=1e-4, rtol=1e-04)
    np.testing.assert_allclose(clf.coef_, clf2.coef_,
                               atol=1e-4, rtol=1e-04)

    # check_estimator tests with float32 data, so tol < 1e-7 causes
    # precision issues
    clf.tol = 1e-5
    check_estimator(clf)
Example #9
def test_infinite_weights(pb):
    n_samples, n_features = 50, 100
    X, y = build_dataset(n_samples, n_features)
    if pb == "logreg":
        y = np.sign(y)

    np.random.seed(1)
    weights = np.abs(np.random.randn(n_features))
    n_inf = n_features // 10
    inf_indices = np.random.choice(n_features, size=n_inf, replace=False)
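    # features with infinite weight should end up with zero coefficients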
    weights[inf_indices] = np.inf

    alpha = norm(X.T @ y / weights, ord=np.inf) / n_samples / 100

    tol = 1e-8
    _, coefs, dual_gaps = celer_path(X,
                                     y,
                                     pb=pb,
                                     alphas=[alpha],
                                     weights=weights,
                                     tol=tol)

    if pb == "logreg":
        assert_array_less(dual_gaps[0], tol * n_samples * np.log(2))
    else:
        assert_array_less(dual_gaps[0], tol * norm(y)**2 / 2.)

    assert_array_equal(coefs[inf_indices], 0)
Example #10
def test_dropin_logreg():
    np.random.seed(1409)
    check_estimator(LogisticRegression)
    X, y, _, _ = build_dataset(n_samples=100, n_features=100, sparse_X=True)
    y = np.sign(y)
    alpha_max = norm(X.T.dot(y), ord=np.inf) / 2
    C = 30. / alpha_max

    tol = 1e-8
    clf1 = LogisticRegression(C=C, tol=tol)
    clf1.fit(X, y)

    clf2 = sklearn_Logreg(C=C,
                          penalty='l1',
                          solver='liblinear',
                          fit_intercept=False,
                          tol=tol)
    clf2.fit(X, y)
    np.testing.assert_allclose(clf1.coef_, clf2.coef_, rtol=1e-3, atol=1e-5)

    # multinomial test:
    y = np.random.choice(4, len(y))
    clf3 = LogisticRegression(C=C, tol=tol)
    clf3.fit(X, y)

    clf4 = sklearn_Logreg(C=C,
                          penalty='l1',
                          solver='liblinear',
                          fit_intercept=False,
                          tol=tol)
    clf4.fit(X, y)
    np.testing.assert_allclose(clf3.coef_, clf4.coef_, rtol=1e-3, atol=1e-4)
Example #11
def test_group_lasso_path(sparse_X):
    n_features = 50
    X, y = build_dataset(
        n_samples=11, n_features=n_features, sparse_X=sparse_X)

    tol = 1e-8
    alphas, coefs, gaps = celer_path(
        X, y, "grouplasso", groups=5, eps=1e-2, n_alphas=10, tol=tol)
    np.testing.assert_array_less(gaps, tol)
Example #12
def test_celer_single_alpha(sparse_X, pb):
    X, y = build_dataset(n_samples=20, n_features=100, sparse_X=sparse_X)
    if pb == "logreg":
        y = np.sign(y)
    alpha_max = norm(X.T.dot(y), ord=np.inf) / X.shape[0]

    tol = 1e-6
    _, coefs, gaps = celer_path(X, y, pb, alphas=[alpha_max / 10.], tol=tol)
    np.testing.assert_array_less(gaps, tol)
Example #13
def test_check_weights():
    X, y = build_dataset(30, 42)
    weights = np.ones(X.shape[1] // 7)
    weights[0] = 0
    clf = GroupLasso(weights=weights, groups=7)  # groups of size 7
    # weights must be > 0
    np.testing.assert_raises(ValueError, clf.fit, X=X, y=y)
    # len(weights) must be equal to number of groups (6 here)
    clf.weights = np.ones(8)
    np.testing.assert_raises(ValueError, clf.fit, X=X, y=y)
Example #14
def test_GroupLasso(sparse_X):
    n_features = 50
    X, y = build_dataset(n_samples=11,
                         n_features=n_features,
                         sparse_X=sparse_X,
                         n_informative_features=n_features)[:2]

    tol = 1e-4
    clf = GroupLasso(alpha=0.01, groups=10, tol=tol)
    clf.fit(X, y)
    np.testing.assert_array_less(clf.dual_gap_, tol)
Example #15
def test_mtl_path():
    X, Y = build_dataset(n_targets=3)
    tol = 1e-10
    params = dict(eps=0.01, tol=tol, n_alphas=10)
    alphas, coefs, gaps = mtl_path(X, Y, **params)
    np.testing.assert_array_less(gaps, tol)

    sk_alphas, sk_coefs, sk_gaps = lasso_path(X, Y, **params, max_iter=10000)
    np.testing.assert_array_less(sk_gaps, tol * np.linalg.norm(Y, 'fro')**2)
    np.testing.assert_array_almost_equal(coefs, sk_coefs, decimal=5)
    np.testing.assert_allclose(alphas, sk_alphas)
Example #16
def test_zero_iter():
    X, y = build_dataset(n_samples=30, n_features=50)

    # a convergence warning is raised because -1 is returned as the gap
    with warnings.catch_warnings(record=True):
        assert_allclose(Lasso(max_iter=0).fit(X, y).coef_, 0)
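        # map y to {-1, 1} labels for logistic regression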
        y = 2 * (y > 0) - 1
        assert_allclose(
            LogisticRegression(max_iter=0, solver="celer-pn").fit(X, y).coef_,
            0)
        assert_allclose(
            LogisticRegression(max_iter=0, solver="celer").fit(X, y).coef_, 0)
Example #17
def test_mtl():
    X, Y, _, _ = build_dataset(n_targets=10)
    tol = 1e-9
    alphas, coefs, gaps = mtl_path(X, Y, eps=1e-2, tol=tol)
    np.testing.assert_array_less(gaps, tol)

    sk_alphas, sk_coefs, sk_gaps = lasso_path(X, Y, eps=1e-2, tol=tol)
    np.testing.assert_array_less(sk_gaps, tol * np.linalg.norm(Y, 'fro')**2)
    np.testing.assert_array_almost_equal(coefs, sk_coefs, decimal=5)
    np.testing.assert_allclose(alphas, sk_alphas)
Example #18
def test_convergence_warning():
    X, y = build_dataset(n_samples=10, n_features=10)
    tol = -1  # gap cannot be negative, so a convergence warning should be raised
    alpha_max = np.max(np.abs(X.T.dot(y))) / X.shape[0]
    clf = Lasso(alpha_max / 10, max_iter=1, max_epochs=100, tol=tol)

    with warnings.catch_warnings(record=True) as w:
        # Cause all warnings to always be triggered.
        warnings.simplefilter("always")
        clf.fit(X, y)
        assert len(w) == 1
        assert issubclass(w[-1].category, ConvergenceWarning)
Example #19
def test_GroupLassoCV(sparse_X):
    n_features = 50
    X, y = build_dataset(
        n_samples=11, n_features=n_features, sparse_X=sparse_X)

    tol = 1e-8
    clf = GroupLassoCV(groups=10, tol=tol)
    clf.fit(X, y)
    np.testing.assert_array_less(clf.dual_gap_, tol)

    clf.tol = 1e-6
    clf.groups = 1  # unsatisfying, but check_estimator fits datasets with only 5 features
    check_estimator(clf)
Example #20
def test_GroupLasso(sparse_X):
    n_features = 50
    X, y = build_dataset(
        n_samples=11, n_features=n_features, sparse_X=sparse_X)

    tol = 1e-8
    clf = GroupLasso(alpha=0.8, groups=10, tol=tol)
    clf.fit(X, y)
    np.testing.assert_array_less(clf.dual_gap_, tol * norm(y) ** 2 / len(y))

    clf.tol = 1e-6
    clf.groups = 1  # unsatisfying, but check_estimator fits datasets with only 5 features
    check_estimator(clf)
Example #21
def test_celer_single_alpha(sparse_X, pb):
    X, y = build_dataset(n_samples=20, n_features=100, sparse_X=sparse_X)
    tol = 1e-6

    if pb == "logreg":
        y = np.sign(y)
        tol_scaled = tol * np.log(2) * len(y)
    else:
        tol_scaled = tol * norm(y)**2 / len(y)

    alpha_max = norm(X.T.dot(y), ord=np.inf) / X.shape[0]
    _, _, gaps = celer_path(X, y, pb, alphas=[alpha_max / 10.], tol=tol)
    assert_array_less(gaps, tol_scaled)
Example #22
def test_zero_column(sparse_X):
    X, y = build_dataset(n_samples=60, n_features=50, sparse_X=sparse_X)
    n_zero_columns = 20
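    # zero out the first n_zero_columns columns of X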
    if sparse_X:
        X.data[:X.indptr[n_zero_columns]].fill(0.)
    else:
        X[:, :n_zero_columns].fill(0.)
    alpha_max = norm(X.T.dot(y), ord=np.inf) / X.shape[0]
    tol = 1e-6
    _, coefs, gaps = celer_path(
        X, y, "lasso", alphas=[alpha_max / 10.], tol=tol, p0=50, prune=0)
    w = coefs.T[0]
    np.testing.assert_array_less(gaps, tol)
    np.testing.assert_equal(w.shape[0], X.shape[1])
Example #23
def test_multinomial(sparse_X):
    np.random.seed(1409)
    X, y = build_dataset(n_samples=30, n_features=60, sparse_X=sparse_X)
    y = np.random.choice(4, len(y))
    tol = 1e-8
    clf = LogisticRegression(C=1, tol=tol, verbose=0)
    clf.fit(X, y)

    clf_sk = sklearn_Logreg(C=1,
                            penalty='l1',
                            solver='liblinear',
                            fit_intercept=False,
                            tol=tol)
    clf_sk.fit(X, y)
    assert_allclose(clf.coef_, clf_sk.coef_, rtol=1e-3, atol=1e-3)
Example #24
def test_celer_path_vs_lasso_path(sparse_X, prune):
    """Test that celer_path matches sklearn lasso_path."""
    X, y = build_dataset(n_samples=30, n_features=50, sparse_X=sparse_X)

    tol = 1e-12
    params = dict(eps=1e-3, n_alphas=10, tol=tol)
    alphas1, coefs1, gaps1 = celer_path(
        X, y, "lasso", return_thetas=False, verbose=1, prune=prune,
        max_iter=30, **params)

    alphas2, coefs2, _ = lasso_path(X, y, verbose=False, **params,
                                    max_iter=10000)

    np.testing.assert_allclose(alphas1, alphas2)
    np.testing.assert_array_less(gaps1, tol)
    np.testing.assert_allclose(coefs1, coefs2, rtol=1e-03, atol=1e-5)
Example #25
def test_MultiTaskLasso(fit_intercept):
    """Test that our MultiTaskLasso behaves as sklearn's."""
    X, Y = build_dataset(n_samples=20, n_features=30, n_targets=10)
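    # alpha_max is the smallest alpha for which the solution is all zeros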
    alpha_max = np.max(norm(X.T.dot(Y), axis=1)) / X.shape[0]

    alpha = alpha_max / 2.
    params = dict(alpha=alpha, fit_intercept=fit_intercept, tol=1e-10)
    clf = MultiTaskLasso(**params)
    clf.verbose = 2
    clf.fit(X, Y)

    clf2 = sklearn_MultiTaskLasso(**params)
    clf2.fit(X, Y)
    np.testing.assert_allclose(clf.coef_, clf2.coef_, rtol=1e-5)
    if fit_intercept:
        np.testing.assert_allclose(clf.intercept_, clf2.intercept_)
Example #26
def test_celer_path_vs_lasso_path(sparse_X, prune):
    """Test that celer_path matches sklearn lasso_path."""
    X, y, _, _ = build_dataset(n_samples=30, n_features=50, sparse_X=sparse_X)

    params = dict(eps=1e-2, n_alphas=10, tol=1e-12)
    alphas1, coefs1, gaps1 = celer_path(X,
                                        y,
                                        "lasso",
                                        return_thetas=False,
                                        verbose=1,
                                        prune=prune,
                                        **params)

    alphas2, coefs2, _ = lasso_path(X, y, verbose=False, **params)

    np.testing.assert_allclose(alphas1, alphas2)
    np.testing.assert_allclose(coefs1, coefs2, rtol=2e-03, atol=1e-4)
Example #27
def test_LassoCV(sparse_X, fit_intercept, positive):
    """Test that our LassoCV behaves like sklearn's LassoCV."""

    X, y = build_dataset(n_samples=20, n_features=30, sparse_X=sparse_X)
    params = dict(eps=0.05, n_alphas=10, tol=1e-10, cv=2,
                  fit_intercept=fit_intercept, positive=positive, verbose=2,
                  n_jobs=-1)

    clf = LassoCV(**params)
    clf.fit(X, y)

    clf2 = sklearn_LassoCV(**params, max_iter=10000)
    clf2.fit(X, y)

    np.testing.assert_allclose(clf.mse_path_, clf2.mse_path_, atol=1e-4)
    np.testing.assert_allclose(clf.alpha_, clf2.alpha_)
    np.testing.assert_allclose(clf.coef_, clf2.coef_, atol=1e-5)
Example #28
def test_dropin_MultiTaskLasso():
    """Test that our MultiTaskLasso class behaves as sklearn's."""
    X, Y, _, _ = build_dataset(n_samples=20, n_features=30, n_targets=10)
    alpha_max = np.max(norm(X.T.dot(Y), axis=1)) / X.shape[0]

    alpha = alpha_max / 2.
    params = dict(alpha=alpha, fit_intercept=False, tol=1e-10, normalize=True)
    clf = MultiTaskLasso(**params)
    clf.fit(X, Y)

    clf2 = sklearn_MultiTaskLasso(**params)
    clf2.fit(X, Y)
    np.testing.assert_allclose(clf.coef_, clf2.coef_, rtol=1e-5)
    # if fit_intercept:
    #     np.testing.assert_allclose(clf.intercept_, clf2.intercept_)

    check_estimator(MultiTaskLasso)
Example #29
def test_group_lasso_path(sparse_X):
    n_features = 50
    X, y = build_dataset(n_samples=11,
                         n_features=n_features,
                         sparse_X=sparse_X,
                         n_informative_features=n_features)[:2]

    tol = 1e-8
    alphas, coefs, gaps = celer_path(X,
                                     y,
                                     "grouplasso",
                                     groups=5,
                                     eps=1e-2,
                                     n_alphas=10,
                                     tol=tol)
    np.testing.assert_array_less(gaps, tol)

    check_estimator(GroupLasso)
Example #30
def test_warm_start():
    """Test Lasso path convergence."""
    X, y = build_dataset(n_samples=100, n_features=100, sparse_X=True)
    n_samples, n_features = X.shape
    alpha_max = np.max(np.abs(X.T.dot(y))) / n_samples
    n_alphas = 10
    alphas = alpha_max * np.logspace(0, -2, n_alphas)

    reg1 = Lasso(tol=1e-6, warm_start=True, p0=10)
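    # with warm_start=True, fit() starts from the current value of coef_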
    reg1.coef_ = np.zeros(n_features)

    for alpha in alphas:
        reg1.set_params(alpha=alpha)
        reg1.fit(X, y)
        # refitting with warm start should take less than 2 iters:
        reg1.fit(X, y)
        # assert_array_less is strict, so 2.01 allows n_iter_ <= 2
        np.testing.assert_array_less(reg1.n_iter_, 2.01)