Exemple #1
0
def test_lasso_lars_vs_lasso_cd_early_stopping():
    # Test that LassoLars and Lasso using coordinate descent give the
    # same results when early stopping is used.
    # (test : before, in the middle, and in the last part of the path)
    alphas_min = [10, 0.9, 1e-4]

    for alpha_min in alphas_min:
        alphas, _, lasso_path = linear_model.lars_path(X,
                                                       y,
                                                       method='lasso',
                                                       alpha_min=alpha_min)
        lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8)
        lasso_cd.alpha = alphas[-1]
        lasso_cd.fit(X, y)
        error = linalg.norm(lasso_path[:, -1] - lasso_cd.coef_)
        assert error < 0.01

    # same test, with normalization
    for alpha_min in alphas_min:
        alphas, _, lasso_path = linear_model.lars_path(X,
                                                       y,
                                                       method='lasso',
                                                       alpha_min=alpha_min)
        lasso_cd = linear_model.Lasso(normalize=True, tol=1e-8)
        lasso_cd.alpha = alphas[-1]
        lasso_cd.fit(X, y)
        error = linalg.norm(lasso_path[:, -1] - lasso_cd.coef_)
        assert error < 0.01
Exemple #2
0
def test_lars_path_positive_constraint():
    # this is the main test for the positive parameter on the lars_path method
    # the estimator classes just make use of this function

    # we do the test on the diabetes dataset

    # ensure that we get negative coefficients when positive=False
    # and all positive when positive=True
    # for method 'lar' (default) and lasso

    err_msg = "Positive constraint not supported for 'lar' coding method."
    with pytest.raises(ValueError, match=err_msg):
        linear_model.lars_path(diabetes['data'],
                               diabetes['target'],
                               method='lar',
                               positive=True)

    method = 'lasso'
    _, _, coefs = \
        linear_model.lars_path(X, y, return_path=True, method=method,
                               positive=False)
    assert coefs.min() < 0

    _, _, coefs = \
        linear_model.lars_path(X, y, return_path=True, method=method,
                               positive=True)
    assert coefs.min() >= 0
Exemple #3
0
def test_all_precomputed():
    # Test that lars_path with precomputed Gram and Xy gives the right answer
    G = np.dot(X.T, X)
    Xy = np.dot(X.T, y)
    for method in 'lar', 'lasso':
        output = linear_model.lars_path(X, y, method=method)
        output_pre = linear_model.lars_path(X, y, Gram=G, Xy=Xy, method=method)
        for expected, got in zip(output, output_pre):
            assert_array_almost_equal(expected, got)
Exemple #4
0
def test_no_path():
    # Test that the ``return_path=False`` option returns the correct output
    alphas_, _, coef_path_ = linear_model.lars_path(X, y, method='lar')
    alpha_, _, coef = linear_model.lars_path(X,
                                             y,
                                             method='lar',
                                             return_path=False)

    assert_array_almost_equal(coef, coef_path_[:, -1])
    assert alpha_ == alphas_[-1]
Exemple #5
0
def test_no_path_precomputed():
    # Test that the ``return_path=False`` option with Gram remains correct
    alphas_, _, coef_path_ = linear_model.lars_path(X, y, method='lar', Gram=G)
    alpha_, _, coef = linear_model.lars_path(X,
                                             y,
                                             method='lar',
                                             Gram=G,
                                             return_path=False)

    assert_array_almost_equal(coef, coef_path_[:, -1])
    assert alpha_ == alphas_[-1]
Exemple #6
0
def test_lasso_lars_vs_lasso_cd_ill_conditioned():
    # Test lasso lars on a very ill-conditioned design, and check that
    # it does not blow up, and stays somewhat close to a solution given
    # by the coordinate descent solver
    # Also test that lasso_path (using lars_path output style) gives
    # the same result as lars_path and previous lasso output style
    # under these conditions.
    rng = np.random.RandomState(42)

    # Generate data
    n, m = 70, 100
    k = 5
    X = rng.randn(n, m)
    w = np.zeros((m, 1))
    i = np.arange(0, m)
    rng.shuffle(i)
    supp = i[:k]
    w[supp] = np.sign(rng.randn(k, 1)) * (rng.rand(k, 1) + 1)
    y = np.dot(X, w)
    sigma = 0.2
    y += sigma * rng.rand(*y.shape)
    y = y.squeeze()
    lars_alphas, _, lars_coef = linear_model.lars_path(X, y, method='lasso')

    _, lasso_coef2, _ = linear_model.lasso_path(X,
                                                y,
                                                alphas=lars_alphas,
                                                tol=1e-6,
                                                fit_intercept=False)

    assert_array_almost_equal(lars_coef, lasso_coef2, decimal=1)
Exemple #7
0
def test_simple():
    # Principle of Lars is to keep covariances tied and decreasing

    # also test verbose output
    from io import StringIO
    import sys
    old_stdout = sys.stdout
    try:
        sys.stdout = StringIO()

        _, _, coef_path_ = linear_model.lars_path(X,
                                                  y,
                                                  method='lar',
                                                  verbose=10)

        sys.stdout = old_stdout

        for i, coef_ in enumerate(coef_path_.T):
            res = y - np.dot(X, coef_)
            cov = np.dot(X.T, res)
            C = np.max(abs(cov))
            eps = 1e-3
            ocur = len(cov[C - eps < abs(cov)])
            if i < X.shape[1]:
                assert ocur == i + 1
            else:
                # no more than max_pred variables can go into the active set
                assert ocur == X.shape[1]
    finally:
        sys.stdout = old_stdout
Exemple #8
0
def test_collinearity():
    # Check that lars_path is robust to collinearity in input
    X = np.array([[3., 3., 1.], [2., 2., 0.], [1., 1., 0]])
    y = np.array([1., 0., 0])
    rng = np.random.RandomState(0)

    f = ignore_warnings
    _, _, coef_path_ = f(linear_model.lars_path)(X, y, alpha_min=0.01)
    assert not np.isnan(coef_path_).any()
    residual = np.dot(X, coef_path_[:, -1]) - y
    assert (residual**2).sum() < 1.  # just make sure it's bounded

    n_samples = 10
    X = rng.rand(n_samples, 5)
    y = np.zeros(n_samples)
    _, _, coef_path_ = linear_model.lars_path(X,
                                              y,
                                              Gram='auto',
                                              copy_X=False,
                                              copy_Gram=False,
                                              alpha_min=0.,
                                              method='lasso',
                                              verbose=0,
                                              max_iter=500)
    assert_array_almost_equal(coef_path_, np.zeros_like(coef_path_))
Exemple #9
0
def test_lars_path_gram_equivalent(method, return_path):
    _assert_same_lars_path_result(
        linear_model.lars_path_gram(Xy=Xy,
                                    Gram=G,
                                    n_samples=n_samples,
                                    method=method,
                                    return_path=return_path),
        linear_model.lars_path(X,
                               y,
                               Gram=G,
                               method=method,
                               return_path=return_path))
Exemple #10
0
def test_no_path_all_precomputed():
    # Test that the ``return_path=False`` option with Gram and Xy remains
    # correct
    X, y = 3 * diabetes.data, diabetes.target
    G = np.dot(X.T, X)
    Xy = np.dot(X.T, y)
    alphas_, _, coef_path_ = linear_model.lars_path(X,
                                                    y,
                                                    method='lasso',
                                                    Xy=Xy,
                                                    Gram=G,
                                                    alpha_min=0.9)
    alpha_, _, coef = linear_model.lars_path(X,
                                             y,
                                             method='lasso',
                                             Gram=G,
                                             Xy=Xy,
                                             alpha_min=0.9,
                                             return_path=False)

    assert_array_almost_equal(coef, coef_path_[:, -1])
    assert alpha_ == alphas_[-1]
Exemple #11
0
def test_lasso_lars_vs_lasso_cd():
    # Test that LassoLars and Lasso using coordinate descent give the
    # same results.
    X = 3 * diabetes.data

    alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso')
    lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8)
    for c, a in zip(lasso_path.T, alphas):
        if a == 0:
            continue
        lasso_cd.alpha = a
        lasso_cd.fit(X, y)
        error = linalg.norm(c - lasso_cd.coef_)
        assert error < 0.01

    # similar test, with the classifiers
    for alpha in np.linspace(1e-2, 1 - 1e-2, 20):
        clf1 = linear_model.LassoLars(alpha=alpha, normalize=False).fit(X, y)
        clf2 = linear_model.Lasso(alpha=alpha, tol=1e-8,
                                  normalize=False).fit(X, y)
        err = linalg.norm(clf1.coef_ - clf2.coef_)
        assert err < 1e-3

    # same test, with normalized data
    X = diabetes.data
    alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso')
    lasso_cd = linear_model.Lasso(fit_intercept=False,
                                  normalize=True,
                                  tol=1e-8)
    for c, a in zip(lasso_path.T, alphas):
        if a == 0:
            continue
        lasso_cd.alpha = a
        lasso_cd.fit(X, y)
        error = linalg.norm(c - lasso_cd.coef_)
        assert error < 0.01
Exemple #12
0
def test_simple_precomputed():
    # The same, with precomputed Gram matrix

    _, _, coef_path_ = linear_model.lars_path(X, y, Gram=G, method='lar')

    for i, coef_ in enumerate(coef_path_.T):
        res = y - np.dot(X, coef_)
        cov = np.dot(X.T, res)
        C = np.max(abs(cov))
        eps = 1e-3
        ocur = len(cov[C - eps < abs(cov)])
        if i < X.shape[1]:
            assert ocur == i + 1
        else:
            # no more than max_pred variables can go into the active set
            assert ocur == X.shape[1]
Exemple #13
0
def test_lasso_path_return_models_vs_new_return_gives_same_coefficients():
    # Test that lasso_path with lars_path style output gives the
    # same result

    # Some toy data
    X = np.array([[1, 2, 3.1], [2.3, 5.4, 4.3]]).T
    y = np.array([1, 2, 3.1])
    alphas = [5., 1., .5]

    # Use lars_path and lasso_path(new output) with 1D linear interpolation
    # to compute the same path
    alphas_lars, _, coef_path_lars = lars_path(X, y, method='lasso')
    coef_path_cont_lars = interpolate.interp1d(alphas_lars[::-1],
                                               coef_path_lars[:, ::-1])
    alphas_lasso2, coef_path_lasso2, _ = lasso_path(X,
                                                    y,
                                                    alphas=alphas,
                                                    return_models=False)
    coef_path_cont_lasso = interpolate.interp1d(alphas_lasso2[::-1],
                                                coef_path_lasso2[:, ::-1])

    assert_array_almost_equal(coef_path_cont_lasso(alphas),
                              coef_path_cont_lars(alphas),
                              decimal=1)
Exemple #14
0
def test_lasso_lars_vs_lasso_cd_positive():
    # Test that LassoLars and Lasso using coordinate descent give the
    # same results when using the positive option

    # This test is basically a copy of the above with additional positive
    # option. However for the middle part, the comparison of coefficient values
    # for a range of alphas, we had to make an adaptations. See below.

    # not normalized data
    X = 3 * diabetes.data

    alphas, _, lasso_path = linear_model.lars_path(X,
                                                   y,
                                                   method='lasso',
                                                   positive=True)
    lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8, positive=True)
    for c, a in zip(lasso_path.T, alphas):
        if a == 0:
            continue
        lasso_cd.alpha = a
        lasso_cd.fit(X, y)
        error = linalg.norm(c - lasso_cd.coef_)
        assert error < 0.01

    # The range of alphas chosen for coefficient comparison here is restricted
    # as compared with the above test without the positive option. This is due
    # to the circumstance that the Lars-Lasso algorithm does not converge to
    # the least-squares-solution for small alphas, see 'Least Angle Regression'
    # by Efron et al 2004. The coefficients are typically in congruence up to
    # the smallest alpha reached by the Lars-Lasso algorithm and start to
    # diverge thereafter.  See
    # https://gist.github.com/michigraber/7e7d7c75eca694c7a6ff

    for alpha in np.linspace(6e-1, 1 - 1e-2, 20):
        clf1 = linear_model.LassoLars(fit_intercept=False,
                                      alpha=alpha,
                                      normalize=False,
                                      positive=True).fit(X, y)
        clf2 = linear_model.Lasso(fit_intercept=False,
                                  alpha=alpha,
                                  tol=1e-8,
                                  normalize=False,
                                  positive=True).fit(X, y)
        err = linalg.norm(clf1.coef_ - clf2.coef_)
        assert err < 1e-3

    # normalized data
    X = diabetes.data
    alphas, _, lasso_path = linear_model.lars_path(X,
                                                   y,
                                                   method='lasso',
                                                   positive=True)
    lasso_cd = linear_model.Lasso(fit_intercept=False,
                                  normalize=True,
                                  tol=1e-8,
                                  positive=True)
    for c, a in zip(lasso_path.T[:-1], alphas[:-1]):  # don't include alpha=0
        lasso_cd.alpha = a
        lasso_cd.fit(X, y)
        error = linalg.norm(c - lasso_cd.coef_)
        assert error < 0.01
Exemple #15
0
def test_singular_matrix():
    # Test when input is a singular matrix
    X1 = np.array([[1, 1.], [1., 1.]])
    y1 = np.array([1, 1])
    _, _, coef_path = linear_model.lars_path(X1, y1)
    assert_array_almost_equal(coef_path.T, [[0, 0], [1, 0]])
Exemple #16
0
def test_lasso_gives_lstsq_solution():
    # Test that Lars Lasso gives least square solution at the end
    # of the path
    _, _, coef_path_ = linear_model.lars_path(X, y, method='lasso')
    coef_lstsq = np.linalg.lstsq(X, y)[0]
    assert_array_almost_equal(coef_lstsq, coef_path_[:, -1])