Example #1
def test_singular_matrix():
    """
    Test when input is a singular matrix
    """
    X1 = np.array([[1, 1.], [1., 1.]])
    y1 = np.array([1, 1])
    alphas, active, coef_path = linear_model.lars_path(X1, y1)
    assert_array_almost_equal(coef_path.T, [[0, 0], [1, 0], [1, 0]])
Example #4
def test_lasso_gives_lstsq_solution():
    """
    Test that LARS Lasso gives least square solution at the end
    of the path
    """

    alphas_, active, coef_path_ = linear_model.lars_path(X, y, method="lasso")
    coef_lstsq = np.linalg.lstsq(X, y)[0]
    assert_array_almost_equal(coef_lstsq, coef_path_[:, -1])
Example #5
def test_collinearity():
    """Check that lars_path is robust to collinearity in input"""

    X = np.array([[3., 3., 1.], [2., 2., 0.], [1., 1., 0]])
    y = np.array([1., 0., 0])

    _, _, coef_path_ = linear_model.lars_path(X, y)
    assert not np.isnan(coef_path_).any()
    assert_array_almost_equal(np.dot(X, coef_path_[:, -1]), y)
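The first two columns of X here are identical, so the design matrix is rank-deficient; the test checks that lars_path neither produces NaNs nor fails to fit y exactly. A quick illustration of the degeneracy (not part of the test):

# The duplicated column makes X rank 2 with 3 columns; this is the
# degenerate direction lars_path has to handle.
import numpy as np
X1 = np.array([[3., 3., 1.], [2., 2., 0.], [1., 1., 0.]])
print np.linalg.matrix_rank(X1)  # prints 2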
Example #7
def test_lars_lstsq():
    """
    Test that LARS gives least square solution at the end
    of the path
    """
    # test that it arrives to a least squares solution
    alphas_, active, coef_path_ = linear_model.lars_path(
        diabetes.data, diabetes.target, method="lar")
    coef_lstsq = np.linalg.lstsq(X, y)[0]
    assert_array_almost_equal(coef_path_.T[-1], coef_lstsq)
Example #9
def test_lasso_lars_vs_lasso_cd(verbose=False):
    """
    Test that LassoLars and Lasso using coordinate descent give the
    same results
    """
    alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso')
    lasso_cd = linear_model.Lasso(fit_intercept=False)
    for (c, a) in zip(lasso_path.T, alphas):
        lasso_cd.alpha = a
        lasso_cd.fit(X, y, tol=1e-8)
        error = np.linalg.norm(c - lasso_cd.coef_)
        assert error < 0.01
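Both solvers minimize the same objective, which is why the coefficients along the path should agree; assuming the scaling scikit-learn documents for Lasso, the problem is

\[
\min_{\beta}\ \frac{1}{2\, n_{\text{samples}}}\, \lVert y - X\beta \rVert_2^2 \;+\; \alpha \lVert \beta \rVert_1 ,
\]

so each alpha returned by lars_path can be handed directly to the coordinate-descent Lasso.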
Example #11
def test_lasso_lars_vs_lasso_cd_early_stopping(verbose=False):
    """
    Test that LassoLars and Lasso using coordinate descent give the
    same results when early stopping is used.
    (test : before, in the middle, and in the last part of the path)
    """
    alphas_min = [10, 0.9, 1e-4]
    for alpha_min in alphas_min:
        alphas, _, lasso_path = linear_model.lars_path(X, y, method="lasso",
                                                       alpha_min=alpha_min)
        lasso_cd = linear_model.Lasso(fit_intercept=False)
        lasso_cd.alpha = alphas[-1]
        lasso_cd.fit(X, y, tol=1e-8)
        error = np.linalg.norm(lasso_path[:, -1] - lasso_cd.coef_)
        assert error < 0.01
Example #13
def test_simple():
    """
    Principle of LARS is to keep covariances tied and decreasing
    """

    alphas_, active, coef_path_ = linear_model.lars_path(
        diabetes.data, diabetes.target, method="lar")

    for (i, coef_) in enumerate(coef_path_.T):
        res = y - np.dot(X, coef_)
        cov = np.dot(X.T, res)
        C = np.max(abs(cov))
        eps = 1e-3
        ocur = len(cov[C - eps < abs(cov)])
        if i < X.shape[1]:
            assert ocur == i + 1
        else:
            # no more than max_pred variables can go into the active set
            assert ocur == X.shape[1]
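The property the docstring states can be written explicitly. At any point on the LAR path with active set \(\mathcal{A}\) and coefficients \(\hat\beta\),

\[
\lvert x_j^\top (y - X\hat\beta) \rvert = C \quad \text{for all } j \in \mathcal{A},
\qquad
\lvert x_k^\top (y - X\hat\beta) \rvert \le C \quad \text{for } k \notin \mathcal{A},
\]

and the common correlation C decreases monotonically along the path. ocur counts how many columns are within eps of the maximal correlation C, which should equal the size of the active set.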
Example #14
def test_lasso_lars_vs_lasso_cd(verbose=False):
    """
    Test that LassoLars and Lasso using coordinate descent give the
    same results
    """
    alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso')
    lasso_cd = linear_model.Lasso(fit_intercept=False)
    for (c, a) in zip(lasso_path.T, alphas):
        lasso_cd.alpha = a
        lasso_cd.fit(X, y, tol=1e-8)
        error = np.linalg.norm(c - lasso_cd.coef_)
        assert error < 0.01

    # similar test, with the classifiers
    for alpha in np.linspace(1e-2, 1 - 1e-2):
        clf1 = linear_model.LassoLARS(alpha=alpha).fit(X, y)
        clf2 = linear_model.Lasso(alpha=alpha).fit(X, y, tol=1e-8)
        err = np.linalg.norm(clf1.coef_ - clf2.coef_)
        assert err < 1e-3
Example #17
def test_simple_precomputed():
    """
    The same, with precomputed Gram matrix
    """

    G = np.dot(diabetes.data.T, diabetes.data)
    alphas_, active, coef_path_ = linear_model.lars_path(
        diabetes.data, diabetes.target, Gram=G, method="lar")

    for (i, coef_) in enumerate(coef_path_.T):
        res = y - np.dot(X, coef_)
        cov = np.dot(X.T, res)
        C = np.max(abs(cov))
        eps = 1e-3
        ocur = len(cov[C - eps < abs(cov)])
        if i < X.shape[1]:
            assert ocur == i + 1
        else:
            # no more than max_pred variables can go into the active set
            assert ocur == X.shape[1]
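Passing the Gram matrix lets the algorithm update correlations without revisiting X, since

\[
X^\top (y - X\beta) \;=\; X^\top y - G\beta, \qquad G = X^\top X,
\]

which is why the assertions are identical to those in test_simple.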
Example #19
def compute_bench(samples_range, features_range):

    it = 0

    results = defaultdict(lambda: [])

    max_it = len(samples_range) * len(features_range)
    for n_samples in samples_range:
        for n_features in features_range:
            it += 1
            print '===================='
            print 'Iteration %03d of %03d' % (it, max_it)
            print '===================='
            dataset_kwargs = {
                'n_train_samples': n_samples,
                'n_test_samples': 2,
                'n_features': n_features,
                'n_informative': n_features / 10,
                'effective_rank': min(n_samples, n_features) / 10,
                #'effective_rank': None,
                'bias': 0.0,
            }
            print "n_samples: %d" % n_samples
            print "n_features: %d" % n_features
            X, y, _, _, _ = make_regression_dataset(**dataset_kwargs)

            gc.collect()
            print "benching lars_path (with Gram):",
            sys.stdout.flush()
            tstart = time()
            G = np.dot(X.T, X)  # precomputed Gram matrix
            Xy = np.dot(X.T, y)
            lars_path(X, y, Xy=Xy, Gram=G, method='lasso')
            delta = time() - tstart
            print "%0.3fs" % delta
            results['lars_path (with Gram)'].append(delta)

            gc.collect()
            print "benching lars_path (without Gram):",
            sys.stdout.flush()
            tstart = time()
            lars_path(X, y, method='lasso')
            delta = time() - tstart
            print "%0.3fs" % delta
            results['lars_path (without Gram)'].append(delta)

            gc.collect()
            print "benching lasso_path (with Gram):",
            sys.stdout.flush()
            tstart = time()
            lasso_path(X, y, precompute=True)
            delta = time() - tstart
            print "%0.3fs" % delta
            results['lasso_path (with Gram)'].append(delta)

            gc.collect()
            print "benching lasso_path (without Gram):",
            sys.stdout.flush()
            tstart = time()
            lasso_path(X, y, precompute=False)
            delta = time() - tstart
            print "%0.3fs" % delta
            results['lasso_path (without Gram)'].append(delta)

    return results
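compute_bench relies on several imports that the excerpt does not show; a sketch of the assumed preamble for this Python 2-era script (the import locations are assumptions, and make_regression_dataset is presumed to be a helper defined elsewhere in the benchmark):

# Assumed preamble for the benchmark above (a sketch;
# make_regression_dataset is assumed to be defined elsewhere).
import gc
import sys
from time import time
from collections import defaultdict

import numpy as np
from scikits.learn.linear_model import lars_path, lasso_path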
Example #20
import numpy as np
from math import log

from scikits.learn.datasets import load_diabetes
from scikits.learn.linear_model import lars_path

diabetes = load_diabetes()
X, y = diabetes.data, diabetes.target

# add garbage features
rng = np.random.RandomState(42)
X = np.c_[X, rng.randn(X.shape[0], 10)]
n_samples = X.shape[0]

# Standardize the data to avoid intercept problems
y -= np.mean(y)
X -= np.mean(X, axis=0)
X /= np.std(X, axis=0)

print "Computing regularization path using the LARS ..."
alphas, features, coefs = lars_path(X, y, method='lasso', verbose=True)

###############################################################################
# BIC and AIC
K_aic = 2  # AIC
K_bic = log(n_samples)  # BIC

R = y[:, np.newaxis] - np.dot(X, coefs)  # residuals
mse = np.sum(R ** 2, axis=0)  # residual sum of squares, one value per alpha

df = np.zeros(coefs.shape[1], dtype=np.int)  # Degrees of freedom
for k, coef in enumerate(coefs.T):
    mask = coef != 0
    if not np.any(mask):
        continue
    Xc = X[:, mask]
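The excerpt breaks off inside the degrees-of-freedom loop (Xc would be used to measure the size of the active set). For reference, with K_aic and K_bic as set above and df the effective degrees of freedom — for the Lasso, essentially the number of nonzero coefficients — a standard Gaussian-likelihood form of the criteria this script builds toward is

\[
\mathrm{AIC} = n \log\!\bigl(\mathrm{RSS}/n\bigr) + 2\, df,
\qquad
\mathrm{BIC} = n \log\!\bigl(\mathrm{RSS}/n\bigr) + \log(n)\, df,
\]

up to additive constants that do not affect which alpha minimizes them; the exact expression used after the truncation point is not shown in the source.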
Example #21
print __doc__

# Author: Fabian Pedregosa <*****@*****.**>
#         Alexandre Gramfort <*****@*****.**>
# License: BSD Style.

import numpy as np
import pylab as pl

from scikits.learn import linear_model
from scikits.learn import datasets

diabetes = datasets.load_diabetes()
X = diabetes.data
y = diabetes.target

print "Computing regularization path using the LARS ..."
_, _, coefs_ = linear_model.lars_path(X, y, method='lasso', verbose=True)

xx = np.sum(np.abs(coefs_.T), axis=1)
xx /= xx[-1]
pl.plot(xx, coefs_.T)
ymin, ymax = pl.ylim()
pl.vlines(xx, ymin, ymax, linestyle='dashed')
pl.xlabel('|coef| / max|coef|')
pl.ylabel('Coefficients')
pl.title('LASSO Path')
pl.axis('tight')
pl.show()
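The x-axis is the l1 norm of the coefficients at each breakpoint, normalized by its final value,

\[
xx_k = \frac{\lVert \beta^{(k)} \rVert_1}{\lVert \beta^{(\mathrm{final})} \rVert_1} \in [0, 1],
\]

so the plot runs from the empty model on the left to the unregularized least-squares end of the path on the right.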

Example #24
def compute_bench(samples_range, features_range):

    it = 0

    results = dict()
    lars = np.empty((len(features_range), len(samples_range)))
    lars_gram = lars.copy()
    omp = lars.copy()
    omp_gram = lars.copy()

    max_it = len(samples_range) * len(features_range)
    for i_s, n_samples in enumerate(samples_range):
        for i_f, n_features in enumerate(features_range):
            it += 1
            n_informative = n_features / 10
            print '===================='
            print 'Iteration %03d of %03d' % (it, max_it)
            print '===================='
            # dataset_kwargs = {
            #     'n_train_samples': n_samples,
            #     'n_test_samples': 2,
            #     'n_features': n_features,
            #     'n_informative': n_informative,
            #     'effective_rank': min(n_samples, n_features) / 10,
            #     #'effective_rank': None,
            #     'bias': 0.0,
            # }
            dataset_kwargs = {
                'n_samples': 1,
                'n_components': n_features,
                'n_features': n_samples,
                'n_nonzero_coefs': n_informative,
                'random_state': 0
            }
            print "n_samples: %d" % n_samples
            print "n_features: %d" % n_features
            y, X, _ = make_sparse_coded_signal(**dataset_kwargs)
            X = np.asfortranarray(X)

            gc.collect()
            print "benching lars_path (with Gram):",
            sys.stdout.flush()
            tstart = time()
            G = np.dot(X.T, X)  # precomputed Gram matrix
            Xy = np.dot(X.T, y)
            lars_path(X, y, Xy=Xy, Gram=G, max_iter=n_informative)
            delta = time() - tstart
            print "%0.3fs" % delta
            lars_gram[i_f, i_s] = delta

            gc.collect()
            print "benching lars_path (without Gram):",
            sys.stdout.flush()
            tstart = time()
            lars_path(X, y, Gram=None, max_iter=n_informative)
            delta = time() - tstart
            print "%0.3fs" % delta
            lars[i_f, i_s] = delta

            gc.collect()
            print "benching orthogonal_mp (with Gram):",
            sys.stdout.flush()
            tstart = time()
            orthogonal_mp(X, y, precompute_gram=True,
                          n_nonzero_coefs=n_informative)
            delta = time() - tstart
            print "%0.3fs" % delta
            omp_gram[i_f, i_s] = delta

            gc.collect()
            print "benching orthogonal_mp (without Gram):",
            sys.stdout.flush()
            tstart = time()
            orthogonal_mp(X, y, precompute_gram=False,
                          n_nonzero_coefs=n_informative)
            delta = time() - tstart
            print "%0.3fs" % delta
            omp[i_f, i_s] = delta

    results['time(LARS) / time(OMP)\n (w/ Gram)'] = (lars_gram / omp_gram)
    results['time(LARS) / time(OMP)\n (w/o Gram)'] = (lars / omp)
    return results
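This variant benchmarks lars_path against orthogonal matching pursuit on sparse-coded signals and reports timing ratios rather than raw times. A sketch of the preamble it assumes (module paths are assumptions; orthogonal_mp and make_sparse_coded_signal live under these names in sklearn):

# Assumed preamble for the LARS vs. OMP benchmark above (a sketch;
# import locations are assumptions).
import gc
import sys
from time import time

import numpy as np
from sklearn.linear_model import lars_path, orthogonal_mp
from sklearn.datasets import make_sparse_coded_signal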
Example #25
from datetime import datetime

import numpy as np
import pylab as pl

from scikits.learn import linear_model
from scikits.learn import datasets

diabetes = datasets.load_diabetes()
X = diabetes.data
y = diabetes.target

X[:, 6] *= -1  # To reproduce the Wikipedia LAR page

################################################################################
# Compute path functions

print "Computing regularization path using the LARS ..."
start = datetime.now()
_, _, coefs_ = linear_model.lars_path(X, y, max_features=10, method="lasso")
print "This took ", datetime.now() - start

###############################################################################
# Display path
xx = np.sum(np.abs(coefs_), axis=0)
xx /= xx[-1]
pl.plot(xx, coefs_.T)
ymin, ymax = pl.ylim()
pl.vlines(xx, ymin, ymax, linestyle='dashed')
pl.xlabel('|coef| / max|coef|')
pl.ylabel('Coefficients')
pl.title('Least Angle Regression (LAR) Path')
pl.axis('tight')
pl.show()