def test_lasso_cv():
    """Check LassoCV's chosen alpha, its agreement with LassoLarsCV, and its test score.

    Fits LassoCV twice on the data from ``build_dataset`` (once with default
    settings, once with ``precompute=True``), then compares the last fit with
    a LassoLarsCV fit both by grid position of the selected alpha and by
    mean cross-validated MSE at one grid point.
    """
    X, y, X_test, y_test = build_dataset()
    max_iter = 150

    clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter)
    clf.fit(X, y)
    assert_almost_equal(clf.alpha_, 0.056, 2)

    # Same settings with precompute=True must select the same alpha.
    clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter, precompute=True)
    clf.fit(X, y)
    assert_almost_equal(clf.alpha_, 0.056, 2)

    # LARS and coordinate descent should select a similar alpha: located on
    # the reversed clf.alphas_ grid, the two choices may be at most one
    # position apart.
    lars = LassoLarsCV(normalize=False, max_iter=30).fit(X, y)
    reversed_grid = clf.alphas_[::-1]
    lars_position = np.searchsorted(reversed_grid, lars.alpha_)
    cd_position = np.searchsorted(reversed_grid, clf.alpha_)
    assert_true(np.abs(lars_position - cd_position) <= 1)

    # Both estimators should also report a similar MSE; the LARS curve is
    # interpolated so it can be evaluated at a coordinate-descent grid alpha.
    mse_lars = interpolate.interp1d(lars.cv_alphas_, lars.mse_path_.T)
    lars_mean_mse = mse_lars(clf.alphas_[5]).mean()
    cd_mean_mse = clf.mse_path_[5].mean()
    np.testing.assert_approx_equal(lars_mean_mse, cd_mean_mse, significant=2)

    # Held-out test set.
    assert_greater(clf.score(X_test, y_test), 0.99)
class LassoCVImpl:
    """Hold LassoCV hyperparameters and delegate fit/predict to ``SKLModel``.

    The constructor only records its keyword arguments; the wrapped model is
    created on every call to ``fit``.
    """

    def __init__(self, eps=0.001, n_alphas=100, alphas=None,
                 fit_intercept=True, normalize=False, precompute='auto',
                 max_iter=1000, tol=0.0001, copy_X=True, cv=3, verbose=False,
                 n_jobs=None, positive=False, random_state=None,
                 selection='cyclic'):
        """Store every argument, keyed by its own name, in ``_hyperparams``."""
        self._hyperparams = dict(
            eps=eps,
            n_alphas=n_alphas,
            alphas=alphas,
            fit_intercept=fit_intercept,
            normalize=normalize,
            precompute=precompute,
            max_iter=max_iter,
            tol=tol,
            copy_X=copy_X,
            cv=cv,
            verbose=verbose,
            n_jobs=n_jobs,
            positive=positive,
            random_state=random_state,
            selection=selection,
        )

    def fit(self, X, y=None):
        """Build a new ``SKLModel`` from the stored hyperparameters and fit it.

        ``y`` is forwarded only when it is not None. Returns ``self``.
        """
        model = SKLModel(**self._hyperparams)
        self._sklearn_model = model
        if y is None:
            model.fit(X)
        else:
            model.fit(X, y)
        return self

    def predict(self, X):
        """Return the wrapped model's prediction for ``X``; requires a prior ``fit``."""
        return self._sklearn_model.predict(X)
def test_lasso_cv():
    """Check LassoCV's chosen alpha, its agreement with LassoLarsCV, and its test score.

    Fits LassoCV on the data from ``build_dataset`` with and without
    ``precompute=True``, then compares the last fit with a LassoLarsCV fit:
    the selected alphas must be at most one grid position apart and the mean
    cross-validated MSE at one grid alpha must agree to two significant
    digits. Finally the score on the held-out data must exceed 0.99.
    """
    X, y, X_test, y_test = build_dataset()
    max_iter = 150
    clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter).fit(X, y)
    assert_almost_equal(clf.alpha_, 0.056, 2)

    clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter, precompute=True)
    clf.fit(X, y)
    assert_almost_equal(clf.alpha_, 0.056, 2)

    # Check that the lars and the coordinate descent implementation
    # select a similar alpha
    lars = LassoLarsCV(normalize=False, max_iter=30).fit(X, y)
    # for this we check that they don't fall in the grid of
    # clf.alphas further than 1
    assert_true(np.abs(
        np.searchsorted(clf.alphas_[::-1], lars.alpha_) -
        np.searchsorted(clf.alphas_[::-1], clf.alpha_)) <= 1)
    # check that they also give a similar MSE
    # The LARS cross-validation error curve is read from ``mse_path_``;
    # ``cv_mse_path_`` is the deprecated name of the same attribute.
    mse_lars = interpolate.interp1d(lars.cv_alphas_, lars.mse_path_.T)
    np.testing.assert_approx_equal(mse_lars(clf.alphas_[5]).mean(),
                                   clf.mse_path_[5].mean(), significant=2)

    # test set
    assert_greater(clf.score(X_test, y_test), 0.99)
def test_1d_multioutput_lasso_and_multitask_lasso_cv():
    """LassoCV on one target column must match MultiTaskLassoCV on that single column.

    The target gets a trailing axis; LassoCV is fit on its first column and
    MultiTaskLassoCV on the whole array, and alpha, coefficients and
    intercept are compared.
    """
    X, y, _, _ = build_dataset(n_features=10)
    y_column = y[:, np.newaxis]

    single_task = LassoCV(n_alphas=5, eps=2e-3)
    single_task.fit(X, y_column[:, 0])

    multi_task = MultiTaskLassoCV(n_alphas=5, eps=2e-3)
    multi_task.fit(X, y_column)

    assert_almost_equal(single_task.alpha_, multi_task.alpha_)
    assert_almost_equal(single_task.coef_, multi_task.coef_[0])
    assert_almost_equal(single_task.intercept_, multi_task.intercept_[0])
def test_lasso_path():
    """Check the alpha LassoCV selects, with and without ``precompute=True``, and its test score."""
    X, y, X_test, y_test = build_dataset()
    max_iter = 50

    # First pass uses the default settings, second pass adds precompute=True;
    # both must land on the same alpha.
    for extra_kwargs in ({}, {"precompute": True}):
        clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter, **extra_kwargs)
        clf.fit(X, y)
        assert_almost_equal(clf.alpha, 0.011, 2)

    # test set, scored with the last (precompute=True) model
    assert clf.score(X_test, y_test) > 0.85
def test_lasso_cv_positive_constraint():
    """``positive=True`` must remove the negative coefficients of the unconstrained fit."""
    X, y, _, _ = build_dataset()
    max_iter = 500
    shared_kwargs = dict(n_alphas=3, eps=1e-1, max_iter=max_iter, cv=2,
                         n_jobs=1)

    # Precondition: without the constraint at least one coefficient is negative.
    free_fit = LassoCV(**shared_kwargs)
    free_fit.fit(X, y)
    assert_true(min(free_fit.coef_) < 0)

    # Same data and settings, constrained: no coefficient may be negative.
    constrained_fit = LassoCV(positive=True, **shared_kwargs)
    constrained_fit.fit(X, y)
    assert_true(min(constrained_fit.coef_) >= 0)
def test_lassoCV_does_not_set_precompute(monkeypatch, precompute,
                                         inner_precompute):
    """Check which ``precompute`` value the inner Lasso sees when LassoCV is fit.

    ``Lasso`` is swapped for a subclass that counts its ``fit`` calls and
    asserts its own ``precompute`` equals ``inner_precompute``; the test also
    requires that the subclass was fit at least once.
    """
    X, y, _, _ = build_dataset()
    fit_count = 0

    class LassoMock(Lasso):
        def fit(self, X, y):
            nonlocal fit_count
            super().fit(X, y)
            fit_count += 1
            assert self.precompute == inner_precompute

    monkeypatch.setattr("sklearn.linear_model.coordinate_descent.Lasso",
                        LassoMock)
    model = LassoCV(precompute=precompute)
    model.fit(X, y)
    assert fit_count > 0
def test_lasso_path():
    """Check LassoCV's selected alpha and test score on a sparse synthetic problem.

    The data has 50 samples and 200 features, of which only the first 10
    carry a non-zero weight; the target is noiseless.
    """
    # build an ill-posed linear regression problem with many noisy features
    # and comparatively few samples
    n_samples, n_features, max_iter = 50, 200, 30
    rng = np.random.RandomState(0)
    w = rng.randn(n_features)
    w[10:] = 0.0  # only the top 10 features are impacting the model
    X = rng.randn(n_samples, n_features)
    y = X.dot(w)

    cv_kwargs = dict(n_alphas=10, eps=1e-3, max_iter=max_iter)

    clf = LassoCV(**cv_kwargs)
    clf.fit(X, y)
    assert_almost_equal(clf.alpha, 0.011, 2)

    clf = LassoCV(precompute=True, **cv_kwargs)
    clf.fit(X, y)
    assert_almost_equal(clf.alpha, 0.011, 2)

    # test set, drawn from the same generator after the training data
    X_test = rng.randn(n_samples, n_features)
    y_test = X_test.dot(w)
    assert clf.score(X_test, y_test) > 0.85
# Script fragment: per-group LassoCV / ElasticNetCV fits, coefficient plots,
# and the start of a permutation loop.
#
# 1. A LassoCV is built over the 50 alphas of np.linspace(1, 0.05, 50) with a
#    ShuffleSplit(len(y), n_iter=50, test_size=0.25) cross-validator. The name
#    `lassocv` is rebound before any fit visible in this fragment.
# 2. For each i in range(n_rows), the rows of conn_data and y where
#    groups == i are selected, and an ElasticNetCV and a LassoCV (same alpha
#    grid, ShuffleSplit sized to the subset) are fit on them. The Lasso
#    coefficients are plotted in subplot 211 and the ElasticNet coefficients
#    in subplot 212, using color[i] and labels_group[i].
# 3. permut_ is initialised to an empty list and a 1000-iteration loop begins,
#    permuting y and building a ShuffleSplit over len(y); the rest of that
#    loop is not visible here.
#
# NOTE(review): the original line layout is lost, so which statements sit
# inside each loop is inferred from the variables they use - confirm.
# NOTE(review): ShuffleSplit(n, n_iter=...) looks like the older
# cross_validation-style signature - confirm against the installed version.
lassocv = LassoCV(alphas=np.linspace(1, 0.05, 50), cv=ShuffleSplit(len(y), n_iter=50, test_size=0.25)) for i in range(n_rows): X_ = conn_data[groups==i,:] y_ = y[groups==i] enetcv = ElasticNetCV(alphas=np.linspace(1, 0.05, 50), cv=ShuffleSplit(len(y_), n_iter=50, test_size=0.25)) lassocv = LassoCV(alphas=np.linspace(1, 0.05, 50), cv=ShuffleSplit(len(y_), n_iter=50, test_size=0.25)) lassocv.fit(X_, y_) enetcv.fit(X_, y_) f = pl.figure() a = f.add_subplot(211) pl.plot(lassocv.coef_, c=color[i], label=labels_group[i]) a = f.add_subplot(212) pl.plot(enetcv.coef_, c=color[i], label=labels_group[i]) ################################################## permut_ = [] for i in np.arange(1000): y_permuted = permutation(y) cv=ShuffleSplit(len(y), n_iter=50, test_size=0.25)
cv=ShuffleSplit(len(y), n_iter=50, test_size=0.25)) lassocv = LassoCV(alphas=np.linspace(1, 0.05, 50), cv=ShuffleSplit(len(y), n_iter=50, test_size=0.25)) for i in range(n_rows): X_ = conn_data[groups == i, :] y_ = y[groups == i] enetcv = ElasticNetCV(alphas=np.linspace(1, 0.05, 50), cv=ShuffleSplit(len(y_), n_iter=50, test_size=0.25)) lassocv = LassoCV(alphas=np.linspace(1, 0.05, 50), cv=ShuffleSplit(len(y_), n_iter=50, test_size=0.25)) lassocv.fit(X_, y_) enetcv.fit(X_, y_) f = pl.figure() a = f.add_subplot(211) pl.plot(lassocv.coef_, c=color[i], label=labels_group[i]) a = f.add_subplot(212) pl.plot(enetcv.coef_, c=color[i], label=labels_group[i]) ################################################## permut_ = [] for i in np.arange(1000): y_permuted = permutation(y) cv = ShuffleSplit(len(y), n_iter=50, test_size=0.25)