def test_lasso_lars_vs_lasso_cd(verbose=False):
    """
    Test that LassoLars and Lasso using coordinate descent give the
    same results
    """
    alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso')
    lasso_cd = linear_model.Lasso(fit_intercept=False)
    for (c, a) in zip(lasso_path.T, alphas):
        lasso_cd.alpha = a
        lasso_cd.fit(X, y, tol=1e-8)
        error = np.linalg.norm(c - lasso_cd.coef_)
        assert error < 0.01

    # similar test, with the classifiers
    for alpha in np.linspace(1e-2, 1 - 1e-2):
        clf1 = linear_model.LassoLARS(alpha=alpha).fit(X, y)
        clf2 = linear_model.Lasso(alpha=alpha).fit(X, y, tol=1e-8)
        err = np.linalg.norm(clf1.coef_ - clf2.coef_)
        assert err < 1e-3
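These tests reference module-level X, y, np, and linear_model that are not shown in the snippet. A minimal setup sketch, assuming the diabetes data that the cross-validation example below also uses:

# Assumed module-level fixtures for the tests (a sketch, not from the
# original snippet; the diabetes dataset matches the example below).
import numpy as np
from scikits.learn import datasets, linear_model

diabetes = datasets.load_diabetes()
X = diabetes.data
y = diabetes.target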
def test_lasso_lars_vs_lasso_cd_early_stopping(verbose=False):
    """
    Test that LassoLars and Lasso using coordinate descent give the
    same results when early stopping is used.
    (test: before, in the middle, and in the last part of the path)
    """
    alphas_min = [10, 0.9, 1e-4]
    # Use a distinct loop variable and pass it to lars_path: the original
    # shadowed the list and hard-coded alpha_min=0.9, so only one stopping
    # point was ever exercised.
    for alpha_min in alphas_min:
        alphas, _, lasso_path = linear_model.lars_path(X, y, method='lasso',
                                                       alpha_min=alpha_min)
        lasso_cd = linear_model.Lasso(fit_intercept=False)
        lasso_cd.alpha = alphas[-1]
        lasso_cd.fit(X, y, tol=1e-8)
        error = np.linalg.norm(lasso_path[:, -1] - lasso_cd.coef_)
        assert error < 0.01
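For reference, lars_path returns the regularization values in decreasing order together with the coefficient path as an (n_features, n_alphas) array, which is why the test compares the last column, lasso_path[:, -1], against the coordinate-descent fit at alphas[-1]. A minimal sketch of that shape contract (the assert is illustrative, not part of the original tests):

alphas, active, coefs = linear_model.lars_path(X, y, method='lasso')
# One row per feature, one column per knot on the regularization path.
assert coefs.shape == (X.shape[1], len(alphas))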
import numpy as np
import pylab as pl

from scikits.learn import cross_val, datasets, linear_model

diabetes = datasets.load_diabetes()
X = diabetes.data
y = diabetes.target

lasso = linear_model.Lasso()
alphas = np.logspace(-4, -1, 20)

scores = list()
scores_std = list()
for alpha in alphas:
    lasso.alpha = alpha
    this_scores = cross_val.cross_val_score(lasso, X, y, n_jobs=-1)
    scores.append(np.mean(this_scores))
    scores_std.append(np.std(this_scores))

pl.figure(1, figsize=(2.5, 2))
pl.clf()
pl.axes([.1, .25, .8, .7])
pl.semilogx(alphas, scores)
pl.semilogx(alphas, np.array(scores) + np.array(scores_std) / 20, 'b--')
pl.semilogx(alphas, np.array(scores) - np.array(scores_std) / 20, 'b--')
pl.yticks(())
pl.ylabel('CV score')
pl.xlabel('alpha')
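A possible follow-up, not part of the original example: show the figure and report the alpha with the highest mean cross-validation score.

# Illustrative addition: pick the best-scoring alpha from the sweep above.
best_alpha = alphas[np.argmax(scores)]
print('best alpha: %.5f' % best_alpha)
pl.show()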
rhos = [0]
rc = ''
best_alpha = 0
best_rho = 0
best_mean_rcor = 0
best_mean_max_pos = features
best_mean_max_intersect = 0
for a, alpha in enumerate(ALPHA_VALUES):
    for r, rho in enumerate(rhos):
        # Pick the regression model for this (alpha, rho) pair.
        # Note: if regressionModel is 'Ridge' and alpha == 0, no branch
        # matches and the model from the previous iteration is reused.
        if regressionModel == 'ElasticNet':
            model = lm.ElasticNet(alpha=alpha, rho=rho)
        elif regressionModel == 'Lasso':
            model = lm.Lasso(alpha=alpha)
        elif regressionModel == 'Ridge' and alpha != 0:
            model = lm.Ridge(alpha=alpha)
        for k in range(file_num):
            dy, dx = generate_data.gen_data(samples, features, impFeat)
            #dy, dx = genRedundantData(100, 6, 2, 2)
            examples, features = dx.shape
            (weights_iter, rcors, max_position_iter,
             intersect_size_iter, deltatime) = regBoost(dx, dy, model,
                                                        bootstrap_num, impFeat)
            files_rcors[k, :] = rcors
            # Track the parameter setting with the best mean correlation.
            if np.mean(rcors) > best_mean_rcor:
                best_mean_rcor = np.mean(rcors)
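For context, a self-contained sketch of the same grid-search pattern over alpha, using cross-validated scores as in the diabetes example above. The synthetic data here is hypothetical, and the author's helpers (generate_data.gen_data, regBoost) are not reproduced:

# Hypothetical, self-contained sketch of alpha selection by CV score.
import numpy as np
from scikits.learn import cross_val, linear_model as lm

rng = np.random.RandomState(0)
dx = rng.randn(100, 6)
dy = dx[:, 0] + 2 * dx[:, 1] + 0.1 * rng.randn(100)  # 2 informative features

best_alpha, best_score = None, -np.inf
for alpha in np.logspace(-3, 0, 10):
    scores = cross_val.cross_val_score(lm.Lasso(alpha=alpha), dx, dy)
    if np.mean(scores) > best_score:
        best_alpha, best_score = alpha, np.mean(scores)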