from scikits.learn.glm.sparse import Lasso as SparseLasso
from scikits.learn.glm import Lasso as DenseLasso

# NOTE(review): this section uses `np`, `time` and `linalg`, which are not
# imported here -- presumably they are imported earlier in the file; confirm.

###############################################################################
# The two Lasso implementations on Dense data
# print(...) with a single argument behaves the same on Python 2 and 3.
print("--- Dense matrices")

n_samples, n_features = 200, 10000
np.random.seed(0)  # fixed seed: both solvers are fitted on the same draw
y = np.random.randn(n_samples)
X = np.random.randn(n_samples, n_features)

alpha = 1
sparse_lasso = SparseLasso(alpha=alpha, fit_intercept=False)
dense_lasso = DenseLasso(alpha=alpha, fit_intercept=False)

# Time each fit separately, restarting the clock before each one.
t0 = time()
sparse_lasso.fit(X, y, maxit=1000)
print("Sparse Lasso done in %fs" % (time() - t0))

t0 = time()
dense_lasso.fit(X, y, maxit=1000)
print("Dense Lasso done in %fs" % (time() - t0))

# Report how far apart the two fitted coefficient vectors are.
print("Distance between coefficients : %s"
      % linalg.norm(sparse_lasso.coef_ - dense_lasso.coef_))

###############################################################################
# The two Lasso implementations on Sparse data
print("--- Sparse matrices")
y = np.dot(X, coef)

# add noise: one independent Gaussian draw per sample.  The shape must be
# passed as `size`; a positional tuple would be taken as the mean (`loc`)
# and yield a single value broadcast over all of y.
y += 0.01 * np.random.normal(size=y.shape)

# Split data in train set and test set
# (floor division keeps the slice bounds integral on Python 2 and 3)
n_samples = X.shape[0]
X_train, y_train = X[:n_samples // 2], y[:n_samples // 2]
X_test, y_test = X[n_samples // 2:], y[n_samples // 2:]

################################################################################
# Lasso
from scikits.learn.glm import Lasso

alpha = 0.1
lasso = Lasso(alpha=alpha)

y_pred_lasso = lasso.fit(X_train, y_train).predict(X_test)
print(lasso)
# Score printed as r^2: 1 - ||y_test - y_pred||^2 / ||y_test||^2
# (the denominator uses y_test as-is, without subtracting its mean).
print("r^2 on test data : %f"
      % (1 - np.linalg.norm(y_test - y_pred_lasso)**2
         / np.linalg.norm(y_test)**2))

################################################################################
# ElasticNet
from scikits.learn.glm import ElasticNet

enet = ElasticNet(alpha=alpha, rho=0.7)

y_pred_enet = enet.fit(X_train, y_train).predict(X_test)
print(enet)
# Same score as for the Lasso above, for a like-for-like comparison.
print("r^2 on test data : %f"
      % (1 - np.linalg.norm(y_test - y_pred_enet)**2
         / np.linalg.norm(y_test)**2))
def test_lasso_toy():
    """Check Lasso on a three-point straight line for several alphas.

    For every penalty value the fitted coefficient, the predictions on
    unseen points and the dual gap are compared with known values.

    When validating this against glmnet notice that glmnet divides it
    against nobs.
    """
    train_X = [[-1], [0], [1]]
    train_y = [-1, 0, 1]  # just a straight line
    test_X = [[2], [3], [4]]  # test sample

    # (alpha, expected coef_, expected predictions on test_X)
    scenarios = (
        (0, [1], [2, 3, 4]),
        (0.1, [.85], [1.7, 2.55, 3.4]),
        (0.5, [.25], [0.5, 0.75, 1.]),
        (1, [.0], [0, 0, 0]),
    )

    for penalty, want_coef, want_pred in scenarios:
        model = Lasso(alpha=penalty)
        model.fit(train_X, train_y)
        got_pred = model.predict(test_X)

        assert_array_almost_equal(model.coef_, want_coef)
        assert_array_almost_equal(got_pred, want_pred)
        assert_almost_equal(model.dual_gap_, 0)