import numpy as np


def lasso_classification(table, alpha=0.3):
    """Train a Lasso model on `table` (labels in column 0, features in the
    remaining columns), threshold its predictions at 0.5, and return the
    accuracy on a held-out half of the data."""
    from scikits.learn.linear_model import Lasso

    X = table[:, 1:]
    Y = table[:, 0]

    # Split data in train set and test set
    n_samples = X.shape[0]
    items = np.random.permutation(n_samples)
    training_items = items[:n_samples // 2]
    testing_items = items[n_samples // 2:]
    X_train, y_train = X[training_items], Y[training_items]
    X_test, y_test = X[testing_items], Y[testing_items]

    lasso = Lasso(alpha=alpha, fit_intercept=True)
    lasso_fit = lasso.fit(X_train, y_train)
    print lasso_fit.coef_

    # Collapse the continuous predictions to binary labels at 0.5
    y_pred_lasso = lasso_fit.predict(X_test)
    y_collapsed = np.zeros_like(y_pred_lasso)
    y_collapsed[y_pred_lasso >= 0.5] = 1

    # Fraction of correctly classified test samples
    test = y_collapsed == y_test
    return float(test.sum()) / test.shape[0]
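# A minimal usage sketch for lasso_classification. The synthetic table
# below is an illustrative assumption (not from the original source):
# binary labels go in column 0, random features in the remaining columns.
np.random.seed(0)
n_samples, n_features = 100, 20
features = np.random.randn(n_samples, n_features)
labels = (features[:, 0] > 0).astype(np.float64)
table = np.hstack([labels[:, np.newaxis], features])

print 'held-out accuracy: %f' % lasso_classification(table, alpha=0.3)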
def compute_bench(alpha, n_samples, n_features):
    lasso_results = []
    larslasso_results = []
    larslasso_gram_results = []

    n_tests = 1000
    it = 0
    for ns in n_samples:
        for nf in n_features:
            it += 1
            print '=================='
            print 'Iteration %s of %s' % (it, len(n_samples) * len(n_features))
            print '=================='
            k = nf // 10
            X, Y, X_test, Y_test, coef_ = make_data(
                n_samples=ns, n_tests=n_tests, n_features=nf,
                noise=0.1, k=k)

            X /= np.sqrt(np.sum(X ** 2, axis=0))  # Normalize data

            gc.collect()
            print "benching Lasso: "
            clf = Lasso(alpha=alpha, fit_intercept=False)
            tstart = time()
            clf.fit(X, Y)
            lasso_results.append(time() - tstart)

            gc.collect()
            print "benching LassoLARS: "
            clf = LassoLARS(alpha=alpha, fit_intercept=False)
            tstart = time()
            clf.fit(X, Y, normalize=False, precompute=False)
            larslasso_results.append(time() - tstart)

            gc.collect()
            print "benching LassoLARS (precomp. Gram): "
            clf = LassoLARS(alpha=alpha, fit_intercept=False)
            tstart = time()
            clf.fit(X, Y, normalize=False, precompute=True)
            larslasso_gram_results.append(time() - tstart)

    return lasso_results, larslasso_results, larslasso_gram_results
def compute_bench(alpha, n_samples, n_features, precompute):
    lasso_results = []
    larslasso_results = []

    n_test_samples = 0
    it = 0
    for ns in n_samples:
        for nf in n_features:
            it += 1
            print '=================='
            print 'Iteration %s of %s' % (it, len(n_samples) * len(n_features))
            print '=================='
            n_informative = nf // 10
            X, Y, _, _, coef = make_regression_dataset(
                n_train_samples=ns, n_test_samples=n_test_samples,
                n_features=nf, noise=0.1, n_informative=n_informative)

            X /= np.sqrt(np.sum(X ** 2, axis=0))  # Normalize data

            gc.collect()
            print "- benching Lasso"
            clf = Lasso(alpha=alpha, fit_intercept=False)
            tstart = time()
            clf.fit(X, Y, precompute=precompute)
            lasso_results.append(time() - tstart)

            gc.collect()
            print "- benching LassoLARS"
            clf = LassoLARS(alpha=alpha, fit_intercept=False)
            tstart = time()
            clf.fit(X, Y, normalize=False, precompute=precompute)
            larslasso_results.append(time() - tstart)

    return lasso_results, larslasso_results
def compute_bench(alpha, n_samples, n_features, precompute):
    lasso_results = []
    lars_lasso_results = []

    it = 0
    for ns in n_samples:
        for nf in n_features:
            it += 1
            print '=================='
            print 'Iteration %s of %s' % (it, len(n_samples) * len(n_features))
            print '=================='
            n_informative = nf // 10
            X, Y, coef_ = make_regression(n_samples=ns, n_features=nf,
                                          n_informative=n_informative,
                                          noise=0.1, coef=True)

            X /= np.sqrt(np.sum(X ** 2, axis=0))  # Normalize data

            gc.collect()
            print "- benching Lasso"
            clf = Lasso(alpha=alpha, fit_intercept=False)
            tstart = time()
            clf.fit(X, Y, precompute=precompute)
            lasso_results.append(time() - tstart)

            gc.collect()
            print "- benching LassoLars"
            clf = LassoLars(alpha=alpha, fit_intercept=False)
            tstart = time()
            clf.fit(X, Y, normalize=False, precompute=precompute)
            lars_lasso_results.append(time() - tstart)

    return lasso_results, lars_lasso_results
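# A hedged driver sketch for the compute_bench variant defined directly
# above. The alpha value and grid sizes are illustrative assumptions, and
# the script-level imports of the original benchmark (gc, time, numpy,
# Lasso, LassoLars, make_regression) are assumed to be in scope.
alpha = 0.01
list_n_samples = [100, 500, 1000]
list_n_features = [500]

lasso_times, lars_lasso_times = compute_bench(alpha, list_n_samples,
                                              list_n_features,
                                              precompute=True)
for ns, t_cd, t_lars in zip(list_n_samples, lasso_times, lars_lasso_times):
    print 'n_samples=%d: Lasso %.3fs, LassoLars %.3fs' % (ns, t_cd, t_lars)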
y = np.dot(X, coef)

# add noise
y += 0.01 * np.random.normal(size=(n_samples,))

# Split data in train set and test set
n_samples = X.shape[0]
X_train, y_train = X[:n_samples // 2], y[:n_samples // 2]
X_test, y_test = X[n_samples // 2:], y[n_samples // 2:]

################################################################################
# Lasso
from scikits.learn.linear_model import Lasso

alpha = 0.1
lasso = Lasso(alpha=alpha)

y_pred_lasso = lasso.fit(X_train, y_train).predict(X_test)
print lasso
print "r^2 on test data : %f" % (1 - np.linalg.norm(y_test - y_pred_lasso)**2
                                 / np.linalg.norm(y_test)**2)

################################################################################
# ElasticNet
from scikits.learn.linear_model import ElasticNet

enet = ElasticNet(alpha=alpha, rho=0.7)

y_pred_enet = enet.fit(X_train, y_train).predict(X_test)
print enet
print "r^2 on test data : %f" % (1 - np.linalg.norm(y_test - y_pred_enet)**2
                                 / np.linalg.norm(y_test)**2)
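# Note: the r^2 printed above is 1 - ||y_test - y_pred||^2 / ||y_test||^2,
# which matches the usual coefficient of determination only when y_test is
# (approximately) centered. A centered variant, for reference (an
# illustrative addition, not part of the original example):
r2_enet = 1 - (np.sum((y_test - y_pred_enet) ** 2)
               / np.sum((y_test - y_test.mean()) ** 2))
print "centered r^2 on test data : %f" % r2_enet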
from time import time

import numpy as np
from scipy import linalg

from scikits.learn.linear_model.sparse import Lasso as SparseLasso
from scikits.learn.linear_model import Lasso as DenseLasso

###############################################################################
# The two Lasso implementations on Dense data
print "--- Dense matrices"

n_samples, n_features = 200, 10000
np.random.seed(0)
y = np.random.randn(n_samples)
X = np.random.randn(n_samples, n_features)

alpha = 1
sparse_lasso = SparseLasso(alpha=alpha, fit_intercept=False)
dense_lasso = DenseLasso(alpha=alpha, fit_intercept=False)

t0 = time()
sparse_lasso.fit(X, y, max_iter=1000)
print "Sparse Lasso done in %fs" % (time() - t0)

t0 = time()
dense_lasso.fit(X, y, max_iter=1000)
print "Dense Lasso done in %fs" % (time() - t0)

print "Distance between coefficients : %s" % linalg.norm(
    sparse_lasso.coef_ - dense_lasso.coef_)

###############################################################################
# The two Lasso implementations on Sparse data
print "--- Sparse matrices"
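# The sparse-data half of the comparison is cut off above. A plausible
# continuation, mirroring the dense half (the 2.5 sparsification threshold
# and the scipy.sparse conversion are assumptions, not the original code):
from scipy import sparse

Xs = X.copy()
Xs[Xs < 2.5] = 0.0  # zero out most entries so the data is genuinely sparse
Xs_sp = sparse.csc_matrix(Xs)
print "matrix density : %f %%" % (Xs_sp.nnz / float(n_samples * n_features) * 100)

t0 = time()
sparse_lasso.fit(Xs_sp, y, max_iter=1000)
print "Sparse Lasso done in %fs" % (time() - t0)

t0 = time()
dense_lasso.fit(Xs, y, max_iter=1000)
print "Dense Lasso done in %fs" % (time() - t0)

print "Distance between coefficients : %s" % linalg.norm(
    sparse_lasso.coef_ - dense_lasso.coef_)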
def fitRateSpectrum(Times, Data, Rates, w, Lnorm='ridge',
                    standardizeData=True, CalcNdof=False, rho=0.5):
    """Solve the inverse Laplace transform using the pseudo-inverse with
    Tikhonov regularization (w parameter).

    Returns the coefficients A_k, the residual sum of squares (rss), and
    the number of degrees of freedom, for each relaxation rate.
    """
    if Lnorm == 'lasso':
        # Use L1-norm Lasso regression
        try:
            from scikits.learn.linear_model import Lasso
        except ImportError:
            print 'Error: could NOT import Lasso from scikits.learn.linear_model. Using L2 norm (ridge).'
            Lnorm = 'ridge'

    if Lnorm == 'enet':
        # Use L1+L2 mixed-norm ElasticNet regression
        try:
            from scikits.learn.linear_model import ElasticNet
        except ImportError:
            print 'Error: could NOT import ElasticNet from scikits.learn.linear_model. Using L2 norm (ridge).'
            Lnorm = 'ridge'

    if Lnorm == 'lasso':
        # Assume the data is already "centered" -- i.e. no zero rate
        lasso = Lasso(alpha=w, fit_intercept=False)
        X, Xmean = Xsubmatrix(Rates, Times, standardizeData=standardizeData)
        lasso.fit(X, Data, max_iter=1e6, tol=1e-7)
        A = lasso.coef_
        # Compute the residual sum of squares (note: the loss function
        # being minimized is different for the L1 norm)
        y_pred_lasso = lasso.predict(X)
        diff = y_pred_lasso - Data

    elif Lnorm == 'enet':
        # NOTE: the convention for rho is reversed in scikits.learn,
        # so we must pass (1 - rho) instead of rho
        enet = ElasticNet(alpha=w, rho=(1. - rho), fit_intercept=False)
        X, Xmean = Xsubmatrix(Rates, Times, standardizeData=standardizeData)
        # enet.fit(X, Data, max_iter=1e6, tol=1e-7)
        enet.fit(X, Data, max_iter=1e6, tol=1e-3)  # for testing
        A = enet.coef_
        y_pred_enet = enet.predict(X)
        diff = y_pred_enet - Data

    elif Lnorm == 'ridge':
        X, Xmean = Xmatrix(Rates, Times, w, standardizeData=standardizeData)
        Xinv = linalg.pinv(X)
        # Pad the data with one zero per rate for the regularization rows
        y = np.array(Data.tolist() + [0. for k in Rates])
        if standardizeData:
            y = y - y.mean()
        A = np.dot(Xinv, y)
        diff = SumSpectra(A, Rates, Times) - Data

    rss = np.dot(diff, diff)  # Residual sum of squares

    if CalcNdof:
        # Effective number of degrees of freedom:
        # ndof = trace(I - X (X^T X + w I)^{-1} X^T)
        Xsub, Xmean = Xsubmatrix(Rates, Times, standardizeData=standardizeData)
        XT = np.transpose(Xsub)
        I_XT = np.eye(XT.shape[0])
        I_X = np.eye(Xsub.shape[0])
        Xtemp = np.dot(Xsub, np.linalg.inv(np.dot(XT, Xsub) + w * I_XT))
        ndof = np.trace(I_X - np.dot(Xtemp, XT))
    else:
        ndof = None

    return A, rss, ndof
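# For reference, a self-contained sketch of the Tikhonov-regularized
# pseudo-inverse solve performed by the 'ridge' branch above. The design
# matrix here is an illustrative stand-in for the exponential-decay basis
# that Xmatrix builds from Rates and Times.
import numpy as np
from scipy import linalg

np.random.seed(0)
X_basis = np.random.randn(50, 10)  # 50 time points, 10 candidate rates
data = np.random.randn(50)
w = 0.1  # Tikhonov regularization weight

# Stack sqrt(w) * I under the design matrix and pad the data with zeros,
# so the least-squares solution minimizes ||X A - data||^2 + w * ||A||^2.
X_aug = np.vstack([X_basis, np.sqrt(w) * np.eye(X_basis.shape[1])])
y_aug = np.concatenate([data, np.zeros(X_basis.shape[1])])
A = np.dot(linalg.pinv(X_aug), y_aug)
print 'recovered coefficients:', A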