def parallel_function(
        dataset_name, method, tol=1e-8, n_outer=15):
    """Run one multiclass sparse-logistic hyperparameter-selection experiment.

    Parameters
    ----------
    dataset_name : str
        Key into the module-level dicts ``dict_subsampling``, ``dict_t_max``
        and ``dict_max_eval``.
    method : str
        One of ``"implicit_forward"``, ``"random*"``/``"bayesian*"`` or
        ``"grid_search"``.
    tol : float
        Tolerance passed to the hyperparameter-selection routines.
    n_outer : int
        NOTE(review): currently unused — the ``"implicit_forward"`` branch
        hard-codes ``LineSearch(n_outer=100)``; confirm whether this
        parameter should be forwarded instead.

    Returns
    -------
    tuple
        Experiment metadata followed by the monitor traces (times,
        objectives, validation/test accuracies, log-alpha path) and
        problem sizes.
    """
    # load data
    X, y = fetch_libsvm(dataset_name)
    # subsample the samples and the features
    n_samples, n_features = dict_subsampling[dataset_name]
    t_max = dict_t_max[dataset_name]
    # t_max = 3600
    X, y = clean_dataset(X, y, n_samples, n_features)
    alpha_max, n_classes = get_alpha_max(X, y)
    log_alpha_max = np.log(alpha_max)  # maybe to change alpha max value

    algo = ImplicitForward(None, n_iter_jac=2000)
    estimator = LogisticRegression(
        C=1, fit_intercept=False, warm_start=True, max_iter=30,
        verbose=False)
    model = SparseLogreg(estimator=estimator)
    idx_train, idx_val, idx_test = get_splits(X, y)

    logit_multiclass = LogisticMulticlass(
        idx_train, idx_val, algo, idx_test=idx_test)

    monitor = Monitor()
    if method == "implicit_forward":
        # Gradient-based search with one hyperparameter per class.
        log_alpha0 = np.ones(n_classes) * np.log(0.1 * alpha_max)
        optimizer = LineSearch(n_outer=100)
        grad_search(
            algo, logit_multiclass, model, optimizer, X, y, log_alpha0,
            monitor)
    elif method.startswith(('random', 'bayesian')):
        # Zeroth-order search over 7 (natural-log) units below alpha_max.
        max_evals = dict_max_eval[dataset_name]
        log_alpha_min = np.log(alpha_max) - 7
        hyperopt_wrapper(
            algo, logit_multiclass, model, X, y, log_alpha_min,
            log_alpha_max, monitor, max_evals=max_evals, tol=tol,
            t_max=t_max, method=method, size_space=n_classes)
    elif method == 'grid_search':
        # Same geometric grid for every class (shared alpha per grid point).
        n_alphas = 20
        p_alphas = np.geomspace(1, 0.001, n_alphas)
        p_alphas = np.tile(p_alphas, (n_classes, 1))
        for i in range(n_alphas):
            log_alpha_i = np.log(alpha_max * p_alphas[:, i])
            logit_multiclass.get_val(
                model, X, y, log_alpha_i, None, monitor, tol)

    # Materialize the monitor traces as arrays (copies) so the return
    # value is safe to pickle / send back from a joblib worker.
    monitor.times = np.array(monitor.times).copy()
    monitor.objs = np.array(monitor.objs).copy()
    monitor.acc_vals = np.array(monitor.acc_vals).copy()
    monitor.acc_tests = np.array(monitor.acc_tests).copy()
    monitor.log_alphas = np.array(monitor.log_alphas).copy()
    return (
        dataset_name, method, tol, n_outer, monitor.times, monitor.objs,
        monitor.acc_vals, monitor.acc_tests, monitor.log_alphas,
        log_alpha_max, n_samples, n_features, n_classes)
def test_grad_search_custom(model, model_custom, crit):
    """check that the paths are the same in the line search"""
    n_outer = 5

    def _run(which_model, run_monitor):
        # Fresh criterion/algo per run so no warm-start state is shared.
        criterion = HeldOutLogistic(idx_val, idx_val)
        algo = ImplicitForward(tol_jac=tol, n_iter_jac=5000)
        grad_search(algo, criterion, which_model, X, y, log_alpha,
                    run_monitor, n_outer=n_outer, tol=tol)

    monitor = Monitor()
    _run(model, monitor)
    monitor_custom = Monitor()
    _run(model_custom, monitor_custom)

    # Both models must trace the same hyperparameter path ...
    np.testing.assert_allclose(
        np.array(monitor.log_alphas), np.array(monitor_custom.log_alphas),
        atol=1e-3)
    np.testing.assert_allclose(
        np.array(monitor.grads), np.array(monitor_custom.grads), atol=1e-4)
    np.testing.assert_allclose(
        np.array(monitor.objs), np.array(monitor_custom.objs), atol=1e-5)
    # ... while wall-clock times of independent runs should differ.
    assert not np.allclose(
        np.array(monitor.times), np.array(monitor_custom.times))
def test_grad_search(Optimizer, model, crit):
    """check that the paths are the same in the line search"""
    n_outer = 2

    def _run(algo):
        # Fresh criterion/monitor/optimizer per algorithm.
        criterion = HeldOutMSE(idx_train, idx_val)
        run_monitor = Monitor()
        optimizer = Optimizer(n_outer=n_outer, tol=1e-16)
        grad_search(algo, criterion, model, optimizer, X, y, alpha0,
                    run_monitor)
        return run_monitor

    monitor1 = _run(Forward())
    monitor2 = _run(Implicit())
    monitor3 = _run(ImplicitForward(tol_jac=1e-8, n_iter_jac=5000))

    # Forward and ImplicitForward must agree on the whole path.
    np.testing.assert_allclose(
        np.array(monitor1.alphas), np.array(monitor3.alphas))
    np.testing.assert_allclose(
        np.array(monitor1.grads), np.array(monitor3.grads), rtol=1e-5)
    np.testing.assert_allclose(
        np.array(monitor1.objs), np.array(monitor3.objs))
    # Runtimes of independent runs should differ.
    assert not np.allclose(
        np.array(monitor1.times), np.array(monitor3.times))
def test_grad_search(model, crit):
    """check that the paths are the same in the line search"""
    n_outer = 2

    def _run(algo):
        # Fresh criterion/monitor per differentiation algorithm.
        criterion = HeldOutLogistic(X_val, y_val, model)
        run_monitor = Monitor()
        grad_search(algo, criterion, log_alpha, run_monitor,
                    n_outer=n_outer, tol=tol)
        return run_monitor

    monitor1 = _run(Forward())
    monitor2 = _run(Implicit())
    monitor3 = _run(ImplicitForward(tol_jac=tol, n_iter_jac=5000))

    # Forward vs ImplicitForward: identical hyperparameter path.
    assert np.allclose(
        np.array(monitor1.log_alphas), np.array(monitor3.log_alphas))
    assert np.allclose(
        np.array(monitor1.grads), np.array(monitor3.grads), atol=1e-4)
    assert np.allclose(
        np.array(monitor1.objs), np.array(monitor3.objs))
    # Independent runs should not take identical wall-clock time.
    assert not np.allclose(
        np.array(monitor1.times), np.array(monitor3.times))
def test_grad_search(model):
    """Check Forward/Implicit/ImplicitForward follow the same path.

    Runs grad_search three times with identical settings but different
    hypergradient algorithms, then compares the recorded traces.
    """
    # criterion = SURE(
    #     X_train, y_train, model, sigma=sigma_star, X_test=X_test,
    #     y_test=y_test)
    n_outer = 3
    criterion = HeldOutSmoothedHinge(X_val, y_val, model,
                                     X_test=None, y_test=None)
    monitor1 = Monitor()
    algo = Forward()
    grad_search(algo, criterion, np.log(1e-3), monitor1,
                n_outer=n_outer, tol=1e-13)

    # criterion = SURE(
    #     X_train, y_train, model, sigma=sigma_star, X_test=X_test,
    #     y_test=y_test)
    criterion = HeldOutSmoothedHinge(X_val, y_val, model,
                                     X_test=None, y_test=None)
    monitor2 = Monitor()
    algo = Implicit()
    grad_search(algo, criterion, np.log(1e-3), monitor2,
                n_outer=n_outer, tol=1e-13)

    # criterion = SURE(
    #     X_train, y_train, model, sigma=sigma_star, X_test=X_test,
    #     y_test=y_test)
    criterion = HeldOutSmoothedHinge(X_val, y_val, model,
                                     X_test=None, y_test=None)
    monitor3 = Monitor()
    algo = ImplicitForward(tol_jac=1e-6, n_iter_jac=100)
    grad_search(algo, criterion, np.log(1e-3), monitor3,
                n_outer=n_outer, tol=1e-13)

    # Forward vs ImplicitForward must agree along the path.
    assert np.allclose(np.array(monitor1.log_alphas),
                       np.array(monitor3.log_alphas))
    assert np.allclose(np.array(monitor1.grads), np.array(monitor3.grads))
    assert np.allclose(np.array(monitor1.objs), np.array(monitor3.objs))
    # assert np.allclose(
    #     np.array(monitor1.objs_test), np.array(monitor3.objs_test))
    # Wall-clock times of independent runs should differ.
    assert not np.allclose(np.array(monitor1.times),
                           np.array(monitor3.times))
def test_grad_search(model, crit):
    """check that the paths are the same in the line search"""
    if crit == 'cv':
        n_outer = 2
        criterion = HeldOutMSE(idx_train, idx_val)
    else:
        n_outer = 2
        criterion = SmoothedSURE(sigma_star)
    # TODO MM@QBE if else scheme surprising
    # NOTE(review): the criterion chosen above is immediately overwritten
    # below, so `crit` currently has no effect on this test — confirm intent.
    criterion = HeldOutMSE(idx_train, idx_val)
    monitor1 = Monitor()
    algo = Forward()
    grad_search(algo, criterion, model, X, y, log_alpha, monitor1,
                n_outer=n_outer, tol=1e-16)

    criterion = HeldOutMSE(idx_train, idx_val)
    monitor2 = Monitor()
    algo = Implicit()
    grad_search(algo, criterion, model, X, y, log_alpha, monitor2,
                n_outer=n_outer, tol=1e-16)

    criterion = HeldOutMSE(idx_train, idx_val)
    monitor3 = Monitor()
    algo = ImplicitForward(tol_jac=1e-8, n_iter_jac=5000)
    grad_search(algo, criterion, model, X, y, log_alpha, monitor3,
                n_outer=n_outer, tol=1e-16)

    # Forward and ImplicitForward must agree on the whole trace.
    np.testing.assert_allclose(np.array(monitor1.log_alphas),
                               np.array(monitor3.log_alphas))
    np.testing.assert_allclose(np.array(monitor1.grads),
                               np.array(monitor3.grads), atol=1e-8)
    np.testing.assert_allclose(np.array(monitor1.objs),
                               np.array(monitor3.objs))
    # Wall-clock times of independent runs should differ.
    assert not np.allclose(np.array(monitor1.times),
                           np.array(monitor3.times))
def test_grad_search(model, crit):
    """check that the paths are the same in the line search"""
    if crit == 'cv':
        n_outer = 2
        criterion = HeldOutMSE(X_val, y_val, model,
                               X_test=X_test, y_test=y_test)
    else:
        n_outer = 2
        criterion = SURE(X_train, y_train, model, sigma=sigma_star,
                         X_test=X_test, y_test=y_test)
    # NOTE(review): the criterion chosen above is immediately overwritten
    # below, so `crit` currently has no effect on this test — confirm intent.
    criterion = HeldOutMSE(X_val, y_val, model, X_test=X_test, y_test=y_test)
    monitor1 = Monitor()
    algo = Forward()
    grad_search(algo, criterion, log_alpha, monitor1,
                n_outer=n_outer, tol=1e-16)

    criterion = HeldOutMSE(X_val, y_val, model, X_test=X_test, y_test=y_test)
    monitor2 = Monitor()
    algo = Implicit()
    grad_search(algo, criterion, log_alpha, monitor2,
                n_outer=n_outer, tol=1e-16)

    criterion = HeldOutMSE(X_val, y_val, model, X_test=X_test, y_test=y_test)
    monitor3 = Monitor()
    algo = ImplicitForward(tol_jac=1e-8, n_iter_jac=5000)
    grad_search(algo, criterion, log_alpha, monitor3,
                n_outer=n_outer, tol=1e-16)

    # Forward and ImplicitForward must agree on the whole trace.
    assert np.allclose(np.array(monitor1.log_alphas),
                       np.array(monitor3.log_alphas))
    assert np.allclose(np.array(monitor1.grads), np.array(monitor3.grads))
    assert np.allclose(np.array(monitor1.objs), np.array(monitor3.objs))
    assert np.allclose(np.array(monitor1.objs_test),
                       np.array(monitor3.objs_test))
    # Wall-clock times of independent runs should differ.
    assert not np.allclose(np.array(monitor1.times),
                           np.array(monitor3.times))
def test_grad_search():
    """Check that Forward, Implicit and ImplicitForward hypergradient
    algorithms follow the same optimization path on a problem with two
    hyperparameters (``log_alpha1``, ``log_alpha2``).
    """
    n_outer = 3

    # One run per differentiation algorithm; a fresh criterion/monitor is
    # built each time so no warm-start state leaks between runs.
    criterion = HeldOutMSE(X_val, y_val, model, X_test=None, y_test=None)
    monitor1 = Monitor()
    algo = Forward()
    grad_search(algo, criterion, np.array([log_alpha1, log_alpha2]),
                monitor1, n_outer=n_outer, tol=1e-16)

    criterion = HeldOutMSE(X_val, y_val, model, X_test=None, y_test=None)
    monitor2 = Monitor()
    algo = Implicit()
    grad_search(algo, criterion, np.array([log_alpha1, log_alpha2]),
                monitor2, n_outer=n_outer, tol=1e-16)

    criterion = HeldOutMSE(X_val, y_val, model, X_test=None, y_test=None)
    monitor3 = Monitor()
    algo = ImplicitForward(tol_jac=1e-3, n_iter_jac=1000)
    grad_search(algo, criterion, np.array([log_alpha1, log_alpha2]),
                monitor3, n_outer=n_outer, tol=1e-16)

    # (Removed two leftover debug list comprehensions whose results were
    # discarded — they had no effect on the test.)

    # Forward vs ImplicitForward: paths must match tightly.
    assert np.allclose(np.array(monitor1.log_alphas),
                       np.array(monitor3.log_alphas))
    assert np.allclose(np.array(monitor1.grads), np.array(monitor3.grads))
    assert np.allclose(np.array(monitor1.objs), np.array(monitor3.objs))
    assert not np.allclose(np.array(monitor1.times),
                           np.array(monitor3.times))

    # Forward vs Implicit: looser agreement is expected.
    assert np.allclose(np.array(monitor1.log_alphas),
                       np.array(monitor2.log_alphas), atol=1e-2)
    assert np.allclose(np.array(monitor1.grads),
                       np.array(monitor2.grads), atol=1e-2)
    assert np.allclose(np.array(monitor1.objs),
                       np.array(monitor2.objs), atol=1e-2)
    assert not np.allclose(np.array(monitor1.times),
                           np.array(monitor2.times))
def test_grad_search(model):
    """Compare the hypergradient paths of Forward, Implicit and
    ImplicitForward on a smoothed-hinge held-out criterion.
    """
    n_outer = 3

    def _run(algo):
        # Fresh criterion/monitor per algorithm; identical settings.
        criterion = HeldOutSmoothedHinge(idx_train, idx_val)
        run_monitor = Monitor()
        grad_search(algo, criterion, model, X, y, np.log(1e-3),
                    run_monitor, n_outer=n_outer, tol=1e-13)
        return run_monitor

    monitor1 = _run(Forward())
    monitor2 = _run(Implicit())
    monitor3 = _run(ImplicitForward(tol_jac=1e-6, n_iter_jac=100))

    # Forward vs ImplicitForward: identical traces expected.
    assert np.allclose(np.array(monitor1.log_alphas),
                       np.array(monitor3.log_alphas))
    assert np.allclose(np.array(monitor1.grads), np.array(monitor3.grads))
    assert np.allclose(np.array(monitor1.objs), np.array(monitor3.objs))
    # objs_test comparison deliberately not asserted here.
    # Wall-clock times of independent runs should differ.
    assert not np.allclose(np.array(monitor1.times),
                           np.array(monitor3.times))
def test_grad_search(model, crit):
    """check that the paths are the same in the line search"""
    if crit == 'MSE':
        n_outer = 2
        criterion = HeldOutMSE(idx_train, idx_val)
    else:
        n_outer = 2
        criterion = FiniteDiffMonteCarloSure(sigma_star)
    # TODO MM@QBE if else scheme surprising
    # NOTE(review): the criterion chosen above is immediately overwritten
    # below, so `crit` currently has no effect on this test — confirm intent.
    criterion = HeldOutMSE(idx_train, idx_val)
    monitor1 = Monitor()
    algo = Forward()
    optimizer = LineSearch(n_outer=n_outer, tol=1e-16)
    grad_search(algo, criterion, model, optimizer, X, y, alpha0, monitor1)

    criterion = HeldOutMSE(idx_train, idx_val)
    monitor2 = Monitor()
    algo = Implicit()
    optimizer = LineSearch(n_outer=n_outer, tol=1e-16)
    grad_search(algo, criterion, model, optimizer, X, y, alpha0, monitor2)

    criterion = HeldOutMSE(idx_train, idx_val)
    monitor3 = Monitor()
    algo = ImplicitForward(tol_jac=1e-8, n_iter_jac=5000)
    optimizer = LineSearch(n_outer=n_outer, tol=1e-16)
    grad_search(algo, criterion, model, optimizer, X, y, alpha0, monitor3)

    # Forward and ImplicitForward must agree on the whole trace.
    np.testing.assert_allclose(np.array(monitor1.alphas),
                               np.array(monitor3.alphas))
    np.testing.assert_allclose(np.array(monitor1.grads),
                               np.array(monitor3.grads), rtol=1e-5)
    np.testing.assert_allclose(np.array(monitor1.objs),
                               np.array(monitor3.objs))
    # Wall-clock times of independent runs should differ.
    assert not np.allclose(np.array(monitor1.times),
                           np.array(monitor3.times))
# Measure mse on test mse_cv = mean_squared_error(y_test, model_cv.predict(X_test)) print("Vanilla LassoCV: Mean-squared error on test data %f" % mse_cv) ############################################################################## ############################################################################## # Weighted Lasso with sparse-ho. # We use the vanilla lassoCV coefficients as a starting point alpha0 = np.log(model_cv.alpha_) * np.ones(X_train.shape[1]) # Weighted Lasso: Sparse-ho: 1 param per feature estimator = Lasso(fit_intercept=False, max_iter=10, warm_start=True) model = WeightedLasso(X_train, y_train, estimator=estimator) criterion = HeldOutMSE(X_val, y_val, model, X_test=X_test, y_test=y_test) algo = ImplicitForward() monitor = Monitor() grad_search(algo, criterion, alpha0, monitor, n_outer=20, tol=1e-6) ############################################################################## ############################################################################## # MSE on validation set mse_sho_val = mean_squared_error(y_val, estimator.predict(X_val)) # MSE on test set, ie unseen data mse_sho_test = mean_squared_error(y_test, estimator.predict(X_test)) print("Sparse-ho: Mean-squared error on validation data %f" % mse_sho_val) print("Sparse-ho: Mean-squared error on test (unseen) data %f" % mse_sho_test) labels = ['WeightedLasso val', 'WeightedLasso test', 'Lasso CV'] df = pd.DataFrame(np.array([mse_sho_val, mse_sho_test, mse_cv]).reshape(
############################################################################## # Weighted Lasso with sparse-ho. # We use the vanilla lassoCV coefficients as a starting point log_alpha0 = np.log(model_cv.alpha_) * np.ones(n_features) # Weighted Lasso: Sparse-ho: 1 param per feature estimator = Lasso(fit_intercept=False, max_iter=10, warm_start=True) model = WeightedLasso(estimator=estimator) criterion = HeldOutMSE(idx_train, idx_val) algo = ImplicitForward() monitor = Monitor() grad_search(algo, criterion, model, X, y, log_alpha0, monitor, n_outer=20, tol=1e-6) ############################################################################## ############################################################################## # MSE on validation set mse_sho_val = mean_squared_error(y[idx_val], estimator.predict(X[idx_val, :])) # MSE on test set, ie unseen data mse_sho_test = mean_squared_error(y_test, estimator.predict(X_test)) print("Sparse-ho: Mean-squared error on validation data %f" % mse_sho_val) print("Sparse-ho: Mean-squared error on test (unseen) data %f" % mse_sho_test)
log_alphas_1[-1], log_alphas_2[-1], log_alpha_max, max_iter=max_iter, tol=tol) criterion = HeldOutMSE(X_val, y_val, model, X_test=X_test, y_test=y_test) algo = ImplicitForward(criterion, tol_jac=1e-2, n_iter_jac=1000, max_iter=max_iter) _, _, _ = grad_search(algo=algo, verbose=True, log_alpha0=np.array( [np.log(alpha_max / 10), np.log(alpha_max / 10)]), tol=tol, n_outer=n_outer, monitor=monitor, tolerance_decrease='constant') alphas_grad = np.exp(np.array(monitor.log_alphas)) alphas_grad /= alpha_max t_grad_search += time.time() print("Time grid-search %f" % t_grid_search) print("Minimum grid-search %.3e" % results.min()) print("Time grad-search %f" % t_grad_search) print("Minimum grad-search %.3e" % np.array(monitor.objs).min())
def callback(val, grad, mask, dense, alpha):
    """Record the test-set prediction MSE at each outer iteration.

    Called by Monitor with the current validation value, hypergradient,
    support mask, dense coefficients and hyperparameter.
    """
    # The custom quantity is added at each outer iteration:
    # here the prediction MSE on test data
    objs_test.append(mean_squared_error(X_test[:, mask] @ dense, y_test))


##############################################################################
# Grad-search with sparse-ho and callback
# ---------------------------------------
model = Lasso(estimator=estimator)
criterion = HeldOutMSE(idx_train, idx_val)
algo = ImplicitForward()
# use Monitor(callback) with your custom callback
monitor = Monitor(callback=callback)
optimizer = LineSearch(n_outer=30)
grad_search(algo, criterion, model, optimizer, X, y, alpha0, monitor)

##############################################################################
# Plot results
# ------------
plt.figure(figsize=(5, 3))
plt.plot(monitor.times, objs_test)
plt.tick_params(width=5)
plt.xlabel("Times (s)")
plt.ylabel(r"$\|y^{\rm{test}} - X^{\rm{test}} \hat \beta^{(\lambda)} \|^2$")
plt.tight_layout()
plt.show(block=False)
# Inner solver reused across outer iterations via warm starts.
estimator = linear_model.ElasticNet(
    fit_intercept=False, max_iter=max_iter, warm_start=True)

print("Started grad-search")
t_grad_search = -time.time()
monitor = Monitor()
n_outer = 10
model = ElasticNet(max_iter=max_iter, estimator=estimator)
criterion = HeldOutMSE(idx_train, idx_val)
algo = ImplicitForward(tol_jac=1e-7, n_iter_jac=1000, max_iter=max_iter)
# Two hyperparameters (l1/l2), both initialized below alpha_max.
grad_search(
    algo, criterion, model, X, y, verbose=True,
    log_alpha0=np.array([np.log(alpha_max * 0.3), np.log(alpha_max / 10)]),
    tol=tol, n_outer=n_outer, monitor=monitor)
t_grad_search += time.time()

# Normalize the visited regularization path by alpha_max.
alphas_grad = np.exp(np.array(monitor.log_alphas))
alphas_grad /= alpha_max

print("Time grid-search %f" % t_grid_search)
print("Time grad-search %f" % t_grad_search)
print("Minimum grid search %0.3e" % results.min())
print("Minimum grad search %0.3e" % np.array(monitor.objs).min())

##############################################################################
log_alphas = np.log(alphas)
tol = 1e-7

# grid search (kept for reference, currently disabled)
# model = Lasso(X_train, y_train, np.log(alpha_max/10))
# criterion = HeldOutMSE(X_val, y_val, model, X_test=X_test, y_test=y_test)
# algo = Forward(criterion)
# monitor_grid_sk = Monitor()
# grid_search(
#     algo, None, None, monitor_grid_sk, log_alphas=log_alphas,
#     tol=tol)
# np.save("p_alphas.npy", p_alphas)
# objs = np.array(monitor_grid_sk.objs)
# np.save("objs.npy", objs)

# grad_search
estimator = linear_model.Lasso(fit_intercept=False, warm_start=True)
model = Lasso(X_train, y_train, np.log(alpha_max / 10), estimator=estimator)
criterion = HeldOutMSE(X_val, y_val, model, X_test=X_test, y_test=y_test)
algo = ImplicitForward(criterion)
monitor_grad = Monitor()
# Start the search one decade below alpha_max.
grad_search(algo, np.log(alpha_max / 10), monitor_grad, n_outer=10, tol=tol)

# Save the visited path (normalized by alpha_max) and objectives.
p_alphas_grad = np.exp(np.array(monitor_grad.log_alphas)) / alpha_max
np.save("p_alphas_grad.npy", p_alphas_grad)
objs_grad = np.array(monitor_grad.objs)
np.save("objs_grad.npy", objs_grad)
def parallel_function(
        dataset_name, method, tol=1e-5, n_outer=50,
        tolerance_decrease='exponential'):
    """Run one SVM hyperparameter-selection experiment for ``dataset_name``.

    ``method`` selects the strategy: gradient-based ("implicit_forward",
    "forward", "implicit") or zeroth-order ("grid_search", "random", "lhs").
    Returns experiment metadata plus the monitor traces.

    NOTE(review): ``n_outer`` is overridden to 5 below — confirm whether the
    parameter should be honored.
    """
    # load data
    X_train, X_val, X_test, y_train, y_val, y_test = get_data(
        dataset_name, csr=True)
    n_samples, n_features = X_train.shape
    print('n_samples', n_samples)
    print('n_features', n_features)
    # Map 0/1 labels to -1/+1 for the hinge loss.
    y_train[y_train == 0.0] = -1.0
    y_val[y_val == 0.0] = -1.0
    y_test[y_test == 0.0] = -1.0

    C_max = 100
    logC = np.log(1e-2)
    n_outer = 5

    if dataset_name == "rcv1":
        size_loop = 1
    else:
        size_loop = 1
    model = SVM(X_train, y_train, logC, max_iter=10000, tol=tol)
    for i in range(size_loop):
        monitor = Monitor()
        if method == "implicit_forward":
            criterion = HeldOutSmoothedHinge(X_val, y_val, model,
                                             X_test=X_test, y_test=y_test)
            algo = ImplicitForward(criterion, tol_jac=1e-3, n_iter_jac=100)
            _, _, _ = grad_search(
                algo=algo, verbose=False, log_alpha0=logC, tol=tol,
                n_outer=n_outer, monitor=monitor,
                t_max=dict_t_max[dataset_name],
                tolerance_decrease=tolerance_decrease)
        elif method == "forward":
            criterion = HeldOutSmoothedHinge(X_val, y_val, model,
                                             X_test=X_test, y_test=y_test)
            algo = Forward(criterion)
            _, _, _ = grad_search(
                algo=algo, log_alpha0=logC, tol=tol, n_outer=n_outer,
                monitor=monitor, t_max=dict_t_max[dataset_name],
                tolerance_decrease=tolerance_decrease)
        elif method == "implicit":
            criterion = HeldOutSmoothedHinge(X_val, y_val, model,
                                             X_test=X_test, y_test=y_test)
            algo = Implicit(criterion)
            _, _, _ = grad_search(
                algo=algo, log_alpha0=logC, tol=tol, n_outer=n_outer,
                monitor=monitor, t_max=dict_t_max[dataset_name],
                tolerance_decrease=tolerance_decrease)
        elif method == "grid_search":
            criterion = HeldOutSmoothedHinge(X_val, y_val, model,
                                             X_test=X_test, y_test=y_test)
            algo = Forward(criterion)
            log_alpha_min = np.log(1e-2)
            log_alpha_opt, min_g_func = grid_search(
                algo, log_alpha_min, np.log(C_max), monitor, max_evals=25,
                tol=tol, samp="grid")
            print(log_alpha_opt)
        elif method == "random":
            criterion = HeldOutSmoothedHinge(X_val, y_val, model,
                                             X_test=X_test, y_test=y_test)
            algo = Forward(criterion)
            log_alpha_min = np.log(1e-2)
            log_alpha_opt, min_g_func = grid_search(
                algo, log_alpha_min, np.log(C_max), monitor, max_evals=25,
                tol=tol, samp="random")
            print(log_alpha_opt)
        elif method == "lhs":
            criterion = HeldOutSmoothedHinge(X_val, y_val, model,
                                             X_test=X_test, y_test=y_test)
            algo = Forward(criterion)
            log_alpha_min = np.log(1e-2)
            log_alpha_opt, min_g_func = grid_search(
                algo, log_alpha_min, np.log(C_max), monitor, max_evals=25,
                tol=tol, samp="lhs")
            print(log_alpha_opt)

    # Materialize traces as arrays for the (joblib-friendly) return value.
    monitor.times = np.array(monitor.times)
    monitor.objs = np.array(monitor.objs)
    monitor.objs_test = np.array(monitor.objs_test)
    monitor.log_alphas = np.array(monitor.log_alphas)
    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test,
            monitor.log_alphas, norm(y_val), norm(y_test))
def test_grad_search():
    """Check all grad_search differentiation methods follow the same path.

    For each model: run forward, implicit and implicit_forward on dense
    data, then forward and implicit_forward on sparse data (X_train_s),
    and assert the consecutive monitors agree on everything but timing.
    """
    for model in models:
        ######################################################################
        # check that the paths are the same in the line search
        n_outer = 5
        monitor1 = Monitor()
        warm_start = WarmStart()
        grad_search(X_train, y_train, dict_log_alpha[model], X_val, y_val,
                    X_test, y_test, tol, monitor1, method="forward",
                    maxit=10000, n_outer=n_outer, warm_start=warm_start,
                    niter_jac=10000, tol_jac=tol, model=model)
        monitor2 = Monitor()
        warm_start = WarmStart()
        grad_search(X_train, y_train, dict_log_alpha[model], X_val, y_val,
                    X_test, y_test, tol, monitor2, method="implicit",
                    maxit=10000, n_outer=n_outer, warm_start=warm_start,
                    niter_jac=10000, tol_jac=tol, model=model)
        monitor3 = Monitor()
        warm_start = WarmStart()
        grad_search(X_train, y_train, dict_log_alpha[model], X_val, y_val,
                    X_test, y_test, tol, monitor3, method="implicit_forward",
                    maxit=1000, n_outer=n_outer, warm_start=warm_start,
                    niter_jac=10000, tol_jac=tol, model=model)
        monitor4 = Monitor()
        warm_start = WarmStart()
        grad_search(X_train_s, y_train, dict_log_alpha[model], X_val, y_val,
                    X_test, y_test, tol, monitor4, method="forward",
                    maxit=10000, n_outer=n_outer, warm_start=warm_start,
                    niter_jac=10000, tol_jac=tol, model=model)
        # need to regularize the solution in this case:
        # this means that the solutions returned are different
        # monitor5 = Monitor()
        # warm_start = WarmStart()
        # grad_search(
        #     X_train_s, y_train, dict_log_alpha[model], X_val, y_val,
        #     X_test, y_test,
        #     tol, monitor5, method="implicit", maxit=10000,
        #     n_outer=n_outer,
        #     warm_start=warm_start, niter_jac=10000, tol_jac=tol,
        #     model=model)
        monitor5 = Monitor()
        warm_start = WarmStart()
        grad_search(X_train_s, y_train, dict_log_alpha[model], X_val, y_val,
                    X_test, y_test, tol, monitor5, method="implicit_forward",
                    maxit=1000, n_outer=n_outer, warm_start=warm_start,
                    niter_jac=10000, tol_jac=tol, model=model)

        # forward vs implicit (dense)
        assert np.allclose(np.array(monitor1.log_alphas),
                           np.array(monitor2.log_alphas))
        assert np.allclose(np.array(monitor1.grads),
                           np.array(monitor2.grads))
        assert np.allclose(np.array(monitor1.objs),
                           np.array(monitor2.objs))
        assert np.allclose(np.array(monitor1.objs_test),
                           np.array(monitor2.objs_test))
        assert not np.allclose(np.array(monitor1.times),
                               np.array(monitor2.times))

        # implicit vs implicit_forward (dense)
        assert np.allclose(np.array(monitor2.log_alphas),
                           np.array(monitor3.log_alphas))
        assert np.allclose(np.array(monitor2.grads),
                           np.array(monitor3.grads))
        assert np.allclose(np.array(monitor2.objs),
                           np.array(monitor3.objs))
        assert np.allclose(np.array(monitor2.objs_test),
                           np.array(monitor3.objs_test))
        assert not np.allclose(np.array(monitor2.times),
                               np.array(monitor3.times))

        # dense implicit_forward vs sparse forward
        assert np.allclose(np.array(monitor3.log_alphas),
                           np.array(monitor4.log_alphas))
        assert np.allclose(np.array(monitor3.grads),
                           np.array(monitor4.grads))
        assert np.allclose(np.array(monitor3.objs),
                           np.array(monitor4.objs))
        assert np.allclose(np.array(monitor3.objs_test),
                           np.array(monitor4.objs_test))
        assert not np.allclose(np.array(monitor3.times),
                               np.array(monitor4.times))

        # sparse forward vs sparse implicit_forward
        assert np.allclose(np.array(monitor4.log_alphas),
                           np.array(monitor5.log_alphas))
        assert np.allclose(np.array(monitor4.grads),
                           np.array(monitor5.grads))
        assert np.allclose(np.array(monitor4.objs),
                           np.array(monitor5.objs))
        assert np.allclose(np.array(monitor4.objs_test),
                           np.array(monitor5.objs_test))
        assert not np.allclose(np.array(monitor4.times),
                               np.array(monitor5.times))
def parallel_function(dataset_name, method, tol=1e-5, n_outer=50,
                      tolerance_decrease='exponential'):
    """Run one elastic-net (two-hyperparameter) selection experiment.

    NOTE(review): ``n_outer`` is overridden to 25 below — confirm whether
    the parameter should be honored.
    """
    # load data
    X_train, X_val, X_test, y_train, y_val, y_test = get_data(dataset_name)
    n_samples, n_features = X_train.shape
    print('n_samples', n_samples)
    print('n_features', n_features)
    # Map 0/1 labels to -1/+1.
    y_train[y_train == 0.0] = -1.0
    y_val[y_val == 0.0] = -1.0
    y_test[y_test == 0.0] = -1.0

    # Largest useful l1 regularization for this design.
    alpha_max = np.max(np.abs(X_train.T @ y_train))
    alpha_max /= X_train.shape[0]
    log_alpha_max = np.log(alpha_max)
    alpha_min = alpha_max * 1e-2
    # alphas = np.geomspace(alpha_max, alpha_min, 10)
    # log_alphas = np.log(alphas)

    # Both hyperparameters initialized at 0.1 * alpha_max.
    log_alpha1_0 = np.log(0.1 * alpha_max)
    log_alpha2_0 = np.log(0.1 * alpha_max)
    log_alpha_max = np.log(alpha_max)

    n_outer = 25

    if dataset_name == "rcv1":
        size_loop = 2
    else:
        size_loop = 2
    model = ElasticNet(X_train, y_train, log_alpha1_0, log_alpha2_0,
                       log_alpha_max, max_iter=1000, tol=tol)
    for i in range(size_loop):
        monitor = Monitor()
        if method == "implicit_forward":
            criterion = HeldOutMSE(X_val, y_val, model,
                                   X_test=X_test, y_test=y_test)
            algo = ImplicitForward(criterion, tol_jac=1e-3, n_iter_jac=100)
            _, _, _ = grad_search(
                algo=algo, verbose=False,
                log_alpha0=np.array([log_alpha1_0, log_alpha2_0]), tol=tol,
                n_outer=n_outer, monitor=monitor,
                t_max=dict_t_max[dataset_name],
                tolerance_decrease=tolerance_decrease)
        elif method == "forward":
            criterion = HeldOutMSE(X_val, y_val, model,
                                   X_test=X_test, y_test=y_test)
            algo = Forward(criterion)
            _, _, _ = grad_search(
                algo=algo,
                log_alpha0=np.array([log_alpha1_0, log_alpha2_0]), tol=tol,
                n_outer=n_outer, monitor=monitor,
                t_max=dict_t_max[dataset_name],
                tolerance_decrease=tolerance_decrease)
        elif method == "implicit":
            criterion = HeldOutMSE(X_val, y_val, model,
                                   X_test=X_test, y_test=y_test)
            algo = Implicit(criterion)
            _, _, _ = grad_search(
                algo=algo,
                log_alpha0=np.array([log_alpha1_0, log_alpha2_0]), tol=tol,
                n_outer=n_outer, monitor=monitor,
                t_max=dict_t_max[dataset_name],
                tolerance_decrease=tolerance_decrease)
        elif method == "grid_search":
            criterion = HeldOutMSE(X_val, y_val, model,
                                   X_test=X_test, y_test=y_test)
            algo = Forward(criterion)
            log_alpha_min = np.log(alpha_min)
            log_alpha_opt, min_g_func = grid_search(
                algo, log_alpha_min, log_alpha_max, monitor, max_evals=10,
                tol=tol, samp="grid", t_max=dict_t_max[dataset_name],
                log_alphas=None, nb_hyperparam=2)
            print(log_alpha_opt)
        elif method == "random":
            criterion = HeldOutMSE(X_val, y_val, model,
                                   X_test=X_test, y_test=y_test)
            algo = Forward(criterion)
            log_alpha_min = np.log(alpha_min)
            log_alpha_opt, min_g_func = grid_search(
                algo, log_alpha_min, np.log(alpha_max), monitor,
                max_evals=10, tol=tol, samp="random",
                t_max=dict_t_max[dataset_name], nb_hyperparam=2)
            print(log_alpha_opt)
        elif method == "lhs":
            criterion = HeldOutMSE(X_val, y_val, model,
                                   X_test=X_test, y_test=y_test)
            algo = Forward(criterion)
            log_alpha_min = np.log(alpha_min)
            # NOTE(review): unlike the other zeroth-order branches this call
            # does not pass nb_hyperparam=2 — confirm intent.
            log_alpha_opt, min_g_func = grid_search(
                algo, log_alpha_min, np.log(alpha_max), monitor,
                max_evals=10, tol=tol, samp="lhs",
                t_max=dict_t_max[dataset_name])
            print(log_alpha_opt)

    # Materialize traces as arrays (copies) for the return value.
    monitor.times = np.array(monitor.times).copy()
    monitor.objs = np.array(monitor.objs).copy()
    monitor.objs_test = np.array(monitor.objs_test).copy()
    monitor.log_alphas = np.array(monitor.log_alphas).copy()
    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test,
            monitor.log_alphas, norm(y_val), norm(y_test), log_alpha_max)
def parallel_function(dataset_name, method, tol=1e-5, n_outer=50, tolerance_decrease='constant'): t_max = dict_tmax[dataset_name] # load data X_train, X_val, X_test, y_train, y_val, y_test = get_data(dataset_name) n_samples, n_features = X_train.shape # compute alpha_max alpha_max = np.abs(X_train.T @ y_train).max() / n_samples log_alpha0 = np.log(0.1 * alpha_max) idx_nz = scipy.sparse.linalg.norm(X_train, axis=0) != 0 L_min = scipy.sparse.linalg.norm(X_train[:, idx_nz], axis=0).min()**2 / n_samples log_alpha0_mcp = np.array([log_alpha0, np.log(2 / L_min)]) list_log_alphas = np.log(alpha_max * np.geomspace(1, 0.0001, 100)) list_log_gammas = np.log(np.geomspace(1.1 / L_min, 1000 / L_min, 5)) try: n_outer = dict_n_outers[dataset_name, method] except Exception: n_outer = 50 if dataset_name == "rcv1": size_loop = 2 else: size_loop = 1 for i in range(size_loop): monitor = Monitor() warm_start = WarmStart() if method == 'grid_search': # n_alpha = 100 # p_alphas = np.geomspace(1, 0.0001, n_alpha) grid_searchMCP(X_train, y_train, list_log_alphas, list_log_gammas, X_val, y_val, X_test, y_test, tol, monitor=monitor) elif method in ("bayesian", "random"): monitor = hyperopt_lasso(X_train, y_train, log_alpha0, X_val, y_val, X_test, y_test, tol, max_evals=n_outer, method=method) else: # do line search to find the optimal lambda log_alpha, val, grad = grad_search(X_train, y_train, log_alpha0_mcp, X_val, y_val, X_test, y_test, tol, monitor, method=method, maxit=10000, n_outer=n_outer, warm_start=warm_start, niter_jac=100, model="mcp", t_max=t_max) del log_alpha, val, grad # as not used monitor.times = np.array(monitor.times) monitor.objs = np.array(monitor.objs) monitor.objs_test = np.array(monitor.objs_test) monitor.log_alphas = np.array(monitor.log_alphas) return (dataset_name, method, tol, n_outer, tolerance_decrease, monitor.times, monitor.objs, monitor.objs_test, monitor.log_alphas, norm(y_val), norm(y_test))
def parallel_function(dataset_name, method, tol=1e-5, n_outer=50,
                      tolerance_decrease='constant'):
    """Run one cross-validated hyperparameter-selection experiment.

    Fits an elastic net or sparse logistic model (chosen by the
    module-level ``model_name``) and selects its two regularization
    parameters with the requested method.

    Parameters
    ----------
    dataset_name : str
        libsvm dataset key (also indexes ``dict_palphamin``,
        ``dict_t_max``, ``dict_point_grid_search``, ``dict_n_outers``).
    method : str
        'grid_search', 'random', 'bayesian', or a string starting with
        'implicit_forward' (gradient-based search).
    tol : float
        Inner-solver tolerance.
    n_outer : int
        Outer-iteration budget; overridden per (dataset, method) when
        available.
    tolerance_decrease : str
        Returned unchanged in the result tuple.

    Returns
    -------
    tuple
        (dataset_name, method, tol, n_outer, tolerance_decrease, times,
        objs, objs_test, alphas, alpha_max, model_name).

    Raises
    ------
    NotImplementedError
        If ``method`` matches none of the supported strategies.
    """
    # load data
    X, y = fetch_libsvm(dataset_name)
    y -= np.mean(y)
    # compute alpha_max
    alpha_max = np.abs(X.T @ y).max() / len(y)

    if model_name == "logreg":
        alpha_max /= 2
    alpha_min = alpha_max * dict_palphamin[dataset_name]

    if model_name == "enet":
        estimator = linear_model.ElasticNet(
            fit_intercept=False, max_iter=10_000, warm_start=True, tol=tol)
        model = ElasticNet(estimator=estimator)
    elif model_name == "logreg":
        # NOTE(review): `estimator` is only bound in the "enet" branch
        # above, so this line raises NameError when reached. A sparse
        # logistic estimator presumably needs to be created here —
        # confirm against the original experiment and fix upstream.
        model = SparseLogreg(estimator=estimator)

    # TODO improve this
    try:
        n_outer = dict_n_outers[dataset_name, method]
    except Exception:
        n_outer = 20

    size_loop = 2
    for _ in range(size_loop):
        if model_name == "lasso" or model_name == "enet":
            sub_criterion = HeldOutMSE(None, None)
        elif model_name == "logreg":
            # BUG FIX: the original assigned `criterion` here, leaving
            # `sub_criterion` undefined and crashing the CrossVal wrapper
            # below with NameError on every logreg run.
            sub_criterion = HeldOutLogistic(None, None)
        kf = KFold(n_splits=5, shuffle=True, random_state=42)
        criterion = CrossVal(sub_criterion, cv=kf)

        algo = ImplicitForward(tol_jac=1e-3)
        monitor = Monitor()
        t_max = dict_t_max[dataset_name]
        if method == 'grid_search':
            num1D = dict_point_grid_search[dataset_name]
            alpha1D = np.geomspace(alpha_max, alpha_min, num=num1D)
            # full 2-D Cartesian grid over (alpha1, alpha2)
            alphas = [np.array(i) for i in product(alpha1D, alpha1D)]
            grid_search(
                algo, criterion, model, X, y, alpha_min, alpha_max,
                monitor, max_evals=100, tol=tol, alphas=alphas)
        elif method == 'random' or method == 'bayesian':
            hyperopt_wrapper(
                algo, criterion, model, X, y, alpha_min, alpha_max,
                monitor, max_evals=30, tol=tol, method=method,
                size_space=2, t_max=t_max)
        elif method.startswith("implicit_forward"):
            # do gradient descent to find the optimal lambda
            alpha0 = np.array([alpha_max / 100, alpha_max / 100])
            n_outer = 30
            if method == 'implicit_forward':
                optimizer = GradientDescent(
                    n_outer=n_outer, p_grad_norm=1, verbose=True, tol=tol,
                    t_max=t_max)
            else:
                # variant with geometric tolerance decrease
                optimizer = GradientDescent(
                    n_outer=n_outer, p_grad_norm=1, verbose=True, tol=tol,
                    t_max=t_max, tol_decrease="geom")
            grad_search(
                algo, criterion, model, optimizer, X, y, alpha0, monitor)
        else:
            raise NotImplementedError

    monitor.times = np.array(monitor.times)
    monitor.objs = np.array(monitor.objs)
    monitor.objs_test = 0  # TODO
    monitor.alphas = np.array(monitor.alphas)
    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test, monitor.alphas,
            alpha_max, model_name)
# Per-method outer-iteration budgets for the 20newsgroups experiments.
dict_n_outers["20newsgroups", "bayesian"] = 75
dict_n_outers["20newsgroups", "random"] = 35

dataset_name = "20newsgroups"

for method in methods:
    monitor = Monitor()
    warm_start = WarmStart()
    if method == "grid_search":
        grid_searchCV(
            X_train, y_train, log_alphas, X_val, y_val, X_test, y_test,
            tol, monitor)
    else:
        n_outer = dict_n_outers[dataset_name, method]
        grad_search(
            X_train, y_train, log_alpha0, X_val, y_val, X_test, y_test,
            tol, monitor, method=method, maxit=1000, n_outer=n_outer,
            warm_start=warm_start, model="lasso", t_max=20)
    # running best (cumulative minimum) objective after each evaluation
    pobj = np.array([np.min(monitor.objs[:k])
                     for k in np.arange(len(monitor.objs)) + 1])
    dict_pobj[method] = pobj
    dict_times[method] = np.array(monitor.times)

# best objective reached by any method (used as reference for suboptimality)
# BUG FIX: `np.infty` was removed in NumPy 2.0 — `np.inf` is the portable
# spelling and behaves identically.
pobj_star = np.inf
for method in methods:
    pobj_star = np.minimum(pobj_star, np.min(dict_pobj[method]))

# pretty names for the plot legend
dict_legend = {}
dict_legend["implicit_forward"] = "implicit forward"
dict_legend["forward"] = "forward"
dict_legend["implicit"] = "implicit"
print('sparse-ho started')
t0 = time.time()

# L1-penalized logistic regression, saga inner solver
estimator = LogisticRegression(
    penalty='l1', fit_intercept=False, solver='saga', tol=tol)
model = SparseLogreg(max_iter=max_iter, estimator=estimator)
criterion = HeldOutLogistic(idx_train, idx_val)

monitor_grad = Monitor()
algo = ImplicitForward(tol_jac=tol, n_iter_jac=1000)
# start the outer optimization at 0.1 * alpha_max (in log scale)
grad_search(
    algo, criterion, model, X, y, np.log(0.1 * alpha_max), monitor_grad,
    n_outer=10, tol=tol)
objs_grad = np.array(monitor_grad.objs)

t_grad_search = time.time() - t0

print('sparse-ho finished')
print("Time to compute CV for sparse-ho: %.2f" % t_grad_search)

# regularization path visited by grad search, as a fraction of alpha_max
p_alphas_grad = np.exp(np.array(monitor_grad.log_alphas)) / alpha_max
# BUG FIX: removed a redundant duplicate assignment of `objs_grad`
# (it was computed identically twice).
def parallel_function(dataset_name, method, tol=1e-5, n_outer=50,
                      tolerance_decrease='constant'):
    """Run one held-out hyperparameter-selection experiment.

    Selects the regularization strength of a lasso or sparse logistic
    model (chosen by the module-level ``model_name``) with the requested
    strategy, monitoring objectives and timings.

    Parameters
    ----------
    dataset_name : str
        Key into ``get_data``, ``dict_n_outers`` and ``dict_t_max``.
    method : str
        'grid_search', 'random', 'bayesian', or a gradient-based method
        name (anything else falls through to ``grad_search``).
    tol : float
        Inner-solver tolerance.
    n_outer : int
        Outer-iteration budget; overridden per (dataset, method) when
        available.
    tolerance_decrease : str
        Schedule forwarded to ``grad_search``.

    Returns
    -------
    tuple
        (dataset_name, method, tol, n_outer, tolerance_decrease, times,
        objs, objs_test, log_alphas, ||y_val||, ||y_test||,
        log_alpha_max, model_name).
    """
    # load data
    X_train, X_val, X_test, y_train, y_val, y_test = get_data(dataset_name)
    n_samples, _ = X_train.shape
    # compute alpha_max (halved for logreg's loss scaling)
    alpha_max = np.abs(X_train.T @ y_train).max() / n_samples
    if model_name == "logreg":
        alpha_max /= 2
    alpha_min = alpha_max / 10_000
    log_alpha_max = np.log(alpha_max)
    log_alpha_min = np.log(alpha_min)
    log_alpha0 = np.log(0.1 * alpha_max)

    if model_name == "lasso":
        model = Lasso(X_train, y_train)
    elif model_name == "logreg":
        model = SparseLogreg(X_train, y_train)

    try:
        n_outer = dict_n_outers[dataset_name, method]
    except Exception:
        n_outer = 20

    size_loop = 2
    for _ in range(size_loop):
        if model_name == "lasso":
            criterion = HeldOutMSE(X_val, y_val, model, X_test=X_test,
                                   y_test=y_test)
        elif model_name == "logreg":
            criterion = HeldOutLogistic(X_val, y_val, model, X_test=X_test,
                                        y_test=y_test)
        algo = dict_algo[method](criterion)
        monitor = Monitor()
        if method == 'grid_search':
            log_alphas = np.log(np.geomspace(alpha_max, alpha_min, num=100))
            grid_search(algo, None, None, monitor, log_alphas=log_alphas,
                        tol=tol)
        elif method == 'random':
            # NOTE(review): `n_alphas` is read from module scope here —
            # confirm it is defined by the surrounding script.
            grid_search(algo, log_alpha_max, log_alpha_min, monitor,
                        tol=tol, max_evals=n_alphas,
                        t_max=dict_t_max[dataset_name])
        elif method == 'bayesian':
            # BUG FIX: the original used `method in ("bayesian")`, which is
            # a *substring* test (("bayesian") is a plain string, not a
            # tuple), so e.g. method == "yes" would wrongly match.
            hyperopt_wrapper(algo, log_alpha_min, log_alpha_max, monitor,
                             max_evals=n_alphas, tol=tol, method='bayesian',
                             t_max=dict_t_max[dataset_name])
        else:
            # do line search to find the optimal lambda
            grad_search(algo, log_alpha0, monitor, n_outer=n_outer, tol=tol,
                        tolerance_decrease=tolerance_decrease,
                        t_max=dict_t_max[dataset_name])

    monitor.times = np.array(monitor.times)
    monitor.objs = np.array(monitor.objs)
    monitor.objs_test = np.array(monitor.objs_test)
    monitor.log_alphas = np.array(monitor.log_alphas)
    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test,
            monitor.log_alphas, norm(y_val), norm(y_test), log_alpha_max,
            model_name)
monitor_grid = Monitor()
# evaluate the validation criterion on every grid point
# (one log-alpha per class, as fractions of alpha_max)
for i in range(n_alphas):
    log_alpha_i = np.log(alpha_max * p_alphas[:, i])
    logit_multiclass.get_val(
        model, X, y, log_alpha_i, None, monitor_grid, tol)

# NOTE(review): deliberate crash left in — execution stops here, so the
# grad-search and hyperopt sections below never run. Remove to enable them.
1/0

print("###################### GRAD SEARCH LS ###################")
n_outer = 100

model = SparseLogreg(estimator=estimator)
logit_multiclass = LogisticMulticlass(idx_train, idx_val, idx_test, algo)

monitor = Monitor()
log_alpha0 = np.ones(n_classes) * np.log(0.1 * alpha_max)
# warm-start grad search from the best point found on the grid above
# (overwrites the uniform initialization on the previous line)
idx_min = np.argmin(np.array(monitor_grid.objs))
log_alpha0 = monitor_grid.log_alphas[idx_min]

optimizer = GradientDescent(
    n_outer=n_outer, step_size=None, p_grad_norm=0.1, tol=tol)
grad_search(
    algo, logit_multiclass, model, optimizer, X, y, log_alpha0, monitor)

print("###################### USE HYPEROPT ###################")
# search window: [alpha_max / 10_000, alpha_max] in log scale
log_alpha_max = np.log(alpha_max)
log_alpha_min = np.log(alpha_max / 10_000)
monitor_hyp = Monitor()
hyperopt_wrapper(
    algo, logit_multiclass, model, X, y, log_alpha_min, log_alpha_max,
    monitor_hyp, tol=tol, size_space=n_classes, max_evals=10)
# Grad-search with sparse-ho and callback # --------------------------------------- print('sparse-ho started') model = Lasso(estimator=estimator) criterion = HeldOutMSE(idx_train, idx_val) algo = ImplicitForward(criterion) # use Monitor(callback) with your custom callback monitor = Monitor(callback=callback) grad_search(algo, criterion, model, X, y, np.log(alpha_max / 10), monitor, n_outer=30, tol=tol) print('sparse-ho finished') ############################################################################## # Plot results # ------------ current_palette = sns.color_palette("colorblind") fig = plt.figure(figsize=(5, 3)) plt.plot(monitor.times, objs_test)