monitor_grid = Monitor()
for i in range(n_alphas):
    log_alpha_i = np.log(alpha_max * p_alphas[:, i])
    logit_multiclass.get_val(
        model, X, y, log_alpha_i, None, monitor_grid, tol)

print("###################### GRAD SEARCH LS ###################")
n_outer = 100

model = SparseLogreg(estimator=estimator)
logit_multiclass = LogisticMulticlass(idx_train, idx_val, idx_test, algo)

monitor = Monitor()
# log_alpha0 = np.ones(n_classes) * np.log(0.1 * alpha_max)
# warm start the gradient search at the best point found on the grid
idx_min = np.argmin(np.array(monitor_grid.objs))
log_alpha0 = monitor_grid.log_alphas[idx_min]

optimizer = GradientDescent(
    n_outer=n_outer, step_size=None, p_grad_norm=0.1, tol=tol)
grad_search(
    algo, logit_multiclass, model, optimizer, X, y, log_alpha0, monitor)

print("###################### USE HYPEROPT ###################")
log_alpha_max = np.log(alpha_max)
log_alpha_min = np.log(alpha_max / 10_000)
monitor_hyp = Monitor()
hyperopt_wrapper(
    algo, logit_multiclass, model, X, y, log_alpha_min, log_alpha_max,
    monitor_hyp, tol=tol, size_space=n_classes, max_evals=10)
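# A minimal follow-up sketch, not part of the original script: pick the best
# point found by hyperopt. It assumes `monitor_hyp` exposes `objs` and
# `log_alphas`, as `monitor_grid` does above.
idx_best = np.argmin(np.array(monitor_hyp.objs))
log_alpha_best = monitor_hyp.log_alphas[idx_best]
print("Best held-out objective: %0.3e" % monitor_hyp.objs[idx_best])
print("Best log-regularization parameters:", log_alpha_best)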
# all_algo_name = ['random_search']
for algo_name in all_algo_name:
    model = ElasticNet(estimator=estimator)
    sub_criterion = HeldOutMSE(None, None)
    alpha0 = np.array([alpha_max / 10, alpha_max / 10])
    monitor = Monitor()
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    criterion = CrossVal(sub_criterion, cv=kf)
    algo = ImplicitForward(tol_jac=1e-3)
    # optimizer = LineSearch(n_outer=10, tol=tol)
    if algo_name.startswith('implicit_forward'):
        if algo_name == "implicit_forward_approx":
            optimizer = GradientDescent(
                n_outer=30, p_grad_norm=1., verbose=True, tol=tol,
                tol_decrease="geom")
        else:
            optimizer = GradientDescent(
                n_outer=30, p_grad_norm=1., verbose=True, tol=tol)
        grad_search(algo, criterion, model, optimizer, X, y, alpha0, monitor)
    elif algo_name == 'grid_search':
        grid_search(algo, criterion, model, X, y, None,
def parallel_function(
        dataset_name, method, tol=1e-5, n_outer=50,
        tolerance_decrease='constant'):

    # load data
    X, y = fetch_libsvm(dataset_name)
    y -= np.mean(y)
    # compute alpha_max
    alpha_max = np.abs(X.T @ y).max() / len(y)

    if model_name == "logreg":
        alpha_max /= 2
    alpha_min = alpha_max * dict_palphamin[dataset_name]

    if model_name == "enet":
        estimator = linear_model.ElasticNet(
            fit_intercept=False, max_iter=10_000, warm_start=True, tol=tol)
        model = ElasticNet(estimator=estimator)
    elif model_name == "logreg":
        model = SparseLogreg(estimator=estimator)

    # TODO improve this
    try:
        n_outer = dict_n_outers[dataset_name, method]
    except Exception:
        n_outer = 20

    size_loop = 2
    for _ in range(size_loop):
        if model_name == "lasso" or model_name == "enet":
            sub_criterion = HeldOutMSE(None, None)
        elif model_name == "logreg":
            sub_criterion = HeldOutLogistic(None, None)
        kf = KFold(n_splits=5, shuffle=True, random_state=42)
        criterion = CrossVal(sub_criterion, cv=kf)
        algo = ImplicitForward(tol_jac=1e-3)
        monitor = Monitor()
        t_max = dict_t_max[dataset_name]
        if method == 'grid_search':
            num1D = dict_point_grid_search[dataset_name]
            alpha1D = np.geomspace(alpha_max, alpha_min, num=num1D)
            alphas = [np.array(i) for i in product(alpha1D, alpha1D)]
            grid_search(
                algo, criterion, model, X, y, alpha_min, alpha_max, monitor,
                max_evals=100, tol=tol, alphas=alphas)
        elif method == 'random' or method == 'bayesian':
            hyperopt_wrapper(
                algo, criterion, model, X, y, alpha_min, alpha_max, monitor,
                max_evals=30, tol=tol, method=method, size_space=2,
                t_max=t_max)
        elif method.startswith("implicit_forward"):
            # do gradient descent to find the optimal lambda
            alpha0 = np.array([alpha_max / 100, alpha_max / 100])
            n_outer = 30
            if method == 'implicit_forward':
                optimizer = GradientDescent(
                    n_outer=n_outer, p_grad_norm=1, verbose=True, tol=tol,
                    t_max=t_max)
            else:
                optimizer = GradientDescent(
                    n_outer=n_outer, p_grad_norm=1, verbose=True, tol=tol,
                    t_max=t_max, tol_decrease="geom")
            grad_search(
                algo, criterion, model, optimizer, X, y, alpha0, monitor)
        else:
            raise NotImplementedError

    monitor.times = np.array(monitor.times)
    monitor.objs = np.array(monitor.objs)
    monitor.objs_test = 0  # TODO
    monitor.alphas = np.array(monitor.alphas)
    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test, monitor.alphas,
            alpha_max, model_name)
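# A minimal usage sketch, not part of the original file: run
# `parallel_function` over dataset/method pairs with joblib. The dataset and
# method names below are placeholders, and `model_name`, the `dict_*`
# lookups, and `product` (from itertools) are assumed to be defined at module
# level as in the function above.
from joblib import Parallel, delayed

datasets = ["rcv1.binary"]  # hypothetical dataset list
methods = ["implicit_forward", "grid_search", "random"]

results = Parallel(n_jobs=4, verbose=100)(
    delayed(parallel_function)(dataset_name, method, n_outer=50)
    for dataset_name, method in product(datasets, methods))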
print("Vanilla LassoCV: Mean-squared error on test data %f" % mse_cv) ############################################################################## ############################################################################## # Weighted Lasso with sparse-ho. # We use the vanilla lassoCV coefficients as a starting point alpha0 = model_cv.alpha_ * np.ones(n_features) # Weighted Lasso: Sparse-ho: 1 param per feature estimator = Lasso(fit_intercept=False, max_iter=100, warm_start=True) model = WeightedLasso(estimator=estimator) sub_criterion = HeldOutMSE(idx_train, idx_val) criterion = CrossVal(sub_criterion, cv=cv) algo = ImplicitForward() monitor = Monitor() optimizer = GradientDescent( n_outer=100, tol=1e-7, verbose=True, p_grad_norm=1.9) results = grad_search( algo, criterion, model, optimizer, X, y, alpha0, monitor) ############################################################################## estimator.weights = monitor.alphas[-1] estimator.fit(X, y) ############################################################################## # MSE on validation set mse_sho_val = mean_squared_error(y, estimator.predict(X)) # MSE on test set, ie unseen data mse_sho_test = mean_squared_error(y_test, estimator.predict(X_test)) # Oracle MSE mse_oracle = mean_squared_error(y_test, X_test @ w_true)
    X, y, alpha_min, alpha_max, monitor_grid, alphas=alphas, tol=tol)
objs = np.array(monitor_grid.objs)

##############################################################################
# Grad-search
# -----------

optimizer_names = ['line-search', 'gradient-descent', 'adam']
optimizers = {
    'line-search': LineSearch(n_outer=10, tol=tol),
    'gradient-descent': GradientDescent(n_outer=10, step_size=100),
    'adam': Adam(n_outer=10, lr=0.11)}

monitors = {}
alpha0 = alpha_max / 10  # starting point

for optimizer_name in optimizer_names:
    estimator = LogisticRegression(
        penalty='l1', fit_intercept=False, solver='saga', tol=tol)
    model = SparseLogreg(estimator=estimator)
    criterion = HeldOutLogistic(idx_train, idx_val)
    monitor_grad = Monitor()
##############################################################################
# Grad-search with sparse-ho
# --------------------------
estimator = linear_model.ElasticNet(
    fit_intercept=False, max_iter=max_iter, warm_start=True)
print("Started grad-search")
t_grad_search = -time.time()
monitor = Monitor()
n_outer = 10
alpha0 = np.array([alpha_max * 0.9, alpha_max * 0.9])
model = ElasticNet(estimator=estimator)
criterion = HeldOutMSE(idx_train, idx_val)
algo = ImplicitForward(tol_jac=1e-3, n_iter_jac=100, max_iter=max_iter)
optimizer = GradientDescent(
    n_outer=n_outer, tol=tol, p_grad_norm=1.5, verbose=True)
grad_search(
    algo, criterion, model, optimizer, X, y, alpha0=alpha0, monitor=monitor)
t_grad_search += time.time()
monitor.alphas = np.array(monitor.alphas)

print("Time grid search %f" % t_grid_search)
print("Time grad-search %f" % t_grad_search)
print("Minimum grid search %0.3e" % results.min())
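# An added summary line (assumption: `monitor.objs` holds the held-out
# objectives visited by grad-search, as in the other scripts above).
print("Minimum grad-search %0.3e" % np.array(monitor.objs).min())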
# Grad-search
# -----------
print('sparse-ho started')

t0 = time.time()
estimator = LogisticRegression(
    penalty='l1', fit_intercept=False, tol=tol)
model = SparseLogreg(estimator=estimator)
criterion = HeldOutLogistic(idx_train, idx_val)
monitor_grad = Monitor()
algo = ImplicitForward(tol_jac=tol, n_iter_jac=1000)
optimizer = GradientDescent(n_outer=10, tol=tol)
grad_search(
    algo, criterion, model, optimizer, X, y, alpha0, monitor_grad)
objs_grad = np.array(monitor_grad.objs)
t_grad_search = time.time() - t0

print('sparse-ho finished')
print(f"Time to compute grad search: {t_grad_search:.2f} s")

p_alphas_grad = np.array(monitor_grad.alphas) / alpha_max
objs_grad = np.array(monitor_grad.objs)
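# A minimal plotting sketch, not part of the original script: visualize the
# held-out objective along the grad-search path. It assumes matplotlib is
# available; `p_alphas_grad` and `objs_grad` are computed just above.
import matplotlib.pyplot as plt

plt.figure()
plt.semilogx(p_alphas_grad, objs_grad, marker='o')
plt.xlabel(r"$\lambda / \lambda_{\max}$")
plt.ylabel("Held-out logistic loss")
plt.title("Grad-search path")
plt.tight_layout()
plt.show()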
print("Starting path computation...") for algorithm in algorithms: estimator = celer.Lasso(fit_intercept=False, max_iter=1000, warm_start=True, tol=tol) print('%s started' % algorithm) model = Lasso(estimator=estimator) criterion = HeldOutMSE(None, None) log_alpha0 = np.log(alpha_max / 10) monitor = Monitor() cross_val_criterion = CrossVal(criterion, cv=kf) algo = ImplicitForward() optimizer = GradientDescent(n_outer=10, tol=tol, verbose=True, p_grad0=1) # optimizer = LineSearch(n_outer=10, tol=tol, verbose=True) if algorithm == 'grad_search': grad_search(algo, cross_val_criterion, model, optimizer, X, y, log_alpha0, monitor) objs = np.array(monitor.objs) log_alphas = np.array(monitor.log_alphas) elif algorithm.startswith('grid_search'): if algorithm == 'grid_search10': n_alphas = 10 else: n_alphas = 100 p_alphas = np.geomspace(1, p_alpha_min, n_alphas) alphas = alpha_max * p_alphas reg = LassoCV(cv=kf,
    tol=tol)
dict_monitor = {}

all_algo_name = ['implicit_forward', 'grid_search']

for algo_name in all_algo_name:
    model = Lasso(estimator=estimator)
    sub_criterion = HeldOutMSE(None, None)
    alpha0 = alpha_max / 10
    monitor = Monitor()
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    criterion = CrossVal(sub_criterion, cv=kf)
    algo = ImplicitForward(tol_jac=1e-3)
    optimizer = GradientDescent(
        n_outer=30, p_grad_norm=1., verbose=True, tol=tol)
    if algo_name == 'implicit_forward':
        grad_search(
            algo, criterion, model, optimizer, X, y, alpha0, monitor)
    elif algo_name == 'grid_search':
        grid_search(
            algo, criterion, model, X, y, alpha_min, alpha_max, monitor,
            max_evals=20, tol=tol)
    elif algo_name == 'random_search':
for algorithm in algorithms:
    estimator = sklearn.linear_model.ElasticNet(
        fit_intercept=False, max_iter=3_000, warm_start=True, tol=tol)
    print('%s started' % algorithm)

    model = ElasticNet(estimator=estimator)
    criterion = HeldOutMSE(None, None)
    log_alpha0 = np.array([np.log(alpha_max / 10), np.log(alpha_max / 10)])
    monitor = Monitor()
    cross_val_criterion = CrossVal(criterion, cv=kf)
    algo = ImplicitForward()
    # optimizer = LineSearch(n_outer=10, tol=tol, verbose=True)

    if algorithm.startswith('grad_search'):
        if algorithm == 'grad_search':
            optimizer = GradientDescent(
                n_outer=max_evals, tol=tol, verbose=True, p_grad_norm=1.9)
        else:
            optimizer = LineSearch(n_outer=25, verbose=True, tol=tol)
        grad_search(
            algo, cross_val_criterion, model, optimizer, X, y, log_alpha0,
            monitor)
    elif algorithm.startswith('grid_search'):
        if algorithm == 'grid_search10':
            n_alphas = 5
        else:
            n_alphas = 30
        p_alphas = np.geomspace(1, p_alpha_min, n_alphas)
        alphas = alpha_max * p_alphas
        log_alphas = np.log(alphas)
        grid_alphas = [i for i in itertools.product(log_alphas, log_alphas)]