def fetch_libsvm(dataset, replace=False, normalize=True, min_nnz=3):
    """
    This function is deprecated, we now rely on the libsvmdata package.

    It emits a ``FutureWarning`` and then forwards all of its arguments to
    ``libsvmdata.fetch_libsvm``, returning that call's result unchanged.

    Parameters
    ----------
    dataset: string
        Name of the dataset.

    replace: bool
        Whether to redownload the data.

    normalize: bool
        Whether to divide the columns by their norm.

    min_nnz: int
        Columns with strictly less than `min_nnz` non-zero entries are
        discarded.
    """
    # "always" makes the deprecation notice show up on every call instead of
    # only once per call site. Note that this changes the process-wide
    # warning filters.
    warnings.simplefilter("always", FutureWarning)
    # stacklevel=2 attributes the warning to the caller's line rather than to
    # this wrapper, so users can see which of their calls must be migrated.
    warnings.warn("celer.datasets.fetch_libsvm is deprecated and will be "
                  "removed in version 0.6. Use the lightweight "
                  "libsvmdata package instead.", FutureWarning,
                  stacklevel=2)
    return libsvmdata.fetch_libsvm(dataset, replace=replace,
                                   normalize=normalize, min_nnz=min_nnz)
def parallel_function(dataset_name, algo, div_alpha, div_rho):
    """Run one elastic-net solver configuration on one dataset.

    Parameters
    ----------
    dataset_name : str
        Datasets whose name starts with 'rcv1_train', 'news20', 'kdda_train'
        or 'finance' are loaded with ``fetch_libsvm`` (and ``y`` is divided
        by its norm in place); any other name goes through ``load_openml``.
    algo : tuple
        ``(algo_name, use_acc, K)``. ``algo_name == 'apcg'`` selects
        ``apcg_enet``; anything else is forwarded to ``solver_enet``.
    div_alpha : number
        ``alpha`` is ``max |X^T y|`` divided by this value.
    div_rho : number
        The second penalty passed to the solver is ``alpha / div_rho``.

    Returns
    -------
    tuple
        ``(dataset_name, algo_name, use_acc, K, div_alpha, div_rho, w, E,
        gaps, f_gap)``.

    Notes
    -----
    ``dict_f_gap`` and ``dict_maxiter`` are looked up from the enclosing
    module.
    """
    algo_name, use_acc, K = algo

    libsvm_prefixes = ('rcv1_train', 'news20', 'kdda_train', 'finance')
    if not dataset_name.startswith(libsvm_prefixes):
        X, y = load_openml(dataset_name)
    else:
        X, y = fetch_libsvm(dataset_name)
        y /= np.linalg.norm(y)

    alpha = np.max(np.abs(X.T @ y)) / div_alpha

    f_gap = dict_f_gap[dataset_name]
    max_iter = dict_maxiter[dataset_name, div_alpha]
    # Keyword arguments common to both solver entry points.
    shared_kwargs = dict(max_iter=max_iter, tol=1e-16, f_gap=f_gap,
                         verbose=True)

    rho = alpha / div_rho
    if algo_name != 'apcg':
        w, E, gaps = solver_enet(
            X, y, alpha, rho=rho, use_acc=use_acc, K=K, algo=algo_name,
            **shared_kwargs)
    else:
        w, E, gaps = apcg_enet(X, y, alpha, rho, **shared_kwargs)

    return (dataset_name, algo_name, use_acc, K, div_alpha, div_rho,
            w, E, gaps, f_gap)
def test_multiclass():
    """Fetch "iris" twice; the first result must have matching row counts."""
    name = "iris"
    data, target = fetch_libsvm(name)
    n_rows_data, n_rows_target = data.shape[0], target.shape[0]
    np.testing.assert_equal(n_rows_data, n_rows_target)
    # Second fetch of the same dataset; its result is only unpacked.
    data, target = fetch_libsvm(name)
def test_regression():
    """Fetch "bodyfat" twice; the first result must have matching row counts."""
    name = "bodyfat"
    data, target = fetch_libsvm(name)
    n_rows_data, n_rows_target = data.shape[0], target.shape[0]
    np.testing.assert_equal(n_rows_data, n_rows_target)
    # Second fetch of the same dataset; its result is only unpacked.
    data, target = fetch_libsvm(name)
def test_multilabel():
    """Fetch "rcv1_topics_test" twice: download, then saved npz loading."""
    name = "rcv1_topics_test"
    # First call: exercises the download.
    data, labels = fetch_libsvm(name)
    n_rows_data, n_rows_labels = data.shape[0], labels.shape[0]
    np.testing.assert_equal(n_rows_data, n_rows_labels)
    # Second call: exercises loading of the saved npz.
    data, labels = fetch_libsvm(name)
def test_binary():
    """Fetch "news20.binary" twice and check row counts of the first result."""
    name = "news20.binary"
    # First call downloads the data when it is not present yet.
    data, target = fetch_libsvm(name)
    n_rows_data, n_rows_target = data.shape[0], target.shape[0]
    np.testing.assert_equal(n_rows_data, n_rows_target)
    # Calling again also checks that loading saved files works.
    data, target = fetch_libsvm(name)
def parallel_function(
        dataset_name, div_alpha, method, ind_rep, random_state=10):
    """Compute a cross-validated value/hypergradient and pickle the result.

    Parameters
    ----------
    dataset_name : str
        Name passed to ``fetch_libsvm``.
    div_alpha : number
        The regularization level is ``alpha_max / div_alpha``, with
        ``alpha_max = max |X^T y| / n_samples``.
    method : str
        One of "celer", "ground_truth", "forward", "implicit_forward",
        "implicit" or "backward".
    ind_rep : int
        Index into ``dict_maxits[(dataset_name, div_alpha)]`` selecting the
        iteration budget ``maxit``.
    random_state : int
        Seed of the 5-fold ``KFold`` splitter.

    Returns
    -------
    None
        The result is written to
        ``results/hypergradient_<dataset>_<div_alpha>_<method>_<maxit>.pkl``.
        For "ground_truth", nothing is computed or written when a file with
        the matching prefix already exists in ``results/``.

    Raises
    ------
    NotImplementedError
        If ``method`` is not one of the supported names.

    Notes
    -----
    ``dict_maxits`` and ``tol`` are not defined in this function and must be
    available at module level.
    """
    maxit = dict_maxits[(dataset_name, div_alpha)][ind_rep]
    print("Dataset %s, algo %s, maxit %i" % (dataset_name, method, maxit))
    X, y = fetch_libsvm(dataset_name)
    n_samples = len(y)

    kf = KFold(n_splits=5, random_state=random_state, shuffle=True)

    # The computation is run twice; the values saved below are those of the
    # last run.
    for _ in range(2):
        alpha_max = np.max(np.abs(X.T.dot(y))) / n_samples
        log_alpha = np.log(alpha_max / div_alpha)
        monitor = Monitor()
        if method == "celer":
            clf = Lasso_celer(
                alpha=np.exp(log_alpha), fit_intercept=False,
                # TODO maybe change this tol
                tol=1e-8, max_iter=maxit)
            model = Lasso(estimator=clf, max_iter=maxit)
            criterion = HeldOutMSE(None, None)
            cross_val = CrossVal(cv=kf, criterion=criterion)
            algo = ImplicitForward(
                tol_jac=1e-8, n_iter_jac=maxit, use_stop_crit=False)
            algo.max_iter = maxit
            val, grad = cross_val.get_val_grad(
                model, X, y, log_alpha, algo.get_beta_jac_v, tol=tol,
                monitor=monitor, max_iter=maxit)
        elif method == "ground_truth":
            # Skip the (expensive) reference computation when a ground-truth
            # result for this (dataset, div_alpha) is already on disk;
            # otherwise compute it exactly once.
            result_prefix = "hypergradient_%s_%i_%s" % (
                dataset_name, div_alpha, method)
            if any(name.startswith(result_prefix)
                   for name in os.listdir("results/")):
                return
            clf = Lasso_celer(
                alpha=np.exp(log_alpha), fit_intercept=False,
                warm_start=True, tol=1e-13, max_iter=10000)
            criterion = HeldOutMSE(None, None)
            cross_val = CrossVal(cv=kf, criterion=criterion)
            algo = Implicit(criterion)
            model = Lasso(estimator=clf, max_iter=10000)
            val, grad = cross_val.get_val_grad(
                model, X, y, log_alpha, algo.get_beta_jac_v, tol=1e-13,
                monitor=monitor)
        else:
            model = Lasso(max_iter=maxit)
            criterion = HeldOutMSE(None, None)
            cross_val = CrossVal(cv=kf, criterion=criterion)
            if method == "forward":
                algo = Forward(use_stop_crit=False)
            elif method == "implicit_forward":
                algo = ImplicitForward(
                    use_stop_crit=False, tol_jac=1e-8, n_iter_jac=maxit,
                    max_iter=1000)
            elif method == "implicit":
                algo = Implicit(use_stop_crit=False, max_iter=1000)
            elif method == "backward":
                algo = Backward()
            else:
                # Explicit error instead of a deliberate division by zero.
                raise NotImplementedError(
                    "Unknown method: %s" % method)
            algo.max_iter = maxit
            algo.use_stop_crit = False
            val, grad = cross_val.get_val_grad(
                model, X, y, log_alpha, algo.get_beta_jac_v, tol=tol,
                monitor=monitor, max_iter=maxit)

    results = (
        dataset_name, div_alpha, method, maxit,
        val, grad, monitor.times[0])
    df = pandas.DataFrame(results).transpose()
    df.columns = [
        'dataset', 'div_alpha', 'method', 'maxit', 'val', 'grad', 'time']
    str_results = "results/hypergradient_%s_%i_%s_%i.pkl" % (
        dataset_name, div_alpha, method, maxit)
    df.to_pickle(str_results)
""" import time import warnings import numpy as np from numpy.linalg import norm import matplotlib.pyplot as plt from sklearn import linear_model from libsvmdata import fetch_libsvm from celer import LogisticRegression warnings.filterwarnings("ignore", message="Objective did not converge") warnings.filterwarnings("ignore", message="Liblinear failed to converge") X, y = fetch_libsvm("news20.binary") C_min = 2 / norm(X.T @ y, ord=np.inf) C = 20 * C_min def pobj_logreg(w): return np.sum(np.log(1 + np.exp(-y * (X @ w)))) + 1. / C * norm(w, ord=1) pobj_celer = [] t_celer = [] for n_iter in range(10): t0 = time.time() clf = LogisticRegression(C=C, solver="celer-pn", max_iter=n_iter,
implementation as it makes the example too long to run. """ import time import numpy as np import pandas as pd import matplotlib.pyplot as plt from libsvmdata import fetch_libsvm from celer import celer_path print(__doc__) print("*** Warning: this example may take more than 5 minutes to run ***") X, y = fetch_libsvm('finance') y -= np.mean(y) n_samples, n_features = X.shape alpha_max = np.max(np.abs(X.T.dot(y))) / n_samples print("Dataset size: %d samples, %d features" % X.shape) # construct grid of regularization parameters alpha n_alphas = 11 alphas = alpha_max * np.geomspace(1, 0.1, n_alphas) ############################################################################### # Run Celer on a grid of regularization parameters, for various tolerances: tols = [1e-2, 1e-4, 1e-6] results = np.zeros([1, len(tols)]) gaps = np.zeros((len(tols), len(alphas)))
from sparse_ho.models import ElasticNet
from sparse_ho.criterion import HeldOutMSE, CrossVal
from sparse_ho.optimizers import GradientDescent
from sparse_ho.utils import Monitor
from sparse_ho.utils_plot import configure_plt
from sparse_ho.grid_search import grid_search
from sparse_ho.utils_plot import discrete_color

configure_plt()

# Dataset selection: uncomment exactly one of the lines below.
# dataset = 'real-sim'
dataset = 'rcv1_train'
# dataset = 'simu'

if dataset != 'simu':
    # Any name other than 'simu' is fetched with fetch_libsvm; the target is
    # then centered in place.
    X, y = fetch_libsvm(dataset)
    y -= y.mean()
else:
    # Simulated regression problem with a fixed seed.
    X, y = make_regression(
        n_samples=500, n_features=1000, noise=40, random_state=42)

n_samples = len(y)
# Largest absolute entry of X^T y, divided by the number of samples.
alpha_max = np.max(np.abs(X.T.dot(y))) / n_samples
alpha_min = alpha_max / 100_000

# Grid for the two hyperparameters: num1D geometrically spaced values from
# alpha_max down to alpha_min, then every ordered pair of them.
num1D = 5
alpha1D = np.geomspace(alpha_max, alpha_min, num=num1D)
alphas = [np.array(i) for i in product(alpha1D, alpha1D)]
from sklearn.model_selection import KFold

from sparse_ho import ImplicitForward, grad_search
from sparse_ho.models import Lasso
from sparse_ho.criterion import HeldOutMSE, CrossVal
from sparse_ho.optimizers import LineSearch
from sparse_ho.utils import Monitor
from sparse_ho.utils_plot import discrete_cmap

print(__doc__)

# Dataset selection: switch which of the two lines is commented out.
# dataset = 'rcv1'
dataset = 'simu'

if dataset == 'rcv1':
    X, y = fetch_libsvm('rcv1_train')
else:
    # Simulated regression problem with a fixed seed.
    X, y = make_regression(n_samples=500, n_features=1000, noise=40,
                           random_state=42)

# Shuffled 5-fold splitter with a fixed seed.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

print("Starting path computation...")
n_samples = len(y)
# Largest absolute entry of X^T y, divided by the number of samples.
alpha_max = np.max(np.abs(X.T.dot(y))) / n_samples

# n_alphas geometrically spaced values from alpha_max down to alpha_max / 1000.
n_alphas = 10
alphas = np.geomspace(alpha_max, alpha_max / 1_000, n_alphas)
def parallel_function(dataset_name, div_alpha, method):
    """Compute held-out values/hypergradients for every iteration budget.

    For each ``maxit`` in ``dict_maxits[(dataset_name, div_alpha)]`` the
    value and gradient of a held-out MSE criterion are computed with the
    requested method and pickled to
    ``results/hypergradient_<dataset>_<div_alpha>_<method>_<maxit>.pkl``.

    Parameters
    ----------
    dataset_name : str
        Name passed to ``fetch_libsvm``.
    div_alpha : number
        The regularization level is ``alpha_max / div_alpha``, where
        ``alpha_max`` is computed on the training half only.
    method : str
        One of "celer", "ground_truth", "forward", "implicit_forward",
        "implicit" or "backward".

    Returns
    -------
    None
        For "ground_truth" the function returns as soon as a file with the
        matching prefix exists in ``results/``.

    Raises
    ------
    NotImplementedError
        If ``method`` is not one of the supported names.

    Notes
    -----
    ``dict_maxits`` and ``tol`` are not defined in this function and must be
    available at module level.
    """
    X, y = fetch_libsvm(dataset_name)
    n_samples = len(y)
    if dataset_name == "news20" and div_alpha == 100:
        # Add a small, seeded Gaussian perturbation to the targets for this
        # specific configuration.
        rng = np.random.RandomState(42)
        y += rng.randn(n_samples) * 0.01
    for maxit in dict_maxits[(dataset_name, div_alpha)]:
        # The message used to print `method` under the label "Dataset"; it
        # now reports the dataset and the method under their own labels.
        print("Dataset %s, algo %s, maxit %i" % (
            dataset_name, method, maxit))
        # The computation is run twice; the values saved below are those of
        # the last run.
        for i in range(2):
            # Random half of the samples for training, the rest for
            # validation.
            rng = np.random.RandomState(i)
            idx_train = rng.choice(n_samples, n_samples // 2, replace=False)
            idx = np.arange(0, n_samples)
            idx_val = idx[np.logical_not(np.isin(idx, idx_train))]
            alpha_max = np.max(np.abs(X[idx_train, :].T.dot(y[idx_train])))
            alpha_max /= len(idx_train)
            log_alpha = np.log(alpha_max / div_alpha)
            monitor = Monitor()
            if method == "celer":
                clf = Lasso_celer(
                    alpha=np.exp(log_alpha), fit_intercept=False,
                    tol=1e-12, max_iter=maxit)
                model = Lasso(estimator=clf, max_iter=maxit)
                criterion = HeldOutMSE(idx_train, idx_val)
                algo = ImplicitForward(
                    tol_jac=1e-32, n_iter_jac=maxit, use_stop_crit=False)
                algo.max_iter = maxit
                val, grad = criterion.get_val_grad(
                    model, X, y, log_alpha, algo.compute_beta_grad,
                    tol=1e-12, monitor=monitor, max_iter=maxit)
            elif method == "ground_truth":
                # Nothing to do when a ground-truth result already exists.
                result_prefix = "hypergradient_%s_%i_%s" % (
                    dataset_name, div_alpha, method)
                if any(name.startswith(result_prefix)
                       for name in os.listdir("results/")):
                    return
                clf = Lasso_celer(
                    alpha=np.exp(log_alpha), fit_intercept=False,
                    warm_start=True, tol=1e-14, max_iter=10000)
                criterion = HeldOutMSE(idx_train, idx_val)
                if dataset_name == "news20":
                    algo = ImplicitForward(tol_jac=1e-11, n_iter_jac=100000)
                else:
                    algo = Implicit(criterion)
                model = Lasso(estimator=clf, max_iter=10000)
                val, grad = criterion.get_val_grad(
                    model, X, y, log_alpha, algo.compute_beta_grad,
                    tol=1e-14, monitor=monitor)
            else:
                model = Lasso(max_iter=maxit)
                criterion = HeldOutMSE(idx_train, idx_val)
                if method == "forward":
                    algo = Forward(use_stop_crit=False)
                elif method == "implicit_forward":
                    algo = ImplicitForward(
                        tol_jac=1e-8, n_iter_jac=maxit, use_stop_crit=False)
                elif method == "implicit":
                    algo = Implicit(max_iter=1000)
                elif method == "backward":
                    algo = Backward()
                else:
                    raise NotImplementedError
                algo.max_iter = maxit
                algo.use_stop_crit = False
                val, grad = criterion.get_val_grad(
                    model, X, y, log_alpha, algo.compute_beta_grad,
                    tol=tol, monitor=monitor, max_iter=maxit)

        results = (
            dataset_name, div_alpha, method, maxit,
            val, grad, monitor.times[0])
        df = pandas.DataFrame(results).transpose()
        df.columns = [
            'dataset', 'div_alpha', 'method', 'maxit', 'val', 'grad', 'time']
        str_results = "results/hypergradient_%s_%i_%s_%i.pkl" % (
            dataset_name, div_alpha, method, maxit)
        df.to_pickle(str_results)
""" import numpy as np import seaborn as sns import matplotlib.pyplot as plt from libsvmdata import fetch_libsvm from andersoncd.plot_utils import configure_plt, _plot_legend_apart from andersoncd.logreg import solver_logreg configure_plt() ############################################################################### # Load the data: # n_features = 1000 X, y = fetch_libsvm('rcv1_train', normalize=True) ############################################################################### # Run solver with various regularization strengths: # alpha = 0 div_alpha = 30 alpha_max = np.max(np.abs(X.T @ y)) / 2 alpha = alpha_max / div_alpha tol = 1e-10 f_gap = 10 max_iter = 600 reg_amount_list = [1e-3, 1e-4, 1e-5, 1e-7, None]
def get_data(self):
    """Load the 'finance' dataset through ``fetch_libsvm``.

    Returns
    -------
    tuple
        ``(X.shape[1], data)`` where ``data`` is a dict with the keys
        ``"X"`` and ``"y"``.
    """
    design, target = fetch_libsvm('finance')
    payload = {"X": design, "y": target}
    n_columns = design.shape[1]
    return n_columns, payload
def get_data(self):
    """Load "madelon" and "madelon_test" through ``fetch_libsvm``.

    Returns
    -------
    tuple
        ``(X.shape[1], data)`` where ``data`` is a dict with the keys
        ``"X"``, ``"y"``, ``"X_test"`` and ``"y_test"``; the column count
        is taken from the "madelon" matrix.
    """
    train_design, train_target = fetch_libsvm("madelon")
    test_design, test_target = fetch_libsvm("madelon_test")
    payload = {
        "X": train_design,
        "y": train_target,
        "X_test": test_design,
        "y_test": test_target,
    }
    n_columns = train_design.shape[1]
    return n_columns, payload
def parallel_function(
        dataset_name, method, tol=1e-5, n_outer=50,
        tolerance_decrease='constant'):
    """Run one hyperparameter-search method on one dataset.

    Parameters
    ----------
    dataset_name : str
        Name passed to ``fetch_libsvm``; the target is centered in place.
    method : str
        'grid_search', 'random', 'bayesian', or any name starting with
        'implicit_forward'. The exact name 'implicit_forward' uses the
        default ``GradientDescent``; other names with that prefix add
        ``tol_decrease="geom"``.
    tol : float
        Tolerance forwarded to the estimator and to the search routines.
    n_outer : int
        Overwritten inside the function: first by
        ``dict_n_outers[dataset_name, method]`` (20 if that lookup fails),
        then by 30 for the 'implicit_forward*' methods.
    tolerance_decrease : str
        Not used by the computation; only echoed back in the result.

    Returns
    -------
    tuple
        ``(dataset_name, method, tol, n_outer, tolerance_decrease,
        monitor.times, monitor.objs, monitor.objs_test, monitor.alphas,
        alpha_max, model_name)``.

    Raises
    ------
    NotImplementedError
        If ``method`` is not one of the supported names.

    Notes
    -----
    ``model_name``, ``dict_palphamin``, ``dict_n_outers``, ``dict_t_max`` and
    ``dict_point_grid_search`` are read from module scope. No model is built
    here for ``model_name == "lasso"``.
    """
    # load data
    X, y = fetch_libsvm(dataset_name)
    y -= np.mean(y)
    # compute alpha_max
    alpha_max = np.abs(X.T @ y).max() / len(y)

    if model_name == "logreg":
        alpha_max /= 2
    alpha_min = alpha_max * dict_palphamin[dataset_name]

    # Bound up front so the logreg branch below no longer reads an unbound
    # local.
    # NOTE(review): presumably SparseLogreg accepts estimator=None like the
    # other sparse_ho models; confirm, or plug in a dedicated estimator.
    estimator = None
    if model_name == "enet":
        estimator = linear_model.ElasticNet(
            fit_intercept=False, max_iter=10_000, warm_start=True, tol=tol)
        model = ElasticNet(estimator=estimator)
    elif model_name == "logreg":
        model = SparseLogreg(estimator=estimator)

    # TODO improve this
    try:
        n_outer = dict_n_outers[dataset_name, method]
    except Exception:
        # Best-effort fallback kept as in the original: any lookup failure
        # yields the default number of outer iterations.
        n_outer = 20

    # The whole search is run size_loop times; the monitor of the last run
    # is the one returned.
    size_loop = 2
    for _ in range(size_loop):
        if model_name == "lasso" or model_name == "enet":
            sub_criterion = HeldOutMSE(None, None)
        elif model_name == "logreg":
            # Was assigned to `criterion`, which left `sub_criterion`
            # undefined for the CrossVal call below.
            sub_criterion = HeldOutLogistic(None, None)
        kf = KFold(n_splits=5, shuffle=True, random_state=42)
        criterion = CrossVal(sub_criterion, cv=kf)

        algo = ImplicitForward(tol_jac=1e-3)
        monitor = Monitor()
        t_max = dict_t_max[dataset_name]
        if method == 'grid_search':
            num1D = dict_point_grid_search[dataset_name]
            alpha1D = np.geomspace(alpha_max, alpha_min, num=num1D)
            # Every ordered pair of 1D values forms the 2D grid.
            alphas = [np.array(i) for i in product(alpha1D, alpha1D)]
            grid_search(
                algo, criterion, model, X, y, alpha_min, alpha_max,
                monitor, max_evals=100, tol=tol, alphas=alphas)
        elif method == 'random' or method == 'bayesian':
            hyperopt_wrapper(
                algo, criterion, model, X, y, alpha_min, alpha_max,
                monitor, max_evals=30, tol=tol, method=method, size_space=2,
                t_max=t_max)
        elif method.startswith("implicit_forward"):
            # do gradient descent to find the optimal lambda
            alpha0 = np.array([alpha_max / 100, alpha_max / 100])
            n_outer = 30
            if method == 'implicit_forward':
                optimizer = GradientDescent(
                    n_outer=n_outer, p_grad_norm=1, verbose=True, tol=tol,
                    t_max=t_max)
            else:
                optimizer = GradientDescent(
                    n_outer=n_outer, p_grad_norm=1, verbose=True, tol=tol,
                    t_max=t_max,
                    tol_decrease="geom")
            grad_search(
                algo, criterion, model, optimizer, X, y, alpha0,
                monitor)
        else:
            raise NotImplementedError

    monitor.times = np.array(monitor.times)
    monitor.objs = np.array(monitor.objs)
    monitor.objs_test = 0  # TODO
    monitor.alphas = np.array(monitor.alphas)
    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test,
            monitor.alphas, alpha_max,
            model_name)
def parallel_function(dataset_name, method):
    """Compute SVM held-out values/hypergradients for every iteration budget.

    For each ``max_iter`` in ``dict_max_iter[dataset_name]`` the value and
    gradient of a smoothed-hinge held-out criterion are computed with the
    requested method and pickled to
    ``results_svm/hypergradient_svm_<dataset>_<method>_<max_iter>.pkl``.

    Parameters
    ----------
    dataset_name : str
        Name passed to ``fetch_libsvm``. For "real-sim" only the first 2000
        columns are kept. Rows of ``X`` with zero norm are dropped.
    method : str
        One of "ground_truth", "sota", "forward" or "implicit_forward".

    Returns
    -------
    None
        For "ground_truth" the function returns as soon as a file with the
        matching prefix exists in ``results_svm/``.

    Raises
    ------
    NotImplementedError
        If ``method`` is not one of the supported names.

    Notes
    -----
    ``dict_logC``, ``dict_max_iter`` and ``tol`` are not defined in this
    function and must be available at module level.
    """
    # The dataset used to be fetched twice in a row; once is enough.
    X, y = fetch_libsvm(dataset_name)
    if dataset_name == "real-sim":
        X = X[:, :2000]
    X = csr_matrix(X)  # very important for SVM
    my_bool = norm(X, axis=1) != 0
    X = X[my_bool, :]
    y = y[my_bool]
    logC = dict_logC[dataset_name]

    # The split depends only on (X, y) and a fixed seed, so it is computed
    # once. `split` yields one (train, test) pair per split: the first pair
    # is taken as (train, validation). The previous code unpacked the two
    # pairs and used the *train* indices of the second split as validation
    # set, which overlaps the training set.
    sss1 = StratifiedShuffleSplit(
        n_splits=2, test_size=0.3333, random_state=0)
    idx_train, idx_val = next(sss1.split(X, y))

    for max_iter in dict_max_iter[dataset_name]:
        # The message used to print `method` under the label "Dataset".
        print("Dataset %s, algo %s, max iter %i" % (
            dataset_name, method, max_iter))
        # The computation is run twice; the values saved below are those of
        # the last run.
        for i in range(2):  # TODO change this
            monitor = Monitor()
            criterion = HeldOutSmoothedHinge(idx_train, idx_val)
            model = SVM(estimator=None, max_iter=10_000)
            if method == "ground_truth":
                # Nothing to do when a ground-truth result already exists.
                result_prefix = "hypergradient_svm_%s_%s" % (
                    dataset_name, method)
                if any(name.startswith(result_prefix)
                       for name in os.listdir("results_svm/")):
                    return
                clf = LinearSVC(
                    C=np.exp(logC), tol=1e-32, max_iter=10_000,
                    loss='hinge', permute=False)
                algo = Implicit(criterion)
                model.estimator = clf
                val, grad = criterion.get_val_grad(
                    model, X, y, logC, algo.compute_beta_grad, tol=1e-14,
                    monitor=monitor)
            else:
                if method == "sota":
                    clf = LinearSVC(
                        C=np.exp(logC), loss='hinge', max_iter=max_iter,
                        tol=1e-32, permute=False)
                    model.estimator = clf
                    algo = ImplicitForward(
                        tol_jac=1e-32, n_iter_jac=max_iter,
                        use_stop_crit=False)
                elif method == "forward":
                    algo = Forward(use_stop_crit=False)
                elif method == "implicit_forward":
                    algo = ImplicitForward(
                        tol_jac=1e-8, n_iter_jac=max_iter,
                        use_stop_crit=False)
                else:
                    raise NotImplementedError
                algo.max_iter = max_iter
                algo.use_stop_crit = False
                val, grad = criterion.get_val_grad(
                    model, X, y, logC, algo.compute_beta_grad, tol=tol,
                    monitor=monitor, max_iter=max_iter)

        results = (
            dataset_name, method, max_iter,
            val, grad, monitor.times[0])
        df = pandas.DataFrame(results).transpose()
        df.columns = [
            'dataset', 'method', 'maxit', 'val', 'grad', 'time']
        str_results = "results_svm/hypergradient_svm_%s_%s_%i.pkl" % (
            dataset_name, method, max_iter)
        df.to_pickle(str_results)
maxits = [5, 10, 100, 500, 1000] methods = ["forward", "implicit_forward", "celer"] dict_label = {} dict_label["forward"] = "forward" dict_label["implicit_forward"] = "Implicit" dict_label["celer"] = "Implicit + celer" dataset_name = "rcv1_train" p_alpha_max = 0.001 tol = 1e-32 X, y = fetch_libsvm(dataset_name) n_samples = len(y) sss1 = StratifiedShuffleSplit(n_splits=2, test_size=0.3333, random_state=0) idx_train, idx_val = sss1.split(X, y) idx_train = idx_train[0] idx_val = idx_val[0] dict_res = {} for maxit in maxits: for method in methods: print("Dataset %s, maxit %i" % (method, maxit)) for i in range(2): alpha_max = np.max(np.abs(X.T.dot(y))) / n_samples log_alpha = np.log(alpha_max * p_alpha_max)
from sklearn.model_selection import KFold

from sparse_ho import ImplicitForward, grad_search
from sparse_ho.models import Lasso
from sparse_ho.criterion import HeldOutMSE, CrossVal
from sparse_ho.optimizers import GradientDescent
from sparse_ho.utils import Monitor
from sparse_ho.utils_plot import discrete_cmap

print(__doc__)

# Dataset selection: switch which of the two lines is commented out.
# dataset = 'rcv1'
dataset = 'simu'

if dataset == 'rcv1':
    X, y = fetch_libsvm('rcv1.binary')
else:
    # Simulated regression problem with a fixed seed.
    X, y = make_regression(n_samples=500, n_features=1000, noise=40,
                           random_state=42)

# Shuffled 5-fold splitter with a fixed seed.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

print("Starting path computation...")
n_samples = len(y)
# Largest absolute entry of X^T y, divided by the number of samples.
alpha_max = np.max(np.abs(X.T.dot(y))) / n_samples

# n_alphas geometrically spaced values from alpha_max down to alpha_max / 1000.
n_alphas = 10
alphas = np.geomspace(alpha_max, alpha_max / 1_000, n_alphas)