import numpy as np
from sklearn.linear_model import LogisticRegression
from libsvmdata.datasets import fetch_libsvm

from sparse_ho import ImplicitForward
from sparse_ho.models import SparseLogreg
from sparse_ho.criterion import LogisticMulticlass
from sparse_ho.optimizers import LineSearch
from sparse_ho.ho import grad_search, hyperopt_wrapper
from sparse_ho.utils import Monitor
from sparse_ho.datasets.utils_datasets import clean_dataset, get_splits

# dict_subsampling, dict_t_max, dict_max_eval and get_alpha_max are
# experiment-level configuration dicts/helpers defined elsewhere in this
# script.


def parallel_function(dataset_name, method, tol=1e-8, n_outer=15):
    # load data
    X, y = fetch_libsvm(dataset_name)
    # subsample the samples and the features
    n_samples, n_features = dict_subsampling[dataset_name]
    t_max = dict_t_max[dataset_name]
    X, y = clean_dataset(X, y, n_samples, n_features)
    alpha_max, n_classes = get_alpha_max(X, y)
    log_alpha_max = np.log(alpha_max)

    algo = ImplicitForward(n_iter_jac=2000)
    estimator = LogisticRegression(
        C=1, fit_intercept=False, warm_start=True, max_iter=30, verbose=False)
    model = SparseLogreg(estimator=estimator)
    idx_train, idx_val, idx_test = get_splits(X, y)
    logit_multiclass = LogisticMulticlass(
        idx_train, idx_val, algo, idx_test=idx_test)

    monitor = Monitor()
    if method == "implicit_forward":
        # first-order hyperparameter optimization with line search
        log_alpha0 = np.ones(n_classes) * np.log(0.1 * alpha_max)
        optimizer = LineSearch(n_outer=n_outer)
        grad_search(
            algo, logit_multiclass, model, optimizer, X, y, log_alpha0,
            monitor)
    elif method.startswith(('random', 'bayesian')):
        # zero-order search over the box [log_alpha_min, log_alpha_max]
        max_evals = dict_max_eval[dataset_name]
        log_alpha_min = np.log(alpha_max) - 7
        hyperopt_wrapper(
            algo, logit_multiclass, model, X, y, log_alpha_min, log_alpha_max,
            monitor, max_evals=max_evals, tol=tol, t_max=t_max, method=method,
            size_space=n_classes)
    elif method == 'grid_search':
        # one regularization strength per class, shared geometric grid
        n_alphas = 20
        p_alphas = np.geomspace(1, 0.001, n_alphas)
        p_alphas = np.tile(p_alphas, (n_classes, 1))
        for i in range(n_alphas):
            log_alpha_i = np.log(alpha_max * p_alphas[:, i])
            logit_multiclass.get_val(
                model, X, y, log_alpha_i, None, monitor, tol)

    monitor.times = np.array(monitor.times).copy()
    monitor.objs = np.array(monitor.objs).copy()
    monitor.acc_vals = np.array(monitor.acc_vals).copy()
    monitor.acc_tests = np.array(monitor.acc_tests).copy()
    monitor.log_alphas = np.array(monitor.log_alphas).copy()
    return (
        dataset_name, method, tol, n_outer, monitor.times, monitor.objs,
        monitor.acc_vals, monitor.acc_tests, monitor.log_alphas,
        log_alpha_max, n_samples, n_features, n_classes)
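# parallel_function is designed to be mapped over (dataset, method) pairs.
# A minimal driver sketch, assuming joblib is available; the dataset and
# method lists below are illustrative placeholders, not the configurations
# used in the original experiments.
from itertools import product

from joblib import Parallel, delayed

datasets = ['rcv1_multiclass', 'mnist']                   # hypothetical
methods = ['implicit_forward', 'random', 'grid_search']   # hypothetical

results = Parallel(n_jobs=-1, verbose=100)(
    delayed(parallel_function)(dataset_name, method)
    for dataset_name, method in product(datasets, methods))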
from sklearn.linear_model import LogisticRegression
from libsvmdata.datasets import fetch_libsvm

from sparse_ho import ImplicitForward
from sparse_ho.optimizers import GradientDescent
from sparse_ho.ho import grad_search, hyperopt_wrapper
from sparse_ho.utils import Monitor
from sparse_ho.datasets.utils_datasets import (
    alpha_max_multiclass, clean_dataset, get_splits)

# load data
n_samples = 1_000
n_features = 1_000
# n_samples = 1_100
# n_features = 3_200

# X, y = fetch_libsvm('sensit')
# X, y = fetch_libsvm('usps')
X, y = fetch_libsvm('rcv1_multiclass')
# X, y = fetch_libsvm('sector_scale')
# X, y = fetch_libsvm('sector')
# X, y = fetch_libsvm('smallNORB')
# X, y = fetch_libsvm('mnist')

# clean data and subsample
X, y = clean_dataset(X, y, n_samples, n_features)
idx_train, idx_val, idx_test = get_splits(X, y)
n_samples, n_features = X.shape

algo = ImplicitForward(n_iter_jac=1000)
estimator = LogisticRegression(
    C=1, fit_intercept=False, warm_start=True, max_iter=2000, verbose=False)
import numpy as np
from sklearn.datasets import make_classification
from libsvmdata.datasets import fetch_libsvm

from sparse_ho import Forward, ImplicitForward
from sparse_ho.ho import grad_search
from sparse_ho.utils import Monitor
from sparse_ho.models import SparseLogreg
from sparse_ho.criterion import HeldOutLogistic
from sparse_ho.grid_search import grid_search

print(__doc__)

dataset = 'rcv1_train'
# dataset = 'simu'

if dataset != 'simu':
    X, y = fetch_libsvm(dataset)
    # keep only the first 100 features to speed up the example
    X = X[:, :100]
else:
    X, y = make_classification(
        n_samples=100, n_features=1_000, random_state=42, flip_y=0.02)

n_samples = X.shape[0]
idx_train = np.arange(0, n_samples // 2)
idx_val = np.arange(n_samples // 2, n_samples)

print("Starting path computation...")
n_samples = len(y[idx_train])
alpha_max = np.max(np.abs(X[idx_train, :].T.dot(y[idx_train])))
import numpy as np
from sklearn.datasets import make_regression
from libsvmdata.datasets import fetch_libsvm

from sparse_ho import Forward, ImplicitForward
from sparse_ho.criterion import HeldOutMSE
from sparse_ho.utils import Monitor
from sparse_ho.ho import grad_search
from sparse_ho.grid_search import grid_search

print(__doc__)

dataset = 'rcv1'
# dataset = 'simu'

if dataset == 'rcv1':
    X, y = fetch_libsvm('rcv1_train')
else:
    X, y = make_regression(n_samples=1000, n_features=1000, noise=40)

n_samples = X.shape[0]
idx_train = np.arange(0, n_samples // 2)
idx_val = np.arange(n_samples // 2, n_samples)

print("Starting path computation...")
n_samples = len(y[idx_train])
alpha_max = np.max(np.abs(X[idx_train, :].T.dot(
    y[idx_train]))) / len(idx_train)

log_alpha0 = np.log(alpha_max / 10)
n_alphas = 10
p_alphas = np.geomspace(1, 0.0001, n_alphas)
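# A sketch of how this grid might be consumed, mirroring the call pattern of
# hyperopt_wrapper in the multiclass experiment above. The Lasso model
# wrapper and the grid_search keyword arguments (log_alphas, tol) are
# assumptions inferred from that pattern, not verified against this exact
# sparse_ho version.
from sklearn.linear_model import Lasso as Lasso_sk

from sparse_ho.models import Lasso

estimator = Lasso_sk(fit_intercept=False, warm_start=True)
model = Lasso(estimator=estimator)
criterion = HeldOutMSE(idx_train, idx_val)
algo = Forward()
monitor_grid = Monitor()

log_alphas = np.log(alpha_max * p_alphas)
grid_search(
    algo, criterion, model, X, y, np.min(log_alphas), np.max(log_alphas),
    monitor_grid, log_alphas=log_alphas, tol=1e-5)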
import numpy as np
from sklearn.linear_model import LogisticRegression
from libsvmdata.datasets import fetch_libsvm

from sparse_ho.models import SparseLogreg
from sparse_ho.criterion import LogisticMulticlass
from sparse_ho import ImplicitForward
from sparse_ho.utils import Monitor
from sparse_ho.datasets.utils_datasets import (
    alpha_max_multiclass, clean_dataset)

# load data
n_samples = 1000
n_features = 10
X, y = fetch_libsvm('mnist')

# keep only the digits 0, 1 and 2
my_bool = np.logical_or(np.logical_or(y == 0, y == 1), y == 2)
X = X[my_bool, :]
y = y[my_bool]

# clean data and subsample
X, y = clean_dataset(X, y, n_samples, n_features)
idx_train = np.arange(len(y) // 2)
idx_val = np.arange(len(y) // 2, len(y))

alpha_max, n_classes = alpha_max_multiclass(X, y)
tol = 1e-8
max_iter = 10000
import numpy as np
from libsvmdata.datasets import fetch_libsvm
from celer.datasets import make_correlated_data

from sparse_ho.ho import grad_search
from sparse_ho.utils import Monitor
from sparse_ho.utils_plot import discrete_cmap
from sparse_ho.optimizers import GradientDescent

##############################################################################
# Load some data

dataset = 'simu'
# dataset = 'rcv1'

if dataset == 'rcv1':
    X, y = fetch_libsvm('rcv1.binary')
    y -= y.mean()
    y /= np.linalg.norm(y)
else:
    X, y, _ = make_correlated_data(
        n_samples=200, n_features=400, snr=5, random_state=0)

n_samples = X.shape[0]
idx_train = np.arange(0, n_samples // 2)
idx_val = np.arange(n_samples // 2, n_samples)

print("Starting path computation...")
alpha_max = np.max(np.abs(X[idx_train, :].T @ y[idx_train])) / len(idx_train)
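# A sketch of the gradient-based search this setup leads into, mirroring the
# grad_search call pattern used in the multiclass experiment above. The Lasso
# wrapper, the HeldOutMSE criterion and the GradientDescent parameters are
# assumptions; adjust them to the sparse_ho version at hand.
from sklearn.linear_model import Lasso as Lasso_sk

from sparse_ho import ImplicitForward
from sparse_ho.models import Lasso
from sparse_ho.criterion import HeldOutMSE

estimator = Lasso_sk(fit_intercept=False, warm_start=True)
model = Lasso(estimator=estimator)
criterion = HeldOutMSE(idx_train, idx_val)
algo = ImplicitForward()
optimizer = GradientDescent(n_outer=30)  # n_outer assumed, as in LineSearch
monitor = Monitor()

log_alpha0 = np.log(alpha_max / 10)
grad_search(algo, criterion, model, optimizer, X, y, log_alpha0, monitor)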