Ejemplo n.º 1
0
class Solver(BaseSolver):
    name = 'Celer'

    install_cmd = 'pip'
    package_name = 'celer'
    package_install = 'git+https://github.com/mathurinm/celer.git'

    def set_objective(self, X, y, lmbd):
        self.X, self.y, self.lmbd = X, y, lmbd

        self.clf = LogisticRegression(penalty='l1',
                                      C=1 / self.lmbd,
                                      max_iter=1,
                                      max_epochs=100000,
                                      p0=10,
                                      verbose=False,
                                      tol=1e-12,
                                      fit_intercept=False)

    def run(self, n_iter):
        self.clf.max_iter = n_iter
        self.clf.fit(self.X, self.y)

    def get_result(self):
        return self.clf.coef_.flatten()
Ejemplo n.º 2
0
class Solver(BaseSolver):
    name = 'Celer'
    stop_strategy = 'iteration'

    install_cmd = 'conda'
    # need 0.7dev until max_iter=0 is supported on pypi version (0.7 release)
    requirements = ['pip:git+https://github.com/mathurinm/celer.git']

    def set_objective(self, X, y, lmbd):
        self.X, self.y, self.lmbd = X, y, lmbd

        warnings.filterwarnings('ignore', category=ConvergenceWarning)
        self.clf = LogisticRegression(penalty='l1',
                                      C=1 / self.lmbd,
                                      max_iter=1,
                                      max_epochs=100000,
                                      p0=10,
                                      verbose=False,
                                      tol=1e-12,
                                      fit_intercept=False)

    def run(self, n_iter):
        self.clf.max_iter = n_iter
        self.clf.fit(self.X, self.y)

    @staticmethod
    def get_next(stop_val):
        return stop_val + 1

    def get_result(self):
        return self.clf.coef_.flatten()
Ejemplo n.º 3
0
    def set_objective(self, X, y, lmbd):
        self.X, self.y, self.lmbd = X, y, lmbd

        warnings.filterwarnings('ignore', category=ConvergenceWarning)
        self.clf = LogisticRegression(
            penalty='l1', C=1/self.lmbd, max_iter=1,
            max_epochs=100000, p0=10, verbose=False, tol=1e-12,
            fit_intercept=False
        )
Ejemplo n.º 4
0
    def set_objective(self, X, y, lmbd):
        self.X, self.y, self.lmbd = X, y, lmbd

        self.clf = LogisticRegression(penalty='l1',
                                      C=1 / self.lmbd,
                                      max_iter=1,
                                      max_epochs=100000,
                                      p0=10,
                                      verbose=False,
                                      tol=1e-12,
                                      fit_intercept=False)
Ejemplo n.º 5
0
def parallel_function(
        dataset_name, method, tol=1e-8, n_outer=15):

    # load data
    X, y = fetch_libsvm(dataset_name)
    # subsample the samples and the features
    n_samples, n_features = dict_subsampling[dataset_name]
    t_max = dict_t_max[dataset_name]
    # t_max = 3600

    X, y = clean_dataset(X, y, n_samples, n_features)
    alpha_max, n_classes = get_alpha_max(X, y)
    log_alpha_max = np.log(alpha_max)  # maybe to change alpha max value

    algo = ImplicitForward(None, n_iter_jac=2000)
    estimator = LogisticRegression(
        C=1, fit_intercept=False, warm_start=True, max_iter=30, verbose=False)

    model = SparseLogreg(estimator=estimator)
    idx_train, idx_val, idx_test = get_splits(X, y)

    logit_multiclass = LogisticMulticlass(
        idx_train, idx_val, algo, idx_test=idx_test)

    monitor = Monitor()
    if method == "implicit_forward":
        log_alpha0 = np.ones(n_classes) * np.log(0.1 * alpha_max)
        optimizer = LineSearch(n_outer=100)
        grad_search(
            algo, logit_multiclass, model, optimizer, X, y, log_alpha0,
            monitor)
    elif method.startswith(('random', 'bayesian')):
        max_evals = dict_max_eval[dataset_name]
        log_alpha_min = np.log(alpha_max) - 7
        hyperopt_wrapper(
            algo, logit_multiclass, model, X, y, log_alpha_min, log_alpha_max,
            monitor, max_evals=max_evals, tol=tol, t_max=t_max, method=method,
            size_space=n_classes)
    elif method == 'grid_search':
        n_alphas = 20
        p_alphas = np.geomspace(1, 0.001, n_alphas)
        p_alphas = np.tile(p_alphas, (n_classes, 1))
        for i in range(n_alphas):
            log_alpha_i = np.log(alpha_max * p_alphas[:, i])
            logit_multiclass.get_val(
                model, X, y, log_alpha_i, None, monitor, tol)

    monitor.times = np.array(monitor.times).copy()
    monitor.objs = np.array(monitor.objs).copy()
    monitor.acc_vals = np.array(monitor.acc_vals).copy()
    monitor.acc_tests = np.array(monitor.acc_tests).copy()
    monitor.log_alphas = np.array(monitor.log_alphas).copy()
    return (
        dataset_name, method, tol, n_outer, monitor.times, monitor.objs,
        monitor.acc_vals, monitor.acc_tests, monitor.log_alphas, log_alpha_max,
        n_samples, n_features, n_classes)
Ejemplo n.º 6
0
class Solver(BaseSolver):
    name = 'Celer'
    sampling_strategy = 'iteration'

    install_cmd = 'pip'
    requirements = ['celer']
    requirements_install = ['git+https://github.com/mathurinm/celer.git']

    def set_objective(self, X, y, lmbd):
        self.X, self.y, self.lmbd = X, y, lmbd

        warnings.filterwarnings('ignore', category=ConvergenceWarning)
        self.clf = LogisticRegression(
            penalty='l1', C=1/self.lmbd, max_iter=1,
            max_epochs=100000, p0=10, verbose=False, tol=1e-12,
            fit_intercept=False
        )

    def run(self, n_iter):
        self.clf.max_iter = n_iter
        self.clf.fit(self.X, self.y)

    def get_result(self):
        return self.clf.coef_.flatten()
Ejemplo n.º 7
0
# X, y = fetch_libsvm('sensit')
# X, y = fetch_libsvm('usps')
X, y = fetch_libsvm('rcv1_multiclass')
# X, y = fetch_libsvm('sector_scale')
# X, y = fetch_libsvm('sector')
# X, y = fetch_libsvm('smallNORB')
# X, y = fetch_libsvm('mnist')


# clean data and subsample
X, y = clean_dataset(X, y, n_samples, n_features)
idx_train, idx_val, idx_test = get_splits(X, y)
n_samples, n_features = X.shape

algo = ImplicitForward(n_iter_jac=1000)
estimator = LogisticRegression(
    C=1, fit_intercept=False, warm_start=True, max_iter=2000, verbose=False)

model = SparseLogreg(estimator=estimator)
logit_multiclass = LogisticMulticlass(
    idx_train, idx_val, algo, idx_test=idx_test)


alpha_max, n_classes = alpha_max_multiclass(X, y)
tol = 1e-5


n_alphas = 10
p_alphas = np.geomspace(1, 0.001, n_alphas)
p_alphas = np.tile(p_alphas, (n_classes, 1))

print("###################### GRID SEARCH ###################")
Ejemplo n.º 8
0
tol = 1e-8

n_alphas = 30
p_alphas = np.geomspace(1, 0.0001, n_alphas)
alphas = alpha_max * p_alphas
log_alphas = np.log(alphas)

##############################################################################
# Grid-search
# -----------

print('scikit started')
t0 = time.time()

estimator = LogisticRegression(penalty='l1',
                               fit_intercept=False,
                               max_iter=max_iter)
model = SparseLogreg(max_iter=max_iter, estimator=estimator)
criterion = HeldOutLogistic(idx_train, idx_val)
algo_grid = Forward()
monitor_grid = Monitor()
grid_search(algo_grid,
            criterion,
            model,
            X,
            y,
            log_alpha_min,
            log_alpha_max,
            monitor_grid,
            log_alphas=log_alphas,
            tol=tol)
Ejemplo n.º 9
0
"""
===============================================================
Use LogisticRegression class with Celer and Prox-Newton solvers
===============================================================
"""

import numpy as np
from scipy import sparse
from numpy.linalg import norm
from sklearn import linear_model

from celer import LogisticRegression
from celer.datasets import fetch_ml_uci

dataset = "gisette_train"
X, y = fetch_ml_uci(dataset)

C_min = 2 / norm(X.T @ y, ord=np.inf)
C = 5 * C_min
clf = LogisticRegression(C=C, verbose=1, solver="celer-pn", tol=1e0).fit(X, y)
w_celer = clf.coef_.ravel()

clf = linear_model.LogisticRegression(C=C,
                                      solver="liblinear",
                                      penalty='l1',
                                      fit_intercept=False).fit(X, y)
w_lib = clf.coef_.ravel()
Ejemplo n.º 10
0
X, y = fetch_libsvm("news20.binary")

C_min = 2 / norm(X.T @ y, ord=np.inf)
C = 20 * C_min


def pobj_logreg(w):
    return np.sum(np.log(1 + np.exp(-y * (X @ w)))) + 1. / C * norm(w, ord=1)


pobj_celer = []
t_celer = []

for n_iter in range(10):
    t0 = time.time()
    clf = LogisticRegression(C=C, solver="celer-pn", max_iter=n_iter,
                             tol=0).fit(X, y)
    t_celer.append(time.time() - t0)
    w_celer = clf.coef_.ravel()
    pobj_celer.append(pobj_logreg(w_celer))

pobj_celer = np.array(pobj_celer)

pobj_libl = []
t_libl = []

for n_iter in np.arange(0, 50, 10):
    t0 = time.time()
    clf = linear_model.LogisticRegression(C=C,
                                          solver="liblinear",
                                          penalty='l1',
                                          fit_intercept=False,