Ejemplo n.º 1
0
def test_grad_search(Optimizer, model, crit):
    """check that the paths are the same in the line search"""
    n_outer = 2

    criterion = HeldOutMSE(idx_train, idx_val)
    monitor1 = Monitor()
    algo = Forward()
    optimizer = Optimizer(n_outer=n_outer, tol=1e-16)
    grad_search(algo, criterion, model, optimizer, X, y, alpha0, monitor1)

    criterion = HeldOutMSE(idx_train, idx_val)
    monitor2 = Monitor()
    algo = Implicit()
    optimizer = Optimizer(n_outer=n_outer, tol=1e-16)
    grad_search(algo, criterion, model, optimizer, X, y, alpha0, monitor2)

    criterion = HeldOutMSE(idx_train, idx_val)
    monitor3 = Monitor()
    algo = ImplicitForward(tol_jac=1e-8, n_iter_jac=5000)
    optimizer = Optimizer(n_outer=n_outer, tol=1e-16)
    grad_search(algo, criterion, model, optimizer, X, y, alpha0, monitor3)

    np.testing.assert_allclose(np.array(monitor1.alphas),
                               np.array(monitor3.alphas))
    np.testing.assert_allclose(np.array(monitor1.grads),
                               np.array(monitor3.grads),
                               rtol=1e-5)
    np.testing.assert_allclose(np.array(monitor1.objs),
                               np.array(monitor3.objs))
    assert not np.allclose(np.array(monitor1.times), np.array(monitor3.times))
Ejemplo n.º 2
0
def test_val_grad(model_name, criterion_name, algo):
    """Check that all methods return the same gradient, comparing to cvxpylayer
    """
    if model_name == 'svr':
        pytest.xfail("svr needs to be fixed")

    if criterion_name == 'logistic':
        pytest.xfail("cvxpylayer seems broken for logistic")

    if criterion_name == 'MSE':
        criterion = HeldOutMSE(idx_train, idx_val)
    elif criterion_name == 'logistic':
        criterion = HeldOutLogistic(idx_train, idx_val)
    elif criterion_name == 'SURE':
        criterion = FiniteDiffMonteCarloSure(sigma_star)

    log_alpha = dict_log_alpha[model_name]
    model = models[model_name]
    val, grad = criterion.get_val_grad(model,
                                       X,
                                       y,
                                       log_alpha,
                                       algo.compute_beta_grad,
                                       tol=tol)
    np.testing.assert_allclose(dict_vals_cvxpy[model_name, criterion_name],
                               val,
                               rtol=1e-5,
                               atol=1e-5)
    np.testing.assert_allclose(dict_grads_cvxpy[model_name, criterion_name],
                               grad,
                               rtol=1e-5,
                               atol=1e-5)
Ejemplo n.º 3
0
def test_grad_search(model, crit):
    """check that the paths are the same in the line search"""
    if crit == 'cv':
        n_outer = 2
        criterion = HeldOutMSE(idx_train, idx_val)
    else:
        n_outer = 2
        criterion = SmoothedSURE(sigma_star)
    # TODO MM@QBE if else scheme surprising

    criterion = HeldOutMSE(idx_train, idx_val)
    monitor1 = Monitor()
    algo = Forward()
    grad_search(algo,
                criterion,
                model,
                X,
                y,
                log_alpha,
                monitor1,
                n_outer=n_outer,
                tol=1e-16)

    criterion = HeldOutMSE(idx_train, idx_val)
    monitor2 = Monitor()
    algo = Implicit()
    grad_search(algo,
                criterion,
                model,
                X,
                y,
                log_alpha,
                monitor2,
                n_outer=n_outer,
                tol=1e-16)

    criterion = HeldOutMSE(idx_train, idx_val)
    monitor3 = Monitor()
    algo = ImplicitForward(tol_jac=1e-8, n_iter_jac=5000)
    grad_search(algo,
                criterion,
                model,
                X,
                y,
                log_alpha,
                monitor3,
                n_outer=n_outer,
                tol=1e-16)

    np.testing.assert_allclose(np.array(monitor1.log_alphas),
                               np.array(monitor3.log_alphas))
    np.testing.assert_allclose(np.array(monitor1.grads),
                               np.array(monitor3.grads),
                               atol=1e-8)
    np.testing.assert_allclose(np.array(monitor1.objs),
                               np.array(monitor3.objs))
    assert not np.allclose(np.array(monitor1.times), np.array(monitor3.times))
Ejemplo n.º 4
0
def test_grad_search(model, crit):
    """check that the paths are the same in the line search"""
    if crit == 'cv':
        n_outer = 2
        criterion = HeldOutMSE(X_val,
                               y_val,
                               model,
                               X_test=X_test,
                               y_test=y_test)
    else:
        n_outer = 2
        criterion = SURE(X_train,
                         y_train,
                         model,
                         sigma=sigma_star,
                         X_test=X_test,
                         y_test=y_test)

    criterion = HeldOutMSE(X_val, y_val, model, X_test=X_test, y_test=y_test)
    monitor1 = Monitor()
    algo = Forward()
    grad_search(algo,
                criterion,
                log_alpha,
                monitor1,
                n_outer=n_outer,
                tol=1e-16)

    criterion = HeldOutMSE(X_val, y_val, model, X_test=X_test, y_test=y_test)
    monitor2 = Monitor()
    algo = Implicit()
    grad_search(algo,
                criterion,
                log_alpha,
                monitor2,
                n_outer=n_outer,
                tol=1e-16)

    criterion = HeldOutMSE(X_val, y_val, model, X_test=X_test, y_test=y_test)
    monitor3 = Monitor()
    algo = ImplicitForward(tol_jac=1e-8, n_iter_jac=5000)
    grad_search(algo,
                criterion,
                log_alpha,
                monitor3,
                n_outer=n_outer,
                tol=1e-16)

    assert np.allclose(np.array(monitor1.log_alphas),
                       np.array(monitor3.log_alphas))
    assert np.allclose(np.array(monitor1.grads), np.array(monitor3.grads))
    assert np.allclose(np.array(monitor1.objs), np.array(monitor3.objs))
    assert np.allclose(np.array(monitor1.objs_test),
                       np.array(monitor3.objs_test))
    assert not np.allclose(np.array(monitor1.times), np.array(monitor3.times))
Ejemplo n.º 5
0
def test_grad_search():

    n_outer = 3
    criterion = HeldOutMSE(X_val, y_val, model, X_test=None, y_test=None)
    monitor1 = Monitor()
    algo = Forward()
    grad_search(algo,
                criterion,
                np.array([log_alpha1, log_alpha2]),
                monitor1,
                n_outer=n_outer,
                tol=1e-16)

    criterion = HeldOutMSE(X_val, y_val, model, X_test=None, y_test=None)
    monitor2 = Monitor()
    algo = Implicit()
    grad_search(algo,
                criterion,
                np.array([log_alpha1, log_alpha2]),
                monitor2,
                n_outer=n_outer,
                tol=1e-16)

    criterion = HeldOutMSE(X_val, y_val, model, X_test=None, y_test=None)
    monitor3 = Monitor()
    algo = ImplicitForward(tol_jac=1e-3, n_iter_jac=1000)
    grad_search(algo,
                criterion,
                np.array([log_alpha1, log_alpha2]),
                monitor3,
                n_outer=n_outer,
                tol=1e-16)
    [np.linalg.norm(grad) for grad in monitor1.grads]
    [np.exp(alpha) for alpha in monitor1.log_alphas]

    assert np.allclose(np.array(monitor1.log_alphas),
                       np.array(monitor3.log_alphas))
    assert np.allclose(np.array(monitor1.grads), np.array(monitor3.grads))
    assert np.allclose(np.array(monitor1.objs), np.array(monitor3.objs))
    assert not np.allclose(np.array(monitor1.times), np.array(monitor3.times))

    assert np.allclose(np.array(monitor1.log_alphas),
                       np.array(monitor2.log_alphas),
                       atol=1e-2)
    assert np.allclose(np.array(monitor1.grads),
                       np.array(monitor2.grads),
                       atol=1e-2)
    assert np.allclose(np.array(monitor1.objs),
                       np.array(monitor2.objs),
                       atol=1e-2)
    assert not np.allclose(np.array(monitor1.times), np.array(monitor2.times))
Ejemplo n.º 6
0
def test_check_grad_sparse_ho(model_name, criterion, algo):
    """Check that all methods return a good gradient using check_grad"""
    if criterion == 'MSE':
        criterion = HeldOutMSE(idx_train, idx_val)
    elif criterion == 'SURE':
        criterion = FiniteDiffMonteCarloSure(sigma_star)
    elif criterion == 'logistic':
        criterion = HeldOutLogistic(idx_train, idx_val)

    model = models[model_name]
    log_alpha = dict_log_alpha[model_name]

    def get_val(log_alpha):
        val, _ = criterion.get_val_grad(
            model, X, y, np.squeeze(log_alpha), algo.get_beta_jac_v, tol=tol)
        return val

    def get_grad(log_alpha):
        _, grad = criterion.get_val_grad(
            model, X, y, np.squeeze(log_alpha), algo.get_beta_jac_v, tol=tol)
        return grad

    for log_alpha in dict_list_log_alphas[model_name]:
        grad_error = check_grad(get_val, get_grad, log_alpha)
        assert grad_error < 1e-1
Ejemplo n.º 7
0
def test_grid_search():
    max_evals = 5

    monitor_grid = Monitor()
    model = Lasso(estimator=estimator)
    criterion = HeldOutMSE(idx_train, idx_train)
    algo = Forward()
    log_alpha_opt_grid, _ = grid_search(
        algo, criterion, model, X, y, log_alpha_min, log_alpha_max,
        monitor_grid, max_evals=max_evals,
        tol=1e-5, samp="grid")

    monitor_random = Monitor()
    criterion = HeldOutMSE(idx_train, idx_val)
    algo = Forward()
    log_alpha_opt_random, _ = grid_search(
        algo, criterion, model, X, y, log_alpha_min, log_alpha_max,
        monitor_random,
        max_evals=max_evals, tol=1e-5, samp="random")

    assert(monitor_random.log_alphas[
        np.argmin(monitor_random.objs)] == log_alpha_opt_random)
    assert(monitor_grid.log_alphas[
        np.argmin(monitor_grid.objs)] == log_alpha_opt_grid)

    monitor_grid = Monitor()
    model = Lasso(estimator=estimator)

    criterion = SmoothedSURE(sigma=sigma_star)
    algo = Forward()
    log_alpha_opt_grid, _ = grid_search(
        algo, criterion, model, X, y, log_alpha_min, log_alpha_max,
        monitor_grid, max_evals=max_evals,
        tol=1e-5, samp="grid")

    monitor_random = Monitor()
    criterion = SmoothedSURE(sigma=sigma_star)
    algo = Forward()
    log_alpha_opt_random, _ = grid_search(
        algo, criterion, model, X, y, log_alpha_min, log_alpha_max,
        monitor_random,
        max_evals=max_evals, tol=1e-5, samp="random")

    assert(monitor_random.log_alphas[
        np.argmin(monitor_random.objs)] == log_alpha_opt_random)
    assert(monitor_grid.log_alphas[
        np.argmin(monitor_grid.objs)] == log_alpha_opt_grid)
Ejemplo n.º 8
0
def test_monitor():
    model = Lasso(estimator=estimator)
    criterion = HeldOutMSE(idx_train, idx_val)
    algo = ImplicitForward()
    monitor = Monitor(callback=callback)
    optimizer = LineSearch(n_outer=10, tol=tol)
    grad_search(algo, criterion, model, optimizer, X, y, alpha0, monitor)

    np.testing.assert_allclose(np.array(monitor.objs), np.array(objs))
Ejemplo n.º 9
0
def test_cross_val_criterion(model_name, XX):
    model = models[model_name]
    alpha_min = alpha_max / 10
    max_iter = 10000
    n_alphas = 10
    kf = KFold(n_splits=5, shuffle=True, random_state=56)

    monitor_grid = Monitor()
    if model_name.startswith("lasso"):
        sub_crit = HeldOutMSE(None, None)
    else:
        sub_crit = HeldOutLogistic(None, None)
    criterion = CrossVal(sub_crit, cv=kf)
    grid_search(criterion,
                model,
                XX,
                y,
                alpha_min,
                alpha_max,
                monitor_grid,
                max_evals=n_alphas,
                tol=tol)

    if model_name.startswith("lasso"):
        reg = linear_model.LassoCV(cv=kf,
                                   verbose=True,
                                   tol=tol,
                                   fit_intercept=False,
                                   alphas=np.geomspace(alpha_max,
                                                       alpha_min,
                                                       num=n_alphas),
                                   max_iter=max_iter).fit(X, y)
    else:
        reg = linear_model.LogisticRegressionCV(
            cv=kf,
            verbose=True,
            tol=tol,
            fit_intercept=False,
            Cs=len(idx_train) /
            np.geomspace(alpha_max, alpha_min, num=n_alphas),
            max_iter=max_iter,
            penalty='l1',
            solver='liblinear').fit(X, y)
    reg.score(XX, y)
    if model_name.startswith("lasso"):
        objs_grid_sk = reg.mse_path_.mean(axis=1)
    else:
        objs_grid_sk = reg.scores_[1.0].mean(axis=1)
    # these 2 value should be the same
    (objs_grid_sk - np.array(monitor_grid.objs))
    np.testing.assert_allclose(objs_grid_sk, monitor_grid.objs)
Ejemplo n.º 10
0
    def __init__(self, X, y, Model, cv=None, max_iter=1000, estimator=None):
        """
        Parameters
        ----------
        X : {ndarray, sparse matrix} of shape (n_samples, n_features)
            Data
        y : {ndarray, sparse matrix} of shape (n_samples,)
            Target
        Model: class
            The Model class definition (e.g. Lasso or SparseLogreg)
        cv : int, cross-validation generator or iterable, default=None
            Determines the cross-validation splitting strategy.
            Possible inputs for cv are:

            - None, to use the default 5-fold cross-validation,
            - int, to specify the number of folds.
            - scikit-learn CV splitter
            - An iterable yielding (train, test) splits as arrays of indices.

            For int/None inputs, KFold is used.
        max_iter: int
            Maximal number of iteration for the state-of-the-art solver
        estimator: instance of ``sklearn.base.BaseEstimator``
            An estimator that follows the scikit-learn API.
        """
        self.X = X
        self.y = y
        self.dict_crits = {}
        self.dict_models = {}
        self.rmse = None
        self.estimator = estimator

        cv = check_cv(cv)

        for i, (idx_train, idx_val) in enumerate(cv.split(X)):
            X_train = X[idx_train, :]
            y_train = y[idx_train]

            if issparse(X_train):
                X_train = X_train.tocsc()

            # TODO get rid of this
            self.models[i] = Model(X_train,
                                   y_train,
                                   max_iter=max_iter,
                                   estimator=estimator)

            criterion = HeldOutMSE(idx_train, idx_val)

            self.dict_crits[i] = criterion
        self.n_splits = cv.n_splits
Ejemplo n.º 11
0
def test_grad_search(model, crit):
    """check that the paths are the same in the line search"""
    if crit == 'MSE':
        n_outer = 2
        criterion = HeldOutMSE(idx_train, idx_val)
    else:
        n_outer = 2
        criterion = FiniteDiffMonteCarloSure(sigma_star)
    # TODO MM@QBE if else scheme surprising

    criterion = HeldOutMSE(idx_train, idx_val)
    monitor1 = Monitor()
    algo = Forward()
    optimizer = LineSearch(n_outer=n_outer, tol=1e-16)
    grad_search(algo, criterion, model, optimizer, X, y, alpha0, monitor1)

    criterion = HeldOutMSE(idx_train, idx_val)
    monitor2 = Monitor()
    algo = Implicit()
    optimizer = LineSearch(n_outer=n_outer, tol=1e-16)
    grad_search(algo, criterion, model, optimizer, X, y, alpha0, monitor2)

    criterion = HeldOutMSE(idx_train, idx_val)
    monitor3 = Monitor()
    algo = ImplicitForward(tol_jac=1e-8, n_iter_jac=5000)
    optimizer = LineSearch(n_outer=n_outer, tol=1e-16)
    grad_search(algo, criterion, model, optimizer, X, y, alpha0, monitor3)

    np.testing.assert_allclose(np.array(monitor1.alphas),
                               np.array(monitor3.alphas))
    np.testing.assert_allclose(np.array(monitor1.grads),
                               np.array(monitor3.grads),
                               rtol=1e-5)
    np.testing.assert_allclose(np.array(monitor1.objs),
                               np.array(monitor3.objs))
    assert not np.allclose(np.array(monitor1.times), np.array(monitor3.times))
Ejemplo n.º 12
0
def test_monitor():
    model = Lasso(estimator=estimator)
    criterion = HeldOutMSE(idx_train, idx_val)
    algo = ImplicitForward()
    monitor = Monitor(callback=callback)

    grad_search(algo,
                criterion,
                model,
                X,
                y,
                np.log(alpha_max / 10),
                monitor,
                n_outer=10,
                tol=tol)

    np.testing.assert_allclose(np.array(monitor.objs), np.array(objs))
Ejemplo n.º 13
0
def test_val_grad():
    #######################################################################
    # Not all methods computes the full Jacobian, but all
    # compute the gradients
    # check that the gradient returned by all methods are the same
    criterion = HeldOutMSE(X_val, y_val, model)
    algo = Forward()
    val_fwd, grad_fwd = criterion.get_val_grad(np.array(
        [log_alpha1, log_alpha2]),
                                               algo.get_beta_jac_v,
                                               tol=tol)

    criterion = HeldOutMSE(X_val, y_val, model)
    algo = ImplicitForward(tol_jac=1e-16, n_iter_jac=5000)
    val_imp_fwd, grad_imp_fwd = criterion.get_val_grad(np.array(
        [log_alpha1, log_alpha2]),
                                                       algo.get_beta_jac_v,
                                                       tol=tol)

    criterion = HeldOutMSE(X_val, y_val, model)
    algo = ImplicitForward(tol_jac=1e-16, n_iter_jac=5000)
    val_imp_fwd_custom, grad_imp_fwd_custom = criterion.get_val_grad(
        np.array([log_alpha1, log_alpha2]), algo.get_beta_jac_v, tol=tol)

    criterion = HeldOutMSE(X_val, y_val, model)
    algo = Implicit()
    val_imp, grad_imp = criterion.get_val_grad(np.array(
        [log_alpha1, log_alpha2]),
                                               algo.get_beta_jac_v,
                                               tol=tol)
    assert np.allclose(val_fwd, val_imp_fwd)
    assert np.allclose(grad_fwd, grad_imp_fwd)
    assert np.allclose(val_imp_fwd, val_imp)
    assert np.allclose(val_imp_fwd, val_imp_fwd_custom)
    # for the implcit the conjugate grad does not converge
    # hence the rtol=1e-2
    assert np.allclose(grad_imp_fwd, grad_imp, atol=1e-3)
    assert np.allclose(grad_imp_fwd, grad_imp_fwd_custom)
Ejemplo n.º 14
0
def parallel_function(dataset_name,
                      method,
                      tol=1e-5,
                      n_outer=50,
                      tolerance_decrease='constant'):

    # load data
    X, y = fetch_libsvm(dataset_name)
    y -= np.mean(y)
    # compute alpha_max
    alpha_max = np.abs(X.T @ y).max() / len(y)

    if model_name == "logreg":
        alpha_max /= 2
    alpha_min = alpha_max * dict_palphamin[dataset_name]

    if model_name == "enet":
        estimator = linear_model.ElasticNet(fit_intercept=False,
                                            max_iter=10_000,
                                            warm_start=True,
                                            tol=tol)
        model = ElasticNet(estimator=estimator)
    elif model_name == "logreg":
        model = SparseLogreg(estimator=estimator)

    # TODO improve this
    try:
        n_outer = dict_n_outers[dataset_name, method]
    except Exception:
        n_outer = 20

    size_loop = 2
    for _ in range(size_loop):
        if model_name == "lasso" or model_name == "enet":
            sub_criterion = HeldOutMSE(None, None)
        elif model_name == "logreg":
            criterion = HeldOutLogistic(None, None)
        kf = KFold(n_splits=5, shuffle=True, random_state=42)
        criterion = CrossVal(sub_criterion, cv=kf)

        algo = ImplicitForward(tol_jac=1e-3)
        monitor = Monitor()
        t_max = dict_t_max[dataset_name]
        if method == 'grid_search':
            num1D = dict_point_grid_search[dataset_name]
            alpha1D = np.geomspace(alpha_max, alpha_min, num=num1D)
            alphas = [np.array(i) for i in product(alpha1D, alpha1D)]
            grid_search(algo,
                        criterion,
                        model,
                        X,
                        y,
                        alpha_min,
                        alpha_max,
                        monitor,
                        max_evals=100,
                        tol=tol,
                        alphas=alphas)
        elif method == 'random' or method == 'bayesian':
            hyperopt_wrapper(algo,
                             criterion,
                             model,
                             X,
                             y,
                             alpha_min,
                             alpha_max,
                             monitor,
                             max_evals=30,
                             tol=tol,
                             method=method,
                             size_space=2,
                             t_max=t_max)
        elif method.startswith("implicit_forward"):
            # do gradient descent to find the optimal lambda
            alpha0 = np.array([alpha_max / 100, alpha_max / 100])
            n_outer = 30
            if method == 'implicit_forward':
                optimizer = GradientDescent(n_outer=n_outer,
                                            p_grad_norm=1,
                                            verbose=True,
                                            tol=tol,
                                            t_max=t_max)
            else:
                optimizer = GradientDescent(n_outer=n_outer,
                                            p_grad_norm=1,
                                            verbose=True,
                                            tol=tol,
                                            t_max=t_max,
                                            tol_decrease="geom")
            grad_search(algo, criterion, model, optimizer, X, y, alpha0,
                        monitor)
        else:
            raise NotImplementedError

    monitor.times = np.array(monitor.times)
    monitor.objs = np.array(monitor.objs)
    monitor.objs_test = 0  # TODO
    monitor.alphas = np.array(monitor.alphas)
    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test, monitor.alphas,
            alpha_max, model_name)
Ejemplo n.º 15
0
import pytest
import numpy as np

from sparse_ho.criterion import (HeldOutMSE, HeldOutLogistic,
                                 FiniteDiffMonteCarloSure)
from sparse_ho.utils import Monitor
from sparse_ho import Forward

from sparse_ho.tests.common import (X, y, sigma_star, idx_train, idx_val,
                                    models, dict_list_log_alphas)

list_model_crit = [('lasso', HeldOutMSE(idx_train, idx_val)),
                   ('enet', HeldOutMSE(idx_train, idx_val)),
                   ('wLasso', HeldOutMSE(idx_train, idx_val)),
                   ('lasso', FiniteDiffMonteCarloSure(sigma_star)),
                   ('logreg', HeldOutLogistic(idx_train, idx_val))]

tol = 1e-15


@pytest.mark.parametrize('model_name,criterion', list_model_crit)
def test_cross_val_criterion(model_name, criterion):
    # verify dtype from criterion, and the good shape
    algo = Forward()
    monitor_get_val = Monitor()
    monitor_get_val_grad = Monitor()

    model = models[model_name]
    for log_alpha in dict_list_log_alphas[model_name]:
        criterion.get_val(model,
                          X,
Ejemplo n.º 16
0
def test_val_grad():
    #######################################################################
    # Not all methods computes the full Jacobian, but all
    # compute the gradients
    # check that the gradient returned by all methods are the same
    for key in models.keys():
        # model = Lasso(log_alpha)
        log_alpha = dict_log_alpha[key]
        model = models[key]
        # model = Lasso(log_alpha)

        criterion = HeldOutMSE(X_val, y_val, model)
        algo = Forward()
        val_fwd, grad_fwd = criterion.get_val_grad(log_alpha,
                                                   algo.get_beta_jac_v,
                                                   tol=tol)

        criterion = HeldOutMSE(X_val, y_val, model)
        algo = ImplicitForward(tol_jac=1e-8, n_iter_jac=5000)
        val_imp_fwd, grad_imp_fwd = criterion.get_val_grad(log_alpha,
                                                           algo.get_beta_jac_v,
                                                           tol=tol)

        criterion = HeldOutMSE(X_val, y_val, model)
        algo = Implicit()
        val_imp, grad_imp = criterion.get_val_grad(log_alpha,
                                                   algo.get_beta_jac_v,
                                                   tol=tol)

        # import ipdb; ipdb.set_trace()

        criterion = HeldOutMSE(X_val, y_val, model)
        algo = Backward()
        val_bwd, grad_bwd = criterion.get_val_grad(log_alpha,
                                                   algo.get_beta_jac_v,
                                                   tol=tol)

        assert np.allclose(val_fwd, val_imp_fwd)
        assert np.allclose(grad_fwd, grad_imp_fwd)
        # assert np.allclose(val_imp_fwd, val_imp)
        assert np.allclose(val_bwd, val_fwd)
        assert np.allclose(val_bwd, val_imp_fwd)
        assert np.allclose(grad_fwd, grad_bwd)
        assert np.allclose(grad_bwd, grad_imp_fwd)

        # for the implcit the conjugate grad does not converge
        # hence the rtol=1e-2
        assert np.allclose(grad_imp_fwd, grad_imp, atol=1e-3)

    for key in models.keys():
        # model = Lasso(log_alpha)
        log_alpha = dict_log_alpha[key]
        model = models[key]
        # model = Lasso(log_alpha)
        criterion = SURE(X_train, y_train, model, sigma_star)
        algo = Forward()
        val_fwd, grad_fwd = criterion.get_val_grad(log_alpha,
                                                   algo.get_beta_jac_v,
                                                   tol=tol)

        criterion = SURE(X_train, y_train, model, sigma_star)
        algo = ImplicitForward(tol_jac=1e-8, n_iter_jac=5000)
        val_imp_fwd, grad_imp_fwd = criterion.get_val_grad(log_alpha,
                                                           algo.get_beta_jac_v,
                                                           tol=tol)

        criterion = SURE(X_train, y_train, model, sigma_star)
        algo = Implicit(criterion)
        val_imp, grad_imp = criterion.get_val_grad(log_alpha,
                                                   algo.get_beta_jac_v,
                                                   tol=tol)

        criterion = SURE(X_train, y_train, model, sigma_star)
        algo = Backward()
        val_bwd, grad_bwd = criterion.get_val_grad(log_alpha,
                                                   algo.get_beta_jac_v,
                                                   tol=tol)

        assert np.allclose(val_fwd, val_imp_fwd)
        assert np.allclose(grad_fwd, grad_imp_fwd)
        assert np.allclose(val_imp_fwd, val_imp)
        assert np.allclose(val_bwd, val_fwd)
        assert np.allclose(val_bwd, val_imp_fwd)
        assert np.allclose(grad_fwd, grad_bwd)
        assert np.allclose(grad_bwd, grad_imp_fwd)
Ejemplo n.º 17
0
                   cv=2,
                   n_jobs=2).fit(X_train_val, y_train_val)

# Measure mse on test
mse_cv = mean_squared_error(y_test, model_cv.predict(X_test))
print("Vanilla LassoCV: Mean-squared error on test data %f" % mse_cv)
##############################################################################

##############################################################################
# Weighted Lasso with sparse-ho.
# We use the vanilla lassoCV coefficients as a starting point
alpha0 = np.log(model_cv.alpha_) * np.ones(X_train.shape[1])
# Weighted Lasso: Sparse-ho: 1 param per feature
estimator = Lasso(fit_intercept=False, max_iter=10, warm_start=True)
model = WeightedLasso(X_train, y_train, estimator=estimator)
criterion = HeldOutMSE(X_val, y_val, model, X_test=X_test, y_test=y_test)
algo = ImplicitForward()
monitor = Monitor()
grad_search(algo, criterion, alpha0, monitor, n_outer=20, tol=1e-6)
##############################################################################

##############################################################################
# MSE on validation set
mse_sho_val = mean_squared_error(y_val, estimator.predict(X_val))

# MSE on test set, ie unseen data
mse_sho_test = mean_squared_error(y_test, estimator.predict(X_test))

print("Sparse-ho: Mean-squared error on validation data %f" % mse_sho_val)
print("Sparse-ho: Mean-squared error on test (unseen) data %f" % mse_sho_test)
Ejemplo n.º 18
0
                   cv=2,
                   n_jobs=2).fit(X, y)

# Measure mse on test
mse_cv = mean_squared_error(y_test, model_cv.predict(X_test))
print("Vanilla LassoCV: Mean-squared error on test data %f" % mse_cv)
##############################################################################

##############################################################################
# Weighted Lasso with sparse-ho.
# We use the vanilla lassoCV coefficients as a starting point
log_alpha0 = np.log(model_cv.alpha_) * np.ones(n_features)
# Weighted Lasso: Sparse-ho: 1 param per feature
estimator = Lasso(fit_intercept=False, max_iter=10, warm_start=True)
model = WeightedLasso(estimator=estimator)
criterion = HeldOutMSE(idx_train, idx_val)
algo = ImplicitForward()
monitor = Monitor()
grad_search(algo,
            criterion,
            model,
            X,
            y,
            log_alpha0,
            monitor,
            n_outer=20,
            tol=1e-6)
##############################################################################

##############################################################################
# MSE on validation set
Ejemplo n.º 19
0
def parallel_function(dataset_name,
                      method,
                      tol=1e-5,
                      n_outer=50,
                      tolerance_decrease='exponential'):

    # load data
    X_train, X_val, X_test, y_train, y_val, y_test = get_data(dataset_name)
    n_samples, n_features = X_train.shape
    print('n_samples', n_samples)
    print('n_features', n_features)
    y_train[y_train == 0.0] = -1.0
    y_val[y_val == 0.0] = -1.0
    y_test[y_test == 0.0] = -1.0

    alpha_max = np.max(np.abs(X_train.T @ y_train))
    alpha_max /= X_train.shape[0]
    log_alpha_max = np.log(alpha_max)

    alpha_min = alpha_max * 1e-2
    # alphas = np.geomspace(alpha_max, alpha_min, 10)
    # log_alphas = np.log(alphas)

    log_alpha1_0 = np.log(0.1 * alpha_max)
    log_alpha2_0 = np.log(0.1 * alpha_max)

    log_alpha_max = np.log(alpha_max)
    n_outer = 25

    if dataset_name == "rcv1":
        size_loop = 2
    else:
        size_loop = 2
    model = ElasticNet(X_train,
                       y_train,
                       log_alpha1_0,
                       log_alpha2_0,
                       log_alpha_max,
                       max_iter=1000,
                       tol=tol)
    for i in range(size_loop):
        monitor = Monitor()

        if method == "implicit_forward":
            criterion = HeldOutMSE(X_val,
                                   y_val,
                                   model,
                                   X_test=X_test,
                                   y_test=y_test)
            algo = ImplicitForward(criterion, tol_jac=1e-3, n_iter_jac=100)
            _, _, _ = grad_search(algo=algo,
                                  verbose=False,
                                  log_alpha0=np.array(
                                      [log_alpha1_0, log_alpha2_0]),
                                  tol=tol,
                                  n_outer=n_outer,
                                  monitor=monitor,
                                  t_max=dict_t_max[dataset_name],
                                  tolerance_decrease=tolerance_decrease)

        elif method == "forward":
            criterion = HeldOutMSE(X_val,
                                   y_val,
                                   model,
                                   X_test=X_test,
                                   y_test=y_test)
            algo = Forward(criterion)
            _, _, _ = grad_search(algo=algo,
                                  log_alpha0=np.array(
                                      [log_alpha1_0, log_alpha2_0]),
                                  tol=tol,
                                  n_outer=n_outer,
                                  monitor=monitor,
                                  t_max=dict_t_max[dataset_name],
                                  tolerance_decrease=tolerance_decrease)

        elif method == "implicit":
            criterion = HeldOutMSE(X_val,
                                   y_val,
                                   model,
                                   X_test=X_test,
                                   y_test=y_test)
            algo = Implicit(criterion)
            _, _, _ = grad_search(algo=algo,
                                  log_alpha0=np.array(
                                      [log_alpha1_0, log_alpha2_0]),
                                  tol=tol,
                                  n_outer=n_outer,
                                  monitor=monitor,
                                  t_max=dict_t_max[dataset_name],
                                  tolerance_decrease=tolerance_decrease)

        elif method == "grid_search":
            criterion = HeldOutMSE(X_val,
                                   y_val,
                                   model,
                                   X_test=X_test,
                                   y_test=y_test)
            algo = Forward(criterion)
            log_alpha_min = np.log(alpha_min)
            log_alpha_opt, min_g_func = grid_search(
                algo,
                log_alpha_min,
                log_alpha_max,
                monitor,
                max_evals=10,
                tol=tol,
                samp="grid",
                t_max=dict_t_max[dataset_name],
                log_alphas=None,
                nb_hyperparam=2)
            print(log_alpha_opt)

        elif method == "random":
            criterion = HeldOutMSE(X_val,
                                   y_val,
                                   model,
                                   X_test=X_test,
                                   y_test=y_test)
            algo = Forward(criterion)
            log_alpha_min = np.log(alpha_min)
            log_alpha_opt, min_g_func = grid_search(
                algo,
                log_alpha_min,
                np.log(alpha_max),
                monitor,
                max_evals=10,
                tol=tol,
                samp="random",
                t_max=dict_t_max[dataset_name],
                nb_hyperparam=2)
            print(log_alpha_opt)

        elif method == "lhs":
            criterion = HeldOutMSE(X_val,
                                   y_val,
                                   model,
                                   X_test=X_test,
                                   y_test=y_test)
            algo = Forward(criterion)
            log_alpha_min = np.log(alpha_min)
            log_alpha_opt, min_g_func = grid_search(
                algo,
                log_alpha_min,
                np.log(alpha_max),
                monitor,
                max_evals=10,
                tol=tol,
                samp="lhs",
                t_max=dict_t_max[dataset_name])
            print(log_alpha_opt)

    monitor.times = np.array(monitor.times).copy()
    monitor.objs = np.array(monitor.objs).copy()
    monitor.objs_test = np.array(monitor.objs_test).copy()
    monitor.log_alphas = np.array(monitor.log_alphas).copy()
    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test, monitor.log_alphas,
            norm(y_val), norm(y_test), log_alpha_max)
Ejemplo n.º 20
0
def parallel_function(name_model, div_alpha):
    index_col = np.arange(10)
    alpha_max = (np.abs(X[np.ix_(idx_train, index_col)].T
                        @ y[idx_train])).max() / len(idx_train)
    if name_model == "lasso":
        log_alpha = np.log(alpha_max / div_alpha)
    elif name_model == "enet":
        alpha0 = alpha_max / div_alpha
        alpha1 = (1 - l1_ratio) * alpha0 / l1_ratio
        log_alpha = np.log(np.array([alpha0, alpha1]))

    criterion = HeldOutMSE(idx_train, idx_val)
    algo = Forward()
    monitor = Monitor()
    val, grad = criterion.get_val_grad(dict_models[name_model],
                                       X[:, index_col],
                                       y,
                                       log_alpha,
                                       algo.compute_beta_grad,
                                       tol=tol,
                                       monitor=monitor)

    criterion = HeldOutMSE(idx_train, idx_val)
    algo = Backward()
    monitor = Monitor()
    val, grad = criterion.get_val_grad(dict_models[name_model],
                                       X[:, index_col],
                                       y,
                                       log_alpha,
                                       algo.compute_beta_grad,
                                       tol=tol,
                                       monitor=monitor)

    val_cvxpy, grad_cvxpy = dict_cvxpy[name_model](X[:, index_col], y,
                                                   np.exp(log_alpha),
                                                   idx_train, idx_val)

    list_times_fwd = []
    list_times_bwd = []
    list_times_cvxpy = []
    for n_col in dict_ncols[div_alpha]:
        temp_fwd = []
        temp_bwd = []
        temp_cvxpy = []
        for i in range(repeat):

            rng = np.random.RandomState(i)
            index_col = rng.choice(n_features, n_col, replace=False)
            alpha_max = (np.abs(X[np.ix_(idx_train, index_col)].T
                                @ y[idx_train])).max() / len(idx_train)
            if name_model == "lasso":
                log_alpha = np.log(alpha_max / div_alpha)
            elif name_model == "enet":
                alpha0 = alpha_max / div_alpha
                alpha1 = (1 - l1_ratio) * alpha0 / l1_ratio
                log_alpha = np.log(np.array([alpha0, alpha1]))

            criterion = HeldOutMSE(idx_train, idx_val)
            algo = Forward()
            monitor = Monitor()
            val, grad = criterion.get_val_grad(dict_models[name_model],
                                               X[:, index_col],
                                               y,
                                               log_alpha,
                                               algo.compute_beta_grad,
                                               tol=tol,
                                               monitor=monitor)
            temp_fwd.append(monitor.times)

            criterion = HeldOutMSE(idx_train, idx_val)
            algo = Backward()
            monitor = Monitor()
            val, grad = criterion.get_val_grad(dict_models[name_model],
                                               X[:, index_col],
                                               y,
                                               log_alpha,
                                               algo.compute_beta_grad,
                                               tol=tol,
                                               monitor=monitor)
            temp_bwd.append(monitor.times)

            t0 = time.time()
            val_cvxpy, grad_cvxpy = dict_cvxpy[name_model](X[:, index_col], y,
                                                           np.exp(log_alpha),
                                                           idx_train, idx_val)
            temp_cvxpy.append(time.time() - t0)

            print(np.abs(grad - grad_cvxpy * np.exp(log_alpha)))
        list_times_fwd.append(np.mean(np.array(temp_fwd)))
        list_times_bwd.append(np.mean(np.array(temp_bwd)))
        list_times_cvxpy.append(np.mean(np.array(temp_cvxpy)))

    np.save("results/times_%s_forward_%s" % (name_model, div_alpha),
            list_times_fwd)
    np.save("results/times_%s_backward_%s" % (name_model, div_alpha),
            list_times_bwd)
    np.save("results/times_%s_cvxpy_%s" % (name_model, div_alpha),
            list_times_cvxpy)
    np.save("results/nfeatures_%s_%s" % (name_model, div_alpha),
            dict_ncols[div_alpha])
Ejemplo n.º 21
0
estimator = linear_model.ElasticNet(fit_intercept=False,
                                    max_iter=1000,
                                    warm_start=True,
                                    tol=tol)

dict_monitor = {}

all_algo_name = ['grid_search']
# , 'implicit_forward', "implicit_forward_approx", 'bayesian']
# , 'random_search']
# all_algo_name = ['random_search']

for algo_name in all_algo_name:
    model = ElasticNet(estimator=estimator)
    sub_criterion = HeldOutMSE(None, None)
    alpha0 = np.array([alpha_max / 10, alpha_max / 10])
    monitor = Monitor()
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    criterion = CrossVal(sub_criterion, cv=kf)
    algo = ImplicitForward(tol_jac=1e-3)
    # optimizer = LineSearch(n_outer=10, tol=tol)
    if algo_name.startswith('implicit_forward'):
        if algo_name == "implicit_forward_approx":
            optimizer = GradientDescent(n_outer=30,
                                        p_grad_norm=1.,
                                        verbose=True,
                                        tol=tol,
                                        tol_decrease="geom")
        else:
            optimizer = GradientDescent(n_outer=30,
Ejemplo n.º 22
0
def parallel_function(dataset_name,
                      method,
                      tol=1e-5,
                      n_outer=50,
                      tolerance_decrease='constant'):

    # load data
    X_train, X_val, X_test, y_train, y_val, y_test = get_data(dataset_name)
    n_samples, _ = X_train.shape
    # compute alpha_max
    alpha_max = np.abs(X_train.T @ y_train).max() / n_samples

    if model_name == "logreg":
        alpha_max /= 2
    alpha_min = alpha_max / 10_000
    log_alpha_max = np.log(alpha_max)
    log_alpha_min = np.log(alpha_min)
    log_alpha0 = np.log(0.1 * alpha_max)

    if model_name == "lasso":
        model = Lasso(X_train, y_train)
    elif model_name == "logreg":
        model = SparseLogreg(X_train, y_train)

    try:
        n_outer = dict_n_outers[dataset_name, method]
    except Exception:
        n_outer = 20

    size_loop = 2

    for _ in range(size_loop):
        if model_name == "lasso":
            criterion = HeldOutMSE(X_val,
                                   y_val,
                                   model,
                                   X_test=X_test,
                                   y_test=y_test)
        elif model_name == "logreg":
            criterion = HeldOutLogistic(X_val,
                                        y_val,
                                        model,
                                        X_test=X_test,
                                        y_test=y_test)
        algo = dict_algo[method](criterion)
        monitor = Monitor()
        if method == 'grid_search':
            log_alphas = np.log(np.geomspace(alpha_max, alpha_min, num=100))
            grid_search(algo,
                        None,
                        None,
                        monitor,
                        log_alphas=log_alphas,
                        tol=tol)
        elif method == 'random':
            grid_search(algo,
                        log_alpha_max,
                        log_alpha_min,
                        monitor,
                        tol=tol,
                        max_evals=n_alphas,
                        t_max=dict_t_max[dataset_name])
        elif method in ("bayesian"):
            hyperopt_wrapper(algo,
                             log_alpha_min,
                             log_alpha_max,
                             monitor,
                             max_evals=n_alphas,
                             tol=tol,
                             method='bayesian',
                             t_max=dict_t_max[dataset_name])
        else:
            # do line search to find the optimal lambda
            grad_search(algo,
                        log_alpha0,
                        monitor,
                        n_outer=n_outer,
                        tol=tol,
                        tolerance_decrease=tolerance_decrease,
                        t_max=dict_t_max[dataset_name])

        monitor.times = np.array(monitor.times)
        monitor.objs = np.array(monitor.objs)
        monitor.objs_test = np.array(monitor.objs_test)
        monitor.log_alphas = np.array(monitor.log_alphas)
    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test, monitor.log_alphas,
            norm(y_val), norm(y_test), log_alpha_max, model_name)
Ejemplo n.º 23
0
def parallel_function(dataset_name, div_alpha, method):
    X, y = fetch_libsvm(dataset_name)
    n_samples = len(y)
    if dataset_name == "news20" and div_alpha == 100:
        rng = np.random.RandomState(42)
        y += rng.randn(n_samples) * 0.01
    for maxit in dict_maxits[(dataset_name, div_alpha)]:
        print("Dataset %s, maxit %i" % (method, maxit))
        for i in range(2):
            rng = np.random.RandomState(i)
            idx_train = rng.choice(n_samples, n_samples // 2, replace=False)
            idx = np.arange(0, n_samples)
            idx_val = idx[np.logical_not(np.isin(idx, idx_train))]
            alpha_max = np.max(np.abs(X[idx_train, :].T.dot(y[idx_train])))
            alpha_max /= len(idx_train)
            log_alpha = np.log(alpha_max / div_alpha)
            monitor = Monitor()
            if method == "celer":
                clf = Lasso_celer(alpha=np.exp(log_alpha),
                                  fit_intercept=False,
                                  tol=1e-12,
                                  max_iter=maxit)
                model = Lasso(estimator=clf, max_iter=maxit)
                criterion = HeldOutMSE(idx_train, idx_val)
                algo = ImplicitForward(tol_jac=1e-32,
                                       n_iter_jac=maxit,
                                       use_stop_crit=False)
                algo.max_iter = maxit
                val, grad = criterion.get_val_grad(model,
                                                   X,
                                                   y,
                                                   log_alpha,
                                                   algo.compute_beta_grad,
                                                   tol=1e-12,
                                                   monitor=monitor,
                                                   max_iter=maxit)
            elif method == "ground_truth":
                for file in os.listdir("results/"):
                    if file.startswith("hypergradient_%s_%i_%s" %
                                       (dataset_name, div_alpha, method)):
                        return
                clf = Lasso_celer(alpha=np.exp(log_alpha),
                                  fit_intercept=False,
                                  warm_start=True,
                                  tol=1e-14,
                                  max_iter=10000)
                criterion = HeldOutMSE(idx_train, idx_val)
                if dataset_name == "news20":
                    algo = ImplicitForward(tol_jac=1e-11, n_iter_jac=100000)
                else:
                    algo = Implicit(criterion)
                model = Lasso(estimator=clf, max_iter=10000)
                val, grad = criterion.get_val_grad(model,
                                                   X,
                                                   y,
                                                   log_alpha,
                                                   algo.compute_beta_grad,
                                                   tol=1e-14,
                                                   monitor=monitor)
            else:
                model = Lasso(max_iter=maxit)
                criterion = HeldOutMSE(idx_train, idx_val)
                if method == "forward":
                    algo = Forward(use_stop_crit=False)
                elif method == "implicit_forward":
                    algo = ImplicitForward(tol_jac=1e-8,
                                           n_iter_jac=maxit,
                                           use_stop_crit=False)
                elif method == "implicit":
                    algo = Implicit(max_iter=1000)
                elif method == "backward":
                    algo = Backward()
                else:
                    raise NotImplementedError
                algo.max_iter = maxit
                algo.use_stop_crit = False
                val, grad = criterion.get_val_grad(model,
                                                   X,
                                                   y,
                                                   log_alpha,
                                                   algo.compute_beta_grad,
                                                   tol=tol,
                                                   monitor=monitor,
                                                   max_iter=maxit)

        results = (dataset_name, div_alpha, method, maxit, val, grad,
                   monitor.times[0])
        df = pandas.DataFrame(results).transpose()
        df.columns = [
            'dataset', 'div_alpha', 'method', 'maxit', 'val', 'grad', 'time'
        ]
        str_results = "results/hypergradient_%s_%i_%s_%i.pkl" % (
            dataset_name, div_alpha, method, maxit)
        df.to_pickle(str_results)
Ejemplo n.º 24
0
def test_grid_search():
    max_evals = 5

    monitor_grid = Monitor()
    model = Lasso(estimator=estimator)
    criterion = HeldOutMSE(idx_train, idx_train)
    alpha_opt_grid, _ = grid_search(criterion,
                                    model,
                                    X,
                                    y,
                                    alpha_min,
                                    alpha_max,
                                    monitor_grid,
                                    max_evals=max_evals,
                                    tol=1e-5,
                                    samp="grid")

    monitor_random = Monitor()
    criterion = HeldOutMSE(idx_train, idx_val)
    alpha_opt_random, _ = grid_search(criterion,
                                      model,
                                      X,
                                      y,
                                      alpha_min,
                                      alpha_max,
                                      monitor_random,
                                      max_evals=max_evals,
                                      tol=1e-5,
                                      samp="random")

    np.testing.assert_allclose(
        monitor_random.alphas[np.argmin(monitor_random.objs)],
        alpha_opt_random)
    np.testing.assert_allclose(
        monitor_grid.alphas[np.argmin(monitor_grid.objs)], alpha_opt_grid)

    monitor_grid = Monitor()
    model = Lasso(estimator=estimator)

    criterion = FiniteDiffMonteCarloSure(sigma=sigma_star)
    alpha_opt_grid, _ = grid_search(criterion,
                                    model,
                                    X,
                                    y,
                                    alpha_min,
                                    alpha_max,
                                    monitor_grid,
                                    max_evals=max_evals,
                                    tol=1e-5,
                                    samp="grid")

    monitor_random = Monitor()
    criterion = FiniteDiffMonteCarloSure(sigma=sigma_star)
    alpha_opt_random, _ = grid_search(criterion,
                                      model,
                                      X,
                                      y,
                                      alpha_min,
                                      alpha_max,
                                      monitor_random,
                                      max_evals=max_evals,
                                      tol=1e-5,
                                      samp="random")

    np.testing.assert_allclose(
        monitor_random.alphas[np.argmin(monitor_random.objs)],
        alpha_opt_random)
    np.testing.assert_allclose(
        monitor_grid.alphas[np.argmin(monitor_grid.objs)], alpha_opt_grid)
Ejemplo n.º 25
0
def parallel_function(dataset_name,
                      div_alpha,
                      method,
                      ind_rep,
                      random_state=10):
    maxit = dict_maxits[(dataset_name, div_alpha)][ind_rep]
    print("Dataset %s, algo %s, maxit %i" % (dataset_name, method, maxit))
    X, y = fetch_libsvm(dataset_name)
    n_samples = len(y)

    kf = KFold(n_splits=5, random_state=random_state, shuffle=True)

    for i in range(2):
        alpha_max = np.max(np.abs(X.T.dot(y))) / n_samples
        log_alpha = np.log(alpha_max / div_alpha)
        monitor = Monitor()
        if method == "celer":
            clf = Lasso_celer(
                alpha=np.exp(log_alpha),
                fit_intercept=False,
                # TODO maybe change this tol
                tol=1e-8,
                max_iter=maxit)
            model = Lasso(estimator=clf, max_iter=maxit)
            criterion = HeldOutMSE(None, None)
            cross_val = CrossVal(cv=kf, criterion=criterion)
            algo = ImplicitForward(tol_jac=1e-8,
                                   n_iter_jac=maxit,
                                   use_stop_crit=False)
            algo.max_iter = maxit
            val, grad = cross_val.get_val_grad(model,
                                               X,
                                               y,
                                               log_alpha,
                                               algo.get_beta_jac_v,
                                               tol=tol,
                                               monitor=monitor,
                                               max_iter=maxit)
        elif method == "ground_truth":
            for file in os.listdir("results/"):
                if file.startswith("hypergradient_%s_%i_%s" %
                                   (dataset_name, div_alpha, method)):
                    return
                else:
                    clf = Lasso_celer(alpha=np.exp(log_alpha),
                                      fit_intercept=False,
                                      warm_start=True,
                                      tol=1e-13,
                                      max_iter=10000)
                    criterion = HeldOutMSE(None, None)
                    cross_val = CrossVal(cv=kf, criterion=criterion)
                    algo = Implicit(criterion)
                    model = Lasso(estimator=clf, max_iter=10000)
                    val, grad = cross_val.get_val_grad(model,
                                                       X,
                                                       y,
                                                       log_alpha,
                                                       algo.get_beta_jac_v,
                                                       tol=1e-13,
                                                       monitor=monitor)
        else:
            model = Lasso(max_iter=maxit)
            criterion = HeldOutMSE(None, None)
            cross_val = CrossVal(cv=kf, criterion=criterion)
            if method == "forward":
                algo = Forward(use_stop_crit=False)
            elif method == "implicit_forward":
                algo = ImplicitForward(use_stop_crit=False,
                                       tol_jac=1e-8,
                                       n_iter_jac=maxit,
                                       max_iter=1000)
            elif method == "implicit":
                algo = Implicit(use_stop_crit=False, max_iter=1000)
            elif method == "backward":
                algo = Backward()
            else:
                1 / 0
            algo.max_iter = maxit
            algo.use_stop_crit = False
            val, grad = cross_val.get_val_grad(model,
                                               X,
                                               y,
                                               log_alpha,
                                               algo.get_beta_jac_v,
                                               tol=tol,
                                               monitor=monitor,
                                               max_iter=maxit)

    results = (dataset_name, div_alpha, method, maxit, val, grad,
               monitor.times[0])
    df = pandas.DataFrame(results).transpose()
    df.columns = [
        'dataset', 'div_alpha', 'method', 'maxit', 'val', 'grad', 'time'
    ]
    str_results = "results/hypergradient_%s_%i_%s_%i.pkl" % (
        dataset_name, div_alpha, method, maxit)
    df.to_pickle(str_results)
Ejemplo n.º 26
0
dict_res = {}

for maxit in maxits:
    for method in methods:
        print("Dataset %s, maxit %i" % (method, maxit))
        for i in range(2):
            alpha_max = np.max(np.abs(X.T.dot(y))) / n_samples
            log_alpha = np.log(alpha_max * p_alpha_max)
            monitor = Monitor()
            if method == "celer":
                clf = Lasso_celer(alpha=np.exp(log_alpha),
                                  fit_intercept=False,
                                  tol=1e-12,
                                  max_iter=maxit)
                model = Lasso(estimator=clf, max_iter=maxit)
                criterion = HeldOutMSE(idx_train, idx_val)
                algo = ImplicitForward(tol_jac=1e-32,
                                       n_iter_jac=maxit,
                                       use_stop_crit=False)
                algo.max_iter = maxit
                val, grad = criterion.get_val_grad(model,
                                                   X,
                                                   y,
                                                   log_alpha,
                                                   algo.get_beta_jac_v,
                                                   tol=1e-12,
                                                   monitor=monitor,
                                                   max_iter=maxit)
            else:
                model = Lasso(max_iter=maxit)
                criterion = HeldOutMSE(idx_train, idx_val)
Ejemplo n.º 27
0
def test_grid_search():
    max_evals = 5

    monitor_grid = Monitor()
    model = Lasso(X_train, y_train, estimator=estimator)
    criterion = HeldOutMSE(X_val, y_val, model, X_test=X_test, y_test=y_test)
    algo = Forward()
    log_alpha_opt_grid, _ = grid_search(algo,
                                        criterion,
                                        log_alpha_min,
                                        log_alpha_max,
                                        monitor_grid,
                                        max_evals=max_evals,
                                        tol=1e-5,
                                        samp="grid")

    monitor_random = Monitor()
    criterion = HeldOutMSE(X_val, y_val, model, X_test=X_test, y_test=y_test)
    algo = Forward()
    log_alpha_opt_random, _ = grid_search(algo,
                                          criterion,
                                          log_alpha_min,
                                          log_alpha_max,
                                          monitor_random,
                                          max_evals=max_evals,
                                          tol=1e-5,
                                          samp="random")

    assert (monitor_random.log_alphas[np.argmin(
        monitor_random.objs)] == log_alpha_opt_random)
    assert (monitor_grid.log_alphas[np.argmin(
        monitor_grid.objs)] == log_alpha_opt_grid)

    monitor_grid = Monitor()
    model = Lasso(X_train, y_train, estimator=estimator)
    criterion = SURE(X_train,
                     y_train,
                     model,
                     sigma=sigma_star,
                     X_test=X_test,
                     y_test=y_test)
    algo = Forward()
    log_alpha_opt_grid, _ = grid_search(algo,
                                        criterion,
                                        log_alpha_min,
                                        log_alpha_max,
                                        monitor_grid,
                                        max_evals=max_evals,
                                        tol=1e-5,
                                        samp="grid")

    monitor_random = Monitor()
    criterion = SURE(X_train,
                     y_train,
                     model,
                     sigma=sigma_star,
                     X_test=X_test,
                     y_test=y_test)
    algo = Forward()
    log_alpha_opt_random, _ = grid_search(algo,
                                          criterion,
                                          log_alpha_min,
                                          log_alpha_max,
                                          monitor_random,
                                          max_evals=max_evals,
                                          tol=1e-5,
                                          samp="random")

    assert (monitor_random.log_alphas[np.argmin(
        monitor_random.objs)] == log_alpha_opt_random)
    assert (monitor_grid.log_alphas[np.argmin(
        monitor_grid.objs)] == log_alpha_opt_grid)