Example #1
def test_val_grad_custom(model, model_custom):
    criterion = HeldOutLogistic(X_val, y_val, model)
    algo = ImplicitForward(tol_jac=1e-8, n_iter_jac=5000)
    val, grad = criterion.get_val_grad(
        log_alpha, algo.get_beta_jac_v, tol=tol)

    criterion = HeldOutLogistic(X_val, y_val, model_custom)
    algo = ImplicitForward(tol_jac=1e-8, n_iter_jac=5000)
    val_custom, grad_custom = criterion.get_val_grad(
        log_alpha, algo.get_beta_jac_v, tol=tol)

    assert np.allclose(val, val_custom)
    assert np.allclose(grad, grad_custom)
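These tests reference module-level names (X_val, y_val, log_alpha, tol and the model fixtures) that are defined elsewhere in the test module. A minimal sketch of what that setup could look like; every value below is illustrative, not taken from the source:

import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# hypothetical data and hyperparameters; the real module defines its own
X, y = make_classification(n_samples=100, n_features=20, random_state=0)
X_train, X_val, y_train, y_val = train_test_split(X, y, random_state=0)
tol = 1e-8
log_alpha = np.log(0.1)  # starting regularization strength, on a log scale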
Example #2
def test_grad_search(model, crit):
    """check that the paths are the same in the line search"""
    n_outer = 2

    criterion = HeldOutLogistic(X_val, y_val, model)
    monitor1 = Monitor()
    algo = Forward()
    grad_search(algo, criterion, log_alpha, monitor1, n_outer=n_outer,
                tol=tol)

    criterion = HeldOutLogistic(X_val, y_val, model)
    monitor2 = Monitor()
    algo = Implicit()
    grad_search(algo, criterion, log_alpha, monitor2, n_outer=n_outer,
                tol=tol)

    criterion = HeldOutLogistic(X_val, y_val, model)
    monitor3 = Monitor()
    algo = ImplicitForward(tol_jac=tol, n_iter_jac=5000)
    grad_search(algo, criterion, log_alpha, monitor3, n_outer=n_outer,
                tol=tol)

    assert np.allclose(
        np.array(monitor1.log_alphas), np.array(monitor3.log_alphas))
    assert np.allclose(
        np.array(monitor1.grads), np.array(monitor3.grads), atol=1e-4)
    assert np.allclose(
        np.array(monitor1.objs), np.array(monitor3.objs))
    assert not np.allclose(
        np.array(monitor1.times), np.array(monitor3.times))
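The asserts rely on Monitor recording one entry per outer iteration: mathematically equivalent runs agree on objs, grads and log_alphas, while wall-clock times always differ. A small inspection sketch, assuming only the Monitor attributes used above:

# compare the recorded histories of two runs (assumed Monitor attributes)
for name, m in [("forward", monitor1), ("implicit_forward", monitor3)]:
    print(name, len(m.objs), m.objs[-1], m.times[-1])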
Example #3
def test_grad_search(model):
    n_outer = 3
    criterion = HeldOutSmoothedHinge(X_val,
                                     y_val,
                                     model,
                                     X_test=None,
                                     y_test=None)
    monitor1 = Monitor()
    algo = Forward()
    grad_search(algo,
                criterion,
                np.log(1e-3),
                monitor1,
                n_outer=n_outer,
                tol=1e-13)

    criterion = HeldOutSmoothedHinge(X_val,
                                     y_val,
                                     model,
                                     X_test=None,
                                     y_test=None)
    monitor2 = Monitor()
    algo = Implicit()
    grad_search(algo,
                criterion,
                np.log(1e-3),
                monitor2,
                n_outer=n_outer,
                tol=1e-13)

    criterion = HeldOutSmoothedHinge(X_val,
                                     y_val,
                                     model,
                                     X_test=None,
                                     y_test=None)
    monitor3 = Monitor()
    algo = ImplicitForward(tol_jac=1e-6, n_iter_jac=100)
    grad_search(algo,
                criterion,
                np.log(1e-3),
                monitor3,
                n_outer=n_outer,
                tol=1e-13)

    assert np.allclose(np.array(monitor1.log_alphas),
                       np.array(monitor3.log_alphas))
    assert np.allclose(np.array(monitor1.grads), np.array(monitor3.grads))
    assert np.allclose(np.array(monitor1.objs), np.array(monitor3.objs))
    assert not np.allclose(np.array(monitor1.times), np.array(monitor3.times))
Example #4
def test_val_grad(model):
    #######################################################################
    # Not all methods compute the full Jacobian, but all of them
    # compute the gradient: check that the gradients returned by all
    # methods agree.

    criterion = HeldOutLogistic(X_val, y_val, model)
    algo = Forward()
    val_fwd, grad_fwd = criterion.get_val_grad(log_C,
                                               algo.get_beta_jac_v,
                                               tol=tol)

    criterion = HeldOutLogistic(X_val, y_val, model)
    algo = ImplicitForward(tol_jac=1e-8, n_iter_jac=100)
    val_imp_fwd, grad_imp_fwd = criterion.get_val_grad(log_C,
                                                       algo.get_beta_jac_v,
                                                       tol=tol)

    criterion = HeldOutLogistic(X_val, y_val, model)
    algo = Implicit()
    val_imp, grad_imp = criterion.get_val_grad(log_C,
                                               algo.get_beta_jac_v,
                                               tol=tol)

    assert np.allclose(val_fwd, val_imp_fwd)
    assert np.allclose(grad_fwd, grad_imp_fwd)
    assert np.allclose(val_imp_fwd, val_imp)
    assert np.allclose(grad_imp_fwd, grad_imp, atol=1e-5)
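log_C is the analogue of log_alpha in the other tests: the logistic model here is parameterized by the inverse regularization strength C on a log scale. The snippet assumes a module-level value such as (illustrative only):

import numpy as np

C = 0.1  # hypothetical inverse regularization strength
log_C = np.log(C)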
Example #5
def parallel_function(
        dataset_name, method, tol=1e-8, n_outer=15):

    # load data
    X, y = fetch_libsvm(dataset_name)
    # subsample the samples and the features
    n_samples, n_features = dict_subsampling[dataset_name]
    t_max = dict_t_max[dataset_name]
    # t_max = 3600

    X, y = clean_dataset(X, y, n_samples, n_features)
    alpha_max, n_classes = get_alpha_max(X, y)
    log_alpha_max = np.log(alpha_max)  # the value of alpha_max may need tuning

    algo = ImplicitForward(tol_jac=None, n_iter_jac=2000)
    estimator = LogisticRegression(
        C=1, fit_intercept=False, warm_start=True, max_iter=30, verbose=False)

    model = SparseLogreg(estimator=estimator)
    idx_train, idx_val, idx_test = get_splits(X, y)

    logit_multiclass = LogisticMulticlass(
        idx_train, idx_val, algo, idx_test=idx_test)

    monitor = Monitor()
    if method == "implicit_forward":
        log_alpha0 = np.ones(n_classes) * np.log(0.1 * alpha_max)
        optimizer = LineSearch(n_outer=100)
        grad_search(
            algo, logit_multiclass, model, optimizer, X, y, log_alpha0,
            monitor)
    elif method.startswith(('random', 'bayesian')):
        max_evals = dict_max_eval[dataset_name]
        log_alpha_min = np.log(alpha_max) - 7
        hyperopt_wrapper(
            algo, logit_multiclass, model, X, y, log_alpha_min, log_alpha_max,
            monitor, max_evals=max_evals, tol=tol, t_max=t_max, method=method,
            size_space=n_classes)
    elif method == 'grid_search':
        n_alphas = 20
        p_alphas = np.geomspace(1, 0.001, n_alphas)
        p_alphas = np.tile(p_alphas, (n_classes, 1))
        for i in range(n_alphas):
            log_alpha_i = np.log(alpha_max * p_alphas[:, i])
            logit_multiclass.get_val(
                model, X, y, log_alpha_i, None, monitor, tol)

    monitor.times = np.array(monitor.times)
    monitor.objs = np.array(monitor.objs)
    monitor.acc_vals = np.array(monitor.acc_vals)
    monitor.acc_tests = np.array(monitor.acc_tests)
    monitor.log_alphas = np.array(monitor.log_alphas)
    return (
        dataset_name, method, tol, n_outer, monitor.times, monitor.objs,
        monitor.acc_vals, monitor.acc_tests, monitor.log_alphas, log_alpha_max,
        n_samples, n_features, n_classes)
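parallel_function returns a flat tuple of picklable results, the usual shape for dispatch through joblib. A sketch under that assumption; the dataset and method names below are placeholders, not from the source:

from itertools import product

from joblib import Parallel, delayed

datasets = ["rcv1_train"]  # hypothetical dataset names
methods = ["implicit_forward", "grid_search"]
results = Parallel(n_jobs=2)(
    delayed(parallel_function)(dataset, method)
    for dataset, method in product(datasets, methods))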
Example #6
def test_grad_search(model, crit):
    """Check that the paths are the same in the line search."""
    n_outer = 2

    def make_criterion():
        # instantiate a fresh criterion for each algorithm
        if crit == 'cv':
            return HeldOutMSE(
                X_val, y_val, model, X_test=X_test, y_test=y_test)
        return SURE(
            X_train, y_train, model, sigma=sigma_star, X_test=X_test,
            y_test=y_test)

    criterion = make_criterion()
    monitor1 = Monitor()
    algo = Forward()
    grad_search(algo,
                criterion,
                log_alpha,
                monitor1,
                n_outer=n_outer,
                tol=1e-16)

    criterion = make_criterion()
    monitor2 = Monitor()
    algo = Implicit()
    grad_search(algo,
                criterion,
                log_alpha,
                monitor2,
                n_outer=n_outer,
                tol=1e-16)

    criterion = make_criterion()
    monitor3 = Monitor()
    algo = ImplicitForward(tol_jac=1e-8, n_iter_jac=5000)
    grad_search(algo,
                criterion,
                log_alpha,
                monitor3,
                n_outer=n_outer,
                tol=1e-16)

    assert np.allclose(np.array(monitor1.log_alphas),
                       np.array(monitor3.log_alphas))
    assert np.allclose(np.array(monitor1.grads), np.array(monitor3.grads))
    assert np.allclose(np.array(monitor1.objs), np.array(monitor3.objs))
    assert np.allclose(np.array(monitor1.objs_test),
                       np.array(monitor3.objs_test))
    assert not np.allclose(np.array(monitor1.times), np.array(monitor3.times))
Example #7
def test_grad_search_custom(model, model_custom, crit):
    """check that the paths are the same in the line search"""
    n_outer = 5

    criterion = HeldOutLogistic(X_val, y_val, model)
    monitor = Monitor()
    algo = ImplicitForward(tol_jac=tol, n_iter_jac=5000)
    grad_search(algo, criterion, log_alpha, monitor, n_outer=n_outer, tol=tol)

    criterion = HeldOutLogistic(X_val, y_val, model_custom)
    monitor_custom = Monitor()
    algo = ImplicitForward(tol_jac=tol, n_iter_jac=5000)
    grad_search(
        algo, criterion, log_alpha, monitor_custom, n_outer=n_outer,
        tol=tol)

    assert np.allclose(
        np.array(monitor.log_alphas), np.array(monitor_custom.log_alphas))
    assert np.allclose(
        np.array(monitor.grads), np.array(monitor_custom.grads), atol=1e-4)
    assert np.allclose(
        np.array(monitor.objs), np.array(monitor_custom.objs))
    assert not np.allclose(
        np.array(monitor.times), np.array(monitor_custom.times))
Example #8
def test_grad_search():

    n_outer = 3
    criterion = HeldOutMSE(X_val, y_val, model, X_test=None, y_test=None)
    monitor1 = Monitor()
    algo = Forward()
    grad_search(algo,
                criterion,
                np.array([log_alpha1, log_alpha2]),
                monitor1,
                n_outer=n_outer,
                tol=1e-16)

    criterion = HeldOutMSE(X_val, y_val, model, X_test=None, y_test=None)
    monitor2 = Monitor()
    algo = Implicit()
    grad_search(algo,
                criterion,
                np.array([log_alpha1, log_alpha2]),
                monitor2,
                n_outer=n_outer,
                tol=1e-16)

    criterion = HeldOutMSE(X_val, y_val, model, X_test=None, y_test=None)
    monitor3 = Monitor()
    algo = ImplicitForward(tol_jac=1e-3, n_iter_jac=1000)
    grad_search(algo,
                criterion,
                np.array([log_alpha1, log_alpha2]),
                monitor3,
                n_outer=n_outer,
                tol=1e-16)

    assert np.allclose(np.array(monitor1.log_alphas),
                       np.array(monitor3.log_alphas))
    assert np.allclose(np.array(monitor1.grads), np.array(monitor3.grads))
    assert np.allclose(np.array(monitor1.objs), np.array(monitor3.objs))
    assert not np.allclose(np.array(monitor1.times), np.array(monitor3.times))

    assert np.allclose(np.array(monitor1.log_alphas),
                       np.array(monitor2.log_alphas),
                       atol=1e-2)
    assert np.allclose(np.array(monitor1.grads),
                       np.array(monitor2.grads),
                       atol=1e-2)
    assert np.allclose(np.array(monitor1.objs),
                       np.array(monitor2.objs),
                       atol=1e-2)
    assert not np.allclose(np.array(monitor1.times), np.array(monitor2.times))
Example #9
def test_val_grad():
    #######################################################################
    # Not all methods compute the full Jacobian, but all of them
    # compute the gradient: check that the gradients returned by all
    # methods agree.
    criterion = HeldOutMSE(X_val, y_val, model)
    algo = Forward()
    val_fwd, grad_fwd = criterion.get_val_grad(
        np.array([log_alpha1, log_alpha2]), algo.get_beta_jac_v, tol=tol)

    criterion = HeldOutMSE(X_val, y_val, model)
    algo = ImplicitForward(tol_jac=1e-16, n_iter_jac=5000)
    val_imp_fwd, grad_imp_fwd = criterion.get_val_grad(
        np.array([log_alpha1, log_alpha2]), algo.get_beta_jac_v, tol=tol)

    criterion = HeldOutMSE(X_val, y_val, model)
    algo = ImplicitForward(tol_jac=1e-16, n_iter_jac=5000)
    val_imp_fwd_custom, grad_imp_fwd_custom = criterion.get_val_grad(
        np.array([log_alpha1, log_alpha2]), algo.get_beta_jac_v, tol=tol)

    criterion = HeldOutMSE(X_val, y_val, model)
    algo = Implicit()
    val_imp, grad_imp = criterion.get_val_grad(
        np.array([log_alpha1, log_alpha2]), algo.get_beta_jac_v, tol=tol)
    assert np.allclose(val_fwd, val_imp_fwd)
    assert np.allclose(grad_fwd, grad_imp_fwd)
    assert np.allclose(val_imp_fwd, val_imp)
    assert np.allclose(val_imp_fwd, val_imp_fwd_custom)
    # for the implicit method the conjugate gradient does not fully
    # converge, hence the looser atol
    assert np.allclose(grad_imp_fwd, grad_imp, atol=1e-3)
    assert np.allclose(grad_imp_fwd, grad_imp_fwd_custom)
Example #10
def test_val_grad(model):
    criterion = HeldOutLogistic(X_val, y_val, model)
    algo = Forward()
    val_fwd, grad_fwd = criterion.get_val_grad(
        log_alpha, algo.get_beta_jac_v, tol=tol)

    criterion = HeldOutLogistic(X_val, y_val, model)
    algo = ImplicitForward(tol_jac=1e-8, n_iter_jac=5000)
    val_imp_fwd, grad_imp_fwd = criterion.get_val_grad(
        log_alpha, algo.get_beta_jac_v, tol=tol)

    criterion = HeldOutLogistic(X_val, y_val, model)
    algo = Implicit()
    val_imp, grad_imp = criterion.get_val_grad(
        log_alpha, algo.get_beta_jac_v, tol=tol)

    assert np.allclose(val_fwd, val_imp_fwd, atol=1e-4)
    assert np.allclose(grad_fwd, grad_imp_fwd, atol=1e-4)
    assert np.allclose(val_imp_fwd, val_imp, atol=1e-4)

    # for the implicit method the conjugate gradient does not fully
    # converge, hence the rtol=1e-2
    assert np.allclose(grad_imp_fwd, grad_imp, rtol=1e-2)
Example #11
def parallel_function(dataset_name,
                      method,
                      tol=1e-5,
                      n_outer=50,
                      tolerance_decrease='exponential'):

    # load data
    X_train, X_val, X_test, y_train, y_val, y_test = get_data(dataset_name)
    n_samples, n_features = X_train.shape
    print('n_samples', n_samples)
    print('n_features', n_features)
    y_train[y_train == 0.0] = -1.0
    y_val[y_val == 0.0] = -1.0
    y_test[y_test == 0.0] = -1.0

    alpha_max = np.max(np.abs(X_train.T @ y_train))
    alpha_max /= X_train.shape[0]
    log_alpha_max = np.log(alpha_max)

    alpha_min = alpha_max * 1e-2
    # alphas = np.geomspace(alpha_max, alpha_min, 10)
    # log_alphas = np.log(alphas)

    log_alpha1_0 = np.log(0.1 * alpha_max)
    log_alpha2_0 = np.log(0.1 * alpha_max)

    n_outer = 25  # overrides the n_outer argument

    if dataset_name == "rcv1":
        size_loop = 2
    else:
        size_loop = 2
    model = ElasticNet(X_train,
                       y_train,
                       log_alpha1_0,
                       log_alpha2_0,
                       log_alpha_max,
                       max_iter=1000,
                       tol=tol)
    for i in range(size_loop):
        monitor = Monitor()

        if method == "implicit_forward":
            criterion = HeldOutMSE(X_val,
                                   y_val,
                                   model,
                                   X_test=X_test,
                                   y_test=y_test)
            algo = ImplicitForward(criterion, tol_jac=1e-3, n_iter_jac=100)
            _, _, _ = grad_search(algo=algo,
                                  verbose=False,
                                  log_alpha0=np.array(
                                      [log_alpha1_0, log_alpha2_0]),
                                  tol=tol,
                                  n_outer=n_outer,
                                  monitor=monitor,
                                  t_max=dict_t_max[dataset_name],
                                  tolerance_decrease=tolerance_decrease)

        elif method == "forward":
            criterion = HeldOutMSE(X_val,
                                   y_val,
                                   model,
                                   X_test=X_test,
                                   y_test=y_test)
            algo = Forward(criterion)
            _, _, _ = grad_search(algo=algo,
                                  log_alpha0=np.array(
                                      [log_alpha1_0, log_alpha2_0]),
                                  tol=tol,
                                  n_outer=n_outer,
                                  monitor=monitor,
                                  t_max=dict_t_max[dataset_name],
                                  tolerance_decrease=tolerance_decrease)

        elif method == "implicit":
            criterion = HeldOutMSE(X_val,
                                   y_val,
                                   model,
                                   X_test=X_test,
                                   y_test=y_test)
            algo = Implicit(criterion)
            _, _, _ = grad_search(algo=algo,
                                  log_alpha0=np.array(
                                      [log_alpha1_0, log_alpha2_0]),
                                  tol=tol,
                                  n_outer=n_outer,
                                  monitor=monitor,
                                  t_max=dict_t_max[dataset_name],
                                  tolerance_decrease=tolerance_decrease)

        elif method == "grid_search":
            criterion = HeldOutMSE(X_val,
                                   y_val,
                                   model,
                                   X_test=X_test,
                                   y_test=y_test)
            algo = Forward(criterion)
            log_alpha_min = np.log(alpha_min)
            log_alpha_opt, min_g_func = grid_search(
                algo,
                log_alpha_min,
                log_alpha_max,
                monitor,
                max_evals=10,
                tol=tol,
                samp="grid",
                t_max=dict_t_max[dataset_name],
                log_alphas=None,
                nb_hyperparam=2)
            print(log_alpha_opt)

        elif method == "random":
            criterion = HeldOutMSE(X_val,
                                   y_val,
                                   model,
                                   X_test=X_test,
                                   y_test=y_test)
            algo = Forward(criterion)
            log_alpha_min = np.log(alpha_min)
            log_alpha_opt, min_g_func = grid_search(
                algo,
                log_alpha_min,
                np.log(alpha_max),
                monitor,
                max_evals=10,
                tol=tol,
                samp="random",
                t_max=dict_t_max[dataset_name],
                nb_hyperparam=2)
            print(log_alpha_opt)

        elif method == "lhs":
            criterion = HeldOutMSE(X_val,
                                   y_val,
                                   model,
                                   X_test=X_test,
                                   y_test=y_test)
            algo = Forward(criterion)
            log_alpha_min = np.log(alpha_min)
            log_alpha_opt, min_g_func = grid_search(
                algo,
                log_alpha_min,
                np.log(alpha_max),
                monitor,
                max_evals=10,
                tol=tol,
                samp="lhs",
                t_max=dict_t_max[dataset_name])
            print(log_alpha_opt)

    monitor.times = np.array(monitor.times)
    monitor.objs = np.array(monitor.objs)
    monitor.objs_test = np.array(monitor.objs_test)
    monitor.log_alphas = np.array(monitor.log_alphas)
    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test, monitor.log_alphas,
            norm(y_val), norm(y_test), log_alpha_max)
print("Time to compute CV for scikit-learn: %.2f" % t_sk)

##############################################################################
# Grad-search
# -----------

print('sparse-ho started')

t0 = time.time()
estimator = LogisticRegression(penalty='l1',
                               fit_intercept=False,
                               solver='saga')
model = SparseLogreg(X_train, y_train, max_iter=max_iter, estimator=estimator)
criterion = HeldOutLogistic(X_val, y_val, model)
monitor_grad = Monitor()
algo = ImplicitForward(tol_jac=tol, n_iter_jac=100)
grad_search(algo,
            criterion,
            np.log(0.1 * alpha_max),
            monitor_grad,
            n_outer=10,
            tol=tol)
objs_grad = np.array(monitor_grad.objs)

t_grad_search = time.time() - t0

print('sparse-ho finished')
print("Time to compute CV for sparse-ho: %.2f" % t_grad_search)

p_alphas_grad = np.exp(np.array(monitor_grad.log_alphas)) / alpha_max
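A natural follow-up, not part of the source snippet, is to plot the validation objective against the visited regularization strengths:

import matplotlib.pyplot as plt

# objs_grad and p_alphas_grad come from the snippet above
plt.semilogx(p_alphas_grad, objs_grad, marker="x")
plt.xlabel("alpha / alpha_max")
plt.ylabel("validation objective")
plt.show()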
Example #13
def test_val_grad():
    #######################################################################
    # Not all methods compute the full Jacobian, but all of them
    # compute the gradient: check that the gradients returned by all
    # methods agree.
    for key in models:
        log_alpha = dict_log_alpha[key]
        model = models[key]

        criterion = HeldOutMSE(X_val, y_val, model)
        algo = Forward()
        val_fwd, grad_fwd = criterion.get_val_grad(log_alpha,
                                                   algo.get_beta_jac_v,
                                                   tol=tol)

        criterion = HeldOutMSE(X_val, y_val, model)
        algo = ImplicitForward(tol_jac=1e-8, n_iter_jac=5000)
        val_imp_fwd, grad_imp_fwd = criterion.get_val_grad(log_alpha,
                                                           algo.get_beta_jac_v,
                                                           tol=tol)

        criterion = HeldOutMSE(X_val, y_val, model)
        algo = Implicit()
        val_imp, grad_imp = criterion.get_val_grad(log_alpha,
                                                   algo.get_beta_jac_v,
                                                   tol=tol)

        criterion = HeldOutMSE(X_val, y_val, model)
        algo = Backward()
        val_bwd, grad_bwd = criterion.get_val_grad(log_alpha,
                                                   algo.get_beta_jac_v,
                                                   tol=tol)

        assert np.allclose(val_fwd, val_imp_fwd)
        assert np.allclose(grad_fwd, grad_imp_fwd)
        # assert np.allclose(val_imp_fwd, val_imp)
        assert np.allclose(val_bwd, val_fwd)
        assert np.allclose(val_bwd, val_imp_fwd)
        assert np.allclose(grad_fwd, grad_bwd)
        assert np.allclose(grad_bwd, grad_imp_fwd)

        # for the implicit method the conjugate gradient does not fully
        # converge, hence the looser atol
        assert np.allclose(grad_imp_fwd, grad_imp, atol=1e-3)

    for key in models:
        log_alpha = dict_log_alpha[key]
        model = models[key]
        criterion = SURE(X_train, y_train, model, sigma_star)
        algo = Forward()
        val_fwd, grad_fwd = criterion.get_val_grad(log_alpha,
                                                   algo.get_beta_jac_v,
                                                   tol=tol)

        criterion = SURE(X_train, y_train, model, sigma_star)
        algo = ImplicitForward(tol_jac=1e-8, n_iter_jac=5000)
        val_imp_fwd, grad_imp_fwd = criterion.get_val_grad(log_alpha,
                                                           algo.get_beta_jac_v,
                                                           tol=tol)

        criterion = SURE(X_train, y_train, model, sigma_star)
        algo = Implicit()
        val_imp, grad_imp = criterion.get_val_grad(log_alpha,
                                                   algo.get_beta_jac_v,
                                                   tol=tol)

        criterion = SURE(X_train, y_train, model, sigma_star)
        algo = Backward()
        val_bwd, grad_bwd = criterion.get_val_grad(log_alpha,
                                                   algo.get_beta_jac_v,
                                                   tol=tol)

        assert np.allclose(val_fwd, val_imp_fwd)
        assert np.allclose(grad_fwd, grad_imp_fwd)
        assert np.allclose(val_imp_fwd, val_imp)
        assert np.allclose(val_bwd, val_fwd)
        assert np.allclose(val_bwd, val_imp_fwd)
        assert np.allclose(grad_fwd, grad_bwd)
        assert np.allclose(grad_bwd, grad_imp_fwd)
Example #14
# vanilla cross-validated Lasso on the concatenated train/validation data
model_cv = LassoCV(n_jobs=2).fit(X_train_val, y_train_val)

# Measure mse on test
mse_cv = mean_squared_error(y_test, model_cv.predict(X_test))
print("Vanilla LassoCV: Mean-squared error on test data %f" % mse_cv)
##############################################################################

##############################################################################
# Weighted Lasso with sparse-ho: one hyperparameter per feature.
# We use the regularization strength selected by LassoCV as a starting point.
log_alpha0 = np.log(model_cv.alpha_) * np.ones(X_train.shape[1])
estimator = Lasso(fit_intercept=False, max_iter=10, warm_start=True)
model = WeightedLasso(X_train, y_train, estimator=estimator)
criterion = HeldOutMSE(X_val, y_val, model, X_test=X_test, y_test=y_test)
algo = ImplicitForward()
monitor = Monitor()
grad_search(algo, criterion, log_alpha0, monitor, n_outer=20, tol=1e-6)
##############################################################################

##############################################################################
# MSE on validation set
mse_sho_val = mean_squared_error(y_val, estimator.predict(X_val))

# MSE on test set, ie unseen data
mse_sho_test = mean_squared_error(y_test, estimator.predict(X_test))

print("Sparse-ho: Mean-squared error on validation data %f" % mse_sho_val)
print("Sparse-ho: Mean-squared error on test (unseen) data %f" % mse_sho_test)

labels = ['WeightedLasso val', 'WeightedLasso test', 'Lasso CV']
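The labels list suggests the script goes on to plot the three errors; a sketch of such a comparison figure, assuming matplotlib:

import matplotlib.pyplot as plt

fig, ax = plt.subplots()
ax.bar(labels, [mse_sho_val, mse_sho_test, mse_cv])
ax.set_ylabel("Mean-squared error")
plt.show()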
Example #15
# grad search
print("Started grad-search")
t_grad_search = -time.time()
monitor = Monitor()
n_outer = 10
model = ElasticNet(X_train,
                   y_train,
                   log_alphas_1[-1],
                   log_alphas_2[-1],
                   log_alpha_max,
                   max_iter=max_iter,
                   tol=tol)
criterion = HeldOutMSE(X_val, y_val, model, X_test=X_test, y_test=y_test)
algo = ImplicitForward(criterion,
                       tol_jac=1e-2,
                       n_iter_jac=1000,
                       max_iter=max_iter)

_, _, _ = grad_search(algo=algo,
                      verbose=True,
                      log_alpha0=np.array(
                          [np.log(alpha_max / 10),
                           np.log(alpha_max / 10)]),
                      tol=tol,
                      n_outer=n_outer,
                      monitor=monitor,
                      tolerance_decrease='constant')
alphas_grad = np.exp(np.array(monitor.log_alphas))
alphas_grad /= alpha_max

t_grad_search += time.time()
Example #16
log_alphas = np.log(alphas)

tol = 1e-7

# grid search
# model = Lasso(X_train, y_train, np.log(alpha_max/10))
# criterion = HeldOutMSE(X_val, y_val, model, X_test=X_test, y_test=y_test)
# algo = Forward(criterion)
# monitor_grid_sk = Monitor()
# grid_search(
#     algo, None, None, monitor_grid_sk, log_alphas=log_alphas,
#     tol=tol)

# np.save("p_alphas.npy", p_alphas)
# objs = np.array(monitor_grid_sk.objs)
# np.save("objs.npy", objs)

# grad_search
estimator = linear_model.Lasso(fit_intercept=False, warm_start=True)
model = Lasso(X_train, y_train, np.log(alpha_max / 10), estimator=estimator)
criterion = HeldOutMSE(X_val, y_val, model, X_test=X_test, y_test=y_test)
algo = ImplicitForward(criterion)
monitor_grad = Monitor()
grad_search(algo, np.log(alpha_max / 10), monitor_grad, n_outer=10, tol=tol)

p_alphas_grad = np.exp(np.array(monitor_grad.log_alphas)) / alpha_max

np.save("p_alphas_grad.npy", p_alphas_grad)
objs_grad = np.array(monitor_grad.objs)
np.save("objs_grad.npy", objs_grad)
Example #17
def parallel_function(
        dataset_name, method, tol=1e-5, n_outer=50,
        tolerance_decrease='exponential'):

    # load data
    X_train, X_val, X_test, y_train, y_val, y_test = get_data(
        dataset_name, csr=True)
    n_samples, n_features = X_train.shape
    print('n_samples', n_samples)
    print('n_features', n_features)
    y_train[y_train == 0.0] = -1.0
    y_val[y_val == 0.0] = -1.0
    y_test[y_test == 0.0] = -1.0

    C_max = 100
    logC = np.log(1e-2)
    n_outer = 5  # overrides the n_outer argument

    if dataset_name == "rcv1":
        size_loop = 1
    else:
        size_loop = 1
    model = SVM(
        X_train, y_train, logC, max_iter=10000, tol=tol)
    for i in range(size_loop):
        monitor = Monitor()

        if method == "implicit_forward":
            criterion = HeldOutSmoothedHinge(
                X_val, y_val, model, X_test=X_test, y_test=y_test)
            algo = ImplicitForward(criterion, tol_jac=1e-3, n_iter_jac=100)
            _, _, _ = grad_search(
                algo=algo, verbose=False,
                log_alpha0=logC, tol=tol,
                n_outer=n_outer, monitor=monitor,
                t_max=dict_t_max[dataset_name],
                tolerance_decrease=tolerance_decrease)

        elif method == "forward":
            criterion = HeldOutSmoothedHinge(
                X_val, y_val, model, X_test=X_test, y_test=y_test)
            algo = Forward(criterion)
            _, _, _ = grad_search(
                algo=algo,
                log_alpha0=logC, tol=tol,
                n_outer=n_outer, monitor=monitor,
                t_max=dict_t_max[dataset_name],
                tolerance_decrease=tolerance_decrease)

        elif method == "implicit":
            criterion = HeldOutSmoothedHinge(
                X_val, y_val, model, X_test=X_test, y_test=y_test)
            algo = Implicit(criterion)
            _, _, _ = grad_search(
                algo=algo,
                log_alpha0=logC, tol=tol,
                n_outer=n_outer, monitor=monitor,
                t_max=dict_t_max[dataset_name],
                tolerance_decrease=tolerance_decrease)

        elif method == "grid_search":
            criterion = HeldOutSmoothedHinge(
                X_val, y_val, model, X_test=X_test, y_test=y_test)
            algo = Forward(criterion)
            log_alpha_min = np.log(1e-2)
            log_alpha_opt, min_g_func = grid_search(
                algo, log_alpha_min, np.log(C_max), monitor, max_evals=25,
                tol=tol, samp="grid")
            print(log_alpha_opt)

        elif method == "random":
            criterion = HeldOutSmoothedHinge(
                X_val, y_val, model, X_test=X_test, y_test=y_test)
            algo = Forward(criterion)
            log_alpha_min = np.log(1e-2)
            log_alpha_opt, min_g_func = grid_search(
                algo, log_alpha_min, np.log(C_max), monitor, max_evals=25,
                tol=tol, samp="random")
            print(log_alpha_opt)

        elif method == "lhs":
            criterion = HeldOutSmoothedHinge(
                X_val, y_val, model, X_test=X_test, y_test=y_test)
            algo = Forward(criterion)
            log_alpha_min = np.log(1e-2)
            log_alpha_opt, min_g_func = grid_search(
                algo, log_alpha_min, np.log(C_max), monitor, max_evals=25,
                tol=tol, samp="lhs")
            print(log_alpha_opt)

    monitor.times = np.array(monitor.times)
    monitor.objs = np.array(monitor.objs)
    monitor.objs_test = np.array(monitor.objs_test)
    monitor.log_alphas = np.array(monitor.log_alphas)
    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test,
            monitor.log_alphas, norm(y_val), norm(y_test))