Example #1

# Imports assumed by this snippet; module paths follow the sparse-ho
# examples and may differ across versions.
from itertools import product

import celer
import numpy as np
from sklearn import linear_model
from sklearn.model_selection import KFold
from libsvmdata import fetch_libsvm

from sparse_ho import ImplicitForward
from sparse_ho.criterion import CrossVal, HeldOutLogistic, HeldOutMSE
from sparse_ho.models import ElasticNet, SparseLogreg
from sparse_ho.utils import Monitor
from sparse_ho.optimizers import GradientDescent
from sparse_ho.ho import grad_search
from sparse_ho.grid_search import grid_search
from sparse_ho.hyperopt_wrapper import hyperopt_wrapper

def parallel_function(dataset_name,
                      method,
                      tol=1e-5,
                      n_outer=50,
                      tolerance_decrease='constant'):

    # NOTE: model_name, dict_palphamin, dict_n_outers, dict_t_max and
    # dict_point_grid_search are assumed to be module-level globals of
    # the source script.

    # load data
    X, y = fetch_libsvm(dataset_name)
    y -= np.mean(y)
    # compute alpha_max: the smallest penalty for which all coefficients
    # stay at zero
    alpha_max = np.abs(X.T @ y).max() / len(y)

    if model_name == "logreg":
        alpha_max /= 2
    alpha_min = alpha_max * dict_palphamin[dataset_name]

    if model_name == "enet":
        estimator = linear_model.ElasticNet(fit_intercept=False,
                                            max_iter=10_000,
                                            warm_start=True,
                                            tol=tol)
        model = ElasticNet(estimator=estimator)
    elif model_name == "logreg":
        # NOTE: `estimator` was undefined on this branch in the original
        # snippet; a celer logistic regression (as in Example #9 below)
        # is a plausible fix.
        estimator = celer.LogisticRegression(warm_start=True,
                                             fit_intercept=False)
        model = SparseLogreg(estimator=estimator)

    # TODO improve this
    try:
        n_outer = dict_n_outers[dataset_name, method]
    except KeyError:
        n_outer = 20

    size_loop = 2
    for _ in range(size_loop):
        if model_name in ("lasso", "enet"):
            sub_criterion = HeldOutMSE(None, None)
        elif model_name == "logreg":
            sub_criterion = HeldOutLogistic(None, None)
        kf = KFold(n_splits=5, shuffle=True, random_state=42)
        criterion = CrossVal(sub_criterion, cv=kf)

        algo = ImplicitForward(tol_jac=1e-3)
        monitor = Monitor()
        t_max = dict_t_max[dataset_name]
        if method == 'grid_search':
            num1D = dict_point_grid_search[dataset_name]
            alpha1D = np.geomspace(alpha_max, alpha_min, num=num1D)
            alphas = [np.array(i) for i in product(alpha1D, alpha1D)]
            grid_search(algo,
                        criterion,
                        model,
                        X,
                        y,
                        alpha_min,
                        alpha_max,
                        monitor,
                        max_evals=100,
                        tol=tol,
                        alphas=alphas)
        elif method in ('random', 'bayesian'):
            hyperopt_wrapper(algo,
                             criterion,
                             model,
                             X,
                             y,
                             alpha_min,
                             alpha_max,
                             monitor,
                             max_evals=30,
                             tol=tol,
                             method=method,
                             size_space=2,
                             t_max=t_max)
        elif method.startswith("implicit_forward"):
            # do gradient descent to find the optimal lambda
            alpha0 = np.array([alpha_max / 100, alpha_max / 100])
            n_outer = 30  # overrides the value chosen above
            if method == 'implicit_forward':
                optimizer = GradientDescent(n_outer=n_outer,
                                            p_grad_norm=1,
                                            verbose=True,
                                            tol=tol,
                                            t_max=t_max)
            else:
                optimizer = GradientDescent(n_outer=n_outer,
                                            p_grad_norm=1,
                                            verbose=True,
                                            tol=tol,
                                            t_max=t_max,
                                            tol_decrease="geom")
            grad_search(algo, criterion, model, optimizer, X, y, alpha0,
                        monitor)
        else:
            raise NotImplementedError

    monitor.times = np.array(monitor.times)
    monitor.objs = np.array(monitor.objs)
    monitor.objs_test = 0  # TODO
    monitor.alphas = np.array(monitor.alphas)
    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test, monitor.alphas,
            alpha_max, model_name)
Example #2
    n_samples=n_samples,
    n_features=n_features,
    n_times=1,
    n_active=n_active,
    rho=rho,
    SNR=SNR,
    seed=2)
X_val = csc_matrix(X_val)
alpha_max = np.abs(X_train.T @ y_train).max() / n_samples
p_alpha = 0.7
alpha_1 = p_alpha * alpha_max
alpha_2 = 0.01
log_alpha1 = np.log(alpha_1)
log_alpha2 = np.log(alpha_2)

model = ElasticNet(X_train, y_train, max_iter=max_iter, estimator=None)
estimator = linear_model.ElasticNet(alpha=(alpha_1 + alpha_2),
                                    fit_intercept=False,
                                    l1_ratio=alpha_1 / (alpha_1 + alpha_2),
                                    tol=1e-16,
                                    max_iter=max_iter)
model_custom = ElasticNet(X_train,
                          y_train,
                          max_iter=max_iter,
                          estimator=estimator)


def get_v(mask, dense):
    # gradient of the held-out MSE w.r.t. the active coefficients:
    # 2 / n_val * X_val[:, mask]^T (X_val[:, mask] @ dense - y_val)
    return 2 * (
        X_val[:, mask].T @ (X_val[:, mask] @ dense - y_val)) / X_val.shape[0]
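
As a quick sanity check (a sketch, not in the original): on an arbitrary support, get_v must agree with the corresponding entries of the full validation-MSE gradient 2 / n * X_val^T (X_val @ beta - y_val).

beta = np.zeros(X_val.shape[1])
beta[:5] = 1.0  # arbitrary coefficients, assuming at least 5 features
mask = beta != 0
grad_full = 2 * (X_val.T @ (X_val @ beta - y_val)) / X_val.shape[0]
np.testing.assert_allclose(get_v(mask, beta[mask]), grad_full[mask])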
Example #3
import time

import numpy as np

from sparse_ho import Forward, Backward
from sparse_ho.models import Lasso, ElasticNet
from sparse_ho.tests.cvxpylayer import lasso_cvxpy, enet_cvxpy
from sparse_ho.criterion import HeldOutMSE

# get_gina_agnostic is assumed to be a data helper from the experiment's
# own codebase; it is not part of sparse-ho.
X, y = get_gina_agnostic(normalize_y=False)
n_samples, n_features = X.shape
idx_train = np.arange(0, n_samples // 2)
idx_val = np.arange(n_samples // 2, n_samples)

name_models = ["lasso", "enet"]

dict_models = {}
dict_models["lasso"] = Lasso()
dict_models["enet"] = ElasticNet()

dict_cvxpy = {}
dict_cvxpy["lasso"] = lasso_cvxpy
dict_cvxpy["enet"] = enet_cvxpy

dict_ncols = {}
# the same geometric column grid is used for both alpha divisors
dict_ncols[10] = np.geomspace(100, n_features, num=10, dtype=int)
dict_ncols[100] = np.geomspace(100, n_features, num=10, dtype=int)

tol = 1e-6
l1_ratio = 0.8
repeat = 10
div_alphas = [10, 100]
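
The snippet stops before the benchmark itself; a hedged sketch of how the grids above might be consumed (the timing body is omitted):

import itertools

for name_model, div_alpha in itertools.product(name_models, div_alphas):
    model = dict_models[name_model]
    cvxpy_func = dict_cvxpy[name_model]
    for n_col in dict_ncols[div_alpha]:
        # restrict to the first n_col features before timing the
        # sparse-ho hypergradient against the cvxpylayers reference
        X_sub = X[:, :n_col]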

Example #4
                            max_iter=max_iter,
                            warm_start=True)
        clf.fit(X_train, y_train)
        results[i, j] = norm(y_val - X_val @ clf.coef_)**2 / X_val.shape[0]
t_grid_search += time.time()
print("Finished grid-search")

# grad search
print("Started grad-search")
t_grad_search = -time.time()
monitor = Monitor()
n_outer = 10
model = ElasticNet(X_train,
                   y_train,
                   log_alphas_1[-1],
                   log_alphas_2[-1],
                   log_alpha_max,
                   max_iter=max_iter,
                   tol=tol)
criterion = HeldOutMSE(X_val, y_val, model, X_test=X_test, y_test=y_test)
algo = ImplicitForward(criterion,
                       tol_jac=1e-2,
                       n_iter_jac=1000,
                       max_iter=max_iter)

_, _, _ = grad_search(algo=algo,
                      verbose=True,
                      log_alpha0=np.array(
                          [np.log(alpha_max / 10),
                           np.log(alpha_max / 10)]),
                      tol=tol,
                      n_outer=n_outer,
                      monitor=monitor)
Example #5
tol = 1e-3

estimator = linear_model.ElasticNet(fit_intercept=False,
                                    max_iter=1000,
                                    warm_start=True,
                                    tol=tol)

dict_monitor = {}

all_algo_name = ['grid_search']
# , 'implicit_forward', "implicit_forward_approx", 'bayesian']
# , 'random_search']
# all_algo_name = ['random_search']

for algo_name in all_algo_name:
    model = ElasticNet(estimator=estimator)
    sub_criterion = HeldOutMSE(None, None)
    alpha0 = np.array([alpha_max / 10, alpha_max / 10])
    monitor = Monitor()
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    criterion = CrossVal(sub_criterion, cv=kf)
    algo = ImplicitForward(tol_jac=1e-3)
    # optimizer = LineSearch(n_outer=10, tol=tol)
    if algo_name.startswith('implicit_forward'):
        if algo_name == "implicit_forward_approx":
            optimizer = GradientDescent(n_outer=30,
                                        p_grad_norm=1.,
                                        verbose=True,
                                        tol=tol,
                                        tol_decrease="geom")
        else:
            # constant tolerance (no geometric decrease) on this branch
            optimizer = GradientDescent(n_outer=30,
                                        p_grad_norm=1.,
                                        verbose=True,
                                        tol=tol)
Example #6
        results[i, j] = np.mean(
            (y[idx_val] - X[idx_val, :] @ estimator.coef_)**2)
t_grid_search += time.time()
print("Finished grid-search")

##############################################################################
# Grad-search with sparse-ho
# --------------------------
estimator = linear_model.ElasticNet(fit_intercept=False,
                                    max_iter=max_iter,
                                    warm_start=True)
print("Started grad-search")
t_grad_search = -time.time()
monitor = Monitor()
n_outer = 10
model = ElasticNet(max_iter=max_iter, estimator=estimator)
criterion = HeldOutMSE(idx_train, idx_val)
algo = ImplicitForward(tol_jac=1e-7, n_iter_jac=1000, max_iter=max_iter)
grad_search(algo,
            criterion,
            model,
            X,
            y,
            verbose=True,
            log_alpha0=np.array(
                [np.log(alpha_max * 0.3),
                 np.log(alpha_max / 10)]),
            tol=tol,
            n_outer=n_outer,
            monitor=monitor)
t_grad_search += time.time()
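
A short follow-up sketch (not in the original) to compare the two strategies; monitor.objs is assumed to hold the held-out MSE at each outer iteration.

print(f"grid-search time: {t_grid_search:.2f} s")
print(f"grad-search time: {t_grad_search:.2f} s")
print(f"best validation MSE (grad-search): {min(monitor.objs):.4e}")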
Example #7
idx_train = np.arange(0, 50)
idx_val = np.arange(50, 100)

alpha_max = (np.abs(X[idx_train, :].T @ y[idx_train])).max() / n_samples

tol = 1e-16

p_alpha = 0.7
alpha_1 = p_alpha * alpha_max
alpha_2 = 0.01
log_alpha1 = np.log(alpha_1)
log_alpha2 = np.log(alpha_2)
max_iter = 100

model = ElasticNet(max_iter=max_iter, estimator=None)
estimator = linear_model.ElasticNet(alpha=(alpha_1 + alpha_2),
                                    fit_intercept=False,
                                    l1_ratio=alpha_1 / (alpha_1 + alpha_2),
                                    tol=1e-16,
                                    max_iter=max_iter)
model_custom = ElasticNet(max_iter=max_iter, estimator=estimator)


def get_v(mask, dense):
    # gradient of the held-out MSE w.r.t. the active coefficients
    return 2 * (X[np.ix_(idx_val, mask)].T @ (
        X[np.ix_(idx_val, mask)] @ dense - y[idx_val])) / len(idx_val)
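
A hedged cross-check (not in the original tests): get_v should match a central finite difference of the validation loss at an arbitrary point.

def val_loss(beta):
    return np.mean((X[idx_val, :] @ beta - y[idx_val]) ** 2)


beta = np.zeros(X.shape[1])
beta[:3] = 0.5  # arbitrary coefficients supported on 3 features
mask = beta != 0
eps = 1e-6
num_grad = []
for j in np.where(mask)[0]:
    e = np.zeros(X.shape[1])
    e[j] = 1.0
    num_grad.append(
        (val_loss(beta + eps * e) - val_loss(beta - eps * e)) / (2 * eps))
np.testing.assert_allclose(get_v(mask, beta[mask]), num_grad, rtol=1e-4)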


def test_beta_jac():
    supp1, dense1, jac1 = get_beta_jac_iterdiff(X[idx_train, :],
Example #8
def parallel_function(dataset_name,
                      method,
                      tol=1e-5,
                      n_outer=50,
                      tolerance_decrease='exponential'):

    # load data (get_data and dict_t_max are assumed to be defined in the
    # source script)
    X_train, X_val, X_test, y_train, y_val, y_test = get_data(dataset_name)
    n_samples, n_features = X_train.shape
    print('n_samples', n_samples)
    print('n_features', n_features)
    y_train[y_train == 0.0] = -1.0
    y_val[y_val == 0.0] = -1.0
    y_test[y_test == 0.0] = -1.0

    alpha_max = np.max(np.abs(X_train.T @ y_train))
    alpha_max /= X_train.shape[0]
    log_alpha_max = np.log(alpha_max)

    alpha_min = alpha_max * 1e-2
    # alphas = np.geomspace(alpha_max, alpha_min, 10)
    # log_alphas = np.log(alphas)

    log_alpha1_0 = np.log(0.1 * alpha_max)
    log_alpha2_0 = np.log(0.1 * alpha_max)

    n_outer = 25  # overrides the n_outer argument

    size_loop = 2
    model = ElasticNet(X_train,
                       y_train,
                       log_alpha1_0,
                       log_alpha2_0,
                       log_alpha_max,
                       max_iter=1000,
                       tol=tol)
    for _ in range(size_loop):
        monitor = Monitor()
        # the held-out criterion is identical for every method, so build
        # it once per repetition
        criterion = HeldOutMSE(X_val,
                               y_val,
                               model,
                               X_test=X_test,
                               y_test=y_test)

        if method == "implicit_forward":
            algo = ImplicitForward(criterion, tol_jac=1e-3, n_iter_jac=100)
            _, _, _ = grad_search(algo=algo,
                                  verbose=False,
                                  log_alpha0=np.array(
                                      [log_alpha1_0, log_alpha2_0]),
                                  tol=tol,
                                  n_outer=n_outer,
                                  monitor=monitor,
                                  t_max=dict_t_max[dataset_name],
                                  tolerance_decrease=tolerance_decrease)

        elif method == "forward":
            algo = Forward(criterion)
            _, _, _ = grad_search(algo=algo,
                                  log_alpha0=np.array(
                                      [log_alpha1_0, log_alpha2_0]),
                                  tol=tol,
                                  n_outer=n_outer,
                                  monitor=monitor,
                                  t_max=dict_t_max[dataset_name],
                                  tolerance_decrease=tolerance_decrease)

        elif method == "implicit":
            algo = Implicit(criterion)
            _, _, _ = grad_search(algo=algo,
                                  log_alpha0=np.array(
                                      [log_alpha1_0, log_alpha2_0]),
                                  tol=tol,
                                  n_outer=n_outer,
                                  monitor=monitor,
                                  t_max=dict_t_max[dataset_name],
                                  tolerance_decrease=tolerance_decrease)

        elif method == "grid_search":
            algo = Forward(criterion)
            log_alpha_min = np.log(alpha_min)
            log_alpha_opt, min_g_func = grid_search(
                algo,
                log_alpha_min,
                log_alpha_max,
                monitor,
                max_evals=10,
                tol=tol,
                samp="grid",
                t_max=dict_t_max[dataset_name],
                log_alphas=None,
                nb_hyperparam=2)
            print(log_alpha_opt)

        elif method == "random":
            algo = Forward(criterion)
            log_alpha_min = np.log(alpha_min)
            log_alpha_opt, min_g_func = grid_search(
                algo,
                log_alpha_min,
                log_alpha_max,
                monitor,
                max_evals=10,
                tol=tol,
                samp="random",
                t_max=dict_t_max[dataset_name],
                nb_hyperparam=2)
            print(log_alpha_opt)

        elif method == "lhs":
            algo = Forward(criterion)
            log_alpha_min = np.log(alpha_min)
            log_alpha_opt, min_g_func = grid_search(
                algo,
                log_alpha_min,
                log_alpha_max,
                monitor,
                max_evals=10,
                tol=tol,
                samp="lhs",
                t_max=dict_t_max[dataset_name])
            print(log_alpha_opt)

    monitor.times = np.array(monitor.times)
    monitor.objs = np.array(monitor.objs)
    monitor.objs_test = np.array(monitor.objs_test)
    monitor.log_alphas = np.array(monitor.log_alphas)
    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test, monitor.log_alphas,
            norm(y_val), norm(y_test), log_alpha_max)
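
A hypothetical post-processing step (not in the original): the returned tuples stack naturally into a DataFrame, with columns named after the return statement above.

import pandas as pd

results = [parallel_function("rcv1", method)
           for method in ("implicit_forward", "forward", "grid_search")]
df = pd.DataFrame(results, columns=[
    "dataset", "method", "tol", "n_outer", "tolerance_decrease",
    "times", "objs", "objs_test", "log_alphas",
    "norm_y_val", "norm_y_test", "log_alpha_max"])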
Example #9
alpha_2 = 0.1
log_alpha1 = np.log(alpha_1)
log_alpha2 = np.log(alpha_2)

dict_log_alpha = {}
dict_log_alpha["lasso"] = log_alpha
dict_log_alpha["enet"] = np.array([log_alpha1, log_alpha2])
tab = np.linspace(1, 1000, n_features)
dict_log_alpha["wLasso"] = log_alpha + np.log(tab / tab.max())
dict_log_alpha["logreg"] = (log_alpha - np.log(2))
dict_log_alpha["svm"] = 1e-4
dict_log_alpha["svr"] = np.array([1e-2, 1e-2])
# Set models to be tested
models = {}
models["lasso"] = Lasso(estimator=None)
models["enet"] = ElasticNet(estimator=None)
models["wLasso"] = WeightedLasso(estimator=None)
models["logreg"] = SparseLogreg(estimator=None)
models["svm"] = SVM(estimator=None)
models["svr"] = SVR(estimator=None)

custom_models = {}
custom_models["lasso"] = Lasso(estimator=celer.Lasso(
    warm_start=True, fit_intercept=False))
custom_models["enet"] = ElasticNet(
    estimator=linear_model.ElasticNet(warm_start=True, fit_intercept=False))
custom_models["logreg"] = SparseLogreg(
    estimator=celer.LogisticRegression(warm_start=True, fit_intercept=False))

# Compute "ground truth" with cvxpylayer
dict_cvxpy_func = {