Beispiel #1
0
def test_20news():
    """Check that the splits returned by ``get_data`` have coherent shapes."""
    splits = get_data("rcv1_train")
    X_train, X_val, X_test, y_train, y_val, y_test = splits

    # every design matrix must come with one target per row
    for design, target in ((X_train, y_train),
                           (X_test, y_test),
                           (X_val, y_val)):
        np.testing.assert_equal(design.shape[0], target.shape[0])

    # all splits must share the number of columns of the training split
    n_columns = X_train.shape[1]
    for design in (X_test, X_val):
        np.testing.assert_equal(n_columns, design.shape[1])
Beispiel #2
0
from sparse_ho.implicit_forward import ImplicitForward
from sparse_ho.criterion import HeldOutMSE
from sparse_ho.models import ElasticNet
from sparse_ho.ho import grad_search
from bcdsugar.utils import Monitor

# Bare reference to the imported name; it has no runtime effect
# (presumably only there so the import is not reported as unused).
Axes3D

# Dataset to load; "real-sim" is the alternative used with this script.
dataset = "rcv1"
# Switch to True to restrict the experiment to a subset of 1000 columns.
use_small_part = False

#############################
print("Started to load data")

X_train, X_val, X_test, y_train, y_val, y_test = get_data(dataset)
if use_small_part:
    # keep the 1000 columns with the largest |X_train.T @ y_train|
    abs_correlations = np.abs(X_train.T @ y_train)
    idx = abs_correlations.argsort()[-1000:]
    X_train = X_train[:, idx]
    X_val = X_val[:, idx]
    X_test = X_test[:, idx]

# centre each target vector in place, using its own mean
y_train -= y_train.mean()
y_val -= y_val.mean()
y_test -= y_test.mean()

print("Finished loading data")

# largest |X_train.T @ y_train| entry, scaled by the number of training rows
alpha_max = np.abs(X_train.T @ y_train).max() / X_train.shape[0]
log_alpha_max = np.log(alpha_max)
Beispiel #3
0
def parallel_function(dataset_name,
                      method,
                      tol=1e-5,
                      n_outer=50,
                      tolerance_decrease='exponential'):
    """Run one hyperparameter-selection method for an ElasticNet model.

    Parameters
    ----------
    dataset_name : str
        Passed to ``get_data`` and used as key into ``dict_t_max``.
    method : str
        One of ``"implicit_forward"``, ``"forward"``, ``"implicit"``
        (gradient-based search through ``grad_search``) or
        ``"grid_search"``, ``"random"``, ``"lhs"`` (sampling through
        ``grid_search``). Any other value runs no search and the returned
        monitor arrays come from an unused ``Monitor``.
    tol : float
        Tolerance forwarded to the model and to the search routine.
    n_outer : int
        NOTE: currently ignored, the value is overwritten with 25 below
        (kept as is to preserve existing behaviour).
    tolerance_decrease : str
        Forwarded to ``grad_search``.

    Returns
    -------
    tuple
        ``(dataset_name, method, tol, n_outer, tolerance_decrease, times,
        objs, objs_test, log_alphas, norm(y_val), norm(y_test),
        log_alpha_max)`` where the four monitor fields are numpy arrays
        taken from the monitor of the last repetition.
    """
    # load data
    X_train, X_val, X_test, y_train, y_val, y_test = get_data(dataset_name)
    n_samples, n_features = X_train.shape
    print('n_samples', n_samples)
    print('n_features', n_features)
    # targets equal to 0 are relabelled as -1, in place
    y_train[y_train == 0.0] = -1.0
    y_val[y_val == 0.0] = -1.0
    y_test[y_test == 0.0] = -1.0

    alpha_max = np.max(np.abs(X_train.T @ y_train)) / n_samples
    alpha_min = alpha_max * 1e-2
    log_alpha_max = np.log(alpha_max)
    log_alpha_min = np.log(alpha_min)

    # both regularisation parameters start from the same value
    log_alpha1_0 = np.log(0.1 * alpha_max)
    log_alpha2_0 = np.log(0.1 * alpha_max)

    # the argument is overridden: 25 outer iterations are always used
    n_outer = 25
    # the search is repeated; only the monitor of the last run is returned
    size_loop = 2

    gradient_methods = ("implicit_forward", "forward", "implicit")
    # extra keyword arguments of ``grid_search`` for each sampling method
    sampling_kwargs = {
        "grid_search": {"samp": "grid", "log_alphas": None},
        "random": {"samp": "random"},
        "lhs": {"samp": "lhs"},
    }

    model = ElasticNet(X_train,
                       y_train,
                       log_alpha1_0,
                       log_alpha2_0,
                       log_alpha_max,
                       max_iter=1000,
                       tol=tol)
    for _ in range(size_loop):
        monitor = Monitor()

        if method in gradient_methods:
            criterion = HeldOutMSE(X_val,
                                   y_val,
                                   model,
                                   X_test=X_test,
                                   y_test=y_test)
            if method == "implicit_forward":
                algo = ImplicitForward(
                    criterion, tol_jac=1e-3, n_iter_jac=100)
                # only this method sets ``verbose`` explicitly
                extra_kwargs = {"verbose": False}
            elif method == "forward":
                algo = Forward(criterion)
                extra_kwargs = {}
            else:
                algo = Implicit(criterion)
                extra_kwargs = {}
            _, _, _ = grad_search(algo=algo,
                                  log_alpha0=np.array(
                                      [log_alpha1_0, log_alpha2_0]),
                                  tol=tol,
                                  n_outer=n_outer,
                                  monitor=monitor,
                                  t_max=dict_t_max[dataset_name],
                                  tolerance_decrease=tolerance_decrease,
                                  **extra_kwargs)

        elif method in sampling_kwargs:
            criterion = HeldOutMSE(X_val,
                                   y_val,
                                   model,
                                   X_test=X_test,
                                   y_test=y_test)
            algo = Forward(criterion)
            # nb_hyperparam=2 is now passed for every sampling scheme; the
            # "lhs" branch previously omitted it although the model has
            # two regularisation parameters like in the other branches.
            log_alpha_opt, min_g_func = grid_search(
                algo,
                log_alpha_min,
                log_alpha_max,
                monitor,
                max_evals=10,
                tol=tol,
                t_max=dict_t_max[dataset_name],
                nb_hyperparam=2,
                **sampling_kwargs[method])
            print(log_alpha_opt)

    monitor.times = np.array(monitor.times)
    monitor.objs = np.array(monitor.objs)
    monitor.objs_test = np.array(monitor.objs_test)
    monitor.log_alphas = np.array(monitor.log_alphas)
    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test, monitor.log_alphas,
            norm(y_val), norm(y_test), log_alpha_max)
Beispiel #4
0
def parallel_function(dataset_name,
                      method,
                      tol=1e-5,
                      n_outer=50,
                      tolerance_decrease='constant'):
    """Run one hyperparameter-selection method for a lasso or sparse logreg.

    The model type is read from the module-level ``model_name``; the
    function also relies on the module-level ``n_alphas``, ``dict_algo``,
    ``dict_n_outers`` and ``dict_t_max``.

    Parameters
    ----------
    dataset_name : str
        Passed to ``get_data`` and used as key into ``dict_t_max`` and
        ``dict_n_outers``.
    method : str
        Key into ``dict_algo``. ``"grid_search"``, ``"random"`` and
        ``"bayesian"`` select the corresponding search; any other value is
        handled by ``grad_search``.
    tol : float
        Tolerance forwarded to the search routine.
    n_outer : int
        NOTE: currently ignored, it is replaced by
        ``dict_n_outers[dataset_name, method]`` or by 20 when that lookup
        fails (kept as is to preserve existing behaviour).
    tolerance_decrease : str
        Forwarded to ``grad_search``.

    Returns
    -------
    tuple
        ``(dataset_name, method, tol, n_outer, tolerance_decrease, times,
        objs, objs_test, log_alphas, norm(y_val), norm(y_test),
        log_alpha_max, model_name)`` where the four monitor fields are
        numpy arrays taken from the monitor of the last repetition.

    Raises
    ------
    ValueError
        If the module-level ``model_name`` is neither ``"lasso"`` nor
        ``"logreg"``.
    """
    if model_name not in ("lasso", "logreg"):
        # previously this surfaced later as an unbound local variable
        raise ValueError(
            "model_name must be 'lasso' or 'logreg', got %r" % (model_name,))

    # load data
    X_train, X_val, X_test, y_train, y_val, y_test = get_data(dataset_name)
    n_samples, _ = X_train.shape
    # compute alpha_max
    alpha_max = np.abs(X_train.T @ y_train).max() / n_samples

    if model_name == "logreg":
        alpha_max /= 2
    alpha_min = alpha_max / 10_000
    log_alpha_max = np.log(alpha_max)
    log_alpha_min = np.log(alpha_min)
    log_alpha0 = np.log(0.1 * alpha_max)

    if model_name == "lasso":
        model = Lasso(X_train, y_train)
    else:
        model = SparseLogreg(X_train, y_train)

    # Any failure of the lookup falls back to 20 outer iterations; the
    # handler is kept broad to preserve the existing behaviour.
    try:
        n_outer = dict_n_outers[dataset_name, method]
    except Exception:
        n_outer = 20

    # the search is repeated; only the monitor of the last run is returned
    size_loop = 2

    for _ in range(size_loop):
        if model_name == "lasso":
            criterion = HeldOutMSE(X_val,
                                   y_val,
                                   model,
                                   X_test=X_test,
                                   y_test=y_test)
        else:
            criterion = HeldOutLogistic(X_val,
                                        y_val,
                                        model,
                                        X_test=X_test,
                                        y_test=y_test)
        algo = dict_algo[method](criterion)
        monitor = Monitor()
        if method == 'grid_search':
            # explicit grid, so no bounds are given
            log_alphas = np.log(np.geomspace(alpha_max, alpha_min, num=100))
            grid_search(algo,
                        None,
                        None,
                        monitor,
                        log_alphas=log_alphas,
                        tol=tol)
        elif method == 'random':
            # bounds are passed as (min, max), in the same order as in the
            # hyperopt_wrapper call below; they used to be swapped here
            grid_search(algo,
                        log_alpha_min,
                        log_alpha_max,
                        monitor,
                        tol=tol,
                        max_evals=n_alphas,
                        t_max=dict_t_max[dataset_name])
        elif method == "bayesian":
            # exact comparison: ``method in ("bayesian")`` was a substring
            # test on a plain string and also matched e.g. "" or "bay"
            hyperopt_wrapper(algo,
                             log_alpha_min,
                             log_alpha_max,
                             monitor,
                             max_evals=n_alphas,
                             tol=tol,
                             method='bayesian',
                             t_max=dict_t_max[dataset_name])
        else:
            # do line search to find the optimal lambda
            grad_search(algo,
                        log_alpha0,
                        monitor,
                        n_outer=n_outer,
                        tol=tol,
                        tolerance_decrease=tolerance_decrease,
                        t_max=dict_t_max[dataset_name])

    monitor.times = np.array(monitor.times)
    monitor.objs = np.array(monitor.objs)
    monitor.objs_test = np.array(monitor.objs_test)
    monitor.log_alphas = np.array(monitor.log_alphas)
    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test, monitor.log_alphas,
            norm(y_val), norm(y_test), log_alpha_max, model_name)
Beispiel #5
0
def parallel_function(dataset_name,
                      method,
                      tol=1e-5,
                      n_outer=50,
                      tolerance_decrease='constant'):
    """Run one hyperparameter-selection method for the MCP experiment.

    Parameters
    ----------
    dataset_name : str
        Passed to ``get_data`` and used as key into ``dict_tmax`` and
        ``dict_n_outers``.
    method : str
        ``"grid_search"`` runs ``grid_searchMCP``, ``"bayesian"`` and
        ``"random"`` run ``hyperopt_lasso``, any other value is forwarded
        to ``grad_search``.
    tol : float
        Tolerance forwarded to the search routine.
    n_outer : int
        Replaced by ``dict_n_outers[dataset_name, method]`` when that
        lookup succeeds and by 50 otherwise, so the argument is not used.
    tolerance_decrease : str
        Not used by the search; only echoed in the returned tuple.

    Returns
    -------
    tuple
        ``(dataset_name, method, tol, n_outer, tolerance_decrease, times,
        objs, objs_test, log_alphas, norm(y_val), norm(y_test))`` where the
        four monitor fields are numpy arrays taken from the last monitor.
    """
    t_max = dict_tmax[dataset_name]

    # load data
    X_train, X_val, X_test, y_train, y_val, y_test = get_data(dataset_name)
    n_samples, n_features = X_train.shape

    # starting point for the first hyperparameter
    alpha_max = np.abs(X_train.T @ y_train).max() / n_samples
    log_alpha0 = np.log(0.1 * alpha_max)

    # smallest squared norm over the nonzero columns, divided by n_samples
    idx_nz = scipy.sparse.linalg.norm(X_train, axis=0) != 0
    nonzero_col_norms = scipy.sparse.linalg.norm(X_train[:, idx_nz], axis=0)
    L_min = nonzero_col_norms.min()**2 / n_samples

    log_alpha0_mcp = np.array([log_alpha0, np.log(2 / L_min)])

    # grids explored by the grid search
    list_log_alphas = np.log(alpha_max * np.geomspace(1, 0.0001, 100))
    list_log_gammas = np.log(np.geomspace(1.1 / L_min, 1000 / L_min, 5))

    try:
        n_outer = dict_n_outers[dataset_name, method]
    except Exception:
        n_outer = 50

    # the search is run twice on "rcv1" and once on every other dataset
    size_loop = 2 if dataset_name == "rcv1" else 1
    for _ in range(size_loop):
        monitor = Monitor()
        warm_start = WarmStart()

        if method == 'grid_search':
            grid_searchMCP(X_train, y_train,
                           list_log_alphas, list_log_gammas,
                           X_val, y_val,
                           X_test, y_test,
                           tol,
                           monitor=monitor)
        elif method == "bayesian" or method == "random":
            # hyperopt_lasso hands back the monitor that is reported
            monitor = hyperopt_lasso(X_train, y_train,
                                     log_alpha0,
                                     X_val, y_val,
                                     X_test, y_test,
                                     tol,
                                     max_evals=n_outer,
                                     method=method)
        else:
            # do line search to find the optimal lambda; the three returned
            # values are not needed, the monitor records the run
            _, _, _ = grad_search(X_train, y_train,
                                  log_alpha0_mcp,
                                  X_val, y_val,
                                  X_test, y_test,
                                  tol,
                                  monitor,
                                  method=method,
                                  maxit=10000,
                                  n_outer=n_outer,
                                  warm_start=warm_start,
                                  niter_jac=100,
                                  model="mcp",
                                  t_max=t_max)

    # turn the recorded histories into numpy arrays
    for field in ("times", "objs", "objs_test", "log_alphas"):
        setattr(monitor, field, np.array(getattr(monitor, field)))
    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test, monitor.log_alphas,
            norm(y_val), norm(y_test))
Beispiel #6
0
def parallel_function(
        dataset_name, method, tol=1e-5, n_outer=50,
        tolerance_decrease='exponential'):
    """Run one hyperparameter-selection method for an SVM model.

    Parameters
    ----------
    dataset_name : str
        Passed to ``get_data`` and used as key into ``dict_t_max``.
    method : str
        ``"implicit_forward"``, ``"forward"`` or ``"implicit"`` run
        ``grad_search``; ``"grid_search"``, ``"random"`` or ``"lhs"`` run
        ``grid_search``. Any other value runs no search at all.
    tol : float
        Tolerance forwarded to the model and to the search routine.
    n_outer : int
        Overwritten with 5 inside the function, so the argument is unused.
    tolerance_decrease : str
        Forwarded to ``grad_search``.

    Returns
    -------
    tuple
        ``(dataset_name, method, tol, n_outer, tolerance_decrease, times,
        objs, objs_test, log_alphas, norm(y_val), norm(y_test))`` where the
        four monitor fields are numpy arrays.
    """
    # load data
    X_train, X_val, X_test, y_train, y_val, y_test = get_data(
        dataset_name, csr=True)
    n_samples, n_features = X_train.shape
    print('n_samples', n_samples)
    print('n_features', n_features)
    # targets equal to 0 are relabelled as -1, in place
    y_train[y_train == 0.0] = -1.0
    y_val[y_val == 0.0] = -1.0
    y_test[y_test == 0.0] = -1.0

    C_max = 100
    logC = np.log(1e-2)
    n_outer = 5

    # a single repetition, whatever the dataset
    size_loop = 1

    # value of ``samp`` handed to grid_search for each sampling method
    samp_of_method = {"grid_search": "grid", "random": "random", "lhs": "lhs"}

    model = SVM(X_train, y_train, logC, max_iter=10000, tol=tol)
    for _ in range(size_loop):
        monitor = Monitor()

        if method in ("implicit_forward", "forward", "implicit"):
            criterion = HeldOutSmoothedHinge(
                X_val, y_val, model, X_test=X_test, y_test=y_test)
            if method == "implicit_forward":
                algo = ImplicitForward(
                    criterion, tol_jac=1e-3, n_iter_jac=100)
                # only this method sets ``verbose`` explicitly
                search_options = {"verbose": False}
            elif method == "forward":
                algo = Forward(criterion)
                search_options = {}
            else:
                algo = Implicit(criterion)
                search_options = {}
            _, _, _ = grad_search(
                algo=algo,
                log_alpha0=logC, tol=tol,
                n_outer=n_outer, monitor=monitor,
                t_max=dict_t_max[dataset_name],
                tolerance_decrease=tolerance_decrease,
                **search_options)

        elif method in ("grid_search", "random", "lhs"):
            criterion = HeldOutSmoothedHinge(
                X_val, y_val, model, X_test=X_test, y_test=y_test)
            algo = Forward(criterion)
            log_alpha_min = np.log(1e-2)
            log_alpha_opt, min_g_func = grid_search(
                algo, log_alpha_min, np.log(C_max), monitor, max_evals=25,
                tol=tol, samp=samp_of_method[method])
            print(log_alpha_opt)

    # turn the recorded histories into numpy arrays
    for field in ("times", "objs", "objs_test", "log_alphas"):
        setattr(monitor, field, np.array(getattr(monitor, field)))
    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test,
            monitor.log_alphas, norm(y_val), norm(y_test))