Exemple #1
0
def parallel_function(dataset_name,
                      method,
                      tol=1e-5,
                      n_outer=50,
                      tolerance_decrease='constant'):

    # load data
    X_train, X_val, X_test, y_train, y_val, y_test = get_data(dataset_name)
    n_samples, _ = X_train.shape
    # compute alpha_max
    alpha_max = np.abs(X_train.T @ y_train).max() / n_samples

    if model_name == "logreg":
        alpha_max /= 2
    alpha_min = alpha_max / 10_000
    log_alpha_max = np.log(alpha_max)
    log_alpha_min = np.log(alpha_min)
    log_alpha0 = np.log(0.1 * alpha_max)

    if model_name == "lasso":
        model = Lasso(X_train, y_train)
    elif model_name == "logreg":
        model = SparseLogreg(X_train, y_train)

    try:
        n_outer = dict_n_outers[dataset_name, method]
    except Exception:
        n_outer = 20

    size_loop = 2

    for _ in range(size_loop):
        if model_name == "lasso":
            criterion = HeldOutMSE(X_val,
                                   y_val,
                                   model,
                                   X_test=X_test,
                                   y_test=y_test)
        elif model_name == "logreg":
            criterion = HeldOutLogistic(X_val,
                                        y_val,
                                        model,
                                        X_test=X_test,
                                        y_test=y_test)
        algo = dict_algo[method](criterion)
        monitor = Monitor()
        if method == 'grid_search':
            log_alphas = np.log(np.geomspace(alpha_max, alpha_min, num=100))
            grid_search(algo,
                        None,
                        None,
                        monitor,
                        log_alphas=log_alphas,
                        tol=tol)
        elif method == 'random':
            grid_search(algo,
                        log_alpha_max,
                        log_alpha_min,
                        monitor,
                        tol=tol,
                        max_evals=n_alphas,
                        t_max=dict_t_max[dataset_name])
        elif method in ("bayesian"):
            hyperopt_wrapper(algo,
                             log_alpha_min,
                             log_alpha_max,
                             monitor,
                             max_evals=n_alphas,
                             tol=tol,
                             method='bayesian',
                             t_max=dict_t_max[dataset_name])
        else:
            # do line search to find the optimal lambda
            grad_search(algo,
                        log_alpha0,
                        monitor,
                        n_outer=n_outer,
                        tol=tol,
                        tolerance_decrease=tolerance_decrease,
                        t_max=dict_t_max[dataset_name])

        monitor.times = np.array(monitor.times)
        monitor.objs = np.array(monitor.objs)
        monitor.objs_test = np.array(monitor.objs_test)
        monitor.log_alphas = np.array(monitor.log_alphas)
    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test, monitor.log_alphas,
            norm(y_val), norm(y_test), log_alpha_max, model_name)
Exemple #2
0
def parallel_function(dataset_name,
                      method,
                      tol=1e-5,
                      n_outer=50,
                      tolerance_decrease='constant'):

    # load data
    X, y = fetch_libsvm(dataset_name)
    y -= np.mean(y)
    # compute alpha_max
    alpha_max = np.abs(X.T @ y).max() / len(y)

    if model_name == "logreg":
        alpha_max /= 2
    alpha_min = alpha_max * dict_palphamin[dataset_name]

    if model_name == "enet":
        estimator = linear_model.ElasticNet(fit_intercept=False,
                                            max_iter=10_000,
                                            warm_start=True,
                                            tol=tol)
        model = ElasticNet(estimator=estimator)
    elif model_name == "logreg":
        model = SparseLogreg(estimator=estimator)

    # TODO improve this
    try:
        n_outer = dict_n_outers[dataset_name, method]
    except Exception:
        n_outer = 20

    size_loop = 2
    for _ in range(size_loop):
        if model_name == "lasso" or model_name == "enet":
            sub_criterion = HeldOutMSE(None, None)
        elif model_name == "logreg":
            criterion = HeldOutLogistic(None, None)
        kf = KFold(n_splits=5, shuffle=True, random_state=42)
        criterion = CrossVal(sub_criterion, cv=kf)

        algo = ImplicitForward(tol_jac=1e-3)
        monitor = Monitor()
        t_max = dict_t_max[dataset_name]
        if method == 'grid_search':
            num1D = dict_point_grid_search[dataset_name]
            alpha1D = np.geomspace(alpha_max, alpha_min, num=num1D)
            alphas = [np.array(i) for i in product(alpha1D, alpha1D)]
            grid_search(algo,
                        criterion,
                        model,
                        X,
                        y,
                        alpha_min,
                        alpha_max,
                        monitor,
                        max_evals=100,
                        tol=tol,
                        alphas=alphas)
        elif method == 'random' or method == 'bayesian':
            hyperopt_wrapper(algo,
                             criterion,
                             model,
                             X,
                             y,
                             alpha_min,
                             alpha_max,
                             monitor,
                             max_evals=30,
                             tol=tol,
                             method=method,
                             size_space=2,
                             t_max=t_max)
        elif method.startswith("implicit_forward"):
            # do gradient descent to find the optimal lambda
            alpha0 = np.array([alpha_max / 100, alpha_max / 100])
            n_outer = 30
            if method == 'implicit_forward':
                optimizer = GradientDescent(n_outer=n_outer,
                                            p_grad_norm=1,
                                            verbose=True,
                                            tol=tol,
                                            t_max=t_max)
            else:
                optimizer = GradientDescent(n_outer=n_outer,
                                            p_grad_norm=1,
                                            verbose=True,
                                            tol=tol,
                                            t_max=t_max,
                                            tol_decrease="geom")
            grad_search(algo, criterion, model, optimizer, X, y, alpha0,
                        monitor)
        else:
            raise NotImplementedError

    monitor.times = np.array(monitor.times)
    monitor.objs = np.array(monitor.objs)
    monitor.objs_test = 0  # TODO
    monitor.alphas = np.array(monitor.alphas)
    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test, monitor.alphas,
            alpha_max, model_name)
Exemple #3
0
def parallel_function(dataset_name,
                      method,
                      tol=1e-5,
                      n_outer=50,
                      tolerance_decrease='constant'):
    t_max = dict_tmax[dataset_name]
    # load data
    X_train, X_val, X_test, y_train, y_val, y_test = get_data(dataset_name)
    n_samples, n_features = X_train.shape
    # compute alpha_max
    alpha_max = np.abs(X_train.T @ y_train).max() / n_samples
    log_alpha0 = np.log(0.1 * alpha_max)

    idx_nz = scipy.sparse.linalg.norm(X_train, axis=0) != 0
    L_min = scipy.sparse.linalg.norm(X_train[:, idx_nz],
                                     axis=0).min()**2 / n_samples

    log_alpha0_mcp = np.array([log_alpha0, np.log(2 / L_min)])

    list_log_alphas = np.log(alpha_max * np.geomspace(1, 0.0001, 100))
    list_log_gammas = np.log(np.geomspace(1.1 / L_min, 1000 / L_min, 5))

    try:
        n_outer = dict_n_outers[dataset_name, method]
    except Exception:
        n_outer = 50

    if dataset_name == "rcv1":
        size_loop = 2
    else:
        size_loop = 1
    for i in range(size_loop):
        monitor = Monitor()
        warm_start = WarmStart()

        if method == 'grid_search':
            # n_alpha = 100
            # p_alphas = np.geomspace(1, 0.0001, n_alpha)
            grid_searchMCP(X_train,
                           y_train,
                           list_log_alphas,
                           list_log_gammas,
                           X_val,
                           y_val,
                           X_test,
                           y_test,
                           tol,
                           monitor=monitor)
        elif method in ("bayesian", "random"):
            monitor = hyperopt_lasso(X_train,
                                     y_train,
                                     log_alpha0,
                                     X_val,
                                     y_val,
                                     X_test,
                                     y_test,
                                     tol,
                                     max_evals=n_outer,
                                     method=method)
        else:
            # do line search to find the optimal lambda
            log_alpha, val, grad = grad_search(X_train,
                                               y_train,
                                               log_alpha0_mcp,
                                               X_val,
                                               y_val,
                                               X_test,
                                               y_test,
                                               tol,
                                               monitor,
                                               method=method,
                                               maxit=10000,
                                               n_outer=n_outer,
                                               warm_start=warm_start,
                                               niter_jac=100,
                                               model="mcp",
                                               t_max=t_max)
            del log_alpha, val, grad  # as not used

    monitor.times = np.array(monitor.times)
    monitor.objs = np.array(monitor.objs)
    monitor.objs_test = np.array(monitor.objs_test)
    monitor.log_alphas = np.array(monitor.log_alphas)
    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test, monitor.log_alphas,
            norm(y_val), norm(y_test))