Example #1
# Evaluate the criterion on the grid of alphas and record it in the monitor
monitor_grid = Monitor()
for i in range(n_alphas):
    log_alpha_i = np.log(alpha_max * p_alphas[:, i])
    logit_multiclass.get_val(
        model, X, y, log_alpha_i, None, monitor_grid, tol)

print("###################### GRAD SEARCH LS ###################")
n_outer = 100
model = SparseLogreg(estimator=estimator)
logit_multiclass = LogisticMulticlass(idx_train, idx_val, idx_test, algo)

monitor = Monitor()

# Warm start the gradient search at the best point found on the grid
idx_min = np.argmin(np.array(monitor_grid.objs))
log_alpha0 = monitor_grid.log_alphas[idx_min]
optimizer = GradientDescent(
    n_outer=n_outer, step_size=None, p_grad_norm=0.1, tol=tol)
grad_search(
    algo, logit_multiclass, model, optimizer, X, y, log_alpha0, monitor)


print("###################### USE HYPEROPT ###################")
log_alpha_max = np.log(alpha_max)
log_alpha_min = np.log(alpha_max / 10_000)
monitor_hyp = Monitor()
hyperopt_wrapper(
    algo, logit_multiclass, model, X, y, log_alpha_min, log_alpha_max,
    monitor_hyp, tol=tol, size_space=n_classes, max_evals=10)
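# Each search above logs its trajectory in a Monitor. A minimal sketch (an
# addition, assuming the monitor_hyp run above exposes the same objs and
# log_alphas attributes as monitor_grid does) of how to read back the best
# hyperparameter afterwards:
idx_best = np.argmin(np.array(monitor_hyp.objs))
log_alpha_best = monitor_hyp.log_alphas[idx_best]
print("Best log(alpha) found by hyperopt:", log_alpha_best)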
Example #2
# all_algo_name = ['random_search']

for algo_name in all_algo_name:
    model = ElasticNet(estimator=estimator)
    sub_criterion = HeldOutMSE(None, None)
    alpha0 = np.array([alpha_max / 10, alpha_max / 10])
    monitor = Monitor()
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    criterion = CrossVal(sub_criterion, cv=kf)
    algo = ImplicitForward(tol_jac=1e-3)
    # optimizer = LineSearch(n_outer=10, tol=tol)
    if algo_name.startswith('implicit_forward'):
        if algo_name == "implicit_forward_approx":
            # approximate variant: let the inner solver tolerance decrease
            # geometrically along the outer iterations
            optimizer = GradientDescent(n_outer=30,
                                        p_grad_norm=1.,
                                        verbose=True,
                                        tol=tol,
                                        tol_decrease="geom")
        else:
            optimizer = GradientDescent(n_outer=30,
                                        p_grad_norm=1.,
                                        verbose=True,
                                        tol=tol)
        grad_search(algo, criterion, model, optimizer, X, y, alpha0, monitor)
    elif algo_name == 'grid_search':
        grid_search(algo,
                    criterion,
                    model,
                    X,
                    y,
                    None,
Example #3
def parallel_function(dataset_name,
                      method,
                      tol=1e-5,
                      n_outer=50,
                      tolerance_decrease='constant'):

    # load data
    X, y = fetch_libsvm(dataset_name)
    y -= np.mean(y)
    # compute alpha_max
    alpha_max = np.abs(X.T @ y).max() / len(y)

    if model_name == "logreg":
        alpha_max /= 2
    alpha_min = alpha_max * dict_palphamin[dataset_name]

    if model_name == "enet":
        estimator = linear_model.ElasticNet(fit_intercept=False,
                                            max_iter=10_000,
                                            warm_start=True,
                                            tol=tol)
        model = ElasticNet(estimator=estimator)
    elif model_name == "logreg":
        model = SparseLogreg(estimator=estimator)

    # TODO improve this
    try:
        n_outer = dict_n_outers[dataset_name, method]
    except Exception:
        n_outer = 20

    size_loop = 2
    for _ in range(size_loop):
        if model_name == "lasso" or model_name == "enet":
            sub_criterion = HeldOutMSE(None, None)
        elif model_name == "logreg":
            sub_criterion = HeldOutLogistic(None, None)
        kf = KFold(n_splits=5, shuffle=True, random_state=42)
        criterion = CrossVal(sub_criterion, cv=kf)

        algo = ImplicitForward(tol_jac=1e-3)
        monitor = Monitor()
        t_max = dict_t_max[dataset_name]
        if method == 'grid_search':
            num1D = dict_point_grid_search[dataset_name]
            alpha1D = np.geomspace(alpha_max, alpha_min, num=num1D)
            alphas = [np.array(i) for i in product(alpha1D, alpha1D)]
            grid_search(algo,
                        criterion,
                        model,
                        X,
                        y,
                        alpha_min,
                        alpha_max,
                        monitor,
                        max_evals=100,
                        tol=tol,
                        alphas=alphas)
        elif method == 'random' or method == 'bayesian':
            hyperopt_wrapper(algo,
                             criterion,
                             model,
                             X,
                             y,
                             alpha_min,
                             alpha_max,
                             monitor,
                             max_evals=30,
                             tol=tol,
                             method=method,
                             size_space=2,
                             t_max=t_max)
        elif method.startswith("implicit_forward"):
            # do gradient descent to find the optimal lambda
            alpha0 = np.array([alpha_max / 100, alpha_max / 100])
            n_outer = 30
            if method == 'implicit_forward':
                optimizer = GradientDescent(n_outer=n_outer,
                                            p_grad_norm=1,
                                            verbose=True,
                                            tol=tol,
                                            t_max=t_max)
            else:
                optimizer = GradientDescent(n_outer=n_outer,
                                            p_grad_norm=1,
                                            verbose=True,
                                            tol=tol,
                                            t_max=t_max,
                                            tol_decrease="geom")
            grad_search(algo, criterion, model, optimizer, X, y, alpha0,
                        monitor)
        else:
            raise NotImplementedError

    monitor.times = np.array(monitor.times)
    monitor.objs = np.array(monitor.objs)
    monitor.objs_test = 0  # TODO
    monitor.alphas = np.array(monitor.alphas)
    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test, monitor.alphas,
            alpha_max, model_name)
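# parallel_function returns one flat tuple of results per (dataset, method)
# pair. A minimal sketch, not part of the original script, of how it could be
# driven over several configurations with joblib; the dataset_names and
# methods lists below are illustrative assumptions.
from itertools import product

from joblib import Parallel, delayed

dataset_names = ["rcv1.binary", "news20.binary"]
methods = ["grid_search", "random", "implicit_forward"]

results = Parallel(n_jobs=-1, verbose=10)(
    delayed(parallel_function)(dataset_name, method)
    for dataset_name, method in product(dataset_names, methods))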
Example #4
print("Vanilla LassoCV: Mean-squared error on test data %f" % mse_cv)
##############################################################################


##############################################################################
# Weighted Lasso with sparse-ho.
# We use the alpha selected by the vanilla LassoCV as a starting point
alpha0 = model_cv.alpha_ * np.ones(n_features)
# Weighted Lasso with sparse-ho: one regularization parameter per feature
estimator = Lasso(fit_intercept=False, max_iter=100, warm_start=True)
model = WeightedLasso(estimator=estimator)
sub_criterion = HeldOutMSE(idx_train, idx_val)
criterion = CrossVal(sub_criterion, cv=cv)
algo = ImplicitForward()
monitor = Monitor()
optimizer = GradientDescent(
    n_outer=100, tol=1e-7, verbose=True, p_grad_norm=1.9)
results = grad_search(
    algo, criterion, model, optimizer, X, y, alpha0, monitor)
##############################################################################

# Refit with the per-feature regularization weights found by the gradient search
estimator.weights = monitor.alphas[-1]
estimator.fit(X, y)
##############################################################################
# MSE on the data used for hyperparameter selection
mse_sho_val = mean_squared_error(y, estimator.predict(X))

# MSE on test set, i.e. unseen data
mse_sho_test = mean_squared_error(y_test, estimator.predict(X_test))

# Oracle MSE
mse_oracle = mean_squared_error(y_test, X_test @ w_true)
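##############################################################################
# Side-by-side summary (an addition, assuming the quantities computed above):
# compare the weighted Lasso fitted with sparse-ho against the vanilla LassoCV
# and the oracle that knows the true coefficients w_true.
print("Vanilla LassoCV: MSE on test data %f" % mse_cv)
print("Weighted Lasso:  MSE on test data %f" % mse_sho_test)
print("Oracle:          MSE on test data %f" % mse_oracle)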
Example #5
            X,
            y,
            alpha_min,
            alpha_max,
            monitor_grid,
            alphas=alphas,
            tol=tol)
objs = np.array(monitor_grid.objs)

##############################################################################
# Grad-search
# -----------
optimizer_names = ['line-search', 'gradient-descent', 'adam']
optimizers = {
    'line-search': LineSearch(n_outer=10, tol=tol),
    'gradient-descent': GradientDescent(n_outer=10, step_size=100),
    'adam': Adam(n_outer=10, lr=0.11)
}

monitors = {}
alpha0 = alpha_max / 10  # starting point

for optimizer_name in optimizer_names:
    estimator = LogisticRegression(penalty='l1',
                                   fit_intercept=False,
                                   solver='saga',
                                   tol=tol)
    model = SparseLogreg(estimator=estimator)
    criterion = HeldOutLogistic(idx_train, idx_val)

    monitor_grad = Monitor()
Example #6
##############################################################################
# Grad-search with sparse-ho
# --------------------------
estimator = linear_model.ElasticNet(fit_intercept=False,
                                    max_iter=max_iter,
                                    warm_start=True)
print("Started grad-search")
t_grad_search = -time.time()
monitor = Monitor()
n_outer = 10
alpha0 = np.array([alpha_max * 0.9, alpha_max * 0.9])
model = ElasticNet(estimator=estimator)
criterion = HeldOutMSE(idx_train, idx_val)
algo = ImplicitForward(tol_jac=1e-3, n_iter_jac=100, max_iter=max_iter)
optimizer = GradientDescent(n_outer=n_outer,
                            tol=tol,
                            p_grad_norm=1.5,
                            verbose=True)
grad_search(algo,
            criterion,
            model,
            optimizer,
            X,
            y,
            alpha0=alpha0,
            monitor=monitor)
t_grad_search += time.time()
monitor.alphas = np.array(monitor.alphas)

print("Time grid search %f" % t_grid_search)
print("Time grad-search %f" % t_grad_search)
print("Minimum grid search %0.3e" % results.min())
Example #7
# Grad-search
# -----------

print('sparse-ho started')

t0 = time.time()

# 'saga' supports the l1 penalty (scikit-learn's default solver does not)
estimator = LogisticRegression(
    penalty='l1', fit_intercept=False, solver='saga', tol=tol)
model = SparseLogreg(estimator=estimator)
criterion = HeldOutLogistic(idx_train, idx_val)

monitor_grad = Monitor()
algo = ImplicitForward(tol_jac=tol, n_iter_jac=1000)

optimizer = GradientDescent(n_outer=10, tol=tol)
grad_search(
    algo, criterion, model, optimizer, X, y, alpha0,
    monitor_grad)
objs_grad = np.array(monitor_grad.objs)

t_grad_search = time.time() - t0

print('sparse-ho finished')
print(f"Time to compute grad search: {t_grad_search:.2f} s")


p_alphas_grad = np.array(monitor_grad.alphas) / alpha_max

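# A possible follow-up, not in the original snippet: plot the objective along
# the gradient-search path using the normalized alphas computed above.
import matplotlib.pyplot as plt

plt.semilogx(p_alphas_grad, objs_grad, marker="o")
plt.xlabel(r"$\alpha / \alpha_{\max}$")
plt.ylabel("Validation loss")
plt.title("Gradient search path (sparse-ho)")
plt.tight_layout()
plt.show()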
Example #8
print("Starting path computation...")
for algorithm in algorithms:
    estimator = celer.Lasso(fit_intercept=False,
                            max_iter=1000,
                            warm_start=True,
                            tol=tol)

    print('%s started' % algorithm)

    model = Lasso(estimator=estimator)
    criterion = HeldOutMSE(None, None)
    log_alpha0 = np.log(alpha_max / 10)
    monitor = Monitor()
    cross_val_criterion = CrossVal(criterion, cv=kf)
    algo = ImplicitForward()
    optimizer = GradientDescent(n_outer=10, tol=tol, verbose=True, p_grad0=1)
    # optimizer = LineSearch(n_outer=10, tol=tol, verbose=True)
    if algorithm == 'grad_search':
        grad_search(algo, cross_val_criterion, model, optimizer, X, y,
                    log_alpha0, monitor)
        objs = np.array(monitor.objs)
        log_alphas = np.array(monitor.log_alphas)

    elif algorithm.startswith('grid_search'):
        if algorithm == 'grid_search10':
            n_alphas = 10
        else:
            n_alphas = 100
        p_alphas = np.geomspace(1, p_alpha_min, n_alphas)
        alphas = alpha_max * p_alphas
        reg = LassoCV(cv=kf,
Example #9
                        tol=tol)

dict_monitor = {}

all_algo_name = ['implicit_forward', 'grid_search']

for algo_name in all_algo_name:
    model = Lasso(estimator=estimator)
    sub_criterion = HeldOutMSE(None, None)
    alpha0 = alpha_max / 10
    monitor = Monitor()
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    criterion = CrossVal(sub_criterion, cv=kf)
    algo = ImplicitForward(tol_jac=1e-3)
    optimizer = GradientDescent(n_outer=30,
                                p_grad_norm=1.,
                                verbose=True,
                                tol=tol)
    if algo_name == 'implicit_forward':
        grad_search(algo, criterion, model, optimizer, X, y, alpha0, monitor)
    elif algo_name == 'grid_search':
        grid_search(algo,
                    criterion,
                    model,
                    X,
                    y,
                    alpha_min,
                    alpha_max,
                    monitor,
                    max_evals=20,
                    tol=tol)
    elif algo_name == 'random_search':
Example #10
for algorithm in algorithms:
    estimator = sklearn.linear_model.ElasticNet(
        fit_intercept=False, max_iter=3_000, warm_start=True, tol=tol)

    print('%s started' % algorithm)

    model = ElasticNet(estimator=estimator)
    criterion = HeldOutMSE(None, None)
    log_alpha0 = np.array([np.log(alpha_max / 10), np.log(alpha_max / 10)])
    monitor = Monitor()
    cross_val_criterion = CrossVal(criterion, cv=kf)
    algo = ImplicitForward()
    # optimizer = LineSearch(n_outer=10, tol=tol, verbose=True)
    if algorithm.startswith('grad_search'):
        if algorithm == 'grad_search':
            optimizer = GradientDescent(
                n_outer=max_evals, tol=tol, verbose=True, p_grad_norm=1.9)
        else:
            optimizer = LineSearch(n_outer=25, verbose=True, tol=tol)
        grad_search(
            algo, cross_val_criterion, model, optimizer, X, y, log_alpha0,
            monitor)

    elif algorithm.startswith('grid_search'):
        if algorithm == 'grid_search10':
            n_alphas = 5
        else:
            n_alphas = 30
        p_alphas = np.geomspace(1, p_alpha_min, n_alphas)
        alphas = alpha_max * p_alphas
        log_alphas = np.log(alphas)
        grid_alphas = list(itertools.product(log_alphas, log_alphas))