Example #1

# Imports assumed by this snippet; the remaining names (load_libsvm,
# compute_beta, SVM, Cs, max_iters) come from the surrounding module.
import numpy as np
from numpy.linalg import norm
from scipy.sparse import csc_matrix
from scipy.sparse.linalg import cg
from lightning.classification import SDCAClassifier
def linear_cv(dataset_name, max_iter=1000, tol=1e-3, compute_jac=True):
    max_iter = max_iters[dataset_name]  # overrides the default argument
    X, y = load_libsvm(dataset_name)
    X = X.tocsr()
    num_nonzeros = np.diff(X.indptr)
    X = X[num_nonzeros != 0]
    y = y[num_nonzeros != 0]
    n_samples, n_features = X.shape
    C = Cs[dataset_name]
    # Compute the dual solution of the SVM via SDCA (lightning)

    clf = SDCAClassifier(
        alpha=1/(C * n_samples), loss='hinge', verbose=True, tol=1e-16,
        max_iter=max_iter)
    clf.fit(X, y)
    beta_star = np.abs(clf.dual_coef_[0])
    primal_star = np.sum(X.T.multiply(y * beta_star), axis=1)
    # support coordinates strictly inside (0, C)
    full_supp = np.logical_and(
        ~np.isclose(beta_star, 0), ~np.isclose(beta_star, C))
    # rows of yX are y_i * x_i; the Gram matrix Q = yX @ yX.T is used
    # implicitly, never formed
    yX = X.multiply(y[:, np.newaxis]).tocsr()

    # Right-hand side of the linear system: coordinates clipped at C
    # contribute a constant C
    clipped = np.isclose(beta_star, C)
    temp3 = np.zeros(n_samples)
    temp3[clipped] = C
    v = temp3[full_supp] - yX[full_supp, :] @ (
        yX[clipped, :].T @ temp3[clipped])
    temp = csc_matrix(yX[full_supp, :] @ yX[full_supp, :].T)
    print("size system to solve %i" % v.shape[0])
    jac_dense = cg(temp, v, tol=1e-12)
    jac_star = np.zeros(n_samples)
    jac_star[full_supp] = jac_dense[0]
    jac_star[np.isclose(beta_star, C)] = C
    primal_jac_star = np.sum(X.T.multiply(y * jac_star), axis=1)
    model = SVM(X, y, np.log(C), max_iter=max_iter, tol=tol)
    list_beta, list_jac = compute_beta(
        X, y, np.log(C), model, save_iterates=True, tol=1e-32,
        max_iter=max_iter, compute_jac=True)

    M = X.T @ (list_beta * y).T
    M_jac = X.T @ (list_jac * y).T
    diff_beta = norm(M - primal_star, axis=0)
    diff_jac = norm(M_jac - primal_jac_star, axis=0)
    full_supp_star = np.logical_and(
        ~np.isclose(list_beta[-1], 0), ~np.isclose(list_beta[-1], C))
    n_iter = list_beta.shape[0]
    # find the last iteration at which the working support still differed
    # from the final one
    for i in np.arange(n_iter)[::-1]:
        full_supp = np.logical_and(
            ~np.isclose(list_beta[i, :], 0),
            ~np.isclose(list_beta[i, :], C))
        if not np.all(full_supp == full_supp_star):
            supp_id = i + 1
            break
        supp_id = 0
    return dataset_name, C, diff_beta, diff_jac, n_iter, supp_id
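A minimal driver for the function above; a sketch only, since the
module-level dicts `max_iters` and `Cs` read by `linear_cv` are not shown
here (the dataset name and values below are illustrative):

max_iters = {"leukemia": 1000}
Cs = {"leukemia": 0.1}

name, C, diff_beta, diff_jac, n_iter, supp_id = linear_cv("leukemia")
print(name, C, n_iter, supp_id)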
Example #2

# Imports assumed by this snippet; the sparse-ho objects (SVM, Monitor,
# HeldOutSmoothedHinge, Implicit, ImplicitForward, Forward, LinearSVC)
# and the module-level dict_logC, dict_max_iter and tol come from the
# surrounding module.
import os

import numpy as np
import pandas
from numpy.linalg import norm
from scipy.sparse import csr_matrix
from libsvmdata import fetch_libsvm
from sklearn.model_selection import StratifiedShuffleSplit
def parallel_function(dataset_name, method):
    X, y = fetch_libsvm(dataset_name)
    if dataset_name == "real-sim":
        X = X[:, :2000]
    X = csr_matrix(X)  # very important for SVM
    my_bool = norm(X, axis=1) != 0
    X = X[my_bool, :]
    y = y[my_bool]
    logC = dict_logC[dataset_name]
    for max_iter in dict_max_iter[dataset_name]:
        print("Dataset %s, max iter %i" % (method, max_iter))
        for i in range(2):  # TODO change this
            sss1 = StratifiedShuffleSplit(
                n_splits=2, test_size=0.3333, random_state=0)
            # take the train/validation indices of the first split
            idx_train, idx_val = next(sss1.split(X, y))

            monitor = Monitor()
            criterion = HeldOutSmoothedHinge(idx_train, idx_val)
            model = SVM(estimator=None, max_iter=10_000)

            if method == "ground_truth":
                for file in os.listdir("results_svm/"):
                    if file.startswith("hypergradient_svm_%s_%s" %
                                       (dataset_name, method)):
                        return
                clf = LinearSVC(C=np.exp(logC),
                                tol=1e-32,
                                max_iter=10_000,
                                loss='hinge',
                                permute=False)
                algo = Implicit(criterion)
                model.estimator = clf
                val, grad = criterion.get_val_grad(model,
                                                   X,
                                                   y,
                                                   logC,
                                                   algo.compute_beta_grad,
                                                   tol=1e-14,
                                                   monitor=monitor)
            else:
                if method == "sota":
                    clf = LinearSVC(C=np.exp(logC),
                                    loss='hinge',
                                    max_iter=max_iter,
                                    tol=1e-32,
                                    permute=False)
                    model.estimator = clf
                    algo = ImplicitForward(tol_jac=1e-32,
                                           n_iter_jac=max_iter,
                                           use_stop_crit=False)
                elif method == "forward":
                    algo = Forward(use_stop_crit=False)
                elif method == "implicit_forward":
                    algo = ImplicitForward(tol_jac=1e-8,
                                           n_iter_jac=max_iter,
                                           use_stop_crit=False)
                else:
                    raise NotImplementedError
                algo.max_iter = max_iter
                algo.use_stop_crit = False
                val, grad = criterion.get_val_grad(model,
                                                   X,
                                                   y,
                                                   logC,
                                                   algo.compute_beta_grad,
                                                   tol=tol,
                                                   monitor=monitor,
                                                   max_iter=max_iter)

        results = (dataset_name, method, max_iter, val, grad, monitor.times[0])
        df = pandas.DataFrame(results).transpose()
        df.columns = ['dataset', 'method', 'maxit', 'val', 'grad', 'time']
        str_results = "results_svm/hypergradient_svm_%s_%s_%i.pkl" % (
            dataset_name, method, max_iter)
        df.to_pickle(str_results)
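The function above is meant to be mapped over (dataset, method) pairs; a
hedged driver sketch with joblib (the dataset list and n_jobs are
illustrative):

from itertools import product

from joblib import Parallel, delayed

datasets = ["rcv1_train"]  # illustrative
methods = ["ground_truth", "sota", "forward", "implicit_forward"]
Parallel(n_jobs=4)(
    delayed(parallel_function)(dataset_name, method)
    for dataset_name, method in product(datasets, methods))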
Example #3

# Imports assumed by this snippet; SVM, get_beta_jac_iterdiff, n_samples
# and n_features come from the surrounding test module.
import numpy as np
import pytest
from sklearn import datasets
X, y = datasets.make_classification(n_samples=n_samples,
                                    n_features=n_features,
                                    n_informative=50,
                                    random_state=11,
                                    flip_y=0.1,
                                    n_redundant=0)

y[y == 0.0] = -1.0
idx_train = np.arange(0, 50)
idx_val = np.arange(50, 100)

C = 0.01
log_C = np.log(C)
tol = 1e-16

models = [SVM(log_C, max_iter=10000, tol=tol)]


def get_v(mask, dense):
    # Gradient of the held-out mean squared error with respect to the
    # coefficients restricted to the active set `mask`.
    return 2 * (X[np.ix_(idx_val, mask)].T @ (
        X[np.ix_(idx_val, mask)] @ dense - y[idx_val])) / idx_val.shape[0]
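A dense rewrite of the same quantity, kept as a sketch to make the formula
explicit (get_v_dense is a hypothetical helper, not part of the original
test; it assumes the same globals X, y, idx_val):

def get_v_dense(mask, dense):
    # gradient of the validation mean squared error:
    # 2 / n_val * X_mv.T @ (X_mv @ w - y_val)
    X_mv = X[idx_val][:, mask]
    return 2 * X_mv.T @ (X_mv @ dense - y[idx_val]) / idx_val.shape[0]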


@pytest.mark.parametrize('model', models)
def test_beta_jac(model):
    supp1, dense1, jac1 = get_beta_jac_iterdiff(X[idx_train, :],
                                                y[idx_train],
                                                log_C,
                                                tol=tol,
                                                model=model,
                                                compute_jac=True)
Example #4

# Imports assumed by this snippet; the sparse-ho models (Lasso, ElasticNet,
# WeightedLasso, SparseLogreg, SVM, SVR), the cvxpy helpers and the
# log_alpha values come from the surrounding test module.
import celer
import numpy as np
from sklearn import linear_model
dict_log_alpha = {}
dict_log_alpha["lasso"] = log_alpha
dict_log_alpha["enet"] = np.array([log_alpha1, log_alpha2])
tab = np.linspace(1, 1000, n_features)
dict_log_alpha["wLasso"] = log_alpha + np.log(tab / tab.max())
dict_log_alpha["logreg"] = (log_alpha - np.log(2))
dict_log_alpha["svm"] = 1e-4
dict_log_alpha["svr"] = np.array([1e-2, 1e-2])
# Set models to be tested
models = {}
models["lasso"] = Lasso(estimator=None)
models["enet"] = ElasticNet(estimator=None)
models["wLasso"] = WeightedLasso(estimator=None)
models["logreg"] = SparseLogreg(estimator=None)
models["svm"] = SVM(estimator=None)
models["svr"] = SVR(estimator=None)

custom_models = {}
custom_models["lasso"] = Lasso(estimator=celer.Lasso(
    warm_start=True, fit_intercept=False))
custom_models["enet"] = ElasticNet(
    estimator=linear_model.ElasticNet(warm_start=True, fit_intercept=False))
custom_models["logreg"] = SparseLogreg(
    estimator=celer.LogisticRegression(warm_start=True, fit_intercept=False))

# Compute "ground truth" with cvxpylayer
dict_cvxpy_func = {
    'lasso': lasso_cvxpy,
    'enet': enet_cvxpy,
    'wLasso': weighted_lasso_cvxpy,
}
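A hedged sketch of how these dictionaries pair up when iterating over the
test cases (the loop body is illustrative, not part of the original module):

for name, cvxpy_func in dict_cvxpy_func.items():
    model = models[name]
    log_alpha = dict_log_alpha[name]
    print("%s: %s vs %s" % (name, type(model).__name__, cvxpy_func.__name__))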
Example #5

# Imports assumed by this snippet; get_data, SVM, Monitor,
# HeldOutSmoothedHinge, Implicit, ImplicitForward, Forward, grad_search,
# grid_search and dict_t_max come from the surrounding module.
import numpy as np
from numpy.linalg import norm
def parallel_function(
        dataset_name, method, tol=1e-5, n_outer=50,
        tolerance_decrease='exponential'):

    # load data
    X_train, X_val, X_test, y_train, y_val, y_test = get_data(dataset_name, csr=True)
    n_samples, n_features = X_train.shape
    print('n_samples', n_samples)
    print('n_features', n_features)
    y_train[y_train == 0.0] = -1.0
    y_val[y_val == 0.0] = -1.0
    y_test[y_test == 0.0] = -1.0

    C_max = 100
    logC = np.log(1e-2)
    n_outer = 5  # overrides the default argument above

    size_loop = 1
    model = SVM(
        X_train, y_train, logC, max_iter=10000, tol=tol)
    for i in range(size_loop):
        monitor = Monitor()
        criterion = HeldOutSmoothedHinge(
            X_val, y_val, model, X_test=X_test, y_test=y_test)

        if method == "implicit_forward":
            algo = ImplicitForward(criterion, tol_jac=1e-3, n_iter_jac=100)
            grad_search(
                algo=algo, verbose=False,
                log_alpha0=logC, tol=tol,
                n_outer=n_outer, monitor=monitor,
                t_max=dict_t_max[dataset_name],
                tolerance_decrease=tolerance_decrease)

        elif method == "forward":
            algo = Forward(criterion)
            grad_search(
                algo=algo,
                log_alpha0=logC, tol=tol,
                n_outer=n_outer, monitor=monitor,
                t_max=dict_t_max[dataset_name],
                tolerance_decrease=tolerance_decrease)

        elif method == "implicit":
            algo = Implicit(criterion)
            grad_search(
                algo=algo,
                log_alpha0=logC, tol=tol,
                n_outer=n_outer, monitor=monitor,
                t_max=dict_t_max[dataset_name],
                tolerance_decrease=tolerance_decrease)

        elif method in ("grid_search", "random", "lhs"):
            # zero-order baselines: regular grid, uniform draws, or latin
            # hypercube sampling over log(C)
            samp = "grid" if method == "grid_search" else method
            algo = Forward(criterion)
            log_alpha_min = np.log(1e-2)
            log_alpha_opt, min_g_func = grid_search(
                algo, log_alpha_min, np.log(C_max), monitor, max_evals=25,
                tol=tol, samp=samp)
            print(log_alpha_opt)

        else:
            raise NotImplementedError

    monitor.times = np.array(monitor.times)
    monitor.objs = np.array(monitor.objs)
    monitor.objs_test = np.array(monitor.objs_test)
    monitor.log_alphas = np.array(monitor.log_alphas)
    return (dataset_name, method, tol, n_outer, tolerance_decrease,
            monitor.times, monitor.objs, monitor.objs_test,
            monitor.log_alphas, norm(y_val), norm(y_test))
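A hedged sketch of collecting the returned tuple into a DataFrame, mirroring
the pickling pattern of Example #2 (column names follow the return order
above; the dataset/method arguments are illustrative):

import pandas

res = parallel_function("rcv1", "forward")
df = pandas.DataFrame([res], columns=[
    'dataset', 'method', 'tol', 'n_outer', 'tolerance_decrease',
    'times', 'objs', 'objs_test', 'log_alphas',
    'norm_y_val', 'norm_y_test'])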
Example #6

# Imports assumed by this snippet; SVM, get_beta_jac_iterdiff, n_samples,
# n_features, X_train and X_train_s come from the surrounding test module.
import numpy as np
import pytest
from scipy.sparse import csr_matrix
from sklearn import datasets

X_val, y_val = datasets.make_classification(n_samples=n_samples,
                                            n_features=n_features,
                                            n_informative=50,
                                            random_state=12,
                                            flip_y=0.1,
                                            n_redundant=0)
X_val_s = csr_matrix(X_val)

y_train[y_train == 0.0] = -1.0
y_val[y_val == 0.0] = -1.0

C = 0.001
log_C = np.log(C)
tol = 1e-16

models = [
    SVM(X_train, y_train, log_C, max_iter=10000, tol=tol),
    SVM(X_train_s, y_train, log_C, max_iter=10000, tol=tol)
]


def get_v(mask, dense):
    # Gradient of the held-out mean squared error restricted to the
    # active set `mask`.
    return 2 * (
        X_val[:, mask].T @ (X_val[:, mask] @ dense - y_val)) / X_val.shape[0]


@pytest.mark.parametrize('model', models)
def test_beta_jac(model):
    supp1, dense1, jac1 = get_beta_jac_iterdiff(X_train,
                                                y_train,
                                                log_C,
                                                tol=tol,
                                                model=model,
                                                compute_jac=True)