Example #1
def check_log_reg(oracle_type, sparse=False):
    # Simple data:
    A = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    if sparse: A = scipy.sparse.csr_matrix(A)
    b = np.array([1, 1, -1, 1])
    reg_coef = 0.5

    # Logistic regression oracle:
    logreg = oracles.create_log_reg_oracle(A,
                                           b,
                                           reg_coef,
                                           oracle_type=oracle_type)

    # Check at point x = [0, 0]
    x = np.zeros(2)
    assert_almost_equal(logreg.func(x), 0.693147180)
    ok_(np.allclose(logreg.grad(x), [0, -0.25]))
    ok_(np.allclose(logreg.hess(x), [[0.625, 0.0625], [0.0625, 0.625]]))
    ok_(isinstance(logreg.grad(x), np.ndarray))
    ok_(isinstance(logreg.hess(x), np.ndarray))

    # Check func_directional and grad_directional at
    # x = [0, 0], d = [1, 1], alpha = 0.5 and 1.0
    x = np.zeros(2)
    d = np.ones(2)
    assert_almost_equal(logreg.func_directional(x, d, alpha=0.5),
                        0.7386407091095)
    assert_almost_equal(logreg.grad_directional(x, d, alpha=0.5),
                        0.4267589549159)
    assert_almost_equal(logreg.func_directional(x, d, alpha=1.0),
                        1.1116496416598)
    assert_almost_equal(logreg.grad_directional(x, d, alpha=1.0),
                        1.0559278283039)
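For reference, the values checked at x = [0, 0] follow from the standard L2-regularized logistic loss, f(x) = (1/m) * sum_i log(1 + exp(-b_i * <a_i, x>)) + (reg_coef / 2) * ||x||^2. A minimal standalone sketch, assuming this is the loss create_log_reg_oracle implements, that reproduces the checked numbers:

import numpy as np

def log_reg_func(x, A, b, reg_coef):
    # Mean logistic loss plus L2 regularization.
    margins = -b * (A @ x)
    return np.mean(np.logaddexp(0, margins)) + 0.5 * reg_coef * (x @ x)

def log_reg_grad(x, A, b, reg_coef):
    # grad = -(1/m) * A^T (b * sigma(-b * Ax)) + reg_coef * x, with sigma(t) = 1/(1+exp(-t))
    m = A.shape[0]
    sigma = 1.0 / (1.0 + np.exp(b * (A @ x)))
    return -(A.T @ (b * sigma)) / m + reg_coef * x

A = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=float)
b = np.array([1, 1, -1, 1], dtype=float)
print(log_reg_func(np.zeros(2), A, b, 0.5))  # 0.6931471... = log(2)
print(log_reg_grad(np.zeros(2), A, b, 0.5))  # [ 0.   -0.25]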
Example #2
def second_experiment():
    data_path = 'data/gisette_scale'
    result_path = lambda x: 'experiment_2/grad_norm-vs-{}'.format(x)
    A, b = load_svmlight_file(data_path)
    m, n = A.shape
    oracle = oracles.create_log_reg_oracle(A, b, 1 / m)
    results = []
    ls = [0, 1, 5, 10, 50, 100]
    for l in ls:
        _, _, history = optimization.lbfgs(oracle,
                                           np.zeros(n),
                                           memory_size=l,
                                           trace=True)
        print('lbfgs with l = {} finished'.format(l))
        grad_norm = np.array(history['grad_norm'])
        grad_norm /= grad_norm[0]
        grad_norm = np.power(grad_norm, 2)
        grad_norm = np.log(grad_norm)
        results.append((l, grad_norm, history['time']))

    def plotting(flag):
        plt.figure(figsize=(12, 8))
        for l, grad_norm, times in results:
            x = list(range(len(grad_norm))) if flag == 'iterations' else times
            plt.plot(x, grad_norm, label='history size = {}'.format(l))
        plt.xlabel('iterations' if flag == 'iterations' else 'seconds')
        plt.ylabel(r'$\log\left(grad\_norm\right)$')
        plt.legend()
        plt.grid()
        plt.savefig(result_path(flag))

    plotting('seconds')
    plotting('iterations')
Example #3
def plot_results(dataset):
    """
    Plots function values dependency and squared norm of gradient on time for
    gradient descent and newton optimization methods
    :param dataset: One of 'w8a', 'gissete', 'real-sim'
    :return:
    """
    available_datasets = ['w8a', 'gissete', 'real-sim']
    if dataset not in available_datasets:
        raise ValueError(
            "Dataset {0} currently is not supported. Available datasets are: {1}"
            .format(dataset, ' '.join(available_datasets)))

    A, b = load_svmlight_file('./data/{}'.format(dataset))
    oracle = oracles.create_log_reg_oracle(A, b, 1 / len(b))
    x_init = np.zeros(A.shape[1])

    [_, _, history_grad] = optimization.gradient_descent(oracle,
                                                         x_init,
                                                         line_search_options={
                                                             'method': 'Wolfe',
                                                             'c': 1
                                                         },
                                                         trace=True)
    [_, _, history_newton] = optimization.newton(oracle,
                                                 x_init,
                                                 line_search_options={
                                                     'method': 'Wolfe',
                                                     'c': 1
                                                 },
                                                 trace=True)
    plot_function_values_on_time(dataset, history_grad, history_newton)
    plot_grad_norm_values_on_time(dataset, history_grad, history_newton)
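Assuming the dataset file is present under ./data/, a single call produces both plots:

plot_results('w8a')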
Example #4
def experiment_3():
    np.random.seed(31415)
    data_path = "data"
    datasets = ["w8a", "gisette_scale", "real-sim", "news20", "rcv1_train"]
    for dataset in datasets:

        print("___________________________")
        logging.info(f"{dataset} is in process...")

        A, b = load_svmlight_file(os.path.join(data_path, dataset))
        print(A.shape, 1 - A.size / (A.shape[0] * A.shape[1]))  # shape and sparsity (fraction of zero entries)

        m = b.size
        oracle = create_log_reg_oracle(A, b, 1.0 / m, "optimized")
        x_0 = np.zeros((A.shape[1],))
        x_opt1, message, history1 = gradient_descent(oracle, x_0, trace=True)
        logging.info("GD ended")
        x_opt2, message, history2 = hessian_free_newton(oracle, x_0, trace=True)
        logging.info("HFN ended")
        x_opt3, message, history3 = lbfgs(oracle, x_0, trace=True)
        logging.info("L-BFGS ended")

        os.makedirs("report/pics/3", exist_ok=True)

        plt.figure()
        plt.plot(history1['func'], label='GD')
        plt.plot(history2['func'], label='HFN')
        plt.plot(history3['func'], label='L-BFGS')

        print(f"GD iterations={len(history1['func'])}, time={history1['time'][-1]}")
        print(f"HFN iterations={len(history2['func'])}, time={history2['time'][-1]}")
        print(f"L-BFGS iterations={len(history3['func'])}, time={history3['time'][-1]}")

        plt.xlabel('Iteration number')
        plt.ylabel('Loss function value')
        plt.legend()
        plt.grid()
        plt.savefig(f"report/pics/3/logreg_loss_value_vs_iter_{dataset}.pdf", bbox_inches='tight')

        plt.figure()
        plt.plot(history1['time'], history1['func'], label='GD')
        plt.plot(history2['time'], history2['func'], label='HFN')
        plt.plot(history3['time'], history3['func'], label='L-BFGS')
        plt.xlabel('Time since start of experiment (seconds)')
        plt.ylabel('Loss function value')
        plt.legend()
        plt.grid()
        plt.savefig(f"report/pics/3/logreg_loss_value_vs_time_{dataset}.pdf", bbox_inches='tight')

        plt.figure()
        plt.plot(history1['time'], (history1['grad_norm'] / history1['grad_norm'][0]) ** 2, label='GD')
        plt.plot(history2['time'], (history2['grad_norm'] / history2['grad_norm'][0]) ** 2, label='HFN')
        plt.plot(history3['time'], (history3['grad_norm'] / history3['grad_norm'][0]) ** 2, label='L-BFGS')
        plt.yscale('log')
        plt.xlabel('Time since start of experiment (seconds)')
        plt.ylabel('Relative squared gradient norm')
        plt.legend()
        plt.grid()
        plt.savefig(f"report/pics/3/logreg_grad_norm_vs_time_{dataset}.pdf", bbox_inches='tight')
Example #5
def experiment_3():
    np.random.seed(31415)
    m, n = 10000, 8000
    A = np.random.randn(m, n)
    b = np.sign(np.random.randn(m))
    regcoef = 1 / m

    oracle1 = create_log_reg_oracle(A, b, regcoef, oracle_type='usual')
    oracle2 = create_log_reg_oracle(A, b, regcoef, oracle_type='optimized')

    x_0 = np.zeros((n, ))
    x_opt1, message, history1 = gradient_descent(oracle1, x_0, trace=True)
    x_opt2, message, history2 = gradient_descent(oracle2, x_0, trace=True)
    print(x_opt1, x_opt2)

    plt.figure()
    plt.plot(history1['func'], label='Usual')
    plt.plot(history2['func'], label='Optimized')
    plt.xlabel('Iteration number')
    plt.ylabel('Loss function value')
    plt.legend()
    plt.grid()
    plt.savefig("pics/logreg_values")

    plt.figure()
    plt.plot(history1['time'], history1['func'], label='Usual')
    plt.plot(history2['time'], history2['func'], label='Optimized')
    plt.xlabel('Time since start of experiment (seconds)')
    plt.ylabel('Loss function value')
    plt.legend()
    plt.grid()
    plt.savefig("pics/logreg_loss_value_vs_time")

    plt.figure()
    plt.plot(history1['time'],
             2 * np.log((history1['grad_norm'] / history1['grad_norm'][0])),
             label='Usual')
    plt.plot(history2['time'],
             2 * np.log((history2['grad_norm'] / history2['grad_norm'][0])),
             label='Optimized')
    plt.xlabel('Time since start of experiment (seconds)')
    plt.ylabel('Log of relative squared gradient norm')
    plt.legend()
    plt.grid()
    plt.savefig("pics/logreg_grad_norm_vs_time")
Example #6
def check_lyoha():
    B = np.array(range(1, 10)).reshape(3, 3)
    # A = B @ B.T
    A = B.dot(B.T)
    b = np.array(range(1, 4))
    oracle = create_log_reg_oracle(A, b, 1.0, oracle_type='optimized')

    x0 = np.zeros(3)
    x_expected = np.array([0.01081755, 0.02428744, 0.03775733])
    hist_expected = {'func': [0.69314718055994518, 0.060072133470449901, 0.020431219493905504],
                     'time': [0.0] * 3,
                     'grad_norm': [176.70314088889307, 3.295883082719103, 1.101366262174557]}

    x_star, msg, history = hessian_free_newton(oracle, x0, trace=True,
                                               line_search_options={'method': 'Wolfe'}, tolerance=1e-4)

    oracle.hess(np.zeros(3))
    oracle.hess(np.array([0.01952667, 0.04648169, 0.07343672]))
    pass
Example #7
def main():
    danger = []
    for _ in range(10):
        A = np.random.uniform(0, 1000, (5, 5))
        b = np.random.uniform(0, 1000, 5)
        regcoef = np.random.uniform(0, 100, 1)
        oracle = oracles.create_log_reg_oracle(A, b, regcoef)
        diffs = []
        for i in range(100):
            x = np.random.uniform(0, 100, 5)
            v = np.random.uniform(0, 100, 5)
            hess_vec_finite = oracles.hess_vec_finite_diff(oracle.func, x, v)
            hess_vec_oracle = oracle.hess_vec(x, v)
            diff = np.abs(hess_vec_finite - hess_vec_oracle)
            if max(diff) > 1:
                danger.append((A, b, regcoef, x, v))
            diffs.append(max(diff))
        print(max(diffs))
    print(len(danger))
Example #8
def main():
    A = np.random.uniform(0, 10, (5, 5))
    b = np.random.uniform(0, 10, 5)
    regcoef = np.random.uniform(0, 10, 1)
    oracle = oracles.create_log_reg_oracle(A, b, regcoef)
    print(A)
    print(b)
    print(regcoef)
    for i in range(10):
        x = np.random.uniform(0, 10, 5)
        grad_oracle = oracle.grad(x)
        hess_oracle = oracle.hess(x)
        grad_finite = oracles.grad_finite_diff(oracle.func, x)
        hess_finite = oracles.hess_finite_diff(oracle.func, x)
        diff_grad = np.abs(grad_finite - grad_oracle)
        diff_hess = np.abs(hess_finite - hess_oracle)
        print(np.max(diff_grad), np.max(diff_hess))
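These checks compare the oracle's derivatives against finite differences. A minimal sketch of the standard forward-difference approximations, assuming grad_finite_diff and hess_finite_diff follow the usual formulas (the project's implementations may differ in step size and details):

import numpy as np

def grad_finite_diff_sketch(func, x, eps=1e-8):
    # g_i ~ (f(x + eps * e_i) - f(x)) / eps
    n = x.size
    f0 = func(x)
    g = np.zeros(n)
    for i in range(n):
        e = np.zeros(n)
        e[i] = eps
        g[i] = (func(x + e) - f0) / eps
    return g

def hess_finite_diff_sketch(func, x, eps=1e-5):
    # H_ij ~ [f(x + eps*e_i + eps*e_j) - f(x + eps*e_i) - f(x + eps*e_j) + f(x)] / eps^2
    n = x.size
    f0 = func(x)
    E = eps * np.eye(n)
    f_shift = np.array([func(x + E[i]) for i in range(n)])
    H = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            H[i, j] = (func(x + E[i] + E[j]) - f_shift[i] - f_shift[j] + f0) / eps ** 2
    return H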
Example #9
def check_hess_vec():
    m, n = 1000, 500
    A = np.random.randn(m, n)
    b = np.sign(np.random.randn(m))
    regcoef = 1 / m

    x = np.random.randn(n)
    v = np.random.randn(n)

    logreg_oracle = create_log_reg_oracle(A, b, regcoef, oracle_type='optimized')

    v1 = logreg_oracle.hess_vec(x, v)
    v2 = hess_vec_finite_diff(logreg_oracle.func, x, v, eps=1e-6)
    res = np.allclose(v1, v2, atol=1e-2, rtol=1e-1)
    print(v1[:10])
    print(v2[:10])
    if res:
        print("Logreg hess_vec is OK!")
    else:
        print("Something wrong.")
    return res
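hess_vec_finite_diff approximates a Hessian-vector product from function values only. One standard second-order formula, as a sketch (the project's implementation may differ in details):

import numpy as np

def hess_vec_finite_diff_sketch(func, x, v, eps=1e-5):
    # (Hv)_i ~ [f(x + eps*e_i + eps*v) - f(x + eps*e_i) - f(x + eps*v) + f(x)] / eps^2
    n = x.size
    f0 = func(x)
    f_v = func(x + eps * v)
    hv = np.zeros(n)
    for i in range(n):
        e = np.zeros(n)
        e[i] = eps
        hv[i] = (func(x + e + eps * v) - func(x + e) - f_v + f0) / eps ** 2
    return hv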
Example #10
def experiment_4():
    path = 'data'
    np.random.seed(31415)
    datasets = ["w8a", "gisette_scale", "real-sim"]
    for dataset in datasets:
        A, b = load_svmlight_file(path + '/' + dataset)
        m = b.size
        oracle = create_log_reg_oracle(A, b, 1.0 / m, "optimized")
        x_0 = np.zeros((A.shape[1], ))
        #        x_opt1, message, history1 = gradient_descent(oracle, x_0, trace=True)
        if dataset != 'real-sim':
            x_opt2, message, history2 = newton(oracle, x_0, trace=True)
            print(len(history2['time']), history2['time'][-1])
        # NOTE: the plotting below is unreachable; it also needs history1
        # from the commented-out gradient_descent run above.
        continue

        plt.figure()
        plt.plot(history1['time'], history1['func'], label='GD')
        if dataset != 'real-sim':
            plt.plot(history2['time'], history2['func'], label='Newton')
        plt.xlabel('Time since start of experiment (seconds)')
        plt.ylabel('Loss function value')
        plt.legend()
        plt.grid()
        plt.savefig("pics/logreg_loss_value_vs_time_" + dataset)

        plt.figure()
        plt.plot(history1['time'],
                 2 * np.log(
                     (history1['grad_norm'] / history1['grad_norm'][0])),
                 label='GD')
        if dataset != 'real-sim':
            plt.plot(history2['time'],
                     2 * np.log(
                         (history2['grad_norm'] / history2['grad_norm'][0])),
                     label='Newton')
        plt.xlabel('Time since start of experiment (seconds)')
        plt.ylabel('Log of relative squared gradient norm')
        plt.legend()
        plt.grid()
        plt.savefig("pics/logreg_grad_norm_vs_time_" + dataset)
Example #11
def experiment_2():
    np.random.seed(31415)

    A, b = load_svmlight_file('data/gisette_scale')
    m = b.size
    oracle = create_log_reg_oracle(A, b, 1.0 / m, "optimized")
    x_0 = np.zeros((A.shape[1],))

    fig1, ax1 = plt.subplots()
    fig2, ax2 = plt.subplots()

    ax1.set_yscale('log')
    ax1.set_xlabel('Iteration number')
    ax1.set_ylabel('Relative squared gradient norm')
    ax1.grid()

    ax2.set_yscale('log')
    ax2.set_xlabel('Time since start of experiment')
    ax2.set_ylabel('Relative squared gradient norm')
    ax2.grid()

    for memory_size in tqdm([0, 1, 5, 10, 50, 100]):
        x_opt, message, history = lbfgs(oracle, x_0, trace=True, memory_size=memory_size)
        relative_grad_norms = (history['grad_norm'] / history['grad_norm'][0]) ** 2
        iter_times = history['time']
        iters = range(len(history['time']))
        if len(relative_grad_norms) > 400:
            # Thin dense curves: keep every other point
            relative_grad_norms = relative_grad_norms[::2]
            iter_times = iter_times[::2]
            iters = range(0, len(history['time']), 2)
        ax1.plot(iters, relative_grad_norms, label=f"l={memory_size}")
        ax2.plot(iter_times, relative_grad_norms, label=f"l={memory_size}")
        print(f"При l={memory_size} до сходимости потребовалось "
              f"{len(history['time'])} итераций и {history['time'][-1]} секунд")
    ax1.legend()
    ax2.legend()

    os.makedirs("report/pics/2", exist_ok=True)
    fig1.savefig("report/pics/2/lbfgs_grad_norm_vs_iter.pdf", bbox_inches='tight')
    fig2.savefig("report/pics/2/lbfgs_grad_norm_vs_time.pdf", bbox_inches='tight')
Example #12
def third_experiment():
    data_path = 'experiment_3/datasets/'
    result_path = 'experiment_3/'
    names = ['w8a', 'gisette_scale', 'real-sim']

    def plotting(history_gd, history_nm, param):
        f_gd = np.array(history_gd[param])
        time_gd = list(map(lambda t: t.total_seconds(), history_gd['time']))
        if param == 'grad_norm':
            f_gd = np.log(f_gd / f_gd[0])
        plt.figure()
        plt.plot(time_gd, f_gd, label='GD')
        if history_nm is not None:  # Newton is skipped for 'real-sim'
            f_nm = np.array(history_nm[param])
            time_nm = list(map(lambda t: t.total_seconds(), history_nm['time']))
            if param == 'grad_norm':
                f_nm = np.log(f_nm / f_nm[0])
            plt.plot(time_nm, f_nm, label='Newton')
        plt.xlabel('seconds')
        ylabel = 'func' if param == 'func' else r'$\log\left(grad\_norm\right)$'
        plt.ylabel(ylabel)
        plt.legend()
        plt.grid()
        plt.savefig(result_path + name + '-' + param)

    for name in names:
        A, b = load_svmlight_file(data_path + name)
        m, n = A.shape
        oracle = oracles.create_log_reg_oracle(A, b, 1 / m)
        history_nm = None  # Newton may be skipped below
        if name != 'real-sim':
            print('begin')
            x_star_nm, _, history_nm = optimization.newton(oracle,
                                                           np.zeros(n),
                                                           trace=True)
            print('Newton is finished')
        x_star_gd, _, history_gd = optimization.gradient_descent(oracle,
                                                                 np.zeros(n),
                                                                 trace=True)
        print('GD is finished')
        plotting(history_gd, history_nm, 'func')
        plotting(history_gd, history_nm, 'grad_norm')
Example #13
def third_experiment():
    data_path = lambda name: 'data/{}'.format(name)
    datasets = [
        'gisette_scale', 'news20.binary', 'rcv1_train.binary', 'real-sim',
        'w8a'
    ]

    algorithms = [
        optimization.hessian_free_newton, optimization.lbfgs,
        optimization.gradient_descent
    ]

    def plotting(hfn_history, lbfgs_history, gd_history, dataset):
        figname = lambda data, x, y: 'experiment_3/{}_{}-vs-{}.png'.format(
            data, y, x)

        def get_x(history, form):
            if form == 'iterations':
                return list(range(len(history['time'])))
            if form == 'time':
                return history['time']

        def get_y(history, form):
            if form == 'func':
                return history['func']
            if form == 'grad':
                grad_norm = np.array(history['grad_norm'])
                grad_norm /= grad_norm[0]
                grad_norm = np.power(grad_norm, 2)
                grad_norm = np.log(grad_norm)
                return grad_norm

        histories = [hfn_history, lbfgs_history, gd_history]
        names = ['HFN', 'L-BFGS', 'GD']
        colors = ['b', 'r', 'g']
        for x_form in ['iterations', 'time']:
            for y_form in ['func', 'grad']:
                if (x_form, y_form) == ('iterations', 'grad'):
                    continue
                plt.figure()
                for history, name, color in zip(histories, names, colors):
                    plt.plot(get_x(history, x_form),
                             get_y(history, y_form),
                             label=name,
                             color=color)
                plt.title(dataset)
                plt.xlabel(x_form)
                plt.ylabel(y_form if y_form ==
                           'func' else r'$\log\left(grad\_norm\right)$')
                plt.grid()
                plt.legend()
                to_save = figname(dataset, x_form, y_form)
                plt.savefig(to_save)

    for dataset in datasets:
        print(dataset)
        A, b = load_svmlight_file(data_path(dataset))
        m, n = A.shape
        oracle = oracles.create_log_reg_oracle(A, b, 1 / m)
        histories = []
        for i, algorithm in enumerate(algorithms):
            _, _, history = algorithm(oracle, np.zeros(n), trace=True)
            print('{} algo finished'.format(i))
            histories.append(history)
        plotting(*histories, dataset)
Example #14
import os

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt

import oracles
import plot_trajectory_2d

from nose.tools import assert_almost_equal, ok_, eq_

import numpy as np
import numpy.linalg as li
import scipy.sparse as sp

# ----------------------- Checking grad and hess

A = np.random.rand(10, 4)
b = np.random.rand(10)
tr = oracles.create_log_reg_oracle(A, b, 1)
for i in range(5):
    x = np.random.rand(4)
    ok_(
        np.allclose(tr.grad(x),
                    oracles.grad_finite_diff(tr.func, x),
                    rtol=1e-1,
                    atol=1e-1))

# ---------------------- Experiment 1

for d in ['exp1', 'exp2', 'exp3', 'exp4']:
    if not os.path.exists(d):
        os.makedirs(d)

exps = [{
Example #15
def run_experiment(dataset_filename, name, max_iters):    
    print('Experiment: \t %s, \t file: %s, \t max_iters = %d.' % 
          (name, dataset_filename, max_iters))

    X, y = load_svmlight_file(dataset_filename)
    oracle = create_log_reg_oracle(X, y, 1 / X.shape[0])
    x_0 = np.zeros(X.shape[1])

    print('Minimize by scipy ... ', flush=True, end='')
    f_star = \
        scipy.optimize.minimize(oracle.func, x_0, jac=oracle.grad, tol=1e-9).fun
    print('f_star = %g.' % f_star)

    H_0 = 1.0
    line_search = True
    tolerance = get_tolerance({'criterion': 'func', 
                               'f_star': f_star,
                               'tolerance': 1e-8})
    subsolver = 'FGM'
    stopping_criterion_subproblem = 'grad_uniform_convex'

    constant_strategies = get_constant_strategies()
    power_strategies = get_power_strategies()
    adaptive_strategy = get_tolerance_strategy({'strategy': 'adaptive',
                                                'c': 1.0,
                                                'alpha': 1,
                                                'label': 'adaptive'})

    strategies_1 = constant_strategies + [adaptive_strategy]
    strategies_2 = power_strategies + [adaptive_strategy]

    method = lambda strategy: cubic_newton(oracle, x_0, tolerance,
                                           max_iters=max_iters,
                                           H_0=H_0,
                                           line_search=line_search,
                                           inner_tolerance_strategy=strategy,
                                           subsolver=subsolver,
                                           trace=True,
                                           B=None,
                                           Binv=None,
                                           stopping_criterion_subproblem=
                                           stopping_criterion_subproblem)

    labels_1 = get_labels(strategies_1)
    histories_1 = run_method(method, strategies_1, labels_1)
    filename = os.getcwd() + '/plots/logreg_%s_time' % (name)
    plot_func_residual(histories_1, 'time', f_star, labels_1, 
                       ['grey', 'grey', 'grey', 'grey', 'red'], 
                       ['-', '--', '-.', ':', '-'], 
                       [5, 4, 3, 4, 2], 
                       [0.8, 0.8, 0.8, 0.8, 1], 
                       'Log-reg: %s' % name, 
                       'Time, s', 
                       filename=filename+'_const.pdf')
    labels_2 = get_labels(strategies_2)
    histories_2 = run_method(method, strategies_2, labels_2)
    plot_func_residual(histories_2, 'time', f_star, labels_2, 
                       ['blue', 'blue', 'blue', 'blue', 'red'], 
                       ['-', '--', '-.', ':', '-'], 
                       [5, 4, 3, 2, 2], 
                       [0.6, 0.6, 0.6, 0.6, 1], 
                       'Log-reg: %s' % name,
                       'Time, s', 
                       filename=filename+'_powers.pdf')
Example #16
def experiment_5_and_6(algo='GD'):
    np.random.seed(31415)
    m, n = 2000, 1000
    A = np.random.randn(m, n)
    b = np.sign(np.random.randn(m))
    regcoef = 1 / m

    logreg_oracle = create_log_reg_oracle(A,
                                          b,
                                          regcoef,
                                          oracle_type='optimized')

    line_search_options = [
        {'method': 'Constant', 'c': 1.0},
        {'method': 'Constant', 'c': 0.95},
        {'method': 'Constant', 'c': 0.9},
        {'method': 'Constant', 'c': 0.85},
        {'method': 'Armijo', 'c1': 1e-8},
        {'method': 'Armijo', 'c1': 1e-6},
        {'method': 'Armijo', 'c1': 1e-4},
        {'method': 'Armijo', 'c1': 1e-1},
        {'method': 'Wolfe', 'c2': 1.5},
        {'method': 'Wolfe', 'c2': 0.9},
        {'method': 'Wolfe', 'c2': 0.1},
        {'method': 'Wolfe', 'c2': 0.01},
    ]

    colors = ['#e66101', '#fdb863', '#b2abd2', '#5e3c99']
    styles = {
        'Constant': {
            'linestyle': '--',
            'dashes': (2, 5),
            'linewidth': 2
        },
        'Armijo': {
            'linestyle': '--',
            'dashes': (5, 2)
        },
        'Wolfe': {
            'linestyle': 'solid'
        },
    }

    x_0_list = [None] * 3
    x_0_list[0] = np.zeros((n, ))
    x_0_list[1] = np.random.uniform(-1, 1, (n, ))
    x_0_list[2] = np.ones((n, ))

    for k, x_0 in enumerate(x_0_list):
        plt.figure(figsize=(12, 9))
        for i, options in tqdm(enumerate(line_search_options)):
            if algo == 'GD':
                x_opt, message, history = gradient_descent(
                    logreg_oracle,
                    x_0,
                    trace=True,
                    line_search_options=options)
            else:
                x_opt, message, history = newton(logreg_oracle,
                                                 x_0,
                                                 trace=True,
                                                 line_search_options=options)
            args = list(options.keys()) + list(options.values())
            label = "{} ({}={}, {}={})".format(algo, args[0], args[2], args[1],
                                               args[3])
            values = 2 * np.log(
                (history['grad_norm'] / history['grad_norm'][0]))
            method = args[2]
            plt.plot(values + np.random.randn(values.size) * 0.05,
                     color=colors[i % len(colors)],
                     label=label,
                     alpha=0.7,
                     **styles[method])
        plt.xlabel('Iteration number')
        plt.ylabel('Log of relative squared gradient norm')
        plt.legend(loc='upper right')
        plt.grid()

        Path("pics/logreg_{}_linear_search_strategies".format(algo)).mkdir(
            parents=True, exist_ok=True)
        plt.savefig(
            "pics/logreg_{}_linear_search_strategies/x_0_{}.png".format(
                algo, k))

    np.random.seed(31415)
    n = 2000
    C = ortho_group.rvs(n)
    A = C.T @ np.diag(np.random.uniform(1, 20, (n, ))) @ C
    b = np.random.randn(n)
    x_0 = np.zeros((n, ))

    quadratic_oracle = QuadraticOracle(A, b)
    x_opt = np.linalg.solve(A, b)
    f_opt = quadratic_oracle.func(x_opt)

    line_search_options = [
        {'method': 'Constant', 'c': 0.09},
        {'method': 'Constant', 'c': 0.085},
        {'method': 'Constant', 'c': 0.08},
        {'method': 'Constant', 'c': 0.075},
        {'method': 'Armijo', 'c1': 1e-10},
        {'method': 'Armijo', 'c1': 1e-7},
        {'method': 'Armijo', 'c1': 1e-4},
        {'method': 'Armijo', 'c1': 1e-1},
        {'method': 'Wolfe', 'c2': 1.5},
        {'method': 'Wolfe', 'c2': 0.9},
        {'method': 'Wolfe', 'c2': 0.1},
        {'method': 'Wolfe', 'c2': 0.01},
    ]

    x_0_list = [None] * 3
    x_0_list[0] = np.zeros((n, ))
    x_0_list[1] = np.random.uniform(-1, 1, (n, ))
    x_0_list[2] = x_opt + np.random.randn(n, ) * 0.2

    for k, x_0 in enumerate(x_0_list):
        plt.figure(figsize=(12, 9))
        for i, options in tqdm(enumerate(line_search_options)):
            if algo == 'GD':
                x_opt, message, history = gradient_descent(
                    quadratic_oracle,
                    x_0,
                    trace=True,
                    line_search_options=options)
            else:
                x_opt, message, history = newton(quadratic_oracle,
                                                 x_0,
                                                 trace=True,
                                                 line_search_options=options)
            args = list(options.keys()) + list(options.values())
            label = "{} ({}={}, {}={})".format(algo, args[0], args[2], args[1],
                                               args[3])
            values = np.log(np.abs((history['func'] - f_opt) / f_opt) + 1e-16)
            method = args[2]
            plt.plot(values + np.random.randn(values.size) * 0.05,
                     color=colors[i % len(colors)],
                     label=label,
                     alpha=0.7,
                     **styles[method])
        plt.xlabel('Iteration number')
        plt.ylabel('Log of relative residual')
        plt.legend(loc='upper right')
        plt.grid()
        Path("pics/quadratic_{}_linear_search_strategies".format(algo)).mkdir(
            parents=True, exist_ok=True)
        plt.savefig(
            "pics/quadratic_{}_linear_search_strategies/x_0_{}.png".format(
                algo, k))
Example #17
def generate_log_reg_oracle(N, D, regcoef, seed=42):
    np.random.seed(seed)
    A = np.random.randn(N, D)
    w = np.random.randn(D)
    b = np.sign(A.dot(w) + np.random.randn(N))
    return create_log_reg_oracle(A, b, regcoef)
Example #18
def run_experiment(dataset_filename, name, max_iters):
    print('Experiment: \t %s, \t file: %s, \t max_iters = %d.' %
          (name, dataset_filename, max_iters))

    X, y = load_svmlight_file(dataset_filename)
    oracle = create_log_reg_oracle(X, y, 1 / X.shape[0])
    x_0 = np.zeros(X.shape[1])

    print('Minimize by scipy ... ', flush=True, end='')
    f_star = \
        scipy.optimize.minimize(oracle.func, x_0, jac=oracle.grad, tol=1e-9).fun
    print('f_star = %g.' % f_star)

    H_0 = 1.0
    line_search = True
    tolerance = get_tolerance({
        'criterion': 'func',
        'f_star': f_star,
        'tolerance': 1e-8
    })
    subsolver = 'FGM'
    stopping_criterion_subproblem = 'func'

    constant_strategies = get_constant_strategies()
    power_strategies = get_power_strategies()
    adaptive_strategy = get_tolerance_strategy({
        'strategy': 'adaptive',
        'c': 1.0,
        'alpha': 1,
        'label': 'adaptive'
    })
    adaptive_15_strategy = get_tolerance_strategy({
        'strategy': 'adaptive',
        'c': 1.0,
        'alpha': 1.5,
        'label': r'adaptive $1.5$'
    })
    adaptive_2_strategy = get_tolerance_strategy({
        'strategy': 'adaptive',
        'c': 1.0,
        'alpha': 2,
        'label': r'adaptive $2$'
    })

    strategies_1 = constant_strategies
    strategies_2 = power_strategies + [constant_strategies[-1]]
    strategies_3 = [
        adaptive_strategy, adaptive_15_strategy, adaptive_2_strategy,
        constant_strategies[-1]
    ]

    method = lambda strategy: cubic_newton(oracle,
                                           x_0,
                                           tolerance,
                                           max_iters=max_iters,
                                           H_0=H_0,
                                           line_search=line_search,
                                           inner_tolerance_strategy=strategy,
                                           subsolver=subsolver,
                                           trace=True,
                                           B=None,
                                           Binv=None,
                                           stopping_criterion_subproblem=
                                           stopping_criterion_subproblem)

    filename = os.getcwd() + '/plots/exact_logreg_%s' % (name)

    labels_1 = get_labels(strategies_1)
    histories_1 = run_method(method, strategies_1, labels_1)
    plot_func_residual_iter(histories_1,
                            'hess_vec_calls',
                            f_star,
                            labels_1, ['grey', 'grey', 'grey', 'grey'],
                            ['-', '--', '-.', ':'], [5, 4, 3, 4], [1, 1, 1, 1],
                            r'Log-reg, %s: constant strategies' % name,
                            'Hessian-vector products',
                            filename=filename + '_const.pdf')
    labels_2 = get_labels(strategies_2)
    histories_2 = run_method(method, strategies_2, labels_2)
    plot_func_residual_iter(histories_2,
                            'hess_vec_calls',
                            f_star,
                            labels_2, ['blue', 'blue', 'blue', 'blue', 'gray'],
                            ['-', '--', '-.', ':', ':'], [5, 4, 3, 2, 4],
                            [0.6, 0.6, 0.6, 0.6, 0.8],
                            r'Log-reg, %s: dynamic strategies' % name,
                            'Hessian-vector products',
                            filename=filename + '_power.pdf')

    labels_3 = get_labels(strategies_3)
    histories_3 = run_method(method, strategies_3, labels_3)
    plot_func_residual_iter(histories_3,
                            'hess_vec_calls',
                            f_star,
                            labels_3,
                            ['red', 'tab:orange', 'tab:orange', 'gray'],
                            ['-', '--', '-.', ':'], [2, 4, 2, 4],
                            [1, 1, 1, 0.8],
                            r'Log-reg, %s: adaptive strategies' % name,
                            'Hessian-vector products',
                            filename=filename + '_adaptive.pdf')
Example #19
    for i, c2 in enumerate(c2_values):
        [_, _, history] = optimization.gradient_descent(oracle,
                                                        x_init,
                                                        line_search_options={
                                                            'method': 'Wolfe',
                                                            'c2': c2
                                                        },
                                                        trace=True)
        plot_grad_norm_vs_time(history, colors[i], 'Wolfe, c2={}'.format(c2))


if __name__ == '__main__':
    ### Logistic regression
    A, b = generate_data()
    oracle = oracles.create_log_reg_oracle(A,
                                           b,
                                           1 / len(b),
                                           oracle_type='optimized')
    x_init = np.zeros(A.shape[1])

    c_values = [0.001, 0.01, 0.1]
    colors_constant = ['lime', 'green', 'darkgreen']
    analyze_constant(oracle, x_init, c_values, colors_constant)

    c1_values = [0.1, 0.25, 0.4]
    colors_armijo = ['red', 'darkred', 'lightcoral']
    analyze_armijo(oracle, x_init, c1_values, colors_armijo)

    c2_values = [0.6, 0.75, 0.9]
    colors_wolfe = ['blue', 'midnightblue', 'cornflowerblue']
    analyze_wolfe(oracle, x_init, c2_values, colors_wolfe)
    plt.legend()