Example #1
import warnings

import numpy as np
from nose.tools import eq_

# Local project modules.
import optimization
import oracles


def test_newton_fail():
    # f(x) = integral_0^x arctan(t) dt = x*arctan(x) - 0.5*log(x^2 + 1)
    class Oracle(oracles.BaseSmoothOracle):
        def func(self, x):
            return x * np.arctan(x) - 0.5 * np.log(np.power(x, 2) + 1)

        def grad(self, x):
            # f'(x) = arctan(x)
            return np.arctan(x)

        def hess(self, x):
            # f''(x) = 1 / (1 + x^2), returned as a 1x1 Hessian matrix.
            return np.array([1 / (np.power(x, 2) + 1)])

    x0 = np.array([10.0])
    warnings.filterwarnings("ignore")
    [x_star, msg, history] = optimization.newton(Oracle(),
                                                 x0,
                                                 display=False,
                                                 trace=False,
                                                 line_search_options={
                                                     'method': 'Constant',
                                                     'c': 1
                                                 })
    warnings.filterwarnings("default")
    eq_(msg, 'computational_error')
    eq_(history, None)
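Why the test expects failure: with a constant unit step, Newton's method for f'(x) = arctan(x) = 0 iterates x_{k+1} = x_k - arctan(x_k) * (1 + x_k^2), which diverges whenever |x_0| exceeds roughly 1.39. A standalone sketch of the iteration (independent of the project's optimization module):

import numpy as np

x = 10.0
for k in range(5):
    # Full Newton step for f'(x) = arctan(x); the Hessian is 1 / (1 + x^2).
    x = x - np.arctan(x) * (1 + x ** 2)
    print(k, x)
# The iterates alternate in sign and explode in magnitude, so the solver
# eventually produces inf/nan and reports 'computational_error'.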
Example #2
import numpy as np
from nose.tools import eq_, ok_

import optimization
# get_1d and check_equal_histories are local test helpers (not shown here).


def test_newton_1d():
    oracle = get_1d(0.5)
    x0 = np.array([1.0])
    FUNC = [
        np.array([2.14872127]),
        np.array([0.9068072]),
        np.array([0.89869455]),
        np.array([0.89869434])
    ]
    GRAD_NORM = [
        1.8243606353500641, 0.14023069594489929, 0.00070465169721295462,
        1.7464279966628027e-08
    ]
    TIME = [0] * 4  # Dummy values.
    X = [
        np.array([1.]),
        np.array([-0.29187513]),
        np.array([-0.40719141]),
        np.array([-0.40777669])
    ]
    TRUE_HISTORY = {'func': FUNC, 'grad_norm': GRAD_NORM, 'time': TIME, 'x': X}
    # Constant step size.
    [x_star, msg, history] = optimization.newton(oracle,
                                                 x0,
                                                 max_iter=5,
                                                 tolerance=1e-10,
                                                 trace=True,
                                                 line_search_options={
                                                     'method': 'Constant',
                                                     'c': 1.0
                                                 })
    ok_(np.allclose(x_star, [-0.4077777], atol=1e-4))
    eq_(msg, 'success')
    check_equal_histories(history, TRUE_HISTORY)
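The pinned GRAD_NORM sequence exhibits Newton's quadratic convergence: each gradient norm is close to a constant times the square of the previous one. A quick standalone check against the values above:

import numpy as np

g = np.array([1.8243606353500641, 0.14023069594489929,
              0.00070465169721295462, 1.7464279966628027e-08])
print(g[1:] / g[:-1] ** 2)  # roughly constant (~0.035-0.042): quadratic rate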
Example #3
import numpy as np
from sklearn.datasets import load_svmlight_file

# Local project modules (the plot helpers are defined elsewhere in this project).
import oracles
import optimization


def plot_results(dataset):
    """
    Plot the function value and the squared gradient norm against time for
    the gradient descent and Newton optimization methods.
    :param dataset: One of 'w8a', 'gisette', 'real-sim'
    :return:
    """
    available_datasets = ['w8a', 'gisette', 'real-sim']
    if dataset not in available_datasets:
        raise ValueError(
            "Dataset {0} is currently not supported. Available datasets are: {1}"
            .format(dataset, ', '.join(available_datasets)))

    A, b = load_svmlight_file('./data/{}'.format(dataset))
    oracle = oracles.create_log_reg_oracle(A, b, 1 / len(b))
    x_init = np.zeros(A.shape[1])

    [_, _, history_grad] = optimization.gradient_descent(oracle,
                                                         x_init,
                                                         line_search_options={
                                                             'method': 'Wolfe',
                                                             'c': 1
                                                         },
                                                         trace=True)
    [_, _, history_newton] = optimization.newton(oracle,
                                                 x_init,
                                                 line_search_options={
                                                     'method': 'Wolfe',
                                                     'c': 1
                                                 },
                                                 trace=True)
    plot_function_values_on_time(dataset, history_grad, history_newton)
    plot_grad_norm_values_on_time(dataset, history_grad, history_newton)
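A minimal driver, assuming the LIBSVM files sit under ./data/ and that plot_function_values_on_time / plot_grad_norm_values_on_time are importable alongside plot_results:

if __name__ == '__main__':
    for name in ['w8a', 'gisette', 'real-sim']:
        plot_results(name)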
Example #4
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_svmlight_file

# Local project modules.
from oracles import create_log_reg_oracle
from optimization import gradient_descent, newton


def experiment_4():
    path = 'data'
    np.random.seed(31415)
    datasets = ["w8a", "gisette_scale", "real-sim"]
    for dataset in datasets:
        A, b = load_svmlight_file(path + '/' + dataset)
        m = b.size
        oracle = create_log_reg_oracle(A, b, 1.0 / m, "optimized")
        x_0 = np.zeros((A.shape[1], ))
        x_opt1, message, history1 = gradient_descent(oracle, x_0, trace=True)
        if dataset != 'real-sim':  # Newton is presumably too expensive on real-sim.
            x_opt2, message, history2 = newton(oracle, x_0, trace=True)

        plt.figure()
        plt.plot(history1['time'], history1['func'], label='GD')
        if dataset != 'real-sim':
            plt.plot(history2['time'], history2['func'], label='Newton')
        plt.xlabel('Time since start of experiment (seconds)')
        plt.ylabel('Loss function value')
        plt.legend()
        plt.grid()
        plt.savefig("pics/logreg_loss_value_vs_time_" + dataset)

        plt.figure()
        plt.plot(history1['time'],
                 2 * np.log(
                     (history1['grad_norm'] / history1['grad_norm'][0])),
                 label='GD')
        if dataset != 'real-sim':
            plt.plot(history2['time'],
                     2 * np.log(
                         (history2['grad_norm'] / history2['grad_norm'][0])),
                     label='Newton')
        plt.xlabel('Time since start of experiment (seconds)')
        plt.ylabel('Log of relative squared gradient norm')
        plt.legend()
        plt.grid()
        plt.savefig("pics/logreg_grad_norm_vs_time_" + dataset)
Example #5
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_svmlight_file

# Local project modules.
import oracles
import optimization


def third_experiment():
    data_path = 'experiment_3/datasets/'
    result_path = 'experiment_3/'
    names = ['w8a', 'gisette_scale', 'real-sim']

    def plotting(history_gd, history_nm, param):
        # history_nm may be None when Newton was skipped (see the loop below).
        def prepare(history):
            f = np.array(history[param])
            if param == 'grad_norm':
                f = np.log(f / f[0])
            # history['time'] holds datetime.timedelta objects here.
            time = [t.total_seconds() for t in history['time']]
            return time, f

        plt.figure()
        plt.plot(*prepare(history_gd), label='GD')
        if history_nm is not None:
            plt.plot(*prepare(history_nm), label='Newton')
        plt.xlabel('seconds')
        ylabel = 'func' if param == 'func' else r'$\log\left(grad\_norm\right)$'
        plt.ylabel(ylabel)
        plt.legend()
        plt.grid()
        plt.savefig(result_path + name + '-' + param)

    for name in names:
        A, b = load_svmlight_file(data_path + name)
        m, n = A.shape
        oracle = oracles.create_log_reg_oracle(A, b, 1 / m)
        history_nm = None  # Newton is skipped for real-sim, presumably too costly.
        if name != 'real-sim':
            print('begin')
            x_star_nm, _, history_nm = optimization.newton(oracle,
                                                           np.zeros(n),
                                                           trace=True)
            print('Newton is finished')
        x_star_gd, _, history_gd = optimization.gradient_descent(oracle,
                                                                 np.zeros(n),
                                                                 trace=True)
        print('GD is finished')
        plotting(history_gd, history_nm, 'func')
        plotting(history_gd, history_nm, 'grad_norm')
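Note the convention mismatch between examples: here history['time'] holds datetime.timedelta objects (hence total_seconds()), while Example #4 plots history['time'] directly as numbers. If both conventions occur, a small adapter like this sketch keeps plotting code uniform (to_seconds is my name, not the project's):

from datetime import timedelta

def to_seconds(times):
    # Accept either numeric seconds or datetime.timedelta entries.
    return [t.total_seconds() if isinstance(t, timedelta) else float(t)
            for t in times]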
Example #6
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from scipy.stats import ortho_group
from tqdm import tqdm

# Local project modules.
from oracles import create_log_reg_oracle, QuadraticOracle
from optimization import gradient_descent, newton


def experiment_5_and_6(algo='GD'):
    np.random.seed(31415)
    m, n = 2000, 1000
    A = np.random.randn(m, n)
    b = np.sign(np.random.randn(m))
    regcoef = 1 / m

    logreg_oracle = create_log_reg_oracle(A,
                                          b,
                                          regcoef,
                                          oracle_type='optimized')

    line_search_options = [
        {'method': 'Constant', 'c': 1.0},
        {'method': 'Constant', 'c': 0.95},
        {'method': 'Constant', 'c': 0.9},
        {'method': 'Constant', 'c': 0.85},
        {'method': 'Armijo', 'c1': 1e-8},
        {'method': 'Armijo', 'c1': 1e-6},
        {'method': 'Armijo', 'c1': 1e-4},
        {'method': 'Armijo', 'c1': 1e-1},
        # Note: c2 = 1.5 lies outside the standard Wolfe range 0 < c1 < c2 < 1;
        # kept as in the original experiment.
        {'method': 'Wolfe', 'c2': 1.5},
        {'method': 'Wolfe', 'c2': 0.9},
        {'method': 'Wolfe', 'c2': 0.1},
        {'method': 'Wolfe', 'c2': 0.01},
    ]

    colors = ['#e66101', '#fdb863', '#b2abd2', '#5e3c99']
    styles = {
        'Constant': {'linestyle': '--', 'dashes': (2, 5), 'linewidth': 2},
        'Armijo': {'linestyle': '--', 'dashes': (5, 2)},
        'Wolfe': {'linestyle': 'solid'},
    }

    x_0_list = [np.zeros(n), np.random.uniform(-1, 1, n), np.ones(n)]

    for k, x_0 in enumerate(x_0_list):
        plt.figure(figsize=(12, 9))
        for i, options in tqdm(enumerate(line_search_options)):
            if algo == 'GD':
                x_opt, message, history = gradient_descent(
                    logreg_oracle,
                    x_0,
                    trace=True,
                    line_search_options=options)
            else:
                x_opt, message, history = newton(logreg_oracle,
                                                 x_0,
                                                 trace=True,
                                                 line_search_options=options)
            method = options['method']
            param_name = [k for k in options if k != 'method'][0]
            label = "{} (method={}, {}={})".format(algo, method, param_name,
                                                   options[param_name])
            grad_norm = np.asarray(history['grad_norm'])
            values = 2 * np.log(grad_norm / grad_norm[0])
            # Small vertical jitter keeps overlapping curves distinguishable.
            plt.plot(values + np.random.randn(values.size) * 0.05,
                     color=colors[i % len(colors)],
                     label=label,
                     alpha=0.7,
                     **styles[method])
        plt.xlabel('Iteration number')
        plt.ylabel('Log of relative squared gradient norm')
        plt.legend(loc='upper right')
        plt.grid()

        Path("pics/logreg_{}_linear_search_strategies".format(algo)).mkdir(
            parents=True, exist_ok=True)
        plt.savefig(
            "pics/logreg_{}_linear_search_strategies/x_0_{}.png".format(
                algo, k))

    np.random.seed(31415)
    n = 2000
    C = ortho_group.rvs(n)
    A = C.T @ np.diag(np.random.uniform(1, 20, (n, ))) @ C
    b = np.random.randn(n)
    x_0 = np.zeros((n, ))

    quadratic_oracle = QuadraticOracle(A, b)
    x_opt = np.linalg.solve(A, b)
    f_opt = quadratic_oracle.func(x_opt)

    line_search_options = [
        {'method': 'Constant', 'c': 0.09},
        {'method': 'Constant', 'c': 0.085},
        {'method': 'Constant', 'c': 0.08},
        {'method': 'Constant', 'c': 0.075},
        {'method': 'Armijo', 'c1': 1e-10},
        {'method': 'Armijo', 'c1': 1e-7},
        {'method': 'Armijo', 'c1': 1e-4},
        {'method': 'Armijo', 'c1': 1e-1},
        # c2 = 1.5 is again outside the standard Wolfe range (see note above).
        {'method': 'Wolfe', 'c2': 1.5},
        {'method': 'Wolfe', 'c2': 0.9},
        {'method': 'Wolfe', 'c2': 0.1},
        {'method': 'Wolfe', 'c2': 0.01},
    ]

    x_0_list = [
        np.zeros(n),
        np.random.uniform(-1, 1, n),
        x_opt + np.random.randn(n) * 0.2,
    ]

    for k, x_0 in enumerate(x_0_list):
        plt.figure(figsize=(12, 9))
        for i, options in tqdm(enumerate(line_search_options)):
            if algo == 'GD':
                x_opt, message, history = gradient_descent(
                    quadratic_oracle,
                    x_0,
                    trace=True,
                    line_search_options=options)
            else:
                x_opt, message, history = newton(quadratic_oracle,
                                                 x_0,
                                                 trace=True,
                                                 line_search_options=options)
            method = options['method']
            param_name = [k for k in options if k != 'method'][0]
            label = "{} (method={}, {}={})".format(algo, method, param_name,
                                                   options[param_name])
            func_vals = np.asarray(history['func'])
            values = np.log(np.abs((func_vals - f_opt) / f_opt) + 1e-16)
            plt.plot(values + np.random.randn(values.size) * 0.05,
                     color=colors[i % len(colors)],
                     label=label,
                     alpha=0.7,
                     **styles[method])
        plt.xlabel('Iteration number')
        plt.ylabel('Log of relative residual')
        plt.legend(loc='upper right')
        plt.grid()
        Path("pics/quadratic_{}_linear_search_strategies".format(algo)).mkdir(
            parents=True, exist_ok=True)
        plt.savefig(
            "pics/quadratic_{}_linear_search_strategies/x_0_{}.png".format(
                algo, k))
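A minimal driver for both halves of the experiment (logistic regression, then the quadratic), assuming the local modules sketched in the imports at the top of the example:

if __name__ == '__main__':
    for algo in ('GD', 'Newton'):
        experiment_5_and_6(algo)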