def get_constant_strategies():
    """
    Build the list of constant inner-tolerance strategies.

    One strategy is requested from ``get_tolerance_strategy`` for each fixed
    ``delta`` in (1e-2, 1e-4, 1e-6, 1e-8), each carrying a LaTeX label.
    The strategies are returned in that order.
    """
    # (delta, label) pairs kept side by side so they cannot drift apart.
    delta_and_label = (
        (1e-2, r'$10^{-2}$'),
        (1e-4, r'$10^{-4}$'),
        (1e-6, r'$10^{-6}$'),
        (1e-8, r'$10^{-8}$'),
    )
    return [
        get_tolerance_strategy({
            'strategy': 'constant',
            'delta': delta,
            'label': label
        })
        for delta, label in delta_and_label
    ]
def get_power_strategies():
    """
    Build the list of 'power' inner-tolerance strategies.

    One strategy is requested from ``get_tolerance_strategy`` for each
    exponent ``alpha`` in (1, 2, 3, 4), always with ``c = 1.0`` and a LaTeX
    label of the form 1/k^alpha. The strategies are returned in that order.
    """
    # (alpha, label) pairs kept side by side so they cannot drift apart.
    alpha_and_label = (
        (1, r'$1/k$'),
        (2, r'$1/k^2$'),
        (3, r'$1/k^3$'),
        (4, r'$1/k^4$'),
    )
    return [
        get_tolerance_strategy({
            'strategy': 'power',
            'alpha': alpha,
            'c': 1.0,
            'label': label
        })
        for alpha, label in alpha_and_label
    ]
def get_beta_strategies():
    """
    Build constant inner-tolerance strategies parameterized by beta.

    For each ``beta`` in (0.5, 0.1, 0.01, 0.001) a 'constant' strategy with
    ``delta = beta ** 2`` is requested from ``get_tolerance_strategy``; the
    label shows the value of beta. The strategies are returned in that order.
    """
    beta_and_label = (
        (0.5, r'$\beta = 0.5$'),
        (0.1, r'$\beta = 0.1$'),
        (0.01, r'$\beta = 0.01$'),
        (0.001, r'$\beta = 0.001$'),
    )
    return [
        get_tolerance_strategy({
            'strategy': 'constant',
            # The tolerance is the square of beta, not beta itself.
            'delta': beta**2,
            'label': label
        })
        for beta, label in beta_and_label
    ]
def run_experiment(n, line_search, max_iters):
    """
    Compare Cubic Newton variants on the PDifferenceOracle(3) objective.

    Runs ``cubic_newton`` (NCG subsolver) with the 1/k^3 strategy, the
    adaptive strategy, and the 1/k^3 strategy with averaging. When
    ``line_search`` is falsy, ``contracting_cubic_newton`` is run as well.
    All recorded histories are passed to ``plot_func_residual`` together
    with the output filename ``<cwd>/plots/averaging_<n>[_ls].pdf``.
    """
    print('Experiment: \t n = %d, \t line_search = %s, \t max_iters = %d.' %
          (n, str(line_search), max_iters))

    oracle = PDifferenceOracle(3)
    # The value at the all-zeros point serves as the reference f_star.
    f_star = oracle.func(np.zeros(n))
    x_0 = np.ones(n)

    tolerance = get_tolerance({
        'criterion': 'func',
        'f_star': f_star,
        'tolerance': 1e-9
    })

    def make_strategy(kind, alpha, label):
        # All strategies of this experiment share c = 1.0.
        return get_tolerance_strategy({
            'strategy': kind,
            'c': 1.0,
            'alpha': alpha,
            'label': label
        })

    inverse_k = make_strategy('power', 1, r'$1/k$')
    inverse_k_cubed = make_strategy('power', 3, r'$1/k^3$')
    adaptive = make_strategy('adaptive', 1, 'adaptive')

    def solve(strategy, **extra):
        # Shared cubic_newton configuration; only the history is kept.
        _, _, run_history = cubic_newton(oracle, x_0, tolerance,
                                         max_iters=max_iters,
                                         H_0=1.0,
                                         line_search=line_search,
                                         inner_tolerance_strategy=strategy,
                                         subsolver='NCG',
                                         trace=True,
                                         stopping_criterion_subproblem=
                                         'grad_uniform_convex',
                                         **extra)
        return run_history

    # (legend label, inner tolerance strategy, extra solver kwargs)
    runs = (
        (r'CN, $1/k^3$', inverse_k_cubed, {}),
        ('CN, adaptive', adaptive, {}),
        (r'Averaging, $1/k^3$', inverse_k_cubed, {'averaging': True}),
    )

    histories = []
    labels = []
    for label, strategy, extra in runs:
        histories.append(solve(strategy, **extra))
        labels.append(label)

    if not line_search:
        # The contracting method is only included in the runs without
        # line search.
        _, _, contracting_history = contracting_cubic_newton(
            oracle, x_0, tolerance,
            max_iters=max_iters,
            H_0=1.0,
            prox_steps_tolerance_strategy=inverse_k,
            newton_steps_tolerance_strategy=inverse_k,
            trace=True)
        histories.append(contracting_history)
        labels.append(r'Contracting')

    filename = os.getcwd() + ('/plots/averaging_%d' % n)
    title = r'$n = %d$' % n
    if line_search:
        filename = filename + '_ls'
        title = title + ', line search'

    # Per-curve styling; presumably colors, line styles, line widths and
    # alphas, in that order -- confirm against plot_func_residual.
    colors = ['blue', 'red', 'tab:green', 'tab:purple']
    linestyles = ['-.', '-', '-', ':']
    widths = [3, 2, 5, 5]
    alphas = [0.6, 1, 0.8, 0.8]

    plot_func_residual(histories, None, f_star, labels,
                       colors, linestyles, widths, alphas,
                       title,
                       'Iterations',
                       filename=filename + '.pdf',
                       figsize=(5.5, 5))
def run_experiment(dataset_filename, name, max_iters):
    """
    Time-based comparison of inner-tolerance strategies on logistic regression.

    Loads the svmlight dataset, obtains a reference optimum with
    ``scipy.optimize.minimize``, then runs ``cubic_newton`` (FGM subsolver,
    line search on) once per strategy. Two groups are plotted against
    'time' with ``plot_func_residual``: constant strategies + adaptive
    (``..._const.pdf``) and power strategies + adaptive (``..._powers.pdf``).
    """
    print('Experiment: \t %s, \t file: %s, \t max_iters = %d.' %
          (name, dataset_filename, max_iters))

    X, y = load_svmlight_file(dataset_filename)
    # Third argument is 1 / (number of rows); presumably the regularization
    # coefficient -- confirm against create_log_reg_oracle.
    oracle = create_log_reg_oracle(X, y, 1 / X.shape[0])
    x_0 = np.zeros(X.shape[1])

    print('Minimize by scipy ... ', flush=True, end='')
    reference = scipy.optimize.minimize(oracle.func, x_0, jac=oracle.grad,
                                        tol=1e-9)
    f_star = reference.fun
    print('f_star = %g.' % f_star)

    tolerance = get_tolerance({
        'criterion': 'func',
        'f_star': f_star,
        'tolerance': 1e-8
    })

    constant_strategies = get_constant_strategies()
    power_strategies = get_power_strategies()
    adaptive_strategy = get_tolerance_strategy({
        'strategy': 'adaptive',
        'c': 1.0,
        'alpha': 1,
        'label': 'adaptive'
    })

    const_group = constant_strategies + [adaptive_strategy]
    power_group = power_strategies + [adaptive_strategy]

    # Solver settings shared by every run; only the strategy varies.
    solver_options = {
        'max_iters': max_iters,
        'H_0': 1.0,
        'line_search': True,
        'subsolver': 'FGM',
        'trace': True,
        'B': None,
        'Binv': None,
        'stopping_criterion_subproblem': 'grad_uniform_convex',
    }
    method = lambda strategy: cubic_newton(oracle, x_0, tolerance,
                                           inner_tolerance_strategy=strategy,
                                           **solver_options)

    title = 'Log-reg: %s' % name
    linestyles = ['-', '--', '-.', ':', '-']

    const_labels = get_labels(const_group)
    const_histories = run_method(method, const_group, const_labels)
    filename = os.getcwd() + ('/plots/logreg_%s_time' % name)
    plot_func_residual(const_histories, 'time', f_star, const_labels,
                       ['grey', 'grey', 'grey', 'grey', 'red'],
                       linestyles,
                       [5, 4, 3, 4, 2],
                       [0.8, 0.8, 0.8, 0.8, 1],
                       title,
                       'Time, s',
                       filename=filename + '_const.pdf')

    power_labels = get_labels(power_group)
    power_histories = run_method(method, power_group, power_labels)
    plot_func_residual(power_histories, 'time', f_star, power_labels,
                       ['blue', 'blue', 'blue', 'blue', 'red'],
                       linestyles,
                       [5, 4, 3, 2, 2],
                       [0.6, 0.6, 0.6, 0.6, 1],
                       title,
                       'Time, s',
                       filename=filename + '_powers.pdf')
def run_experiment(n, mu, max_iters):
    """
    Time-based comparison of inner-tolerance strategies on Log-sum-exp.

    Generates the problem with ``generate_logsumexp(n, mu)`` and runs
    ``cubic_newton`` (FGM subsolver, no line search) once per strategy.
    Two groups are plotted against 'time' with ``plot_func_residual``:
    constant strategies + adaptive (``..._const.pdf``) and power strategies
    + adaptive (``..._powers.pdf``).
    """
    print('Experiment: \t n = %d, \t mu = %g, \t max_iters = %d.' %
          (n, mu, max_iters))

    # The second returned value (x_star) is not needed here.
    oracle, _, f_star, B, Binv = generate_logsumexp(n, mu)

    x_0 = np.ones(n)
    tolerance = get_tolerance({
        'criterion': 'func',
        'f_star': f_star,
        'tolerance': 1e-8
    })

    constant_strategies = get_constant_strategies()
    power_strategies = get_power_strategies()
    adaptive_strategy = get_tolerance_strategy({
        'strategy': 'adaptive',
        'c': 1.0,
        'alpha': 1,
        'label': 'adaptive'
    })

    const_group = constant_strategies + [adaptive_strategy]
    power_group = power_strategies + [adaptive_strategy]

    # Solver settings shared by every run; only the strategy varies.
    solver_options = {
        'max_iters': max_iters,
        'H_0': 1.0,
        'line_search': False,
        'subsolver': 'FGM',
        'trace': True,
        'B': B,
        'Binv': Binv,
        'stopping_criterion_subproblem': 'grad_uniform_convex',
    }
    method = lambda strategy: cubic_newton(oracle, x_0, tolerance,
                                           inner_tolerance_strategy=strategy,
                                           **solver_options)

    title = r'Log-sum-exp, $\mu = %g$' % mu
    linestyles = ['-', '--', '-.', ':', '-']

    const_labels = get_labels(const_group)
    const_histories = run_method(method, const_group, const_labels)
    # Drops the first two characters of the '%g' rendering (e.g. the '0.'
    # of '0.05'). NOTE(review): for values not rendered as '0.xxx' this
    # yields odd file names -- confirm the intended range of mu.
    mu_str = ('%g' % mu)[2:]
    filename = os.getcwd() + ('/plots/logsumexp_%d_%s_time' % (n, mu_str))
    plot_func_residual(const_histories, 'time', f_star, const_labels,
                       ['grey', 'grey', 'grey', 'grey', 'red'],
                       linestyles,
                       [5, 4, 3, 4, 2],
                       [0.8, 0.8, 0.8, 0.8, 1],
                       title,
                       'Time, s',
                       filename=filename + '_const.pdf')

    power_labels = get_labels(power_group)
    power_histories = run_method(method, power_group, power_labels)
    plot_func_residual(power_histories, 'time', f_star, power_labels,
                       ['blue', 'blue', 'blue', 'blue', 'red'],
                       linestyles,
                       [5, 4, 3, 2, 2],
                       [0.6, 0.6, 0.6, 0.6, 1],
                       title,
                       'Time, s',
                       filename=filename + '_powers.pdf')
# Example no. 7
# 0
def run_experiment(n, mu, max_iters):
    """
    Compare inner-tolerance strategies on Log-sum-exp by Hessian-vector calls.

    Generates the problem with ``generate_logsumexp(n, mu)`` and runs
    ``cubic_newton`` (FGM subsolver, line search on, 'func' stopping
    criterion for the subproblem) once per strategy. Three groups are
    plotted against 'hess_vec_calls' with ``plot_func_residual_iter``:
    constant, power (+ the last constant strategy) and adaptive (+ the last
    constant strategy).
    """
    print('Experiment: \t n = %d, \t mu = %g, \t max_iters = %d.' %
          (n, mu, max_iters))

    # The second returned value (x_star) is not needed here.
    oracle, _, f_star, B, Binv = generate_logsumexp(n, mu)

    x_0 = np.ones(n)
    tolerance = get_tolerance({
        'criterion': 'func',
        'f_star': f_star,
        'tolerance': 1e-8
    })

    constant_strategies = get_constant_strategies()
    power_strategies = get_power_strategies()

    def adaptive(alpha, label):
        return get_tolerance_strategy({
            'strategy': 'adaptive',
            'c': 1.0,
            'alpha': alpha,
            'label': label
        })

    adaptive_strategy = adaptive(1, 'adaptive')
    adaptive_15_strategy = adaptive(1.5, r'adaptive $1.5$')
    adaptive_2_strategy = adaptive(2, r'adaptive $2$')

    # The tightest constant strategy is drawn in every non-constant plot.
    baseline = constant_strategies[-1]

    # Solver settings shared by every run; only the strategy varies.
    solver_options = {
        'max_iters': max_iters,
        'H_0': 1.0,
        'line_search': True,
        'subsolver': 'FGM',
        'trace': True,
        'B': B,
        'Binv': Binv,
        'stopping_criterion_subproblem': 'func',
    }
    method = lambda strategy: cubic_newton(oracle, x_0, tolerance,
                                           inner_tolerance_strategy=strategy,
                                           **solver_options)

    # Drops the first two characters of the '%g' rendering (e.g. the '0.'
    # of '0.05'). NOTE(review): for values not rendered as '0.xxx' this
    # yields odd file names -- confirm the intended range of mu.
    mu_str = ('%g' % mu)[2:]
    filename = os.getcwd() + ('/plots/exact_logsumexp_%d_%s' % (n, mu_str))

    # (strategies, colors, line styles, then two per-curve numeric lists --
    #  presumably widths and alphas --, title, file suffix)
    plot_groups = (
        (constant_strategies,
         ['grey', 'grey', 'grey', 'grey'],
         ['-', '--', '-.', ':'], [5, 4, 3, 4], [1, 1, 1, 1],
         r'Log-sum-exp, $\mu = %g$: constant strategies' % mu,
         '_const.pdf'),
        (power_strategies + [baseline],
         ['blue', 'blue', 'blue', 'blue', 'gray'],
         ['-', '--', '-.', ':', ':'], [5, 4, 3, 2, 4],
         [0.6, 0.6, 0.6, 0.6, 0.8],
         r'Log-sum-exp, $\mu = %g$: dynamic strategies' % mu,
         '_power.pdf'),
        ([adaptive_strategy, adaptive_15_strategy, adaptive_2_strategy,
          baseline],
         ['red', 'tab:orange', 'tab:orange', 'gray'],
         ['-', '--', '-.', ':'], [2, 4, 2, 4], [1, 1, 1, 0.8],
         r'Log-sum-exp, $\mu = %g$: adaptive strategies' % mu,
         '_adaptive.pdf'),
    )

    for (strategies, colors, linestyles, widths, alphas, title,
         suffix) in plot_groups:
        labels = get_labels(strategies)
        histories = run_method(method, strategies, labels)
        plot_func_residual_iter(histories,
                                'hess_vec_calls',
                                f_star,
                                labels, colors,
                                linestyles, widths, alphas,
                                title,
                                'Hessian-vector products',
                                filename=filename + suffix)
def run_experiment(dataset_filename, name, max_iters):
    """
    Compare inner-tolerance strategies on logistic regression by
    Hessian-vector calls.

    Loads the svmlight dataset, obtains a reference optimum with
    ``scipy.optimize.minimize``, then runs ``cubic_newton`` (FGM subsolver,
    line search on, 'func' stopping criterion for the subproblem) once per
    strategy. Three groups are plotted against 'hess_vec_calls' with
    ``plot_func_residual_iter``: constant, power (+ the last constant
    strategy) and adaptive (+ the last constant strategy).
    """
    print('Experiment: \t %s, \t file: %s, \t max_iters = %d.' %
          (name, dataset_filename, max_iters))

    X, y = load_svmlight_file(dataset_filename)
    # Third argument is 1 / (number of rows); presumably the regularization
    # coefficient -- confirm against create_log_reg_oracle.
    oracle = create_log_reg_oracle(X, y, 1 / X.shape[0])
    x_0 = np.zeros(X.shape[1])

    print('Minimize by scipy ... ', flush=True, end='')
    reference = scipy.optimize.minimize(oracle.func, x_0, jac=oracle.grad,
                                        tol=1e-9)
    f_star = reference.fun
    print('f_star = %g.' % f_star)

    tolerance = get_tolerance({
        'criterion': 'func',
        'f_star': f_star,
        'tolerance': 1e-8
    })

    constant_strategies = get_constant_strategies()
    power_strategies = get_power_strategies()

    def adaptive(alpha, label):
        return get_tolerance_strategy({
            'strategy': 'adaptive',
            'c': 1.0,
            'alpha': alpha,
            'label': label
        })

    adaptive_strategy = adaptive(1, 'adaptive')
    adaptive_15_strategy = adaptive(1.5, r'adaptive $1.5$')
    adaptive_2_strategy = adaptive(2, r'adaptive $2$')

    # The tightest constant strategy is drawn in every non-constant plot.
    baseline = constant_strategies[-1]

    # Solver settings shared by every run; only the strategy varies.
    solver_options = {
        'max_iters': max_iters,
        'H_0': 1.0,
        'line_search': True,
        'subsolver': 'FGM',
        'trace': True,
        'B': None,
        'Binv': None,
        'stopping_criterion_subproblem': 'func',
    }
    method = lambda strategy: cubic_newton(oracle, x_0, tolerance,
                                           inner_tolerance_strategy=strategy,
                                           **solver_options)

    filename = os.getcwd() + ('/plots/exact_logreg_%s' % name)

    # (strategies, colors, line styles, then two per-curve numeric lists --
    #  presumably widths and alphas --, title, file suffix)
    plot_groups = (
        (constant_strategies,
         ['grey', 'grey', 'grey', 'grey'],
         ['-', '--', '-.', ':'], [5, 4, 3, 4], [1, 1, 1, 1],
         r'Log-reg, %s: constant strategies' % name,
         '_const.pdf'),
        (power_strategies + [baseline],
         ['blue', 'blue', 'blue', 'blue', 'gray'],
         ['-', '--', '-.', ':', ':'], [5, 4, 3, 2, 4],
         [0.6, 0.6, 0.6, 0.6, 0.8],
         r'Log-reg, %s: dynamic strategies' % name,
         '_power.pdf'),
        ([adaptive_strategy, adaptive_15_strategy, adaptive_2_strategy,
          baseline],
         ['red', 'tab:orange', 'tab:orange', 'gray'],
         ['-', '--', '-.', ':'], [2, 4, 2, 4], [1, 1, 1, 0.8],
         r'Log-reg, %s: adaptive strategies' % name,
         '_adaptive.pdf'),
    )

    for (strategies, colors, linestyles, widths, alphas, title,
         suffix) in plot_groups:
        labels = get_labels(strategies)
        histories = run_method(method, strategies, labels)
        plot_func_residual_iter(histories,
                                'hess_vec_calls',
                                f_star,
                                labels, colors,
                                linestyles, widths, alphas,
                                title,
                                'Hessian-vector products',
                                filename=filename + suffix)
# Example no. 9
# 0
def contracting_cubic_newton(oracle, x_0, tolerance, max_iters=1000, H_0=1.0, 
                             trace=True, prox_steps_max_iters=None,
                             prox_steps_tolerance_strategy=None,
                             newton_steps_tolerance_strategy=None, B=None, 
                             Binv=None):
    """
    Accelerated Cubic Newton, using contracted proximal iterations.

    Every outer iteration approximately minimizes the contracted objective
    (built by ``ContractingOracle``) plus the Bregman divergence of
    d(x) = 1/3 ||x - x_0||^3 taken at the current point v_k. This proximal
    subproblem is solved by at most ``prox_steps_max_iters`` steps, each
    computed by ``cubic_newton_step_ncg``.

    Parameters:
        oracle: objective; wrapped into ``OracleCallsCounter``. Its ``func``
            and ``grad`` are called directly here.
        x_0: starting point (copied); also the center of d(x).
        tolerance: object whose ``stopping_condition(func, grad_norm_sqr)``
            terminates the outer loop.
        max_iters: maximal number of outer iterations.
        H_0: value assigned to H_k; H_k is never updated in this method.
        trace: if True, a per-iteration history is collected.
        prox_steps_max_iters: maximal number of steps per proximal
            subproblem; 10 when None.
        prox_steps_tolerance_strategy: strategy giving the tolerance of the
            proximal subproblem; 'power' with c = 1.0, alpha = 1 when None.
        newton_steps_tolerance_strategy: strategy giving the tolerance of
            each inner step; 'power' with c = 1.0, alpha = 1 when None.
        B, Binv: passed to ``norms_init`` and ``cubic_newton_step_ncg``.

    Returns:
        (x_k, message, history): the last point, "success" or
        "iterations_exceeded", and the history (a ``defaultdict(list)`` with
        keys 'func', 'grad_sqr_norm', 'time', 'H', 'func_calls',
        'grad_calls', 'hess_calls', 'hess_vec_calls', or None when ``trace``
        is False).

    NOTE(review): for negative ``max_iters`` the main loop does not run and
    ``message`` is unbound at the return statement.
    """
    oracle = OracleCallsCounter(oracle)

    # Initialization.
    history = defaultdict(list) if trace else None
    start_timestamp = datetime.now()
    l2_norm_sqr, dual_norm_sqr, to_dual, precond = norms_init(B, Binv)

    if prox_steps_tolerance_strategy is None:
        prox_steps_tolerance_strategy = get_tolerance_strategy(
            {'strategy': 'power',
             'c': 1.0,
             'alpha': 1})

    if newton_steps_tolerance_strategy is None:
        newton_steps_tolerance_strategy = get_tolerance_strategy(
            {'strategy': 'power',
             'c': 1.0,
             'alpha': 1})

    if prox_steps_max_iters is None:
        prox_steps_max_iters = 10

    x_k = np.copy(x_0)
    v_k = np.copy(x_0)
    func_k = oracle.func(x_k)
    grad_k = oracle.grad(x_k)
    grad_k_norm_sqr = dual_norm_sqr(grad_k)
    func_k_prev = None

    H_k = H_0
    A_k = 0.0

    # Main loop.
    for k in range(max_iters + 1):

        # The history is recorded before the stopping checks, so the final
        # point is always included.
        if trace:
            history['func'].append(func_k)
            history['grad_sqr_norm'].append(grad_k_norm_sqr)
            history['time'].append(
                (datetime.now() - start_timestamp).total_seconds())
            history['H'].append(H_k)
            history['func_calls'].append(oracle.func_calls)
            history['grad_calls'].append(oracle.grad_calls)
            history['hess_calls'].append(oracle.hess_calls)
            history['hess_vec_calls'].append(oracle.hess_vec_calls)

        if tolerance.stopping_condition(func_k, grad_k_norm_sqr):
            message = "success"
            break

        if k == max_iters:
            message = "iterations_exceeded"
            break

        # Choose A_k.
        A_k_new = (k + 1) ** 3.0 / H_k
        a_k_new = A_k_new - A_k

        # We minimize Contracted objective plus the Bregman divergence of d,
        # where d(x) = 1/3||x - x_0||^3.
        contracted_oracle = ContractingOracle(oracle, a_k_new, A_k, x_k)
        d = lambda x: 1.0 / 3 * l2_norm_sqr(x - x_0) ** 1.5
        d_prime = lambda x: l2_norm_sqr(x - x_0) ** 0.5 * to_dual(x - x_0)

        # d and its gradient at v_k are fixed for the whole proximal
        # subproblem, so they are computed once.
        d_v_k = d(v_k)
        d_prime_v_k = d_prime(v_k)
        Bregman = lambda x: d(x) - d_v_k - d_prime_v_k.dot(x - v_k) 

        T = np.copy(v_k)  # Initial point.
        g_T = contracted_oracle.grad(T)
        Func_T = contracted_oracle.func(T) + Bregman(T)
        Func_T_prev = None
        
        prox_tolerance_value = \
            prox_steps_tolerance_strategy.get_tolerance(k, func_k_prev, func_k) 
        prox_steps_tolerance = \
            get_tolerance({'criterion': 'grad_uniform_convex',
                           'p': 3.0,
                           'sigma': 0.5,
                           'tolerance': prox_tolerance_value})
        
        # Iterations for computing the proximal step.
        for i in range(prox_steps_max_iters):

            hess_vec = lambda v: contracted_oracle.hess_vec(T, v)
            g = g_T - d_prime_v_k
            alpha = 1.0
            M = 1.0
            # Offset from T to x_0; presumably lets the step solver handle
            # the cubic term centered at x_0 -- confirm against
            # cubic_newton_step_ncg.
            c = x_0 - T

            inner_tolerance_value = \
                newton_steps_tolerance_strategy.get_tolerance(
                    i, Func_T_prev, Func_T)
            inner_tolerance = get_tolerance(
                {'criterion': 'grad_uniform_convex',
                 'p': 3.0,
                 'sigma': 0.5 * M,
                 'tolerance': inner_tolerance_value})

            T_d_k, model_T, message, hist = \
                cubic_newton_step_ncg(hess_vec, g, M,
                                      alpha, c,
                                      np.zeros_like(x_k), 
                                      inner_tolerance, 
                                      max_iters=100,
                                      trace=True,
                                      B=B, Binv=Binv)
            # A failed inner solve is only reported; its result is still
            # used for the update below.
            if message != 'success':
                print(message, flush=True)

            # In-place update of the proximal iterate.
            T += T_d_k

            g_T = contracted_oracle.grad(T) 
            # Gradient of the full proximal objective (contracted objective
            # plus Bregman term) at T.
            G_T = g_T + d_prime(T) - d_prime_v_k
            G_T_norm_sqr = dual_norm_sqr(G_T)
            Func_T_prev = Func_T
            Func_T = contracted_oracle.func(T) + Bregman(T)
            if prox_steps_tolerance.stopping_condition(Func_T, G_T_norm_sqr):
                break

        v_k = T
        # New point: combination of v_k and the previous x_k with weights
        # a_k_new / A_k_new and A_k / A_k_new.
        x_k = (a_k_new * v_k + A_k * x_k) / A_k_new
        A_k = A_k_new

        func_k_prev = func_k
        func_k = oracle.func(x_k)
        grad_k = oracle.grad(x_k)
        grad_k_norm_sqr = dual_norm_sqr(grad_k)

    return x_k, message, history
# Example no. 10
# 0
def cubic_newton(oracle, x_0, tolerance, max_iters=1000, H_0=1.0, 
                 line_search=False, trace=True, inner_tolerance_strategy=None,
                 subsolver='FGM', B=None, Binv=None,
                 stopping_criterion_subproblem='grad_uniform_convex',
                 averaging=False):
    """
    Newton method with cubic regularization.

    Parameters:
        oracle: objective; wrapped into ``OracleCallsCounter``. ``func``,
            ``grad``, ``hess_vec`` and (for the exact step) ``hess`` are
            called.
        x_0: starting point (copied).
        tolerance: object whose ``stopping_condition(func, grad_norm_sqr)``
            terminates the outer loop; its ``tolerance`` attribute is read
            when ``inner_tolerance_strategy`` is None.
        max_iters: maximal number of outer iterations.
        H_0: initial value of the regularization parameter H_k.
        line_search: if True, H_k is doubled (at most 30 trials per
            iteration) until func(T) <= func(y_k) + model_T, and halved
            (not below 1e-8) after each accepted step. If False, the first
            computed step is accepted.
        trace: if True, a per-iteration history is collected.
        inner_tolerance_strategy: strategy giving the subproblem tolerance
            at every iteration; 'constant' with
            delta = tolerance.tolerance ** 1.5 when None.
        subsolver: 'FGM' uses ``cubic_newton_step_fgm``, 'NCG' uses
            ``cubic_newton_step_ncg``; any other value uses the step
            computed by ``cubic_newton_step`` from the full Hessian.
        B, Binv: passed to ``norms_init`` and to the step solvers.
        stopping_criterion_subproblem: one of 'func', 'grad_uniform_convex',
            'grad_norm_bound', 'grad_norm_by_difference',
            'grad_norm_by_oracle_grad'; any other value selects the 'grad'
            criterion.
        averaging: if True, the step is computed from
            y_k = lambda_k * x_k + (1 - lambda_k) * x_0 with
            lambda_k = (k / (k + 1)) ** 3 instead of from x_k.

    Returns:
        (x_k, message, history): the last point; "success",
        "iterations_exceeded" or a string starting with
        "E: step_failure : "; and the history (a ``defaultdict(list)`` with
        keys 'func', 'grad_sqr_norm', 'time', 'H', 'func_calls',
        'grad_calls', 'hess_calls', 'hess_vec_calls', 'inner_iters', or
        None when ``trace`` is False).

    NOTE(review): for negative ``max_iters`` the main loop does not run and
    ``message`` is unbound at the return statement.
    """
    oracle = OracleCallsCounter(oracle)

    # Initialization.
    history = defaultdict(list) if trace else None
    start_timestamp = datetime.now()
    l2_norm_sqr, dual_norm_sqr, to_dual, precond = norms_init(B, Binv)

    if inner_tolerance_strategy is None:
        inner_tolerance_strategy = get_tolerance_strategy(
            {'strategy': 'constant',
             'delta': tolerance.tolerance ** 1.5})

    x_k = np.copy(x_0)
    func_k = oracle.func(x_k)
    grad_k = oracle.grad(x_k)
    grad_k_norm_sqr = dual_norm_sqr(grad_k)
    func_k_prev = None

    H_k = H_0

    # Counters used to record the number of subsolver iterations spent in
    # each outer iteration.
    prev_total_inner_iters = 0
    total_inner_iters = 0

    # Main loop.
    for k in range(max_iters + 1):

        # The history is recorded before the stopping checks, so the final
        # point is always included.
        if trace:
            history['func'].append(func_k)
            history['grad_sqr_norm'].append(grad_k_norm_sqr)
            history['time'].append(
                (datetime.now() - start_timestamp).total_seconds())
            history['H'].append(H_k)
            history['func_calls'].append(oracle.func_calls)
            history['grad_calls'].append(oracle.grad_calls)
            history['hess_calls'].append(oracle.hess_calls)
            history['hess_vec_calls'].append(oracle.hess_vec_calls)

            history['inner_iters'].append(
                total_inner_iters - prev_total_inner_iters)
            prev_total_inner_iters = total_inner_iters


        if tolerance.stopping_condition(func_k, grad_k_norm_sqr):
            message = "success"
            break

        if k == max_iters:
            message = "iterations_exceeded"
            break

        # Compute the direction.
        d_k = np.zeros_like(x_k)
        found = False
        
        inner_tolerance_value = \
            inner_tolerance_strategy.get_tolerance(k, func_k_prev, func_k)

        if averaging:
            # The step is taken from a combination of x_k and x_0; this
            # costs one extra gradient and function evaluation.
            lambda_k = (1.0 * k / (k + 1)) ** 3
            y_k = lambda_k * x_k + (1 - lambda_k) * x_0
            grad_y_k = oracle.grad(y_k)
            func_y_k = oracle.func(y_k)
        else:
            y_k = x_k
            grad_y_k = grad_k
            func_y_k = func_k

        line_search_max_iter = 30
        for i in range(line_search_max_iter + 1):
            # The extra last pass only reports that all trials failed.
            if i == line_search_max_iter:
                message = "adaptive_iterations_exceeded"
                break

            # The exact step is needed either as the final step (subsolver
            # other than FGM/NCG) or to get the model value that serves as
            # f_star of the 'func' inner criterion.
            if stopping_criterion_subproblem == 'func' or \
                    (subsolver != 'FGM' and subsolver != 'NCG'):
                Hess_y_k = oracle.hess(y_k)
                T_d_k, model_T, message = \
                    cubic_newton_step(grad_y_k, Hess_y_k, 0.5 * H_k, B)

            # Initialize the inner tolerance.
            if stopping_criterion_subproblem == 'func':
                inner_tolerance = \
                    get_tolerance({'criterion': 'func',
                                   'f_star': model_T,
                                   'tolerance': inner_tolerance_value})
            elif stopping_criterion_subproblem == 'grad_uniform_convex':
                inner_tolerance = \
                    get_tolerance({'criterion': 'grad_uniform_convex',
                                   'p': 3.0,
                                   'sigma': 0.25 * H_k,
                                   'tolerance': inner_tolerance_value})
            elif stopping_criterion_subproblem == 'grad_norm_bound':
                inner_tolerance = \
                    get_tolerance({'criterion': 'grad_norm_bound',
                                   'c': inner_tolerance_value})
            elif stopping_criterion_subproblem == 'grad_norm_by_difference' or \
                 stopping_criterion_subproblem == 'grad_norm_by_oracle_grad':

                # Both variants use the 'grad_norm_lambda_bound' criterion
                # and differ only in the bound function.
                if stopping_criterion_subproblem == 'grad_norm_by_difference':
                    lambda_bound = lambda T: l2_norm_sqr(T - y_k) ** 2
                else:
                    lambda_bound = lambda T: dual_norm_sqr(oracle.grad(T))
                inner_tolerance = \
                    get_tolerance({'criterion': 'grad_norm_lambda_bound',
                                   'lambda_bound': lambda_bound,
                                   'c': inner_tolerance_value})
            else:
                # Heuristic stopping criterion.
                inner_tolerance = \
                    get_tolerance({'criterion': 'grad',
                                   'tolerance': inner_tolerance_value})

            hess_vec = lambda v: oracle.hess_vec(y_k, v)

            if subsolver == 'FGM':
                T_d_k, model_T, message, hist = \
                    cubic_newton_step_fgm(hess_vec, grad_y_k, 0.5 * H_k, 
                                          d_k, inner_tolerance, 
                                          max_iters=5000,
                                          trace=True,
                                          B=B, Binv=Binv)
            elif subsolver == 'NCG':
                T_d_k, model_T, message, hist = \
                    cubic_newton_step_ncg(hess_vec, grad_y_k, 0.5 * H_k,
                                          0.0, np.zeros_like(grad_y_k),
                                          d_k, inner_tolerance, 
                                          max_iters=5000,
                                          trace=True,
                                          B=B, Binv=Binv)
            
            # A failed subproblem solve is only reported; its result is
            # still used below.
            if message != "success":
                print('W: %s' % message, end=' ', flush=True)

            # ``hist`` exists only for the iterative subsolvers.
            if subsolver == 'FGM' or subsolver == 'NCG':
                last_inner_iters = len(hist['func'])
                total_inner_iters += last_inner_iters
            

            # The computed step is handed to the subsolver on the next
            # trial; presumably as its starting point -- confirm against
            # the step solvers.
            d_k = T_d_k
            T = y_k + T_d_k
            func_T = oracle.func(T)
            grad_T = oracle.grad(T)
            grad_T_norm_sqr = dual_norm_sqr(grad_T)

            if not line_search:
                found = True
                break

            # Check condition for H_k.
            model_min = func_y_k + model_T
            if func_T <= model_min:
                found = True
                break
            H_k *= 2

        if not found:
            message = "E: step_failure : " + message
            break

        # After an accepted step the regularization parameter is relaxed,
        # but never below 1e-8.
        if line_search:
            H_k *= 0.5
            H_k = max(H_k, 1e-8)

        x_k = T
        grad_k = grad_T
        func_k_prev = func_k
        func_k = func_T
        grad_k_norm_sqr = grad_T_norm_sqr

    return x_k, message, history