def get_constant_strategies():
    """Build the constant inner-tolerance strategies.

    Returns:
        A list with one strategy per fixed tolerance value
        (1e-2, 1e-4, 1e-6, 1e-8), each created by
        ``get_tolerance_strategy`` with the matching LaTeX label.
    """
    # (tolerance value, legend label) pairs, in plotting order.
    levels = (
        (1e-2, r'$10^{-2}$'),
        (1e-4, r'$10^{-4}$'),
        (1e-6, r'$10^{-6}$'),
        (1e-8, r'$10^{-8}$'),
    )
    return [
        get_tolerance_strategy(
            {'strategy': 'constant', 'delta': value, 'label': caption})
        for value, caption in levels
    ]
def get_power_strategies():
    """Build the power-law inner-tolerance strategies.

    Returns:
        A list with one 'power' strategy per exponent 1, 2, 3, 4 (with
        ``c = 1.0``), each created by ``get_tolerance_strategy`` with the
        matching LaTeX label.
    """
    # (exponent, legend label) pairs, in plotting order.
    exponents = (
        (1, r'$1/k$'),
        (2, r'$1/k^2$'),
        (3, r'$1/k^3$'),
        (4, r'$1/k^4$'),
    )
    return [
        get_tolerance_strategy(
            {'strategy': 'power', 'alpha': exponent, 'c': 1.0,
             'label': caption})
        for exponent, caption in exponents
    ]
def get_beta_strategies():
    """Build constant strategies parameterized by beta.

    Each strategy is a 'constant' one whose tolerance is ``beta ** 2``;
    the label shows the value of beta itself.

    Returns:
        A list of strategies for beta in (0.5, 0.1, 0.01, 0.001), each
        created by ``get_tolerance_strategy``.
    """
    # (beta, legend label) pairs, in plotting order.
    settings = (
        (0.5, r'$\beta = 0.5$'),
        (0.1, r'$\beta = 0.1$'),
        (0.01, r'$\beta = 0.01$'),
        (0.001, r'$\beta = 0.001$'),
    )
    return [
        get_tolerance_strategy(
            {'strategy': 'constant', 'delta': beta ** 2, 'label': caption})
        for beta, caption in settings
    ]
def run_experiment(n, line_search, max_iters):
    """Compare Cubic Newton variants on a ``PDifferenceOracle(3)`` problem.

    Runs ``cubic_newton`` with the 1/k^3 strategy, with the adaptive
    strategy, and with averaging (1/k^3); when ``line_search`` is false it
    additionally runs ``contracting_cubic_newton``.  The collected
    histories are handed to ``plot_func_residual``.

    Args:
        n: Dimension of the problem (the start point is ``np.ones(n)``).
        line_search: Forwarded to ``cubic_newton``; also selects the
            '_ls' filename suffix and the ', line search' title suffix.
        max_iters: Iteration limit forwarded to the methods.
    """
    print('Experiment: \t n = %d, \t line_search = %s, \t max_iters = %d.' %
          (n, str(line_search), max_iters))

    oracle = PDifferenceOracle(3)
    # The reference optimal value is evaluated at the origin.
    f_star = oracle.func(np.zeros(n))
    start_point = np.ones(n)
    initial_H = 1.0
    outer_tolerance = get_tolerance(
        {'criterion': 'func', 'f_star': f_star, 'tolerance': 1e-9})

    inverse_k = get_tolerance_strategy(
        {'strategy': 'power', 'c': 1.0, 'alpha': 1, 'label': r'$1/k$'})
    inverse_k_cubed = get_tolerance_strategy(
        {'strategy': 'power', 'c': 1.0, 'alpha': 3, 'label': r'$1/k^3$'})
    adaptive = get_tolerance_strategy(
        {'strategy': 'adaptive', 'c': 1.0, 'alpha': 1, 'label': 'adaptive'})

    # Options shared by every cubic_newton run below.
    shared_options = dict(
        max_iters=max_iters,
        H_0=initial_H,
        line_search=line_search,
        subsolver='NCG',
        trace=True,
        stopping_criterion_subproblem='grad_uniform_convex',
    )

    # (legend label, inner tolerance strategy, averaging flag), in run order.
    cubic_newton_runs = (
        (r'CN, $1/k^3$', inverse_k_cubed, False),
        ('CN, adaptive', adaptive, False),
        (r'Averaging, $1/k^3$', inverse_k_cubed, True),
    )

    histories = []
    labels = []
    for caption, strategy, use_averaging in cubic_newton_runs:
        _, _, run_history = cubic_newton(
            oracle, start_point, outer_tolerance,
            inner_tolerance_strategy=strategy,
            averaging=use_averaging,
            **shared_options)
        histories.append(run_history)
        labels.append(caption)

    if not line_search:
        _, _, contracting_history = contracting_cubic_newton(
            oracle, start_point, outer_tolerance,
            max_iters=max_iters,
            H_0=initial_H,
            prox_steps_tolerance_strategy=inverse_k,
            newton_steps_tolerance_strategy=inverse_k,
            trace=True)
        histories.append(contracting_history)
        labels.append(r'Contracting')

    filename = os.getcwd() + '/plots/averaging_%d' % n
    title = r'$n = %d$' % n
    if line_search:
        filename = filename + '_ls'
        title = title + ', line search'

    colors = ['blue', 'red', 'tab:green', 'tab:purple']
    line_styles = ['-.', '-', '-', ':']
    line_widths = [3, 2, 5, 5]
    alphas = [0.6, 1, 0.8, 0.8]
    plot_func_residual(histories, None, f_star, labels,
                       colors, line_styles, line_widths, alphas,
                       title, 'Iterations',
                       filename=filename + '.pdf',
                       figsize=(5.5, 5))
def run_experiment(dataset_filename, name, max_iters):
    """Run Cubic Newton on logistic regression and plot residual vs. time.

    Loads the dataset with ``load_svmlight_file``, builds the oracle with
    ``create_log_reg_oracle`` (third argument ``1 / number_of_rows``),
    estimates the optimal value with ``scipy.optimize.minimize`` and then
    runs ``cubic_newton`` (FGM subsolver, line search enabled) for two
    groups of inner tolerance strategies: constant + adaptive and
    power + adaptive.  Each group is passed to ``plot_func_residual``.

    Args:
        dataset_filename: Path given to ``load_svmlight_file``.
        name: Dataset name used in the plot title and output filename.
        max_iters: Iteration limit forwarded to ``cubic_newton``.
    """
    print('Experiment: \t %s, \t file: %s, \t max_iters = %d.' %
          (name, dataset_filename, max_iters))

    features, targets = load_svmlight_file(dataset_filename)
    oracle = create_log_reg_oracle(features, targets, 1 / features.shape[0])
    start_point = np.zeros(features.shape[1])

    print('Minimize by scipy ... ', flush=True, end='')
    reference = scipy.optimize.minimize(oracle.func, start_point,
                                        jac=oracle.grad, tol=1e-9)
    f_star = reference.fun
    print('f_star = %g.' % f_star)

    outer_tolerance = get_tolerance(
        {'criterion': 'func', 'f_star': f_star, 'tolerance': 1e-8})

    constant_family = get_constant_strategies()
    power_family = get_power_strategies()
    adaptive = get_tolerance_strategy(
        {'strategy': 'adaptive', 'c': 1.0, 'alpha': 1, 'label': 'adaptive'})
    constant_group = constant_family + [adaptive]
    power_group = power_family + [adaptive]

    def solve(strategy):
        # One cubic_newton run; only the inner tolerance strategy varies.
        return cubic_newton(
            oracle, start_point, outer_tolerance,
            max_iters=max_iters,
            H_0=1.0,
            line_search=True,
            inner_tolerance_strategy=strategy,
            subsolver='FGM',
            trace=True,
            B=None,
            Binv=None,
            stopping_criterion_subproblem='grad_uniform_convex')

    plot_title = 'Log-reg: %s' % name
    line_styles = ['-', '--', '-.', ':', '-']

    # Constant strategies (grey) against the adaptive one (red).
    constant_labels = get_labels(constant_group)
    constant_histories = run_method(solve, constant_group, constant_labels)
    output_stem = os.getcwd() + '/plots/logreg_%s_time' % (name)
    plot_func_residual(constant_histories, 'time', f_star, constant_labels,
                       ['grey'] * 4 + ['red'],
                       list(line_styles),
                       [5, 4, 3, 4, 2],
                       [0.8] * 4 + [1],
                       plot_title, 'Time, s',
                       filename=output_stem + '_const.pdf')

    # Power strategies (blue) against the adaptive one (red).
    power_labels = get_labels(power_group)
    power_histories = run_method(solve, power_group, power_labels)
    plot_func_residual(power_histories, 'time', f_star, power_labels,
                       ['blue'] * 4 + ['red'],
                       list(line_styles),
                       [5, 4, 3, 2, 2],
                       [0.6] * 4 + [1],
                       plot_title, 'Time, s',
                       filename=output_stem + '_powers.pdf')
def run_experiment(n, mu, max_iters):
    """Run Cubic Newton on a log-sum-exp problem and plot residual vs. time.

    The problem (oracle, optimal value and the ``B`` / ``Binv`` matrices)
    comes from ``generate_logsumexp(n, mu)``.  ``cubic_newton`` is run
    with the FGM subsolver and without line search for two groups of
    inner tolerance strategies: constant + adaptive and power + adaptive.
    Each group is passed to ``plot_func_residual``.

    Args:
        n: Problem size passed to ``generate_logsumexp``; the start point
            is ``np.ones(n)``.
        mu: Second argument of ``generate_logsumexp``; also shown in the
            plot title and encoded in the output filename.
        max_iters: Iteration limit forwarded to ``cubic_newton``.
    """
    print('Experiment: \t n = %d, \t mu = %g, \t max_iters = %d.' %
          (n, mu, max_iters))

    oracle, _, f_star, B, Binv = generate_logsumexp(n, mu)
    start_point = np.ones(n)
    outer_tolerance = get_tolerance(
        {'criterion': 'func', 'f_star': f_star, 'tolerance': 1e-8})

    constant_family = get_constant_strategies()
    power_family = get_power_strategies()
    adaptive = get_tolerance_strategy(
        {'strategy': 'adaptive', 'c': 1.0, 'alpha': 1, 'label': 'adaptive'})
    constant_group = constant_family + [adaptive]
    power_group = power_family + [adaptive]

    def solve(strategy):
        # One cubic_newton run; only the inner tolerance strategy varies.
        return cubic_newton(
            oracle, start_point, outer_tolerance,
            max_iters=max_iters,
            H_0=1.0,
            line_search=False,
            inner_tolerance_strategy=strategy,
            subsolver='FGM',
            trace=True,
            B=B,
            Binv=Binv,
            stopping_criterion_subproblem='grad_uniform_convex')

    line_styles = ['-', '--', '-.', ':', '-']

    # Constant strategies (grey) against the adaptive one (red).
    constant_labels = get_labels(constant_group)
    constant_histories = run_method(solve, constant_group, constant_labels)

    # Drop the first two characters of the '%g' rendering of mu
    # (e.g. '0.1' -> '1') to build the filename.
    mu_tag = ('%g' % mu)[2:]
    output_stem = os.getcwd() + '/plots/logsumexp_%d_%s_time' % (n, mu_tag)
    plot_func_residual(constant_histories, 'time', f_star, constant_labels,
                       ['grey'] * 4 + ['red'],
                       list(line_styles),
                       [5, 4, 3, 4, 2],
                       [0.8] * 4 + [1],
                       r'Log-sum-exp, $\mu = %g$' % mu, 'Time, s',
                       filename=output_stem + '_const.pdf')

    # Power strategies (blue) against the adaptive one (red).
    power_labels = get_labels(power_group)
    power_histories = run_method(solve, power_group, power_labels)
    plot_func_residual(power_histories, 'time', f_star, power_labels,
                       ['blue'] * 4 + ['red'],
                       list(line_styles),
                       [5, 4, 3, 2, 2],
                       [0.6] * 4 + [1],
                       r'Log-sum-exp, $\mu = %g$' % mu, 'Time, s',
                       filename=output_stem + '_powers.pdf')
def run_experiment(n, mu, max_iters):
    """Run Cubic Newton on log-sum-exp with the 'func' subproblem criterion.

    The problem comes from ``generate_logsumexp(n, mu)``.  ``cubic_newton``
    is run with the FGM subsolver, line search enabled and
    ``stopping_criterion_subproblem='func'`` for three groups of inner
    tolerance strategies (constant; power plus the last constant one;
    adaptive variants plus the last constant one).  Each group is passed to
    ``plot_func_residual_iter`` with the 'hess_vec_calls' history key.

    Args:
        n: Problem size passed to ``generate_logsumexp``; the start point
            is ``np.ones(n)``.
        mu: Second argument of ``generate_logsumexp``; also shown in the
            plot titles and encoded in the output filename.
        max_iters: Iteration limit forwarded to ``cubic_newton``.
    """
    print('Experiment: \t n = %d, \t mu = %g, \t max_iters = %d.' %
          (n, mu, max_iters))

    oracle, _, f_star, B, Binv = generate_logsumexp(n, mu)
    start_point = np.ones(n)
    outer_tolerance = get_tolerance(
        {'criterion': 'func', 'f_star': f_star, 'tolerance': 1e-8})

    constant_family = get_constant_strategies()
    power_family = get_power_strategies()
    adaptive_1 = get_tolerance_strategy(
        {'strategy': 'adaptive', 'c': 1.0, 'alpha': 1, 'label': 'adaptive'})
    adaptive_15 = get_tolerance_strategy(
        {'strategy': 'adaptive', 'c': 1.0, 'alpha': 1.5,
         'label': r'adaptive $1.5$'})
    adaptive_2 = get_tolerance_strategy(
        {'strategy': 'adaptive', 'c': 1.0, 'alpha': 2,
         'label': r'adaptive $2$'})
    # The last constant strategy serves as the common baseline.
    baseline = constant_family[-1]

    def solve(strategy):
        # One cubic_newton run; only the inner tolerance strategy varies.
        return cubic_newton(
            oracle, start_point, outer_tolerance,
            max_iters=max_iters,
            H_0=1.0,
            line_search=True,
            inner_tolerance_strategy=strategy,
            subsolver='FGM',
            trace=True,
            B=B,
            Binv=Binv,
            stopping_criterion_subproblem='func')

    # Drop the first two characters of the '%g' rendering of mu
    # (e.g. '0.1' -> '1') to build the filename.
    mu_tag = ('%g' % mu)[2:]
    output_stem = os.getcwd() + '/plots/exact_logsumexp_%d_%s' % (n, mu_tag)

    # One entry per figure: strategies, colors, line styles, line widths,
    # alphas, title template and filename suffix.
    panels = (
        (constant_family,
         ['grey', 'grey', 'grey', 'grey'],
         ['-', '--', '-.', ':'],
         [5, 4, 3, 4],
         [1, 1, 1, 1],
         r'Log-sum-exp, $\mu = %g$: constant strategies',
         '_const.pdf'),
        (power_family + [baseline],
         ['blue', 'blue', 'blue', 'blue', 'gray'],
         ['-', '--', '-.', ':', ':'],
         [5, 4, 3, 2, 4],
         [0.6, 0.6, 0.6, 0.6, 0.8],
         r'Log-sum-exp, $\mu = %g$: dynamic strategies',
         '_power.pdf'),
        ([adaptive_1, adaptive_15, adaptive_2, baseline],
         ['red', 'tab:orange', 'tab:orange', 'gray'],
         ['-', '--', '-.', ':'],
         [2, 4, 2, 4],
         [1, 1, 1, 0.8],
         r'Log-sum-exp, $\mu = %g$: adaptive strategies',
         '_adaptive.pdf'),
    )

    for group, colors, styles, widths, alphas, title_format, suffix in panels:
        group_labels = get_labels(group)
        group_histories = run_method(solve, group, group_labels)
        plot_func_residual_iter(group_histories, 'hess_vec_calls', f_star,
                                group_labels, colors, styles, widths, alphas,
                                title_format % mu,
                                'Hessian-vector products',
                                filename=output_stem + suffix)
def run_experiment(dataset_filename, name, max_iters):
    """Run Cubic Newton on logistic regression with the 'func' criterion.

    Loads the dataset with ``load_svmlight_file``, builds the oracle with
    ``create_log_reg_oracle`` (third argument ``1 / number_of_rows``) and
    estimates the optimal value with ``scipy.optimize.minimize``.
    ``cubic_newton`` is then run with the FGM subsolver, line search
    enabled and ``stopping_criterion_subproblem='func'`` for three groups
    of inner tolerance strategies (constant; power plus the last constant
    one; adaptive variants plus the last constant one).  Each group is
    passed to ``plot_func_residual_iter`` with the 'hess_vec_calls' key.

    Args:
        dataset_filename: Path given to ``load_svmlight_file``.
        name: Dataset name used in the plot titles and output filename.
        max_iters: Iteration limit forwarded to ``cubic_newton``.
    """
    print('Experiment: \t %s, \t file: %s, \t max_iters = %d.' %
          (name, dataset_filename, max_iters))

    features, targets = load_svmlight_file(dataset_filename)
    oracle = create_log_reg_oracle(features, targets, 1 / features.shape[0])
    start_point = np.zeros(features.shape[1])

    print('Minimize by scipy ... ', flush=True, end='')
    reference = scipy.optimize.minimize(oracle.func, start_point,
                                        jac=oracle.grad, tol=1e-9)
    f_star = reference.fun
    print('f_star = %g.' % f_star)

    outer_tolerance = get_tolerance(
        {'criterion': 'func', 'f_star': f_star, 'tolerance': 1e-8})

    constant_family = get_constant_strategies()
    power_family = get_power_strategies()
    adaptive_1 = get_tolerance_strategy(
        {'strategy': 'adaptive', 'c': 1.0, 'alpha': 1, 'label': 'adaptive'})
    adaptive_15 = get_tolerance_strategy(
        {'strategy': 'adaptive', 'c': 1.0, 'alpha': 1.5,
         'label': r'adaptive $1.5$'})
    adaptive_2 = get_tolerance_strategy(
        {'strategy': 'adaptive', 'c': 1.0, 'alpha': 2,
         'label': r'adaptive $2$'})
    # The last constant strategy serves as the common baseline.
    baseline = constant_family[-1]

    def solve(strategy):
        # One cubic_newton run; only the inner tolerance strategy varies.
        return cubic_newton(
            oracle, start_point, outer_tolerance,
            max_iters=max_iters,
            H_0=1.0,
            line_search=True,
            inner_tolerance_strategy=strategy,
            subsolver='FGM',
            trace=True,
            B=None,
            Binv=None,
            stopping_criterion_subproblem='func')

    output_stem = os.getcwd() + '/plots/exact_logreg_%s' % (name)

    # One entry per figure: strategies, colors, line styles, line widths,
    # alphas, title template and filename suffix.
    panels = (
        (constant_family,
         ['grey', 'grey', 'grey', 'grey'],
         ['-', '--', '-.', ':'],
         [5, 4, 3, 4],
         [1, 1, 1, 1],
         r'Log-reg, %s: constant strategies',
         '_const.pdf'),
        (power_family + [baseline],
         ['blue', 'blue', 'blue', 'blue', 'gray'],
         ['-', '--', '-.', ':', ':'],
         [5, 4, 3, 2, 4],
         [0.6, 0.6, 0.6, 0.6, 0.8],
         r'Log-reg, %s: dynamic strategies',
         '_power.pdf'),
        ([adaptive_1, adaptive_15, adaptive_2, baseline],
         ['red', 'tab:orange', 'tab:orange', 'gray'],
         ['-', '--', '-.', ':'],
         [2, 4, 2, 4],
         [1, 1, 1, 0.8],
         r'Log-reg, %s: adaptive strategies',
         '_adaptive.pdf'),
    )

    for group, colors, styles, widths, alphas, title_format, suffix in panels:
        group_labels = get_labels(group)
        group_histories = run_method(solve, group, group_labels)
        plot_func_residual_iter(group_histories, 'hess_vec_calls', f_star,
                                group_labels, colors, styles, widths, alphas,
                                title_format % name,
                                'Hessian-vector products',
                                filename=output_stem + suffix)
def contracting_cubic_newton(oracle, x_0, tolerance, max_iters=1000, H_0=1.0,
                             trace=True, prox_steps_max_iters=None,
                             prox_steps_tolerance_strategy=None,
                             newton_steps_tolerance_strategy=None,
                             B=None, Binv=None):
    """
    Accelerated Cubic Newton, using contracted proximal iterations.

    Each outer iteration builds a ``ContractingOracle`` around the current
    point and approximately minimizes its objective plus the Bregman
    divergence of d(x) = 1/3 ||x - x_0||^3 taken at v_k.  The minimization
    is done by a bounded number of steps of ``cubic_newton_step_ncg``.

    Parameters
    ----------
    oracle :
        Objective oracle; wrapped in ``OracleCallsCounter``.  ``func`` and
        ``grad`` are called on it directly here.
    x_0 :
        Starting point (copied, not modified).
    tolerance :
        Object whose ``stopping_condition(func, grad_norm_sqr)`` decides
        when the outer loop stops.
    max_iters :
        Maximum number of outer iterations.
    H_0 :
        Value used for H_k; H_k is never modified in this function.
    trace :
        If true, per-iteration values are stored in ``history``.
    prox_steps_max_iters :
        Maximum number of inner steps per outer iteration (10 if None).
    prox_steps_tolerance_strategy, newton_steps_tolerance_strategy :
        Tolerance strategies for the proximal subproblem and for each inner
        Newton step; both default to the 'power' strategy with c = 1.0 and
        alpha = 1.
    B, Binv :
        Passed to ``norms_init`` and to ``cubic_newton_step_ncg``.

    Returns
    -------
    (x_k, message, history) where ``message`` is "success" or
    "iterations_exceeded", and ``history`` is a ``defaultdict(list)`` with
    keys 'func', 'grad_sqr_norm', 'time', 'H', 'func_calls', 'grad_calls',
    'hess_calls', 'hess_vec_calls' (or None when ``trace`` is false).
    """
    oracle = OracleCallsCounter(oracle)

    # Initialization.
    history = defaultdict(list) if trace else None
    start_timestamp = datetime.now()
    # `precond` is returned by norms_init but not used in this function.
    l2_norm_sqr, dual_norm_sqr, to_dual, precond = norms_init(B, Binv)

    if prox_steps_tolerance_strategy is None:
        prox_steps_tolerance_strategy = get_tolerance_strategy(
            {'strategy': 'power', 'c': 1.0, 'alpha': 1})
    if newton_steps_tolerance_strategy is None:
        newton_steps_tolerance_strategy = get_tolerance_strategy(
            {'strategy': 'power', 'c': 1.0, 'alpha': 1})
    if prox_steps_max_iters is None:
        prox_steps_max_iters = 10

    x_k = np.copy(x_0)
    v_k = np.copy(x_0)
    func_k = oracle.func(x_k)
    grad_k = oracle.grad(x_k)
    grad_k_norm_sqr = dual_norm_sqr(grad_k)
    func_k_prev = None
    H_k = H_0
    A_k = 0.0

    # Main loop.
    # The extra (max_iters + 1)-th pass only records the final state and
    # reports "iterations_exceeded".
    for k in range(max_iters + 1):

        if trace:
            history['func'].append(func_k)
            history['grad_sqr_norm'].append(grad_k_norm_sqr)
            history['time'].append(
                (datetime.now() - start_timestamp).total_seconds())
            history['H'].append(H_k)
            history['func_calls'].append(oracle.func_calls)
            history['grad_calls'].append(oracle.grad_calls)
            history['hess_calls'].append(oracle.hess_calls)
            history['hess_vec_calls'].append(oracle.hess_vec_calls)

        if tolerance.stopping_condition(func_k, grad_k_norm_sqr):
            message = "success"
            break

        if k == max_iters:
            message = "iterations_exceeded"
            break

        # Choose A_k.
        A_k_new = (k + 1) ** 3.0 / H_k
        a_k_new = A_k_new - A_k

        # We minimize Contracted objective plus the Bregman divergence of d,
        # where d(x) = 1/3||x - x_0||^3.
        contracted_oracle = ContractingOracle(oracle, a_k_new, A_k, x_k)
        d = lambda x: 1.0 / 3 * l2_norm_sqr(x - x_0) ** 1.5
        d_prime = lambda x: l2_norm_sqr(x - x_0) ** 0.5 * to_dual(x - x_0)
        d_v_k = d(v_k)
        d_prime_v_k = d_prime(v_k)
        # Bregman(x) = d(x) - d(v_k) - <d'(v_k), x - v_k>.
        Bregman = lambda x: d(x) - d_v_k - d_prime_v_k.dot(x - v_k)

        T = np.copy(v_k)  # Initial point.
        g_T = contracted_oracle.grad(T)
        Func_T = contracted_oracle.func(T) + Bregman(T)
        Func_T_prev = None

        prox_tolerance_value = \
            prox_steps_tolerance_strategy.get_tolerance(k, func_k_prev, func_k)
        prox_steps_tolerance = \
            get_tolerance({'criterion': 'grad_uniform_convex',
                           'p': 3.0,
                           'sigma': 0.5,
                           'tolerance': prox_tolerance_value})

        # Iterations for computing the proximal step.
        for i in range(prox_steps_max_iters):

            # The closure reads T at call time; it is used by the step
            # below before T is updated in place.
            hess_vec = lambda v: contracted_oracle.hess_vec(T, v)
            # NOTE(review): g omits d_prime(T); presumably the cubic term
            # d is handled inside cubic_newton_step_ncg through `alpha`
            # and `c` -- confirm against that function.
            g = g_T - d_prime_v_k
            alpha = 1.0
            M = 1.0
            c = x_0 - T

            inner_tolerance_value = \
                newton_steps_tolerance_strategy.get_tolerance(
                    i, Func_T_prev, Func_T)
            inner_tolerance = get_tolerance(
                {'criterion': 'grad_uniform_convex',
                 'p': 3.0,
                 'sigma': 0.5 * M,
                 'tolerance': inner_tolerance_value})

            # `message` is reused for the inner solver status here; it is
            # reassigned before every `break` of the outer loop.
            T_d_k, model_T, message, hist = \
                cubic_newton_step_ncg(hess_vec, g, M, alpha, c,
                                      np.zeros_like(x_k), inner_tolerance,
                                      max_iters=100,
                                      trace=True,
                                      B=B,
                                      Binv=Binv)
            if message != 'success':
                print(message, flush=True)

            # In-place update; T is a private copy made above.
            T += T_d_k

            g_T = contracted_oracle.grad(T)
            # Gradient of the contracted objective plus Bregman(x) at T.
            G_T = g_T + d_prime(T) - d_prime_v_k
            G_T_norm_sqr = dual_norm_sqr(G_T)
            Func_T_prev = Func_T
            Func_T = contracted_oracle.func(T) + Bregman(T)

            if prox_steps_tolerance.stopping_condition(Func_T, G_T_norm_sqr):
                break

        v_k = T
        # New point: combination of v_k and the previous x_k with weights
        # a_k_new / A_k_new and A_k / A_k_new.
        x_k = (a_k_new * v_k + A_k * x_k) / A_k_new
        A_k = A_k_new
        func_k_prev = func_k
        func_k = oracle.func(x_k)
        grad_k = oracle.grad(x_k)
        grad_k_norm_sqr = dual_norm_sqr(grad_k)

    return x_k, message, history
def cubic_newton(oracle, x_0, tolerance, max_iters=1000, H_0=1.0,
                 line_search=False, trace=True,
                 inner_tolerance_strategy=None,
                 subsolver='FGM', B=None, Binv=None,
                 stopping_criterion_subproblem='grad_uniform_convex',
                 averaging=False):
    """
    Newton method with cubic regularization.

    Parameters
    ----------
    oracle :
        Objective oracle; wrapped in ``OracleCallsCounter``.  ``func``,
        ``grad``, ``hess_vec`` and (for the exact step) ``hess`` are
        called on it.
    x_0 :
        Starting point (copied, not modified).
    tolerance :
        Object whose ``stopping_condition(func, grad_norm_sqr)`` decides
        when the outer loop stops.  Its ``tolerance`` attribute is read
        when ``inner_tolerance_strategy`` is None.
    max_iters :
        Maximum number of outer iterations.
    H_0 :
        Initial value of the regularization parameter H_k.
    line_search :
        If true, H_k is doubled until ``func_T <= func_y_k + model_T``
        holds (at most 30 tries) and halved (not below 1e-8) after each
        accepted step.  If false, the first step is always accepted.
    trace :
        If true, per-iteration values are stored in ``history``.
    inner_tolerance_strategy :
        Strategy giving the subproblem tolerance per iteration; defaults
        to a 'constant' strategy with delta = tolerance.tolerance ** 1.5.
    subsolver :
        'FGM' or 'NCG' select ``cubic_newton_step_fgm`` /
        ``cubic_newton_step_ncg``; any other value uses only the step from
        ``cubic_newton_step`` (which needs the full Hessian).
    B, Binv :
        Passed to ``norms_init`` and to the subproblem solvers.
    stopping_criterion_subproblem :
        One of 'func', 'grad_uniform_convex', 'grad_norm_bound',
        'grad_norm_by_difference', 'grad_norm_by_oracle_grad'; any other
        value selects the 'grad' criterion.
    averaging :
        If true, the step is taken from
        y_k = lambda_k * x_k + (1 - lambda_k) * x_0 with
        lambda_k = (k / (k + 1)) ** 3 instead of from x_k.

    Returns
    -------
    (x_k, message, history) where ``message`` is "success",
    "iterations_exceeded" or "E: step_failure : ..." and ``history`` is a
    ``defaultdict(list)`` with keys 'func', 'grad_sqr_norm', 'time', 'H',
    'func_calls', 'grad_calls', 'hess_calls', 'hess_vec_calls',
    'inner_iters' (or None when ``trace`` is false).
    """
    oracle = OracleCallsCounter(oracle)

    # Initialization.
    history = defaultdict(list) if trace else None
    start_timestamp = datetime.now()
    # `to_dual` and `precond` are returned by norms_init but not used in
    # this function.
    l2_norm_sqr, dual_norm_sqr, to_dual, precond = norms_init(B, Binv)

    if inner_tolerance_strategy is None:
        inner_tolerance_strategy = get_tolerance_strategy(
            {'strategy': 'constant', 'delta': tolerance.tolerance ** 1.5})

    x_k = np.copy(x_0)
    func_k = oracle.func(x_k)
    grad_k = oracle.grad(x_k)
    grad_k_norm_sqr = dual_norm_sqr(grad_k)
    func_k_prev = None
    H_k = H_0

    # Counters used only to report inner iterations per outer iteration.
    prev_total_inner_iters = 0
    total_inner_iters = 0

    # Main loop.
    # The extra (max_iters + 1)-th pass only records the final state and
    # reports "iterations_exceeded".
    for k in range(max_iters + 1):

        if trace:
            history['func'].append(func_k)
            history['grad_sqr_norm'].append(grad_k_norm_sqr)
            history['time'].append(
                (datetime.now() - start_timestamp).total_seconds())
            history['H'].append(H_k)
            history['func_calls'].append(oracle.func_calls)
            history['grad_calls'].append(oracle.grad_calls)
            history['hess_calls'].append(oracle.hess_calls)
            history['hess_vec_calls'].append(oracle.hess_vec_calls)
            history['inner_iters'].append(
                total_inner_iters - prev_total_inner_iters)
            prev_total_inner_iters = total_inner_iters

        if tolerance.stopping_condition(func_k, grad_k_norm_sqr):
            message = "success"
            break

        if k == max_iters:
            message = "iterations_exceeded"
            break

        # Compute the direction.
        # d_k is the starting point handed to the subsolver; it is reset to
        # zero every outer iteration and reused across line-search tries.
        d_k = np.zeros_like(x_k)
        found = False

        inner_tolerance_value = \
            inner_tolerance_strategy.get_tolerance(k, func_k_prev, func_k)

        if averaging:
            # Step from a combination of the current point and x_0.
            lambda_k = (1.0 * k / (k + 1)) ** 3
            y_k = lambda_k * x_k + (1 - lambda_k) * x_0
            grad_y_k = oracle.grad(y_k)
            func_y_k = oracle.func(y_k)
        else:
            y_k = x_k
            grad_y_k = grad_k
            func_y_k = func_k

        line_search_max_iter = 30
        for i in range(line_search_max_iter + 1):
            # The last pass only reports failure; `found` stays False, so
            # the outer loop stops with a step_failure message.
            if i == line_search_max_iter:
                message = "adaptive_iterations_exceeded"
                break

            # Exact step: needed as the reference model value for the
            # 'func' criterion, and as the step itself when the subsolver
            # is neither 'FGM' nor 'NCG'.
            if stopping_criterion_subproblem == 'func' or \
               (subsolver != 'FGM' and subsolver != 'NCG'):
                Hess_y_k = oracle.hess(y_k)
                T_d_k, model_T, message = \
                    cubic_newton_step(grad_y_k, Hess_y_k, 0.5 * H_k, B)

            # Initialize the inner tolerance.
            if stopping_criterion_subproblem == 'func':
                inner_tolerance = \
                    get_tolerance({'criterion': 'func',
                                   'f_star': model_T,
                                   'tolerance': inner_tolerance_value})
            elif stopping_criterion_subproblem == 'grad_uniform_convex':
                inner_tolerance = \
                    get_tolerance({'criterion': 'grad_uniform_convex',
                                   'p': 3.0,
                                   'sigma': 0.25 * H_k,
                                   'tolerance': inner_tolerance_value})
            elif stopping_criterion_subproblem == 'grad_norm_bound':
                inner_tolerance = \
                    get_tolerance({'criterion': 'grad_norm_bound',
                                   'c': inner_tolerance_value})
            elif stopping_criterion_subproblem == 'grad_norm_by_difference' or \
                    stopping_criterion_subproblem == 'grad_norm_by_oracle_grad':
                if stopping_criterion_subproblem == 'grad_norm_by_difference':
                    # Bound based on the step length from y_k.
                    lambda_bound = lambda T: l2_norm_sqr(T - y_k) ** 2
                else:
                    # Bound based on the objective gradient at the trial
                    # point (costs an extra oracle.grad call per check).
                    lambda_bound = lambda T: dual_norm_sqr(oracle.grad(T))
                inner_tolerance = \
                    get_tolerance({'criterion': 'grad_norm_lambda_bound',
                                   'lambda_bound': lambda_bound,
                                   'c': inner_tolerance_value})
            else:
                # Heuristic stopping criterion.
                inner_tolerance = \
                    get_tolerance({'criterion': 'grad',
                                   'tolerance': inner_tolerance_value})

            hess_vec = lambda v: oracle.hess_vec(y_k, v)
            if subsolver == 'FGM':
                T_d_k, model_T, message, hist = \
                    cubic_newton_step_fgm(hess_vec, grad_y_k, 0.5 * H_k, d_k,
                                          inner_tolerance,
                                          max_iters=5000,
                                          trace=True,
                                          B=B,
                                          Binv=Binv)
            elif subsolver == 'NCG':
                T_d_k, model_T, message, hist = \
                    cubic_newton_step_ncg(hess_vec, grad_y_k, 0.5 * H_k, 0.0,
                                          np.zeros_like(grad_y_k), d_k,
                                          inner_tolerance,
                                          max_iters=5000,
                                          trace=True,
                                          B=B,
                                          Binv=Binv)

            # A failed subsolver only produces a warning; its result is
            # still used below.
            if message != "success":
                print('W: %s' % message, end=' ', flush=True)

            # `hist` exists only for the iterative subsolvers.
            if subsolver == 'FGM' or subsolver == 'NCG':
                last_inner_iters = len(hist['func'])
                total_inner_iters += last_inner_iters

            d_k = T_d_k
            T = y_k + T_d_k
            func_T = oracle.func(T)
            grad_T = oracle.grad(T)
            grad_T_norm_sqr = dual_norm_sqr(grad_T)

            if not line_search:
                found = True
                break

            # Check condition for H_k.
            model_min = func_y_k + model_T
            if func_T <= model_min:
                found = True
                break
            H_k *= 2

        if not found:
            message = "E: step_failure : " + message
            break

        if line_search:
            # Decrease H_k after an accepted step, bounded below by 1e-8.
            H_k *= 0.5
            H_k = max(H_k, 1e-8)

        x_k = T
        grad_k = grad_T
        func_k_prev = func_k
        func_k = func_T
        grad_k_norm_sqr = grad_T_norm_sqr

    return x_k, message, history