def run_experiment(n, line_search, max_iters):
    """
    Compare Cubic Newton variants on a PDifferenceOracle(3) objective.

    Runs cubic_newton with the power (alpha = 3) and adaptive inner
    tolerance strategies, then once more with averaging enabled. When
    `line_search` is falsy, contracting_cubic_newton is run as well.
    All recorded histories are handed to plot_func_residual, which is
    given the target file name plots/averaging_<n>[_ls].pdf under the
    current working directory.
    """
    print('Experiment: \t n = %d, \t line_search = %s, \t max_iters = %d.' %
          (n, str(line_search), max_iters))

    oracle = PDifferenceOracle(3)
    # The reference optimal value is taken at the origin.
    f_star = oracle.func(np.zeros(n))
    x_0 = np.ones(n)
    H_0 = 1.0

    tolerance = get_tolerance({
        'criterion': 'func',
        'f_star': f_star,
        'tolerance': 1e-9,
    })
    power_strategy_1 = get_tolerance_strategy({
        'strategy': 'power',
        'c': 1.0,
        'alpha': 1,
        'label': r'$1/k$',
    })
    power_strategy_3 = get_tolerance_strategy({
        'strategy': 'power',
        'c': 1.0,
        'alpha': 3,
        'label': r'$1/k^3$',
    })
    adaptive_strategy = get_tolerance_strategy({
        'strategy': 'adaptive',
        'c': 1.0,
        'alpha': 1,
        'label': 'adaptive',
    })

    histories = []
    labels = []

    # (plot label, inner tolerance strategy, extra keyword arguments).
    cubic_newton_runs = [
        (r'CN, $1/k^3$', power_strategy_3, {}),
        ('CN, adaptive', adaptive_strategy, {}),
        (r'Averaging, $1/k^3$', power_strategy_3, {'averaging': True}),
    ]
    for run_label, strategy, extra_kwargs in cubic_newton_runs:
        _, _, run_history = cubic_newton(
            oracle, x_0, tolerance,
            max_iters=max_iters,
            H_0=H_0,
            line_search=line_search,
            inner_tolerance_strategy=strategy,
            subsolver='NCG',
            trace=True,
            stopping_criterion_subproblem='grad_uniform_convex',
            **extra_kwargs)
        histories.append(run_history)
        labels.append(run_label)

    # The contracting method is only included in the run without line search.
    if not line_search:
        _, _, contracting_history = contracting_cubic_newton(
            oracle, x_0, tolerance,
            max_iters=max_iters,
            H_0=H_0,
            prox_steps_tolerance_strategy=power_strategy_1,
            newton_steps_tolerance_strategy=power_strategy_1,
            trace=True)
        histories.append(contracting_history)
        labels.append(r'Contracting')

    filename = os.getcwd() + '/plots/averaging_%d' % n
    title = r'$n = %d$' % n
    if line_search:
        filename += '_ls'
        title += ', line search'

    colors = ['blue', 'red', 'tab:green', 'tab:purple']
    linestyles = ['-.', '-', '-', ':']
    linewidths = [3, 2, 5, 5]
    alphas = [0.6, 1, 0.8, 0.8]
    plot_func_residual(histories, None, f_star, labels,
                       colors, linestyles, linewidths, alphas,
                       title, 'Iterations',
                       filename=filename + '.pdf',
                       figsize=(5.5, 5))
def run_experiment(dataset_filename, name, max_iters):
    """
    Logistic regression experiment: residual versus time.

    Loads the svmlight dataset, builds the log-reg oracle with the third
    argument 1 / (number of rows), estimates f_star with
    scipy.optimize.minimize, and runs cubic_newton (FGM subsolver, line
    search on) for two families of inner tolerance strategies:
    constant + adaptive and power + adaptive. One plot_func_residual call
    is made per family, with file names ending in _const.pdf and
    _powers.pdf.
    """
    print('Experiment: \t %s, \t file: %s, \t max_iters = %d.' %
          (name, dataset_filename, max_iters))

    X, y = load_svmlight_file(dataset_filename)
    oracle = create_log_reg_oracle(X, y, 1 / X.shape[0])
    x_0 = np.zeros(X.shape[1])

    print('Minimize by scipy ... ', flush=True, end='')
    scipy_result = scipy.optimize.minimize(oracle.func, x_0,
                                           jac=oracle.grad, tol=1e-9)
    f_star = scipy_result.fun
    print('f_star = %g.' % f_star)

    H_0 = 1.0
    line_search = True
    tolerance = get_tolerance({
        'criterion': 'func',
        'f_star': f_star,
        'tolerance': 1e-8,
    })
    subsolver = 'FGM'
    stopping_criterion_subproblem = 'grad_uniform_convex'

    constant_strategies = get_constant_strategies()
    power_strategies = get_power_strategies()
    adaptive_strategy = get_tolerance_strategy({
        'strategy': 'adaptive',
        'c': 1.0,
        'alpha': 1,
        'label': 'adaptive',
    })

    const_suite = constant_strategies + [adaptive_strategy]
    power_suite = power_strategies + [adaptive_strategy]

    def method(strategy):
        # One Cubic Newton run; only the inner tolerance strategy varies.
        return cubic_newton(
            oracle, x_0, tolerance,
            max_iters=max_iters,
            H_0=H_0,
            line_search=line_search,
            inner_tolerance_strategy=strategy,
            subsolver=subsolver,
            trace=True,
            B=None,
            Binv=None,
            stopping_criterion_subproblem=stopping_criterion_subproblem)

    linestyles = ['-', '--', '-.', ':', '-']
    title = 'Log-reg: %s'
    x_label = 'Time, s'

    # Constant strategies against the adaptive one.
    const_labels = get_labels(const_suite)
    const_histories = run_method(method, const_suite, const_labels)
    filename = os.getcwd() + '/plots/logreg_%s_time' % (name)
    plot_func_residual(const_histories, 'time', f_star, const_labels,
                       ['grey', 'grey', 'grey', 'grey', 'red'],
                       list(linestyles),
                       [5, 4, 3, 4, 2],
                       [0.8, 0.8, 0.8, 0.8, 1],
                       title % name, x_label,
                       filename=filename + '_const.pdf')

    # Power strategies against the adaptive one.
    power_labels = get_labels(power_suite)
    power_histories = run_method(method, power_suite, power_labels)
    plot_func_residual(power_histories, 'time', f_star, power_labels,
                       ['blue', 'blue', 'blue', 'blue', 'red'],
                       list(linestyles),
                       [5, 4, 3, 2, 2],
                       [0.6, 0.6, 0.6, 0.6, 1],
                       title % name, x_label,
                       filename=filename + '_powers.pdf')
def create_display_boundaries(pg_cur, settings):
    """
    Step 3 of 3 : create web optimised versions of the census boundaries.

    For every entry of settings['bdy_table_dicts'] except the "mb"
    boundary, three SQL statements are generated: one that (re)creates the
    display table in the web schema, one that fills it with thinned
    geometry plus one GeoJSON column per zoom level 4..17, and one that
    runs VACUUM ANALYZE on it. The three statement lists are then passed,
    in that order, to utils.multiprocess_list.

    :param pg_cur: database cursor; used here only to create the web schema
    :param settings: dict of schema names, user, census year, population
        stat/table names and the boundary table definitions
    """
    start_time = datetime.now()
    web_schema = settings['web_schema']

    # create schema
    if web_schema != "public":
        pg_cur.execute(
            "CREATE SCHEMA IF NOT EXISTS {0} AUTHORIZATION {1}".format(
                web_schema, settings['pg_user']))

    zoom_levels = range(4, 18)

    create_sql_list = []
    insert_sql_list = []
    vacuum_sql_list = []

    # prepare boundaries for all tiled map zoom levels
    for boundary_dict in settings['bdy_table_dicts']:
        boundary_name = boundary_dict["boundary"]
        if boundary_name == "mb":
            continue

        id_field = boundary_dict["id_field"]
        name_field = boundary_dict["name_field"]
        area_field = boundary_dict["area_field"]

        input_pg_table = "{0}_{1}_aust".format(boundary_name,
                                               settings["census_year"])
        pg_table = "{0}".format(boundary_name)

        # ---- create table statement ----
        columns = [
            "id text NOT NULL PRIMARY KEY",
            "name text NOT NULL",
            "area double precision NOT NULL",
            "population double precision NOT NULL",
            "geom geometry(MultiPolygon, 4283) NULL",
        ]
        columns.extend(
            "geojson_{0} jsonb NOT NULL".format(str(zoom).zfill(2))
            for zoom in zoom_levels)

        create_template = "".join([
            "DROP TABLE IF EXISTS {0}.{1} CASCADE;",
            "CREATE TABLE {0}.{1} (",
            ",".join(columns),
            ") WITH (OIDS=FALSE);",
            "ALTER TABLE {0}.{1} OWNER TO {2};",
            "CREATE INDEX {1}_geom_idx ON {0}.{1} USING gist (geom);",
            "ALTER TABLE {0}.{1} CLUSTER ON {1}_geom_idx",
        ])
        create_sql_list.append(
            create_template.format(web_schema, pg_table,
                                   settings['pg_user']))

        # ---- population field and table ----
        # boundaries whose name starts with "i" use the indigenous settings
        if boundary_name[:1] == "i":
            pop_stat = settings['indigenous_population_stat']
            pop_table = settings['indigenous_population_table']
        else:
            pop_stat = settings['population_stat']
            pop_table = settings['population_table']

        # ---- insert statement ----
        insert_head = "INSERT INTO {0}.{1}".format(web_schema, pg_table)
        select_clause = (
            "SELECT bdy.{0} AS id, {1} AS name, SUM(bdy.{2}) AS area, tab.{3} AS population,"
            .format(id_field, name_field, area_field, pop_stat))

        # thin geometry to make querying faster
        geom_clause = (
            "ST_Transform(ST_Multi(ST_Union(ST_SimplifyVW("
            "ST_Transform(geom, 3577), {0}))), 4283),"
        ).format(utils.get_tolerance(10))

        # one geojson expression per zoom level: geometry thinned to the
        # zoom's tolerance, coordinates trimmed to its decimal places
        geojson_clause = ",".join(
            ("ST_AsGeoJSON(ST_Transform(ST_Multi(ST_Union(ST_SimplifyVW(ST_Transform("
             "bdy.geom, 3577), {0}))), 4283), {1})::jsonb").format(
                utils.get_tolerance(zoom),
                utils.get_decimal_places(zoom))
            for zoom in zoom_levels)

        insert_parts = [
            insert_head,
            select_clause,
            geom_clause,
            geojson_clause,
            "FROM {0}.{1} AS bdy".format(settings['boundary_schema'],
                                         input_pg_table),
            "INNER JOIN {0}.{1}_{2} AS tab".format(settings['data_schema'],
                                                   boundary_name,
                                                   pop_table),
            "ON bdy.{0} = tab.{1}".format(id_field,
                                          settings["region_id_field"]),
            "WHERE bdy.geom IS NOT NULL",
            "GROUP BY {0}, {1}, {2}".format(id_field, name_field, pop_stat),
        ]
        insert_sql_list.append(" ".join(insert_parts))

        vacuum_sql_list.append(
            "VACUUM ANALYZE {0}.{1}".format(web_schema, pg_table))

    # run the three batches in order: create, insert, vacuum
    for sql_batch in (create_sql_list, insert_sql_list, vacuum_sql_list):
        utils.multiprocess_list("sql", sql_batch, settings, logger)

    logger.info(
        "\t- Step 3 of 3 : web optimised boundaries created : {0}".format(
            datetime.now() - start_time))
def run_experiment(n, mu, max_iters):
    """
    Log-sum-exp experiment: residual versus time.

    Generates the problem with generate_logsumexp(n, mu) and runs
    cubic_newton (FGM subsolver, no line search, norms given by B / Binv)
    for two families of inner tolerance strategies: constant + adaptive
    and power + adaptive. One plot_func_residual call is made per family,
    with file names ending in _const.pdf and _powers.pdf.
    """
    print('Experiment: \t n = %d, \t mu = %g, \t max_iters = %d.' %
          (n, mu, max_iters))

    oracle, x_star, f_star, B, Binv = generate_logsumexp(n, mu)
    x_0 = np.ones(n)
    H_0 = 1.0
    line_search = False
    tolerance = get_tolerance({
        'criterion': 'func',
        'f_star': f_star,
        'tolerance': 1e-8,
    })
    subsolver = 'FGM'
    stopping_criterion_subproblem = 'grad_uniform_convex'

    constant_strategies = get_constant_strategies()
    power_strategies = get_power_strategies()
    adaptive_strategy = get_tolerance_strategy({
        'strategy': 'adaptive',
        'c': 1.0,
        'alpha': 1,
        'label': 'adaptive',
    })

    const_suite = constant_strategies + [adaptive_strategy]
    power_suite = power_strategies + [adaptive_strategy]

    def method(strategy):
        # One Cubic Newton run; only the inner tolerance strategy varies.
        return cubic_newton(
            oracle, x_0, tolerance,
            max_iters=max_iters,
            H_0=H_0,
            line_search=line_search,
            inner_tolerance_strategy=strategy,
            subsolver=subsolver,
            trace=True,
            B=B,
            Binv=Binv,
            stopping_criterion_subproblem=stopping_criterion_subproblem)

    linestyles = ['-', '--', '-.', ':', '-']
    x_label = 'Time, s'

    # Constant strategies against the adaptive one.
    const_labels = get_labels(const_suite)
    const_histories = run_method(method, const_suite, const_labels)

    # File name tag: the '%g' rendering of mu without its first two
    # characters.
    mu_str = ('%g' % mu)[2:]
    filename = os.getcwd() + '/plots/logsumexp_%d_%s_time' % (n, mu_str)
    plot_func_residual(const_histories, 'time', f_star, const_labels,
                       ['grey', 'grey', 'grey', 'grey', 'red'],
                       list(linestyles),
                       [5, 4, 3, 4, 2],
                       [0.8, 0.8, 0.8, 0.8, 1],
                       r'Log-sum-exp, $\mu = %g$' % mu, x_label,
                       filename=filename + '_const.pdf')

    # Power strategies against the adaptive one.
    power_labels = get_labels(power_suite)
    power_histories = run_method(method, power_suite, power_labels)
    plot_func_residual(power_histories, 'time', f_star, power_labels,
                       ['blue', 'blue', 'blue', 'blue', 'red'],
                       list(linestyles),
                       [5, 4, 3, 2, 2],
                       [0.6, 0.6, 0.6, 0.6, 1],
                       r'Log-sum-exp, $\mu = %g$' % mu, x_label,
                       filename=filename + '_powers.pdf')
def run_experiment(n, mu, max_iters):
    """
    Log-sum-exp experiment: residual versus Hessian-vector products.

    Generates the problem with generate_logsumexp(n, mu) and runs
    cubic_newton (FGM subsolver, line search on, 'func' stopping
    criterion for the subproblem) for three groups of inner tolerance
    strategies: constant, power (plus the last constant one) and adaptive
    with alpha in {1, 1.5, 2} (plus the last constant one). One
    plot_func_residual_iter call is made per group, with file names
    ending in _const.pdf, _power.pdf and _adaptive.pdf.
    """
    print('Experiment: \t n = %d, \t mu = %g, \t max_iters = %d.' %
          (n, mu, max_iters))

    oracle, x_star, f_star, B, Binv = generate_logsumexp(n, mu)
    x_0 = np.ones(n)
    H_0 = 1.0
    line_search = True
    tolerance = get_tolerance({
        'criterion': 'func',
        'f_star': f_star,
        'tolerance': 1e-8,
    })
    subsolver = 'FGM'
    stopping_criterion_subproblem = 'func'

    constant_strategies = get_constant_strategies()
    power_strategies = get_power_strategies()
    adaptive_strategy = get_tolerance_strategy({
        'strategy': 'adaptive',
        'c': 1.0,
        'alpha': 1,
        'label': 'adaptive',
    })
    adaptive_15_strategy = get_tolerance_strategy({
        'strategy': 'adaptive',
        'c': 1.0,
        'alpha': 1.5,
        'label': r'adaptive $1.5$',
    })
    adaptive_2_strategy = get_tolerance_strategy({
        'strategy': 'adaptive',
        'c': 1.0,
        'alpha': 2,
        'label': r'adaptive $2$',
    })

    # The last constant strategy serves as the baseline of groups 2 and 3.
    baseline = constant_strategies[-1]

    def method(strategy):
        # One Cubic Newton run; only the inner tolerance strategy varies.
        return cubic_newton(
            oracle, x_0, tolerance,
            max_iters=max_iters,
            H_0=H_0,
            line_search=line_search,
            inner_tolerance_strategy=strategy,
            subsolver=subsolver,
            trace=True,
            B=B,
            Binv=Binv,
            stopping_criterion_subproblem=stopping_criterion_subproblem)

    # File name tag: the '%g' rendering of mu without its first two
    # characters.
    mu_str = ('%g' % mu)[2:]
    filename = os.getcwd() + '/plots/exact_logsumexp_%d_%s' % (n, mu_str)

    # (strategies, colors, linestyles, linewidths, alphas,
    #  title template, file suffix)
    suites = [
        (constant_strategies,
         ['grey', 'grey', 'grey', 'grey'],
         ['-', '--', '-.', ':'],
         [5, 4, 3, 4],
         [1, 1, 1, 1],
         r'Log-sum-exp, $\mu = %g$: constant strategies',
         '_const.pdf'),
        (power_strategies + [baseline],
         ['blue', 'blue', 'blue', 'blue', 'gray'],
         ['-', '--', '-.', ':', ':'],
         [5, 4, 3, 2, 4],
         [0.6, 0.6, 0.6, 0.6, 0.8],
         r'Log-sum-exp, $\mu = %g$: dynamic strategies',
         '_power.pdf'),
        ([adaptive_strategy, adaptive_15_strategy, adaptive_2_strategy,
          baseline],
         ['red', 'tab:orange', 'tab:orange', 'gray'],
         ['-', '--', '-.', ':'],
         [2, 4, 2, 4],
         [1, 1, 1, 0.8],
         r'Log-sum-exp, $\mu = %g$: adaptive strategies',
         '_adaptive.pdf'),
    ]

    for (strategies, colors, linestyles, linewidths, alphas,
         title_template, suffix) in suites:
        labels = get_labels(strategies)
        histories = run_method(method, strategies, labels)
        plot_func_residual_iter(histories, 'hess_vec_calls', f_star, labels,
                                colors, linestyles, linewidths, alphas,
                                title_template % mu,
                                'Hessian-vector products',
                                filename=filename + suffix)
def run_experiment(dataset_filename, name, max_iters):
    """
    Logistic regression experiment: residual versus Hessian-vector products.

    Loads the svmlight dataset, builds the log-reg oracle with the third
    argument 1 / (number of rows), estimates f_star with
    scipy.optimize.minimize, and runs cubic_newton (FGM subsolver, line
    search on, 'func' stopping criterion for the subproblem) for three
    groups of inner tolerance strategies: constant, power (plus the last
    constant one) and adaptive with alpha in {1, 1.5, 2} (plus the last
    constant one). One plot_func_residual_iter call is made per group,
    with file names ending in _const.pdf, _power.pdf and _adaptive.pdf.
    """
    print('Experiment: \t %s, \t file: %s, \t max_iters = %d.' %
          (name, dataset_filename, max_iters))

    X, y = load_svmlight_file(dataset_filename)
    oracle = create_log_reg_oracle(X, y, 1 / X.shape[0])
    x_0 = np.zeros(X.shape[1])

    print('Minimize by scipy ... ', flush=True, end='')
    scipy_result = scipy.optimize.minimize(oracle.func, x_0,
                                           jac=oracle.grad, tol=1e-9)
    f_star = scipy_result.fun
    print('f_star = %g.' % f_star)

    H_0 = 1.0
    line_search = True
    tolerance = get_tolerance({
        'criterion': 'func',
        'f_star': f_star,
        'tolerance': 1e-8,
    })
    subsolver = 'FGM'
    stopping_criterion_subproblem = 'func'

    constant_strategies = get_constant_strategies()
    power_strategies = get_power_strategies()
    adaptive_strategy = get_tolerance_strategy({
        'strategy': 'adaptive',
        'c': 1.0,
        'alpha': 1,
        'label': 'adaptive',
    })
    adaptive_15_strategy = get_tolerance_strategy({
        'strategy': 'adaptive',
        'c': 1.0,
        'alpha': 1.5,
        'label': r'adaptive $1.5$',
    })
    adaptive_2_strategy = get_tolerance_strategy({
        'strategy': 'adaptive',
        'c': 1.0,
        'alpha': 2,
        'label': r'adaptive $2$',
    })

    # The last constant strategy serves as the baseline of groups 2 and 3.
    baseline = constant_strategies[-1]

    def method(strategy):
        # One Cubic Newton run; only the inner tolerance strategy varies.
        return cubic_newton(
            oracle, x_0, tolerance,
            max_iters=max_iters,
            H_0=H_0,
            line_search=line_search,
            inner_tolerance_strategy=strategy,
            subsolver=subsolver,
            trace=True,
            B=None,
            Binv=None,
            stopping_criterion_subproblem=stopping_criterion_subproblem)

    filename = os.getcwd() + '/plots/exact_logreg_%s' % (name)

    # (strategies, colors, linestyles, linewidths, alphas,
    #  title template, file suffix)
    suites = [
        (constant_strategies,
         ['grey', 'grey', 'grey', 'grey'],
         ['-', '--', '-.', ':'],
         [5, 4, 3, 4],
         [1, 1, 1, 1],
         r'Log-reg, %s: constant strategies',
         '_const.pdf'),
        (power_strategies + [baseline],
         ['blue', 'blue', 'blue', 'blue', 'gray'],
         ['-', '--', '-.', ':', ':'],
         [5, 4, 3, 2, 4],
         [0.6, 0.6, 0.6, 0.6, 0.8],
         r'Log-reg, %s: dynamic strategies',
         '_power.pdf'),
        ([adaptive_strategy, adaptive_15_strategy, adaptive_2_strategy,
          baseline],
         ['red', 'tab:orange', 'tab:orange', 'gray'],
         ['-', '--', '-.', ':'],
         [2, 4, 2, 4],
         [1, 1, 1, 0.8],
         r'Log-reg, %s: adaptive strategies',
         '_adaptive.pdf'),
    ]

    for (strategies, colors, linestyles, linewidths, alphas,
         title_template, suffix) in suites:
        labels = get_labels(strategies)
        histories = run_method(method, strategies, labels)
        plot_func_residual_iter(histories, 'hess_vec_calls', f_star, labels,
                                colors, linestyles, linewidths, alphas,
                                title_template % name,
                                'Hessian-vector products',
                                filename=filename + suffix)
def contracting_cubic_newton(oracle, x_0, tolerance, max_iters=1000, H_0=1.0,
                             trace=True, prox_steps_max_iters=None,
                             prox_steps_tolerance_strategy=None,
                             newton_steps_tolerance_strategy=None,
                             B=None, Binv=None):
    """
    Accelerated Cubic Newton, using contracted proximal iterations.

    Each outer iteration k picks A_{k+1} = (k + 1)^3 / H_k, wraps the
    oracle into a ContractingOracle and approximately minimizes the
    contracted objective plus the Bregman divergence of
    d(x) = 1/3 ||x - x_0||^3 taken at v_k, using at most
    `prox_steps_max_iters` steps of cubic_newton_step_ncg.

    When not given, both tolerance strategies default to the 'power'
    strategy with c = 1.0, alpha = 1, and `prox_steps_max_iters` to 10.

    Returns the triple (x_k, message, history):
      - message is "success" when `tolerance.stopping_condition` fired and
        "iterations_exceeded" when `max_iters` iterations were performed;
      - history is a defaultdict of per-iteration lists when `trace` is
        true, otherwise None.
    """
    oracle = OracleCallsCounter(oracle)

    # Initialization.
    history = defaultdict(list) if trace else None
    start_timestamp = datetime.now()
    l2_norm_sqr, dual_norm_sqr, to_dual, precond = norms_init(B, Binv)

    if prox_steps_tolerance_strategy is None:
        prox_steps_tolerance_strategy = get_tolerance_strategy(
            {'strategy': 'power', 'c': 1.0, 'alpha': 1})
    if newton_steps_tolerance_strategy is None:
        newton_steps_tolerance_strategy = get_tolerance_strategy(
            {'strategy': 'power', 'c': 1.0, 'alpha': 1})
    if prox_steps_max_iters is None:
        prox_steps_max_iters = 10

    def d(x):
        # Prox-function d(x) = 1/3 ||x - x_0||^3.
        return 1.0 / 3 * l2_norm_sqr(x - x_0) ** 1.5

    def d_prime(x):
        # Gradient of d.
        return l2_norm_sqr(x - x_0) ** 0.5 * to_dual(x - x_0)

    # Constant parameters passed to every inner cubic step.
    alpha = 1.0
    M = 1.0

    x_k = np.copy(x_0)
    v_k = np.copy(x_0)
    func_k = oracle.func(x_k)
    grad_k = oracle.grad(x_k)
    grad_k_norm_sqr = dual_norm_sqr(grad_k)
    func_k_prev = None
    H_k = H_0
    A_k = 0.0

    # Main loop.
    for k in range(max_iters + 1):

        if trace:
            history['func'].append(func_k)
            history['grad_sqr_norm'].append(grad_k_norm_sqr)
            elapsed = (datetime.now() - start_timestamp).total_seconds()
            for key, value in (('time', elapsed),
                               ('H', H_k),
                               ('func_calls', oracle.func_calls),
                               ('grad_calls', oracle.grad_calls),
                               ('hess_calls', oracle.hess_calls),
                               ('hess_vec_calls', oracle.hess_vec_calls)):
                history[key].append(value)

        if tolerance.stopping_condition(func_k, grad_k_norm_sqr):
            message = "success"
            break

        if k == max_iters:
            message = "iterations_exceeded"
            break

        # Choose A_k.
        A_k_new = (k + 1) ** 3.0 / H_k
        a_k_new = A_k_new - A_k

        # We minimize Contracted objective plus the Bregman divergence of d
        # around the current prox-center v_k.
        contracted_oracle = ContractingOracle(oracle, a_k_new, A_k, x_k)

        d_v_k = d(v_k)
        d_prime_v_k = d_prime(v_k)

        def Bregman(x):
            return d(x) - d_v_k - d_prime_v_k.dot(x - v_k)

        T = np.copy(v_k)  # Initial point.
        g_T = contracted_oracle.grad(T)
        Func_T = contracted_oracle.func(T) + Bregman(T)
        Func_T_prev = None

        prox_tolerance_value = prox_steps_tolerance_strategy.get_tolerance(
            k, func_k_prev, func_k)
        prox_steps_tolerance = get_tolerance({
            'criterion': 'grad_uniform_convex',
            'p': 3.0,
            'sigma': 0.5,
            'tolerance': prox_tolerance_value,
        })

        # Iterations for computing the proximal step.
        for i in range(prox_steps_max_iters):

            def hess_vec(v):
                # T is updated in place below, so this always refers to
                # the current inner point.
                return contracted_oracle.hess_vec(T, v)

            g = g_T - d_prime_v_k
            c = x_0 - T

            inner_tolerance_value = \
                newton_steps_tolerance_strategy.get_tolerance(
                    i, Func_T_prev, Func_T)
            inner_tolerance = get_tolerance({
                'criterion': 'grad_uniform_convex',
                'p': 3.0,
                'sigma': 0.5 * M,
                'tolerance': inner_tolerance_value,
            })

            T_d_k, model_T, step_message, hist = cubic_newton_step_ncg(
                hess_vec, g, M, alpha, c, np.zeros_like(x_k),
                inner_tolerance,
                max_iters=100,
                trace=True,
                B=B,
                Binv=Binv)
            if step_message != 'success':
                print(step_message, flush=True)

            T += T_d_k
            g_T = contracted_oracle.grad(T)
            G_T = g_T + d_prime(T) - d_prime_v_k
            G_T_norm_sqr = dual_norm_sqr(G_T)
            Func_T_prev = Func_T
            Func_T = contracted_oracle.func(T) + Bregman(T)

            if prox_steps_tolerance.stopping_condition(Func_T, G_T_norm_sqr):
                break

        # Accept the proximal point and form the new convex combination.
        v_k = T
        x_k = (a_k_new * v_k + A_k * x_k) / A_k_new
        A_k = A_k_new

        func_k_prev = func_k
        func_k = oracle.func(x_k)
        grad_k = oracle.grad(x_k)
        grad_k_norm_sqr = dual_norm_sqr(grad_k)

    return x_k, message, history
def cubic_newton(oracle, x_0, tolerance, max_iters=1000, H_0=1.0,
                 line_search=False, trace=True, inner_tolerance_strategy=None,
                 subsolver='FGM', B=None, Binv=None,
                 stopping_criterion_subproblem='grad_uniform_convex',
                 averaging=False):
    """
    Newton method with cubic regularization.

    At every iteration a cubic step is computed from the point y_k, which
    is x_k itself or, when `averaging` is set, the combination
    lambda_k * x_k + (1 - lambda_k) * x_0 with lambda_k = (k / (k + 1))^3.
    The step is produced by cubic_newton_step_fgm ('FGM'),
    cubic_newton_step_ncg ('NCG') or, for any other `subsolver`, by
    cubic_newton_step on the full Hessian. With `line_search` enabled H_k
    is doubled until func(T) <= func(y_k) + model value (at most 30
    tries) and halved after each accepted step, never dropping below 1e-8.

    When `inner_tolerance_strategy` is None, a 'constant' strategy with
    delta = tolerance.tolerance ** 1.5 is used.

    Returns the triple (x_k, message, history):
      - message is "success", "iterations_exceeded", or
        "E: step_failure : ..." when no acceptable step was found;
      - history is a defaultdict of per-iteration lists when `trace` is
        true, otherwise None.
    """
    oracle = OracleCallsCounter(oracle)

    # Initialization.
    history = defaultdict(list) if trace else None
    start_timestamp = datetime.now()
    l2_norm_sqr, dual_norm_sqr, to_dual, precond = norms_init(B, Binv)

    if inner_tolerance_strategy is None:
        inner_tolerance_strategy = get_tolerance_strategy(
            {'strategy': 'constant', 'delta': tolerance.tolerance ** 1.5})

    iterative_subsolver = subsolver in ('FGM', 'NCG')
    line_search_max_iter = 30

    x_k = np.copy(x_0)
    func_k = oracle.func(x_k)
    grad_k = oracle.grad(x_k)
    grad_k_norm_sqr = dual_norm_sqr(grad_k)
    func_k_prev = None
    H_k = H_0

    prev_total_inner_iters = 0
    total_inner_iters = 0

    # Main loop.
    for k in range(max_iters + 1):

        if trace:
            history['func'].append(func_k)
            history['grad_sqr_norm'].append(grad_k_norm_sqr)
            elapsed = (datetime.now() - start_timestamp).total_seconds()
            for key, value in (
                    ('time', elapsed),
                    ('H', H_k),
                    ('func_calls', oracle.func_calls),
                    ('grad_calls', oracle.grad_calls),
                    ('hess_calls', oracle.hess_calls),
                    ('hess_vec_calls', oracle.hess_vec_calls),
                    ('inner_iters',
                     total_inner_iters - prev_total_inner_iters)):
                history[key].append(value)
            prev_total_inner_iters = total_inner_iters

        if tolerance.stopping_condition(func_k, grad_k_norm_sqr):
            message = "success"
            break

        if k == max_iters:
            message = "iterations_exceeded"
            break

        # Compute the direction.
        d_k = np.zeros_like(x_k)
        found = False

        inner_tolerance_value = \
            inner_tolerance_strategy.get_tolerance(k, func_k_prev, func_k)

        if averaging:
            lambda_k = (1.0 * k / (k + 1)) ** 3
            y_k = lambda_k * x_k + (1 - lambda_k) * x_0
            grad_y_k = oracle.grad(y_k)
            func_y_k = oracle.func(y_k)
        else:
            y_k, grad_y_k, func_y_k = x_k, grad_k, func_k

        for _ in range(line_search_max_iter):

            # The exact step is needed either as the final answer (no
            # iterative subsolver) or to provide the model optimum for
            # the 'func' stopping criterion.
            if stopping_criterion_subproblem == 'func' or \
                    not iterative_subsolver:
                Hess_y_k = oracle.hess(y_k)
                T_d_k, model_T, message = \
                    cubic_newton_step(grad_y_k, Hess_y_k, 0.5 * H_k, B)

            # Initialize the inner tolerance.
            if stopping_criterion_subproblem == 'func':
                tolerance_spec = {'criterion': 'func',
                                  'f_star': model_T,
                                  'tolerance': inner_tolerance_value}
            elif stopping_criterion_subproblem == 'grad_uniform_convex':
                tolerance_spec = {'criterion': 'grad_uniform_convex',
                                  'p': 3.0,
                                  'sigma': 0.25 * H_k,
                                  'tolerance': inner_tolerance_value}
            elif stopping_criterion_subproblem == 'grad_norm_bound':
                tolerance_spec = {'criterion': 'grad_norm_bound',
                                  'c': inner_tolerance_value}
            elif stopping_criterion_subproblem in (
                    'grad_norm_by_difference', 'grad_norm_by_oracle_grad'):
                if stopping_criterion_subproblem == 'grad_norm_by_difference':
                    def lambda_bound(point):
                        return l2_norm_sqr(point - y_k) ** 2
                else:
                    def lambda_bound(point):
                        return dual_norm_sqr(oracle.grad(point))
                tolerance_spec = {'criterion': 'grad_norm_lambda_bound',
                                  'lambda_bound': lambda_bound,
                                  'c': inner_tolerance_value}
            else:
                # Heuristic stopping criterion.
                tolerance_spec = {'criterion': 'grad',
                                  'tolerance': inner_tolerance_value}
            inner_tolerance = get_tolerance(tolerance_spec)

            def hess_vec(v):
                return oracle.hess_vec(y_k, v)

            if subsolver == 'FGM':
                T_d_k, model_T, message, hist = cubic_newton_step_fgm(
                    hess_vec, grad_y_k, 0.5 * H_k, d_k, inner_tolerance,
                    max_iters=5000,
                    trace=True,
                    B=B,
                    Binv=Binv)
            elif subsolver == 'NCG':
                T_d_k, model_T, message, hist = cubic_newton_step_ncg(
                    hess_vec, grad_y_k, 0.5 * H_k, 0.0,
                    np.zeros_like(grad_y_k), d_k, inner_tolerance,
                    max_iters=5000,
                    trace=True,
                    B=B,
                    Binv=Binv)

            if message != "success":
                print('W: %s' % message, end=' ', flush=True)

            if iterative_subsolver:
                total_inner_iters += len(hist['func'])

            # The computed step is passed as d_k to the next subsolver call.
            d_k = T_d_k
            T = y_k + T_d_k
            func_T = oracle.func(T)
            grad_T = oracle.grad(T)
            grad_T_norm_sqr = dual_norm_sqr(grad_T)

            if not line_search:
                found = True
                break

            # Check condition for H_k.
            if func_T <= func_y_k + model_T:
                found = True
                break

            H_k *= 2
        else:
            # Every trial value of H_k was rejected.
            message = "adaptive_iterations_exceeded"

        if not found:
            message = "E: step_failure : " + message
            break

        if line_search:
            H_k *= 0.5
            H_k = max(H_k, 1e-8)

        x_k = T
        grad_k = grad_T
        func_k_prev = func_k
        func_k = func_T
        grad_k_norm_sqr = grad_T_norm_sqr

    return x_k, message, history