def check_log_reg(oracle_type, sparse=False):
    # Simple data:
    A = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    if sparse:
        A = scipy.sparse.csr_matrix(A)
    b = np.array([1, 1, -1, 1])
    reg_coef = 0.5

    # Logistic regression oracle:
    logreg = oracles.create_log_reg_oracle(A, b, reg_coef, oracle_type=oracle_type)

    # Check at point x = [0, 0]
    x = np.zeros(2)
    assert_almost_equal(logreg.func(x), 0.693147180)
    ok_(np.allclose(logreg.grad(x), [0, -0.25]))
    ok_(np.allclose(logreg.hess(x), [[0.625, 0.0625], [0.0625, 0.625]]))
    ok_(isinstance(logreg.grad(x), np.ndarray))
    ok_(isinstance(logreg.hess(x), np.ndarray))

    # Check func_directional and grad_directional oracles at
    # x = [0, 0], d = [1, 1], alpha = 0.5 and 1.0
    x = np.zeros(2)
    d = np.ones(2)
    assert_almost_equal(logreg.func_directional(x, d, alpha=0.5), 0.7386407091095)
    assert_almost_equal(logreg.grad_directional(x, d, alpha=0.5), 0.4267589549159)
    assert_almost_equal(logreg.func_directional(x, d, alpha=1.0), 1.1116496416598)
    assert_almost_equal(logreg.grad_directional(x, d, alpha=1.0), 1.0559278283039)
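# The reference numbers in check_log_reg above follow from the L2-regularized
# logistic loss f(x) = mean_i log(1 + exp(-b_i <a_i, x>)) + reg_coef/2 ||x||^2.
# Below is a minimal, self-contained sketch of that formula (an illustration,
# not the course oracles module); at x = 0 it reproduces log(2) ~ 0.693147,
# the gradient [0, -0.25] and the Hessian [[0.625, 0.0625], [0.0625, 0.625]].
import numpy as np
from scipy.special import expit  # numerically stable sigmoid


def log_reg_reference(A, b, reg_coef, x):
    m = A.shape[0]
    margins = b * (A @ x)
    func = np.mean(np.logaddexp(0.0, -margins)) + 0.5 * reg_coef * (x @ x)
    sigma = expit(-margins)
    grad = -(A.T @ (b * sigma)) / m + reg_coef * x
    weights = sigma * (1.0 - sigma)
    hess = (A.T * weights) @ A / m + reg_coef * np.eye(A.shape[1])
    return func, grad, hess


if __name__ == '__main__':
    A = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=float)
    b = np.array([1, 1, -1, 1], dtype=float)
    f, g, H = log_reg_reference(A, b, 0.5, np.zeros(2))
    print(f, g, H, sep='\n')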
def second_experiment():
    data_path = 'data/gisette_scale'
    result_path = lambda x: 'experiment_2/grad_norm-vs-{}'.format(x)
    A, b = load_svmlight_file(data_path)
    m, n = A.shape
    oracle = oracles.create_log_reg_oracle(A, b, 1 / m)

    results = []
    ls = [0, 1, 5, 10, 50, 100]
    for l in ls:
        _, _, history = optimization.lbfgs(oracle, np.zeros(n), memory_size=l, trace=True)
        print('lbfgs with l = {} finished'.format(l))
        grad_norm = np.array(history['grad_norm'])
        grad_norm /= grad_norm[0]
        grad_norm = np.power(grad_norm, 2)
        grad_norm = np.log(grad_norm)
        results.append((l, grad_norm, history['time']))

    def plotting(flag):
        plt.figure(figsize=(12, 8))
        for l, grad_norm, times in results:
            x = list(range(len(grad_norm))) if flag == 'iterations' else times
            plt.plot(x, grad_norm, label='history size = {}'.format(l))
        plt.xlabel('iterations' if flag == 'iterations' else 'seconds')
        plt.ylabel(r'$\log\left(grad\_norm\right)$')
        plt.legend()
        plt.grid()
        plt.savefig(result_path(flag))

    plotting('seconds')
    plotting('iterations')
def plot_results(dataset):
    """
    Plots the function value and the squared gradient norm against time
    for the gradient descent and Newton optimization methods.

    :param dataset: One of 'w8a', 'gissete', 'real-sim'
    :return:
    """
    available_datasets = ['w8a', 'gissete', 'real-sim']
    if dataset not in available_datasets:
        raise ValueError(
            "Dataset {0} currently is not supported. Available datasets are: {1}"
            .format(dataset, ' '.join(available_datasets)))

    A, b = load_svmlight_file('./data/{}'.format(dataset))
    oracle = oracles.create_log_reg_oracle(A, b, 1 / len(b))
    x_init = np.zeros(A.shape[1])

    [_, _, history_grad] = optimization.gradient_descent(
        oracle, x_init,
        line_search_options={'method': 'Wolfe', 'c': 1},
        trace=True)
    [_, _, history_newton] = optimization.newton(
        oracle, x_init,
        line_search_options={'method': 'Wolfe', 'c': 1},
        trace=True)

    plot_function_values_on_time(dataset, history_grad, history_newton)
    plot_grad_norm_values_on_time(dataset, history_grad, history_newton)
def experiment_3():
    np.random.seed(31415)
    data_path = "data"
    datasets = ["w8a", "gisette_scale", "real-sim", "news20", "rcv1_train"]
    for dataset in datasets:
        print("___________________________")
        logging.info(f"{dataset} is in process...")
        A, b = load_svmlight_file(os.path.join(data_path, dataset))
        print(A.shape, 1 - A.size / (A.shape[0] * A.shape[1]))
        m = b.size
        oracle = create_log_reg_oracle(A, b, 1.0 / m, "optimized")
        x_0 = np.zeros((A.shape[1],))

        x_opt1, message, history1 = gradient_descent(oracle, x_0, trace=True)
        logging.info("GD ended")
        x_opt2, message, history2 = hessian_free_newton(oracle, x_0, trace=True)
        logging.info("HFN ended")
        x_opt3, message, history3 = lbfgs(oracle, x_0, trace=True)
        logging.info("L-BFGS ended")

        os.makedirs("report/pics/3", exist_ok=True)

        plt.figure()
        plt.plot(history1['func'], label='GD')
        plt.plot(history2['func'], label='HFN')
        plt.plot(history3['func'], label='L-BFGS')
        print(f"GD iterations={len(history1['func'])}, time={history1['time'][-1]}")
        print(f"HFN iterations={len(history2['func'])}, time={history2['time'][-1]}")
        print(f"L-BFGS iterations={len(history3['func'])}, time={history3['time'][-1]}")
        plt.xlabel('Iteration number')
        plt.ylabel('Loss function value')
        plt.legend()
        plt.grid()
        plt.savefig(f"report/pics/3/logreg_loss_value_vs_iter_{dataset}.pdf", bbox_inches='tight')

        plt.figure()
        plt.plot(history1['time'], history1['func'], label='GD')
        plt.plot(history2['time'], history2['func'], label='HFN')
        plt.plot(history3['time'], history3['func'], label='L-BFGS')
        plt.xlabel('Time since experiment start, seconds')
        plt.ylabel('Loss function value')
        plt.legend()
        plt.grid()
        plt.savefig(f"report/pics/3/logreg_loss_value_vs_time_{dataset}.pdf", bbox_inches='tight')

        plt.figure()
        plt.plot(history1['time'], (history1['grad_norm'] / history1['grad_norm'][0]) ** 2, label='GD')
        plt.plot(history2['time'], (history2['grad_norm'] / history2['grad_norm'][0]) ** 2, label='HFN')
        plt.plot(history3['time'], (history3['grad_norm'] / history3['grad_norm'][0]) ** 2, label='L-BFGS')
        plt.yscale('log')
        plt.xlabel('Time since experiment start, seconds')
        plt.ylabel('Relative squared gradient norm')
        plt.legend()
        plt.grid()
        plt.savefig(f"report/pics/3/logreg_grad_norm_vs_time_{dataset}.pdf", bbox_inches='tight')
def experiment_3():
    np.random.seed(31415)
    m, n = 10000, 8000
    A = np.random.randn(m, n)
    b = np.sign(np.random.randn(m))
    regcoef = 1 / m

    oracle1 = create_log_reg_oracle(A, b, regcoef, oracle_type='usual')
    oracle2 = create_log_reg_oracle(A, b, regcoef, oracle_type='optimized')
    x_0 = np.zeros((n, ))
    x_opt1, message, history1 = gradient_descent(oracle1, x_0, trace=True)
    x_opt2, message, history2 = gradient_descent(oracle2, x_0, trace=True)
    print(x_opt1, x_opt2)

    plt.figure()
    plt.plot(history1['func'], label='Usual')
    plt.plot(history2['func'], label='Optimized')
    plt.xlabel('Iteration number')
    plt.ylabel('Loss function value')
    plt.legend()
    plt.grid()
    plt.savefig("pics/logreg_values")

    plt.figure()
    plt.plot(history1['time'], history1['func'], label='Usual')
    plt.plot(history2['time'], history2['func'], label='Optimized')
    plt.xlabel('Time since experiment start, seconds')
    plt.ylabel('Loss function value')
    plt.legend()
    plt.grid()
    plt.savefig("pics/logreg_loss_value_vs_time")

    plt.figure()
    plt.plot(history1['time'], 2 * np.log(history1['grad_norm'] / history1['grad_norm'][0]), label='Usual')
    plt.plot(history2['time'], 2 * np.log(history2['grad_norm'] / history2['grad_norm'][0]), label='Optimized')
    plt.xlabel('Time since experiment start, seconds')
    plt.ylabel('Log of relative squared gradient norm')
    plt.legend()
    plt.grid()
    plt.savefig("pics/logreg_grad_norm_vs_time")
def check_lyoha():
    B = np.array(range(1, 10)).reshape(3, 3)
    # A = B @ B.T
    A = B.dot(B.T)
    b = np.array(range(1, 4))
    oracle = create_log_reg_oracle(A, b, 1.0, oracle_type='optimized')
    x0 = np.zeros(3)
    x_expected = np.array([0.01081755, 0.02428744, 0.03775733])
    hist_expected = {'func': [0.69314718055994518, 0.060072133470449901, 0.020431219493905504],
                     'time': [0.0] * 3,
                     'grad_norm': [176.70314088889307, 3.295883082719103, 1.101366262174557]}
    x_star, msg, history = hessian_free_newton(oracle, x0, trace=True,
                                               line_search_options={'method': 'Wolfe'},
                                               tolerance=1e-4)
    oracle.hess(np.zeros(3))
    oracle.hess(np.array([0.01952667, 0.04648169, 0.07343672]))
    pass
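# check_lyoha relies on hessian_free_newton; below is a minimal sketch of the
# core idea (not the course implementation): the Newton system H(x) d = -grad(x)
# is solved approximately by conjugate gradients, so the Hessian is only ever
# touched through Hessian-vector products. Assumes an oracle exposing .grad(x)
# and .hess_vec(x, v), as used elsewhere in these snippets.
import numpy as np
from scipy.sparse.linalg import LinearOperator, cg


def newton_direction_cg(oracle, x):
    g = oracle.grad(x)
    n = g.size
    H = LinearOperator((n, n), matvec=lambda v: oracle.hess_vec(x, v))
    d, info = cg(H, -g)  # info == 0 means CG converged to its default tolerance
    return d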
def main():
    danger = []
    for _ in range(10):
        A = np.random.uniform(0, 1000, (5, 5))
        b = np.random.uniform(0, 1000, 5)
        regcoef = np.random.uniform(0, 100, 1)
        oracle = oracles.create_log_reg_oracle(A, b, regcoef)
        diffs = []
        for i in range(100):
            x = np.random.uniform(0, 100, 5)
            v = np.random.uniform(0, 100, 5)
            hess_vec_finite = oracles.hess_vec_finite_diff(oracle.func, x, v)
            hess_vec_oracle = oracle.hess_vec(x, v)
            diff = np.abs(hess_vec_finite - hess_vec_oracle)
            if max(diff) > 1:
                danger.append((A, b, regcoef, x, v))
            diffs.append(max(diff))
        print(max(diffs))
    print(len(danger))
def main():
    A = np.random.uniform(0, 10, (5, 5))
    b = np.random.uniform(0, 10, 5)
    regcoef = np.random.uniform(0, 10, 1)
    oracle = oracles.create_log_reg_oracle(A, b, regcoef)
    print(A)
    print(b)
    print(regcoef)
    for i in range(10):
        x = np.random.uniform(0, 10, 5)
        grad_oracle = oracle.grad(x)
        hess_oracle = oracle.hess(x)
        grad_finite = oracles.grad_finite_diff(oracle.func, x)
        hess_finite = oracles.hess_finite_diff(oracle.func, x)
        diff_grad = np.abs(grad_finite - grad_oracle)
        diff_hess = np.abs(hess_finite - hess_oracle)
        # print(i)
        # print(grad_oracle)
        # print(grad_finite)
        print(np.max(diff_hess))
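# The finite-difference checkers above typically build on forward differences:
#   grad_i(x)  ~ (f(x + eps e_i) - f(x)) / eps
#   hess_ij(x) ~ (f(x + eps e_i + eps e_j) - f(x + eps e_i) - f(x + eps e_j) + f(x)) / eps^2
# A compact sketch is given below; the actual oracles.grad_finite_diff and
# oracles.hess_finite_diff may use different step sizes or stencils.
import numpy as np


def grad_finite_diff_sketch(func, x, eps=1e-8):
    n = x.size
    e = np.eye(n)
    f0 = func(x)
    return np.array([(func(x + eps * e[i]) - f0) / eps for i in range(n)])


def hess_finite_diff_sketch(func, x, eps=1e-5):
    n = x.size
    e = np.eye(n)
    f0 = func(x)
    f_shift = np.array([func(x + eps * e[i]) for i in range(n)])
    hess = np.empty((n, n))
    for i in range(n):
        for j in range(n):
            hess[i, j] = (func(x + eps * (e[i] + e[j]))
                          - f_shift[i] - f_shift[j] + f0) / eps ** 2
    return hess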
def check_hess_vec():
    m, n = 1000, 500
    A = np.random.randn(m, n)
    b = np.sign(np.random.randn(m))
    regcoef = 1 / m
    x = np.random.randn(n)
    v = np.random.randn(n)

    logreg_oracle = create_log_reg_oracle(A, b, regcoef, oracle_type='optimized')

    v1 = logreg_oracle.hess_vec(x, v)
    v2 = hess_vec_finite_diff(logreg_oracle.func, x, v, eps=1e-6)
    res = np.allclose(v1, v2, atol=1e-2, rtol=1e-1)
    print(v1[:10])
    print(v2[:10])
    if res:
        print("Logreg hess_vec is OK!")
    else:
        print("Something wrong.")
    return res
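# The same forward-difference idea gives a Hessian-vector product from function
# values only, matching the hess_vec_finite_diff(func, x, v, eps) signature used
# in check_hess_vec (the actual implementation may differ):
#   (H(x) v)_i ~ (f(x + eps e_i + eps v) - f(x + eps e_i) - f(x + eps v) + f(x)) / eps^2
import numpy as np


def hess_vec_finite_diff_sketch(func, x, v, eps=1e-5):
    n = x.size
    e = np.eye(n)
    f0 = func(x)
    f_v = func(x + eps * v)
    result = np.empty(n)
    for i in range(n):
        result[i] = (func(x + eps * e[i] + eps * v)
                     - func(x + eps * e[i]) - f_v + f0) / eps ** 2
    return result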
def experiment_4():
    path = 'data'
    np.random.seed(31415)
    datasets = ["w8a", "gisette_scale", "real-sim"]
    for dataset in datasets:
        A, b = load_svmlight_file(path + '/' + dataset)
        m = b.size
        oracle = create_log_reg_oracle(A, b, 1.0 / m, "optimized")
        x_0 = np.zeros((A.shape[1], ))

        x_opt1, message, history1 = gradient_descent(oracle, x_0, trace=True)
        if dataset != 'real-sim':
            # Newton is run on all datasets except real-sim.
            x_opt2, message, history2 = newton(oracle, x_0, trace=True)
            print(len(history2['time']), history2['time'][-1])

        plt.figure()
        plt.plot(history1['time'], history1['func'], label='GD')
        if dataset != 'real-sim':
            plt.plot(history2['time'], history2['func'], label='Newton')
        plt.xlabel('Time since experiment start, seconds')
        plt.ylabel('Loss function value')
        plt.legend()
        plt.grid()
        plt.savefig("pics/logreg_loss_value_vs_time_" + dataset)

        plt.figure()
        plt.plot(history1['time'],
                 2 * np.log(history1['grad_norm'] / history1['grad_norm'][0]),
                 label='GD')
        if dataset != 'real-sim':
            plt.plot(history2['time'],
                     2 * np.log(history2['grad_norm'] / history2['grad_norm'][0]),
                     label='Newton')
        plt.xlabel('Time since experiment start, seconds')
        plt.ylabel('Log of relative squared gradient norm')
        plt.legend()
        plt.grid()
        plt.savefig("pics/logreg_grad_norm_vs_time_" + dataset)
def experiment_2():
    np.random.seed(31415)
    A, b = load_svmlight_file('data/gisette_scale')
    m = b.size
    oracle = create_log_reg_oracle(A, b, 1.0 / m, "optimized")
    x_0 = np.zeros((A.shape[1],))

    fig1, ax1 = plt.subplots()
    fig2, ax2 = plt.subplots()
    ax1.set_yscale('log')
    ax1.set_xlabel('Iteration number')
    ax1.set_ylabel('Relative squared gradient norm')
    ax1.grid()
    ax2.set_yscale('log')
    ax2.set_xlabel('Time since experiment start')
    ax2.set_ylabel('Relative squared gradient norm')
    ax2.grid()

    for memory_size in tqdm([0, 1, 5, 10, 50, 100]):
        x_opt, message, history = lbfgs(oracle, x_0, trace=True, memory_size=memory_size)
        relative_grad_norms = (history['grad_norm'] / history['grad_norm'][0]) ** 2
        iter_times = history['time']
        iters = range(len(history['time']))
        if len(relative_grad_norms) > 400:
            # Thin out long histories: keep every other point.
            relative_grad_norms = [elem for (i, elem) in enumerate(relative_grad_norms) if i % 2 == 0]
            iter_times = [elem for (i, elem) in enumerate(iter_times) if i % 2 == 0]
            iters = range(0, len(history['time']), 2)
        ax1.plot(iters, relative_grad_norms, label=f"l={memory_size}")
        ax2.plot(iter_times, relative_grad_norms, label=f"l={memory_size}")
        print(f"With l={memory_size}, convergence took "
              f"{len(history['time'])} iterations and {history['time'][-1]} seconds")

    ax1.legend()
    ax2.legend()
    os.makedirs("report/pics/2", exist_ok=True)
    fig1.savefig("report/pics/2/lbfgs_grad_norm_vs_iter.pdf", bbox_inches='tight')
    fig2.savefig("report/pics/2/lbfgs_grad_norm_vs_time.pdf", bbox_inches='tight')
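# The memory_size parameter varied in experiment_2 is the number of (s, y) pairs
# kept by L-BFGS. For reference, a sketch of the standard two-loop recursion that
# such an implementation typically builds on (the course optimization.lbfgs may
# differ, e.g. in the initial scaling or the line search):
import numpy as np


def lbfgs_direction(grad, s_list, y_list):
    """Return -H_k grad for the implicit L-BFGS inverse-Hessian approximation
    built from the stored pairs s_i = x_{i+1} - x_i, y_i = g_{i+1} - g_i."""
    q = grad.copy()
    alphas = []
    for s, y in reversed(list(zip(s_list, y_list))):  # newest pair first
        alpha = (s @ q) / (y @ s)
        q -= alpha * y
        alphas.append(alpha)
    if s_list:  # common initial scaling gamma = (s^T y) / (y^T y) from the newest pair
        s, y = s_list[-1], y_list[-1]
        q *= (s @ y) / (y @ y)
    for (s, y), alpha in zip(zip(s_list, y_list), reversed(alphas)):  # oldest first
        beta = (y @ q) / (y @ s)
        q += (alpha - beta) * s
    return -q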
def third_experiment():
    data_path = 'experiment_3/datasets/'
    result_path = 'experiment_3/'
    names = ['w8a', 'gisette_scale', 'real-sim']

    def plotting(history_gd, history_nm, param):
        f_gd = np.array(history_gd[param])
        f_nm = np.array(history_nm[param])
        time_gd = list(map(lambda i: i.total_seconds(), history_gd['time']))
        time_nm = list(map(lambda i: i.total_seconds(), history_nm['time']))
        if param == 'grad_norm':
            f_gd = np.log(f_gd / f_gd[0])
            f_nm = np.log(f_nm / f_nm[0])
        plt.figure()
        plt.plot(time_gd, f_gd, label='GD')
        plt.plot(time_nm, f_nm, label='Newton')
        plt.xlabel('seconds')
        ylabel = 'func' if param == 'func' else r'$\log\left(grad\_norm\right)$'
        plt.ylabel(ylabel)
        plt.legend()
        plt.grid()
        plt.savefig(result_path + name + '-' + param)

    for name in names:
        A, b = load_svmlight_file(data_path + name)
        m, n = A.shape
        oracle = oracles.create_log_reg_oracle(A, b, 1 / m)
        if name != 'real-sim':
            print('begin')
            x_star_nm, _, history_nm = optimization.newton(oracle, np.zeros(n), trace=True)
            print('Newton is finished')
            x_star_gd, _, history_gd = optimization.gradient_descent(oracle, np.zeros(n), trace=True)
            print('GD is finished')
            plotting(history_gd, history_nm, 'func')
            plotting(history_gd, history_nm, 'grad_norm')
def third_experiment():
    data_path = lambda name: 'data/{}'.format(name)
    datasets = [
        'gisette_scale', 'news20.binary', 'rcv1_train.binary', 'real-sim', 'w8a'
    ]
    algorithms = [
        optimization.hessian_free_newton, optimization.lbfgs,
        optimization.gradient_descent
    ]

    def plotting(hfn_history, lbfgs_history, gd_history, dataset):
        figname = lambda data, x, y: 'experiment_3/{}_{}-vs-{}.png'.format(data, y, x)

        def get_x(history, form):
            if form == 'iterations':
                return list(range(len(history['time'])))
            if form == 'time':
                return history['time']

        def get_y(history, form):
            if form == 'func':
                return history['func']
            if form == 'grad':
                grad_norm = np.array(history['grad_norm'])
                grad_norm /= grad_norm[0]
                grad_norm = np.power(grad_norm, 2)
                grad_norm = np.log(grad_norm)
                return grad_norm

        histories = [hfn_history, lbfgs_history, gd_history]
        names = ['HFN', 'L-BFGS', 'GD']
        colors = ['b', 'r', 'g']
        for x_form in ['iterations', 'time']:
            for y_form in ['func', 'grad']:
                if (x_form, y_form) == ('iterations', 'grad'):
                    continue
                plt.figure()
                for history, name, color in zip(histories, names, colors):
                    plt.plot(get_x(history, x_form), get_y(history, y_form),
                             label=name, color=color)
                plt.title(dataset)
                plt.xlabel(x_form)
                plt.ylabel(y_form if y_form == 'func' else r'$\log\left(grad\_norm\right)$')
                plt.grid()
                plt.legend()
                to_save = figname(dataset, x_form, y_form)
                plt.savefig(to_save)

    for dataset in datasets:
        print(dataset)
        A, b = load_svmlight_file(data_path(dataset))
        m, n = A.shape
        oracle = oracles.create_log_reg_oracle(A, b, 1 / m)
        histories = []
        for i, algorithm in enumerate(algorithms):
            _, _, history = algorithm(oracle, np.zeros(n), trace=True)
            print('{} algo finished'.format(i))
            histories.append(history)
        plotting(*histories, dataset)
import os

import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import plot_trajectory_2d
from nose.tools import assert_almost_equal, ok_, eq_
import numpy as np
import numpy.linalg as li
import scipy.sparse as sp

import oracles

# ----------------------- Checking grad and hess
A = np.random.rand(10, 4)
b = np.random.rand(10)
tr = oracles.create_log_reg_oracle(A, b, 1)
for i in range(5):
    x = np.random.rand(4)
    ok_(
        np.allclose(tr.grad(x),
                    oracles.grad_finite_diff(tr.func, x),
                    rtol=1e-1,
                    atol=1e-1))

# ---------------------- Experiment 1
for d in ['exp1', 'exp2', 'exp3', 'exp4']:
    if not os.path.exists(d):
        os.makedirs(d)

exps = [{
def run_experiment(dataset_filename, name, max_iters):
    print('Experiment: \t %s, \t file: %s, \t max_iters = %d.' %
          (name, dataset_filename, max_iters))
    X, y = load_svmlight_file(dataset_filename)
    oracle = create_log_reg_oracle(X, y, 1 / X.shape[0])
    x_0 = np.zeros(X.shape[1])

    print('Minimize by scipy ... ', flush=True, end='')
    f_star = \
        scipy.optimize.minimize(oracle.func, x_0, jac=oracle.grad, tol=1e-9).fun
    print('f_star = %g.' % f_star)

    H_0 = 1.0
    line_search = True
    tolerance = get_tolerance({'criterion': 'func',
                               'f_star': f_star,
                               'tolerance': 1e-8})
    subsolver = 'FGM'
    stopping_criterion_subproblem = 'grad_uniform_convex'

    constant_strategies = get_constant_strategies()
    power_strategies = get_power_strategies()
    adaptive_strategy = get_tolerance_strategy({'strategy': 'adaptive',
                                                'c': 1.0,
                                                'alpha': 1,
                                                'label': 'adaptive'})
    strategies_1 = constant_strategies + [adaptive_strategy]
    strategies_2 = power_strategies + [adaptive_strategy]

    method = lambda strategy: cubic_newton(
        oracle, x_0, tolerance, max_iters=max_iters, H_0=H_0,
        line_search=line_search, inner_tolerance_strategy=strategy,
        subsolver=subsolver, trace=True, B=None, Binv=None,
        stopping_criterion_subproblem=stopping_criterion_subproblem)

    labels_1 = get_labels(strategies_1)
    histories_1 = run_method(method, strategies_1, labels_1)
    filename = os.getcwd() + '/plots/logreg_%s_time' % (name)
    plot_func_residual(histories_1, 'time', f_star, labels_1,
                       ['grey', 'grey', 'grey', 'grey', 'red'],
                       ['-', '--', '-.', ':', '-'],
                       [5, 4, 3, 4, 2],
                       [0.8, 0.8, 0.8, 0.8, 1],
                       'Log-reg: %s' % name, 'Time, s',
                       filename=filename + '_const.pdf')

    labels_2 = get_labels(strategies_2)
    histories_2 = run_method(method, strategies_2, labels_2)
    plot_func_residual(histories_2, 'time', f_star, labels_2,
                       ['blue', 'blue', 'blue', 'blue', 'red'],
                       ['-', '--', '-.', ':', '-'],
                       [5, 4, 3, 2, 2],
                       [0.6, 0.6, 0.6, 0.6, 1],
                       'Log-reg: %s' % name, 'Time, s',
                       filename=filename + '_powers.pdf')
def experiment_5_and_6(algo='gd'):
    np.random.seed(31415)
    m, n = 2000, 1000
    A = np.random.randn(m, n)
    b = np.sign(np.random.randn(m))
    regcoef = 1 / m

    logreg_oracle = create_log_reg_oracle(A, b, regcoef, oracle_type='optimized')

    line_search_options = [
        {'method': 'Constant', 'c': 1.0},
        {'method': 'Constant', 'c': 0.95},
        {'method': 'Constant', 'c': 0.9},
        {'method': 'Constant', 'c': 0.85},
        {'method': 'Armijo', 'c1': 1e-8},
        {'method': 'Armijo', 'c1': 1e-6},
        {'method': 'Armijo', 'c1': 1e-4},
        {'method': 'Armijo', 'c1': 1e-1},
        {'method': 'Wolfe', 'c2': 1.5},
        {'method': 'Wolfe', 'c2': 0.9},
        {'method': 'Wolfe', 'c2': 0.1},
        {'method': 'Wolfe', 'c2': 0.01},
    ]
    colors = ['#e66101', '#fdb863', '#b2abd2', '#5e3c99']
    styles = {
        'Constant': {'linestyle': '--', 'dashes': (2, 5), 'linewidth': 2},
        'Armijo': {'linestyle': '--', 'dashes': (5, 2)},
        'Wolfe': {'linestyle': 'solid'},
    }

    x_0_list = [None] * 3
    x_0_list[0] = np.zeros((n, ))
    x_0_list[1] = np.random.uniform(-1, 1, (n, ))
    x_0_list[2] = np.ones((n, ))

    for k, x_0 in enumerate(x_0_list):
        plt.figure(figsize=(12, 9))
        for i, options in tqdm(enumerate(line_search_options)):
            if algo == 'GD':
                x_opt, message, history = gradient_descent(
                    logreg_oracle, x_0, trace=True, line_search_options=options)
            else:
                x_opt, message, history = newton(
                    logreg_oracle, x_0, trace=True, line_search_options=options)
            args = list(options.keys()) + list(options.values())
            label = "{} ({}={}, {}={})".format(algo, args[0], args[2], args[1], args[3])
            values = 2 * np.log(history['grad_norm'] / history['grad_norm'][0])
            method = args[2]
            plt.plot(values + np.random.randn(values.size) * 0.05,
                     color=colors[i % len(colors)], label=label, alpha=0.7,
                     **styles[method])
        plt.xlabel('Iteration number')
        plt.ylabel('Log of relative squared gradient norm')
        plt.legend(loc='upper right')
        plt.grid()
        Path("pics/logreg_{}_linear_search_strategies".format(algo)).mkdir(
            parents=True, exist_ok=True)
        plt.savefig("pics/logreg_{}_linear_search_strategies/x_0_{}.png".format(algo, k))

    np.random.seed(31415)
    n = 2000
    C = ortho_group.rvs(n)
    A = C.T @ np.diag(np.random.uniform(1, 20, (n, ))) @ C
    b = np.random.randn(n)
    x_0 = np.zeros((n, ))

    quadratic_oracle = QuadraticOracle(A, b)
    x_opt = np.linalg.solve(A, b)
    f_opt = quadratic_oracle.func(x_opt)

    line_search_options = [
        {'method': 'Constant', 'c': 0.09},
        {'method': 'Constant', 'c': 0.085},
        {'method': 'Constant', 'c': 0.08},
        {'method': 'Constant', 'c': 0.075},
        {'method': 'Armijo', 'c1': 1e-10},
        {'method': 'Armijo', 'c1': 1e-7},
        {'method': 'Armijo', 'c1': 1e-4},
        {'method': 'Armijo', 'c1': 1e-1},
        {'method': 'Wolfe', 'c2': 1.5},
        {'method': 'Wolfe', 'c2': 0.9},
        {'method': 'Wolfe', 'c2': 0.1},
        {'method': 'Wolfe', 'c2': 0.01},
    ]

    x_0_list = [None] * 3
    x_0_list[0] = np.zeros((n, ))
    x_0_list[1] = np.random.uniform(-1, 1, (n, ))
    x_0_list[2] = x_opt + np.random.randn(n, ) * 0.2

    for k, x_0 in enumerate(x_0_list):
        plt.figure(figsize=(12, 9))
        for i, options in tqdm(enumerate(line_search_options)):
            if algo == 'GD':
                x_opt, message, history = gradient_descent(
                    quadratic_oracle, x_0, trace=True, line_search_options=options)
            else:
                x_opt, message, history = newton(
                    quadratic_oracle, x_0, trace=True, line_search_options=options)
            args = list(options.keys()) + list(options.values())
            label = "{} ({}={}, {}={})".format(algo, args[0], args[2], args[1], args[3])
            values = np.log(np.abs((history['func'] - f_opt) / f_opt) + 1e-16)
            method = args[2]
            plt.plot(values + np.random.randn(values.size) * 0.05,
                     color=colors[i % len(colors)], label=label, alpha=0.7,
                     **styles[method])
        plt.xlabel('Iteration number')
        plt.ylabel('Log of relative residual')
        plt.legend(loc='upper right')
        plt.grid()
        Path("pics/quadratic_{}_linear_search_strategies".format(algo)).mkdir(
            parents=True, exist_ok=True)
        plt.savefig("pics/quadratic_{}_linear_search_strategies/x_0_{}.png".format(algo, k))
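# experiment_5_and_6 sweeps Constant, Armijo and Wolfe line searches. For
# reference, a minimal sketch of Armijo backtracking consistent with the
# {'method': 'Armijo', 'c1': ...} options above (the course line-search class
# may differ, e.g. in the initial step or in reusing the previous alpha):
def armijo_backtracking(oracle, x, d, c1=1e-4, alpha_0=1.0):
    """Halve alpha until f(x + alpha d) <= f(x) + c1 * alpha * <grad f(x), d>."""
    alpha = alpha_0
    f_x = oracle.func(x)
    slope = oracle.grad(x) @ d  # directional derivative; assumed negative for a descent d
    while oracle.func(x + alpha * d) > f_x + c1 * alpha * slope:
        alpha /= 2.0
    return alpha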
def generate_log_reg_oracle(N, D, regcoef, seed=42):
    np.random.seed(seed)
    A = np.random.randn(N, D)
    w = np.random.randn(D)
    b = np.sign(A.dot(w) + np.random.randn(N))
    return create_log_reg_oracle(A, b, regcoef)
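# Hypothetical usage of the generator above, following the call pattern of the
# other experiments in this section (assumes gradient_descent is importable from
# the course optimization module):
if __name__ == '__main__':
    oracle = generate_log_reg_oracle(N=1000, D=50, regcoef=1e-3)
    x_0 = np.zeros(50)
    x_opt, message, history = gradient_descent(oracle, x_0, trace=True)
    print(message, history['func'][-1])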
def run_experiment(dataset_filename, name, max_iters):
    print('Experiment: \t %s, \t file: %s, \t max_iters = %d.' %
          (name, dataset_filename, max_iters))
    X, y = load_svmlight_file(dataset_filename)
    oracle = create_log_reg_oracle(X, y, 1 / X.shape[0])
    x_0 = np.zeros(X.shape[1])

    print('Minimize by scipy ... ', flush=True, end='')
    f_star = \
        scipy.optimize.minimize(oracle.func, x_0, jac=oracle.grad, tol=1e-9).fun
    print('f_star = %g.' % f_star)

    H_0 = 1.0
    line_search = True
    tolerance = get_tolerance({
        'criterion': 'func',
        'f_star': f_star,
        'tolerance': 1e-8
    })
    subsolver = 'FGM'
    stopping_criterion_subproblem = 'func'

    constant_strategies = get_constant_strategies()
    power_strategies = get_power_strategies()
    adaptive_strategy = get_tolerance_strategy({
        'strategy': 'adaptive',
        'c': 1.0,
        'alpha': 1,
        'label': 'adaptive'
    })
    adaptive_15_strategy = get_tolerance_strategy({
        'strategy': 'adaptive',
        'c': 1.0,
        'alpha': 1.5,
        'label': r'adaptive $1.5$'
    })
    adaptive_2_strategy = get_tolerance_strategy({
        'strategy': 'adaptive',
        'c': 1.0,
        'alpha': 2,
        'label': r'adaptive $2$'
    })
    strategies_1 = constant_strategies
    strategies_2 = power_strategies + [constant_strategies[-1]]
    strategies_3 = [
        adaptive_strategy, adaptive_15_strategy, adaptive_2_strategy,
        constant_strategies[-1]
    ]

    method = lambda strategy: cubic_newton(
        oracle, x_0, tolerance, max_iters=max_iters, H_0=H_0,
        line_search=line_search, inner_tolerance_strategy=strategy,
        subsolver=subsolver, trace=True, B=None, Binv=None,
        stopping_criterion_subproblem=stopping_criterion_subproblem)

    filename = os.getcwd() + '/plots/exact_logreg_%s' % (name)

    labels_1 = get_labels(strategies_1)
    histories_1 = run_method(method, strategies_1, labels_1)
    plot_func_residual_iter(histories_1, 'hess_vec_calls', f_star, labels_1,
                            ['grey', 'grey', 'grey', 'grey'],
                            ['-', '--', '-.', ':'],
                            [5, 4, 3, 4],
                            [1, 1, 1, 1],
                            r'Log-reg, %s: constant strategies' % name,
                            'Hessian-vector products',
                            filename=filename + '_const.pdf')

    labels_2 = get_labels(strategies_2)
    histories_2 = run_method(method, strategies_2, labels_2)
    plot_func_residual_iter(histories_2, 'hess_vec_calls', f_star, labels_2,
                            ['blue', 'blue', 'blue', 'blue', 'gray'],
                            ['-', '--', '-.', ':', ':'],
                            [5, 4, 3, 2, 4],
                            [0.6, 0.6, 0.6, 0.6, 0.8],
                            r'Log-reg, %s: dynamic strategies' % name,
                            'Hessian-vector products',
                            filename=filename + '_power.pdf')

    labels_3 = get_labels(strategies_3)
    histories_3 = run_method(method, strategies_3, labels_3)
    plot_func_residual_iter(histories_3, 'hess_vec_calls', f_star, labels_3,
                            ['red', 'tab:orange', 'tab:orange', 'gray'],
                            ['-', '--', '-.', ':'],
                            [2, 4, 2, 4],
                            [1, 1, 1, 0.8],
                            r'Log-reg, %s: adaptive strategies' % name,
                            'Hessian-vector products',
                            filename=filename + '_adaptive.pdf')
for i, c2 in enumerate(c2_values):
    [_, _, history] = optimization.gradient_descent(
        oracle, x_init,
        line_search_options={'method': 'Wolfe', 'c2': c2},
        trace=True)
    plot_grad_norm_vs_time(history, colors[i], 'Wolfe, c2={}'.format(c2))


if __name__ == '__main__':
    ### Logistic regression
    A, b = generate_data()
    oracle = oracles.create_log_reg_oracle(A, b, 1 / len(b), oracle_type='optimized')
    x_init = np.zeros(A.shape[1])

    c_values = [0.001, 0.01, 0.1]
    colors_constant = ['lime', 'green', 'darkgreen']
    analyze_constant(oracle, x_init, c_values, colors_constant)

    c1_values = [0.1, 0.25, 0.4]
    colors_armijo = ['red', 'darkred', 'lightcoral']
    analyze_armijo(oracle, x_init, c1_values, colors_armijo)

    c2_values = [0.6, 0.75, 0.9]
    colors_wolfe = ['blue', 'midnightblue', 'cornflowerblue']
    analyze_wolfe(oracle, x_init, c2_values, colors_wolfe)

    plt.legend()
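# For reference, a Wolfe step with configurable c2, as in the
# {'method': 'Wolfe', 'c2': ...} options used above, can be sketched with
# SciPy's built-in strong-Wolfe routine (the course implementation may differ):
from scipy.optimize import line_search


def wolfe_step(oracle, x, d, c2=0.9):
    alpha = line_search(oracle.func, oracle.grad, x, d, c1=1e-4, c2=c2)[0]
    return alpha  # None if the strong Wolfe conditions could not be satisfied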