def gd_dir(f, x0, step_size, obj_tol, param_tol, max_iter):
    """Minimise ``f`` by fixed-step descent along the negative gradient.

    Every iterate (including the starting point) is reported through
    ``utils.report_iteration`` with the label ``"gd"``.

    Args:
        f: Callable; ``f(x)`` must return a pair ``(value, derivative)``.
            The second item is used as the descent gradient.
        x0: Starting point.
        step_size: Constant factor applied to the negative gradient.
        obj_tol: Objective-change tolerance passed to ``check_converge``.
        param_tol: Parameter-change tolerance passed to ``check_converge``.
        max_iter: Maximum number of update steps to perform.

    Returns:
        A tuple ``(x_last, success, x_vals, iter_num_to_obj_val)`` where
        ``x_last`` is the final iterate (``x0`` itself when no step was
        taken), ``success`` is the last result of ``check_converge``
        (``False`` if the loop never ran), ``x_vals`` is the list of all
        iterates visited, and ``iter_num_to_obj_val`` is an ``OrderedDict``
        mapping iteration number to objective value.
    """
    x_prev = x0
    f_prev, df_prev = f(x0)
    x_vals = [x_prev]

    i = 0
    success = False
    utils.report_iteration(i, x_prev, f_prev, float("NaN"), float("NaN"), "gd")

    iter_num_to_obj_val = OrderedDict()
    iter_num_to_obj_val[i] = f_prev

    while not success and i < max_iter:
        pk = -step_size * df_prev
        x_next = x_prev + pk
        f_next, df_next = f(x_next)
        i += 1
        iter_num_to_obj_val[i] = f_next

        # Magnitude of the change in objective and in parameters for this step.
        cur_obj_val = abs(f_next - f_prev)
        cur_param_val = np.linalg.norm(x_next - x_prev)
        utils.report_iteration(i, x_next, f_next, cur_param_val, cur_obj_val, "gd")
        success = check_converge(cur_param_val, param_tol, cur_obj_val, obj_tol)

        x_prev = x_next
        f_prev = f_next
        df_prev = df_next
        x_vals.append(x_prev)

    # x_prev is the most recent iterate; unlike x_next it is also defined
    # when the loop body never executes (e.g. max_iter <= 0).
    return x_prev, success, x_vals, iter_num_to_obj_val
def report_all_itenration_in_hindsight(func, x_vals):
    """Replay a recorded sequence of iterates through ``utils.report_iteration``.

    ``func`` is re-evaluated at every point in ``x_vals``; only the first
    element of its return value is used as the objective value. The first
    point is reported as iteration 0 with NaN for both change measures; each
    later point is reported with the norm of its difference from the previous
    point and the absolute change in objective value.

    Args:
        func: Callable whose return value is indexable; item ``0`` is taken
            as the objective value.
        x_vals: Non-empty indexable sequence of iterates.
    """
    last_x = x_vals[0]
    last_f = func(last_x)[0]
    utils.report_iteration(0, last_x, last_f, float("NaN"), float("NaN"))

    for step, point in enumerate(x_vals[1:], start=1):
        value = func(point)[0]
        param_change = np.linalg.norm(point - last_x)
        obj_change = abs(value - last_f)
        utils.report_iteration(step, point, value, param_change, obj_change)
        last_x = point
        last_f = value
def bfgs_dir(f, x0, step_size, obj_tol, param_tol, max_iter, init_step_len, slope_ratio, back_track_factor):
    """Minimise ``f`` with a quasi-Newton (BFGS-style) iteration.

    The search direction is obtained by solving ``B p = -df`` where ``B``
    starts as the identity matrix and is updated after each step by
    ``get_next_B_matrix``. The step length along that direction comes from
    ``get_step_len_by_first_wolfe``. Every iterate is reported through
    ``utils.report_iteration`` with the label ``"bfgs"``.

    Args:
        f: Callable; ``f(x, True)`` must return a triple
            ``(value, derivative, hessian)``. The hessian is only passed to
            ``should_stop_by_newton_decrement``.
        x0: Starting point; ``len(x0)`` sets the size of the initial ``B``.
        step_size: Accepted for signature compatibility; not used here.
        obj_tol: Objective-change tolerance passed to ``check_converge``.
        param_tol: Parameter-change tolerance passed to ``check_converge``.
        max_iter: Maximum number of update steps to perform.
        init_step_len: Forwarded to ``get_step_len_by_first_wolfe``.
        slope_ratio: Forwarded to ``get_step_len_by_first_wolfe``.
        back_track_factor: Forwarded to ``get_step_len_by_first_wolfe``.

    Returns:
        A tuple ``(x_last, success, x_vals, iter_num_to_obj_val)`` where
        ``x_last`` is the final iterate (``x0`` itself when no step was
        taken), ``success`` tells whether a stopping test fired, ``x_vals``
        is the list of all iterates visited, and ``iter_num_to_obj_val`` is
        an ``OrderedDict`` mapping iteration number to objective value.
    """
    x_prev = x0
    f_prev, df_prev, hessian = f(x0, True)
    x_vals = [x_prev]

    i = 0
    success = False
    utils.report_iteration(i, x_prev, f_prev, float("NaN"), float("NaN"), "bfgs")

    iter_num_to_obj_val = OrderedDict()
    iter_num_to_obj_val[i] = f_prev

    B_prev = np.eye(len(x0))

    while not success and i < max_iter:
        # This test is evaluated before the step, but the step below is still
        # taken; a positive result only ends the loop after this iteration.
        success = should_stop_by_newton_decrement(x_prev, hessian)

        # Lecture 2 slide 40: p_k = -(grad^2 f(x_k))^(-1) grad f(x_k),
        # with B_k standing in for the Hessian (+ lecture 3 slide 7).
        pk = np.linalg.solve(B_prev, -df_prev)
        step_len = get_step_len_by_first_wolfe(
            f, df_prev, x_prev, init_step_len, pk, slope_ratio, back_track_factor
        )
        x_next = x_prev + step_len * pk
        f_next, df_next, hessian = f(x_next, True)
        i += 1
        iter_num_to_obj_val[i] = f_next

        # Magnitude of the change in objective and in parameters for this step.
        cur_obj_val = abs(f_next - f_prev)
        cur_param_val = np.linalg.norm(x_next - x_prev)
        utils.report_iteration(i, x_next, f_next, cur_param_val, cur_obj_val, "bfgs")
        success = success or check_converge(cur_param_val, param_tol, cur_obj_val, obj_tol)

        B_prev = get_next_B_matrix(B_prev, x_prev, x_next, df_prev, df_next)
        x_prev = x_next
        f_prev = f_next
        df_prev = df_next
        x_vals.append(x_prev)

    # x_prev is the most recent iterate; unlike x_next it is also defined
    # when the loop body never executes (e.g. max_iter <= 0).
    return x_prev, success, x_vals, iter_num_to_obj_val