def estimate_lipschitz_bb(x, x_old, grad, grad_old, bb_type=2):
    """Estimate a Lipschitz constant from two consecutive iterates.

    The estimate is a ratio built from the iterate change ``x - x_old`` and
    the gradient change ``grad - grad_old``; ``bb_type`` selects the
    numerator.  (The ``bb`` in the name presumably refers to a
    Barzilai-Borwein style rule -- confirm against the project docs.)

    Args:
        x: current iterate.
        x_old: previous iterate.
        grad: gradient at ``x``.
        grad_old: gradient at ``x_old``.
        bb_type: ``2`` uses ``norm(dg)``, ``3`` uses ``|<dg, dx>|`` and any
            other value uses ``sqrt(norm(dg))``.

    Returns:
        The selected numerator divided by ``norm(dx)``.  No guard is applied
        for ``x == x_old``.
    """
    step = x - x_old
    grad_change = grad - grad_old

    if bb_type == 2:
        numerator = norm(grad_change)
    elif bb_type == 3:
        numerator = abs(dot_product(grad_change, step))
    else:
        numerator = np.sqrt(norm(grad_change))
    return numerator / norm(step)
def get_alpha(self, fw_state, problem):
    """Choose a step size by backtracking on a quadratic upper model.

    Starting from the minimiser of the quadratic model built with
    ``self.nu * fw_state['L']`` (capped at the value returned by
    ``self.adjust_beta``), the curvature constant is multiplied by
    ``self.tau`` until the objective at the trial point no longer exceeds the
    model value.

    Args:
        fw_state: dict read for ``'s'``, ``'x'``, ``'delta_x'``, ``'f'``,
            ``'grad'`` and ``'L'``; ``'L'`` is overwritten with the accepted
            curvature constant.
        problem: object providing ``param_func``, ``param`` and ``val``.

    Returns:
        The accepted step size.
    """
    vertex_param = problem.param_func(fw_state['s'])
    current_param = problem.param
    beta_max = self.adjust_beta(problem, fw_state, current_param,
                                vertex_param)

    def objective_on_segment(beta):
        # Objective at the convex combination of x and s (and of the
        # corresponding extra parameters).
        return problem.val(
            (1 - beta) * fw_state['x'] + beta * fw_state['s'],
            (1 - beta) * current_param + beta * vertex_param)

    direction = -fw_state['delta_x']
    f_x = fw_state['f']
    lipschitz = self.nu * fw_state['L']
    slope = dot_product(fw_state['grad'], direction)
    direction_sq = norm(direction)**2
    curvature = lipschitz / 2 * direction_sq

    step = min(-1 * slope / (lipschitz * direction_sq), beta_max)
    # Keep the strict ">" test: a NaN objective terminates the loop.
    while objective_on_segment(step) > f_x + step * slope + step**2 * curvature:
        lipschitz = self.tau * lipschitz
        curvature = curvature * self.tau
        step = min(-1 * slope / (2 * curvature), beta_max)

    fw_state['L'] = lipschitz
    return step
def estimate_lipschitz(problem, ndim):
    """Estimate a Lipschitz constant of the gradient by power iteration.

    A start direction is repeatedly multiplied by the Hessian (through
    ``problem.hess_mult_vec``) and renormalised with ``norm``; the returned
    value is the quotient ``<d, H d> / <d, d>`` for the final direction.

    Args:
        problem: object providing ``n`` and ``hess_mult_vec``.
        ndim: dimensionality of the iterate. ``1`` starts from
            ``np.ones(problem.n)``, ``2`` from ``np.eye(problem.n)``.

    Returns:
        The curvature estimate along the final direction.

    Raises:
        ValueError: if ``ndim`` is neither 1 nor 2.
    """
    if ndim == 1:
        direction = np.ones(problem.n)
    elif ndim == 2:
        direction = np.eye(problem.n)
    else:
        # Without this guard the start direction would be unbound and the
        # failure would surface later as an unrelated error.
        raise ValueError(f'ndim must be 1 or 2, got {ndim!r}')

    # Fixed number of power-iteration steps.
    for _ in range(15):
        image = problem.hess_mult_vec(direction)
        direction = image / norm(image)

    hess_direction = problem.hess_mult_vec(direction)
    return dot_product(direction, hess_direction) / dot_product(
        direction, direction)
def _compute_t_nu(self, fw_state, problem):
    """Compute the step size ``t`` and the scaling ``delta_v`` for ``problem.nu``.

    Args:
        fw_state: dict read for ``'delta_x'`` and ``'Gap'``.
        problem: object providing ``hess_mult``, ``Mf`` and ``nu``.

    Returns:
        tuple: ``(t, delta_v)``.

    Raises:
        ValueError: if ``problem.nu`` lies outside the interval ``[2, 4]``.
    """
    # Square root of problem.hess_mult(delta_x); presumably the local
    # (Hessian) norm of the direction -- confirm against hess_mult.
    e = problem.hess_mult(fw_state['delta_x'])**0.5
    beta = norm(fw_state['delta_x'])
    Mf = problem.Mf
    nu = problem.nu
    Gap = fw_state['Gap']

    if nu == 2:
        delta_v = Mf * beta
        t = 1 / delta_v * np.log(1 + (Gap * delta_v) / (e**2))
    elif nu == 3:
        delta_v = Mf * e / 2
        t = Gap / (Gap * delta_v + e**2)
    elif 2 < nu <= 4:
        delta_v = (nu - 2) / 2 * Mf * (beta**(3 - nu)) * e**(nu - 2)
        if nu == 4:
            t = 1 / delta_v * (1 - np.exp(-delta_v * Gap / (e**2)))
        else:
            const = (4 - nu) / (nu - 2)
            # NOTE(review): as parenthesised, the base `1 - (1 + z)` collapses
            # to `-z`.  The nu == 4 branch has the shape `1 - exp(...)`, so the
            # exponent may have been meant to apply to `(1 + z)` only --
            # confirm against the derivation before relying on this branch.
            t = 1 / delta_v * (1 - (1 + (-delta_v * Gap * const /
                                         (e**2))))**(-1 / const)
    else:
        # No formula exists for this nu; fail with a clear message instead of
        # reaching the return statement with `t` unbound.
        raise ValueError(f'nu must lie in [2, 4], got {nu!r}')
    return t, delta_v
def run_prox_newton(problem,
                    x_0=None,
                    max_iter=1000,
                    eps=1e-10,
                    use_two_phase=False,
                    print_every=10,
                    Lest='backtracking',
                    fista_iter=1000,
                    fista_tol=1e-5,
                    fista_type='mfista',
                    print_fista=False):
    """Run a damped proximal-Newton loop whose subproblems are solved by ``fista``.

    Each iteration evaluates ``problem.val`` and ``problem.grad`` at the
    current point, obtains a candidate ``x_nxt`` from ``fista`` and moves by
    ``tau_k * (x_nxt - x)``.  The damping ``tau_k`` depends on ``problem.nu``,
    ``problem.Mf`` and the size of the candidate step.  The loop stops once
    the relative step length drops to ``eps`` (never on the first iteration).

    Args:
        problem: object providing ``generate_start_point``, ``val``, ``grad``,
            ``hess_mult``, ``Mf`` and ``nu`` (plus whatever ``fista`` needs).
        x_0: start point; ``problem.generate_start_point()`` is used if None.
        max_iter: maximum number of outer iterations.
        eps: tolerance on the relative step length.
        use_two_phase: enable the switch to full steps (``tau_k = 1``); only
            implemented for ``nu == 3``, other supported values exit the
            process via ``sys.exit``.
        print_every: progress is printed every ``print_every`` iterations.
        Lest, fista_iter, fista_tol, fista_type, print_fista: forwarded to
            ``fista``.

    Returns:
        ``(x, alpha_hist, f_hist, time_hist)``.  ``time_hist`` starts with a
        leading ``0``; ``f_hist`` ends with one repeated final value once at
        least one iteration ran.  Returns ``None`` (after printing a message)
        when ``problem.nu`` is greater than 3.
    """
    if x_0 is None:
        x = problem.generate_start_point()
    else:
        x = x_0
    alpha_hist = []
    f_hist = []
    time_hist = [0]
    int_start = time.time()
    bPhase2 = False
    Mf = problem.Mf
    nu = problem.nu
    scopt_state = {}
    for i in range(1, max_iter + 1):
        start = time.time()
        f = problem.val(x)
        grad = problem.grad(x)
        scopt_state['grad'] = grad
        scopt_state['x'] = x
        # Candidate point from the inner solver.
        x_nxt = fista(problem,
                      scopt_state,
                      max_iter=fista_iter,
                      tol=fista_tol,
                      fista_type=fista_type,
                      Lest=Lest,
                      print_fista=print_fista)
        diffx = x_nxt - x
        lam_k = np.sqrt(problem.hess_mult(diffx))
        nrm_dx = norm(diffx)
        beta_k = Mf * nrm_dx
        # Relative step length, used as the stopping criterion.
        rdiff = nrm_dx / max(1.0, norm(x))

        if use_two_phase and not bPhase2:
            # Conditions to go to phase 2.
            if nu == 2:
                sys.exit('still under implementation')
            elif nu < 3:
                sys.exit('still under implementation')
            elif nu == 3:
                if lam_k * 2 * Mf < 1:
                    bPhase2 = True

        if not bPhase2:
            # Damped phase.
            if beta_k == 0:
                tau_k = 0
            elif nu == 2:
                tau_k = 1 / beta_k * np.log(1 + beta_k)
            elif nu == 3:
                d_k = 0.5 * Mf * lam_k
                tau_k = 1 / (1 + d_k)
            elif nu < 3:
                d_k = (nu / 2 - 1) * (Mf * lam_k)**(nu - 2) * beta_k**(3 - nu)
                nu_param = (nu - 2) / (4 - nu)
                tau_k = (1 - (1 + d_k / nu_param)**(-nu_param)) / d_k
            else:
                print('The value of nu is not valid')
                return None
        else:
            # Phase 2: full step.
            tau_k = 1

        end = time.time()
        alpha_hist.append(tau_k)
        f_hist.append(f)
        time_hist.append(end - start)
        x = x + tau_k * diffx

        # Check the stopping criterion.
        if (rdiff <= eps) and (i > 1):
            print('Convergence achieved!')
            print('iter = %4d, stepsize = %3.3e, rdiff = %3.3e,value=%g' %
                  (i, tau_k, rdiff, f))
            f_hist.append(f)
            break
        if (i % print_every == 0) or (i == 1):
            print('iter = %4d, stepsize = %3.3e, rdiff = %3.3e , f = %g' %
                  (i, tau_k, rdiff, f))
    else:
        # Reached only when the loop was not left through `break`.  A run that
        # converges on its very last iteration is therefore not reported as
        # exceeding the budget (and `f` is not appended a second time), and a
        # zero-iteration call no longer fails on an unbound loop variable.
        if max_iter > 0:
            f_hist.append(f)
        print('Exceed the maximum number of iterations')
    int_end = time.time()
    print(int_end - int_start)
    return x, alpha_hist, f_hist, time_hist
def fista(problem,
          scopt_state,
          max_iter=1000,
          tol=1e-5,
          fista_type='mfista',
          Lest='backtracking',
          print_fista=False):
    """Minimise the local quadratic model around ``scopt_state['x']`` with FISTA.

    The model is ``0.5 * problem.hess_mult(z - x) + <grad, z - x>`` with
    ``grad = scopt_state['grad']``; each gradient step is followed by
    ``problem.projection``.

    Args:
        problem: object providing ``hess_mult``, ``hess_mult_vec`` and
            ``projection`` (and ``n`` when ``Lest == 'estimate'``).
        scopt_state: dict with the expansion point ``'x'`` and gradient
            ``'grad'``.
        max_iter: maximum number of inner iterations.
        tol: the loop stops once ``norm(z - x_cur) < tol`` (never on the
            first iteration).
        fista_type: with ``'mfista'`` a candidate that increases the model
            value is rejected and the previous point is kept.
        Lest: ``'estimate'`` uses a fixed constant from
            ``estimate_lipschitz``; ``'backtracking'`` adapts the constant by
            a factor of 2 at every iteration.
        print_fista: print a message when the inner loop converges.

    Returns:
        The last accepted point.

    Raises:
        ValueError: if ``Lest`` is not one of the two supported modes.
    """
    if Lest not in ('estimate', 'backtracking'):
        # Otherwise neither the step constant nor the candidate point would
        # ever be assigned.
        raise ValueError(
            f"Lest must be 'estimate' or 'backtracking', got {Lest!r}")

    x = scopt_state['x']

    def func(x_2):
        return 0.5 * problem.hess_mult(x_2 - x) + dot_product(
            scopt_state['grad'], x_2 - x)

    def grad_func(x_2):
        return problem.hess_mult_vec(x_2 - x) + scopt_state['grad']

    y = x.copy()
    if Lest == 'estimate':
        L = estimate_lipschitz(problem, ndim=x.ndim)
    else:
        L = 1
    x_cur = y.copy()
    f_cur = func(x_cur)
    # Returned as-is when the loop body never runs (max_iter <= 0).
    x_nxt = x_cur
    t = 1
    beta = 2
    for k in range(1, max_iter + 1):
        grad_y = grad_func(y)
        f_y = func(y)
        if Lest == 'estimate':
            x_tmp = y - 1 / L * grad_y
            z = problem.projection(x_tmp)
            f_z = func(z)
            diff_yz = z - y
        else:
            # Start one factor below the previous constant and force at least
            # one pass through the loop (f_z is set above f_y on purpose).
            z = y
            L = L / beta
            diff_yz = z - y
            f_z = f_y + 1
            while (f_z > f_y + dot_product(grad_y, diff_yz) +
                   (L / 2) * norm(diff_yz)**2) or (f_z > f_y):
                L = L * beta
                x_tmp = y - 1 / L * grad_y
                z = problem.projection(x_tmp)
                f_z = func(z)
                diff_yz = z - y
                if L > 1e+20:
                    # Safeguard: give up on the step and only project y.
                    z = problem.projection(y)
                    f_z = func(z)
                    diff_yz = z - y
                    L = L / beta
                    break

        f_nxt = f_z
        if (f_nxt > f_cur) and (fista_type == 'mfista'):
            # Monotone variant: reject a candidate that increases the model.
            x_nxt = x_cur
            f_nxt = f_cur
        else:
            x_nxt = z

        zdiff = z - x_cur
        ndiff = norm(zdiff)
        if (ndiff < tol) and (k > 1):
            # Stopping must not depend on the verbosity flag; only the
            # message does.
            if print_fista:
                print(
                    'Fista err = %3.3e; Subiter = %3d; subproblem converged!'
                    % (ndiff, k))
            break

        xdiff = x_nxt - x_cur
        t_nxt = 0.5 * (1 + np.sqrt(1 + 4 * (t**2)))
        y = x_nxt + (t - 1) / t_nxt * xdiff + t / t_nxt * (z - x_nxt)
        t = t_nxt
        x_cur = x_nxt
        f_cur = f_nxt
    return x_nxt
def run_prox_grad(
        problem,
        x_0=None,
        max_iter=1000,
        eps=1e-10,
        bb_type=3,
        backtracking=True,
        btk_iters=100,
        print_every=10,
):
    """Run a damped projected/proximal gradient loop.

    Each iteration estimates a step constant with ``estimate_lipschitz_bb``,
    takes a gradient step followed by ``problem.projection`` and then moves
    by ``alpha * (x_nxt - x)`` with
    ``alpha = min(beta_k / (lam_k * (lam_k + beta_k)), 1)``.

    Args:
        problem: object providing ``generate_start_point``, ``val``, ``grad``,
            ``projection`` and ``Mf``.
        x_0: start point; ``problem.generate_start_point()`` is used if None.
        max_iter: maximum number of iterations.
        eps: tolerance on ``norm(x_nxt - x) / max(1, norm(x))``, where ``x``
            is the already updated iterate.
        bb_type: forwarded to ``estimate_lipschitz_bb``.
        backtracking: halve the step constant while the backtracking test
            fails (at most ``btk_iters`` times).
        btk_iters: maximum number of halvings per iteration.
        print_every: progress is printed every ``print_every`` iterations.

    Returns:
        ``(x, alpha_hist, f_hist, time_hist)``.  ``alpha_hist`` and ``f_hist``
        hold one entry per iteration (``f_hist`` gets one extra repeated value
        when the iteration budget is exhausted); ``time_hist`` starts with a
        leading ``0``.
    """
    if x_0 is None:
        x = problem.generate_start_point()
    else:
        x = x_0
    # The first estimate is taken relative to the origin.
    x_old = 0
    grad_old = 0
    alpha_hist = []
    f_hist = []
    time_hist = [0]
    int_start = time.time()
    Mf = problem.Mf
    for k in range(1, max_iter + 1):
        start = time.time()
        f = problem.val(x)
        grad = problem.grad(x)
        Lips_cur = estimate_lipschitz_bb(x,
                                         x_old,
                                         grad,
                                         grad_old,
                                         bb_type=bb_type)
        x_nxt = problem.projection(x - 1 / Lips_cur * grad)
        diffx = x_nxt - x
        nrm_dx = norm(diffx)
        lam_k = np.sqrt(Lips_cur * dot_product(diffx, diffx))
        beta_k = Mf * nrm_dx

        if backtracking:
            # NOTE(review): lam_k and nrm_dx are not refreshed inside this
            # loop, and lam_k**2 / nrm_dx**2 equals the initial Lips_cur when
            # `norm` is Euclidean, so the test is expected to pass almost
            # immediately -- confirm that this is intended.
            for _ in range(btk_iters):
                if Lips_cur <= ((lam_k * lam_k) / (nrm_dx * nrm_dx)):
                    break
                Lips_cur = Lips_cur / 2
                x_nxt = problem.projection(x - 1 / Lips_cur * grad)
            # Refresh the step quantities for the accepted constant.
            # NOTE(review): placement after the loop is reconstructed from
            # flattened source -- confirm.
            diffx = x_nxt - x
            nrm_dx = norm(diffx)
            lam_k = np.sqrt(Lips_cur * dot_product(diffx, diffx))
            beta_k = Mf * nrm_dx

        alpha = min(beta_k / (lam_k * (lam_k + beta_k)), 1.)
        x_old = x
        grad_old = grad
        x = x + alpha * diffx
        end = time.time()
        # Recorded exactly once per iteration so that alpha_hist stays aligned
        # with the other histories.
        alpha_hist.append(alpha)
        f_hist.append(f)
        rdiff = nrm_dx / max(1.0, norm(x))
        time_hist.append(end - start)

        if (rdiff <= eps) and (k > 1):
            print('Convergence achieved!')
            print('iter = %4d, stepsize = %3.3e, rdiff = %3.3e,value=%g' %
                  (k, alpha, rdiff, f))
            break
        if (k % print_every == 0) or (k == 1):
            print('iter = %4d, stepsize = %3.3e, rdiff = %3.3e , f = %g' %
                  (k, alpha, rdiff, f))
    else:
        # Reached only when the loop was not left through `break`, so a run
        # that converges on its last iteration is not reported as exceeding
        # the budget, and a zero-iteration call does not fail on an unbound
        # loop variable.
        if max_iter > 0:
            f_hist.append(f)
        print('Exceed the maximum number of iterations')
    int_end = time.time()
    print(int_end - int_start)
    return x, alpha_hist, f_hist, time_hist
def run_frank_wolfe(problem,
                    x_0=None,
                    alpha_policy='standard',
                    max_iter=1000,
                    eps=1e-10,
                    print_every=10):
    """Run the Frank-Wolfe loop with the step-size policy ``alpha_policy``.

    Each iteration calls ``problem.linear_oracle`` on the gradient, computes
    the gap ``<grad, x - s>``, asks the policy for a step size and moves
    towards ``s``.  Running upper and lower bounds on the objective are
    tracked; the loop returns early once both the relative step length and
    the relative bound gap are at most ``eps``.

    Args:
        problem: object providing ``generate_start_point``, ``val``, ``grad``
            and ``linear_oracle`` (plus whatever the policy needs).
        x_0: start point; ``problem.generate_start_point()`` is used if None.
        alpha_policy: key into ``POLICY_DICT``.
        max_iter: maximum number of iterations.
        eps: tolerance for both stopping tests.
        print_every: progress is printed every ``print_every`` iterations.

    Returns:
        ``(x_best, alpha_hist, Gap_hist, f_hist, time_hist)``.  ``time_hist``
        starts with a leading ``0`` and ``f_hist`` ends with one repeated
        final value once at least one iteration ran.  ``x_best`` falls back
        to the start point if the objective never improved on ``inf``.

    The process is terminated through ``sys.exit`` if the lower bound
    overtakes the upper bound by more than a relative ``1e-3``.
    """
    policy = POLICY_DICT[alpha_policy]
    fw_state = {}
    fw_state['L'] = 1
    lower_bound = float("-inf")
    upper_bound = float("inf")
    real_Gap = upper_bound - lower_bound
    criterion = 1e10 * eps
    if x_0 is None:
        x = problem.generate_start_point()
    else:
        x = x_0
    # Fallback so the return value is defined even when no iterate ever
    # improves the upper bound (NaN objective, or max_iter <= 0).
    x_best = x
    alpha_hist = []
    Gap_hist = []
    f_hist = []
    time_hist = [0]
    int_start = time.time()
    for k in range(1, max_iter + 1):
        fw_state['k'] = k
        start_time = time.time()
        f = problem.val(x)

        # Linear minimisation step and gap at the current point.
        grad = problem.grad(x)
        fw_state['f'] = f
        fw_state['grad'] = grad
        fw_state['x'] = x
        fw_state['s'] = problem.linear_oracle(grad)
        fw_state['delta_x'] = x - fw_state['s']
        fw_state['Gap'] = dot_product(grad, fw_state['delta_x'])

        alpha = policy.get_alpha(fw_state, problem)
        x_nxt = x + alpha * (fw_state['s'] - x)
        time_hist.append(time.time() - start_time)

        x_last = x.copy()
        alpha_hist.append(alpha)
        Gap_hist.append(fw_state['Gap'])
        f_hist.append(f)
        x = x_nxt

        if f < upper_bound:
            upper_bound = f
            # NOTE(review): `f` was evaluated at the previous iterate
            # (x_last), while the point stored here is the iterate after the
            # step -- confirm which of the two is meant to be "best".
            x_best = x.copy()
        lower_bound = max(lower_bound, f - fw_state['Gap'])
        if (lower_bound - upper_bound) / abs(lower_bound) > 1e-3:
            print(
                f'upper_bound={upper_bound:.2e}, lower_bound={lower_bound:.2e}'
            )
            sys.exit("Lower bound bigger than upper bound")
        real_Gap = upper_bound - lower_bound
        criterion = min(criterion,
                        norm(x - x_last) / max(1, norm(x_last)))

        if k % print_every == 0 or k == 1:
            print(
                f'iter={k}, stepsize={alpha:.2e}, criterion={criterion:.2e},'
                f' upper_bound={upper_bound:.2e}, lower_bound={lower_bound:.2e},'
                f' real_Gap={real_Gap:.2e}, f_val={f}')

        if (criterion <= eps
            ) and (upper_bound - lower_bound) / np.abs(lower_bound) <= eps:
            f_hist.append(f)
            # The result is not used afterwards; the call is kept in case
            # problem.val has side effects callers rely on -- TODO confirm.
            f = problem.val(x_best)
            print('Convergence achieved!')
            print(
                f'iter = {k}, stepsize = {alpha}, crit = {criterion}, upper_bound={upper_bound}, lower_bound={lower_bound}, real_Gap={real_Gap}'
            )
            return x_best, alpha_hist, Gap_hist, f_hist, time_hist

    # Iteration budget exhausted; `f` only exists if the loop body ran.
    if max_iter >= 1:
        f_hist.append(f)
    int_end = time.time()
    print(int_end - int_start)
    return x_best, alpha_hist, Gap_hist, f_hist, time_hist