def projected_gradient_norm2(x, g, l, u):
    """Compute the Euclidean norm of the projected gradient at x."""
    lower = where(x == l)
    upper = where(x == u)

    pg = g.copy()
    pg[lower] = np.minimum(g[lower], 0)
    pg[upper] = np.maximum(g[upper], 0)

    return norm2(pg[where(l != u)])
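# Illustrative sketch (not part of the solver; plain numpy, hypothetical
# data): at a point where a bound is active, the projected gradient keeps a
# component only if it points into the feasible box, so its norm is a valid
# stationarity measure for bound-constrained problems.
import numpy as np

_x = np.array([0.0, 0.5, 1.0])               # x[0] at its lower bound, x[2] at its upper bound
_l = np.array([0.0, 0.0, 0.0])
_u = np.array([1.0, 1.0, 1.0])
_g = np.array([0.3, -0.2, -0.4])

_pg = _g.copy()
_pg[_x == _l] = np.minimum(_g[_x == _l], 0)  # pushing against the lower bound: zeroed
_pg[_x == _u] = np.maximum(_g[_x == _u], 0)  # pushing against the upper bound: zeroed
print(np.linalg.norm(_pg))                   # 0.2: only the free component remains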
def solve(self, **kwargs):
    """Solve.

    :keywords:
        :maxiter: maximum number of iterations.

    All other keyword arguments are passed directly to the constructor
    of the trust-region solver.
    """
    nlp = self.nlp

    # Gather initial information.
    self.f = self.nlp.obj(self.x)
    self.f0 = self.f
    self.g = self.nlp.grad(self.x)
    self.g_old = self.g
    self.gNorm = norms.norm2(self.g)
    self.g0 = self.gNorm

    self.tr.radius = min(max(0.1 * self.gNorm, 1.0), 100)
    cgtol = 1.0 if self.inexact else -1.0
    stoptol = max(self.abstol, self.reltol * self.g0)
    step_status = None

    exitUser = False
    exitOptimal = self.gNorm <= stoptol
    exitIter = self.iter >= self.maxiter
    status = ""

    # Initialize non-monotonicity parameters.
    if not self.monotone:
        fMin = fRef = fCan = self.f0
        l = 0
        sigRef = sigCan = 0

    t = cputime()

    # Print out header and initial log.
    if self.iter % 20 == 0:
        self.log.info(self.header)
        self.log.info(self.format0, self.iter, self.f, self.gNorm,
                      "", "", "", self.tr.radius, "")

    while not (exitUser or exitOptimal or exitIter):

        self.iter += 1
        self.alpha = 1.0

        if self.save_g:
            self.g_old = self.g.copy()

        # Iteratively minimize the quadratic model in the trust region
        #     min m(s) := g's + ½ s'Hs
        # Note that m(s) does not include f(x): m(0) = 0.
        if self.inexact:
            cgtol = max(stoptol, min(0.7 * cgtol, 0.01 * self.gNorm))

        qp = QPModel(self.g, self.nlp.hop(self.x, self.nlp.pi0))
        self.solver = TrustRegionSolver(qp, self.tr_solver)
        self.solver.solve(prec=self.precon,
                          radius=self.tr.radius,
                          reltol=cgtol)

        step = self.solver.step
        snorm = self.solver.step_norm
        cgiter = self.solver.niter

        # Obtain model value at next candidate.
        m = self.solver.m
        if m is None:
            m = qp.obj(step)

        self.total_cgiter += cgiter
        x_trial = self.x + step
        f_trial = nlp.obj(x_trial)

        rho = self.tr.ratio(self.f, f_trial, m)

        if not self.monotone:
            rhoHis = (fRef - f_trial) / (sigRef - m)
            rho = max(rho, rhoHis)

        step_status = "Rej"
        self.step_accepted = False

        if rho >= self.tr.eta1:

            # Trust-region step is accepted.
            self.tr.update_radius(rho, snorm)
            self.x = x_trial
            self.f = f_trial
            self.g = nlp.grad(self.x)
            self.gNorm = norms.norm2(self.g)
            self.dvars = step
            if self.save_g:
                self.dgrad = self.g - self.g_old
            step_status = "Acc"
            self.step_accepted = True

            # Update non-monotonicity parameters.
            if not self.monotone:
                sigRef = sigRef - m
                sigCan = sigCan - m
                if f_trial < fMin:
                    fCan = f_trial
                    fMin = f_trial
                    sigCan = 0
                    l = 0
                else:
                    l = l + 1

                if f_trial > fCan:
                    fCan = f_trial
                    sigCan = 0

                if l == self.n_non_monotone:
                    fRef = fCan
                    sigRef = sigCan

        else:

            # Trust-region step is rejected.
            if self.ny:  # Backtracking linesearch a la Nocedal & Yuan.
                slope = np.dot(self.g, step)
                bk = 0
                # Reduce alpha until the Armijo condition
                #     f(x + alpha * step) < f(x) + 1.0e-4 * alpha * g'step
                # holds for the current alpha.
                while bk < self.nbk and \
                        f_trial >= self.f + 1.0e-4 * self.alpha * slope:
                    bk = bk + 1
                    self.alpha /= 1.2
                    x_trial = self.x + self.alpha * step
                    f_trial = nlp.obj(x_trial)
                self.x = x_trial
                self.f = f_trial
                self.g = nlp.grad(self.x)
                self.gNorm = norms.norm2(self.g)
                self.tr.radius = self.alpha * snorm
                snorm /= self.alpha
                step_status = "N-Y"
                self.step_accepted = True
                self.dvars = self.alpha * step
                if self.save_g:
                    self.dgrad = self.g - self.g_old
            else:
                self.tr.update_radius(rho, snorm)

        self.step_status = step_status
        self.radii.append(self.tr.radius)

        status = ""
        try:
            self.post_iteration()
        except UserExitRequest:
            status = "usr"

        # Print out header, say, every 20 iterations.
        if self.iter % 20 == 0:
            self.log.info(self.header)

        pstatus = step_status if step_status != "Acc" else ""
        self.log.info(self.format % (self.iter, self.f, self.gNorm, cgiter,
                                     rho, snorm, self.tr.radius, pstatus))

        exitOptimal = self.gNorm <= stoptol
        exitIter = self.iter > self.maxiter
        exitUser = status == "usr"

    self.tsolve = cputime() - t  # Solve time.

    # Set final solver status.
    if status == "usr":
        pass
    elif self.gNorm <= stoptol:
        status = "opt"
    else:  # self.iter > self.maxiter
        status = "itr"
    self.status = status
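# Illustrative sketch (hypothetical numbers, plain numpy) of the acceptance
# test in the loop above: a step is accepted when the actual reduction
# f - f_trial agrees well enough with the reduction predicted by the
# quadratic model m(s) = g's + ½ s'Hs (recall m(0) = 0, so m(s) is itself
# the predicted change in f).
import numpy as np

_g = np.array([1.0, -2.0])
_H = np.array([[2.0, 0.0], [0.0, 4.0]])
_s = np.array([-0.5, 0.5])                  # candidate step from the TR solver

_m = _g.dot(_s) + 0.5 * _s.dot(_H.dot(_s))  # predicted change: -0.75
_f, _f_trial = 3.0, 2.4                     # hypothetical objective values
_rho = (_f - _f_trial) / (-_m)              # 0.8: accepted whenever rho >= eta1
print(_m, _rho)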
def solve(self):
    """Solve method.

    All keyword arguments are passed directly to the constructor of the
    trust-region solver.
    """
    self.log.debug("entering solve")
    model = self.model
    ls_fmt = "%7.1e %8.1e"

    # Project the initial point into [l, u].
    self.x = project(self.x, model.Lvar, model.Uvar)

    # Gather initial information.
    self.f = model.obj(self.x)
    self.f0 = self.f
    self.g = model.grad(self.x)  # Current gradient
    self.g_old = self.g.copy()
    self.x_old = self.x.copy()

    pgnorm = projected_gradient_norm2(self.x, self.g,
                                      model.Lvar, model.Uvar)
    self.pg0 = pgnorm
    cgtol = self.cgtol
    cg_iter = 0
    cgitermax = model.n

    # Initialize the trust-region radius.
    self.tr.radius = min(max(0.1 * self.pg0, 1.0), 100)

    # Test for convergence or termination.
    stoptol = max(self.gabstol, self.greltol * self.pg0)
    # stoptol = self.greltol * pgnorm

    exitUser = False
    exitOptimal = pgnorm <= stoptol
    exitIter = self.iter >= self.maxiter
    exitFunCall = model.obj.ncalls >= self.maxfuncall
    status = ""

    tick = cputime()

    # Print out header and initial log.
    if self.iter % 20 == 0:
        self.log.info(self.header)
        self.log.info(self.format0, self.iter, self.f, pgnorm,
                      "", "", "", self.tr.radius, "")

    while not (exitUser or exitOptimal or exitIter or exitFunCall):

        self.iter += 1
        self.step_accepted = False

        if self.save_g:
            self.g_old = self.g.copy()
            self.x_old = self.x.copy()

        # Wrap Hessian into an operator.
        H = model.hop(self.x.copy())

        # Compute the Cauchy step and store in s.
        (s, self.alphac) = self.cauchy(self.x, self.g, H,
                                       model.Lvar, model.Uvar,
                                       self.tr.radius, self.alphac)

        # Compute the projected Newton step.
        (x, s, cg_iter, _) = self.projected_newton_step(self.x, self.g, H,
                                                        self.tr.radius,
                                                        model.Lvar,
                                                        model.Uvar, s,
                                                        cgtol, cgitermax)

        snorm = norms.norm2(s)
        self.total_cgiter += cg_iter

        # Compute the predicted reduction.
        m = np.dot(s, self.g) + .5 * np.dot(s, H * s)

        # Evaluate actual objective.
        x_trial = project(self.x + s, model.Lvar, model.Uvar)
        f_trial = model.obj(x_trial)

        # Incorporate a magical step to further improve the trial
        # (if possible) and modify the predicted reduction to
        # take the extra improvement into account.
        if hasattr(model, "magical_step"):
            (x_trial, s_magic) = model.magical_step(x_trial)
            if s_magic is not None:
                s += s_magic
                m -= f_trial
                f_trial = model.obj(x_trial)
                m += f_trial

        # Evaluate the step and determine if the step is successful.
        # Compute the actual reduction.
        rho = self.tr.ratio(self.f, f_trial, m)
        ared = self.f - f_trial

        # On the first iteration, adjust the initial step bound.
        snorm = norms.norm2(s)
        if self.iter == 1:
            self.tr.radius = min(self.tr.radius, snorm)

        # Update the trust-region bound.
        slope = np.dot(self.g, s)
        if f_trial - self.f - slope <= 0:
            alpha = self.tr.gamma3
        else:
            alpha = max(self.tr.gamma1,
                        -0.5 * (slope / (f_trial - self.f - slope)))

        # Update the trust-region bound according to the ratio
        # of actual to predicted reduction.
        self.tr.update_radius(rho, snorm, alpha)

        # Update the iterate.
        if rho > self.tr.eta0:
            # Successful iterate; the trust-region step is accepted.
            self.x = x_trial
            self.f = f_trial
            self.g = model.grad(self.x)
            step_status = "Acc"
            self.step_accepted = True
            self.dvars = s
            if self.save_g:
                self.dgrad = self.g - self.g_old

        elif self.ny:
            try:
                # Trust-region step is rejected; backtrack.
                line_model = C1LineModel(model, self.x, s)
                ls = ArmijoLineSearch(line_model, bkmax=5, decr=1.75)

                for step in ls:
                    self.log.debug(ls_fmt, step, ls.trial_value)

                ared = self.f - ls.trial_value
                self.x = ls.iterate
                self.f = ls.trial_value
                self.g = model.grad(self.x)
                snorm *= ls.step
                self.tr.radius = snorm
                step_status = "N-Y"
                self.dvars = ls.step * s
                self.step_accepted = True
                if self.save_g:
                    self.dgrad = self.g - self.g_old

            except (LineSearchFailure, ValueError):
                step_status = "Rej"

        else:
            # Fall back on trust-region rule.
            step_status = "Rej"

        self.step_status = step_status

        status = ""
        try:
            self.post_iteration()
        except UserExitRequest:
            status = "usr"

        # Print out header, say, every 20 iterations.
        if self.iter % 20 == 0:
            self.log.info(self.header)

        pstatus = step_status if step_status != "Acc" else ""

        # Test for convergence.
        pgnorm = projected_gradient_norm2(self.x, self.g,
                                          model.Lvar, model.Uvar)
        if pstatus == "" or pstatus == "N-Y":
            if pgnorm <= stoptol:
                exitOptimal = True
                status = "gtol"
            elif abs(ared) <= self.abstol and -m <= self.abstol:
                exitOptimal = True
                status = "fatol"
            elif abs(ared) <= self.reltol * abs(self.f) and \
                    (-m <= self.reltol * abs(self.f)):
                exitOptimal = True
                status = "frtol"
        else:
            self.iter -= 1  # to match TRON iteration number

        exitIter = self.iter > self.maxiter
        exitFunCall = model.obj.ncalls >= self.maxfuncall
        exitUser = status == "usr"

        self.log.info(self.format, self.iter, self.f, pgnorm, cg_iter,
                      rho, snorm, self.tr.radius, pstatus)

    self.tsolve = cputime() - tick  # Solve time.
    self.pgnorm = pgnorm

    # Set final solver status.
    if status == "usr":
        pass
    elif self.iter > self.maxiter:
        status = "itr"
    elif status == "":  # corner case; initial guess was optimal
        status = "gtol"
    self.status = status
    self.log.info("final status: %s", self.status)
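# Illustrative sketch (hypothetical values, plain numpy) of the interpolation
# used above to pick `alpha` for the radius update: with f = q(0),
# slope = q'(0) and f_trial = q(1), the factor
# -0.5 * slope / (f_trial - f - slope) is exactly the minimizer of the
# one-dimensional quadratic q(t) = f + slope*t + (f_trial - f - slope)*t²
# interpolating those three pieces of information.
import numpy as np

_f, _slope, _f_trial = 1.0, -2.0, 0.5   # hypothetical data with positive curvature
_c = _f_trial - _f - _slope             # quadratic coefficient: 1.5
_t_star = -0.5 * _slope / _c            # 2/3, minimizer along the step

_ts = np.linspace(0.0, 1.5, 301)
_q = _f + _slope * _ts + _c * _ts ** 2
print(_t_star, _ts[np.argmin(_q)])      # both close to 0.667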
def projected_newton_step(self, x, g, H, delta, l, u, s, cgtol, itermax):
    u"""Generate a sequence of approximate minimizers to the QP subproblem

        min q(x)  subject to  l ≤ x ≤ u,

    where q(x₀ + s) = gᵀs + ½ sᵀHs, x₀ is a base point provided by the
    user, H=Hᵀ and g is a vector.

    At each stage we have an approximate minimizer xₖ, and generate a
    direction pₖ by using a preconditioned conjugate gradient method on
    the subproblem

        min { q(xₖ + p) | ‖p‖ ≤ Δ, s(fixed) = 0 },

    where fixed is the set of variables fixed at xₖ and Δ is the
    trust-region bound. Given pₖ, the next minimizer is generated by a
    projected search.

    The starting point for this subroutine is x₁ = x₀ + s, where s is the
    Cauchy step.

    Returned status is one of the following:

        info = 1  Convergence. The final step s satisfies
                  ‖(g + H s)[free]‖ ≤ cgtol ‖g[free]‖, and the final x is
                  an approximate minimizer in the face defined by the
                  free variables.
        info = 2  Termination. The trust-region bound does not allow
                  further progress: ‖pₖ‖ = Δ.
        info = 3  Failure to converge within itermax iterations.
        info = 4  The trust-region solver could make no further progress
                  on the problem, i.e., the computed step is zero.
                  Return with the current point.
    """
    self.log.debug("entering projected_newton_step")

    exitOptimal = False
    exitPCG = False
    exitIter = False

    Hs = H * s

    # Compute the Cauchy point.
    x = project(x + s, l, u)

    # Start the main iteration loop.
    # There are at most n iterations because at each iteration
    # at least one variable becomes active.
    iters = 0
    while not (exitOptimal or exitPCG or exitIter):

        # Determine the free variables at the current minimizer.
        free_vars = where((x > l) & (x < u))
        nfree = len(free_vars)

        # Exit if there are no free constraints.
        if nfree == 0:
            exitOptimal = True
            info = 1
            continue

        # Obtain the submatrix of H for the free variables.
        ZHZ = ReducedHessian(H, free_vars)

        # Compute the norm of the reduced gradient Zᵀg.
        gfree = g[free_vars] + Hs[free_vars]
        gfnorm = norms.norm2(g[free_vars])

        # Solve the trust-region subproblem in the free variables
        # to generate a direction p[k].
        tol = cgtol * gfnorm  # note: gfnorm ≠ norm(gfree)

        qp = QPModel(gfree, ZHZ)
        self.solver = TrustRegionSolver(qp, self.tr_solver)
        self.solver.solve(prec=self.precon,
                          radius=self.tr.radius,
                          abstol=tol)

        step = self.solver.step
        iters += self.solver.niter

        # Exit if the solver took no additional steps.
        if self.solver.niter == 0:
            exitOptimal = True
            info = 4
            continue

        # Use a projected search to obtain the next iterate.
        (xfree, proj_step) = self.projected_linesearch(x[free_vars],
                                                       l[free_vars],
                                                       u[free_vars],
                                                       gfree, step, ZHZ,
                                                       alpha=1.0)

        # Update the minimizer and the step.
        # Note that s now contains x[k+1] - x[0].
        x[free_vars] = xfree
        s[free_vars] += proj_step

        # Compute the gradient grad q(x[k+1]) = g + H*(x[k+1] - x[0])
        # of q at x[k+1] for the free variables.
        Hs = H * s
        gfree = g[free_vars] + Hs[free_vars]
        gfnormf = norms.norm2(gfree)

        # Convergence and termination test.
        # We terminate if the preconditioned conjugate gradient method
        # encounters a direction of negative curvature, or
        # if the step is at the trust-region bound.
        if gfnormf <= cgtol * gfnorm:
            exitOptimal = True
            info = 1
        elif self.solver.status == "trust-region boundary active":
            # infotr == 3 or infotr == 4
            exitPCG = True
            info = 2
        elif iters >= itermax:
            exitIter = True
            info = 3

    self.log.debug("leaving projected_newton_step with info=%d", info)
    return (x, s, iters, info)
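# Illustrative sketch (plain numpy, hypothetical data) of the reduction to
# the free variables used above: `ReducedHessian(H, free_vars)` plays the
# role of the submatrix H[free, free], and the reduced gradient is the
# corresponding slice of grad q(x₀ + s) = g + H*s.
import numpy as np

_H = np.array([[4.0, 1.0, 0.0],
               [1.0, 3.0, 1.0],
               [0.0, 1.0, 2.0]])
_g = np.array([1.0, -1.0, 0.5])
_s = np.array([0.0, 0.2, -0.1])          # current step from the base point
_free = np.array([1, 2])                 # variables strictly inside the box

_ZHZ = _H[np.ix_(_free, _free)]          # reduced Hessian on the free face
_gfree = (_g + _H.dot(_s))[_free]        # reduced gradient of q at x₀ + s
print(_ZHZ, _gfree)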
def cauchy(self, x, g, H, l, u, delta, alpha):
    u"""Compute a Cauchy step.

    This step must satisfy a trust-region constraint and a sufficient
    decrease condition.

    The Cauchy step is computed for the quadratic

        q(s) = gᵀs + ½ sᵀHs,

    where H=Hᵀ and g is a vector. Given a parameter α, the Cauchy step is

        s[α] = P[x - α g] - x,

    with P the projection into the box [l, u]. The Cauchy step satisfies
    the trust-region constraint and the sufficient decrease condition

        ‖s‖ ≤ Δ,    q(s) ≤ μ₀ gᵀs,

    where μ₀ ∈ (0, 1).
    """
    self.log.debug(u"computing Cauchy point with α=%g, δ=%g", alpha, delta)

    # Constant that defines sufficient decrease.
    mu0 = 0.01

    # Interpolation and extrapolation factors.
    interpf = 0.1
    extrapf = 10

    # Find the minimal and maximal breakpoints along x - α g.
    (_, _, brptmax) = breakpoints(x, -g, l, u)
    self.log.debug("farthest breakpoint: %7.1e", brptmax)

    # Decide whether to interpolate or extrapolate.
    s = projected_step(x, -alpha * g, l, u)
    if norms.norm2(s) > delta:
        interp = True
    else:
        Hs = H * s
        gts = np.dot(g, s)
        interp = (.5 * np.dot(Hs, s) + gts >= mu0 * gts)

    # Either interpolate or extrapolate to find a successful step.
    if interp:
        # Reduce alpha until a successful step is found.
        self.log.debug("interpolating")
        search = True
        while search:
            alpha *= interpf
            s = projected_step(x, -alpha * g, l, u)
            s_norm = norms.norm2(s)
            self.log.debug("step norm = %g", s_norm)
            if s_norm <= delta:
                Hs = H * s
                gts = np.dot(g, s)
                search = (.5 * np.dot(Hs, s) + gts > mu0 * gts)

    else:
        # Increase alpha until a successful step is found.
        self.log.debug("extrapolating")
        search = True
        alphas = alpha
        while search and alpha <= brptmax:
            alpha *= extrapf
            s = projected_step(x, -alpha * g, l, u)
            s_norm = norms.norm2(s)
            self.log.debug("step norm = %g", s_norm)
            if s_norm <= delta:
                Hs = H * s
                gts = np.dot(g, s)
                if .5 * np.dot(Hs, s) + gts < mu0 * gts:
                    search = True
                    alphas = alpha
                else:
                    search = False

        # Recover the last successful step.
        alpha = alphas
        s = projected_step(x, -alpha * g, l, u)

    return (s, alpha)
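# Illustrative sketch (plain numpy, hypothetical data) of the Cauchy step
# definition in the docstring above, s[α] = P[x - α g] - x, where P is the
# projection into the box [l, u]; `projected_step` in the code above plays
# the role of the clip-and-subtract below.
import numpy as np

_x = np.array([0.5, 0.9])
_g = np.array([1.0, -2.0])
_l = np.array([0.0, 0.0])
_u = np.array([1.0, 1.0])
_alpha = 0.4

_s = np.clip(_x - _alpha * _g, _l, _u) - _x  # projected gradient step
print(_s)                                    # [-0.4, 0.1]: second component hits u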
def solve(self):
    """Solve model with the L-BFGS method."""
    model = self.model
    x = self.x
    self.logger.info(self.hdr)

    tstart = cputime()

    self.f0 = self.f = f = model.obj(x)
    self.g = g = model.grad(x)
    self.g_norm0 = g_norm = norms.norm2(g)
    stoptol = max(self.abstol, self.reltol * self.g_norm0)

    exitUser = False
    exitLS = False
    exitOptimal = g_norm <= stoptol
    exitIter = self.iter >= self.maxiter
    status = ""

    while not (exitUser or exitOptimal or exitIter or exitLS):

        # Obtain search direction.
        H = model.hop(x)
        d = -(H * g)

        # Prepare for modified linesearch.
        step0 = max(1.0e-3, 1.0 / g_norm) if self.iter == 0 else 1.0
        line_model = C1LineModel(self.model, x, d)
        ls = self.setup_linesearch(line_model, step0)
        try:
            for step in ls:
                self.logger.debug(self.ls_fmt, step, ls.trial_value)
        except LineSearchFailure:
            exitLS = True
            continue

        self.logger.info(self.fmt, self.iter, f, g_norm, ls.slope, ls.step)

        # Prepare new pair {s, y} to be inserted into L-BFGS operator.
        self.s = ls.step * d
        x = ls.iterate
        g_next = line_model.gradval
        self.y = g_next - g

        status = ""
        try:
            self.post_iteration()
        except UserExitRequest:
            status = "usr"

        # Prepare for next round.
        g = g_next
        g_norm = norms.norm2(g)
        f = ls.trial_value
        self.iter += 1

        exitOptimal = g_norm <= stoptol
        exitIter = self.iter >= self.maxiter
        exitUser = status == "usr"

    self.tsolve = cputime() - tstart
    self.logger.info(self.fmt_short, self.iter, f, g_norm)

    self.x = x
    self.f = f
    self.g = g
    self.g_norm = g_norm

    # Set final solver status.
    if status == "usr":
        pass
    elif self.g_norm <= stoptol:
        status = "opt"
    elif exitLS:
        status = "lsf"
    else:  # self.iter > self.maxiter
        status = "itr"
    self.status = status
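# Illustrative sketch (an assumption, not necessarily this library's
# implementation): the operator returned by `model.hop(x)` applies an
# approximation of the inverse Hessian built from the stored pairs {s, y},
# so that d = -(H * g) is a quasi-Newton direction.  The standard L-BFGS
# two-loop recursion computing such a product looks like this.
import numpy as np

def lbfgs_two_loop(g, pairs):
    """Return H*g for the L-BFGS inverse-Hessian approximation H.

    `pairs` is a list of (s, y) = (x_{k+1} - x_k, g_{k+1} - g_k),
    oldest first.  All pairs are assumed to satisfy s'y > 0.
    """
    q = g.astype(float).copy()
    alphas = []
    for s, y in reversed(pairs):          # first loop: newest pair first
        rho = 1.0 / y.dot(s)
        a = rho * s.dot(q)
        q -= a * y
        alphas.append((rho, a))
    if pairs:                             # initial scaling gamma = s'y / y'y
        s, y = pairs[-1]
        q *= s.dot(y) / y.dot(y)
    for (s, y), (rho, a) in zip(pairs, reversed(alphas)):
        b = rho * y.dot(q)                # second loop: oldest pair first
        q += (a - b) * s
    return q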