def solve(self, problem, x=None, reuselinesearch=False):
    """
    Perform optimization using the nonlinear conjugate gradient method with
    linesearch.

    This method first computes the gradient of the cost, and then optimizes
    by moving in a direction that is conjugate to all previous search
    directions.

    Arguments:
        - problem
            Pymanopt problem set up using the Problem class; this must
            have a .manifold attribute specifying the manifold to optimize
            over, as well as a cost and enough information to compute the
            gradient of that cost.
        - x=None
            Optional parameter. Starting point on the manifold. If None,
            a starting point will be randomly generated.
        - reuselinesearch=False
            Whether to reuse the previous linesearch object. Allows
            reusing information from a previous solve run.
    Returns:
        - x
            Local minimum of the cost, or, if the algorithm terminated
            before convergence, the point at which it terminated.
    """
    man = problem.manifold
    verbosity = problem.verbosity
    objective = problem.cost
    gradient = problem.grad

    if not reuselinesearch or self.linesearch is None:
        self.linesearch = deepcopy(self._linesearch)
    linesearch = self.linesearch

    # If no starting point is specified, generate one at random.
    if x is None:
        x = man.rand()

    if verbosity >= 1:
        print("Optimizing...")
    if verbosity >= 2:
        iter_format_length = int(np.log10(self._maxiter)) + 1
        column_printer = printer.ColumnPrinter(columns=[
            ("Iteration", f"{iter_format_length}d"),
            ("Cost", "+.16e"),
            ("Gradient norm", ".8e"),
        ])
    else:
        column_printer = printer.VoidPrinter()
    column_printer.print_header()

    # Calculate initial cost-related quantities.
    cost = objective(x)
    grad = gradient(x)
    gradnorm = man.norm(x, grad)
    Pgrad = problem.precon(x, grad)
    gradPgrad = man.inner(x, grad, Pgrad)

    # Initial descent direction is the negative (preconditioned) gradient.
    desc_dir = -Pgrad

    self._start_optlog(extraiterfields=['gradnorm'],
                       solverparams={'beta_type': self._beta_type,
                                     'orth_value': self._orth_value,
                                     'linesearcher': linesearch})

    # Initialize iteration counter and timer.
    iter = 0
    stepsize = np.nan
    time0 = time.time()

    while True:
        column_printer.print_row([iter, cost, gradnorm])

        if self._logverbosity >= 2:
            self._append_optlog(iter, x, cost, gradnorm=gradnorm)

        stop_reason = self._check_stopping_criterion(
            time0, gradnorm=gradnorm, iter=iter + 1, stepsize=stepsize)

        if stop_reason:
            if verbosity >= 1:
                print(stop_reason)
                print('')
            break

        # The line search algorithms require the directional derivative of
        # the cost at the current point x along the search direction.
        df0 = man.inner(x, grad, desc_dir)

        # If we did not get a descent direction, restart: reset to the
        # (preconditioned) steepest descent direction, which discards the
        # accumulated CG memory.
        if df0 >= 0:
            if verbosity >= 3:
                print("Conjugate gradient info: got an ascent direction "
                      "(df0 = %.2f), reset to the (preconditioned) "
                      "steepest descent direction." % df0)
            desc_dir = -Pgrad
            df0 = -gradPgrad

        # Execute line search.
        stepsize, newx = linesearch.search(objective, man, x, desc_dir,
                                           cost, df0)

        # Compute the new cost-related quantities for newx.
        newcost = objective(newx)
        newgrad = gradient(newx)
        newgradnorm = man.norm(newx, newgrad)
        Pnewgrad = problem.precon(newx, newgrad)
        newgradPnewgrad = man.inner(newx, newgrad, Pnewgrad)

        # Apply the CG scheme to compute the next search direction.
        oldgrad = man.transp(x, newx, grad)
        orth_grads = man.inner(newx, oldgrad, Pnewgrad) / newgradPnewgrad

        # Powell's restart strategy (see page 12 of Hager and Zhang's
        # survey on conjugate gradient methods, for example).
        if abs(orth_grads) >= self._orth_value:
            beta = 0
            desc_dir = -Pnewgrad
        else:
            desc_dir = man.transp(x, newx, desc_dir)

            if self._beta_type == BetaTypes.FletcherReeves:
                beta = newgradPnewgrad / gradPgrad
            elif self._beta_type == BetaTypes.PolakRibiere:
                diff = newgrad - oldgrad
                ip_diff = man.inner(newx, Pnewgrad, diff)
                beta = max(0, ip_diff / gradPgrad)
            elif self._beta_type == BetaTypes.HestenesStiefel:
                diff = newgrad - oldgrad
                ip_diff = man.inner(newx, Pnewgrad, diff)
                try:
                    beta = max(0,
                               ip_diff / man.inner(newx, diff, desc_dir))
                except ZeroDivisionError:
                    # man.inner(newx, diff, desc_dir) == 0
                    beta = 1
            elif self._beta_type == BetaTypes.HagerZhang:
                diff = newgrad - oldgrad
                Poldgrad = man.transp(x, newx, Pgrad)
                Pdiff = Pnewgrad - Poldgrad
                deno = man.inner(newx, diff, desc_dir)
                numo = man.inner(newx, diff, Pnewgrad)
                numo -= (2 * man.inner(newx, diff, Pdiff) *
                         man.inner(newx, desc_dir, newgrad) / deno)
                beta = numo / deno
                # Robustness safeguard (see the Hager-Zhang paper
                # mentioned above).
                desc_dir_norm = man.norm(newx, desc_dir)
                eta_HZ = -1 / (desc_dir_norm * min(0.01, gradnorm))
                beta = max(beta, eta_HZ)
            else:
                types = ", ".join(
                    ["BetaTypes.%s" % t for t in BetaTypes._fields])
                raise ValueError(
                    "Unknown beta_type %s. Should be one of %s." % (
                        self._beta_type, types))

            desc_dir = -Pnewgrad + beta * desc_dir

        # Update the necessary variables for the next iteration.
        x = newx
        cost = newcost
        grad = newgrad
        Pgrad = Pnewgrad
        gradnorm = newgradnorm
        gradPgrad = newgradPnewgrad
        iter += 1

    if self._logverbosity <= 0:
        return x
    else:
        self._stop_optlog(x, cost, stop_reason, time0,
                          stepsize=stepsize, gradnorm=gradnorm, iter=iter)
        return x, self._optlog
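
# Illustration only (not part of the solver): the same update scheme written
# out for the flat Euclidean case, where the vector transport is the identity
# and no preconditioner is used. A minimal sketch under those assumptions;
# the function name, the 0.1 restart threshold, and the backtracking constants
# below are invented for the example, and only numpy is assumed.
import numpy as np


def fletcher_reeves_cg(cost, grad, x0, max_iterations=500, tol=1e-6):
    """Euclidean nonlinear CG with a Fletcher-Reeves beta and backtracking."""
    x = np.asarray(x0, dtype=float)
    g = grad(x)
    d = -g  # initial direction: steepest descent
    for _ in range(max_iterations):
        if np.linalg.norm(g) < tol:
            break
        df0 = g @ d
        if df0 >= 0:
            # Not a descent direction: reset, as in the solver above.
            d = -g
            df0 = -(g @ g)
        # Backtracking (Armijo) line search along d.
        t, f0 = 1.0, cost(x)
        while cost(x + t * d) > f0 + 1e-4 * t * df0 and t > 1e-12:
            t *= 0.5
        x_new = x + t * d
        g_new = grad(x_new)
        gn2 = g_new @ g_new
        if gn2 < tol**2:
            return x_new
        # Fletcher-Reeves coefficient: ||g_new||^2 / ||g||^2.
        beta = gn2 / (g @ g)
        # Powell-style restart when successive gradients lose orthogonality.
        if abs(g_new @ g) / gn2 >= 0.1:
            beta = 0.0
        d = -g_new + beta * d
        x, g = x_new, g_new
    return x

# Example usage on a quadratic:
#     A = np.diag([1.0, 10.0])
#     x_min = fletcher_reeves_cg(lambda x: x @ A @ x, lambda x: 2 * A @ x,
#                                np.array([3.0, -2.0]))
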
def solve(self, problem, x=None):
    """
    Perform optimization using the particle swarm optimization algorithm.

    Arguments:
        - problem
            Pymanopt problem set up using the Problem class; this must
            have a .manifold attribute specifying the manifold to optimize
            over, as well as a cost (specified using a Theano graph or as
            a Python function).
        - x=None
            Optional parameter. Initial population of elements on the
            manifold. If None, an initial population will be randomly
            generated.
    Returns:
        - x
            Local minimum of the cost, or, if the algorithm terminated
            before convergence, the point at which it terminated.
    """
    man = problem.manifold
    verbosity = problem.verbosity
    objective = problem.cost

    # Choose proper default algorithm parameters. We need to know the
    # dimension of the manifold to limit the parameter range, so we have
    # to defer proper initialization until this point.
    dim = man.dim
    if self._maxcostevals is None:
        self._maxcostevals = max(5000, 2 * dim)
    if self._maxiter is None:
        self._maxiter = max(500, 4 * dim)
    if self._populationsize is None:
        self._populationsize = min(40, 10 * dim)

    # If no initial population x is given by the user, generate one at
    # random.
    if x is None:
        x = [man.rand() for i in range(int(self._populationsize))]
    elif not hasattr(x, "__iter__"):
        raise ValueError("The initial population x must be iterable")
    else:
        if len(x) != self._populationsize:
            print("The population size was forced to the size of "
                  "the given initial population")
            self._populationsize = len(x)

    # Initialize personal best positions to the initial population.
    y = list(x)
    # Save a copy of the swarm at the previous iteration.
    xprev = list(x)

    # Initialize velocities for each particle.
    v = [man.randvec(xi) for xi in x]

    # Compute the cost of each particle xi.
    costs = np.array([objective(xi) for xi in x])
    fy = list(costs)
    costevals = self._populationsize

    # Identify the best particle and store its cost/position.
    imin = costs.argmin()
    fbest = costs[imin]
    xbest = x[imin]

    if verbosity >= 2:
        iter_format_length = int(np.log10(self._maxiter)) + 1
        column_printer = printer.ColumnPrinter(columns=[
            ("Iteration", f"{iter_format_length}d"),
            ("Cost evaluations", "7d"),
            ("Best cost", "+.8e"),
        ])
    else:
        column_printer = printer.VoidPrinter()
    column_printer.print_header()

    self._start_optlog()

    # Iteration counter (at any point, iter is the number of fully
    # executed iterations so far).
    iter = 0
    time0 = time.time()

    while True:
        iter += 1

        column_printer.print_row([iter, costevals, fbest])

        # Stop if any particle triggers a stopping criterion.
        for i, xi in enumerate(x):
            stop_reason = self._check_stopping_criterion(
                time0, iter=iter, costevals=costevals)
            if stop_reason is not None:
                break

        if stop_reason:
            if verbosity >= 1:
                print(stop_reason)
                print('')
            break

        # Compute the inertia factor, which we linearly decrease from 0.9
        # to 0.4 from iter = 0 to iter = maxiter.
        w = 0.4 + 0.5 * (1 - iter / self._maxiter)

        # Compute the velocities.
        for i, xi in enumerate(x):
            # Get the past best position of particle i.
            yi = y[i]
            # Get the previous position and velocity of particle i.
            xiprev = xprev[i]
            vi = v[i]

            # Compute the new velocity of particle i, composed of three
            # contributions.
            inertia = w * man.transp(xiprev, xi, vi)
            nostalgia = rnd.rand() * self._nostalgia * man.log(xi, yi)
            social = rnd.rand() * self._social * man.log(xi, xbest)

            v[i] = inertia + nostalgia + social

        # Backup the current swarm positions.
        xprev = list(x)

        # Update positions, personal bests and global best.
        for i, xi in enumerate(x):
            # Compute the new position of particle i.
            xi = man.retr(xi, v[i])
            x[i] = xi

            # Compute the new cost of particle i.
            fxi = objective(xi)

            # Update the cost of particle i in the swarm.
            costs[i] = fxi

            # Update the personal best if necessary.
            if fxi < fy[i]:
                fy[i] = fxi
                y[i] = xi

                # Update the global best if necessary.
                if fy[i] < fbest:
                    fbest = fy[i]
                    xbest = xi

        costevals += self._populationsize

    if self._logverbosity <= 0:
        return xbest
    else:
        self._stop_optlog(xbest, fbest, stop_reason, time0,
                          costevals=costevals, iter=iter)
        return xbest, self._optlog
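
# Illustration only (not part of the solver): the same velocity and position
# update in flat Euclidean space, where transp is the identity,
# log(x, y) = y - x and retr(x, v) = x + v. A minimal sketch under those
# assumptions; the function name and default parameters are invented for the
# example, and only numpy is assumed. Unlike the solver above, the global
# best is updated particle by particle within an iteration.
import numpy as np


def euclidean_pso(cost, dim, popsize=40, maxiter=200,
                  nostalgia=1.4, social=1.4, seed=0):
    rng = np.random.default_rng(seed)
    x = rng.standard_normal((popsize, dim))      # positions
    v = rng.standard_normal((popsize, dim))      # velocities
    y = x.copy()                                 # personal best positions
    fy = np.array([cost(xi) for xi in x])        # personal best costs
    ibest = int(fy.argmin())
    xbest, fbest = y[ibest].copy(), fy[ibest]    # global best

    for it in range(maxiter):
        # Inertia factor, decreased linearly from 0.9 to 0.4 over the run.
        w = 0.4 + 0.5 * (1 - it / maxiter)
        for i in range(popsize):
            inertia = w * v[i]
            nost = rng.random() * nostalgia * (y[i] - x[i])
            soc = rng.random() * social * (xbest - x[i])
            v[i] = inertia + nost + soc
            x[i] = x[i] + v[i]                   # "retraction" is addition
            fxi = cost(x[i])
            if fxi < fy[i]:                      # update the personal best
                fy[i], y[i] = fxi, x[i].copy()
                if fxi < fbest:                  # update the global best
                    fbest, xbest = fxi, x[i].copy()
    return xbest, fbest

# Example usage on a shifted sphere function:
#     xbest, fbest = euclidean_pso(lambda z: np.sum((z - 1.0) ** 2), dim=5)
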
def solve(self, problem, x=None, reuselinesearch=False):
    """
    Perform optimization using gradient descent with linesearch.

    This method first computes the gradient (derivative) of the cost, and
    then optimizes by moving in the direction of steepest descent (which
    is the opposite direction to the gradient).

    Arguments:
        - problem
            Pymanopt problem set up using the Problem class; this must
            have a .manifold attribute specifying the manifold to optimize
            over, as well as a cost and enough information to compute the
            gradient of that cost.
        - x=None
            Optional parameter. Starting point on the manifold. If None,
            a starting point will be randomly generated.
        - reuselinesearch=False
            Whether to reuse the previous linesearch object. Allows
            reusing information from a previous solve run.
    Returns:
        - x
            Local minimum of the cost, or, if the algorithm terminated
            before convergence, the point at which it terminated.
    """
    man = problem.manifold
    verbosity = problem.verbosity
    objective = problem.cost
    gradient = problem.grad

    if not reuselinesearch or self.linesearch is None:
        self.linesearch = deepcopy(self._linesearch)
    linesearch = self.linesearch

    # If no starting point is specified, generate one at random.
    if x is None:
        x = man.rand()

    if verbosity >= 1:
        print("Optimizing...")
    if verbosity >= 2:
        iter_format_length = int(np.log10(self._maxiter)) + 1
        column_printer = printer.ColumnPrinter(columns=[
            ("Iteration", f"{iter_format_length}d"),
            ("Cost", "+.16e"),
            ("Gradient norm", ".8e"),
        ])
    else:
        column_printer = printer.VoidPrinter()
    column_printer.print_header()

    self._start_optlog(extraiterfields=['gradnorm'],
                       solverparams={'linesearcher': linesearch})

    # Initialize iteration counter and timer.
    iter = 0
    time0 = time.time()

    while True:
        # Calculate new cost, grad and gradnorm.
        cost = objective(x)
        grad = gradient(x)
        gradnorm = man.norm(x, grad)
        iter = iter + 1

        column_printer.print_row([iter, cost, gradnorm])

        if self._logverbosity >= 2:
            self._append_optlog(iter, x, cost, gradnorm=gradnorm)

        # Descent direction is minus the gradient.
        desc_dir = -grad

        # Perform line search; the directional derivative of the cost
        # along the negative gradient is -||grad||^2.
        stepsize, x = linesearch.search(objective, man, x, desc_dir,
                                        cost, -gradnorm**2)

        stop_reason = self._check_stopping_criterion(
            time0, stepsize=stepsize, gradnorm=gradnorm, iter=iter)

        if stop_reason:
            if verbosity >= 1:
                print(stop_reason)
                print('')
            break

    if self._logverbosity <= 0:
        return x
    else:
        self._stop_optlog(x, objective(x), stop_reason, time0,
                          stepsize=stepsize, gradnorm=gradnorm, iter=iter)
        return x, self._optlog
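
# Illustration only (not part of the solver): the same loop in flat Euclidean
# space with a backtracking (Armijo) line search, showing how the directional
# derivative -||grad||^2 is fed to the search. A minimal sketch under those
# assumptions; the function name and the backtracking constants are invented
# for the example, and only numpy is assumed.
import numpy as np


def euclidean_steepest_descent(cost, grad, x0, maxiter=1000,
                               mingradnorm=1e-6, initial_step=1.0):
    x = np.asarray(x0, dtype=float)
    for _ in range(maxiter):
        g = grad(x)
        gradnorm = np.linalg.norm(g)
        if gradnorm < mingradnorm:
            break
        desc_dir = -g
        df0 = -gradnorm**2                  # directional derivative along -g
        # Backtracking line search enforcing the Armijo condition.
        t, f0 = initial_step, cost(x)
        while cost(x + t * desc_dir) > f0 + 1e-4 * t * df0 and t > 1e-12:
            t *= 0.5
        x = x + t * desc_dir
    return x

# Example usage on a quadratic bowl:
#     x_opt = euclidean_steepest_descent(lambda x: np.sum(x**2),
#                                        lambda x: 2 * x,
#                                        np.array([2.0, -3.0]))
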
def run(self, problem, *, initial_point=None,
        reuse_line_searcher=False) -> OptimizerResult:
    """Run the conjugate gradient method.

    Args:
        problem: Pymanopt problem class instance exposing the cost
            function and the manifold to optimize over.
        initial_point: Initial point on the manifold.
            If no value is provided then a starting point will be
            randomly generated.
        reuse_line_searcher: Whether to reuse the previous line searcher.
            Allows reusing information from a previous call to
            :meth:`run`.

    Returns:
        Local minimum of the cost function, or the most recent iterate if
        the algorithm terminated before convergence.
    """
    manifold = problem.manifold
    objective = problem.cost
    gradient = problem.riemannian_gradient

    if not reuse_line_searcher or self.line_searcher is None:
        self.line_searcher = deepcopy(self._line_searcher)
    line_searcher = self.line_searcher

    # If no starting point is specified, generate one at random.
    if initial_point is None:
        x = manifold.random_point()
    else:
        x = initial_point

    if self._verbosity >= 1:
        print("Optimizing...")
    if self._verbosity >= 2:
        iteration_format_length = int(np.log10(self._max_iterations)) + 1
        column_printer = printer.ColumnPrinter(columns=[
            ("Iteration", f"{iteration_format_length}d"),
            ("Cost", "+.16e"),
            ("Gradient norm", ".8e"),
        ])
    else:
        column_printer = printer.VoidPrinter()
    column_printer.print_header()

    # Calculate initial cost-related quantities.
    cost = objective(x)
    grad = gradient(x)
    gradient_norm = manifold.norm(x, grad)
    Pgrad = problem.preconditioner(x, grad)
    gradPgrad = manifold.inner_product(x, grad, Pgrad)

    # Initial descent direction is the negative (preconditioned) gradient.
    descent_direction = -Pgrad

    self._initialize_log(
        optimizer_parameters={
            "beta_rule": self._beta_rule,
            "orth_value": self._orth_value,
            "line_searcher": line_searcher,
        },
    )

    # Initialize iteration counter and timer.
    iteration = 0
    step_size = np.nan
    start_time = time.time()

    while True:
        iteration += 1

        column_printer.print_row([iteration, cost, gradient_norm])

        self._add_log_entry(
            iteration=iteration,
            point=x,
            cost=cost,
            gradient_norm=gradient_norm,
        )

        stopping_criterion = self._check_stopping_criterion(
            start_time=start_time,
            gradient_norm=gradient_norm,
            iteration=iteration,
            step_size=step_size,
        )

        if stopping_criterion:
            if self._verbosity >= 1:
                print(stopping_criterion)
                print("")
            break

        # The line search algorithms require the directional derivative of
        # the cost at the current point x along the search direction.
        df0 = manifold.inner_product(x, grad, descent_direction)

        # If we did not get a descent direction, restart: reset to the
        # (preconditioned) steepest descent direction, which discards the
        # accumulated CG memory.
        if df0 >= 0:
            if self._verbosity >= 3:
                print(
                    "Conjugate gradient info: got an ascent direction "
                    f"(df0 = {df0:.2f}), reset to the (preconditioned) "
                    "steepest descent direction."
                )
            descent_direction = -Pgrad
            df0 = -gradPgrad

        # Execute line search.
        step_size, newx = line_searcher.search(
            objective, manifold, x, descent_direction, cost, df0)

        # Compute the new cost-related quantities for newx.
        newcost = objective(newx)
        newgrad = gradient(newx)
        newgradient_norm = manifold.norm(newx, newgrad)
        Pnewgrad = problem.preconditioner(newx, newgrad)
        newgradPnewgrad = manifold.inner_product(newx, newgrad, Pnewgrad)

        # Powell's restart strategy.
        oldgrad = manifold.transport(x, newx, grad)
        orth_grads = (manifold.inner_product(newx, oldgrad, Pnewgrad) /
                      newgradPnewgrad)
        if abs(orth_grads) >= self._orth_value:
            beta = 0
            descent_direction = -Pnewgrad
        else:
            # Transport the latest search direction to the tangent space
            # at the new estimate.
            descent_direction = manifold.transport(
                x, newx, descent_direction)
            beta = self._beta_update(
                manifold=manifold,
                x=x,
                newx=newx,
                grad=grad,
                newgrad=newgrad,
                Pnewgrad=Pnewgrad,
                newgradPnewgrad=newgradPnewgrad,
                Pgrad=Pgrad,
                gradPgrad=gradPgrad,
                gradient_norm=gradient_norm,
                oldgrad=oldgrad,
                descent_direction=descent_direction,
            )
            descent_direction = -Pnewgrad + beta * descent_direction

        # Update the necessary variables for the next iteration.
        x = newx
        cost = newcost
        grad = newgrad
        Pgrad = Pnewgrad
        gradient_norm = newgradient_norm
        gradPgrad = newgradPnewgrad

    return self._return_result(
        start_time=start_time,
        point=x,
        cost=cost,
        iterations=iteration,
        stopping_criterion=stopping_criterion,
        cost_evaluations=iteration,
        step_size=step_size,
        gradient_norm=gradient_norm,
    )
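
# Illustration only (not part of the optimizer): the kind of coefficient a
# beta rule invoked through self._beta_update computes, written for the flat
# Euclidean case (the transport is the identity and no preconditioner is
# used). A minimal sketch under those assumptions; the function name and
# signature are invented for the example, and a nonzero previous gradient is
# assumed.
import numpy as np


def polak_ribiere_beta(grad, newgrad):
    """Polak-Ribiere+ coefficient: max(0, <g_new, g_new - g> / <g, g>)."""
    diff = newgrad - grad
    return max(0.0, float(newgrad @ diff) / float(grad @ grad))

# The next search direction is then built exactly as in the loop above:
#     descent_direction = -newgrad + beta * descent_direction
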
def run(self, problem, *, initial_point=None,
        reuse_line_searcher=False) -> OptimizerResult:
    """Run the steepest descent algorithm.

    Args:
        problem: Pymanopt problem class instance exposing the cost
            function and the manifold to optimize over.
        initial_point: Initial point on the manifold.
            If no value is provided then a starting point will be
            randomly generated.
        reuse_line_searcher: Whether to reuse the previous line searcher.
            Allows reusing information from a previous call to
            :meth:`run`.

    Returns:
        Local minimum of the cost function, or the most recent iterate if
        the algorithm terminated before convergence.
    """
    manifold = problem.manifold
    objective = problem.cost
    gradient = problem.riemannian_gradient

    if not reuse_line_searcher or self.line_searcher is None:
        self.line_searcher = deepcopy(self._line_searcher)
    line_searcher = self.line_searcher

    # If no starting point is specified, generate one at random.
    if initial_point is None:
        x = manifold.random_point()
    else:
        x = initial_point

    if self._verbosity >= 1:
        print("Optimizing...")
    if self._verbosity >= 2:
        iteration_format_length = int(np.log10(self._max_iterations)) + 1
        column_printer = printer.ColumnPrinter(columns=[
            ("Iteration", f"{iteration_format_length}d"),
            ("Cost", "+.16e"),
            ("Gradient norm", ".8e"),
        ])
    else:
        column_printer = printer.VoidPrinter()
    column_printer.print_header()

    self._initialize_log(
        optimizer_parameters={"line_searcher": line_searcher})

    # Initialize iteration counter and timer.
    iteration = 0
    start_time = time.time()

    while True:
        iteration += 1

        # Calculate new cost, grad and gradient_norm.
        cost = objective(x)
        grad = gradient(x)
        gradient_norm = manifold.norm(x, grad)

        column_printer.print_row([iteration, cost, gradient_norm])

        self._add_log_entry(
            iteration=iteration,
            point=x,
            cost=cost,
            gradient_norm=gradient_norm,
        )

        # Descent direction is minus the gradient.
        desc_dir = -grad

        # Perform line search; the directional derivative of the cost
        # along the negative gradient is -||grad||^2.
        step_size, x = line_searcher.search(
            objective, manifold, x, desc_dir, cost, -(gradient_norm**2))

        stopping_criterion = self._check_stopping_criterion(
            start_time=start_time,
            step_size=step_size,
            gradient_norm=gradient_norm,
            iteration=iteration,
        )

        if stopping_criterion:
            if self._verbosity >= 1:
                print(stopping_criterion)
                print("")
            break

    return self._return_result(
        start_time=start_time,
        point=x,
        cost=objective(x),
        iterations=iteration,
        stopping_criterion=stopping_criterion,
        cost_evaluations=iteration,
        step_size=step_size,
        gradient_norm=gradient_norm,
    )
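
# Hedged usage sketch (not part of this module): how an optimizer of this
# kind is typically driven through the Pymanopt 2.x interface. This assumes
# the `pymanopt` and `autograd` packages and the 2.x names
# (pymanopt.Problem, pymanopt.function.autograd, pymanopt.optimizers); the
# toy cost on the sphere is invented for the example.
import autograd.numpy as anp
import pymanopt
from pymanopt.manifolds import Sphere
from pymanopt.optimizers import SteepestDescent

manifold = Sphere(3)


@pymanopt.function.autograd(manifold)
def cost(point):
    # A toy cost on the unit sphere in R^3.
    return anp.sum(point**4)


problem = pymanopt.Problem(manifold, cost)
optimizer = SteepestDescent()
result = optimizer.run(problem)
# result.point holds the final iterate, result.cost its cost value.
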