def _initialize_solver(self):
    """Set up and return the initial quantities used by the solver."""
    eta = np.zeros_like(self.gradient)
    delta = -self.gradient
    # Copy the gradient so that the in-place residual updates
    # performed in `solve` do not silently mutate `self.gradient`.
    resid = self.gradient.copy()
    r_norm = inner_prod(resid, resid)
    return eta, delta, resid, r_norm
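
# `inner_prod` and `froebenius` are imported from elsewhere in the
# package. The solver only relies on them behaving as the canonical
# matrix inner product and the Frobenius norm; minimal sketches under
# that assumption (not the package's actual code) would be:

def _inner_prod_sketch(mat_a, mat_b):
    """Canonical (Frobenius) inner product <A, B> = tr(A.T @ B)."""
    return float(np.sum(mat_a * mat_b))

def _froebenius_sketch(mat):
    """Frobenius norm of a matrix, i.e. sqrt(<A, A>)."""
    return float(np.linalg.norm(mat))
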
def solve(self, verbose=True):
    """Solve the trust-region subproblem using the tCG method."""
    # Refactoring is hard here; pylint: disable=too-many-locals
    # Initialize quantities used in solving the TR subproblem.
    eta, delta, resid, r_norm = self._initialize_solver()
    self.__cost = self.value
    stop_cause = 'maximum number of iterations reached'
    # Iteratively update eta until convergence (or another stop).
    n_iter = 0
    for n_iter in range(self.maxiter):
        # Compute <delta, Hess f[delta]> and thus the step factor alpha.
        hess_delta = self.get_hessian(delta)
        dh_norm = inner_prod(delta, hess_delta)
        alpha = r_norm / dh_norm
        # Compute the candidate update eta_new.
        eta_new = eta + alpha * delta
        # If <delta, Hess f[delta]> <= 0 or ||eta_new|| >= Delta,
        # compute an update of eta of proper norm and stop iterating.
        if (dh_norm <= 0) or (froebenius(eta_new) >= self.radius):
            eta = self._fit_eta_tau(eta, delta)
            stop_cause = 'radius condition -- computed fitted-norm eta'
            break
        # Check that the model cost decreases with eta_new; otherwise stop.
        new_cost = self._get_model_cost(eta_new)
        if new_cost >= self.__cost:
            stop_cause = 'increased cost -- adopted previous solution'
            break
        self.__cost = new_cost
        # Update the residuals and check the stopping criterion.
        resid += alpha * hess_delta
        rnorm_new = inner_prod(resid, resid)
        if rnorm_new <= self.stopping_criterion:
            # Adopt eta_new, whose residuals meet the criterion.
            eta = eta_new
            stop_cause = 'stopping criterion reached'
            break
        # In the absence of a solution, update quantities and iterate.
        delta = -resid + delta * rnorm_new / r_norm
        eta, r_norm = eta_new, rnorm_new
    # Store the solution reached, as well as the model cost at it.
    self.__solution = eta
    self.__cost = self._get_model_cost(eta)
    # Print out the number of iterations run before stopping.
    if verbose:
        print("Solution reached after %i iterations." % (n_iter + 1))
        print(stop_cause)
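
# `_fit_eta_tau` is defined elsewhere in the class. In standard tCG
# (e.g. Absil, Mahony & Sepulchre, Algorithm 11), this step returns
# eta + tau * delta with tau >= 0 chosen so that the update lands
# exactly on the trust-region boundary, ||eta + tau * delta|| = Delta.
# A sketch under that assumption (not necessarily the class's code):

def _fit_eta_tau_sketch(eta, delta, radius):
    """Return eta + tau * delta, with tau >= 0 s.t. its norm is `radius`.

    Solves the quadratic ||eta + tau * delta||^2 = radius^2
    for its positive root.
    """
    e_d = inner_prod(eta, delta)
    d_d = inner_prod(delta, delta)
    e_e = inner_prod(eta, eta)
    tau = (-e_d + np.sqrt(e_d ** 2 + d_d * (radius ** 2 - e_e))) / d_d
    return eta + tau * delta
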
def _solve_tr_subproblem(self, x_k, value, gradient, get_hessian, delta):
    """Solve the trust-region subproblem and return derived quantities.

    x_k         : current candidate optimum x_k
    value       : value of f(x_k)
    gradient    : value of grad f(x_k)
    get_hessian : function to get Hess f(x_k) [.]
    delta       : radius of the trust region

    The trust-region subproblem consists in minimizing the quantity
    $m(\\eta) = f(x_k) + <\\text{{grad}} f(x_k), \\eta>
    + .5 <\\text{{Hess}} f(x_k)[\\eta], \\eta>$
    under the constraint $||\\eta|| \\leq \\Delta$.

    First, estimate eta_k using the truncated conjugate gradient
    method, and gather the value of the model cost at eta_k. Then,
    compute the retraction of eta_k from the tangent space at x_k;
    this retraction is a candidate for $x_{{k+1}}$. Finally, compute
    the ratio rho_k used to accept or reject the previous candidate
    and to update the radius delta:
    $\\rho_k = (f(x_k) - f(R_x(\\eta_k))) / (m(0) - m(\\eta_k))$.

    Return eta_k, rho_k and R_x(eta_k) (candidate $x_{{k+1}}$).
    """
    # Arguments serve readability; pylint: disable=too-many-arguments
    # Solve the trust-region subproblem.
    tcg = TruncatedConjugateGradient(
        value, gradient, get_hessian, delta, **self.tcg_kwargs
    )
    eta_k, cost_eta = tcg.get_solution()
    # Compute the retraction of the solution.
    rx_k = stiefel_retraction(x_k, eta_k)
    # Compute the ratio rho_k; the small offset guards against a
    # zero denominator when the model predicts no decrease.
    f_rxk = inner_prod(self.cost_mat, np.dot(rx_k, rx_k.T))
    rho_k = (value - f_rxk) / (value - cost_eta + 1e-30)
    # Return eta_k, rho_k and the potential new optimum candidate.
    return eta_k, rho_k, rx_k
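
# `stiefel_retraction` is defined in the package's manifold helpers.
# One common choice (an assumption here, not necessarily the package's)
# is the QR-based retraction R_x(eta) = qf(x + eta), which maps the
# tangent vector back onto the manifold of orthonormal-column matrices:

def _stiefel_retraction_sketch(point, tangent):
    """QR-based retraction of `tangent` from the tangent space at `point`."""
    q_mat, r_mat = np.linalg.qr(point + tangent)
    # Fix the signs of R's diagonal so the factorization (hence the
    # retraction) is uniquely defined.
    signs = np.sign(np.diag(r_mat))
    signs[signs == 0] = 1
    return q_mat * signs
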
def _get_f_values(self, matrix):
    """Return f(matrix), grad f(matrix) and Hess f(matrix)[.].

    The values of f and of its Riemannian gradient are explicitly
    computed, while a function is built to compute the Riemannian
    Hessian in any given direction.
    """
    # Compute the value of f.
    matprod = np.dot(matrix, matrix.T)
    value = inner_prod(self.cost_mat, matprod)
    # Compute the value of grad f.
    gradient = np.dot(self.cost_mat, matrix)
    gradient = 2 * stiefel_projection(matrix, gradient)
    # Define a closure to compute the value of Hess f in a direction.
    sym = symblockdiag(np.dot(self.cost_mat, matprod))

    def get_hessian(direction):
        """Get the Riemannian Hessian of f in a given direction."""
        hess = np.dot(self.cost_mat, direction) - np.dot(sym, direction)
        return 2 * stiefel_projection(matrix, hess)

    # Return f(x_k), grad f(x_k) and a function to get Hess f(x_k)[d].
    return value, gradient, get_hessian
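
# `stiefel_projection` also comes from the package's manifold helpers.
# For a point x with orthonormal columns, the standard projection onto
# the tangent space of the Stiefel manifold (assumed here; the actual
# helper may implement a different geometry) is P_x(z) = z - x sym(x^T z),
# with sym(M) = (M + M^T) / 2:

def _stiefel_projection_sketch(point, vector):
    """Project `vector` onto the Stiefel tangent space at `point`."""
    xtz = np.dot(point.T, vector)
    return vector - np.dot(point, (xtz + xtz.T) / 2)
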
def _get_model_cost(self, eta):
    """Return the model cost m(eta) of the subproblem at a given eta."""
    return (self.value + inner_prod(self.gradient, eta)
            + .5 * inner_prod(self.get_hessian(eta), eta))