def __init__(self, the_problem_, ls_param_pref_=2, iprint_=0):  #{
    # Store the problem definition and verbosity, then build the line-search helper.
    self.the_problem = the_problem_
    self.iprint = iprint_
    self.ls_param_pref = ls_param_pref_
    self.ls = line_search(self.ls_param_pref, self.iprint)
    self.reset()
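The line_search object constructed above is defined elsewhere in the project and is not part of this excerpt. As a rough illustration only, a minimal backtracking (Armijo) line search could look like the sketch below; the function name, signature, and constants are illustrative assumptions, not the repository's actual line_search class.

def backtracking_line_search(f, x, direction, grad, alpha0=1.0, rho=0.5, c=1e-4, max_iter=50):
    """Minimal backtracking (Armijo) line search sketch.

    Shrinks the step length alpha until the sufficient-decrease condition
        f(x + alpha * d) <= f(x) + c * alpha * (grad . d)
    holds, or the iteration budget runs out.
    """
    fx = f(x)
    slope = sum(g * d for g, d in zip(grad, direction))  # directional derivative
    alpha = alpha0
    for _ in range(max_iter):
        x_new = [xi + alpha * di for xi, di in zip(x, direction)]
        if f(x_new) <= fx + c * alpha * slope:
            return alpha, f(x_new)
        alpha *= rho  # backtrack
    return alpha, f(x_new)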
def update_policy(self, states, actions, advantages):
    self.policy.train()

    states = states.to(self.device)
    actions = actions.to(self.device)
    advantages = advantages.to(self.device)

    action_dists = self.policy(states)
    log_action_probs = action_dists.log_prob(actions)

    # Surrogate objective and its flattened gradient w.r.t. the policy parameters
    loss = self.surrogate_loss(log_action_probs, log_action_probs.detach(), advantages)
    loss_grad = flat_grad(loss, self.policy.parameters(), retain_graph=True)

    # Fisher-vector-product operator built from the KL of the current policy with itself
    mean_kl = mean_kl_first_fixed(action_dists, action_dists)
    Fvp_fun = get_Hvp_fun(mean_kl, self.policy.parameters())

    # Natural-gradient search direction via conjugate gradient
    search_dir = cg_solver(Fvp_fun, loss_grad, self.cg_max_iters)
    expected_improvement = torch.matmul(loss_grad, search_dir)

    def constraints_satisfied(step, beta):
        # Tentatively apply the step, evaluate the candidate policy, then undo it
        apply_update(self.policy, step)

        with torch.no_grad():
            new_action_dists = self.policy(states)
            new_log_action_probs = new_action_dists.log_prob(actions)

            new_loss = self.surrogate_loss(new_log_action_probs, log_action_probs, advantages)
            mean_kl = mean_kl_first_fixed(action_dists, new_action_dists)

        actual_improvement = new_loss - loss
        improvement_ratio = actual_improvement / (expected_improvement * beta)

        apply_update(self.policy, -step)

        # Accept only if the surrogate improved enough and the KL constraint holds
        surrogate_cond = improvement_ratio >= self.line_search_accept_ratio and actual_improvement > 0.0
        kl_cond = mean_kl <= self.max_kl_div

        return surrogate_cond and kl_cond

    # Backtracking line search along the natural-gradient direction
    max_step_len = self.get_max_step_len(search_dir, Fvp_fun, self.max_kl_div, retain_graph=True)
    step_len = line_search(search_dir, max_step_len, constraints_satisfied)

    opt_step = step_len * search_dir
    apply_update(self.policy, opt_step)
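cg_solver above solves F x = g for the Fisher-vector-product operator without ever forming F explicitly, which is what makes the natural-gradient step tractable. Its body is not shown in this excerpt; the following is a minimal sketch of a standard conjugate-gradient solve under the assumption that Fvp_fun(v) returns F @ v for a flat 1-D tensor v (the tolerance and default iteration count are illustrative).

import torch

def cg_solver(Fvp_fun, b, max_iter=10, tol=1e-10):
    """Conjugate gradient for F x = b, where Fvp_fun(v) returns F @ v.

    Minimal sketch: assumes b is a flat 1-D tensor and F is symmetric
    positive definite (true for the Fisher information used in TRPO).
    """
    x = torch.zeros_like(b)   # start from x0 = 0
    r = b.clone()             # residual r0 = b - F x0 = b
    p = b.clone()             # initial search direction
    rs_old = torch.dot(r, r)

    for _ in range(max_iter):
        Fp = Fvp_fun(p)
        alpha = rs_old / torch.dot(p, Fp)
        x = x + alpha * p
        r = r - alpha * Fp
        rs_new = torch.dot(r, r)
        if rs_new.sqrt() < tol:  # residual small enough: converged
            break
        p = r + (rs_new / rs_old) * p
        rs_old = rs_new
    return x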
def newton(goal_function, x_init, algorithm="BasicArm", epsilon=decimal.Decimal(1e-10), max_iter=10000):
    x = [x_init]
    y = [goal_function(x_init)]
    n = len(x_init)

    for iter in range(max_iter):
        # Find the Hessian numerically.
        # Unfortunately, because numpy doesn't work with Decimal,
        # the type has to be changed to something like long double.
        H_temp = hessian(goal_function, x[iter])
        H = numpy.array(H_temp, dtype=numpy.float64)

        # Calculate the gradient numerically as well; the same conversion applies.
        grad_temp = gradient(goal_function, x[iter])
        grad = numpy.array([[entry] for entry in grad_temp], dtype=numpy.float64)

        # Calculate the change in x based on the Newton-Raphson step:
        # dx = -H^-1 * grad
        dx = -1 * numpy.dot(numpy.linalg.inv(H), grad)

        # Test the definiteness of the Hessian; at a minimum it should be positive definite.
        q_arr = numpy.dot(numpy.dot(numpy.transpose(dx), H), dx)
        q = decimal.Decimal(q_arr[0][0])

        # If q is positive and small enough, we have converged
        # (the factor of 2 is because we haven't halved the quadratic form).
        if q > 0 and q < 2 * epsilon:
            return (x[iter], goal_function(x[iter]),
                    "Solution found in (" + str(iter + 1) + ") iterations.", x, y)

        # Now that we have the Newton-Raphson differential dx,
        # apply backtracking line search in that direction.
        dx = [decimal.Decimal(dx[i][0]) for i in range(n)]
        grad = [decimal.Decimal(grad[i][0]) for i in range(n)]
        (s, f_s, msg) = line_search.line_search(goal_function, dx, x[iter], grad, algorithm)

        # Compute the next candidate x_{k+1} = x_k + s * dx.
        x.append([x[iter][i] + dx[i] * s for i in range(n)])
        y.append(goal_function(x[-1]))

    # If we're here, the maximum number of iterations has been exceeded.
    return (x[-1], goal_function(x[-1]),
            "Maximum number of iterations (" + str(max_iter) + ") exceeded.", x, y)
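The gradient and hessian helpers used above are evaluated numerically before being cast to float64; their implementations are not included in this excerpt. A minimal central-difference sketch of what such helpers could look like follows; the step sizes and the plain-float arithmetic are assumptions (the real helpers apparently operate on decimal.Decimal points).

def gradient(f, x, h=1e-6):
    """Central-difference approximation of the gradient of f at x (a list of floats)."""
    n = len(x)
    g = []
    for i in range(n):
        xp = list(x); xp[i] = xp[i] + h
        xm = list(x); xm[i] = xm[i] - h
        g.append((f(xp) - f(xm)) / (2 * h))
    return g

def hessian(f, x, h=1e-4):
    """Central-difference approximation of the Hessian of f at x (a list of floats)."""
    n = len(x)
    H = [[0.0] * n for _ in range(n)]
    for i in range(n):
        for j in range(n):
            xpp = list(x); xpp[i] += h; xpp[j] += h
            xpm = list(x); xpm[i] += h; xpm[j] -= h
            xmp = list(x); xmp[i] -= h; xmp[j] += h
            xmm = list(x); xmm[i] -= h; xmm[j] -= h
            H[i][j] = (f(xpp) - f(xpm) - f(xmp) + f(xmm)) / (4 * h * h)
    return H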
def solve(self, tracking_names=[]):
    """
    Parameters
    ----------
    tracking_names : list of strings
        Contains the names of the variables you want to track.
        Acceptable values: ['xk', 'xk_1', 'inv_hessian', 'grad', 'alpha', 'dx']

    Returns
    -------
    xk : n-array
        The algorithm's choice of a minimum point.
    tracker : dict of str -> list
        Contains the progression of each tracked variable.
    """
    # Check if input is acceptable
    acceptable_names = ['xk', 'xk_1', 'inv_hessian', 'grad', 'alpha', 'dx']
    if not set(tracking_names).issubset(acceptable_names):
        raise Exception("tracking_names must be a subset of {}".format(acceptable_names))

    # Initializing variables
    tracker = {name: [] for name in tracking_names}
    xk_1 = None
    xk = self.minimization_problem.guess.copy()
    dx = None
    grad = self.minimization_problem.gradient(xk)
    inv_hessian = get_inverse_hessian(
        self.minimization_problem, xk, None, None,
        hessian_approximation_method="finite_differences")
    alpha = line_search(self.minimization_problem.function,
                        self.minimization_problem.gradient,
                        xk, -inv_hessian @ grad,
                        line_search_method=self.line_search_method,
                        line_search_condition=self.line_search_conditions,
                        a0=1, rho=0.1, sigma=0.7, tau=0.1, chi=9)

    # Save initial values
    for name, l in tracker.items():
        l.append(locals()[name])

    # Continue with the next step according to the object settings
    while True:
        # Update parameters
        xk_1 = xk
        xk = xk - alpha * inv_hessian @ grad
        dx = np.linalg.norm(xk - xk_1)

        # Calculate the properties at the new xk
        grad = self.minimization_problem.gradient(xk)
        inv_hessian = get_inverse_hessian(
            self.minimization_problem, xk, xk_1, inv_hessian,
            self.hessian_approximation_method)
        alpha = line_search(
            self.minimization_problem.function,
            self.minimization_problem.gradient,
            xk, -inv_hessian @ grad,
            line_search_method=self.line_search_method,
            line_search_condition=self.line_search_conditions,
            a0=1, rho=self.rho, sigma=self.sigma, tau=self.tau, chi=self.chi)

        # Save values
        for name, l in tracker.items():
            l.append(locals()[name].copy())

        # Check if the stopping condition is reached
        # if dx < self.sensitivity: break
        if np.linalg.norm(grad) < self.sensitivity:
            break
        # if abs(self.minimization_problem.function(xk) - self.minimization_problem.function(xk_1)) < self.sensitivity: break

    return xk, tracker
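get_inverse_hessian keeps the inverse-Hessian approximation current between iterations: the first call builds it with finite differences, while later calls pass xk, xk_1 and the previous approximation, which suggests a quasi-Newton update. As an illustration only, the standard BFGS inverse-Hessian update under that assumption is sketched below; the helper name and signature are hypothetical and the project's actual update rule may differ (e.g. DFP or Broyden variants).

import numpy as np

def bfgs_inverse_hessian_update(H_inv, xk, xk_1, grad_k, grad_k_1):
    """One BFGS update of the inverse Hessian approximation H_inv.

    s = xk - xk_1 (step), y = grad_k - grad_k_1 (change in gradient).
    Returns the updated inverse Hessian; keeps the old one if the
    curvature condition s.T @ y > 0 is not met.
    """
    s = (xk - xk_1).reshape(-1, 1)
    y = (grad_k - grad_k_1).reshape(-1, 1)
    sy = (s.T @ y).item()
    if sy <= 1e-12:  # curvature condition violated: skip the update
        return H_inv
    rho = 1.0 / sy
    I = np.eye(len(xk))
    V = I - rho * (s @ y.T)
    return V @ H_inv @ V.T + rho * (s @ s.T)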