def __init__(self, the_problem_, ls_param_pref_=2, iprint_=0):
    """Store the constructor arguments, build the line-search helper, reset.

    Parameters
    ----------
    the_problem_ :
        Object kept on the instance as ``the_problem``.
    ls_param_pref_ :
        Kept as ``ls_param_pref`` and passed as the first argument to
        ``line_search`` (default 2).
    iprint_ :
        Kept as ``iprint`` and passed as the second argument to
        ``line_search`` (default 0); presumably a verbosity level --
        confirm against ``line_search``.
    """
    self.the_problem = the_problem_
    self.ls_param_pref = ls_param_pref_
    self.iprint = iprint_

    # The helper receives the same two settings that were just stored.
    self.ls = line_search(ls_param_pref_, iprint_)

    # Remaining state is initialised by reset().
    self.reset()
Exemple #2
0
    def update_policy(self, states, actions, advantages):
        """Apply one line-searched update to the parameters of ``self.policy``.

        The gradient of the surrogate loss and a Hessian-vector-product
        function built from the mean KL are handed to ``cg_solver`` to obtain
        a search direction.  ``line_search`` then receives that direction, a
        maximum step length and the ``constraints_satisfied`` callback, and
        the step it returns is applied to the policy.

        Args:
            states: Inputs fed to ``self.policy``; moved to ``self.device``.
            actions: Actions whose log-probabilities are evaluated under the
                policy's output distributions; moved to ``self.device``.
            advantages: Values forwarded to ``self.surrogate_loss``; moved to
                ``self.device``.
        """
        self.policy.train()

        device = self.device
        states = states.to(device)
        actions = actions.to(device)
        advantages = advantages.to(device)

        # Distributions and log-probabilities before any parameter change.
        old_dists = self.policy(states)
        old_log_probs = old_dists.log_prob(actions)

        # The second argument is a detached copy of the first.
        loss = self.surrogate_loss(
            old_log_probs, old_log_probs.detach(), advantages)
        grad_vec = flat_grad(
            loss, self.policy.parameters(), retain_graph=True)

        # Mean KL of the current distributions against themselves; only used
        # to build the Hessian-vector-product function.
        self_kl = mean_kl_first_fixed(old_dists, old_dists)
        Fvp_fun = get_Hvp_fun(self_kl, self.policy.parameters())

        search_dir = cg_solver(Fvp_fun, grad_vec, self.cg_max_iters)
        expected_improvement = torch.matmul(grad_vec, search_dir)

        def constraints_satisfied(step, beta):
            """Try ``step`` on the policy, evaluate it, then undo it."""
            apply_update(self.policy, step)

            with torch.no_grad():
                trial_dists = self.policy(states)
                trial_log_probs = trial_dists.log_prob(actions)
                trial_loss = self.surrogate_loss(
                    trial_log_probs, old_log_probs, advantages)
                trial_kl = mean_kl_first_fixed(old_dists, trial_dists)

            gain = trial_loss - loss
            gain_ratio = gain / (expected_improvement * beta)

            # Restore the parameters before reporting the verdict.
            apply_update(self.policy, -step)

            loss_ok = (gain_ratio >= self.line_search_accept_ratio
                       and gain > 0.0)
            kl_ok = trial_kl <= self.max_kl_div
            return loss_ok and kl_ok

        max_step_len = self.get_max_step_len(
            search_dir, Fvp_fun, self.max_kl_div, retain_graph=True)
        step_len = line_search(search_dir, max_step_len,
                               constraints_satisfied)

        apply_update(self.policy, step_len * search_dir)
Exemple #3
0
def newton(goal_function,
           x_init,
           algorithm="BasicArm",
           epsilon=decimal.Decimal(1e-10),
           max_iter=10000):
    """Minimise ``goal_function`` with Newton's method and a line search.

    Each iteration evaluates the Hessian and gradient numerically (via the
    ``hessian`` and ``gradient`` helpers), solves for the Newton step and
    scales it with ``line_search.line_search``.

    Parameters
    ----------
    goal_function : callable
        Objective; called with a point (a sequence of coordinates).
    x_init : sequence
        Starting point.  Coordinates are combined with ``decimal.Decimal``
        step components, so they are expected to be ``Decimal`` values.
    algorithm : str
        Forwarded unchanged to ``line_search.line_search``.
    epsilon : decimal.Decimal
        Convergence tolerance on the quadratic form ``dx^T H dx``.
    max_iter : int
        Maximum number of Newton iterations.

    Returns
    -------
    tuple
        ``(x_last, f(x_last), message, x_history, y_history)`` where the
        histories are the lists of visited points and objective values.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the Hessian is singular.
    """
    x = [x_init]
    y = [goal_function(x_init)]

    for k in range(max_iter):
        x_k = x[-1]

        # numpy cannot work with Decimal, so the numerically evaluated
        # Hessian and gradient are converted to float64 arrays.
        H = numpy.array(hessian(goal_function, x_k), dtype=numpy.float64)
        grad = numpy.array(
            [[entry] for entry in gradient(goal_function, x_k)],
            dtype=numpy.float64)

        # Newton-Raphson step: dx = -H^-1 * grad.  Solving the linear system
        # avoids forming the explicit inverse.
        dx = -numpy.linalg.solve(H, grad)

        # Quadratic form dx^T H dx: positive when H is positive definite
        # along dx, as it should be near a minimum.
        q_arr = numpy.dot(numpy.dot(numpy.transpose(dx), H), dx)
        q = decimal.Decimal(q_arr[0][0])

        # Converged when q is positive and small (the factor 2 is there
        # because the quadratic form has not been halved).  An exactly zero
        # step gives q == 0 and can never move x again, so it is also
        # treated as converged instead of spinning until max_iter.
        zero_step = not dx.any()
        if zero_step or 0 < q < 2 * epsilon:
            # y[-1] is the objective already evaluated at x_k.
            return (x_k, y[-1],
                    "Solution found in (" + str(k + 1) + ") iterations.", x,
                    y)

        # Back to Decimal for the line search along the Newton direction.
        step = [decimal.Decimal(row[0]) for row in dx]
        slope = [decimal.Decimal(row[0]) for row in grad]
        s, _f_s, _msg = line_search.line_search(goal_function, step, x_k,
                                                slope, algorithm)

        # Next candidate: x_{k+1} = x_k + s * dx.
        x.append([x_i + d_i * s for x_i, d_i in zip(x_k, step)])
        y.append(goal_function(x[-1]))

    # Reaching this point means the iteration budget was exhausted.
    return (x[-1], y[-1],
            "Maximum number of iterations (" + str(max_iter) + ") exceeded.",
            x, y)
Exemple #4
0
    def solve(self, tracking_names=()):
        """Iterate Newton-type steps until the gradient norm is small enough.

        Parameters
        ----------
        tracking_names : iterable of str
            Names of the variables whose progression should be recorded.
            Acceptable values:
            ['xk', 'xk_1', 'inv_hessian', 'grad', 'alpha', 'dx']

        Returns
        -------
        xk : n-array
            The point the algorithm settled on as the minimum.
        tracker : dict of str -> list
            For every requested name, the list of values it took: the
            initial state first, then one entry per iteration.

        Raises
        ------
        ValueError
            If ``tracking_names`` contains a name that is not acceptable.
        """
        acceptable_names = ['xk', 'xk_1', 'inv_hessian', 'grad', 'alpha', 'dx']
        if not set(tracking_names).issubset(acceptable_names):
            raise ValueError("tracking_names must be a subset of {}".format(
                acceptable_names))

        problem = self.minimization_problem
        tracker = {name: [] for name in tracking_names}

        def step_length(point, direction):
            # One place for the line-search call so the initial step and
            # every later step use the same configured parameters.
            return line_search(
                problem.function,
                problem.gradient,
                point,
                direction,
                line_search_method=self.line_search_method,
                line_search_condition=self.line_search_conditions,
                a0=1,
                rho=self.rho,
                sigma=self.sigma,
                tau=self.tau,
                chi=self.chi)

        def record(**state):
            # Store snapshots; values without a .copy() method (None, plain
            # Python scalars) are immutable here and are stored as they are.
            for name, history in tracker.items():
                value = state[name]
                history.append(
                    value.copy() if hasattr(value, "copy") else value)

        # Initial state.  There is no previous point yet, so the first
        # inverse Hessian always comes from finite differences.
        xk_1 = None
        xk = problem.guess.copy()
        dx = None
        grad = problem.gradient(xk)
        inv_hessian = get_inverse_hessian(
            problem,
            xk,
            None,
            None,
            hessian_approximation_method="finite_differences")
        alpha = step_length(xk, -inv_hessian @ grad)

        record(xk=xk, xk_1=xk_1, inv_hessian=inv_hessian, grad=grad,
               alpha=alpha, dx=dx)

        while True:
            # Take the step computed for the current point.
            xk_1 = xk
            xk = xk - alpha * inv_hessian @ grad
            dx = np.linalg.norm(xk - xk_1)

            # Evaluate the properties at the new point.
            grad = problem.gradient(xk)
            inv_hessian = get_inverse_hessian(
                problem, xk, xk_1, inv_hessian,
                self.hessian_approximation_method)
            alpha = step_length(xk, -inv_hessian @ grad)

            record(xk=xk, xk_1=xk_1, inv_hessian=inv_hessian, grad=grad,
                   alpha=alpha, dx=dx)

            # Stopping criterion: gradient norm below the sensitivity.
            if np.linalg.norm(grad) < self.sensitivity:
                break

        return xk, tracker