import numpy as np
from numpy import linalg as LA
from numpy.linalg import norm

import methods                 # local helper module; methods.gradient(f, x) returns the gradient of f at x
from methods import gradient   # the class-method fragments below call gradient() unqualified


def Quasi_Newton_BFGS_method(f, x0, step=0, precision=1):
    # The Broyden-Fletcher-Goldfarb-Shanno (BFGS) quasi-Newton descent method.
    # Adapted from K&W (Kochenderfer & Wheeler), p. 93.

    # initial setup: identity matrix as the first inverse Hessian approximation
    m = len(x0)
    q = np.eye(m)
    g = methods.gradient(f, x0)
    x = np.asarray(x0, dtype=float)

    while step < 10000 and norm(precision) > 1e-5:
        s = -np.dot(q, g)                # quasi-Newton search direction
        # line_search is a module-level helper assumed to return a step length
        alpha = line_search(f, g, x, s)
        prev_x = x
        x = x + alpha * s

        precision = abs(prev_x - x) 
        g_old = g
        g = methods.gradient(f, x)

        # BFGS update of the inverse Hessian approximation q; the update is
        # skipped when the curvature condition s.y > 0 fails, keeping q positive definite
        y = (g - g_old) / alpha
        dot_sy = np.dot(s, y)
        if dot_sy > 0:
            z = np.dot(q, y)
            q += np.outer(s, s) * (dot_sy + np.dot(y, z)) / dot_sy**2 \
                 - (np.outer(z, s) + np.outer(s, z)) / dot_sy

        step += 1

    f_final = f(x)

    return x, f_final, step
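
# A minimal usage sketch for Quasi_Newton_BFGS_method, assuming the module-level
# line_search helper used above returns a step length and methods.gradient is
# available; the quadratic objective and starting point are illustrative only.
# x_best, f_best, n_steps = Quasi_Newton_BFGS_method(
#     lambda x: (x[0] - 1.0)**2 + 10.0 * (x[1] + 2.0)**2, np.array([0.0, 0.0]))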
    def step(self, f, x):
        # Single BFGS iteration as a class method: move along -q*grad(f), then
        # update the stored inverse Hessian approximation self.q from the step
        # taken (delta) and the observed change in gradient (y).
        q = self.q
        g = gradient(f, x)

        s = np.dot(-q, g)                      # search direction

        xprime = self.line_search(f, x, s)     # new iterate along s
        gprime = gradient(f, xprime)

        delta = xprime - x                     # step actually taken
        y = gprime - g                         # gradient change

        dot_dy = np.dot(delta, y)
        if dot_dy > 0:                         # curvature condition keeps q positive definite
            z = np.dot(q, y)
            self.q = q + np.outer(delta, delta) * (dot_dy + np.dot(y, z)) / dot_dy**2 \
                       - (np.outer(z, delta) + np.outer(delta, z)) / dot_dy

        return xprime
def backtracking_line_search(f, x, d, alpha=0.001, p=0.5, beta=1e-4):
    # Backtracking (Armijo) line search: shrink alpha by the factor p until the
    # sufficient-decrease condition holds, then return the new point x + alpha*d.
    y = f(x)
    g = methods.gradient(f, x)

    while f(x + alpha * d) > y + beta * alpha * (np.dot(g, d)):
        alpha *= p

    return x + alpha * d
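
# A minimal usage sketch for backtracking_line_search, assuming methods.gradient
# is available: one Armijo-backtracked step on an illustrative quadratic,
# starting from x = (2, -1) along the negative gradient direction d = (-4, 2).
# x_next = backtracking_line_search(lambda x: np.dot(x, x),
#                                   np.array([2.0, -1.0]),
#                                   np.array([-4.0, 2.0]))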
    def step(self, f, x):
        # Adam update: exponentially decayed estimates of the gradient's first
        # (v) and second (s) moments, with bias correction, per Kingma & Ba.
        g = gradient(f, x)
        self._v = self._gamma_v * self._v + (1 - self._gamma_v) * g
        self._s = self._gamma_s * self._s + (1 - self._gamma_s) * (g**2)
        self._k += 1
        v_hat = self._v / (1 - self._gamma_v**self._k)
        s_hat = self._s / (1 - self._gamma_s**self._k)
        return x - self._alpha * v_hat / (np.sqrt(s_hat) + self._eps)
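
    # A minimal __init__ sketch for the optimizer class this Adam-style step
    # appears to belong to. The attribute names mirror the step method above;
    # the defaults shown are the usual Adam hyperparameters and are only
    # illustrative assumptions, not values confirmed by this repository.
    def __init__(self, alpha=0.001, gamma_v=0.9, gamma_s=0.999, eps=1e-8, dim=2):
        self._alpha = alpha        # step size
        self._gamma_v = gamma_v    # decay rate of the first-moment estimate
        self._gamma_s = gamma_s    # decay rate of the second-moment estimate
        self._eps = eps            # small constant guarding the division
        self._k = 0                # iteration counter used for bias correction
        self._v = np.zeros(dim)    # first-moment accumulator
        self._s = np.zeros(dim)    # second-moment accumulator
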
    def line_search(self, f, x, d, alpha=0.1, p=0.5, beta=1e-4):
        # Backtracking line search: shrink alpha by p until the Armijo
        # sufficient-decrease condition holds, then return the new point.
        y = f(x)
        g = gradient(f, x)

        while f(x + alpha * d) > y + beta * alpha * (np.dot(g, d)):
            alpha *= p

        return x + alpha * d
    def step(self, f, x):
        # Conjugate gradient step using the Polak-Ribiere beta truncated at
        # zero (PR+), with the previous gradient and direction stored on self.
        gprime = gradient(f, x)
        beta = max(0, np.dot(gprime, gprime - self._g) / np.dot(self._g, self._g))
        dprime = -1 * gprime + beta * self._d
        # the line search here is assumed to return the new iterate
        x = self.backtracking_line_search(f, x, dprime)
        self._g = gprime
        self._d = dprime
        self.x_old = x
        return x
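
    # A minimal __init__ sketch for the conjugate-gradient class the step above
    # appears to belong to; the attribute names follow the method body and the
    # seeding below is an illustrative assumption: storing the gradient and the
    # steepest-descent direction at the starting point makes the first step
    # plain steepest descent.
    def __init__(self, f, x0):
        self._g = gradient(f, x0)    # gradient at the previous iterate
        self._d = -1 * self._g       # previous search direction
        self.x_old = np.asarray(x0, dtype=float)
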
def conjugate_gradient_descent_method(func, x0, x_tol=0.0005, f_tol=0.01):
    # Nonlinear conjugate gradient descent (Polak-Ribiere+) with a backtracking
    # line search. Stops when the change in x drops below x_tol or after 10000
    # iterations; f_tol is currently unused, termination is based on x only.
    x_final = x0
    f_final = func(x0)
    CG_iterations = 1

    g = methods.gradient(func, x0)
    d = -1 * g

    precision = 1

    while CG_iterations < 10000 and norm(precision) > x_tol:
        cur_x = x_final
        gprime = methods.gradient(func, x_final)
        # Polak-Ribiere beta, truncated at zero so dprime remains a descent direction
        beta = max(0, np.dot(gprime, gprime - g) / (np.dot(g, g)))
        dprime = -1 * gprime + beta * d
        x_final = backtracking_line_search(func, x_final, dprime)
        d = dprime
        g = gprime
        CG_iterations += 1
        precision = abs(x_final - cur_x)

    f_final = func(x_final)

    return x_final, f_final, CG_iterations
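
# A minimal usage sketch for conjugate_gradient_descent_method, assuming
# methods.gradient is available. The objective and starting point are
# illustrative only.
# cg_x, cg_f, cg_iters = conjugate_gradient_descent_method(
#     lambda x: (x[0] - 3.0)**2 + 5.0 * x[1]**2, np.array([0.0, 1.0]))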
def gradient_descent_method(func, initial_point, initial_step_size):
    # Plain gradient descent with a crude step-size adaptation: a step is kept
    # only if it decreases the objective; otherwise the step size is shrunk.
    x_old = np.array(initial_point)
    y_old = func(initial_point)
    func_eval_count = 1
    threshold = 10**(-3)
    for n in range(0, 50000):
        direction = methods.gradient(func, x_old.tolist())
        x_new = x_old - initial_step_size * direction
        y_new = func(x_new.tolist())
        func_eval_count += 1
        if LA.norm(direction) < threshold:      # gradient small enough: converged
            return (x_new, y_new, func_eval_count)
            return (x_new, y_new, func_eval_count)
        if y_new < y_old:
            x_old = x_new
            y_old = y_new
        else:
            initial_step_size *= 0.1
    return (x_old, y_old, func_eval_count)
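
# A minimal usage sketch for gradient_descent_method, assuming methods.gradient
# is available. The objective, starting point, and step size are illustrative only.
# gd_x, gd_f, gd_evals = gradient_descent_method(
#     lambda x: (x[0] - 3.0)**2 + 5.0 * x[1]**2, [0.0, 1.0], 0.05)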
    def backtracking_line_search(self, f, x, d, alpha, p=0.5, beta=1e-4):
        # Backtracking (Armijo) line search; returns the accepted step length
        # alpha rather than the new point.
        y, g = f(x), gradient(f, x)
        while f(x + alpha * d) > y + beta * alpha * np.matmul(g, d):
            alpha *= p
        return alpha
    def step(self, f, x):
        # Gradient descent step: move along the negative gradient, with the
        # step size chosen by a backtracking line search.
        g = gradient(f, x)
        alpha = self.backtracking_line_search(f, x, -g, self.alpha)
        return x - alpha * g
    def step(self, f, x):
        # Gradient descent with momentum: the velocity decays by beta and is
        # pushed along the negative gradient each iteration.
        g = gradient(f, x)
        self._v = self._beta * self._v - self._alpha * g
        return x + self._v
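
    # A minimal __init__ sketch for the momentum class the step above appears
    # to belong to; attribute names mirror the method body, and the defaults
    # are common illustrative choices, not values confirmed by this repository.
    def __init__(self, alpha=0.01, beta=0.9, dim=2):
        self._alpha = alpha        # step size applied to the gradient
        self._beta = beta          # velocity decay factor
        self._v = np.zeros(dim)    # accumulated velocity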