Example #1
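All of the snippets below assume NumPy plus small `point` and `rosenbrock` helpers that are defined elsewhere in the project. The sketch below is a minimal, assumed stand-in for those helpers (the x/y attribute names and the standard two-dimensional Rosenbrock function are assumptions), included only so the examples can be run on their own.

# Assumed helper definitions (not part of the original examples).
import math
import random

import numpy as np


class point:
    """A 2-D point supporting element-wise addition and subtraction."""

    def __init__(self, x, y):
        self.x, self.y = x, y

    def __add__(self, other):
        return point(self.x + other.x, self.y + other.y)

    def __sub__(self, other):
        return point(self.x - other.x, self.y - other.y)


class rosenbrock:
    """f(x, y) = (1 - x)^2 + 100 * (y - x^2)^2 with gradient g and Hessian H."""

    def f(self, p) -> float:
        return (1 - p.x)**2 + 100 * (p.y - p.x**2)**2

    def g(self, p):
        return np.array([
            -2 * (1 - p.x) - 400 * p.x * (p.y - p.x**2),
            200 * (p.y - p.x**2),
        ])

    def H(self, p):
        return np.array([
            [2 - 400 * (p.y - p.x**2) + 800 * p.x**2, -400 * p.x],
            [-400 * p.x, 200.0],
        ])
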
def dichotomous_search(loss_function: rosenbrock,
                       start: point,
                       direction: list,
                       epsilon=0.1) -> float:
    """
    Derivative-free dichotomous line search for the step along a direction.
    :param loss_function:
    :param start:
    :param direction:
    :param epsilon:
    :return:
    """
    a, b = advance_retreat_method(loss_function, start, direction)

    # find the minimum
    e = epsilon / 3
    p, q = (a + b) / 2 - e, (a + b) / 2 + e
    while abs(a - b) > epsilon:
        f_p = loss_function.f(start +
                              point(direction[0] * p, direction[1] * p))
        f_q = loss_function.f(start +
                              point(direction[0] * q, direction[1] * q))
        if f_p < f_q:
            b = q
        else:
            a = p
        p, q = (a + b) / 2 - e, (a + b) / 2 + e

    return (a + b) / 2
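
For reference, a hypothetical call of the line search above (the golden-section and Fibonacci searches below share the same signature): given a start point and a direction it returns a scalar step, which the caller turns into a new point. It relies on advance_retreat_method from Example #17 further below and on the assumed helpers sketched at the top.

# Hypothetical usage of the line-search routines (assumed helpers from above).
loss = rosenbrock()
x0 = point(-1.0, 1.0)
best_step = dichotomous_search(loss, x0, direction=[1.0, 0.0])
x1 = x0 + point(1.0 * best_step, 0.0 * best_step)  # move along the x-axis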
Example #2
def golden_search(loss_function: rosenbrock,
                  start: point,
                  direction: list,
                  epsilon=0.1) -> float:
    """
    Derivative-free golden-section line search for the step along a direction.
    :param loss_function:
    :param start:
    :param direction:
    :param epsilon:
    :return:
    """
    a, b = advance_retreat_method(loss_function, start, direction)

    # find the minimum
    golden_num = (math.sqrt(5) - 1) / 2
    p, q = a + (1 - golden_num) * (b - a), a + golden_num * (b - a)
    while abs(a - b) > epsilon:
        f_p = loss_function.f(start +
                              point(direction[0] * p, direction[1] * p))
        f_q = loss_function.f(start +
                              point(direction[0] * q, direction[1] * q))
        if f_p < f_q:
            b, q = q, p
            p = a + (1 - golden_num) * (b - a)
        else:
            a, p = p, q
            q = a + golden_num * (b - a)

    return (a + b) / 2
def Momentum(loss_function: rosenbrock,
             start: point,
             step=0.1,
             rho=0.7,
             epsilon=10e-2,
             k_max=10000) -> list:
    """

    :param loss_function:
    :param start:
    :param step:
    :param rho: the influence of historical gradients
    :param epsilon:
    :param k_max:
    :return:
    """
    x, k = [start], 0
    direction = -loss_function.g(start) / np.linalg.norm(
        loss_function.g(start))
    p = step * direction
    x.append(x[k] + point(p[0], p[1]))

    while True:
        k += 1
        # break when the termination conditions are met
        gradient = loss_function.g(x[k])
        if k > k_max or np.linalg.norm(gradient) < epsilon: break

        # find the new x
        direction = -gradient / np.linalg.norm(gradient)
        # accumulate the historical update p (momentum term)
        p = rho * p + step * direction
        x.append(x[k] + point(p[0], p[1]))

    return x
def Adam_HD(loss_function: rosenbrock,
            start: point,
            initial_step=0.1,
            rho0=0.9,
            rho1=0.99,
            beta=10e-7,
            epsilon=10e-2,
            k_max=10000) -> list:
    """
    Adam (adaptive moment estimation) with hypergradient step-size adaptation.
    :param loss_function:
    :param start:
    :param initial_step:
    :param rho0: decay rate for the first moment (direction)
    :param rho1: decay rate for the second moment (squared gradient) r
    :param beta: hypergradient learning rate for the step size
    :param epsilon:
    :param k_max:
    :return:
    """
    x, k, step = [start], 0, initial_step
    delta = np.array([10e-7] * len(start))

    r = np.zeros(len(start))
    direction = -loss_function.g(start) / np.linalg.norm(
        loss_function.g(start))
    uk_old = direction
    r = rho1 * r + (1 - rho1) * direction**2
    p = step * uk_old
    x.append(x[k] + point(p[0], p[1]))

    while True:
        k += 1
        gradient = loss_function.g(x[k])
        # break when the termination conditions are met
        if k > k_max or np.linalg.norm(gradient) < epsilon: break
        gradient = -gradient / np.linalg.norm(gradient)
        # exponential moving average of the direction (first moment)
        direction = rho0 * direction + (1 - rho0) * gradient
        uk_new = direction / (r + delta)**0.5
        # find the new x
        # exponential moving average of the squared gradient in r
        r = rho1 * r + (1 - rho1) * gradient**2
        # update the step
        step = step + beta * np.dot(uk_new, uk_old)
        p = step * uk_new
        x.append(x[k] + point(p[0], p[1]))

        uk_old = uk_new

    return x
def cyclic_coordinate_method(loss_function: rosenbrock,
                             start: point,
                             method='golden_search',
                             epsilon=10e-1,
                             k_max=10000) -> list:
    """

    :param loss_function:
    :param start:
    :param method:
    :param epsilon:
    :param k_max:
    :return:
    """
    x, M, k = [start], len(start), 0

    while True:
        # break when the termination conditions are met
        gradient = loss_function.g(x[k])
        if k > k_max or np.linalg.norm(gradient) < epsilon: break
        # find the new x
        direction = [0] * M
        direction[np.mod(k, M)] = 1
        if method == 'golden_search':
            step = golden_search(loss_function, x[k], direction)
        elif method == 'fibonacci_search':
            step = fibonacci_search(loss_function, x[k], direction)
        elif method == 'dichotomous_search':
            step = dichotomous_search(loss_function, x[k], direction)
        else:
            return x
        x.append(x[k] + point(direction[0] * step, direction[1] * step))
        k += 1

    return x
def Adadelta(loss_function: rosenbrock,
             start: point,
             rho=0.99,
             epsilon=10e-2,
             k_max=10000) -> list:
    """

    :param loss_function:
    :param start:
    :param rho:
    :param epsilon:
    :param k_max:
    :return:
    """
    x, k = [start], 0
    delta = np.array([10e-7] * len(start))

    step = np.zeros(len(start))  # running average of squared updates
    r = np.zeros(len(start))  # running average of squared gradients
    while True:
        gradient = loss_function.g(x[k])
        # break when the termination conditions are met
        if k > k_max or np.linalg.norm(gradient) < epsilon: break
        gradient = -gradient / np.linalg.norm(gradient)
        # find the new x
        # exponential moving average of the squared gradient in r
        r = rho * r + (1 - rho) * gradient**2
        p = gradient * ((step + delta) / (r + delta))**0.5
        x.append(x[k] + point(p[0], p[1]))
        step = rho * step + (1 - rho) * p**2
        k += 1

    return x
def Adagrad(loss_function: rosenbrock,
            start: point,
            initial_step=0.1,
            epsilon=10e-2,
            k_max=10000) -> list:
    """
    Adaptive Gradient
    :param loss_function:
    :param start:
    :param initial_step:
    :param epsilon:
    :param k_max:
    :return:
    """
    x, k = [start], 0
    delta = np.array([10e-7] * len(start))

    r = np.zeros(len(start))
    while True:
        gradient = loss_function.g(x[k])
        # break when the termination conditions are met
        if k > k_max or np.linalg.norm(gradient) < epsilon: break
        gradient = -gradient / np.linalg.norm(gradient)
        # find the new x
        r = r + gradient**2
        p = initial_step * gradient / (r + delta)**0.5
        x.append(x[k] + point(p[0], p[1]))
        k += 1

    return x
def plain_gradient_descent(loss_function: rosenbrock,
                           start: point,
                           step=0.1,
                           epsilon=10e-2,
                           k_max=10000) -> list:
    """

    :param loss_function:
    :param start:
    :param step:
    :param epsilon:
    :param k_max:
    :return:
    """
    x, k = [start], 0

    while True:
        # break when the termination conditions are met
        gradient = loss_function.g(x[k])
        if k > k_max or np.linalg.norm(gradient) < epsilon: break

        # find the new x
        direction = -gradient / np.linalg.norm(gradient)
        p = step * direction
        x.append(x[k] + point(p[0], p[1]))
        k += 1

    return x
def BFGS(loss_function: rosenbrock,
         start: point,
         method='golden_search',
         epsilon=10e-2,
         k_max=10000) -> list:
    """
    Broyden-Fletcher-Goldfarb-Shanno (BFGS) quasi-Newton method.
    :param loss_function:
    :param start:
    :param method:
    :param epsilon:
    :param k_max:
    :return:
    """
    x, k = [start], 0
    B = np.identity(len(start))  # Identity matrix

    while True:
        # break when the termination conditions are met
        gradient = loss_function.g(x[k])
        if k > k_max or np.linalg.norm(gradient) < epsilon: break

        # find the new x
        gradient = gradient / np.linalg.norm(gradient)
        direction = -np.matmul(np.linalg.inv(B), gradient)
        if method == 'golden_search':
            step = golden_search(loss_function, x[k], direction)
        elif method == 'fibonacci_search':
            step = fibonacci_search(loss_function, x[k], direction)
        elif method == 'dichotomous_search':
            step = dichotomous_search(loss_function, x[k], direction)
        else:
            return x
        p = step * direction
        x.append(x[k] + point(p[0], p[1]))
        # update B with the BFGS formula
        #   B <- B + y y^T / (y^T p) - B p p^T B / (p^T B p)
        yk = (loss_function.g(x[k + 1]) /
              np.linalg.norm(loss_function.g(x[k + 1])) -
              gradient).reshape(-1, 1)
        pk = np.asarray(p).reshape(-1, 1)
        B = (B + (yk @ yk.T) / (yk.T @ pk) -
             (B @ pk @ pk.T @ B) / (pk.T @ B @ pk))
        k += 1

    return x
def DFP(loss_function: rosenbrock,
        start: point,
        method='golden_search',
        epsilon=10e-2,
        k_max=10000) -> list:
    """
    Davidon-Fletcher-Powell (DFP) quasi-Newton method.
    :param loss_function:
    :param start:
    :param method:
    :param epsilon:
    :param k_max:
    :return:
    """
    x, k = [start], 0
    D = np.identity(len(start))  # Identity matrix

    while True:
        # break when the termination conditions are met
        gradient = loss_function.g(x[k])
        if k > k_max or np.linalg.norm(gradient) < epsilon: break

        # find the new x
        gradient = gradient / np.linalg.norm(gradient)
        direction = -np.matmul(D, gradient)
        if method == 'golden_search':
            step = golden_search(loss_function, x[k], direction)
        elif method == 'fibonacci_search':
            step = fibonacci_search(loss_function, x[k], direction)
        elif method == 'dichotomous_search':
            step = dichotomous_search(loss_function, x[k], direction)
        else:
            return x
        p = step * direction
        x.append(x[k] + point(p[0], p[1]))
        # update D with the DFP formula
        #   D <- D + p p^T / (p^T y) - D y y^T D / (y^T D y)
        yk = (loss_function.g(x[k + 1]) /
              np.linalg.norm(loss_function.g(x[k + 1])) -
              gradient).reshape(-1, 1)
        pk = np.asarray(p).reshape(-1, 1)
        D = (D + (pk @ pk.T) / (pk.T @ yk) -
             (D @ yk @ yk.T @ D) / (yk.T @ D @ yk))
        k += 1

    return x
def Nesterov_momentum_HD(loss_function: rosenbrock,
                         start: point,
                         initial_step=0.1,
                         rho=0.7,
                         mu=0.2,
                         beta=0.001,
                         epsilon=10e-2,
                         k_max=10000) -> list:
    """

    :param loss_function:
    :param start:
    :param initial_step:
    :param rho: the influence of historical gradients
    :param mu: look-ahead rate applied to the momentum term
    :param beta: hypergradient learning rate for the step size
    :param epsilon:
    :param k_max:
    :return:
    """
    x, k, step = [start], 0, initial_step
    direction_old = -loss_function.g(start) / np.linalg.norm(
        loss_function.g(start))
    p = step * direction_old
    x.append(x[k] + point(p[0], p[1]))

    while True:
        k += 1
        # break when the termination conditions are met
        if k > k_max or np.linalg.norm(loss_function.g(x[k])) < epsilon: break

        # find the new x
        # evaluate the gradient at the look-ahead point x + mu * p
        gradient = loss_function.g(x[k] + point(p[0] * mu, p[1] * mu))
        direction_new = -gradient / np.linalg.norm(gradient)
        # update the step
        step = step + beta * np.dot(direction_new, direction_old)
        # accumulate the historical update p (momentum term)
        p = rho * p + step * direction_new
        x.append(x[k] + point(p[0], p[1]))

        direction_old = direction_new

    return x
Example #12
def armijo_goldstein_search(loss_function: rosenbrock,
                            start: point,
                            direction: list,
                            rho=0.1) -> float:
    """

    :param loss_function:
    :param start:
    :param direction:
    :param rho: must satisfy 0 < rho < 0.5
    :return:
    """
    a, b = 0, 100
    alpha = b * random.uniform(0.5, 1)
    # find the alpha
    f1 = loss_function.f(start)
    gradient = loss_function.g(start)
    gradient_f1 = np.dot(gradient.T, np.array(direction))

    while True:
        f2 = loss_function.f(start +
                             point(direction[0] * alpha, direction[1] * alpha))
        # the Armijo-Goldstein rule
        if f2 - f1 <= rho * alpha * gradient_f1:
            if f2 - f1 >= (1 - rho) * alpha * gradient_f1:
                return alpha
            else:
                a = alpha
                if b < alpha:
                    alpha = (a + b) / 2
                else:
                    alpha = 2 * alpha
        else:
            b = alpha
            alpha = (a + b) / 2
def plain_gradient_descent_HD(loss_function: rosenbrock,
                              start: point,
                              initial_step=0.1,
                              beta=0.001,
                              epsilon=10e-2,
                              k_max=10000) -> list:
    """

    :param loss_function:
    :param start:
    :param initial_step:
    :param beta: hypergradient learning rate for the step size
    :param epsilon:
    :param k_max:
    :return:
    """
    x, k, step = [start], 0, initial_step
    direction_old = -loss_function.g(start) / np.linalg.norm(
        loss_function.g(start))
    p = step * direction_old
    x.append(x[k] + point(p[0], p[1]))

    while True:
        k += 1
        # break when the termination conditions are met
        gradient = loss_function.g(x[k])
        if k > k_max or np.linalg.norm(gradient) < epsilon: break

        # find the new x
        direction_new = -gradient / np.linalg.norm(gradient)
        # update the step
        step = step + beta * np.dot(direction_new, direction_old)
        p = step * direction_new
        x.append(x[k] + point(p[0], p[1]))

        direction_old = direction_new

    return x
def Newton_method(loss_function: rosenbrock,
                  start: point,
                  method='golden_search',
                  epsilon=10e-2,
                  k_max=10000) -> list:
    """

    :param loss_function:
    :param start:
    :param method:
    :param epsilon:
    :param k_max:
    :return:
    """
    x, k = [start], 0

    while True:
        # break when the termination conditions are met
        gradient = loss_function.g(x[k])
        if k > k_max or np.linalg.norm(gradient) < epsilon: break

        # find the new x
        inverse = np.linalg.inv(loss_function.H(x[k]))
        direction = -np.matmul(inverse, gradient)
        if method == 'golden_search':
            step = golden_search(loss_function, x[k], direction)
        elif method == 'fibonacci_search':
            step = fibonacci_search(loss_function, x[k], direction)
        elif method == 'dichotomous_search':
            step = dichotomous_search(loss_function, x[k], direction)
        else:
            return x
        p = step * direction
        x.append(x[k] + point(p[0], p[1]))
        k += 1

    return x
Example #15
def fibonacci_search(loss_function: rosenbrock,
                     start: point,
                     direction: list,
                     epsilon=1) -> float:
    """
    Derivative-free Fibonacci line search for the step along a direction.
    :param loss_function:
    :param start:
    :param direction:
    :param epsilon:
    :return:
    """
    a, b = advance_retreat_method(loss_function, start, direction)

    # build the Fibonacci sequence
    F, d = [1.0, 2.0], (b - a) / epsilon
    while F[-1] < d:
        F.append(F[-1] + F[-2])

    # find the minimum
    N = len(F) - 1
    p, q = a + (1 - F[N - 1] / F[N]) * (b - a), a + F[N - 1] / F[N] * (b - a)
    while abs(a - b) > epsilon and N > 0:
        N = N - 1
        f_p = loss_function.f(start +
                              point(direction[0] * p, direction[1] * p))
        f_q = loss_function.f(start +
                              point(direction[0] * q, direction[1] * q))
        if f_p < f_q:
            b, q = q, p
            p = a + (1 - F[N - 1] / F[N]) * (b - a)
        else:
            a, p = p, q
            q = a + F[N - 1] / F[N] * (b - a)

    return (a + b) / 2
def conjugate_gradient(loss_function: rosenbrock,
                       start: point,
                       method='golden_search',
                       epsilon=10e-2,
                       k_max=10000) -> list:
    """

    :param loss_function:
    :param start:
    :param method:
    :param epsilon:
    :param k_max:
    :return:
    """
    x, k = [start], 0
    d = -loss_function.g(start)  # unnormalized conjugate direction

    while True:
        # break when the termination conditions are met
        gradient_old = loss_function.g(x[k])
        if np.linalg.norm(gradient_old) < epsilon or k > k_max: break

        # find the new x along the normalized conjugate direction
        direction = d / np.linalg.norm(d)
        if method == 'golden_search':
            step = golden_search(loss_function, x[k], direction)
        elif method == 'fibonacci_search':
            step = fibonacci_search(loss_function, x[k], direction)
        elif method == 'dichotomous_search':
            step = dichotomous_search(loss_function, x[k], direction)
        else:
            return x
        x.append(x[k] + point(direction[0] * step, direction[1] * step))

        # Fletcher-Reeves update: alpha = ||g_new||^2 / ||g_old||^2 on the
        # raw gradients (with normalized gradients it would always be 1)
        gradient_new = loss_function.g(x[k + 1])
        alpha = np.dot(gradient_new, gradient_new) / np.dot(
            gradient_old, gradient_old)
        d = -gradient_new + alpha * d
        k += 1

    return x
Example #17
def advance_retreat_method(loss_function: rosenbrock,
                           start: point,
                           direction: list,
                           step=0,
                           delta=0.1) -> tuple:
    """
    Advance-and-retreat: find an initial interval that brackets the best step.
    :param loss_function:
    :param start:
    :param direction:
    :param step:
    :param delta:
    :return:
    """
    alpha0, point0 = step, start

    alpha1 = alpha0 + delta
    point1 = point0 + point(direction[0] * delta, direction[1] * delta)
    if loss_function.f(point0) < loss_function.f(point1):
        while True:
            delta *= 2
            alpha2 = alpha0 - delta
            point2 = point0 - point(direction[0] * delta, direction[1] * delta)
            if loss_function.f(point2) < loss_function.f(point0):
                alpha1, alpha0 = alpha0, alpha2
                point1, point0 = point0, point2
            else:
                return alpha2, alpha1
    else:
        while True:
            delta *= 2
            alpha2 = alpha1 + delta
            point2 = point1 + point(direction[0] * delta, direction[1] * delta)
            if loss_function.f(point2) < loss_function.f(point1):
                alpha0, alpha1 = alpha1, alpha2
                point0, point1 = point1, point2
            else:
                return alpha0, alpha2
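
Finally, a small assumed driver that runs a few of the optimizers end to end. It is only a sanity check built on the sketched helpers from the top, not part of the original examples; every optimizer stops once the gradient norm drops below epsilon or k_max iterations are reached.

# Assumed demo driver (depends on the sketched point/rosenbrock helpers above).
if __name__ == '__main__':
    loss = rosenbrock()
    start = point(-1.5, 1.5)

    for name, optimizer in [('plain gradient descent', plain_gradient_descent),
                            ('momentum', Momentum), ('Adagrad', Adagrad)]:
        trajectory = optimizer(loss, start)
        last = trajectory[-1]  # .x / .y access assumes the sketched point class
        print(name, 'stopped after', len(trajectory) - 1, 'updates at',
              (last.x, last.y))

    # the coordinate and (quasi-)Newton methods take the line search by name
    trajectory = cyclic_coordinate_method(loss, start, method='golden_search')
    last = trajectory[-1]
    print('cyclic coordinate descent stopped at', (last.x, last.y))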