Example #1
def golden_search(loss_function: rosenbrock,
                  start: point,
                  direction: list,
                  epsilon=0.1) -> float:
    """
    derivative-free line search for the step length along the given direction
    :param loss_function:
    :param start:
    :param direction:
    :param epsilon:
    :return:
    """
    a, b = advance_retreat_method(loss_function, start, direction)

    # find the minimum
    golden_num = (math.sqrt(5) - 1) / 2
    p, q = a + (1 - golden_num) * (b - a), a + golden_num * (b - a)
    while abs(a - b) > epsilon:
        f_p = loss_function.f(start +
                              point(direction[0] * p, direction[1] * p))
        f_q = loss_function.f(start +
                              point(direction[0] * q, direction[1] * q))
        if f_p < f_q:
            b, q = q, p
            p = a + (1 - golden_num) * (b - a)
        else:
            a, p = p, q
            q = a + golden_num * (b - a)

    return (a + b) / 2
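# A minimal, self-contained sketch of the same golden-section idea on a plain 1-D
# function, independent of the rosenbrock/point classes; golden_1d and f below are
# illustrative names, not part of the original module.
import math

def golden_1d(f, a, b, epsilon=1e-4):
    golden_num = (math.sqrt(5) - 1) / 2
    p, q = a + (1 - golden_num) * (b - a), a + golden_num * (b - a)
    while abs(b - a) > epsilon:
        if f(p) < f(q):  # the minimum lies in [a, q]
            b, q = q, p
            p = a + (1 - golden_num) * (b - a)
        else:            # the minimum lies in [p, b]
            a, p = p, q
            q = a + golden_num * (b - a)
    return (a + b) / 2

print(golden_1d(lambda t: (t - 2.0) ** 2, 0.0, 5.0))  # close to 2.0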
Example #2
def dichotomous_search(loss_function: rosenbrock,
                       start: point,
                       direction: list,
                       epsilon=0.1) -> float:
    """
    derivative-free line search for the step length along the given direction
    :param loss_function:
    :param start:
    :param direction:
    :param epsilon:
    :return:
    """
    a, b = advance_retreat_method(loss_function, start, direction)

    # find the minimum
    e = epsilon / 3
    p, q = (a + b) / 2 - e, (a + b) / 2 + e
    while abs(a - b) > epsilon:
        f_p = loss_function.f(start +
                              point(direction[0] * p, direction[1] * p))
        f_q = loss_function.f(start +
                              point(direction[0] * q, direction[1] * q))
        if f_p < f_q:
            b = q
        else:
            a = p
        p, q = (a + b) / 2 - e, (a + b) / 2 + e

    return (a + b) / 2
def Momentum(loss_function: rosenbrock,
             start: point,
             step=0.1,
             rho=0.7,
             epsilon=10e-2,
             k_max=10000) -> list:
    """

    :param loss_function:
    :param start:
    :param step:
    :param rho: the influence of historical gradients
    :param epsilon:
    :param k_max:
    :return:
    """
    x, k = [start], 0
    direction = -loss_function.g(start) / np.linalg.norm(
        loss_function.g(start))
    p = step * direction
    x.append(x[k] + point(p[0], p[1]))

    while True:
        k += 1
        # break if the termination conditions are met
        gradient = loss_function.g(x[k])
        if k > k_max or np.linalg.norm(gradient) < epsilon: break

        # find the new x
        direction = -gradient / np.linalg.norm(gradient)
        # add the historical p
        p = rho * p + step * direction
        x.append(x[k] + point(p[0], p[1]))

    return x
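# A numpy-only sketch of the same momentum update, p = rho * p + step * direction, on
# the toy quadratic f(x) = x0**2 + 10 * x1**2; momentum_sketch and its inline gradient
# are illustrative assumptions, not part of the original module.
import numpy as np

def momentum_sketch(x0, step=0.1, rho=0.7, k_max=200, epsilon=1e-2):
    grad = lambda x: np.array([2.0 * x[0], 20.0 * x[1]])  # gradient of the toy quadratic
    x, p = np.array(x0, dtype=float), np.zeros(2)
    for _ in range(k_max):
        g = grad(x)
        if np.linalg.norm(g) < epsilon:
            break
        # blend the historical p with the new normalised descent direction, then move
        p = rho * p + step * (-g / np.linalg.norm(g))
        x = x + p
    return x

print(momentum_sketch([5.0, -3.0]))  # moves toward the minimum at (0, 0)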
Example #4
def ADMM(loss_function: example,
         start: point,
         lama: float,
         epsilon=1e-1,
         iteration_max=1000) -> list:
    """
    Alternating Direction Method of Multipliers
    :param loss_function:
    :param start:
    :param lama:
    :param epsilon:
    :param iteration_max:
    :return:
    """
    points, M, k = [start], len(start), 0

    while True:
        # update the point
        p = points[k]
        direction = [1, 0]
        step = golden_search(loss_function, lama, p, direction)
        p = p + point(direction[0] * step, direction[1] * step)
        direction = [0, 1]
        step = golden_search(loss_function, lama, p, direction)
        p = p + point(direction[0] * step, direction[1] * step)
        points.append(p)
        k += 1
        # update the lama
        lama = lama + loss_function.rho * loss_function.subject_to(p)
        # break if the termination condition is met
        if k > iteration_max or (points[k] - points[k - 1]).L2() < epsilon:
            break

    return points
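# A sketch of the multiplier update used above, lama = lama + rho * subject_to(p), on
# the toy problem min x**2 + y**2 subject to x + y = 1; the closed-form inner minimiser
# below is specific to this toy problem and is an assumption, not the original code.
def multiplier_update_sketch(rho=1.0, lama=0.0, iterations=50):
    for _ in range(iterations):
        # minimise x**2 + y**2 + lama*(x + y - 1) + (rho/2)*(x + y - 1)**2; by symmetry
        # x = y, and setting the derivative to zero gives x = (rho - lama) / (2 + 2*rho)
        x = y = (rho - lama) / (2 + 2 * rho)
        lama = lama + rho * (x + y - 1)  # dual ascent on the constraint residual
    return x, y, lama

print(multiplier_update_sketch())  # tends to x = y = 0.5, lama = -1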
def getMap_2() -> tuple:
    # -1 denotes an unreachable point, >= 0 denotes the terrain cost of a point
    Map = np.zeros((20, 40))

    # add the topography
    # unreachable -1
    Map[0][3] = -1
    Map[2][0] = Map[2][1] = Map[2][2] = Map[2][3] = Map[2][4] = Map[2][5] = -1
    Map[0][7] = Map[1][7] = Map[2][7] = Map[2][8] = Map[2][9] = Map[2][
        10] = Map[3][8] = -1
    Map[0][12] = Map[1][12] = Map[2][12] = Map[3][12] = Map[4][12] = Map[5][
        12] = Map[6][12] = Map[7][12] = -1
    Map[5][8] = Map[5][7] = Map[6][7] = Map[7][7] = Map[6][6] = Map[6][
        5] = Map[6][4] = Map[6][3] = Map[6][2] = -1
    Map[7][5] = Map[8][5] = Map[9][5] = Map[10][5] = Map[11][5] = -1
    Map[11][4] = Map[11][3] = Map[11][2] = Map[10][2] = Map[12][3] = Map[13][
        3] = Map[14][3] = Map[15][3] = Map[16][3] = -1
    Map[15][4] = Map[15][5] = Map[15][6] = Map[15][7] = Map[15][8] = Map[14][8] = Map[13][8] = Map[13][9] = Map[12][8] = \
    Map[11][8] = Map[10][8] = Map[10][7] = Map[9][7] = -1
    Map[13][11] = Map[12][12] = Map[13][12] = Map[14][12] = Map[15][12] = Map[16][12] = Map[17][12] = Map[18][12] = \
    Map[19][12] = -1
    Map[18][3] = Map[19][3] = Map[17][7] = Map[18][7] = Map[19][7] = Map[15][
        24] = Map[15][25] = Map[16][24] = Map[16][25] = -1
    Map[10][19] = Map[11][19] = Map[12][19] = Map[10][20] = Map[11][20] = Map[12][20] = Map[10][21] = Map[11][21] = \
    Map[12][21] = -1
    Map[10][28] = Map[11][31] = Map[13][31] = Map[7][36] = Map[9][36] = -1
    # desert 4
    for i in range(24, 40):
        Map[0][i] = 4
    for i in range(25, 40):
        Map[1][i] = 4
    for i in range(26, 40):
        Map[2][i] = 4
    for i in range(26, 37):
        Map[3][i] = 4
    for i in range(26, 36):
        Map[4][i] = 4
    for i in range(27, 33):
        Map[5][i] = 4
    for i in range(27, 33):
        Map[6][i] = 4
    for i in range(29, 33):
        Map[7][i] = 4
    # river 2
    Map[1][34] = Map[2][33] = Map[3][32] = Map[4][33] = Map[5][33] = Map[5][34] = Map[6][33] = Map[6][34] = Map[7][33] = \
    Map[7][34] = Map[7][35] = 2
    Map[8][32] = Map[8][33] = Map[8][34] = Map[8][35] = Map[9][32] = Map[9][
        33] = Map[9][34] = Map[10][32] = Map[10][33] = Map[11][32] = 2
    Map[10][36] = Map[10][35] = Map[11][35] = Map[11][34] = Map[12][34] = Map[12][33] = Map[13][34] = Map[13][33] = \
    Map[13][32] = Map[14][34] = Map[14][33] = Map[14][32] = 2
    Map[15][33] = Map[15][32] = Map[15][31] = Map[16][33] = Map[16][32] = Map[16][31] = Map[17][32] = Map[17][31] = \
    Map[17][30] = 2
    Map[18][31] = Map[18][30] = Map[18][29] = Map[19][30] = Map[19][29] = Map[
        19][28] = 2

    # set the start point and end point
    start = point(10, 4)
    end = point(0, 35)

    return Map, start, end
def getMap_1() -> tuple:
    # -1 denotes an unreachable point, >= 0 denotes the terrain cost of a point
    Map = np.zeros((14, 17))

    # add the topography
    Map[5][6] = Map[6][6] = Map[7][7] = Map[8][7] = Map[9][7] = Map[9][
        8] = Map[10][8] = Map[11][8] = -1

    # set the start point and end point
    start = point(8, 4)
    end = point(9, 13)

    return Map, start, end
def Adam_HD(loss_function: rosenbrock,
            start: point,
            initial_step=0.1,
            rho0=0.9,
            rho1=0.99,
            beta=10e-7,
            epsilon=10e-2,
            k_max=10000) -> list:
    """
    Adam (adaptive moment estimation) with hypergradient step-size adaptation
    :param loss_function:
    :param start:
    :param initial_step:
    :param rho0: the influence of historical directions
    :param rho1: the influence of historical squared gradients
    :param beta: hypergradient learning rate for the step size
    :param epsilon:
    :param k_max:
    :return:
    """
    x, k, step = [start], 0, initial_step
    delta = np.array([10e-7] * len(start))

    r = np.zeros(len(start))
    direction = -loss_function.g(start) / np.linalg.norm(
        loss_function.g(start))
    uk_old = direction
    r = rho1 * r + (1 - rho1) * direction**2
    p = step * uk_old
    x.append(x[k] + point(p[0], p[1]))

    while True:
        k += 1
        gradient = loss_function.g(x[k])
        # break if the termination conditions are met
        if k > k_max or np.linalg.norm(gradient) < epsilon: break
        gradient = -gradient / np.linalg.norm(gradient)
        # blend in the historical direction (weight rho0)
        direction = rho0 * direction + (1 - rho0) * gradient
        uk_new = direction / (r + delta)**0.5
        # find the new x
        # blend in the historical r (weight rho1)
        r = rho1 * r + (1 - rho1) * gradient**2
        # update the step
        step = step + beta * np.dot(uk_new, uk_old)
        p = step * uk_new
        x.append(x[k] + point(p[0], p[1]))

        uk_old = uk_new

    return x
def cyclic_coordinate_method(loss_function: rosenbrock, start: point, method='golden_search', epsilon=10e-1,
                             k_max=10000) -> list:
    """

    :param loss_function:
    :param start:
    :param method:
    :param epsilon:
    :param k_max:
    :return:
    """
    x, M, k = [start], len(start), 0

    while True:
        # break if the termination conditions are met
        gradient = loss_function.g(x[k])
        if k > k_max or np.linalg.norm(gradient) < epsilon: break
        # find the new x
        direction = [0] * M
        direction[np.mod(k, M)] = 1
        if method == 'golden_search':
            step = golden_search(loss_function, x[k], direction)
        elif method == 'fibonacci_search':
            step = fibonacci_search(loss_function, x[k], direction)
        elif method == 'dichotomous_search':
            step = dichotomous_search(loss_function, x[k], direction)
        else:
            return x
        x.append(x[k] + point(direction[0] * step, direction[1] * step))
        k += 1

    return x
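# A self-contained sketch of the cyclic coordinate idea on the non-separable quadratic
# f(x) = x0**2 + x1**2 + x0*x1 - 3*x0, with the exact 1-D minimiser along each axis
# standing in for the line search; all names here are illustrative assumptions.
def cyclic_coordinate_sketch(x0=0.0, x1=0.0, k_max=20):
    for k in range(k_max):
        if k % 2 == 0:
            x0 = (3 - x1) / 2  # argmin over x0 with x1 fixed
        else:
            x1 = -x0 / 2       # argmin over x1 with x0 fixed
    return x0, x1

print(cyclic_coordinate_sketch())  # converges to the minimiser (2, -1)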
Example #9
def ALM(loss_function: example,
        start: point,
        lama: float,
        epsilon=1e-1,
        iteration_max=1000) -> list:
    """

    :param loss_function:
    :param start:
    :param lama:
    :param epsilon:
    :param iteration_max:
    :return:
    """
    points, M, k = [start], len(start), 0

    while True:
        # find the new point by cyclic coordinate method
        p = points[k]
        p_old = p
        while True:
            direction = [0] * M
            direction[np.mod(k, M)] = 1
            step = golden_search(loss_function, lama, p, direction)
            p = p + point(direction[0] * step, direction[1] * step)
            points.append(p)
            k += 1
            if k > iteration_max or (points[k] - points[k - 1]).L2() < epsilon:
                break
        # update the lama
        lama = lama + loss_function.rho * loss_function.subject_to(p)
        # break if the termination condition is met
        if k > iteration_max or (p - p_old).L2() < epsilon: break

    return points
def Adadelta(loss_function: rosenbrock,
             start: point,
             rho=0.99,
             epsilon=10e-2,
             k_max=10000) -> list:
    """

    :param loss_function:
    :param start:
    :param rho:
    :param epsilon:
    :param k_max:
    :return:
    """
    x, k = [start], 0
    delta = np.array([10e-7] * len(start))

    step = np.zeros(len(start))
    r = np.zeros(len(start))
    while True:
        gradient = loss_function.g(x[k])
        # break if the termination conditions are met
        if k > k_max or np.linalg.norm(gradient) < epsilon: break
        gradient = -gradient / np.linalg.norm(gradient)
        # find the new x
        # blend in the historical r (weight rho)
        r = rho * r + (1 - rho) * gradient**2
        p = gradient * ((step + delta) / (r + delta))**0.5
        x.append(x[k] + point(p[0], p[1]))
        step = rho * step + (1 - rho) * p**2
        k += 1

    return x
def singleAStarSearch(Map: np.array, start: point, end: point) -> node:
    """

    :param Map: the grid map with terrain costs (-1 means unreachable)
    :param start: the start point
    :param end: the end point
    :return: the end node with the information of path
    """
    visitedMap = np.zeros(Map.shape)
    heap = []
    n = node(None, start, 0, end, False)
    # loop until node n reaches the end point
    while (n.row != end.row or n.column != end.column):
        # find the possible area
        row_low = n.row - 1 if n.row > 0 else n.row
        row_high = n.row + 1 if n.row < Map.shape[0] - 1 else n.row
        column_low = n.column - 1 if n.column > 0 else n.column
        column_high = n.column + 1 if n.column < Map.shape[1] - 1 else n.column
        # extend the path from point n
        for i in range(row_low, row_high + 1):
            for j in range(column_low, column_high + 1):
                # skip the point n, visited points and unreachable points
                if (i == n.row and j
                        == n.column) or visitedMap[i][j] or Map[i][j] < 0:
                    continue
                # push the new node into heap and mark it as visited
                heapq.heappush(heap, node(n, point(i, j), Map[i][j], end,
                                          False))
                visitedMap[i][j] = 1
        # if no path can arrive at the end point then return None
        if len(heap) == 0: return None
        n = heapq.heappop(heap)

    return n
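# A minimal, self-contained sketch of the same heap-driven grid search using plain
# tuples instead of the node/point classes: cells < 0 are unreachable, the cell value
# is added to the path cost, and a Chebyshev-distance heuristic (admissible for
# unit-cost 8-connected moves) orders the heap. All names are illustrative assumptions.
import heapq
import numpy as np

def a_star_sketch(grid, start, end):
    visited = {start}
    heap = [(max(abs(start[0] - end[0]), abs(start[1] - end[1])), 0, start, [start])]
    while heap:
        _, cost, (r, c), path = heapq.heappop(heap)
        if (r, c) == end:
            return path
        for i in range(max(r - 1, 0), min(r + 1, grid.shape[0] - 1) + 1):
            for j in range(max(c - 1, 0), min(c + 1, grid.shape[1] - 1) + 1):
                # skip the current cell, visited cells and unreachable cells
                if (i, j) == (r, c) or (i, j) in visited or grid[i][j] < 0:
                    continue
                visited.add((i, j))
                g = cost + 1 + grid[i][j]                  # path cost so far
                h = max(abs(i - end[0]), abs(j - end[1]))  # heuristic to the goal
                heapq.heappush(heap, (g + h, g, (i, j), path + [(i, j)]))
    return None  # no path reaches the end point

print(a_star_sketch(np.zeros((3, 3)), (0, 0), (2, 2)))  # [(0, 0), (1, 1), (2, 2)]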
def Adagrad(loss_function: rosenbrock,
            start: point,
            initial_step=0.1,
            epsilon=10e-2,
            k_max=10000) -> list:
    """
    Adaptive Gradient
    :param loss_function:
    :param start:
    :param initial_step:
    :param epsilon:
    :param k_max:
    :return:
    """
    x, k = [start], 0
    delta = np.array([10e-7] * len(start))

    r = np.zeros(len(start))
    while True:
        gradient = loss_function.g(x[k])
        # break if the termination conditions are met
        if k > k_max or np.linalg.norm(gradient) < epsilon: break
        gradient = -gradient / np.linalg.norm(gradient)
        # find the new x
        r = r + gradient**2
        p = initial_step * gradient / (r + delta)**0.5
        x.append(x[k] + point(p[0], p[1]))
        k += 1

    return x
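# A numpy-only sketch of the standard Adagrad accumulator, r += g**2 with per-coordinate
# step initial_step / sqrt(r + delta) (the function above additionally normalises the
# gradient first); adagrad_sketch and its inline gradient are illustrative assumptions.
import numpy as np

def adagrad_sketch(x0, initial_step=0.5, k_max=500, delta=1e-7):
    grad = lambda x: np.array([2.0 * x[0], 20.0 * x[1]])  # gradient of x0**2 + 10*x1**2
    x, r = np.array(x0, dtype=float), np.zeros(2)
    for _ in range(k_max):
        g = grad(x)
        r = r + g ** 2                                 # accumulate squared gradients
        x = x - initial_step * g / np.sqrt(r + delta)  # per-coordinate step sizes
    return x

print(adagrad_sketch([5.0, -3.0]))  # moves toward the minimum at (0, 0)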
def plain_gradient_descent(loss_function: rosenbrock,
                           start: point,
                           step=0.1,
                           epsilon=10e-2,
                           k_max=10000) -> list:
    """

    :param loss_function:
    :param start:
    :param step:
    :param epsilon:
    :param k_max:
    :return:
    """
    x, k = [start], 0

    while True:
        # break if the termination conditions are met
        gradient = loss_function.g(x[k])
        if k > k_max or np.linalg.norm(gradient) < epsilon: break

        # find the new x
        direction = -gradient / np.linalg.norm(gradient)
        p = step * direction
        x.append(x[k] + point(p[0], p[1]))
        k += 1

    return x
def Nesterov_momentum_HD(loss_function: rosenbrock,
                         start: point,
                         initial_step=0.1,
                         rho=0.7,
                         mu=0.2,
                         beta=0.001,
                         epsilon=10e-2,
                         k_max=10000) -> list:
    """

    :param loss_function:
    :param start:
    :param step:
    :param rho: the influence of historical gradients
    :param beta: ahead rate
    :param epsilon:
    :param k_max:
    :return:
    """
    x, k, step = [start], 0, initial_step
    direction_old = -loss_function.g(start) / np.linalg.norm(
        loss_function.g(start))
    p = step * direction_old
    x.append(x[k] + point(p[0], p[1]))

    while True:
        k += 1
        # break if the termination conditions are met
        if k > k_max or np.linalg.norm(loss_function.g(x[k])) < epsilon: break

        # find the new x
        # ahead p * beta
        gradient = loss_function.g(x[k] + point(p[0] * mu, p[1] * mu))
        direction_new = -gradient / np.linalg.norm(gradient)
        # update the step
        step = step + beta * np.dot(direction_new, direction_old)
        # add the historical p
        p = rho * p + step * direction_new
        x.append(x[k] + point(p[0], p[1]))

        direction_old = direction_new

    return x
def pointSet(N: int, limit=100) -> set:
    """

    :param N: the number of points
    :param limit: the restrict of points' area
    :return: set Q which includes N points
    """
    Q = set()
    while len(Q) < N:
        a = point(random.randint(0, limit), random.randint(0, limit))
        Q.add(a)  # if point a is in Q before added, Q will no change
    return Q
Example #16
def advance_retreat_method(loss_function: rosenbrock,
                           start: point,
                           direction: list,
                           step=0,
                           delta=0.1) -> tuple:
    """
    find an initial bracket [a, b] for the step length
    :param loss_function:
    :param start:
    :param direction:
    :param step:
    :param delta:
    :return:
    """
    alpha0, point0 = step, start

    alpha1 = alpha0 + delta
    point1 = point0 + point(direction[0] * delta, direction[1] * delta)
    if loss_function.f(point0) < loss_function.f(point1):
        while True:
            delta *= 2
            alpha2 = alpha0 - delta
            point2 = point0 - point(direction[0] * delta, direction[1] * delta)
            if loss_function.f(point2) < loss_function.f(point0):
                alpha1, alpha0 = alpha0, alpha2
                point1, point0 = point0, point2
            else:
                return alpha2, alpha1
    else:
        while True:
            delta *= 2
            alpha2 = alpha1 + delta
            point2 = point1 + point(direction[0] * delta, direction[1] * delta)
            if loss_function.f(point2) < loss_function.f(point1):
                alpha0, alpha1 = alpha1, alpha2
                point0, point1 = point1, point2
            else:
                return alpha0, alpha2
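# A 1-D sketch of the same advance-and-retreat bracketing with illustrative names:
# starting from alpha = 0, keep doubling the step until the function stops decreasing,
# which yields an interval containing a minimiser of a unimodal function along the
# direction.
def bracket_1d(f, alpha0=0.0, delta=0.1):
    a0, a1 = alpha0, alpha0 + delta
    if f(a0) < f(a1):  # moving forward increases f, so retreat
        while True:
            delta *= 2
            a2 = a0 - delta
            if f(a2) < f(a0):
                a1, a0 = a0, a2
            else:
                return a2, a1
    while True:        # otherwise keep advancing
        delta *= 2
        a2 = a1 + delta
        if f(a2) < f(a1):
            a0, a1 = a1, a2
        else:
            return a0, a2

print(bracket_1d(lambda t: (t - 2.0) ** 2))  # an interval containing t = 2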
def plain_gradient_descent_HD(loss_function: rosenbrock,
                              start: point,
                              initial_step=0.1,
                              beta=0.001,
                              epsilon=10e-2,
                              k_max=10000) -> list:
    """

    :param loss_function:
    :param start:
    :param step:
    :param epsilon:
    :param k_max:
    :return:
    """
    x, k, step = [start], 0, initial_step
    direction_old = -loss_function.g(start) / np.linalg.norm(
        loss_function.g(start))
    p = step * direction_old
    x.append(x[k] + point(p[0], p[1]))

    while True:
        k += 1
        # break if the termination conditions are met
        gradient = loss_function.g(x[k])
        if k > k_max or np.linalg.norm(gradient) < epsilon: break

        # find the new x
        direction_new = -gradient / np.linalg.norm(gradient)
        # update the step
        step = step + beta * np.dot(direction_new, direction_old)
        p = step * direction_new
        x.append(x[k] + point(p[0], p[1]))

        direction_old = direction_new

    return x
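# The hypergradient trick shared by the *_HD variants, sketched in isolation: the step
# size itself is adjusted by beta times the inner product of successive normalised
# descent directions, so the step grows while consecutive directions agree and shrinks
# when they disagree. The values below are illustrative.
import numpy as np

direction_old = np.array([1.0, 0.0])  # previous normalised descent direction
direction_new = np.array([0.8, 0.6])  # current normalised descent direction
step, beta = 0.1, 0.001
step = step + beta * np.dot(direction_new, direction_old)
print(step)  # 0.1008: the directions agree, so the step increases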
Example #18
def fibonacci_search(loss_function: rosenbrock,
                     start: point,
                     direction: list,
                     epsilon=1) -> float:
    """
    derivative-free line search for the step length along the given direction
    :param loss_function:
    :param start:
    :param direction:
    :param epsilon:
    :return:
    """
    a, b = advance_retreat_method(loss_function, start, direction)

    #  build the Fibonacci series
    F, d = [1.0, 2.0], (b - a) / epsilon
    while F[-1] < d:
        F.append(F[-1] + F[-2])

    # find the minimum
    N = len(F) - 1
    p, q = a + (1 - F[N - 1] / F[N]) * (b - a), a + F[N - 1] / F[N] * (b - a)
    while abs(a - b) > epsilon and N > 0:
        N = N - 1
        f_p = loss_function.f(start +
                              point(direction[0] * p, direction[1] * p))
        f_q = loss_function.f(start +
                              point(direction[0] * q, direction[1] * q))
        if f_p < f_q:
            b, q = q, p
            p = a + (1 - F[N - 1] / F[N]) * (b - a)
        else:
            a, p = p, q
            q = a + F[N - 1] / F[N] * (b - a)

    return (a + b) / 2
def BFGS(loss_function: rosenbrock,
         start: point,
         method='golden_search',
         epsilon=10e-2,
         k_max=10000) -> list:
    """
    Broyden Fletcher Goldfarb Shanno
    :param loss_function:
    :param start:
    :param method:
    :param epsilon:
    :param k_max:
    :return:
    """
    x, k = [start], 0
    B = np.identity(len(start))  # Identity matrix

    while True:
        # break if the termination conditions are met
        gradient = loss_function.g(x[k])
        if k > k_max or np.linalg.norm(gradient) < epsilon: break

        # find the new x
        gradient = gradient / np.linalg.norm(gradient)
        direction = -np.matmul(np.linalg.inv(B), gradient)
        if method == 'golden_search':
            step = golden_search(loss_function, x[k], direction)
        elif method == 'fibonacci_search':
            step = fibonacci_search(loss_function, x[k], direction)
        elif method == 'dichotomous_search':
            step = dichotomous_search(loss_function, x[k], direction)
        else:
            return x
        p = step * direction
        x.append(x[k] + point(p[0], p[1]))
        # update the B
        yk = np.mat(
            loss_function.g(x[k + 1]) /
            np.linalg.norm(loss_function.g(x[k + 1])) - gradient).T
        pk = np.mat(p).T
        Bk = np.mat(B)
        B = B + np.array((yk * yk.T) / (yk.T * pk) - (Bk * pk * pk.T * Bk) /
                         (pk.T * Bk * pk))
        k += 1

    return x
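# A small numpy check (illustrative, not part of the original module) that the rank-two
# BFGS update used above satisfies the secant condition B_new @ p = y.
import numpy as np

B = np.identity(2)
p = np.array([[0.3], [-0.1]])  # step taken, as a column vector
y = np.array([[0.5], [0.2]])   # change in the (normalised) gradient, as a column vector
B_new = B + (y @ y.T) / (y.T @ p).item() - (B @ p @ p.T @ B) / (p.T @ B @ p).item()
print(np.allclose(B_new @ p, y))  # True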
def DFP(loss_function: rosenbrock,
        start: point,
        method='golden_search',
        epsilon=10e-2,
        k_max=10000) -> list:
    """
    Davidon-Fletcher-Powell quasi-Newton method
    :param loss_function:
    :param start:
    :param method: the line-search method
    :param epsilon:
    :param k_max:
    :return:
    """
    x, k = [start], 0
    D = np.identity(len(start))  # Identity matrix

    while True:
        # break if the termination conditions are met
        gradient = loss_function.g(x[k])
        if k > k_max or np.linalg.norm(gradient) < epsilon: break

        # find the new x
        gradient = gradient / np.linalg.norm(gradient)
        direction = -np.matmul(D, gradient)
        if method == 'golden_search':
            step = golden_search(loss_function, x[k], direction)
        elif method == 'fibonacci_search':
            step = fibonacci_search(loss_function, x[k], direction)
        elif method == 'dichotomous_search':
            step = dichotomous_search(loss_function, x[k], direction)
        else:
            return x
        p = step * direction
        x.append(x[k] + point(p[0], p[1]))
        # update the D
        yk = np.mat(
            loss_function.g(x[k + 1]) /
            np.linalg.norm(loss_function.g(x[k + 1])) - gradient).T
        pk = np.mat(p).T
        Dk = np.mat(D)
        D = D + np.array((pk * pk.T) / (pk.T * yk) - (Dk * yk * yk.T * Dk) /
                         (yk.T * Dk * yk))
        k += 1

    return x
def conjugate_gradient(loss_function: rosenbrock,
                       start: point,
                       method='golden_search',
                       epsilon=10e-2,
                       k_max=10000) -> list:
    """

    :param loss_function:
    :param start:
    :param method:
    :param epsilon:
    :param k_max:
    :return:
    """
    x, direction, k = [
        start
    ], -1 * loss_function.g(start) / np.linalg.norm(loss_function.g(start)), 0

    while True:
        # break if the termination conditions are met
        gradient_old = loss_function.g(x[k]) / np.linalg.norm(
            loss_function.g(x[k]))
        if np.linalg.norm(loss_function.g(x[k])) < epsilon or k > k_max: break

        # find the new x
        if method == 'golden_search':
            step = golden_search(loss_function, x[k], direction)
        elif method == 'fibonacci_search':
            step = fibonacci_search(loss_function, x[k], direction)
        elif method == 'dichotomous_search':
            step = dichotomous_search(loss_function, x[k], direction)
        else:
            return x
        x.append(x[k] + point(direction[0] * step, direction[1] * step))

        # update the direction
        gradient_new = loss_function.g(x[k + 1]) / np.linalg.norm(
            loss_function.g(x[k + 1]))
        alpha = np.dot(gradient_new, gradient_new) / np.dot(
            gradient_old, gradient_old)
        direction = -gradient_new + alpha * direction
        k += 1

    return x
Example #22
def armijo_goldstein_search(loss_function: rosenbrock,
                            start: point,
                            direction: list,
                            rho=0.1) -> float:
    """

    :param loss_function:
    :param start:
    :param direction:
    :param rho: meet condition 0<rho<0.5
    :return:
    """
    a, b = 0, 100
    alpha = b * random.uniform(0.5, 1)
    # find the alpha
    f1 = loss_function.f(start)
    gradient = loss_function.g(start)
    gradient_f1 = np.dot(gradient.T, np.array(direction))

    while True:
        f2 = loss_function.f(start +
                             point(direction[0] * alpha, direction[1] * alpha))
        # print(f2 - f1, alpha)
        # print(rho * alpha * gradient_f1, (1 - rho) * alpha * gradient_f1)
        # the armijo goldstein rule
        # if alpha < 1: return 0.1
        if f2 - f1 <= rho * alpha * gradient_f1:
            if f2 - f1 >= (1 - rho) * alpha * gradient_f1:
                return alpha
            else:
                a, b = alpha, b
                if b < alpha:
                    alpha = (a + b) / 2
                else:
                    alpha = 2 * alpha
        else:
            a, b = a, alpha
            alpha = (a + b) / 2
def Newton_method(loss_function: rosenbrock,
                  start: point,
                  method='golden_search',
                  epsilon=10e-2,
                  k_max=10000) -> list:
    """

    :param loss_function:
    :param start:
    :param step:
    :param epsilon:
    :param k_max:
    :return:
    """
    x, k = [start], 0

    while True:
        # break if the termination conditions are met
        gradient = loss_function.g(x[k])
        if k > k_max or np.linalg.norm(gradient) < epsilon: break

        # find the new x
        inverse = np.linalg.inv(loss_function.H(x[k]))
        direction = -np.matmul(inverse, gradient)
        if method == 'golden_search':
            step = golden_search(loss_function, x[k], direction)
        elif method == 'fibonacci_search':
            step = fibonacci_search(loss_function, x[k], direction)
        elif method == 'dichotomous_search':
            step = dichotomous_search(loss_function, x[k], direction)
        else:
            return x
        p = step * direction
        x.append(x[k] + point(p[0], p[1]))
        k += 1

    return x
Example #24
# -*- coding: utf-8 -*-
'''
@author: Neil.YU
@license: (C) Copyright 2013-2018, Node Supply Chain Manager Corporation Limited.
@contact: [email protected]
@software: PyCharm 2018.1.2
@file: main.py
@time: 2020/5/25 19:38
@desc:
'''

from myClass import point, example, drawResult
from constrainedOptimization import ALM, ADMM

if __name__ == '__main__':
    # python 3.6
    epsilon = 1e-2
    loss_function, start = example(rho=1.0), point(-2, -2)

    points = ALM(loss_function, start, lama=0, epsilon=epsilon)
    drawResult(loss_function, points, 'ALM', epsilon)

    # Alternating Direction Method of Multipliers
    points = ADMM(loss_function, start, lama=0, epsilon=epsilon)
    drawResult(loss_function, points, 'ADMM', epsilon)
def bidirectionalAStarSearch(Map: np.array, start: point, end: point) -> tuple:
    """

    :param Map:
    :param start:
    :param end:
    :return:
    """
    visitedMap = np.zeros(Map.shape)
    visitedMap_reverse = np.zeros(Map.shape)

    heap, heap_reverse = [node(None, start, 0, end,
                               False)], [node(None, end, 0, start, True)]
    n = heapq.heappop(heap) if heap[0] < heap_reverse[0] else heapq.heappop(
        heap_reverse)
    # stop once a point has been visited by both search directions
    while True:
        # if the point belongs to the path from end to start
        if n.reverse:
            if visitedMap[n.row][n.column]:
                break
            # find the possible area
            row_low = n.row - 1 if n.row > 0 else n.row
            row_high = n.row + 1 if n.row < Map.shape[0] - 1 else n.row
            column_low = n.column - 1 if n.column > 0 else n.column
            column_high = n.column + 1 if n.column < Map.shape[
                1] - 1 else n.column
            # extend the path from point n
            for i in range(row_low, row_high + 1):
                for j in range(column_low, column_high + 1):
                    # skip the point n, visited points and unreachable points
                    if (i == n.row and j == n.column
                        ) or visitedMap_reverse[i][j] or Map[i][j] < 0:
                        continue
                    new_node = node(n, point(i, j), Map[i][j], start, True)
                    heapq.heappush(heap_reverse, new_node)
                    visitedMap_reverse[i][j] = 1
        # if the point belongs to the path from start to end
        else:
            if visitedMap_reverse[n.row][n.column]:
                break
            # find the possible area
            row_low = n.row - 1 if n.row > 0 else n.row
            row_high = n.row + 1 if n.row < Map.shape[0] - 1 else n.row
            column_low = n.column - 1 if n.column > 0 else n.column
            column_high = n.column + 1 if n.column < Map.shape[
                1] - 1 else n.column
            # extend the path from point n
            for i in range(row_low, row_high + 1):
                for j in range(column_low, column_high + 1):
                    # skip the point n, visited points and unreachable points
                    if (i == n.row and j
                            == n.column) or visitedMap[i][j] or Map[i][j] < 0:
                        continue
                    new_node = node(n, point(i, j), Map[i][j], end, False)
                    heapq.heappush(heap, new_node)
                    visitedMap[i][j] = 1
        # if no path can arrive at the end point then return None
        if len(heap) == 0 or len(heap_reverse) == 0: return None, None
        n = heapq.heappop(
            heap) if heap[0] < heap_reverse[0] else heapq.heappop(heap_reverse)

    # if the point belongs to the path from end to start
    if n.reverse:
        for m in heap:
            if m.row == n.row and m.column == n.column:
                return m, n
    # if the point belongs to the path from start to end
    else:
        for m in heap_reverse:
            if m.row == n.row and m.column == n.column:
                return n, m
def mission_1():
    a, b, step = 1, 10, 0.1
    loss_function, start = rosenbrock(a, b), point(5, -10)

    ###### derivative-free method
    # step search method
    epsilon = 10e-1
    method = ['golden_search', 'fibonacci_search', 'dichotomous_search']
    # """
    for m in method:
        points = cyclic_coordinate_method(loss_function,
                                          start,
                                          method=m,
                                          epsilon=epsilon)
        #drawResult(loss_function, points, start, 'cyclic_coordinate_method by %s' % m, epsilon)
        drawResult2(loss_function, points, start,
                    'cyclic_coordinate_method by %s' % m, epsilon)
    # """

    ###### first derivative method
    # rho denotes the influence rate of the historical p on the new p; mu denotes the lookahead rate in Nesterov momentum
    # rho0 denotes the influence rate of historical direction on the new direction in Adam
    # rho1 denotes the influence rate of historical r on the new r in Adam
    epsilon = 10e-1
    rho, mu = 0.8, 0.2
    rho0, rho1 = 0.9, 0.99
    # """
    points = steepest_descent(loss_function,
                              start,
                              method=method[0],
                              epsilon=epsilon)
    # drawResult(loss_function, points, start, 'steepest_descent by %s' % method[0], epsilon)
    drawResult2(loss_function, points, start,
                'steepest_descent by %s' % method[0], epsilon)

    points = conjugate_gradient(loss_function,
                                start,
                                method=method[0],
                                epsilon=epsilon)
    # drawResult(loss_function, points, start, 'conjugate_gradient by %s' % method[0], epsilon)
    drawResult2(loss_function, points, start,
                'conjugate_gradient by %s' % method[0], epsilon)
    # """

    # """
    points = plain_gradient_descent(loss_function,
                                    start,
                                    step=step,
                                    epsilon=epsilon)
    # drawResult(loss_function, points, start, 'plain_gradient_descent', epsilon, otherlabel=',step=%.2g' % step)
    drawResult2(loss_function,
                points,
                start,
                'plain_gradient_descent',
                epsilon,
                otherlabel=',step=%.2g' % step)

    points = Momentum(loss_function,
                      start,
                      step=step,
                      rho=rho,
                      epsilon=epsilon)
    # drawResult(loss_function, points, start, 'Momentum', epsilon, otherlabel=',step=%.2g,rho=%.2g' % (step, rho))
    drawResult2(loss_function,
                points,
                start,
                'Momentum',
                epsilon,
                otherlabel=',step=%.2g,rho=%.2g' % (step, rho))

    points = Nesterov_momentum(loss_function,
                               start,
                               step=step,
                               rho=rho,
                               mu=mu,
                               epsilon=epsilon)
    # drawResult(loss_function, points, start, 'Nesterov_momentum', epsilon,
    #            otherlabel=',step=%.2g,rho=%.2g,mu=%.2g' % (step, rho, mu))
    drawResult2(loss_function,
                points,
                start,
                'Nesterov_momentum',
                epsilon,
                otherlabel=',step=%.2g,rho=%.2g,mu=%.2g' % (step, rho, mu))
    # """

    # """
    points = Adagrad(loss_function, start, initial_step=step, epsilon=epsilon)
    # drawResult(loss_function, points, start, 'Adagrad', epsilon, otherlabel=',initial_step=%.2g' % step)
    drawResult2(loss_function,
                points,
                start,
                'Adagrad',
                epsilon,
                otherlabel=',initial_step=%.2g' % step)

    points = RMSprop(loss_function,
                     start,
                     initial_step=step,
                     rho=rho1,
                     epsilon=epsilon)
    # drawResult(loss_function, points, start, 'RMSProp', epsilon,
    #            otherlabel=',initial_step=%.2g,rho=%.2g' % (step, rho1))
    drawResult2(loss_function,
                points,
                start,
                'RMSProp',
                epsilon,
                otherlabel=',initial_step=%.2g,rho=%.2g' % (step, rho1))

    points = Adadelta(loss_function, start, rho=rho1, epsilon=epsilon)
    # drawResult(loss_function, points, start, 'Adadelta', epsilon, otherlabel=',rho=%.2g' % rho1)
    drawResult2(loss_function,
                points,
                start,
                'Adadelta',
                epsilon,
                otherlabel=',rho=%.2g' % rho1)

    points = Adam(loss_function,
                  start,
                  initial_step=step,
                  rho0=rho0,
                  rho1=rho1,
                  epsilon=epsilon)
    # drawResult(loss_function, points, start, 'Adam', epsilon,
    #            otherlabel=',initial_step=%.2g,rho0=%.2g,rho1=%.2g' % (step, rho0, rho1))
    drawResult2(loss_function,
                points,
                start,
                'Adam',
                epsilon,
                otherlabel=',initial_step=%.2g,rho0=%.2g,rho1=%.2g' %
                (step, rho0, rho1))
    # """

    ###### hypergradient descent
    # beta denotes the influence rate of the historical direction on the new step in hypergradient descent
    # """
    beta0, beta1 = 0.01, 10e-7

    points = plain_gradient_descent_HD(loss_function,
                                       start,
                                       initial_step=step,
                                       beta=beta0,
                                       epsilon=epsilon)
    # drawResult(loss_function, points, start, 'plain_gradient_descent_HD', epsilon,
    #           otherlabel=',initial_step=%.2g,beta=%.2g' % (step, beta0))
    drawResult2(loss_function,
                points,
                start,
                'plain_gradient_descent_HD',
                epsilon,
                otherlabel=',initial_step=%.2g,beta=%.2g' % (step, beta0))

    points = Nesterov_momentum_HD(loss_function,
                                  start,
                                  initial_step=step,
                                  rho=rho,
                                  mu=mu,
                                  beta=beta0,
                                  epsilon=epsilon)
    # drawResult(loss_function, points, start, 'Nesterov_momentum_HD', epsilon,
    #            otherlabel=',initial_step=%.2g,rho=%.2g,mu=%.2g,beta=%.2g' % (step, rho, mu, beta0))
    drawResult2(loss_function,
                points,
                start,
                'Nesterov_momentum_HD',
                epsilon,
                otherlabel=',initial_step=%.2g,rho=%.2g,mu=%.2g,beta=%.2g' %
                (step, rho, mu, beta0))

    points = Adam_HD(loss_function,
                     start,
                     initial_step=step,
                     rho0=rho0,
                     rho1=rho1,
                     beta=beta1,
                     epsilon=epsilon)
    # drawResult(loss_function, points, start, 'Adam_HD', epsilon,
    #            otherlabel=',initial_step=%.2g,rho0=%.2g,rho1=%.2g,beta=%.2g' % (step, rho0, rho1, beta1))
    drawResult2(loss_function,
                points,
                start,
                'Adam_HD',
                epsilon,
                otherlabel=',initial_step=%.2g,rho0=%.2g,rho1=%.2g,beta=%.2g' %
                (step, rho0, rho1, beta1))
    # """

    ###### second derivative method
    # """
    epsilon = 10e-2

    points = Newton_method(loss_function,
                           start,
                           method=method[0],
                           epsilon=epsilon)
    # drawResult(loss_function, points, start, 'Newton_method by %s' % method[0], epsilon)
    drawResult2(loss_function, points, start,
                'Newton_method by %s' % method[0], epsilon)

    points = DFP(loss_function, start, method=method[0], epsilon=epsilon)
    # drawResult(loss_function, points, start, 'DFP by %s' % method[0], epsilon)
    drawResult2(loss_function, points, start, 'DFP by %s' % method[0], epsilon)

    points = BFGS(loss_function, start, method=method[0], epsilon=epsilon)
    # drawResult(loss_function, points, start, 'BFGS by %s' % method[0], epsilon)
    drawResult2(loss_function, points, start, 'BFGS by %s' % method[0],
                epsilon)