import math
import random

import numpy as np

# `rosenbrock` (a loss with methods f, g and H) and `point` (a 2-D point supporting
# +, -, indexing and len) are assumed to be defined or imported elsewhere in this project.


def dichotomous_search(loss_function: rosenbrock, start: point, direction: list, epsilon=0.1) -> float:
    """
    derivative-free line search for the step size
    :param loss_function:
    :param start:
    :param direction:
    :param epsilon:
    :return:
    """
    a, b = advance_retreat_method(loss_function, start, direction)

    # narrow the bracket [a, b] until it is shorter than epsilon
    e = epsilon / 3
    p, q = (a + b) / 2 - e, (a + b) / 2 + e
    while abs(a - b) > epsilon:
        f_p = loss_function.f(start + point(direction[0] * p, direction[1] * p))
        f_q = loss_function.f(start + point(direction[0] * q, direction[1] * q))

        if f_p < f_q:
            b = q
        else:
            a = p
        p, q = (a + b) / 2 - e, (a + b) / 2 + e

    return (a + b) / 2
def golden_search(loss_function: rosenbrock, start: point, direction: list, epsilon=0.1) -> float:
    """
    derivative-free line search for the step size
    :param loss_function:
    :param start:
    :param direction:
    :param epsilon:
    :return:
    """
    a, b = advance_retreat_method(loss_function, start, direction)

    # golden-section search: shrink the bracket while keeping the golden ratio
    golden_num = (math.sqrt(5) - 1) / 2
    p, q = a + (1 - golden_num) * (b - a), a + golden_num * (b - a)
    while abs(a - b) > epsilon:
        f_p = loss_function.f(start + point(direction[0] * p, direction[1] * p))
        f_q = loss_function.f(start + point(direction[0] * q, direction[1] * q))

        if f_p < f_q:
            b, q = q, p
            p = a + (1 - golden_num) * (b - a)
        else:
            a, p = p, q
            q = a + golden_num * (b - a)

    return (a + b) / 2
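# Usage sketch for the line searches above (an assumption about the surrounding project:
# `rosenbrock()` takes no arguments and `point(x, y)` builds a 2-D point, neither of
# which is defined in this file):
#
#     loss = rosenbrock()
#     x0 = point(-1.5, 2.0)
#     step = golden_search(loss, x0, direction=[1, 0])  # best step along the x1 axis
#     x1 = x0 + point(step, 0)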
def Momentum(loss_function: rosenbrock, start: point, step=0.1, rho=0.7, epsilon=10e-2, k_max=10000) -> list:
    """
    gradient descent with momentum
    :param loss_function:
    :param start:
    :param step:
    :param rho: the influence of historical gradients
    :param epsilon:
    :param k_max:
    :return:
    """
    x, k = [start], 0
    direction = -loss_function.g(start) / np.linalg.norm(loss_function.g(start))
    p = step * direction
    x.append(x[k] + point(p[0], p[1]))

    while True:
        k += 1

        # break if the termination conditions are met
        gradient = loss_function.g(x[k])
        if k > k_max or np.linalg.norm(gradient) < epsilon:
            break

        # find the new x
        direction = -gradient / np.linalg.norm(gradient)
        # accumulate the historical update p
        p = rho * p + step * direction
        x.append(x[k] + point(p[0], p[1]))

    return x
def Adam_HD(loss_function: rosenbrock, start: point, initial_step=0.1, rho0=0.9, rho1=0.99, beta=10e-7, epsilon=10e-2, k_max=10000) -> list:
    """
    adaptive momentum (Adam) with hypergradient step-size adaptation
    :param loss_function:
    :param start:
    :param initial_step:
    :param rho0: decay rate of the direction (first moment)
    :param rho1: decay rate of the squared gradient (second moment)
    :param beta: learning rate for the hypergradient step update
    :param epsilon:
    :param k_max:
    :return:
    """
    x, k, step = [start], 0, initial_step
    delta = np.array([10e-7] * len(start))
    r = np.zeros(len(start))
    direction = -loss_function.g(start) / np.linalg.norm(loss_function.g(start))
    uk_old = direction
    r = rho1 * r + (1 - rho1) * direction**2
    p = step * uk_old
    x.append(x[k] + point(p[0], p[1]))

    while True:
        k += 1

        gradient = loss_function.g(x[k])
        # break if the termination conditions are met
        if k > k_max or np.linalg.norm(gradient) < epsilon:
            break

        gradient = -gradient / np.linalg.norm(gradient)

        # blend the historical influence into the direction
        direction = rho0 * direction + (1 - rho0) * gradient
        uk_new = direction / (r + delta)**0.5

        # find the new x
        # blend the historical influence into r
        r = rho1 * r + (1 - rho1) * gradient**2
        # update the step with the hypergradient
        step = step + beta * np.dot(uk_new, uk_old)
        p = step * uk_new
        x.append(x[k] + point(p[0], p[1]))
        uk_old = uk_new

    return x
def cyclic_coordinate_method(loss_function: rosenbrock, start: point, method='golden_search', epsilon=10e-1, k_max=10000) -> list:
    """
    :param loss_function:
    :param start:
    :param method:
    :param epsilon:
    :param k_max:
    :return:
    """
    x, M, k = [start], len(start), 0
    while True:
        # break if the termination conditions are met
        gradient = loss_function.g(x[k])
        if k > k_max or np.linalg.norm(gradient) < epsilon:
            break

        # line-search along the k-th coordinate axis
        direction = [0] * M
        direction[np.mod(k, M)] = 1
        if method == 'golden_search':
            step = golden_search(loss_function, x[k], direction)
        elif method == 'fibonacci_search':
            step = fibonacci_search(loss_function, x[k], direction)
        elif method == 'dichotomous_search':
            step = dichotomous_search(loss_function, x[k], direction)
        else:
            return x
        x.append(x[k] + point(direction[0] * step, direction[1] * step))
        k += 1

    return x
def Adadelta(loss_function: rosenbrock, start: point, rho=0.99, epsilon=10e-2, k_max=10000) -> list:
    """
    :param loss_function:
    :param start:
    :param rho: decay rate of the running averages
    :param epsilon:
    :param k_max:
    :return:
    """
    x, k = [start], 0
    delta = np.array([10e-7] * len(start))
    step = np.zeros(len(start))
    r = np.zeros(len(start))
    while True:
        gradient = loss_function.g(x[k])
        # break if the termination conditions are met
        if k > k_max or np.linalg.norm(gradient) < epsilon:
            break

        gradient = -gradient / np.linalg.norm(gradient)

        # accumulate the running average of squared gradients
        r = rho * r + (1 - rho) * gradient**2
        # rescale by the ratio of accumulated squared updates to squared gradients
        p = gradient * ((step + delta) / (r + delta))**0.5
        x.append(x[k] + point(p[0], p[1]))
        step = rho * step + (1 - rho) * p**2
        k += 1

    return x
def Adagrad(loss_function: rosenbrock, start: point, initial_step=0.1, epsilon=10e-2, k_max=10000) -> list:
    """
    Adaptive Gradient
    :param loss_function:
    :param start:
    :param initial_step:
    :param epsilon:
    :param k_max:
    :return:
    """
    x, k = [start], 0
    delta = np.array([10e-7] * len(start))
    r = np.zeros(len(start))
    while True:
        gradient = loss_function.g(x[k])
        # break if the termination conditions are met
        if k > k_max or np.linalg.norm(gradient) < epsilon:
            break

        gradient = -gradient / np.linalg.norm(gradient)

        # accumulate the sum of squared gradients and rescale the step
        r = r + gradient**2
        p = initial_step * gradient / (r + delta)**0.5
        x.append(x[k] + point(p[0], p[1]))
        k += 1

    return x
def plain_gradient_descent(loss_function: rosenbrock, start: point, step=0.1, epsilon=10e-2, k_max=10000) -> list:
    """
    :param loss_function:
    :param start:
    :param step:
    :param epsilon:
    :param k_max:
    :return:
    """
    x, k = [start], 0
    while True:
        # break if the termination conditions are met
        gradient = loss_function.g(x[k])
        if k > k_max or np.linalg.norm(gradient) < epsilon:
            break

        # step along the normalized negative gradient
        direction = -gradient / np.linalg.norm(gradient)
        p = step * direction
        x.append(x[k] + point(p[0], p[1]))
        k += 1

    return x
def BFGS(loss_function: rosenbrock, start: point, method='golden_search', epsilon=10e-2, k_max=10000) -> list:
    """
    Broyden-Fletcher-Goldfarb-Shanno quasi-Newton method
    :param loss_function:
    :param start:
    :param method:
    :param epsilon:
    :param k_max:
    :return:
    """
    x, k = [start], 0
    B = np.identity(len(start))  # approximation of the Hessian
    while True:
        # break if the termination conditions are met
        gradient = loss_function.g(x[k])
        if k > k_max or np.linalg.norm(gradient) < epsilon:
            break

        # find the new x
        gradient = gradient / np.linalg.norm(gradient)
        direction = -np.matmul(np.linalg.inv(B), gradient)
        if method == 'golden_search':
            step = golden_search(loss_function, x[k], direction)
        elif method == 'fibonacci_search':
            step = fibonacci_search(loss_function, x[k], direction)
        elif method == 'dichotomous_search':
            step = dichotomous_search(loss_function, x[k], direction)
        else:
            return x
        p = step * direction
        x.append(x[k] + point(p[0], p[1]))

        # update B with the rank-two BFGS formula
        yk = np.mat(loss_function.g(x[k + 1]) / np.linalg.norm(loss_function.g(x[k + 1])) - gradient).T
        pk = np.mat(p).T
        Bk = np.mat(B)
        B = B + np.array((yk * yk.T) / (yk.T * pk) - (Bk * pk * pk.T * Bk) / (pk.T * Bk * pk))
        k += 1

    return x
def DFP(loss_function: rosenbrock, start: point, method='golden_search', epsilon=10e-2, k_max=10000) -> list:
    """
    Davidon-Fletcher-Powell quasi-Newton method
    :param loss_function:
    :param start:
    :param method:
    :param epsilon:
    :param k_max:
    :return:
    """
    x, k = [start], 0
    D = np.identity(len(start))  # approximation of the inverse Hessian
    while True:
        # break if the termination conditions are met
        gradient = loss_function.g(x[k])
        if k > k_max or np.linalg.norm(gradient) < epsilon:
            break

        # find the new x
        gradient = gradient / np.linalg.norm(gradient)
        direction = -np.matmul(D, gradient)
        if method == 'golden_search':
            step = golden_search(loss_function, x[k], direction)
        elif method == 'fibonacci_search':
            step = fibonacci_search(loss_function, x[k], direction)
        elif method == 'dichotomous_search':
            step = dichotomous_search(loss_function, x[k], direction)
        else:
            return x
        p = step * direction
        x.append(x[k] + point(p[0], p[1]))

        # update D with the rank-two DFP formula
        yk = np.mat(loss_function.g(x[k + 1]) / np.linalg.norm(loss_function.g(x[k + 1])) - gradient).T
        pk = np.mat(p).T
        Dk = np.mat(D)
        D = D + np.array((pk * pk.T) / (pk.T * yk) - (Dk * yk * yk.T * Dk) / (yk.T * Dk * yk))
        k += 1

    return x
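# For reference, the two quasi-Newton updates above implement the standard rank-two
# recurrences, with pk = x_{k+1} - x_k and yk = g_{k+1} - g_k:
#
#     BFGS:  B_{k+1} = B_k + (yk yk^T) / (yk^T pk) - (B_k pk pk^T B_k) / (pk^T B_k pk)
#     DFP:   D_{k+1} = D_k + (pk pk^T) / (pk^T yk) - (D_k yk yk^T D_k) / (yk^T D_k yk)
#
# Note that this implementation forms yk from *normalized* gradients, which deviates
# from the textbook formulas and changes the scaling of the updates.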
def Nesterov_momentum_HD(loss_function: rosenbrock, start: point, initial_step=0.1, rho=0.7, mu=0.2, beta=0.001, epsilon=10e-2, k_max=10000) -> list:
    """
    Nesterov momentum with hypergradient step-size adaptation
    :param loss_function:
    :param start:
    :param initial_step:
    :param rho: the influence of historical gradients
    :param mu: look-ahead rate
    :param beta: learning rate for the hypergradient step update
    :param epsilon:
    :param k_max:
    :return:
    """
    x, k, step = [start], 0, initial_step
    direction_old = -loss_function.g(start) / np.linalg.norm(loss_function.g(start))
    p = step * direction_old
    x.append(x[k] + point(p[0], p[1]))

    while True:
        k += 1

        # break if the termination conditions are met
        if k > k_max or np.linalg.norm(loss_function.g(x[k])) < epsilon:
            break

        # evaluate the gradient at the look-ahead point x[k] + mu * p
        gradient = loss_function.g(x[k] + point(p[0] * mu, p[1] * mu))
        direction_new = -gradient / np.linalg.norm(gradient)
        # update the step with the hypergradient
        step = step + beta * np.dot(direction_new, direction_old)
        # accumulate the historical update p
        p = rho * p + step * direction_new
        x.append(x[k] + point(p[0], p[1]))
        direction_old = direction_new

    return x
def armijo_goldstein_search(loss_function: rosenbrock, start: point, direction: list, rho=0.1) -> float:
    """
    inexact line search satisfying the Armijo-Goldstein conditions
    :param loss_function:
    :param start:
    :param direction:
    :param rho: must satisfy 0 < rho < 0.5
    :return:
    """
    a, b = 0, 100
    alpha = b * random.uniform(0.5, 1)

    # search for an acceptable alpha
    f1 = loss_function.f(start)
    gradient = loss_function.g(start)
    gradient_f1 = np.dot(gradient.T, np.array(direction))
    while True:
        f2 = loss_function.f(start + point(direction[0] * alpha, direction[1] * alpha))

        # the Armijo-Goldstein rule
        if f2 - f1 <= rho * alpha * gradient_f1:
            if f2 - f1 >= (1 - rho) * alpha * gradient_f1:
                return alpha
            else:
                # the step is too small: enlarge it
                a = alpha
                if b < alpha:
                    alpha = (a + b) / 2
                else:
                    alpha = 2 * alpha
        else:
            # the step is too large: shrink it
            b = alpha
            alpha = (a + b) / 2
def plain_gradient_descent_HD(loss_function: rosenbrock, start: point, initial_step=0.1, beta=0.001, epsilon=10e-2, k_max=10000) -> list:
    """
    plain gradient descent with hypergradient step-size adaptation
    :param loss_function:
    :param start:
    :param initial_step:
    :param beta: learning rate for the hypergradient step update
    :param epsilon:
    :param k_max:
    :return:
    """
    x, k, step = [start], 0, initial_step
    direction_old = -loss_function.g(start) / np.linalg.norm(loss_function.g(start))
    p = step * direction_old
    x.append(x[k] + point(p[0], p[1]))

    while True:
        k += 1

        # break if the termination conditions are met
        gradient = loss_function.g(x[k])
        if k > k_max or np.linalg.norm(gradient) < epsilon:
            break

        # find the new x
        direction_new = -gradient / np.linalg.norm(gradient)
        # update the step with the hypergradient
        step = step + beta * np.dot(direction_new, direction_old)
        p = step * direction_new
        x.append(x[k] + point(p[0], p[1]))
        direction_old = direction_new

    return x
def Newton_method(loss_function: rosenbrock, start: point, method='golden_search', epsilon=10e-2, k_max=10000) -> list:
    """
    :param loss_function:
    :param start:
    :param method:
    :param epsilon:
    :param k_max:
    :return:
    """
    x, k = [start], 0
    while True:
        # break if the termination conditions are met
        gradient = loss_function.g(x[k])
        if k > k_max or np.linalg.norm(gradient) < epsilon:
            break

        # Newton direction: -H^{-1} g, with the step chosen by a line search
        inverse = np.linalg.inv(loss_function.H(x[k]))
        direction = -np.matmul(inverse, gradient)
        if method == 'golden_search':
            step = golden_search(loss_function, x[k], direction)
        elif method == 'fibonacci_search':
            step = fibonacci_search(loss_function, x[k], direction)
        elif method == 'dichotomous_search':
            step = dichotomous_search(loss_function, x[k], direction)
        else:
            return x
        p = step * direction
        x.append(x[k] + point(p[0], p[1]))
        k += 1

    return x
def fibonacci_search(loss_function: rosenbrock, start: point, direction: list, epsilon=1) -> float:
    """
    derivative-free line search for the step size
    :param loss_function:
    :param start:
    :param direction:
    :param epsilon:
    :return:
    """
    a, b = advance_retreat_method(loss_function, start, direction)

    # build the Fibonacci series
    F, d = [1.0, 2.0], (b - a) / epsilon
    while F[-1] < d:
        F.append(F[-1] + F[-2])

    # shrink the bracket [a, b] using Fibonacci ratios
    N = len(F) - 1
    p, q = a + (1 - F[N - 1] / F[N]) * (b - a), a + F[N - 1] / F[N] * (b - a)
    while abs(a - b) > epsilon and N > 0:
        N = N - 1
        f_p = loss_function.f(start + point(direction[0] * p, direction[1] * p))
        f_q = loss_function.f(start + point(direction[0] * q, direction[1] * q))

        if f_p < f_q:
            b, q = q, p
            p = a + (1 - F[N - 1] / F[N]) * (b - a)
        else:
            a, p = p, q
            q = a + F[N - 1] / F[N] * (b - a)

    return (a + b) / 2
def conjugate_gradient(loss_function: rosenbrock, start: point, method='golden_search', epsilon=10e-2, k_max=10000) -> list:
    """
    :param loss_function:
    :param start:
    :param method:
    :param epsilon:
    :param k_max:
    :return:
    """
    x, direction, k = [start], -1 * loss_function.g(start) / np.linalg.norm(loss_function.g(start)), 0
    while True:
        # break if the termination conditions are met
        gradient_old = loss_function.g(x[k]) / np.linalg.norm(loss_function.g(x[k]))
        if np.linalg.norm(loss_function.g(x[k])) < epsilon or k > k_max:
            break

        # line-search along the current conjugate direction
        if method == 'golden_search':
            step = golden_search(loss_function, x[k], direction)
        elif method == 'fibonacci_search':
            step = fibonacci_search(loss_function, x[k], direction)
        elif method == 'dichotomous_search':
            step = dichotomous_search(loss_function, x[k], direction)
        else:
            return x
        x.append(x[k] + point(direction[0] * step, direction[1] * step))

        # update the direction with the Fletcher-Reeves coefficient
        # (note: since both gradients are normalized, this ratio is always 1 here)
        gradient_new = loss_function.g(x[k + 1]) / np.linalg.norm(loss_function.g(x[k + 1]))
        alpha = np.dot(gradient_new, gradient_new) / np.dot(gradient_old, gradient_old)
        direction = -gradient_new + alpha * direction
        k += 1

    return x
def advance_retreat_method(loss_function: rosenbrock, start: point, direction: list, step=0, delta=0.1) -> tuple:
    """
    find an initial bracket [a, b] for the step size
    :param loss_function:
    :param start:
    :param direction:
    :param step:
    :param delta:
    :return:
    """
    alpha0, point0 = step, start

    alpha1 = alpha0 + delta
    point1 = point0 + point(direction[0] * delta, direction[1] * delta)
    if loss_function.f(point0) < loss_function.f(point1):
        # retreat: search toward smaller alpha
        while True:
            delta *= 2
            alpha2 = alpha0 - delta
            point2 = point0 - point(direction[0] * delta, direction[1] * delta)
            if loss_function.f(point2) < loss_function.f(point0):
                alpha1, alpha0 = alpha0, alpha2
                point1, point0 = point0, point2
            else:
                return alpha2, alpha1
    else:
        # advance: search toward larger alpha
        while True:
            delta *= 2
            alpha2 = alpha1 + delta
            point2 = point1 + point(direction[0] * delta, direction[1] * delta)
            if loss_function.f(point2) < loss_function.f(point1):
                alpha0, alpha1 = alpha1, alpha2
                point0, point1 = point1, point2
            else:
                return alpha0, alpha2
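# A minimal usage sketch of the optimizers above. It assumes that `rosenbrock()` can be
# constructed without arguments and that `point(x, y)` builds a 2-D point; both live
# outside this file, so adjust the construction to the actual interfaces if they differ.
if __name__ == "__main__":
    loss = rosenbrock()
    x0 = point(-1.5, 2.0)

    # gradient-based methods take only the loss and the starting point
    for optimizer in (plain_gradient_descent, Momentum, Adagrad, Adam_HD):
        trajectory = optimizer(loss, x0)
        print(optimizer.__name__, 'stopped after', len(trajectory) - 1, 'iterations at', trajectory[-1])

    # the line-search based methods additionally take the name of a derivative-free search
    trajectory = BFGS(loss, x0, method='golden_search')
    print('BFGS stopped after', len(trajectory) - 1, 'iterations at', trajectory[-1])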