def Fletcher_Freeman(X, func, gfunc, hess_func, hyper_parameters=None, search_mode="ELS", epsilon=1e-5, max_epoch=1000):
    """Find a local minimizer with the Fletcher-Freeman method.

    Args:
        X ([np.array]): [input X]
        func ([callable]): [objective function]
        gfunc ([callable]): [gradient of the objective function]
        hess_func ([callable]): [Hessian of the objective function]
        hyper_parameters: (json): hyper-parameters, containing:
            search_mode (str, optional): [line-search mode (exact or inexact)]. Defaults to 'ELS'. ['ELS', 'ILS', 'GLL']
            epsilon ([float], optional): [stop when the decrease of the function value is below epsilon]. Defaults to 1e-5.
            max_epoch (int, optional): [maximum number of iterations]. Defaults to 1000.

    Returns:
        The minimizer found, the corresponding function value, and the iteration / function-call counts.
    """
    if hyper_parameters is not None:
        search_mode = hyper_parameters["search_mode"]
        epsilon = hyper_parameters["epsilon"]
        max_epoch = hyper_parameters["max_epoch"]
    k = 1
    function_k = 0
    func_values = []  # record the function value of every step; used by the GLL search
    mk = 0  # initial mk for the GLL search

    label.step2
    G = hess_func(X)
    function_k += 1
    F = func(X)
    func_values.append(F)
    L, D, y = utils.Bunch_Parlett(G)
    n = len(X)

    # Choose the descent direction d according to the signs of the eigenvalues of D.
    eigenvalue, eigenvector = np.linalg.eig(D)
    if np.any(eigenvalue < 0):
        # D has at least one negative eigenvalue.
        logger.info("D has negative eigenvalues")
        d = np.squeeze(descent_by_general_inverse(X, L, D, gfunc))
    elif np.any(eigenvalue == 0):
        # D has both positive and zero eigenvalues.
        logger.info("D has positive and zero eigenvalues")
        d = descent_by_general_inverse(X, L, D, gfunc)
        if np.where(d != 0)[0].shape[0] == 0:
            # d is the zero vector: look for a direction with g^T d < 0 among the
            # solutions of G d = 0, ruling out the zero vector itself.
            right_zero = np.zeros(n)
            descent_list = np.linalg.solve(G, right_zero)
            for descent in descent_list:
                if gfunc(X) @ descent < 0:
                    d = descent
                    break
    else:
        # All eigenvalues of D are positive: plain Newton step with G = L D L^T.
        logger.info("all eigenvalues of D are positive")
        G_modified = np.dot(np.dot(L, D), L.T)
        inv_hass = np.linalg.inv(G)
        d = -np.dot(inv_hass, gfunc(X))

    # Once the descent direction is available, the remaining steps are identical to the GM stabilized Newton method.
    if search_mode == "ELS":
        logger.info("iteration {iter}, function calls {func_k}, X = {X}, descent direction = {d}, f(X) = {func_x}".format(iter=k, func_k=function_k, X=X, d=d, func_x=round(F, 8)))
        a, b, add_retreat_func = ELS.retreat_method(func, X, d, hyper_parameters=hyper_parameters["ELS"]["retreat_method"] if hyper_parameters is not None else None)
        alpha_star, add_golden_func = ELS.golden_method(func, X, d, a, b, hyper_parameters=hyper_parameters["ELS"]["golden_method"] if hyper_parameters is not None else None)
        add_func_k = add_retreat_func + add_golden_func
    elif search_mode == "ILS":
        logger.info("iteration {iter}, function calls {func_k}, X = {X}, descent direction = {d}, f(X) = {func_x}".format(iter=k, func_k=function_k, X=X, d=d, func_x=round(F, 8)))
        alpha_star, add_func_k = ILS.inexact_line_search(func, gfunc, X, d, hyper_parameters=hyper_parameters["ILS"] if hyper_parameters is not None else None)
    elif search_mode == "GLL":
        logger.info("iteration {iter}, function calls {func_k}, X = {X}, descent direction = {d}, f(X) = {func_x}".format(iter=k, func_k=function_k, X=X, d=d, func_x=round(F, 8)))
        alpha_star, add_func_k, mk = GLL_search(func, gfunc, X, d, func_values, mk, hyper_parameters=hyper_parameters["GLL"] if hyper_parameters is not None else None)
    else:
        raise ValueError("search_mode must be one of ['ELS', 'ILS', 'GLL']")

    X_new = X + d * alpha_star
    function_k = function_k + add_func_k + 1
    func_X_new = func(X_new)
    if abs(func_X_new - F) <= epsilon:
        logger.info("function decrease below {epsilon}: Fletcher-Freeman method with {mode} finished; iterations {iter}, function calls {func_k}, final X = {X}, final f = {func_X_new}".format(epsilon=epsilon, mode=search_mode, iter=k, func_k=function_k, X=X, func_X_new=func_X_new))
        return X_new, func_X_new, k, function_k
    if k > max_epoch:
        logger.info("maximum number of iterations exceeded: %d", max_epoch)
        return X_new, func_X_new, k, function_k
    X = X_new
    k += 1
    goto.step2
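# --- Usage sketch (illustrative, not part of the original module) ---
# A minimal example of how Fletcher_Freeman might be called on a small convex quadratic.
# It assumes this module's dependencies (numpy as np, utils, ELS/ILS, GLL_search, logger,
# and the goto/label machinery) are importable; the quadratic and the starting point are
# hypothetical and only illustrate the expected callable signatures.
def _demo_fletcher_freeman():
    A = np.array([[4.0, 1.0], [1.0, 3.0]])
    b = np.array([1.0, 2.0])

    quad = lambda x: 0.5 * x @ A @ x - b @ x   # f(x)
    grad = lambda x: A @ x - b                 # g(x)
    hess = lambda x: A                         # G(x), constant for a quadratic

    x0 = np.array([10.0, -7.0])
    x_star, f_star, iters, calls = Fletcher_Freeman(x0, quad, grad, hess, search_mode="ILS")
    print("x* =", x_star, "f* =", f_star, "iterations =", iters, "function calls =", calls)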
def CLSR1(X, func, gfunc, hyper_parameters=None, M=15, search_mode="ELS", epsilon=1e-5, max_epoch=1000):
    """Limited-memory SR1 method in compressed (compact) form.

    Args:
        X ([np.array]): [input X]
        func ([callable]): [objective function]
        gfunc ([callable]): [gradient of the objective function]
        hyper_parameters: (json): hyper-parameters, containing:
            M (int, optional): [number of stored correction pairs (s, y) used when rebuilding Hk]; M is expected to be one of [5, 9, 15]. Defaults to 15.
            search_mode (str, optional): [line-search mode (exact or inexact)]. Defaults to 'ELS'. ['ELS', 'ILS', 'GLL']
            epsilon ([float], optional): [stop when ||g_k|| < epsilon * max(1, ||x_k||)]. Defaults to 1e-5.
            max_epoch (int, optional): [maximum number of iterations]. Defaults to 1000.
    """
    if hyper_parameters is not None:
        M = hyper_parameters["LSR1"]["M"]
        search_mode = hyper_parameters["search_mode"]
        epsilon = hyper_parameters["epsilon"]
        max_epoch = hyper_parameters["max_epoch"]
    n = len(X)
    k = 1
    function_k = 0
    func_values = []  # record the function value of every step; used by the GLL search
    mk = 0  # initial mk for the GLL search
    Sk_que = Queue()  # at most M vectors s_k, used when rebuilding Hk
    Yk_que = Queue()  # at most M vectors y_k, used when rebuilding Hk
    Dk_que = Queue()  # at most M scalars s_k^T y_k
    g = gfunc(X)
    F = func(X)
    function_k += 1
    func_values.append(F)
    start_time = time.time()

    # Compute the descent direction d_k: rebuild Hk in compressed form, then d_k = -Hk * g_k.
    label.count_dk
    Hk = np.eye(n, dtype=float)
    item_num = min(Sk_que.qsize(), M)
    if item_num > 0:
        Sk = np.mat(Sk_que.queue).T
        Yk = np.mat(Yk_que.queue).T
        Lk = np.zeros((item_num, item_num), dtype=float)
        for i in range(item_num):
            for j in range(i):
                Lk[i][j] = Sk_que.queue[i] @ Yk_que.queue[j]
        Dk = np.diag(Dk_que.queue)
        mid_mat = Dk + Lk + Lk.T - (Yk.T @ Hk @ Yk)
        try:
            # The middle matrix may be singular.
            mid_mat_inv = np.linalg.inv(mid_mat)
        except np.linalg.LinAlgError:
            logger.info("middle matrix of the Hk update is singular; falling back to the modified Cholesky factorization")
            L, D = utils.modified_Cholesky(mid_mat, hyper_parameters["modified_Cholesky"])
            mid_mat_ = utils.get_modified_G(L, D)
            mid_mat_inv = np.linalg.inv(mid_mat_)
        Hk = Hk + (Sk - Hk @ Yk) @ mid_mat_inv @ (Sk - Hk @ Yk).T
    d = np.squeeze(np.array(-Hk @ g))

    before_LS_time = time.time()
    # Once the descent direction is available, the remaining steps are the same as in the other methods.
    if search_mode == "ELS":
        logger.info("iteration {iter}, function calls {func_k}, elapsed {time}, X = {X}, g = {g}, descent direction = {d}, f(X) = {func_x}".format(iter=k, func_k=function_k, time=before_LS_time - start_time, X=X, g=g, d=d, func_x=round(F, 8)))
        a, b, add_retreat_func = ELS.retreat_method(func, X, d, hyper_parameters=hyper_parameters["ELS"]["retreat_method"] if hyper_parameters is not None else None)
        alpha_star, add_golden_func = ELS.golden_method(func, X, d, a, b, hyper_parameters=hyper_parameters["ELS"]["golden_method"] if hyper_parameters is not None else None)
        add_func_k = add_retreat_func + add_golden_func
    elif search_mode == "ILS":
        logger.info("iteration {iter}, function calls {func_k}, elapsed {time}, X = {X}, g = {g}, descent direction = {d}, f(X) = {func_x}".format(iter=k, func_k=function_k, time=before_LS_time - start_time, X=X, g=g, d=d, func_x=round(F, 8)))
        alpha_star, add_func_k = ILS.inexact_line_search(func, gfunc, X, d, hyper_parameters=hyper_parameters["ILS"] if hyper_parameters is not None else None)
    elif search_mode == "GLL":
        logger.info("iteration {iter}, function calls {func_k}, elapsed {time}, X = {X}, g = {g}, descent direction = {d}, f(X) = {func_x}".format(iter=k, func_k=function_k, time=before_LS_time - start_time, X=X, g=g, d=d, func_x=round(F, 8)))
        alpha_star, add_func_k, mk = GLL_search(func, gfunc, X, d, func_values, mk, hyper_parameters=hyper_parameters["GLL"] if hyper_parameters is not None else None)
    else:
        raise ValueError("search_mode must be one of ['ELS', 'ILS', 'GLL']")

    logger.info("step length of this update: {}".format(alpha_star))
    X_new = X + d * alpha_star
    function_k = function_k + add_func_k + 1
    func_X_new = func(X_new)
    func_values.append(func_X_new)
    g_new = gfunc(X_new)
    if item_num == M:
        # Keep only the most recent M correction pairs.
        Sk_que.get()
        Yk_que.get()
        Dk_que.get()
    Sk_que.put(d * alpha_star)
    Yk_que.put(g_new - g)
    Dk_que.put((d * alpha_star) @ (g_new - g))

    # Update and check the stopping criteria.
    logger.info("g is {}".format(g_new))
    logger.info("||g|| = {g}, epsilon * max(1, ||x_k||) = {xk}".format(g=np.linalg.norm(g_new), xk=epsilon * max(1, np.linalg.norm(X_new))))
    # The stopping test may be fragile: because of floating-point precision the decrease of g
    # can be exactly 0, which may later cause a division by zero when computing rho.
    # An alternative test would be: if abs(func_X_new - F) <= epsilon.
    if np.linalg.norm(g_new) < epsilon * max(1, np.linalg.norm(X_new)):
        end_time = time.time()
        logger.info("stopping criterion satisfied: compressed limited-memory SR1 with {mode} finished; iterations {iter}, function calls {func_k}, elapsed {time}, final X = {X}, final f = {func_X_new}".format(mode=search_mode, iter=k, func_k=function_k, time=end_time - start_time, X=X, func_X_new=func_X_new))
        return X_new, func_X_new, k, function_k, end_time - start_time
    if k > max_epoch:
        end_time = time.time()
        logger.info("maximum number of iterations exceeded: compressed limited-memory SR1 with {mode} finished; iterations {iter}, function calls {func_k}, elapsed {time}, final X = {X}, final f = {func_X_new}".format(mode=search_mode, iter=k, func_k=function_k, time=end_time - start_time, X=X, func_X_new=func_X_new))
        return X_new, func_X_new, k, function_k, end_time - start_time
    X = X_new
    g = g_new
    F = func_X_new
    k += 1
    goto.count_dk
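# --- Background sketch (illustrative, not part of the original module) ---
# The compressed form above accumulates the same information as the classic rank-one SR1
# update of the inverse approximation,
#     H_{k+1} = H_k + (s - H_k y)(s - H_k y)^T / ((s - H_k y)^T y),
# which is usually skipped when the denominator is numerically negligible. A minimal
# self-contained version of that single update (names are hypothetical) could look like:
def _sr1_inverse_update(H, s, y, r=1e-8):
    """Apply one SR1 update to the inverse Hessian approximation H; returns a new array."""
    v = s - H @ y
    denom = v @ y
    # Standard skipping rule: keep H unchanged if the update would be ill-conditioned.
    if abs(denom) < r * np.linalg.norm(v) * np.linalg.norm(y):
        return H
    return H + np.outer(v, v) / denom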
def inexact_newton_method(X, func, gfunc, hess_func, hyper_parameters=None, search_mode="ILS", eta_mode=1, safeguard=True, eta0=0.5, gamma=1, sigma=1.5, epsilon=1e-5, max_epoch=1000):
    """Find a minimizer with the inexact Newton method (the Newton system G_k d = -g_k is solved only approximately).

    Args:
        X ([np.array]): [input X]
        func ([callable]): [objective function]
        gfunc ([callable]): [gradient of the objective function]
        hess_func ([callable]): [Hessian of the objective function]
        hyper_parameters: (Dic): hyper-parameters, containing:
            search_mode (str, optional): [line-search mode (exact or inexact)]. Defaults to 'ILS'. ['ELS', 'ILS', 'GLL']
            eta_mode (int, optional): [rule used to choose the forcing term eta]. Defaults to 1. [1, 2]
            eta0 ([float], optional): [initial value of eta]. Defaults to 0.5.
            gamma ([float], optional): [coefficient used by eta choice 2]. Defaults to 1.
            sigma ([float], optional): [exponent used by eta choice 2]. Defaults to 1.5.
            safeguard ([bool], optional): [whether to apply the safeguard on eta]. Defaults to True.
            epsilon ([float], optional): [stop when ||g_k|| < epsilon * max(1, ||x_k||)]. Defaults to 1e-5.
            max_epoch (int, optional): [maximum number of iterations]. Defaults to 1000.

    Returns:
        The minimizer found, the corresponding function value, and the iteration / function-call counts.
    """
    if hyper_parameters is not None:
        search_mode = hyper_parameters["search_mode"]
        epsilon = hyper_parameters["epsilon"]
        max_epoch = hyper_parameters["max_epoch"]
        eta_mode = hyper_parameters["INM"]["eta_mode"]
        eta0 = hyper_parameters["INM"]["eta0"]
        safeguard = hyper_parameters["INM"]["safeguard"]
        if eta_mode == 2:
            gamma = hyper_parameters["INM"]["gamma"]
            sigma = hyper_parameters["INM"]["sigma"]
    n = len(X)
    k = 1
    function_k = 0
    func_values = []  # record the function value of every step; used by the GLL search
    mk = 0  # initial mk for the GLL search
    g_pre = None
    G_pre = None
    d_pre = None
    g = gfunc(X)
    G = hess_func(X)
    eta_pre = None
    # Record the current function value.
    F = func(X)
    function_k += 1
    func_values.append(F)
    start_time = time.time()
    use_gmres = True

    # Compute the descent direction d_k: choose the forcing term eta, then solve G_k d = -g_k approximately.
    label .count_dk
    # Choose the current eta.
    if g_pre is None:
        eta = eta0
    else:
        if eta_mode == 1:
            eta = np.linalg.norm(g - g_pre - G_pre @ d_pre) / np.linalg.norm(g_pre)
        elif eta_mode == 2:
            eta = gamma * (np.linalg.norm(g) / np.linalg.norm(g_pre)) ** sigma
    # Safeguard against eta shrinking too quickly.
    if eta_pre is not None and safeguard:
        if eta_mode == 1:
            # (1 + sqrt(5)) / 2 is the golden-ratio exponent of the standard safeguard.
            if eta_pre ** ((1 + math.sqrt(5)) / 2) > 0.1:
                eta = max(eta, eta_pre ** ((1 + math.sqrt(5)) / 2))
        elif eta_mode == 2:
            if gamma * eta_pre ** sigma > 0.1:
                eta = max(eta, gamma * eta_pre ** sigma)
    eta_pre = eta  # remember eta for the next iteration's safeguard

    # Solve for d_k iteratively with GMRES.
    if use_gmres:
        logger.info("eta is {}".format(eta))
        gmres_result = gmres(G, -g, tol=eta)
        logger.info("gmres result is {}".format(gmres_result))
        d = gmres_result[0]
    if np.all(d == 0) or not use_gmres:
        # Fall back to the exact Newton direction if GMRES returns the zero vector.
        inv_hass = np.linalg.inv(G)
        d = -np.dot(inv_hass, g)
        use_gmres = False

    before_LS_time = time.time()
    # Once the descent direction is available, the remaining steps are the same as in the other methods.
    if search_mode == "ELS":
        logger.info("iteration {iter}, function calls {func_k}, elapsed {time}, X = {X}, g = {g}, descent direction = {d}, f(X) = {func_x}".format(iter=k, func_k=function_k, time=before_LS_time - start_time, X=X, g=g, d=d, func_x=round(F, 8)))
        a, b, add_retreat_func = ELS.retreat_method(func, X, d, hyper_parameters=hyper_parameters["ELS"]["retreat_method"] if hyper_parameters is not None else None)
        alpha_star, add_golden_func = ELS.golden_method(func, X, d, a, b, hyper_parameters=hyper_parameters["ELS"]["golden_method"] if hyper_parameters is not None else None)
        add_func_k = add_retreat_func + add_golden_func
    elif search_mode == "ILS":
        logger.info("iteration {iter}, function calls {func_k}, elapsed {time}, X = {X}, g = {g}, descent direction = {d}, f(X) = {func_x}".format(iter=k, func_k=function_k, time=before_LS_time - start_time, X=X, g=g, d=d, func_x=round(F, 8)))
        alpha_star, add_func_k = ILS.inexact_line_search(func, gfunc, X, d, hyper_parameters=hyper_parameters["ILS"] if hyper_parameters is not None else None)
    elif search_mode == "GLL":
        logger.info("iteration {iter}, function calls {func_k}, elapsed {time}, X = {X}, g = {g}, descent direction = {d}, f(X) = {func_x}".format(iter=k, func_k=function_k, time=before_LS_time - start_time, X=X, g=g, d=d, func_x=round(F, 8)))
        alpha_star, add_func_k, mk = GLL_search(func, gfunc, X, d, func_values, mk, hyper_parameters=hyper_parameters["GLL"] if hyper_parameters is not None else None)
    else:
        raise ValueError("search_mode must be one of ['ELS', 'ILS', 'GLL']")

    # Update the iterate.
    logger.info("step length of this update: {}".format(alpha_star))
    X_new = X + d * alpha_star
    function_k = function_k + add_func_k + 1
    func_X_new = func(X_new)
    func_values.append(func_X_new)
    g_pre = g
    G_pre = G
    d_pre = d
    g = gfunc(X_new)
    G = hess_func(X_new)
    logger.info("g is {}".format(g))
    logger.info("||g|| = {g}, epsilon * max(1, ||x_k||) = {xk}".format(g=np.linalg.norm(g), xk=epsilon * max(1, np.linalg.norm(X_new))))
    # The stopping test may be fragile: because of floating-point precision the decrease of g
    # can be exactly 0, which may later cause a division by zero when computing rho.
    # An alternative test would be: if abs(func_X_new - F) <= epsilon.
    if np.linalg.norm(g) < epsilon * max(1, np.linalg.norm(X_new)):
        end_time = time.time()
        logger.info("stopping criterion satisfied: inexact Newton method with {mode} finished; iterations {iter}, function calls {func_k}, elapsed {time}, final X = {X}, final f = {func_X_new}".format(mode=search_mode, iter=k, func_k=function_k, time=end_time - start_time, X=X, func_X_new=func_X_new))
        return X_new, func_X_new, k, function_k, end_time - start_time
    if k > max_epoch:
        end_time = time.time()
        logger.info("maximum number of iterations exceeded: inexact Newton method with {mode} finished; iterations {iter}, function calls {func_k}, elapsed {time}, final X = {X}, final f = {func_X_new}".format(mode=search_mode, iter=k, func_k=function_k, time=end_time - start_time, X=X, func_X_new=func_X_new))
        return X_new, func_X_new, k, function_k, end_time - start_time
    X = X_new
    F = func_X_new
    k += 1
    goto .count_dk
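# --- Background sketch (illustrative, not part of the original module) ---
# The forcing-term rules used above follow the Eisenstat-Walker choices: choice 1 measures
# how well the previous local model predicted the new gradient, choice 2 tracks the rate of
# decrease of ||g||. A compact standalone helper (names and defaults are hypothetical):
def _forcing_term(g, g_pre, G_pre, d_pre, eta_pre, mode=1, gamma=1.0, sigma=1.5, eta_max=0.9):
    """Return the next forcing term eta for an inexact Newton step."""
    if mode == 1:
        eta = np.linalg.norm(g - g_pre - G_pre @ d_pre) / np.linalg.norm(g_pre)
        safe = eta_pre ** ((1 + math.sqrt(5)) / 2)   # golden-ratio exponent safeguard
    else:
        eta = gamma * (np.linalg.norm(g) / np.linalg.norm(g_pre)) ** sigma
        safe = gamma * eta_pre ** sigma
    if safe > 0.1:            # only apply the safeguard while it is not negligible
        eta = max(eta, safe)
    return min(eta, eta_max)  # keep eta bounded away from 1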
def INBM(X, func, gfunc, hess_func, hyper_parameters=None, search_mode="ILS", eta_mode=1, safeguard=True, eta0=0.5, gamma=1, sigma=1.5, t=1e-4, eta_max=0.9, theta_min=0.1, theta_max=0.5, epsilon=1e-5, max_epoch=1000):
    """Inexact Newton backtracking method (INBM).

    Args:
        X ([np.array]): [input X]
        func ([callable]): [objective function]
        gfunc ([callable]): [gradient of the objective function]
        hess_func ([callable]): [Hessian of the objective function]
        hyper_parameters: (Dic): hyper-parameters, containing:
            search_mode (str, optional): [line-search mode (exact or inexact)]. Defaults to 'ILS'. ['ELS', 'ILS', 'GLL']
            eta_mode (int, optional): [rule used to choose the forcing term eta; 0 keeps eta fixed at eta0]. Defaults to 1. [0, 1, 2]
            eta0 ([float], optional): [initial value of eta]. Defaults to 0.5.
            gamma ([float], optional): [coefficient used by eta choice 2]. Defaults to 1.
            sigma ([float], optional): [exponent used by eta choice 2]. Defaults to 1.5.
            safeguard ([bool], optional): [whether to apply the safeguard on eta]. Defaults to True.
            t ([float], optional): [the t in the residual-reduction condition (condition 2)]. Defaults to 1e-4.
            eta_max (float, optional): [upper bound on eta]. Defaults to 0.9.
            theta_min (float, optional): [lower bound of the theta range; theta is chosen by quadratic interpolation within this range in the backtracking loop]. Defaults to 0.1.
            theta_max (float, optional): [upper bound of the theta range]. Defaults to 0.5.
            epsilon ([float], optional): [stop when ||g_k|| < epsilon * max(1, ||x_k||)]. Defaults to 1e-5.
            max_epoch (int, optional): [maximum number of iterations]. Defaults to 1000.
    """
    if hyper_parameters is not None:
        search_mode = hyper_parameters["search_mode"]
        epsilon = hyper_parameters["epsilon"]
        max_epoch = hyper_parameters["max_epoch"]
        eta_mode = hyper_parameters["INBM"]["eta_mode"]
        eta0 = hyper_parameters["INBM"]["eta0"]
        safeguard = hyper_parameters["INBM"]["safeguard"]
        t = hyper_parameters["INBM"]["t"]
        eta_max = hyper_parameters["INBM"]["eta_max"]
        theta_min = hyper_parameters["INBM"]["theta_min"]
        theta_max = hyper_parameters["INBM"]["theta_max"]
        if eta_mode == 2:
            gamma = hyper_parameters["INBM"]["gamma"]
            sigma = hyper_parameters["INBM"]["sigma"]
    n = len(X)
    k = 1
    function_k = 0
    func_values = []  # record the function value of every step; used by the GLL search
    mk = 0  # initial mk for the GLL search
    g_pre = None
    G_pre = None
    d_pre = None
    g = gfunc(X)
    G = hess_func(X)
    eta_pre = None
    # Record the current function value.
    F = func(X)
    function_k += 1
    func_values.append(F)
    start_time = time.time()
    use_gmres = True

    # Compute the descent direction d_k: choose the forcing term eta, then solve G_k d = -g_k approximately.
    label .count_dk
    # Choose the current eta.
    if g_pre is None:
        eta = eta0
    else:
        if eta_mode == 1:
            eta = np.linalg.norm(g - g_pre - G_pre @ d_pre) / np.linalg.norm(g_pre)
        elif eta_mode == 2:
            eta = gamma * (np.linalg.norm(g) / np.linalg.norm(g_pre)) ** sigma
        elif eta_mode == 0:
            eta = eta0
    # Safeguard against eta shrinking too quickly.
    if eta_pre is not None and safeguard:
        if eta_mode == 1:
            # (1 + sqrt(5)) / 2 is the golden-ratio exponent of the standard safeguard.
            if eta_pre ** ((1 + math.sqrt(5)) / 2) > 0.1:
                eta = max(eta, eta_pre ** ((1 + math.sqrt(5)) / 2))
        elif eta_mode == 2:
            if gamma * eta_pre ** sigma > 0.1:
                eta = max(eta, gamma * eta_pre ** sigma)
    eta = min(eta, eta_max)
    eta_pre = eta  # remember eta for the next iteration's safeguard

    # Solve for d_k iteratively with GMRES.
    if use_gmres:
        logger.info("eta is {}".format(eta))
        gmres_result = gmres(G, -g, tol=eta)
        logger.info("gmres result is {}".format(gmres_result))
        d = gmres_result[0]
    if np.all(d == 0) or not use_gmres:
        # Fall back to the exact Newton direction if GMRES returns the zero vector.
        inv_hass = np.linalg.inv(G)
        d = -np.dot(inv_hass, g)
        use_gmres = False

    # Backtrack until the residual-reduction condition (condition 2) holds.
    while np.linalg.norm(gfunc(X + d)) > (1 - t * (1 - eta)) * np.linalg.norm(gfunc(X)):
        denominator = F ** 2 - func(X + d) ** 2 + 2 * F * (g @ d)
        # Guard against division by zero: set theta to 1 so the range check below
        # replaces it with the midpoint of [theta_min, theta_max].
        if abs(denominator) < 1e-20:
            theta = 1
        else:
            theta = (F * (g @ d)) / denominator
        if theta < theta_min or theta > theta_max:
            # If the interpolated theta falls outside the allowed range, take the midpoint.
            theta = (theta_min + theta_max) / 2
        d = theta * d
        eta = 1 - theta * (1 - eta)

    before_LS_time = time.time()
    # Once the descent direction is available, the remaining steps are the same as in the other methods.
    if search_mode == "ELS":
        logger.info("iteration {iter}, function calls {func_k}, elapsed {time}, X = {X}, g = {g}, descent direction = {d}, f(X) = {func_x}".format(iter=k, func_k=function_k, time=before_LS_time - start_time, X=X, g=g, d=d, func_x=round(F, 8)))
        a, b, add_retreat_func = ELS.retreat_method(func, X, d, hyper_parameters=hyper_parameters["ELS"]["retreat_method"] if hyper_parameters is not None else None)
        alpha_star, add_golden_func = ELS.golden_method(func, X, d, a, b, hyper_parameters=hyper_parameters["ELS"]["golden_method"] if hyper_parameters is not None else None)
        add_func_k = add_retreat_func + add_golden_func
    elif search_mode == "ILS":
        logger.info("iteration {iter}, function calls {func_k}, elapsed {time}, X = {X}, g = {g}, descent direction = {d}, f(X) = {func_x}".format(iter=k, func_k=function_k, time=before_LS_time - start_time, X=X, g=g, d=d, func_x=round(F, 8)))
        alpha_star, add_func_k = ILS.inexact_line_search(func, gfunc, X, d, hyper_parameters=hyper_parameters["ILS"] if hyper_parameters is not None else None)
    elif search_mode == "GLL":
        logger.info("iteration {iter}, function calls {func_k}, elapsed {time}, X = {X}, g = {g}, descent direction = {d}, f(X) = {func_x}".format(iter=k, func_k=function_k, time=before_LS_time - start_time, X=X, g=g, d=d, func_x=round(F, 8)))
        alpha_star, add_func_k, mk = GLL_search(func, gfunc, X, d, func_values, mk, hyper_parameters=hyper_parameters["GLL"] if hyper_parameters is not None else None)
    else:
        raise ValueError("search_mode must be one of ['ELS', 'ILS', 'GLL']")

    # Update the iterate.
    logger.info("step length of this update: {}".format(alpha_star))
    X_new = X + d * alpha_star
    function_k = function_k + add_func_k + 1
    func_X_new = func(X_new)
    func_values.append(func_X_new)
    g_pre = g
    G_pre = G
    d_pre = d
    g = gfunc(X_new)
    G = hess_func(X_new)
    logger.info("g is {}".format(g))
    logger.info("||g|| = {g}, epsilon * max(1, ||x_k||) = {xk}".format(g=np.linalg.norm(g), xk=epsilon * max(1, np.linalg.norm(X_new))))
    # The stopping test may be fragile: because of floating-point precision the decrease of g
    # can be exactly 0, which may later cause a division by zero when computing rho.
    # An alternative test would be: if abs(func_X_new - F) <= epsilon.
    if np.linalg.norm(g) < epsilon * max(1, np.linalg.norm(X_new)):
        end_time = time.time()
        logger.info("stopping criterion satisfied: inexact Newton backtracking method with {mode} finished; iterations {iter}, function calls {func_k}, elapsed {time}, final X = {X}, final f = {func_X_new}".format(mode=search_mode, iter=k, func_k=function_k, time=end_time - start_time, X=X, func_X_new=func_X_new))
        return X_new, func_X_new, k, function_k, end_time - start_time
    if k > max_epoch:
        end_time = time.time()
        logger.info("maximum number of iterations exceeded: inexact Newton backtracking method with {mode} finished; iterations {iter}, function calls {func_k}, elapsed {time}, final X = {X}, final f = {func_X_new}".format(mode=search_mode, iter=k, func_k=function_k, time=end_time - start_time, X=X, func_X_new=func_X_new))
        return X_new, func_X_new, k, function_k, end_time - start_time
    X = X_new
    F = func_X_new
    k += 1
    goto .count_dk
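# --- Background sketch (illustrative, not part of the original module) ---
# The backtracking loop above shrinks the step by a factor theta taken from a quadratic
# interpolant and clipped to [theta_min, theta_max]. A generic standalone version of that
# safeguarded choice, written for an arbitrary merit function phi with phi0 = phi(0),
# dphi0 = phi'(0), phi1 = phi(1) (names are hypothetical):
def _safeguarded_theta(phi0, dphi0, phi1, theta_min=0.1, theta_max=0.5):
    """Minimizer of the quadratic interpolating phi(0), phi'(0), phi(1), clipped to a safe range."""
    denom = 2.0 * (phi1 - phi0 - dphi0)
    if abs(denom) < 1e-20:               # degenerate model: fall back to the midpoint
        return 0.5 * (theta_min + theta_max)
    theta = -dphi0 / denom               # argmin of q(t) = phi0 + dphi0*t + (phi1 - phi0 - dphi0)*t^2
    return min(max(theta, theta_min), theta_max)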
def LBFGS(X, func, gfunc, hyper_parameters=None, M=15, search_mode="ELS", epsilon=1e-5, max_epoch=1000):
    """Limited-memory BFGS method.

    Args:
        X ([np.array]): [input X]
        func ([callable]): [objective function]
        gfunc ([callable]): [gradient of the objective function]
        hyper_parameters: (json): hyper-parameters, containing:
            M (int, optional): [number of stored correction pairs (s, y) used when rebuilding Hk]; M is expected to be one of [5, 9, 15]. Defaults to 15.
            search_mode (str, optional): [line-search mode (exact or inexact)]. Defaults to 'ELS'. ['ELS', 'ILS', 'GLL']
            epsilon ([float], optional): [stop when ||g_k|| < epsilon * max(1, ||x_k||)]. Defaults to 1e-5.
            max_epoch (int, optional): [maximum number of iterations]. Defaults to 1000.
    """
    if hyper_parameters is not None:
        M = hyper_parameters["LBFGS"]["M"]
        search_mode = hyper_parameters["search_mode"]
        epsilon = hyper_parameters["epsilon"]
        max_epoch = hyper_parameters["max_epoch"]
    n = len(X)
    k = 1
    function_k = 0
    func_values = []  # record the function value of every step; used by the GLL search
    mk = 0  # initial mk for the GLL search
    s_history = []  # x_{k+1} - x_k for every step, used when rebuilding Hk
    y_history = []  # g_{k+1} - g_k for every step, used when rebuilding Hk
    p_history = []  # 1 / (s_k^T y_k), stored to avoid recomputation
    LBFGS_alpha = np.zeros(M)  # the a_i of the first L-BFGS loop; declared once and reused
    g = gfunc(X)
    # Record the current function value.
    F = func(X)
    function_k += 1
    func_values.append(F)
    start_time = time.time()

    # Compute the descent direction d_k: implicitly rebuild Hk, then d_k = -Hk * g_k.
    label.count_dk
    # L-BFGS two-loop recursion, first loop: compute the a_i.
    q = copy.deepcopy(g)
    for i in range(min(len(s_history), M)):
        LBFGS_alpha[M - 1 - i] = p_history[-i - 1] * (s_history[-i - 1] @ q)
        q -= LBFGS_alpha[M - 1 - i] * y_history[-i - 1]
    # Second loop: compute r = Hk * g_k.
    # if len(p_history) > 0:
    #     Hk_0 = np.eye(n, dtype=float) * ((s_history[-1] @ y_history[-1]) / (y_history[-1] @ y_history[-1]))
    # else:
    Hk_0 = np.eye(n, dtype=float)
    r = Hk_0 @ q
    for i in range(min(len(s_history), M), 0, -1):
        beta = p_history[-i] * (y_history[-i] @ r)
        r += (LBFGS_alpha[-i] - beta) * s_history[-i]
    d = np.array(-r)

    before_LS_time = time.time()
    # Once the descent direction is available, the remaining steps are the same as in the other methods.
    if search_mode == "ELS":
        logger.info("iteration {iter}, function calls {func_k}, elapsed {time}, X = {X}, g = {g}, descent direction = {d}, f(X) = {func_x}".format(iter=k, func_k=function_k, time=before_LS_time - start_time, X=X, g=g, d=d, func_x=round(F, 8)))
        a, b, add_retreat_func = ELS.retreat_method(func, X, d, hyper_parameters=hyper_parameters["ELS"]["retreat_method"] if hyper_parameters is not None else None)
        alpha_star, add_golden_func = ELS.golden_method(func, X, d, a, b, hyper_parameters=hyper_parameters["ELS"]["golden_method"] if hyper_parameters is not None else None)
        add_func_k = add_retreat_func + add_golden_func
    elif search_mode == "ILS":
        logger.info("iteration {iter}, function calls {func_k}, elapsed {time}, X = {X}, g = {g}, descent direction = {d}, f(X) = {func_x}".format(iter=k, func_k=function_k, time=before_LS_time - start_time, X=X, g=g, d=d, func_x=round(F, 8)))
        alpha_star, add_func_k = ILS.inexact_line_search(func, gfunc, X, d, hyper_parameters=hyper_parameters["ILS"] if hyper_parameters is not None else None)
    elif search_mode == "GLL":
        logger.info("iteration {iter}, function calls {func_k}, elapsed {time}, X = {X}, g = {g}, descent direction = {d}, f(X) = {func_x}".format(iter=k, func_k=function_k, time=before_LS_time - start_time, X=X, g=g, d=d, func_x=round(F, 8)))
        alpha_star, add_func_k, mk = GLL_search(func, gfunc, X, d, func_values, mk, hyper_parameters=hyper_parameters["GLL"] if hyper_parameters is not None else None)
    else:
        raise ValueError("search_mode must be one of ['ELS', 'ILS', 'GLL']")

    logger.info("step length of this update: {}".format(alpha_star))
    X_new = X + d * alpha_star
    function_k = function_k + add_func_k + 1
    func_X_new = func(X_new)
    func_values.append(func_X_new)
    g_new = gfunc(X_new)
    s_history.append(d * alpha_star)
    y_history.append(g_new - g)
    p_history.append(1 / (s_history[-1] @ y_history[-1]))

    # Update and check the stopping criteria.
    logger.info("g is {}".format(g_new))
    logger.info("||g|| = {g}, epsilon * max(1, ||x_k||) = {xk}".format(g=np.linalg.norm(g_new), xk=epsilon * max(1, np.linalg.norm(X_new))))
    # The stopping test may be fragile: because of floating-point precision the decrease of g
    # can be exactly 0, which may later cause a division by zero when computing rho.
    # An alternative test would be: if abs(func_X_new - F) <= epsilon.
    if np.linalg.norm(g_new) < epsilon * max(1, np.linalg.norm(X_new)):
        end_time = time.time()
        logger.info("stopping criterion satisfied: limited-memory BFGS with {mode} finished; iterations {iter}, function calls {func_k}, elapsed {time}, final X = {X}, final f = {func_X_new}".format(mode=search_mode, iter=k, func_k=function_k, time=end_time - start_time, X=X, func_X_new=func_X_new))
        return X_new, func_X_new, k, function_k, end_time - start_time
    if k > max_epoch:
        end_time = time.time()
        logger.info("maximum number of iterations exceeded: limited-memory BFGS with {mode} finished; iterations {iter}, function calls {func_k}, elapsed {time}, final X = {X}, final f = {func_X_new}".format(mode=search_mode, iter=k, func_k=function_k, time=end_time - start_time, X=X, func_X_new=func_X_new))
        return X_new, func_X_new, k, function_k, end_time - start_time
    X = X_new
    g = g_new
    F = func_X_new
    k += 1
    goto.count_dk
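# --- Background sketch (illustrative, not part of the original module) ---
# The core of LBFGS above is the standard two-loop recursion: it applies the implicit
# inverse-Hessian approximation to a vector using only the stored (s, y) pairs. A compact
# standalone version (pairs ordered oldest to newest; names are hypothetical):
def _two_loop_direction(g, s_list, y_list):
    """Return d = -H_k g from the L-BFGS two-loop recursion with H_0 = I."""
    rho = [1.0 / (s @ y) for s, y in zip(s_list, y_list)]
    q = g.copy()
    alpha = [0.0] * len(s_list)
    for i in reversed(range(len(s_list))):   # newest pair first
        alpha[i] = rho[i] * (s_list[i] @ q)
        q = q - alpha[i] * y_list[i]
    r = q                                    # H_0 = I (a scaled identity is also common)
    for i in range(len(s_list)):             # oldest pair first
        beta = rho[i] * (y_list[i] @ r)
        r = r + (alpha[i] - beta) * s_list[i]
    return -r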
def GM_newton(X, func, gfunc, hess_func, hyper_parameters=None, zeta=1e-2, search_mode="ELS", epsilon=1e-5, max_epoch=1000):
    """Find a minimizer with the Gill-Murray stabilized Newton method (d = -G_k^{-1} * g_k).

    Args:
        X ([np.array]): [input X]
        func ([callable]): [objective function]
        gfunc ([callable]): [gradient of the objective function]
        hess_func ([callable]): [Hessian of the objective function]
        hyper_parameters: (Dic): hyper-parameters, containing:
            zeta ([float], optional): [when ||g_k|| > zeta, the descent direction is obtained by solving the (modified) Newton system]. Defaults to 1e-2.
            search_mode (str, optional): [line-search mode (exact or inexact)]. Defaults to 'ELS'. ['ELS', 'ILS', 'GLL']
            epsilon ([float], optional): [stop when the decrease of the function value is below epsilon]. Defaults to 1e-5.
            max_epoch (int, optional): [maximum number of iterations]. Defaults to 1000.

    Returns:
        The minimizer found, the corresponding function value, and the iteration / function-call counts.
    """
    if hyper_parameters is not None:
        zeta = hyper_parameters["GM_newton"]["zeta"]
        search_mode = hyper_parameters["search_mode"]
        epsilon = hyper_parameters["epsilon"]
        max_epoch = hyper_parameters["max_epoch"]
    function_k = 0
    k = 1
    func_values = []  # record the function value of every step; used by the GLL search
    mk = 0  # initial mk for the GLL search
    assert epsilon > 0, "must have epsilon > 0"

    # Step 2: compute g and G.
    label.step2
    g = gfunc(X)
    G = hess_func(X)
    # Record the current function value.
    function_k += 1
    F = func(X)
    func_values.append(F)
    # Step 3: modified Cholesky factorization of G.
    L, D = utils.modified_Cholesky(G)
    modified_G = utils.get_modified_G(L, D)
    # Step 4: if ||g(x)|| > zeta, solve the (modified) Newton system for the descent direction.
    if np.linalg.norm(g) > zeta:
        G_1 = np.linalg.inv(modified_G)
        d = -np.dot(G_1, g)
        goto.step6
    # Step 5: compute a direction of negative curvature; stop if psi >= 0, otherwise take that direction d.
    LT = copy.deepcopy(L).T
    E = modified_G - G
    d = negative_curvature(LT, D, E)
    if d is None:
        logger.info("no direction of negative curvature exists: GM stabilized Newton with {mode} finished; iterations {iter}, function calls {func_k}, final X = {X}, final f = {F}".format(mode=search_mode, iter=k, func_k=function_k, X=X, F=F))
        return X, F, k, function_k
    else:
        # Make sure d is a descent direction.
        if np.dot(g, d) > 0:
            d = -d
    # Step 6: line search for the step length.
    label.step6
    if search_mode == "ELS":
        logger.info("iteration {iter}, function calls {func_k}, X = {X}, descent direction = {d}, f(X) = {func_x}".format(iter=k, func_k=function_k, X=X, d=d, func_x=round(F, 8)))
        a, b, add_retreat_func = ELS.retreat_method(func, X, d, hyper_parameters=hyper_parameters["ELS"]["retreat_method"] if hyper_parameters is not None else None)
        alpha_star, add_golden_func = ELS.golden_method(func, X, d, a, b, hyper_parameters=hyper_parameters["ELS"]["golden_method"] if hyper_parameters is not None else None)
        add_func_k = add_retreat_func + add_golden_func
    elif search_mode == "ILS":
        logger.info("iteration {iter}, function calls {func_k}, X = {X}, descent direction = {d}, f(X) = {func_x}".format(iter=k, func_k=function_k, X=X, d=d, func_x=round(F, 8)))
        alpha_star, add_func_k = ILS.inexact_line_search(func, gfunc, X, d, hyper_parameters=hyper_parameters["ILS"] if hyper_parameters is not None else None)
    elif search_mode == "GLL":
        logger.info("iteration {iter}, function calls {func_k}, X = {X}, descent direction = {d}, f(X) = {func_x}".format(iter=k, func_k=function_k, X=X, d=d, func_x=round(F, 8)))
        alpha_star, add_func_k, mk = GLL_search(func, gfunc, X, d, func_values, mk, hyper_parameters=hyper_parameters["GLL"] if hyper_parameters is not None else None)
    else:
        raise ValueError("search_mode must be one of ['ELS', 'ILS', 'GLL']")
    X_new = X + d * alpha_star
    function_k = function_k + add_func_k + 1
    func_X_new = func(X_new)
    if abs(func_X_new - F) <= epsilon:
        logger.info("function decrease below {epsilon}: GM stabilized Newton with {mode} finished; iterations {iter}, function calls {func_k}, final X = {X}, final f = {func_X_new}".format(mode=search_mode, epsilon=epsilon, iter=k, func_k=function_k, X=X, func_X_new=func_X_new))
        return X_new, func_X_new, k, function_k
    if k > max_epoch:
        logger.info("maximum number of iterations exceeded: %d", max_epoch)
        return X_new, func_X_new, k, function_k
    X = X_new
    k += 1
    goto.step2
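# --- Background sketch (illustrative, not part of the original module) ---
# GM_newton relies on utils.modified_Cholesky to make the Hessian safely positive definite.
# A simpler, commonly used alternative (not the Gill-Murray factorization used above) is to
# add a multiple of the identity until an ordinary Cholesky factorization succeeds; a rough
# standalone sketch of that idea, with hypothetical defaults:
def _shifted_positive_definite(G, beta=1e-3, max_tries=50):
    """Return G + tau*I with tau increased until np.linalg.cholesky succeeds."""
    n = G.shape[0]
    tau = 0.0 if np.all(np.diag(G) > 0) else beta
    for _ in range(max_tries):
        try:
            np.linalg.cholesky(G + tau * np.eye(n))
            return G + tau * np.eye(n)
        except np.linalg.LinAlgError:
            tau = max(2 * tau, beta)     # grow the shift geometrically
    return G + tau * np.eye(n)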
def damp_newton(X, func, gfunc, hess_func, hyper_parameters=None, search_mode="ELS", use_modified_Cholesky=True, epsilon=1e-5, max_epoch=1000):
    """Find a minimizer with the damped Newton method (d = -G_k^{-1} * g_k).

    Args:
        X ([np.array]): [input X]
        func ([callable]): [objective function]
        gfunc ([callable]): [gradient of the objective function]
        hess_func ([callable]): [Hessian of the objective function]
        hyper_parameters: (Dic): hyper-parameters, containing:
            search_mode (str, optional): [line-search mode (exact or inexact)]. Defaults to 'ELS'. ['ELS', 'ILS', 'GLL']
            use_modified_Cholesky ([bool], optional): [whether to use the modified Cholesky factorization of G]. Defaults to True.
            epsilon ([float], optional): [stop when the decrease of the function value is below epsilon]. Defaults to 1e-5.
            max_epoch (int, optional): [maximum number of iterations]. Defaults to 1000.

    Returns:
        The minimizer found, the corresponding function value, and the iteration / function-call counts.
    """
    if hyper_parameters is not None:
        search_mode = hyper_parameters["search_mode"]
        epsilon = hyper_parameters["epsilon"]
        max_epoch = hyper_parameters["max_epoch"]
        use_modified_Cholesky = hyper_parameters["damp_newton"]["use_modified_Cholesky"]
    k = 1
    function_k = 0  # number of function calls
    func_values = []  # record the function value of every step; used by the GLL search
    mk = 0  # initial mk for the GLL search

    # Compute the descent direction d_k.
    label.count_dk
    G = hess_func(X)
    g = gfunc(X)
    # Record the current function value.
    F = func(X)
    function_k += 1
    func_values.append(F)
    try:
        if use_modified_Cholesky:
            L, D = utils.modified_Cholesky(G, hyper_parameters["modified_Cholesky"] if hyper_parameters is not None else None)
            G_ = utils.get_modified_G(L, D)
            inv_hass = np.linalg.inv(G_)
        else:
            inv_hass = np.linalg.inv(G)
        d = -np.dot(inv_hass, g)
    except np.linalg.LinAlgError:
        logger.info("Hessian is singular; using the modified Cholesky factorization for the descent direction")
        L, D = utils.modified_Cholesky(G, hyper_parameters["modified_Cholesky"] if hyper_parameters is not None else None)
        G_ = utils.get_modified_G(L, D)
        inv_hass = np.linalg.inv(G_)
        d = -np.dot(inv_hass, g)

    # Compute the step length.
    if search_mode == "ELS":
        logger.info("iteration {iter}, function calls {func_k}, X = {X}, descent direction = {d}, f(X) = {func_x}".format(iter=k, func_k=function_k, X=X, d=d, func_x=round(F, 8)))
        a, b, add_retreat_func = ELS.retreat_method(func, X, d, hyper_parameters=hyper_parameters["ELS"]["retreat_method"] if hyper_parameters is not None else None)
        alpha_star, add_golden_func = ELS.golden_method(func, X, d, a, b, hyper_parameters=hyper_parameters["ELS"]["golden_method"] if hyper_parameters is not None else None)
        add_func_k = add_retreat_func + add_golden_func
    elif search_mode == "ILS":
        logger.info("iteration {iter}, function calls {func_k}, X = {X}, descent direction = {d}, f(X) = {func_x}".format(iter=k, func_k=function_k, X=X, d=d, func_x=round(F, 8)))
        alpha_star, add_func_k = ILS.inexact_line_search(func, gfunc, X, d, hyper_parameters=hyper_parameters["ILS"] if hyper_parameters is not None else None)
    elif search_mode == "GLL":
        logger.info("iteration {iter}, function calls {func_k}, X = {X}, descent direction = {d}, f(X) = {func_x}".format(iter=k, func_k=function_k, X=X, d=d, func_x=round(F, 8)))
        alpha_star, add_func_k, mk = GLL_search(func, gfunc, X, d, func_values, mk, hyper_parameters=hyper_parameters["GLL"] if hyper_parameters is not None else None)
    else:
        raise ValueError("search_mode must be one of ['ELS', 'ILS', 'GLL']")
    X_new = X + d * alpha_star
    function_k = function_k + add_func_k + 1
    func_X_new = func(X_new)
    if abs(func_X_new - F) <= epsilon:
        logger.info("function decrease below {epsilon}: damped Newton with {mode} finished; iterations {iter}, function calls {func_k}, final X = {X}, final f = {func_X_new}".format(epsilon=epsilon, mode=search_mode, iter=k, func_k=function_k, X=X, func_X_new=func_X_new))
        return X_new, func_X_new, k, function_k
    if k > max_epoch:
        logger.info("maximum number of iterations exceeded: %d", max_epoch)
        return X_new, func_X_new, k, function_k
    X = X_new
    k += 1
    goto.count_dk
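# --- Implementation note (illustrative, not part of the original module) ---
# The Newton direction above is formed by explicitly inverting G and multiplying by g.
# Numerically it is usually preferable to solve the linear system directly; a small
# drop-in sketch of that alternative, using only numpy:
def _newton_direction(G, g):
    """Return d solving G d = -g without forming G^{-1} explicitly."""
    return np.linalg.solve(G, -g)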