def Fletcher_Freeman(X,
                     func,
                     gfunc,
                     hess_func,
                     hyper_parameters=None,
                     search_mode="ELS",
                     epsilon=1e-5,
                     max_epoch=1000):
    """Fletcher_Freeman方法求极小值点

    Args:
        X ([np.array]): [Input X]
        func ([回调函数]): [目标函数]
        gfunc ([回调函数]): [目标函数的一阶导函数]
        hess_func ([回调函数]): [目标函数的Hessian矩阵]
        hyper_parameters: (json): 超参数,超参数中包括:
            search_mode (str, optional): [线搜索的模式(选择精确线搜索还是非精确线搜索)]. Defaults to 'ELS'. ['ELS', 'ILS']
            epsilon ([float], optional): [当函数值下降小于epsilon,迭代结束]. Defaults to 1e-5.
            max_epoch (int, optional): [最大允许的迭代次数]. Defaults to 1000.

    Returns:
        返回求解得到的极小值点,极小值点对应的函数值和迭代次数
    """
    if hyper_parameters is not None:
        search_mode = hyper_parameters["search_mode"]
        epsilon = hyper_parameters["epsilon"]
        max_epoch = hyper_parameters["max_epoch"]
    k = 1
    function_k = 0
    func_values = []  # record the function value at every step; used by the GLL search
    mk = 0  # initial value of mk in the GLL search

    label.step2

    G = hess_func(X)
    function_k += 1
    F = func(X)
    func_values.append(F)
    L, D, y = utils.Bunch_Parlett(G)
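    # Assumed behavior of utils.Bunch_Parlett: a Bunch-Parlett style factorization G = L * D * L^T
    # with D block diagonal (1x1 and 2x2 blocks), so the inertia of G can be read from the
    # eigenvalues of D below.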

    n = len(X)
    # Compute the descent direction d according to the signs of the eigenvalues of D
    eigenvalue, eigenvector = np.linalg.eig(D)
    # some eigenvalues are negative
    if np.any(eigenvalue < 0):
        logger.info("D has negative eigenvalues")
        d = np.squeeze(descent_by_general_inverse(X, L, D, gfunc))

    elif np.any(eigenvalue == 0):  # eigenvalues contain zeros (the rest are positive)
        logger.info("D has both positive and zero eigenvalues")
        d = descent_by_general_inverse(X, L, D, gfunc)
        if np.where(d != 0)[0].shape[0] == 0:
            # d is the zero vector: fall back to a direction in the (numerical) null space of
            # the modified matrix L * D * L^T and pick one along which the objective decreases
            G_modified = np.dot(np.dot(L, D), L.T)
            eig_vals, eig_vecs = np.linalg.eigh(G_modified)
            descent_list = [eig_vecs[:, i] for i in range(n) if abs(eig_vals[i]) < 1e-12]
            for descent in descent_list:
                if gfunc(X) @ descent < 0:  # pick a d_k with g_k^T d_k < 0, ruling out the zero vector
                    d = descent
                    break
                elif gfunc(X) @ descent > 0:
                    d = -descent
                    break

    else:
        logger.info("All eigenvalues of D are positive")
        # G = L * D * L^T is positive definite, so the plain Newton direction can be used
        inv_hass = np.linalg.inv(G)
        d = -np.dot(inv_hass, gfunc(X))

    # Once the descent direction is obtained, the remaining steps are the same as in the GM stabilized Newton method
    if search_mode == "ELS":
        logger.info(
            "Iteration {iter}, function calls {func_k}, current X = {X}, descent direction = {d}, current function value = {func_x}".
            format(iter=k, func_k=function_k, X=X, d=d, func_x=round(F, 8)))
        a, b, add_retreat_func = ELS.retreat_method(
            func,
            X,
            d,
            hyper_parameters=hyper_parameters["ELS"]["retreat_method"]
            if hyper_parameters is not None else None)
        alpha_star, add_golden_func = ELS.golden_method(
            func,
            X,
            d,
            a,
            b,
            hyper_parameters=hyper_parameters["ELS"]["golden_method"]
            if hyper_parameters is not None else None)
        add_func_k = add_retreat_func + add_golden_func
    elif search_mode == "ILS":
        logger.info(
            "Iteration {iter}, function calls {func_k}, current X = {X}, descent direction = {d}, current function value = {func_x}".
            format(iter=k, func_k=function_k, X=X, d=d, func_x=round(F, 8)))
        alpha_star, add_func_k = ILS.inexact_line_search(
            func,
            gfunc,
            X,
            d,
            hyper_parameters=hyper_parameters["ILS"]
            if hyper_parameters is not None else None)
    elif search_mode == "GLL":
        logger.info(
            "Iteration {iter}, function calls {func_k}, current X = {X}, descent direction = {d}, current function value = {func_x}".
            format(iter=k, func_k=function_k, X=X, d=d, func_x=round(F, 8)))
        alpha_star, add_func_k, mk = GLL_search(
            func,
            gfunc,
            X,
            d,
            func_values,
            mk,
            hyper_parameters=hyper_parameters["GLL"]
            if hyper_parameters is not None else None)

    else:
        raise ValueError("search_mode must be one of ['ELS', 'ILS', 'GLL']")
    # logging.info("line search finished")
    X_new = X + d * alpha_star
    function_k = function_k + add_func_k + 1
    func_X_new = func(X_new)
    if abs(func_X_new - F) <= epsilon:
        logger.info(
            "Function value decreased by less than {epsilon}; Fletcher-Freeman ({mode}) terminates: iteration {iter}, function calls {func_k}, final X = {X}, final function value = {func_X_new}"
            .format(epsilon=epsilon,
                    mode=search_mode,
                    iter=k,
                    func_k=function_k,
                    X=X_new,
                    func_X_new=func_X_new))
        return X_new, func_X_new, k, function_k
    if k > max_epoch:
        logger.info("Maximum number of iterations exceeded: %d", max_epoch)
        return X_new, func_X_new, k, function_k
    X = X_new
    k += 1
    goto.step2
def CLSR1(X,
          func,
          gfunc,
          hyper_parameters=None,
          M=15,
          search_mode="ELS",
          epsilon=1e-5,
          max_epoch=1000):
    """ 压缩形式的有限内存SR1方法

    Args:
        X ([np.array]): [Input X]
        func ([回调函数]): [目标函数]
        gfunc ([回调函数]): [目标函数的一阶导函数]
        hess_func ([回调函数]): [目标函数的Hessian矩阵]
        hyper_parameters: (json): 超参数,超参数中包括:
            M (int, optional): [计算修正Hk的时候,需要之前记录的M个信息,记录的信息包括s和y], 要求M的取值范围在[5, 9, 15]. Defaults to 15.
            search_mode (str, optional): [线搜索的模式(选择精确线搜索还是非精确线搜索)]. Defaults to 'ELS'. ['ELS', 'ILS']
            epsilon ([float], optional): [||g_k|| < 1e-5 * max(1, ||x_k||)时,迭代结束]. Defaults to 1e-8.
            max_epoch (int, optional): [最大允许的迭代次数]. Defaults to 1000.
    """
    if hyper_parameters is not None:
        M = hyper_parameters["LSR1"]["M"]
        search_mode = hyper_parameters["search_mode"]
        epsilon = hyper_parameters["epsilon"]
        max_epoch = hyper_parameters["max_epoch"]
    n = len(X)
    k = 1
    function_k = 0
    func_values = []  # record the function value at every step; used by the GLL search
    mk = 0  # initial value of mk in the GLL search

    Sk_que = Queue()  # stores at most M vectors s_k, used in the LSR1 update of Hk
    Yk_que = Queue()  # stores at most M vectors y_k, used in the LSR1 update of Hk
    Dk_que = Queue()  # stores at most M scalars s^T * y

    g = gfunc(X)
    F = func(X)
    function_k += 1
    func_values.append(F)
    start_time = time.time()
    # Compute the descent direction d_k: update Hk with the compact-form SR1 formula and compute dk = -Hk * gk
    label.count_dk

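    # Compact representation of the limited-memory SR1 update (a sketch of the standard formula):
    #   H_new = H + (S - H*Y) * (D + L + L^T - Y^T*H*Y)^{-1} * (S - H*Y)^T
    # where the columns of S and Y are the stored s_i and y_i, D = diag(s_i^T y_i), and
    # L is strictly lower triangular with L[i][j] = s_i^T y_j for i > j.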
    Hk = np.eye(n, dtype=float)
    item_num = min(Sk_que.qsize(), M)
    if item_num > 0:
        Sk = np.mat(Sk_que.queue).T
        Yk = np.mat(Yk_que.queue).T
        Lk = np.zeros((item_num, item_num), dtype=float)
        for i in range(item_num):
            for j in range(i):
                Lk[i][j] = Sk_que.queue[i] @ Yk_que.queue[j]
        Dk = np.diag(Dk_que.queue)
        mid_mat = Dk + Lk + Lk.T - (Yk.T @ Hk @ Yk)
        try:
            # the inner matrix may be singular
            mid_mat_inv = np.linalg.inv(mid_mat)
        except np.linalg.LinAlgError:
            logger.info("The inner matrix of the Hk update is singular; falling back to the modified Cholesky factorization")
            L, D = utils.modified_Cholesky(
                mid_mat, hyper_parameters["modified_Cholesky"]
                if hyper_parameters is not None else None)
            mid_mat_ = utils.get_modified_G(L, D)
            mid_mat_inv = np.linalg.inv(mid_mat_)

        Hk = Hk + (Sk - Hk @ Yk) @ mid_mat_inv @ (Sk - Hk @ Yk).T

    d = np.squeeze(np.array(-Hk @ g))

    before_LS_time = time.time()
    # After the descent direction is obtained, the remaining steps are the same as in the other optimization methods
    if search_mode == "ELS":
        logger.info(
            "迭代第{iter}轮,当前函数调用次数{func_k},当前用时{time},当前X取值为{X},当前g的取值为{g}, 下降方向为{d},当前函数值为{func_x}"
            .format(iter=k,
                    func_k=function_k,
                    time=before_LS_time - start_time,
                    X=X,
                    g=g,
                    d=d,
                    func_x=round(F, 8)))
        a, b, add_retreat_func = ELS.retreat_method(
            func,
            X,
            d,
            hyper_parameters=hyper_parameters["ELS"]["retreat_method"]
            if hyper_parameters is not None else None)
        alpha_star, add_golden_func = ELS.golden_method(
            func,
            X,
            d,
            a,
            b,
            hyper_parameters=hyper_parameters["ELS"]["golden_method"]
            if hyper_parameters is not None else None)
        add_func_k = add_retreat_func + add_golden_func
    elif search_mode == "ILS":
        logger.info(
            "迭代第{iter}轮,当前函数调用次数{func_k},当前用时{time},当前X取值为{X},当前g的取值为{g}, 下降方向为{d},当前函数值为{func_x}"
            .format(iter=k,
                    func_k=function_k,
                    time=before_LS_time - start_time,
                    X=X,
                    g=g,
                    d=d,
                    func_x=round(F, 8)))
        alpha_star, add_func_k = ILS.inexact_line_search(
            func,
            gfunc,
            X,
            d,
            hyper_parameters=hyper_parameters["ILS"]
            if hyper_parameters is not None else None)
    elif search_mode == "GLL":
        logger.info(
            "迭代第{iter}轮,当前函数调用次数{func_k},当前用时{time},当前X取值为{X},当前g的取值为{g}, 下降方向为{d},当前函数值为{func_x}"
            .format(iter=k,
                    func_k=function_k,
                    time=before_LS_time - start_time,
                    X=X,
                    g=g,
                    d=d,
                    func_x=round(F, 8)))
        alpha_star, add_func_k, mk = GLL_search(
            func,
            gfunc,
            X,
            d,
            func_values,
            mk,
            hyper_parameters=hyper_parameters["GLL"]
            if hyper_parameters is not None else None)

    logger.info("当前更新的步长为{}".format(alpha_star))
    X_new = X + d * alpha_star
    function_k = function_k + add_func_k + 1
    func_X_new = func(X_new)
    func_values.append(func_X_new)
    g_new = gfunc(X_new)

    if item_num == M:
        Sk_que.get()
        Yk_que.get()
        Dk_que.get()
    Sk_que.put(d * alpha_star)
    Yk_que.put(g_new - g)
    Dk_que.put((d * alpha_star) @ (g_new - g))

    # update
    logger.info("g is {}".format(g_new))
    logger.info("||g|| = {g}, epsilon * max(1, ||x_k||) = {xk}".format(
        g=np.linalg.norm(g_new), xk=epsilon * max(1, np.linalg.norm(X_new))))
    # The stopping criterion below can be problematic: due to floating-point precision the decrease of g may be exactly 0, which can cause division by zero when computing rho
    if np.linalg.norm(g_new) < epsilon * max(1, np.linalg.norm(X_new)):
        # if abs(func_X_new - F) <= epsilon:
        end_time = time.time()
        logger.info(
            "Stopping criterion satisfied; compact limited-memory SR1 ({mode}) terminates: iteration {iter}, function calls {func_k}, total time {time}, final X = {X}, final function value = {func_X_new}"
            .format(mode=search_mode,
                    iter=k,
                    func_k=function_k,
                    time=end_time - start_time,
                    X=X_new,
                    func_X_new=func_X_new))
        return X_new, func_X_new, k, function_k, end_time - start_time
    if k > max_epoch:
        end_time = time.time()
        logger.info(
            "Maximum number of iterations exceeded; compact limited-memory SR1 ({mode}) terminates: iteration {iter}, function calls {func_k}, total time {time}, final X = {X}, final function value = {func_X_new}"
            .format(mode=search_mode,
                    iter=k,
                    func_k=function_k,
                    time=end_time - start_time,
                    X=X_new,
                    func_X_new=func_X_new))

        return X_new, func_X_new, k, function_k, end_time - start_time
    X = X_new
    g = g_new

    F = func_X_new
    k += 1
    goto.count_dk
def inexact_newton_method(X, func, gfunc, hess_func, hyper_parameters=None, search_mode="ILS", eta_mode=1, safeguard=True, eta0=0.5, gamma=1, sigma=1.5, epsilon=1e-5, max_epoch=1000):
    """[使用非精确牛顿法极小值点
         d = -G_k^{-1} * g_k]

    Args:
        X ([np.array]): [Input X]
        func ([回调函数]): [目标函数]
        gfunc ([回调函数]): [目标函数的一阶导函数]
        hess_func ([回调函数]): [目标函数的Hessian矩阵]
        hyper_parameters: (Dic): 超参数,超参数中包括:
            search_mode (str, optional): [线搜索的模式(选择精确线搜索还是非精确线搜索)]. Defaults to 'ELS'. ['ELS', 'ILS']
            eta_mode (int, optional): [{eta}选择的方式]. Defaults to 1. [1, 2]
            eta0 ([float], optional): [eta的初值]. Defaults to 0.5.
            gamma ([float], optional): [eta选择2当中的系数参数]. Defaults to 1.
            sigma ([float], optional): [eta选择2当中的指数参数]. Defaults to 1.5.
            safeguard ([bool], optional): [是否使用安全保护]. Defaults to True.
            epsilon ([float], optional): [||g_k|| < 1e-5 * max(1, ||x_k||)时,迭代结束]. Defaults to 1e-8.
            max_epoch (int, optional): [最大允许的迭代次数]. Defaults to 1000.
    
    Returns:
        返回求解得到的极小值点,极小值点对应的函数值和迭代次数
    """
    if hyper_parameters is not None:
        search_mode = hyper_parameters["search_mode"]
        epsilon = hyper_parameters["epsilon"]
        max_epoch = hyper_parameters["max_epoch"]
        eta_mode = hyper_parameters["INM"]["eta_mode"]
        eta0 = hyper_parameters["INM"]["eta0"]
        safeguard = hyper_parameters["INM"]["safeguard"]
        if eta_mode == 2:
            gamma = hyper_parameters["INM"]["gamma"]
            sigma = hyper_parameters["INM"]["sigma"]

    n = len(X)
    k = 1
    function_k = 0
    func_values = [] # record the function value at every step; used by the GLL search
    mk = 0 # initial value of mk in the GLL search
    g_pre = None
    G_pre = None
    d_pre = None
    g = gfunc(X)
    G = hess_func(X)
    eta_pre = None
    # append the current function value to func_values
    F = func(X)
    function_k += 1
    func_values.append(F)
    start_time = time.time()
    use_gmres = True
    # Compute the descent direction d_k by (approximately) solving G_k * d = -g_k
    label.count_dk
    
    # choose the current forcing term eta
    if g_pre is None:
        eta = eta0
    else:
        if eta_mode == 1:
            eta = np.linalg.norm(g - g_pre - G_pre @ d_pre) / np.linalg.norm(g_pre)
        elif eta_mode == 2:
            eta = gamma * (np.linalg.norm(g) / np.linalg.norm(g_pre)) ** sigma
        
    # safeguard on eta
    if eta_pre is not None and safeguard:
        if eta_mode == 1:
            # safeguard exponent (1 + sqrt(5)) / 2 for choice 1
            if eta_pre ** ((1 + math.sqrt(5)) / 2) > 0.1:
                eta = max(eta, eta_pre ** ((1 + math.sqrt(5)) / 2))
        elif eta_mode == 2:
            if gamma * eta_pre ** sigma > 0.1:
                eta = max(eta, gamma * eta_pre ** sigma)
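    # The eta_mode options above follow the Eisenstat-Walker forcing-term choices:
    #   choice 1: eta_k = ||g_k - g_{k-1} - G_{k-1} d_{k-1}|| / ||g_{k-1}||
    #   choice 2: eta_k = gamma * (||g_k|| / ||g_{k-1}||)^sigma
    # eta is then used as the relative residual tolerance for GMRES, i.e. ||G*d + g|| <= eta * ||g||.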
    # solve G * d = -g approximately with GMRES
    if use_gmres:
        logger.info("eta is {}".format(eta))
        gmres_result = gmres(G, -g, tol=eta)
        logger.info("gmres result is {}".format(gmres_result))
        d = gmres_result[0]
    if np.all(d == 0) or not use_gmres:
        # GMRES did not produce a nonzero direction: fall back to the exact Newton direction
        inv_hass = np.linalg.inv(G)
        d = -np.dot(inv_hass, g)
        use_gmres = False
 
    before_LS_time = time.time()
    # After the descent direction is obtained, the remaining steps are the same as in the other optimization methods
    if search_mode == "ELS":
        logger.info("Iteration {iter}, function calls {func_k}, elapsed time {time}, current X = {X}, current g = {g}, descent direction = {d}, current function value = {func_x}".format(iter=k,func_k=function_k,time=before_LS_time-start_time,X=X, g=g, d=d,func_x=round(F, 8)))
        a, b, add_retreat_func = ELS.retreat_method(func, X, d, hyper_parameters=hyper_parameters["ELS"]["retreat_method"] if hyper_parameters is not None else None) 
        alpha_star, add_golden_func = ELS.golden_method(func, X, d, a, b, hyper_parameters=hyper_parameters["ELS"]["golden_method"] if hyper_parameters is not None else None) 
        add_func_k = add_retreat_func + add_golden_func
    elif search_mode == "ILS":
        logger.info("迭代第{iter}轮,当前函数调用次数{func_k},当前用时{time},当前X取值为{X},当前g的取值为{g}, 下降方向为{d},当前函数值为{func_x}".format(iter=k,func_k=function_k,time=before_LS_time-start_time,X=X, g=g, d=d,func_x=round(F, 8)))
        alpha_star, add_func_k = ILS.inexact_line_search(func, gfunc, X, d, hyper_parameters=hyper_parameters["ILS"] if hyper_parameters is not None else None) 
    elif search_mode == "GLL":
        logger.info("迭代第{iter}轮,当前函数调用次数{func_k},当前用时{time},当前X取值为{X},当前g的取值为{g}, 下降方向为{d},当前函数值为{func_x}".format(iter=k,func_k=function_k,time=before_LS_time-start_time,X=X, g=g, d=d,func_x=round(F, 8)))
        alpha_star, add_func_k, mk = GLL_search(func, gfunc, X, d, func_values, mk, hyper_parameters=hyper_parameters["GLL"] if hyper_parameters is not None else None) 
    else:
        raise ValueError("search_mode must be one of ['ELS', 'ILS', 'GLL']")
    # update
    logger.info("Current step size: {}".format(alpha_star))
    X_new = X + d * alpha_star
    function_k = function_k + add_func_k + 1
    func_X_new = func(X_new)
    func_values.append(func_X_new)
    g_pre = g
    G_pre = G
    d_pre = d
    g = gfunc(X_new)
    G = hess_func(X_new)
    
    logging.info("g is {}".format(g))
    logger.info("g的范数为{g},epsilon * max(1, |x_k|)为{xk}".format(g = np.linalg.norm(g), xk = epsilon * max(1, np.linalg.norm(X_new))))
    # 给出的终止条件可能存在一些问题,由于编程语言进度的限制,g的下降量可能为0,从而计算 rho的时候可能存在除0的情况
    if np.linalg.norm(g) < epsilon * max(1, np.linalg.norm(X_new)): 
    # if abs(func_X_new - F) <= epsilon:
        end_time = time.time()
        logger.info("因为满足终止条件,{mode}的非精确牛顿法,迭代结束,迭代轮次{iter},函数调用次数{func_k},最终用时{time},最终X={X},最终函数值={func_X_new}".format(mode=search_mode, iter=k, func_k=function_k, time=end_time-start_time, X=X,func_X_new=func_X_new))
        return X_new, func_X_new, k, function_k, end_time-start_time
    if k > max_epoch:
        end_time = time.time()
        logger.info("超过最大迭代次数,{mode}的非精确牛顿法,迭代结束,迭代轮次{iter},函数调用次数{func_k},最终用时{time},最终X={X},最终函数值={func_X_new}".format(mode=search_mode, iter=k, func_k=function_k, time=end_time-start_time, X=X,func_X_new=func_X_new))
        return X_new, func_X_new, k, function_k, end_time-start_time
    X = X_new
    F = func_X_new
    k += 1
    goto.count_dk
def INBM(X, func, gfunc, hess_func, hyper_parameters=None, search_mode="ILS", eta_mode=1, safeguard=True, eta0=0.5, gamma=1, sigma=1.5, t=1e-4, eta_max=0.9, theta_min=0.1, theta_max=0.5, epsilon=1e-5, max_epoch=1000):
    """[summary]

    Args:
        X ([np.array]): [Input X]
        func ([回调函数]): [目标函数]
        gfunc ([回调函数]): [目标函数的一阶导函数]
        hess_func ([回调函数]): [目标函数的Hessian矩阵]
        hyper_parameters: (Dic): 超参数,超参数中包括:
            search_mode (str, optional): [线搜索的模式(选择精确线搜索还是非精确线搜索)]. Defaults to 'ELS'. ['ELS', 'ILS']
            eta_mode (int, optional): [{eta}选择的方式]. Defaults to 1. [1, 2]
            eta0 ([float], optional): [eta的初值]. Defaults to 0.5.
            gamma ([float], optional): [eta选择2当中的系数参数]. Defaults to 1.
            sigma ([float], optional): [eta选择2当中的指数参数]. Defaults to 1.5.
            safeguard ([bool], optional): [是否使用安全保护]. Defaults to True.

            t ([float], optional): [线性方程组情况条件2中的t]. Defaults to 1e-4.
            eta_max (float, optional): [eta 的上界]. Defaults to 0.9.
            theta_min (float, optional): [theta的下界,在while循环中在theta的取值范围中通过二次插值取theta]. Defaults to 0.1.
            theta_max (float, optional): [theta的上界,在while循环中在theta的取值范围中通过二次插值取theta]. Defaults to 0.5.
            epsilon ([float], optional): [||g_k|| < 1e-5 * max(1, ||x_k||)时,迭代结束]. Defaults to 1e-8.
            max_epoch (int, optional): [最大允许的迭代次数]. Defaults to 1000.
    """
    if hyper_parameters is not None:
        search_mode = hyper_parameters["search_mode"]
        epsilon = hyper_parameters["epsilon"]
        max_epoch = hyper_parameters["max_epoch"]
        eta_mode = hyper_parameters["INBM"]["eta_mode"]
        eta0 = hyper_parameters["INBM"]["eta0"]
        safeguard = hyper_parameters["INBM"]["safeguard"]
        t = hyper_parameters["INBM"]["t"]
        eta_max = hyper_parameters["INBM"]["eta_max"]
        theta_min = hyper_parameters["INBM"]["theta_min"]
        theta_max = hyper_parameters["INBM"]["theta_max"]
        if eta_mode == 2:
            gamma = hyper_parameters["INBM"]["gamma"]
            sigma = hyper_parameters["INBM"]["sigma"]
    n = len(X)
    k = 1
    function_k = 0
    func_values = [] # record the function value at every step; used by the GLL search
    mk = 0 # initial value of mk in the GLL search
    g_pre = None
    G_pre = None
    d_pre = None
    g = gfunc(X)
    G = hess_func(X)
    eta_pre = None
    # append the current function value to func_values
    F = func(X)
    function_k += 1
    func_values.append(F)
    start_time = time.time()
    use_gmres = True
    # Compute the descent direction d_k by (approximately) solving G_k * d = -g_k
    label.count_dk
    
    # choose the current forcing term eta
    if g_pre is None:
        eta = eta0
    else:
        if eta_mode == 1:
            eta = np.linalg.norm(g - g_pre - G_pre @ d_pre) / np.linalg.norm(g_pre)
        elif eta_mode == 2:
            eta = gamma * (np.linalg.norm(g) / np.linalg.norm(g_pre)) ** sigma
        elif eta_mode == 0:
            eta = eta0
        
    # safeguard on eta
    if eta_pre is not None and safeguard:
        if eta_mode == 1:
            # safeguard exponent (1 + sqrt(5)) / 2 for choice 1
            if eta_pre ** ((1 + math.sqrt(5)) / 2) > 0.1:
                eta = max(eta, eta_pre ** ((1 + math.sqrt(5)) / 2))
        elif eta_mode == 2:
            if gamma * eta_pre ** sigma > 0.1:
                eta = max(eta, gamma * eta_pre ** sigma)
    # cap eta by its upper bound, then solve G * d = -g approximately with GMRES
    eta = min(eta, eta_max)
    
    if use_gmres:
        logger.info("eta is {}".format(eta))
        gmres_result = gmres(G, -g, tol=eta)
        logger.info("gmres result is {}".format(gmres_result))
        d = gmres_result[0]
    if np.all(d == 0) or not use_gmres:
        # GMRES did not produce a nonzero direction: fall back to the exact Newton direction
        inv_hass = np.linalg.inv(G)
        d = -np.dot(inv_hass, g)
        use_gmres = False
    # Backtrack with a while loop until the residual-reduction condition on the linear system is satisfied
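    # Backtracking sketch: while ||g(X + d)|| > (1 - t*(1 - eta)) * ||g(X)||, shrink the step to
    # theta * d and relax the forcing term to 1 - theta*(1 - eta); theta comes from the quadratic
    # interpolation formula below and is clipped to [theta_min, theta_max].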
    while np.linalg.norm(gfunc(X + d)) > (1 - t * (1 - eta)) * np.linalg.norm(gfunc(X)):
        denominator = (F ** 2 - func(X + d) ** 2 + 2 * F * (g @ d))
        # guard against a possible division by zero: set theta to 1 so that the if-statement
        # below resets it to the midpoint of the allowed range
        if abs(denominator) < 1e-20: 
            theta = 1
        else:
            theta = (F * (g @ d)) / denominator
        if theta < theta_min or theta > theta_max: # if the interpolated theta is outside the allowed range, take the midpoint of the range
            theta = (theta_min + theta_max) / 2
        d = theta * d
        eta = 1 - theta * (1 - eta)
    
    before_LS_time = time.time()
    # After the descent direction is obtained, the remaining steps are the same as in the other optimization methods
    if search_mode == "ELS":
        logger.info("Iteration {iter}, function calls {func_k}, elapsed time {time}, current X = {X}, current g = {g}, descent direction = {d}, current function value = {func_x}".format(iter=k,func_k=function_k,time=before_LS_time-start_time,X=X, g=g, d=d,func_x=round(F, 8)))
        a, b, add_retreat_func = ELS.retreat_method(func, X, d, hyper_parameters=hyper_parameters["ELS"]["retreat_method"] if hyper_parameters is not None else None) 
        alpha_star, add_golden_func = ELS.golden_method(func, X, d, a, b, hyper_parameters=hyper_parameters["ELS"]["golden_method"] if hyper_parameters is not None else None) 
        add_func_k = add_retreat_func + add_golden_func
    elif search_mode == "ILS":
        logger.info("迭代第{iter}轮,当前函数调用次数{func_k},当前用时{time},当前X取值为{X},当前g的取值为{g}, 下降方向为{d},当前函数值为{func_x}".format(iter=k,func_k=function_k,time=before_LS_time-start_time,X=X, g=g, d=d,func_x=round(F, 8)))
        alpha_star, add_func_k = ILS.inexact_line_search(func, gfunc, X, d, hyper_parameters=hyper_parameters["ILS"] if hyper_parameters is not None else None) 
    elif search_mode == "GLL":
        logger.info("迭代第{iter}轮,当前函数调用次数{func_k},当前用时{time},当前X取值为{X},当前g的取值为{g}, 下降方向为{d},当前函数值为{func_x}".format(iter=k,func_k=function_k,time=before_LS_time-start_time,X=X, g=g, d=d,func_x=round(F, 8)))
        alpha_star, add_func_k, mk = GLL_search(func, gfunc, X, d, func_values, mk, hyper_parameters=hyper_parameters["GLL"] if hyper_parameters is not None else None) 
    else:
        raise ValueError("search_mode must be one of ['ELS', 'ILS', 'GLL']")
    # update
    logger.info("Current step size: {}".format(alpha_star))
    X_new = X + d * alpha_star
    function_k = function_k + add_func_k + 1
    func_X_new = func(X_new)
    func_values.append(func_X_new)
    g_pre = g
    G_pre = G
    d_pre = d
    g = gfunc(X_new)
    G = hess_func(X_new)
    
    logging.info("g is {}".format(g))
    logger.info("g的范数为{g},epsilon * max(1, |x_k|)为{xk}".format(g = np.linalg.norm(g), xk = epsilon * max(1, np.linalg.norm(X_new))))
    # 给出的终止条件可能存在一些问题,由于编程语言进度的限制,g的下降量可能为0,从而计算 rho的时候可能存在除0的情况
    if np.linalg.norm(g) < epsilon * max(1, np.linalg.norm(X_new)): 
    # if abs(func_X_new - F) <= epsilon:
        end_time = time.time()
        logger.info("因为满足终止条件,{mode}的非精确牛顿法,迭代结束,迭代轮次{iter},函数调用次数{func_k},最终用时{time},最终X={X},最终函数值={func_X_new}".format(mode=search_mode, iter=k, func_k=function_k, time=end_time-start_time, X=X,func_X_new=func_X_new))
        return X_new, func_X_new, k, function_k, end_time-start_time
    if k > max_epoch:
        end_time = time.time()
        logger.info("超过最大迭代次数,{mode}的非精确牛顿法,迭代结束,迭代轮次{iter},函数调用次数{func_k},最终用时{time},最终X={X},最终函数值={func_X_new}".format(mode=search_mode, iter=k, func_k=function_k, time=end_time-start_time, X=X,func_X_new=func_X_new))
        return X_new, func_X_new, k, function_k, end_time-start_time
    X = X_new
    F = func_X_new
    k += 1
    goto.count_dk
def LBFGS(X,
          func,
          gfunc,
          hyper_parameters=None,
          M=15,
          search_mode="ELS",
          epsilon=1e-5,
          max_epoch=1000):
    """ 有限内存的BFGS方法

    Args:
        X ([np.array]): [Input X]
        func ([回调函数]): [目标函数]
        gfunc ([回调函数]): [目标函数的一阶导函数]
        hess_func ([回调函数]): [目标函数的Hessian矩阵]
        hyper_parameters: (json): 超参数,超参数中包括:
            M (int, optional): [计算修正Hk的时候,需要之前记录的M个信息,记录的信息包括s和y], 要求M的取值范围在[5, 9, 15]. Defaults to 15.
            search_mode (str, optional): [线搜索的模式(选择精确线搜索还是非精确线搜索)]. Defaults to 'ELS'. ['ELS', 'ILS']
            epsilon ([float], optional): [||g_k|| < 1e-5 * max(1, ||x_k||)时,迭代结束]. Defaults to 1e-8.
            max_epoch (int, optional): [最大允许的迭代次数]. Defaults to 1000.
    """
    if hyper_parameters is not None:
        M = hyper_parameters["LBFGS"]["M"]
        search_mode = hyper_parameters["search_mode"]
        epsilon = hyper_parameters["epsilon"]
        max_epoch = hyper_parameters["max_epoch"]
    n = len(X)
    k = 1
    function_k = 0
    func_values = []  # record the function value at every step; used by the GLL search
    mk = 0  # initial value of mk in the GLL search

    s_history = []  # stores x_{k+1} - x_{k} for every step, used in the LBFGS update of Hk
    y_history = []  # stores g_{k+1} - g_{k} for every step, used in the LBFGS update of Hk
    p_history = []  # stores 1 / (s_k^T y_k), cached to avoid recomputation
    LBFGS_alpha = np.zeros(M)  # the a_i of LBFGS Algorithm 1; pre-allocated and reused to save memory
    g = gfunc(X)
    # append the current function value to func_values
    F = func(X)
    function_k += 1
    func_values.append(F)
    start_time = time.time()
    # Compute the descent direction d_k: apply Hk implicitly via the two-loop recursion and compute dk = -Hk * gk
    label.count_dk

    # Obtain Hk * g with the LBFGS two-loop recursion
    # LBFGS Algorithm 1: compute the a_i
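    # Two-loop recursion (sketch): with rho_i = 1 / (s_i^T y_i),
    #   first loop (newest to oldest):  a_i = rho_i * s_i^T q;  q <- q - a_i * y_i
    #   then r = H_0 * q, second loop (oldest to newest):  b_i = rho_i * y_i^T r;  r <- r + (a_i - b_i) * s_i
    # which yields r = H_k * g_k without ever forming H_k explicitly.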
    q = copy.deepcopy(g)
    for i in range(min(len(s_history), M)):
        LBFGS_alpha[M - 1 - i] = p_history[-i - 1] * (s_history[-i - 1] @ q)
        q -= LBFGS_alpha[M - 1 - i] * y_history[-i - 1]
    # LBFGS Algorithm 2: compute r = Hk * gk
    # if len(p_history) > 0:
    #     Hk_0 = np.eye(n, dtype=float) * ((s_history[-1] @ y_history[-1])/ (y_history[-1] @ y_history[-1]))
    # else:
    Hk_0 = np.eye(n, dtype=float)
    r = Hk_0 @ q
    for i in range(min(len(s_history), M), 0, -1):
        beta = p_history[-i] * (y_history[-i] @ r)
        r += (LBFGS_alpha[-i] - beta) * s_history[-i]

    d = np.array(-r)
    before_LS_time = time.time()
    # After the descent direction is obtained, the remaining steps are the same as in the other optimization methods
    if search_mode == "ELS":
        logger.info(
            "迭代第{iter}轮,当前函数调用次数{func_k},当前用时{time},当前X取值为{X},当前g的取值为{g}, 下降方向为{d},当前函数值为{func_x}"
            .format(iter=k,
                    func_k=function_k,
                    time=before_LS_time - start_time,
                    X=X,
                    g=g,
                    d=d,
                    func_x=round(F, 8)))
        a, b, add_retreat_func = ELS.retreat_method(
            func,
            X,
            d,
            hyper_parameters=hyper_parameters["ELS"]["retreat_method"]
            if hyper_parameters is not None else None)
        alpha_star, add_golden_func = ELS.golden_method(
            func,
            X,
            d,
            a,
            b,
            hyper_parameters=hyper_parameters["ELS"]["golden_method"]
            if hyper_parameters is not None else None)
        add_func_k = add_retreat_func + add_golden_func
    elif search_mode == "ILS":
        logger.info(
            "迭代第{iter}轮,当前函数调用次数{func_k},当前用时{time},当前X取值为{X},当前g的取值为{g}, 下降方向为{d},当前函数值为{func_x}"
            .format(iter=k,
                    func_k=function_k,
                    time=before_LS_time - start_time,
                    X=X,
                    g=g,
                    d=d,
                    func_x=round(F, 8)))
        alpha_star, add_func_k = ILS.inexact_line_search(
            func,
            gfunc,
            X,
            d,
            hyper_parameters=hyper_parameters["ILS"]
            if hyper_parameters is not None else None)
    elif search_mode == "GLL":
        logger.info(
            "迭代第{iter}轮,当前函数调用次数{func_k},当前用时{time},当前X取值为{X},当前g的取值为{g}, 下降方向为{d},当前函数值为{func_x}"
            .format(iter=k,
                    func_k=function_k,
                    time=before_LS_time - start_time,
                    X=X,
                    g=g,
                    d=d,
                    func_x=round(F, 8)))
        alpha_star, add_func_k, mk = GLL_search(
            func,
            gfunc,
            X,
            d,
            func_values,
            mk,
            hyper_parameters=hyper_parameters["GLL"]
            if hyper_parameters is not None else None)

    logger.info("当前更新的步长为{}".format(alpha_star))
    X_new = X + d * alpha_star
    function_k = function_k + add_func_k + 1
    func_X_new = func(X_new)
    func_values.append(func_X_new)
    g_new = gfunc(X_new)

    s_history.append(d * alpha_star)
    y_history.append(g_new - g)
    p_history.append(1 / (s_history[-1] @ y_history[-1]))

    # update
    logger.info("g is {}".format(g_new))
    logger.info("||g|| = {g}, epsilon * max(1, ||x_k||) = {xk}".format(
        g=np.linalg.norm(g_new), xk=epsilon * max(1, np.linalg.norm(X_new))))
    # The stopping criterion below can be problematic: due to floating-point precision the decrease of g may be exactly 0, which can cause division by zero when computing rho
    if np.linalg.norm(g_new) < epsilon * max(1, np.linalg.norm(X_new)):
        # if abs(func_X_new - F) <= epsilon:
        end_time = time.time()
        logger.info(
            "Stopping criterion satisfied; limited-memory BFGS ({mode}) terminates: iteration {iter}, function calls {func_k}, total time {time}, final X = {X}, final function value = {func_X_new}"
            .format(mode=search_mode,
                    iter=k,
                    func_k=function_k,
                    time=end_time - start_time,
                    X=X_new,
                    func_X_new=func_X_new))
        return X_new, func_X_new, k, function_k, end_time - start_time
    if k > max_epoch:
        end_time = time.time()
        logger.info(
            "Maximum number of iterations exceeded; limited-memory BFGS ({mode}) terminates: iteration {iter}, function calls {func_k}, total time {time}, final X = {X}, final function value = {func_X_new}"
            .format(mode=search_mode,
                    iter=k,
                    func_k=function_k,
                    time=end_time - start_time,
                    X=X_new,
                    func_X_new=func_X_new))

        return X_new, func_X_new, k, function_k, end_time - start_time
    X = X_new
    g = g_new
    F = func_X_new
    k += 1
    goto.count_dk
def GM_newton(X,
              func,
              gfunc,
              hess_func,
              hyper_parameters=None,
              zeta=1e-2,
              search_mode="ELS",
              epsilon=1e-5,
              max_epoch=1000):
    """使用Gill Murray稳定牛顿法求极小值点
         d = -G_k^{-1} * g_k]

    Args:
        X ([np.array]): [Input X]
        func ([回调函数]): [目标函数]
        gfunc ([回调函数]): [目标函数的一阶导函数]
        hess_func ([回调函数]): [目标函数的Hessian矩阵]
        hyper_parameters: (Dic): 超参数,超参数中包括:
            zeta ([float], optional): [当gk的模大于zeta, 求解方程得到下降方向]. Defaults to 1e-2.
            search_mode (str, optional): [线搜索的模式(选择精确线搜索还是非精确线搜索)]. Defaults to 'ELS'. ['ELS', 'ILS']
            epsilon ([float], optional): [当函数值下降小于epsilon,迭代结束]. Defaults to 1e-5.
            max_epoch (int, optional): [最大允许的迭代次数]. Defaults to 1000.

    Returns:
        返回求解得到的极小值点,极小值点对应的函数值和迭代次数
    """

    if hyper_parameters is not None:
        zeta = hyper_parameters["GM_newton"]["zeta"]
        search_mode = hyper_parameters["search_mode"]
        epsilon = hyper_parameters["epsilon"]
        max_epoch = hyper_parameters["max_epoch"]
    function_k = 0
    k = 1
    func_values = []  # record the function value at every step; used by the GLL search
    mk = 0  # initial value of mk in the GLL search
    assert epsilon > 0, "must have epsilon > 0"
    # Step 2: compute g and G
    label.step2
    g = gfunc(X)
    G = hess_func(X)
    # append the current function value to func_values
    function_k += 1
    F = func(X)
    func_values.append(F)
    # Step 3: modified Cholesky factorization of G

    L, D = utils.modified_Cholesky(G)

    modified_G = utils.get_modified_G(L, D)
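    # modified_G = L * D * L^T = G + E, where E is the diagonal correction added by the modified
    # Cholesky factorization so that modified_G is positive definite (E = 0 when G already is).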
    # Step 4: if ||g(x)|| > zeta, solve the modified Newton equation for the descent direction
    if np.linalg.norm(g) > zeta:
        G_1 = np.linalg.inv(modified_G)
        d = -np.dot(G_1, g)
        goto.step6
    # Step 5: compute a negative-curvature direction; stop if psi >= 0, otherwise obtain direction d
    LT = copy.deepcopy(L).T
    E = modified_G - G
    d = negative_curvature(LT, D, E)
    if d is None:
        logger.info(
            "No negative-curvature direction exists; GM stabilized Newton method ({mode}) terminates: iteration {iter}, function calls {func_k}, final X = {X}, final function value = {func_X_new}"
            .format(mode=search_mode,
                    iter=k,
                    func_k=function_k,
                    X=X,
                    func_X_new=F))
        return X, F, k, function_k
    else:
        gT = np.mat(g).T
        if np.dot(gT, d) > 0:
            d = -d
    # Step 6: line search for the step size
    label.step6
    if search_mode == "ELS":
        logger.info(
            "Iteration {iter}, function calls {func_k}, current X = {X}, descent direction = {d}, current function value = {func_x}".
            format(iter=k, func_k=function_k, X=X, d=d, func_x=round(F, 8)))
        a, b, add_retreat_func = ELS.retreat_method(
            func,
            X,
            d,
            hyper_parameters=hyper_parameters["ELS"]["retreat_method"]
            if hyper_parameters is not None else None)
        alpha_star, add_golden_func = ELS.golden_method(
            func,
            X,
            d,
            a,
            b,
            hyper_parameters=hyper_parameters["ELS"]["golden_method"]
            if hyper_parameters is not None else None)
        add_func_k = add_retreat_func + add_golden_func
    elif search_mode == "ILS":
        logger.info(
            "Iteration {iter}, function calls {func_k}, current X = {X}, descent direction = {d}, current function value = {func_x}".
            format(iter=k, func_k=function_k, X=X, d=d, func_x=round(F, 8)))
        alpha_star, add_func_k = ILS.inexact_line_search(
            func,
            gfunc,
            X,
            d,
            hyper_parameters=hyper_parameters["ILS"]
            if hyper_parameters is not None else None)
    elif search_mode == "GLL":
        logger.info(
            "Iteration {iter}, function calls {func_k}, current X = {X}, descent direction = {d}, current function value = {func_x}".
            format(iter=k, func_k=function_k, X=X, d=d, func_x=round(F, 8)))
        alpha_star, add_func_k, mk = GLL_search(
            func,
            gfunc,
            X,
            d,
            func_values,
            mk,
            hyper_parameters=hyper_parameters["GLL"]
            if hyper_parameters is not None else None)
    else:
        raise ValueError("参数search_mode 必须从['ELS', 'ILS']当中选择")

    X_new = X + d * alpha_star
    function_k = function_k + add_func_k + 1
    func_X_new = func(X_new)
    if abs(func_X_new - F) <= epsilon:
        logger.info(
            "Function value decreased by less than {epsilon}; GM stabilized Newton method ({mode}) terminates: iteration {iter}, function calls {func_k}, final X = {X}, final function value = {func_X_new}"
            .format(mode=search_mode,
                    epsilon=epsilon,
                    iter=k,
                    func_k=function_k,
                    X=X_new,
                    func_X_new=func_X_new))
        return X_new, func_X_new, k, function_k
    if k > max_epoch:
        logger.info("Maximum number of iterations exceeded: %d", max_epoch)
        return X_new, func_X_new, k, function_k
    X = X_new
    k += 1
    goto.step2
def damp_newton(X,
                func,
                gfunc,
                hess_func,
                hyper_parameters=None,
                search_mode="ELS",
                use_modified_Cholesky=True,
                epsilon=1e-5,
                max_epoch=1000):
    """[使用阻尼牛顿法极小值点
         d = -G_k^{-1} * g_k]

    Args:
        X ([np.array]): [Input X]
        func ([回调函数]): [目标函数]
        gfunc ([回调函数]): [目标函数的一阶导函数]
        hess_func ([回调函数]): [目标函数的Hessian矩阵]
        hyper_parameters: (Dic): 超参数,超参数中包括:
            search_mode (str, optional): [线搜索的模式(选择精确线搜索还是非精确线搜索)]. Defaults to 'ELS'. ['ELS', 'ILS']
            epsilon ([float], optional): [当函数值下降小于epsilon,迭代结束]. Defaults to 1e-5.
            max_epoch (int, optional): [最大允许的迭代次数]. Defaults to 1000.

    Returns:
        返回求解得到的极小值点,极小值点对应的函数值和迭代次数
    """
    if hyper_parameters is not None:
        search_mode = hyper_parameters["search_mode"]
        epsilon = hyper_parameters["epsilon"]
        max_epoch = hyper_parameters["max_epoch"]
        use_modified_Cholesky = hyper_parameters["damp_newton"][
            "use_modified_Cholesky"]

    k = 1
    function_k = 0  # number of function evaluations
    func_values = []  # record the function value at every step; used by the GLL search
    mk = 0  # initial value of mk in the GLL search
    # compute the descent direction d_k
    label.count_dk
    G = hess_func(X)
    g = gfunc(X)
    # append the current function value to func_values
    F = func(X)
    function_k += 1
    func_values.append(F)
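    # When G is not positive definite, the plain Newton direction -G^{-1} g need not be a descent
    # direction; the modified Cholesky factorization replaces G by a nearby positive definite
    # matrix L * D * L^T, so d = -(L * D * L^T)^{-1} g is always a descent direction.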
    try:
        if use_modified_Cholesky:
            L, D = utils.modified_Cholesky(
                G, hyper_parameters["modified_Cholesky"]
                if hyper_parameters is not None else None)
            G_ = utils.get_modified_G(L, D)
            inv_hass = np.linalg.inv(G_)
            d = -np.dot(inv_hass, g)
        else:
            inv_hass = np.linalg.inv(G)
            d = -np.dot(inv_hass, g)
    except np.linalg.LinAlgError:
        logger.info("The Hessian is singular; use the modified Cholesky factorization to compute the descent direction")
        L, D = utils.modified_Cholesky(
            G, hyper_parameters["modified_Cholesky"]
            if hyper_parameters is not None else None)
        G_ = utils.get_modified_G(L, D)
        inv_hass = np.linalg.inv(G_)
        d = -np.dot(inv_hass, g)

    # compute the step size with a line search
    if search_mode == "ELS":
        logger.info(
            "Iteration {iter}, function calls {func_k}, current X = {X}, descent direction = {d}, current function value = {func_x}".
            format(iter=k, func_k=function_k, X=X, d=d, func_x=round(F, 8)))
        a, b, add_retreat_func = ELS.retreat_method(
            func,
            X,
            d,
            hyper_parameters=hyper_parameters["ELS"]["retreat_method"]
            if hyper_parameters is not None else None)
        alpha_star, add_golden_func = ELS.golden_method(
            func,
            X,
            d,
            a,
            b,
            hyper_parameters=hyper_parameters["ELS"]["golden_method"]
            if hyper_parameters is not None else None)
        add_func_k = add_retreat_func + add_golden_func
    elif search_mode == "ILS":
        logger.info(
            "Iteration {iter}, function calls {func_k}, current X = {X}, descent direction = {d}, current function value = {func_x}".
            format(iter=k, func_k=function_k, X=X, d=d, func_x=round(F, 8)))
        alpha_star, add_func_k = ILS.inexact_line_search(
            func,
            gfunc,
            X,
            d,
            hyper_parameters=hyper_parameters["ILS"]
            if hyper_parameters is not None else None)
    elif search_mode == "GLL":
        logger.info(
            "Iteration {iter}, function calls {func_k}, current X = {X}, descent direction = {d}, current function value = {func_x}".
            format(iter=k, func_k=function_k, X=X, d=d, func_x=round(F, 8)))
        alpha_star, add_func_k, mk = GLL_search(
            func,
            gfunc,
            X,
            d,
            func_values,
            mk,
            hyper_parameters=hyper_parameters["GLL"]
            if hyper_parameters is not None else None)
    else:
        raise ValueError("参数search_mode 必须从['ELS', 'ILS']当中选择")

    X_new = X + d * alpha_star
    function_k = function_k + add_func_k + 1
    func_X_new = func(X_new)
    if abs(func_X_new - F) <= epsilon:
        logger.info(
            "Function value decreased by less than {epsilon}; damped Newton method ({mode}) terminates: iteration {iter}, function calls {func_k}, final X = {X}, final function value = {func_X_new}"
            .format(epsilon=epsilon,
                    mode=search_mode,
                    iter=k,
                    func_k=function_k,
                    X=X_new,
                    func_X_new=func_X_new))
        return X_new, func_X_new, k, function_k
    if k > max_epoch:
        logger.info("Maximum number of iterations exceeded: %d", max_epoch)
        return X_new, func_X_new, k, function_k
    X = X_new
    k += 1
    goto.count_dk