Beispiel #1
0
 def _update_eta(self, gx0, gx1, ggx0, d):
     """Recompute ``self.eta`` from the previous and the new gradient.

     When ``self.eta_method == "1"`` the new value is the norm of
     ``gx1 - gx0 - ggx0 . d`` divided by the norm of ``gx0``.  For every
     other setting it is ``0.5 * (|gx1| / |gx0|) ** 1.5``.

     :param gx0: gradient at the previous iterate
     :param gx1: gradient at the new iterate
     :param ggx0: second-derivative matrix at the previous iterate
     :param d: step multiplied by ``ggx0`` in the linear model
     """
     if self.eta_method != "1":
         # Factor 0.5 and exponent 1.5 are fixed here; the author's notes
         # give the admissible ranges as delta in (0, 1], alpha in (1, 2].
         gradient_ratio = vector_2norm(gx1) / vector_2norm(gx0)
         self.eta = 0.5 * gradient_ratio**1.5
         return
     model_residual = gx1 - gx0 - np.dot(ggx0, d)
     self.eta = vector_2norm(model_residual) / vector_2norm(gx0)
Beispiel #2
0
 def _get_descent_direction(self, f, g, gg, x0, epsilon):
     """Return a search direction built from a modified Cholesky factorization.

     ``gg(x0)`` is factored by ``modify_cholesky_fraction`` into ``L`` and
     ``D``.  While the gradient norm exceeds ``epsilon`` the direction solves
     ``(L D L^T) d = -g(x0)``.  Otherwise the diagonal of
     ``2 * gg(x0) - L D L^T`` is inspected: if its smallest entry is
     positive a zero vector is returned, else a direction is built from the
     unit vector at that entry and ``L^T``.

     :param f: objective function (unused here, kept for the interface)
     :param g: callable returning the gradient at a point
     :param gg: callable returning the second-derivative matrix at a point
     :param x0: current iterate; indexed as a column, i.e. ``x0[i][0]``
     :param epsilon: gradient-norm threshold selecting the branch

     :return: the direction, same shape as ``x0``
     """
     # Evaluate the derivatives once; the callables may be expensive.
     ggx = gg(x0)
     gx = g(x0)
     L, D = modify_cholesky_fraction(ggx)
     G = np.dot(np.dot(L, D), L.T)

     if vector_2norm(gx) > epsilon:
         # Solve the linear system instead of forming an explicit inverse.
         return -np.linalg.solve(G, gx)

     psi = np.diag(2 * ggx - G)
     index = np.argmin(psi)
     if psi[index] > 0:
         return np.zeros(x0.shape)

     y = np.zeros(x0.shape)
     y[index][0] = 1
     # NOTE(review): ut_equation presumably solves the upper-triangular
     # system L^T d = y, which would make d and dd equal - confirm, then
     # drop one of the two solves.
     d = ut_equation(L.T, y)
     dd = np.linalg.solve(L.T, y)
     if np.dot(gx.T, dd) > 0:
         # Flip the sign so the direction does not point uphill.
         d = -dd
     return d
    def _cauchy(self, fx, gx, ggx):
        """
        :Note Cauchy point method to solve the sub-problem

        The step is ``-tau * self.delta / |gx| * gx`` where ``tau`` is 1
        when ``gx^T ggx gx <= 0`` and
        ``min(1, |gx|^3 / (self.delta * gx^T ggx gx))`` otherwise.

        :param fx: f(x), np.ndarray of shape (N, 1) (unused here)
        :param gx: g(x), np.ndarray of shape (N, 1)
        :param ggx: gg(x), np.ndarray of shape (N, N)

        :return: d: the descent direction, np.ndarray of shape (N, 1);
                 all zeros when the gradient norm is zero
        """
        grad_norm = vector_2norm(gx)
        if grad_norm == 0:
            # The formula below would evaluate 0/0; there is no direction
            # to follow from a zero gradient.
            return np.zeros(np.shape(gx))

        # Reduce the (1, 1) product to a plain scalar: mixing it with the
        # int 1 in a list is a ragged array that recent NumPy rejects.
        curvature = np.dot(gx.T, np.dot(ggx, gx)).item()
        tau = 1
        if curvature > 0:
            tau = min(1, grad_norm ** 3 / (self.delta * curvature))
        return -1 * tau * self.delta / grad_norm * gx
Beispiel #4
0
    def compute(self, f, g, gg, x0):
        """Iterate from ``x0`` until convergence or the iteration limit.

        Each outer iteration asks ``self._get_descent_direction`` for a
        direction and tries at most two steps: a trial is accepted when the
        new gradient norm is at most ``(1 - self.t * (1 - eta))`` times the
        old one, otherwise the direction is scaled by ``self.get_theta(...)``
        and ``eta`` is relaxed accordingly.  ``self.eta`` is then refreshed
        through ``self._update_eta`` and the loop stops once
        ``self._convergence`` or ``self._maximum_loop`` reports true.

        :param f: callable returning the objective value at a point
        :param g: callable returning the gradient at a point
        :param gg: callable returning the second-derivative matrix at a point
        :param x0: starting point

        :return: the last evaluated iterate
        """
        self.iter_num = 0
        fx1, gx1, ggx1 = f(x0), g(x0), gg(x0)
        while True:
            gx0, ggx0 = gx1, ggx1
            d = self._get_descent_direction(ggx0, gx0)

            # get suitable step length
            eta = self.eta
            for _ in range(2):
                # Remember the step that produced x1: d may still be shrunk
                # after the last trial without x1 being recomputed.
                step = d
                x1 = x0 + step
                fx1, gx1, ggx1 = f(x1), g(x1), gg(x1)
                theta = self.get_theta(gx0, ggx0, gx1, ggx1, d)
                # gx1 already holds g(x0 + d); no second evaluation needed.
                if vector_2norm(gx1) <= (1 - self.t *
                                         (1 - eta)) * vector_2norm(gx0):
                    break
                d = theta * d
                eta = 1 - theta * (1 - eta)

            # Pass the step actually taken so that it matches gx1.
            self._update_eta(gx0, gx1, ggx0, step)
            if self._convergence(gx1, x1) or self._maximum_loop():
                print("iter_num is{}, f(x) is{}".format(self.iter_num, fx1))
                break
            x0 = x1
            self._iter_increment()
        return x1
Beispiel #5
0
 def _r_i(self, x, i):
     """Evaluate residual number ``i`` at the point ``x``.

     For ``i == self.n`` the value is ``|x|^2 - 0.25``; for every other
     index it is ``sqrt(self.eta) * (x[i][0] - 1)``.

     :param x: point, indexed as a column (``x[i][0]``)
     :param i: residual index
     """
     if i == self.n:
         squared_norm = vector_2norm(x)**2
         return squared_norm-0.25
     offset = x[i][0]-1
     return np.sqrt(self.eta)*offset
Beispiel #6
0
 def _convergence(self, gx, x):
     """Report whether the gradient is small relative to the iterate.

     :param gx: gradient at ``x``
     :param x: current iterate

     :return: True when ``|gx| < self.max_error * max(1, |x|)``, else False
     """
     gradient_norm = vector_2norm(gx.flatten())
     scale = np.max([1, vector_2norm(x.flatten())])
     if gradient_norm < self.max_error * scale:
         return True
     return False
Beispiel #7
0
    def _get_descent_direction(self, f, g, gg, x0):
        """Return a search direction from a Bunch-Parlett style factorization.

        ``gg(x0)`` is factored by ``bunch_parlett_fraction`` into ``L``, a
        block-diagonal ``D`` (1x1 and 2x2 blocks) and ``P``.

        * If the smallest eigenvalue of ``D`` exceeds ``1e-8`` the Newton
          system ``gg(x0) d = -g(x0)`` is solved.
        * If it is below ``-1e-8``, ``self.d_tag`` is forced to 1.
        * With ``self.d_tag == 0`` a direction is built from the non-positive
          part of ``D`` (unit entries for non-positive 1x1 blocks, the
          eigenvector of the smallest eigenvalue for 2x2 blocks), sign-flipped
          so it does not point uphill; ``self.d_tag`` becomes 1.
        * Otherwise the inverse of the positive part of ``D`` is used in
          place of the inverse Hessian; ``self.d_tag`` becomes 0.

        When the resulting direction has norm below ``1e-8``, the last right
        singular vector of ``L D L^T``, scaled by 10 and oriented so it
        does not point uphill, is returned instead.

        :param f: objective function (unused here, kept for the interface)
        :param g: callable returning the gradient at a point
        :param gg: callable returning the second-derivative matrix at a point
        :param x0: current iterate, np.ndarray of shape (N, 1)

        :return: the direction, np.ndarray of shape (N, 1)
        :raises ValueError: if a 2x2 block of ``D`` lacks the eigenvalue
            sign the active branch requires
        """
        # Evaluate the derivatives once; the callables may be expensive.
        ggx = gg(x0)
        gx = g(x0)
        L, D, P = bunch_parlett_fraction(ggx)
        min_eigval = np.min(np.linalg.eigvals(D))
        size = D.shape[0]

        # all eigenvalues are positive
        if min_eigval > 1e-8:
            return -np.linalg.solve(ggx, gx)

        # has a negative eigenvalue
        # NOTE(review): this forces the positive-part branch whenever a
        # negative eigenvalue exists - confirm that this is intended.
        if min_eigval < -1e-8:
            self.d_tag = 1

        if self.d_tag == 0:
            # construct the vector a from the non-positive part of D
            a = np.zeros(x0.shape)
            m = 0
            while m < size:
                if m < size - 1 and np.abs(D[m][m + 1]) > 1e-15:
                    # 2x2 block
                    block = D[m:m + 2, m:m + 2]
                    eigval, eigvec = np.linalg.eig(block)
                    index = np.argmin(eigval)
                    if eigval[index] > 0:
                        raise ValueError("negative value")
                    # np.linalg.eig returns unit-length eigenvectors.
                    a[m:m + 2, 0] = eigvec[:, index]
                    m += 2
                else:
                    # 1x1 block
                    if D[m][m] <= 0:
                        a[m][0] = 1
                    m += 1

            a = ut_equation(L.T, a)
            d = np.dot(P, a)
            if np.dot(gx.T, d) > 0:
                d = -d
            self.d_tag = 1
        else:
            # construct the inverse of the positive part of D
            Dpp = np.zeros(D.shape)
            m = 0
            while m < size:
                if m < size - 1 and np.abs(D[m][m + 1]) > 1e-15:
                    # 2x2 block
                    block = D[m:m + 2, m:m + 2]
                    eigval, eigvec = np.linalg.eig(block)
                    index = np.argmax(eigval)
                    if eigval[index] <= 0:
                        raise ValueError("negative value")
                    # Rank-one term (1 / lambda) * v v^T of the positive
                    # eigenpair, written into the matching 2x2 window.
                    column = eigvec[:, index].reshape(-1, 1)
                    Dpp[m:m + 2, m:m + 2] = (
                        1 / eigval[index] * np.dot(column, column.T))
                    m += 2
                else:
                    # 1x1 block
                    if D[m][m] > 0:
                        Dpp[m][m] = 1 / D[m][m]
                    m += 1
            self.d_tag = 0

            lp = np.dot(P, np.linalg.inv(L).T)
            d = -np.dot(np.dot(lp, Dpp), lp.T)
            d = np.dot(d, gx)

        if vector_2norm(d) < 1e-8:
            # d is (numerically) zero: fall back to the last right singular
            # vector of L D L^T.
            _, _, v = np.linalg.svd(np.dot(np.dot(L, D), L.T))
            fallback = v[-1, :].reshape(-1, 1) * 10
            if np.dot(v[-1, :], gx) > 0:
                return -fallback
            return fallback
        return d
Beispiel #8
0
 def _convergence(self, gx, x):
     """Check the stopping rule ``|gx| < self.max_error * max(1, |x|)``.

     :param gx: gradient at ``x``
     :param x: current iterate

     :return: True when the rule holds, False otherwise
     """
     threshold = self.max_error * np.max([1, vector_2norm(x.flatten())])
     return bool(vector_2norm(gx.flatten()) < threshold)
    def _subspace(self, fx, gx, ggx):
        """
        :Note two-dimensional subspace method to solve the sub-problem

        The branch is chosen from the smallest eigenvalue of ``ggx``:

        * below ``-1e-5``: ``ggx`` is shifted by ``-1.5 * min_eigval * I``
          and the step is a combination of ``gx`` and
          ``inv(shifted ggx) . gx``;
        * above ``1e-5``: the step is a combination of ``gx`` and
          ``inv(ggx) . gx``;
        * otherwise: the result of ``self._cauchy(fx, gx, ggx)`` is returned.

        In the first two branches the combination weights come from a real
        root of a quartic whose coefficients depend on ``self.delta``
        (presumably the trust-region radius - TODO confirm).

        NOTE(review): the coefficient formulas are documented as written;
        they have not been re-derived here.

        :param fx: f(x), np.ndarray of shape (N, 1); only forwarded to
                   ``self._cauchy``
        :param gx: g(x), np.ndarray of shape (N, 1)
        :param ggx: gg(x), np.ndarray of shape (N, N)

        :return: d: the descent direction, np.ndarray of shape (N, 1)
        """
        min_eigval = np.min(np.linalg.eigvals(ggx))
        if min_eigval < -1e-5:
            # min_eigval is negative here, so this adds a positive multiple
            # of the identity to ggx.
            modify_ggx = ggx - 1.5*min_eigval*np.eye(ggx.shape[0])
            inv_modify_ggx = np.linalg.inv(modify_ggx)
            invmodifyggx_gx = np.dot(inv_modify_ggx, gx)
            # Inner products of gx, ggx and the shifted solve. Note that
            # ``d`` is a temporary product here and is rebound to the
            # returned direction at the end of this branch.
            a = vector_2norm(gx)**2
            b = np.dot(gx.T, invmodifyggx_gx)
            c = vector_2norm(invmodifyggx_gx)**2
            d = np.dot(gx.T, np.dot(ggx, gx))
            e = np.dot(np.dot(ggx, gx).T, invmodifyggx_gx)
            f = np.dot(np.dot(ggx, invmodifyggx_gx).T, invmodifyggx_gx)

            # Auxiliary combinations used by the quartic and by the step.
            p = a*c - b ** 2
            q = e*b - a*f
            m = 4*b*e - 2*a*f - 2*c*d
            r = d*f - e ** 2
            n = a*e - b*d

            # 0=q_4 v^4 + q_3 v^3 + q_2 v^2 + q_1 v+ q_0
            q4 = 16*p**2 * self.delta**2
            q3 = 8*m*p*self.delta**2
            q2 = (8*p*r + m**2) * self.delta**2 - 4*a*p**2
            q1 = 2*m*r*self.delta**2 - 4*(a*p*q+b*n*p)
            q0 = self.delta**2*r**2 - (a*q**2 + 2*b*n*q + c*n**2)

            # Take the first root, in np.sort order, whose imaginary part is
            # exactly zero.
            # NOTE(review): if no root qualifies, ``v`` stays the whole
            # sorted root array and the formulas below run on it unchanged.
            v = np.roots(np.array([q4, q3, q2, q1, q0]).flatten())
            v = np.sort(v)
            for i in v:
                if np.imag(i) == 0:
                    v = np.real(i)
                    break
            t = 4*p*v**2 + m*v + n
            d = 1/t * ((2*v*p+q)*gx + n*invmodifyggx_gx)

        elif min_eigval > 1e-5:
            # Same construction without a shift: ggx itself is inverted.
            a = vector_2norm(gx)**2
            inv_ggx = np.linalg.inv(ggx)
            b = np.dot(np.dot(inv_ggx, gx).T, gx)
            c = vector_2norm(np.dot(inv_ggx, gx))**2
            d = np.dot(np.dot(ggx, gx).T, gx)

            m = a*c - b*b
            n = a*b - c*d
            q = a*a - b*d

            # 0=q_4 v^4 + q_3 v^3 + q_2 v^2 + q_1 v+ q_0
            q4 = 16*m**2 * self.delta**2
            q3 = 16*m*n*self.delta**2
            q2 = 4*n**2 * self.delta ** 2 - 8*m*q*self.delta**2 - 4*a*m**2
            q1 = 4*b*q*m - 4*n*q*self.delta**2
            q0 = (self.delta**2 - c) * q**2

            # Same root selection as in the branch above, with the same
            # caveat when no root has a zero imaginary part.
            v = np.roots(np.array([q4, q3, q2, q1, q0]).flatten())
            v = np.sort(v)
            for i in v:
                if np.imag(i) == 0:
                    v = np.real(i)
                    break
            t = 4*m*v**2 + 2*n*v - q
            d = (2*v*m*gx + q*np.dot(inv_ggx, gx)) / t
        else:
            # Smallest eigenvalue within [-1e-5, 1e-5]: use the Cauchy step.
            return self._cauchy(fx, gx, ggx)

        return d