Example #1
    def test_line_search_wolfe2_bounds(self):
        # See gh-7475

        # For this f and p, starting at a point on axis 0, the strong Wolfe
        # condition 2 is met if and only if the step length s satisfies
        # |x + s| <= c2 * |x|
        f = lambda x: np.dot(x, x)
        fp = lambda x: 2 * x
        p = np.array([1, 0])

        # Smallest s satisfying strong Wolfe conditions for these arguments is 30
        x = -60 * p
        c2 = 0.5

        s, _, _, _, _, _ = ls.line_search_wolfe2(f, fp, x, p, amax=30, c2=c2)
        assert_line_wolfe(x, p, s, f, fp)

        s, _, _, _, _, _ = assert_warns(LineSearchWarning,
                                        ls.line_search_wolfe2, f, fp, x, p,
                                        amax=29, c2=c2)
        assert_(s is None)

        # s=30 will only be tried on the 6th iteration, so this won't converge
        assert_warns(LineSearchWarning, ls.line_search_wolfe2, f, fp, x, p,
                     c2=c2, maxiter=5)
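For context on the bound used in the test above: with f(x) = x·x, p = [1, 0] and x = -60*p, the strong Wolfe curvature condition |fp(x + s*p)·p| <= c2*|fp(x)·p| reduces to |s - 60| <= c2*60, so with c2 = 0.5 the admissible steps are 30 <= s <= 90, matching the comment that 30 is the smallest valid step. A minimal standalone sketch that checks this numerically (not part of the test suite):

import numpy as np

f = lambda x: np.dot(x, x)
fp = lambda x: 2 * x

p = np.array([1.0, 0.0])
x = -60 * p
c2 = 0.5

def curvature_ok(s):
    # strong Wolfe condition 2: |fp(x + s*p) . p| <= c2 * |fp(x) . p|
    return abs(np.dot(fp(x + s * p), p)) <= c2 * abs(np.dot(fp(x), p))

print(curvature_ok(29))  # False: just below the smallest admissible step
print(curvature_ok(30))  # True: smallest step satisfying the condition
print(curvature_ok(90))  # True: upper end of the admissible interval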
Example #3
 def test_line_search_wolfe2(self):
     c = 0
     smax = 100
     for name, f, fprime, x, p, old_f in self.line_iter():
         f0 = f(x)
         g0 = fprime(x)
         self.fcount = 0
         with suppress_warnings() as sup:
             sup.filter(LineSearchWarning,
                        "The line search algorithm did not converge")
             s, fc, gc, fv, ofv, gv = ls.line_search_wolfe2(f,
                                                            fprime,
                                                            x,
                                                            p,
                                                            g0,
                                                            f0,
                                                            old_f,
                                                            amax=smax)
         assert_equal(self.fcount, fc + gc)
         assert_fp_equal(ofv, f(x))
         assert_fp_equal(fv, f(x + s * p))
         if gv is not None:
             assert_array_almost_equal(gv, fprime(x + s * p), decimal=14)
         if s < smax:
             c += 1
             assert_line_wolfe(x, p, s, f, fprime, err_msg=name)
     assert_(c > 3)  # check that the iterator really works...
Example #4
def _line_search_wolfe12(f, fprime, xk, pk, gfk, old_fval, old_old_fval,
                         **kwargs):
    """
    Same as line_search_wolfe1, but fall back to line_search_wolfe2 if
    suitable step length is not found, and raise an exception if a
    suitable step length is not found.

    Raises
    ------
    _LineSearchError
        If no suitable step size is found

    """
    ret = line_search_wolfe1(f, fprime, xk, pk, gfk,
                             old_fval, old_old_fval,
                             **kwargs)

    if ret[0] is None:
        # line search failed: try different one.
        ret = line_search_wolfe2(f, fprime, xk, pk, gfk,
                                 old_fval, old_old_fval, **kwargs)

    if ret[0] is None:
        raise _LineSearchError()

    return ret
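The helper above returns the same 6-tuple as the underlying searches: (alpha, fc, gc, new_fval, old_fval, new_slope). A minimal sketch of what a successful call looks like, using the public scipy.optimize.line_search wrapper (an alias of line_search_wolfe2); the quadratic and starting point are illustrative only:

import numpy as np
from scipy.optimize import line_search

f = lambda x: float(np.dot(x, x))   # simple convex quadratic
fprime = lambda x: 2 * x

xk = np.array([3.0, -2.0])
pk = -fprime(xk)                    # steepest-descent direction

alpha, fc, gc, new_fval, old_fval, new_slope = line_search(f, fprime, xk, pk)
print(alpha, new_fval)              # alpha is None only if the search failed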
Example #5
def _line_search_wolfe12(f, fprime, xk, pk, gfk, old_fval, old_old_fval,
                         **kwargs):
    """
    Same as line_search_wolfe1, but fall back to line_search_wolfe2 if
    suitable step length is not found, and raise an exception if a
    suitable step length is not found.

    Raises
    ------
    _LineSearchError
        If no suitable step size is found

    """
    ret = line_search_wolfe1(f, fprime, xk, pk, gfk, old_fval, old_old_fval,
                             **kwargs)

    if ret[0] is None:
        # line search failed: try different one.
        ret = line_search_wolfe2(f, fprime, xk, pk, gfk, old_fval,
                                 old_old_fval, **kwargs)

    if ret[0] is None:
        raise _LineSearchError()

    return ret
Example #6
 def test_line_search_wolfe2(self):
     c = 0
     smax = 100
     for name, f, fprime, x, p, old_f in self.line_iter():
         f0 = f(x)
         g0 = fprime(x)
         self.fcount = 0
         with warnings.catch_warnings():
             warnings.simplefilter('ignore', LineSearchWarning)
             s, fc, gc, fv, ofv, gv = ls.line_search_wolfe2(f,
                                                            fprime,
                                                            x,
                                                            p,
                                                            g0,
                                                            f0,
                                                            old_f,
                                                            amax=smax)
         assert_equal(self.fcount, fc + gc)
         assert_fp_equal(ofv, f(x))
         assert_fp_equal(fv, f(x + s * p))
         if gv is not None:
             assert_array_almost_equal(gv, fprime(x + s * p), decimal=14)
         if s < smax:
             c += 1
             assert_line_wolfe(x, p, s, f, fprime, err_msg=name)
     assert_(c > 3)  # check that the iterator really works...
Example #7
def ls_wolfe12(f, fprime, xk, pk, gfk, old_fval, old_old_fval):
    """
    Same as line_search_wolfe1, but fall back to line_search_wolfe2 if
    suitable step length is not found, and raise an exception if a
    suitable step length is not found.
    Raises
    ------
    _LineSearchError
        If no suitable step size is found
    """

    ret = line_search_wolfe1(f, fprime, xk, pk, gfk, old_fval, old_old_fval)
    alpha = ret[0]

    if alpha is None or alpha < 1e-12:
        #print('A')
        # line search failed: try different one.
        ret = line_search_wolfe2(f, fprime, xk, pk, gfk, old_fval,
                                 old_old_fval)
        alpha = ret[0]

    if alpha is None or alpha < 1e-12:
        #print('B')
        ret = line_search_armijo(f, xk, pk, gfk, old_fval)
        alpha = ret[0]

    if alpha is None or alpha < 1e-12:
        #print('C')
        alpha = backtracking_line_search(f, gfk, xk, pk)

    return alpha
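The final fallback above, backtracking_line_search(f, gfk, xk, pk), is not included in this listing. A plausible sketch under the assumption that it implements plain Armijo backtracking with that argument order and returns a bare step length (the helper name, defaults and return contract are hypothetical):

import numpy as np

def backtracking_line_search(f, gfk, xk, pk, alpha0=1.0, rho=0.5, c1=1e-4, max_iter=50):
    # Hypothetical helper: shrink the step until the Armijo sufficient-decrease
    # condition f(xk + alpha*pk) <= f(xk) + c1*alpha*(gfk . pk) holds.
    fk = f(xk)
    slope = np.dot(gfk, pk)
    alpha = alpha0
    for _ in range(max_iter):
        if f(xk + alpha * pk) <= fk + c1 * alpha * slope:
            return alpha
        alpha *= rho
    return alpha  # last (very small) step if no earlier one was accepted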
Example #8
def ncg(func, x0, tol=1e-4, max_iter=500, max_n_evals=1000, c1=1e-4, c2=0.1, disp=False, trace=False):
    time_start = time.perf_counter()  # time.clock() was removed in Python 3.8
    f = lambda x: func(x)[0]
    myfprime = lambda x: func(x)[1].T

    iter = 0
    n_evals = 1
    loss, g = func(x0)
    norm = linalg.norm(g, inf)
    x = x0
    d = -g

    if (trace):
        hist = dict(f=[loss], norm_g=[norm], n_evals=[0], elaps_t=[0])

    while (iter < max_iter and n_evals < max_n_evals):

        res = line_search_wolfe2(f, myfprime, x, d, g.T, loss, c1=c1, c2=c2)
        alpha = res[0]
        fc = res[1]
        gc = res[2]
        x = x + alpha * d
        loss, g_next = func(x)

        norm = linalg.norm(g_next, inf)  # use the new gradient for the stopping test
        n_evals = n_evals + fc + gc + 1
        iter = iter + 1

        if (trace):
            hist['f'].append(loss)
            hist['norm_g'].append(norm)
            hist['n_evals'].append(n_evals)
            hist['elaps_t'].append(time.perf_counter() - time_start)

        if (disp):
            print(iter, ') ', loss[0, 0], ' ', n_evals, ' ', norm)

        if (norm < tol):
            result = [x, loss[0, 0], 0]
            if (trace):
                result.append(hist)
            return result
        else:
            betta = g_next.T.dot(g_next - g)/(g.T.dot(g))
            d = -g_next + betta*d
            g = g_next

    result = [x, loss[0, 0], 1]
    if (trace):
        result.append(hist)
    return result
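The direction update near the end of this example uses the Polak-Ribiere rule, beta = g_{k+1}·(g_{k+1} - g_k) / (g_k·g_k), before setting d = -g_{k+1} + beta*d. A standalone sketch of that formula for flat 1-D gradient arrays (the function name is illustrative):

import numpy as np

def polak_ribiere_beta(g_new, g_old):
    # beta used in the nonlinear CG direction update d = -g_new + beta * d
    return np.dot(g_new, g_new - g_old) / np.dot(g_old, g_old)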
Example #9
def newton(func, x0, tol=1e-4, max_iter=500, max_n_evals=1000, c1=1e-4, c2=0.9, disp=False, trace=False):
    time_start = time.perf_counter()  # time.clock() was removed in Python 3.8
    f = lambda x: func(x)[0]
    myfprime = lambda x: func(x)[1].T

    iter = 0
    n_evals = 1
    loss, grad, hess = func(x0)
    choDec = sp.linalg.cho_factor(hess,True)
    gk = sp.linalg.cho_solve(choDec, -grad)

    norm = linalg.norm(grad, inf)
    x = x0

    if (trace):
        hist = dict(f=[loss], norm_g=[norm], n_evals=[0], elaps_t=[0])

    while (iter < max_iter and n_evals < max_n_evals):

        res = line_search_wolfe2(f, myfprime, x, gk, grad.T, loss, c1=c1, c2=c2)
        alpha = res[0]
        fc = res[1]
        gc = res[2]
        x = x + alpha * gk
        loss, grad, hess = func(x)
        choDec = sp.linalg.cho_factor(hess, True)
        gk = sp.linalg.cho_solve(choDec, -grad)
        norm = linalg.norm(grad, inf)
        n_evals = n_evals + fc + gc + 1
        iter = iter + 1

        if (trace):
            hist['f'].append(loss)
            hist['norm_g'].append(norm)
            hist['n_evals'].append(n_evals)
            hist['elaps_t'].append(time.perf_counter() - time_start)

        if (disp):
            print(iter, ') ', loss[0, 0], ' ', n_evals, ' ', norm)

        if (norm < tol):
            result = [x, loss[0, 0], 0]
            if (trace):
                result.append(hist)
            return result

    result = [x, loss[0, 0], 1]
    if (trace):
        result.append(hist)
    return result
Example #10
  def test_line_search(self):

    def f(x):
      return jnp.cos(jnp.sum(jnp.exp(-x)) ** 2)

    # assert not line_search(jax.value_and_grad(f), np.ones(2), np.array([-0.5, -0.25])).failed
    xk = jnp.ones(2)
    pk = jnp.array([-0.5, -0.25])
    res = line_search(f, xk, pk, maxiter=100)

    scipy_res = line_search_wolfe2(f, grad(f), xk, pk)

    self.assertAllClose(scipy_res[0], res.a_k, atol=1e-5, check_dtypes=False)
    self.assertAllClose(scipy_res[3], res.f_k, atol=1e-5, check_dtypes=False)
Example #11
    def line_search(self, oracle, x_k, d_k, previous_alpha=None):
        """
        Finds the step size alpha for a given starting point x_k
        and for a given search direction d_k that satisfies necessary
        conditions for phi(alpha) = oracle.func(x_k + alpha * d_k).

        Parameters
        ----------
        oracle : BaseSmoothOracle-descendant object
            Oracle with .func_directional() and .grad_directional() methods implemented for computing
            function values and its directional derivatives.
        x_k : np.array
            Starting point
        d_k : np.array
            Search direction
        previous_alpha : float or None
            Starting point to use instead of self.alpha_0 to keep the progress from
            previous steps. If None, self.alpha_0 is used as a starting point.

        Returns
        -------
        alpha : float or None if failure
            Chosen step size
        """
        def backtracking(alpha):
            while oracle.func_directional(x_k, d_k, alpha) > \
                  oracle.func(x_k) + self.c1 * alpha * oracle.grad(x_k) @ d_k:
                alpha /= 2
            return alpha

        if self._method == 'Constant':
            return self.c

        if self._method == 'Armijo':
            alpha = previous_alpha if previous_alpha is not None else self.alpha_0
            return backtracking(alpha)

        if self._method == 'Wolfe':
            alpha = linesearch.line_search_wolfe2(oracle.func,
                                                  oracle.grad,
                                                  x_k,
                                                  d_k,
                                                  c1=self.c1,
                                                  c2=self.c2)[0]
            if alpha is None:
                alpha = backtracking(self.alpha_0)
            return alpha
        return None
Example #12
def _line_search_wolfe12(f, fprime, xk, pk, gfk, old_fval, old_old_fval,
                         **kwargs):
    """
    Same as line_search_wolfe1, but fall back to line_search_wolfe2 if
    suitable step length is not found, and raise an exception if a
    suitable step length is not found.

    Raises
    ------
    _LineSearchError
        If no suitable step size is found

    """

    extra_condition = kwargs.pop('extra_condition', None)

    ret = line_search_wolfe1(f, fprime, xk, pk, gfk, old_fval, old_old_fval,
                             **kwargs)

    if ret[0] is not None and extra_condition is not None:
        xp1 = xk + ret[0] * pk
        if not extra_condition(ret[0], xp1, ret[3], ret[5]):
            # Reject step if extra_condition fails
            ret = (None, )

    if ret[0] is None:
        # line search failed: try different one.
        with warnings.catch_warnings():
            warnings.simplefilter('ignore', LineSearchWarning)
            kwargs2 = {}
            for key in ('c1', 'c2', 'amax'):
                if key in kwargs:
                    kwargs2[key] = kwargs[key]
            ret = line_search_wolfe2(f,
                                     fprime,
                                     xk,
                                     pk,
                                     gfk,
                                     old_fval,
                                     old_old_fval,
                                     extra_condition=extra_condition,
                                     **kwargs2)

    if ret[0] is None:
        raise _LineSearchError()

    return ret
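Example #12 threads an extra_condition callable through to line_search_wolfe2; the callable receives (alpha, x_new, f_new, g_new) and must return True for the step to be accepted. A minimal sketch, assuming the installed SciPy exposes the extra_condition keyword on the public scipy.optimize.line_search wrapper (it is part of line_search_wolfe2's signature in recent versions); the box constraint is illustrative only:

import numpy as np
from scipy.optimize import line_search

f = lambda x: float(np.dot(x, x))
fprime = lambda x: 2 * x

xk = np.array([2.0, 1.0])
pk = -fprime(xk)

def stay_in_box(alpha, x, f_val, g_val):
    # accept the step only if the trial point stays inside [-1.5, 2.5]^2
    return bool(np.all(x >= -1.5) and np.all(x <= 2.5))

res = line_search(f, fprime, xk, pk, extra_condition=stay_in_box)
print(res[0])  # step length, or None if no step satisfied all conditions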
Example #13
 def test_line_search_wolfe2(self):
     c = 0
     smax = 100
     for name, f, fprime, x, p, old_f in self.line_iter():
         f0 = f(x)
         g0 = fprime(x)
         self.fcount = 0
         s, fc, gc, fv, ofv, gv = ls.line_search_wolfe2(f, fprime, x, p,
                                                        g0, f0, old_f,
                                                        amax=smax)
         assert_equal(self.fcount, fc+gc)
         assert_equal(ofv, f(x))
         assert_equal(fv, f(x + s*p))
         if gv is not None:
             assert_equal(gv, fprime(x + s*p))
         if s < smax:
             c += 1
             assert_line_wolfe(x, p, s, f, fprime, err_msg=name)
     assert_(c > 3) # check that the iterator really works...
Example #14
 def test_line_search_wolfe2(self):
     c = 0
     smax = 100
     for name, f, fprime, x, p, old_f in self.line_iter():
         f0 = f(x)
         g0 = fprime(x)
         self.fcount = 0
         with warnings.catch_warnings():
             warnings.simplefilter('ignore', LineSearchWarning)
             s, fc, gc, fv, ofv, gv = ls.line_search_wolfe2(f, fprime, x, p,
                                                            g0, f0, old_f,
                                                            amax=smax)
         assert_equal(self.fcount, fc+gc)
         assert_fp_equal(ofv, f(x))
         assert_fp_equal(fv, f(x + s*p))
         if gv is not None:
             assert_array_almost_equal(gv, fprime(x + s*p), decimal=14)
         if s < smax:
             c += 1
             assert_line_wolfe(x, p, s, f, fprime, err_msg=name)
     assert_(c > 3)  # check that the iterator really works...
Example #15
    def test_line_search(self):
        import jax

        import jax.numpy as np

        def f(x):
            return np.cos(np.sum(np.exp(-x))**2)

        # assert not line_search(jax.value_and_grad(f), np.ones(2), np.array([-0.5, -0.25])).failed
        xk = np.ones(2)
        pk = np.array([-0.5, -0.25])
        res = line_search(jax.value_and_grad(f), xk, pk, maxiter=100)

        from scipy.optimize.linesearch import line_search_wolfe2

        scipy_res = line_search_wolfe2(f, jax.grad(f), xk, pk)

        # print(scipy_res[0], res.a_k)
        # print(scipy_res[3], res.f_k)

        assert np.isclose(scipy_res[0], res.a_k)
        assert np.isclose(scipy_res[3], res.f_k)
Example #16
 def test_line_search_wolfe2(self):
     c = 0
     smax = 512
     for name, f, fprime, x, p, old_f in self.line_iter():
         f0 = f(x)
         g0 = fprime(x)
         self.fcount = 0
         with suppress_warnings() as sup:
             sup.filter(LineSearchWarning,
                        "The line search algorithm could not find a solution")
             sup.filter(LineSearchWarning,
                        "The line search algorithm did not converge")
             s, fc, gc, fv, ofv, gv = ls.line_search_wolfe2(f, fprime, x, p,
                                                            g0, f0, old_f,
                                                            amax=smax)
         assert_equal(self.fcount, fc+gc)
         assert_fp_equal(ofv, f(x))
         assert_fp_equal(fv, f(x + s*p))
         if gv is not None:
             assert_array_almost_equal(gv, fprime(x + s*p), decimal=14)
         if s < smax:
             c += 1
             assert_line_wolfe(x, p, s, f, fprime, err_msg=name)
     assert_(c > 3)  # check that the iterator really works...
Example #17
def ncg(func,
        x0,
        tol=1e-4,
        max_iter=500,
        c1=1e-4,
        c2=0.1,
        disp=False,
        trace=False):

    t_start = time.time()

    func_wrapper = FuncWrapper(func)
    func_f = lambda x: func_wrapper(x)[0]
    func_g = lambda x: func_wrapper(x)[1]

    x_min = x0
    f_min, g_min = func_f(x_min), func_g(x_min)
    direction = -g_min
    norm_g = la.norm(g_min, np.inf)
    n_iter = 0

    list_f = list()
    list_norm_g = list()
    list_n_evals = list()
    list_elaps_t = list()

    while n_iter < max_iter and norm_g > tol:

        wolfe_answer = line_search_wolfe2(func_f,
                                          func_g,
                                          x_min,
                                          direction,
                                          c1=c1,
                                          c2=c2)
        alpha = wolfe_answer[0]

        x_min = x_min + alpha * direction
        pre_g_min = g_min
        f_min, g_min = func_f(x_min), func_g(x_min)
        norm_g = la.norm(g_min, np.inf)
        elaps_t = time.time() - t_start

        beta = np.dot(g_min, g_min) / np.dot(direction, g_min - pre_g_min)
        direction = -g_min + beta * direction

        n_iter = n_iter + 1

        list_f.append(f_min)
        list_norm_g.append(norm_g)
        list_n_evals.append(func_wrapper.n_counter)
        list_elaps_t.append(elaps_t)

        if disp:
            print('%s %3d %s %5f %s %5f %s %3d %s %10f' %
                  ('#:', n_iter, '   f:', f_min, '   norm_g:', norm_g,
                   '   n_evals:', func_wrapper.n_counter, '   elaps_t:',
                   elaps_t))

    status = 0 if norm_g < tol else 1

    if trace:
        hist = {
            'f': np.array(list_f),
            'norm_g': np.array(list_norm_g),
            'n_evals': np.array(list_n_evals),
            'elaps_t': np.array(list_elaps_t)
        }
        return x_min, f_min, status, hist
    else:
        return x_min, f_min, status
Example #18
def fmin_barrier_bfgs(
    f,
    x0,
    fprime=None,
    gtol=1e-6,
    norm=Inf,
    epsilon=_epsilon,
    maxiter=None,
    full_output=0,
    disp=1,
    retall=0,
    callback=None,
    barrier=None,
):
    """Minimize a function using the BFGS algorithm without jumping a barrier.

    Parameters
    ----------
    f : callable f(x,*args)
        Objective function to be minimized.
    x0 : ndarray
        Initial guess.
    fprime : callable f'(x,*args)
        Gradient of f.
    args : tuple
        Extra arguments passed to f and fprime.
    gtol : float
        Gradient norm must be less than gtol before successful termination.
    norm : float
        Order of norm (Inf is max, -Inf is min)
    epsilon : int or ndarray
        If fprime is approximated, use this value for the step size.
    callback : callable
        An optional user-supplied function to call after each
        iteration.  Called as callback(xk), where xk is the
        current parameter vector.
    barrier : callable
        barrier(x) returns true iff a barrier has been jumped.

    Returns
    -------
    xopt : ndarray
        Parameters which minimize f, i.e. f(xopt) == fopt.
    fopt : float
        Minimum value.
    gopt : ndarray
        Value of gradient at minimum, f'(xopt), which should be near 0.
    Bopt : ndarray
        Value of 1/f''(xopt), i.e. the inverse hessian matrix.
    func_calls : int
        Number of function_calls made.
    grad_calls : int
        Number of gradient calls made.
    warnflag : integer
        1 : Maximum number of iterations exceeded.
        2 : Gradient and/or function calls not changing.
    allvecs  :  list
        Results at each iteration.  Only returned if retall is True.

    Other Parameters
    ----------------
    maxiter : int
        Maximum number of iterations to perform.
    full_output : bool
        If True,return fopt, func_calls, grad_calls, and warnflag
        in addition to xopt.
    disp : bool
        Print convergence message if True.
    retall : bool
        Return a list of results at each iteration if True.

    Notes
    -----
    Optimize the function, f, whose gradient is given by fprime
    using the quasi-Newton method of Broyden, Fletcher, Goldfarb,
    and Shanno (BFGS) See Wright, and Nocedal 'Numerical
    Optimization', 1999, pg. 198.

    """
    x0 = asarray(x0).squeeze()
    if x0.ndim == 0:
        x0.shape = (1,)
    if maxiter is None:
        maxiter = len(x0) * 200
    func_calls, f = wrap_function(f)
    if barrier is None:
        barr_calls, barr = wrap_function(lambda _: 0)
    else:
        barr_calls, barr = wrap_function(barrier)
    if fprime is None:
        grad_calls, myfprime = wrap_function(approx_fprime, (f, epsilon))
    else:
        grad_calls, myfprime = wrap_function(fprime)
    #    debug_here()
    if barr(x0):
        print "Optimization started with value violating constraints!"
        sys.stdout.flush()
    gfk = myfprime(x0)
    k = 0
    N = len(x0)

    Hk = numpy.eye(N)
    old_fval = f(x0)
    old_old_fval = old_fval + 5000
    xk = x0
    if retall:
        allvecs = [x0]
    sk = [2 * gtol]
    warnflag = 0
    gnorm = vecnorm(gfk, ord=norm)
    best_x = xk
    best_f = old_fval
    best_k = 0
    best_g = gfk
    while (gnorm > gtol) and (k < maxiter):
        pk = -numpy.dot(Hk, gfk)

        amax, bamax = backtrack(xk, pk, barr)  # scipy.optimize.fmin_bfgs
        # modified here
        # and line_searches below!
        #        amax = 50.
        famax = f(xk + amax * pk)
        if disp:
            print("Iter:%d  f:%14.10g  #b:%d  #f:%d" % (k, old_fval, barr_calls[0], func_calls[0]), end=" ")
            if barrier is not None:
                print("barrier%d:%3g f(amax):%3g" % (bamax, amax, famax), end=" ")
        method = ""

        if (bamax == 0) and (famax < old_fval):
            alpha_k = amax
            old_fval2 = famax
        else:
            alpha_k = None
            try:
                alpha_k, fc, gc, old_fval2, old_old_fval2, gfkp1 = line_search_wolfe2(
                    f, myfprime, xk, pk, gfk, old_fval, old_old_fval, amax=amax
                )
            except Exception:
                if disp:
                    print("Warning: error in line_search_wolfe2..")

            if alpha_k is not None:
                method = "wolfe2"
            else:
                # line search failed: try different one.
                alpha_k, fc, gc, old_fval2, old_old_fval2, gfkp1 = line_search_wolfe1(
                    f, myfprime, xk, pk, gfk, old_fval, old_old_fval, amax=amax
                )

            if alpha_k is None:
                alpha_k, old_fval2 = simple_search(f, xk, pk, amax)
                if alpha_k is not None:
                    method = "simple"

            if alpha_k is None:
                pk = -pk
                alpha_k, old_fval2 = simple_search(f, xk, pk, amax)
                if alpha_k is not None:
                    method = "simple2"

            ##        debug_here()
            #        if old_fval>famax and isfinite(famax):
            #            alpha_k = amax
            #            old_fval = famax
            #            gfkp1   = myfprime(xk + amax*pk)
            #        else:
            #            alpha_k = minimum(alpha_k,amax)

            print()

            if alpha_k is not None:
                bval = barr(xk + alpha_k * pk)
            else:
                bval = 1
            if bval:
                if bamax:
                    warnflag = 2
                    break
                if famax < old_fval:
                    alpha_k = amax
                else:
                    alpha_k, old_fval = simple_search(f, xk, pk, amax)
                    method = "simple3"

        #        if alpha_k is not None:
        #            old_fval= f(xk + alpha_k*pk)
        #            gfkp1   = myfprime(xk + alpha_k*pk)

        if alpha_k is None:
            old_fval = f(xk)
            warnflag = 2
            break

        old_old_fval = old_fval
        old_fval = old_fval2

        xkp1 = xk + alpha_k * pk
        gfkp1 = myfprime(xk + alpha_k * pk)

        gnorm = vecnorm(gfkp1, ord=norm)
        print "gnorm:%4e %s" % (gnorm, method)

        if callback is not None:
            callback(xk)

        if retall:
            allvecs.append(xkp1)
        sk = xkp1 - xk
        xk = xkp1

        old_fval = f(xk)
        if not isfinite(old_fval):
            pass
        #            debug_here()

        if gfkp1 is None:
            gfkp1 = myfprime(xkp1)

        yk = gfkp1 - gfk
        gfk = gfkp1
        k += 1

        if old_fval < best_f:
            best_x = xk
            best_f = old_fval
            best_k = k
            best_g = gfk

        if (gnorm <= gtol) or (k > best_k + 10):
            break

        try:  # this was handled in numeric; keep it for extra safety
            rhok = 1.0 / (numpy.dot(yk, sk))
        except ZeroDivisionError:
            rhok = 1000.0
            print("Divide-by-zero encountered: rhok assumed large")
        if isinf(rhok):  # this is a patch for numpy
            rhok = 1000.0
            print("Divide-by-zero encountered: rhok assumed large")

        #        I = numpy.eye(N,dtype=int)
        #        A1 = I - sk[:,numpy.newaxis] * yk[numpy.newaxis,:] * rhok
        #        A2 = I - yk[:,numpy.newaxis] * sk[numpy.newaxis,:] * rhok
        #        Hk = numpy.dot(A1,numpy.dot(Hk,A2)) + rhok * sk[:,numpy.newaxis] \
        #                 * sk[numpy.newaxis,:]

        # Same as above with inplace operations
        Hkyk = numpy.dot(Hk, yk) * rhok
        numpy.add(Hk, -Hkyk[:, numpy.newaxis] * sk[numpy.newaxis, :], Hk)
        Hkyk = numpy.dot(Hk.T, yk) * rhok
        numpy.add(Hk, -sk[:, numpy.newaxis] * Hkyk[numpy.newaxis, :], Hk)
        numpy.add(Hk, rhok * sk[:, numpy.newaxis] * sk[numpy.newaxis, :], Hk)

    if disp or full_output:
        fval = best_f
    if warnflag == 2:
        if disp:
            print "Warning: Desired error not necessarily achieved " "due to precision loss"
            print "         Current function value: %f" % fval
            print "         Iterations: %d" % k
            print "         Function evaluations: %d" % func_calls[0]
            print "         Barrier  evaluations: %d" % barr_calls[0]
            print "         Gradient evaluations: %d" % grad_calls[0]

    elif k >= maxiter:
        warnflag = 1
        if disp:
            print "Warning: Maximum number of iterations has been exceeded"
            print "         Current function value: %f" % fval
            print "         Iterations: %d" % k
            print "         Function evaluations: %d" % func_calls[0]
            print "         Barrier  evaluations: %d" % barr_calls[0]
            print "         Gradient evaluations: %d" % grad_calls[0]
    else:
        if disp:
            print "Optimization terminated successfully."
            print "         Current function value: %f" % fval
            print "         Iterations: %d" % k
            print "         Function evaluations: %d" % func_calls[0]
            print "         Barrier  evaluations: %d" % barr_calls[0]
            print "         Gradient evaluations: %d" % grad_calls[0]

    if full_output:
        retlist = best_x, fval, best_g, func_calls[0], grad_calls[0], warnflag
        if retall:
            retlist += (allvecs,)
    else:
        retlist = best_x
        if retall:
            retlist = (best_x, allvecs)

    return retlist
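The helpers backtrack and simple_search used in the example above are not included in this listing; backtrack apparently returns the largest step before the barrier and a flag, while simple_search is called as alpha_k, fval = simple_search(f, xk, pk, amax). A guess at simple_search's contract, labeled hypothetical: sample a handful of step lengths in (0, amax] and return the best (alpha, f_value) pair, or (None, None) if nothing improves on f(xk).

import numpy as np

def simple_search(f, xk, pk, amax, n_samples=20):
    # Hypothetical fallback: brute-force a few step lengths in (0, amax] and
    # keep the one with the lowest finite objective value, if it improves f(xk).
    f0 = f(xk)
    best_alpha, best_val = None, f0
    for alpha in np.linspace(amax / n_samples, amax, n_samples):
        val = f(xk + alpha * pk)
        if np.isfinite(val) and val < best_val:
            best_alpha, best_val = alpha, val
    if best_alpha is None:
        return None, None
    return best_alpha, best_val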
Example #19
def lbfgs(func,
          x0,
          tol=1e-4,
          max_iter=500,
          m=10,
          c1=1e-4,
          c2=0.9,
          disp=False,
          trace=False):

    t_start = time.time()

    func_wrapper = FuncWrapper(func)
    func_f = lambda x: func_wrapper(x)[0]
    func_g = lambda x: func_wrapper(x)[1]

    x_min = x0
    f_min, g_min = func_f(x_min), func_g(x_min)
    sy_hist = deque(maxlen=m)
    n_iter = 0
    norm_g = la.norm(g_min, np.inf)

    list_f = list()
    list_norm_g = list()
    list_n_evals = list()
    list_elaps_t = list()

    while n_iter < max_iter and norm_g > tol:

        pre_x = x_min
        pre_g = g_min

        direction = lbfgs_compute_dir(sy_hist, g_min)

        wolfe_answer = line_search_wolfe2(func_f,
                                          func_g,
                                          x_min,
                                          direction,
                                          g_min,
                                          c1=c1,
                                          c2=c2)
        alpha = wolfe_answer[0]

        x_min = x_min + alpha * direction
        f_min, g_min = func_f(x_min), func_g(x_min)
        norm_g = la.norm(g_min, np.inf)
        elaps_t = time.time() - t_start

        s = x_min - pre_x
        y = g_min - pre_g
        sy_hist.append(np.array([s, y]))

        n_iter = n_iter + 1

        list_f.append(f_min)
        list_norm_g.append(norm_g)
        list_n_evals.append(func_wrapper.n_counter)
        list_elaps_t.append(elaps_t)

        if disp:
            print('%s %3d %s %5f %s %5f %s %3d %s %10f' %
                  ('#:', n_iter, '   f:', f_min, '   norm_g:', norm_g,
                   '   n_evals:', func_wrapper.n_counter, '   elaps_t:',
                   elaps_t))

    status = 0 if la.norm(g_min, np.inf) < tol else 1

    if trace:
        hist = {
            'f': np.array(list_f),
            'norm_g': np.array(list_norm_g),
            'n_evals': np.array(list_n_evals),
            'elaps_t': np.array(list_elaps_t)
        }
        return x_min, f_min, status, hist
    else:
        return x_min, f_min, status
Example #20
def hfn(func,
        x0,
        hess_vec,
        tol=1e-4,
        max_iter=500,
        c1=1e-4,
        c2=0.9,
        disp=False,
        trace=False):

    t_start = time.time()

    func_wrapper = FuncWrapper(func)
    func_f = lambda x: func_wrapper(x)[0]
    func_g = lambda x: func_wrapper(x)[1]

    x_min = x0
    f_min, g_min = func_f(x_min), func_g(x_min)
    n_iter = 0
    norm_g = la.norm(g_min, np.inf)

    list_f = list()
    list_norm_g = list()
    list_n_evals = list()
    list_elaps_t = list()

    while n_iter < max_iter and norm_g >= tol:
        hess_vec_c = lambda v: hess_vec(x_min, v)
        forcing = min(0.5, norm_g**(0.5))
        cg_answer = cg(hess_vec_c,
                       -g_min,
                       x_min,
                       tol=forcing * norm_g,
                       trace=True)
        direction = cg_answer[0]
        pre_direction = direction
        not_direction = np.dot(direction, -g_min) <= 0
        while not_direction:
            forcing = 0.1 * forcing
            cg_answer = cg(hess_vec_c,
                           -g_min,
                           pre_direction,
                           tol=forcing * norm_g,
                           trace=True)
            direction = cg_answer[0]
            not_direction = np.dot(direction, -g_min) <= 0

        wolfe_answer = line_search_wolfe2(func_f,
                                          func_g,
                                          x_min,
                                          direction,
                                          func_g(x_min),
                                          c1=c1,
                                          c2=c2)
        alpha = wolfe_answer[0]

        x_min = x_min + alpha * direction
        f_min, g_min = func_f(x_min), func_g(x_min)
        norm_g = la.norm(g_min, np.inf)
        elaps_t = time.time() - t_start

        n_iter = n_iter + 1

        list_f.append(f_min)
        list_norm_g.append(norm_g)
        list_n_evals.append(func_wrapper.n_counter)
        list_elaps_t.append(elaps_t)

        if disp:
            print('%s %3d %s %5f %s %5f %s %3d %s %8f' %
                  ('#:', n_iter, '   f:', f_min, '   norm_g:', norm_g,
                   '   n_evals:', func_wrapper.n_counter, '   elaps_t:',
                   elaps_t))

    status = 0 if norm_g < tol else 1

    if trace:
        hist = {
            'f': np.array(list_f),
            'norm_g': np.array(list_norm_g),
            'n_evals': np.array(list_n_evals),
            'elaps_t': np.array(list_elaps_t)
        }
        return x_min, f_min, status, hist
    else:
        return x_min, f_min, status
Example #21
    def run(self, *a, **kw):
        status = RUNNING

        fi = self.f(self.x0)
        fi_old = fi + 5000

        gi, ur, si = self.reset(self.x0, *a, **kw)
        xi = self.x0
        xi_old = numpy.nan
        it = 0

        while it < self.maxiter:
            if not self.runsignal.is_set():
                break

            if self.f_call.value > self.max_f_eval:
                status = MAX_F_EVAL

            gi = -self.df(xi, *a, **kw)
            if numpy.dot(gi.T, gi) <= self.gtol:
                status = CONVERGED
                break
            if numpy.isnan(numpy.dot(gi.T, gi)):
                if numpy.any(numpy.isnan(xi_old)):
                    status = CONVERGED
                    break
                self.reset(xi_old)

            gammai = ur(gi)
            if gammai < 1e-6 or it % xi.shape[0] == 0:
                gi, ur, si = self.reset(xi, *a, **kw)
            si = gi + gammai * si
            alphai, _, _, fi2, fi_old2, gfi = line_search_wolfe1(
                self.f, self.df, xi, si, gi, fi, fi_old)
            if alphai is None:
                alphai, _, _, fi2, fi_old2, gfi = \
                         line_search_wolfe2(self.f, self.df,
                                            xi, si, gi,
                                            fi, fi_old)
                if alphai is None:
                    # This line search also failed to find a better solution.
                    status = LINE_SEARCH
                    break
            if fi2 < fi:
                fi, fi_old = fi2, fi_old2
            if gfi is not None:
                gi = gfi

            if numpy.isnan(fi) or fi_old < fi:
                gi, ur, si = self.reset(xi, *a, **kw)

            else:
                xi += numpy.dot(alphai, si)
                if self.messages:
                    sys.stdout.write("\r")
                    sys.stdout.flush()
                    sys.stdout.write(
                        "iteration: {0:> 6g}  f:{1:> 12e}  |g|:{2:> 12e}".
                        format(it, fi, numpy.dot(gi.T, gi)))

            if it % self.report_every == 0:
                self.callback(xi, fi, gi, it, self.f_call.value,
                              self.df_call.value, status)
            it += 1
        else:
            status = MAXITER
        self.callback_return(xi, fi, gi, it, self.f_call.value,
                             self.df_call.value, status)
        self.result = [
            xi, fi, gi, it, self.f_call.value, self.df_call.value, status
        ]
Example #22
    def run(self, *a, **kw):
        status = RUNNING

        fi = self.f(self.x0)
        fi_old = fi + 5000

        gi, ur, si = self.reset(self.x0, *a, **kw)
        xi = self.x0
        xi_old = numpy.nan
        it = 0

        while it < self.maxiter:
            if not self.runsignal.is_set():
                break

            if self.f_call.value > self.max_f_eval:
                status = MAX_F_EVAL

            gi = -self.df(xi, *a, **kw)
            if numpy.dot(gi.T, gi) <= self.gtol:
                status = CONVERGED
                break
            if numpy.isnan(numpy.dot(gi.T, gi)):
                if numpy.any(numpy.isnan(xi_old)):
                    status = CONVERGED
                    break
                self.reset(xi_old)

            gammai = ur(gi)
            if gammai < 1e-6 or it % xi.shape[0] == 0:
                gi, ur, si = self.reset(xi, *a, **kw)
            si = gi + gammai * si
            alphai, _, _, fi2, fi_old2, gfi = line_search_wolfe1(self.f, self.df, xi, si, gi, fi, fi_old)
            if alphai is None:
                alphai, _, _, fi2, fi_old2, gfi = line_search_wolfe2(self.f, self.df, xi, si, gi, fi, fi_old)
                if alphai is None:
                    # This line search also failed to find a better solution.
                    status = LINE_SEARCH
                    break
            if fi2 < fi:
                fi, fi_old = fi2, fi_old2
            if gfi is not None:
                gi = gfi

            if numpy.isnan(fi) or fi_old < fi:
                gi, ur, si = self.reset(xi, *a, **kw)

            else:
                xi += numpy.dot(alphai, si)
                if self.messages:
                    sys.stdout.write("\r")
                    sys.stdout.flush()
                    sys.stdout.write(
                        "iteration: {0:> 6g}  f:{1:> 12e}  |g|:{2:> 12e}".format(it, fi, numpy.dot(gi.T, gi))
                    )

            if it % self.report_every == 0:
                self.callback(xi, fi, gi, it, self.f_call.value, self.df_call.value, status)
            it += 1
        else:
            status = MAXITER
        self.callback_return(xi, fi, gi, it, self.f_call.value, self.df_call.value, status)
        self.result = [xi, fi, gi, it, self.f_call.value, self.df_call.value, status]
Example #23
def _minimize_bfgs(fun, x0, args=(), jac=None, callback=None,
                   tol=1e-5, norm=Inf, eps=_epsilon, maxiter=None,
                   disp=False, return_all=False,
                   **unknown_options):
    """
    Minimization of scalar function of one or more variables using the
    BFGS algorithm.

    Options for the BFGS algorithm are:
        disp : bool
            Set to True to print convergence messages.
        maxiter : int
            Maximum number of iterations to perform.
        tol : float
            Cost change must be less than `tol` before successful termination.
        norm : float
            Order of norm (Inf is max, -Inf is min).
        eps : float or ndarray
            If `jac` is approximated, use this value for the step size.

    This function is called by the `minimize` function with `method=BFGS`.
    It is not supposed to be called directly.
    """
    _check_unknown_options(unknown_options)
    f = fun
    fprime = jac
    epsilon = eps
    retall = return_all

    x0 = asarray(x0).flatten()
    if x0.ndim == 0:
        x0.shape = (1,)
    if maxiter is None:
        maxiter = len(x0)*200
    func_calls, f = wrap_function(f, args)
    if fprime is None:
        grad_calls, myfprime = wrap_function(approx_fprime, (f, epsilon))
    else:
        grad_calls, myfprime = wrap_function(fprime, args)
    gfk = myfprime(x0)
    k = 0
    N = len(x0)
    I = numpy.eye(N, dtype=int)
    Hk = I
    old_fval = f(x0)
    old_old_fval = old_fval + 5000
    xk = x0
    if retall:
        allvecs = [x0]
    sk = [2*0.1]
    warnflag = 0
    gnorm = vecnorm(gfk, ord=norm)
    while (fabs(old_fval - old_old_fval) > tol) and (k < maxiter):
        pk = -numpy.dot(Hk, gfk)
        alpha_k, fc, gc, old_fval2, old_old_fval2, gfkp1 = \
           line_search_wolfe1(f, myfprime, xk, pk, gfk,
                              old_fval, old_old_fval)
        if alpha_k is not None:
            old_fval = old_fval2
            old_old_fval = old_old_fval2
        else:
            # line search failed: try different one.
            alpha_k, fc, gc, old_fval, old_old_fval, gfkp1 = \
                     line_search_wolfe2(f, myfprime, xk, pk, gfk,
                                        old_fval, old_old_fval)
            if alpha_k is None:
                # This line search also failed to find a better solution.
                warnflag = 2
                break
        xkp1 = xk + alpha_k * pk
        if retall:
            allvecs.append(xkp1)
        sk = xkp1 - xk
        xk = xkp1
        if gfkp1 is None:
            gfkp1 = myfprime(xkp1)

        yk = gfkp1 - gfk
        gfk = gfkp1
        if callback is not None:
            callback(xk)
        k += 1
        gnorm = vecnorm(gfk, ord=norm)
        #if (gnorm <= gtol):
        #    break

        if not numpy.isfinite(old_fval):
            # We correctly found +-Inf as optimal value, or something went
            # wrong.
            warnflag = 2
            break

        try:  # this was handled in numeric; keep it for extra safety
            rhok = 1.0 / (numpy.dot(yk, sk))
        except ZeroDivisionError:
            rhok = 1000.0
            print("Divide-by-zero encountered: rhok assumed large")
        if isinf(rhok):  #this is patch for numpy
            rhok = 1000.0
            print("Divide-by-zero encountered: rhok assumed large")
        A1 = I - sk[:, numpy.newaxis] * yk[numpy.newaxis, :] * rhok
        A2 = I - yk[:, numpy.newaxis] * sk[numpy.newaxis, :] * rhok
        Hk = numpy.dot(A1, numpy.dot(Hk, A2)) + rhok * sk[:, numpy.newaxis] \
                * sk[numpy.newaxis, :]

    fval = old_fval
    if warnflag == 2:
        msg = _status_message['pr_loss']
        if disp:
            print("Warning:", msg)
            print("         Current function value:",fval)
            print("         Iterations:", k)
            print("         Function evaluations:", func_calls[0])
            print("         Gradient evaluations:", grad_calls[0])

    elif k >= maxiter:
        warnflag = 1
        msg = _status_message['maxiter']
        if disp:
            print("Warning:", msg)
            print("         Current function value:", fval)
            print("         Iterations:", k)
            print("         Function evaluations:", func_calls[0])
            print("         Gradient evaluations:", grad_calls[0])
    else:
        msg = _status_message['success']
        if disp:
            print(msg)
            print("         Current function value:", fval)
            print("         Iterations:", k)
            print("         Function evaluations:", func_calls[0])
            print("         Gradient evaluations:", grad_calls[0])

    result = Result(fun=fval, jac=gfk, hess=Hk, nfev=func_calls[0],
                    njev=grad_calls[0], status=warnflag,
                    success=(warnflag == 0), message=msg, x=xk)
    if retall:
        result['allvecs'] = allvecs
    return result
Example #24
def hfn(func, x0, hess_vec, tol=1e-5, max_iter=500, c1=1e-4, c2=0.9, disp=False, trace=False):

    if (trace):
        hist = {}
        hist['f'] = []
        hist['norm_g'] = []
        hist['elaps_t'] = []
        start_time = time.perf_counter()  # time.clock() was removed in Python 3.8

    f = lambda x: func(x)[0]
    df = lambda x: func(x)[1]

    x = x0
    [loss, grad, extra] = func(x)
    grad_norm = linalg.norm(grad, inf)
    eps = min(1 / 2, sqrt(grad_norm)) * grad_norm

    for i in range(0, max_iter):

        #Start cg
        z = zeros(shape(x))
        g =  grad
        d = -g
        u = hess_vec(x, d, extra)

        for j in range(0,1000):
            gamma = g.transpose().dot(g)/(d.transpose().dot(u))
            z = z + gamma*d
            g1 = g + gamma*u
            b = True
            if linalg.norm(g1,inf)<eps:
                b = False
                break
            else:
                betta = g1.transpose().dot(g1)/(g.transpose().dot(g))
                d = -g1+betta*d
                u = hess_vec(x,d,extra)
                g = g1
        if b:
            print('CG did not converge')

        # One-dimensional line search along the CG direction z
        alpha = line_search_wolfe2(f=f, myfprime=df, xk=x, pk=z, gfk=grad, old_fval=loss, c1=c1, c2=c2)
        if alpha[0] is None:
            # line_search_armijo takes the gradient value gfk, not a gradient function
            alpha = line_search_armijo(f=f, xk=x, pk=z, gfk=grad, old_fval=loss, c1=c1, alpha0=1)
        x = x + alpha[0] * z

        [loss, grad, extra] = func(x)
        grad_norm = linalg.norm(grad, inf)
        eps = min(1 / 2, sqrt(grad_norm)) * grad_norm

        if (disp):
            print(str(1 + i) + ')', loss, grad_norm)
        if (trace):
            hist['f'].append(loss)
            hist['norm_g'].append(grad_norm)
            current_time = time.perf_counter() - start_time
            hist['elaps_t'].append(current_time)

        if grad_norm<tol:
            return x, loss, 0

    return x, loss, 1