def test_fun_and_grad(self):
        """Check ``fun_and_grad`` against separate ``ex.fun``/``ex.grad`` calls.

        The comparison is made at two points, first for a ScalarFunction
        built with the analytic gradient ``ex.grad`` and then for one built
        with the '3-point' finite-difference gradient.
        """
        example = ExScalarFunction()
        unbounded = (-np.inf, np.inf)

        def assert_pair_close(actual, desired):
            # element 0 is the function value, element 1 the gradient
            for idx in (0, 1):
                assert_allclose(actual[idx], desired[idx])

        # --- analytic gradient ---
        point = [2.0, 0.3]
        exact = ScalarFunction(example.fun, point, (), example.grad,
                               example.hess, None, unbounded)

        reference = (example.fun(point), example.grad(point))
        assert_pair_close(exact.fun_and_grad(point), reference)
        assert exact.ngev == 1

        # the list is edited in place and handed over again
        point[1] = 1.
        reference = (example.fun(point), example.grad(point))
        assert_pair_close(exact.fun_and_grad(point), reference)

        # --- finite-difference gradient ---
        point = [2.0, 0.3]
        approx = ScalarFunction(example.fun, point, (), '3-point',
                                example.hess, None, unbounded)
        assert approx.ngev == 1
        reference = (example.fun(point), example.grad(point))
        assert_pair_close(approx.fun_and_grad(point), reference)
        # asking again at the construction point leaves the counter at 1
        assert approx.ngev == 1

        point[1] = 1.
        reference = (example.fun(point), example.grad(point))
        assert_pair_close(approx.fun_and_grad(point), reference)
    def test_x_storage_overlap(self):
        # Scalar_Function should not store references to arrays, it should
        # store copies - this checks that updating an array in-place causes
        # Scalar_Function.x to be updated.

        def f(x):
            return np.sum(np.asarray(x)**2)

        x = np.array([1., 2., 3.])
        sf = ScalarFunction(f, x, (), '3-point', lambda x: x, None,
                            (-np.inf, np.inf))

        assert x is not sf.x
        assert_equal(sf.fun(x), 14.0)
        assert x is not sf.x

        x[0] = 0.
        f1 = sf.fun(x)
        assert_equal(f1, 13.0)

        x[0] = 1
        f2 = sf.fun(x)
        assert_equal(f2, 14.0)
        assert x is not sf.x

        # now test with a HessianUpdate strategy specified
        hess = BFGS()
        x = np.array([1., 2., 3.])
        sf = ScalarFunction(f, x, (), '3-point', hess, None, (-np.inf, np.inf))

        assert x is not sf.x
        assert_equal(sf.fun(x), 14.0)
        assert x is not sf.x

        x[0] = 0.
        f1 = sf.fun(x)
        assert_equal(f1, 13.0)

        x[0] = 1
        f2 = sf.fun(x)
        assert_equal(f2, 14.0)
        assert x is not sf.x
    def test_finite_difference_grad(self):
        """Check a '2-point' finite-difference gradient against the analytic one.

        Two ScalarFunction objects are built on one shared
        ``ExScalarFunction``: ``analit`` uses ``ex.grad`` and ``approx`` uses
        the '2-point' scheme.  After every evaluation the expected numbers of
        function and gradient calls are updated by hand and compared with the
        counters on ``ex`` and on the two wrappers; the values and gradients
        from both wrappers must agree.
        """
        ex = ExScalarFunction()
        # expected numbers of function / gradient evaluations so far
        nfev = 0
        ngev = 0

        def check_counts():
            # ``ex`` serves both wrappers, so each of its counters has to
            # equal the sum of the two wrapper counters.  The running totals
            # are read at call time.
            assert_array_equal(ex.nfev, nfev)
            assert_array_equal(analit.nfev+approx.nfev, nfev)
            assert_array_equal(ex.ngev, ngev)
            assert_array_equal(analit.ngev+approx.ngev, ngev)

        x0 = [1.0, 0.0]
        analit = ScalarFunction(ex.fun, x0, (), ex.grad,
                                ex.hess, None, (-np.inf, np.inf))
        nfev += 1
        ngev += 1
        assert_array_equal(ex.nfev, nfev)
        assert_array_equal(analit.nfev, nfev)
        assert_array_equal(ex.ngev, ngev)
        # The gradient counter is compared with ``ngev``; it used to be
        # compared with ``nfev``, which only passed because both were 1.
        assert_array_equal(analit.ngev, ngev)

        approx = ScalarFunction(ex.fun, x0, (), '2-point',
                                ex.hess, None, (-np.inf, np.inf))
        # the finite-difference wrapper is expected to cost 3 function calls
        # and no gradient call
        nfev += 3
        check_counts()
        assert_array_equal(analit.f, approx.f)
        assert_array_almost_equal(analit.g, approx.g)

        # (point, whether ``fun`` is requested before ``grad``)
        for x, with_fun in (([10, 0.3], True), ([2.0, 1.0], False),
                            ([2.5, 0.3], True), ([2, 0.3], True)):
            if with_fun:
                f_analit = analit.fun(x)
                nfev += 1
            g_analit = analit.grad(x)
            ngev += 1
            check_counts()

            if with_fun:
                f_approx = approx.fun(x)
            g_approx = approx.grad(x)
            # 3 function calls per new point, with or without the explicit
            # ``fun`` request
            nfev += 3
            check_counts()

            if with_fun:
                assert_array_almost_equal(f_analit, f_approx)
            assert_array_almost_equal(g_analit, g_approx)
    def test_finite_difference_hess_linear_operator(self):
        """Compare a '2-point' finite-difference Hessian with the analytic one.

        ``analit`` is built with ``ex.hess`` and ``approx`` with the '2-point'
        Hessian option; both use ``ex.fun`` and ``ex.grad`` of one shared
        ``ExScalarFunction``.  The approximate Hessian has to be a
        ``LinearOperator`` whose products with a few vectors match the
        analytic products, and the expected evaluation counts are tracked by
        hand after every step.
        """
        ex = ExScalarFunction()
        unbounded = (-np.inf, np.inf)
        directions = ([1.0, 2.0], [3.0, 4.0], [5.0, 2.0])
        # expected numbers of function, gradient and Hessian evaluations
        nfev = ngev = nhev = 0

        def check_totals():
            # every counter on the shared ``ex`` must equal the sum of the
            # matching counters on the two wrappers
            for counter, expected in (('nfev', nfev), ('ngev', ngev),
                                      ('nhev', nhev)):
                assert_array_equal(getattr(ex, counter), expected)
                assert_array_equal(
                    getattr(analit, counter) + getattr(approx, counter),
                    expected)

        x0 = [1.0, 0.0]
        analit = ScalarFunction(ex.fun, x0, (), ex.grad,
                                ex.hess, None, unbounded)
        nfev, ngev, nhev = nfev + 1, ngev + 1, nhev + 1
        # only ``analit`` exists so far, so it is checked on its own
        for counter, expected in (('nfev', nfev), ('ngev', ngev),
                                  ('nhev', nhev)):
            assert_array_equal(getattr(ex, counter), expected)
            assert_array_equal(getattr(analit, counter), expected)

        approx = ScalarFunction(ex.fun, x0, (), ex.grad,
                                '2-point', None, unbounded)
        assert_(isinstance(approx.H, LinearOperator))
        for v in directions:
            assert_array_equal(analit.f, approx.f)
            assert_array_almost_equal(analit.g, approx.g)
            assert_array_almost_equal(analit.H.dot(v), approx.H.dot(v))
        nfev += 1
        ngev += 4
        check_totals()

        # (point, whether ``grad`` is requested before ``hess``)
        for x, grad_first in (([2.0, 1.0], False), ([2.1, 1.2], False),
                              ([2.5, 0.3], True), ([5.2, 2.3], True)):
            if grad_first:
                _ = analit.grad(x)
                ngev += 1
            H_analit = analit.hess(x)
            nhev += 1
            check_totals()

            if grad_first:
                _ = approx.grad(x)
            H_approx = approx.hess(x)
            assert_(isinstance(H_approx, LinearOperator))
            for v in directions:
                assert_array_almost_equal(H_analit.dot(v), H_approx.dot(v))
            # 4 more gradient calls are expected on the approximate side
            ngev += 4
            check_totals()
Exemple #5
0
def _prepare_scalar_function(fun,
                             x0,
                             jac=None,
                             args=(),
                             bounds=None,
                             epsilon=None,
                             finite_diff_rel_step=None,
                             hess=None):
    """
    Creates a ScalarFunction object for use with scalar minimizers
    (BFGS/LBFGSB/SLSQP/TNC/CG/etc).
    Parameters
    ----------
    fun : callable
        The objective function to be minimized.
            ``fun(x, *args) -> float``
        where ``x`` is an 1-D array with shape (n,) and ``args``
        is a tuple of the fixed parameters needed to completely
        specify the function.
    x0 : ndarray, shape (n,)
        Initial guess. Array of real elements of size (n,),
        where 'n' is the number of independent variables.
    jac : {callable,  '2-point', '3-point', 'cs', None}, optional
        Method for computing the gradient vector. If it is a callable, it
        should be a function that returns the gradient vector:
            ``jac(x, *args) -> array_like, shape (n,)``
        If one of `{'2-point', '3-point', 'cs'}` is selected then the gradient
        is calculated with a relative step for finite differences. If `None`,
        then two-point finite differences with an absolute step is used.
    args : tuple, optional
        Extra arguments passed to the objective function and its
        derivatives (`fun`, `jac` functions).
    bounds : sequence, optional
        Bounds on variables. 'new-style' bounds are required.
    eps : float or ndarray
        If `jac is None` the absolute step size used for numerical
        approximation of the jacobian via forward differences.
    finite_diff_rel_step : None or array_like, optional
        If `jac in ['2-point', '3-point', 'cs']` the relative step size to
        use for numerical approximation of the jacobian. The absolute step
        size is computed as ``h = rel_step * sign(x0) * max(1, abs(x0))``,
        possibly adjusted to fit into the bounds. For ``method='3-point'``
        the sign of `h` is ignored. If None (default) then step is selected
        automatically.
    hess : {callable,  '2-point', '3-point', 'cs', None}
        Computes the Hessian matrix. If it is callable, it should return the
        Hessian matrix:
            ``hess(x, *args) -> {LinearOperator, spmatrix, array}, (n, n)``
        Alternatively, the keywords {'2-point', '3-point', 'cs'} select a
        finite difference scheme for numerical estimation.
        Whenever the gradient is estimated via finite-differences, the Hessian
        cannot be estimated with options {'2-point', '3-point', 'cs'} and needs
        to be estimated using one of the quasi-Newton strategies.
    Returns
    -------
    sf : ScalarFunction
    """
    if callable(jac):
        grad = jac


#    elif jac in FD_METHODS:
#        # epsilon is set to None so that ScalarFunction is made to use
#        # rel_step
#        epsilon = None
#        grad = jac
    else:
        # default (jac is None) is to do 2-point finite differences with
        # absolute step size. ScalarFunction has to be provided an
        # epsilon value that is not None to use absolute steps. This is
        # normally the case from most _minimize* methods.
        grad = '2-point'
        epsilon = epsilon

    if hess is None:
        # ScalarFunction requires something for hess, so we give a dummy
        # implementation here if nothing is provided, return a value of None
        # so that downstream minimisers halt. The results of `fun.hess`
        # should not be used.
        def hess(x, *args):
            return None

    if bounds is None:
        bounds = (-np.inf, np.inf)

    # ScalarFunction caches. Reuse of fun(x) during grad
    # calculation reduces overall function evaluations.
    sf = ScalarFunction(fun,
                        x0,
                        args,
                        grad,
                        hess,
                        finite_diff_rel_step,
                        bounds,
                        epsilon=epsilon)

    return sf
    def test_lowest_x(self):
        # ScalarFunction should remember the lowest func(x) visited.
        x0 = np.array([2, 3, 4])
        sf = ScalarFunction(rosen, x0, (), rosen_der, rosen_hess,
                            None, None)
        sf.fun([1, 1, 1])
        sf.fun(x0)
        sf.fun([1.01, 1, 1.0])
        sf.grad([1.01, 1, 1.0])
        assert_equal(sf._lowest_f, 0.0)
        assert_equal(sf._lowest_x, [1.0, 1.0, 1.0])

        sf = ScalarFunction(rosen, x0, (), '2-point', rosen_hess,
                            None, (-np.inf, np.inf))
        sf.fun([1, 1, 1])
        sf.fun(x0)
        sf.fun([1.01, 1, 1.0])
        sf.grad([1.01, 1, 1.0])
        assert_equal(sf._lowest_f, 0.0)
        assert_equal(sf._lowest_x, [1.0, 1.0, 1.0])