def test_fun_and_grad(self):
    """Check ``ScalarFunction.fun_and_grad`` against direct evaluation.

    The scenario is run once with an analytic gradient and once with a
    '3-point' finite-difference gradient.
    """
    ex = ExScalarFunction()

    def fg_allclose(actual, expected):
        # Both arguments are (value, gradient) pairs.
        for idx in (0, 1):
            assert_allclose(actual[idx], expected[idx])

    # with analytic gradient
    x0 = [2.0, 0.3]
    analit = ScalarFunction(
        ex.fun, x0, (), ex.grad, ex.hess, None, (-np.inf, np.inf)
    )

    expected = (ex.fun(x0), ex.grad(x0))
    fg_allclose(analit.fun_and_grad(x0), expected)
    # The wrapper's gradient counter must still read one at this point.
    assert analit.ngev == 1

    # Mutate the list in place and query again at the modified point.
    x0[1] = 1.
    expected = (ex.fun(x0), ex.grad(x0))
    fg_allclose(analit.fun_and_grad(x0), expected)

    # with finite difference gradient
    x0 = [2.0, 0.3]
    sf = ScalarFunction(
        ex.fun, x0, (), '3-point', ex.hess, None, (-np.inf, np.inf)
    )
    assert sf.ngev == 1
    expected = (ex.fun(x0), ex.grad(x0))
    fg_allclose(sf.fun_and_grad(x0), expected)
    # Asking again at the construction point must not bump the counter.
    assert sf.ngev == 1

    x0[1] = 1.
    expected = (ex.fun(x0), ex.grad(x0))
    fg_allclose(sf.fun_and_grad(x0), expected)
def test_x_storage_overlap(self):
    """ScalarFunction must keep its own copy of ``x``, not a reference.

    The caller's array is modified in place between evaluations; every
    evaluation has to reflect the current contents and ``sf.x`` must never
    be the caller's array object.
    """

    def sum_of_squares(x):
        return np.sum(np.asarray(x)**2)

    def exercise(hess):
        # Runs the aliasing scenario for one choice of Hessian argument.
        pt = np.array([1., 2., 3.])
        sf = ScalarFunction(
            sum_of_squares, pt, (), '3-point', hess, None, (-np.inf, np.inf)
        )

        assert pt is not sf.x
        assert_equal(sf.fun(pt), 14.0)
        assert pt is not sf.x

        # In-place change of the caller's array: 0 + 4 + 9.
        pt[0] = 0.
        first = sf.fun(pt)
        assert_equal(first, 13.0)

        # Restore the original value: 1 + 4 + 9.
        pt[0] = 1
        second = sf.fun(pt)
        assert_equal(second, 14.0)
        assert pt is not sf.x

    # Callable Hessian first ...
    exercise(lambda x: x)
    # ... then the same scenario with a HessianUpdate strategy specified.
    exercise(BFGS())
def test_finite_difference_grad(self):
    """Compare analytic and '2-point' finite-difference gradients.

    Two ``ScalarFunction`` wrappers share one ``ExScalarFunction``: one uses
    its analytic gradient, the other a '2-point' finite difference. At each
    point the results must agree, and the evaluation counters on the
    wrappers must add up to the counters kept by the example object.
    """
    ex = ExScalarFunction()
    nfev = 0
    ngev = 0

    x0 = [1.0, 0.0]
    analit = ScalarFunction(ex.fun, x0, (), ex.grad,
                            ex.hess, None, (-np.inf, np.inf))
    nfev += 1
    ngev += 1
    assert_array_equal(ex.nfev, nfev)
    assert_array_equal(analit.nfev, nfev)
    assert_array_equal(ex.ngev, ngev)
    # Gradient counter is checked against the gradient tally. The original
    # compared it with ``nfev``, which only passed because both were 1 here.
    assert_array_equal(analit.ngev, ngev)

    approx = ScalarFunction(ex.fun, x0, (), '2-point',
                            ex.hess, None, (-np.inf, np.inf))
    # The test expects three objective evaluations from the finite-difference
    # wrapper (presumably f(x0) plus one forward step per coordinate).
    nfev += 3

    def check_counts():
        # Counters on both wrappers must sum to the expected running totals,
        # which must also match what the example object itself recorded.
        assert_array_equal(ex.nfev, nfev)
        assert_array_equal(analit.nfev+approx.nfev, nfev)
        assert_array_equal(ex.ngev, ngev)
        assert_array_equal(analit.ngev+approx.ngev, ngev)

    check_counts()
    assert_array_equal(analit.f, approx.f)
    assert_array_almost_equal(analit.g, approx.g)

    # New point: value and gradient from both wrappers.
    x = [10, 0.3]
    f_analit = analit.fun(x)
    g_analit = analit.grad(x)
    nfev += 1
    ngev += 1
    check_counts()
    f_approx = approx.fun(x)
    g_approx = approx.grad(x)
    nfev += 3
    check_counts()
    assert_array_almost_equal(f_analit, f_approx)
    assert_array_almost_equal(g_analit, g_approx)

    # New point: gradient only. The analytic wrapper is expected to need no
    # objective evaluation; the finite-difference one still needs three.
    x = [2.0, 1.0]
    g_analit = analit.grad(x)
    ngev += 1
    check_counts()
    g_approx = approx.grad(x)
    nfev += 3
    check_counts()
    assert_array_almost_equal(g_analit, g_approx)

    x = [2.5, 0.3]
    f_analit = analit.fun(x)
    g_analit = analit.grad(x)
    nfev += 1
    ngev += 1
    check_counts()
    f_approx = approx.fun(x)
    g_approx = approx.grad(x)
    nfev += 3
    check_counts()
    assert_array_almost_equal(f_analit, f_approx)
    assert_array_almost_equal(g_analit, g_approx)

    x = [2, 0.3]
    f_analit = analit.fun(x)
    g_analit = analit.grad(x)
    nfev += 1
    ngev += 1
    check_counts()
    f_approx = approx.fun(x)
    g_approx = approx.grad(x)
    nfev += 3
    check_counts()
    assert_array_almost_equal(f_analit, f_approx)
    assert_array_almost_equal(g_analit, g_approx)
def test_finite_difference_hess_linear_operator(self):
    """Compare an analytic Hessian with a '2-point' finite-difference one.

    The finite-difference Hessian must be exposed as a ``LinearOperator``
    whose products with a few probe vectors match the analytic Hessian,
    while the evaluation counters on both wrappers add up to the totals
    recorded by the shared ``ExScalarFunction``.
    """
    ex = ExScalarFunction()
    nfev = ngev = nhev = 0
    probes = ([1.0, 2.0], [3.0, 4.0], [5.0, 2.0])

    x0 = [1.0, 0.0]
    analit = ScalarFunction(
        ex.fun, x0, (), ex.grad, ex.hess, None, (-np.inf, np.inf)
    )
    nfev += 1
    ngev += 1
    nhev += 1
    # Only the analytic wrapper exists so far; check it on its own.
    for recorded, wrapped, expected in (
        (ex.nfev, analit.nfev, nfev),
        (ex.ngev, analit.ngev, ngev),
        (ex.nhev, analit.nhev, nhev),
    ):
        assert_array_equal(recorded, expected)
        assert_array_equal(wrapped, expected)

    approx = ScalarFunction(
        ex.fun, x0, (), ex.grad, '2-point', None, (-np.inf, np.inf)
    )
    assert_(isinstance(approx.H, LinearOperator))

    def check_counts():
        # Wrapper counters must sum to the running totals, which must in
        # turn equal what the example object recorded.
        assert_array_equal(ex.nfev, nfev)
        assert_array_equal(analit.nfev+approx.nfev, nfev)
        assert_array_equal(ex.ngev, ngev)
        assert_array_equal(analit.ngev+approx.ngev, ngev)
        assert_array_equal(ex.nhev, nhev)
        assert_array_equal(analit.nhev+approx.nhev, nhev)

    for v in probes:
        assert_array_equal(analit.f, approx.f)
        assert_array_almost_equal(analit.g, approx.g)
        assert_array_almost_equal(analit.H.dot(v), approx.H.dot(v))
    nfev += 1
    # The test expects four gradient calls from the finite-difference
    # wrapper per point once the three products have been formed.
    ngev += 4
    check_counts()

    # (point, whether the gradient is requested before the Hessian)
    cases = (
        ([2.0, 1.0], False),
        ([2.1, 1.2], False),
        ([2.5, 0.3], True),
        ([5.2, 2.3], True),
    )
    for x, grad_first in cases:
        if grad_first:
            _ = analit.grad(x)
            ngev += 1
        H_analit = analit.hess(x)
        nhev += 1
        check_counts()

        if grad_first:
            _ = approx.grad(x)
        H_approx = approx.hess(x)
        assert_(isinstance(H_approx, LinearOperator))
        for v in probes:
            assert_array_almost_equal(H_analit.dot(v), H_approx.dot(v))
        ngev += 4
        check_counts()
def _prepare_scalar_function(fun, x0, jac=None, args=(), bounds=None, epsilon=None, finite_diff_rel_step=None, hess=None): """ Creates a ScalarFunction object for use with scalar minimizers (BFGS/LBFGSB/SLSQP/TNC/CG/etc). Parameters ---------- fun : callable The objective function to be minimized. ``fun(x, *args) -> float`` where ``x`` is an 1-D array with shape (n,) and ``args`` is a tuple of the fixed parameters needed to completely specify the function. x0 : ndarray, shape (n,) Initial guess. Array of real elements of size (n,), where 'n' is the number of independent variables. jac : {callable, '2-point', '3-point', 'cs', None}, optional Method for computing the gradient vector. If it is a callable, it should be a function that returns the gradient vector: ``jac(x, *args) -> array_like, shape (n,)`` If one of `{'2-point', '3-point', 'cs'}` is selected then the gradient is calculated with a relative step for finite differences. If `None`, then two-point finite differences with an absolute step is used. args : tuple, optional Extra arguments passed to the objective function and its derivatives (`fun`, `jac` functions). bounds : sequence, optional Bounds on variables. 'new-style' bounds are required. eps : float or ndarray If `jac is None` the absolute step size used for numerical approximation of the jacobian via forward differences. finite_diff_rel_step : None or array_like, optional If `jac in ['2-point', '3-point', 'cs']` the relative step size to use for numerical approximation of the jacobian. The absolute step size is computed as ``h = rel_step * sign(x0) * max(1, abs(x0))``, possibly adjusted to fit into the bounds. For ``method='3-point'`` the sign of `h` is ignored. If None (default) then step is selected automatically. hess : {callable, '2-point', '3-point', 'cs', None} Computes the Hessian matrix. 
If it is callable, it should return the Hessian matrix: ``hess(x, *args) -> {LinearOperator, spmatrix, array}, (n, n)`` Alternatively, the keywords {'2-point', '3-point', 'cs'} select a finite difference scheme for numerical estimation. Whenever the gradient is estimated via finite-differences, the Hessian cannot be estimated with options {'2-point', '3-point', 'cs'} and needs to be estimated using one of the quasi-Newton strategies. Returns ------- sf : ScalarFunction """ if callable(jac): grad = jac # elif jac in FD_METHODS: # # epsilon is set to None so that ScalarFunction is made to use # # rel_step # epsilon = None # grad = jac else: # default (jac is None) is to do 2-point finite differences with # absolute step size. ScalarFunction has to be provided an # epsilon value that is not None to use absolute steps. This is # normally the case from most _minimize* methods. grad = '2-point' epsilon = epsilon if hess is None: # ScalarFunction requires something for hess, so we give a dummy # implementation here if nothing is provided, return a value of None # so that downstream minimisers halt. The results of `fun.hess` # should not be used. def hess(x, *args): return None if bounds is None: bounds = (-np.inf, np.inf) # ScalarFunction caches. Reuse of fun(x) during grad # calculation reduces overall function evaluations. sf = ScalarFunction(fun, x0, args, grad, hess, finite_diff_rel_step, bounds, epsilon=epsilon) return sf
def test_lowest_x(self):
    """ScalarFunction should remember the lowest func(x) visited.

    Checked once with an analytic gradient and once with a '2-point'
    finite-difference gradient.
    """
    start = np.array([2, 3, 4])
    # (gradient specification, finite-difference bounds argument)
    configs = (
        (rosen_der, None),
        ('2-point', (-np.inf, np.inf)),
    )
    for grad, fd_bounds in configs:
        sf = ScalarFunction(rosen, start, (), grad, rosen_hess,
                            None, fd_bounds)
        # Visit the point where the recorded minimum is expected, then
        # other points.
        sf.fun([1, 1, 1])
        sf.fun(start)
        sf.fun([1.01, 1, 1.0])
        sf.grad([1.01, 1, 1.0])
        assert_equal(sf._lowest_f, 0.0)
        assert_equal(sf._lowest_x, [1.0, 1.0, 1.0])