def jacobian_inputs():
    """Very contrived test case for finite difference formulae with linear function."""
    steps_pos = np.array([[0.1, 0.1, 0.1, 0.1], [0.2, 0.2, 0.2, 0.2]])
    steps = namedtuple_from_kwargs(pos=steps_pos, neg=-steps_pos)

    jac1 = np.arange(1, 13).reshape(3, 4)
    jac2 = jac1 * 1.1

    evals_pos1 = jac1 @ (np.zeros((4, 4)) + np.eye(4) * 0.1)
    evals_pos2 = jac2 @ (np.zeros((4, 4)) + np.eye(4) * 0.2)
    evals_neg1 = jac1 @ (np.zeros((4, 4)) - np.eye(4) * 0.1)
    evals_neg2 = jac2 @ (np.zeros((4, 4)) - np.eye(4) * 0.2)
    evals = namedtuple_from_kwargs(
        pos=np.array([evals_pos1, evals_pos2]),
        neg=np.array([evals_neg1, evals_neg2]),
    )

    expected_jac = np.array([jac1, jac2])
    f0 = np.zeros(3)

    out = {
        "evals": evals,
        "steps": steps,
        "f0": f0,
        "expected_jac": expected_jac,
    }
    return out

def _reshape_cross_step_evals(raw_evals_cross_step, n_steps, dim_x, f0):
    """Reshape raw_evals for evaluation points with cross steps.

    Returned object is a namedtuple with entries 'pos' and 'neg' corresponding to
    positive and negative steps. Each entry will be a numpy array with dimension
    (n_steps, dim_f, dim_x, dim_x). Since the array is, by definition, symmetric
    over the last two dimensions, the function is not evaluated on both sides to
    save computation time and the information is simply copied here. In comparison
    to the two_step case, however, this symmetry holds only across the 'pos' and
    'neg' entries. That is, the lower triangle of the last two dimensions of 'pos'
    must equal the upper triangle of the last two dimensions of 'neg'. Further, the
    diagonal of the last two dimensions must be equal to f0.

    Mathematical:

        evals.pos = (f(x0 + delta_jl e_j - delta_kl e_k))
        evals.neg = (f(x0 - delta_jl e_j + delta_kl e_k))

        for j, k = 1, ..., dim_x and l = 1, ..., n_steps

    """
    tril_idx = np.tril_indices(dim_x, -1)
    diag_idx = np.diag_indices(dim_x)

    evals = np.array(raw_evals_cross_step).reshape(2, n_steps, dim_x, dim_x, -1)
    evals = evals.transpose(0, 1, 4, 2, 3)
    evals[0][..., tril_idx[0], tril_idx[1]] = evals[1][..., tril_idx[1], tril_idx[0]]
    evals[0][..., diag_idx[0], diag_idx[1]] = np.atleast_2d(f0).T[np.newaxis, ...]

    evals = namedtuple_from_kwargs(pos=evals[0], neg=evals[0].swapaxes(2, 3))
    return evals

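# Hedged sketch (illustration only, not part of the library): how the cross-step
# symmetry lets us skip evaluations. The negative-step array is the transpose of
# the positive-step array over the last two axes, and the diagonal equals f(x0)
# because the two opposing steps cancel there. The array values are made up.
def _example_cross_step_symmetry():
    f0 = 1.0
    pos = np.array([[np.nan, 5.0], [7.0, np.nan]])  # only off-diagonal entries evaluated
    # fill the diagonal with f(x0): f(x0 + d_j e_j - d_j e_j) = f(x0)
    np.fill_diagonal(pos, f0)
    # f(x0 - d_j e_j + d_k e_k) is the same evaluation as f(x0 + d_k e_k - d_j e_j)
    neg = pos.T
    return pos, neg
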
def test_convert_evaluation_data_to_frame():
    arr = np.arange(4).reshape(2, 2)
    arr2 = arr.reshape(2, 1, 2)
    steps = namedtuple_from_kwargs(pos=arr, neg=-arr)
    evals = namedtuple_from_kwargs(pos=arr2, neg=-arr2)

    expected = [
        [1, 0, 0, 0, 0, 0],
        [1, 0, 1, 0, 1, 1],
        [1, 1, 0, 0, 2, 2],
        [1, 1, 1, 0, 3, 3],
        [-1, 0, 0, 0, 0, 0],
        [-1, 0, 1, 0, 1, -1],
        [-1, 1, 0, 0, 2, -2],
        [-1, 1, 1, 0, 3, -3],
    ]
    expected = pd.DataFrame(
        expected, columns=["sign", "step_number", "dim_x", "dim_f", "step", "eval"]
    )

    got = _convert_evaluation_data_to_frame(steps, evals)
    assert_frame_equal(expected, got.reset_index(), check_dtype=False)

def _reshape_one_step_evals(raw_evals_one_step, n_steps, dim_x):
    """Reshape raw_evals for evaluation points with one step.

    Returned object is a namedtuple with entries 'pos' and 'neg' corresponding to
    positive and negative steps. Each entry will be a numpy array with dimension
    (n_steps, dim_f, dim_x).

    Mathematical:

        evals.pos = (f(x0 + delta_jl e_j))
        evals.neg = (f(x0 - delta_jl e_j))

        for j = 1, ..., dim_x and l = 1, ..., n_steps

    """
    evals = np.array(raw_evals_one_step).reshape(2, n_steps, dim_x, -1)
    evals = evals.swapaxes(2, 3)
    evals = namedtuple_from_kwargs(pos=evals[0], neg=evals[1])
    return evals

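# Hedged sketch (illustration only, not part of the library): the ordering of
# raw_evals_one_step assumed here, consistent with how first_derivative builds
# its evaluation points (all positive steps first, then all negative steps,
# parameter-major within each step). The function and values are made up.
def _example_reshape_one_step():
    def f(x):
        return np.array([x[0] + x[1], x[0] * x[1]])

    x0 = np.array([1.0, 2.0])
    d = 0.1
    raw = [
        f(x0 + d * np.eye(2)[0]),  # positive step in parameter 0
        f(x0 + d * np.eye(2)[1]),  # positive step in parameter 1
        f(x0 - d * np.eye(2)[0]),  # negative step in parameter 0
        f(x0 - d * np.eye(2)[1]),  # negative step in parameter 1
    ]
    evals = _reshape_one_step_evals(raw, n_steps=1, dim_x=2)
    # evals.pos has shape (n_steps, dim_f, dim_x) = (1, 2, 2) and
    # evals.pos[0, :, j] equals f(x0 + d * e_j)
    return evals
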
def _reshape_two_step_evals(raw_evals_two_step, n_steps, dim_x):
    """Reshape raw_evals for evaluation points with two steps.

    Returned object is a namedtuple with entries 'pos' and 'neg' corresponding to
    positive and negative steps. Each entry will be a numpy array with dimension
    (n_steps, dim_f, dim_x, dim_x). Since the array is, by definition, symmetric
    over the last two dimensions, the function is not evaluated on both sides to
    save computation time and the information is simply copied here.

    Mathematical:

        evals.pos = (f(x0 + delta_jl e_j + delta_kl e_k))
        evals.neg = (f(x0 - delta_jl e_j - delta_kl e_k))

        for j, k = 1, ..., dim_x and l = 1, ..., n_steps

    """
    tril_idx = np.tril_indices(dim_x, -1)
    evals = np.array(raw_evals_two_step).reshape(2, n_steps, dim_x, dim_x, -1)
    evals = evals.transpose(0, 1, 4, 2, 3)
    evals[..., tril_idx[0], tril_idx[1]] = evals[..., tril_idx[1], tril_idx[0]]
    evals = namedtuple_from_kwargs(pos=evals[0], neg=evals[1])
    return evals

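# Hedged sketch (illustration only, not part of the library): the symmetry that
# allows skipping the lower triangle. Because x0 + d_j e_j + d_k e_k does not
# depend on the order of j and k, the (k, j) entry can simply be copied from the
# (j, k) entry. The values below are made up for demonstration.
def _example_two_step_symmetry():
    evals = np.array([[1.0, 2.0], [np.nan, 3.0]])  # lower triangle never evaluated
    tril_idx = np.tril_indices(2, -1)
    evals[tril_idx[0], tril_idx[1]] = evals[tril_idx[1], tril_idx[0]]
    return evals  # array([[1., 2.], [2., 3.]])
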
def generate_steps(
    x,
    method,
    n_steps,
    target,
    base_steps,
    scaling_factor,
    lower_bounds,
    upper_bounds,
    step_ratio,
    min_steps,
):
    """Generate steps for finite differences with or without Richardson Extrapolation.

    steps can be used to construct x-vectors at which the function has to be
    evaluated for finite difference formulae. How the vectors are constructed from
    the steps differs between first and second derivative. Note that both positive
    and negative steps are returned, even for one-sided methods, because bounds
    might make it necessary to flip the direction of the method.

    The rule of thumb for the generation of base_steps is:

    - first_derivative: `np.finfo(float).eps ** (1 / 2) * np.maximum(np.abs(x), 0.1)`
    - second_derivative: `np.finfo(float).eps ** (1 / 3) * np.maximum(np.abs(x), 0.1)`

    where `np.finfo(float).eps` is machine accuracy. This rule of thumb is also
    used in statsmodels and scipy.

    The step generation is bound aware and will try to find a good solution if any
    step would violate a bound. For this, we use the following rules until no
    bounds are violated:

    1. If a one-sided method is used, flip to the direction with more distance to
       the bound.
    2. Decrease the base_steps, unless this would mean to go below min_steps. By
       default min_steps is equal to base_steps, so no squeezing happens unless
       explicitly requested by setting a smaller min_steps.
    3. Set the conflicting steps to NaN, which means that this step won't be usable
       in the calculation of derivatives. All derivative functions can handle NaNs
       and will produce the best possible derivative estimate given the remaining
       steps. If all steps of one parameter are set to NaN, no derivative estimate
       will be produced for that parameter.

    Args:
        x (numpy.ndarray): 1d array at which the derivative is calculated.
        method (str): One of ["central", "forward", "backward"].
        n_steps (int): Number of steps needed. For central methods, this is the
            number of steps per direction. It is 1 if no Richardson extrapolation
            is used.
        target (str): One of ["first_derivative", "second_derivative"]. This is
            used to choose the appropriate rule of thumb for the base_steps.
        base_steps (numpy.ndarray, optional): 1d array of the same length as x.
            base_steps * scaling_factor is the absolute value of the first (and
            possibly only) step used in the finite differences approximation of the
            derivative. If base_steps * scaling_factor conflicts with bounds, the
            actual steps will be adjusted. If base_steps is not provided, it will
            be determined according to a rule of thumb as long as this does not
            conflict with min_steps.
        scaling_factor (numpy.ndarray or float): Scaling factor which is applied to
            base_steps. If it is a numpy.ndarray, it needs to have the same shape
            as x. scaling_factor is useful if you want to increase or decrease the
            base_step relative to the rule-of-thumb or user provided base_step, for
            example to benchmark the effect of the step size.
        lower_bounds (numpy.ndarray): 1d array with lower bounds for each parameter.
        upper_bounds (numpy.ndarray): 1d array with upper bounds for each parameter.
        step_ratio (float or array): Ratio between two consecutive Richardson
            extrapolation steps in the same direction. Default is 2.0. Has to be
            larger than one. The step ratio is only used if n_steps > 1.
        min_steps (numpy.ndarray): Minimal possible step sizes that can be chosen
            to accommodate bounds. Needs to have the same length as x. By default
            min_steps is equal to base_steps, i.e. the step size is not decreased
            beyond what is optimal according to the rule of thumb.

    Returns:
        steps (namedtuple): Namedtuple with the field names pos and neg. Each field
            contains a numpy array of shape (n_steps, len(x)) with the steps in the
            corresponding direction. The steps are always symmetric, in the sense
            that steps.neg[i, j] = - steps.pos[i, j] unless one of them is NaN.

    """
    if lower_bounds is None:
        lower_bounds = np.full(x.shape, -np.inf)
    if upper_bounds is None:
        upper_bounds = np.full(x.shape, np.inf)

    base_steps = _calculate_or_validate_base_steps(
        base_steps, x, target, min_steps, scaling_factor
    )
    min_steps = base_steps if min_steps is None else min_steps

    assert (
        upper_bounds - lower_bounds >= 2 * min_steps
    ).all(), "min_steps is too large to fit into bounds."

    upper_step_bounds = upper_bounds - x
    lower_step_bounds = lower_bounds - x

    pos = step_ratio ** np.arange(n_steps) * base_steps.reshape(-1, 1)
    neg = -pos.copy()

    if method in ["forward", "backward"]:
        pos, neg = _set_unused_side_to_nan(
            x, pos, neg, method, lower_step_bounds, upper_step_bounds
        )

    pos, neg = _rescale_to_accomodate_bounds(
        base_steps, pos, neg, lower_step_bounds, upper_step_bounds, min_steps
    )

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=RuntimeWarning)
        pos[pos > upper_step_bounds.reshape(-1, 1)] = np.nan
        neg[neg < lower_step_bounds.reshape(-1, 1)] = np.nan

    steps = namedtuple_from_kwargs(pos=pos.T, neg=neg.T)
    return steps

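# Hedged usage sketch (illustration only, not part of the library): a typical call
# to generate_steps for a central first derivative without Richardson
# extrapolation. The parameter values and bounds are made up for demonstration.
def _example_generate_steps():
    x = np.array([0.05, 1.0])
    steps = generate_steps(
        x=x,
        method="central",
        n_steps=1,
        target="first_derivative",
        base_steps=None,
        scaling_factor=1,
        lower_bounds=np.array([0.0, -np.inf]),
        upper_bounds=np.array([np.inf, np.inf]),
        step_ratio=2.0,
        min_steps=None,
    )
    # steps.pos and steps.neg each have shape (n_steps, len(x)) = (1, 2), with
    # steps.neg == -steps.pos wherever neither entry was set to NaN.
    return steps
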
def first_derivative(
    func,
    params,
    func_kwargs=None,
    method="central",
    n_steps=1,
    base_steps=None,
    scaling_factor=1,
    lower_bounds=None,
    upper_bounds=None,
    step_ratio=2,
    min_steps=None,
    f0=None,
    n_cores=DEFAULT_N_CORES,
    error_handling="continue",
    batch_evaluator="joblib",
    return_func_value=False,
    return_info=True,
    key=None,
):
    """Evaluate first derivative of func at params according to method and step options.

    Internally, the function is converted such that it maps from a 1d array to a 1d
    array. Then the Jacobian of that function is calculated. The resulting
    derivative estimate is always a :class:`numpy.ndarray`.

    The parameters and the function output can be pandas objects (Series or
    DataFrames with value column). In that case the output of first_derivative is
    also a pandas object with appropriate index and columns.

    For a detailed description of all options that influence the step size, as well
    as an explanation of how steps are adjusted to bounds in case of a conflict,
    see :func:`~estimagic.differentiation.generate_steps.generate_steps`.

    Args:
        func (callable): Function of which the derivative is calculated.
        params (numpy.ndarray, pandas.Series or pandas.DataFrame): 1d numpy array
            or :class:`pandas.DataFrame` with parameters at which the derivative is
            calculated. If it is a DataFrame, it can contain the columns
            "lower_bound" and "upper_bound" for bounds. See :ref:`params`.
        func_kwargs (dict): Additional keyword arguments for func, optional.
        method (str): One of ["central", "forward", "backward"], default "central".
        n_steps (int): Number of steps needed. For central methods, this is the
            number of steps per direction. It is 1 if no Richardson extrapolation
            is used.
        base_steps (numpy.ndarray, optional): 1d array of the same length as
            params. base_steps * scaling_factor is the absolute value of the first
            (and possibly only) step used in the finite differences approximation
            of the derivative. If base_steps * scaling_factor conflicts with
            bounds, the actual steps will be adjusted. If base_steps is not
            provided, it will be determined according to a rule of thumb as long as
            this does not conflict with min_steps.
        scaling_factor (numpy.ndarray or float): Scaling factor which is applied to
            base_steps. If it is a numpy.ndarray, it needs to be as long as params.
            scaling_factor is useful if you want to increase or decrease the
            base_step relative to the rule-of-thumb or user provided base_step, for
            example to benchmark the effect of the step size. Default 1.
        lower_bounds (numpy.ndarray): 1d array with lower bounds for each
            parameter. If params is a DataFrame and has the column "lower_bound",
            this will be taken as lower_bounds if no lower_bounds have been
            provided explicitly.
        upper_bounds (numpy.ndarray): 1d array with upper bounds for each
            parameter. If params is a DataFrame and has the column "upper_bound",
            this will be taken as upper_bounds if no upper_bounds have been
            provided explicitly.
        step_ratio (float, numpy.array): Ratio between two consecutive Richardson
            extrapolation steps in the same direction. Default is 2.0. Has to be
            larger than one. The step ratio is only used if n_steps > 1.
        min_steps (numpy.ndarray): Minimal possible step sizes that can be chosen
            to accommodate bounds. Must have the same length as params. By default
            min_steps is equal to base_steps, i.e. the step size is not decreased
            beyond what is optimal according to the rule of thumb.
        f0 (numpy.ndarray): 1d numpy array with func(x), optional.
        n_cores (int): Number of processes used to parallelize the function
            evaluations. Default 1.
        error_handling (str): One of "continue" (catch errors and continue to
            calculate derivative estimates. In this case, some derivative estimates
            can be missing but no errors are raised), "raise" (catch errors and
            continue to calculate derivative estimates at first but raise an error
            if all evaluations for one parameter failed) and "raise_strict" (raise
            an error as soon as a function evaluation fails).
        batch_evaluator (str or callable): Name of a pre-implemented batch
            evaluator (currently 'joblib' and 'pathos_mp') or callable with the
            same interface as the estimagic batch_evaluators.
        return_func_value (bool): If True, return function value at params, stored
            in output dict under "func_value". Default False. This is useful when
            using first_derivative during optimization.
        return_info (bool): If True, return additional information on function
            evaluations and internal derivative candidates, stored in output dict
            under "func_evals" and "derivative_candidates". Derivative candidates
            are only returned if n_steps > 1. Default True.
        key (str): If func returns a dictionary, take the derivative of
            func(params)[key].

    Returns:
        result (dict): Result dictionary with keys:

            - "derivative" (numpy.ndarray, pandas.Series or pandas.DataFrame): The
              estimated first derivative of func at params. The shape of the output
              depends on the dimension of params and func(params):

              - f: R -> R leads to shape (1,), usually called derivative
              - f: R^m -> R leads to shape (m,), usually called Gradient
              - f: R -> R^n leads to shape (n, 1), usually called Jacobian
              - f: R^m -> R^n leads to shape (n, m), usually called Jacobian

            - "func_value" (numpy.ndarray, pandas.Series or pandas.DataFrame):
              Function value at params, returned if return_func_value is True.
            - "func_evals" (pandas.DataFrame): Function evaluations produced by
              internal derivative method, returned if return_info is True.
            - "derivative_candidates" (pandas.DataFrame): Derivative candidates
              from Richardson extrapolation, returned if return_info is True and
              n_steps > 1.

    """
    lower_bounds, upper_bounds = _process_bounds(lower_bounds, upper_bounds, params)

    # handle keyword arguments
    func_kwargs = {} if func_kwargs is None else func_kwargs
    partialed_func = functools.partial(func, **func_kwargs)

    # convert params to numpy, but keep label information
    params_index = (
        params.index if isinstance(params, (pd.DataFrame, pd.Series)) else None
    )

    x = params["value"].to_numpy() if isinstance(params, pd.DataFrame) else params
    x = np.atleast_1d(x).astype(float)

    if np.isnan(x).any():
        raise ValueError("The parameter vector must not contain NaNs.")

    # generate the step array
    steps = generate_steps(
        x=x,
        method=method,
        n_steps=n_steps,
        target="first_derivative",
        base_steps=base_steps,
        scaling_factor=scaling_factor,
        lower_bounds=lower_bounds,
        upper_bounds=upper_bounds,
        step_ratio=step_ratio,
        min_steps=min_steps,
    )

    # generate parameter vectors at which func has to be evaluated as numpy arrays
    evaluation_points = []
    for step_arr in steps:
        for i, j in product(range(n_steps), range(len(x))):
            if np.isnan(step_arr[i, j]):
                evaluation_points.append(np.nan)
            else:
                point = x.copy()
                point[j] += step_arr[i, j]
                evaluation_points.append(point)

    # convert the numpy arrays to whatever is needed by func
    evaluation_points = _convert_evaluation_points_to_original(
        evaluation_points, params
    )

    # we always evaluate f0, so we can fall back to one-sided derivatives if
    # two-sided derivatives fail. The extra cost is negligible in most cases.
    if f0 is None:
        evaluation_points.append(params)

    # do the function evaluations, including error handling
    batch_error_handling = "raise" if error_handling == "raise_strict" else "continue"
    raw_evals = _nan_skipping_batch_evaluator(
        func=partialed_func,
        arguments=evaluation_points,
        n_cores=n_cores,
        error_handling=batch_error_handling,
        batch_evaluator=batch_evaluator,
    )

    # extract information on exceptions that occurred during function evaluations
    exc_info = "\n\n".join([val for val in raw_evals if isinstance(val, str)])
    raw_evals = [val if not isinstance(val, str) else np.nan for val in raw_evals]

    # store full function value at params as func_value and a processed version of
    # it that we need to calculate derivatives as f0
    if f0 is None:
        f0 = raw_evals[-1]
        raw_evals = raw_evals[:-1]
    func_value = f0
    f0 = f0[key] if isinstance(f0, dict) else f0
    f_was_scalar = np.isscalar(f0)
    out_index = f0.index if isinstance(f0, pd.Series) else None
    f0 = np.atleast_1d(f0)

    # convert the raw evaluations to numpy arrays
    raw_evals = _convert_evals_to_numpy(raw_evals, key)

    # apply finite difference formulae
    evals = np.array(raw_evals).reshape(2, n_steps, len(x), -1)
    evals = np.transpose(evals, axes=(0, 1, 3, 2))
    evals = namedtuple_from_kwargs(pos=evals[0], neg=evals[1])

    jac_candidates = {}
    for m in ["forward", "backward", "central"]:
        jac_candidates[m] = finite_differences.jacobian(evals, steps, f0, m)

    # get the best derivative estimate out of all derivative estimates that could
    # be calculated, given the function evaluations.
    orders = {
        "central": ["central", "forward", "backward"],
        "forward": ["forward", "backward"],
        "backward": ["backward", "forward"],
    }

    if n_steps == 1:
        jac = _consolidate_one_step_derivatives(jac_candidates, orders[method])
        updated_candidates = None
    else:
        richardson_candidates = _compute_richardson_candidates(
            jac_candidates, steps, n_steps
        )
        jac, updated_candidates = _consolidate_extrapolated(richardson_candidates)

    # raise error if necessary
    if error_handling in ("raise", "raise_strict") and np.isnan(jac).any():
        raise Exception(exc_info)

    # results processing
    derivative = jac.flatten() if f_was_scalar else jac
    derivative = _add_index_to_derivative(derivative, params_index, out_index)

    result = {"derivative": derivative}
    if return_func_value:
        result["func_value"] = func_value

    info = _collect_additional_info(
        return_info, steps, evals, updated_candidates, target="first_derivative"
    )
    result = {**result, **info}

    return result

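# Hedged usage sketch (illustration only, not part of the library): gradient of a
# simple scalar function with the default central differences. The function and
# parameter values are made up for demonstration.
def _example_first_derivative():
    def sphere(params):
        return (params ** 2).sum()

    result = first_derivative(func=sphere, params=np.arange(3.0))
    # result["derivative"] is a 1d array of length 3, approximately [0., 2., 4.]
    return result
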
def hessian(evals, steps, f0, method):
    """Calculate a Hessian estimate with finite differences according to method.

    Notation: f:R^dim_x -> R^dim_f. We compute the derivative at x0, with
    f0 = f(x0).

    The formulae in Rideout [2009] which are implemented here use three types of
    function evaluations:

    1. f(theta + delta_j e_j)
    2. f(theta + delta_j e_j + delta_k e_k)
    3. f(theta + delta_j e_j - delta_k e_k)

    which are called here 1. ``evals_one``, 2. ``evals_two`` and 3. ``evals_cross``,
    corresponding to the idea that we are moving in one direction, in two directions
    and in two cross directions (opposite signs). Note that theta denotes x0,
    delta_j the step size for the j-th variable and e_j the j-th standard basis
    vector. Note also that the brackets in the finite difference formulae are not
    arbitrary but improve the numerical accuracy, see Rideout [2009].

    Args:
        evals (dict[namedtuple]): Dictionary with keys "one_step" for function
            evals in a single step direction, "two_step" for evals in two steps in
            the same direction, and "cross_step" for evals in two steps in the
            opposite direction. Each dict item has the fields called pos and neg
            for evaluations with positive and negative steps, respectively. Each
            field is a numpy array of shape (n_steps, dim_f, dim_x) for "one_step"
            and (n_steps, dim_f, dim_x, dim_x) for "two_step" and "cross_step". It
            contains np.nan for evaluations that failed or were not attempted
            because a one-sided derivative rule was chosen.
        steps (namedtuple): Namedtuple with the fields pos and neg. Each field
            contains a numpy array of shape (n_steps, dim_x) with the steps in the
            corresponding direction. The steps are always symmetric, in the sense
            that steps.neg[i, j] = - steps.pos[i, j] unless one of them is NaN.
        f0 (numpy.ndarray): Numpy array of length dim_f with the output of the
            function at the user supplied parameters.
        method (str): One of {"forward", "backward", "central_average",
            "central_cross"}. These correspond to the finite difference
            approximations defined in equations [7, x, 8, 9] in Rideout [2009],
            where ("backward", x) is not found in Rideout [2009] but is the natural
            extension of equation 7 to the backward case.

    Returns:
        hess (numpy.ndarray): Numpy array of shape (n_steps, dim_f, dim_x, dim_x)
            with estimated Hessians, i.e. there are n_steps Hessian estimates.
""" n_steps, dim_f, dim_x = evals["one_step"].pos.shape f0 = f0.reshape(1, dim_f, 1, 1) # rename variables to increase readability in formulas evals_one = namedtuple_from_kwargs( pos=np.expand_dims(evals["one_step"].pos, axis=3), neg=np.expand_dims(evals["one_step"].neg, axis=3), ) evals_two = evals["two_step"] evals_cross = evals["cross_step"] if method == "forward": outer_product_steps = _calculate_outer_product_steps( steps.pos, n_steps, dim_x) diffs = (evals_two.pos - evals_one.pos.swapaxes(2, 3)) - (evals_one.pos - f0) hess = diffs / outer_product_steps elif method == "backward": outer_product_steps = _calculate_outer_product_steps( steps.neg, n_steps, dim_x) diffs = (evals_two.neg - evals_one.neg.swapaxes(2, 3)) - (evals_one.neg - f0) hess = diffs / outer_product_steps elif method == "central_average": outer_product_steps = _calculate_outer_product_steps( steps.pos, n_steps, dim_x) forward = (evals_two.pos - evals_one.pos.swapaxes(2, 3)) - (evals_one.pos - f0) backward = (evals_two.neg - evals_one.neg.swapaxes(2, 3)) - (evals_one.neg - f0) hess = (forward + backward) / (2 * outer_product_steps) elif method == "central_cross": outer_product_steps = _calculate_outer_product_steps( steps.pos, n_steps, dim_x) diffs = (evals_two.pos - evals_cross.pos) - (evals_cross.neg - evals_two.neg) hess = diffs / (4 * outer_product_steps) else: raise ValueError( "Method has to be 'forward', 'backward', 'central_average' or ", "'central_cross'.", ) return hess