def test_generate_steps_min_step_equals_base_step():
    calculated_steps = generate_steps(
        x=np.arange(3),
        method="central",
        n_steps=2,
        target="first_derivative",
        base_steps=np.array([0.1, 0.2, 0.3]),
        lower_bounds=np.full(3, -np.inf),
        upper_bounds=np.full(3, 2.5),
        step_ratio=2.0,
        min_steps=None,
        scaling_factor=1.0,
    )

    expected_pos = np.array([[0.1, 0.2], [0.2, 0.4], [0.3, np.nan]]).T
    expected_neg = np.array([[-0.1, -0.2], [-0.2, -0.4], [-0.3, -0.6]]).T
    aaae(calculated_steps.pos, expected_pos)
    aaae(calculated_steps.neg, expected_neg)
Beispiel #2
0
def second_derivative(
    func,
    params,
    func_kwargs=None,
    method="central_cross",
    n_steps=1,
    base_steps=None,
    scaling_factor=1,
    lower_bounds=None,
    upper_bounds=None,
    step_ratio=2,
    min_steps=None,
    f0=None,
    n_cores=DEFAULT_N_CORES,
    error_handling="continue",
    batch_evaluator="joblib",
    return_func_value=False,
    return_info=True,
    key=None,
):
    """Evaluate second derivative of func at params according to method and step options

    Internally, the function is converted such that it maps from a 1d array to a 1d
    array. Then the Hessians of that function are calculated. The resulting derivative
    estimate is always a :class:`numpy.ndarray`.

    The parameters and the function output can be pandas objects (Series or DataFrames
    with value column). In that case the output of second_derivative is also a pandas
    object and with appropriate index and columns.

    Detailed description of all options that influence the step size as well as an
    explanation of how steps are adjusted to bounds in case of a conflict,
    see :func:`~estimagic.differentiation.generate_steps.generate_steps`.

    Args:
        func (callable): Function of which the derivative is calculated.
        params (numpy.ndarray, pandas.Series or pandas.DataFrame): 1d numpy array or
            :class:`pandas.DataFrame` with parameters at which the derivative is
            calculated. If it is a DataFrame, it can contain the columns "lower_bound"
            and "upper_bound" for bounds. See :ref:`params`.
        func_kwargs (dict): Additional keyword arguments for func, optional.
        method (str): One of {"forward", "backward", "central_average", "central_cross"}
            These correspond to the finite difference approximations defined in
            equations [7, x, 8, 9] in Rideout [2009], where ("backward", x) is not found
            in Rideout [2009] but is the natural extension of equation 7 to the backward
            case. Default "central_cross".
        n_steps (int): Number of steps needed. For central methods, this is
            the number of steps per direction. It is 1 if no Richardson extrapolation
            is used.
        base_steps (numpy.ndarray, optional): 1d array of the same length as params.
            base_steps * scaling_factor is the absolute value of the first (and possibly
            only) step used in the finite differences approximation of the derivative.
            If base_steps * scaling_factor conflicts with bounds, the actual steps will
            be adjusted. If base_steps is not provided, it will be determined according
            to a rule of thumb as long as this does not conflict with min_steps.
        scaling_factor (numpy.ndarray or float): Scaling factor which is applied to
            base_steps. If it is an numpy.ndarray, it needs to be as long as params.
            scaling_factor is useful if you want to increase or decrease the base_step
            relative to the rule-of-thumb or user provided base_step, for example to
            benchmark the effect of the step size. Default 1.
        lower_bounds (numpy.ndarray): 1d array with lower bounds for each parameter. If
            params is a DataFrame and has the columns "lower_bound", this will be taken
            as lower_bounds if now lower_bounds have been provided explicitly.
        upper_bounds (numpy.ndarray): 1d array with upper bounds for each parameter. If
            params is a DataFrame and has the columns "upper_bound", this will be taken
            as upper_bounds if no upper_bounds have been provided explicitly.
        step_ratio (float, numpy.array): Ratio between two consecutive Richardson
            extrapolation steps in the same direction. default 2.0. Has to be larger
            than one. The step ratio is only used if n_steps > 1.
        min_steps (numpy.ndarray): Minimal possible step sizes that can be chosen to
            accommodate bounds. Must have same length as params. By default min_steps is
            equal to base_steps, i.e step size is not decreased beyond what is optimal
            according to the rule of thumb.
        f0 (numpy.ndarray): 1d numpy array with func(x), optional.
        n_cores (int): Number of processes used to parallelize the function
            evaluations. Default 1.
        error_handling (str): One of "continue" (catch errors and continue to calculate
            derivative estimates. In this case, some derivative estimates can be
            missing but no errors are raised), "raise" (catch errors and continue
            to calculate derivative estimates at fist but raise an error if all
            evaluations for one parameter failed) and "raise_strict" (raise an error
            as soon as a function evaluation fails).
        batch_evaluator (str or callable): Name of a pre-implemented batch evaluator
            (currently 'joblib' and 'pathos_mp') or Callable with the same interface
            as the estimagic batch_evaluators.
        return_func_value (bool): If True, return function value at params, stored in
            output dict under "func_value". Default False. This is useful when using
            first_derivative during optimization.
        return_info (bool): If True, return additional information on function
            evaluations and internal derivative candidates, stored in output dict under
            "func_evals" and "derivative_candidates". Derivative candidates are only
            returned if n_steps > 1. Default True.
        key (str): If func returns a dictionary, take the derivative of
            func(params)[key].

    Returns:
        result (dict): Result dictionary with keys:
            - "derivative" (numpy.ndarray, pandas.Series or pandas.DataFrame): The
                estimated second derivative of func at params. The shape of the output
                depends on the dimension of params and func(params):

                - f: R -> R leads to shape (1,), usually called second derivative
                - f: R^m -> R leads to shape (m, m), usually called Hessian
                - f: R -> R^n leads to shape (n,), usually called Hessian
                - f: R^m -> R^n leads to shape (n, m, m), usually called Hessian tensor

            - "func_value" (numpy.ndarray, pandas.Series or pandas.DataFrame): Function
                value at params, returned if return_func_value is True.

            - "func_evals_one_step" (pandas.DataFrame): Function evaluations produced by
                internal derivative method when altering the params vector at one
                dimension, returned if return_info is True.

            - "func_evals_two_step" (pandas.DataFrame): This features is not implemented
                yet and therefore set to None. Once implemented it will contain
                function evaluations produced by internal derivative method when
                altering the params vector at two dimensions, returned if return_info is
                True.

            - "func_evals_cross_step" (pandas.DataFrame): This features is not
                implemented yet and therefore set to None. Once implemented it will
                contain function evaluations produced by internal derivative method when
                altering the params vector at two dimensions in different directions,
                returned if return_info is True.

    """
    lower_bounds, upper_bounds = _process_bounds(lower_bounds, upper_bounds,
                                                 params)

    # handle keyword arguments
    func_kwargs = {} if func_kwargs is None else func_kwargs
    partialed_func = functools.partial(func, **func_kwargs)

    # convert params to numpy, but keep label information
    params_index = (params.index if isinstance(params, (pd.DataFrame,
                                                        pd.Series)) else None)

    x = params["value"].to_numpy() if isinstance(params,
                                                 pd.DataFrame) else params
    x = np.atleast_1d(x).astype(float)

    if np.isnan(x).any():
        raise ValueError("The parameter vector must not contain NaNs.")

    implemented_methods = {
        "forward", "backward", "central_average", "central_cross"
    }
    if method not in implemented_methods:
        raise ValueError(f"Method has to be in {implemented_methods}.")

    # generate the step array
    steps = generate_steps(
        x=x,
        method=("central" if "central" in method else method),
        n_steps=n_steps,
        target="second_derivative",
        base_steps=base_steps,
        scaling_factor=scaling_factor,
        lower_bounds=lower_bounds,
        upper_bounds=upper_bounds,
        step_ratio=step_ratio,
        min_steps=min_steps,
    )

    # generate parameter vectors at which func has to be evaluated as numpy arrays
    evaluation_points = {"one_step": [], "two_step": [], "cross_step": []}
    for step_arr in steps:
        # single direction steps
        for i, j in product(range(n_steps), range(len(x))):
            if np.isnan(step_arr[i, j]):
                evaluation_points["one_step"].append(np.nan)
            else:
                point = x.copy()
                point[j] += step_arr[i, j]
                evaluation_points["one_step"].append(point)
        # two and cross direction steps
        for i, j, k in product(range(n_steps), range(len(x)), range(len(x))):
            if j > k or np.isnan(step_arr[i, j]) or np.isnan(step_arr[i, k]):
                evaluation_points["two_step"].append(np.nan)
                evaluation_points["cross_step"].append(np.nan)
            else:
                point = x.copy()
                point[j] += step_arr[i, j]
                point[k] += step_arr[i, k]
                evaluation_points["two_step"].append(point)
                if j == k:
                    evaluation_points["cross_step"].append(np.nan)
                else:
                    point = x.copy()
                    point[j] += step_arr[i, j]
                    point[k] -= step_arr[i, k]
                    evaluation_points["cross_step"].append(point)

    # convert the numpy arrays to whatever is needed by func
    evaluation_points = {
        step_type: _convert_evaluation_points_to_original(points, params)
        for step_type, points in evaluation_points.items()
    }

    # we always evaluate f0, so we can fall back to one-sided derivatives if
    # two-sided derivatives fail. The extra cost is negligible in most cases.
    if f0 is None:
        evaluation_points["one_step"].append(params)

    # do the function evaluations for one and two step, including error handling
    batch_error_handling = "raise" if error_handling == "raise_strict" else "continue"
    raw_evals = _nan_skipping_batch_evaluator(
        func=partialed_func,
        arguments=list(
            itertools.chain.from_iterable(evaluation_points.values())),
        n_cores=n_cores,
        error_handling=batch_error_handling,
        batch_evaluator=batch_evaluator,
    )

    # extract information on exceptions that occurred during function evaluations
    exc_info = "\n\n".join([val for val in raw_evals if isinstance(val, str)])
    raw_evals = [
        val if not isinstance(val, str) else np.nan for val in raw_evals
    ]

    n_one_step, n_two_step, n_cross_step = map(len, evaluation_points.values())
    raw_evals = {
        "one_step": raw_evals[:n_one_step],
        "two_step": raw_evals[n_one_step:n_two_step + n_one_step],
        "cross_step": raw_evals[n_two_step + n_one_step:],
    }

    # store full function value at params as func_value and a processed version of it
    # that we need to calculate derivatives as f0
    if f0 is None:
        f0 = raw_evals["one_step"][-1]
        raw_evals["one_step"] = raw_evals["one_step"][:-1]
    f0 = f0[key] if isinstance(f0, dict) else f0
    out_index = f0.index if isinstance(f0, pd.Series) else None
    f0 = np.atleast_1d(f0)

    # convert the raw evaluations to numpy arrays
    raw_evals = {
        step_type: _convert_evals_to_numpy(evals, key)
        for step_type, evals in raw_evals.items()
    }

    # reshape arrays into dimension (n_steps, dim_f, dim_x) or (n_steps, dim_f, dim_x,
    # dim_x) for finite differences
    evals = {}
    evals["one_step"] = _reshape_one_step_evals(raw_evals["one_step"], n_steps,
                                                len(x))
    evals["two_step"] = _reshape_two_step_evals(raw_evals["two_step"], n_steps,
                                                len(x))
    evals["cross_step"] = _reshape_cross_step_evals(raw_evals["cross_step"],
                                                    n_steps, len(x), f0)

    # apply finite difference formulae
    hess_candidates = {}
    for m in ["forward", "backward", "central_average", "central_cross"]:
        hess_candidates[m] = finite_differences.hessian(evals, steps, f0, m)

    # get the best derivative estimate out of all derivative estimates that could be
    # calculated, given the function evaluations.
    orders = {
        "central_cross":
        ["central_cross", "central_average", "forward", "backward"],
        "central_average":
        ["central_average", "central_cross", "forward", "backward"],
        "forward": ["forward", "backward", "central_average", "central_cross"],
        "backward":
        ["backward", "forward", "central_average", "central_cross"],
    }

    if n_steps == 1:
        hess = _consolidate_one_step_derivatives(hess_candidates,
                                                 orders[method])
        updated_candidates = None
    else:
        raise ValueError(
            "Richardson extrapolation is not implemented for the second derivative yet."
        )

    # raise error if necessary
    if error_handling in ("raise", "raise_strict") and np.isnan(hess).any():
        raise Exception(exc_info)

    # results processing
    derivative = np.squeeze(hess)
    derivative = _add_index_to_second_derivative(derivative, params_index,
                                                 out_index)

    result = {"derivative": derivative}
    if return_func_value:
        result["func_value"] = f0
    info = _collect_additional_info(return_info,
                                    steps,
                                    evals,
                                    updated_candidates,
                                    target="second_derivative")
    result = {**result, **info}
    return result
Beispiel #3
0
def first_derivative(
    func,
    params,
    *,
    func_kwargs=None,
    method="central",
    n_steps=1,
    base_steps=None,
    scaling_factor=1,
    lower_bounds=None,
    upper_bounds=None,
    step_ratio=2,
    min_steps=None,
    f0=None,
    n_cores=DEFAULT_N_CORES,
    error_handling="continue",
    batch_evaluator="joblib",
    return_func_value=False,
    return_info=False,
    key=None,
):
    """Evaluate first derivative of func at params according to method and step options.

    Internally, the function is converted such that it maps from a 1d array to a 1d
    array. Then the Jacobian of that function is calculated.

    The parameters and the function output can be estimagic-pytrees; for more details on
    estimagi-pytrees see :ref:`eeppytrees`. By default the resulting Jacobian will be
    returned as a block-pytree.

    For a detailed description of all options that influence the step size as well as an
    explanation of how steps are adjusted to bounds in case of a conflict, see
    :func:`~estimagic.differentiation.generate_steps.generate_steps`.

    Args:
        func (callable): Function of which the derivative is calculated.
        params (pytree): A pytree. See :ref:`params`.
        func_kwargs (dict): Additional keyword arguments for func, optional.
        method (str): One of ["central", "forward", "backward"], default "central".
        n_steps (int): Number of steps needed. For central methods, this is
            the number of steps per direction. It is 1 if no Richardson extrapolation
            is used.
        base_steps (numpy.ndarray, optional): 1d array of the same length as params.
            base_steps * scaling_factor is the absolute value of the first (and possibly
            only) step used in the finite differences approximation of the derivative.
            If base_steps * scaling_factor conflicts with bounds, the actual steps will
            be adjusted. If base_steps is not provided, it will be determined according
            to a rule of thumb as long as this does not conflict with min_steps.
        scaling_factor (numpy.ndarray or float): Scaling factor which is applied to
            base_steps. If it is an numpy.ndarray, it needs to be as long as params.
            scaling_factor is useful if you want to increase or decrease the base_step
            relative to the rule-of-thumb or user provided base_step, for example to
            benchmark the effect of the step size. Default 1.
        lower_bounds (pytree): To be written.
        upper_bounds (pytree): To be written.
        step_ratio (float, numpy.array): Ratio between two consecutive Richardson
            extrapolation steps in the same direction. default 2.0. Has to be larger
            than one. The step ratio is only used if n_steps > 1.
        min_steps (numpy.ndarray): Minimal possible step sizes that can be chosen to
            accommodate bounds. Must have same length as params. By default min_steps is
            equal to base_steps, i.e step size is not decreased beyond what is optimal
            according to the rule of thumb.
        f0 (numpy.ndarray): 1d numpy array with func(x), optional.
        n_cores (int): Number of processes used to parallelize the function
            evaluations. Default 1.
        error_handling (str): One of "continue" (catch errors and continue to calculate
            derivative estimates. In this case, some derivative estimates can be
            missing but no errors are raised), "raise" (catch errors and continue
            to calculate derivative estimates at fist but raise an error if all
            evaluations for one parameter failed) and "raise_strict" (raise an error
            as soon as a function evaluation fails).
        batch_evaluator (str or callable): Name of a pre-implemented batch evaluator
            (currently 'joblib' and 'pathos_mp') or Callable with the same interface
            as the estimagic batch_evaluators.
        return_func_value (bool): If True, return function value at params, stored in
            output dict under "func_value". Default False. This is useful when using
            first_derivative during optimization.
        return_info (bool): If True, return additional information on function
            evaluations and internal derivative candidates, stored in output dict under
            "func_evals" and "derivative_candidates". Derivative candidates are only
            returned if n_steps > 1. Default False.
        key (str): If func returns a dictionary, take the derivative of
            func(params)[key].

    Returns:
        result (dict): Result dictionary with keys:
            - "derivative" (numpy.ndarray, pandas.Series or pandas.DataFrame): The
                estimated first derivative of func at params. The shape of the output
                depends on the dimension of params and func(params):

                - f: R -> R leads to shape (1,), usually called derivative
                - f: R^m -> R leads to shape (m, ), usually called Gradient
                - f: R -> R^n leads to shape (n, 1), usually called Jacobian
                - f: R^m -> R^n leads to shape (n, m), usually called Jacobian

            - "func_value" (numpy.ndarray, pandas.Series or pandas.DataFrame): Function
                value at params, returned if return_func_value is True.

            - "func_evals" (pandas.DataFrame): Function evaluations produced by internal
                derivative method, returned if return_info is True.

            - "derivative_candidates" (pandas.DataFrame): Derivative candidates from
                Richardson extrapolation, returned if return_info is True and n_steps >
                1.

    """
    _is_fast_params = isinstance(params, np.ndarray) and params.ndim == 1
    registry = get_registry(extended=True)

    lower_bounds, upper_bounds = get_bounds(params, lower_bounds, upper_bounds)

    # handle keyword arguments
    func_kwargs = {} if func_kwargs is None else func_kwargs
    partialed_func = functools.partial(func, **func_kwargs)

    # convert params to numpy
    if not _is_fast_params:
        x, params_treedef = tree_flatten(params, registry=registry)
        x = np.array(x, dtype=np.float64)
    else:
        x = params.astype(float)

    if np.isnan(x).any():
        raise ValueError("The parameter vector must not contain NaNs.")

    # generate the step array
    steps = generate_steps(
        x=x,
        method=method,
        n_steps=n_steps,
        target="first_derivative",
        base_steps=base_steps,
        scaling_factor=scaling_factor,
        lower_bounds=lower_bounds,
        upper_bounds=upper_bounds,
        step_ratio=step_ratio,
        min_steps=min_steps,
    )

    # generate parameter vectors at which func has to be evaluated as numpy arrays
    evaluation_points = []
    for step_arr in steps:
        for i, j in product(range(n_steps), range(len(x))):
            if np.isnan(step_arr[i, j]):
                evaluation_points.append(np.nan)
            else:
                point = x.copy()
                point[j] += step_arr[i, j]
                evaluation_points.append(point)

    # convert the numpy arrays to whatever is needed by func
    if not _is_fast_params:
        evaluation_points = [
            # entries are either a numpy.ndarray or np.nan
            _unflatten_if_not_nan(p, params_treedef, registry)
            for p in evaluation_points
        ]

    # we always evaluate f0, so we can fall back to one-sided derivatives if
    # two-sided derivatives fail. The extra cost is negligible in most cases.
    if f0 is None:
        evaluation_points.append(params)

    # do the function evaluations, including error handling
    batch_error_handling = "raise" if error_handling == "raise_strict" else "continue"
    raw_evals = _nan_skipping_batch_evaluator(
        func=partialed_func,
        arguments=evaluation_points,
        n_cores=n_cores,
        error_handling=batch_error_handling,
        batch_evaluator=batch_evaluator,
    )

    # extract information on exceptions that occurred during function evaluations
    exc_info = "\n\n".join([val for val in raw_evals if isinstance(val, str)])
    raw_evals = [
        val if not isinstance(val, str) else np.nan for val in raw_evals
    ]

    # store full function value at params as func_value and a processed version of it
    # that we need to calculate derivatives as f0
    if f0 is None:
        f0 = raw_evals[-1]
        raw_evals = raw_evals[:-1]
    func_value = f0

    use_key = key is not None and isinstance(f0, dict)
    f0_tree = f0[key] if use_key else f0
    scalar_out = np.isscalar(f0_tree)
    vector_out = isinstance(f0_tree, np.ndarray) and f0_tree.ndim == 1

    if scalar_out:
        f0 = np.array([f0_tree], dtype=float)
    elif vector_out:
        f0 = f0_tree.astype(float)
    else:
        f0 = tree_leaves(f0_tree, registry=registry)
        f0 = np.array(f0, dtype=np.float64)

    # convert the raw evaluations to numpy arrays
    raw_evals = _convert_evals_to_numpy(
        raw_evals=raw_evals,
        key=key,
        registry=registry,
        is_scalar_out=scalar_out,
        is_vector_out=vector_out,
    )

    # apply finite difference formulae
    evals = np.array(raw_evals).reshape(2, n_steps, len(x), -1)
    evals = np.transpose(evals, axes=(0, 1, 3, 2))
    evals = Evals(pos=evals[0], neg=evals[1])

    jac_candidates = {}
    for m in ["forward", "backward", "central"]:
        jac_candidates[m] = finite_differences.jacobian(evals, steps, f0, m)

    # get the best derivative estimate out of all derivative estimates that could be
    # calculated, given the function evaluations.
    orders = {
        "central": ["central", "forward", "backward"],
        "forward": ["forward", "backward"],
        "backward": ["backward", "forward"],
    }

    if n_steps == 1:
        jac = _consolidate_one_step_derivatives(jac_candidates, orders[method])
        updated_candidates = None
    else:
        richardson_candidates = _compute_richardson_candidates(
            jac_candidates, steps, n_steps)
        jac, updated_candidates = _consolidate_extrapolated(
            richardson_candidates)

    # raise error if necessary
    if error_handling in ("raise", "raise_strict") and np.isnan(jac).any():
        raise Exception(exc_info)

    # results processing
    if _is_fast_params and vector_out:
        derivative = jac
    elif _is_fast_params and scalar_out:
        derivative = jac.flatten()
    else:
        derivative = matrix_to_block_tree(jac, f0_tree, params)

    result = {"derivative": derivative}
    if return_func_value:
        result["func_value"] = func_value
    if return_info:
        info = _collect_additional_info(steps,
                                        evals,
                                        updated_candidates,
                                        target="first_derivative")
        result = {**result, **info}
    return result
Beispiel #4
0
def first_derivative(
    func,
    x,
    func_kwargs=None,
    method="central",
    n_steps=1,
    base_steps=None,
    scaling_factor=1,
    lower_bounds=None,
    upper_bounds=None,
    step_ratio=2,
    min_steps=None,
    f0=None,
    n_cores=1,
    return_richardson_info=False,
):

    """Evaluate first derivative of func at x according to method and step options.

    Internally, the function is converted such that it maps from a 1d array to a 1d
    array. Then the Jacobian of that function is calculated. The resulting derivative
    estimate is always a numpy array.

    Detailed description of all options that influence the step size as well as an
    explanation of how steps are adjusted to bounds in case of a conflict,
    see :func:`~estimagic.differentiation.generate_steps.generate_steps`.

    Args:
        func (callable): Function of which the derivative is calculated.
        x (np.ndarray): 1d array at which the derivative is calculated.
        func_kwargs (dict): Additional keyword arguments for func, optional.
        method (str): One of ["central", "forward", "backward"], default "central".
        n_steps (int): Number of steps needed. For central methods, this is
            the number of steps per direction. It is 1 if no Richardson extrapolation
            is used.
        base_steps (np.ndarray, optional): 1d array of the same length as x. base_steps
            * scaling_factor is the absolute value of the first (and possibly only) step
            used in the finite differences approximation of the derivative. If the
            base_steps * scaling_factor conflicts with bounds, the actual steps will
            be adjusted. If base_steps is not provided, it will be determined according
            to a rule of thumb as long as this does not conflict with min_steps.
        scaling_factor (np.ndarray or float): Scaling factor which is applied to
            base_steps. If it is an np.ndarray, it needs to have the same shape as x.
            scaling_factor is useful if you want to increase or decrease the base_step
            relative to the rule-of-thumb or user provided base_step, for example to
            benchmark the effect of the step size. Default 1.
        lower_bounds (np.ndarray): 1d array with lower bounds for each parameter.
        upper_bounds (np.ndarray): 1d array with upper bounds for each parameter.
        step_ratio (float or array): Ratio between two consecutive Richardson
            extrapolation steps in the same direction. default 2.0. Has to be larger
            than one. step ratio is only used if n_steps > 1.
        min_steps (np.ndarray): Minimal possible step sizes that can be chosen to
            accommodate bounds. Needs to have same length as x. By default min_steps is
            equal to base_steps, i.e step size is not decreased beyond what is optimal
            according to the rule of thumb.
        f0 (np.ndarray): 1d numpy array with func(x), optional.
        n_cores (int): Number of processes used to parallelize the function
            evaluations. Default 1.
        return_richardson_info (bool): Should additional information on the Richardson
            extrapolation be returned. Has no effect if n_steps = 1.

    Returns:
        derivative (np.ndarray): The estimated first derivative of func at x.
            The shape of the output depends on the dimension of x and func(x):
            f: R -> R leads to shape (1,), usually called derivative
            f: R^m -> R leads to shape (m, ), usually called Gradient
            f: R -> R^n leads to shape (n, 1), usually called Jacobian
            f: R^m -> R^n leads to shape (n, m), usually called Jacobian

        info (OrderedDict): Dictionary with all derivative estimates and
            error estimates for different parameter specifications using Richardson
            extrapolations. Is only returned if return_richardson_info is True.

    """
    func_kwargs = {} if func_kwargs is None else func_kwargs
    partialed_func = functools.partial(func, **func_kwargs)
    f0 = partialed_func(x) if f0 is None else f0

    x_was_scalar = np.isscalar(x)
    f_was_scalar = np.isscalar(f0)

    x = np.atleast_1d(x).astype(np.float_)
    f0 = np.atleast_1d(f0).astype(np.float_)

    @nan_if_exception
    @de_scalarize(x_was_scalar)
    def internal_func(x):
        return partialed_func(x)

    steps = generate_steps(
        x=x,
        method=method,
        n_steps=n_steps,
        target="first_derivative",
        base_steps=base_steps,
        scaling_factor=scaling_factor,
        lower_bounds=lower_bounds,
        upper_bounds=upper_bounds,
        step_ratio=step_ratio,
        min_steps=min_steps,
    )

    evaluation_points = []
    for step_arr in steps:
        for i, j in product(range(n_steps), range(len(x))):
            if np.isnan(step_arr[i, j]):
                evaluation_points.append(np.nan)
            else:
                point = x.copy()
                point[j] += step_arr[i, j]
                evaluation_points.append(point)

    raw_evals = _nan_skipping_batch_evaluator(internal_func, evaluation_points, n_cores)

    evals = np.array(raw_evals).reshape(2, n_steps, len(x), -1)
    evals = np.transpose(evals, axes=(0, 1, 3, 2))
    evals = namedtuple_from_kwargs(pos=evals[0], neg=evals[1])

    jac_candidates = {}
    for m in ["forward", "backward", "central"]:
        jac_candidates[m] = finite_differences.jacobian(evals, steps, f0, method)

    orders = {
        "central": ["central", "forward", "backward"],
        "forward": ["forward", "backward"],
        "backward": ["backward", "forward"],
    }

    if n_steps == 1:
        jac = _consolidate_one_step_derivatives(jac_candidates, orders[method])
    else:
        richardson_candidates = _compute_richardson_candidates(
            jac_candidates, steps, n_steps
        )
        jac = _consolidate_extrapolated(richardson_candidates)

    derivative = jac.flatten() if f_was_scalar else jac

    return_info = n_steps > 1 and return_richardson_info
    out = (derivative, richardson_candidates) if return_info else derivative
    return out
Beispiel #5
0
def first_derivative(
    func,
    params,
    func_kwargs=None,
    method="central",
    n_steps=1,
    base_steps=None,
    scaling_factor=1,
    lower_bounds=None,
    upper_bounds=None,
    step_ratio=2,
    min_steps=None,
    f0=None,
    n_cores=DEFAULT_N_CORES,
    error_handling="continue",
    batch_evaluator="joblib",
    return_func_value=False,
    key=None,
):
    """Evaluate first derivative of func at params according to method and step options.

    Internally, the function is converted such that it maps from a 1d array to a 1d
    array. Then the Jacobian of that function is calculated. The resulting derivative
    estimate is always a :class:`numpy.ndarray`.

    The parameters and the function output can be pandas objects (Series or DataFrames
    with value column). In that case the output of first_derivative is also a pandas
    object and with appropriate index and columns.

    Detailed description of all options that influence the step size as well as an
    explanation of how steps are adjusted to bounds in case of a conflict,
    see :func:`~estimagic.differentiation.generate_steps.generate_steps`.

    Args:
        func (callable): Function of which the derivative is calculated.
        params (numpy.ndarray, pandas.Series or pandas.DataFrame): 1d numpy array or
            :class:`pandas.DataFrame` with parameters at which the derivative is
            calculated. If it is a DataFrame, it can contain the columns "lower_bound"
            and "upper_bound" for bounds. See :ref:`params`.
        func_kwargs (dict): Additional keyword arguments for func, optional.
        method (str): One of ["central", "forward", "backward"], default "central".
        n_steps (int): Number of steps needed. For central methods, this is
            the number of steps per direction. It is 1 if no Richardson extrapolation
            is used.
        base_steps (numpy.ndarray, optional): 1d array of the same length as pasams.
            base_steps * scaling_factor is the absolute value of the first (and possibly
            only) step used in the finite differences approximation of the derivative.
            If base_steps * scaling_factor conflicts with bounds, the actual steps will
            be adjusted. If base_steps is not provided, it will be determined according
            to a rule of thumb as long as this does not conflict with min_steps.
        scaling_factor (numpy.ndarray or float): Scaling factor which is applied to
            base_steps. If it is an numpy.ndarray, it needs to be as long as params.
            scaling_factor is useful if you want to increase or decrease the base_step
            relative to the rule-of-thumb or user provided base_step, for example to
            benchmark the effect of the step size. Default 1.
        lower_bounds (numpy.ndarray): 1d array with lower bounds for each parameter. If
            params is a DataFrame and has the columns "lower_bound", this will be taken
            as lower_bounds if now lower_bounds have been provided explicitly.
        upper_bounds (numpy.ndarray): 1d array with upper bounds for each parameter. If
            params is a DataFrame and has the columns "upper_bound", this will be taken
            as upper_bounds if no upper_bounds have been provided explicitly.
        step_ratio (float, numpy.array): Ratio between two consecutive Richardson
            extrapolation steps in the same direction. default 2.0. Has to be larger
            than one. The step ratio is only used if n_steps > 1.
        min_steps (numpy.ndarray): Minimal possible step sizes that can be chosen to
            accommodate bounds. Must have same length as params. By default min_steps is
            equal to base_steps, i.e step size is not decreased beyond what is optimal
            according to the rule of thumb.
        f0 (numpy.ndarray): 1d numpy array with func(x), optional.
        n_cores (int): Number of processes used to parallelize the function
            evaluations. Default 1.
        error_handling (str): One of "continue" (catch errors and continue to calculate
            derivative estimates. In this case, some derivative estimates can be
            missing but no errors are raised), "raise" (catch errors and continue
            to calculate derivative estimates at fist but raise an error if all
            evaluations for one parameter failed) and "raise_strict" (raise an error
            as soon as a function evaluation fails).
        batch_evaluator (str or callable): Name of a pre-implemented batch evaluator
            (currently 'joblib' and 'pathos_mp') or Callable with the same interface
            as the estimagic batch_evaluators.
        return_func_value (bool): If True, return a tuple with the derivative and the
            function value at params. Default False. This is useful when using
            first_derivative during optimization.
        key (str): If func returns a dictionary, take the derivative of
            func(params)[key].

    Returns:
        derivative (numpy.ndarray, pandas.Series or pandas.DataFrame): The estimated
            first derivative of func at params. The shape of the output depends on the
            dimension of params and func(params):

            - f: R -> R leads to shape (1,), usually called derivative
            - f: R^m -> R leads to shape (m, ), usually called Gradient
            - f: R -> R^n leads to shape (n, 1), usually called Jacobian
            - f: R^m -> R^n leads to shape (n, m), usually called Jacobian
        float, dict, numpy.ndarray or pandas.Series: The function value at params, only
            returned if return_func_value is True.

    """
    lower_bounds, upper_bounds = _process_bounds(lower_bounds, upper_bounds,
                                                 params)

    # handle keyword arguments
    func_kwargs = {} if func_kwargs is None else func_kwargs
    partialed_func = functools.partial(func, **func_kwargs)

    # convert params to numpy, but keep label information
    params_index = (params.index if isinstance(params, (pd.DataFrame,
                                                        pd.Series)) else None)

    x = params["value"].to_numpy() if isinstance(params,
                                                 pd.DataFrame) else params
    x = np.atleast_1d(x).astype(float)

    if np.isnan(x).any():
        raise ValueError("The parameter vector must not contain NaNs.")

    # generate the step array
    steps = generate_steps(
        x=x,
        method=method,
        n_steps=n_steps,
        target="first_derivative",
        base_steps=base_steps,
        scaling_factor=scaling_factor,
        lower_bounds=lower_bounds,
        upper_bounds=upper_bounds,
        step_ratio=step_ratio,
        min_steps=min_steps,
    )

    # generate parameter vectors at which func has to be evaluated as numpy arrays
    evaluation_points = []
    for step_arr in steps:
        for i, j in product(range(n_steps), range(len(x))):
            if np.isnan(step_arr[i, j]):
                evaluation_points.append(np.nan)
            else:
                point = x.copy()
                point[j] += step_arr[i, j]
                evaluation_points.append(point)

    # convert the numpy arrays to whatever is needed by func
    evaluation_points = _convert_evaluation_points_to_original(
        evaluation_points, params)

    # we always evaluate f0, so we can fall back to one-sided derivatives if
    # two-sided derivatives fail. The extra cost is negligible in most cases.
    if f0 is None:
        evaluation_points.append(params)

    # do the function evaluations, including error handling
    batch_error_handling = "raise" if error_handling == "raise_strict" else "continue"
    raw_evals = _nan_skipping_batch_evaluator(
        func=partialed_func,
        arguments=evaluation_points,
        n_cores=n_cores,
        error_handling=batch_error_handling,
        batch_evaluator=batch_evaluator,
    )

    # extract information on exceptions that occurred during function evaluations
    exc_info = "\n\n".join([val for val in raw_evals if isinstance(val, str)])
    raw_evals = [
        val if not isinstance(val, str) else np.nan for val in raw_evals
    ]

    # store full function value at params as func_value and a processed version of it
    # that we need to calculate derivatives as f0
    if f0 is None:
        f0 = raw_evals[-1]
        raw_evals = raw_evals[:-1]
    func_value = f0
    f0 = f0[key] if isinstance(f0, dict) else f0
    f_was_scalar = np.isscalar(f0)
    out_index = f0.index if isinstance(f0, pd.Series) else None
    f0 = np.atleast_1d(f0)

    # convert the raw evaluations to numpy arrays
    raw_evals = _convert_evals_to_numpy(raw_evals, key)

    # apply finite difference formulae
    evals = np.array(raw_evals).reshape(2, n_steps, len(x), -1)
    evals = np.transpose(evals, axes=(0, 1, 3, 2))
    evals = namedtuple_from_kwargs(pos=evals[0], neg=evals[1])

    jac_candidates = {}
    for m in ["forward", "backward", "central"]:
        jac_candidates[m] = finite_differences.jacobian(evals, steps, f0, m)

    # get the best derivative estimate out of all derivative estimates that could be
    # calculated, given the function evaluations.
    orders = {
        "central": ["central", "forward", "backward"],
        "forward": ["forward", "backward"],
        "backward": ["backward", "forward"],
    }

    if n_steps == 1:
        jac = _consolidate_one_step_derivatives(jac_candidates, orders[method])
    else:
        richardson_candidates = _compute_richardson_candidates(
            jac_candidates, steps, n_steps)
        jac = _consolidate_extrapolated(richardson_candidates)

    # raise error if necessary
    if error_handling in ("raise", "raise_strict") and np.isnan(jac).any():
        raise Exception(exc_info)

    # results processing
    derivative = jac.flatten() if f_was_scalar else jac
    derivative = _add_index_to_derivative(derivative, params_index, out_index)
    res = (derivative, func_value) if return_func_value else derivative
    return res