예제 #1
0
def _compute_testable_estimagic_and_jax_derivatives(func,
                                                    params,
                                                    func_jax=None):
    """Compute first and second derivatives with both estimagic and jax.

    The estimagic results are the ``"derivative"`` entries returned by
    ``first_derivative`` and ``second_derivative``; the jax results are the
    outputs of ``jax.jacobian`` and ``jax.hessian``. All four results are
    returned exactly as produced, no conversion is applied in this helper.

    Args:
        func (callable): Function handed to the estimagic derivative routines.
        params: Point at which all derivatives are evaluated.
        func_jax (callable, optional): Function handed to jax. Falls back to
            ``func`` when omitted. A separate function is useful when the jax
            version has to return ``jax.numpy`` arrays while the estimagic
            version returns numpy arrays.

    Returns:
        dict: Nested dictionary with outer keys ``"jac"`` and ``"hess"`` and
            inner keys ``"estimagic"`` and ``"jax"``.

    """
    if func_jax is None:
        func_jax = func

    derivatives = {"jac": {}, "hess": {}}

    # Jacobians: numerical (estimagic) first, then automatic (jax).
    derivatives["jac"]["estimagic"] = first_derivative(func,
                                                       params)["derivative"]
    derivatives["jac"]["jax"] = jax.jacobian(func_jax)(params)

    # Hessians, in the same order.
    derivatives["hess"]["estimagic"] = second_derivative(func,
                                                         params)["derivative"]
    derivatives["hess"]["jax"] = jax.hessian(func_jax)(params)

    return derivatives
예제 #2
0
def test_second_derivative_scalar(method):
    """Check the numerical second derivative of ``x**2`` at a scalar point.

    The analytical second derivative is 2 everywhere. ``method`` is accepted
    (presumably supplied by test parametrization) but is not forwarded to
    ``second_derivative``, which therefore runs with its default method.
    """
    tolerance = 1.5 * 10**(-6)
    analytical = 2.0

    def square(x):
        return x**2

    result = second_derivative(square, 3.0, n_cores=1)
    abs_error = np.abs(result["derivative"] - analytical)

    assert abs_error < tolerance
예제 #3
0
def test_second_derivative_scalar_with_return_func_value(method):
    """Check derivative and function value returned for ``x**3`` at ``x = 3``.

    With ``return_func_value=True`` the result must contain the function
    value (27) next to the second derivative (18). ``method`` is accepted
    (presumably supplied by test parametrization) but is not forwarded to
    ``second_derivative``.
    """
    tolerance = 1.5 * 10**(-6)
    expected_derivative = 18.0
    expected_func_value = 27.0

    def cube(x):
        return x**3

    result = second_derivative(
        cube,
        3.0,
        return_func_value=True,
        return_info=False,
        n_cores=1,
    )

    # The function value is compared exactly, the derivative up to tolerance.
    assert result["func_value"] == expected_func_value
    assert np.abs(result["derivative"] - expected_derivative) < tolerance
예제 #4
0
def test_second_derivative_hessian(binary_choice_inputs, method):
    """Compare the numerical Hessian of the logit log-likelihood to the
    closed-form one.

    The numerical Hessian is computed with a single step (``n_steps=1``), the
    given ``method`` and a precomputed function value ``f0``. Both the maximal
    and the mean absolute deviation from ``logit_loglike_hessian`` have to
    stay below their respective tolerances.
    """
    inputs = binary_choice_inputs
    loglike = partial(logit_loglike, y=inputs["y"], x=inputs["x"])
    start_params = inputs["params_np"]

    numerical = second_derivative(
        func=loglike,
        method=method,
        params=start_params,
        n_steps=1,
        f0=loglike(start_params),
        n_cores=1,
    )

    analytical = logit_loglike_hessian(start_params, inputs["y"], inputs["x"])
    abs_error = np.abs(numerical["derivative"] - analytical)

    assert np.max(abs_error) < 1.5 * 10**(-2)
    assert np.mean(abs_error) < 1.5 * 10**(-3)
예제 #5
0
def estimate_ml(
    loglike,
    params,
    optimize_options,
    *,
    lower_bounds=None,
    upper_bounds=None,
    constraints=None,
    logging=False,
    log_options=None,
    loglike_kwargs=None,
    numdiff_options=None,
    jacobian=None,
    jacobian_kwargs=None,
    hessian=None,
    hessian_kwargs=None,
    design_info=None,
):
    """Do a maximum likelihood (ml) estimation.

    This is a high level interface of our lower level functions for maximization,
    numerical differentiation and inference. It does the full workflow for maximum
    likelihood estimation with just one function call.

    While we have good defaults, you can still configure each aspect of each step
    via the optional arguments of this function. If you find it easier to do the
    maximization separately, you can do so and just provide the optimal parameters as
    ``params`` and set ``optimize_options=False``.

    Args:
        loglike (callable): Likelihood function that takes a params (and potentially
            other keyword arguments) and returns a dictionary that has at least the
            entries "value" (a scalar float) and "contributions" (a 1d numpy array or
            pytree) with the log likelihood contribution per individual.
        params (pytree): A pytree containing the estimated or start parameters of the
            likelihood model. If the supplied parameters are estimated parameters, set
            optimize_options to False. Pytrees can be a numpy array, a pandas Series, a
            DataFrame with "value" column, a float and any kind of (nested) dictionary
            or list containing these elements. See :ref:`params` for examples.
        optimize_options (dict, str or False): Keyword arguments that govern the
            numerical optimization. Valid entries are all arguments of
            :func:`~estimagic.optimization.optimize.minimize` except for those that are
            passed explicitly to ``estimate_ml``. If you pass False as optimize_options
            you signal that ``params`` are already the optimal parameters and no
            numerical optimization is needed. If you pass a str as optimize_options it
            is used as the ``algorithm`` option.
        lower_bounds (pytree): A pytree with the same structure as params with lower
            bounds for the parameters. Can be ``-np.inf`` for parameters with no lower
            bound.
        upper_bounds (pytree): As lower_bounds. Can be ``np.inf`` for parameters with
            no upper bound.
        constraints (list, dict): List with constraint dictionaries or single dict.
            See :ref:`constraints`.
        logging (pathlib.Path, str or False): Path to sqlite3 file (which typically has
            the file extension ``.db``). If the file does not exist, it will be created.
            The dashboard can only be used when logging is used.
        log_options (dict): Additional keyword arguments to configure the logging.
            - "fast_logging": A boolean that determines if "unsafe" settings are used
            to speed up write processes to the database. This should only be used for
            very short running criterion functions where the main purpose of the log
            is a real-time dashboard and it would not be catastrophic to get a
            corrupted database in case of a sudden system shutdown. If one evaluation
            of the criterion function (and gradient if applicable) takes more than
            100 ms, the logging overhead is negligible.
            - "if_table_exists": (str) One of "extend", "replace", "raise". What to
            do if the tables we want to write to already exist. Default "extend".
            - "if_database_exists": (str): One of "extend", "replace", "raise". What to
            do if the database we want to write to already exists. Default "extend".
        loglike_kwargs (dict): Additional keyword arguments for loglike.
        numdiff_options (dict): Keyword arguments for the calculation of numerical
            derivatives for the calculation of standard errors. See
            :ref:`first_derivative` for details.
        jacobian (callable or None): A function that takes ``params`` and potentially
            other keyword arguments and returns the jacobian of loglike["contributions"]
            with respect to the params. Note that you only need to pass a Jacobian
            function if you have a closed form Jacobian. If you pass None, a numerical
            Jacobian will be calculated.
        jacobian_kwargs (dict): Additional keyword arguments for the Jacobian function.
        hessian (callable or None or False): A function that takes ``params`` and
            potentially other keyword arguments and returns the Hessian of
            loglike["value"] with respect to the params.  If you pass None, a numerical
            Hessian will be calculated. If you pass ``False``, you signal that no
            Hessian should be calculated. Thus, no result that requires the Hessian will
            be calculated.
        hessian_kwargs (dict): Additional keyword arguments for the Hessian function.
        design_info (pandas.DataFrame): DataFrame with one row per observation that
            contains some or all of the variables "psu" (primary sampling unit),
            "strata" and "fpc" (finite population corrector). See
            :ref:`robust_likelihood_inference` for details.

    Returns:
        LikelihoodResult: A LikelihoodResult object.

    Raises:
        InvalidFunctionError: If ``loglike``, a callable ``jacobian`` or a callable
            ``hessian`` raises an ``Exception`` when evaluated at the estimated
            params. The original exception is chained as the cause.
        NotImplementedError: If a closed-form Hessian is combined with constraints.
        ValueError: If the Hessian case is none of "skip", "numerical" or
            "closed-form".

    """
    # ==================================================================================
    # Check and process inputs
    # ==================================================================================
    # ``optimize_options=False`` is the caller's signal that ``params`` are already
    # the optimum, so the maximization step below is skipped.
    is_optimized = optimize_options is False

    if not is_optimized:
        # A plain string is shorthand for the algorithm name.
        if isinstance(optimize_options, str):
            optimize_options = {"algorithm": optimize_options}

        check_optimization_options(
            optimize_options,
            usage="estimate_ml",
            algorithm_mandatory=True,
        )

    # The case labels compared against below are "closed-form", "numerical" and
    # "skip"; how they are derived from the argument is decided by
    # get_derivative_case, which is defined outside this function.
    jac_case = get_derivative_case(jacobian)
    hess_case = get_derivative_case(hessian)

    check_numdiff_options(numdiff_options, "estimate_ml")
    # Replace unset optional arguments by empty containers so that they can be
    # unpacked with ``**`` or tested for emptiness further down.
    numdiff_options = {} if numdiff_options in (None,
                                                False) else numdiff_options
    loglike_kwargs = {} if loglike_kwargs is None else loglike_kwargs
    constraints = [] if constraints is None else constraints
    jacobian_kwargs = {} if jacobian_kwargs is None else jacobian_kwargs
    hessian_kwargs = {} if hessian_kwargs is None else hessian_kwargs

    # ==================================================================================
    # Calculate estimates via maximization (if necessary)
    # ==================================================================================

    if is_optimized:
        estimates = params
        opt_res = None
    else:
        opt_res = maximize(
            criterion=loglike,
            criterion_kwargs=loglike_kwargs,
            params=params,
            lower_bounds=lower_bounds,
            upper_bounds=upper_bounds,
            constraints=constraints,
            logging=logging,
            log_options=log_options,
            **optimize_options,
        )
        estimates = opt_res.params

    # ==================================================================================
    # Do first function evaluations at estimated parameters
    # ==================================================================================

    # In each of the three evaluations below, KeyboardInterrupt and SystemExit are
    # re-raised untouched; any other Exception is wrapped in an InvalidFunctionError
    # with the original exception chained as its cause.
    try:
        loglike_eval = loglike(estimates, **loglike_kwargs)
    except (KeyboardInterrupt, SystemExit):
        raise
    except Exception as e:
        msg = "Error while evaluating loglike at estimated params."
        raise InvalidFunctionError(msg) from e

    if callable(jacobian):
        try:
            jacobian_eval = jacobian(estimates, **jacobian_kwargs)
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception as e:
            msg = "Error while evaluating closed form jacobian at estimated params."
            raise InvalidFunctionError(msg) from e
    else:
        jacobian_eval = None

    if callable(hessian):
        try:
            hessian_eval = hessian(estimates, **hessian_kwargs)
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception as e:
            msg = "Error while evaluating closed form hessian at estimated params."
            raise InvalidFunctionError(msg) from e
    else:
        hessian_eval = None

    # ==================================================================================
    # Get the converter for params and function outputs
    # ==================================================================================

    # The converter is built around the estimates (not the start params) and is
    # used below to move params, function values and derivatives between the
    # user-facing and the internal representation.
    converter, internal_estimates = get_converter(
        params=estimates,
        constraints=constraints,
        lower_bounds=lower_bounds,
        upper_bounds=upper_bounds,
        func_eval=loglike_eval,
        primary_key="contributions",
        scaling=False,
        scaling_options=None,
        derivative_eval=jacobian_eval,
    )

    # ==================================================================================
    # Calculate internal jacobian
    # ==================================================================================

    if jac_case == "closed-form":
        int_jac = converter.derivative_to_internal(jacobian_eval,
                                                   internal_estimates.values)
    elif jac_case == "numerical":

        # Criterion in terms of internal parameters: convert them to the external
        # params, evaluate the likelihood contributions and convert the result
        # to its internal form. ``loglike_eval`` inside the closure is a local
        # name and does not rebind the outer ``loglike_eval``.
        def func(x):
            p = converter.params_from_internal(x)
            loglike_eval = loglike(p, **loglike_kwargs)["contributions"]
            out = converter.func_to_internal(loglike_eval)
            return out

        jac_res = first_derivative(
            func=func,
            params=internal_estimates.values,
            lower_bounds=internal_estimates.lower_bounds,
            upper_bounds=internal_estimates.upper_bounds,
            **numdiff_options,
        )

        int_jac = jac_res["derivative"]
    else:
        int_jac = None

    # Without constraints and without a user-provided Jacobian, the internal
    # Jacobian is converted into a block tree and used as the external Jacobian.
    # ``constraints`` was normalized to ``[]`` above, so the ``None`` entry is
    # only a safeguard.
    if constraints in [None, []
                       ] and jacobian_eval is None and int_jac is not None:
        loglike_contribs = loglike_eval
        if isinstance(loglike_contribs,
                      dict) and "contributions" in loglike_contribs:
            loglike_contribs = loglike_contribs["contributions"]

        jacobian_eval = matrix_to_block_tree(
            int_jac,
            outer_tree=loglike_contribs,
            inner_tree=estimates,
        )

    # NOTE(review): this reason is also reported when ``int_jac`` is None because
    # jac_case was neither "closed-form" nor "numerical" -- confirm whether that
    # case can occur and whether it needs its own message.
    if jacobian_eval is None:
        _no_jac_reason = (
            "no closed form jacobian was provided and there are constraints")
    else:
        _no_jac_reason = None
    # ==================================================================================
    # Calculate internal Hessian
    # ==================================================================================

    if hess_case == "skip":
        int_hess = None
    elif hess_case == "numerical":

        # Same construction as for the Jacobian, but based on the scalar
        # "value" entry of the likelihood output. This definition replaces the
        # ``func`` defined in the Jacobian section.
        def func(x):
            p = converter.params_from_internal(x)
            loglike_eval = loglike(p, **loglike_kwargs)["value"]
            out = converter.func_to_internal(loglike_eval)
            return out

        hess_res = second_derivative(
            func=func,
            params=internal_estimates.values,
            lower_bounds=internal_estimates.lower_bounds,
            upper_bounds=internal_estimates.upper_bounds,
            **numdiff_options,
        )
        int_hess = hess_res["derivative"]
    elif hess_case == "closed-form" and constraints:
        # Note that the closed-form Hessian has already been evaluated above
        # by the time this error is raised.
        raise NotImplementedError(
            "Closed-form Hessians are not yet compatible with constraints.")
    elif hess_case == "closed-form":
        # NOTE(review): the tree templates here are the input ``params``, whereas
        # the Jacobian section uses ``estimates`` -- presumably both have the same
        # structure; confirm.
        int_hess = block_tree_to_matrix(
            hessian_eval,
            outer_tree=params,
            inner_tree=params,
        )
    else:
        raise ValueError()

    # Without constraints and without a user-provided Hessian, the internal
    # Hessian is converted into a block tree and used as the external Hessian.
    if constraints in [None, []
                       ] and hessian_eval is None and int_hess is not None:
        hessian_eval = matrix_to_block_tree(
            int_hess,
            outer_tree=params,
            inner_tree=params,
        )

    # Record why no external Hessian is available; the reason is handed to the
    # result object below.
    if hessian_eval is None:
        if hess_case == "skip":
            _no_hess_reason = "the hessian calculation was explicitly skipped."
        else:
            _no_hess_reason = (
                "no closed form hessian was provided and there are constraints"
            )
    else:
        _no_hess_reason = None

    # ==================================================================================
    # create a LikelihoodResult object
    # ==================================================================================

    free_estimates = calculate_free_estimates(estimates, internal_estimates)

    res = LikelihoodResult(
        _params=estimates,
        _converter=converter,
        _optimize_result=opt_res,
        _jacobian=jacobian_eval,
        _no_jacobian_reason=_no_jac_reason,
        _hessian=hessian_eval,
        _no_hessian_reason=_no_hess_reason,
        _internal_jacobian=int_jac,
        _internal_hessian=int_hess,
        _design_info=design_info,
        _internal_estimates=internal_estimates,
        _free_estimates=free_estimates,
        _has_constraints=constraints not in [None, []],
    )

    return res