Example no. 1
    def summary(self, ci_method="percentile", ci_level=0.95):
        """Create a summary of bootstrap results.

        Args:
            ci_method (str): Method of choice for confidence interval computation.
                The default is "percentile".
            ci_level (float): Confidence level for the calculation of confidence
                intervals. The default is 0.95.

        Returns:
            pd.DataFrame: The estimation summary as a DataFrame containing information
                on the mean, standard errors, as well as the confidence intervals.
                Soon this will be a pytree.
        """
        registry = get_registry(extended=True)
        names = leaf_names(self.base_outcome, registry=registry)
        summary_data = _calulcate_summary_data_bootstrap(self,
                                                         ci_method=ci_method,
                                                         ci_level=ci_level)
        summary = calculate_estimation_summary(
            summary_data=summary_data,
            names=names,
            free_names=names,
        )
        return summary
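A minimal usage sketch for context, assuming estimagic's public bootstrap function, which constructs the result object whose summary method is shown above (data and outcome are made up):

import numpy as np
import pandas as pd
from estimagic import bootstrap

# hypothetical data and outcome; bootstrap() returns the result object above
df = pd.DataFrame({"x": np.random.default_rng(0).normal(size=100)})
result = bootstrap(data=df, outcome=lambda data: data["x"].mean())
print(result.summary(ci_method="percentile", ci_level=0.95))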
Example no. 2
    def ci(self, ci_method="percentile", ci_level=0.95):
        """Calculate confidence intervals.

        Args:
            ci_method (str): Method of choice for computing confidence intervals.
                The default is "percentile".
            ci_level (float): Confidence level for the calculation of confidence
                intervals. The default is 0.95.

        Returns:
            Any: Pytree with the same structure as base_outcome containing lower
                bounds of confidence intervals.
            Any: Pytree with the same structure as base_outcome containing upper
                bounds of confidence intervals.
        """
        registry = get_registry(extended=True)
        base_outcome_flat, treedef = tree_flatten(self._base_outcome,
                                                  registry=registry)

        lower_flat, upper_flat = calculate_ci(base_outcome_flat,
                                              self._internal_outcomes,
                                              ci_method, ci_level)

        lower = tree_unflatten(treedef, lower_flat, registry=registry)
        upper = tree_unflatten(treedef, upper_flat, registry=registry)
        return lower, upper
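A short sketch of the pytree round-trip, reusing the hypothetical setup from the first example: because the outcome below returns a dict, lower and upper come back as dicts with the same keys.

result = bootstrap(
    data=df, outcome=lambda d: {"mean": d["x"].mean(), "std": d["x"].std()}
)
lower, upper = result.ci(ci_method="percentile", ci_level=0.95)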
Example no. 3
def _check_dimensions_hessian(hessian, f_tree, params_tree):
    extended_registry = get_registry(extended=True)
    flat_f = tree_leaves(f_tree, registry=extended_registry)
    flat_p = tree_leaves(params_tree, registry=extended_registry)

    if len(flat_f) == 1:
        if np.squeeze(hessian).ndim == 0:
            if len(flat_p) != 1:
                raise ValueError(
                    "Hessian dimensions do not match those of params.")
        elif np.squeeze(hessian).ndim == 2:
            if np.squeeze(hessian).shape != (len(flat_p), len(flat_p)):
                raise ValueError(
                    "Hessian dimensions do not match those of params.")
        else:
            raise ValueError(
                "Hessian must be 0- or 2-d if f is scalar-valued.")
    else:
        if hessian.ndim != 3:
            raise ValueError("Hessian must be 3d if f is multidimensional.")
        if hessian.shape[0] != len(flat_f):
            raise ValueError(
                "First Hessian dimension does not match that of f.")
        if hessian.shape[1:] != (len(flat_p), len(flat_p)):
            raise ValueError(
                "Last two Hessian dimensions do not match those of params.")
Example no. 4
def get_history_arrays(history, direction):
    parhist = history["params"]
    is_flat = (len(parhist) > 0 and isinstance(parhist[0], np.ndarray)
               and parhist[0].ndim == 1)
    if is_flat:
        to_internal = lambda x: x.tolist()
    else:
        registry = get_registry(extended=True)
        to_internal = partial(tree_just_flatten, registry=registry)

    critvals = np.array(history["criterion"])

    params = np.array([to_internal(p) for p in history["params"]])

    runtimes = np.array(history["runtime"])

    if direction == "minimize":
        monotone = np.minimum.accumulate(critvals)
        is_accepted = critvals <= monotone
    elif direction == "maximize":
        monotone = np.maximum.accumulate(critvals)
        is_accepted = critvals >= monotone

    out = {
        "criterion": critvals,
        "params": params,
        "runtimes": runtimes,
        "monotone_criterion": monotone,
        "is_accepted": is_accepted,
    }
    return out
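A toy illustration of the bookkeeping, with a hypothetical history dict in the layout the function expects:

import numpy as np

history = {
    "params": [np.array([1.0]), np.array([2.0]), np.array([0.5])],
    "criterion": [3.0, 4.0, 1.0],
    "runtime": [0.1, 0.2, 0.3],
}
arrays = get_history_arrays(history, direction="minimize")
# monotone_criterion is the running best: [3., 3., 1.]
# is_accepted flags evaluations on that envelope: [True, False, True]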
Example no. 5
def _get_results(names, raw_results, kwargs_list):
    registry = get_registry(extended=True)
    results = {}

    for name, result, inputs in zip(names, raw_results, kwargs_list):

        if isinstance(result, OptimizeResult):
            history = result.history
            params_history = pd.DataFrame([
                tree_just_flatten(p, registry=registry)
                for p in history["params"]
            ])
            criterion_history = pd.Series(history["criterion"])
            time_history = pd.Series(history["runtime"])
        elif isinstance(result, str):
            _criterion = inputs["criterion"]

            params_history = pd.DataFrame(
                tree_just_flatten(inputs["params"], registry=registry)).T
            criterion_history = pd.Series(
                _criterion(inputs["params"])["value"])

            time_history = pd.Series([np.inf])
        else:
            raise ValueError(
                "'result' object is expected to be of type 'OptimizeResult' "
                "or 'str'.")

        results[name] = {
            "params_history": params_history,
            "criterion_history": criterion_history,
            "time_history": time_history,
            "solution": result,
        }

    return results
Example no. 6
    def cov(self, return_type="pytree"):
        """Calculate the variance-covariance matrix of the estimated parameters.

        Args:
            return_type (str): One of "pytree", "array" or "dataframe". If "array",
                a 2d numpy array with the covariance is returned. If "dataframe",
                a pandas DataFrame with parameter names in the index and columns
                is returned. The default is "pytree".

        Returns:
            Any: The covariance matrix of the estimated parameters as a block-pytree,
                numpy.ndarray, or pandas.DataFrame.
        """
        cov = self._internal_cov

        if return_type == "dataframe":
            registry = get_registry(extended=True)
            names = np.array(leaf_names(self._base_outcome, registry=registry))
            cov = pd.DataFrame(cov, columns=names, index=names)
        elif return_type == "pytree":
            cov = matrix_to_block_tree(cov, self._base_outcome,
                                       self._base_outcome)
        elif return_type != "array":
            raise ValueError(
                "return_type must be one of pytree, array, or dataframe, "
                f"not {return_type}.")
        return cov
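A sketch of the three return types, assuming the same hypothetical bootstrap result as in the earlier examples:

cov_tree = result.cov()                       # block pytree matching the outcome
cov_arr = result.cov(return_type="array")     # plain 2d numpy array
cov_df = result.cov(return_type="dataframe")  # DataFrame labeled with leaf names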
Example no. 7
def transform_free_values_to_params_tree(values, free_params, params):
    """Fill non-free values and project to params tree structure."""
    mask = free_params.free_mask
    flat = np.full(len(mask), np.nan)
    flat[mask] = values
    registry = get_registry(extended=True)
    pytree = tree_unflatten(params, flat, registry=registry)
    return pytree
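A minimal sketch of the fill-and-project behavior. The free_params stand-in is hypothetical and only provides the free_mask attribute the function reads:

import numpy as np
from types import SimpleNamespace

params = {"a": 1.0, "b": np.array([2.0, 3.0])}
free_params = SimpleNamespace(free_mask=np.array([True, False, True]))
tree = transform_free_values_to_params_tree(
    values=np.array([10.0, 30.0]), free_params=free_params, params=params
)
# tree == {"a": 10.0, "b": array([nan, 30.])}: fixed entries stay NaN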
Example no. 8
def get_moments_cov(data,
                    calculate_moments,
                    *,
                    moment_kwargs=None,
                    bootstrap_kwargs=None):
    """Bootstrap the covariance matrix of the moment conditions.

    Args:
        data (pandas.DataFrame): DataFrame with empirical data.
        calculate_moments (callable): Function that takes data and moment_kwargs as
            arguments and returns a 1d numpy array or pandas Series with moment
            conditions.
        moment_kwargs (dict): Additional keyword arguments for calculate_moments.
        bootstrap_kwargs (dict): Additional keyword arguments that govern the
            bootstrapping. Allowed arguments are "n_draws", "seed", "n_cores",
            "batch_evaluator", "cluster" and "error_handling". For details see the
            bootstrap function.

    Returns:
        pandas.DataFrame or numpy.ndarray: The covariance matrix of the moment
            conditions for msm estimation.

    """
    moment_kwargs = {} if moment_kwargs is None else moment_kwargs
    bootstrap_kwargs = {} if bootstrap_kwargs is None else bootstrap_kwargs
    valid_bs_kwargs = {
        "n_cores",
        "n_draws",
        "seed",
        "batch_evaluator",
        "cluster",
        "error_handling",
    }
    problematic = set(bootstrap_kwargs).difference(valid_bs_kwargs)
    if problematic:
        raise ValueError(f"Invalid bootstrap_kwargs: {problematic}")

    first_eval = calculate_moments(data, **moment_kwargs)

    registry = get_registry(extended=True)

    @functools.wraps(calculate_moments)
    def func(data, **kwargs):
        raw = calculate_moments(data, **kwargs)
        out = pd.Series(tree_just_flatten(
            raw, registry=registry))  # xxxx won't be necessary soon!
        return out

    cov_arr = bootstrap(data=data, outcome=func,
                        outcome_kwargs=moment_kwargs,
                        **bootstrap_kwargs).cov()

    if isinstance(cov_arr, pd.DataFrame):
        cov_arr = cov_arr.to_numpy()  # xxxx won't be necessary soon

    cov = matrix_to_block_tree(cov_arr, first_eval, first_eval)

    return cov
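A hedged usage sketch with made-up data: the moments can be any pytree, and the covariance comes back as a matching block pytree.

import numpy as np
import pandas as pd

data = pd.DataFrame({"x": np.random.default_rng(0).normal(size=200)})

def calculate_moments(data):
    return {"mean": data["x"].mean(), "var": data["x"].var()}

moments_cov = get_moments_cov(
    data, calculate_moments, bootstrap_kwargs={"n_draws": 500, "seed": 0}
)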
Example no. 9
    def __post_init__(self):
        _database = _load_database(self.path)
        _start_params = read_start_params(_database)
        _registry = get_registry(extended=True)
        _, _treedef = tree_flatten(_start_params, registry=_registry)
        self._database = _database
        self._registry = _registry
        self._treedef = _treedef
        self._start_params = _start_params
Example no. 10
def test_optimization_with_valid_logging(algorithm, params):
    res = minimize(
        flexible_sos_ls,
        params=params,
        algorithm=algorithm,
        logging="logging.db",
    )
    registry = get_registry(extended=True)
    flat = np.array(tree_just_flatten(res.params, registry=registry))
    aaae(flat, np.zeros(3))
Example no. 11
def assert_almost_equal(x, y, decimal=6):
    if isinstance(x, np.ndarray):
        x_flat = x
        y_flat = y
    else:
        registry = get_registry(extended=True)
        x_flat = np.array(tree_just_flatten(x, registry=registry))
        y_flat = np.array(tree_just_flatten(y, registry=registry))

    aaae(x_flat, y_flat, decimal=decimal)
Example no. 12
def _update_bounds_and_flatten(nan_tree, bounds, direction):
    registry = get_registry(extended=True, data_col=direction)
    flat_nan_tree = tree_leaves(nan_tree, registry=registry)

    if bounds is not None:

        registry = get_registry(extended=True)
        flat_bounds = tree_leaves(bounds, registry=registry)

        seperator = 10 * "$"
        params_names = leaf_names(nan_tree,
                                  registry=registry,
                                  separator=seperator)
        bounds_names = leaf_names(bounds,
                                  registry=registry,
                                  separator=seperator)

        flat_nan_dict = dict(zip(params_names, flat_nan_tree))

        invalid = {"names": [], "bounds": []}
        for bounds_name, bounds_leaf in zip(bounds_names, flat_bounds):

            # if a bounds leaf is None we treat it as saying that the corresponding
            # subtree of params has no bounds.
            if bounds_leaf is not None:
                if bounds_name in flat_nan_dict:
                    flat_nan_dict[bounds_name] = bounds_leaf
                else:
                    invalid["names"].append(bounds_name)
                    invalid["bounds"].append(bounds_leaf)

        if invalid["bounds"]:
            msg = (
                f"{direction} could not be matched to params pytree. The bounds "
                f"{invalid['bounds']} with names {invalid['names']} are not part of "
                "params.")
            raise InvalidBoundsError(msg)

        flat_nan_tree = list(flat_nan_dict.values())

    updated = np.array(flat_nan_tree, dtype=np.float64)
    return updated
Example no. 13
def _check_dimensions_matrix(matrix, outer_tree, inner_tree):
    extended_registry = get_registry(extended=True)
    flat_outer = tree_leaves(outer_tree, registry=extended_registry)
    flat_inner = tree_leaves(inner_tree, registry=extended_registry)

    if matrix.shape[0] != len(flat_outer):
        raise ValueError(
            "First dimension of matrix does not match that of outer_tree.")
    if matrix.shape[1] != len(flat_inner):
        raise ValueError(
            "Second dimension of matrix does not match that of inner_tree.")
Example no. 14
def test_block_tree_to_hessian_bijection():
    params = {"a": np.arange(4), "b": [{"c": (1, 2), "d": np.array([5, 6])}]}
    f_tree = {"e": np.arange(3), "f": (5, 6, [7, 8, {"g": 1.0}])}

    registry = get_registry(extended=True)
    n_p = len(tree_leaves(params, registry=registry))
    n_f = len(tree_leaves(f_tree, registry=registry))

    expected = np.arange(n_f * n_p**2).reshape(n_f, n_p, n_p)
    block_hessian = hessian_to_block_tree(expected, f_tree, params)
    got = block_tree_to_hessian(block_hessian, f_tree, params)
    assert_array_equal(expected, got)
Example no. 15
def _get_selection_indices(params, selector):
    """Get index of selected flat params and number of flat params."""
    registry = get_registry(extended=True)
    flat_params, params_treedef = tree_flatten(params, registry=registry)
    n_params = len(flat_params)
    indices = np.arange(n_params, dtype=int)
    params_indices = tree_unflatten(params_treedef, indices, registry=registry)
    selected = selector(params_indices)
    selection_indices = np.array(tree_just_flatten(selected,
                                                   registry=registry),
                                 dtype=int)
    return selection_indices, n_params
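A small sketch of the selector mechanics on a made-up params tree:

import numpy as np

params = {"a": np.arange(3.0), "b": {"c": 1.0, "d": 2.0}}
selection_indices, n_params = _get_selection_indices(params, selector=lambda p: p["b"])
# selection_indices -> array([3, 4]); n_params -> 5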
Example no. 16
def test_log_reader_read_multistart_history(example_db):
    reader = OptimizeLogReader(example_db)
    history, local_history, exploration = reader.read_multistart_history(
        direction="minimize")
    assert local_history is None
    assert exploration is None

    registry = get_registry(extended=True)
    assert tree_equal(
        tree_just_flatten(history, registry=registry),
        tree_just_flatten(reader.read_history(), registry=registry),
    )
Example no. 17
def test_ci(outcome, method, setup, expected):
    registry = get_registry(extended=True)

    def outcome_flat(data):
        return tree_just_flatten(outcome(data), registry=registry)

    base_outcome = outcome_flat(setup["df"])
    lower, upper = calculate_ci(base_outcome,
                                setup["estimates"],
                                ci_method=method)

    aaae(lower, expected[method + "_ci"][:, 0])
    aaae(upper, expected[method + "_ci"][:, 1])
Example no. 18
def calculate_free_estimates(estimates, internal_estimates):
    mask = internal_estimates.free_mask
    names = internal_estimates.names

    registry = get_registry(extended=True)
    external_flat = np.array(tree_just_flatten(estimates, registry=registry))

    free_estimates = FreeParams(
        values=external_flat[mask],
        free_mask=mask,
        all_names=names,
        free_names=np.array(names)[mask].tolist(),
    )
    return free_estimates
Example no. 19
    def outcomes(self):
        """Returns the estimated bootstrap outcomes.

        Returns:
            List[Any]: The bootstrap outcomes as a list of pytrees.
        """
        registry = get_registry(extended=True)
        _, treedef = tree_flatten(self._base_outcome, registry=registry)

        outcomes = [
            tree_unflatten(treedef, out, registry=registry)
            for out in self._internal_outcomes
        ]
        return outcomes
Example no. 20
def tree_params_converter(tree_params):
    registry = get_registry(extended=True)
    _, treedef = tree_flatten(tree_params, registry=registry)

    converter = TreeConverter(
        params_flatten=lambda params: np.array(
            tree_just_flatten(params, registry=registry)
        ),
        params_unflatten=lambda x: tree_unflatten(
            treedef, x.tolist(), registry=registry
        ),
        func_flatten=None,
        derivative_flatten=None,
    )
    return converter
Example no. 21
    def se(self):
        """Calculate standard errors.

        Returns:
            Any: The standard errors of the estimated parameters as a pytree with
                the same structure as base_outcome.
        """
        cov = self._internal_cov
        se = np.sqrt(np.diagonal(cov))

        registry = get_registry(extended=True)
        _, treedef = tree_flatten(self._base_outcome, registry=registry)

        se = tree_unflatten(treedef, se, registry=registry)
        return se
Example no. 22
def get_params_groups_and_short_names(params, free_mask, max_group_size=8):
    """Create parameter groups and short names.

    Args:
        params (pytree): parameters as supplied by the user.
        free_mask (np.array): 1d boolean array of same length as params, identifying
            the free parameters.
        max_group_size (int): maximal allowed size of a group. Groups that are larger
            than this will be split.

    Returns:
        groups (list): list of strings and None. For each entry in flat params the key
            of the group to which the parameter belongs. None if the parameter is not
            free.
        names (list): list of the parameter names to be displayed in the dashboard.

    """
    sep = "$$$+++"
    registry = get_registry(extended=True)
    paths = leaf_names(params, registry=registry, separator=sep)
    split_paths = [path.split(sep) for path in paths]

    groups = []
    names = []
    for path_list, is_free in zip(split_paths, free_mask):
        group, name = _get_group_and_name(path_list, is_free)
        groups.append(group)
        names.append(name)

    # if every parameter has its own group, they should all actually be in one group
    if len(pd.unique(groups)) == len(groups):
        groups = ["Parameters"] * len(groups)

    counts = pd.value_counts(groups)
    to_be_split = counts[counts > max_group_size]
    for group_name, n_occurrences in to_be_split.items():
        split_group_names = _split_long_group(
            group_name=group_name,
            n_occurrences=n_occurrences,
            max_group_size=max_group_size,
        )
        groups = _replace_too_common_groups(groups, group_name,
                                            split_group_names)

    return groups, names
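A sketch of typical inputs on a hypothetical parameter tree. Per the docstring, fixed parameters get group None, so the last entry of groups below is None:

import numpy as np

params = {"utility": np.arange(3.0), "shocks": np.arange(2.0)}
free_mask = np.array([True, True, True, True, False])
groups, names = get_params_groups_and_short_names(params, free_mask)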
Example no. 23
def get_msm_optimization_functions(
    simulate_moments,
    empirical_moments,
    weights,
    *,
    simulate_moments_kwargs=None,
    jacobian=None,
    jacobian_kwargs=None,
):
    """Construct criterion functions and their derivatives for msm estimation.

    Args:
        simulate_moments (callable): Function that takes params and potentially other
            keyword arguments and returns simulated moments as a pandas Series.
            Alternatively, the function can return a dict with any number of entries
            as long as one of those entries is "simulated_moments".
        empirical_moments (pandas.Series): A pandas series with the empirical
            equivalents of the simulated moments.
        weights (pytree): The weighting matrix as block pytree.
        simulate_moments_kwargs (dict): Additional keyword arguments for
            ``simulate_moments``.
        jacobian (callable or pandas.DataFrame): A function that takes ``params`` and
            potentially other keyword arguments and returns the jacobian of
            simulate_moments with respect to the params. Alternatively you can pass
            a pandas.DataFrame with the jacobian at the optimal parameters. This is
            only possible if you pass ``optimize_options=False``.
        jacobian_kwargs (dict): Additional keyword arguments for jacobian.

    Returns:
        dict: Dictionary containing at least the entry "criterion". If enough inputs
            are provided it also contains the entries "derivative" and
            "criterion_and_derivative". All values are functions that take params
            as only argument.

    """
    flat_weights = block_tree_to_matrix(
        weights,
        outer_tree=empirical_moments,
        inner_tree=empirical_moments,
    )

    chol_weights = np.linalg.cholesky(flat_weights)

    registry = get_registry(extended=True)
    flat_emp_mom = tree_just_flatten(empirical_moments, registry=registry)

    _simulate_moments = _partial_kwargs(simulate_moments,
                                        simulate_moments_kwargs)
    _jacobian = _partial_kwargs(jacobian, jacobian_kwargs)

    criterion = functools.partial(
        _msm_criterion,
        simulate_moments=_simulate_moments,
        flat_empirical_moments=flat_emp_mom,
        chol_weights=chol_weights,
        registry=registry,
    )

    out = {"criterion": criterion}

    if _jacobian is not None:
        raise NotImplementedError(
            "Closed form jacobians are not yet supported in estimate_msm")

    return out
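A hedged usage sketch of a trivial model with made-up numbers; with pandas Series moments, the weights block pytree can simply be a labeled DataFrame:

import numpy as np
import pandas as pd

empirical_moments = pd.Series([5.0, 2.0], index=["mean", "sd"])
weights = pd.DataFrame(
    np.eye(2), index=empirical_moments.index, columns=empirical_moments.index
)

def simulate_moments(params):
    return pd.Series([params["mu"], params["sigma"]], index=["mean", "sd"])

funcs = get_msm_optimization_functions(simulate_moments, empirical_moments, weights)
criterion = funcs["criterion"]  # takes params as its only argument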
Example no. 24
                moments_cov=moments_cov,
                params_cov=params_cov,
            )
        else:
            raise ValueError(f"Invalid kind: {kind}")

        if return_type == "array":
            out = raw
        elif return_type == "pytree":
            out = matrix_to_block_tree(
                raw,
                outer_tree=self._params,
                inner_tree=self._empirical_moments,
            )
        elif return_type == "dataframe":
            registry = get_registry(extended=True)
            row_names = self._internal_estimates.names
            col_names = leaf_names(self._empirical_moments, registry=registry)
            out = pd.DataFrame(
                data=raw,
                index=row_names,
                columns=col_names,
            )
        else:
            msg = (
                f"Invalid return type: {return_type}. Valid are 'pytree', 'array' "
                "and 'dataframe'")
            raise ValueError(msg)
        return out

    def to_pickle(self, path):
Example no. 25
def get_bounds(
    params,
    lower_bounds=None,
    upper_bounds=None,
    soft_lower_bounds=None,
    soft_upper_bounds=None,
    registry=None,
    add_soft_bounds=False,
):
    """Consolidate lower/upper bounds with bounds available in params.

    Updates bounds defined in params. If no bounds are available the entry is set to
    -np.inf for the lower bound and np.inf for the upper bound. If a bound is defined in
    params and lower_bounds or upper_bounds, the bound from lower_bounds or upper_bounds
    will be used.

    Args:
        params (pytree): The parameter pytree.
        lower_bounds (pytree): Must be a subtree of params.
        upper_bounds (pytree): Must be a subtree of params.
        registry (dict): pybaum registry.

    Returns:
        np.ndarray: Consolidated and flattened lower_bounds.
        np.ndarray: Consolidated and flattened upper_bounds.

    """
    fast_path = _is_fast_path(
        params=params,
        lower_bounds=lower_bounds,
        upper_bounds=upper_bounds,
        add_soft_bounds=add_soft_bounds,
    )
    if fast_path:
        return _get_fast_path_bounds(
            params=params,
            lower_bounds=lower_bounds,
            upper_bounds=upper_bounds,
        )

    registry = get_registry(extended=True) if registry is None else registry
    n_params = len(tree_leaves(params, registry=registry))

    # Fill leaves with np.nan. If params contains a data frame with bounds as a column,
    # that column is NOT overwritten (as long as an extended registry is used).
    nan_tree = tree_map(lambda leaf: np.nan, params, registry=registry)

    lower_flat = _update_bounds_and_flatten(nan_tree,
                                            lower_bounds,
                                            direction="lower_bound")
    upper_flat = _update_bounds_and_flatten(nan_tree,
                                            upper_bounds,
                                            direction="upper_bound")

    if len(lower_flat) != n_params:
        raise InvalidBoundsError(
            "lower_bounds do not match dimension of params.")
    if len(upper_flat) != n_params:
        raise InvalidBoundsError(
            "upper_bounds do not match dimension of params.")

    lower_flat[np.isnan(lower_flat)] = -np.inf
    upper_flat[np.isnan(upper_flat)] = np.inf

    if add_soft_bounds:
        lower_flat_soft = _update_bounds_and_flatten(
            nan_tree, soft_lower_bounds, direction="soft_lower_bound")
        lower_flat_soft[np.isnan(lower_flat_soft)] = -np.inf
        lower_flat = np.maximum(lower_flat, lower_flat_soft)

        upper_flat_soft = _update_bounds_and_flatten(
            nan_tree, soft_upper_bounds, direction="soft_upper_bound")
        upper_flat_soft[np.isnan(upper_flat_soft)] = np.inf
        upper_flat = np.minimum(upper_flat, upper_flat_soft)

    if (lower_flat > upper_flat).any():
        msg = "Invalid bounds. Some lower bounds are larger than upper bounds."
        raise InvalidBoundsError(msg)

    return lower_flat, upper_flat
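A short sketch of the consolidation on a hypothetical params tree: bounds are given as a subtree, and entries without bounds fall back to -inf and inf.

import numpy as np

params = {"a": 1.0, "b": np.array([2.0, 3.0])}
lower, upper = get_bounds(params, lower_bounds={"a": 0.0})
# lower -> array([  0., -inf, -inf]); upper -> array([inf, inf, inf])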
Example no. 26
    def params_to_internal(self, params):
        registry = get_registry(extended=True)
        return np.array(tree_just_flatten(params, registry=registry))
Example no. 27
def second_derivative(
    func,
    params,
    *,
    func_kwargs=None,
    method="central_cross",
    n_steps=1,
    base_steps=None,
    scaling_factor=1,
    lower_bounds=None,
    upper_bounds=None,
    step_ratio=2,
    min_steps=None,
    f0=None,
    n_cores=DEFAULT_N_CORES,
    error_handling="continue",
    batch_evaluator="joblib",
    return_func_value=False,
    return_info=False,
    key=None,
):
    """Evaluate second derivative of func at params according to method and step options

    Internally, the function is converted such that it maps from a 1d array to a 1d
    array. Then the Hessians of that function are calculated. The resulting derivative
    estimate is always a :class:`numpy.ndarray`.

    The parameters and the function output can be pandas objects (Series or DataFrames
    with value column). In that case the output of second_derivative is also a pandas
    object and with appropriate index and columns.

    For a detailed description of all options that influence the step size, as well
    as an explanation of how steps are adjusted to bounds in case of a conflict,
    see :func:`~estimagic.differentiation.generate_steps.generate_steps`.

    Args:
        func (callable): Function of which the derivative is calculated.
        params (numpy.ndarray, pandas.Series or pandas.DataFrame): 1d numpy array or
            :class:`pandas.DataFrame` with parameters at which the derivative is
            calculated. If it is a DataFrame, it can contain the columns "lower_bound"
            and "upper_bound" for bounds. See :ref:`params`.
        func_kwargs (dict): Additional keyword arguments for func, optional.
        method (str): One of {"forward", "backward", "central_average", "central_cross"}
            These correspond to the finite difference approximations defined in
            equations [7, x, 8, 9] in Rideout [2009], where ("backward", x) is not found
            in Rideout [2009] but is the natural extension of equation 7 to the backward
            case. Default "central_cross".
        n_steps (int): Number of steps needed. For central methods, this is
            the number of steps per direction. It is 1 if no Richardson extrapolation
            is used.
        base_steps (numpy.ndarray, optional): 1d array of the same length as params.
            base_steps * scaling_factor is the absolute value of the first (and possibly
            only) step used in the finite differences approximation of the derivative.
            If base_steps * scaling_factor conflicts with bounds, the actual steps will
            be adjusted. If base_steps is not provided, it will be determined according
            to a rule of thumb as long as this does not conflict with min_steps.
        scaling_factor (numpy.ndarray or float): Scaling factor which is applied to
            base_steps. If it is a numpy.ndarray, it needs to be as long as params.
            scaling_factor is useful if you want to increase or decrease the base_step
            relative to the rule-of-thumb or user provided base_step, for example to
            benchmark the effect of the step size. Default 1.
        lower_bounds (numpy.ndarray): 1d array with lower bounds for each parameter. If
            params is a DataFrame and has the column "lower_bound", this will be taken
            as lower_bounds if no lower_bounds have been provided explicitly.
        upper_bounds (numpy.ndarray): 1d array with upper bounds for each parameter. If
            params is a DataFrame and has the column "upper_bound", this will be taken
            as upper_bounds if no upper_bounds have been provided explicitly.
        step_ratio (float, numpy.array): Ratio between two consecutive Richardson
            extrapolation steps in the same direction. default 2.0. Has to be larger
            than one. The step ratio is only used if n_steps > 1.
        min_steps (numpy.ndarray): Minimal possible step sizes that can be chosen to
            accommodate bounds. Must have same length as params. By default min_steps is
            equal to base_steps, i.e step size is not decreased beyond what is optimal
            according to the rule of thumb.
        f0 (numpy.ndarray): 1d numpy array with func(x), optional.
        n_cores (int): Number of processes used to parallelize the function
            evaluations. Default 1.
        error_handling (str): One of "continue" (catch errors and continue to calculate
            derivative estimates. In this case, some derivative estimates can be
            missing but no errors are raised), "raise" (catch errors and continue
            to calculate derivative estimates at first but raise an error if all
            evaluations for one parameter failed) and "raise_strict" (raise an error
            as soon as a function evaluation fails).
        batch_evaluator (str or callable): Name of a pre-implemented batch evaluator
            (currently 'joblib' and 'pathos_mp') or Callable with the same interface
            as the estimagic batch_evaluators.
        return_func_value (bool): If True, return function value at params, stored in
            output dict under "func_value". Default False. This is useful when using
            first_derivative during optimization.
        return_info (bool): If True, return additional information on function
            evaluations and internal derivative candidates, stored in output dict under
            "func_evals" and "derivative_candidates". Derivative candidates are only
            returned if n_steps > 1. Default False.
        key (str): If func returns a dictionary, take the derivative of
            func(params)[key].

    Returns:
        result (dict): Result dictionary with keys:
            - "derivative" (numpy.ndarray, pandas.Series or pandas.DataFrame): The
                estimated second derivative of func at params. The shape of the output
                depends on the dimension of params and func(params):

                - f: R -> R leads to shape (1,), usually called second derivative
                - f: R^m -> R leads to shape (m, m), usually called Hessian
                - f: R -> R^n leads to shape (n,), usually called second derivative
                - f: R^m -> R^n leads to shape (n, m, m), usually called Hessian tensor

            - "func_value" (numpy.ndarray, pandas.Series or pandas.DataFrame): Function
                value at params, returned if return_func_value is True.

            - "func_evals_one_step" (pandas.DataFrame): Function evaluations produced by
                internal derivative method when altering the params vector at one
                dimension, returned if return_info is True.

            - "func_evals_two_step" (pandas.DataFrame): This features is not implemented
                yet and therefore set to None. Once implemented it will contain
                function evaluations produced by internal derivative method when
                altering the params vector at two dimensions, returned if return_info is
                True.

            - "func_evals_cross_step" (pandas.DataFrame): This features is not
                implemented yet and therefore set to None. Once implemented it will
                contain function evaluations produced by internal derivative method when
                altering the params vector at two dimensions in different directions,
                returned if return_info is True.

    """
    lower_bounds, upper_bounds = get_bounds(params, lower_bounds, upper_bounds)

    # handle keyword arguments
    func_kwargs = {} if func_kwargs is None else func_kwargs
    partialed_func = functools.partial(func, **func_kwargs)

    # convert params to numpy
    registry = get_registry(extended=True)
    x, params_treedef = tree_flatten(params, registry=registry)
    x = np.atleast_1d(x).astype(np.float64)

    if np.isnan(x).any():
        raise ValueError("The parameter vector must not contain NaNs.")

    implemented_methods = {
        "forward", "backward", "central_average", "central_cross"
    }
    if method not in implemented_methods:
        raise ValueError(f"Method has to be in {implemented_methods}.")

    # generate the step array
    steps = generate_steps(
        x=x,
        method=("central" if "central" in method else method),
        n_steps=n_steps,
        target="second_derivative",
        base_steps=base_steps,
        scaling_factor=scaling_factor,
        lower_bounds=lower_bounds,
        upper_bounds=upper_bounds,
        step_ratio=step_ratio,
        min_steps=min_steps,
    )

    # generate parameter vectors at which func has to be evaluated as numpy arrays
    evaluation_points = {"one_step": [], "two_step": [], "cross_step": []}
    for step_arr in steps:
        # single direction steps
        for i, j in product(range(n_steps), range(len(x))):
            if np.isnan(step_arr[i, j]):
                evaluation_points["one_step"].append(np.nan)
            else:
                point = x.copy()
                point[j] += step_arr[i, j]
                evaluation_points["one_step"].append(point)
        # two and cross direction steps
        for i, j, k in product(range(n_steps), range(len(x)), range(len(x))):
            if j > k or np.isnan(step_arr[i, j]) or np.isnan(step_arr[i, k]):
                evaluation_points["two_step"].append(np.nan)
                evaluation_points["cross_step"].append(np.nan)
            else:
                point = x.copy()
                point[j] += step_arr[i, j]
                point[k] += step_arr[i, k]
                evaluation_points["two_step"].append(point)
                if j == k:
                    evaluation_points["cross_step"].append(np.nan)
                else:
                    point = x.copy()
                    point[j] += step_arr[i, j]
                    point[k] -= step_arr[i, k]
                    evaluation_points["cross_step"].append(point)

    # convert the numpy arrays to whatever is needed by func
    evaluation_points = {
        # entries are either a numpy.ndarray or np.nan, we unflatten only
        step_type:
        [_unflatten_if_not_nan(p, params_treedef, registry) for p in points]
        for step_type, points in evaluation_points.items()
    }

    # we always evaluate f0, so we can fall back to one-sided derivatives if
    # two-sided derivatives fail. The extra cost is negligible in most cases.
    if f0 is None:
        evaluation_points["one_step"].append(params)

    # do the function evaluations for one and two step, including error handling
    batch_error_handling = "raise" if error_handling == "raise_strict" else "continue"
    raw_evals = _nan_skipping_batch_evaluator(
        func=partialed_func,
        arguments=list(
            itertools.chain.from_iterable(evaluation_points.values())),
        n_cores=n_cores,
        error_handling=batch_error_handling,
        batch_evaluator=batch_evaluator,
    )

    # extract information on exceptions that occurred during function evaluations
    exc_info = "\n\n".join([val for val in raw_evals if isinstance(val, str)])
    raw_evals = [
        val if not isinstance(val, str) else np.nan for val in raw_evals
    ]

    n_one_step, n_two_step, n_cross_step = map(len, evaluation_points.values())
    raw_evals = {
        "one_step": raw_evals[:n_one_step],
        "two_step": raw_evals[n_one_step:n_two_step + n_one_step],
        "cross_step": raw_evals[n_two_step + n_one_step:],
    }

    # store full function value at params as func_value and a processed version of it
    # that we need to calculate derivatives as f0
    if f0 is None:
        f0 = raw_evals["one_step"][-1]
        raw_evals["one_step"] = raw_evals["one_step"][:-1]
    func_value = f0

    f0_tree = f0[key] if key is not None and isinstance(f0, dict) else f0
    f0 = tree_leaves(f0_tree, registry=registry)
    f0 = np.array(f0, dtype=np.float64)

    # convert the raw evaluations to numpy arrays
    raw_evals = {
        step_type: _convert_evals_to_numpy(evals, key, registry)
        for step_type, evals in raw_evals.items()
    }

    # reshape arrays into dimension (n_steps, dim_f, dim_x) or (n_steps, dim_f, dim_x,
    # dim_x) for finite differences
    evals = {}
    evals["one_step"] = _reshape_one_step_evals(raw_evals["one_step"], n_steps,
                                                len(x))
    evals["two_step"] = _reshape_two_step_evals(raw_evals["two_step"], n_steps,
                                                len(x))
    evals["cross_step"] = _reshape_cross_step_evals(raw_evals["cross_step"],
                                                    n_steps, len(x), f0)

    # apply finite difference formulae
    hess_candidates = {}
    for m in ["forward", "backward", "central_average", "central_cross"]:
        hess_candidates[m] = finite_differences.hessian(evals, steps, f0, m)

    # get the best derivative estimate out of all derivative estimates that could be
    # calculated, given the function evaluations.
    orders = {
        "central_cross":
        ["central_cross", "central_average", "forward", "backward"],
        "central_average":
        ["central_average", "central_cross", "forward", "backward"],
        "forward": ["forward", "backward", "central_average", "central_cross"],
        "backward":
        ["backward", "forward", "central_average", "central_cross"],
    }

    if n_steps == 1:
        hess = _consolidate_one_step_derivatives(hess_candidates,
                                                 orders[method])
        updated_candidates = None
    else:
        raise ValueError(
            "Richardson extrapolation is not implemented for the second derivative yet."
        )

    # raise error if necessary
    if error_handling in ("raise", "raise_strict") and np.isnan(hess).any():
        raise Exception(exc_info)

    # results processing
    derivative = hessian_to_block_tree(hess, f0_tree, params)

    result = {"derivative": derivative}
    if return_func_value:
        result["func_value"] = func_value
    if return_info:
        info = _collect_additional_info(steps,
                                        evals,
                                        updated_candidates,
                                        target="second_derivative")
        result = {**result, **info}
    return result
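A minimal usage sketch for the scalar sum-of-squares case, whose Hessian is 2 * I; the import path mirrors the one used elsewhere in these examples:

import numpy as np
from estimagic.differentiation.derivatives import second_derivative

result = second_derivative(lambda x: x @ x, params=np.arange(3.0))
# result["derivative"] is approximately 2 * np.eye(3)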
Example no. 28
def calculate_estimation_summary(
    summary_data,
    names,
    free_names,
):
    """Create estimation summary using pre-calculated results.

    Args:
        summary_data (dict): Dictionary with entries ['value', 'p_value', 'ci_lower',
            'ci_upper', 'standard_error'].
        names (List[str]): List of parameter names, corresponding to result_object.
        free_names (List[str]): List of parameter names for free parameters.

    Returns:
        pytree: A pytree with the same structure as params. Each leaf in the params
            tree is replaced by a DataFrame containing columns "value",
            "standard_error", "pvalue", "ci_lower" and "ci_upper".  Parameters that do
            not have a standard error (e.g. because they were fixed during estimation)
            contain NaNs in all but the "value" column. The value column is only
            reproduced for convenience.

    """
    # ==================================================================================
    # Flatten summary and construct data frame for flat estimates
    # ==================================================================================

    registry = get_registry(extended=True)
    flat_data = {
        key: tree_just_flatten(val, registry=registry)
        for key, val in summary_data.items()
    }

    df = pd.DataFrame(flat_data, index=names)

    df.loc[free_names, "stars"] = pd.cut(
        df.loc[free_names, "p_value"],
        bins=[-1, 0.01, 0.05, 0.1, 2],
        labels=["***", "**", "*", ""],
    )

    # ==================================================================================
    # Map summary data into params tree structure
    # ==================================================================================

    # create tree with values corresponding to indices of df
    indices = tree_unflatten(summary_data["value"], names, registry=registry)

    estimates_flat = tree_just_flatten(summary_data["value"])
    indices_flat = tree_just_flatten(indices)

    # use index chunks in indices_flat to access the corresponding sub data frame of df,
    # and use the index information stored in estimates_flat to form the correct (multi)
    # index for the resulting leaf.
    summary_flat = []
    for index_leaf, params_leaf in zip(indices_flat, estimates_flat):

        if np.isscalar(params_leaf):
            loc = [index_leaf]
            index = [0]
        elif isinstance(params_leaf, pd.DataFrame) and "value" in params_leaf:
            loc = index_leaf["value"].to_numpy().flatten()
            index = params_leaf.index
        elif isinstance(params_leaf, pd.DataFrame):
            loc = index_leaf.to_numpy().flatten()
            # use product of existing index and columns for regular pd.DataFrame
            index = pd.MultiIndex.from_tuples([
                (*row, col) if isinstance(row, tuple) else (row, col)
                for row in params_leaf.index for col in params_leaf.columns
            ])
        elif isinstance(params_leaf, pd.Series):
            loc = index_leaf.to_numpy().flatten()
            index = params_leaf.index
        else:
            # array case (numpy or jax)
            loc = index_leaf.flatten()
            if params_leaf.ndim == 1:
                index = pd.RangeIndex(stop=params_leaf.size)
            else:
                index = pd.MultiIndex.from_arrays(
                    np.unravel_index(np.arange(params_leaf.size),
                                     params_leaf.shape))

        df_chunk = df.loc[loc]
        df_chunk.index = index

        summary_flat.append(df_chunk)

    summary = tree_unflatten(summary_data["value"], summary_flat)
    return summary
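A tiny sketch with a single scalar parameter and made-up numbers, showing the expected layout of summary_data:

summary_data = {
    "value": {"a": 0.5},
    "standard_error": {"a": 0.1},
    "ci_lower": {"a": 0.3},
    "ci_upper": {"a": 0.7},
    "p_value": {"a": 0.01},
}
summary = calculate_estimation_summary(summary_data, names=["a"], free_names=["a"])
# summary["a"] is a one-row DataFrame with the columns above plus "stars"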
Example no. 29
import numpy as np
import pandas as pd
import pytest
from estimagic.differentiation.derivatives import first_derivative
from estimagic.optimization.optimize import minimize
from estimagic.parameters.tree_registry import get_registry
from numpy.testing import assert_array_almost_equal as aaae
from pybaum import tree_just_flatten
from pybaum import tree_map

REGISTRY = get_registry(extended=True)


def flexible_sos_scalar(params):
    flat = np.array(tree_just_flatten(params, registry=REGISTRY))
    return flat @ flat


def flexible_sos_scalar_derivative(params):
    return tree_map(lambda x: 2.0 * x, params)


def flexible_sos_ls(params):
    return {"root_contributions": params}


def flexible_sos_ls_derivative(params):
    deriv_dict = first_derivative(
        flexible_sos_ls,
        params,
        key="root_contributions",
Example no. 30
def get_tree_converter(
    params,
    lower_bounds,
    upper_bounds,
    func_eval,
    primary_key,
    derivative_eval=None,
    soft_lower_bounds=None,
    soft_upper_bounds=None,
    add_soft_bounds=False,
):
    """Get flatten and unflatten functions for criterion and its derivative.

    The function creates a converter with methods to convert parameters, derivatives
    and the output of the criterion function between the user provided pytree structure
    and flat representations.

    The main motivation for bundling all of this together (as opposed to handling
    parameters, derivatives and function outputs separately) is that the derivative
    conversion needs to know about the structure of params and the criterion output.

    Args:
        params (pytree): The user provided parameters.
        lower_bounds (pytree): The user provided lower bounds.
        upper_bounds (pytree): The user provided upper bounds.
        func_eval (float, dict or pytree): An evaluation of ``func`` at ``params``.
            Used to determine how the function output has to be transformed for the
            optimizer.
        primary_key (str): One of "value", "contributions" and "root_contributions".
            Used to determine how the function and derivative output has to be
            transformed for the optimizer.
        derivative_eval (dict, pytree or None): Evaluation of the derivative of
            func at params. Used for consistency checks.
        soft_lower_bounds (pytree): As lower_bounds
        soft_upper_bounds (pytree): As upper_bounds
        add_soft_bounds (bool): Whether soft bounds should be added to the flat_params

    Returns:
        TreeConverter: NamedTuple with flatten and unflatten methods.
        FlatParams: NamedTuple of 1d arrays with flattened bounds and param names.

    """
    _registry = get_registry(extended=True)
    _params_vec, _params_treedef = tree_flatten(params, registry=_registry)
    _params_vec = np.array(_params_vec).astype(float)
    _lower, _upper = get_bounds(
        params=params,
        lower_bounds=lower_bounds,
        upper_bounds=upper_bounds,
        registry=_registry,
    )

    if add_soft_bounds:
        _soft_lower, _soft_upper = get_bounds(
            params=params,
            lower_bounds=lower_bounds,
            upper_bounds=upper_bounds,
            registry=_registry,
            soft_lower_bounds=soft_lower_bounds,
            soft_upper_bounds=soft_upper_bounds,
            add_soft_bounds=add_soft_bounds,
        )
    else:
        _soft_lower, _soft_upper = None, None

    _param_names = leaf_names(params, registry=_registry)

    flat_params = FlatParams(
        values=_params_vec,
        lower_bounds=_lower,
        upper_bounds=_upper,
        names=_param_names,
        soft_lower_bounds=_soft_lower,
        soft_upper_bounds=_soft_upper,
    )

    _params_flatten = _get_params_flatten(registry=_registry)
    _params_unflatten = _get_params_unflatten(
        registry=_registry, treedef=_params_treedef
    )
    _func_flatten = _get_func_flatten(
        registry=_registry,
        func_eval=func_eval,
        primary_key=primary_key,
    )
    _derivative_flatten = _get_derivative_flatten(
        registry=_registry,
        primary_key=primary_key,
        params=params,
        func_eval=func_eval,
        derivative_eval=derivative_eval,
    )

    converter = TreeConverter(
        params_flatten=_params_flatten,
        params_unflatten=_params_unflatten,
        func_flatten=_func_flatten,
        derivative_flatten=_derivative_flatten,
    )

    return converter, flat_params
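A closing sketch of the converter round-trip on hypothetical inputs, using a scalar criterion evaluation with primary_key "value":

import numpy as np

params = {"a": 1.0, "b": np.array([2.0, 3.0])}
converter, flat_params = get_tree_converter(
    params,
    lower_bounds=None,
    upper_bounds=None,
    func_eval=5.0,
    primary_key="value",
)
x = converter.params_flatten(params)  # array([1., 2., 3.])
tree = converter.params_unflatten(x)  # back to the original pytree structure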