Example no. 1
    def ci(self, ci_method="percentile", ci_level=0.95):
        """Calculate confidence intervals.

        Args:
            ci_method (str): Method of choice for computing confidence intervals.
                The default is "percentile".
            ci_level (float): Confidence level for the calculation of confidence
                intervals. The default is 0.95.

        Returns:
            Any: Pytree with the same structure as base_outcome containing lower
                bounds of confidence intervals.
            Any: Pytree with the same structure as base_outcome containing upper
                bounds of confidence intervals.
        """
        registry = get_registry(extended=True)
        base_outcome_flat, treedef = tree_flatten(self._base_outcome,
                                                  registry=registry)

        lower_flat, upper_flat = calculate_ci(base_outcome_flat,
                                              self._internal_outcomes,
                                              ci_method, ci_level)

        lower = tree_unflatten(treedef, lower_flat, registry=registry)
        upper = tree_unflatten(treedef, upper_flat, registry=registry)
        return lower, upper
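
A minimal numpy sketch of what the percentile method does with the flattened bootstrap outcomes. Here percentile_ci is a hypothetical stand-in for the calculate_ci helper, which is not shown in this example, and the internal outcomes are assumed to form an (n_draws, n_flat_params) array.

import numpy as np

# Hypothetical stand-in for calculate_ci with ci_method="percentile".
# internal_outcomes is assumed to be an (n_draws, n_flat_params) array.
def percentile_ci(internal_outcomes, ci_level=0.95):
    alpha = 1 - ci_level
    lower = np.percentile(internal_outcomes, 100 * alpha / 2, axis=0)
    upper = np.percentile(internal_outcomes, 100 * (1 - alpha / 2), axis=0)
    return lower, upper

rng = np.random.default_rng(0)
draws = rng.normal(size=(1_000, 3))  # 1000 bootstrap draws of 3 flat parameters
lower_flat, upper_flat = percentile_ci(draws)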
Example no. 2
def matrix_to_block_tree(matrix, outer_tree, inner_tree):
    """Convert a matrix (2-dimensional array) to block-tree.

    A block tree most often arises when one applies an operation to a function that maps
    between two trees. For certain functions this results in a 2-dimensional data array.
    Two main examples are the Jacobian of a function f : inner_tree -> outer_tree,
    which naturally has a block-tree structure, and the covariance matrix of a tree,
    in which case outer_tree = inner_tree.

    Args:
        matrix (numpy.ndarray): 2d representation of the block tree. Has shape (m, n).
        outer_tree: A pytree. If flattened to scalars has length m.
        inner_tree: A pytree. If flattened to scalars has length n.

    Returns:
        block_tree: A (block) pytree.

    """
    _check_dimensions_matrix(matrix, outer_tree, inner_tree)

    flat_outer, treedef_outer = tree_flatten(outer_tree)
    flat_inner, treedef_inner = tree_flatten(inner_tree)

    flat_outer_np = [
        _convert_to_numpy(leaf, only_pandas=True) for leaf in flat_outer
    ]
    flat_inner_np = [
        _convert_to_numpy(leaf, only_pandas=True) for leaf in flat_inner
    ]

    shapes_outer = [np.shape(a) for a in flat_outer_np]
    shapes_inner = [np.shape(a) for a in flat_inner_np]

    block_bounds_outer = np.cumsum(
        [int(np.prod(s)) for s in shapes_outer[:-1]])
    block_bounds_inner = np.cumsum(
        [int(np.prod(s)) for s in shapes_inner[:-1]])

    blocks = []
    for leaf_outer, s1, submat in zip(
            flat_outer, shapes_outer,
            np.split(matrix, block_bounds_outer, axis=0)):
        row = []
        for leaf_inner, s2, block_values in zip(
                flat_inner, shapes_inner,
                np.split(submat, block_bounds_inner, axis=1)):
            raw_block = block_values.reshape((*s1, *s2))
            block = _convert_raw_block_to_pandas(raw_block, leaf_outer,
                                                 leaf_inner)
            row.append(block)

        blocks.append(row)

    block_tree = tree_unflatten(
        treedef_outer, [tree_unflatten(treedef_inner, row) for row in blocks])

    return block_tree
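
The splitting logic can be illustrated without any tree machinery: cumulative block sizes feed np.split, and each block is reshaped to the combined leaf shapes. A toy sketch with hard-coded leaf shapes standing in for the flattened trees:

import numpy as np

# Toy illustration of the splitting step: an "outer tree" with leaf shapes (2,) and
# (3,) (m = 5) and an "inner tree" with leaf shapes (1,) and (2,) (n = 3).
shapes_outer = [(2,), (3,)]
shapes_inner = [(1,), (2,)]
matrix = np.arange(15).reshape(5, 3)

bounds_outer = np.cumsum([int(np.prod(s)) for s in shapes_outer[:-1]])
bounds_inner = np.cumsum([int(np.prod(s)) for s in shapes_inner[:-1]])

blocks = []
for s1, submat in zip(shapes_outer, np.split(matrix, bounds_outer, axis=0)):
    row = []
    for s2, vals in zip(shapes_inner, np.split(submat, bounds_inner, axis=1)):
        row.append(vals.reshape((*s1, *s2)))
    blocks.append(row)

print(blocks[0][1].shape)  # (2, 2): rows of outer leaf 0, columns of inner leaf 1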
Example no. 3
def hessian_to_block_tree(hessian, f_tree, params_tree):
    """Convert a Hessian array to block-tree format.

    Remark: In contrast to JAX, we need this formatting function because we calculate
    the second derivative with second-order finite differences. JAX computes the
    second derivative by applying its jacobian function twice, which automatically
    produces the desired block-tree shape of the Hessian. If we applied our first
    derivative function twice we would get the same block-tree shape.

    Args:
        hessian (np.ndarray): The Hessian, 2- or 3-dimensional array representation of
            the resulting block-tree.
        f_tree (pytree): The function evaluated at params_tree.
        params_tree (pytree): The params_tree.

    Returns:
        hessian_block_tree (pytree): The Hessian in block-tree format.

    """
    _check_dimensions_hessian(hessian, f_tree, params_tree)

    if hessian.ndim == 2:
        hessian = hessian[np.newaxis]

    flat_f, treedef_f = tree_flatten(f_tree)
    flat_p, treedef_p = tree_flatten(params_tree)

    flat_f_np = [_convert_to_numpy(leaf, only_pandas=True) for leaf in flat_f]
    flat_p_np = [_convert_to_numpy(leaf, only_pandas=True) for leaf in flat_p]

    shapes_f = [np.shape(a) for a in flat_f_np]
    shapes_p = [np.shape(a) for a in flat_p_np]

    block_bounds_f = np.cumsum([int(np.prod(s)) for s in shapes_f[:-1]])
    block_bounds_p = np.cumsum([int(np.prod(s)) for s in shapes_p[:-1]])

    sub_block_trees = []
    for s0, subarr in zip(shapes_f, np.split(hessian, block_bounds_f, axis=0)):
        blocks = []
        for leaf_outer, s1, submat in zip(
                flat_p, shapes_p, np.split(subarr, block_bounds_p, axis=1)):
            row = []
            for leaf_inner, s2, block_values in zip(
                    flat_p, shapes_p, np.split(submat, block_bounds_p,
                                               axis=2)):
                raw_block = block_values.reshape((*s0, *s1, *s2))
                raw_block = np.squeeze(raw_block)
                block = _convert_raw_block_to_pandas(raw_block, leaf_outer,
                                                     leaf_inner)
                row.append(block)
            blocks.append(row)
        block_tree = tree_unflatten(
            treedef_p, [tree_unflatten(treedef_p, row) for row in blocks])
        sub_block_trees.append(block_tree)

    hessian_block_tree = tree_unflatten(treedef_f, sub_block_trees)
    return hessian_block_tree
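
The same splitting idea extends to three axes for the Hessian. A toy sketch with hard-coded leaf shapes (two output leaves, two parameter leaves) standing in for the flattened trees:

import numpy as np

# Toy illustration of the nested split: output leaf shapes [(2,), (1,)] (dim 3) and
# parameter leaf shapes [(1,), (2,)] (dim 3) give a (3, 3, 3) Hessian array.
shapes_f = [(2,), (1,)]
shapes_p = [(1,), (2,)]
hessian = np.arange(27).reshape(3, 3, 3)

bounds_f = np.cumsum([int(np.prod(s)) for s in shapes_f[:-1]])
bounds_p = np.cumsum([int(np.prod(s)) for s in shapes_p[:-1]])

for s0, subarr in zip(shapes_f, np.split(hessian, bounds_f, axis=0)):
    for s1, submat in zip(shapes_p, np.split(subarr, bounds_p, axis=1)):
        for s2, vals in zip(shapes_p, np.split(submat, bounds_p, axis=2)):
            block = np.squeeze(vals.reshape((*s0, *s1, *s2)))
            print(block.shape)  # squeezed shape for this (f-leaf, p-leaf, p-leaf) block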
Example no. 4
def _read_optimization_history(database, params_treedef, registry):
    """Read a histories out values, parameters and other information."""

    raw_res, _ = read_new_rows(
        database=database,
        table_name="optimization_iterations",
        last_retrieved=0,
        return_type="list_of_dicts",
    )

    history = {"params": [], "criterion": [], "runtime": []}
    for data in raw_res:
        if data["value"] is not None:
            params = tree_unflatten(params_treedef,
                                    data["params"],
                                    registry=registry)
            history["params"].append(params)
            history["criterion"].append(data["value"])
            history["runtime"].append(data["timestamp"])

    times = np.array(history["runtime"])
    times -= times[0]
    history["runtime"] = times

    return history
Example no. 5
def _read_optimization_iteration(database, iteration, params_treedef,
                                 registry):
    """Get information about an optimization iteration."""
    if iteration >= 0:
        rowid = iteration + 1
    else:
        last_iteration = read_last_rows(
            database=database,
            table_name="optimization_iterations",
            n_rows=1,
            return_type="list_of_dicts",
        )
        highest_rowid = last_iteration[0]["rowid"]

        # iteration is negative here!
        rowid = highest_rowid + iteration + 1

    data = read_specific_row(
        database,
        table_name="optimization_iterations",
        rowid=rowid,
        return_type="list_of_dicts",
    )

    if len(data) == 0:
        raise IndexError(f"Invalid iteration requested: {iteration}")
    else:
        data = data[0]

    params = tree_unflatten(params_treedef, data["params"], registry=registry)
    data["params"] = params

    return data
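
The mapping from (possibly negative) iteration numbers to table rowids is easiest to see on concrete values; a tiny sketch assuming a hypothetical database with 10 stored iterations (rowids 1 to 10):

# Hypothetical database with 10 stored iterations, i.e. rowids 1..10.
highest_rowid = 10
for iteration in (0, 3, -1, -2):
    rowid = iteration + 1 if iteration >= 0 else highest_rowid + iteration + 1
    print(iteration, "->", rowid)  # 0 -> 1, 3 -> 4, -1 -> 10, -2 -> 9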
Example no. 6
def transform_free_values_to_params_tree(values, free_params, params):
    """Fill non-free values and project to params tree structure."""
    mask = free_params.free_mask
    flat = np.full(len(mask), np.nan)
    flat[np.ix_(mask)] = values
    registry = get_registry(extended=True)
    pytree = tree_unflatten(params, flat, registry=registry)
    return pytree
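
The filling step is plain boolean masking; a toy sketch with five flat parameters of which three are free (the final unflattening into the params tree is omitted):

import numpy as np

mask = np.array([True, False, True, True, False])  # free_mask
values = np.array([0.1, 0.2, 0.3])                 # values of the free parameters
flat = np.full(len(mask), np.nan)
flat[np.ix_(mask)] = values                        # same as flat[mask] = values for 1d
print(flat)                                        # [0.1 nan 0.2 0.3 nan]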
Example no. 7
def _get_selection_indices(params, selector):
    """Get index of selected flat params and number of flat params."""
    registry = get_registry(extended=True)
    flat_params, params_treedef = tree_flatten(params, registry=registry)
    n_params = len(flat_params)
    indices = np.arange(n_params, dtype=int)
    params_indices = tree_unflatten(params_treedef, indices, registry=registry)
    selected = selector(params_indices)
    selection_indices = np.array(tree_just_flatten(selected,
                                                   registry=registry),
                                 dtype=int)
    return selection_indices, n_params
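
The core trick is to unflatten a range of integers into the params structure so that the user's selector returns positions instead of values. A sketch on a plain dict pytree, assuming pybaum is installed and exposes the tree functions used above; estimagic's extended registry would additionally split arrays and DataFrames into scalar leaves:

import numpy as np
from pybaum import tree_flatten, tree_just_flatten, tree_unflatten  # assumed import path

params = {"a": 0.5, "b": {"c": 1.0, "d": 2.0}}
selector = lambda p: p["b"]  # user-supplied selector

flat, treedef = tree_flatten(params)
helper = tree_unflatten(treedef, list(range(len(flat))))  # leaves are flat positions
selection_indices = np.array(tree_just_flatten(selector(helper)), dtype=int)
print(selection_indices)  # positions of params["b"]'s leaves in the flat vector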
Example no. 8
    def outcomes(self):
        """Returns the estimated bootstrap outcomes.

        Returns:
            List[Any]: The bootstrap outcomes as a list of pytrees.
        """
        registry = get_registry(extended=True)
        _, treedef = tree_flatten(self._base_outcome, registry=registry)

        outcomes = [
            tree_unflatten(treedef, out, registry=registry)
            for out in self._internal_outcomes
        ]
        return outcomes
Example no. 9
def tree_params_converter(tree_params):
    registry = get_registry(extended=True)
    _, treedef = tree_flatten(tree_params, registry=registry)

    converter = TreeConverter(
        params_flatten=lambda params: np.array(
            tree_just_flatten(params, registry=registry)
        ),
        params_unflatten=lambda x: tree_unflatten(
            treedef, x.tolist(), registry=registry
        ),
        func_flatten=None,
        derivative_flatten=None,
    )
    return converter
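
A round-trip sketch of what the two lambdas do, using a dict pytree with scalar leaves and pybaum's default registry. TreeConverter itself is left out because only its params_flatten and params_unflatten fields are exercised here, and the pybaum import path is an assumption:

import numpy as np
from pybaum import tree_flatten, tree_just_flatten, tree_unflatten  # assumed import path

tree_params = {"a": 1.0, "b": (2.0, 3.0)}
_, treedef = tree_flatten(tree_params)

params_flatten = lambda params: np.array(tree_just_flatten(params))
params_unflatten = lambda x: tree_unflatten(treedef, x.tolist())

x = params_flatten(tree_params)   # array([1., 2., 3.])
roundtrip = params_unflatten(x)   # expected to reproduce {"a": 1.0, "b": (2.0, 3.0)}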
Example no. 10
    def se(self):
        """Calculate standard errors.

        Returns:
            Any: The standard errors of the estimated parameters as a block-pytree,
                numpy.ndarray, or pandas.DataFrame.
        """
        cov = self._internal_cov
        se = np.sqrt(np.diagonal(cov))

        registry = get_registry(extended=True)
        _, treedef = tree_flatten(self._base_outcome, registry=registry)

        se = tree_unflatten(treedef, se, registry=registry)
        return se
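
Before unflattening, the standard errors are just the square roots of the diagonal of the internal covariance matrix; a toy numpy sketch with a hypothetical 2x2 covariance:

import numpy as np

cov = np.array([[0.04, 0.01], [0.01, 0.09]])  # hypothetical covariance of flat params
se_flat = np.sqrt(np.diagonal(cov))           # array([0.2, 0.3])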
Example no. 11
def params_plot(
    result,
    selector=None,
    max_evaluations=None,
    template=PLOTLY_TEMPLATE,
    show_exploration=False,
):
    """Plot the params history of an optimization.

    Args:
        result (Union[OptimizeResult, pathlib.Path, str]): An optimization result with
            collected history, or a path to a log file created during the optimization.
        selector (callable): A callable that takes params and returns a subset
            of params. If provided, only the selected subset of params is plotted.
        max_evaluations (int): Clip the criterion history after that many entries.
        template (str): The template for the figure. Default is "plotly_white".
        show_exploration (bool): If True, exploration samples of a multistart
            optimization are visualized. Default is False.

    Returns:
        plotly.graph_objs._figure.Figure: The figure.

    """
    # ==================================================================================
    # Process inputs
    # ==================================================================================

    if isinstance(result, OptimizeResult):
        data = _extract_plotting_data_from_results_object(
            result,
            stack_multistart=True,
            show_exploration=show_exploration,
            plot_name="params_plot",
        )
        start_params = result.start_params
    elif isinstance(result, (str, Path)):
        data = _extract_plotting_data_from_database(
            result,
            stack_multistart=True,
            show_exploration=show_exploration,
        )
        start_params = data["start_params"]
    else:
        raise ValueError("result must be an OptimizeResult or a path to a log file.")

    if data["stacked_local_histories"] is not None:
        history = data["stacked_local_histories"]["params"]
    else:
        history = data["history"]["params"]

    # ==================================================================================
    # Create figure
    # ==================================================================================

    fig = go.Figure()

    registry = get_registry(extended=True)

    hist_arr = np.array([tree_just_flatten(p, registry=registry) for p in history]).T
    names = leaf_names(start_params, registry=registry)

    if selector is not None:
        flat, treedef = tree_flatten(start_params, registry=registry)
        helper = tree_unflatten(treedef, list(range(len(flat))), registry=registry)
        selected = np.array(tree_just_flatten(selector(helper), registry=registry))
        names = [names[i] for i in selected]
        hist_arr = hist_arr[selected]

    for name, data in zip(names, hist_arr):
        if max_evaluations is not None and len(data) > max_evaluations:
            data = data[:max_evaluations]

        trace = go.Scatter(
            x=np.arange(len(data)),
            y=data,
            mode="lines",
            name=name,
        )
        fig.add_trace(trace)

    fig.update_layout(
        template=template,
        xaxis_title_text="No. of criterion evaluations",
        yaxis_title_text="Parameter value",
        legend={"yanchor": "top", "xanchor": "right", "y": 0.95, "x": 0.95},
    )

    return fig
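
The plotting step itself only needs the transposed history array: one row per flat parameter, one column per evaluation. A self-contained sketch with a hypothetical flat history and plotly:

import numpy as np
import plotly.graph_objects as go

flat_history = [[0.0, 1.0], [0.3, 0.7], [0.5, 0.5], [0.6, 0.45]]  # hypothetical evaluations
hist_arr = np.array(flat_history).T  # shape (n_params, n_evaluations)
names = ["a", "b"]

fig = go.Figure()
for name, series in zip(names, hist_arr):
    fig.add_trace(go.Scatter(x=np.arange(len(series)), y=series, mode="lines", name=name))
fig.update_layout(
    template="plotly_white",
    xaxis_title_text="No. of criterion evaluations",
    yaxis_title_text="Parameter value",
)
# fig.show()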
Example no. 12
def calculate_estimation_summary(
    summary_data,
    names,
    free_names,
):
    """Create estimation summary using pre-calculated results.

    Args:
        summary_data (dict): Dictionary with entries ['value', 'p_value', 'ci_lower',
            'ci_upper', 'standard_error'].
        names (List[str]): List of parameter names, corresponding to result_object.
        free_names (List[str]): List of parameter names for free parameters.

    Returns:
        pytree: A pytree with the same structure as params. Each leaf in the params
            tree is replaced by a DataFrame containing columns "value",
            "standard_error", "p_value", "ci_lower" and "ci_upper". Parameters that do
            not have a standard error (e.g. because they were fixed during estimation)
            contain NaNs in all but the "value" column. The value column is only
            reproduced for convenience.

    """
    # ==================================================================================
    # Flatten summary and construct data frame for flat estimates
    # ==================================================================================

    registry = get_registry(extended=True)
    flat_data = {
        key: tree_just_flatten(val, registry=registry)
        for key, val in summary_data.items()
    }

    df = pd.DataFrame(flat_data, index=names)

    df.loc[free_names, "stars"] = pd.cut(
        df.loc[free_names, "p_value"],
        bins=[-1, 0.01, 0.05, 0.1, 2],
        labels=["***", "**", "*", ""],
    )

    # ==================================================================================
    # Map summary data into params tree structure
    # ==================================================================================

    # create tree with values corresponding to indices of df
    indices = tree_unflatten(summary_data["value"], names, registry=registry)

    estimates_flat = tree_just_flatten(summary_data["value"])
    indices_flat = tree_just_flatten(indices)

    # use index chunks in indices_flat to access the corresponding sub data frame of df,
    # and use the index information stored in estimates_flat to form the correct (multi)
    # index for the resulting leaf.
    summary_flat = []
    for index_leaf, params_leaf in zip(indices_flat, estimates_flat):

        if np.isscalar(params_leaf):
            loc = [index_leaf]
            index = [0]
        elif isinstance(params_leaf, pd.DataFrame) and "value" in params_leaf:
            loc = index_leaf["value"].to_numpy().flatten()
            index = params_leaf.index
        elif isinstance(params_leaf, pd.DataFrame):
            loc = index_leaf.to_numpy().flatten()
            # use product of existing index and columns for regular pd.DataFrame
            index = pd.MultiIndex.from_tuples([
                (*row, col) if isinstance(row, tuple) else (row, col)
                for row in params_leaf.index for col in params_leaf.columns
            ])
        elif isinstance(params_leaf, pd.Series):
            loc = index_leaf.to_numpy().flatten()
            index = params_leaf.index
        else:
            # array case (numpy or jax)
            loc = index_leaf.flatten()
            if params_leaf.ndim == 1:
                index = pd.RangeIndex(stop=params_leaf.size)
            else:
                index = pd.MultiIndex.from_arrays(
                    np.unravel_index(np.arange(params_leaf.size),
                                     params_leaf.shape))

        df_chunk = df.loc[loc]
        df_chunk.index = index

        summary_flat.append(df_chunk)

    summary = tree_unflatten(summary_data["value"], summary_flat)
    return summary
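
The significance stars come from a single pd.cut over the p-values of the free parameters; a toy sketch:

import pandas as pd

p_values = pd.Series([0.004, 0.03, 0.07, 0.4], index=["a", "b", "c", "d"])
stars = pd.cut(p_values, bins=[-1, 0.01, 0.05, 0.1, 2], labels=["***", "**", "*", ""])
print(stars)  # a: ***, b: **, c: *, d: empty label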
Example no. 13
    def params_unflatten(x):
        return tree_unflatten(treedef=treedef, leaves=list(x), registry=registry)
Example no. 14
def _read_multistart_optimization_history(database, params_treedef, registry,
                                          direction):
    """Read multistart histories out values, parameters and other information.

    Returns:
        tuple:
        - dict: history that led to lowest criterion
        - dict: all other histories
        - dict: exploration phase

    """
    # ==================================================================================
    # Process raw data
    # ==================================================================================
    steps = read_steps_table(database)

    raw_res, _ = read_new_rows(
        database=database,
        table_name="optimization_iterations",
        last_retrieved=0,
        return_type="list_of_dicts",
    )

    history = {"params": [], "criterion": [], "runtime": [], "step": []}
    for data in raw_res:
        if data["value"] is not None:
            params = tree_unflatten(params_treedef,
                                    data["params"],
                                    registry=registry)
            history["params"].append(params)
            history["criterion"].append(data["value"])
            history["runtime"].append(data["timestamp"])
            history["step"].append(data["step"])

    times = np.array(history["runtime"])
    times -= times[0]
    history["runtime"] = times

    # ==================================================================================
    # Format data as data frames
    # ==================================================================================
    df = pd.DataFrame(history)
    df = df.merge(steps[["rowid", "type"]], left_on="step", right_on="rowid")
    df = df.drop(columns="rowid")

    # ==================================================================================
    # Extract data from df
    # ==================================================================================
    exploration = df.query("type == 'exploration'").drop(
        columns=["step", "type"])

    histories = df.query("type == 'optimization'")
    histories = histories.drop(columns="type")
    histories = histories.set_index("step", append=True)

    # ==================================================================================
    # The best history is given by the history that attains the global minimum or
    # maximum. All other histories are defined as local histories.

    if direction == "minimize":
        best_idx = histories["criterion"].groupby(level="step").min().idxmin()
        exploration = exploration.sort_values(by="criterion", ascending=True)
    elif direction == "maximize":
        best_idx = histories["criterion"].groupby(level="step").max().idxmax()
        exploration = exploration.sort_values(by="criterion", ascending=False)
    else:
        raise ValueError(
            f"direction must be 'minimize' or 'maximize', not {direction}."
        )

    history = histories.xs(best_idx, level="step").to_dict(orient="list")

    exploration = None if len(exploration) == 0 else exploration
    if exploration is not None:
        exploration = exploration.to_dict(orient="list")

    local_histories = []
    for idx in histories.index.get_level_values("step").unique().difference(
        [best_idx]):
        _local_history = histories.xs(idx, level="step").to_dict(orient="list")
        local_histories.append(_local_history)

    local_histories = None if len(local_histories) == 0 else local_histories

    return history, local_histories, exploration
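
How the best local history is identified is easiest to see on a toy frame with a 'step' index level: for direction="minimize", the step containing the overall minimum criterion value wins:

import pandas as pd

histories = pd.DataFrame(
    {"criterion": [3.0, 1.5, 4.0, 0.7, 2.0]},
    index=pd.MultiIndex.from_tuples(
        [(0, 1), (1, 1), (2, 2), (3, 2), (4, 3)], names=[None, "step"]
    ),
)
best_idx = histories["criterion"].groupby(level="step").min().idxmin()
print(best_idx)  # 2: step 2 contains the smallest criterion value (0.7)
best_history = histories.xs(best_idx, level="step").to_dict(orient="list")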
Example no. 15
def test_unflatten_df_with_value_column(value_df):
    registry = get_registry(extended=True)
    _, treedef = tree_flatten(value_df, registry=registry)
    unflat = tree_unflatten(treedef, [10, 11, 12], registry=registry)
    assert unflat.equals(value_df.assign(value=[10, 11, 12]))
Example no. 16
def test_unflatten_partially_numeric_df(other_df):
    registry = get_registry(extended=True)
    _, treedef = tree_flatten(other_df, registry=registry)
    unflat = tree_unflatten(treedef, [1, 2, 3, 4, 5, 6], registry=registry)
    other_df = other_df.assign(b=[1, 3, 5], c=[2, 4, 6])
    assert_frame_equal(unflat, other_df, check_dtype=False)
Example no. 17
def _unflatten_if_not_nan(leaves, treedef, registry):
    # The stored leaves may be a scalar NaN placeholder rather than a flat array;
    # only actual arrays are unflattened back into the tree structure.
    if isinstance(leaves, np.ndarray):
        out = tree_unflatten(treedef, leaves, registry=registry)
    else:
        out = leaves
    return out