def summary(self, ci_method="percentile", ci_level=0.95):
    """Summarize the bootstrap results.

    Args:
        ci_method (str): Method used to compute the confidence intervals.
            Defaults to "percentile".
        ci_level (float): Confidence level of the confidence intervals.
            Defaults to 0.95.

    Returns:
        pd.DataFrame: The estimation summary with information on the mean,
            standard errors and confidence intervals. Soon this will be a
            pytree.

    """
    extended_registry = get_registry(extended=True)
    leaf_labels = leaf_names(self.base_outcome, registry=extended_registry)

    bootstrap_data = _calulcate_summary_data_bootstrap(
        self,
        ci_method=ci_method,
        ci_level=ci_level,
    )

    # Every leaf of the base outcome is passed as a free name as well.
    return calculate_estimation_summary(
        summary_data=bootstrap_data,
        names=leaf_labels,
        free_names=leaf_labels,
    )
def cov(self, return_type="pytree"):
    """Return the variance-covariance matrix of the estimated parameters.

    Args:
        return_type (str): One of "pytree", "array" or "dataframe".
            With "array" the internally stored covariance is returned as is.
            With "dataframe" a pandas DataFrame is returned whose index and
            columns are the leaf names of the base outcome. With "pytree" the
            matrix is converted to a block-pytree. Defaults to "pytree".

    Returns:
        Any: The covariance matrix as a block-pytree, numpy.ndarray, or
            pandas.DataFrame.

    Raises:
        ValueError: If ``return_type`` is not one of the supported values.

    """
    internal = self._internal_cov

    if return_type == "dataframe":
        registry = get_registry(extended=True)
        labels = np.array(leaf_names(self._base_outcome, registry=registry))
        return pd.DataFrame(internal, columns=labels, index=labels)

    if return_type == "pytree":
        return matrix_to_block_tree(internal, self._base_outcome, self._base_outcome)

    if return_type == "array":
        return internal

    raise ValueError(
        "return_type must be one of pytree, array, or dataframe, "
        f"not {return_type}."
    )
def _update_bounds_and_flatten(nan_tree, bounds, direction):
    """Flatten ``nan_tree`` and overwrite its leaves with the given bounds.

    Args:
        nan_tree (pytree): Tree with the structure of params whose leaves serve
            as defaults (presumably NaN placeholders -- confirm with caller).
        bounds (pytree or None): User provided bounds. Leaves that are None are
            ignored. If None, ``nan_tree`` is only flattened.
        direction (str): Used as ``data_col`` of the registry that flattens
            ``nan_tree`` and as the label in the error message.

    Returns:
        np.ndarray: 1d float64 array with the flattened, updated leaves.

    Raises:
        InvalidBoundsError: If a non-None bounds leaf has a name that does not
            occur among the leaf names of ``nan_tree``.

    """
    data_registry = get_registry(extended=True, data_col=direction)
    flat_values = tree_leaves(nan_tree, registry=data_registry)

    if bounds is not None:
        plain_registry = get_registry(extended=True)
        bound_leaves = tree_leaves(bounds, registry=plain_registry)

        # Names are only used as matching keys between the two trees.
        sep = "$" * 10
        param_keys = leaf_names(nan_tree, registry=plain_registry, separator=sep)
        bound_keys = leaf_names(bounds, registry=plain_registry, separator=sep)

        merged = dict(zip(param_keys, flat_values))

        unmatched_names = []
        unmatched_bounds = []
        for key, leaf in zip(bound_keys, bound_leaves):
            # A None leaf means that the corresponding subtree of params has
            # no bounds, so there is nothing to overwrite.
            if leaf is None:
                continue
            if key in merged:
                merged[key] = leaf
            else:
                unmatched_names.append(key)
                unmatched_bounds.append(leaf)

        if unmatched_bounds:
            msg = (
                f"{direction} could not be matched to params pytree. The bounds "
                f"{unmatched_bounds} with names {unmatched_names} are not part of "
                "params."
            )
            raise InvalidBoundsError(msg)

        flat_values = list(merged.values())

    return np.array(flat_values, dtype=np.float64)
def get_params_groups_and_short_names(params, free_mask, max_group_size=8):
    """Create parameter groups and short names.

    Args:
        params (pytree): parameters as supplied by the user.
        free_mask (np.array): 1d boolean array of same length as params, identifying
            the free parameters.
        max_group_size (int): maximal allowed size of a group. Groups that are larger
            than this will be split.

    Returns:
        groups (list): list of strings and None. For each entry in flat params the key
            of the group to which the parameter belongs. None if the parameter is not
            free.
        names (list): list of the parameter names to be displayed in the dashboard.

    """
    sep = "$$$+++"
    registry = get_registry(extended=True)
    paths = leaf_names(params, registry=registry, separator=sep)
    split_paths = [path.split(sep) for path in paths]

    groups = []
    names = []
    for path_list, is_free in zip(split_paths, free_mask):
        group, name = _get_group_and_name(path_list, is_free)
        groups.append(group)
        names.append(name)

    # if every parameter has its own group, they should all actually be in one
    # group. The list is wrapped in an object Series because passing a plain list
    # to pd.unique is deprecated.
    if len(pd.unique(pd.Series(groups, dtype=object))) == len(groups):
        groups = ["Parameters"] * len(groups)

    # pd.value_counts(list) is deprecated; Series.value_counts has the same
    # defaults (sorted by count, missing entries such as None are dropped).
    counts = pd.Series(groups, dtype=object).value_counts()
    to_be_split = counts[counts > max_group_size]
    for group_name, n_occurrences in to_be_split.items():
        split_group_names = _split_long_group(
            group_name=group_name,
            n_occurrences=n_occurrences,
            max_group_size=max_group_size,
        )
        groups = _replace_too_common_groups(groups, group_name, split_group_names)

    return groups, names
) else: raise ValueError(f"Invalid kind: {kind}") if return_type == "array": out = raw elif return_type == "pytree": out = matrix_to_block_tree( raw, outer_tree=self._params, inner_tree=self._empirical_moments, ) elif return_type == "dataframe": registry = get_registry(extended=True) row_names = self._internal_estimates.names col_names = leaf_names(self._empirical_moments, registry=registry) out = pd.DataFrame( data=raw, index=row_names, columns=col_names, ) else: msg = ( f"Invalid return type: {return_type}. Valid are 'pytree', 'array' " "and 'dataframe'") raise ValueError(msg) return out def to_pickle(self, path): """Save the MomentsResult object to pickle.
def get_tree_converter(
    params,
    lower_bounds,
    upper_bounds,
    func_eval,
    primary_key,
    derivative_eval=None,
    soft_lower_bounds=None,
    soft_upper_bounds=None,
    add_soft_bounds=False,
):
    """Get flatten and unflatten functions for criterion and its derivative.

    The returned converter bundles the conversions of parameters, derivatives and
    criterion outputs between the user provided pytree structure and flat
    representations. They are bundled (instead of being handled separately)
    because the derivative conversion needs to know about the structure of both
    params and the criterion output.

    Args:
        params (pytree): The user provided parameters.
        lower_bounds (pytree): The user provided lower bounds.
        upper_bounds (pytree): The user provided upper bounds.
        func_eval (float, dict or pytree): An evaluation of ``func`` at ``params``.
            Used to determine how the function output has to be transformed for
            the optimizer.
        primary_key (str): One of "value", "contributions" and
            "root_contributions". Used to determine how the function and
            derivative output has to be transformed for the optimizer.
        derivative_eval (dict, pytree or None): Evaluation of the derivative of
            func at params. Used for consistency checks.
        soft_lower_bounds (pytree): As lower_bounds.
        soft_upper_bounds (pytree): As upper_bounds.
        add_soft_bounds (bool): Whether soft bounds should be added to the
            flat_params.

    Returns:
        TreeConverter: NamedTuple with flatten and unflatten methods.
        FlatParams: NamedTuple of 1d arrays with flattened bounds and param names.

    """
    registry = get_registry(extended=True)

    leaves, treedef = tree_flatten(params, registry=registry)
    values = np.array(leaves).astype(float)

    hard_lower, hard_upper = get_bounds(
        params=params,
        lower_bounds=lower_bounds,
        upper_bounds=upper_bounds,
        registry=registry,
    )

    # Soft bounds stay None unless they were explicitly requested.
    soft_lower, soft_upper = None, None
    if add_soft_bounds:
        soft_lower, soft_upper = get_bounds(
            params=params,
            lower_bounds=lower_bounds,
            upper_bounds=upper_bounds,
            registry=registry,
            soft_lower_bounds=soft_lower_bounds,
            soft_upper_bounds=soft_upper_bounds,
            add_soft_bounds=add_soft_bounds,
        )

    flat_params = FlatParams(
        values=values,
        lower_bounds=hard_lower,
        upper_bounds=hard_upper,
        names=leaf_names(params, registry=registry),
        soft_lower_bounds=soft_lower,
        soft_upper_bounds=soft_upper,
    )

    converter = TreeConverter(
        params_flatten=_get_params_flatten(registry=registry),
        params_unflatten=_get_params_unflatten(registry=registry, treedef=treedef),
        func_flatten=_get_func_flatten(
            registry=registry,
            func_eval=func_eval,
            primary_key=primary_key,
        ),
        derivative_flatten=_get_derivative_flatten(
            registry=registry,
            primary_key=primary_key,
            params=params,
            func_eval=func_eval,
            derivative_eval=derivative_eval,
        ),
    )

    return converter, flat_params
def dashboard_app(
    doc,
    session_data,
    updating_options,
):
    """Create plots showing the development of the criterion and parameters.

    Args:
        doc (bokeh.Document): Argument required by bokeh.
        session_data (dict): Infos to be passed between and within apps.
            Keys of this app's entry are:
            - last_retrieved (int): last iteration currently in the
              ColumnDataSource.
            - database_path (str or pathlib.Path)
            - callbacks (dict): dictionary to be populated with callbacks.
        updating_options (dict): Specification how to update the plotting data.
            It contains rollover, update_frequency, update_chunk, jump and stride.

    """
    # Style the document. FileSystemLoader needs the folder as a string, not as
    # a pathlib.Path.
    templates_dir = str(Path(__file__).resolve().parent)
    jinja_env = Environment(loader=FileSystemLoader(templates_dir))
    doc.template = jinja_env.get_template("index.html")

    # Process inputs.
    database = load_database(path=session_data["database_path"])
    session_data["last_retrieved"] = _calculate_start_point(
        database, updating_options
    )

    # Collect the columns of the start_params DataFrame.
    registry = get_registry(extended=True)
    params_tree = read_start_params(path_or_database=database)
    flat_values = tree_just_flatten(tree=params_tree, registry=registry)
    long_names = leaf_names(params_tree, registry=registry)

    problem_rows = read_last_rows(
        database=database,
        table_name="optimization_problem",
        n_rows=1,
        return_type="dict_of_lists",
    )
    group_labels, display_names = get_params_groups_and_short_names(
        params=params_tree,
        free_mask=problem_rows["free_mask"][0],
    )

    columns = {
        "full_name": long_names,
        "name": display_names,
        "group": group_labels,
        "value": flat_values,
    }
    start_params = pd.DataFrame(columns)
    start_params["id"] = _create_id_column(start_params)

    ids_by_group = _map_group_to_other_column(start_params, "id")
    names_by_group = _map_group_to_other_column(start_params, "name")
    criterion_history, params_history = _create_cds_for_dashboard(ids_by_group)

    # Create the page elements.
    headline = Div(
        text="""<h1 style="font-size:30px;">estimagic Dashboard</h1>""",
        sizing_mode="scale_width",
    )
    title = Row(
        children=[headline],
        name="title",
        margin=(5, 5, -20, 5),
    )

    plots = _create_initial_plots(
        criterion_history=criterion_history,
        params_history=params_history,
        group_to_param_ids=ids_by_group,
        group_to_param_names=names_by_group,
    )

    restart_button = _create_restart_button(
        doc=doc,
        database=database,
        session_data=session_data,
        start_params=start_params,
        updating_options=updating_options,
    )
    button_row = Row(
        children=[restart_button],
        name="button_row",
    )

    # Add the elements to the bokeh Document.
    layout = Column(
        children=[title, button_row, *plots],
        sizing_mode="stretch_width",
    )
    doc.add_root(layout)

    # Start the convergence plot immediately.
    restart_button.active = True
def test_leaf_names_partially_numeric_df(other_df):
    """Leaf names of ``other_df`` are the six ``<row>_<column>`` combinations."""
    expected = [
        "alpha_b",
        "alpha_c",
        "beta_b",
        "beta_c",
        "gamma_b",
        "gamma_c",
    ]
    extended_registry = get_registry(extended=True)
    got = leaf_names(other_df, registry=extended_registry)
    assert got == expected
def test_leaf_names_df_with_value_column(value_df):
    """Leaf names of ``value_df`` are exactly "alpha", "beta" and "gamma"."""
    expected = ["alpha", "beta", "gamma"]
    extended_registry = get_registry(extended=True)
    got = leaf_names(value_df, registry=extended_registry)
    assert got == expected
def params_plot(
    result,
    selector=None,
    max_evaluations=None,
    template=PLOTLY_TEMPLATE,
    show_exploration=False,
):
    """Plot the params history of an optimization.

    Args:
        result (Union[OptimizeResult, pathlib.Path, str]): An optimization result
            with collected history or the path to a log file.
        selector (callable): A callable that takes params and returns a subset
            of params. If provided, only the selected subset of params is plotted.
        max_evaluations (int): Clip the params history after that many entries.
        template (str): The template for the figure. Defaults to
            ``PLOTLY_TEMPLATE``.
        show_exploration (bool): If True, exploration samples of a multistart
            optimization are visualized. Default is False.

    Returns:
        plotly.graph_objs._figure.Figure: The figure.

    Raises:
        ValueError: If ``result`` is neither an OptimizeResult nor a str or Path.

    """
    # ==================================================================================
    # Process inputs
    # ==================================================================================

    if isinstance(result, OptimizeResult):
        data = _extract_plotting_data_from_results_object(
            result,
            stack_multistart=True,
            show_exploration=show_exploration,
            plot_name="params_plot",
        )
        start_params = result.start_params
    elif isinstance(result, (str, Path)):
        data = _extract_plotting_data_from_database(
            result,
            stack_multistart=True,
            show_exploration=show_exploration,
        )
        start_params = data["start_params"]
    else:
        raise ValueError("result must be an OptimizeResult or a path to a log file.")

    if data["stacked_local_histories"] is not None:
        history = data["stacked_local_histories"]["params"]
    else:
        history = data["history"]["params"]

    # ==================================================================================
    # Create figure
    # ==================================================================================

    fig = go.Figure()

    registry = get_registry(extended=True)

    # One row per flat parameter, one column per entry of the history.
    hist_arr = np.array([tree_just_flatten(p, registry=registry) for p in history]).T
    names = leaf_names(start_params, registry=registry)

    if selector is not None:
        # Replace every leaf by its position in the flat params, so that the
        # leaves returned by the selector are the positions of the selection.
        flat, treedef = tree_flatten(start_params, registry=registry)
        helper = tree_unflatten(treedef, list(range(len(flat))), registry=registry)
        # An explicit integer dtype keeps an empty selection usable as an index;
        # np.array([]) would be a float array and fail when indexing hist_arr.
        selected = np.array(
            tree_just_flatten(selector(helper), registry=registry), dtype=int
        )
        names = [names[i] for i in selected]
        hist_arr = hist_arr[selected]

    # The loop variable is deliberately not called ``data`` to avoid shadowing the
    # plotting data extracted above.
    for name, values in zip(names, hist_arr):
        if max_evaluations is not None:
            # Slicing is a no-op for histories that are already short enough.
            values = values[:max_evaluations]

        trace = go.Scatter(
            x=np.arange(len(values)),
            y=values,
            mode="lines",
            name=name,
        )
        fig.add_trace(trace)

    fig.update_layout(
        template=template,
        xaxis_title_text="No. of criterion evaluations",
        yaxis_title_text="Parameter value",
        legend={"yanchor": "top", "xanchor": "right", "y": 0.95, "x": 0.95},
    )

    return fig
def test_calculate_estimation_summary():
    """Run calculate_estimation_summary on a small nested example.

    The input tree has a Series leaf "a" with one entry and a DataFrame leaf "b"
    with one row and two columns, i.e. three flat parameters in total.
    """
    # input data
    summary_data = {
        "value": {
            "a": pd.Series([0], index=["i"]),
            "b": pd.DataFrame({"c1": [1], "c2": [2]}),
        },
        "standard_error": {
            "a": pd.Series([0.1], index=["i"]),
            "b": pd.DataFrame({"c1": [0.2], "c2": [0.3]}),
        },
        "ci_lower": {
            "a": pd.Series([-0.2], index=["i"]),
            "b": pd.DataFrame({"c1": [-0.4], "c2": [-0.6]}),
        },
        "ci_upper": {
            "a": pd.Series([0.2], index=["i"]),
            "b": pd.DataFrame({"c1": [0.4], "c2": [0.6]}),
        },
        "p_value": {
            "a": pd.Series([0.001], index=["i"]),
            "b": pd.DataFrame({"c1": [0.2], "c2": [0.07]}),
        },
        "free": np.array([True, True, True]),
    }

    registry = get_registry(extended=True)
    names = leaf_names(summary_data["value"], registry=registry)
    free_names = names

    # function call
    summary = calculate_estimation_summary(summary_data, names, free_names)

    # expectations
    expectation = {
        "a": pd.DataFrame(
            {
                "value": 0,
                "standard_error": 0.1,
                "ci_lower": -0.2,
                "ci_upper": 0.2,
                "p_value": 0.001,
                "free": True,
                "stars": "***",
            },
            index=["i"],
        ),
        "b": pd.DataFrame(
            {
                "value": [1, 2],
                "standard_error": [0.2, 0.3],
                "ci_lower": [-0.4, -0.6],
                "ci_upper": [0.4, 0.6],
                # must mirror the p-values of the input above (0.2 and 0.07)
                "p_value": [0.2, 0.07],
                "free": [True, True],
                "stars": ["", "*"],
            },
            index=pd.MultiIndex.from_tuples([(0, "c1"), (0, "c2")]),
        ),
    }

    # NOTE(review): the result of tree_equal is not asserted, so this test cannot
    # fail on a mismatch. Presumably tree_equal returns a bool -- confirm, check
    # that the dtypes of the expectation match the actual output, and then turn
    # this call into an assert.
    tree_equal(summary, expectation)