def ci(self, ci_method="percentile", ci_level=0.95):
    """Compute lower and upper confidence bounds for the base outcome.

    Args:
        ci_method (str): Name of the method used to compute the confidence
            intervals. Defaults to "percentile".
        ci_level (float): Confidence level of the intervals. Defaults to 0.95.

    Returns:
        Any: Pytree with the structure of the base outcome that holds the lower
            bounds of the confidence intervals.
        Any: Pytree with the structure of the base outcome that holds the upper
            bounds of the confidence intervals.

    """
    registry = get_registry(extended=True)
    flat_base, treedef = tree_flatten(self._base_outcome, registry=registry)

    # The bounds are computed on the flat representation and mapped back into
    # the tree structure of the base outcome afterwards.
    flat_lower, flat_upper = calculate_ci(
        flat_base, self._internal_outcomes, ci_method, ci_level
    )
    bounds = [
        tree_unflatten(treedef, flat_bound, registry=registry)
        for flat_bound in (flat_lower, flat_upper)
    ]
    return bounds[0], bounds[1]
def matrix_to_block_tree(matrix, outer_tree, inner_tree):
    """Convert a matrix (2-dimensional array) to block-tree.

    A block tree most often arises when one applies an operation to a function that
    maps between two trees. For certain functions this results in a 2-dimensional data
    array. Two main examples are the Jacobian of the function
    f : inner_tree -> outer_tree, which results in a block tree structure, or the
    covariance matrix of a tree, in which case outer_tree = inner_tree.

    Args:
        matrix (numpy.ndarray): 2d representation of the block tree. Has shape (m, n).
        outer_tree: A pytree. If flattened to scalars has length m.
        inner_tree: A pytree. If flattened to scalars has length n.

    Returns:
        block_tree: A (block) pytree. It has the structure of outer_tree and each of
            its leaves is replaced by a tree with the structure of inner_tree.

    """
    _check_dimensions_matrix(matrix, outer_tree, inner_tree)

    flat_outer, treedef_outer = tree_flatten(outer_tree)
    flat_inner, treedef_inner = tree_flatten(inner_tree)

    flat_outer_np = [_convert_to_numpy(leaf, only_pandas=True) for leaf in flat_outer]
    flat_inner_np = [_convert_to_numpy(leaf, only_pandas=True) for leaf in flat_inner]

    shapes_outer = [np.shape(a) for a in flat_outer_np]
    shapes_inner = [np.shape(a) for a in flat_inner_np]

    # The cumulative leaf sizes (all but the last) are the split points between
    # blocks. np.prod is used because np.product was removed in NumPy 2.0.
    block_bounds_outer = np.cumsum([int(np.prod(s)) for s in shapes_outer[:-1]])
    block_bounds_inner = np.cumsum([int(np.prod(s)) for s in shapes_inner[:-1]])

    blocks = []
    for leaf_outer, s1, submat in zip(
        flat_outer, shapes_outer, np.split(matrix, block_bounds_outer, axis=0)
    ):
        row = []
        for leaf_inner, s2, block_values in zip(
            flat_inner, shapes_inner, np.split(submat, block_bounds_inner, axis=1)
        ):
            # Give the block the combined shape of the two leaves it belongs to.
            raw_block = block_values.reshape((*s1, *s2))
            block = _convert_raw_block_to_pandas(raw_block, leaf_outer, leaf_inner)
            row.append(block)

        blocks.append(row)

    block_tree = tree_unflatten(
        treedef_outer, [tree_unflatten(treedef_inner, row) for row in blocks]
    )

    return block_tree
def hessian_to_block_tree(hessian, f_tree, params_tree):
    """Convert a Hessian array to block-tree format.

    Remark: In comparison to Jax we need this formatting function because we calculate
    the second derivative using second-order finite differences. Jax computes the
    second derivative by applying their jacobian function twice, which produces the
    desired block-tree shape of the Hessian automatically. If we apply our first
    derivative function twice we get the same block-tree shape.

    Args:
        hessian (np.ndarray): The Hessian, 2- or 3-dimensional array representation of
            the resulting block-tree. A 2-dimensional Hessian gets a leading axis of
            length one before it is split.
        f_tree (pytree): The function evaluated at params_tree.
        params_tree (pytree): The params_tree.

    Returns:
        hessian_block_tree (pytree): Tree with the structure of f_tree. Each of its
            leaves is replaced by a block tree over (params_tree, params_tree).

    """
    _check_dimensions_hessian(hessian, f_tree, params_tree)

    if hessian.ndim == 2:
        hessian = hessian[np.newaxis]

    flat_f, treedef_f = tree_flatten(f_tree)
    flat_p, treedef_p = tree_flatten(params_tree)

    flat_f_np = [_convert_to_numpy(leaf, only_pandas=True) for leaf in flat_f]
    flat_p_np = [_convert_to_numpy(leaf, only_pandas=True) for leaf in flat_p]

    shapes_f = [np.shape(a) for a in flat_f_np]
    shapes_p = [np.shape(a) for a in flat_p_np]

    # The cumulative leaf sizes (all but the last) are the split points between
    # blocks. np.prod is used because np.product was removed in NumPy 2.0.
    block_bounds_f = np.cumsum([int(np.prod(s)) for s in shapes_f[:-1]])
    block_bounds_p = np.cumsum([int(np.prod(s)) for s in shapes_p[:-1]])

    sub_block_trees = []
    for s0, subarr in zip(shapes_f, np.split(hessian, block_bounds_f, axis=0)):
        blocks = []
        for leaf_outer, s1, submat in zip(
            flat_p, shapes_p, np.split(subarr, block_bounds_p, axis=1)
        ):
            row = []
            for leaf_inner, s2, block_values in zip(
                flat_p, shapes_p, np.split(submat, block_bounds_p, axis=2)
            ):
                raw_block = block_values.reshape((*s0, *s1, *s2))
                # Drop all axes of length one from the reshaped block.
                raw_block = np.squeeze(raw_block)
                block = _convert_raw_block_to_pandas(
                    raw_block, leaf_outer, leaf_inner
                )
                row.append(block)
            blocks.append(row)
        block_tree = tree_unflatten(
            treedef_p, [tree_unflatten(treedef_p, row) for row in blocks]
        )
        sub_block_trees.append(block_tree)

    hessian_block_tree = tree_unflatten(treedef_f, sub_block_trees)
    return hessian_block_tree
def _read_optimization_history(database, params_treedef, registry):
    """Read criterion values, parameters and runtimes of an optimization.

    All rows of the "optimization_iterations" table are read. Rows whose "value"
    entry is None are skipped.

    Args:
        database: Database handle that is passed on to read_new_rows.
        params_treedef: Tree definition used to unflatten the stored params.
        registry: Pytree registry used to unflatten the stored params.

    Returns:
        dict: History with the entries "params" (list of pytrees), "criterion"
            (list of criterion values) and "runtime" (numpy.ndarray of timestamps
            relative to the first kept row). If no row is kept, the lists and the
            array are empty.

    """
    raw_res, _ = read_new_rows(
        database=database,
        table_name="optimization_iterations",
        last_retrieved=0,
        return_type="list_of_dicts",
    )

    history = {"params": [], "criterion": [], "runtime": []}
    for data in raw_res:
        if data["value"] is not None:
            params = tree_unflatten(params_treedef, data["params"], registry=registry)
            history["params"].append(params)
            history["criterion"].append(data["value"])
            history["runtime"].append(data["timestamp"])

    times = np.array(history["runtime"])
    # Without any kept row there is no first timestamp to subtract; indexing
    # times[0] would raise an IndexError in that case.
    if times.size > 0:
        times -= times[0]
    history["runtime"] = times

    return history
def _read_optimization_iteration(database, iteration, params_treedef, registry):
    """Read one row of the optimization iterations table and unflatten its params.

    Args:
        database: Database handle that is passed on to the reading functions.
        iteration (int): Requested iteration. Iteration 0 maps to rowid 1. Negative
            values are counted backwards from the last stored row.
        params_treedef: Tree definition used to unflatten the stored params.
        registry: Pytree registry used to unflatten the stored params.

    Returns:
        dict: The stored row, with the "params" entry replaced by a pytree.

    Raises:
        IndexError: If no row is found for the requested iteration.

    """
    table = "optimization_iterations"

    if iteration < 0:
        newest_rows = read_last_rows(
            database=database,
            table_name=table,
            n_rows=1,
            return_type="list_of_dicts",
        )
        # iteration is negative in this branch, so adding it walks backwards
        # from the highest rowid.
        rowid = newest_rows[0]["rowid"] + iteration + 1
    else:
        rowid = iteration + 1

    rows = read_specific_row(
        database,
        table_name=table,
        rowid=rowid,
        return_type="list_of_dicts",
    )

    if len(rows) == 0:
        raise IndexError(f"Invalid iteration requested: {iteration}")

    row = rows[0]
    row["params"] = tree_unflatten(params_treedef, row["params"], registry=registry)
    return row
def transform_free_values_to_params_tree(values, free_params, params):
    """Place free values in a NaN-filled flat vector and shape it like params.

    Args:
        values: Values of the free parameters.
        free_params: Object with a ``free_mask`` attribute that marks the positions
            of the free parameters in the flat parameter vector.
        params: Pytree whose structure is used for the result.

    Returns:
        Pytree with the structure of params. Positions that are not marked as free
        contain NaN.

    """
    free_mask = free_params.free_mask

    # Every position starts out as NaN; only the free positions get overwritten.
    flat_values = np.full(shape=len(free_mask), fill_value=np.nan)
    flat_values[np.ix_(free_mask)] = values

    return tree_unflatten(params, flat_values, registry=get_registry(extended=True))
def _get_selection_indices(params, selector):
    """Determine the flat positions of the params picked by a selector.

    Args:
        params: Pytree of parameters.
        selector (callable): Takes a pytree with the structure of params and
            returns a subset of it.

    Returns:
        numpy.ndarray: Integer positions of the selected entries in the flat params.
        int: Number of flat params.

    """
    registry = get_registry(extended=True)
    leaves, treedef = tree_flatten(params, registry=registry)
    n_params = len(leaves)

    # Each leaf is replaced by its own flat position. Whatever the selector
    # returns from this tree therefore is the set of selected positions.
    position_tree = tree_unflatten(
        treedef, np.arange(n_params, dtype=int), registry=registry
    )
    picked = tree_just_flatten(selector(position_tree), registry=registry)

    return np.array(picked, dtype=int), n_params
def outcomes(self):
    """Return the bootstrap outcomes in the structure of the base outcome.

    Returns:
        List[Any]: The bootstrap outcomes as a list of pytrees.

    """
    registry = get_registry(extended=True)
    _leaves, treedef = tree_flatten(self._base_outcome, registry=registry)

    unflattened = []
    for flat_outcome in self._internal_outcomes:
        unflattened.append(tree_unflatten(treedef, flat_outcome, registry=registry))
    return unflattened
def tree_params_converter(tree_params):
    """Build a TreeConverter for pytrees that are structured like tree_params.

    Args:
        tree_params: Pytree whose structure is used for unflattening.

    Returns:
        TreeConverter: Converter whose ``params_flatten`` turns a pytree into a
            numpy array and whose ``params_unflatten`` turns an array back into a
            pytree. ``func_flatten`` and ``derivative_flatten`` are None.

    """
    registry = get_registry(extended=True)
    _leaves, treedef = tree_flatten(tree_params, registry=registry)

    def _flatten(params):
        return np.array(tree_just_flatten(params, registry=registry))

    def _unflatten(x):
        # The array is converted to a plain list before it is unflattened.
        return tree_unflatten(treedef, x.tolist(), registry=registry)

    return TreeConverter(
        params_flatten=_flatten,
        params_unflatten=_unflatten,
        func_flatten=None,
        derivative_flatten=None,
    )
def se(self):
    """Compute standard errors from the diagonal of the internal covariance.

    Returns:
        Any: The standard errors, arranged as a pytree with the structure of the
            base outcome.

    """
    flat_se = np.sqrt(np.diagonal(self._internal_cov))

    registry = get_registry(extended=True)
    _leaves, treedef = tree_flatten(self._base_outcome, registry=registry)

    return tree_unflatten(treedef, flat_se, registry=registry)
def params_plot(
    result,
    selector=None,
    max_evaluations=None,
    template=PLOTLY_TEMPLATE,
    show_exploration=False,
):
    """Plot the params history of an optimization.

    Args:
        result (Union[OptimizeResult, pathlib.Path, str]): An optimization result
            with collected history, or the path to a log file.
        selector (callable): A callable that takes params and returns a subset
            of params. If provided, only the selected subset of params is plotted.
        max_evaluations (int): Clip the params history after that many entries.
        template (str): The template for the figure. Defaults to PLOTLY_TEMPLATE.
        show_exploration (bool): If True, exploration samples of a multistart
            optimization are visualized. Default is False.

    Returns:
        plotly.graph_objs._figure.Figure: The figure.

    Raises:
        ValueError: If result is neither an OptimizeResult nor a str or Path.

    """
    # ==================================================================================
    # Process inputs
    # ==================================================================================

    if isinstance(result, OptimizeResult):
        data = _extract_plotting_data_from_results_object(
            result,
            stack_multistart=True,
            show_exploration=show_exploration,
            plot_name="params_plot",
        )
        start_params = result.start_params
    elif isinstance(result, (str, Path)):
        data = _extract_plotting_data_from_database(
            result,
            stack_multistart=True,
            show_exploration=show_exploration,
        )
        start_params = data["start_params"]
    else:
        raise ValueError("result must be an OptimizeResult or a path to a log file.")

    # Prefer the stacked local histories when they are available.
    if data["stacked_local_histories"] is not None:
        history = data["stacked_local_histories"]["params"]
    else:
        history = data["history"]["params"]

    # ==================================================================================
    # Create figure
    # ==================================================================================

    fig = go.Figure()

    registry = get_registry(extended=True)

    # After transposing, each row of hist_arr is the history of one flat parameter.
    hist_arr = np.array([tree_just_flatten(p, registry=registry) for p in history]).T
    names = leaf_names(start_params, registry=registry)

    if selector is not None:
        flat, treedef = tree_flatten(start_params, registry=registry)
        # Tree of flat positions; the selector applied to it yields the positions
        # of the selected parameters.
        helper = tree_unflatten(treedef, list(range(len(flat))), registry=registry)
        # dtype=int keeps an empty selection usable as an index array. Without it
        # numpy creates an empty float64 array that cannot be used for indexing.
        selected = np.array(
            tree_just_flatten(selector(helper), registry=registry), dtype=int
        )
        names = [names[i] for i in selected]
        hist_arr = hist_arr[selected]

    for name, values in zip(names, hist_arr):
        if max_evaluations is not None and len(values) > max_evaluations:
            values = values[:max_evaluations]

        trace = go.Scatter(
            x=np.arange(len(values)),
            y=values,
            mode="lines",
            name=name,
        )
        fig.add_trace(trace)

    fig.update_layout(
        template=template,
        xaxis_title_text="No. of criterion evaluations",
        yaxis_title_text="Parameter value",
        legend={"yanchor": "top", "xanchor": "right", "y": 0.95, "x": 0.95},
    )

    return fig
def calculate_estimation_summary(
    summary_data,
    names,
    free_names,
):
    """Arrange pre-calculated estimation results in the structure of the estimates.

    Args:
        summary_data (dict): Dictionary of pytrees. Each entry becomes one column of
            the summary. The entries "value" and "p_value" are required; "value"
            also defines the tree structure of the result.
        names (List[str]): List of parameter names. Used as index of the flat
            summary.
        free_names (List[str]): List of parameter names for free parameters.

    Returns:
        pytree: A pytree with the same structure as summary_data["value"]. Each leaf
            is replaced by a DataFrame with one column per entry of summary_data
            and an additional "stars" column. The "stars" column is only filled for
            the parameters in free_names.

    """
    # ==================================================================================
    # Flatten summary and construct data frame for flat estimates
    # ==================================================================================

    registry = get_registry(extended=True)

    flat_columns = {}
    for key, tree in summary_data.items():
        flat_columns[key] = tree_just_flatten(tree, registry=registry)

    df = pd.DataFrame(flat_columns, index=names)

    free_p_values = df.loc[free_names, "p_value"]
    df.loc[free_names, "stars"] = pd.cut(
        free_p_values,
        bins=[-1, 0.01, 0.05, 0.1, 2],
        labels=["***", "**", "*", ""],
    )

    # ==================================================================================
    # Map summary data into params tree structure
    # ==================================================================================

    value_tree = summary_data["value"]

    # Tree with the structure of the estimates whose entries are the index labels
    # of df.
    indices = tree_unflatten(value_tree, names, registry=registry)

    # Both trees are flattened with the default registry here, not the extended one.
    estimates_flat = tree_just_flatten(value_tree)
    indices_flat = tree_just_flatten(indices)

    def _rows_and_index(index_leaf, params_leaf):
        """Return the df labels of one leaf and the index of its summary frame."""
        if np.isscalar(params_leaf):
            return [index_leaf], [0]

        if isinstance(params_leaf, pd.DataFrame):
            if "value" in params_leaf:
                return index_leaf["value"].to_numpy().flatten(), params_leaf.index

            # Regular DataFrame: the new index is the product of the existing
            # index and the columns.
            tuples = []
            for row in params_leaf.index:
                row_key = row if isinstance(row, tuple) else (row,)
                for col in params_leaf.columns:
                    tuples.append((*row_key, col))
            return index_leaf.to_numpy().flatten(), pd.MultiIndex.from_tuples(tuples)

        if isinstance(params_leaf, pd.Series):
            return index_leaf.to_numpy().flatten(), params_leaf.index

        # array case (numpy or jax)
        if params_leaf.ndim == 1:
            array_index = pd.RangeIndex(stop=params_leaf.size)
        else:
            array_index = pd.MultiIndex.from_arrays(
                np.unravel_index(np.arange(params_leaf.size), params_leaf.shape)
            )
        return index_leaf.flatten(), array_index

    summary_flat = []
    for index_leaf, params_leaf in zip(indices_flat, estimates_flat):
        loc, index = _rows_and_index(index_leaf, params_leaf)
        df_chunk = df.loc[loc]
        df_chunk.index = index
        summary_flat.append(df_chunk)

    return tree_unflatten(summary_data["value"], summary_flat)
def params_unflatten(x):
    """Turn flat values into a pytree.

    ``treedef`` and ``registry`` are not parameters; they are taken from the
    surrounding scope.
    """
    leaves = list(x)
    return tree_unflatten(treedef=treedef, leaves=leaves, registry=registry)
def _read_multistart_optimization_history(
    database, params_treedef, registry, direction
):
    """Read multistart histories of values, parameters and other information.

    Args:
        database: Database handle that is passed on to the reading functions.
        params_treedef: Tree definition used to unflatten the stored params.
        registry: Pytree registry used to unflatten the stored params.
        direction (str): Either "minimize" or "maximize". Determines which
            optimization step counts as the best one.

    Returns:
        tuple:
        - dict: history of the step that attains the best criterion value
        - list or None: histories (dicts) of all other optimization steps, None if
            there are none
        - dict or None: exploration phase, sorted from best to worst criterion
            value, None if there are no exploration rows

    Raises:
        ValueError: If direction is neither "minimize" nor "maximize".

    """
    # ==================================================================================
    # Process raw data
    # ==================================================================================
    steps = read_steps_table(database)

    raw_res, _ = read_new_rows(
        database=database,
        table_name="optimization_iterations",
        last_retrieved=0,
        return_type="list_of_dicts",
    )

    history = {"params": [], "criterion": [], "runtime": [], "step": []}
    for data in raw_res:
        if data["value"] is not None:
            params = tree_unflatten(params_treedef, data["params"], registry=registry)
            history["params"].append(params)
            history["criterion"].append(data["value"])
            history["runtime"].append(data["timestamp"])
            history["step"].append(data["step"])

    # Express the timestamps relative to the first kept row.
    times = np.array(history["runtime"])
    times -= times[0]
    history["runtime"] = times

    # ==================================================================================
    # Format data as data frames
    # ==================================================================================
    df = pd.DataFrame(history)
    df = df.merge(steps[["rowid", "type"]], left_on="step", right_on="rowid")
    df = df.drop(columns="rowid")

    # ==================================================================================
    # Extract data from df
    # ==================================================================================
    exploration = df.query("type == 'exploration'").drop(columns=["step", "type"])

    histories = df.query("type == 'optimization'")
    histories = histories.drop(columns="type")
    histories = histories.set_index("step", append=True)

    # ==================================================================================
    # The best history is given by the history that attains the global minimum or
    # maximum. All other histories are defined as local histories.

    if direction == "minimize":
        best_idx = histories["criterion"].groupby(level="step").min().idxmin()
        exploration = exploration.sort_values(by="criterion", ascending=True)
    elif direction == "maximize":
        best_idx = histories["criterion"].groupby(level="step").max().idxmax()
        exploration = exploration.sort_values(by="criterion", ascending=False)
    else:
        raise ValueError(
            f"direction must be 'minimize' or 'maximize', not {direction!r}."
        )

    history = histories.xs(best_idx, level="step").to_dict(orient="list")

    exploration = None if len(exploration) == 0 else exploration
    if exploration is not None:
        exploration = exploration.to_dict(orient="list")

    local_histories = []
    for idx in histories.index.get_level_values("step").unique().difference(
        [best_idx]
    ):
        _local_history = histories.xs(idx, level="step").to_dict(orient="list")
        local_histories.append(_local_history)

    local_histories = None if len(local_histories) == 0 else local_histories

    return history, local_histories, exploration
def test_unflatten_df_with_value_column(value_df):
    """Unflattening new leaves replaces the "value" column of the DataFrame."""
    registry = get_registry(extended=True)
    _leaves, treedef = tree_flatten(value_df, registry=registry)

    new_values = [10, 11, 12]
    rebuilt = tree_unflatten(treedef, new_values, registry=registry)

    expected = value_df.assign(value=[10, 11, 12])
    assert rebuilt.equals(expected)
def test_unflatten_partially_numeric_df(other_df):
    """Unflattening six leaves fills the columns "b" and "c" row by row."""
    registry = get_registry(extended=True)
    _leaves, treedef = tree_flatten(other_df, registry=registry)

    new_values = [1, 2, 3, 4, 5, 6]
    rebuilt = tree_unflatten(treedef, new_values, registry=registry)

    expected = other_df.assign(b=[1, 3, 5], c=[2, 4, 6])
    assert_frame_equal(rebuilt, expected, check_dtype=False)
def _unflatten_if_not_nan(leaves, treedef, registry): if isinstance(leaves, np.ndarray): out = tree_unflatten(treedef, leaves, registry=registry) else: out = leaves return out