def test_batch_evaluator_with_list_unpacking(batch_evaluator, n_cores):
    """Evaluate ``add_x_and_y`` over tuples using ``unpack_symbol="*"``."""
    evaluate = process_batch_evaluator(batch_evaluator)
    summand_pairs = [(1, 2), (3, 4)]

    results = evaluate(
        func=add_x_and_y,
        arguments=summand_pairs,
        n_cores=n_cores,
        unpack_symbol="*",
    )

    assert results == [3, 7]
def test_batch_evaluator_with_dict_unpacking(batch_evaluator, n_cores):
    """Evaluate ``add_x_and_y`` over dicts using ``unpack_symbol="**"``."""
    evaluate = process_batch_evaluator(batch_evaluator)
    keyword_sets = [{"x": 1, "y": 2}, {"x": 3, "y": 4}]

    results = evaluate(
        func=add_x_and_y,
        arguments=keyword_sets,
        n_cores=n_cores,
        unpack_symbol="**",
    )

    assert results == [3, 7]
def test_batch_evaluator_with_unhandled_exceptions(batch_evaluator, n_cores):
    """Expect an AssertionError from ``buggy_func`` with ``error_handling="raise"``."""
    evaluate = process_batch_evaluator(batch_evaluator)
    call_kwargs = {
        "func": buggy_func,
        "arguments": list(range(10)),
        "n_cores": n_cores,
        "error_handling": "raise",
    }

    with pytest.raises(AssertionError):
        evaluate(**call_kwargs)
def wrapper_add_history_collection_via_batch_evaluator(**kwargs):
    """Call ``algorithm`` with a batch evaluator that gathers history entries.

    ``algorithm`` is not defined in this block; it is looked up from the
    surrounding scope. The batch evaluator given in ``kwargs`` (default
    "joblib") is processed and replaced by a wrapper. Whenever the wrapper is
    asked to evaluate a ``partial`` that carries a ``history_container``
    keyword, it re-partials that function with ``history_container=None`` and
    ``return_history_entry=True``, splits the resulting 2-tuples into function
    outputs and history entries, and stores the non-None entries.

    Args:
        **kwargs: Keyword arguments forwarded to ``algorithm``. The entry
            "batch_evaluator" is replaced by the wrapped evaluator.

    Returns:
        The output of ``algorithm`` where "history" is the algorithm's own
        history (if present) followed by the collected entries.

    """
    inner_evaluator = process_batch_evaluator(kwargs.get("batch_evaluator", "joblib"))
    collected = []

    @functools.wraps(inner_evaluator)
    def wrapped_batch_evaluator(*args, **kwargs):
        # the function to evaluate is either the first positional argument
        # or the keyword argument "func"
        func = args[0] if args else kwargs["func"]

        # only our internal criterion function (a partial that knows about a
        # history container) needs special treatment
        is_internal_criterion = (
            isinstance(func, partial) and "history_container" in func.keywords
        )
        if not is_internal_criterion:
            return inner_evaluator(*args, **kwargs)

        # Pass None as history container to switch off history collection
        # inside the criterion function (it would not work with
        # parallelization anyway) and request the history entry as a second
        # return value instead.
        history_func = partial(
            func, history_container=None, return_history_entry=True
        )

        if args:
            call_args = (history_func, *args[1:])
            call_kwargs = kwargs
        else:
            call_args = args
            call_kwargs = {**kwargs, "func": history_func}

        raw_results = inner_evaluator(*call_args, **call_kwargs)

        values = [res[0] for res in raw_results]
        entries = [res[1] for res in raw_results if res[1] is not None]
        collected.extend(entries)

        return values

    algo_kwargs = {**kwargs, "batch_evaluator": wrapped_batch_evaluator}
    result = algorithm(**algo_kwargs)

    result["history"] = (
        (result["history"] + collected) if "history" in result else collected
    )

    return result
def test_batch_evaluator_without_exceptions(batch_evaluator, n_cores):
    """Evaluate ``double`` on 0..9 and compare against the even numbers 0..18."""
    evaluate = process_batch_evaluator(batch_evaluator)
    inputs = list(range(10))

    results = evaluate(func=double, arguments=inputs, n_cores=n_cores)

    assert results == [2 * value for value in range(10)]
def get_bootstrap_outcomes(
    data,
    outcome,
    cluster_by=None,
    rng=None,
    n_draws=1000,
    n_cores=1,
    error_handling="continue",
    batch_evaluator="joblib",
):
    """Compute the outcome statistic on bootstrap samples of the data.

    The inputs are checked, the batch evaluator is processed, bootstrap
    indices are drawn and finally the outcomes are calculated from those
    indices.

    Args:
        data (pandas.DataFrame): original dataset.
        outcome (callable): function of the dataset calculating statistic of
            interest. Returns a general pytree (e.g. pandas Series, dict, numpy
            array, etc.).
        cluster_by (str): column name of the variable to cluster by.
        rng (numpy.random.Generator): A random number generator.
        n_draws (int): number of bootstrap draws.
        n_cores (int): number of jobs for parallelization.
        error_handling (str): One of "continue", "raise". Default "continue"
            which means that bootstrap estimates are only calculated for those
            samples where no errors occur and a warning is produced if any
            error occurs.
        batch_evaluator (str or Callable): Name of a pre-implemented batch
            evaluator (currently 'joblib' and 'pathos_mp') or Callable with the
            same interface as the estimagic batch_evaluators. See
            :ref:`batch_evaluators`.

    Returns:
        estimates (list): List of pytrees of estimated bootstrap outcomes.

    """
    check_inputs(data=data, cluster_by=cluster_by)
    evaluator = process_batch_evaluator(batch_evaluator)

    draw_indices = get_bootstrap_indices(
        data=data,
        rng=rng,
        cluster_by=cluster_by,
        n_draws=n_draws,
    )

    return _get_bootstrap_outcomes_from_indices(
        indices=draw_indices,
        data=data,
        outcome=outcome,
        n_cores=n_cores,
        error_handling=error_handling,
        batch_evaluator=evaluator,
    )
def test_batch_evaluator_with_handled_exceptions(batch_evaluator, n_cores):
    """With ``error_handling="continue"`` every result of ``buggy_func`` is a str."""
    evaluate = process_batch_evaluator(batch_evaluator)

    # warnings emitted during the evaluation are silenced
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        results = evaluate(
            func=buggy_func,
            arguments=list(range(10)),
            n_cores=n_cores,
            error_handling="continue",
        )

        assert all(isinstance(res, str) for res in results)
def _fill_multistart_options_with_defaults(options, params, x, params_to_internal): """Fill options for multistart optimization with defaults.""" defaults = { "sample": None, "n_samples": 10 * len(x), "share_optimizations": 0.1, "sampling_distribution": "uniform", "sampling_method": "sobol" if len(x) <= 200 else "random", "mixing_weight_method": "tiktak", "mixing_weight_bounds": (0.1, 0.995), "convergence_relative_params_tolerance": 0.01, "convergence_max_discoveries": 2, "n_cores": 1, "batch_evaluator": "joblib", "seed": None, "exploration_error_handling": "continue", "optimization_error_handling": "continue", } options = {k.replace(".", "_"): v for k, v in options.items()} out = {**defaults, **options} if "batch_size" not in out: out["batch_size"] = out["n_cores"] else: if out["batch_size"] < out["n_cores"]: raise ValueError("batch_size must be at least as large as n_cores.") out["batch_evaluator"] = process_batch_evaluator(out["batch_evaluator"]) if isinstance(out["mixing_weight_method"], str): out["mixing_weight_method"] = WEIGHT_FUNCTIONS[out["mixing_weight_method"]] if out["sample"] is not None: out["sample"] = process_multistart_sample( out["sample"], params, params_to_internal ) out["n_samples"] = len(out["sample"]) out["n_optimizations"] = max(1, int(out["n_samples"] * out["share_optimizations"])) del out["share_optimizations"] return out
def neldermead_parallel(
    criterion,
    x,
    *,
    init_simplex_method="gao_han",
    n_cores=1,
    adaptive=True,
    stopping_max_iterations=STOPPING_MAX_ITERATIONS,
    convergence_absolute_criterion_tolerance=CONVERGENCE_SECOND_BEST_ABSOLUTE_CRITERION_TOLERANCE,  # noqa: E501
    convergence_absolute_params_tolerance=CONVERGENCE_SECOND_BEST_ABSOLUTE_PARAMS_TOLERANCE,  # noqa: E501
    batch_evaluator="joblib",
):
    """Minimize a function with a parallel Nelder-Mead algorithm.

    Parallel Nelder-Mead algorithm following Lee D., Wiswall M., A parallel
    implementation of the simplex function minimization routine,
    Computational Economics, 2007.

    In every iteration the ``p`` worst points of the simplex are updated in
    parallel, where ``p`` is derived from ``n_cores`` and is always between 1
    and ``max(1, len(x) - 1)``.

    Parameters
    ----------
    criterion (callable): A function that takes a Numpy array_like as
        an argument and return scalar floating point.

    x (array_like): 1-D array of initial value of parameters. Must provide
        ``ndim`` and ``ravel`` (i.e. be a numpy array).

    init_simplex_method (string or callable): Name of the method to create
        initial simplex or callable which takes as an argument initial value
        of parameters and returns initial simplex as j+1 x j array, where j is
        length of x. A string ``name`` is resolved to the module level function
        ``_name``. The default is "gao_han".

    n_cores (int): Degree of parallelization. The default is 1
        (no parallelization).

    adaptive (bool): Adjust parameters of Nelder-Mead algorithm to account
        for simplex size. The default is True.

    stopping_max_iterations (int): Maximum number of algorithm iterations.
        The default is STOPPING_MAX_ITERATIONS.

    convergence_absolute_criterion_tolerance (float): maximal difference
        between function value evaluated on simplex points.
        The default is CONVERGENCE_SECOND_BEST_ABSOLUTE_CRITERION_TOLERANCE.

    convergence_absolute_params_tolerance (float): maximal distance between
        points in the simplex. The default is
        CONVERGENCE_SECOND_BEST_ABSOLUTE_PARAMS_TOLERANCE.

    batch_evaluator (string or callable): See :ref:`batch_evaluators` for
        details. Default "joblib".

    Returns
    -------
    result (dict): Dictionary with the entries

        - "solution_x": simplex point with the smallest criterion value.
        - "solution_criterion": smallest criterion value on the simplex.
        - "n_iterations": number of performed iterations.
        - "success": True if both convergence tolerances were met, False if
          the maximum number of iterations was reached first.
        - "reached_convergence_criterion": message with the reason to stop.

    """
    if x.ndim >= 1:
        x = x.ravel()  # make sure the vector of initial values is one-dimensional

    j = len(x)  # size of the parameter vector

    if n_cores <= 1:
        p = 1  # if number of cores is nonpositive, set it to 1
    else:
        # The degree of parallelization cannot be bigger than the number of
        # parameters minus 1. It must also be at least 1: with p == 0 (one
        # parameter, several cores) no point would be updated and the
        # slicing with -p below would select the wrong rows.
        p = max(1, min(int(n_cores), j - 1))

    # set parameters of Nelder-Mead algorithm
    # for a discussion about Nelder-Mead parameters see Gao F., Han L.,
    # Implementing the Nelder-Mead simplex algorithm with adaptive parameters,
    # Computational Optimization and Applications, 2012
    alpha, gamma, beta, tau = _init_algo_params(adaptive, j)

    # construct initial simplex using one of feasible methods
    # see Wessing, Simon, Proper initialization is crucial for
    # the Nelder-Mead simplex search, Optimization Letters, 2019
    # for a discussion about the choice of initialization
    if not callable(init_simplex_method):
        s = globals()["_" + init_simplex_method](x)
    else:
        s = init_simplex_method(x)

    batch_evaluator = process_batch_evaluator(batch_evaluator)

    # calculate criterion values for the initial simplex
    f_s = np.array(batch_evaluator(func=criterion, arguments=s, n_cores=n_cores))[
        :, None
    ]

    # parallelized function
    def func_parallel(args):
        """Compute the replacement for one of the worst simplex points.

        Returns the new point, its criterion value and a 0/1 flag stacked into
        one array. The flag is 1 if neither reflection, expansion nor
        contraction produced an accepted improvement; the caller shrinks the
        simplex if this is the case for all processes.
        """
        criterion, s_j, s_j_r, f_s_0, f_s_j, f_s_j_1, m = args  # read arguments

        # value of the criterion at the reflection point
        f_s_j_r = criterion(s_j_r)

        if f_s_j_r < f_s_0:  # the reflection point is better than the best point
            s_j_e = m + gamma * (s_j_r - m)  # calculate expansion point
            f_s_j_e = criterion(s_j_e)  # criterion value at the expansion point

            if f_s_j_e < f_s_0:  # the expansion point is better than the best point
                # return the expansion point as a new point
                return np.hstack([s_j_e, f_s_j_e, 0])
            else:  # the expansion point is worse than the best point
                # return the reflection point as a new point
                return np.hstack([s_j_r, f_s_j_r, 0])

        elif f_s_j_r < f_s_j_1:  # reflection point is better than the next worst point
            # return reflection point as a new point
            return np.hstack([s_j_r, f_s_j_r, 0])

        else:  # the reflection point is worse than the next worst point
            if f_s_j_r < f_s_j:
                # reflection point is better than the initial point:
                # calculate outside contraction point
                s_j_c = m + beta * (s_j_r - m)
            else:
                # calculate inside contraction point
                s_j_c = m - beta * (s_j_r - m)

            f_s_j_c = criterion(s_j_c)  # criterion value at the contraction point

            if f_s_j_c < np.minimum(f_s_j, f_s_j_r):
                # the contraction point is better than the original and the
                # reflection point: return it as a new point
                return np.hstack([s_j_c, f_s_j_c, 0])
            else:
                if f_s_j_r < f_s_j:
                    # return reflection point as a new point
                    return np.hstack([s_j_r, f_s_j_r, 1])
                else:
                    # the contraction point is worse than both the reflection
                    # and the initial point: return the old point
                    return np.hstack([s_j, f_s_j, 1])

    optimal = False  # optimization condition, if True stop the algorithm
    iterations = 0  # number of performed iterations

    while not optimal:
        iterations += 1  # new iteration

        # sort points and criterion values in increasing order
        row = np.argsort(f_s.ravel())
        s = np.take(s, row, axis=0)
        f_s = np.take(f_s, row, axis=0)

        # calculate centroid of the j + 1 - p best points
        m = (s[:-p, :].sum(axis=0)) / (j - p + 1)

        # calculate reflection points of the p worst points
        s_j_r = m + alpha * (m - s[-p:, :])

        # calculate new points of simplex; each row returned by func_parallel
        # is split into the point, its criterion value and the shrink flag
        s[-p:, :], f_s[-p:, :], shrink_count = np.split(
            np.vstack(
                batch_evaluator(
                    func=func_parallel,
                    arguments=tuple(
                        (
                            criterion,
                            s[j + 1 - p + i, :],
                            s_j_r[i, :],
                            f_s[0, :],
                            f_s[j + 1 - p + i, :],
                            f_s[j - p + i, :],
                            m,
                        )
                        for i in range(p)
                    ),
                    n_cores=p,
                )
            ),
            [-2, -1],
            axis=1,
        )

        # shrink simplex if there is no improvement in every process
        if shrink_count.sum() == p:
            # new simplex is a linear combination of the best point
            # and remaining points
            s = tau * s[0:1, :] + (1 - tau) * s

            # evaluate function at new simplex
            f_s = np.array(
                batch_evaluator(
                    func=criterion,
                    arguments=s,
                    n_cores=n_cores,
                )
            )[:, None]

        # termination criteria
        if (
            np.max(np.abs(f_s[0, :] - f_s[1:, :]))
            <= convergence_absolute_criterion_tolerance
            and np.max(np.abs(s[0, :] - s[1:, ]))
            <= convergence_absolute_params_tolerance
        ):
            optimal = True
            converge = True
            reason_to_stop = "Termination codition satisfied"
        elif iterations >= stopping_max_iterations:
            # maximum amount of iterations is exceeded
            optimal = True
            converge = False
            reason_to_stop = "Maximum number of interation exceeded"

    # save results
    result = {
        "solution_x": s[np.nonzero(f_s == f_s.min())[0][0], :],
        "solution_criterion": f_s.min(),
        "n_iterations": iterations,
        "success": converge,
        "reached_convergence_criterion": reason_to_stop,
    }
    return result
def run_explorations(
    func, primary_key, sample, batch_evaluator, n_cores, step_id, error_handling
):
    """Evaluate the criterion on all sampled points of the exploration phase.

    Args:
        func (callable): An already partialled version of
            ``internal_criterion_and_derivative_template`` where the following
            arguments are still free: ``x``, ``task``, ``error_handling``,
            ``fixed_log_data``.
        primary_key: The primary criterion entry of the local optimizer. Needed
            to interpret the output of the internal criterion function.
        sample (numpy.ndarray): 2d numpy array where each row is a sampled
            internal parameter vector.
        batch_evaluator (str or callable): See :ref:`batch_evaluators`.
        n_cores (int): Number of cores.
        step_id (int): The identifier of the exploration step.
        error_handling (str): One of "raise" or "continue".

    Returns:
        dict: A dictionary with the following entries:
            "sorted_values": 1d numpy array with sorted function values.
                Non-finite function values are excluded.
            "sorted_sample": 2d numpy array with corresponding internal
                parameter vectors.

    Raises:
        RuntimeError: If none of the function values is finite.

    """
    explorer_info = AlgoInfo(
        primary_criterion_entry=primary_key,
        parallelizes=True,
        needs_scaling=False,
        name="tiktak_explorer",
        is_available=True,
        arguments=[],
    )

    criterion = partial(
        func,
        task="criterion",
        algo_info=explorer_info,
        error_handling=error_handling,
    )

    call_kwargs = [
        {"x": point, "fixed_log_data": {"step": int(step_id)}} for point in sample
    ]

    evaluate = process_batch_evaluator(batch_evaluator)

    raw_outputs = evaluate(
        criterion,
        arguments=call_kwargs,
        n_cores=n_cores,
        unpack_symbol="**",
        # If desired, errors are caught inside criterion function.
        error_handling="raise",
    )

    all_values = np.array(
        [aggregate_func_output_to_value(res, primary_key) for res in raw_outputs]
    )

    finite_mask = np.isfinite(all_values)

    if not finite_mask.any():
        raise RuntimeError(
            "All function evaluations of the exploration phase in a multistart "
            "optimization are invalid. Check your code or the sampling bounds."
        )

    kept_values = all_values[finite_mask]
    kept_sample = sample[finite_mask]

    # ascending order; the sign switch was already handled by the internal
    # criterion and derivative function.
    order = np.argsort(kept_values)

    return {
        "sorted_values": kept_values[order],
        "sorted_sample": kept_sample[order],
    }
def test_get_batch_evaluator_with_callable():
    """Processing a callable must give back something callable."""
    processed = process_batch_evaluator(lambda x: x)
    assert callable(processed)
def test_get_batch_evaluator_invalid_type():
    """An integer as batch evaluator must raise a TypeError."""
    wrong_type = 3
    with pytest.raises(TypeError):
        process_batch_evaluator(wrong_type)
def test_get_batch_evaluator_invalid_value():
    """The string "bla" as batch evaluator must raise a ValueError."""
    unknown_name = "bla"
    with pytest.raises(ValueError):
        process_batch_evaluator(unknown_name)
def _minimize_pygmo(
    criterion,
    x,
    lower_bounds,
    upper_bounds,
    method,
    algo_options,
    derivative=None,
):
    """Run a pygmo optimizer on the criterion function.

    Args:
        criterion (callable): The function to be minimized.
        x (np.ndarray): Starting values of the parameters.
        lower_bounds (np.ndarray): Lower bounds of the parameters.
        upper_bounds (np.ndarray): Upper bounds of the parameters.
        method (str): One of the optimizers of the pygmo package.
        algo_options (dict): Options for the optimizer. The dictionary is
            copied and not modified. Entries that are not listed below are
            handed to ``_create_algorithm``. The following entries are
            extracted beforehand:

            - population_size (int): Population size for genetic algorithms.
              Default 1.
            - batch_evaluator (str or callable): An estimagic batch evaluator,
              default joblib batch evaluator.
            - n_cores (int): Number of cores used for parallel evaluation of
              the criterion function. Default 1.
            - seed (int or None): Random seed for drawing the initial
              population.
            - discard_start_params (bool): If True, the start params are not
              guaranteed to be part of the initial population. This saves one
              criterion function evaluation that cannot be done in parallel
              with other evaluations. Default False.
        derivative: Not used in this function.

    Returns:
        results (dict): Dictionary with optimization results.

    Raises:
        NotInstalledError: If pygmo is not installed.

    """
    remaining_options = algo_options.copy()

    if not IS_PYGMO_INSTALLED:
        raise NotInstalledError(
            f"The {method} algorithm requires the pygmo package to be installed. "
            "You can install it with 'conda install -c conda-forge pygmo'. Visit "
            "https://esa.github.io/pygmo2/install.html for more detailed installation "
            "instructions."
        )

    # take out the entries that are handled here; what is left goes to pygmo
    population_size = remaining_options.pop("population_size", 1)
    evaluator = process_batch_evaluator(
        remaining_options.pop("batch_evaluator", "joblib")
    )
    n_cores = remaining_options.pop("n_cores", 1)
    seed = remaining_options.pop("seed", None)
    discard_start_params = remaining_options.pop("discard_start_params", False)

    problem = _create_problem(
        func=criterion,
        bounds=(lower_bounds, upper_bounds),
        dim=len(x),
        batch_evaluator=evaluator,
        n_cores=n_cores,
    )
    algorithm = _create_algorithm(method, remaining_options, n_cores)
    population = _create_population(
        problem,
        population_size,
        x,
        seed=seed,
        discard_start_params=discard_start_params,
    )

    evolved_population = algorithm.evolve(population)

    return _process_pygmo_results(evolved_population)
def slice_plot(
    func,
    params,
    lower_bounds=None,
    upper_bounds=None,
    func_kwargs=None,
    selector=None,
    n_cores=DEFAULT_N_CORES,
    n_gridpoints=20,
    plots_per_row=2,
    param_names=None,
    share_y=True,
    expand_yrange=0.02,
    share_x=False,
    color="#497ea7",
    template=PLOTLY_TEMPLATE,
    title=None,
    return_dict=False,
    make_subplot_kwargs=None,
    batch_evaluator="joblib",
):
    """Plot the function along each parameter while holding the others fixed.

    For every selected parameter the function is evaluated on a grid between
    the lower and upper bound of that parameter; all other parameters stay at
    the values given in ``params``. One line plot per parameter is generated
    and the plots are optionally combined into a figure with subplots.

    Args:
        func (callable): criterion function that takes params and returns a
            scalar value or dictionary with the entry "value".
        params (pytree): A pytree with parameters.
        lower_bounds (pytree): A pytree with same structure as params. Must be
            specified and finite for all parameters unless params is a
            DataFrame containing a "lower_bound" column.
        upper_bounds (pytree): A pytree with same structure as params. Must be
            specified and finite for all parameters unless params is a
            DataFrame containing an "upper_bound" column.
        func_kwargs (dict or NoneType): Keyword arguments that are partialled
            into ``func`` before it is evaluated.
        selector (callable): Function that takes params and returns a subset
            of params for which we actually want to generate the plot.
        n_cores (int): Number of cores.
        n_gridpoints (int): Number of gridpoints on which the criterion
            function is evaluated. This is the number per plotted line.
        plots_per_row (int): Number of plots per row.
        param_names (dict or NoneType): Dictionary mapping old parameter names
            to new ones.
        share_y (bool): If True, the individual plots share the scale on the
            yaxis and plots in one row actually share the y axis.
        expand_yrange (float): The ratio by which to expand the range of the
            (shared) y axis, such that the axis is not cropped at exactly the
            max of the function values.
        share_x (bool): If True, set the same range of x axis for all plots and
            share the x axis for all plots in one column.
        color: The line color.
        template (str): The template for the figure. Defaults to
            PLOTLY_TEMPLATE.
        title (str): The figure title.
        return_dict (bool): If True, return dictionary with individual plots of
            each parameter, else, combine individual plots into a figure with
            subplots.
        make_subplot_kwargs (dict or NoneType): Dictionary of keyword arguments
            used to instantiate plotly Figure with multiple subplots. Is used
            to define properties such as, for example, the spacing between
            subplots (governed by 'horizontal_spacing' and 'vertical_spacing').
            If None, default arguments defined in the function are used.
        batch_evaluator (str or callable): See :ref:`batch_evaluators`.

    Returns:
        out (dict or plotly.Figure): Returns either dictionary with individual
            slice plots for each parameter or a plotly Figure combining the
            individual plots.

    Raises:
        ValueError: If a selected parameter has a non-finite lower or upper
            bound.

    """
    title_kwargs = {"text": title} if title is not None else None

    if func_kwargs is not None:
        func = partial(func, **func_kwargs)

    # function value at the params themselves
    func_eval = func(params)

    converter, internal_params = get_converter(
        params=params,
        constraints=None,
        lower_bounds=lower_bounds,
        upper_bounds=upper_bounds,
        func_eval=func_eval,
        primary_key="value",
        scaling=False,
        scaling_options=None,
    )

    # by default all parameters are plotted
    selected = np.arange(len(internal_params.values), dtype=int)
    if selector is not None:
        # Put the positions into a params-like pytree so that the selector
        # picks the positions of the parameters we want to plot.
        position_tree = converter.params_from_internal(selected)
        registry = get_registry(extended=True)
        selected = np.array(
            tree_just_flatten(selector(position_tree), registry=registry),
            dtype=int,
        )

    if not np.isfinite(internal_params.lower_bounds[selected]).all():
        raise ValueError("All selected parameters must have finite lower bounds.")

    if not np.isfinite(internal_params.upper_bounds[selected]).all():
        raise ValueError("All selected parameters must have finite upper bounds.")

    evaluation_points = []
    metadata = []
    for pos in selected:
        current_value = internal_params.values[pos]
        name = internal_params.names[pos]
        grid = np.linspace(
            internal_params.lower_bounds[pos],
            internal_params.upper_bounds[pos],
            n_gridpoints,
        )
        for grid_value in grid:
            # the current value is already evaluated (func_eval); it is
            # appended after the batch evaluation
            if grid_value == current_value:
                continue
            x = internal_params.values.copy()
            x[pos] = grid_value
            evaluation_points.append(converter.params_from_internal(x))
            metadata.append({"name": name, "Parameter Value": grid_value})

    evaluate = process_batch_evaluator(batch_evaluator)

    raw_values = evaluate(
        func=func,
        arguments=evaluation_points,
        error_handling="continue",
        n_cores=n_cores,
    )

    # failed evaluations come back as strings and are replaced by NaN
    func_values = [
        np.nan if isinstance(val, str) else converter.func_to_internal(val)
        for val in raw_values
    ]

    # add the evaluation at the params themselves once per selected parameter
    func_values += [converter.func_to_internal(func_eval)] * len(selected)
    metadata.extend(
        {
            "name": internal_params.names[pos],
            "Parameter Value": internal_params.values[pos],
        }
        for pos in selected
    )

    plot_data = pd.DataFrame(metadata)
    plot_data["Function Value"] = func_values

    if param_names is not None:
        plot_data["name"] = plot_data["name"].replace(param_names)

    # common y axis range, expanded a bit on both ends
    value_min = plot_data["Function Value"].min()
    value_max = plot_data["Function Value"].max()
    value_span = value_max - value_min
    yaxis_ub = value_max + value_span * expand_yrange
    yaxis_lb = value_min - value_span * expand_yrange

    layout_kwargs = get_layout_kwargs(
        None,
        None,
        title_kwargs,
        template,
        False,
    )

    plots_dict = {}
    for pos in selected:
        par_name = internal_params.names[pos]
        if param_names is not None and par_name in param_names:
            par_name = param_names[par_name]

        is_current = plot_data["name"] == par_name
        line_data = plot_data[is_current].sort_values("Parameter Value")

        subfig = px.line(
            line_data,
            y="Function Value",
            x="Parameter Value",
            color_discrete_sequence=[color],
        )
        # mark the function value at the params themselves
        marker_trace = go.Scatter(
            x=[internal_params.values[pos]],
            y=[converter.func_to_internal(func_eval)],
            marker={"color": color},
        )
        subfig.add_trace(marker_trace)
        subfig.update_layout(**layout_kwargs)
        subfig.update_xaxes(title={"text": par_name})
        subfig.update_yaxes(title={"text": "Function Value"})
        if share_y is True:
            subfig.update_yaxes(range=[yaxis_lb, yaxis_ub])

        plots_dict[par_name] = subfig

    if return_dict:
        return plots_dict

    return combine_plots(
        plots=list(plots_dict.values()),
        plots_per_row=plots_per_row,
        sharex=share_x,
        sharey=share_y,
        share_yrange_all=share_y,
        share_xrange_all=share_x,
        expand_yrange=expand_yrange,
        make_subplot_kwargs=make_subplot_kwargs,
        showlegend=False,
        template=template,
        clean_legend=True,
        layout_kwargs=layout_kwargs,
        legend_kwargs={},
        title_kwargs=title_kwargs,
    )