def test_one_bound_is_allowed_for_increasing():
    params = pd.DataFrame(data=[[1], [2], [2.9]], columns=["value"])
    params["lower_bound"] = [-np.inf, 1, -np.inf]
    params["upper_bound"] = [np.inf, 2, np.inf]

    constraints = [{"loc": params.index, "type": "increasing"}]

    process_constraints(constraints, params)


def test_invalid_bound_for_increasing():
    params = pd.DataFrame(data=[[1], [2], [2.9]], columns=["value"])
    params["lower_bound"] = [-np.inf, 1, 0.5]
    params["upper_bound"] = np.nan

    constraints = [{"loc": params.index, "type": "increasing"}]

    with pytest.raises(ValueError):
        process_constraints(constraints, params)


def test_value_error_if_constraints_are_violated(
    example_params, all_constraints, case
):
    constraints = all_constraints[case]
    params = reduce_params(example_params, constraints)

    for val in ["invalid_value0", "invalid_value1"]:
        params["value"] = params[val]

        with pytest.raises(ValueError):
            process_constraints(constraints, params)


def test_empty_constraint_is_dropped(constraints):
    params = pd.DataFrame(np.ones((5, 1)), columns=["value"])

    pc, pp = process_constraints(constraints, params)

    # no transforming constraints
    assert pc == []

    # pre-replacements are just copying the parameter vector
    aaae(pp["_pre_replacements"], np.arange(5))

    # no post replacements
    aaae(pp["_post_replacements"], np.full(5, -1))


def back_and_forth_transformation_and_assert(params, constraints):
    pc, pp = process_constraints(constraints, params)

    internal = reparametrize_to_internal(
        pp["value"].to_numpy(),
        pp["_internal_free"].to_numpy(),
        pc,
    )

    external = reparametrize_from_internal(
        internal=internal,
        fixed_values=pp["_internal_fixed_value"].to_numpy(),
        pre_replacements=pp["_pre_replacements"].to_numpy(),
        params=params,
        return_numpy=True,
        processed_constraints=pc,
        post_replacements=pp["_post_replacements"].to_numpy(),
    )

    aaae(external, params["value"].to_numpy())
    return internal, external
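

# A minimal sketch of how the helper above could be used; the toy params and the
# "fixed" constraint are illustrative assumptions, not taken from the library's own
# test suite (pd, np and the helpers are assumed to be imported as in this module).
def _example_round_trip_with_fixed_constraint():
    params = pd.DataFrame(data=[[0.5], [1.0], [2.0]], columns=["value"])
    params["lower_bound"] = -np.inf
    params["upper_bound"] = np.inf
    constraints = [{"loc": [0], "type": "fixed", "value": 0.5}]

    internal, external = back_and_forth_transformation_and_assert(params, constraints)

    # the fixed parameter is dropped from the internal vector
    assert len(internal) == 2
    return internal, external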


def get_internal_bounds(
    params,
    constraints,
    scaling_factor=None,
    scaling_offset=None,
    processed_params=None,
):
    if constraints in [None, []]:
        params = add_default_bounds_to_params(params)
        lower_bounds = params["lower_bound"]
        upper_bounds = params["upper_bound"]
    else:
        if processed_params is None:
            params = add_default_bounds_to_params(params)
            check_params_are_valid(params)
            _, processed_params = process_constraints(
                constraints=constraints,
                params=params,
                scaling_factor=scaling_factor,
                scaling_offset=scaling_offset,
            )
        free = processed_params[processed_params["_internal_free"]]
        lower_bounds = free["_internal_lower"].to_numpy()
        upper_bounds = free["_internal_upper"].to_numpy()
    return lower_bounds, upper_bounds
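

# A small, hedged illustration of the two branches above with toy inputs; the fix
# constraint and the expected length are assumptions made only for illustration.
def _example_internal_bounds():
    params = pd.DataFrame({"value": [1.0, 2.0]})

    # without constraints the (default) external bounds are returned directly
    lower, upper = get_internal_bounds(params, constraints=None)

    # with a fix constraint, only the free parameter's internal bounds remain
    fixed = [{"loc": [0], "type": "fixed", "value": 1.0}]
    lower_free, upper_free = get_internal_bounds(params, constraints=fixed)
    assert len(lower_free) == len(upper_free) == 1

    return lower, upper, lower_free, upper_free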


def get_reparametrize_functions(
    params,
    constraints,
    scaling_factor=None,
    scaling_offset=None,
    processed_params=None,
    processed_constraints=None,
):
    """Construct functions to map between internal and external parameters.

    All required information is partialed into the functions.

    Args:
        params (pandas.DataFrame): See :ref:`params`.
        constraints (list): List of constraint dictionaries.
        scaling_factor (np.ndarray or None): If None, no scaling factor is used.
        scaling_offset (np.ndarray or None): If None, no scaling offset is used.
        processed_params (pandas.DataFrame): Processed parameters.
        processed_constraints (list): Processed constraints.

    Returns:
        func: Function that maps an external parameter vector to an internal one.
        func: Function that maps an internal parameter vector to an external one.

    """
    if constraints in [None, []]:
        partialed_to_internal = functools.partial(
            no_constraint_to_internal,
            scaling_factor=scaling_factor,
            scaling_offset=scaling_offset,
        )
        partialed_from_internal = functools.partial(
            no_constraint_from_internal,
            params=params,
            scaling_factor=scaling_factor,
            scaling_offset=scaling_offset,
        )
    else:
        if processed_params is None or processed_constraints is None:
            params = add_default_bounds_to_params(params)
            check_params_are_valid(params)
            processed_constraints, processed_params = process_constraints(
                constraints=constraints,
                params=params,
                scaling_factor=scaling_factor,
                scaling_offset=scaling_offset,
            )

        # get partialed reparametrize from internal
        pre_replacements = processed_params["_pre_replacements"].to_numpy()
        post_replacements = processed_params["_post_replacements"].to_numpy()
        fixed_values = processed_params["_internal_fixed_value"].to_numpy()

        # get partialed reparametrize to internal
        internal_free = processed_params["_internal_free"].to_numpy()

        partialed_to_internal = functools.partial(
            reparametrize_to_internal,
            internal_free=internal_free,
            processed_constraints=processed_constraints,
            scaling_factor=scaling_factor,
            scaling_offset=scaling_offset,
        )

        partialed_from_internal = functools.partial(
            reparametrize_from_internal,
            fixed_values=fixed_values,
            pre_replacements=pre_replacements,
            processed_constraints=processed_constraints,
            post_replacements=post_replacements,
            params=params,
            scaling_factor=scaling_factor,
            scaling_offset=scaling_offset,
        )

    return partialed_to_internal, partialed_from_internal
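

# A minimal round-trip sketch for the two returned functions; the toy params and the
# increasing constraint are illustrative assumptions (pd, np assumed imported).
def _example_reparametrize_round_trip():
    toy_params = pd.DataFrame({"value": [1.0, 2.0, 3.0]})
    constraints = [{"loc": toy_params.index, "type": "increasing"}]

    to_internal, from_internal = get_reparametrize_functions(
        params=toy_params, constraints=constraints
    )

    x = to_internal(toy_params["value"].to_numpy())
    external = from_internal(internal=x, return_numpy=True)

    # mapping to the internal space and back reproduces the external values
    np.testing.assert_array_almost_equal(external, toy_params["value"].to_numpy())
    return x, external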


def get_derivative_conversion_function(
    params,
    constraints,
    scaling_factor=None,
    scaling_offset=None,
    processed_params=None,
    processed_constraints=None,
):
    """Construct functions to map between internal and external derivatives.

    All required information is partialed into the functions.

    Args:
        params (pandas.DataFrame): See :ref:`params`.
        constraints (list): List of constraint dictionaries.
        scaling_factor (np.ndarray or None): If None, no scaling factor is used.
        scaling_offset (np.ndarray or None): If None, no scaling offset is used.
        processed_params (pandas.DataFrame): Processed parameters.
        processed_constraints (list): Processed constraints.

    Returns:
        func: Function that converts an external derivative to an internal one.

    """
    if constraints in [None, []]:
        convert_derivative = functools.partial(
            no_constraint_derivative_to_internal,
            scaling_factor=scaling_factor,
        )
    else:
        if processed_params is None or processed_constraints is None:
            params = add_default_bounds_to_params(params)
            check_params_are_valid(params)
            processed_constraints, processed_params = process_constraints(
                constraints=constraints,
                params=params,
                scaling_factor=scaling_factor,
                scaling_offset=scaling_offset,
            )

        pre_replacements = processed_params["_pre_replacements"].to_numpy()
        post_replacements = processed_params["_post_replacements"].to_numpy()
        fixed_values = processed_params["_internal_fixed_value"].to_numpy()

        dim_internal = int(processed_params["_internal_free"].sum())

        pre_replace_jac = pre_replace_jacobian(
            pre_replacements=pre_replacements, dim_in=dim_internal
        )
        post_replace_jac = post_replace_jacobian(post_replacements=post_replacements)

        convert_derivative = functools.partial(
            convert_external_derivative_to_internal,
            fixed_values=fixed_values,
            pre_replacements=pre_replacements,
            processed_constraints=processed_constraints,
            pre_replace_jac=pre_replace_jac,
            post_replace_jac=post_replace_jac,
            scaling_factor=scaling_factor,
            scaling_offset=scaling_offset,
        )

    return convert_derivative


def get_space_converter(
    internal_params,
    internal_constraints,
):
    """Get functions to convert between in-/external space of params and derivatives.

    In the internal parameter space the optimization problem is unconstrained except
    for bounds.

    Args:
        internal_params (InternalParams): NamedTuple with internal parameter values
            and bounds.
        internal_constraints (list): List of constraints with processed selector
            fields.

    Returns:
        SpaceConverter: The space converter.
        InternalParams: NamedTuple with entries:
            - values (np.ndarray): Internal parameter values.
            - lower_bounds (np.ndarray): Lower bounds on the internal params.
            - upper_bounds (np.ndarray): Upper bounds on the internal params.
            - soft_lower_bounds (np.ndarray): Soft lower bounds on the internal params.
            - soft_upper_bounds (np.ndarray): Soft upper bounds on the internal params.
            - names (list): List of names of the external parameters.
            - free_mask (np.ndarray): Boolean mask representing which external
              parameter is free.

    """
    transformations, constr_info = process_constraints(
        constraints=internal_constraints,
        params_vec=internal_params.values,
        lower_bounds=internal_params.lower_bounds,
        upper_bounds=internal_params.upper_bounds,
        param_names=internal_params.names,
    )

    _params_to_internal = partial(
        reparametrize_to_internal,
        internal_free=constr_info["internal_free"],
        transformations=transformations,
    )

    _params_from_internal = partial(
        reparametrize_from_internal,
        fixed_values=constr_info["internal_fixed_values"],
        pre_replacements=constr_info["pre_replacements"],
        transformations=transformations,
        post_replacements=constr_info["post_replacements"],
    )

    _dim_internal = int(constr_info["internal_free"].sum())

    _pre_replace_jac = pre_replace_jacobian(
        pre_replacements=constr_info["pre_replacements"], dim_in=_dim_internal
    )
    _post_replace_jac = post_replace_jacobian(
        post_replacements=constr_info["post_replacements"]
    )

    _derivative_to_internal = partial(
        convert_external_derivative_to_internal,
        fixed_values=constr_info["internal_fixed_values"],
        pre_replacements=constr_info["pre_replacements"],
        transformations=transformations,
        pre_replace_jac=_pre_replace_jac,
        post_replace_jac=_post_replace_jac,
    )

    _has_transforming_constraints = bool(transformations)

    converter = SpaceConverter(
        params_to_internal=_params_to_internal,
        params_from_internal=_params_from_internal,
        derivative_to_internal=_derivative_to_internal,
        has_transforming_constraints=_has_transforming_constraints,
    )

    free_mask = constr_info["internal_free"]

    if (
        internal_params.soft_lower_bounds is not None
        and not _has_transforming_constraints
    ):
        _soft_lower = internal_params.soft_lower_bounds[free_mask]
    else:
        _soft_lower = None

    if (
        internal_params.soft_upper_bounds is not None
        and not _has_transforming_constraints
    ):
        _soft_upper = internal_params.soft_upper_bounds[free_mask]
    else:
        _soft_upper = None

    params = InternalParams(
        values=converter.params_to_internal(internal_params.values),
        lower_bounds=constr_info["lower_bounds"],
        upper_bounds=constr_info["upper_bounds"],
        names=internal_params.names,
        free_mask=free_mask,
        soft_lower_bounds=_soft_lower,
        soft_upper_bounds=_soft_upper,
    )

    return converter, params


jac_case = get_derivative_case(jacobian)
hess_case = get_derivative_case(hessian)

check_is_optimized_and_derivative_case(is_optimized, jac_case)
check_is_optimized_and_derivative_case(is_optimized, hess_case)

cov_cases = _get_cov_cases(jac_case, hess_case, design_info)

check_numdiff_options(numdiff_options, "estimate_ml")
numdiff_options = {} if numdiff_options in (None, False) else numdiff_options

constraints = [] if constraints is None else constraints
processed_constraints, _ = process_constraints(constraints, params)

# ==================================================================================
# Calculate estimates via maximization (if necessary)
# ==================================================================================

if is_optimized:
    estimates = params
else:
    opt_res = maximize(
        criterion=loglike,
        criterion_kwargs=loglike_kwargs,
        params=params,
        constraints=constraints,
        derivative=derivative,
        derivative_kwargs=derivative_kwargs,


def _optimize(
    direction,
    criterion,
    params,
    algorithm,
    *,
    criterion_kwargs,
    constraints,
    algo_options,
    derivative,
    derivative_kwargs,
    criterion_and_derivative,
    criterion_and_derivative_kwargs,
    numdiff_options,
    logging,
    log_options,
    error_handling,
    error_penalty,
    cache_size,
    scaling,
    scaling_options,
    multistart,
    multistart_options,
):
    """Minimize or maximize criterion using algorithm subject to constraints.

    Arguments are the same as in maximize and minimize, with an additional direction
    argument. Direction is a string that can take the values "maximize" and
    "minimize".

    Returns are the same as in maximize and minimize.

    """
    criterion_kwargs = _setdefault(criterion_kwargs, {})
    constraints = _setdefault(constraints, [])
    algo_options = _setdefault(algo_options, {})
    derivative_kwargs = _setdefault(derivative_kwargs, {})
    criterion_and_derivative_kwargs = _setdefault(criterion_and_derivative_kwargs, {})
    numdiff_options = _setdefault(numdiff_options, {})
    log_options = _setdefault(log_options, {})
    scaling_options = _setdefault(scaling_options, {})
    error_penalty = _setdefault(error_penalty, {})
    multistart_options = _setdefault(multistart_options, {})
    if logging:
        logging = Path(logging)

    check_optimize_kwargs(
        direction=direction,
        criterion=criterion,
        criterion_kwargs=criterion_kwargs,
        params=params,
        algorithm=algorithm,
        constraints=constraints,
        algo_options=algo_options,
        derivative=derivative,
        derivative_kwargs=derivative_kwargs,
        criterion_and_derivative=criterion_and_derivative,
        criterion_and_derivative_kwargs=criterion_and_derivative_kwargs,
        numdiff_options=numdiff_options,
        logging=logging,
        log_options=log_options,
        error_handling=error_handling,
        error_penalty=error_penalty,
        cache_size=cache_size,
        scaling=scaling,
        scaling_options=scaling_options,
        multistart=multistart,
        multistart_options=multistart_options,
    )

    # store some arguments in a dictionary to save them in the database later
    if logging:
        problem_data = {
            "direction": direction,
            # "criterion": criterion,
            "criterion_kwargs": criterion_kwargs,
            "algorithm": algorithm,
            "constraints": constraints,
            "algo_options": algo_options,
            # "derivative": derivative,
            "derivative_kwargs": derivative_kwargs,
            # "criterion_and_derivative": criterion_and_derivative,
            "criterion_and_derivative_kwargs": criterion_and_derivative_kwargs,
            "numdiff_options": numdiff_options,
            "log_options": log_options,
            "error_handling": error_handling,
            "error_penalty": error_penalty,
            "cache_size": int(cache_size),
        }

    # partial the kwargs into corresponding functions
    criterion = functools.partial(criterion, **criterion_kwargs)
    if derivative is not None:
        derivative = functools.partial(derivative, **derivative_kwargs)
    if criterion_and_derivative is not None:
        criterion_and_derivative = functools.partial(
            criterion_and_derivative, **criterion_and_derivative_kwargs
        )

    # process params and constraints
    params = add_default_bounds_to_params(params)
    for col in ["value", "lower_bound", "upper_bound"]:
        params[col] = params[col].astype(float)
    check_params_are_valid(params)

    # get processed params and constraints
    if constraints:
        pc, pp = process_constraints(constraints, params)
    else:
        pc, pp = None, None

    if pc and multistart:
        types = {constr["type"] for constr in pc}
        raise NotImplementedError(
            "multistart optimizations are not yet compatible with transforming "
            f"constraints. Your transforming constraints are of type {types}."
        )

    # calculate scaling factor and offset and redo params and constraint processing
    if scaling:
        scaling_factor, scaling_offset = calculate_scaling_factor_and_offset(
            params=params,
            constraints=constraints,
            criterion=criterion,
            **scaling_options,
            processed_params=pp,
            processed_constraints=pc,
        )
        pc, pp = process_constraints(
            constraints=constraints,
            params=params,
            scaling_factor=scaling_factor,
            scaling_offset=scaling_offset,
        )
    else:
        scaling_factor, scaling_offset = None, None

    if logging:
        # name and group column are needed in the dashboard but could lead to problems
        # if present anywhere else
        params_with_name_and_group = _add_name_and_group_columns_to_params(params)
        problem_data["params"] = params_with_name_and_group

    params_to_internal, params_from_internal = get_reparametrize_functions(
        params=params,
        constraints=constraints,
        scaling_factor=scaling_factor,
        scaling_offset=scaling_offset,
        processed_params=pp,
        processed_constraints=pc,
    )

    # get internal parameters and bounds
    x = params_to_internal(params["value"].to_numpy())

    # this if condition reduces overhead in the no-constraints case
    if constraints in [None, []]:
        lower_bounds = params["lower_bound"].to_numpy()
        upper_bounds = params["upper_bound"].to_numpy()
    else:
        lower_bounds, upper_bounds = get_internal_bounds(
            params=params,
            constraints=constraints,
            scaling_factor=scaling_factor,
            scaling_offset=scaling_offset,
            processed_params=pp,
        )

    # get convert derivative
    convert_derivative = get_derivative_conversion_function(
        params=params,
        constraints=constraints,
        scaling_factor=scaling_factor,
        scaling_offset=scaling_offset,
        processed_params=pp,
        processed_constraints=pc,
    )

    # do first function evaluation
    first_eval = {
        "internal_params": x,
        "external_params": params,
        "output": criterion(params),
    }

    # fill numdiff_options with defaults
    numdiff_options = _fill_numdiff_options_with_defaults(
        numdiff_options, lower_bounds, upper_bounds
    )

    # create and initialize the database
    if logging:
        database = _create_and_initialize_database(
            logging, log_options, first_eval, problem_data
        )
        db_kwargs = {
            "database": database,
            "path": logging,
            "fast_logging": log_options.get("fast_logging", False),
        }
    else:
        db_kwargs = {"database": None, "path": None, "fast_logging": False}

    # get the algorithm
    internal_algorithm = get_algorithm(
        algorithm=algorithm,
        lower_bounds=lower_bounds,
        upper_bounds=upper_bounds,
        algo_options=algo_options,
        logging=logging,
        db_kwargs=db_kwargs,
    )

    # set default error penalty
    error_penalty = _fill_error_penalty_with_defaults(
        error_penalty, first_eval, direction
    )

    # create cache
    x_hash = hash_array(x)
    cache = {x_hash: {"criterion": first_eval["output"]}}

    # partial the internal_criterion_and_derivative_template
    always_partialled = {
        "direction": direction,
        "criterion": criterion,
        "params": params,
        "reparametrize_from_internal": params_from_internal,
        "convert_derivative": convert_derivative,
        "derivative": derivative,
        "criterion_and_derivative": criterion_and_derivative,
        "numdiff_options": numdiff_options,
        "logging": logging,
        "db_kwargs": db_kwargs,
        "first_criterion_evaluation": first_eval,
        "cache": cache,
        "cache_size": cache_size,
    }

    internal_criterion_and_derivative = functools.partial(
        internal_criterion_and_derivative_template,
        **always_partialled,
    )

    # do actual optimizations
    if not multistart:
        steps = [{"type": "optimization", "name": "optimization"}]
        step_ids = log_scheduled_steps_and_get_ids(
            steps=steps,
            logging=logging,
            db_kwargs=db_kwargs,
        )
        internal_criterion_and_derivative = functools.partial(
            internal_criterion_and_derivative,
            error_handling=error_handling,
            error_penalty=error_penalty,
        )
        raw_res = internal_algorithm(internal_criterion_and_derivative, x, step_ids[0])
    else:
        lower, upper = get_internal_sampling_bounds(params, constraints)

        multistart_options = _fill_multistart_options_with_defaults(
            options=multistart_options,
            params=params,
            x=x,
            params_to_internal=params_to_internal,
        )

        raw_res = run_multistart_optimization(
            local_algorithm=internal_algorithm,
            criterion_and_derivative=internal_criterion_and_derivative,
            x=x,
            lower_bounds=lower,
            upper_bounds=upper,
            options=multistart_options,
            logging=logging,
            db_kwargs=db_kwargs,
            error_handling=error_handling,
            error_penalty=error_penalty,
        )

    res = process_internal_optimizer_result(
        raw_res,
        direction=direction,
        params_from_internal=params_from_internal,
    )

    return res


def numpy_interface(func=None, *, params=None, constraints=None, numpy_output=False):
    """Convert x to params.

    The decorated function receives a NumPy array of parameters and converts it to a
    :class:`pandas.DataFrame` which can be handled by the user's criterion function.
    For convenience, the decorated function can also be called directly with a params
    DataFrame. In that case, the decorator does nothing.

    Args:
        func (callable): The function to which the decorator is applied.
        params (pandas.DataFrame): See :ref:`params`.
        constraints (list of dict): Contains constraints.
        numpy_output (bool): Whether pandas objects in the output should also be
            converted to numpy arrays.

    Returns:
        callable

    """
    constraints = [] if constraints is None else constraints

    pc, pp = process_constraints(constraints, params)

    fixed_values = pp["_internal_fixed_value"].to_numpy()
    pre_replacements = pp["_pre_replacements"].to_numpy().astype(int)
    post_replacements = pp["_post_replacements"].to_numpy().astype(int)

    def decorator_numpy_interface(func):
        @functools.wraps(func)
        def wrapper_numpy_interface(x, *args, **kwargs):
            if isinstance(x, pd.DataFrame):
                p = x
            elif isinstance(x, np.ndarray):
                p = reparametrize_from_internal(
                    internal=x,
                    fixed_values=fixed_values,
                    pre_replacements=pre_replacements,
                    processed_constraints=pc,
                    post_replacements=post_replacements,
                    params=params,
                    return_numpy=False,
                )
            else:
                raise ValueError(
                    "x must be a numpy array or DataFrame with 'value' column."
                )

            criterion_value = func(p, *args, **kwargs)

            if isinstance(criterion_value, (pd.DataFrame, pd.Series)) and numpy_output:
                criterion_value = criterion_value.to_numpy()

            return criterion_value

        return wrapper_numpy_interface

    if callable(func):
        return decorator_numpy_interface(func)
    else:
        return decorator_numpy_interface
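

# A minimal usage sketch of the decorator above; the toy criterion and params are
# illustrative assumptions, not part of the library (pd, np assumed imported).
def _example_numpy_interface_usage():
    toy_params = pd.DataFrame({"value": [1.0, 2.0, 3.0]})

    @numpy_interface(params=toy_params, constraints=[])
    def sphere(params):
        # params arrives as a DataFrame with a "value" column
        return (params["value"] ** 2).sum()

    # the decorated criterion can be called with an internal numpy vector ...
    internal_call = sphere(np.array([1.0, 2.0, 3.0]))
    # ... or directly with a params DataFrame
    external_call = sphere(toy_params)
    return internal_call, external_call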


    params = reduce_params(example_params, constraints)
    params["value"] = params[f"value{number}"]

    keep = params[f"internal_value{number}"].notnull()
    expected_internal_values = params[f"internal_value{number}"][keep]
    expected_internal_lower = params["internal_lower"]
    expected_internal_upper = params["internal_upper"]

    to_internal, _ = get_reparametrize_functions(
        params=params,
        constraints=constraints,
        scaling_factor=None,
        scaling_offset=None,
    )

    _, pp = process_constraints(constraints, params)

    calculated_internal_values_np = to_internal(pp["value"].to_numpy())
    calculated_internal_values_pd = to_internal(pp)

    calculated_internal_lower = pp["_internal_lower"]
    calculated_internal_upper = pp["_internal_upper"]

    aaae(calculated_internal_values_np, calculated_internal_values_pd)
    aaae(calculated_internal_values_np, expected_internal_values)
    aaae(calculated_internal_lower, expected_internal_lower)
    aaae(calculated_internal_upper, expected_internal_upper)


@pytest.mark.parametrize("case, number", to_test)
def test_reparametrize_from_internal(example_params, all_constraints, case, number):


def transform_covariance(
    params,
    internal_cov,
    constraints,
    n_samples,
    bounds_handling,
):
    """Transform the internal covariance matrix to an external one, given constraints.

    Args:
        params (pd.DataFrame): DataFrame where the "value" column contains estimated
            parameters of a likelihood model. See :ref:`params` for details.
        internal_cov (np.ndarray or pandas.DataFrame): Covariance matrix of the
            internal parameter vector. For background information about internal and
            external params see :ref:`implementation_of_constraints`.
        constraints (list): List with constraint dictionaries. See
            .. _link: ../../docs/source/how_to_guides/how_to_use_constraints.ipynb
        n_samples (int): Number of samples used to transform the covariance matrix of
            the internal parameter vector into the covariance matrix of the external
            parameters.
        bounds_handling (str): One of "clip", "raise", "ignore". Determines how bounds
            are handled. If "clip", confidence intervals are clipped at the bounds.
            Standard errors are only adjusted if a sampling step is necessary due to
            additional constraints. If "raise" and any lower or upper bound is
            binding, we raise an error. If "ignore", boundary problems are simply
            ignored.

    Returns:
        pd.DataFrame: Quadratic DataFrame containing the covariance matrix of the free
            parameters. If parameters were fixed (explicitly or by other constraints),
            the index is a subset of params.index. The columns are the same as the
            index.

    """
    processed_constraints, processed_params = process_constraints(constraints, params)
    free_index = processed_params.query("_internal_free").index

    if isinstance(internal_cov, pd.DataFrame):
        internal_cov = internal_cov.to_numpy()

    if processed_constraints:
        _to_internal, _from_internal = get_reparametrize_functions(
            params=params, constraints=constraints
        )

        free = processed_params.loc[free_index]
        is_free = processed_params["_internal_free"].to_numpy()
        lower_bounds = free["_internal_lower"]
        upper_bounds = free["_internal_upper"]

        internal_mean = _to_internal(params)

        sample = np.random.multivariate_normal(
            mean=internal_mean,
            cov=internal_cov,
            size=n_samples,
        )
        transformed_free = []
        for params_vec in sample:
            if bounds_handling == "clip":
                params_vec = np.clip(
                    params_vec, a_min=lower_bounds, a_max=upper_bounds
                )
            elif bounds_handling == "raise":
                if (params_vec < lower_bounds).any() or (
                    params_vec > upper_bounds
                ).any():
                    raise ValueError()

            transformed = _from_internal(internal=params_vec)
            transformed_free.append(transformed[is_free])

        free_cov = np.cov(
            np.array(transformed_free),
            rowvar=False,
        )
    else:
        free_cov = internal_cov

    res = pd.DataFrame(data=free_cov, columns=free_index, index=free_index)
    return res
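

# A hedged usage sketch with toy inputs; it assumes (as the tests above suggest) that
# an "increasing" constraint is a transforming constraint that keeps the internal
# dimension equal to the number of constrained parameters, so the sampling branch is
# exercised. Results are stochastic because of the sampling step.
def _example_transform_covariance():
    toy_params = pd.DataFrame({"value": [1.0, 2.0, 3.0]})
    toy_params["lower_bound"] = -np.inf
    toy_params["upper_bound"] = np.inf
    constraints = [{"loc": toy_params.index, "type": "increasing"}]

    internal_cov = 0.01 * np.eye(3)

    free_cov = transform_covariance(
        params=toy_params,
        internal_cov=internal_cov,
        constraints=constraints,
        n_samples=100,
        bounds_handling="clip",
    )
    return free_cov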