Example 1
def test_processing_single_optim_with_non_standard_inputs(
        single_non_standard_inputs):
    kwargs = single_non_standard_inputs
    res = broadcast_arguments(**kwargs)

    check_single_argument_types(res[0])
    assert res[0]["constraints"] == single_non_standard_inputs["constraints"]
Example 2
def test_processing_multi_optim_with_non_standard_inputs_same_constraints_for_all(
    multiple_non_standard_inputs,
):
    kwargs = multiple_non_standard_inputs
    res = broadcast_arguments(**kwargs)

    check_single_argument_types(res[0])
    assert res[0]["constraints"] == kwargs["constraints"]
    assert res[1]["constraints"] == kwargs["constraints"]
Example 3
def test_processing_single_optim_with_all_standard_inputs():
    criterion = np.mean
    params = pd.DataFrame(np.ones(12).reshape(4, 3))
    algorithm = "scipy_L-BFGS-B"

    res = broadcast_arguments(criterion=criterion,
                              params=params,
                              algorithm=algorithm)

    check_single_argument_types(res[0])
Example 4
def test_processing_multiple_optim_with_all_standard_inputs():
    criterion = np.mean
    params = [
        pd.DataFrame(np.ones(12).reshape(4, 3)),
        pd.DataFrame(np.zeros(16).reshape(4, 4)),
    ]

    algorithms = ["scipy_L-BFGS-B", "pygmo_xnes"]

    res = broadcast_arguments(criterion=criterion,
                              params=params,
                              algorithm=algorithms)

    assert len(res) == 2
    check_single_argument_types(res[0])
    check_single_argument_types(res[1])
Example 5
def test_processing_multi_optim_with_non_standard_inputs_different_constraints(
    multiple_non_standard_inputs,
):
    kwargs = multiple_non_standard_inputs
    differing_constraints = [
        [{"loc": 1, "type": "fixed", "value": 4}],
        [{"loc": [2, 3], "type": "increasing"}],
    ]
    kwargs["constraints"] = differing_constraints

    res = broadcast_arguments(**kwargs)

    check_single_argument_types(res[0])
    assert res[0]["constraints"] == differing_constraints[0]
    assert res[1]["constraints"] == differing_constraints[1]
Example 6
def optimize(
    direction,
    criterion,
    params,
    algorithm,
    *,
    criterion_kwargs=None,
    constraints=None,
    algo_options=None,
    derivative=None,
    derivative_kwargs=None,
    criterion_and_derivative=None,
    criterion_and_derivative_kwargs=None,
    numdiff_options=None,
    logging=DEFAULT_DATABASE_NAME,
    log_options=None,
    error_handling="raise",
    error_penalty=None,
    batch_evaluator="joblib",
    batch_evaluator_options=None,
    cache_size=100,
):
    """Minimize or maximize criterion using algorithm subject to constraints.

    Each argument except for batch_evaluator and batch_evaluator_options can also be
    replaced by a list of arguments, in which case several optimizations are run in
    parallel. For this, either all arguments must be lists of the same length, or
    some arguments can be provided as single arguments, in which case they are
    automatically broadcasted.

    Args:
        direction (str): One of "maximize" or "minimize".
        criterion (Callable): A function that takes a pandas DataFrame (see
            :ref:`params`) as first argument and returns one of the following:
            - scalar floating point or a numpy array (depending on the algorithm)
            - a dictionary that contains the entries "value" (a scalar float),
            "contributions" or "root_contributions" (depending on the algorithm) and
            any number of additional entries. The additional dict entries will be
            logged and (if supported) displayed in the dashboard. Check the
            documentation of your algorithm to see which entries or output type
            are required.
        params (pd.DataFrame): A DataFrame with a column called "value" and optional
            additional columns. See :ref:`params` for details.
        algorithm (str or callable): Specifies the optimization algorithm. For supported
            algorithms this is a string with the name of the algorithm. Otherwise it can
            be a callable with the estimagic algorithm interface. See :ref:`algorithms`.
        criterion_kwargs (dict): Additional keyword arguments for criterion
        constraints (list): List with constraint dictionaries. See :ref:`constraints`
            for details.
        algo_options (dict): Algorithm specific configuration of the optimization. See
            :ref:`list_of_algorithms` for supported options of each algorithm.
        derivative (callable, optional): Function that calculates the first derivative
            of criterion. For most algorithms, this is the gradient of the scalar
            output (or "value" entry of the dict). However, some algorithms (e.g.
            bhhh) require the Jacobian of the "contributions" entry of the dict. You
            will get an error if you provide the wrong type of derivative.
        derivative_kwargs (dict): Additional keyword arguments for derivative.
        criterion_and_derivative (callable): Function that returns criterion
            and derivative as a tuple. This can be used to exploit synergies in the
            evaluation of both functions. The first element of the tuple has to be
            exactly the same as the output of criterion. The second has to be exactly
            the same as the output of derivative.
        criterion_and_derivative_kwargs (dict): Additional keyword arguments for
            criterion and derivative.
        numdiff_options (dict): Keyword arguments for the calculation of numerical
            derivatives. See :ref:`first_derivative` for details. Note that the default
            method is changed to "forward" for speed reasons.
        logging (pathlib.Path, str or False): Path to an sqlite3 file (which typically
            has the file extension ``.db``). If the file does not exist, it will be
            created. When running parallel optimizations with logging enabled, you
            have to provide a different path for each optimization. You can disable
            logging completely by setting it to False, but we strongly recommend
            against it. The dashboard can only be used when logging is enabled.
        log_options (dict): Additional keyword arguments to configure the logging.
            - "suffix": A string that is appended to the default table names, separated
            by an underscore. You can use this if you want to write the log into an
            existing database where the default names "optimization_iterations",
            "optimization_status" and "optimization_problem" are already in use.
            - "fast_logging": A boolean that determines if "unsafe" settings are used
            to speed up write processes to the database. This should only be used for
            very short running criterion functions where the main purpose of the log
            is a real-time dashboard and it would not be catastrophic to get a
            corrupted database in case of a sudden system shutdown. If one evaluation
            of the criterion function (and gradient if applicable) takes more than
            100 ms, the logging overhead is negligible.
            - "if_exists": (str) One of "extend", "replace", "raise"
            - "save_all_arguments": (bool). If True, all arguments to
              optimize that can be pickled are saved in the log file. Otherwise, only
              the information needed by the dashboard is saved. Default False.
        error_handling (str): Either "raise" or "continue". Note that "continue" does
            not absolutely guarantee that no error is raised but we try to handle as
            many errors as possible in that case without aborting the optimization.
        error_penalty (dict): Dict with the entries "constant" (float) and "slope"
            (float). If the criterion or gradient raise an error and error_handling is
            "continue", the criterion value is replaced by
            ``constant + slope * norm(params - start_params)``, where ``norm`` is the
            Euclidean norm, and the derivative is adjusted accordingly. This is meant
            to guide the optimizer back into a valid region of the parameter space
            (in the direction of the start parameters).
            Note that the constant has to be high enough to ensure that the penalty is
            actually a bad function value. The default constant is f0 + abs(f0) + 100
            for minimizations and f0 - abs(f0) - 100 for maximizations, where
            f0 is the criterion value at start parameters. The default slope is 0.1.
        batch_evaluator (str or Callable): Name of a pre-implemented batch evaluator
            (currently 'joblib' and 'pathos_mp') or Callable with the same interface
            as the estimagic batch_evaluators. See :ref:`batch_evaluators`.
        batch_evaluator_options (dict): Additional configurations for the batch
            evaluator. See :ref:`batch_evaluators`.
        cache_size (int): Number of criterion and derivative evaluations that are
            cached in memory in case they are needed again.

    """
    arguments = broadcast_arguments(
        direction=direction,
        criterion=criterion,
        params=params,
        algorithm=algorithm,
        criterion_kwargs=criterion_kwargs,
        constraints=constraints,
        algo_options=algo_options,
        derivative=derivative,
        derivative_kwargs=derivative_kwargs,
        criterion_and_derivative=criterion_and_derivative,
        criterion_and_derivative_kwargs=criterion_and_derivative_kwargs,
        numdiff_options=numdiff_options,
        logging=logging,
        log_options=log_options,
        error_handling=error_handling,
        error_penalty=error_penalty,
        cache_size=cache_size,
    )

    # do rough sanity checks before actual optimization for quicker feedback
    for arg in arguments:
        check_argument(arg)

    if isinstance(batch_evaluator, str):
        batch_evaluator = getattr(be, f"{batch_evaluator}_batch_evaluator")

    if batch_evaluator_options is None:
        batch_evaluator_options = {}

    batch_evaluator_options["unpack_symbol"] = "**"
    default_batch_error_handling = "raise" if len(
        arguments) == 1 else "continue"
    batch_evaluator_options["error_handling"] = batch_evaluator_options.get(
        "error_handling", default_batch_error_handling)

    res = batch_evaluator(_single_optimize, arguments, **batch_evaluator_options)

    res = [_dummy_result_from_traceback(r) for r in res]

    res = res[0] if len(res) == 1 else res

    return res
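
A minimal usage sketch of the broadcasting behavior described in the docstring above. The criterion function and parameter values are made up for illustration; only the "scipy_L-BFGS-B" algorithm name and the False value for logging are taken from the source shown here:

import numpy as np
import pandas as pd

# Two optimizations run in parallel: `params` is a list of length two,
# while `direction`, `criterion` and `algorithm` are single arguments
# that are automatically broadcasted to both runs.
params_list = [
    pd.DataFrame({"value": np.ones(3)}),
    pd.DataFrame({"value": np.full(4, 5.0)}),
]

results = optimize(
    direction="minimize",
    criterion=lambda params: (params["value"] ** 2).sum(),
    params=params_list,
    algorithm="scipy_L-BFGS-B",
    logging=False,  # with logging enabled, each run would need its own path
)
# With more than one optimization, `optimize` returns a list of results.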
Example 7
def maximize_log_likelihood(
    log_like_obs,
    params,
    algorithm,
    criterion_kwargs=None,
    constraints=None,
    general_options=None,
    algo_options=None,
    gradient_options=None,
    logging=DEFAULT_DATABASE_NAME,
    log_options=None,
    dashboard=False,
    dash_options=None,
):
    """Estimate parameters via maximum likelihood.

    This function provides a convenient interface for estimating models via maximum
    likelihood. In the future, it will also calculate standard errors for the solution.

    The criterion function ``log_like_obs`` has to return an array of log likelihoods
    as its first return value, not the mean log likelihood. The array is internally
    aggregated to whatever output is needed. For example, the mean is used for
    maximization and the sum for standard error calculations.

    The second return value can be a :class:`pandas.DataFrame` in the `tidy data
    format`_ which will, in the future, be used to display the distribution of
    contributions for subgroups via the comparison plot.

    The limitation to log likelihoods instead of likelihoods may seem unnecessarily
    restrictive, but it is preferred for two reasons.

    1. Optimization methods which rely on gradients generally work better when
       optimizing the log transformation. See `1`_ for a simplified example.

    2. Using the log transformation to convert products of probabilities to sums of log
       probabilities is numerically more stable as it prevents over- and underflows. See
       `2`_ for an example.

    Args:
        log_like_obs (callable or list of callables):
            Python function that takes a pandas DataFrame with parameters as the first
            argument and returns an array of log likelihood contributions as its
            first return value.

        params (pd.DataFrame or list of pd.DataFrames):
            See :ref:`params`.

        algorithm (str or list of strings):
            specifies the optimization algorithm. See :ref:`list_of_algorithms`.

        criterion_kwargs (dict or list of dicts):
            additional keyword arguments for criterion

        constraints (list or list of lists):
            list with constraint dictionaries. See :ref:`constraints` for details.

        general_options (dict):
            additional configurations for the optimization

        algo_options (dict or list of dicts):
            algorithm specific configurations for the optimization

        gradient_options (dict):
            Options for the gradient function.

        logging (str or pathlib.Path): Path to an sqlite3 file which typically has the
            file extension ``.db``. If the file does not exist, it will be created. See
            :ref:`logging` for details.

        log_options (dict): Keyword arguments to influence the logging. See
            :ref:`logging` for details.

        dashboard (bool):
            whether to create and show a dashboard. See :ref:`dashboard` for details.

        dash_options (dict):
            dictionary with kwargs for the dashboard. See :ref:`dashboard` for details.

    Returns:
        results (tuple or list of tuples):
            The return is either a tuple containing a dictionary of the results and the
            parameters or a list of tuples containing multiples of the former.

    .. _tidy data format:
        http://dx.doi.org/10.18637/jss.v059.i10

    .. _1:
        https://stats.stackexchange.com/a/176563/218971

    .. _2:
        https://statmodeling.stat.columbia.edu/2016/06/11/log-sum-of-exponentials/

    """
    if isinstance(log_like_obs, list):
        extended_loglikelobs = [
            expand_criterion_output(crit_func) for crit_func in log_like_obs
        ]
        wrapped_loglikeobs = [
            aggregate_criterion_output(np.mean)(crit_func)
            for crit_func in extended_loglikelobs
        ]
    else:
        extended_loglikelobs = expand_criterion_output(log_like_obs)
        wrapped_loglikeobs = aggregate_criterion_output(np.mean)(extended_loglikelobs)

    results = maximize(
        wrapped_loglikeobs,
        params,
        algorithm,
        criterion_kwargs,
        constraints,
        general_options,
        algo_options,
        gradient_options,
        logging,
        log_options,
        dashboard,
        dash_options,
    )

    # To convert the mean log likelihood in the results dictionary to the log
    # likelihood, get the length of contributions for each optimization.
    arguments = broadcast_arguments(criterion=extended_loglikelobs,
                                    params=params,
                                    criterion_kwargs=criterion_kwargs)
    check_arguments(arguments)

    contribs_and_cp_data = [
        args_one_run["criterion"](args_one_run["params"],
                                  **args_one_run["criterion_kwargs"])
        for args_one_run in arguments
    ]
    n_contributions = [len(c_and_cp[0]) for c_and_cp in contribs_and_cp_data]

    if isinstance(results, list):
        for result, n_contribs in zip(results, n_contributions):
            result[0]["fitness"] = result[0]["fitness"] * n_contribs
    else:
        results[0]["fitness"] = results[0]["fitness"] * n_contributions[0]

    return results
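
The numerical-stability argument in point 2 of the docstring above is easy to demonstrate with a standalone snippet. This is plain numpy, independent of the estimagic code, with made-up contribution values:

import numpy as np

probs = np.full(1000, 1e-5)  # 1000 small likelihood contributions

print(np.prod(probs))        # 0.0 -- the raw product underflows to zero
print(np.log(probs).sum())   # about -11512.9 -- the sum of logs stays finite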
Example 8
def minimize(
    criterion,
    params,
    algorithm,
    criterion_kwargs=None,
    constraints=None,
    general_options=None,
    algo_options=None,
    gradient_options=None,
    logging=DEFAULT_DATABASE_NAME,
    log_options=None,
    dashboard=False,
    dash_options=None,
):
    """Minimize *criterion* using *algorithm* subject to *constraints* and bounds.

    Each argument except for ``general_options`` can also be replaced by a list of
    arguments, in which case several optimizations are run in parallel. For this,
    either all arguments must be lists of the same length, or some arguments can be
    provided as single arguments, in which case they are automatically broadcasted.

    Args:
        criterion (callable or list of callables):
            Python callable that takes a pandas DataFrame with parameters as the first
            argument. Supported outputs are:
                - scalar floating point
                - np.ndarray: contributions for the TAO Pounders algorithm.
                - tuple of a scalar floating point and a pd.DataFrame:
                    In this case the first output is the criterion value.
                    The second output is the comparison_plot_data.
        params (pd.DataFrame or list of pd.DataFrames):
            See :ref:`params`.
        algorithm (str or list of strings): Specifies the optimization algorithm.
            See :ref:`list_of_algorithms`.
        criterion_kwargs (dict or list of dicts): Additional keyword arguments for
            criterion.
        constraints (list or list of lists): List with constraint dictionaries.
            See :ref:`constraints`.
        general_options (dict): Additional configurations for the optimization.
            Keys can include:
                - keep_dashboard_alive (bool): Do not terminate the dashboard process
                    after the optimization(s) finish(es).
        algo_options (dict or list of dicts): Algorithm specific configurations for the
            optimization.
        gradient_options (dict): Options for the gradient function.
        logging (str or pathlib.Path or list): Path(s) to (an) sqlite3 file(s) which
            typically has the file extension ``.db``. If the file does not exist,
            it will be created. See :ref:`logging` for details.
        log_options (dict or list of dict): Keyword arguments to influence the logging.
            See :ref:`logging` for details.
        dashboard (bool): Whether to create and show a dashboard, default is False.
            See :ref:`dashboard` for details.
        dash_options (dict or list of dict, optional): Options passed to the dashboard.
            Supported keys are:
                - port (int): port where to display the dashboard.
                - no_browser (bool): whether to display the dashboard in a browser.
                - rollover (int): how many iterations to keep in the convergence plots.

    Returns:
        results (tuple or list of tuples): Each tuple consists of the harmonized result
        info dictionary and the params DataFrame with the minimizing parameter values
        of the untransformed problem as specified by the user.

    """
    # Gradients are currently not allowed to be passed to minimize.
    gradient = None

    arguments = broadcast_arguments(
        criterion=criterion,
        params=params,
        algorithm=algorithm,
        criterion_kwargs=criterion_kwargs,
        constraints=constraints,
        general_options=general_options,
        algo_options=algo_options,
        gradient=gradient,
        gradient_options=gradient_options,
        logging=logging,
        log_options=log_options,
        dashboard=dashboard,
        dash_options=dash_options,
    )

    check_arguments(arguments)

    optim_arguments = []
    results_arguments = []
    database_paths_for_dashboard = []
    for single_arg in arguments:
        optim_kwargs, database_path, result_kwargs = transform_problem(**single_arg)
        optim_arguments.append(optim_kwargs)
        results_arguments.append(result_kwargs)
        if database_path is not None:
            database_paths_for_dashboard.append(database_path)

    if dashboard:
        dashboard_process = run_dashboard_in_separate_process(
            database_paths=database_paths_for_dashboard
        )

    if len(arguments) == 1:
        # Run only one optimization
        results = [_internal_minimize(**optim_arguments[0])]
    else:
        # Run multiple optimizations
        if "n_cores" not in optim_arguments[0]["general_options"]:
            raise ValueError(
                "n_cores needs to be specified in general_options"
                + " if multiple optimizations should be run."
            )
        n_cores = optim_arguments[0]["general_options"]["n_cores"]

        results = Parallel(n_jobs=n_cores)(
            delayed(_internal_minimize)(**optim_kwargs)
            for optim_kwargs in optim_arguments
        )

    if dashboard and dashboard_process is not None:
        if not results_arguments[0]["keep_dashboard_alive"]:
            dashboard_process.terminate()

    results = _process_optimization_results(results, results_arguments)

    return results
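
A minimal usage sketch of the parallel interface of this older ``minimize``. The criterion, parameter values and log file names are made up for illustration; the algorithm names, the ``n_cores`` requirement and the list form of ``logging`` come from the code and docstring above:

import numpy as np
import pandas as pd

params = pd.DataFrame({"value": np.array([2.0, -1.0, 0.5])})

# `criterion` and `params` are broadcasted to both runs; the algorithms
# and log files differ per run. `n_cores` is required as soon as more
# than one optimization is run.
results = minimize(
    criterion=lambda p: (p["value"] ** 2).sum(),
    params=params,
    algorithm=["scipy_L-BFGS-B", "pygmo_xnes"],
    general_options={"n_cores": 2},
    logging=["first_run.db", "second_run.db"],
)
# `results` is a list of (info_dict, params_df) tuples, one per optimization.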