Example #1
def test_one_bound_is_allowed_for_increasing():
    params = pd.DataFrame(data=[[1], [2], [2.9]], columns=["value"])
    params["lower"] = [-np.inf, 1, -np.inf]
    params["upper"] = [np.inf, 2, np.inf]

    constraints = [{"loc": params.index, "type": "increasing"}]

    process_constraints(constraints, params)
Example #2
def test_value_error_if_constraints_are_violated(example_params,
                                                 all_constraints, case):
    constraints = all_constraints[case]
    params = reduce_params(example_params, constraints)
    for val in ["invalid_value0", "invalid_value1"]:
        params["value"] = params[val]

        with pytest.raises(ValueError):
            process_constraints(constraints, params)
Example #3
def test_invalid_bound_for_increasing():
    params = pd.DataFrame(data=[[1], [2], [2.9]], columns=["value"])
    params["lower"] = [-np.inf, 1, 0.5]
    params["upper"] = np.nan

    constraints = [{"loc": params.index, "type": "increasing"}]

    with pytest.raises(ValueError):
        process_constraints(constraints, params)
Example #4
def get_start_params_from_free_params(free, constraints, params_index):
    """Construct a full params df from free parameters, constraints and the param_index.

    Args:
        free (DataFrame): free parameters
        constraints (list): list of constraints
        params_index (pd.Index or pd.MultiIndex): The index of a non-internal parameter
            DataFrame. See :ref:`params`.

    Returns:
        params (DataFrame): see :ref:`params`.

    """
    _, fixed = make_start_params_helpers(params_index, constraints)
    fake_params = pd.DataFrame(index=params_index,
                               columns=["value", "lower", "upper"])
    processed_constraints = process_constraints(constraints, fake_params)
    equality_constraints = [
        c for c in processed_constraints if c["type"] == "equality"
    ]
    params = pd.concat([free, fixed], axis=0).loc[params_index]
    for constr in equality_constraints:
        params_subset = params.loc[constr["index"]]
        values = list(params_subset["value"].value_counts(dropna=True).index)
        assert len(values) <= 1, "Too many values."
        params.loc[constr["index"], "value"] = values[0]
    return params
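A minimal usage sketch for this helper, paired with make_start_params_helpers (defined in Example #6 below). The parameter index, the equality constraint and the start value are made up for illustration, and the sketch assumes the constraint processing accepts a params DataFrame without explicit bound columns:

import pandas as pd

# Hypothetical parameter index and a single equality constraint (illustrative only).
params_index = pd.Index(["a", "b", "c"], name="name")
constraints = [{"loc": ["a", "b"], "type": "equality"}]

# Split a default params DataFrame into a user-editable free part and a fixed part ...
free, fixed = make_start_params_helpers(params_index, constraints)

# ... let the user fill in start values for the free parameters only ...
free["value"] = 0.5

# ... and reassemble a full params DataFrame where equality groups share one value.
params = get_start_params_from_free_params(free, constraints, params_index)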
Example #5
def _back_and_forth_transformation_and_assert(params, constraints):
    pc, pp = process_constraints(constraints, params)

    internal = reparametrize_to_internal(pp, pc)

    external = reparametrize_from_internal(
        internal=internal,
        fixed_values=pp["_internal_fixed_value"].to_numpy(),
        pre_replacements=pp["_pre_replacements"].to_numpy(),
        processed_constraints=pc,
        post_replacements=pp["_post_replacements"].to_numpy(),
        processed_params=pp,
    )

    assert_series_equal(external["value"], params["value"])
    return internal, external
Example #6
def make_start_params_helpers(params_index, constraints):
    """Helper DataFrames to generate start params.

    Construct a default params DataFrame and split it into free parameters and
    parameters that are fixed explicitly or implicitly through equality constraints.

    The free parameters can be exposed to a user to generate custom start parameters
    in a complex model. The fixed part can then be used to transform the user provided
    start parameters into a full params_df.

    Args:
        params_index (pd.Index or pd.MultiIndex): The index of a non-internal parameter
            DataFrame. See :ref:`params`.
        constraints (list): A list of constraints

    Returns:
        free (DataFrame): free parameters
        fixed (DataFrame): parameters that are fixed because of explicit fixes
            or equality constraints.

    """
    params = pd.DataFrame(index=params_index)
    params["value"] = np.nan
    params["lower"] = -np.inf
    params["upper"] = np.inf

    constraints = process_constraints(constraints, params)

    fixes = [c for c in constraints if c["type"] == "fixed"]
    params = apply_fixes_to_external_params(params, fixes)

    equality_constraints = [c for c in constraints if c["type"] == "equality"]
    for constr in equality_constraints:
        params.update(_equality_to_internal(params.loc[constr["index"]]))

    # It is a known bug that df.update changes some dtypes: https://tinyurl.com/y66hqxg2
    params["_fixed"] = params["_fixed"].astype(bool)
    free = params.query("~_fixed").drop(columns="_fixed")
    fixed = params.query("_fixed").drop(columns="_fixed")
    return free, fixed
Example #7
def constraints(params):
    constr = [
        {"loc": ("c", "c2"), "type": "probability"},
        {
            "loc": [("a", "a", "0"), ("a", "a", "2"), ("a", "a", "4")],
            "type": "fixed",
            "value": [0.1, 0.3, 0.5],
        },
        {"loc": ("e", "off"), "type": "fixed", "value": 0},
        {"loc": "d", "type": "increasing"},
        {"loc": "e", "type": "covariance"},
        {"loc": "f", "type": "covariance"},
        {"loc": "g", "type": "sum", "value": 5},
        {"loc": "h", "type": "equality"},
        {"loc": "i", "type": "equality"},
        {"query": 'subcategory == "j1" | subcategory == "i1"', "type": "equality"},
        {"loc": "k", "type": "sdcorr"},
        {"loc": "l", "type": "covariance"},
        {"locs": ["f", "l"], "type": "pairwise_equality"},
    ]
    constr = process_constraints(constr, params)
    return constr
Example #8
def constraints(params):
    constr = [
        {
            "loc": ("c", "c2"),
            "type": "probability"
        },
        {
            "loc": "d",
            "type": "increasing"
        },
        {
            "loc": "e",
            "type": "covariance"
        },
        {
            "loc": "f",
            "type": "covariance"
        },
        {
            "loc": "g",
            "type": "sum",
            "value": 5
        },
        {
            "loc": "h",
            "type": "equality"
        },
        {
            "loc": "i",
            "type": "equality"
        },
        {
            "query": 'subcategory == "j1" | subcategory == "i1"',
            "type": "equality"
        },
    ]
    constr = process_constraints(constr, params)
    return constr
Example #9
def _single_optimize(
    direction,
    criterion,
    criterion_kwargs,
    params,
    algorithm,
    constraints,
    algo_options,
    derivative,
    derivative_kwargs,
    criterion_and_derivative,
    criterion_and_derivative_kwargs,
    numdiff_options,
    logging,
    log_options,
    error_handling,
    error_penalty,
    cache_size,
):
    """Minimize or maximize *criterion* using *algorithm* subject to *constraints*.

    See the docstring of ``optimize`` for an explanation of all arguments.

    Returns:
        dict: The optimization result.

    """
    # store all arguments in a dictionary to save them in the database later
    problem_data = {
        "direction": direction,
        # "criterion"-criterion,
        "criterion_kwargs": criterion_kwargs,
        "algorithm": algorithm,
        "constraints": constraints,
        "algo_options": algo_options,
        # "derivative"-derivative,
        "derivative_kwargs": derivative_kwargs,
        # "criterion_and_derivative"-criterion_and_derivative,
        "criterion_and_derivative_kwargs": criterion_and_derivative_kwargs,
        "numdiff_options": numdiff_options,
        "logging": logging,
        "log_options": log_options,
        "error_handling": error_handling,
        "error_penalty": error_penalty,
        "cache_size": int(cache_size),
    }

    # partial the kwargs into corresponding functions
    criterion = functools.partial(criterion, **criterion_kwargs)
    if derivative is not None:
        derivative = functools.partial(derivative, **derivative_kwargs)
    if criterion_and_derivative is not None:
        criterion_and_derivative = functools.partial(
            criterion_and_derivative, **criterion_and_derivative_kwargs)

    # process params and constraints
    params = process_bounds(params)
    for col in ["value", "lower_bound", "upper_bound"]:
        params[col] = params[col].astype(float)
    _check_params(params)

    processed_constraints, processed_params = process_constraints(
        constraints, params)

    # name and group column are needed in the dashboard but could lead to problems
    # if present anywhere else
    params_with_name_and_group = _add_name_and_group_columns_to_params(params)
    problem_data["params"] = params_with_name_and_group

    # get internal parameters and bounds
    x = reparametrize_to_internal(
        params["value"].to_numpy(),
        processed_params["_internal_free"].to_numpy(),
        processed_constraints,
    )

    free = processed_params.query("_internal_free")
    lower_bounds = free["_internal_lower"].to_numpy()
    upper_bounds = free["_internal_upper"].to_numpy()

    # process algorithm and algo_options
    if isinstance(algorithm, str):
        algo_name = algorithm
    else:
        algo_name = getattr(algorithm, "name", "your algorithm")

    if isinstance(algorithm, str):
        try:
            algorithm = AVAILABLE_ALGORITHMS[algorithm]
        except KeyError:
            proposed = propose_algorithms(algorithm,
                                          list(AVAILABLE_ALGORITHMS))
            raise ValueError(
                f"Invalid algorithm: {algorithm}. Did you mean {proposed}?")

    algo_options = _adjust_options_to_algorithms(algo_options, lower_bounds,
                                                 upper_bounds, algorithm,
                                                 algo_name)

    # get partialed reparametrize from internal
    pre_replacements = processed_params["_pre_replacements"].to_numpy()
    post_replacements = processed_params["_post_replacements"].to_numpy()
    fixed_values = processed_params["_internal_fixed_value"].to_numpy()

    partialed_reparametrize_from_internal = functools.partial(
        reparametrize_from_internal,
        fixed_values=fixed_values,
        pre_replacements=pre_replacements,
        processed_constraints=processed_constraints,
        post_replacements=post_replacements,
    )

    # get convert derivative
    pre_replace_jac = pre_replace_jacobian(pre_replacements=pre_replacements,
                                           dim_in=len(x))
    post_replace_jac = post_replace_jacobian(
        post_replacements=post_replacements)

    convert_derivative = functools.partial(
        convert_external_derivative_to_internal,
        fixed_values=fixed_values,
        pre_replacements=pre_replacements,
        processed_constraints=processed_constraints,
        pre_replace_jac=pre_replace_jac,
        post_replace_jac=post_replace_jac,
    )

    # do first function evaluation
    first_eval = {
        "internal_params": x,
        "external_params": params,
        "output": criterion(params),
    }

    # fill numdiff_options with defaults
    numdiff_options = _fill_numdiff_options_with_defaults(
        numdiff_options, lower_bounds, upper_bounds)

    # create and initialize the database
    if not logging:
        database = False
    else:
        database = _create_and_initialize_database(logging, log_options,
                                                   first_eval, problem_data)

    # set default error penalty
    error_penalty = _fill_error_penalty_with_defaults(error_penalty,
                                                      first_eval, direction)

    # create cache
    x_hash = hash_array(x)
    cache = {x_hash: {"criterion": first_eval["output"]}}

    # partial the internal_criterion_and_derivative_template
    internal_criterion_and_derivative = functools.partial(
        internal_criterion_and_derivative_template,
        direction=direction,
        criterion=criterion,
        params=params,
        reparametrize_from_internal=partialed_reparametrize_from_internal,
        convert_derivative=convert_derivative,
        derivative=derivative,
        criterion_and_derivative=criterion_and_derivative,
        numdiff_options=numdiff_options,
        database=database,
        database_path=logging,
        log_options=log_options,
        error_handling=error_handling,
        error_penalty=error_penalty,
        first_criterion_evaluation=first_eval,
        cache=cache,
        cache_size=cache_size,
    )

    res = algorithm(internal_criterion_and_derivative, x, **algo_options)

    p = params.copy()
    p["value"] = partialed_reparametrize_from_internal(res["solution_x"])
    res["solution_params"] = p

    if "solution_criterion" not in res:
        res["solution_criterion"] = criterion(p)

    # in the long run we can get some of those from the database if logging was used.
    optional_entries = [
        "solution_derivative",
        "solution_hessian",
        "n_criterion_evaluations",
        "n_derivative_evaluations",
        "n_iterations",
        "success",
        "reached_convergence_criterion",
        "message",
    ]

    for entry in optional_entries:
        res[entry] = res.get(entry, f"Not reported by {algo_name}")

    if logging:
        _log_final_status(res, database, logging, log_options)

    return res
Example #10
def _single_minimize(
    criterion,
    params,
    algorithm,
    criterion_kwargs,
    constraints,
    general_options,
    algo_options,
    dashboard,
    db_options,
):
    """Minimize * criterion * using * algorithm * subject to * constraints * and bounds.
    Only one minimization.

    Args:
        criterion (function):
            Python function that takes a pandas DataFrame with parameters as the first
            argument and returns a scalar floating point value.

        params (pd.DataFrame):
            See :ref:`params`.

        algorithm (str):
            specifies the optimization algorithm. See :ref:`list_of_algorithms`.

        criterion_kwargs (dict):
            additional keyword arguments for criterion

        constraints (list):
            list with constraint dictionaries. See :ref:`constraints` for details.

        general_options (dict):
            additional configurations for the optimization

        algo_options (dict):
            algorithm specific configurations for the optimization

        dashboard (bool):
            whether to create and show a dashboard

        db_options (dict):
            dictionary with kwargs to be supplied to the run_server function.

    """
    simplefilter(action="ignore", category=pd.errors.PerformanceWarning)
    params = _process_params(params)

    fitness_factor = -1 if general_options.get("_maximization", False) else 1
    fitness_eval = fitness_factor * criterion(params, **criterion_kwargs)
    constraints, params = process_constraints(constraints, params)
    internal_params = reparametrize_to_internal(params, constraints)

    queue = Queue() if dashboard else None
    if dashboard:
        stop_signal = Event()
        outer_server_process = Process(
            target=run_server,
            kwargs={
                "queue": queue,
                "db_options": db_options,
                "start_param_df": params,
                "start_fitness": fitness_eval,
                "stop_signal": stop_signal,
            },
            daemon=False,
        )
        outer_server_process.start()

    result = _internal_minimize(
        criterion=criterion,
        criterion_kwargs=criterion_kwargs,
        params=params,
        internal_params=internal_params,
        constraints=constraints,
        algorithm=algorithm,
        algo_options=algo_options,
        general_options=general_options,
        queue=queue,
        fitness_factor=fitness_factor,
    )

    if dashboard:
        stop_signal.set()
        outer_server_process.terminate()
    return result
Example #11
    return params.loc[all_locs].copy()


@pytest.mark.parametrize("case, number", to_test)
def test_reparametrize_to_internal(example_params, all_constraints, case,
                                   number):
    constraints = all_constraints[case]
    params = reduce_params(example_params, constraints)
    params["value"] = params[f"value{number}"]

    keep = params[f"internal_value{number}"].notnull()
    expected_internal_values = params[f"internal_value{number}"][keep]
    expected_internal_lower = params["internal_lower"]
    expected_internal_upper = params["internal_upper"]

    pc, pp = process_constraints(constraints, params)

    calculated_internal_values = reparametrize_to_internal(pp, pc)
    calculated_internal_lower = pp["_internal_lower"]
    calculated_internal_upper = pp["_internal_upper"]

    aaae(calculated_internal_values, expected_internal_values)
    aaae(calculated_internal_lower, expected_internal_lower)
    aaae(calculated_internal_upper, expected_internal_upper)


@pytest.mark.parametrize("case, number", to_test)
def test_reparametrize_from_internal(example_params, all_constraints, case,
                                     number):
    constraints = all_constraints[case]
    params = reduce_params(example_params, constraints)
Example #12
def transform_problem(
    criterion,
    params,
    algorithm,
    criterion_kwargs,
    constraints,
    general_options,
    algo_options,
    gradient,
    gradient_kwargs,
    gradient_options,
    logging,
    log_options,
    dashboard,
    dash_options,
):
    """Transform the user supplied problem.

    The transformed optimization problem is converted from the original problem
    which consists of the user supplied criterion, params DataFrame, criterion_kwargs,
    constraints and gradient (if supplied).
    In addition, the transformed optimization problem provides sophisticated logging
    tools if activated by the user.

    The transformed problem can be solved by almost any optimizer package:
        1. The only constraints are bounds on the parameters.
        2. The internal_criterion function takes a one-dimensional np.ndarray as input.
        3. The internal criterion function returns a scalar value
            (except for the case of the tao_pounders algorithm).

    Note that, because of the reparametrizations estimagic performs to implement
    constraints on behalf of the user, the internal params cannot be interpreted
    without reparametrizing them back to the full params DataFrame.

    Args:
        criterion (callable or list of callables): Python function that takes a pandas
            DataFrame with parameters as the first argument. Supported outputs are:
                - scalar floating point
                - np.ndarray: contributions for the tao Pounders algorithm.
                - tuple of a scalar floating point and a pd.DataFrame:
                    In this case the first output is the criterion value.
                    The second output are the comparison_plot_data.
                    See :ref:`comparison_plot`.
                    .. warning::
                        This feature is not implemented in the dashboard yet.
        params (pd.DataFrame or list of pd.DataFrames): See :ref:`params`.
        algorithm (str or list of strings): Name of the optimization algorithm.
            See :ref:`list_of_algorithms`.
        criterion_kwargs (dict or list of dict): Additional criterion keyword arguments.
        constraints (list or list of lists): List with constraint dictionaries.
            See :ref:`constraints` for details.
        general_options (dict): Additional configurations for the optimization.
            Keys can include:
                - keep_dashboard_alive (bool): if True and dashboard is True, the
                    process in which the dashboard runs is not terminated when
                    maximize or minimize finishes.
        algo_options (dict or list of dicts): Algorithm specific configurations.
        gradient (callable or None): Gradient function.
        gradient_kwargs (dict): Additional keyword arguments for the gradient.
        gradient_options (dict): Options for the gradient function.
        logging (str or pathlib.Path or list thereof): Path to an sqlite3 file which
            typically has the file extension ``.db``. If the file does not exist,
            it will be created. See :ref:`logging` for details.
        log_options (dict or list of dict): Keyword arguments to influence the logging.
            See :ref:`logging` for details.
        dashboard (bool): Whether to create and show a dashboard, default is False.
            See :ref:`dashboard` for details.
        dash_options (dict or list of dict, optional): Options passed to the dashboard.
            Supported keys are:
                - port (int): port where to display the dashboard
                - no_browser (bool): whether to display the dashboard in a browser
                - rollover (int): how many iterations to keep in the monitoring plots

    Returns:
        optim_kwargs (dict): Dictionary collecting all arguments that are going to be
            passed to _internal_minimize.
        database_path (str or pathlib.Path or None): Path to the database.
        result_kwargs (dict): Arguments needed to reparametrize back from the internal
            parameter array to the params DataFrame of the user supplied problem.
            In addition it contains whether the dashboard process should be kept alive
            after the optimization(s) terminate(s).

    """
    optim_kwargs, params, dash_options, database_path = _pre_process_arguments(
        params=params,
        algorithm=algorithm,
        algo_options=algo_options,
        logging=logging,
        dashboard=dashboard,
        dash_options=dash_options,
    )

    # harmonize criterion interface
    is_maximization = general_options.pop("_maximization", False)
    criterion = expand_criterion_output(criterion)
    criterion = negative_criterion(criterion) if is_maximization else criterion

    # first criterion evaluation for the database and the pounders algorithm
    fitness_eval, comparison_plot_data, raw_result = _evaluate_criterion(
        criterion=criterion, params=params, criterion_kwargs=criterion_kwargs)
    general_options = general_options.copy()
    general_options["_start_criterion_value"] = raw_result
    general_options["start_criterion_value"] = fitness_eval

    with warnings.catch_warnings():
        warnings.simplefilter(action="ignore",
                              category=pd.errors.PerformanceWarning)

        # transform the user supplied inputs into the internal inputs.
        constraints, params = process_constraints(constraints, params)
        internal_params = reparametrize_to_internal(params, constraints)
        bounds = _get_internal_bounds(params)

    # setup the database to pass it to the internal functions for logging
    if logging:
        database = prepare_database(
            path=logging,
            params=params,
            comparison_plot_data=comparison_plot_data,
            dash_options=dash_options,
            constraints=constraints,
            **log_options,
        )
    else:
        database = False

    # transform the user supplied criterion and gradient function into their
    # internal counterparts that use internal inputs.

    # this must be passed to _create_internal_criterion because the internal
    # gradient creates its own internal criterion function whose calls are
    # logged differently by the database.
    logging_decorator = functools.partial(
        log_evaluation,
        database=database,
        tables=[
            "params_history", "criterion_history", "comparison_plot",
            "timestamps"
        ],
    )

    internal_criterion = _create_internal_criterion(
        criterion=criterion,
        params=params,
        constraints=constraints,
        criterion_kwargs=criterion_kwargs,
        logging_decorator=logging_decorator,
        general_options=general_options,
        database=database,
    )

    internal_gradient = _create_internal_gradient(
        gradient=gradient,
        gradient_kwargs=gradient_kwargs,
        gradient_options=gradient_options,
        criterion=criterion,
        params=params,
        constraints=constraints,
        criterion_kwargs=criterion_kwargs,
        general_options=general_options,
        database=database,
    )

    internal_kwargs = {
        "internal_criterion": internal_criterion,
        "internal_params": internal_params,
        "bounds": bounds,
        "internal_gradient": internal_gradient,
        "database": database,
        "general_options": general_options,
    }
    optim_kwargs.update(internal_kwargs)

    result_kwargs = {
        "params": params,
        "constraints": constraints,
        "keep_dashboard_alive": general_options.pop("keep_dashboard_alive",
                                                    False),
    }
    return optim_kwargs, database_path, result_kwargs
Example #13
    if design_info is not None:
        raise NotImplementedError(
            "Cluster robust standard errors are not yet implemented.")

    if jacobian is not None:
        raise NotImplementedError()

    if hessian is not None:
        raise NotImplementedError()

    # calculate internal covariance matrix
    loglike = functools.partial(loglike, **loglike_kwargs)
    internal_loglike = numpy_interface(loglike,
                                       params=params,
                                       constraints=constraints)
    processed_constraints, processed_params = process_constraints(
        constraints, params)

    internal_params = reparametrize_to_internal(
        external=params["value"].to_numpy(),
        internal_free=processed_params["_internal_free"],
        processed_constraints=processed_constraints,
    )

    if cov_type == "jacobian":
        numdiff_options = numdiff_options.copy()
        numdiff_options["key"] = "contributions"

        internal_jac = first_derivative(
            internal_loglike,
            internal_params,
            **numdiff_options,
Example #14
def transform_covariance(
    params,
    internal_cov,
    constraints,
    n_samples,
    bounds_handling,
):
    """Transform the internal covariance matrix to an external one, given constraints.

    Args:
        params (pd.DataFrame): DataFrame where the "value" column contains estimated
            parameters of a likelihood model. See :ref:`params` for details.
        internal_cov (np.ndarray): Covariance matrix of the internal parameter
            vector. For background information about internal and external params
            see :ref:`implementation_of_constraints`.
        constraints (list): List with constraint dictionaries.
            See .. _link: ../../docs/source/how_to_guides/how_to_use_constraints.ipynb
        n_samples (int): Number of samples used to transform the covariance matrix of
            the internal parameter vector into the covariance matrix of the external
            parameters.
        bounds_handling (str): One of "clip", "raise", "ignore". Determines how bounds
            are handled. If "clip", confidence intervals are clipped at the bounds.
            Standard errors are only adjusted if a sampling step is necessary due to
            additional constraints. If "raise" and any lower or upper bound is binding,
            we raise an error. If "ignore", boundary problems are simply ignored.

    Returns:
        pd.DataFrame: Quadratic DataFrame containing the covariance matrix of the free
            parameters. If parameters were fixed (explicitly or by other constraints),
            the index is a subset of params.index. The columns are the same as the
            index.

    """
    processed_constraints, processed_params = process_constraints(
        constraints, params)
    free_index = processed_params.query("_internal_free").index

    if processed_constraints:
        free = processed_params.loc[free_index]
        is_free = processed_params["_internal_free"].to_numpy()
        pre_replacements = processed_params["_pre_replacements"].to_numpy()
        post_replacements = processed_params["_post_replacements"].to_numpy()
        fixed_values = processed_params["_internal_fixed_value"].to_numpy()
        lower_bounds = free["_internal_lower"]
        upper_bounds = free["_internal_upper"]

        internal_mean = reparametrize_to_internal(
            external=params["value"].to_numpy(),
            internal_free=is_free,
            processed_constraints=processed_constraints,
        )
        sample = np.random.multivariate_normal(
            mean=internal_mean,
            cov=internal_cov,
            size=n_samples,
        )
        transformed_free = []
        for params_vec in sample:
            if bounds_handling == "clip":
                params_vec = np.clip(params_vec,
                                     a_min=lower_bounds,
                                     a_max=upper_bounds)
            elif bounds_handling == "raise":
                if (params_vec < lower_bounds).any() or (params_vec >
                                                         upper_bounds).any():
                    raise ValueError()

            transformed = reparametrize_from_internal(
                internal=params_vec,
                fixed_values=fixed_values,
                pre_replacements=pre_replacements,
                processed_constraints=processed_constraints,
                post_replacements=post_replacements,
            )
            transformed_free.append(transformed[is_free])

        free_cov = np.cov(
            np.array(transformed_free),
            rowvar=False,
        )

    else:
        free_cov = internal_cov

    res = pd.DataFrame(data=free_cov, columns=free_index, index=free_index)
    return res
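An illustrative call of transform_covariance, kept deliberately small: without constraints the internal covariance is passed through unchanged and only re-indexed by the free parameters (see the else branch above). The params layout below is an assumption; depending on the version it may also need the bound columns used elsewhere in these examples:

import numpy as np
import pandas as pd

# Hypothetical estimates and internal covariance for two unconstrained parameters.
params = pd.DataFrame({"value": [0.1, 0.2]}, index=["a", "b"])
internal_cov = np.array([[0.04, 0.00],
                         [0.00, 0.09]])

free_cov = transform_covariance(
    params=params,
    internal_cov=internal_cov,
    constraints=[],          # no constraints: free_cov equals internal_cov
    n_samples=10_000,        # only used when constraints require sampling
    bounds_handling="clip",
)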
Example #15
def minimize(
    criterion,
    params,
    algorithm,
    criterion_args=None,
    criterion_kwargs=None,
    constraints=None,
    general_options=None,
    algo_options=None,
    dashboard=False,
    db_options=None,
):
    """Minimize *criterion* using *algorithm* subject to *constraints* and bounds.

    Args:
        criterion (function):
            Python function that takes a pandas Series with parameters as the first
            argument and returns a scalar floating point value.

        params (pd.DataFrame):
            See :ref:`params`.

        algorithm (str):
            specifies the optimization algorithm. See :ref:`list_of_algorithms`.

        criterion_args (list or tuple):
            additional positional arguments for criterion

        criterion_kwargs (dict):
            additional keyword arguments for criterion

        constraints (list):
            list with constraint dictionaries. See :ref:`constraints` for details.

        general_options (dict):
            additional configurations for the optimization

        algo_options (dict):
            algorithm specific configurations for the optimization

        dashboard (bool):
            whether to create and show a dashboard

        db_options (dict):
            dictionary with kwargs to be supplied to the run_server function.

    """
    # set default arguments
    criterion_args = [] if criterion_args is None else criterion_args
    criterion_kwargs = {} if criterion_kwargs is None else criterion_kwargs
    constraints = [] if constraints is None else constraints
    general_options = {} if general_options is None else general_options
    algo_options = {} if algo_options is None else algo_options
    db_options = {} if db_options is None else db_options

    params = _process_params_df(params)
    fitness_eval = criterion(params["value"], *criterion_args,
                             **criterion_kwargs)
    constraints = process_constraints(constraints, params)
    internal_params = reparametrize_to_internal(params, constraints)

    queue = Queue() if dashboard else None
    start_signal = Queue() if dashboard else None
    if dashboard:
        # later only the parameter series can be supplied
        # but for the setup of the dashboard we want the whole DataFrame
        queue.put(
            QueueEntry(params=params, fitness=fitness_eval,
                       still_running=True))

        # To-Do: Don't hard code the port
        server_thread = Thread(
            target=run_server,
            kwargs={
                "queue": queue,
                "port": 5039,
                "db_options": db_options,
                "start_signal": start_signal,
            },
            daemon=True,
        )
        server_thread.start()

    if dashboard:
        # wait for server_thread to give start signal
        while start_signal.qsize() == 0:
            sleep(0.01)

    result = _minimize(
        criterion=criterion,
        criterion_args=criterion_args,
        criterion_kwargs=criterion_kwargs,
        params=params,
        internal_params=internal_params,
        constraints=constraints,
        algorithm=algorithm,
        algo_options=algo_options,
        general_options=general_options,
        queue=queue,
    )

    if dashboard:
        queue.put(
            QueueEntry(params=result[1],
                       fitness=result[0]["f"],
                       still_running=False))
    return result
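A minimal call sketch for this version of minimize. The sphere criterion is illustrative, the algorithm name is a placeholder for one of the names in :ref:`list_of_algorithms`, and default bounds are assumed to be filled in by _process_params_df:

import pandas as pd

def sphere(value):
    # receives params["value"], i.e. a pandas Series of parameter values
    return (value ** 2).sum()

params = pd.DataFrame({"value": [1.0, 2.0, 3.0]})

result = minimize(
    criterion=sphere,
    params=params,
    algorithm="some_algorithm",  # placeholder; see :ref:`list_of_algorithms`
    constraints=[{"loc": params.index, "type": "increasing"}],
)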
Example #16
def minimize(
    criterion,
    params,
    algorithm,
    criterion_args=None,
    criterion_kwargs=None,
    constraints=None,
    general_options=None,
    algo_options=None,
    dashboard=False,
    db_options=None,
):
    """Minimize *criterion* using *algorithm* subject to *constraints* and bounds.

    Args:
        criterion (function):
            Python function that takes a pandas Series with parameters as the first
            argument and returns a scalar floating point value.

        params (pd.DataFrame):
            See :ref:`params`.

        algorithm (str):
            specifies the optimization algorithm. See :ref:`list_of_algorithms`.

        criterion_args (list or tuple):
            additional positional arguments for criterion

        criterion_kwargs (dict):
            additional keyword arguments for criterion

        constraints (list):
            list with constraint dictionaries. See :ref:`constraints` for details.

        general_options (dict):
            additional configurations for the optimization

        algo_options (dict):
            algorithm specific configurations for the optimization

        dashboard (bool):
            whether to create and show a dashboard

        db_options (dict):
            dictionary with kwargs to be supplied to the run_server function.

    """
    # set default arguments
    criterion_args = [] if criterion_args is None else criterion_args
    criterion_kwargs = {} if criterion_kwargs is None else criterion_kwargs
    constraints = [] if constraints is None else constraints
    general_options = {} if general_options is None else general_options
    algo_options = {} if algo_options is None else algo_options
    db_options = {} if db_options is None else db_options

    params = _process_params(params)
    fitness_eval = criterion(params, *criterion_args, **criterion_kwargs)
    constraints = process_constraints(constraints, params)
    internal_params = reparametrize_to_internal(params, constraints)

    queue = Queue() if dashboard else None
    if dashboard:
        stop_signal = Event()
        outer_server_process = Process(
            target=run_server,
            kwargs={
                "queue": queue,
                "db_options": db_options,
                "start_param_df": params,
                "start_fitness": fitness_eval,
                "stop_signal": stop_signal,
            },
            daemon=False,
        )
        outer_server_process.start()

    result, timing_info = _minimize(
        criterion=criterion,
        criterion_args=criterion_args,
        criterion_kwargs=criterion_kwargs,
        params=params,
        internal_params=internal_params,
        constraints=constraints,
        algorithm=algorithm,
        algo_options=algo_options,
        general_options=general_options,
        queue=queue,
    )

    if dashboard:
        stop_signal.set()
        outer_server_process.terminate()
    return result, timing_info
Example #17
def _single_minimize(
    criterion,
    params,
    algorithm,
    criterion_kwargs,
    constraints,
    general_options,
    algo_options,
    gradient,
    gradient_options,
    logging,
    log_options,
    dashboard,
    db_options,
):
    """Minimize * criterion * using * algorithm * subject to * constraints * and bounds.
    Only one minimization.

    Args:
        criterion (function):
            Python function that takes a pandas DataFrame with parameters as the first
            argument and returns a scalar floating point value.

        params (pd.DataFrame):
            See :ref:`params`.

        algorithm (str):
            specifies the optimization algorithm. See :ref:`list_of_algorithms`.

        criterion_kwargs (dict):
            additional keyword arguments for criterion

        constraints (list):
            list with constraint dictionaries. See :ref:`constraints` for details.

        general_options (dict):
            additional configurations for the optimization

        algo_options (dict):
            algorithm specific configurations for the optimization

        gradient (callable or None):
            Gradient function.

        gradient_options (dict):
            Options for the gradient function.

        logging (str or pathlib.Path): Path to an sqlite3 file which typically has the
            file extension ``.db``. If the file does not exist, it will be created. See
            :ref:`logging` for details.

        log_options (dict): Keyword arguments to influence the logging. See
            :ref:`logging` for details.

        dashboard (bool):
            whether to create and show a dashboard

        db_options (dict):
            dictionary with kwargs to be supplied to the run_server function.

    """
    simplefilter(action="ignore", category=pd.errors.PerformanceWarning)
    params = _process_params(params)

    # Apply decorator to handle criterion functions with one or two returns.
    criterion = expand_criterion_output(criterion)

    is_maximization = general_options.pop("_maximization", False)
    criterion = negative_criterion(criterion) if is_maximization else criterion
    fitness_factor = -1 if is_maximization else 1

    criterion_out, comparison_plot_data = criterion(params, **criterion_kwargs)
    if np.isscalar(criterion_out):
        fitness_eval = fitness_factor * criterion_out
    else:
        fitness_eval = fitness_factor * np.mean(np.square(criterion_out))

    if np.any(np.isnan(fitness_eval)):
        raise ValueError(
            "The criterion function evaluated at the start parameters returns NaNs."
        )

    database = (prepare_database(logging, params, comparison_plot_data,
                                 log_options) if logging else False)

    general_options["start_criterion_value"] = fitness_eval

    constraints, params = process_constraints(constraints, params)
    internal_params = reparametrize_to_internal(params, constraints)

    queue = Queue() if dashboard else None
    if dashboard:
        stop_signal = Event()
        outer_server_process = Process(
            target=run_server,
            kwargs={
                "queue": queue,
                "db_options": db_options,
                "start_param_df": params,
                "start_fitness": fitness_eval,
                "stop_signal": stop_signal,
            },
            daemon=False,
        )
        outer_server_process.start()

    result, params = _internal_minimize(
        criterion=criterion,
        criterion_kwargs=criterion_kwargs,
        params=params,
        internal_params=internal_params,
        constraints=constraints,
        algorithm=algorithm,
        algo_options=algo_options,
        gradient=gradient,
        gradient_options=gradient_options,
        general_options=general_options,
        database=database,
        queue=queue,
        fitness_factor=fitness_factor,
    )

    if dashboard:
        stop_signal.set()
        outer_server_process.terminate()

    return result, params
Example #18
def numpy_interface(func=None,
                    *,
                    params=None,
                    constraints=None,
                    numpy_output=False):
    """Convert x to params.

    This decorated function receives a NumPy array of parameters and converts it to a
    :class:`pandas.DataFrame` which can be handled by the user's criterion function.

    For convenience, the decorated function can also be called directly with a
    params DataFrame. In that case, the decorator does nothing.

    Args:
        func (callable): The function to which the decorator is applied.
        params (pandas.DataFrame): See :ref:`params`.
        constraints (list of dict): Contains constraints.
        numpy_output (bool): Whether pandas objects in the output should also be
            converted to numpy arrays.

    Returns:
        callable

    """
    constraints = [] if constraints is None else constraints

    pc, pp = process_constraints(constraints, params)

    fixed_values = pp["_internal_fixed_value"].to_numpy()
    pre_replacements = pp["_pre_replacements"].to_numpy().astype(int)
    post_replacements = pp["_post_replacements"].to_numpy().astype(int)

    def decorator_numpy_interface(func):
        @functools.wraps(func)
        def wrapper_numpy_interface(x, *args, **kwargs):
            if isinstance(x, pd.DataFrame):
                p = x
            elif isinstance(x, np.ndarray):
                p = params.copy()
                p["value"] = reparametrize_from_internal(
                    internal=x,
                    fixed_values=fixed_values,
                    pre_replacements=pre_replacements,
                    processed_constraints=pc,
                    post_replacements=post_replacements,
                )
            else:
                raise ValueError(
                    "x must be a numpy array or DataFrame with 'value' column."
                )

            criterion_value = func(p, *args, **kwargs)

            if isinstance(criterion_value,
                          (pd.DataFrame, pd.Series)) and numpy_output:
                criterion_value = criterion_value.to_numpy()

            return criterion_value

        return wrapper_numpy_interface

    if callable(func):
        return decorator_numpy_interface(func)
    else:
        return decorator_numpy_interface
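A small usage sketch for the decorator, assuming process_constraints accepts an unconstrained params DataFrame that only has a "value" column; the sphere criterion is illustrative:

import numpy as np
import pandas as pd

params = pd.DataFrame({"value": [1.0, 2.0, 3.0]})

@numpy_interface(params=params, constraints=[])
def sphere(p):
    # p arrives as a params DataFrame whose "value" column reflects the internal x
    return (p["value"] ** 2).sum()

sphere(np.array([1.0, 2.0, 3.0]))  # called with the internal parameter vector
sphere(params)                     # or, for convenience, with the params DataFrame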