def test_invalid_derivative_versions(
    direction, algorithm, derivative, criterion_and_derivative
):
    """Check that the optimizer raises a ValueError for these derivative inputs.

    For ``direction == "minimize"`` the inputs are passed to ``minimize``
    unchanged. Otherwise ``maximize`` is called and the criterion as well as
    every derivative function that is not None is wrapped with ``switch_sign``.
    """
    start_params = pd.DataFrame({"value": [1, 2, 3]})
    is_minimization = direction == "minimize"

    if is_minimization:
        deriv = derivative
        crit_and_deriv = criterion_and_derivative
    else:
        # None means "not provided" and must stay None; only callables are flipped.
        deriv = None if derivative is None else switch_sign(derivative)
        crit_and_deriv = (
            None
            if criterion_and_derivative is None
            else switch_sign(criterion_and_derivative)
        )

    with pytest.raises(ValueError):
        if is_minimization:
            minimize(
                criterion=sos_dict_criterion,
                params=start_params,
                algorithm=algorithm,
                derivative=deriv,
                criterion_and_derivative=crit_and_deriv,
            )
        else:
            maximize(
                criterion=switch_sign(sos_dict_criterion),
                params=start_params,
                algorithm=algorithm,
                derivative=deriv,
                criterion_and_derivative=crit_and_deriv,
            )
def test_warnings_with_old_bounds_names():
    """Check that a "lower" or "upper" column in params triggers a UserWarning."""

    def criterion(params):
        return 1 - params["value"] @ params["value"]

    template = pd.DataFrame({"value": [1, 2, 3]})

    for wrong_name in ("lower", "upper"):
        # Work on a copy so each iteration only contains one offending column.
        params = template.copy()
        params[wrong_name] = 0

        with pytest.warns(UserWarning):
            maximize(criterion, params=params, algorithm="scipy_lbfgsb")
def test_valid_derivative_versions(
    direction, algorithm, derivative, criterion_and_derivative
):
    """Check that the optimum (all zeros) is found for these derivative inputs.

    For ``direction == "minimize"`` the inputs are passed to ``minimize``
    unchanged. Otherwise ``maximize`` is called and the criterion as well as
    every derivative function that is not None is wrapped with ``switch_sign``.
    Errors inside the optimization are raised (``error_handling="raise"``).
    """
    start_params = pd.DataFrame({"value": [1, 2, 3]})
    shared_kwargs = {
        "params": start_params,
        "algorithm": algorithm,
        "error_handling": "raise",
    }

    if direction == "minimize":
        res = minimize(
            criterion=sos_dict_criterion,
            derivative=derivative,
            criterion_and_derivative=criterion_and_derivative,
            **shared_kwargs,
        )
    else:
        # None means "not provided" and must stay None; only callables are flipped.
        flipped_derivative = None if derivative is None else switch_sign(derivative)
        flipped_crit_and_deriv = (
            None
            if criterion_and_derivative is None
            else switch_sign(criterion_and_derivative)
        )
        res = maximize(
            criterion=switch_sign(sos_dict_criterion),
            derivative=flipped_derivative,
            criterion_and_derivative=flipped_crit_and_deriv,
            **shared_kwargs,
        )

    solution = res["solution_params"]["value"].to_numpy()
    aaae(solution, np.zeros(3), decimal=4)
def test_multistart_minimize_with_sum_of_squares_at_defaults(
    criterion, direction, params
):
    """Run a multistart optimization with default options and inspect the result.

    The test checks the size and element types of the entries in
    ``res.multistart_info`` and that the optimum (criterion value 0, all
    parameters 0) is found.
    """
    if direction == "minimize":
        optimize, target = minimize, criterion
    else:
        # NOTE(review): this branch always uses ``sos_dict_criterion`` and ignores
        # the ``criterion`` argument -- confirm whether that is intended.
        optimize, target = maximize, switch_sign(sos_dict_criterion)

    res = optimize(
        criterion=target,
        params=params,
        algorithm="scipy_lbfgsb",
        multistart=True,
    )

    assert hasattr(res, "multistart_info")
    ms_info = res.multistart_info

    for key in ("exploration_sample", "exploration_results"):
        assert len(ms_info[key]) == 40

    expected_entry_types = {
        "exploration_results": float,
        "local_optima": OptimizeResult,
        "start_parameters": pd.DataFrame,
    }
    for key, entry_type in expected_entry_types.items():
        assert all(isinstance(entry, entry_type) for entry in ms_info[key])

    assert np.allclose(res.criterion, 0)
    aaae(res.params["value"], np.zeros(4))
def fit(self, start_params=None, dashboard=False, db_options=None):
    """Fit the model by maximizing the mean log likelihood contribution.

    Args:
        start_params: Forwarded to ``self.generate_full_start_params``; the
            result is used as start values of the optimization.
        dashboard: Forwarded to ``maximize``.
        db_options (dict or None): Forwarded to ``maximize``. None is replaced
            by an empty dictionary.

    Returns:
        The return value of ``maximize``.

    """
    if db_options is None:
        db_options = {}

    likelihood_args = self.likelihood_arguments_dict()
    full_start_params = self.generate_full_start_params(start_params)

    def criterion(params, args):
        contributions = log_likelihood_contributions(params, **args)
        # Floor extremely negative contributions at -1e300 before averaging.
        below_floor = contributions < -1e300
        contributions[below_floor] = -1e300
        return np.mean(contributions)

    return maximize(
        criterion,
        full_start_params,
        constraints=self.constraints,
        algorithm="scipy_L-BFGS-B",
        criterion_args=(likelihood_args,),
        dashboard=dashboard,
        db_options=db_options,
        algo_options={"maxfun": 1000000, "maxiter": 1000000},
    )
def test_maximize(algorithm):
    """Maximize the module-level criterion ``f`` and expect a solution of zeros.

    ``algorithm`` is split at the first underscore into an origin and an
    algorithm name. For the origin "pygmo" the algorithm options depend on the
    algorithm name; every other origin runs with empty options.

    The original if/elif chain contained a second ``simulated_annealing`` branch
    (re-seeding with 5471 and setting ``n_T_adj``, ``Tf`` and ``n_range_adj``)
    that could never run because the first branch already matched that name. The
    dead branch is removed here; the options that are actually used are unchanged.
    """
    np.random.seed(1234)
    params = pd.Series([1, -1, -1.5, 1.5], name="value").to_frame()
    params["lower"] = -2
    params["upper"] = 2

    origin, algo_name = algorithm.split("_", 1)
    if origin == "pygmo":
        options_by_algorithm = {
            "simulated_annealing": {},
            "ihs": {"popsize": 1, "gen": 1000},
            "sga": {"popsize": 50, "gen": 500},
            "sea": {"popsize": 5, "gen": 7000},
        }
        # All remaining pygmo algorithms share one population setup.
        algo_options = options_by_algorithm.get(
            algo_name, {"popsize": 30, "gen": 150}
        )
    else:
        algo_options = {}

    _, final_params = maximize(
        f,
        params,
        algorithm,
        algo_options=algo_options,
        logging=False,
    )

    aaae(final_params["value"].to_numpy(), np.zeros(len(final_params)), decimal=2)
def test_sign_is_switched_back_after_maximization():
    """Check that the reported criterion value at the maximum is 1."""

    def criterion(params):
        return 1 - params["value"] @ params["value"]

    start_params = pd.DataFrame({"value": [1, 2, 3]})

    res = maximize(criterion, params=start_params, algorithm="scipy_lbfgsb")

    assert np.allclose(res["solution_criterion"], 1)
def test_maximize_with_gradient(algorithm):
    """Maximize ``minus_sum_of_squares`` with its gradient and expect zeros."""
    start_params = pd.DataFrame({"value": [1, 2.5, -1]})

    info, _ = maximize(
        criterion=minus_sum_of_squares,
        params=start_params,
        algorithm=algorithm,
        gradient=minus_sum_of_squares_gradient,
    )

    aaae(info["x"], [0, 0, 0])
def test_convergence_via_max_discoveries_works(params):
    """Check that exactly two local optima are stored with these options.

    The multistart options set ``convergence_max_discoveries`` to 2 and the
    relative params tolerance to infinity.
    """
    res = maximize(
        criterion=switch_sign(sos_dict_criterion),
        params=params,
        algorithm="scipy_lbfgsb",
        multistart=True,
        multistart_options={
            "convergence_relative_params_tolerance": np.inf,
            "convergence_max_discoveries": 2,
        },
    )

    n_local_optima = len(res.multistart_info["local_optima"])
    assert n_local_optima == 2
def multinomial_probit(formula, data, cov_structure, integration_method, algorithm):
    """Estimate a multinomial probit model by maximizing its log likelihood.

    Args:
        formula (str): A patsy formula.
        data (pd.DataFrame): The dataset.
        cov_structure (str): Takes the values "iid" or "free". For every value
            other than "iid", a covariance constraint is imposed on the
            "covariance" parameters and the parameter ("covariance", 0) is
            fixed at 1.0.
        integration_method (str): Takes the values 'mc_integration',
            'smooth_mc_integration', 'gauss_integration' or
            'mprobit_choice_probabilities'.
        algorithm (str): Takes the values 'scipy_L-BFGS-B', 'scipy_SLSQP',
            'nlopt_bobyqa' or 'nlopt_newuoa_bound'.

    Returns:
        The return value of ``maximize``. Presumably this holds information on
        the optimization (e.g. the maximized log likelihood) and the optimal
        parameters -- verify against ``maximize``.

    """
    y, x, params = multinomial_processing(formula, data, cov_structure)
    params_df = pd.DataFrame(params, columns=["value"])

    constraints = []
    if cov_structure != "iid":
        constraints = [
            {"loc": "covariance", "type": "covariance"},
            {"loc": ("covariance", 0), "type": "fixed", "value": 1.0},
        ]

    loglike_kwargs = {
        "y": y,
        "x": x,
        "cov_structure": cov_structure,
        "integration_method": integration_method,
    }

    return maximize(
        multinomial_probit_loglike,
        params_df,
        algorithm,
        criterion_kwargs=loglike_kwargs,
        constraints=constraints,
        dashboard=False,
    )
def test_covariance_constraint_in_2_by_2_case():
    """Fit a logit on the spector data with a covariance constraint on params 1-3.

    The estimates are compared to hard-coded expected values up to four decimals.
    """
    spector_data = sm.datasets.spector.load_pandas()
    spector_data.exog = sm.add_constant(spector_data.exog)
    x_df = sm.add_constant(spector_data.exog)

    covariance_constraint = {"loc": [1, 2, 3], "type": "covariance"}
    loglike_kwargs = {"y": spector_data.endog, "x": x_df.to_numpy()}

    result = maximize(
        criterion=logit_loglike,
        criterion_kwargs=loglike_kwargs,
        params=np.array([-10, 2, 0.2, 2]),
        algorithm="scipy_lbfgsb",
        constraints=covariance_constraint,
    )

    expected = np.array([-13.0213351, 2.82611417, 0.09515704, 2.37867869])
    aaae(result.params, expected, decimal=4)
def run_bootstrap(df, params, options, constr, num_boots, is_perturb=False):
    """Re-estimate the model on ``num_boots`` bootstrap samples.

    Args:
        df (pd.DataFrame): Data handed to ``get_bootstrap_sample``.
        params (pd.DataFrame): Parameters with a "value" column. The columns
            "lower" and "upper" are read as well if ``is_perturb`` is True.
        options: Forwarded to ``rp.get_crit_func``.
        constr (list): Constraint dictionaries forwarded to ``maximize``. If
            ``is_perturb`` is True, every entry needs a "loc" key.
        num_boots (int): Number of bootstrap iterations.
        is_perturb (bool): If True, start values are drawn uniformly between the
            "lower" and "upper" bound of each parameter. Parameters addressed by
            a constraint's "loc" are reset to their value in ``params``.

    Returns:
        pd.DataFrame: Indexed like ``params`` with one column
        ``bootstrap_<iteration>`` of estimated values per iteration.

    """
    boot_params = pd.DataFrame(index=params.index)

    for iter_ in range(num_boots):
        # The iteration number seeds both the sample and the perturbation.
        np.random.seed(iter_)
        boot_df = get_bootstrap_sample(df, seed=iter_)

        # Set up starting values
        params_start = params.copy()
        if is_perturb:
            for index in params.index:
                lower, upper = params_start.loc[index, ["lower", "upper"]]
                params_start.loc[index, "value"] = np.random.uniform(lower, upper)

            # Restore the original values of parameters addressed by constraints.
            # This is only needed after a perturbation; without one
            # ``params_start`` still equals ``params``.
            for dict_ in constr:
                loc = dict_["loc"]
                selected = params.loc[loc, "value"]
                # A scalar selection has no ``.values``; use it as it is.
                params_start.loc[loc, "value"] = getattr(selected, "values", selected)

        # The criterion function is built with the unperturbed ``params``.
        crit_func = rp.get_crit_func(params, options, boot_df)

        _, params_rslt = maximize(
            crit_func,
            params_start,
            "nlopt_bobyqa",
            algo_options={"maxeval": 100},
            constraints=constr,
        )

        boot_params[f"bootstrap_{iter_}"] = params_rslt["value"]

    return boot_params
def test_bug_from_copenhagen_presentation():
    """Make sure the maximum of work hours is optimal.

    Everything except the working hours is fixed and the hours are constrained
    to be no larger than the time budget, so the maximizer of ``hours ** 2``
    has to equal the time budget.
    """
    start_params = {
        "work": {"hourly_wage": 25.5, "hours": 2_000},
        "time_budget": 24 * 7 * 365,
    }

    def u(params):
        return params["work"]["hours"] ** 2

    def return_all_but_working_hours(params):
        out = deepcopy(params)
        del out["work"]["hours"]
        return out

    def hours_then_time_budget(p):
        return [p["work"]["hours"], p["time_budget"]]

    constraints = [
        {"selector": return_all_but_working_hours, "type": "fixed"},
        {"selector": hours_then_time_budget, "type": "increasing"},
    ]

    res = maximize(
        criterion=u,
        params=start_params,
        algorithm="scipy_lbfgsb",
        constraints=constraints,
        lower_bounds={"work": {"hours": 0}},
    )

    optimal_hours = res.params["work"]["hours"]
    assert np.allclose(optimal_hours, start_params["time_budget"])
def estimate_ml(
    loglike,
    params,
    optimize_options,
    *,
    lower_bounds=None,
    upper_bounds=None,
    constraints=None,
    logging=False,
    log_options=None,
    loglike_kwargs=None,
    numdiff_options=None,
    jacobian=None,
    jacobian_kwargs=None,
    hessian=None,
    hessian_kwargs=None,
    design_info=None,
):
    """Run a complete maximum likelihood (ml) estimation with one function call.

    The function combines the lower level functions for maximization, numerical
    differentiation and inference. The defaults are meant to be good, but each
    step can be configured via the optional arguments. If the maximization was
    done separately, pass the optimal parameters as ``params`` and set
    ``optimize_options=False``.

    Args:
        loglike (callable): Likelihood function. Takes params (and potentially
            other keyword arguments) and returns a dictionary with at least the
            entries "value" (a scalar float) and "contributions" (a 1d numpy
            array or pytree) with the log likelihood contribution per individual.
        params (pytree): Start parameters or, if ``optimize_options`` is False,
            the already estimated parameters of the likelihood model. Pytrees
            can be a numpy array, a pandas Series, a DataFrame with "value"
            column, a float and any kind of (nested) dictionary or list
            containing these elements. See :ref:`params` for examples.
        optimize_options (dict, str or False): Keyword arguments that govern the
            numerical optimization. Valid entries are all arguments of
            :func:`~estimagic.optimization.optimize.minimize` except for those
            that are passed explicitly to ``estimate_ml``. A str is used as the
            ``algorithm`` option. False signals that ``params`` are already
            optimal and no numerical optimization is needed.
        lower_bounds (pytree): Same structure as params, holding lower bounds for
            the parameters. Can be ``-np.inf`` for parameters without lower bound.
        upper_bounds (pytree): As lower_bounds. Can be ``np.inf`` for parameters
            without upper bound.
        constraints (list, dict): List with constraint dictionaries or a single
            dict. See :ref:`constraints`.
        logging (pathlib.Path, str or False): Path to an sqlite3 file (which
            typically has the file extension ``.db``). If the file does not
            exist, it will be created. The dashboard can only be used when
            logging is used.
        log_options (dict): Additional keyword arguments to configure the
            logging.

            - "fast_logging": A boolean that determines if "unsafe" settings are
              used to speed up write processes to the database. This should only
              be used for very short running criterion functions where the main
              purpose of the log is a real-time dashboard and a corrupted
              database after a sudden system shutdown would not be catastrophic.
              If one evaluation of the criterion function (and gradient if
              applicable) takes more than 100 ms, the logging overhead is
              negligible.
            - "if_table_exists": (str) One of "extend", "replace", "raise". What
              to do if the tables we want to write to already exist. Default
              "extend".
            - "if_database_exists": (str): One of "extend", "replace", "raise".
              What to do if the database we want to write to already exists.
              Default "extend".
        loglike_kwargs (dict): Additional keyword arguments for loglike.
        numdiff_options (dict): Keyword arguments for the numerical derivatives
            that are needed for standard errors. See :ref:`first_derivative`
            for details.
        jacobian (callable or None): Function that takes ``params`` and
            potentially other keyword arguments and returns the jacobian of
            loglike["contributions"] with respect to the params. Only needed if
            a closed form Jacobian is available. With None, a numerical Jacobian
            is calculated.
        jacobian_kwargs (dict): Additional keyword arguments for the Jacobian
            function.
        hessian (callable or None or False): Function that takes ``params`` and
            potentially other keyword arguments and returns the Hessian of
            loglike["value"] with respect to the params. With None, a numerical
            Hessian is calculated. False signals that no Hessian should be
            calculated, so no result that requires the Hessian is available.
        hessian_kwargs (dict): Additional keyword arguments for the Hessian
            function.
        design_info (pandas.DataFrame): DataFrame with one row per observation
            that contains some or all of the variables "psu" (primary sampling
            unit), "strata" and "fpc" (finite population corrector). See
            :ref:`robust_likelihood_inference` for details.

    Returns:
        LikelihoodResult: A LikelihoodResult object.

    Raises:
        InvalidFunctionError: If loglike or a closed form jacobian or hessian
            fails when evaluated at the estimated parameters.
        NotImplementedError: If a closed form Hessian is combined with
            constraints.

    """
    # ==================================================================================
    # Check and process inputs
    # ==================================================================================
    is_optimized = optimize_options is False

    if not is_optimized:
        if isinstance(optimize_options, str):
            optimize_options = {"algorithm": optimize_options}

        check_optimization_options(
            optimize_options,
            usage="estimate_ml",
            algorithm_mandatory=True,
        )

    jac_case = get_derivative_case(jacobian)
    hess_case = get_derivative_case(hessian)

    check_numdiff_options(numdiff_options, "estimate_ml")
    if numdiff_options in (None, False):
        numdiff_options = {}

    if loglike_kwargs is None:
        loglike_kwargs = {}
    if constraints is None:
        constraints = []
    if jacobian_kwargs is None:
        jacobian_kwargs = {}
    if hessian_kwargs is None:
        hessian_kwargs = {}

    # ==================================================================================
    # Calculate estimates via maximization (if necessary)
    # ==================================================================================
    opt_res = None
    estimates = params

    if not is_optimized:
        opt_res = maximize(
            criterion=loglike,
            criterion_kwargs=loglike_kwargs,
            params=params,
            lower_bounds=lower_bounds,
            upper_bounds=upper_bounds,
            constraints=constraints,
            logging=logging,
            log_options=log_options,
            **optimize_options,
        )
        estimates = opt_res.params

    # ==================================================================================
    # Do first function evaluations at estimated parameters
    # ==================================================================================
    loglike_eval = _evaluate_at_estimates(
        loglike,
        estimates,
        loglike_kwargs,
        "Error while evaluating loglike at estimated params.",
    )

    jacobian_eval = None
    if callable(jacobian):
        jacobian_eval = _evaluate_at_estimates(
            jacobian,
            estimates,
            jacobian_kwargs,
            "Error while evaluating closed form jacobian at estimated params.",
        )

    hessian_eval = None
    if callable(hessian):
        hessian_eval = _evaluate_at_estimates(
            hessian,
            estimates,
            hessian_kwargs,
            "Error while evaluating closed form hessian at estimated params.",
        )

    # ==================================================================================
    # Get the converter for params and function outputs
    # ==================================================================================
    converter, internal_estimates = get_converter(
        params=estimates,
        constraints=constraints,
        lower_bounds=lower_bounds,
        upper_bounds=upper_bounds,
        func_eval=loglike_eval,
        primary_key="contributions",
        scaling=False,
        scaling_options=None,
        derivative_eval=jacobian_eval,
    )

    def _internal_loglike(output_key):
        """Return loglike[output_key] as a function of the internal parameters."""

        def func(x):
            external = converter.params_from_internal(x)
            raw_output = loglike(external, **loglike_kwargs)[output_key]
            return converter.func_to_internal(raw_output)

        return func

    # ==================================================================================
    # Calculate internal jacobian
    # ==================================================================================
    if jac_case == "closed-form":
        int_jac = converter.derivative_to_internal(
            jacobian_eval, internal_estimates.values
        )
    elif jac_case == "numerical":
        jac_res = first_derivative(
            func=_internal_loglike("contributions"),
            params=internal_estimates.values,
            lower_bounds=internal_estimates.lower_bounds,
            upper_bounds=internal_estimates.upper_bounds,
            **numdiff_options,
        )
        int_jac = jac_res["derivative"]
    else:
        int_jac = None

    # Without constraints the internal jacobian can be converted into a block tree
    # and reported as the external jacobian.
    if constraints in [None, []] and jacobian_eval is None and int_jac is not None:
        loglike_contribs = loglike_eval
        if isinstance(loglike_contribs, dict) and "contributions" in loglike_contribs:
            loglike_contribs = loglike_contribs["contributions"]

        jacobian_eval = matrix_to_block_tree(
            int_jac,
            outer_tree=loglike_contribs,
            inner_tree=estimates,
        )

    _no_jac_reason = None
    if jacobian_eval is None:
        _no_jac_reason = (
            "no closed form jacobian was provided and there are constraints"
        )

    # ==================================================================================
    # Calculate internal Hessian
    # ==================================================================================
    if hess_case == "skip":
        int_hess = None
    elif hess_case == "numerical":
        hess_res = second_derivative(
            func=_internal_loglike("value"),
            params=internal_estimates.values,
            lower_bounds=internal_estimates.lower_bounds,
            upper_bounds=internal_estimates.upper_bounds,
            **numdiff_options,
        )
        int_hess = hess_res["derivative"]
    elif hess_case == "closed-form":
        if constraints:
            raise NotImplementedError(
                "Closed-form Hessians are not yet compatible with constraints."
            )
        int_hess = block_tree_to_matrix(
            hessian_eval,
            outer_tree=params,
            inner_tree=params,
        )
    else:
        raise ValueError()

    # Without constraints the internal Hessian can be converted into a block tree
    # and reported as the external Hessian.
    if constraints in [None, []] and hessian_eval is None and int_hess is not None:
        hessian_eval = matrix_to_block_tree(
            int_hess,
            outer_tree=params,
            inner_tree=params,
        )

    if hessian_eval is not None:
        _no_hess_reason = None
    elif hess_case == "skip":
        _no_hess_reason = "the hessian calculation was explicitly skipped."
    else:
        _no_hess_reason = (
            "no closed form hessian was provided and there are constraints"
        )

    # ==================================================================================
    # create a LikelihoodResult object
    # ==================================================================================
    free_estimates = calculate_free_estimates(estimates, internal_estimates)

    return LikelihoodResult(
        _params=estimates,
        _converter=converter,
        _optimize_result=opt_res,
        _jacobian=jacobian_eval,
        _no_jacobian_reason=_no_jac_reason,
        _hessian=hessian_eval,
        _no_hessian_reason=_no_hess_reason,
        _internal_jacobian=int_jac,
        _internal_hessian=int_hess,
        _design_info=design_info,
        _internal_estimates=internal_estimates,
        _free_estimates=free_estimates,
        _has_constraints=constraints not in [None, []],
    )


def _evaluate_at_estimates(func, estimates, kwargs, error_message):
    """Call ``func(estimates, **kwargs)``; wrap failures in InvalidFunctionError."""
    try:
        return func(estimates, **kwargs)
    except (KeyboardInterrupt, SystemExit):
        raise
    except Exception as e:
        raise InvalidFunctionError(error_message) from e
def maximize_log_likelihood(
    log_like_obs,
    params,
    algorithm,
    criterion_kwargs=None,
    constraints=None,
    general_options=None,
    algo_options=None,
    gradient_options=None,
    logging=DEFAULT_DATABASE_NAME,
    log_options=None,
    dashboard=False,
    dash_options=None,
):
    """Estimate parameters via maximum likelihood.

    This is a convenience interface around ``maximize``. ``log_like_obs`` has to
    return an array of log likelihood contributions at the first position, not
    the mean log likelihood. The mean of the contributions is maximized;
    afterwards the "fitness" entry of each result is multiplied by the number
    of contributions so that it holds the sum. The second return of
    ``log_like_obs`` can be a :class:`pandas.DataFrame` in the
    `tidy data format`_, intended for a future comparison plot of contributions
    for subgroups.

    Working with log likelihoods instead of likelihoods is preferred for two
    reasons.

    1. Optimization methods which rely on gradients generally work better
       optimizing the log transformation. See `1`_ for a simplified example.

    2. Converting products of probabilities to sums of log probabilities is
       numerically more stable as it prevents over- and underflows. See `2`_
       for an example.

    Args:
        log_like_obs (callable or list of callables): Python function that takes
            a pandas DataFrame with parameters as the first argument and returns
            an array of log likelihood contributions as the first return.
        params (pd.DataFrame or list of pd.DataFrames): See :ref:`params`.
        algorithm (str or list of strings): Specifies the optimization
            algorithm. See :ref:`list_of_algorithms`.
        criterion_kwargs (dict or list of dicts): Additional keyword arguments
            for the criterion.
        constraints (list or list of lists): List with constraint dictionaries.
        general_options (dict): Additional configurations for the optimization.
        algo_options (dict or list of dicts): Algorithm specific configurations
            for the optimization.
        gradient_options (dict): Options for the gradient function.
        logging (str or pathlib.Path): Path to an sqlite3 file which typically
            has the file extension ``.db``. If the file does not exist, it will
            be created. See :ref:`logging` for details.
        log_options (dict): Keyword arguments to influence the logging. See
            :ref:`logging` for details.
        dashboard (bool): Whether to create and show a dashboard. See
            :ref:`dashboard` for details.
        dash_options (dict): Dictionary with kwargs for the dashboard. See
            :ref:`dashboard` for details.

    Returns:
        results (tuple or list of tuples): Either a tuple containing a
        dictionary of the results and the parameters or a list of such tuples.

    .. _tidy data format:
        http://dx.doi.org/10.18637/jss.v059.i10

    .. _1:
        https://stats.stackexchange.com/a/176563/218971

    .. _2:
        https://statmodeling.stat.columbia.edu/2016/06/11/log-sum-of-exponentials/

    """
    if isinstance(log_like_obs, list):
        expanded_criteria = list(map(expand_criterion_output, log_like_obs))
        mean_criteria = [
            aggregate_criterion_output(np.mean)(expanded)
            for expanded in expanded_criteria
        ]
    else:
        expanded_criteria = expand_criterion_output(log_like_obs)
        mean_criteria = aggregate_criterion_output(np.mean)(expanded_criteria)

    results = maximize(
        mean_criteria,
        params,
        algorithm,
        criterion_kwargs,
        constraints,
        general_options,
        algo_options,
        gradient_options,
        logging,
        log_options,
        dashboard,
        dash_options,
    )

    # The results hold the mean log likelihood. To convert it to the log
    # likelihood, the number of contributions of each optimization is needed, so
    # every criterion is evaluated once at its start parameters.
    runs = broadcast_arguments(
        criterion=expanded_criteria,
        params=params,
        criterion_kwargs=criterion_kwargs,
    )
    check_arguments(runs)

    evaluations = []
    for run in runs:
        evaluation = run["criterion"](run["params"], **run["criterion_kwargs"])
        evaluations.append(evaluation)
    n_contributions = [len(evaluation[0]) for evaluation in evaluations]

    def _scale_fitness(result, n_contribs):
        info = result[0]
        info["fitness"] = info["fitness"] * n_contribs

    if isinstance(results, list):
        for result, n_contribs in zip(results, n_contributions):
            _scale_fitness(result, n_contribs)
    else:
        _scale_fitness(results, n_contributions[0])

    return results
processed_constraints, _ = process_constraints(constraints, params) # ================================================================================== # Calculate estimates via maximization (if necessary) # ================================================================================== if is_optimized: estimates = params else: opt_res = maximize( criterion=loglike, criterion_kwargs=loglike_kwargs, params=params, constraints=constraints, derivative=derivative, derivative_kwargs=derivative_kwargs, criterion_and_derivative=loglike_and_derivative, criterion_and_derivative_kwargs=loglike_and_derivative_kwargs, logging=logging, log_options=log_options, **optimize_options, ) estimates = opt_res["solution_params"] # ================================================================================== # Calculate internal jacobian # ================================================================================== deriv_to_internal = get_derivative_conversion_function( params=params, constraints=constraints)