def create_state_space_class(optim_paras, options):
    """Build the state space object of the model.

    The core states and their indexer are created first, followed by the grid
    of dense dimensions. Covariates are computed on the core states and the
    result is downcast afterwards. Finally, the base draws for the solution are
    generated and everything is bundled into a state space object.

    Parameters
    ----------
    optim_paras : dict
        Processed model parameters. ``optim_paras["choices"]`` determines the
        size of the last dimension of the solution draws.
    options : dict
        Processed model options. Read here are ``"covariates_core"``,
        ``"n_periods"``, ``"solution_draws"``, ``"solution_seed_startup"`` and
        ``"monte_carlo_sequence"``.

    Returns
    -------
    state_space : _MultiDimStateSpace or _SingleDimStateSpace
        The multi-dimensional variant is returned if the dense covariates are
        truthy, otherwise the single-dimensional variant.

    """
    core, indexer = _create_core_and_indexer(optim_paras, options)
    dense_grid = _create_dense_state_space_grid(optim_paras)

    # Compute covariates first and downcast second. Downcasting before the
    # calculations risks silent integer overflows.
    core = compute_covariates(core, options["covariates_core"]).apply(
        downcast_to_smallest_dtype
    )

    dense = _create_dense_state_space_covariates(dense_grid, optim_paras, options)

    draws_shape = (
        options["n_periods"],
        options["solution_draws"],
        len(optim_paras["choices"]),
    )
    base_draws_sol = create_base_draws(
        draws_shape,
        next(options["solution_seed_startup"]),
        options["monte_carlo_sequence"],
    )

    # Both state space classes share the same leading arguments.
    shared_args = (core, indexer, base_draws_sol, optim_paras, options)
    if dense:
        return _MultiDimStateSpace(*shared_args, dense)
    return _SingleDimStateSpace(*shared_args)
def get_simulate_func(params, options):
    """Get the simulation function.

    Return :func:`simulate` where all arguments except the parameter vector are
    fixed with :func:`functools.partial`. Thus, the function can be directly
    passed into an optimizer for estimation with simulated method of moments or
    other techniques.

    Parameters
    ----------
    params : pandas.DataFrame
        DataFrame containing model parameters.
    options : dict
        Dictionary containing model options.

    Returns
    -------
    simulate_function : :func:`simulate`
        Simulation function where all arguments except the parameter vector are
        set.

    """
    optim_paras, options = process_params_and_options(params, options)
    state_space = StateSpace(optim_paras, options)

    draws_shape = (
        options["n_periods"],
        options["simulation_agents"],
        len(optim_paras["choices"]),
    )

    # Two sets of draws with identical shape; each one takes its own seed from
    # the startup seed iterator.
    seeds = options["simulation_seed_startup"]
    base_draws_sim = create_base_draws(draws_shape, next(seeds), "random")
    base_draws_wage = create_base_draws(draws_shape, next(seeds), "random")

    return functools.partial(
        simulate,
        base_draws_sim=base_draws_sim,
        base_draws_wage=base_draws_wage,
        state_space=state_space,
        options=options,
    )
def create_draws(self, options):
    """Create the solution draws for every dense key.

    One set of base draws is generated for each distinct number of available
    choices found in ``self.dense_key_to_choice_set``. Afterwards, each dense
    key is mapped to the slice of the matching set which belongs to its period.

    Parameters
    ----------
    options : dict
        Model options. Read here are ``"n_periods"``, ``"solution_draws"``,
        ``"solution_seed_startup"`` and ``"monte_carlo_sequence"``.

    Returns
    -------
    draws : dict
        Maps each key of ``self.dense_key_to_complex`` to the draws of its
        period, taken from the set with the matching number of choices.

    """
    distinct_n_choices = list(set(map(sum, self.dense_key_to_choice_set.values())))

    # One set of draws per distinct choice-set size. The list is aligned with
    # ``distinct_n_choices`` and every set consumes one seed.
    draws_per_size = [
        create_base_draws(
            (options["n_periods"], options["solution_draws"], n_choices),
            next(options["solution_seed_startup"]),
            options["monte_carlo_sequence"],
        )
        for n_choices in distinct_n_choices
    ]

    def _select(complex_ix):
        """Pick the period slice from the set matching the choice-set size."""
        period = complex_ix[0]
        position = distinct_n_choices.index(sum(complex_ix[1]))
        return draws_per_size[position][period]

    return {
        dense_idx: _select(complex_ix)
        for dense_idx, complex_ix in self.dense_key_to_complex.items()
    }
def __init__(self, optim_paras, options):
    """Initialize the state space class.

    The constructor creates the base draws for the solution, the states with
    their indexer, the covariates, the reward components, the indicator for
    inadmissible choices, the slices by periods and the indices of child
    states, in this order.

    Parameters
    ----------
    optim_paras : dict
        Processed model parameters.
    options : dict
        Processed model options. Read here are ``"n_periods"``,
        ``"solution_draws"``, ``"solution_seed_startup"``,
        ``"monte_carlo_sequence"`` and ``"covariates"``.

    """
    draws_shape = (
        options["n_periods"],
        options["solution_draws"],
        len(optim_paras["choices"]),
    )
    self.base_draws_sol = create_base_draws(
        draws_shape,
        next(options["solution_seed_startup"]),
        options["monte_carlo_sequence"],
    )

    states_df, indexer = _create_state_space(optim_paras, options)
    self.indexer = indexer

    base_covariates_df = create_base_covariates(states_df, options["covariates"])

    # Downcast only after the covariates are computed. Otherwise, silent
    # integer overflows can occur during the calculations.
    states_df = states_df.apply(downcast_to_smallest_dtype)
    base_covariates_df = base_covariates_df.apply(downcast_to_smallest_dtype)

    self.states = states_df.to_numpy()
    self.covariates = _create_choice_covariates(
        base_covariates_df, states_df, optim_paras
    )

    # The reward components are built from the last column of the states.
    last_state_column = self.states[:, -1]
    self.wages, self.nonpec = _create_reward_components(
        last_state_column, self.covariates, optim_paras
    )

    self.is_inadmissible = _create_is_inadmissible_indicator(
        states_df, optim_paras, options
    )

    # The slices must exist before the child states are looked up because the
    # partially initialized instance is handed to the helper.
    self._create_slices_by_periods(options["n_periods"])
    self.indices_of_child_states = _get_indices_of_child_states(self, optim_paras)
def get_crit_func(
    params, options, df, return_scalar=True, return_comparison_plot_data=False
):
    """Get the criterion function.

    Return a version of the likelihood functions in respy where all arguments
    except the parameter vector are fixed with :func:`functools.partial`. Thus
    the function can be directly passed into an optimizer or a function for
    taking numerical derivatives.

    Parameters
    ----------
    params : pandas.DataFrame
        DataFrame containing model parameters.
    options : dict
        Dictionary containing model options.
    df : pandas.DataFrame
        The model is fit to this dataset.
    return_scalar : bool, default True
        Indicator for whether the mean log likelihood should be returned or the
        log likelihood contributions.
    return_comparison_plot_data : bool, default False
        Indicator for whether a :class:`pandas.DataFrame` with various
        contributions for the visualization with estimagic should be returned.

    Returns
    -------
    criterion_function : :func:`log_like`
        Criterion function where all arguments except the parameter vector are
        set.

    Raises
    ------
    AssertionError
        If data has not the expected format.

    """
    optim_paras, options = process_params_and_options(params, options)
    optim_paras = _adjust_optim_paras_for_estimation(optim_paras, df)

    check_estimation_data(df, optim_paras)

    # The solve function is a partial which carries the state space as one of
    # its keyword arguments.
    solve = get_solve_func(params, options)
    state_space = solve.keywords["state_space"]

    df, type_covariates = _process_estimation_data(
        df, state_space, optim_paras, options
    )

    # One row of draws per observation and type.
    n_draw_rows = df.shape[0] * optim_paras["n_types"]
    draws_shape = (
        n_draw_rows,
        options["estimation_draws"],
        len(optim_paras["choices"]),
    )
    base_draws_est = create_base_draws(
        draws_shape,
        next(options["estimation_seed_startup"]),
        options["monte_carlo_sequence"],
    )

    fixed_arguments = {
        "df": df,
        "base_draws_est": base_draws_est,
        "solve": solve,
        "type_covariates": type_covariates,
        "options": options,
        "return_scalar": return_scalar,
        "return_comparison_plot_data": return_comparison_plot_data,
    }
    return partial(log_like, **fixed_arguments)
def get_log_like_func(
    params, options, df, return_scalar=True, return_comparison_plot_data=False
):
    """Get the criterion function for maximum likelihood estimation.

    Return a version of the likelihood functions in respy where all arguments
    except the parameter vector are fixed with :func:`functools.partial`. Thus
    the function can be directly passed into an optimizer or a function for
    taking numerical derivatives.

    Parameters
    ----------
    params : pandas.DataFrame
        DataFrame containing model parameters.
    options : dict
        Dictionary containing model options.
    df : pandas.DataFrame
        The model is fit to this dataset.
    return_scalar : bool, default True
        Indicator for whether the mean log likelihood should be returned or the
        log likelihood contributions.
    return_comparison_plot_data : bool, default False
        Indicator for whether a :class:`pandas.DataFrame` with various
        contributions for the visualization with estimagic should be returned.

    Returns
    -------
    criterion_function : :func:`log_like`
        Criterion function where all arguments except the parameter vector are
        set.

    Raises
    ------
    AssertionError
        If data has not the expected format.

    Examples
    --------
    >>> import respy as rp
    >>> params, options, data = rp.get_example_model("robinson_crusoe_basic")

    At default the function returns the log likelihood as a scalar value.

    >>> log_like = rp.get_log_like_func(params=params, options=options, df=data)
    >>> scalar = log_like(params)

    Additionally, a :class:`pandas.DataFrame` with data for visualization can be
    returned.

    >>> log_like = rp.get_log_like_func(params=params, options=options, df=data,
    ...     return_comparison_plot_data=True
    ... )
    >>> scalar, df = log_like(params)

    In alternative to the log likelihood, a :class:`numpy.array` of the
    individual log likelihood contributions can be returned.

    >>> log_like_contribs = rp.get_log_like_func(params=params, options=options,
    ...     df=data, return_scalar=False
    ... )
    >>> array = log_like_contribs(params)

    """
    optim_paras, options = process_params_and_options(params, options)
    optim_paras = _update_optim_paras_with_initial_experience_levels(optim_paras, df)

    check_estimation_data(df, optim_paras)

    # The solve function is a partial which carries the state space as one of
    # its keyword arguments.
    solve = get_solve_func(params, options)
    state_space = solve.keywords["state_space"]

    df, type_covariates = _process_estimation_data(
        df, state_space, optim_paras, options
    )

    # Replace with decorator.
    # Each dense key receives its own draws: one row per observation in the
    # group and one column per choice available in the key's choice set. Every
    # group consumes one seed from the startup iterator.
    observations_by_dense_key = df.groupby("dense_key").groups
    base_draws_est = {
        dense_key: create_base_draws(
            (
                len(indices),
                options["estimation_draws"],
                sum(state_space.dense_key_to_choice_set[dense_key]),
            ),
            next(options["estimation_seed_startup"]),
            options["monte_carlo_sequence"],
        )
        for dense_key, indices in observations_by_dense_key.items()
    }

    fixed_arguments = {
        "df": df,
        "base_draws_est": base_draws_est,
        "solve": solve,
        "type_covariates": type_covariates,
        "options": options,
        "return_scalar": return_scalar,
        "return_comparison_plot_data": return_comparison_plot_data,
    }
    return partial(log_like, **fixed_arguments)
def get_simulate_func(
    params,
    options,
    method="n_step_ahead_with_sampling",
    df=None,
    n_simulation_periods=None,
):
    """Get the simulation function.

    Return :func:`simulate` where all arguments except the parameter vector are
    fixed with :func:`functools.partial`. Thus, the function can be directly
    passed into an optimizer for estimation with simulated method of moments or
    other techniques.

    Parameters
    ----------
    params : pandas.DataFrame
        DataFrame containing model parameters.
    options : dict
        Dictionary containing model options.
    method : {"n_step_ahead_with_sampling", "n_step_ahead_with_data", "one_step_ahead"}
        The simulation method which can be one of three and is explained in
        more detail in :func:`simulate`.
    df : pandas.DataFrame or None, default None
        DataFrame containing one or multiple observations per individual.
    n_simulation_periods : int or None, default None
        Simulate data for a number of periods. This option does not affect
        ``options["n_periods"]`` which controls the number of periods for which
        decision rules are computed.

    Returns
    -------
    simulate_function : :func:`simulate`
        Simulation function where all arguments except the parameter vector are
        set.

    Examples
    --------
    >>> import respy as rp
    >>> params, options = rp.get_example_model("robinson_crusoe_basic", with_data=False)
    >>> simulate = rp.get_simulate_func(params, options)
    >>> data = simulate(params)

    """
    optim_paras, options = process_params_and_options(params, options)

    n_simulation_periods, options = _harmonize_simulation_arguments(
        method, df, n_simulation_periods, options
    )
    df = _process_input_df_for_simulation(df, method, options, optim_paras)

    solve = get_solve_func(params, options)

    # Shocks are drawn for every observation and every choice, even though
    # some choices might not be available. Only the relevant shocks are
    # selected later.
    if method == "one_step_ahead":
        n_observations = df.shape[0]
    else:
        n_observations = df.shape[0] * n_simulation_periods
    draws_shape = (n_observations, len(optim_paras["choices"]))

    # Two sets of draws with identical shape; each one takes its own seed from
    # the startup seed iterator.
    seeds = options["simulation_seed_startup"]
    base_draws_sim = create_base_draws(draws_shape, next(seeds), "random")
    base_draws_wage = create_base_draws(draws_shape, next(seeds), "random")

    return functools.partial(
        simulate,
        base_draws_sim=base_draws_sim,
        base_draws_wage=base_draws_wage,
        df=df,
        method=method,
        n_simulation_periods=n_simulation_periods,
        solve=solve,
        options=options,
    )
def get_log_like_func(params, options, df, return_scalar=True):
    """Get the criterion function for maximum likelihood estimation.

    Return a version of the likelihood functions in respy where all arguments
    except the parameter vector are fixed with :func:`functools.partial`. Thus
    the function can be directly passed into an optimizer or a function for
    taking numerical derivatives.

    Parameters
    ----------
    params : pandas.DataFrame
        DataFrame containing model parameters.
    options : dict
        Dictionary containing model options.
    df : pandas.DataFrame
        The model is fit to this dataset.
    return_scalar : bool, default True
        Indicator for whether the mean log likelihood should be returned. If
        False will return a dictionary with the following key and value pairs:

        - "value": mean log likelihood (float)
        - "contributions": log likelihood contributions (numpy.array)
        - "comparison_plot_data" : DataFrame with various contributions for the
          visualization with estimagic. Data contains the following columns:

          - ``identifier`` : Individual identifiers derived from input df.
          - ``period`` : Periods derived from input df.
          - ``choice`` : Choice that ``value`` is connected to.
          - ``value`` : Value of log likelihood contribution.
          - ``kind`` : Kind of contribution (e.g choice or wage).
          - ``type`` and ``log_type_probability``: Will be included in models
            with types.

    Returns
    -------
    criterion_function : :func:`log_like`
        Criterion function where all arguments except the parameter vector are
        set.

    Raises
    ------
    AssertionError
        If data has not the expected format.

    Examples
    --------
    >>> import respy as rp
    >>> params, options, data = rp.get_example_model("robinson_crusoe_basic")

    At default the function returns the log likelihood as a scalar value.

    >>> log_like = rp.get_log_like_func(params=params, options=options, df=data)
    >>> scalar = log_like(params)

    Alternatively, a dictionary containing the log likelihood, as well as log
    likelihood contributions and a :class:`pandas.DataFrame` can be returned.

    >>> log_like = rp.get_log_like_func(params=params, options=options, df=data,
    ...     return_scalar=False
    ... )
    >>> outputs = log_like(params)
    >>> outputs.keys()
    dict_keys(['value', 'contributions', 'comparison_plot_data'])

    """
    optim_paras, options = process_params_and_options(params, options)
    optim_paras = _update_optim_paras_with_initial_experience_levels(optim_paras, df)

    check_estimation_data(df, optim_paras)

    # The solve function is a partial which carries the state space as one of
    # its keyword arguments.
    solve = get_solve_func(params, options)
    state_space = solve.keywords["state_space"]

    df, type_covariates = _process_estimation_data(
        df, state_space, optim_paras, options
    )

    # Replace with decorator.
    # Each dense key receives its own draws: one row per observation in the
    # group and one column per choice available in the key's choice set. Every
    # group consumes one seed from the startup iterator.
    observations_by_dense_key = df.groupby("dense_key").groups
    base_draws_est = {
        dense_key: create_base_draws(
            (
                len(indices),
                options["estimation_draws"],
                sum(state_space.dense_key_to_choice_set[dense_key]),
            ),
            next(options["estimation_seed_startup"]),
            options["monte_carlo_sequence"],
        )
        for dense_key, indices in observations_by_dense_key.items()
    }

    fixed_arguments = {
        "df": df,
        "base_draws_est": base_draws_est,
        "solve": solve,
        "type_covariates": type_covariates,
        "options": options,
        "return_scalar": return_scalar,
    }
    return partial(log_like, **fixed_arguments)
def get_crit_func(params, options, df, version="log_like"):
    """Get the criterion function.

    Return a version of the likelihood functions in respy where all arguments
    except the parameter vector are fixed with :func:`functools.partial`. Thus
    the function can be directly passed into an optimizer or a function for
    taking numerical derivatives.

    By default we return :func:`log_like`. Other versions can be requested via
    the version argument.

    Parameters
    ----------
    params : pandas.DataFrame
        DataFrame containing model parameters.
    options : dict
        Dictionary containing model options.
    df : pandas.DataFrame
        The model is fit to this dataset.
    version : str, default "log_like"
        Can take the values "log_like" and "log_like_obs".

    Returns
    -------
    criterion_function : :func:`log_like`
        Criterion function where all arguments except the parameter vector are
        set. Its ``__name__`` attribute is set to ``version``.

    Raises
    ------
    AssertionError
        If data has not the expected format.
    ValueError
        If ``version`` is neither "log_like" nor "log_like_obs".

    """
    optim_paras, options = process_params_and_options(params, options)
    optim_paras = _adjust_optim_paras_for_estimation(optim_paras, df)

    check_estimation_data(df, optim_paras)

    state_space = StateSpace(optim_paras, options)

    processed_data = _process_estimation_data(df, state_space, optim_paras, options)
    (
        choices,
        idx_indiv_first_obs,
        indices,
        log_wages_observed,
        type_covariates,
    ) = processed_data

    draws_shape = (
        len(choices),
        options["estimation_draws"],
        len(optim_paras["choices"]),
    )
    base_draws_est = create_base_draws(
        draws_shape,
        next(options["estimation_seed_startup"]),
        options["monte_carlo_sequence"],
    )

    if version not in ("log_like", "log_like_obs"):
        raise ValueError("version has to be 'log_like' or 'log_like_obs'.")
    unpartialed = log_like if version == "log_like" else log_like_obs

    criterion_function = partial(
        unpartialed,
        choices=choices,
        idx_indiv_first_obs=idx_indiv_first_obs,
        indices=indices,
        log_wages_observed=log_wages_observed,
        base_draws_est=base_draws_est,
        state_space=state_space,
        type_covariates=type_covariates,
        options=options,
    )

    # Partial objects have no name by default. The name will be relevant for
    # estimagic topography plots.
    criterion_function.__name__ = version

    return criterion_function
def get_simulate_func(
    params,
    options,
    method="n_step_ahead_with_sampling",
    df=None,
    n_simulation_periods=None,
):
    """Get the simulation function.

    Return :func:`simulate` where all arguments except the parameter vector are
    fixed with :func:`functools.partial`. Thus, the function can be directly
    passed into an optimizer for estimation with simulated method of moments or
    other techniques.

    Parameters
    ----------
    params : pandas.DataFrame
        DataFrame containing model parameters.
    options : dict
        Dictionary containing model options.
    method : {"n_step_ahead_with_sampling", "n_step_ahead_with_data", "one_step_ahead"}
        The simulation method which can be one of three and is explained in
        more detail in :func:`simulate`.
    df : pandas.DataFrame or None, default None
        DataFrame containing one or multiple observations per individual.
    n_simulation_periods : int or None, default None
        Simulate data for a number of periods. This option does not affect
        ``options["n_periods"]`` which controls the number of periods for which
        decision rules are computed.

    Returns
    -------
    simulate_function : :func:`simulate`
        Simulation function where all arguments except the parameter vector are
        set.

    """
    optim_paras, options = process_params_and_options(params, options)

    n_simulation_periods, options = _harmonize_simulation_arguments(
        method, df, n_simulation_periods, options
    )
    df = _process_input_df_for_simulation(
        df, method, n_simulation_periods, options, optim_paras
    )

    solve = get_solve_func(params, options)

    # One row of draws per row of the processed data and one column per choice.
    draws_shape = (df.shape[0], len(optim_paras["choices"]))

    # Two sets of draws with identical shape; each one takes its own seed from
    # the startup seed iterator.
    seeds = options["simulation_seed_startup"]
    base_draws_sim = create_base_draws(draws_shape, next(seeds), "random")
    base_draws_wage = create_base_draws(draws_shape, next(seeds), "random")

    return functools.partial(
        simulate,
        base_draws_sim=base_draws_sim,
        base_draws_wage=base_draws_wage,
        df=df,
        solve=solve,
        options=options,
    )