def _process_estimation_data(df, state_space, optim_paras, options):
    """Process estimation data.

    All objects needed by :func:`_internal_log_like_obs` which depend on the data
    are produced here. If the model has at least two types, every observation is
    repeated once per type, which is a convenient format for the estimation where
    every observation is weighted by type probabilities.

    Parameters
    ----------
    df : pandas.DataFrame
        The DataFrame which contains the data used for estimation. It is sorted by
        its index, restricted to the columns ``list(col_dtype)[2:]`` and its column
        and index labels are renamed with ``rename_labels_to_internal``.
    state_space
        The state space of the model. It is only passed on to
        :func:`map_observations_to_states`.
    optim_paras : dict
        Must contain ``"n_types"``. It is also passed on to the helper functions.
    options : dict
        Must contain ``"covariates_core"`` if the model has at least two types.

    Returns
    -------
    df : pandas.DataFrame
        The processed data. Compared to the input it has the additional columns
        ``"dense_key"``, ``"core_index"`` and ``"log_wage"`` (log of the clipped
        wage), no ``"wage"`` column anymore and, if there are at least two types,
        a ``"type"`` column with one copy of all observations per type.
    type_covariates : pandas.DataFrame or None
        :data:`None` if there are fewer than two types. Otherwise, the covariates
        computed by :func:`compute_covariates` from the rows with ``period == 0``
        with every column passed through ``downcast_to_smallest_dtype``.
        NOTE(review): the exact return type depends on :func:`compute_covariates`
        which is not visible here - presumably a DataFrame.

    """
    n_types = optim_paras["n_types"]
    col_dtype = generate_column_dtype_dict_for_estimation(optim_paras)

    # NOTE(review): the first two keys of ``col_dtype`` are skipped - presumably
    # these are the index levels (identifier and period); confirm.
    df = (
        df.sort_index()[list(col_dtype)[2:]]
        .rename(columns=rename_labels_to_internal)
        .rename_axis(index=rename_labels_to_internal)
    )
    df = convert_labeled_variables_to_codes(df, optim_paras)

    # Duplicate observations for each type. ``DataFrame.assign`` already returns a
    # new object, so no explicit copy is necessary before calling it.
    if n_types >= 2:
        df = pd.concat([df.assign(type=i) for i in range(n_types)])

    df["dense_key"], df["core_index"] = map_observations_to_states(
        df, state_space, optim_paras
    )

    # Clip wages before taking logs so that non-positive or extremely large wages
    # do not produce infinite log wages.
    df["log_wage"] = np.log(np.clip(df.wage.to_numpy(), 1 / MAX_FLOAT, MAX_FLOAT))
    df = df.drop(columns="wage")

    # For the type covariates, only observations of the first period are used.
    if n_types >= 2:
        initial_states = df.query("period == 0").copy()
        type_covariates = compute_covariates(
            initial_states, options["covariates_core"], raise_errors=False
        )
        type_covariates = type_covariates.apply(downcast_to_smallest_dtype)
    else:
        type_covariates = None

    return df, type_covariates
def simulate(
    params,
    base_draws_sim,
    base_draws_wage,
    df,
    method,
    n_simulation_periods,
    solve,
    options,
):
    """Perform a simulation.

    The model is solved for ``params`` and outcomes are simulated period by period
    with :func:`_simulate_single_period`. The kind of simulation is controlled by
    ``method``. This function only distinguishes ``"one_step_ahead"`` from every
    other value, which is treated as an n-step-ahead simulation:

    - *n-step-ahead simulation*: ``df`` contains only the states of the current
      period. After every period except the last one, the law of motion is applied
      to the simulated outcomes to obtain the states of the next period.
    - *one-step-ahead simulation*: ``df`` contains the complete panel. In every
      period, the rows of this period are picked from the panel and outcomes are
      simulated for them. ``df`` is not updated between periods.

    NOTE(review): whether the first observations of an n-step-ahead simulation are
    sampled or taken from data is not decided in this function; presumably ``df`` is
    prepared accordingly in :func:`get_simulate_func` - confirm.

    Parameters
    ----------
    params : pandas.DataFrame or pandas.Series
        Contains parameters. Passed to :func:`process_params_and_options` and to
        ``solve``.
    base_draws_sim : numpy.ndarray
        Shocks which are indexed as ``[rows, position_of_choice]`` where the second
        axis follows the order of ``optim_paras["choices"]``. For an n-step-ahead
        simulation, the rows of period ``p`` are the block
        ``len(df) * p : len(df) * (p + 1)``. For a one-step-ahead simulation, the
        rows are the positions of the observations of the period in ``df``.
    base_draws_wage : numpy.ndarray
        Draws for the wage measurement errors with the same layout as
        ``base_draws_sim``. They are multiplied with ``optim_paras["meas_error"]``
        and exponentiated before they are stored.
    df : pandas.DataFrame
        The data the simulation starts from. Either the complete panel
        (one-step-ahead) or the observations of the first period (n-step-ahead).
        The DataFrame is copied and the original is not altered. :data:`None` is not
        accepted because ``df.copy()`` is called unconditionally.
    method : str
        The simulation method.
    n_simulation_periods : int
        Number of periods to simulate.
    solve : callable
        Function which is called as ``solve(params)`` and returns the state space
        with the solution of the model.
    options : dict
        Contains model options.

    Returns
    -------
    simulated_data
        The per-period results combined by :func:`_process_simulation_output`,
        presumably a :class:`pandas.DataFrame` of simulated individuals.

    """
    # Work on a copy so that the DataFrame attached to :func:`simulate` is not
    # altered.
    df = df.copy()

    one_step_ahead = method == "one_step_ahead"

    optim_paras, options = process_params_and_options(params, options)
    state_space = solve(params)

    # Prepare simulation.
    df = _extend_data_with_sampled_characteristics(df, optim_paras, options)

    # Transform the draws for the wage measurement errors once for all periods.
    wage_meas_errors = np.exp(base_draws_wage * optim_paras["meas_error"])

    last_period = n_simulation_periods - 1
    simulated_periods = []

    for period in range(n_simulation_periods):
        # For a one-step-ahead simulation, rows are picked from the panel data. For
        # an n-step-ahead simulation, ``df`` only contains data of the current
        # period.
        current_df = df.query("period == @period").copy()

        if one_step_ahead:
            # Positions of the observations of this period within the panel.
            rows = np.where(df.eval("period == @period"))[0]
        else:
            # Every period owns a contiguous block of draws with one row per
            # observation in ``df``.
            n_rows = df.shape[0]
            rows = slice(n_rows * period, n_rows * (period + 1))

        # Store the shocks of each choice in the DataFrame.
        for position, choice in enumerate(optim_paras["choices"]):
            current_df[f"shock_reward_{choice}"] = base_draws_sim[rows, position]
            current_df[f"meas_error_wage_{choice}"] = wage_meas_errors[
                rows, position
            ]

        dense_key, core_index = map_observations_to_states(
            current_df, state_space, optim_paras
        )
        current_df["dense_key"] = dense_key
        current_df["core_index"] = core_index

        period_wages = state_space.get_attribute_from_period("wages", period)
        period_nonpecs = state_space.get_attribute_from_period("nonpecs", period)
        period_choice_sets = state_space.get_attribute_from_period(
            "dense_key_to_choice_set", period
        )
        period_continuation_values = state_space.get_continuation_values(
            period=period
        )

        current_df_extended = _simulate_single_period(
            current_df,
            period_choice_sets,
            period_wages,
            period_nonpecs,
            period_continuation_values,
            optim_paras=optim_paras,
        )

        simulated_periods.append(current_df_extended.copy(deep=True))

        # Only an n-step-ahead simulation has to move the individuals to the next
        # period, and there is no next period after the last one.
        if not one_step_ahead and period != last_period:
            next_df = apply_law_of_motion_for_core(
                current_df_extended.reset_index(), optim_paras
            )
            state_space_columns = create_state_space_columns(optim_paras)
            df = next_df.set_index(["identifier", "period"])[state_space_columns]

    return _process_simulation_output(simulated_periods, optim_paras)