Example #1
def _process_estimation_data(df, state_space, optim_paras, options):
    """Process estimation data.

    All data-dependent objects needed by :func:`_internal_log_like_obs` are produced.

    Some objects have to be repeated for each type which is a desirable format for the
    estimation where every observation is weighted by type probabilities.

    Parameters
    ----------
    df : pandas.DataFrame
        The DataFrame which contains the data used for estimation. The DataFrame
        contains individual identifiers, periods, experiences, lagged choices, choices
        in current period, the wage and other observed data.
    state_space : respy.state_space.StateSpace
        The state space of the model which is used to map observations to states.
    optim_paras : dict
        Dictionary with the processed model parameters.
    options : dict
        Dictionary with the model options.

    Returns
    -------
    df : pandas.DataFrame
        The processed estimation data. It contains the internally coded variables, the
        clipped log wages and the state indices ``dense_key`` and ``core_index``. If the
        model has multiple types, every observation is repeated for each type.
    type_covariates : pandas.DataFrame or None
        Covariates used to predict the probability of each type, computed on the first
        observation of each individual. :data:`None` if the model has only one type.

    """
    n_types = optim_paras["n_types"]
    col_dtype = generate_column_dtype_dict_for_estimation(optim_paras)

    df = (
        df.sort_index()[list(col_dtype)[2:]]
        .rename(columns=rename_labels_to_internal)
        .rename_axis(index=rename_labels_to_internal)
    )
    df = convert_labeled_variables_to_codes(df, optim_paras)

    # Duplicate observations for each type.
    if n_types >= 2:
        df = pd.concat([df.copy().assign(type=i) for i in range(n_types)])

    df["dense_key"], df["core_index"] = map_observations_to_states(
        df, state_space, optim_paras)

    # For the estimation, log wages are needed with shape (n_observations, n_types).
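    # Clipping to [1 / MAX_FLOAT, MAX_FLOAT] keeps the log finite for (near-)zero wages.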
    df["log_wage"] = np.log(
        np.clip(df.wage.to_numpy(), 1 / MAX_FLOAT, MAX_FLOAT))
    df = df.drop(columns="wage")

    # For the type covariates, we only need the first observation of each individual.
    if n_types >= 2:
        initial_states = df.query("period == 0").copy()
        type_covariates = compute_covariates(initial_states,
                                             options["covariates_core"],
                                             raise_errors=False)
        type_covariates = type_covariates.apply(downcast_to_smallest_dtype)
    else:
        type_covariates = None

    return df, type_covariates
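
# --- Usage sketch (not part of respy): the per-type duplication above is plain pandas.
# A minimal, self-contained illustration of the same pattern on toy data.
import pandas as pd

toy = pd.DataFrame({"identifier": [0, 0], "period": [0, 1], "wage": [10.0, 12.0]})
n_types = 2  # stands in for optim_paras["n_types"]

# Repeat every observation once per type and label the copies, mirroring
# `pd.concat([df.copy().assign(type=i) for i in range(n_types)])` in the function above.
toy_types = pd.concat([toy.copy().assign(type=i) for i in range(n_types)])
print(toy_types)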
Example #2
def simulate(
    params,
    base_draws_sim,
    base_draws_wage,
    df,
    method,
    n_simulation_periods,
    solve,
    options,
):
    """Perform a simulation.

    This function performs one of three possible simulation exercises. The type of the
    simulation is controlled by ``method`` in :func:`get_simulate_func`. Ordered from no
    data to panel data on individuals, there are:

    1. *n-step-ahead simulation with sampling*: The first observation of an individual
       is sampled from the initial conditions, i.e., the distribution of observed
       variables or initial experiences, etc. in the first period. Then, the individuals
       are guided for ``n`` periods by the decision rules from the solution of the
       model.

    2. *n-step-ahead simulation with data*: Instead of sampling individuals from the
       initial conditions, take the first observation of each individual in the data.
       Then, proceed as in 1.

    3. *one-step-ahead simulation*: Take the complete data and find for each observation
       the corresponding outcomes, e.g., choices and wages, using the decision rules from
       the model solution.

    Parameters
    ----------
    params : pandas.DataFrame or pandas.Series
        Contains parameters.
    base_draws_sim : numpy.ndarray
        Array with shape (n_periods, n_individuals, n_choices) to provide a unique set
        of shocks for each individual in each period.
    base_draws_wage : numpy.ndarray
        Array with shape (n_periods, n_individuals, n_choices) to provide a unique set
        of wage measurement errors for each individual in each period.
    df : pandas.DataFrame or None
        Can be one of three objects:

        - :data:`None` if no data is provided. This triggers sampling from initial
          conditions and an n-step-ahead simulation.
        - :class:`pandas.DataFrame` containing panel data on individuals which triggers
          a one-step-ahead simulation.
        - :class:`pandas.DataFrame` containing only first observations which triggers an
          n-step-ahead simulation taking the data as initial conditions.
    method : str
        The simulation method.
    n_simulation_periods : int
        Number of periods to simulate.
    solve : :func:`~respy.solve.solve`
        Function which creates the solution of the model with new parameters.
    options : dict
        Contains model options.

    Returns
    -------
    simulated_data : pandas.DataFrame
        DataFrame of simulated individuals.

    """
    # Copy the DataFrame so that the data passed to :func:`simulate` is not altered.
    # ``df`` may be ``None`` when individuals are sampled from the initial conditions.
    df = df if df is None else df.copy()
    is_n_step_ahead = method != "one_step_ahead"

    optim_paras, options = process_params_and_options(params, options)
    state_space = solve(params)

    # Prepare simulation.
    df = _extend_data_with_sampled_characteristics(df, optim_paras, options)

    # Prepare shocks and store them in the pandas.DataFrame.
    draws_wage_transformed = np.exp(base_draws_wage * optim_paras["meas_error"])

    data = []
    for period in range(n_simulation_periods):
        # If it is a one-step-ahead simulation, we pick rows from the panel data. For
        # n-step-ahead simulation, `df` always contains only data of the current period.
        current_df = df.query("period == @period").copy()

        if method == "one_step_ahead":
            slice_ = np.where(df.eval("period == @period"))[0]
        else:
            slice_ = slice(df.shape[0] * period, df.shape[0] * (period + 1))

        for i, choice in enumerate(optim_paras["choices"]):
            current_df[f"shock_reward_{choice}"] = base_draws_sim[slice_, i]
            current_df[f"meas_error_wage_{choice}"] = draws_wage_transformed[
                slice_, i]

        current_df["dense_key"], current_df[
            "core_index"] = map_observations_to_states(current_df, state_space,
                                                       optim_paras)

        wages = state_space.get_attribute_from_period("wages", period)
        nonpecs = state_space.get_attribute_from_period("nonpecs", period)
        index_to_choice_set = state_space.get_attribute_from_period(
            "dense_key_to_choice_set", period)
        continuation_values = state_space.get_continuation_values(
            period=period)

        current_df_extended = _simulate_single_period(
            current_df,
            index_to_choice_set,
            wages,
            nonpecs,
            continuation_values,
            optim_paras=optim_paras,
        )

        data.append(current_df_extended.copy(deep=True))

        if is_n_step_ahead and period != n_simulation_periods - 1:
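            # Build next period's states by applying the law of motion, i.e. updating
            # experiences and lagged choices with this period's simulated choices.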
            current_df_extended = current_df_extended.reset_index()
            df = apply_law_of_motion_for_core(current_df_extended, optim_paras)
            state_space_columns = create_state_space_columns(optim_paras)
            df = df.set_index(["identifier", "period"])[state_space_columns]

    simulated_data = _process_simulation_output(data, optim_paras)

    return simulated_data
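
# --- Usage sketch (not part of the listing above): `simulate` is an internal worker
# that receives pre-drawn shocks and a `solve` function. Users typically reach it
# through respy's public API, roughly like this (example model name is illustrative):
import respy as rp

params, options, data = rp.get_example_model("kw_94_one")

# `get_simulate_func` wires up draws, options and the solve function and returns a
# callable that only needs the parameters.
simulate_func = rp.get_simulate_func(params, options)
simulated_df = simulate_func(params)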