def test_child_indices():
    """Testing existence of properties for calculation of child indices!"""
    point_constr = {"n_periods": 2, "n_lagged_choices": 1}

    params, options = generate_random_model(point_constr=point_constr)

    # Process params and options.
    optim_paras, options = process_params_and_options(params, options)

    state_space = create_state_space_class(optim_paras, options)

    # Create all relevant columns
    core_columns = ["period"] + create_core_state_space_columns(optim_paras)

    # Compose the child indices of the first state manually.
    initial_state = state_space.core.iloc[0][core_columns].to_numpy()

    # Get all the future states
    states = []
    for i in range(len(optim_paras["choices"])):
        child = initial_state.copy()
        child[0] += 1  # Increment the period.
        child[i + 1] += 1  # Increment the experience of the chosen alternative.
        child[-1] = i  # Set the lagged choice.
        ix = state_space.indexer[tuple(child)]
        states.append(np.array(ix).reshape(1, 2))

    manual = np.concatenate(states, axis=0)
    np.testing.assert_array_equal(state_space.child_indices[0][0], manual)
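
# A minimal, self-contained sketch of the manual law of motion used in the test
# above, with toy data instead of the respy state space (illustration only; the
# state layout is an assumption).
import numpy as np

# State layout: (period, exp_a, exp_b, lagged_choice); choices 0 ("a") and 1 ("b").
initial_state = np.array([0, 2, 1, 0])

children = []
for choice in range(2):
    child = initial_state.copy()
    child[0] += 1           # Move to the next period.
    child[choice + 1] += 1  # Accumulate experience in the chosen alternative.
    child[-1] = choice      # Record the choice as the new lagged choice.
    children.append(child)

print(np.vstack(children))
# [[1 3 1 0]
#  [1 2 2 1]]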
Example #2
def test_state_space_restrictions_by_traversing_forward(model):
    """Test for inadmissible states in the state space.

    The test is motivated by the addition of another restriction in
    https://github.com/OpenSourceEconomics/respy/pull/145. To ensure that similar errors
    do not happen again, this test takes all states of the first period and finds all
    of their child states. Then, the children of those child states are found, and so
    on. Finally, the set of visited states is compared against the total set of states.

    The test can only be applied to some models. Most models would need custom
    ``options["core_state_space_filters"]`` to remove inaccessible states from the state
    space.

    """
    params, options = process_model_or_seed(model)
    optim_paras, options = process_params_and_options(params, options)

    solve = get_solve_func(params, options)
    state_space = solve(params)

    indices = np.full((state_space.core.shape[0], len(optim_paras["choices"])),
                      INDEXER_INVALID_INDEX)
    core_columns = create_core_state_space_columns(optim_paras)

    for period in range(options["n_periods"] - 1):

        if period == 0:
            states = state_space.core.query(
                "period == 0")[core_columns].to_numpy(np.int)
        else:
            indices_period = state_space.indices_of_child_states[
                state_space.slices_by_periods[period - 1]]
            indices_period = indices_period[indices_period >= 0]
            states = state_space.core[core_columns].to_numpy(
                np.int64)[indices_period]

        indices = _insert_indices_of_child_states(
            indices,
            states,
            state_space.indexer[period],
            state_space.indexer[period + 1],
            state_space.is_inadmissible,
            len(optim_paras["choices_w_exp"]),
            optim_paras["n_lagged_choices"],
        )

    # Take all valid indices and add the indices of the first period.
    set_valid_indices = set(indices[indices != INDEXER_INVALID_INDEX]) | set(
        range(state_space.core.query("period == 0").shape[0]))

    assert set_valid_indices == set(range(state_space.core.shape[0]))
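
# A toy sketch of the forward-traversal idea from the test above: start with
# the states of the first period, repeatedly collect valid child indices, and
# compare the visited set with the full set of states. Purely illustrative,
# no respy objects involved.
import numpy as np

INVALID = -1

# children[s] holds the child indices of state s for two choices; -1 marks an
# invalid transition. States 0-1 are in period 0, 2-4 in period 1, 5-6 in period 2.
children = np.array([
    [2, 3],
    [3, 4],
    [5, 6],
    [5, 6],
    [6, INVALID],
    [INVALID, INVALID],
    [INVALID, INVALID],
])

visited = {0, 1}
frontier = [0, 1]
while frontier:
    candidates = children[frontier]
    frontier = sorted(set(candidates[candidates != INVALID]) - visited)
    visited |= set(frontier)

assert visited == set(range(children.shape[0]))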
Example #3
def _collect_child_indices(complex_, choice_set, indexer, optim_paras,
                           options):
    """Collect child indices for states.

    The function takes the states of one dense key, applies the law of motion for each
    available choice and maps the resulting states to core keys and core indices.

    Parameters
    ----------
    complex_ : tuple
        See :ref:`complex`.
    choice_set : tuple
        Tuple indicating which choices are admissible.
    indexer : numba.typed.Dict
        A dictionary with core states as keys and the core key and core index as values.
    optim_paras : dict
        Contains model parameters.
    options : dict
        Contains model options.

    Returns
    -------
    indices : numpy.ndarray
        Array with shape ``(n_states, n_choices * 2)``. Represents the mapping
        (core_index, choice) -> (dense_key, core_index).

    """
    core_columns = create_core_state_space_columns(optim_paras)
    states = load_objects("states", complex_, options)

    n_choices = sum(choice_set)
    indices = np.full((states.shape[0], n_choices, 2), -1, dtype=np.int64)

    indices_valid_choices = [
        i for i, is_valid in enumerate(choice_set) if is_valid
    ]
    for i, choice in enumerate(indices_valid_choices):
        states_ = states.copy(deep=True)

        states_["choice"] = choice
        states_ = apply_law_of_motion_for_core(states_, optim_paras)

        states_ = states_[["period"] + core_columns]

        indices[:, i, 0], indices[:, i, 1] = map_states_to_core_key_and_core_index(
            states_.to_numpy(), indexer
        )

    return indices
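
# A toy pandas sketch of the per-choice loop above: copy the states, assign the
# choice, apply a simplified law of motion, and keep only the core columns. The
# column names and the motion rule are assumptions, not the respy internals.
import pandas as pd

states = pd.DataFrame({"period": [0, 0], "exp_a": [1, 0], "lagged_choice_1": [0, 1]})

for choice in (0, 1):
    states_ = states.copy(deep=True)
    states_["choice"] = choice
    states_["period"] += 1       # Simplified law of motion ...
    if choice == 0:
        states_["exp_a"] += 1    # ... only choice 0 accumulates experience.
    states_["lagged_choice_1"] = states_["choice"]
    print(states_[["period", "exp_a", "lagged_choice_1"]])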
Example #4
    def _create_indices_of_child_states(self, optim_paras):
        """For each parent state get the indices of child states.

        During the backward induction, the ``expected_value_functions`` in the future
        period serve as the ``continuation_values`` of the current period. As the
        indices for child states never change, these indices can be precomputed and
        added to the state_space.

        Actually, the indices of the child states do not have to cover the last period,
        but it makes the code prettier and reduces the need to expand the indices in the
        estimation.

        """
        n_choices = len(optim_paras["choices"])
        n_choices_w_exp = len(optim_paras["choices_w_exp"])
        n_periods = optim_paras["n_periods"]
        n_states = self.core.shape[0]
        core_columns = create_core_state_space_columns(optim_paras)

        indices = np.full((n_states, n_choices),
                          INDEXER_INVALID_INDEX,
                          dtype=INDEXER_DTYPE)

        # Skip the last period which does not have child states.
        for period in reversed(range(n_periods - 1)):
            states_in_period = self.core.query(
                "period == @period")[core_columns].to_numpy(dtype=np.int8)

            indices = _insert_indices_of_child_states(
                indices,
                states_in_period,
                self.indexer[period],
                self.indexer[period + 1],
                self.is_inadmissible,
                n_choices_w_exp,
                optim_paras["n_lagged_choices"],
            )

        return indices
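
# A toy sketch of why the child indices are precomputed: during the backward
# induction, the expected value functions of the next period are gathered as
# the continuation values of the current period (illustration only).
import numpy as np

INVALID = -1

# Child indices of three current-period states for two choices.
child_indices = np.array([[4, 5], [5, 6], [6, INVALID]])

# Expected value functions of all states; indices 4-6 belong to the next period.
expected_value_functions = np.array([0.0, 0.0, 0.0, 0.0, 1.5, 2.0, 0.5])

continuation_values = np.where(
    child_indices == INVALID, 0.0, expected_value_functions[child_indices]
)
print(continuation_values)
# [[1.5 2. ]
#  [2.  0.5]
#  [0.5 0. ]]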
Example #5
def _create_indexer(core, core_key_to_core_indices, optim_paras):
    """Create indexer of core state space.

    Returns
    -------
    indexer : numba.typed.Dict
        Maps a row of the core state space to its position within the
        period_choice_cores, i.e., core_state -> (core_key, core_index).

    """
    core_columns = ["period"] + create_core_state_space_columns(optim_paras)
    n_core_state_variables = len(core_columns)

    indexer = Dict.empty(
        key_type=nb.types.UniTuple(nb.types.int64, n_core_state_variables),
        value_type=nb.types.UniTuple(nb.types.int64, 2),
    )

    for core_idx, indices in core_key_to_core_indices.items():
        states = core.loc[indices, core_columns].to_numpy()
        for i, state in enumerate(states):
            indexer[tuple(state)] = (core_idx, i)
    return indexer
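
# A plain-dict sketch of what the indexer above stores; respy uses a
# numba.typed.Dict so that lookups also work inside jit-compiled functions.
# The toy core state space and key-to-indices mapping are assumptions.
import numpy as np

core = np.array([
    [0, 0, 0],  # (period, experience, lagged_choice)
    [1, 1, 0],
    [1, 0, 1],
])
core_key_to_core_indices = {0: [0], 1: [1, 2]}

indexer = {}
for core_key, rows in core_key_to_core_indices.items():
    for i, state in enumerate(core[rows]):
        indexer[tuple(state)] = (core_key, i)

print(indexer[(1, 0, 1)])  # (1, 1)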
Example #6
def _process_estimation_data(df, state_space, optim_paras, options):
    """Process estimation data.

    All data-dependent objects required by :func:`_internal_log_like_obs` are
    produced.

    Some objects have to be repeated for each type, which is a desirable format for the
    estimation where every observation is weighted by type probabilities.

    Parameters
    ----------
    df : pandas.DataFrame
        The DataFrame which contains the data used for estimation. The DataFrame
        contains individual identifiers, periods, experiences, lagged choices, choices
        in current period, the wage and other observed data.
    state_space
        State space of the model, which provides the indexer for the core state space.
    optim_paras : dict
        Contains model parameters.
    options : dict
        Contains model options.

    Returns
    -------
    choices : numpy.ndarray
        Array with shape (n_observations, n_types) where information is only repeated
        over the second axis.
    idx_indiv_first_obs : numpy.ndarray
        Array with shape (n_individuals,) containing indices for the first observations
        of each individual.
    indices : numpy.ndarray
        Array with shape (n_observations, n_types) containing indices for states which
        correspond to observations.
    log_wages_observed : numpy.ndarray
        Array with shape (n_observations, n_types) containing clipped log wages.
    type_covariates : numpy.ndarray
        Array with shape (n_individuals, n_type_covariates) containing covariates to
        predict probabilities for each type.

    """
    col_dtype = generate_column_dtype_dict_for_estimation(optim_paras)

    df = (
        df.sort_index()[list(col_dtype)[2:]]
        .rename(columns=rename_labels_to_internal)
        .rename_axis(index=rename_labels_to_internal)
    )
    df = convert_labeled_variables_to_codes(df, optim_paras)

    # Get indices of states in the state space corresponding to all observations for all
    # types. The resulting array of indices has shape (n_observations,).
    n_periods = int(df.index.get_level_values("period").max() + 1)
    indices = []
    core_columns = create_core_state_space_columns(optim_paras)

    for period in range(n_periods):
        period_df = df.query("period == @period")
        period_core = tuple(period_df[col].to_numpy() for col in core_columns)
        period_indices = state_space.indexer[period][period_core]
        indices.append(period_indices)

    indices = np.concatenate(indices)

    # The indices are now sorted by period-individual pairs whereas the estimation needs
    # individual-period pairs. Sort them!
    indices_to_reorder = (
        df.sort_values(["period", "identifier"])
        .assign(__index__=np.arange(df.shape[0]))
        .sort_values(["identifier", "period"])["__index__"]
        .to_numpy()
    )
    df["index"] = indices[indices_to_reorder]

    # Add indices of child states to the DataFrame.
    children = pd.DataFrame(
        data=state_space.indices_of_child_states[df["index"].to_numpy()],
        index=df.index,
        columns=[f"child_index_{c}" for c in optim_paras["choices"]],
    )
    df = pd.concat([df, children], axis="columns")

    # For the estimation, log wages are needed with shape (n_observations, n_types).
    df["log_wage"] = np.log(np.clip(df.wage.to_numpy(), 1 / MAX_FLOAT, MAX_FLOAT))
    df = df.drop(columns="wage")

    # For the type covariates, we only need the first observation of each individual.
    if optim_paras["n_types"] >= 2:
        initial_states = df.query("period == 0").copy()
        type_covariates = compute_covariates(
            initial_states, options["covariates_core"], raise_errors=False
        )
        type_covariates = type_covariates.apply(downcast_to_smallest_dtype)
    else:
        type_covariates = None

    return df, type_covariates
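
# A standalone pandas sketch of the reordering trick used above: the state
# indices are computed period by period, but the estimation needs them in
# individual-period order (toy data, illustration only).
import numpy as np
import pandas as pd

df = pd.DataFrame({"identifier": [0, 0, 1, 1], "period": [0, 1, 0, 1]})

# Indices as produced by looping over periods:
# (id 0, p 0), (id 1, p 0), (id 0, p 1), (id 1, p 1).
indices = np.array([10, 20, 11, 21])

indices_to_reorder = (
    df.sort_values(["period", "identifier"])
    .assign(__index__=np.arange(df.shape[0]))
    .sort_values(["identifier", "period"])["__index__"]
    .to_numpy()
)
df["index"] = indices[indices_to_reorder]
print(df["index"].to_list())  # [10, 11, 20, 21]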
Example #7
def simulate(params, base_draws_sim, base_draws_wage, df, solve, options):
    """Perform a simulation.

    This function performs one of three possible simulation exercises. The type of the
    simulation is controlled by ``method`` in :func:`get_simulate_func`. Ordered from
    requiring no data to requiring panel data on individuals, they are:

    1. *n-step-ahead simulation with sampling*: The first observation of an individual
       is sampled from the initial conditions, i.e., the distribution of observed
       variables or initial experiences, etc. in the first period. Then, the individuals
       are guided for ``n`` periods by the decision rules from the solution of the
       model.

    2. *n-step-ahead simulation with data*: Instead of sampling individuals from the
       initial conditions, take the first observation of each individual in the data.
       Then, proceed as in 1.

    3. *one-step-ahead simulation*: Take the complete data and find for each observation
       the corresponding outcomes, e.g., choices and wages, using the decision rules from
       the model solution.

    Parameters
    ----------
    params : pandas.DataFrame or pandas.Series
        Contains parameters.
    base_draws_sim : numpy.ndarray
        Array with shape (n_periods, n_individuals, n_choices) to provide a unique set
        of shocks for each individual in each period.
    base_draws_wage : numpy.ndarray
        Array with shape (n_periods, n_individuals, n_choices) to provide a unique set
        of wage measurement errors for each individual in each period.
    df : pandas.DataFrame or None
        Can be one of three objects:

        - :data:`None` if no data is provided. This triggers sampling from initial
          conditions and an n-step-ahead simulation.
        - :class:`pandas.DataFrame` containing panel data on individuals which triggers
          a one-step-ahead simulation.
        - :class:`pandas.DataFrame` containing only first observations which triggers an
          n-step-ahead simulation taking the data as initial conditions.
    solve : :func:`~respy.solve.solve`
        Function which creates the solution of the model with new parameters.
    options : dict
        Contains model options.

    Returns
    -------
    simulated_data : pandas.DataFrame
        DataFrame of simulated individuals.

    """
    # Copy DataFrame so that the DataFrame attached to :func:`simulate` is not altered.
    df = df.copy()

    optim_paras, options = process_params_and_options(params, options)

    state_space = solve(params)

    # Prepare simulation.
    n_simulation_periods = int(df.index.get_level_values("period").max() + 1)

    df = _extend_data_with_sampled_characteristics(df, optim_paras, options)

    # Prepare shocks and store them in the pandas.DataFrame.
    n_wages = len(optim_paras["choices_w_wage"])
    base_draws_sim_transformed = transform_base_draws_with_cholesky_factor(
        base_draws_sim, optim_paras["shocks_cholesky"], n_wages)
    base_draws_wage_transformed = np.exp(base_draws_wage *
                                         optim_paras["meas_error"])
    for i, choice in enumerate(optim_paras["choices"]):
        df[f"shock_reward_{choice}"] = base_draws_sim_transformed[:, i]
        df[f"meas_error_wage_{choice}"] = base_draws_wage_transformed[:, i]

    core_columns = create_core_state_space_columns(optim_paras)
    is_n_step_ahead = np.any(df[core_columns].isna())

    data = []
    for period in range(n_simulation_periods):

        # If it is a one-step-ahead simulation, we pick rows from the panel data. For
        # n-step-ahead simulation, `df` always contains only data of the current period.
        current_df = df.query("period == @period").copy()
        wages = state_space.get_attribute_from_period("wages", period)
        nonpecs = state_space.get_attribute_from_period("nonpecs", period)
        continuation_values = state_space.get_continuation_values(
            period=period)
        is_inadmissible = state_space.get_attribute_from_period(
            "is_inadmissible", period)

        current_df_extended = _simulate_single_period(
            current_df,
            state_space.indexer[period],
            wages,
            nonpecs,
            continuation_values,
            is_inadmissible,
            optim_paras=optim_paras,
        )

        data.append(current_df_extended)

        if is_n_step_ahead and period != n_simulation_periods - 1:
            next_df = _apply_law_of_motion(current_df_extended, optim_paras)
            df = df.fillna(next_df)

    simulated_data = _process_simulation_output(data, optim_paras)

    return simulated_data
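
# A standalone NumPy sketch of how base draws can be correlated with a Cholesky
# factor, as a simplified stand-in for transform_base_draws_with_cholesky_factor
# (illustration only; e.g., it omits the log-normal transformation of wage shocks).
import numpy as np

rng = np.random.default_rng(0)
n_individuals, n_choices = 1000, 3

shocks_cov = np.array([[1.0, 0.2, 0.0], [0.2, 1.0, 0.1], [0.0, 0.1, 1.0]])
shocks_cholesky = np.linalg.cholesky(shocks_cov)

base_draws = rng.standard_normal((n_individuals, n_choices))
correlated_draws = base_draws @ shocks_cholesky.T

# The sample covariance approaches shocks_cov as the number of draws grows.
print(np.cov(correlated_draws, rowvar=False).round(1))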
Example #8
def _simulate_single_period(df, indexer, wages, nonpecs, continuation_values,
                            is_inadmissible, optim_paras):
    """Simulate individuals in a single period.

    The function performs the following steps:

    - Map individuals in one period to the states in the model.
    - Simulate choices and wages for those individuals.
    - Store additional information in a :class:`pandas.DataFrame` and return it.

    """
    n_wages = len(optim_paras["choices_w_wage"])

    # Get indices which connect states in the state space and simulated agents. Subtract
    # the minimum of the valid indices because `wages` etc. contain only the rows of this
    # period whereas the indexer returns positions within the whole state space.
    columns = create_core_state_space_columns(optim_paras)
    indices = indexer[tuple(df[col].astype("int64") for col in columns)]
    period_indices = indices - np.min(
        indexer[indexer != INDEXER_INVALID_INDEX])

    try:
        wages = wages[period_indices]
        nonpecs = nonpecs[period_indices]
        continuation_values = continuation_values[period_indices]
        is_inadmissible = is_inadmissible[period_indices]
    except IndexError as e:
        raise Exception(
            "Simulated individuals could not be mapped to their corresponding states in"
            " the state space. This might be caused by a mismatch between "
            "option['core_state_space_filters'] and the initial conditions."
        ) from e

    draws_shock = df[[f"shock_reward_{c}"
                      for c in optim_paras["choices"]]].to_numpy()
    draws_wage = df[[f"meas_error_wage_{c}"
                     for c in optim_paras["choices"]]].to_numpy()

    value_functions, flow_utilities = calculate_value_functions_and_flow_utilities(
        wages,
        nonpecs,
        continuation_values,
        draws_shock,
        optim_paras["delta"],
    )

    # We need to ensure that no individual chooses an inadmissible state. Thus, set
    # value functions to NaN. This cannot be done in `aggregate_keane_wolpin_utility` as
    # the interpolation requires a mild penalty.
    value_functions = np.where(is_inadmissible, np.nan, value_functions)

    choice = np.nanargmax(value_functions, axis=1)

    wages = wages * draws_shock * draws_wage
    wages[:, n_wages:] = np.nan
    wage = np.choose(choice, wages.T)

    # Store the necessary information as well as additional information for debugging.
    df["choice"] = choice
    df["wage"] = wage
    df["discount_rate"] = optim_paras["delta"]
    for i, choice in enumerate(optim_paras["choices"]):
        df[f"nonpecuniary_reward_{choice}"] = nonpecs[:, i]
        df[f"wage_{choice}"] = wages[:, i]
        df[f"flow_utility_{choice}"] = flow_utilities[:, i]
        df[f"value_function_{choice}"] = value_functions[:, i]
        df[f"continuation_value_{choice}"] = continuation_values[:, i]

    return df
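
# A standalone toy sketch of the choice and wage selection at the end of
# _simulate_single_period: inadmissible alternatives are masked with NaN, the
# best alternative is picked with nanargmax, and np.choose selects its wage.
import numpy as np

value_functions = np.array([[2.0, 3.0, 1.0], [4.0, 1.0, 2.0]])
is_inadmissible = np.array([[False, True, False], [False, False, False]])
wages = np.array([[1.1, 1.5, np.nan], [2.0, 2.5, np.nan]])  # non-wage choice -> NaN

value_functions = np.where(is_inadmissible, np.nan, value_functions)
choice = np.nanargmax(value_functions, axis=1)
wage = np.choose(choice, wages.T)

print(choice)  # [0 0]
print(wage)    # [1.1 2. ]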