Beispiel #1
0
def get_example_model(model, with_data=True):
    """Return parameters, options and data (optional) of an example model.

    Parameters
    ----------
    model : str
        Choose one model name in ``{"robinson_crusoe_basic",
        "robinson_crusoe_extended", "kw_94_one", "kw_94_two", "kw_94_three",
        "kw_97_basic", "kw_97_extended", "kw_2000"}``.
    with_data : bool
        Whether the accompanying data set should be returned. For models whose name
        contains ``"kw_97"`` the data comes from ``create_kw_97``; for models whose
        name contains ``"kw_94"`` or ``"robinson"`` a data set is simulated. For any
        other model a :class:`UserWarning` is issued and no data is returned.

    Returns
    -------
    tuple
        ``(params, options)`` if no data is requested or available, otherwise
        ``(params, options, df)``.

    Raises
    ------
    AssertionError
        If ``model`` is not contained in ``EXAMPLE_MODELS``.

    """
    # Raise explicitly instead of relying on an ``assert`` statement, which is
    # removed when Python runs with ``-O``. The exception type stays the same.
    if model not in EXAMPLE_MODELS:
        raise AssertionError(f"{model} is not in {EXAMPLE_MODELS}.")

    options = yaml.safe_load((TEST_RESOURCES_DIR / f"{model}.yaml").read_text())
    params = pd.read_csv(
        TEST_RESOURCES_DIR / f"{model}.csv", index_col=["category", "name"]
    )

    if not with_data:
        return params, options

    if "kw_97" in model:
        df = create_kw_97(params, options)
    elif "kw_94" in model or "robinson" in model:
        simulate = get_simulate_func(params, options)
        df = simulate(params)
    else:
        # ``stacklevel=2`` attributes the warning to the caller's line.
        warnings.warn(
            f"No data available for model '{model}'.",
            category=UserWarning,
            stacklevel=2,
        )
        return params, options

    return params, options, df
Beispiel #2
0
def get_example_model(model, with_data=True):
    """Return parameters, options and data (optional) of an example model.

    Parameters
    ----------
    model : str
        Name of the example model. An unknown name raises an error whose message
        lists all available models.
    with_data : bool
        Whether the accompanying data set should be returned. If no data can be
        produced for the model, a :class:`UserWarning` is issued instead.

    Returns
    -------
    tuple
        ``(params, options)`` if no data is requested or available, otherwise
        ``(params, options, df)``.

    Raises
    ------
    AssertionError
        If ``model`` is not contained in ``EXAMPLE_MODELS``.

    """
    # An ``assert`` statement would vanish under ``python -O``; raising the same
    # exception type explicitly keeps the check (and its helpful message) alive.
    if model not in EXAMPLE_MODELS:
        raise AssertionError(f"{model} is not in {EXAMPLE_MODELS}.")

    options = yaml.safe_load((TEST_RESOURCES_DIR / f"{model}.yaml").read_text())
    params = pd.read_csv(
        TEST_RESOURCES_DIR / f"{model}.csv", index_col=["category", "name"]
    )

    if not with_data:
        return params, options

    if "kw_97" in model:
        df = create_kw_97(params, options)
    elif "kw_94" in model or "robinson" in model:
        simulate = get_simulate_func(params, options)
        df = simulate(params)
    else:
        # Point the warning at the calling code rather than at this helper.
        warnings.warn(
            f"No data available for model '{model}'.",
            category=UserWarning,
            stacklevel=2,
        )
        return params, options

    return params, options, df
def msm_args(worker_id):
    """Build example inputs for testing the method of simulated moments.

    Parameters
    ----------
    worker_id : str
        Identifier of the pytest worker. It becomes part of the cache path.

    Returns
    -------
    tuple
        ``(params, options, calc_moments, replace_nans, empirical_moments,
        weighting_matrix)``.

    """
    params, options = get_example_model("kw_94_one", with_data=False)

    # Each pytest worker gets its own cache directory, so that no worker cleans the
    # directory of another one.
    options.update({"n_periods": 3, "cache_path": f".respy-{worker_id}"})

    data = get_simulate_func(params, options)(params)

    choice_frequencies = _replace_nans(_calc_choice_freq(data))
    mean_wages = _replace_nans(_calc_wage_mean(data))

    # The key order of the two dictionaries differs on purpose and is kept as is.
    empirical_moments = {"Choices": choice_frequencies, "Mean Wage": mean_wages}
    moment_functions = {"Mean Wage": _calc_wage_mean, "Choices": _calc_choice_freq}

    return (
        params,
        options,
        moment_functions,
        _replace_nans,
        empirical_moments,
        get_diag_weighting_matrix(empirical_moments),
    )
def test_transition_probabilities_for_one_exogenous_process(model_with_one_exog_proc):
    """Compare the simulated shares of (current, previous) illness states."""
    params, options = model_with_one_exog_proc
    data = get_simulate_func(params, options)(params)

    # Lag the illness state within each individual.
    data["Prev_Illness"] = data.groupby("Identifier")["Illness"].shift()

    # ``normalize=True`` scales the table by the total number of observations.
    observed = pd.crosstab(data["Illness"], data["Prev_Illness"], normalize=True)
    expected = [[0.81, 0.09], [0.09, 0.01]]

    assert np.allclose(observed, expected, atol=0.01)
def test_return_output_dict_for_likelihood(model):
    """Check the types of the entries returned with ``return_scalar=False``."""
    params, options = process_model_or_seed(model)
    options["n_periods"] = 3

    data = get_simulate_func(params, options)(params)

    criterion = get_log_like_func(params, options, data, return_scalar=False)
    output = criterion(params)

    expected_types = {
        "value": float,
        "contributions": np.ndarray,
        "comparison_plot_data": pd.DataFrame,
    }
    for key, expected_type in expected_types.items():
        assert isinstance(output[key], expected_type)
Beispiel #6
0
def test_return_scalar_for_likelihood(model):
    """Check that ``return_scalar`` switches between a float and an array."""
    params, options = process_model_or_seed(model)
    data = get_simulate_func(params, options)(params)

    # ``return_scalar=True`` yields a float, ``False`` the array of contributions.
    for return_scalar, expected_type in ((True, float), (False, np.ndarray)):
        criterion = get_crit_func(params, options, data, return_scalar=return_scalar)
        result = criterion(params)

        assert isinstance(result, expected_type)
Beispiel #7
0
def simulate_truncated_data(params, options, is_missings=True):
    """Simulate a data set which is optionally truncated and has missing wages.

    Parameters
    ----------
    params : pandas.DataFrame or pandas.Series
        Contains parameters.
    options : dict
        Dictionary containing model options. ``options["simulation_seed"]`` seeds
        NumPy's global random number generator.
    is_missings : bool, default True
        If true, histories are randomly truncated and a random share of observed
        wages is set to missing. Otherwise, the simulated data is used as is.

    Returns
    -------
    pandas.DataFrame
        Simulated data restricted to the columns used for estimation.

    """
    optim_paras, _ = process_params_and_options(params, options)
    df = get_simulate_func(params, options)(params)

    # The global seed is set in both cases, before any random draw below.
    np.random.seed(options["simulation_seed"])

    if not is_missings:
        data_subset = df
    else:
        # Truncate the histories of agents to mimic attrition. A history is cut
        # after the first period at the earliest or not at all, so every individual
        # keeps at least one observation.
        periods_by_individual = df.reset_index().groupby("Identifier").Period
        # The name is referenced by the query string below, hence the ``noqa``.
        period_of_truncation = periods_by_individual.transform(  # noqa: F841
            lambda x: np.random.choice(x.max() + 1) + 1
        ).to_numpy()
        data_subset = df.query("Period < @period_of_truncation").copy()

        # Set a random share (below one half) of the observed wages to missing.
        is_working = data_subset["Choice"].isin(optim_paras["choices_w_wage"])
        share_missing = np.random.uniform(high=0.5)
        num_drop_wages = int(is_working.sum() * share_missing)

        if num_drop_wages > 0:
            candidates = data_subset.loc[is_working, "Wage"].index
            index_missing = np.random.choice(
                candidates, num_drop_wages, replace=False
            )
            data_subset.loc[index_missing, "Wage"] = np.nan

    # Restrict the information to observed entities only. The first two entries of
    # the dtype dictionary are skipped (presumably the index levels; confirm against
    # ``generate_column_dtype_dict_for_estimation``).
    col_dtype = generate_column_dtype_dict_for_estimation(optim_paras)
    observed_columns = list(col_dtype)[2:]

    return data_subset[observed_columns]
Beispiel #8
0
def test_simulation_with_flexible_choice_sets():
    """Simulate a Robinson Crusoe model whose choice sets are restricted."""
    params, options = process_model_or_seed("robinson_crusoe_basic")

    # Extend the model with an observable health characteristic.
    health_probabilities = {
        "observable_health_well": 0.9,
        "observable_health_sick": 0.1,
    }
    for category, probability in health_probabilities.items():
        params.loc[(category, "probability"), "value"] = probability

    # Sick people can never go fishing; further conditions rule out "friday".
    options["negative_choice_set"] = {
        "fishing": ["health == 'sick'"],
        "friday": ["period < 2", "exp_fishing == 0"],
    }

    # The simulation is set up with the processed options; the processed
    # parameters are not needed.
    _, options = process_params_and_options(params, options)
    data = get_simulate_func(params, options)(params)

    assert isinstance(data, pd.DataFrame)
def test_return_scalar_for_likelihood(model):
    """Check that ``return_scalar`` switches between a float and a dictionary."""
    params, options = process_model_or_seed(model)
    options["n_periods"] = 3

    data = get_simulate_func(params, options)(params)

    scalar_log_like = get_log_like_func(params, options, data, return_scalar=True)
    assert isinstance(scalar_log_like(params), float)

    detailed_log_like = get_log_like_func(
        params, options, data, return_scalar=False
    )
    assert isinstance(detailed_log_like(params), dict)
def test_randomness_msm(model_or_seed):
    """Check that moments from a simulation at ``params`` give zero error there."""
    params, options = process_model_or_seed(model_or_seed)
    data = get_simulate_func(params, options)(params)

    # The "empirical" moments stem from a simulation with the same parameters.
    moments = _replace_nans(_calc_choice_freq(data))

    criterion = get_moment_errors_func(
        params,
        options,
        _calc_choice_freq,
        _replace_nans,
        moments,
        get_diag_weighting_matrix(moments),
    )

    assert criterion(params) == 0
Beispiel #11
0
def test_invariance_of_model_solution_in_solve_and_criterion_functions(model):
    """Compare the state spaces of the solve, simulate and criterion functions."""
    params, options = process_model_or_seed(model)

    if model == "kw_2000":
        options["n_periods"] = 2
    else:
        options["n_periods"] = 3

    reference = get_solve_func(params, options)(params)

    # The state spaces are read from the partials after the functions were called.
    simulate = get_simulate_func(params, options)
    df = simulate(params)
    state_space_sim = simulate.keywords["solve"].keywords["state_space"]

    criterion = get_crit_func(params, options, df)
    criterion(params)
    state_space_crit = criterion.keywords["solve"].keywords["state_space"]

    compared_attributes = (
        "wages",
        "nonpecs",
        "expected_value_functions",
        "base_draws_sol",
    )

    for candidate in (state_space_sim, state_space_crit):
        assert reference.core.equals(candidate.core.reindex_like(reference.core))

        for attribute in compared_attributes:
            apply_to_attributes_of_two_state_spaces(
                reference.get_attribute(attribute),
                candidate.get_attribute(attribute),
                np.testing.assert_array_equal,
            )
Beispiel #12
0
def test_return_comparison_plot_data_for_likelihood(model):
    """Check the return values with and without comparison plot data."""
    params, options = process_model_or_seed(model)
    data = get_simulate_func(params, options)(params)

    # Without plot data, only the float value is returned.
    plain_crit = get_crit_func(
        params, options, data, return_comparison_plot_data=False
    )
    assert isinstance(plain_crit(params), float)

    # With plot data, the value comes together with a DataFrame.
    crit_with_plot_data = get_crit_func(
        params, options, data, return_comparison_plot_data=True
    )
    value, plot_data = crit_with_plot_data(params)

    assert isinstance(value, float)
    assert isinstance(plot_data, pd.DataFrame)
Beispiel #13
0
def test_invariance_of_model_solution_in_solve_and_criterion_functions(model):
    """Compare the state spaces of the solve, simulate and likelihood functions."""
    params, options = process_model_or_seed(model)

    reference = get_solve_func(params, options)(params)

    # The state spaces are read from the partials after the functions were called.
    simulate = get_simulate_func(params, options)
    df = simulate(params)
    state_space_sim = simulate.keywords["solve"].keywords["state_space"]

    log_like = get_log_like_func(params, options, df)
    log_like(params)
    state_space_crit = log_like.keywords["solve"].keywords["state_space"]

    compared_attributes = (
        "wages",
        "nonpecs",
        "expected_value_functions",
        "base_draws_sol",
    )

    for candidate in (state_space_sim, state_space_crit):
        assert reference.core.equals(candidate.core.reindex_like(reference.core))

        for attribute in compared_attributes:
            apply_to_attributes_of_two_state_spaces(
                getattr(reference, attribute),
                getattr(candidate, attribute),
                np.testing.assert_array_equal,
            )
Beispiel #14
0
def inputs():
    """Build example inputs for functions of the method of simulated moments.

    Returns
    -------
    tuple
        ``(params, options, calc_moments, replace_nans, empirical_moments,
        weighting_matrix)``.

    """
    params, options = get_example_model("kw_94_one", with_data=False)
    options["n_periods"] = 5

    data = get_simulate_func(params, options)(params)

    choice_frequencies = _replace_nans(_calc_choice_freq(data))
    mean_wages = _replace_nans(_calc_wage_mean(data))

    # The key order of the two dictionaries differs on purpose and is kept as is.
    empirical_moments = {"Choices": choice_frequencies, "Mean Wage": mean_wages}
    moment_functions = {"Mean Wage": _calc_wage_mean, "Choices": _calc_choice_freq}

    return (
        params,
        options,
        moment_functions,
        _replace_nans,
        empirical_moments,
        get_diag_weighting_matrix(empirical_moments),
    )
Beispiel #15
0
def get_moment_errors_func(
    params,
    options,
    calc_moments,
    replace_nans,
    empirical_moments,
    weighting_matrix=None,
    n_simulation_periods=None,
    return_scalar=True,
):
    """Get the moment errors function for MSM estimation.

    The inputs are validated and harmonized once. All of them are then bound to
    ``moment_errors`` such that the returned function only expects parameters.

    Parameters
    ----------
    params : pandas.DataFrame or pandas.Series
        Contains parameters.
    options : dict
        Dictionary containing model options.
    calc_moments : callable or list or dict
        Function(s) used to calculate simulated moments. Must match the structure
        of the empirical moments, i.e., if ``empirical_moments`` is a list of
        pandas.DataFrames, ``calc_moments`` must be a list of the same length
        containing functions that correspond to the moments in
        ``empirical_moments``.
    replace_nans : callable or list or dict or None
        Function(s) specifying how to handle missings in simulated moments. Must
        match the structure of ``empirical_moments``. A single callable is applied
        to every set of moments. If None, ``_return_input`` is used.
    empirical_moments : pandas.DataFrame or pandas.Series or dict or list
        Contains the empirical moments calculated for the observed data. Moments
        should be saved to pandas.DataFrame or pandas.Series that can either be
        passed to the function directly or as items of a list or dictionary.
        The index of pandas.DataFrames can be a MultiIndex, but columns cannot.
    weighting_matrix : numpy.ndarray, default None
        Square matrix of dimension (NxN) with N denoting the number of
        empirical moments. Used to weight squared moment errors. If None, the
        matrix is created by ``get_diag_weighting_matrix`` (an identity matrix
        according to the original documentation).
    n_simulation_periods : int, default None
        Dictates the number of periods in the simulated dataset. This option does
        not affect ``options["n_periods"]`` which controls the number of periods
        for which decision rules are computed.
    return_scalar : bool, default True
        Indicates whether to return the scalar value of the weighted square product
        of the moment error vector or a dictionary that additionally contains the
        vector of (weighted) moment errors, simulated moments that follow the
        structure of the empirical moments, and simulated as well as empirical
        moments in a pandas.DataFrame that adheres to a tidy data format. The
        dictionary contains the following key and value pairs:

        - "value": Scalar value of weighted moment errors (float).
        - "root_contributions": Moment error vectors multiplied with the root of
          the weighting matrix (numpy.ndarray).
        - "simulated_moments": Simulated moments for the given parametrization in
          the same data format as ``empirical_moments`` (pandas.Series or
          pandas.DataFrame or list or dict).
        - "comparison_plot_data": A :class:`pandas.DataFrame` that contains both
          empirical and simulated moments in a tidy data format with the
          following columns:

            - ``moment_column``: Contains the column names of the moment
              DataFrames or the names of the moment Series.
            - ``moment_index``: Contains the index of the moment
              DataFrames/Series. MultiIndex indices are joined to one string.
            - ``value``: Contains moment values.
            - ``moment_set``: Indicator for each set of moments. Keys are used if
              the empirical moments are specified in a dict. Moments passed as
              lists are numbered according to position.
            - ``kind``: Indicates whether moments are empirical or simulated.

    Returns
    -------
    moment_errors_func : callable
        Function where all arguments except the parameter vector are set.

    Raises
    ------
    ValueError
        If replacement function cannot be broadcast (1:1 or 1:N) to simulated
        moments.
    ValueError
        If the number of functions to compute the simulated moments does not match
        the number of empirical moments.

    """
    # Work on a copy so that the caller's moments are never altered.
    empirical_moments = copy.deepcopy(empirical_moments)
    # Record the input format before the moments are harmonized.
    are_empirical_moments_dict = isinstance(empirical_moments, dict)

    # The default weighting matrix is derived from the moments as passed in.
    if weighting_matrix is None:
        weighting_matrix = get_diag_weighting_matrix(empirical_moments)

    simulate = get_simulate_func(
        params=params, options=options, n_simulation_periods=n_simulation_periods
    )

    empirical_moments = _harmonize_input(empirical_moments)
    calc_moments = _harmonize_input(calc_moments)

    if replace_nans is None:
        replace_nans = _return_input

    # A single replacement function is duplicated for all sets of moments.
    if callable(replace_nans):
        replace_nans = dict.fromkeys(empirical_moments, replace_nans)
    replace_nans = _harmonize_input(replace_nans)

    n_moment_sets = len(empirical_moments)
    n_replacements = len(replace_nans)

    if n_replacements > n_moment_sets:
        raise ValueError(
            "There are more replacement functions than sets of empirical moments."
        )

    if 1 < n_replacements < n_moment_sets:
        raise ValueError(
            "Replacement functions can only be matched 1:1 or 1:n with sets of "
            "empirical moments."
        )

    if len(calc_moments) != n_moment_sets:
        raise ValueError(
            "Number of functions to calculate simulated moments must be equal to "
            "the number of sets of empirical moments."
        )

    return functools.partial(
        moment_errors,
        simulate=simulate,
        calc_moments=calc_moments,
        replace_nans=replace_nans,
        empirical_moments=empirical_moments,
        weighting_matrix=weighting_matrix,
        return_scalar=return_scalar,
        are_empirical_moments_dict=are_empirical_moments_dict,
    )
def get_msm_func(
    params,
    options,
    calc_moments,
    replace_nans,
    empirical_moments,
    weighting_matrix,
    n_simulation_periods=None,
    return_scalar=True,
):
    """Get the msm function.

    The inputs are validated and harmonized once. All of them are then bound to
    ``msm`` such that the returned function only expects parameters.

    Parameters
    ----------
    params : pandas.DataFrame or pandas.Series
        Contains parameters.
    options : dict
        Dictionary containing model options.
    calc_moments : callable or list
        Function(s) used to calculate simulated moments. Must match the structure
        of the empirical moments, i.e., if ``empirical_moments`` is a list of
        pandas.DataFrames, ``calc_moments`` must be a list of the same length
        containing functions that correspond to the moments in
        ``empirical_moments``.
    replace_nans : callable or list
        Function(s) specifying how to handle missings in simulated moments. Must
        match the structure of ``empirical_moments``. Exception: If only one
        replacement function is specified, it is used on all sets of simulated
        moments.
    empirical_moments : pandas.DataFrame or pandas.Series or dict or list
        Contains the empirical moments calculated for the observed data. Moments
        should be saved to pandas.DataFrame or pandas.Series that can either be
        passed to the function directly or as items of a list or dictionary.
        The index of pandas.DataFrames can be a MultiIndex, but columns cannot.
    weighting_matrix : numpy.ndarray
        Square matrix of dimension (NxN) with N denoting the number of
        empirical moments. Used to weight squared moment errors.
    n_simulation_periods : int, default None
        Dictates the number of periods in the simulated dataset. This option does
        not affect ``options["n_periods"]`` which controls the number of periods
        for which decision rules are computed.
    return_scalar : bool, default True
        Indicates whether to return the moment error vector (False) or the weighted
        square product of the moment error vector (True).

    Returns
    -------
    msm_func : callable
        MSM function where all arguments except the parameter vector are set.

    Raises
    ------
    ValueError
        If the replacement functions cannot be matched 1:1 or 1:n with the sets of
        empirical moments.
    ValueError
        If the number of functions to compute the simulated moments does not match
        the number of sets of empirical moments.

    """
    # Work on a copy so that the caller's moments are never altered.
    empirical_moments = copy.deepcopy(empirical_moments)

    simulate = get_simulate_func(
        params=params, options=options, n_simulation_periods=n_simulation_periods
    )

    empirical_moments = _harmonize_input(empirical_moments)
    calc_moments = _harmonize_input(calc_moments)
    replace_nans = _harmonize_input(replace_nans)

    n_moment_sets = len(empirical_moments)
    n_replacements = len(replace_nans)

    if n_replacements > n_moment_sets:
        raise ValueError(
            "There are more replacement functions than sets of empirical moments."
        )

    if 1 < n_replacements < n_moment_sets:
        raise ValueError(
            "Replacement functions can only be matched 1:1 or 1:n with sets of "
            "empirical moments."
        )

    # A single replacement function is duplicated for all sets of moments.
    if n_replacements == 1 and n_moment_sets > 1:
        replace_nans = replace_nans * n_moment_sets

    if len(calc_moments) != n_moment_sets:
        raise ValueError(
            "Number of functions to calculate simulated moments must be equal to "
            "the number of sets of empirical moments."
        )

    return functools.partial(
        msm,
        simulate=simulate,
        calc_moments=calc_moments,
        replace_nans=replace_nans,
        empirical_moments=empirical_moments,
        weighting_matrix=weighting_matrix,
        return_scalar=return_scalar,
    )