Beispiel #1
0
def create_state_space_class(optim_paras, options):
    """Build the state space object of the model.

    The core state space and its indexer are created first, followed by the grid of
    dense dimensions. Covariates are computed on the core and every column is
    downcast afterwards. Depending on whether dense covariates exist, either a
    multidimensional or a single-dimensional state space is returned.

    Parameters
    ----------
    optim_paras : dict
        Parsed model parameters. The number of entries in ``optim_paras["choices"]``
        sets the size of the last dimension of the solution draws.
    options : dict
        Model options. This function reads ``"covariates_core"``, ``"n_periods"``,
        ``"solution_draws"``, ``"solution_seed_startup"`` and
        ``"monte_carlo_sequence"``.

    Returns
    -------
    state_space : _MultiDimStateSpace or _SingleDimStateSpace
        :class:`_MultiDimStateSpace` if the dense covariates are truthy, otherwise
        :class:`_SingleDimStateSpace`.

    """
    core, indexer = _create_core_and_indexer(optim_paras, options)
    dense_grid = _create_dense_state_space_grid(optim_paras)

    # Compute covariates on the full-width dtypes first. Downcasting earlier risks
    # silent integer overflows during the calculations.
    core = compute_covariates(core, options["covariates_core"]).apply(
        downcast_to_smallest_dtype
    )
    dense = _create_dense_state_space_covariates(dense_grid, optim_paras, options)

    draws_shape = (
        options["n_periods"],
        options["solution_draws"],
        len(optim_paras["choices"]),
    )
    # One seed is consumed from the startup sequence for the solution draws.
    base_draws_sol = create_base_draws(
        draws_shape,
        next(options["solution_seed_startup"]),
        options["monte_carlo_sequence"],
    )

    if not dense:
        return _SingleDimStateSpace(
            core, indexer, base_draws_sol, optim_paras, options
        )

    return _MultiDimStateSpace(
        core, indexer, base_draws_sol, optim_paras, options, dense
    )
Beispiel #2
0
def get_simulate_func(params, options):
    """Return :func:`simulate` with everything but the parameters fixed.

    All arguments of :func:`simulate` except the parameter vector are bound with
    :func:`functools.partial`. The resulting callable can be handed directly to an
    optimizer, e.g. for estimation with the simulated method of moments.

    Parameters
    ----------
    params : pandas.DataFrame
        DataFrame containing model parameters.
    options : dict
        Dictionary containing model options.

    Returns
    -------
    simulate_function : :func:`simulate`
        Simulation function where all arguments except the parameter vector are set.

    """
    optim_paras, options = process_params_and_options(params, options)
    state_space = StateSpace(optim_paras, options)

    draws_shape = (
        options["n_periods"],
        options["simulation_agents"],
        len(optim_paras["choices"]),
    )

    # Two seeds are consumed in this order: first for the simulation draws, then
    # for the wage draws.
    seed_sim = next(options["simulation_seed_startup"])
    base_draws_sim = create_base_draws(draws_shape, seed_sim, "random")
    seed_wage = next(options["simulation_seed_startup"])
    base_draws_wage = create_base_draws(draws_shape, seed_wage, "random")

    fixed_arguments = {
        "base_draws_sim": base_draws_sim,
        "base_draws_wage": base_draws_wage,
        "state_space": state_space,
        "options": options,
    }

    return functools.partial(simulate, **fixed_arguments)
Beispiel #3
0
    def create_draws(self, options):
        """Create the solution draws for every dense index.

        One set of base draws is created for each distinct number of available
        choices found in ``self.dense_key_to_choice_set``. Each set consumes one seed
        from ``options["solution_seed_startup"]``. Every dense index is then mapped
        to the slice of the matching set which belongs to its period.

        Parameters
        ----------
        options : dict
            Model options. This method reads ``"n_periods"``, ``"solution_draws"``,
            ``"solution_seed_startup"`` and ``"monte_carlo_sequence"``.

        Returns
        -------
        draws : dict
            Maps each key of ``self.dense_key_to_complex`` to the period slice of the
            base draws created for its number of choices.

        """
        # Distinct numbers of available choices over all choice sets.
        distinct_sizes = list(
            {sum(choice_set) for choice_set in self.dense_key_to_choice_set.values()}
        )

        # Position ``i`` holds the draws for ``distinct_sizes[i]`` choices.
        draws_per_size = [
            create_base_draws(
                (options["n_periods"], options["solution_draws"], size),
                next(options["solution_seed_startup"]),
                options["monte_carlo_sequence"],
            )
            for size in distinct_sizes
        ]

        draws_by_dense_idx = {}
        for dense_idx, complex_ix in self.dense_key_to_complex.items():
            # The complex index starts with the period followed by the choice set.
            period = complex_ix[0]
            size = sum(complex_ix[1])
            position = distinct_sizes.index(size)
            draws_by_dense_idx[dense_idx] = draws_per_size[position][period]

        return draws_by_dense_idx
Beispiel #4
0
    def __init__(self, optim_paras, options):
        """Set up all components of the state space.

        The steps are executed in a fixed order because later steps read attributes
        which are assigned by earlier ones.

        Parameters
        ----------
        optim_paras : dict
            Parsed model parameters.
        options : dict
            Model options. This method reads ``"n_periods"``, ``"solution_draws"``,
            ``"solution_seed_startup"``, ``"monte_carlo_sequence"`` and
            ``"covariates"``.

        """
        draws_shape = (
            options["n_periods"],
            options["solution_draws"],
            len(optim_paras["choices"]),
        )
        seed = next(options["solution_seed_startup"])
        self.base_draws_sol = create_base_draws(
            draws_shape, seed, options["monte_carlo_sequence"]
        )

        states, indexer = _create_state_space(optim_paras, options)
        self.indexer = indexer

        base_covariates = create_base_covariates(states, options["covariates"])

        # Shrink dtypes only after the covariates are computed. Doing it earlier
        # risks silent integer overflows.
        states = states.apply(downcast_to_smallest_dtype)
        base_covariates = base_covariates.apply(downcast_to_smallest_dtype)
        self.states = states.to_numpy()

        self.covariates = _create_choice_covariates(
            base_covariates, states, optim_paras
        )

        # The last column of the states is passed on separately.
        # NOTE(review): presumably this column holds the types - confirm.
        last_state_column = self.states[:, -1]
        wages, nonpec = _create_reward_components(
            last_state_column, self.covariates, optim_paras
        )
        self.wages = wages
        self.nonpec = nonpec

        self.is_inadmissible = _create_is_inadmissible_indicator(
            states, optim_paras, options
        )

        self._create_slices_by_periods(options["n_periods"])

        # Receives the partially initialized instance, so it has to come last.
        self.indices_of_child_states = _get_indices_of_child_states(
            self, optim_paras
        )
Beispiel #5
0
def get_crit_func(
    params, options, df, return_scalar=True, return_comparison_plot_data=False
):
    """Return the likelihood function with everything but the parameters fixed.

    All arguments of :func:`log_like` except the parameter vector are bound with
    :func:`functools.partial`. The resulting callable can be passed directly to an
    optimizer or to a function which takes numerical derivatives.

    Parameters
    ----------
    params : pandas.DataFrame
        DataFrame containing model parameters.
    options : dict
        Dictionary containing model options.
    df : pandas.DataFrame
        The model is fit to this dataset.
    return_scalar : bool, default True
        Indicator for whether the mean log likelihood should be returned or the log
        likelihood contributions.
    return_comparison_plot_data : bool, default False
        Indicator for whether a :class:`pandas.DataFrame` with various contributions for
        the visualization with estimagic should be returned.

    Returns
    -------
    criterion_function : :func:`log_like`
        Criterion function where all arguments except the parameter vector are set.

    Raises
    ------
    AssertionError
        If data has not the expected format.

    """
    optim_paras, options = process_params_and_options(params, options)
    optim_paras = _adjust_optim_paras_for_estimation(optim_paras, df)

    check_estimation_data(df, optim_paras)

    # The state space is taken from the partialed solve function instead of being
    # built a second time.
    solve = get_solve_func(params, options)
    state_space = solve.keywords["state_space"]

    df, type_covariates = _process_estimation_data(
        df, state_space, optim_paras, options
    )

    # One row of draws for every combination of observation and type.
    draws_shape = (
        df.shape[0] * optim_paras["n_types"],
        options["estimation_draws"],
        len(optim_paras["choices"]),
    )
    base_draws_est = create_base_draws(
        draws_shape,
        next(options["estimation_seed_startup"]),
        options["monte_carlo_sequence"],
    )

    fixed_arguments = {
        "df": df,
        "base_draws_est": base_draws_est,
        "solve": solve,
        "type_covariates": type_covariates,
        "options": options,
        "return_scalar": return_scalar,
        "return_comparison_plot_data": return_comparison_plot_data,
    }

    return partial(log_like, **fixed_arguments)
Beispiel #6
0
def get_log_like_func(
    params, options, df, return_scalar=True, return_comparison_plot_data=False
):
    """Return the log likelihood function for maximum likelihood estimation.

    All arguments of :func:`log_like` except the parameter vector are bound with
    :func:`functools.partial`. The resulting callable can be passed directly to an
    optimizer or to a function which takes numerical derivatives.

    Parameters
    ----------
    params : pandas.DataFrame
        DataFrame containing model parameters.
    options : dict
        Dictionary containing model options.
    df : pandas.DataFrame
        The model is fit to this dataset.
    return_scalar : bool, default True
        Indicator for whether the mean log likelihood should be returned or the log
        likelihood contributions.
    return_comparison_plot_data : bool, default False
        Indicator for whether a :class:`pandas.DataFrame` with various contributions for
        the visualization with estimagic should be returned.

    Returns
    -------
    criterion_function : :func:`log_like`
        Criterion function where all arguments except the parameter vector are set.

    Raises
    ------
    AssertionError
        If data has not the expected format.

    Examples
    --------
    >>> import respy as rp
    >>> params, options, data = rp.get_example_model("robinson_crusoe_basic")

    At default the function returns the log likelihood as a scalar value.

    >>> log_like = rp.get_log_like_func(params=params, options=options, df=data)
    >>> scalar = log_like(params)

    Additionally, a :class:`pandas.DataFrame` with data for visualization can be
    returned.

    >>> log_like = rp.get_log_like_func(params=params, options=options, df=data,
    ...     return_comparison_plot_data=True
    ... )
    >>> scalar, df = log_like(params)

    In alternative to the log likelihood, a :class:`numpy.array` of the individual
    log likelihood contributions can be returned.

    >>> log_like_contribs = rp.get_log_like_func(params=params, options=options,
    ...     df=data, return_scalar=False
    ... )
    >>> array = log_like_contribs(params)
    """
    optim_paras, options = process_params_and_options(params, options)
    optim_paras = _update_optim_paras_with_initial_experience_levels(optim_paras, df)

    check_estimation_data(df, optim_paras)

    # The state space is taken from the partialed solve function instead of being
    # built a second time.
    solve = get_solve_func(params, options)
    state_space = solve.keywords["state_space"]

    df, type_covariates = _process_estimation_data(
        df, state_space, optim_paras, options
    )

    # TODO: Replace with decorator.
    # Each dense key gets its own draws. Their shape depends on the number of
    # observations with this key and the size of the key's choice set.
    rows_by_dense_key = df.groupby("dense_key").groups
    base_draws_est = {}
    for dense_key, row_labels in rows_by_dense_key.items():
        n_choices = sum(state_space.dense_key_to_choice_set[dense_key])
        draws_shape = (len(row_labels), options["estimation_draws"], n_choices)
        base_draws_est[dense_key] = create_base_draws(
            draws_shape,
            next(options["estimation_seed_startup"]),
            options["monte_carlo_sequence"],
        )

    fixed_arguments = {
        "df": df,
        "base_draws_est": base_draws_est,
        "solve": solve,
        "type_covariates": type_covariates,
        "options": options,
        "return_scalar": return_scalar,
        "return_comparison_plot_data": return_comparison_plot_data,
    }

    return partial(log_like, **fixed_arguments)
Beispiel #7
0
def get_simulate_func(
    params,
    options,
    method="n_step_ahead_with_sampling",
    df=None,
    n_simulation_periods=None,
):
    """Return :func:`simulate` with everything but the parameters fixed.

    All arguments of :func:`simulate` except the parameter vector are bound with
    :func:`functools.partial`. The resulting callable can be handed directly to an
    optimizer, e.g. for estimation with the simulated method of moments.

    Parameters
    ----------
    params : pandas.DataFrame
        DataFrame containing model parameters.
    options : dict
        Dictionary containing model options.
    method : {"n_step_ahead_with_sampling", "n_step_ahead_with_data", "one_step_ahead"}
        The simulation method which can be one of three and is explained in more detail
        in :func:`simulate`.
    df : pandas.DataFrame or None, default None
        DataFrame containing one or multiple observations per individual.
    n_simulation_periods : int or None, default None
        Simulate data for a number of periods. This option does not affect
        ``options["n_periods"]`` which controls the number of periods for which decision
        rules are computed.

    Returns
    -------
    simulate_function : :func:`simulate`
        Simulation function where all arguments except the parameter vector are set.

    Examples
    --------
    >>> import respy as rp
    >>> params, options = rp.get_example_model("robinson_crusoe_basic", with_data=False)
    >>> simulate = rp.get_simulate_func(params, options)
    >>> data = simulate(params)

    """
    optim_paras, options = process_params_and_options(params, options)

    n_simulation_periods, options = _harmonize_simulation_arguments(
        method, df, n_simulation_periods, options
    )
    df = _process_input_df_for_simulation(df, method, options, optim_paras)

    solve = get_solve_func(params, options)

    # Shocks are drawn for all observations and all choices even though some choices
    # might not be available. The relevant shocks are selected later.
    if method == "one_step_ahead":
        n_observations = df.shape[0]
    else:
        n_observations = df.shape[0] * n_simulation_periods
    draws_shape = (n_observations, len(optim_paras["choices"]))

    # Two seeds are consumed in this order: first for the simulation draws, then
    # for the wage draws.
    seed_sim = next(options["simulation_seed_startup"])
    base_draws_sim = create_base_draws(draws_shape, seed_sim, "random")
    seed_wage = next(options["simulation_seed_startup"])
    base_draws_wage = create_base_draws(draws_shape, seed_wage, "random")

    fixed_arguments = {
        "base_draws_sim": base_draws_sim,
        "base_draws_wage": base_draws_wage,
        "df": df,
        "method": method,
        "n_simulation_periods": n_simulation_periods,
        "solve": solve,
        "options": options,
    }

    return functools.partial(simulate, **fixed_arguments)
def get_log_like_func(params, options, df, return_scalar=True):
    """Return the log likelihood function for maximum likelihood estimation.

    All arguments of :func:`log_like` except the parameter vector are bound with
    :func:`functools.partial`. The resulting callable can be passed directly to an
    optimizer or to a function which takes numerical derivatives.

    Parameters
    ----------
    params : pandas.DataFrame
        DataFrame containing model parameters.
    options : dict
        Dictionary containing model options.
    df : pandas.DataFrame
        The model is fit to this dataset.
    return_scalar : bool, default True
        Indicator for whether the mean log likelihood should be returned. If False,
        a dictionary with the following key and value pairs is returned:

        - "value": mean log likelihood (float)
        - "contributions": log likelihood contributions (numpy.array)
        - "comparison_plot_data": DataFrame with various contributions for the
          visualization with estimagic. The data contains the following columns:

            - ``identifier`` : Individual identifiers derived from input df.
            - ``period`` : Periods derived from input df.
            - ``choice`` : Choice that ``value`` is connected to.
            - ``value`` : Value of log likelihood contribution.
            - ``kind`` : Kind of contribution (e.g. choice or wage).
            - ``type`` and ``log_type_probability`` : Will be included in models
              with types.

    Returns
    -------
    criterion_function : :func:`log_like`
        Criterion function where all arguments except the parameter vector are set.

    Raises
    ------
    AssertionError
        If data has not the expected format.

    Examples
    --------
    >>> import respy as rp
    >>> params, options, data = rp.get_example_model("robinson_crusoe_basic")

    At default the function returns the log likelihood as a scalar value.

    >>> log_like = rp.get_log_like_func(params=params, options=options, df=data)
    >>> scalar = log_like(params)

    Alternatively, a dictionary containing the log likelihood, as well as
    log likelihood contributions and a :class:`pandas.DataFrame` can be returned.

    >>> log_like = rp.get_log_like_func(params=params, options=options, df=data,
    ...     return_scalar=False
    ... )
    >>> outputs = log_like(params)
    >>> outputs.keys()
    dict_keys(['value', 'contributions', 'comparison_plot_data'])
    """
    optim_paras, options = process_params_and_options(params, options)
    optim_paras = _update_optim_paras_with_initial_experience_levels(optim_paras, df)

    check_estimation_data(df, optim_paras)

    # The state space is taken from the partialed solve function instead of being
    # built a second time.
    solve = get_solve_func(params, options)
    state_space = solve.keywords["state_space"]

    df, type_covariates = _process_estimation_data(
        df, state_space, optim_paras, options
    )

    # TODO: Replace with decorator.
    # Each dense key gets its own draws. Their shape depends on the number of
    # observations with this key and the size of the key's choice set.
    rows_by_dense_key = df.groupby("dense_key").groups
    base_draws_est = {}
    for dense_key, row_labels in rows_by_dense_key.items():
        n_choices = sum(state_space.dense_key_to_choice_set[dense_key])
        draws_shape = (len(row_labels), options["estimation_draws"], n_choices)
        base_draws_est[dense_key] = create_base_draws(
            draws_shape,
            next(options["estimation_seed_startup"]),
            options["monte_carlo_sequence"],
        )

    fixed_arguments = {
        "df": df,
        "base_draws_est": base_draws_est,
        "solve": solve,
        "type_covariates": type_covariates,
        "options": options,
        "return_scalar": return_scalar,
    }

    return partial(log_like, **fixed_arguments)
Beispiel #9
0
def get_crit_func(params, options, df, version="log_like"):
    """Return a likelihood function with everything but the parameters fixed.

    All arguments of the selected likelihood function except the parameter vector
    are bound with :func:`functools.partial`. The resulting callable can be passed
    directly to an optimizer or to a function which takes numerical derivatives.

    :func:`log_like` is returned by default. The ``version`` argument selects
    another variant.

    Parameters
    ----------
    params : pandas.DataFrame
        DataFrame containing model parameters.
    options : dict
        Dictionary containing model options.
    df : pandas.DataFrame
        The model is fit to this dataset.
    version : str, default "log_like"
        Can take the values "log_like" and "log_like_obs".

    Returns
    -------
    criterion_function : :func:`log_like` or :func:`log_like_obs`
        Criterion function where all arguments except the parameter vector are set.
        Its ``__name__`` attribute is set to ``version``.

    Raises
    ------
    AssertionError
        If data has not the expected format.
    ValueError
        If ``version`` is neither "log_like" nor "log_like_obs".

    """
    optim_paras, options = process_params_and_options(params, options)
    optim_paras = _adjust_optim_paras_for_estimation(optim_paras, df)

    check_estimation_data(df, optim_paras)

    state_space = StateSpace(optim_paras, options)

    processed_data = _process_estimation_data(df, state_space, optim_paras, options)
    (
        choices,
        idx_indiv_first_obs,
        indices,
        log_wages_observed,
        type_covariates,
    ) = processed_data

    draws_shape = (
        len(choices),
        options["estimation_draws"],
        len(optim_paras["choices"]),
    )
    base_draws_est = create_base_draws(
        draws_shape,
        next(options["estimation_seed_startup"]),
        options["monte_carlo_sequence"],
    )

    if version not in ("log_like", "log_like_obs"):
        raise ValueError("version has to be 'log_like' or 'log_like_obs'.")
    unpartialed = log_like if version == "log_like" else log_like_obs

    fixed_arguments = {
        "choices": choices,
        "idx_indiv_first_obs": idx_indiv_first_obs,
        "indices": indices,
        "log_wages_observed": log_wages_observed,
        "base_draws_est": base_draws_est,
        "state_space": state_space,
        "type_covariates": type_covariates,
        "options": options,
    }
    criterion_function = partial(unpartialed, **fixed_arguments)

    # Partial objects carry no ``__name__`` on their own. The name will be relevant
    # for estimagic topography plots.
    criterion_function.__name__ = version

    return criterion_function
Beispiel #10
0
def get_simulate_func(
    params,
    options,
    method="n_step_ahead_with_sampling",
    df=None,
    n_simulation_periods=None,
):
    """Return :func:`simulate` with everything but the parameters fixed.

    All arguments of :func:`simulate` except the parameter vector are bound with
    :func:`functools.partial`. The resulting callable can be handed directly to an
    optimizer, e.g. for estimation with the simulated method of moments.

    Parameters
    ----------
    params : pandas.DataFrame
        DataFrame containing model parameters.
    options : dict
        Dictionary containing model options.
    method : {"n_step_ahead_with_sampling", "n_step_ahead_with_data", "one_step_ahead"}
        The simulation method which can be one of three and is explained in more detail
        in :func:`simulate`.
    df : pandas.DataFrame or None
        DataFrame containing one or multiple observations per individual.
    n_simulation_periods : int or None
        Simulate data for a number of periods. This option does not affect
        ``options["n_periods"]`` which controls the number of periods for which decision
        rules are computed.

    Returns
    -------
    simulate_function : :func:`simulate`
        Simulation function where all arguments except the parameter vector are set.

    """
    optim_paras, options = process_params_and_options(params, options)

    n_simulation_periods, options = _harmonize_simulation_arguments(
        method, df, n_simulation_periods, options
    )
    df = _process_input_df_for_simulation(
        df, method, n_simulation_periods, options, optim_paras
    )

    solve = get_solve_func(params, options)

    # One row of draws per row of the processed data and one column per choice.
    draws_shape = (df.shape[0], len(optim_paras["choices"]))

    # Two seeds are consumed in this order: first for the simulation draws, then
    # for the wage draws.
    seed_sim = next(options["simulation_seed_startup"])
    base_draws_sim = create_base_draws(draws_shape, seed_sim, "random")
    seed_wage = next(options["simulation_seed_startup"])
    base_draws_wage = create_base_draws(draws_shape, seed_wage, "random")

    fixed_arguments = {
        "base_draws_sim": base_draws_sim,
        "base_draws_wage": base_draws_wage,
        "df": df,
        "solve": solve,
        "options": options,
    }

    return functools.partial(simulate, **fixed_arguments)