Esempi in Python per check_estimation_data (respy.pre_processing.data_checking.check_estimation_data)

Esempio n. 1

0

Mostra file

File: likelihood.py Progetto: MengqiaoYu/respy

def get_crit_func(
    params, options, df, return_scalar=True, return_comparison_plot_data=False
):
    """Build the likelihood criterion function for a model and a dataset.

    Every argument of :func:`log_like` other than the parameter vector is bound
    with :func:`functools.partial`. The returned callable can therefore be handed
    directly to an optimizer or to a routine that takes numerical derivatives.

    Parameters
    ----------
    params : pandas.DataFrame
        DataFrame containing model parameters.
    options : dict
        Dictionary containing model options.
    df : pandas.DataFrame
        The model is fit to this dataset.
    return_scalar : bool, default True
        Indicator for whether the mean log likelihood should be returned or the log
        likelihood contributions.
    return_comparison_plot_data : bool, default False
        Indicator for whether a :class:`pandas.DataFrame` with various contributions
        for the visualization with estimagic should be returned.

    Returns
    -------
    criterion_function : functools.partial
        :func:`log_like` with all arguments except the parameter vector set.

    Raises
    ------
    AssertionError
        If data has not the expected format.

    """
    optim_paras, options = process_params_and_options(params, options)
    optim_paras = _adjust_optim_paras_for_estimation(optim_paras, df)

    # The data is validated against the adjusted parameters before any further work.
    check_estimation_data(df, optim_paras)

    solve = get_solve_func(params, options)
    df, type_covariates = _process_estimation_data(
        df, solve.keywords["state_space"], optim_paras, options
    )

    # Shape of the draws: (rows of the processed data times number of types,
    # number of estimation draws, number of choices).
    draws_shape = (
        df.shape[0] * optim_paras["n_types"],
        options["estimation_draws"],
        len(optim_paras["choices"]),
    )
    # Take the next seed from the startup seed iterator.
    seed = next(options["estimation_seed_startup"])
    base_draws_est = create_base_draws(
        draws_shape, seed, options["monte_carlo_sequence"]
    )

    bound_arguments = {
        "df": df,
        "base_draws_est": base_draws_est,
        "solve": solve,
        "type_covariates": type_covariates,
        "options": options,
        "return_scalar": return_scalar,
        "return_comparison_plot_data": return_comparison_plot_data,
    }
    return partial(log_like, **bound_arguments)

Esempio n. 2

0

Mostra file

File: likelihood.py Progetto: OpenSourceEconomics/respy

def get_log_like_func(params, options, df, return_scalar=True):
    """Build the criterion function for maximum likelihood estimation.

    Every argument of :func:`log_like` other than the parameter vector is bound
    with :func:`functools.partial`. The returned callable can therefore be handed
    directly to an optimizer or to a routine that takes numerical derivatives.

    Parameters
    ----------
    params : pandas.DataFrame
        DataFrame containing model parameters.
    options : dict
        Dictionary containing model options.
    df : pandas.DataFrame
        The model is fit to this dataset.
    return_scalar : bool, default True
        Indicator for whether the mean log likelihood should be returned. If False
        will return a dictionary with the following key and value pairs:

        - "value": mean log likelihood (float)
        - "contributions": log likelihood contributions (numpy.array)
        - "comparison_plot_data" : DataFrame with various contributions for
          the visualization with estimagic. Data contains the following columns:

            - ``identifier`` : Individual identifiers derived from input df.
            - ``period`` : Periods derived from input df.
            - ``choice`` : Choice that ``value`` is connected to.
            - ``value`` : Value of log likelihood contribution.
            - ``kind`` : Kind of contribution (e.g choice or wage).
            - ``type`` and ``log_type_probability``: Will be included in models
              with types.

    Returns
    -------
    criterion_function : functools.partial
        :func:`log_like` with all arguments except the parameter vector set.

    Raises
    ------
    AssertionError
        If data has not the expected format.

    Examples
    --------
    >>> import respy as rp
    >>> params, options, data = rp.get_example_model("robinson_crusoe_basic")

    At default the function returns the log likelihood as a scalar value.

    >>> log_like = rp.get_log_like_func(params=params, options=options, df=data)
    >>> scalar = log_like(params)

    Alternatively, a dictionary containing the log likelihood, as well as
    log likelihood contributions and a :class:`pandas.DataFrame` can be returned.

    >>> log_like = rp.get_log_like_func(params=params, options=options, df=data,
    ...     return_scalar=False
    ... )
    >>> outputs = log_like(params)
    >>> outputs.keys()
    dict_keys(['value', 'contributions', 'comparison_plot_data'])
    """
    optim_paras, options = process_params_and_options(params, options)
    optim_paras = _update_optim_paras_with_initial_experience_levels(
        optim_paras, df
    )

    # The data is validated against the updated parameters before any further work.
    check_estimation_data(df, optim_paras)

    solve = get_solve_func(params, options)
    state_space = solve.keywords["state_space"]

    df, type_covariates = _process_estimation_data(
        df, state_space, optim_paras, options
    )

    # TODO: Replace with decorator.
    # One array of base draws per dense key; each group takes its own seed from
    # the startup seed iterator, in the iteration order of the groups.
    base_draws_est = {}
    rows_by_dense_key = df.groupby("dense_key").groups
    for dense_key, row_labels in rows_by_dense_key.items():
        n_available = sum(state_space.dense_key_to_choice_set[dense_key])
        draws_shape = (len(row_labels), options["estimation_draws"], n_available)
        base_draws_est[dense_key] = create_base_draws(
            draws_shape,
            next(options["estimation_seed_startup"]),
            options["monte_carlo_sequence"],
        )

    bound_arguments = {
        "df": df,
        "base_draws_est": base_draws_est,
        "solve": solve,
        "type_covariates": type_covariates,
        "options": options,
        "return_scalar": return_scalar,
    }
    return partial(log_like, **bound_arguments)

Esempio n. 3

0

Mostra file

def get_log_like_func(
    params, options, df, return_scalar=True, return_comparison_plot_data=False
):
    """Build the criterion function for maximum likelihood estimation.

    Every argument of :func:`log_like` other than the parameter vector is bound
    with :func:`functools.partial`. The returned callable can therefore be handed
    directly to an optimizer or to a routine that takes numerical derivatives.

    Parameters
    ----------
    params : pandas.DataFrame
        DataFrame containing model parameters.
    options : dict
        Dictionary containing model options.
    df : pandas.DataFrame
        The model is fit to this dataset.
    return_scalar : bool, default True
        Indicator for whether the mean log likelihood should be returned or the log
        likelihood contributions.
    return_comparison_plot_data : bool, default False
        Indicator for whether a :class:`pandas.DataFrame` with various contributions
        for the visualization with estimagic should be returned.

    Returns
    -------
    criterion_function : functools.partial
        :func:`log_like` with all arguments except the parameter vector set.

    Raises
    ------
    AssertionError
        If data has not the expected format.

    Examples
    --------
    >>> import respy as rp
    >>> params, options, data = rp.get_example_model("robinson_crusoe_basic")

    At default the function returns the log likelihood as a scalar value.

    >>> log_like = rp.get_log_like_func(params=params, options=options, df=data)
    >>> scalar = log_like(params)

    Additionally, a :class:`pandas.DataFrame` with data for visualization can be
    returned.

    >>> log_like = rp.get_log_like_func(params=params, options=options, df=data,
    ...     return_comparison_plot_data=True
    ... )
    >>> scalar, df = log_like(params)

    In alternative to the log likelihood, a :class:`numpy.array` of the individual
    log likelihood contributions can be returned.

    >>> log_like_contribs = rp.get_log_like_func(params=params, options=options,
    ...     df=data, return_scalar=False
    ... )
    >>> array = log_like_contribs(params)
    """
    optim_paras, options = process_params_and_options(params, options)
    optim_paras = _update_optim_paras_with_initial_experience_levels(
        optim_paras, df
    )

    # The data is validated against the updated parameters before any further work.
    check_estimation_data(df, optim_paras)

    solve = get_solve_func(params, options)
    state_space = solve.keywords["state_space"]

    df, type_covariates = _process_estimation_data(
        df, state_space, optim_paras, options
    )

    def _draws_for_group(dense_key, row_labels):
        # Draws for one dense key: one row per observation in the group and one
        # column per entry counted by summing the key's choice set.
        n_choices = sum(state_space.dense_key_to_choice_set[dense_key])
        return create_base_draws(
            (len(row_labels), options["estimation_draws"], n_choices),
            next(options["estimation_seed_startup"]),
            options["monte_carlo_sequence"],
        )

    # TODO: Replace with decorator.
    # Groups are visited in the order given by ``groups.items()``, so the seeds are
    # taken from the startup seed iterator in that same order.
    base_draws_est = {
        dense_key: _draws_for_group(dense_key, row_labels)
        for dense_key, row_labels in df.groupby("dense_key").groups.items()
    }

    return partial(
        log_like,
        df=df,
        base_draws_est=base_draws_est,
        solve=solve,
        type_covariates=type_covariates,
        options=options,
        return_scalar=return_scalar,
        return_comparison_plot_data=return_comparison_plot_data,
    )

Esempio n. 4

0

Mostra file

def get_crit_func(params, options, df, version="log_like"):
    """Get the criterion function.

    Return a version of the likelihood functions in respy where all arguments
    except the parameter vector are fixed with :func:`functools.partial`. Thus the
    function can be directly passed into an optimizer or a function for taking
    numerical derivatives.

    By default we return :func:`log_like`. Other versions can be requested via the
    version argument.

    Parameters
    ----------
    params : pandas.DataFrame
        DataFrame containing model parameters.
    options : dict
        Dictionary containing model options.
    df : pandas.DataFrame
        The model is fit to this dataset.
    version : str, default "log_like"
        Can take the values "log_like" and "log_like_obs".

    Returns
    -------
    criterion_function : functools.partial
        :func:`log_like` or :func:`log_like_obs`, depending on ``version``, where
        all arguments except the parameter vector are set. Its ``__name__``
        attribute is set to ``version``.

    Raises
    ------
    ValueError
        If ``version`` is neither "log_like" nor "log_like_obs". This is checked
        before any other work is done.
    AssertionError
        If data has not the expected format.

    """
    # Validate the cheap argument first so that an invalid ``version`` neither
    # triggers the state space construction nor consumes a seed from
    # ``options["estimation_seed_startup"]`` before failing.
    if version not in ("log_like", "log_like_obs"):
        raise ValueError("version has to be 'log_like' or 'log_like_obs'.")

    optim_paras, options = process_params_and_options(params, options)

    optim_paras = _adjust_optim_paras_for_estimation(optim_paras, df)

    check_estimation_data(df, optim_paras)

    state_space = StateSpace(optim_paras, options)

    (
        choices,
        idx_indiv_first_obs,
        indices,
        log_wages_observed,
        type_covariates,
    ) = _process_estimation_data(df, state_space, optim_paras, options)

    # Shape of the draws: (number of entries in ``choices``, number of estimation
    # draws, number of choices in the model).
    base_draws_est = create_base_draws(
        (
            len(choices),
            options["estimation_draws"],
            len(optim_paras["choices"]),
        ),
        next(options["estimation_seed_startup"]),
        options["monte_carlo_sequence"],
    )

    # ``version`` is already known to be one of the two supported names.
    unpartialed = log_like if version == "log_like" else log_like_obs

    criterion_function = partial(
        unpartialed,
        choices=choices,
        idx_indiv_first_obs=idx_indiv_first_obs,
        indices=indices,
        log_wages_observed=log_wages_observed,
        base_draws_est=base_draws_est,
        state_space=state_space,
        type_covariates=type_covariates,
        options=options,
    )

    # this will be relevant for estimagic topography plots
    criterion_function.__name__ = version
    return criterion_function