Example #1
def test_type_proportions(model, type_proportions):
    nine_years_or_less = type_proportions[0]
    ten_years_or_more = type_proportions[1]

    params, options = rp.get_example_model(model, with_data=False)

    options["n_periods"] = 1
    options["simulated_agents"] = 10_000

    simulate = rp.get_simulate_func(params, options)

    df = simulate(params)

    np.testing.assert_allclose(
        df.loc[df.Experience_School.le(9),
               "Type"].value_counts(normalize=True, sort=False).sort_index(),
        nine_years_or_less,
        atol=0.05,
    )

    np.testing.assert_allclose(
        df.loc[df.Experience_School.ge(10),
               "Type"].value_counts(normalize=True, sort=False).sort_index(),
        ten_years_or_more,
        atol=0.05,
    )
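The `model` and `type_proportions` arguments come from a pytest parametrization that is not part of the snippet. A minimal sketch of how it might look, with purely illustrative proportions:

import pytest

@pytest.mark.parametrize(
    "model, type_proportions",
    [
        # Hypothetical proportions of three types, purely for illustration.
        ("kw_97_basic", ([0.5, 0.3, 0.2], [0.4, 0.4, 0.2])),
    ],
)
def test_type_proportions(model, type_proportions):
    ...  # body as in the snippet above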
Example #2
def test_data_variables(model):
    """Value function components in df add up to internally computed values."""
    _, _, df = rp.get_example_model(model)

    for choice in df.Choice.unique():
        choice = choice.capitalize()
        # Shocks in working choices are already included in the wage.
        df["Shock_Nonpec"] = np.where(df[f"Wage_{choice}"].isna(),
                                      df[f"Shock_Reward_{choice}"], 0)
        df[f"Flow_Utility_{choice}_"] = (df[f"Wage_{choice}"].fillna(0) +
                                         df[f"Nonpecuniary_Reward_{choice}"] +
                                         df["Shock_Nonpec"])
        df[f"Value_Function_{choice}_"] = (
            df[f"Flow_Utility_{choice}_"] +
            df["Discount_Rate"] * df[f"Continuation_Value_{choice}"])

        pd.testing.assert_series_equal(
            df[f"Flow_Utility_{choice}_"],
            df[f"Flow_Utility_{choice}"],
            check_names=False,
        )
        pd.testing.assert_series_equal(
            df[f"Value_Function_{choice}_"],
            df[f"Value_Function_{choice}"],
            check_names=False,
        )
Example #3
def main():
    """Evaluate the criterion function multiple times for a scalability report.

    The criterion function is evaluated ``maxfun``-times. The number of threads used is
    limited by environment variables. **respy** has to be imported after the environment
    variables are set as Numpy, Numba and others load them at import time.

    """
    model = sys.argv[1]
    maxfun = int(sys.argv[2])
    n_threads = int(sys.argv[3])

    # Validate input.
    assert maxfun >= 0, "Maximum number of function evaluations cannot be negative."
    assert n_threads >= 1 or n_threads == -1, (
        "Use -1 to impose no restriction on the maximum number of threads or "
        "choose a number greater than zero.")

    # Set number of threads
    os.environ["NUMBA_NUM_THREADS"] = f"{n_threads}"
    os.environ["MKL_NUM_THREADS"] = f"{n_threads}"
    os.environ["OMP_NUM_THREADS"] = f"{n_threads}"
    os.environ["NUMEXPR_NUM_THREADS"] = f"{n_threads}"

    # Late import of respy to ensure that the environment variables are read by NumPy etc.
    import respy as rp

    # Get model
    params, options = rp.get_example_model(model, with_data=False)

    # Simulate the data
    simulate = rp.get_simulate_func(params, options)
    df = simulate(params)

    # Get the criterion function.
    crit_func = rp.get_log_like_func(params, options, df)

    # Time the criterion function evaluations.
    start = dt.datetime.now()

    for _ in range(maxfun):
        crit_func(params)

    end = dt.datetime.now()

    # Aggregate information
    output = {
        "model": model,
        "maxfun": maxfun,
        "n_threads": n_threads,
        "start": str(start),
        "end": str(end),
        "duration": str(end - start),
    }

    # Save time to file
    with open("scalability_results.txt", "a+") as file:
        file.write(json.dumps(output))
        file.write("\n")
Example #4
def test_table_6_exact_solution_row_mean_and_sd(model, subsidy):
    """Replicate the first two rows of Table 6 in Keane and Wolpin (1994).

    In more detail, we test the mean effects and the standard deviations of a 500,
    1000, and 2000 dollar tuition subsidy on years of schooling and on experience
    in occupations a and b, based on 40 samples of 100 individuals each, using the
    true parameters.

    """
    params, options = rp.get_example_model(model, with_data=False)
    options["simulation_agents"] = 4000
    simulate = rp.get_simulate_func(params, options)

    df_wo_ts = simulate(params)

    params.loc[("nonpec_edu", "at_least_twelve_exp_edu"), "value"] += subsidy
    df_w_ts = simulate(params)

    columns = [
        "Bootstrap_Sample", "Experience_Edu", "Experience_A", "Experience_B"
    ]

    # Calculate the statistics based on 40 bootstrap samples of 100 individuals each.
    # Assign bootstrap sample number.
    for df in [df_wo_ts, df_w_ts]:
        df["Bootstrap_Sample"] = pd.cut(df.index.get_level_values(0),
                                        bins=40,
                                        labels=np.arange(1, 41))

    # Calculate mean experiences.
    mean_exp_wo_ts = (df_wo_ts.query("Period == 39")[columns].groupby(
        "Bootstrap_Sample").mean())
    mean_exp_w_ts = (df_w_ts.query("Period == 39")[columns].groupby(
        "Bootstrap_Sample").mean())

    # Calculate bootstrap statistics.
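    # After stack/unstack, rows are bootstrap samples and columns form a
    # (Data, experience measure) MultiIndex, matching the layout of Table 6.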
    diff = (mean_exp_w_ts.subtract(mean_exp_wo_ts).assign(
        Data=model).reset_index().set_index(["Data", "Bootstrap_Sample"
                                             ]).stack().unstack([0, 2]))

    rp_replication = diff.agg(["mean", "std"])

    # Expected values are taken from Table 6 in the paper.
    kw_94_table_6 = pd.read_csv(TEST_RESOURCES_DIR / "kw_94_table_6.csv",
                                index_col=0,
                                header=[0, 1],
                                nrows=2)

    # Test that standard deviations are very close.
    np.testing.assert_allclose(rp_replication[model].iloc[1],
                               kw_94_table_6[model].iloc[1],
                               atol=0.05)

    # Test that difference lies within one standard deviation.
    diff = (rp_replication[model].iloc[0].to_numpy() -
            kw_94_table_6[model].iloc[0].to_numpy())
    assert (np.abs(diff) < kw_94_table_6[model].iloc[1]).all()
Example #5
def process_model_or_seed(model_or_seed, **kwargs):
    if isinstance(model_or_seed, str):
        params, options = rp.get_example_model(model_or_seed, with_data=False)
    else:
        np.random.seed(model_or_seed)
        params, options = generate_random_model(**kwargs)

    if "kw_97" in str(model_or_seed):
        options["n_periods"] = 10

    return params, options
Example #6
def scaling_model_specification(base_model,
                                num_periods=None,
                                add_occ=None,
                                add_types=None):
    params, options = rp.get_example_model(base_model, with_data=False)

    if num_periods is not None:
        options = _modify_periods(options, num_periods)

    if add_occ is not None:
        params, options = _add_occupations(params, options, add_occ)

    if add_types is not None:
        params = _add_types(params, add_types)

    return params, options
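The private helpers are not shown in the snippet. A minimal sketch of what `_modify_periods` might do, purely as an assumption:

def _modify_periods(options, num_periods):
    # Hypothetical helper: adjust the model horizon on a copy of the options.
    options = options.copy()
    options["n_periods"] = num_periods
    return options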
Example #7
def test_dense_period_choice():
    params, options = rp.get_example_model("kw_94_one", with_data=False)
    options["negative_choice_set"] = {}
    options["negative_choice_set"]["b"] = ["period < 5"]

    optim_paras, options = process_params_and_options(params, options)
    state_space = create_state_space_class(optim_paras, options)

    check = _create_dense_period_choice(state_space.core, state_space.dense,
                                        state_space.core_key_to_core_indices,
                                        state_space.core_key_to_complex,
                                        optim_paras, options)

    for key in check:
        if key[0] < 5:
            # Choice b is removed from the choice set before period 5, so its
            # flag must be False.
            assert not key[1][1]
Example #8
def get_quantity_of_interest(sample):

    # We need the baseline options and a grid for the indices. It does not matter
    # which of the three KW94 specifications we use here.
    base_params, base_options = rp.get_example_model("kw_94_one",
                                                     with_data=False)
    index = pd.read_csv(f"{INPUT_DIR}/table41_kw_94.csv",
                        sep=",")["parameter"].values

    sample = pd.Series(data=sample, index=index)
    param_sample = transform_params_kw94_respy(sample)
    param_sample = pd.DataFrame(param_sample, columns=["value"])
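    # The quantity of interest is the effect of a 500 dollar tuition subsidy on
    # average years of schooling.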

    policy_edu, _ = model_wrapper_kw_94(param_sample, base_options, 500.0)
    base_edu, _ = model_wrapper_kw_94(param_sample, base_options, 0.0)

    return policy_edu - base_edu
Example #9
def process_model_or_seed(model_or_seed=None, **kwargs):
    if isinstance(model_or_seed, str):
        params, options = rp.get_example_model(model_or_seed, with_data=False)
    elif isinstance(model_or_seed, int):
        np.random.seed(model_or_seed)
        params, options = generate_random_model(**kwargs)
    else:
        raise ValueError

    if "kw_94" in str(model_or_seed):
        options["n_periods"] = 10
    if "kw_97" in str(model_or_seed):
        options["n_periods"] = 5
    elif "kw_2000" in str(model_or_seed):
        options["n_periods"] = 3

    return params, options
Example #10
def test_transform_datasets():
    """Test whether the transformations work for the baseline parameterizations."""
    for count, dataset in enumerate(["one", "two"]):
        par_name = f"kw_94_{dataset}"
        csv_name = f"{INPUT_DIR}/table4{count + 1}_kw_94.csv"

        par_respy, _ = rp.get_example_model(par_name, with_data=False)
        par_respy = par_respy["value"].to_numpy()

        df = pd.read_csv(csv_name, sep=",")
        par_uq = pd.Series(data=df["true"].values, index=df["parameter"].values)
        par_uq = transform_params_kw94_respy(par_uq).to_numpy()

        # TODO: For some reason the test fails for the third dataset, which is why
        # the loop only covers the first two. This needs to be investigated later.
        np.testing.assert_almost_equal(par_respy, par_uq)
Example #11
def run(args):

    # We evaluate the model at the baseline parameters and store the results for
    # future processing.
    base_params, base_options = rp.get_example_model("kw_94_one", with_data=False)
    policy_edu, _ = model_wrapper_kw_94(base_params, base_options, 500)
    base_edu, _ = model_wrapper_kw_94(base_params, base_options, 0)
    base_quantity = policy_edu - base_edu

    base_quantity = pd.DataFrame(base_quantity, columns=['avg_schooling'], index=[0])
    base_quantity.to_pickle(RSLT_DIR / "base_quantity.uq.pkl")
    base_params.to_pickle(RSLT_DIR / "base_params.uq.pkl")

    # We need to set up the covariance matrix and the estimated parameters from
    # the paper.
    df = pd.read_csv(f"{INPUT_DIR}/table41_kw_94.csv", sep=",")
    mean, cov = df["true"].values, np.diag((df["sd"] ** 2).values)

    # We are ready to draw the random points of evaluation.
    np.random.seed(args.seed)
    distribution = cp.MvNormal(loc=mean, scale=cov)

    samples = list()
    for _ in range(args.num_draws):
        samples.append(distribution.sample())

    quantities = mp.Pool(args.num_procs).map(get_quantity_of_interest, samples)

    # We now store the random parameters and the quantity of interest for further
    # processing.
    index = pd.read_csv(f"{INPUT_DIR}/table41_kw_94.csv", sep=",")["parameter"].values

    params = list()
    for sample in samples:
        sample = pd.Series(data=sample, index=index)

        param_sample = pd.DataFrame(transform_params_kw94_respy(sample), columns=["value"])
        params.append(param_sample)
    mc_params = pd.concat(params, keys=range(args.num_draws), names=['iteration'])

    mc_quantities = pd.DataFrame(quantities, columns=['avg_schooling'], index=range(args.num_draws))
    mc_quantities.index.name = 'iteration'

    mc_quantities.to_pickle("mc_quantity.uq.pkl")
    mc_params.to_pickle("mc_params.uq.pkl")
Example #12
def process_model_or_seed(model_or_seed=None, **kwargs):
    if isinstance(model_or_seed, str):
        params, options = rp.get_example_model(model_or_seed, with_data=False)
    elif isinstance(model_or_seed, int):
        np.random.seed(model_or_seed)
        params, options = generate_random_model(**kwargs)
    else:
        raise ValueError

    if "kw_94" in str(model_or_seed):
        options["n_periods"] = 10
    elif "kw_97" in str(model_or_seed):
        options["n_periods"] = 5
    elif "kw_2000" in str(model_or_seed):
        options["n_periods"] = 3
    elif "robinson_crusoe_extended" in str(model_or_seed):
        options["n_periods"] = 5
    elif "robinson_crusoe_with_observed_characteristics" in str(model_or_seed):
        options["n_periods"] = 5

    return params, options
Example #13
def test_distribution_of_lagged_choices():
    params, options, actual_df = rp.get_example_model("kw_97_extended")

    options["n_periods"] = 1
    options["simulated_agents"] = 10_000

    simulate = rp.get_simulate_func(params, options)
    df = simulate(params)

    actual_df = actual_df.query("Period == 0")
    expected = pd.crosstab(actual_df.Lagged_Choice_1,
                           actual_df.Experience_School,
                           normalize="columns")

    df = df.query("Period == 0")
    calculated = pd.crosstab(df.Lagged_Choice_1,
                             df.Experience_School,
                             normalize="columns")

    # Allow for 4% differences, which are likely for small subsets.
    np.testing.assert_allclose(expected, calculated, atol=0.04)
Example #14
def generate_data(model, present_bias=1):
    """Generate and save simulated data from the specified model with the given
    present-bias parameter.

    Parameters
    ----------
    model : str
        "kw_94_one", "kw_94_two", or "kw_94_three", according to the desired
        Keane and Wolpin (1994) specification.
    present_bias : float
        1 for exponential discounting, < 1 for hyperbolic discounting.

    """
    params, options = rp.get_example_model(model, with_data=False)

    params.loc[("beta", "beta"), ["value", "comment"]] = [
        present_bias,
        "present-bias parameter",
    ]

    simulation_seeds = np.linspace(0, 99, 100)
    solution_seeds = np.linspace(1000, 1099, 100)

    # Generate datasets
    for simulation, solution in zip(simulation_seeds, solution_seeds):
        options["simulation_seed"] = int(simulation)
        options["solution_seed"] = int(solution)
        simulate = rp.get_simulate_func(params, options)
        df = simulate(params)

        # Save datasets (the target directories must already exist).
        if present_bias == 1:
            df.to_pickle(
                f"respy_datasets/exp_datasets/{model}/seed_sim_{int(simulation)}_sol_seed_{int(solution)}.pickle"
            )
        else:
            df.to_pickle(
                f"respy_datasets/hyp_datasets/{model}/seed_sim_{int(simulation)}_sol_seed_{int(solution)}.pickle"
            )
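A hypothetical call that generates the hyperbolic-discounting datasets for the first specification:

generate_data("kw_94_one", present_bias=0.8)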
Example #15
def test_replication_of_choice_probabilities(model, table):
    """Replicate choice probabilities in Tables 2.1-2.3. in Keane and Wolpin (1994b).

    For each of the three parameterizations a data set is simulated and the choice
    probabilities for each period are compared to the numbers in the paper.

    """
    # Get choice probabilities from paper.
    expected = pd.read_csv(TEST_RESOURCES_DIR / table, index_col="period")

    # Simulate a data set with more individuals to stabilize the choice
    # probabilities.
    params, options = rp.get_example_model(model, with_data=False)
    options["simulation_agents"] = 10_000

    simulate = rp.get_simulate_func(params, options)
    df = simulate(params)

    result = (df.groupby("Period").Choice.value_counts(
        normalize=True).unstack().fillna(0))

    np.testing.assert_allclose(expected, result, atol=0.1)
Example #16
def task_get_history_delta_wage_moments(produces):
    np.random.seed(123)
    params, options, data_stored = rp.get_example_model("kw_94_one")

    model_to_simulate = rp.get_simulate_func(params, options)
    parameter_true = {"delta_delta": 0.95}

    pseudo_observed_data = compute_model(
        parameter_true,
        model_to_simulate=model_to_simulate,
        parameter_for_simulation=params,
        options_for_simulation=options,
        descriptives="wage_moments",
    )

    population_size = 500
    max_nr_populations = 10
    minimum_epsilon = 0.05
    delta_prior_low = 0.9
    delta_prior_length = 0.09
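    # Presumably the uniform prior follows the scipy.stats (loc, scale)
    # convention, i.e. delta ~ U[0.90, 0.99].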
    parameters_prior = {
        "delta_delta": [[delta_prior_low, delta_prior_length], "uniform"]
    }

    history = respyabc(
        model=compute_model,
        parameters_prior=parameters_prior,
        data=pseudo_observed_data,
        distance_abc=compute_mean_squared_distance,
        descriptives="wage_moments",
        population_size_abc=population_size,
        max_nr_populations_abc=max_nr_populations,
        minimum_epsilon_abc=minimum_epsilon,
    )
    with open(produces, "wb") as out_file:
        pickle.dump(history, out_file)
Example #17
    df_occ = df_occ[~cond]
    df_occ = df_occ.unstack()
    return df_occ


def calc_wage_distribution_overall(df):
    """Compute the overall wage distribution."""
    df_ove = df.groupby(["Period"])["Wage"].describe()[["mean", "std"]]
    df_ove["Choice"] = "all"
    df_ove.set_index(["Choice"], append=True, inplace=True)
    df_ove = df_ove.reorder_levels(["Period", "Choice"])
    df_ove = df_ove.unstack()
    return df_ove


params, options, df_emp = rp.get_example_model("kw_97_extended_respy")

# We want to reduce the computational burden for debugging purposes and our continuous
# integration pipeline.
if IS_DEBUG:
    options["n_periods"] = 12

simulate_func = rp.get_simulate_func(params, options)
df_sim = simulate_func(params)

df_descriptives = None

for label, df in [("empirical", df_emp), ("simulated", df_sim)]:

    df_occ = calc_wage_distribution_occupation(df)
    df_ove = calc_wage_distribution_overall(df)
Example #18
def test_table_6_exact_solution_row_mean_and_sd():
    """Replicate the first two rows of Table 6 in Keane and Wolpin (1994).

    In more detail, we test the mean effects and the standard deviations of a 500,
    1000, and 2000 dollar tuition subsidy on years of schooling and on experience
    in occupations a and b, based on 40 samples of 100 individuals each, using the
    true parameters.

    """
    # Specify the three different data sets.
    models = np.repeat(["one", "two", "three"], 2)
    tuition_subsidies = [0, 500, 0, 1000, 0, 2000]

    # Generate the 3 * 2 data sets as list of DataFrames by simulating with respective
    # tuition subsidy.
    data_frames = []
    for model, subsidy in zip(models, tuition_subsidies):
        params, options = rp.get_example_model(f"kw_94_{model}",
                                               with_data=False)
        options["simulation_agents"] = 4000
        simulate = rp.get_simulate_func(params, options)
        params.loc[("nonpec_edu", "at_least_twelve_exp_edu"),
                   "value"] += subsidy
        data_frames.append(simulate(params))

    columns = [
        "Bootstrap_Sample", "Experience_Edu", "Experience_A", "Experience_B"
    ]

    # Calculate the statistics based on 40 bootstrap samples of 100 individuals each.
    bootstrapped_statistics = []
    for i, title in zip(range(0, 6, 2),
                        ["kw_94_one", "kw_94_two", "kw_94_three"]):
        # Select sample with and without tuition subsidy.
        df_wo_ts = data_frames[i]
        df_w_ts = data_frames[i + 1]

        # Assign bootstrap sample number.
        df_wo_ts["Bootstrap_Sample"] = pd.cut(df_wo_ts.Identifier,
                                              bins=40,
                                              labels=np.arange(1, 41))
        df_w_ts["Bootstrap_Sample"] = pd.cut(df_w_ts.Identifier,
                                             bins=40,
                                             labels=np.arange(1, 41))

        # Calculate mean experiences.
        mean_exp_wo_ts = (
            df_wo_ts.loc[df_wo_ts.Period.eq(39),
                         columns].groupby("Bootstrap_Sample").mean())
        mean_exp_w_ts = (
            df_w_ts.loc[df_w_ts.Period.eq(39),
                        columns].groupby("Bootstrap_Sample").mean())

        # Calculate bootstrap statistics.
        diff = (mean_exp_w_ts.subtract(mean_exp_wo_ts).assign(
            Data=title).reset_index().set_index(["Data", "Bootstrap_Sample"
                                                 ]).stack().unstack([0, 2]))
        bootstrapped_statistics.append(diff)

    rp_replication = pd.concat(
        [bs.agg(["mean", "std"]) for bs in bootstrapped_statistics], axis=1)

    # Expected values are taken from csv of table 6.
    kw_94_table_6 = pd.read_csv(TEST_RESOURCES_DIR / "kw_94_table_6.csv",
                                index_col=0,
                                header=[0, 1],
                                nrows=2)

    # Test that standard deviations are very close.
    np.testing.assert_allclose(rp_replication.iloc[1],
                               kw_94_table_6.iloc[1],
                               atol=0.05)

    # Test that difference lies within one standard deviation.
    diff = rp_replication.iloc[0].to_numpy() - kw_94_table_6.iloc[0].to_numpy()
    assert (np.abs(diff) < kw_94_table_6.iloc[1]).all()
Example #19
import respy as rp

# We create a grid for the KW94 additions.
params, options, data = rp.get_example_model("kw_94_one")
occ_grid_kw94 = params.loc[(f"wage_a", slice(None)), :].copy()

occ_grid_kw94.reset_index(inplace=True)
occ_grid_kw94["category"] = "wage_aa"
occ_grid_kw94.set_index(["category", "name"], inplace=True)
occ_grid_kw94.to_pickle("occ_grid_kw_94.pkl")

# We create a grid for the KW97 additions.
params, options, data = rp.get_example_model("kw_97_extended")
labels = list()
labels += [
    "constant", "exp_school", "exp_white_collar", "exp_white_collar_square"
]
labels += ["exp_blue_collar", "exp_blue_collar_squared", "exp_military"]
labels += ["type_1", "type_2", "type_3"]
occ_grid_kw97 = params.loc[(f"wage_white_collar", labels), :].copy()

occ_grid_kw97.reset_index(inplace=True)
occ_grid_kw97.loc[:, "category"] = "wage_aa"
occ_grid_kw97.set_index(["category", "name"], inplace=True)
occ_grid_kw97.to_pickle("occ_grid_kw_97.pkl")
Example #20
        # Extract choice shares per period.
        choices = df.groupby("Period").Choice.value_counts(
            normalize=True).unstack()

        # Extract wages (mean and std).
        wages = df[col_to_keep].groupby("Period").describe().loc[:, (
            slice(None), ["mean", "std"])]
        res = pd.concat([wages, choices], axis=1)

    return res


if __name__ == "__main__":

    # Load parameters.
    params, options = rp.get_example_model("kw_94_three", with_data=False)
    options["simulation_agents"] = 10_000

    params_dict = {
        "true": {
            "delta": 0.95,
            "beta": 0.8
        },
        "miss_exp": {
            "delta": 0.938,
            "beta": 1
        },
        "miss_1": {
            "delta": 0.948,
            "beta": 0.83
        },
Example #21
import os

import pandas as pd
import respy as rp


params, options, df_obs = rp.get_example_model("kw_97_basic")
params = pd.read_pickle("params_revised.pkl")
label = ("nonpec_school", "hs_graduate")

# We need to save on memory when running the script under CI.
if "CI" in os.environ:
    params, options, df_obs = rp.get_example_model("kw_94_one")
    label = ("nonpec_edu", "at_least_twelve_exp_edu")

simulate_func = rp.get_simulate_func(params, options)
df_sim = simulate_func(params)
df_sim.to_pickle("df_sim.pkl")

params_pol = params.copy()
params_pol.loc[label, "value"] += 2000
df_pol = simulate_func(params_pol)
df_pol.to_pickle("df_pol.pkl")
"""Auxiliary code for bootsrap."""
import respy as rp
from calibration_maximum_likelihood import run_bootstrap

NUM_BOOTS = 1000

# Get the basic model setup
params_base, options_base, df = rp.get_example_model("robinson",
                                                     with_data=True)

params_base["lower"] = [0.9, 0.00, -0.20, 1.00, 0.0050, 0.001, -0.2]
params_base["upper"] = [1.0, 0.10, 0.00, 1.10, 0.0150, 0.030, +0.2]

# We will use estimagic and fix all parameters at their true values.
constr_base = [
    {
        "loc": ("shocks_sdcorr", "sd_fishing"),
        "type": "fixed"
    },
    {
        "loc": ("shocks_sdcorr", "sd_hammock"),
        "type": "fixed"
    },
    {
        "loc": ("shocks_sdcorr", "corr_hammock_fishing"),
        "type": "fixed"
    },
    {
        "loc": "wage_fishing",
        "type": "fixed"
    },
Example #23
def test_n_step_ahead_simulation_with_data(model):
    params, options, df = rp.get_example_model(model)
    options["n_periods"] = 11
    simulate = rp.get_simulate_func(params, options, "n_step_ahead_with_data",
                                    df)
    _ = simulate(params)
Example #24
def test_one_step_ahead_simulation(model):
    params, options, df = rp.get_example_model(model)
    options["n_periods"] = 11
    simulate = rp.get_simulate_func(params, options, "one_step_ahead", df)
    _ = simulate(params)
Example #25
def transform_params_kw94_respy(kw94_params):

    assert len(kw94_params) == 26, "Length of KW94 vector must be 26."

    params, _ = rp.get_example_model("kw_94_one", with_data=False)

    rp_params = pd.Series(data=np.full(len(params["value"].values), np.nan),
                          index=params.index)

    # Copy values that are not in KW94 from the respy parameters.
    rp_params[("delta", "delta")] = params.loc[("delta", "delta"), "value"]
    rp_params[("meas_error", "sd_a")] = params.loc[("meas_error", "sd_a"),
                                                   "value"]
    rp_params[("meas_error", "sd_b")] = params.loc[("meas_error", "sd_b"),
                                                   "value"]
    rp_params[("lagged_choice_1_edu",
               "edu_ten")] = params.loc[("lagged_choice_1_edu", "edu_ten"),
                                        "value"]
    rp_params[("initial_exp_edu", "10")] = params.loc[("initial_exp_edu",
                                                       "10"), "value"]
    rp_params[("maximum_exp", "edu")] = params.loc[("maximum_exp", "edu"),
                                                   "value"]

    # Set values that respy multiplies by -1.
    # Squared-experience alphas.
    rp_params[("wage_a", "exp_a_square")] = -kw94_params["alpha13"]
    rp_params[("wage_a", "exp_b_square")] = -kw94_params["alpha15"]
    rp_params[("wage_b", "exp_b_square")] = -kw94_params["alpha23"]
    rp_params[("wage_b", "exp_a_square")] = -kw94_params["alpha25"]
    # betas
    rp_params[("nonpec_edu",
               "at_least_twelve_exp_edu")] = -kw94_params["beta1"]
    rp_params[("nonpec_edu", "not_edu_last_period")] = -kw94_params["beta2"]

    # Set SDs and Corrs that are Cholesky elements in KW94.
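    # cov = chol @ chol.T recovers the covariance matrix from the lower
    # triangular Cholesky factor; sd_i = sqrt(cov_ii) and
    # corr_ij = cov_ij / (sd_i * sd_j).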
    chol = np.zeros((4, 4))
    np.fill_diagonal(chol, kw94_params[["a11", "a22", "a33", "a44"]])
    chol[1, 0] = kw94_params["a21"]
    chol[2, :2] = [kw94_params["a31"], kw94_params["a32"]]
    chol[3, :3] = [kw94_params["a41"], kw94_params["a42"], kw94_params["a43"]]

    cov = np.matmul(chol, chol.T)
    sd = np.sqrt(np.diag(cov))

    rp_params[("shocks_sdcorr", "sd_a")] = sd[0]
    rp_params[("shocks_sdcorr", "sd_b")] = sd[1]
    rp_params[("shocks_sdcorr", "sd_edu")] = sd[2]
    rp_params[("shocks_sdcorr", "sd_home")] = sd[3]
    rp_params[("shocks_sdcorr", "corr_b_a")] = cov[1, 0] / (sd[1] * sd[0])
    rp_params[("shocks_sdcorr", "corr_edu_a")] = cov[2, 0] / (sd[2] * sd[0])
    rp_params[("shocks_sdcorr", "corr_edu_b")] = cov[2, 1] / (sd[2] * sd[1])
    rp_params[("shocks_sdcorr", "corr_home_a")] = cov[3, 0] / (sd[3] * sd[0])
    rp_params[("shocks_sdcorr", "corr_home_b")] = cov[3, 1] / (sd[3] * sd[1])
    rp_params[("shocks_sdcorr", "corr_home_edu")] = cov[3, 2] / (sd[3] * sd[2])

    # Fill in KW94 parameters that are not transformed.
    # alphas
    rp_params[("wage_a", "constant")] = kw94_params["alpha10"]
    rp_params[("wage_a", "exp_edu")] = kw94_params["alpha11"]
    rp_params[("wage_a", "exp_a")] = kw94_params["alpha12"]
    rp_params[("wage_a", "exp_b")] = kw94_params["alpha14"]

    rp_params[("wage_b", "constant")] = kw94_params["alpha20"]
    rp_params[("wage_b", "exp_edu")] = kw94_params["alpha21"]
    # The second index of alpha is switched compared to wage_a above.
    rp_params[("wage_b", "exp_a")] = kw94_params["alpha24"]
    rp_params[("wage_b", "exp_b")] = kw94_params["alpha22"]

    # betas
    rp_params[("nonpec_edu", "constant")] = kw94_params["beta0"]

    # gamma
    rp_params[("nonpec_home", "constant")] = kw94_params["gamma0"]

    return rp_params
Example #26
def test_one_step_ahead_simulation():
    params, options, df = rp.get_example_model("kw_97_basic")
    options["n_periods"] = 11
    simulate = rp.get_simulate_func(params, options, "one_step_ahead", df)
    df = simulate(params)
Example #27
from auxiliary import TAGS

if __name__ == "__main__":
    comm = MPI.Comm.Get_parent()
    num_slaves, rank = comm.Get_size(), comm.Get_rank()
    status = MPI.Status()

    # We need some additional task-specific information.
    prob_info = comm.bcast(None)

    subdir = f"subdir_child_{rank}"
    os.mkdir(subdir)
    os.chdir(subdir)

    # We now set up the simulation function of `respy`.
    params, options, df = rp.get_example_model("kw_94_one")
    simulate = rp.get_simulate_func(params, options)

    rslt = list()
    while True:

        # Signal readiness
        comm.send(None, dest=0)

        # Receive instructions and act accordingly.
        comm.recv(status=status)
        tag = status.Get_tag()

        if tag == TAGS.EXIT:
            # We set up a container to store the results.
            df = pd.DataFrame(rslt, columns=["qoi", "delta", "exp_edu"])
Example #28
import os
import sys

import pandas as pd
import respy as rp

if __name__ == '__main__':
    if not os.path.exists('simulated_data'):
        os.mkdir('simulated_data')
    model_name = sys.argv[1]

    params, options, _ = rp.get_example_model(model_name)
    options['solution_draws'] = 250
    options['simulation_agents'] = 750
    state_space, data = rp.simulate(params, options)
    pd.to_pickle(data, os.path.join("simulated_data", f"{model_name}.pickle"))
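A hypothetical invocation, assuming the script is saved as simulate_data.py:

python simulate_data.py kw_94_one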
Example #29
import pandas as pd
import respy as rp

GRID_TAU = [0.01, 0.001, 0.0001, 0.00001, 0.000001, 0.0000001]
GRID_AGENTS = [100, 1000, 10000]
GRID_DRAWS = [100, 1000, 10000]

index = []
for num_agents in GRID_AGENTS:
    for num_draws in GRID_DRAWS:
        for tau in GRID_TAU:
            index.append((num_agents, num_draws, tau))
index = pd.MultiIndex.from_tuples(index, names=("agents", "draws", "tau"))
rslts = pd.DataFrame(index=index, columns=["delta"])

params_base, options_base = rp.get_example_model("robinson", False)
delta_true = params_base.loc[("delta", "delta"), "value"]

for num_agents in GRID_AGENTS:

    for num_draws in GRID_DRAWS:

        options = options_base.copy()
        options["estimation_draws"] = num_draws
        options["solution_draws"] = num_draws

        simulate = rp.get_simulate_func(params_base, options)
        df = simulate(params_base)

        for tau in GRID_TAU:
Example #30
def main():
    """Run the estimation of a model using a number of threads and a maximum of function
    evaluations.

    Currently, we circumvent the optimization by setting maxfun to 0 and just looping
    over the estimation.

    """
    version = sys.argv[1]
    model = sys.argv[2]
    maxfun = int(sys.argv[3])
    num_procs = int(sys.argv[4])
    num_threads = int(sys.argv[5])

    # Validate command line input.
    assert maxfun >= 0, "Maximum number of function evaluations cannot be negative."
    assert num_threads >= 1 or num_threads == -1, (
        "Use -1 to impose no restriction on the maximum number of threads or "
        "choose a number greater than zero.")

    # Set number of threads
    if num_threads != -1 and version == "python":
        os.environ["NUMBA_NUM_THREADS"] = f"{num_threads}"
        os.environ["MKL_NUM_THREADS"] = f"{num_threads}"
        os.environ["OMP_NUM_THREADS"] = f"{num_threads}"
        os.environ["NUMEXPR_NUM_THREADS"] = f"{num_threads}"

    # Late import of respy to ensure that environment variables are read.
    from respy import RespyCls, get_example_model
    from respy.python.interface import respy_interface
    from respy.fortran.interface import resfort_interface

    # Get model
    options_spec, params_spec = get_example_model(model)

    # Adjust options
    options_spec["program"]["version"] = version
    options_spec["estimation"]["maxfun"] = 0
    if version == "fortran":
        options_spec["program"]["procs"] = num_procs
        options_spec["program"]["threads"] = num_threads

    # Go into temporary folder
    folder = f"__{num_threads}"
    if Path(folder).exists():
        shutil.rmtree(folder)

    Path(folder).mkdir()
    os.chdir(folder)

    # Initialize the class
    respy_obj = RespyCls(params_spec, options_spec)

    # Simulate the data
    state_space, simulated_data = respy_interface(respy_obj, "simulate")

    # Run the estimation
    print(
        f"Start. Program: {version}, Model: {model}, Maxfun: {maxfun}, Procs: "
        f"{num_procs}, Threads: {num_threads}.")
    start = dt.datetime.now()

    for _ in range(maxfun):
        if version == "python":
            respy_interface(respy_obj, "estimate", simulated_data)
        else:
            resfort_interface(respy_obj, "estimate", simulated_data)

    end = dt.datetime.now()

    print(f"End. Duration: {end - start} seconds.")

    # Aggregate information
    output = {
        "version": version,
        "model": model,
        "maxfun": maxfun,
        "num_procs": num_procs,
        "num_threads": num_threads,
        "start": str(start),
        "end": str(end),
        "duration": str(end - start),
    }

    # Step out of temp folder and delete it
    os.chdir("..")
    shutil.rmtree(folder)

    # Save time to file
    with open("scalability_results.txt", "a+") as file:
        file.write(json.dumps(output))
        file.write("\n")