Ejemplo n.º 1
0
def test6():
    """Check the comparison file after an estimation with zero iterations.

    Five times over, a random initialization dictionary is generated with
    ``MAXITER`` set to zero, ``START`` set to ``"init"`` and 15000 agents, two
    entries of its ``DIST`` parameters are overwritten, and the dictionary is
    printed before simulation and estimation are run on ``test.grmpy.yml``.
    Afterwards ``comparison.grmpy.info`` is read and, for each section, the
    ``Number`` entries must all be identical and the observed, simulated
    (finish) and simulated (start) samples are compared.
    """
    sections = ("ALL", "TREATED", "UNTREATED")

    for _ in range(5):
        constr = {
            "DETERMINISTIC": False,
            "MAXITER": 0,
            "AGENTS": 15000,
            "START": "init",
            "SAME_SIZE": True,
        }
        init_dict = generate_random_dict(constr)
        init_dict["DIST"]["params"][1] = 0.0
        init_dict["DIST"]["params"][5] = 1.0
        print_dict(init_dict)

        simulate("test.grmpy.yml")
        fit("test.grmpy.yml")

        comparison = read_desc("comparison.grmpy.info")
        for section in sections:
            stats = comparison[section]
            observed = stats["Observed Sample"]
            finish = stats["Simulated Sample (finish)"]
            begin = stats["Simulated Sample (start)"]

            # Every entry of the "Number" row has to be the same value.
            np.testing.assert_equal(len(set(stats["Number"])), 1)

            # NOTE(review): the third positional argument of these numpy helpers
            # is ``decimal`` (a number of decimal places), not a tolerance.
            # Passing 0.001 therefore allows differences of roughly 1.5 -
            # confirm whether a tighter check was intended.
            np.testing.assert_almost_equal(observed, finish, 0.001)
            np.testing.assert_array_almost_equal(finish, begin, 0.001)
Ejemplo n.º 2
0
def test5():
    """Run simulation and estimation with the iteration limit set to zero.

    The test passes when five consecutive rounds of generating a random
    initialization file (with ``MAXITER`` equal to zero), simulating and
    fitting complete without raising.
    """
    init_file = "test.grmpy.yml"
    for _ in range(5):
        # A fresh constraint dictionary is built for every round.
        constr = {"DETERMINISTIC": False, "MAXITER": 0}
        generate_random_dict(constr)
        simulate(init_file)
        fit(init_file)
Ejemplo n.º 3
0
def test4():
    """Run simulation and estimation with the Powell optimizer requested.

    Five times over, a random initialization file is generated with 10000
    agents, ``"init"`` start values and ``"SCIPY-Powell"`` as optimizer; the
    test passes when simulation and estimation complete without raising.

    The constraint keys are spelled in upper case (``"START"``,
    ``"OPTIMIZER"``) to match every other test in this file. The previous
    lower-case spelling presumably never reached the generator, so the Powell
    option was not actually exercised - verify against
    ``generate_random_dict``.
    """
    for _ in range(5):
        constr = {
            "DETERMINISTIC": False,
            "AGENTS": 10000,
            "START": "init",
            "OPTIMIZER": "SCIPY-Powell",
        }
        generate_random_dict(constr)

        simulate("test.grmpy.yml")
        fit("test.grmpy.yml")
Ejemplo n.º 4
0
def test9():
    """Exercise generation, read-in, simulation and estimation with state differences.

    Five times over, a random initialization file is generated with the
    ``STATE_DIFF`` and ``OVERLAP`` constraints enabled (allowing a different
    number of covariates for each treatment state and the occurrence of
    cost-benefit shifters), then read, simulated and fitted. The test passes
    when all steps complete without raising; ``cleanup`` is called once at the
    end.

    The agent count is passed under the key ``"AGENTS"``, the spelling used by
    the other tests in this file. The previous key ``"AGENT"`` was presumably
    ignored by the generator - verify against ``generate_random_dict``.
    """
    for _ in range(5):
        constr = {
            "DETERMINISTIC": False,
            "AGENTS": 1000,
            "STATE_DIFF": True,
            "OVERLAP": True,
        }
        generate_random_dict(constr)
        read("test.grmpy.yml")
        simulate("test.grmpy.yml")
        fit("test.grmpy.yml")

    cleanup()
Ejemplo n.º 5
0
def test1():
    """Compare the semiparametric ``mte_u`` estimate with stored reference values.

    The replication initialization file is read, pointed at the mock data set
    from Carneiro et al (2011), written back out and estimated with
    ``semipar=True``. The resulting ``mte_u`` must agree with the pickled
    reference results (described as stemming from R's locpoly function) to six
    decimals.
    """
    source_file = TEST_RESOURCES_DIR + "/replication_semipar.yml"
    data_file = TEST_RESOURCES_DIR + "/aer-replication-mock.pkl"
    output_stem = TEST_RESOURCES_DIR + "/replication_semipar"
    estimation_file = TEST_RESOURCES_DIR + "/replication_semipar.grmpy.yml"
    reference_file = TEST_RESOURCES_DIR + "/replication-results-mte_u.pkl"

    # Redirect the estimation to the mock data set and write the adjusted file.
    init_dict = read(source_file)
    init_dict["ESTIMATION"]["file"] = data_file
    print_dict(init_dict, output_stem)

    test_rslt = fit(estimation_file, semipar=True)
    expected_mte_u = pd.read_pickle(reference_file)

    np.testing.assert_array_almost_equal(test_rslt["mte_u"], expected_mte_u, 6)
Ejemplo n.º 6
0
def test10():
    """Check that a zero-iteration fit reports the start values unchanged.

    Ten times over, a random initialization file is generated with ``MAXITER``
    set to zero and ``"init"`` start values. The start values are computed
    from the initialization dictionary and the simulated data, passed through
    ``backward_transformation`` and compared with the ``x_internal`` entry in
    the ``AUX`` section of the estimation result.
    """
    init_file = "test.grmpy.yml"

    for _ in range(10):
        constr = {
            "DETERMINISTIC": False,
            "AGENTS": 1000,
            "MAXITER": 0,
            "START": "init",
        }
        generate_random_dict(constr)

        init_dict = read(init_file)
        df = simulate(init_file)
        expected = backward_transformation(start_values(init_dict, df, "init"))

        rslt = fit(init_file)

        np.testing.assert_equal(expected, rslt["AUX"]["x_internal"])
Ejemplo n.º 7
0
def test10():
    """Check that a zero-iteration BFGS fit reports the start values unchanged.

    Ten times over, a random initialization file is generated with ``MAXITER``
    set to zero, ``"init"`` start values and ``"BFGS"`` as optimizer. The
    start values are computed from the processed simulated data, passed
    through ``backward_transformation`` and compared with the parameter values
    stored under ``opt_rslt`` in the estimation result.
    """
    init_file = "test.grmpy.yml"

    for _ in range(10):
        constr = {
            "DETERMINISTIC": False,
            "AGENTS": 1000,
            "MAXITER": 0,
            "START": "init",
            "OPTIMIZER": "BFGS",
        }
        generate_random_dict(constr)

        init_dict = read(init_file)
        df = simulate(init_file)
        D, X1, X0, Z1, Z0, Y1, Y0 = process_data(df, init_dict)
        expected = backward_transformation(
            start_values(init_dict, D, X1, X0, Z1, Z0, Y1, Y0, "init")
        )

        rslt = fit(init_file)

        np.testing.assert_equal(expected, rslt["opt_rslt"]["params"].values)
Ejemplo n.º 8
0
def monte_carlo(file, grid_points):
    """Estimate the ATE over a grid of correlations between U1 and V.

    For each correlation value on an evenly spaced grid from 0.00 to 0.99, the
    model specification read from ``file`` is passed to
    ``update_correlation_structure``, a data set is created, and the average
    treatment effect is computed three ways: from the simulated potential
    outcomes, from a grmpy estimation and from an OLS regression.

    Args:
        file: Initialization file whose specification receives the updated
            correlation. Note that the covariate list and the grmpy estimation
            both use the hard-coded ``"reliability.grmpy.yml"``.
        grid_points: Number of correlation values on the grid.

    Returns:
        Dictionary with the keys ``"grmpy"``, ``"ols"`` and ``"true"``, each
        holding a list with one effect per grid point.
    """
    # One result list per estimation strategy.
    effects = {key_: [] for key_ in ["grmpy", "ols", "true"]}

    # Loop over different correlations between V and U_1
    for rho in np.linspace(0.00, 0.99, grid_points):

        # Readjust the initialization file values to add correlation
        model_spec = read(file)
        sim_spec = read("reliability.grmpy.yml")
        X = sim_spec["TREATED"]["order"]
        update_correlation_structure(model_spec, rho)

        # Simulate a data set and specify exogenous and endogenous variables
        df_mc = create_data()
        endog, exog, exog_ols = df_mc["wage"], df_mc[X], df_mc[["state"] + X]

        # Calculate true average treatment effect
        ATE = np.mean(df_mc["wage1"] - df_mc["wage0"])
        effects["true"].append(ATE)

        # Estimate via grmpy
        rslt = fit("reliability.grmpy.yml")
        beta_diff = rslt["TREATED"]["params"] - rslt["UNTREATED"]["params"]
        # Column means are requested explicitly: np.mean on a DataFrame reduces
        # over both axes (returning a scalar) on pandas >= 2.0.
        stat = np.dot(exog.mean(axis=0), beta_diff)
        effects["grmpy"].append(stat)

        # Estimate via OLS. "state" is the first regressor, so its coefficient
        # sits at position 0; .iloc keeps the lookup positional without relying
        # on the deprecated integer fallback of Series.__getitem__.
        ols = sm.OLS(endog, exog_ols).fit()
        stat = ols.params.iloc[0]
        effects["ols"].append(stat)

    return effects
Ejemplo n.º 9
0
def test_replication_carneiro():
    """Compare the semiparametric ``mte_u`` and ``mte`` with stored references.

    The replication initialization file is read, pointed at the mock data set
    from Carneiro et al (2011), written back out and estimated with
    ``semipar=True``. Both ``mte_u`` and the final ``mte`` must agree with the
    pickled reference results (described as stemming from R's locpoly
    function) to six decimals.
    """
    source_file = TEST_RESOURCES_DIR + "/replication_semipar.yml"
    data_file = TEST_RESOURCES_DIR + "/aer-replication-mock.pkl"
    output_stem = TEST_RESOURCES_DIR + "/replication_semipar"
    estimation_file = TEST_RESOURCES_DIR + "/replication_semipar.grmpy.yml"

    # Redirect the estimation to the mock data set and write the adjusted file.
    init_dict = read(source_file)
    init_dict["ESTIMATION"]["file"] = data_file
    print_dict(init_dict, output_stem)

    test_rslt = fit(estimation_file, semipar=True)

    expected_mte_u = pd.read_pickle(
        TEST_RESOURCES_DIR + "/replication-results-mte_u.pkl"
    )
    expected_mte = pd.read_pickle(
        TEST_RESOURCES_DIR + "/replication-results-mte.pkl"
    )

    for key, expected in (("mte_u", expected_mte_u), ("mte", expected_mte)):
        np.testing.assert_array_almost_equal(test_rslt[key], expected, 6)
Ejemplo n.º 10
0
def test_rslt_dictionary():
    """Compare the semipar tutorial estimation result with stored expectations.

    The tutorial initialization file is simulated and estimated with
    ``semipar=True``. The ``quantiles`` entry must equal the pickled reference
    exactly; ``mte``, ``mte_u``, ``b0`` and ``b1`` must agree to seven decimals
    and ``mte_min`` / ``mte_max`` to five.
    """
    fname = TEST_RESOURCES_DIR + "/tutorial-semipar.grmpy.yml"
    simulate(fname)

    rslt = fit(fname, semipar=True)

    # The context manager closes the fixture file; the previous bare open()
    # call left the handle to the garbage collector.
    with open(TEST_RESOURCES_DIR + "/tutorial-semipar-results.pkl", "rb") as file_:
        expected_rslt = pickle.load(file_)

    np.testing.assert_equal(rslt["quantiles"], expected_rslt["quantiles"])

    # (result key, number of decimals), checked in the original order.
    checks = [
        ("mte", 7),
        ("mte_u", 7),
        ("mte_min", 5),
        ("mte_max", 5),
        ("b0", 7),
        ("b1", 7),
    ]
    for key, decimal in checks:
        np.testing.assert_almost_equal(rslt[key], expected_rslt[key], decimal)
Ejemplo n.º 11
0
def test3():
    """Compare the criterion value of the simulated and the 'estimated' sample.

    Five times over, a random initialization file is generated with 1000
    agents, ``"init"`` start values, the ``"SCIPY-BFGS"`` optimizer and
    ``SAME_SIZE`` enabled. The criterion function is evaluated at the start
    values for the simulated data and for the data returned by
    ``simulate_estimation``; both values must agree within a relative
    tolerance of 0.1.
    """
    init_file = "test.grmpy.yml"

    for _ in range(5):
        constr = {
            "DETERMINISTIC": False,
            "AGENTS": 1000,
            "START": "init",
            "OPTIMIZER": "SCIPY-BFGS",
            "SAME_SIZE": True,
        }
        generate_random_dict(constr)

        df1 = simulate(init_file)
        rslt = fit(init_file)
        init_dict = read(init_file)
        _, df2 = simulate_estimation(rslt)
        start = start_values(init_dict, df1, "init")

        def criterion(data):
            # The treatment indicator returned first is not needed here.
            _, X1, X0, Z1, Z0, Y1, Y0 = process_data(data, init_dict)
            return calculate_criteria(init_dict, X1, X0, Z1, Z0, Y1, Y0, start)

        simulated_value = criterion(df1)
        estimated_value = criterion(df2)
        np.testing.assert_allclose(estimated_value, simulated_value, rtol=0.1)
    ax.plot(quantiles,
            mte_original_d,
            color="orange",
            linestyle=":",
            linewidth=3)
    ax.plot(quantiles,
            mte_original_u,
            color="orange",
            linestyle=":",
            linewidth=3)
    ax.xaxis.set_ticks(np.arange(0, 1.1, step=0.1))
    ax.yaxis.set_ticks(np.arange(-0.5, 0.5, step=0.1))

    ax.set_ylim([-0.37, 0.47])
    ax.set_xlim([0, 1])
    ax.margins(x=0.003)
    ax.margins(y=0.03)

    blue_patch = mpatches.Patch(color="blue", label="original $MTE$")
    orange_patch = mpatches.Patch(color="orange", label="replicated $MTE$")
    plt.legend(handles=[blue_patch, orange_patch], prop={"size": 16})
    plt.savefig(OUTPUT_DIR +
                "/fig-marginal-benefit-parametric-replication.png",
                dpi=300)


if __name__ == "__main__":
    # Script entry point: estimate the replication specification and hand the
    # result, together with the same initialization file, to the plot routine.
    init_file = RESOURCE_DIR + "/replication.grmpy.yml"

    rslt_dict = fit(init_file)
    plot_rslts(rslt_dict, init_file)
Ejemplo n.º 13
0
    x = np.mean(data_frame[covariates]).tolist()
    x_neg = [-i for i in x]
    x += x_neg
    x = np.array(x)

    # Create auxiliary parameters
    part1 = np.dot(x, np.dot(param_cov, x))
    part2 = np.dot(dist_gradients, np.dot(dist_cov, dist_gradients))
    # Prepare two lists for storing the values
    mte_up = []
    mte_d = []

    # Combine all auxiliary parameters and calculate the confidence intervals
    for counter, i in enumerate(quantiles):
        value = part2 * (norm.ppf(i)) ** 2
        aux = np.sqrt(part1 + value) / 4
        mte_up += [mte[counter] + norm.ppf(0.95) * aux]
        mte_d += [mte[counter] - norm.ppf(0.95) * aux]

    return mte_up, mte_d


if __name__ == "__main__":
    # Script entry point: read and estimate the replication specification,
    # then compute and plot the marginal treatment effect on the mock data.
    init_file = "replication.grmpy.yml"

    init_dict = read(init_file)
    rslt = fit(init_file)

    data = pd.read_pickle("aer-replication-mock.pkl")
    mte = plot_est_mte(rslt, init_dict, data)
Ejemplo n.º 14
0
def monte_carlo(file, grid_points):
    """Estimate the ATE over a grid of correlations between U1 and V and plot it.

    For each correlation value on an evenly spaced grid from 0.00 to 0.99, the
    model specification read from ``file`` is passed to
    ``update_correlation_structure``, a data set is created, and the average
    treatment effect is computed from the simulated potential outcomes, a
    grmpy estimation, an OLS regression, a 2SLS regression and a plain
    difference in mean wages between the two states. The collected effects
    are handed to ``create_plots``.

    Args:
        file: Initialization file used for the specification, the data
            creation and the grmpy estimation.
        grid_points: Number of correlation values on the grid.

    Returns:
        None. The results are only passed on to ``create_plots``.
    """
    # One result list per estimation strategy or bookkeeping series.
    effects = {
        key_: []
        for key_ in ["grmpy", "ols", "true", "random", "rho", "iv", "means"]
    }

    # Loop over different correlations between V and U_1
    for rho in np.linspace(0.00, 0.99, grid_points):
        effects["rho"].append(rho)

        # Readjust the initialization file values to add correlation
        model_spec = read(file)
        X = model_spec["TREATED"]["order"]
        update_correlation_structure(model_spec, rho)
        sim_spec = read(file)

        # Simulate a data set and specify exogenous and endogenous variables
        df_mc = create_data(file)
        endog, exog, exog_ols = df_mc["wage"], df_mc[X], df_mc[["state"] + X]
        instr = [i for i in sim_spec["CHOICE"]["order"] if i != "const"]

        # Calculate true average treatment effect
        ATE = np.mean(df_mc["wage1"] - df_mc["wage0"])
        effects["true"].append(ATE)

        # Estimate via grmpy
        rslt = fit(file)
        beta_diff = rslt["TREATED"]["params"] - rslt["UNTREATED"]["params"]
        # Column means are requested explicitly: np.mean on a DataFrame reduces
        # over both axes (returning a scalar) on pandas >= 2.0.
        effects["grmpy"].append(np.dot(exog.mean(axis=0), beta_diff))

        # Estimate via OLS. "state" is the first regressor, so its coefficient
        # sits at position 0; .iloc keeps the lookup positional without relying
        # on the deprecated integer fallback of Series.__getitem__.
        ols = sm.OLS(endog, exog_ols).fit()
        effects["ols"].append(ols.params.iloc[0])

        # Estimate via 2SLS
        iv = IV2SLS(endog, exog, df_mc["state"], df_mc[instr]).fit()
        effects["iv"].append(iv.params["state"])

        # Split the sample once by state; both the naive comparison and the
        # stored group means use these sub-samples.
        treated = df_mc[df_mc.state == 1]
        untreated = df_mc[df_mc.state == 0]
        mean_treated = np.mean(treated["wage"])
        mean_untreated = np.mean(untreated["wage"])

        # Naive estimate: difference in mean wages between the two states
        effects["random"].append(mean_treated - mean_untreated)

        # Outcomes: two [mean wage, group size] pairs are appended per grid
        # point, treated first and untreated second.
        effects["means"].extend(
            [
                [mean_treated, treated.shape[0]],
                [mean_untreated, untreated.shape[0]],
            ]
        )

    create_plots(effects, effects["true"])
Ejemplo n.º 15
0
def test11():
    """Run simulation and estimation on the tutorial initialization file.

    The test passes when both steps complete without raising.
    """
    tutorial_file = TEST_RESOURCES_DIR + "/tutorial.grmpy.yml"
    for step in (simulate, fit):
        step(tutorial_file)
Ejemplo n.º 16
0
"""This module contains a tutorial illustrating the basic capabilities of the grmpy
package.
"""
import os

from grmpy.estimate.estimate import fit
from grmpy.simulate.simulate import simulate

# Locate the initialization file next to this module. The path is made
# absolute first: when ``__file__`` carries no directory part its dirname is
# empty, and plain concatenation would then point at "/tutorial.grmpy.yml" in
# the filesystem root.
f = os.path.join(os.path.dirname(os.path.abspath(__file__)), "tutorial.grmpy.yml")

# Simulate a data set from the specification and estimate the model on it.
simulate(f)
rslt = fit(f)