def test6():
    """Check the comparison file produced by a zero-iteration estimation.

    In addition to what test5 covers, this test fixes MAXITER to zero and
    START to "init", overwrites two entries of the distribution parameters,
    runs simulation and estimation and then inspects the sections of
    "comparison.grmpy.info".
    """
    for _ in range(5):
        constr = {
            "DETERMINISTIC": False,
            "MAXITER": 0,
            "AGENTS": 15000,
            "START": "init",
            "SAME_SIZE": True,
        }
        init_dict = generate_random_dict(constr)

        # Overwrite entries 1 and 5 of the distribution parameters before the
        # initialization file is written out.
        dist_params = init_dict["DIST"]["params"]
        dist_params[1], dist_params[5] = 0.0, 1.0
        print_dict(init_dict)

        simulate("test.grmpy.yml")
        fit("test.grmpy.yml")

        info = read_desc("comparison.grmpy.info")
        for section in ("ALL", "TREATED", "UNTREATED"):
            block = info[section]

            # All reported sample sizes within a section have to coincide.
            np.testing.assert_equal(len(set(block["Number"])), 1)

            # NOTE(review): the third positional argument of the two numpy
            # assertions below is `decimal`, not an absolute tolerance. The
            # value 0.001 is kept as-is to preserve the current behavior.
            np.testing.assert_almost_equal(
                block["Observed Sample"],
                block["Simulated Sample (finish)"],
                0.001,
            )
            np.testing.assert_array_almost_equal(
                block["Simulated Sample (finish)"],
                block["Simulated Sample (start)"],
                0.001,
            )
def test5():
    """Run simulation and estimation with the iteration limit set to zero.

    The test passes if five randomly generated initialization files can be
    simulated and fitted without raising.
    """
    for _ in range(5):
        constr = {"DETERMINISTIC": False, "MAXITER": 0}
        generate_random_dict(constr)

        for step in (simulate, fit):
            step("test.grmpy.yml")
def test4():
    """Run simulation and estimation with the Powell optimizer requested.

    The test passes if five randomly generated initialization files can be
    simulated and fitted without raising.
    """
    for _ in range(5):
        # NOTE(review): "start" and "optimizer" are lower-case here while the
        # other tests in this file use "START" / "OPTIMIZER" -- confirm that
        # generate_random_dict actually honors these two keys.
        constr = {
            "DETERMINISTIC": False,
            "AGENTS": 10000,
            "start": "init",
            "optimizer": "SCIPY-Powell",
        }
        generate_random_dict(constr)

        for step in (simulate, fit):
            step("test.grmpy.yml")
def test9():
    """Exercise generation, reading, simulation and estimation end to end.

    The constraints request state-specific covariates (STATE_DIFF) and
    overlapping covariates (OVERLAP). For five randomly generated
    initialization files the test passes if reading, simulating and fitting
    complete without raising. Generated files are removed afterwards via
    cleanup().
    """
    for _ in range(5):
        constr = {
            "DETERMINISTIC": False,
            # The key was spelled "AGENT" before; every other test in this
            # file passes the sample size under "AGENTS".
            "AGENTS": 1000,
            "STATE_DIFF": True,
            "OVERLAP": True,
        }
        generate_random_dict(constr)

        read("test.grmpy.yml")
        simulate("test.grmpy.yml")
        fit("test.grmpy.yml")

    cleanup()
def test1():
    """Compare the semiparametric ``mte_u`` estimate with stored reference values.

    The replication initialization file is pointed at the mock data set from
    Carneiro et al (2011), written back out, and estimated with
    ``semipar=True``. The resulting ``mte_u`` has to agree with the pickled
    reference (results of R's locpoly function) to six decimals.
    """
    data_file = TEST_RESOURCES_DIR + "/aer-replication-mock.pkl"
    reference_file = TEST_RESOURCES_DIR + "/replication-results-mte_u.pkl"

    # Redirect the estimation data to the mock data set and write the
    # adjusted specification back to disk.
    spec = read(TEST_RESOURCES_DIR + "/replication_semipar.yml")
    spec["ESTIMATION"]["file"] = data_file
    print_dict(spec, TEST_RESOURCES_DIR + "/replication_semipar")

    estimate = fit(
        TEST_RESOURCES_DIR + "/replication_semipar.grmpy.yml", semipar=True
    )
    reference = pd.read_pickle(reference_file)

    np.testing.assert_array_almost_equal(estimate["mte_u"], reference, 6)
def test10():
    """Check that a zero-iteration fit leaves the start values untouched.

    For ten random initialization files the "init" start values are computed,
    passed through backward_transformation and compared for exact equality
    with ``rslt["AUX"]["x_internal"]`` returned by fit.
    """
    # NOTE(review): a second function named test10 is defined later in this
    # file and shadows this definition at import time.
    for _ in range(10):
        constr = {
            "DETERMINISTIC": False,
            "AGENTS": 1000,
            "MAXITER": 0,
            "START": "init",
        }
        generate_random_dict(constr)

        spec = read("test.grmpy.yml")
        data = simulate("test.grmpy.yml")
        expected = backward_transformation(start_values(spec, data, "init"))

        estimation = fit("test.grmpy.yml")

        np.testing.assert_equal(expected, estimation["AUX"]["x_internal"])
def test10():
    """Check that a zero-iteration BFGS fit leaves the start values untouched.

    For ten random initialization files the "init" start values are computed
    from the processed data, passed through backward_transformation and
    compared for exact equality with the parameter values stored under
    ``rslt["opt_rslt"]["params"]``.
    """
    # NOTE(review): another function named test10 is defined earlier in this
    # file; only the last definition is visible after import.
    for _ in range(10):
        constr = {
            "DETERMINISTIC": False,
            "AGENTS": 1000,
            "MAXITER": 0,
            "START": "init",
            "OPTIMIZER": "BFGS",
        }
        generate_random_dict(constr)

        spec = read("test.grmpy.yml")
        data = simulate("test.grmpy.yml")

        # process_data returns (D, X1, X0, Z1, Z0, Y1, Y0); forward the tuple
        # to start_values in exactly that order.
        arrays = process_data(data, spec)
        expected = backward_transformation(start_values(spec, *arrays, "init"))

        estimation = fit("test.grmpy.yml")

        np.testing.assert_equal(expected, estimation["opt_rslt"]["params"].values)
def monte_carlo(file, grid_points):
    """Estimate the ATE for different correlation structures between U1 and V.

    For every correlation value on an evenly spaced grid over [0.00, 0.99] a
    data set is created and the average treatment effect is obtained three
    ways: from the simulated potential outcomes ("true"), from the grmpy
    estimation ("grmpy") and from an OLS regression ("ols").

    Args:
        file: Path of the initialization file that is read and handed to
            update_correlation_structure together with the correlation value.
        grid_points: Number of correlation values on the grid.

    Returns:
        dict with the keys "grmpy", "ols" and "true", each holding a list
        with one effect per grid point.
    """
    # One result list per estimation strategy.
    effects = {key_: [] for key_ in ["grmpy", "ols", "true"]}

    # Loop over different correlations between V and U_1.
    for rho in np.linspace(0.00, 0.99, grid_points):
        # NOTE(review): `file` is only used for the specification passed to
        # update_correlation_structure; the covariate order and the
        # estimation use the fixed "reliability.grmpy.yml" -- confirm that
        # this is intended.
        model_spec = read(file)
        sim_spec = read("reliability.grmpy.yml")
        X = sim_spec["TREATED"]["order"]
        update_correlation_structure(model_spec, rho)

        # Create a data set and pick endogenous and exogenous variables.
        df_mc = create_data()
        endog, exog, exog_ols = df_mc["wage"], df_mc[X], df_mc[["state"] + X]

        # True average treatment effect from the potential outcomes.
        effects["true"].append(np.mean(df_mc["wage1"] - df_mc["wage0"]))

        # Estimate via grmpy. The covariate means are taken column-wise with
        # an explicit axis: np.mean(DataFrame) forwards axis=None, which
        # collapses the frame to a single scalar on pandas >= 2.0.
        rslt = fit("reliability.grmpy.yml")
        beta_diff = rslt["TREATED"]["params"] - rslt["UNTREATED"]["params"]
        effects["grmpy"].append(np.dot(exog.mean(axis=0), beta_diff))

        # Estimate via OLS. "state" is the first regressor; it is selected by
        # label because integer keys on a label-indexed Series are deprecated
        # as positional access.
        ols = sm.OLS(endog, exog_ols).fit()
        effects["ols"].append(ols.params["state"])

    return effects
def test_replication_carneiro():
    """Compare grmpy's semiparametric MTE estimates with stored references.

    The replication initialization file is pointed at the mock data set from
    Carneiro et al (2011), written back out, and estimated with
    ``semipar=True``. Both ``mte_u`` and the final ``mte`` have to agree with
    the pickled reference results (R's locpoly function) to six decimals.
    """
    reference_files = {
        "mte_u": "/replication-results-mte_u.pkl",
        "mte": "/replication-results-mte.pkl",
    }

    # Redirect the estimation data to the mock data set and write the
    # adjusted specification back to disk.
    spec = read(TEST_RESOURCES_DIR + "/replication_semipar.yml")
    spec["ESTIMATION"]["file"] = TEST_RESOURCES_DIR + "/aer-replication-mock.pkl"
    print_dict(spec, TEST_RESOURCES_DIR + "/replication_semipar")

    estimate = fit(
        TEST_RESOURCES_DIR + "/replication_semipar.grmpy.yml", semipar=True
    )

    # Load every reference before the first comparison is made.
    references = {
        key: pd.read_pickle(TEST_RESOURCES_DIR + suffix)
        for key, suffix in reference_files.items()
    }
    for key, reference in references.items():
        np.testing.assert_array_almost_equal(estimate[key], reference, 6)
def test_rslt_dictionary():
    """Compare the semiparametric tutorial results with stored expectations.

    The tutorial initialization file is simulated and estimated with
    ``semipar=True``. The entries of the returned dictionary are then
    compared with the pickled expected results: "quantiles" exactly, the
    remaining entries up to the number of decimals listed below.
    """
    fname = TEST_RESOURCES_DIR + "/tutorial-semipar.grmpy.yml"
    simulate(fname)
    rslt = fit(fname, semipar=True)

    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open(TEST_RESOURCES_DIR + "/tutorial-semipar-results.pkl", "rb") as handle:
        expected_rslt = pickle.load(handle)

    np.testing.assert_equal(rslt["quantiles"], expected_rslt["quantiles"])

    # Result key -> number of decimals that have to agree.
    decimals = [
        ("mte", 7),
        ("mte_u", 7),
        ("mte_min", 5),
        ("mte_max", 5),
        ("b0", 7),
        ("b1", 7),
    ]
    for key, decimal in decimals:
        np.testing.assert_almost_equal(rslt[key], expected_rslt[key], decimal)
def test3():
    """Compare the criterion value on the simulated and the 'estimated' sample.

    Both samples contain the same number of individuals (SAME_SIZE). The
    criterion is evaluated at the "init" start values for each sample and the
    two values have to agree up to a relative tolerance of 0.1.
    """
    for _ in range(5):
        constr = {
            "DETERMINISTIC": False,
            "AGENTS": 1000,
            "START": "init",
            "OPTIMIZER": "SCIPY-BFGS",
            "SAME_SIZE": True,
        }
        generate_random_dict(constr)

        simulated = simulate("test.grmpy.yml")
        rslt = fit("test.grmpy.yml")
        spec = read("test.grmpy.yml")
        _, estimated = simulate_estimation(rslt)
        start = start_values(spec, simulated, "init")

        # process_data returns the treatment indicator first; only the
        # remaining six arrays are forwarded to calculate_criteria.
        criteria = [
            calculate_criteria(spec, *process_data(sample, spec)[1:], start)
            for sample in (simulated, estimated)
        ]

        np.testing.assert_allclose(criteria[1], criteria[0], rtol=0.1)
ax.plot(quantiles, mte_original_d, color="orange", linestyle=":", linewidth=3) ax.plot(quantiles, mte_original_u, color="orange", linestyle=":", linewidth=3) ax.xaxis.set_ticks(np.arange(0, 1.1, step=0.1)) ax.yaxis.set_ticks(np.arange(-0.5, 0.5, step=0.1)) ax.set_ylim([-0.37, 0.47]) ax.set_xlim([0, 1]) ax.margins(x=0.003) ax.margins(y=0.03) blue_patch = mpatches.Patch(color="blue", label="original $MTE$") orange_patch = mpatches.Patch(color="orange", label="replicated $MTE$") plt.legend(handles=[blue_patch, orange_patch], prop={"size": 16}) plt.savefig(OUTPUT_DIR + "/fig-marginal-benefit-parametric-replication.png", dpi=300) if __name__ == "__main__": rslt_dict = fit(RESOURCE_DIR + "/replication.grmpy.yml") plot_rslts(rslt_dict, RESOURCE_DIR + "/replication.grmpy.yml")
x = np.mean(data_frame[covariates]).tolist() x_neg = [-i for i in x] x += x_neg x = np.array(x) # Create auxiliary parameters part1 = np.dot(x, np.dot(param_cov, x)) part2 = np.dot(dist_gradients, np.dot(dist_cov, dist_gradients)) # Prepare two lists for storing the values mte_up = [] mte_d = [] # Combine all auxiliary parameters and calculate the confidence intervals for counter, i in enumerate(quantiles): value = part2 * (norm.ppf(i)) ** 2 aux = np.sqrt(part1 + value) / 4 mte_up += [mte[counter] + norm.ppf(0.95) * aux] mte_d += [mte[counter] - norm.ppf(0.95) * aux] return mte_up, mte_d if __name__ == "__main__": init_dict = read("replication.grmpy.yml") # Estimate the coefficients rslt = fit("replication.grmpy.yml") # Calculate and plot the marginal treatment effect data = pd.read_pickle("aer-replication-mock.pkl") mte = plot_est_mte(rslt, init_dict, data)
def monte_carlo(file, grid_points):
    """Estimate the ATE for different correlation structures between U1 and V.

    For every correlation value on an evenly spaced grid over [0.00, 0.99] a
    data set is created and the average treatment effect is obtained from
    the simulated potential outcomes ("true"), the grmpy estimation
    ("grmpy"), an OLS regression ("ols"), a 2SLS regression ("iv") and a
    plain difference in mean outcomes by treatment state ("random"). The
    correlation values are stored under "rho" and, for each grid point, the
    pairs [mean wage, group size] of the treated and untreated group are
    appended to "means". All results are handed to create_plots.

    Args:
        file: Path of the initialization file used for reading the model
            specification, creating the data and running the estimation.
        grid_points: Number of correlation values on the grid.

    Returns:
        None. The collected effects are passed to create_plots.
    """
    # One result list per estimation strategy / diagnostic.
    keys = ["grmpy", "ols", "true", "random", "rho", "iv", "means"]
    effects = {key_: [] for key_ in keys}

    # Loop over different correlations between V and U_1.
    for rho in np.linspace(0.00, 0.99, grid_points):
        effects["rho"].append(rho)

        # Readjust the initialization file values to add correlation.
        model_spec = read(file)
        X = model_spec["TREATED"]["order"]
        update_correlation_structure(model_spec, rho)
        sim_spec = read(file)

        # Create a data set and pick endogenous and exogenous variables.
        df_mc = create_data(file)
        endog, exog, exog_ols = df_mc["wage"], df_mc[X], df_mc[["state"] + X]
        instr = [i for i in sim_spec["CHOICE"]["order"] if i != "const"]

        # True average treatment effect from the potential outcomes.
        effects["true"].append(np.mean(df_mc["wage1"] - df_mc["wage0"]))

        # Estimate via grmpy. The covariate means are taken column-wise with
        # an explicit axis: np.mean(DataFrame) forwards axis=None, which
        # collapses the frame to a single scalar on pandas >= 2.0.
        rslt = fit(file)
        beta_diff = rslt["TREATED"]["params"] - rslt["UNTREATED"]["params"]
        effects["grmpy"].append(np.dot(exog.mean(axis=0), beta_diff))

        # Estimate via OLS. "state" is the first regressor; it is selected by
        # label because integer keys on a label-indexed Series are deprecated
        # as positional access.
        ols = sm.OLS(endog, exog_ols).fit()
        effects["ols"].append(ols.params["state"])

        # Estimate via 2SLS.
        iv = IV2SLS(endog, exog, df_mc["state"], df_mc[instr]).fit()
        effects["iv"].append(iv.params["state"])

        # Split the sample once by treatment state; both the naive
        # comparison and the group summaries use the same subsets.
        treated = df_mc[df_mc.state == 1]
        untreated = df_mc[df_mc.state == 0]
        mean_treated = np.mean(treated["wage"])
        mean_untreated = np.mean(untreated["wage"])

        # Difference in mean outcomes between treated and untreated.
        effects["random"].append(mean_treated - mean_untreated)

        # Mean outcome and group size for each treatment state.
        effects["means"].extend(
            [
                [mean_treated, treated.shape[0]],
                [mean_untreated, untreated.shape[0]],
            ]
        )

    create_plots(effects, effects["true"])
def test11():
    """Run simulation and estimation on the tutorial initialization file.

    The test passes if both steps complete without raising.
    """
    tutorial_file = TEST_RESOURCES_DIR + "/tutorial.grmpy.yml"
    for step in (simulate, fit):
        step(tutorial_file)
"""This module contains a tutorial illustrating the basic capabilities of the grmpy package.

Running it simulates a data set from the tutorial initialization file that
sits next to this script and then estimates the model on it.
"""
import os

from grmpy.estimate.estimate import fit
from grmpy.simulate.simulate import simulate

# Resolve the script location to an absolute path first: when __file__ has no
# directory component, os.path.dirname returns "" and plain concatenation
# would point at "/tutorial.grmpy.yml" in the file system root.
f = os.path.join(os.path.dirname(os.path.abspath(__file__)), "tutorial.grmpy.yml")

simulate(f)

rslt = fit(f)