# edit PYTHONPATH
import sys

sys.path.insert(0, 'grmpy')

# project library
import grmpy as gp

from tests._auxiliary import random_init
from tools.economics.clsAgent import AgentCls
from tools.optimization.estimation import _load_data, _object_negative_log_likelihood

# Generate random request. This branch is disabled by default; it illustrates how to
# evaluate the negative log-likelihood directly on a freshly simulated sample.
if False:
    init_dict = gp.process('init.ini')

    # Simulate synthetic sample
    gp.simulate(init_dict)

    # Load dataset
    Y, D, X, Z, agent_objs = _load_data(init_dict)

    _object_negative_log_likelihood(init_dict, agent_objs)

# Process initialization file
init_dict = gp.process('init.ini')

# Simulate synthetic sample
gp.simulate(init_dict)

# Estimate model
rslt = gp.estimate(init_dict)
import datetime
import subprocess

import statsmodels.tools.sm_exceptions

from grmpy.test.random_init import generate_random_dict
from grmpy.test.random_init import print_dict
import grmpy

# We simply specify a minimum number of minutes for our package to run with
# different requests.
MINUTES = 1

end_time = datetime.datetime.now() + datetime.timedelta(minutes=MINUTES)
counter = 1
while True:
    if datetime.datetime.now() >= end_time:
        break

    print('\n Iteration ', counter)

    dict_ = generate_random_dict()
    print_dict(dict_)

    grmpy.simulate('test.grmpy.ini')

    # This is a temporary fix: the determination of starting values by PROBIT does
    # not work if we have perfect separation, so we simply skip such requests.
    try:
        grmpy.estimate('test.grmpy.ini')
    except statsmodels.tools.sm_exceptions.PerfectSeparationError:
        print('separation error, skip')

    # Remove the files generated during this iteration.
    subprocess.check_call(['git', 'clean', '-d', '-f'])

    counter += 1
# Module-level imports required by this function (IV2SLS follows the linearmodels API,
# which matches the four-argument call below); update_correlation_structure() and
# create_data() are project-specific helpers defined alongside it.
import numpy as np
import statsmodels.api as sm

from linearmodels.iv import IV2SLS

import grmpy
from grmpy.read.read import read
from grmpy.test.random_init import print_dict


def monte_carlo(file, which, grid_points=10):
    """This function estimates various effect parameters for an increasing presence
    of essential heterogeneity, which is reflected by increasing (absolute)
    correlation between U_1 and V.
    """
    # Simulate a new data set with essential heterogeneity present.
    model_dict = read(file)
    original_correlation = model_dict["DIST"]["params"][2]
    model_dict["DIST"]["params"][2] = -0.191
    print_dict(model_dict, file.replace(".grmpy.yml", ""))
    grmpy.simulate(file)

    effects = []

    # Loop over different correlations between U_1 and V.
    for rho in np.linspace(0.00, -0.99, grid_points):
        # Readjust the initialization file values to add correlation.
        model_spec = read(file)
        X = model_spec["TREATED"]["order"]
        update_correlation_structure(file, model_spec, rho)
        sim_spec = read(file)

        # Simulate a data set and specify exogenous and endogenous variables.
        df_mc = create_data(file)
        treated = df_mc["D"] == 1
        Xvar = df_mc[X]
        instr = sim_spec["CHOICE"]["order"]
        instr = [i for i in instr if i != "const"]

        # We calculate our parameter of interest.
        label = which.lower()

        if label == "conventional_average_effects":
            ATE = np.mean(df_mc["Y1"] - df_mc["Y0"])
            TT = np.mean(df_mc["Y1"].loc[treated] - df_mc["Y0"].loc[treated])
            stat = (ATE, TT)
        elif label in ["random", "randomization"]:
            random = np.mean(df_mc[df_mc.D == 1]["Y"]) - np.mean(
                df_mc[df_mc.D == 0]["Y"]
            )
            stat = random
        elif label in ["ordinary_least_squares", "ols"]:
            results = sm.OLS(df_mc["Y"], df_mc[["const", "D"]]).fit()
            stat = results.params[1]
        elif label in ["instrumental_variables", "iv"]:
            iv = IV2SLS(df_mc["Y"], Xvar, df_mc["D"], df_mc[instr]).fit()
            stat = iv.params["D"]
        elif label in ["grmpy", "grmpy-par"]:
            rslt = grmpy.fit(file)
            beta_diff = rslt["TREATED"]["params"] - rslt["UNTREATED"]["params"]
            stat = np.dot(np.mean(Xvar), beta_diff)
        elif label in ["grmpy-semipar", "grmpy-liv"]:
            rslt = grmpy.fit(file, semipar=True)

            y0_fitted = np.dot(rslt["X"], rslt["b0"])
            y1_fitted = np.dot(rslt["X"], rslt["b1"])

            mte_x_ = y1_fitted - y0_fitted
            mte_u = rslt["mte_u"]

            # Average the MTE over the support of X and the unobserved resistance.
            mte_mat = np.zeros((len(mte_x_), len(mte_u)))
            for i in range(len(mte_x_)):
                for j in range(len(mte_u)):
                    mte_mat[i, j] = mte_x_[i] + mte_u[j]

            ate_tilde_p = np.mean(mte_mat, axis=1)
            stat = ate_tilde_p.mean()
        else:
            raise NotImplementedError

        effects += [stat]

    # Restore the original initialization file.
    model_dict = read(file)
    model_dict["DIST"]["params"][2] = original_correlation
    print_dict(model_dict, file.replace(".grmpy.yml", ""))
    grmpy.simulate(file)

    return effects
"""This module contains a tutorial illustrating the basic capabilities of the grmpy package.""" import grmpy grmpy.simulate('tutorial.grmpy.ini') grmpy.estimate('tutorial.grmpy.ini')
# Module-level imports required by this function (IV2SLS follows the linearmodels API,
# which matches the four-argument call below); _update_correlation_structure() and
# _create_data() are project-specific helpers defined alongside it.
import numpy as np
import statsmodels.api as sm

from linearmodels.iv import IV2SLS

import grmpy
from grmpy.read.read import read
from grmpy.test.random_init import print_dict


def monte_carlo(file, which, grid_points=10):
    """This function conducts a Monte Carlo simulation to compare the true and
    estimated treatment parameters for increasing (absolute) correlation between
    U_1 and V (i.e., essential heterogeneity). In the example here, the correlation
    between U_1 and V becomes increasingly more negative. As we consider the
    absolute value of the correlation coefficient, values closer to -1 (or, in the
    analogous case, closer to +1) denote a higher degree of essential heterogeneity.

    The results of the Monte Carlo simulation can be used to evaluate the
    performance of different estimation strategies in the presence of essential
    heterogeneity. Depending on the specification of *which*, either the true ATE
    and TT, or an estimate of the ATE are returned.

    Options for *which*:

    Comparison of ATE and TT
        - "conventional_average_effects"

    Different estimation strategies for the ATE
        - "randomization" ("random")
        - "ordinary_least_squares" ("ols")
        - "instrumental_variables" ("iv")
        - "grmpy-par" ("grmpy")
        - "grmpy-semipar" ("grmpy-liv")

    Post-estimation: To plot the comparison between the true ATE and the respective
    parameter, use the function

        - plot_effects() for *which* = "conventional_average_effects", and
        - plot_estimates() else,

    as illustrated in the usage sketch after this function.

    Parameters
    ----------
    file: str
        Path to the grmpy initialization file (YAML), which provides the
        information for the simulation process.
    which: str
        String denoting whether conventional average effects shall be computed or,
        alternatively, which estimation approach shall be implemented for the ATE.
    grid_points: int, default 10
        Number of different values for rho, the correlation coefficient between
        U_1 and V, on the interval [0, -1), along which the parameters shall be
        evaluated.

    Returns
    -------
    effects: list
        If *which* = "conventional_average_effects", a list of length
        *grid_points* containing (ATE, TT) pairs. Else, a list of length
        *grid_points* containing estimates of the ATE.
    """
    # Simulate a new data set with essential heterogeneity present.
    model_dict = read(file)
    original_correlation = model_dict["DIST"]["params"][2]
    model_dict["DIST"]["params"][2] = -0.191
    print_dict(model_dict, file.replace(".grmpy.yml", ""))
    grmpy.simulate(file)

    effects = []

    # Loop over different correlations between U_1 and V.
    for rho in np.linspace(0.00, -0.99, grid_points):
        # Readjust the initialization file values to add correlation.
        model_spec = read(file)
        X = model_spec["TREATED"]["order"]
        _update_correlation_structure(file, model_spec, rho)
        sim_spec = read(file)

        # Simulate a data set and specify exogenous and endogenous variables.
        df_mc = _create_data(file)
        treated = df_mc["D"] == 1
        Xvar = df_mc[X]
        instr = sim_spec["CHOICE"]["order"]
        instr = [i for i in instr if i != "const"]

        # We calculate our parameter of interest.
        label = which.lower()

        if label == "conventional_average_effects":
            ATE = np.mean(df_mc["Y1"] - df_mc["Y0"])
            TT = np.mean(df_mc["Y1"].loc[treated] - df_mc["Y0"].loc[treated])
            stat = (ATE, TT)
        elif label in ["randomization", "random"]:
            random = np.mean(df_mc[df_mc.D == 1]["Y"]) - np.mean(
                df_mc[df_mc.D == 0]["Y"]
            )
            stat = random
        elif label in ["ordinary_least_squares", "ols"]:
            results = sm.OLS(df_mc["Y"], df_mc[["const", "D"]]).fit()
            stat = results.params[1]
        elif label in ["instrumental_variables", "iv"]:
            iv = IV2SLS(df_mc["Y"], Xvar, df_mc["D"], df_mc[instr]).fit()
            stat = iv.params["D"]
        elif label in ["grmpy", "grmpy-par"]:
            rslt = grmpy.fit(file)
            beta_diff = rslt["TREATED"]["params"] - rslt["UNTREATED"]["params"]
            stat = np.dot(np.mean(Xvar), beta_diff)
        elif label in ["grmpy-semipar", "grmpy-liv"]:
            rslt = grmpy.fit(file, semipar=True)

            y0_fitted = np.dot(rslt["X"], rslt["b0"])
            y1_fitted = np.dot(rslt["X"], rslt["b1"])

            mte_x_ = y1_fitted - y0_fitted
            mte_u = rslt["mte_u"]

            # Average the MTE over the support of X and the unobserved resistance.
            mte_mat = np.zeros((len(mte_x_), len(mte_u)))
            for i in range(len(mte_x_)):
                for j in range(len(mte_u)):
                    mte_mat[i, j] = mte_x_[i] + mte_u[j]

            ate_tilde_p = np.mean(mte_mat, axis=1)
            stat = ate_tilde_p.mean()
        else:
            raise NotImplementedError

        effects += [stat]

    # Restore the original initialization file.
    model_dict = read(file)
    model_dict["DIST"]["params"][2] = original_correlation
    print_dict(model_dict, file.replace(".grmpy.yml", ""))
    grmpy.simulate(file)

    return effects