def dml_iivm_pyvsr_fixture(generate_data_iivm, idx, score, dml_procedure):
    """Fit an IIVM model in Python and in R on the same data and sample split.

    Parameters
    ----------
    generate_data_iivm
        Indexable collection of data sets; element ``idx`` is used. The
        selected element must expose ``columns`` and is handed to
        ``pandas2ri.py2rpy`` (presumably a pandas DataFrame with columns
        ``y``, ``d``, ``z`` and covariates prefixed with ``X`` -- confirm
        against the data generator).
    idx
        Index of the data set to pick from ``generate_data_iivm``.
    score
        Score handed to the R implementation.
    dml_procedure
        DML procedure handed to both the Python and the R implementation.

    Returns
    -------
    dict
        ``coef_py`` / ``se_py`` read from the fitted Python model and
        ``coef_r`` / ``se_r`` taken from the first two entries of the R
        result.
    """
    n_folds = 2

    # collect data
    data = generate_data_iivm[idx]
    x_cols = data.columns[data.columns.str.startswith('X')].tolist()

    # Set machine learning methods for g, m & r
    # NOTE(review): recent scikit-learn releases may reject the string
    # 'none' and expect penalty=None -- confirm against the pinned version.
    learner_classif = LogisticRegression(penalty='none', solver='newton-cg')
    learner_reg = LinearRegression()
    ml_g = clone(learner_reg)
    ml_m = clone(learner_classif)
    ml_r = clone(learner_classif)

    obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols, 'z')
    # NOTE(review): `score` is not forwarded here, so the Python model runs
    # with its constructor default while R receives `score` explicitly --
    # confirm that the two always coincide for the parametrised values.
    dml_iivm_obj = dml.DoubleMLIIVM(obj_dml_data,
                                    ml_g, ml_m, ml_r,
                                    n_folds,
                                    dml_procedure=dml_procedure)

    np.random.seed(3141)
    dml_iivm_obj.fit()

    # fit the DML model in R, reusing the sample split of the Python model
    all_train, all_test = export_smpl_split_to_r(dml_iivm_obj.smpls[0])
    r_dataframe = pandas2ri.py2rpy(data)
    res_r = r_IIVM(r_dataframe, score, dml_procedure, all_train, all_test)

    res_dict = {
        'coef_py': dml_iivm_obj.coef,
        'coef_r': res_r[0],
        'se_py': dml_iivm_obj.se,
        'se_r': res_r[1],
    }

    return res_dict
def dml_iivm_fixture(generate_data_iivm, learner_g, learner_m, learner_r, score, dml_procedure, tune_on_folds):
    """Compare a tuned DoubleMLIIVM fit with a manually tuned reference fit.

    The package model is tuned, fitted and bootstrapped; the same steps are
    then repeated with the helper functions ``tune_nuisance_iivm``,
    ``fit_nuisance_iivm``, ``iivm_dml1`` / ``iivm_dml2`` and ``boot_iivm``.
    The global NumPy seed is reset to the same value before each side runs
    (presumably so both draw identical sample splits and bootstrap weights
    -- confirm against the package internals).

    Parameters
    ----------
    generate_data_iivm
        Data set exposing ``columns``, ``shape`` and the columns ``y``,
        ``d``, ``z`` plus covariates whose names start with ``X``.
    learner_g, learner_m, learner_r
        Learners for the three nuisance parts; each one is cloned before
        use and also passed to ``get_par_grid`` to obtain its tuning grid.
    score
        Score handed to the manual estimation and bootstrap helpers.
    dml_procedure
        Either ``'dml1'`` or ``'dml2'``.
    tune_on_folds
        If truthy, the manual tuning uses the cross-fitting folds; otherwise
        it uses a single split whose training part is every observation.

    Returns
    -------
    dict
        Coefficients, standard errors and bootstrap results of the package
        model together with their ``*_manual`` counterparts, plus the list
        of bootstrap methods under ``'boot_methods'``.
    """
    n_folds = 2
    n_folds_tune = 4
    n_rep_boot = 491
    boot_methods = ['normal']

    # one hyperparameter grid per nuisance learner
    par_grid = {
        'ml_g': get_par_grid(learner_g),
        'ml_m': get_par_grid(learner_m),
        'ml_r': get_par_grid(learner_r),
    }

    data = generate_data_iivm
    covariate_mask = data.columns.str.startswith('X')
    x_cols = data.columns[covariate_mask].tolist()

    # ---- package implementation ----
    np.random.seed(3141)
    dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols, 'z')
    iivm_model = dml.DoubleMLIIVM(
        dml_data,
        clone(learner_g), clone(learner_m), clone(learner_r),
        n_folds,
        dml_procedure=dml_procedure,
    )
    iivm_model.tune(par_grid, tune_on_folds=tune_on_folds, n_folds_tune=n_folds_tune)
    iivm_model.fit()

    # ---- manual reference implementation ----
    np.random.seed(3141)
    y = data['y'].values
    x = data[x_cols].values
    d = data['d'].values
    z = data['z'].values
    smpls = list(KFold(n_splits=n_folds, shuffle=True).split(x))

    if tune_on_folds:
        tune_smpls = smpls
    else:
        # a single "fold": train on every observation, empty test part
        tune_smpls = [(np.arange(data.shape[0]), np.array([]))]

    g0_params, g1_params, m_params, r0_params, r1_params = tune_nuisance_iivm(
        y, x, d, z,
        clone(learner_m), clone(learner_g), clone(learner_r),
        tune_smpls, n_folds_tune,
        par_grid['ml_g'], par_grid['ml_m'], par_grid['ml_r'],
    )

    if not tune_on_folds:
        # Repeat the globally tuned parameters n_folds times (presumably to
        # supply one entry per cross-fitting fold -- confirm against the
        # return type of tune_nuisance_iivm).
        g0_params, g1_params, m_params, r0_params, r1_params = (
            params * n_folds
            for params in (g0_params, g1_params, m_params, r0_params, r1_params)
        )

    g_hat0, g_hat1, m_hat, r_hat0, r_hat1 = fit_nuisance_iivm(
        y, x, d, z,
        clone(learner_m), clone(learner_g), clone(learner_r),
        smpls,
        g0_params, g1_params, m_params, r0_params, r1_params,
    )

    if dml_procedure == 'dml1':
        estimate = iivm_dml1
    else:
        assert dml_procedure == 'dml2'
        estimate = iivm_dml2
    res_manual, se_manual = estimate(y, x, d, z,
                                     g_hat0, g_hat1, m_hat, r_hat0, r_hat1,
                                     smpls, score)

    res_dict = {
        'coef': iivm_model.coef,
        'coef_manual': res_manual,
        'se': iivm_model.se,
        'se_manual': se_manual,
        'boot_methods': boot_methods,
    }

    for boot_method in boot_methods:
        # identical seed before each bootstrap run
        np.random.seed(3141)
        boot_theta, boot_t_stat = boot_iivm(res_manual, y, d, z,
                                            g_hat0, g_hat1, m_hat, r_hat0, r_hat1,
                                            smpls, score, se_manual,
                                            boot_method, n_rep_boot, dml_procedure)

        np.random.seed(3141)
        iivm_model.bootstrap(method=boot_method, n_rep_boot=n_rep_boot)

        res_dict.update({
            f'boot_coef{boot_method}': iivm_model.boot_coef,
            f'boot_t_stat{boot_method}': iivm_model.boot_t_stat,
            f'boot_coef{boot_method}_manual': boot_theta,
            f'boot_t_stat{boot_method}_manual': boot_t_stat,
        })

    return res_dict
def dml_iivm_fixture(generate_data_iivm, learner, score, dml_procedure, trimming_threshold):
    """Compare a DoubleMLIIVM fit with the manual ``fit_iivm`` reference.

    The package model is fitted and bootstrapped; the same is done with the
    helpers ``draw_smpls``, ``fit_iivm`` and ``boot_iivm``. The global NumPy
    seed is reset to the same value before each side runs (presumably so
    both draw identical sample splits and bootstrap weights -- confirm).

    Parameters
    ----------
    generate_data_iivm
        Data set exposing ``columns`` and the columns ``y``, ``d``, ``z``
        plus covariates whose names start with ``X``.
    learner
        Indexable pair of learners: ``learner[0]`` is used for ``ml_g`` and
        ``learner[1]`` for both ``ml_m`` and ``ml_r``.
    score
        Score handed to the manual fitting and bootstrap helpers.
    dml_procedure
        DML procedure handed to the package model and to ``fit_iivm``.
    trimming_threshold
        Trimming threshold handed to the package model and to ``fit_iivm``.

    Returns
    -------
    dict
        Coefficients, standard errors and bootstrap results of the package
        model together with their ``*_manual`` counterparts, plus the list
        of bootstrap methods under ``'boot_methods'``.
    """
    n_folds = 2
    n_rep_boot = 491
    boot_methods = ['normal']

    data = generate_data_iivm
    covariate_mask = data.columns.str.startswith('X')
    x_cols = data.columns[covariate_mask].tolist()

    outcome_learner, propensity_learner = learner[0], learner[1]

    # ---- package implementation ----
    np.random.seed(3141)
    dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols, 'z')
    iivm_model = dml.DoubleMLIIVM(
        dml_data,
        clone(outcome_learner), clone(propensity_learner), clone(propensity_learner),
        n_folds,
        dml_procedure=dml_procedure,
        trimming_threshold=trimming_threshold,
    )
    iivm_model.fit()

    # ---- manual reference implementation ----
    np.random.seed(3141)
    y = data['y'].values
    x = data[x_cols].values
    d = data['d'].values
    z = data['z'].values
    all_smpls = draw_smpls(len(y), n_folds)

    res_manual = fit_iivm(
        y, x, d, z,
        clone(outcome_learner), clone(propensity_learner), clone(propensity_learner),
        all_smpls, dml_procedure, score,
        trimming_threshold=trimming_threshold,
    )

    res_dict = {
        'coef': iivm_model.coef,
        'coef_manual': res_manual['theta'],
        'se': iivm_model.se,
        'se_manual': res_manual['se'],
        'boot_methods': boot_methods,
    }

    for boot_method in boot_methods:
        # identical seed before each bootstrap run
        np.random.seed(3141)
        boot_theta, boot_t_stat = boot_iivm(
            y, d, z,
            res_manual['thetas'], res_manual['ses'],
            res_manual['all_g_hat0'], res_manual['all_g_hat1'],
            res_manual['all_m_hat'],
            res_manual['all_r_hat0'], res_manual['all_r_hat1'],
            all_smpls, score, boot_method, n_rep_boot,
        )

        np.random.seed(3141)
        iivm_model.bootstrap(method=boot_method, n_rep_boot=n_rep_boot)

        res_dict.update({
            f'boot_coef{boot_method}': iivm_model.boot_coef,
            f'boot_t_stat{boot_method}': iivm_model.boot_t_stat,
            f'boot_coef{boot_method}_manual': boot_theta,
            f'boot_t_stat{boot_method}_manual': boot_t_stat,
        })

    return res_dict
def dml_iivm_fixture(generate_data_iivm, learner, score, dml_procedure, trimming_threshold):
    """Compare a DoubleMLIIVM fit with a manual nuisance-based reference fit.

    The package model is fitted and bootstrapped; the same is done with the
    helpers ``fit_nuisance_iivm``, ``iivm_dml1`` / ``iivm_dml2`` and
    ``boot_iivm`` on folds drawn with a shuffled ``KFold``. The global NumPy
    seed is reset to the same value before each side runs (presumably so
    both draw identical sample splits and bootstrap weights -- confirm).

    Parameters
    ----------
    generate_data_iivm
        Data set exposing ``columns`` and the columns ``y``, ``d``, ``z``
        plus covariates whose names start with ``X``.
    learner
        Indexable pair of learners: ``learner[1]`` is used for ``ml_g`` and
        ``learner[0]`` for both ``ml_m`` and ``ml_r``.
    score
        Score handed to the manual estimation and bootstrap helpers.
    dml_procedure
        Either ``'dml1'`` or ``'dml2'``.
    trimming_threshold
        Trimming threshold handed to the package model and to
        ``fit_nuisance_iivm``.

    Returns
    -------
    dict
        Coefficients, standard errors and bootstrap results of the package
        model together with their ``*_manual`` counterparts, plus the list
        of bootstrap methods under ``'boot_methods'``.
    """
    n_folds = 2
    n_rep_boot = 491
    boot_methods = ['normal']

    data = generate_data_iivm
    covariate_mask = data.columns.str.startswith('X')
    x_cols = data.columns[covariate_mask].tolist()

    propensity_learner, outcome_learner = learner[0], learner[1]

    # ---- package implementation ----
    np.random.seed(3141)
    dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols, 'z')
    iivm_model = dml.DoubleMLIIVM(
        dml_data,
        clone(outcome_learner), clone(propensity_learner), clone(propensity_learner),
        n_folds,
        dml_procedure=dml_procedure,
        trimming_threshold=trimming_threshold,
    )
    iivm_model.fit()

    # ---- manual reference implementation ----
    np.random.seed(3141)
    y = data['y'].values
    x = data[x_cols].values
    d = data['d'].values
    z = data['z'].values
    smpls = list(KFold(n_splits=n_folds, shuffle=True).split(x))

    # learners are passed to the helper in the order it is called with here:
    # learner[0], learner[1], learner[0]
    g_hat0, g_hat1, m_hat, r_hat0, r_hat1 = fit_nuisance_iivm(
        y, x, d, z,
        clone(propensity_learner), clone(outcome_learner), clone(propensity_learner),
        smpls,
        trimming_threshold=trimming_threshold,
    )

    if dml_procedure == 'dml1':
        estimate = iivm_dml1
    else:
        assert dml_procedure == 'dml2'
        estimate = iivm_dml2
    res_manual, se_manual = estimate(y, x, d, z,
                                     g_hat0, g_hat1, m_hat, r_hat0, r_hat1,
                                     smpls, score)

    res_dict = {
        'coef': iivm_model.coef,
        'coef_manual': res_manual,
        'se': iivm_model.se,
        'se_manual': se_manual,
        'boot_methods': boot_methods,
    }

    for boot_method in boot_methods:
        # identical seed before each bootstrap run
        np.random.seed(3141)
        boot_theta, boot_t_stat = boot_iivm(res_manual, y, d, z,
                                            g_hat0, g_hat1, m_hat, r_hat0, r_hat1,
                                            smpls, score, se_manual,
                                            boot_method, n_rep_boot, dml_procedure)

        np.random.seed(3141)
        iivm_model.bootstrap(method=boot_method, n_rep_boot=n_rep_boot)

        res_dict.update({
            f'boot_coef{boot_method}': iivm_model.boot_coef,
            f'boot_t_stat{boot_method}': iivm_model.boot_t_stat,
            f'boot_coef{boot_method}_manual': boot_theta,
            f'boot_t_stat{boot_method}_manual': boot_t_stat,
        })

    return res_dict
def dml_iivm_classifier_fixture(generate_data_iivm_binary, learner, score, dml_procedure, trimming_threshold):
    """Compare a DoubleMLIIVM fit on array data with the manual reference.

    The package model is built via ``DoubleMLData.from_arrays``, fitted and
    bootstrapped; the same is done with the helpers ``draw_smpls``,
    ``fit_iivm`` and ``boot_iivm``. The global NumPy seed is reset to the
    same value before each side runs (presumably so both draw identical
    sample splits and bootstrap weights -- confirm).

    Parameters
    ----------
    generate_data_iivm_binary
        Four-element iterable unpacked as ``(x, y, d, z)``.
    learner
        Indexable pair of learners: ``learner[0]`` is used for ``ml_g`` and
        ``learner[1]`` for both ``ml_m`` and ``ml_r``.
    score
        Score handed to the manual fitting and bootstrap helpers.
    dml_procedure
        DML procedure handed to the package model and to ``fit_iivm``.
    trimming_threshold
        Trimming threshold handed to the package model and to ``fit_iivm``.

    Returns
    -------
    dict
        Coefficients, standard errors and bootstrap results of the package
        model together with their ``*_manual`` counterparts, plus the list
        of bootstrap methods under ``'boot_methods'``.
    """
    n_folds = 2
    n_rep_boot = 491
    boot_methods = ['normal']

    x, y, d, z = generate_data_iivm_binary
    outcome_learner, propensity_learner = learner[0], learner[1]

    # ---- package implementation ----
    np.random.seed(3141)
    dml_data = dml.DoubleMLData.from_arrays(x, y, d, z)
    iivm_model = dml.DoubleMLIIVM(
        dml_data,
        clone(outcome_learner), clone(propensity_learner), clone(propensity_learner),
        n_folds,
        dml_procedure=dml_procedure,
        trimming_threshold=trimming_threshold,
    )
    iivm_model.fit()

    # ---- manual reference implementation ----
    np.random.seed(3141)
    all_smpls = draw_smpls(len(y), n_folds)

    res_manual = fit_iivm(
        y, x, d, z,
        clone(outcome_learner), clone(propensity_learner), clone(propensity_learner),
        all_smpls, dml_procedure, score,
        trimming_threshold=trimming_threshold,
    )

    res_dict = {
        'coef': iivm_model.coef,
        'coef_manual': res_manual['theta'],
        'se': iivm_model.se,
        'se_manual': res_manual['se'],
        'boot_methods': boot_methods,
    }

    for boot_method in boot_methods:
        # identical seed before each bootstrap run
        np.random.seed(3141)
        boot_theta, boot_t_stat = boot_iivm(
            y, d, z,
            res_manual['thetas'], res_manual['ses'],
            res_manual['all_g_hat0'], res_manual['all_g_hat1'],
            res_manual['all_m_hat'],
            res_manual['all_r_hat0'], res_manual['all_r_hat1'],
            all_smpls, score, boot_method, n_rep_boot,
        )

        np.random.seed(3141)
        iivm_model.bootstrap(method=boot_method, n_rep_boot=n_rep_boot)

        res_dict.update({
            f'boot_coef{boot_method}': iivm_model.boot_coef,
            f'boot_t_stat{boot_method}': iivm_model.boot_t_stat,
            f'boot_coef{boot_method}_manual': boot_theta,
            f'boot_t_stat{boot_method}_manual': boot_t_stat,
        })

    return res_dict