def dml_plr_fixture(generate_data1, learner, score, dml_procedure):
    """Fit DoubleMLPLR and a manual re-implementation on the same data and splits.

    Re-seeding with the same value before the library fit and before the manual
    computation makes both draw identical sample splits, so the test can assert
    equality of coefficients, standard errors and bootstrap results.
    """
    boot_methods = ['normal']
    n_folds = 2
    n_rep_boot = 502

    # collect data
    data = generate_data1
    x_cols = data.columns[data.columns.str.startswith('X')].tolist()

    # Set machine learning methods for m & g
    ml_g = clone(learner)
    ml_m = clone(learner)

    np.random.seed(3141)
    obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols)
    dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
                                  ml_g, ml_m,
                                  n_folds,
                                  score=score,
                                  dml_procedure=dml_procedure)

    dml_plr_obj.fit()

    # re-seed so the manual computation draws identical sample splits
    np.random.seed(3141)
    y = data['y'].values
    x = data.loc[:, x_cols].values
    d = data['d'].values
    n_obs = len(y)
    all_smpls = draw_smpls(n_obs, n_folds)

    res_manual = fit_plr(y, x, d,
                         clone(learner), clone(learner),
                         all_smpls, dml_procedure, score)

    res_dict = {'coef': dml_plr_obj.coef,
                'coef_manual': res_manual['theta'],
                'se': dml_plr_obj.se,
                'se_manual': res_manual['se'],
                'boot_methods': boot_methods}

    for bootstrap in boot_methods:
        # identical seed for the manual and the library bootstrap
        np.random.seed(3141)
        boot_theta, boot_t_stat = boot_plr(y, d,
                                           res_manual['thetas'], res_manual['ses'],
                                           res_manual['all_g_hat'], res_manual['all_m_hat'],
                                           all_smpls, score, bootstrap, n_rep_boot)

        np.random.seed(3141)
        dml_plr_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
        res_dict['boot_coef' + bootstrap] = dml_plr_obj.boot_coef
        res_dict['boot_t_stat' + bootstrap] = dml_plr_obj.boot_t_stat
        res_dict['boot_coef' + bootstrap + '_manual'] = boot_theta
        res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat

    return res_dict
def dml_pliv_multiway_cluster_old_vs_new_fixture(generate_data_iv, learner):
    """Compare PLIV fitted with explicit multiway-resampling splits against the
    native cluster-data interface; both coefficient estimates should agree.

    NOTE: ``N``, ``M`` and ``obj_dml_cluster_data`` are module-level fixtures/
    constants defined elsewhere in this test module.
    """
    n_folds = 3
    dml_procedure = 'dml1'  # same results are only obtained for dml1

    np.random.seed(3141)
    smpl_sizes = [N, M]
    obj_dml_multiway_resampling = DoubleMLMultiwayResampling(n_folds, smpl_sizes)
    _, smpls_lin_ind = obj_dml_multiway_resampling.split_samples()

    # Set machine learning methods for g, m & r
    ml_g = clone(learner)
    ml_m = clone(learner)
    ml_r = clone(learner)

    # rebuild the cluster data as plain DoubleMLData (cluster vars moved to the index)
    df = obj_dml_cluster_data.data.set_index(['cluster_var_i', 'cluster_var_j'])
    obj_dml_data = dml.DoubleMLData(df,
                                    y_col=obj_dml_cluster_data.y_col,
                                    d_cols=obj_dml_cluster_data.d_cols,
                                    x_cols=obj_dml_cluster_data.x_cols,
                                    z_cols=obj_dml_cluster_data.z_cols)

    dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_data,
                                    ml_g, ml_m, ml_r,
                                    n_folds,
                                    dml_procedure=dml_procedure,
                                    draw_sample_splitting=False)
    # use the externally drawn multiway splits
    dml_pliv_obj.set_sample_splitting(smpls_lin_ind)
    dml_pliv_obj.fit()

    # re-seed so the cluster model draws the same splits internally
    np.random.seed(3141)
    dml_pliv_obj_cluster = dml.DoubleMLPLIV(obj_dml_cluster_data,
                                            ml_g, ml_m, ml_r,
                                            n_folds,
                                            dml_procedure=dml_procedure)
    dml_pliv_obj_cluster.fit()

    res_dict = {'coef': dml_pliv_obj.coef,
                'coef_manual': dml_pliv_obj_cluster.coef}

    return res_dict
def dml_plr_cluster_with_index(generate_data1, learner, dml_procedure):
    """Fit PLR once without clustering and once with one-observation-per-cluster
    clustering (cluster variable = row index); both results should coincide.
    """
    # in the one-way cluster case with exactly one observation per cluster,
    # we get the same result w & w/o clustering
    n_folds = 2

    frame = generate_data1
    feature_cols = frame.columns[frame.columns.str.startswith('X')].tolist()

    # learners for the nuisance functions m & g
    ml_g = clone(learner)
    ml_m = clone(learner)

    plain_data = dml.DoubleMLData(frame, 'y', ['d'], feature_cols)
    np.random.seed(3141)
    plr_plain = dml.DoubleMLPLR(plain_data, ml_g, ml_m, n_folds,
                                dml_procedure=dml_procedure)
    plr_plain.fit()

    # expose the row index as a column so it can serve as the cluster variable
    frame_indexed = frame.reset_index()
    cluster_data = dml.DoubleMLClusterData(frame_indexed,
                                           y_col='y',
                                           d_cols='d',
                                           x_cols=feature_cols,
                                           cluster_cols='index')
    # identical seed -> identical sample splits for the clustered fit
    np.random.seed(3141)
    plr_clustered = dml.DoubleMLPLR(cluster_data, ml_g, ml_m, n_folds,
                                    dml_procedure=dml_procedure)
    plr_clustered.fit()

    return {'coef': plr_plain.coef,
            'coef_manual': plr_clustered.coef,
            'se': plr_plain.se,
            'se_manual': plr_clustered.se}
def dml_irm_pyvsr_fixture(generate_data_irm, idx, score, dml_procedure): n_folds = 2 # collect data (X, y, d) = generate_data_irm[idx] x_cols = [f'X{i + 1}' for i in np.arange(X.shape[1])] data = pd.DataFrame(np.column_stack((X, y, d)), columns=x_cols + ['y', 'd']) # Set machine learning methods for m & g learner_classif = LogisticRegression(penalty='none', solver='newton-cg') learner_reg = LinearRegression() ml_g = clone(learner_reg) ml_m = clone(learner_classif) obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols) dml_irm_obj = dml.DoubleMLIRM(obj_dml_data, ml_g, ml_m, n_folds, score=score, dml_procedure=dml_procedure) np.random.seed(3141) dml_irm_obj.fit() # fit the DML model in R all_train, all_test = export_smpl_split_to_r(dml_irm_obj.smpls[0]) r_dataframe = pandas2ri.py2rpy(data) res_r = r_IRM(r_dataframe, score, dml_procedure, all_train, all_test) res_dict = { 'coef_py': dml_irm_obj.coef, 'coef_r': res_r[0], 'se_py': dml_irm_obj.se, 'se_r': res_r[1] } return res_dict
def dml_iivm_pyvsr_fixture(generate_data_iivm, idx, score, dml_procedure):
    """Fit DoubleMLIIVM in Python and in R on identical data and sample splits,
    returning both coefficient and standard-error estimates for comparison.

    Fix: the Python model previously omitted ``score`` while the R benchmark
    ``r_IIVM`` received it, so the two sides could estimate different scores;
    ``score=score`` is now passed, matching the parallel IRM py-vs-r fixture.
    The unused ``boot_methods`` local was removed (no bootstrap is run here).
    """
    n_folds = 2

    # collect data
    data = generate_data_iivm[idx]
    X_cols = data.columns[data.columns.str.startswith('X')].tolist()

    # Set machine learning methods for g, m & r
    learner_classif = LogisticRegression(penalty='none', solver='newton-cg')
    learner_reg = LinearRegression()
    ml_g = clone(learner_reg)
    ml_m = clone(learner_classif)
    ml_r = clone(learner_classif)

    obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], X_cols, 'z')
    dml_iivm_obj = dml.DoubleMLIIVM(obj_dml_data,
                                    ml_g, ml_m, ml_r,
                                    n_folds,
                                    score=score,
                                    dml_procedure=dml_procedure)

    np.random.seed(3141)
    dml_iivm_obj.fit()

    # fit the DML model in R, reusing the exact sample split drawn in Python
    all_train, all_test = export_smpl_split_to_r(dml_iivm_obj.smpls[0])

    r_dataframe = pandas2ri.py2rpy(data)
    res_r = r_IIVM(r_dataframe, score, dml_procedure, all_train, all_test)

    res_dict = {'coef_py': dml_iivm_obj.coef,
                'coef_r': res_r[0],
                'se_py': dml_iivm_obj.se,
                'se_r': res_r[1]}

    return res_dict
def dml_plr_reestimate_fixture(generate_data1, learner, score, dml_procedure, n_rep):
    """Fit two identically-seeded PLR models; wipe the second one's stored
    estimates and re-run the internal aggregation, which must reproduce them.
    """
    n_folds = 3

    frame = generate_data1
    feature_cols = frame.columns[frame.columns.str.startswith('X')].tolist()

    # learners for the nuisance functions m & g
    ml_g = clone(learner)
    ml_m = clone(learner)

    np.random.seed(3141)
    plr_data = dml.DoubleMLData(frame, 'y', ['d'], feature_cols)
    plr_reference = dml.DoubleMLPLR(plr_data, ml_g, ml_m,
                                    n_folds, n_rep, score, dml_procedure)
    plr_reference.fit()

    # identical seed -> identical splits for the second model
    np.random.seed(3141)
    plr_reestimated = dml.DoubleMLPLR(plr_data, ml_g, ml_m,
                                      n_folds, n_rep, score, dml_procedure)
    plr_reestimated.fit()
    # invalidate the stored estimates, then re-aggregate from the stored scores
    plr_reestimated._coef[0] = np.nan
    plr_reestimated._se[0] = np.nan
    plr_reestimated._est_causal_pars_and_se()

    return {'coef': plr_reference.coef,
            'coef2': plr_reestimated.coef,
            'se': plr_reference.se,
            'se2': plr_reestimated.se}
def dml_plr_pyvsr_fixture(generate_data1, idx, score, dml_procedure):
    """Fit DoubleMLPLR in Python and in R on identical data and sample splits,
    returning both coefficient and standard-error estimates for comparison.
    """
    n_folds = 2
    n_rep_boot = 483  # NOTE(review): unused — no bootstrap is run in this fixture

    # collect data
    data = generate_data1[idx]
    X_cols = data.columns[data.columns.str.startswith('X')].tolist()

    # Set machine learning methods for m & g
    learner = LinearRegression()
    ml_g = clone(learner)
    ml_m = clone(learner)

    obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], X_cols)
    dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
                                  ml_g, ml_m,
                                  n_folds,
                                  score=score,
                                  dml_procedure=dml_procedure)

    #np.random.seed(3141)
    dml_plr_obj.fit()

    # fit the DML model in R, reusing the exact sample split drawn in Python
    all_train, all_test = export_smpl_split_to_r(dml_plr_obj.smpls[0])

    r_dataframe = pandas2ri.py2rpy(data)
    res_r = r_MLPLR(r_dataframe, score, dml_procedure, all_train, all_test)

    res_dict = {'coef_py': dml_plr_obj.coef,
                'coef_r': res_r[0],
                'se_py': dml_plr_obj.se,
                'se_r': res_r[1]}

    return res_dict
def dml_plr_smpls_fixture(generate_data1, learner, score, dml_procedure, n_rep):
    """Feeding a fitted model's sample splits into a fresh model via
    ``set_sample_splitting`` must reproduce the original results exactly.
    """
    n_folds = 3

    frame = generate_data1
    feature_cols = frame.columns[frame.columns.str.startswith('X')].tolist()

    # learners for the nuisance functions m & g
    ml_g = clone(learner)
    ml_m = clone(learner)

    np.random.seed(3141)
    plr_data = dml.DoubleMLData(frame, 'y', ['d'], feature_cols)
    plr_drawn = dml.DoubleMLPLR(plr_data, ml_g, ml_m,
                                n_folds, n_rep, score, dml_procedure)
    plr_drawn.fit()
    drawn_smpls = plr_drawn.smpls

    # second model takes the first model's splits instead of drawing its own
    plr_preset = dml.DoubleMLPLR(plr_data, ml_g, ml_m,
                                 score=score,
                                 dml_procedure=dml_procedure,
                                 draw_sample_splitting=False)
    plr_preset.set_sample_splitting(drawn_smpls)
    plr_preset.fit()

    return {'coef': plr_drawn.coef,
            'coef2': plr_preset.coef,
            'se': plr_drawn.se,
            'se2': plr_preset.se}
def dml_plr_no_cross_fit_fixture(generate_data1, idx, learner, score, n_folds):
    """PLR without cross-fitting: compare the library fit against a manual
    single-fold computation, including bootstrap results.
    """
    boot_methods = ['normal']
    n_rep_boot = 502
    dml_procedure = 'dml1'

    # collect data
    data = generate_data1[idx]
    X_cols = data.columns[data.columns.str.startswith('X')].tolist()

    # Set machine learning methods for m & g
    ml_g = clone(learner)
    ml_m = clone(learner)

    np.random.seed(3141)
    obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], X_cols)
    dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
                                  ml_g, ml_m,
                                  n_folds,
                                  score=score,
                                  dml_procedure=dml_procedure,
                                  apply_cross_fitting=False)

    dml_plr_obj.fit()

    # re-seed so the manual KFold draw matches the library's split
    np.random.seed(3141)
    y = data['y'].values
    X = data.loc[:, X_cols].values
    d = data['d'].values
    if n_folds == 1:
        # no split at all: train and test on the full sample
        smpls = [(np.arange(len(y)), np.arange(len(y)))]
    else:
        resampling = KFold(n_splits=n_folds, shuffle=True)
        smpls = [(train, test) for train, test in resampling.split(X)]
        # no cross-fitting: keep only the first fold
        smpls = [smpls[0]]

    g_hat, m_hat = fit_nuisance_plr(y, X, d,
                                    clone(learner), clone(learner), smpls)

    # this fixture only covers dml1
    assert dml_procedure == 'dml1'
    res_manual, se_manual = plr_dml1(y, X, d,
                                     g_hat, m_hat,
                                     smpls, score)

    res_dict = {'coef': dml_plr_obj.coef,
                'coef_manual': res_manual,
                'se': dml_plr_obj.se,
                'se_manual': se_manual,
                'boot_methods': boot_methods}

    for bootstrap in boot_methods:
        # identical seed for the manual and the library bootstrap
        np.random.seed(3141)
        boot_theta, boot_t_stat = boot_plr(res_manual,
                                           y, d,
                                           g_hat, m_hat,
                                           smpls, score,
                                           se_manual,
                                           bootstrap, n_rep_boot,
                                           dml_procedure,
                                           apply_cross_fitting=False)

        np.random.seed(3141)
        dml_plr_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
        res_dict['boot_coef' + bootstrap] = dml_plr_obj.boot_coef
        res_dict['boot_t_stat' + bootstrap] = dml_plr_obj.boot_t_stat
        res_dict['boot_coef' + bootstrap + '_manual'] = boot_theta
        res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat

    return res_dict
def dml_plr_no_cross_fit_tune_fixture(generate_data1, idx, learner, score, tune_on_folds):
    """PLR without cross-fitting plus hyperparameter tuning: compare the library
    tune+fit against a manual tuning and single-fold computation.
    """
    par_grid = {'ml_g': {'alpha': np.linspace(0.05, .95, 7)},
                'ml_m': {'alpha': np.linspace(0.05, .95, 7)}}
    n_folds_tune = 3

    boot_methods = ['normal']
    n_rep_boot = 502
    dml_procedure = 'dml1'

    # collect data
    data = generate_data1[idx]
    X_cols = data.columns[data.columns.str.startswith('X')].tolist()

    # Set machine learning methods for m & g
    ml_g = Lasso()
    ml_m = Lasso()

    np.random.seed(3141)
    obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], X_cols)
    dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
                                  ml_g, ml_m,
                                  n_folds=2,
                                  score=score,
                                  dml_procedure=dml_procedure,
                                  apply_cross_fitting=False)

    # tune hyperparameters (tuning result itself is not inspected here)
    res_tuning = dml_plr_obj.tune(par_grid, tune_on_folds=tune_on_folds, n_folds_tune=n_folds_tune)

    # fit with tuned parameters
    dml_plr_obj.fit()

    # re-seed so the manual KFold draw matches the library's split
    np.random.seed(3141)
    y = obj_dml_data.y
    X = obj_dml_data.x
    d = obj_dml_data.d
    resampling = KFold(n_splits=2, shuffle=True)
    smpls = [(train, test) for train, test in resampling.split(X)]
    # no cross-fitting: keep only the first fold
    smpls = [smpls[0]]

    if tune_on_folds:
        g_params, m_params = tune_nuisance_plr(y, X, d,
                                               clone(ml_m), clone(ml_g),
                                               smpls, n_folds_tune,
                                               par_grid['ml_g'], par_grid['ml_m'])
        g_hat, m_hat = fit_nuisance_plr(y, X, d,
                                        clone(ml_m), clone(ml_g),
                                        smpls, g_params, m_params)
    else:
        # tune once on the full sample (train = all observations, empty test set)
        xx = [(np.arange(len(y)), np.array([]))]
        g_params, m_params = tune_nuisance_plr(y, X, d,
                                               clone(ml_m), clone(ml_g),
                                               xx, n_folds_tune,
                                               par_grid['ml_g'], par_grid['ml_m'])
        g_hat, m_hat = fit_nuisance_plr(y, X, d,
                                        clone(ml_m), clone(ml_g),
                                        smpls, g_params, m_params)

    # this fixture only covers dml1
    assert dml_procedure == 'dml1'
    res_manual, se_manual = plr_dml1(y, X, d,
                                     g_hat, m_hat,
                                     smpls, score)

    res_dict = {'coef': dml_plr_obj.coef,
                'coef_manual': res_manual,
                'se': dml_plr_obj.se,
                'se_manual': se_manual,
                'boot_methods': boot_methods}

    for bootstrap in boot_methods:
        # identical seed for the manual and the library bootstrap
        np.random.seed(3141)
        boot_theta, boot_t_stat = boot_plr(res_manual,
                                           y, d,
                                           g_hat, m_hat,
                                           smpls, score,
                                           se_manual,
                                           bootstrap, n_rep_boot,
                                           dml_procedure,
                                           apply_cross_fitting=False)

        np.random.seed(3141)
        dml_plr_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
        res_dict['boot_coef' + bootstrap] = dml_plr_obj.boot_coef
        res_dict['boot_t_stat' + bootstrap] = dml_plr_obj.boot_t_stat
        res_dict['boot_coef' + bootstrap + '_manual'] = boot_theta
        res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat

    return res_dict
def dml_plr_rep_no_cross_fit_fixture(generate_data1, idx, learner, score, n_rep):
    """PLR with repeated sample splitting but without cross-fitting: compare the
    library fit against a manual per-repetition computation with median
    aggregation, including bootstrap results.
    """
    boot_methods = ['normal']
    n_folds = 2
    n_rep_boot = 498
    dml_procedure = 'dml1'

    # collect data
    data = generate_data1[idx]
    X_cols = data.columns[data.columns.str.startswith('X')].tolist()

    # Set machine learning methods for m & g
    ml_g = clone(learner)
    ml_m = clone(learner)

    np.random.seed(3141)
    obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], X_cols)
    dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
                                  ml_g, ml_m,
                                  n_folds,
                                  n_rep,
                                  score,
                                  dml_procedure,
                                  apply_cross_fitting=False)

    dml_plr_obj.fit()

    # re-seed so the manual KFold draws match the library's splits
    np.random.seed(3141)
    y = data['y'].values
    X = data.loc[:, X_cols].values
    d = data['d'].values

    all_smpls = []
    for i_rep in range(n_rep):
        resampling = KFold(n_splits=n_folds, shuffle=True)
        smpls = [(train, test) for train, test in resampling.split(X)]
        all_smpls.append(smpls)
    # adapt to do no-cross-fitting in each repetition
    all_smpls = [[xx[0]] for xx in all_smpls]

    thetas = np.zeros(n_rep)
    ses = np.zeros(n_rep)
    all_g_hat = list()
    all_m_hat = list()
    for i_rep in range(n_rep):
        smpls = all_smpls[i_rep]
        g_hat, m_hat = fit_nuisance_plr(y, X, d,
                                        clone(learner), clone(learner), smpls)
        all_g_hat.append(g_hat)
        all_m_hat.append(m_hat)
        thetas[i_rep], ses[i_rep] = plr_dml1(y, X, d,
                                             all_g_hat[i_rep], all_m_hat[i_rep],
                                             smpls, score)

    # median aggregation over repetitions; the scaling uses the test-set size
    # of the (single) fold, which is identical across repetitions
    res_manual = np.median(thetas)
    se_manual = np.sqrt(np.median(np.power(ses, 2) * len(smpls[0][1]) +
                                  np.power(thetas - res_manual, 2)) / len(smpls[0][1]))

    res_dict = {'coef': dml_plr_obj.coef,
                'coef_manual': res_manual,
                'se': dml_plr_obj.se,
                'se_manual': se_manual,
                'boot_methods': boot_methods}

    for bootstrap in boot_methods:
        # identical seed for the manual and the library bootstrap
        np.random.seed(3141)
        all_boot_theta = list()
        all_boot_t_stat = list()
        for i_rep in range(n_rep):
            smpls = all_smpls[i_rep]
            boot_theta, boot_t_stat = boot_plr(thetas[i_rep],
                                               y, d,
                                               all_g_hat[i_rep], all_m_hat[i_rep],
                                               smpls, score,
                                               ses[i_rep],
                                               bootstrap, n_rep_boot,
                                               dml_procedure,
                                               apply_cross_fitting=False)
            all_boot_theta.append(boot_theta)
            all_boot_t_stat.append(boot_t_stat)
        boot_theta = np.hstack(all_boot_theta)
        boot_t_stat = np.hstack(all_boot_t_stat)

        np.random.seed(3141)
        dml_plr_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
        res_dict['boot_coef' + bootstrap] = dml_plr_obj.boot_coef
        res_dict['boot_t_stat' + bootstrap] = dml_plr_obj.boot_t_stat
        res_dict['boot_coef' + bootstrap + '_manual'] = boot_theta
        res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat

    return res_dict
def dml_pliv_fixture(generate_data_iv, learner, score, dml_procedure):
    """Fit DoubleMLPLIV and a manual re-implementation on the same data and
    splits; compare coefficients, standard errors and all bootstrap methods.
    """
    boot_methods = ['Bayes', 'normal', 'wild']
    n_folds = 2
    n_rep_boot = 503

    # collect data
    data = generate_data_iv
    x_cols = data.columns[data.columns.str.startswith('X')].tolist()

    # Set machine learning methods for g, m & r
    ml_g = clone(learner)
    ml_m = clone(learner)
    ml_r = clone(learner)

    np.random.seed(3141)
    obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols, 'Z1')
    dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_data,
                                    ml_g, ml_m, ml_r,
                                    n_folds,
                                    dml_procedure=dml_procedure)

    dml_pliv_obj.fit()

    # re-seed so the manual KFold draw matches the library's split
    np.random.seed(3141)
    y = data['y'].values
    x = data.loc[:, x_cols].values
    d = data['d'].values
    z = data['Z1'].values
    resampling = KFold(n_splits=n_folds, shuffle=True)
    smpls = [(train, test) for train, test in resampling.split(x)]

    g_hat, m_hat, r_hat = fit_nuisance_pliv(y, x, d, z,
                                            clone(learner), clone(learner), clone(learner),
                                            smpls)

    if dml_procedure == 'dml1':
        res_manual, se_manual = pliv_dml1(y, x, d, z,
                                          g_hat, m_hat, r_hat,
                                          smpls, score)
    else:
        assert dml_procedure == 'dml2'
        res_manual, se_manual = pliv_dml2(y, x, d, z,
                                          g_hat, m_hat, r_hat,
                                          smpls, score)

    res_dict = {'coef': dml_pliv_obj.coef,
                'coef_manual': res_manual,
                'se': dml_pliv_obj.se,
                'se_manual': se_manual,
                'boot_methods': boot_methods}

    for bootstrap in boot_methods:
        # identical seed for the manual and the library bootstrap
        np.random.seed(3141)
        boot_theta, boot_t_stat = boot_pliv(res_manual,
                                            y, d, z,
                                            g_hat, m_hat, r_hat,
                                            smpls, score,
                                            se_manual,
                                            bootstrap, n_rep_boot,
                                            dml_procedure)

        np.random.seed(3141)
        dml_pliv_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
        res_dict['boot_coef' + bootstrap] = dml_pliv_obj.boot_coef
        res_dict['boot_t_stat' + bootstrap] = dml_pliv_obj.boot_t_stat
        res_dict['boot_coef' + bootstrap + '_manual'] = boot_theta
        res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat

    return res_dict
def dml_plr_fixture(generate_data1, learner, score, dml_procedure, n_rep):
    """PLR with repeated sample splitting: compare the library fit against a
    manual per-repetition computation with median aggregation, including
    bootstrap results.
    """
    boot_methods = ['normal']
    n_folds = 2
    n_rep_boot = 498

    # collect data
    data = generate_data1
    x_cols = data.columns[data.columns.str.startswith('X')].tolist()

    # Set machine learning methods for m & g
    ml_g = clone(learner)
    ml_m = clone(learner)

    np.random.seed(3141)
    obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols)
    dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
                                  ml_g, ml_m,
                                  n_folds,
                                  n_rep,
                                  score,
                                  dml_procedure)

    dml_plr_obj.fit()

    # re-seed so the manual KFold draws match the library's splits
    np.random.seed(3141)
    y = data['y'].values
    x = data.loc[:, x_cols].values
    d = data['d'].values
    n_obs = len(y)

    all_smpls = []
    for i_rep in range(n_rep):
        resampling = KFold(n_splits=n_folds, shuffle=True)
        smpls = [(train, test) for train, test in resampling.split(x)]
        all_smpls.append(smpls)

    thetas = np.zeros(n_rep)
    ses = np.zeros(n_rep)
    all_g_hat = list()
    all_m_hat = list()
    for i_rep in range(n_rep):
        smpls = all_smpls[i_rep]
        g_hat, m_hat = fit_nuisance_plr(y, x, d,
                                        clone(learner), clone(learner), smpls)
        all_g_hat.append(g_hat)
        all_m_hat.append(m_hat)
        if dml_procedure == 'dml1':
            thetas[i_rep], ses[i_rep] = plr_dml1(y, x, d,
                                                 all_g_hat[i_rep], all_m_hat[i_rep],
                                                 smpls, score)
        else:
            assert dml_procedure == 'dml2'
            thetas[i_rep], ses[i_rep] = plr_dml2(y, x, d,
                                                 all_g_hat[i_rep], all_m_hat[i_rep],
                                                 smpls, score)

    # median aggregation over repetitions
    res_manual = np.median(thetas)
    se_manual = np.sqrt(np.median(np.power(ses, 2)*n_obs + np.power(thetas - res_manual, 2))/n_obs)

    res_dict = {'coef': dml_plr_obj.coef,
                'coef_manual': res_manual,
                'se': dml_plr_obj.se,
                'se_manual': se_manual,
                'boot_methods': boot_methods}

    for bootstrap in boot_methods:
        # identical seed for the manual and the library bootstrap
        np.random.seed(3141)
        all_boot_theta = list()
        all_boot_t_stat = list()
        for i_rep in range(n_rep):
            smpls = all_smpls[i_rep]
            boot_theta, boot_t_stat = boot_plr(thetas[i_rep],
                                               y, d,
                                               all_g_hat[i_rep], all_m_hat[i_rep],
                                               smpls, score,
                                               ses[i_rep],
                                               bootstrap, n_rep_boot,
                                               dml_procedure)
            all_boot_theta.append(boot_theta)
            all_boot_t_stat.append(boot_t_stat)
        boot_theta = np.hstack(all_boot_theta)
        boot_t_stat = np.hstack(all_boot_t_stat)

        np.random.seed(3141)
        dml_plr_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
        res_dict['boot_coef' + bootstrap] = dml_plr_obj.boot_coef
        res_dict['boot_t_stat' + bootstrap] = dml_plr_obj.boot_t_stat
        res_dict['boot_coef' + bootstrap + '_manual'] = boot_theta
        res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat

    return res_dict
def dml_iivm_fixture(generate_data_iivm, learner_g, learner_m, learner_r, score, dml_procedure, tune_on_folds):
    """IIVM with hyperparameter tuning: compare the library tune+fit against a
    manual tuning and fitting of all five nuisance components, including
    bootstrap results.
    """
    par_grid = {'ml_g': get_par_grid(learner_g),
                'ml_m': get_par_grid(learner_m),
                'ml_r': get_par_grid(learner_r)}
    n_folds_tune = 4

    boot_methods = ['normal']
    n_folds = 2
    n_rep_boot = 491

    # collect data
    data = generate_data_iivm
    x_cols = data.columns[data.columns.str.startswith('X')].tolist()

    # Set machine learning methods for m, g & r
    ml_g = clone(learner_g)
    ml_m = clone(learner_m)
    ml_r = clone(learner_r)

    np.random.seed(3141)
    obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols, 'z')
    dml_iivm_obj = dml.DoubleMLIIVM(obj_dml_data,
                                    ml_g, ml_m, ml_r,
                                    n_folds,
                                    dml_procedure=dml_procedure)

    # tune hyperparameters
    _ = dml_iivm_obj.tune(par_grid, tune_on_folds=tune_on_folds, n_folds_tune=n_folds_tune)

    dml_iivm_obj.fit()

    # re-seed so the manual KFold draw matches the library's split
    np.random.seed(3141)
    y = data['y'].values
    x = data.loc[:, x_cols].values
    d = data['d'].values
    z = data['z'].values
    resampling = KFold(n_splits=n_folds, shuffle=True)
    smpls = [(train, test) for train, test in resampling.split(x)]

    if tune_on_folds:
        g0_params, g1_params, m_params, r0_params, r1_params = \
            tune_nuisance_iivm(y, x, d, z,
                               clone(learner_m), clone(learner_g), clone(learner_r),
                               smpls, n_folds_tune,
                               par_grid['ml_g'], par_grid['ml_m'], par_grid['ml_r'])
        g_hat0, g_hat1, m_hat, r_hat0, r_hat1 = \
            fit_nuisance_iivm(y, x, d, z,
                              clone(learner_m), clone(learner_g), clone(learner_r),
                              smpls,
                              g0_params, g1_params, m_params, r0_params, r1_params)
    else:
        # tune once on the full sample (empty test set), then replicate the
        # tuned parameters for every fold
        xx = [(np.arange(data.shape[0]), np.array([]))]
        g0_params, g1_params, m_params, r0_params, r1_params = \
            tune_nuisance_iivm(y, x, d, z,
                               clone(learner_m), clone(learner_g), clone(learner_r),
                               xx, n_folds_tune,
                               par_grid['ml_g'], par_grid['ml_m'], par_grid['ml_r'])
        g_hat0, g_hat1, m_hat, r_hat0, r_hat1 = \
            fit_nuisance_iivm(y, x, d, z,
                              clone(learner_m), clone(learner_g), clone(learner_r),
                              smpls,
                              g0_params * n_folds, g1_params * n_folds, m_params * n_folds,
                              r0_params * n_folds, r1_params * n_folds)

    if dml_procedure == 'dml1':
        res_manual, se_manual = iivm_dml1(y, x, d, z,
                                          g_hat0, g_hat1, m_hat, r_hat0, r_hat1,
                                          smpls, score)
    else:
        assert dml_procedure == 'dml2'
        res_manual, se_manual = iivm_dml2(y, x, d, z,
                                          g_hat0, g_hat1, m_hat, r_hat0, r_hat1,
                                          smpls, score)

    res_dict = {'coef': dml_iivm_obj.coef,
                'coef_manual': res_manual,
                'se': dml_iivm_obj.se,
                'se_manual': se_manual,
                'boot_methods': boot_methods}

    for bootstrap in boot_methods:
        # identical seed for the manual and the library bootstrap
        np.random.seed(3141)
        boot_theta, boot_t_stat = boot_iivm(res_manual,
                                            y, d, z,
                                            g_hat0, g_hat1, m_hat, r_hat0, r_hat1,
                                            smpls, score,
                                            se_manual,
                                            bootstrap, n_rep_boot,
                                            dml_procedure)

        np.random.seed(3141)
        dml_iivm_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
        res_dict['boot_coef' + bootstrap] = dml_iivm_obj.boot_coef
        res_dict['boot_t_stat' + bootstrap] = dml_iivm_obj.boot_t_stat
        res_dict['boot_coef' + bootstrap + '_manual'] = boot_theta
        res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat

    return res_dict
def dml_iivm_fixture(generate_data_iivm, learner, score, dml_procedure, trimming_threshold):
    """IIVM with propensity trimming: compare the library fit against the manual
    ``fit_iivm`` re-implementation on identical splits, including bootstrap.

    ``learner`` is a pair: element 0 is used for g, element 1 for m and r.
    """
    boot_methods = ['normal']
    n_folds = 2
    n_rep_boot = 491

    # collect data
    data = generate_data_iivm
    x_cols = data.columns[data.columns.str.startswith('X')].tolist()

    # Set machine learning methods for m & g
    ml_g = clone(learner[0])
    ml_m = clone(learner[1])
    ml_r = clone(learner[1])

    np.random.seed(3141)
    obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols, 'z')
    dml_iivm_obj = dml.DoubleMLIIVM(obj_dml_data,
                                    ml_g, ml_m, ml_r,
                                    n_folds,
                                    dml_procedure=dml_procedure,
                                    trimming_threshold=trimming_threshold)

    dml_iivm_obj.fit()

    # re-seed so the manual computation draws identical sample splits
    np.random.seed(3141)
    y = data['y'].values
    x = data.loc[:, x_cols].values
    d = data['d'].values
    z = data['z'].values
    n_obs = len(y)
    all_smpls = draw_smpls(n_obs, n_folds)

    res_manual = fit_iivm(y, x, d, z,
                          clone(learner[0]), clone(learner[1]), clone(learner[1]),
                          all_smpls, dml_procedure, score,
                          trimming_threshold=trimming_threshold)

    res_dict = {'coef': dml_iivm_obj.coef,
                'coef_manual': res_manual['theta'],
                'se': dml_iivm_obj.se,
                'se_manual': res_manual['se'],
                'boot_methods': boot_methods}

    for bootstrap in boot_methods:
        # identical seed for the manual and the library bootstrap
        np.random.seed(3141)
        boot_theta, boot_t_stat = boot_iivm(
            y, d, z,
            res_manual['thetas'], res_manual['ses'],
            res_manual['all_g_hat0'], res_manual['all_g_hat1'],
            res_manual['all_m_hat'], res_manual['all_r_hat0'], res_manual['all_r_hat1'],
            all_smpls, score, bootstrap, n_rep_boot)

        np.random.seed(3141)
        dml_iivm_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
        res_dict['boot_coef' + bootstrap] = dml_iivm_obj.boot_coef
        res_dict['boot_t_stat' + bootstrap] = dml_iivm_obj.boot_t_stat
        res_dict['boot_coef' + bootstrap + '_manual'] = boot_theta
        res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat

    return res_dict
def dml_iivm_fixture(generate_data_iivm, learner, score, dml_procedure, trimming_threshold):
    """IIVM with propensity trimming: compare the library fit against a manual
    fold-wise nuisance fit + dml1/dml2 aggregation, including bootstrap.

    ``learner`` is a pair: element 1 is used for g, element 0 for m and r.
    """
    boot_methods = ['normal']
    n_folds = 2
    n_rep_boot = 491

    # collect data
    data = generate_data_iivm
    x_cols = data.columns[data.columns.str.startswith('X')].tolist()

    # Set machine learning methods for m & g
    ml_g = clone(learner[1])
    ml_m = clone(learner[0])
    ml_r = clone(learner[0])

    np.random.seed(3141)
    obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols, 'z')
    dml_iivm_obj = dml.DoubleMLIIVM(obj_dml_data,
                                    ml_g, ml_m, ml_r,
                                    n_folds,
                                    dml_procedure=dml_procedure,
                                    trimming_threshold=trimming_threshold)

    dml_iivm_obj.fit()

    # re-seed so the manual KFold draw matches the library's split
    np.random.seed(3141)
    y = data['y'].values
    x = data.loc[:, x_cols].values
    d = data['d'].values
    z = data['z'].values
    resampling = KFold(n_splits=n_folds, shuffle=True)
    smpls = [(train, test) for train, test in resampling.split(x)]

    g_hat0, g_hat1, m_hat, r_hat0, r_hat1 = fit_nuisance_iivm(
        y, x, d, z,
        clone(learner[0]), clone(learner[1]), clone(learner[0]),
        smpls, trimming_threshold=trimming_threshold)

    if dml_procedure == 'dml1':
        res_manual, se_manual = iivm_dml1(y, x, d, z,
                                          g_hat0, g_hat1, m_hat, r_hat0, r_hat1,
                                          smpls, score)
    else:
        assert dml_procedure == 'dml2'
        res_manual, se_manual = iivm_dml2(y, x, d, z,
                                          g_hat0, g_hat1, m_hat, r_hat0, r_hat1,
                                          smpls, score)

    res_dict = {'coef': dml_iivm_obj.coef,
                'coef_manual': res_manual,
                'se': dml_iivm_obj.se,
                'se_manual': se_manual,
                'boot_methods': boot_methods}

    for bootstrap in boot_methods:
        # identical seed for the manual and the library bootstrap
        np.random.seed(3141)
        boot_theta, boot_t_stat = boot_iivm(res_manual,
                                            y, d, z,
                                            g_hat0, g_hat1, m_hat, r_hat0, r_hat1,
                                            smpls, score,
                                            se_manual,
                                            bootstrap, n_rep_boot,
                                            dml_procedure)

        np.random.seed(3141)
        dml_iivm_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
        res_dict['boot_coef' + bootstrap] = dml_iivm_obj.boot_coef
        res_dict['boot_t_stat' + bootstrap] = dml_iivm_obj.boot_t_stat
        res_dict['boot_coef' + bootstrap + '_manual'] = boot_theta
        res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat

    return res_dict
def dml_plr_ols_manual_fixture(generate_data1, score, dml_procedure):
    """PLR with linear learners vs. a fully manual OLS implementation of the
    nuisance fits (via ``scipy.linalg.lstsq``) on a deterministic half/half
    split; compare coefficients, standard errors and all bootstrap methods.
    """
    learner = LinearRegression()
    boot_methods = ['Bayes', 'normal', 'wild']
    n_folds = 2
    n_rep_boot = 501

    # collect data
    data = generate_data1
    x_cols = data.columns[data.columns.str.startswith('X')].tolist()

    # Set machine learning methods for m & g
    ml_g = clone(learner)
    ml_m = clone(learner)

    obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols)
    dml_plr_obj = dml.DoubleMLPLR(obj_dml_data,
                                  ml_g, ml_m,
                                  n_folds,
                                  score=score,
                                  dml_procedure=dml_procedure)

    # deterministic half/half sample split (no RNG involved)
    n = data.shape[0]
    this_smpl = list()
    xx = int(n / 2)
    this_smpl.append((np.arange(xx, n), np.arange(0, xx)))
    this_smpl.append((np.arange(0, xx), np.arange(xx, n)))
    smpls = [this_smpl]
    dml_plr_obj.set_sample_splitting(smpls)

    dml_plr_obj.fit()

    y = data['y'].values
    x = data.loc[:, x_cols].values
    d = data['d'].values

    # add column of ones for intercept
    o = np.ones((n, 1))
    x = np.append(x, o, axis=1)

    smpls = dml_plr_obj.smpls[0]

    # out-of-sample OLS predictions for the g nuisance (regression of y on x)
    g_hat = []
    for (train_index, test_index) in smpls:
        ols_est = scipy.linalg.lstsq(x[train_index], y[train_index])[0]
        g_hat.append(np.dot(x[test_index], ols_est))

    # out-of-sample OLS predictions for the m nuisance (regression of d on x)
    m_hat = []
    for (train_index, test_index) in smpls:
        ols_est = scipy.linalg.lstsq(x[train_index], d[train_index])[0]
        m_hat.append(np.dot(x[test_index], ols_est))

    if dml_procedure == 'dml1':
        res_manual, se_manual = plr_dml1(y, x, d,
                                         g_hat, m_hat,
                                         smpls, score)
    else:
        assert dml_procedure == 'dml2'
        res_manual, se_manual = plr_dml2(y, x, d,
                                         g_hat, m_hat,
                                         smpls, score)

    res_dict = {'coef': dml_plr_obj.coef,
                'coef_manual': res_manual,
                'se': dml_plr_obj.se,
                'se_manual': se_manual,
                'boot_methods': boot_methods}

    for bootstrap in boot_methods:
        # identical seed for the manual and the library bootstrap
        np.random.seed(3141)
        boot_theta, boot_t_stat = boot_plr(y, d,
                                           [res_manual], [se_manual],
                                           [g_hat], [m_hat],
                                           [smpls], score, bootstrap, n_rep_boot)

        np.random.seed(3141)
        dml_plr_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
        res_dict['boot_coef' + bootstrap] = dml_plr_obj.boot_coef
        res_dict['boot_t_stat' + bootstrap] = dml_plr_obj.boot_t_stat
        res_dict['boot_coef' + bootstrap + '_manual'] = boot_theta
        res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat

    return res_dict
def dml_pliv_partial_z_fixture(generate_data_pliv_partialZ, learner_r, score, dml_procedure, tune_on_folds):
    """PLIV with partialling-out of Z only, including hyperparameter tuning:
    compare the library tune+fit against a manual tuning and computation,
    including all bootstrap methods.
    """
    par_grid = {'ml_r': get_par_grid(learner_r)}
    n_folds_tune = 4

    boot_methods = ['Bayes', 'normal', 'wild']
    n_folds = 2
    n_rep_boot = 503

    # collect data
    data = generate_data_pliv_partialZ
    x_cols = data.columns[data.columns.str.startswith('X')].tolist()
    z_cols = data.columns[data.columns.str.startswith('Z')].tolist()

    # Set machine learning methods for r
    ml_r = clone(learner_r)

    np.random.seed(3141)
    obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols, z_cols)
    # _partialZ is the (non-public) constructor for the partial-Z variant
    dml_pliv_obj = dml.DoubleMLPLIV._partialZ(obj_dml_data,
                                              ml_r,
                                              n_folds,
                                              dml_procedure=dml_procedure)

    # tune hyperparameters
    _ = dml_pliv_obj.tune(par_grid, tune_on_folds=tune_on_folds, n_folds_tune=n_folds_tune)

    dml_pliv_obj.fit()

    # re-seed so the manual KFold draw matches the library's split
    np.random.seed(3141)
    y = data['y'].values
    x = data.loc[:, x_cols].values
    d = data['d'].values
    z = data.loc[:, z_cols].values
    resampling = KFold(n_splits=n_folds, shuffle=True)
    smpls = [(train, test) for train, test in resampling.split(x)]

    if tune_on_folds:
        r_params = tune_nuisance_pliv_partial_z(y, x, d, z,
                                                clone(learner_r),
                                                smpls, n_folds_tune,
                                                par_grid['ml_r'])
        r_hat = fit_nuisance_pliv_partial_z(y, x, d, z,
                                            clone(learner_r),
                                            smpls, r_params)
    else:
        # tune once on the full sample (empty test set), then replicate the
        # tuned parameters for every fold
        xx = [(np.arange(len(y)), np.array([]))]
        r_params = tune_nuisance_pliv_partial_z(y, x, d, z,
                                                clone(learner_r),
                                                xx, n_folds_tune,
                                                par_grid['ml_r'])
        r_hat = fit_nuisance_pliv_partial_z(y, x, d, z,
                                            clone(learner_r),
                                            smpls, r_params * n_folds)

    if dml_procedure == 'dml1':
        res_manual, se_manual = pliv_partial_z_dml1(y, x, d, z,
                                                    r_hat,
                                                    smpls, score)
    else:
        assert dml_procedure == 'dml2'
        res_manual, se_manual = pliv_partial_z_dml2(y, x, d, z,
                                                    r_hat,
                                                    smpls, score)

    res_dict = {'coef': dml_pliv_obj.coef,
                'coef_manual': res_manual,
                'se': dml_pliv_obj.se,
                'se_manual': se_manual,
                'boot_methods': boot_methods}

    for bootstrap in boot_methods:
        # identical seed for the manual and the library bootstrap
        np.random.seed(3141)
        boot_theta, boot_t_stat = boot_pliv_partial_z(res_manual,
                                                      y, d, z,
                                                      r_hat,
                                                      smpls, score,
                                                      se_manual,
                                                      bootstrap, n_rep_boot,
                                                      dml_procedure)

        np.random.seed(3141)
        dml_pliv_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
        res_dict['boot_coef' + bootstrap] = dml_pliv_obj.boot_coef
        res_dict['boot_t_stat' + bootstrap] = dml_pliv_obj.boot_t_stat
        res_dict['boot_coef' + bootstrap + '_manual'] = boot_theta
        res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat

    return res_dict
def dml_pliv_no_cross_fit_fixture(generate_data_iv, learner, score, n_folds):
    """PLIV without cross-fitting: compare the library fit against a manual
    single-fold computation, including bootstrap results.
    """
    boot_methods = ['normal']
    n_rep_boot = 503
    dml_procedure = 'dml1'

    # collect data
    data = generate_data_iv
    x_cols = data.columns[data.columns.str.startswith('X')].tolist()

    # Set machine learning methods for g, m & r
    ml_g = clone(learner)
    ml_m = clone(learner)
    ml_r = clone(learner)

    np.random.seed(3141)
    obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols, 'Z1')
    dml_pliv_obj = dml.DoubleMLPLIV(obj_dml_data,
                                    ml_g, ml_m, ml_r,
                                    n_folds,
                                    dml_procedure=dml_procedure,
                                    apply_cross_fitting=False)

    dml_pliv_obj.fit()

    # re-seed so the manual split draw matches the library's
    np.random.seed(3141)
    y = data['y'].values
    x = data.loc[:, x_cols].values
    d = data['d'].values
    z = data['Z1'].values
    if n_folds == 1:
        # no split at all: train and test on the full sample
        smpls = [(np.arange(len(y)), np.arange(len(y)))]
    else:
        n_obs = len(y)
        all_smpls = draw_smpls(n_obs, n_folds)
        smpls = all_smpls[0]
        # no cross-fitting: keep only the first fold
        smpls = [smpls[0]]

    res_manual = fit_pliv(y, x, d, z,
                          clone(learner), clone(learner), clone(learner),
                          [smpls], dml_procedure, score)

    res_dict = {'coef': dml_pliv_obj.coef,
                'coef_manual': res_manual['theta'],
                'se': dml_pliv_obj.se,
                'se_manual': res_manual['se'],
                'boot_methods': boot_methods}

    for bootstrap in boot_methods:
        # identical seed for the manual and the library bootstrap
        np.random.seed(3141)
        boot_theta, boot_t_stat = boot_pliv(y, d, z,
                                            res_manual['thetas'], res_manual['ses'],
                                            res_manual['all_g_hat'], res_manual['all_m_hat'],
                                            res_manual['all_r_hat'],
                                            [smpls], score,
                                            bootstrap, n_rep_boot,
                                            apply_cross_fitting=False)

        np.random.seed(3141)
        dml_pliv_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
        res_dict['boot_coef' + bootstrap] = dml_pliv_obj.boot_coef
        res_dict['boot_t_stat' + bootstrap] = dml_pliv_obj.boot_t_stat
        res_dict['boot_coef' + bootstrap + '_manual'] = boot_theta
        res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat

    return res_dict
def dml_plr_fixture(generate_data1, idx, score, dml_procedure):
    """Setting learner parameters externally via ``set_ml_nuisance_params``
    must reproduce a model whose learners were constructed with those
    parameters directly — estimates and bootstrap results included.
    """
    boot_methods = ['normal']
    n_folds = 2
    n_rep_boot = 502

    frame = generate_data1[idx]
    X_cols = frame.columns[frame.columns.str.startswith('X')].tolist()

    alpha = 0.05
    base_learner = Lasso(alpha=alpha)

    # learners for m & g, constructed with alpha baked in
    ml_g = clone(base_learner)
    ml_m = clone(base_learner)

    np.random.seed(3141)
    plr_data = dml.DoubleMLData(frame, 'y', ['d'])
    plr_direct = dml.DoubleMLPLR(plr_data, ml_g, ml_m, n_folds,
                                 score=score,
                                 dml_procedure=dml_procedure)
    plr_direct.fit()

    # identical seed; default-constructed learners, alpha injected afterwards
    np.random.seed(3141)
    base_learner = Lasso()
    ml_g = clone(base_learner)
    ml_m = clone(base_learner)
    plr_ext = dml.DoubleMLPLR(plr_data, ml_g, ml_m, n_folds,
                              score=score,
                              dml_procedure=dml_procedure)
    plr_ext.set_ml_nuisance_params('ml_g', 'd', {'alpha': alpha})
    plr_ext.set_ml_nuisance_params('ml_m', 'd', {'alpha': alpha})
    plr_ext.fit()

    res_dict = {'coef': plr_direct.coef,
                'coef_manual': plr_ext.coef,
                'se': plr_direct.se,
                'se_manual': plr_ext.se,
                'boot_methods': boot_methods}

    for bootstrap in boot_methods:
        # same seed for both bootstraps so the draws coincide
        np.random.seed(314122)
        plr_direct.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
        res_dict['boot_coef' + bootstrap] = plr_direct.boot_coef
        res_dict['boot_t_stat' + bootstrap] = plr_direct.boot_t_stat

        np.random.seed(314122)
        plr_ext.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
        res_dict['boot_coef' + bootstrap + '_manual'] = plr_ext.boot_coef
        res_dict['boot_t_stat' + bootstrap + '_manual'] = plr_ext.boot_t_stat

    return res_dict
def dml_plr_multitreat_fixture(generate_data_bivariate, generate_data_toeplitz, idx,
                               learner, score, dml_procedure):
    """Fit a multi-treatment PLR model via the package and manually per treatment.

    Returns a dict with coefficients, standard errors and bootstrap results
    from both code paths for the equality assertions in the tests.
    """
    boot_methods = ['normal']
    n_folds = 2
    n_rep_boot = 483

    # collect data (two dataset collections, indexed jointly by idx)
    if idx < n_datasets:
        data = generate_data_bivariate[idx]
    else:
        data = generate_data_toeplitz[idx - n_datasets]
    x_col_names = data.columns[data.columns.str.startswith('X')].tolist()
    d_col_names = data.columns[data.columns.str.startswith('d')].tolist()

    # one learner clone per nuisance part m & g
    ml_g = clone(learner)
    ml_m = clone(learner)

    np.random.seed(3141)
    dml_data = dml.DoubleMLData(data, 'y', d_col_names, x_col_names)
    dml_plr_obj = dml.DoubleMLPLR(dml_data, ml_g, ml_m, n_folds,
                                  score=score,
                                  dml_procedure=dml_procedure)
    dml_plr_obj.fit()

    np.random.seed(3141)
    y = data['y'].values
    x = data.loc[:, x_col_names].values
    d = data.loc[:, d_col_names].values
    resampling = KFold(n_splits=n_folds, shuffle=True)
    smpls = list(resampling.split(x))

    n_treat = d.shape[1]
    coef_manual = np.full(n_treat, np.nan)
    se_manual = np.full(n_treat, np.nan)
    all_g_hat = []
    all_m_hat = []
    # select the estimation routine once; unknown procedures leave NaNs in place
    estimate = {'dml1': plr_dml1, 'dml2': plr_dml2}.get(dml_procedure)
    for treat_idx in range(n_treat):
        # the remaining treatment columns act as additional controls
        x_aug = np.hstack((x, np.delete(d, treat_idx, axis=1)))
        g_hat, m_hat = fit_nuisance_plr(y, x_aug, d[:, treat_idx],
                                        clone(learner), clone(learner), smpls)
        all_g_hat.append(g_hat)
        all_m_hat.append(m_hat)
        if estimate is not None:
            coef_manual[treat_idx], se_manual[treat_idx] = estimate(
                y, x_aug, d[:, treat_idx], g_hat, m_hat, smpls, score)

    res_dict = {'coef': dml_plr_obj.coef,
                'coef_manual': coef_manual,
                'se': dml_plr_obj.se,
                'se_manual': se_manual,
                'boot_methods': boot_methods}

    for bootstrap in boot_methods:
        np.random.seed(3141)
        boot_theta, boot_t_stat = boot_plr(coef_manual, y, d,
                                           all_g_hat, all_m_hat,
                                           smpls, score, se_manual,
                                           bootstrap, n_rep_boot,
                                           dml_procedure)
        np.random.seed(3141)
        dml_plr_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
        res_dict[f'boot_coef{bootstrap}'] = dml_plr_obj.boot_coef
        res_dict[f'boot_t_stat{bootstrap}'] = dml_plr_obj.boot_t_stat
        res_dict[f'boot_coef{bootstrap}_manual'] = boot_theta
        res_dict[f'boot_t_stat{bootstrap}_manual'] = boot_t_stat

    return res_dict
def dml_pliv_fixture(generate_data_iv, learner_g, learner_m, learner_r,
                     score, dml_procedure, tune_on_folds):
    """Tune and fit a PLIV model via the package and via the manual helpers.

    Returns a dict with coefficients, standard errors and bootstrap results
    from both code paths for the equality assertions in the tests.
    """
    par_grid = {'ml_g': get_par_grid(learner_g),
                'ml_m': get_par_grid(learner_m),
                'ml_r': get_par_grid(learner_r)}
    n_folds_tune = 4
    boot_methods = ['Bayes', 'normal', 'wild']
    n_folds = 2
    n_rep_boot = 503

    # collect data
    data = generate_data_iv
    x_cols = data.columns[data.columns.str.startswith('X')].tolist()

    # one learner per nuisance part g, m & r
    ml_g = clone(learner_g)
    ml_m = clone(learner_m)
    ml_r = clone(learner_r)

    np.random.seed(3141)
    dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols, 'Z1')
    dml_pliv_obj = dml.DoubleMLPLIV(dml_data, ml_g, ml_m, ml_r, n_folds,
                                    dml_procedure=dml_procedure)
    # tune hyperparameters, then fit with the tuned parameters
    _ = dml_pliv_obj.tune(par_grid, tune_on_folds=tune_on_folds,
                          n_folds_tune=n_folds_tune)
    dml_pliv_obj.fit()

    np.random.seed(3141)
    y = data['y'].values
    x = data.loc[:, x_cols].values
    d = data['d'].values
    z = data['Z1'].values
    all_smpls = draw_smpls(len(y), n_folds)
    smpls = all_smpls[0]

    if tune_on_folds:
        tune_smpls = smpls
    else:
        # tune once on the full sample (empty test set)
        tune_smpls = [(np.arange(len(y)), np.array([]))]
    g_params, m_params, r_params = tune_nuisance_pliv(
        y, x, d, z,
        clone(learner_g), clone(learner_m), clone(learner_r),
        tune_smpls, n_folds_tune,
        par_grid['ml_g'], par_grid['ml_m'], par_grid['ml_r'])
    if not tune_on_folds:
        # replicate the single parameter set for every fold
        g_params = g_params * n_folds
        m_params = m_params * n_folds
        r_params = r_params * n_folds

    res_manual = fit_pliv(y, x, d, z,
                          clone(learner_g), clone(learner_m), clone(learner_r),
                          all_smpls, dml_procedure, score,
                          g_params=g_params, m_params=m_params, r_params=r_params)

    res_dict = {'coef': dml_pliv_obj.coef,
                'coef_manual': res_manual['theta'],
                'se': dml_pliv_obj.se,
                'se_manual': res_manual['se'],
                'boot_methods': boot_methods}

    for bootstrap in boot_methods:
        np.random.seed(3141)
        boot_theta, boot_t_stat = boot_pliv(
            y, d, z, res_manual['thetas'], res_manual['ses'],
            res_manual['all_g_hat'], res_manual['all_m_hat'], res_manual['all_r_hat'],
            all_smpls, score, bootstrap, n_rep_boot)
        np.random.seed(3141)
        dml_pliv_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
        res_dict[f'boot_coef{bootstrap}'] = dml_pliv_obj.boot_coef
        res_dict[f'boot_t_stat{bootstrap}'] = dml_pliv_obj.boot_t_stat
        res_dict[f'boot_coef{bootstrap}_manual'] = boot_theta
        res_dict[f'boot_t_stat{bootstrap}_manual'] = boot_t_stat

    return res_dict
def dml_plr_no_cross_fit_tune_fixture(generate_data1, learner, score, tune_on_folds):
    """Tune and fit a PLR model without cross-fitting via the package and manually.

    Returns a dict with coefficients, standard errors and bootstrap results
    from both code paths for the equality assertions in the tests.

    Fix: ``fit_plr`` was called with the learners swapped —
    ``clone(ml_m), clone(ml_g)`` — although the positional order is
    (g-learner, m-learner), matching the ``g_params``/``m_params`` keywords.
    Behavior was unaffected only because both learners are plain ``Lasso()``;
    the call is now consistent with the other fixtures.
    """
    par_grid = {'ml_g': {'alpha': np.linspace(0.05, .95, 7)},
                'ml_m': {'alpha': np.linspace(0.05, .95, 7)}}
    n_folds_tune = 3
    boot_methods = ['normal']
    n_rep_boot = 502
    dml_procedure = 'dml1'

    # collect data
    data = generate_data1
    x_cols = data.columns[data.columns.str.startswith('X')].tolist()

    # Set machine learning methods for m & g
    ml_g = Lasso()
    ml_m = Lasso()

    np.random.seed(3141)
    obj_dml_data = dml.DoubleMLData(data, 'y', ['d'], x_cols)
    dml_plr_obj = dml.DoubleMLPLR(obj_dml_data, ml_g, ml_m,
                                  n_folds=2, score=score,
                                  dml_procedure=dml_procedure,
                                  apply_cross_fitting=False)

    # tune hyperparameters
    _ = dml_plr_obj.tune(par_grid, tune_on_folds=tune_on_folds,
                         n_folds_tune=n_folds_tune)

    # fit with tuned parameters
    dml_plr_obj.fit()

    np.random.seed(3141)
    y = obj_dml_data.y
    x = obj_dml_data.x
    d = obj_dml_data.d
    n_obs = len(y)
    all_smpls = draw_smpls(n_obs, 2)
    smpls = all_smpls[0]
    # without cross-fitting only the first fold is used
    smpls = [smpls[0]]

    if tune_on_folds:
        g_params, m_params = tune_nuisance_plr(y, x, d,
                                               clone(ml_g), clone(ml_m),
                                               smpls, n_folds_tune,
                                               par_grid['ml_g'], par_grid['ml_m'])
    else:
        # tune once on the full sample (empty test set)
        xx = [(np.arange(len(y)), np.array([]))]
        g_params, m_params = tune_nuisance_plr(y, x, d,
                                               clone(ml_g), clone(ml_m),
                                               xx, n_folds_tune,
                                               par_grid['ml_g'], par_grid['ml_m'])

    # learners passed in (g, m) order, matching the g_params/m_params keywords
    res_manual = fit_plr(y, x, d, clone(ml_g), clone(ml_m), [smpls],
                         dml_procedure, score,
                         g_params=g_params, m_params=m_params)

    res_dict = {'coef': dml_plr_obj.coef,
                'coef_manual': res_manual['theta'],
                'se': dml_plr_obj.se,
                'se_manual': res_manual['se'],
                'boot_methods': boot_methods}

    for bootstrap in boot_methods:
        np.random.seed(3141)
        boot_theta, boot_t_stat = boot_plr(y, d,
                                           res_manual['thetas'], res_manual['ses'],
                                           res_manual['all_g_hat'], res_manual['all_m_hat'],
                                           [smpls], score, bootstrap, n_rep_boot,
                                           apply_cross_fitting=False)
        np.random.seed(3141)
        dml_plr_obj.bootstrap(method=bootstrap, n_rep_boot=n_rep_boot)
        res_dict['boot_coef' + bootstrap] = dml_plr_obj.boot_coef
        res_dict['boot_t_stat' + bootstrap] = dml_plr_obj.boot_t_stat
        res_dict['boot_coef' + bootstrap + '_manual'] = boot_theta
        res_dict['boot_t_stat' + bootstrap + '_manual'] = boot_t_stat

    return res_dict