def test_propensity_score(simulate_test_data):
    """Verify that the estimated propensity score has one entry per
    observation in the input data frame, for both the logit and the
    probit specification."""
    dict_, data = simulate_test_data

    propensity_logit = estimate_treatment_propensity(dict_, data, logit=True)
    propensity_probit = estimate_treatment_propensity(dict_, data, logit=False)

    # Both specifications must produce exactly one score per row.
    np.testing.assert_equal(len(propensity_logit), data.shape[0])
    np.testing.assert_equal(len(propensity_logit), len(propensity_probit))
def test_trim2(simulate_test_data):
    """Trimming with a common support equal to the entire unit interval
    must leave the data unchanged."""
    dict_, data = simulate_test_data
    data = estimate_treatment_propensity(dict_, data, logit=True)

    logit, trim, reestimate_p = True, True, False
    prop_score = data["prop_score"]
    common_support = [0, 1]

    if trim:
        # Keep only observations whose propensity score lies inside the
        # common support (recommended).
        lower, upper = common_support
        keep_rows = (data.prop_score >= lower) & (data.prop_score <= upper)
        data_trim = data[keep_rows]
        prop_score_trim = prop_score[(prop_score >= lower) & (prop_score <= upper)]

        if reestimate_p:
            # Optional (not recommended): refit the decision-equation
            # parameters on the trimmed sample.
            data_trim = estimate_treatment_propensity(dict_, data_trim, logit)
    else:
        data_trim = data
        prop_score_trim = prop_score

    data_trim = data_trim.sort_values(by="prop_score", ascending=True)
    X_trim = data_trim[dict_["TREATED"]["order"]]
    Y_trim = data_trim[[dict_["ESTIMATION"]["dependent"]]]
    prop_score_trim = np.sort(prop_score_trim)

    X_expected, Y_expected, prop_score_expected = expected_data_no_trim(dict_, data)

    np.testing.assert_array_equal(X_trim, X_expected)
    np.testing.assert_array_equal(Y_trim, Y_expected)
    np.testing.assert_array_equal(prop_score_trim, prop_score_expected)
def test_trim(simulate_test_data):
    """When *trim* is False, trim_support must return the original data
    untouched, even if *reestimate_p* is True."""
    dict_, data = simulate_test_data
    data = estimate_treatment_propensity(dict_, data, logit=True)

    X_expected, Y_expected, prop_score_expected = expected_data_no_trim(dict_, data)

    logit, trim, reestimate_p = False, False, True
    X, Y, prop_score = trim_support(dict_, data, logit, 25, trim, reestimate_p)

    # Stash the results on the pytest namespace so that other tests can
    # reuse them without re-running the estimation.
    pytest.X_testing = X
    pytest.Y_testing = Y
    pytest.prop_score_testing = prop_score

    np.testing.assert_array_equal(X, X_expected)
    np.testing.assert_array_equal(Y, Y_expected)
    np.testing.assert_array_equal(prop_score, prop_score_expected)
def bootstrap(init_file, nboot):
    """Generate bootstrapped MTE estimates.

    Given an initialization file and a number of bootstrap replications,
    repeatedly resample the baseline data with replacement and re-estimate
    the marginal treatment effect on each draw.

    Parameters
    ----------
    init_file: yaml
        Initialization file containing parameters for the estimation
        process.
    nboot: int
        Number of bootstrap iterations, i.e. number of times the MTE is
        computed via bootstrap.

    Returns
    -------
    mte_boot: np.ndarray
        Array of shape (gridsize, nboot) holding one MTE estimate per
        column.
    """
    check_presence_init(init_file)
    dict_ = read(init_file, semipar=True)

    # Read the user-specified estimation inputs from the init file.
    bins, logit, bandwidth, gridsize, startgrid, endgrid = process_primary_inputs(
        dict_
    )
    trim, rbandwidth, reestimate_p, show_output = process_secondary_inputs(dict_)

    # Silence intermediate output during the resampling loop.
    show_output = False

    # One column per bootstrap replication.
    mte_boot = np.zeros([gridsize, nboot])

    # Baseline data set.
    data = read_data(dict_["ESTIMATION"]["file"])

    counter = 0
    while counter < nboot:
        boot_data = resample(
            data, replace=True, n_samples=len(data), random_state=None
        )

        # Estimate the propensity score P(z) on the bootstrap sample.
        boot_data = estimate_treatment_propensity(dict_, boot_data, logit, show_output)
        prop_score = boot_data["prop_score"]

        if not isinstance(prop_score, pd.Series):
            # Propensity estimation did not succeed on this draw;
            # resample and try again.
            continue

        # Define common support and trim the data (if trim=True).
        X, Y, prop_score = trim_support(
            dict_, boot_data, logit, bins, trim, reestimate_p, show_output=False
        )

        b0, b1_b0 = double_residual_reg(X, Y, prop_score)

        # Construct the MTE from its observed and unobserved components.
        mte_x = mte_observed(X, b1_b0)
        mte_u = mte_unobserved_semipar(
            X, Y, b0, b1_b0, prop_score, bandwidth, gridsize, startgrid, endgrid
        )
        mte_boot[:, counter] = mte_x.mean(axis=0) + mte_u

        counter += 1

    return mte_boot
def bootstrap(init_file, nbootstraps):
    """Generate bootstrapped standard errors for the MTE.

    Given an initialization file and the number of bootstrap draws,
    repeatedly resample the baseline data with replacement and
    re-estimate the marginal treatment effect on each draw.
    """
    check_presence_init(init_file)
    dict_ = read(init_file, semipar=True)

    # Read the user-specified and default estimation inputs.
    nbins, logit, bandwidth, gridsize, a, b = process_user_input(dict_)
    trim, rbandwidth, reestimate_p = process_default_input(dict_)

    # Silence intermediate output during the resampling loop.
    show_output = False

    # One column per bootstrap replication.
    mte_boot = np.zeros([gridsize, nbootstraps])

    # Baseline data set.
    data = read_data(dict_["ESTIMATION"]["file"])

    counter = 0
    while counter < nbootstraps:
        boot_data = resample(
            data, replace=True, n_samples=len(data), random_state=None
        )

        # Inputs for the decision equation.
        indicator, D, Z = process_choice_data(dict_, boot_data)

        # Estimate the propensity score P(z) on the bootstrap sample.
        ps = estimate_treatment_propensity(D, Z, logit, show_output)

        if not isinstance(ps, np.ndarray):
            # Propensity estimation did not succeed on this draw;
            # resample and try again.
            continue

        # Define common support and trim the data, if trim=True.
        boot_data, ps = trim_support(
            dict_,
            boot_data,
            logit,
            ps,
            indicator,
            nbins,
            trim,
            reestimate_p,
            show_output,
        )

        # Observed and unobserved components of the MTE.
        X, b1_b0, b0, mte_u = mte_components(
            dict_, boot_data, ps, rbandwidth, bandwidth, gridsize, a, b, show_output
        )

        # X-dependent component plus the unobserved component.
        mte_x = np.dot(X, b1_b0).mean(axis=0)
        mte_boot[:, counter] = mte_x + mte_u

        counter += 1

    return mte_boot