def test_BaseXLearner_without_p(generate_regression_data):
    """X-learner without an explicit propensity score.

    Checks ATE point-estimate accuracy against the known mean treatment
    effect, and checks that the CATE predictions beat random targeting on
    cumulative gain.
    """
    y, X, treatment, tau, b, e = generate_regression_data()

    learner = BaseXLearner(learner=XGBRegressor())

    # check the accuracy of the ATE estimation
    ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y)
    assert (ate_p >= lb) and (ate_p <= ub)
    assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD

    # check the accuracy of the CATE estimation with the bootstrap CI
    cate_p, _, _ = learner.fit_predict(
        X=X, treatment=treatment, y=y, return_ci=True, n_bootstraps=10
    )

    auuc_metrics = pd.DataFrame(
        {
            "cate_p": cate_p.flatten(),
            "W": treatment,
            "y": y,
            # FIX: the column must be named 'tau' so that the
            # treatment_effect_col='tau' argument to get_cumgain below actually
            # resolves to this ground-truth column. The previous key,
            # 'treatment_effect_col', was never referenced, so the ground truth
            # was silently ignored.
            "tau": tau,
        }
    )

    cumgain = get_cumgain(
        auuc_metrics, outcome_col="y", treatment_col="W", treatment_effect_col="tau"
    )

    # Check if the cumulative gain when using the model's prediction is
    # higher than it would be under random targeting
    assert cumgain["cate_p"].sum() > cumgain["Random"].sum()
def test_BaseXLearner_without_p_gini(generate_regression_data):
    """X-learner without an explicit propensity score, scored by Gini.

    FIX: renamed from ``test_BaseXLearner_without_p`` — a test with that exact
    name is defined earlier in this module, and duplicate names make pytest
    silently collect only the later definition, so one of the two tests never
    ran. Behavior of the test body is unchanged.
    """
    y, X, treatment, tau, b, e = generate_regression_data()

    learner = BaseXLearner(learner=XGBRegressor())

    # check the accuracy of the ATE estimation
    ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y)
    assert (ate_p >= lb) and (ate_p <= ub)
    assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD

    # check the accuracy of the CATE estimation with the bootstrap CI
    cate_p, _, _ = learner.fit_predict(
        X=X, treatment=treatment, y=y, return_ci=True, n_bootstraps=10
    )

    # the predicted CATE should rank the true effects well
    assert gini(tau, cate_p.flatten()) > 0.5
def test_SensitivityRandomCause():
    """Smoke-test the Random Cause sensitivity analysis on synthetic data."""
    y, X, treatment, tau, b, e = synthetic_data(
        mode=1, n=100000, p=NUM_FEATURES, sigma=1.0
    )

    # assemble the flat DataFrame layout the sensitivity tooling expects
    feature_cols = [f"feature_{i}" for i in range(NUM_FEATURES)]
    frame = pd.DataFrame(X, columns=feature_cols)
    frame[TREATMENT_COL] = treatment
    frame[OUTCOME_COL] = y
    frame[SCORE_COL] = e

    # a Base XLearner serves as the underlying uplift model
    uplift_model = BaseXLearner(LinearRegression())

    sens = SensitivityRandomCause(
        df=frame,
        inference_features=feature_cols,
        p_col=SCORE_COL,
        treatment_col=TREATMENT_COL,
        outcome_col=OUTCOME_COL,
        learner=uplift_model,
    )

    # produce and display the Random Cause summary report
    report = sens.summary(method="Random Cause")
    print(report)
def test_SensitivitySelectionBias():
    """Smoke-test the Selection Bias sensitivity analysis and its plotting."""
    y, X, treatment, tau, b, e = synthetic_data(
        mode=1, n=100000, p=NUM_FEATURES, sigma=1.0
    )

    # assemble the flat DataFrame layout the sensitivity tooling expects
    feature_cols = [f"feature_{i}" for i in range(NUM_FEATURES)]
    frame = pd.DataFrame(X, columns=feature_cols)
    frame[TREATMENT_COL] = treatment
    frame[OUTCOME_COL] = y
    frame[SCORE_COL] = e

    # a Base XLearner serves as the underlying uplift model
    uplift_model = BaseXLearner(LinearRegression())

    sens = SensitivitySelectionBias(
        frame,
        feature_cols,
        p_col=SCORE_COL,
        treatment_col=TREATMENT_COL,
        outcome_col=OUTCOME_COL,
        learner=uplift_model,
        confound="alignment",
        alpha_range=None,
    )

    # run the causal sensitivity analysis under the alignment confound
    lls_bias_alignment, partial_rsqs_bias_alignment = sens.causalsens()
    print(lls_bias_alignment, partial_rsqs_bias_alignment)

    # Plot the results by confounding vector and plot Confidence Intervals for ATE
    sens.plot(lls_bias_alignment, ci=True)
def test_pandas_input(generate_regression_data):
    """Meta-learners must accept pandas inputs (Series/DataFrame).

    FIX: the original wrapped every call in ``try/except AttributeError:
    assert False``, which converts an informative traceback into a bare
    assertion failure. Calling the estimators directly is equivalent for the
    test outcome — any raised exception still fails the test — while
    preserving the full diagnostic traceback.
    """
    y, X, treatment, tau, b, e = generate_regression_data()

    # convert to pandas types
    y = pd.Series(y)
    X = pd.DataFrame(X)
    treatment = pd.Series(treatment)

    # S-learner
    learner = BaseSLearner(learner=LinearRegression())
    ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y, return_ci=True)

    # T-learner
    learner = BaseTLearner(learner=LinearRegression())
    ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y)

    # X-learner (with propensity scores)
    learner = BaseXLearner(learner=LinearRegression())
    ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y, p=e)

    # R-learner (with propensity scores)
    learner = BaseRLearner(learner=LinearRegression())
    ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y, p=e)

    # TMLE learner (with propensity scores)
    learner = TMLELearner(learner=LinearRegression())
    ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y, p=e)
def test_Sensitivity():
    """Smoke-test the combined sensitivity-analysis summary across all methods."""
    y, X, treatment, tau, b, e = synthetic_data(
        mode=1, n=100000, p=NUM_FEATURES, sigma=1.0
    )

    # assemble the flat DataFrame layout the sensitivity tooling expects
    feature_cols = [f"feature_{i}" for i in range(NUM_FEATURES)]
    frame = pd.DataFrame(X, columns=feature_cols)
    frame[TREATMENT_COL] = treatment
    frame[OUTCOME_COL] = y
    frame[SCORE_COL] = e

    # a Base XLearner serves as the underlying uplift model
    uplift_model = BaseXLearner(LinearRegression())

    sens = Sensitivity(
        df=frame,
        inference_features=feature_cols,
        p_col=SCORE_COL,
        treatment_col=TREATMENT_COL,
        outcome_col=OUTCOME_COL,
        learner=uplift_model,
    )

    # check the sensitivity summary report across every supported method
    methods = [
        'Placebo Treatment',
        'Random Cause',
        'Subset Data',
        'Random Replace',
        'Selection Bias',
    ]
    sens_summary = sens.sensitivity_analysis(methods=methods, sample_size=0.5)
    print(sens_summary)
def test_BaseXLearner(generate_regression_data):
    """X-learner with a known propensity score.

    Checks ATE accuracy, that ``pretrain=True`` reuses the fitted models and
    reproduces identical estimates, that CATE predictions beat random
    targeting on cumulative gain, and that custom outcome/effect learners are
    accepted.
    """
    y, X, treatment, tau, b, e = generate_regression_data()

    learner = BaseXLearner(learner=XGBRegressor())

    # check the accuracy of the ATE estimation
    ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y, p=e)
    assert (ate_p >= lb) and (ate_p <= ub)
    assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD

    # check pre-train model: re-estimating with pretrain=True must reuse the
    # already-fitted models and reproduce the same estimates exactly
    ate_p_pt, lb_pt, ub_pt = learner.estimate_ate(
        X=X, treatment=treatment, y=y, p=e, pretrain=True
    )
    assert (ate_p_pt == ate_p) and (lb_pt == lb) and (ub_pt == ub)

    # check the accuracy of the CATE estimation with the bootstrap CI
    cate_p, _, _ = learner.fit_predict(
        X=X, treatment=treatment, y=y, p=e, return_ci=True, n_bootstraps=10
    )

    auuc_metrics = pd.DataFrame(
        {
            "cate_p": cate_p.flatten(),
            "W": treatment,
            "y": y,
            # FIX: the column must be named 'tau' so that the
            # treatment_effect_col='tau' argument to get_cumgain below actually
            # resolves to this ground-truth column. The previous key,
            # 'treatment_effect_col', was never referenced, so the ground truth
            # was silently ignored.
            "tau": tau,
        }
    )

    cumgain = get_cumgain(
        auuc_metrics, outcome_col="y", treatment_col="W", treatment_effect_col="tau"
    )

    # Check if the cumulative gain when using the model's prediction is
    # higher than it would be under random targeting
    assert cumgain["cate_p"].sum() > cumgain["Random"].sum()

    # basic test of using outcome_learner and effect_learner
    learner = BaseXLearner(
        learner=XGBRegressor(),
        control_outcome_learner=RandomForestRegressor(),
        treatment_outcome_learner=RandomForestRegressor(),
        control_effect_learner=RandomForestRegressor(),
        treatment_effect_learner=RandomForestRegressor(),
    )

    # check the accuracy of the ATE estimation
    ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y, p=e)
    assert (ate_p >= lb) and (ate_p <= ub)
    assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD