def test_BaseTLearner(generate_regression_data):
    """Smoke-test BaseTLearner: ATE within its CI and near the true effect,
    CATE predictions beat random targeting, and custom per-arm learners work.
    """
    y, X, treatment, tau, b, e = generate_regression_data()

    learner = BaseTLearner(learner=XGBRegressor())

    # check the accuracy of the ATE estimation
    ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y)
    assert (ate_p >= lb) and (ate_p <= ub)
    assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD

    # check the accuracy of the CATE estimation with the bootstrap CI
    cate_p, _, _ = learner.fit_predict(X=X, treatment=treatment, y=y,
                                       return_ci=True, n_bootstraps=10)

    # BUGFIX: the ground-truth effect column was stored under the key
    # 'treatment_effect_col' while get_cumgain was told to look for 'tau',
    # so the true effect was silently ignored. Name the column 'tau' so it
    # matches the argument and the ground truth is actually used.
    auuc_metrics = pd.DataFrame({'cate_p': cate_p.flatten(),
                                 'W': treatment,
                                 'y': y,
                                 'tau': tau})
    cumgain = get_cumgain(auuc_metrics,
                          outcome_col='y',
                          treatment_col='W',
                          treatment_effect_col='tau')

    # Check if the cumulative gain when using the model's prediction is
    # higher than it would be under random targeting
    assert cumgain['cate_p'].sum() > cumgain['Random'].sum()

    # test of using control_learner and treatment_learner
    learner = BaseTLearner(learner=XGBRegressor(),
                           control_learner=RandomForestRegressor(),
                           treatment_learner=RandomForestRegressor())
    # check the accuracy of the ATE estimation
    ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y)
    assert (ate_p >= lb) and (ate_p <= ub)
    assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD
def test_BaseXLearner(generate_regression_data):
    """Smoke-test BaseXLearner: ATE within its CI, pretrain reuse is stable,
    CATE predictions beat random targeting, and custom component learners work.
    """
    y, X, treatment, tau, b, e = generate_regression_data()

    learner = BaseXLearner(learner=XGBRegressor())

    # check the accuracy of the ATE estimation
    ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y, p=e)
    assert (ate_p >= lb) and (ate_p <= ub)
    assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD

    # check pre-train model: re-estimating from the fitted model must
    # reproduce the same point estimate and interval exactly
    ate_p_pt, lb_pt, ub_pt = learner.estimate_ate(
        X=X, treatment=treatment, y=y, p=e, pretrain=True
    )
    assert (ate_p_pt == ate_p) and (lb_pt == lb) and (ub_pt == ub)

    # check the accuracy of the CATE estimation with the bootstrap CI
    cate_p, _, _ = learner.fit_predict(
        X=X, treatment=treatment, y=y, p=e, return_ci=True, n_bootstraps=10
    )

    # BUGFIX: the ground-truth effect column was stored under the key
    # "treatment_effect_col" while get_cumgain was told to look for "tau",
    # so the true effect was silently ignored. Name the column "tau" so it
    # matches the argument and the ground truth is actually used.
    auuc_metrics = pd.DataFrame({
        "cate_p": cate_p.flatten(),
        "W": treatment,
        "y": y,
        "tau": tau,
    })
    cumgain = get_cumgain(auuc_metrics,
                          outcome_col="y",
                          treatment_col="W",
                          treatment_effect_col="tau")

    # Check if the cumulative gain when using the model's prediction is
    # higher than it would be under random targeting
    assert cumgain["cate_p"].sum() > cumgain["Random"].sum()

    # basic test of using outcome_learner and effect_learner
    learner = BaseXLearner(
        learner=XGBRegressor(),
        control_outcome_learner=RandomForestRegressor(),
        treatment_outcome_learner=RandomForestRegressor(),
        control_effect_learner=RandomForestRegressor(),
        treatment_effect_learner=RandomForestRegressor(),
    )
    # check the accuracy of the ATE estimation
    ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y, p=e)
    assert (ate_p >= lb) and (ate_p <= ub)
    assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD
def test_drivlearner():
    """Smoke-test BaseDRIVLearner on a synthetic IV setup with one-sided
    non-compliance: ATE within its CI and CATE beats random targeting.
    """
    np.random.seed(RANDOM_SEED)
    n = 1000
    p = 8
    sigma = 1.0

    X = np.random.uniform(size=n * p).reshape((n, -1))
    # baseline outcome surface
    b = (np.sin(np.pi * X[:, 0] * X[:, 1])
         + 2 * (X[:, 2] - 0.5) ** 2
         + X[:, 3]
         + 0.5 * X[:, 4])
    # random instrument assignment
    assignment = (np.random.uniform(size=n) > 0.5).astype(int)
    # compliance propensity clipped to [eta, 1 - eta]; zero when unassigned
    eta = 0.1
    e_raw = np.maximum(
        np.repeat(eta, n),
        np.minimum(np.sin(np.pi * X[:, 0] * X[:, 1]), np.repeat(1 - eta, n)),
    )
    e = e_raw.copy()
    e[assignment == 0] = 0
    # heterogeneous treatment effect
    tau = (X[:, 0] + X[:, 1]) / 2
    # NOTE: removed the unused local X_obs (X with column 1 dropped) that
    # was computed but never referenced.

    w = np.random.binomial(1, e, size=n)
    treatment = w
    y = b + (w - 0.5) * tau + sigma * np.random.normal(size=n)

    learner = BaseDRIVLearner(learner=XGBRegressor(),
                              treatment_effect_learner=LinearRegression())

    # check the accuracy of the ATE estimation
    ate_p, lb, ub = learner.estimate_ate(X=X,
                                         assignment=assignment,
                                         treatment=treatment,
                                         y=y,
                                         p=(np.ones(n) * 1e-6, e_raw))
    assert (ate_p >= lb) and (ate_p <= ub)
    assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD

    # check the accuracy of the CATE estimation with the bootstrap CI
    cate_p, _, _ = learner.fit_predict(X=X,
                                       assignment=assignment,
                                       treatment=treatment,
                                       y=y,
                                       p=(np.ones(n) * 1e-6, e_raw),
                                       return_ci=True,
                                       n_bootstraps=10)

    # BUGFIX: the ground-truth effect column was stored under the key
    # 'treatment_effect_col' while get_cumgain was told to look for 'tau',
    # so the true effect was silently ignored. Name the column 'tau' so it
    # matches the argument and the ground truth is actually used.
    auuc_metrics = pd.DataFrame({
        'cate_p': cate_p.flatten(),
        'W': treatment,
        'y': y,
        'tau': tau
    })
    cumgain = get_cumgain(auuc_metrics,
                          outcome_col='y',
                          treatment_col='W',
                          treatment_effect_col='tau')

    # Check if the cumulative gain when using the model's prediction is
    # higher than it would be under random targeting
    assert cumgain['cate_p'].sum() > cumgain['Random'].sum()
def test_TMLELearner(generate_regression_data):
    """Verify TMLELearner's ATE estimate lies in its CI and near the true tau."""
    y, X, treatment, tau, b, e = generate_regression_data()

    tmle = TMLELearner(learner=XGBRegressor())

    # The point estimate must fall inside its own confidence interval and be
    # within the tolerated relative error of the true average effect.
    ate_p, lb, ub = tmle.estimate_ate(X=X, p=e, treatment=treatment, y=y)
    assert lb <= ate_p <= ub
    assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD
def test_BaseSLearner(generate_regression_data):
    """Verify BaseSLearner's ATE estimate lies in its CI and near the true tau."""
    y, X, treatment, tau, b, e = generate_regression_data()

    slearner = BaseSLearner(learner=LinearRegression())

    # Ask for the confidence interval explicitly and check the point estimate
    # sits inside it and within the tolerated relative error of the truth.
    ate_p, lb, ub = slearner.estimate_ate(X=X, treatment=treatment, y=y,
                                          return_ci=True)
    assert lb <= ate_p <= ub
    assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD
def test_BaseTRegressor(generate_regression_data):
    """Verify BaseTRegressor's ATE accuracy and the ranking quality of its CATE."""
    y, X, treatment, tau, b, e = generate_regression_data()

    tregressor = BaseTRegressor(learner=XGBRegressor())

    # ATE: inside its own CI and within the tolerated relative error
    ate_p, lb, ub = tregressor.estimate_ate(X=X, treatment=treatment, y=y)
    assert lb <= ate_p <= ub
    assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD

    # CATE with bootstrap CI: predictions should rank the true effects well
    cate_preds, _, _ = tregressor.fit_predict(X=X, treatment=treatment, y=y,
                                              return_ci=True, n_bootstraps=10)
    assert gini(tau, cate_preds.flatten()) > .5
def test_LRSRegressor(generate_regression_data):
    """Verify LRSRegressor's ATE accuracy and that pretrain reuse is stable."""
    y, X, treatment, tau, b, e = generate_regression_data()

    lrs = LRSRegressor()

    # ATE: inside its own CI and within the tolerated relative error
    ate_p, lb, ub = lrs.estimate_ate(X=X, treatment=treatment, y=y)
    assert lb <= ate_p <= ub
    assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD

    # Re-estimating from the already-fitted model must reproduce the same
    # point estimate and interval exactly.
    ate_p_pt, lb_pt, ub_pt = lrs.estimate_ate(X=X, treatment=treatment, y=y,
                                              pretrain=True)
    assert (ate_p_pt == ate_p) and (lb_pt == lb) and (ub_pt == ub)