def test_XGBTRegressor(generate_regression_data):
    y, X, treatment, tau, b, e = generate_regression_data()

    learner = XGBTRegressor()

    # check the accuracy of the ATE estimation
    ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y)
    assert (ate_p >= lb) and (ate_p <= ub)
    assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD

    # check the accuracy of the CATE estimation with the bootstrap CI
    cate_p, _, _ = learner.fit_predict(X=X, treatment=treatment, y=y,
                                       return_ci=True, n_bootstraps=10)

    # name the true-effect column 'tau' so get_cumgain below actually uses it
    auuc_metrics = pd.DataFrame({
        'cate_p': cate_p.flatten(),
        'W': treatment,
        'y': y,
        'tau': tau
    })

    cumgain = get_cumgain(auuc_metrics,
                          outcome_col='y',
                          treatment_col='W',
                          treatment_effect_col='tau')

    # Check if the cumulative gain when using the model's prediction is
    # higher than it would be under random targeting
    assert cumgain['cate_p'].sum() > cumgain['Random'].sum()
def test_XGBTRegressor(generate_regression_data):
    y, X, treatment, tau, b, e = generate_regression_data()

    learner = XGBTRegressor()

    # check the accuracy of the ATE estimation
    ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y)
    assert (ate_p >= lb) and (ate_p <= ub)
    assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD

    # check the accuracy of the CATE estimation with the bootstrap CI
    cate_p, _, _ = learner.fit_predict(X=X, treatment=treatment, y=y,
                                       return_ci=True, n_bootstraps=10)
    assert gini(tau, cate_p.flatten()) > .5
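# A minimal sketch of the shared setup the two test_XGBTRegressor variants above
# rely on. The causalml import paths are the public ones; the
# generate_regression_data fixture body and the ERROR_THRESHOLD value are
# assumptions reconstructed from how the tests use them, not taken from the
# original test module.
import pandas as pd
import pytest

from causalml.dataset import synthetic_data
from causalml.inference.meta import XGBTRegressor
from causalml.metrics import ape, get_cumgain, gini

ERROR_THRESHOLD = 0.5  # assumed tolerance for the ATE percentage error


@pytest.fixture
def generate_regression_data():
    # The tests call the fixture value, so it returns a data-generating function.
    def _generate():
        # synthetic_data returns (y, X, treatment, tau, b, e)
        return synthetic_data(mode=1, n=1000, p=8, sigma=1.0)

    return _generate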
def test_get_synthetic_summary():
    summary = get_synthetic_summary(
        synthetic_data_func=simulate_nuisance_and_easy_treatment,
        estimators={
            'S Learner (LR)': LRSRegressor(),
            'T Learner (XGB)': XGBTRegressor()
        })

    print(summary)
def test_get_synthetic_preds(synthetic_data_func):
    preds_dict = get_synthetic_preds(
        synthetic_data_func=synthetic_data_func,
        n=1000,
        estimators={
            'S Learner (LR)': LRSRegressor(),
            'T Learner (XGB)': XGBTRegressor()
        })

    assert (preds_dict['S Learner (LR)'].shape[0] ==
            preds_dict['T Learner (XGB)'].shape[0])
def test_get_synthetic_auuc():
    preds_dict = get_synthetic_preds(
        synthetic_data_func=simulate_nuisance_and_easy_treatment,
        n=1000,
        estimators={
            'S Learner (LR)': LRSRegressor(),
            'T Learner (XGB)': XGBTRegressor()
        })

    auuc_df = get_synthetic_auuc(preds_dict, plot=False)
    print(auuc_df)
def test_get_synthetic_preds():
    preds_dict = get_synthetic_preds(
        synthetic_data_func=simulate_nuisance_and_easy_treatment,
        n=1000,
        estimators={
            'S Learner (LR)': LRSRegressor(),
            'T Learner (XGB)': XGBTRegressor()
        })

    assert (preds_dict['S Learner (LR)'].shape[0] ==
            preds_dict['T Learner (XGB)'].shape[0])
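# The benchmark helpers exercised by the tests above all come from
# causalml.dataset. A minimal standalone sketch of the same workflow outside
# pytest; the estimator labels and n=1000 are illustrative choices taken from
# the snippets, not values required by the API.
from causalml.dataset import (
    get_synthetic_auuc,
    get_synthetic_preds,
    get_synthetic_summary,
    simulate_nuisance_and_easy_treatment,
)
from causalml.inference.meta import LRSRegressor, XGBTRegressor

estimators = {
    'S Learner (LR)': LRSRegressor(),
    'T Learner (XGB)': XGBTRegressor(),
}

# Summary table comparing the estimators on simulated data
summary = get_synthetic_summary(
    synthetic_data_func=simulate_nuisance_and_easy_treatment,
    estimators=estimators,
)
print(summary)

# CATE predictions on one simulated dataset, then ranked by AUUC
preds_dict = get_synthetic_preds(
    synthetic_data_func=simulate_nuisance_and_easy_treatment,
    n=1000,
    estimators=estimators,
)
auuc_df = get_synthetic_auuc(preds_dict, plot=False)
print(auuc_df)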
def get_model(PARAMS):
    """Get model according to parameters"""
    model = XGBTRegressor(max_depth=PARAMS.get('max_depth'),
                          min_child_weight=PARAMS.get('min_child_weight'),
                          gamma=PARAMS.get('gamma'),
                          colsample_bytree=PARAMS.get('colsample_bytree'),
                          learning_rate=PARAMS.get('learning_rate'),
                          n_estimators=int(PARAMS.get('n_estimators')),
                          random_state=123)
    return model
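# A hedged usage sketch for get_model above: the PARAMS keys mirror exactly the
# ones the function reads; the values are hypothetical (e.g. as they might be
# proposed by a hyperparameter tuner), and the synthetic data is used purely
# for illustration.
from causalml.dataset import synthetic_data

PARAMS = {
    'max_depth': 6,
    'min_child_weight': 1,
    'gamma': 0.1,
    'colsample_bytree': 0.8,
    'learning_rate': 0.1,
    'n_estimators': 100,
}

# synthetic_data returns (y, X, treatment, tau, b, e)
y, X, treatment, tau, b, e = synthetic_data(mode=1, n=1000, p=8, sigma=1.0)

model = get_model(PARAMS)
ate, lb, ub = model.estimate_ate(X=X, treatment=treatment, y=y)
print(ate, lb, ub)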
def test_XGBTRegressor(generate_regression_data):
    y, X, treatment, tau, b, e = generate_regression_data()

    learner = XGBTRegressor()

    # check the accuracy of the ATE estimation
    ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y)
    assert (ate_p >= lb) and (ate_p <= ub)
    assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD

    # check that re-using the pre-trained model gives the same ATE estimate
    ate_p_pt, lb_pt, ub_pt = learner.estimate_ate(
        X=X, treatment=treatment, y=y, pretrain=True
    )
    assert (ate_p_pt == ate_p) and (lb_pt == lb) and (ub_pt == ub)

    # check the accuracy of the CATE estimation with the bootstrap CI
    cate_p, _, _ = learner.fit_predict(
        X=X, treatment=treatment, y=y, return_ci=True, n_bootstraps=10
    )

    # name the true-effect column "tau" so get_cumgain below actually uses it
    auuc_metrics = pd.DataFrame(
        {
            "cate_p": cate_p.flatten(),
            "W": treatment,
            "y": y,
            "tau": tau,
        }
    )

    cumgain = get_cumgain(
        auuc_metrics, outcome_col="y", treatment_col="W", treatment_effect_col="tau"
    )

    # Check if the cumulative gain when using the model's prediction is
    # higher than it would be under random targeting
    assert cumgain["cate_p"].sum() > cumgain["Random"].sum()
def test_get_synthetic_preds(synthetic_data_func):
    preds_dict = get_synthetic_preds(
        synthetic_data_func=synthetic_data_func,
        n=1000,
        estimators={
            "S Learner (LR)": LRSRegressor(),
            "T Learner (XGB)": XGBTRegressor(),
        },
    )

    assert (
        preds_dict["S Learner (LR)"].shape[0]
        == preds_dict["T Learner (XGB)"].shape[0]
    )
def test_get_synthetic_preds_holdout():
    preds_train, preds_valid = get_synthetic_preds_holdout(
        synthetic_data_func=simulate_nuisance_and_easy_treatment,
        n=1000,
        estimators={
            "S Learner (LR)": LRSRegressor(),
            "T Learner (XGB)": XGBTRegressor(),
        },
    )

    assert (
        preds_train["S Learner (LR)"].shape[0]
        == preds_train["T Learner (XGB)"].shape[0]
    )
    assert (
        preds_valid["S Learner (LR)"].shape[0]
        == preds_valid["T Learner (XGB)"].shape[0]
    )