Example #1
def test_BaseTRegressor(generate_regression_data):
    y, X, treatment, tau, b, e = generate_regression_data()

    learner = BaseTRegressor(learner=XGBRegressor())

    # check the accuracy of the ATE estimation
    ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y)
    assert (ate_p >= lb) and (ate_p <= ub)
    assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD

    # check the accuracy of the CATE estimation with the bootstrap CI
    cate_p, _, _ = learner.fit_predict(X=X,
                                       treatment=treatment,
                                       y=y,
                                       return_ci=True,
                                       n_bootstraps=10)

    auuc_metrics = pd.DataFrame({
        'cate_p': cate_p.flatten(),
        'W': treatment,
        'y': y,
        'treatment_effect_col': tau
    })

    cumgain = get_cumgain(auuc_metrics,
                          outcome_col='y',
                          treatment_col='W',
                          treatment_effect_col='tau')

    # Check if the cumulative gain when using the model's prediction is
    # higher than it would be under random targeting
    assert cumgain['cate_p'].sum() > cumgain['Random'].sum()
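
These test snippets rely on a shared pytest fixture and metric helpers that are not shown on this page. A minimal sketch of the assumed scaffolding follows; the import paths are based on causalml's public API, and the fixture body and ERROR_THRESHOLD value are assumptions for illustration only.

import pytest
import pandas as pd
from xgboost import XGBRegressor

from causalml.dataset import synthetic_data
from causalml.inference.meta import BaseTRegressor
from causalml.metrics import ape, get_cumgain, gini  # metric helpers used in the assertions

# Assumed tolerance for the ATE absolute-percentage-error check; the real value
# is defined elsewhere in the test suite.
ERROR_THRESHOLD = 0.5


@pytest.fixture
def generate_regression_data():
    # Assumed stand-in for the project's fixture: returns a callable that yields
    # (y, X, treatment, tau, b, e) from causalml's synthetic data generator.
    return lambda: synthetic_data(mode=1, n=1000, p=8, sigma=1.0)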
Example #2
def test_BaseTRegressor(generate_regression_data):
    y, X, treatment, tau, b, e = generate_regression_data()

    learner = BaseTRegressor(learner=XGBRegressor())

    # check the accuracy of the ATE estimation
    ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y)
    assert (ate_p >= lb) and (ate_p <= ub)
    assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD

    # check the accuracy of the CATE estimation with the bootstrap CI
    cate_p, _, _ = learner.fit_predict(X=X, treatment=treatment, y=y, return_ci=True, n_bootstraps=10)
    assert gini(tau, cate_p.flatten()) > .5
Example #3
def test_BaseTRegressor(generate_regression_data):
    y, X, treatment, tau, b, e = generate_regression_data()

    learner = BaseTRegressor(learner=XGBRegressor())

    # check the accuracy of the ATE estimation
    ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y)
    assert (ate_p >= lb) and (ate_p <= ub)
    assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD

    # check pre-train model
    ate_p_pt, lb_pt, ub_pt = learner.estimate_ate(X=X,
                                                  treatment=treatment,
                                                  y=y,
                                                  pretrain=True)
    assert (ate_p_pt == ate_p) and (lb_pt == lb) and (ub_pt == ub)

    # check the accuracy of the CATE estimation with the bootstrap CI
    cate_p, _, _ = learner.fit_predict(X=X,
                                       treatment=treatment,
                                       y=y,
                                       return_ci=True,
                                       n_bootstraps=10)

    auuc_metrics = pd.DataFrame({
        "cate_p": cate_p.flatten(),
        "W": treatment,
        "y": y,
        "treatment_effect_col": tau,
    })

    cumgain = get_cumgain(auuc_metrics,
                          outcome_col="y",
                          treatment_col="W",
                          treatment_effect_col="tau")

    # Check if the cumulative gain when using the model's prediction is
    # higher than it would be under random targeting
    assert cumgain["cate_p"].sum() > cumgain["Random"].sum()
Example #4
    predictions['predictions_randomized_trial_test'] = predictions_randomized_trial_test
    predictions['predictions_easy_propensity'] = predictions_easy_propensity
    predictions['predictions_easy_propensity_test'] = predictions_easy_propensity_test

    return predictions


estimators_R = {
    # 'learner_dtr': BaseRRegressor(learner=DecisionTreeRegressor()),
    'learner_xgb': BaseRRegressor(learner=XGBRegressor()),
    'learner_lr': BaseRRegressor(learner=LinearRegression())
}

estimators_T = {
    'learner_xgb': BaseTRegressor(learner=XGBRegressor()),
    'learner_lr': BaseTRegressor(learner=LinearRegression())
}

import stacking_helpers

predictions_R = generate_predicitons_by_learner(estimators_R)
predictions_T = generate_predicitons_by_learner(estimators_T)

pred_R = predictions_R['predictions_randomized_trial']
pred_R_test = predictions_R['predictions_randomized_trial_test']

plt.figure(figsize=(15, 6))
# final_R_rand: predictions made using the ensemble weights
final_R_rand = stacking_helpers.fit_and_eval_learners(
    pred_R, pred_R_test, 'Randomized Trial R learner')
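
For reference, the same T-learner calls exercised in the test snippets above can be run directly on causalml's synthetic data outside the test harness. This is a minimal sketch, assuming the synthetic_data generator and the parameter values used in the scaffolding sketch above.

from xgboost import XGBRegressor
from causalml.dataset import synthetic_data
from causalml.inference.meta import BaseTRegressor

# Outcome, features, treatment flag, true CATE, baseline outcome, propensity score.
y, X, treatment, tau, b, e = synthetic_data(mode=1, n=1000, p=8, sigma=1.0)

learner = BaseTRegressor(learner=XGBRegressor())

# ATE point estimate with its confidence interval.
ate, ate_lb, ate_ub = learner.estimate_ate(X=X, treatment=treatment, y=y)
print(ate, ate_lb, ate_ub)

# CATE estimates with bootstrap confidence bounds.
cate, cate_lb, cate_ub = learner.fit_predict(
    X=X, treatment=treatment, y=y, return_ci=True, n_bootstraps=10)
print(cate.shape)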