Example #1
def test_BaseXLearner_without_p(generate_regression_data):
    y, X, treatment, tau, b, e = generate_regression_data()

    learner = BaseXLearner(learner=XGBRegressor())

    # check the accuracy of the ATE estimation
    ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y)
    assert (ate_p >= lb) and (ate_p <= ub)
    assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD

    # check the accuracy of the CATE estimation with the bootstrap CI
    cate_p, _, _ = learner.fit_predict(X=X,
                                       treatment=treatment,
                                       y=y,
                                       return_ci=True,
                                       n_bootstraps=10)

    auuc_metrics = pd.DataFrame({
        'cate_p': cate_p.flatten(),
        'W': treatment,
        'y': y,
        'tau': tau
    })

    cumgain = get_cumgain(auuc_metrics,
                          outcome_col='y',
                          treatment_col='W',
                          treatment_effect_col='tau')

    # Check if the cumulative gain when using the model's prediction is
    # higher than it would be under random targeting
    assert cumgain['cate_p'].sum() > cumgain['Random'].sum()
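These snippets appear to come from the causalml test suite. For context, here is a minimal sketch of the imports and the `generate_regression_data` fixture that Examples #1 and #2 assume; the fixture body and the `ERROR_THRESHOLD` value are illustrative assumptions, not the library's exact code.

import pandas as pd
import pytest
from xgboost import XGBRegressor

from causalml.dataset import synthetic_data
from causalml.inference.meta import BaseXLearner
from causalml.metrics import ape, get_cumgain, gini

ERROR_THRESHOLD = 0.5  # assumed tolerance for the ATE absolute percentage error


@pytest.fixture
def generate_regression_data():
    # hypothetical fixture: returns a callable that draws one synthetic
    # regression dataset as (y, X, treatment, tau, b, e)
    def _generate():
        return synthetic_data(mode=1, n=1000, p=8, sigma=1.0)

    return _generate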
Example #2
def test_BaseXLearner_without_p(generate_regression_data):
    y, X, treatment, tau, b, e = generate_regression_data()

    learner = BaseXLearner(learner=XGBRegressor())

    # check the accuracy of the ATE estimation
    ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y)
    assert (ate_p >= lb) and (ate_p <= ub)
    assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD

    # check the accuracy of the CATE estimation with the bootstrap CI
    cate_p, _, _ = learner.fit_predict(X=X, treatment=treatment, y=y, return_ci=True, n_bootstraps=10)
    assert gini(tau, cate_p.flatten()) > .5
Example #3
def test_SensitivityRandomCause():
    y, X, treatment, tau, b, e = synthetic_data(mode=1,
                                                n=100000,
                                                p=NUM_FEATURES,
                                                sigma=1.0)

    # assemble the DataFrame in the format the sensitivity analysis expects
    INFERENCE_FEATURES = ["feature_" + str(i) for i in range(NUM_FEATURES)]
    df = pd.DataFrame(X, columns=INFERENCE_FEATURES)
    df[TREATMENT_COL] = treatment
    df[OUTCOME_COL] = y
    df[SCORE_COL] = e

    # instantiate a BaseXLearner and generate the sensitivity analysis summary report
    learner = BaseXLearner(LinearRegression())
    sens = SensitivityRandomCause(
        df=df,
        inference_features=INFERENCE_FEATURES,
        p_col=SCORE_COL,
        treatment_col=TREATMENT_COL,
        outcome_col=OUTCOME_COL,
        learner=learner,
    )

    sens_summary = sens.summary(method="Random Cause")
    print(sens_summary)
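The module-level constants referenced above (NUM_FEATURES, TREATMENT_COL, OUTCOME_COL, SCORE_COL) are not shown in the snippet. One plausible set of definitions, with the column names below chosen purely for illustration:

from sklearn.linear_model import LinearRegression

from causalml.dataset import synthetic_data
from causalml.metrics.sensitivity import SensitivityRandomCause

# hypothetical constants consistent with the snippet above
NUM_FEATURES = 6             # number of covariates generated by synthetic_data
TREATMENT_COL = 'treatment'  # binary treatment indicator column
OUTCOME_COL = 'outcome'      # observed outcome column
SCORE_COL = 'pihat'          # propensity score column (holds e)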
Example #4
def test_SensitivitySelectionBias():
    y, X, treatment, tau, b, e = synthetic_data(mode=1,
                                                n=100000,
                                                p=NUM_FEATURES,
                                                sigma=1.0)

    # assemble the DataFrame in the format the sensitivity analysis expects
    INFERENCE_FEATURES = ["feature_" + str(i) for i in range(NUM_FEATURES)]
    df = pd.DataFrame(X, columns=INFERENCE_FEATURES)
    df[TREATMENT_COL] = treatment
    df[OUTCOME_COL] = y
    df[SCORE_COL] = e

    # instantiate a BaseXLearner and run the selection bias sensitivity analysis
    learner = BaseXLearner(LinearRegression())
    sens = SensitivitySelectionBias(
        df,
        INFERENCE_FEATURES,
        p_col=SCORE_COL,
        treatment_col=TREATMENT_COL,
        outcome_col=OUTCOME_COL,
        learner=learner,
        confound="alignment",
        alpha_range=None,
    )

    lls_bias_alignment, partial_rsqs_bias_alignment = sens.causalsens()
    print(lls_bias_alignment, partial_rsqs_bias_alignment)

    # plot the results by confounding vector, with confidence intervals for the ATE
    sens.plot(lls_bias_alignment, ci=True)
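A brief note on the arguments, as far as the snippet shows: `confound="alignment"` selects the alignment confounding function, and `alpha_range=None` appears to let the class pick a default grid of confounding strengths scaled to the data. `causalsens()` then returns the sensitivity results alongside partial R-squared values, which `plot(..., ci=True)` draws with confidence intervals for the ATE.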
Example #5
def test_pandas_input(generate_regression_data):
    y, X, treatment, tau, b, e = generate_regression_data()
    # convert to pandas types
    y = pd.Series(y)
    X = pd.DataFrame(X)
    treatment = pd.Series(treatment)

    try:
        learner = BaseSLearner(learner=LinearRegression())
        ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y, return_ci=True)
    except AttributeError:
        assert False
    try:
        learner = BaseTLearner(learner=LinearRegression())
        ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y)
    except AttributeError:
        assert False
    try:
        learner = BaseXLearner(learner=LinearRegression())
        ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y, p=e)
    except AttributeError:
        assert False
    try:
        learner = BaseRLearner(learner=LinearRegression())
        ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y, p=e)
    except AttributeError:
        assert False
    try:
        learner = TMLELearner(learner=LinearRegression())
        ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y, p=e)
    except AttributeError:
        assert False
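The `try/except AttributeError: assert False` pattern works, but any uncaught exception already fails a pytest test, so the same intent can be written more directly. A sketch under the same assumptions (the hypothetical fixture from Example #1's sketch; only the learners that need no propensity score are parametrized, since the others require `p=e`):

import pandas as pd
import pytest
from sklearn.linear_model import LinearRegression

# assumed import path, matching the classes used in the snippet above
from causalml.inference.meta import BaseSLearner, BaseTLearner


@pytest.mark.parametrize('learner_cls', [BaseSLearner, BaseTLearner])
def test_pandas_input_direct(generate_regression_data, learner_cls):
    y, X, treatment, tau, b, e = generate_regression_data()
    y, X, treatment = pd.Series(y), pd.DataFrame(X), pd.Series(treatment)
    # if pandas input raises AttributeError, pytest fails the test on its own
    learner = learner_cls(learner=LinearRegression())
    learner.estimate_ate(X=X, treatment=treatment, y=y)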
Example #6
def test_Sensitivity():
    y, X, treatment, tau, b, e = synthetic_data(mode=1,
                                                n=100000,
                                                p=NUM_FEATURES,
                                                sigma=1.0)

    # assemble the DataFrame in the format the sensitivity analysis expects
    INFERENCE_FEATURES = ['feature_' + str(i) for i in range(NUM_FEATURES)]
    df = pd.DataFrame(X, columns=INFERENCE_FEATURES)
    df[TREATMENT_COL] = treatment
    df[OUTCOME_COL] = y
    df[SCORE_COL] = e

    # instantiate a BaseXLearner and generate the sensitivity analysis summary report
    learner = BaseXLearner(LinearRegression())
    sens = Sensitivity(df=df,
                       inference_features=INFERENCE_FEATURES,
                       p_col=SCORE_COL,
                       treatment_col=TREATMENT_COL,
                       outcome_col=OUTCOME_COL,
                       learner=learner)

    # check the sensitivity summary report
    sens_summary = sens.sensitivity_analysis(methods=[
        'Placebo Treatment', 'Random Cause', 'Subset Data', 'Random Replace',
        'Selection Bias'
    ],
                                             sample_size=0.5)

    print(sens_summary)
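As far as the call shows, `sensitivity_analysis` runs each named method and stacks the per-method summaries into a single report; `sample_size=0.5` presumably applies only to the 'Subset Data' method, which refits the learner on a random half of the rows.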
Example #7
def test_BaseXLearner(generate_regression_data):
    y, X, treatment, tau, b, e = generate_regression_data()

    learner = BaseXLearner(learner=XGBRegressor())

    # check the accuracy of the ATE estimation
    ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y, p=e)
    assert (ate_p >= lb) and (ate_p <= ub)
    assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD

    # check that reusing the pre-trained model reproduces the same ATE estimate
    ate_p_pt, lb_pt, ub_pt = learner.estimate_ate(X=X,
                                                  treatment=treatment,
                                                  y=y,
                                                  p=e,
                                                  pretrain=True)
    assert (ate_p_pt == ate_p) and (lb_pt == lb) and (ub_pt == ub)

    # check the accuracy of the CATE estimation with the bootstrap CI
    cate_p, _, _ = learner.fit_predict(X=X,
                                       treatment=treatment,
                                       y=y,
                                       p=e,
                                       return_ci=True,
                                       n_bootstraps=10)

    auuc_metrics = pd.DataFrame({
        "cate_p": cate_p.flatten(),
        "W": treatment,
        "y": y,
        "treatment_effect_col": tau,
    })

    cumgain = get_cumgain(auuc_metrics,
                          outcome_col="y",
                          treatment_col="W",
                          treatment_effect_col="tau")

    # Check if the cumulative gain when using the model's prediction is
    # higher than it would be under random targeting
    assert cumgain["cate_p"].sum() > cumgain["Random"].sum()

    # basic test of specifying separate outcome and effect learners
    learner = BaseXLearner(
        learner=XGBRegressor(),
        control_outcome_learner=RandomForestRegressor(),
        treatment_outcome_learner=RandomForestRegressor(),
        control_effect_learner=RandomForestRegressor(),
        treatment_effect_learner=RandomForestRegressor(),
    )
    # check the accuracy of the ATE estimation
    ate_p, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y, p=e)
    assert (ate_p >= lb) and (ate_p <= ub)
    assert ape(tau.mean(), ate_p) < ERROR_THRESHOLD
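To run the X-learner outside the pytest harness, here is a minimal standalone sketch under the same assumptions (causalml's synthetic_data, with the true propensity score e passed as p):

from xgboost import XGBRegressor

from causalml.dataset import synthetic_data
from causalml.inference.meta import BaseXLearner

# draw a synthetic dataset; e is the true propensity score
y, X, treatment, tau, b, e = synthetic_data(mode=1, n=10000, p=8, sigma=1.0)

learner = BaseXLearner(learner=XGBRegressor())

# point estimate and confidence interval for the average treatment effect
ate, lb, ub = learner.estimate_ate(X=X, treatment=treatment, y=y, p=e)
print('ATE:', ate, 'CI:', (lb, ub))

# per-row conditional average treatment effects
cate = learner.fit_predict(X=X, treatment=treatment, y=y, p=e)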