Example #1
0
    def test_sklearn_in_tmle_missing(self, mf):
        """Check TMLE estimates when a scikit-learn learner drives every nuisance model.

        One ``LogisticRegression`` instance is passed as ``custom_model`` to the
        exposure, missing-data and outcome models. After fitting, the risk
        difference, risk ratio and odds ratio (and their confidence intervals)
        are compared against pinned reference numbers.
        """
        covariates = 'male + age0 + cd40 + dvl0'
        learner = LogisticRegression(C=1.0)

        estimator = TMLE(mf, exposure='art', outcome='dead')
        estimator.exposure_model(covariates, custom_model=learner, print_results=False)
        estimator.missing_model(covariates, custom_model=learner, print_results=False)
        estimator.outcome_model('art + male + age0 + cd40 + dvl0', custom_model=learner, print_results=False)
        estimator.fit()

        # Point estimates are held to the tighter tolerance, interval limits to the looser one.
        point_tol, interval_tol = 1e-5, 1e-4

        # Risk difference
        npt.assert_allclose(estimator.risk_difference, -0.090086, rtol=point_tol)
        npt.assert_allclose(estimator.risk_difference_ci, [-0.160371, -0.019801], rtol=interval_tol)

        # Risk ratio
        npt.assert_allclose(estimator.risk_ratio, 0.507997, rtol=point_tol)
        npt.assert_allclose(estimator.risk_ratio_ci, [0.256495, 1.006108], rtol=interval_tol)

        # Odds ratio
        npt.assert_allclose(estimator.odds_ratio, 0.457541, rtol=point_tol)
        npt.assert_allclose(estimator.odds_ratio_ci, [0.213980, 0.978331], rtol=interval_tol)
Example #2
0
    def test_missing_binary_outcome(self, mf):
        """Compare TMLE with a missing-data model on a binary outcome to stored references.

        The exposure, outcome and missing-data models are specified (in that
        order), the estimator is fitted, and the RD / RR / OR estimates and
        confidence intervals are checked against the reference values below.
        """
        exposure_formula = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
        with_art_formula = 'art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'

        # Reference estimates and interval limits
        expected_rd, expected_rd_ci = -0.08168098, (-0.15163818, -0.01172378)
        expected_rr, expected_rr_ci = 0.5495056, (0.2893677, 1.0435042)
        expected_or, expected_or_ci = 0.4996546, (0.2435979, 1.0248642)

        estimator = TMLE(mf, exposure='art', outcome='dead')
        estimator.exposure_model(exposure_formula, print_results=False)
        estimator.outcome_model(with_art_formula, print_results=False)
        estimator.missing_model(with_art_formula, print_results=False)
        estimator.fit()

        # Point estimates use assert_allclose's default tolerance; intervals use rtol=1e-5.
        npt.assert_allclose(estimator.risk_difference, expected_rd)
        npt.assert_allclose(estimator.risk_difference_ci, expected_rd_ci, rtol=1e-5)
        npt.assert_allclose(estimator.risk_ratio, expected_rr)
        npt.assert_allclose(estimator.risk_ratio_ci, expected_rr_ci, rtol=1e-5)
        npt.assert_allclose(estimator.odds_ratio, expected_or)
        npt.assert_allclose(estimator.odds_ratio_ci, expected_or_ci, rtol=1e-5)
Example #3
0
    def test_missing_continuous_outcome(self, mcf):
        """Compare TMLE with a missing-data model on ``cd4_wk45`` to stored references.

        Checks the average treatment effect and its confidence interval with a
        relative tolerance of 1e-3.
        """
        exposure_formula = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
        with_art_formula = 'art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
        expected_ate = 211.8295
        expected_ci = (107.7552, 315.9038)

        estimator = TMLE(mcf, exposure='art', outcome='cd4_wk45')
        estimator.exposure_model(exposure_formula, print_results=False)
        estimator.outcome_model(with_art_formula, print_results=False)
        estimator.missing_model(with_art_formula, print_results=False)
        estimator.fit()

        npt.assert_allclose(estimator.average_treatment_effect, expected_ate, rtol=1e-3)
        npt.assert_allclose(estimator.average_treatment_effect_ci, expected_ci, rtol=1e-3)
Example #4
0
    def test_compare_tmle_continuous(self, cf):
        """StochasticTMLE contrasts should agree with TMLE's average treatment effect.

        ``StochasticTMLE`` is fitted on the log-transformed ``cd4_wk45`` outcome
        once with ``p=1.0`` and once with ``p=0.0``. The difference between the
        two marginal outcomes is compared (absolute tolerance 1e-3) with the
        ``average_treatment_effect`` from ``TMLE`` fitted on the same data.
        """
        exposure_formula = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
        outcome_formula = 'art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'

        # Transform a copy rather than assigning into ``cf``: the fixture object
        # stays untouched, so the log-transform cannot leak into other tests if
        # the fixture is ever shared across them.
        log_cf = cf.assign(cd4_wk45=np.log(cf['cd4_wk45']))

        stmle = StochasticTMLE(log_cf, exposure='art', outcome='cd4_wk45')
        stmle.exposure_model(exposure_formula)
        stmle.outcome_model(outcome_formula)
        stmle.fit(p=1.0, samples=1)
        all_treat = stmle.marginal_outcome
        stmle.fit(p=0.0, samples=1)
        non_treat = stmle.marginal_outcome

        tmle = TMLE(log_cf, exposure='art', outcome='cd4_wk45')
        tmle.exposure_model(exposure_formula, print_results=False)
        tmle.outcome_model(outcome_formula, print_results=False)
        tmle.fit()
        expected = tmle.average_treatment_effect

        npt.assert_allclose(expected, all_treat - non_treat, atol=1e-3)
Example #5
0
 def test_match_r_epsilons(self, df):
     """The fitted ``_epsilon`` values match the stored reference pair (rtol=1e-5)."""
     exposure_formula = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
     outcome_formula = 'art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
     expected_epsilons = [-0.016214091, 0.003304079]

     estimator = TMLE(df, exposure='art', outcome='dead')
     estimator.exposure_model(exposure_formula, print_results=False)
     estimator.outcome_model(outcome_formula, print_results=False)
     estimator.fit()

     npt.assert_allclose(estimator._epsilon, expected_epsilons, rtol=1e-5)
Example #6
0
 def test_match_r_epsilons_continuous(self, cf):
     """The fitted ``_epsilon`` values for the ``cd4_wk45`` outcome match the stored pair.

     Uses both a relative and an absolute tolerance of 1e-4.
     """
     exposure_formula = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
     outcome_formula = 'art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
     expected_epsilons = [-0.0046411652, 0.0002270186]

     estimator = TMLE(cf, exposure='art', outcome='cd4_wk45')
     estimator.exposure_model(exposure_formula, print_results=False)
     estimator.outcome_model(outcome_formula, print_results=False)
     estimator.fit()

     npt.assert_allclose(estimator._epsilon, expected_epsilons, rtol=1e-4, atol=1e-4)
Example #7
0
 def test_match_r_tmle_riskdifference(self, df):
     """The fitted risk difference matches the stored reference value."""
     exposure_formula = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
     outcome_formula = 'art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
     expected_rd = -0.08440622

     estimator = TMLE(df, exposure='art', outcome='dead')
     estimator.exposure_model(exposure_formula, print_results=False)
     estimator.outcome_model(outcome_formula, print_results=False)
     estimator.fit()

     # Default assert_allclose tolerance is intentional here.
     npt.assert_allclose(estimator.risk_difference, expected_rd)
Example #8
0
 def test_match_r_tmle_rd_ci(self, df):
     """The risk-difference confidence interval matches the stored limits (rtol=1e-5)."""
     exposure_formula = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
     outcome_formula = 'art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
     expected_limits = (-0.1541104, -0.01470202)

     estimator = TMLE(df, exposure='art', outcome='dead')
     estimator.exposure_model(exposure_formula, print_results=False)
     estimator.outcome_model(outcome_formula, print_results=False)
     estimator.fit()

     npt.assert_allclose(estimator.risk_difference_ci, expected_limits, rtol=1e-5)
Example #9
0
 def test_match_r_tmle_riskratio(self, df):
     """The fitted risk ratio matches the stored reference value."""
     exposure_formula = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
     outcome_formula = 'art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
     expected_rr = 0.5344266

     estimator = TMLE(df, exposure='art', outcome='dead')
     estimator.exposure_model(exposure_formula, print_results=False)
     estimator.outcome_model(outcome_formula, print_results=False)
     estimator.fit()

     # Default assert_allclose tolerance is intentional here.
     npt.assert_allclose(estimator.risk_ratio, expected_rr)
Example #10
0
 def test_match_r_tmle_rr_ci(self, df):
     """The risk-ratio confidence interval matches the stored limits (rtol=1e-5)."""
     exposure_formula = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
     outcome_formula = 'art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
     expected_limits = (0.2773936, 1.0296262)

     estimator = TMLE(df, exposure='art', outcome='dead')
     estimator.exposure_model(exposure_formula, print_results=False)
     estimator.outcome_model(outcome_formula, print_results=False)
     estimator.fit()

     npt.assert_allclose(estimator.risk_ratio_ci, expected_limits, rtol=1e-5)
Example #11
0
 def test_no_missing_data(self, df):
     """Calling ``missing_model`` on the ``df`` fixture must raise ``ValueError``.

     The exposure and outcome models are specified first; only the
     ``missing_model`` call is expected to fail.
     """
     exposure_formula = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
     with_art_formula = 'art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'

     estimator = TMLE(df, exposure='art', outcome='dead')
     estimator.exposure_model(exposure_formula, print_results=False)
     estimator.outcome_model(with_art_formula, print_results=False)

     with pytest.raises(ValueError):
         estimator.missing_model(with_art_formula, print_results=False)
Example #12
0
 def test_no_ate_with_binary(self, df):
     """With the ``dead`` outcome, the ATE and its interval stay ``None`` after fitting."""
     exposure_formula = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
     outcome_formula = 'art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'

     estimator = TMLE(df, exposure='art', outcome='dead')
     estimator.exposure_model(exposure_formula, bound=[0.025, 0.9], print_results=False)
     estimator.outcome_model(outcome_formula, print_results=False)
     estimator.fit()

     assert estimator.average_treatment_effect is None
     assert estimator.average_treatment_effect_ci is None
Example #13
0
    def test_sklearn_in_tmle2(self, cf):
        """Check TMLE on ``cd4_wk45`` with scikit-learn learners as custom models.

        A ``LogisticRegression`` is used for the exposure model and a
        ``LinearRegression`` for the outcome model. The average treatment effect
        and its confidence interval are compared with pinned reference numbers.
        """
        log = LogisticRegression(C=1.0)
        lin = LinearRegression()
        tmle = TMLE(cf, exposure='art', outcome='cd4_wk45')
        # print_results=False matches the other tests in this file and keeps the
        # test run free of model-fit output.
        tmle.exposure_model('male + age0 + cd40 + dvl0', custom_model=log, print_results=False)
        tmle.outcome_model('art + male + age0 + cd40 + dvl0', custom_model=lin, print_results=False)
        tmle.fit()

        npt.assert_allclose(tmle.average_treatment_effect, 236.049719, rtol=1e-5)
        npt.assert_allclose(tmle.average_treatment_effect_ci, [135.999264, 336.100175], rtol=1e-5)
Example #14
0
def tmle(outcome, treatment, data):
    """Estimate the average treatment effect of ``treatment`` on ``outcome`` with TMLE.

    Every column of ``data`` other than ``outcome`` and ``treatment`` is entered
    as an additive term in the formula passed to both the exposure model and
    the outcome model.

    Parameters
    ----------
    outcome : str
        Name of the outcome column in ``data``.
    treatment : str
        Name of the treatment (exposure) column in ``data``.
    data : pandas.DataFrame
        Data containing the outcome, the treatment and the covariates.

    Returns
    -------
    The ``average_treatment_effect`` attribute of the fitted ``TMLE`` object.

    Raises
    ------
    ValueError
        If ``data`` has no columns besides ``outcome`` and ``treatment``, so no
        model formula can be built.
    """
    covariates = data.drop(columns=[outcome, treatment]).columns
    if len(covariates) == 0:
        # Fail with a clear message instead of an IndexError from covariates[0].
        raise ValueError("data must contain at least one covariate column besides "
                         "the outcome and the treatment")
    formula = ' + '.join(str(column) for column in covariates)

    tml = TMLE(data, exposure=treatment, outcome=outcome)
    tml.exposure_model(formula)
    # NOTE(review): the outcome model receives the same covariate-only formula,
    # i.e. without the treatment term -- confirm this is intended.
    tml.outcome_model(formula)
    tml.fit()
    return tml.average_treatment_effect
Example #15
0
 def test_asymmetric_bounds_on_gW(self, df):
     """Risk difference and interval match references when ``bound=[0.025, 0.9]`` is used."""
     exposure_formula = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
     outcome_formula = 'art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
     expected_rd = -0.08433208
     expected_limits = (-0.1541296, -0.01453453)

     estimator = TMLE(df, exposure='art', outcome='dead')
     estimator.exposure_model(exposure_formula, bound=[0.025, 0.9], print_results=False)
     estimator.outcome_model(outcome_formula, print_results=False)
     estimator.fit()

     npt.assert_allclose(estimator.risk_difference, expected_rd)
     npt.assert_allclose(estimator.risk_difference_ci, expected_limits, rtol=1e-5)
Example #16
0
 def test_symmetric_bounds_on_gW(self, df):
     """Risk difference and interval match references when a scalar ``bound=0.1`` is used."""
     exposure_formula = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
     outcome_formula = 'art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
     expected_rd = -0.08203143
     expected_limits = (-0.1498092, -0.01425363)

     estimator = TMLE(df, exposure='art', outcome='dead')
     estimator.exposure_model(exposure_formula, bound=0.1, print_results=False)
     estimator.outcome_model(outcome_formula, print_results=False)
     estimator.fit()

     npt.assert_allclose(estimator.risk_difference, expected_rd)
     npt.assert_allclose(estimator.risk_difference_ci, expected_limits, rtol=1e-5)
Example #17
0
    def test_match_r_continuous_poisson(self, cf):
        """ATE and interval match references with ``continuous_distribution='poisson'``.

        Both comparisons use a relative tolerance of 1e-3.
        """
        exposure_formula = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
        outcome_formula = 'art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
        expected_ate = 223.4648
        expected_limits = (118.6276, 328.3019)

        estimator = TMLE(cf, exposure='art', outcome='cd4_wk45')
        estimator.exposure_model(exposure_formula, print_results=False)
        estimator.outcome_model(outcome_formula, print_results=False, continuous_distribution='poisson')
        estimator.fit()

        npt.assert_allclose(estimator.average_treatment_effect, expected_ate, rtol=1e-3)
        npt.assert_allclose(estimator.average_treatment_effect_ci, expected_limits, rtol=1e-3)
Example #18
0
    def test_match_r_continuous_outcome(self, cf):
        """ATE and interval for the ``cd4_wk45`` outcome match the stored references.

        Both comparisons use a relative tolerance of 1e-3.
        """
        exposure_formula = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
        outcome_formula = 'art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
        expected_ate = 223.4022
        expected_limits = (118.6037, 328.2008)

        estimator = TMLE(cf, exposure='art', outcome='cd4_wk45')
        estimator.exposure_model(exposure_formula, print_results=False)
        estimator.outcome_model(outcome_formula, print_results=False)
        estimator.fit()

        npt.assert_allclose(estimator.average_treatment_effect, expected_ate, rtol=1e-3)
        npt.assert_allclose(estimator.average_treatment_effect_ci, expected_limits, rtol=1e-3)
Example #19
0
    def test_match_r_continuous_outcome_gbounds(self, cf):
        """ATE and interval match references when ``bound=[0.025, 0.9]`` is used.

        Both comparisons use a relative tolerance of 1e-3.
        """
        exposure_formula = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
        outcome_formula = 'art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
        expected_ate = 223.3958
        expected_limits = (118.4178, 328.3737)

        estimator = TMLE(cf, exposure='art', outcome='cd4_wk45')
        estimator.exposure_model(exposure_formula, bound=[0.025, 0.9], print_results=False)
        estimator.outcome_model(outcome_formula, print_results=False)
        estimator.fit()

        npt.assert_allclose(estimator.average_treatment_effect, expected_ate, rtol=1e-3)
        npt.assert_allclose(estimator.average_treatment_effect_ci, expected_limits, rtol=1e-3)
Example #20
0
 def test_no_risk_with_continuous(self, cf):
     """With the ``cd4_wk45`` outcome, all risk-based measures stay ``None`` after fitting."""
     exposure_formula = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
     outcome_formula = 'art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'

     estimator = TMLE(cf, exposure='art', outcome='cd4_wk45')
     estimator.exposure_model(exposure_formula, bound=[0.025, 0.9], print_results=False)
     estimator.outcome_model(outcome_formula, print_results=False)
     estimator.fit()

     # Point estimates first, then their intervals, each expected to be unset.
     for attribute in ('risk_difference', 'risk_ratio', 'odds_ratio',
                       'risk_difference_ci', 'risk_ratio_ci', 'odds_ratio_ci'):
         assert getattr(estimator, attribute) is None
Example #21
0
 def test_error_when_no_models_specified3(self, df):
     """``fit`` raises ``ValueError`` when only the outcome model has been specified."""
     outcome_formula = 'art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'

     estimator = TMLE(df, exposure='art', outcome='dead')
     estimator.outcome_model(outcome_formula, print_results=False)

     with pytest.raises(ValueError):
         estimator.fit()
Example #22
0
    def test_sklearn_in_tmle(self, df):
        """Check TMLE on the ``dead`` outcome with a scikit-learn learner as custom model.

        One ``LogisticRegression`` instance is passed to both the exposure and
        the outcome model. The risk difference, risk ratio and odds ratio (with
        their confidence intervals) are compared with pinned reference numbers.
        """
        log = LogisticRegression(C=1.0)
        tmle = TMLE(df, exposure='art', outcome='dead')
        # print_results=False matches the other tests in this file and keeps the
        # test run free of model-fit output.
        tmle.exposure_model('male + age0 + cd40 + dvl0', custom_model=log, print_results=False)
        tmle.outcome_model('art + male + age0 + cd40 + dvl0', custom_model=log, print_results=False)
        tmle.fit()

        # Testing RD match
        npt.assert_allclose(tmle.risk_difference, -0.091372098)
        npt.assert_allclose(tmle.risk_difference_ci, [-0.1595425678, -0.0232016282], rtol=1e-5)
        # Testing RR match
        npt.assert_allclose(tmle.risk_ratio, 0.4998833415)
        npt.assert_allclose(tmle.risk_ratio_ci, [0.2561223823, 0.9756404452], rtol=1e-5)
        # Testing OR match
        npt.assert_allclose(tmle.odds_ratio, 0.4496171689)
        npt.assert_allclose(tmle.odds_ratio_ci, [0.2139277755, 0.944971255], rtol=1e-5)
Example #23
0
    def test_compare_tmle_binary(self, df):
        """StochasticTMLE contrasts should agree with TMLE's risk difference.

        ``StochasticTMLE`` is fitted once with ``p=1.0`` and once with ``p=0.0``.
        The difference between the two marginal outcomes is compared (absolute
        tolerance 1e-4) with the ``risk_difference`` from ``TMLE``.
        """
        exposure_formula = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
        outcome_formula = 'art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'

        # Stochastic estimator: marginal outcome under p=1.0, then under p=0.0
        stochastic = StochasticTMLE(df, exposure='art', outcome='dead')
        stochastic.exposure_model(exposure_formula)
        stochastic.outcome_model(outcome_formula)
        stochastic.fit(p=1.0, samples=1)
        outcome_p1 = stochastic.marginal_outcome
        stochastic.fit(p=0.0, samples=1)
        outcome_p0 = stochastic.marginal_outcome

        # Standard estimator on the same data
        standard = TMLE(df, exposure='art', outcome='dead')
        standard.exposure_model(exposure_formula, print_results=False)
        standard.outcome_model(outcome_formula, print_results=False)
        standard.fit()
        reference_rd = standard.risk_difference

        npt.assert_allclose(reference_rd, outcome_p1 - outcome_p0, atol=1e-4)
Example #24
0
def causal_check():
    """Run the diagnostics and plots of several causal estimators on the sample data.

    Steps, in order:
      1. Load the sample data (without ``cd4_wk45``) and add restricted spline
         terms for ``cd40`` and ``age0``.
      2. ``TimeFixedGFormula``: fit the outcome model and run diagnostics.
      3. ``IPTW``: fit, then show the Love plot, KDE plots, box plots and diagnostics.
      4. ``AIPTW`` and ``TMLE``: fit, run diagnostics, show KDE and Love plots.
      5. ``SurvivalGFormula``: expand the data to one row per time unit, fit and plot.

    Each figure is displayed with ``plt.show()``; nothing is returned.
    """
    exposure_formula = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
    outcome_formula = 'art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'

    data = load_sample_data(False).drop(columns=['cd4_wk45'])
    # Spline terms are added for cd40 first, then age0.
    for spline_cols, source_col in ((['cd4_rs1', 'cd4_rs2'], 'cd40'),
                                    (['age_rs1', 'age_rs2'], 'age0')):
        data[spline_cols] = spline(data, source_col, n_knots=3, term=2, restricted=True)

    # Check TimeFixedGFormula diagnostics
    gformula = TimeFixedGFormula(data, exposure='art', outcome='dead')
    gformula.outcome_model(model=outcome_formula)
    gformula.run_diagnostics(decimal=3)

    # Check IPTW plots
    weights = IPTW(data, treatment='art', outcome='dead')
    weights.treatment_model(exposure_formula, stabilized=True)
    weights.marginal_structural_model('art')
    weights.fit()
    weights.plot_love()
    plt.tight_layout()
    plt.show()
    weights.plot_kde()
    plt.show()
    weights.plot_kde(measure='logit')
    plt.show()
    weights.plot_boxplot()
    plt.show()
    weights.plot_boxplot(measure='logit')
    plt.show()
    weights.run_diagnostics()

    # Check AIPTW diagnostics, then TMLE diagnostics (identical call sequence)
    for estimator_cls in (AIPTW, TMLE):
        doubly_robust = estimator_cls(data, exposure='art', outcome='dead')
        doubly_robust.exposure_model(exposure_formula)
        doubly_robust.outcome_model(outcome_formula)
        doubly_robust.fit()
        doubly_robust.run_diagnostics()
        for target in ('exposure', 'outcome'):
            doubly_robust.plot_kde(to_plot=target)
            plt.show()
        doubly_robust.plot_love()
        plt.show()

    # Check SurvivalGFormula plots
    long_df = load_sample_data(False).drop(columns=['cd4_wk45'])
    long_df['t'] = np.round(long_df['t']).astype(int)
    # Repeat each row ``t`` times, then renumber ``t`` within each id as 1, 2, ...
    long_df = pd.DataFrame(np.repeat(long_df.values, long_df['t'], axis=0),
                           columns=long_df.columns)
    long_df['t'] = long_df.groupby('id')['t'].cumcount() + 1
    # The event indicator ``d`` is 1 only on the last row of an id with dead == 1.
    is_last_row_of_id = long_df['id'] != long_df['id'].shift(-1)
    long_df.loc[(long_df['dead'] == 1) & is_last_row_of_id, 'd'] = 1
    long_df['d'] = long_df['d'].fillna(0)
    long_df['t_sq'] = long_df['t']**2
    long_df['t_cu'] = long_df['t']**3

    survival = SurvivalGFormula(long_df, idvar='id', exposure='art', outcome='d', time='t')
    survival.outcome_model(model='art + male + age0 + cd40 + dvl0 + t + t_sq + t_cu')
    survival.fit(treatment='all')
    survival.plot()
    plt.show()
    survival.plot(c='r', linewidth=3, alpha=0.8)
    plt.show()
Example #25
0
# Treatment model
# Specifies the exposure model on the existing `aipw` estimator (created
# earlier in the script, outside this excerpt). `bound=0.01` is passed through;
# presumably it truncates the predicted probabilities -- confirm in zepid docs.
aipw.exposure_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0',
                    print_results=False, bound=0.01)
# Outcome model
# Same covariates as the exposure model plus the `art` term.
aipw.outcome_model('art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0',
                   print_results=False)
# Calculating estimate
aipw.fit()
# Printing summary results
aipw.summary()

#############################
# TMLE
from zepid.causal.doublyrobust import TMLE

# Build the TMLE estimator for exposure `art` and outcome `dead` on `df`
# (`df` is defined earlier in the script, outside this excerpt).
tmle = TMLE(df, exposure='art', outcome='dead')
# Exposure model; `bound=0.01` is passed through (presumably truncating the
# predicted probabilities -- confirm in zepid docs).
tmle.exposure_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0',
                    print_results=False, bound=0.01)
# Missing-data model.
# NOTE(review): unlike the other two formulas, this one has no age_rs1/age_rs2
# terms -- confirm that is intentional.
tmle.missing_model('art + male + age0 + cd40 + cd4_rs1 + cd4_rs2 + dvl0',
                   print_results=False)
# Outcome model: exposure-model covariates plus the `art` term.
tmle.outcome_model('art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0',
                   print_results=False)
# Fit the estimator and print its summary.
tmle.fit()
tmle.summary()

#############################
# Cross-fitting
from sklearn.ensemble import RandomForestClassifier
from zepid.superlearner import GLMSL, StepwiseSL, SuperLearner
from zepid.causal.doublyrobust import SingleCrossfitTMLE
Example #26
0
 def test_error_when_no_models_specified1(self, df):
     """``fit`` raises ``ValueError`` when no model has been specified at all."""
     unconfigured = TMLE(df, exposure='art', outcome='dead')

     with pytest.raises(ValueError):
         unconfigured.fit()
Example #27
0
 def test_drop_missing_data(self):
     """The estimator's stored frame has as many rows as the sample data without missing ``cd4_wk45``."""
     sample = ze.load_sample_data(False)
     estimator = TMLE(sample, exposure='art', outcome='dead')

     expected_rows = sample.dropna(subset=['cd4_wk45']).shape[0]
     stored_rows = estimator.df.shape[0]
     assert expected_rows == stored_rows