def test_sklearn_in_tmle_missing(self, mf):
    """Check TMLE estimates when every nuisance model is a scikit-learn learner.

    A single ``LogisticRegression(C=1.0)`` object is passed as ``custom_model``
    to the exposure, missing-data and outcome models. After fitting, the risk
    difference, risk ratio and odds ratio, together with their confidence
    intervals, are compared against hard-coded reference values.
    """
    learner = LogisticRegression(C=1.0)
    estimator = TMLE(mf, exposure='art', outcome='dead')

    # Nuisance models are specified in the order exposure -> missing -> outcome.
    estimator.exposure_model(
        'male + age0 + cd40 + dvl0',
        custom_model=learner,
        print_results=False,
    )
    estimator.missing_model(
        'male + age0 + cd40 + dvl0',
        custom_model=learner,
        print_results=False,
    )
    estimator.outcome_model(
        'art + male + age0 + cd40 + dvl0',
        custom_model=learner,
        print_results=False,
    )
    estimator.fit()

    # Hard-coded reference values: (point estimate, confidence interval).
    expected_rd, expected_rd_ci = -0.090086, [-0.160371, -0.019801]
    expected_rr, expected_rr_ci = 0.507997, [0.256495, 1.006108]
    expected_or, expected_or_ci = 0.457541, [0.213980, 0.978331]

    # Intervals are checked with a looser relative tolerance than the points.
    point_rtol = 1e-5
    interval_rtol = 1e-4

    # Risk difference
    npt.assert_allclose(estimator.risk_difference, expected_rd, rtol=point_rtol)
    npt.assert_allclose(estimator.risk_difference_ci, expected_rd_ci, rtol=interval_rtol)

    # Risk ratio
    npt.assert_allclose(estimator.risk_ratio, expected_rr, rtol=point_rtol)
    npt.assert_allclose(estimator.risk_ratio_ci, expected_rr_ci, rtol=interval_rtol)

    # Odds ratio
    npt.assert_allclose(estimator.odds_ratio, expected_or, rtol=point_rtol)
    npt.assert_allclose(estimator.odds_ratio_ci, expected_or_ci, rtol=interval_rtol)
def test_missing_binary_outcome(self, mf):
    """Compare TMLE estimates for the binary outcome ``dead`` with references.

    Exposure, outcome and missing-data models are fitted with their default
    (non-custom) estimators on the ``mf`` fixture. The resulting risk
    difference, risk ratio and odds ratio, plus their confidence intervals,
    are checked against hard-coded reference values (the ``r_`` prefix
    suggests they were produced by an R implementation -- TODO confirm).
    """
    # Reference values, rounded to 7 significant digits.
    r_rd = -0.08168098
    r_rd_ci = (-0.15163818, -0.01172378)
    r_rr = 0.5495056
    r_rr_ci = (0.2893677, 1.0435042)
    r_or = 0.4996546
    r_or_ci = (0.2435979, 1.0248642)

    # Rounding a reference to 7 significant digits can by itself introduce a
    # relative error of about 1e-7 (e.g. 5e-8 / 0.4996546), which is the
    # default ``rtol`` of ``assert_allclose``. An explicit, slightly looser
    # tolerance keeps the point-estimate checks from failing on rounding or
    # platform-level floating-point noise rather than on a real regression.
    point_rtol = 1e-6
    interval_rtol = 1e-5

    exposure_formula = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
    # The outcome and missing-data models share the same specification.
    outcome_formula = 'art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'

    tmle = TMLE(mf, exposure='art', outcome='dead')
    tmle.exposure_model(exposure_formula, print_results=False)
    tmle.outcome_model(outcome_formula, print_results=False)
    tmle.missing_model(outcome_formula, print_results=False)
    tmle.fit()

    npt.assert_allclose(tmle.risk_difference, r_rd, rtol=point_rtol)
    npt.assert_allclose(tmle.risk_difference_ci, r_rd_ci, rtol=interval_rtol)

    npt.assert_allclose(tmle.risk_ratio, r_rr, rtol=point_rtol)
    npt.assert_allclose(tmle.risk_ratio_ci, r_rr_ci, rtol=interval_rtol)

    npt.assert_allclose(tmle.odds_ratio, r_or, rtol=point_rtol)
    npt.assert_allclose(tmle.odds_ratio_ci, r_or_ci, rtol=interval_rtol)
def test_no_missing_data(self, df):
    """``missing_model`` must raise ``ValueError`` for the ``df`` fixture.

    The exposure and outcome models are specified first; only the
    ``missing_model`` call is expected to raise. Per the test name, ``df``
    is presumably a data set without missing outcome values -- confirm
    against the fixture definition.
    """
    exposure_formula = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
    # Used for both the outcome model and the (rejected) missing-data model.
    outcome_formula = 'art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'

    estimator = TMLE(df, exposure='art', outcome='dead')
    estimator.exposure_model(exposure_formula, print_results=False)
    estimator.outcome_model(outcome_formula, print_results=False)

    # Only this call sits inside the ``raises`` context, so an error from the
    # earlier set-up calls would not be mistaken for the expected one.
    with pytest.raises(ValueError):
        estimator.missing_model(outcome_formula, print_results=False)
def test_missing_continuous_outcome(self, mcf):
    """Compare the TMLE average treatment effect on ``cd4_wk45`` with references.

    Exposure, outcome and missing-data models are fitted on the ``mcf``
    fixture. The average treatment effect and its confidence interval are
    then checked against hard-coded reference values to a relative
    tolerance of 1e-3.
    """
    # Hard-coded reference values.
    expected_ate = 211.8295
    expected_ci = (107.7552, 315.9038)
    tolerance = 1e-3

    exposure_formula = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
    # The outcome and missing-data models share the same specification.
    outcome_formula = 'art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'

    estimator = TMLE(mcf, exposure='art', outcome='cd4_wk45')
    estimator.exposure_model(exposure_formula, print_results=False)
    estimator.outcome_model(outcome_formula, print_results=False)
    estimator.missing_model(outcome_formula, print_results=False)
    estimator.fit()

    npt.assert_allclose(estimator.average_treatment_effect, expected_ate, rtol=tolerance)
    npt.assert_allclose(estimator.average_treatment_effect_ci, expected_ci, rtol=tolerance)
# Outcome model aipw.outcome_model('art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', print_results=False) # Calculating estimate aipw.fit() # Printing summary results aipw.summary() ############################# # TMLE from zepid.causal.doublyrobust import TMLE tmle = TMLE(df, exposure='art', outcome='dead') tmle.exposure_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', print_results=False, bound=0.01) tmle.missing_model('art + male + age0 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', print_results=False) tmle.outcome_model('art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', print_results=False) tmle.fit() tmle.summary() ############################# # Cross-fitting from sklearn.ensemble import RandomForestClassifier from zepid.superlearner import GLMSL, StepwiseSL, SuperLearner from zepid.causal.doublyrobust import SingleCrossfitTMLE # SuperLearner set-up labels = ["LogR", "Step.int", "RandFor"] candidates = [GLMSL(sm.families.family.Binomial()), StepwiseSL(sm.families.family.Binomial(), selection="forward", order_interaction=0),