def df_iptw(df_c):
    """Return ``df_c`` with an ``'iptw'`` column filled in for the ``S == 1`` rows.

    The rows with ``S == 1`` are copied, an IPTW treatment model for ``A``
    given ``L`` is fit on that copy (stabilized weights requested), and the
    resulting weights are stored in a new ``'iptw'`` column. The ``S == 0``
    rows are then appended as-is and the index is rebuilt.
    """
    in_sample = df_c['S'] == 1
    sampled = df_c.loc[in_sample].copy()

    weight_model = IPTW(sampled, treatment='A', outcome='Y')
    weight_model.treatment_model('L', stabilized=True, print_results=False)
    sampled['iptw'] = weight_model.iptw

    # Rows outside the sample receive no weight; they are stacked underneath.
    not_sampled = df_c.loc[df_c['S'] == 0]
    return pd.concat([sampled, not_sampled], ignore_index=True, sort=False)
def test_match_r_stddiff(self):
    """Check unweighted and weighted standardized mean differences on a toy data set."""
    # Simulated data for variable detection and standardized differences
    toy = pd.DataFrame({
        'y': [1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0],
        'treat': [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
        'bin': [0, 1, 0, np.nan, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1],
        'con': [0.1, 0.0, 1.0, 1.1, 2.2, 1.3, 0.1, 0.5,
                0.9, 0.5, 0.3, 0.2, 0.7, 0.9, 1.4],
        'dis': [0, 1, 3, 2, 1, 0, 0, 0, 0, 0, 1, 3, 2, 2, 1],
        'cat': [1, 2, 3, 1, 1, 2, 3, 1, 3, 2, 1, 2, 3, 2, 1],
    })

    estimator = IPTW(toy, treatment='treat', outcome='y')
    estimator.treatment_model(model_denominator='bin + con + dis + C(cat)',
                              print_results=False)
    estimator.marginal_structural_model('treat')
    estimator.fit()
    smd = estimator.standardized_mean_differences()

    # Unweighted standardized mean differences
    expected_unweighted = np.array([0.342997, 0.0, 0.06668, -0.513553])
    npt.assert_allclose(np.array(smd['smd_u']), expected_unweighted, rtol=1e-4)

    # Weighted standardized mean differences
    # TODO need to find an R package or something that calculates weighted SMD
    # currently compares to my own calculations
    expected_weighted = np.array([0.206072, -0.148404, 0.035683, 0.085775])
    npt.assert_allclose(np.array(smd['smd_w']), expected_weighted, rtol=1e-4)
def test_stabilized_weights(self, data):
    """Check the weights produced by the default treatment model on the ``data`` fixture."""
    estimator = IPTW(data, treatment='A', outcome='Y')
    estimator.treatment_model(model_denominator='L', print_results=False)
    estimator.marginal_structural_model('A')
    estimator.fit()

    expected_weights = [
        1.5, 1.5,
        2 / 3, 2 / 3, 2 / 3,
        3 / 4, 3 / 4, 3 / 4, 3 / 4,
        2,
    ]
    npt.assert_allclose(estimator.iptw, expected_weights)
def test_positivity_calculator(self, data):
    """Check the mean, SD, minimum and maximum stored by ``positivity()``."""
    estimator = IPTW(data, treatment='A', outcome='Y')
    estimator.treatment_model(model_denominator='L', print_results=False)
    estimator.marginal_structural_model('A')
    estimator.fit()

    # positivity() populates the private _pos_* summary attributes read below
    estimator.positivity()

    npt.assert_allclose(estimator._pos_avg, 1)
    npt.assert_allclose(estimator._pos_sd, 0.456435, rtol=1e-5)
    npt.assert_allclose(estimator._pos_min, 2 / 3)
    npt.assert_allclose(estimator._pos_max, 2)
def test_iptw_w_censor2(self, cdata):
    """Check the ATE and its 95% limits when a missing-data model is also fit."""
    treatment_formula = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
    missing_formula = 'art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'

    estimator = IPTW(cdata, treatment='art', outcome='cd4_wk45')
    estimator.treatment_model(treatment_formula, print_results=False)
    estimator.missing_model(missing_formula, print_results=False)
    estimator.marginal_structural_model('art')
    estimator.fit()

    # Point estimate
    npt.assert_allclose(estimator.average_treatment_effect['ATE'][1],
                        205.11238, rtol=1e-5)

    # Confidence limits
    observed_limits = (estimator.average_treatment_effect['95%LCL'][1],
                       estimator.average_treatment_effect['95%UCL'][1])
    expected_limits = (96.88535, 313.33941)
    npt.assert_allclose(observed_limits, expected_limits, atol=1e-4, rtol=1e-4)
def test_iptw_w_censor(self, sdata):
    """Check the risk difference and its 95% limits when a missing-data model is also fit."""
    treatment_formula = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
    missing_formula = 'art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'

    estimator = IPTW(sdata, treatment='art', outcome='dead')
    estimator.treatment_model(treatment_formula, print_results=False)
    estimator.missing_model(missing_formula, print_results=False)
    estimator.marginal_structural_model('art')
    estimator.fit()

    # Point estimate
    npt.assert_allclose(estimator.risk_difference['RD'][1], -0.08092, rtol=1e-5)

    # Confidence limits
    observed_limits = (estimator.risk_difference['95%LCL'][1],
                       estimator.risk_difference['95%UCL'][1])
    expected_limits = (-0.15641, -0.00543)
    npt.assert_allclose(observed_limits, expected_limits, atol=1e-4, rtol=1e-4)
def test_match_sas_smr_u_stabilized(self, sdata):
    """Compare the ``standardize='unexposed'`` risk difference with the SAS reference values."""
    # Reference values (named as SAS output in this test)
    expected_rd = -0.080048197
    expected_limits = (-0.153567335, -0.006529058)

    formula = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
    estimator = IPTW(sdata, treatment='art', outcome='dead', standardize='unexposed')
    estimator.treatment_model(model_denominator=formula, print_results=False)
    estimator.marginal_structural_model('art')
    estimator.fit()

    npt.assert_allclose(estimator.risk_difference['RD'][1], expected_rd, rtol=1e-5)

    observed_limits = (estimator.risk_difference['95%LCL'][1],
                       estimator.risk_difference['95%UCL'][1])
    npt.assert_allclose(observed_limits, expected_limits, rtol=1e-4)
def test_match_sas_gbound3(self, sdata):
    """Compare the risk difference with SAS reference values when ``bound=0.5`` is passed."""
    # Reference values (named as SAS output in this test)
    expected_rd = -0.045129870
    expected_limits = (-0.128184899, 0.037925158)

    formula = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
    estimator = IPTW(sdata, treatment='art', outcome='dead')
    estimator.treatment_model(model_denominator=formula, print_results=False, bound=0.5)
    estimator.marginal_structural_model('art')
    estimator.fit()

    npt.assert_allclose(estimator.risk_difference['RD'][1], expected_rd, rtol=1e-5)

    observed_limits = (estimator.risk_difference['95%LCL'][1],
                       estimator.risk_difference['95%UCL'][1])
    npt.assert_allclose(observed_limits, expected_limits, atol=1e-4, rtol=1e-4)
def test_match_sas_gbound2(self, sdata):
    """Compare the risk difference with SAS reference values when ``bound=[0.2, 0.9]`` is passed."""
    # Reference values (named as SAS output in this test)
    expected_rd = -0.050924398
    expected_limits = (-0.133182382, 0.031333585)

    formula = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
    estimator = IPTW(sdata, treatment='art', outcome='dead')
    estimator.treatment_model(model_denominator=formula, print_results=False,
                              bound=[0.2, 0.9])
    estimator.marginal_structural_model('art')
    estimator.fit()

    npt.assert_allclose(estimator.risk_difference['RD'][1], expected_rd, rtol=1e-5)

    observed_limits = (estimator.risk_difference['95%LCL'][1],
                       estimator.risk_difference['95%UCL'][1])
    npt.assert_allclose(observed_limits, expected_limits, atol=1e-4, rtol=1e-4)
def test_match_sas_smr_e(self, sdata):
    """Compare the unstabilized ``standardize='exposed'`` weights and risk difference with SAS values."""
    # Reference values (named as SAS output in this test)
    expected_weight_total = 158.288404
    expected_rd = -0.090875986
    expected_limits = (-0.180169444, -0.001582527)

    formula = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
    estimator = IPTW(sdata, treatment='art', outcome='dead', standardize='exposed')
    estimator.treatment_model(model_denominator=formula, stabilized=False,
                              print_results=False)
    estimator.marginal_structural_model('art')
    estimator.fit()

    # Sum of the weights
    npt.assert_allclose(np.sum(estimator.iptw), expected_weight_total, rtol=1e-4)

    # Point estimate and confidence limits
    npt.assert_allclose(estimator.risk_difference['RD'][1], expected_rd, rtol=1e-5)
    observed_limits = (estimator.risk_difference['95%LCL'][1],
                       estimator.risk_difference['95%UCL'][1])
    npt.assert_allclose(observed_limits, expected_limits, rtol=1e-4)
def test_match_sas_stabilized(self, sdata):
    """Compare the default-weight sum and risk difference with the SAS reference values."""
    # Reference values (named as SAS output in this test)
    expected_weight_total = 546.0858419
    expected_rd = -0.081519085
    expected_limits = (-0.156199938, -0.006838231)

    formula = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
    estimator = IPTW(sdata, treatment='art', outcome='dead')
    estimator.treatment_model(model_denominator=formula, print_results=False)
    estimator.marginal_structural_model('art')
    estimator.fit()

    # Sum of the weights
    npt.assert_allclose(np.sum(estimator.iptw), expected_weight_total, rtol=1e-4)

    # Point estimate and confidence limits
    npt.assert_allclose(estimator.risk_difference['RD'][1], expected_rd, rtol=1e-5)
    observed_limits = (estimator.risk_difference['95%LCL'][1],
                       estimator.risk_difference['95%UCL'][1])
    npt.assert_allclose(observed_limits, expected_limits, rtol=1e-4)
def test_standardized_differences(self, sdata):
    """Check unweighted and weighted standardized mean differences on the ``sdata`` fixture."""
    estimator = IPTW(sdata, treatment='art', outcome='dead')
    estimator.treatment_model(model_denominator='male + age0 + cd40 + dvl0',
                              print_results=False)
    estimator.marginal_structural_model('art')
    estimator.fit()
    smd = estimator.standardized_mean_differences()

    # Unweighted standardized mean differences
    expected_unweighted = np.array([-0.015684, 0.022311, -0.4867, -0.015729])
    npt.assert_allclose(np.array(smd['smd_u']), expected_unweighted, rtol=1e-4)

    # Weighted standardized mean differences
    # TODO find R package to test these weighted SMD's
    expected_weighted = np.array([-0.097789, -0.012395, -0.018591, 0.050719])
    npt.assert_allclose(np.array(smd['smd_w']), expected_weighted, rtol=1e-4)
def test_match_iptw_continuous(self, cdata):
    """Check that StochasticIPTW at p=1 minus p=0 reproduces the IPTW ATE (continuous outcome)."""
    formula = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
    complete_cases = cdata.dropna().copy()

    # Marginal structural model with unstabilized weights
    msm = IPTW(complete_cases, treatment='art', outcome='cd4_wk45')
    msm.treatment_model(model_denominator=formula, stabilized=False,
                        print_results=False)
    msm.marginal_structural_model('art')
    msm.fit()

    # 'Stochastic treatment' estimator, evaluated at p=1.0 and then p=0.0
    stochastic = StochasticIPTW(complete_cases, treatment='art', outcome='cd4_wk45')
    stochastic.treatment_model(model=formula, print_results=False)

    stochastic.fit(p=1.0)
    outcome_at_p1 = stochastic.marginal_outcome  # read before the next fit() call

    stochastic.fit(p=0.0)
    outcome_at_p0 = stochastic.marginal_outcome

    npt.assert_allclose(msm.average_treatment_effect['ATE'][1],
                        outcome_at_p1 - outcome_at_p0,
                        atol=1e-4)
def test_match_iptw(self, sdata):
    """Check that StochasticIPTW at p=1 minus p=0 reproduces the IPTW risk difference."""
    formula = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
    complete_cases = sdata.dropna().copy()

    # Marginal structural model with unstabilized weights
    msm = IPTW(complete_cases, treatment='art', outcome='dead')
    msm.treatment_model(model_denominator=formula, stabilized=False,
                        print_results=False)
    msm.marginal_structural_model('art')
    msm.fit()

    # 'Stochastic treatment' estimator, evaluated at p=1.0 and then p=0.0
    stochastic = StochasticIPTW(complete_cases, treatment='art', outcome='dead')
    stochastic.treatment_model(model=formula, print_results=False)

    stochastic.fit(p=1.0)
    outcome_at_p1 = stochastic.marginal_outcome  # read before the next fit() call

    stochastic.fit(p=0.0)
    outcome_at_p0 = stochastic.marginal_outcome

    npt.assert_allclose(msm.risk_difference['RD'][1],
                        outcome_at_p1 - outcome_at_p0,
                        atol=1e-7)
def causal_check():
    """Run the diagnostics and plots of several causal estimators on the sample data.

    Nothing is asserted or returned: each estimator is fit, its diagnostics
    are run, and its plots are displayed with ``plt.show()`` for inspection.
    Covered in order: TimeFixedGFormula, IPTW, AIPTW, TMLE, SurvivalGFormula.
    """
    exposure_formula = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
    outcome_formula = 'art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'

    def _doubly_robust_checks(estimator):
        # Shared fit / diagnostics / plot sequence used for AIPTW and TMLE.
        estimator.exposure_model(exposure_formula)
        estimator.outcome_model(outcome_formula)
        estimator.fit()
        estimator.run_diagnostics()
        for target in ('exposure', 'outcome'):
            estimator.plot_kde(to_plot=target)
            plt.show()
        estimator.plot_love()
        plt.show()

    data = load_sample_data(False).drop(columns=['cd4_wk45'])
    data[['cd4_rs1', 'cd4_rs2']] = spline(data, 'cd40', n_knots=3, term=2,
                                          restricted=True)
    data[['age_rs1', 'age_rs2']] = spline(data, 'age0', n_knots=3, term=2,
                                          restricted=True)

    # Check TimeFixedGFormula diagnostics
    gformula = TimeFixedGFormula(data, exposure='art', outcome='dead')
    gformula.outcome_model(model=outcome_formula)
    gformula.run_diagnostics(decimal=3)

    # Check IPTW plots
    weights = IPTW(data, treatment='art', outcome='dead')
    weights.treatment_model(exposure_formula, stabilized=True)
    weights.marginal_structural_model('art')
    weights.fit()

    weights.plot_love()
    plt.tight_layout()
    plt.show()

    weights.plot_kde()
    plt.show()
    weights.plot_kde(measure='logit')
    plt.show()

    weights.plot_boxplot()
    plt.show()
    weights.plot_boxplot(measure='logit')
    plt.show()

    weights.run_diagnostics()

    # Check AIPTW Diagnostics
    _doubly_robust_checks(AIPTW(data, exposure='art', outcome='dead'))

    # Check TMLE diagnostics
    _doubly_robust_checks(TMLE(data, exposure='art', outcome='dead'))

    # Check SurvivalGFormula plots
    long_df = load_sample_data(False).drop(columns=['cd4_wk45'])
    long_df['t'] = np.round(long_df['t']).astype(int)
    # Repeat each row t times, then renumber t as 1..t within each id.
    long_df = pd.DataFrame(np.repeat(long_df.values, long_df['t'], axis=0),
                           columns=long_df.columns)
    long_df['t'] = long_df.groupby('id')['t'].cumcount() + 1

    # 'd' is 1 on the last row of an id with dead == 1, and 0 on every other row.
    is_dead = long_df['dead'] == 1
    is_last_row_of_id = long_df['id'] != long_df['id'].shift(-1)
    long_df.loc[is_dead & is_last_row_of_id, 'd'] = 1
    long_df['d'] = long_df['d'].fillna(0)

    long_df['t_sq'] = long_df['t'] ** 2
    long_df['t_cu'] = long_df['t'] ** 3

    survival = SurvivalGFormula(long_df, idvar='id', exposure='art',
                                outcome='d', time='t')
    survival.outcome_model(
        model='art + male + age0 + cd40 + dvl0 + t + t_sq + t_cu')
    survival.fit(treatment='all')

    survival.plot()
    plt.show()
    survival.plot(c='r', linewidth=3, alpha=0.8)
    plt.show()
g.fit(treatment='all') r_all = g.marginal_outcome g.fit(treatment='none') r_none = g.marginal_outcome rd_results.append(r_all - r_none) se = np.std(rd_results) print('95% LCL', riskd - 1.96*se) print('95% UCL', riskd + 1.96*se) ############################# # IPTW from zepid.causal.ipw import IPTW iptw = IPTW(df, treatment='art', outcome='dead') iptw.treatment_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', bound=0.01, print_results=False) iptw.marginal_structural_model('art') iptw.fit() iptw.summary() ############################# # AIPTW from zepid.causal.doublyrobust import AIPTW aipw = AIPTW(df, exposure='art', outcome='dead') # Treatment model aipw.exposure_model('male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', print_results=False, bound=0.01) # Outcome model aipw.outcome_model('art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0', print_results=False)