def test_continuous_outcome(self, continuous_data):
    """Marginal outcome under treat-all with ``outcome_type='normal'``.

    Fits the outcome model ``A + W1 + W2 + W3`` on the ``continuous_data``
    fixture and compares the estimate against a fixed reference value.
    """
    estimator = TimeFixedGFormula(
        continuous_data,
        exposure='A',
        outcome='Y',
        outcome_type='normal',
    )
    estimator.outcome_model(model='A + W1 + W2 + W3', print_results=False)
    estimator.fit(treatment='all')

    expected = -0.730375
    npt.assert_allclose(estimator.marginal_outcome, expected, rtol=1e-5)
def test_warn_categorical_treatment(self, cat_data):
    """``fit`` must emit a ``UserWarning`` when both categorical levels are 'True'."""
    estimator = TimeFixedGFormula(
        cat_data,
        exposure=['A1', 'A2'],
        exposure_type='categorical',
        outcome='Y',
    )
    estimator.outcome_model(model='A1 + A2', print_results=False)

    plan = ['True', 'True']
    with pytest.warns(UserWarning):
        estimator.fit(treatment=plan)
def test_g_formula1(self, sim_t_fixed_data):
    """Difference between treat-all and treat-none marginal outcomes.

    Uses the ``sim_t_fixed_data`` fixture and compares the difference with a
    fixed reference value at a relative tolerance of 1e-2.
    """
    estimator = TimeFixedGFormula(sim_t_fixed_data, exposure='A', outcome='Y')
    estimator.outcome_model(model='A + W1_sq + W2 + W3', print_results=False)

    # marginal_outcome is overwritten by each fit, so capture it after every call.
    estimator.fit(treatment='all')
    outcome_all = estimator.marginal_outcome
    estimator.fit(treatment='none')
    outcome_none = estimator.marginal_outcome

    difference = outcome_all - outcome_none
    npt.assert_allclose(difference, -0.075186, rtol=1e-2)
def test_error_mismatch_categorical_treatment(self, cat_data):
    """``fit`` must raise ``ValueError`` when three treatments are given for two exposure columns."""
    estimator = TimeFixedGFormula(
        cat_data,
        exposure=['A1', 'A2'],
        exposure_type='categorical',
        outcome='Y',
    )
    estimator.outcome_model(model='A1 + A2', print_results=False)

    too_many = ['True', 'False', 'False']
    with pytest.raises(ValueError):
        estimator.fit(treatment=too_many)
def test_conditional_stochastic_warning(self):
    """``fit_stochastic`` must emit a ``UserWarning`` for the conditions used here.

    The first condition selects rows with ``L`` equal to 1 or 0, and the
    second selects rows with ``L`` equal to 0, so the two overlap.
    """
    frame = pd.DataFrame({
        'A': [1]*50 + [0]*50,
        'L': [1]*25 + [0]*25 + [1]*40 + [0]*10,
        'Y': [1]*25 + [0]*25 + [1]*25 + [0]*25,
    })

    estimator = TimeFixedGFormula(frame, exposure='A', outcome='Y')
    estimator.outcome_model(model='A + L + A:L', print_results=False)

    conditions = ["(g['L']==1) | (g['L']==0)", "g['L']==0"]
    with pytest.warns(UserWarning):
        estimator.fit_stochastic(p=[1.0, 1.0], conditional=conditions)
def test_weighted_data(self, data):
    """Marginal outcomes under treat-all and treat-none when ``weights='w'`` is supplied."""
    estimator = TimeFixedGFormula(data, exposure='A', outcome='Y', weights='w')
    estimator.outcome_model(model='A + L + A:L', print_results=False)

    # (treatment plan, reference marginal outcome)
    references = (
        ('all', 0.564286),
        ('none', 0.404286),
    )
    for plan, expected in references:
        estimator.fit(treatment=plan)
        estimate = estimator.marginal_outcome
        npt.assert_allclose(estimate, expected, rtol=1e-5)
def test_categorical_treat(self, cat_data):
    """Marginal outcome for three treatment assignments of a two-column categorical exposure."""
    estimator = TimeFixedGFormula(
        cat_data,
        exposure=['A1', 'A2'],
        exposure_type='categorical',
        outcome='Y',
    )
    estimator.outcome_model(model='A1 + A2', print_results=False)

    # (treatment per exposure column, reference marginal outcome)
    scenarios = (
        (["False", "False"], 0.373091),
        (["True", "False"], 0.8128),
        (["False", "True"], 0.5025),
    )
    for plan, expected in scenarios:
        estimator.fit(treatment=plan)
        npt.assert_allclose(estimator.marginal_outcome, expected, rtol=1e-5)
def test_g_formula2(self, data):
    """Marginal outcomes under treat-all and treat-none on the ``data`` fixture.

    Comparisons use ``assert_allclose`` with its default tolerances.
    """
    estimator = TimeFixedGFormula(data, exposure='A', outcome='Y')
    estimator.outcome_model(model='A + L + A:L', print_results=False)

    # (treatment plan, reference marginal outcome)
    for plan, expected in (('all', 0.575), ('none', 0.415)):
        estimator.fit(treatment=plan)
        estimate = estimator.marginal_outcome
        npt.assert_allclose(estimate, expected)
def test_weight_stochastic_between_marginal(self, data):
    """With weights, the ``p=0.5`` stochastic estimate lies strictly between treat-none and treat-all."""
    estimator = TimeFixedGFormula(data, exposure='A', outcome='Y', weights='w')
    estimator.outcome_model(model='A + L + A:L', print_results=False)

    estimator.fit(treatment='all')
    upper = estimator.marginal_outcome

    estimator.fit(treatment='none')
    lower = estimator.marginal_outcome

    estimator.fit_stochastic(p=0.5)
    middle = estimator.marginal_outcome

    assert lower < middle < upper
def test_directions_correct(self, sim_t_fixed_data):
    """The custom-treatment estimate lies strictly between treat-all (lower) and treat-none (upper)."""
    estimator = TimeFixedGFormula(sim_t_fixed_data, exposure='A', outcome='Y')
    estimator.outcome_model(model='A + W1_sq + W2 + W3', print_results=False)

    estimator.fit(treatment='all')
    outcome_all = estimator.marginal_outcome

    estimator.fit(treatment='none')
    outcome_none = estimator.marginal_outcome

    # Treatment assigned through a condition string evaluated on W3.
    estimator.fit(treatment="g['W3'] > 2")
    outcome_custom = estimator.marginal_outcome

    assert outcome_all < outcome_custom < outcome_none
def test_iterative_for_single_t(self, sim_t_fixed_data):
    """``IterativeCondGFormula`` with one exposure/outcome matches ``TimeFixedGFormula``."""
    formula = 'A + W1_sq + W2 + W3'

    # Sequential-regression estimator restricted to a single time point
    iterative = IterativeCondGFormula(sim_t_fixed_data, exposures=['A'], outcomes=['Y'])
    iterative.outcome_model([formula], print_results=False)
    iterative.fit(treatments=[1])

    # Time-fixed estimator on the same data and model
    fixed = TimeFixedGFormula(sim_t_fixed_data, exposure='A', outcome='Y')
    fixed.outcome_model(model=formula, print_results=False)
    fixed.fit(treatment='all')

    # Expected behavior: both estimation methods return the same result
    npt.assert_allclose(fixed.marginal_outcome, iterative.marginal_outcome)
def test_monte_carlo_for_single_t(self, sim_t_fixed_data):
    """Mean of the ``MonteCarloGFormula`` predicted ``Y`` matches ``TimeFixedGFormula``.

    The comparison uses a relative tolerance of 1e-3.
    """
    formula = 'A + W1_sq + W2 + W3'

    # Monte Carlo estimator for a single time point
    monte_carlo = MonteCarloGFormula(
        sim_t_fixed_data,
        idvar='id',
        exposure='A',
        outcome='Y',
        time_out='t',
        time_in='t0',
    )
    monte_carlo.outcome_model(formula, print_results=False)
    monte_carlo.exposure_model('W1_sq', print_results=False)
    # Keep this a high number to reduce simulation errors
    monte_carlo.fit(treatment="all", sample=1000000)
    print(monte_carlo.predicted_outcomes)

    # Time-fixed estimator on the same data and outcome model
    fixed = TimeFixedGFormula(sim_t_fixed_data, exposure='A', outcome='Y')
    fixed.outcome_model(model=formula, print_results=False)
    fixed.fit(treatment='all')

    # Expected behavior: both estimation methods return the same result
    simulated_mean = np.mean(monte_carlo.predicted_outcomes['Y'])
    npt.assert_allclose(fixed.marginal_outcome, simulated_mean, rtol=1e-3)
def test_weight_stochastic_matches_marginal(self, data):
    """With weights, ``fit_stochastic`` at p=1.0 / p=0.0 matches treat-all / treat-none."""
    estimator = TimeFixedGFormula(data, exposure='A', outcome='Y', weights='w')
    estimator.outcome_model(model='A + L + A:L', print_results=False)

    # (deterministic plan, stochastic probability expected to reproduce it)
    pairings = (
        ('all', 1.0),
        ('none', 0.0),
    )
    for plan, probability in pairings:
        estimator.fit(treatment=plan)
        deterministic = estimator.marginal_outcome

        estimator.fit_stochastic(p=probability)
        stochastic = estimator.marginal_outcome

        npt.assert_allclose(deterministic, stochastic, rtol=1e-7)
def test_conditional_weight_stochastic_matches_marginal(self, data):
    """With weights, conditional ``fit_stochastic`` at all-1.0 / all-0.0 matches treat-all / treat-none."""
    estimator = TimeFixedGFormula(data, exposure='A', outcome='Y', weights='w')
    estimator.outcome_model(model='A + L + A:L', print_results=False)

    # (deterministic plan, per-condition probabilities expected to reproduce it)
    pairings = (
        ('all', [1.0, 1.0]),
        ('none', [0.0, 0.0]),
    )
    for plan, probabilities in pairings:
        estimator.fit(treatment=plan)
        deterministic = estimator.marginal_outcome

        # Build the condition list fresh for each call, as the original did.
        estimator.fit_stochastic(p=probabilities,
                                 conditional=["g['L']==1", "g['L']==0"])
        stochastic = estimator.marginal_outcome

        npt.assert_allclose(deterministic, stochastic, rtol=1e-7)
def test_exposed_standardized1(self, data):
    """Difference and ratio of treat-all vs treat-none for each ``standardize`` option.

    Runs once with ``standardize='exposed'`` and once with
    ``standardize='unexposed'``. The expected difference is 0.16 in both
    runs; the expected ratio differs.
    """
    # (standardize option, reference ratio of treat-all to treat-none)
    cases = (
        ('exposed', 1.387097),
        ('unexposed', 1.384615),
    )
    for population, expected_ratio in cases:
        estimator = TimeFixedGFormula(data, exposure='A', outcome='Y',
                                      standardize=population)
        estimator.outcome_model(model='A + L + A:L', print_results=False)

        estimator.fit(treatment='all')
        outcome_all = estimator.marginal_outcome
        estimator.fit(treatment='none')
        outcome_none = estimator.marginal_outcome

        npt.assert_allclose(outcome_all - outcome_none, 0.16, rtol=1e-5)
        npt.assert_allclose(outcome_all / outcome_none, expected_ratio, rtol=1e-5)
def test_sequential_regression_for_single_t(self, sim_t_fixed_data):
    """``TimeVaryGFormula`` with ``method='SequentialRegression'`` matches ``TimeFixedGFormula``."""
    formula = 'A + W1_sq + W2 + W3'

    # Sequential regression estimator for a single time point
    sequential = TimeVaryGFormula(
        sim_t_fixed_data,
        idvar='id',
        exposure='A',
        outcome='Y',
        time_out='t',
        method='SequentialRegression',
    )
    sequential.outcome_model(formula, print_results=False)
    sequential.fit(treatment="all")

    # Time-fixed estimator on the same data and model
    fixed = TimeFixedGFormula(sim_t_fixed_data, exposure='A', outcome='Y')
    fixed.outcome_model(model=formula, print_results=False)
    fixed.fit(treatment='all')

    # Expected behavior: both estimation methods return the same result
    npt.assert_allclose(fixed.marginal_outcome, sequential.predicted_outcomes)
def causal_check():
    """Run the diagnostics and plots of several causal estimators for visual inspection.

    Exercises, in order: TimeFixedGFormula diagnostics, IPTW plots and
    diagnostics, AIPTW diagnostics and plots, TMLE diagnostics and plots, and
    the SurvivalGFormula plots. Nothing is returned; each figure is displayed
    with ``plt.show()``.
    """
    # Model formulas shared by the estimators below.
    exposure_formula = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
    outcome_formula = 'art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'

    sample = load_sample_data(False).drop(columns=['cd4_wk45'])
    sample[['cd4_rs1', 'cd4_rs2']] = spline(sample, 'cd40', n_knots=3, term=2, restricted=True)
    sample[['age_rs1', 'age_rs2']] = spline(sample, 'age0', n_knots=3, term=2, restricted=True)

    # Check TimeFixedGFormula diagnostics
    gform = TimeFixedGFormula(sample, exposure='art', outcome='dead')
    gform.outcome_model(model=outcome_formula)
    gform.run_diagnostics(decimal=3)

    # Check IPTW plots
    iptw = IPTW(sample, treatment='art', outcome='dead')
    iptw.treatment_model(exposure_formula, stabilized=True)
    iptw.marginal_structural_model('art')
    iptw.fit()

    iptw.plot_love()
    plt.tight_layout()
    plt.show()

    iptw.plot_kde()
    plt.show()

    iptw.plot_kde(measure='logit')
    plt.show()

    iptw.plot_boxplot()
    plt.show()

    iptw.plot_boxplot(measure='logit')
    plt.show()

    iptw.run_diagnostics()

    # Check AIPTW Diagnostics
    aiptw = AIPTW(sample, exposure='art', outcome='dead')
    aiptw.exposure_model(exposure_formula)
    aiptw.outcome_model(outcome_formula)
    aiptw.fit()
    aiptw.run_diagnostics()

    aiptw.plot_kde(to_plot='exposure')
    plt.show()

    aiptw.plot_kde(to_plot='outcome')
    plt.show()

    aiptw.plot_love()
    plt.show()

    # Check TMLE diagnostics
    tmle_est = TMLE(sample, exposure='art', outcome='dead')
    tmle_est.exposure_model(exposure_formula)
    tmle_est.outcome_model(outcome_formula)
    tmle_est.fit()
    tmle_est.run_diagnostics()

    tmle_est.plot_kde(to_plot='exposure')
    plt.show()

    tmle_est.plot_kde(to_plot='outcome')
    plt.show()

    tmle_est.plot_love()
    plt.show()

    # Check SurvivalGFormula plots
    long_df = load_sample_data(False).drop(columns=['cd4_wk45'])
    long_df['t'] = np.round(long_df['t']).astype(int)
    # Repeat each row `t` times, then renumber `t` as 1..t within each id.
    repeated_rows = np.repeat(long_df.values, long_df['t'], axis=0)
    long_df = pd.DataFrame(repeated_rows, columns=long_df.columns)
    long_df['t'] = long_df.groupby('id')['t'].cumcount() + 1

    # Flag `d` only on the final row of an id whose `dead` equals 1; all other rows get 0.
    died = long_df['dead'] == 1
    final_row_of_id = long_df['id'] != long_df['id'].shift(-1)
    long_df.loc[(died & final_row_of_id), 'd'] = 1
    long_df['d'] = long_df['d'].fillna(0)

    long_df['t_sq'] = long_df['t']**2
    long_df['t_cu'] = long_df['t']**3

    survival = SurvivalGFormula(long_df, idvar='id', exposure='art', outcome='d', time='t')
    survival.outcome_model(model='art + male + age0 + cd40 + dvl0 + t + t_sq + t_cu')
    survival.fit(treatment='all')

    survival.plot()
    plt.show()

    survival.plot(c='r', linewidth=3, alpha=0.8)
    plt.show()
#Crude Model ze.RiskRatio(df, exposure='art', outcome='dead') ze.RiskDiff(df, exposure='art', outcome='dead') #Adjusted Model model = 'art + male + age0 + cd40 + dvl0' f = sm.families.family.Binomial(sm.families.links.identity) linrisk = smf.glm('dead ~ ' + model, df, family=f).fit() linrisk.summary() f = sm.families.family.Binomial(sm.families.links.log) log = smf.glm('dead ~ art', df, family=f).fit() log.summary() #g-formula g = TimeFixedGFormula(df, exposure='art', outcome='dead') g.outcome_model( model= 'art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0') g.fit(treatment='all') r_all = g.marginal_outcome g.fit(treatment='none') r_none = g.marginal_outcome print('RD1 = ', r_all - r_none) print('RR1 = ', r_all / r_none) rd_results = [] rr_results = [] for i in range(500): dfs = df.sample(n=df.shape[0], replace=True) g = TimeFixedGFormula(dfs, exposure='art', outcome='dead') g.outcome_model( model= 'art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0',
def test_custom_treatment(self, sim_t_fixed_data):
    """Marginal outcome when treatment is given as the condition string ``g['W3'] > 2``."""
    estimator = TimeFixedGFormula(sim_t_fixed_data, exposure='A', outcome='Y')
    estimator.outcome_model(model='A + W1_sq + W2 + W3', print_results=False)

    rule = "g['W3'] > 2"
    estimator.fit(treatment=rule)

    expected = 0.682829
    npt.assert_allclose(estimator.marginal_outcome, expected, rtol=1e-5)
def test_error_binary_exposure_list_treatments(self, data):
    """``fit`` must raise ``ValueError`` when a list of treatments is given for a single exposure column."""
    estimator = TimeFixedGFormula(data, exposure='A', outcome='Y')
    estimator.outcome_model(model='A + L + A:L', print_results=False)

    list_plan = ['True', 'False']
    with pytest.raises(ValueError):
        estimator.fit(treatment=list_plan)
def test_stochastic_conditional_probability(self, data):
    """``fit_stochastic`` must raise ``ValueError`` when one probability is given for two conditions."""
    estimator = TimeFixedGFormula(data, exposure='A', outcome='Y')
    estimator.outcome_model(model='A + L + A:L', print_results=False)

    conditions = ["g['L']==1", "g['L']==0"]
    with pytest.raises(ValueError):
        estimator.fit_stochastic(p=[0.0], conditional=conditions)
def test_error_wrong_treatment_object(self, data):
    """``fit`` must raise ``ValueError`` when ``treatment`` is the integer 5."""
    estimator = TimeFixedGFormula(data, exposure='A', outcome='Y')
    estimator.outcome_model(model='A + L + A:L', print_results=False)

    invalid_plan = 5
    with pytest.raises(ValueError):
        estimator.fit(treatment=invalid_plan)