def test_weight_stochastic_between_marginal(self, data):
    """Check that a p=0.5 stochastic plan falls strictly between 'none' and 'all'.

    The estimator is built with ``weights='w'`` and refit three times; the
    marginal outcome is read after each fit and the three values are ordered.
    """
    gform = TimeFixedGFormula(data, exposure='A', outcome='Y', weights='w')
    gform.outcome_model(model='A + L + A:L', print_results=False)

    # marginal_outcome is read immediately after each fit, before the next refit
    gform.fit(treatment='all')
    risk_all = gform.marginal_outcome

    gform.fit(treatment='none')
    risk_none = gform.marginal_outcome

    gform.fit_stochastic(p=0.5)
    risk_half = gform.marginal_outcome

    assert risk_none < risk_half < risk_all
def test_iterative_for_single_t(self, sim_t_fixed_data):
    """Check that IterativeCondGFormula and TimeFixedGFormula agree for a single time point.

    Both estimators receive the same outcome formula. The iterative estimator
    is fit with ``treatments=[1]`` and the time-fixed one with
    ``treatment='all'``; their marginal outcomes are compared at the default
    tolerance of ``npt.assert_allclose``.
    """
    formula = 'A + W1_sq + W2 + W3'

    # Sequential regression with one exposure and one outcome column
    iterative = IterativeCondGFormula(sim_t_fixed_data,
                                      exposures=['A'], outcomes=['Y'])
    iterative.outcome_model([formula], print_results=False)
    iterative.fit(treatments=[1])

    # Reference estimate from the time-fixed estimator
    fixed = TimeFixedGFormula(sim_t_fixed_data, exposure='A', outcome='Y')
    fixed.outcome_model(model=formula, print_results=False)
    fixed.fit(treatment='all')

    # Expected behavior: both estimation methods give the same result
    npt.assert_allclose(fixed.marginal_outcome, iterative.marginal_outcome)
def test_monte_carlo_for_single_t(self, sim_t_fixed_data):
    """Check that the Monte Carlo estimate matches TimeFixedGFormula to rtol=1e-3.

    The mean of the ``'Y'`` column of the Monte Carlo predicted outcomes is
    compared with the time-fixed marginal outcome under ``treatment='all'``.
    """
    outcome_formula = 'A + W1_sq + W2 + W3'

    # Monte Carlo estimation for a single time point
    monte_carlo = MonteCarloGFormula(sim_t_fixed_data, idvar='id', exposure='A',
                                     outcome='Y', time_out='t', time_in='t0')
    monte_carlo.outcome_model(outcome_formula, print_results=False)
    monte_carlo.exposure_model('W1_sq', print_results=False)
    # Keep the sample size high to reduce simulation error
    monte_carlo.fit(treatment="all", sample=1000000)
    print(monte_carlo.predicted_outcomes)

    # Reference estimate from the time-fixed estimator
    fixed = TimeFixedGFormula(sim_t_fixed_data, exposure='A', outcome='Y')
    fixed.outcome_model(model=outcome_formula, print_results=False)
    fixed.fit(treatment='all')

    # Expected behavior: same result from both methods, up to simulation noise
    simulated_mean = np.mean(monte_carlo.predicted_outcomes['Y'])
    npt.assert_allclose(fixed.marginal_outcome, simulated_mean, rtol=1e-3)
def test_conditional_weight_stochastic_matches_marginal(self, data):
    """Check that conditional stochastic plans with p of 1 or 0 match 'all' / 'none'.

    With ``weights='w'``, a stochastic fit whose probability is 1.0 in both
    ``L`` strata must equal the ``treatment='all'`` marginal outcome, and a
    probability of 0.0 in both strata must equal ``treatment='none'``.
    """
    gform = TimeFixedGFormula(data, exposure='A', outcome='Y', weights='w')
    gform.outcome_model(model='A + L + A:L', print_results=False)

    # Each pair is (deterministic plan, probability applied to every stratum)
    for plan, prob in (('all', 1.0), ('none', 0.0)):
        gform.fit(treatment=plan)
        deterministic = gform.marginal_outcome

        # NOTE(review): the `g[...]` inside these strings is interpreted by the
        # estimator, not by this test -- presumably its internal data; verify.
        gform.fit_stochastic(p=[prob, prob],
                             conditional=["g['L']==1", "g['L']==0"])
        stochastic = gform.marginal_outcome

        npt.assert_allclose(deterministic, stochastic, rtol=1e-7)
def test_weight_stochastic_matches_marginal(self, data):
    """Check that stochastic plans with p=1.0 / p=0.0 match the 'all' / 'none' plans.

    The estimator is built with ``weights='w'``; for each deterministic plan
    the marginal outcome is compared with the matching stochastic fit at
    ``rtol=1e-7``.
    """
    gform = TimeFixedGFormula(data, exposure='A', outcome='Y', weights='w')
    gform.outcome_model(model='A + L + A:L', print_results=False)

    # Each pair is (deterministic plan, equivalent treatment probability)
    for plan, prob in (('all', 1.0), ('none', 0.0)):
        gform.fit(treatment=plan)
        deterministic = gform.marginal_outcome

        gform.fit_stochastic(p=prob)
        stochastic = gform.marginal_outcome

        npt.assert_allclose(deterministic, stochastic, rtol=1e-7)
def test_sequential_regression_for_single_t(self, sim_t_fixed_data):
    """Check that TimeVaryGFormula (SequentialRegression) agrees with TimeFixedGFormula.

    Both estimators use the same outcome formula and ``treatment='all'``; the
    time-varying ``predicted_outcomes`` is compared with the time-fixed
    ``marginal_outcome`` at the default tolerance of ``npt.assert_allclose``.
    """
    formula = 'A + W1_sq + W2 + W3'

    # Sequential regression for a single time point
    sequential = TimeVaryGFormula(sim_t_fixed_data, idvar='id', exposure='A',
                                  outcome='Y', time_out='t',
                                  method='SequentialRegression')
    sequential.outcome_model(formula, print_results=False)
    sequential.fit(treatment="all")

    # Reference estimate from the time-fixed estimator
    fixed = TimeFixedGFormula(sim_t_fixed_data, exposure='A', outcome='Y')
    fixed.outcome_model(model=formula, print_results=False)
    fixed.fit(treatment='all')

    # Expected behavior: both estimation methods give the same result
    npt.assert_allclose(fixed.marginal_outcome, sequential.predicted_outcomes)
def test_categorical_treat(self, cat_data):
    """Check marginal outcomes for three categorical treatment plans against fixed values.

    The estimator is refit once per plan (one string per exposure column) and
    the marginal outcome is compared with its expected value at ``rtol=1e-5``.
    """
    gform = TimeFixedGFormula(cat_data, exposure=['A1', 'A2'],
                              exposure_type='categorical', outcome='Y')
    gform.outcome_model(model='A1 + A2', print_results=False)

    # (treatment plan, expected marginal outcome), checked in this order
    expected_by_plan = (
        (["False", "False"], 0.373091),
        (["True", "False"], 0.8128),
        (["False", "True"], 0.5025),
    )
    for plan, expected in expected_by_plan:
        gform.fit(treatment=plan)
        npt.assert_allclose(gform.marginal_outcome, expected, rtol=1e-5)
def test_directions_correct(self, sim_t_fixed_data):
    """Check that the custom-plan outcome lies strictly between the 'all' and 'none' outcomes.

    The test asserts ``all < custom < none`` for the marginal outcomes, where
    the custom plan is the string ``"g['W3'] > 2"``.
    """
    gform = TimeFixedGFormula(sim_t_fixed_data, exposure='A', outcome='Y')
    gform.outcome_model(model='A + W1_sq + W2 + W3', print_results=False)

    gform.fit(treatment='all')
    risk_all = gform.marginal_outcome

    gform.fit(treatment='none')
    risk_none = gform.marginal_outcome

    # NOTE(review): the `g[...]` in the plan string is interpreted by the
    # estimator, not by this test -- presumably its internal data; verify.
    gform.fit(treatment="g['W3'] > 2")
    risk_custom = gform.marginal_outcome

    assert risk_all < risk_custom < risk_none
def test_exposed_standardized1(self, data):
    """Check the risk difference and ratio for ``standardize='exposed'`` and ``'unexposed'``.

    For each setting a fresh estimator is built and fit under 'all' and
    'none'. The difference must be 0.16 in both cases; the expected ratio
    differs by setting. All comparisons use ``rtol=1e-5``.
    """
    # (standardize option, expected ratio of 'all' to 'none')
    cases = (('exposed', 1.387097), ('unexposed', 1.384615))

    for population, expected_ratio in cases:
        gform = TimeFixedGFormula(data, exposure='A', outcome='Y',
                                  standardize=population)
        gform.outcome_model(model='A + L + A:L', print_results=False)

        gform.fit(treatment='all')
        risk_all = gform.marginal_outcome

        gform.fit(treatment='none')
        risk_none = gform.marginal_outcome

        npt.assert_allclose(risk_all - risk_none, 0.16, rtol=1e-5)
        npt.assert_allclose(risk_all / risk_none, expected_ratio, rtol=1e-5)
# Script fragment: `df`, `ze`, `sm`, `smf` and `TimeFixedGFormula` are defined
# outside the visible portion of the file.

# Risk ratio and risk difference of 'dead' by 'art'; the return values are not
# kept. Presumably the crude (unadjusted) measures, given the heading below.
ze.RiskRatio(df, exposure='art', outcome='dead')
ze.RiskDiff(df, exposure='art', outcome='dead')

# Adjusted Model
model = 'art + male + age0 + cd40 + dvl0'
# Binomial GLM with an identity link, fit on the adjustment formula above
f = sm.families.family.Binomial(sm.families.links.identity)
linrisk = smf.glm('dead ~ ' + model, df, family=f).fit()
# NOTE(review): the summary is built but neither printed nor stored -- confirm
linrisk.summary()
# Binomial GLM with a log link
# NOTE(review): this formula contains only 'art', not `model`, despite the
# "Adjusted Model" heading -- confirm this is intended
f = sm.families.family.Binomial(sm.families.links.log)
log = smf.glm('dead ~ art', df, family=f).fit()
log.summary()

# g-formula
g = TimeFixedGFormula(df, exposure='art', outcome='dead')
g.outcome_model(
    model='art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0')
# Marginal outcome under treatment='all', then under treatment='none'
g.fit(treatment='all')
r_all = g.marginal_outcome
g.fit(treatment='none')
r_none = g.marginal_outcome
# Difference and ratio of the two marginal outcomes
print('RD1 = ', r_all - r_none)
print('RR1 = ', r_all / r_none)

# Refit the g-formula on 500 resamples drawn with replacement, each the same
# size as `df` (assumes `df` is a pandas DataFrame -- TODO confirm).
rd_results = []
rr_results = []
for i in range(500):
    dfs = df.sample(n=df.shape[0], replace=True)
    g = TimeFixedGFormula(dfs, exposure='art', outcome='dead')
    # NOTE(review): this call passes `print_model_results`, while the call
    # before the loop passes no print option -- check the keyword against the
    # installed estimator's signature
    g.outcome_model(
        model='art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0',
        print_model_results=False)
    g.fit(treatment='all')
    # NOTE(review): the visible loop body ends here without appending to
    # rd_results / rr_results; the fragment appears to be truncated
def test_custom_treatment(self, sim_t_fixed_data):
    """Check the marginal outcome under the custom plan ``"g['W3'] > 2"`` against a fixed value."""
    expected = 0.682829

    gform = TimeFixedGFormula(sim_t_fixed_data, exposure='A', outcome='Y')
    gform.outcome_model(model='A + W1_sq + W2 + W3', print_results=False)

    # NOTE(review): the `g[...]` in the plan string is interpreted by the
    # estimator, not by this test -- presumably its internal data; verify.
    gform.fit(treatment="g['W3'] > 2")

    npt.assert_allclose(gform.marginal_outcome, expected, rtol=1e-5)
def test_error_wrong_treatment_object(self, data):
    """Check that fitting with the integer treatment ``5`` raises ``ValueError``."""
    gform = TimeFixedGFormula(data, exposure='A', outcome='Y')
    gform.outcome_model(model='A + L + A:L', print_results=False)

    invalid_plan = 5
    with pytest.raises(ValueError):
        gform.fit(treatment=invalid_plan)
def test_warn_categorical_treatment(self, cat_data):
    """Check that the categorical plan ``['True', 'True']`` emits a ``UserWarning``."""
    exposure_columns = ['A1', 'A2']
    gform = TimeFixedGFormula(cat_data, exposure=exposure_columns,
                              exposure_type='categorical', outcome='Y')
    gform.outcome_model(model='A1 + A2', print_results=False)

    # Both exposure columns are set to 'True' in the same plan
    plan = ['True', 'True']
    with pytest.warns(UserWarning):
        gform.fit(treatment=plan)
def test_error_mismatch_categorical_treatment(self, cat_data):
    """Check that three treatment entries for two exposure columns raise ``ValueError``."""
    exposure_columns = ['A1', 'A2']
    gform = TimeFixedGFormula(cat_data, exposure=exposure_columns,
                              exposure_type='categorical', outcome='Y')
    gform.outcome_model(model='A1 + A2', print_results=False)

    # One more entry than there are exposure columns
    plan = ['True', 'False', 'False']
    with pytest.raises(ValueError):
        gform.fit(treatment=plan)
def test_error_binary_exposure_list_treatments(self, data):
    """Check that a list of treatments with a single exposure column raises ``ValueError``."""
    gform = TimeFixedGFormula(data, exposure='A', outcome='Y')
    gform.outcome_model(model='A + L + A:L', print_results=False)

    # A list plan, although the estimator was built with one exposure name
    plan = ['True', 'False']
    with pytest.raises(ValueError):
        gform.fit(treatment=plan)
def test_continuous_outcome(self, continuous_data):
    """Check the ``treatment='all'`` marginal outcome with ``outcome_type='normal'`` against a fixed value."""
    expected = -0.730375

    gform = TimeFixedGFormula(continuous_data, exposure='A', outcome='Y',
                              outcome_type='normal')
    gform.outcome_model(model='A + W1 + W2 + W3', print_results=False)
    gform.fit(treatment='all')

    npt.assert_allclose(gform.marginal_outcome, expected, rtol=1e-5)