Ejemplo n.º 1
0
 def test_continuous_outcome(self, continuous_data):
     """Check the treat-all marginal outcome when ``outcome_type`` is 'normal'."""
     estimator = TimeFixedGFormula(continuous_data, exposure='A', outcome='Y', outcome_type='normal')
     estimator.outcome_model(model='A + W1 + W2 + W3', print_results=False)
     estimator.fit(treatment='all')

     # Reference value, compared with a relative tolerance of 1e-5
     expected_mean = -0.730375
     npt.assert_allclose(estimator.marginal_outcome, expected_mean, rtol=1e-5)
Ejemplo n.º 2
0
 def test_warn_categorical_treatment(self, cat_data):
     """Setting both categorical exposure columns to 'True' must emit a UserWarning."""
     exposure_columns = ['A1', 'A2']
     estimator = TimeFixedGFormula(cat_data, exposure=exposure_columns, exposure_type='categorical', outcome='Y')
     estimator.outcome_model(model='A1 + A2', print_results=False)

     # The warning is expected from the fit call itself
     with pytest.warns(UserWarning):
         estimator.fit(treatment=['True', 'True'])
Ejemplo n.º 3
0
 def test_g_formula1(self, sim_t_fixed_data):
     """Check the treat-all minus treat-none difference against a reference value."""
     estimator = TimeFixedGFormula(sim_t_fixed_data, exposure='A', outcome='Y')
     estimator.outcome_model(model='A + W1_sq + W2 + W3', print_results=False)

     # Collect the marginal outcome under each plan, 'all' first and then 'none'
     risks = {}
     for plan in ('all', 'none'):
         estimator.fit(treatment=plan)
         risks[plan] = estimator.marginal_outcome

     npt.assert_allclose(risks['all'] - risks['none'], -0.075186, rtol=1e-2)
Ejemplo n.º 4
0
 def test_error_mismatch_categorical_treatment(self, cat_data):
     """Three treatment entries for two exposure columns must raise ValueError."""
     exposure_columns = ['A1', 'A2']
     estimator = TimeFixedGFormula(cat_data, exposure=exposure_columns, exposure_type='categorical', outcome='Y')
     estimator.outcome_model(model='A1 + A2', print_results=False)

     # One treatment entry more than there are exposure columns
     with pytest.raises(ValueError):
         estimator.fit(treatment=['True', 'False', 'False'])
Ejemplo n.º 5
0
 def test_conditional_stochastic_warning(self):
     """Overlapping ``conditional`` groups in ``fit_stochastic`` must emit a UserWarning."""
     # 100 rows of 0/1 values for exposure A, covariate L and outcome Y
     frame = pd.DataFrame({
         'A': [1]*50 + [0]*50,
         'L': [1]*25 + [0]*25 + [1]*40 + [0]*10,
         'Y': [1]*25 + [0]*25 + [1]*25 + [0]*25,
     })
     estimator = TimeFixedGFormula(frame, exposure='A', outcome='Y')
     estimator.outcome_model(model='A + L + A:L', print_results=False)

     # The first condition matches L==1 or L==0; the second matches L==0 again
     overlapping = ["(g['L']==1) | (g['L']==0)", "g['L']==0"]
     with pytest.warns(UserWarning):
         estimator.fit_stochastic(p=[1.0, 1.0], conditional=overlapping)
Ejemplo n.º 6
0
 def test_weighted_data(self, data):
     """Check the marginal outcomes under 'all' and 'none' when ``weights='w'`` is set."""
     estimator = TimeFixedGFormula(data, exposure='A', outcome='Y', weights='w')
     estimator.outcome_model(model='A + L + A:L', print_results=False)

     # (treatment plan, reference marginal outcome), checked in this order
     for plan, expected in (('all', 0.564286), ('none', 0.404286)):
         estimator.fit(treatment=plan)
         npt.assert_allclose(estimator.marginal_outcome, expected, rtol=1e-5)
Ejemplo n.º 7
0
 def test_categorical_treat(self, cat_data):
     """Check the marginal outcome for three settings of the two categorical exposures."""
     estimator = TimeFixedGFormula(cat_data, exposure=['A1', 'A2'], exposure_type='categorical', outcome='Y')
     estimator.outcome_model(model='A1 + A2', print_results=False)

     # (treatment list for A1/A2, reference marginal outcome), checked in this order
     scenarios = (
         (["False", "False"], 0.373091),
         (["True", "False"], 0.8128),
         (["False", "True"], 0.5025),
     )
     for plan, expected in scenarios:
         estimator.fit(treatment=plan)
         npt.assert_allclose(estimator.marginal_outcome, expected, rtol=1e-5)
Ejemplo n.º 8
0
 def test_g_formula2(self, data):
     """Check the unweighted marginal outcomes under 'all' and 'none'."""
     estimator = TimeFixedGFormula(data, exposure='A', outcome='Y')
     estimator.outcome_model(model='A + L + A:L', print_results=False)

     # Compared with assert_allclose's default tolerances
     for plan, expected in (('all', 0.575), ('none', 0.415)):
         estimator.fit(treatment=plan)
         npt.assert_allclose(estimator.marginal_outcome, expected)
Ejemplo n.º 9
0
 def test_weight_stochastic_between_marginal(self, data):
     """With weights, the p=0.5 stochastic estimate must lie strictly between 'none' and 'all'."""
     estimator = TimeFixedGFormula(data, exposure='A', outcome='Y', weights='w')
     estimator.outcome_model(model='A + L + A:L', print_results=False)

     # Deterministic plans first ('all', then 'none')
     risks = {}
     for plan in ('all', 'none'):
         estimator.fit(treatment=plan)
         risks[plan] = estimator.marginal_outcome

     # Stochastic plan with p=0.5
     estimator.fit_stochastic(p=0.5)
     risk_half = estimator.marginal_outcome

     assert risks['none'] < risk_half < risks['all']
Ejemplo n.º 10
0
 def test_directions_correct(self, sim_t_fixed_data):
     """The custom-treatment estimate must lie strictly between treat-all (lower) and treat-none (higher)."""
     estimator = TimeFixedGFormula(sim_t_fixed_data, exposure='A', outcome='Y')
     estimator.outcome_model(model='A + W1_sq + W2 + W3', print_results=False)

     # Treatment rule passed to fit() as a string
     custom_rule = "g['W3'] > 2"

     # Fit each plan in turn and record its marginal outcome
     risks = {}
     for plan in ('all', 'none', custom_rule):
         estimator.fit(treatment=plan)
         risks[plan] = estimator.marginal_outcome

     assert risks['all'] < risks[custom_rule] < risks['none']
Ejemplo n.º 11
0
    def test_iterative_for_single_t(self, sim_t_fixed_data):
        """IterativeCondGFormula and TimeFixedGFormula must agree on the same data and model."""
        formula = 'A + W1_sq + W2 + W3'

        # Iterative conditional estimator with one exposure and one outcome
        iterative = IterativeCondGFormula(sim_t_fixed_data, exposures=['A'], outcomes=['Y'])
        iterative.outcome_model([formula], print_results=False)
        iterative.fit(treatments=[1])

        # Time-fixed estimator, everyone treated
        time_fixed = TimeFixedGFormula(sim_t_fixed_data, exposure='A', outcome='Y')
        time_fixed.outcome_model(model=formula, print_results=False)
        time_fixed.fit(treatment='all')

        # Both marginal outcomes are compared with assert_allclose's default tolerances
        npt.assert_allclose(time_fixed.marginal_outcome, iterative.marginal_outcome)
Ejemplo n.º 12
0
    def test_monte_carlo_for_single_t(self, sim_t_fixed_data):
        """Compare MonteCarloGFormula with TimeFixedGFormula on the same data.

        Both estimators use the same outcome model and are fit under treatment
        'all'. The mean of the 'Y' column of the Monte Carlo predicted outcomes
        must match the time-fixed marginal outcome to a relative tolerance of
        1e-3.
        """
        # Estimating monte carlo for single t
        gt = MonteCarloGFormula(sim_t_fixed_data, idvar='id', exposure='A', outcome='Y', time_out='t', time_in='t0')
        gt.outcome_model('A + W1_sq + W2 + W3', print_results=False)
        gt.exposure_model('W1_sq', print_results=False)
        gt.fit(treatment="all", sample=1000000)  # Keep this a high number to reduce simulation errors

        # Estimating with TimeFixedGFormula
        gf = TimeFixedGFormula(sim_t_fixed_data, exposure='A', outcome='Y')
        gf.outcome_model(model='A + W1_sq + W2 + W3', print_results=False)
        gf.fit(treatment='all')

        # Expected behavior; same results between the estimation methods
        npt.assert_allclose(gf.marginal_outcome, np.mean(gt.predicted_outcomes['Y']), rtol=1e-3)
Ejemplo n.º 13
0
    def test_weight_stochastic_matches_marginal(self, data):
        """With weights, ``fit_stochastic`` at p=1.0 / p=0.0 must match ``fit`` under 'all' / 'none'."""
        estimator = TimeFixedGFormula(data, exposure='A', outcome='Y', weights='w')
        estimator.outcome_model(model='A + L + A:L', print_results=False)

        # (deterministic plan, matching stochastic probability)
        for plan, probability in (('all', 1.0), ('none', 0.0)):
            estimator.fit(treatment=plan)
            deterministic = estimator.marginal_outcome
            estimator.fit_stochastic(p=probability)
            stochastic = estimator.marginal_outcome
            npt.assert_allclose(deterministic, stochastic, rtol=1e-7)
Ejemplo n.º 14
0
    def test_conditional_weight_stochastic_matches_marginal(self, data):
        """Conditional ``fit_stochastic`` with equal p in both L groups must match ``fit``.

        p=[1.0, 1.0] is compared with treatment 'all' and p=[0.0, 0.0] with
        treatment 'none', each to a relative tolerance of 1e-7.
        """
        estimator = TimeFixedGFormula(data, exposure='A', outcome='Y', weights='w')
        estimator.outcome_model(model='A + L + A:L', print_results=False)

        # (deterministic plan, probability applied to both conditional groups)
        for plan, probability in (('all', 1.0), ('none', 0.0)):
            estimator.fit(treatment=plan)
            deterministic = estimator.marginal_outcome
            estimator.fit_stochastic(p=[probability, probability],
                                     conditional=["g['L']==1", "g['L']==0"])
            stochastic = estimator.marginal_outcome
            npt.assert_allclose(deterministic, stochastic, rtol=1e-7)
Ejemplo n.º 15
0
    def test_exposed_standardized1(self, data):
        """Check the 'all' vs 'none' difference and ratio for each ``standardize`` setting."""
        # (standardize value, reference ratio); the reference difference is 0.16 for both
        for reference_group, expected_ratio in (('exposed', 1.387097), ('unexposed', 1.384615)):
            estimator = TimeFixedGFormula(data, exposure='A', outcome='Y', standardize=reference_group)
            estimator.outcome_model(model='A + L + A:L', print_results=False)

            estimator.fit(treatment='all')
            risk_all = estimator.marginal_outcome
            estimator.fit(treatment='none')
            risk_none = estimator.marginal_outcome

            npt.assert_allclose(risk_all - risk_none, 0.16, rtol=1e-5)
            npt.assert_allclose(risk_all / risk_none, expected_ratio, rtol=1e-5)
Ejemplo n.º 16
0
    def test_sequential_regression_for_single_t(self, sim_t_fixed_data):
        """TimeVaryGFormula (SequentialRegression) and TimeFixedGFormula must agree on the same data."""
        formula = 'A + W1_sq + W2 + W3'

        # Time-varying estimator using the 'SequentialRegression' method
        sequential = TimeVaryGFormula(sim_t_fixed_data, idvar='id', exposure='A', outcome='Y',
                                      time_out='t', method='SequentialRegression')
        sequential.outcome_model(formula, print_results=False)
        sequential.fit(treatment="all")

        # Time-fixed estimator, everyone treated
        time_fixed = TimeFixedGFormula(sim_t_fixed_data, exposure='A', outcome='Y')
        time_fixed.outcome_model(model=formula, print_results=False)
        time_fixed.fit(treatment='all')

        # Compared with assert_allclose's default tolerances
        npt.assert_allclose(time_fixed.marginal_outcome, sequential.predicted_outcomes)
Ejemplo n.º 17
0
def causal_check():
    """Run diagnostics and plots for several estimators on the sample data.

    Exercises, in order, TimeFixedGFormula, IPTW, AIPTW, TMLE and
    SurvivalGFormula. Each figure is displayed with ``plt.show()``; nothing is
    asserted and nothing is returned.
    """
    # Model formulas shared by the estimators below
    covariate_formula = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
    outcome_formula = 'art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'

    data = load_sample_data(False).drop(columns=['cd4_wk45'])
    # Add two spline columns each for cd40 and age0
    for new_columns, source in ((['cd4_rs1', 'cd4_rs2'], 'cd40'),
                                (['age_rs1', 'age_rs2'], 'age0')):
        data[new_columns] = spline(data, source, n_knots=3, term=2, restricted=True)

    # Check TimeFixedGFormula diagnostics
    gformula = TimeFixedGFormula(data, exposure='art', outcome='dead')
    gformula.outcome_model(model=outcome_formula)
    gformula.run_diagnostics(decimal=3)

    # Check IPTW plots
    iptw = IPTW(data, treatment='art', outcome='dead')
    iptw.treatment_model(covariate_formula, stabilized=True)
    iptw.marginal_structural_model('art')
    iptw.fit()
    iptw.plot_love()
    plt.tight_layout()
    plt.show()
    # Each plot type is drawn with its default measure, then with measure='logit'
    for draw in (iptw.plot_kde, iptw.plot_boxplot):
        draw()
        plt.show()
        draw(measure='logit')
        plt.show()
    iptw.run_diagnostics()

    # Check AIPTW Diagnostics
    aipw = AIPTW(data, exposure='art', outcome='dead')
    aipw.exposure_model(covariate_formula)
    aipw.outcome_model(outcome_formula)
    aipw.fit()
    aipw.run_diagnostics()
    for target in ('exposure', 'outcome'):
        aipw.plot_kde(to_plot=target)
        plt.show()
    aipw.plot_love()
    plt.show()

    # Check TMLE diagnostics
    tmle = TMLE(data, exposure='art', outcome='dead')
    tmle.exposure_model(covariate_formula)
    tmle.outcome_model(outcome_formula)
    tmle.fit()
    tmle.run_diagnostics()
    for target in ('exposure', 'outcome'):
        tmle.plot_kde(to_plot=target)
        plt.show()
    tmle.plot_love()
    plt.show()

    # Check SurvivalGFormula plots
    long_df = load_sample_data(False).drop(columns=['cd4_wk45'])
    long_df['t'] = np.round(long_df['t']).astype(int)
    # Repeat every row t times, then renumber t as 1, 2, ... within each id
    long_df = pd.DataFrame(np.repeat(long_df.values, long_df['t'], axis=0),
                           columns=long_df.columns)
    long_df['t'] = long_df.groupby('id')['t'].cumcount() + 1
    # d is 1 on the last row of an id whose 'dead' is 1, and 0 on every other row
    died = long_df['dead'] == 1
    last_row_of_id = long_df['id'] != long_df['id'].shift(-1)
    long_df.loc[(died & last_row_of_id), 'd'] = 1
    long_df['d'] = long_df['d'].fillna(0)
    long_df['t_sq'] = long_df['t']**2
    long_df['t_cu'] = long_df['t']**3

    survival = SurvivalGFormula(long_df, idvar='id', exposure='art', outcome='d', time='t')
    survival.outcome_model(model='art + male + age0 + cd40 + dvl0 + t + t_sq + t_cu')
    survival.fit(treatment='all')
    survival.plot()
    plt.show()
    survival.plot(c='r', linewidth=3, alpha=0.8)
    plt.show()
Ejemplo n.º 18
0
# Crude model: only the exposure ('art') and outcome ('dead') are passed, no covariates.
# `df` is defined outside this fragment.
ze.RiskRatio(df, exposure='art', outcome='dead')
ze.RiskDiff(df, exposure='art', outcome='dead')
# Adjusted model: binomial GLM with an identity link, fit on the covariate formula below.
# NOTE(review): the link classes are passed uninstantiated here and below - confirm the
# installed statsmodels version accepts that.
model = 'art + male + age0 + cd40 + dvl0'
f = sm.families.family.Binomial(sm.families.links.identity)
linrisk = smf.glm('dead ~ ' + model, df, family=f).fit()
linrisk.summary()
# Binomial GLM with a log link.
# NOTE(review): this formula contains only 'art' and does not use `model` - confirm that an
# unadjusted fit is intended in this section.
f = sm.families.family.Binomial(sm.families.links.log)
log = smf.glm('dead ~ art', df, family=f).fit()
log.summary()
# g-formula: fit one outcome model, then read the marginal outcome under treatment 'all'
# and under treatment 'none'.
# NOTE(review): the formula uses age_rs1/age_rs2/cd4_rs1/cd4_rs2, which are not created in
# this fragment - presumably spline columns added to `df` earlier; verify.
g = TimeFixedGFormula(df, exposure='art', outcome='dead')
g.outcome_model(
    model=
    'art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0')
g.fit(treatment='all')
r_all = g.marginal_outcome
g.fit(treatment='none')
r_none = g.marginal_outcome
# Difference and ratio of the two marginal outcomes
print('RD1 = ', r_all - r_none)
print('RR1 = ', r_all / r_none)
rd_results = []
rr_results = []
for i in range(500):
    dfs = df.sample(n=df.shape[0], replace=True)
    g = TimeFixedGFormula(dfs, exposure='art', outcome='dead')
    g.outcome_model(
        model=
        'art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0',
Ejemplo n.º 19
0
 def test_custom_treatment(self, sim_t_fixed_data):
     """Check the marginal outcome for a treatment rule given as a string."""
     estimator = TimeFixedGFormula(sim_t_fixed_data, exposure='A', outcome='Y')
     estimator.outcome_model(model='A + W1_sq + W2 + W3', print_results=False)

     # Treatment rule passed to fit() as a string
     custom_rule = "g['W3'] > 2"
     estimator.fit(treatment=custom_rule)

     expected = 0.682829
     npt.assert_allclose(estimator.marginal_outcome, expected, rtol=1e-5)
Ejemplo n.º 20
0
 def test_error_binary_exposure_list_treatments(self, data):
     """A list passed as ``treatment`` with the single exposure 'A' must raise ValueError."""
     estimator = TimeFixedGFormula(data, exposure='A', outcome='Y')
     estimator.outcome_model(model='A + L + A:L', print_results=False)

     list_treatment = ['True', 'False']
     with pytest.raises(ValueError):
         estimator.fit(treatment=list_treatment)
Ejemplo n.º 21
0
 def test_stochastic_conditional_probability(self, data):
     """One probability for two ``conditional`` groups must raise ValueError."""
     estimator = TimeFixedGFormula(data, exposure='A', outcome='Y')
     estimator.outcome_model(model='A + L + A:L', print_results=False)

     # Two conditions but only a single entry in p
     groups = ["g['L']==1", "g['L']==0"]
     with pytest.raises(ValueError):
         estimator.fit_stochastic(p=[0.0], conditional=groups)
Ejemplo n.º 22
0
 def test_error_wrong_treatment_object(self, data):
     """An integer passed as ``treatment`` must raise ValueError."""
     estimator = TimeFixedGFormula(data, exposure='A', outcome='Y')
     estimator.outcome_model(model='A + L + A:L', print_results=False)

     invalid_treatment = 5
     with pytest.raises(ValueError):
         estimator.fit(treatment=invalid_treatment)