Python StochasticTMLE Examples, amonhen.StochasticTMLE Python Examples

Example #1

0

Show file

    def test_compare_tmle_continuous(self, cf):
        """Compare StochasticTMLE against TMLE for a continuous outcome.

        The outcome column ``cd4_wk45`` is log-transformed in place on ``cf``.
        StochasticTMLE is then fit under the plans ``p=1.0`` and ``p=0.0``,
        and the difference of the two marginal outcomes is checked against
        ``TMLE.average_treatment_effect`` with ``atol=1e-3``.
        """
        g_formula = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
        q_formula = 'art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'

        # Both estimators below are built from the log-transformed outcome.
        cf['cd4_wk45'] = np.log(cf['cd4_wk45'])

        stochastic = StochasticTMLE(cf, exposure='art', outcome='cd4_wk45')
        stochastic.exposure_model(g_formula)
        stochastic.outcome_model(q_formula)

        # Fit the all-treated plan first, then the none-treated plan.
        marginal_by_plan = {}
        for plan in (1.0, 0.0):
            stochastic.fit(p=plan, samples=1)
            marginal_by_plan[plan] = stochastic.marginal_outcome

        reference = TMLE(cf, exposure='art', outcome='cd4_wk45')
        reference.exposure_model(g_formula, print_results=False)
        reference.outcome_model(q_formula, print_results=False)
        reference.fit()

        contrast = marginal_by_plan[1.0] - marginal_by_plan[0.0]
        npt.assert_allclose(reference.average_treatment_effect,
                            contrast,
                            atol=1e-3)

Example #2

0

Show file

    def test_gmodel_bound(self, simple_df):
        """Check ``1 / _denominator_`` after an exposure model with bounds.

        The exposure model is fit on ``W`` with ``bound=[0.45, 0.55]`` and the
        reciprocal of ``_denominator_`` is compared to reference values with
        ``atol=1e-6``.
        """
        # Reference values; the upstream note attributes them to a SAS
        # Poisson model.
        # NOTE(review): confirm the model family used for the reference fit.
        inv_upper = 1 / 0.55
        inv_lower = 1 / 0.45
        reference = [
            2, inv_upper, inv_lower, inv_upper, 2, 2, inv_lower, inv_upper, 2
        ]

        estimator = StochasticTMLE(df=simple_df, exposure='A', outcome='C')
        estimator.exposure_model('W', bound=[0.45, 0.55])
        inverse_denominator = 1 / estimator._denominator_

        npt.assert_allclose(reference, inverse_denominator, atol=1e-6)

Example #3

0

Show file

    def test_gmodel_params(self, simple_df):
        """Check ``1 / _denominator_`` after an unbounded exposure model on ``W``.

        The reciprocal of ``_denominator_`` is compared to reference values
        with ``atol=1e-6``.
        """
        estimator = StochasticTMLE(df=simple_df, exposure='A', outcome='C')
        estimator.exposure_model('W')
        inverse_denominator = 1 / estimator._denominator_

        # Reference values; the upstream note attributes them to a SAS
        # Poisson model.
        # NOTE(review): confirm the model family used for the reference fit.
        reference = [
            2.0,
            1.6666666667,
            2.5,
            1.6666666667,
            2,
            2,
            2.5,
            1.6666666667,
            2,
        ]
        npt.assert_allclose(reference, inverse_denominator, atol=1e-6)

Example #4

0

Show file

    def test_error_p_oob(self, df):
        """``fit`` must raise ``ValueError`` for ``p=1.1`` and for ``p=-0.1``."""
        covariates = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'

        estimator = StochasticTMLE(df=df, exposure='art', outcome='dead')
        estimator.exposure_model(covariates)
        estimator.outcome_model(covariates)

        # One value above 1 and one below 0, checked in that order.
        for invalid_p in (1.1, -0.1):
            with pytest.raises(ValueError):
                estimator.fit(p=invalid_p)

Example #5

0

Show file

    def test_error_p_cond_len(self, df):
        """``fit`` must raise ``ValueError`` when ``p`` and ``conditional``
        have different lengths (1 vs 2, then 2 vs 1)."""
        covariates = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'

        estimator = StochasticTMLE(df=df, exposure='art', outcome='dead')
        estimator.exposure_model(covariates)
        estimator.outcome_model(covariates)

        mismatched_inputs = (
            ([0.1], ["df['male']==1", "df['male']==0"]),
            ([0.1, 0.3], ["df['male']==1"]),
        )
        for probabilities, conditions in mismatched_inputs:
            with pytest.raises(ValueError):
                estimator.fit(p=probabilities, conditional=conditions)

Example #6

0

Show file

    def test_calculate_epsilon2(self, cf):
        """Check ``epsilon`` for the ``cd4_wk45`` outcome at ``p=0.15`` and
        ``p=0.4`` against stored reference values (``atol=1e-6``)."""
        estimator = StochasticTMLE(cf, exposure='art', outcome='cd4_wk45')
        estimator.exposure_model(
            'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0')
        estimator.outcome_model(
            'art + male + age0 + age_rs1 + age_rs2 + dvl0  + cd40 + cd4_rs1 + cd4_rs2')

        # (treatment probability, expected epsilon), fit in this order.
        expected_epsilons = (
            (0.15, -0.0059476590),
            (0.4, -0.0154923643),
        )
        for probability, expected in expected_epsilons:
            estimator.fit(p=probability, samples=1)
            npt.assert_allclose(expected, estimator.epsilon, atol=1e-6)

Example #7

0

Show file

    def test_calculate_epsilon1(self, df):
        """Check ``epsilon`` for the ``dead`` outcome at ``p=0.15`` and
        ``p=0.4`` against stored reference values (``atol=1e-6``)."""
        estimator = StochasticTMLE(df, exposure='art', outcome='dead')
        estimator.exposure_model(
            'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0')
        estimator.outcome_model(
            'art + male + age0 + age_rs1 + age_rs2 + dvl0  + cd40 + cd4_rs1 + cd4_rs2')

        # (treatment probability, expected epsilon), fit in this order.
        expected_epsilons = (
            (0.15, -0.0157043107),
            (0.4, -0.0381559025),
        )
        for probability, expected in expected_epsilons:
            estimator.fit(p=probability, samples=1)
            npt.assert_allclose(expected, estimator.epsilon, atol=1e-6)

Example #8

0

Show file

    def test_qmodel_params3(self, simple_df):
        """Check the outcome model fit with ``continuous_distribution='Poisson'``.

        The fitted parameters (``atol=1e-4``) and ``_Qinit_`` predictions
        (``atol=1e-6``) are compared to reference values from a SAS Poisson
        model.
        """
        estimator = StochasticTMLE(df=simple_df, exposure='A', outcome='C')
        estimator.outcome_model('A + W + S', continuous_distribution='Poisson')

        reference_params = [-1.0478, 0.9371, -0.5321, -0.2733]
        reference_preds = [
            0.4000579, 0.2030253, 0.6811115,
            0.3507092, 0.1567304, 0.20599265,
            0.6811115, 0.3507092, 0.3043857,
        ]

        # Read both fitted quantities before any comparison runs.
        comparisons = (
            (reference_params, estimator._outcome_model.params, 1e-4),
            (reference_preds, estimator._Qinit_, 1e-6),
        )
        for reference, estimate, tolerance in comparisons:
            npt.assert_allclose(reference, estimate, atol=tolerance)

Example #9

0

Show file

    def test_qmodel_params2(self, simple_df):
        """Check the outcome model fit with ``continuous_distribution='normal'``.

        The fitted parameters (``atol=1e-4``) and ``_Qinit_`` predictions
        (``atol=1e-6``) are compared to reference values from a SAS linear
        model.
        """
        estimator = StochasticTMLE(df=simple_df, exposure='A', outcome='C')
        estimator.outcome_model('A + W + S', continuous_distribution='normal')

        reference_params = [0.3876, 0.3409, -0.2030, -0.0883]
        reference_preds = [
            0.437265, 0.210957, 0.6402345,
            0.3876202, 0.0963188, 0.1846502,
            0.6402345, 0.38762016, 0.34893314,
        ]

        # Read both fitted quantities before any comparison runs.
        comparisons = (
            (reference_params, estimator._outcome_model.params, 1e-4),
            (reference_preds, estimator._Qinit_, 1e-6),
        )
        for reference, estimate, tolerance in comparisons:
            npt.assert_allclose(reference, estimate, atol=tolerance)

Example #10

0

Show file

    def test_qmodel_params(self, simple_df):
        """Check the default outcome model for outcome ``Y`` on ``A + W``.

        The fitted parameters (``atol=1e-4``) and ``_Qinit_`` predictions
        (``atol=1e-6``) are compared to reference values from a SAS logit
        model.
        """
        estimator = StochasticTMLE(df=simple_df, exposure='A', outcome='Y')
        estimator.outcome_model('A + W')

        reference_params = [-1.0699, -0.9525, 1.5462]
        reference_preds = [
            0.3831332, 0.2554221, 0.1168668,
            0.2554221, 0.6168668, 0.6168668,
            0.1168668, 0.2554221, 0.3831332,
        ]

        # Read both fitted quantities before any comparison runs.
        comparisons = (
            (reference_params, estimator._outcome_model.params, 1e-4),
            (reference_preds, estimator._Qinit_, 1e-6),
        )
        for reference, estimate, tolerance in comparisons:
            npt.assert_allclose(reference, estimate, atol=tolerance)

Example #11

0

Show file

    def test_machine_learning_runs(self, df):
        """Smoke test: passing a ``custom_model`` must not raise.

        No estimate is checked. The same L1-penalised ``LogisticRegression``
        instance is handed to both the exposure and the outcome model, and
        ``fit`` is run with ``p=0.4`` and ``samples=20``.
        """
        learner = LogisticRegression(
            penalty='l1',
            solver='liblinear',
            random_state=201,
        )

        estimator = StochasticTMLE(df, exposure='art', outcome='dead')
        estimator.exposure_model(
            'male + age0 + cd40 + cd4_rs1 + cd4_rs2 + dvl0 + male:dvl0',
            custom_model=learner,
        )
        estimator.outcome_model(
            'art + male + age0 + dvl0  + cd40',
            custom_model=learner,
        )
        estimator.fit(p=0.4, samples=20)

Example #12

0

Show file

    def test_compare_tmle_binary(self, df):
        """Compare StochasticTMLE against TMLE for the binary outcome ``dead``.

        StochasticTMLE is fit under the plans ``p=1.0`` and ``p=0.0``, and the
        difference of the two marginal outcomes is checked against
        ``TMLE.risk_difference`` with ``atol=1e-4``.
        """
        g_formula = 'male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'
        q_formula = 'art + male + age0 + age_rs1 + age_rs2 + cd40 + cd4_rs1 + cd4_rs2 + dvl0'

        stochastic = StochasticTMLE(df, exposure='art', outcome='dead')
        stochastic.exposure_model(g_formula)
        stochastic.outcome_model(q_formula)

        # Fit the all-treated plan first, then the none-treated plan.
        marginal_by_plan = {}
        for plan in (1.0, 0.0):
            stochastic.fit(p=plan, samples=1)
            marginal_by_plan[plan] = stochastic.marginal_outcome

        reference = TMLE(df, exposure='art', outcome='dead')
        reference.exposure_model(g_formula, print_results=False)
        reference.outcome_model(q_formula, print_results=False)
        reference.fit()

        contrast = marginal_by_plan[1.0] - marginal_by_plan[0.0]
        npt.assert_allclose(reference.risk_difference, contrast, atol=1e-4)

Example #13

0

Show file

    def test_continuous_processing(self):
        """Check how the constructor handles a continuous outcome.

        Built with ``continuous_bound=0.0001``, the estimator must flag the
        outcome as continuous, record its min (-5) and max (10), store the
        bound in ``_cb``, and hold the expected bounded values in ``df['Y']``.
        """
        data = pd.DataFrame({
            'A': [0, 1, 1, 0, 1, 1, 0, 0],
            'Y': [1, -1, 5, 0, 0, 0, 10, -5],
        })

        estimator = StochasticTMLE(
            df=data,
            exposure='A',
            outcome='Y',
            continuous_bound=0.0001,
        )

        # Flags and bookkeeping set during construction
        assert estimator._continuous_outcome is True
        assert estimator._continuous_min == -5
        assert estimator._continuous_max == 10
        assert estimator._cb == 0.0001

        # Expected bounded outcome; the last two entries sit at the
        # 0.0001 / 0.9999 bounds
        expected_y = pd.Series(
            [2 / 5, 4 / 15, 2 / 3, 1 / 3, 1 / 3, 1 / 3, 0.9999, 0.0001])
        pdt.assert_series_equal(
            expected_y,
            estimator.df['Y'],
            check_dtype=False,
            check_names=False,
        )

Example #14

0

Show file

 def test_error_continuous_exp(self, df):
     """Constructing with ``exposure='cd40'`` must raise ``ValueError``."""
     constructor_args = dict(df=df, exposure='cd40', outcome='dead')
     with pytest.raises(ValueError):
         StochasticTMLE(**constructor_args)

Example #15

0

Show file

 def test_drop_missing_data(self):
     """Of five input rows, two have a NaN in ``A`` or ``Y``; the
     estimator's ``df`` must end up with three rows."""
     data = pd.DataFrame({
         'A': [1, 1, 0, 0, np.nan],
         'Y': [np.nan, 0, 1, 0, 1],
     })
     estimator = StochasticTMLE(df=data, exposure='A', outcome='Y')
     remaining_rows = estimator.df.shape[0]
     assert remaining_rows == 3

Example #16

0

Show file

 def test_marginal_vector_length_stoch(self, df):
     """After ``fit(p=0.4, samples=7)``, ``marginals_vector`` must have
     seven entries."""
     n_samples = 7
     estimator = StochasticTMLE(df=df, exposure='art', outcome='dead')
     estimator.exposure_model('male')
     estimator.outcome_model('art + male + age0')
     estimator.fit(p=0.4, samples=n_samples)
     assert len(estimator.marginals_vector) == n_samples

Example #17

0

Show file

    ['ucl_' + str(p)
     for p in prop_treated] + ['var_' + str(p) for p in prop_treated])

########################################
# Running simulation
########################################
for i in range(n_mc):
    # Generating Data
    H = naloxone_dgm(network=G, restricted=restrict)
    df = network_to_df(H)
    results.loc[i, 'inc_' + exposure] = np.mean(df[exposure])
    results.loc[i, 'inc_' + outcome] = np.mean(df[outcome])

    if independent:
        # Stochastic TMLE
        stmle = StochasticTMLE(df, exposure=exposure, outcome=outcome)
        stmle.exposure_model(gi_model, bound=0.01)
        stmle.outcome_model(qi_model)
        for p in prop_treated:  # loops through all treatment plans
            try:
                if shift:
                    z = odds_to_probability(np.exp(log_odds + p))
                    stmle.fit(p=z)
                else:
                    stmle.fit(p=p)
                results.loc[i, 'bias_' +
                            str(p)] = stmle.marginal_outcome - truth[p]
                results.loc[i, 'var_' + str(p)] = stmle.conditional_se**2
                results.loc[i, 'lcl_' + str(p)] = stmle.conditional_ci[0]
                results.loc[i, 'ucl_' + str(p)] = stmle.conditional_ci[1]
            except:

Example #18

0

Show file

 def test_warn_missing_data(self):
     """Constructing from data containing NaN in ``A`` and ``Y`` must emit
     a ``UserWarning``."""
     data = pd.DataFrame({
         'A': [1, 1, 0, 0, np.nan],
         'Y': [np.nan, 0, 1, 0, 1],
     })
     with pytest.warns(UserWarning):
         StochasticTMLE(df=data, exposure='A', outcome='Y')