Python Mediation Examples, statsmodels.stats.mediation.Mediation Python Examples

Example #1

0

Show file

File: test_mediation.py Project: bashtage/statsmodels

def test_framing_example_moderator():
    """Smoke test: moderated mediation on the framing data without formulas.

    The outcome model (binomial GLM, probit link) and the mediator model
    (OLS) are built from explicit patsy design matrices, so the treatment,
    mediator and moderator are identified by column position rather than
    by name. Only checks that a parametric fit runs to completion.
    """
    # moderation without formulas, generally not useful but test anyway
    here = os.path.dirname(os.path.abspath(__file__))
    frame = pd.read_csv(os.path.join(here, 'results', "framing.csv"))

    # Outcome model: cong_mesg regressed on the mediator, treatment and
    # covariates.
    y = np.asarray(frame["cong_mesg"])
    y_design = patsy.dmatrix(
        "emo + treat + age + educ + gender + income",
        frame,
        return_type='dataframe',
    )
    binomial_probit = sm.families.Binomial(link=sm.families.links.probit())
    outcome_model = sm.GLM(y, y_design, family=binomial_probit)

    # Mediator model: emo regressed on treatment and covariates.
    m = np.asarray(frame["emo"])
    m_design = patsy.dmatrix(
        "treat + age + educ + gender + income",
        frame,
        return_type='dataframe',
    )
    mediator_model = sm.OLS(m, m_design)

    y_cols = y_design.columns.tolist()
    m_cols = m_design.columns.tolist()

    # Positions are given as (outcome design column, mediator design column).
    tx_pos = [y_cols.index("treat"), m_cols.index("treat")]
    med_pos = y_cols.index("emo")
    age_pos = (y_cols.index("age"), m_cols.index("age"))

    med = Mediation(outcome_model, mediator_model, tx_pos, med_pos,
                    moderators={age_pos: 20})

    # Just a smoke test: the result is intentionally not inspected.
    np.random.seed(4231)
    med.fit(method='parametric', n_rep=100)

Example #2

0

Show file

File: test_mediation.py Project: sunnyweilai/Finding-Theme-Color-Palettes

def test_framing_example_moderator():
    """Smoke-test Mediation with a moderator, using design-matrix models.

    The outcome model is a binomial GLM with a probit link and the mediator
    model is OLS; both are built from explicit patsy design matrices, so
    treatment, mediator and moderator are referenced by column position.
    The test only verifies that a parametric fit completes.
    """
    # moderation without formulas, generally not useful but test anyway

    cur_dir = os.path.dirname(os.path.abspath(__file__))
    data = pd.read_csv(os.path.join(cur_dir, '#1lab_results', "framing.csv"))

    outcome = np.asarray(data["cong_mesg"])
    outcome_exog = patsy.dmatrix("emo + treat + age + educ + gender + income", data,
                                  return_type='dataframe')
    probit = sm.families.links.probit
    # The family must receive a link *instance*: passing the class itself is
    # deprecated in older statsmodels releases and rejected by newer ones.
    outcome_model = sm.GLM(outcome, outcome_exog, family=sm.families.Binomial(link=probit()))

    mediator = np.asarray(data["emo"])
    mediator_exog = patsy.dmatrix("treat + age + educ + gender + income", data,
                                 return_type='dataframe')
    mediator_model = sm.OLS(mediator, mediator_exog)

    # Column positions are given as [outcome design, mediator design].
    tx_pos = [outcome_exog.columns.tolist().index("treat"),
              mediator_exog.columns.tolist().index("treat")]
    med_pos = outcome_exog.columns.tolist().index("emo")

    # The moderator key is the (outcome, mediator) column-position pair of
    # "age"; the value 20 is the level passed to Mediation for it.
    ix = (outcome_exog.columns.tolist().index("age"),
          mediator_exog.columns.tolist().index("age"))
    moderators = {ix : 20}
    med = Mediation(outcome_model, mediator_model, tx_pos, med_pos,
                    moderators=moderators)

    # Just a smoke test
    np.random.seed(4231)
    med_rslt = med.fit(method='parametric', n_rep=100)

Example #3

0

Show file

File: test_mediation.py Project: bashtage/statsmodels

def test_framing_example():
    """Compare parametric and bootstrap mediation summaries with stored values.

    Fits a probit GLM outcome model and an OLS mediator model on the framing
    data (via explicit design matrices), then checks that the summary tables
    for both inference methods match the reference tables
    ``framing_para_4231`` and ``framing_boot_4231`` under seed 4231.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    frame = pd.read_csv(os.path.join(here, 'results', "framing.csv"))

    # Outcome model: binomial GLM with probit link.
    y = np.asarray(frame["cong_mesg"])
    y_design = patsy.dmatrix(
        "emo + treat + age + educ + gender + income",
        frame,
        return_type='dataframe',
    )
    binomial_probit = sm.families.Binomial(link=sm.families.links.probit())
    outcome_model = sm.GLM(y, y_design, family=binomial_probit)

    # Mediator model: ordinary least squares.
    m = np.asarray(frame["emo"])
    m_design = patsy.dmatrix(
        "treat + age + educ + gender + income",
        frame,
        return_type='dataframe',
    )
    mediator_model = sm.OLS(m, m_design)

    y_cols = y_design.columns.tolist()
    m_cols = m_design.columns.tolist()
    tx_pos = [y_cols.index("treat"), m_cols.index("treat")]
    med_pos = y_cols.index("emo")

    med = Mediation(outcome_model, mediator_model, tx_pos, med_pos,
                    outcome_fit_kwargs={'atol': 1e-11})

    def _check(method, expected):
        # Reseed before every fit so each method starts from the same state.
        np.random.seed(4231)
        result = med.fit(method=method, n_rep=100)
        delta = np.asarray(result.summary() - expected)
        assert_allclose(delta, 0, atol=1e-6)

    _check('parametric', framing_para_4231)
    _check('boot', framing_boot_4231)

Example #4

0

Show file

File: test_mediation.py Project: PaulGureghian1/Statsmodels

def test_framing_example_moderator_formula():
    """Check formula-based moderated mediation against stored results.

    Both models are specified with formulas that interact ``age`` with the
    treatment (and, in the outcome model, with the mediator). ``age`` is
    passed as a moderator with value 20, and the parametric summary under
    seed 4231 must match ``framing_moderated_4231``.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    frame = pd.read_csv(os.path.join(here, 'results', "framing.csv"))

    outcome_formula = (
        "cong_mesg ~ emo + treat*age + emo*age + educ + gender + income")
    mediator_formula = "emo ~ treat*age + educ + gender + income"

    binomial_probit = sm.families.Binomial(link=sm.families.links.probit())
    outcome_model = sm.GLM.from_formula(
        outcome_formula, frame, family=binomial_probit)
    mediator_model = sm.OLS.from_formula(mediator_formula, frame)

    med = Mediation(outcome_model, mediator_model, "treat", "emo",
                    moderators={"age": 20})

    np.random.seed(4231)
    result = med.fit(method='parametric', n_rep=100)
    delta = np.asarray(result.summary() - framing_moderated_4231)
    assert_allclose(delta, 0, atol=1e-6)

Example #5

0

Show file

File: test_mediation.py Project: sunnyweilai/Finding-Theme-Color-Palettes

def test_framing_example():
    """Compare parametric and bootstrap mediation summaries with stored values.

    A probit GLM outcome model and an OLS mediator model are built from
    explicit patsy design matrices on the framing data. With the NumPy seed
    fixed at 4231, the summary tables from the parametric and bootstrap fits
    must match ``framing_para_4231`` and ``framing_boot_4231`` respectively.
    """

    cur_dir = os.path.dirname(os.path.abspath(__file__))
    data = pd.read_csv(os.path.join(cur_dir, '#1lab_results', "framing.csv"))

    outcome = np.asarray(data["cong_mesg"])
    outcome_exog = patsy.dmatrix("emo + treat + age + educ + gender + income", data,
                                  return_type='dataframe')
    probit = sm.families.links.probit
    # The family must receive a link *instance*: passing the class itself is
    # deprecated in older statsmodels releases and rejected by newer ones.
    outcome_model = sm.GLM(outcome, outcome_exog, family=sm.families.Binomial(link=probit()))

    mediator = np.asarray(data["emo"])
    mediator_exog = patsy.dmatrix("treat + age + educ + gender + income", data,
                                 return_type='dataframe')
    mediator_model = sm.OLS(mediator, mediator_exog)

    # Column positions are given as [outcome design, mediator design].
    tx_pos = [outcome_exog.columns.tolist().index("treat"),
              mediator_exog.columns.tolist().index("treat")]
    med_pos = outcome_exog.columns.tolist().index("emo")

    med = Mediation(outcome_model, mediator_model, tx_pos, med_pos,
                    outcome_fit_kwargs={'atol':1e-11})

    # Reseed before each fit so both methods start from the same RNG state.
    np.random.seed(4231)
    para_rslt = med.fit(method='parametric', n_rep=100)
    diff = np.asarray(para_rslt.summary() - framing_para_4231)
    assert_allclose(diff, 0, atol=1e-6)

    np.random.seed(4231)
    boot_rslt = med.fit(method='boot', n_rep=100)
    diff = np.asarray(boot_rslt.summary() - framing_boot_4231)
    assert_allclose(diff, 0, atol=1e-6)

Example #6

0

Show file

File: test_mediation.py Project: mussabota/time-series-analysis

def test_mixedlm():
    """Mediation with MixedLM outcome and mediator models on simulated data.

    Simulates 200 groups with three observations each, fits random-intercept
    mixed models for the mediator and the outcome, and checks that the
    estimated average proportion mediated is close to 0.52.
    """
    np.random.seed(3424)

    n = 200
    ones3 = np.ones(3)

    # The exposure (not time varying): one draw per group, repeated 3 times.
    x = np.random.normal(size=n)
    xv = np.outer(x, ones3)

    # The mediator (with random intercept); weights are scaled to unit norm.
    med_w = np.asarray([4., 4, 1])
    med_w = med_w / np.sqrt(np.sum(med_w**2))
    med_intercept = np.random.normal(size=n)
    med_noise = np.random.normal(size=(n, 3))
    med = (med_w[0] * np.outer(x, ones3)
           + med_w[1] * np.outer(med_intercept, ones3)
           + med_w[2] * med_noise)

    # The outcome mean (exposure and mediator effects).
    ey = np.outer(x, np.r_[0, 0.5, 1]) + med

    # Random structure of the outcome (random intercept and slope).
    err_w = np.asarray([5., 2, 2])
    err_w = err_w / np.sqrt(np.sum(err_w**2))
    err_intercept = np.random.normal(size=n)
    err_slope = np.random.normal(size=n)
    err_noise = np.random.normal(size=(n, 3))
    e = (err_w[0] * np.outer(err_intercept, ones3)
         + err_w[1] * np.outer(err_slope, np.r_[-1, 0, 1])
         + err_w[2] * err_noise)
    y = ey + e

    # Group membership and time, laid out as (group, occasion) grids.
    idx = np.outer(np.arange(n), ones3)
    tim = np.outer(np.ones(n), np.r_[-1, 0, 1])

    columns = {
        "y": y.flatten(),
        "x": xv.flatten(),
        "id": idx.flatten(),
        "time": tim.flatten(),
        "med": med.flatten(),
    }
    df = pd.DataFrame(columns)

    mediator_model = sm.MixedLM.from_formula("med ~ x", groups="id", data=df)
    outcome_model = sm.MixedLM.from_formula(
        "y ~ med + x", groups="id", data=df)

    summary = Mediation(outcome_model, mediator_model, "x", "med").fit(
        n_rep=2).summary()
    pm = summary.loc["Prop. mediated (average)", "Estimate"]
    assert_allclose(pm, 0.52, rtol=1e-2, atol=1e-2)

Example #7

0

Show file

File: test_mediation.py Project: bashtage/statsmodels

def test_mixedlm():
    """Check the proportion mediated when both models are MixedLM.

    Data are simulated for ``n`` groups with three repeated measures each.
    The mediator has a random intercept and the outcome has a random
    intercept and slope. The average proportion mediated reported by
    ``Mediation`` must be approximately 0.52.
    """
    np.random.seed(3424)

    n = 200
    repeat = np.ones(3)
    slope_pattern = np.r_[-1, 0, 1]

    def _unit(weights):
        # Scale a weight vector to unit Euclidean length.
        weights = np.asarray(weights)
        return weights / np.sqrt(np.sum(weights**2))

    # The exposure (not time varying)
    x = np.random.normal(size=n)
    xv = np.outer(x, repeat)

    # The mediator (with random intercept)
    mx = _unit([4., 4, 1])
    med = mx[0] * np.outer(x, repeat)
    med = med + mx[1] * np.outer(np.random.normal(size=n), repeat)
    med = med + mx[2] * np.random.normal(size=(n, 3))

    # The outcome (exposure and mediator effects)
    ey = np.outer(x, np.r_[0, 0.5, 1]) + med

    # Random structure of the outcome (random intercept and slope)
    ex = _unit([5., 2, 2])
    e = ex[0] * np.outer(np.random.normal(size=n), repeat)
    e = e + ex[1] * np.outer(np.random.normal(size=n), slope_pattern)
    e = e + ex[2] * np.random.normal(size=(n, 3))
    y = ey + e

    # Group membership
    idx = np.outer(np.arange(n), repeat)

    # Time
    tim = np.outer(np.ones(n), slope_pattern)

    df = pd.DataFrame({
        "y": y.flatten(),
        "x": xv.flatten(),
        "id": idx.flatten(),
        "time": tim.flatten(),
        "med": med.flatten(),
    })

    mediator_model = sm.MixedLM.from_formula("med ~ x", groups="id", data=df)
    outcome_model = sm.MixedLM.from_formula(
        "y ~ med + x", groups="id", data=df)

    fitted = Mediation(outcome_model, mediator_model, "x", "med").fit(n_rep=2)
    table = fitted.summary()
    pm = table.loc["Prop. mediated (average)", "Estimate"]
    assert_allclose(pm, 0.52, rtol=1e-2, atol=1e-2)

Example #8

0

Show file

File: test_mediation.py Project: bashtage/statsmodels

def test_framing_example_moderator_formula():
    """Formula-based moderated mediation must reproduce the stored summary.

    The outcome model is a probit GLM and the mediator model is OLS, both
    with ``treat*age`` interactions. ``age`` is supplied as a moderator with
    value 20, and the parametric fit under seed 4231 is compared with
    ``framing_moderated_4231``.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    csv_path = os.path.join(base_dir, 'results', "framing.csv")
    data = pd.read_csv(csv_path)

    link = sm.families.links.probit()
    outcome_model = sm.GLM.from_formula(
        "cong_mesg ~ emo + treat*age + emo*age + educ + gender + income",
        data,
        family=sm.families.Binomial(link=link),
    )
    mediator_model = sm.OLS.from_formula(
        "emo ~ treat*age + educ + gender + income",
        data,
    )

    med = Mediation(
        outcome_model,
        mediator_model,
        "treat",
        "emo",
        moderators={"age": 20},
    )

    np.random.seed(4231)
    summary = med.fit(method='parametric', n_rep=100).summary()
    assert_allclose(np.asarray(summary - framing_moderated_4231), 0,
                    atol=1e-6)

Example #9

0

Show file

    def run_mediation(self, predict_col_categorical=False):
        """Fit a mediation analysis on ``self.data.X`` and print its summary.

        The outcome model is ``cont1 ~ rdt1 + jelt1`` and the mediator model
        is ``rdt1 ~ jelt1``, with ``jelt1`` as the exposure and ``rdt1`` as
        the mediator.

        Parameters
        ----------
        predict_col_categorical : bool, optional
            When true, the outcome GLM uses a binomial family with a probit
            link; otherwise the GLM is built without an explicit family.
        """
        # Only pass ``family`` when the binomial/probit variant is requested.
        glm_options = {}
        if predict_col_categorical:
            glm_options["family"] = sm.families.Binomial(link=links.probit())

        outcome_model = sm.GLM.from_formula(
            "cont1 ~ rdt1 + jelt1", self.data.X, **glm_options)
        mediator_model = sm.OLS.from_formula("rdt1 ~ jelt1", self.data.X)

        med = Mediation(
            outcome_model, mediator_model, "jelt1", mediator="rdt1").fit()

        # Lift pandas' display limits so the whole table is printed.
        display_all = pd.option_context(
            'display.max_rows', None, 'display.max_columns', None)
        with display_all:
            print(med.summary())

Example #10

0

Show file

File: mediation_survival.py Project: timgates42/statsmodels

def run(otype):
    """Run a survival mediation analysis for one outcome type and print it.

    Data come from the module's ``gen_mediator``, ``gen_outcome`` and
    ``build_df`` helpers. Both the outcome and the mediator are modelled
    with proportional hazards regression (``PHReg``).

    Parameters
    ----------
    otype
        Forwarded unchanged to ``gen_outcome``.
    """
    mtime0, mtime, mstatus = gen_mediator()
    ytime, ystatus = gen_outcome(otype, mtime0)
    df = build_df(ytime, ystatus, mtime0, mtime, mstatus)

    def _phreg(formula, status_col):
        # Both models share the data frame and differ only in the formula
        # and the status column.
        return sm.PHReg.from_formula(formula, status=status_col, data=df)

    outcome_model = _phreg("ytime ~ exp + mtime", "ystatus")
    mediator_model = _phreg("mtime ~ exp", "mstatus")

    med = Mediation(outcome_model, mediator_model, "exp", "mtime",
                    outcome_predict_kwargs={"pred_only": True})
    print(med.fit(n_rep=20).summary())

Example #11

0

Show file

File: post-stats.py Project: NBCLab/IDConn

# Shorten column names in place. The first pass introduces 'le-rCEN'; the
# second pass swaps its hyphen for an underscore ('le_rCEN'), the spelling
# used in the model formulas below.
df_f.rename({'Full Scale IQ_2': 'IQ', 'le left central executive phy': 'le-rCEN'}, axis=1, inplace=True)
df_f.rename({'Full Scale IQ_2': 'IQ', 'le-rCEN': 'le_rCEN'}, axis=1, inplace=True)
# Replace the tuple-valued column label ('2', 'GID Post') with a plain name.
df_f.rename({('2', 'GID Post'): 'GIDPost'}, axis=1, inplace=True)

# Same relabelling for big_df.
big_df.rename({('2', 'GID Post'): 'GIDPost'}, axis=1, inplace=True)


import statsmodels.api as sm
from statsmodels.stats.mediation import Mediation, MediationResults

# Mediation 1: exposure le_rCEN, mediator IQ, outcome Phy48Grade.
# NOTE(review): the models are fitted on `no_na_m`, which is defined outside
# this fragment; presumably a NaN-free frame with the renamed columns, so
# confirm.
outcome_model = sm.GLM.from_formula("Phy48Grade ~ le_rCEN + IQ",

                                     no_na_m)
mediator_model = sm.OLS.from_formula("IQ ~ le_rCEN", no_na_m)

med = Mediation(outcome_model, mediator_model, "le_rCEN", "IQ").fit()
# The summary is not assigned; it is only visible in an interactive session.
med.summary(alpha=0.01)

# Mediation 2: same exposure and outcome, with GIDPost as the mediator.
outcome_model = sm.GLM.from_formula("Phy48Grade ~ le_rCEN + GIDPost",
                                     no_na_m)
mediator_model = sm.OLS.from_formula("GIDPost ~ le_rCEN", no_na_m)
med = Mediation(outcome_model, mediator_model, "le_rCEN", "GIDPost").fit()
med.summary(alpha=0.01)

#average causal mediation effect (ACME) = a*b = c - c'
#average direct effect (ADE) = c'
#total effect = a*b + c' = c

# Difference between the two 'fc hippo-default mode' columns (phy minus gen).
df_f['HcDMN_phy_minus_gen'] = df_f['fc hippo-default mode phy'] - df_f['fc hippo-default mode gen']

# Spearman correlation of that difference with GIDPost, omitting NaNs.
# The result is not assigned.
spearmanr(df_f['HcDMN_phy_minus_gen'], df_f['GIDPost'], nan_policy='omit')

Example #12

0

Show file

def test_surv():
    """Mediation with proportional hazards models on simulated survival data.

    A normally distributed exposure drives a censored mediator time. Three
    outcome scenarios are then simulated, in which the outcome's linear
    predictor depends on the mediator only ("full"), on both the exposure
    and the mediator ("partial"), or on the exposure only ("no"). The
    estimated average proportion mediated must be close to 1, 0.5 and 0
    respectively.
    """

    np.random.seed(2341)

    n = 1000

    # Generate exposures
    exp = np.random.normal(size=n)

    # Generate mediators: event times scaled by exp(exposure), with
    # independent censoring times.
    mn = np.exp(exp)
    mtime0 = -mn * np.log(np.random.uniform(size=n))
    ctime = -2 * mn * np.log(np.random.uniform(size=n))
    # Status is 1 when the event time is not later than the censoring time.
    # ``np.int`` was removed in NumPy 1.24, so the builtin ``int`` is used.
    mstatus = (ctime >= mtime0).astype(int)
    mtime = np.where(mtime0 <= ctime, mtime0, ctime)

    for mt in "full", "partial", "no":

        # Outcome
        if mt == "full":
            lp = 0.5 * mtime0
        elif mt == "partial":
            lp = exp + mtime0
        else:
            lp = exp

        # Generate outcomes
        mn = np.exp(-lp)
        ytime0 = -mn * np.log(np.random.uniform(size=n))
        ctime = -2 * mn * np.log(np.random.uniform(size=n))
        ystatus = (ctime >= ytime0).astype(int)
        ytime = np.where(ytime0 <= ctime, ytime0, ctime)

        df = pd.DataFrame({
            "ytime": ytime,
            "ystatus": ystatus,
            "mtime": mtime,
            "mstatus": mstatus,
            "exp": exp
        })

        fml = "ytime ~ exp + mtime"
        outcome_model = sm.PHReg.from_formula(fml, status="ystatus", data=df)
        fml = "mtime ~ exp"
        mediator_model = sm.PHReg.from_formula(fml, status="mstatus", data=df)

        med = Mediation(outcome_model,
                        mediator_model,
                        "exp",
                        "mtime",
                        outcome_predict_kwargs={"pred_only": True},
                        outcome_fit_kwargs={"method": "lbfgs"},
                        mediator_fit_kwargs={"method": "lbfgs"})
        med_result = med.fit(n_rep=2)
        dr = med_result.summary()
        pm = dr.loc["Prop. mediated (average)", "Estimate"]
        if mt == "no":
            assert_allclose(pm, 0, atol=0.1, rtol=0.1)
        elif mt == "full":
            assert_allclose(pm, 1, atol=0.1, rtol=0.1)
        else:
            assert_allclose(pm, 0.5, atol=0.1, rtol=0.1)