def test_framing_example_moderator():
    """Smoke-test mediation with a moderator given by column positions.

    Moderation without formulas is generally not useful, but it is
    exercised here anyway.  The moderator key is the pair of positions of
    the ``age`` column in the outcome and mediator design matrices, and it
    is mapped to the value 20.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    data = pd.read_csv(os.path.join(here, 'results', "framing.csv"))

    # Outcome: binomial GLM with a probit link.
    outcome_exog = patsy.dmatrix(
        "emo + treat + age + educ + gender + income", data,
        return_type='dataframe')
    outcome_family = sm.families.Binomial(link=sm.families.links.probit())
    outcome_model = sm.GLM(np.asarray(data["cong_mesg"]), outcome_exog,
                           family=outcome_family)

    # Mediator: ordinary least squares.
    mediator_exog = patsy.dmatrix(
        "treat + age + educ + gender + income", data,
        return_type='dataframe')
    mediator_model = sm.OLS(np.asarray(data["emo"]), mediator_exog)

    # Column names of both design matrices, looked up by position below.
    outcome_cols = outcome_exog.columns.tolist()
    mediator_cols = mediator_exog.columns.tolist()

    tx_pos = [outcome_cols.index("treat"), mediator_cols.index("treat")]
    med_pos = outcome_cols.index("emo")
    age_pos = (outcome_cols.index("age"), mediator_cols.index("age"))

    med = Mediation(outcome_model, mediator_model, tx_pos, med_pos,
                    moderators={age_pos: 20})

    # Just a smoke test
    np.random.seed(4231)
    med_rslt = med.fit(method='parametric', n_rep=100)
def test_framing_example_moderator():
    """Smoke-test position-based mediation with a moderator.

    Moderation without formulas is generally not useful, but it is tested
    anyway.  Treatment, mediator and moderator are all identified by
    their column positions in the patsy design matrices; the moderator
    (``age``) is mapped to the value 20.

    The test only checks that fitting runs; no results are compared.
    """
    cur_dir = os.path.dirname(os.path.abspath(__file__))
    data = pd.read_csv(os.path.join(cur_dir, '#1lab_results', "framing.csv"))

    outcome = np.asarray(data["cong_mesg"])
    outcome_exog = patsy.dmatrix(
        "emo + treat + age + educ + gender + income",
        data, return_type='dataframe')
    probit = sm.families.links.probit
    # The family takes a link *instance*; passing the bare class is
    # rejected by current statsmodels releases.
    outcome_model = sm.GLM(outcome, outcome_exog,
                           family=sm.families.Binomial(link=probit()))

    mediator = np.asarray(data["emo"])
    mediator_exog = patsy.dmatrix(
        "treat + age + educ + gender + income",
        data, return_type='dataframe')
    mediator_model = sm.OLS(mediator, mediator_exog)

    outcome_names = outcome_exog.columns.tolist()
    mediator_names = mediator_exog.columns.tolist()

    # Positions are given as (outcome model, mediator model).
    tx_pos = [outcome_names.index("treat"), mediator_names.index("treat")]
    med_pos = outcome_names.index("emo")
    ix = (outcome_names.index("age"), mediator_names.index("age"))
    moderators = {ix: 20}

    med = Mediation(outcome_model, mediator_model, tx_pos, med_pos,
                    moderators=moderators)

    # Just a smoke test
    np.random.seed(4231)
    med.fit(method='parametric', n_rep=100)
def test_framing_example():
    """Compare parametric and bootstrap mediation summaries to references.

    The outcome is a binomial GLM with probit link and the mediator an OLS
    model, both built from patsy design matrices.  Each fit is run with
    seed 4231 and 100 replications, and its summary must match the
    corresponding reference object (``framing_para_4231`` or
    ``framing_boot_4231``) to within 1e-6.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    data = pd.read_csv(os.path.join(here, 'results', "framing.csv"))

    # Outcome model
    outcome_exog = patsy.dmatrix(
        "emo + treat + age + educ + gender + income", data,
        return_type='dataframe')
    probit_link = sm.families.links.probit()
    outcome_model = sm.GLM(np.asarray(data["cong_mesg"]), outcome_exog,
                           family=sm.families.Binomial(link=probit_link))

    # Mediator model
    mediator_exog = patsy.dmatrix(
        "treat + age + educ + gender + income", data,
        return_type='dataframe')
    mediator_model = sm.OLS(np.asarray(data["emo"]), mediator_exog)

    outcome_cols = outcome_exog.columns.tolist()
    mediator_cols = mediator_exog.columns.tolist()
    tx_pos = [outcome_cols.index("treat"), mediator_cols.index("treat")]
    med_pos = outcome_cols.index("emo")

    med = Mediation(outcome_model, mediator_model, tx_pos, med_pos,
                    outcome_fit_kwargs={'atol':1e-11})

    # Each method is re-seeded so that its result is reproducible.
    expected_by_method = (
        ('parametric', framing_para_4231),
        ('boot', framing_boot_4231),
    )
    for method, expected in expected_by_method:
        np.random.seed(4231)
        rslt = med.fit(method=method, n_rep=100)
        diff = np.asarray(rslt.summary() - expected)
        assert_allclose(diff, 0, atol=1e-6)
def test_framing_example_moderator_formula():
    """Check formula-based moderated mediation against a stored reference.

    Both models are built with ``from_formula`` and include ``age``
    interactions.  ``age`` is supplied as a moderator with value 20, and
    the parametric summary (seed 4231, 100 replications) must agree with
    ``framing_moderated_4231`` to within 1e-6.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    csv_path = os.path.join(here, 'results', "framing.csv")
    data = pd.read_csv(csv_path)

    outcome_formula = (
        "cong_mesg ~ emo + treat*age + emo*age + educ + gender + income")
    mediator_formula = "emo ~ treat*age + educ + gender + income"

    binomial_probit = sm.families.Binomial(
        link=sm.families.links.probit())
    outcome_model = sm.GLM.from_formula(
        outcome_formula, data, family=binomial_probit)
    mediator_model = sm.OLS.from_formula(mediator_formula, data)

    med = Mediation(outcome_model, mediator_model, "treat", "emo",
                    moderators={"age": 20})

    np.random.seed(4231)
    summary = med.fit(method='parametric', n_rep=100).summary()

    diff = np.asarray(summary - framing_moderated_4231)
    assert_allclose(diff, 0, atol=1e-6)
def test_framing_example():
    """Check parametric and bootstrap mediation results on the framing data.

    A binomial/probit GLM is used for the outcome and OLS for the
    mediator; treatment and mediator are identified by column position.
    Summaries from both fitting methods (seed 4231, 100 replications) are
    compared with ``framing_para_4231`` and ``framing_boot_4231`` using an
    absolute tolerance of 1e-6.
    """
    cur_dir = os.path.dirname(os.path.abspath(__file__))
    data = pd.read_csv(os.path.join(cur_dir, '#1lab_results', "framing.csv"))

    outcome = np.asarray(data["cong_mesg"])
    outcome_exog = patsy.dmatrix(
        "emo + treat + age + educ + gender + income",
        data, return_type='dataframe')
    probit = sm.families.links.probit
    # The family takes a link *instance*; passing the bare class is
    # rejected by current statsmodels releases.
    outcome_model = sm.GLM(outcome, outcome_exog,
                           family=sm.families.Binomial(link=probit()))

    mediator = np.asarray(data["emo"])
    mediator_exog = patsy.dmatrix(
        "treat + age + educ + gender + income",
        data, return_type='dataframe')
    mediator_model = sm.OLS(mediator, mediator_exog)

    outcome_names = outcome_exog.columns.tolist()
    mediator_names = mediator_exog.columns.tolist()

    # Positions are given as (outcome model, mediator model).
    tx_pos = [outcome_names.index("treat"), mediator_names.index("treat")]
    med_pos = outcome_names.index("emo")

    med = Mediation(outcome_model, mediator_model, tx_pos, med_pos,
                    outcome_fit_kwargs={'atol': 1e-11})

    np.random.seed(4231)
    para_rslt = med.fit(method='parametric', n_rep=100)
    diff = np.asarray(para_rslt.summary() - framing_para_4231)
    assert_allclose(diff, 0, atol=1e-6)

    # Re-seed so the bootstrap run is reproducible on its own.
    np.random.seed(4231)
    boot_rslt = med.fit(method='boot', n_rep=100)
    diff = np.asarray(boot_rslt.summary() - framing_boot_4231)
    assert_allclose(diff, 0, atol=1e-6)
def test_mixedlm():
    """Mediation with MixedLM outcome and mediator on simulated data.

    Simulates 200 groups with three observations each, fits mixed models
    grouped by ``id`` and asserts that the average proportion mediated is
    close to 0.52.
    """
    np.random.seed(3424)
    n = 200

    # NOTE: the order of the np.random calls below fixes the simulated
    # data, so the draws must stay in this sequence.

    # The exposure (not time varying)
    x = np.random.normal(size=n)
    xv = np.outer(x, np.ones(3))

    # The mediator (with random intercept)
    med_wts = np.asarray([4., 4, 1])
    med_wts = med_wts / np.sqrt(np.sum(med_wts**2))
    med_exposure = med_wts[0] * xv
    med_intercept = med_wts[1] * np.outer(np.random.normal(size=n),
                                          np.ones(3))
    med_noise = med_wts[2] * np.random.normal(size=(n, 3))
    med = med_exposure + med_intercept + med_noise

    # The outcome (exposure and mediator effects)
    ey = np.outer(x, np.r_[0, 0.5, 1]) + med

    # Random structure of the outcome (random intercept and slope)
    err_wts = np.asarray([5., 2, 2])
    err_wts = err_wts / np.sqrt(np.sum(err_wts**2))
    err_intercept = err_wts[0] * np.outer(np.random.normal(size=n),
                                          np.ones(3))
    err_slope = err_wts[1] * np.outer(np.random.normal(size=n),
                                      np.r_[-1, 0, 1])
    err_noise = err_wts[2] * np.random.normal(size=(n, 3))
    y = ey + (err_intercept + err_slope + err_noise)

    # Group membership
    idx = np.outer(np.arange(n), np.ones(3))

    # Time
    tim = np.outer(np.ones(n), np.r_[-1, 0, 1])

    columns = {
        "y": y.flatten(),
        "x": xv.flatten(),
        "id": idx.flatten(),
        "time": tim.flatten(),
        "med": med.flatten(),
    }
    df = pd.DataFrame(columns)

    mediator_model = sm.MixedLM.from_formula("med ~ x", groups="id",
                                             data=df)
    outcome_model = sm.MixedLM.from_formula("y ~ med + x", groups="id",
                                            data=df)

    summary = Mediation(outcome_model, mediator_model,
                        "x", "med").fit(n_rep=2).summary()
    pm = summary.loc["Prop. mediated (average)", "Estimate"]
    assert_allclose(pm, 0.52, rtol=1e-2, atol=1e-2)
def test_mixedlm():
    """Check the proportion mediated when both models are MixedLM.

    Data are simulated as an ``n`` x 3 layout (one row per group, one
    column per time point) and flattened into a long data frame.  The
    estimated average proportion mediated is expected to be about 0.52.
    """
    np.random.seed(3424)

    n = 200
    ones3 = np.ones(3)
    time_pts = np.r_[-1, 0, 1]

    def _unit(v):
        # Rescale v to unit Euclidean length.
        return v / np.sqrt(np.sum(v**2))

    # Random draws are taken in a fixed order; do not reorder them.

    # The exposure (not time varying)
    x = np.random.normal(size=n)
    xv = np.outer(x, ones3)

    # The mediator (with random intercept)
    mx = _unit(np.asarray([4., 4, 1]))
    med = mx[0] * np.outer(x, ones3)
    med = med + mx[1] * np.outer(np.random.normal(size=n), ones3)
    med = med + mx[2] * np.random.normal(size=(n, 3))

    # The outcome (exposure and mediator effects)
    ey = np.outer(x, np.r_[0, 0.5, 1]) + med

    # Random structure of the outcome (random intercept and slope)
    ex = _unit(np.asarray([5., 2, 2]))
    e = ex[0] * np.outer(np.random.normal(size=n), ones3)
    e = e + ex[1] * np.outer(np.random.normal(size=n), time_pts)
    e = e + ex[2] * np.random.normal(size=(n, 3))

    y = ey + e

    # Group membership and time, laid out like the simulated arrays
    idx = np.outer(np.arange(n), ones3)
    tim = np.outer(np.ones(n), time_pts)

    long_arrays = [("y", y), ("x", xv), ("id", idx), ("time", tim),
                   ("med", med)]
    df = pd.DataFrame({name: arr.flatten() for name, arr in long_arrays})

    mediator_model = sm.MixedLM.from_formula(
        "med ~ x", groups="id", data=df)
    outcome_model = sm.MixedLM.from_formula(
        "y ~ med + x", groups="id", data=df)

    me = Mediation(outcome_model, mediator_model, "x", "med")
    st = me.fit(n_rep=2).summary()

    assert_allclose(st.loc["Prop. mediated (average)", "Estimate"],
                    0.52, rtol=1e-2, atol=1e-2)
def test_framing_example_moderator_formula():
    """Moderated mediation specified with formulas.

    Fits a probit GLM for the outcome and an OLS model for the mediator,
    passes ``{"age": 20}`` as moderators, and requires the parametric
    summary (seed 4231, 100 replications) to match
    ``framing_moderated_4231`` within an absolute tolerance of 1e-6.
    """
    test_dir = os.path.dirname(os.path.abspath(__file__))
    data = pd.read_csv(os.path.join(test_dir, 'results', "framing.csv"))

    family = sm.families.Binomial(link=sm.families.links.probit())

    outcome_model = sm.GLM.from_formula(
        "cong_mesg ~ emo + treat*age + emo*age + educ + gender + income",
        data,
        family=family,
    )
    mediator_model = sm.OLS.from_formula(
        "emo ~ treat*age + educ + gender + income",
        data,
    )

    med = Mediation(
        outcome_model,
        mediator_model,
        "treat",
        "emo",
        moderators={"age": 20},
    )

    np.random.seed(4231)
    fitted = med.fit(method='parametric', n_rep=100)

    assert_allclose(
        np.asarray(fitted.summary() - framing_moderated_4231),
        0,
        atol=1e-6,
    )
def run_mediation(self, predict_col_categorical=False):
    """Fit a mediation analysis on ``self.data.X`` and print its summary.

    The outcome model is ``cont1 ~ rdt1 + jelt1`` and the mediator model
    is ``rdt1 ~ jelt1``; ``jelt1`` is the exposure and ``rdt1`` the
    mediator.

    Parameters
    ----------
    predict_col_categorical : bool, optional
        When true, the outcome GLM uses a binomial family with a probit
        link; otherwise the GLM is built without a ``family`` argument.
    """
    frame = self.data.X
    outcome_formula = "cont1 ~ rdt1 + jelt1"

    if not predict_col_categorical:
        outcome_model = sm.GLM.from_formula(outcome_formula, frame)
    else:
        binomial_probit = sm.families.Binomial(link=links.probit())
        outcome_model = sm.GLM.from_formula(
            outcome_formula, frame, family=binomial_probit)

    mediator_model = sm.OLS.from_formula("rdt1 ~ jelt1", frame)

    analysis = Mediation(outcome_model, mediator_model, "jelt1",
                         mediator="rdt1")
    med = analysis.fit()

    # Lift pandas' display limits so the whole summary table is printed.
    display_opts = ('display.max_rows', None, 'display.max_columns', None)
    with pd.option_context(*display_opts):
        print(med.summary())
def run(otype):
    """Run a survival mediation analysis for one outcome type and print it.

    Data come from the ``gen_mediator``, ``gen_outcome`` and ``build_df``
    helpers.  Both the outcome and the mediator are modelled with
    ``sm.PHReg``; the mediation fit uses 20 replications.

    Parameters
    ----------
    otype
        Passed through to ``gen_outcome`` as its first argument.
    """
    mtime0, mtime, mstatus = gen_mediator()
    ytime, ystatus = gen_outcome(otype, mtime0)
    df = build_df(ytime, ystatus, mtime0, mtime, mstatus)

    outcome_model = sm.PHReg.from_formula(
        "ytime ~ exp + mtime", status="ystatus", data=df)
    mediator_model = sm.PHReg.from_formula(
        "mtime ~ exp", status="mstatus", data=df)

    predict_kwargs = {"pred_only": True}
    med = Mediation(outcome_model, mediator_model, "exp", "mtime",
                    outcome_predict_kwargs=predict_kwargs)

    print(med.fit(n_rep=20).summary())
# Rename columns to names that can be used directly in model formulas.
# The second mapping turns the hyphenated 'le-rCEN' into 'le_rCEN'.
first_pass = {'Full Scale IQ_2': 'IQ',
              'le left central executive phy': 'le-rCEN'}
second_pass = {'Full Scale IQ_2': 'IQ', 'le-rCEN': 'le_rCEN'}
for column_map in (first_pass, second_pass):
    df_f.rename(column_map, axis=1, inplace=True)

gid_post_map = {('2', 'GID Post'): 'GIDPost'}
for frame in (df_f, big_df):
    frame.rename(gid_post_map, axis=1, inplace=True)

import statsmodels.api as sm
from statsmodels.stats.mediation import Mediation, MediationResults

# Same exposure (le_rCEN) and outcome (Phy48Grade), with IQ and then
# GIDPost as the mediator.
for mediator_name in ("IQ", "GIDPost"):
    outcome_model = sm.GLM.from_formula(
        "Phy48Grade ~ le_rCEN + {}".format(mediator_name), no_na_m)
    mediator_model = sm.OLS.from_formula(
        "{} ~ le_rCEN".format(mediator_name), no_na_m)
    med = Mediation(outcome_model, mediator_model,
                    "le_rCEN", mediator_name).fit()
    med.summary(alpha=0.01)

# average causal mediation effect (ACME) = a*b = c - c'
# average direct effect (ADE) = c'
# total effect = a*b + c' = c

phy_fc = df_f['fc hippo-default mode phy']
gen_fc = df_f['fc hippo-default mode gen']
df_f['HcDMN_phy_minus_gen'] = phy_fc - gen_fc
spearmanr(df_f['HcDMN_phy_minus_gen'], df_f['GIDPost'], nan_policy='omit')
def test_surv():
    """Mediation with proportional hazards outcome and mediator models.

    Exposures are standard normal.  Mediator and outcome times are drawn
    as ``-mean * log(U)`` with ``U`` uniform, and are censored by a
    second draw of the same form with twice the mean.  Three outcome
    scenarios are simulated ("full", "partial" and "no" mediation) and
    the estimated average proportion mediated is compared with 1, 0.5
    and 0 respectively, using loose tolerances.
    """
    np.random.seed(2341)
    n = 1000

    # Generate exposures
    exp = np.random.normal(size=n)

    # Generate mediators
    mn = np.exp(exp)
    mtime0 = -mn * np.log(np.random.uniform(size=n))
    ctime = -2 * mn * np.log(np.random.uniform(size=n))
    # Builtin int is used because the ``np.int`` alias was removed from
    # NumPy; the resulting dtype is the same.
    mstatus = (ctime >= mtime0).astype(int)
    mtime = np.where(mtime0 <= ctime, mtime0, ctime)

    # Expected proportion mediated for each scenario.
    expected_pm = {"full": 1, "partial": 0.5, "no": 0}

    for mt in "full", "partial", "no":

        # Outcome
        if mt == "full":
            lp = 0.5 * mtime0
        elif mt == "partial":
            lp = exp + mtime0
        else:
            lp = exp

        # Generate outcomes
        mn = np.exp(-lp)
        ytime0 = -mn * np.log(np.random.uniform(size=n))
        ctime = -2 * mn * np.log(np.random.uniform(size=n))
        ystatus = (ctime >= ytime0).astype(int)
        ytime = np.where(ytime0 <= ctime, ytime0, ctime)

        df = pd.DataFrame({
            "ytime": ytime,
            "ystatus": ystatus,
            "mtime": mtime,
            "mstatus": mstatus,
            "exp": exp,
        })

        fml = "ytime ~ exp + mtime"
        outcome_model = sm.PHReg.from_formula(fml, status="ystatus",
                                              data=df)
        fml = "mtime ~ exp"
        mediator_model = sm.PHReg.from_formula(fml, status="mstatus",
                                               data=df)

        med = Mediation(outcome_model, mediator_model, "exp", "mtime",
                        outcome_predict_kwargs={"pred_only": True},
                        outcome_fit_kwargs={"method": "lbfgs"},
                        mediator_fit_kwargs={"method": "lbfgs"})
        med_result = med.fit(n_rep=2)
        dr = med_result.summary()
        pm = dr.loc["Prop. mediated (average)", "Estimate"]
        assert_allclose(pm, expected_pm[mt], atol=0.1, rtol=0.1)