def run_arrays(n, get_model, noise):
    """Fit a ProcessMLE model through the array interface.

    All three arguments are forwarded unchanged to ``setup1``, which
    supplies the response, the four design matrices, the time values
    and the group labels handed to ``ProcessMLE``.

    Returns
    -------
    The object returned by ``ProcessMLE(...).fit()``.
    """
    inputs = setup1(n, get_model, noise)

    # Unpack explicitly so a change in the number of values returned by
    # setup1 fails here rather than inside the model constructor.
    endog, exog_mean, exog_scale, exog_smooth, exog_noise, time, groups = inputs

    model = ProcessMLE(
        endog, exog_mean, exog_scale, exog_smooth, exog_noise, time, groups)
    return model.fit()
def run_arrays(n, get_model):
    """Fit a ProcessMLE model through the array interface.

    ``n`` and ``get_model`` are passed straight to ``setup1``; the seven
    values it returns are given to ``ProcessMLE`` in the same order.

    Returns
    -------
    The object returned by ``ProcessMLE(...).fit()``.
    """
    (response, mean_design, scale_design, smooth_design, noise_design,
     time, groups) = setup1(n, get_model)

    process_model = ProcessMLE(
        response, mean_design, scale_design, smooth_design, noise_design,
        time, groups)

    fitted = process_model.fit()
    return fitted
def run_formula(n, get_model):
    """Fit a ProcessMLE model through the formula interface.

    The arrays returned by ``setup1(n, get_model)`` are packed into a
    DataFrame with one named column per design-matrix column (four mean
    columns, two each for scale, smooth and noise), and the model is
    specified with formulas that refer to those names.

    Returns
    -------
    tuple
        ``(f, df)``: the value returned by ``fit()`` and the DataFrame
        the model was built from.
    """
    y, x_mean, x_sc, x_sm, x_no, time, groups = setup1(n, get_model)

    # Column names here must match the names used in the formulas below.
    columns = {
        "y": y,
        "x1": x_mean[:, 0],
        "x2": x_mean[:, 1],
        "x3": x_mean[:, 2],
        "x4": x_mean[:, 3],
        "xsc1": x_sc[:, 0],
        "xsc2": x_sc[:, 1],
        "xsm1": x_sm[:, 0],
        "xsm2": x_sm[:, 1],
        "xno1": x_no[:, 0],
        "xno2": x_no[:, 1],
        "time": time,
        "groups": groups
    }
    df = pd.DataFrame(columns)

    # Every formula begins with "0 +", so only the listed columns enter
    # each design matrix.
    model = ProcessMLE.from_formula(
        "y ~ 0 + x1 + x2 + x3 + x4",
        data=df,
        scale_formula="0 + xsc1 + xsc2",
        smooth_formula="0 + xsm1 + xsm2",
        noise_formula="0 + xno1 + xno2",
        time="time",
        groups="groups")

    return model.fit(), df
def run_formula(n, get_model):
    """Build a DataFrame from ``setup1`` output and fit ProcessMLE by formula.

    Parameters
    ----------
    n, get_model
        Forwarded unchanged to ``setup1``.

    Returns
    -------
    tuple
        The fit result and the DataFrame passed as ``data`` to
        ``ProcessMLE.from_formula``.
    """
    arrays = setup1(n, get_model)
    y, x_mean, x_sc, x_sm, x_no, time, groups = arrays

    # One DataFrame column per design-matrix column; the names are the
    # ones referenced by the four formulas below.
    frame = pd.DataFrame({
        "y": y,
        "x1": x_mean[:, 0], "x2": x_mean[:, 1],
        "x3": x_mean[:, 2], "x4": x_mean[:, 3],
        "xsc1": x_sc[:, 0], "xsc2": x_sc[:, 1],
        "xsm1": x_sm[:, 0], "xsm2": x_sm[:, 1],
        "xno1": x_no[:, 0], "xno2": x_no[:, 1],
        "time": time,
        "groups": groups
    })

    fml_mean = "y ~ 0 + x1 + x2 + x3 + x4"
    fml_scale = "0 + xsc1 + xsc2"
    fml_smooth = "0 + xsm1 + xsm2"
    fml_noise = "0 + xno1 + xno2"

    process_model = ProcessMLE.from_formula(
        fml_mean,
        data=frame,
        scale_formula=fml_scale,
        smooth_formula=fml_smooth,
        noise_formula=fml_noise,
        time="time",
        groups="groups")

    fit_result = process_model.fit()
    return fit_result, frame
def test_score_numdiff():
    """Compare the analytic score with a numerical gradient of the loglike.

    The comparison is made at five points, each obtained by adding
    Gaussian noise (standard deviation 0.1) to the model's starting
    values.  The random seed is fixed so the points are reproducible.
    """
    y, x_mean, x_sc, x_sm, x_no, time, groups = setup1(1000, model1)
    model = ProcessMLE(y, x_mean, x_sc, x_sm, x_no, time, groups)

    # Length of the perturbation vector: total number of columns across
    # the four design matrices.
    n_params = sum(mat.shape[1] for mat in (x_mean, x_sc, x_sm, x_no))

    np.random.seed(342)

    for _ in range(5):
        start = model._get_start()
        params = start + 0.1 * np.random.normal(size=n_params)

        analytic = model.score(params)
        numeric = nd.approx_fprime(params, model.loglike, epsilon=1e-7)

        assert_allclose(analytic, numeric, atol=1e-3, rtol=1e-4)
# Standardize Age within the current stratum and store the z-score as a
# new AgeZ column.  The mean and SD are kept because the prediction grid
# further down is standardized with the same values.
age_mean = dx[female]["Age"].mean()
age_sd = dx[female]["Age"].std()
dx[female]["AgeZ"] = (dx[female]["Age"] - age_mean) / age_sd

# Only rows with no missing values are used for fitting; log how many
# rows/columns and how many distinct IDs remain.
ddm = dx[female].dropna()
outf.write("%d x %d values used to fit model\n" % ddm.shape)
outf.write("%d distinct people used to fit model\n\n" %
           ddm.ID.unique().size)

# Fit the model -- note that degrees of freedom are fixed here.
# TODO: are these good choices of the df values?
# TODO: maybe this should be refit each imputation cycle with
# bootstrapped data
preg[female] = ProcessMLE.from_formula(
    impvar + " ~ " + " + ".join(others[female]),
    scale_formula="AgeZ + I(AgeZ**2)",
    smooth_formula="1",
    noise_formula="1",
    time="Age",
    groups="ID",
    data=ddm)
rslt[female] = preg[female].fit(verbose=True, maxiter=[2, 200])

# Grid of 100 ages between minage and maxage, with the matching z-scores.
ages = np.linspace(minage, maxage, 100)
dv = pd.DataFrame({"Age": ages, "AgeZ": (ages - age_mean) / age_sd})

# Plot the mean function
#minage = min(ages)
#maxage = max(ages)
#plt.clf()
#plt.axes([0.1, 0.1, 0.7, 0.8])
#for wtz in -1, 0, 1:
# When female == 1, write a CSV with the number of rows per ID, combining
# the counts from dx[0] and dx[1].
if female == 1:
    xf = []
    for fem in 0, 1:
        xf.append(dx[fem].groupby("ID").size())
    xf = pd.concat(xf, axis=0)
    xf = xf.reset_index()
    xf.columns = ["ID", "n_" + impvar]
    # np.int was only an alias for the builtin int and has been removed
    # from NumPy (1.24+); the builtin gives the same dtype on every version.
    xf["ID"] = xf["ID"].astype(int)
    xf.to_csv("mixed/stats/%s.csv" % impvar, index=False)

# Fit the model -- note that degrees of freedom are fixed here.
# TODO: are these good choices of the df values?
# TODO: maybe this should be refit each imputation cycle with
# bootstrapped data
preg[female] = ProcessMLE.from_formula(
    "%s ~ bs(Age, 4)" % impvar,
    scale_formula="bs(Age, 4)",
    smooth_formula="bs(Age, 4)",
    time="Age",
    groups="ID",
    data=dx[female])
rslt[female] = preg[female].fit()

# Plot the fitted covariance matrix
# The covariance is evaluated on a grid of 100 ages from 1 to maxage,
# using the fitted scale and smooth parameters.
ages = np.linspace(1, maxage, 100)
dv = pd.DataFrame({"Age": ages})
mnpar = rslt[female].mean_params
scpar = rslt[female].scale_params
smpar = rslt[female].smooth_params
cm = preg[female].covariance(ages, scpar, smpar, dv, dv)

for k in 0, 1:
    # First plot covariance, then correlation
    plt.clf()
# Standardize the response and store it in a new "<vname>_z" column; the
# mean and SD are kept in resp_mean / resp_sd.
vname_cen = vname + "_z"
resp_mean = kid[vname].mean()
resp_sd = kid[vname].std()
kid[vname_cen] = (kid[vname] - resp_mean) / resp_sd

# Complete cases only, ordered by subject and then by age.
kid = kid.dropna().sort_values(by=["ID_F2", "Age"])

fml = "%s ~ Age_z + I(Age_z**2) + Sex" % vname_cen

# This branch never runs; the ProcessMLE specification is kept only for
# reference.
if False:
    # The process model reduces to a random intercept model
    mod = ProcessMLE.from_formula(
        fml,
        scale_formula="Age_z",
        smooth_formula="1",
        noise_formula="1",
        time="Age",
        groups="ID_F2",
        data=kid)
    rslt = mod.fit(verbose=True)

mod = sm.MixedLM.from_formula(fml, groups="ID_F2", data=kid)
rslt = mod.fit()

# Drop people not in other analyses.
dk = dk.loc[~dk.ID_F2.isin([5084, 5126, 5128, 5138, 5143, 5149]), :]

# Keep the first row per subject among subjects still present in dk,
# then set Age to 1 on every such row.
new_exog = kid.reset_index().copy()
new_exog = new_exog.loc[new_exog.ID_F2.isin(dk.ID_F2), :]
new_exog = new_exog.groupby("ID_F2").head(1)
new_exog.loc[:, "Age"] = 1