예제 #1
0
def run_arrays(n, get_model, noise):
    """Fit a ``ProcessMLE`` from arrays produced by ``setup1``.

    ``n``, ``get_model`` and ``noise`` are forwarded unchanged to
    ``setup1``; the seven arrays it returns are handed to the
    ``ProcessMLE`` constructor in the same order.

    Returns
    -------
    The object returned by ``ProcessMLE.fit()``.
    """
    (endog, exog_mean, exog_scale, exog_smooth, exog_noise,
     time, groups) = setup1(n, get_model, noise)

    return ProcessMLE(endog, exog_mean, exog_scale, exog_smooth,
                      exog_noise, time, groups).fit()
예제 #2
0
def run_arrays(n, get_model):
    """Fit a ``ProcessMLE`` from arrays produced by ``setup1``.

    ``n`` and ``get_model`` are forwarded unchanged to ``setup1``; the
    seven arrays it returns are handed to the ``ProcessMLE`` constructor
    in the same order.

    Returns
    -------
    The object returned by ``ProcessMLE.fit()``.
    """
    (endog, exog_mean, exog_scale, exog_smooth, exog_noise,
     time, groups) = setup1(n, get_model)

    model = ProcessMLE(endog, exog_mean, exog_scale, exog_smooth,
                       exog_noise, time, groups)
    fitted = model.fit()
    return fitted
예제 #3
0
def run_formula(n, get_model):
    """Fit a ``ProcessMLE`` through the formula interface.

    The arrays returned by ``setup1(n, get_model)`` are packed into a
    DataFrame: the response, four mean-structure columns, two columns
    each for the scale, smooth and noise parts, plus ``time`` and
    ``groups``.  The model is then built with
    ``ProcessMLE.from_formula`` using formulas that each start with
    ``0 +``.

    Returns
    -------
    tuple
        ``(fit_result, data_frame)``.
    """
    y, x_mean, x_sc, x_sm, x_no, time, groups = setup1(n, get_model)

    # One DataFrame column per design-matrix column.
    columns = {
        "y": y,
        "x1": x_mean[:, 0],
        "x2": x_mean[:, 1],
        "x3": x_mean[:, 2],
        "x4": x_mean[:, 3],
        "xsc1": x_sc[:, 0],
        "xsc2": x_sc[:, 1],
        "xsm1": x_sm[:, 0],
        "xsm2": x_sm[:, 1],
        "xno1": x_no[:, 0],
        "xno2": x_no[:, 1],
        "time": time,
        "groups": groups,
    }
    frame = pd.DataFrame(columns)

    model = ProcessMLE.from_formula(
        "y ~ 0 + x1 + x2 + x3 + x4",
        data=frame,
        scale_formula="0 + xsc1 + xsc2",
        smooth_formula="0 + xsm1 + xsm2",
        noise_formula="0 + xno1 + xno2",
        time="time",
        groups="groups",
    )

    return model.fit(), frame
예제 #4
0
def run_formula(n, get_model):
    """Build and fit a ``ProcessMLE`` from formulas on simulated data.

    Data come from ``setup1(n, get_model)``.  Each column of the mean,
    scale, smooth and noise design arrays becomes a named column of a
    DataFrame, which is passed to ``ProcessMLE.from_formula`` together
    with the ``time`` and ``groups`` column names.

    Returns
    -------
    tuple
        ``(fit_result, data_frame)``.
    """
    y, x_mean, x_sc, x_sm, x_no, time, groups = setup1(n, get_model)

    frame = pd.DataFrame({
        "y": y,
        "x1": x_mean[:, 0],
        "x2": x_mean[:, 1],
        "x3": x_mean[:, 2],
        "x4": x_mean[:, 3],
        "xsc1": x_sc[:, 0],
        "xsc2": x_sc[:, 1],
        "xsm1": x_sm[:, 0],
        "xsm2": x_sm[:, 1],
        "xno1": x_no[:, 0],
        "xno2": x_no[:, 1],
        "time": time,
        "groups": groups,
    })

    # Formulas for the variance components, passed on as keyword arguments.
    component_formulas = {
        "scale_formula": "0 + xsc1 + xsc2",
        "smooth_formula": "0 + xsm1 + xsm2",
        "noise_formula": "0 + xno1 + xno2",
    }

    model = ProcessMLE.from_formula(
        "y ~ 0 + x1 + x2 + x3 + x4",
        data=frame,
        time="time",
        groups="groups",
        **component_formulas
    )
    fit_result = model.fit()

    return fit_result, frame
예제 #5
0
def test_score_numdiff():
    """Compare ``ProcessMLE.score`` with a finite-difference gradient.

    A model is built from ``setup1(1000, model1)``.  At five randomly
    perturbed copies of the starting values, the analytic score must
    agree with ``nd.approx_fprime`` applied to the log-likelihood.
    """
    y, x_mean, x_sc, x_sm, x_no, time, groups = setup1(1000, model1)

    model = ProcessMLE(y, x_mean, x_sc, x_sm, x_no, time, groups)

    # Total parameter count: one coefficient per design-matrix column.
    n_params = sum(mat.shape[1] for mat in (x_mean, x_sc, x_sm, x_no))

    np.random.seed(342)

    for _ in range(5):
        start = model._get_start()
        point = start + 0.1 * np.random.normal(size=n_params)
        analytic = model.score(point)
        numeric = nd.approx_fprime(point, model.loglike, epsilon=1e-7)
        assert_allclose(analytic, numeric, atol=1e-3, rtol=1e-4)
예제 #6
0
def test_score_numdiff():
    """Check the analytic score against numerical differentiation.

    Uses data from ``setup1(1000, model1)``.  The seed is fixed, and the
    comparison is repeated at five points, each obtained by adding
    normal noise (scaled by 0.1) to the model's starting values.
    """
    arrays = y, x_mean, x_sc, x_sm, x_no, time, groups = setup1(1000, model1)

    model = ProcessMLE(*arrays)

    objective = lambda params: model.loglike(params)  # noqa: E731

    # Length of the full parameter vector.
    design_widths = [x_mean.shape[1], x_sc.shape[1],
                     x_sm.shape[1], x_no.shape[1]]
    n_params = sum(design_widths)

    np.random.seed(342)

    trial = 0
    while trial < 5:
        base = model._get_start()
        params = base + 0.1 * np.random.normal(size=n_params)
        grad = model.score(params)
        grad_nd = nd.approx_fprime(params, objective, epsilon=1e-7)
        assert_allclose(grad, grad_nd, atol=1e-3, rtol=1e-4)
        trial += 1
예제 #7
0
    # Standardize Age for the data set selected by `female`.  The mean and
    # SD are kept so the same transformation can be applied to the age grid
    # built at the end of this section.
    age_mean = dx[female].Age.mean()
    age_sd = dx[female].Age.std()
    # NOTE(review): this assigns through dx[female][...]; it only updates the
    # stored frame if dx[female] returns the frame itself rather than a copy
    # (e.g. dx is a dict/list of DataFrames) -- confirm.
    dx[female]["AgeZ"] = (dx[female].Age - age_mean) / age_sd

    # Fit on rows with no missing values and log how much data remains.
    ddm = dx[female].dropna()
    outf.write("%d x %d values used to fit model\n" % ddm.shape)
    outf.write("%d distinct people used to fit model\n\n" %
               ddm.ID.unique().size)

    # Fit the model -- note that degrees of freedom are fixed here.
    # TODO: are these good choices of the df values?
    # TODO: maybe this should be refit each imputation cycle with bootstrapped data
    # The mean formula is `impvar` regressed on the terms in others[female];
    # the scale formula is quadratic in AgeZ, and the smooth and noise
    # formulas are intercept-only ("1").
    preg[female] = ProcessMLE.from_formula(impvar + " ~ " +
                                           " + ".join(others[female]),
                                           scale_formula="AgeZ + I(AgeZ**2)",
                                           smooth_formula="1",
                                           noise_formula="1",
                                           time="Age",
                                           groups="ID",
                                           data=ddm)

    rslt[female] = preg[female].fit(verbose=True, maxiter=[2, 200])

    # Grid of 100 ages between minage and maxage, with AgeZ computed using
    # the same mean/SD as the fitting data.
    ages = np.linspace(minage, maxage, 100)
    dv = pd.DataFrame({"Age": ages, "AgeZ": (ages - age_mean) / age_sd})

    # Plot the mean function
    #minage = min(ages)
    #maxage = max(ages)
    #plt.clf()
    #plt.axes([0.1, 0.1, 0.7, 0.8])
    #for wtz in -1, 0, 1:
예제 #8
0
    if female == 1:
        # Count the rows per ID in each of dx[0] and dx[1], stack the two
        # sets of counts, and save them to a CSV file named after `impvar`.
        xf = pd.concat(
            [dx[fem].groupby("ID").size() for fem in (0, 1)], axis=0)
        xf = xf.reset_index()
        xf.columns = ["ID", "n_" + impvar]
        # np.int was deprecated in NumPy 1.20 and removed in 1.24; it was
        # only an alias for the builtin int, so this is the same dtype.
        xf.ID = xf.ID.astype(int)
        xf.to_csv("mixed/stats/%s.csv" % impvar, index=False)

    # Fit the model -- note that degrees of freedom are fixed here.
    # TODO: are these good choices of the df values?
    # TODO: maybe this should be refit each imputation cycle with bootstrapped data
    # The mean, scale and smooth formulas all use the same bs(Age, 4) term;
    # no noise_formula is passed in this call.
    preg[female] = ProcessMLE.from_formula("%s ~ bs(Age, 4)" % impvar,
                                           scale_formula="bs(Age, 4)",
                                           smooth_formula="bs(Age, 4)",
                                           time="Age",
                                           groups="ID",
                                           data=dx[female])

    rslt[female] = preg[female].fit()

    # Plot the fitted covariance matrix
    # The covariance is evaluated on a grid of 100 ages from 1 to maxage.
    ages = np.linspace(1, maxage, 100)
    dv = pd.DataFrame({"Age": ages})
    mnpar = rslt[female].mean_params
    scpar = rslt[female].scale_params
    smpar = rslt[female].smooth_params
    # NOTE(review): dv is passed twice, presumably as the data for the scale
    # and smooth formulas respectively -- confirm against the
    # ProcessMLE.covariance signature.
    cm = preg[female].covariance(ages, scpar, smpar, dv, dv)
    for k in 0, 1:
        # First plot covariance, then correlation
        plt.clf()
예제 #9
0
파일: f2_demog.py 프로젝트: kshedden/ar31qd
# Standardize the response variable `vname` into a new "<vname>_z" column.
vname_cen = vname + "_z"
resp_mean = kid[vname].mean()
resp_sd = kid[vname].std()
kid[vname_cen] = (kid[vname] - resp_mean) / resp_sd

# Keep complete cases only, ordered by subject and then age.
kid = kid.dropna()
kid = kid.sort_values(by=["ID_F2", "Age"])

# Mean structure: quadratic in Age_z plus Sex.  (vname + "_z" is the same
# string as vname_cen above.)
fml = "%s ~ Age_z + I(Age_z**2) + Sex" % (vname + "_z")
# Disabled alternative fit using ProcessMLE; this branch never runs.
if False:
    # The process model reduces to a random intercept model
    mod = ProcessMLE.from_formula(fml,
                                  scale_formula="Age_z",
                                  smooth_formula="1",
                                  noise_formula="1",
                                  time="Age",
                                  groups="ID_F2",
                                  data=kid)
    rslt = mod.fit(verbose=True)

# Mixed linear model grouped by ID_F2; this is the fit that is actually used.
mod = sm.MixedLM.from_formula(fml, groups="ID_F2", data=kid)
rslt = mod.fit()

# Drop people not in other analyses.
# (`dk` is defined outside this excerpt.)
dk = dk.loc[~dk.ID_F2.isin([5084, 5126, 5128, 5138, 5143, 5149]), :]

# Build a frame with one row (the first, i.e. lowest Age after the sort above)
# per subject that is also present in dk, then set Age to 1 in every row.
# NOTE(review): only "Age" is overwritten here; "Age_z", which is what `fml`
# uses, is left unchanged in this excerpt -- confirm it is recomputed later.
new_exog = kid.reset_index().copy()
new_exog = new_exog.loc[new_exog.ID_F2.isin(dk.ID_F2), :]
new_exog = new_exog.groupby("ID_F2").head(1)
new_exog.loc[:, "Age"] = 1