Example #1
import numpy as np
import pandas as pd
from numpy.testing import assert_allclose

import statsmodels.api as sm
from statsmodels.imputation.bayes_mi import BayesGaussMI, MI


def test_MI():

    np.random.seed(414)
    x = np.random.normal(size=(200, 4))
    x[[1, 3, 9], 0] = np.nan
    x[[1, 4, 3], 1] = np.nan
    x[[2, 11, 21], 2] = np.nan
    x[[11, 22, 99], 3] = np.nan

    def model_args_fn(x):
        # Return (endog, exog): regress column 0 on columns 1-3
        if isinstance(x, np.ndarray):
            return (x[:, 0], x[:, 1:])
        else:
            return (x.iloc[:, 0].values, x.iloc[:, 1:].values)

    # Run the test twice: first with ndarray input, then (after the
    # conversion at the end of the loop body) with DataFrame input.
    for j in (0, 1):
        np.random.seed(2342)
        imp = BayesGaussMI(x.copy())
        mi = MI(imp, sm.OLS, model_args_fn, burn=0)
        r = mi.fit()
        r.summary()  # smoke test
        # TODO: why does the test tolerance need to be so slack?
        # There is unexpected variation across versions on travis.
        assert_allclose(r.params, np.r_[-0.05347919, -0.02479701, 0.10075517],
                        0.25, 0)

        c = np.asarray([[0.00418232, 0.00029746, -0.00035057],
                        [0.00029746, 0.00407264, 0.00019496],
                        [-0.00035057, 0.00019496, 0.00509413]])
        assert_allclose(r.cov_params(), c, 0.3, 0)

        # Convert to a DataFrame so the second pass exercises pandas input
        x = pd.DataFrame(x)
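
For orientation, MI alternates Gibbs updates of the imputer with model fits, one fit per imputed dataset, and then pools the fits. A rough sketch of that loop (illustrative only, not the actual statsmodels internals; burn, nrep, and skip mirror the MI constructor arguments):

burn, nrep, skip = 0, 20, 10
imp = BayesGaussMI(x.copy())
for _ in range(burn):          # burn-in updates (burn=0 in the test)
    imp.update()
fits = []
for _ in range(nrep):          # one analysis fit per imputed dataset
    for _ in range(skip):      # extra updates decorrelate the datasets
        imp.update()
    endog, exog = model_args_fn(imp.data)
    fits.append(sm.OLS(endog, exog).fit())
# MI then pools the fits with Rubin's rules (see test_MI_stat below).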
Example #2
def test_mi_formula():

    np.random.seed(414)
    x = np.random.normal(size=(200, 4))
    x[[1, 3, 9], 0] = np.nan
    x[[1, 4, 3], 1] = np.nan
    x[[2, 11, 21], 2] = np.nan
    x[[11, 22, 99], 3] = np.nan
    df = pd.DataFrame({
        "y": x[:, 0],
        "x1": x[:, 1],
        "x2": x[:, 2],
        "x3": x[:, 3]
    })
    fml = "y ~ 0 + x1 + x2 + x3"

    np.random.seed(2342)
    imp = BayesGaussMI(df.copy())
    mi = MI(imp, sm.OLS, formula=fml, burn=0)
    r = mi.fit()
    r.summary()  # smoke test
    # TODO: why does the test tolerance need to be so slack?
    # There is unexpected variation across versions on travis.
    assert_allclose(r.params, np.r_[-0.05347919, -0.02479701, 0.10075517],
                    0.25, 0)

    c = np.asarray([[0.00418232, 0.00029746, -0.00035057],
                    [0.00029746, 0.00407264, 0.00019496],
                    [-0.00035057, 0.00019496, 0.00509413]])
    assert_allclose(r.cov_params(), c, 0.3, 0)
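
When a formula is supplied instead of model_args_fn, MI builds each analysis model directly from the imputed DataFrame. Roughly (a sketch assuming the formula path goes through from_formula, not the exact library code):

# Sketch of the formula path: imp.data is the current imputed DataFrame
model = sm.OLS.from_formula(fml, data=imp.data)
result = model.fit()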
Example #5
def test_MI_stat():
    # Test for MI where we know statistically what should happen. The
    # analysis model is x0 ~ x1 with standard error 1/sqrt(n) for the
    # slope parameter.  The nominal n is 1000, but half of the cases
    # have missing x1.  Then we introduce x2 that is either
    # independent of x1, or almost perfectly correlated with x1.  In
    # the first case the SE is 1/sqrt(500), in the second case the SE
    # is 1/sqrt(1000).

    np.random.seed(414)
    z = np.random.normal(size=(1000, 3))
    z[:, 0] += 0.5 * z[:, 1]

    # Control the degree to which x2 proxies for x1
    exp = [1 / np.sqrt(500), 1 / np.sqrt(1000)]
    fmi = [0.5, 0]
    for j, rho in enumerate((0, 0.9999)):

        x = z.copy()
        x[:, 2] = rho * x[:, 1] + np.sqrt(1 - rho**2) * x[:, 2]
        x[0:500, 1] = np.nan

        def model_args(x):
            # Return (endog, exog): regress x0 on x1
            return (x[:, 0], x[:, 1])

        np.random.seed(2342)
        imp = BayesGaussMI(x.copy())
        mi = MI(imp, sm.OLS, model_args, nrep=100, skip=10)
        r = mi.fit()

        # Check the SE
        d = np.abs(r.bse[0] - exp[j]) / exp[j]
        assert (d < 0.03)

        # Check the FMI
        d = np.abs(r.fmi[0] - fmi[j])
        assert (d < 0.05)
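
The SE and FMI values checked above follow from Rubin's rules for combining multiply-imputed fits. A minimal sketch of the pooling arithmetic (illustrative; MI.fit performs the equivalent internally):

import numpy as np

def rubin_pool(params, covs):
    # params: (m, p) per-imputation estimates; covs: (m, p, p) covariances
    params = np.asarray(params)
    m = params.shape[0]
    qbar = params.mean(0)                        # pooled point estimate
    w = np.mean(covs, axis=0)                    # within-imputation variance
    b = np.atleast_2d(np.cov(params.T, ddof=1))  # between-imputation variance
    t = w + (1 + 1 / m) * b                      # total variance
    fmi = (1 + 1 / m) * np.diag(b) / np.diag(t)  # fraction of missing info
    return qbar, np.sqrt(np.diag(t)), fmi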
Example #7
            # Fragment: assumes dx, stats, female, dz, first, astats and
            # bp_var are defined by enclosing loops over sex and visit.
            a = dx.agg(stats)
            a = a.T
            a["Female"] = female
            a["Visit"] = "first" if dz is first else "last"
            astats.append(a)
    astats = pd.concat(astats, axis=0)
    astats = astats.rename(columns={"mean": "Mean", "len": "N"})
    sname = "%s_table1.csv" % bp_var
    sname = sname.lower()
    sname = sname.replace("mean_", "")
    astats.to_csv(sname)

    # Table based on observations
    y = x.groupby("Female").ID.agg(len)

rslt = imp.fit(results_cb=lambda x: x)

mm = rslt.results[0].model

# Distinct subjects: columns of the first variance-component design
# matrices, summed over groups
nobs = sum(m.shape[1] for m in mm.exog_vc.mats[0])

# Per-imputation log-likelihoods and scale estimates
ic = [r.llf for r in rslt.results]
sca = [r.scale for r in rslt.results]

out.write("%s\n" % impvar)
out.write("%d distinct subjects\n" % nobs)
out.write("%d distinct mothers\n" % mm.n_groups)
out.write("mean IC %f\n" % np.mean(ic))
out.write("mean scale %f\n" % np.mean(sca))
out.write(rslt.summary().as_text())
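
A note on results_cb: it is called on each per-imputation results object and the return values are collected on the pooled result, so the identity callback above retains every fit, which is what makes the per-fit llf and scale summaries possible. A slimmer callback (hypothetical usage) can keep just what is needed:

# Store only the per-fit statistics instead of whole results objects
rslt = imp.fit(results_cb=lambda r: (r.llf, r.scale))
ic = [llf for llf, _ in rslt.results]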