コード例 #1
0
ファイル: test_gee.py プロジェクト: lema655/statsmodels
    def test_nominal(self):
        # Fit a nominal-response GEE under two working dependence
        # structures (independence, then global odds ratio) and compare
        # the estimates with stored reference values.

        raw_endog, raw_exog, raw_groups = load_data(
            "gee_nominal_1.csv", icept=False)

        stacked = np.concatenate(
            (raw_endog[:, None], raw_exog, raw_groups[:, None]), axis=1)

        # Recode as indicators
        endog, exog, exog_ne, nlevel = gee_setup_nominal(stacked, 0, [3])

        # Group labels are read from the first column of exog_ne.
        groups = exog_ne[:, 0]

        family = Multinomial(3)

        # --- Independence working correlation ---
        indep_model = GEE(endog, exog, groups, None, family, Independence())
        indep_fit = indep_model.fit()

        # Reference values were produced by statsmodels.GEE itself, so
        # this is a regression check rather than an independent test.
        expected_params = np.r_[0.44944752, 0.45569985, -0.92007064,
                                -0.46766728]
        expected_se = np.r_[0.09801821, 0.07718842, 0.13229421, 0.08544553]
        assert_almost_equal(indep_fit.params, expected_params, decimal=5)
        assert_almost_equal(indep_fit.standard_errors(), expected_se,
                            decimal=5)

        # --- Global odds ratio dependence ---
        # The fit is started from the independence estimates.
        gor_model = GEE(endog, exog, groups, None, family,
                        GlobalOddsRatio(nlevel, "nominal"))
        gor_fit = gor_model.fit(start_params=indep_fit.params)

        # Reference values from statsmodels.GEE (not an independent test)
        expected_params = np.r_[0.45397549, 0.42278345, -0.91997131,
                                -0.50115943]
        expected_se = np.r_[0.09646057, 0.07405713, 0.1324629, 0.09025019]
        assert_almost_equal(gor_fit.params, expected_params, decimal=5)
        assert_almost_equal(gor_fit.standard_errors(), expected_se,
                            decimal=5)
コード例 #2
0
    def test_nested_linear(self):
        # Fit a Gaussian GEE with an independence structure and then with
        # a nested structure, checking both against stored estimates.

        family = Gaussian()

        endog, exog, group = load_data("gee_nested_linear_1.csv")

        # Inner nesting labels: each run of ten rows is five 0s followed
        # by five 1s; stored as a single-column 2-d array.
        block_of_ten = [0] * 5 + [1] * 5
        group_n = np.array(block_of_ten * (endog.shape[0] // 10))[:, None]

        indep_model = GEE(endog, exog, group, None, family, Independence())
        indep_fit = indep_model.fit()

        # Reference values from statsmodels.GEE (not an independent test)
        expected_params = np.r_[-0.1671073, 1.00467426, -2.01723004,
                                0.97297106]
        expected_se = np.r_[0.08629606, 0.04058653, 0.04067038, 0.03777989]
        assert_almost_equal(indep_fit.params, expected_params, decimal=6)
        assert_almost_equal(indep_fit.standard_errors(), expected_se,
                            decimal=6)

        # Nested structure; the inner labels are supplied as dep_data and
        # the fit is started from the independence estimates.
        nested_model = GEE(endog, exog, group, None, family, Nested(),
                           dep_data=group_n)
        nested_fit = nested_model.fit(start_params=indep_fit.params)

        # Reference values from statsmodels.GEE (not an independent test)
        expected_params = np.r_[-0.16655319, 1.02183688, -2.00858719,
                                1.00101969]
        expected_se = np.r_[0.08632616, 0.02913582, 0.03114428, 0.02893991]
        assert_almost_equal(nested_fit.params, expected_params, decimal=6)
        assert_almost_equal(nested_fit.standard_errors(), expected_se,
                            decimal=6)
コード例 #3
0
    def test_missing_formula(self):
        # Missing data handling through the formula interface: fitting
        # with missing='drop' must match fitting on pre-cleaned data.

        nobs = 100
        endog = np.random.normal(size=nobs)
        exog1 = np.random.normal(size=nobs)
        exog2 = np.random.normal(size=nobs)
        exog3 = np.random.normal(size=nobs)
        groups = np.kron(lrange(20), np.ones(5))

        # Knock out five rows in total: three via endog, two via exog2.
        endog[0] = np.nan
        endog[5:7] = np.nan
        exog2[10:12] = np.nan

        frame = pd.DataFrame({"endog": endog, "exog1": exog1,
                              "exog2": exog2, "exog3": exog3,
                              "groups": groups})

        formula = "endog ~ exog1 + exog2 + exog3"

        dropped_model = GEE.from_formula(formula, groups, frame,
                                         missing='drop')
        dropped_fit = dropped_model.fit()

        # 95 complete rows remain; the design has an intercept plus
        # three regressors.
        assert_almost_equal(len(dropped_model.endog), 95)
        assert_almost_equal(np.asarray(dropped_model.exog.shape),
                            np.r_[95, 4])

        # Remove the incomplete rows by hand and subset the group labels
        # with the surviving index values.
        frame = frame.dropna()
        groups = groups[frame.index.values]

        clean_model = GEE.from_formula(formula, groups, frame,
                                       missing='none')
        clean_fit = clean_model.fit()

        assert_almost_equal(dropped_fit.params.values,
                            clean_fit.params.values)
        assert_almost_equal(dropped_fit.bse.values, clean_fit.bse.values)
コード例 #4
0
    def test_nominal(self):
        # Fit a nominal-response GEE (using the model's own
        # setup_nominal step) under independence and under a global odds
        # ratio structure, and compare with stored reference values.

        endog, exog, groups = load_data("gee_nominal_1.csv", icept=False)

        family = Multinomial(3)

        # --- Independence working correlation ---
        indep_model = GEE(endog, exog, groups, None, family, Independence())
        indep_model.setup_nominal()
        indep_fit = indep_model.fit()

        # Reference values were produced by statsmodels.GEE itself, so
        # this is a regression check rather than an independent test.
        expected_params = np.r_[0.44944752, 0.45569985, -0.92007064,
                                -0.46766728]
        expected_se = np.r_[0.09801821, 0.07718842, 0.13229421, 0.08544553]
        assert_almost_equal(indep_fit.params, expected_params, decimal=5)
        assert_almost_equal(indep_fit.standard_errors(), expected_se,
                            decimal=5)

        # --- Global odds ratio dependence ---
        # The fit is started from the independence estimates.
        gor_model = GEE(endog, exog, groups, None, family,
                        GlobalOddsRatio("nominal"))
        gor_model.setup_nominal()
        gor_fit = gor_model.fit(start_params=indep_fit.params)

        # Reference values from statsmodels.GEE (not an independent test)
        expected_params = np.r_[0.45397549, 0.42278345, -0.91997131,
                                -0.50115943]
        expected_se = np.r_[0.09646057, 0.07405713, 0.1324629, 0.09025019]
        assert_almost_equal(gor_fit.params, expected_params, decimal=5)
        assert_almost_equal(gor_fit.standard_errors(), expected_se,
                            decimal=5)
コード例 #5
0
ファイル: test_gee.py プロジェクト: nsolcampbell/statsmodels
    def test_ordinal(self):
        # Fit an ordinal-response GEE (binomial family, global odds ratio
        # dependence) on array data and compare with stored estimates.

        raw_endog, raw_exog, raw_groups = load_data("gee_ordinal_1.csv",
                                                    icept=False)

        stacked = np.concatenate(
            (raw_endog[:, None], raw_exog, raw_groups[:, None]), axis=1)

        # Recode as cumulative indicators
        endog, exog, intercepts, nlevel = gee_setup_ordinal(stacked, 0)

        # Put the intercept columns in front; the last column of the
        # combined array holds the group labels and is split off.
        combined = np.concatenate((intercepts, exog), axis=1)
        groups = combined[:, -1]
        design = combined[:, :-1]

        start = gee_ordinal_starting_values(raw_endog, raw_exog.shape[1])

        model = GEE(endog, design, groups, None, Binomial(),
                    GlobalOddsRatio(nlevel, "ordinal"))
        fit = model.fit(start_params=start)

        expected_params = np.r_[1.09238131, 0.02148193, -0.39879146,
                                -0.01855666, 0.02983409, 1.18123172,
                                0.01845318, -1.10233886]
        expected_se = np.r_[0.10878752, 0.10326078, 0.11171241, 0.05488705,
                            0.05995019, 0.0916574, 0.05951445, 0.08539281]

        assert_almost_equal(fit.params, expected_params, decimal=5)
        assert_almost_equal(fit.bse, expected_se, decimal=5)
コード例 #6
0
    def test_missing(self):
        # Missing data handling when GEE is called directly with arrays:
        # missing='drop' must give the same fit as removing the
        # incomplete rows by hand.

        nobs = 100
        endog = np.random.normal(size=nobs)
        exog = np.random.normal(size=(nobs, 3))
        exog[:, 0] = 1  # first column acts as the intercept
        groups = np.kron(lrange(20), np.ones(5))

        # Knock out five rows in total: three via endog, two via exog.
        endog[0] = np.nan
        endog[5:7] = np.nan
        exog[10:12, 1] = np.nan

        dropped_model = GEE(endog, exog, groups, missing='drop')
        dropped_fit = dropped_model.fit()

        assert_almost_equal(len(dropped_model.endog), 95)
        assert_almost_equal(np.asarray(dropped_model.exog.shape),
                            np.r_[95, 3])

        # Rows in which every value is finite.
        complete = np.isfinite(endog) & np.isfinite(exog).all(1)

        clean_model = GEE(endog[complete], exog[complete, :],
                          groups[complete], missing='none')
        clean_fit = clean_model.fit()

        assert_almost_equal(dropped_fit.params, clean_fit.params)
        assert_almost_equal(dropped_fit.bse, clean_fit.bse)
コード例 #7
0
ファイル: test_gee.py プロジェクト: lema655/statsmodels
    def test_ordinal_pandas(self):
        # Same ordinal GEE fit as the array-based test, but the recoding
        # helper receives a DataFrame and a column name.

        raw_endog, raw_exog, raw_groups = load_data("gee_ordinal_1.csv",
                                                    icept=False)

        frame = pd.DataFrame(np.concatenate(
            (raw_endog[:, None], raw_exog, raw_groups[:, None]), axis=1))
        frame.columns = ["endog", "x1", "x2", "x3", "x4", "x5", "group"]

        # Recode as cumulative indicators
        endog, exog, intercepts, nlevel = gee_setup_ordinal(frame, "endog")

        # Put the intercept columns in front; the last column of the
        # combined array holds the group labels and is split off.
        combined = np.concatenate((intercepts, exog), axis=1)
        groups = combined[:, -1]
        design = combined[:, :-1]

        start = gee_ordinal_starting_values(raw_endog, raw_exog.shape[1])

        model = GEE(endog, design, groups, None, Binomial(),
                    GlobalOddsRatio(nlevel, "ordinal"))
        fit = model.fit(start_params=start)

        expected_params = np.r_[1.09238131, 0.02148193, -0.39879146,
                                -0.01855666, 0.02983409, 1.18123172,
                                0.01845318, -1.10233886]
        expected_se = np.r_[0.10878752, 0.10326078, 0.11171241, 0.05488705,
                            0.05995019, 0.0916574, 0.05951445, 0.08539281]

        # Only two decimals are required here.
        assert_almost_equal(fit.params, expected_params, decimal=2)
        assert_almost_equal(fit.bse, expected_se, decimal=2)
コード例 #8
0
    def test_missing_formula(self):
        # Formula interface: a fit with missing='drop' on data containing
        # NaNs must agree with a fit on the same data cleaned by hand.

        size = 100
        endog = np.random.normal(size=size)
        exog1 = np.random.normal(size=size)
        exog2 = np.random.normal(size=size)
        exog3 = np.random.normal(size=size)
        groups = np.kron(lrange(20), np.ones(5))

        # Five incomplete rows: 0, 5, 6 (endog) and 10, 11 (exog2).
        endog[0] = np.nan
        endog[5:7] = np.nan
        exog2[10:12] = np.nan

        columns = {"endog": endog, "exog1": exog1, "exog2": exog2,
                   "exog3": exog3, "groups": groups}
        data = pd.DataFrame(columns)

        fml = "endog ~ exog1 + exog2 + exog3"

        model_drop = GEE.from_formula(fml, groups, data, missing='drop')
        result_drop = model_drop.fit()

        # 95 rows survive; the design is intercept + three regressors.
        assert_almost_equal(len(model_drop.endog), 95)
        assert_almost_equal(np.asarray(model_drop.exog.shape),
                            np.r_[95, 4])

        # Clean the frame manually and keep the matching group labels.
        data = data.dropna()
        groups = groups[data.index.values]

        model_clean = GEE.from_formula(fml, groups, data, missing='none')
        result_clean = model_clean.fit()

        assert_almost_equal(result_drop.params.values,
                            result_clean.params.values)
        assert_almost_equal(result_drop.bse.values,
                            result_clean.bse.values)
コード例 #9
0
    def test_default_time(self):
        # A model fit without a time argument must match one fit with
        # explicit within-group times 0, 1, 2, ...

        endog, exog, group = load_data("gee_logistic_1.csv")

        # Explicit time values for the autoregressive model: within each
        # group the observations are numbered consecutively from zero.
        times = np.zeros(len(endog))
        for label in set(group):
            members = np.flatnonzero(group == label)
            times[members] = lrange(len(members))

        family = Binomial()
        # The same covariance structure instance is passed to both
        # models, as in the original test.
        autoreg = Autoregressive()

        default_model = GEE(endog, exog, group, family=family,
                            cov_struct=autoreg)
        default_fit = default_model.fit()

        explicit_model = GEE(endog, exog, group, time=times, family=family,
                             cov_struct=autoreg)
        explicit_fit = explicit_model.fit()

        assert_almost_equal(default_fit.params, explicit_fit.params,
                            decimal=6)
        assert_almost_equal(default_fit.standard_errors(),
                            explicit_fit.standard_errors(), decimal=6)
コード例 #10
0
def BuildPoissonModels(hist_data, feature_list, comp_data=None):
    """Build score predictions via (linear) Poisson regression.

    One GEE model with a Poisson family and an independence covariance
    structure is fit per team score, using ``feature_list`` as the
    regressors.  Predicted scores are written back as new columns, so the
    frames passed in are modified in place.

    Parameters
    ----------
    hist_data : DataFrame
        Historical data containing ``team_1_score``, ``team_2_score`` and
        every column named in ``feature_list``.  Gains the columns
        ``team_1_score_pred`` and ``team_2_score_pred``.
    feature_list : sequence of str
        Column names joined with ``+`` to form the right-hand side of the
        model formulas.
    comp_data : DataFrame, optional
        Data to score with the fitted models.  Gains the two prediction
        columns plus ``team_1_prob``, ``team_tie_prob`` and
        ``team_2_prob``.

    Returns
    -------
    DataFrame or tuple of DataFrame
        ``hist_data`` alone when ``comp_data`` is None, otherwise the pair
        ``(hist_data, comp_data)``.
    """
    rhs = " + ".join(feature_list)
    formula_1 = "team_1_score ~ " + rhs
    formula_2 = "team_2_score ~ " + rhs

    # Use GEE with an independence assumption to fit the Poisson models.
    # NOTE(review): the response column itself is passed as the grouping
    # variable; presumably a placeholder given the independence
    # structure -- confirm this is intended.
    fam = Poisson()
    ind = Independence()

    model_1 = GEE.from_formula(formula_1,
                               "team_1_score",
                               hist_data,
                               cov_struct=ind,
                               family=fam)
    model_2 = GEE.from_formula(formula_2,
                               "team_2_score",
                               hist_data,
                               cov_struct=ind,
                               family=fam)

    model_1_fit = model_1.fit()
    model_2_fit = model_2.fit()
    print(model_1_fit.summary())

    hist_data['team_1_score_pred'] = model_1_fit.predict(hist_data)
    hist_data['team_2_score_pred'] = model_2_fit.predict(hist_data)

    # Return only the historical data if comp_data wasn't passed.
    if comp_data is None:
        return hist_data

    # Score the comparison data with both fitted models.
    comp_data['team_1_score_pred'] = model_1_fit.predict(
        comp_data[feature_list])
    comp_data['team_2_score_pred'] = model_2_fit.predict(
        comp_data[feature_list])

    # skellam is evaluated with (k, team 1 mean, team 2 mean); the
    # distribution functions broadcast over arrays, so no row-wise apply
    # is needed.
    mu_1 = comp_data['team_1_score_pred'].values
    mu_2 = comp_data['team_2_score_pred'].values

    # cdf(0) covers differences <= 0, so the complement is "team 1 ahead".
    comp_data['team_1_prob'] = 1 - skellam.cdf(0, mu_1, mu_2)
    # Probability mass at a difference of exactly zero.
    comp_data['team_tie_prob'] = skellam.pmf(0, mu_1, mu_2)
    # cdf(-1) covers differences <= -1, i.e. "team 2 ahead".
    comp_data['team_2_prob'] = skellam.cdf(-1, mu_1, mu_2)

    return hist_data, comp_data
コード例 #11
0
def test_missing():
    # gh-1877
    # A formula-based GEE built on data with missing values must end up
    # with the same endog, exog, groups and parameter estimates as a
    # model built from the same data with the incomplete rows dropped.
    data = [['id', 'al', 'status', 'fake', 'grps'],
            ['4A', 'A', 1, 1, 0],
            ['5A', 'A', 1, 2.0, 1],
            ['6A', 'A', 1, 3, 2],
            ['7A', 'A', 1, 2.0, 3],
            ['8A', 'A', 1, 1, 4],
            ['9A', 'A', 1, 2.0, 5],
            ['11A', 'A', 1, 1, 6],
            ['12A', 'A', 1, 2.0, 7],
            ['13A', 'A', 1, 1, 8],
            ['14A', 'A', 1, 1, 9],
            ['15A', 'A', 1, 1, 10],
            ['16A', 'A', 1, 2.0, 11],
            ['17A', 'A', 1, 3.0, 12],
            ['18A', 'A', 1, 3.0, 13],
            ['19A', 'A', 1, 2.0, 14],
            ['20A', 'A', 1, 2.0, 15],
            ['2C', 'C', 0, 3.0, 0],
            ['3C', 'C', 0, 1, 1],
            ['4C', 'C', 0, 1, 2],
            ['5C', 'C', 0, 2.0, 3],
            ['6C', 'C', 0, 1, 4],
            ['9C', 'C', 0, 1, 5],
            ['10C', 'C', 0, 3, 6],
            ['12C', 'C', 0, 3, 7],
            ['14C', 'C', 0, 2.5, 8],
            ['15C', 'C', 0, 1, 9],
            ['17C', 'C', 0, 1, 10],
            ['22C', 'C', 0, 1, 11],
            ['23C', 'C', 0, 1, 12],
            ['24C', 'C', 0, 1, 13],
            ['32C', 'C', 0, 2.0, 14],
            ['35C', 'C', 0, 1, 15]]

    # The first row holds the column names.
    df = pd.DataFrame(data[1:], columns=data[0])

    # Turn every 'fake' value equal to 1 into a missing value.  Label
    # based .loc is used because DataFrame.ix no longer exists in
    # current pandas.
    df.loc[df.fake == 1, 'fake'] = np.nan
    mod = smf.gee('status ~ fake', data=df, groups='grps',
                  cov_struct=sm.cov_struct.Independence(),
                  family=sm.families.Binomial())

    # Build the comparison model on the complete rows with an explicit
    # constant column.
    df = df.dropna()
    df['constant'] = 1

    mod2 = GEE(df.status, df[['constant', 'fake']], groups=df.grps,
               cov_struct=sm.cov_struct.Independence(),
               family=sm.families.Binomial())

    assert_equal(mod.endog, mod2.endog)
    assert_equal(mod.exog, mod2.exog)
    assert_equal(mod.groups, mod2.groups)

    res = mod.fit()
    res2 = mod2.fit()

    assert_almost_equal(res.params.values, res2.params.values)
コード例 #12
0
def test_missing():
    # gh-1877
    # Compare a formula-based GEE fit on data containing missing values
    # with an array-based fit on the same data after dropping the
    # incomplete rows: model arrays and estimates must agree.
    data = [['id', 'al', 'status', 'fake', 'grps'],
            ['4A', 'A', 1, 1, 0],
            ['5A', 'A', 1, 2.0, 1],
            ['6A', 'A', 1, 3, 2],
            ['7A', 'A', 1, 2.0, 3],
            ['8A', 'A', 1, 1, 4],
            ['9A', 'A', 1, 2.0, 5],
            ['11A', 'A', 1, 1, 6],
            ['12A', 'A', 1, 2.0, 7],
            ['13A', 'A', 1, 1, 8],
            ['14A', 'A', 1, 1, 9],
            ['15A', 'A', 1, 1, 10],
            ['16A', 'A', 1, 2.0, 11],
            ['17A', 'A', 1, 3.0, 12],
            ['18A', 'A', 1, 3.0, 13],
            ['19A', 'A', 1, 2.0, 14],
            ['20A', 'A', 1, 2.0, 15],
            ['2C', 'C', 0, 3.0, 0],
            ['3C', 'C', 0, 1, 1],
            ['4C', 'C', 0, 1, 2],
            ['5C', 'C', 0, 2.0, 3],
            ['6C', 'C', 0, 1, 4],
            ['9C', 'C', 0, 1, 5],
            ['10C', 'C', 0, 3, 6],
            ['12C', 'C', 0, 3, 7],
            ['14C', 'C', 0, 2.5, 8],
            ['15C', 'C', 0, 1, 9],
            ['17C', 'C', 0, 1, 10],
            ['22C', 'C', 0, 1, 11],
            ['23C', 'C', 0, 1, 12],
            ['24C', 'C', 0, 1, 13],
            ['32C', 'C', 0, 2.0, 14],
            ['35C', 'C', 0, 1, 15]]

    # Header row first, records after it.
    df = pd.DataFrame(data[1:], columns=data[0])

    # Mark every 'fake' value of 1 as missing.  DataFrame.ix has been
    # removed from pandas, so the label-based .loc indexer is used.
    df.loc[df.fake == 1, 'fake'] = np.nan
    mod = smf.gee('status ~ fake', data=df, groups='grps',
                  cov_struct=sm.cov_struct.Independence(),
                  family=sm.families.Binomial())

    # Reference model: complete rows only, with an explicit constant.
    df = df.dropna()
    df['constant'] = 1

    mod2 = GEE(df.status, df[['constant', 'fake']], groups=df.grps,
               cov_struct=sm.cov_struct.Independence(),
               family=sm.families.Binomial())

    assert_equal(mod.endog, mod2.endog)
    assert_equal(mod.exog, mod2.exog)
    assert_equal(mod.groups, mod2.groups)

    res = mod.fit()
    res2 = mod2.fit()

    assert_almost_equal(res.params.values, res2.params.values)
コード例 #13
0
ファイル: test_gee.py プロジェクト: lema655/statsmodels
    def test_post_estimation(self):
        # For a Gaussian fit, fittedvalues must equal exog . params and
        # resid must equal endog minus that product.

        endog, exog, group = load_data("gee_linear_1.csv")

        model = GEE(endog, exog, group, None, Gaussian(), Exchangeable())
        fit = model.fit()

        linear_part = np.dot(exog, fit.params)

        assert_almost_equal(linear_part, fit.fittedvalues)
        assert_almost_equal(endog - linear_part, fit.resid)
コード例 #14
0
ファイル: test_gee.py プロジェクト: lema655/statsmodels
    def test_post_estimation(self):
        # Check the post-estimation attributes of a Gaussian fit with an
        # exchangeable structure: fittedvalues and resid must be
        # consistent with exog, endog and the estimated parameters.

        endog, exog, group = load_data("gee_linear_1.csv")

        gaussian = Gaussian()
        exchangeable = Exchangeable()
        result = GEE(endog, exog, group, None, gaussian, exchangeable).fit()

        predicted = np.dot(exog, result.params)

        assert_almost_equal(predicted, result.fittedvalues)
        assert_almost_equal(endog - predicted, result.resid)
コード例 #15
0
ファイル: test_gee.py プロジェクト: d8aninja/statsmodels
    def test_wrapper(self):
        # Fit a Poisson GEE on pandas inputs and run check_wrapper on the
        # results object.

        raw_endog, raw_exog, raw_groups = load_data("gee_poisson_1.csv",
                                                    icept=False)

        # Wrap the loaded arrays in pandas containers.
        endog = pd.Series(raw_endog)
        exog = pd.DataFrame(raw_exog)
        group_n = pd.Series(raw_groups)

        model = GEE(endog, exog, group_n, None, Poisson(), Independence())
        result = model.fit()

        check_wrapper(result)
コード例 #16
0
    def test_scoretest(self):
        # Regression tests for the score test of a linear constraint.
        # The random draws are made in a fixed order after seeding, since
        # the stored statistics depend on them.

        np.random.seed(6432)
        nobs = 200  # Must be divisible by 4
        exog = np.random.normal(size=(nobs, 4))
        endog = exog[:, 0] + exog[:, 1] + exog[:, 2]
        endog += 3 * np.random.normal(size=nobs)
        group = np.kron(np.arange(nobs / 4), np.ones(4))

        # Constraint matrix: coefficient 0 minus coefficient 1.
        lhs = np.array([[1., -1, 0, 0]])

        # (right-hand side, expected statistic, expected p-value); the
        # first case is under the null, the second under the alternative.
        cases = [
            (0., 1.08126334, 0.2984151086),
            (1.0, 3.491110965, 0.0616991659),
        ]
        for rhs_value, expected_stat, expected_pvalue in cases:
            rhs = np.array([rhs_value, ])
            model = GEE(endog, exog, group, family=Gaussian(),
                        cov_struct=Independence(), constraint=(lhs, rhs))
            # Fitting populates score_test_results on the model.
            model.fit()
            assert_almost_equal(model.score_test_results["statistic"],
                                expected_stat)
            assert_almost_equal(model.score_test_results["p-value"],
                                expected_pvalue)

        # Compare to Wald tests
        exog = np.random.normal(size=(nobs, 2))
        lhs = np.array([[1, -1]])
        rhs = np.array([0.])
        contrast = np.r_[1, -1]
        for step in range(10):
            slope = 0.5 + step / 10.
            endog = exog[:, 0] + slope * exog[:, 1] + \
                np.random.normal(size=nobs)

            # Unconstrained fit for the Wald test.
            free_model = GEE(endog, exog, group, family=Gaussian(),
                             cov_struct=Independence())
            free_fit = free_model.fit()

            # Constrained fit for the score test.
            constrained_model = GEE(endog, exog, group, family=Gaussian(),
                                    cov_struct=Independence(),
                                    constraint=(lhs, rhs))
            constrained_model.fit()

            std_err = np.sqrt(
                np.dot(contrast, np.dot(free_fit.cov_params(), contrast)))
            wald_z = np.dot(contrast, free_fit.params) / std_err
            wald_p = 2 * norm.cdf(-np.abs(wald_z))
            score_p = constrained_model.score_test_results["p-value"]

            # The two p-values must agree to within 0.02.
            assert_array_less(np.abs(wald_p - score_p), 0.02)
コード例 #17
0
    def test_wrapper(self):
        # Results from a Poisson GEE built on pandas objects are handed
        # to check_wrapper.

        loaded = load_data("gee_poisson_1.csv", icept=False)
        endog, exog, group_n = loaded

        # Convert each array to its pandas counterpart.
        endog = pd.Series(endog)
        exog = pd.DataFrame(exog)
        group_n = pd.Series(group_n)

        poisson = Poisson()
        independence = Independence()

        fitted = GEE(endog, exog, group_n, None, poisson,
                     independence).fit()

        check_wrapper(fitted)
コード例 #18
0
    def test_predict_exposure(self):
        # predict() must give the same values whether offset and exposure
        # are left at their defaults or passed explicitly, both for the
        # full sample and for the last ten rows.

        nobs = 50
        X1 = np.random.normal(size=nobs)
        X2 = np.random.normal(size=nobs)
        groups = np.kron(np.arange(25), np.r_[1, 1])
        offset = np.random.uniform(1, 2, size=nobs)
        exposure = np.random.uniform(1, 2, size=nobs)
        mean = 0.1*(X1 + X2) + offset + np.log(exposure)
        Y = np.random.poisson(mean, size=nobs)
        data = pd.DataFrame({"Y": Y, "X1": X1, "X2": X2, "groups": groups,
                             "offset": offset, "exposure": exposure})

        model = GEE.from_formula("Y ~ X1 + X2", groups, data,
                                 family=Poisson(),
                                 offset="offset", exposure="exposure")
        result = model.fit()
        assert_equal(result.converged, True)

        last_ten = slice(-10, None)
        offset_col = data["offset"]
        exposure_col = data["exposure"]

        baseline = result.predict()
        with_offset = result.predict(offset=offset_col)
        with_exposure = result.predict(exposure=exposure_col)
        with_both = result.predict(offset=offset_col, exposure=exposure_col)
        tail_formula = result.predict(exog=data[last_ten],
                                      offset=offset_col[last_ten],
                                      exposure=exposure_col[last_ten])
        # without patsy
        tail_arrays = result.predict(exog=result.model.exog[last_ten],
                                     offset=offset_col[last_ten],
                                     exposure=exposure_col[last_ten],
                                     transform=False)

        assert_allclose(baseline, with_offset)
        assert_allclose(baseline, with_exposure)
        assert_allclose(baseline, with_both)
        assert_allclose(baseline[last_ten], tail_formula)
        assert_allclose(baseline[last_ten], tail_arrays)
コード例 #19
0
ファイル: test_gee.py プロジェクト: d8aninja/statsmodels
    def test_sensitivity(self):
        # Check params_sensitivity on an exchangeable Gaussian GEE fit by
        # formula to simulated data.

        exchangeable = Exchangeable()
        gaussian = Gaussian()

        nobs = 100
        # The draw order (Y, X1, X2) is kept unchanged.
        Y = np.random.normal(size=nobs)
        X1 = np.random.normal(size=nobs)
        X2 = np.random.normal(size=nobs)
        groups = np.kron(np.arange(50), np.r_[1, 1])

        frame = pd.DataFrame({"Y": Y, "X1": X1, "X2": X2})

        model = GEE.from_formula("Y ~ X1 + X2",
                                 groups,
                                 frame,
                                 family=gaussian,
                                 cov_struct=exchangeable)
        fit = model.fit()

        sens = fit.params_sensitivity(0, 0.5, 2)
        assert_almost_equal(len(sens), 2)

        dep_params = [r.cov_struct.dep_params for r in sens]
        assert_almost_equal(dep_params, [0.0, 0.5])

        # Regression test
        # NOTE(review): no seed is set inside this method, so these
        # stored values presumably rely on random state fixed elsewhere
        # -- confirm.
        first_params = [r.params[0] for r in sens]
        assert_almost_equal(first_params, np.r_[-0.1256575, -0.126747036])
コード例 #20
0
ファイル: test_gee.py プロジェクト: lema655/statsmodels
    def test_compare_OLS(self):
        """
        Compare a Gaussian GEE under independence with OLS.

        The parameter estimates, and the t-values built from the naive
        covariance estimate, must match the OLS results to 10 decimals.
        """

        independence = Independence()
        gaussian = Gaussian()

        nobs = 100
        Y = np.random.normal(size=nobs)
        X1 = np.random.normal(size=nobs)
        X2 = np.random.normal(size=nobs)
        X3 = np.random.normal(size=nobs)
        groups = np.kron(range(20), np.ones(5))

        frame = pd.DataFrame({"Y": Y, "X1": X1, "X2": X2, "X3": X3})

        formula = "Y ~ X1 + X2 + X3"

        gee_model = GEE.from_formula(formula, frame, None, groups=groups,
                                     family=gaussian, covstruct=independence)
        gee_fit = gee_model.fit()

        ols_fit = sm.ols(formula, data=frame).fit()

        assert_almost_equal(ols_fit.params.values, gee_fit.params,
                            decimal=10)

        # t-values from the naive covariance: estimate / standard error.
        naive_se = np.sqrt(np.diag(gee_fit.naive_covariance))
        naive_tvalues = gee_fit.params / naive_se
        assert_almost_equal(naive_tvalues, ols_fit.tvalues, decimal=10)
コード例 #21
0
    def test_compare_OLS(self):
        # A Gaussian GEE with independence correlation is compared with
        # OLS: parameter estimates, naive standard errors and the
        # t-values derived from the naive covariance must match to 10
        # decimals.

        independence = Independence()
        gaussian = Gaussian()

        nobs = 100
        Y = np.random.normal(size=nobs)
        X1 = np.random.normal(size=nobs)
        X2 = np.random.normal(size=nobs)
        X3 = np.random.normal(size=nobs)
        groups = np.kron(lrange(20), np.ones(5))

        frame = pd.DataFrame({"Y": Y, "X1": X1, "X2": X2, "X3": X3})

        formula = "Y ~ X1 + X2 + X3"

        gee_fit = GEE.from_formula(formula, groups, frame,
                                   family=gaussian,
                                   cov_struct=independence).fit()

        # Work with the unwrapped OLS results; the assert helpers do not
        # work on the wrapper.
        ols_fit = smf.ols(formula, data=frame).fit()._results

        assert_almost_equal(ols_fit.params, gee_fit.params, decimal=10)

        naive_se = gee_fit.standard_errors(cov_type="naive")
        assert_almost_equal(ols_fit.bse, naive_se, decimal=10)

        naive_tvalues = gee_fit.params / np.sqrt(np.diag(gee_fit.cov_naive))
        assert_almost_equal(naive_tvalues, ols_fit.tvalues, decimal=10)
コード例 #22
0
    def t_est_missing(self):
        # Fit by formula with missing='drop' on data containing NaNs and
        # check the size of the arrays the model ends up with.
        # NOTE(review): the name lacks the usual "test_" prefix, so this
        # is presumably not collected by the test runner -- confirm.

        nobs = 100
        Y = np.random.normal(size=nobs)
        X1 = np.random.normal(size=nobs)
        X2 = np.random.normal(size=nobs)
        X3 = np.random.normal(size=nobs)
        groups = np.kron(lrange(20), np.ones(5))

        # Five incomplete rows: 0, 5, 6 (via Y) and 10, 11 (via X2).
        Y[0] = np.nan
        Y[5:7] = np.nan
        X2[10:12] = np.nan

        frame = pd.DataFrame({"Y": Y, "X1": X1, "X2": X2, "X3": X3,
                              "groups": groups})

        model = GEE.from_formula("Y ~ X1 + X2 + X3", frame, None,
                                 groups=frame["groups"], missing='drop')
        model.fit()

        # 95 complete rows; intercept plus three regressors.
        assert len(model.endog) == 95
        assert model.exog.shape == (95, 4)
コード例 #23
0
    def test_predict_exposure(self):
        # Predictions from a Poisson GEE with offset and exposure must
        # not depend on whether those are passed explicitly to predict().

        size = 50
        X1 = np.random.normal(size=size)
        X2 = np.random.normal(size=size)
        groups = np.kron(np.arange(25), np.r_[1, 1])
        offset = np.random.uniform(1, 2, size=size)
        exposure = np.random.uniform(1, 2, size=size)
        Y = np.random.poisson(
            0.1*(X1 + X2) + offset + np.log(exposure), size=size)
        data = pd.DataFrame({"Y": Y, "X1": X1, "X2": X2, "groups": groups,
                             "offset": offset, "exposure": exposure})

        fml = "Y ~ X1 + X2"
        result = GEE.from_formula(fml, groups, data, family=Poisson(),
                                  offset="offset",
                                  exposure="exposure").fit()
        assert_equal(result.converged, True)

        # Full-sample predictions with every combination of explicit
        # offset / exposure.
        reference = result.predict()
        full_sample = [
            result.predict(offset=data["offset"]),
            result.predict(exposure=data["exposure"]),
            result.predict(offset=data["offset"],
                           exposure=data["exposure"]),
        ]

        # Predictions for the last ten rows, through the formula
        # machinery and then without patsy.
        last_rows = [
            result.predict(exog=data[-10:],
                           offset=data["offset"][-10:],
                           exposure=data["exposure"][-10:]),
            result.predict(exog=result.model.exog[-10:],
                           offset=data["offset"][-10:],
                           exposure=data["exposure"][-10:],
                           transform=False),
        ]

        for pred in full_sample:
            assert_allclose(reference, pred)
        for pred in last_rows:
            assert_allclose(reference[-10:], pred)
コード例 #24
0
ファイル: test_gee.py プロジェクト: d8aninja/statsmodels
    def setup_class(cls):
        # Build a formula-based Poisson GEE model with an independence
        # structure and store it, with a start vector, on the class.

        endog, exog, group_n = load_data("gee_poisson_1.csv")

        # Test with formulas: the frame holds the response, the group id
        # and every exog column except the first.
        frame = pd.DataFrame(np.concatenate(
            (endog[:, None], group_n[:, None], exog[:, 1:]), axis=1))
        regressor_names = ["X%d" % k for k in range(1, exog.shape[1])]
        frame.columns = ["Y", "Id"] + regressor_names

        cls.mod = GEE.from_formula("Y ~ X1 + X2 + X3 + X4 + X5",
                                   "Id",
                                   frame,
                                   family=Poisson(),
                                   cov_struct=Independence())

        cls.start_params = np.array([-0.03644504, -0.05432094, 0.01566427,
                                     0.57628591, -0.0046566, -0.47709315])
コード例 #25
0
    def test_poisson_epil(self):
        # On the epil data, a Poisson GEE with independence structure
        # must give the same coefficients and scale as the matching GLM.

        here = os.path.dirname(os.path.abspath(__file__))
        data = pd.read_csv(os.path.join(here, "results", "epil.csv"))

        formula = "y ~ age + trt + base"

        gee_model = GEE.from_formula(formula, data["subject"], data,
                                     cov_struct=Independence(),
                                     family=Poisson())
        gee_fit = gee_model.fit()

        # Coefficients should agree with GLM
        from statsmodels.genmod.generalized_linear_model import GLM
        from statsmodels.genmod import families

        glm_model = GLM.from_formula(formula, data,
                                     family=families.Poisson())
        glm_fit = glm_model.fit(scale="X2")

        # Compare the unwrapped results; the assert helpers do not work
        # on the wrappers.
        gee_res = gee_fit._results
        glm_res = glm_fit._results

        assert_almost_equal(gee_res.params, glm_res.params, decimal=6)
        assert_almost_equal(gee_res.scale, glm_res.scale, decimal=6)
コード例 #26
0
ファイル: test_gee.py プロジェクト: lema655/statsmodels
    def test_compare_OLS(self):
        """
        Check a Gaussian independence GEE against an OLS fit.

        Both the parameter estimates and the t-values computed from the
        naive covariance estimate must agree with OLS to 10 decimals.
        """

        working_cov = Independence()
        fam = Gaussian()

        Y = np.random.normal(size=100)
        X1 = np.random.normal(size=100)
        X2 = np.random.normal(size=100)
        X3 = np.random.normal(size=100)
        groups = np.kron(range(20), np.ones(5))

        data = pd.DataFrame({"Y": Y, "X1": X1, "X2": X2, "X3": X3})

        gee_result = GEE.from_formula("Y ~ X1 + X2 + X3",
                                      data,
                                      None,
                                      groups=groups,
                                      family=fam,
                                      covstruct=working_cov).fit()

        ols_result = sm.ols("Y ~ X1 + X2 + X3", data=data).fit()

        assert_almost_equal(ols_result.params.values, gee_result.params,
                            decimal=10)

        # Naive t-values: estimates over the square roots of the diagonal
        # of the naive covariance.
        naive_variances = np.diag(gee_result.naive_covariance)
        naive_tvalues = gee_result.params / np.sqrt(naive_variances)
        assert_almost_equal(naive_tvalues, ols_result.tvalues, decimal=10)
コード例 #27
0
    def test_compare_OLS(self):
        # Check a Gaussian independence GEE against OLS on simulated
        # data: estimates, naive standard errors and naive t-values are
        # all required to match to 10 decimals.

        working_cov = Independence()
        fam = Gaussian()

        Y = np.random.normal(size=100)
        X1 = np.random.normal(size=100)
        X2 = np.random.normal(size=100)
        X3 = np.random.normal(size=100)
        groups = np.kron(lrange(20), np.ones(5))

        data = pd.DataFrame({"Y": Y, "X1": X1, "X2": X2, "X3": X3})

        gee_model = GEE.from_formula("Y ~ X1 + X2 + X3", groups, data,
                                     family=fam, cov_struct=working_cov)
        gee_result = gee_model.fit()

        ols_wrapped = smf.ols("Y ~ X1 + X2 + X3", data=data).fit()

        # Unwrap the OLS results: the assert helpers do not work on the
        # wrapper.
        ols_result = ols_wrapped._results

        assert_almost_equal(ols_result.params, gee_result.params,
                            decimal=10)

        assert_almost_equal(ols_result.bse,
                            gee_result.standard_errors(cov_type="naive"),
                            decimal=10)

        naive_variances = np.diag(gee_result.cov_naive)
        naive_tvalues = gee_result.params / np.sqrt(naive_variances)
        assert_almost_equal(naive_tvalues, ols_result.tvalues, decimal=10)
コード例 #28
0
ファイル: test_gee.py プロジェクト: d8aninja/statsmodels
    def test_poisson_epil(self):
        """Compare a Poisson GEE (independence structure) with a Poisson GLM
        on the epil data set: parameters and scale should agree."""

        here = os.path.dirname(os.path.abspath(__file__))
        data = pd.read_csv(os.path.join(here, "results", "epil.csv"))

        formula = "y ~ age + trt + base"

        gee_model = GEE.from_formula(formula,
                                     data["subject"],
                                     data,
                                     cov_struct=Independence(),
                                     family=Poisson())
        gee_fit = gee_model.fit()

        # Coefficients should agree with GLM
        from statsmodels.genmod.generalized_linear_model import GLM
        from statsmodels.genmod import families

        glm_model = GLM.from_formula(formula,
                                     data,
                                     family=families.Poisson())
        glm_fit = glm_model.fit(scale="X2")

        # don't use wrapper, asserts_xxx don't work
        gee_raw = gee_fit._results
        glm_raw = glm_fit._results

        assert_almost_equal(gee_raw.params, glm_raw.params, decimal=6)
        assert_almost_equal(gee_raw.scale, glm_raw.scale, decimal=6)
コード例 #29
0
ファイル: test_gee.py プロジェクト: d8aninja/statsmodels
    def test_compare_poisson(self):
        """Poisson GEE with an independence structure should give the same
        coefficients as the formula-based Poisson model from `sm.poisson`."""

        nobs = 100
        # Draw order (Y, X1, X2, X3, groups) is preserved.
        Y = np.ceil(-np.log(np.random.uniform(size=nobs)))
        X1 = np.random.normal(size=nobs)
        X2 = np.random.normal(size=nobs)
        X3 = np.random.normal(size=nobs)
        groups = np.random.randint(0, 4, size=nobs)

        D = pd.DataFrame({"Y": Y, "X1": X1, "X2": X2, "X3": X3})
        formula = "Y ~ X1 + X2 + X3"

        gee_fit = GEE.from_formula(formula,
                                   groups,
                                   D,
                                   family=Poisson(),
                                   cov_struct=Independence()).fit()

        poisson_fit = sm.poisson(formula, data=D).fit(disp=False)

        gee_coefs = gee_fit.params.values
        poisson_coefs = poisson_fit.params.values
        assert_almost_equal(gee_coefs, poisson_coefs, decimal=10)
コード例 #30
0
    def test_predict(self):
        """Check `predict` on a formula-built Gaussian GEE with an offset.

        Covers the default call, explicitly passed `offset`/`exog`, a zero
        offset, and prediction on new exogenous data.
        """

        n = 50
        X1 = np.random.normal(size=n)
        X2 = np.random.normal(size=n)
        groups = np.kron(np.arange(25), np.r_[1, 1])
        offset = np.random.uniform(1, 2, size=n)
        Y = np.random.normal(0.1*(X1 + X2) + offset, size=n)
        data = pd.DataFrame({"Y": Y, "X1": X1, "X2": X2, "groups": groups,
                             "offset": offset})

        fml = "Y ~ X1 + X2"
        model = GEE.from_formula(fml, groups, data, family=Gaussian(),
                                 offset="offset")
        result = model.fit()
        assert_equal(result.converged, True)

        pred1 = result.predict()
        pred2 = result.predict(offset=data.offset)
        pred3 = result.predict(exog=data[["X1", "X2"]], offset=data.offset)
        pred4 = result.predict(exog=data[["X1", "X2"]], offset=0*data.offset)
        pred5 = result.predict(offset=0*data.offset)

        assert_allclose(pred1, pred2)
        assert_allclose(pred1, pred3)
        # A zero offset removes the offset contribution, so add it back.
        assert_allclose(pred1, pred4 + data.offset)
        assert_allclose(pred1, pred5 + data.offset)

        x1_new = np.random.normal(size=10)
        x2_new = np.random.normal(size=10)
        new_exog = pd.DataFrame({"X1": x1_new, "X2": x2_new})
        pred6 = result.predict(exog=new_exog)
        # Index a plain array: integer keys on a label-indexed Series rely
        # on pandas' deprecated positional fallback.
        params = np.asarray(result.params)
        pred6_correct = params[0] + params[1]*x1_new + params[2]*x2_new
        assert_allclose(pred6, pred6_correct)
コード例 #31
0
    def test_poisson_epil(self):
        """Compare a Poisson GEE (independence structure) with a Poisson GLM
        on the epil data set: parameters and scale should agree."""

        csv_path = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                "results", "epil.csv")
        data = pd.read_csv(csv_path)

        formula = "y ~ age + trt + base"

        gee_fit = GEE.from_formula(formula,
                                   data,
                                   groups=data["subject"],
                                   cov_struct=Independence(),
                                   family=Poisson()).fit()

        # Coefficients should agree with GLM
        from statsmodels.genmod.generalized_linear_model import GLM
        from statsmodels.genmod import families

        glm_fit = GLM.from_formula(formula,
                                   data,
                                   family=families.Poisson()).fit(scale="X2")

        assert_almost_equal(gee_fit.params, glm_fit.params, decimal=6)
        assert_almost_equal(gee_fit.scale, glm_fit.scale, decimal=6)
コード例 #32
0
ファイル: test_gee.py プロジェクト: Garlandal/statsmodels
    def test_sensitivity(self):
        """Check `params_sensitivity` on an exchangeable Gaussian GEE: the
        number of returned fits, their dependence parameters, and the first
        coefficient of each (hard-coded regression values)."""

        np.random.seed(34234)
        nobs = 100
        Y = np.random.normal(size=nobs)
        X1 = np.random.normal(size=nobs)
        X2 = np.random.normal(size=nobs)
        D = pd.DataFrame({"Y": Y, "X1": X1, "X2": X2})

        # 50 group labels, each covering two consecutive rows
        groups = np.kron(np.arange(50), np.r_[1, 1])

        fit = GEE.from_formula("Y ~ X1 + X2", groups, D,
                               family=Gaussian(),
                               cov_struct=Exchangeable()).fit()

        ps = fit.params_sensitivity(0, 0.5, 2)
        assert_almost_equal(len(ps), 2)

        dep_values = [r.cov_struct.dep_params for r in ps]
        assert_almost_equal(dep_values, [0.0, 0.5])

        # Regression test
        first_coefs = [r.params[0] for r in ps]
        assert_almost_equal(first_coefs,
                            [0.1696214707458818, 0.17836097387799127])
コード例 #33
0
ファイル: test_gee.py プロジェクト: d8aninja/statsmodels
    def test_formulas(self):
        """
        Check formulas, especially passing groups and time as either
        variable names or arrays.
        """

        n = 100
        Y = np.random.normal(size=n)
        X1 = np.random.normal(size=n)
        mat = np.concatenate((np.ones((n, 1)), X1[:, None]), axis=1)
        Time = np.random.uniform(size=n)
        groups = np.kron(lrange(20), np.ones(5))

        data = pd.DataFrame({"Y": Y, "X1": X1, "Time": Time, "groups": groups})

        va = Autoregressive()
        family = Gaussian()

        # Reference fit from the array interface
        rslt1 = GEE(Y, mat, groups, time=Time, family=family,
                    cov_struct=va).fit()

        # Every array/name combination for (groups, time); each model is
        # built and fitted before the next one is created, as before.
        combos = ((groups, Time),
                  (groups, "Time"),
                  ("groups", Time),
                  ("groups", "Time"))
        formula_fits = []
        for groups_arg, time_arg in combos:
            mod = GEE.from_formula("Y ~ X1",
                                   groups_arg,
                                   data,
                                   time=time_arg,
                                   family=family,
                                   cov_struct=va)
            formula_fits.append(mod.fit())

        for rslt in formula_fits:
            assert_almost_equal(rslt1.params, rslt.params, decimal=8)

        # Wrapper check on the first formula fit (arrays for both arguments)
        check_wrapper(formula_fits[0])
コード例 #34
0
ファイル: test_gee.py プロジェクト: loukakouma/statsmodels
    def test_autoregressive(self):
        """Fit an autoregressive Gaussian GEE to simulated data for group
        sizes 1, 2 and 3 and compare the dependence parameter and the
        coefficients with hard-coded expected values."""

        dep_params_true = [0, 0.589208623896, 0.559823804948]

        params_true = [[1.08043787, 1.12709319, 0.90133927],
                       [0.9613677, 1.05826987, 0.90832055],
                       [1.05370439, 0.96084864, 0.93923374]]

        np.random.seed(342837482)

        num_group = 100
        ar_param = 0.5
        k = 3

        ga = Gaussian()

        cases = zip((1, 2, 3), dep_params_true, params_true)
        for gsize, dep_expected, coef_expected in cases:

            # Within-group correlation ar_param**|i - j| and its Cholesky
            # factor, used to colour the simulated errors.
            pos = np.arange(gsize)
            lags = np.abs(pos[:, None] - pos[None, :])
            chol = np.linalg.cholesky(ar_param**lags)

            endog_parts, exog_parts, group_parts = [], [], []
            for g in range(num_group):
                # Draw order per group: covariates first, then errors.
                x = np.random.normal(size=(gsize, k))
                noise = np.dot(chol, np.random.normal(size=gsize))
                exog_parts.append(x)
                endog_parts.append(x.sum(1) + noise)
                group_parts.append(g * np.ones(gsize))

            endog = np.concatenate(endog_parts)
            groups = np.concatenate(group_parts)
            exog = np.concatenate(exog_parts, axis=0)

            ar = Autoregressive()
            mdf = GEE(endog, exog, groups, family=ga, cov_struct=ar).fit()

            assert_almost_equal(ar.dep_params, dep_expected)
            assert_almost_equal(mdf.params, coef_expected)
コード例 #35
0
ファイル: test_gee.py プロジェクト: Honglang/statsmodels
    def test_autoregressive(self):
        """Simulate grouped Gaussian data with AR-type within-group errors
        for group sizes 1, 2 and 3, fit a GEE with an autoregressive
        structure, and compare against hard-coded expected values."""

        dep_params_true = [0, 0.589208623896, 0.559823804948]

        params_true = [[1.08043787, 1.12709319, 0.90133927],
                       [0.9613677, 1.05826987, 0.90832055],
                       [1.05370439, 0.96084864, 0.93923374]]

        np.random.seed(342837482)

        num_group = 100
        ar_param = 0.5
        k = 3

        ga = Gaussian()

        for case, gsize in enumerate((1, 2, 3)):

            # Cholesky factor of the matrix with entries ar_param**|i - j|
            index = np.arange(gsize)
            distance = np.abs(index[:, None] - index[None, :])
            cmat_r = np.linalg.cholesky(ar_param ** distance)

            endog, exog, groups = [], [], []
            for i in range(num_group):
                # Covariates are drawn before the errors for each group.
                x = np.random.normal(size=(gsize, k))
                exog.append(x)
                errors = np.dot(cmat_r, np.random.normal(size=gsize))
                endog.append(x.sum(1) + errors)
                groups.append(i * np.ones(gsize))

            endog = np.concatenate(endog)
            groups = np.concatenate(groups)
            exog = np.concatenate(exog, axis=0)

            ar = Autoregressive()
            md = GEE(endog, exog, groups, family=ga, cov_struct=ar)
            mdf = md.fit()

            # `case` equals gsize - 1, the index into the expected values
            assert_almost_equal(ar.dep_params, dep_params_true[case])
            assert_almost_equal(mdf.params, params_true[case])
コード例 #36
0
    def test_ordinal(self):
        """Fit an ordinal GEE (Binomial family, GlobalOddsRatio structure)
        to gee_ordinal_1.csv and compare the parameters and standard errors
        with hard-coded values."""

        endog, exog, groups = load_data("gee_ordinal_1.csv", icept=False)

        model = GEE(endog, exog, groups, None, Binomial(),
                    GlobalOddsRatio("ordinal"))
        model.setup_ordinal()
        fit = model.fit()

        expected_params = np.r_[
            1.09238131, 0.02148193, -0.39879146, -0.01855666,
            0.02983409, 1.18123172, 0.01845318, -1.10233886]
        expected_se = np.r_[
            0.10878752, 0.10326078, 0.11171241, 0.05488705,
            0.05995019, 0.0916574, 0.05951445, 0.08539281]

        assert_almost_equal(fit.params, expected_params, decimal=5)
        assert_almost_equal(fit.bse, expected_se, decimal=5)
コード例 #37
0
ファイル: test_gee.py プロジェクト: lema655/statsmodels
    def test_linear_constrained(self):
        """With the linear constraint (L, R) below, the fourth coefficient
        should be estimated as zero under both working structures."""

        nobs = 300
        exog = np.random.normal(size=(nobs, 4))
        exog[:, 0] = 1
        true_coefs = np.r_[1, 1, 0, 0.2]
        endog = np.dot(exog, true_coefs) + np.random.normal(size=nobs)

        # 100 group labels, each covering three consecutive rows
        group = np.kron(np.arange(100), np.r_[1, 1, 1])

        family = Gaussian()

        # Constraint matrix selecting the last coefficient; target value 0
        L = np.r_[[[0, 0, 0, 1]]]
        R = np.r_[0,]

        for cov_struct in (Independence(), Exchangeable()):
            model = GEE(endog, exog, group, None, family, cov_struct,
                        constraint=(L, R))
            fit = model.fit()
            assert_almost_equal(fit.params[3], 0, decimal=10)
コード例 #38
0
ファイル: test_gee.py プロジェクト: lema655/statsmodels
    def test_linear_constrained(self):
        """Fit Gaussian GEE models under a linear constraint on the last
        coefficient and check that its estimate is zero, once with an
        Independence and once with an Exchangeable structure."""

        family = Gaussian()

        # Design with a constant first column; the covariates are drawn
        # before the noise term.
        exog = np.random.normal(size=(300, 4))
        exog[:, 0] = 1
        linpred = np.dot(exog, np.r_[1, 1, 0, 0.2])
        endog = linpred + np.random.normal(size=300)
        group = np.kron(np.arange(100), np.r_[1, 1, 1])

        structures = (Independence(), Exchangeable())

        L = np.r_[[[0, 0, 0, 1]]]
        R = np.r_[0, ]
        constraint = (L, R)

        for v in structures:
            mdf = GEE(endog, exog, group, None, family, v,
                      constraint=constraint).fit()
            assert_almost_equal(mdf.params[3], 0, decimal=10)
コード例 #39
0
ファイル: test_gee.py プロジェクト: athanszheyin/statsmodels
    def test_ordinal(self):
        """Regression check of an ordinal GEE fit on gee_ordinal_1.csv
        against hard-coded parameters and standard errors."""

        endog, exog, groups = load_data("gee_ordinal_1.csv",
                                        icept=False)

        family = Binomial()
        dependence = GlobalOddsRatio("ordinal")

        md = GEE(endog, exog, groups, None, family, dependence)
        md.setup_ordinal()
        mdf = md.fit()

        # (expected parameter, expected standard error) pairs
        expected = [(1.09238131, 0.10878752),
                    (0.02148193, 0.10326078),
                    (-0.39879146, 0.11171241),
                    (-0.01855666, 0.05488705),
                    (0.02983409, 0.05995019),
                    (1.18123172, 0.0916574),
                    (0.01845318, 0.05951445),
                    (-1.10233886, 0.08539281)]
        cf = np.array([pair[0] for pair in expected])
        se = np.array([pair[1] for pair in expected])

        assert_almost_equal(mdf.params, cf, decimal=5)
        assert_almost_equal(mdf.bse, se, decimal=5)
コード例 #40
0
    def setup_class(cls):
        # Build the Poisson GEE (independence structure) from
        # gee_poisson_1.csv and store it, together with fixed starting
        # values, on the class.

        endog, exog, group_n = load_data("gee_poisson_1.csv")

        cls.mod = GEE(endog, exog, group_n, None, Poisson(), Independence())

        cls.start_params = np.array([
            -0.03644504, -0.05432094, 0.01566427,
            0.57628591, -0.0046566, -0.47709315])
コード例 #41
0
    def test_default_time(self):
        """
        Check that the time defaults work correctly.
        """

        endog, exog, group = load_data("gee_logistic_1.csv")

        # Explicit time values for the autoregressive model: 0, 1, 2, ...
        # over each group's rows, in row order.
        T = np.zeros(len(endog))
        for label in set(group):
            rows = np.flatnonzero(group == label)
            T[rows] = lrange(len(rows))

        family = Binomial()
        va = Autoregressive()

        # Model without `time` ...
        fit_default = GEE(endog, exog, group,
                          family=family, cov_struct=va).fit()

        # ... and with the explicit time values built above
        fit_explicit = GEE(endog, exog, group, time=T,
                           family=family, cov_struct=va).fit()

        assert_almost_equal(fit_default.params, fit_explicit.params,
                            decimal=6)
        assert_almost_equal(fit_default.standard_errors(),
                            fit_explicit.standard_errors(),
                            decimal=6)
コード例 #42
0
    def test_nested_linear(self):
        """Fit gee_nested_linear_1.csv with an Independence and then a
        Nested structure and compare both fits with hard-coded values."""

        family = Gaussian()

        endog, exog, group = load_data("gee_nested_linear_1.csv")

        # Nested labels: five 0s followed by five 1s, repeated once per
        # complete block of ten rows, as a column vector.
        pattern = np.repeat([0, 1], 5)
        group_n = np.tile(pattern, endog.shape[0] // 10)[:, None]

        indep_fit = GEE(endog, exog, group, None, family,
                        Independence()).fit()

        # From statsmodels.GEE (not an independent test)
        cf = np.r_[-0.1671073, 1.00467426, -2.01723004, 0.97297106]
        se = np.r_[0.08629606, 0.04058653, 0.04067038, 0.03777989]
        assert_almost_equal(indep_fit.params, cf, decimal=6)
        assert_almost_equal(indep_fit.standard_errors(), se, decimal=6)

        # The nested fit starts from the independence estimates
        nested_model = GEE(endog, exog, group, None, family, Nested(),
                           dep_data=group_n)
        nested_fit = nested_model.fit(start_params=indep_fit.params)

        # From statsmodels.GEE (not an independent test)
        cf = np.r_[-0.16655319, 1.02183688, -2.00858719, 1.00101969]
        se = np.r_[0.08632616, 0.02913582, 0.03114428, 0.02893991]
        assert_almost_equal(nested_fit.params, cf, decimal=6)
        assert_almost_equal(nested_fit.standard_errors(), se, decimal=6)
コード例 #43
0
ファイル: test_gee.py プロジェクト: d8aninja/statsmodels
    def test_missing(self):
        """
        Test missing data handling for calling from the api.  Missing
        data handling does not currently work for formulas.
        """

        nobs = 100
        endog = np.random.normal(size=nobs)
        exog = np.random.normal(size=(nobs, 3))
        exog[:, 0] = 1
        groups = np.kron(np.arange(20), np.ones(5))

        # Five rows in total carry a NaN: 0, 5, 6 (endog) and 10, 11 (exog)
        endog[[0, 5, 6]] = np.nan
        exog[[10, 11], 1] = np.nan

        model_drop = GEE(endog, exog, groups, missing='drop')
        fit_drop = model_drop.fit()

        assert_almost_equal(len(model_drop.endog), 95)
        assert_almost_equal(np.asarray(model_drop.exog.shape), np.r_[95, 3])

        # Same fit on the rows that are complete in both endog and exog
        complete = np.logical_and(np.isfinite(endog),
                                  np.isfinite(exog).all(1))

        model_clean = GEE(endog[complete], exog[complete, :],
                          groups[complete], missing='none')
        fit_clean = model_clean.fit()

        assert_almost_equal(fit_drop.params, fit_clean.params)
        assert_almost_equal(fit_drop.bse, fit_clean.bse)
コード例 #44
0
    def test_weighted(self):
        """Check GEE with observation weights: a hand-computable case, a
        comparison with hard-coded Stata results, and a smoke test."""

        # Simple check where the answer can be computed by hand: the first
        # ten rows have endog 1 and weight 2, the rest endog 0 and weight 1,
        # so the weighted mean of endog is 20/30.
        exog = np.ones(20)
        weights = np.r_[2 * np.ones(10), np.ones(10)]
        endog = np.r_[np.ones(10), np.zeros(10)]
        groups = np.kron(np.arange(10), np.r_[1, 1])
        result = GEE(endog, exog, groups, weights=weights).fit()
        assert_allclose(result.params, np.r_[2/3.])

        # Comparison against stata using groups with different sizes.
        weights = np.r_[np.ones(10), 2 * np.ones(10)]
        endog = np.r_[1, 2, 3, 2, 3, 4, 3, 4, 5, 4, 5, 6, 5, 6, 7, 6,
                      7, 8, 7, 8]
        exog1 = np.r_[1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4,
                      3, 3, 3, 3]
        groups = np.r_[1, 1, 2, 2, 2, 2, 4, 4, 5, 5, 6, 6, 6, 6,
                       8, 8, 9, 9, 10, 10]
        exog = np.column_stack((np.ones(20), exog1))

        # Comparison using independence model
        indep_model = GEE(endog, exog, groups, weights=weights,
                          cov_struct=sm.cov_struct.Independence())
        group_sizes = [2, 4, 2, 2, 4, 2, 2, 2]
        g = np.mean(group_sizes)
        fac = 20 / float(20 - g)
        result = indep_model.fit(ddof_scale=0, scaling_factor=fac)

        assert_allclose(result.params, np.r_[1.247573, 1.436893], atol=1e-6)
        assert_allclose(result.scale, 1.808576)

        # Stata multiples robust SE by sqrt(N / (N - g)), where N is
        # the total sample size and g is the average group size.
        assert_allclose(result.bse, np.r_[0.895366, 0.3425498], atol=1e-5)

        # Comparison using exchangeable model
        # Smoke test for now
        exch_model = GEE(endog, exog, groups, weights=weights,
                         cov_struct=sm.cov_struct.Exchangeable())
        result = exch_model.fit(ddof_scale=0)
コード例 #45
0
ファイル: test_gee.py プロジェクト: lema655/statsmodels
    def test_margins(self):
        """Smoke test: build GEEMargins from a fitted exchangeable Binomial
        GEE on simulated data and call its summary."""

        n = 300
        exog = np.random.normal(size=(n, 4))
        exog[:, 0] = 1
        # Second column: 0/1 indicator that the third column is negative
        exog[:, 1] = 1 * (exog[:, 2] < 0)

        # One label per run of four consecutive rows
        n_groups = n / 4
        group = np.kron(np.arange(n_groups), np.ones(4))
        time = np.zeros((n, 1))

        # Logistic success probabilities from the linear predictor
        beta = np.r_[0, 1, -1, 0.5]
        linpred = np.dot(exog, beta)
        prob = 1 / (1 + np.exp(-linpred))

        endog = 1 * (np.random.uniform(size=n) < prob)

        model = GEE(endog, exog, group, time, Binomial(), Exchangeable())
        fit = model.fit()

        marg = GEEMargins(fit, ())
        marg.summary()
コード例 #46
0
    def test_margins(self):
        """Fit a Binomial GEE with an exchangeable structure to simulated
        binary data, then construct GEEMargins and call `summary` (no
        values are asserted)."""

        n = 300
        beta = np.r_[0, 1, -1, 0.5]

        # Design: constant, indicator of a negative third column, and two
        # standard normal columns.
        exog = np.random.normal(size=(n, 4))
        exog[:, 0] = 1
        exog[:, 1] = 1 * (exog[:, 2] < 0)

        group = np.kron(np.arange(n / 4), np.ones(4))
        time = np.zeros((n, 1))

        prob = 1 / (1 + np.exp(-np.dot(exog, beta)))
        draws = np.random.uniform(size=n)
        endog = 1 * (draws < prob)

        fa = Binomial()
        ex = Exchangeable()
        mdf = GEE(endog, exog, group, time, fa, ex).fit()

        GEEMargins(mdf, ()).summary()
コード例 #47
0
    def setup_class(cls):
        # Build the Poisson GEE (independence structure) from
        # gee_poisson_1.csv through the formula interface and store it,
        # together with fixed starting values, on the class.

        endog, exog, group_n = load_data("gee_poisson_1.csv")

        # Test with formulas
        # Columns: response, group id, then every exog column except the
        # first, named X1, X2, ...
        covariate_names = ["X%d" % j for j in range(1, exog.shape[1])]
        D = pd.DataFrame(
            np.concatenate((endog[:, None], group_n[:, None], exog[:, 1:]),
                           axis=1))
        D.columns = ["Y", "Id"] + covariate_names

        cls.mod = GEE.from_formula("Y ~ X1 + X2 + X3 + X4 + X5", "Id",
                                   D, family=Poisson(),
                                   cov_struct=Independence())

        cls.start_params = np.array([
            -0.03644504, -0.05432094, 0.01566427,
            0.57628591, -0.0046566, -0.47709315])
コード例 #48
0
ファイル: test_gee_glm.py プロジェクト: BranYang/statsmodels
    def setup_class(cls):
        # Fit the same formula on seeded random data with a Gaussian GEE
        # (independence structure) and with GLM; the fits are stored on the
        # class as result1 and result2.

        np.random.seed(987126)
        nobs = 100
        Y = np.random.normal(size=nobs)
        X1 = np.random.normal(size=nobs)
        X2 = np.random.normal(size=nobs)
        X3 = np.random.normal(size=nobs)
        D = pd.DataFrame({"Y": Y, "X1": X1, "X2": X2, "X3": X3})

        # 20 group labels, each repeated for 5 consecutive rows
        groups = np.kron(np.arange(20), np.ones(5))

        formula = "Y ~ X1 + X2 + X3"
        gee_model = GEE.from_formula(formula, groups, D,
                                     family=families.Gaussian(),
                                     cov_struct=Independence())
        cls.result1 = gee_model.fit()

        cls.result2 = GLM.from_formula(formula, data=D).fit()
コード例 #49
0
ファイル: test_gee_glm.py プロジェクト: BranYang/statsmodels
    def setup_class(cls):
        # Fit the same formula on seeded random data with a Poisson GEE
        # (independence structure) and a Poisson GLM; the fits are stored
        # on the class as result1 and result2.

        np.random.seed(987126)
        nobs = 100
        Y = np.exp(1 + np.random.normal(size=nobs))
        X1 = np.random.normal(size=nobs)
        X2 = np.random.normal(size=nobs)
        X3 = np.random.normal(size=nobs)
        groups = np.random.randint(0, 4, size=nobs)
        D = pd.DataFrame({"Y": Y, "X1": X1, "X2": X2, "X3": X3})

        # The same family instance is passed to both models
        family = families.Poisson()
        formula = "Y ~ X1 + X2 + X3"

        gee_model = GEE.from_formula(formula, groups, D,
                                     family=family,
                                     cov_struct=Independence())
        cls.result1 = gee_model.fit()

        glm_model = GLM.from_formula(formula, data=D, family=family)
        cls.result2 = glm_model.fit(disp=False)
コード例 #50
0
ファイル: test_gee.py プロジェクト: lema655/statsmodels
    def test_compare_poisson(self):
        """Poisson GEE with an independence structure should give the same
        coefficients as the formula-based Poisson model from `sm.poisson`."""

        vs = Independence()
        family = Poisson()

        Y = np.ceil(-np.log(np.random.uniform(size=100)))
        X1 = np.random.normal(size=100)
        X2 = np.random.normal(size=100)
        X3 = np.random.normal(size=100)
        groups = np.random.randint(0, 4, size=100)

        D = pd.DataFrame({"Y": Y, "X1": X1, "X2": X2, "X3": X3})

        md = GEE.from_formula("Y ~ X1 + X2 + X3",
                              D,
                              None,
                              groups=groups,
                              family=family,
                              covstruct=vs).fit()

        # disp=False keeps the optimizer's convergence report out of the
        # test output.
        sml = sm.poisson("Y ~ X1 + X2 + X3", data=D).fit(disp=False)

        assert_almost_equal(sml.params.values, md.params, decimal=10)
コード例 #51
0
ファイル: test_gee.py プロジェクト: athanszheyin/statsmodels
    def test_compare_logit(self):
        """Binomial GEE with an independence structure should give the same
        coefficients as the formula-based logit model from `sm.logit`."""

        nobs = 100
        # Draw order (Y, X1, X2, X3, groups) is preserved.
        Y = 1*(np.random.normal(size=nobs) < 0)
        X1 = np.random.normal(size=nobs)
        X2 = np.random.normal(size=nobs)
        X3 = np.random.normal(size=nobs)
        groups = np.random.randint(0, 4, size=nobs)

        D = pd.DataFrame({"Y": Y, "X1": X1, "X2": X2, "X3": X3})
        formula = "Y ~ X1 + X2 + X3"

        gee_model = GEE.from_formula(formula, D, None, groups=groups,
                                     family=Binomial(),
                                     cov_struct=Independence())
        gee_fit = gee_model.fit()

        logit_fit = sm.logit(formula, data=D).fit(disp=False)

        assert_almost_equal(logit_fit.params.values, gee_fit.params,
                            decimal=10)
コード例 #52
0
ファイル: test_gee.py プロジェクト: d8aninja/statsmodels
    def test_predict(self):
        """Check `predict` on a formula-built Gaussian GEE with an offset.

        Covers the default call, explicitly passed `offset`/`exog`, a zero
        offset, and prediction on new exogenous data.
        """

        n = 50
        X1 = np.random.normal(size=n)
        X2 = np.random.normal(size=n)
        groups = np.kron(np.arange(25), np.r_[1, 1])
        offset = np.random.uniform(1, 2, size=n)
        Y = np.random.normal(0.1 * (X1 + X2) + offset, size=n)
        data = pd.DataFrame({
            "Y": Y,
            "X1": X1,
            "X2": X2,
            "groups": groups,
            "offset": offset
        })

        fml = "Y ~ X1 + X2"
        model = GEE.from_formula(fml,
                                 groups,
                                 data,
                                 family=Gaussian(),
                                 offset="offset")
        result = model.fit()
        assert_equal(result.converged, True)

        pred1 = result.predict()
        pred2 = result.predict(offset=data.offset)
        pred3 = result.predict(exog=data[["X1", "X2"]], offset=data.offset)
        pred4 = result.predict(exog=data[["X1", "X2"]], offset=0 * data.offset)
        pred5 = result.predict(offset=0 * data.offset)

        assert_allclose(pred1, pred2)
        assert_allclose(pred1, pred3)
        # A zero offset removes the offset contribution, so add it back.
        assert_allclose(pred1, pred4 + data.offset)
        assert_allclose(pred1, pred5 + data.offset)

        x1_new = np.random.normal(size=10)
        x2_new = np.random.normal(size=10)
        new_exog = pd.DataFrame({"X1": x1_new, "X2": x2_new})
        pred6 = result.predict(exog=new_exog)
        # Index a plain array: integer keys on a label-indexed Series rely
        # on pandas' deprecated positional fallback.
        params = np.asarray(result.params)
        pred6_correct = params[0] + params[1] * x1_new + params[2] * x2_new
        assert_allclose(pred6, pred6_correct)
コード例 #53
0
ファイル: test_gee.py プロジェクト: MridulS/statsmodels
    def test_offset_formula(self):
        """
        Test various ways of passing offset and exposure to `from_formula`.
        """

        n = 50
        X1 = np.random.normal(size=n)
        X2 = np.random.normal(size=n)
        groups = np.kron(np.arange(25), np.r_[1, 1])
        offset = np.random.uniform(1, 2, size=n)
        exposure = np.exp(offset)
        Y = np.random.poisson(0.1*(X1 + X2) + 2*offset, size=n)
        data = pd.DataFrame({"Y": Y, "X1": X1, "X2": X2, "groups": groups,
                             "offset": offset, "exposure": exposure})

        fml = "Y ~ X1 + X2"

        # Reference fit: offset given as a column name
        result1 = GEE.from_formula(fml, groups, data, family=Poisson(),
                                   offset="offset").fit()
        assert_equal(result1.converged, True)

        # The offset as an array, and the exposure (exp of the offset) as
        # an array or a column name, should all reproduce the reference.
        equivalent_specs = ({"offset": offset},
                            {"exposure": exposure},
                            {"exposure": "exposure"})
        for spec in equivalent_specs:
            model = GEE.from_formula(fml, groups, data, family=Poisson(),
                                     **spec)
            result = model.fit(start_params=result1.params)
            assert_allclose(result1.params, result.params)
            assert_equal(result.converged, True)

        # Exposure and offset together should match an offset of 2*offset.
        result5 = GEE.from_formula(fml, groups, data, family=Poisson(),
                                   exposure="exposure",
                                   offset="offset").fit()
        assert_equal(result5.converged, True)

        model6 = GEE.from_formula(fml, groups, data, family=Poisson(),
                                  offset=2*offset)
        result6 = model6.fit(start_params=result5.params)
        assert_allclose(result5.params, result6.params)
        assert_equal(result6.converged, True)
コード例 #54
0
ファイル: test_gee_glm.py プロジェクト: BranYang/statsmodels
    def setup_class(cls):
        # adjusted for Gamma, not in test_gee.py
        # Fit the same formula on seeded random data with a Gamma (log
        # link) GEE using an independence structure and with a GLM; the
        # fits are stored on the class as result1 and result2.

        np.random.seed(987126)
        nobs = 100
        # log-normal response
        Y = np.exp(0.1 + np.random.normal(size=nobs))
        X1 = np.random.normal(size=nobs)
        X2 = np.random.normal(size=nobs)
        X3 = np.random.normal(size=nobs)
        groups = np.random.randint(0, 4, size=nobs)
        D = pd.DataFrame({"Y": Y, "X1": X1, "X2": X2, "X3": X3})

        # The same family instance is passed to both models
        family = families.Gamma(link=links.log)
        formula = "Y ~ X1 + X2 + X3"

        gee_model = GEE.from_formula(formula, groups, D,
                                     family=family,
                                     cov_struct=Independence())
        cls.result1 = gee_model.fit()

        glm_model = GLM.from_formula(formula, data=D, family=family)
        cls.result2 = glm_model.fit(disp=False)
コード例 #55
0
ファイル: test_gee.py プロジェクト: chagge/statsmodels
    def test_compare_poisson(self):
        """Poisson GEE with an independence structure should give the same
        coefficients as the formula-based Poisson model from `sm.poisson`."""

        nobs = 100
        # Draw order (Y, X1, X2, X3, groups) is preserved.
        Y = np.ceil(-np.log(np.random.uniform(size=nobs)))
        X1 = np.random.normal(size=nobs)
        X2 = np.random.normal(size=nobs)
        X3 = np.random.normal(size=nobs)
        groups = np.random.randint(0, 4, size=nobs)

        D = pd.DataFrame({"Y": Y, "X1": X1, "X2": X2, "X3": X3})
        formula = "Y ~ X1 + X2 + X3"

        gee_fit = GEE.from_formula(formula, groups, D,
                                   family=Poisson(),
                                   cov_struct=Independence()).fit()

        poisson_fit = sm.poisson(formula, data=D).fit(disp=False)

        assert_almost_equal(gee_fit.params, poisson_fit.params, decimal=10)
コード例 #56
0
    def setup_class(cls):
        # Gaussian GEE (independence structure) and GLM fits of the same
        # formula on seeded random data, stored as result1 and result2.

        np.random.seed(987126)

        # Columns are drawn in the order Y, X1, X2, X3.
        columns = {}
        for name in ("Y", "X1", "X2", "X3"):
            columns[name] = np.random.normal(size=100)
        D = pd.DataFrame({"Y": columns["Y"], "X1": columns["X1"],
                          "X2": columns["X2"], "X3": columns["X3"]})

        groups = np.kron(np.arange(20), np.ones(5))

        vs = Independence()
        family = families.Gaussian()

        md = GEE.from_formula("Y ~ X1 + X2 + X3", groups, D,
                              family=family, cov_struct=vs)
        cls.result1 = md.fit()

        glm = GLM.from_formula("Y ~ X1 + X2 + X3", data=D)
        cls.result2 = glm.fit()
コード例 #57
0
    def setup_class(cls):
        # Poisson GEE (independence structure) and Poisson GLM fits of the
        # same formula on seeded random data, stored as result1 and result2.

        np.random.seed(987126)
        size = 100
        Y = np.exp(1 + np.random.normal(size=size))
        X1 = np.random.normal(size=size)
        X2 = np.random.normal(size=size)
        X3 = np.random.normal(size=size)
        groups = np.random.randint(0, 4, size=size)

        D = pd.DataFrame({"Y": Y, "X1": X1, "X2": X2, "X3": X3})

        vs = Independence()
        # One family instance shared by both models
        family = families.Poisson()
        fml = "Y ~ X1 + X2 + X3"

        cls.result1 = GEE.from_formula(fml, groups, D,
                                       family=family, cov_struct=vs).fit()

        cls.result2 = GLM.from_formula(fml, data=D,
                                       family=family).fit(disp=False)
コード例 #58
0
ファイル: test_gee.py プロジェクト: athanszheyin/statsmodels
    def t_est_missing(self):
        # Missing-data handling through the formula interface.
        # NOTE(review): the name does not start with "test", so this is
        # presumably excluded from test collection on purpose -- confirm.

        nobs = 100
        Y = np.random.normal(size=nobs)
        X1 = np.random.normal(size=nobs)
        X2 = np.random.normal(size=nobs)
        X3 = np.random.normal(size=nobs)
        groups = np.kron(lrange(20), np.ones(5))

        # Five rows in total carry a NaN: 0, 5, 6 (Y) and 10, 11 (X2)
        Y[0] = np.nan
        Y[5:7] = np.nan
        X2[10:12] = np.nan

        D = pd.DataFrame({"Y": Y, "X1": X1, "X2": X2, "X3": X3,
                          "groups": groups})

        md = GEE.from_formula("Y ~ X1 + X2 + X3", D, None,
                              groups=D["groups"], missing='drop')
        md.fit()

        assert len(md.endog) == 95
        assert md.exog.shape == (95, 4)
コード例 #59
0
ファイル: test_gee.py プロジェクト: chagge/statsmodels
    def test_compare_logit(self):
        """Binomial GEE with an independence structure should give the same
        coefficients as the formula-based logit model from `sm.logit`."""

        vs = Independence()
        family = Binomial()

        Y = 1*(np.random.normal(size=100) < 0)
        X1 = np.random.normal(size=100)
        X2 = np.random.normal(size=100)
        X3 = np.random.normal(size=100)
        groups = np.random.randint(0, 4, size=100)

        D = pd.DataFrame({"Y": Y, "X1": X1, "X2": X2, "X3": X3})

        mod1 = GEE.from_formula("Y ~ X1 + X2 + X3", groups, D,
                                family=family, cov_struct=vs)
        rslt1 = mod1.fit()

        mod2 = sm.logit("Y ~ X1 + X2 + X3", data=D)
        # disp=False keeps the optimizer's convergence report out of the
        # test output, as in the sibling comparison tests.
        rslt2 = mod2.fit(disp=False)

        assert_almost_equal(rslt1.params, rslt2.params, decimal=10)
コード例 #60
0
    def test_offset_formula(self):
        # Offset and exposure can be handed to `from_formula` either as
        # arrays or as names of columns in the data frame.  The equivalent
        # specifications below must converge and agree on the parameters.

        nobs = 50
        x1 = np.random.normal(size=nobs)
        x2 = np.random.normal(size=nobs)
        # 25 clusters of two consecutive observations each.
        groups = np.kron(np.arange(25), np.r_[1, 1])
        offset = np.random.uniform(1, 2, size=nobs)
        exposure = np.exp(offset)
        y = np.random.poisson(0.1*(x1 + x2) + 2*offset, size=nobs)
        data = pd.DataFrame({"Y": y, "X1": x1, "X2": x2, "groups": groups,
                             "offset": offset, "exposure": exposure})

        def build(**kwds):
            # Same formula, groups, data and a fresh Poisson family every
            # time; only the offset/exposure arguments vary.
            return GEE.from_formula("Y ~ X1 + X2", groups, data,
                                    family=Poisson(), **kwds)

        # Reference fit: offset given as a column name.
        base = build(offset="offset").fit()
        assert_equal(base.converged, True)

        # Offset as an array, exposure as an array, exposure as a column
        # name: each is expected to reproduce the reference estimates.
        equivalent_specs = (
            {"offset": offset},
            {"exposure": exposure},
            {"exposure": "exposure"},
        )
        for kwds in equivalent_specs:
            alt = build(**kwds).fit(start_params=base.params)
            assert_allclose(base.params, alt.params)
            assert_equal(alt.converged, True)

        # exposure is exp(offset), so supplying both is expected to match
        # a single offset of 2*offset.
        combined = build(exposure="exposure", offset="offset").fit()
        assert_equal(combined.converged, True)

        doubled = build(offset=2*offset).fit(start_params=combined.params)
        assert_allclose(combined.params, doubled.params)
        assert_equal(doubled.converged, True)