Ejemplo n.º 1
0
    def test_glm(self):
        # prelimnimary, getting started with basic test for GLM.get_prediction
        from statsmodels.genmod.generalized_linear_model import GLM

        res_wls = self.res_wls
        mod_wls = res_wls.model
        y, X, wi = mod_wls.endog, mod_wls.exog, mod_wls.weights

        w_sqrt = np.sqrt(wi)  # notation wi is weights, `w` is var
        mod_glm = GLM(y * w_sqrt, X * w_sqrt[:,None])

        # compare using t distribution
        res_glm = mod_glm.fit(use_t=True)
        pred_glm = res_glm.get_prediction()
        sf_glm = pred_glm.summary_frame()

        pred_res_wls = res_wls.get_prediction()
        sf_wls = pred_res_wls.summary_frame()
        n_compare = 30   # in glm with predict wendog
        assert_allclose(sf_glm.values[:n_compare],
                        sf_wls.values[:n_compare, :4])

        # compare using normal distribution

        res_glm = mod_glm.fit() # default use_t=False
        pred_glm = res_glm.get_prediction()
        sf_glm = pred_glm.summary_frame()

        res_wls = mod_wls.fit(use_t=False)
        pred_res_wls = res_wls.get_prediction()
        sf_wls = pred_res_wls.summary_frame()
        assert_allclose(sf_glm.values[:n_compare],
                        sf_wls.values[:n_compare, :4])

        # function for parameter transformation
        # should be separate test method
        from statsmodels.genmod._prediction import params_transform_univariate
        rates = params_transform_univariate(res_glm.params, res_glm.cov_params())

        rates2 = np.column_stack((np.exp(res_glm.params),
                                  res_glm.bse * np.exp(res_glm.params),
                                  np.exp(res_glm.conf_int())))
        assert_allclose(rates.summary_frame().values, rates2, rtol=1e-13)

        from statsmodels.genmod.families import links

        # with identity transform
        pt = params_transform_univariate(res_glm.params, res_glm.cov_params(), link=links.identity())

        assert_allclose(pt.tvalues, res_glm.tvalues, rtol=1e-13)
        assert_allclose(pt.se_mean, res_glm.bse, rtol=1e-13)
        ptt = pt.t_test()
        assert_allclose(ptt[0], res_glm.tvalues, rtol=1e-13)
        assert_allclose(ptt[1], res_glm.pvalues, rtol=1e-13)

        # prediction with exog and no weights does not error
        res_glm = mod_glm.fit()
        pred_glm = res_glm.get_prediction(X)
Ejemplo n.º 2
0
def params_transform_univariate(params, cov_params, link=None, transform=None,
                     row_labels=None):
    """
    results for univariate, nonlinear, monotonicaly transformed parameters

    This provides transformed values, standard errors and confidence interval
    for transformations of parameters, for example in calculating rates with
    `exp(params)` in the case of Poisson or other models with exponential
    mean function.

    """

    from statsmodels.genmod.families import links
    if link is None and transform is None:
        link = links.Log()

    if row_labels is None and hasattr(params, 'index'):
        row_labels = params.index

    params = np.asarray(params)

    predicted_mean = link.inverse(params)
    link_deriv = link.inverse_deriv(params)
    var_pred_mean = link_deriv**2 * np.diag(cov_params)
    # TODO: do we want covariance also, or just var/se

    dist = stats.norm

    # TODO: need ci for linear prediction, method of `lin_pred
    linpred = PredictionResults(params, np.diag(cov_params), dist=dist,
                             row_labels=row_labels, link=links.identity())

    res = PredictionResults(predicted_mean, var_pred_mean, dist=dist,
                             row_labels=row_labels, linpred=linpred, link=link)

    return res
    def test_glm(self):
        # prelimnimary, getting started with basic test for GLM.get_prediction
        from statsmodels.genmod.generalized_linear_model import GLM

        res_wls = self.res_wls
        mod_wls = res_wls.model
        y, X, wi = mod_wls.endog, mod_wls.exog, mod_wls.weights

        w_sqrt = np.sqrt(wi)  # notation wi is weights, `w` is var
        mod_glm = GLM(y * w_sqrt, X * w_sqrt[:,None])

        # compare using t distribution
        res_glm = mod_glm.fit(use_t=True)
        pred_glm = res_glm.get_prediction()
        sf_glm = pred_glm.summary_frame()

        pred_res_wls = res_wls.get_prediction()
        sf_wls = pred_res_wls.summary_frame()
        n_compare = 30   # in glm with predict wendog
        assert_allclose(sf_glm.values[:n_compare],
                        sf_wls.values[:n_compare, :4])

        # compare using normal distribution

        res_glm = mod_glm.fit() # default use_t=False
        pred_glm = res_glm.get_prediction()
        sf_glm = pred_glm.summary_frame()

        res_wls = mod_wls.fit(use_t=False)
        pred_res_wls = res_wls.get_prediction()
        sf_wls = pred_res_wls.summary_frame()
        assert_allclose(sf_glm.values[:n_compare],
                        sf_wls.values[:n_compare, :4])

        # function for parameter transformation
        # should be separate test method
        from statsmodels.genmod._prediction import params_transform_univariate
        rates = params_transform_univariate(res_glm.params, res_glm.cov_params())

        rates2 = np.column_stack((np.exp(res_glm.params),
                                  res_glm.bse * np.exp(res_glm.params),
                                  np.exp(res_glm.conf_int())))
        assert_allclose(rates.summary_frame().values, rates2, rtol=1e-13)

        from statsmodels.genmod.families import links

        # with identity transform
        pt = params_transform_univariate(res_glm.params, res_glm.cov_params(), link=links.identity())

        assert_allclose(pt.tvalues, res_glm.tvalues, rtol=1e-13)
        assert_allclose(pt.se_mean, res_glm.bse, rtol=1e-13)
        ptt = pt.t_test()
        assert_allclose(ptt[0], res_glm.tvalues, rtol=1e-13)
        assert_allclose(ptt[1], res_glm.pvalues, rtol=1e-13)

        # prediction with exog and no weights does not error
        res_glm = mod_glm.fit()
        pred_glm = res_glm.get_prediction(X)

        # check that list works, issue 4437
        x = res_glm.model.exog.mean(0)
        pred_res3 = res_glm.get_prediction(x)
        ci3 = pred_res3.conf_int()
        pred_res3b = res_glm.get_prediction(x.tolist())
        ci3b = pred_res3b.conf_int()
        assert_allclose(pred_res3b.se_mean, pred_res3.se_mean, rtol=1e-13)
        assert_allclose(ci3b, ci3, rtol=1e-13)
        res_df = pred_res3b.summary_frame()
        assert_equal(res_df.index.values, [0])

        x = res_glm.model.exog[-2:]
        pred_res3 = res_glm.get_prediction(x)
        ci3 = pred_res3.conf_int()
        pred_res3b = res_glm.get_prediction(x.tolist())
        ci3b = pred_res3b.conf_int()
        assert_allclose(pred_res3b.se_mean, pred_res3.se_mean, rtol=1e-13)
        assert_allclose(ci3b, ci3, rtol=1e-13)
        res_df = pred_res3b.summary_frame()
        assert_equal(res_df.index.values, [0, 1])
Ejemplo n.º 4
0
w_sqrt = np.sqrt(w)
mod_glm = GLM(y/w_sqrt, X/w_sqrt[:,None])
res_glm = mod_glm.fit()
pred_glm = res_glm.get_prediction()
print(pred_glm.summary_frame().head())

res_glm_t = mod_glm.fit(use_t=True)
pred_glm_t = res_glm_t.get_prediction()
print(pred_glm_t.summary_frame().head())

rates = params_transform_univariate(res_glm.params, res_glm.cov_params())
print('\nRates exp(params)')
print(rates.summary_frame())

rates2 = np.column_stack((np.exp(res_glm.params),
                          res_glm.bse * np.exp(res_glm.params),
                          np.exp(res_glm.conf_int())))
assert_allclose(rates.summary_frame().values, rates2, rtol=1e-13)

from statsmodels.genmod.families import links

# with identity transform
pt = params_transform_univariate(res_glm.params, res_glm.cov_params(), link=links.identity())
print(pt.tvalues)

assert_allclose(pt.tvalues, res_glm.tvalues, rtol=1e-13)
assert_allclose(pt.se_mean, res_glm.bse, rtol=1e-13)
ptt = pt.t_test()
assert_allclose(ptt[0], res_glm.tvalues, rtol=1e-13)
assert_allclose(ptt[1], res_glm.pvalues, rtol=1e-13)
Ejemplo n.º 5
0
res_glm = mod_glm.fit()
pred_glm = res_glm.get_prediction()
print(pred_glm.summary_frame().head())

res_glm_t = mod_glm.fit(use_t=True)
pred_glm_t = res_glm_t.get_prediction()
print(pred_glm_t.summary_frame().head())

rates = params_transform_univariate(res_glm.params, res_glm.cov_params())
print('\nRates exp(params)')
print(rates.summary_frame())

rates2 = np.column_stack(
    (np.exp(res_glm.params), res_glm.bse * np.exp(res_glm.params),
     np.exp(res_glm.conf_int())))
assert_allclose(rates.summary_frame().values, rates2, rtol=1e-13)

from statsmodels.genmod.families import links

# with identity transform
pt = params_transform_univariate(res_glm.params,
                                 res_glm.cov_params(),
                                 link=links.identity())
print(pt.tvalues)

assert_allclose(pt.tvalues, res_glm.tvalues, rtol=1e-13)
assert_allclose(pt.se_mean, res_glm.bse, rtol=1e-13)
ptt = pt.t_test()
assert_allclose(ptt[0], res_glm.tvalues, rtol=1e-13)
assert_allclose(ptt[1], res_glm.pvalues, rtol=1e-13)