Example #1
0
    def test_postestimation(self):
        """Check probability residuals and the ordinal goodness-of-fit test.

        The probability residuals of ``self.res1`` and their summary
        statistics are compared with the reference values stored on
        ``self.res2``. The binned chisquare test is compared with numbers
        obtained from the R package generalhoslem.
        """
        fit, ref = self.res1, self.res2
        resid = fit.resid_prob

        # only the leading ``len(ref.resid_prob)`` residuals are compared
        n_ref = len(ref.resid_prob)
        assert_allclose(resid[:n_ref], ref.resid_prob, atol=1e-4)

        # mean, min, max and sample variance of the full residual array
        summary = [
            resid.mean(),
            resid.min(),
            resid.max(),
            resid.var(ddof=1),
        ]
        assert_allclose(summary, ref.resid_prob_stats, atol=1e-5)

        # Reference numbers from R generalhoslem:
        # > logitgof(ologit_ucla$apply2, fitted(r_logit), g = 10, ord = TRUE)
        # Stata's ologitgof gives slightly different values, presumably
        # because of a different sort algorithm combined with ties, see #7095
        expected_chi2 = 20.958760713111
        expected_df = 17
        expected_pvalue = 0.2281403796588

        import statsmodels.stats.diagnostic_gen as dia

        # TODO: add more properties or methods to Results class
        n_levels = 3
        levels = np.arange(n_levels)
        prob = fit.predict()
        # one indicator column per outcome level 0, 1, 2
        indicators = (fit.model.endog[:, None] == levels).astype(int)
        # sort key: predicted probabilities weighted by level scores 1, 2, 3
        score = (prob * (levels + 1)).sum(axis=1)

        gof = dia.test_chisquare_binning(
            indicators,
            prob,
            sort_var=score,
            bins=10,
            df=None,
            ordered=True,
            sort_method="stable",
        )
        assert_allclose(gof.statistic, expected_chi2, rtol=5e-5)
        assert_allclose(gof.pvalue, expected_pvalue, rtol=1e-4)
        assert_equal(gof.df, expected_df)
Example #2
0
    def _chisquare_binned(self,
                          sort_var=None,
                          bins=10,
                          k_max=None,
                          df=None,
                          sort_method="quicksort",
                          frac_upp=0.1,
                          alpha_nc=0.05):
        """Hosmer-Lemeshow type chisquare test for count data.

        The count outcomes ``y = k`` are partitioned into bins and treated
        as the categories of an ordinal response. Observations are sorted
        according to ``sort_var`` and split into approximately equal sized
        groups.

        The test does not take into account that parameters are estimated,
        so the distribution of the test statistic is only an approximation.

        Parameters
        ----------
        sort_var : array_like, optional
            Variable used to sort the observations. If None, then
            ``self.results.predict(which="lin")`` is used.
        bins : optional
            Passed on as ``bins`` to ``test_chisquare_binning``.
            Default is 10.
        k_max : int, optional
            Number of leading count columns (counts ``0, ..., k_max - 1``)
            that are kept. Whatever is missing for a row to sum to one is
            added to the last kept column. If None, then ``k_max`` is
            derived from ``frac_upp``.
        df : optional
            Passed on as ``df`` to ``test_chisquare_binning``.
        sort_method : str
            Passed on as ``sort_method`` to ``test_chisquare_binning``.
        frac_upp : float
            Only used if ``k_max`` is None. ``k_max`` is set to one less
            than the first count ``k`` for which the number of observations
            not equal to any of ``0, ..., k`` is below ``frac_upp * nobs``.
        alpha_nc : float
            Passed on as ``alpha_nc`` to ``test_chisquare_binning``.

        Returns
        -------
        The return value of ``test_chisquare_binning`` called with
        ``ordered=True``.
        """
        if sort_var is None:
            sort_var = self.results.predict(which="lin")

        y = self.results.model.endog
        # Per the original author's note, predict offers no option for the
        # maximum count, so the support 0, ..., K-1 is taken from the number
        # of columns of the predicted probabilities.
        prob = self.results.predict(which="prob")
        support = np.arange(prob.shape[1])
        freq = (y[:, None] == support).astype(int)

        # truncate upper tail
        if k_max is None:
            nobs = len(y)
            # entry k: observations whose outcome is not in 0, ..., k
            n_remaining = nobs - freq.sum(axis=0).cumsum()
            # NOTE(review): argmax returns 0 if the condition never holds,
            # which makes k_max == -1 - confirm this is intended
            k_max = np.argmax(n_remaining < nobs * frac_upp) - 1

        prob = prob[:, :k_max]
        freq = freq[:, :k_max]
        # Fold the truncated upper tail into the last kept column so that
        # every row sums to one again.
        # NOTE(review): these are in-place updates on slices; if predict
        # returns an ndarray, the slice is a view and the array returned by
        # predict is modified as well.
        prob[:, -1] += 1 - prob.sum(axis=1)
        freq[:, -1] += 1 - freq.sum(axis=1)

        # TODO: what's the correct df, same as for multinomial/ordered ?
        return test_chisquare_binning(
            freq,
            prob,
            sort_var=sort_var,
            bins=bins,
            df=df,
            ordered=True,
            sort_method=sort_method,
            alpha_nc=alpha_nc,
        )