Beispiel #1
0
    def test_chisquare_prob(self, bin_edges=None, method=None):
        """Moment test for binned probabilities using OPG.

        Predicted probabilities are obtained from ``self.results.predict``
        and passed, together with ``bin_edges`` and ``method``, to the
        module-level ``test_chisquare_prob`` function.

        Parameters
        ----------
        bin_edges : array_like or None
            Defines which counts are included in the test on frequencies
            and how counts are combined into bins.
            The default used when ``bin_edges`` is None will change in the
            future. See the Notes and Examples sections below.
        method : str
            Currently only ``method="opg"`` is available.
            If method is None, then OPG is used, but the default might
            change in future versions. See the Notes section below.

        Returns
        -------
        test result
            The result returned by the underlying ``test_chisquare_prob``
            function.

        Notes
        -----
        Warning: The current default can have many empty or nearly empty
        bins. The default number of bins is given by max(endog).
        Currently it is recommended to limit the number of bins explicitly,
        see Examples below.
        Binning will change in the future and automatic binning will be
        added.

        Currently only the outer product of gradient, OPG, method is
        implemented. In many cases, the OPG version of a specification test
        overrejects in small samples.
        Specialized tests that use the observed or expected information
        matrix often have better small sample properties.
        The default method will change if better methods are added.

        Examples
        --------
        The following call is a test for the probability of zeros
        `test_chisquare_prob(bin_edges=np.arange(3))`

        `test_chisquare_prob(bin_edges=np.arange(10))` tests the hypothesis
        that the frequencies for counts up to 7 correspond to the estimated
        Poisson distributions.
        In this case, the edges are 0, ..., 9 which defines 9 bins for
        counts 0 to 8. The last bin is dropped, so the joint test hypothesis
        is that the observed aggregated frequencies for counts 0 to 7
        correspond to the model prediction for those frequencies. Predicted
        probabilities Prob(y_i = k | x) are aggregated over observations
        ``i``.

        """
        predict_kwds = {}
        if bin_edges is not None:
            # TODO: verify upper bound, we drop last bin (may be open, inf)
            # The second-to-last edge is the largest count for which
            # probabilities are requested from the model.
            largest_count = bin_edges[-2]
            predict_kwds["y_values"] = np.arange(largest_count + 1)

        predicted = self.results.predict(which="prob", **predict_kwds)
        return test_chisquare_prob(
            self.results,
            predicted,
            bin_edges=bin_edges,
            method=method,
        )
    def test_probs(self, close_figures):
        """Regression test for the chi-square probability test, plus plot smoke test.

        The chi-square test is run on the first two columns of the predicted
        probabilities and its first two result entries are compared with
        stored regression numbers.

        ``close_figures`` is not referenced in the body; presumably it is a
        fixture that cleans up the figure created by the plot call —
        confirm against the test configuration.
        """
        sample_size = self.nobs
        predicted = self.res.predict_prob()
        # Relative frequency of each observed count value.
        observed_freq = np.bincount(self.endog) / sample_size

        # regression numbers
        expected = (0.387770845, 0.5334734738)
        chi2_res = dia.test_chisquare_prob(self.res, predicted[:, :2])
        assert_allclose(chi2_res[:2], expected, rtol=5e-5)

        # smoke test for plot
        mean_predicted = predicted.mean(0)
        dia.plot_probs(observed_freq, mean_predicted)
    def test_probs(self):
        """Regression test for the chi-square probability test, plus plot smoke test.

        The chi-square test is run on the first two columns of the predicted
        probabilities and its first two result entries are compared with
        stored regression numbers. The plot is only exercised when
        matplotlib can be imported.
        """
        nobs = self.nobs
        probs = self.res.predict_prob()
        # Relative frequency of each observed count value.
        freq = np.bincount(self.endog) / nobs

        tzi = dia.test_chisquare_prob(self.res, probs[:, :2])
        # regression numbers
        tzi1 = (0.387770845, 0.5334734738)
        # Explicit tolerance: the default rtol=1e-7 is tighter than needed for
        # stored regression numbers; 5e-5 matches the other variants of this
        # check.
        assert_allclose(tzi[:2], tzi1, rtol=5e-5)

        # smoke test for plot
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            # Plotting is optional; the numerical check above has already run.
            pass
        else:
            fig = dia.plot_probs(freq, probs.mean(0))
            plt.close(fig)
    def test_probs(self):
        """Regression test for the chi-square probability test, plus plot smoke test.

        The chi-square test is run on the first two columns of the predicted
        probabilities and its first two result entries are compared with
        stored regression numbers. The plot is only exercised when
        matplotlib can be imported.
        """
        sample_size = self.nobs
        predicted = self.res.predict_prob()
        # Relative frequency of each observed count value.
        observed_freq = np.bincount(self.endog) / sample_size

        # regression numbers
        expected = (0.387770845, 0.5334734738)
        chi2_res = dia.test_chisquare_prob(self.res, predicted[:, :2])
        assert_allclose(chi2_res[:2], expected, rtol=5e-5)

        # smoke test for plot
        try:
            import matplotlib.pyplot as plt
        except ImportError:
            # Without matplotlib there is nothing left to exercise.
            pass
        else:
            figure = dia.plot_probs(observed_freq, predicted.mean(0))
            plt.close(figure)
    def test_spec_tests(self):
        """Compare dispersion and zero-inflation tests with regression numbers.

        A Poisson model is fit to ``self.endog`` / ``self.exog`` and wrapped
        in ``PoissonDiagnostic``; the first two entries of each test result
        are compared with the stored values.
        """
        # regression test, numbers similar to Monte Carlo simulation
        expected_dispersion = np.array([
            [0.1396096387543, 0.8889684245877],
            [0.1396096387543, 0.8889684245877],
            [0.2977840351238, 0.7658680002106],
            [0.1307899995877, 0.8959414342111],
            [0.1307899995877, 0.8959414342111],
            [0.1357101381056, 0.8920504328246],
            [0.2776587511235, 0.7812743277372],
        ])
        expected_zi = np.array([
            [0.1389582826821, 0.7093188241734],
            [-0.3727710861669, 0.7093188241734],
            [-0.2496729648642, 0.8028402670888],
            [0.0601651553909, 0.8062350958880],
        ])

        poisson_res = Poisson(self.endog, self.exog).fit(disp=0)
        diag = PoissonDiagnostic(poisson_res)

        dispersion = diag.test_dispersion()[0]
        assert_allclose(dispersion, expected_dispersion, rtol=1e-8)

        n_obs = self.endog.shape[0]
        # One row of expected_zi per call, in this order.
        zi_results = [
            diag.test_poisson_zeroinflation(method="broek",
                                            exog_infl=np.ones(n_obs)),
            diag.test_poisson_zeroinflation(method="broek"),
            diag.test_poisson_zeroinflation(method="prob"),
            diag.test_chisquare_prob(bin_edges=np.arange(3)),
        ]
        observed_zi = np.vstack([res[:2] for res in zi_results])
        assert_allclose(observed_zi, expected_zi, rtol=1e-8)

        # test jansakul and hinde with exog_infl
        expected_zi_exog = np.array([3.7813218150779, 0.1509719973257])
        zi_exog = diag.test_poisson_zeroinflation(method="broek",
                                                  exog_infl=self.exog)
        assert_allclose(zi_exog[:2], expected_zi_exog, rtol=1e-8)