Example #1
0
    def test_all(self):
        """Compare a discretized-gamma fit with published reference values.

        Checks parameters, log-likelihood, AIC, model degrees of freedom
        and predicted cell frequencies, then runs a chi-square regression
        check, a summary smoke test and a loose bootstrap comparison.
        """
        observed_freq = [46, 76, 24, 9, 1]
        # freq_weights are not available yet, so repeat each count value
        # once per observation
        endog = np.repeat(np.arange(5), observed_freq)

        # reference results taken from table 7 of the article
        expected = Bunch(
            params=[3.52636, 0.425617],
            llf=-187.469,
            chi2=1.701208,  # chisquare test
            df_model=2,
            p=0.4272,  # p-value for chi2
            aic=378.938,
            probs=[46.48, 73.72, 27.88, 6.5, 1.42],
        )

        distr = DiscretizedCount(stats.gamma)
        model = DiscretizedModel(endog, distr=distr)
        fitted = model.fit(start_params=[1, 1])
        n_obs = len(endog)

        assert_allclose(fitted.params, expected.params, rtol=1e-5)
        for attr in ("llf", "aic"):
            assert_allclose(
                getattr(fitted, attr), getattr(expected, attr), atol=6e-3
            )
        assert_equal(fitted.df_model, expected.df_model)

        # keep as many cells as the reference has and move the remaining
        # tail probability into the last cell so the cells sum to one
        n_cells = len(expected.probs)
        cell_probs = model.predict(fitted.params, which="probs")[:n_cells]
        cell_probs[-1] += 1 - cell_probs.sum()
        fitted_freq = cell_probs * n_obs
        assert_allclose(fitted_freq, expected.probs, atol=6e-2)

        assert_allclose(np.sum(observed_freq), fitted_freq.sum(), rtol=1e-10)
        chi2_test = stats.chisquare(
            observed_freq, fitted_freq, ddof=len(fitted.params)
        )
        # regression test: the numbers come from running this test and
        # are close but not identical to the article
        assert_allclose(chi2_test.statistic, 1.70409356, rtol=1e-7)
        assert_allclose(chi2_test.pvalue, 0.42654100, rtol=1e-7)

        # smoke test for summary
        fitted.summary()

        np.random.seed(987146)
        boot = fitted.bootstrap()
        # only a loose check: the small default n_rep=100 gives agreement
        # at around 3%
        assert_allclose(fitted.params, boot[0], rtol=0.05)
        assert_allclose(fitted.bse, boot[1], rtol=0.05)
Example #2
0
    def test_all(self):
        """Check the discretized exponential against a shifted geometric.

        The discretized exponential with scale ``-1 / log(1 - p)`` is
        compared with ``scipy.stats.geom(p, loc=-1)`` on pmf, cdf, sf,
        ppf and isf.
        """
        prob = 0.6
        expon_scale = -1 / np.log(1 - prob)
        geom_ref = stats.geom(prob, loc=-1)
        discretized = DiscretizedCount(stats.expon)(expon_scale)

        support = np.arange(6)
        pmf_ref = geom_ref.pmf(support)
        assert_allclose(discretized.pmf(support), pmf_ref, rtol=1e-10)
        cdf_ref = geom_ref.cdf(support)
        assert_allclose(discretized.cdf(support), cdf_ref, rtol=1e-10)
        sf_ref = geom_ref.sf(support)
        assert_allclose(discretized.sf(support), sf_ref, rtol=1e-10)

        # ppf exactly at, slightly below and slightly above the cdf values
        for shift in (0.0, -1e-8, 1e-8):
            prob_levels = cdf_ref + shift
            assert_equal(
                discretized.ppf(prob_levels), geom_ref.ppf(prob_levels)
            )
        # the scipy value is evaluated but not used as the expectation
        # because it is incorrect in scipy < 1.5.0
        _ = geom_ref.ppf(0)
        assert_equal(discretized.ppf(0), -1)

        # isf exactly at, slightly below and slightly above the sf values
        for shift in (0.0, -1e-8, 1e-8):
            tail_levels = sf_ref + shift
            assert_equal(
                discretized.isf(tail_levels), geom_ref.isf(tail_levels)
            )
        assert_equal(discretized.isf(0), geom_ref.isf(0))  # inf
        # the scipy value is evaluated but not used as the expectation
        # because it is currently incorrect in scipy
        _ = geom_ref.isf(1)
        assert_equal(discretized.isf(1), -1)
Example #3
0
    def test_basic(self):
        """Exercise a discretized count distribution end to end.

        Reads the configuration from instance attributes (``d_offset``,
        ``ddistr``, ``paramg``, ``paramd``, ``shapes``, ``start_params``)
        and uses ``self.convert_params`` to map fitted parameters to the
        arguments of ``ddistr``. Covers pmf/cdf/sf consistency with the
        underlying distribution, fitting on simulated data, chi-square
        goodness-of-fit checks and cdf/ppf/isf round trips.
        """
        d_offset = self.d_offset
        ddistr = self.ddistr
        paramg = self.paramg
        paramd = self.paramd
        shapes = self.shapes
        start_params = self.start_params

        # the rvs calls below pass no random_state, so they depend on this
        # global seed and on the order in which random numbers are drawn
        np.random.seed(987146)

        dp = DiscretizedCount(ddistr, d_offset)
        assert dp.shapes == shapes
        xi = np.arange(5)
        p = dp._pmf(xi, *paramd)

        # pmf is compared with first differences of the underlying cdf
        # evaluated at the integers
        cdf1 = ddistr.cdf(xi, *paramg)
        p1 = np.diff(cdf1)
        assert_allclose(p[: len(p1)], p1, rtol=1e-13)
        # discretized cdf at k is compared with the underlying cdf at k + 1
        cdf = dp._cdf(xi, *paramd)
        assert_allclose(cdf[: len(cdf1) - 1], cdf1[1:], rtol=1e-13)

        # check that scipy dispatch methods work
        p2 = dp.pmf(xi, *paramd)
        assert_allclose(p2, p, rtol=1e-13)
        cdf2 = dp.cdf(xi, *paramd)
        assert_allclose(cdf2, cdf, rtol=1e-13)
        sf = dp.sf(xi, *paramd)
        assert_allclose(sf, 1 - cdf, rtol=1e-13)

        nobs = 2000

        xx = dp.rvs(*paramd, size=nobs)  # , random_state=987146)
        # check that we got a non-trivial rvs
        assert len(xx) == nobs
        assert xx.var() > 0.001
        mod = DiscretizedModel(xx, distr=dp)
        res = mod.fit(start_params=start_params)
        # NOTE(review): the comparison with differences over arange(21)
        # presumably relies on predict returning 20 probabilities by
        # default - confirm against the model's default k_max
        p = mod.predict(res.params, which="probs")
        args = self.convert_params(res.params)
        p1 = -np.diff(ddistr.sf(np.arange(21), *args))
        assert_allclose(p, p1, rtol=1e-13)

        # using cdf limits precision to computation around 1
        p1 = np.diff(ddistr.cdf(np.arange(21), *args))
        assert_allclose(p, p1, rtol=1e-13, atol=1e-15)
        freq = np.bincount(xx.astype(int))
        # truncate at last observed
        k = len(freq)
        if k > 10:
            # reduce low count bins for heavy tailed distributions
            # by pooling everything from the 10th bin on into one cell
            k = 10
            freq[k - 1] += freq[k:].sum()
            freq = freq[:k]
        p = mod.predict(res.params, which="probs", k_max=k)
        # add the remaining tail probability to the last cell so that the
        # first k cells sum to one
        p[k - 1] += 1 - p[:k].sum()
        tchi2 = stats.chisquare(freq, p[:k] * nobs)
        assert tchi2.pvalue > 0.01

        # estimated distribution methods rvs, ppf
        # frozen distribution with estimated parameters
        # Todo results method
        dfr = mod.get_distr(res.params)
        nobs_rvs = 500
        rvs = dfr.rvs(size=nobs_rvs)
        freq = np.bincount(rvs)
        # k_max is set to the sample size here; only the first len(freq)
        # probabilities are used below
        p = mod.predict(res.params, which="probs", k_max=nobs_rvs)
        k = len(freq)
        # same tail adjustment as above: last used cell absorbs the rest
        p[k - 1] += 1 - p[:k].sum()
        tchi2 = stats.chisquare(freq, p[:k] * nobs_rvs)
        assert tchi2.pvalue > 0.01

        # round trip cdf-ppf
        # cdf evaluated slightly above the integers -1, ..., 4
        q = dfr.ppf(dfr.cdf(np.arange(-1, 5) + 1e-6))
        q1 = np.array([-1.,  1.,  2.,  3.,  4.,  5.])
        assert_equal(q, q1)
        # probabilities slightly below the cdf values, clipped at zero
        p = np.maximum(dfr.cdf(np.arange(-1, 5)) - 1e-6, 0)
        q = dfr.ppf(p)
        q1 = np.arange(-1, 5)
        assert_equal(q, q1)
        # exact cdf values at 0, ..., 4 map back to the same integers
        q = dfr.ppf(dfr.cdf(np.arange(5)))
        q1 = np.arange(0, 5)
        assert_equal(q, q1)
        # same check as the first round trip, via isf of 1 - cdf
        q = dfr.isf(1 - dfr.cdf(np.arange(-1, 5) + 1e-6))
        q1 = np.array([-1.,  1.,  2.,  3.,  4.,  5.])
        assert_equal(q, q1)