Example #1
    def test_wallenius_against_mpmath(self):
        # precompute data with mpmath, since the naive implementation
        # (Example #2 below) is not reliable; see source code in gh-13330.
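        # scipy parameterization: M = total number of objects,
        # n = number of type I objects, N = sample size, odds = odds ratio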
        M = 50
        n = 30
        N = 20
        odds = 2.25
        # Expected results, computed with mpmath.
        sup = np.arange(21)
        pmf = np.array([
            3.699003068656875e-20, 5.89398584245431e-17,
            2.1594437742911123e-14, 3.221458044649955e-12,
            2.4658279241205077e-10, 1.0965862603981212e-08,
            3.057890479665704e-07, 5.622818831643761e-06,
            7.056482841531681e-05, 0.000618899425358671, 0.003854172932571669,
            0.01720592676256026, 0.05528844897093792, 0.12772363313574242,
            0.21065898367825722, 0.24465958845359234, 0.1955114898110033,
            0.10355390084949237, 0.03414490375225675, 0.006231989845775931,
            0.0004715577304677075
        ])
        mean = 14.808018384813426
        var = 2.6085975877923717

        # nchypergeom_wallenius.pmf returns 0 for pmf(0) and pmf(1), and pmf(2)
        # has only three digits of accuracy (~ 2.1511e-14).
        assert_allclose(nchypergeom_wallenius.pmf(sup, M, n, N, odds),
                        pmf,
                        rtol=1e-13,
                        atol=1e-13)
        assert_allclose(nchypergeom_wallenius.mean(M, n, N, odds),
                        mean,
                        rtol=1e-13)
        assert_allclose(nchypergeom_wallenius.var(M, n, N, odds),
                        var,
                        rtol=1e-11)
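
The reference values above were precomputed with mpmath (gh-13330). A minimal
sketch of how such values could be reproduced, reusing the same integral
representation as the naive implementation in Example #2 below (the helper
name wallenius_pmf_mp is hypothetical), might look like this:

import mpmath

mpmath.mp.dps = 50  # work with 50 significant digits

def wallenius_pmf_mp(x, M, n, N, odds):
    # Wallenius' noncentral hypergeometric pmf via its integral
    # representation, in scipy's (M, n, N, odds) parameterization
    m1, m2 = n, M - n                      # type I / type II object counts
    omega = mpmath.mpf(odds)
    D = omega*(m1 - x) + (m2 - (N - x))
    integrand = lambda t: (1 - t**(omega/D))**x * (1 - t**(1/D))**(N - x)
    integral = mpmath.quad(integrand, [0, 1])
    return mpmath.binomial(m1, x) * mpmath.binomial(m2, N - x) * integral

# e.g. float(wallenius_pmf_mp(10, 50, 30, 20, 2.25)) should agree closely
# with the pmf[10] entry above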
Example #2
    def test_nchypergeom_wallenius_naive(self):
        # test against a very simple implementation

        np.random.seed(2)
        shape = (2, 4, 3)
        max_m = 100
        m1 = np.random.randint(1, max_m, size=shape)
        m2 = np.random.randint(1, max_m, size=shape)
        N = m1 + m2
        n = randint.rvs(0, N, size=N.shape)
        xl = np.maximum(0, n - m2)
        xu = np.minimum(n, m1)
        x = randint.rvs(xl, xu, size=xl.shape)
        w = np.random.rand(*x.shape) * 2
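        # m1, m2: object counts of the two types; N = m1 + m2 is the total,
        # n the sample size, x a point in the support, w the odds ratio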

        def support(N, m1, n, w):
            m2 = N - m1
            xl = np.maximum(0, n - m2)
            xu = np.minimum(n, m1)
            return xl, xu

        @np.vectorize
        def mean(N, m1, n, w):
            m2 = N - m1
            xl, xu = support(N, m1, n, w)
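            # the approximate mean u solves u/m1 + (1 - (n - u)/m2)**w = 1;
            # find the root numerically on the support [xl, xu]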

            def fun(u):
                return u / m1 + (1 - (n - u) / m2)**w - 1

            return root_scalar(fun, bracket=(xl, xu)).root

        assert_allclose(nchypergeom_wallenius.mean(N, m1, n, w),
                        mean(N, m1, n, w),
                        rtol=2e-2)

        @np.vectorize
        def variance(N, m1, n, w):
            m2 = N - m1
            u = mean(N, m1, n, w)
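            # variance approximation expressed in terms of the approximate mean u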
            a = u * (m1 - u)
            b = (n - u) * (u + m2 - n)
            return N * a * b / ((N - 1) * (m1 * b + m2 * a))

        assert_allclose(nchypergeom_wallenius.stats(N, m1, n, w, moments='v'),
                        variance(N, m1, n, w),
                        rtol=5e-2)

        @np.vectorize
        def pmf(x, N, m1, n, w):
            m2 = N - m1
            xl, xu = support(N, m1, n, w)
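            # integral representation of Wallenius' pmf:
            #   pmf(x) = binom(m1, x) * binom(m2, n - x)
            #            * int_0^1 (1 - t**(w/D))**x * (1 - t**(1/D))**(n - x) dt
            # with D = w*(m1 - x) + (m2 - (n - x))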

            def integrand(t):
                D = w * (m1 - x) + (m2 - (n - x))
                res = (1 - t**(w / D))**x * (1 - t**(1 / D))**(n - x)
                return res

            def f(x):
                t1 = special_binom(m1, x)
                t2 = special_binom(m2, n - x)
                the_integral = quad(integrand,
                                    0,
                                    1,
                                    epsrel=1e-16,
                                    epsabs=1e-16)
                return t1 * t2 * the_integral[0]

            return f(x)

        pmf0 = pmf(x, N, m1, n, w)
        pmf1 = nchypergeom_wallenius.pmf(x, N, m1, n, w)

        atol, rtol = 1e-6, 1e-6
        i = np.abs(pmf1 - pmf0) < atol + rtol * np.abs(pmf0)
        assert (i.sum() > np.prod(shape) / 2)  # works at least half the time

        # for those that fail, discredit the naive implementation
        for N, m1, n, w in zip(N[~i], m1[~i], n[~i], w[~i]):
            # get the support
            m2 = N - m1
            xl, xu = support(N, m1, n, w)
            x = np.arange(xl, xu + 1)

            # calculate sum of pmf over the support
            # the naive implementation is very wrong in these cases
            assert pmf(x, N, m1, n, w).sum() < .5
            assert_allclose(nchypergeom_wallenius.pmf(x, N, m1, n, w).sum(), 1)
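
Both snippets are methods of a test class in scipy's test suite; to run them
standalone they would roughly need the following imports (a sketch, assuming
special_binom is an alias for scipy.special.binom):

import numpy as np
from numpy.testing import assert_allclose
from scipy.integrate import quad
from scipy.optimize import root_scalar
from scipy.special import binom as special_binom
from scipy.stats import nchypergeom_wallenius, randint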