def test_wallenius_against_mpmath(self): # precompute data with mpmath since naive implementation above # is not reliable. See source code in gh-13330. M = 50 n = 30 N = 20 odds = 2.25 # Expected results, computed with mpmath. sup = np.arange(21) pmf = np.array([ 3.699003068656875e-20, 5.89398584245431e-17, 2.1594437742911123e-14, 3.221458044649955e-12, 2.4658279241205077e-10, 1.0965862603981212e-08, 3.057890479665704e-07, 5.622818831643761e-06, 7.056482841531681e-05, 0.000618899425358671, 0.003854172932571669, 0.01720592676256026, 0.05528844897093792, 0.12772363313574242, 0.21065898367825722, 0.24465958845359234, 0.1955114898110033, 0.10355390084949237, 0.03414490375225675, 0.006231989845775931, 0.0004715577304677075 ]) mean = 14.808018384813426 var = 2.6085975877923717 # nchypergeom_wallenius.pmf returns 0 for pmf(0) and pmf(1), and pmf(2) # has only three digits of accuracy (~ 2.1511e-14). assert_allclose(nchypergeom_wallenius.pmf(sup, M, n, N, odds), pmf, rtol=1e-13, atol=1e-13) assert_allclose(nchypergeom_wallenius.mean(M, n, N, odds), mean, rtol=1e-13) assert_allclose(nchypergeom_wallenius.var(M, n, N, odds), var, rtol=1e-11)
def test_nchypergeom_wallenius_naive(self): # test against a very simple implementation np.random.seed(2) shape = (2, 4, 3) max_m = 100 m1 = np.random.randint(1, max_m, size=shape) m2 = np.random.randint(1, max_m, size=shape) N = m1 + m2 n = randint.rvs(0, N, size=N.shape) xl = np.maximum(0, n - m2) xu = np.minimum(n, m1) x = randint.rvs(xl, xu, size=xl.shape) w = np.random.rand(*x.shape) * 2 def support(N, m1, n, w): m2 = N - m1 xl = np.maximum(0, n - m2) xu = np.minimum(n, m1) return xl, xu @np.vectorize def mean(N, m1, n, w): m2 = N - m1 xl, xu = support(N, m1, n, w) def fun(u): return u / m1 + (1 - (n - u) / m2)**w - 1 return root_scalar(fun, bracket=(xl, xu)).root assert_allclose(nchypergeom_wallenius.mean(N, m1, n, w), mean(N, m1, n, w), rtol=2e-2) @np.vectorize def variance(N, m1, n, w): m2 = N - m1 u = mean(N, m1, n, w) a = u * (m1 - u) b = (n - u) * (u + m2 - n) return N * a * b / ((N - 1) * (m1 * b + m2 * a)) assert_allclose(nchypergeom_wallenius.stats(N, m1, n, w, moments='v'), variance(N, m1, n, w), rtol=5e-2) @np.vectorize def pmf(x, N, m1, n, w): m2 = N - m1 xl, xu = support(N, m1, n, w) def integrand(t): D = w * (m1 - x) + (m2 - (n - x)) res = (1 - t**(w / D))**x * (1 - t**(1 / D))**(n - x) return res def f(x): t1 = special_binom(m1, x) t2 = special_binom(m2, n - x) the_integral = quad(integrand, 0, 1, epsrel=1e-16, epsabs=1e-16) return t1 * t2 * the_integral[0] return f(x) pmf0 = pmf(x, N, m1, n, w) pmf1 = nchypergeom_wallenius.pmf(x, N, m1, n, w) atol, rtol = 1e-6, 1e-6 i = np.abs(pmf1 - pmf0) < atol + rtol * np.abs(pmf0) assert (i.sum() > np.prod(shape) / 2) # works at least half the time # for those that fail, discredit the naive implementation for N, m1, n, w in zip(N[~i], m1[~i], n[~i], w[~i]): # get the support m2 = N - m1 xl, xu = support(N, m1, n, w) x = np.arange(xl, xu + 1) # calculate sum of pmf over the support # the naive implementation is very wrong in these cases assert pmf(x, N, m1, n, w).sum() < .5 assert_allclose(nchypergeom_wallenius.pmf(x, N, m1, n, w).sum(), 1)