def test_log_stirling1_row(): require_cython() from distributions.lp.special import log_stirling1_row MAX_N = 128 rows = [[1]] for n in range(1, MAX_N + 1): prev = rows[-1] middle = [(n - 1) * prev[k] + prev[k - 1] for k in range(1, n)] row = [0] + middle + [1] rows.append(row) for n in range(1, MAX_N + 1): print 'Row {}:'.format(n), row_py = numpy.log(numpy.array(rows[n][1:], dtype=numpy.double)) row_cpp = log_stirling1_row(n)[1:] assert_equal(len(row_py), len(row_cpp)) # Only the slopes need to be accurate # print 0, # assert_close(row_py[0], row_cpp[0]) # print len(row_py) # assert_close(row_py[-1], row_cpp[-1]) diff_py = numpy.diff(row_py) diff_cpp = numpy.diff(row_cpp) for k_minus_1, (dx_py, dx_cpp) in enumerate(zip(diff_py, diff_cpp)): k = k_minus_1 + 1 print '%d-%d' % (k, k + 1), assert_close(dx_py, dx_cpp, tol=0.5) print
def test_chisq_draw():
    """Smoke-test the hp chi-square sampler over a grid of shapes nu."""
    require_cython()
    import distributions.hp.random
    # Shape parameters spanning several orders of magnitude around 1.
    nus = [1.5 ** exponent for exponent in range(-10, 11)]
    for nu in nus:
        # Assume scipy.stats is correct.
        _test_chisq_draw(distributions.hp.random.sample_chisq, nu)
def test_chisq_draw():
    """Exercise hp sample_chisq for a logarithmic grid of nu values.

    NOTE(review): this redefines test_chisq_draw declared earlier in the
    file, so the earlier definition is shadowed -- confirm the
    duplication is intentional.
    """
    require_cython()
    import distributions.hp.random
    for exponent in range(-10, 11):
        nu = 1.5 ** exponent
        # Assume scipy.stats is correct.
        _test_chisq_draw(distributions.hp.random.sample_chisq, nu)
def test_seed():
    """Seeding the global hp RNG twice with 0 must reproduce its stream."""
    require_cython()
    import distributions.hp.random
    draw = distributions.hp.random.random
    distributions.hp.random.seed(0)
    first_run = [draw() for _ in xrange(10)]
    distributions.hp.random.seed(0)
    second_run = [draw() for _ in xrange(10)]
    assert_equal(first_run, second_run)
def test_log_sum_exp():
    """lp log_sum_exp must agree with numpy.logaddexp.reduce.

    The empty-input case is defined to be 0.0.
    """
    require_cython()
    import distributions.lp.random
    for size in xrange(20):
        scores = numpy.random.normal(size=size).tolist()
        if size:
            expected = numpy.logaddexp.reduce(scores)
        else:
            expected = 0.0
        actual = distributions.lp.random.log_sum_exp(scores)
        assert_close(actual, expected, err_msg='log_sum_exp')
def test_sample_prob_from_scores():
    """Draws from sample_prob_from_scores must match the score weights."""
    require_cython()
    import distributions.lp.random
    for size in range(1, 10):
        scores = numpy.random.normal(size=size).tolist()

        # Closure over the current score vector; called immediately below.
        def sampler():
            return distributions.lp.random.sample_prob_from_scores(scores)

        assert_samples_match_scores(sampler)
def test_normal_draw():
    """Smoke-test hp sample_normal over a grid of means and variances."""
    require_cython()
    import distributions.hp.random
    means = [1.0 * i for i in range(-2, 3)]
    variances = [10.0 ** i for i in range(-3, 4)]
    for mean, variance in itertools.product(means, variances):
        # Assume scipy.stats is correct.
        _test_normal_draw(
            distributions.hp.random.sample_normal, mean, variance)
def test_normal_draw():
    """Exercise hp sample_normal on a mean/variance grid.

    NOTE(review): this redefines test_normal_draw declared earlier in the
    file, so the earlier definition is shadowed -- confirm the
    duplication is intentional.
    """
    require_cython()
    import distributions.hp.random
    grid = itertools.product(
        [1.0 * i for i in range(-2, 3)],      # means
        [10.0 ** i for i in range(-3, 4)])    # variances
    for mean, variance in grid:
        # Assume scipy.stats is correct.
        _test_normal_draw(
            distributions.hp.random.sample_normal, mean, variance)
def test_prob_from_scores():
    """prob_from_scores must agree with sample_prob_from_scores.

    For random score vectors of increasing size, draw (sample, prob)
    pairs and check that re-scoring the sampled index reproduces the
    same probability.
    """
    require_cython()
    import distributions.lp.random
    for size in range(1, 100):
        scores = numpy.random.normal(size=size).tolist()
        for _ in xrange(size):
            sample, prob1 = distributions.lp.random.sample_prob_from_scores(
                scores)
            # The sampled index must be a valid position in scores.
            # (Idiom fix: chained comparison replaces `0 <= sample and
            # sample < size`.)
            assert 0 <= sample < size
            prob2 = distributions.lp.random.prob_from_scores(sample, scores)
            assert_close(prob1, prob2,
                         err_msg='sample_prob_from_scores != prob_from_scores')
def test_prob_from_scores():
    """Check prob_from_scores against sample_prob_from_scores.

    NOTE(review): this redefines test_prob_from_scores declared earlier
    in the file, so the earlier definition is shadowed -- confirm the
    duplication is intentional.
    """
    require_cython()
    import distributions.lp.random
    for size in range(1, 100):
        scores = numpy.random.normal(size=size).tolist()
        for _ in xrange(size):
            sample, prob1 = distributions.lp.random.sample_prob_from_scores(
                scores)
            # Idiom fix: chained comparison replaces
            # `0 <= sample and sample < size`.
            assert 0 <= sample < size
            prob2 = distributions.lp.random.prob_from_scores(
                sample, scores)
            assert_close(
                prob1,
                prob2,
                err_msg='sample_prob_from_scores != prob_from_scores')
def test_sample_discrete():
    """sample_discrete must select the sole or dominant bucket."""
    require_cython()
    import distributions.lp.random

    # All inputs go through the same float32 wrapping.
    def draw(probs):
        return distributions.lp.random.sample_discrete(
            numpy.array(probs, dtype=numpy.float32))

    # A single bucket is always chosen, whatever its (unnormalized) mass.
    assert_equal(draw([.5]), 0)
    assert_equal(draw([1.]), 0)
    assert_equal(draw([1e-3]), 0)
    # With two buckets the test asserts the near-unit-mass index is drawn;
    # this relies on the 1e-3 tail not being hit -- presumably
    # deterministic under the suite's global seeding (confirm).
    assert_equal(draw([1 - 1e-3, 1e-3]), 0)
    assert_equal(draw([1e-3, 1 - 1e-3]), 1)
def test_sample_pair_from_urn(): require_cython() import distributions.lp.random TEST_FAIL_PROB = 1e-5 ITEM_COUNT = 10 items = range(ITEM_COUNT) counts = {(i, j): 0 for i in items for j in items if i != j} pair_count = len(counts) def test_fail_prob(sample_count): ''' Let X1,...,XK ~iid uniform({1, ..., N = pair_count}) and for n in {1,..,N} let Cn = sum_k (1 if Xk = n else 0). Then for each n, P(Cn = 0) = ((N-1) / N)^K P(Cn > 0) = 1 - ((N-1) / N)^K P(test fails) = 1 - P(for all n, Cn > 0) ~ 1 - (1 - ((N-1) / N)^K)^N ''' item_fail_prob = ((pair_count - 1.0) / pair_count) ** sample_count test_fail_prob = 1 - (1 - item_fail_prob) ** pair_count return test_fail_prob sample_count = 1 while test_fail_prob(sample_count) > TEST_FAIL_PROB: sample_count *= 2 print 'pair_count = {}'.format(pair_count) print 'sample_count = {}'.format(sample_count) for _ in xrange(sample_count): i, j = distributions.lp.random.sample_pair_from_urn(items) assert i != j counts[i, j] += 1 assert_less(0, min(counts.itervalues()))
assert_equal, assert_less, assert_greater, assert_is_instance, ) from distributions.dbg.random import sample_discrete from goftests import discrete_goodness_of_fit from distributions.tests.util import ( require_cython, seed_all, assert_hasattr, assert_close, ) from distributions.dbg.random import scores_to_probs import distributions.dbg.clustering require_cython() import distributions.lp.clustering from distributions.lp.clustering import count_assignments from distributions.lp.mixture import MixtureIdTracker MODELS = { 'dbg.LowEntropy': distributions.dbg.clustering.LowEntropy, 'lp.PitmanYor': distributions.lp.clustering.PitmanYor, 'lp.LowEntropy': distributions.lp.clustering.LowEntropy, } SKIP_EXPENSIVE_TESTS = False SAMPLE_COUNT = 2000 MIN_GOODNESS_OF_FIT = 1e-3