예제 #1
0
def test_multinmoial_goodness_of_fit():
    """Check mgof on counts that do and do not match the given probabilities.

    For every tested dimension, five random probability vectors are drawn
    and counts sampled from each must score above the threshold.  Counts
    sampled from a uniform distribution are then scored against the last
    random vector of that dimension and must fall below the threshold.
    """
    threshold = 1e-3
    total = int(1e5)
    for dim in (3, 10, 20):
        flat_alpha = [1] * dim
        for _trial in range(5):
            probs = np.random.dirichlet(flat_alpha)
            matching = np.random.multinomial(total, probs)
            assert_greater(mgof(probs, matching, total), threshold)

        # `probs` intentionally still holds the vector from the final trial.
        uniform = [1. / dim] * dim
        mismatching = np.random.multinomial(total, uniform)
        assert_less(mgof(probs, mismatching, total), threshold)
예제 #2
0
def test_multinmoial_goodness_of_fit():
    """Verify that mgof separates well-fitting from ill-fitting counts.

    Counts drawn from a random probability vector are expected to score
    above ``cutoff`` against that vector; counts drawn uniformly are
    expected to score below it against the most recent random vector.
    """
    cutoff = 1e-3
    draws = int(1e5)
    repeats = 5
    dimensions = [3, 10, 20]
    for size in dimensions:
        for _ in range(repeats):
            probs = np.random.dirichlet([1] * size)
            good_counts = np.random.multinomial(draws, probs)
            good_score = mgof(probs, good_counts, draws)
            assert_greater(good_score, cutoff)

        # Reuses the last `probs` produced by the loop above.
        bad_counts = np.random.multinomial(draws, [1. / size] * size)
        bad_score = mgof(probs, bad_counts, draws)
        assert_less(bad_score, cutoff)
예제 #3
0
def check_dpm(impl, data_count, beta0):
    """Compare samples from a component model with its predicted probabilities.

    Random integers below 50 are histogrammed into string-keyed counts used
    as sufficient statistics, with the remaining ``1 - beta0`` mass split
    evenly over the same keys as ``betas``.  SAMPS values are then sampled
    from the model and their frequencies are scored with mgof against the
    exponentiated ``pred_prob`` values; the value -1 is tallied as an extra
    category of its own.
    """
    check_cm(impl)
    raw_hist = histogram(np.random.randint(50, size=data_count))
    data = {str(idx): obs for idx, obs in enumerate(raw_hist)}
    # The division stays inside the comprehension so that it is only
    # evaluated when there is at least one key.
    betas = {str(idx): (1 - beta0) / len(data) for idx in range(len(data))}
    hp = {'gamma': 1., 'alpha': 1., 'beta0': beta0, 'betas': betas}
    cm = ComponentModel(impl, ss={'counts': data}, hp=hp)

    samples = cm.sample_data(SAMPS)
    regular = [value for value in samples if value != -1]
    counts = list(histogram(regular))
    log_probs = [cm.pred_prob(value) for value in range(max(samples) + 1)]
    probs = list(np.exp(log_probs))

    # Treat -1 as one more category after the regular values.
    counts.append(sum(1 for value in samples if value == -1))
    probs.append(np.exp(cm.pred_prob(-1)))
    assert_less(1 - sum(probs), THRESH)

    ranked = sorted(zip(probs, counts), reverse=True)
    probs, counts = zip(*ranked[:TOPN])
    score = mgof(probs, counts, SAMPS, truncated=True)
    assert_greater(score, THRESH)
예제 #4
0
def _check_discrete(cm):
    """Score sampled value frequencies against the model's predictions.

    Draws SAMPS values from ``cm``, histograms them, and exponentiates
    ``cm.pred_prob`` for every integer from 0 up to the largest sample.
    The probabilities must sum to within THRESH of one, and the TOPN most
    probable categories must pass the mgof check.
    """
    draws = cm.sample_data(SAMPS)
    observed = histogram(draws)
    upper = max(draws) + 1
    log_probs = [cm.pred_prob(value) for value in range(upper)]
    expected = np.exp(log_probs)
    assert_less(1 - sum(expected), THRESH)

    # Keep only the most probable categories, highest first.
    ranked = sorted(zip(expected, observed), reverse=True)
    top_probs, top_counts = zip(*ranked[:TOPN])
    score = mgof(top_probs, top_counts, SAMPS, truncated=True)
    assert_greater(score, THRESH)
예제 #5
0
def _check_discrete(cm):
    """Check that samples from ``cm`` agree with its predicted probabilities.

    The histogram of SAMPS sampled values is paired with exp(pred_prob) for
    each integer in ``range(max(samples) + 1)``; the pairs are sorted by
    descending probability, cut to TOPN entries and scored with mgof.
    """
    samples = cm.sample_data(SAMPS)
    freq = histogram(samples)
    candidates = range(max(samples) + 1)
    mass = np.exp([cm.pred_prob(candidate) for candidate in candidates])

    # Any missing probability mass has to stay below the tolerance.
    shortfall = 1 - sum(mass)
    assert_less(shortfall, THRESH)

    pairs = sorted(zip(mass, freq), reverse=True)[:TOPN]
    mass, freq = zip(*pairs)
    assert_greater(mgof(mass, freq, SAMPS, truncated=True), THRESH)
예제 #6
0
def check_nich(impl, data_count, mean, std):
    """Compare binned samples from a component model with its density.

    When ``data_count`` is non-zero, normally distributed data supplies the
    count/mean/variance sufficient statistics; otherwise the model is built
    with ``ss=None``.  SAMPS values are sampled and binned, each bin's
    probability is obtained by integrating exp(pred_prob) over the bin, and
    the TOPN most probable bins are scored with mgof.
    """
    check_cm(impl)
    if data_count:
        observations = np.random.normal(mean, std, size=data_count)
        ss = {
            'count': data_count,
            'mean': observations.mean(),
            'variance': observations.var(),
        }
    else:
        ss = None
    cm = ComponentModel(impl, ss=ss)
    draws = cm.sample_data(SAMPS)
    counts, bin_ranges = bin_samples(draws)

    def density(x):
        return np.exp(cm.pred_prob(x))

    # Quadrature is not ideal, but for now it is the easiest way to score
    # the bins and it appears to work.
    probs = []
    for low, high in bin_ranges:
        probs.append(quad(density, low, high, epsabs=0., epsrel=1e-6)[0])
    assert_less(1 - sum(probs), THRESH)

    ranked = sorted(zip(probs, counts), reverse=True)
    probs, counts = zip(*ranked[:TOPN])
    score = mgof(probs, counts, SAMPS, truncated=True)
    assert_greater(score, THRESH)
예제 #7
0
def check_dpm(impl, data_count, beta0):
    """Check sampled frequencies of a component model against pred_prob.

    Sufficient statistics are string-keyed histogram counts of random
    integers below 50; ``betas`` gives each key an equal share of
    ``1 - beta0``.  After sampling SAMPS values, the regular values and the
    special value -1 are counted separately, paired with their
    exponentiated ``pred_prob`` values and scored with mgof.
    """
    check_cm(impl)
    hist = histogram(np.random.randint(50, size=data_count))
    data = {}
    for position, observed in enumerate(hist):
        data[str(position)] = observed
    betas = {}
    for position in range(len(data)):
        betas[str(position)] = (1 - beta0) / len(data)

    hyper = {
        'gamma': 1.,
        'alpha': 1.,
        'beta0': beta0,
        'betas': betas,
    }
    stats = {'counts': data}
    cm = ComponentModel(impl, ss=stats, hp=hyper)

    samples = cm.sample_data(SAMPS)
    counts = list(histogram([s for s in samples if s != -1]))
    probs = list(np.exp([cm.pred_prob(v) for v in range(max(samples) + 1)]))
    # -1 gets its own trailing slot in both lists.
    counts.append(len([s for s in samples if s == -1]))
    probs.append(np.exp(cm.pred_prob(-1)))
    assert_less(1 - sum(probs), THRESH)

    best = sorted(zip(probs, counts), reverse=True)[:TOPN]
    probs, counts = zip(*best)
    assert_greater(mgof(probs, counts, SAMPS, truncated=True), THRESH)
예제 #8
0
def check_nich(impl, data_count, mean, std):
    """Check binned samples of a component model against integrated density.

    Builds the model from count/mean/variance statistics of normal data
    (or from no statistics when ``data_count`` is zero), samples SAMPS
    values, bins them, and integrates exp(pred_prob) over each bin.  The
    bin probabilities must sum to within THRESH of one and the TOPN most
    probable bins must pass the mgof check.
    """
    check_cm(impl)
    ss = None
    if data_count:
        data = np.random.normal(mean, std, size=data_count)
        ss = dict(count=data_count, mean=data.mean(), variance=data.var())
    cm = ComponentModel(impl, ss=ss)
    samples = cm.sample_data(SAMPS)
    counts, bin_ranges = bin_samples(samples)

    def pdf(x):
        return np.exp(cm.pred_prob(x))

    def bin_mass(lower, upper):
        return quad(pdf, lower, upper, epsabs=0., epsrel=1e-6)[0]

    # Using quadrature is unfortunate, but for now it is the easiest way
    # to score the bins and it seems to work.
    probs = [bin_mass(lower, upper) for lower, upper in bin_ranges]
    assert_less(1 - sum(probs), THRESH)

    top = sorted(zip(probs, counts), reverse=True)[:TOPN]
    probs, counts = zip(*top)
    p = mgof(probs, counts, SAMPS, truncated=True)
    assert_greater(p, THRESH)