def check_generate(name):
    """Build a BasicDistribution from generated parameters and sample it.

    After ``check_cm(name)``, a ``ComponentModel(name)`` has ``realize_hp()``
    called on it; the value returned by ``generate_post()`` is passed as
    ``pm`` to ``BasicDistribution`` and ``sample_data()`` is called once.
    Nothing is asserted: the check passes if no call raises.
    """
    check_cm(name)
    model = ComponentModel(name)
    model.realize_hp()
    dist = BasicDistribution(name, pm=model.generate_post())
    dist.sample_data()
Exemple #2
0
def check_ss_io(name):
    """Check that ``dump_ss()`` output round-trips through the constructor.

    A model rebuilt with ``ss=cm.dump_ss()`` must dump the same ``ss`` as
    the source model, both before and after one sampled data point has
    been added to the source model.
    """
    check_cm(name)
    model = ComponentModel(name)
    model.realize_hp()

    def assert_round_trip():
        # Rebuild from the dumped state, then compare the two dumps.
        rebuilt = ComponentModel(name, ss=model.dump_ss())
        assert_equal(rebuilt.dump_ss(), model.dump_ss())

    assert_round_trip()
    model.add_data(model.sample_data())
    assert_round_trip()
def check_summarize(name):
    """Call ``summarize`` on a growing list of samples from the model.

    ``COUNT`` samples are drawn one at a time; after each draw
    ``summarize(name, samples)`` is invoked on everything collected so far.
    Nothing is asserted: the check passes if no call raises.
    """
    check_cm(name)
    model = ComponentModel(name)
    samples = []
    for _ in range(COUNT):
        samples.append(model.sample_data())
        # Summarize after every draw, so each prefix length is exercised.
        summarize(name, samples)
def check_summarize(name):
    """Run ``summarize`` over every prefix of ``COUNT`` sampled data points.

    Samples are drawn one by one from ``ComponentModel(name)`` and the
    accumulated list is handed to ``summarize`` after each draw. The check
    only verifies that no call raises.
    """
    check_cm(name)
    model = ComponentModel(name)
    collected = []
    for _ in range(COUNT):
        collected.append(model.sample_data())
        summarize(name, collected)
def check_generate(name):
    """Sample from a BasicDistribution parameterized by ``generate_post()``.

    Calls ``realize_hp()`` on a fresh ``ComponentModel(name)``, feeds the
    result of its ``generate_post()`` to ``BasicDistribution`` as ``pm``
    and calls ``sample_data()`` once. Passes if nothing raises.
    """
    check_cm(name)
    source = ComponentModel(name)
    source.realize_hp()
    generated = source.generate_post()
    BasicDistribution(name, pm=generated).sample_data()
def check_dpm(impl, data_count, beta0):
    """Compare sampled value frequencies with the model's predicted probabilities.

    Builds ``ss``/``hp`` from a histogram of ``data_count`` random integers
    in ``[0, 50)``, draws ``SAMPS`` samples and checks that
    (a) the predicted probabilities sum to within ``THRESH`` of one, and
    (b) ``mgof`` on the ``TOPN`` most probable outcomes returns a value
    greater than ``THRESH``.

    The value ``-1`` is tallied and scored separately from the non-negative
    values (presumably the "unseen value" outcome -- confirm against the
    model implementation).
    """
    check_cm(impl)
    hist = histogram(np.random.randint(50, size=data_count))
    counts_by_key = {str(i): obs for i, obs in enumerate(hist)}
    key_count = len(counts_by_key)
    # Every key receives an equal share of the mass not assigned to beta0.
    betas = {str(i): (1 - beta0) / key_count for i in range(key_count)}
    hp = {'gamma': 1., 'alpha': 1., 'beta0': beta0, 'betas': betas}
    cm = ComponentModel(impl, ss={'counts': counts_by_key}, hp=hp)

    samples = cm.sample_data(SAMPS)
    counts = list(histogram([y for y in samples if y != -1]))
    log_probs = [cm.pred_prob(x) for x in range(max(samples) + 1)]
    probs = list(np.exp(log_probs))
    # Append the -1 outcome as the final bin of both sequences.
    counts.append(sum(1 for y in samples if y == -1))
    probs.append(np.exp(cm.pred_prob(-1)))
    assert_less(1 - sum(probs), THRESH)

    ranked = sorted(zip(probs, counts), reverse=True)[:TOPN]
    probs, counts = zip(*ranked)
    p = mgof(probs, counts, SAMPS, truncated=True)
    assert_greater(p, THRESH)
def check_dd(impl, data_count, D):
    """Run ``_check_discrete`` on a model built from ``D``-bin histogram counts.

    ``data_count`` integers are drawn uniformly from ``[0, D)`` and
    histogrammed into ``D`` bins; the result is passed as ``ss['counts']``
    together with ``p={'D': D}``.
    """
    check_cm(impl)
    draws = np.random.randint(D, size=data_count)
    counts = histogram(draws, bin_count=D)
    model = ComponentModel(impl, ss={'counts': counts}, p={'D': D})
    model.realize_hp()
    _check_discrete(model)
def check_sample_post_seed(name):
    """Check that ``sample_post()`` is reproducible after ``seed(0)``.

    Two fresh models are each created right after ``seed(0)`` and asked for
    ``COUNT`` posterior samples; the two sequences must match element-wise
    under ``assert_array_almost_equal``.
    """
    check_cm(name)

    def draw_after_reseed():
        # Reseed, then build the model and sample, in exactly this order.
        seed(0)
        model = ComponentModel(name)
        return [model.sample_post() for _ in range(COUNT)]

    first_run = draw_after_reseed()
    second_run = draw_after_reseed()
    for lhs, rhs in zip(first_run, second_run):
        assert_array_almost_equal(lhs, rhs)
Exemple #9
0
def test_logprob():
    """Check ``data_prob()`` of a 'PYP' model as two data points are added.

    Expected values: ``0.`` when empty, ``0.`` after adding ``0`` and
    ``log(.5)`` after also adding ``1``.
    """
    pyp = ComponentModel('PYP')
    assert_almost_equal(pyp.data_prob(), 0.)
    # (value to add, expected data_prob afterwards)
    steps = ((0, 0.), (1, log(.5)))
    for value, expected in steps:
        pyp.add_data(value)
        assert_almost_equal(pyp.data_prob(), expected)
Exemple #10
0
def check_sample_data_seed(name):
    """Check that ``sample_data()`` is reproducible after ``seed(0)``.

    Two models are each built and ``realize_hp()``-ed immediately after
    ``seed(0)``; ten samples from each must agree pairwise under
    ``assert_almost_equal``.
    """
    check_cm(name)
    n = 10

    def draw_after_reseed():
        # Reseed first so model construction, realize_hp and sampling
        # all start from the same generator state.
        seed(0)
        model = ComponentModel(name)
        model.realize_hp()
        return [model.sample_data() for _ in range(n)]

    first_run = draw_after_reseed()
    second_run = draw_after_reseed()
    for lhs, rhs in zip(first_run, second_run):
        assert_almost_equal(lhs, rhs)
Exemple #11
0
def check_sums(name):
    """Check that summed ``pred_prob`` values equal the final ``data_prob()``.

    ``COUNT`` values are sampled up front. Each is then scored with
    ``pred_prob`` before being added to the model, and the running total
    is compared with ``data_prob()`` once all values are in.
    """
    check_cm(name)
    model = ComponentModel(name)
    model.realize_hp()
    # Draw every value before any is added to the model.
    drawn = [model.sample_data() for _ in range(COUNT)]
    total = 0.
    for dp in drawn:
        total += model.pred_prob(dp)
        model.add_data(dp)
    assert_almost_equal(total, model.data_prob())
Exemple #12
0
def check_nich(impl, data_count, mean, std):
    """Compare binned samples with the integrated predictive density.

    When ``data_count`` is truthy the model is seeded with the count, mean
    and variance of ``data_count`` normal draws; otherwise ``ss`` is None.
    ``SAMPS`` samples are binned with ``bin_samples`` and each bin's
    probability is obtained by integrating ``exp(pred_prob(x))`` over it.
    The bin probabilities must sum to within ``THRESH`` of one, and
    ``mgof`` on the ``TOPN`` most probable bins must exceed ``THRESH``.
    """
    check_cm(impl)
    if data_count:
        data = np.random.normal(mean, std, size=data_count)
        ss = {
            'count': data_count,
            'mean': data.mean(),
            'variance': data.var(),
        }
    else:
        ss = None
    cm = ComponentModel(impl, ss=ss)
    samples = cm.sample_data(SAMPS)
    counts, bin_ranges = bin_samples(samples)

    def pdf(x):
        return np.exp(cm.pred_prob(x))

    # Quadrature is an unfortunate choice, but for now it is the easiest
    # way to score bins and it seems to work.
    probs = []
    for lower, upper in bin_ranges:
        probs.append(quad(pdf, lower, upper, epsabs=0., epsrel=1e-6)[0])
    assert_less(1 - sum(probs), THRESH)

    ranked = sorted(zip(probs, counts), reverse=True)[:TOPN]
    probs, counts = zip(*ranked)
    p = mgof(probs, counts, SAMPS, truncated=True)
    assert_greater(p, THRESH)
Exemple #13
0
def check_sums(name):
    """Check that per-point ``pred_prob`` scores add up to ``data_prob()``.

    All ``COUNT`` values are sampled first; each is scored with
    ``pred_prob`` and only then added, so every score is conditioned on the
    values added before it.
    """
    check_cm(name)
    cm = ComponentModel(name)
    cm.realize_hp()
    accumulated = 0.
    for dp in [cm.sample_data() for _ in range(COUNT)]:
        accumulated += cm.pred_prob(dp)
        cm.add_data(dp)
    assert_almost_equal(accumulated, cm.data_prob())
Exemple #14
0
def check_dpm(impl, data_count, beta0):
    """Check sampled frequencies against predicted probabilities.

    ``ss['counts']`` is a histogram of ``data_count`` random integers in
    ``[0, 50)`` keyed by stringified bin index, and ``hp['betas']`` gives
    every key the value ``(1 - beta0) / len(counts)``. After drawing
    ``SAMPS`` samples the predicted probabilities must sum to within
    ``THRESH`` of one and ``mgof`` on the ``TOPN`` most probable outcomes
    must exceed ``THRESH``.

    ``-1`` is counted and scored as its own outcome (presumably the
    "unseen value" case -- confirm against the model implementation).
    """
    check_cm(impl)
    raw_hist = histogram(np.random.randint(50, size=data_count))
    data = {str(idx): obs for idx, obs in enumerate(raw_hist)}
    betas = {str(idx): (1 - beta0) / len(data) for idx in range(len(data))}
    cm = ComponentModel(
        impl,
        ss={'counts': data},
        hp={'gamma': 1., 'alpha': 1., 'beta0': beta0, 'betas': betas},
    )
    samples = cm.sample_data(SAMPS)

    seen = [y for y in samples if y != -1]
    unseen = [y for y in samples if y == -1]
    counts = list(histogram(seen))
    probs = list(np.exp([cm.pred_prob(x) for x in range(max(samples) + 1)]))
    # The -1 outcome goes last in both sequences.
    counts.append(len(unseen))
    probs.append(np.exp(cm.pred_prob(-1)))
    assert_less(1 - sum(probs), THRESH)

    top = sorted(zip(probs, counts), reverse=True)[:TOPN]
    probs, counts = zip(*top)
    assert_greater(mgof(probs, counts, SAMPS, truncated=True), THRESH)
Exemple #15
0
def check_gp(impl, data_count, lam):
    """Run ``_check_discrete`` on a model built from Poisson-draw statistics.

    ``data_count`` values are drawn from ``np.random.poisson(lam)`` and
    reduced to the ``ss`` entries ``n``, ``sum`` and ``log_prod``.
    """
    check_cm(impl)
    draws = np.random.poisson(lam, size=data_count)
    # NOTE(review): a Poisson draw of 0 makes np.log yield -inf here;
    # confirm this is the intended definition of 'log_prod'.
    log_prod = np.sum(np.log(draws))
    ss = {'n': data_count, 'sum': np.sum(draws), 'log_prod': log_prod}
    _check_discrete(ComponentModel(impl, ss=ss))
Exemple #16
0
def check_sample_data_seed(name):
    """Check that reseeding with ``seed(0)`` reproduces ``sample_data()`` draws.

    The sequence "seed, construct, ``realize_hp()``, draw ten samples" is
    executed twice and the two sample lists are compared element by element.
    """
    check_cm(name)
    n = 10
    runs = []
    for _ in range(2):
        seed(0)
        model = ComponentModel(name)
        model.realize_hp()
        runs.append([model.sample_data() for _ in range(n)])
    first_run, second_run = runs
    for lhs, rhs in zip(first_run, second_run):
        assert_almost_equal(lhs, rhs)
def test_vectorize():
    """Check ``mod.add_pred_probs`` against per-model ``pred_prob`` calls.

    For each name in ``MODELS``, ``COMPS`` models sharing one dumped ``hp``
    are each loaded with ``DPS`` of their own samples. Then, for a sample
    ``y`` drawn from each model in turn, the scores written by
    ``add_pred_probs`` into a zeroed array must match ``pred_prob(y)`` of
    the corresponding model.
    """
    for name in MODELS:
        check_cm(name)
        template = ComponentModel(name)
        template.realize_hp()
        shared_hp = template.dump_hp()
        cms = [ComponentModel(name, hp=shared_hp) for _ in range(COMPS)]
        for cm in cms:
            # Draw all DPS points before adding any of them.
            for dp in [cm.sample_data() for _ in range(DPS)]:
                cm.add_data(dp)

        mod = cms[0].mod
        hp = cms[0].hp
        ss = [cm.ss for cm in cms]
        for sampler in cms:
            y = sampler.sample_data()
            scores = numpy.zeros(COMPS)
            mod.add_pred_probs(hp, ss, y, scores)
            for scored, score in zip(cms, scores):
                assert_almost_equal(score, scored.pred_prob(y))
def check_nich(impl, data_count, mean, std):
    """Check binned sample counts against the integrated predictive density.

    ``ss`` is None unless ``data_count`` is truthy, in which case it holds
    the count, mean and variance of ``data_count`` normal draws. Each bin
    from ``bin_samples`` is scored by integrating ``exp(pred_prob(x))``
    over its range. The scores must sum to within ``THRESH`` of one and
    ``mgof`` on the ``TOPN`` most probable bins must exceed ``THRESH``.
    """
    check_cm(impl)
    ss = None
    if data_count:
        draws = np.random.normal(mean, std, size=data_count)
        ss = {'count': data_count,
              'mean': draws.mean(),
              'variance': draws.var()}
    cm = ComponentModel(impl, ss=ss)
    counts, bin_ranges = bin_samples(cm.sample_data(SAMPS))

    def density(x):
        return np.exp(cm.pred_prob(x))

    # Using quadrature is unfortunate, but for now it is the easiest way
    # to score the bins and it seems to work.
    probs = [
        quad(density, lo, hi, epsabs=0., epsrel=1e-6)[0]
        for lo, hi in bin_ranges
    ]
    assert_less(1 - sum(probs), THRESH)
    best = sorted(zip(probs, counts), reverse=True)[:TOPN]
    probs, counts = zip(*best)
    assert_greater(mgof(probs, counts, SAMPS, truncated=True), THRESH)
def check_sample_post_seed(name):
    """Check that reseeding with ``seed(0)`` reproduces ``sample_post()`` draws.

    The sequence "seed, construct, draw ``COUNT`` posterior samples" is run
    twice and the results are compared pairwise with
    ``assert_array_almost_equal``.
    """
    check_cm(name)
    runs = []
    for _ in range(2):
        seed(0)
        model = ComponentModel(name)
        runs.append([model.sample_post() for _ in range(COUNT)])
    first_run, second_run = runs
    for lhs, rhs in zip(first_run, second_run):
        assert_array_almost_equal(lhs, rhs)
def test_vectorize():
    """Compare vectorized ``add_pred_probs`` scores with ``pred_prob``.

    For every name in ``MODELS``: build ``COMPS`` models from one shared
    ``hp`` dump, add ``DPS`` self-sampled points to each, then for one new
    sample per model check that ``mod.add_pred_probs`` fills a zeroed score
    array with the same values ``pred_prob`` returns model by model.
    """
    for name in MODELS:
        check_cm(name)
        base = ComponentModel(name)
        base.realize_hp()
        hp0 = base.dump_hp()
        cms = [ComponentModel(name, hp=hp0) for _ in range(COMPS)]
        for member in cms:
            points = [member.sample_data() for _ in range(DPS)]
            for point in points:
                member.add_data(point)

        first = cms[0]
        mod, hp = first.mod, first.hp
        ss = [member.ss for member in cms]
        for source in cms:
            y = source.sample_data()
            scores = numpy.zeros(COMPS)
            mod.add_pred_probs(hp, ss, y, scores)
            # A distinct loop name keeps the outer iteration variable intact.
            for member, score in zip(cms, scores):
                assert_almost_equal(score, member.pred_prob(y))
Exemple #21
0
def check_probs(a, b):
    """Check that two implementations agree on ``data_prob`` and ``pred_prob``.

    Model ``b`` is constructed with the ``hp`` dumped from model ``a``.
    ``DPS`` points sampled from ``a`` are then fed to both models; before
    each point is added, both ``data_prob()`` and ``pred_prob(y)`` must
    match.
    """
    check_cm(a)
    check_cm(b)
    model_a = ComponentModel(a)
    model_a.realize_hp()
    model_b = ComponentModel(b, hp=model_a.dump_hp())
    # Sample every point up front, before either model receives data.
    points = [model_a.sample_data() for _ in range(DPS)]
    for y in points:
        assert_almost_equal(model_a.data_prob(), model_b.data_prob())
        assert_almost_equal(model_a.pred_prob(y), model_b.pred_prob(y))
        model_a.add_data(y)
        model_b.add_data(y)
Exemple #22
0
def check_ss(a, b):
    """Check that two implementations keep matching ``dump_ss()`` output.

    Model ``b`` shares the ``hp`` dumped from model ``a``. The dumps must be
    equal initially and stay close (``assert_close``) after every
    ``add_data`` and after every subsequent ``remove_data`` of the same
    ``DPS`` sampled points.
    """
    check_cm(a)
    check_cm(b)
    model_a = ComponentModel(a)
    model_a.realize_hp()
    model_b = ComponentModel(b, hp=model_a.dump_hp())
    points = [model_a.sample_data() for _ in range(DPS)]
    assert_equal(model_a.dump_ss(), model_b.dump_ss())

    for y in points:
        model_a.add_data(y)
        model_b.add_data(y)
        assert_close(model_a.dump_ss(), model_b.dump_ss())

    # Removal goes in the same order the points were added.
    for y in points:
        model_a.remove_data(y)
        model_b.remove_data(y)
        assert_close(model_a.dump_ss(), model_b.dump_ss())
Exemple #23
0
def check_hp(a, b):
    """Check that two freshly constructed models dump equal ``hp``."""
    check_cm(a)
    check_cm(b)
    model_a = ComponentModel(a)
    model_b = ComponentModel(b)
    assert_equal(model_a.dump_hp(), model_b.dump_hp())
def add_remove_add(name, raw_hps, raw_ss0=None):
    '''
    Exercise add_data, remove_data, pred_prob and data_prob together.

    For each entry of raw_hps a model is built from that hp and raw_ss0,
    then:
      1. 20 sampled points are scored with pred_prob and added; the summed
         score must be close to data_prob().
      2. The points are removed in shuffled order; cm.ss must be close to
         the ss of a fresh model built from raw_ss0.
      3. The points are re-added in another shuffled order; cm.ss must be
         close to the ss captured after step 1.
    '''
    DATA_COUNT = 20

    for raw_hp in raw_hps:
        cm = ComponentModel(name, hp=raw_hp, ss=raw_ss0)
        cm.realize_hp()

        points = []
        total = 0
        for _ in range(DATA_COUNT):
            dp = cm.sample_data()
            points.append(dp)
            total += cm.pred_prob(dp)
            cm.add_data(dp)

        # Snapshot of the fully loaded state, used for the final comparison.
        cm_all = ComponentModel(name, ss=cm.dump_ss())
        assert_close(
            total,
            cm.data_prob(),
            err_msg='p(x1,...,xn) != p(x1) p(x2|x1) p(xn|...)')

        random.shuffle(points)
        for dp in points:
            cm.remove_data(dp)

        cm0 = ComponentModel(name, ss=raw_ss0)
        assert_close(cm.ss, cm0.ss, err_msg='ss + data - data != ss')

        random.shuffle(points)
        for dp in points:
            cm.add_data(dp)

        assert_close(cm.ss, cm_all.ss, err_msg='ss - data + data != ss')
Exemple #25
0
def check_exchangeable(name):
    """Check that ``data_prob()`` does not depend on insertion order.

    ``COUNT`` sampled values are added in one permuted order, removed again
    in that same order (after which ``data_prob()`` must be ~0), and added
    in a second permuted order. The ``data_prob()`` values after the two
    full insertions must agree.
    """
    check_cm(name)
    cm = ComponentModel(name)
    cm.realize_hp()
    values = [cm.sample_data() for _ in range(COUNT)]
    first_order = permutation(COUNT)
    second_order = permutation(COUNT)

    for idx in first_order:
        cm.add_data(values[idx])
    prob1 = cm.data_prob()

    for idx in first_order:
        cm.remove_data(values[idx])
    assert_almost_equal(cm.data_prob(), 0.)

    for idx in second_order:
        cm.add_data(values[idx])
    prob2 = cm.data_prob()

    assert_almost_equal(prob1, prob2)
def add_remove_add(name, raw_hps, raw_ss0=None):
    '''
    Tests add_data, remove_data, pred_prob and data_prob in combination.

    Per hp in raw_hps: add 20 sampled points while summing their
    pred_prob scores (the sum must be close to data_prob()), remove them
    in shuffled order (ss must be close to that of a model built from
    raw_ss0), then add them back in a new shuffled order (ss must be close
    to the ss dumped after the first round of additions).
    '''
    DATA_COUNT = 20

    for raw_hp in raw_hps:
        cm = ComponentModel(name, hp=raw_hp, ss=raw_ss0)
        cm.realize_hp()

        data = []
        running_score = 0
        for _ in range(DATA_COUNT):
            sample = cm.sample_data()
            data.append(sample)
            # Score the point before it becomes part of the model.
            running_score += cm.pred_prob(sample)
            cm.add_data(sample)

        loaded = ComponentModel(name, ss=cm.dump_ss())
        assert_close(running_score,
                     cm.data_prob(),
                     err_msg='p(x1,...,xn) != p(x1) p(x2|x1) p(xn|...)')

        random.shuffle(data)
        for sample in data:
            cm.remove_data(sample)

        baseline = ComponentModel(name, ss=raw_ss0)
        assert_close(cm.ss, baseline.ss, err_msg='ss + data - data != ss')

        random.shuffle(data)
        for sample in data:
            cm.add_data(sample)

        assert_close(cm.ss, loaded.ss, err_msg='ss - data + data != ss')
Exemple #27
0
def check_hp_io(name):
    """Check that ``dump_hp()`` output round-trips through the constructor.

    A model rebuilt with ``hp=cm.dump_hp()`` must dump the same ``hp`` as
    the model it was rebuilt from.
    """
    check_cm(name)
    model = ComponentModel(name)
    model.realize_hp()
    rebuilt = ComponentModel(name, hp=model.dump_hp())
    assert_equal(rebuilt.dump_hp(), model.dump_hp())
Exemple #28
0
def check_ss_io(name):
    """Check the ``dump_ss()`` round trip before and after adding a point.

    At both stages a model constructed with ``ss=cm.dump_ss()`` must return
    an equal value from its own ``dump_ss()``.
    """
    check_cm(name)
    cm = ComponentModel(name)
    cm.realize_hp()

    rebuilt = ComponentModel(name, ss=cm.dump_ss())
    assert_equal(rebuilt.dump_ss(), cm.dump_ss())

    cm.add_data(cm.sample_data())

    rebuilt = ComponentModel(name, ss=cm.dump_ss())
    assert_equal(rebuilt.dump_ss(), cm.dump_ss())
Exemple #29
0
def test_crp_equals_pyp():
    """Check that 'CRP' and 'PYP' with ``d=0.`` give matching probabilities.

    For each ``alpha`` in (1, 5, 10), 1000 points are sampled from the CRP
    model. Both models must agree on ``pred_prob`` for every point before
    it is added, and on ``data_prob()`` once all points are in.
    """
    n = 1000
    for alpha in (1., 5., 10.):
        crp = ComponentModel('CRP', hp={'alpha': alpha})
        pyp = ComponentModel('PYP', hp={'alpha': alpha, 'd': 0.})
        for _ in range(n):
            draw = crp.sample_data()
            assert_almost_equal(crp.pred_prob(draw), pyp.pred_prob(draw))
            crp.add_data(draw)
            pyp.add_data(draw)
        assert_almost_equal(crp.data_prob(), pyp.data_prob())
Exemple #30
0
def check_dd(impl, data_count, D):
    """Run ``_check_discrete`` on a model seeded with ``D``-bin counts.

    The counts are a ``D``-bin histogram of ``data_count`` integers drawn
    from ``np.random.randint(D)``; ``D`` is also passed as ``p['D']``.
    """
    check_cm(impl)
    counts = histogram(np.random.randint(D, size=data_count), bin_count=D)
    ss = {'counts': counts}
    params = {'D': D}
    cm = ComponentModel(impl, ss=ss, p=params)
    cm.realize_hp()
    _check_discrete(cm)
def check_summarize_N(name):
    """Call ``summarize`` on the result of a single ``sample_data(COUNT)``.

    Nothing is asserted: the check passes if no call raises.
    """
    check_cm(name)
    batch = ComponentModel(name).sample_data(COUNT)
    summarize(name, batch)
Exemple #32
0
def check_exchangeable(name):
    """Check that ``data_prob()`` is the same for two insertion orders.

    Values are added following permutation ``p1``, removed following ``p1``
    (``data_prob()`` must then be ~0) and added again following ``p2``.
    The two fully loaded ``data_prob()`` values must agree.
    """
    check_cm(name)
    model = ComponentModel(name)
    model.realize_hp()
    values = [model.sample_data() for _ in range(COUNT)]
    p1 = permutation(COUNT)
    p2 = permutation(COUNT)

    def in_order(order):
        # Values arranged according to the given permutation.
        return [values[idx] for idx in order]

    for value in in_order(p1):
        model.add_data(value)
    prob_first = model.data_prob()

    for value in in_order(p1):
        model.remove_data(value)
    assert_almost_equal(model.data_prob(), 0.)

    for value in in_order(p2):
        model.add_data(value)
    prob_second = model.data_prob()

    assert_almost_equal(prob_first, prob_second)
def check_summarize_N(name):
    """Summarize ``COUNT`` samples obtained from one ``sample_data`` call.

    The check only verifies that ``sample_data(COUNT)`` and ``summarize``
    run without raising.
    """
    check_cm(name)
    model = ComponentModel(name)
    summarize(name, model.sample_data(COUNT))
Exemple #34
0
def check_cm(name):
    """Raise ``SkipTest`` when ``ComponentModel(name)`` raises ``KeyError``.

    Any other exception from the constructor propagates unchanged. The
    constructed model is discarded.
    """
    try:
        ComponentModel(name)
    except KeyError:
        # Presumably the model name is not available in this build.
        raise SkipTest()
Exemple #35
0
def check_hp_io(name):
    """Check that a model rebuilt from ``dump_hp()`` dumps the same ``hp``."""
    check_cm(name)
    cm = ComponentModel(name)
    cm.realize_hp()
    clone = ComponentModel(name, hp=cm.dump_hp())
    assert_equal(clone.dump_hp(), cm.dump_hp())