Ejemplo n.º 1
0
def test_crp_empirical():
    """Check ``_sample_crp`` draws against the scored assignment distribution.

    Each assignment produced by ``permutation_iter(N)`` is scored through
    ``latent.score_assignment()`` (with ``cluster_hp={'alpha': alpha}``), and
    ``scores_to_probs`` converts those scores into the reference
    distribution. Samples from ``_sample_crp(N, alpha)`` are canonicalized,
    mapped to their position in the same enumeration, and compared with the
    reference by ``assert_discrete_dist_approx``.
    """
    n_entities = 4
    concentration = 2.5
    defn = model_definition(n_entities, [bb])
    # n_entities identical records, each holding a single True field.
    data = np.array([(True, )] * n_entities, dtype=[('', bool)])
    view = numpy_dataview(data)
    prng = rng()

    def score_of(assignment):
        # Score of a state initialized with exactly this assignment.
        state = initialize(defn,
                           view,
                           r=prng,
                           cluster_hp={'alpha': concentration},
                           assignment=assignment)
        return state.score_assignment()

    all_scores = [score_of(a) for a in permutation_iter(n_entities)]
    expected = scores_to_probs(np.array(all_scores))

    # Position of every assignment within the enumeration order used above.
    index_of = {}
    for position, candidate in enumerate(permutation_iter(n_entities)):
        index_of[candidate] = position

    def sample_fn():
        drawn = _sample_crp(n_entities, concentration)
        return index_of[tuple(permutation_canonical(drawn))]

    assert_discrete_dist_approx(sample_fn, expected, ntries=100)
def _test_convergence(bs,
                      posterior,
                      kernel,
                      burnin_niters,
                      skip,
                      ntries,
                      nsamples,
                      kl_places):
    N = bs.nentities()
    start = time.time()
    last = start
    for i in xrange(burnin_niters):
        kernel(bs)
        if not ((i + 1) % 1000):
            print 'burning finished iteration', (i + 1), \
                'in', (time.time() - last), 'seconds'
            last = time.time()
    print 'finished burnin of', burnin_niters, \
        'iters in', (time.time() - start), 'seconds'
    idmap = {C: i for i, C in enumerate(permutation_iter(N))}

    def sample_fn():
        for _ in xrange(skip):
            kernel(bs)
        return idmap[tuple(permutation_canonical(bs.assignments()))]
    assert_discrete_dist_approx(
        sample_fn, posterior,
        ntries=ntries, nsamples=nsamples, kl_places=kl_places)
def _test_convergence(bs, posterior, kernel, burnin_niters, skip, ntries,
                      nsamples, kl_places):
    N = bs.nentities()
    start = time.time()
    last = start
    for i in xrange(burnin_niters):
        kernel(bs)
        if not ((i + 1) % 1000):
            print 'burning finished iteration', (i + 1), \
                'in', (time.time() - last), 'seconds'
            last = time.time()
    print 'finished burnin of', burnin_niters, \
        'iters in', (time.time() - start), 'seconds'
    idmap = {C: i for i, C in enumerate(permutation_iter(N))}

    def sample_fn():
        for _ in xrange(skip):
            kernel(bs)
        return idmap[tuple(permutation_canonical(bs.assignments()))]

    assert_discrete_dist_approx(sample_fn,
                                posterior,
                                ntries=ntries,
                                nsamples=nsamples,
                                kl_places=kl_places)
Ejemplo n.º 4
0
def test_runner_multiprocessing_convergence():
    """Check samples produced by ``parallel.runner`` against the posterior.

    One latent and one ``runner.runner`` (with the ``'assign'`` kernel
    list) are created per ``mp.cpu_count()``. After a 1000-iteration burn-in,
    each batch of 10 further iterations contributes one sample per latent
    returned by ``get_latents()``; the resulting stream is compared with the
    posterior from ``data_with_posterior`` via ``assert_discrete_dist_approx``.
    """
    N, D = 4, 5
    defn = model_definition(N, [bb] * D)
    prng = rng()
    Y, posterior = data_with_posterior(defn, r=prng)
    view = numpy_dataview(Y)

    # Build every latent first, then wrap each one in its own runner.
    latents = []
    for _ in xrange(mp.cpu_count()):
        latents.append(model.initialize(defn, view, prng))
    runners = []
    for state in latents:
        runners.append(runner.runner(defn, view, state, ['assign']))

    r = parallel.runner(runners)
    r.run(r=prng, niters=1000)  # burnin

    index_of = dict(
        (candidate, position)
        for position, candidate in enumerate(permutation_iter(N)))

    def sample_iter():
        # One batch: advance all runners, then emit one index per latent.
        r.run(r=prng, niters=10)
        for state in r.get_latents():
            canonical = permutation_canonical(state.assignments())
            yield index_of[tuple(canonical)]

    # Single-slot holder so the closure can rebind the active batch iterator.
    pending = [None]

    def sample_fn():
        if pending[0] is None:
            pending[0] = sample_iter()
        try:
            value = next(pending[0])
        except StopIteration:
            # Batch exhausted: drop it and pull from a fresh one.
            pending[0] = None
            return sample_fn()
        return value

    assert_discrete_dist_approx(sample_fn, posterior, ntries=100, kl_places=2)
Ejemplo n.º 5
0
def permutations(doclengths):
    """Yield ``(prod, grouping)`` pairs over all per-document permutations.

    For each element ``prod`` of the Cartesian product of
    ``permutation_iter(length)`` over ``doclengths``, every ``p`` produced by
    ``permutation_iter(total)`` is cut into consecutive slices: one slice per
    entry ``d`` of ``prod``, ``max(d) + 1`` items long, where ``total`` is the
    sum of those slice lengths. ``grouping`` is the tuple of those slices.

    NOTE(review): the previous local names (``ntables``, ``dishes``) suggest
    per-document table/dish assignments -- confirm against the callers.

    WARNING: very quickly becomes intractable
    """
    per_doc = [permutation_iter(length) for length in doclengths]
    for prod in it.product(*per_doc):
        widths = [max(d) + 1 for d in prod]
        # Consecutive [start, stop) windows, one per entry of prod.
        bounds = []
        start = 0
        for width in widths:
            bounds.append((start, start + width))
            start += width
        for p in permutation_iter(sum(widths)):
            yield prod, tuple(tuple(p[lo:hi]) for lo, hi in bounds)
Ejemplo n.º 6
0
def permutations(doclengths):
    """Enumerate ``(prod, slices)`` pairs built from ``permutation_iter``.

    ``prod`` ranges over the Cartesian product of ``permutation_iter(length)``
    for each entry of ``doclengths``. For every ``prod``, each ``p`` from
    ``permutation_iter(sum(max(d) + 1 for d in prod))`` is partitioned into
    back-to-back chunks of ``max(d) + 1`` items, one chunk per ``d``; the
    tuple of chunks is yielded together with ``prod``.

    WARNING: very quickly becomes intractable
    """
    perms = [permutation_iter(length) for length in doclengths]
    for prod in it.product(*perms):
        sizes = tuple(max(d) + 1 for d in prod)
        for p in permutation_iter(sum(sizes)):
            pieces = []
            offset = 0
            for size in sizes:
                stop = offset + size
                pieces.append(tuple(p[offset:stop]))
                offset = stop
            yield prod, tuple(pieces)
Ejemplo n.º 7
0
def test_crp_empirical():
    """Compare ``_sample_crp`` output with probabilities derived from scores.

    The reference distribution is ``scores_to_probs`` applied to the
    ``score_assignment()`` value of every assignment in
    ``permutation_iter(N)``. ``assert_discrete_dist_approx`` then checks that
    canonicalized draws from ``_sample_crp(N, alpha)`` follow it.
    """
    N = 4
    alpha = 2.5
    defn = model_definition(N, [bb])
    records = [(True,)] * N
    Y = np.array(records, dtype=[('', bool)])
    view = numpy_dataview(Y)
    r = rng()

    def crp_score(assignment):
        hyperparams = {'alpha': alpha}
        latent = initialize(
            defn, view, r=r, cluster_hp=hyperparams, assignment=assignment)
        return latent.score_assignment()

    scores = np.array([crp_score(perm) for perm in permutation_iter(N)])
    dist = scores_to_probs(scores)

    # Index of each assignment in the enumeration that produced `scores`.
    idmap = dict((perm, idx) for idx, perm in enumerate(permutation_iter(N)))

    def sample_fn():
        raw = _sample_crp(N, alpha)
        canonical = permutation_canonical(raw)
        return idmap[tuple(canonical)]

    assert_discrete_dist_approx(sample_fn, dist, ntries=100)
Ejemplo n.º 8
0
def _test_crp(initialize_fn, dataview, alpha, r):
    """Assert that exponentiated assignment scores sum to one.

    Args:
        initialize_fn: factory called as ``initialize_fn(defn, view, r=...,
            cluster_hp=..., assignment=...)``; its result must provide
            ``score_assignment()``.
        dataview: callable that wraps the record array into a view.
        alpha: value stored under ``'alpha'`` in ``cluster_hp``.
        r: random state forwarded to ``initialize_fn``.
    """
    n_entities = 6
    defn = model_definition(n_entities, [bb])
    records = np.array([(True,)] * n_entities, dtype=[('', bool)])
    view = dataview(records)

    def score_of(assignment):
        state = initialize_fn(
            defn, view, r=r,
            cluster_hp={'alpha': alpha}, assignment=assignment)
        return state.score_assignment()

    log_scores = np.array(
        [score_of(a) for a in permutation_iter(n_entities)])
    # exp() of the scores over the full enumeration should total ~1.
    probs = np.exp(log_scores)
    assert_almost_equals(probs.sum(), 1.0, places=3)
Ejemplo n.º 9
0
def test_runner_convergence():
    """Check that a single ``runner.runner`` samples from the posterior.

    After a 1000-iteration burn-in with the ``'assign'`` kernel list, every
    sample is taken by running 10 more iterations and reading the
    assignments of ``get_latent()``. ``assert_discrete_dist_approx`` compares
    the samples with the posterior returned by ``data_with_posterior``.
    """
    N, D = 4, 5
    defn = model_definition(N, [bb] * D)
    prng = rng()
    Y, posterior = data_with_posterior(defn, r=prng)
    view = numpy_dataview(Y)
    initial_state = model.initialize(defn, view, prng)
    r = runner.runner(defn, view, initial_state, ['assign'])
    r.run(r=prng, niters=1000)  # burnin

    # Position of every assignment within the permutation_iter enumeration.
    index_of = {}
    for position, candidate in enumerate(permutation_iter(N)):
        index_of[candidate] = position

    def sample_fn():
        r.run(r=prng, niters=10)
        assignments = r.get_latent().assignments()
        return index_of[tuple(permutation_canonical(assignments))]

    assert_discrete_dist_approx(sample_fn, posterior, ntries=100)
Ejemplo n.º 10
0
def _test_crp(initialize_fn, dataview, alpha, r):
    """Verify that ``exp(score_assignment())`` sums to ~1 over all assignments.

    A six-entity model is built, every assignment from ``permutation_iter``
    is scored on a state created by ``initialize_fn`` (using
    ``cluster_hp={'alpha': alpha}`` and random state ``r``), and the
    exponentiated scores are asserted to total 1.0 to three places.
    ``dataview`` wraps the record array before it is given to
    ``initialize_fn``.
    """
    N = 6
    defn = model_definition(N, [bb])
    rows = [(True, )] * N
    view = dataview(np.array(rows, dtype=[('', bool)]))
    hyperparams = {'alpha': alpha}

    def crp_score(assignment):
        return initialize_fn(defn,
                             view,
                             r=r,
                             cluster_hp=hyperparams,
                             assignment=assignment).score_assignment()

    collected = []
    for assignment in permutation_iter(N):
        collected.append(crp_score(assignment))
    total = np.exp(np.array(collected)).sum()
    assert_almost_equals(total, 1.0, places=3)
Ejemplo n.º 11
0
def test_compare_to_mixture_model():
    r = rng()

    N, D = 4, 5

    Y = np.random.uniform(size=(N, D)) > 0.8
    Y_rec = np.array([tuple(y) for y in Y], dtype=[('', bool)] * D)

    mm_view = rec_numpy_dataview(Y_rec)
    irm_view = relation_numpy_dataview(Y)

    mm_def = mm_definition(N, [bb] * D)
    irm_def = irm_definition([N, D], [((0, 1), bb)])

    perms = list(permutation_iter(N))
    assignment = perms[np.random.randint(0, len(perms))]

    mm_s = mm_initialize(mm_def, mm_view, r=r, assignment=assignment)
    irm_s = irm_initialize(irm_def,
                           [irm_view],
                           r=r,
                           domain_assignments=[
                               assignment,
                               range(D),
                           ])

    def assert_suff_stats_equal():
        assert set(mm_s.groups()) == set(irm_s.groups(0))
        assert irm_s.groups(1) == range(D)
        groups = mm_s.groups()
        for g in groups:
            for i in xrange(D):
                a = mm_s.get_suffstats(g, i)
                b = irm_s.get_suffstats(0, [g, i])
                if b is None:
                    b = {'heads': 0L, 'tails': 0L}
                assert a['heads'] == b['heads'] and a['tails'] == b['tails']

    assert_suff_stats_equal()
    assert_almost_equals(
        mm_s.score_assignment(), irm_s.score_assignment(0), places=3)

    bound_mm_s = mm_bind(mm_s, mm_view)
    bound_irm_s = irm_bind(irm_s, 0, [irm_view])

    # XXX: doesn't really have to be true, just is true of impl
    assert not bound_mm_s.empty_groups()
    assert not bound_irm_s.empty_groups()

    bound_mm_s.create_group(r)
    bound_irm_s.create_group(r)

    gid_a = bound_mm_s.remove_value(0, r)
    gid_b = bound_irm_s.remove_value(0, r)

    assert gid_a == gid_b
    assert_suff_stats_equal()

    x0, y0 = bound_mm_s.score_value(0, r)
    x1, y1 = bound_irm_s.score_value(0, r)
    assert x0 == x1  # XXX: not really a requirement

    # XXX: should really normalize and then check
    for a, b in zip(y0, y1):
        assert_almost_equals(a, b, places=2)
Ejemplo n.º 12
0
def test_compare_to_mixture_model():
    r = rng()

    N, D = 4, 5

    Y = np.random.uniform(size=(N, D)) > 0.8
    Y_rec = np.array([tuple(y) for y in Y], dtype=[('', bool)] * D)

    mm_view = rec_numpy_dataview(Y_rec)
    irm_view = relation_numpy_dataview(Y)

    mm_def = mm_definition(N, [bb] * D)
    irm_def = irm_definition([N, D], [((0, 1), bb)])

    perms = list(permutation_iter(N))
    assignment = perms[np.random.randint(0, len(perms))]

    mm_s = mm_initialize(mm_def, mm_view, r=r, assignment=assignment)
    irm_s = irm_initialize(irm_def, [irm_view],
                           r=r,
                           domain_assignments=[
                               assignment,
                               range(D),
                           ])

    def assert_suff_stats_equal():
        assert set(mm_s.groups()) == set(irm_s.groups(0))
        assert irm_s.groups(1) == range(D)
        groups = mm_s.groups()
        for g in groups:
            for i in xrange(D):
                a = mm_s.get_suffstats(g, i)
                b = irm_s.get_suffstats(0, [g, i])
                if b is None:
                    b = {'heads': 0L, 'tails': 0L}
                assert a['heads'] == b['heads'] and a['tails'] == b['tails']

    assert_suff_stats_equal()
    assert_almost_equals(mm_s.score_assignment(),
                         irm_s.score_assignment(0),
                         places=3)

    bound_mm_s = mm_bind(mm_s, mm_view)
    bound_irm_s = irm_bind(irm_s, 0, [irm_view])

    # XXX: doesn't really have to be true, just is true of impl
    assert not bound_mm_s.empty_groups()
    assert not bound_irm_s.empty_groups()

    bound_mm_s.create_group(r)
    bound_irm_s.create_group(r)

    gid_a = bound_mm_s.remove_value(0, r)
    gid_b = bound_irm_s.remove_value(0, r)

    assert gid_a == gid_b
    assert_suff_stats_equal()

    x0, y0 = bound_mm_s.score_value(0, r)
    x1, y1 = bound_irm_s.score_value(0, r)
    assert x0 == x1  # XXX: not really a requirement

    # XXX: should really normalize and then check
    for a, b in zip(y0, y1):
        assert_almost_equals(a, b, places=2)