def test_crp_empirical():
    """Empirically validate CRP assignment scores against sampled partitions.

    Scores every canonical partition of N entities under the model's CRP
    prior, normalizes to a distribution, and checks that direct CRP sampling
    approximately reproduces that distribution.
    """
    N = 4
    alpha = 2.5
    defn = model_definition(N, [bb])
    Y = np.array([(True,)] * N, dtype=[('', bool)])
    view = numpy_dataview(Y)
    r = rng()

    def crp_score(assignment):
        # Score one fixed assignment under the CRP prior via a model state.
        latent = initialize(defn, view, r=r,
                            cluster_hp={'alpha': alpha},
                            assignment=assignment)
        return latent.score_assignment()

    scores = np.array([crp_score(perm) for perm in permutation_iter(N)])
    dist = scores_to_probs(scores)
    idmap = {C: i for i, C in enumerate(permutation_iter(N))}

    def sample_fn():
        # Draw a partition from the CRP and map it to its canonical index.
        drawn = permutation_canonical(_sample_crp(N, alpha))
        return idmap[tuple(drawn)]

    assert_discrete_dist_approx(sample_fn, dist, ntries=100)
def _test_convergence(bs, posterior, kernel, burnin_niters, skip,
                      ntries, nsamples, kl_places):
    """Burn in `kernel` on `bs`, then check the chain against `posterior`.

    After `burnin_niters` kernel applications (progress logged every 1000
    iterations), samples thinned by `skip` sweeps are compared to the exact
    posterior over canonical assignments.
    """
    N = bs.nentities()
    start = time.time()
    last = start
    for i in xrange(burnin_niters):
        kernel(bs)
        if (i + 1) % 1000 == 0:
            print('burning finished iteration %s in %s seconds'
                  % (i + 1, time.time() - last))
            last = time.time()
    print('finished burnin of %s iters in %s seconds'
          % (burnin_niters, time.time() - start))
    idmap = {C: i for i, C in enumerate(permutation_iter(N))}

    def sample_fn():
        # Thin the chain by `skip` kernel sweeps per drawn sample.
        for _ in xrange(skip):
            kernel(bs)
        return idmap[tuple(permutation_canonical(bs.assignments()))]

    assert_discrete_dist_approx(
        sample_fn, posterior,
        ntries=ntries, nsamples=nsamples, kl_places=kl_places)
def _test_convergence(bs, posterior, kernel, burnin_niters, skip, ntries, nsamples, kl_places): N = bs.nentities() start = time.time() last = start for i in xrange(burnin_niters): kernel(bs) if not ((i + 1) % 1000): print 'burning finished iteration', (i + 1), \ 'in', (time.time() - last), 'seconds' last = time.time() print 'finished burnin of', burnin_niters, \ 'iters in', (time.time() - start), 'seconds' idmap = {C: i for i, C in enumerate(permutation_iter(N))} def sample_fn(): for _ in xrange(skip): kernel(bs) return idmap[tuple(permutation_canonical(bs.assignments()))] assert_discrete_dist_approx(sample_fn, posterior, ntries=ntries, nsamples=nsamples, kl_places=kl_places)
def test_runner_multiprocessing_convergence():
    """Convergence test driving one runner per CPU through the parallel runner.

    All latents are burned in together; samples are then drawn in batches,
    one per latent per short inference sweep, and compared to the exact
    posterior over canonical assignments.
    """
    N, D = 4, 5
    defn = model_definition(N, [bb] * D)
    prng = rng()
    Y, posterior = data_with_posterior(defn, r=prng)
    view = numpy_dataview(Y)
    latents = [model.initialize(defn, view, prng)
               for _ in xrange(mp.cpu_count())]
    runners = [runner.runner(defn, view, latent, ['assign'])
               for latent in latents]
    r = parallel.runner(runners)
    r.run(r=prng, niters=1000)  # burnin
    idmap = {C: i for i, C in enumerate(permutation_iter(N))}

    def sample_iter():
        # One short sweep, then yield one canonical sample per latent.
        r.run(r=prng, niters=10)
        for latent in r.get_latents():
            yield idmap[tuple(permutation_canonical(latent.assignments()))]

    # Mutable cell holding the in-progress batch iterator between calls.
    state = {'it': None}

    def sample_fn():
        # Pull the next sample, restarting a fresh sweep when exhausted.
        if state['it'] is None:
            state['it'] = sample_iter()
        try:
            return next(state['it'])
        except StopIteration:
            state['it'] = None
            return sample_fn()

    assert_discrete_dist_approx(sample_fn, posterior, ntries=100, kl_places=2)
def permutations(doclengths):
    """Enumerate joint assignment configurations over a set of documents.

    For every combination of per-document partitions (one canonical
    partition of ``doclengths[d]`` items for each document ``d``, via
    ``permutation_iter``), and for every partition of the resulting total
    number of tables, yields a pair::

        (per_doc_assignments, per_doc_table_assignments)

    where ``per_doc_assignments`` is the tuple of per-document partitions
    and ``per_doc_table_assignments`` slices the flat table-level partition
    back into one tuple per document (``max(d) + 1`` entries for document
    ``d`` — presumably each document's table-to-dish mapping in an
    HDP-style model; confirm against callers).

    WARNING: very quickly becomes intractable
    """
    perms = [permutation_iter(length) for length in doclengths]
    for prod in it.product(*perms):
        # Total table count across documents: labels are canonical
        # (0..max contiguous), so each doc contributes max(x) + 1 tables.
        dishes = sum(max(x) + 1 for x in prod)
        for p in permutation_iter(dishes):
            # Slice the flat assignment `p` into per-document pieces.
            idx = 0
            ret = []
            for d in prod:
                ntables = max(d) + 1
                ret.append(tuple(p[idx:idx + ntables]))
                idx += ntables
            yield prod, tuple(ret)
def test_crp_empirical():
    """Check that direct CRP samples match the normalized assignment scores."""
    N, alpha = 4, 2.5
    defn = model_definition(N, [bb])
    data = np.array([(True,)] * N, dtype=[('', bool)])
    view = numpy_dataview(data)
    prng = rng()

    def score_of(assignment):
        # CRP prior log-score of a fixed assignment, via a model state.
        state = initialize(defn,
                           view,
                           r=prng,
                           cluster_hp={'alpha': alpha},
                           assignment=assignment)
        return state.score_assignment()

    all_perms = list(permutation_iter(N))
    dist = scores_to_probs(np.array(list(map(score_of, all_perms))))
    idmap = dict((C, i) for i, C in enumerate(all_perms))

    def sample_fn():
        return idmap[tuple(permutation_canonical(_sample_crp(N, alpha)))]

    assert_discrete_dist_approx(sample_fn, dist, ntries=100)
def _test_crp(initialize_fn, dataview, alpha, r):
    """CRP assignment scores should form a normalized distribution.

    Exponentiated log-scores over all canonical partitions of N entities
    must sum to (approximately) one.
    """
    N = 6
    defn = model_definition(N, [bb])
    Y = np.array([(True,)] * N, dtype=[('', bool)])
    view = dataview(Y)

    scores = []
    for assignment in permutation_iter(N):
        latent = initialize_fn(defn, view, r=r,
                               cluster_hp={'alpha': alpha},
                               assignment=assignment)
        scores.append(latent.score_assignment())
    dist = np.exp(np.array(scores))
    assert_almost_equals(dist.sum(), 1.0, places=3)
def test_runner_convergence():
    """Single-runner convergence against the exact assignment posterior."""
    N, D = 4, 5
    defn = model_definition(N, [bb] * D)
    prng = rng()
    Y, posterior = data_with_posterior(defn, r=prng)
    view = numpy_dataview(Y)
    latent = model.initialize(defn, view, prng)
    r = runner.runner(defn, view, latent, ['assign'])
    r.run(r=prng, niters=1000)  # burnin
    idmap = {C: i for i, C in enumerate(permutation_iter(N))}

    def sample_fn():
        # A few extra sweeps between samples to reduce autocorrelation.
        r.run(r=prng, niters=10)
        return idmap[tuple(
            permutation_canonical(r.get_latent().assignments()))]

    assert_discrete_dist_approx(sample_fn, posterior, ntries=100)
def _test_crp(initialize_fn, dataview, alpha, r):
    """The CRP prior must assign total probability one across all partitions."""
    n_entities = 6
    defn = model_definition(n_entities, [bb])
    view = dataview(np.array([(True,)] * n_entities, dtype=[('', bool)]))

    def log_prior(assignment):
        # Log-score of one fixed assignment under the CRP prior.
        return initialize_fn(defn,
                             view,
                             r=r,
                             cluster_hp={'alpha': alpha},
                             assignment=assignment).score_assignment()

    probs = np.exp([log_prior(a) for a in permutation_iter(n_entities)])
    assert_almost_equals(probs.sum(), 1.0, places=3)
def test_compare_to_mixture_model():
    """A single-domain binary IRM should agree with the equivalent mixture model.

    Builds the same random boolean data as a mixture-model view (record
    array) and an IRM relation view, initializes both states with the same
    entity assignment, and checks that sufficient statistics, assignment
    scores, and bound-state operations (group creation, value removal,
    value scoring) stay in lock-step.
    """
    r = rng()
    N, D = 4, 5
    # Sparse random boolean matrix; same data drives both model views.
    Y = np.random.uniform(size=(N, D)) > 0.8
    Y_rec = np.array([tuple(y) for y in Y], dtype=[('', bool)] * D)
    mm_view = rec_numpy_dataview(Y_rec)
    irm_view = relation_numpy_dataview(Y)
    mm_def = mm_definition(N, [bb] * D)
    # IRM over two domains: entities (N) x features (D), one binary relation.
    irm_def = irm_definition([N, D], [((0, 1), bb)])
    # Pick one random canonical partition and use it for both models.
    perms = list(permutation_iter(N))
    assignment = perms[np.random.randint(0, len(perms))]
    mm_s = mm_initialize(mm_def, mm_view, r=r, assignment=assignment)
    # Feature domain gets the identity assignment (each feature its own group).
    irm_s = irm_initialize(irm_def, [irm_view], r=r, domain_assignments=[
        assignment,
        range(D),
    ])

    def assert_suff_stats_equal():
        # Group structure must match on the entity domain ...
        assert set(mm_s.groups()) == set(irm_s.groups(0))
        # ... and the feature domain stays the identity partition.
        assert irm_s.groups(1) == range(D)
        groups = mm_s.groups()
        for g in groups:
            for i in xrange(D):
                a = mm_s.get_suffstats(g, i)
                b = irm_s.get_suffstats(0, [g, i])
                if b is None:
                    # IRM reports untouched cells as None; treat as empty counts.
                    b = {'heads': 0L, 'tails': 0L}
                assert a['heads'] == b['heads'] and a['tails'] == b['tails']

    assert_suff_stats_equal()
    assert_almost_equals(
        mm_s.score_assignment(), irm_s.score_assignment(0), places=3)
    bound_mm_s = mm_bind(mm_s, mm_view)
    bound_irm_s = irm_bind(irm_s, 0, [irm_view])
    # XXX: doesn't really have to be true, just is true of impl
    assert not bound_mm_s.empty_groups()
    assert not bound_irm_s.empty_groups()
    # Mirror the same structural mutations on both bound states.
    bound_mm_s.create_group(r)
    bound_irm_s.create_group(r)
    gid_a = bound_mm_s.remove_value(0, r)
    gid_b = bound_irm_s.remove_value(0, r)
    assert gid_a == gid_b
    assert_suff_stats_equal()
    x0, y0 = bound_mm_s.score_value(0, r)
    x1, y1 = bound_irm_s.score_value(0, r)
    assert x0 == x1  # XXX: not really a requirement
    # XXX: should really normalize and then check
    for a, b in zip(y0, y1):
        assert_almost_equals(a, b, places=2)
def test_compare_to_mixture_model():
    """Cross-check a one-relation binary IRM against the mixture model.

    The same random boolean data is presented to both implementations with
    an identical entity assignment; sufficient statistics, assignment
    scores, and the bound-state mutation/scoring sequence must agree.
    """
    r = rng()
    N, D = 4, 5
    # Sparse random boolean matrix; one copy per view representation.
    Y = np.random.uniform(size=(N, D)) > 0.8
    Y_rec = np.array([tuple(y) for y in Y], dtype=[('', bool)] * D)
    mm_view = rec_numpy_dataview(Y_rec)
    irm_view = relation_numpy_dataview(Y)
    mm_def = mm_definition(N, [bb] * D)
    # Two IRM domains (entities, features) joined by one binary relation.
    irm_def = irm_definition([N, D], [((0, 1), bb)])
    # Use a single randomly chosen canonical partition for both models.
    perms = list(permutation_iter(N))
    assignment = perms[np.random.randint(0, len(perms))]
    mm_s = mm_initialize(mm_def, mm_view, r=r, assignment=assignment)
    # Feature domain pinned to the identity assignment.
    irm_s = irm_initialize(irm_def, [irm_view], r=r, domain_assignments=[
        assignment,
        range(D),
    ])

    def assert_suff_stats_equal():
        # Entity-domain group sets must coincide.
        assert set(mm_s.groups()) == set(irm_s.groups(0))
        # Feature domain remains the identity partition throughout.
        assert irm_s.groups(1) == range(D)
        groups = mm_s.groups()
        for g in groups:
            for i in xrange(D):
                a = mm_s.get_suffstats(g, i)
                b = irm_s.get_suffstats(0, [g, i])
                if b is None:
                    # None means no observations for that cell — empty counts.
                    b = {'heads': 0L, 'tails': 0L}
                assert a['heads'] == b['heads'] and a['tails'] == b['tails']

    assert_suff_stats_equal()
    assert_almost_equals(mm_s.score_assignment(),
                         irm_s.score_assignment(0),
                         places=3)
    bound_mm_s = mm_bind(mm_s, mm_view)
    bound_irm_s = irm_bind(irm_s, 0, [irm_view])
    # XXX: doesn't really have to be true, just is true of impl
    assert not bound_mm_s.empty_groups()
    assert not bound_irm_s.empty_groups()
    # Apply the same structural mutations to both bound states, in order.
    bound_mm_s.create_group(r)
    bound_irm_s.create_group(r)
    gid_a = bound_mm_s.remove_value(0, r)
    gid_b = bound_irm_s.remove_value(0, r)
    assert gid_a == gid_b
    assert_suff_stats_equal()
    x0, y0 = bound_mm_s.score_value(0, r)
    x1, y1 = bound_irm_s.score_value(0, r)
    assert x0 == x1  # XXX: not really a requirement
    # XXX: should really normalize and then check
    for a, b in zip(y0, y1):
        assert_almost_equals(a, b, places=2)