def _test_convergence(bs, posterior, kernel, burnin_niters, skip, ntries, nsamples, kl_places): N = bs.nentities() start = time.time() last = start for i in xrange(burnin_niters): kernel(bs) if not ((i + 1) % 1000): print 'burning finished iteration', (i + 1), \ 'in', (time.time() - last), 'seconds' last = time.time() print 'finished burnin of', burnin_niters, \ 'iters in', (time.time() - start), 'seconds' idmap = {C: i for i, C in enumerate(permutation_iter(N))} def sample_fn(): for _ in xrange(skip): kernel(bs) return idmap[tuple(permutation_canonical(bs.assignments()))] assert_discrete_dist_approx(sample_fn, posterior, ntries=ntries, nsamples=nsamples, kl_places=kl_places)
def test_runner_multiprocessing_convergence():
    """Check that parallel runners over one IRM domain converge to the
    posterior computed by data_with_posterior.

    One latent/runner is created per CPU; samples are pooled across all of
    them via a lazily recreated generator.
    """
    domains = [4]
    defn = model_definition(domains, [((0, 0), bb)])
    prng = rng()
    # ground-truth discrete posterior over domain assignments
    relations, posterior = data_with_posterior(defn, prng)
    views = map(numpy_dataview, relations)
    # one independent chain per core
    latents = [model.initialize(defn, views, prng) for _ in xrange(mp.cpu_count())]
    kc = [('assign', range(len(domains)))]
    runners = [runner.runner(defn, views, latent, kc) for latent in latents]
    r = parallel.runner(runners)
    r.run(r=prng, niters=10000)  # burnin
    # enumerate the joint assignment space (cartesian product over domains)
    product_assignments = tuple(map(list, map(permutation_iter, domains)))
    idmap = {C: i for i, C in enumerate(it.product(*product_assignments))}

    def sample_iter():
        # advance all chains, then yield one sample per latent
        r.run(r=prng, niters=10)
        for latent in r.get_latents():
            key = tuple(tuple(permutation_canonical(latent.assignments(i)))
                        for i in xrange(len(domains)))
            yield idmap[key]

    # single-element list acts as a mutable cell for the current generator
    ref = [None]

    def sample_fn():
        if ref[0] is None:
            ref[0] = sample_iter()
        try:
            return next(ref[0])
        except StopIteration:
            # generator exhausted: reset and recurse to start a fresh batch
            ref[0] = None
            return sample_fn()

    assert_discrete_dist_approx(sample_fn, posterior, ntries=100, kl_places=2)
def _test_convergence(bs, posterior, kernel, burnin_niters, skip, ntries, nsamples, kl_places):
    """Burn in `kernel` on the bound state `bs`, then verify the empirical
    distribution of canonical assignments against `posterior`.

    `skip` kernel sweeps are run between consecutive samples to reduce
    autocorrelation.
    """
    N = bs.nentities()
    start = time.time()
    last = start
    # burn-in with progress output every 1000 iterations
    for i in xrange(burnin_niters):
        kernel(bs)
        if not ((i + 1) % 1000):
            print 'burning finished iteration', (i + 1), \
                'in', (time.time() - last), 'seconds'
            last = time.time()
    print 'finished burnin of', burnin_niters, \
        'iters in', (time.time() - start), 'seconds'
    # map each canonical assignment tuple to its index in `posterior`
    idmap = {C: i for i, C in enumerate(permutation_iter(N))}

    def sample_fn():
        # thin the chain by `skip` sweeps per drawn sample
        for _ in xrange(skip):
            kernel(bs)
        return idmap[tuple(permutation_canonical(bs.assignments()))]

    assert_discrete_dist_approx(
        sample_fn, posterior, ntries=ntries, nsamples=nsamples,
        kl_places=kl_places)
def test_runner_multiprocessing_convergence():
    """Parallel runners on a small mixture model converge to the analytic
    posterior; samples are pooled across one chain per CPU core."""
    N, D = 4, 5
    defn = model_definition(N, [bb] * D)
    prng = rng()
    Y, posterior = data_with_posterior(defn, r=prng)
    view = numpy_dataview(Y)
    # one independent latent state per core
    latents = [model.initialize(defn, view, prng)
               for _ in xrange(mp.cpu_count())]
    runners = [runner.runner(defn, view, latent, ['assign'])
               for latent in latents]
    r = parallel.runner(runners)
    r.run(r=prng, niters=1000)  # burnin

    # canonical assignment tuple -> discrete state index
    idmap = {perm: idx for idx, perm in enumerate(permutation_iter(N))}

    def sample_iter():
        # advance every chain, then yield one sample per latent
        r.run(r=prng, niters=10)
        for cur in r.get_latents():
            yield idmap[tuple(permutation_canonical(cur.assignments()))]

    # mutable cell holding the currently active generator
    ref = [None]

    def sample_fn():
        # lazily (re)create the pooled-sample generator when exhausted
        while True:
            if ref[0] is None:
                ref[0] = sample_iter()
            try:
                return next(ref[0])
            except StopIteration:
                ref[0] = None

    assert_discrete_dist_approx(sample_fn, posterior, ntries=100, kl_places=2)
def test_crp_empirical():
    """Empirical CRP draws match the normalized analytic assignment scores."""
    N = 4
    alpha = 2.5
    defn = model_definition(N, [bb])
    # trivial all-True observations: the data term is constant, so only the
    # CRP prior differentiates assignments
    Y = np.array([(True, )] * N, dtype=[('', bool)])
    view = numpy_dataview(Y)
    r = rng()

    def crp_score(assignment):
        # prior score of one fixed assignment under the CRP with this alpha
        latent = initialize(defn, view, r=r,
                            cluster_hp={'alpha': alpha},
                            assignment=assignment)
        return latent.score_assignment()

    # enumerate the assignment space once and reuse it for both the scores
    # and the index map
    all_assignments = list(permutation_iter(N))
    dist = scores_to_probs(np.array([crp_score(a) for a in all_assignments]))
    idmap = {assignment: idx
             for idx, assignment in enumerate(all_assignments)}

    def sample_fn():
        draw = permutation_canonical(_sample_crp(N, alpha))
        return idmap[tuple(draw)]

    assert_discrete_dist_approx(sample_fn, dist, ntries=100)
def test_convergence_simple():
    """HDP-style sampler converges on a 2-document, 10-word-vocabulary corpus.

    The true distribution is computed by brute force over all joint
    (table, dish) assignment configurations; the sampler alternates a
    dish-assignment kernel with per-document table-assignment kernels.
    """
    N, V = 2, 10
    defn = model_definition(N, V)
    data = [
        np.array([5, 6]),
        np.array([0, 1, 2]),
    ]
    view = numpy_dataview(data)
    prng = rng()
    scores = []
    idmap = {}
    # enumerate every joint (tables, dishes) configuration and score it
    for i, (tables, dishes) in enumerate(permutations([2, 3])):
        latent = model.initialize(
            defn,
            view,
            prng,
            table_assignments=tables,
            dish_assignments=dishes)
        scores.append(
            latent.score_assignment() + latent.score_data(prng))
        idmap[(tables, dishes)] = i
    true_dist = scores_to_probs(scores)

    def kernel(latent):
        # mutates latent in place
        doc_model = model.bind(latent, data=view)
        kernels.assign2(doc_model, prng)
        for did in xrange(latent.nentities()):
            table_model = model.bind(latent, document=did)
            kernels.assign(table_model, prng)

    latent = model.initialize(defn, view, prng)
    skip = 10  # kernel sweeps between consecutive samples

    def sample_fn():
        for _ in xrange(skip):
            kernel(latent)
        table_assignments = latent.table_assignments()
        # relabel table ids into canonical (first-appearance) order
        canon_table_assigments = tuple(
            map(tuple, map(permutation_canonical, table_assignments)))
        dish_maps = latent.dish_assignments()
        dish_assignments = []
        # rebuild each document's dish list in canonical-table order: a
        # table's dish is recorded the first time its canonical label appears
        for dm, (ta, ca) in zip(dish_maps,
                                zip(table_assignments,
                                    canon_table_assigments)):
            dish_assignment = []
            for t, c in zip(ta, ca):
                if c == len(dish_assignment):
                    dish_assignment.append(dm[t])
            dish_assignments.append(dish_assignment)
        canon_dish_assigments = tuple(
            map(tuple, map(permutation_canonical, dish_assignments)))
        return idmap[(canon_table_assigments, canon_dish_assigments)]

    assert_discrete_dist_approx(
        sample_fn, true_dist, ntries=100, nsamples=10000, kl_places=2)
def _test_convergence(domains, data, reg_relations, brute_relations, kernel,
                      burnin_niters=10000, skip=10, ntries=50, nsamples=1000,
                      places=2):
    """Run `kernel` over each bound IRM domain and verify convergence to a
    brute-force posterior.

    The posterior is computed by scoring every joint domain-assignment under
    `brute_relations`; the sampler runs under `reg_relations`.
    """
    r = rng()
    reg_defn = irm_definition(domains, reg_relations)
    brute_defn = irm_definition(domains, brute_relations)

    def score_fn(assignments):
        # joint log score (assignment prior + data likelihood) for one fixed
        # configuration of all domains
        s = irm_initialize(
            brute_defn, data, r=r, domain_assignments=assignments)
        assign = sum(s.score_assignment(i)
                     for i in xrange(len(assignments)))
        likelihood = s.score_likelihood(r)
        return assign + likelihood

    # enumerate the joint assignment space: cartesian product of each
    # domain's canonical partitions
    product_assignments = tuple(map(list, map(permutation_iter, domains)))
    posterior = scores_to_probs(
        np.array(map(score_fn, it.product(*product_assignments))))

    s = irm_initialize(reg_defn, data, r=r)
    # one bound view of the state per domain so the kernel can act per-domain
    bounded_states = [irm_bind(s, i, data) for i in xrange(len(domains))]

    # burnin
    start = time.time()
    last = start
    for i in xrange(burnin_niters):
        for bs in bounded_states:
            kernel(bs, r)
        if not ((i + 1) % 1000):
            print 'burning finished iteration', (i + 1), \
                'in', (time.time() - last), 'seconds'
            last = time.time()
    print 'finished burnin of', burnin_niters, \
        'iters in', (time.time() - start), 'seconds'

    idmap = {C: i for i, C in enumerate(it.product(*product_assignments))}
    #print idmap

    def sample_fn():
        # thin the chain: `skip` full sweeps over all domains per sample
        for _ in xrange(skip):
            for bs in bounded_states:
                kernel(bs, r)
        key = tuple(tuple(permutation_canonical(bs.assignments()))
                    for bs in bounded_states)
        return idmap[key]

    assert_discrete_dist_approx(
        sample_fn, posterior, ntries=ntries, nsamples=nsamples,
        kl_places=places)
def test_runner_convergence():
    """A single-process runner on a small mixture model converges to the
    analytic posterior from data_with_posterior."""
    N, D = 4, 5
    defn = model_definition(N, [bb] * D)
    prng = rng()
    Y, posterior = data_with_posterior(defn, r=prng)
    view = numpy_dataview(Y)
    latent = model.initialize(defn, view, prng)
    r = runner.runner(defn, view, latent, ['assign'])
    r.run(r=prng, niters=1000)  # burnin

    # canonical assignment tuple -> discrete state index
    idmap = {perm: idx for idx, perm in enumerate(permutation_iter(N))}

    def sample_fn():
        # 10 sweeps between samples to reduce autocorrelation
        r.run(r=prng, niters=10)
        assignments = r.get_latent().assignments()
        return idmap[tuple(permutation_canonical(assignments))]

    assert_discrete_dist_approx(sample_fn, posterior, ntries=100)
def test_runner_default_kernel_config_convergence():
    """IRM runner with an explicit per-domain assign kernel converges to the
    brute-force posterior."""
    domains = [4]
    defn = model_definition(domains, [((0, 0), bb)])
    prng = rng()
    relations, posterior = data_with_posterior(defn, prng)
    views = map(numpy_dataview, relations)
    latent = model.initialize(defn, views, prng)
    r = runner.runner(defn, views, latent,
                      [('assign', range(len(domains)))])
    r.run(r=prng, niters=1000)  # burnin

    # joint assignment space: cartesian product over each domain's partitions
    product_assignments = tuple(map(list, map(permutation_iter, domains)))
    idmap = {key: idx
             for idx, key in enumerate(it.product(*product_assignments))}

    def sample_fn():
        r.run(r=prng, niters=10)
        cur = r.get_latent()
        key = tuple(tuple(permutation_canonical(cur.assignments(d)))
                    for d in xrange(len(domains)))
        return idmap[key]

    assert_discrete_dist_approx(sample_fn, posterior, ntries=100)
def test_crp_empirical():
    """Draws from _sample_crp match the normalized analytic CRP scores.

    The observations are all identical booleans, so the data term is constant
    and only the CRP prior shapes the target distribution.
    """
    N = 4
    alpha = 2.5
    defn = model_definition(N, [bb])
    Y = np.array([(True,)] * N, dtype=[('', bool)])
    view = numpy_dataview(Y)
    r = rng()

    def crp_score(assignment):
        # prior score of a fixed assignment under the CRP with this alpha
        latent = initialize(
            defn,
            view,
            r=r,
            cluster_hp={'alpha': alpha},
            assignment=assignment)
        return latent.score_assignment()

    # brute-force target distribution over all canonical assignments
    scores = np.array(list(map(crp_score, permutation_iter(N))))
    dist = scores_to_probs(scores)
    idmap = {C: i for i, C in enumerate(permutation_iter(N))}

    def sample_fn():
        sample = permutation_canonical(_sample_crp(N, alpha))
        return idmap[tuple(sample)]

    assert_discrete_dist_approx(sample_fn, dist, ntries=100)
def test_runner_default_kernel_config_convergence():
    """IRM runner with a per-domain assign kernel converges to the posterior
    computed by data_with_posterior."""
    domains = [4]
    defn = model_definition(domains, [((0, 0), bb)])
    prng = rng()
    relations, posterior = data_with_posterior(defn, prng)
    views = map(numpy_dataview, relations)
    latent = model.initialize(defn, views, prng)
    r = runner.runner(defn, views, latent, [('assign', range(len(domains)))])
    r.run(r=prng, niters=1000)  # burnin
    # joint assignment space: cartesian product of each domain's partitions
    product_assignments = tuple(map(list, map(permutation_iter, domains)))
    idmap = {C: i for i, C in enumerate(it.product(*product_assignments))}

    def sample_fn():
        # thin by 10 sweeps per sample
        r.run(r=prng, niters=10)
        new_latent = r.get_latent()
        key = tuple(
            tuple(permutation_canonical(new_latent.assignments(i)))
            for i in xrange(len(domains)))
        return idmap[key]

    assert_discrete_dist_approx(sample_fn, posterior, ntries=100)
def test_runner_multiprocessing_convergence():
    """Parallel IRM runners (one chain per CPU core) converge to the
    brute-force posterior; samples are pooled across all chains."""
    domains = [4]
    defn = model_definition(domains, [((0, 0), bb)])
    prng = rng()
    relations, posterior = data_with_posterior(defn, prng)
    views = map(numpy_dataview, relations)
    # one independent latent state per core
    latents = [model.initialize(defn, views, prng)
               for _ in xrange(mp.cpu_count())]
    kc = [('assign', range(len(domains)))]
    runners = [runner.runner(defn, views, latent, kc) for latent in latents]
    r = parallel.runner(runners)
    r.run(r=prng, niters=10000)  # burnin

    # joint assignment space: cartesian product over each domain's partitions
    product_assignments = tuple(map(list, map(permutation_iter, domains)))
    idmap = {key: idx
             for idx, key in enumerate(it.product(*product_assignments))}

    def sample_iter():
        # advance every chain, then yield one sample per latent
        r.run(r=prng, niters=10)
        for cur in r.get_latents():
            key = tuple(tuple(permutation_canonical(cur.assignments(d)))
                        for d in xrange(len(domains)))
            yield idmap[key]

    # mutable cell holding the currently active generator
    ref = [None]

    def sample_fn():
        # lazily (re)create the pooled-sample generator when exhausted
        while True:
            if ref[0] is None:
                ref[0] = sample_iter()
            try:
                return next(ref[0])
            except StopIteration:
                ref[0] = None

    assert_discrete_dist_approx(sample_fn, posterior, ntries=100, kl_places=2)
def _test_convergence(domains, data, reg_relations, brute_relations, kernel, burnin_niters=10000, skip=10, ntries=50, nsamples=1000, places=2): r = rng() reg_defn = irm_definition(domains, reg_relations) brute_defn = irm_definition(domains, brute_relations) def score_fn(assignments): s = irm_initialize(brute_defn, data, r=r, domain_assignments=assignments) assign = sum(s.score_assignment(i) for i in xrange(len(assignments))) likelihood = s.score_likelihood(r) return assign + likelihood product_assignments = tuple(map(list, map(permutation_iter, domains))) posterior = scores_to_probs( np.array(map(score_fn, it.product(*product_assignments)))) s = irm_initialize(reg_defn, data, r=r) bounded_states = [irm_bind(s, i, data) for i in xrange(len(domains))] # burnin start = time.time() last = start for i in xrange(burnin_niters): for bs in bounded_states: kernel(bs, r) if not ((i + 1) % 1000): print 'burning finished iteration', (i + 1), \ 'in', (time.time() - last), 'seconds' last = time.time() print 'finished burnin of', burnin_niters, \ 'iters in', (time.time() - start), 'seconds' idmap = {C: i for i, C in enumerate(it.product(*product_assignments))} #print idmap def sample_fn(): for _ in xrange(skip): for bs in bounded_states: kernel(bs, r) key = tuple( tuple(permutation_canonical(bs.assignments())) for bs in bounded_states) return idmap[key] assert_discrete_dist_approx(sample_fn, posterior, ntries=ntries, nsamples=nsamples, kl_places=places)
def test_convergence_simple():
    """HDP-style sampler converges on a tiny 2-document corpus.

    The target distribution is brute-forced over all joint (table, dish)
    configurations; the sampler alternates a dish-assignment kernel with
    per-document table-assignment kernels.
    """
    N, V = 2, 10
    defn = model_definition(N, V)
    data = [
        np.array([5, 6]),
        np.array([0, 1, 2]),
    ]
    view = numpy_dataview(data)
    prng = rng()
    scores = []
    idmap = {}
    # enumerate and score every joint (tables, dishes) configuration
    for i, (tables, dishes) in enumerate(permutations([2, 3])):
        latent = model.initialize(defn,
                                  view,
                                  prng,
                                  table_assignments=tables,
                                  dish_assignments=dishes)
        scores.append(latent.score_assignment() + latent.score_data(prng))
        idmap[(tables, dishes)] = i
    true_dist = scores_to_probs(scores)

    def kernel(latent):
        # mutates latent in place
        doc_model = model.bind(latent, data=view)
        kernels.assign2(doc_model, prng)
        for did in xrange(latent.nentities()):
            table_model = model.bind(latent, document=did)
            kernels.assign(table_model, prng)

    latent = model.initialize(defn, view, prng)
    skip = 10  # kernel sweeps between consecutive samples

    def sample_fn():
        for _ in xrange(skip):
            kernel(latent)
        table_assignments = latent.table_assignments()
        # relabel table ids into canonical (first-appearance) order
        canon_table_assigments = tuple(
            map(tuple, map(permutation_canonical, table_assignments)))
        dish_maps = latent.dish_assignments()
        dish_assignments = []
        # rebuild each document's dish list in canonical-table order: a
        # table's dish is recorded the first time its canonical label appears
        for dm, (ta, ca) in zip(dish_maps,
                                zip(table_assignments,
                                    canon_table_assigments)):
            dish_assignment = []
            for t, c in zip(ta, ca):
                if c == len(dish_assignment):
                    dish_assignment.append(dm[t])
            dish_assignments.append(dish_assignment)
        canon_dish_assigments = tuple(
            map(tuple, map(permutation_canonical, dish_assignments)))
        return idmap[(canon_table_assigments, canon_dish_assigments)]

    assert_discrete_dist_approx(sample_fn, true_dist, ntries=100,
                                nsamples=10000, kl_places=2)