def test_dense_vs_sparse():
    """Brute-force check that dense and sparse relation dataviews induce
    the same posterior over cluster assignments.

    XXX: really belongs in irm test cases, but kernels has a nice cluster
    enumeration iterator
    """
    r = rng()
    n = 5
    # random integer relation matrix with a random missing-entry mask
    raw = ma.array(
        np.random.choice(np.arange(20), size=(n, n)),
        mask=np.random.choice([False, True], size=(n, n)))
    dense = [relation_numpy_dataview(raw)]
    sparse = [sparse_relation_dataview(_tocsr(raw))]
    domains = [n]
    relations = [((0, 0), gp)]
    defn = irm_definition(domains, relations)

    def score_fn(data):
        # returns a closure scoring one full assignment tuple against `data`
        def f(assignments):
            s = irm_initialize(defn, data, r=r,
                               domain_assignments=assignments)
            assign = sum(s.score_assignment(i)
                         for i in xrange(len(assignments)))
            likelihood = s.score_likelihood(r)
            return assign + likelihood
        return f

    product_assignments = tuple(map(list, map(permutation_iter, domains)))

    def posterior(data):
        # enumerate every cluster assignment and normalize the scores;
        # list(...) keeps this correct under Python 3 map semantics and
        # matches the list(map(...)) style used elsewhere in this file
        scores = np.array(list(map(score_fn(data),
                                   it.product(*product_assignments))))
        return scores_to_probs(scores)

    # the two views hold the same data, so the posteriors must agree
    assert_1d_lists_almost_equals(
        posterior(dense), posterior(sparse), places=3)
def test_dense_vs_sparse():
    """Compare posteriors computed from a dense and a sparse dataview.

    XXX: really belongs in irm test cases, but kernels has a nice cluster
    enumeration iterator
    """
    r = rng()
    n = 5
    raw = ma.array(np.random.choice(np.arange(20), size=(n, n)),
                   mask=np.random.choice([False, True], size=(n, n)))
    dense = [relation_numpy_dataview(raw)]
    sparse = [sparse_relation_dataview(_tocsr(raw))]
    domains = [n]
    relations = [((0, 0), gp)]
    defn = irm_definition(domains, relations)

    def score_fn(data):
        def f(assignments):
            state = irm_initialize(defn, data, r=r,
                                   domain_assignments=assignments)
            # accumulate the assignment prior term, one domain at a time
            total = 0
            for idx in xrange(len(assignments)):
                total += state.score_assignment(idx)
            return total + state.score_likelihood(r)
        return f

    product_assignments = tuple(
        [list(permutation_iter(d)) for d in domains])
    dense_scores = np.array(
        map(score_fn(dense), it.product(*product_assignments)))
    sparse_scores = np.array(
        map(score_fn(sparse), it.product(*product_assignments)))
    dense_posterior = scores_to_probs(dense_scores)
    sparse_posterior = scores_to_probs(sparse_scores)
    assert_1d_lists_almost_equals(dense_posterior, sparse_posterior, places=3)
def test_crp_empirical():
    """Empirical CRP samples should match the exact assignment posterior."""
    N = 4
    alpha = 2.5
    defn = model_definition(N, [bb])
    Y = np.array([(True, )] * N, dtype=[('', bool)])
    view = numpy_dataview(Y)
    r = rng()

    def crp_score(assignment):
        latent = initialize(defn, view, r=r,
                            cluster_hp={'alpha': alpha},
                            assignment=assignment)
        return latent.score_assignment()

    # exact distribution over canonical assignments, by enumeration
    log_scores = np.array(
        [crp_score(a) for a in permutation_iter(N)])
    dist = scores_to_probs(log_scores)
    # map each canonical assignment to its index in `dist`
    idmap = dict(
        (C, i) for i, C in enumerate(permutation_iter(N)))

    def sample_fn():
        drawn = _sample_crp(N, alpha)
        return idmap[tuple(permutation_canonical(drawn))]

    assert_discrete_dist_approx(sample_fn, dist, ntries=100)
def test_convergence_simple():
    """Gibbs-sample a tiny two-document model and check the empirical
    distribution over canonical (table, dish) assignments against the
    exact brute-force posterior."""
    # N documents, V vocabulary size -- TODO confirm against model_definition
    N, V = 2, 10
    defn = model_definition(N, V)
    data = [
        np.array([5, 6]),
        np.array([0, 1, 2]),
    ]
    view = numpy_dataview(data)
    prng = rng()

    # Enumerate every (table, dish) assignment pair, score each one
    # exactly, and remember its index so samples can be mapped back.
    scores = []
    idmap = {}
    for i, (tables, dishes) in enumerate(permutations([2, 3])):
        latent = model.initialize(
            defn, view, prng,
            table_assignments=tables,
            dish_assignments=dishes)
        scores.append(
            latent.score_assignment() + latent.score_data(prng))
        idmap[(tables, dishes)] = i
    true_dist = scores_to_probs(scores)

    def kernel(latent):
        # mutates latent in place: one sweep of dish assignments
        # followed by per-document table assignments
        doc_model = model.bind(latent, data=view)
        kernels.assign2(doc_model, prng)
        for did in xrange(latent.nentities()):
            table_model = model.bind(latent, document=did)
            kernels.assign(table_model, prng)

    latent = model.initialize(defn, view, prng)
    skip = 10  # thinning: kernel sweeps between recorded samples

    def sample_fn():
        for _ in xrange(skip):
            kernel(latent)
        table_assignments = latent.table_assignments()
        canon_table_assigments = tuple(
            map(tuple, map(permutation_canonical, table_assignments)))
        dish_maps = latent.dish_assignments()
        # Rebuild per-document dish assignments in canonical table order:
        # a table's dish is recorded the first time its canonical label
        # appears (i.e. once per distinct table).
        dish_assignments = []
        for dm, (ta, ca) in zip(dish_maps,
                                zip(table_assignments,
                                    canon_table_assigments)):
            dish_assignment = []
            for t, c in zip(ta, ca):
                if c == len(dish_assignment):
                    dish_assignment.append(dm[t])
            dish_assignments.append(dish_assignment)
        canon_dish_assigments = tuple(
            map(tuple, map(permutation_canonical, dish_assignments)))
        return idmap[(canon_table_assigments, canon_dish_assigments)]

    assert_discrete_dist_approx(
        sample_fn, true_dist, ntries=100, nsamples=10000, kl_places=2)
def _test_convergence(domains, data, reg_relations, brute_relations, kernel, burnin_niters=10000, skip=10, ntries=50, nsamples=1000, places=2): r = rng() reg_defn = irm_definition(domains, reg_relations) brute_defn = irm_definition(domains, brute_relations) def score_fn(assignments): s = irm_initialize( brute_defn, data, r=r, domain_assignments=assignments) assign = sum(s.score_assignment(i) for i in xrange(len(assignments))) likelihood = s.score_likelihood(r) return assign + likelihood product_assignments = tuple(map(list, map(permutation_iter, domains))) posterior = scores_to_probs( np.array(map(score_fn, it.product(*product_assignments)))) s = irm_initialize(reg_defn, data, r=r) bounded_states = [irm_bind(s, i, data) for i in xrange(len(domains))] # burnin start = time.time() last = start for i in xrange(burnin_niters): for bs in bounded_states: kernel(bs, r) if not ((i + 1) % 1000): print 'burning finished iteration', (i + 1), \ 'in', (time.time() - last), 'seconds' last = time.time() print 'finished burnin of', burnin_niters, \ 'iters in', (time.time() - start), 'seconds' idmap = {C: i for i, C in enumerate(it.product(*product_assignments))} #print idmap def sample_fn(): for _ in xrange(skip): for bs in bounded_states: kernel(bs, r) key = tuple(tuple(permutation_canonical(bs.assignments())) for bs in bounded_states) return idmap[key] assert_discrete_dist_approx( sample_fn, posterior, ntries=ntries, nsamples=nsamples, kl_places=places)
def data_with_posterior(defn, r=None):
    """Generate a toy dataset for `defn` and its exact assignment posterior.

    :param defn: model definition with a `domains()` accessor
    :param r: optional RNG; a fresh one is created when None
    :returns: (relations, posterior) where posterior is a probability
        vector over every enumerated domain assignment
    """
    # XXX(stephentu): should only accept conjugate models
    if r is None:
        r = rng()
    relations = toy_dataset(defn)
    # materialize: under Python 3 map semantics a lazy map object would
    # be exhausted after the first score_fn call; also matches the
    # list(map(...)) style used elsewhere in this file
    views = list(map(numpy_dataview, relations))

    def score_fn(assignments):
        # joint score (assignment prior + likelihood) of one assignment
        s = model.initialize(defn,
                             views,
                             r=r,
                             domain_assignments=assignments)
        assign = sum(s.score_assignment(i)
                     for i in xrange(len(assignments)))
        likelihood = s.score_likelihood(r)
        return assign + likelihood

    domains = defn.domains()
    product_assignments = tuple(map(list, map(permutation_iter, domains)))
    posterior = scores_to_probs(
        np.array(list(map(score_fn, it.product(*product_assignments)))))

    return relations, posterior
def data_with_posterior(defn, r=None):
    """Build a toy dataset for `defn` along with its exact posterior
    over domain assignments, computed by full enumeration."""
    # XXX(stephentu): should only accept conjugate models
    if r is None:
        r = rng()
    relations = toy_dataset(defn)
    views = map(numpy_dataview, relations)

    def score_fn(assignments):
        state = model.initialize(defn, views, r=r,
                                 domain_assignments=assignments)
        # assignment prior term, summed across domains
        prior = 0
        for idx in xrange(len(assignments)):
            prior += state.score_assignment(idx)
        return prior + state.score_likelihood(r)

    domains = defn.domains()
    product_assignments = tuple(
        [list(permutation_iter(d)) for d in domains])
    all_scores = np.array(
        map(score_fn, it.product(*product_assignments)))
    posterior = scores_to_probs(all_scores)
    return relations, posterior
def test_crp_empirical():
    """Draws from the CRP should follow the exact CRP assignment prior."""
    N = 4
    alpha = 2.5
    defn = model_definition(N, [bb])
    Y = np.array([(True,)] * N, dtype=[('', bool)])
    view = numpy_dataview(Y)
    r = rng()

    def crp_score(assignment):
        # score of a single fixed assignment under the CRP prior
        latent = initialize(defn,
                            view,
                            r=r,
                            cluster_hp={'alpha': alpha},
                            assignment=assignment)
        return latent.score_assignment()

    scores = np.array(list(map(crp_score, permutation_iter(N))))
    dist = scores_to_probs(scores)
    idmap = {}
    for i, C in enumerate(permutation_iter(N)):
        idmap[C] = i

    def sample_fn():
        sample = permutation_canonical(_sample_crp(N, alpha))
        return idmap[tuple(sample)]

    assert_discrete_dist_approx(sample_fn, dist, ntries=100)
def _test_convergence(domains, data, reg_relations, brute_relations, kernel, burnin_niters=10000, skip=10, ntries=50, nsamples=1000, places=2): r = rng() reg_defn = irm_definition(domains, reg_relations) brute_defn = irm_definition(domains, brute_relations) def score_fn(assignments): s = irm_initialize(brute_defn, data, r=r, domain_assignments=assignments) assign = sum(s.score_assignment(i) for i in xrange(len(assignments))) likelihood = s.score_likelihood(r) return assign + likelihood product_assignments = tuple(map(list, map(permutation_iter, domains))) posterior = scores_to_probs( np.array(map(score_fn, it.product(*product_assignments)))) s = irm_initialize(reg_defn, data, r=r) bounded_states = [irm_bind(s, i, data) for i in xrange(len(domains))] # burnin start = time.time() last = start for i in xrange(burnin_niters): for bs in bounded_states: kernel(bs, r) if not ((i + 1) % 1000): print 'burning finished iteration', (i + 1), \ 'in', (time.time() - last), 'seconds' last = time.time() print 'finished burnin of', burnin_niters, \ 'iters in', (time.time() - start), 'seconds' idmap = {C: i for i, C in enumerate(it.product(*product_assignments))} #print idmap def sample_fn(): for _ in xrange(skip): for bs in bounded_states: kernel(bs, r) key = tuple( tuple(permutation_canonical(bs.assignments())) for bs in bounded_states) return idmap[key] assert_discrete_dist_approx(sample_fn, posterior, ntries=ntries, nsamples=nsamples, kl_places=places)
def test_convergence_simple():
    """Compare Gibbs samples of (table, dish) assignments on a tiny
    two-document model against the exact enumerated posterior."""
    # N documents, V vocabulary size -- TODO confirm against model_definition
    N, V = 2, 10
    defn = model_definition(N, V)
    data = [
        np.array([5, 6]),
        np.array([0, 1, 2]),
    ]
    view = numpy_dataview(data)
    prng = rng()

    # Score every enumerated (table, dish) assignment pair exactly and
    # record its index for mapping sampled states back to this list.
    scores = []
    idmap = {}
    for i, (tables, dishes) in enumerate(permutations([2, 3])):
        latent = model.initialize(defn,
                                  view,
                                  prng,
                                  table_assignments=tables,
                                  dish_assignments=dishes)
        scores.append(latent.score_assignment()
                      + latent.score_data(prng))
        idmap[(tables, dishes)] = i
    true_dist = scores_to_probs(scores)

    def kernel(latent):
        # mutates latent in place: resample dishes, then resample the
        # table assignment within each document
        doc_model = model.bind(latent, data=view)
        kernels.assign2(doc_model, prng)
        for did in xrange(latent.nentities()):
            table_model = model.bind(latent, document=did)
            kernels.assign(table_model, prng)

    latent = model.initialize(defn, view, prng)
    skip = 10  # thinning: kernel sweeps between recorded samples

    def sample_fn():
        for _ in xrange(skip):
            kernel(latent)
        table_assignments = latent.table_assignments()
        canon_table_assigments = tuple(
            map(tuple, map(permutation_canonical, table_assignments)))
        dish_maps = latent.dish_assignments()
        # Rebuild per-document dish lists in canonical table order: each
        # table's dish is captured the first time its canonical label
        # appears, i.e. once per distinct table.
        dish_assignments = []
        for dm, (ta, ca) in zip(dish_maps,
                                zip(table_assignments,
                                    canon_table_assigments)):
            dish_assignment = []
            for t, c in zip(ta, ca):
                if c == len(dish_assignment):
                    dish_assignment.append(dm[t])
            dish_assignments.append(dish_assignment)
        canon_dish_assigments = tuple(
            map(tuple, map(permutation_canonical, dish_assignments)))
        return idmap[(canon_table_assigments, canon_dish_assigments)]

    assert_discrete_dist_approx(sample_fn,
                                true_dist,
                                ntries=100,
                                nsamples=10000,
                                kl_places=2)