# Module-level imports assumed by the tests below; the testutil paths are an
# assumption based on where these helpers usually live in datamicroscopes.
import itertools as it
import multiprocessing as mp

from microscopes.common.rng import rng
from microscopes.common.relation.dataview import numpy_dataview
from microscopes.common.testutil import (
    assert_discrete_dist_approx,
    permutation_canonical,
    permutation_iter,
)
from microscopes.models import bb, nich
from microscopes.irm.definition import model_definition
from microscopes.irm import model, runner
from microscopes.irm.testutil import toy_dataset, data_with_posterior
from microscopes.kernels import parallel


def test_runner_multiprocessing_convergence():
    domains = [4]
    defn = model_definition(domains, [((0, 0), bb)])
    prng = rng()
    relations, posterior = data_with_posterior(defn, prng)
    views = map(numpy_dataview, relations)
    latents = [model.initialize(defn, views, prng)
               for _ in xrange(mp.cpu_count())]
    kc = [('assign', range(len(domains)))]
    runners = [runner.runner(defn, views, latent, kc) for latent in latents]
    r = parallel.runner(runners)
    r.run(r=prng, niters=10000)  # burnin

    product_assignments = tuple(map(list, map(permutation_iter, domains)))
    idmap = {C: i for i, C in enumerate(it.product(*product_assignments))}

    def sample_iter():
        # advance every chain a few steps, then yield one sample per chain
        r.run(r=prng, niters=10)
        for latent in r.get_latents():
            key = tuple(tuple(permutation_canonical(latent.assignments(i)))
                        for i in xrange(len(domains)))
            yield idmap[key]

    ref = [None]

    def sample_fn():
        # draw one sample at a time, restarting the generator once exhausted
        if ref[0] is None:
            ref[0] = sample_iter()
        try:
            return next(ref[0])
        except StopIteration:
            ref[0] = None
            return sample_fn()

    assert_discrete_dist_approx(sample_fn, posterior, ntries=100, kl_places=2)
def _test_runner_simple(defn, kc_fn):
    # shared smoke test: build views from a toy dataset, initialize a latent,
    # and take a few kernel steps with the given kernel configuration
    views = map(numpy_dataview, toy_dataset(defn))
    kc = kc_fn(defn)
    prng = rng()
    latent = model.initialize(defn, views, prng)
    r = runner.runner(defn, views, latent, kc)
    r.run(prng, 10)
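# A minimal sketch (not in the original test module) of how the helper above
# might be driven, assuming the same two-domain definition as the
# multiprocessing test below and the kernel-config constructors used
# elsewhere in this file.
def test_runner_simple_default_kernel():
    defn = model_definition([10, 10], [((0, 0), bb), ((0, 1), nich)])
    _test_runner_simple(defn, runner.default_kernel_config)


def test_runner_simple_assign_kernel():
    defn = model_definition([10, 10], [((0, 0), bb), ((0, 1), nich)])
    _test_runner_simple(defn, runner.default_assign_kernel_config)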
def test_runner_multiprocessing():
    defn = model_definition([10, 10], [((0, 0), bb), ((0, 1), nich)])
    views = map(numpy_dataview, toy_dataset(defn))
    kc = runner.default_kernel_config(defn)
    prng = rng()
    latents = [model.initialize(defn, views, prng)
               for _ in xrange(mp.cpu_count())]
    runners = [runner.runner(defn, views, latent, kc) for latent in latents]
    r = parallel.runner(runners)
    # check it is restartable
    r.run(r=prng, niters=10)
    r.run(r=prng, niters=10)
def test_runner_default_kernel_config_convergence():
    domains = [4]
    defn = model_definition(domains, [((0, 0), bb)])
    prng = rng()
    relations, posterior = data_with_posterior(defn, prng)
    views = map(numpy_dataview, relations)
    latent = model.initialize(defn, views, prng)
    r = runner.runner(defn, views, latent, [('assign', range(len(domains)))])
    r.run(r=prng, niters=1000)  # burnin

    product_assignments = tuple(map(list, map(permutation_iter, domains)))
    idmap = {C: i for i, C in enumerate(it.product(*product_assignments))}

    def sample_fn():
        r.run(r=prng, niters=10)
        new_latent = r.get_latent()
        key = tuple(tuple(permutation_canonical(new_latent.assignments(i)))
                    for i in xrange(len(domains)))
        return idmap[key]

    assert_discrete_dist_approx(sample_fn, posterior, ntries=100)
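# Illustrative note on the helpers used above (semantics assumed from their
# usage in these tests, not from their definitions):
# permutation_canonical relabels an assignment vector so group ids appear in
# first-occurrence order, making equivalent clusterings compare equal, e.g.
#   [2, 2, 0, 1] -> [0, 0, 1, 2]
# and permutation_iter(n) enumerates every such canonical partition of n
# objects (15 of them for n=4, the Bell number B_4), which is what lets
# idmap index the full discrete posterior over cluster assignments.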
# 5. run the runners

# In[5]:

from microscopes.common.rng import rng
from microscopes.common.relation.dataview import numpy_dataview
from microscopes.models import bb as beta_bernoulli
from microscopes.irm.definition import model_definition
from microscopes.irm import model, runner, query
from microscopes.kernels import parallel
from microscopes.common.query import groups, zmatrix_heuristic_block_ordering, zmatrix_reorder

defn = model_definition([N], [((0, 0), beta_bernoulli)])
views = [numpy_dataview(communications_relation)]
prng = rng()

nchains = 1
latents = [model.initialize(defn, views, r=prng, cluster_hps=[{'alpha': 1}])
           for _ in xrange(nchains)]
kc = runner.default_assign_kernel_config(defn)
print kc
r = runner.runner(defn, views, latents[0], kc)


# ##From here, we can run the sampler; a single iteration serves as a quick
# ##smoke test (increase `niters` for real inference)

# In[ ]:

start = time.time()
print start
r.run(r=prng, niters=1)
print "inference took {} seconds".format(time.time() - start)
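# A sketch (not in the original notebook) of pulling the updated state back
# out of the single-chain runner, using `get_latent` as in the convergence
# tests above and the `groups` helper imported in this cell.
latent = r.get_latent()
print groups(latent.assignments(0), sort=True)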
defn = model_definition([N], [((0, 0), beta_bernoulli)])
views = [numpy_dataview(communications_relation)]
prng = rng()


# ##Next, let's initialize the model and define the runners.
#
# ##These runners are our MCMC chains. We'll use `cpu_count` to define our number of chains.

# In[ ]:

nchains = cpu_count()
latents = [model.initialize(defn, views, r=prng, cluster_hps=[{'alpha': 1e-3}])
           for _ in xrange(nchains)]
kc = runner.default_assign_kernel_config(defn)
runners = [runner.runner(defn, views, latent, kc) for latent in latents]
r = parallel.runner(runners)


# ##From here, we can finally run each chain of the sampler 1000 times

# In[ ]:

start = time.time()
r.run(r=prng, niters=1000)
print "inference took {} seconds".format(time.time() - start)


# ##Now that we have learned our model let's get our cluster assignments

# In[ ]:

infers = r.get_latents()
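# A sketch of turning the inferred assignments into contiguous blocks,
# mirroring the steps inside `infinite_relational_model` below; the
# `query.zmatrix` signature is an assumption based on the helpers imported
# above, not something this notebook shows.
import itertools as it

clusters = groups(infers[0].assignments(0), sort=True)
ordering = list(it.chain.from_iterable(clusters))

# reorder the relation so inferred clusters form contiguous blocks
z = communications_relation.copy()
z = z[ordering]
z = z[:, ordering]

# a z-matrix averages co-assignment across chains; signature assumed
zmat = query.zmatrix(domain=0, latents=infers)
zmat = zmatrix_reorder(zmat, zmatrix_heuristic_block_ordering(zmat))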
def infinite_relational_model(corr_matrix, lag_matrix, threshold,
                              sampled_coords, window_size):
    import numpy as np
    import math
    import json
    import time
    import itertools as it
    from multiprocessing import cpu_count

    from microscopes.common.rng import rng
    from microscopes.common.relation.dataview import numpy_dataview
    from microscopes.models import bb as beta_bernoulli
    from microscopes.irm.definition import model_definition
    from microscopes.irm import model, runner, query
    from microscopes.kernels import parallel
    from microscopes.common.query import groups, zmatrix_heuristic_block_ordering, zmatrix_reorder

    cluster_matrix = []
    graph = []

    # threshold the correlation matrix into a boolean adjacency graph
    for row in corr_matrix:
        graph_row = []
        for corr in row:
            if corr < threshold:
                graph_row.append(False)
            else:
                graph_row.append(True)
        graph.append(graph_row)

    graph = np.array(graph, dtype=np.bool)
    graph_size = len(graph)

    # conduct Infinite Relational Model inference, one MCMC chain per core
    defn = model_definition([graph_size], [((0, 0), beta_bernoulli)])
    views = [numpy_dataview(graph)]
    prng = rng()
    nchains = cpu_count()
    latents = [model.initialize(defn, views, r=prng, cluster_hps=[{'alpha': 1e-3}])
               for _ in xrange(nchains)]
    kc = runner.default_assign_kernel_config(defn)
    runners = [runner.runner(defn, views, latent, kc) for latent in latents]
    r = parallel.runner(runners)

    start = time.time()
    # r.run(r=prng, niters=1000)
    # r.run(r=prng, niters=100)
    r.run(r=prng, niters=20)
    print "inference took {} seconds".format(time.time() - start)

    # reorder every matrix so that inferred clusters form contiguous blocks
    infers = r.get_latents()
    clusters = groups(infers[0].assignments(0), sort=True)
    ordering = list(it.chain.from_iterable(clusters))

    z = graph.copy()
    z = z[ordering]
    z = z[:, ordering]

    corr_matrix = corr_matrix[ordering]
    corr_matrix = corr_matrix[:, ordering]

    lag_matrix = lag_matrix[ordering]
    lag_matrix = lag_matrix[:, ordering]

    cluster_sampled_coords = np.array(sampled_coords)
    cluster_sampled_coords = cluster_sampled_coords[ordering]

    response_msg = {
        'corrMatrix': corr_matrix.tolist(),
        'lagMatrix': lag_matrix.tolist(),
        'clusterMatrix': z.tolist(),
        'clusterSampledCoords': cluster_sampled_coords.tolist(),
        'nClusterList': [len(cluster) for cluster in clusters],
        'ordering': ordering,
    }

    f = open("./expdata/clustermatrix-" + str(window_size) + ".json", "w")
    json.dump(response_msg, f)
    f.close()

    return response_msg
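# A hypothetical invocation of the function above on a small synthetic
# dataset; the matrices, threshold, and window size are made up for
# illustration, and ./expdata must already exist for the JSON dump to succeed.
import numpy as np

n = 8
series = np.random.rand(n, 100)
corr = np.abs(np.corrcoef(series))    # (n, n) correlation matrix
lag = np.zeros((n, n))                # placeholder lag matrix
coords = [(i, i) for i in xrange(n)]  # dummy sampled coordinates

result = infinite_relational_model(corr, lag, threshold=0.5,
                                   sampled_coords=coords, window_size=60)
print result['nClusterList']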