def test_runner_multiprocessing_convergence():
    """Convergence test for parallel multiprocessing IRM chains.

    Builds a one-domain (size 4) IRM with a single beta-bernoulli
    relation, burns in one MCMC chain per CPU core, then checks that
    samples pooled across all chains approximate the exact clustering
    posterior.
    """
    domain_sizes = [4]
    defn = model_definition(domain_sizes, [((0, 0), bb)])
    prng = rng()
    data, posterior = data_with_posterior(defn, prng)
    dataviews = map(numpy_dataview, data)
    # one randomly-initialized chain per available core
    states = [model.initialize(defn, dataviews, prng)
              for _ in xrange(mp.cpu_count())]
    kernels = [('assign', range(len(domain_sizes)))]
    chains = [runner.runner(defn, dataviews, state, kernels)
              for state in states]
    master = parallel.runner(chains)
    master.run(r=prng, niters=10000)  # burnin

    # map each canonical per-domain assignment tuple to a discrete index
    all_assignments = tuple(map(list, map(permutation_iter, domain_sizes)))
    state_index = {C: i for i, C in enumerate(it.product(*all_assignments))}

    def draw_batch():
        # advance every chain a little, then yield one sample per chain
        master.run(r=prng, niters=10)
        for state in master.get_latents():
            canon = tuple(
                tuple(permutation_canonical(state.assignments(i)))
                for i in xrange(len(domain_sizes)))
            yield state_index[canon]

    pending = [None]

    def sample_fn():
        # lazily (re)create the batch iterator and pull a single sample
        if pending[0] is None:
            pending[0] = draw_batch()
        try:
            return next(pending[0])
        except StopIteration:
            pending[0] = None
            return sample_fn()

    assert_discrete_dist_approx(sample_fn, posterior, ntries=100, kl_places=2)
def test_runner_multiprocessing_convergence():
    """Convergence test for parallel multiprocessing mixture chains.

    A beta-bernoulli mixture over 4 observations with 5 features; after
    burn-in, samples pooled from one chain per core should approximate
    the exact clustering posterior.
    """
    n_obs, n_feats = 4, 5
    defn = model_definition(n_obs, [bb] * n_feats)
    prng = rng()
    Y, posterior = data_with_posterior(defn, r=prng)
    view = numpy_dataview(Y)
    states = [model.initialize(defn, view, prng)
              for _ in xrange(mp.cpu_count())]
    chains = [runner.runner(defn, view, state, ['assign'])
              for state in states]
    master = parallel.runner(chains)
    master.run(r=prng, niters=1000)  # burnin

    # index every canonical partition of the observations
    partition_ids = {C: i for i, C in enumerate(permutation_iter(n_obs))}

    def draw_batch():
        master.run(r=prng, niters=10)
        for state in master.get_latents():
            yield partition_ids[
                tuple(permutation_canonical(state.assignments()))]

    pending = [None]

    def sample_fn():
        # pull one sample, transparently restarting exhausted batches
        if pending[0] is None:
            pending[0] = draw_batch()
        try:
            return next(pending[0])
        except StopIteration:
            pending[0] = None
            return sample_fn()

    assert_discrete_dist_approx(sample_fn, posterior, ntries=100, kl_places=2)
def test_runner_multiprocessing():
    """Smoke-test that a pool of multiprocessing IRM runners is restartable."""
    defn = model_definition([10, 10], [((0, 0), bb), ((0, 1), nich)])
    views = map(numpy_dataview, toy_dataset(defn))
    kernels = runner.default_kernel_config(defn)
    prng = rng()
    states = [model.initialize(defn, views, prng)
              for _ in xrange(mp.cpu_count())]
    chains = [runner.runner(defn, views, state, kernels)
              for state in states]
    master = parallel.runner(chains)
    # check it is restartable: two consecutive runs must both succeed
    master.run(r=prng, niters=10)
    master.run(r=prng, niters=10)
def test_runner_multyvac():
    """Smoke-test the multyvac backend: two chains, two consecutive runs."""
    defn = model_definition(10, [bb, nich, niw(3)])
    dataset = toy_dataset(defn)
    dataview = numpy_dataview(dataset)
    kernels = runner.default_kernel_config(defn)
    prng = rng()
    # only two chains here; the cloud backend is the thing under test
    states = [model.initialize(defn, dataview, prng) for _ in xrange(2)]
    chains = [runner.runner(defn, dataview, state, kernels)
              for state in states]
    master = parallel.runner(
        chains, backend='multyvac', layer='perf', core='f2')
    master.run(r=prng, niters=1000)
    master.run(r=prng, niters=1000)
def test_runner_multiprocessing():
    """Smoke-test that a pool of multiprocessing mixture runners is restartable."""
    defn = model_definition(10, [bb, nich, niw(3)])
    Y = toy_dataset(defn)
    dataview = numpy_dataview(Y)
    config = runner.default_kernel_config(defn)
    prng = rng()
    chain_states = [model.initialize(defn, dataview, prng)
                    for _ in xrange(mp.cpu_count())]
    chain_runners = [runner.runner(defn, dataview, s, config)
                     for s in chain_states]
    mp_runner = parallel.runner(chain_runners)
    # restartable: back-to-back runs exercise worker re-use
    mp_runner.run(r=prng, niters=10)
    mp_runner.run(r=prng, niters=10)
def test_runner_multiprocessing():
    """Verify a pool of multiprocessing IRM chains survives a restart."""
    defn = model_definition([10, 10], [((0, 0), bb), ((0, 1), nich)])
    relation_views = map(numpy_dataview, toy_dataset(defn))
    config = runner.default_kernel_config(defn)
    prng = rng()
    n_chains = mp.cpu_count()
    initial_states = [model.initialize(defn, relation_views, prng)
                      for _ in xrange(n_chains)]
    single_runners = [runner.runner(defn, relation_views, state, config)
                      for state in initial_states]
    pool = parallel.runner(single_runners)
    # check it is restartable
    pool.run(r=prng, niters=10)
    pool.run(r=prng, niters=10)
def run_dpgmm(niter=1000, datadir="../../", nfeatures=13):
    """Fit a Dirichlet-process Gaussian mixture to the scaled features.

    Parameters
    ----------
    niter : int
        Number of sampling iterations per chain.
    datadir : str
        Directory containing the input data; the fitted runner is pickled
        to ``<datadir>grs1915_dpgmm.pkl``.
    nfeatures : int
        Number of leading (ranked) columns of the scaled feature matrix
        to keep. Default 13 matches the previous hard-coded behavior.

    Returns
    -------
    None (side effect: writes the pickle file).
    """
    # fixed feature ranking determined by earlier feature-selection runs
    ranking = [10, 6, 7, 26, 5, 8, 4, 19, 12, 23, 24, 33, 28, 25, 14, 3, 0,
               1, 21, 30, 11, 31, 13, 9, 22, 2, 27, 29, 32, 17, 18, 20, 16,
               15]
    features, labels, lc, hr, tstart, \
        features_lb, labels_lb, lc_lb, hr_lb, \
        fscaled, fscaled_lb, fscaled_full, labels_all = \
        load_data(datadir, tseg=1024.0, log_features=None, ranking=ranking)

    labels_phys = feature_engineering.convert_labels_to_physical(labels)
    labels_phys_lb = feature_engineering.convert_labels_to_physical(labels_lb)

    labels_all_phys = np.hstack([labels_phys["train"], labels_phys["val"],
                                 labels_phys["test"]])

    # BUG FIX: honour the nfeatures argument instead of hard-coding 13
    fscaled_small = fscaled_full[:, :nfeatures]

    nchains = 8

    # The random state object
    prng = rng()

    # Define a DP-GMM whose component likelihood is a multivariate Gaussian
    # with a normal-inverse-Wishart prior of the data's dimensionality
    defn = model_definition(fscaled_small.shape[0],
                            [normal_inverse_wishart(fscaled_small.shape[1])])

    fscaled_rec = np.array([(list(f),) for f in fscaled_small],
                           dtype=[('', np.float32, fscaled_small.shape[1])])

    # Create a wrapper around the numpy recarray which
    # data-microscopes understands
    view = numpy_dataview(fscaled_rec)

    # Initialize nchains start points randomly in the state space
    latents = [model.initialize(defn, view, prng) for _ in xrange(nchains)]

    # Create a runner for each chain
    runners = [runner.runner(defn, view, latent, kernel_config=['assign'])
               for latent in latents]
    r = parallel.runner(runners)
    r.run(r=prng, niters=niter)

    # "wb": pickle output is binary data; text mode would corrupt it on
    # platforms that translate newlines
    with open(datadir + "grs1915_dpgmm.pkl", "wb") as f:
        pickle.dump(r, f)

    return
def test_runner_multiprocessing_convergence():
    """Posterior-convergence test for parallel IRM chains (single domain)."""
    domain_sizes = [4]
    n_domains = len(domain_sizes)
    defn = model_definition(domain_sizes, [((0, 0), bb)])
    prng = rng()
    relations, posterior = data_with_posterior(defn, prng)
    relation_views = map(numpy_dataview, relations)
    chain_states = [model.initialize(defn, relation_views, prng)
                    for _ in xrange(mp.cpu_count())]
    config = [('assign', range(n_domains))]
    chain_runners = [runner.runner(defn, relation_views, s, config)
                     for s in chain_states]
    pool = parallel.runner(chain_runners)
    pool.run(r=prng, niters=10000)  # burnin

    # enumerate canonical assignment tuples across all domains and give
    # each combination a discrete index
    per_domain = tuple(map(list, map(permutation_iter, domain_sizes)))
    index_of = {C: i for i, C in enumerate(it.product(*per_domain))}

    def fresh_samples():
        pool.run(r=prng, niters=10)
        for state in pool.get_latents():
            key = tuple(tuple(permutation_canonical(state.assignments(d)))
                        for d in xrange(n_domains))
            yield index_of[key]

    current = [None]

    def sample_fn():
        # one sample per call; restart the generator when it runs dry
        if current[0] is None:
            current[0] = fresh_samples()
        try:
            return next(current[0])
        except StopIteration:
            current[0] = None
            return sample_fn()

    assert_discrete_dist_approx(sample_fn, posterior, ntries=100, kl_places=2)
# Build the IRM definition: one domain of size N with a single
# beta-bernoulli relation over (domain 0, domain 0) pairs.
defn = model_definition([N], [((0, 0), beta_bernoulli)])
# Wrap the observed boolean relation in a dataview the engine understands.
views = [numpy_dataview(communications_relation)]
prng = rng()


# ##Next, let's initialize the model and define the runners.
#
# ##These runners are our MCMC chains. We'll use `cpu_count` to define our number of chains.

# In[ ]:

nchains = cpu_count()
# One randomly-initialized latent state per chain. The very small
# CRP concentration (alpha = 1e-3) favours few clusters a priori.
latents = [model.initialize(defn,
                            views,
                            r=prng,
                            cluster_hps=[{'alpha': 1e-3}])
           for _ in xrange(nchains)]
kc = runner.default_assign_kernel_config(defn)
# Bind each latent state to its own single-chain runner, then pool the
# runners so they advance in parallel.
runners = [runner.runner(defn, views, latent, kc) for latent in latents]
r = parallel.runner(runners)


# ##From here, we can finally run each chain of the sampler 1000 times

# In[ ]:

start = time.time()
r.run(r=prng, niters=1000)
print "inference took {} seconds".format(time.time() - start)


# ##Now that we have learned our model let's get our cluster assignments

# In[ ]:
def infinite_relational_model(corr_matrix, lag_matrix, threshold,
                              sampled_coords, window_size):
    """Cluster a correlation graph with an Infinite Relational Model.

    Thresholds ``corr_matrix`` into a boolean adjacency graph, fits an
    IRM with one MCMC chain per CPU core, reorders ``corr_matrix``,
    ``lag_matrix`` and ``sampled_coords`` by the inferred cluster
    ordering, and writes the result to
    ``./expdata/clustermatrix-<window_size>.json``.

    Returns the response dict that was written to disk.
    """
    import numpy as np
    import json
    import time
    import itertools as it
    from multiprocessing import cpu_count
    from microscopes.common.rng import rng
    from microscopes.common.relation.dataview import numpy_dataview
    from microscopes.models import bb as beta_bernoulli
    from microscopes.irm.definition import model_definition
    from microscopes.irm import model, runner
    from microscopes.kernels import parallel
    from microscopes.common.query import groups

    # Binarize the correlation matrix: edge iff corr >= threshold.
    # Written as ~(x < t) rather than (x >= t) so NaN entries map to
    # True, exactly matching the original elementwise if/else loop.
    graph = np.array(~(np.asarray(corr_matrix) < threshold), dtype=np.bool)
    graph_size = len(graph)

    # conduct Infinite Relational Model
    defn = model_definition([graph_size], [((0, 0), beta_bernoulli)])
    views = [numpy_dataview(graph)]
    prng = rng()

    nchains = cpu_count()
    # very small CRP concentration -> prior preference for few clusters
    latents = [model.initialize(defn, views, r=prng,
                                cluster_hps=[{'alpha': 1e-3}])
               for _ in xrange(nchains)]
    kc = runner.default_assign_kernel_config(defn)
    runners = [runner.runner(defn, views, latent, kc) for latent in latents]
    r = parallel.runner(runners)

    start = time.time()
    # niters kept deliberately small for interactive turnaround; raise it
    # (e.g. to 1000) for production-quality inference
    r.run(r=prng, niters=20)
    print ("inference took", time.time() - start, "seconds")

    # take the first chain's assignments and flatten its clusters into a
    # single permutation of the node indices
    infers = r.get_latents()
    clusters = groups(infers[0].assignments(0), sort=True)
    ordering = list(it.chain.from_iterable(clusters))

    # reorder every matrix consistently with the cluster ordering
    z = graph.copy()
    z = z[ordering]
    z = z[:, ordering]
    corr_matrix = corr_matrix[ordering]
    corr_matrix = corr_matrix[:, ordering]
    lag_matrix = lag_matrix[ordering]
    lag_matrix = lag_matrix[:, ordering]

    cluster_sampled_coords = np.array(sampled_coords)
    cluster_sampled_coords = cluster_sampled_coords[ordering]

    response_msg = {
        'corrMatrix': corr_matrix.tolist(),
        'lagMatrix': lag_matrix.tolist(),
        'clusterMatrix': z.tolist(),
        'clusterSampledCoords': cluster_sampled_coords.tolist(),
        'nClusterList': [len(cluster) for cluster in clusters],
        'ordering': ordering,
    }

    # with-block guarantees the file is closed even if json.dump raises
    with open("./expdata/clustermatrix-" + str(window_size) + ".json",
              "w") as f:
        json.dump(response_msg, f)

    return response_msg
# ##Next, let's initialize the model and define the runners. # # ##These runners are our MCMC chains. We'll use `cpu_count` to define our number of chains. # In[ ]: nchains = cpu_count() latents = [ model.initialize(defn, views, r=prng, cluster_hps=[{ 'alpha': 1e-3 }]) for _ in xrange(nchains) ] kc = runner.default_assign_kernel_config(defn) runners = [runner.runner(defn, views, latent, kc) for latent in latents] r = parallel.runner(runners) # ##From here, we can finally run each chain of the sampler 1000 times # In[ ]: start = time.time() r.run(r=prng, niters=1000) print "inference took {} seconds".format(time.time() - start) # ##Now that we have learned our model let's get our cluster assignments # In[ ]: infers = r.get_latents() clusters = groups(infers[0].assignments(0), sort=True)