def test_runner_multiprocessing_convergence():
    domains = [4]
    defn = model_definition(domains, [((0, 0), bb)])
    prng = rng()
    relations, posterior = data_with_posterior(defn, prng)
    views = map(numpy_dataview, relations)
    latents = [model.initialize(defn, views, prng)
               for _ in xrange(mp.cpu_count())]
    kc = [('assign', range(len(domains)))]
    runners = [runner.runner(defn, views, latent, kc) for latent in latents]
    r = parallel.runner(runners)
    r.run(r=prng, niters=10000)  # burnin

    # enumerate every assignment vector (up to permutation) so each latent
    # state can be mapped to a discrete outcome id
    product_assignments = tuple(map(list, map(permutation_iter, domains)))
    idmap = {C: i for i, C in enumerate(it.product(*product_assignments))}

    def sample_iter():
        r.run(r=prng, niters=10)
        for latent in r.get_latents():
            key = tuple(tuple(permutation_canonical(latent.assignments(i)))
                        for i in xrange(len(domains)))
            yield idmap[key]

    # cycle through the per-chain samples, restarting the iterator (and
    # hence running more iterations) once every chain has been consumed
    ref = [None]

    def sample_fn():
        if ref[0] is None:
            ref[0] = sample_iter()
        try:
            return next(ref[0])
        except StopIteration:
            ref[0] = None
        return sample_fn()

    assert_discrete_dist_approx(sample_fn, posterior, ntries=100, kl_places=2)


def test_simple():
    domains = [5, 6]
    relations = [((0, 1), bb)]
    relsize = (domains[0], domains[1])
    raw_data = [ma.array(
        np.random.choice([False, True], size=relsize),
        mask=np.random.choice([False, True], size=relsize))]

    def csr(raw):
        # convert a masked dense array into a scipy CSR matrix containing
        # only the unmasked entries
        n, m = raw.shape

        def indices():
            for i, j in it.product(range(n), range(m)):
                if not raw.mask[i, j]:
                    yield i, j

        data = [raw[i, j] for i, j in indices()]
        i = list(map(op.itemgetter(0), indices()))
        j = list(map(op.itemgetter(1), indices()))
        return coo_matrix((data, (i, j)), shape=raw.shape).tocsr()

    defn = model_definition(domains, relations)
    data = map(numpy_dataview, raw_data)
    sparse_data = map(sparse_2d_dataview, map(csr, raw_data))
    r = rng()

    s = initialize(defn, data, r=r)
    assert s and bind(s, 0, data) and bind(s, 1, data)

    s1 = initialize(defn, sparse_data, r=r)
    assert s1 and bind(s1, 0, sparse_data) and bind(s1, 1, sparse_data)

    def entity_data_positions(domain, eid):
        # reference implementation: all unmasked cells touching (domain, eid)
        def f(domains, reln):
            for pos0 in xrange(reln.shape[0]):
                for pos1 in xrange(reln.shape[1]):
                    if reln.mask[pos0, pos1]:
                        continue
                    if (domains[0] == domain and pos0 == eid) or \
                       (domains[1] == domain and pos1 == eid):
                        yield [pos0, pos1]
        return list(
            it.chain.from_iterable(
                f(domains, reln)
                for (domains, _), reln in zip(relations, raw_data)))

    def test(s):
        for did, nentities in enumerate(domains):
            for eid in xrange(nentities):
                a = entity_data_positions(did, eid)
                b = s.entity_data_positions(did, eid, data)
                assert sorted(a) == sorted(b)

    test(s)
    test(s1)


def test_slice_theta_irm():
    N = 10
    defn = model_definition([N], [((0, 0), bbnc)])
    data = np.random.random(size=(N, N)) < 0.8
    view = numpy_dataview(data)
    r = rng()
    prior = {'alpha': 1.0, 'beta': 9.0}
    s = initialize(
        defn,
        [view],
        r=r,
        cluster_hps=[{'alpha': 2.0}],
        relation_hps=[prior],
        domain_assignments=[[0] * N])
    bs = bind(s, 0, [view])

    params = {0: {'p': 0.05}}

    heads = len([1 for y in data.flatten() if y])
    tails = len([1 for y in data.flatten() if not y])

    # with all entities pinned to a single cluster, the posterior over the
    # lone relation parameter p is conjugate: Beta(alpha + heads, beta + tails)
    alpha1 = prior['alpha'] + heads
    beta1 = prior['beta'] + tails

    def sample_fn():
        theta(bs, r, tparams=params)
        return s.get_suffstats(0, [0, 0])['p']

    rv = beta(alpha1, beta1)
    assert_1d_cont_dist_approx_sps(sample_fn, rv, nsamples=50000)


def test_state_pickle():
    defn = model_definition([5], [((0, 0), bb)])
    r = rng()
    relations = toy_dataset(defn)
    views = map(numpy_dataview, relations)
    s1 = model.initialize(defn, views, r)
    s2 = pickle.loads(pickle.dumps(s1))
    _assert_structure_equals(defn, s1, s2, views, r)


def test_runner_default_kernel_config_grid():
    defn = model_definition([10, 10], [((0, 0), bb), ((0, 1), nich)])

    def kc_fn(defn):
        return list(it.chain(
            runner.default_assign_kernel_config(defn),
            runner.default_relation_hp_kernel_config(defn)))

    _test_runner_simple(defn, kc_fn)


def test_model_definition_pickle():
    defn = model_definition([10, 12], [((0, 0), bb), ((1, 0), niw(3))])
    bstr = pickle.dumps(defn)
    defn1 = pickle.loads(bstr)
    assert_list_equal(defn.domains(), defn1.domains())
    assert_equals(defn.relations(), defn1.relations())
    zipped_models = zip(defn.relation_models(), defn1.relation_models())
    for model, model1 in zipped_models:
        assert_equals(model.name(), model1.name())


def test_runner_multiprocessing():
    defn = model_definition([10, 10], [((0, 0), bb), ((0, 1), nich)])
    views = map(numpy_dataview, toy_dataset(defn))
    kc = runner.default_kernel_config(defn)
    prng = rng()
    latents = [model.initialize(defn, views, prng)
               for _ in xrange(mp.cpu_count())]
    runners = [runner.runner(defn, views, latent, kc) for latent in latents]
    r = parallel.runner(runners)
    # check it is restartable
    r.run(r=prng, niters=10)
    r.run(r=prng, niters=10)


def test_state_copy():
    defn = model_definition([5], [((0, 0), bb)])
    r = rng()
    relations = toy_dataset(defn)
    views = map(numpy_dataview, relations)
    s1 = model.initialize(defn, views, r)

    s2 = copy.copy(s1)
    assert_is_not(s1, s2)
    _assert_structure_equals(defn, s1, s2, views, r)

    s2 = copy.deepcopy(s1)
    assert_is_not(s1, s2)
    _assert_structure_equals(defn, s1, s2, views, r)


def latent(groups, entities_per_group, features, r):
    N = groups * entities_per_group
    defn = model_definition([N], [((0, 0), bb)] * features)

    # generate fake data
    views = []
    for i in xrange(features):
        Y = np.random.random(size=(N, N)) <= 0.5
        view = numpy_dataview(Y)
        views.append(view)

    # assign entities to their respective groups
    assignment = [[g] * entities_per_group for g in xrange(groups)]
    assignment = list(it.chain.from_iterable(assignment))

    latent = bind(
        initialize(defn, views, r, domain_assignments=[assignment]),
        0, views)
    latent.create_group(r)  # perftest() doesn't modify group assignments
    return latent


def test_runner_default_kernel_config_convergence():
    domains = [4]
    defn = model_definition(domains, [((0, 0), bb)])
    prng = rng()
    relations, posterior = data_with_posterior(defn, prng)
    views = map(numpy_dataview, relations)
    latent = model.initialize(defn, views, prng)
    r = runner.runner(defn, views, latent,
                      [('assign', range(len(domains)))])
    r.run(r=prng, niters=1000)  # burnin
    product_assignments = tuple(map(list, map(permutation_iter, domains)))
    idmap = {C: i for i, C in enumerate(it.product(*product_assignments))}

    def sample_fn():
        r.run(r=prng, niters=10)
        new_latent = r.get_latent()
        key = tuple(tuple(permutation_canonical(new_latent.assignments(i)))
                    for i in xrange(len(domains)))
        return idmap[key]

    assert_discrete_dist_approx(sample_fn, posterior, ntries=100)


def test_runner_default_kernel_config_nonconj():
    defn = model_definition([10, 10], [((0, 0), bbnc), ((0, 1), nich)])
    kc_fn = runner.default_kernel_config
    _test_runner_simple(defn, kc_fn)


# 2. load the data
# 3. initialize the model
# 4. define the runners (MCMC chains)
# 5. run the runners

# In[5]:

from microscopes.common.rng import rng
from microscopes.common.relation.dataview import numpy_dataview
from microscopes.models import bb as beta_bernoulli
from microscopes.irm.definition import model_definition
from microscopes.irm import model, runner, query
from microscopes.kernels import parallel
from microscopes.common.query import groups, zmatrix_heuristic_block_ordering, zmatrix_reorder

defn = model_definition([N], [((0, 0), beta_bernoulli)])
views = [numpy_dataview(communications_relation)]
prng = rng()

nchains = 1
latents = [model.initialize(defn, views, r=prng, cluster_hps=[{'alpha': 1}])
           for _ in xrange(nchains)]
kc = runner.default_assign_kernel_config(defn)
print kc
r = runner.runner(defn, views, latents[0], kc)

# ##From here, we can finally run each chain of the sampler 1000 times

# In[ ]:

start = time.time()
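# A minimal sketch of how this cell might continue, assuming the same runner
# API used elsewhere in this section: run the chain for the 1000 iterations
# promised above and report the elapsed wall-clock time.
r.run(r=prng, niters=1000)
print "inference took", time.time() - start, "seconds"
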
def infinite_relational_model(corr_matrix, lag_matrix, threshold,
                              sampled_coords, window_size):
    import numpy as np
    import math
    import json
    import time
    import itertools as it
    from multiprocessing import cpu_count

    from microscopes.common.rng import rng
    from microscopes.common.relation.dataview import numpy_dataview
    from microscopes.models import bb as beta_bernoulli
    from microscopes.irm.definition import model_definition
    from microscopes.irm import model, runner, query
    from microscopes.kernels import parallel
    from microscopes.common.query import groups, zmatrix_heuristic_block_ordering, zmatrix_reorder

    # build a boolean adjacency matrix by thresholding the correlations
    graph = []
    for row in corr_matrix:
        graph_row = []
        for corr in row:
            if corr < threshold:
                graph_row.append(False)
            else:
                graph_row.append(True)
        graph.append(graph_row)
    graph = np.array(graph, dtype=np.bool)
    graph_size = len(graph)

    # conduct Infinite Relational Model inference, one chain per core
    defn = model_definition([graph_size], [((0, 0), beta_bernoulli)])
    views = [numpy_dataview(graph)]
    prng = rng()
    nchains = cpu_count()
    latents = [model.initialize(defn, views, r=prng,
                                cluster_hps=[{'alpha': 1e-3}])
               for _ in xrange(nchains)]
    kc = runner.default_assign_kernel_config(defn)
    runners = [runner.runner(defn, views, latent, kc) for latent in latents]
    r = parallel.runner(runners)

    start = time.time()
    # r.run(r=prng, niters=1000)
    # r.run(r=prng, niters=100)
    r.run(r=prng, niters=20)
    print "inference took", time.time() - start, "seconds"

    infers = r.get_latents()
    clusters = groups(infers[0].assignments(0), sort=True)
    ordering = list(it.chain.from_iterable(clusters))

    # reorder all matrices so entities in the same cluster are adjacent
    z = graph.copy()
    z = z[ordering]
    z = z[:, ordering]
    corr_matrix = corr_matrix[ordering]
    corr_matrix = corr_matrix[:, ordering]
    lag_matrix = lag_matrix[ordering]
    lag_matrix = lag_matrix[:, ordering]
    cluster_sampled_coords = np.array(sampled_coords)
    cluster_sampled_coords = cluster_sampled_coords[ordering]

    response_msg = {
        'corrMatrix': corr_matrix.tolist(),
        'lagMatrix': lag_matrix.tolist(),
        'clusterMatrix': z.tolist(),
        'clusterSampledCoords': cluster_sampled_coords.tolist(),
        'nClusterList': [len(cluster) for cluster in clusters],
        'ordering': ordering,
    }

    f = open("./expdata/clustermatrix-" + str(window_size) + ".json", "w")
    json.dump(response_msg, f)
    f.close()

    return response_msg


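# Hypothetical usage sketch (the inputs below are made-up stand-ins, not data
# from this project): the correlation and lag matrices must be square numpy
# arrays over the same entities, with one coordinate pair per entity. Note the
# function writes into ./expdata/, which must already exist.
if __name__ == '__main__':
    import numpy as np
    n = 8
    series = np.random.random(size=(n, 50))
    corr = np.corrcoef(series)                   # (n, n) correlation matrix
    lags = np.random.randint(0, 5, size=(n, n))  # made-up lag matrix
    coords = [(35.0 + 0.1 * i, 139.0 + 0.1 * i) for i in xrange(n)]
    infinite_relational_model(corr, lags, threshold=0.5,
                              sampled_coords=coords, window_size=30)

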
# In[5]:

from microscopes.common.rng import rng
from microscopes.common.relation.dataview import numpy_dataview
from microscopes.models import bb as beta_bernoulli
from microscopes.irm.definition import model_definition
from microscopes.irm import model, runner, query
from microscopes.kernels import parallel
from microscopes.common.query import groups, zmatrix_heuristic_block_ordering, zmatrix_reorder

# ##Let's start by defining the model and loading the data

# In[6]:

defn = model_definition([N], [((0, 0), beta_bernoulli)])
views = [numpy_dataview(communications_relation)]
prng = rng()

# ##Next, let's initialize the model and define the runners.
#
# ##These runners are our MCMC chains. We'll use `cpu_count` to define our number of chains.

# In[ ]:

nchains = cpu_count()
latents = [model.initialize(defn, views, r=prng, cluster_hps=[{'alpha': 1e-3}])
           for _ in xrange(nchains)]
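# A minimal sketch of how the runners might be defined next, assuming the
# same runner/parallel API used in the other excerpts above: one runner per
# latent chain, coordinated by the multiprocessing-backed parallel runner.
kc = runner.default_assign_kernel_config(defn)
runners = [runner.runner(defn, views, latent, kc) for latent in latents]
r = parallel.runner(runners)
r.run(r=prng, niters=1000)  # run all chains in parallel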