def test_simple():
    """Compare entity_data_positions() on two latent states with a brute-force scan.

    One state is initialized from dense (numpy) views and one from sparse
    (CSR) views of the same randomly masked boolean relation.  For every
    (domain, entity) pair, the positions reported by each state must match
    the unmasked cells of the raw data that touch that entity.
    """
    domains = [5, 6]
    relations = [((0, 1), bb)]
    relsize = (domains[0], domains[1])
    # Values are drawn before the mask, matching the argument order below.
    values = np.random.choice([False, True], size=relsize)
    mask = np.random.choice([False, True], size=relsize)
    raw_data = [ma.array(values, mask=mask)]

    def csr(raw):
        """Build a CSR matrix from the unmasked cells of a masked array."""
        nrows, ncols = raw.shape
        coords = [
            (row, col)
            for row, col in it.product(range(nrows), range(ncols))
            if not raw.mask[row, col]
        ]
        cells = [raw[row, col] for row, col in coords]
        rows = [row for row, _ in coords]
        cols = [col for _, col in coords]
        return coo_matrix((cells, (rows, cols)), shape=raw.shape).tocsr()

    defn = model_definition(domains, relations)
    data = [numpy_dataview(raw) for raw in raw_data]
    sparse_data = [sparse_2d_dataview(csr(raw)) for raw in raw_data]

    r = rng()
    s = initialize(defn, data, r=r)
    assert s and bind(s, 0, data) and bind(s, 1, data)
    s1 = initialize(defn, sparse_data, r=r)
    assert s1 and bind(s1, 0, sparse_data) and bind(s1, 1, sparse_data)

    def entity_data_positions(domain, eid):
        """Brute-force list of unmasked [row, col] cells involving `eid`."""
        expected = []
        for (rel_domains, _), reln in zip(relations, raw_data):
            nrows, ncols = reln.shape
            for pos0, pos1 in it.product(xrange(nrows), xrange(ncols)):
                if reln.mask[pos0, pos1]:
                    continue
                on_axis0 = rel_domains[0] == domain and pos0 == eid
                on_axis1 = rel_domains[1] == domain and pos1 == eid
                if on_axis0 or on_axis1:
                    expected.append([pos0, pos1])
        return expected

    def test(state):
        # NOTE(review): the dense `data` views are passed for both states,
        # including the one initialized from `sparse_data` -- confirm this
        # is intended.
        for did, nentities in enumerate(domains):
            for eid in xrange(nentities):
                want = entity_data_positions(did, eid)
                got = state.entity_data_positions(did, eid, data)
                assert sorted(want) == sorted(got)

    test(s)
    test(s1)
def test_simple():
    """Compare entity_data_positions() on two latent states with a brute-force scan.

    One state is initialized from dense (numpy) views and one from sparse
    (CSR) views of the same randomly masked boolean relation.  For every
    (domain, entity) pair, the positions reported by each state must match
    the unmasked cells of the raw data that touch that entity.
    """
    domains = [5, 6]
    relations = [((0, 1), bb)]
    relsize = (domains[0], domains[1])
    # Values are drawn before the mask, matching the argument order below.
    values = np.random.choice([False, True], size=relsize)
    mask = np.random.choice([False, True], size=relsize)
    raw_data = [ma.array(values, mask=mask)]

    def csr(raw):
        """Build a CSR matrix from the unmasked cells of a masked array."""
        nrows, ncols = raw.shape
        coords = [
            (row, col)
            for row, col in it.product(range(nrows), range(ncols))
            if not raw.mask[row, col]
        ]
        cells = [raw[row, col] for row, col in coords]
        rows = [row for row, _ in coords]
        cols = [col for _, col in coords]
        return coo_matrix((cells, (rows, cols)), shape=raw.shape).tocsr()

    defn = model_definition(domains, relations)
    data = [numpy_dataview(raw) for raw in raw_data]
    sparse_data = [sparse_2d_dataview(csr(raw)) for raw in raw_data]

    r = rng()
    s = initialize(defn, data, r=r)
    assert s and bind(s, 0, data) and bind(s, 1, data)
    s1 = initialize(defn, sparse_data, r=r)
    assert s1 and bind(s1, 0, sparse_data) and bind(s1, 1, sparse_data)

    def entity_data_positions(domain, eid):
        """Brute-force list of unmasked [row, col] cells involving `eid`."""
        expected = []
        for (rel_domains, _), reln in zip(relations, raw_data):
            nrows, ncols = reln.shape
            for pos0, pos1 in it.product(xrange(nrows), xrange(ncols)):
                if reln.mask[pos0, pos1]:
                    continue
                on_axis0 = rel_domains[0] == domain and pos0 == eid
                on_axis1 = rel_domains[1] == domain and pos1 == eid
                if on_axis0 or on_axis1:
                    expected.append([pos0, pos1])
        return expected

    def test(state):
        # NOTE(review): the dense `data` views are passed for both states,
        # including the one initialized from `sparse_data` -- confirm this
        # is intended.
        for did, nentities in enumerate(domains):
            for eid in xrange(nentities):
                want = entity_data_positions(did, eid)
                got = state.entity_data_positions(did, eid, data)
                assert sorted(want) == sorted(got)

    test(s)
    test(s1)
def _assert_structure_equals(defn, s1, s2, views, r):
    """Assert that states `s1` and `s2` match, then that they are independent.

    The first part compares domain counts, relation counts, per-domain
    entities/groups/assignments/hyperparameters/assignment scores,
    per-relation hyperparameters and sufficient statistics, and the overall
    likelihood score.  The second part removes entity 0 of domain 0 from
    `s1` through a bound model, checks that `s2` still reports the previous
    assignments, and finally puts the entity back into its group.
    """
    ndomains = s1.ndomains()
    assert_equals(ndomains, s2.ndomains())
    nrelations = s1.nrelations()
    assert_equals(nrelations, s2.nrelations())

    for domain in xrange(ndomains):
        assert_equals(s1.nentities(domain), s2.nentities(domain))
        assert_equals(s1.ngroups(domain), s2.ngroups(domain))
        assert_equals(s1.assignments(domain), s2.assignments(domain))
        # Group ids are compared as sets, so ordering is not significant.
        assert_equals(set(s1.groups(domain)), set(s2.groups(domain)))
        assert_close(s1.get_domain_hp(domain), s2.get_domain_hp(domain))
        assert_almost_equals(
            s1.score_assignment(domain), s2.score_assignment(domain))

    for relation in xrange(nrelations):
        assert_close(
            s1.get_relation_hp(relation), s2.get_relation_hp(relation))
        # Walk every combination of group ids across the relation's domains.
        per_domain_groups = [
            s1.groups(domain) for domain in defn.relations()[relation]
        ]
        for gids in it.product(*per_domain_groups):
            lhs = s1.get_suffstats(relation, gids)
            rhs = s2.get_suffstats(relation, gids)
            if lhs is not None:
                assert_close(lhs, rhs)
            else:
                assert_is_none(rhs)

    assert_almost_equals(s1.score_likelihood(r), s2.score_likelihood(r))

    # Mutating s1 must not be visible through s2.
    snapshot = list(s1.assignments(0))
    bound = model.bind(s1, 0, views)
    removed_from = bound.remove_value(0, r)
    assert_equals(s1.assignments(0)[0], -1)
    assert_equals(snapshot, s2.assignments(0))
    bound.add_value(removed_from, 0, r)  # restore
def run(self, r, niters=10000):
    """Run the configured kernels for `niters` sweeps, in a single thread.

    One bound model is created per domain of the model definition.  Each
    sweep then applies every ``(name, config)`` entry of
    ``self._kernel_config`` in order.

    Parameters
    ----------
    r : random state
    niters : int

    Raises
    ------
    AssertionError
        If a kernel name in ``self._kernel_config`` is not recognized.
    """
    validator.validate_type(r, rng, param_name='r')
    validator.validate_positive(niters, param_name='niters')
    inds = xrange(len(self._defn.domains()))
    models = [bind(self._latent, i, self._views) for i in inds]
    for _ in xrange(niters):
        for name, config in self._kernel_config:
            if name == 'assign':
                for idx in config:
                    gibbs.assign(models[idx], r)
            elif name == 'assign_resample':
                for idx, v in config.iteritems():
                    gibbs.assign_resample(models[idx], v['m'], r)
            elif name == 'slice_cluster_hp':
                # `slice` must be a name the file brings into scope; the
                # builtin `slice` has no `hp` attribute.
                for idx, v in config.iteritems():
                    slice.hp(models[idx], r, cparam=v['cparam'])
            elif name == 'grid_relation_hp':
                # Relation-level kernels are always run through models[0].
                gibbs.hp(models[0], config, r)
            elif name == 'slice_relation_hp':
                slice.hp(models[0], r, hparams=config['hparams'])
            elif name == 'theta':
                slice.theta(models[0], r, tparams=config['tparams'])
            else:
                # Raised explicitly (not `assert False`) so the check is not
                # stripped when Python runs with -O.
                raise AssertionError("should not be reached")
def test_slice_theta_irm():
    """Check that theta() samples 'p' from the expected Beta posterior.

    All N entities are placed in a single group, so the only sufficient
    statistic is the one for group pair [0, 0].  The reference distribution
    is Beta(alpha + #True cells, beta + #False cells) built from the prior
    and the generated data.
    """
    N = 10
    defn = model_definition([N], [((0, 0), bbnc)])
    data = np.random.random(size=(N, N)) < 0.8
    view = numpy_dataview(data)
    r = rng()

    prior = {'alpha': 1.0, 'beta': 9.0}
    single_group = [0] * N
    s = initialize(
        defn,
        [view],
        r=r,
        cluster_hps=[{'alpha': 2.0}],
        relation_hps=[prior],
        domain_assignments=[single_group])
    bs = bind(s, 0, [view])
    params = {0: {'p': 0.05}}

    # Count the True and False cells of the boolean data matrix.
    heads = int(data.sum())
    tails = data.size - heads
    posterior = beta(prior['alpha'] + heads, prior['beta'] + tails)

    def sample_fn():
        # One theta() step, then read back the current 'p'.
        theta(bs, r, tparams=params)
        return s.get_suffstats(0, [0, 0])['p']

    assert_1d_cont_dist_approx_sps(sample_fn, posterior, nsamples=50000)
def latent(groups, entities_per_group, features, r):
    """Build a bound single-domain model with a fixed, balanced clustering.

    The domain has ``groups * entities_per_group`` entities and `features`
    relations over it, each backed by a random boolean N x N matrix.
    Entities are assigned to groups in contiguous runs of
    `entities_per_group`, and one extra group is created before returning.
    """
    N = groups * entities_per_group
    defn = model_definition([N], [((0, 0), bb)] * features)

    # generate fake data: one random boolean matrix per feature
    views = [
        numpy_dataview(np.random.random(size=(N, N)) <= 0.5)
        for _ in xrange(features)
    ]

    # assign entities to their respective groups: 0, 0, ..., 1, 1, ...
    assignment = [
        gid for gid in xrange(groups) for _ in xrange(entities_per_group)
    ]

    state = initialize(defn, views, r, domain_assignments=[assignment])
    bound = bind(state, 0, views)
    bound.create_group(r)  # perftest() doesn't modify group assignments
    return bound
def latent(groups, entities_per_group, features, r):
    """Build a bound single-domain model with a fixed, balanced clustering.

    The domain has ``groups * entities_per_group`` entities and `features`
    relations over it, each backed by a random boolean N x N matrix.
    Entities are assigned to groups in contiguous runs of
    `entities_per_group`, and one extra group is created before returning.
    """
    N = groups * entities_per_group
    defn = model_definition([N], [((0, 0), bb)] * features)

    # generate fake data: one random boolean matrix per feature
    views = [
        numpy_dataview(np.random.random(size=(N, N)) <= 0.5)
        for _ in xrange(features)
    ]

    # assign entities to their respective groups: 0, 0, ..., 1, 1, ...
    assignment = [
        gid for gid in xrange(groups) for _ in xrange(entities_per_group)
    ]

    state = initialize(defn, views, r, domain_assignments=[assignment])
    bound = bind(state, 0, views)
    bound.create_group(r)  # perftest() doesn't modify group assignments
    return bound