コード例 #1
0
ファイル: test_runner.py プロジェクト: jzf2101/irm
def test_runner_multiprocessing_convergence():
    """Compare samples from a parallel runner against a reference posterior.

    One latent state per CPU (``mp.cpu_count()``) is initialized for a single
    four-entity domain and all of them are driven by one ``parallel.runner``.
    After a burn-in, the canonicalized assignments of every latent are handed
    to ``assert_discrete_dist_approx`` together with the posterior returned
    by ``data_with_posterior``.
    """
    domains = [4]
    defn = model_definition(domains, [((0, 0), bb)])
    prng = rng()
    relations, posterior = data_with_posterior(defn, prng)
    views = [numpy_dataview(relation) for relation in relations]

    # Build every latent first, then wrap each one in its own runner.
    latents = []
    for _ in xrange(mp.cpu_count()):
        latents.append(model.initialize(defn, views, prng))
    kc = [('assign', range(len(domains)))]
    runners = [runner.runner(defn, views, state, kc) for state in latents]
    r = parallel.runner(runners)
    r.run(r=prng, niters=10000)  # burnin

    # Give each combination of per-domain assignments an integer id.
    per_domain = tuple(list(permutation_iter(size)) for size in domains)
    idmap = {}
    for idx, combo in enumerate(it.product(*per_domain)):
        idmap[combo] = idx

    def assignment_id(state):
        # Canonicalize the assignment vector of every domain and look up its id.
        key = tuple(
            tuple(permutation_canonical(state.assignments(did)))
            for did in xrange(len(domains)))
        return idmap[key]

    def sample_iter():
        # One batch: advance all chains, then emit one id per latent.
        r.run(r=prng, niters=10)
        for state in r.get_latents():
            yield assignment_id(state)

    # Single-slot list so the closure below can rebind the active batch.
    pending = [None]
    exhausted = object()

    def sample_fn():
        if pending[0] is None:
            pending[0] = sample_iter()
        sample = next(pending[0], exhausted)
        if sample is not exhausted:
            return sample
        # Batch used up: drop it and retry with a fresh one.
        pending[0] = None
        return sample_fn()

    assert_discrete_dist_approx(sample_fn, posterior, ntries=100, kl_places=2)
コード例 #2
0
def test_simple():
    """Check ``entity_data_positions`` on dense- and sparse-initialized states.

    A random masked 5x6 boolean relation is wrapped both as a dense
    ``numpy_dataview`` and as a ``sparse_2d_dataview`` built from its unmasked
    cells. A state is initialized from each, and for every (domain, entity)
    pair the positions reported by the state are compared with a brute-force
    scan over the unmasked cells.
    """
    domains = [5, 6]
    relations = [((0, 1), bb)]
    relsize = (domains[0], domains[1])

    # Values are drawn from the global numpy RNG first, then the mask.
    values = np.random.choice([False, True], size=relsize)
    mask = np.random.choice([False, True], size=relsize)
    raw_data = [ma.array(values, mask=mask)]

    def csr(raw):
        # Convert a masked array into a CSR matrix holding only unmasked cells.
        nrows, ncols = raw.shape
        unmasked = [
            (i, j)
            for i, j in it.product(range(nrows), range(ncols))
            if not raw.mask[i, j]
        ]
        cell_values = [raw[i, j] for i, j in unmasked]
        rows = [i for i, _ in unmasked]
        cols = [j for _, j in unmasked]
        matrix = coo_matrix((cell_values, (rows, cols)), shape=raw.shape)
        return matrix.tocsr()

    defn = model_definition(domains, relations)
    data = [numpy_dataview(raw) for raw in raw_data]
    sparse_data = [sparse_2d_dataview(csr(raw)) for raw in raw_data]

    r = rng()

    s = initialize(defn, data, r=r)
    assert s
    assert bind(s, 0, data)
    assert bind(s, 1, data)

    s1 = initialize(defn, sparse_data, r=r)
    assert s1
    assert bind(s1, 0, sparse_data)
    assert bind(s1, 1, sparse_data)

    def entity_data_positions(domain, eid):
        # Brute-force reference: every unmasked cell in which entity `eid`
        # of `domain` takes part, over all relations.
        found = []
        for (rel_domains, _), reln in zip(relations, raw_data):
            for pos0 in xrange(reln.shape[0]):
                for pos1 in xrange(reln.shape[1]):
                    if reln.mask[pos0, pos1]:
                        continue
                    in_first = rel_domains[0] == domain and pos0 == eid
                    in_second = rel_domains[1] == domain and pos1 == eid
                    if in_first or in_second:
                        found.append([pos0, pos1])
        return found

    def check_positions(state):
        # NOTE(review): the dense views in `data` are passed for both states,
        # including the one initialized from `sparse_data` -- confirm intended.
        for did, nentities in enumerate(domains):
            for eid in xrange(nentities):
                expected = entity_data_positions(did, eid)
                actual = state.entity_data_positions(did, eid, data)
                assert sorted(expected) == sorted(actual)

    for state in (s, s1):
        check_positions(state)
コード例 #3
0
def test_slice_theta_irm():
    """Check samples of suffstat ``'p'`` produced by the ``theta`` kernel.

    All ``N`` entities are placed in a single cluster over one random 10x10
    boolean relation. Repeated ``theta`` updates are then compared, through
    ``assert_1d_cont_dist_approx_sps``, against
    ``beta(alpha + heads, beta + tails)``, where heads/tails count the
    true/false cells of the data.
    """
    N = 10
    defn = model_definition([N], [((0, 0), bbnc)])
    data = np.random.random(size=(N, N)) < 0.8
    view = numpy_dataview(data)
    r = rng()
    prior = {'alpha': 1.0, 'beta': 9.0}

    views = [view]
    s = initialize(
        defn,
        views,
        r=r,
        cluster_hps=[{'alpha': 2.0}],
        relation_hps=[prior],
        domain_assignments=[[0] * N])
    bs = bind(s, 0, [view])

    params = {0: {'p': 0.05}}

    # Count true and false cells to form the parameters of the target.
    cells = data.flatten()
    heads = sum(1 for cell in cells if cell)
    tails = len(cells) - heads
    rv = beta(prior['alpha'] + heads, prior['beta'] + tails)

    def sample_fn():
        # One theta update on the bound state, then read back 'p' for the
        # single (0, 0) cluster pair of relation 0.
        theta(bs, r, tparams=params)
        suffstats = s.get_suffstats(0, [0, 0])
        return suffstats['p']

    assert_1d_cont_dist_approx_sps(sample_fn, rv, nsamples=50000)
コード例 #4
0
ファイル: test_state.py プロジェクト: tatabox2000/irm
def test_state_pickle():
    """Round-trip a freshly initialized state through pickle.

    The restored state is compared with the original using
    ``_assert_structure_equals``.
    """
    defn = model_definition([5], [((0, 0), bb)])
    r = rng()
    views = [numpy_dataview(relation) for relation in toy_dataset(defn)]
    original = model.initialize(defn, views, r)
    payload = pickle.dumps(original)
    restored = pickle.loads(payload)
    _assert_structure_equals(defn, original, restored, views, r)
コード例 #5
0
ファイル: test_state.py プロジェクト: gitter-badger/irm
def test_state_pickle():
    """Pickle and unpickle an initialized state, then compare both copies.

    The comparison is delegated to ``_assert_structure_equals``.
    """
    defn = model_definition([5], [((0, 0), bb)])
    r = rng()
    dataset = toy_dataset(defn)
    views = [numpy_dataview(relation) for relation in dataset]
    state = model.initialize(defn, views, r)
    roundtripped = pickle.loads(pickle.dumps(state))
    _assert_structure_equals(defn, state, roundtripped, views, r)
コード例 #6
0
ファイル: test_runner.py プロジェクト: jzf2101/irm
def test_runner_default_kernel_config_grid():
    """Run ``_test_runner_simple`` with assign + relation-hp kernel configs.

    The kernel configuration is the concatenation of
    ``runner.default_assign_kernel_config`` and
    ``runner.default_relation_hp_kernel_config`` for a two-domain model with
    a ``bb`` and a ``nich`` relation.
    """
    defn = model_definition([10, 10], [((0, 0), bb), ((0, 1), nich)])

    def kc_fn(defn):
        # Obtain both configs first, then concatenate them in order.
        assign_cfg = runner.default_assign_kernel_config(defn)
        relation_hp_cfg = runner.default_relation_hp_kernel_config(defn)
        return list(assign_cfg) + list(relation_hp_cfg)

    _test_runner_simple(defn, kc_fn)
コード例 #7
0
ファイル: test_definition.py プロジェクト: tatabox2000/irm
def test_model_definition_pickle():
    """Round-trip a model definition through pickle and compare the copies.

    Domains, relations and the name of every relation model must match
    between the original and the restored definition.
    """
    defn = model_definition([10, 12], [((0, 0), bb), ((1, 0), niw(3))])
    restored = pickle.loads(pickle.dumps(defn))

    assert_list_equal(defn.domains(), restored.domains())
    assert_equals(defn.relations(), restored.relations())

    model_pairs = zip(defn.relation_models(), restored.relation_models())
    for original_model, restored_model in model_pairs:
        assert_equals(original_model.name(), restored_model.name())
コード例 #8
0
def test_runner_default_kernel_config_grid():
    """Exercise ``_test_runner_simple`` with a combined kernel configuration.

    The configuration callback returns the default assign kernels followed
    by the default relation-hyperparameter kernels.
    """
    defn = model_definition([10, 10], [((0, 0), bb), ((0, 1), nich)])

    def kc_fn(defn):
        # Both defaults are requested up front and then joined into one list.
        parts = (
            runner.default_assign_kernel_config(defn),
            runner.default_relation_hp_kernel_config(defn),
        )
        combined = []
        for part in parts:
            combined.extend(part)
        return combined

    _test_runner_simple(defn, kc_fn)
コード例 #9
0
ファイル: test_irm_simple.py プロジェクト: gitter-badger/irm
def test_simple():
    """Compare ``entity_data_positions`` of two states with a brute-force scan.

    One state is initialized from a dense ``numpy_dataview`` of a random
    masked 5x6 boolean relation, the other from a ``sparse_2d_dataview`` of
    the same relation's unmasked cells. For every (domain, entity) pair the
    state's reported positions must equal the positions found by scanning the
    unmasked cells directly.
    """
    domains = [5, 6]
    relations = [((0, 1), bb)]
    relsize = (domains[0], domains[1])

    # The cell values are drawn before the mask.
    cell_values = np.random.choice([False, True], size=relsize)
    cell_mask = np.random.choice([False, True], size=relsize)
    raw_data = [ma.array(cell_values, mask=cell_mask)]

    def csr(raw):
        # Keep only unmasked cells and pack them into a CSR matrix.
        n, m = raw.shape
        rows, cols, kept = [], [], []
        for i, j in it.product(range(n), range(m)):
            if raw.mask[i, j]:
                continue
            rows.append(i)
            cols.append(j)
            kept.append(raw[i, j])
        return coo_matrix((kept, (rows, cols)), shape=raw.shape).tocsr()

    defn = model_definition(domains, relations)
    data = [numpy_dataview(raw) for raw in raw_data]
    sparse_data = [sparse_2d_dataview(csr(raw)) for raw in raw_data]

    r = rng()

    s = initialize(defn, data, r=r)
    assert s
    assert bind(s, 0, data)
    assert bind(s, 1, data)

    s1 = initialize(defn, sparse_data, r=r)
    assert s1
    assert bind(s1, 0, sparse_data)
    assert bind(s1, 1, sparse_data)

    def entity_data_positions(domain, eid):
        # Reference answer: unmasked [row, col] cells touching entity `eid`
        # of `domain`, collected across every relation.
        hits = []
        for (rel_domains, _), reln in zip(relations, raw_data):
            nrows, ncols = reln.shape[0], reln.shape[1]
            for pos0 in xrange(nrows):
                for pos1 in xrange(ncols):
                    if reln.mask[pos0, pos1]:
                        continue
                    on_axis0 = rel_domains[0] == domain and pos0 == eid
                    on_axis1 = rel_domains[1] == domain and pos1 == eid
                    if on_axis0 or on_axis1:
                        hits.append([pos0, pos1])
        return hits

    def verify(state):
        # NOTE(review): `data` (the dense views) is used for both states,
        # even the one built from `sparse_data` -- confirm this is intended.
        for did, nentities in enumerate(domains):
            for eid in xrange(nentities):
                reference = entity_data_positions(did, eid)
                reported = state.entity_data_positions(did, eid, data)
                assert sorted(reference) == sorted(reported)

    verify(s)
    verify(s1)
コード例 #10
0
ファイル: test_runner.py プロジェクト: jzf2101/irm
def test_runner_multiprocessing():
    """Run a ``parallel.runner`` twice to check that it can be restarted.

    One latent and one runner are created per CPU (``mp.cpu_count()``) using
    the default kernel configuration.
    """
    defn = model_definition([10, 10], [((0, 0), bb), ((0, 1), nich)])
    views = [numpy_dataview(relation) for relation in toy_dataset(defn)]
    kc = runner.default_kernel_config(defn)
    prng = rng()

    latents = []
    for _ in xrange(mp.cpu_count()):
        latents.append(model.initialize(defn, views, prng))
    runners = [runner.runner(defn, views, state, kc) for state in latents]
    r = parallel.runner(runners)

    # check it is restartable: two consecutive runs on the same runner
    for _ in range(2):
        r.run(r=prng, niters=10)
コード例 #11
0
ファイル: test_state.py プロジェクト: tatabox2000/irm
def test_state_copy():
    """Check shallow and deep copies of an initialized state.

    Each copy must be a distinct object from the original and must pass
    ``_assert_structure_equals`` against it.
    """
    defn = model_definition([5], [((0, 0), bb)])
    r = rng()
    views = [numpy_dataview(relation) for relation in toy_dataset(defn)]
    s1 = model.initialize(defn, views, r)

    # Shallow copy first, then deep copy; same checks for both.
    for make_copy in (copy.copy, copy.deepcopy):
        s2 = make_copy(s1)
        assert_is_not(s1, s2)
        _assert_structure_equals(defn, s1, s2, views, r)
コード例 #12
0
def test_runner_multiprocessing():
    """Check that a ``parallel.runner`` can be run, then run again.

    The parallel runner wraps one default-configured runner per CPU
    (``mp.cpu_count()``).
    """
    defn = model_definition([10, 10], [((0, 0), bb), ((0, 1), nich)])
    dataset = toy_dataset(defn)
    views = [numpy_dataview(relation) for relation in dataset]
    kc = runner.default_kernel_config(defn)
    prng = rng()

    nchains = mp.cpu_count()
    latents = [model.initialize(defn, views, prng) for _ in xrange(nchains)]
    runners = []
    for state in latents:
        runners.append(runner.runner(defn, views, state, kc))
    r = parallel.runner(runners)

    # check it is restartable
    r.run(r=prng, niters=10)
    r.run(r=prng, niters=10)
コード例 #13
0
ファイル: test_state.py プロジェクト: gitter-badger/irm
def test_state_copy():
    """Verify ``copy.copy`` and ``copy.deepcopy`` of an initialized state.

    Both copies must be new objects that ``_assert_structure_equals`` accepts
    as equal to the original.
    """
    defn = model_definition([5], [((0, 0), bb)])
    r = rng()
    dataset = toy_dataset(defn)
    views = [numpy_dataview(relation) for relation in dataset]
    source = model.initialize(defn, views, r)

    def check(duplicate):
        # A copy must be a different object with the same structure.
        assert_is_not(source, duplicate)
        _assert_structure_equals(defn, source, duplicate, views, r)

    check(copy.copy(source))
    check(copy.deepcopy(source))
コード例 #14
0
ファイル: irm.py プロジェクト: datamicroscopes/kernels
def latent(groups, entities_per_group, features, r):
    """Build a bound IRM state over random data with fixed group assignments.

    Args:
        groups: number of groups to create.
        entities_per_group: number of consecutive entities placed in each
            group; the domain has ``groups * entities_per_group`` entities.
        features: number of ``bb`` relations over (0, 0), each backed by a
            random N x N boolean matrix.
        r: random state handed to ``initialize`` and ``create_group``.

    Returns:
        The result of ``bind(state, 0, views)`` after one extra call to
        ``create_group(r)``.
    """
    N = groups * entities_per_group
    defn = model_definition([N], [((0, 0), bb)] * features)

    # generate fake data: one random boolean N x N view per feature
    views = [
        numpy_dataview(np.random.random(size=(N, N)) <= 0.5)
        for _ in xrange(features)
    ]

    # assign entities to their respective groups: group g owns a run of
    # entities_per_group consecutive entities
    assignment = [
        g for g in xrange(groups) for _ in xrange(entities_per_group)
    ]

    state = initialize(defn, views, r, domain_assignments=[assignment])
    bound = bind(state, 0, views)
    bound.create_group(r)  # perftest() doesnt modify group assignments

    return bound
コード例 #15
0
ファイル: irm.py プロジェクト: pschulam/kernels
def latent(groups, entities_per_group, features, r):
    """Create a bound state with ``groups`` equally sized, preassigned groups.

    Args:
        groups: how many groups the entities are split into.
        entities_per_group: size of each group; the single domain holds
            ``groups * entities_per_group`` entities.
        features: how many ``bb`` relations over (0, 0) to define; each gets
            its own random N x N boolean data matrix.
        r: random state passed to ``initialize`` and ``create_group``.

    Returns:
        The state bound to domain 0 via ``bind``, after ``create_group(r)``
        has been called on it once.
    """
    N = groups * entities_per_group
    relation = ((0, 0), bb)
    defn = model_definition([N], [relation] * features)

    # generate fake data
    views = []
    for _ in xrange(features):
        views.append(numpy_dataview(np.random.random(size=(N, N)) <= 0.5))

    # assign entities to their respective groups
    assignment = []
    for g in xrange(groups):
        assignment.extend([g] * entities_per_group)

    initialized = initialize(
        defn, views, r, domain_assignments=[assignment])
    bound_state = bind(initialized, 0, views)
    # perftest() doesnt modify group assignments
    bound_state.create_group(r)

    return bound_state
コード例 #16
0
ファイル: test_runner.py プロジェクト: jzf2101/irm
def test_runner_default_kernel_config_convergence():
    """Compare samples from a single runner against a reference posterior.

    A single ``runner.runner`` with an ``'assign'`` kernel is burned in and
    then sampled every 10 iterations. The canonicalized assignments are
    mapped to integer ids and checked by ``assert_discrete_dist_approx``
    against the posterior returned by ``data_with_posterior``.
    """
    domains = [4]
    defn = model_definition(domains, [((0, 0), bb)])
    prng = rng()
    relations, posterior = data_with_posterior(defn, prng)
    views = [numpy_dataview(relation) for relation in relations]
    latent = model.initialize(defn, views, prng)
    kc = [('assign', range(len(domains)))]
    r = runner.runner(defn, views, latent, kc)

    r.run(r=prng, niters=1000)  # burnin

    # Give each combination of per-domain assignments an integer id.
    per_domain = tuple(list(permutation_iter(size)) for size in domains)
    idmap = {}
    for idx, combo in enumerate(it.product(*per_domain)):
        idmap[combo] = idx

    def sample_fn():
        # Advance the chain, then identify the state it landed in.
        r.run(r=prng, niters=10)
        current = r.get_latent()
        canonical = []
        for did in xrange(len(domains)):
            canonical.append(
                tuple(permutation_canonical(current.assignments(did))))
        return idmap[tuple(canonical)]

    assert_discrete_dist_approx(sample_fn, posterior, ntries=100)
コード例 #17
0
def test_runner_default_kernel_config_convergence():
    """Check a single runner's samples against the reference posterior.

    After a 1000-iteration burn-in, every sample advances the runner by 10
    iterations and reports the id of the canonicalized assignment.
    ``assert_discrete_dist_approx`` compares those ids with the posterior
    returned by ``data_with_posterior``.
    """
    domains = [4]
    defn = model_definition(domains, [((0, 0), bb)])
    prng = rng()
    relations, posterior = data_with_posterior(defn, prng)
    views = [numpy_dataview(relation) for relation in relations]
    initial = model.initialize(defn, views, prng)
    r = runner.runner(
        defn, views, initial, [('assign', range(len(domains)))])

    r.run(r=prng, niters=1000)  # burnin

    # Enumerate every combination of per-domain assignments once.
    choices = tuple(list(permutation_iter(size)) for size in domains)
    idmap = dict(
        (combo, idx) for idx, combo in enumerate(it.product(*choices)))

    def canonical_key(state):
        # One canonicalized assignment tuple per domain.
        return tuple(
            tuple(permutation_canonical(state.assignments(did)))
            for did in xrange(len(domains)))

    def sample_fn():
        r.run(r=prng, niters=10)
        return idmap[canonical_key(r.get_latent())]

    assert_discrete_dist_approx(sample_fn, posterior, ntries=100)
コード例 #18
0
def test_runner_multiprocessing_convergence():
    """Check samples pooled from parallel chains against a reference posterior.

    A ``parallel.runner`` drives one chain per CPU (``mp.cpu_count()``).
    After burn-in, each batch advances all chains by 10 iterations and yields
    one assignment id per latent. ``assert_discrete_dist_approx`` compares
    the ids with the posterior returned by ``data_with_posterior``.
    """
    domains = [4]
    defn = model_definition(domains, [((0, 0), bb)])
    prng = rng()
    relations, posterior = data_with_posterior(defn, prng)
    views = [numpy_dataview(relation) for relation in relations]

    nchains = mp.cpu_count()
    latents = [model.initialize(defn, views, prng) for _ in xrange(nchains)]
    kc = [('assign', range(len(domains)))]
    runners = []
    for state in latents:
        runners.append(runner.runner(defn, views, state, kc))
    r = parallel.runner(runners)
    r.run(r=prng, niters=10000)  # burnin

    # Enumerate every combination of per-domain assignments once.
    choices = tuple(list(permutation_iter(size)) for size in domains)
    idmap = dict(
        (combo, idx) for idx, combo in enumerate(it.product(*choices)))

    def canonical_key(state):
        # One canonicalized assignment tuple per domain.
        return tuple(
            tuple(permutation_canonical(state.assignments(did)))
            for did in xrange(len(domains)))

    def sample_iter():
        # Advance all chains, then report the id of each chain's state.
        r.run(r=prng, niters=10)
        for state in r.get_latents():
            yield idmap[canonical_key(state)]

    # One-element list: lets sample_fn replace the current batch in place.
    batch_slot = [None]
    done = object()

    def sample_fn():
        if batch_slot[0] is None:
            batch_slot[0] = sample_iter()
        value = next(batch_slot[0], done)
        if value is done:
            # Current batch is spent; discard it and draw from a new one.
            batch_slot[0] = None
            return sample_fn()
        return value

    assert_discrete_dist_approx(sample_fn, posterior, ntries=100, kl_places=2)
コード例 #19
0
ファイル: test_runner.py プロジェクト: jzf2101/irm
def test_runner_default_kernel_config_nonconj():
    """Run ``_test_runner_simple`` with the default kernel config.

    The model has a ``bbnc`` relation over (0, 0) and a ``nich`` relation
    over (0, 1).
    """
    relations = [((0, 0), bbnc), ((0, 1), nich)]
    defn = model_definition([10, 10], relations)
    _test_runner_simple(defn, runner.default_kernel_config)
コード例 #20
0
ファイル: enron_test.py プロジェクト: anukat2015/release
# 2. load the data
# 3. initialize the model
# 4. define the runners (MCMC chains)
# 5. run the runners

# In[5]:

from microscopes.common.rng import rng
from microscopes.common.relation.dataview import numpy_dataview
from microscopes.models import bb as beta_bernoulli
from microscopes.irm.definition import model_definition
from microscopes.irm import model, runner, query
from microscopes.kernels import parallel
from microscopes.common.query import groups, zmatrix_heuristic_block_ordering, zmatrix_reorder

# Model: one domain of size N with a single relation over (0, 0) using the
# beta-bernoulli model. N and communications_relation are defined outside
# this excerpt.
defn = model_definition([N], [((0, 0), beta_bernoulli)])
views = [numpy_dataview(communications_relation)]
prng = rng()

# A single chain is created here and driven directly through runner.runner;
# only latents[0] is used below.
nchains = 1
latents = [model.initialize(defn, views, r=prng, cluster_hps=[{'alpha':1}]) for _ in xrange(nchains)]
kc = runner.default_assign_kernel_config(defn)
# Python 2 print statement: shows the kernel configuration that will be used.
print kc
r = runner.runner(defn, views, latents[0], kc)


# ##From here, we can finally run each chain of the sampler 1000 times

# In[ ]:

# Wall-clock start time; NOTE(review): `time` is not imported in the visible
# part of this file -- presumably imported in an earlier cell, confirm.
start = time.time()
コード例 #21
0
def infinite_relational_model(corr_matrix, lag_matrix, threshold, sampled_coords, window_size):
    """Cluster a thresholded correlation graph with the IRM and save the result.

    The correlation matrix is turned into a boolean adjacency matrix, one IRM
    chain per CPU is run for 20 iterations, and the cluster assignment of the
    first chain is used to reorder all inputs so that members of the same
    cluster are adjacent.

    Args:
        corr_matrix: square 2-D numpy array of correlations; it is indexed
            with ``[ordering]`` / ``[:, ordering]`` and converted with
            ``.tolist()``.
        lag_matrix: 2-D numpy array reordered the same way as
            ``corr_matrix``.
        threshold: a cell becomes an edge (True) unless its correlation is
            strictly below this value.
        sampled_coords: sequence with one entry per entity; converted with
            ``np.array`` and reordered along its first axis.
        window_size: only used to build the output file name.

    Returns:
        dict with the reordered matrices (``corrMatrix``, ``lagMatrix``,
        ``clusterMatrix``), the reordered coordinates
        (``clusterSampledCoords``), the cluster sizes (``nClusterList``) and
        the applied ``ordering``.

    Side effects:
        Prints the inference time and writes the returned dict as JSON to
        ``./expdata/clustermatrix-<window_size>.json``; ``open`` does not
        create the ``expdata`` directory, so it must already exist.
    """
    import numpy as np
    import json
    import time
    import itertools as it
    from multiprocessing import cpu_count
    from microscopes.common.rng import rng
    from microscopes.common.relation.dataview import numpy_dataview
    from microscopes.models import bb as beta_bernoulli
    from microscopes.irm.definition import model_definition
    from microscopes.irm import model, runner, query
    from microscopes.kernels import parallel
    from microscopes.common.query import groups, zmatrix_heuristic_block_ordering, zmatrix_reorder

    # calculate graph: an edge wherever the correlation is not below the
    # threshold. The builtin ``bool`` is used as dtype because the
    # ``np.bool`` alias no longer exists in current NumPy releases.
    graph = np.array(
        [[not (corr < threshold) for corr in row] for row in corr_matrix],
        dtype=bool)

    graph_size = len(graph)

    # conduct Infinite Relational Model: one domain, one beta-bernoulli
    # relation over (0, 0), one chain per CPU.
    defn = model_definition([graph_size], [((0, 0), beta_bernoulli)])
    views = [numpy_dataview(graph)]
    prng = rng()

    nchains = cpu_count()
    # ``range`` keeps this loop valid on both Python 2 and Python 3.
    latents = [
        model.initialize(defn, views, r=prng, cluster_hps=[{'alpha': 1e-3}])
        for _ in range(nchains)
    ]
    kc = runner.default_assign_kernel_config(defn)
    runners = [runner.runner(defn, views, latent, kc) for latent in latents]
    r = parallel.runner(runners)

    start = time.time()
    r.run(r=prng, niters=20)
    print ("inference took", time.time() - start, "seconds")

    # Only the first chain's assignment is used to derive the clusters.
    infers = r.get_latents()
    clusters = groups(infers[0].assignments(0), sort=True)
    ordering = list(it.chain.from_iterable(clusters))

    # Reorder rows, then columns; indexing with a list returns new arrays,
    # so the original graph is left untouched.
    z = graph[ordering][:, ordering]
    corr_matrix = corr_matrix[ordering][:, ordering]
    lag_matrix = lag_matrix[ordering][:, ordering]
    cluster_sampled_coords = np.array(sampled_coords)[ordering]

    response_msg = {
        'corrMatrix': corr_matrix.tolist(),
        'lagMatrix': lag_matrix.tolist(),
        'clusterMatrix': z.tolist(),
        'clusterSampledCoords': cluster_sampled_coords.tolist(),
        'nClusterList': [len(cluster) for cluster in clusters],
        'ordering': ordering,
    }

    # The context manager closes the file even if serialization fails.
    with open("./expdata/clustermatrix-" + str(window_size) + ".json", "w") as f:
        json.dump(response_msg, f)

    return response_msg
コード例 #22
0
ファイル: enron-email.py プロジェクト: kimmyjin/release
# In[5]:

from microscopes.common.rng import rng
from microscopes.common.relation.dataview import numpy_dataview
from microscopes.models import bb as beta_bernoulli
from microscopes.irm.definition import model_definition
from microscopes.irm import model, runner, query
from microscopes.kernels import parallel
from microscopes.common.query import groups, zmatrix_heuristic_block_ordering, zmatrix_reorder

# ##Let's start by defining the model and loading the data

# In[6]:

# Model: one domain of size N with a single relation over (0, 0) using the
# beta-bernoulli model. N and communications_relation are defined outside
# this excerpt.
defn = model_definition([N], [((0, 0), beta_bernoulli)])
views = [numpy_dataview(communications_relation)]
prng = rng()

# ##Next, let's initialize the model and define the runners.
#
# ##These runners are our MCMC chains. We'll use `cpu_count` to define our number of chains.

# In[ ]:

# NOTE(review): cpu_count is not imported in the visible part of this file --
# presumably `from multiprocessing import cpu_count` in an earlier cell; confirm.
nchains = cpu_count()
# One latent state per chain, each initialized with the shared prng and a
# cluster hyperparameter alpha of 1e-3.
latents = [
    model.initialize(defn, views, r=prng, cluster_hps=[{
        'alpha': 1e-3
    }]) for _ in xrange(nchains)
]
コード例 #23
0
def test_runner_default_kernel_config_nonconj():
    """Exercise ``_test_runner_simple`` on a model with a ``bbnc`` relation.

    ``runner.default_kernel_config`` is passed as the kernel-config
    callback; the second relation of the model uses ``nich``.
    """
    domains = [10, 10]
    defn = model_definition(domains, [((0, 0), bbnc), ((0, 1), nich)])
    _test_runner_simple(defn, runner.default_kernel_config)