예제 #1
0
def test_simple():
    """Check ``entity_data_positions`` against a brute-force scan.

    A single masked boolean relation between a 5-entity and a 6-entity
    domain is generated at random.  Two states are initialized from it, one
    from a dense view and one from a CSR-backed sparse view, and for every
    (domain, entity) pair the positions reported by each state are compared
    (order-insensitively) with the unmasked cells found by a direct scan.
    """
    domains = [5, 6]
    relations = [((0, 1), bb)]

    relsize = tuple(domains)
    # values are drawn before the mask, so the RNG is consumed in a fixed order
    values = np.random.choice([False, True], size=relsize)
    hidden = np.random.choice([False, True], size=relsize)
    raw_data = [ma.array(values, mask=hidden)]

    def csr(raw):
        # build a CSR matrix holding exactly the unmasked cells of `raw`
        nrows, ncols = raw.shape
        visible = [
            (row, col)
            for row, col in it.product(range(nrows), range(ncols))
            if not raw.mask[row, col]
        ]
        cell_values = [raw[row, col] for row, col in visible]
        rows = [row for row, _ in visible]
        cols = [col for _, col in visible]
        return coo_matrix(
            (cell_values, (rows, cols)), shape=raw.shape).tocsr()

    defn = model_definition(domains, relations)
    data = [numpy_dataview(raw) for raw in raw_data]
    csr_matrices = [csr(raw) for raw in raw_data]
    sparse_data = [sparse_2d_dataview(mat) for mat in csr_matrices]

    r = rng()

    s = initialize(defn, data, r=r)
    assert s
    assert bind(s, 0, data)
    assert bind(s, 1, data)

    s1 = initialize(defn, sparse_data, r=r)
    assert s1
    assert bind(s1, 0, sparse_data)
    assert bind(s1, 1, sparse_data)

    def entity_data_positions(domain, eid):
        # reference implementation: every unmasked cell in which entity
        # `eid` of `domain` takes part, over all relations
        found = []
        for (rel_domains, _), reln in zip(relations, raw_data):
            nrows, ncols = reln.shape
            for pos0, pos1 in it.product(xrange(nrows), xrange(ncols)):
                if reln.mask[pos0, pos1]:
                    continue
                on_axis0 = rel_domains[0] == domain and pos0 == eid
                on_axis1 = rel_domains[1] == domain and pos1 == eid
                if on_axis0 or on_axis1:
                    found.append([pos0, pos1])
        return found

    def check(state):
        # NOTE(review): both states are queried with the dense views
        # (`data`), including the one built from `sparse_data` -- confirm
        # this is intended.
        for did, nentities in enumerate(domains):
            for eid in xrange(nentities):
                expected = entity_data_positions(did, eid)
                actual = state.entity_data_positions(did, eid, data)
                assert sorted(expected) == sorted(actual)

    check(s)
    check(s1)
예제 #2
0
def test_simple():
    """Verify the positions reported by ``entity_data_positions``.

    Builds one random masked boolean relation over domains of size 5 and 6,
    initializes a state from a dense view and another from a sparse (CSR)
    view of the same data, and asserts that each state reports, for every
    entity of every domain, the same set of unmasked positions as a direct
    scan of the masked array.
    """
    domains = [5, 6]
    relations = [((0, 1), bb)]
    relsize = (domains[0], domains[1])

    # the value draw precedes the mask draw (argument evaluation order)
    cell_values = np.random.choice([False, True], size=relsize)
    cell_mask = np.random.choice([False, True], size=relsize)
    raw_data = [ma.array(cell_values, mask=cell_mask)]

    def csr(raw):
        # collect (value, row, col) triples for the unmasked cells only
        n, m = raw.shape
        triples = [
            (raw[a, b], a, b)
            for a, b in it.product(range(n), range(m))
            if not raw.mask[a, b]
        ]
        vals = [t[0] for t in triples]
        rows = [t[1] for t in triples]
        cols = [t[2] for t in triples]
        coo = coo_matrix((vals, (rows, cols)), shape=raw.shape)
        return coo.tocsr()

    defn = model_definition(domains, relations)
    data = [numpy_dataview(arr) for arr in raw_data]
    as_csr = [csr(arr) for arr in raw_data]
    sparse_data = [sparse_2d_dataview(mat) for mat in as_csr]

    r = rng()

    s = initialize(defn, data, r=r)
    assert s and bind(s, 0, data) and bind(s, 1, data)

    s1 = initialize(defn, sparse_data, r=r)
    assert s1 and bind(s1, 0, sparse_data) and bind(s1, 1, sparse_data)

    def entity_data_positions(domain, eid):
        # brute-force reference used as the expected answer
        def involves(rel_dids, pos):
            return any(
                rel_dids[axis] == domain and pos[axis] == eid
                for axis in (0, 1))

        def scan(rel_dids, reln):
            cells = it.product(xrange(reln.shape[0]), xrange(reln.shape[1]))
            for pos0, pos1 in cells:
                if reln.mask[pos0, pos1]:
                    continue
                if involves(rel_dids, (pos0, pos1)):
                    yield [pos0, pos1]

        positions = []
        for (rel_dids, _), reln in zip(relations, raw_data):
            positions.extend(scan(rel_dids, reln))
        return positions

    def verify(state):
        # NOTE(review): the dense views (`data`) are passed for both states;
        # confirm whether the sparse state should receive `sparse_data`.
        for did, nentities in enumerate(domains):
            for eid in xrange(nentities):
                want = sorted(entity_data_positions(did, eid))
                got = sorted(state.entity_data_positions(did, eid, data))
                assert want == got

    verify(s)
    verify(s1)
예제 #3
0
def _assert_structure_equals(defn, s1, s2, views, r):
    """Assert that states `s1` and `s2` describe the same structure.

    Compares domain and relation counts, then per domain the entity count,
    group count, assignments, group ids, hyperparameters and assignment
    score, then per relation the hyperparameters and the sufficient
    statistics of every combination of groups, and finally the likelihood
    score.

    Afterwards entity 0 of domain 0 is temporarily removed from `s1` and
    the assignments of `s2` are checked against the ones `s1` had before
    the removal; presumably this guards against the two states sharing
    storage.  The entity is then put back into its original group.

    Parameters
    ----------
    defn : model definition; ``defn.relations()[rid]`` is iterated as the
        domain ids of relation `rid`
    s1, s2 : the two states to compare
    views : data views used to bind `s1`
    r : random state
    """
    assert_equals(s1.ndomains(), s2.ndomains())
    assert_equals(s1.nrelations(), s2.nrelations())

    # per-domain structure
    for domain in xrange(s1.ndomains()):
        assert_equals(s1.nentities(domain), s2.nentities(domain))
        assert_equals(s1.ngroups(domain), s2.ngroups(domain))
        assert_equals(s1.assignments(domain), s2.assignments(domain))
        assert_equals(set(s1.groups(domain)), set(s2.groups(domain)))
        assert_close(s1.get_domain_hp(domain), s2.get_domain_hp(domain))
        assert_almost_equals(
            s1.score_assignment(domain), s2.score_assignment(domain))

    # per-relation hyperparameters and sufficient statistics
    for relation in xrange(s1.nrelations()):
        assert_close(
            s1.get_relation_hp(relation), s2.get_relation_hp(relation))
        per_domain_groups = [
            s1.groups(d) for d in defn.relations()[relation]]
        for group_ids in it.product(*per_domain_groups):
            stats1 = s1.get_suffstats(relation, group_ids)
            stats2 = s2.get_suffstats(relation, group_ids)
            if stats1 is not None:
                assert_close(stats1, stats2)
            else:
                assert_is_none(stats2)

    assert_almost_equals(s1.score_likelihood(r), s2.score_likelihood(r))

    # mutate s1 only, and check that s2 still matches the old assignments
    original = list(s1.assignments(0))
    handle = model.bind(s1, 0, views)
    old_gid = handle.remove_value(0, r)
    assert_equals(s1.assignments(0)[0], -1)
    assert_equals(original, s2.assignments(0))
    handle.add_value(old_gid, 0, r)  # restore
예제 #4
0
    def run(self, r, niters=10000):
        """Apply the configured kernels `niters` times, sequentially.

        The latent state is bound once per domain; each iteration then walks
        ``self._kernel_config`` in order and dispatches on the kernel name.
        Kernels configured per domain are applied to the model bound to that
        domain index; the relation-level kernels are applied through the
        model bound to domain 0.

        Parameters
        ----------
        r : random state
        niters : int
            Number of passes over the kernel configuration; must be
            positive.

        """
        validator.validate_type(r, rng, param_name='r')
        validator.validate_positive(niters, param_name='niters')

        ndomains = len(self._defn.domains())
        bound = [
            bind(self._latent, did, self._views) for did in xrange(ndomains)
        ]

        for _ in xrange(niters):
            for kernel, cfg in self._kernel_config:
                # per-domain kernels: cfg is keyed by domain index
                if kernel == 'assign':
                    for did in cfg.keys():
                        gibbs.assign(bound[did], r)
                    continue
                if kernel == 'assign_resample':
                    for did, opts in cfg.iteritems():
                        gibbs.assign_resample(bound[did], opts['m'], r)
                    continue
                if kernel == 'slice_cluster_hp':
                    for did, opts in cfg.iteritems():
                        slice.hp(bound[did], r, cparam=opts['cparam'])
                    continue

                # relation-level kernels: always go through domain 0
                if kernel == 'grid_relation_hp':
                    gibbs.hp(bound[0], cfg, r)
                    continue
                if kernel == 'slice_relation_hp':
                    slice.hp(bound[0], r, hparams=cfg['hparams'])
                    continue
                if kernel == 'theta':
                    slice.theta(bound[0], r, tparams=cfg['tparams'])
                    continue

                assert False, "should not be reached"
예제 #5
0
def test_slice_theta_irm():
    """Check the ``theta`` kernel against the expected Beta posterior.

    A single 10-entity domain with one ``bbnc`` self-relation is set up with
    every entity in group 0, so all N*N observations fall into one cell.
    Repeated ``theta`` updates of the 'p' suffstat for that cell are then
    compared with Beta(alpha + #True, beta + #False), where alpha and beta
    come from the relation prior.
    """
    N = 10
    defn = model_definition([N], [((0, 0), bbnc)])
    data = np.random.random(size=(N, N)) < 0.8
    view = numpy_dataview(data)
    r = rng()
    prior = {'alpha': 1.0, 'beta': 9.0}

    everyone_in_group_zero = [0] * N
    s = initialize(
        defn,
        [view],
        r=r,
        cluster_hps=[{'alpha': 2.0}],
        relation_hps=[prior],
        domain_assignments=[everyone_in_group_zero])

    bs = bind(s, 0, [view])

    params = {0: {'p': 0.05}}

    # count True / False observations from one flattened copy
    flat = data.flatten()
    heads = sum(1 for y in flat if y)
    tails = len(flat) - heads

    posterior = beta(prior['alpha'] + heads, prior['beta'] + tails)

    def sample_fn():
        # one kernel step, then read back the current 'p' of cell (0, 0)
        theta(bs, r, tparams=params)
        stats = s.get_suffstats(0, [0, 0])
        return stats['p']

    assert_1d_cont_dist_approx_sps(sample_fn, posterior, nsamples=50000)
예제 #6
0
def _assert_structure_equals(defn, s1, s2, views, r):
    """Assert that two states agree on all structure visible via accessors.

    Checks, in order: domain/relation counts; for each domain the entity
    count, group count, assignments, set of group ids, hyperparameters and
    assignment score; for each relation the hyperparameters and the
    suffstats of every group-id combination; and the likelihood score.

    Finally entity 0 of domain 0 is removed from `s1` through a bound model
    and `s2`'s assignments are compared with `s1`'s pre-removal assignments
    (presumably to show the states do not alias each other) before the
    entity is added back to the group it came from.

    Parameters
    ----------
    defn : model definition providing ``relations()``
    s1, s2 : states under comparison
    views : data views passed to ``model.bind`` for `s1`
    r : random state
    """
    def pair(accessor, *args):
        # call the same accessor on both states, s1 first
        return getattr(s1, accessor)(*args), getattr(s2, accessor)(*args)

    assert_equals(*pair('ndomains'))
    assert_equals(*pair('nrelations'))

    for did in xrange(s1.ndomains()):
        assert_equals(*pair('nentities', did))
        assert_equals(*pair('ngroups', did))
        assert_equals(*pair('assignments', did))
        groups1, groups2 = pair('groups', did)
        assert_equals(set(groups1), set(groups2))
        assert_close(*pair('get_domain_hp', did))
        assert_almost_equals(*pair('score_assignment', did))

    for rid in xrange(s1.nrelations()):
        assert_close(*pair('get_relation_hp', rid))
        relation_dids = defn.relations()[rid]
        group_axes = [s1.groups(did) for did in relation_dids]
        for gids in it.product(*group_axes):
            left, right = pair('get_suffstats', rid, gids)
            if left is None:
                assert_is_none(right)
                continue
            assert_close(left, right)

    assert_almost_equals(*pair('score_likelihood', r))

    # perturb s1 and make sure s2 is unaffected, then undo the perturbation
    snapshot = list(s1.assignments(0))
    bound_s1 = model.bind(s1, 0, views)
    removed_from = bound_s1.remove_value(0, r)
    assert_equals(s1.assignments(0)[0], -1)
    assert_equals(snapshot, s2.assignments(0))
    bound_s1.add_value(removed_from, 0, r)  # restore
예제 #7
0
파일: irm.py 프로젝트: pschulam/kernels
def latent(groups, entities_per_group, features, r):
    """Build a bound single-domain state filled with random data.

    The domain has ``groups * entities_per_group`` entities and `features`
    ``bb`` self-relations, each backed by an independent random boolean
    N x N matrix.  Entities are assigned to groups in contiguous runs of
    `entities_per_group`.

    Parameters
    ----------
    groups : int
    entities_per_group : int
    features : int
        Number of relations (and of generated data views).
    r : random state

    Returns
    -------
    The state bound to domain 0, after one extra ``create_group`` call.
    """
    N = groups * entities_per_group
    defn = model_definition([N], [((0, 0), bb)] * features)

    # generate fake data: one random boolean matrix per relation
    views = [
        numpy_dataview(np.random.random(size=(N, N)) <= 0.5)
        for _ in xrange(features)
    ]

    # entity k belongs to group k // entities_per_group
    assignment = [
        g for g in xrange(groups) for _ in xrange(entities_per_group)
    ]

    state = initialize(defn, views, r, domain_assignments=[assignment])
    bound = bind(state, 0, views)
    bound.create_group(r)  # perftest() doesn't modify group assignments

    return bound
예제 #8
0
def latent(groups, entities_per_group, features, r):
    """Create a bound state over one domain with fabricated observations.

    ``N = groups * entities_per_group`` entities share `features` ``bb``
    relations of the domain with itself; every relation gets its own random
    boolean N x N data view.  The initial assignment places the first
    `entities_per_group` entities in group 0, the next ones in group 1, and
    so on.

    Parameters
    ----------
    groups : int
    entities_per_group : int
    features : int
    r : random state

    Returns
    -------
    The result of binding the initialized state to domain 0, with one
    additional group created on it.
    """
    N = groups * entities_per_group
    relation_specs = [((0, 0), bb)] * features
    defn = model_definition([N], relation_specs)

    # fake data, drawn relation by relation
    views = []
    for _ in xrange(features):
        observations = np.random.random(size=(N, N)) <= 0.5
        views.append(numpy_dataview(observations))

    # contiguous block of entities per group
    assignment = []
    for gid in xrange(groups):
        assignment.extend([gid] * entities_per_group)

    initialized = initialize(
        defn, views, r, domain_assignments=[assignment])
    bound_state = bind(initialized, 0, views)
    # perftest() doesn't modify group assignments
    bound_state.create_group(r)

    return bound_state