Example #1
def test_dense_vs_sparse():
    # XXX: really belongs in irm test cases, but kernels has a nice cluster
    # enumeration iterator

    r = rng()

    n = 5
    raw = ma.array(np.random.choice(np.arange(20), size=(n, n)),
                   mask=np.random.choice([False, True], size=(n, n)))

    dense = [relation_numpy_dataview(raw)]
    sparse = [sparse_relation_dataview(_tocsr(raw))]

    domains = [n]
    relations = [((0, 0), gp)]
    defn = irm_definition(domains, relations)

    def score_fn(data):
        def f(assignments):
            s = irm_initialize(defn, data, r=r, domain_assignments=assignments)
            assign = sum(
                s.score_assignment(i) for i in xrange(len(assignments)))
            likelihood = s.score_likelihood(r)
            return assign + likelihood

        return f

    product_assignments = tuple(map(list, map(permutation_iter, domains)))

    dense_posterior = scores_to_probs(
        np.array(map(score_fn(dense), it.product(*product_assignments))))
    sparse_posterior = scores_to_probs(
        np.array(map(score_fn(sparse), it.product(*product_assignments))))

    assert_1d_lists_almost_equals(dense_posterior, sparse_posterior, places=3)
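
Both posteriors above are produced by scores_to_probs, which turns the unnormalized log scores into a probability vector. A hypothetical sketch of that normalization (assuming it is the usual exp-normalize over log scores; the real helper lives elsewhere in the project):

import numpy as np

def scores_to_probs_sketch(scores):
    # Subtract the max log score for numerical stability, exponentiate,
    # then normalize so the entries sum to one.
    scores = np.asarray(scores, dtype=float)
    probs = np.exp(scores - scores.max())
    return probs / probs.sum()

# e.g. scores_to_probs_sketch([-1.0, -2.0, -3.0]) is roughly [0.665, 0.245, 0.090]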
Example #2
def test_dense_vs_sparse():
    # XXX: really belongs in irm test cases, but kernels has a nice cluster
    # enumeration iterator

    r = rng()

    n = 5
    raw = ma.array(
        np.random.choice(np.arange(20), size=(n, n)),
        mask=np.random.choice([False, True], size=(n, n)))

    dense = [relation_numpy_dataview(raw)]
    sparse = [sparse_relation_dataview(_tocsr(raw))]

    domains = [n]
    relations = [((0, 0), gp)]
    defn = irm_definition(domains, relations)

    def score_fn(data):
        def f(assignments):
            s = irm_initialize(defn, data, r=r, domain_assignments=assignments)
            assign = sum(s.score_assignment(i)
                         for i in xrange(len(assignments)))
            likelihood = s.score_likelihood(r)
            return assign + likelihood
        return f

    product_assignments = tuple(map(list, map(permutation_iter, domains)))

    dense_posterior = scores_to_probs(
        np.array(map(score_fn(dense), it.product(*product_assignments))))
    sparse_posterior = scores_to_probs(
        np.array(map(score_fn(sparse), it.product(*product_assignments))))

    assert_1d_lists_almost_equals(dense_posterior, sparse_posterior, places=3)
Example #3
def test_crp_empirical():
    N = 4
    alpha = 2.5
    defn = model_definition(N, [bb])
    Y = np.array([(True, )] * N, dtype=[('', bool)])
    view = numpy_dataview(Y)
    r = rng()

    def crp_score(assignment):
        latent = initialize(defn,
                            view,
                            r=r,
                            cluster_hp={'alpha': alpha},
                            assignment=assignment)
        return latent.score_assignment()

    scores = np.array(list(map(crp_score, permutation_iter(N))))
    dist = scores_to_probs(scores)
    idmap = {C: i for i, C in enumerate(permutation_iter(N))}

    def sample_fn():
        sample = permutation_canonical(_sample_crp(N, alpha))
        return idmap[tuple(sample)]

    assert_discrete_dist_approx(sample_fn, dist, ntries=100)
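
The empirical side of this test draws partitions from _sample_crp. A hypothetical sketch of such a sampler (assuming a standard Chinese restaurant process with concentration alpha; the helper's actual body is not shown in the example):

import numpy as np

def sample_crp_sketch(n, alpha, prng=np.random):
    # Seat n customers sequentially: each customer joins an existing cluster with
    # probability proportional to its size, or opens a new one with weight alpha.
    assignment, counts = [], []
    for _ in range(n):
        weights = np.array(counts + [alpha], dtype=float)
        choice = prng.choice(len(weights), p=weights / weights.sum())
        if choice == len(counts):
            counts.append(1)
        else:
            counts[choice] += 1
        assignment.append(choice)
    return assignment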
Example #4
File: test_sampler.py  Project: jzf2101/lda
def test_convergence_simple():
    N, V = 2, 10
    defn = model_definition(N, V)
    data = [
        np.array([5, 6]),
        np.array([0, 1, 2]),
    ]
    view = numpy_dataview(data)
    prng = rng()

    scores = []
    idmap = {}
    for i, (tables, dishes) in enumerate(permutations([2, 3])):
        latent = model.initialize(
            defn, view, prng,
            table_assignments=tables,
            dish_assignments=dishes)
        scores.append(
            latent.score_assignment() +
            latent.score_data(prng))
        idmap[(tables, dishes)] = i
    true_dist = scores_to_probs(scores)

    def kernel(latent):
        # mutates latent in place
        doc_model = model.bind(latent, data=view)
        kernels.assign2(doc_model, prng)
        for did in xrange(latent.nentities()):
            table_model = model.bind(latent, document=did)
            kernels.assign(table_model, prng)

    latent = model.initialize(defn, view, prng)

    skip = 10
    def sample_fn():
        for _ in xrange(skip):
            kernel(latent)
        table_assignments = latent.table_assignments()
        canon_table_assigments = tuple(
            map(tuple, map(permutation_canonical, table_assignments)))

        dish_maps = latent.dish_assignments()
        dish_assignments = []
        for dm, (ta, ca) in zip(dish_maps, zip(table_assignments, canon_table_assigments)):
            dish_assignment = []
            for t, c in zip(ta, ca):
                if c == len(dish_assignment):
                    dish_assignment.append(dm[t])
            dish_assignments.append(dish_assignment)

        canon_dish_assigments = tuple(
            map(tuple, map(permutation_canonical, dish_assignments)))

        return idmap[(canon_table_assigments, canon_dish_assigments)]

    assert_discrete_dist_approx(
        sample_fn, true_dist,
        ntries=100, nsamples=10000, kl_places=2)
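
permutation_canonical is what makes the comparison label-invariant: two assignments that differ only by a renaming of table or dish ids should map to the same key in idmap. A hypothetical sketch of that canonicalization (assuming it relabels clusters in order of first appearance):

def permutation_canonical_sketch(assignment):
    # Relabel cluster ids by order of first appearance, so e.g.
    # [2, 2, 0, 1] and [0, 0, 1, 2] both canonicalize to [0, 0, 1, 2].
    relabel = {}
    canonical = []
    for a in assignment:
        if a not in relabel:
            relabel[a] = len(relabel)
        canonical.append(relabel[a])
    return canonical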
Example #5
def _test_convergence(domains,
                      data,
                      reg_relations,
                      brute_relations,
                      kernel,
                      burnin_niters=10000,
                      skip=10,
                      ntries=50,
                      nsamples=1000,
                      places=2):
    r = rng()

    reg_defn = irm_definition(domains, reg_relations)
    brute_defn = irm_definition(domains, brute_relations)

    def score_fn(assignments):
        s = irm_initialize(
            brute_defn, data, r=r,
            domain_assignments=assignments)
        assign = sum(s.score_assignment(i) for i in xrange(len(assignments)))
        likelihood = s.score_likelihood(r)
        return assign + likelihood
    product_assignments = tuple(map(list, map(permutation_iter, domains)))
    posterior = scores_to_probs(
        np.array(map(score_fn, it.product(*product_assignments))))

    s = irm_initialize(reg_defn, data, r=r)
    bounded_states = [irm_bind(s, i, data) for i in xrange(len(domains))]

    # burnin
    start = time.time()
    last = start
    for i in xrange(burnin_niters):
        for bs in bounded_states:
            kernel(bs, r)
        if not ((i + 1) % 1000):
            print 'burnin finished iteration', (i + 1), \
                'in', (time.time() - last), 'seconds'
            last = time.time()
    print 'finished burnin of', burnin_niters, \
        'iters in', (time.time() - start), 'seconds'

    idmap = {C: i for i, C in enumerate(it.product(*product_assignments))}
    #print idmap

    def sample_fn():
        for _ in xrange(skip):
            for bs in bounded_states:
                kernel(bs, r)
        key = tuple(tuple(permutation_canonical(bs.assignments()))
                    for bs in bounded_states)
        return idmap[key]

    assert_discrete_dist_approx(
        sample_fn, posterior,
        ntries=ntries, nsamples=nsamples,
        kl_places=places)
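
The brute-force posterior relies on permutation_iter to enumerate every clustering of a domain exactly once. A hypothetical sketch (assuming it yields each partition of n items as a canonical assignment tuple, i.e. a restricted growth string):

def permutation_iter_sketch(n):
    # Yield every partition of range(n) as a canonical assignment vector:
    # element i may join any cluster seen so far or open the next new one.
    def rec(prefix, nclusters):
        if len(prefix) == n:
            yield tuple(prefix)
            return
        for c in range(nclusters + 1):
            for out in rec(prefix + [c], max(nclusters, c + 1)):
                yield out
    return rec([], 0)

# list(permutation_iter_sketch(3)) has 5 entries (the Bell number B_3),
# from (0, 0, 0) up to (0, 1, 2).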
Example #6
def data_with_posterior(defn, r=None):
    # XXX(stephentu): should only accept conjugate models
    if r is None:
        r = rng()
    relations = toy_dataset(defn)
    views = map(numpy_dataview, relations)

    def score_fn(assignments):
        s = model.initialize(defn, views, r=r, domain_assignments=assignments)
        assign = sum(s.score_assignment(i) for i in xrange(len(assignments)))
        likelihood = s.score_likelihood(r)
        return assign + likelihood

    domains = defn.domains()
    product_assignments = tuple(map(list, map(permutation_iter, domains)))
    posterior = scores_to_probs(
        np.array(map(score_fn, it.product(*product_assignments))))

    return relations, posterior
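
The (relations, posterior) pair returned here is what the convergence tests above feed into assert_discrete_dist_approx. A hypothetical sketch of that check (assuming it compares an empirical histogram of sample_fn draws against the exact distribution via KL divergence; the real helper may differ):

import numpy as np

def assert_discrete_dist_approx_sketch(sample_fn, dist, ntries=5,
                                       nsamples=1000, kl_places=2):
    # Draw nsamples indices, form the empirical distribution, and accept if
    # KL(empirical || exact) rounds to zero at kl_places decimal places.
    # Retry up to ntries times to tolerate an unlucky run.
    dist = np.asarray(dist, dtype=float)
    for _ in range(ntries):
        counts = np.zeros(len(dist))
        for _ in range(nsamples):
            counts[sample_fn()] += 1
        empirical = counts / counts.sum()
        mask = empirical > 0
        kl = np.sum(empirical[mask] * np.log(empirical[mask] / dist[mask]))
        if round(kl, kl_places) == 0.0:
            return
    raise AssertionError('empirical distribution did not match the exact one')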
Example #7
File: testutil.py  Project: tatabox2000/irm
def data_with_posterior(defn, r=None):
    # XXX(stephentu): should only accept conjugate models
    if r is None:
        r = rng()
    relations = toy_dataset(defn)
    views = map(numpy_dataview, relations)

    def score_fn(assignments):
        s = model.initialize(defn, views, r=r, domain_assignments=assignments)
        assign = sum(s.score_assignment(i) for i in xrange(len(assignments)))
        likelihood = s.score_likelihood(r)
        return assign + likelihood

    domains = defn.domains()
    product_assignments = tuple(map(list, map(permutation_iter, domains)))
    posterior = scores_to_probs(
        np.array(map(score_fn, it.product(*product_assignments))))

    return relations, posterior
Example #8
def test_crp_empirical():
    N = 4
    alpha = 2.5
    defn = model_definition(N, [bb])
    Y = np.array([(True,)] * N, dtype=[('', bool)])
    view = numpy_dataview(Y)
    r = rng()

    def crp_score(assignment):
        latent = initialize(
            defn, view, r=r,
            cluster_hp={'alpha': alpha}, assignment=assignment)
        return latent.score_assignment()
    scores = np.array(list(map(crp_score, permutation_iter(N))))
    dist = scores_to_probs(scores)
    idmap = {C: i for i, C in enumerate(permutation_iter(N))}

    def sample_fn():
        sample = permutation_canonical(_sample_crp(N, alpha))
        return idmap[tuple(sample)]
    assert_discrete_dist_approx(sample_fn, dist, ntries=100)
Example #9
def _test_convergence(domains,
                      data,
                      reg_relations,
                      brute_relations,
                      kernel,
                      burnin_niters=10000,
                      skip=10,
                      ntries=50,
                      nsamples=1000,
                      places=2):
    r = rng()

    reg_defn = irm_definition(domains, reg_relations)
    brute_defn = irm_definition(domains, brute_relations)

    def score_fn(assignments):
        s = irm_initialize(brute_defn,
                           data,
                           r=r,
                           domain_assignments=assignments)
        assign = sum(s.score_assignment(i) for i in xrange(len(assignments)))
        likelihood = s.score_likelihood(r)
        return assign + likelihood

    product_assignments = tuple(map(list, map(permutation_iter, domains)))
    posterior = scores_to_probs(
        np.array(map(score_fn, it.product(*product_assignments))))

    s = irm_initialize(reg_defn, data, r=r)
    bounded_states = [irm_bind(s, i, data) for i in xrange(len(domains))]

    # burnin
    start = time.time()
    last = start
    for i in xrange(burnin_niters):
        for bs in bounded_states:
            kernel(bs, r)
        if not ((i + 1) % 1000):
            print 'burnin finished iteration', (i + 1), \
                'in', (time.time() - last), 'seconds'
            last = time.time()
    print 'finished burnin of', burnin_niters, \
        'iters in', (time.time() - start), 'seconds'

    idmap = {C: i for i, C in enumerate(it.product(*product_assignments))}

    #print idmap

    def sample_fn():
        for _ in xrange(skip):
            for bs in bounded_states:
                kernel(bs, r)
        key = tuple(
            tuple(permutation_canonical(bs.assignments()))
            for bs in bounded_states)
        return idmap[key]

    assert_discrete_dist_approx(sample_fn,
                                posterior,
                                ntries=ntries,
                                nsamples=nsamples,
                                kl_places=places)
Example #10
def test_convergence_simple():
    N, V = 2, 10
    defn = model_definition(N, V)
    data = [
        np.array([5, 6]),
        np.array([0, 1, 2]),
    ]
    view = numpy_dataview(data)
    prng = rng()

    scores = []
    idmap = {}
    for i, (tables, dishes) in enumerate(permutations([2, 3])):
        latent = model.initialize(defn,
                                  view,
                                  prng,
                                  table_assignments=tables,
                                  dish_assignments=dishes)
        scores.append(latent.score_assignment() + latent.score_data(prng))
        idmap[(tables, dishes)] = i
    true_dist = scores_to_probs(scores)

    def kernel(latent):
        # mutates latent in place
        doc_model = model.bind(latent, data=view)
        kernels.assign2(doc_model, prng)
        for did in xrange(latent.nentities()):
            table_model = model.bind(latent, document=did)
            kernels.assign(table_model, prng)

    latent = model.initialize(defn, view, prng)

    skip = 10

    def sample_fn():
        for _ in xrange(skip):
            kernel(latent)
        table_assignments = latent.table_assignments()
        canon_table_assigments = tuple(
            map(tuple, map(permutation_canonical, table_assignments)))

        dish_maps = latent.dish_assignments()
        dish_assignments = []
        for dm, (ta, ca) in zip(dish_maps,
                                zip(table_assignments,
                                    canon_table_assigments)):
            dish_assignment = []
            for t, c in zip(ta, ca):
                if c == len(dish_assignment):
                    dish_assignment.append(dm[t])
            dish_assignments.append(dish_assignment)

        canon_dish_assigments = tuple(
            map(tuple, map(permutation_canonical, dish_assignments)))

        return idmap[(canon_table_assigments, canon_dish_assigments)]

    assert_discrete_dist_approx(sample_fn,
                                true_dist,
                                ntries=100,
                                nsamples=10000,
                                kl_places=2)