def _test_convergence(bs, posterior, kernel, burnin_niters, skip, ntries,
                      nsamples, kl_places):
    N = bs.nentities()
    start = time.time()
    last = start
    for i in xrange(burnin_niters):
        kernel(bs)
        if not ((i + 1) % 1000):
            print 'burning finished iteration', (i + 1), \
                'in', (time.time() - last), 'seconds'
            last = time.time()
    print 'finished burnin of', burnin_niters, \
        'iters in', (time.time() - start), 'seconds'
    idmap = {C: i for i, C in enumerate(permutation_iter(N))}

    def sample_fn():
        for _ in xrange(skip):
            kernel(bs)
        return idmap[tuple(permutation_canonical(bs.assignments()))]

    assert_discrete_dist_approx(sample_fn,
                                posterior,
                                ntries=ntries,
                                nsamples=nsamples,
                                kl_places=kl_places)
Example #2
0
def test_runner_multiprocessing_convergence():
    """Check samples drawn through `parallel.runner` against the posterior.

    One latent state is initialized per CPU reported by `mp.cpu_count()`,
    each is wrapped in its own runner, and all runners are driven together.
    """
    domains = [4]
    defn = model_definition(domains, [((0, 0), bb)])
    prng = rng()
    relations, posterior = data_with_posterior(defn, prng)
    views = [numpy_dataview(relation) for relation in relations]

    latents = []
    for _ in xrange(mp.cpu_count()):
        latents.append(model.initialize(defn, views, prng))

    kernel_config = [('assign', range(len(domains)))]
    workers = [
        runner.runner(defn, views, state, kernel_config) for state in latents
    ]
    pool = parallel.runner(workers)
    pool.run(r=prng, niters=10000)  # burnin

    # Enumerate every joint assignment (one entry per domain) and index it.
    per_domain = tuple(list(permutation_iter(size)) for size in domains)
    idmap = dict(
        (combo, index)
        for index, combo in enumerate(it.product(*per_domain)))

    def sample_iter():
        # One short run, then one sample id per latent held by the pool.
        pool.run(r=prng, niters=10)
        for state in pool.get_latents():
            per_domain_key = [
                tuple(permutation_canonical(state.assignments(d)))
                for d in xrange(len(domains))
            ]
            yield idmap[tuple(per_domain_key)]

    # Single-slot holder so sample_fn can rebind the active batch iterator.
    batch = [None]

    def sample_fn():
        if batch[0] is None:
            batch[0] = sample_iter()
        try:
            return next(batch[0])
        except StopIteration:
            # Batch exhausted: clear it so the retry below starts a new one.
            batch[0] = None
        return sample_fn()

    assert_discrete_dist_approx(sample_fn, posterior, ntries=100, kl_places=2)
def _test_convergence(bs,
                      posterior,
                      kernel,
                      burnin_niters,
                      skip,
                      ntries,
                      nsamples,
                      kl_places):
    N = bs.nentities()
    start = time.time()
    last = start
    for i in xrange(burnin_niters):
        kernel(bs)
        if not ((i + 1) % 1000):
            print 'burning finished iteration', (i + 1), \
                'in', (time.time() - last), 'seconds'
            last = time.time()
    print 'finished burnin of', burnin_niters, \
        'iters in', (time.time() - start), 'seconds'
    idmap = {C: i for i, C in enumerate(permutation_iter(N))}

    def sample_fn():
        for _ in xrange(skip):
            kernel(bs)
        return idmap[tuple(permutation_canonical(bs.assignments()))]
    assert_discrete_dist_approx(
        sample_fn, posterior,
        ntries=ntries, nsamples=nsamples, kl_places=kl_places)
Example #4
0
def test_runner_multiprocessing_convergence():
    """Check samples from several runners under `parallel.runner`.

    Builds one latent per CPU (`mp.cpu_count()`), runs an 'assign' kernel on
    each through a shared parallel runner, and compares the resulting
    assignment ids with the posterior from `data_with_posterior`.
    """
    N, D = 4, 5
    defn = model_definition(N, [bb for _ in xrange(D)])
    prng = rng()
    Y, posterior = data_with_posterior(defn, r=prng)
    view = numpy_dataview(Y)

    latents = []
    for _ in xrange(mp.cpu_count()):
        latents.append(model.initialize(defn, view, prng))

    workers = []
    for state in latents:
        workers.append(runner.runner(defn, view, state, ['assign']))

    pool = parallel.runner(workers)
    pool.run(r=prng, niters=1000)  # burnin

    idmap = dict(
        (assignment, index)
        for index, assignment in enumerate(permutation_iter(N)))

    def sample_iter():
        # One short run, then one sample id per latent held by the pool.
        pool.run(r=prng, niters=10)
        for state in pool.get_latents():
            canonical = permutation_canonical(state.assignments())
            yield idmap[tuple(canonical)]

    # Single-slot holder so sample_fn can rebind the active batch iterator.
    batch = [None]

    def sample_fn():
        if batch[0] is None:
            batch[0] = sample_iter()
        try:
            return next(batch[0])
        except StopIteration:
            # Batch exhausted: clear it so the retry below starts a new one.
            batch[0] = None
        return sample_fn()

    assert_discrete_dist_approx(sample_fn, posterior, ntries=100, kl_places=2)
Example #5
0
def test_crp_empirical():
    """Compare `_sample_crp` draws with probabilities from scored assignments.

    Each assignment yielded by `permutation_iter(N)` is scored through
    `score_assignment()` on a latent initialized with that assignment and
    `alpha` as the cluster hyperparameter; the scores are turned into a
    distribution with `scores_to_probs`.
    """
    N = 4
    alpha = 2.5
    defn = model_definition(N, [bb])
    rows = [(True, ) for _ in xrange(N)]
    Y = np.array(rows, dtype=[('', bool)])
    view = numpy_dataview(Y)
    r = rng()

    def crp_score(assignment):
        state = initialize(
            defn, view, r=r,
            cluster_hp={'alpha': alpha},
            assignment=assignment)
        return state.score_assignment()

    scores = np.array([crp_score(a) for a in permutation_iter(N)])
    dist = scores_to_probs(scores)

    idmap = dict(
        (assignment, index)
        for index, assignment in enumerate(permutation_iter(N)))

    def sample_fn():
        draw = _sample_crp(N, alpha)
        return idmap[tuple(permutation_canonical(draw))]

    assert_discrete_dist_approx(sample_fn, dist, ntries=100)
Example #6
0
def test_convergence_simple():
    """Check that the assign2/assign kernels sample the brute-force posterior.

    The reference distribution is built by scoring every (tables, dishes)
    pair yielded by `permutations([2, 3])`; samples are then drawn by
    repeatedly applying `kernel` to one latent state and mapping its
    canonicalized table/dish assignments back to an index in that
    enumeration.
    """
    N, V = 2, 10
    defn = model_definition(N, V)
    # Two data arrays, with 2 and 3 entries respectively.
    data = [
        np.array([5, 6]),
        np.array([0, 1, 2]),
    ]
    view = numpy_dataview(data)
    prng = rng()

    scores = []
    idmap = {}
    # NOTE(review): [2, 3] presumably mirrors the per-array sizes of `data`
    # above -- confirm against `permutations`.
    for i, (tables, dishes) in enumerate(permutations([2, 3])):
        latent = model.initialize(
            defn, view, prng,
            table_assignments=tables,
            dish_assignments=dishes)
        # Score of a configuration = assignment score + data score.
        scores.append(
            latent.score_assignment() +
            latent.score_data(prng))
        idmap[(tables, dishes)] = i
    true_dist = scores_to_probs(scores)

    def kernel(latent):
        # mutates latent in place
        # First run assign2 on the state bound to the whole data view, then
        # run assign on the state bound to each document id in turn.
        doc_model = model.bind(latent, data=view)
        kernels.assign2(doc_model, prng)
        for did in xrange(latent.nentities()):
            table_model = model.bind(latent, document=did)
            kernels.assign(table_model, prng)

    # Fresh latent used for sampling (rebinds the name used in the loop above).
    latent = model.initialize(defn, view, prng)

    skip = 10
    def sample_fn():
        # Advance the chain `skip` steps before reading off a sample.
        for _ in xrange(skip):
            kernel(latent)
        table_assignments = latent.table_assignments()
        canon_table_assigments = tuple(
            map(tuple, map(permutation_canonical, table_assignments)))

        dish_maps = latent.dish_assignments()
        dish_assignments = []
        for dm, (ta, ca) in zip(dish_maps, zip(table_assignments, canon_table_assigments)):
            dish_assignment = []
            for t, c in zip(ta, ca):
                # A canonical label equal to the number of dishes collected
                # so far is treated as a not-yet-seen table; record its dish
                # via the raw table id. Assumes permutation_canonical numbers
                # labels 0, 1, 2, ... in order of first appearance -- TODO
                # confirm.
                if c == len(dish_assignment):
                    dish_assignment.append(dm[t])
            dish_assignments.append(dish_assignment)

        canon_dish_assigments = tuple(
            map(tuple, map(permutation_canonical, dish_assignments)))

        # Key has the same (tables, dishes) shape used to fill idmap above.
        return idmap[(canon_table_assigments, canon_dish_assigments)]

    assert_discrete_dist_approx(
        sample_fn, true_dist,
        ntries=100, nsamples=10000, kl_places=2)
def _test_convergence(domains,
                      data,
                      reg_relations,
                      brute_relations,
                      kernel,
                      burnin_niters=10000,
                      skip=10,
                      ntries=50,
                      nsamples=1000,
                      places=2):
    r = rng()

    reg_defn = irm_definition(domains, reg_relations)
    brute_defn = irm_definition(domains, brute_relations)

    def score_fn(assignments):
        s = irm_initialize(
            brute_defn, data, r=r,
            domain_assignments=assignments)
        assign = sum(s.score_assignment(i) for i in xrange(len(assignments)))
        likelihood = s.score_likelihood(r)
        return assign + likelihood
    product_assignments = tuple(map(list, map(permutation_iter, domains)))
    posterior = scores_to_probs(
        np.array(map(score_fn, it.product(*product_assignments))))

    s = irm_initialize(reg_defn, data, r=r)
    bounded_states = [irm_bind(s, i, data) for i in xrange(len(domains))]

    # burnin
    start = time.time()
    last = start
    for i in xrange(burnin_niters):
        for bs in bounded_states:
            kernel(bs, r)
        if not ((i + 1) % 1000):
            print 'burning finished iteration', (i + 1), \
                'in', (time.time() - last), 'seconds'
            last = time.time()
    print 'finished burnin of', burnin_niters, \
        'iters in', (time.time() - start), 'seconds'

    idmap = {C: i for i, C in enumerate(it.product(*product_assignments))}
    #print idmap

    def sample_fn():
        for _ in xrange(skip):
            for bs in bounded_states:
                kernel(bs, r)
        key = tuple(tuple(permutation_canonical(bs.assignments()))
                    for bs in bounded_states)
        return idmap[key]

    assert_discrete_dist_approx(
        sample_fn, posterior,
        ntries=ntries, nsamples=nsamples,
        kl_places=places)
Example #8
0
def test_runner_convergence():
    """Check that a single runner's 'assign' samples match the posterior.

    The runner is burned in for 1000 iterations; each sample is taken after
    10 further iterations from the latent returned by `get_latent()`.
    """
    N, D = 4, 5
    defn = model_definition(N, [bb for _ in xrange(D)])
    prng = rng()
    Y, posterior = data_with_posterior(defn, r=prng)
    view = numpy_dataview(Y)
    latent = model.initialize(defn, view, prng)
    chain = runner.runner(defn, view, latent, ['assign'])
    chain.run(r=prng, niters=1000)  # burnin

    idmap = dict(
        (assignment, index)
        for index, assignment in enumerate(permutation_iter(N)))

    def sample_fn():
        chain.run(r=prng, niters=10)
        current = chain.get_latent().assignments()
        return idmap[tuple(permutation_canonical(current))]

    assert_discrete_dist_approx(sample_fn, posterior, ntries=100)
Example #9
0
def test_runner_default_kernel_config_convergence():
    """Check a single runner's samples against the reference posterior.

    The runner is configured with one 'assign' kernel over every domain
    index, burned in for 1000 iterations, and sampled every 10 iterations.
    """
    domains = [4]
    defn = model_definition(domains, [((0, 0), bb)])
    prng = rng()
    relations, posterior = data_with_posterior(defn, prng)
    views = [numpy_dataview(relation) for relation in relations]
    latent = model.initialize(defn, views, prng)
    kernel_config = [('assign', range(len(domains)))]
    chain = runner.runner(defn, views, latent, kernel_config)

    chain.run(r=prng, niters=1000)  # burnin

    # Enumerate every joint assignment (one entry per domain) and index it.
    per_domain = tuple(list(permutation_iter(size)) for size in domains)
    idmap = dict(
        (combo, index)
        for index, combo in enumerate(it.product(*per_domain)))

    def sample_fn():
        chain.run(r=prng, niters=10)
        state = chain.get_latent()
        per_domain_key = [
            tuple(permutation_canonical(state.assignments(d)))
            for d in xrange(len(domains))
        ]
        return idmap[tuple(per_domain_key)]

    assert_discrete_dist_approx(sample_fn, posterior, ntries=100)
Example #10
0
def test_crp_empirical():
    """Check `_sample_crp` against a distribution built from scored assignments.

    The reference distribution comes from calling `score_assignment()` on a
    latent initialized with each assignment from `permutation_iter(N)` and
    passing the scores through `scores_to_probs`.
    """
    N = 4
    alpha = 2.5
    defn = model_definition(N, [bb])
    Y = np.array([(True,) for _ in xrange(N)], dtype=[('', bool)])
    view = numpy_dataview(Y)
    r = rng()

    def crp_score(assignment):
        return initialize(
            defn,
            view,
            r=r,
            cluster_hp={'alpha': alpha},
            assignment=assignment).score_assignment()

    raw_scores = []
    for candidate in permutation_iter(N):
        raw_scores.append(crp_score(candidate))
    scores = np.array(raw_scores)
    dist = scores_to_probs(scores)

    idmap = {}
    for index, candidate in enumerate(permutation_iter(N)):
        idmap[candidate] = index

    def sample_fn():
        canonical = permutation_canonical(_sample_crp(N, alpha))
        key = tuple(canonical)
        return idmap[key]

    assert_discrete_dist_approx(sample_fn, dist, ntries=100)
Example #11
0
def test_runner_default_kernel_config_convergence():
    """Run one runner with an 'assign' kernel and test its sample distribution.

    After a 1000-iteration burn-in, each sample is the index of the
    canonicalized per-domain assignments read from `get_latent()` after 10
    more iterations.
    """
    domains = [4]
    ndomains = len(domains)
    defn = model_definition(domains, [((0, 0), bb)])
    prng = rng()
    relations, posterior = data_with_posterior(defn, prng)

    views = []
    for relation in relations:
        views.append(numpy_dataview(relation))

    latent = model.initialize(defn, views, prng)
    chain = runner.runner(defn, views, latent, [('assign', range(ndomains))])

    chain.run(r=prng, niters=1000)  # burnin

    product_assignments = tuple(
        list(permutation_iter(size)) for size in domains)
    idmap = {}
    for index, combo in enumerate(it.product(*product_assignments)):
        idmap[combo] = index

    def sample_fn():
        chain.run(r=prng, niters=10)
        state = chain.get_latent()
        key = tuple(
            tuple(permutation_canonical(state.assignments(d)))
            for d in xrange(len(domains)))
        return idmap[key]

    assert_discrete_dist_approx(sample_fn, posterior, ntries=100)
Example #12
0
def test_runner_multiprocessing_convergence():
    """Test the sample distribution of runners driven by `parallel.runner`.

    The number of latents (and runners) equals `mp.cpu_count()`. After a
    10000-iteration burn-in, every 10-iteration run contributes one sample
    per latent returned by `get_latents()`.
    """
    domains = [4]
    defn = model_definition(domains, [((0, 0), bb)])
    prng = rng()
    relations, posterior = data_with_posterior(defn, prng)

    views = []
    for relation in relations:
        views.append(numpy_dataview(relation))

    ncpus = mp.cpu_count()
    latents = [model.initialize(defn, views, prng) for _ in xrange(ncpus)]

    kernel_config = [('assign', range(len(domains)))]
    workers = []
    for state in latents:
        workers.append(runner.runner(defn, views, state, kernel_config))

    pool = parallel.runner(workers)
    pool.run(r=prng, niters=10000)  # burnin

    product_assignments = tuple(
        list(permutation_iter(size)) for size in domains)
    idmap = {}
    for index, combo in enumerate(it.product(*product_assignments)):
        idmap[combo] = index

    def sample_iter():
        # One short run, then one sample id per latent held by the pool.
        pool.run(r=prng, niters=10)
        for state in pool.get_latents():
            key = tuple(
                tuple(permutation_canonical(state.assignments(d)))
                for d in xrange(len(domains)))
            yield idmap[key]

    # One-element list so the nested function can rebind the live iterator.
    current = [None]

    def sample_fn():
        if current[0] is None:
            current[0] = sample_iter()
        try:
            return next(current[0])
        except StopIteration:
            # Out of latents for this run: reset, then retry on a new batch.
            current[0] = None
        return sample_fn()

    assert_discrete_dist_approx(sample_fn, posterior, ntries=100, kl_places=2)
Example #13
0
def _test_convergence(domains,
                      data,
                      reg_relations,
                      brute_relations,
                      kernel,
                      burnin_niters=10000,
                      skip=10,
                      ntries=50,
                      nsamples=1000,
                      places=2):
    r = rng()

    reg_defn = irm_definition(domains, reg_relations)
    brute_defn = irm_definition(domains, brute_relations)

    def score_fn(assignments):
        s = irm_initialize(brute_defn,
                           data,
                           r=r,
                           domain_assignments=assignments)
        assign = sum(s.score_assignment(i) for i in xrange(len(assignments)))
        likelihood = s.score_likelihood(r)
        return assign + likelihood

    product_assignments = tuple(map(list, map(permutation_iter, domains)))
    posterior = scores_to_probs(
        np.array(map(score_fn, it.product(*product_assignments))))

    s = irm_initialize(reg_defn, data, r=r)
    bounded_states = [irm_bind(s, i, data) for i in xrange(len(domains))]

    # burnin
    start = time.time()
    last = start
    for i in xrange(burnin_niters):
        for bs in bounded_states:
            kernel(bs, r)
        if not ((i + 1) % 1000):
            print 'burning finished iteration', (i + 1), \
                'in', (time.time() - last), 'seconds'
            last = time.time()
    print 'finished burnin of', burnin_niters, \
        'iters in', (time.time() - start), 'seconds'

    idmap = {C: i for i, C in enumerate(it.product(*product_assignments))}

    #print idmap

    def sample_fn():
        for _ in xrange(skip):
            for bs in bounded_states:
                kernel(bs, r)
        key = tuple(
            tuple(permutation_canonical(bs.assignments()))
            for bs in bounded_states)
        return idmap[key]

    assert_discrete_dist_approx(sample_fn,
                                posterior,
                                ntries=ntries,
                                nsamples=nsamples,
                                kl_places=places)
Example #14
0
def test_convergence_simple():
    """Check that the assign2/assign kernels sample the brute-force posterior.

    The reference distribution is built by scoring every (tables, dishes)
    pair yielded by `permutations([2, 3])`; samples are then drawn by
    repeatedly applying `kernel` to one latent state and mapping its
    canonicalized table/dish assignments back to an index in that
    enumeration.
    """
    N, V = 2, 10
    defn = model_definition(N, V)
    # Two data arrays, with 2 and 3 entries respectively.
    data = [
        np.array([5, 6]),
        np.array([0, 1, 2]),
    ]
    view = numpy_dataview(data)
    prng = rng()

    scores = []
    idmap = {}
    # NOTE(review): [2, 3] presumably mirrors the per-array sizes of `data`
    # above -- confirm against `permutations`.
    for i, (tables, dishes) in enumerate(permutations([2, 3])):
        latent = model.initialize(defn,
                                  view,
                                  prng,
                                  table_assignments=tables,
                                  dish_assignments=dishes)
        # Score of a configuration = assignment score + data score.
        scores.append(latent.score_assignment() + latent.score_data(prng))
        idmap[(tables, dishes)] = i
    true_dist = scores_to_probs(scores)

    def kernel(latent):
        # mutates latent in place
        # First run assign2 on the state bound to the whole data view, then
        # run assign on the state bound to each document id in turn.
        doc_model = model.bind(latent, data=view)
        kernels.assign2(doc_model, prng)
        for did in xrange(latent.nentities()):
            table_model = model.bind(latent, document=did)
            kernels.assign(table_model, prng)

    # Fresh latent used for sampling (rebinds the name used in the loop above).
    latent = model.initialize(defn, view, prng)

    skip = 10

    def sample_fn():
        # Advance the chain `skip` steps before reading off a sample.
        for _ in xrange(skip):
            kernel(latent)
        table_assignments = latent.table_assignments()
        canon_table_assigments = tuple(
            map(tuple, map(permutation_canonical, table_assignments)))

        dish_maps = latent.dish_assignments()
        dish_assignments = []
        for dm, (ta, ca) in zip(dish_maps,
                                zip(table_assignments,
                                    canon_table_assigments)):
            dish_assignment = []
            for t, c in zip(ta, ca):
                # A canonical label equal to the number of dishes collected
                # so far is treated as a not-yet-seen table; record its dish
                # via the raw table id. Assumes permutation_canonical numbers
                # labels 0, 1, 2, ... in order of first appearance -- TODO
                # confirm.
                if c == len(dish_assignment):
                    dish_assignment.append(dm[t])
            dish_assignments.append(dish_assignment)

        canon_dish_assigments = tuple(
            map(tuple, map(permutation_canonical, dish_assignments)))

        # Key has the same (tables, dishes) shape used to fill idmap above.
        return idmap[(canon_table_assigments, canon_dish_assigments)]

    assert_discrete_dist_approx(sample_fn,
                                true_dist,
                                ntries=100,
                                nsamples=10000,
                                kl_places=2)