def test_score_student_t_dbg_lp_equiv():
    # Check that the dbg and lp implementations of the multivariate
    # Student-t score agree on 10 random 2-d and 10 random 3-d inputs.
    seed_all(0)

    def draw_case(dim):
        # The draw order (x, then mu, then cov) is kept fixed so the seeded
        # random stream is consumed in the same sequence on every run.
        x = numpy.random.uniform(low=-3., high=3., size=dim)
        nu = float(dim) + 1.
        mu = numpy.random.uniform(low=-3., high=3., size=dim)
        # NOTE(review): if random_orthonormal_matrix really returns an
        # orthonormal Q, then Q Q^T is (numerically) the identity -- confirm
        # whether a less trivial covariance was intended.
        basis = random_orthonormal_matrix(dim)
        cov = numpy.dot(basis, basis.T)
        return x, nu, mu, cov

    cases = []
    for dim in (2, 3):
        for _ in xrange(10):
            cases.append(draw_case(dim))

    for x, nu, mu, cov in cases:
        dbg_mv_score = dbg_score_student_t(x, nu, mu, cov)
        lp_mv_score = lp_score_student_t(x, nu, mu, cov)
        assert_close(dbg_mv_score, lp_mv_score)
def test_log_stirling1_row():
    require_cython()
    from distributions.lp.special import log_stirling1_row
    MAX_N = 128

    rows = [[1]]
    for n in range(1, MAX_N + 1):
        prev = rows[-1]
        middle = [(n - 1) * prev[k] + prev[k - 1] for k in range(1, n)]
        row = [0] + middle + [1]
        rows.append(row)

    for n in range(1, MAX_N + 1):
        print 'Row {}:'.format(n),
        row_py = numpy.log(numpy.array(rows[n][1:], dtype=numpy.double))
        row_cpp = log_stirling1_row(n)[1:]
        assert_equal(len(row_py), len(row_cpp))

        # Only the slopes need to be accurate
        # print 0,
        # assert_close(row_py[0], row_cpp[0])
        # print len(row_py)
        # assert_close(row_py[-1], row_cpp[-1])

        diff_py = numpy.diff(row_py)
        diff_cpp = numpy.diff(row_cpp)
        for k_minus_1, (dx_py, dx_cpp) in enumerate(zip(diff_py, diff_cpp)):
            k = k_minus_1 + 1
            print '%d-%d' % (k, k + 1),
            assert_close(dx_py, dx_cpp, tol=0.5)
        print
Exemple #3
0
def test_score_add_value_matches_score_counts(Model, EXAMPLE, sample_count):
    for sample_size in iter_valid_sizes(EXAMPLE, min_size=2, max_size=10):
        model = Model()
        model.load(EXAMPLE)

        samples = set(
            canonicalize(model.sample_assignments(sample_size - 1))
            for _ in xrange(sample_count))

        for sample in samples:
            nonempty_group_count = len(sample)
            counts = map(len, sample)
            actual = numpy.zeros(len(counts) + 1)
            expected = numpy.zeros(len(counts) + 1)

            # add to existing group
            for i, group in enumerate(sample):
                group_size = len(sample[i])
                expected[i] = model.score_counts(add_to_counts(counts, i))
                actual[i] = model.score_add_value(group_size,
                                                  nonempty_group_count,
                                                  sample_size - 1)

            # add to new group
            i = len(counts)
            group_size = 0
            expected[i] = model.score_counts(counts + [1])
            actual[i] = model.score_add_value(group_size, nonempty_group_count,
                                              sample_size - 1)

            actual = scores_to_probs(actual)
            expected = scores_to_probs(expected)
            print actual, expected
            assert_close(actual, expected, tol=0.05)
Exemple #4
0
def test_group(module, EXAMPLE):
    # Exercise the Group API of `module`: construction, dump/load round
    # trip, add/remove/merge, and the scoring and sampling entry points.
    assert_hasattr(module, 'Group')
    assert_is_instance(module.Group, type)
    Group = module.Group

    shared = module.Shared.from_dict(EXAMPLE['shared'])
    values = EXAMPLE['values']
    for value in values:
        shared.add_value(value)

    # Adding values one at a time must match bulk construction.
    incremental = Group()
    incremental.init(shared)
    for value in values:
        incremental.add_value(shared, value)
    bulk = Group.from_values(shared, values)
    assert_close(incremental.dump(), bulk.dump())

    # A dump taken before re-initialisation must be restored by load().
    roundtrip = Group.from_values(shared, values)
    snapshot = roundtrip.dump()
    roundtrip.init(shared)
    roundtrip.load(snapshot)
    assert_close(roundtrip.dump(), snapshot)

    # After removing every value the two groups must compare unequal.
    for value in values:
        bulk.remove_value(shared, value)
    assert_not_equal(incremental, bulk)
    bulk.merge(shared, incremental)

    # The remaining calls are only checked for not raising.
    for value in values:
        incremental.score_value(shared, value)
    for _ in xrange(10):
        sampled = incremental.sample_value(shared)
        incremental.score_value(shared, sampled)
        module.sample_group(shared, 10)
    incremental.score_data(shared)
    bulk.score_data(shared)
def test_group(module, EXAMPLE):
    # Exercise the Group API of `module`: construction, dump/load round
    # trip, add/remove/merge, and the scoring and sampling entry points.
    assert_hasattr(module, 'Group')
    assert_is_instance(module.Group, type)
    Group = module.Group

    shared = module.Shared.from_dict(EXAMPLE['shared'])
    values = EXAMPLE['values']
    for value in values:
        shared.add_value(value)

    # Adding values one at a time must match bulk construction.
    incremental = Group()
    incremental.init(shared)
    for value in values:
        incremental.add_value(shared, value)
    bulk = Group.from_values(shared, values)
    assert_close(incremental.dump(), bulk.dump())

    # A dump taken before re-initialisation must be restored by load().
    roundtrip = Group.from_values(shared, values)
    snapshot = roundtrip.dump()
    roundtrip.init(shared)
    roundtrip.load(snapshot)
    assert_close(roundtrip.dump(), snapshot)

    # After removing every value the two groups must compare unequal.
    for value in values:
        bulk.remove_value(shared, value)
    assert_not_equal(incremental, bulk)
    bulk.merge(shared, incremental)

    # The remaining calls are only checked for not raising.
    for value in values:
        incremental.score_value(shared, value)
    for _ in xrange(10):
        sampled = incremental.sample_value(shared)
        incremental.score_value(shared, sampled)
        module.sample_group(shared, 10)
    incremental.score_data(shared)
    bulk.score_data(shared)
def test_score_add_value_matches_score_counts(Model, EXAMPLE, sample_count):
    for sample_size in iter_valid_sizes(EXAMPLE, min_size=2, max_size=10):
        model = Model()
        model.load(EXAMPLE)

        samples = set(canonicalize(model.sample_assignments(sample_size - 1)) for _ in xrange(sample_count))

        for sample in samples:
            nonempty_group_count = len(sample)
            counts = map(len, sample)
            actual = numpy.zeros(len(counts) + 1)
            expected = numpy.zeros(len(counts) + 1)

            # add to existing group
            for i, group in enumerate(sample):
                group_size = len(sample[i])
                expected[i] = model.score_counts(add_to_counts(counts, i))
                actual[i] = model.score_add_value(group_size, nonempty_group_count, sample_size - 1)

            # add to new group
            i = len(counts)
            group_size = 0
            expected[i] = model.score_counts(counts + [1])
            actual[i] = model.score_add_value(group_size, nonempty_group_count, sample_size - 1)

            actual = scores_to_probs(actual)
            expected = scores_to_probs(expected)
            print actual, expected
            assert_close(actual, expected, tol=0.05)
def test_log_sum_exp():
    # Compare lp.random.log_sum_exp with numpy.logaddexp.reduce on random
    # score lists of length 0..19.
    require_cython()
    from distributions.lp.random import log_sum_exp

    for size in xrange(20):
        scores = numpy.random.normal(size=size).tolist()
        if size:
            expected = numpy.logaddexp.reduce(scores)
        else:
            # The empty list is expected to score 0.0.
            expected = 0.0
        actual = log_sum_exp(scores)
        assert_close(actual, expected, err_msg='log_sum_exp')
def test_log_sum_exp():
    # Compare lp.random.log_sum_exp with numpy.logaddexp.reduce on random
    # score lists of length 0..19.
    require_cython()
    from distributions.lp.random import log_sum_exp

    for size in xrange(20):
        scores = numpy.random.normal(size=size).tolist()
        if size:
            expected = numpy.logaddexp.reduce(scores)
        else:
            # The empty list is expected to score 0.0.
            expected = 0.0
        actual = log_sum_exp(scores)
        assert_close(actual, expected, err_msg='log_sum_exp')
Exemple #9
0
 def check_score_value(value):
     # Check that mixture.score_value agrees with scoring `value` against
     # each group individually.  The output buffer is pre-filled with noise
     # that is subtracted again afterwards, so the comparison only holds if
     # the mixture adds its scores to the buffer's existing contents.
     # Uses `groups`, `shared` and `mixture` from the enclosing scope.
     expected = []
     for group in groups:
         expected.append(group.score_value(shared, value))

     actual = numpy.zeros(len(mixture), dtype=numpy.float32)
     noise = numpy.random.randn(len(actual))
     actual += noise
     mixture.score_value(shared, value, actual)
     actual -= noise

     message = 'score_value {}'.format(value)
     assert_close(actual, expected, err_msg=message)
     return actual
def test_sample_seed(Model, EXAMPLE):
    # Sampling DATA_COUNT values from a fresh group after seed_all(0) must
    # give the same values both times.
    model = Model.model_load(EXAMPLE['model'])

    def draw_values():
        # Reseed, then sample from a newly created group.
        seed_all(0)
        group = model.group_create()
        return [model.sample_value(group) for _ in xrange(DATA_COUNT)]

    first_run = draw_values()
    second_run = draw_values()

    assert_close(first_run, second_run, err_msg='values')
def test_sample_seed(module, EXAMPLE):
    # Sampling DATA_COUNT values from a fresh group after seed_all(0) must
    # give the same values both times.
    shared = module.Shared.from_dict(EXAMPLE['shared'])

    def draw_values():
        # Reseed, then sample from a newly created group.
        seed_all(0)
        group = module.Group.from_values(shared)
        return [group.sample_value(shared) for _ in xrange(DATA_COUNT)]

    first_run = draw_values()
    second_run = draw_values()

    assert_close(first_run, second_run, err_msg='values')
Exemple #12
0
def test_sample_seed(module, EXAMPLE):
    # Sampling DATA_COUNT values from a fresh group after seed_all(0) must
    # give the same values both times.
    shared = module.Shared.from_dict(EXAMPLE['shared'])

    def draw_values():
        # Reseed, then sample from a newly created group.
        seed_all(0)
        group = module.Group.from_values(shared)
        return [group.sample_value(shared) for _ in xrange(DATA_COUNT)]

    first_run = draw_values()
    second_run = draw_values()

    assert_close(first_run, second_run, err_msg='values')
def test_add_merge(Model, EXAMPLE):
    # Test group_add_value, group_merge
    #
    # A group built from all values must equal the merge of two groups built
    # from any split of a reshuffled copy of those values.
    model = Model.model_load(EXAMPLE['model'])
    values = EXAMPLE['values'][:]  # copy, so EXAMPLE is not shuffled
    random.shuffle(values)
    reference = model.group_create(values)

    for split in xrange(len(values) + 1):
        random.shuffle(values)
        head = model.group_create(values[:split])
        tail = model.group_create(values[split:])
        model.group_merge(head, tail)
        assert_close(reference.dump(), head.dump())
 def check_scores(mixture, counts, empty_group_count):
     # Check that mixture.score_value fills the buffer with the same scores
     # as model.score_add_value computed per group.  The buffer is pre-filled
     # with noise and compared directly afterwards, so the check only holds
     # if every entry is overwritten.  Uses `model` from an outer scope.
     sample_count = sum(counts)
     group_count = len(counts)
     nonempty_group_count = group_count - empty_group_count

     expected = []
     for group_size in counts:
         expected.append(model.score_add_value(
             group_size,
             nonempty_group_count,
             sample_count,
             empty_group_count))

     noise = numpy.random.randn(group_count)
     actual = numpy.zeros(group_count, dtype=numpy.float32)
     actual[:] = noise
     mixture.score_value(model, actual)
     assert_close(actual, expected)
     return actual
Exemple #15
0
def test_prob_from_scores():
    # The probability returned alongside a sample by sample_prob_from_scores
    # must match prob_from_scores evaluated for that same sample.
    require_cython()
    import distributions.lp.random
    lp_random = distributions.lp.random

    for size in range(1, 100):
        scores = numpy.random.normal(size=size).tolist()
        for _ in xrange(size):
            sample, prob1 = lp_random.sample_prob_from_scores(scores)
            assert 0 <= sample < size
            prob2 = lp_random.prob_from_scores(sample, scores)
            assert_close(
                prob1,
                prob2,
                err_msg='sample_prob_from_scores != prob_from_scores')
Exemple #16
0
def test_protobuf(module, EXAMPLE):
    # Round-trip Shared (and Group, when it has load_protobuf) through
    # protobuf messages, using both the instance-level dump/load methods and
    # the class-level to_protobuf/from_protobuf converters.
    if not has_protobuf:
        raise SkipTest('protobuf not available')
    shared = module.Shared.from_dict(EXAMPLE['shared'])
    values = EXAMPLE['values']
    Message = getattr(distributions.io.schema_pb2, module.NAME)

    # Shared: object -> message -> object
    shared_message = Message.Shared()
    shared.dump_protobuf(shared_message)
    shared_copy = module.Shared()
    shared_copy.load_protobuf(shared_message)
    assert_close(shared_copy.dump(), shared.dump())

    # Shared: dumped form -> message -> dumped form
    shared_message.Clear()
    shared_dumped = shared.dump()
    module.Shared.to_protobuf(shared_dumped, shared_message)
    assert_close(module.Shared.from_protobuf(shared_message), shared_dumped)

    if not hasattr(module.Group, 'load_protobuf'):
        return

    for value in values:
        shared.add_value(value)
    group = module.Group.from_values(shared, values)

    # Group: object -> message -> object
    group_message = Message.Group()
    group.dump_protobuf(group_message)
    group_copy = module.Group()
    group_copy.load_protobuf(group_message)
    assert_close(group_copy.dump(), group.dump())

    # Group: dumped form -> message -> dumped form
    group_message.Clear()
    group_dumped = group.dump()
    module.Group.to_protobuf(group_dumped, group_message)
    assert_close(module.Group.from_protobuf(group_message), group_dumped)
def test_protobuf(module, EXAMPLE):
    # Round-trip Shared (and Group, when it has load_protobuf) through
    # protobuf messages, using both the instance-level dump/load methods and
    # the class-level to_protobuf/from_protobuf converters.
    if not has_protobuf:
        raise SkipTest('protobuf not available')
    shared = module.Shared.from_dict(EXAMPLE['shared'])
    values = EXAMPLE['values']
    Message = getattr(distributions.io.schema_pb2, module.NAME)

    # Shared: object -> message -> object
    shared_message = Message.Shared()
    shared.dump_protobuf(shared_message)
    shared_copy = module.Shared()
    shared_copy.load_protobuf(shared_message)
    assert_close(shared_copy.dump(), shared.dump())

    # Shared: dumped form -> message -> dumped form
    shared_message.Clear()
    shared_dumped = shared.dump()
    module.Shared.to_protobuf(shared_dumped, shared_message)
    assert_close(module.Shared.from_protobuf(shared_message), shared_dumped)

    if not hasattr(module.Group, 'load_protobuf'):
        return

    for value in values:
        shared.add_value(value)
    group = module.Group.from_values(shared, values)

    # Group: object -> message -> object
    group_message = Message.Group()
    group.dump_protobuf(group_message)
    group_copy = module.Group()
    group_copy.load_protobuf(group_message)
    assert_close(group_copy.dump(), group.dump())

    # Group: dumped form -> message -> dumped form
    group_message.Clear()
    group_dumped = group.dump()
    module.Group.to_protobuf(group_dumped, group_message)
    assert_close(module.Group.from_protobuf(group_message), group_dumped)
def test_add_repeated(module, EXAMPLE):
    # Test add_repeated value vs n * add
    #
    # For every example value, DATA_COUNT single add_value calls must leave
    # a group in the same state as one add_repeated_value call.
    shared = module.Shared.from_dict(EXAMPLE['shared'])
    shared.realize()
    message = 'n * add_value != add_repeated_value n'
    for value in EXAMPLE['values']:
        one_by_one = module.Group.from_values(shared)
        for _ in range(DATA_COUNT):
            one_by_one.add_value(shared, value)

        batched = module.Group.from_values(shared)
        batched.add_repeated_value(shared, value, count=DATA_COUNT)

        assert_close(one_by_one.dump(), batched.dump(), err_msg=message)
Exemple #19
0
 def check_scores(mixture, counts, empty_group_count):
     # Check that mixture.score_value fills the buffer with the same scores
     # as model.score_add_value computed per group.  The buffer is pre-filled
     # with noise and compared directly afterwards, so the check only holds
     # if every entry is overwritten.  Uses `model` from an outer scope.
     sample_count = sum(counts)
     group_count = len(counts)
     nonempty_group_count = group_count - empty_group_count

     expected = []
     for group_size in counts:
         expected.append(model.score_add_value(
             group_size,
             nonempty_group_count,
             sample_count,
             empty_group_count))

     noise = numpy.random.randn(group_count)
     actual = numpy.zeros(group_count, dtype=numpy.float32)
     actual[:] = noise
     mixture.score_value(model, actual)
     assert_close(actual, expected)
     return actual
def test_add_repeated(module, EXAMPLE):
    # Test add_repeated value vs n * add
    #
    # For every example value, DATA_COUNT single add_value calls must leave
    # a group in the same state as one add_repeated_value call.
    shared = module.Shared.from_dict(EXAMPLE['shared'])
    shared.realize()
    message = 'n * add_value != add_repeated_value n'
    for value in EXAMPLE['values']:
        one_by_one = module.Group.from_values(shared)
        for _ in range(DATA_COUNT):
            one_by_one.add_value(shared, value)

        batched = module.Group.from_values(shared)
        batched.add_repeated_value(shared, value, count=DATA_COUNT)

        assert_close(one_by_one.dump(), batched.dump(), err_msg=message)
def test_add_merge(module, EXAMPLE):
    # Test group_add_value, group_merge
    #
    # A group built from all values must equal the merge of two groups built
    # from any split of a reshuffled copy of those values.
    shared = module.Shared.from_dict(EXAMPLE['shared'])
    values = EXAMPLE['values'][:]  # copy, so EXAMPLE is not shuffled
    for value in values:
        shared.add_value(value)

    numpy.random.shuffle(values)
    reference = module.Group.from_values(shared, values)

    for split in xrange(len(values) + 1):
        numpy.random.shuffle(values)
        head = module.Group.from_values(shared, values[:split])
        tail = module.Group.from_values(shared, values[split:])
        head.merge(shared, tail)
        assert_close(reference.dump(), head.dump())
def test_prob_from_scores():
    # The probability returned alongside a sample by sample_prob_from_scores
    # must match prob_from_scores evaluated for that same sample.
    require_cython()
    import distributions.lp.random
    lp_random = distributions.lp.random

    for size in range(1, 100):
        scores = numpy.random.normal(size=size).tolist()
        for _ in xrange(size):
            sample, prob1 = lp_random.sample_prob_from_scores(scores)
            assert 0 <= sample < size
            prob2 = lp_random.prob_from_scores(sample, scores)
            assert_close(
                prob1,
                prob2,
                err_msg='sample_prob_from_scores != prob_from_scores')
 def check_score_value(value):
     # Check that both mixture.score_value (whole buffer) and
     # mixture.score_value_group (one group at a time) agree with scoring
     # `value` against each group individually.  The buffer is pre-filled
     # with noise that is subtracted again afterwards, so the first check
     # only holds if the mixture adds its scores to the existing contents.
     # Uses `groups`, `shared` and `mixture` from the enclosing scope.
     expected = []
     for group in groups:
         expected.append(group.score_value(shared, value))

     actual = numpy.zeros(len(mixture), dtype=numpy.float32)
     noise = numpy.random.randn(len(actual))
     actual += noise
     mixture.score_value(shared, value, actual)
     actual -= noise
     assert_close(actual, expected, err_msg='score_value {}'.format(value))

     another = []
     for i in xrange(len(groups)):
         another.append(mixture.score_value_group(shared, i, value))
     assert_close(
         another,
         expected,
         err_msg='score_value_group {}'.format(value))
     return actual
Exemple #24
0
def test_add_merge(module, EXAMPLE):
    # Test group_add_value, group_merge
    #
    # A group built from all values must equal the merge of two groups built
    # from any split of a reshuffled copy of those values.
    shared = module.Shared.from_dict(EXAMPLE['shared'])
    values = EXAMPLE['values'][:]  # copy, so EXAMPLE is not shuffled
    for value in values:
        shared.add_value(value)

    numpy.random.shuffle(values)
    reference = module.Group.from_values(shared, values)

    for split in xrange(len(values) + 1):
        numpy.random.shuffle(values)
        head = module.Group.from_values(shared, values[:split])
        tail = module.Group.from_values(shared, values[split:])
        head.merge(shared, tail)
        assert_close(reference.dump(), head.dump())
Exemple #25
0
def test_relate(root, **unused):
    # Run preql.relate over all features, then check that the CSV it writes
    # is labelled with the feature names on both axes and that the score
    # matrix is symmetric.
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        with loom.preql.get_server(root, debug=True) as preql:
            result_out = 'related_out.csv'
            preql.relate(preql.feature_names, result_out, sample_count=10)
            with open(result_out, 'r') as f:
                reader = csv.reader(f)
                # First header cell is the row-label column; skip it.
                columns = next(reader)[1:]
                assert_equal(columns, preql.feature_names)
                size = len(columns)
                zmatrix = numpy.zeros((size, size))
                for i, row in enumerate(reader):
                    label, cells = row[0], row[1:]
                    assert_equal(label, preql.feature_names[i])
                    for j, cell in enumerate(cells):
                        zmatrix[i, j] = float(cell)
                assert_close(zmatrix, zmatrix.T)
Exemple #26
0
def test_relate(root, **unused):
    # Run preql.relate over all features, then check that the CSV it writes
    # is labelled with the feature names on both axes and that the score
    # matrix is symmetric.
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        with loom.preql.get_server(root, debug=True) as preql:
            result_out = 'related_out.csv'
            preql.relate(preql.feature_names, result_out, sample_count=10)
            with open(result_out, 'r') as f:
                reader = csv.reader(f)
                # First header cell is the row-label column; skip it.
                columns = next(reader)[1:]
                assert_equal(columns, preql.feature_names)
                size = len(columns)
                zmatrix = numpy.zeros((size, size))
                for i, row in enumerate(reader):
                    label, cells = row[0], row[1:]
                    assert_equal(label, preql.feature_names[i])
                    for j, cell in enumerate(cells):
                        zmatrix[i, j] = float(cell)
                assert_close(zmatrix, zmatrix.T)
def test_group_merge(Model, EXAMPLE):
    # Grow two partial groups in lockstep with a combined group; after each
    # round, merging a copy of the first partial group with the second must
    # reproduce the combined group.
    model = Model.model_load(EXAMPLE['model'])
    left = model.group_create()
    right = model.group_create()
    expected = model.group_create()
    actual = model.group_create()
    for _ in xrange(100):
        # One new value for each half, always sampled from the combined
        # group and added to it as well.
        for half in (left, right):
            value = model.sample_value(expected)
            model.group_add_value(expected, value)
            model.group_add_value(half, value)

        # Merge into a copy so `left` keeps only its own values.
        actual.load(left.dump())
        model.group_merge(actual, right)
        assert_close(actual.dump(), expected.dump())
def test_group_merge(module, EXAMPLE):
    # Grow two partial groups in lockstep with a combined group; after each
    # round, merging a copy of the first partial group with the second must
    # reproduce the combined group.
    shared = module.Shared.from_dict(EXAMPLE['shared'])
    shared.realize()
    left = module.Group.from_values(shared)
    right = module.Group.from_values(shared)
    expected = module.Group.from_values(shared)
    actual = module.Group.from_values(shared)
    for _ in xrange(100):
        # One new value for each half, always sampled from the combined
        # group and added to it as well.
        for half in (left, right):
            value = expected.sample_value(shared)
            expected.add_value(shared, value)
            half.add_value(shared, value)

        # Merge into a copy so `left` keeps only its own values.
        actual.load(left.dump())
        actual.merge(shared, right)
        assert_close(actual.dump(), expected.dump())
Exemple #29
0
def test_group_merge(module, EXAMPLE):
    # Grow two partial groups in lockstep with a combined group; after each
    # round, merging a copy of the first partial group with the second must
    # reproduce the combined group.
    shared = module.Shared.from_dict(EXAMPLE['shared'])
    shared.realize()
    left = module.Group.from_values(shared)
    right = module.Group.from_values(shared)
    expected = module.Group.from_values(shared)
    actual = module.Group.from_values(shared)
    for _ in xrange(100):
        # One new value for each half, always sampled from the combined
        # group and added to it as well.
        for half in (left, right):
            value = expected.sample_value(shared)
            expected.add_value(shared, value)
            half.add_value(shared, value)

        # Merge into a copy so `left` keeps only its own values.
        actual.load(left.dump())
        actual.merge(shared, right)
        assert_close(actual.dump(), expected.dump())
Exemple #30
0
def _test_normals(nich, niw):
    # Check that a one-dimensional `niw` model and the scalar `nich` model,
    # loaded with matching hyperparameters, produce the same data scores and
    # value scores while values are added and removed.
    #
    # nich, niw: model modules exposing Shared and Group classes.
    mu = np.array([30.0])
    kappa = 0.3
    psi = np.array([[2.]])
    nu = 3

    # make the NIW case
    niw_shared = niw.Shared()
    niw_shared.load({'mu': mu, 'kappa': kappa, 'psi': psi, 'nu': nu})
    niw_group = niw.Group()
    niw_group.init(niw_shared)

    # make the NIX case (scalar hyperparameters, sigmasq = psi / nu)
    nix_shared = nich.Shared()
    nix_shared.load({
        'mu': mu[0],
        'kappa': kappa,
        'sigmasq': psi[0, 0] / nu,
        'nu': nu
    })
    nix_group = nich.Group()
    nix_group.init(nix_shared)

    def check_marginals():
        # Both groups must assign the same score to the data they hold.
        assert_close(niw_group.score_data(niw_shared),
                     nix_group.score_data(nix_shared))

    data = np.array([4., 54., 3., -12., 7., 10.])
    for d in data:
        niw_group.add_value(niw_shared, np.array([d]))
        nix_group.add_value(nix_shared, d)

    # check marginals
    check_marginals()

    # remove and check
    # The scalar model is given the plain scalar here, exactly as in
    # add_value and score_value; wrapping it in a 1-element array relied on
    # implicit array-to-scalar conversion, which NumPy has deprecated.
    for index in (1, 3):
        removed = data[index]
        niw_group.remove_value(niw_shared, np.array([removed]))
        nix_group.remove_value(nix_shared, removed)
        check_marginals()

    # check posterior predictive
    values = np.array([32., -0.1])

    for value in values:
        assert_close(niw_group.score_value(niw_shared, np.array([value])),
                     nix_group.score_value(nix_shared, value))
Exemple #31
0
def _assert_copy(s1, s2, bind_fn, view, r):
    # Assert that s2 is an independent copy of s1: same sizes, groups,
    # assignments, feature hyperparameters, sufficient statistics and
    # scores, and unaffected when a value is removed from s1.
    #
    # bind_fn(state, view) must return an object with remove_value and
    # add_value; r is passed through to the scoring and mutation calls.
    assert_equals(s1.nentities(), s2.nentities())
    assert_equals(s1.nfeatures(), s2.nfeatures())
    assert_equals(set(s1.groups()), set(s2.groups()))
    assert_equals(s1.assignments(), s2.assignments())

    for fid in xrange(s1.nfeatures()):
        assert_close(s1.get_feature_hp(fid), s2.get_feature_hp(fid))

    for gid, fid in it.product(s1.groups(), range(s1.nfeatures())):
        assert_close(s1.get_suffstats(gid, fid), s2.get_suffstats(gid, fid))

    assert_almost_equals(s1.score_assignment(), s2.score_assignment())
    assert_almost_equals(
        s1.score_data(None, None, r),
        s2.score_data(None, None, r))

    # Mutate s1 only: entity 0 becomes unassigned (-1) in s1 while s2 must
    # still show the assignments recorded beforehand.
    snapshot = list(s1.assignments())
    gid = bind_fn(s1, view).remove_value(0, r)
    assert_equals(s1.assignments()[0], -1)
    assert_equals(snapshot, s2.assignments())
    bind_fn(s1, view).add_value(gid, 0, r)  # restore s1
Exemple #32
0
def test_score_student_t_dbg_lp_equiv():
    # Check that the dbg and lp implementations of the multivariate
    # Student-t score agree on 10 random 2-d and 10 random 3-d inputs.
    seed_all(0)

    def draw_case(dim):
        # The draw order (x, then mu, then cov) is kept fixed so the seeded
        # random stream is consumed in the same sequence on every run.
        x = numpy.random.uniform(low=-3., high=3., size=dim)
        nu = float(dim) + 1.
        mu = numpy.random.uniform(low=-3., high=3., size=dim)
        # NOTE(review): if random_orthonormal_matrix really returns an
        # orthonormal Q, then Q Q^T is (numerically) the identity -- confirm
        # whether a less trivial covariance was intended.
        basis = random_orthonormal_matrix(dim)
        cov = numpy.dot(basis, basis.T)
        return x, nu, mu, cov

    cases = []
    for dim in (2, 3):
        for _ in xrange(10):
            cases.append(draw_case(dim))

    for x, nu, mu, cov in cases:
        dbg_mv_score = dbg_score_student_t(x, nu, mu, cov)
        lp_mv_score = lp_score_student_t(x, nu, mu, cov)
        assert_close(dbg_mv_score, lp_mv_score)
def test_interface(Model, EXAMPLE):
    # Exercise the Model interface: required nested types, group
    # construction, dump/load round trip, add/remove/merge, the scoring and
    # sampling entry points, and the model/group dump helpers.
    for typename in ('Value', 'Group'):
        assert_hasattr(Model, typename)
        assert_is_instance(getattr(Model, typename), type)

    model = Model.model_load(EXAMPLE['model'])
    values = EXAMPLE['values']
    for value in values:
        assert_is_instance(value, Model.Value)

    # Adding values one at a time must match bulk construction.
    incremental = model.Group()
    model.group_init(incremental)
    for value in values:
        model.group_add_value(incremental, value)
    bulk = model.group_create(values)
    assert_close(incremental.dump(), bulk.dump())

    # A dump taken before re-initialisation must be restored by load().
    roundtrip = model.group_create(values)
    snapshot = roundtrip.dump()
    model.group_init(roundtrip)
    roundtrip.load(snapshot)
    assert_close(roundtrip.dump(), snapshot)

    # After removing every value the two groups must compare unequal.
    for value in values:
        model.group_remove_value(bulk, value)
    assert_not_equal(incremental, bulk)
    model.group_merge(bulk, incremental)

    # The remaining group calls are only checked for not raising.
    for value in values:
        model.score_value(incremental, value)
    for _ in xrange(10):
        sampled = model.sample_value(incremental)
        model.score_value(incremental, sampled)
        model.sample_group(10)
    model.score_group(incremental)
    model.score_group(bulk)

    assert_close(model.dump(), EXAMPLE['model'])
    assert_close(model.dump(), Model.model_dump(model))
    assert_close(incremental.dump(), Model.group_dump(incremental))
Exemple #34
0
def _assert_structure_equals(defn, s1, s2, views, r):
    # Assert that states s1 and s2 agree domain by domain and relation by
    # relation (sizes, groups, assignments, hyperparameters, sufficient
    # statistics, scores), and that removing a value from s1 leaves s2
    # untouched.  `defn.relations()[rid]` supplies the domain ids of each
    # relation; `model` is taken from the enclosing module scope.
    assert_equals(s1.ndomains(), s2.ndomains())
    assert_equals(s1.nrelations(), s2.nrelations())

    for did in xrange(s1.ndomains()):
        assert_equals(s1.nentities(did), s2.nentities(did))
        assert_equals(s1.ngroups(did), s2.ngroups(did))
        assert_equals(s1.assignments(did), s2.assignments(did))
        assert_equals(set(s1.groups(did)), set(s2.groups(did)))
        assert_close(s1.get_domain_hp(did), s2.get_domain_hp(did))
        assert_almost_equals(
            s1.score_assignment(did),
            s2.score_assignment(did))

    for rid in xrange(s1.nrelations()):
        assert_close(s1.get_relation_hp(rid), s2.get_relation_hp(rid))
        # Visit every combination of groups across the relation's domains.
        groups_per_domain = []
        for did in defn.relations()[rid]:
            groups_per_domain.append(s1.groups(did))
        for gids in it.product(*groups_per_domain):
            ss1 = s1.get_suffstats(rid, gids)
            ss2 = s2.get_suffstats(rid, gids)
            if ss1 is not None:
                assert_close(ss1, ss2)
            else:
                assert_is_none(ss2)

    assert_almost_equals(s1.score_likelihood(r), s2.score_likelihood(r))

    # Mutate s1 only: entity 0 of domain 0 becomes unassigned (-1) in s1
    # while s2 must still show the assignments recorded beforehand.
    snapshot = list(s1.assignments(0))
    bound = model.bind(s1, 0, views)
    gid = bound.remove_value(0, r)
    assert_equals(s1.assignments(0)[0], -1)
    assert_equals(snapshot, s2.assignments(0))
    bound.add_value(gid, 0, r)  # restore
Exemple #35
0
def _assert_structure_equals(defn, s1, s2, views, r):
    # Assert that states s1 and s2 agree domain by domain and relation by
    # relation (sizes, groups, assignments, hyperparameters, sufficient
    # statistics, scores), and that removing a value from s1 leaves s2
    # untouched.  `defn.relations()[rid]` supplies the domain ids of each
    # relation; `model` is taken from the enclosing module scope.
    assert_equals(s1.ndomains(), s2.ndomains())
    assert_equals(s1.nrelations(), s2.nrelations())

    for did in xrange(s1.ndomains()):
        assert_equals(s1.nentities(did), s2.nentities(did))
        assert_equals(s1.ngroups(did), s2.ngroups(did))
        assert_equals(s1.assignments(did), s2.assignments(did))
        assert_equals(set(s1.groups(did)), set(s2.groups(did)))
        assert_close(s1.get_domain_hp(did), s2.get_domain_hp(did))
        assert_almost_equals(
            s1.score_assignment(did),
            s2.score_assignment(did))

    for rid in xrange(s1.nrelations()):
        assert_close(s1.get_relation_hp(rid), s2.get_relation_hp(rid))
        # Visit every combination of groups across the relation's domains.
        groups_per_domain = []
        for did in defn.relations()[rid]:
            groups_per_domain.append(s1.groups(did))
        for gids in it.product(*groups_per_domain):
            ss1 = s1.get_suffstats(rid, gids)
            ss2 = s2.get_suffstats(rid, gids)
            if ss1 is not None:
                assert_close(ss1, ss2)
            else:
                assert_is_none(ss2)

    assert_almost_equals(s1.score_likelihood(r), s2.score_likelihood(r))

    # Mutate s1 only: entity 0 of domain 0 becomes unassigned (-1) in s1
    # while s2 must still show the assignments recorded beforehand.
    snapshot = list(s1.assignments(0))
    bound = model.bind(s1, 0, views)
    gid = bound.remove_value(0, r)
    assert_equals(s1.assignments(0)[0], -1)
    assert_equals(snapshot, s2.assignments(0))
    bound.add_value(gid, 0, r)  # restore
def test_prob_from_scores():
    # The probability returned alongside a sample by sample_prob_from_scores
    # must match prob_from_scores evaluated for that same sample.  Each
    # function is driven by its own RNG, both created with seed 0.
    try:
        import distributions.lp.random
    except ImportError:
        raise SkipTest('no cython support')
    lp_random = distributions.lp.random
    rng1 = lp_random.RNG(0)
    rng2 = lp_random.RNG(0)

    for size in range(1, 100):
        scores = numpy.random.normal(size=size).tolist()
        for _ in xrange(size):
            sample, prob1 = lp_random.sample_prob_from_scores(rng1, scores)
            assert 0 <= sample < size
            prob2 = lp_random.prob_from_scores(rng2, sample, scores)
            assert_close(
                prob1,
                prob2,
                err_msg='sample_prob_from_scores != prob_from_scores')
Exemple #37
0
def test_export_rows(encoding, rows, **unused):
    # Round trip: export `rows` to a CSV directory, import that directory
    # back into a .pbs.gz file, and check that the re-imported rows carry
    # the same diffs as the originals once both are ordered by id.
    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        rows_csv = os.path.abspath('rows_csv')
        rows_pbs = os.path.abspath('rows.pbs.gz')

        loom.format.export_rows(
            encoding_in=encoding,
            rows_in=rows,
            rows_csv_out=rows_csv,
            chunk_size=51)
        assert_found(rows_csv)
        assert_found(os.path.join(rows_csv, 'rows.0.csv.gz'))

        loom.format.import_rows(
            encoding_in=encoding,
            rows_csv_in=rows_csv,
            rows_out=rows_pbs)
        assert_found(rows_pbs)

        expected = load_rows(rows)
        actual = load_rows(rows_pbs)
        assert_equal(len(actual), len(expected))

        def row_id(row):
            return row.id

        # Order both sides by id before comparing payloads.
        actual.sort(key=row_id)
        expected.sort(key=row_id)
        expected_data = [row.diff for row in expected]
        actual_data = [row.diff for row in actual]
        assert_close(actual_data, expected_data)
Exemple #38
0
def _assert_copy(s1, s2, bind_fn, view, r):
    # Assert that s2 is an independent copy of s1: same sizes, groups,
    # assignments, feature hyperparameters, sufficient statistics and
    # scores, and unaffected when a value is removed from s1.
    #
    # bind_fn(state, view) must return an object with remove_value and
    # add_value; r is passed through to the scoring and mutation calls.
    assert_equals(s1.nentities(), s2.nentities())
    assert_equals(s1.nfeatures(), s2.nfeatures())
    assert_equals(set(s1.groups()), set(s2.groups()))
    assert_equals(s1.assignments(), s2.assignments())

    for fid in xrange(s1.nfeatures()):
        assert_close(s1.get_feature_hp(fid), s2.get_feature_hp(fid))

    for gid, fid in it.product(s1.groups(), range(s1.nfeatures())):
        assert_close(s1.get_suffstats(gid, fid), s2.get_suffstats(gid, fid))

    assert_almost_equals(s1.score_assignment(), s2.score_assignment())
    assert_almost_equals(
        s1.score_data(None, None, r),
        s2.score_data(None, None, r))

    # Mutate s1 only: entity 0 becomes unassigned (-1) in s1 while s2 must
    # still show the assignments recorded beforehand.
    snapshot = list(s1.assignments())
    gid = bind_fn(s1, view).remove_value(0, r)
    assert_equals(s1.assignments()[0], -1)
    assert_equals(snapshot, s2.assignments())
    bind_fn(s1, view).add_value(gid, 0, r)  # restore s1
def test_group_allows_debt(module, EXAMPLE):
    # Test that group.add_value can safely go into data debt
    #
    # A second group receives every sampled value three times as an add and
    # twice as a remove, in shuffled order, so removals can come before the
    # matching adds.  The net effect is one add per value, so it must end up
    # equal to the group that simply added each value once.
    shared = module.Shared.from_dict(EXAMPLE['shared'])
    shared.realize()
    values = []
    reference = module.Group.from_values(shared, values)
    for _ in range(DATA_COUNT):
        value = reference.sample_value(shared)
        values.append(value)
        reference.add_value(shared, value)

    indebted = module.Group.from_values(shared)
    additions = [(v, +1) for v in values]
    removals = [(v, -1) for v in values]
    operations = additions * 3 + removals * 2
    numpy.random.shuffle(operations)
    for value, sign in operations:
        if sign > 0:
            indebted.add_value(shared, value)
        else:
            indebted.remove_value(shared, value)

    assert_close(reference.dump(), indebted.dump())
def test_group_allows_debt(module, EXAMPLE):
    """Check that a group tolerates removals applied before their additions.

    A reference group receives each sampled value exactly once.  A second
    group then receives every value three times as an addition and twice as
    a removal, in shuffled order, so a removal may be applied before the
    matching addition ("data debt").  The net effect is one addition per
    value, so both groups must dump to the same state up to `assert_close`.
    """
    shared = module.Shared.from_dict(EXAMPLE['shared'])
    shared.realize()

    # Reference group: sample DATA_COUNT values, adding each one as we go.
    values = []
    group1 = module.Group.from_values(shared, values)
    for _ in range(DATA_COUNT):
        sampled = group1.sample_value(shared)
        values.append(sampled)
        group1.add_value(shared, sampled)

    group2 = module.Group.from_values(shared)

    # Signed operations: +1 means add, -1 means remove.
    additions = [(v, +1) for v in values]
    removals = [(v, -1) for v in values]
    signed_values = additions * 3 + removals * 2
    numpy.random.shuffle(signed_values)

    for value, sign in signed_values:
        apply_op = group2.add_value if sign > 0 else group2.remove_value
        apply_op(shared, value)

    assert_close(group1.dump(), group2.dump())
Exemple #41
0
def test_export_rows(encoding, rows, **unused):
    """Round-trip rows through CSV export and re-import.

    `rows` is exported to a directory of CSV chunks, imported back into a
    new rows file, and the re-imported rows are compared with the originals
    by their `diff` field after ordering both sides by row `id`.

    Extra keyword arguments are accepted and ignored.
    """
    def diffs_by_id(loaded):
        # Order rows by id so the comparison ignores on-disk ordering.
        return [row.diff for row in sorted(loaded, key=lambda row: row.id)]

    with tempdir(cleanup_on_error=CLEANUP_ON_ERROR):
        rows_csv = os.path.abspath('rows_csv')
        rows_pbs = os.path.abspath('rows.pbs.gz')

        # Export: expect the output directory and at least a first chunk.
        loom.format.export_rows(
            encoding_in=encoding,
            rows_in=rows,
            rows_csv_out=rows_csv,
            chunk_size=51)
        for path in (rows_csv, os.path.join(rows_csv, 'rows.0.csv.gz')):
            assert_found(path)

        # Import the exported CSV back into a rows file.
        loom.format.import_rows(
            encoding_in=encoding,
            rows_csv_in=rows_csv,
            rows_out=rows_pbs)
        assert_found(rows_pbs)

        expected = load_rows(rows)
        actual = load_rows(rows_pbs)
        assert_equal(len(actual), len(expected))
        assert_close(diffs_by_id(actual), diffs_by_id(expected))
Exemple #42
0
def test_add_remove(module, EXAMPLE):
    """Test Group add_value, remove_value, score_data and score_value.

    Checks, for the `Shared`/`Group` classes exposed by `module`:

    * an empty group has `score_data` equal to 0.0;
    * the running sum of `score_value` over sequentially added samples
      equals `score_data` of the resulting group;
    * removing every value (in shuffled order) yields the same dump as a
      freshly created empty group;
    * re-adding every value (in shuffled order) yields the same dump as the
      group had before the removals.

    `EXAMPLE['shared']` is the dict passed to `module.Shared.from_dict`.
    """
    shared = module.Shared.from_dict(EXAMPLE['shared'])
    shared.realize()

    values = []
    group = module.Group.from_values(shared)
    score = 0.0
    assert_close(group.score_data(shared), score, err_msg='p(empty) != 1')

    # Accumulate the chain-rule score while growing the group.
    for _ in range(DATA_COUNT):
        value = group.sample_value(shared)
        values.append(value)
        score += group.score_value(shared, value)
        group.add_value(shared, value)

    # Snapshot of the fully populated group, rebuilt from its own dump.
    group_all = module.Group.from_dict(group.dump())
    assert_close(
        score,
        group.score_data(shared),
        err_msg='p(x1,...,xn) != p(x1) p(x2|x1) p(xn|...)')

    # Removal order is shuffled so it differs from insertion order.
    numpy.random.shuffle(values)

    for value in values:
        group.remove_value(shared, value)

    group_empty = module.Group.from_values(shared)
    assert_close(
        group.dump(),
        group_empty.dump(),
        err_msg='group + values - values != group')

    numpy.random.shuffle(values)
    for value in values:
        group.add_value(shared, value)
    assert_close(
        group.dump(),
        group_all.dump(),
        err_msg='group - values + values != group')
def test_add_remove(Model, EXAMPLE):
    """Test group_add_value, group_remove_value, score_group, score_value.

    Checks, for a model loaded from `EXAMPLE['model']`:

    * an empty group has `score_group` equal to 0.0;
    * the running sum of `score_value` over sequentially added samples
      equals `score_group` of the resulting group;
    * removing every value (in shuffled order) yields the same dump as a
      freshly created group;
    * re-adding every value (in shuffled order) yields the same dump as the
      group had before the removals.
    """
    model = Model.model_load(EXAMPLE['model'])
    # NOTE: model.realize() and reuse of model['values'] were disabled here;
    # values are sampled from the group instead.

    values = []
    group = model.group_create()
    score = 0.0
    assert_close(model.score_group(group), score, err_msg='p(empty) != 1')

    def shuffled_apply(operation):
        # Apply `operation` to every sampled value in a fresh random order.
        random.shuffle(values)
        for sampled in values:
            operation(group, sampled)

    # Accumulate the chain-rule score while growing the group.
    for _ in range(DATA_COUNT):
        value = model.sample_value(group)
        values.append(value)
        score += model.score_value(group, value)
        model.group_add_value(group, value)

    # Snapshot of the fully populated group, rebuilt from its own dump.
    group_all = model.group_load(model.group_dump(group))
    assert_close(
        score,
        model.score_group(group),
        err_msg='p(x1,...,xn) != p(x1) p(x2|x1) p(xn|...)')

    shuffled_apply(model.group_remove_value)
    group_empty = model.group_create()
    assert_close(
        group.dump(),
        group_empty.dump(),
        err_msg='group + values - values != group')

    shuffled_apply(model.group_add_value)
    assert_close(
        group.dump(),
        group_all.dump(),
        err_msg='group - values + values != group')
def test_add_remove(module, EXAMPLE):
    """Test Group add_value, remove_value, score_data and score_value.

    Checks, for the `Shared`/`Group` classes exposed by `module`:

    * an empty group has `score_data` equal to 0.0;
    * the running sum of `score_value` over sequentially added samples
      equals `score_data` of the resulting group;
    * removing every value (in shuffled order) yields the same dump as a
      freshly created empty group;
    * re-adding every value (in shuffled order) yields the same dump as the
      group had before the removals.
    """
    shared = module.Shared.from_dict(EXAMPLE['shared'])
    shared.realize()

    values = []
    group = module.Group.from_values(shared)
    score = 0.0
    assert_close(group.score_data(shared), score, err_msg='p(empty) != 1')

    def shuffled_apply(operation):
        # Apply `operation` to every sampled value in a fresh random order.
        numpy.random.shuffle(values)
        for sampled in values:
            operation(shared, sampled)

    # Accumulate the chain-rule score while growing the group.
    for _ in range(DATA_COUNT):
        value = group.sample_value(shared)
        values.append(value)
        score += group.score_value(shared, value)
        group.add_value(shared, value)

    # Snapshot of the fully populated group, rebuilt from its own dump.
    group_all = module.Group.from_dict(group.dump())
    assert_close(
        score,
        group.score_data(shared),
        err_msg='p(x1,...,xn) != p(x1) p(x2|x1) p(xn|...)')

    shuffled_apply(group.remove_value)
    group_empty = module.Group.from_values(shared)
    assert_close(
        group.dump(),
        group_empty.dump(),
        err_msg='group + values - values != group')

    shuffled_apply(group.add_value)
    assert_close(
        group.dump(),
        group_all.dump(),
        err_msg='group - values + values != group')
Exemple #45
0
def test_score_student_t_scalar_equiv():
    """Check the scalar Student-t score against both multivariate scorers.

    Each scalar case is also expressed as a 1-dimensional multivariate
    problem (length-1 vectors and a 1x1 matrix), and the scalar, `dbg` and
    `lp` scores are required to agree pairwise.
    """
    cases = (
        # (x, nu, mu, sigmasq)
        (1.2, 5., -0.2, 0.7),
        (-3., 3., 1.2, 1.3),
    )
    for x, nu, mu, sigmasq in cases:
        # 1-dimensional equivalents of the scalar arguments.
        x_vec = numpy.array([x])
        mu_vec = numpy.array([mu])
        sigma_mat = numpy.array([[sigmasq]])

        scalar_score = scalar_score_student_t(x, nu, mu, sigmasq)
        dbg_mv_score = dbg_score_student_t(x_vec, nu, mu_vec, sigma_mat)
        lp_mv_score = lp_score_student_t(x_vec, nu, mu_vec, sigma_mat)

        score_pairs = (
            (scalar_score, dbg_mv_score),
            (scalar_score, lp_mv_score),
            (dbg_mv_score, lp_mv_score),
        )
        for left, right in score_pairs:
            assert_close(left, right)
def test_score_student_t_scalar_equiv():
    """Check the scalar Student-t score against both multivariate scorers.

    Each scalar case is also expressed as a 1-dimensional multivariate
    problem (length-1 vectors and a 1x1 matrix), and the scalar, `dbg` and
    `lp` scores are required to agree pairwise.
    """
    cases = (
        # (x, nu, mu, sigmasq)
        (1.2, 5., -0.2, 0.7),
        (-3., 3., 1.2, 1.3),
    )
    for x, nu, mu, sigmasq in cases:
        # 1-dimensional equivalents of the scalar arguments.
        x_vec = numpy.array([x])
        mu_vec = numpy.array([mu])
        sigma_mat = numpy.array([[sigmasq]])

        scalar_score = scalar_score_student_t(x, nu, mu, sigmasq)
        dbg_mv_score = dbg_score_student_t(x_vec, nu, mu_vec, sigma_mat)
        lp_mv_score = lp_score_student_t(x_vec, nu, mu_vec, sigma_mat)

        score_pairs = (
            (scalar_score, dbg_mv_score),
            (scalar_score, lp_mv_score),
            (dbg_mv_score, lp_mv_score),
        )
        for left, right in score_pairs:
            assert_close(left, right)
Exemple #47
0
def test_shared(module, EXAMPLE):
    """Test loading, dumping, and add/remove symmetry of `module.Shared`.

    Checks that `Shared` is a class, that a freshly loaded instance dumps
    back to `EXAMPLE['shared']`, that two instances fed the same values
    after the same `seed_all(0)` call end up with matching dumps, and that
    removing those values restores the original dump.
    """
    assert_hasattr(module, 'Shared')
    assert_is_instance(module.Shared, type)

    raw_shared = EXAMPLE['shared']
    shared1 = module.Shared.from_dict(raw_shared)
    shared2 = module.Shared.from_dict(raw_shared)
    assert_close(shared1.dump(), raw_shared)

    values = EXAMPLE['values']

    # Feed both instances identically, reseeding before each pass.
    for instance in (shared1, shared2):
        seed_all(0)
        for value in values:
            instance.add_value(value)
    assert_close(shared1.dump(), shared2.dump())

    # Undo the additions on the first instance only.
    for value in values:
        shared1.remove_value(value)
    assert_close(shared1.dump(), raw_shared)
def test_shared(module, EXAMPLE):
    """Test loading, dumping, and add/remove symmetry of `module.Shared`.

    Checks that `Shared` is a class, that a freshly loaded instance dumps
    back to `EXAMPLE['shared']`, that two instances fed the same values
    after the same `seed_all(0)` call end up with matching dumps, and that
    removing those values restores the original dump.
    """
    assert_hasattr(module, 'Shared')
    assert_is_instance(module.Shared, type)

    raw_shared = EXAMPLE['shared']
    shared1 = module.Shared.from_dict(raw_shared)
    shared2 = module.Shared.from_dict(raw_shared)
    assert_close(shared1.dump(), raw_shared)

    values = EXAMPLE['values']

    # Feed both instances identically, reseeding before each pass.
    for instance in (shared1, shared2):
        seed_all(0)
        for value in values:
            instance.add_value(value)
    assert_close(shared1.dump(), shared2.dump())

    # Undo the additions on the first instance only.
    for value in values:
        shared1.remove_value(value)
    assert_close(shared1.dump(), raw_shared)
Exemple #49
0
 def check_score_data():
     """Assert the mixture's data score equals the sum over its groups.

     `groups`, `shared` and `mixture` are taken from the enclosing scope,
     which is not visible in this excerpt.
     """
     group_scores = [group.score_data(shared) for group in groups]
     expected = sum(group_scores)
     actual = mixture.score_data(shared)
     assert_close(actual, expected, err_msg='score_data')
def test_load_and_dump(Model, EXAMPLE, *unused):
    """Check that a model loaded from `EXAMPLE` dumps back to `EXAMPLE`.

    Extra positional arguments are accepted and ignored.
    """
    model = Model()
    model.load(EXAMPLE)
    # The dump is compared against the very object the model was loaded from.
    assert_close(EXAMPLE, model.dump())
Exemple #51
0
def test_load_and_dump(Model, EXAMPLE, *unused):
    """Check that a model loaded from `EXAMPLE` dumps back to `EXAMPLE`.

    Extra positional arguments are accepted and ignored.
    """
    model = Model()
    model.load(EXAMPLE)
    # The dump is compared against the very object the model was loaded from.
    assert_close(EXAMPLE, model.dump())
 def check_score_data():
     """Assert the mixture's data score equals the sum over its groups.

     `groups`, `shared` and `mixture` are taken from the enclosing scope,
     which is not visible in this excerpt.
     """
     group_scores = [group.score_data(shared) for group in groups]
     expected = sum(group_scores)
     actual = mixture.score_data(shared)
     assert_close(actual, expected, err_msg='score_data')