Ejemplo n.º 1
0
def test_score_add_value_matches_score_counts(Model, EXAMPLE, sample_count):
    for sample_size in iter_valid_sizes(EXAMPLE, min_size=2, max_size=10):
        model = Model()
        model.load(EXAMPLE)

        samples = set(
            canonicalize(model.sample_assignments(sample_size - 1))
            for _ in xrange(sample_count))

        for sample in samples:
            nonempty_group_count = len(sample)
            counts = map(len, sample)
            actual = numpy.zeros(len(counts) + 1)
            expected = numpy.zeros(len(counts) + 1)

            # add to existing group
            for i, group in enumerate(sample):
                group_size = len(sample[i])
                expected[i] = model.score_counts(add_to_counts(counts, i))
                actual[i] = model.score_add_value(group_size,
                                                  nonempty_group_count,
                                                  sample_size - 1)

            # add to new group
            i = len(counts)
            group_size = 0
            expected[i] = model.score_counts(counts + [1])
            actual[i] = model.score_add_value(group_size, nonempty_group_count,
                                              sample_size - 1)

            actual = scores_to_probs(actual)
            expected = scores_to_probs(expected)
            print actual, expected
            assert_close(actual, expected, tol=0.05)
def test_score_add_value_matches_score_counts(Model, EXAMPLE, sample_count):
    for sample_size in iter_valid_sizes(EXAMPLE, min_size=2, max_size=10):
        model = Model()
        model.load(EXAMPLE)

        samples = set(canonicalize(model.sample_assignments(sample_size - 1)) for _ in xrange(sample_count))

        for sample in samples:
            nonempty_group_count = len(sample)
            counts = map(len, sample)
            actual = numpy.zeros(len(counts) + 1)
            expected = numpy.zeros(len(counts) + 1)

            # add to existing group
            for i, group in enumerate(sample):
                group_size = len(sample[i])
                expected[i] = model.score_counts(add_to_counts(counts, i))
                actual[i] = model.score_add_value(group_size, nonempty_group_count, sample_size - 1)

            # add to new group
            i = len(counts)
            group_size = 0
            expected[i] = model.score_counts(counts + [1])
            actual[i] = model.score_add_value(group_size, nonempty_group_count, sample_size - 1)

            actual = scores_to_probs(actual)
            expected = scores_to_probs(expected)
            print actual, expected
            assert_close(actual, expected, tol=0.05)
def test_mixture_score_matches_score_add_value(Model, EXAMPLE, *unused):
    sample_count = 200
    model = Model()
    model.load(EXAMPLE)

    if Model.__name__ == 'LowEntropy' and sample_count > model.dataset_size:
        raise SkipTest('skipping trivial example')

    assignment_vector = model.sample_assignments(sample_count)
    assignments = dict(enumerate(assignment_vector))
    nonempty_counts = count_assignments(assignments)
    nonempty_group_count = len(nonempty_counts)
    assert_greater(nonempty_group_count, 1, "test is inaccurate")

    def check_counts(mixture, counts, empty_group_count):
        # print 'counts =', counts
        empty_groupids = frozenset(mixture.empty_groupids)
        assert_equal(len(empty_groupids), empty_group_count)
        for groupid in empty_groupids:
            assert_equal(counts[groupid], 0)

    def check_scores(mixture, counts, empty_group_count):
        sample_count = sum(counts)
        nonempty_group_count = len(counts) - empty_group_count
        expected = [
            model.score_add_value(
                group_size,
                nonempty_group_count,
                sample_count,
                empty_group_count)
            for group_size in counts
        ]
        noise = numpy.random.randn(len(counts))
        actual = numpy.zeros(len(counts), dtype=numpy.float32)
        actual[:] = noise
        mixture.score_value(model, actual)
        assert_close(actual, expected)
        return actual

    for empty_group_count in [1, 10]:
        print 'empty_group_count =', empty_group_count
        counts = nonempty_counts + [0] * empty_group_count
        numpy.random.shuffle(counts)
        mixture = Model.Mixture()
        id_tracker = MixtureIdTracker()

        print 'init'
        mixture.init(model, counts)
        id_tracker.init(len(counts))
        check_counts(mixture, counts, empty_group_count)
        check_scores(mixture, counts, empty_group_count)

        print 'adding'
        groupids = []
        for _ in xrange(sample_count):
            check_counts(mixture, counts, empty_group_count)
            scores = check_scores(mixture, counts, empty_group_count)
            probs = scores_to_probs(scores)
            groupid = sample_discrete(probs)
            expected_group_added = (counts[groupid] == 0)
            counts[groupid] += 1
            actual_group_added = mixture.add_value(model, groupid)
            assert_equal(actual_group_added, expected_group_added)
            groupids.append(groupid)
            if actual_group_added:
                id_tracker.add_group()
                counts.append(0)

        check_counts(mixture, counts, empty_group_count)
        check_scores(mixture, counts, empty_group_count)

        print 'removing'
        for global_groupid in groupids:
            groupid = id_tracker.global_to_packed(global_groupid)
            counts[groupid] -= 1
            expected_group_removed = (counts[groupid] == 0)
            actual_group_removed = mixture.remove_value(model, groupid)
            assert_equal(actual_group_removed, expected_group_removed)
            if expected_group_removed:
                id_tracker.remove_group(groupid)
                back = counts.pop()
                if groupid < len(counts):
                    counts[groupid] = back
            check_counts(mixture, counts, empty_group_count)
            check_scores(mixture, counts, empty_group_count)
Ejemplo n.º 4
0
def test_mixture_score_matches_score_add_value(Model, EXAMPLE, *unused):
    sample_count = 200
    model = Model()
    model.load(EXAMPLE)

    if Model.__name__ == 'LowEntropy' and sample_count > model.dataset_size:
        raise SkipTest('skipping trivial example')

    assignment_vector = model.sample_assignments(sample_count)
    assignments = dict(enumerate(assignment_vector))
    nonempty_counts = count_assignments(assignments)
    nonempty_group_count = len(nonempty_counts)
    assert_greater(nonempty_group_count, 1, "test is inaccurate")

    def check_counts(mixture, counts, empty_group_count):
        # print 'counts =', counts
        empty_groupids = frozenset(mixture.empty_groupids)
        assert_equal(len(empty_groupids), empty_group_count)
        for groupid in empty_groupids:
            assert_equal(counts[groupid], 0)

    def check_scores(mixture, counts, empty_group_count):
        sample_count = sum(counts)
        nonempty_group_count = len(counts) - empty_group_count
        expected = [
            model.score_add_value(group_size, nonempty_group_count,
                                  sample_count, empty_group_count)
            for group_size in counts
        ]
        noise = numpy.random.randn(len(counts))
        actual = numpy.zeros(len(counts), dtype=numpy.float32)
        actual[:] = noise
        mixture.score_value(model, actual)
        assert_close(actual, expected)
        return actual

    for empty_group_count in [1, 10]:
        print 'empty_group_count =', empty_group_count
        counts = nonempty_counts + [0] * empty_group_count
        numpy.random.shuffle(counts)
        mixture = Model.Mixture()
        id_tracker = MixtureIdTracker()

        print 'init'
        mixture.init(model, counts)
        id_tracker.init(len(counts))
        check_counts(mixture, counts, empty_group_count)
        check_scores(mixture, counts, empty_group_count)

        print 'adding'
        groupids = []
        for _ in xrange(sample_count):
            check_counts(mixture, counts, empty_group_count)
            scores = check_scores(mixture, counts, empty_group_count)
            probs = scores_to_probs(scores)
            groupid = sample_discrete(probs)
            expected_group_added = (counts[groupid] == 0)
            counts[groupid] += 1
            actual_group_added = mixture.add_value(model, groupid)
            assert_equal(actual_group_added, expected_group_added)
            groupids.append(groupid)
            if actual_group_added:
                id_tracker.add_group()
                counts.append(0)

        check_counts(mixture, counts, empty_group_count)
        check_scores(mixture, counts, empty_group_count)

        print 'removing'
        for global_groupid in groupids:
            groupid = id_tracker.global_to_packed(global_groupid)
            counts[groupid] -= 1
            expected_group_removed = (counts[groupid] == 0)
            actual_group_removed = mixture.remove_value(model, groupid)
            assert_equal(actual_group_removed, expected_group_removed)
            if expected_group_removed:
                id_tracker.remove_group(groupid)
                back = counts.pop()
                if groupid < len(counts):
                    counts[groupid] = back
            check_counts(mixture, counts, empty_group_count)
            check_scores(mixture, counts, empty_group_count)