def test_score_counts_is_normalized(Model, EXAMPLE, sample_count):

    for sample_size in iter_valid_sizes(EXAMPLE, max_size=10):
        model = Model()
        model.load(EXAMPLE)

        if Model.__name__ == 'LowEntropy' and sample_size < model.dataset_size:
            print 'WARNING LowEntropy.score_counts normalization is imprecise'
            print '  when sample_size < dataset_size'
            tol = 0.5
        else:
            tol = 0.01

        probs_dict = {}
        for _ in xrange(sample_count):
            value = model.sample_assignments(sample_size)
            sample = canonicalize(value)
            if sample not in probs_dict:
                assignments = dict(enumerate(value))
                counts = count_assignments(assignments)
                prob = math.exp(model.score_counts(counts))
                probs_dict[sample] = prob

        total = sum(probs_dict.values())
        assert_less(abs(total - 1), tol, 'not normalized: {}'.format(total))
def test_sample_matches_score_counts(Model, EXAMPLE, sample_count):
    for size in iter_valid_sizes(EXAMPLE, max_size=10):
        model = Model()
        model.load(EXAMPLE)

        samples = []
        probs_dict = {}
        for _ in xrange(sample_count):
            value = model.sample_assignments(size)
            sample = canonicalize(value)
            samples.append(sample)
            if sample not in probs_dict:
                assignments = dict(enumerate(value))
                counts = count_assignments(assignments)
                prob = math.exp(model.score_counts(counts))
                probs_dict[sample] = prob

        # renormalize here; test normalization separately
        total = sum(probs_dict.values())
        for key in probs_dict:
            probs_dict[key] /= total

        gof = discrete_goodness_of_fit(samples, probs_dict, plot=True)
        print '{} gof = {:0.3g}'.format(Model.__name__, gof)
        assert_greater(gof, MIN_GOODNESS_OF_FIT)
Esempio n. 3
0
def test_score_counts_is_normalized(Model, EXAMPLE, sample_count):

    for sample_size in iter_valid_sizes(EXAMPLE, max_size=10):
        model = Model()
        model.load(EXAMPLE)

        if Model.__name__ == 'LowEntropy' and sample_size < model.dataset_size:
            print 'WARNING LowEntropy.score_counts normalization is imprecise'
            print '  when sample_size < dataset_size'
            tol = 0.5
        else:
            tol = 0.01

        probs_dict = {}
        for _ in xrange(sample_count):
            value = model.sample_assignments(sample_size)
            sample = canonicalize(value)
            if sample not in probs_dict:
                assignments = dict(enumerate(value))
                counts = count_assignments(assignments)
                prob = math.exp(model.score_counts(counts))
                probs_dict[sample] = prob

        total = sum(probs_dict.values())
        assert_less(abs(total - 1), tol, 'not normalized: {}'.format(total))
Esempio n. 4
0
def test_sample_matches_score_counts(Model, EXAMPLE, sample_count):
    for size in iter_valid_sizes(EXAMPLE, max_size=10):
        model = Model()
        model.load(EXAMPLE)

        samples = []
        probs_dict = {}
        for _ in xrange(sample_count):
            value = model.sample_assignments(size)
            sample = canonicalize(value)
            samples.append(sample)
            if sample not in probs_dict:
                assignments = dict(enumerate(value))
                counts = count_assignments(assignments)
                prob = math.exp(model.score_counts(counts))
                probs_dict[sample] = prob

        # renormalize here; test normalization separately
        total = sum(probs_dict.values())
        for key in probs_dict:
            probs_dict[key] /= total

        gof = discrete_goodness_of_fit(samples, probs_dict, plot=True)
        print '{} gof = {:0.3g}'.format(Model.__name__, gof)
        assert_greater(gof, MIN_GOODNESS_OF_FIT)
def _test_models(Model, size):
        model = Model()

        if Model.__name__ == 'LowEntropy':
            raise SkipTest('FIXME LowEntropy.score_counts is not normalized')

        for i, EXAMPLE in enumerate(Model.EXAMPLES):
            print 'Example {}'.format(i)
            model.load(EXAMPLE)
            samples = []
            probs_dict = {}
            for _ in xrange(SAMPLE_COUNT):
                value = model.sample_assignments(size)
                assignments = dict(enumerate(value))
                counts = count_assignments(assignments)
                prob = math.exp(model.score_counts(counts))
                sample = canonicalize(value)
                samples.append(sample)
                probs_dict[sample] = prob

            total = sum(probs_dict.values())
            assert_less(
                abs(total - 1),
                1e-2,
                'not normalized: {}'.format(total))

            gof = discrete_goodness_of_fit(samples, probs_dict, plot=True)
            print '{} gof = {:0.3g}'.format(Model.__name__, gof)
            assert_greater(gof, MIN_GOODNESS_OF_FIT)
def test_mixture_score_matches_score_add_value(Model, EXAMPLE, *unused):
    sample_count = 200
    model = Model()
    model.load(EXAMPLE)

    if Model.__name__ == 'LowEntropy' and sample_count > model.dataset_size:
        raise SkipTest('skipping trivial example')

    assignment_vector = model.sample_assignments(sample_count)
    assignments = dict(enumerate(assignment_vector))
    nonempty_counts = count_assignments(assignments)
    nonempty_group_count = len(nonempty_counts)
    assert_greater(nonempty_group_count, 1, "test is inaccurate")

    def check_counts(mixture, counts, empty_group_count):
        # print 'counts =', counts
        empty_groupids = frozenset(mixture.empty_groupids)
        assert_equal(len(empty_groupids), empty_group_count)
        for groupid in empty_groupids:
            assert_equal(counts[groupid], 0)

    def check_scores(mixture, counts, empty_group_count):
        sample_count = sum(counts)
        nonempty_group_count = len(counts) - empty_group_count
        expected = [
            model.score_add_value(
                group_size,
                nonempty_group_count,
                sample_count,
                empty_group_count)
            for group_size in counts
        ]
        noise = numpy.random.randn(len(counts))
        actual = numpy.zeros(len(counts), dtype=numpy.float32)
        actual[:] = noise
        mixture.score_value(model, actual)
        assert_close(actual, expected)
        return actual

    for empty_group_count in [1, 10]:
        print 'empty_group_count =', empty_group_count
        counts = nonempty_counts + [0] * empty_group_count
        numpy.random.shuffle(counts)
        mixture = Model.Mixture()
        id_tracker = MixtureIdTracker()

        print 'init'
        mixture.init(model, counts)
        id_tracker.init(len(counts))
        check_counts(mixture, counts, empty_group_count)
        check_scores(mixture, counts, empty_group_count)

        print 'adding'
        groupids = []
        for _ in xrange(sample_count):
            check_counts(mixture, counts, empty_group_count)
            scores = check_scores(mixture, counts, empty_group_count)
            probs = scores_to_probs(scores)
            groupid = sample_discrete(probs)
            expected_group_added = (counts[groupid] == 0)
            counts[groupid] += 1
            actual_group_added = mixture.add_value(model, groupid)
            assert_equal(actual_group_added, expected_group_added)
            groupids.append(groupid)
            if actual_group_added:
                id_tracker.add_group()
                counts.append(0)

        check_counts(mixture, counts, empty_group_count)
        check_scores(mixture, counts, empty_group_count)

        print 'removing'
        for global_groupid in groupids:
            groupid = id_tracker.global_to_packed(global_groupid)
            counts[groupid] -= 1
            expected_group_removed = (counts[groupid] == 0)
            actual_group_removed = mixture.remove_value(model, groupid)
            assert_equal(actual_group_removed, expected_group_removed)
            if expected_group_removed:
                id_tracker.remove_group(groupid)
                back = counts.pop()
                if groupid < len(counts):
                    counts[groupid] = back
            check_counts(mixture, counts, empty_group_count)
            check_scores(mixture, counts, empty_group_count)
Esempio n. 7
0
def test_mixture_score_matches_score_add_value(Model, EXAMPLE, *unused):
    sample_count = 200
    model = Model()
    model.load(EXAMPLE)

    if Model.__name__ == 'LowEntropy' and sample_count > model.dataset_size:
        raise SkipTest('skipping trivial example')

    assignment_vector = model.sample_assignments(sample_count)
    assignments = dict(enumerate(assignment_vector))
    nonempty_counts = count_assignments(assignments)
    nonempty_group_count = len(nonempty_counts)
    assert_greater(nonempty_group_count, 1, "test is inaccurate")

    def check_counts(mixture, counts, empty_group_count):
        # print 'counts =', counts
        empty_groupids = frozenset(mixture.empty_groupids)
        assert_equal(len(empty_groupids), empty_group_count)
        for groupid in empty_groupids:
            assert_equal(counts[groupid], 0)

    def check_scores(mixture, counts, empty_group_count):
        sample_count = sum(counts)
        nonempty_group_count = len(counts) - empty_group_count
        expected = [
            model.score_add_value(group_size, nonempty_group_count,
                                  sample_count, empty_group_count)
            for group_size in counts
        ]
        noise = numpy.random.randn(len(counts))
        actual = numpy.zeros(len(counts), dtype=numpy.float32)
        actual[:] = noise
        mixture.score_value(model, actual)
        assert_close(actual, expected)
        return actual

    for empty_group_count in [1, 10]:
        print 'empty_group_count =', empty_group_count
        counts = nonempty_counts + [0] * empty_group_count
        numpy.random.shuffle(counts)
        mixture = Model.Mixture()
        id_tracker = MixtureIdTracker()

        print 'init'
        mixture.init(model, counts)
        id_tracker.init(len(counts))
        check_counts(mixture, counts, empty_group_count)
        check_scores(mixture, counts, empty_group_count)

        print 'adding'
        groupids = []
        for _ in xrange(sample_count):
            check_counts(mixture, counts, empty_group_count)
            scores = check_scores(mixture, counts, empty_group_count)
            probs = scores_to_probs(scores)
            groupid = sample_discrete(probs)
            expected_group_added = (counts[groupid] == 0)
            counts[groupid] += 1
            actual_group_added = mixture.add_value(model, groupid)
            assert_equal(actual_group_added, expected_group_added)
            groupids.append(groupid)
            if actual_group_added:
                id_tracker.add_group()
                counts.append(0)

        check_counts(mixture, counts, empty_group_count)
        check_scores(mixture, counts, empty_group_count)

        print 'removing'
        for global_groupid in groupids:
            groupid = id_tracker.global_to_packed(global_groupid)
            counts[groupid] -= 1
            expected_group_removed = (counts[groupid] == 0)
            actual_group_removed = mixture.remove_value(model, groupid)
            assert_equal(actual_group_removed, expected_group_removed)
            if expected_group_removed:
                id_tracker.remove_group(groupid)
                back = counts.pop()
                if groupid < len(counts):
                    counts[groupid] = back
            check_counts(mixture, counts, empty_group_count)
            check_scores(mixture, counts, empty_group_count)