def test_score_counts_is_normalized(Model, EXAMPLE, sample_count):
    for sample_size in iter_valid_sizes(EXAMPLE, max_size=10):
        model = Model()
        model.load(EXAMPLE)

        if Model.__name__ == 'LowEntropy' and sample_size < model.dataset_size:
            print 'WARNING LowEntropy.score_counts normalization is imprecise'
            print '  when sample_size < dataset_size'
            tol = 0.5
        else:
            tol = 0.01

        probs_dict = {}
        for _ in xrange(sample_count):
            value = model.sample_assignments(sample_size)
            sample = canonicalize(value)
            if sample not in probs_dict:
                assignments = dict(enumerate(value))
                counts = count_assignments(assignments)
                prob = math.exp(model.score_counts(counts))
                probs_dict[sample] = prob

        total = sum(probs_dict.values())
        assert_less(abs(total - 1), tol, 'not normalized: {}'.format(total))
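# canonicalize() and count_assignments() are assumed to be provided elsewhere
# in this test module.  The sketches below (hypothetical, prefixed to avoid
# clashing with the real helpers) show only the behavior the tests above rely
# on: a label-invariant, hashable partition key, and a list of nonempty group
# sizes.
from collections import defaultdict


def _sketch_canonicalize(assignment_vector):
    # Group datapoint indices by assigned group id, then forget the labels so
    # that e.g. [0, 0, 1] and [1, 1, 0] compare (and hash) equal.
    groups = defaultdict(list)
    for datapoint, groupid in enumerate(assignment_vector):
        groups[groupid].append(datapoint)
    return frozenset(frozenset(group) for group in groups.itervalues())


def _sketch_count_assignments(assignments):
    # Map {datapoint: groupid} to a list of group sizes, one per nonempty group.
    counts = defaultdict(int)
    for groupid in assignments.itervalues():
        counts[groupid] += 1
    return list(counts.itervalues())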
def test_sample_matches_score_counts(Model, EXAMPLE, sample_count):
    for size in iter_valid_sizes(EXAMPLE, max_size=10):
        model = Model()
        model.load(EXAMPLE)

        samples = []
        probs_dict = {}
        for _ in xrange(sample_count):
            value = model.sample_assignments(size)
            sample = canonicalize(value)
            samples.append(sample)
            if sample not in probs_dict:
                assignments = dict(enumerate(value))
                counts = count_assignments(assignments)
                prob = math.exp(model.score_counts(counts))
                probs_dict[sample] = prob

        # renormalize here; test normalization separately
        total = sum(probs_dict.values())
        for key in probs_dict:
            probs_dict[key] /= total

        gof = discrete_goodness_of_fit(samples, probs_dict, plot=True)
        print '{} gof = {:0.3g}'.format(Model.__name__, gof)
        assert_greater(gof, MIN_GOODNESS_OF_FIT)
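# discrete_goodness_of_fit() is assumed to come from the shared test
# utilities; the test above only relies on it returning a statistic that is
# small when the empirical sample frequencies disagree with probs_dict.  A
# generic, hypothetical stand-in is a Pearson chi-square p-value (sketch only;
# it ignores the low-expected-count caveats a real implementation would handle
# and omits the plot option used above):
def _sketch_discrete_goodness_of_fit(samples, probs_dict):
    from collections import Counter
    from scipy.stats import chisquare
    total = len(samples)
    observed = Counter(samples)
    keys = list(probs_dict)  # every observed sample has an entry in probs_dict
    f_obs = [observed.get(key, 0) for key in keys]
    f_exp = [probs_dict[key] * total for key in keys]
    _, p_value = chisquare(f_obs, f_exp)
    return p_value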
def _test_models(Model, size):
    model = Model()

    if Model.__name__ == 'LowEntropy':
        raise SkipTest('FIXME LowEntropy.score_counts is not normalized')

    for i, EXAMPLE in enumerate(Model.EXAMPLES):
        print 'Example {}'.format(i)
        model.load(EXAMPLE)

        samples = []
        probs_dict = {}
        for _ in xrange(SAMPLE_COUNT):
            value = model.sample_assignments(size)
            assignments = dict(enumerate(value))
            counts = count_assignments(assignments)
            prob = math.exp(model.score_counts(counts))
            sample = canonicalize(value)
            samples.append(sample)
            probs_dict[sample] = prob

        total = sum(probs_dict.values())
        assert_less(
            abs(total - 1),
            1e-2,
            'not normalized: {}'.format(total))

        gof = discrete_goodness_of_fit(samples, probs_dict, plot=True)
        print '{} gof = {:0.3g}'.format(Model.__name__, gof)
        assert_greater(gof, MIN_GOODNESS_OF_FIT)
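# Hypothetical driver (not part of the original module): _test_models follows
# the nose test-generator convention, so it could be exposed roughly as below,
# assuming MODELS enumerates the clustering model classes under test.  It is
# given a _sketch prefix here so it is neither collected by nose nor collides
# with any real driver.
def _sketch_test_models():
    for Model in MODELS:
        for size in [2, 5, 10]:
            yield _test_models, Model, size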
def test_mixture_score_matches_score_add_value(Model, EXAMPLE, *unused):
    sample_count = 200
    model = Model()
    model.load(EXAMPLE)
    if Model.__name__ == 'LowEntropy' and sample_count > model.dataset_size:
        raise SkipTest('skipping trivial example')

    # Draw one assignment vector and use its group sizes to seed the mixture.
    assignment_vector = model.sample_assignments(sample_count)
    assignments = dict(enumerate(assignment_vector))
    nonempty_counts = count_assignments(assignments)
    nonempty_group_count = len(nonempty_counts)
    assert_greater(nonempty_group_count, 1, "test is inaccurate")

    def check_counts(mixture, counts, empty_group_count):
        # print 'counts =', counts
        empty_groupids = frozenset(mixture.empty_groupids)
        assert_equal(len(empty_groupids), empty_group_count)
        for groupid in empty_groupids:
            assert_equal(counts[groupid], 0)

    def check_scores(mixture, counts, empty_group_count):
        # Mixture.score_value must agree with Model.score_add_value per group.
        sample_count = sum(counts)
        nonempty_group_count = len(counts) - empty_group_count
        expected = [
            model.score_add_value(
                group_size,
                nonempty_group_count,
                sample_count,
                empty_group_count)
            for group_size in counts
        ]
        noise = numpy.random.randn(len(counts))
        actual = numpy.zeros(len(counts), dtype=numpy.float32)
        actual[:] = noise
        mixture.score_value(model, actual)
        assert_close(actual, expected)
        return actual

    for empty_group_count in [1, 10]:
        print 'empty_group_count =', empty_group_count
        counts = nonempty_counts + [0] * empty_group_count
        numpy.random.shuffle(counts)
        mixture = Model.Mixture()
        id_tracker = MixtureIdTracker()

        print 'init'
        mixture.init(model, counts)
        id_tracker.init(len(counts))
        check_counts(mixture, counts, empty_group_count)
        check_scores(mixture, counts, empty_group_count)

        print 'adding'
        groupids = []
        for _ in xrange(sample_count):
            check_counts(mixture, counts, empty_group_count)
            scores = check_scores(mixture, counts, empty_group_count)
            probs = scores_to_probs(scores)
            groupid = sample_discrete(probs)
            expected_group_added = (counts[groupid] == 0)
            counts[groupid] += 1
            actual_group_added = mixture.add_value(model, groupid)
            assert_equal(actual_group_added, expected_group_added)
            groupids.append(groupid)
            if actual_group_added:
                id_tracker.add_group()
                counts.append(0)
        check_counts(mixture, counts, empty_group_count)
        check_scores(mixture, counts, empty_group_count)

        print 'removing'
        for global_groupid in groupids:
            groupid = id_tracker.global_to_packed(global_groupid)
            counts[groupid] -= 1
            expected_group_removed = (counts[groupid] == 0)
            actual_group_removed = mixture.remove_value(model, groupid)
            assert_equal(actual_group_removed, expected_group_removed)
            if expected_group_removed:
                id_tracker.remove_group(groupid)
                # The mixture packs group slots, so removal moves the last
                # slot into the freed position; mirror that in local counts.
                back = counts.pop()
                if groupid < len(counts):
                    counts[groupid] = back
            check_counts(mixture, counts, empty_group_count)
            check_scores(mixture, counts, empty_group_count)
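# MixtureIdTracker is assumed to be defined elsewhere in this module.  The
# sketch below (hypothetical name, prefixed to avoid clashing with the real
# class) shows the bookkeeping the test above depends on: stable "global"
# group ids mapped onto the mixture's densely packed slots, where removing a
# packed slot moves the last slot into the hole (mirroring counts.pop() above).
class _SketchMixtureIdTracker(object):

    def init(self, group_count):
        # packed slot -> global id; global ids are assigned in creation order
        self._packed_to_global = list(range(group_count))
        self._next_global_id = group_count

    def add_group(self):
        self._packed_to_global.append(self._next_global_id)
        self._next_global_id += 1

    def remove_group(self, packed_id):
        # swap-with-last removal, matching the mixture's packed layout
        back = self._packed_to_global.pop()
        if packed_id < len(self._packed_to_global):
            self._packed_to_global[packed_id] = back

    def global_to_packed(self, global_id):
        return self._packed_to_global.index(global_id)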