def _test_models(Model, size):
    """Check that sampled assignments of ``Model`` agree with its scores.

    For every entry of ``Model.EXAMPLES`` the model is loaded,
    ``SAMPLE_COUNT`` assignment vectors of length ``size`` are sampled and
    each distinct canonicalized sample is given the probability
    ``exp(model.score_counts(counts))``.  The test asserts that those
    probabilities sum to one within 1e-2 and that the goodness of fit of
    the samples exceeds ``MIN_GOODNESS_OF_FIT``.

    Args:
        Model: model class exposing ``EXAMPLES``, ``load``,
            ``sample_assignments`` and ``score_counts``.
        size: number of items passed to ``sample_assignments``.

    Raises:
        SkipTest: when ``Model.__name__`` is ``'LowEntropy'``.
    """
    # Decide about skipping first, so a model that is going to be skipped
    # is never instantiated.
    if Model.__name__ == 'LowEntropy':
        raise SkipTest('FIXME LowEntropy.score_counts is not normalized')

    model = Model()
    for i, EXAMPLE in enumerate(Model.EXAMPLES):
        # Single-argument print(...) prints the same text on Python 2 and 3.
        print('Example {}'.format(i))
        model.load(EXAMPLE)
        samples = []
        probs_dict = {}
        for _ in xrange(SAMPLE_COUNT):
            value = model.sample_assignments(size)
            sample = canonicalize(value)
            samples.append(sample)
            # Score every distinct sample only once, the same caching that
            # test_sample_matches_score_counts uses.
            # NOTE(review): assumes equal canonical forms score equally.
            if sample not in probs_dict:
                counts = count_assignments(dict(enumerate(value)))
                probs_dict[sample] = math.exp(model.score_counts(counts))

        # The observed samples should carry (almost) all probability mass.
        total = sum(probs_dict.values())
        assert_less(
            abs(total - 1),
            1e-2,
            'not normalized: {}'.format(total))

        gof = discrete_goodness_of_fit(samples, probs_dict, plot=True)
        print('{} gof = {:0.3g}'.format(Model.__name__, gof))
        assert_greater(gof, MIN_GOODNESS_OF_FIT)
def _check_marginal_samples_match_scores(server, row, fi):
    """Check that samples of one feature agree with the server's scores.

    The row is converted with ``loom.query.protobuf_to_data_row``, entry
    ``fi`` is set to ``None`` and ``SAMPLE_COUNT`` samples are requested
    with a mask that is true only at position ``fi``.  A sampled value is
    assigned the probability ``exp(score(row with value) - base_score)``,
    where ``base_score`` is the score of the row with entry ``fi`` blank.
    The samples are then compared with these probabilities through a
    goodness-of-fit test.

    Args:
        server: object providing ``sample(to_sample, row, count)`` and
            ``score(row)``.
        row: object whose ``diff`` attribute is converted to a data row.
        fi: index of the feature under test.

    Raises:
        TypeError: if the sampled value is neither bool, int nor float.
            Previously this case failed with an unbound ``gof``.
    """
    row = loom.query.protobuf_to_data_row(row.diff)
    row[fi] = None
    to_sample = [i == fi for i in range(len(row))]
    samples = server.sample(to_sample, row, SAMPLE_COUNT)
    val = samples[0][fi]
    base_score = server.score(row)

    if isinstance(val, (bool, int)):
        probs_dict = {}
        samples = [sample[fi] for sample in samples]
        for sample in set(samples):
            row[fi] = sample
            probs_dict[sample] = numpy.exp(server.score(row) - base_score)
        if len(probs_dict) == 1:
            # Only one value was ever sampled: it must have probability 1.
            (only_prob,) = probs_dict.values()
            assert_almost_equal(only_prob, 1., places=SCORE_PLACES)
            return
        if min(probs_dict.values()) < MIN_CATEGORICAL_PROB:
            # Some observed value is rarer than the threshold; the
            # goodness-of-fit check is not run in that case.
            return
        gof = discrete_goodness_of_fit(samples, probs_dict, plot=True)
    elif isinstance(val, float):
        # Score the full sampled rows before reducing them to the values
        # of feature ``fi``.
        probs = numpy.exp(
            [server.score(sample) - base_score for sample in samples])
        samples = [sample[fi] for sample in samples]
        gof = density_goodness_of_fit(samples, probs, plot=True)
    else:
        # Fail with a clear message instead of an unbound ``gof`` below.
        raise TypeError(
            'cannot check samples of type {}'.format(type(val).__name__))

    assert_greater(gof, MIN_GOODNESS_OF_FIT)
def _check_marginal_samples_match_scores(server, row, fi):
    """Compare sampled values of feature ``fi`` with scored probabilities.

    After converting ``row.diff`` to a data row and setting entry ``fi``
    to ``None``, ``SAMPLE_COUNT`` samples are drawn with a mask selecting
    only ``fi``.  Probabilities are computed as
    ``exp(server.score(...) - base_score)`` where ``base_score`` is the
    score of the row with ``fi`` blank.  bool/int values go through
    ``discrete_goodness_of_fit`` and floats through
    ``density_goodness_of_fit``; the result must exceed
    ``MIN_GOODNESS_OF_FIT``.

    Args:
        server: object providing ``sample(to_sample, row, count)`` and
            ``score(row)``.
        row: object whose ``diff`` attribute is converted to a data row.
        fi: index of the feature under test.

    Raises:
        TypeError: if the sampled value has an unsupported type.  The
            original code reached the final assertion with ``gof`` unbound.
    """
    row = loom.query.protobuf_to_data_row(row.diff)
    row[fi] = None
    to_sample = [i == fi for i in range(len(row))]
    samples = server.sample(to_sample, row, SAMPLE_COUNT)
    val = samples[0][fi]
    base_score = server.score(row)

    is_discrete = isinstance(val, (bool, int))
    if not is_discrete and not isinstance(val, float):
        # Report the unsupported type explicitly rather than failing later
        # on an undefined ``gof``.
        raise TypeError(
            'cannot check samples of type {}'.format(type(val).__name__))

    if is_discrete:
        probs_dict = {}
        samples = [sample[fi] for sample in samples]
        for sample in set(samples):
            row[fi] = sample
            probs_dict[sample] = numpy.exp(
                server.score(row) - base_score)
        if len(probs_dict) == 1:
            # A single observed value has to carry all the probability.
            (only_prob,) = probs_dict.values()
            assert_almost_equal(only_prob, 1., places=SCORE_PLACES)
            return
        if min(probs_dict.values()) < MIN_CATEGORICAL_PROB:
            # At least one observed value is below the probability
            # threshold; no goodness-of-fit check is performed then.
            return
        gof = discrete_goodness_of_fit(samples, probs_dict, plot=True)
    else:
        # Whole sampled rows are scored; the projection onto feature
        # ``fi`` happens only afterwards.
        probs = numpy.exp([
            server.score(sample) - base_score
            for sample in samples
        ])
        samples = [sample[fi] for sample in samples]
        gof = density_goodness_of_fit(samples, probs, plot=True)

    assert_greater(gof, MIN_GOODNESS_OF_FIT)
def test_sample_matches_score_counts(Model, EXAMPLE, sample_count):
    """Check that sampled assignments are distributed as ``score_counts`` says.

    For each size produced by ``iter_valid_sizes(EXAMPLE, max_size=10)`` a
    fresh model is loaded, ``sample_count`` assignments are sampled and
    every distinct canonicalized sample is scored once.  The scores are
    renormalized over the observed samples before the goodness-of-fit
    statistic is compared with ``MIN_GOODNESS_OF_FIT``.
    """
    for size in iter_valid_sizes(EXAMPLE, max_size=10):
        model = Model()
        model.load(EXAMPLE)

        samples = []
        probs_dict = {}
        for _ in xrange(sample_count):
            value = model.sample_assignments(size)
            key = canonicalize(value)
            samples.append(key)
            if key in probs_dict:
                # Already scored this canonical form.
                continue
            counts = count_assignments(dict(enumerate(value)))
            probs_dict[key] = math.exp(model.score_counts(counts))

        # renormalize here; test normalization separately
        total = sum(probs_dict.values())
        for key, prob in list(probs_dict.items()):
            probs_dict[key] = prob / total

        gof = discrete_goodness_of_fit(samples, probs_dict, plot=True)
        # Single-argument print(...) prints the same text on Python 2 and 3.
        print('{} gof = {:0.3g}'.format(Model.__name__, gof))
        assert_greater(gof, MIN_GOODNESS_OF_FIT)
def test_sample_matches_score_counts(Model, EXAMPLE, sample_count):
    """Verify sampled assignments against probabilities from ``score_counts``.

    Runs once per size yielded by ``iter_valid_sizes(EXAMPLE, max_size=10)``.
    Each pass loads a new model, draws ``sample_count`` assignments, scores
    each distinct canonical sample a single time, rescales the scores so
    they sum to one, and asserts that the goodness of fit is above
    ``MIN_GOODNESS_OF_FIT``.
    """
    for size in iter_valid_sizes(EXAMPLE, max_size=10):
        model = Model()
        model.load(EXAMPLE)

        def probability(value):
            # exp of the model's score for the counts of this assignment.
            assignments = dict(enumerate(value))
            return math.exp(model.score_counts(count_assignments(assignments)))

        samples = []
        probs_dict = {}
        for _ in xrange(sample_count):
            value = model.sample_assignments(size)
            sample = canonicalize(value)
            samples.append(sample)
            if sample not in probs_dict:
                probs_dict[sample] = probability(value)

        # renormalize here; test normalization separately
        normalizer = sum(probs_dict.values())
        for sample in probs_dict:
            probs_dict[sample] = probs_dict[sample] / normalizer

        gof = discrete_goodness_of_fit(samples, probs_dict, plot=True)
        # Single-argument print(...) prints the same text on Python 2 and 3.
        print("{} gof = {:0.3g}".format(Model.__name__, gof))
        assert_greater(gof, MIN_GOODNESS_OF_FIT)
def test_sample_group(Model, EXAMPLE):
    """Check that sampled groups of two values match ``score_group``.

    Seeds the random generators with 0, loads the model from
    ``EXAMPLE['model']`` and, for int-valued models, draws ``SAMPLE_COUNT``
    groups of ``SIZE`` values.  Each sampled tuple gets the probability
    ``exp(model.score_group(group))`` and the goodness of fit must exceed
    ``MIN_GOODNESS_OF_FIT``.

    Raises:
        SkipTest: when ``Model.Value`` is not ``int``.
    """
    seed_all(0)
    SIZE = 2
    model = Model.model_load(EXAMPLE['model'])

    # NOTE(review): the loop variable is never read. The original rebinds
    # it inside the sampling loop, so both passes execute the same body.
    for _unused_values in [[], EXAMPLE['values']]:
        if not (Model.Value == int):
            raise SkipTest('Not implemented for {}'.format(Model.Value))

        samples = []
        probs_dict = {}
        for _ in xrange(SAMPLE_COUNT):
            drawn = model.sample_group(SIZE)
            key = tuple(drawn)
            samples.append(key)
            group = model.group_create(drawn)
            probs_dict[key] = math.exp(model.score_group(group))
        gof = discrete_goodness_of_fit(samples, probs_dict, plot=True)

        # Single-argument print(...) prints the same text on Python 2 and 3.
        print('{} gof = {:0.3g}'.format(Model.__name__, gof))
        assert_greater(gof, MIN_GOODNESS_OF_FIT)
def test_sample_group(module, EXAMPLE):
    """Check that sampled groups of two values match ``Group.score_data``.

    Seeds the random generators with 0, builds and realizes the shared
    state from ``EXAMPLE['shared']`` and, for bool- or int-valued modules,
    draws ``SAMPLE_COUNT`` groups of ``SIZE`` values.  Each sampled tuple
    gets the probability ``exp(group.score_data(shared))`` and the goodness
    of fit must exceed ``MIN_GOODNESS_OF_FIT``.

    Raises:
        SkipTest: when ``module.Value`` is neither ``bool`` nor ``int``.
    """
    seed_all(0)
    SIZE = 2
    shared = module.Shared.from_dict(EXAMPLE['shared'])
    shared.realize()

    # NOTE(review): the loop variable is never read. The original rebinds
    # it inside the sampling loop, so both passes execute the same body.
    for _unused_values in [[], EXAMPLE['values']]:
        if module.Value not in [bool, int]:
            raise SkipTest('Not implemented for {}'.format(module.Value))

        samples = []
        probs_dict = {}
        for _ in xrange(SAMPLE_COUNT):
            drawn = module.sample_group(shared, SIZE)
            key = tuple(drawn)
            samples.append(key)
            group = module.Group.from_values(shared, drawn)
            probs_dict[key] = math.exp(group.score_data(shared))
        gof = discrete_goodness_of_fit(samples, probs_dict, plot=True)

        # Single-argument print(...) prints the same text on Python 2 and 3.
        print('{} gof = {:0.3g}'.format(module.__name__, gof))
        assert_greater(gof, MIN_GOODNESS_OF_FIT)
def test_sample_value(module, EXAMPLE):
    """Check that values sampled from a group match ``score_value``.

    Runs once for an empty group and once for a group built from
    ``EXAMPLE['values']``.  ``SAMPLE_COUNT`` values are sampled from the
    group; for bool/int modules each distinct value gets the probability
    ``exp(group.score_value(shared, value))`` and a discrete goodness of
    fit is computed, for float modules a density goodness of fit is
    computed from the per-sample values of the same expression.  The
    result must exceed ``MIN_GOODNESS_OF_FIT``.

    Raises:
        SkipTest: when ``module.Value`` is not bool, int or float.
    """
    seed_all(0)
    shared = module.Shared.from_dict(EXAMPLE['shared'])
    shared.realize()

    for values in [[], EXAMPLE['values']]:
        group = module.Group.from_values(shared, values)
        samples = []
        for _ in xrange(SAMPLE_COUNT):
            samples.append(group.sample_value(shared))

        if module.Value in [bool, int]:
            probs_dict = {}
            for value in set(samples):
                probs_dict[value] = math.exp(
                    group.score_value(shared, value))
            gof = discrete_goodness_of_fit(samples, probs_dict, plot=True)
        elif module.Value == float:
            scores = [group.score_value(shared, value) for value in samples]
            probs = numpy.exp(scores)
            gof = density_goodness_of_fit(samples, probs, plot=True)
        else:
            raise SkipTest('Not implemented for {}'.format(module.Value))

        # Single-argument print(...) prints the same text on Python 2 and 3.
        print('{} gof = {:0.3g}'.format(module.__name__, gof))
        assert_greater(gof, MIN_GOODNESS_OF_FIT)
def test_sample_value(Model, EXAMPLE):
    """Check that values sampled by the model match ``score_value``.

    Runs once for an empty group and once for a group created from
    ``EXAMPLE['values']``.  ``SAMPLE_COUNT`` values are sampled for the
    group; for int models each distinct value gets the probability
    ``exp(model.score_value(group, value))`` and a discrete goodness of fit
    is computed, for float models a density goodness of fit is computed
    from the per-sample values of the same expression.  The result must
    exceed ``MIN_GOODNESS_OF_FIT``.

    Raises:
        SkipTest: when ``Model.Value`` is neither int nor float.
    """
    seed_all(0)
    model = Model.model_load(EXAMPLE['model'])

    for values in [[], EXAMPLE['values']]:
        group = model.group_create(values)
        samples = []
        for _ in xrange(SAMPLE_COUNT):
            samples.append(model.sample_value(group))

        if Model.Value == int:
            probs_dict = {}
            for value in set(samples):
                probs_dict[value] = math.exp(model.score_value(group, value))
            gof = discrete_goodness_of_fit(samples, probs_dict, plot=True)
        elif Model.Value == float:
            scores = [model.score_value(group, value) for value in samples]
            probs = numpy.exp(scores)
            gof = density_goodness_of_fit(samples, probs, plot=True)
        else:
            raise SkipTest('Not implemented for {}'.format(Model.Value))

        # Single-argument print(...) prints the same text on Python 2 and 3.
        print('{} gof = {:0.3g}'.format(Model.__name__, gof))
        assert_greater(gof, MIN_GOODNESS_OF_FIT)
def test_sample_value(module, EXAMPLE):
    """Verify ``Group.sample_value`` against ``Group.score_value``.

    With the random generators seeded to 0 and the shared state built from
    ``EXAMPLE['shared']``, the check runs for an empty group and for a
    group built from ``EXAMPLE['values']``.  ``SAMPLE_COUNT`` values are
    sampled; their probabilities are ``exp(group.score_value(shared, v))``,
    keyed by distinct value for bool/int modules and listed per sample for
    float modules.  The goodness of fit must exceed
    ``MIN_GOODNESS_OF_FIT``.

    Raises:
        SkipTest: when ``module.Value`` is not bool, int or float.
    """
    seed_all(0)
    shared = module.Shared.from_dict(EXAMPLE['shared'])
    shared.realize()

    for values in [[], EXAMPLE['values']]:
        group = module.Group.from_values(shared, values)
        samples = [group.sample_value(shared) for _ in xrange(SAMPLE_COUNT)]

        # Sampling happens before the type check, as in the original.
        is_discrete = module.Value in [bool, int]
        if not is_discrete and not (module.Value == float):
            raise SkipTest('Not implemented for {}'.format(module.Value))

        if is_discrete:
            probs_dict = dict(
                (value, math.exp(group.score_value(shared, value)))
                for value in set(samples))
            gof = discrete_goodness_of_fit(samples, probs_dict, plot=True)
        else:
            log_probs = []
            for value in samples:
                log_probs.append(group.score_value(shared, value))
            probs = numpy.exp(log_probs)
            gof = density_goodness_of_fit(samples, probs, plot=True)

        # Single-argument print(...) prints the same text on Python 2 and 3.
        print('{} gof = {:0.3g}'.format(module.__name__, gof))
        assert_greater(gof, MIN_GOODNESS_OF_FIT)