Example #1
def test_classifier_runs(Model, EXAMPLE):
    model = Model.model_load(EXAMPLE['model'])
    values = EXAMPLE['values']

    classifier = Model.Classifier()
    for value in values:
        classifier.append(model.group_create([value]))
    model.classifier_init(classifier)

    groupids = []
    for value in values:
        scores = numpy.zeros(len(classifier), dtype=numpy.float32)
        model.classifier_score(classifier, value, scores)
        probs = scores_to_probs(scores)
        groupid = sample_discrete(probs)
        model.classifier_add_value(classifier, groupid, value)
        groupids.append(groupid)

    model.classifier_add_group(classifier)
    assert len(classifier) == len(values) + 1
    scores = numpy.zeros(len(classifier), dtype=numpy.float32)

    for value, groupid in zip(values, groupids):
        model.classifier_remove_value(classifier, groupid, value)

    model.classifier_remove_group(classifier, 0)
    model.classifier_remove_group(classifier, len(classifier) - 1)
    assert len(classifier) == len(values) - 1

    for value in values:
        scores = numpy.zeros(len(classifier), dtype=numpy.float32)
        model.classifier_score(classifier, value, scores)
        probs = scores_to_probs(scores)
        groupid = sample_discrete(probs)
        model.classifier_add_value(classifier, groupid, value)
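
The body of the loops above is the same three-step assignment pattern that recurs in every example on this page: fill a score buffer for one value, normalize the log scores into probabilities with scores_to_probs, then draw a group id with sample_discrete. A minimal sketch of that step factored into a standalone helper is shown below; the helper name is hypothetical, and scores_to_probs, sample_discrete and the classifier API are assumed to be imported exactly as in the example above.

import numpy

def assign_to_group(model, classifier, value):
    # Hypothetical helper, not part of the library: score `value` against
    # every group, normalize, sample a group id, and record the assignment.
    scores = numpy.zeros(len(classifier), dtype=numpy.float32)
    model.classifier_score(classifier, value, scores)   # fill in per-group log scores
    probs = scores_to_probs(scores)                     # log scores -> probabilities summing to 1
    groupid = sample_discrete(probs)                    # index drawn in proportion to probs
    model.classifier_add_value(classifier, groupid, value)
    return groupid
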
Example #2
def test_scores_to_probs():
    scores = [-10000, 10000, 10001, 9999, 0, 5, 6, 6, 7]
    probs = scores_to_probs(scores)
    assert_less(abs(sum(probs) - 1), 1e-6)
    for prob in probs:
        assert_less_equal(0, prob)
        assert_less_equal(prob, 1)
Example #3
def test_scores_to_probs():
    scores = [-10000, 10000, 10001, 9999, 0, 5, 6, 6, 7]
    probs = scores_to_probs(scores)
    assert_less(abs(sum(probs) - 1), 1e-6)
    for prob in probs:
        assert_less_equal(0, prob)
        assert_less_equal(prob, 1)
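
The two tests above pin down only the contract of scores_to_probs: given arbitrary log scores, including extreme values such as -10000 and 10001, the result must be a valid probability vector (entries in [0, 1], summing to 1 within 1e-6). A standard, numerically stable way to meet that contract is exp-normalization with the maximum score subtracted first; the sketch below illustrates the idea and is not the library's actual implementation.

import numpy

def scores_to_probs_sketch(scores):
    # Illustration only: shift so the largest log score is 0, exponentiate,
    # and normalize.  Subtracting the max keeps exp() from overflowing even
    # for scores like 10001.
    scores = numpy.asarray(scores, dtype=numpy.float64)
    scores = scores - scores.max()
    probs = numpy.exp(scores)
    return probs / probs.sum()
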
Example #4
def test_mixture_score(module, EXAMPLE):
    shared = module.Shared.from_dict(EXAMPLE['shared'])
    values = EXAMPLE['values']
    for value in values:
        shared.add_value(value)

    groups = [module.Group.from_values(shared, [value]) for value in values]
    mixture = module.Mixture()
    for group in groups:
        mixture.append(group)
    mixture.init(shared)

    def check_score_value(value):
        expected = [group.score_value(shared, value) for group in groups]
        actual = numpy.zeros(len(mixture), dtype=numpy.float32)
        # Pre-load the buffer with noise and subtract it afterwards; this only
        # passes if score_value accumulates into the buffer instead of
        # overwriting it.
        noise = numpy.random.randn(len(actual))
        actual += noise
        mixture.score_value(shared, value, actual)
        actual -= noise
        assert_close(actual, expected, err_msg='score_value {}'.format(value))
        another = [
            mixture.score_value_group(shared, i, value)
            for i in xrange(len(groups))
        ]
        assert_close(
            another,
            expected,
            err_msg='score_value_group {}'.format(value))
        return actual

    def check_score_data():
        expected = sum(group.score_data(shared) for group in groups)
        actual = mixture.score_data(shared)
        assert_close(actual, expected, err_msg='score_data')

    print 'init'
    for value in values:
        check_score_value(value)
    check_score_data()

    print 'adding'
    groupids = []
    for value in values:
        scores = check_score_value(value)
        probs = scores_to_probs(scores)
        groupid = sample_discrete(probs)
        groups[groupid].add_value(shared, value)
        mixture.add_value(shared, groupid, value)
        groupids.append(groupid)
        check_score_data()

    print 'removing'
    for value, groupid in zip(values, groupids):
        groups[groupid].remove_value(shared, value)
        mixture.remove_value(shared, groupid, value)
        scores = check_score_value(value)
        check_score_data()
Example #5
def test_mixture_score(module, EXAMPLE):
    shared = module.Shared.from_dict(EXAMPLE['shared'])
    values = EXAMPLE['values']
    for value in values:
        shared.add_value(value)

    groups = [module.Group.from_values(shared, [value]) for value in values]
    mixture = module.Mixture()
    for group in groups:
        mixture.append(group)
    mixture.init(shared)

    def check_score_value(value):
        expected = [group.score_value(shared, value) for group in groups]
        actual = numpy.zeros(len(mixture), dtype=numpy.float32)
        noise = numpy.random.randn(len(actual))
        actual += noise
        mixture.score_value(shared, value, actual)
        actual -= noise
        assert_close(actual, expected, err_msg='score_value {}'.format(value))
        another = [
            mixture.score_value_group(shared, i, value)
            for i in xrange(len(groups))
        ]
        assert_close(another,
                     expected,
                     err_msg='score_value_group {}'.format(value))
        return actual

    def check_score_data():
        expected = sum(group.score_data(shared) for group in groups)
        actual = mixture.score_data(shared)
        assert_close(actual, expected, err_msg='score_data')

    print 'init'
    for value in values:
        check_score_value(value)
    check_score_data()

    print 'adding'
    groupids = []
    for value in values:
        scores = check_score_value(value)
        probs = scores_to_probs(scores)
        groupid = sample_discrete(probs)
        groups[groupid].add_value(shared, value)
        mixture.add_value(shared, groupid, value)
        groupids.append(groupid)
        check_score_data()

    print 'removing'
    for value, groupid in zip(values, groupids):
        groups[groupid].remove_value(shared, value)
        mixture.remove_value(shared, groupid, value)
        scores = check_score_value(value)
        check_score_data()
Example #6
def test_mixture_runs(module, EXAMPLE):
    shared = module.Shared.from_dict(EXAMPLE['shared'])
    values = EXAMPLE['values']

    mixture = module.Mixture()
    for value in values:
        shared.add_value(value)
        mixture.append(module.Group.from_values(shared, [value]))
    mixture.init(shared)

    groupids = []
    for value in values:
        scores = numpy.zeros(len(mixture), dtype=numpy.float32)
        mixture.score_value(shared, value, scores)
        probs = scores_to_probs(scores)
        groupid = sample_discrete(probs)
        mixture.add_value(shared, groupid, value)
        groupids.append(groupid)

    mixture.add_group(shared)
    assert len(mixture) == len(values) + 1
    scores = numpy.zeros(len(mixture), dtype=numpy.float32)

    for value, groupid in zip(values, groupids):
        mixture.remove_value(shared, groupid, value)

    mixture.remove_group(shared, 0)
    if module.__name__ == 'distributions.lp.models.dpd':
        raise SkipTest('FIXME known segfault here')
    mixture.remove_group(shared, len(mixture) - 1)
    assert len(mixture) == len(values) - 1

    for value in values:
        scores = numpy.zeros(len(mixture), dtype=numpy.float32)
        mixture.score_value(shared, value, scores)
        probs = scores_to_probs(scores)
        groupid = sample_discrete(probs)
        mixture.add_value(shared, groupid, value)
Example #7
def test_mixture_runs(module, EXAMPLE):
    shared = module.Shared.from_dict(EXAMPLE['shared'])
    values = EXAMPLE['values']

    mixture = module.Mixture()
    for value in values:
        shared.add_value(value)
        mixture.append(module.Group.from_values(shared, [value]))
    mixture.init(shared)

    groupids = []
    for value in values:
        scores = numpy.zeros(len(mixture), dtype=numpy.float32)
        mixture.score_value(shared, value, scores)
        probs = scores_to_probs(scores)
        groupid = sample_discrete(probs)
        mixture.add_value(shared, groupid, value)
        groupids.append(groupid)

    mixture.add_group(shared)
    assert len(mixture) == len(values) + 1
    scores = numpy.zeros(len(mixture), dtype=numpy.float32)

    for value, groupid in zip(values, groupids):
        mixture.remove_value(shared, groupid, value)

    mixture.remove_group(shared, 0)
    if module.__name__ == 'distributions.lp.models.dpd':
        raise SkipTest('FIXME known segfault here')
    mixture.remove_group(shared, len(mixture) - 1)
    assert len(mixture) == len(values) - 1

    for value in values:
        scores = numpy.zeros(len(mixture), dtype=numpy.float32)
        mixture.score_value(shared, value, scores)
        probs = scores_to_probs(scores)
        groupid = sample_discrete(probs)
        mixture.add_value(shared, groupid, value)
Example #8
def test_mixture_runs(module, EXAMPLE):
    shared = module.Shared.from_dict(EXAMPLE['shared'])
    values = EXAMPLE['values']

    mixture = module.Mixture()
    for value in values:
        shared.add_value(value)
        mixture.append(module.Group.from_values(shared, [value]))
    mixture.init(shared)

    groupids = []
    for value in values:
        scores = numpy.zeros(len(mixture), dtype=numpy.float32)
        mixture.score_value(shared, value, scores)
        probs = scores_to_probs(scores)
        groupid = sample_discrete(probs)
        mixture.add_value(shared, groupid, value)
        groupids.append(groupid)

    mixture.add_group(shared)
    assert len(mixture) == len(values) + 1
    scores = numpy.zeros(len(mixture), dtype=numpy.float32)

    for value, groupid in zip(values, groupids):
        mixture.remove_value(shared, groupid, value)

    mixture.remove_group(shared, 0)
    mixture.remove_group(shared, len(mixture) - 1)
    assert len(mixture) == len(values) - 1

    for value in values:
        scores = numpy.zeros(len(mixture), dtype=numpy.float32)
        mixture.score_value(shared, value, scores)
        probs = scores_to_probs(scores)
        groupid = sample_discrete(probs)
        mixture.add_value(shared, groupid, value)
Example #9
def test_mixture_runs(module, EXAMPLE):
    shared = module.Shared.from_dict(EXAMPLE['shared'])
    values = EXAMPLE['values']

    mixture = module.Mixture()
    for value in values:
        shared.add_value(value)
        mixture.append(module.Group.from_values(shared, [value]))
    mixture.init(shared)

    groupids = []
    for value in values:
        scores = numpy.zeros(len(mixture), dtype=numpy.float32)
        mixture.score_value(shared, value, scores)
        probs = scores_to_probs(scores)
        groupid = sample_discrete(probs)
        mixture.add_value(shared, groupid, value)
        groupids.append(groupid)

    mixture.add_group(shared)
    assert len(mixture) == len(values) + 1
    scores = numpy.zeros(len(mixture), dtype=numpy.float32)

    for value, groupid in zip(values, groupids):
        mixture.remove_value(shared, groupid, value)

    mixture.remove_group(shared, 0)
    mixture.remove_group(shared, len(mixture) - 1)
    assert len(mixture) == len(values) - 1

    for value in values:
        scores = numpy.zeros(len(mixture), dtype=numpy.float32)
        mixture.score_value(shared, value, scores)
        probs = scores_to_probs(scores)
        groupid = sample_discrete(probs)
        mixture.add_value(shared, groupid, value)
Example #10
def test_classifier_score(Model, EXAMPLE):
    model = Model.model_load(EXAMPLE['model'])
    values = EXAMPLE['values']

    groups = [model.group_create([value]) for value in values]
    classifier = Model.Classifier()
    for group in groups:
        classifier.append(group)
    model.classifier_init(classifier)

    def check_scores():
        expected = [model.score_value(group, value) for group in groups]
        actual = numpy.zeros(len(classifier), dtype=numpy.float32)
        model.classifier_score(classifier, value, actual)
        assert_close(actual, expected, err_msg='scores')
        return actual

    print 'init'
    for value in values:
        check_scores()

    print 'adding'
    groupids = []
    for value in values:
        scores = check_scores()
        probs = scores_to_probs(scores)
        groupid = sample_discrete(probs)
        model.group_add_value(groups[groupid], value)
        model.classifier_add_value(classifier, groupid, value)
        groupids.append(groupid)

    print 'removing'
    for value, groupid in zip(values, groupids):
        model.group_remove_value(groups[groupid], value)
        model.classifier_remove_value(classifier, groupid, value)
        scores = check_scores()
Example #11
def sample_discrete_log(scores):
    probs = scores_to_probs(scores)
    return sample_discrete(probs, total=1.0)
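
sample_discrete_log is a thin wrapper: it converts log scores to probabilities and then samples an index from them. Because scores_to_probs already returns a normalized vector, the wrapper can pass total=1.0 so the sampler does not need to re-sum the probabilities. One common way to implement such a sampler is inverse-CDF sampling over the cumulative sum; the sketch below is illustrative and is not the library's implementation.

import numpy

def sample_discrete_sketch(probs, total=None):
    # Illustration only: draw u uniformly in [0, total) and return the first
    # index whose cumulative probability exceeds u.
    if total is None:
        total = float(sum(probs))
    u = numpy.random.uniform(0.0, total)
    running = 0.0
    for i, prob in enumerate(probs):
        running += prob
        if u < running:
            return i
    return len(probs) - 1  # guard against floating-point round-off
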
Example #12
def _test_dataset_config(casename, object_count, feature_count, config_name,
                         model_name, fixed_model_names, rows_name, config,
                         debug):
    dataset = {'model': model_name, 'rows': rows_name, 'config': config_name}
    samples = generate_samples(casename, dataset, debug)

    fixed_hyper_samples = []
    for fixed_model_name in fixed_model_names:
        fixed_dataset = dataset.copy()
        fixed_dataset['model'] = fixed_model_name
        fs = generate_samples(None, fixed_dataset, debug)
        fixed_hyper_samples.append(fs)

    sample_count = config['posterior_enum']['sample_count']
    counts_dict = {}
    scores_dict = {}
    actual_count = 0
    for sample, score in samples:
        actual_count += 1
        add_sample(sample, score, counts_dict, scores_dict)
    assert_equal(actual_count, sample_count)

    if fixed_hyper_samples:
        latents, scores_dict = process_fixed_samples(fixed_hyper_samples,
                                                     scores_dict.keys())
        useable_count = sum([counts_dict[lat] for lat in latents])
        if useable_count < sample_count:
            LOG(
                'Warn', casename, 'scores found for {} / {} samples'.format(
                    useable_count, sample_count))
        sample_count = useable_count
    else:
        latents = scores_dict.keys()
    actual_latent_count = len(latents)
    infer_kinds = (config['kernels']['kind']['iterations'] > 0)
    if infer_kinds:
        expected_latent_count = count_crosscats(object_count, feature_count)
    else:
        expected_latent_count = BELL_NUMBERS[object_count]
    assert actual_latent_count <= expected_latent_count, 'programmer error'
    if actual_latent_count < expected_latent_count:
        LOG(
            'Warn', casename,
            'found only {} / {} latents'.format(actual_latent_count,
                                                expected_latent_count))

    counts = numpy.array([counts_dict[key] for key in latents])
    scores = numpy.array([scores_dict[key] for key in latents])
    probs = scores_to_probs(scores)

    highest_by_prob = numpy.argsort(probs)[::-1][:TRUNCATE_COUNT]
    is_accurate = lambda p: sample_count * p * (1 - p) >= 1
    highest_by_prob = [i for i in highest_by_prob if is_accurate(probs[i])]
    highest_by_count = numpy.argsort(counts)[::-1][:TRUNCATE_COUNT]
    highest = list(set(highest_by_prob) | set(highest_by_count))
    truncated = len(highest_by_prob) < len(probs)
    if len(highest_by_prob) < 1:
        LOG('Warn', casename, 'test is inaccurate; use more samples')
        return None

    goodness_of_fit = multinomial_goodness_of_fit(probs[highest_by_prob],
                                                  counts[highest_by_prob],
                                                  total_count=sample_count,
                                                  truncated=truncated)

    comment = 'goodness of fit = {:0.3g}'.format(goodness_of_fit)
    if goodness_of_fit > MIN_GOODNESS_OF_FIT:
        LOG('Pass', casename, comment)
        return None
    else:
        print 'EXPECT\tACTUAL\tCHI\tVALUE'
        lines = [(probs[i], counts[i], latents[i]) for i in highest]
        for prob, count, latent in sorted(lines, reverse=True):
            expect = prob * sample_count
            chi = (count - expect) * expect**-0.5
            pretty = pretty_latent(latent)
            print '{:0.1f}\t{}\t{:+0.1f}\t{}'.format(expect, count, chi,
                                                     pretty)
        return LOG('Fail', casename, comment)
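
Two details of this test are easy to miss. First, a latent is kept for the goodness-of-fit check only when sample_count * p * (1 - p) >= 1, i.e. only when the binomial variance of its expected count is at least one; latents too rare to have statistically meaningful counts are filtered out, and the truncated flag records that not every latent made it into the test. Second, the CHI column printed on failure is the standardized residual (count - expect) / sqrt(expect). The snippet below works through both calculations; every number in it is made up purely for illustration.

import numpy

sample_count = 1000                                  # hypothetical total samples
probs = numpy.array([0.60, 0.30, 0.0995, 0.0005])    # hypothetical latent probabilities
counts = numpy.array([607, 291, 101, 1])             # hypothetical observed counts

accurate = sample_count * probs * (1 - probs) >= 1   # -> [True, True, True, False]
expect = probs * sample_count                        # expected counts under probs
chi = (counts - expect) / numpy.sqrt(expect)         # standardized residual per latent

The last latent (p = 0.0005) fails the accuracy filter, so it would be dropped before calling multinomial_goodness_of_fit, just as the list comprehension over highest_by_prob does above.
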
Example #13
def sample_discrete_log(scores):
    probs = scores_to_probs(scores)
    return sample_discrete(probs, total=1.0)
Example #14
def _test_dataset_config(
        casename,
        object_count,
        feature_count,
        config_name,
        model_name,
        fixed_model_names,
        rows_name,
        config,
        debug):
    dataset = {'model': model_name, 'rows': rows_name, 'config': config_name}
    samples = generate_samples(casename, dataset, debug)

    fixed_hyper_samples = []
    for fixed_model_name in fixed_model_names:
        fixed_dataset = dataset.copy()
        fixed_dataset['model'] = fixed_model_name
        fs = generate_samples(None, fixed_dataset, debug)
        fixed_hyper_samples.append(fs)

    sample_count = config['posterior_enum']['sample_count']
    counts_dict = {}
    scores_dict = {}
    actual_count = 0
    for sample, score in samples:
        actual_count += 1
        add_sample(sample, score, counts_dict, scores_dict)
    assert_equal(actual_count, sample_count)

    if fixed_hyper_samples:
        latents, scores_dict = process_fixed_samples(
            fixed_hyper_samples,
            scores_dict.keys())
        useable_count = sum([counts_dict[lat] for lat in latents])
        if useable_count < sample_count:
            LOG('Warn', casename, 'scores found for {} / {} samples'.format(
                useable_count,
                sample_count))
        sample_count = useable_count
    else:
        latents = scores_dict.keys()
    actual_latent_count = len(latents)
    infer_kinds = (config['kernels']['kind']['iterations'] > 0)
    if infer_kinds:
        expected_latent_count = count_crosscats(object_count, feature_count)
    else:
        expected_latent_count = BELL_NUMBERS[object_count]
    assert actual_latent_count <= expected_latent_count, 'programmer error'
    if actual_latent_count < expected_latent_count:
        LOG('Warn', casename, 'found only {} / {} latents'.format(
            actual_latent_count,
            expected_latent_count))

    counts = numpy.array([counts_dict[key] for key in latents])
    scores = numpy.array([scores_dict[key] for key in latents])
    probs = scores_to_probs(scores)

    highest_by_prob = numpy.argsort(probs)[::-1][:TRUNCATE_COUNT]
    is_accurate = lambda p: sample_count * p * (1 - p) >= 1
    highest_by_prob = [i for i in highest_by_prob if is_accurate(probs[i])]
    highest_by_count = numpy.argsort(counts)[::-1][:TRUNCATE_COUNT]
    highest = list(set(highest_by_prob) | set(highest_by_count))
    truncated = len(highest_by_prob) < len(probs)
    if len(highest_by_prob) < 1:
        LOG('Warn', casename, 'test is inaccurate; use more samples')
        return None

    goodness_of_fit = multinomial_goodness_of_fit(
        probs[highest_by_prob],
        counts[highest_by_prob],
        total_count=sample_count,
        truncated=truncated)

    comment = 'goodness of fit = {:0.3g}'.format(goodness_of_fit)
    if goodness_of_fit > MIN_GOODNESS_OF_FIT:
        LOG('Pass', casename, comment)
        return None
    else:
        print 'EXPECT\tACTUAL\tCHI\tVALUE'
        lines = [(probs[i], counts[i], latents[i]) for i in highest]
        for prob, count, latent in sorted(lines, reverse=True):
            expect = prob * sample_count
            chi = (count - expect) * expect ** -0.5
            pretty = pretty_latent(latent)
            print '{:0.1f}\t{}\t{:+0.1f}\t{}'.format(
                expect,
                count,
                chi,
                pretty)
        return LOG('Fail', casename, comment)