Example #1
def _test_multinomial_goodness_of_fit(dim):
    # Counts sampled from the true distribution should fit it well (large
    # p-value); counts sampled from a uniform distribution should not.
    thresh = 1e-3
    sample_count = int(1e5)
    probs = numpy.random.dirichlet([1] * dim)

    counts = numpy.random.multinomial(sample_count, probs)
    p_good = multinomial_goodness_of_fit(probs, counts, sample_count)
    assert_greater(p_good, thresh)

    unif_counts = numpy.random.multinomial(sample_count, [1. / dim] * dim)
    p_bad = multinomial_goodness_of_fit(probs, unif_counts, sample_count)
    assert_less(p_bad, thresh)
Example #2
def _test_multinomial_goodness_of_fit(dim):
    seed_all(0)  # seed the RNGs so the statistical assertions are repeatable
    thresh = 1e-3
    sample_count = int(1e5)
    probs = numpy.random.dirichlet([1] * dim)

    counts = numpy.random.multinomial(sample_count, probs)
    p_good = multinomial_goodness_of_fit(probs, counts, sample_count)
    assert_greater(p_good, thresh)

    unif_counts = numpy.random.multinomial(sample_count, [1. / dim] * dim)
    p_bad = multinomial_goodness_of_fit(probs, unif_counts, sample_count)
    assert_less(p_bad, thresh)
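Both tests above hinge on multinomial_goodness_of_fit returning a Pearson chi-squared p-value (see the docstring in Example #3 below). As a rough, self-contained sketch of that kind of check, using scipy.stats.chisquare as a stand-in rather than the project's own implementation:

import numpy
from scipy.stats import chisquare

def goodness_of_fit_sketch(probs, counts):
    # Pearson chi-squared test of observed counts against expected counts;
    # returns the p-value, which is large when the counts fit the probabilities.
    expected = numpy.asarray(probs) * numpy.sum(counts)
    return chisquare(counts, f_exp=expected).pvalue

numpy.random.seed(0)
probs = numpy.random.dirichlet([1.0] * 10)
counts = numpy.random.multinomial(int(1e5), probs)
p_good = goodness_of_fit_sketch(probs, counts)  # expect a value well above 1e-3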
Example #3
def assert_counts_match_probs(counts, probs, tol=1e-3):
    '''
    Check goodness of fit of observed counts to predicted probabilities
    using Pearson's chi-squared test.

    Inputs:
        - counts : key -> int
        - probs : key -> float
        - tol : p-value threshold below which the assertion fails
    '''
    keys = counts.keys()
    probs = [probs[key] for key in keys]
    counts = [counts[key] for key in keys]
    total_count = sum(counts)

    print 'EXPECT\tACTUAL\tVALUE'
    for prob, count, key in sorted(izip(probs, counts, keys), reverse=True):
        expect = prob * total_count
        print '{:0.1f}\t{}\t{}'.format(expect, count, key)

    gof = multinomial_goodness_of_fit(probs, counts, total_count)
    print 'goodness of fit = {}'.format(gof)
    assert gof > tol, 'failed with goodness of fit {}'.format(gof)
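A hypothetical call, with made-up counts from 10000 fair-coin flips purely for illustration:

counts = {'heads': 5012, 'tails': 4988}
probs = {'heads': 0.5, 'tails': 0.5}
assert_counts_match_probs(counts, probs)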
Example #4
def _test_dataset_config(casename, object_count, feature_count, config_name,
                         model_name, fixed_model_names, rows_name, config,
                         debug):
    dataset = {'model': model_name, 'rows': rows_name, 'config': config_name}
    samples = generate_samples(casename, dataset, debug)

    fixed_hyper_samples = []
    for fixed_model_name in fixed_model_names:
        fixed_dataset = dataset.copy()
        fixed_dataset['model'] = fixed_model_name
        fs = generate_samples(None, fixed_dataset, debug)
        fixed_hyper_samples.append(fs)

    sample_count = config['posterior_enum']['sample_count']
    counts_dict = {}
    scores_dict = {}
    actual_count = 0
    for sample, score in samples:
        actual_count += 1
        add_sample(sample, score, counts_dict, scores_dict)
    assert_equal(actual_count, sample_count)

    if fixed_hyper_samples:
        latents, scores_dict = process_fixed_samples(fixed_hyper_samples,
                                                     scores_dict.keys())
        useable_count = sum([counts_dict[lat] for lat in latents])
        if useable_count < sample_count:
            LOG(
                'Warn', casename, 'scores found for {} / {} samples'.format(
                    useable_count, sample_count))
        sample_count = useable_count
    else:
        latents = scores_dict.keys()
    actual_latent_count = len(latents)
    infer_kinds = (config['kernels']['kind']['iterations'] > 0)
    if infer_kinds:
        # With kind inference on, latents range over full crosscat structures.
        expected_latent_count = count_crosscats(object_count, feature_count)
    else:
        # With kinds fixed, latents are partitions of the objects,
        # counted by the Bell numbers.
        expected_latent_count = BELL_NUMBERS[object_count]
    assert actual_latent_count <= expected_latent_count, 'programmer error'
    if actual_latent_count < expected_latent_count:
        LOG(
            'Warn', casename,
            'found only {} / {} latents'.format(actual_latent_count,
                                                expected_latent_count))

    counts = numpy.array([counts_dict[key] for key in latents])
    scores = numpy.array([scores_dict[key] for key in latents])
    probs = scores_to_probs(scores)

    highest_by_prob = numpy.argsort(probs)[::-1][:TRUNCATE_COUNT]
    # Keep only cells whose expected count variance (n * p * (1 - p)) is at
    # least 1, so the chi-squared approximation is trustworthy.
    is_accurate = lambda p: sample_count * p * (1 - p) >= 1
    highest_by_prob = [i for i in highest_by_prob if is_accurate(probs[i])]
    highest_by_count = numpy.argsort(counts)[::-1][:TRUNCATE_COUNT]
    highest = list(set(highest_by_prob) | set(highest_by_count))
    truncated = len(highest_by_prob) < len(probs)
    if len(highest_by_prob) < 1:
        LOG('Warn', casename, 'test is inaccurate; use more samples')
        return None

    goodness_of_fit = multinomial_goodness_of_fit(probs[highest_by_prob],
                                                  counts[highest_by_prob],
                                                  total_count=sample_count,
                                                  truncated=truncated)

    comment = 'goodness of fit = {:0.3g}'.format(goodness_of_fit)
    if goodness_of_fit > MIN_GOODNESS_OF_FIT:
        LOG('Pass', casename, comment)
        return None
    else:
        # On failure, report expected vs. actual counts and the chi residual
        # for the most probable and most frequent latents.
        print 'EXPECT\tACTUAL\tCHI\tVALUE'
        lines = [(probs[i], counts[i], latents[i]) for i in highest]
        for prob, count, latent in sorted(lines, reverse=True):
            expect = prob * sample_count
            chi = (count - expect) * expect**-0.5
            pretty = pretty_latent(latent)
            print '{:0.1f}\t{}\t{:+0.1f}\t{}'.format(expect, count, chi,
                                                     pretty)
        return LOG('Fail', casename, comment)
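The test above relies on scores_to_probs to turn the accumulated log scores into a normalized distribution before the goodness-of-fit check. A minimal sketch of that conversion, assuming the scores are unnormalized log probabilities (the project's actual helper may differ):

import numpy

def scores_to_probs_sketch(scores):
    # Softmax: exponentiate after subtracting the max for numerical stability,
    # then normalize so the result sums to one.
    scores = numpy.asarray(scores, dtype=float)
    probs = numpy.exp(scores - scores.max())
    return probs / probs.sum()

scores_to_probs_sketch([0.0, numpy.log(3.0)])  # -> array([0.25, 0.75])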