Code Example #1
File: test.py  Project: fritzo/goftests
def _test_multinomial_goodness_of_fit(dim):
    seed_all(0)
    sample_count = int(1e5)
    probs = numpy.random.dirichlet([1] * dim)

    counts = numpy.random.multinomial(sample_count, probs)
    p_good = multinomial_goodness_of_fit(probs, counts, sample_count)
    assert_greater(p_good, TEST_FAILURE_RATE)

    unif_counts = numpy.random.multinomial(sample_count, [1. / dim] * dim)
    p_bad = multinomial_goodness_of_fit(probs, unif_counts, sample_count)
    assert_less(p_bad, TEST_FAILURE_RATE)
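
All of the examples on this page follow the same pattern: draw counts from a known multinomial, call multinomial_goodness_of_fit(probs, counts, total_count), and treat the returned value as a goodness-of-fit p-value that should be large when the counts match the probabilities and small when they do not. As a rough cross-check of that idea (a hypothetical sketch assuming SciPy is available; none of the projects above require it), a comparable p-value can be computed with scipy.stats.chisquare:

import numpy as np
from scipy import stats

def chi_squared_p_value(probs, counts):
    # Pearson's chi-squared test of observed counts against expected counts.
    expected = np.asarray(probs) * np.sum(counts)
    return stats.chisquare(f_obs=np.asarray(counts), f_exp=expected).pvalue

The goftests helper additionally handles truncated distributions and optional plotting (the plot=True and truncated= arguments seen in the later examples); the sketch above covers only the basic, untruncated case.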
Code Example #2
File: test.py  Project: jfinkels/goftests
    def test_multinomial_goodness_of_fit(self):
        random.seed(0)
        numpy.random.seed(0)
        for dim in range(2, 20):
            sample_count = int(1e5)
            probs = numpy.random.dirichlet([1] * dim)

            counts = numpy.random.multinomial(sample_count, probs)
            p_good = multinomial_goodness_of_fit(probs, counts, sample_count)
            self.assertGreater(p_good, TEST_FAILURE_RATE)

            unif = [1 / dim] * dim
            unif_counts = numpy.random.multinomial(sample_count, unif)
            p_bad = multinomial_goodness_of_fit(probs, unif_counts,
                                                sample_count)
            self.assertLess(p_bad, TEST_FAILURE_RATE)
Code Example #3
File: test.py  Project: sacheendra/goftests
    def test_multinomial_goodness_of_fit(self):
        random.seed(0)
        numpy.random.seed(0)
        for dim in range(2, 20):
            sample_count = int(1e5)
            probs = numpy.random.dirichlet([1] * dim)

            counts = numpy.random.multinomial(sample_count, probs)
            p_good = multinomial_goodness_of_fit(probs, counts, sample_count)
            self.assertGreater(p_good, TEST_FAILURE_RATE)

            unif = [1 / dim] * dim
            unif_counts = numpy.random.multinomial(sample_count, unif)
            p_bad = multinomial_goodness_of_fit(probs, unif_counts,
                                                sample_count)
            self.assertLess(p_bad, TEST_FAILURE_RATE)
Code Example #4
def test_sample_from_probs_gof(size):
    set_random_seed(size)
    probs = np.exp(2 * np.random.random(size)).astype(np.float32)
    counts = np.zeros(size, dtype=np.int32)
    num_samples = 2000 * size
    for _ in range(num_samples):
        counts[sample_from_probs(probs)] += 1
    probs /= probs.sum()  # Normalize afterwards.
    print(counts)
    print(probs * num_samples)
    gof = multinomial_goodness_of_fit(probs, counts, num_samples, plot=True)
    assert 1e-2 < gof
Code Example #5
def test_sample_from_probs2_gof(size):
    set_random_seed(size)
    probs = np.exp(2 * np.random.random(size)).astype(np.float32)
    counts = np.zeros(size, dtype=np.int32)
    num_samples = 2000 * size
    probs2 = np.tile(probs, (num_samples, 1))
    samples = sample_from_probs2(probs2)
    probs /= probs.sum()  # Normalize afterwards.
    counts = np.bincount(samples, minlength=size)
    print(counts)
    print(probs * num_samples)
    gof = multinomial_goodness_of_fit(probs, counts, num_samples, plot=True)
    assert 1e-2 < gof
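
Examples #4 and #5 exercise the same check but accumulate counts differently: #4 increments a counter once per draw, while #5 draws all samples in one call and tallies them with np.bincount. The equivalence of the two tallies is easy to verify in isolation (plain NumPy, independent of the sample_from_probs helpers):

import numpy as np

samples = np.random.randint(0, 4, size=1000)     # any array of category indices
counts_loop = np.zeros(4, dtype=np.int32)
for s in samples:
    counts_loop[s] += 1                           # per-draw tally, as in Example #4
counts_vec = np.bincount(samples, minlength=4)    # vectorized tally, as in Example #5
assert (counts_loop == counts_vec).all()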
Code Example #6
File: serving_test.py  Project: vishalbelsare/treecat
def validate_gof(N, V, C, M, server, conditional):
    # Generate samples.
    expected = C**V
    num_samples = 1000 * expected
    ones = np.ones(V, dtype=np.int8)
    if conditional:
        cond_data = server.sample(1, ones)[0, :]
    else:
        cond_data = server.make_zero_row()
    samples = server.sample(num_samples, ones, cond_data)
    logprobs = server.logprob(samples + cond_data[np.newaxis, :])
    counts = {}
    probs = {}
    for sample, logprob in zip(samples, logprobs):
        key = tuple(sample)
        if key in counts:
            counts[key] += 1
        else:
            counts[key] = 1
            probs[key] = np.exp(logprob)
    assert len(counts) == expected

    # Check accuracy using Pearson's chi-squared test.
    keys = sorted(counts.keys(), key=lambda key: -probs[key])
    counts = np.array([counts[k] for k in keys], dtype=np.int32)
    probs = np.array([probs[k] for k in keys])
    probs /= probs.sum()

    # Truncate to avoid low-precision.
    truncated = False
    valid = (probs * num_samples > 20)
    if not valid.all():
        T = valid.argmin()
        T = max(8, T)  # Avoid truncating too much
        probs = probs[:T]
        counts = counts[:T]
        truncated = True

    gof = multinomial_goodness_of_fit(probs,
                                      counts,
                                      num_samples,
                                      plot=True,
                                      truncated=truncated)
    assert 1e-2 < gof
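
Example #6 truncates the tail of the distribution before testing: with the bins sorted by descending probability, everything past the first bin whose expected count (probs * num_samples) falls below 20 is dropped, because Pearson's chi-squared approximation degrades when expected counts are small. A minimal stand-alone sketch of that heuristic (the function name and defaults are hypothetical, but the threshold of 20 and the minimum of 8 retained bins match the code above):

import numpy as np

def truncate_low_expected(probs, counts, num_samples, min_expected=20, min_keep=8):
    # Assumes probs is sorted in descending order, as in the example above.
    valid = probs * num_samples > min_expected
    if valid.all():
        return probs, counts, False            # nothing truncated
    T = max(min_keep, int(valid.argmin()))     # first bin that fails the threshold
    return probs[:T], counts[:T], True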
Code Example #7
def test_assignment_sampler_gof(N, V, C, M):
    config = make_config(model_num_clusters=M)
    K = V * (V - 1) // 2
    dataset = generate_dataset(num_rows=N, num_cols=V, num_cats=C)
    table = dataset['table']
    tree_prior = np.exp(np.random.random(K), dtype=np.float32)
    trainer = TreeCatTrainer(table, tree_prior, config)
    print('Data:')
    print(dataset['table'].data)

    # Add all rows.
    set_random_seed(1)
    for row_id in range(N):
        trainer.add_row(row_id)

    # Collect samples.
    num_samples = 500 * M**(N * V)
    counts = {}
    logprobs = {}
    for _ in range(num_samples):
        for row_id in range(N):
            # This is a single-site Gibbs sampler.
            trainer.remove_row(row_id)
            trainer.add_row(row_id)
        key = hash_assignments(trainer._assignments)
        if key in counts:
            counts[key] += 1
        else:
            counts[key] = 1
            logprobs[key] = trainer.logprob()
    assert len(counts) == M**(N * V)

    # Check accuracy using Pearson's chi-squared test.
    keys = sorted(counts.keys())
    counts = np.array([counts[k] for k in keys], dtype=np.int32)
    probs = np.exp(np.array([logprobs[k] for k in keys]))
    probs /= probs.sum()
    print('Actual\tExpected\tAssignment')
    for count, prob, key in zip(counts, probs, keys):
        print('{:}\t{:0.1f}\t{}'.format(count, prob * num_samples, key))
    gof = multinomial_goodness_of_fit(probs, counts, num_samples, plot=True)
    assert 1e-2 < gof
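
Example #7 turns per-assignment log probabilities into normalized probabilities with a plain exp followed by a division. When the log probabilities are large in magnitude, subtracting the maximum first (a log-sum-exp shift) gives the same normalized result without overflow; this is a generic numerical note, not part of the TreeCat code:

import numpy as np

def normalized_probs_from_logprobs(logprobs):
    logprobs = np.asarray(logprobs, dtype=np.float64)
    shifted = logprobs - logprobs.max()   # shift so the largest term is exp(0) = 1
    probs = np.exp(shifted)
    return probs / probs.sum()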
Code Example #8
def test_sample_tree_gof(num_edges):
    set_random_seed(num_edges)
    E = num_edges
    V = 1 + E
    grid = make_complete_graph(V)
    K = grid.shape[1]
    edge_logits = np.random.random([K])
    edge_probs = np.exp(edge_logits)
    edge_probs_dict = {(v1, v2): edge_probs[k] for k, v1, v2 in grid.T}

    # Generate many samples via MCMC.
    num_samples = 30 * NUM_SPANNING_TREES[V]
    counts = defaultdict(lambda: 0)
    edges = [(v, v + 1) for v in range(V - 1)]
    for _ in range(num_samples):
        edges = sample_tree(grid, edge_logits, edges)
        counts[tuple(edges)] += 1
    assert len(counts) == NUM_SPANNING_TREES[V]

    # Check accuracy using Pearson's chi-squared test.
    keys = counts.keys()
    counts = np.array([counts[key] for key in keys])
    probs = np.array(
        [np.prod([edge_probs_dict[edge] for edge in key]) for key in keys])
    probs /= probs.sum()

    # Possibly truncate.
    T = 100
    truncated = False
    if len(counts) > T:
        counts = counts[:T]
        probs = probs[:T]
        truncated = True

    gof = multinomial_goodness_of_fit(probs,
                                      counts,
                                      num_samples,
                                      plot=True,
                                      truncated=truncated)
    assert 1e-2 < gof
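
Example #8 checks the sampled trees against a NUM_SPANNING_TREES table for the complete graph on V vertices. By Cayley's formula that count is V**(V - 2), so the expected number of distinct keys could also be computed on the fly (a sketch; the real table may simply be precomputed):

def num_spanning_trees_complete_graph(V):
    # Cayley's formula: the complete graph on V labeled vertices
    # has V**(V - 2) spanning trees (and 1 tree for V = 1 or 2).
    return V ** (V - 2) if V >= 2 else 1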
Code Example #9
File: util.py  Project: datamicroscopes/distributions
def assert_counts_match_probs(counts, probs, tol=1e-3):
    '''
    Check goodness of fit of observed counts to predicted probabilities
    using Pearson's chi-squared test.

    Inputs:
        - counts : key -> int
        - probs : key -> float
    '''
    keys = counts.keys()
    probs = [probs[key] for key in keys]
    counts = [counts[key] for key in keys]
    total_count = sum(counts)

    print 'EXPECT\tACTUAL\tVALUE'
    for prob, count, key in sorted(izip(probs, counts, keys), reverse=True):
        expect = prob * total_count
        print '{:0.1f}\t{}\t{}'.format(expect, count, key)

    gof = multinomial_goodness_of_fit(probs, counts, total_count)
    print 'goodness of fit = {}'.format(gof)
    assert gof > tol, 'failed with goodness of fit {}'.format(gof)
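
Example #9's helper expects two dictionaries over the same keys: observed counts and predicted probabilities. A hypothetical call (the coin-flip numbers below are illustrative only, not from the project) would look like:

counts = {'heads': 50188, 'tails': 49812}   # observed
probs = {'heads': 0.5, 'tails': 0.5}        # predicted
assert_counts_match_probs(counts, probs)    # passes when the fit is plausible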
Code Example #10
File: test_posterior_enum.py  Project: fritzo/loom
def _test_dataset_config(
        casename,
        object_count,
        feature_count,
        config_name,
        model_name,
        fixed_model_names,
        rows_name,
        config,
        debug):
    dataset = {'model': model_name, 'rows': rows_name, 'config': config_name}
    samples = generate_samples(casename, dataset, debug)

    fixed_hyper_samples = []
    for fixed_model_name in fixed_model_names:
        fixed_dataset = dataset.copy()
        fixed_dataset['model'] = fixed_model_name
        fs = generate_samples(None, fixed_dataset, debug)
        fixed_hyper_samples.append(fs)

    sample_count = config['posterior_enum']['sample_count']
    counts_dict = {}
    scores_dict = {}
    actual_count = 0
    for sample, score in samples:
        actual_count += 1
        add_sample(sample, score, counts_dict, scores_dict)
    assert_equal(actual_count, sample_count)

    if fixed_hyper_samples:
        latents, scores_dict = process_fixed_samples(
            fixed_hyper_samples,
            scores_dict.keys())
        useable_count = sum([counts_dict[lat] for lat in latents])
        if useable_count < sample_count:
            LOG('Warn', casename, 'scores found for {} / {} samples'.format(
                useable_count,
                sample_count))
        sample_count = useable_count
    else:
        latents = scores_dict.keys()
    actual_latent_count = len(latents)
    infer_kinds = (config['kernels']['kind']['iterations'] > 0)
    if infer_kinds:
        expected_latent_count = count_crosscats(object_count, feature_count)
    else:
        expected_latent_count = BELL_NUMBERS[object_count]
    assert actual_latent_count <= expected_latent_count, 'programmer error'
    if actual_latent_count < expected_latent_count:
        LOG('Warn', casename, 'found only {} / {} latents'.format(
            actual_latent_count,
            expected_latent_count))

    counts = numpy.array([counts_dict[key] for key in latents])
    scores = numpy.array([scores_dict[key] for key in latents])
    probs = scores_to_probs(scores)

    highest_by_prob = numpy.argsort(probs)[::-1][:TRUNCATE_COUNT]
    is_accurate = lambda p: sample_count * p * (1 - p) >= 1
    highest_by_prob = [i for i in highest_by_prob if is_accurate(probs[i])]
    highest_by_count = numpy.argsort(counts)[::-1][:TRUNCATE_COUNT]
    highest = list(set(highest_by_prob) | set(highest_by_count))
    truncated = len(highest_by_prob) < len(probs)
    if len(highest_by_prob) < 1:
        LOG('Warn', casename, 'test is inaccurate; use more samples')
        return None

    goodness_of_fit = multinomial_goodness_of_fit(
        probs[highest_by_prob],
        counts[highest_by_prob],
        total_count=sample_count,
        truncated=truncated)

    comment = 'goodness of fit = {:0.3g}'.format(goodness_of_fit)
    if goodness_of_fit > MIN_GOODNESS_OF_FIT:
        LOG('Pass', casename, comment)
        return None
    else:
        print 'EXPECT\tACTUAL\tCHI\tVALUE'
        lines = [(probs[i], counts[i], latents[i]) for i in highest]
        for prob, count, latent in sorted(lines, reverse=True):
            expect = prob * sample_count
            chi = (count - expect) * expect ** -0.5
            pretty = pretty_latent(latent)
            print '{:0.1f}\t{}\t{:+0.1f}\t{}'.format(
                expect,
                count,
                chi,
                pretty)
        return LOG('Fail', casename, comment)
Code Example #11
def _test_dataset_config(casename, object_count, feature_count, config_name,
                         model_name, fixed_model_names, rows_name, config,
                         debug):
    dataset = {'model': model_name, 'rows': rows_name, 'config': config_name}
    samples = generate_samples(casename, dataset, debug)

    fixed_hyper_samples = []
    for fixed_model_name in fixed_model_names:
        fixed_dataset = dataset.copy()
        fixed_dataset['model'] = fixed_model_name
        fs = generate_samples(None, fixed_dataset, debug)
        fixed_hyper_samples.append(fs)

    sample_count = config['posterior_enum']['sample_count']
    counts_dict = {}
    scores_dict = {}
    actual_count = 0
    for sample, score in samples:
        actual_count += 1
        add_sample(sample, score, counts_dict, scores_dict)
    assert_equal(actual_count, sample_count)

    if fixed_hyper_samples:
        latents, scores_dict = process_fixed_samples(fixed_hyper_samples,
                                                     scores_dict.keys())
        useable_count = sum([counts_dict[lat] for lat in latents])
        if useable_count < sample_count:
            LOG(
                'Warn', casename, 'scores found for {} / {} samples'.format(
                    useable_count, sample_count))
        sample_count = useable_count
    else:
        latents = scores_dict.keys()
    actual_latent_count = len(latents)
    infer_kinds = (config['kernels']['kind']['iterations'] > 0)
    if infer_kinds:
        expected_latent_count = count_crosscats(object_count, feature_count)
    else:
        expected_latent_count = BELL_NUMBERS[object_count]
    assert actual_latent_count <= expected_latent_count, 'programmer error'
    if actual_latent_count < expected_latent_count:
        LOG(
            'Warn', casename,
            'found only {} / {} latents'.format(actual_latent_count,
                                                expected_latent_count))

    counts = numpy.array([counts_dict[key] for key in latents])
    scores = numpy.array([scores_dict[key] for key in latents])
    probs = scores_to_probs(scores)

    highest_by_prob = numpy.argsort(probs)[::-1][:TRUNCATE_COUNT]
    is_accurate = lambda p: sample_count * p * (1 - p) >= 1
    highest_by_prob = [i for i in highest_by_prob if is_accurate(probs[i])]
    highest_by_count = numpy.argsort(counts)[::-1][:TRUNCATE_COUNT]
    highest = list(set(highest_by_prob) | set(highest_by_count))
    truncated = len(highest_by_prob) < len(probs)
    if len(highest_by_prob) < 1:
        LOG('Warn', casename, 'test is inaccurate; use more samples')
        return None

    goodness_of_fit = multinomial_goodness_of_fit(probs[highest_by_prob],
                                                  counts[highest_by_prob],
                                                  total_count=sample_count,
                                                  truncated=truncated)

    comment = 'goodness of fit = {:0.3g}'.format(goodness_of_fit)
    if goodness_of_fit > MIN_GOODNESS_OF_FIT:
        LOG('Pass', casename, comment)
        return None
    else:
        print 'EXPECT\tACTUAL\tCHI\tVALUE'
        lines = [(probs[i], counts[i], latents[i]) for i in highest]
        for prob, count, latent in sorted(lines, reverse=True):
            expect = prob * sample_count
            chi = (count - expect) * expect**-0.5
            pretty = pretty_latent(latent)
            print '{:0.1f}\t{}\t{:+0.1f}\t{}'.format(expect, count, chi,
                                                     pretty)
        return LOG('Fail', casename, comment)
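
On failure, Examples #10 and #11 print a per-latent Pearson residual, (count - expect) / sqrt(expect); entries several units away from zero identify the bins that drive the low goodness of fit. The same diagnostic as a self-contained sketch:

import numpy as np

def pearson_residuals(probs, counts, total_count):
    expect = np.asarray(probs) * total_count
    return (np.asarray(counts) - expect) / np.sqrt(expect)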