Example #1
def test_joint(module, EXAMPLE):
    # Geweke-style joint distribution test \cite{geweke04getting}:
    # sufficient statistics sampled directly from the prior
    # (marginal-conditional) must agree in distribution with samples
    # from a transition-operator chain (successive-conditional).
    seed_all(0)
    SIZE = 10
    SKIP = 100
    shared = module.Shared.from_dict(EXAMPLE['shared'])
    shared.realize()
    marginal_conditional_samples = defaultdict(list)
    successive_conditional_samples = defaultdict(list)
    cond_group = sample_marginal_conditional(module, shared, SIZE)
    for _ in xrange(SAMPLE_COUNT):
        marg_group = sample_marginal_conditional(module, shared, SIZE)
        _append_ss(marg_group, marginal_conditional_samples)

        # Thin the successive-conditional chain to reduce autocorrelation.
        for __ in range(SKIP):
            cond_group = sample_successive_conditional(
                module,
                shared,
                cond_group,
                SIZE)
        _append_ss(cond_group, successive_conditional_samples)
    for key in marginal_conditional_samples:
        # The two-sample t-test p-value serves as the goodness-of-fit score.
        gof = scipy.stats.ttest_ind(
            marginal_conditional_samples[key],
            successive_conditional_samples[key])[1]
        print '{}:{} gof = {:0.3g}'.format(module.__name__, key, gof)
        if not numpy.isfinite(gof):
            raise SkipTest('Test fails with gof = {}'.format(gof))
        assert_greater(gof, MIN_GOODNESS_OF_FIT)
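
The sketch below replays the same Geweke check on a toy Normal-Normal model, so the idea can be seen end to end. It is illustrative only and uses none of the library's helpers.

import numpy
import scipy.stats

def geweke_check(sample_count=5000, seed=0):
    # Toy model: mu ~ Normal(0, 1), x ~ Normal(mu, 1).
    rng = numpy.random.RandomState(seed)
    # Marginal-conditional: forward-simulate (mu, x) from the prior.
    marginal = [rng.normal(rng.normal(0.0, 1.0), 1.0)
                for _ in range(sample_count)]
    # Successive-conditional: alternate x ~ p(x | mu) with the exact
    # conjugate update mu | x ~ Normal(x / 2, 1 / 2).
    successive = []
    mu = rng.normal(0.0, 1.0)
    for _ in range(sample_count):
        x = rng.normal(mu, 1.0)
        mu = rng.normal(x / 2.0, numpy.sqrt(0.5))
        successive.append(x)
    # A correct transition operator makes the two sample sets share a
    # distribution, so the t-test p-value should not be tiny.
    return scipy.stats.ttest_ind(marginal, successive)[1]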
def test_sample_value(module, EXAMPLE):
    # Sampler/scorer consistency: the empirical distribution of
    # sample_value draws must match the exp(score_value) probabilities.
    seed_all(0)
    shared = module.Shared.from_dict(EXAMPLE['shared'])
    shared.realize()
    for values in [[], EXAMPLE['values']]:
        group = module.Group.from_values(shared, values)
        sample_count = SAMPLE_COUNT
        if module.Value == numpy.ndarray:
            sample_count *= 10
        samples = [group.sample_value(shared) for _ in xrange(sample_count)]
        if module.Value in [bool, int]:
            probs_dict = {
                value: math.exp(group.score_value(shared, value))
                for value in set(samples)
            }
            gof = discrete_goodness_of_fit(samples, probs_dict, plot=True)
        elif module.Value == float:
            probs = numpy.exp([
                group.score_value(shared, value)
                for value in samples
            ])
            gof = density_goodness_of_fit(samples, probs, plot=True)
        elif module.Value == numpy.ndarray:
            if module.__name__ == 'distributions.lp.models.niw':
                raise SkipTest('FIXME known sampling bug')
            probs = numpy.exp([
                group.score_value(shared, value)
                for value in samples
            ])
            gof = vector_density_goodness_of_fit(samples, probs, plot=True)
        else:
            raise SkipTest('Not implemented for {}'.format(module.Value))
        print '{} gof = {:0.3g}'.format(module.__name__, gof)
        assert_greater(gof, MIN_GOODNESS_OF_FIT)
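
The contract being tested is that exp(score_value(...)) is exactly the probability the sampler assigns to each value. A self-contained illustration of that contract on a plain Bernoulli distribution (no library code involved):

import math
from collections import Counter
import numpy

def check_bernoulli_sampler_scorer(sample_count=100000, seed=0):
    rng = numpy.random.RandomState(seed)
    p = 0.3
    samples = [bool(u < p) for u in rng.uniform(size=sample_count)]
    scores = {True: math.log(p), False: math.log(1.0 - p)}
    counts = Counter(samples)
    # Empirical frequency of each support point should match exp(score).
    for value, score in scores.items():
        freq = counts[value] / float(sample_count)
        assert abs(freq - math.exp(score)) < 0.01, (value, freq)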
def plot_edges(sample_count=1000, seed=0):
    '''
    Plot edges of niw examples.
    '''
    seed_all(seed)
    fig, axes = pyplot.subplots(len(niw.EXAMPLES),
                                2,
                                sharey='row',
                                figsize=(8, 12))

    model = niw
    for EXAMPLE, (ax1, ax2) in izip(model.EXAMPLES, axes):
        dim = get_dim(EXAMPLE['shared']['mu'])
        samples, scores = get_samples(model, EXAMPLE, sample_count)
        edges = get_edge_stats(samples, scores)

        edge_lengths = numpy.log(edges['lengths'])
        edge_scores = edges['scores']
        edge_stats = [
            numpy.exp((s - d) / dim)
            for d, s in izip(edge_lengths, edge_scores)
        ]

        ax1.set_title('NIW, dim = {}'.format(dim))
        ax1.scatter(edge_lengths, edge_scores, lw=0, alpha=0.5)
        ax1.set_ylabel('log(edge prob)')

        ax2.scatter(edge_stats, edge_scores, lw=0, alpha=0.5)
        ax2.yaxis.set_label_position('right')

    ax1.set_xlabel('log(edge length)')
    ax2.set_ylabel('statistic')
    fig.tight_layout()
    fig.subplots_adjust(wspace=0)
    pyplot.show()
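
get_edge_stats is not shown on this page; assuming its 'lengths' are distances from each sample to its nearest neighbor, a plausible way to compute that geometry is scipy's KD-tree:

import numpy
from scipy.spatial import cKDTree

def nearest_neighbor_lengths(samples):
    # Distance from each point to its nearest neighbor; k=2 because
    # the closest hit at k=1 is the query point itself.
    points = numpy.asarray(samples, dtype=float)
    distances, _ = cKDTree(points).query(points, k=2)
    return distances[:, 1]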
Example #5
def test_score_student_t_dbg_lp_equiv():
    seed_all(0)

    def random_vec(dim):
        return numpy.random.uniform(low=-3., high=3., size=dim)

    def random_cov(dim):
        # NOTE: for an exactly orthonormal Q, numpy.dot(Q, Q.T) is the
        # identity matrix, so this only exercises unit covariance.
        Q = random_orthonormal_matrix(dim)
        return numpy.dot(Q, Q.T)

    def random_values(dim):
        return (random_vec(dim),
                float(dim) + 1.,
                random_vec(dim),
                random_cov(dim))

    values = (
        [random_values(2) for _ in xrange(10)] +
        [random_values(3) for _ in xrange(10)]
    )

    for x, nu, mu, cov in values:
        dbg_mv_score = dbg_score_student_t(x, nu, mu, cov)
        lp_mv_score = lp_score_student_t(x, nu, mu, cov)
        assert_close(dbg_mv_score, lp_mv_score)
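
Both backends score the multivariate Student-t density. For comparison with dbg_score_student_t and lp_score_student_t, here is the standard log-density formula as a reference implementation (not the library's code):

import numpy
from scipy.special import gammaln

def multivariate_t_logpdf(x, nu, mu, cov):
    x = numpy.asarray(x, dtype=float)
    mu = numpy.asarray(mu, dtype=float)
    p = mu.shape[0]
    delta = x - mu
    # Mahalanobis term delta' cov^-1 delta via a linear solve.
    maha = float(delta.dot(numpy.linalg.solve(cov, delta)))
    _, logdet = numpy.linalg.slogdet(cov)
    return (gammaln((nu + p) / 2.0) - gammaln(nu / 2.0)
            - 0.5 * (p * numpy.log(nu * numpy.pi) + logdet)
            - 0.5 * (nu + p) * numpy.log1p(maha / nu))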
Example #7
def _test_dataset(args):
    dim, feature_type, density, infer_kinds, debug, hyper_prior = args
    object_count, feature_count = dim
    with tempdir(cleanup_on_error=(not debug)):
        seed_all(SEED)

        config_name = os.path.abspath("config.pb")
        model_base_name = "model.pb"
        model_name = os.path.abspath(model_base_name)
        rows_name = os.path.abspath("rows.pbs")

        models = generate_model(feature_count, feature_type, hyper_prior)
        model, fixed_hyper_models = models
        dump_model(model, model_name)
        fixed_model_names = []
        for i, fm in enumerate(fixed_hyper_models):
            fixed_model_base = "fixed-{}-{}".format(i, model_base_name)
            fixed_model_name = os.path.abspath(fixed_model_base)
            fixed_model_names.append(fixed_model_name)
            dump_model(fm, fixed_model_name)
        if hyper_prior is None:
            assert len(fixed_model_names) == 0

        rows = generate_rows(object_count, feature_count, feature_type, density)
        dump_rows(rows, rows_name)

        infer_cats = object_count > 1
        infer_hypers = hyper_prior is not None

        if infer_kinds:
            sample_count = 10 * LATENT_SIZES[object_count][feature_count]
            iterations = 32
        else:
            sample_count = 10 * LATENT_SIZES[object_count][1]
            iterations = 0

        config = {
            "posterior_enum": {"sample_count": sample_count, "sample_skip": 10},
            "kernels": {
                "hyper": {"run": infer_hypers, "parallel": False},
                "kind": {"iterations": iterations, "row_queue_capacity": 0, "score_parallel": False},
            },
        }
        loom.config.config_dump(config, config_name)

        casename = "{}-{}-{}-{}-{}{}{}".format(
            object_count,
            feature_count,
            feature_type,
            density,
            ("C" if infer_cats else ""),
            ("K" if infer_kinds else ""),
            ("H" if infer_hypers else ""),
        )
        # LOG('Run', casename)
        error = _test_dataset_config(
            casename, object_count, feature_count, config_name, model_name, fixed_model_names, rows_name, config, debug
        )
        return [] if error is None else [error]
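
The harness runs inside a scratch directory via tempdir(cleanup_on_error=...). A minimal stand-in with that behavior (loom's actual helper may differ in details):

import contextlib
import os
import shutil
import tempfile

@contextlib.contextmanager
def tempdir(cleanup_on_error=True):
    # Enter a fresh temporary directory; remove it on success, and on
    # failure only when cleanup_on_error is set, so debug runs can
    # inspect the leftover files.
    old_cwd = os.getcwd()
    path = tempfile.mkdtemp()
    os.chdir(path)
    failed = True
    try:
        yield path
        failed = False
    finally:
        os.chdir(old_cwd)
        if not failed or cleanup_on_error:
            shutil.rmtree(path, ignore_errors=True)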
Example #8
def test_one_model(name):
    Model = MODELS[name]
    for EXAMPLE in iter_examples(Model):
        seed_all(0)
        if SKIP_EXPENSIVE_TESTS and name.startswith('dbg'):
            sample_count = SAMPLE_COUNT // 10
        else:
            sample_count = SAMPLE_COUNT
        test_fun(Model, EXAMPLE, sample_count)
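
Per-name drivers like this are typically fanned out with a nose-style test generator (compare test_models at the end of this page). A sketch of the pattern, assuming the MODELS registry above:

def test_all_models():
    # nose collects each yielded (callable, arg) pair as a separate test
    for name in sorted(MODELS):
        yield test_one_model, name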
Example #10
def plot_cdf(sample_count=1000, seed=0):
    '''
    Plot test statistic cdf based on the Nearest Neighbor distribution [1,2,3].

    [1] http://projecteuclid.org/download/pdf_1/euclid.aop/1176993668
    [2] http://arxiv.org/pdf/1006.3019v2.pdf
    [3] http://en.wikipedia.org/wiki/Nearest_neighbour_distribution
    [4] http://en.wikipedia.org/wiki/Volume_of_an_n-ball
    '''
    seed_all(seed)

    fig, (ax1, ax2) = pyplot.subplots(2, 1, sharex=True, figsize=(8, 10))
    ax1.plot([0, 1], [0, 1], 'k--')
    ax2.plot([0, 1], [1, 1], 'k--')

    for model in [nich, lp_nich, niw, lp_niw]:
        name = model.__name__.replace('distributions.', '')
        name = name.replace('models.', '')
        for EXAMPLE in model.EXAMPLES:
            dim = get_dim(EXAMPLE['shared']['mu'])
            samples, scores = get_samples(model, EXAMPLE, sample_count)
            edges = get_edge_stats(samples, scores)
            radii = edges['lengths']
            intensities = sample_count * numpy.exp(edges['scores'])

            cdf = numpy.array([
                1 - numpy.exp(-intensity * volume_of_sphere(dim, radius))
                for intensity, radius in izip(intensities, radii)
            ])
            cdf.sort()
            X = numpy.arange(0.5 / sample_count, 1, 1.0 / sample_count)

            pdf, Xp = cdf_to_pdf(cdf, X)
            pdf *= sample_count

            error = 2 * (sum(cdf) / sample_count) - 1
            if abs(error) < 0.05:
                status = 'PASS'
                linestyle = '-'
            else:
                status = 'FAIL'
                linestyle = '--'
            label = '{} {}({}) error = {:.3g}'.format(status, name, dim, error)
            ax1.plot(X, cdf, linestyle=linestyle, label=label)
            ax2.plot(Xp, pdf, linestyle=linestyle, label=label)

    ax1.set_title('GOF of Nearest Neighbor Statistic')
    ax1.legend(loc='best', prop={'size': 10}, fancybox=True, framealpha=0.5)
    ax1.set_ylabel('CDF')
    ax2.set_ylabel('PDF')
    pyplot.tight_layout()
    fig.subplots_adjust(hspace=0)
    pyplot.show()
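
The per-edge CDF comes from the nearest-neighbour distribution of a Poisson process [3]: with local intensity lambda, the chance of a neighbor within radius r is 1 - exp(-lambda * V_dim(r)). volume_of_sphere is presumably the n-ball volume of reference [4]; a matching reference implementation:

import math

def volume_of_sphere(dim, radius):
    # V_d(r) = pi**(d/2) / Gamma(d/2 + 1) * r**d, the volume of a
    # d-dimensional ball [4].
    return (math.pi ** (dim / 2.0) * radius ** dim
            / math.gamma(dim / 2.0 + 1.0))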
Example #11
def test_sample_seed(Model, EXAMPLE):
    model = Model.model_load(EXAMPLE['model'])

    seed_all(0)
    group1 = model.group_create()
    values1 = [model.sample_value(group1) for _ in xrange(DATA_COUNT)]

    seed_all(0)
    group2 = model.group_create()
    values2 = [model.sample_value(group2) for _ in xrange(DATA_COUNT)]

    assert_close(values1, values2, err_msg='values')
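
For this determinism test to pass, seed_all must reset every RNG a model might draw from. A plausible sketch for pure-Python backends (the real helper presumably also seeds the global RNG of the optimized C++ backends):

import random
import numpy

def seed_all(seed):
    # Seed both the stdlib and numpy generators; any other entropy
    # source used by a backend would need seeding here as well.
    random.seed(seed)
    numpy.random.seed(seed)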
Example #12
def test_sample_seed(module, EXAMPLE):
    shared = module.Shared.from_dict(EXAMPLE['shared'])

    seed_all(0)
    group1 = module.Group.from_values(shared)
    values1 = [group1.sample_value(shared) for _ in xrange(DATA_COUNT)]

    seed_all(0)
    group2 = module.Group.from_values(shared)
    values2 = [group2.sample_value(shared) for _ in xrange(DATA_COUNT)]

    assert_close(values1, values2, err_msg='values')
Example #15
def _test_multinomial_goodness_of_fit(dim):
    seed_all(0)
    thresh = 1e-3
    sample_count = int(1e5)
    probs = numpy.random.dirichlet([1] * dim)

    counts = numpy.random.multinomial(sample_count, probs)
    p_good = multinomial_goodness_of_fit(probs, counts, sample_count)
    assert_greater(p_good, thresh)

    unif_counts = numpy.random.multinomial(sample_count, [1. / dim] * dim)
    p_bad = multinomial_goodness_of_fit(probs, unif_counts, sample_count)
    assert_less(p_bad, thresh)
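
multinomial_goodness_of_fit behaves like a chi-square test of observed counts against expected counts. An approximate stand-in built only on scipy (the library's version may treat small expected counts differently):

import numpy
import scipy.stats

def multinomial_gof_pvalue(probs, counts, total_count):
    counts = numpy.asarray(counts, dtype=float)
    expected = total_count * numpy.asarray(probs, dtype=float)
    # Large p-value: counts are consistent with probs;
    # tiny p-value: they are not.
    return scipy.stats.chisquare(counts, expected)[1]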
def scatter(sample_count=1000, seed=0):
    '''
    Plot test statistic cdf for all data points in a 2d dataset.
    '''
    seed_all(seed)

    examples = {
        (0, 0): get_normal_example,
        (1, 0): get_mvn_example,
        (0, 1): get_dbg_nich_example,
        (1, 1): get_lp_nich_example,
        (0, 2): get_dbg_niw_example,
        (1, 2): get_lp_niw_example,
    }

    rows = 1 + max(key[0] for key in examples)
    cols = 1 + max(key[1] for key in examples)
    fig, axes = pyplot.subplots(rows, cols, figsize=(12, 8))
    cmap = pyplot.get_cmap('bwr')

    for (row, col), get_example in examples.iteritems():
        example = get_example(sample_count)
        edges = get_edge_stats(example['samples'], example['scores'])
        radii = edges['lengths']
        intensities = sample_count * numpy.exp(edges['scores'])

        dim = 2
        cdf = numpy.array([
            1 - numpy.exp(-intensity * volume_of_sphere(dim, radius))
            for intensity, radius in izip(intensities, radii)
        ])
        error = 2 * (sum(cdf) / sample_count) - 1

        X = [value[0] for value in example['samples']]
        Y = [value[1] for value in example['samples']]
        colors = cdf

        ax = axes[row][col]
        ax.set_title('{} error = {:0.3g}'.format(example['name'], error))
        ax.scatter(X, Y, 50, alpha=0.5, c=colors, cmap=cmap)

    pyplot.tight_layout()
    pyplot.show()
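
In two dimensions the per-point statistic reduces to 1 - exp(-lambda_i * pi * r_i**2), which is what each scatter point's color encodes. A compact restatement, assuming edges['lengths'] are nearest-neighbor distances:

import numpy
from scipy.spatial import cKDTree

def nn_cdf_statistic_2d(samples, log_densities):
    points = numpy.asarray(samples, dtype=float)
    # Nearest-neighbor radius for each point (the hit at k=1 is the
    # query point itself, so take column 1 of a k=2 query).
    radii = cKDTree(points).query(points, k=2)[0][:, 1]
    intensities = len(points) * numpy.exp(numpy.asarray(log_densities))
    # Poisson nearest-neighbor CDF with 2-d ball volume pi * r**2.
    return 1.0 - numpy.exp(-intensities * numpy.pi * radii ** 2)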
Example #17
def test_sample_group(Model, EXAMPLE):
    seed_all(0)
    SIZE = 2
    model = Model.model_load(EXAMPLE['model'])
    for values in [[], EXAMPLE['values']]:
        # NOTE: `values` is rebound by sample_group below before it is
        # ever read, so both passes of this loop behave identically.
        if Model.Value == int:
            samples = []
            probs_dict = {}
            for _ in xrange(SAMPLE_COUNT):
                values = model.sample_group(SIZE)
                sample = tuple(values)
                samples.append(sample)
                group = model.group_create(values)
                probs_dict[sample] = math.exp(model.score_group(group))
            gof = discrete_goodness_of_fit(samples, probs_dict, plot=True)
        else:
            raise SkipTest('Not implemented for {}'.format(Model.Value))
        print '{} gof = {:0.3g}'.format(Model.__name__, gof)
        assert_greater(gof, MIN_GOODNESS_OF_FIT)
Example #21
def test_sample_group(module, EXAMPLE):
    seed_all(0)
    SIZE = 2
    shared = module.Shared.from_dict(EXAMPLE['shared'])
    shared.realize()
    for values in [[], EXAMPLE['values']]:
        # NOTE: `values` is rebound by sample_group below before it is
        # ever read, so both passes of this loop behave identically.
        if module.Value in [bool, int]:
            samples = []
            probs_dict = {}
            for _ in xrange(SAMPLE_COUNT):
                values = module.sample_group(shared, SIZE)
                sample = tuple(values)
                samples.append(sample)
                group = module.Group.from_values(shared, values)
                probs_dict[sample] = math.exp(group.score_data(shared))
            gof = discrete_goodness_of_fit(samples, probs_dict, plot=True)
        else:
            raise SkipTest('Not implemented for {}'.format(module.Value))
        print '{} gof = {:0.3g}'.format(module.__name__, gof)
        assert_greater(gof, MIN_GOODNESS_OF_FIT)
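
module.sample_group plausibly draws an exchangeable group by sequential posterior-predictive sampling. A hypothetical sketch in terms of the Group API used elsewhere on this page (add_value is assumed to fold a draw into the group's sufficient statistics):

def sample_group_sketch(module, shared, size):
    group = module.Group.from_values(shared, [])
    values = []
    for _ in range(size):
        # Draw from the posterior predictive given the values so far,
        # then add the draw to the group.
        value = group.sample_value(shared)
        group.add_value(shared, value)
        values.append(value)
    return values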
Example #22
def test_shared(module, EXAMPLE):
    assert_hasattr(module, 'Shared')
    assert_is_instance(module.Shared, type)

    shared1 = module.Shared.from_dict(EXAMPLE['shared'])
    shared2 = module.Shared.from_dict(EXAMPLE['shared'])
    assert_close(shared1.dump(), EXAMPLE['shared'])

    values = EXAMPLE['values']
    seed_all(0)
    for value in values:
        shared1.add_value(value)
    seed_all(0)
    for value in values:
        shared2.add_value(value)
    assert_close(shared1.dump(), shared2.dump())

    for value in values:
        shared1.remove_value(value)
    assert_close(shared1.dump(), EXAMPLE['shared'])
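
The add/remove round-trip above holds because Shared objects only accumulate sufficient statistics. A toy Beta-Bernoulli analogue (illustrative only, not the library's class):

class BetaBernoulliShared(object):
    def __init__(self, alpha, beta):
        self.alpha = float(alpha)
        self.beta = float(beta)

    def add_value(self, value):
        # Conjugate update: a heads bumps alpha, a tails bumps beta.
        if value:
            self.alpha += 1.0
        else:
            self.beta += 1.0

    def remove_value(self, value):
        # Exact inverse of add_value, so dump() round-trips.
        if value:
            self.alpha -= 1.0
        else:
            self.beta -= 1.0

    def dump(self):
        return {'alpha': self.alpha, 'beta': self.beta}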
Example #24
def test_sample_value(Model, EXAMPLE):
    seed_all(0)
    model = Model.model_load(EXAMPLE['model'])
    for values in [[], EXAMPLE['values']]:
        group = model.group_create(values)
        samples = [model.sample_value(group) for _ in xrange(SAMPLE_COUNT)]
        if Model.Value == int:
            probs_dict = {
                value: math.exp(model.score_value(group, value))
                for value in set(samples)
            }
            gof = discrete_goodness_of_fit(samples, probs_dict, plot=True)
        elif Model.Value == float:
            probs = numpy.exp([
                model.score_value(group, value)
                for value in samples
            ])
            gof = density_goodness_of_fit(samples, probs, plot=True)
        else:
            raise SkipTest('Not implemented for {}'.format(Model.Value))
        print '{} gof = {:0.3g}'.format(Model.__name__, gof)
        assert_greater(gof, MIN_GOODNESS_OF_FIT)
Example #27
def test_models():
    # nose-style test generator: yields one case per (Model, size) pair
    seed_all(0)
    for Model in MODELS:
        for size in xrange(2, MAX_SIZE + 1):
            yield _test_models, Model, size