Ejemplo n.º 1
0
def plot_cdf(sample_count=1000, seed=0):
    '''
    Plot test statistic cdf based on the Nearest Neighbor distribution [1,2,3].

    For each example of each model (nich, lp_nich, niw, lp_niw) this
    obtains samples and scores from get_samples, derives edge lengths and
    edge scores with get_edge_stats, and evaluates the statistic
    1 - exp(-intensity * volume_of_sphere(dim, radius)) for every edge,
    where the ball volume follows [4].  The sorted statistics are plotted on
    the upper axes and the density returned by cdf_to_pdf on the lower axes,
    next to dashed black reference lines.

    A curve is labelled PASS (solid line) when
    abs(2 * sum(statistics) / sample_count - 1) < 0.05, and FAIL (dashed
    line) otherwise.

    Args:
        sample_count: sample count handed to get_samples; also sets the
            plotting grid and the intensity scale.
        seed: value passed to seed_all before any sampling.

    Returns:
        None.  The figure is displayed with pyplot.show().

    [1] http://projecteuclid.org/download/pdf_1/euclid.aop/1176993668
    [2] http://arxiv.org/pdf/1006.3019v2.pdf
    [3] http://en.wikipedia.org/wiki/Nearest_neighbour_distribution
    [4] http://en.wikipedia.org/wiki/Volume_of_an_n-ball
    '''
    seed_all(seed)

    fig, (ax1, ax2) = pyplot.subplots(2, 1, sharex=True, figsize=(8, 10))
    # Dashed black reference lines: the diagonal on the CDF axes and the
    # constant 1 on the PDF axes.
    ax1.plot([0, 1], [0, 1], 'k--')
    ax2.plot([0, 1], [1, 1], 'k--')

    for model in [nich, lp_nich, niw, lp_niw]:
        # Strip package prefixes so that legend entries stay short.
        name = model.__name__.replace('distributions.', '')
        name = name.replace('models.', '')
        for EXAMPLE in model.EXAMPLES:
            dim = get_dim(EXAMPLE['shared']['mu'])
            samples, scores = get_samples(model, EXAMPLE, sample_count)
            edges = get_edge_stats(samples, scores)
            radii = edges['lengths']
            intensities = sample_count * numpy.exp(edges['scores'])

            # Builtin zip (not itertools.izip) keeps this loop runnable on
            # both Python 2 and Python 3.
            cdf = numpy.array([
                1 - numpy.exp(-intensity * volume_of_sphere(dim, radius))
                for intensity, radius in zip(intensities, radii)
            ])
            cdf.sort()
            # Midpoints of sample_count equal-width bins covering [0, 1].
            X = numpy.arange(0.5 / sample_count, 1, 1.0 / sample_count)

            pdf, Xp = cdf_to_pdf(cdf, X)
            pdf *= sample_count

            # Twice the average statistic minus one; zero when the average
            # statistic is exactly 1/2.
            error = 2 * (sum(cdf) / sample_count) - 1
            if abs(error) < 0.05:
                status = 'PASS'
                linestyle = '-'
            else:
                status = 'FAIL'
                linestyle = '--'
            label = '{} {}({}) error = {:.3g}'.format(status, name, dim, error)
            ax1.plot(X, cdf, linestyle=linestyle, label=label)
            ax2.plot(Xp, pdf, linestyle=linestyle, label=label)

    ax1.set_title('GOF of Nearest Neighbor Statistic')
    ax1.legend(loc='best', prop={'size': 10}, fancybox=True, framealpha=0.5)
    ax1.set_ylabel('CDF')
    ax2.set_ylabel('PDF')
    pyplot.tight_layout()
    # Remove the vertical gap between the two panels, which share an x axis.
    fig.subplots_adjust(hspace=0)
    pyplot.show()
Ejemplo n.º 2
0
def plot_cdf(sample_count=1000, seed=0):
    '''
    Plot test statistic cdf based on the Nearest Neighbor distribution [1,2,3].

    For each example of each model (nich, lp_nich, niw, lp_niw) this
    obtains samples and scores from get_samples, derives edge lengths and
    edge scores with get_edge_stats, and evaluates the statistic
    1 - exp(-intensity * volume_of_sphere(dim, radius)) for every edge,
    where the ball volume follows [4].  The sorted statistics are plotted on
    the upper axes and the density returned by cdf_to_pdf on the lower axes,
    next to dashed black reference lines.

    A curve is labelled PASS (solid line) when
    abs(2 * sum(statistics) / sample_count - 1) < 0.05, and FAIL (dashed
    line) otherwise.

    Args:
        sample_count: sample count handed to get_samples; also sets the
            plotting grid and the intensity scale.
        seed: value passed to seed_all before any sampling.

    Returns:
        None.  The figure is displayed with pyplot.show().

    [1] http://projecteuclid.org/download/pdf_1/euclid.aop/1176993668
    [2] http://arxiv.org/pdf/1006.3019v2.pdf
    [3] http://en.wikipedia.org/wiki/Nearest_neighbour_distribution
    [4] http://en.wikipedia.org/wiki/Volume_of_an_n-ball
    '''
    seed_all(seed)

    fig, (ax1, ax2) = pyplot.subplots(2, 1, sharex=True, figsize=(8, 10))
    # Dashed black reference lines: the diagonal on the CDF axes and the
    # constant 1 on the PDF axes.
    ax1.plot([0, 1], [0, 1], 'k--')
    ax2.plot([0, 1], [1, 1], 'k--')

    for model in [nich, lp_nich, niw, lp_niw]:
        # Strip package prefixes so that legend entries stay short.
        name = model.__name__.replace('distributions.', '')
        name = name.replace('models.', '')
        for EXAMPLE in model.EXAMPLES:
            dim = get_dim(EXAMPLE['shared']['mu'])
            samples, scores = get_samples(model, EXAMPLE, sample_count)
            edges = get_edge_stats(samples, scores)
            radii = edges['lengths']
            intensities = sample_count * numpy.exp(edges['scores'])

            # Builtin zip (not itertools.izip) keeps this loop runnable on
            # both Python 2 and Python 3.
            cdf = numpy.array([
                1 - numpy.exp(-intensity * volume_of_sphere(dim, radius))
                for intensity, radius in zip(intensities, radii)
            ])
            cdf.sort()
            # Midpoints of sample_count equal-width bins covering [0, 1].
            X = numpy.arange(0.5 / sample_count, 1, 1.0 / sample_count)

            pdf, Xp = cdf_to_pdf(cdf, X)
            pdf *= sample_count

            # Twice the average statistic minus one; zero when the average
            # statistic is exactly 1/2.
            error = 2 * (sum(cdf) / sample_count) - 1
            if abs(error) < 0.05:
                status = 'PASS'
                linestyle = '-'
            else:
                status = 'FAIL'
                linestyle = '--'
            label = '{} {}({}) error = {:.3g}'.format(status, name, dim, error)
            ax1.plot(X, cdf, linestyle=linestyle, label=label)
            ax2.plot(Xp, pdf, linestyle=linestyle, label=label)

    ax1.set_title('GOF of Nearest Neighbor Statistic')
    ax1.legend(loc='best', prop={'size': 10}, fancybox=True, framealpha=0.5)
    ax1.set_ylabel('CDF')
    ax2.set_ylabel('PDF')
    pyplot.tight_layout()
    # Remove the vertical gap between the two panels, which share an x axis.
    fig.subplots_adjust(hspace=0)
    pyplot.show()
Ejemplo n.º 3
0
def scatter(sample_count=1000, seed=0):
    '''
    Plot test statistic cdf for all datapoints in a 2d dataset.

    Builds a grid of scatter plots, one per example generator.  Each
    generator is called with sample_count and must return a mapping with
    'samples', 'scores' and 'name' entries.  Every point is drawn at its
    first two coordinates and colored (colormap 'bwr') by the statistic
    1 - exp(-intensity * volume_of_sphere(2, radius)) computed from the
    edge lengths and scores returned by get_edge_stats.  Each subplot title
    shows the example name and 2 * sum(statistics) / sample_count - 1.

    Args:
        sample_count: sample count handed to each example generator; also
            sets the intensity scale.
        seed: value passed to seed_all before any sampling.

    Returns:
        None.  The figure is displayed with pyplot.show().
    '''
    seed_all(seed)

    # Maps (row, col) grid position -> example generator.
    examples = {
        (0, 0): get_normal_example,
        (1, 0): get_mvn_example,
        (0, 1): get_dbg_nich_example,
        (1, 1): get_lp_nich_example,
        (0, 2): get_dbg_niw_example,
        (1, 2): get_lp_niw_example,
    }

    rows = 1 + max(key[0] for key in examples)
    cols = 1 + max(key[1] for key in examples)
    fig, axes = pyplot.subplots(rows, cols, figsize=(12, 8))
    cmap = pyplot.get_cmap('bwr')
    # Ball dimension used for every example; loop-invariant, so set once.
    dim = 2

    # .items() and builtin zip (rather than .iteritems() / itertools.izip)
    # keep this loop runnable on both Python 2 and Python 3.
    for (row, col), get_example in examples.items():
        example = get_example(sample_count)
        edges = get_edge_stats(example['samples'], example['scores'])
        radii = edges['lengths']
        intensities = sample_count * numpy.exp(edges['scores'])

        cdf = numpy.array([
            1 - numpy.exp(-intensity * volume_of_sphere(dim, radius))
            for intensity, radius in zip(intensities, radii)
        ])
        error = 2 * (sum(cdf) / sample_count) - 1

        X = [value[0] for value in example['samples']]
        Y = [value[1] for value in example['samples']]

        ax = axes[row][col]
        ax.set_title('{} error = {:0.3g}'.format(example['name'], error))
        # Marker size 50; color encodes each point's statistic.
        ax.scatter(X, Y, 50, alpha=0.5, c=cdf, cmap=cmap)

    pyplot.tight_layout()
    pyplot.show()
Ejemplo n.º 4
0
def scatter(sample_count=1000, seed=0):
    '''
    Plot test statistic cdf for all datapoints in a 2d dataset.

    Builds a grid of scatter plots, one per example generator.  Each
    generator is called with sample_count and must return a mapping with
    'samples', 'scores' and 'name' entries.  Every point is drawn at its
    first two coordinates and colored (colormap 'bwr') by the statistic
    1 - exp(-intensity * volume_of_sphere(2, radius)) computed from the
    edge lengths and scores returned by get_edge_stats.  Each subplot title
    shows the example name and 2 * sum(statistics) / sample_count - 1.

    Args:
        sample_count: sample count handed to each example generator; also
            sets the intensity scale.
        seed: value passed to seed_all before any sampling.

    Returns:
        None.  The figure is displayed with pyplot.show().
    '''
    seed_all(seed)

    # Maps (row, col) grid position -> example generator.
    examples = {
        (0, 0): get_normal_example,
        (1, 0): get_mvn_example,
        (0, 1): get_dbg_nich_example,
        (1, 1): get_lp_nich_example,
        (0, 2): get_dbg_niw_example,
        (1, 2): get_lp_niw_example,
    }

    rows = 1 + max(key[0] for key in examples)
    cols = 1 + max(key[1] for key in examples)
    fig, axes = pyplot.subplots(rows, cols, figsize=(12, 8))
    cmap = pyplot.get_cmap('bwr')
    # Ball dimension used for every example; loop-invariant, so set once.
    dim = 2

    # .items() and builtin zip (rather than .iteritems() / itertools.izip)
    # keep this loop runnable on both Python 2 and Python 3.
    for (row, col), get_example in examples.items():
        example = get_example(sample_count)
        edges = get_edge_stats(example['samples'], example['scores'])
        radii = edges['lengths']
        intensities = sample_count * numpy.exp(edges['scores'])

        cdf = numpy.array([
            1 - numpy.exp(-intensity * volume_of_sphere(dim, radius))
            for intensity, radius in zip(intensities, radii)
        ])
        error = 2 * (sum(cdf) / sample_count) - 1

        X = [value[0] for value in example['samples']]
        Y = [value[1] for value in example['samples']]

        ax = axes[row][col]
        ax.set_title('{} error = {:0.3g}'.format(example['name'], error))
        # Marker size 50; color encodes each point's statistic.
        ax.scatter(X, Y, 50, alpha=0.5, c=cdf, cmap=cmap)

    pyplot.tight_layout()
    pyplot.show()
Ejemplo n.º 5
0
 def test_volume_of_sphere(self):
     '''
     Check volume_of_sphere against closed-form volumes in dimensions 1-3.
     '''
     for r in [0.1, 1.0, 10.0]:
         self.assertAlmostEqual(volume_of_sphere(1, r), 2 * r)
         self.assertAlmostEqual(volume_of_sphere(2, r), pi * r ** 2)
         # Float literals: under Python 2 integer division `4 / 3` is 1,
         # which would make the expected 3-ball volume pi * r ** 3.
         self.assertAlmostEqual(
             volume_of_sphere(3, r), 4.0 / 3.0 * pi * r ** 3)
Ejemplo n.º 6
0
 def test_volume_of_sphere(self):
     '''
     Check volume_of_sphere against closed-form volumes in dimensions 1-3.
     '''
     for r in [0.1, 1.0, 10.0]:
         self.assertAlmostEqual(volume_of_sphere(1, r), 2 * r)
         self.assertAlmostEqual(volume_of_sphere(2, r), pi * r**2)
         # Float literals: under Python 2 integer division `4 / 3` is 1,
         # which would make the expected 3-ball volume pi * r**3.
         self.assertAlmostEqual(
             volume_of_sphere(3, r), 4.0 / 3.0 * pi * r**3)
Ejemplo n.º 7
0
def test_volume_of_sphere():
    '''
    Check volume_of_sphere against closed-form volumes in dimensions 1-3.
    '''
    for radius in (0.1, 1.0, 10.0):
        # (dimension, expected volume) pairs: segment length, disc area and
        # ball volume for the current radius.
        expected_by_dim = (
            (1, 2.0 * radius),
            (2, pi * radius ** 2),
            (3, 4 / 3.0 * pi * radius ** 3),
        )
        for dim, expected in expected_by_dim:
            assert_almost_equal(volume_of_sphere(dim, radius), expected)