def plot_cdf(sample_count=1000, seed=0):
    '''
    Plot the cdf (top panel) and pdf (bottom panel) of the test statistic
    based on the Nearest Neighbor distribution [1,2,3], one curve per
    example of each model. The ball volume used in the statistic is the
    volume of an n-ball [4].

    Each curve is labelled PASS (solid line) when the absolute value of
    error = 2 * mean(cdf) - 1 is below 0.05, and FAIL (dashed line)
    otherwise.

    [1] http://projecteuclid.org/download/pdf_1/euclid.aop/1176993668
    [2] http://arxiv.org/pdf/1006.3019v2.pdf
    [3] http://en.wikipedia.org/wiki/Nearest_neighbour_distribution
    [4] http://en.wikipedia.org/wiki/Volume_of_an_n-ball
    '''
    seed_all(seed)

    fig, (ax1, ax2) = pyplot.subplots(2, 1, sharex=True, figsize=(8, 10))
    # Dashed black reference lines: the diagonal on the cdf panel and the
    # constant 1 on the pdf panel.
    ax1.plot([0, 1], [0, 1], 'k--')
    ax2.plot([0, 1], [1, 1], 'k--')

    for model in (nich, lp_nich, niw, lp_niw):
        # Strip package prefixes so the legend shows only the short name.
        name = model.__name__.replace('distributions.', '').replace(
            'models.', '')

        for example in model.EXAMPLES:
            dim = get_dim(example['shared']['mu'])
            samples, scores = get_samples(model, example, sample_count)
            edges = get_edge_stats(samples, scores)
            intensities = sample_count * numpy.exp(edges['scores'])

            # One statistic per edge, sorted ascending for plotting.
            cdf = numpy.sort(numpy.array([
                1 - numpy.exp(-rate * volume_of_sphere(dim, length))
                for rate, length in izip(intensities, edges['lengths'])
            ]))

            # Midpoints of sample_count equal-width bins covering [0, 1].
            X = numpy.arange(0.5 / sample_count, 1, 1.0 / sample_count)
            pdf, Xp = cdf_to_pdf(cdf, X)
            pdf *= sample_count

            mean_cdf = sum(cdf) / sample_count
            error = 2 * mean_cdf - 1
            passed = abs(error) < 0.05
            status, linestyle = ('PASS', '-') if passed else ('FAIL', '--')

            label = '{} {}({}) error = {:.3g}'.format(
                status, name, dim, error)
            ax1.plot(X, cdf, linestyle=linestyle, label=label)
            ax2.plot(Xp, pdf, linestyle=linestyle, label=label)

    ax1.set_title('GOF of Nearest Neighbor Statistic')
    ax1.legend(loc='best', prop={'size': 10}, fancybox=True, framealpha=0.5)
    ax1.set_ylabel('CDF')
    ax2.set_ylabel('PDF')
    pyplot.tight_layout()
    # Remove the vertical gap so the two panels visually share the x axis.
    fig.subplots_adjust(hspace=0)
    pyplot.show()
def scatter(sample_count=1000, seed=0):
    '''
    Plot test statistic cdf for all datapoints in a 2d dataset.

    Draws a grid of scatter plots, one per example generator. Each point is
    colored by its cdf value using the 'bwr' colormap, and each panel title
    shows the example name and error = 2 * mean(cdf) - 1.
    '''
    seed_all(seed)

    # Maps (row, col) grid position -> generator taking sample_count.
    examples = {
        (0, 0): get_normal_example,
        (1, 0): get_mvn_example,
        (0, 1): get_dbg_nich_example,
        (1, 1): get_lp_nich_example,
        (0, 2): get_dbg_niw_example,
        (1, 2): get_lp_niw_example,
    }

    rows = 1 + max(key[0] for key in examples)
    cols = 1 + max(key[1] for key in examples)
    fig, axes = pyplot.subplots(rows, cols, figsize=(12, 8))
    cmap = pyplot.get_cmap('bwr')
    dim = 2  # every example here is plotted as 2d points

    # dict.items() exists on both Python 2 and Python 3, whereas
    # dict.iteritems() was removed in Python 3.
    for (row, col), get_example in examples.items():
        example = get_example(sample_count)
        edges = get_edge_stats(example['samples'], example['scores'])
        radii = edges['lengths']
        intensities = sample_count * numpy.exp(edges['scores'])

        cdf = numpy.array([
            1 - numpy.exp(-intensity * volume_of_sphere(dim, radius))
            for intensity, radius in izip(intensities, radii)
        ])
        error = 2 * (sum(cdf) / sample_count) - 1

        X = [value[0] for value in example['samples']]
        Y = [value[1] for value in example['samples']]

        ax = axes[row][col]
        ax.set_title('{} error = {:0.3g}'.format(example['name'], error))
        ax.scatter(X, Y, 50, alpha=0.5, c=cdf, cmap=cmap)

    pyplot.tight_layout()
    pyplot.show()
def test_volume_of_sphere(self):
    '''
    Check volume_of_sphere against the closed-form volumes in 1, 2 and 3
    dimensions (2r, pi r^2 and 4/3 pi r^3) for several radii.
    '''
    for r in [0.1, 1.0, 10.0]:
        self.assertAlmostEqual(volume_of_sphere(1, r), 2 * r)
        self.assertAlmostEqual(volume_of_sphere(2, r), pi * r ** 2)
        # Float literals are required here: under Python 2 without
        # `from __future__ import division`, `4 / 3` is integer division
        # and evaluates to 1, making the expected volume wrong.
        self.assertAlmostEqual(
            volume_of_sphere(3, r), 4.0 / 3.0 * pi * r ** 3)
def test_volume_of_sphere(self):
    '''
    Compare volume_of_sphere with the known formulas for dimensions 1, 2
    and 3 (2r, pi r^2 and 4/3 pi r^3) at a small, unit and large radius.
    '''
    for r in [0.1, 1.0, 10.0]:
        self.assertAlmostEqual(volume_of_sphere(1, r), 2 * r)
        self.assertAlmostEqual(volume_of_sphere(2, r), pi * r**2)
        # Use float operands: with Python 2 integer division `4 / 3`
        # truncates to 1, so the expected value would be pi * r**3.
        self.assertAlmostEqual(
            volume_of_sphere(3, r), 4.0 / 3.0 * pi * r**3)
def test_volume_of_sphere():
    '''
    Check volume_of_sphere against the closed-form volume for dimensions
    1, 2 and 3 at radii 0.1, 1.0 and 10.0.
    '''
    # (dimension, closed-form volume as a function of the radius),
    # listed in the order the assertions run for each radius.
    closed_forms = [
        (1, lambda radius: 2.0 * radius),
        (2, lambda radius: pi * radius ** 2),
        (3, lambda radius: 4 / 3.0 * pi * radius ** 3),
    ]
    for radius in (0.1, 1.0, 10.0):
        for dim, expected_volume in closed_forms:
            assert_almost_equal(
                volume_of_sphere(dim, radius), expected_volume(radius))