def test_gauss_suff_stats():
    """Check gauss_suff_stats on data with a large mean and a tiny spread."""
    # With a high mean and a minuscule variance, a naive implementation
    # that maintained a running sum of squares would suffer catastrophic
    # cancellation.
    center = 400
    delta = 0.0000001
    samples = [center - delta, center, center + delta]
    # Deviations from the mean are -delta, 0 and +delta, so dividing the
    # summed squares by the count (3) gives a variance of 2 * delta**2 / 3.
    expected_sigma = math.sqrt(2 * delta**2 / 3)
    count, mean, sigma = stats.gauss_suff_stats(samples)
    assert count == 3
    assert mean == center
    assert relerr(expected_sigma, sigma) < 1e-5
def estimate_mean(samples):
    """Estimate the mean of a distribution from samples.

    Return the triple (count, mean, error).

    `count` is the number of input samples.

    `mean` is the mean of the samples, which estimates the true mean
    of the distribution.

    `error` is an estimate of the standard deviation of the returned
    `mean`: the standard deviation reported by
    `stats.gauss_suff_stats` divided by the square root of `count`.
    This relies on the Central Limit Theorem applying, which will be
    so if the underlying distribution has a finite variance and
    enough samples were drawn.
    """
    count, sample_mean, sample_sigma = stats.gauss_suff_stats(samples)
    standard_error = sample_sigma / math.sqrt(count)
    return (count, sample_mean, standard_error)