def check_mean_sigma_keepdims(a, axis):
    """Check that ``keepdims`` changes only the shape of mean_sigma output.

    The flattened values computed with keepdims=True must equal those
    computed with keepdims=False, and the keepdims=True results must
    broadcast against the input array.
    """
    mu_flat, sig_flat = mean_sigma(a, axis, keepdims=False)
    mu_kept, sig_kept = mean_sigma(a, axis, keepdims=True)

    # identical values regardless of keepdims
    assert_array_equal(mu_flat.ravel(), mu_kept.ravel())
    assert_array_equal(sig_flat.ravel(), sig_kept.ravel())

    # with keepdims the reduced axes survive as length-1 dimensions,
    # so the results broadcast against the input
    assert_array_equal(np.broadcast(a, mu_kept).shape, a.shape)
    assert_array_equal(np.broadcast(a, sig_kept).shape, a.shape)
def test_mean_sigma_keepdims(axis):
    """mean_sigma with keepdims: same values, broadcast-compatible shapes."""
    np.random.seed(0)
    data = np.random.random((4, 5, 6))

    mu_flat, sig_flat = mean_sigma(data, axis, keepdims=False)
    mu_kept, sig_kept = mean_sigma(data, axis, keepdims=True)

    # the statistics themselves are unaffected by keepdims
    assert_array_equal(mu_flat.ravel(), mu_kept.ravel())
    assert_array_equal(sig_flat.ravel(), sig_kept.ravel())

    # keepdims=True output broadcasts against the original array
    assert_array_equal(np.broadcast(data, mu_kept).shape, data.shape)
    assert_array_equal(np.broadcast(data, sig_kept).shape, data.shape)
def check_mean_sigma(a, axis=None, ddof=0):
    """Compare mean_sigma against the reference numpy mean/std results."""
    mu_actual, sigma_actual = mean_sigma(a, axis=axis, ddof=ddof)
    # numpy's own reductions are the ground truth
    assert_array_almost_equal(mu_actual, np.mean(a, axis=axis))
    assert_array_almost_equal(sigma_actual, np.std(a, axis=axis, ddof=ddof))
def test_bootstrap_results():
    """Regression test pinning bootstrap mean/sigma to known values."""
    np.random.seed(0)
    sample = np.random.normal(0, 1, 100)

    # 100 bootstrap resamples of the mean; fixed random_state makes the
    # resulting distribution reproducible
    dist = bootstrap(sample, 100, np.mean, kwargs=dict(axis=1),
                     random_state=0)

    mu, sigma = mean_sigma(dist)
    assert_allclose([mu, sigma], [0.08139846, 0.10465327])
def check_median_sigmaG_approx(a, axis, keepdims, atol=0.15):
    """Check that median/sigmaG approximate the mean/std of the sample.

    Prints the maximum absolute deviations as a diagnostic, then asserts
    both robust statistics agree with the non-robust estimators to
    within `atol`.
    """
    med, sigmaG = median_sigmaG(a, axis=axis, keepdims=keepdims)
    mu, sigma = mean_sigma(a, axis=axis, ddof=1, keepdims=keepdims)
    # FIX: the original used Python 2 print statements, which are a
    # SyntaxError under Python 3; use the print() function instead.
    print(np.max(abs(med - mu)))
    print(np.max(abs(sigmaG - sigma)))
    assert_allclose(med, mu, atol=atol)
    assert_allclose(sigmaG, sigma, atol=atol)
def test_median_sigmaG_approx(axis, keepdims, atol=0.02):
    """For large gaussian samples, median ~ mean and sigmaG ~ std."""
    np.random.seed(0)
    sample = np.random.normal(0, 1, size=(10, 10000))

    robust_loc, robust_scale = median_sigmaG(sample, axis=axis,
                                             keepdims=keepdims)
    loc, scale = mean_sigma(sample, axis=axis, ddof=1, keepdims=keepdims)

    # robust estimators converge to the classical ones for normal data
    assert_allclose(robust_loc, loc, atol=atol)
    assert_allclose(robust_scale, scale, atol=atol)
def test_mean_sigma(a_shape, axis, ddof):
    """mean_sigma must reproduce numpy's mean and std for random data."""
    np.random.seed(0)
    data = np.random.random(a_shape)

    mu_actual, sigma_actual = mean_sigma(data, axis=axis, ddof=ddof)

    # numpy's own reductions are the ground truth
    assert_array_almost_equal(mu_actual, np.mean(data, axis=axis))
    assert_array_almost_equal(sigma_actual,
                              np.std(data, axis=axis, ddof=ddof))
sigma1 = 1 sigma2 = 3 np.random.seed(1) x = np.hstack((np.random.normal(0, sigma1, Npts - N_out), np.random.normal(0, sigma2, N_out))) #------------------------------------------------------------ # Compute anderson-darling test A2, sig, crit = anderson(x) print("anderson-darling A^2 = {0:.1f}".format(A2)) #------------------------------------------------------------ # Compute non-robust and robust point statistics mu_sample, sig_sample = mean_sigma(x) med_sample, sigG_sample = median_sigmaG(x) #------------------------------------------------------------ # Plot the results fig, ax = plt.subplots(figsize=(5, 3.75)) # histogram of data ax.hist(x, 100, histtype='stepfilled', alpha=0.2, color='k', normed=True) # best-fit normal curves x_sample = np.linspace(-15, 15, 1000) ax.plot(x_sample, norm(mu_sample, sig_sample).pdf(x_sample), '-k', label='$\sigma$ fit') ax.plot(x_sample, norm(med_sample, sigG_sample).pdf(x_sample), '--k',
def check_median_sigmaG_approx(a, axis, keepdims, atol=0.15):
    """Robust statistics should approximate mean/std to within `atol`."""
    robust_loc, robust_scale = median_sigmaG(a, axis=axis, keepdims=keepdims)
    loc, scale = mean_sigma(a, axis=axis, ddof=1, keepdims=keepdims)

    # median vs mean, then sigmaG vs standard deviation
    assert_allclose(robust_loc, loc, atol=atol)
    assert_allclose(robust_scale, scale, atol=atol)
def gaussian(x, mu, sigma):
    """Return the unnormalized gaussian profile exp(-(x-mu)^2 / (2 sigma^2))."""
    return np.exp(-0.5 * (x - mu) ** 2 / sigma ** 2)


#------------------------------------------------------------
# Draw a random sample from the distribution, and compute
# some quantities
n = 10
xbar = 1
V = 4
sigma_x = np.sqrt(V)

np.random.seed(10)
xi = np.random.normal(xbar, sigma_x, size=n)

mu_mean, sig_mean = mean_sigma(xi, ddof=1)

# compute the analytically expected spread in measurements
mu_std = sig_mean / np.sqrt(n)
sig_std = sig_mean / np.sqrt(2 * (n - 1))

#------------------------------------------------------------
# bootstrap estimates
# FIX: the resample count must be an integer -- a float (1E6) fails for
# integer `size` arguments in modern NumPy (same fix applied elsewhere
# in this file as `Npts = int(1E6)`).
mu_bootstrap, sig_bootstrap = bootstrap(xi, int(1E6), mean_sigma,
                                        kwargs=dict(ddof=1, axis=1))

#------------------------------------------------------------
# Compute analytic posteriors

# distributions for the mean
mu = np.linspace(-3, 5, 1000)
bottom=0.06, top=0.95, hspace=0.1) ax = fig.add_subplot(1, 1, 1) avg = np.mean(qsos_m[:, i]) std = np.std(qsos_m[:, i]) data = (qsos_m[:, i] - avg) / std x = np.linspace(-5, 5, 1000) pdf = stats.norm(0, 1).pdf(x) A2, sig, crit = stats.anderson(data) D, pD = stats.kstest(data, "norm") W, pW = stats.shapiro(data) mu, sigma = mean_sigma(data, ddof=1) median, sigmaG = median_sigmaG(data) N = len(data) Z1 = 1.3 * abs(mu - median) / sigma * np.sqrt(N) Z2 = 1.1 * abs(sigma / sigmaG - 1) * np.sqrt(N) print 70 * '_' print " Kolmogorov-Smirnov test: D = %.2g p = %.2g" % (D, pD) print " Anderson-Darling test: A^2 = %.2g" % A2 print " significance | critical value " print " --------------|----------------" for j in range(len(sig)): print " %.2f | %.1f%%" % (sig[j], crit[j]) print " Shapiro-Wilk test: W = %.2g p = %.2g" % (W, pW) print " Z_1 = %.1f" % Z1
# draw underlying points
np.random.seed(0)
# FIX: Npts must be an int -- a float `size` raises TypeError in
# modern NumPy (the corrected twin of this script already uses int).
Npts = int(1E6)
x = np.random.normal(loc=0, scale=1, size=Npts)

# add error for each point
e = 3 * np.random.random(Npts)
x += np.random.normal(0, e)

# compute anderson-darling test
A2, sig, crit = anderson(x)
# FIX: Python 2 print statement -> print() function
print("anderson-darling A^2 = %.1f" % A2)

# compute point statistics
mu_sample, sig_sample = mean_sigma(x, ddof=1)
med_sample, sigG_sample = median_sigmaG(x)

#------------------------------------------------------------
# plot the results
fig, ax = plt.subplots(figsize=(5, 3.75))

# FIX: the `normed` keyword was removed in Matplotlib 3.1; use `density`
ax.hist(x, 100, histtype='stepfilled', alpha=0.2, color='k',
        density=True)

# plot the fitting normal curves
x_sample = np.linspace(-15, 15, 1000)
# raw strings avoid the invalid '\s' escape-sequence warning
ax.plot(x_sample, norm(mu_sample, sig_sample).pdf(x_sample), '-k',
        label=r'$\sigma$ fit')
ax.plot(x_sample, norm(med_sample, sigG_sample).pdf(x_sample), '--k',
        label=r'$\sigma_G$ fit')
ax.legend()
#------------------------------------------------------------
# Compute the statistics and plot the results
fig = plt.figure(figsize=(5, 7))
fig.subplots_adjust(left=0.13, right=0.95,
                    bottom=0.06, top=0.95, hspace=0.1)

for i in range(2):
    ax = fig.add_subplot(2, 1, 1 + i)  # 2 x 1 subplot

    # compute some statistics
    # NOTE(review): `vals` is defined earlier in the file.
    A2, sig, crit = stats.anderson(vals[i])
    D, pD = stats.kstest(vals[i], "norm")
    W, pW = stats.shapiro(vals[i])

    # non-robust and robust location/scale estimates
    mu, sigma = mean_sigma(vals[i], ddof=1)
    median, sigmaG = median_sigmaG(vals[i])

    N = len(vals[i])
    Z1 = 1.3 * abs(mu - median) / sigma * np.sqrt(N)
    Z2 = 1.1 * abs(sigma / sigmaG - 1) * np.sqrt(N)

    # FIX: Python 2 print statements converted to print() calls
    # (a SyntaxError under Python 3); message strings unchanged.
    print(70 * '_')
    print(" Kolmogorov-Smirnov test: D = %.2g p = %.2g" % (D, pD))
    print(" Anderson-Darling test: A^2 = %.2g" % A2)
    print(" significance | critical value ")
    print(" --------------|----------------")
    for j in range(len(sig)):
        print(" %.2f | %.1f%%" % (sig[j], crit[j]))
    print(" Shapiro-Wilk test: W = %.2g p = %.2g" % (W, pW))
    print(" Z_1 = %.1f" % Z1)
# Build a gaussian sample contaminated by N_out points drawn from a
# wider gaussian (sigma2 > sigma1).
# NOTE(review): Npts and N_out are defined earlier in the file.
sigma1 = 1
sigma2 = 3

np.random.seed(1)
x = np.hstack(
    (np.random.normal(0, sigma1, Npts - N_out),
     np.random.normal(0, sigma2, N_out)))

#------------------------------------------------------------
# Compute anderson-darling test
A2, sig, crit = anderson(x)
# FIX: Python 2 print statement -> print() function
print("anderson-darling A^2 = %.1f" % A2)

#------------------------------------------------------------
# Compute non-robust and robust point statistics
mu_sample, sig_sample = mean_sigma(x)
med_sample, sigG_sample = median_sigmaG(x)

#------------------------------------------------------------
# Plot the results
fig, ax = plt.subplots(figsize=(5, 3.75))

# histogram of data
# FIX: the `normed` keyword was removed in Matplotlib 3.1; use `density`
ax.hist(x, 100, histtype='stepfilled', alpha=0.2, color='k',
        density=True)

# best-fit normal curves
x_sample = np.linspace(-15, 15, 1000)
# FIX: raw string avoids the invalid '\s' escape-sequence warning
ax.plot(x_sample, norm(mu_sample, sig_sample).pdf(x_sample), '-k',
        label=r'$\sigma$ fit')
# draw underlying points np.random.seed(0) Npts = int(1E6) x = np.random.normal(loc=0, scale=1, size=Npts) # add error for each point e = 3 * np.random.random(Npts) x += np.random.normal(0, e) # compute anderson-darling test A2, sig, crit = anderson(x) print("anderson-darling A^2 = %.1f" % A2) # compute point statistics mu_sample, sig_sample = mean_sigma(x, ddof=1) med_sample, sigG_sample = median_sigmaG(x) #------------------------------------------------------------ # plot the results fig, ax = plt.subplots(figsize=(5, 3.75)) ax.hist(x, 100, histtype='stepfilled', alpha=0.2, color='k', density=True) # plot the fitting normal curves x_sample = np.linspace(-15, 15, 1000) ax.plot(x_sample, norm(mu_sample, sig_sample).pdf(x_sample), '-k', label='$\sigma$ fit') ax.plot(x_sample, norm(med_sample, sigG_sample).pdf(x_sample),
#------------------------------------------------------------ # Compute the statistics and plot the results fig = plt.figure(figsize=(5, 7)) fig.subplots_adjust(left=0.13, right=0.95, bottom=0.06, top=0.95, hspace=0.1) for i in range(2): ax = fig.add_subplot(2, 1, 1 + i) # 2 x 1 subplot # compute some statistics A2, sig, crit = stats.anderson(vals[i]) D, pD = stats.kstest(vals[i], "norm") W, pW = stats.shapiro(vals[i]) mu, sigma = mean_sigma(vals[i], ddof=1) median, sigmaG = median_sigmaG(vals[i]) N = len(vals[i]) Z1 = 1.3 * abs(mu - median) / sigma * np.sqrt(N) Z2 = 1.1 * abs(sigma / sigmaG - 1) * np.sqrt(N) print(70 * '_') print(" Kolmogorov-Smirnov test: D = %.2g p = %.2g" % (D, pD)) print(" Anderson-Darling test: A^2 = %.2g" % A2) print(" significance | critical value ") print(" --------------|----------------") for j in range(len(sig)): print(" {0:.2f} | {1:.1f}%".format(sig[j], crit[j])) print(" Shapiro-Wilk test: W = %.2g p = %.2g" % (W, pW)) print(" Z_1 = %.1f" % Z1)