Esempio n. 1
0
def check_fit_bivariate_normal(sigma1, sigma2, mu, alpha, N=1000):
    # poisson stats
    rtol = 2 * np.sqrt(N) / N

    x, y = bivariate_normal(mu, sigma1, sigma2, alpha, N).T
    mu_fit, sigma1_fit, sigma2_fit, alpha_fit = fit_bivariate_normal(x, y)

    if alpha_fit > np.pi / 2:
        alpha_fit -= np.pi
    elif alpha_fit < -np.pi / 2:
        alpha_fit += np.pi

    # Circular degeneracy in alpha: test sin(2*alpha) instead
    assert_allclose(np.sin(2 * alpha_fit), np.sin(2 * alpha), atol=2 * rtol)
    assert_allclose(mu, mu_fit, rtol=rtol)
    assert_allclose(sigma1_fit, sigma1, rtol=rtol)
    assert_allclose(sigma2_fit, sigma2, rtol=rtol)
Esempio n. 2
0
def check_fit_bivariate_normal(sigma1, sigma2, mu, alpha, N=1000):
    # poisson stats
    rtol = 2 * np.sqrt(N) / N

    x, y = bivariate_normal(mu, sigma1, sigma2, alpha, N).T
    mu_fit, sigma1_fit, sigma2_fit, alpha_fit = fit_bivariate_normal(x, y)

    if alpha_fit > np.pi / 2:
        alpha_fit -= np.pi
    elif alpha_fit < -np.pi / 2:
        alpha_fit += np.pi

    # Circular degeneracy in alpha: test sin(2*alpha) instead
    assert_allclose(np.sin(2 * alpha_fit), np.sin(2 * alpha), atol=2 * rtol)
    assert_allclose(mu, mu_fit, rtol=rtol)
    assert_allclose(sigma1_fit, sigma1, rtol=rtol)
    assert_allclose(sigma2_fit, sigma2, rtol=rtol)
# it's easier to visualize the x and y sigmas with alpha=0.0
alpha_deg = 0.0 #used to be 45 deg...

alpha = alpha_deg * np.pi / 180

#------------------------------------------------------------
# Draw N points from a multivariate normal distribution
#
#   we use the bivariate_normal function from astroML.  A more
#   general function for this is numpy.random.multivariate_normal(),
#   which requires the user to specify the full covariance matrix.
#   bivariate_normal() generates this covariance matrix for the
#   given inputs

np.random.seed(0)
X = bivariate_normal(mu, sigma1, sigma2, alpha, N)

#------------------------------------------------------------
# Create the figure showing the fits
fig = plt.figure(figsize=(8, 8))
fig.subplots_adjust(left=0.1, right=0.90, wspace=0.25,
                    bottom=0.1, top=0.9, hspace=0.3)


ax = fig.add_subplot(2, 2, 0)

# add outliers distributed using a bivariate normal.

x, y = X.T

# scatter the points
Esempio n. 4
0
sigma2 = 1.0
mu = [10, 10]
alpha_deg = 45.0
alpha = alpha_deg * np.pi / 180

#------------------------------------------------------------
# Draw N points from a multivariate normal distribution
#
#   we use the bivariate_normal function from astroML.  A more
#   general function for this is numpy.random.multivariate_normal(),
#   which requires the user to specify the full covariance matrix.
#   bivariate_normal() generates this covariance matrix for the
#   given inputs

np.random.seed(0)
X = bivariate_normal(mu, sigma1, sigma2, alpha, N)

#------------------------------------------------------------
# Create the figure showing the fits
fig = plt.figure(figsize=(10, 5))
fig.subplots_adjust(left=0.07, right=0.95, wspace=0.05, bottom=0.1, top=0.95)

# We'll create two figures, with two levels of contamination
for i, f in enumerate([0.05, 0.15]):
    ax = fig.add_subplot(1, 2, i + 1)

    # add outliers distributed using a bivariate normal.
    X[:int(f * N)] = bivariate_normal((10, 10), 2, 5, 45 * np.pi / 180.,
                                      int(f * N))
    x, y = X.T
Esempio n. 5
0
# Define the mean, principal axes, and rotation of the ellipse
mean = np.array([0, 0])
sigma_1 = 2
sigma_2 = 1
alpha = np.pi / 4

#------------------------------------------------------------
# Draw 10^5 points from a multivariate normal distribution
#
#   we use the bivariate_normal function from astroML.  A more
#   general function for this is numpy.random.multivariate_normal(),
#   which requires the user to specify the full covariance matrix.
#   bivariate_normal() generates this covariance matrix for the
#   given inputs.
np.random.seed(0)
x, cov = bivariate_normal(mean, sigma_1, sigma_2, alpha, size=100000,
                          return_cov=True)

sigma_x = np.sqrt(cov[0, 0])
sigma_y = np.sqrt(cov[1, 1])
sigma_xy = cov[0, 1]

#------------------------------------------------------------
# Plot the results
fig = plt.figure(figsize=(5, 5))
ax = fig.add_subplot(111)

# plot a 2D histogram/hess diagram of the points
H, bins = np.histogramdd(x, bins=2 * [np.linspace(-4.5, 4.5, 51)])
ax.imshow(H, origin='lower', cmap=plt.cm.binary, interpolation='nearest',
          extent=[bins[0][0], bins[0][-1], bins[1][0], bins[1][-1]])
Esempio n. 6
0
# Set parameters for the distributions
Nbootstraps = 5000
N = 1000

sigma1 = 2.0
sigma2 = 1.0
mu = (10.0, 10.0)
alpha_deg = 45.0
alpha = alpha_deg * np.pi / 180
f = 0.01

#------------------------------------------------------------
# sample the distribution
# without outliers and with outliers
np.random.seed(0)
X = bivariate_normal(mu, sigma1, sigma2, alpha, N)

X_out = X.copy()
X_out[:int(f * N)] = bivariate_normal(mu, 2, 5,
                                      45 * np.pi / 180., int(f * N))

# true values of rho (pearson/spearman r) and tau
# tau value comes from Eq. 41 of arXiv:1011.2009
rho_true = 0.6
tau_true = 2 / np.pi * np.arcsin(rho_true)


#------------------------------------------------------------
# Create a function to compute the statistics.  Since this
#  takes a while, we'll use the "pickle_results" decorator
#  to save the results of the computation to disk
# Set parameters for the distributions
Nbootstraps = 5000
N = 1000

sigma1 = 2.0
sigma2 = 1.0
mu = (10.0, 10.0)
alpha_deg = 45.0
alpha = alpha_deg * np.pi / 180
f = 0.01

#------------------------------------------------------------
# sample the distribution
# without outliers and with outliers
np.random.seed(0)
X = bivariate_normal(mu, sigma1, sigma2, alpha, N)

X_out = X.copy()
X_out[:int(f * N)] = bivariate_normal(mu, 2, 5,
                                      45 * np.pi / 180., int(f * N))

# true values of rho (pearson/spearman r) and tau
# tau value comes from Eq. 41 of arXiv:1011.2009
rho_true = 0.6
tau_true = 2 / np.pi * np.arcsin(rho_true)


#------------------------------------------------------------
# Create a function to compute the statistics.  Since this
#  takes a while, we'll use the "pickle_results" decorator
#  to save the results of the computation to disk
Esempio n. 8
0
sigma1 = 2.0
sigma2 = 1.0
mu = (10.0, 10.0)
alpha_deg = 45.0
alpha = alpha_deg * np.pi / 180
fracout = 0.01  # fraction of points to be made into outliers
sigmaout1 = 2.
sigmaout2 = 5.
alphaout = alpha

#------------------------------------------------------------
# sample the distribution
# without outliers and with outliers
np.random.seed(0)
dataXY = bivariate_normal(mu, sigma1, sigma2, alpha, Npts)

dataXY_out = dataXY.copy()
dataXY_out[:int(fracout * Npts)] = bivariate_normal(mu, sigmaout1, sigmaout2,
                                                    alphaout,
                                                    int(fracout * Npts))

# true values of rho (pearson/spearman r) and tau
# tau value comes from Eq. 41 of arXiv:1011.2009
rho_true = 0.6
tau_true = 2 / np.pi * np.arcsin(rho_true)

#------------------------------------------------------------
# Create a function to compute the statistics.  Since this
#  takes a while, we'll use the "pickle_results" decorator
#  to save the results of the computation to disk