Example #1
0
def test_1D_density():
    """Yield one 1D density check per nearest-neighbor method.

    Samples are drawn from a unit normal with a fixed seed, and the reference
    density is the normal pdf on ten points in [-5, 5] scaled by the number
    of samples.  Each yielded tuple is
    ``(check_1D_density, estimator, samples, eval_points, expected, 100)``;
    the trailing 100 is presumably the tolerance used by the checker
    (confirm against ``check_1D_density``).
    """
    np.random.seed(0)
    unit_normal = norm(0, 1)

    n_samples, n_eval = 5000, 10
    samples = unit_normal.rvs((n_samples, 1))
    eval_points = np.linspace(-5, 5, n_eval).reshape((n_eval, 1))
    expected = unit_normal.pdf(eval_points[:, 0]) * samples.shape[0]

    # Both estimators are built up front, before the first check is yielded.
    estimators = [KNeighborsDensity(method=method_name, n_neighbors=250)
                  for method_name in ('simple', 'bayesian')]

    for estimator in estimators:
        yield (check_1D_density, estimator, samples, eval_points, expected,
               100)
Example #2
0
def NN_bayesian_density(x, y, NN, grid_size):
    """
    Function to compute the density of a distribution of particles
    on a regular 2D grid using the Bayesian K-Nearest Neighbors
    method from:
    http://www.astroml.org/modules/generated/astroML.density_estimation.KNeighborsDensity.html#astroML.density_estimation.KNeighborsDensity
    See Ivezic 10? for the details on how the algorithm works.

    Input:
    ------
    x : 1D numpy.array
        Array with the x-coordinates of the data.
    y : 1D numpy.array
        Array with the y-coordinates of the data.
    NN : int
        Number of neighbors used to compute the density.
    grid_size : int
        Number of grid points along each axis; the grid spans the
        min/max of the data in x and y.

    Output:
    -------
    dens_KNN : 2D numpy.array, shape (grid_size, grid_size)
        Density evaluated on the grid. Rows run along y and columns
        along x (the layout produced by np.meshgrid).

    Raises:
    -------
    AssertionError
        If x and y differ in length, or NN / grid_size is not an int.
    """
    assert len(x) == len(y), "Input data have different size"
    assert isinstance(NN, int), "NN should be of type int"
    assert isinstance(grid_size, int), "grid_size should be of type int"

    # Grid parameters
    Nx = grid_size
    Ny = grid_size
    xmin, xmax = (np.min(x), np.max(x))
    ymin, ymax = (np.min(y), np.max(y))

    # Making a grid: each row of Xgrid is one (x, y) grid point.
    # np.vstack needs a real sequence; the iterator returned by map() on
    # Python 3 is rejected by NumPy, so the raveled axes go into a list.
    grid_axes = np.meshgrid(np.linspace(xmin, xmax, Nx),
                            np.linspace(ymin, ymax, Ny))
    Xgrid = np.vstack([np.ravel(axis) for axis in grid_axes]).T

    # Putting data in 2d-array
    X = np.array([x, y]).T

    # Computing the density
    knn = KNeighborsDensity('bayesian', NN)
    dens_KNN = knn.fit(X).eval(Xgrid).reshape((Ny, Nx))

    return dens_KNN
Example #3
0
    def __init__(self, evts, t1, t2, k=10000):
        """Fit a nearest-neighbor density to the events and tabulate it.

        evts : events to model; indexed as ``evts[:, numpy.newaxis]``, so it
            is assumed to be a 1D numpy array (confirm against callers).
        t1, t2 : start and stop of the evaluation grid (stop excluded).
        k : number of neighbors passed to the Bayesian estimator.

        Progress messages are written to stderr.  The result is stored in
        ``self.model`` as a degree-1 (linear) interpolating spline.
        """
        report = sys.stderr.write

        self.evts = evts

        # Window over which the density is tabulated
        self.t1, self.t2 = t1, t2
        self.dt = self.t2 - self.t1

        # Nearest-neighbor density estimate of the events
        estimator = KNeighborsDensity('bayesian', n_neighbors=k)
        report("Fitting...")
        estimator.fit(evts[:, numpy.newaxis])
        report("done")

        # Tabulate the estimate on a grid with a fixed step of 10.0 and
        # divide by the number of events
        report("Evaluating...")
        grid = numpy.arange(self.t1, self.t2, 10.0)
        raw_density = estimator.eval(grid[:, numpy.newaxis])
        normalized = raw_density / evts.shape[0]
        report("done")

        # Linear interpolation between the tabulated points
        report("Interpolating...")
        self.model = interpolate.InterpolatedUnivariateSpline(
            grid, normalized, k=1)
        report("done")
Example #4
0
setup_text_plots(fontsize=8, usetex=False)
# File columns 5 and 6 are read as RA and Dec respectively
ra, dec = np.loadtxt('simbad3.tsv', usecols=(5, 6), unpack=True)

# Each row of X is (dec, ra)
X = np.vstack((dec, ra)).T
k = 500
Nx = 300
Ny = 300
xmin, xmax = (-30, 70)
# The second axis is deliberately given as (270, 0), i.e. decreasing
ymin, ymax = (270, 0)

#------------------------------------------------------------
# Evaluate density
# Each row of Xgrid is one grid point.  np.vstack requires a real sequence:
# on Python 3 map() returns an iterator, which NumPy rejects, so it is
# materialized with list() (a no-op change on Python 2).
Xgrid = np.vstack(list(map(np.ravel,
                           np.meshgrid(np.linspace(xmin, xmax, Nx),
                                       np.linspace(ymin, ymax, Ny))))).T

knn = KNeighborsDensity('simple', k)
dens = knn.fit(X).eval(Xgrid).reshape((Ny, Nx))

plt.figure(figsize=(9, 9))
plt.scatter(X[:, 1], X[:, 0], s=2, lw=0, c='r')
# dens has shape (Ny, Nx); it is transposed so the first grid axis (x)
# runs vertically, matching the extent ordering below
plt.imshow(dens.T, origin='lower',
           extent=(ymin, ymax, xmin, xmax), cmap='gray_r', norm=LogNorm())

s = 'k = ' + str(k)
plt.title(s)
plt.xlabel('RA')
plt.ylabel('Dec')
# Output file name, only used when the savefig line below is enabled
name = 'sgrstream' + str(k) + '.png'
#plt.savefig(name)
plt.show()
Example #5
0
#------------------------------------------------------------
# Create  the grid on which to evaluate the results
Nx = 50
Ny = 125
xmin, xmax = (-375, -175)
ymin, ymax = (-300, 200)

#------------------------------------------------------------
# Evaluate for several models
# Each row of Xgrid is one (x, y) grid point.  np.vstack requires a real
# sequence: on Python 3 map() returns an iterator, which NumPy rejects, so
# it is materialized with list() (a no-op change on Python 2).
Xgrid = np.vstack(list(map(np.ravel,
                           np.meshgrid(np.linspace(xmin, xmax, Nx),
                                       np.linspace(ymin, ymax, Ny))))).T

# Gaussian kernel density estimate with h=5
kde = KDE(metric='gaussian', h=5)
dens_KDE = kde.fit(X).eval(Xgrid).reshape((Ny, Nx))

# Bayesian nearest-neighbor estimates with 5 and 40 neighbors
knn5 = KNeighborsDensity('bayesian', 5)
dens_k5 = knn5.fit(X).eval(Xgrid).reshape((Ny, Nx))

knn40 = KNeighborsDensity('bayesian', 40)
dens_k40 = knn40.fit(X).eval(Xgrid).reshape((Ny, Nx))

#------------------------------------------------------------
# Plot the results
fig = plt.figure(figsize=(9, 4.0))
fig.subplots_adjust(left=0.1, right=0.95, bottom=0.14, top=0.9,
                    hspace=0.01, wspace=0.01)

# First plot: scatter the points
# Column 1 of X is drawn on the horizontal axis, column 0 on the vertical
ax1 = plt.subplot(221, aspect='equal')
ax1.scatter(X[:, 1], X[:, 0], s=1, lw=0, c='k')
ax1.text(0.98, 0.95, "input", ha='right', va='top',
Example #6
0
    delimiter=",",
    skiprows=1,
    usecols=(1, 2, 3, 10, 11, 12, 13, 14, 35, 37, 39, 41, 43, 49, 50, 51)
)  #dr7objid, petromag_u, petromag_g, petromag_r, pertomag_i, petromag_z, z(redshift),h alpha ew, h beta ew, OII ew, h delta ew, spiral, elliptical, uncertain

umr = data[:, 3] - data[:, 5]  #u-r color

# Append the color as a new last column
data = np.column_stack((data, umr))

# NOTE: basic slicing returns a view, so coords shares memory with data and
# changes when data[:, 1] is overwritten below
coords = data[:, 1:3]

# print() with a single argument behaves the same on Python 2 and 3;
# the bare print statement is a syntax error on Python 3
print(coords.shape)

print("started knd")
knd = KNeighborsDensity(
    "bayesian", 10
)  #try using something other than 10 for n_neighbors values, to experiment + optimize
knd.fit(coords)
density = knd.eval(coords)

# The density is evaluated at the rows of coords, which are the rows of data.
# NOTE(review): this assumes knd.eval returns one value per input row in
# input order, so the values line up row-for-row with data -- confirm.
data[:, 1] = density

# Drop the second coordinate column; every later column index shifts down by 1
data = np.delete(data, 2, 1)  #(col# 2 , 0/1 for row/col)
print("finished knd")

isSpiral = data[:, 12]  #with one col removed!!
#print data[:5, :]
isElliptical = data[:, 13]  #corresponds to the elliptical bool value

isUncertain = data[:, 14]
Example #7
0
# Grid on which the density estimates are evaluated
Nx = 50
Ny = 125
xmin, xmax = (-375, -175)
ymin, ymax = (-300, 200)

#------------------------------------------------------------
# Evaluate for several models
# Each row of Xgrid is one (x, y) grid point.  np.vstack requires a real
# sequence: on Python 3 map() returns an iterator, which NumPy rejects, so
# it is materialized with list() (a no-op change on Python 2).
Xgrid = np.vstack(
    list(
        map(np.ravel,
            np.meshgrid(np.linspace(xmin, xmax, Nx),
                        np.linspace(ymin, ymax, Ny))))).T

# Gaussian kernel density estimate with h=5
kde = KDE(metric='gaussian', h=5)
dens_KDE = kde.fit(X).eval(Xgrid).reshape((Ny, Nx))

# Bayesian nearest-neighbor estimates with 5 and 40 neighbors
knn5 = KNeighborsDensity('bayesian', 5)
dens_k5 = knn5.fit(X).eval(Xgrid).reshape((Ny, Nx))

knn40 = KNeighborsDensity('bayesian', 40)
dens_k40 = knn40.fit(X).eval(Xgrid).reshape((Ny, Nx))

#------------------------------------------------------------
# Plot the results
fig = plt.figure(figsize=(5, 2.2))
fig.subplots_adjust(left=0.12,
                    right=0.95,
                    bottom=0.2,
                    top=0.9,
                    hspace=0.01,
                    wspace=0.01)
Example #8
0
# One subplot per (sample size, neighbor count) pair
for N, k, subplot in zip(N_values, k_values, subplots):
    ax = fig.add_subplot(subplot)
    xN = x[:N]  # first N samples
    t = np.linspace(-10, 30, 1000)  # evaluation points for the curves

    # Compute density with KDE
    if use_sklearn_KDE:
        # bandwidth is passed by keyword: scikit-learn made the
        # KernelDensity constructor arguments keyword-only, so the
        # positional form raises TypeError on current releases
        kde = KernelDensity(bandwidth=0.1, kernel='gaussian')
        kde.fit(xN[:, None])
        # score_samples returns log-density, hence the exp
        dens_kde = np.exp(kde.score_samples(t[:, None]))
    else:
        kde = KDE('gaussian', h=0.1).fit(xN[:, None])
        # NOTE(review): dividing by N presumably turns a count-scaled
        # density into a normalized pdf -- confirm against KDE.eval
        dens_kde = kde.eval(t[:, None]) / N

    # Compute density with Bayesian nearest neighbors
    nbrs = KNeighborsDensity('bayesian', n_neighbors=k).fit(xN[:, None])
    dens_nbrs = nbrs.eval(t[:, None]) / N

    # plot the results
    ax.plot(t,
            true_pdf(t),
            ':',
            color='black',
            zorder=3,
            label="Generating Distribution")
    # Tick marks just below zero showing the individual samples
    ax.plot(xN, -0.005 * np.ones(len(xN)), '|k')
    hist(xN,
         bins='blocks',
         ax=ax,
         normed=True,
         zorder=1,
Example #9
0
# Compare three 1D density estimates of the samples in ent2 (Bayesian-blocks
# histogram, nearest neighbors, kernel density) for two sample sizes, one
# subplot per size.
fig.subplots_adjust()
N_values = (500, 5000)  # number of samples used in each subplot
subplots = (211, 212)  # subplot codes: two rows, one column
k_values = (10, 100)  # neighbor count paired with each sample size

for N, k, subplot in zip(N_values, k_values, subplots):
    ax = fig.add_subplot(subplot)
    xN = ent2[:N]  # first N samples
    t = np.linspace(-10, 30, 1000)  # evaluation points for the curves

    # Compute density with KDE
    # NOTE(review): dividing by N presumably turns a count-scaled density
    # into a normalized pdf -- confirm against KDE.eval
    kde = KDE('gaussian', h=0.1).fit(xN[:, None])
    dens_kde = kde.eval(t[:, None]) / N

    # Compute density with Bayesian nearest neighbors
    nbrs = KNeighborsDensity('bayesian', n_neighbors=k).fit(xN[:, None])
    dens_nbrs = nbrs.eval(t[:, None]) / N

    # plot the results
    #ax.plot(t, true_pdf(t), ':', color='black', zorder=3,
    #        label="Generating Distribution")
    # Tick marks just below zero showing the individual samples
    ax.plot(xN, -0.005 * np.ones(len(xN)), '|k', lw=1.5)
    # NOTE(review): 'normed' is the legacy matplotlib keyword (later
    # replaced by 'density'); left as-is for the library versions this
    # script targets -- confirm before upgrading
    hist(xN, bins='blocks', ax=ax, normed=True, zorder=1,
         histtype='stepfilled', lw=1.5, color='k', alpha=0.2,
         label="Bayesian Blocks")
    ax.plot(t, dens_nbrs, '-', lw=2, color='gray', zorder=2,
            label="Nearest Neighbors (k=%i)" % k)
    ax.plot(t, dens_kde, '-', color='black', zorder=3,
            label="Kernel Density (h=0.1)")

    # label the plot
Example #10
0
"""
Test density estimation techniques
"""
import pytest
import numpy as np
from numpy.testing import assert_allclose
from scipy.stats import norm
from astroML.density_estimation import KNeighborsDensity, GaussianMixture1D

# One estimator per nearest-neighbor method, each using 250 neighbors
classifiers = [KNeighborsDensity(method=method_name, n_neighbors=250)
               for method_name in ('simple', 'bayesian')]


@pytest.mark.parametrize("clf", classifiers)
def test_1D_density(clf, atol=100):
    """Check a 1D density estimate against the scaled unit-normal pdf.

    The estimator is fit on 5000 seeded draws from N(0, 1) and evaluated on
    ten points in [-5, 5].  The result must match the normal pdf multiplied
    by the number of samples, within the absolute tolerance ``atol``.
    """
    np.random.seed(0)
    unit_normal = norm(0, 1)

    n_samples, n_eval = 5000, 10
    samples = unit_normal.rvs((n_samples, 1))
    eval_points = np.linspace(-5, 5, n_eval).reshape((n_eval, 1))
    expected = unit_normal.pdf(eval_points[:, 0]) * samples.shape[0]

    clf.fit(samples)
    estimated = clf.eval(eval_points)

    assert_allclose(estimated, expected, atol=atol)


def test_gaussian1d():
from astroML.datasets import fetch_sdss_specgals
from astroML.density_estimation import KNeighborsDensity
import time

#Trying u-g, g-r, r-i, i-z AS WELL AS raw u, g, r, i, z, still including all 5 spectral lines



#get just spirals and ellipticals in 1 array, shuffle them, then extract the label column
# Six file columns are read (1, 2, 3, 49, 50, 51).  After loading, columns
# 1-2 are used as the coordinates for the density estimate and columns 3-5
# as the spiral / elliptical / uncertain flags.
# NOTE(review): the original trailing comment listed 14 column names
# (dr7objid, petromag_*, redshift, line EWs, ...), apparently copied from a
# wider load; confirm what file columns 1-3 actually contain.
data = np.loadtxt("crossmatched3_combineddata1_srane.txt", delimiter=",",
                  skiprows=1, usecols=(1, 2, 3, 49, 50, 51))

# NOTE: basic slicing returns a view, so coords shares memory with data and
# changes when data[:, 1] is overwritten below
coords = data[:, 1:3]

# print() with a single argument behaves the same on Python 2 and 3;
# the bare print statement is a syntax error on Python 3
print(coords.shape)

knd = KNeighborsDensity("bayesian", 10)
knd.fit(coords)
density = knd.eval(coords)

# Replace the first coordinate column with the local density.
# NOTE(review): assumes knd.eval returns one value per input row in input
# order, so the values line up row-for-row with data -- confirm.
data[:, 1] = density

# Drop the second coordinate column; every later column index shifts down by 1
data = np.delete(data, 2, 1)  #(col# 2 , 0/1 for row/col)

isSpiral = data[:, 2]  #with one col removed!!
print(data[:5, :])
isElliptical = data[:, 3]  #corresponds to the elliptical bool value

isUncertain = data[:, 4]

# Rows whose elliptical flag equals 1
ellipticals = data[isElliptical == 1]
Example #12
0
#fit_table=np.delete(fit_table,np.argwhere(fit_table[:,1] < 0),0)
#fit_table=fit_table[(fit_table[:,0] >= 0)]

#Preparing the grid for evaluation. Nx and Ny can be set separately
# NOTE(review): with Nx = Ny = len(fit) the grid has len(fit)**2 points, so
# memory and evaluation time grow quadratically with the sample size
N = len(fit)
Ny = N
Nx = N
xmin, xmax = (min(fit[:, 0]), max(fit[:, 0]))
ymin, ymax = (min(fit[:, 1]), max(fit[:, 1]))
x = np.linspace(xmin, xmax, Nx)
y = np.linspace(ymin, ymax, Ny)
# Each row of b is one (x, y) grid point.  np.vstack requires a real
# sequence: on Python 3 map() returns an iterator, which NumPy rejects, so
# it is materialized with list() (a no-op change on Python 2).
b = np.vstack(list(map(np.ravel, np.meshgrid(x, y)))).T

#Density estimation fit and evaluation
k = 40
knn = KNeighborsDensity('bayesian', k)
knn.fit(fit)
# Density on the grid; rows run along y, columns along x
c = knn.eval(b).reshape((Ny, Nx))

#Getting optimization data and science data from what's left
others = np.delete(table.data, selection, 0)

N2 = 1000
# Fixed seeds keep the two random selections reproducible
np.random.seed(1)
# NOTE(review): randint samples WITH replacement, so `optimize` may repeat
# indices; `opt` can then contain duplicate rows and np.delete removes fewer
# than N2 rows -- confirm this is intended
optimize = np.random.randint(0, len(others), size=N2)

opt = others[optimize]
others = np.delete(others, optimize, 0)

np.random.seed(2)
science = np.random.randint(0, len(others), size=N2)