def test_1D_density():
    """Yield one 1D density check per nearest-neighbors estimator.

    Draws 5000 samples from a standard normal (seeded for repeatability),
    computes the reference density on 10 evenly spaced points in [-5, 5],
    and yields a ``check_1D_density`` call for both the 'simple' and the
    'bayesian' ``KNeighborsDensity`` estimators (250 neighbors each).
    """
    np.random.seed(0)
    gaussian = norm(0, 1)
    samples = gaussian.rvs((5000, 1))
    eval_points = np.linspace(-5, 5, 10)[:, np.newaxis]

    # Reference values: the pdf scaled by the number of samples drawn.
    expected = gaussian.pdf(eval_points[:, 0]) * samples.shape[0]

    estimators = [KNeighborsDensity(method=method, n_neighbors=250)
                  for method in ('simple', 'bayesian')]

    for estimator in estimators:
        yield (check_1D_density, estimator, samples, eval_points,
               expected, 100)
def NN_bayesian_density(x, y, NN, grid_size):
    """
    Function to compute the density of a distribution of particles
    using the K-Nearest Neighbors method from:

    http://www.astroml.org/modules/generated/astroML.density_estimation.KNeighborsDensity.html#astroML.density_estimation.KNeighborsDensity

    See Ivezic 10? for the details on how the algorithm works.

    Input:
    ------
    x : 1D numpy.array
        Array with the x-coordinates of the data.
    y : 1D numpy.array
        Array with the y-coordinates of the data.
    NN : int
        Number of neighbors used to compute the density.
    grid_size : int
        Number of grid points along each axis on which the density is
        going to be evaluated.

    Returns:
    --------
    dens_KNN : 2D numpy.array, shape (grid_size, grid_size)
        Density evaluated on a regular grid spanning the data range.
        Rows follow the y axis and columns follow the x axis.

    Raises:
    -------
    AssertionError
        If x and y have different lengths, or if NN or grid_size is not
        an integer.
    """
    assert len(x) == len(y), "Input data have different size"
    # isinstance (rather than type(...) == int) also accepts NumPy integers.
    assert isinstance(NN, (int, np.integer)), "NN should be of type int"
    assert isinstance(grid_size, (int, np.integer)), \
        "grid_size should be of type int"

    # Grid parameters
    Nx = grid_size
    Ny = grid_size
    xmin, xmax = (min(x), max(x))
    ymin, ymax = (min(y), max(y))

    # Making a grid. np.vstack needs a real sequence: on Python 3 ``map``
    # returns an iterator, which NumPy rejects, so the two flattened
    # coordinate arrays are passed as an explicit list.
    grid_x, grid_y = np.meshgrid(np.linspace(xmin, xmax, Nx),
                                 np.linspace(ymin, ymax, Ny))
    Xgrid = np.vstack([np.ravel(grid_x), np.ravel(grid_y)]).T

    # Putting data in 2d-array
    X = np.array([x, y]).T

    # Computing the density
    knn = KNeighborsDensity('bayesian', NN)
    dens_KNN = knn.fit(X).eval(Xgrid).reshape((Ny, Nx))

    return dens_KNN
def __init__(self, evts, t1, t2, k=10000):
    """Fit a nearest-neighbors density to the events and store a linear model.

    evts : array of event values; indexed as ``evts[:, numpy.newaxis]`` and
           ``evts.shape[0]``, so presumably a 1D numpy array (confirm
           against callers)
    t1   : start of the interval on which the density is tabulated
    t2   : end of the interval (excluded by ``numpy.arange``)
    k    : number of neighbors given to ``KNeighborsDensity``

    The tabulated density, divided by the number of events, is stored in
    ``self.model`` as a degree-1 ``InterpolatedUnivariateSpline``. Progress
    messages are written to stderr.
    """
    self.evts = evts

    # Interval covered by the model and its length
    self.t1 = t1
    self.t2 = t2
    self.dt = self.t2 - self.t1

    # Bayesian k-nearest-neighbors density estimate of the events
    estimator = KNeighborsDensity('bayesian', n_neighbors=k)

    sys.stderr.write("Fitting...")
    estimator.fit(evts[:, numpy.newaxis])
    sys.stderr.write("done")

    # Tabulate the estimate on a grid with a fixed step of 10.0
    sys.stderr.write("Evaluating...")
    grid = numpy.arange(self.t1, self.t2, 10.0)
    raw_density = estimator.eval(grid[:, numpy.newaxis])
    density = raw_density / evts.shape[0]
    sys.stderr.write("done")

    # Linear (k=1) interpolation between the tabulated points
    sys.stderr.write("Interpolating...")
    self.model = interpolate.InterpolatedUnivariateSpline(grid, density, k=1)
    sys.stderr.write("done")
setup_text_plots(fontsize=8, usetex=False)

# Read columns 5 and 6 of the catalogue as RA and Dec, and stack them as
# (dec, ra) pairs, one row per object.
ra, dec = np.loadtxt('simbad3.tsv', usecols=(5, 6), unpack=True)
X = np.vstack((dec, ra)).T

k = 500
Nx = 300
Ny = 300
xmin, xmax = (-30, 70)
# NOTE(review): ymin > ymax, so the second axis runs from 270 down to 0;
# presumably deliberate so that RA increases to the left - confirm.
ymin, ymax = (270, 0)

#------------------------------------------------------------
# Evaluate density
# np.vstack needs a real sequence: on Python 3 ``map`` returns an iterator,
# which NumPy rejects, so it is materialised with list() first.
Xgrid = np.vstack(list(map(np.ravel,
                           np.meshgrid(np.linspace(xmin, xmax, Nx),
                                       np.linspace(ymin, ymax, Ny))))).T

knn = KNeighborsDensity('simple', k)
dens = knn.fit(X).eval(Xgrid).reshape((Ny, Nx))

# Scatter the input points and show the density map on a log colour scale.
plt.figure(figsize=(9, 9))
plt.scatter(X[:, 1], X[:, 0], s=2, lw=0, c='r')
plt.imshow(dens.T, origin='lower', extent=(ymin, ymax, xmin, xmax),
           cmap='gray_r', norm=LogNorm())

s = 'k = ' + str(k)
plt.title(s)
plt.xlabel('RA')
plt.ylabel('Dec')

# Output file name, used only if the savefig call below is re-enabled.
name = 'sgrstream' + str(k) + '.png'
#plt.savefig(name)
plt.show()
#------------------------------------------------------------ # Create the grid on which to evaluate the results Nx = 50 Ny = 125 xmin, xmax = (-375, -175) ymin, ymax = (-300, 200) #------------------------------------------------------------ # Evaluate for several models Xgrid = np.vstack(map(np.ravel, np.meshgrid(np.linspace(xmin, xmax, Nx), np.linspace(ymin, ymax, Ny)))).T kde = KDE(metric='gaussian', h=5) dens_KDE = kde.fit(X).eval(Xgrid).reshape((Ny, Nx)) knn5 = KNeighborsDensity('bayesian', 5) dens_k5 = knn5.fit(X).eval(Xgrid).reshape((Ny, Nx)) knn40 = KNeighborsDensity('bayesian', 40) dens_k40 = knn40.fit(X).eval(Xgrid).reshape((Ny, Nx)) #------------------------------------------------------------ # Plot the results fig = plt.figure(figsize=(9, 4.0)) fig.subplots_adjust(left=0.1, right=0.95, bottom=0.14, top=0.9, hspace=0.01, wspace=0.01) # First plot: scatter the points ax1 = plt.subplot(221, aspect='equal') ax1.scatter(X[:, 1], X[:, 0], s=1, lw=0, c='k') ax1.text(0.98, 0.95, "input", ha='right', va='top',
delimiter=",", skiprows=1, usecols=(1, 2, 3, 10, 11, 12, 13, 14, 35, 37, 39, 41, 43, 49, 50, 51) ) #dr7objid, petromag_u, petromag_g, petromag_r, pertomag_i, petromag_z, z(redshift),h alpha ew, h beta ew, OII ew, h delta ew, spiral, elliptical, uncertain umr = data[:, 3] - data[:, 5] #u-r color data = np.column_stack((data, umr)) coords = data[:, 1:3] print coords.shape print "started knd" knd = KNeighborsDensity( "bayesian", 10 ) #try using something other than 10 for n_neighbors values, to experiment + optimize knd.fit(coords) density = knd.eval(coords) data[:, 1] = density #!!!!! CHECK, is this still in order?? data = np.delete(data, 2, 1) #(col# 2 , 0/1 for row/col) print "finished knd" isSpiral = data[:, 12] #with one col removed!! #print data[:5, :] isElliptical = data[:, 13] #corresponds to the elliptical bool value isUncertain = data[:, 14]
# Grid on which the density estimates are evaluated
Nx = 50
Ny = 125
xmin, xmax = (-375, -175)
ymin, ymax = (-300, 200)

#------------------------------------------------------------
# Evaluate for several models
# np.vstack needs a real sequence: on Python 3 ``map`` returns an iterator,
# which NumPy rejects, so it is materialised with list() first.
Xgrid = np.vstack(list(map(np.ravel,
                           np.meshgrid(np.linspace(xmin, xmax, Nx),
                                       np.linspace(ymin, ymax, Ny))))).T

# Gaussian kernel density estimate with bandwidth h=5
kde = KDE(metric='gaussian', h=5)
dens_KDE = kde.fit(X).eval(Xgrid).reshape((Ny, Nx))

# Bayesian nearest-neighbors estimates with 5 and 40 neighbors
knn5 = KNeighborsDensity('bayesian', 5)
dens_k5 = knn5.fit(X).eval(Xgrid).reshape((Ny, Nx))

knn40 = KNeighborsDensity('bayesian', 40)
dens_k40 = knn40.fit(X).eval(Xgrid).reshape((Ny, Nx))

#------------------------------------------------------------
# Plot the results
fig = plt.figure(figsize=(5, 2.2))
fig.subplots_adjust(left=0.12, right=0.95, bottom=0.2, top=0.9,
                    hspace=0.01, wspace=0.01)
for N, k, subplot in zip(N_values, k_values, subplots): ax = fig.add_subplot(subplot) xN = x[:N] t = np.linspace(-10, 30, 1000) # Compute density with KDE if use_sklearn_KDE: kde = KernelDensity(0.1, kernel='gaussian') kde.fit(xN[:, None]) dens_kde = np.exp(kde.score_samples(t[:, None])) else: kde = KDE('gaussian', h=0.1).fit(xN[:, None]) dens_kde = kde.eval(t[:, None]) / N # Compute density with Bayesian nearest neighbors nbrs = KNeighborsDensity('bayesian', n_neighbors=k).fit(xN[:, None]) dens_nbrs = nbrs.eval(t[:, None]) / N # plot the results ax.plot(t, true_pdf(t), ':', color='black', zorder=3, label="Generating Distribution") ax.plot(xN, -0.005 * np.ones(len(xN)), '|k') hist(xN, bins='blocks', ax=ax, normed=True, zorder=1,
fig.subplots_adjust()

# Two stacked panels: (sample size, neighbor count, subplot code)
N_values = (500, 5000)
subplots = (211, 212)
k_values = (10, 100)

for N, k, subplot in zip(N_values, k_values, subplots):
    ax = fig.add_subplot(subplot)
    xN = ent2[:N]
    t = np.linspace(-10, 30, 1000)

    # Kernel density estimate (Gaussian kernel, h=0.1), divided by N
    kde = KDE('gaussian', h=0.1).fit(xN[:, np.newaxis])
    dens_kde = kde.eval(t[:, np.newaxis]) / N

    # Bayesian nearest-neighbors estimate with k neighbors, divided by N
    nbrs = KNeighborsDensity('bayesian',
                             n_neighbors=k).fit(xN[:, np.newaxis])
    dens_nbrs = nbrs.eval(t[:, np.newaxis]) / N

    # Draw the panel (the generating-distribution curve is left disabled)
    #ax.plot(t, true_pdf(t), ':', color='black', zorder=3,
    #        label="Generating Distribution")

    # Tick marks for the individual samples, just below the axis
    ax.plot(xN, np.zeros(len(xN)) - 0.005, '|k', lw=1.5)

    hist(xN, bins='blocks', ax=ax, normed=True, zorder=1,
         histtype='stepfilled', lw=1.5, color='k', alpha=0.2,
         label="Bayesian Blocks")
    ax.plot(t, dens_nbrs, '-', lw=2, color='gray', zorder=2,
            label="Nearest Neighbors (k=%i)" % k)
    ax.plot(t, dens_kde, '-', color='black', zorder=3,
            label="Kernel Density (h=0.1)")

    # label the plot
""" Test density estimation techniques """ import pytest import numpy as np from numpy.testing import assert_allclose from scipy.stats import norm from astroML.density_estimation import KNeighborsDensity, GaussianMixture1D classifiers = [ KNeighborsDensity(method='simple', n_neighbors=250), KNeighborsDensity(method='bayesian', n_neighbors=250) ] @pytest.mark.parametrize("clf", classifiers) def test_1D_density(clf, atol=100): np.random.seed(0) dist = norm(0, 1) X = dist.rvs((5000, 1)) X2 = np.linspace(-5, 5, 10).reshape((10, 1)) true_dens = dist.pdf(X2[:, 0]) * X.shape[0] clf.fit(X) dens = clf.eval(X2) assert_allclose(dens, true_dens, atol=atol) def test_gaussian1d():
from astroML.datasets import fetch_sdss_specgals
from astroML.density_estimation import KNeighborsDensity
import time

#Trying u-g, g-r, r-i, i-z AS WELL AS raw u, g, r, i, z, still including all 5 spectral lines
#get just spirals and ellipticals in 1 array, shuffle them, then extract the label column

data = np.loadtxt("crossmatched3_combineddata1_srane.txt", delimiter=",",
                  skiprows=1, usecols=(1, 2, 3, 49, 50, 51))
#dr7objid, petromag_u, petromag_g, petromag_r, petromag_i, petromag_z, z(redshift),h alpha ew, h beta ew, OII ew, h delta ew, spiral, elliptical, uncertain

# Columns 1 and 2 are the coordinates handed to the density estimator.
coords = data[:, 1:3]
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(coords.shape)

knd = KNeighborsDensity("bayesian", 10)
knd.fit(coords)
density = knd.eval(coords)

# Replace the two coordinate columns by a single density column: column 1
# is overwritten, column 2 is dropped, so every later column shifts down
# by one.
data[:, 1] = density
data = np.delete(data, 2, 1) #(col# 2 , 0/1 for row/col)

isSpiral = data[:, 2] #with one col removed!!
print(data[:5, :])
isElliptical = data[:, 3] #corresponds to the elliptical bool value
isUncertain = data[:, 4]

# Keep only the rows flagged as elliptical.
ellipticals = data[isElliptical == 1]
#fit_table=np.delete(fit_table,np.argwhere(fit_table[:,1] < 0),0)
#fit_table=fit_table[(fit_table[:,0] >= 0)]

#Preparing the grid for evaluation. Nx and Ny can be set separately
N = len(fit)
Ny = N
Nx = N
xmin, xmax = (min(fit[:, 0]), max(fit[:, 0]))
ymin, ymax = (min(fit[:, 1]), max(fit[:, 1]))
x = np.linspace(xmin, xmax, Nx)
y = np.linspace(ymin, ymax, Ny)
# np.vstack needs a real sequence: on Python 3 ``map`` returns an iterator,
# which NumPy rejects, so it is materialised with list() first.
b = np.vstack(list(map(np.ravel, np.meshgrid(x, y)))).T

#Density estimation fit and evaluation
k = 40
knn = KNeighborsDensity('bayesian', k)
knn.fit(fit)
c = knn.eval(b).reshape((Ny, Nx))

#Getting optimization data and science data from what's left
others = np.delete(table.data, selection, 0)
N2 = 1000
np.random.seed(1)
# NOTE(review): randint samples with replacement, so `optimize` can contain
# repeated indices; `opt` may then hold duplicate rows and fewer than N2
# rows are removed from `others` below - confirm this is intended.
optimize = np.random.randint(0, len(others), size=N2)
opt = others[optimize]
others = np.delete(others, optimize, 0)
np.random.seed(2)
science = np.random.randint(0, len(others), size=N2)