def __init__(self, evts, t1, t2, k=10000):
    """
    Build a piecewise-linear density model of the events between t1 and t2.

    A nearest-neighbours density estimator is fitted to the events,
    evaluated on a regular grid covering [t1, t2) and then interpolated.
    Progress messages are written to standard error.

    Parameters
    ----------
    evts : array
        Event values; indexed as ``evts[:, numpy.newaxis]`` and queried for
        ``evts.shape[0]``, so presumably a 1D numpy array -- TODO confirm.
    t1, t2 : float
        Start and stop of the interval on which the model is evaluated.
    k : int, optional
        Number of neighbours handed to KNeighborsDensity (default 10000).
    """
    report = sys.stderr.write

    # Remember the inputs and the length of the interval
    self.evts = evts
    self.t1 = t1
    self.t2 = t2
    self.dt = self.t2 - self.t1

    # Nearest-neighbours density estimate of the events
    estimator = KNeighborsDensity('bayesian', n_neighbors=k)
    report("Fitting...")
    estimator.fit(evts[:, numpy.newaxis])
    report("done")

    # Evaluate the estimate on a grid with a fixed step of 10.0 and divide
    # by the number of events
    report("Evaluating...")
    grid = numpy.arange(self.t1, self.t2, 10.0)
    raw_density = estimator.eval(grid[:, numpy.newaxis])
    normalised = raw_density / evts.shape[0]
    report("done")

    # Linear (k=1) spline through the grid values; this is the stored model
    report("Interpolating...")
    self.model = interpolate.InterpolatedUnivariateSpline(grid,
                                                          normalised,
                                                          k=1)
    report("done")
def NN_bayesian_density(x, y, NN, grid_size):
    """
    Compute the density of a 2D distribution of particles on a regular
    grid using the Bayesian K-Nearest Neighbors estimator from astroML:
    http://www.astroml.org/modules/generated/astroML.density_estimation.KNeighborsDensity.html#astroML.density_estimation.KNeighborsDensity

    The grid spans the bounding box of the data, from (min(x), min(y)) to
    (max(x), max(y)), with ``grid_size`` points along each axis.

    Input:
    ------
    x : 1D numpy.array
        Array with the x-coordinates of the data.
    y : 1D numpy.array
        Array with the y-coordinates of the data.
    NN : int
        Number of neighbors used to compute the density.
    grid_size : int
        Number of grid points per axis on which the density is evaluated.

    Output:
    -------
    dens_KNN : 2D numpy.array
        Density on the grid, shape (grid_size, grid_size). Rows run along
        y and columns along x (default 'xy' meshgrid layout).

    Raises:
    -------
    AssertionError
        If x and y differ in length, or NN / grid_size is not an integer.
    """
    # NOTE: asserts are kept so callers see the same exception type as
    # before; be aware they are skipped when Python runs with -O.
    assert len(x) == len(y), "Input data have different size"
    # isinstance also admits numpy integer scalars (e.g. the result of
    # np.int64(...)), which the former type(...) == int test rejected.
    assert isinstance(NN, (int, np.integer)), "NN should be of type int"
    assert isinstance(grid_size, (int, np.integer)), \
        "grid_size should be of type int"

    # Grid parameters
    Nx = grid_size
    Ny = grid_size
    xmin, xmax = (min(x), max(x))
    ymin, ymax = (min(y), max(y))

    # Making a grid: one row per grid node, columns are (x, y).
    # map() is an iterator on Python 3 and np.vstack needs a real
    # sequence, so the result is materialised with list() first.
    Xgrid = np.vstack(list(map(np.ravel,
                               np.meshgrid(np.linspace(xmin, xmax, Nx),
                                           np.linspace(ymin, ymax, Ny))))).T

    # Putting data in 2d-array, one row per particle
    X = np.array([x, y]).T

    # Computing the density
    knn = KNeighborsDensity('bayesian', NN)
    dens_KNN = knn.fit(X).eval(Xgrid).reshape((Ny, Nx))
    return dens_KNN
setup_text_plots(fontsize=8, usetex=False)

# Read two columns (indices 5 and 6) of the catalogue as RA and Dec
ra, dec = np.loadtxt('simbad3.tsv', usecols=(5, 6), unpack=True)

# Sample matrix for the estimator: column 0 is Dec, column 1 is RA
X = np.vstack((dec, ra)).T

k = 500   # number of neighbours used by the density estimator
Nx = 300  # grid points along X[:, 0] (Dec)
Ny = 300  # grid points along X[:, 1] (RA)
xmin, xmax = (-30, 70)
# NOTE(review): the RA limits are listed high-to-low, which makes the
# horizontal plot axis run from 270 down to 0 -- presumably intentional.
ymin, ymax = (270, 0)

#------------------------------------------------------------
# Evaluate density
# map() is an iterator on Python 3 and np.vstack needs a real sequence,
# so the raveled meshgrid arrays are collected into a list first.
Xgrid = np.vstack(list(map(np.ravel,
                           np.meshgrid(np.linspace(xmin, xmax, Nx),
                                       np.linspace(ymin, ymax, Ny))))).T

knn = KNeighborsDensity('simple', k)
# Rows of `dens` follow the RA grid, columns follow the Dec grid
dens = knn.fit(X).eval(Xgrid).reshape((Ny, Nx))

plt.figure(figsize=(9, 9))
plt.scatter(X[:, 1], X[:, 0], s=2, lw=0, c='r')
# Transposed so that RA is horizontal and Dec vertical, like the scatter
plt.imshow(dens.T, origin='lower', extent=(ymin, ymax, xmin, xmax),
           cmap='gray_r', norm=LogNorm())
s = 'k = '+str(k)
plt.title(s)
plt.xlabel('RA')
plt.ylabel('Dec')

# Output file name, only used if the savefig line is re-enabled
name = 'sgrstream'+str(k)+'.png'
#plt.savefig(name)
plt.show()
# Create the grid on which to evaluate the results Nx = 50 Ny = 125 xmin, xmax = (-375, -175) ymin, ymax = (-300, 200) #------------------------------------------------------------ # Evaluate for several models Xgrid = np.vstack(map(np.ravel, np.meshgrid(np.linspace(xmin, xmax, Nx), np.linspace(ymin, ymax, Ny)))).T kde = KDE(metric='gaussian', h=5) dens_KDE = kde.fit(X).eval(Xgrid).reshape((Ny, Nx)) knn5 = KNeighborsDensity('bayesian', 5) dens_k5 = knn5.fit(X).eval(Xgrid).reshape((Ny, Nx)) knn40 = KNeighborsDensity('bayesian', 40) dens_k40 = knn40.fit(X).eval(Xgrid).reshape((Ny, Nx)) #------------------------------------------------------------ # Plot the results fig = plt.figure(figsize=(9, 4.0)) fig.subplots_adjust(left=0.1, right=0.95, bottom=0.14, top=0.9, hspace=0.01, wspace=0.01) # First plot: scatter the points ax1 = plt.subplot(221, aspect='equal') ax1.scatter(X[:, 1], X[:, 0], s=1, lw=0, c='k') ax1.text(0.98, 0.95, "input", ha='right', va='top', transform=ax1.transAxes, fontsize=12,
usecols=(1, 2, 3, 10, 11, 12, 13, 14, 35, 37, 39, 41, 43, 49, 50, 51) )
# (the line above closes a call that starts before the visible part of
# the file; the code below expects its result in `data`)
# Original column note: dr7objid, petromag_u, petromag_g, petromag_r,
# petromag_i, petromag_z, z(redshift), h alpha ew, h beta ew, OII ew,
# h delta ew, spiral, elliptical, uncertain
# NOTE(review): that note names 14 fields but 16 columns are selected;
# the two columns used as coordinates below (indices 1 and 2) appear to
# be the ones missing from it -- TODO confirm against the input file.

umr = data[:, 3] - data[:, 5] # u-r color, appended below as the last column
data = np.column_stack((data, umr))

# Columns 1 and 2 are the two coordinates handed to the density estimator
coords = data[:, 1:3]
print coords.shape

print "started knd"
knd = KNeighborsDensity( "bayesian", 10 ) # try other n_neighbors values than 10 to experiment + optimize
knd.fit(coords)
# The estimator is evaluated on the same rows it was fitted to, so
# `density` has one value per row of `data`
density = knd.eval(coords)

# Replace the first coordinate column with the density and drop the
# second one; every column index above 2 shifts down by one afterwards.
data[:, 1] = density # original author's note: CHECK, is this still in order??
data = np.delete(data, 2, 1) # np.delete(arr, obj=2, axis=1): remove column 2
print "finished knd"

# Morphology flag columns, using the indices valid after the deletion
isSpiral = data[:, 12] # with one col removed!!
#print data[:5, :]
isElliptical = data[:, 13] # corresponds to the elliptical bool value
isUncertain = data[:, 14]

# Keep the rows whose flag equals 1
ellipticals = data[isElliptical == 1]
spirals = data[isSpiral == 1]
# Grid definition (Nx is expected to be set earlier in the script):
# Ny is the number of grid points along the second coordinate of X and
# the (min, max) pairs are the grid limits along each coordinate.
Ny = 125
xmin, xmax = (-375, -175)
ymin, ymax = (-300, 200)

#------------------------------------------------------------
# Evaluate for several models
# Xgrid has one row per grid node and two columns. map() is an iterator
# on Python 3 and np.vstack needs a real sequence, so the raveled
# meshgrid arrays are collected into a list first.
Xgrid = np.vstack(list(map(np.ravel,
                           np.meshgrid(np.linspace(xmin, xmax, Nx),
                                       np.linspace(ymin, ymax, Ny))))).T

# Kernel density estimate with a Gaussian kernel and h=5
kde = KDE(metric='gaussian', h=5)
dens_KDE = kde.fit(X).eval(Xgrid).reshape((Ny, Nx))

# Bayesian nearest-neighbours estimates with 5 and with 40 neighbours
knn5 = KNeighborsDensity('bayesian', 5)
dens_k5 = knn5.fit(X).eval(Xgrid).reshape((Ny, Nx))

knn40 = KNeighborsDensity('bayesian', 40)
dens_k40 = knn40.fit(X).eval(Xgrid).reshape((Ny, Nx))

#------------------------------------------------------------
# Plot the results
fig = plt.figure(figsize=(5, 2.2))
fig.subplots_adjust(left=0.12, right=0.95, bottom=0.2, top=0.9,
                    hspace=0.01, wspace=0.01)

# First plot: scatter the points
from astroML.density_estimation import KNeighborsDensity
import time

# Original experiment note: trying u-g, g-r, r-i, i-z AS WELL AS raw
# u, g, r, i, z, still including all 5 spectral lines.
# NOTE(review): only six columns are loaded below, so that note (and the
# longer column list that used to follow it) does not describe this
# script -- TODO confirm what file columns 1, 2 and 3 contain.
# Get just spirals and ellipticals in 1 array, shuffle them, then extract
# the label column.
data = np.loadtxt("crossmatched3_combineddata1_srane.txt",
                  delimiter=",",
                  skiprows=1,
                  usecols=(1, 2, 3, 49, 50, 51))
# Layout of `data` at this point: column 0, two coordinate columns (1, 2),
# then the spiral, elliptical and uncertain flags (3, 4, 5).

coords = data[:, 1:3]
# Single-argument print(...) gives the same output on Python 2 and 3
print(coords.shape)

# Bayesian nearest-neighbours density (10 neighbours), evaluated on the
# same rows it was fitted to: one density value per row of `data`
knd = KNeighborsDensity("bayesian", 10)
knd.fit(coords)
density = knd.eval(coords)

# Replace the first coordinate column with the density and drop the
# second one; the flag columns move from 3, 4, 5 to 2, 3, 4.
data[:, 1] = density
data = np.delete(data, 2, 1)  # np.delete(arr, obj=2, axis=1)

isSpiral = data[:, 2]  # with one col removed!!
print(data[:5, :])
isElliptical = data[:, 3]  # corresponds to the elliptical bool value
isUncertain = data[:, 4]

# Keep the rows whose flag equals 1
ellipticals = data[isElliptical == 1]
spirals = data[isSpiral == 1]
#fit_table=fit_table[(fit_table[:,0] >= 0)]

# Preparing the grid for evaluation. Nx and Ny can be set separately.
# NOTE(review): with Nx = Ny = len(fit) the grid has len(fit)**2 nodes,
# which grows quickly with the sample size -- confirm this is intended.
N = len(fit)
Ny = N
Nx = N
xmin, xmax = (min(fit[:, 0]), max(fit[:, 0]))
ymin, ymax = (min(fit[:, 1]), max(fit[:, 1]))
x = np.linspace(xmin, xmax, Nx)
y = np.linspace(ymin, ymax, Ny)
# One row per grid node, columns are (x, y). map() is an iterator on
# Python 3 and np.vstack needs a real sequence, hence the list().
b = np.vstack(list(map(np.ravel, np.meshgrid(x, y)))).T

# Density estimation fit and evaluation
k = 40
knn = KNeighborsDensity('bayesian', k)
knn.fit(fit)
c = knn.eval(b).reshape((Ny, Nx))  # rows follow y, columns follow x

# Getting optimization data and science data from what's left
others = np.delete(table.data, selection, 0)
N2 = 1000
# Fixed seeds keep both draws reproducible. randint samples with
# replacement, so an index can occur more than once in each draw.
np.random.seed(1)
optimize = np.random.randint(0, len(others), size=N2)
opt = others[optimize]
# Remove the rows picked for optimization before drawing the science set
others = np.delete(others, optimize, 0)
np.random.seed(2)
science = np.random.randint(0, len(others), size=N2)