def __init__(self, evts, t1, t2, k=10000):
    """Build an interpolated nearest-neighbour density model of ``evts``.

    A ``KNeighborsDensity`` estimator (method ``'bayesian'``) is fitted to
    the events, evaluated on a regular grid that starts at ``t1`` and stops
    before ``t2`` with a fixed spacing of 10.0, and the samples are joined
    with a degree-1 (piecewise linear) spline stored in ``self.model``.
    Progress messages are written to stderr without trailing newlines.

    Parameters
    ----------
    evts : array
        Event values; must support ``evts[:, numpy.newaxis]`` and
        ``evts.shape[0]`` (i.e. presumably a 1-D numpy array).
    t1, t2 : number
        Start and (exclusive) end of the evaluation grid.
    k : int, optional
        Number of neighbours handed to the estimator (default 10000).
    """
    report = sys.stderr.write

    self.evts = evts
    self.t1 = t1
    self.t2 = t2
    self.dt = self.t2 - self.t1

    # The estimator is fed column vectors: one row per event.
    events_column = evts[:, numpy.newaxis]
    n_events = evts.shape[0]

    # Fit the nearest-neighbour density estimator.
    knd = KNeighborsDensity('bayesian', n_neighbors=k)
    report("Fitting...")
    knd.fit(events_column)
    report("done")

    # Evaluate the estimate on the grid and divide by the number of events
    # (presumably eval() is not normalised per event -- TODO confirm).
    report("Evaluating...")
    x_grid = numpy.arange(self.t1, self.t2, 10.0)
    pdf = knd.eval(x_grid[:, numpy.newaxis]) / n_events
    report("done")

    # Linear interpolation between the grid samples.
    report("Interpolating...")
    self.model = interpolate.InterpolatedUnivariateSpline(x_grid, pdf, k=1)
    report("done")
) #dr7objid, petromag_u, petromag_g, petromag_r, pertomag_i, petromag_z, z(redshift),h alpha ew, h beta ew, OII ew, h delta ew, spiral, elliptical, uncertain umr = data[:, 3] - data[:, 5] #u-r color data = np.column_stack((data, umr)) coords = data[:, 1:3] print coords.shape print "started knd" knd = KNeighborsDensity( "bayesian", 10 ) #try using something other than 10 for n_neighbors values, to experiment + optimize knd.fit(coords) density = knd.eval(coords) data[:, 1] = density #!!!!! CHECK, is this still in order?? data = np.delete(data, 2, 1) #(col# 2 , 0/1 for row/col) print "finished knd" isSpiral = data[:, 12] #with one col removed!! #print data[:5, :] isElliptical = data[:, 13] #corresponds to the elliptical bool value isUncertain = data[:, 14] ellipticals = data[isElliptical == 1] spirals = data[isSpiral == 1] uncertains = data[isUncertain == 1]
ax = fig.add_subplot(subplot) xN = x[:N] t = np.linspace(-10, 30, 1000) # Compute density with KDE if use_sklearn_KDE: kde = KernelDensity(0.1, kernel='gaussian') kde.fit(xN[:, None]) dens_kde = np.exp(kde.score_samples(t[:, None])) else: kde = KDE('gaussian', h=0.1).fit(xN[:, None]) dens_kde = kde.eval(t[:, None]) / N # Compute density with Bayesian nearest neighbors nbrs = KNeighborsDensity('bayesian', n_neighbors=k).fit(xN[:, None]) dens_nbrs = nbrs.eval(t[:, None]) / N # plot the results ax.plot(t, true_pdf(t), ':', color='black', zorder=3, label="Generating Distribution") ax.plot(xN, -0.005 * np.ones(len(xN)), '|k') hist(xN, bins='blocks', ax=ax, normed=True, zorder=1, histtype='stepfilled',
N_values = (500, 5000) subplots = (211, 212) k_values = (10, 100) for N, k, subplot in zip(N_values, k_values, subplots): ax = fig.add_subplot(subplot) xN = ent2[:N] t = np.linspace(-10, 30, 1000) # Compute density with KDE kde = KDE('gaussian', h=0.1).fit(xN[:, None]) dens_kde = kde.eval(t[:, None]) / N # Compute density with Bayesian nearest neighbors nbrs = KNeighborsDensity('bayesian', n_neighbors=k).fit(xN[:, None]) dens_nbrs = nbrs.eval(t[:, None]) / N # plot the results #ax.plot(t, true_pdf(t), ':', color='black', zorder=3, # label="Generating Distribution") ax.plot(xN, -0.005 * np.ones(len(xN)), '|k', lw=1.5) hist(xN, bins='blocks', ax=ax, normed=True, zorder=1, histtype='stepfilled', lw=1.5, color='k', alpha=0.2, label="Bayesian Blocks") ax.plot(t, dens_nbrs, '-', lw=2, color='gray', zorder=2, label="Nearest Neighbors (k=%i)" % k) ax.plot(t, dens_kde, '-', color='black', zorder=3, label="Kernel Density (h=0.1)") # label the plot ax.text(0.02, 0.95, "%i points" % N, ha='left', va='top',
import time

# Trying u-g, g-r, r-i, i-z AS WELL AS raw u, g, r, i, z, still including all
# 5 spectral lines.
# Plan: get just spirals and ellipticals in 1 array, shuffle them, then
# extract the label column.

# Load six columns (file columns 1, 2, 3, 49, 50, 51) from the comma-separated
# catalogue, skipping the single header row.
data = np.loadtxt(
    "crossmatched3_combineddata1_srane.txt",
    delimiter=",",
    skiprows=1,
    usecols=(1, 2, 3, 49, 50, 51),
)
# Legend left by the original author:
#   dr7objid, petromag_u, petromag_g, petromag_r, petromag_i, petromag_z,
#   z (redshift), h alpha ew, h beta ew, OII ew, h delta ew,
#   spiral, elliptical, uncertain
# NOTE(review): this lists 14 fields but only six columns are loaded above,
# so it does not describe `data` one-to-one -- confirm against the file header.

# Array columns 1 and 2 (file columns 2 and 3) are the coordinates used for
# the nearest-neighbour density estimate.
coords = data[:, 1:3]
# Single-argument print calls behave the same on Python 2 and Python 3;
# the former print statements were syntax errors on Python 3.
print(coords.shape)

knd = KNeighborsDensity("bayesian", 10)
knd.fit(coords)
density = knd.eval(coords)

# Overwrite the first coordinate column with the density and drop the second.
# Five columns remain: file column 1, density, file columns 49, 50, 51.
data[:, 1] = density
data = np.delete(data, 2, 1)  # (index 2, axis 1 -> remove a column)

# Indices below refer to the array with one column removed.
isSpiral = data[:, 2]
print(data[:5, :])
isElliptical = data[:, 3]  # corresponds to the elliptical bool value
isUncertain = data[:, 4]

# Boolean-mask the rows by morphology flag.
ellipticals = data[isElliptical == 1]
spirals = data[isSpiral == 1]
uncertains = data[isUncertain == 1]
# Preparing the grid for evaluation. Nx and Ny can be set separately; here
# both equal the number of rows in `fit`, so the grid has N * N points.
N = len(fit)
Ny = N
Nx = N
xmin, xmax = (min(fit[:, 0]), max(fit[:, 0]))
ymin, ymax = (min(fit[:, 1]), max(fit[:, 1]))
x = np.linspace(xmin, xmax, Nx)
y = np.linspace(ymin, ymax, Ny)

# Flatten the two meshgrid arrays and pair them up as an (Nx * Ny, 2) array
# of (x, y) points. The map() result is turned into a list first: on Python 3
# map() returns an iterator, and np.vstack requires a real sequence of arrays
# (a bare iterator is deprecated and rejected by newer NumPy releases).
b = np.vstack(list(map(np.ravel, np.meshgrid(x, y)))).T

# Density estimation fit and evaluation
k = 40
knn = KNeighborsDensity('bayesian', k)
knn.fit(fit)
# Back to grid layout: rows follow y, columns follow x (meshgrid's default
# 'xy' indexing produces (Ny, Nx) arrays).
c = knn.eval(b).reshape((Ny, Nx))

# Getting optimization data and science data from what's left once the rows
# listed in `selection` are removed from the table.
others = np.delete(table.data, selection, 0)
N2 = 1000

# Fixed seeds keep both draws reproducible.
# NOTE(review): randint samples WITH replacement, so `opt` and `sci` can
# contain repeated rows, and fewer than N2 distinct rows may be removed from
# `others` below -- confirm this is intended.
np.random.seed(1)
optimize = np.random.randint(0, len(others), size=N2)
opt = others[optimize]
others = np.delete(others, optimize, 0)

np.random.seed(2)
science = np.random.randint(0, len(others), size=N2)
sci = others[science]