def hat_linear(data, bandwidth=1.0, xmin=None, xmax=None, npoints=100, code='C'):
    """
    A Kernel density estimate using a hat (linear) kernel on a linear grid

    Parameters
    ----------
    data : numpy array (one dimensional)
        The data we are building the kernel density estimator from.
    bandwidth : float
        Half width of the linear hat function, i.e., (-bandwidth, bandwidth)
    xmin : float or None
        Bottom range of grid.  If None, then xmin = np.min(data).
    xmax : float or None
        Top range of grid.  If None, then xmax = np.max(data).
    npoints : positive integer
        Number of grid points inclusive of the end points
    code : {'C', 'python'}
        Which implementation to use.  'C' calls the compiled
        ``_kde.hat_linear`` routine and falls back to the pure python loop
        if that call raises.  The module level flag ``PYTHON_ONLY``
        overrides this argument and forces 'python'.

    Returns
    -------
    xgrid : numpy array
        Coordinates where the density estimator is evaluated.
    den : numpy array
        Value of the kernel density estimator on xgrid.

    Raises
    ------
    ValueError
        If ``code`` is neither 'C' nor 'python'.
    """
    if PYTHON_ONLY:
        code = 'python'

    if xmin is None:
        xmin = np.min(data)
    if xmax is None:
        xmax = np.max(data)
    xmax = float(xmax)
    xmin = float(xmin)

    if code not in ('C', 'python'):
        raise ValueError('Code type {} not allowed'.format(code))

    use_python = (code == 'python')
    if not use_python:
        try:
            den = _kde.hat_linear(data, bandwidth, xmin, xmax, npoints)
        except Exception:
            # If the C code fails (or is unavailable), default to the slow
            # python code.  Only Exception is caught so that Ctrl-C and
            # interpreter shutdown still propagate.
            use_python = True

    if use_python:
        # Grid spacing; grid point j sits at j*h + xmin.
        h = (xmax - xmin)/(npoints - 1)
        den = np.zeros(npoints)
        for x in data:
            # Only grid points within one bandwidth of x receive weight;
            # the index range is clipped to the grid.
            bottom = max(int(ceil((x - bandwidth - xmin)/h)), 0)
            top = min(int(floor((x + bandwidth - xmin)/h)), npoints - 1)
            for j in range(bottom, top + 1):
                den[j] += (1 - abs(x - (j*h + xmin))/bandwidth)/bandwidth
        den = den/len(data)

    # The grid the density was evaluated on, end points included.
    xgrid = np.linspace(xmin, xmax, npoints)
    return xgrid, den
def test_hat_linear(self):
    """Check that the pure python and C hat kernels agree, and report timings.

    Runs ``kde.hat_linear`` with ``code="python"`` and ``_kde.hat_linear``
    on the same fixture values (``self.data``, ``self.bandwidth``,
    ``self.xmin``, ``self.xmax``, ``self.npoints``) and asserts that the two
    densities differ by less than 1e-13 in the infinity norm.
    """
    t0 = time()
    (xgrid, den) = kde.hat_linear(
        self.data,
        bandwidth=self.bandwidth,
        xmin=self.xmin,
        xmax=self.xmax,
        npoints=self.npoints,
        code="python",
    )
    t1 = time()

    t2 = time()
    den2 = _kde.hat_linear(self.data, self.bandwidth, self.xmin, self.xmax, self.npoints)
    t3 = time()

    python_seconds = t1 - t0
    c_seconds = t3 - t2
    # The C call can finish within the timer resolution; report an infinite
    # speedup instead of failing the test with ZeroDivisionError.
    if c_seconds > 0:
        speedup = python_seconds / c_seconds
    else:
        speedup = float("inf")

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Pure python {:3.3g} seconds".format(python_seconds))
    print("C implementation {:3.3g} seconds".format(c_seconds))
    print("Speedup {:3.0f} x".format(speedup))

    max_abs_diff = np.linalg.norm(den - den2, np.inf)
    self.assertTrue(
        max_abs_diff < 1e-13,
        "python and C densities differ by {}".format(max_abs_diff),
    )
def test_hat_linear(self):
    """Compare the pure python hat kernel against the C implementation.

    Both implementations are evaluated on the fixture values stored on
    ``self`` (data, bandwidth, xmin, xmax, npoints).  Timings are printed
    for information only; the test passes when the infinity norm of the
    difference between the two densities is below 1e-13.
    """
    t0 = time()
    (xgrid, den) = kde.hat_linear(self.data, bandwidth=self.bandwidth,
                                  xmin=self.xmin, xmax=self.xmax,
                                  npoints=self.npoints, code='python')
    t1 = time()

    t2 = time()
    den2 = _kde.hat_linear(self.data, self.bandwidth, self.xmin, self.xmax, self.npoints)
    t3 = time()

    python_seconds = t1 - t0
    c_seconds = t3 - t2
    # Guard the ratio: a C run shorter than the timer resolution would
    # otherwise raise ZeroDivisionError and fail the test for no numerical reason.
    speedup = python_seconds / c_seconds if c_seconds > 0 else float('inf')

    # print(...) with one argument is valid on both Python 2 and Python 3.
    print("Pure python {:3.3g} seconds".format(python_seconds))
    print("C implementation {:3.3g} seconds".format(c_seconds))
    print("Speedup {:3.0f} x".format(speedup))

    max_abs_diff = np.linalg.norm(den - den2, np.inf)
    self.assertTrue(max_abs_diff < 1e-13,
                    'python and C densities differ by {}'.format(max_abs_diff))
# Benchmark / visual check: time the kde.hat_linear wrapper against a direct
# call to the C routine on uniform random data and plot both densities.

# The sample count must be an integer; np.random.rand rejects a float
# dimension such as 1e5.
data = np.random.rand(100000)
# For debugging, a single-point sample such as [.5] can be used instead.
npoints = 101
bandwidth = 0.1
xmin = 0
xmax = 1

# Wrapper call (uses its default implementation choice).
t0 = time()
(xgrid, den) = kde.hat_linear(data, bandwidth=bandwidth, xmin=xmin, xmax=xmax, npoints=npoints)
t1 = time()
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("Elapsed time {}".format(t1 - t0))

# Direct call to the C routine with the same bandwidth as above.
t0 = time()
den2 = _kde.hat_linear(data, bandwidth, xmin, xmax, npoints)
t1 = time()
print("Elapsed time {}".format(t1 - t0))

# Report the largest absolute deviation; the signed maximum would hide
# grid points where den2 exceeds den.
print("Error {}".format(np.max(np.abs(den - den2))))
print(den)
print(den2)

fig, ax = plt.subplots()
ax.plot(xgrid, den)
ax.plot(xgrid, den2)
plt.show()
# Benchmark / visual check of the wrapper against the C routine.
# NOTE(review): `data` is not assigned in this fragment; it is presumably
# defined earlier in the script — confirm.
npoints = 101
bandwidth = 0.1
xmin = 0
xmax = 1

# Wrapper call (uses its default implementation choice).
t0 = time()
(xgrid, den) = kde.hat_linear(data, bandwidth=bandwidth, xmin=xmin, xmax=xmax, npoints=npoints)
t1 = time()
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("Elapsed time {}".format(t1 - t0))

# Direct call to the C routine with the same bandwidth as above.
t0 = time()
den2 = _kde.hat_linear(data, bandwidth, xmin, xmax, npoints)
t1 = time()
print("Elapsed time {}".format(t1 - t0))

# Report the largest absolute deviation; the signed maximum would hide
# grid points where den2 exceeds den.
print("Error {}".format(np.max(np.abs(den - den2))))
print(den)
print(den2)

fig, ax = plt.subplots()
ax.plot(xgrid, den)
ax.plot(xgrid, den2)
plt.show()