Example #1
0
def hat_linear(data, bandwidth = 1.0, xmin = None, xmax = None, npoints = 100, code = 'C'):
    """ A Kernel density estimate using a hat (linear) kernel on a linear grid

    Parameters
    ----------
    data : numpy array (one dimensional)
        The data we are building the kernel density estimator from.

    bandwidth : float
        Half width of the linear hat function, i.e., (-bandwidth, bandwidth)

    xmin : float or None
        Bottom range of grid.  If none, then xmin = np.min(data).

    xmax : float or None
        Top range of grid.  If none, then xmax = np.max(data).

    npoints : positive integer
        Number of grid points inclusive of the end points

    code : 'C' or 'python'
        Which implementation to run.  'C' calls ``_kde.hat_linear`` and
        falls back to the pure python loop if that call raises.  The value
        is overridden to 'python' when the module-level PYTHON_ONLY flag is
        set.

    Returns
    -------
    den : numpy array
        Value of the kernel density estimator on the grid.  Only the density
        is returned, not the grid coordinates.

    Raises
    ------
    ValueError
        If code is neither 'C' nor 'python'.

    Notes
    -----
    In the pure python implementation grid point j sits at
    xmin + j*(xmax - xmin)/(npoints - 1) for j = 0, ..., npoints - 1.
    """

    if PYTHON_ONLY:
        code = 'python'

    if xmin is None:
        xmin = np.min(data)
    if xmax is None:
        xmax = np.max(data)

    # Force floats so the grid spacing below is never an integer division
    xmax = float(xmax)
    xmin = float(xmin)

    if code == 'C':
        try:
            den = _kde.hat_linear(data, bandwidth, xmin, xmax, npoints)
        except Exception:
            # If the C code fails, default to slow python code.  Catch
            # Exception rather than everything so that KeyboardInterrupt and
            # SystemExit still propagate.
            den = hat_linear(data, bandwidth, xmin, xmax, npoints, code = 'python')
    elif code == 'python':

        # Spacing between neighbouring grid points
        h = (xmax - xmin)/(npoints - 1)
        den = np.zeros(npoints)
        for x in data:
            # Indices of the first and last grid points lying within
            # [x - bandwidth, x + bandwidth], clipped to the grid
            bottom = max(int(ceil((x - bandwidth - xmin)/h)),0)
            top = min(int(floor((x + bandwidth - xmin)/h)), npoints - 1)
            for j in range(bottom, top + 1):
                # Hat kernel: height 1/bandwidth at x, decaying linearly to
                # zero at distance bandwidth
                den[j] += (1 - abs(x - (j*h + xmin))/bandwidth)/bandwidth

        # Average the kernels over all samples
        den = den/len(data)
    else:
        raise ValueError('Code type {} not allowed'.format(code))

    return den
Example #2
0
    def test_hat_linear(self):
        """Check that the python and C hat_linear results agree; print timings."""
        # Time the pure python implementation through the kde wrapper.
        t0 = time()
        (xgrid, den) = kde.hat_linear(
            self.data, bandwidth=self.bandwidth, xmin=self.xmin, xmax=self.xmax, npoints=self.npoints, code="python"
        )
        t1 = time()

        # Time the C extension called directly with the same arguments.
        t2 = time()
        den2 = _kde.hat_linear(self.data, self.bandwidth, self.xmin, self.xmax, self.npoints)
        t3 = time()

        python_seconds = t1 - t0
        c_seconds = t3 - t2
        # A coarse timer can report zero elapsed time for the C call; do not
        # let the informational speedup line crash the test.
        speedup = python_seconds / c_seconds if c_seconds > 0 else float("inf")

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Pure python      {:3.3g} seconds".format(python_seconds))
        print("C implementation {:3.3g} seconds".format(c_seconds))
        print("Speedup          {:3.0f} x".format(speedup))
        # The two implementations must agree in the max norm.
        self.assertTrue(np.linalg.norm(den - den2, np.inf) < 1e-13)
Example #3
0
    def test_hat_linear(self):
        """Check that the python and C hat_linear results agree; print timings."""
        # Time the pure python implementation through the kde wrapper.
        t0 = time()
        (xgrid, den) = kde.hat_linear(self.data,
                                      bandwidth=self.bandwidth,
                                      xmin=self.xmin,
                                      xmax=self.xmax,
                                      npoints=self.npoints,
                                      code='python')
        t1 = time()

        # Time the C extension called directly with the same arguments.
        t2 = time()
        den2 = _kde.hat_linear(self.data, self.bandwidth, self.xmin, self.xmax,
                               self.npoints)
        t3 = time()

        python_seconds = t1 - t0
        c_seconds = t3 - t2
        # A coarse timer can report zero elapsed time for the C call; do not
        # let the informational speedup line crash the test.
        if c_seconds > 0:
            speedup = python_seconds / c_seconds
        else:
            speedup = float('inf')

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("Pure python      {:3.3g} seconds".format(python_seconds))
        print("C implementation {:3.3g} seconds".format(c_seconds))
        print("Speedup          {:3.0f} x".format(speedup))
        # The two implementations must agree in the max norm.
        self.assertTrue(np.linalg.norm(den - den2, np.inf) < 1e-13)
Example #4
0
# Compare kde.hat_linear against a direct call to the _kde extension on
# uniformly distributed samples, printing timings and plotting both results.
# rand() needs integer dimensions; a float such as 1e5 is rejected by
# current NumPy.
data = np.random.rand(int(1e5))
#data = [.5]

npoints = 101
bandwidth = 0.1
xmin = 0
xmax = 1

# Time the call through the kde wrapper.
t0 = time()
(xgrid, den) = kde.hat_linear(data, bandwidth = bandwidth, xmin=xmin, xmax=xmax, npoints = npoints)
t1 = time()

# Single-argument print(...) behaves the same on Python 2 and 3.
print("Elapsed time {}".format(t1-t0))

# Time the extension called directly with the same settings.
t0 = time()
den2 = _kde.hat_linear(data, 0.1, xmin, xmax, npoints)
t1 = time()
print("Elapsed time {}".format(t1-t0))

# Report the largest absolute deviation; a signed maximum would hide
# grid points where den is below den2.
print("Error {}".format( np.max(np.abs(den-den2))))

print(den)
print(den2)


# Overlay both estimates on the same axes.
fig, ax = plt.subplots()
ax.plot(xgrid,den)
ax.plot(xgrid,den2)

plt.show()
Example #5
0
# Compare kde.hat_linear against a direct call to the _kde extension,
# printing timings and plotting both results.
# NOTE(review): `data` is not assigned in this snippet; it is expected to be
# defined earlier in the script.
npoints = 101
bandwidth = 0.1
xmin = 0
xmax = 1

# Time the call through the kde wrapper.
t0 = time()
(xgrid, den) = kde.hat_linear(data,
                              bandwidth=bandwidth,
                              xmin=xmin,
                              xmax=xmax,
                              npoints=npoints)
t1 = time()

# Single-argument print(...) behaves the same on Python 2 and 3.
print("Elapsed time {}".format(t1 - t0))

# Time the extension called directly with the same settings.
t0 = time()
den2 = _kde.hat_linear(data, 0.1, xmin, xmax, npoints)
t1 = time()
print("Elapsed time {}".format(t1 - t0))

# Report the largest absolute deviation; a signed maximum would hide
# grid points where den is below den2.
print("Error {}".format(np.max(np.abs(den - den2))))

print(den)
print(den2)

# Overlay both estimates on the same axes.
fig, ax = plt.subplots()
ax.plot(xgrid, den)
ax.plot(xgrid, den2)

plt.show()