Example #1
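These examples are excerpts from larger demo modules, so their module-level imports are not shown. A sketch of the imports they assume (the wafo module paths are assumptions and may differ between versions; qdemo below additionally uses integration helpers such as trapz, simps, boole and clencurt from wafo.integrate):

import numpy as np
import scipy.stats as st
import scipy.integrate as intg
import matplotlib.pyplot as plt

# kernel-density and smoothing tools; exact paths are assumptions
from wafo.kdetools import KDE, TKDE, Kernel, KRegression, BKRegression
from wafo.sg_filter import SavitzkyGolay, HampelFilter, smoothn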
def kde_demo4(N=50):
    """Demonstrate that the improved Sheather-Jones plug-in (hisj) is superior
       for 1D multimodal distributions

    KDEDEMO4 shows that the improved Sheather-Jones plug-in smoothing gives
    a better estimate than the normal reference rules (in this case hns).

    Examples
    --------
    >>> kde_demo4()
    """
    data = np.hstack((st.norm.rvs(loc=5, scale=1, size=(N,)),
                      st.norm.rvs(loc=-5, scale=1, size=(N,))))

    # x = np.linspace(1.5e-3, 5, 55)

    kde = KDE(data, kernel=Kernel('gauss', 'hns'))
    f = kde(output='plot', title='Ordinary KDE', plotflag=1)

    kde1 = KDE(data, kernel=Kernel('gauss', 'hisj'))
    f1 = kde1(output='plot', label='Improved Sheather-Jones KDE', plotflag=1)

    plt.figure(0)
    f.plot('r', label='hns={0}'.format(kde.hs))
    # plt.figure(2)
    f1.plot('b', label='hisj={0}'.format(kde1.hs))
    x = np.linspace(-9, 9)
    plt.plot(x, (st.norm.pdf(x, loc=-5, scale=1) +
                 st.norm.pdf(x, loc=5, scale=1)) / 2, 'k:',
             label='True density')
    plt.legend()
Example #2
def kde_demo1():
    """KDEDEMO1 Demonstrate the smoothing parameter impact on KDE.

    KDEDEMO1 shows the true density (dotted) compared to KDE based on 7
    observations (solid) and their individual kernels (dashed) for 3
    different values of the smoothing parameter, hs.

    Examples
    --------
    >>> kde_demo1()
    """
    x = np.linspace(-4, 4, 101)
    x0 = x / 2.0
    data = np.random.normal(loc=0, scale=1.0, size=7)
    kernel = Kernel('gauss')
    hs = kernel.hns(data)
    h_vec = [hs / 2, hs, 2 * hs]

    for ix, h in enumerate(h_vec):
        plt.figure(ix)
        kde = KDE(data, hs=h, kernel=kernel)
        f2 = kde(x, output='plot', title='h_s = {0:2.2f}'.format(float(h)),
                 ylab='Density')
        f2.plot('k-')

        plt.plot(x, st.norm.pdf(x, 0, 1), 'k:')
        n = len(data)
        plt.plot(data, np.zeros(data.shape), 'bx')
        y = kernel(x0) / (n * h * kernel.norm_factor(d=1, n=n))
        for i in range(n):
            plt.plot(data[i] + x0 * h, y, 'b--')
            plt.plot([data[i], data[i]], [0, np.max(y)], 'b')

        plt.axis([min(x), max(x), 0, 0.5])
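The per-kernel curves drawn in the loop make the construction explicit: each observation contributes a kernel bump scaled by 1/(n h), and the KDE is their sum,

    \hat{f}_h(x) = \frac{1}{n h} \sum_{i=1}^{n} K\left(\frac{x - x_i}{h}\right),

so hs/2 under-smooths (a spike per observation) and 2*hs over-smooths (the structure blurs out).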
Example #3
def kde_demo5(N=500):
    """Demonstrate that the improved Sheather-Jones plug-in (hisj) is superior
       for 2D multimodal distributions

    KDEDEMO5 shows that the improved Sheather-Jones plug-in smoothing gives
    a better estimate than the normal reference rules (in this case hns).

    Examples
    --------
    >>> kde_demo5()
    """
    data = np.hstack((st.norm.rvs(loc=5, scale=1, size=(2, N,)),
                      st.norm.rvs(loc=-5, scale=1, size=(2, N,))))
    kde = KDE(data, kernel=Kernel('gauss', 'hns'))
    f = kde(output='plot', plotflag=1,
            title='Ordinary KDE, hns={0:s}'.format(str(list(kde.hs))))

    kde1 = KDE(data, kernel=Kernel('gauss', 'hisj'))
    f1 = kde1(output='plot', plotflag=1,
              title='Ordinary KDE, hisj={0:s}'.format(str(list(kde1.hs))))

    plt.figure(0)
    plt.clf()
    f.plot()
    plt.plot(data[0], data[1], '.')
    plt.figure(1)
    plt.clf()
    f1.plot()
    plt.plot(data[0], data[1], '.')
Example #4
def kreg_demo1(hs=None, fast=True, fun='hisj'):
    """Compare KRegression to KernelReg from statsmodels.nonparametric

    Examples
    --------
    >>> kreg_demo1()
    """
    N = 100
    # ei = np.random.normal(loc=0, scale=0.075, size=(N,))
    ei = np.array([
        -0.08508516, 0.10462496, 0.07694448, -0.03080661, 0.05777525,
        0.06096313, -0.16572389, 0.01838912, -0.06251845, -0.09186784,
        -0.04304887, -0.13365788, -0.0185279, -0.07289167, 0.02319097,
        0.06887854, -0.08938374, -0.15181813, 0.03307712, 0.08523183,
        -0.0378058, -0.06312874, 0.01485772, 0.06307944, -0.0632959,
        0.18963205, 0.0369126, -0.01485447, 0.04037722, 0.0085057,
        -0.06912903, 0.02073998, 0.1174351, 0.17599277, -0.06842139,
        0.12587608, 0.07698113, -0.0032394, -0.12045792, -0.03132877,
        0.05047314, 0.02013453, 0.04080741, 0.00158392, 0.10237899,
        -0.09069682, 0.09242174, -0.15445323, 0.09190278, 0.07138498,
        0.03002497, 0.02495252, 0.01286942, 0.06449978, 0.03031802,
        0.11754861, -0.02322272, 0.00455867, -0.02132251, 0.09119446,
        -0.03210086, -0.06509545, 0.07306443, 0.04330647, 0.078111,
        -0.04146907, 0.05705476, 0.02492201, -0.03200572, -0.02859788,
        -0.05893749, 0.00089538, 0.0432551, 0.04001474, 0.04888828,
        -0.17708392, 0.16478644, 0.1171006, 0.11664846, 0.01410477,
        -0.12458953, -0.11692081, 0.0413047, -0.09292439, -0.07042327,
        0.14119701, -0.05114335, 0.04994696, -0.09520663, 0.04829406,
        -0.01603065, -0.1933216, 0.19352763, 0.11819496, 0.04567619,
        -0.08348306, 0.00812816, -0.00908206, 0.14528945, 0.02901065])
    x = np.linspace(0, 1, N)

    va_1 = 0.3 ** 2
    va_2 = 0.7 ** 2
    y0 = np.exp(-x ** 2 / (2 * va_1)) + 1.3 * np.exp(-(x - 1) ** 2 / (2 * va_2))
    y = y0 + ei
    kernel = Kernel('gauss', fun=fun)
    hopt = kernel.hisj(x)
    kreg = KRegression(
        x, y, p=0, hs=hs, kernel=kernel, xmin=-2 * hopt, xmax=1 + 2 * hopt)
    if fast:
        # NB: on Python 3 this instance attribute does not change kreg(x),
        # since special methods are looked up on the class
        kreg.__call__ = kreg.eval_grid_fast

    f = kreg(x, output='plot', title='Kernel regression', plotflag=1)
    plt.figure(0)
    f.plot(label='p=0')

    kreg.p = 1
    f1 = kreg(x, output='plot', title='Kernel regression', plotflag=1)
    f1.plot(label='p=1')
    # print(f1.data)
    plt.plot(x, y, '.', label='data')
    plt.plot(x, y0, 'k', label='True model')
    from statsmodels.nonparametric.kernel_regression import KernelReg
    kreg2 = KernelReg(y, x, var_type='c')
    y2 = kreg2.fit(x)
    plt.plot(x, y2[0], 'm', label='statsmodel')

    plt.legend()
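For reference, p=0 corresponds to the locally constant (Nadaraya-Watson) estimator and p=1 to local linear regression; statsmodels' KernelReg defaults to reg_type='ll' (local linear), so its curve should track the p=1 fit. The p=0 estimator is

    \hat{m}(x) = \frac{\sum_i K((x - x_i)/h)\, y_i}{\sum_j K((x - x_j)/h)}.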
Example #5
def _plot_error(neval, err_dic, plot_error):
    if plot_error:
        plt.figure(0)
        for name in err_dic:
            plt.loglog(neval, err_dic[name], label=name)

        plt.xlabel('number of function evaluations')
        plt.ylabel('error')
        plt.legend()
Example #6
def demo_savitzky_on_noisy_chirp():
    """
    Examples
    --------
    >>> demo_savitzky_on_noisy_chirp()

    >>> plt.close()
    """
    plt.figure(figsize=(7, 12))

    # generate chirp signal
    tvec = np.arange(0, 6.28, .02)
    true_signal = np.sin(tvec * (2.0 + tvec))
    true_d_signal = (2 + tvec) * np.cos(tvec * (2.0 + tvec))

    # add noise to signal
    noise = np.random.normal(size=true_signal.shape)
    signal = true_signal + .15 * noise

    # plot signal
    plt.subplot(311)
    plt.plot(signal)
    plt.title('signal')

    # smooth and plot signal
    plt.subplot(312)
    savgol = SavitzkyGolay(n=8, degree=4)
    s_signal = savgol.smooth(signal)
    s2 = smoothn(signal, robust=True)
    plt.plot(s_signal)
    plt.plot(s2)
    plt.plot(true_signal, 'r--')
    plt.title('smoothed signal')

    # smooth derivative of signal and plot it
    plt.subplot(313)
    savgol1 = SavitzkyGolay(n=8, degree=1, diff_order=1)

    dt = tvec[1] - tvec[0]
    d_signal = savgol1.smooth(signal) / dt

    plt.plot(d_signal)
    plt.plot(true_d_signal, 'r--')
    plt.title('smoothed derivative of signal')
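Note that SavitzkyGolay differentiates with respect to the sample index, so the smoothed derivative is converted to a time derivative by the sampling step,

    df/dt ≈ (df/d index) / dt,    dt = tvec[1] - tvec[0],

which is why d_signal is divided by dt above.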
Example #7
def kde_demo3():
    """Demonstrate the difference between transformation and ordinary-KDE in 2D

    KDEDEMO3 shows that the transformation KDE is a better estimate for
    Rayleigh distributed data around 0 than the ordinary KDE.

    Examples
    --------
    >>> kde_demo3()
    """
    data = st.rayleigh.rvs(scale=1, size=(2, 300))

    # x = np.linspace(1.5e-3, 5, 55)

    kde = KDE(data)
    f = kde(output='plot', title='Ordinary KDE', plotflag=1)
    plt.figure(0)
    f.plot()

    plt.plot(data[0], data[1], '.')

    # plotnorm(data**L2) gives a straight line => L2 = 0.5 is reasonable
    hs = Kernel('gauss').get_smoothing(data**0.5)
    tkde = TKDE(data, hs=hs, L2=0.5)
    ft = tkde.eval_grid_fast(
        output='plot', title='Transformation KDE', plotflag=1)

    plt.figure(1)
    ft.plot()

    plt.plot(data[0], data[1], '.')

    plt.figure(0)
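TKDE with L2=0.5 presumably fits an ordinary KDE to the transformed data z = x**L2 (per coordinate in the 2D case) and maps it back with the change-of-variables formula

    \hat{f}_X(x) = \hat{f}_Z(x^{L_2})\, L_2\, x^{L_2 - 1},

which keeps all estimated mass on x >= 0 and removes the boundary bias the ordinary KDE shows near the origin.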
Example #8
def kde_demo2():
    """Demonstrate the difference between transformation- and ordinary-KDE.

    KDEDEMO2 shows that the transformation KDE is a better estimate for
    Rayleigh distributed data around 0 than the ordinary KDE.

    Examples
    --------
    >>> kde_demo2()
    """
    data = st.rayleigh.rvs(scale=1, size=300)

    x = np.linspace(1.5e-2, 5, 55)

    kde = KDE(data)
    f = kde(output='plot', title='Ordinary KDE (hs={0:})'.format(kde.hs))
    plt.figure(0)
    f.plot()

    plt.plot(x, st.rayleigh.pdf(x, scale=1), ':')

    # plotnorm(data**L2) gives a straight line => L2 = 0.5 is reasonable
    hs = Kernel('gauss').get_smoothing(data**0.5)
    tkde = TKDE(data, hs=hs, L2=0.5)
    ft = tkde(x, output='plot',
              title='Transformation KDE (hs={0:})'.format(tkde.tkde.hs))
    plt.figure(1)
    ft.plot()

    plt.plot(x, st.rayleigh.pdf(x, scale=1), ':')

    plt.figure(0)
Example #9
def test_hampel():
    randint = np.random.randint
    Y = 5000 + np.random.randn(1000)
    outliers = randint(0, 1000, size=(10,))
    Y[outliers] = Y[outliers] + randint(1000, size=(10,))
    YY, res = HampelFilter(dx=3, t=3, fulloutput=True)(Y)
    YY1, res1 = HampelFilter(dx=1, t=3, adaptive=0.1, fulloutput=True)(Y)
    YY2, res2 = HampelFilter(dx=3, t=0, fulloutput=True)(Y)  # median
    plt.figure(1)
    plot_hampel(Y, YY, res)
    plt.title('Standard HampelFilter')
    plt.figure(2)
    plot_hampel(Y, YY1, res1)
    plt.title('Adaptive HampelFilter')
    plt.figure(3)
    plot_hampel(Y, YY2, res2)
    plt.title('Median filter')
    plt.show('hold')
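plot_hampel is used here but not defined in these excerpts. A minimal sketch of such a helper, assuming the fulloutput dict res carries the nominal data and the filter bounds under the keys 'Y0', 'LB' and 'UB' (the key names are assumptions, not confirmed by the source):

def plot_hampel(Y, YY, res):
    # hypothetical helper: the key names 'Y0', 'LB', 'UB' are assumed
    X = np.arange(len(Y))
    plt.plot(X, Y, 'b.')           # original data
    plt.plot(X, YY, 'r')           # Hampel-filtered data
    plt.plot(X, res['Y0'], 'b--')  # nominal (rolling-median) data
    plt.plot(X, res['LB'], 'r--')  # lower bound of the filter
    plt.plot(X, res['UB'], 'r--')  # upper bound of the filter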
Example #10
def check_bkregression():
    """
    Check binomial regression

     Example
    -------
    >>> check_bkregression()
    """
    # plt.ion()
    k = 0
    for _i, n in enumerate([50, 100, 300, 600]):
        x, y, fun1 = _get_data(n,
                               symmetric=True,
                               loc1=0.1,
                               scale1=0.6,
                               scale2=0.75)
        bkreg = BKRegression(x, y, a=0.05, b=0.05)
        fbest = bkreg.prb_search_best(hsfun='hste',
                                      alpha=0.05,
                                      color='g',
                                      label='Transit_D')

        figk = plt.figure(k)
        ax = figk.gca()
        k += 1
        #        fbest.score.plot(axis=ax)
        #        axsize = ax.axis()
        #        ax.vlines(fbest.hs,axsize[2]+1,axsize[3])
        #        ax.set(yscale='log')
        fbest.labels.title = 'N = {:d}'.format(n)
        fbest.plot(axis=ax)
        ax.plot(x, fun1(x), 'r')
        ax.legend(frameon=False, markerscale=4)
        # ax = plt.gca()
        ax.set_yticklabels(ax.get_yticks() * 100.0)
        ax.grid(True)
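_get_data is likewise not shown. A sketch consistent with the call site above (every detail here is an assumption): x is drawn on an interval, fun1 is a smooth true success probability, and y are Bernoulli outcomes:

def _get_data(n=100, symmetric=False, loc1=0.1, scale1=0.6, scale2=0.75):
    # hypothetical generator for binomial-regression test data
    x = np.sort(6 * np.random.rand(n) - 3)  # points on [-3, 3]

    def fun1(x):
        # bimodal success probability built from two normal pdfs
        p = scale2 * (st.norm.pdf(x, loc=-loc1, scale=scale1) +
                      st.norm.pdf(x, loc=loc1, scale=scale1))
        return np.clip(p, 0.0, 1.0)

    y = np.random.rand(n) < fun1(x)  # Bernoulli outcomes
    if symmetric:
        x, y = np.hstack((x, -x)), np.hstack((y, y))
    return x, y, fun1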
Example #11
def demo_hampel():
    """
    Examples
    --------
    >>> demo_hampel()

    >>> plt.close()
    """
    randint = np.random.randint
    Y = 5000 + np.random.randn(1000)
    outliers = randint(0, 1000, size=(10, ))
    Y[outliers] = Y[outliers] + randint(1000, size=(10, ))
    YY, res = HampelFilter(dx=3, t=3, fulloutput=True)(Y)
    YY1, res1 = HampelFilter(dx=1, t=3, adaptive=0.1, fulloutput=True)(Y)
    YY2, res2 = HampelFilter(dx=3, t=0, fulloutput=True)(Y)  # median
    plt.figure(1)
    plot_hampel(Y, YY, res)
    plt.title('Standard HampelFilter')
    plt.figure(2)
    plot_hampel(Y, YY1, res1)
    plt.title('Adaptive HampelFilter')
    plt.figure(3)
    plot_hampel(Y, YY2, res2)
    plt.title('Median filter')
Example #12
## Chapter 5 Extreme value analysis

## Section 5.1 Weibull and Gumbel papers
import numpy as np
import scipy.interpolate as si
from wafo.plotbackend import plotbackend as plt
import wafo.data as wd
import wafo.objects as wo
import wafo.stats as ws
import wafo.kdetools as wk
pstate = 'off'

# Significant wave-height data on Weibull paper

fig = plt.figure()
ax = fig.add_subplot(111)
Hs = wd.atlantic()
wei = ws.weibull_min.fit(Hs)
tmp = ws.probplot(Hs, wei, ws.weibull_min, plot=ax)
plt.show()
#wafostamp([],'(ER)')
#disp('Block = 1'),pause(pstate)

##
# Significant wave-height data on Gumbel paper
plt.clf()
ax = fig.add_subplot(111)
gum = ws.gumbel_r.fit(Hs)
tmp1 = ws.probplot(Hs, gum, ws.gumbel_r, plot=ax)
#wafostamp([],'(ER)')
Example #13
def qdemo(f, a, b, kmax=9, plot_error=False):
    '''
    Compares different quadrature rules.

    Parameters
    ----------
    f : callable
        function to integrate
    a, b : scalars
        lower and upper integration limits
    kmax : scalar, optional
        number of refinement levels; level k uses n = 2**(k + 1) + 1 points
    plot_error : bool, optional
        if True, plot the error of each method versus the number of
        function evaluations

    Details
    -------
    qdemo(f,a,b) computes and compares various approximations to
    the integral of f from a to b: the composite trapezoid, Simpson's
    and Boole's rules on equal-length subintervals, together with
    Chebychev, Clenshaw-Curtis and Gauss-Legendre quadrature.
    In a case like qdemo(exp,0,3) one can see the expected
    convergence rate for each method.
    In a case like qdemo(sqrt,0,3), the convergence rate is limited
    not by the method, but by the singularity of the integrand.

    Examples
    --------
    >>> import numpy as np
    >>> qdemo(np.exp,0,3)
    true value =  19.08553692
     ftn,                Boole,            Chebychev
    evals       approx        error       approx        error
       3, 19.4008539142, 0.3153169910, 19.5061466023, 0.4206096791
       5, 19.0910191534, 0.0054822302, 19.0910191534, 0.0054822302
       9, 19.0856414320, 0.0001045088, 19.0855374134, 0.0000004902
      17, 19.0855386464, 0.0000017232, 19.0855369232, 0.0000000000
      33, 19.0855369505, 0.0000000273, 19.0855369232, 0.0000000000
      65, 19.0855369236, 0.0000000004, 19.0855369232, 0.0000000000
     129, 19.0855369232, 0.0000000000, 19.0855369232, 0.0000000000
     257, 19.0855369232, 0.0000000000, 19.0855369232, 0.0000000000
     513, 19.0855369232, 0.0000000000, 19.0855369232, 0.0000000000
     ftn,      Clenshaw-Curtis,       Gauss-Legendre
    evals       approx        error       approx        error
       3, 19.5061466023, 0.4206096791, 19.0803304585, 0.0052064647
       5, 19.0834145766, 0.0021223465, 19.0855365951, 0.0000003281
       9, 19.0855369150, 0.0000000082, 19.0855369232, 0.0000000000
      17, 19.0855369232, 0.0000000000, 19.0855369232, 0.0000000000
      33, 19.0855369232, 0.0000000000, 19.0855369232, 0.0000000000
      65, 19.0855369232, 0.0000000000, 19.0855369232, 0.0000000000
     129, 19.0855369232, 0.0000000000, 19.0855369232, 0.0000000000
     257, 19.0855369232, 0.0000000000, 19.0855369232, 0.0000000000
     513, 19.0855369232, 0.0000000000, 19.0855369232, 0.0000000000
     ftn,                Simps,                Trapz
    evals       approx        error       approx        error
       3, 19.5061466023, 0.4206096791, 22.5366862979, 3.4511493747
       5, 19.1169646189, 0.0314276957, 19.9718950387, 0.8863581155
       9, 19.0875991312, 0.0020622080, 19.3086731081, 0.2231361849
      17, 19.0856674267, 0.0001305035, 19.1414188470, 0.0558819239
      33, 19.0855451052, 0.0000081821, 19.0995135407, 0.0139766175
      65, 19.0855374350, 0.0000005118, 19.0890314614, 0.0034945382
     129, 19.0855369552, 0.0000000320, 19.0864105817, 0.0008736585
     257, 19.0855369252, 0.0000000020, 19.0857553393, 0.0002184161
     513, 19.0855369233, 0.0000000001, 19.0855915273, 0.0000546041
    '''
    true_val, _tol = intg.quad(f, a, b)
    print('true value = %12.8f' % (true_val,))
    neval = np.zeros(kmax, dtype=int)
    vals_dic = {}
    err_dic = {}

    # try various approximations
    methods = [trapz, simps, boole, ]

    for k in range(kmax):
        n = 2 ** (k + 1) + 1
        neval[k] = n
        x = np.linspace(a, b, n)
        y = f(x)
        for method in methods:
            name = method.__name__.title()
            q = method(y, x)
            vals_dic.setdefault(name, []).append(q)
            err_dic.setdefault(name, []).append(abs(q - true_val))

        name = 'Clenshaw-Curtis'
        q, _ec3 = clencurt(f, a, b, (n - 1) // 2)
        vals_dic.setdefault(name, []).append(q[0])
        err_dic.setdefault(name, []).append(abs(q[0] - true_val))

        name = 'Chebychev'
        ck = np.polynomial.chebyshev.chebfit(x, y, deg=min(n-1, 36))
        cki = np.polynomial.chebyshev.chebint(ck)
        q = np.polynomial.chebyshev.chebval(x[-1], cki)
        vals_dic.setdefault(name, []).append(q)
        err_dic.setdefault(name, []).append(abs(q - true_val))
        # ck = chebfit(f,n,a,b)
        # q  = chebval(b,chebint(ck,a,b),a,b)
        # qc2[k] = q; ec2[k] = abs(q - true)

        name = 'Gauss-Legendre'  # quadrature
        q = intg.fixed_quad(f, a, b, n=n)[0]
        # [x, w]=qrule(n,1)
        # x = (b-a)/2*x + (a+b)/2     % Transform base points X.
        # w = (b-a)/2*w               % Adjust weigths.
        # q = sum(feval(f,x)*w)
        vals_dic.setdefault(name, []).append(q)
        err_dic.setdefault(name, []).append(abs(q - true_val))

    # display results
    names = sorted(vals_dic.keys())
    num_cols = 2
    formats = ['%4.0f, ', ] + ['%10.10f, ', ] * num_cols * 2
    formats[-1] = formats[-1].split(',')[0]
    formats_h = ['%4s, ', ] + ['%20s, ', ] * num_cols
    formats_h[-1] = formats_h[-1].split(',')[0]
    headers = ['evals'] + ['%12s %12s' % ('approx', 'error')] * num_cols
    while len(names) > 0:
        print(''.join(fi % t for fi, t in zip(formats_h,
                                              ['ftn'] + names[:num_cols])))
        print(' '.join(headers))

        data = [neval]
        for name in names[:num_cols]:
            data.append(vals_dic[name])
            data.append(err_dic[name])
        data = np.vstack(tuple(data)).T
        for k in range(kmax):
            tmp = data[k].tolist()
            print(''.join(fi % t for fi, t in zip(formats, tmp)))
        if plot_error:
            plt.figure(0)
            for name in names[:num_cols]:
                plt.loglog(neval, err_dic[name], label=name)

        names = names[num_cols:]
    if plot_error:
        plt.xlabel('number of function evaluations')
        plt.ylabel('error')
        plt.legend()
        plt.show('hold')
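For reference, the tables above reflect the textbook convergence rates: with n equally spaced points and h = (b - a)/(n - 1), the composite rules satisfy

    E_trapz = O(h^2),  E_simps = O(h^4),  E_boole = O(h^6),

while the Chebychev, Clenshaw-Curtis and Gauss-Legendre columns converge spectrally for smooth integrands, which is why their errors reach rounding level already at n = 17 for exp on [0, 3].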