def kde_demo4(N=50):
    """Compare the hns and hisj smoothing rules on bimodal 1D data.

    KDEDEMO4 draws N samples from each of two unit-variance normal
    distributions centred at +5 and -5, estimates the density twice
    (once with the normal reference rule, hns, and once with the improved
    Sheather-Jones plug-in, hisj) and draws both estimates together with
    the true mixture density in figure 0.  The demo is meant to illustrate
    that hisj copes better with multimodal data than hns.

    Parameters
    ----------
    N : int
        Number of samples drawn from each of the two modes.

    Examples
    --------
    >>> kde_demo4()
    """
    # Draw order (mode at +5 first) is kept so seeded runs are reproducible.
    upper_mode = st.norm.rvs(loc=5, scale=1, size=(N,))
    lower_mode = st.norm.rvs(loc=-5, scale=1, size=(N,))
    data = np.hstack((upper_mode, lower_mode))

    kde_hns = KDE(data, kernel=Kernel('gauss', 'hns'))
    f_hns = kde_hns(output='plot', title='Ordinary KDE', plotflag=1)

    kde_hisj = KDE(data, kernel=Kernel('gauss', 'hisj'))
    f_hisj = kde_hisj(output='plot', label='Ordinary KDE', plotflag=1)

    plt.figure(0)
    f_hns.plot('r', label='hns={0}'.format(kde_hns.hs))
    f_hisj.plot('b', label='hisj={0}'.format(kde_hisj.hs))

    # Equal-weight mixture of the two generating normal densities.
    grid = np.linspace(-9, 9)
    pdf_lower = st.norm.pdf(grid, loc=-5, scale=1)
    pdf_upper = st.norm.pdf(grid, loc=5, scale=1)
    plt.plot(grid, (pdf_lower + pdf_upper) / 2, 'k:', label='True density')
    plt.legend()
def kde_demo1():
    """KDEDEMO1 Show how the smoothing parameter affects a KDE.

    Seven standard-normal observations are drawn and a Gaussian-kernel KDE
    is computed for three smoothing parameters: hs/2, hs and 2*hs, where hs
    is the value returned by ``kernel.hns``.  For each value a separate
    figure shows the KDE (solid), the true density (dotted), the
    observations (crosses) and the individual kernels (dashed).

    Examples
    --------
    >>> kde_demo1()
    """
    x = np.linspace(-4, 4, 101)
    kernel_grid = x / 2.0
    data = np.random.normal(loc=0, scale=1.0, size=7)
    n_obs = len(data)
    baseline = np.zeros(data.shape)

    kernel = Kernel('gauss')
    hs = kernel.hns(data)

    for fig_no, h in enumerate([hs / 2, hs, 2 * hs]):
        plt.figure(fig_no)
        estimator = KDE(data, hs=h, kernel=kernel)
        title = 'h_s = {0:2.2f}'.format(float(h))
        density = estimator(x, output='plot', title=title, ylab='Density')
        density.plot('k-')
        plt.plot(x, st.norm.pdf(x, 0, 1), 'k:')
        plt.plot(data, baseline, 'bx')

        # Contribution of one observation's kernel to the estimate.
        bump = kernel(kernel_grid) / (
            n_obs * h * kernel.norm_factor(d=1, n=n_obs))
        for obs in data:
            plt.plot(obs + kernel_grid * h, bump, 'b--')
            plt.plot([obs, obs], [0, np.max(bump)], 'b')
        plt.axis([min(x), max(x), 0, 0.5])
def kde_demo5(N=500):
    """Compare the hns and hisj smoothing rules on bimodal 2D data.

    KDEDEMO5 draws N two-dimensional samples around (5, 5) and N around
    (-5, -5), then plots a KDE based on the normal reference rule (hns) in
    figure 0 and one based on the improved Sheather-Jones plug-in (hisj) in
    figure 1, each overlaid with the raw data.  The demo is meant to
    illustrate that hisj copes better with multimodal data than hns.

    Parameters
    ----------
    N : int
        Number of samples drawn from each of the two modes.

    Examples
    --------
    >>> kde_demo5()
    """
    # Draw order (mode at +5 first) is kept so seeded runs are reproducible.
    upper_mode = st.norm.rvs(loc=5, scale=1, size=(2, N,))
    lower_mode = st.norm.rvs(loc=-5, scale=1, size=(2, N,))
    data = np.hstack((upper_mode, lower_mode))

    kde_hns = KDE(data, kernel=Kernel('gauss', 'hns'))
    title_hns = 'Ordinary KDE, hns={0:s}'.format(str(list(kde_hns.hs)))
    f_hns = kde_hns(output='plot', plotflag=1, title=title_hns)

    kde_hisj = KDE(data, kernel=Kernel('gauss', 'hisj'))
    title_hisj = 'Ordinary KDE, hisj={0:s}'.format(str(list(kde_hisj.hs)))
    f_hisj = kde_hisj(output='plot', plotflag=1, title=title_hisj)

    for fig_no, estimate in enumerate((f_hns, f_hisj)):
        plt.figure(fig_no)
        plt.clf()
        estimate.plot()
        plt.plot(data[0], data[1], '.')
def kreg_demo1(hs=None, fast=True, fun='hisj'):
    """Compare KRegression to KernelReg from statsmodels.nonparametric.

    A fixed noise realisation is added to a smooth two-bump model on
    [0, 1].  The data are then regressed with KRegression using polynomial
    order p=0 and p=1, and with statsmodels' KernelReg, and all curves are
    drawn in figure 0 together with the data and the true model.

    Parameters
    ----------
    hs : optional
        Smoothing parameter passed on to KRegression.
    fast : bool
        If True the regression is evaluated with ``kreg.eval_grid_fast``,
        otherwise by calling the KRegression object directly.
    fun : str
        Name of the smoothing-parameter rule passed to ``Kernel``.

    Examples
    --------
    >>> kreg_demo1()
    """
    N = 100
    # Frozen draw of np.random.normal(loc=0, scale=0.075, size=(N,)) so that
    # the demo is reproducible.
    ei = np.array([
        -0.08508516, 0.10462496, 0.07694448, -0.03080661, 0.05777525,
        0.06096313, -0.16572389, 0.01838912, -0.06251845, -0.09186784,
        -0.04304887, -0.13365788, -0.0185279, -0.07289167, 0.02319097,
        0.06887854, -0.08938374, -0.15181813, 0.03307712, 0.08523183,
        -0.0378058, -0.06312874, 0.01485772, 0.06307944, -0.0632959,
        0.18963205, 0.0369126, -0.01485447, 0.04037722, 0.0085057,
        -0.06912903, 0.02073998, 0.1174351, 0.17599277, -0.06842139,
        0.12587608, 0.07698113, -0.0032394, -0.12045792, -0.03132877,
        0.05047314, 0.02013453, 0.04080741, 0.00158392, 0.10237899,
        -0.09069682, 0.09242174, -0.15445323, 0.09190278, 0.07138498,
        0.03002497, 0.02495252, 0.01286942, 0.06449978, 0.03031802,
        0.11754861, -0.02322272, 0.00455867, -0.02132251, 0.09119446,
        -0.03210086, -0.06509545, 0.07306443, 0.04330647, 0.078111,
        -0.04146907, 0.05705476, 0.02492201, -0.03200572, -0.02859788,
        -0.05893749, 0.00089538, 0.0432551, 0.04001474, 0.04888828,
        -0.17708392, 0.16478644, 0.1171006, 0.11664846, 0.01410477,
        -0.12458953, -0.11692081, 0.0413047, -0.09292439, -0.07042327,
        0.14119701, -0.05114335, 0.04994696, -0.09520663, 0.04829406,
        -0.01603065, -0.1933216, 0.19352763, 0.11819496, 0.04567619,
        -0.08348306, 0.00812816, -0.00908206, 0.14528945, 0.02901065])
    x = np.linspace(0, 1, N)

    va_1 = 0.3 ** 2
    va_2 = 0.7 ** 2
    y0 = np.exp(-x ** 2 / (2 * va_1)) + \
        1.3 * np.exp(-(x - 1) ** 2 / (2 * va_2))
    y = y0 + ei

    kernel = Kernel('gauss', fun=fun)
    hopt = kernel.hisj(x)
    kreg = KRegression(
        x, y, p=0, hs=hs, kernel=kernel, xmin=-2 * hopt, xmax=1 + 2 * hopt)

    # Implicit calls (``kreg(...)``) look up ``__call__`` on the type, so
    # rebinding ``kreg.__call__`` on the instance never took effect and the
    # ``fast`` flag was ignored.  Select the evaluator explicitly instead.
    evaluate = kreg.eval_grid_fast if fast else kreg

    f = evaluate(x, output='plot', title='Kernel regression', plotflag=1)
    plt.figure(0)
    f.plot(label='p=0')

    kreg.p = 1
    f1 = evaluate(x, output='plot', title='Kernel regression', plotflag=1)
    f1.plot(label='p=1')

    plt.plot(x, y, '.', label='data')
    plt.plot(x, y0, 'k', label='True model')

    # Imported locally so statsmodels is only required by this demo.
    from statsmodels.nonparametric.kernel_regression import KernelReg
    kreg2 = KernelReg(y, x, ('c'))
    y2 = kreg2.fit(x)
    plt.plot(x, y2[0], 'm', label='statsmodel')

    plt.legend()
def _plot_error(neval, err_dic, plot_error):
    """Draw a log-log error curve per method in figure 0, if requested.

    Parameters
    ----------
    neval : sequence
        Number of function evaluations (x-axis values).
    err_dic : dict
        Maps a method name to its sequence of errors (y-axis values); the
        name is used as the legend label.
    plot_error : bool
        Nothing is drawn when this is falsy.
    """
    if not plot_error:
        return

    plt.figure(0)
    for name, errors in err_dic.items():
        plt.loglog(neval, errors, label=name)
    plt.xlabel('number of function evaluations')
    plt.ylabel('error')
    plt.legend()
def demo_savitzky_on_noisy_chirp():
    """Smooth a noisy chirp signal and estimate its derivative.

    Three stacked subplots are drawn: the noisy signal, the signal smoothed
    with SavitzkyGolay and with smoothn (compared to the noise-free signal,
    dashed red), and the Savitzky-Golay derivative estimate (compared to
    the analytical derivative, dashed red).

    Examples
    --------
    >>> demo_savitzky_on_noisy_chirp()
    >>> plt.close()
    """
    plt.figure(figsize=(7, 12))

    # generate chirp signal
    tvec = np.arange(0, 6.28, .02)
    phase = tvec * (2.0 + tvec)
    true_signal = np.sin(phase)
    # Chain rule: d/dt sin(2 t + t**2) = (2 + 2 t) cos(2 t + t**2).
    # The former factor (2 + t) made the reference derivative wrong.
    true_d_signal = (2.0 + 2.0 * tvec) * np.cos(phase)

    # add noise to signal
    noise = np.random.normal(size=true_signal.shape)
    signal = true_signal + .15 * noise

    # plot signal
    plt.subplot(311)
    plt.plot(signal)
    plt.title('signal')

    # smooth and plot signal
    plt.subplot(312)
    savgol = SavitzkyGolay(n=8, degree=4)
    s_signal = savgol.smooth(signal)
    s2 = smoothn(signal, robust=True)
    plt.plot(s_signal)
    plt.plot(s2)
    plt.plot(true_signal, 'r--')
    plt.title('smoothed signal')

    # smooth derivative of signal and plot it
    plt.subplot(313)
    savgol1 = SavitzkyGolay(n=8, degree=1, diff_order=1)
    # The filter output is divided by the sample spacing to obtain a
    # derivative with respect to tvec.
    dt = tvec[1] - tvec[0]
    d_signal = savgol1.smooth(signal) / dt

    plt.plot(d_signal)
    plt.plot(true_d_signal, 'r--')
    plt.title('smoothed derivative of signal')
def kde_demo3():
    """Contrast transformation KDE with ordinary KDE in 2D.

    KDEDEMO3 draws 300 two-dimensional Rayleigh distributed samples and
    plots an ordinary KDE (figure 0) and a transformation KDE with L2=0.5
    (figure 1), each overlaid with the raw data.  The demo is meant to
    illustrate that the transformation KDE gives the better estimate
    around 0.

    Examples
    --------
    >>> kde_demo3()
    """
    data = st.rayleigh.rvs(scale=1, size=(2, 300))

    ordinary = KDE(data)
    f_ordinary = ordinary(output='plot', title='Ordinary KDE', plotflag=1)
    plt.figure(0)
    f_ordinary.plot()
    plt.plot(data[0], data[1], '.')

    # The smoothing parameter is chosen on the transformed scale
    # (data**L2 with L2 = 0.5).
    smoothing = Kernel('gauss').get_smoothing(data**0.5)
    transformed = TKDE(data, hs=smoothing, L2=0.5)
    f_transformed = transformed.eval_grid_fast(
        output='plot', title='Transformation KDE', plotflag=1)

    plt.figure(1)
    f_transformed.plot()
    plt.plot(data[0], data[1], '.')

    plt.figure(0)
def kde_demo2():
    """Contrast transformation KDE with ordinary KDE in 1D.

    KDEDEMO2 draws 300 Rayleigh distributed samples and plots an ordinary
    KDE (figure 0) and a transformation KDE with L2=0.5 (figure 1), each
    together with the true Rayleigh density (dotted).  The demo is meant
    to illustrate that the transformation KDE gives the better estimate
    around 0.

    Examples
    --------
    >>> kde_demo2()
    """
    data = st.rayleigh.rvs(scale=1, size=300)
    x = np.linspace(1.5e-2, 5, 55)

    ordinary = KDE(data)
    title_ordinary = 'Ordinary KDE (hs={0:})'.format(ordinary.hs)
    f_ordinary = ordinary(output='plot', title=title_ordinary)
    plt.figure(0)
    f_ordinary.plot()
    plt.plot(x, st.rayleigh.pdf(x, scale=1), ':')

    # The smoothing parameter is chosen on the transformed scale
    # (data**L2 with L2 = 0.5).
    smoothing = Kernel('gauss').get_smoothing(data**0.5)
    transformed = TKDE(data, hs=smoothing, L2=0.5)
    title_transformed = 'Transformation KDE (hs={0:})'.format(
        transformed.tkde.hs)
    f_transformed = transformed(x, output='plot', title=title_transformed)

    plt.figure(1)
    f_transformed.plot()
    plt.plot(x, st.rayleigh.pdf(x, scale=1), ':')

    plt.figure(0)
def test_hampel():
    """Plot three HampelFilter variants on noisy data with outliers.

    1000 normally distributed samples around 5000 receive random positive
    offsets at 10 randomly chosen indices.  The data are filtered with a
    standard filter (dx=3, t=3), an adaptive filter (dx=1, t=3,
    adaptive=0.1) and a filter with t=0 (labelled 'Median filter'); the
    results are drawn in figures 1-3 and the figures are then shown.
    """
    randint = np.random.randint
    Y = 5000 + np.random.randn(1000)
    outliers = randint(0, 1000, size=(10,))
    Y[outliers] = Y[outliers] + randint(1000, size=(10,))

    # Run all filters before any plotting, as (filtered data, result) pairs.
    standard = HampelFilter(dx=3, t=3, fulloutput=True)(Y)
    adaptive = HampelFilter(dx=1, t=3, adaptive=0.1, fulloutput=True)(Y)
    median = HampelFilter(dx=3, t=0, fulloutput=True)(Y)

    panels = (
        (1, standard, 'Standard HampelFilter'),
        (2, adaptive, 'Adaptive HampelFilter'),
        (3, median, 'Median filter'),
    )
    for fig_no, (filtered, result), title in panels:
        plt.figure(fig_no)
        plot_hampel(Y, filtered, result)
        plt.title(title)
    plt.show('hold')
def check_bkregression():
    """Check binomial kernel regression for several sample sizes.

    For each sample size in (50, 100, 300, 600) data are generated with
    ``_get_data``, the best estimate is searched for with
    ``BKRegression.prb_search_best`` and the result is plotted in its own
    figure together with the generating function (red).

    Example
    -------
    >>> check_bkregression()
    """
    sample_sizes = [50, 100, 300, 600]
    for fig_no, n in enumerate(sample_sizes):
        x, y, fun1 = _get_data(n, symmetric=True, loc1=0.1, scale1=0.6,
                               scale2=0.75)
        regression = BKRegression(x, y, a=0.05, b=0.05)
        fbest = regression.prb_search_best(
            hsfun='hste', alpha=0.05, color='g', label='Transit_D')

        ax = plt.figure(fig_no).gca()
        fbest.labels.title = 'N = {:d}'.format(n)
        fbest.plot(axis=ax)
        ax.plot(x, fun1(x), 'r')
        ax.legend(frameon=False, markerscale=4)
        # Relabel the y ticks with their values multiplied by 100.
        ax.set_yticklabels(ax.get_yticks() * 100.0)
        ax.grid(True)
def demo_hampel():
    """Plot three HampelFilter variants on noisy data with outliers.

    1000 normally distributed samples around 5000 receive random positive
    offsets at 10 randomly chosen indices.  The data are filtered with a
    standard filter (dx=3, t=3), an adaptive filter (dx=1, t=3,
    adaptive=0.1) and a filter with t=0 (labelled 'Median filter'); the
    results are drawn in figures 1-3.

    Examples
    --------
    >>> demo_hampel()
    >>> plt.close()
    """
    randint = np.random.randint
    Y = 5000 + np.random.randn(1000)
    outliers = randint(0, 1000, size=(10, ))
    Y[outliers] = Y[outliers] + randint(1000, size=(10, ))

    # Run all filters before any plotting, as (filtered data, result) pairs.
    standard = HampelFilter(dx=3, t=3, fulloutput=True)(Y)
    adaptive = HampelFilter(dx=1, t=3, adaptive=0.1, fulloutput=True)(Y)
    median = HampelFilter(dx=3, t=0, fulloutput=True)(Y)

    panels = (
        (1, standard, 'Standard HampelFilter'),
        (2, adaptive, 'Adaptive HampelFilter'),
        (3, median, 'Median filter'),
    )
    for fig_no, (filtered, result), title in panels:
        plt.figure(fig_no)
        plot_hampel(Y, filtered, result)
        plt.title(title)
## Chapter 5 Extreme value analysis ## Section 5.1 Weibull and Gumbel papers from __future__ import division import numpy as np import scipy.interpolate as si from wafo.plotbackend import plotbackend as plt import wafo.data as wd import wafo.objects as wo import wafo.stats as ws import wafo.kdetools as wk pstate = 'off' # Significant wave-height data on Weibull paper, fig = plt.figure() ax = fig.add_subplot(111) Hs = wd.atlantic() wei = ws.weibull_min.fit(Hs) tmp = ws.probplot(Hs, wei, ws.weibull_min, plot=ax) plt.show() #wafostamp([],'(ER)') #disp('Block = 1'),pause(pstate) ## # Significant wave-height data on Gumbel paper, plt.clf() ax = fig.add_subplot(111) gum = ws.gumbel_r.fit(Hs) tmp1 = ws.probplot(Hs, gum, ws.gumbel_r, plot=ax) #wafostamp([],'(ER)')
def qdemo(f, a, b, kmax=9, plot_error=False):
    """
    Compares different quadrature rules.

    Parameters
    ----------
    f : callable
        function
    a, b : scalars
        lower and upper integration limits
    kmax : int
        number of refinement levels; level k uses n = 2**(k+1) + 1
        function evaluations.
    plot_error : bool
        if True the errors are also drawn in a log-log plot (figure 0).

    Details
    -------
    qdemo(f,a,b) computes and compares various approximations to
    the integral of f from a to b.  Six approximations are used:
    the composite trapezoid, Simpson's, and Boole's rules, all with
    equal length subintervals, together with Clenshaw-Curtis, Chebychev
    and Gauss-Legendre quadrature.  The reference value is computed with
    intg.quad and the results are printed two methods at a time.
    In a case like qdemo(exp,0,3) one can see the expected
    convergence rates for each of the equal-subinterval methods.
    In a case like qdemo(sqrt,0,3), the convergence rate is limited
    not by the method, but by the singularity of the integrand.

    Example
    -------
    >>> import numpy as np
    >>> qdemo(np.exp,0,3)
    true value =  19.08553692
     ftn,                Boole,            Chebychev
    evals       approx        error       approx        error
       3, 19.4008539142, 0.3153169910, 19.5061466023, 0.4206096791
       5, 19.0910191534, 0.0054822302, 19.0910191534, 0.0054822302
       9, 19.0856414320, 0.0001045088, 19.0855374134, 0.0000004902
      17, 19.0855386464, 0.0000017232, 19.0855369232, 0.0000000000
      33, 19.0855369505, 0.0000000273, 19.0855369232, 0.0000000000
      65, 19.0855369236, 0.0000000004, 19.0855369232, 0.0000000000
     129, 19.0855369232, 0.0000000000, 19.0855369232, 0.0000000000
     257, 19.0855369232, 0.0000000000, 19.0855369232, 0.0000000000
     513, 19.0855369232, 0.0000000000, 19.0855369232, 0.0000000000
     ftn,      Clenshaw-Curtis,       Gauss-Legendre
    evals       approx        error       approx        error
       3, 19.5061466023, 0.4206096791, 19.0803304585, 0.0052064647
       5, 19.0834145766, 0.0021223465, 19.0855365951, 0.0000003281
       9, 19.0855369150, 0.0000000082, 19.0855369232, 0.0000000000
      17, 19.0855369232, 0.0000000000, 19.0855369232, 0.0000000000
      33, 19.0855369232, 0.0000000000, 19.0855369232, 0.0000000000
      65, 19.0855369232, 0.0000000000, 19.0855369232, 0.0000000000
     129, 19.0855369232, 0.0000000000, 19.0855369232, 0.0000000000
     257, 19.0855369232, 0.0000000000, 19.0855369232, 0.0000000000
     513, 19.0855369232, 0.0000000000, 19.0855369232, 0.0000000000
     ftn,                Simps,                Trapz
    evals       approx        error       approx        error
       3, 19.5061466023, 0.4206096791, 22.5366862979, 3.4511493747
       5, 19.1169646189, 0.0314276957, 19.9718950387, 0.8863581155
       9, 19.0875991312, 0.0020622080, 19.3086731081, 0.2231361849
      17, 19.0856674267, 0.0001305035, 19.1414188470, 0.0558819239
      33, 19.0855451052, 0.0000081821, 19.0995135407, 0.0139766175
      65, 19.0855374350, 0.0000005118, 19.0890314614, 0.0034945382
     129, 19.0855369552, 0.0000000320, 19.0864105817, 0.0008736585
     257, 19.0855369252, 0.0000000020, 19.0857553393, 0.0002184161
     513, 19.0855369233, 0.0000000001, 19.0855915273, 0.0000546041
    """
    true_val, _tol = intg.quad(f, a, b)
    print('true value = %12.8f' % (true_val,))
    neval = zeros(kmax, dtype=int)
    vals_dic = {}
    err_dic = {}

    def record(name, q):
        # Store the approximation and its absolute error under `name`.
        vals_dic.setdefault(name, []).append(q)
        err_dic.setdefault(name, []).append(abs(q - true_val))

    # try various approximations
    methods = [trapz, simps, boole, ]

    # range (not xrange) so the loop runs on Python 2 and Python 3 alike.
    for k in range(kmax):
        n = 2 ** (k + 1) + 1
        neval[k] = n
        x = np.linspace(a, b, n)
        y = f(x)
        for method in methods:
            record(method.__name__.title(), method(y, x))

        # n is always odd, so floor division is exact and keeps the point
        # count an integer even under true division.
        q, _ec3 = clencurt(f, a, b, (n - 1) // 2)
        record('Clenshaw-Curtis', q[0])

        # Fit a Chebyshev series to the samples, integrate it and evaluate
        # the antiderivative at the upper limit.
        ck = np.polynomial.chebyshev.chebfit(x, y, deg=min(n - 1, 36))
        cki = np.polynomial.chebyshev.chebint(ck)
        record('Chebychev', np.polynomial.chebyshev.chebval(x[-1], cki))

        record('Gauss-Legendre', intg.fixed_quad(f, a, b, n=n)[0])

    # display results
    names = sorted(vals_dic.keys())
    num_cols = 2
    formats = ['%4.0f, ', ] + ['%10.10f, ', ] * num_cols * 2
    formats[-1] = formats[-1].split(',')[0]  # no separator after last column
    formats_h = ['%4s, ', ] + ['%20s, ', ] * num_cols
    formats_h[-1] = formats_h[-1].split(',')[0]
    headers = ['evals'] + ['%12s %12s' % ('approx', 'error')] * num_cols
    while names:
        shown = names[:num_cols]
        print(''.join(fi % t for fi, t in zip(formats_h, ['ftn'] + shown)))
        print(' '.join(headers))

        data = [neval]
        for name in shown:
            data.append(vals_dic[name])
            data.append(err_dic[name])
        data = np.vstack(tuple(data)).T
        for k in range(kmax):
            tmp = data[k].tolist()
            print(''.join(fi % t for fi, t in zip(formats, tmp)))
        if plot_error:
            plt.figure(0)
            for name in shown:
                plt.loglog(neval, err_dic[name], label=name)

        names = names[num_cols:]
    if plot_error:
        plt.xlabel('number of function evaluations')
        plt.ylabel('error')
        plt.legend()
        plt.show('hold')