def _smooth(self, z, s):
    auto_smooth = self.auto_smooth
    norm = linalg.norm
    y = self.y
    Wtot = self.Wtot
    gamma_s = self.gamma(s)
    # "amount" of weights (see the function GCVscore)
    aow = Wtot.sum() / y.size  # 0 < aow <= 1
    converged = False
    for nit in range(self.maxiter):
        DCTy = dctn(Wtot * (y - z) + z)
        if auto_smooth and not np.remainder(np.log2(nit + 1), 1):
            # The generalized cross-validation (GCV) method is used.
            # We seek the smoothing parameter s that minimizes the GCV
            # score, i.e. s = argmin(GCVscore).
            # Because this process is time-consuming, it is performed only
            # from time to time (when nit + 1 is a power of 2).
            log10s = optimize.fminbound(
                self.gcv, np.log10(self.s_min), np.log10(self.s_max),
                args=(aow, DCTy, y, Wtot),
                xtol=self.errp, full_output=False, disp=False)
            s = 10 ** log10s
            gamma_s = self.gamma(s)
        z0 = z
        z = self.RF * idctn(gamma_s * DCTy) + (1 - self.RF) * z
        # if no weighted/missing data => tol = 0 (no iteration)
        tol = norm(z0.ravel() - z.ravel()) / norm(z.ravel())
        converged = tol <= self.tolz or not self.is_weighted
        if converged:
            break
    return z, s, converged
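
# A minimal 1-D sketch of the core update that _smooth iterates, assuming a
# fixed smoothing parameter s (no GCV search, no weights, RF = 1) and using
# scipy.fft's orthonormal dct/idct as stand-ins for this module's
# dctn/idctn.
def _demo_smooth_1d_fixed_s():
    import numpy as np
    from scipy.fft import dct, idct

    def smooth_1d(y, s):
        n = y.size
        # eigenvalues of the 1-D difference operator (cf. Lambda in smoothn)
        lam = -2 * (1 - np.cos(np.pi * np.arange(n) / n))
        gamma = 1.0 / (1.0 + s * lam ** 2)
        # a single filtering step in the DCT domain
        return idct(gamma * dct(y, norm='ortho'), norm='ortho')

    x = np.linspace(0, 10, 200)
    y = np.sin(x) + 0.2 * np.random.randn(x.size)
    return smooth_1d(y, s=10.0)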
def test_dct_and_dctn(self):
    a = np.arange(12).reshape((3, -1))
    y = wd.dct(a)
    x = wd.idct(y)
    assert_array_almost_equal(x, a)
    yn = wd.dctn(a)  # , shape=(10,), axes=(1,))
    xn = wd.idctn(yn)  # , axes=(1,))
    assert_array_almost_equal(xn, a)
def _initial_guess(y, I):
    # Initial guess with weighted/missing data:
    # nearest neighbor interpolation (in case of missing values)
    z = y
    if (1 - I).any():
        notI = ~I
        # indices of the nearest finite neighbor for each missing point
        _, L = distance_transform_edt(notI, return_indices=True)
        z = y.copy()
        z[notI] = y[tuple(L[:, notI])]
    # coarse fast smoothing using one-tenth of the DCT coefficients
    shape = z.shape
    d = z.ndim
    z = dctn(z)
    for k in range(d):
        z[int((shape[k] + 0.5) / 10) + 1::, ...] = 0
        z = z.reshape(np.roll(shape, -k))
        z = z.transpose(np.roll(range(d), -1))
        # z = shiftdim(z, 1) in the original MATLAB code
    return idctn(z)
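
# A small self-contained check of the nearest-neighbor fill used in
# _initial_guess; the indexing via return_indices=True mirrors the feature
# transform of MATLAB's bwdist (a sketch, assuming 2-D data with NaNs
# marking the missing values).
def _demo_nearest_neighbor_fill():
    import numpy as np
    from scipy.ndimage import distance_transform_edt

    y = np.array([[1.0, np.nan, 3.0],
                  [4.0, 5.0, np.nan]])
    I = np.isfinite(y)  # True where data is present
    notI = ~I
    # L[:, i, j] holds the index of the nearest finite (background) cell
    _, L = distance_transform_edt(notI, return_indices=True)
    z = y.copy()
    z[notI] = y[tuple(L[:, notI])]
    return z  # NaNs replaced by their nearest finite neighbors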
def test_dct3(self):
    a = np.array([[[0.51699637, 0.42946223, 0.89843545],
                   [0.27853391, 0.8931508, 0.34319118],
                   [0.51984431, 0.09217771, 0.78764716]],
                  [[0.25019845, 0.92622331, 0.06111409],
                   [0.81363641, 0.06093368, 0.13123373],
                   [0.47268657, 0.39635091, 0.77978269]],
                  [[0.86098829, 0.07901332, 0.82169182],
                   [0.12560088, 0.78210188, 0.69805434],
                   [0.33544628, 0.81540172, 0.9393219]]])
    dct = wd.dct
    d = dct(dct(dct(a).transpose(0, 2, 1)).transpose(2, 1, 0)
            ).transpose(2, 1, 0).transpose(0, 2, 1)
    d0 = wd.dctn(a)
    idct = wd.idct
    e = idct(idct(idct(d).transpose(0, 2, 1)).transpose(2, 1, 0)
             ).transpose(2, 1, 0).transpose(0, 2, 1)
    assert_array_almost_equal(d, d0)
    assert_array_almost_equal(a, e)
def InitialGuess(y, I):
    # Initial guess with weighted/missing data:
    # nearest neighbor interpolation (in case of missing values)
    z = y
    if (1 - I).any():
        notI = ~I
        # indices of the nearest finite neighbor for each missing point
        _, L = distance_transform_edt(notI, return_indices=True)
        z = y.copy()
        z[notI] = y[tuple(L[:, notI])]
    # coarse fast smoothing using one-tenth of the DCT coefficients
    siz = z.shape
    d = z.ndim
    z = dctn(z)
    for k in range(d):
        z[int((siz[k] + 0.5) / 10) + 1::, ...] = 0
        z = z.reshape(np.roll(siz, -k))
        z = z.transpose(np.roll(range(z.ndim), -1))
        # z = shiftdim(z, 1) in the original MATLAB code
    z = idctn(z)
    return z
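
# An equivalent, index-based sketch of the coarse pre-smoothing above: keep
# roughly the first tenth of the DCT coefficients along each axis and
# invert. scipy.fft's dctn/idctn stand in for this module's transforms (an
# assumption); the roll/transpose dance in InitialGuess performs the same
# axis-by-axis zeroing.
def _demo_coarse_smooth(z):
    from scipy.fft import dctn, idctn

    c = dctn(z, norm='ortho')
    for k, nk in enumerate(z.shape):
        keep = int((nk + 0.5) / 10) + 1
        idx = [slice(None)] * z.ndim
        idx[k] = slice(keep, None)
        c[tuple(idx)] = 0  # zero the high-frequency coefficients
    return idctn(c, norm='ortho')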
def evar(y):
    """Noise variance estimation.

    Assuming that the deterministic function Y has additive Gaussian noise,
    EVAR(Y) returns an estimated variance of this noise.

    Note
    ----
    A thin-plate smoothing spline model is used to smooth Y. It is assumed
    that the model whose generalized cross-validation score is minimum can
    provide the variance of the additive noise. A few tests showed that
    EVAR works very well with "not too irregular" functions.

    Examples
    --------
    1D signal

    >>> n = 10 ** 6
    >>> x = np.linspace(0, 100, n)
    >>> y = np.cos(x / 10) + (x / 50)
    >>> var0 = 0.02  # noise variance
    >>> yn = y + np.sqrt(var0) * np.random.randn(*y.shape)
    >>> s = evar(yn)  # estimated variance
    >>> np.abs(s - var0) / var0 < 3.5 / np.sqrt(n)
    True

    2D function

    >>> xp = np.linspace(0, 1, 50)
    >>> x, y = np.meshgrid(xp, xp)
    >>> f = np.exp(x + y) + np.sin((x - 2 * y) * 3)
    >>> var0 = 0.04  # noise variance
    >>> fn = f + np.sqrt(var0) * np.random.randn(*f.shape)
    >>> s = evar(fn)  # estimated variance
    >>> np.abs(s - var0) / var0 < 3.5 / np.sqrt(50)
    True

    3D function

    >>> yp = np.linspace(-2, 2, 50)
    >>> x, y, z = np.meshgrid(yp, yp, yp, sparse=True)
    >>> f = x * np.exp(-x**2 - y**2 - z**2)
    >>> var0 = 0.5  # noise variance
    >>> fn = f + np.sqrt(var0) * np.random.randn(*f.shape)
    >>> s = evar(fn)  # estimated variance
    >>> np.abs(s - var0) / var0 < 3.5 / np.sqrt(50)
    True

    Other example
    -------------
    http://www.biomecardio.com/matlab/evar.html

    Note
    ----
    EVAR is only adapted to evenly-gridded 1-D to N-D data.

    See also
    --------
    VAR, STD, SMOOTHN
    """
    # Damien Garcia -- 2008/04, revised 2009/10
    y = np.atleast_1d(y)
    d = y.ndim
    sh0 = y.shape
    S = np.zeros(sh0)
    sh1 = np.ones((d, ), dtype=np.int64)
    cos = np.cos
    for i in range(d):
        ni = sh0[i]
        sh1[i] = ni
        t = np.arange(ni).reshape(sh1) / ni
        S += cos(pi * t)
        sh1[i] = 1
    S2 = 2 * (d - S).ravel()
    # N-D Discrete Cosine Transform of Y
    dcty2 = dctn(y).ravel() ** 2

    def score_fun(L, S2, dcty2):
        # Generalized cross-validation (GCV) score
        M = 1 - 1. / (1 + 10 ** L * S2)
        noisevar = (dcty2 * M ** 2).mean()
        return noisevar / M.mean() ** 2

    Lopt = optimize.fminbound(score_fun, -38, 38, args=(S2, dcty2))
    M = 1.0 - 1.0 / (1 + 10 ** Lopt * S2)
    noisevar = (dcty2 * M ** 2).mean()
    return noisevar
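
# Quick seeded usage check for evar (a sketch; assumes evar as defined
# above is importable together with its module-level imports).
def _demo_evar():
    import numpy as np

    rng = np.random.default_rng(0)
    x = np.linspace(0, 100, 10 ** 4)
    y = np.cos(x / 10) + x / 50
    var0 = 0.02  # true noise variance
    yn = y + np.sqrt(var0) * rng.standard_normal(x.size)
    return evar(yn)  # should be close to var0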
def smoothn(data, s=None, weight=None, robust=False, z0=None, tolz=1e-3,
            maxiter=100, fulloutput=False):
    '''
    SMOOTHN fast and robust spline smoothing for 1-D to N-D data.

    Parameters
    ----------
    data : array_like
        uniformly-sampled data array to smooth. Non-finite values (NaN or
        Inf) are treated as missing values.
    s : real positive scalar
        smoothing parameter. The larger s is, the smoother the output will
        be. The default value is automatically determined using the
        generalized cross-validation (GCV) method.
    weight : str or array_like
        weighting array of real positive values, which must have the same
        size as DATA. Note that a zero weight corresponds to a missing
        value.
    robust : bool
        If true, carry out a robust smoothing that minimizes the influence
        of outlying data.
    tolz : real positive scalar
        Termination tolerance on Z (default = 1e-3).
    maxiter : scalar integer
        Maximum number of iterations allowed (default = 100).
    z0 : array_like
        Initial value for the iterative process (default = original data).

    Returns
    -------
    z : array_like
        smoothed data
    s : real scalar
        smoothing parameter used; only returned when fulloutput is True.

    To be made
    ----------
    Estimate the confidence bands (see Wahba 1983, Nychka 1988).

    Reference
    ---------
    Garcia D, Robust smoothing of gridded data in one and higher dimensions
    with missing values. Computational Statistics & Data Analysis, 2010.
    http://www.biomecardio.com/pageshtm/publi/csda10.pdf

    Examples
    --------
    1-D example

    >>> import matplotlib.pyplot as plt
    >>> x = np.linspace(0, 100, 2**8)
    >>> y = np.cos(x/10) + (x/50)**2 + np.random.randn(*x.shape)/10
    >>> y[np.r_[70, 75, 80]] = np.array([5.5, 5, 6])
    >>> z = smoothn(y)  # Regular smoothing
    >>> zr = smoothn(y, robust=True)  # Robust smoothing
    >>> h = plt.subplot(121)
    >>> h = plt.plot(x, y, 'r.', x, z, 'k', linewidth=2)
    >>> h = plt.title('Regular smoothing')
    >>> h = plt.subplot(122)
    >>> h = plt.plot(x, y, 'r.', x, zr, 'k', linewidth=2)
    >>> h = plt.title('Robust smoothing')

    2-D example

    >>> xp = np.r_[0:1:.02]
    >>> [x, y] = np.meshgrid(xp, xp)
    >>> f = np.exp(x + y) + np.sin((x - 2 * y) * 3)
    >>> fn = f + np.random.randn(*f.shape) * 0.5
    >>> fs = smoothn(fn)
    >>> h = plt.subplot(121)
    >>> h = plt.contourf(xp, xp, fn)
    >>> h = plt.subplot(122)
    >>> h = plt.contourf(xp, xp, fs)

    The following examples are kept in the original MATLAB syntax.

    2-D example with missing data::

        n = 256;
        y0 = peaks(n);
        y = y0 + rand(size(y0))*2;
        I = randperm(n^2);
        y(I(1:n^2*0.5)) = NaN;   % lose 1/2 of data
        y(40:90,140:190) = NaN;  % create a hole
        z = smoothn(y);          % smooth data
        subplot(2,2,1:2), imagesc(y), axis equal off
        title('Noisy corrupt data')
        subplot(223), imagesc(z), axis equal off
        title('Recovered data ...')
        subplot(224), imagesc(y0), axis equal off
        title('... compared with original data')

    3-D example::

        [x,y,z] = meshgrid(-2:.2:2);
        xslice = [-0.8,1]; yslice = 2; zslice = [-2,0];
        vn = x.*exp(-x.^2-y.^2-z.^2) + randn(size(x))*0.06;
        subplot(121), slice(x,y,z,vn,xslice,yslice,zslice,'cubic')
        title('Noisy data')
        v = smoothn(vn);
        subplot(122), slice(x,y,z,v,xslice,yslice,zslice,'cubic')
        title('Smoothed data')

    Cardioid::

        t = linspace(0,2*pi,1000);
        x = 2*cos(t).*(1-cos(t)) + randn(size(t))*0.1;
        y = 2*sin(t).*(1-cos(t)) + randn(size(t))*0.1;
        z = smoothn(complex(x,y));
        plot(x,y,'r.',real(z),imag(z),'k','linewidth',2)
        axis equal tight

    Cellular vortical flow::

        [x,y] = meshgrid(linspace(0,1,24));
        Vx = cos(2*pi*x+pi/2).*cos(2*pi*y);
        Vy = sin(2*pi*x+pi/2).*sin(2*pi*y);
        Vx = Vx + sqrt(0.05)*randn(24,24);  % adding Gaussian noise
        Vy = Vy + sqrt(0.05)*randn(24,24);  % adding Gaussian noise
        I = randperm(numel(Vx));
        Vx(I(1:30)) = (rand(30,1)-0.5)*5;   % adding outliers
        Vy(I(1:30)) = (rand(30,1)-0.5)*5;   % adding outliers
        Vx(I(31:60)) = NaN;                 % missing values
        Vy(I(31:60)) = NaN;                 % missing values
        Vs = smoothn(complex(Vx,Vy),'robust');  % automatic smoothing
        subplot(121), quiver(x,y,Vx,Vy,2.5), axis square
        title('Noisy velocity field')
        subplot(122), quiver(x,y,real(Vs),imag(Vs)), axis square
        title('Smoothed velocity field')

    See also
    --------
    SMOOTH, SMOOTH3, DCTN, IDCTN

    -- Damien Garcia -- 2009/03, revised 2010/11
    Visit http://www.biomecardio.com/matlab/smoothn.html for more details
    about SMOOTHN.
    '''
    # Work on a copy: missing y-values are overwritten below.
    y = np.atleast_1d(data).copy()
    sizy = y.shape
    noe = y.size
    if noe < 2:
        return data

    weightstr = 'bisquare'
    W = np.ones(sizy)
    # Smoothness parameter and weights
    if weight is None:
        pass
    elif isinstance(weight, str):
        weightstr = weight.lower()
    else:
        W = weight

    # Weights. Zero weights are assigned to non-finite values (Inf or NaN),
    # i.e. Inf/NaN values = missing data.
    IsFinite = np.isfinite(y)
    nof = IsFinite.sum()  # number of finite elements
    W = W * IsFinite
    if (W < 0).any():
        raise ValueError('Weights must all be >=0')
    W = W / W.max()

    isweighted = (W < 1).any()  # Weighted or missing data?
    isauto = s is None  # Automatic smoothing?

    # Creation of the Lambda tensor.
    # Lambda contains the eigenvalues of the difference matrix used in this
    # penalized least squares process.
    d = y.ndim
    Lambda = np.zeros(sizy)
    siz0 = [1, ] * d
    for i in range(d):
        siz0[i] = sizy[i]
        Lambda = Lambda + \
            np.cos(pi * np.arange(sizy[i]) / sizy[i]).reshape(siz0)
        siz0[i] = 1
    Lambda = -2 * (d - Lambda)
    if not isauto:
        Gamma = 1. / (1 + s * Lambda ** 2)

    # Upper and lower bounds for the smoothness parameter.
    # The average leverage (h) is by definition in [0, 1]. Weak smoothing
    # occurs if h is close to 1, while over-smoothing appears when h is
    # near 0. Upper and lower bounds for h are given to avoid under- or
    # over-smoothing. See the equation relating h to the smoothness
    # parameter (Equation #12 in the referenced CSDA paper).
    N = (np.array(sizy) != 1).sum()  # tensor rank of the y-array
    hMin = 1e-6
    hMax = 0.99
    sMinBnd = (((1 + sqrt(1 + 8 * hMax ** (2. / N))) / 4. /
                hMax ** (2. / N)) ** 2 - 1) / 16
    sMaxBnd = (((1 + sqrt(1 + 8 * hMin ** (2. / N))) / 4. /
                hMin ** (2. / N)) ** 2 - 1) / 16

    # Initialize before iterating
    Wtot = W
    # Initial conditions for z
    if isweighted:
        # With weighted/missing data an initial guess is provided to ensure
        # faster convergence. For that purpose, a nearest neighbor
        # interpolation followed by a coarse smoothing are performed.
        if z0 is None:
            z = InitialGuess(y, IsFinite)
        else:
            z = z0  # an initial guess (z0) has been provided
    else:
        z = np.zeros(sizy)
    z0 = z
    y[~IsFinite] = 0  # arbitrary values for missing y-data

    tol = 1
    RobustIterativeProcess = True
    RobustStep = 1

    # Error on p. Smoothness parameter s = 10^p
    errp = 0.1

    # Relaxation factor RF: to speed up convergence
    RF = 1.75 if isweighted else 1.0

    norm = linalg.norm
    # Main iterative process
    while RobustIterativeProcess:
        # "amount" of weights (see the function GCVscore)
        aow = Wtot.sum() / noe  # 0 < aow <= 1
        exitflag = True
        for nit in range(1, maxiter + 1):
            DCTy = dctn(Wtot * (y - z) + z)
            if isauto and not np.remainder(np.log2(nit), 1):
                # The generalized cross-validation (GCV) method is used.
                # We seek the smoothing parameter s that minimizes the GCV
                # score, i.e. s = argmin(GCVscore).
                # Because this process is time-consuming, it is performed
                # only from time to time (when nit is a power of 2).
                log10s = optimize.fminbound(
                    gcv, np.log10(sMinBnd), np.log10(sMaxBnd),
                    args=(aow, Lambda, DCTy, y, Wtot, IsFinite, nof, noe),
                    xtol=errp, full_output=False, disp=False)
                s = 10 ** log10s
                Gamma = 1.0 / (1 + s * Lambda ** 2)
            z = RF * idctn(Gamma * DCTy) + (1 - RF) * z
            # if no weighted/missing data => tol = 0 (no iteration)
            tol = (norm(z0.ravel() - z.ravel()) / norm(z.ravel())
                   if isweighted else 0.0)
            if tol <= tolz:
                break
            z0 = z  # re-initialization
        else:
            exitflag = False  # loop exhausted: nit reached maxiter

        if robust:
            # -- Robust Smoothing: iteratively re-weighted process
            # --- average leverage
            h = sqrt(1 + 16 * s)
            h = sqrt(1 + h) / sqrt(2) / h
            h = h ** N
            # take robust weights into account
            Wtot = W * RobustWeights(y - z, IsFinite, h, weightstr)
            # re-initialize for another iterative weighted process
            isweighted = True
            tol = 1
            RobustStep = RobustStep + 1
            # 3 robust steps are enough.
            RobustIterativeProcess = RobustStep < 4
        else:
            RobustIterativeProcess = False  # stop the whole process

    # Warning messages
    if isauto:
        if abs(np.log10(s) - np.log10(sMinBnd)) < errp:
            warnings.warn('''s = %g: the lower bound for s has been reached.
            Put s as an input variable if required.''' % s)
        elif abs(np.log10(s) - np.log10(sMaxBnd)) < errp:
            warnings.warn('''s = %g: the upper bound for s has been reached.
            Put s as an input variable if required.''' % s)
    if not exitflag:
        warnings.warn('''Maximum number of iterations (%d) has been exceeded.
        Increase MaxIter option or decrease TolZ value.''' % maxiter)
    if fulloutput:
        return z, s
    return z
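
# A runnable Python counterpart to the MATLAB missing-data example in the
# smoothn docstring (a sketch: MATLAB's peaks is replaced by a simple
# smooth test surface, and the hole coordinates are illustrative).
def _demo_smoothn_missing_data():
    import numpy as np

    n = 128
    xp = np.linspace(-3, 3, n)
    x, yg = np.meshgrid(xp, xp)
    y0 = x * np.exp(-x ** 2 - yg ** 2)  # smooth test surface
    rng = np.random.default_rng(1)
    y = y0 + rng.uniform(size=y0.shape) * 0.2
    idx = rng.permutation(n * n)
    y.flat[idx[:n * n // 2]] = np.nan  # lose half of the data
    y[20:45, 70:95] = np.nan           # create a hole
    return smoothn(y)                  # NaNs are treated as missing values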