def _smooth(self, z, s):
    """
    Iterate the DCT-based smoother, starting from the estimate `z`.

    Parameters
    ----------
    z : array
        Starting estimate of the smoothed data.
    s : scalar
        Smoothing parameter handed to ``self.gamma``.  When
        ``self.auto_smooth`` is true it is re-estimated by minimizing
        ``self.gcv`` on iterations 1, 2, 4, 8, ...

    Returns
    -------
    z : array
        Estimate after the last iteration performed (the input `z` itself
        if no iteration ran).
    s : scalar
        Smoothing parameter in effect when the loop stopped.
    converged : bool
        True if the relative change of `z` dropped to ``self.tolz`` or
        below, or if ``self.is_weighted`` is false.  False if
        ``self.maxiter`` iterations were used up, or if ``self.maxiter``
        is not positive.
    """
    auto_smooth = self.auto_smooth
    norm = linalg.norm
    y = self.y
    Wtot = self.Wtot
    gamma_s = self.gamma(s)
    # "amount" of weights (see the method gcv)
    aow = Wtot.sum() / y.size  # 0 < aow <= 1
    # Must exist even when the loop body never runs (maxiter <= 0).
    converged = False
    for nit in range(self.maxiter):
        DCTy = dctn(Wtot * (y - z) + z)
        if auto_smooth and not np.remainder(np.log2(nit + 1), 1):
            # The generalized cross-validation (GCV) method is used.
            # We seek the smoothing parameter s that minimizes the GCV
            # score i.e. s = Argmin(GCVscore).
            # Because this process is time-consuming, it is performed from
            # time to time (when nit + 1 is a power of 2)
            log10s = optimize.fminbound(self.gcv,
                                        np.log10(self.s_min),
                                        np.log10(self.s_max),
                                        args=(aow, DCTy, y, Wtot),
                                        xtol=self.errp,
                                        full_output=False,
                                        disp=False)
            s = 10**log10s
            gamma_s = self.gamma(s)
        z0 = z
        # Relaxed update: RF blends the new estimate with the previous one.
        z = self.RF * idctn(gamma_s * DCTy) + (1 - self.RF) * z
        # Relative change between two successive estimates.
        tol = norm(z0.ravel() - z.ravel()) / norm(z.ravel())
        # Without weighted/missing data a single pass is enough.
        converged = tol <= self.tolz or not self.is_weighted
        if converged:
            break
    return z, s, converged
def test_dct_and_dctn(self):
    """Check that idct undoes dct and that idctn undoes dctn."""
    data = np.arange(12).reshape((3, -1))

    # 1-D transform pair
    assert_array_almost_equal(wd.idct(wd.dct(data)), data)

    # N-D transform pair
    assert_array_almost_equal(wd.idctn(wd.dctn(data)), data)
def gcv(p, aow, Lambda, DCTy, y, Wtot, IsFinite, nof, noe):
    """
    Return the generalized cross-validation (GCV) score for s = 10**p.

    Parameters
    ----------
    p : scalar
        Base-10 logarithm of the smoothing parameter s.
    aow : scalar
        "Amount" of weights; values above 0.9 select the fast branch that
        needs no inverse DCT.
    Lambda : array
        Tensor used to build the filter 1 / (1 + s * Lambda**2).
    DCTy : array
        DCT coefficients the filter is applied to.
    y, Wtot, IsFinite : arrays
        Data, weights and finite-value mask (only read in the weighted
        branch).
    nof, noe : scalars
        Divisors of the residual sum-of-squares and of the trace term.
    """
    smoothing = 10 ** p
    shrink = 1.0 / (1 + smoothing * Lambda ** 2)

    if not aow > 0.9:
        # Weighted case: the residual sum-of-squares has to be evaluated
        # on the data themselves, which costs one inverse DCT.
        fitted = idctn(shrink * DCTy)
        misfit = sqrt(Wtot[IsFinite]) * (y[IsFinite] - fitted[IsFinite])
        rss = linalg.norm(misfit) ** 2
    else:
        # (Almost) equally weighted data: work on the DCT coefficients
        # directly, which is much faster.
        rss = linalg.norm(DCTy.ravel() * (shrink.ravel() - 1)) ** 2

    trace_h = shrink.sum()
    return rss / nof / (1.0 - trace_h / noe) ** 2
def gcv(self, p, aow, DCTy, y, Wtot):
    """
    Return the generalized cross-validation (GCV) score for s = 10**p.

    The filter comes from ``self.gamma(s)``.  When `aow` exceeds 0.9 the
    residual is formed from the DCT coefficients alone; otherwise it is
    formed on the finite data (``self.is_finite``) after one inverse DCT.
    The residual sum-of-squares is divided by ``self.nof`` and by
    ``(1 - trace / y.size)**2``.
    """
    filt = self.gamma(10.0**p)

    if not aow > 0.9:
        # Weighted case: compare the data with the filtered signal.
        mask = self.is_finite
        fitted = idctn(filt * DCTy)
        misfit = sqrt(Wtot[mask]) * (y[mask] - fitted[mask])
    else:
        # (Almost) equally weighted data: no inverse DCT required,
        # which is very much faster.
        misfit = DCTy.ravel() * (filt.ravel() - 1)

    trace_h = filt.sum()
    rss = linalg.norm(misfit)**2  # residual sum-of-squares
    return rss / self.nof / (1.0 - trace_h / y.size)**2
def _initial_guess(y, I):
    """
    Build an initial guess for smoothing weighted/missing data.

    Missing samples are filled by nearest neighbor interpolation, then the
    result is coarsely smoothed by discarding the high-order DCT
    coefficients along every axis.

    Parameters
    ----------
    y : ndarray
        Data array.  It is not modified.
    I : ndarray of bool
        Mask with the same shape as `y`; True marks the samples of `y`
        that can be used, False marks the ones to fill in.

    Returns
    -------
    z : ndarray
        Coarsely smoothed array with the shape of `y`.
    """
    # nearest neighbor interpolation (in case of missing values)
    z = y
    if (1 - I).any():
        notI = ~I
        # nearest[k] holds, for every element, the index along axis k of
        # the closest element where I is True.
        nearest = distance_transform_edt(notI, return_distances=False,
                                         return_indices=True)
        # Work on a copy so that valid samples keep their value and the
        # caller's array stays untouched.
        z = y.copy()
        z[notI] = y[tuple(axis_idx[notI] for axis_idx in nearest)]
    # coarse fast smoothing using one-tenth of the DCT coefficients
    shape = z.shape
    d = z.ndim
    z = dctn(z)
    for k in range(d):
        # Zero every coefficient beyond the cut-off along axis k.
        high_order = [slice(None)] * d
        high_order[k] = slice(int((shape[k] + 0.5) / 10) + 1, None)
        z[tuple(high_order)] = 0
    return idctn(z)
def InitialGuess(y, I):
    """
    Build an initial guess for smoothing weighted/missing data.

    Missing samples are filled by nearest neighbor interpolation, then the
    result is coarsely smoothed by discarding the high-order DCT
    coefficients along every axis.

    Parameters
    ----------
    y : ndarray
        Data array.  It is not modified.
    I : ndarray of bool
        Mask with the same shape as `y`; True marks the samples of `y`
        that can be used, False marks the ones to fill in.

    Returns
    -------
    z : ndarray
        Coarsely smoothed array with the shape of `y`.
    """
    # nearest neighbor interpolation (in case of missing values)
    z = y
    if (1 - I).any():
        notI = ~I
        # L[k] holds, for every element, the index along axis k of the
        # closest element where I is True.
        L = distance_transform_edt(notI, return_distances=False,
                                   return_indices=True)
        # Copy first: valid samples must keep their value and the caller's
        # array must not be written to.
        z = y.copy()
        z[notI] = y[tuple(Lk[notI] for Lk in L)]
    # coarse fast smoothing using one-tenth of the DCT coefficients
    siz = z.shape
    d = z.ndim
    z = dctn(z)
    for k in range(d):
        # Zero every coefficient beyond the cut-off along axis k.
        tail = [slice(None)] * d
        tail[k] = slice(int((siz[k] + 0.5) / 10) + 1, None)
        z[tuple(tail)] = 0
    z = idctn(z)
    return z
def smoothn(data, s=None, weight=None, robust=False, z0=None, tolz=1e-3,
            maxiter=100, fulloutput=False):
    """
    SMOOTHN fast and robust spline smoothing for 1-D to N-D data.

    Parameters
    ----------
    data : array like
        uniformly-sampled data array to smooth. Non finite values (NaN or
        Inf) are treated as missing values. The array is not modified.
    s : real positive scalar
        smoothing parameter. The larger S is, the smoother the output will
        be. Default value is automatically determined using the generalized
        cross-validation (GCV) method.
    weight : string or array
        If an array: weighting array of real positive values, that must
        have the same size as DATA. Note that a zero weight corresponds to
        a missing value.
        If a string: name of the weight function handed to RobustWeights
        during robust smoothing (default 'bisquare').
    robust : bool
        If true carry out a robust smoothing that minimizes the influence
        of outlying data.
    z0 : array-like
        Initial value for the iterative process. It is only used when the
        data are weighted or contain missing values; by default an initial
        guess is then computed with InitialGuess.
    tolz : real positive scalar
        Termination tolerance on Z (default = 1e-3)
    maxiter : scalar integer
        Maximum number of iterations allowed (default = 100)
    fulloutput : bool
        If true return the smoothing parameter together with the smoothed
        data.

    Returns
    -------
    z : array like
        smoothed data. DATA itself is returned when it holds fewer than
        two elements.
    s : real positive scalar
        smoothing parameter that was used (only if FULLOUTPUT is true).

    To be made
    ----------
    Estimate the confidence bands (see Wahba 1983, Nychka 1988).

    Reference
    ---------
    Garcia D, Robust smoothing of gridded data in one and higher dimensions
    with missing values. Computational Statistics & Data Analysis, 2010.
    http://www.biomecardio.com/pageshtm/publi/csda10.pdf

    Examples:
    --------
    1-D example
    >>> import matplotlib.pyplot as plt
    >>> x = np.linspace(0,100,2**8)
    >>> y = np.cos(x/10)+(x/50)**2 + np.random.randn(*x.shape)/10
    >>> y[np.r_[70, 75, 80]] = np.array([5.5, 5, 6])
    >>> z = smoothn(y) # Regular smoothing
    >>> zr = smoothn(y,robust=True) #  Robust smoothing
    >>> h=plt.subplot(121),
    >>> h = plt.plot(x,y,'r.',x,z,'k',linewidth=2)
    >>> h=plt.title('Regular smoothing')
    >>> h=plt.subplot(122)
    >>> h=plt.plot(x,y,'r.',x,zr,'k',linewidth=2)
    >>> h=plt.title('Robust smoothing')

    2-D example
    >>> xp = np.r_[0:1:.02]
    >>> [x,y] = np.meshgrid(xp,xp)
    >>> f = np.exp(x+y) + np.sin((x-2*y)*3);
    >>> fn = f + np.random.randn(*f.shape)*0.5;
    >>> fs = smoothn(fn);
    >>> h=plt.subplot(121),
    >>> h=plt.contourf(xp,xp,fn)
    >>> h=plt.subplot(122)
    >>> h=plt.contourf(xp,xp,fs)

    The remaining examples are written in MATLAB syntax.

    2-D example with missing data
    n = 256;
    y0 = peaks(n);
    y = y0 + rand(size(y0))*2;
    I = randperm(n^2);
    y(I(1:n^2*0.5)) = NaN;  % lose 1/2 of data
    y(40:90,140:190) = NaN;  % create a hole
    z = smoothn(y);  % smooth data
    subplot(2,2,1:2), imagesc(y), axis equal off
    title('Noisy corrupt data')
    subplot(223), imagesc(z), axis equal off
    title('Recovered data ...')
    subplot(224), imagesc(y0), axis equal off
    title('... compared with original data')

    3-D example
    [x,y,z] = meshgrid(-2:.2:2);
    xslice = [-0.8,1]; yslice = 2; zslice = [-2,0];
    vn = x.*exp(-x.^2-y.^2-z.^2) + randn(size(x))*0.06;
    subplot(121), slice(x,y,z,vn,xslice,yslice,zslice,'cubic')
    title('Noisy data')
    v = smoothn(vn);
    subplot(122), slice(x,y,z,v,xslice,yslice,zslice,'cubic')
    title('Smoothed data')

    Cardioid
    t = linspace(0,2*pi,1000);
    x = 2*cos(t).*(1-cos(t)) + randn(size(t))*0.1;
    y = 2*sin(t).*(1-cos(t)) + randn(size(t))*0.1;
    z = smoothn(complex(x,y));
    plot(x,y,'r.',real(z),imag(z),'k','linewidth',2)
    axis equal tight

    Cellular vortical flow
    [x,y] = meshgrid(linspace(0,1,24));
    Vx = cos(2*pi*x+pi/2).*cos(2*pi*y);
    Vy = sin(2*pi*x+pi/2).*sin(2*pi*y);
    Vx = Vx + sqrt(0.05)*randn(24,24);  % adding Gaussian noise
    Vy = Vy + sqrt(0.05)*randn(24,24);  % adding Gaussian noise
    I = randperm(numel(Vx));
    Vx(I(1:30)) = (rand(30,1)-0.5)*5;  % adding outliers
    Vy(I(1:30)) = (rand(30,1)-0.5)*5;  % adding outliers
    Vx(I(31:60)) = NaN;  % missing values
    Vy(I(31:60)) = NaN;  % missing values
    Vs = smoothn(complex(Vx,Vy),'robust');  % automatic smoothing
    subplot(121), quiver(x,y,Vx,Vy,2.5), axis square
    title('Noisy velocity field')
    subplot(122), quiver(x,y,real(Vs),imag(Vs)), axis square
    title('Smoothed velocity field')

    See also SMOOTH, SMOOTH3, DCTN, IDCTN.

    -- Damien Garcia -- 2009/03, revised 2010/11
    Visit http://www.biomecardio.com/matlab/smoothn.html
    for more details about SMOOTHN
    """
    y = np.atleast_1d(data)
    sizy = y.shape
    noe = y.size
    if noe < 2:
        # Nothing to smooth; still honour the fulloutput return shape.
        return (data, s) if fulloutput else data

    weightstr = 'bisquare'
    W = np.ones(sizy)
    # Smoothness parameter and weights
    if weight is None:
        pass
    elif isinstance(weight, str):
        weightstr = weight.lower()
    else:
        W = weight

    # Weights. Zero weights are assigned to not finite values (Inf or NaN),
    # (Inf/NaN values = missing data).
    IsFinite = np.isfinite(y)
    nof = IsFinite.sum()  # number of finite elements
    W = W * IsFinite
    if (W < 0).any():
        raise ValueError('Weights must all be >=0')
    else:
        W = W / W.max()

    isweighted = (W < 1).any()  # Weighted or missing data?
    isauto = s is None  # Automatic smoothing?

    # Creation of the Lambda tensor
    # Lambda contains the eigenvalues of the difference matrix used in this
    # penalized least squares process.
    d = y.ndim
    Lambda = np.zeros(sizy)
    siz0 = [1, ] * d
    for i in range(d):
        siz0[i] = sizy[i]
        Lambda = Lambda + \
            np.cos(pi * np.arange(sizy[i]) / sizy[i]).reshape(siz0)
        siz0[i] = 1
    Lambda = -2 * (d - Lambda)
    if not isauto:
        Gamma = 1. / (1 + s * Lambda ** 2)

    # Upper and lower bound for the smoothness parameter
    # The average leverage (h) is by definition in [0 1]. Weak smoothing
    # occurs if h is close to 1, while over-smoothing appears when h is
    # near 0. Upper and lower bounds for h are given to avoid under- or
    # over-smoothing. See equation relating h to the smoothness parameter
    # (Equation #12 in the referenced CSDA paper).
    N = (np.array(sizy) != 1).sum()  # tensor rank of the y-array
    hMin = 1e-6
    hMax = 0.99
    sMinBnd = (((1 + sqrt(1 + 8 * hMax ** (2. / N))) / 4. /
                hMax ** (2. / N)) ** 2 - 1) / 16
    sMaxBnd = (((1 + sqrt(1 + 8 * hMin ** (2. / N))) / 4. /
                hMin ** (2. / N)) ** 2 - 1) / 16

    # Initialize before iterating
    Wtot = W
    # Initial conditions for z
    if isweighted:
        # With weighted/missing data
        # An initial guess is provided to ensure faster convergence. For
        # that purpose, a nearest neighbor interpolation followed by a
        # coarse smoothing are performed.
        if z0 is None:
            z = InitialGuess(y, IsFinite)
        else:
            # an initial guess (z0) has been provided
            z = z0
    else:
        z = np.zeros(sizy)
    z0 = z

    if not IsFinite.all():
        # Arbitrary values for missing y-data. np.atleast_1d may have
        # returned the caller's own array, so write into a private copy.
        y = y.copy()
        y[~IsFinite] = 0

    RobustIterativeProcess = True
    RobustStep = 1

    # Error on p. Smoothness parameter s = 10^p
    errp = 0.1

    # Relaxation factor RF: to speedup convergence
    RF = 1.75 if isweighted else 1.0

    norm = linalg.norm
    # Main iterative process
    while RobustIterativeProcess:
        # "amount" of weights (see the function GCVscore)
        aow = Wtot.sum() / noe  # 0 < aow <= 1
        exitflag = True
        for nit in range(1, maxiter + 1):
            DCTy = dctn(Wtot * (y - z) + z)
            if isauto and not np.remainder(np.log2(nit), 1):
                # The generalized cross-validation (GCV) method is used.
                # We seek the smoothing parameter s that minimizes the GCV
                # score i.e. s = Argmin(GCVscore).
                # Because this process is time-consuming, it is performed
                # from time to time (when nit is a power of 2)
                log10s = optimize.fminbound(
                    gcv, np.log10(sMinBnd), np.log10(sMaxBnd),
                    args=(aow, Lambda, DCTy, y, Wtot, IsFinite, nof, noe),
                    xtol=errp, full_output=False, disp=False)
                s = 10 ** log10s
                Gamma = 1.0 / (1 + s * Lambda ** 2)
            z = RF * idctn(Gamma * DCTy) + (1 - RF) * z
            # if no weighted/missing data => tol=0 (no iteration)
            tol = norm(z0.ravel() - z.ravel()) / norm(
                z.ravel()) if isweighted else 0.0
            if tol <= tolz:
                break
            z0 = z  # re-initialization
        else:
            # the loop used all maxiter iterations without converging
            exitflag = False

        if robust:
            # -- Robust Smoothing: iteratively re-weighted process
            # --- average leverage
            h = sqrt(1 + 16 * s)
            h = sqrt(1 + h) / sqrt(2) / h
            h = h ** N
            # take robust weights into account
            Wtot = W * RobustWeights(y - z, IsFinite, h, weightstr)
            # re-initialize for another iterative weighted process
            isweighted = True
            RobustStep = RobustStep + 1
            # 3 robust steps are enough.
            RobustIterativeProcess = RobustStep < 4
        else:
            RobustIterativeProcess = False  # stop the whole process

    # Warning messages
    if isauto:
        if abs(np.log10(s) - np.log10(sMinBnd)) < errp:
            warnings.warn('s = %g: the lower bound for s has been reached. '
                          'Put s as an input variable if required.' % s)
        elif abs(np.log10(s) - np.log10(sMaxBnd)) < errp:
            warnings.warn('s = %g: the Upper bound for s has been reached. '
                          'Put s as an input variable if required.' % s)

    if not exitflag:
        warnings.warn('Maximum number of iterations (%d) has been exceeded. '
                      'Increase MaxIter option or decrease TolZ value.'
                      % (maxiter))
    if fulloutput:
        return z, s
    else:
        return z