Example #1
0
 def _smooth(self, z, s):
     auto_smooth = self.auto_smooth
     norm = linalg.norm
     y = self.y
     Wtot = self.Wtot
     gamma_s = self.gamma(s)
     # "amount" of weights (see the function GCVscore)
     aow = Wtot.sum() / y.size  # 0 < aow <= 1
     for nit in range(self.maxiter):
         DCTy = dctn(Wtot * (y - z) + z)
         if auto_smooth and not np.remainder(np.log2(nit + 1), 1):
             # The generalized cross-validation (GCV) method is used.
             # We seek the smoothing parameter s that minimizes the GCV
             # score i.e. s = Argmin(GCVscore).
             # Because this process is time-consuming, it is performed from
             # time to time (when nit is a power of 2)
             log10s = optimize.fminbound(self.gcv,
                                         np.log10(self.s_min),
                                         np.log10(self.s_max),
                                         args=(aow, DCTy, y, Wtot),
                                         xtol=self.errp,
                                         full_output=False,
                                         disp=False)
             s = 10**log10s
             gamma_s = self.gamma(s)
         z0 = z
         z = self.RF * idctn(gamma_s * DCTy) + (1 - self.RF) * z
         # if no weighted/missing data => tol=0 (no iteration)
         tol = norm(z0.ravel() - z.ravel()) / norm(z.ravel())
         converged = tol <= self.tolz or not self.is_weighted
         if converged:
             break
     return z, s, converged
Example #2
0
    def test_dct_and_dctn(self):
        '''Check that idct undoes dct and that idctn undoes dctn.'''
        original = np.arange(12).reshape((3, -1))

        # dct / idct pair
        coeffs = wd.dct(original)
        recovered = wd.idct(coeffs)
        assert_array_almost_equal(recovered, original)

        # dctn / idctn pair, called with default arguments
        coeffs_nd = wd.dctn(original)
        recovered_nd = wd.idctn(coeffs_nd)

        assert_array_almost_equal(recovered_nd, original)
Example #3
0
def gcv(p, aow, Lambda, DCTy, y, Wtot, IsFinite, nof, noe):
    '''
    Return the generalized cross-validation (GCV) score for s = 10**p.

    Parameters
    ----------
    p : float
        Base-10 logarithm of the smoothing parameter s.
    aow : float
        "Amount" of weights; above 0.9 the residual is evaluated directly
        from the DCT coefficients.
    Lambda : ndarray
        Array used to build the filter 1 / (1 + s * Lambda**2).
    DCTy : ndarray
        DCT coefficients to be filtered.
    y, Wtot, IsFinite : ndarray
        Data, weights and boolean mask; only used when aow <= 0.9.
    nof, noe : int
        Divisors applied to the residual sum of squares and to the trace
        of the filter, respectively.

    Returns
    -------
    float
        The GCV score.
    '''
    smoothing = 10 ** p
    shrink = 1.0 / (1 + smoothing * Lambda ** 2)
    trace_h = shrink.sum()

    if aow > 0.9:
        # (nearly) uniform weights: no inverse DCT is needed, which is
        # much faster
        residual = DCTy.ravel() * (shrink.ravel() - 1)
    else:
        # weights matter: compare data and fit on the finite samples only
        fitted = idctn(shrink * DCTy)
        residual = sqrt(Wtot[IsFinite]) * (y[IsFinite] - fitted[IsFinite])

    rss = linalg.norm(residual) ** 2  # residual sum-of-squares
    return rss / nof / (1.0 - trace_h / noe) ** 2
Example #4
0
    def gcv(self, p, aow, DCTy, y, Wtot):
        '''
        Return the generalized cross-validation (GCV) score for s = 10**p.

        Parameters
        ----------
        p : float
            Base-10 logarithm of the smoothing parameter s.
        aow : float
            "Amount" of weights; above 0.9 the residual is evaluated
            directly from the DCT coefficients.
        DCTy : ndarray
            DCT coefficients to be filtered by ``self.gamma(s)``.
        y : ndarray
            Data; its size also normalizes the trace of the filter.
        Wtot : ndarray
            Weights, only used when aow <= 0.9.

        Returns
        -------
        float
            The GCV score.
        '''
        shrink = self.gamma(10.0**p)
        trace_h = shrink.sum()

        if aow > 0.9:
            # aow = 1 means that all of the data are equally weighted;
            # this path needs no inverse DCT and is much faster
            resid = DCTy.ravel() * (shrink.ravel() - 1)
        else:
            # take the weights into account, on the finite samples only
            mask = self.is_finite
            fitted = idctn(shrink * DCTy)
            resid = sqrt(Wtot[mask]) * (y[mask] - fitted[mask])

        rss = linalg.norm(resid)**2  # residual sum-of-squares
        return rss / self.nof / (1.0 - trace_h / y.size)**2
Example #5
0
    def _initial_guess(y, I):
        # Initial Guess with weighted/missing data
        # nearest neighbor interpolation (in case of missing values)
        z = y
        if (1 - I).any():
            notI = ~I
            z, L = distance_transform_edt(notI, return_indices=True)
            z[notI] = y[L.flat[notI]]

        # coarse fast smoothing using one-tenth of the DCT coefficients
        shape = z.shape
        d = z.ndim
        z = dctn(z)
        for k in range(d):
            z[int((shape[k] + 0.5) / 10) + 1::, ...] = 0
            z = z.reshape(np.roll(shape, -k))
            z = z.transpose(np.roll(range(d), -1))
            # z = shiftdim(z,1);
        return idctn(z)
Example #6
0
def InitialGuess(y, I):
    '''
    Build a starting estimate for the smoother from data with gaps.

    Missing samples are filled with the value of the nearest available
    sample, then the result is coarsely smoothed by keeping only the
    leading DCT coefficients along every axis.

    Parameters
    ----------
    y : ndarray
        Data array; entries where `I` is False are treated as missing.
    I : ndarray of bool
        Mask with the same shape as `y`, True where `y` is usable.

    Returns
    -------
    z : ndarray
        Initial guess with the same shape as `y`. `y` itself is not
        modified.
    '''
    z = y
    if not np.all(I):
        missing = ~I
        # Feature transform: one index array per axis giving, for every
        # element, the position of the closest non-missing element
        # (assumes scipy.ndimage semantics for distance_transform_edt).
        nearest = distance_transform_edt(missing, return_distances=False,
                                         return_indices=True)
        # Start from the data themselves so that available samples keep
        # their values; only the gaps are overwritten.
        z = np.array(y)
        z[missing] = y[tuple(axis_idx[missing] for axis_idx in nearest)]

    # coarse fast smoothing using roughly one-tenth of the DCT coefficients
    # along each axis
    siz = z.shape
    z = dctn(z)
    for axis, length in enumerate(siz):
        cutoff = int((length + 0.5) / 10) + 1
        high = [slice(None)] * z.ndim
        high[axis] = slice(cutoff, None)
        z[tuple(high)] = 0
    z = idctn(z)

    return z
Example #7
0
def smoothn(data, s=None, weight=None, robust=False, z0=None, tolz=1e-3,
            maxiter=100, fulloutput=False):
    '''
    SMOOTHN fast and robust spline smoothing for 1-D to N-D data.

    Parameters
    ----------
    data : array like
        uniformly-sampled data array to smooth. Non finite values (NaN or Inf)
        are treated as missing values. The input array is not modified.
    s : real positive scalar
        smoothing parameter. The larger S is, the smoother the output will be.
        Default value is automatically determined using the generalized
        cross-validation (GCV) method.
    weight : string or array weights
        Either a weighting array of real positive values, that must have the
        same size as DATA (note that a zero weight corresponds to a missing
        value), or a string naming the weight function handed to
        RobustWeights when ROBUST is true (default 'bisquare').
    robust  : bool
        If true carry out a robust smoothing that minimizes the influence of
        outlying data.
    tolz : real positive scalar
        Termination tolerance on Z (default = 1e-3)
    maxiter :  scalar integer
        Maximum number of iterations allowed (default = 100)
    z0 : array-like
        Initial value for the iterative process. Only used with weighted or
        missing data; when omitted, InitialGuess provides the start value.
    fulloutput : bool
        If true, return the smoothing parameter together with the smoothed
        data.

    Returns
    -------
    z : array like
        smoothed data
    s : real positive scalar
        smoothing parameter actually used; only returned if FULLOUTPUT is
        true.

    Raises
    ------
    ValueError
        If any weight is negative.

    To be made
    ----------
    Estimate the confidence bands (see Wahba 1983, Nychka 1988).

    Reference
    ---------
    Garcia D, Robust smoothing of gridded data in one and higher dimensions
    with missing values. Computational Statistics & Data Analysis, 2010.
    http://www.biomecardio.com/pageshtm/publi/csda10.pdf

    Examples:
    --------

    1-D example
    >>> import matplotlib.pyplot as plt
    >>> x = np.linspace(0,100,2**8)
    >>> y = np.cos(x/10)+(x/50)**2 + np.random.randn(*x.shape)/10
    >>> y[np.r_[70, 75, 80]] = np.array([5.5, 5, 6])
    >>> z = smoothn(y) # Regular smoothing
    >>> zr = smoothn(y,robust=True) #  Robust smoothing
    >>> h=plt.subplot(121)
    >>> h = plt.plot(x,y,'r.',x,z,'k',linewidth=2)
    >>> h=plt.title('Regular smoothing')
    >>> h=plt.subplot(122)
    >>> h=plt.plot(x,y,'r.',x,zr,'k',linewidth=2)
    >>> h=plt.title('Robust smoothing')

     2-D example
    >>> xp = np.r_[0:1:.02]
    >>> [x,y] = np.meshgrid(xp,xp)
    >>> f = np.exp(x+y) + np.sin((x-2*y)*3)
    >>> fn = f + np.random.randn(*f.shape)*0.5
    >>> fs = smoothn(fn)
    >>> h=plt.subplot(121)
    >>> h=plt.contourf(xp,xp,fn)
    >>> h=plt.subplot(122)
    >>> h=plt.contourf(xp,xp,fs)

    The remaining examples are written in MATLAB syntax (not yet translated
    to Python).

     2-D example with missing data
    n = 256;
    y0 = peaks(n);
    y = y0 + rand(size(y0))*2;
    I = randperm(n^2);
    y(I(1:n^2*0.5)) = NaN;  lose 1/2 of data
    y(40:90,140:190) = NaN;  create a hole
    z = smoothn(y);  smooth data
    subplot(2,2,1:2), imagesc(y), axis equal off
    title('Noisy corrupt data')
    subplot(223), imagesc(z), axis equal off
    title('Recovered data ...')
    subplot(224), imagesc(y0), axis equal off
    title('... compared with original data')

     3-D example
    [x,y,z] = meshgrid(-2:.2:2);
    xslice = [-0.8,1]; yslice = 2; zslice = [-2,0];
    vn = x.*exp(-x.^2-y.^2-z.^2) + randn(size(x))*0.06;
    subplot(121), slice(x,y,z,vn,xslice,yslice,zslice,'cubic')
    title('Noisy data')
    v = smoothn(vn);
    subplot(122), slice(x,y,z,v,xslice,yslice,zslice,'cubic')
    title('Smoothed data')

    Cardioid

    t = linspace(0,2*pi,1000);
    x = 2*cos(t).*(1-cos(t)) + randn(size(t))*0.1;
    y = 2*sin(t).*(1-cos(t)) + randn(size(t))*0.1;
    z = smoothn(complex(x,y));
    plot(x,y,'r.',real(z),imag(z),'k','linewidth',2)
    axis equal tight

     Cellular vortical flow
    [x,y] = meshgrid(linspace(0,1,24));
    Vx = cos(2*pi*x+pi/2).*cos(2*pi*y);
    Vy = sin(2*pi*x+pi/2).*sin(2*pi*y);
    Vx = Vx + sqrt(0.05)*randn(24,24);  adding Gaussian noise
    Vy = Vy + sqrt(0.05)*randn(24,24);  adding Gaussian noise
    I = randperm(numel(Vx));
    Vx(I(1:30)) = (rand(30,1)-0.5)*5;  adding outliers
    Vy(I(1:30)) = (rand(30,1)-0.5)*5;  adding outliers
    Vx(I(31:60)) = NaN;  missing values
    Vy(I(31:60)) = NaN;  missing values
    Vs = smoothn(complex(Vx,Vy),'robust');  automatic smoothing
    subplot(121), quiver(x,y,Vx,Vy,2.5), axis square
    title('Noisy velocity field')
    subplot(122), quiver(x,y,real(Vs),imag(Vs)), axis square
    title('Smoothed velocity field')

    See also SMOOTH, SMOOTH3, DCTN, IDCTN.

    -- Damien Garcia -- 2009/03, revised 2010/11
    Visit
    http://www.biomecardio.com/matlab/smoothn.html
    for more details about SMOOTHN
    '''

    y = np.atleast_1d(data)
    sizy = y.shape
    noe = y.size
    if noe < 2:
        # Nothing to smooth; still honour the fulloutput return shape.
        return (data, s) if fulloutput else data

    weightstr = 'bisquare'
    W = np.ones(sizy)
    # Smoothness parameter and weights
    if isinstance(weight, str):
        weightstr = weight.lower()
    elif weight is not None:
        W = weight

    # Weights. Zero weights are assigned to not finite values (Inf or NaN),
    # (Inf/NaN values = missing data).
    IsFinite = np.isfinite(y)
    nof = IsFinite.sum()  # number of finite elements
    W = W * IsFinite
    if (W < 0).any():
        raise ValueError('Weights must all be >=0')
    W = W / W.max()

    isweighted = (W < 1).any()  # Weighted or missing data?
    isauto = s is None  # Automatic smoothing?
    # Creation of the Lambda tensor
    # Lambda contains the eigenvalues of the difference matrix used in this
    # penalized least squares process.
    d = y.ndim
    Lambda = np.zeros(sizy)
    siz0 = [1, ] * d
    for i in range(d):
        siz0[i] = sizy[i]
        Lambda = Lambda + \
            np.cos(pi * np.arange(sizy[i]) / sizy[i]).reshape(siz0)
        siz0[i] = 1

    Lambda = -2 * (d - Lambda)
    if not isauto:
        Gamma = 1. / (1 + s * Lambda ** 2)

    # Upper and lower bound for the smoothness parameter
    # The average leverage (h) is by definition in [0 1]. Weak smoothing occurs
    # if h is close to 1, while over-smoothing appears when h is near 0. Upper
    # and lower bounds for h are given to avoid under- or over-smoothing. See
    # equation relating h to the smoothness parameter (Equation #12 in the
    # referenced CSDA paper).
    N = (np.array(sizy) != 1).sum()  # tensor rank of the y-array
    hMin = 1e-6
    hMax = 0.99
    sMinBnd = (((1 + sqrt(1 + 8 * hMax ** (2. / N))) / 4. /
                hMax ** (2. / N)) ** 2 - 1) / 16
    sMaxBnd = (((1 + sqrt(1 + 8 * hMin ** (2. / N))) / 4. /
                hMin ** (2. / N)) ** 2 - 1) / 16

    # Initialize before iterating

    Wtot = W
    # Initial conditions for z
    if isweighted:
        # With weighted/missing data
        # An initial guess is provided to ensure faster convergence. For that
        # purpose, a nearest neighbor interpolation followed by a coarse
        # smoothing are performed.

        if z0 is None:
            z = InitialGuess(y, IsFinite)
        else:
            # an initial guess (z0) has been provided; accept any array-like
            z = np.asarray(z0)
    else:
        z = np.zeros(sizy)
    z0 = z
    if not IsFinite.all():
        # Arbitrary values for missing y-data. Work on a private copy:
        # np.atleast_1d may hand back the caller's own array, which must
        # keep its NaN/Inf entries.
        y = y.copy()
        y[~IsFinite] = 0

    tol = 1
    RobustIterativeProcess = True
    RobustStep = 1

    # Error on p. Smoothness parameter s = 10^p
    errp = 0.1

    # Relaxation factor RF: to speedup convergence
    RF = 1.75 if isweighted else 1.0

    norm = linalg.norm
    # Main iterative process
    while RobustIterativeProcess:
        # "amount" of weights (see the function GCVscore)
        aow = Wtot.sum() / noe  # 0 < aow <= 1
        exitflag = True
        for nit in range(1, maxiter + 1):
            DCTy = dctn(Wtot * (y - z) + z)
            if isauto and not np.remainder(np.log2(nit), 1):

                # The generalized cross-validation (GCV) method is used.
                # We seek the smoothing parameter s that minimizes the GCV
                # score i.e. s = Argmin(GCVscore).
                # Because this process is time-consuming, it is performed from
                # time to time (when nit is a power of 2)
                log10s = optimize.fminbound(
                    gcv, np.log10(sMinBnd), np.log10(sMaxBnd),
                    args=(aow, Lambda, DCTy, y, Wtot, IsFinite, nof, noe),
                    xtol=errp, full_output=False, disp=False)
                s = 10 ** log10s
                Gamma = 1.0 / (1 + s * Lambda ** 2)
            z = RF * idctn(Gamma * DCTy) + (1 - RF) * z

            # if no weighted/missing data => tol=0 (no iteration)
            tol = norm(z0.ravel() - z.ravel()) / norm(
                z.ravel()) if isweighted else 0.0
            if tol <= tolz:
                break
            z0 = z  # re-initialization
        else:
            # the loop ran maxiter times without reaching tolz
            exitflag = False

        if robust:
            # -- Robust Smoothing: iteratively re-weighted process
            # --- average leverage
            h = sqrt(1 + 16 * s)
            h = sqrt(1 + h) / sqrt(2) / h
            h = h ** N
            # take robust weights into account
            Wtot = W * RobustWeights(y - z, IsFinite, h, weightstr)
            # re-initialize for another iterative weighted process
            isweighted = True
            tol = 1
            RobustStep = RobustStep + 1
            # 3 robust steps are enough.
            RobustIterativeProcess = RobustStep < 4
        else:
            RobustIterativeProcess = False  # stop the whole process

    # Warning messages
    if isauto:
        if abs(np.log10(s) - np.log10(sMinBnd)) < errp:
            warnings.warn('''s = %g: the lower bound for s has been reached.
            Put s as an input variable if required.''' % s)
        elif abs(np.log10(s) - np.log10(sMaxBnd)) < errp:
            warnings.warn('''s = %g: the Upper bound for s has been reached.
            Put s as an input variable if required.''' % s)

    if not exitflag:
        warnings.warn('''Maximum number of iterations (%d) has been exceeded.
        Increase MaxIter option or decrease TolZ value.''' % (maxiter))
    if fulloutput:
        return z, s
    else:
        return z