Example #1
def load_dataset(path, pattern='(.*)', ftype='xy', delimiter='\t',
                 var_from_name=False, masked=False, xlim=None, ylim=None):
    """Loads an entire dataset.

    It uses the numpy.loadtxt function and therefore accepts regular
    ASCII files or GZIP compressed ones.

    PARAMETERS
        path (string) :
            The path in which the data files are located.
        pattern (string, optional) :
            Regular expression pattern corresponding to valid file names
            to be loaded.
        ftype (string, optional) :
            Specifies the file type that is loaded. The accepted values
            are 'xy', 'xt' and 'ty'.

            For 'xy', or map, files, the first line contains the
            longitude coordinates, the first column contains the
            latitude coordinates and the rest contains the data in
            matrix style. If var_from_name is set to True, it assumes
            that the time is given at the upper left cell.

            For 'xt', or zonal-temporal, files, the first line contains
            the longitude coordinates, the first column contains the
            time and the rest contains the data in matrix style. If
            var_from_name is set to True, it assumes that the latitude
            is given at the upper left cell.

            For 'ty', or temporal-meridional, files, the first line
            contains the time, the first column contains the latitude
            and the rest contains the data in matrix style. If
            var_from_name is set to True, it assumes that the longitude
            is given at the upper left cell.
        delimiter (string, optional) :
            Specifies the data delimiter used while loading the data.
            The default value is '\t' (tab).
        var_from_name (boolean, optional) :
            If set to true, tries to infer either the time, latitude
            or longitude from the groups matched in pattern, according
            to the chosen file type. In this case the pattern has to be
            set in such a way that the last matches contain the value
            and, if appropriate, the hemisphere ('N', 'S', 'E' or 'W').
        masked (boolean, optional) :
            Returns a masked array. Default is False.
        xlim, ylim (array like, optional) :
            Lists containing the lower and upper zonal and meridional
            limits, respectively.

    RETURNS
        lon (array like) :
            Longitude.
        lat (array like) :
            Latitude.
        t (array like) :
            Time.
        z (array like) :
            Loaded variable.

    """
    t0 = 0
    t1 = time()

    s = 'Loading data...'
    os.sys.stdout.write(s)
    os.sys.stdout.flush()

    # Generates list of files and tries to match them to the pattern
    flist = os.listdir(path)
    flist, match = common.reglist(flist, pattern)

    # Loads all the data from the file list
    N = len(flist)
    if N == 0:
        raise Warning, 'No files to be loaded.'
    Lon, Lat, Tm, Z, Sh = [], [], [], [], []
    i = 0
    for n, fname in enumerate(flist):
        t2 = time()

        dat = numpy.loadtxt('%s/%s' % (path, fname), delimiter=delimiter)
        if ftype == 'xy':
            lon = dat[0, 1:]
            lat = dat[1:, 0]
            tm = dat[0, 0]
        elif ftype == 'xt':
            lon = dat[0, 1:]
            lat = dat[0, 0]
            tm = dat[1:, 0]
        elif ftype == 'ty':
            lon = dat[0, 0]
            lat = dat[1:, 0]
            tm = dat[0, 1:]
        z = dat[1:, 1:]

        if var_from_name:
            if (ftype == 'xt') | (ftype == 'ty'):
                var = atof(match[n][-2])    # Gets coordinate out of match ...
                rav = match[n][-1].upper()  # ... and also its hemisphere.
                if rav in ('S', 'W'):
                    var *= -1
                if ftype == 'xt':
                    lat = var
                else:
                    lon = var
            elif ftype == 'xy':
                tm = atof(match[n][-1])     # Gets time out of last match.

        Lon.append(numpy.asarray(lon))
        Lat.append(numpy.asarray(lat))
        Tm.append(numpy.asarray(tm))
        Z.append(numpy.asarray(z))
        Sh.append(numpy.asarray(z.shape))

        os.sys.stdout.write(len(s) * '\b')
        s = 'Loading data (%s)... %s ' % (fname, common.profiler(N, n + 1, t0,
            t1, t2),)
        os.sys.stdout.write(s)
        os.sys.stdout.flush()
    #
    os.sys.stdout.write('\n')

    # Reshaping and rearranging the arrays to form an uniform data matrix.
    t1 = time()
    s = 'Reshaping arrays...'
    os.sys.stdout.write(s)
    os.sys.stdout.flush()

    try:
        lon = numpy.unique(numpy.concatenate(Lon))
    except:
        lon = numpy.unique(numpy.asarray(Lon))
    try:
        lat = numpy.unique(numpy.concatenate(Lat))
    except:
        lat = numpy.unique(numpy.asarray(Lat))
    try:
        tm = numpy.unique(numpy.concatenate(Tm))
    except:
        tm = numpy.unique(numpy.asarray(Tm))
    if numpy.isnan(tm).all():
        tm = numpy.array([numpy.nan])
    #elif ftype == 'ty':
    #    raise Warning, 'Loading of temporal-meridional files not implemented yet.'
    #
    dx = numpy.diff(lon).mean()
    dy = numpy.diff(lat).mean()
    dt = numpy.diff(tm).mean()

    # To ensure that the edges are padded with NaN's to avoid distortions when
    # generating maps.
    lon = numpy.concatenate([[lon[0] - dx], lon, [lon[-1] + dx]])
    lat = numpy.concatenate([[lat[0] - dy], lat, [lat[-1] + dy]])

    a, b, c = lon.size, lat.size, tm.size
    if masked:
        z = numpy.ma.empty([c, b, a], dtype=float) * numpy.nan
    else:
        z = numpy.empty([c, b, a], dtype=float) * numpy.nan

    for n in range(N):
        t2 = time()

        if ftype == 'xt':
            i = [pylab.find(lon == x)[0] for x in Lon[n]]
            j = pylab.find(lat == Lat[n])[0]
            z[:, j, i] = Z[n]
        elif ftype == 'xy':
            i = [pylab.find(lon == x)[0] for x in Lon[n]]
            j = [pylab.find(lat == y)[0] for y in Lat[n]]
            if numpy.isnan(Tm[n]):
                k = pylab.find(numpy.isnan(tm))[0]
            else:
                k = pylab.find(tm == Tm[n])
            i, j = numpy.meshgrid(i, j)
            z[k, j, i] = Z[n]
        elif ftype == 'ty':
            i = pylab.find(lon == Lon[n])[0]
            j = [pylab.find(lat == y)[0] for y in Lat[n]]
            i, j = numpy.meshgrid(i, j)
            z[:, j, i] = Z[n]

        os.sys.stdout.write(len(s) * '\b')
        s = 'Reshaping arrays... %s ' % (common.profiler(N, n + 1, t0, t1, t2))
        os.sys.stdout.write(s)
        os.sys.stdout.flush()
    #
    os.sys.stdout.write('\n')
    
    # Finds the upper and lower zonal and meridional limits to return only the
    # selected ranges.
    if masked and ((xlim is not None) or (ylim is not None)):
        if xlim is not None:
            xsel = pylab.find((lon < min(xlim)) | (lon > max(xlim)))
        else:
            xsel = range(a)
        if ylim is not None:
            ysel = pylab.find((lat < min(ylim)) | (lat > max(ylim)))
        else:
            ysel = range(b)
        xsel, ysel = numpy.meshgrid(xsel, ysel)
        z[:, ysel, xsel] = numpy.nan
    else:
        if xlim is not None:
            xsel = pylab.find((lon >= min(xlim)) & (lon <= max(xlim)))
        else:
            xsel = range(a)
        if ylim is not None:
            ysel = pylab.find((lat >= min(ylim)) & (lat <= max(ylim)))
        else:
            ysel = range(b)
        lon = lon[xsel]
        lat = lat[ysel]
        xsel, ysel = numpy.meshgrid(xsel, ysel)
        z = z[:, ysel, xsel]

    if c == 1:
        z = z[0, :, :]
    if masked:
        z.mask = numpy.isnan(z)
        z.data[z.mask] = 0

    return lon, lat, tm, z
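
A hedged usage sketch may help here. The snippet below assumes a
directory of gzipped, tab-delimited 'xy' map files following the layout
described in the docstring (time in the upper left cell, longitudes
along the first row, latitudes down the first column); the path and
file pattern are hypothetical.

# Each 'xy' file is assumed to look like (tab-delimited):
#
#   734139.0  -30.0  -29.0  -28.0    <- time, then longitude coordinates
#   -10.0      0.12   0.15   0.11    <- latitude, then one data row
#   -11.0      0.10   0.14   0.09
#
# Hypothetical call restricting the output to a longitude-latitude box:
lon, lat, tm, z = load_dataset('./data/sst', pattern='xy_(\d+)\.gz',
                               ftype='xy', masked=True,
                               xlim=[-30., -28.], ylim=[-11., -10.])
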
Example #2
def load_dataset(path, pattern='(.*)', ftype='xy', flist=None, delimiter='\t',
                 var_from_name=False, masked=False, xlim=None, ylim=None, 
                 lon=None, lat=None, tm=None, topomask=None, verbose=False):
    """Loads an entire dataset.

    It uses the numpy.loadtxt function and therefore accepts regular
    ASCII files or GZIP compressed ones.

    PARAMETERS
        path (string) :
            The path in which the data files are located.
        pattern (string, optional) :
            Regular expression pattern corresponding to valid file names
            to be loaded.
        ftype (string, optional) :
            Specifies the file type that is loaded. The accepted values
            are 'xy', 'xt' and 'ty'.

            For 'xy', or map, files, the first line contains the
            longitude coordinates, the first column contains the
            latitude coordinates and the rest contains the data in
            matrix style. If var_from_name is set to True, it assumes
            that the time is given at the upper left cell.

            For 'xt', or zonal-temporal, files, the first line contains
            the longitude coordinates, the first column contains the
            time and the rest contains the data in matrix style. If
            var_from_name is set to True, it assumes that the latitude
            is given at the upper left cell.

            For 'ty', or temporal-meridional, files, the first line
            contains the time, the first column contains the latitude
            and the rest contains the data in matrix style. If
            var_from_name is set to True, it assumes that the longitude
            is given at the upper left cell.
        flist (array like, optional) :
            Lists the files to be loaded in path. If set, it ignores the
            pattern.
        delimiter (string, optional) :
            Specifies the data delimiter used while loading the data.
            The default value is '\t' (tab).
        var_from_name (boolean, optional) :
            If set to true, tries to infer either the time, latitude
            or longitude from the groups matched in pattern, according
            to the chosen file type. In this case the pattern has to be
            set in such a way that the last matches contain the value
            and, if appropriate, the hemisphere ('N', 'S', 'E' or 'W').
        masked (boolean, optional) :
            Returns a masked array. Default is False.
        xlim, ylim (array like, optional) :
            Lists containing the lower and upper zonal and meridional
            limits, respectively.
        lon, lat, tm (array like, optional) :
            If set, forces the longitude, latitude and time grids onto
            which the data arrays are mapped instead of inferring them
            from the loaded files.
        topomask (string, optional) :
            Topography mask.
        verbose (boolean, optional) :
            If set to true, suppresses all progress output on screen.
            

    RETURNS
        lon (array like) :
            Longitude.
        lat (array like) :
            Latitude.
        t (array like) :
            Time.
        z (array like) :
            Loaded variable.

    """
    t0 = time()
    
    if topomask is not None:
        masked = True

    S = 'Preparing data'
    s = '%s...' % (S)
    if not verbose:
        os.sys.stdout.write(s)
        os.sys.stdout.flush()

    # Generates list of files and tries to match them to the pattern
    if flist is None:
        flist = os.listdir(path)
        flist, match = common.reglist(flist, pattern)
    
    # Loads all the data from file list to create arrays
    N = len(flist)
    if N == 0:
        raise Warning, 'No files to be loaded.'

    # Initializes the set of array limits
    Lon = set()
    Lat = set()
    Tm = set()
    # Walks through the file loading process twice. At the first step loads
    # all the files to get all the geographical and temporal boundaries. At the
    # second step, reloads all files and fits them to the initialized data 
    # arrays
    for step in range(2):
        t1 = time()
        for n, fname in enumerate(flist):
            t2 = time()
            
            # When all coordinate grids are given, the boundary-detection
            # first step can be skipped.
            if (step == 0) and (lon is not None) and (lat is not None) \
                    and (tm is not None):
                continue

            x, y, t, z = load_map('%s/%s' % (path, fname), ftype=ftype,
                delimiter=delimiter, lon=lon, lat=lat, tm=tm, masked=masked,
                topomask=topomask)

            if var_from_name:
                if (ftype == 'xt') | (ftype == 'ty'):
                    var = atof(match[n][-2])      # Gets coordinate out of ...
                    rav = match[n][-1].upper()    # ... match and also its ...
                    if rav in ('S', 'W'):         # ... hemisphere.
                        var *= -1
                    if ftype == 'xt':
                        y = var
                    else:
                        x = var
                elif ftype == 'xy':
                    t = atof(match[n][-1])       # Gets time out of last match.
            
            if numpy.isnan(t).all():
                t = 0
            
            if type(x).__name__ in ['int', 'long', 'float', 'float64']:
                x = [x]
            if type(y).__name__ in ['int', 'long', 'float', 'float64']:
                y = [y]
            if type(t).__name__ in ['int', 'long', 'float', 'float64']:
                t = [t]
            
            ###################################################################
            # FIRST STEP
            ###################################################################
            if step == 0:
                Lon.update(x)
                Lat.update(y)
                Tm.update(t)
            ###################################################################
            # SECOND STEP
            ###################################################################
            elif step == 1:
                selx = [pylab.find(Lon == i)[0] for i in x]
                sely = [pylab.find(Lat == i)[0] for i in y]
                selt = [pylab.find(Tm == i)[0] for i in t]
                
                i, j, k = common.meshgrid2(selx, sely, selt)
                
                if ftype == 'xt':
                    a, b, c = i.shape
                    z = z.reshape((a, 1, c))
                
                # Makes sure only to overwrite values not previously assigned.
                if masked:
                    Z[k, j, i] = numpy.ma.where(~Z[k, j, i].mask, 
                        Z[k, j, i], z)
                else:
                    Z[k, j, i] = numpy.where(~numpy.isnan(Z[k, j, i]), 
                        Z[k, j, i], z)
            
            ###################################################################
            # PROFILING
            ###################################################################
            if not verbose:
                os.sys.stdout.write(len(s) * '\b')
            s = '%s (%s)... %s ' % (S, fname, common.profiler(N, n + 1, t0, t1,
                t2))
            if not verbose:
                os.sys.stdout.write(s)
                os.sys.stdout.flush()
        #
        if not verbose:
            os.sys.stdout.write('\n')
    
        # Now creates data array based on input parameters xlim, ylim and
        # the loaded coordinate sets.
        if step == 0:
            if lon is None:
                Lon = numpy.asarray(list(Lon))
            else:
                Lon = lon
            if lat is None:
                Lat = numpy.asarray(list(Lat))
            else:
                Lat = lat
            if tm is None:
                Tm = numpy.asarray(list(Tm))
            else:
                Tm = tm
            
            Lon.sort()
            Lat.sort()
            Tm.sort()

            # Makes sure that all the coordinates are continuous, equally
            # spaced and that they are inside the coordinate limits.
            dx, dy, dt = numpy.diff(Lon), numpy.diff(Lat), numpy.diff(Tm)
            
            if len(dx) == 0: dx = numpy.array([1.])
            if len(dy) == 0: dy = numpy.array([1.])
            if len(dt) == 0: dt = numpy.array([1.])
            
            #if ((not (dx == dx[0]).all()) or (not (dy == dy[0]).all()) or 
            #    (not (dt == dt[0]).all())):
            #    raise Warning, 'One or more coordinates are not evenly spaced.'
            
            dx = dx[0]
            dy = dy[0]
            dt = dt[0]
            
            if xlim is None:
                xlim = [Lon.min(), Lon.max()]
            if ylim is None:
                ylim = [Lat.min(), Lat.max()]

            selx = pylab.find((Lon >= min(xlim)) & (Lon <= max(xlim)))
            Lon = Lon[selx]
            sely = pylab.find((Lat >= min(ylim)) & (Lat <= max(ylim)))
            Lat = Lat[sely]
            
            # Pads edges with NaN's to avoid distortions when generating maps.
            if lon is None:
                Lon = numpy.concatenate([[Lon[0] - dx], Lon, [Lon[-1] + dx]])
            if lat is None:
                Lat = numpy.concatenate([[Lat[0] - dy], Lat, [Lat[-1] + dy]])
            
            # Initializes data arrays
            a, b, c = Lon.size, Lat.size, Tm.size
            if masked:
                Z = numpy.ma.empty([c, b, a], dtype=float) * numpy.nan
                Z.mask = True
            else:
                Z = numpy.empty([c, b, a], dtype=float) * numpy.nan
            lon, lat = numpy.array(Lon), numpy.array(Lat)
            
            # Now everything might be ready for the second step in the loop,
            # filling in the data array.
            S, s = 'Loading data', ''

    # Interpolates topography into data grid.
    if topomask is not None:
        if not verbose:
            print 'Masking topographic features...'
        ezi, _, _ = interpolate.nearest([common.etopo.x, 
            common.etopo.y], common.etopo.z, [Lon, Lat])
        if topomask == 'ocean':
            tmask = (ezi > 0)
        elif topomask == 'land':
            tmask = (ezi < 0)
        #
        tmask = tmask.reshape([1, b, a])
        tmask = tmask.repeat(c, axis=0)
        #
        Z.mask = Z.mask | tmask

    if masked:
        Z.mask = Z.mask | numpy.isnan(Z.data)
        Z.data[Z.mask] = 0
    
    return Lon, Lat, Tm, Z
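
This variant is handy when the target grid is known beforehand or when
land or ocean points should be masked out via the interpolated
topography. A minimal sketch, assuming hypothetical file names and a
hypothetical data directory:

# Hypothetical call forcing a known longitude grid and masking out all
# land points (topomask='ocean' keeps only ocean cells).
known_lon = numpy.arange(-60., 21., 1.)
lon, lat, tm, z = load_dataset('./data/sst',
                               flist=['xy_000001.xy.gz',
                                      'xy_000002.xy.gz'],
                               ftype='xy', lon=known_lon,
                               topomask='ocean', verbose=True)
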
Example #3
def save_dataset(lon, lat, tm, z, path, fname=None, prefix='', fmt='%.3f'):
    """Saves an entire dataset of maps to files.

    Function accepts only three-dimensional data variables, for now.

    PARAMETERS
        lon, lat (array like) :
            Longitude and latitude coordinates.
        tm (array like) :
            Time or other relevant information (i.e. period) to append
            to the upper left cell.
        z (array like) :
            Variable data.
        path (string) :
            Path to the dataset directory.
        fname (string, array like, optional) :
            Forces the file name of the data. If omitted, the default
            is 'xy%s_%06d' % (prefix, tm[i]), where i is a counter
            starting at zero.
        prefix (string, optional) :
            Prefix to retain naming conventions such as basin.
        fmt (string, optional) :
            Format string for the values saved in the map. Default is a
            floating point number with three digits precision ('%.3f').

    OUTPUTS
        Saved map files to directory specified in path.

    RETURNS
        Nothing.

    """
    t1 = time()

    c, b, a = z.shape
    if lon.size != a:
        raise Warning, 'Longitude and data lengths do not match.'
    if lat.size != b:
        raise Warning, 'Latitude and data lengths do not match.'
    if tm.size != c:
        raise Warning, 'Time and data lengths do not match.'

    if type(fname).__name__ == 'str':
        fname = ['%s%d' % (fname, i) for i in range(c)]
    elif type(fname).__name__ in ['list', 'tuple', 'ndarray']:
        C = len(fname)
        if c > C:
            # Cycles through the given names and appends a pass counter
            # so that every map gets a unique file name.
            fname = ['%s%d' % (fname[i % C], i // C) for i in range(c)]
    else:
        fname = ['xy%s_%06d' % (prefix, tm[i]) for i in range(c)]

    # Starts saving the maps to gzipped files.
    if c == 1:
        plural = ''
    else:
        plural = 's'
    s = 'Saving %d file%s... ' % (c, plural)
    os.sys.stdout.write(s)
    os.sys.stdout.flush()
    for i in range(c):
        t2 = time()

        f = '%s/%s.gz' % (path, fname[i])
        save_map(lon, lat, z[i, :, :], f, tm[i], fmt)

        os.sys.stdout.write(len(s) * '\b')
        s = 'Saving %d file%s... %s ' % (c, plural, common.profiler(c, i + 1, 
            0, t1, t2),)
        os.sys.stdout.write(s)
        os.sys.stdout.flush()
    #
    os.sys.stdout.write('\n')
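
A short, self-contained round trip with synthetic arrays illustrates
the default naming convention; the output directory and values below
are hypothetical.

# Saves two synthetic maps; the default file names are built from the
# prefix and the zero-padded time values.
lon = numpy.arange(-30., -27.)
lat = numpy.arange(-11., -9.)
tm = numpy.array([1, 2])
z = numpy.random.randn(tm.size, lat.size, lon.size)
save_dataset(lon, lat, tm, z, './out', prefix='atl')
# -> ./out/xyatl_000001.gz and ./out/xyatl_000002.gz
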
Example #4
def wavelet_analysis(z,
                     tm,
                     lon=None,
                     lat=None,
                     mother='Morlet',
                     alpha=0.0,
                     siglvl=0.95,
                     loc=None,
                     onlyloc=False,
                     periods=None,
                     sel_periods=[],
                     show=False,
                     save='',
                     dsave='',
                     prefix='',
                     labels=dict(),
                     title=None,
                     name=None,
                     fpath='',
                     fpattern='',
                     std=dict(),
                     crange=None,
                     levels=None,
                     cmap=cm.GMT_no_green,
                     debug=False):
    """Continuous wavelet transform and significance analysis.

    The analysis is made using the methodology and statistical approach
    suggested by Torrence and Compo (1998).

    Depending on the dimensions of the input array, three different
    kinds of approaches are taken. If the input array is one-dimensional
    then only a simple analysis is performed. If the array is
    two- or three-dimensional then spectral Hovmoller diagrams are drawn
    for each Fourier period given within a range of +/-25%.

    PARAMETERS
        z (array like) :
            Input data. The data array should have one of these forms,
            z[tm], z[tm, lat] or z[tm, lat, lon].
        tm (array like) :
            Time axis. It should contain values in matplotlib date
            format (i.e. number of days since 0001-01-01 UTC).
        lon (array like, optional) :
            Longitude.
        lat (array like, optional) :
            Latitude.
        mother (string, optional) :
            Gives the name of the mother wavelet to be used. Possible
            values are 'Morlet' (default), 'Paul' or 'Mexican hat'.
        alpha (float or dictionary, optional) :
            Lag-1 autocorrelation for background noise.  Default value
            is 0.0 (white noise). If different autocorrelation
            coefficients should be used for different locations, then
            the input should contain a dictionary with 'lon', 'lat',
            'map' keys as for the std parameter.
        siglvl (float, optional) :
            Significance level. Default value is 0.95.
        loc (array like, optional) :
            Special locations of interest. If the input array is of
            higher dimensions, the simple wavelet analysis is also
            performed at each of these locations. The list should
            contain the (lon, lat) pairs for each location of interest.
        onlyloc (boolean, optional) :
            If set to true then only the specified locations are
            analysed. The default is false.
        periods (array like, optional) :
            Special Fourier periods of interest in case of analysis of
            higher dimensions (in years).
        sel_periods (array like, optional) :
            Selects the Fourier periods over which the spectral power
            is averaged.
        show (boolean, optional) :
            If set to true, the resulting maps are shown on screen.
        save (string, optional) :
            The path in which the resulting plots are to be saved. If
            not set, then no images will be saved.
        dsave (string, optional) :
            If set, saves the scale averaged power spectrum series to
            this path. This is especially useful if memory is an issue.
        prefix (string, optional) :
            Prefix to retain naming conventions such as basin.
        labels (dictionary, optional) :
            Sets the labels for the plot axis.
        title (string, array like, optional) :
            Title of each of the selected periods.
        name (string, array like, optional) :
            Name of each of the selected periods. Used when saving the
            results to files.
        fpath (string, optional) :
            Path for the source files to be loaded when memory issues
            are a concern.
        fpattern (string, optional) :
            Regular expression pattern to match file names.
        std (dictionary, optional) :
            A dictionary containing a map of the standard deviation of
            the analysed time series. To set the longitude and latitude
            coordinates of the map, they should be included as
            separate 'lon' and 'lat' key items. If they are omitted,
            then the regular input parameters are assumed. Accepted
            standard deviation error is set in key 'err' (default value
            is 1e-2).
        crange (array like, optional) :
            Array of power levels to be used in the average Hovmoller
            colour bar.
        levels (array like, optional) :
            Array of power levels to be used in spectrogram colour bar.
        cmap (colormap, optional) :
            Sets the colour map to be used in the plots. The default is
            the Generic Mapping Tools (GMT) no green.
        debug (boolean, optional) :
            If set to True then warnings are shown.

    OUTPUT
        If show or save are set, plots either on screen and or on file
        according to the specified parameters.

        If dsave parameter is set, also saves the scale averaged power
        series to files.

    RETURNS
        wave (dictionary) :
            Dictionary containing the resulting calculations from the
            wavelet analysis according to the input parameters. The
            output items might be:
                scale --
                    Wavelet scales.
                period --
                    Equivalent Fourier periods (in days).
                power_spectrum --
                    Wavelet power spectrum (in units**2).
                power_significance --
                    Relative significance of the power spectrum.
                global_power --
                    Global wavelet power spectrum (in units**2).
                scale_spectrum  --
                    Scale averaged wavelet spectra (in units**2)
                    according to selected periods.
                scale_significance --
                    Relative significance of the scale averaged wavelet
                    spectra.
                fft --
                    Fourier spectrum.
                fft_first --
                    Fourier spectrum of the first half of the
                    time-series.
                fft_second --
                    Fourier spectrum of the second half of the
                    time-series.
                fft_period --
                    Fourier periods (in days).
                trend --
                    Signal trend (in units/yr).
                wavelet_trend --
                    Wavelet spectrum trends (in units**2/yr).

    """
    t1 = time()
    result = {}

    # Resetting unit labels for Hovmoller plots
    hlabels = dict(labels)
    hlabels['units'] = ''

    # Setting some titles and paths
    if name is None:
        name = title

    # Working with the std parameter and setting its properties:
    if 'val' in std.keys():
        if 'lon' not in std.keys():
            std['lon'] = lon
        std['lon180'] = common.lon180(std['lon'])
        if 'lat' not in std.keys():
            std['lat'] = lat
        if 'err' not in std.keys():
            std['err'] = 1e-2
        std['map'] = True
    else:
        std['map'] = False

    # Lag-1 autocorrelation parameter
    if type(alpha).__name__ == 'dict':
        if 'lon' not in alpha.keys():
            alpha['lon'] = lon
        alpha['lon180'] = common.lon180(alpha['lon'])
        if 'lat' not in alpha.keys():
            alpha['lat'] = lat
        alpha['mean'] = alpha['val'].mean()
        alpha['map'] = True
        alpha['calc'] = False
    else:
        if alpha == -1:
            alpha = {'mean': -1, 'calc': True}
        else:
            alpha = {'val': alpha, 'mean': alpha, 'map': False, 'calc': False}

    # Shows some of the options on screen.
    print('Average Lag-1 autocorrelation for background noise: %.2f' %
          (alpha['mean']))
    if save:
        print 'Saving result figures in \'%s\'.' % (save)
    if dsave:
        print 'Saving result data in \'%s\'.' % (dsave)

    if fpath:
        # Gets the list of files to be loaded, individually extracts all
        # the latitudes and loads the first file to get the main
        # parameters.
        flist = os.listdir(fpath)
        flist, match = common.reglist(flist, fpattern)
        if len(flist) == 0:
            raise Warning, 'No files matched search pattern.'
        flist = numpy.asarray(flist)
        lst_lat = []
        for item in match:
            y = string.atof(item[-2])
            if item[-1].upper() == 'S': y *= -1
            lst_lat.append(y)
        # Detect file type from file name
        ftype = fm.detect_ftype(flist[0])
        x, y, tm, z = fm.load_map('%s/%s' % (fpath, flist[0]),
                                  ftype=ftype,
                                  masked=True)
        if lon is None:
            lon = x
        lat = numpy.unique(lst_lat)
        dim = 2
    else:
        # Transforms input arrays in numpy arrays and numpy masked arrays.
        tm = numpy.asarray(tm)
        z = numpy.ma.asarray(z)
        z.mask = numpy.isnan(z)

        # Determines the number of dimensions of the variable to be plotted and
        # the sizes of each dimension.
        a = b = c = None
        dim = len(z.shape)
        if dim == 3:
            c, b, a = z.shape
        elif dim == 2:
            c, a = z.shape
            b = 1
            z = z.reshape(c, b, a)
        else:
            c = z.shape[0]
            a = b = 1
            z = z.reshape(c, b, a)
        if tm.size != c:
            raise Warning, 'Time and data lengths do not match.'

    # Transforms coordinate arrays into numpy arrays
    s = type(lat).__name__
    if s in ['int', 'float', 'float64']:
        lat = numpy.asarray([lat])
    elif s != 'NoneType':
        lat = numpy.asarray(lat)
    s = type(lon).__name__
    if s in ['int', 'float', 'float64']:
        lon = numpy.asarray([lon])
    elif s != 'NoneType':
        lon = numpy.asarray(lon)

    # Starts the mother wavelet class instance and determines important
    # analysis parameters
    mother = mother.lower()
    if mother == 'morlet':
        mother = wavelet.Morlet()
    elif mother == 'paul':
        mother = wavelet.Paul()
    elif mother in ['mexican hat', 'mexicanhat', 'mexican_hat']:
        mother = wavelet.Mexican_hat()
    else:
        raise Warning, 'Mother wavelet unknown.'

    t = tm / common.daysinyear  # Time array in years
    dt = tm[1] - tm[0]  # Temporal sampling interval
    try:  # Zonal sampling interval
        dx = lon[1] - lon[0]
    except:
        dx = 1
    try:  # Meridional sampling interval
        dy = lat[1] - lat[0]
    except:
        dy = dx
    if numpy.isnan(dt): dt = 1
    if numpy.isnan(dx): dx = 1
    if numpy.isnan(dy): dy = dx
    dj = 0.25  # Four sub-octaves per octave
    s0 = 2 * dt  # Smallest scale
    J = 7 / dj - 1  # Seven powers of two with dj sub-octaves
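    # With dj = 0.25 this yields J + 1 = 28 logarithmically spaced
    # scales, s0 * 2**(j * dj) for j = 0, ..., J, i.e. roughly seven
    # octaves above the smallest resolvable scale.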
    scales = period = None

    if crange is None:
        crange = numpy.arange(0, 1.1, 0.1)
    if levels is None:
        levels = 2.**numpy.arange(-3, 6)

    if fpath:
        N = lat.size
        # TODO: refactoring # lon = numpy.arange(-81. - dx / 2., 290. + dx / 2, dx)
        # TODO: refactoring # lat = numpy.unique(numpy.asarray(lst_lat))
        c, b, a = tm.size, lat.size, lon.size
    else:
        N = a * b

    # Making sure that the longitudes range from -180 to 180 degrees and
    # setting the squared search radius R2.
    try:
        lon180 = common.lon180(lon)
    except:
        lon180 = None
    R2 = dx**2 + dy**2
    if numpy.isnan(R2):
        R2 = 65535.
    if loc is not None:
        loc = numpy.asarray([[common.lon180(item[0]), item[1]]
                             for item in loc])

    # Initializes important result variables such as the global wavelet power
    # spectrum map, scale averaged spectrum time-series and their significance,
    # and the wavelet power trend map.
    global_power = numpy.ma.empty([J + 1, b, a]) * numpy.nan
    try:
        C = len(periods) + 1
        dT = numpy.diff(periods)
        pmin = numpy.concatenate([[periods[0] - dT[0] / 2],
                                  0.5 * (periods[:-1] + periods[1:])])
        pmax = numpy.concatenate(
            [0.5 * (periods[:-1] + periods[1:]), [periods[-1] + dT[-1] / 2]])
    except:
        # Sets the lowest period to null and the highest to half the time
        # series length.
        C = 1
        pmin = numpy.array([0])
        pmax = numpy.array([(tm[-1] - tm[0]) / 2])
    if type(sel_periods).__name__ in ['int', 'float']:
        sel_periods = [sel_periods]
    elif len(sel_periods) == 0:
        sel_periods = [-1.]
    try:
        if fpath:
            raise Warning, 'Process files individually'
        avg_spectrum = numpy.ma.empty([C, c, b, a]) * numpy.nan
        mem_error = False
    except:
        avg_spectrum = numpy.ma.empty([C, c, a]) * numpy.nan
        mem_error = True
    avg_spectrum_signif = numpy.ma.empty([C, b, a]) * numpy.nan
    trend = numpy.ma.empty([b, a]) * numpy.nan
    wavelet_trend = numpy.ma.empty([C, b, a]) * numpy.nan
    fft_trend = numpy.ma.empty([C, b, a]) * numpy.nan
    std_map = numpy.ma.empty([b, a]) * numpy.nan
    zero = numpy.ma.empty([c, a])
    fft_spectrum = None
    fft_spectrum1 = None
    fft_spectrum2 = None

    # Walks through each latitude and then through each longitude to perform
    # the temporal wavelet analysis.
    if N == 1:
        plural = ''
    else:
        plural = 's'
    s = 'Spectral analysis of %d location%s... ' % (N, plural)
    stdout.write(s)
    stdout.flush()
    for j in range(b):
        t2 = time()
        isloc = False  # Resets 'is special location' flag
        hloc = []  # Cleans location list for Hovmoller plots
        zero *= numpy.nan
        if mem_error:
            # Clears average spectrum for next step.
            avg_spectrum *= numpy.nan
            avg_spectrum.mask = False
        if fpath:
            findex = pylab.find(lst_lat == lat[j])
            if len(findex) == 0:
                continue
            ftype = fm.detect_ftype(flist[findex[0]])
            try:
                x, y, tm, z = fm.load_dataset(fpath,
                                              flist=flist[findex],
                                              ftype=ftype,
                                              masked=True,
                                              lon=lon,
                                              lat=lat[j:j + 1],
                                              verbose=True)
            except:
                continue
            z = z[:, 0, :]
            x180 = common.lon180(x)

        # Determines the first and second halves of the time-series and some
        # constants for the FFT
        fft_ta = numpy.ceil(t.min())
        fft_tb = numpy.floor(t.max())
        fft_tc = numpy.round(fft_ta + fft_tb) / 2
        fft_ia = pylab.find((t >= fft_ta) & (t <= fft_tc))
        fft_ib = pylab.find((t >= fft_tc) & (t <= fft_tb))
        fft_N = int(2**numpy.ceil(numpy.log2(max([len(fft_ia), len(fft_ib)]))))
        fft_N2 = fft_N / 2 - 1
        fft_dt = t[fft_ib].mean() - t[fft_ia].mean()

        for i in range(a):
            # Some string output.
            try:
                Y, X = common.num2latlon(lon[i],
                                         lat[j],
                                         mode='each',
                                         padding=False)
            except:
                Y = X = '?'

            # Extracts individual time-series from the whole dataset and
            # sets or calculates its standard deviation, squared standard
            # deviation and finally the normalized time-series.
            if fpath:
                try:
                    ilon = pylab.find(x == lon[i])[0]
                    fz = z[:, ilon]
                except:
                    continue
            else:
                fz = z[:, j, i]
            if fz.mask.all():
                continue
            if std['map']:
                try:
                    u = pylab.find(std['lon180'] == lon180[i])[0]
                    v = pylab.find(std['lat'] == lat[j])[0]
                except:
                    if debug:
                        warnings.warn(
                            'Unable to locate standard deviation '
                            'for (%s, %s)' % (X, Y), Warning)
                    continue
                fstd = std['val'][v, u]
                estd = fstd - fz.std()
                if (estd < 0) & (abs(estd) > std['err']):
                    if debug:
                        warnings.warn('Discrepant input standard deviation '
                                      '(%f) location (%.3f, %.3f) will be '
                                      'disregarded.' %
                                      (estd, lon180[i], lat[j]))
                    continue
            else:
                fstd = fz.std()
            fstd2 = fstd**2
            std_map[j, i] = fstd
            zero[:, i] = fz
            fz = (fz - fz.mean()) / fstd

            # Calculates the distance of the current point to any special
            # location set in the 'loc' parameter. If only special locations
            # are to be analysed, then skips all other ones. If the input
            # array is one dimensional, then do the analysis anyway.
            if dim == 1:
                dist = numpy.asarray([0.])
            else:
                try:
                    dist = numpy.asarray([
                        ((item[0] - (lon180[i]))**2 + (item[1] - lat[j])**2)
                        for item in loc
                    ])
                except:
                    dist = []
            if (dist > R2).all() & (loc != 'all') & onlyloc:
                continue

            # Determines the lag-1 autocorrelation coefficient to be used in
            # the significance test from the input parameter
            if alpha['calc']:
                ac = acorr(fz)
                alpha_ij = (ac[c + 1] + ac[c + 2]**0.5) / 2
            elif alpha['map']:
                try:
                    u = pylab.find(alpha['lon180'] == lon180[i])[0]
                    v = pylab.find(alpha['lat'] == lat[j])[0]
                    alpha_ij = alpha['val'][v, u]
                except:
                    if debug:
                        warnings.warn(
                            'Unable to locate lag-1 autocorrelation '
                            'for (%s, %s), using mean value instead' % (X, Y),
                            Warning)
                    alpha_ij = alpha['mean']
            else:
                alpha_ij = alpha['mean']

            # Calculates the continuous wavelet transform using the wavelet
            # Python module. Calculates the wavelet and Fourier power spectrum
            # and the periods in days. Also calculates the Fourier power
            # spectrum for the first and second halves of the timeseries.
            wave, scales, freqs, coi, fft, fftfreqs = wavelet.cwt(
                fz, dt, dj, s0, J, mother)
            power = abs(wave * wave.conj())
            fft_power = abs(fft * fft.conj())
            period = 1. / freqs
            fftperiod = 1. / fftfreqs
            psel = pylab.find(period <= pmax.max())

            # Calculates the Fourier transform for the first and the second
            # halves of the time-series for later trend analysis.
            fft_1 = numpy.fft.fft(fz[fft_ia], fft_N)[1:fft_N / 2] / fft_N**0.5
            fft_2 = numpy.fft.fft(fz[fft_ib], fft_N)[1:fft_N / 2] / fft_N**0.5
            fft_p1 = abs(fft_1 * fft_1.conj())
            fft_p2 = abs(fft_2 * fft_2.conj())

            # Creates FFT return array and stores the spectrum accordingly
            try:
                fft_spectrum[:, j, i] = fft_power * fstd2
                fft_spectrum1[:, j, i] = fft_p1 * fstd2
                fft_spectrum2[:, j, i] = fft_p2 * fstd2
            except:
                fft_spectrum = (numpy.ma.empty([len(fft_power), b, a]) *
                                numpy.nan)
                fft_spectrum1 = (numpy.ma.empty([fft_N2, b, a]) * numpy.nan)
                fft_spectrum2 = (numpy.ma.empty([fft_N2, b, a]) * numpy.nan)
                #
                fft_spectrum[:, j, i] = fft_power * fstd2
                fft_spectrum1[:, j, i] = fft_p1 * fstd2
                fft_spectrum2[:, j, i] = fft_p2 * fstd2

            # Performs the significance test according to the article by
            # Torrence and Compo (1998). The wavelet power is significant
            # if the ratio power/sig95 is > 1.
            signif, fft_theor = wavelet.significance(1.,
                                                     dt,
                                                     scales,
                                                     0,
                                                     alpha_ij,
                                                     significance_level=siglvl,
                                                     wavelet=mother)
            sig95 = (signif * numpy.ones((c, 1))).transpose()
            sig95 = power / sig95

            # Calculates the global wavelet power spectrum and its
            # significance. The global wavelet spectrum is the average of the
            # wavelet power spectrum over time. The degrees of freedom (dof)
            # have to be corrected for padding at the edges.
            glbl_power = power.mean(axis=1)
            dof = c - scales
            glbl_signif, tmp = wavelet.significance(1.,
                                                    dt,
                                                    scales,
                                                    1,
                                                    alpha_ij,
                                                    significance_level=siglvl,
                                                    dof=dof,
                                                    wavelet=mother)
            global_power[:, j, i] = glbl_power * fstd2

            # Calculates the average wavelet spectrum along the scales and its
            # significance according to Torrence and Compo (1998) eq. 24. The
            # scale_avg_full variable is used multiple times according to the
            # selected periods range.
            #
            # Also calculates the average Fourier power spectrum.
            Cdelta = mother.cdelta
            scale_avg_full = (scales * numpy.ones((c, 1))).transpose()
            scale_avg_full = power / scale_avg_full
            for k in range(C):
                if k == 0:
                    sel = pylab.find((period >= pmin[0])
                                     & (period <= pmax[-1]))
                    pminmax = [period[sel[0]], period[sel[-1]]]
                    les = pylab.find((fftperiod >= pmin[0])
                                     & (fftperiod <= pmax[-1]))
                    fminmax = [fftperiod[les[0]], fftperiod[les[-1]]]
                else:
                    sel = pylab.find((period >= pmin[k - 1])
                                     & (period < pmax[k - 1]))
                    pminmax = [pmin[k - 1], pmax[k - 1]]
                    les = pylab.find((fftperiod >= pmin[k - 1])
                                     & (fftperiod <= pmax[k - 1]))
                    fminmax = [fftperiod[les[0]], fftperiod[les[-1]]]

                scale_avg = numpy.ma.array(
                    (dj * dt / Cdelta * scale_avg_full[sel, :].sum(axis=0)))
                scale_avg_signif, tmp = wavelet.significance(
                    1.,
                    dt,
                    scales,
                    2,
                    alpha_ij,
                    significance_level=siglvl,
                    dof=[scales[sel[0]], scales[sel[-1]]],
                    wavelet=mother)
                scale_avg.mask = (scale_avg < scale_avg_signif)
                if mem_error:
                    avg_spectrum[k, :, i] = scale_avg
                else:
                    avg_spectrum[k, :, j, i] = scale_avg
                avg_spectrum_signif[k, j, i] = scale_avg_signif

                # Trend analysis using least square polynomial fit of one
                # degree of the original input data and scale averaged
                # wavelet power. The wavelet power trend is calculated only
                # where the cone of influence spans the highest analyzed
                # period. In the end, the returned value for the trend is in
                # units**2.
                #
                # Also calculates the trends in the Fourier power spectrum.
                # Note that the FFT power spectrum is already multiplied by
                # the signal's standard deviation.
                incoi = pylab.find(coi >= pmax[-1])
                if len(incoi) == 0:
                    incoi = numpy.arange(c)
                polyw = numpy.polyfit(t[incoi], scale_avg[incoi].data, 1)
                wavelet_trend[k, j, i] = polyw[0] * fstd2
                fft_trend[k, j, i] = (
                    fft_spectrum2[les[les < fft_N2], j, i] -
                    fft_spectrum1[les[les < fft_N2], j, i]).mean() / fft_dt
                if k == 0:
                    polyz = numpy.polyfit(t, fz * fstd, 1)
                    trend[j, i] = polyz[0]

                # Plots the wavelet analysis results for the individual
                # series. The plot is only generated if the dimension of the
                # input variable z is one, if a special location is within a
                # range of the search radius R and if the show or save
                # parameters are set.
                if (show | (save != '')) & ((k in sel_periods)):
                    if (dist < R2).any() | (loc == 'all') | (dim == 1):
                        # There is an interesting spot within the search
                        # radius of location (%s, %s).' % (Y, X)
                        isloc = True
                        if (dist < R2).any():
                            try:
                                hloc.append(loc[(dist < R2)][0, 0])
                            except:
                                pass
                        if save:
                            try:
                                sv = '%s/tz_%s_%s_%d' % (
                                    save, prefix,
                                    common.num2latlon(lon[i], lat[j]), k)
                            except:
                                sv = '%s' % (save)
                        else:
                            sv = ''
                        graphics.wavelet_plot(tm,
                                              period[psel],
                                              fz,
                                              power[psel, :],
                                              coi,
                                              glbl_power[psel],
                                              scale_avg.data,
                                              fft=fft,
                                              fft_period=fftperiod,
                                              power_signif=sig95[psel, :],
                                              glbl_signif=glbl_signif[psel],
                                              scale_signif=scale_avg_signif,
                                              pminmax=pminmax,
                                              labels=labels,
                                              normalized=True,
                                              std=fstd,
                                              ztrend=polyz,
                                              wtrend=polyw,
                                              show=show,
                                              save=sv,
                                              levels=levels,
                                              cmap=cmap)

        # Saves and/or plots the intermediate results as zonal temporal
        # diagrams.
        if dsave:
            for k in range(C):
                if k == 0:
                    sv = '%s/%s/%s_%s.xt.gz' % (
                        dsave, 'global', prefix,
                        common.num2latlon(lon[i], lat[j], mode='each')[0])
                else:
                    sv = '%s/%s/%s_%s.xt.gz' % (
                        dsave, name[k - 1].lower(), prefix,
                        common.num2latlon(lon[i], lat[j], mode='each')[0])
                if mem_error:
                    fm.save_map(lon, tm, avg_spectrum[k, :, :].data, sv,
                                lat[j])
                else:
                    fm.save_map(lon, tm, avg_spectrum[k, :, j, :].data, sv,
                                lat[j])

        if ((dim > 1) and (show or (save != '')) & (not onlyloc)
                and len(hloc) > 0):
            hloc = common.lon360(numpy.unique(hloc))
            if save:
                sv = '%s/xt_%s_%s' % (save, prefix,
                                      common.num2latlon(
                                          lon[i], lat[j], mode='each')[0])
            else:
                sv = ''
            if mem_error:
                # To include overlapping original signal, use zz=zero
                gis.hovmoller(lon,
                              tm,
                              avg_spectrum[1:, :, :],
                              zo=avg_spectrum_signif[1:, j, :],
                              title=title,
                              crange=crange,
                              show=show,
                              save=sv,
                              labels=hlabels,
                              loc=hloc,
                              cmap=cmap,
                              bottom='avg',
                              right='avg',
                              std=std_map[j, :])
            else:
                gis.hovmoller(lon,
                              tm,
                              avg_spectrum[1:, :, j, :],
                              zo=avg_spectrum_signif[1:, j, :],
                              title=title,
                              crange=crange,
                              show=show,
                              save=sv,
                              labels=hlabels,
                              loc=hloc,
                              cmap=cmap,
                              bottom='avg',
                              right='avg',
                              std=std_map[j, :])

        # Flushing profiling text.
        stdout.write(len(s) * '\b')
        s = 'Spectral analysis of %d location%s (%s)... %s ' % (
            N, plural, Y, common.profiler(b, j + 1, 0, t1, t2))
        stdout.write(s)
        stdout.flush()

    stdout.write('\n')

    result['scale'] = scales
    result['period'] = period
    if dim == 1:
        result['power_spectrum'] = power * fstd2
        result['power_significance'] = sig95
        result['cwt'] = wave
        result['fft'] = fft
    result['global_power'] = global_power
    result['scale_spectrum'] = avg_spectrum
    if fpath:
        result['lon'] = lon
        result['lat'] = lat
    result['scale_significance'] = avg_spectrum_signif
    result['trend'] = trend
    result['wavelet_trend'] = wavelet_trend
    result['fft_power'] = fft_spectrum
    result['fft_first'] = fft_spectrum1
    result['fft_second'] = fft_spectrum2
    result['fft_period'] = fftperiod
    result['fft_trend'] = fft_trend
    return result
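
A minimal single-series sketch may help here: it assumes a synthetic
monthly signal on a time axis given in days (matplotlib date numbers)
and a mildly red background noise; every value below is illustrative.

# Hypothetical call on one synthetic series: an annual cycle plus
# noise, sampled roughly every month for thirty years.
tm = numpy.arange(734139., 734139. + 30 * 365.25, 30.4375)
z = numpy.sin(2. * numpy.pi * tm / 365.25) + numpy.random.randn(tm.size)
result = wavelet_analysis(z, tm, mother='Morlet', alpha=0.3,
                          siglvl=0.95, show=False)
print result['period']  # equivalent Fourier periods, in days
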
Example #5
def basics(z, dt=None, oldschool=False):
    """Performs basic statistics on given data variable z.

    Calculates the mean, standard deviation and trend along time.
    Assumes the first dimension of the array to be time and the others to be
    the coordinates. Maximum number of dimensions is three. The trend
    is calculated by least square fit of a one degree polynomial
    function.

    PARAMETERS
        z (array like) :
            Variable to be analysed.
        dt (float) :
            Temporal sampling scale to normalize the trend.
        oldschool (boolean, optional):
            If set to true, calculates the averages and standard deviation
            using old school techniques.

    RETURNS
        mean, std, trend, alpha (array like) :
            Calculated mean, standard deviation, trends and lag-1 auto-
            correlation.
    """
    t1 = time()

    # Transforms the input array into a numpy masked array.
    z = numpy.ma.masked_invalid(z)
    if dt is None:
        dt = 1.

    dim = len(z.shape)
    if dim == 1:
        z = z.reshape(z.size, 1, 1)
    elif dim == 2:
        c, b = z.shape
        z = z.reshape(c, b, 1)
    elif dim > 3:
        raise Warning, 'Higher dimensions than three are not implemented.'
    c, b, a = z.shape
    t = numpy.arange(c) * dt
    # Collapses the mask along time: grid points with any invalid time
    # step are excluded from the point-wise calculations below.
    mask = numpy.ma.getmaskarray(z).any(axis=0)

    t2 = time()
    s = 'Calculating mean... '
    stdout.write(s)
    stdout.flush()
    if oldschool:
        zmean = numpy.ma.empty([b, a]) * numpy.nan
        zstd = numpy.ma.empty([b, a]) * numpy.nan
        for i in range(a):
            t2 = time()
            for j in range(b):
                if not mask[j, i]:
                    zmean[j, i] = z[:, j, i].mean()
                    zstd[j, i] = z[:, j, i].std()
            stdout.write(len(s) * '\b')
            s = ('Calculating mean and standard deviation... %s ' %
                 (common.profiler(a, i + 1, 0, t1, t2)))
            stdout.write(s)
            stdout.flush()
        s = '\n'
    else:
        zmean = z.mean(axis=0)
        s = '%s\n' % (common.profiler(1, 1, 0, t1, t2))
    zmean[mask] = numpy.nan
    zmean.mask = mask
    stdout.write(s)

    if not oldschool:
        t2 = time()
        s = 'Calculating standard deviation... '
        stdout.write(s)
        stdout.flush()
        zstd = z.std(axis=0)
        s = '%s\n' % (common.profiler(1, 1, 0, t1, t2))
        stdout.write(s)
    zstd[mask] = numpy.nan
    zstd.mask = mask

    s = 'Calculating trends and lag-1 autocorrelation... '
    stdout.write(s)
    stdout.flush()
    ztrend = numpy.ma.empty([b, a]) * numpy.nan
    zalpha = numpy.ma.empty([b, a]) * numpy.nan
    for i in range(a):
        t2 = time()
        for j in range(b):
            if not mask[j, i]:
                p = numpy.polyfit(t, z[:, j, i], 1)
                ztrend[j, i] = p[0]
                #
                ac = acorr(z[:, j, i])
                zalpha[j, i] = (ac[c] + ac[c + 1]**0.5) / 2

        stdout.write(len(s) * '\b')
        s = ('Calculating trends and lag-1 autocorrelation... %s ' %
             (common.profiler(a, i + 1, 0, t1, t2)))
        stdout.write(s)
        stdout.flush()
    ztrend.mask = mask
    zalpha.mask = mask | numpy.isnan(zalpha)
    stdout.write('\n')

    return zmean, zstd, ztrend, zalpha
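
A quick synthetic check with an imposed linear trend shows what the
returned arrays mean; the slope and grid shape below are illustrative.

# Synthetic field with slope 0.5 per time step on a 4 x 5 grid; with
# dt=1. the returned trend stays in units per time step.
t = numpy.arange(120.)
z = 0.5 * t[:, None, None] + numpy.random.randn(120, 4, 5)
zmean, zstd, ztrend, zalpha = basics(z, dt=1.)
print ztrend.mean()  # should be close to 0.5
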
Example #6
def save_dataset(lon, lat, tm, z, path, fname=None, prefix='', fmt='%.3f'):
    """Saves an entire dataset of maps to files.

    Function accepts only three-dimensional data variables, for now.

    PARAMETERS
        lon, lat (array like) :
            Longitude and latitude coordinates.
        tm (array like) :
            Time or other relevant information (i.e. period) to append
            to the upper left cell.
        z (array like) :
            Variable data.
        path (string) :
            Path to the dataset directory.
        fname (string, array like, optional) :
            Forces the file name of the data. If omitted, the default
            is '%s_%06d.xy' % (prefix, tm[i]), where i is a counter
            starting at zero.
        prefix (string, optional) :
            Prefix to retain naming conventions such as basin.
        fmt (string, optional) :
            Format string for the values saved in the map. Default is a
            floating point number with three digits precision ('%.3f').

    OUTPUTS
        Saved map files to directory specified in path.

    RETURNS
        Nothing.

    """
    t1 = time()

    c, b, a = z.shape
    if lon.size != a:
        raise Warning, 'Longitude and data lengths do not match.'
    if lat.size != b:
        raise Warning, 'Latitude and data lengths do not match.'
    if tm.size != c:
        raise Warning, 'Time and data lengths do not match.'

    if type(fname).__name__ == 'str':
        fname = ['%s%d' % (fname, i) for i in range(c)]
    elif type(fname).__name__ in ['list', 'tuple', 'ndarray']:
        C = len(fname)
        if c > C:
            # Cycles through the given names, appending a repetition
            # counter, until there is one name per time step.
            fname = ['%s%d' % (fname[i % C], i // C) for i in range(c)]
    else:
        fname = ['%s_%06d.xy' % (prefix, tm[i]) for i in range(c)]

    # Starts saving the maps to gziped files.
    if c == 1:
        plural = ''
    else:
        plural = 's'
    s = 'Saving %d file%s... ' % (c, plural)
    os.sys.stdout.write(s)
    os.sys.stdout.flush()
    for i in range(c):
        t2 = time()

        f = '%s/%s.gz' % (path, fname[i])
        save_map(lon, lat, z[i, :, :], f, tm[i], fmt)

        os.sys.stdout.write(len(s) * '\b')
        s = 'Saving %d file%s... %s ' % (
            c,
            plural,
            common.profiler(c, i + 1, 0, t1, t2),
        )
        os.sys.stdout.write(s)
        os.sys.stdout.flush()
    #
    os.sys.stdout.write('\n')
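
A minimal usage sketch, assuming `save_map` is available in this module and that the target directory exists; the path and prefix are hypothetical:

    import numpy
    lon = numpy.arange(0., 360., 1.)
    lat = numpy.arange(-90., 91., 1.)
    tm = numpy.arange(12.)
    z = numpy.random.randn(tm.size, lat.size, lon.size)
    # Writes twelve gzipped map files named 'basin_%06d.xy.gz'.
    save_dataset(lon, lat, tm, z, './maps', prefix='basin')
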
Example #7
0
def bin_average(x, y, dx=1., bins=None, nstd=2., interpolate='bins', k=3,
    s=None, extrapolate='repeat', mode='mean', profile=False, usemask=True):
    """Calculates bin average from input data.

    Inside each bin, calculates the average and standard deviation, and
    selects only those values inside the confidence interval given in
    `nstd`. Finally calculates the bin average using spline
    interpolation at the middle points in each bin. Values outside the
    data boundaries are extrapolated according to `extrapolate`.

    Parameters
    ----------
    x : array like
        Input coordinate to be binned. It has to be 1-dimensional.
    y : array like
        The data input array.
    dx : float, optional
        Bin width, used to construct the bins when `bins` is not given.
    bins : array like, optional
        Array of bins. It has to be 1-dimensional and strictly
        increasing.
    nstd : float, optional
        Confidence interval given as number of standard deviations.
    interpolate : string or boolean, optional
        Valid options are `bins` (default), `full` or `False` and
        defines whether to interpolate data to central bin points only
        in filled bins, over full time-series, or skip interpolation
        respectively.
    k : int, optional
        Specifies the order of the interpolation spline. Default is 3,
        `cubic`.
    s : float, optional
        Positive smoothing factor used to choose the number of knots.
    extrapolate : string, bool, optional
        Sets if averaging outside data boundaries should be
        extrapolated. If `True` or `linear`, extrapolates data linearly,
        if `repeat` (default) repeats values from nearest bin.
    mode : string, optional
        Sets averaging mode: `mean` (default) or `median`.
    profile : bool, optional
        If set to `True`, prints progress information on screen.
    usemask : bool, optional
        If set to `True` (default), the returned arrays are masked
        arrays.

    Returns
    -------
    bin_x : array like
        Coordinate at the center of the bins.
    bin_y : array like
        Interpolated array of bin averages.
    avg_x : array like
        Average coordinate in each bin.
    avg_y : array like
        Average values inside each bin.
    std_x : array like
        Coordinate standard deviation in each bin.
    std_y : array like
        Standard deviation in each bin.
    min_y : array like
        Minimum values in each bin.
    max_y : array like
        Maximum values in each bin.

    """
    t0 = time()
    # If no bins are given, calculates them from the input data. Note
    # that the upper limit is hard-coded to zero.
    if bins is None:
        x_min = floor(x.min() / dx) * dx
        x_max = 0.  # numpy.ceil(x.max() / dx) * dx
        bins = arange(x_min - dx, x_max + dx, dx) + dx / 2
    # Checks if the bin array is strictly increasing.
    if not all(b1 < b2 for b1, b2 in zip(bins, bins[1:])):
        raise ValueError('Bin array must be strictly increasing.')
    # Ensures that input coordinate `x` is monotonically increasing.
    _i = x.argsort()
    x = x[_i]
    y = y[_i]
    # Data types
    dtype_x = x.dtype
    dtype_y = y.dtype
    # Some variable initializations
    nbins = len(bins) - 1
    ndata = len(y)
    Sel = zeros(ndata, dtype=bool)
    # Initializes output arrays, masked or not.
    if usemask:
        bin_y = ma.empty(nbins, dtype=dtype_y) * nan
        avg_x = ma.empty(nbins, dtype=dtype_x) * nan
        avg_y = ma.empty(nbins, dtype=dtype_y) * nan
        std_x = ma.empty(nbins, dtype=dtype_x) * nan
        std_y = ma.empty(nbins, dtype=dtype_y) * nan
        min_y = ma.empty(nbins, dtype=dtype_y) * nan
        max_y = ma.empty(nbins, dtype=dtype_y) * nan
    else:
        bin_y = empty(nbins, dtype=dtype_y) * nan
        avg_x = empty(nbins, dtype=dtype_x) * nan
        avg_y = empty(nbins, dtype=dtype_y) * nan
        std_x = empty(nbins, dtype=dtype_x) * nan
        std_y = empty(nbins, dtype=dtype_y) * nan
        min_y = empty(nbins, dtype=dtype_y) * nan
        max_y = empty(nbins, dtype=dtype_y) * nan
    # Determines the indices of the bins to which each data point belongs.
    bin_sel = digitize(x, bins) - 1
    bin_sel_unique = unique(bin_sel)
    _nbins = bin_sel_unique.size
    #
    t1 = time()
    for i, bin_i in enumerate(bin_sel_unique):
        if profile:
            # Erase line ANSI terminal string when using return feed
            # character.
            # (source:http://www.termsys.demon.co.uk/vtansi.htm#cursor)
            _s = '\x1b[2K\rBin-averaging... %s' % (common.profiler(_nbins, i, 0, t0,
                t1))
            stdout.write(_s)
            stdout.flush()
        # Ignores data outside the valid bin range.
        if (bin_i < 0) | (bin_i >= nbins):
            continue
        # Calculate averages inside each bin in two steps: (i) calculate
        # average and standard deviation; (ii) consider only those values
        # within selected standard deviation range.
        sel = flatnonzero(bin_sel == bin_i)
        # Selects data within selected standard deviation or single
        # data in current bin.
        if sel.size > 1:
            _avg_y = y[sel].mean()
            _std_y = y[sel].std()
            if _std_y > 1e-10:
                _sel = ((y[sel] >= (_avg_y - nstd * _std_y)) &
                    (y[sel] <= (_avg_y + nstd * _std_y)))
                sel = sel[_sel]
            # Calculates final values
            if mode == 'mean':
                _avg_x = x[sel].mean()
                _avg_y = y[sel].mean()
            elif mode == 'median':
                _avg_x = median(x[sel])
                _avg_y = median(y[sel])
            else:
                raise ValueError('Invalid mode `{}`.'.format(mode))
            _std_x = x[sel].std()
            _std_y = y[sel].std()
            _min_y = y[sel].min()
            _max_y = y[sel].max()
        else:
            _avg_x, _avg_y = x[sel][0], y[sel][0]
            _std_x, _std_y = 0, 0
            _min_y, _max_y = nan, nan
        #
        avg_x[bin_i] = _avg_x
        avg_y[bin_i] = _avg_y
        std_x[bin_i] = _std_x
        std_y[bin_i] = _std_y
        min_y[bin_i] = _min_y
        max_y[bin_i] = _max_y
        #
        if profile:
            _s = '\rBin-averaging... %s' % (common.profiler(_nbins, i+1, 0, t0,
                t1))
            stdout.write(_s)
            stdout.flush()

    # Interpolates selected data to central data point in bin using spline.
    # Only interpolates data in filled bins.
    if interpolate in ['bins', 'full']:
        sel = ~isnan(avg_y)
        bin_x = (bins[1:] + bins[:-1]) * 0.5
        if interpolate == 'bins':
            bin_y[sel] = _interpolate(bin_x[sel], avg_x[sel], avg_y[sel], k=k,
                s=s, outside=extrapolate)
        elif interpolate == 'full':
            bin_y = _interpolate(bin_x, avg_x[sel], avg_y[sel], k=k, s=s,
                outside=extrapolate)
    elif interpolate is not False:
        raise ValueError('Invalid interpolation mode `{}`.'.format(
            interpolate))

    # Masks invalid data.
    if usemask:
        bin_y = ma.masked_invalid(bin_y)
        avg_x = ma.masked_invalid(avg_x)
        avg_y = ma.masked_invalid(avg_y)
        std_x = ma.masked_invalid(std_x)
        std_y = ma.masked_invalid(std_y)
        min_y = ma.masked_invalid(min_y)
        max_y = ma.masked_invalid(max_y)

    if interpolate:
        return bin_x, bin_y, avg_x, avg_y, std_x, std_y, min_y, max_y
    else:
        return avg_x, avg_y, std_x, std_y, min_y, max_y
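
A minimal usage sketch with synthetic data, assuming the module's `_interpolate` helper is available; the bins are passed explicitly because the default bin construction caps the upper limit at zero:

    import numpy
    x = numpy.random.uniform(0., 10., 500)
    y = numpy.sin(x) + 0.1 * numpy.random.randn(500)
    bins = numpy.arange(0., 10.5, 0.5)
    # With interpolate='bins' (default), eight arrays are returned.
    (bin_x, bin_y, avg_x, avg_y, std_x, std_y, min_y,
        max_y) = bin_average(x, y, bins=bins, nstd=2.)
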
Example #8
0
def load_dataset(path,
                 pattern='(.*)',
                 ftype='xy',
                 flist=None,
                 delimiter='\t',
                 var_from_name=False,
                 masked=False,
                 xlim=None,
                 ylim=None,
                 lon=None,
                 lat=None,
                 tm=None,
                 topomask=None,
                 verbose=False,
                 dummy=False):
    """Loads an entire dataset.

    It uses the numpy.loadtxt function and therefore accepts regular
    ASCII files or GZIP compressed ones.

    PARAMETERS
        path (string) :
            The path in which the data files are located.
        pattern (string, optional) :
            Regular expression pattern corresponding to valid file
            names to be loaded.
        ftype (string, optional) :
            Specifies the file type that is loaded. The accepted values
            are 'xy', 'xt' and 'ty'.

            For 'xy', or map, files, the first line contains the
            longitude coordinates, the first column contains the
            latitude coordinates and the rest contains the data in
            matrix style. If var_from_name is set to True, it assumes
            that the time is given at the upper left cell.

            For 'xt', or zonal-temporal, files, the first line contains
            the longitude coordinates, the first column contains the
            time and the rest contains the data in matrix style. If
            var_from_name is set to True, it assumes that the latitude
            is given at the upper left cell.

            For 'ty', or temporal-meridional, files, the first line
            contains the time, the first column contains the longitude
            and the rest contains the data in matrix style. If
            var_from_name is set to True, it assumes that the latitude
            is given at the upper left cell.
        flist (array like, optional) :
            Lists the files to be loaded in path. If set, it ignores the
            pattern.
        delimiter (string, optional) :
            Specifies the data delimiter used while loading the data.
            The default value is '\t' (tab)
        var_from_name (boolean, optional) :
            If set to true, it tries to infer either the time, latitude
            or longitude from the first match in pattern according to
            the chosen file type. If set to true, the pattern has to be
            set in such a way that the last matches contain the value
            and the hemisphere ('N', 'S', 'E' or 'W') if appropriate.
        masked (boolean, optional) :
            Returns a masked array. Default is False.
        xlim, ylim (array like, optional) :
            List containing the upper and lower zonal and meridional
            limits, respectively.
        lon, lat, tm (array like, optional) :
            If given, forces the longitude, latitude and time arrays
            onto which the dataset is mapped.
        topomask (string, optional) :
            Topography mask, either 'ocean' or 'land'.
        verbose (boolean, optional) :
            If set to true, suppresses the progress information printed
            on screen.
        dummy (boolean, optional) :
            If set to true, does not load data and returns blank
            data array for test purposes.
    
    RETURNS
        lon (array like) :
            Longitude.
        lat (array like) :
            Latitude.
        t (array like) :
            Time.
        z (array like) :
            Loaded variable.

    """
    t0 = time()

    if topomask is not None:
        masked = True

    S = 'Preparing data'
    s = '%s...' % (S)
    if not verbose:
        os.sys.stdout.write(s)
        os.sys.stdout.flush()

    # Generates list of files and tries to match them to the pattern
    if flist is None:
        flist = os.listdir(path)
        flist, match = common.reglist(flist, pattern)

    # Loads all the data from file list to create arrays
    N = len(flist)
    if N == 0:
        raise Warning, 'No files to be loaded.'

    # Initializes the set of array limits
    Lon = set()
    Lat = set()
    Tm = set()

    # Walks through the file loading process twice. At the first step loads
    # all the files to get all the geographical and temporal boundaries. At the
    # second step, reloads all files and fits them to the initialized data
    # arrays
    if dummy:
        step_range = 1
    else:
        step_range = 2
    for step in range(step_range):
        t1 = time()
        for n, fname in enumerate(flist):
            t2 = time()

            if (lon is not None) and (lat is not None) and (tm is not None):
                continue

            x, y, t, z = load_map('%s/%s' % (path, fname),
                                  ftype=ftype,
                                  delimiter=delimiter,
                                  lon=lon,
                                  lat=lat,
                                  tm=tm,
                                  masked=masked,
                                  topomask=None)

            if var_from_name:
                if (ftype == 'xt') | (ftype == 'ty'):
                    var = atof(match[n][-2])  # Gets coordinate out of ...
                    rav = match[n][-1].upper()  # ... match and also its ...
                    if (rav == 'S') or (rav == 'W'):  # ... hemisphere.
                        var *= -1
                    if ftype == 'xt':
                        y = var
                    else:
                        x = var
                elif ftype == 'xy':
                    t = atof(match[n][-1])  # Gets time out of last match.

            if numpy.isnan(t).all():
                t = 0

            if type(x).__name__ in ['int', 'long', 'float', 'float64']:
                x = [x]
            if type(y).__name__ in ['int', 'long', 'float', 'float64']:
                y = [y]
            if type(t).__name__ in ['int', 'long', 'float', 'float64']:
                t = [t]

            ###################################################################
            # FIRST STEP
            ###################################################################
            if step == 0:
                Lon.update(x)
                Lat.update(y)
                Tm.update(t)
            ###################################################################
            # SECOND STEP
            ###################################################################
            elif step == 1:
                selx = [pylab.find(Lon == i)[0] for i in x]
                sely = [pylab.find(Lat == i)[0] for i in y]
                selt = [pylab.find(Tm == i)[0] for i in t]

                i, j, k = common.meshgrid2(selx, sely, selt)

                if ftype == 'xt':
                    a, b, c = i.shape
                    z = z.reshape((a, 1, c))

                # Makes sure only to overwrite values not previously assigned.
                if masked:
                    Z[k, j, i] = numpy.ma.where(~Z[k, j, i].mask, Z[k, j, i],
                                                z)
                else:
                    Z[k, j, i] = numpy.where(~numpy.isnan(Z[k, j, i]), Z[k, j,
                                                                         i], z)

            ###################################################################
            # PROFILING
            ###################################################################
            if not verbose:
                os.sys.stdout.write(len(s) * '\b')
            s = '%s (%s)... %s ' % (S, fname,
                                    common.profiler(N, n + 1, t0, t1, t2))
            if not verbose:
                os.sys.stdout.write(s)
                os.sys.stdout.flush()
        #
        if not verbose:
            os.sys.stdout.write('\n')

        # Now creates data array based on input parameters xlim, ylim and
        # the loaded coordinate sets.
        if step == 0:
            if lon is None:
                Lon = numpy.asarray(list(Lon))
            else:
                Lon = lon
            if lat is None:
                Lat = numpy.asarray(list(Lat))
            else:
                Lat = lat
            if tm is None:
                Tm = numpy.asarray(list(Tm))
            else:
                Tm = tm

            Lon.sort()
            Lat.sort()
            Tm.sort()

            # Makes sure that all the coordinates are continuous, equally
            # spaced and that they are inside the coordinate limits.
            dx, dy, dt = numpy.diff(Lon), numpy.diff(Lat), numpy.diff(Tm)

            if len(dx) == 0: dx = numpy.array([1.])
            if len(dy) == 0: dy = numpy.array([1.])
            if len(dt) == 0: dt = numpy.array([1.])

            #if ((not (dx == dx[0]).all()) or (not (dy == dy[0]).all()) or
            #    (not (dt == dt[0]).all())):
            #    raise Warning, 'One or more coordinates are not evenly spaced.'

            dx = dx[0]
            dy = dy[0]
            dt = dt[0]

            if xlim is None:
                xlim = [Lon.min(), Lon.max()]
            if ylim is None:
                ylim = [Lat.min(), Lat.max()]

            selx = pylab.find((Lon >= min(xlim)) & (Lon <= max(xlim)))
            Lon = Lon[selx]
            sely = pylab.find((Lat >= min(ylim)) & (Lat <= max(ylim)))
            Lat = Lat[sely]

            # Pads edges with NaNs to avoid distortions when generating maps.
            if lon is None:
                Lon = numpy.concatenate([[Lon[0] - dx], Lon, [Lon[-1] + dx]])
            if lat is None:
                Lat = numpy.concatenate([[Lat[0] - dy], Lat, [Lat[-1] + dy]])

            # Initializes data arrays
            a, b, c = Lon.size, Lat.size, Tm.size
            if masked:
                Z = numpy.ma.empty([c, b, a], dtype=float) * numpy.nan
                Z.mask = True
            else:
                Z = numpy.empty([c, b, a], dtype=float) * numpy.nan
            lon, lat = numpy.array(Lon), numpy.array(Lat)

            # Now everything might be ready for the second step in the loop,
            # filling in the data array.
            S, s = 'Loading data', ''

    # Interpolates topography into data grid.
    if topomask is not None:
        if not verbose:
            print 'Masking topographic features...'
        ezi, _, _ = interpolate.nearest([common.etopo.x, common.etopo.y],
                                        common.etopo.z, [Lon, Lat])
        if topomask == 'ocean':
            tmask = (ezi > 0)
        elif topomask == 'land':
            tmask = (ezi < 0)
        else:
            raise Warning, 'Invalid topography mask.'
        #
        tmask = tmask.reshape([1, b, a])
        tmask = tmask.repeat(c, axis=0)
        #
        Z.mask = Z.mask | tmask

    if masked:
        Z.mask = Z.mask | numpy.isnan(Z.data)
        Z.data[Z.mask] = 0

    return Lon, Lat, Tm, Z
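
A minimal usage sketch; the path and file-name pattern are hypothetical and should match gzipped map files such as those written by save_dataset above:

    lon, lat, tm, z = load_dataset('./maps',
                                   pattern='basin_([0-9]{6})\.xy\.gz',
                                   ftype='xy', masked=True)
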
Example #9
0
def basics(z, dt=None, oldschool=False):
    """Performs basic statistics on given data variable z.

    Calculates the mean, standard deviation and trend along time.
    Assumes the first dimension of the array to be time and the others
    to be the coordinates. The maximum number of dimensions is three.
    The trend is calculated by a least square fit of a one degree
    polynomial function.

    PARAMETERS
        z (array like) :
            Variable to be analysed.
        dt (float) :
            Temporal sampling scale to normalize the trend.
        oldschool (boolean, optional):
            If set to true, calculates the averages and standard
            deviation using old school techniques.

    RETURNS
        mean, std, trend, alpha (array like) :
            Calculated mean, standard deviation, trends and lag-1 auto-
            correlation.
    """
    t1 = time()

    # Transforms the input array into a numpy masked array.
    z = numpy.ma.masked_invalid(z)
    if dt is None:
        dt = 1.

    dim = len(z.shape)
    if dim == 1:
        z = z.reshape(z.size, 1, 1)
    elif dim == 2:
        c, b = z.shape
        z = z.reshape(c, b, 1)
    elif dim > 3:
        raise Warning, 'Higher dimensions than three are not implemented.'
    c, b, a = z.shape
    t = numpy.arange(c) * dt
    mask = z.mask

    t2 = time()
    s = 'Calculating mean... '
    stdout.write(s)
    stdout.flush()
    if oldschool:
        zmean = numpy.ma.empty([b, a]) * numpy.nan
        zstd = numpy.ma.empty([b, a]) * numpy.nan
        for i in range(a):
            t2 = time()
            for j in range(b):
                if not mask[j, i]:
                    zmean[j, i] = z[:, j, i].mean()
                    zstd[j, i] = z[:, j, i].std()
            stdout.write(len(s) * '\b')
            s = ('Calculating mean and standard deviation... %s ' %
                (common.profiler(a, i + 1, 0, t1, t2)))
            stdout.write(s)
            stdout.flush()
        s = '\n'
    else:
        zmean = z.mean(axis=0)
        s = '%s\n' % (common.profiler(1, 1, 0, t1, t2))
    zmean[mask] = numpy.nan
    zmean.mask = mask
    stdout.write(s)

    if not oldschool:
        t2 = time()
        s = 'Calculating standard deviation... '
        stdout.write(s)
        stdout.flush()
        zstd = z.std(axis=0)
        s = '%s\n' % (common.profiler(1, 1, 0, t1, t2))
        stdout.write(s)
    zstd[mask] = numpy.nan
    zstd.mask = mask

    s = 'Calculating trends and lag-1 autocorrelation... '
    stdout.write(s)
    stdout.flush()
    ztrend = numpy.ma.empty([b, a]) * numpy.nan
    zalpha = numpy.ma.empty([b, a]) * numpy.nan
    for i in range(a):
        t2 = time()
        for j in range(b):
            if not mask[j, i]:
                p = numpy.polyfit(t, z[:, j, i], 1)
                ztrend[j, i] = p[0]
                #
                ac = acorr(z[:, j, i])
                zalpha[j, i] = (ac[c] + ac[c + 1] ** 0.5) / 2

        stdout.write(len(s) * '\b')
        s = ('Calculating trends and lag-1 autocorrelation... %s ' %
                (common.profiler(a, i + 1, 0, t1, t2)))
        stdout.write(s)
        stdout.flush()
    ztrend.mask = mask
    zalpha.mask = mask | numpy.isnan(zalpha)
    stdout.write('\n')

    return zmean, zstd, ztrend, zalpha
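
For reference, a standalone sketch of the lag-1 autocorrelation estimate alpha = (r1 + sqrt(r2)) / 2 used above (Torrence and Compo, 1998), written with plain numpy instead of the module's `acorr` helper:

    import numpy

    def lag1_alpha(x):
        # Full normalized autocorrelation; zero lag at index x.size - 1.
        x = numpy.asarray(x, dtype=float)
        x = x - x.mean()
        r = numpy.correlate(x, x, mode='full') / (x.var() * x.size)
        n0 = x.size - 1
        # Combines lags 1 and 2; if the lag-2 autocorrelation is
        # negative, the square root, and hence the estimate, is NaN.
        return (r[n0 + 1] + r[n0 + 2] ** 0.5) / 2
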
Example #10
0
def wavelet_analysis(z, tm, lon=None, lat=None, mother='Morlet', alpha=0.0,
                     siglvl=0.95, loc=None, onlyloc=False, periods=None,
                     sel_periods=[], show=False, save='', dsave='', prefix='',
                     labels=dict(), title=None, name=None, fpath='',
                     fpattern='', std=dict(), crange=None, levels=None,
                     cmap=cm.GMT_no_green, debug=False):
    """Continuous wavelet transform and significance analysis.

    The analysis is made using the methodology and statistical approach
    suggested by Torrence and Compo (1998).

    Depending on the dimensions of the input array, three different
    kinds of approaches are taken. If the input array is
    one-dimensional then only a simple analysis is performed. If the
    array is two- or three-dimensional then spectral Hovmoller diagrams
    are drawn for each Fourier period given within a range of +/-25%.

    PARAMETERS
        z (array like) :
            Input data. The data array should have one of these forms,
            z[tm], z[tm, lat] or z[tm, lat, lon].
        tm (array like) :
            Time axis. It should contain values in matplotlib date
            format (i.e. number of days since 0001-01-01 UTC).
        lon (array like, optional) :
            Longitude.
        lat (array like, optional) :
            Latitude.
        mother (string, optional) :
            Gives the name of the mother wavelet to be used. Possible
            values are 'Morlet' (default), 'Paul' or 'Mexican hat'.
        alpha (float or dictionary, optional) :
            Lag-1 autocorrelation for background noise.  Default value
            is 0.0 (white noise). If different autocorrelation
            coefficients should be used for different locations, then
            the input should contain a dictionary with 'lon', 'lat',
            'map' keys as for the std parameter.
        siglvl (float, optional) :
            Significance level. Default value is 0.95.
        loc (array like, optional) :
            Special locations of interest. If the input array has
            higher dimensions, the simple wavelet analysis is also
            performed for each of these locations. The list should
            contain the (lon, lat) pairs for each location of
            interest.
        onlyloc (boolean, optional) :
            If set to true then only the specified locations are
            analysed. The default is false.
        periods (array like, optional) :
            Special Fourier periods of interest in case of analysis of
            higher dimensions (in years).
        sel_periods (array like, optional) :
            Selects the Fourier periods over which the spectral power
            is averaged.
        show (boolean, optional) :
            If set to true, the resulting maps are shown on screen.
        save (string, optional) :
            The path in which the resulting plots are to be saved. If
            not set, then no images will be saved.
        dsave (string, optional) :
            If set, saves the scale averaged power spectrum series to
            this path. This is especially useful if memory is an issue.
        prefix (string, optional) :
            Prefix to retain naming conventions such as basin.
        labels (dictionary, optional) :
            Sets the labels for the plot axis.
        title (string, array like, optional) :
            Title of each of the selected periods.
        name (string, array like, optional) :
            Name of each of the selected periods. Used when saving the
            results to files.
        fpath (string, optional) :
            Path for the source files to be loaded when memory issues
            are a concern.
        fpattern (string, optional) :
            Regular expression pattern to match file names.
        std (dictionary, optional) :
            A dictionary containing a map of the standard deviation of
            the analysed time series. To set the longitude and latitude
            coordinates of the map, they should be included as
            separate 'lon' and 'lat' key items. If they are omitted,
            then the regular input parameters are assumed. Accepted
            standard deviation error is set in key 'err' (default value
            is 1e-2).
        crange (array like, optional) :
            Array of power levels to be used in the average Hovmoller
            colour bar.
        levels (array like, optional) :
            Array of power levels to be used in spectrogram colour bar.
        cmap (colormap, optional) :
            Sets the colour map to be used in the plots. The default is
            the Generic Mapping Tools (GMT) no green.
        debug (boolean, optional) :
            If set to True then warnings are shown.

    OUTPUT
        If show or save are set, plots either on screen and or on file
        according to the specified parameters.

        If dsave parameter is set, also saves the scale averaged power
        series to files.

    RETURNS
        wave (dictionary) :
            Dictionary containing the resulting calculations from the
            wavelet analysis according to the input parameters. The
            output items might be:
                scale --
                    Wavelet scales.
                period --
                    Equivalent Fourier periods (in days).
                power_spectrum --
                    Wavelet power spectrum (in units**2).
                power_significance --
                    Relative significance of the power spectrum.
                global_power --
                    Global wavelet power spectrum (in units**2).
                scale_spectrum  --
                    Scale averaged wavelet spectra (in units**2)
                    according to selected periods.
                scale_significance --
                    Relative significance of the scale averaged wavelet
                    spectra.
                fft --
                    Fourier spectrum.
                fft_first --
                    Fourier spectrum of the first half of the
                    time-series.
                fft_second --
                    Fourier spectrum of the second half of the
                    time-series.
                fft_period --
                    Fourier periods (in days).
                trend --
                    Signal trend (in units/yr).
                wavelet_trend --
                    Wavelet spectrum trends (in units**2/yr).

    """
    t1 = time()
    result = {}

    # Resets unit labels for Hovmoller plots.
    hlabels = dict(labels)
    hlabels['units'] = ''

    # Setting some titles and paths
    if name is None:
        name = title

    # Working with the std parameter and setting its properties:
    if 'val' in std.keys():
        if 'lon' not in std.keys():
            std['lon'] = lon
        std['lon180'] = common.lon180(std['lon'])
        if 'lat' not in std.keys():
            std['lat'] = lat
        if 'err' not in std.keys():
            std['err'] = 1e-2
        std['map'] = True
    else:
        std['map'] = False

    # Lag-1 autocorrelation parameter
    if type(alpha).__name__ == 'dict':
        if 'lon' not in alpha.keys():
            alpha['lon'] = lon
        alpha['lon180'] = common.lon180(alpha['lon'])
        if 'lat' not in alpha.keys():
            alpha['lat'] = lat
        alpha['mean'] = alpha['val'].mean()
        alpha['map'] = True
        alpha['calc'] = False
    else:
        if alpha == -1:
            alpha = {'mean': -1, 'calc': True}
        else:
            alpha = {'val': alpha, 'mean': alpha, 'map': False, 'calc': False}

    # Shows some of the options on screen.
    print ('Average Lag-1 autocorrelation for background noise: %.2f' %
        (alpha['mean']))
    if save:
        print 'Saving result figures in \'%s\'.' % (save)
    if dsave:
        print 'Saving result data in \'%s\'.' % (dsave)

    if fpath:
        # Gets the list of files to be loaded individually extracts all the
        # latitudes and loads the first file to get the main parameters.
        flist = os.listdir(fpath)
        flist, match = common.reglist(flist, fpattern)
        if len(flist) == 0:
            raise Warning, 'No files matched search pattern.'
        flist = numpy.asarray(flist)
        lst_lat = []
        for item in match:
            y = string.atof(item[-2])
            if item[-1].upper() == 'S': y *= -1
            lst_lat.append(y)
        # Detect file type from file name
        ftype = fm.detect_ftype(flist[0])
        x, y, tm, z = fm.load_map('%s/%s' % (fpath, flist[0]),
            ftype=ftype, masked=True)
        if lon is None:
            lon = x
        lat = numpy.unique(lst_lat)
        dim = 2
    else:
        # Transforms input arrays into numpy arrays and numpy masked arrays.
        tm = numpy.asarray(tm)
        z = numpy.ma.asarray(z)
        z.mask = numpy.isnan(z)

        # Determines the number of dimensions of the variable to be plotted and
        # the sizes of each dimension.
        a = b = c = None
        dim = len(z.shape)
        if dim == 3:
            c, b, a = z.shape
        elif dim == 2:
            c, a = z.shape
            b = 1
            z = z.reshape(c, b, a)
        else:
            c = z.shape[0]
            a = b = 1
            z = z.reshape(c, b, a)
        if tm.size != c:
            raise Warning, 'Time and data lengths do not match.'

    # Transforms coordinate arrays into numpy arrays
    s = type(lat).__name__
    if s in ['int', 'float', 'float64']:
        lat = numpy.asarray([lat])
    elif s != 'NoneType':
        lat = numpy.asarray(lat)
    s = type(lon).__name__
    if s in ['int', 'float', 'float64']:
        lon = numpy.asarray([lon])
    elif s != 'NoneType':
        lon = numpy.asarray(lon)

    # Starts the mother wavelet class instance and determines important
    # analysis parameters
    mother = mother.lower()
    if mother == 'morlet':
        mother = wavelet.Morlet()
    elif mother == 'paul':
        mother = wavelet.Paul()
    elif mother in ['mexican hat', 'mexicanhat', 'mexican_hat']:
        mother = wavelet.Mexican_hat()
    else:
        raise Warning, 'Mother wavelet unknown.'

    t = tm / common.daysinyear        # Time array in years
    dt = tm[1] - tm[0]                # Temporal sampling interval
    try:                              # Zonal sampling interval
        dx = lon[1] - lon[0]
    except:
        dx = 1
    try:                              # Meridional sampling interval
        dy = lat[1] - lat[0]
    except:
        dy = dx
    if numpy.isnan(dt): dt = 1
    if numpy.isnan(dx): dx = 1
    if numpy.isnan(dy): dy = dx
    dj = 0.25                         # Four sub-octaves per octave
    s0 = 2 * dt                       # Smallest scale
    J = int(7 / dj) - 1               # Seven powers of two with dj sub-octaves
    scales = period = None

    if type(crange).__name__ == 'NoneType':
        crange = numpy.arange(0, 1.1, 0.1)
    if type(levels).__name__ == 'NoneType':
        levels = 2. ** numpy.arange(-3, 6)

    if fpath:
        N = lat.size
        # TODO: refactoring # lon = numpy.arange(-81. - dx / 2., 290. + dx / 2, dx)
        # TODO: refactoring # lat = numpy.unique(numpy.asarray(lst_lat))
        c, b, a = tm.size, lat.size, lon.size
    else:
        N = a * b

    # Making sure that the longitudes range from -180 to 180 degrees and
    # setting the squared search radius R2.
    try:
        lon180 = common.lon180(lon)
    except:
        lon180 = None
    R2 = dx ** 2 + dy ** 2
    if numpy.isnan(R2):
        R2 = 65535.
    if loc is not None:
        loc = numpy.asarray([[common.lon180(item[0]), item[1]] for item in
            loc])

    # Initializes important result variables such as the global wavelet power
    # spectrum map, the scale averaged spectrum time-series and their
    # significance, and the wavelet power trend map.
    global_power = numpy.ma.empty([J + 1, b, a]) * numpy.nan
    try:
        C = len(periods) + 1
        dT = numpy.diff(periods)
        pmin = numpy.concatenate([[periods[0] - dT[0] / 2],
                                 0.5 * (periods[:-1] + periods[1:])])
        pmax = numpy.concatenate([0.5 * (periods[:-1] + periods[1:]),
                                 [periods[-1] + dT[-1] / 2]])
    except:
        # Sets the lowest period to null and the highest to half the time
        # series length.
        C = 1
        pmin = numpy.array([0])
        pmax = numpy.array([(tm[-1] - tm[0]) / 2])
    if type(sel_periods).__name__ in ['int', 'float']:
        sel_periods = [sel_periods]
    elif len(sel_periods) == 0:
        sel_periods = [-1.]
    try:
        if fpath:
            raise Warning, 'Process files individually'
        avg_spectrum = numpy.ma.empty([C, c, b, a]) * numpy.nan
        mem_error = False
    except:
        avg_spectrum = numpy.ma.empty([C, c, a]) * numpy.nan
        mem_error = True
    avg_spectrum_signif = numpy.ma.empty([C, b, a]) * numpy.nan
    trend = numpy.ma.empty([b, a]) * numpy.nan
    wavelet_trend = numpy.ma.empty([C, b, a]) * numpy.nan
    fft_trend = numpy.ma.empty([C, b, a]) * numpy.nan
    std_map = numpy.ma.empty([b, a]) * numpy.nan
    zero = numpy.ma.empty([c, a])
    fft_spectrum = None
    fft_spectrum1 = None
    fft_spectrum2 = None

    # Walks through each latitude and then through each longitude to perform
    # the temporal wavelet analysis.
    if N == 1:
        plural = ''
    else:
        plural = 's'
    s = 'Spectral analysis of %d location%s... ' % (N, plural)
    stdout.write(s)
    stdout.flush()
    for j in range(b):
        t2 = time()
        isloc = False  # Resets 'is special location' flag
        hloc = []      # Clears location list for Hovmoller plots
        zero *= numpy.nan
        if mem_error:
            # Clears average spectrum for next step.
            avg_spectrum *= numpy.nan
            avg_spectrum.mask = False
        if fpath:
            findex = pylab.find(lst_lat == lat[j])
            if len(findex) == 0:
                continue
            ftype = fm.detect_ftype(flist[findex[0]])
            try:
                x, y, tm, z = fm.load_dataset(fpath, flist=flist[findex],
                    ftype=ftype, masked=True, lon=lon, lat=lat[j:j+1],
                    verbose=True)
            except:
                continue
            z = z[:, 0, :]
            x180 = common.lon180(x)

        # Determines the first and second halves of the time-series and some
        # constants for the FFT
        fft_ta = numpy.ceil(t.min())
        fft_tb = numpy.floor(t.max())
        fft_tc = numpy.round(fft_ta + fft_tb) / 2
        fft_ia = pylab.find((t >= fft_ta) & (t <= fft_tc))
        fft_ib = pylab.find((t >= fft_tc) & (t <= fft_tb))
        fft_N = int(2 ** numpy.ceil(numpy.log2(max([len(fft_ia),
            len(fft_ib)]))))
        fft_N2 = fft_N / 2 - 1
        fft_dt = t[fft_ib].mean() - t[fft_ia].mean()

        for i in range(a):
            # Some string output.
            try:
                Y, X = common.num2latlon(lon[i], lat[j], mode='each',
                    padding=False)
            except:
                Y = X = '?'

            # Extracts individual time-series from the whole dataset and
            # sets or calculates its standard deviation, squared standard
            # deviation and finally the normalized time-series.
            if fpath:
                try:
                    ilon = pylab.find(x == lon[i])[0]
                    fz = z[:, ilon]
                except:
                    continue
            else:
                fz = z[:, j, i]
            if fz.mask.all():
                continue
            if std['map']:
                try:
                    u = pylab.find(std['lon180'] == lon180[i])[0]
                    v = pylab.find(std['lat'] == lat[j])[0]
                except:
                    if debug:
                        warnings.warn('Unable to locate standard deviation '
                                      'for (%s, %s)' % (X, Y), Warning)
                    continue
                fstd = std['val'][v, u]
                estd = fstd - fz.std()
                if (estd < 0) & (abs(estd) > std['err']):
                    if debug:
                        warnings.warn('Discrepant input standard deviation '
                            '(%f) location (%.3f, %.3f) will be '
                            'disregarded.' % (estd, lon180[i], lat[j]))
                    continue
            else:
                fstd = fz.std()
            fstd2 = fstd ** 2
            std_map[j, i] = fstd
            zero[:, i] = fz
            fz = (fz - fz.mean()) / fstd

            # Calculates the distance of the current point to any special
            # location set in the 'loc' parameter. If only special locations
            # are to be analysed, then skips all other ones. If the input
            # array is one dimensional, then do the analysis anyway.
            if dim == 1:
                dist = numpy.asarray([0.])
            else:
                try:
                    dist = numpy.asarray([((item[0] - (lon180[i])) **
                        2 + (item[1] - lat[j]) ** 2) for item in loc])
                except:
                    dist = []
            if (dist > R2).all() & (loc != 'all') & onlyloc:
                continue

            # Determines the lag-1 autocorrelation coefficient to be used in
            # the significance test from the input parameter
            if alpha['calc']:
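                # Estimates alpha = (r1 + sqrt(r2)) / 2 following
                # Torrence and Compo (1998); this assumes `acorr` here
                # puts zero lag at index c, so that ac[c + 1] and
                # ac[c + 2] are lags 1 and 2.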
                ac = acorr(fz)
                alpha_ij = (ac[c + 1] + ac[c + 2] ** 0.5) / 2
            elif alpha['map']:
                try:
                    u = pylab.find(alpha['lon180'] == lon180[i])[0]
                    v = pylab.find(alpha['lat'] == lat[j])[0]
                    alpha_ij = alpha['val'][v, u]
                except:
                    if debug:
                        warnings.warn('Unable to locate autocorrelation '
                            'for (%s, %s), using mean value instead' %
                            (X, Y), Warning)
                    alpha_ij = alpha['mean']
            else:
                alpha_ij = alpha['mean']

            # Calculates the continuous wavelet transform using the wavelet
            # Python module. Calculates the wavelet and Fourier power spectrum
            # and the periods in days. Also calculates the Fourier power
            # spectrum for the first and second halves of the timeseries.
            wave, scales, freqs, coi, fft, fftfreqs = wavelet.cwt(fz, dt, dj,
                s0, J, mother)
            power = abs(wave * wave.conj())
            fft_power = abs(fft * fft.conj())
            period = 1. / freqs
            fftperiod = 1. / fftfreqs
            psel = pylab.find(period <= pmax.max())

            # Calculates the Fourier transform for the first and the second
            # halves of the time-series for later trend analysis.
            fft_1 = numpy.fft.fft(fz[fft_ia], fft_N)[1:fft_N/2] / fft_N ** 0.5
            fft_2 = numpy.fft.fft(fz[fft_ib], fft_N)[1:fft_N/2] / fft_N ** 0.5
            fft_p1 = abs(fft_1 * fft_1.conj())
            fft_p2 = abs(fft_2 * fft_2.conj())

            # Creates FFT return array and stores the spectrum accordingly
            try:
                fft_spectrum[:, j, i] = fft_power * fstd2
                fft_spectrum1[:, j, i] = fft_p1 * fstd2
                fft_spectrum2[:, j, i] = fft_p2 * fstd2
            except:
                fft_spectrum = (numpy.ma.empty([len(fft_power), b, a]) *
                    numpy.nan)
                fft_spectrum1 = (numpy.ma.empty([fft_N2, b, a]) *
                    numpy.nan)
                fft_spectrum2 = (numpy.ma.empty([fft_N2, b, a]) *
                    numpy.nan)
                #
                fft_spectrum[:, j, i] = fft_power * fstd2
                fft_spectrum1[:, j, i] = fft_p1 * fstd2
                fft_spectrum2[:, j, i] = fft_p2 * fstd2

            # Performs the significance test according to the article by
            # Torrence and Compo (1998). The wavelet power is significant
            # if the ratio power/sig95 is > 1.
            signif, fft_theor = wavelet.significance(1., dt, scales, 0,
                alpha_ij, significance_level=siglvl, wavelet=mother)
            sig95 = (signif * numpy.ones((c, 1))).transpose()
            sig95 = power / sig95

            # Calculates the global wavelet power spectrum and its
            # significance. The global wavelet spectrum is the average of the
            # wavelet power spectrum over time. The degrees of freedom (dof)
            # have to be corrected for padding at the edges.
            glbl_power = power.mean(axis=1)
            dof = c - scales
            glbl_signif, tmp = wavelet.significance(1., dt, scales, 1,
                alpha_ij, significance_level=siglvl, dof=dof, wavelet=mother)
            global_power[:, j, i] = glbl_power * fstd2

            # Calculates the average wavelet spectrum along the scales and its
            # significance according to Torrence and Compo (1998) eq. 24. The
            # scale_avg_full variable is used multiple times according to the
            # selected periods range.
            #
            # Also calculates the average Fourier power spectrum.
            Cdelta = mother.cdelta
            scale_avg_full = (scales * numpy.ones((c, 1))).transpose()
            scale_avg_full = power / scale_avg_full
            for k in range(C):
                if k == 0:
                    sel = pylab.find((period >= pmin[0]) &
                        (period <= pmax[-1]))
                    pminmax = [period[sel[0]], period[sel[-1]]]
                    les = pylab.find((fftperiod >= pmin[0]) &
                        (fftperiod <= pmax[-1]))
                    fminmax = [fftperiod[les[0]], fftperiod[les[-1]]]
                else:
                    sel = pylab.find((period >= pmin[k - 1]) &
                        (period < pmax[k - 1]))
                    pminmax = [pmin[k-1], pmax[k-1]]
                    les = pylab.find((fftperiod >= pmin[k - 1]) &
                        (fftperiod <= pmax[k - 1]))
                    fminmax = [fftperiod[les[0]], fftperiod[les[-1]]]

                scale_avg = numpy.ma.array((dj * dt / Cdelta *
                    scale_avg_full[sel, :].sum(axis=0)))
                scale_avg_signif, tmp = wavelet.significance(1., dt, scales,
                    2, alpha_ij, significance_level=siglvl,
                    dof=[scales[sel[0]], scales[sel[-1]]], wavelet=mother)
                scale_avg.mask = (scale_avg < scale_avg_signif)
                if mem_error:
                    avg_spectrum[k, :, i] = scale_avg
                else:
                    avg_spectrum[k, :, j, i] = scale_avg
                avg_spectrum_signif[k, j, i] = scale_avg_signif

                # Trend analysis using least square polynomial fit of one
                # degree of the original input data and scale averaged
                # wavelet power. The wavelet power trend is calculated only
                # where the cone of influence spans the highest analyzed
                # period. In the end, the returned value for the trend is in
                # units**2.
                #
                # Also calculates the trends in the Fourier power spectrum.
                # Note that the FFT power spectrum is already multiplied by
                # the signal's standard deviation.
                incoi = pylab.find(coi >= pmax[-1])
                if len(incoi) == 0:
                    incoi = numpy.arange(c)
                polyw = numpy.polyfit(t[incoi], scale_avg[incoi].data, 1)
                wavelet_trend[k, j, i] = polyw[0] * fstd2
                fft_trend[k, j, i] = (fft_spectrum2[les[les<fft_N2], j, i] -
                    fft_spectrum1[les[les<fft_N2], j, i]).mean() / fft_dt
                if k == 0:
                    polyz = numpy.polyfit(t, fz * fstd, 1)
                    trend[j, i] = polyz[0]

                # Plots the wavelet analysis results for the individual
                # series. The plot is only generated if the dimension of the
                # input variable z is one, if a special location is within a
                # range of the search radius R and if the show or save
                # parameters are set.
                if (show | (save != '')) & ((k in sel_periods)):
                    if (dist < R2).any() | (loc == 'all') | (dim == 1):
                        # There is an interesting spot within the search
                        # radius of location (%s, %s).' % (Y, X)
                        isloc = True
                        if (dist < R2).any():
                            try:
                                hloc.append(loc[(dist < R2)][0, 0])
                            except:
                                pass
                        if save:
                            try:
                                sv = '%s/tz_%s_%s_%d' % (save, prefix,
                                    common.num2latlon(lon[i], lat[j]), k)
                            except:
                                sv = '%s' % (save)
                        else:
                            sv = ''
                        graphics.wavelet_plot(tm, period[psel], fz,
                            power[psel, :], coi, glbl_power[psel],
                            scale_avg.data, fft=fft, fft_period=fftperiod,
                            power_signif=sig95[psel, :],
                            glbl_signif=glbl_signif[psel],
                            scale_signif=scale_avg_signif, pminmax=pminmax,
                            labels=labels, normalized=True, std=fstd,
                            ztrend=polyz, wtrend=polyw, show=show, save=sv,
                            levels=levels, cmap=cmap)

        # Saves and/or plots the intermediate results as zonal temporal
        # diagrams.
        if dsave:
            for k in range(C):
                if k == 0:
                    sv = '%s/%s/%s_%s.xt.gz' % (dsave, 'global', prefix,
                        common.num2latlon(lon[i], lat[j], mode='each')[0])
                else:
                    sv = '%s/%s/%s_%s.xt.gz' % (dsave, name[k - 1].lower(),
                        prefix,
                        common.num2latlon(lon[i], lat[j], mode='each')[0])
                if mem_error:
                    fm.save_map(lon, tm, avg_spectrum[k, :, :].data,
                        sv, lat[j])
                else:
                    fm.save_map(lon, tm, avg_spectrum[k, :, j, :].data,
                        sv, lat[j])

        if ((dim > 1) and (show or (save != '')) and (not onlyloc) and
                (len(hloc) > 0)):
            hloc = common.lon360(numpy.unique(hloc))
            if save:
                sv = '%s/xt_%s_%s' % (save, prefix,
                    common.num2latlon(lon[i], lat[j], mode='each')[0])
            else:
                sv = ''
            if mem_error:
                # To include overlapping original signal, use zz=zero
                gis.hovmoller(lon, tm, avg_spectrum[1:, :, :],
                    zo=avg_spectrum_signif[1:, j, :], title=title,
                    crange=crange, show=show, save=sv, labels=hlabels,
                    loc=hloc, cmap=cmap, bottom='avg', right='avg',
                    std=std_map[j, :])
            else:
                gis.hovmoller(lon, tm, avg_spectrum[1:, :, j, :],
                    zo=avg_spectrum_signif[1:, j, :], title=title,
                    crange=crange, show=show, save=sv, labels=hlabels,
                    loc=hloc, cmap=cmap, bottom='avg', right='avg',
                    std=std_map[j, :])

        # Flushing profiling text.
        stdout.write(len(s) * '\b')
        s = 'Spectral analysis of %d location%s (%s)... %s ' % (N, plural, Y,
            common.profiler(b, j + 1, 0, t1, t2))
        stdout.write(s)
        stdout.flush()

    stdout.write('\n')

    result['scale'] = scales
    result['period'] = period
    if dim == 1:
        result['power_spectrum'] = power * fstd2
        result['power_significance'] = sig95
        result['cwt'] = wave
        result['fft'] = fft
    result['global_power'] = global_power
    result['scale_spectrum'] = avg_spectrum
    if fpath:
        result['lon'] = lon
        result['lat'] = lat
    result['scale_significance'] = avg_spectrum_signif
    result['trend'] = trend
    result['wavelet_trend'] = wavelet_trend
    result['fft_power'] = fft_spectrum
    result['fft_first'] = fft_spectrum1
    result['fft_second'] = fft_spectrum2
    result['fft_period'] = fftperiod
    result['fft_trend'] = fft_trend
    return result
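
A minimal usage sketch for a single synthetic time-series; `alpha=-1` requests that the lag-1 autocorrelation be estimated from the data, and no plots are produced since neither show nor save is set:

    import numpy
    tm = numpy.arange(0., 3650., 7.)  # Weekly samples over ten years.
    z = (numpy.sin(2 * numpy.pi * tm / 365.25) +
         numpy.random.randn(tm.size))
    result = wavelet_analysis(z, tm, mother='Morlet', alpha=-1,
                              siglvl=0.95)
    print result['period']
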
Example #11
0
def map(lon, lat, z, z2=None, tm=None, projection='cyl', save='', ftype='png',
        crange=None, crange2=None, cmap=cm.GMT_no_green, show=False,
        shiftgrd=0., orientation='landscape', title='', label='', units='',
        subplot=None, adjustprops=None, loc=[], xlim=None, ylim=None,
        xstep=None, ystep=None, etopo=False, profile=True, hook=None, 
        **kwargs):
    """Generates maps.

    The maps can be either saved as image files or simply showed on
    screen.

    PARAMETERS
        lon, lat (array like) :
            Longitude and latitude arrays.
        z (array like) :
            Variable data array. For bi-dimensional MxN arrays, a
            single map is plotted, where M and N should have the same
            lengths as the latitude and the longitude respectively.

            For tri-dimensional TxMxN arrays, either a sequence of maps
            is generated if T has the same length as tm or, in case tm
            is not set, T maps are plotted on the same figure.
        z2 (array like, optional) :
            Second variable to be plotted using simple line contours.
        tm (array like, optional) :
            Time array. It should contain values in matplotlib date
            format (i.e. number of days since 0001-01-01 UTC).
        projection (text, optional) :
            Sets the map projection. Implemented projections are:
                cyl -- Equidistant cylindrical
                ortho -- Orthographic
                robin -- Robinson
                moll -- Mollweide
                eqdc -- Equidistant conic
                poly -- Polyconic
                omerc -- Oblique mercator
            Default is the equidistant cylindrical projection (cyl).
        save (string, optional) :
            The path in which the resulting plots are to be saved. If
            not set, then no images will be saved.
        ftype (string, optional) :
            The image file type. Most backends support png, pdf, ps,
            eps and svg.
        crange (array like, optional) :
            Sets the color range of the maps. If not given then the
            range is calculated from the input data.
        crange2 (array like, optional) :
            Sets the contour line interval.
        cmap (colormap, optional) :
            Sets the colormap to be used in the plots. The default is
            the Generic Mapping Tools (GMT) no green.
        show (boolean, optional) :
            If set to true, the resulting maps are explicitly shown on
            screen.
        shiftgrd (float, optional) :
            Shifts the longitude and variable data arrays east or west.
            Its value determines the starting longitude for the shifted
            grid.
            TODO: update functionality
        orientation (string, optional) :
            Sets the orientation of the figure. Allowed options are
            'landscape' (default), 'portrait', 'squared'.
        title (string, array like, optional) :
            Sets the map title. If array like, each element of the
            array becomes the title for each map. If the title is set
            to '%date%' then the ISO formatted date is written.
        label (string, array like, optional) :
            Sets the label for each plot. If array like, each element
            of the array becomes the label for each plot.
        units (string, array like, optional) :
            Determines the units for all the maps, or for each map
            separately if a text array is given.
        subplot (array like, optional) :
            Two item list containing the number of rows and columns for
            subplots.
        adjustprops (dict, optional) :
            Dictionary containing the subplot parameters.
        loc (list, optional) :
            Lists the longitude of locations to be marked in map.
        xlim, ylim (array like, optional) :
            List containing the upper and lower zonal and meridional
            limits, respectively.
        xstep, ystep (float, optional) :
            Determines the parallel and meridian spacing.
        etopo (boolean, optional) :
            If true, overlays ETOPO contour lines on map.
        profile (boolean, optional) :
            Turns profiler on/off. If set to true (default) outputs the
            ETA and other information on screen.
        hook (function, optional) :
            Executes a hook function after the plot. The map instance
            is passed along as parameter.

    OUTPUT
        Map plots either on screen and or on file according to the
        specified parameters.

    RETURNS
        Nothing.

    """
    t1 = time()
    __init__()

    # Transforms input arrays into numpy arrays and numpy masked arrays.
    lat = numpy.asarray(lat)
    lon = numpy.asarray(lon)
    if tm is not None:
        tm = numpy.asarray(tm)
    if not isinstance(z, numpy.ma.MaskedArray):
        z = numpy.ma.asarray(z)
        z.mask = numpy.isnan(z)

    # Determines the number of dimensions of the variable to be plotted and
    # the sizes of each dimension.
    dim = len(z.shape)
    if dim == 3:
        c, b, a = z.shape
    elif dim == 2:
        b, a = z.shape
        c = 1
        z = z.reshape(c, b, a)
    else:
        raise Warning('Map plots require either bi-dimensional or tri-'
                      'dimensional data.')
    if lon.size != a:
        raise Warning('Longitude and data lengths do not match.')
    if lat.size != b:
        raise Warning('Latitude and data lengths do not match.')
    #if type(tm).__name__ != 'NoneType':
    #    if tm.size != c:
    #        raise Warning, 'Time and data lengths do not match.'

    # Shifts the longitude and data grid if applicable and determines central
    # latitude and longitude for the map.
    lon180 = common.lon180(lon)
    if xlim is None:
        try:
            mask = ~z.mask.all(axis=0).all(axis=0)
            xlim = [lon180[mask].min(), lon180[mask].max()]
        except:
            xlim = [lon.min(), lon.max()]
    if ylim is None:
        try:
            mask = ~z.mask.all(axis=0).all(axis=1)
            ylim = [lat[mask].min(), lat[mask].max()]
        except:
            ylim = [lat.min(), lat.max()]
    lon0 = numpy.mean(xlim)
    lat0 = numpy.mean(ylim)
    if (shiftgrd != 0): # | (projection in ['ortho', 'robin', 'moll']):
        dx, dy = lon[1] - lon[0], lat[1] - lat[0]
        lon = lon180
        shift = pylab.find(pylab.diff(lon) < 0) + 1
        try:
            # Assumes a single wrap-around point and rolls both the
            # longitude and the data along the zonal (last) axis.
            lon = numpy.roll(lon, -int(shift))
            z = numpy.roll(z, -int(shift), axis=-1)
        except:
            pass
        #z, lon = shiftgrid(shiftgrd, z, lon0)
        
        # Pad borders with NaN's to avoid distortions
        #lon = numpy.concatenate([[lon[0] - dx], lon, [lon[-1] + dx]])
        #lat = numpy.concatenate([[lat[0] - dy], lat, [lat[-1] + dy]])
        #nan = numpy.ma.empty((c, 1, a)) * numpy.nan
        #nan.mask = True
        #z = numpy.ma.concatenate([nan, z, nan], axis=1)
        #nan = numpy.ma.empty((c, b+2, 1)) * numpy.nan
        #nan.mask = True
        #z = numpy.ma.concatenate([nan, z, nan], axis=2)
    
    # Loads topographic data, if appropriate.
    if etopo:
        ez = common.etopo.z
        ex = common.etopo.x
        ey = common.etopo.y
        er = -numpy.arange(1000, 12000, 1000)

    # Setting the color ranges
    if crange is None:
        cmajor, cminor, crange, cticks, extend = common.step(z,
            returnrange=True)
    else:
        crange = numpy.asarray(crange)
        cminor = numpy.diff(crange).mean()
        if crange.size > 11:
            cmajor = 2 * cminor
        else:
            cmajor = cminor
        if len(crange) < 15:
            cticks = crange[::2]
        else:
            cticks = crange[::5]

        xmin, xmax = z.min(), z.max()
        rmin, rmax = crange.min(), crange.max()
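        # For example, data spanning [-3, 3] against a user-supplied
        # crange of [-2, ..., 2] exceeds the range at both ends, so
        # extend = 'both' and the colorbar is drawn with arrows on
        # both sides.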
        
        if (xmin < rmin) & (xmax > rmax):
            extend = 'both'
        elif (xmin < rmin) & (xmax <= rmax):
            extend = 'min'
        elif (xmin >= rmin) & (xmax > rmax):
            extend = 'max'
        elif (xmin >= rmin) & (xmax <= rmax):
            extend = 'neither'
        else:
            raise Warning('Unable to determine extend')
    if z2 is not None and crange2 is None:
        cmajor2, cminor2, crange2, cticks2, extend2 = common.step(z2,
            returnrange=True)

    # Turning interactive mode on or off according to show parameter.
    if show == False:
        pylab.ioff()
    elif show == True:
        pylab.ion()
    else:
        raise Warning('Invalid show option.')

    # Sets the figure properties according to the orientation parameter and to
    # the data dimensions.
    if adjustprops is None:
        if projection in ['cyl', 'eqdc', 'poly', 'omerc', 'vandg', 'nsper']:
            adjustprops = dict(left=0.1, bottom=0.15, right=0.95, top=0.9,
                               wspace=0.05, hspace=0.5)
        else:
            adjustprops = dict(left=0.05, bottom=0.15, right=0.95, top=0.9,
                               wspace=0.05, hspace=0.2)

    # Sets the meridian and the parallel coordinates and necessary parameters
    # depending on the chosen projection.
    if xstep is None:
        xstep = int(common.step(xlim, 5, kind='polar')[0])
    if ystep is None:
        ystep = int(common.step(ylim, 3, kind='polar')[0])
    merid = numpy.arange(10 * int(min(xlim) / 10 - 2),
                         10 * int(max(xlim) / 10 + 3), xstep)
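    # For example, xlim = [-60, 15] with xstep = 20 gives meridians at
    # [-80, -60, ..., 20], extending slightly past the map limits.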
    if ((max(ylim) - min(ylim)) > 130) or (projection in ['ortho', 'robin',
                                                          'moll']):
        #paral = numpy.array([-(66. + 33. / 60. + 38. / (60. * 60.)),
        #                     -(23. + 26. / 60. + 22. / (60. * 60.)), 0.,
        #                      (23. + 26. / 60. + 22. / (60. * 60.)),
        #                      (66. + 33. / 60. + 38. / (60. * 60.))])
        #paral = numpy.round(paral)
        paral = numpy.array([-60, -30, 0, 30, 60])
    else:
        paral = numpy.arange(numpy.floor(min(ylim) / ystep) * ystep,
                             numpy.ceil(max(ylim) / ystep) * ystep + ystep,
                             ystep)
    if projection == 'eqdc':
        if not (('lat_0' in kwargs.keys()) and ('lat_1' in kwargs.keys())):
            kwargs['lat_0'] = min(ylim) + (max(ylim) - min(ylim)) / 3.
            kwargs['lat_1'] = min(ylim) + 2 * (max(ylim) - min(ylim)) / 3.
        if not ('lon_0' in kwargs.keys()):
            kwargs['lon_0'] = lon0
    elif projection == 'poly':
        if not ('lat_0' in kwargs.keys()):
            kwargs['lat_0'] = (max(ylim) + min(ylim)) / 2.
        if not ('lon_0' in kwargs.keys()):
            kwargs['lon_0'] = lon0
    elif projection == 'omerc':
        if not (('lat_1' in kwargs.keys()) and ('lat_2' in kwargs.keys())):
            kwargs['lat_1'] = min(ylim) + (max(ylim) - min(ylim)) / 4.
            kwargs['lat_2'] = min(ylim) + 3 * (max(ylim) - min(ylim)) / 4.
        if not (('lon_1' in kwargs.keys()) and ('lon_2' in kwargs.keys())):
            kwargs['lon_1'] = min(xlim) + (max(xlim) - min(xlim)) / 4.
            kwargs['lon_2'] = min(xlim) + 3 * (max(xlim) - min(xlim)) / 4.
        kwargs['no_rot'] = False
    elif projection == 'vandg':
        kwargs['lon_0'] = lon0
    elif projection == 'nsper':
        kwargs['lon_0'] = lon0
        kwargs['lat_0'] = lat0
    elif projection in ['aea', 'lcc']:
        kwargs['lon_0'] = lon0
        kwargs['lat_0'] = (min(ylim) + max(ylim)) / 2.
        kwargs['lat_1'] = max(ylim) - (max(ylim) - min(ylim)) / 4.
        kwargs['lat_2'] = min(ylim) + (max(ylim) - min(ylim)) / 4.
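    # For instance, with ylim = [0, 30] the 'eqdc' branch above
    # defaults to standard parallels lat_0 = 10 and lat_1 = 20, i.e.
    # one and two thirds into the latitude range.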

    # Setting the subplot parameters in case multiple maps per figure.
    try:
        plrows, plcols = subplot
    except:
        if type(tm).__name__ in ['NoneType', 'float']:
            if orientation in ['landscape', 'worldmap']:
                plcols = min(3, c)
                plrows = numpy.ceil(float(c) / plcols)
            elif orientation == 'portrait':
                plrows = min(3, c)
                plcols = numpy.ceil(float(c) / plrows)
            elif orientation == 'squared':
                plrows = plcols = numpy.ceil(float(c) ** 0.5)
        else:
            plcols = plrows = 1
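    # Example: with tm set to None, c = 7 maps in 'landscape'
    # orientation give plcols = 3 columns and plrows = ceil(7 / 3) = 3
    # rows.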

    bbox = dict(edgecolor='w', facecolor='w', alpha=0.9)

    # Starts the plotting routines
    if profile:
        if c == 1:
            plural = ''
        else:
            plural = 's'
        s = 'Plotting %d map%s... ' % (c, plural)
        stdout.write(s)
        stdout.flush()

    fig = graphics.figure(fp=dict(), ap=adjustprops, orientation=orientation)
    for n in range(c):
        t2 = time()
        if plcols * plrows > 1:
            ax = pylab.subplot(plrows, plcols, n + 1)
        else:
            fig.clear()
            ax = pylab.subplot(plrows, plcols, 1)
        
        if (projection in ['ortho', 'robin', 'moll']):
            m = Basemap(projection=projection, lat_0=lat0, lon_0=lon0,
                        **kwargs)
            xoffset = (m.urcrnrx - m.llcrnrx) / 50.
        elif projection in ['aea', 'cyl', 'eqdc', 'poly', 'omerc', 'vandg',
                            'nsper', 'lcc']:
            m = Basemap(projection=projection, llcrnrlat=min(ylim),
                        urcrnrlat=max(ylim), llcrnrlon=min(xlim),
                        urcrnrlon=max(xlim), **kwargs)
            xoffset = None
        else:
            raise Warning('Projection \'%s\' not implemented.' % projection)

        x, y = m(*numpy.meshgrid(lon, lat))
        dat = z[n, :, :]
        
        # Set the meridians' and parallels' labels
        if plcols * plrows > 1:
            if (n % plcols) == 0:
                plabels = [1, 0, 0, 0]
            else:
                plabels = [0, 0, 0, 0]
            if (n >= c - plcols):
                mlabels = [0, 0, 0, 1]
            else:
                mlabels = [0, 0, 0, 0]
        else:
            mlabels = [0, 0, 0, 1]
            plabels = [1, 0, 0, 0]
        if projection in ['ortho']:
            plabels = [0, 0, 0, 0]
        if projection in ['geos', 'ortho', 'aeqd', 'moll']:
            mlabels = [0, 0, 0, 0]

        # Plots locations
        for item in loc:
            m.scatter(item[0], item[1], s=24, c='w', marker='o', alpha=1, 
                      zorder=99)

        # Plot contour
        im = m.contourf(x, y, dat, crange, cmap=cmap, extend=extend, hold='on')

        if z2 is not None:
            dat2 = z2[n, :, :]
            im2 = m.contour(x, y, dat2, crange2, colors='k', hatch='x',
                hold='on', linewidths=numpy.linspace(0.25, 2., len(crange2)),
                alpha=0.6)
            #pylab.clabel(im2, fmt='%.1f')

        # Plot topography, if appropriate
        if etopo:
            xe, ye = m(*numpy.meshgrid(ex, ey))
            cs = m.contour(xe, ye, ez, er, colors='k', linestyles='-',
                alpha=0.3, hold='on')

        # Run hook function, if appropriate
        try:
            hook(m)
        except:
            pass

        m.drawcoastlines()
        m.fillcontinents()
        m.drawcountries()
        if projection != 'nsper':
            m.drawmapboundary(fill_color='white')
        m.drawmeridians(merid, linewidth=0.5, labels=mlabels)
        m.drawparallels(paral, linewidth=0.5, labels=plabels, xoffset=xoffset)
        
        # Draws colorbar
        if orientation == 'squared':
            cx = pylab.axes([0.25, 0.07, 0.5, 0.03])
        elif orientation in ['landscape', 'worldmap']:
            cx = pylab.axes([0.2, 0.05, 0.6, 0.03])
        elif orientation == 'portrait':
            cx = pylab.axes([0.25, 0.05, 0.5, 0.02])
        pylab.colorbar(im, cax=cx, orientation='horizontal', ticks=cticks,
                       extend=extend)

        # Titles, units and other things
        ttl = None
        if isinstance(title, str):
            ttl = title
        else:
            try:
                ttl = title[n]
            except:
                pass
        if ttl:
            if ttl == '%date%':
                try:
                    ttl = dates.num2date(tm[n]).isoformat()[:10]
                except:
                    try:
                        ttl = dates.num2date(tm).isoformat()[:10]
                    except:
                        ttl = ''
            ax.text(0.5, 1.05, ttl, ha='center', va='baseline',
                transform=ax.transAxes)
        
        lbl = None
        if isinstance(label, str):
            lbl = label
        else:
            try:
                lbl = label[n]
            except:
                pass
        if lbl:
            if lbl == '%date%':
                try:
                    lbl = dates.num2date(tm[n]).isoformat()[:10]
                except:
                    try:
                        lbl = dates.num2date(tm).isoformat()[:10]
                    except:
                        lbl = ''
            ax.text(0.04, 0.83, lbl, ha='left', va='bottom', 
                transform=ax.transAxes, bbox=bbox)

        unt = None
        if isinstance(units, str):
            unt = units
        else:
            try:
                unt = units[n]
            except:
                pass
        if unt:
            cx.text(1.05, 0.5, r'$\left[%s\right]$' % (unt), ha='left',
                va='center', transform=cx.transAxes)

        # Drawing and saving the figure if appropriate.
        pylab.draw()
        if save:
            if (c == 1) | (plcols * plrows > 1):
                pylab.savefig('%s.%s' % (save, ftype), dpi=150)
            else:
                pylab.savefig('%s%06d.%s' % (save, n+1, ftype), dpi=150)

        if profile:
            stdout.write(len(s) * '\b')
            s = 'Plotting %d map%s... %s ' % (c, plural, common.profiler(c, 
                n + 1, 0, t1, t2),)
            stdout.write(s)
            stdout.flush()

    #
    if profile:
        stdout.write('\n')
    if show == False:
        pylab.close(fig)
    else:
        return fig
Example #12
0
def bin_average(x,
                y,
                dx=1.,
                bins=None,
                nstd=2.,
                interpolate='bins',
                k=3,
                s=None,
                extrapolate='repeat',
                mode='mean',
                profile=False,
                usemask=True):
    """Calculates bin average from input data.

    Inside each bin, calculates the average and standard deviation, and
    selects only those values inside the confidence interval given by
    `nstd`. Finally calculates the bin average using spline
    interpolation at the middle point of each bin. Values outside the
    data boundaries are extrapolated according to `extrapolate`.

    Parameters
    ----------
    x : array like
        Input coordinate to be binned. It has to be 1-dimensional.
    y : array like
        The data input array.
    dx : float, optional
        Bin width, used to build the bin array when `bins` is not
        given.
    bins : array like, optional
        Array of bins. It has to be 1-dimensional and strictly
        increasing.
    nstd : float, optional
        Confidence interval given as number of standard deviations.
    interpolate : string or boolean, optional
        Valid options are `bins` (default), `full` or `False`, which
        interpolate data to the central bin points in filled bins only,
        over the full series, or skip interpolation, respectively.
    k : int, optional
        Specifies the order of the interpolation spline. Default is 3,
        `cubic`.
    s : float, optional
        Positive smoothing factor used to choose the number of knots.
    extrapolate : string or boolean, optional
        Sets whether averages outside the data boundaries are
        extrapolated. If `True` or `linear`, extrapolates data
        linearly; if `repeat` (default), repeats the values from the
        nearest bin.
    mode : string, optional
        Sets the averaging mode: `mean` (default) or `median`.
    profile : boolean, optional
        If `True`, prints progress and ETA information on screen.
        Default is `False`.
    usemask : boolean, optional
        If `True` (default), returns masked arrays and masks invalid
        values in the results.

    Returns
    -------
    bin_x : array like
        Coordinate at the center of the bins.
    bin_y : array like
        Interpolated array of bin averages.
    avg_x : array like
        Average coordinate in each bin.
    avg_y : array like
        Average values inside each bin.
    std_x : array like
        Coordinate standard deviation in each bin.
    std_y : array like
        Standard deviation in each bin.
    min_y : array like
        Minimum values in each bin.
    max_y : array like
        Maximum values in each bin.
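
    Examples
    --------
    A minimal sketch with synthetic, noisy data; the sampling below is
    illustrative only:

    >>> import numpy
    >>> x = numpy.sort(numpy.random.uniform(-10., 0., 500))
    >>> y = numpy.sin(x) + 0.1 * numpy.random.randn(500)
    >>> bin_x, bin_y = bin_average(x, y, dx=1.)[:2]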

    """
    t0 = time()
    # If no bins are given, calculate them from input data.
    if bins is None:
        x_min = floor(x.min() / dx) * dx
        x_max = 0.  # numpy.ceil(x.max() / dx) * dx
        bins = arange(x_min - dx, x_max + dx, dx) + dx / 2
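    # For example, dx = 1. and x.min() = -3.2 give bins at
    # [-4.5, -3.5, ..., 0.5], so the bin centres fall on integers.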
    # Checks if bin array is strictly increasing.
    if not all(b0 < b1 for b0, b1 in zip(bins, bins[1:])):
        raise ValueError('Bin array must be strictly increasing.')
    # Ensures that input coordinate `x` is monotonically increasing.
    _i = x.argsort()
    x = x[_i]
    y = y[_i]
    # Data types
    dtype_x = x.dtype
    dtype_y = y.dtype
    # Some variable initializations
    nbins = len(bins) - 1
    ndata = len(y)
    Sel = zeros(ndata, dtype=bool)
    # Initializes output arrays, masked or not.
    if usemask:
        bin_y = ma.empty(nbins, dtype=dtype_y) * nan
        avg_x = ma.empty(nbins, dtype=dtype_x) * nan
        avg_y = ma.empty(nbins, dtype=dtype_y) * nan
        std_x = ma.empty(nbins, dtype=dtype_x) * nan
        std_y = ma.empty(nbins, dtype=dtype_y) * nan
        min_y = ma.empty(nbins, dtype=dtype_y) * nan
        max_y = ma.empty(nbins, dtype=dtype_y) * nan
    else:
        bin_y = empty(nbins, dtype=dtype_y) * nan
        avg_x = empty(nbins, dtype=dtype_x) * nan
        avg_y = empty(nbins, dtype=dtype_y) * nan
        std_x = empty(nbins, dtype=dtype_x) * nan
        std_y = empty(nbins, dtype=dtype_y) * nan
        min_y = empty(nbins, dtype=dtype_y) * nan
        max_y = empty(nbins, dtype=dtype_y) * nan
    # Determines indices of the bins to which each data points belongs.
    bin_sel = digitize(x, bins) - 1
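    # e.g. bins = [0., 1., 2.] and x = [0.5, 1.5] yield digitize
    # indices [1, 2] and therefore zero-based bin indices [0, 1].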
    bin_sel_unique = unique(bin_sel)
    _nbins = bin_sel_unique.size
    #
    t1 = time()
    for i, bin_i in enumerate(bin_sel_unique):
        if profile:
            # Erase line ANSI terminal string when using return feed
            # character.
            # (source:http://www.termsys.demon.co.uk/vtansi.htm#cursor)
            _s = '\x1b[2K\rBin-averaging... %s' % (common.profiler(
                _nbins, i, 0, t0, t1))
            stdout.write(_s)
            stdout.flush()
        # Ignores when data is not in valid range:
        if (bin_i < 0) | (bin_i >= nbins):
            print('Skipping out-of-range bin index {}.'.format(bin_i))
            continue
        # Calculate averages inside each bin in two steps: (i) calculate
        # average and standard deviation; (ii) consider only those values
        # within selected standard deviation range.
        sel = flatnonzero(bin_sel == bin_i)
        # Selects data within selected standard deviation or single
        # data in current bin.
        if sel.size > 1:
            _avg_y = y[sel].mean()
            _std_y = y[sel].std()
            if _std_y > 1e-10:
                _sel = ((y[sel] >= (_avg_y - nstd * _std_y)) &
                        (y[sel] <= (_avg_y + nstd * _std_y)))
                #print bin_i, sel.size, _avg_y, _std_y
                sel = sel[_sel]
                #print sel.size
            # Calculates final values
            if mode == 'mean':
                _avg_x = x[sel].mean()
                _avg_y = y[sel].mean()
            elif mode == 'median':
                _avg_x = median(x[sel])
                _avg_y = median(y[sel])
            else:
                raise ValueError('Invalid mode `{}`.'.format(mode))
            _std_x = x[sel].std()
            _std_y = y[sel].std()
            _min_y = y[sel].min()
            _max_y = y[sel].max()
        else:
            _avg_x, _avg_y = x[sel][0], y[sel][0]
            _std_x, _std_y = 0, 0
            _min_y, _max_y = nan, nan
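        # With enough samples, a single gross outlier falls outside the
        # mean +/- nstd * std interval computed above and is dropped
        # before the final bin statistics are taken.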
        #
        #print i, sel_sum, _avg_x, _avg_y
        #
        avg_x[bin_i] = _avg_x
        avg_y[bin_i] = _avg_y
        std_x[bin_i] = _std_x
        std_y[bin_i] = _std_y
        min_y[bin_i] = _min_y
        max_y[bin_i] = _max_y
        #
        if profile:
            _s = '\rBin-averaging... %s' % (common.profiler(
                _nbins, i + 1, 0, t0, t1))
            stdout.write(_s)
            stdout.flush()

    # Interpolates selected data to central data point in bin using spline.
    # Only interpolates data in filled bins.
    if interpolate in ['bins', 'full']:
        sel = ~isnan(avg_y)
        bin_x = (bins[1:] + bins[:-1]) * 0.5
        if interpolate == 'bins':
            bin_y[sel] = _interpolate(bin_x[sel],
                                      avg_x[sel],
                                      avg_y[sel],
                                      k=k,
                                      s=s,
                                      outside=extrapolate)
        elif interpolate == 'full':
            bin_y = _interpolate(bin_x,
                                 avg_x[sel],
                                 avg_y[sel],
                                 k=k,
                                 s=s,
                                 outside=extrapolate)
    elif interpolate is not False:
        raise ValueError(
            'Invalid interpolation mode `{}`.'.format(interpolate))
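    # Note: in 'bins' mode only bins that received data get
    # interpolated values in bin_y; in 'full' mode every bin centre is
    # filled, extrapolating where necessary.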

    # Masks invalid data.
    if usemask:
        bin_y = ma.masked_invalid(bin_y)
        avg_x = ma.masked_invalid(avg_x)
        avg_y = ma.masked_invalid(avg_y)
        std_x = ma.masked_invalid(std_x)
        std_y = ma.masked_invalid(std_y)
        min_y = ma.masked_invalid(min_y)
        max_y = ma.masked_invalid(max_y)

    if interpolate:
        return bin_x, bin_y, avg_x, avg_y, std_x, std_y, min_y, max_y
    else:
        return avg_x, avg_y, std_x, std_y, min_y, max_y