Example #1
0
def readAndMask(url, variable, mask=None, cachePath=CachePath, hdfsPath=None):
    '''Read a variable from a netCDF or HDF file and return it as an array.

    The file is first obtained through retrieveFile() (which is expected to
    bring remote or HDFS files into the cache directory).  The variable is
    then read with getVariables() using arrayOnly=True and set_auto_mask=True,
    so the result may be a numpy masked array.  A leading dimension of
    length 1 is dropped.

    Errors are reported on stderr instead of being raised.

    Args:
        url: location of the file, passed to retrieveFile().
        variable: name of the variable to read.
        mask: optional name of a mask variable; when given, it is requested
            from the file together with variable.
            NOTE(review): the mask variable is read but never applied in this
            function -- confirm whether masking is meant to happen here.
        cachePath: cache directory passed to retrieveFile().
        hdfsPath: passed through to retrieveFile().

    Returns:
        The array read for variable.  None if the file could not be retrieved
        or the variable could not be read.  If a step after the read fails,
        the array as obtained so far is returned.
    '''
    v = None
    variables = [variable, mask] if mask else [variable]

    try:
        path = retrieveFile(url, cachePath, hdfsPath)
    except Exception:
        # Best effort: a missing file is reported and the caller gets None.
        print('readAndMask: Error, continuing without file %s' % url,
              file=sys.stderr)
        return v

    try:
        print('Reading variable %s from %s' % (variable, path),
              file=sys.stderr)
        var, fh = getVariables(
            path, variables, arrayOnly=True,
            set_auto_mask=True)  # return dict of variable objects by name
        try:
            v = var[variable]  # could be masked array
            if v.shape[0] == 1:
                v = v[0]  # throw away trivial time dimension for CF-style files
            if VERBOSE:
                print('Variable range: %f to %f' % (v.min(), v.max()),
                      file=sys.stderr)
        finally:
            # Release the file handle even when reading the variable fails.
            close(fh)
    except Exception:
        print('readAndMask: Error, cannot read variable %s from file %s' %
              (variable, path),
              file=sys.stderr)

    return v
Example #2
0
def readCoordinates(path, coordinates=['lat', 'lon']):
    '''Read the named coordinate arrays from a local netCDF file.

    The variables are fetched in a single getVariables() call (arrayOnly=True,
    set_auto_mask=True) and the file handle is closed before the arrays are
    collected.

    Args:
        path: path of the file to read.
        coordinates: names of the coordinate variables to return.

    Returns:
        A list with one array per requested name, in the order given by
        coordinates.
    '''
    arrays, handle = getVariables(
        path, coordinates, arrayOnly=True, set_auto_mask=True)
    close(handle)

    result = []
    for name in coordinates:
        result.append(arrays[name])
    return result
Example #3
0
def accumulate(urls,
               variable,
               accumulators=['count', 'mean', 'M2', 'min', 'max'],
               cachePath='~/cache'):
    '''Accumulate one variable from many files into running statistics.

    Each file is fetched with retrieveFile() and the variable is read with
    getVariables(); the requested accumulators are then updated element-wise.
    'mean' and 'M2' (sum of squared deviations from the mean) are updated
    incrementally, one file at a time.  Files that cannot be retrieved or
    read are reported on stderr and skipped.

    Args:
        urls: a (keys, urls) pair; keys is returned untouched and urls is the
            sequence of file locations to process.
        variable: name of the variable to read from every file.
        accumulators: names of the statistics to maintain.  'mean' needs
            'count' to be requested as well, and 'M2' needs 'mean'.  The list
            is only read, never modified.
        cachePath: cache directory passed to retrieveFile().

    Returns:
        (keys, accum), where accum maps each accumulator name to an array
        shaped like the variable.  accum is empty if no file could be read.
    '''
    keys, urls = urls
    accum = {}
    initialized = False
    for url in urls:
        try:
            path = retrieveFile(url, cachePath)
        except Exception:
            print('accumulate: Error, continuing without file %s' % url,
                  file=sys.stderr)
            continue

        try:
            print('Reading %s ...' % path, file=sys.stderr)
            var, fh = getVariables(
                path, [variable], arrayOnly=True,
                set_auto_mask=True)  # return dict of variable objects by name
            try:
                v = var[variable]  # could be masked array
                if v.shape[0] == 1:
                    v = v[0]  # throw away trivial time dimension for CF-style files
            finally:
                # Release the file handle even when reading the variable fails.
                close(fh)
        except Exception:
            print('accumulate: Error, cannot read variable %s from file %s' %
                  (variable, path),
                  file=sys.stderr)
            continue

        # Size the accumulators from the first file that was actually read,
        # so a failure on the very first URL does not leave accum empty.
        if not initialized:
            for k in accumulators:
                if k == 'min':
                    accum[k] = default_fillvals['f8'] * np.ones(
                        v.shape, dtype=np.float64)
                elif k == 'max':
                    accum[k] = -default_fillvals['f8'] * np.ones(
                        v.shape, dtype=np.float64)
                elif k == 'count':
                    accum[k] = np.zeros(v.shape, dtype=np.int64)
                else:
                    accum[k] = np.zeros(v.shape, dtype=np.float64)
            initialized = True

        valid = None  # boolean array of unmasked cells; None for plain arrays
        if np.ma.isMaskedArray(v):
            valid = ~np.ma.getmaskarray(v)
            if 'count' in accumulators:
                accum['count'] += valid
            # NOTE(review): the masked min/max propagate the mask, so a cell
            # masked in any file stays masked in 'min'/'max' -- confirm that
            # this is intended.
            if 'min' in accumulators:
                accum['min'] = np.ma.minimum(accum['min'], v)
            if 'max' in accumulators:
                accum['max'] = np.ma.maximum(accum['max'], v)

            v = np.ma.filled(v, 0.)
        else:
            if 'count' in accumulators:
                accum['count'] += 1
            if 'min' in accumulators:
                accum['min'] = np.minimum(accum['min'], v)
            if 'max' in accumulators:
                accum['max'] = np.maximum(accum['max'], v)

        if 'mean' in accumulators:
            n = accum['count']
            # subtract running mean from new values, eliminate roundoff errors
            delta = v - accum['mean']
            divisor = n
            if valid is not None:
                # Masked cells were filled with 0 and did not increase the
                # count: zero their delta so they leave mean and M2 untouched,
                # and avoid dividing by a count that is still 0.
                delta = np.where(valid, delta, 0.)
                divisor = np.maximum(n, 1)
            delta_n = delta / divisor
            accum['mean'] += delta_n
        if 'M2' in accumulators:
            term = delta * delta_n * (n - 1)
            accum['M2'] += term
    return (keys, accum)