def readAndMask(url, variable, mask=None, cachePath=CachePath, hdfsPath=None):
    '''Read one variable from a netCDF or HDF file and return it as an array.

    The file is first resolved to a local path with retrieveFile(), which is
    given the cache directory and the optional HDFS path.

    Parameters:
        url       -- location of the file to read.
        variable  -- name of the variable to return.
        mask      -- optional name of a second variable to read together with
                     `variable`.  It is only requested from the file; this
                     function does not apply it to the returned array.
        cachePath -- cache directory handed to retrieveFile().
        hdfsPath  -- optional HDFS path handed to retrieveFile().

    Returns:
        The array for `variable` (possibly a numpy masked array), with a
        leading dimension of length 1 removed, or None if the file could not
        be retrieved or the variable could not be read.  Errors are reported
        on stderr rather than raised.
    '''
    v = None
    variables = [variable, mask] if mask else [variable]

    try:
        path = retrieveFile(url, cachePath, hdfsPath)
    except Exception:
        # Best effort: report and let the caller carry on without this file.
        print('readAndMask: Error, continuing without file %s' % url, file=sys.stderr)
        return v

    try:
        print('Reading variable %s from %s' % (variable, path), file=sys.stderr)
        # getVariables returns a dict of arrays keyed by name plus the open file handle.
        var, fh = getVariables(path, variables, arrayOnly=True, set_auto_mask=True)
        try:
            v = var[variable]  # could be masked array
            if v.shape[0] == 1:
                v = v[0]  # throw away trivial time dimension for CF-style files
            if VERBOSE:
                print('Variable range: %f to %f' % (v.min(), v.max()), file=sys.stderr)
        finally:
            # Always release the handle, even when the lookup above fails.
            close(fh)
    except Exception:
        print('readAndMask: Error, cannot read variable %s from file %s' % (variable, path), file=sys.stderr)

    return v
def readCoordinates(path, coordinates=('lat', 'lon')):
    '''Read coordinate arrays from local netCDF file.

    Parameters:
        path        -- path of the local file to open.
        coordinates -- iterable of coordinate variable names to read.

    Returns:
        A list with one array per requested name, in the order given.
    '''
    # Work on a private list: the default is immutable, and the helper gets a
    # fresh list it can never share with the caller or with later calls.
    names = list(coordinates)
    var, fh = getVariables(path, names, arrayOnly=True, set_auto_mask=True)
    close(fh)
    return [var[name] for name in names]
def accumulate(urls, variable, accumulators=('count', 'mean', 'M2', 'min', 'max'), cachePath='~/cache'):
    '''Accumulate per-cell statistics of one variable over a set of files.

    Parameters:
        urls         -- a (keys, urls) pair; `keys` is passed through untouched
                        and `urls` is the sequence of files to read.
        variable     -- name of the variable to read from every file.
        accumulators -- names of the statistics to keep.  'count', 'mean',
                        'M2', 'min' and 'max' are updated; any other name gets
                        a zero-filled float64 array that is never updated.
        cachePath    -- cache directory handed to retrieveFile().

    Returns:
        (keys, accum), where accum maps each requested name to an array shaped
        like the (time-squeezed) variable.  accum is empty if no file could
        be read.

    Files that cannot be retrieved or read are reported on stderr and skipped.
    Masked cells contribute to none of the statistics.  'mean' and 'M2' are
    running values (Welford update); M2 is the sum of squared deviations from
    the mean.  When the input is a masked array, 'min' and 'max' are returned
    as masked arrays, masked only where no file had a valid value.
    '''
    keys, urls = urls
    accum = {}
    count = mean = None  # running state, created from the first readable file

    for url in urls:
        try:
            path = retrieveFile(url, cachePath)
        except Exception:
            print('accumulate: Error, continuing without file %s' % url, file=sys.stderr)
            continue

        try:
            print('Reading %s ...' % path, file=sys.stderr)
            # getVariables returns a dict of arrays keyed by name plus the open file handle.
            var, fh = getVariables(path, [variable], arrayOnly=True, set_auto_mask=True)
            try:
                v = var[variable]  # could be masked array
                if v.shape[0] == 1:
                    v = v[0]  # throw away trivial time dimension for CF-style files
            finally:
                # Always release the handle, even when the lookup above fails.
                close(fh)
        except Exception:
            print('accumulate: Error, cannot read variable %s from file %s' % (variable, path), file=sys.stderr)
            continue

        # Initialise on the first file that is actually read, not on index 0,
        # so a failure on the first URL does not leave the accumulators missing.
        if count is None:
            count, mean = _initAccumulators(accum, accumulators, v.shape)
        _updateAccumulators(accum, accumulators, count, mean, v)

    return (keys, accum)


def _initAccumulators(accum, accumulators, shape):
    '''Create the requested accumulator arrays in `accum`; return the (count, mean) work arrays.'''
    for k in accumulators:
        if k == 'min':
            accum[k] = default_fillvals['f8'] * np.ones(shape, dtype=np.float64)
        elif k == 'max':
            accum[k] = -default_fillvals['f8'] * np.ones(shape, dtype=np.float64)
        elif k == 'count':
            accum[k] = np.zeros(shape, dtype=np.int64)
        else:
            accum[k] = np.zeros(shape, dtype=np.float64)

    # The mean/M2 update needs both a count and a running mean.  Reuse the
    # caller-visible arrays when requested (they are updated in place);
    # otherwise keep private ones.
    count = accum['count'] if 'count' in accum else np.zeros(shape, dtype=np.int64)
    mean = accum['mean'] if 'mean' in accum else np.zeros(shape, dtype=np.float64)
    return count, mean


def _updateAccumulators(accum, accumulators, count, mean, v):
    '''Fold one array `v` into the accumulators, ignoring masked cells.'''
    isMasked = np.ma.isMaskedArray(v)
    if isMasked:
        valid = ~np.ma.getmaskarray(v)
        data = np.ma.filled(v, 0.)  # placeholder under the mask; never used where invalid
    else:
        valid = np.ones(np.shape(v), dtype=bool)
        data = v

    count += valid  # in place, so accum['count'] (if requested) sees the update

    for name, reducer in (('min', np.minimum), ('max', np.maximum)):
        if name not in accumulators:
            continue
        previous = accum[name]
        keepMasked = isMasked or np.ma.isMaskedArray(previous)
        previousData = np.ma.getdata(previous)
        # Only cells valid in this file may change the extremum.
        best = np.where(valid, reducer(previousData, data), previousData)
        # Mask only cells that have never been observed, instead of letting
        # one file's mask stick for every later file.
        accum[name] = np.ma.masked_array(best, mask=(count == 0)) if keepMasked else best

    if 'mean' in accumulators or 'M2' in accumulators:
        # Welford update restricted to valid cells: delta is zero elsewhere,
        # so the mean and M2 there are unchanged and never divided by zero.
        delta = np.where(valid, data - mean, 0.)
        delta_n = delta / np.maximum(count, 1)
        mean += delta_n
        if 'M2' in accumulators:
            accum['M2'] += delta * delta_n * (count - 1)