def convolve_dim(f, convolve_def):
    """
    Convolve every variable that uses one dimension with a set of weights.

    Parameters
    ----------
    f : file-like object
        Source file exposing ``dimensions`` and ``variables``.
    convolve_def : str
        Comma-delimited definition ``'dimkey,mode,w0[,w1,...]'``. ``mode``
        is handed to ``numpy.convolve``; the remaining entries are parsed
        as single-precision weights.

    Returns
    -------
    outf : PseudoNetCDFFile
        Copy of ``f`` in which the named dimension has the length produced
        by the convolution and each variable on that dimension holds the
        convolved values. Other variables are copied with their data.
    """
    pieces = convolve_def.split(',')
    dimkey = pieces[0]
    mode = pieces[1]
    weights = np.array(pieces[2:], dtype='f')
    outf = PseudoNetCDFFile()
    from PseudoNetCDF.pncgen import Pseudo2NetCDF
    copier = Pseudo2NetCDF(verbose=0)
    copier.addGlobalProperties(f, outf)
    copier.addDimensions(f, outf)

    def _smooth(vector):
        # One-dimensional kernel shared by the length probe and the data.
        return np.convolve(weights, vector, mode=mode)

    # Convolving a dummy range gives the output length for this mode.
    oldlen = len(outf.dimensions[dimkey])
    newdim = outf.createDimension(dimkey, len(_smooth(np.arange(oldlen))))
    newdim.setunlimited(f.dimensions[dimkey].isunlimited())

    for name, invar in f.variables.items():
        if dimkey not in invar.dimensions:
            # Untouched variables are copied together with their data.
            copier.addVariable(f, outf, name, data=True)
            continue
        # Define the variable without data, then fill in convolved values.
        copier.addVariable(f, outf, name, data=False)
        axis = list(invar.dimensions).index(dimkey)
        smoothed = np.apply_along_axis(
            func1d=_smooth, axis=axis, arr=invar[:])
        if isinstance(invar[:], np.ma.MaskedArray):
            smoothed = np.ma.masked_invalid(smoothed)
        outf.variables[name][:] = smoothed
    return outf
def pncbo(op, ifile1, ifile2, coordkeys=[], verbose=False):
    """
    Perform binary operation (op) on all variables in ifile1
    and ifile2.  The returned file (rfile) contains the result

    rfile = ifile1 <op> ifile2

    Parameters
    ----------
    op : str
        Operator text placed between the two operands and evaluated
        (e.g., +, -, /, *, **, &, |).
    ifile1, ifile2 : file-like objects
        Inputs exposing ``variables``; dimensions and global properties
        are taken from ifile1.
    coordkeys : sequence of str
        Variable names copied unchanged from ifile1. The default list is
        only read, never modified.
    verbose : bool or int
        Stored on the Pseudo2NetCDF copier.

    Returns
    -------
    tmpfile : PseudoNetCDFFile
        Variables missing from ifile2 or listed in coordkeys are copied
        from ifile1. Every other variable holds the operation result with
        invalid values masked, the attributes of the ifile1 variable, and
        units rewritten as '(<units1>) <op> (<units2>)'.
    """
    from PseudoNetCDF.sci_var import Pseudo2NetCDF

    # Copy infile1 to a temporary PseudoNetCDFFile
    p2p = Pseudo2NetCDF()
    p2p.verbose = verbose
    tmpfile = PseudoNetCDFFile()
    p2p.addGlobalProperties(ifile1, tmpfile)
    p2p.addDimensions(ifile1, tmpfile)

    # NOTE(security): op is spliced into an expression passed to eval, so
    # it must come from a trusted caller and never from untrusted input.
    expression = 'in1var[...] %s in2var[...]' % op

    # For each variable, assign the new value
    # to the tmpfile variables.
    for k in ifile1.variables.keys():
        in1var = ifile1.variables[k]
        missing = k not in ifile2.variables.keys()
        if missing or k in coordkeys:
            # Only warn when the variable is really absent; coordinate
            # variables are passed through on purpose.
            if missing:
                warn('%s not found in ifile2' % k)
            p2p.addVariable(ifile1, tmpfile, k)
            continue

        in2var = ifile2.variables[k]
        propd = {ak: getattr(in1var, ak) for ak in in1var.ncattrs()}
        unit1 = getattr(in1var, 'units', 'unknown')
        unit2 = getattr(in2var, 'units', 'unknown')
        propd['units'] = '(%s) %s (%s)' % (unit1, op, unit2)
        outval = np.ma.masked_invalid(eval(expression).view(np.ndarray))
        outvar = tmpfile.createVariable(
            k, in1var.dtype.char, in1var.dimensions,
            fill_value=-999, values=outval)
        # Attach the collected attributes (previously built but dropped),
        # including the combined units string.
        for pk, pv in propd.items():
            setattr(outvar, pk, pv)
    return tmpfile
def pncbfunc(func, ifile1, ifile2, coordkeys=[], verbose=0):
    """
    Perform binary function (func) on all variables in ifile1
    and ifile2.  The returned file (rfile) contains the result

    rfile = func(ifile1, ifile2)

    Parameters
    ----------
    func : callable
        Called as ``func(var1[...], var2[...])`` for each shared variable.
    ifile1, ifile2 : file-like objects
        Inputs exposing ``variables``; ifile1 is copied to make the output.
    coordkeys : sequence of str
        Variable names left as copied from ifile1. The default list is
        only read, never modified.
    verbose : int
        Stored on the Pseudo2NetCDF copier.

    Returns
    -------
    tmpfile : PseudoNetCDFFile
        Copy of ifile1 whose shared, non-coordinate variables hold the
        function result with invalid values replaced by -999. Variables
        missing from ifile2 trigger a warning and keep their ifile1 data.
    """
    from PseudoNetCDF.sci_var import Pseudo2NetCDF

    # Copy infile1 to a temporary PseudoNetCDFFile
    p2p = Pseudo2NetCDF()
    p2p.verbose = verbose
    tmpfile = PseudoNetCDFFile()
    p2p.convert(ifile1, tmpfile)

    # For each variable, assign the new value
    # to the tmpfile variables.
    for k in tmpfile.variables.keys():
        if k in coordkeys:
            continue
        outvar = tmpfile.variables[k]
        in1var = ifile1.variables[k]
        if k not in ifile2.variables.keys():
            warn('%s not found in ifile2' % k)
            continue
        in2var = ifile2.variables[k]
        outval = np.ma.filled(
            np.ma.masked_invalid(func(in1var[...], in2var[...])), -999)
        if outvar.ndim > 0:
            outvar[:] = outval
        else:
            # ndarray.itemset was removed in NumPy 2.0; Ellipsis
            # assignment stores a scalar in a 0-d array on all versions.
            outvar[...] = outval
        outvar.fill_value = -999
    return tmpfile
def __init__(self, ncffile, dimension, oldres, newres,
             repeat_method=repeat, condense_method=sum, nthick=0):
    """
    Wrap ncffile so that selected dimensions use a new resolution.

    Parameters
    ----------
    ncffile : file-like object
        Source file exposing ``dimensions`` and ``variables``.
    dimension : str or sequence of str
        Key(s) of the dimension(s) to rescale.
    oldres, newres : scalar or sequence
        Old and new resolution for each entry of ``dimension``.
    repeat_method, condense_method : callable
        Stored on the instance; presumably consumed by ``self.__method``,
        which is defined outside this block.
    nthick : int
        Stored on the instance; its use is not visible from here.

    Raises
    ------
    ValueError
        If no newres/oldres ratio (or its inverse) is a whole number.
    """
    from PseudoNetCDF.sci_var import Pseudo2NetCDF
    PseudoNetCDFFile.__init__(self)

    self.__dimension = array(dimension, ndmin=1)
    old_resolution = array(oldres, ndmin=1)
    new_resolution = array(newres, ndmin=1)
    self.__mesh = new_resolution / old_resolution.astype('f')
    self.__condense = condense_method
    self.__repeat = repeat_method
    self.__file = ncffile
    self.__nthick = nthick

    whole_multiple = (self.__mesh % 1) == 0
    whole_divisor = (1. / self.__mesh) % 1 == 0
    if not logical_or(whole_multiple, whole_divisor).any():
        raise ValueError("One resolution must be a factor of the other.")

    Pseudo2NetCDF().addDimensions(self.__file, self)

    # First variable whose name does not contain TFLAG.
    non_time_keys = [
        key for key in self.__file.variables.keys() if 'TFLAG' not in key]
    any_non_time_key = non_time_keys[0]

    scale_factors = 1. / self.__mesh
    for dimkey, factor in zip(self.__dimension, scale_factors):
        source_dim = self.dimensions[dimkey]
        scaled_dim = self.createDimension(
            str(dimkey), len(source_dim) * factor)
        scaled_dim.setunlimited(source_dim.isunlimited())

    # Run the transform once on a sample variable; the result is discarded.
    self.__method(self.__file.variables[any_non_time_key])

    self.variables = PseudoNetCDFVariables(
        self.__variables, self.__file.variables.keys())
def slice_dim(f, slicedef, fuzzydim=True):
    """
    variables have dimensions (e.g., time, layer, lat, lon), which can be
    subset using:

        slice_dim(f, 'dim,start,stop,stride')

    e.g., slice_dim(f, 'layer,0,47,5') would sample every fifth layer
    starting at 0

    Parameters
    ----------
    f : file-like object
        Source file exposing ``dimensions`` and ``variables``.
    slicedef : str
        'dim[,start[,stop[,stride]]]'. With only a start, the single index
        start is selected (negative indices included). With only a
        dimension name, the whole dimension is kept. Entries after the
        dimension name are evaluated as Python expressions.
    fuzzydim : bool
        If True, dimensions named ``dim`` followed by digits are sliced
        the same way first.

    Returns
    -------
    PseudoNetCDFFile with the sliced variables and an updated ``history``
    attribute, or the input file unchanged (after a warning) when the
    dimension is not present.
    """
    inf = f
    historydef = "slice_dim(f, %s, fuzzydim = %s); " % (slicedef, fuzzydim)
    slicedef = slicedef.split(',')
    # NOTE(security): the numeric entries go through eval (which is what
    # lets 'None' round-trip in the recursive call below); slicedef must
    # come from a trusted source.
    slicedef = [slicedef[0]] + list(map(eval, slicedef[1:]))
    if len(slicedef) == 2:
        # Single index: select [start:start + 1]. For start == -1 that
        # stop would be 0 and give an empty slice, so leave it open-ended.
        stop = slicedef[-1] + 1
        slicedef.append(None if stop == 0 else stop)
    # Pad so that a bare 'dim' or a missing stride still unpacks.
    slicedef = (slicedef + [None] * 3)[:4]
    dimkey, dmin, dmax, dstride = slicedef
    if dimkey not in inf.dimensions:
        warn('%s not in file' % dimkey)
        return inf

    unlimited = inf.dimensions[dimkey].isunlimited()
    if fuzzydim:
        # Sibling dimensions share the name with a numeric suffix.
        partial_check = [
            key for key in inf.dimensions
            if dimkey == key[:len(dimkey)] and key[len(dimkey):].isdigit()
        ]
        for dimk in partial_check:
            inf = slice_dim(inf, '%s,%s,%s,%s' % (dimk, dmin, dmax, dstride))

    from PseudoNetCDF.sci_var import Pseudo2NetCDF
    p2p = Pseudo2NetCDF(verbose=0)
    outf = PseudoNetCDFFile()
    p2p.addDimensions(inf, outf)
    p2p.addGlobalProperties(inf, outf)
    for varkey in inf.variables.keys():
        var = inf.variables[varkey]
        if dimkey not in var.dimensions:
            p2p.addVariable(inf, outf, varkey)
            continue
        axis = list(var.dimensions).index(dimkey)
        # Move the target axis to the front, slice it, and move it back.
        vout = var[...].swapaxes(0, axis)[dmin:dmax:dstride].swapaxes(
            0, axis)
        newdim = outf.createDimension(dimkey, vout.shape[axis])
        newdim.setunlimited(unlimited)
        outf.variables[varkey] = vout

    history = getattr(outf, 'history', '')
    history += historydef
    setattr(outf, 'history', history)
    return outf
def __variables(self, k):
    """
    Return variable ``k`` of the wrapped file after ``self.__method``.

    Raises KeyError for names containing 'TFLAG' when any entry of
    ``self.__axis`` is non-zero. Properties of the source variable are
    copied onto the result before it is returned.
    """
    tflag_blocked = 'TFLAG' in k and (self.__axis != 0).any()
    if tflag_blocked:
        raise KeyError("Tflag is off limits")

    source_var = self.__file.variables[k]
    result = self.__method(source_var)
    Pseudo2NetCDF().addVariableProperties(source_var, result)
    return result
def __variables(self, k):
    """
    Return variable ``k`` of the wrapped file indexed by ``self.__idx``.

    Variables whose name contains 'TFLAG' are returned unmodified. For
    all others the indexed result receives the properties of the original
    variable before it is returned.
    """
    from PseudoNetCDF.sci_var import Pseudo2NetCDF

    if 'TFLAG' in k:
        return self.__file.variables[k]

    ov = self.__file.variables[k]
    nv = ov[self.__idx]
    # addVariableProperties takes (source, destination): copy from the
    # original variable onto the subset. The arguments were reversed,
    # which wrote the subset's properties back onto the source instead.
    Pseudo2NetCDF().addVariableProperties(ov, nv)
    return nv
def stack_files(fs, stackdim, coordkeys=None):
    """
    Create files with dimensions extended by stacking.

    Parameters
    ----------
    fs : sequence of file-like objects
        Files to stack; the first supplies dimensions, global properties
        and the default coordinate keys.
    stackdim : str
        Dimension along which variables are concatenated.
    coordkeys : sequence of str, optional
        Variables allowed to repeat across files without a warning.
        Defaults to ``fs[0].getCoords()``.

    Returns
    -------
    f : PseudoNetCDFFile
        Variables on ``stackdim`` are concatenated across all files;
        other variables keep the value from the first file holding them.

    The only sanity check is that ``stackdim`` is the sole dimension
    whose length is not shared by all files.
    """
    f = PseudoNetCDFFile()
    tmpf = fs[0]
    if coordkeys is None:
        coordkeys = tmpf.getCoords()
    dimensions = [f_.dimensions for f_ in fs]

    # Dimensions (other than stackdim) with one common length.
    shareddims = {}
    for dimk, dim in tmpf.dimensions.items():
        if dimk == stackdim:
            continue
        dimlens = [len(dims[dimk]) for dims in dimensions]
        if all([len(dim) == dimlen for dimlen in dimlens]):
            shareddims[dimk] = len(dim)
    differentdims = [
        set(dims.keys()).difference(shareddims.keys()) for dims in dimensions
    ]
    assert all([different == set([stackdim])
                for different in differentdims]), (
        'only the stack dimension (%s) may differ between files' % stackdim)

    from PseudoNetCDF.sci_var import Pseudo2NetCDF
    p2p = Pseudo2NetCDF(verbose=0)
    p2p.addDimensions(tmpf, f)
    stacked = f.createDimension(
        stackdim, sum([len(dims[stackdim]) for dims in dimensions]))
    # Re-creating the dimension drops its unlimited flag; restore it from
    # the first file, as the other dimension transforms in this file do.
    stacked.setunlimited(tmpf.dimensions[stackdim].isunlimited())
    p2p.addGlobalProperties(tmpf, f)

    for srcf in fs:
        for varkey, var in srcf.variables.items():
            if stackdim not in var.dimensions:
                if varkey in f.variables:
                    if varkey not in coordkeys:
                        warn(('Got duplicate variables for %s ' % varkey) +
                             'without stackable dimension; first value ' +
                             'retained')
                else:
                    p2p.addVariable(srcf, f, varkey, data=True)
            elif varkey not in f.variables.keys():
                # First sighting: concatenate this variable from all files.
                axisi = list(var.dimensions).index(stackdim)
                values = np.ma.concatenate(
                    [f_.variables[varkey][:] for f_ in fs], axis=axisi)
                p2p.addVariable(srcf, f, varkey, data=False)
                f.variables[varkey][:] = values
    return f
def pncfunc(func, ifile1, coordkeys=None, verbose=0):
    """
    Perform function (func) on all variables in ifile1.  The returned file
    (rfile) contains the result

    rfile = func(ifile1)

    Parameters
    ----------
    func : callable or str
        A callable is applied as ``func(var[:])``. A string is either the
        name of a no-argument method of the variable, or text starting
        with '.' that is appended to ``in1var[:]`` and evaluated.
    ifile1 : file-like object
        Input file; it is copied to create the output.
    coordkeys : sequence of str, optional
        Variables left unchanged. Defaults to ``ifile1.getCoords()``.
    verbose : int
        Stored on the Pseudo2NetCDF copier.

    Returns
    -------
    tmpfile : PseudoNetCDFFile
        Copy of ifile1 whose non-coordinate variables hold the function
        result with invalid values replaced by -999.

    Raises
    ------
    ValueError
        If func is a string that is neither an attribute of the variable
        nor starts with '.'.
    """
    from PseudoNetCDF.sci_var import Pseudo2NetCDF
    if coordkeys is None:
        coordkeys = ifile1.getCoords()

    # Copy infile1 to a temporary PseudoNetCDFFile
    p2p = Pseudo2NetCDF()
    p2p.verbose = verbose
    tmpfile = PseudoNetCDFFile()
    p2p.convert(ifile1, tmpfile)

    # For each variable, assign the new value
    # to the tmpfile variables.
    for k in tmpfile.variables.keys():
        if k in coordkeys:
            continue
        outvar = tmpfile.variables[k]
        in1var = ifile1.variables[k]
        if callable(func):
            outval = func(in1var[:])
        elif hasattr(in1var, func):
            outval = getattr(in1var, func)()
        elif '.' == func[:1]:
            # NOTE(security): func is evaluated as code; it must come from
            # a trusted caller.
            outval = eval('in1var[:]' + func)
        else:
            # Without this branch outval was left unbound, or silently
            # reused from the previous variable.
            raise ValueError(
                'func must be callable, a method name of the variable, '
                'or start with "."; got %r for %s' % (func, k))
        outval = np.ma.filled(np.ma.masked_invalid(outval), -999)
        if outvar.ndim > 0:
            outvar[:] = outval
        else:
            # ndarray.itemset was removed in NumPy 2.0; Ellipsis
            # assignment stores a scalar in a 0-d array on all versions.
            outvar[...] = outval
        outvar.fill_value = -999
    return tmpfile
def splitdim(inf, olddim, newdims, newshape):
    """
    Split one dimension of a file into several new dimensions.

    Parameters
    ----------
    inf : file-like object
        Source file exposing ``dimensions`` and ``variables``.
    olddim : str
        Name of the dimension to replace.
    newdims : sequence of str
        Names of the replacement dimensions.
    newshape : sequence of int
        Lengths of the replacement dimensions; their product must equal
        the length of ``olddim``.

    Returns
    -------
    outf : PseudoNetCDFFile
        Variables that used ``olddim`` are reshaped onto ``newdims``;
        all other variables and dimensions are copied.

    Raises
    ------
    ValueError
        If the product of ``newshape`` differs from the old length, or if
        ``newdims`` and ``newshape`` differ in length.
    """
    oldsize = len(inf.dimensions[olddim])
    newsize = np.prod(newshape)
    if newsize != oldsize:
        raise ValueError(
            'New shape, must match old dimension length: %d %d %s' %
            (oldsize, newsize, newshape))
    if len(newdims) != len(newshape):
        raise ValueError('Shape and dimensions must match in length')

    from PseudoNetCDF.sci_var import Pseudo2NetCDF
    copier = Pseudo2NetCDF()
    outf = PseudoNetCDFFile()

    # Dimensions: the old one is replaced in place by its successors.
    for dimname, _ in inf.dimensions.items():
        if dimname != olddim:
            copier.addDimension(inf, outf, dimname)
            continue
        for splitname, splitlen in zip(newdims, newshape):
            outf.createDimension(splitname, splitlen)

    # Variables: reshape those on the old dimension, copy the rest.
    for varname, invar in inf.variables.items():
        if olddim not in invar.dimensions:
            copier.addVariable(inf, outf, varname)
            continue
        outdims = []
        outshape = []
        for dimname in invar.dimensions:
            if dimname == olddim:
                outdims.extend(newdims)
                outshape.extend(newshape)
            else:
                outdims.append(dimname)
                outshape.append(len(inf.dimensions[dimname]))
        outvar = outf.createVariable(
            varname, invar.dtype.char, tuple(outdims))
        copier.addVariableProperties(invar, outvar)
        outvar[:] = invar[:].reshape(*outshape)
    return outf
elif args.verbose > 2: print(point, isin) varkeys = ['temperature', 'windDir', 'windSpeed', 'dewpoint', 'altimeter'] vardds = [k + 'DD' for k in varkeys] if args.verbose > 1: print('Subset variables') getvarkeys = varkeys + vardds + \ ['stationName', 'timeObs', 'timeNominal', 'elevation', 'latitude', 'longitude'] if args.verbose > 1: print('Slicing files') p2p = Pseudo2NetCDF(verbose=0) outfile = PseudoNetCDFFile() p2p.addDimensions(ncff, outfile) outfile.createDimension('recNum', len(found_point_ids)) p2p.addGlobalProperties(ncff, outfile) for vark in getvarkeys: p2p.addVariable(ncff, outfile, vark, data=False) for vark in getvarkeys: invar = ncff.variables[vark] outvar = outfile.variables[vark] recid = list(invar.dimensions).index('recNum') outvar[:] = invar[:].take(found_point_ids, recid) if args.humidity:
def reduce_dim(
        f, reducedef, fuzzydim=True,
        metakeys=('time layer level latitude longitude time_bounds '
                  'latitude_bounds longitude_bounds ROW COL LAY TFLAG ETFLAG'
                  ).split()):
    """
    variable dimensions can be reduced using

    reduce_dim(file 'dim,function,weight')

    e.g., reduce_dim(layer,mean,weight).

    Weighting is not fully functional.

    Parameters
    ----------
    f : file-like object
        Source file exposing ``dimensions`` and ``variables``.
    reducedef : str or sequence of str
        'dim,function[,numweight[,denweight]]' or the same entries as a
        sequence. The weights are names of variables in ``f``.
    fuzzydim : bool
        If True, dimensions named ``dim`` followed by digits are reduced
        the same way first.
    metakeys : list of str
        Variables treated as metadata. They are reduced without weights,
        and names containing '_bounds' or '_bnds' get min/max handling.

    Returns
    -------
    PseudoNetCDFFile with the reduced variables and an updated ``history``
    attribute, or the input file unchanged (after a warning) when the
    dimension is not present.

    Raises
    ------
    ValueError
        If reducedef does not hold 2, 3 or 4 entries.
    """
    inf = f
    metakeys = [k for k in metakeys if k in inf.variables.keys()]
    historydef = "reduce_dim(f, %s, fuzzydim = %s, metakeys = %s); " % (
        reducedef, fuzzydim, metakeys)
    import numpy as np
    if hasattr(reducedef, 'split') and hasattr(reducedef, 'count'):
        commacount = reducedef.count(',')
        reducevals = reducedef.split(',')
    else:
        # A sequence of N entries corresponds to N - 1 commas. Using the
        # raw length made every sequence input fail at unpacking.
        commacount = len(reducedef) - 1
        reducevals = reducedef

    if commacount == 3:
        dimkey, func, numweightkey, denweightkey = reducevals
        numweight = inf.variables[numweightkey]
        denweight = inf.variables[denweightkey]
    elif commacount == 2:
        dimkey, func, numweightkey = reducevals
        numweight = inf.variables[numweightkey]
        denweightkey = None
    elif commacount == 1:
        dimkey, func = reducevals
        numweightkey = None
        denweightkey = None
    else:
        raise ValueError(
            'reducedef must be dim,function[,numweight[,denweight]]; '
            'got %r' % (reducedef,))

    if fuzzydim:
        # Sibling dimensions share the name with a numeric suffix.
        partial_check = [
            key for key in inf.dimensions
            if dimkey == key[:len(dimkey)] and key[len(dimkey):].isdigit()
        ]
        for dimk in partial_check:
            if commacount == 1:
                inf = reduce_dim(inf, '%s,%s' % (dimk, func))
            elif commacount == 2:
                inf = reduce_dim(
                    inf, '%s,%s,%s' % (dimk, func, numweightkey))
            elif commacount == 3:
                inf = reduce_dim(
                    inf,
                    '%s,%s,%s,%s' % (dimk, func, numweightkey, denweightkey))

    if dimkey not in inf.dimensions:
        warn('%s not in file' % dimkey)
        return inf

    from PseudoNetCDF.sci_var import Pseudo2NetCDF
    p2p = Pseudo2NetCDF(verbose=0)
    outf = PseudoNetCDFFile()
    p2p.addDimensions(inf, outf)
    # The reduced dimension is re-created below from the first result.
    del outf.dimensions[dimkey]
    p2p.addGlobalProperties(inf, outf)

    for varkey in inf.variables.keys():
        var = inf.variables[varkey]
        if dimkey not in var.dimensions:
            p2p.addVariable(inf, outf, varkey)
            continue

        axis = list(var.dimensions).index(dimkey)
        vreshape = var[slice(None)]
        if varkey not in metakeys:
            if numweightkey is None:
                vout = _getfunc(vreshape, func)(axis=axis, keepdims=True)
            elif denweightkey is None:
                wvar = var * np.array(
                    numweight, ndmin=var.ndim)[
                        (slice(None), ) * axis +
                        (slice(0, var.shape[axis]), )]
                vout = getattr(
                    wvar[(slice(None), ) * (axis + 1) + (None, )],
                    func)(axis=axis)
                vout.units = (
                    vout.units.strip() + ' * ' + numweight.units.strip())
                if hasattr(vout, 'base_units'):
                    vout.base_units = (
                        vout.base_units.strip() + ' * ' +
                        numweight.base_units.strip())
            else:
                nwvar = var * np.array(
                    numweight, ndmin=var.ndim)[
                        (slice(None), ) * axis +
                        (slice(0, var.shape[axis]), )]
                numerator = getattr(
                    nwvar[(slice(None), ) * (axis + 1) + (None, )],
                    func)(axis=axis)
                denominator = getattr(
                    np.array(denweight, ndmin=var.ndim)[
                        (slice(None), ) * axis +
                        (slice(0, var.shape[axis]), None)],
                    func)(axis=axis)
                vout = numerator / denominator
        else:
            vout = _getfunc(vreshape, func)(axis=axis, keepdims=True)
            if '_bounds' in varkey or '_bnds' in varkey:
                # Bounds take the extremes rather than the reduced value.
                vmin = _getfunc(vreshape, 'min')(axis=axis, keepdims=True)
                vmax = _getfunc(vreshape, 'max')(axis=axis, keepdims=True)
                if 'lon' in varkey or 'time' in varkey:
                    try:
                        # Four-vertex bounds
                        vout[..., [0, 3]] = vmin[..., [0, 3]]
                        vout[..., [1, 2]] = vmax[..., [1, 2]]
                    except Exception:
                        # Fall back to two-vertex (lower, upper) bounds
                        vout[..., [0, 1]] = vmin[0, 0], vmax[0, 1]
                elif 'lat' in varkey:
                    nmin = vout.shape[-1] // 2
                    vout[..., :nmin] = vmin[..., :nmin]
                    vout[..., nmin:] = vmax[..., nmin:]

        if dimkey not in outf.dimensions:
            outdim = outf.createDimension(dimkey, vout.shape[axis])
            outdim.setunlimited(inf.dimensions[dimkey].isunlimited())
        nvar = outf.variables[varkey] = PseudoNetCDFMaskedVariable(
            outf, varkey, var.dtype.char, var.dimensions, values=vout)
        for k in var.ncattrs():
            setattr(nvar, k, getattr(var, k))

    history = getattr(outf, 'history', '')
    history += historydef
    setattr(outf, 'history', history)
    return outf
def extract_lonlat(f, lonlat, unique=False, gridded=None, method='nn',
                   passthrough=True):
    """
    Extract variables at a set of longitude/latitude points.

    Parameters
    ----------
    f : file-like object
        Source file with 'longitude' and 'latitude' variables.
    lonlat : str or list of str
        'lon,lat' pairs separated by '/', or paths of files holding such
        text. List entries are joined with '/'.
    unique : bool
        If True, points mapping to the same index pair are collapsed to
        the first occurrence. This needs index arrays, which only the
        'nn' and 'KDTree' methods define.
    gridded : bool, optional
        Whether the coordinates form a grid. When None it is inferred
        from the presence of longitude/latitude, COL/ROW or x/y
        dimensions.
    method : str
        'nn' (nearest by Euclidean distance in degrees), 'KDTree',
        'linear', 'cubic' or 'quintic'.
    passthrough : bool
        If True, variables without longitude/latitude coordinates are
        processed too; otherwise they are omitted.

    Returns
    -------
    outf : PseudoNetCDFFile
        Extracted variables on a 'points' dimension; ``lonlatcoords``
        records the requested coordinates.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names.
    """
    from PseudoNetCDF.sci_var import Pseudo2NetCDF
    try:
        from StringIO import StringIO as BytesIO
    except ImportError:
        from io import BytesIO
    import os

    outf = PseudoNetCDFFile()
    outf.dimensions = f.dimensions.copy()
    if hasattr(f, 'groups'):
        outf.groups = {}
        for grpk, grpv in f.groups.items():
            outf.groups[grpk] = extract(grpv, lonlat)

    p2p = Pseudo2NetCDF()
    p2p.verbose = 0
    p2p.addGlobalProperties(f, outf)

    longitude = f.variables['longitude'][:]
    latitude = f.variables['latitude'][:]
    if gridded is None:
        gridded = (
            ('longitude' in f.dimensions and 'latitude' in f.dimensions) or
            ('COL' in f.dimensions and 'ROW' in f.dimensions) or
            ('x' in f.dimensions and 'y' in f.dimensions))

    if isinstance(lonlat, str):
        lonlat = [lonlat]
    lonlatout = []
    for ll in lonlat:
        if isinstance(ll, str):
            try:
                if os.path.exists(ll):
                    # Close the file promptly instead of leaking the
                    # handle.
                    with open(ll, 'r') as llfile:
                        ll = llfile.read().strip()
            except Exception as e:
                warn('Windows machines may have uncessary warnings; ' +
                     str(e))
        lonlatout.append(ll)
    lonlat = '/'.join(lonlatout)
    try:
        lons, lats = np.genfromtxt(
            BytesIO(bytes(lonlat.replace('/', '\n'), 'ASCII')),
            delimiter=',').T
    except Exception as e:
        print(str(e))
        raise
    outf.lonlatcoords = lonlat

    latlon1d = longitude.ndim == 1 and latitude.ndim == 1
    if method == 'nn':
        # Append a trailing axis so each grid cell is compared with
        # every requested point.
        if latlon1d and gridded:
            latitude = latitude[(slice(None), None, None)]
            longitude = longitude[(None, slice(None), None)]
        else:
            latitude = latitude[Ellipsis, None]
            longitude = longitude[Ellipsis, None]

        lonlatdims = latitude.ndim - 1
        londists = longitude - lons[(None, ) * lonlatdims]
        latdists = latitude - lats[(None, ) * lonlatdims]
        totaldists = ((latdists**2 + londists**2)**.5)
        if latlon1d and not gridded:
            latidxs, = lonidxs, = np.unravel_index(
                totaldists.reshape(-1, latdists.shape[-1]).argmin(0),
                totaldists.shape[:-1])
        else:
            latidxs, lonidxs = np.unravel_index(
                totaldists.reshape(-1, latdists.shape[-1]).argmin(0),
                totaldists.shape[:-1])

        def extractfunc(v, thiscoords):
            newslice = tuple([{
                'latitude': latidxs,
                'longitude': lonidxs,
                'points': latidxs,
                'PERIM': latidxs
            }.get(d, slice(None)) for d in thiscoords])
            if newslice == ():
                return v
            else:
                return v[:][newslice]
    elif method == 'KDTree':
        if latlon1d and gridded:
            longitude, latitude = np.meshgrid(longitude, latitude)
        from scipy.spatial import KDTree
        tree = KDTree(np.ma.array([latitude.ravel(), longitude.ravel()]).T)
        _, idxs = tree.query(np.ma.array([lats, lons]).T)
        if latlon1d and not gridded:
            latidxs, = lonidxs, = np.unravel_index(idxs, latitude.shape)
        else:
            latidxs, lonidxs = np.unravel_index(idxs, latitude.shape)

        def extractfunc(v, thiscoords):
            newslice = tuple([{
                'latitude': latidxs,
                'longitude': lonidxs,
                'points': latidxs,
                'PERIM': latidxs
            }.get(d, slice(None)) for d in thiscoords])
            return v[newslice]
    elif method in ('linear', 'cubic'):
        from scipy.interpolate import LinearNDInterpolator
        from scipy.interpolate import CloughTocher2DInterpolator
        if method == 'cubic':
            interpclass = CloughTocher2DInterpolator
        else:
            interpclass = LinearNDInterpolator
        if latlon1d and gridded:
            longitude, latitude = np.meshgrid(longitude, latitude)
        points = np.array([longitude.ravel(), latitude.ravel()]).T

        def extractfunc(v, thiscoords):
            if ('latitude' not in thiscoords or
                    'longitude' not in thiscoords):
                return v
            # Collapse the latitude/longitude axes into one point axis.
            newshape = [
                dl if d not in ('latitude', 'longitude') else -1
                for d, dl in zip(thiscoords, v.shape)
            ]
            i1 = newshape.index(-1)
            if newshape.count(-1) > 1:
                i2 = newshape.index(-1, i1 + 1)
                assert (i1 == (i2 - 1))
                newshape.pop(i2)
            i2df = interpclass(
                points, np.rollaxis(v.reshape(*newshape), i1, 0))
            out = np.rollaxis(
                np.ma.array([i2df(lon, lat)
                             for lat, lon in zip(lats, lons)]),
                0, len(newshape))
            return out

        latidxs = extractfunc(latitude, ('latitude', 'longitude'))
    elif method in ('cubic', 'quintic'):
        # 'cubic' is taken by the branch above, so only 'quintic' gets
        # here.
        from scipy.interpolate import interp2d
        if latlon1d and gridded:
            longitude, latitude = np.meshgrid(longitude, latitude)

        def extractfunc(v, thiscoords):
            i2df = interp2d(latitude, longitude, v, kind=method)
            return np.ma.array(
                [i2df(lat, lon) for lat, lon in zip(lats, lons)])

        latidxs = extractfunc(latitude, '')
    else:
        raise ValueError(
            'method must be: nn, KDTree, linear, cubic, or quintic')

    if unique:
        tmpx = OrderedDict()
        for lon, lat, lonlatstr in zip(lonidxs, latidxs,
                                       outf.lonlatcoords.split('/')):
            if (lon, lat) not in tmpx:
                tmpx[(lon, lat)] = lonlatstr
        # keys() is a view on Python 3; make it a list so numpy builds a
        # 2-D array that can be unpacked.
        lonidxs, latidxs = np.array(list(tmpx.keys())).T
        outf.lonlatcoords_orig = outf.lonlatcoords
        outf.lonlatcoords = '/'.join(
            [tmpx[k] for k in zip(lonidxs, latidxs)])

    for k, v in f.variables.items():
        try:
            coords = v.coordinates.split()
        except AttributeError:
            # No coordinates attribute: fall back to dimension names.
            coords = v.dimensions
        dims = v.dimensions
        outf.createDimension('points', len(latidxs))
        if passthrough or 'longitude' in coords or 'latitude' in coords:
            try:
                del outf.variables[k]
            except KeyError:
                pass
            newdims = []
            if len(dims) != len(coords):
                thiscoords = dims
            else:
                thiscoords = coords
            # Every lon/lat axis collapses into a single 'points' axis.
            for d, c in zip(dims, thiscoords):
                if (d not in ('longitude', 'latitude') and
                        c not in ('longitude', 'latitude')):
                    newdims.append(d)
                elif 'points' not in newdims:
                    newdims.append('points')
            newdims = tuple(newdims)
            newv = extractfunc(v, thiscoords)
            propd = {ak: getattr(v, ak) for ak in v.ncattrs()}
            nv = outf.createVariable(
                k, v.dtype.char, newdims, values=newv, **propd)
            setattr(nv, 'coordinates',
                    getattr(v, 'coordinates', ' '.join(coords)))
            for di, dk in enumerate(newdims):
                if dk not in outf.dimensions:
                    outf.createDimension(dk, nv.shape[di])
    return outf