def setUp(self):
    ''' create two test variables '''
    # create axis and variable instances (make *copies* of data and attributes!)
    x1 = np.linspace(0,10,15); x2 = np.linspace(2,8,18)
    if self.ldatetime:
        start_datetime, end_datetime = pd.to_datetime('1981-05-01'), pd.to_datetime('1981-05-16')
        t1 = np.arange(start_datetime, end_datetime, dtype='datetime64[D]')
        xax1 = Axis(name='Time1-Axis', units='X Time', coord=t1)
        t2 = np.arange(start_datetime, end_datetime+np.timedelta64(3, 'D'), dtype='datetime64[D]')
        xax2 = Axis(name='Time2-Axis', units='X Time', coord=t2)
    else:
        xax1 = Axis(name='X1-Axis', units='X Units', coord=x1)
        xax2 = Axis(name='X2-Axis', units='X Units', coord=x2)
    var0 = Variable(axes=(xax1,), data=np.sin(x1), atts=dict(name='relative', units=''))
    var1 = Variable(axes=(xax1,), data=x1.copy(), atts=dict(name='blue', units='units'))
    self.var0 = var0; self.var1 = var1; self.xax1 = xax1
    var2 = Variable(name='purple', units='units', axes=(xax2,), data=(x2**2)/5.)
    self.var2 = var2; self.xax2 = xax2
    # create error variables with random noise
    noise1 = np.random.rand(len(xax1))*var1.data_array.std()/2.
    err1 = Variable(axes=(xax1,), data=noise1, atts=dict(name='blue_std', units='units'))
    noise2 = np.random.rand(len(xax2))*var2.data_array.std()/2.
    err2 = Variable(name='purple', units='units', axes=(xax2,), data=noise2)
    self.err1 = err1; self.err2 = err2
    # add to list
    self.vars = [var1, var2]
    self.errs = [err1, err2]
    self.axes = [xax1, xax2]
def setUp(self):
    ''' create two test variables '''
    # create axis and variable instances (make *copies* of data and attributes!)
    x1 = np.random.randn(180)
    xax1 = Axis(name='X1-Axis', units='X Units', length=len(x1))
    var1 = Variable(axes=(xax1,), data=x1.copy(), atts=dict(name='blue', units='units'))
    self.var1 = var1
    self.xax1 = xax1
    x2 = np.random.randn(180)
    xax2 = Axis(name='X2-Axis', units='X Units', length=len(x2))
    var2 = Variable(name='purple', units='units', axes=(xax2,), data=x2)
    self.var2 = var2
    self.xax2 = xax2
    # actual normal distribution
    self.dist = 'norm'
    distvar = VarRV(name=self.dist, units='units', dist=self.dist, params=(0, 1))
    self.distVar = distvar
    # add to list
    self.vars = [var1, var2]
    self.axes = [xax1, xax2]
def setUp(self):
    ''' create a 2D test variable '''
    # create axis and variable instances (make *copies* of data and attributes!)
    xax = Axis(name='X-Axis', units='X Units', coord=np.linspace(0,10,15))
    yax = Axis(name='Y-Axis', units='Y Units', coord=np.linspace(2,8,18))
    xx,yy = np.meshgrid(yax[:],xax[:],) # create mesh (transposed w.r.t. values)
    var0 = Variable(axes=(xax,yax), data=np.sin(xx)*np.cos(yy), atts=dict(name='Color', units='Color Units'))
    var1 = Variable(axes=(xax,yax), data=np.cos(xx)*np.sin(yy), atts=dict(name='Contour', units='Contour Units'))
    self.var0 = var0; self.var1 = var1; self.xax = xax; self.yax = yax
    # add to list
    self.axes = [xax, yax]
    self.vars = [var0, var1]
def setUp(self):
    ''' create a reference and two test variables for Taylor plot '''
    self.thetamin = 0.; self.Rmin = 0.; self.thetamax = np.pi/2.; self.Rmax = 2.
    # create axis and variable instances (make *copies* of data and attributes!)
    self.x1 = np.linspace(0,10,11); self.xax1 = Axis(name='X1-Axis', units='X Units', coord=self.x1)
    self.data0 = np.sin(self.x1)
    self.var0 = Variable(axes=(self.xax1,), data=self.data0, atts=dict(name='Reference', units='units'))
    # create error variables with random noise
    self.data1 = self.data0 + ( np.random.rand(len(self.xax1))-0.5 )*0.5
    self.var1 = Variable(axes=(self.xax1,), data=self.data1, atts=dict(name='Blue', units='units'))
    self.data2 = self.data0 + ( np.random.rand(len(self.xax1))-0.5 )*1.5
    self.var2 = Variable(axes=(self.xax1,), data=self.data2, atts=dict(name='Red', units='units'))
    self.data3 = 1. + np.random.rand(len(self.xax1))*1.5
    self.var3 = Variable(axes=(self.xax1,), data=self.data3, atts=dict(name='Random', units='units'))
    # add to list
    self.vars = [self.var0, self.var1, self.var2, self.var3]
    self.data = [self.data0, self.data1, self.data2, self.data3]
    self.axes = [self.xax1,]
def testIrregularSurfacePlot(self):
    ''' test a color/surface plot with irregular coordinate variables '''
    fig,ax = getFigAx(1, name=sys._getframe().f_code.co_name[4:], **figargs) # use test method name as title
    assert fig.__class__.__name__ == 'MyFigure'
    assert fig.axes_class.__name__ == 'MyAxes'
    assert not isinstance(ax,(list,tuple)) # should return a "naked" axes
    var0 = self.var0
    # create coordinate variables
    xax,yax = var0.axes
    xx,yy = np.meshgrid(xax[:],yax[:], indexing='ij')
    xax = Variable(name='X Coordinate', units='X Units', data=xx, axes=var0.axes)
    yax = Variable(name='Y Coordinate', units='Y Units', data=yy, axes=var0.axes)
    # create plot
    plt = ax.surfacePlot(var0, flipxy=False, clog=False, xax=xax, yax=yax,
                         llabel=True, lprint=True, clim=var0.limits(),)
    assert plt
    # add label
    ax.addLabel(label=0, loc=4, lstroke=False, lalphabet=True, size=None, prop=None)
def setUp(self):
    ''' create two test variables '''
    # define plot ranges
    self.thetamin = 0.; self.Rmin = 0.; self.thetamax = 2*np.pi; self.Rmax = 2.
    # create theta axis and variable instances (values are radius values, I believe)
    theta1 = np.linspace(self.thetamin,self.thetamax,361)
    thax1 = Axis(atts=dict(name='$\\theta$-Axis', units='Radians'), coord=theta1)
    var0 = Variable(axes=(thax1,), data=np.sin(theta1), atts=dict(name='Blue', units='units'))
    tmp = theta1.copy()*(self.Rmax-self.Rmin)/(self.thetamax-self.thetamin)
    var1 = Variable(axes=(thax1,), data=tmp, atts=dict(name='Red', units='units'))
    self.var0 = var0; self.var1 = var1; self.xax1 = theta1
    # create error variables with random noise
    noise0 = np.random.rand(len(thax1))*var0.data_array.std()/2.
    err0 = Variable(axes=(thax1,), data=noise0, atts=dict(name='Blue Noise', units='units'))
    noise1 = np.random.rand(len(thax1))*var1.data_array.std()/2.
    err1 = Variable(axes=(thax1,), data=noise1, atts=dict(name='Red Noise', units='units'))
    self.err1 = err1; self.err0 = err0
    # add to list
    self.vars = [var0, var1]
    self.errs = [err0, err1]
    self.axes = [thax1,]
def addLengthAndNamesOfMonth(dataset, noleap=False, length=None, names=None):
    ''' Function to add the names and length of month to a NetCDF dataset. '''
    if not isinstance(dataset,Dataset): raise TypeError
    # attributes
    lenatts = dict(name='length_of_month', units='days', long_name='Length of Month')
    stratts = dict(name='name_of_month', units='', long_name='Name of the Month')
    # data
    if length is None: # leap year or no leap year
        if noleap: length = days_per_month_365
        else: length = days_per_month
    if names is None: names = name_of_month
    # create variables
    if isinstance(dataset, DatasetNetCDF) and 'w' in dataset.mode:
        dataset.addVariable(Variable(axes=(dataset.time,), data=length, atts=lenatts), asNC=True)
        dataset.addVariable(Variable(axes=(dataset.time,), data=names, atts=stratts), asNC=True)
    else:
        # N.B.: char/string arrays are currently not supported as Variables
        dataset.addVariable(Variable(axes=(dataset.time,), data=length, atts=lenatts))
        dataset.addVariable(Variable(axes=(dataset.time,), data=names, atts=stratts))
    # return length variable
    return dataset.variables[lenatts['name']]
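# --- usage sketch (not part of the original module) ---
# Assuming a writable DatasetNetCDF with a 12-step monthly 'time' axis, as used in the
# surrounding snippets; folder, file name, and the 'rw' mode string are placeholders.
clim = DatasetNetCDF(name='climatology', folder=avgfolder, filelist=[climfile], mode='rw')
lom = addLengthAndNamesOfMonth(clim, noleap=False)  # adds 'length_of_month' and 'name_of_month'
print(lom[:])  # length of each month in days (365-day calendar if noleap=True)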
def setUp(self):
    ''' create two test variables '''
    # create axis and variable instances (make *copies* of data and attributes!)
    x1 = np.linspace(0, 10, 11)
    xax1 = Axis(name='X1-Axis', units='X Units', coord=x1)
    var0 = Variable(axes=(xax1,), data=np.sin(x1), atts=dict(name='relative', units=''))
    var1 = Variable(axes=(xax1,), data=x1.copy(), atts=dict(name='blue', units='units'))
    self.var0 = var0
    self.var1 = var1
    self.xax1 = xax1
    x2 = np.linspace(2, 8, 13)
    xax2 = Axis(name='X2-Axis', units='X Units', coord=x2)
    var2 = Variable(name='purple', units='units', axes=(xax2,), data=(x2**2) / 5.)
    self.var2 = var2
    self.xax2 = xax2
    # create error variables with random noise
    noise1 = np.random.rand(len(xax1)) * var1.data_array.std() / 2.
    err1 = Variable(axes=(xax1,), data=noise1, atts=dict(name='blue_std', units='units'))
    noise2 = np.random.rand(len(xax2)) * var2.data_array.std() / 2.
    err2 = Variable(name='purple', units='units', axes=(xax2,), data=noise2)
    self.err1 = err1
    self.err2 = err2
    # add to list
    self.vars = [var1, var2]
    self.errs = [err1, err2]
    self.axes = [xax1, xax2]
def addLandMask(dataset, varname='precip', maskname='landmask', atts=None):
    ''' Add a landmask variable with meta data from a masked variable to a dataset. '''
    # check
    if not isinstance(dataset,Dataset): raise TypeError
    if dataset.hasVariable(maskname):
        raise DatasetError, "The Dataset '%s' already has a field called '%s'."%(dataset.name,maskname)
    # attributes and meta data
    if atts is None:
        atts = default_varatts[maskname].copy()
        atts['long_name'] = 'Geographic Mask for Climatology Fields'
        atts['description'] = 'data are valid where this mask is zero'
    # axes and data
    var = dataset.variables[varname]
    axes = var.axes[-2:] # last two axes (i.e. map axes)
    data = var.getMask().__getitem__((0,)*(var.ndim-2)+(slice(None),)*2)
    if 'gdal' in dataset.__dict__ and dataset.gdal:
        if dataset.xlon not in axes or dataset.ylat not in axes: raise AxisError
    if not all([ax.name in ('x','y','lon','lat') for ax in axes]): raise AxisError
    # create variable and add to dataset
    if isinstance(dataset, DatasetNetCDF) and 'w' in dataset.mode:
        dataset.addVariable(Variable(axes=axes, name=maskname, data=data, atts=atts), asNC=True)
    else:
        dataset.addVariable(Variable(axes=axes, name=maskname, data=data, atts=atts))
    # return mask variable
    return dataset.variables[maskname]
def computeNetRadiation(dataset, asVar=True, lA=True, lrad=True, name='netrad'):
    ''' function to compute net radiation at surface for Penman-Monteith equation
        (http://www.fao.org/docrep/x0490e/x0490e07.htm#radiation) '''
    if lrad and 'SWDNB' in dataset and 'LWDNB' in dataset and 'SWUPB' in dataset and 'LWUPB' in dataset:
        data = radiation(dataset['SWDNB'][:], dataset['LWDNB'][:], dataset['SWUPB'][:], dataset['LWUPB'][:]) # downward total net radiation
    elif 'SWD' in dataset and 'GLW' in dataset and 'e' in dataset:
        if not lA: A = 0.23 # reference Albedo for grass
        elif lA and 'A' in dataset: A = dataset['A'][:]
        else: raise VariableError, "Actual Albedo is not available for radiation calculation."
        if 'TSmin' in dataset and 'TSmax' in dataset: Ts = dataset['TSmin'][:]; TSmax = dataset['TSmax'][:]
        elif 'TSmean' in dataset: Ts = dataset['TSmean'][:]; TSmax = None
        elif 'Ts' in dataset: Ts = dataset['Ts'][:]; TSmax = None
        else: raise VariableError, "Either 'Ts' or 'TSmean' are required to compute net radiation for PET calculation."
        data = radiation_black(A, dataset['SWD'][:], dataset['GLW'][:], dataset['e'][:], Ts, TSmax) # downward total net radiation
    else: raise VariableError, "Cannot determine net radiation calculation."
    # cast as Variable
    if asVar: var = Variable(data=data, name=name, units='W/m^2', axes=dataset['SWD'].axes)
    else: var = data
    # return new variable
    return var
def computeVaporDeficit(dataset):
    ''' function to compute water vapor deficit for Penman-Monteith PET
        (http://www.fao.org/docrep/x0490e/x0490e07.htm#air%20humidity) '''
    if 'Q2' in dataset: ea = dataset['Q2'][:] # actual vapor pressure
    elif 'q2' in dataset and 'ps' in dataset: # water vapor mixing ratio
        ea = dataset['q2'][:] * dataset['ps'][:] * 28.96 / 18.02
    else: raise VariableError, "Cannot determine 2m water vapor pressure for PET calculation."
    # get saturation water vapor
    if 'Tmin' in dataset and 'Tmax' in dataset: es = e_sat(dataset['Tmin'][:], dataset['Tmax'][:])
    # else: Es = e_sat(T) # backup, but not very accurate
    else: raise VariableError, "'Tmin' and 'Tmax' are required to compute saturation water vapor pressure for PET calculation."
    var = Variable(data=es - ea, name='vapdef', units='Pa', axes=dataset['Tmin'].axes)
    # return new variable
    return var
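# --- usage sketch (not part of the original module) ---
# computeNetRadiation and computeVaporDeficit both expect a dataset that already carries
# the required forcing fields (e.g. 'SWD', 'GLW', 'e', 'Tmin', 'Tmax'); 'wrf_clim' is a
# placeholder for such a dataset (e.g. a WRF surface climatology).
netrad = computeNetRadiation(wrf_clim, asVar=True, lA=False)  # uses reference albedo 0.23
vapdef = computeVaporDeficit(wrf_clim)                        # saturation deficit in Pa
wrf_clim.addVariable(netrad); wrf_clim.addVariable(vapdef)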
def loadGPCC_LTM(name=dataset_name, varlist=None, resolution='025', varatts=ltmvaratts, filelist=None, folder=ltmfolder):
    ''' Get a properly formatted dataset of the monthly accumulated GPCC precipitation climatology. '''
    # prepare input
    if resolution not in ('025', '05', '10', '25'):
        raise DatasetError, "Selected resolution '%s' is not available!" % resolution
    # translate varlist
    if varlist is None: varlist = varatts.keys()
    if varlist and varatts: varlist = translateVarNames(varlist, varatts)
    # load variables separately
    if 'p' in varlist:
        dataset = DatasetNetCDF(name=name, folder=folder, filelist=['normals_v2011_%s.nc' % resolution],
                                varlist=['p'], varatts=varatts, ncformat='NETCDF4_CLASSIC')
    if 's' in varlist:
        gauges = nc.Dataset(folder + 'normals_gauges_v2011_%s.nc' % resolution, mode='r', format='NETCDF4_CLASSIC')
        stations = Variable(data=gauges.variables['p'][0, :, :], axes=(dataset.lat, dataset.lon), **varatts['s'])
        # consolidate dataset
        dataset.addVariable(stations, asNC=False, copy=True)
    dataset = addGDALtoDataset(dataset, projection=None, geotransform=None, gridfolder=grid_folder)
    # N.B.: projection should be auto-detected as geographic
    # return formatted dataset
    return dataset
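# --- usage sketch (not part of the original module) ---
# Load the GPCC long-term mean climatology at 0.5 degree resolution with the default
# variable list; the module-level folders (ltmfolder, grid_folder) are assumed to be
# configured, and the default varlist presumably covers 'p' and 's' as handled above.
gpcc = loadGPCC_LTM(resolution='05')
print(gpcc)  # GDAL-enabled DatasetNetCDF in geographic coordinates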
def loadKister_StnTS(station=None, well=None, folder=None, varlist='default', varatts=None,
                     name='observations', title=None, basin=None, start_date=None, end_date=None,
                     sampling=None, period=None, date_range=None, llastIncl=True, WSC_station=None,
                     basin_list=None, filenames=None, time_axis='datetime', scalefactors=None,
                     metadata=None, lkgs=False, ntime=None, **kwargs):
    ''' load Kister station/well time-series data as a formatted GeoPy Dataset '''
    if folder and not os.path.exists(folder): raise IOError(folder)
    # default values
    if isinstance(varlist, str) and varlist == 'default':
        varlist = []
        if station: varlist += ['discharge']
        if well: varlist += ['head']
    if varatts is None: varatts = variable_attributes.copy()
    # figure out time axis
    if date_range: start_date, end_date, sampling = date_range
    time = timeAxis(start_date=start_date, end_date=end_date, sampling=sampling, date_range=date_range,
                    time_axis=time_axis, llastIncl=llastIncl, ntime=ntime, varatts=varatts)
    ntime = len(time)
    # load WSC station meta data
    pass
    # initialize Dataset
    dataset = Dataset(name=name, title=title if title else name.title(), atts=metadata)
    # load well data
    if 'head' in varlist:
        if not well: raise ArgumentError
        if folder: filepath = os.path.join(folder, well) # default output folder
        else: filepath = well
        data = readKister(filepath=filepath, period=(start_date, end_date), resample=sampling, lvalues=True)
        assert ntime == len(data), data.shape
        atts = varatts['head']
        dataset += Variable(atts=atts, data=data, axes=(time,))
    # load discharge/hydrograph data
    if 'discharge' in varlist:
        if not station: raise ArgumentError
        if folder: filepath = os.path.join(folder, station) # default output folder
        else: filepath = station
        data = readKister(filepath=filepath, period=(start_date, end_date), resample=sampling, lvalues=True)
        assert ntime == len(data), data.shape
        atts = varatts['discharge']
        if lkgs:
            data *= 1000.
            if atts['units'] == 'm^3/s': atts['units'] = 'kg/s'
        dataset += Variable(atts=atts, data=data, axes=(time,))
    # return formatted Dataset
    if scalefactors is not None and scalefactors != 1: raise NotImplementedError
    return dataset
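# --- usage sketch (not part of the original module) ---
# Load a monthly-resampled well hydrograph from a Kisters export; the folder, file name
# and the pandas-style sampling string are placeholders (guesses at what readKister and
# timeAxis expect in the calling project).
obs = loadKister_StnTS(well='W0000001.csv', folder='/data/kister/', varlist='default',
                       start_date='2010-01-01', end_date='2015-01-01', sampling='1M')
print(obs['head'][:])  # monthly hydraulic head time-series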
def loadEnKF_StnTS(folder=None, varlist='all', varatts=None, name='enkf', title='EnKF', basin=None,
                   start_date=None, end_date=None, sampling=None, period=None, date_range=None,
                   llastIncl=True, WSC_station=None, basin_list=None, filenames=None, prefix=None,
                   time_axis='datetime', scalefactors=None, metadata=None, lkgs=False, out_dir='out/',
                   yaml_file='../input_data/obs_meta.yaml', lYAML=True, nreal=None, ntime=None, **kwargs):
    ''' load EnKF ensemble data as formatted GeoPy Dataset '''
    out_folder = os.path.join(folder, 'out/') # default output folder
    if not os.path.exists(out_folder): raise IOError(out_folder)
    # default values
    if isinstance(varlist, str) and varlist == 'hydro': varlist = Hydro.varlist
    elif isinstance(varlist, str) and varlist == 'obs': varlist = Obs.varlist
    elif isinstance(varlist, str) and varlist == 'all': varlist = Hydro.varlist + Obs.varlist
    elif not isinstance(varlist, (tuple, list)): raise TypeError(varlist)
    if varatts is None: varatts = variable_attributes.copy()
    varmap = {varatt['name']: enkf_name for enkf_name, varatt in list(varatts.items())}
    varlist = [varmap[var] for var in varlist]
    # load WSC station meta data
    pass
    # initialize Dataset
    dataset = Dataset(name=name, title=title if title else name.title(), atts=metadata)
    ensemble = None; time = None; observation = None
    # load observation/innovation data
    if any([var in Obs.atts for var in varlist]):
        # load data
        vardata = loadObs(varlist=[var for var in varlist if var in Obs.atts],
                          folder=out_folder, lpandas=False)
        ntime, nobs, nreal = list(vardata.values())[0].shape
        # create Axes
        if time is None:
            # figure out time axis
            time = timeAxis(start_date=start_date, end_date=end_date, sampling=sampling,
                            date_range=date_range, time_axis=time_axis, llastIncl=llastIncl,
                            ntime=ntime, varatts=varatts)
        elif len(time) != ntime: raise AxisError(time)
        if ensemble is None:
            # construct ensemble axis
            ensemble = Axis(atts=varatts['ensemble'], coord=np.arange(1, nreal + 1))
        elif len(ensemble) != nreal: raise AxisError(ensemble)
        if observation is None:
            # construct observation axis
            observation = Axis(atts=varatts['observation'], coord=np.arange(1, nobs + 1))
        elif len(observation) != nobs: raise AxisError(observation)
        # create variables
        for varname, data in list(vardata.items()):
            dataset += Variable(atts=varatts[varname], data=data, axes=(time, observation, ensemble))
        # load YAML data, if available
        if lYAML:
            # load YAML file
            yaml_path = os.path.join(out_folder, yaml_file)
            if not os.path.exists(yaml_path): raise IOError(yaml_path)
            with open(yaml_path, 'r') as yf:
                obs_meta = yaml.load(yf)
            if obs_meta is None: raise IOError(yaml_path) # not a YAML file?
            # create constant variables
            for cvar, cval in list(obs_meta[0].items()):
                if isinstance(cval, str): dtype, missing = np.string_, ''
                elif isinstance(cval, (np.integer, int)): dtype, missing = np.int_, 0
                elif isinstance(cval, (np.inexact, float)): dtype, missing = np.float_, np.NaN
                else: dtype = None # skip
                if dtype:
                    data = np.asarray([missing if obs[cvar] is None else obs[cvar] for obs in obs_meta], dtype=dtype)
                    if cvar in varatts: atts = varatts[cvar]
                    else: atts = dict(name=cvar, units='')
                    dataset += Variable(atts=atts, data=data, axes=(observation,))
    elif ntime is None:
        # try to infer time dimension from backup.info file
        backup_info = os.path.join(folder, 'backup.info')
        if os.path.exists(backup_info):
            with open(backup_info, 'r') as bf:
                ntime = int(bf.readline())
    # load discharge/hydrograph data
    if 'discharge' in varlist:
        data = loadHydro(folder=out_folder, nreal=nreal, ntime=ntime)
        ntime, nreal = data.shape
        if time is None:
            # figure out time axis
            time = timeAxis(start_date=start_date, end_date=end_date, sampling=sampling,
                            date_range=date_range, time_axis=time_axis, llastIncl=llastIncl,
                            ntime=ntime, varatts=varatts)
        elif len(time) != ntime: raise AxisError(time)
        if ensemble is None:
            # construct ensemble axis
            ensemble = Axis(atts=varatts['ensemble'], coord=np.arange(1, nreal + 1))
        elif len(ensemble) != nreal: raise AxisError(ensemble)
        atts = varatts['discharge']
        if lkgs:
            data *= 1000.
            if atts['units'] == 'm^3/s': atts['units'] = 'kg/s'
        dataset += Variable(atts=atts, data=data, axes=(time, ensemble))
    # return formatted Dataset
    if scalefactors is not None and scalefactors != 1: raise NotImplementedError
    return dataset
def computeClimatology(experiment, filetype, domain, periods=None, offset=0, griddef=None, varlist=None,
                       ldebug=False, loverwrite=False, lparallel=False, pidstr='', logger=None):
    ''' worker function to compute climatologies for given file parameters. '''
    # input type checks
    if not isinstance(experiment, Exp): raise TypeError
    if not isinstance(filetype, basestring): raise TypeError
    if not isinstance(domain, (np.integer, int)): raise TypeError
    if periods is not None and not (isinstance(periods, (tuple, list)) and isInt(periods)): raise TypeError
    if not isinstance(offset, (np.integer, int)): raise TypeError
    if not isinstance(loverwrite, (bool, np.bool)): raise TypeError
    if griddef is not None and not isinstance(griddef, GridDefinition): raise TypeError
    #if pidstr == '[proc01]': raise TypeError # to test error handling
    # load source
    dataset_name = experiment.name
    fileclass = fileclasses[filetype] # used for target file name
    tsfile = fileclass.tsfile.format(domain, '')
    expfolder = experiment.avgfolder
    filepath = '{:s}/{:s}'.format(expfolder, tsfile)
    logger.info('\n\n{0:s} *** Processing Experiment {1:<15s} *** '.format(pidstr, "'{:s}'".format(dataset_name)) +
                '\n{0:s} *** {1:^37s} *** \n'.format(pidstr, "'{:s}'".format(tsfile)))
    # check file and read begin/enddates
    if not os.path.exists(filepath):
        #raise IOError, "Source file '{:s}' does not exist!".format(filepath)
        # print message and skip
        skipmsg = "\n{:s} >>> File '{:s}' in dataset '{:s}' is missing --- skipping!".format(pidstr, tsfile, dataset_name)
        skipmsg += "\n{:s} >>> ('{:s}')\n".format(pidstr, filepath)
        logger.warning(skipmsg)
        # N.B.: this can cause a lot of error messages, when not all files are present
    else: # if monthly source file exists
        import netCDF4 as nc
        ncfile = nc.Dataset(filepath, mode='r')
        begintuple = ncfile.begin_date.split('-')
        endtuple = ncfile.end_date.split('-')
        ncfile.close()
        # N.B.: at this point we don't want to initialize a full GDAL-enabled dataset, since we don't even
        #       know if we need it, and it creates a lot of overhead
        # determine age of source file
        if not loverwrite: sourceage = datetime.fromtimestamp(os.path.getmtime(filepath))
        # figure out start date
        filebegin = int(begintuple[0]) # first element is the year
        fileend = int(endtuple[0]) # first element is the year
        begindate = offset + filebegin
        if not (filebegin <= begindate <= fileend): raise DateError
        # handle cases where the first month in the record is not January
        firstmonth = int(begintuple[1]) # second element is the month
        shift = firstmonth - 1 # will be zero for January (01)
        ## loop over periods
        if periods is None: periods = [begindate - fileend]
        # periods.sort(reverse=True) # reverse, so that largest chunk is done first
        source = None # will later be assigned to the source dataset
        for period in periods:
            # figure out period
            enddate = begindate + period
            if filebegin > enddate: raise DateError, 'End date earlier than begin date.'
            if enddate - 1 > fileend:
                # if filebegin is 1979 and the simulation is 10 years, fileend will be 1988, not 1989!
                # if end date is not available, skip period
                endmsg = "\n{:s} --- Invalid Period for '{:s}': End Date {:4d} not in File! --- \n".format(pidstr, dataset_name, enddate)
                endmsg += "{:s} --- ('{:s}')\n".format(pidstr, filepath)
                logger.info(endmsg)
            else:
                ## perform averaging for selected period
                # determine if sink file already exists, and what to do about it
                periodstr = '{0:4d}-{1:4d}'.format(begindate, enddate)
                gridstr = '' if griddef is None or griddef.name == 'WRF' else '_' + griddef.name
                filename = fileclass.climfile.format(domain, gridstr, '_' + periodstr)
                if ldebug: filename = 'test_' + filename
                if lparallel: tmppfx = 'tmp_wrfavg_{:s}_'.format(pidstr[1:-1])
                else: tmppfx = 'tmp_wrfavg_'
                tmpfilename = tmppfx + filename
                assert os.path.exists(expfolder)
                filepath = expfolder + filename
                tmpfilepath = expfolder + tmpfilename
                lskip = False # else just go ahead
                if os.path.exists(filepath):
                    if not loverwrite:
                        age = datetime.fromtimestamp(os.path.getmtime(filepath))
                        # if sink file is newer than source file, skip (do not recompute)
                        if age > sourceage and os.path.getsize(filepath) > 1e6: lskip = True
                        # N.B.: NetCDF files smaller than 1MB are usually incomplete header fragments from a previous crash
                        #print sourceage, age
                    if not lskip: os.remove(filepath)
                # depending on last modification time of file or overwrite setting, start computation, or skip
                if lskip:
                    # print message
                    skipmsg = "\n{:s} >>> Skipping: file '{:s}' in dataset '{:s}' already exists and is newer than source file.".format(pidstr, filename, dataset_name)
                    skipmsg += "\n{:s} >>> ('{:s}')\n".format(pidstr, filepath)
                    logger.info(skipmsg)
                else:
                    if griddef is None: lregrid = False
                    else: lregrid = True
                    ## begin actual computation
                    beginmsg = "\n{:s} <<< Computing '{:s}' (d{:02d}) Climatology from {:s}".format(pidstr, dataset_name, domain, periodstr)
                    if not lregrid: beginmsg += " >>> \n"
                    else: beginmsg += " ('{:s}' grid) >>> \n".format(griddef.name)
                    logger.info(beginmsg)
                    ## actually load datasets
                    if source is None:
                        source = loadWRF_TS(experiment=experiment, filetypes=[filetype], domains=domain) # comes out as a tuple...
                    if not lparallel and ldebug: logger.info('\n' + str(source) + '\n')
                    # prepare sink
                    if os.path.exists(tmpfilepath): os.remove(tmpfilepath) # remove old temp files
                    sink = DatasetNetCDF(name='WRF Climatology', folder=expfolder, filelist=[tmpfilename],
                                         atts=source.atts.copy(), mode='w')
                    sink.atts.period = periodstr
                    # if lregrid: addGDALtoDataset(sink, griddef=griddef)
                    # initialize processing
                    CPU = CentralProcessingUnit(source, sink, varlist=varlist, tmp=lregrid, feedback=ldebug) # no need for lat/lon
                    # start processing climatology
                    if shift != 0:
                        logger.info('{0:s} (shifting climatology by {1:d} month, to start with January) \n'.format(pidstr, shift))
                    CPU.Climatology(period=period, offset=offset, shift=shift, flush=False)
                    # N.B.: immediate flushing should not be necessary for climatologies, since they are much smaller!
                    # reproject and resample (regrid) dataset
                    if lregrid:
                        CPU.Regrid(griddef=griddef, flush=True)
                        logger.info('{:s} --- {:s} --- \n'.format(pidstr, griddef.name))
                        logger.debug('{:s} --- {:s} --- \n'.format(pidstr, str(griddef)))
                    # sync temporary storage with output dataset (sink)
                    CPU.sync(flush=True)
                    # add Geopotential Height Variance
                    if 'GHT_Var' in sink and 'Z_var' not in sink:
                        data_array = (sink['GHT_Var'].data_array - sink['Z'].data_array**2)**0.5
                        atts = dict(name='Z_var', units='m', long_name='Square Root of Geopotential Height Variance')
                        sink += Variable(axes=sink['Z'].axes, data=data_array, atts=atts)
                    # add (relative) Vorticity Variance
                    if 'Vorticity_Var' in sink and 'zeta_var' not in sink:
                        data_array = (sink['Vorticity_Var'].data_array - sink['zeta'].data_array**2)**0.5
                        atts = dict(name='zeta_var', units='1/s', long_name='Square Root of Relative Vorticity Variance')
                        sink += Variable(axes=sink['zeta'].axes, data=data_array, atts=atts)
                    # add names and length of months
                    sink.axisAnnotation('name_of_month', name_of_month, 'time',
                                        atts=dict(name='name_of_month', units='', long_name='Name of the Month'))
                    if not sink.hasVariable('length_of_month'):
                        sink += Variable(name='length_of_month', units='days', axes=(sink.time,), data=days_per_month,
                                         atts=dict(name='length_of_month', units='days', long_name='Length of Month'))
                    # close... and write results to file
                    sink.sync()
                    sink.close()
                    writemsg = "\n{:s} >>> Writing to file '{:s}' in dataset {:s}".format(pidstr, filename, dataset_name)
                    writemsg += "\n{:s} >>> ('{:s}')\n".format(pidstr, filepath)
                    logger.info(writemsg)
                    # rename file to proper name
                    if os.path.exists(filepath): os.remove(filepath) # remove old file
                    os.rename(tmpfilepath, filepath) # this will overwrite the old file
                    # print dataset
                    if not lparallel and ldebug: logger.info('\n' + str(sink) + '\n')
                    # clean up (not sure if this is necessary, but there seems to be a memory leak...)
                    del sink, CPU
                    gc.collect() # get rid of these guys immediately
        # clean up and return
        if source is not None:
            source.unload()
            del source
        # N.B.: source is only loaded once for all periods
    # N.B.: garbage is collected in multi-processing wrapper as well
    # return
    return 0 # so far, there is no measure of success, hence, if there is no crash...
asNC=True, copy=True, deepcopy=True)
# add names and length of months
sink.axisAnnotation('name_of_month', name_of_month, 'time',
                    atts=dict(name='name_of_month', units='', long_name='Name of the Month'))
if not sink.hasVariable('length_of_month'):
    sink += Variable(name='length_of_month', units='days', axes=(sink.time,), data=days_per_month,
                     atts=dict(name='length_of_month', units='days', long_name='Length of Month'))
# apply higher resolution mask
if griddef is not None:
    sink.mask(sink.landmask, maskSelf=False, varlist=None,
              skiplist=['prismmask', 'lon2d', 'lat2d'], invert=False, merge=True)
# finalize changes
sink.sync()
def rasterVariable(name=None, units=None, axes=None, atts=None, plot=None, dtype=None, projection=None,
                   griddef=None, file_pattern=None, lgzip=None, lgdal=True, lmask=True, fillValue=None,
                   lskipMissing=True, path_params=None, offset=0, scalefactor=1, transform=None,
                   time_axis=None, lfeedback=False, **kwargs):
    ''' function to read multi-dimensional raster data and construct a GDAL-enabled Variable object '''
    # print status
    if lfeedback: print "Loading variable '{}': ".format(name), # no newline
    ## figure out axes arguments and load data
    # figure out axes (list/tuple of axes has to be ordered correctly!)
    axes_list = [ax.name for ax in axes[:-2]]
    # N.B.: the last two axes are the two horizontal map axes (x&y); they can be None and will be inferred from raster
    # N.B.: coordinate values can be overridden with keyword arguments, but length must be consistent
    # figure out coordinates for axes
    for ax in axes[:-2]:
        if ax.name in kwargs:
            # just make sure the dimensions match, but use keyword argument
            if not len(kwargs[ax.name]) == len(ax):
                raise AxisError("Length of Variable axis and raster file dimension have to be equal.")
        else:
            # use Axis coordinates and add to kwargs for readRasterArray call
            kwargs[ax.name] = tuple(ax.coord)
    # load raster data
    if lfeedback: print("'{}'".format(file_pattern))
    data, geotransform = readRasterArray(file_pattern, lgzip=lgzip, lgdal=lgdal, dtype=dtype, lmask=lmask,
                                         fillValue=fillValue, lgeotransform=True, axes=axes_list, lna=False,
                                         lskipMissing=lskipMissing, path_params=path_params,
                                         lfeedback=lfeedback, **kwargs)
    # shift and rescale
    if offset != 0: data += offset
    if scalefactor != 1: data *= scalefactor
    ## create Variable object and add GDAL
    # check map axes and generate if necessary
    xlon, ylat = getAxes(geotransform, xlen=data.shape[-1], ylen=data.shape[-2],
                         projected=griddef.isProjected if griddef else bool(projection))
    axes = list(axes)
    if axes[-1] is None: axes[-1] = xlon
    elif len(axes[-1]) != len(xlon): raise AxisError(axes[-1])
    if axes[-2] is None: axes[-2] = ylat
    elif len(axes[-2]) != len(ylat): raise AxisError(axes[-2])
    # create regular Variable with data in memory
    var = Variable(name=name, units=units, axes=axes, data=data, dtype=dtype, mask=None,
                   fillValue=fillValue, atts=atts, plot=plot)
    # apply transform (if any), now that we have axes etc.
    if transform is not None: var = transform(var=var, time_axis=time_axis)
    # add GDAL functionality
    if griddef is not None:
        # perform some consistency checks ...
        if projection is None:
            projection = griddef.projection
        elif projection != griddef.projection:
            raise ArgumentError("Conflicting projection and GridDef!\n {} != {}".format(projection, griddef.projection))
        if not np.isclose(geotransform, griddef.geotransform).all():
            raise ArgumentError("Conflicting geotransform (from raster) and GridDef!\n {} != {}".format(geotransform, griddef.geotransform))
        # ... and use provided geotransform (due to issues with numerical precision, this is usually better)
        geotransform = griddef.geotransform
    # if we don't pass the geotransform explicitly, it will be recomputed from the axes
    # add GDAL functionality
    var = addGDALtoVar(var, griddef=griddef, projection=projection, geotransform=geotransform, gridfolder=None)
    # return final, GDAL-enabled variable
    return var
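# --- usage sketch (not part of the original module) ---
# Read a stack of monthly raster files into a GDAL-enabled Variable; the file pattern,
# the grid definition, and the assumption that the pattern is keyed by the axis name are
# placeholders for whatever readRasterArray expects in the calling project.
time_ax = Axis(name='time', units='month', coord=np.arange(1, 13))
prcp = rasterVariable(name='precip', units='kg/m^2/s', axes=(time_ax, None, None),
                      file_pattern='prcp_{time:02d}.asc', lgzip=False, lmask=True,
                      griddef=my_griddef, lfeedback=True)  # my_griddef: a GridDefinition placeholder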
# add meta data
meta_dicts = [loadMetadata(well, conservation_authority=conservation_authority) for well in wells]
for key in meta_dicts[0].keys():
    if key in varatts: atts = varatts[key]
    elif key.lower() in varatts: atts = varatts[key.lower()]
    else: atts = dict(name=key, units='')
    if atts['units']: data = np.asarray([wmd[key] for wmd in meta_dicts], dtype=np.float64)
    else: data = np.asarray([wmd[key] for wmd in meta_dicts])
    try:
        dataset += Variable(data=data, axes=(well_ax,), **atts)
    except:
        pass
# add names
dataset += Variable(data=wells, axes=(well_ax,), name='well_name', units='',
                    atts=dict(long_name='Short Well Name'))
for varname in ('d_piezo', 'well_name', 'depth'):
    print('')
    print((dataset[varname]))
    print((dataset[varname][:]))
# add well heads
data = np.zeros((len(well_ax),
# add landmask
print ' === landmask === '
tmpatts = dict(name='landmask', units='', long_name='Landmask for Climatology Fields',
               description='where this mask is non-zero, no data is available')
# find a masked variable
for var in sink.variables.itervalues():
    if var.masked and var.gdal:
        mask = var.getMapMask()
        break
# add variable to dataset
sink.addVariable(Variable(name='landmask', units='', axes=(sink.lat, sink.lon),
                          data=mask, atts=tmpatts), asNC=True)
sink.mask(sink.landmask)
# add names and length of months
sink.axisAnnotation('name_of_month', name_of_month, 'time',
                    atts=dict(name='name_of_month', units='', long_name='Name of the Month'))
#print ' === month === '
sink.addVariable(Variable(name='length_of_month', units='days', axes=(sink.time,),
def computePotEvapPM(dataset, lterms=True, lmeans=False):
    ''' function to compute potential evapotranspiration (according to Penman-Monteith method:
        https://en.wikipedia.org/wiki/Penman%E2%80%93Monteith_equation,
        http://www.fao.org/docrep/x0490e/x0490e06.htm#formulation%20of%20the%20penman%20monteith%20equation) '''
    # get net radiation at surface
    if 'netrad' in dataset: Rn = dataset['netrad'][:] # net radiation
    elif 'Rn' in dataset: Rn = dataset['Rn'][:] # alias
    else: Rn = computeNetRadiation(dataset, asVar=False) # try to compute
    # heat flux in and out of the ground
    if 'grdflx' in dataset: G = dataset['grdflx'][:] # heat release by the soil
    else: raise VariableError, "Cannot determine soil heat flux for PET calculation."
    # get wind speed
    if 'U2' in dataset: u2 = dataset['U2'][:]
    elif lmeans and 'U10' in dataset: u2 = wind(dataset['U10'][:], z=10)
    elif 'u10' in dataset and 'v10' in dataset: u2 = wind(u=dataset['u10'][:], v=dataset['v10'][:], z=10)
    else: raise VariableError, "Cannot determine 2m wind speed for PET calculation."
    # get psychrometric variables
    if 'ps' in dataset: p = dataset['ps'][:]
    else: raise VariableError, "Cannot determine surface air pressure for PET calculation."
    g = gamma(p) # psychrometric constant (pressure-dependent)
    if 'Q2' in dataset: ea = dataset['Q2'][:]
    elif 'q2' in dataset: ea = dataset['q2'][:] * dataset['ps'][:] * 28.96 / 18.02
    else: raise VariableError, "Cannot determine 2m water vapor pressure for PET calculation."
    # get temperature
    if lmeans and 'Tmean' in dataset: T = dataset['Tmean'][:]
    elif 'T2' in dataset: T = dataset['T2'][:]
    else: raise VariableError, "Cannot determine 2m mean temperature for PET calculation."
    # get saturation water vapor
    if 'Tmin' in dataset and 'Tmax' in dataset: es = e_sat(dataset['Tmin'][:], dataset['Tmax'][:])
    # else: Es = e_sat(T) # backup, but not very accurate
    else: raise VariableError, "'Tmin' and 'Tmax' are required to compute saturation water vapor pressure for PET calculation."
    D = Delta(T) # slope of saturation vapor pressure w.r.t. temperature
    # compute potential evapotranspiration according to Penman-Monteith method
    # (http://www.fao.org/docrep/x0490e/x0490e06.htm#fao%20penman%20monteith%20equation)
    if lterms:
        Dgu = evaluate('( D + g * (1 + 0.34 * u2) ) * 86400') # common denominator
        rad = evaluate('0.0352512 * D * (Rn + G) / Dgu') # radiation term
        wnd = evaluate('g * u2 * (es - ea) * 0.9 / T / Dgu') # wind term (vapor deficit)
        pet = evaluate('( 0.0352512 * D * (Rn + G) + ( g * u2 * (es - ea) * 0.9 / T ) ) / ( D + g * (1 + 0.34 * u2) ) / 86400')
        import numpy as np
        assert np.allclose(pet, rad + wnd, equal_nan=True)
        rad = Variable(data=rad, name='petrad', units='kg/m^2/s', axes=dataset['ps'].axes)
        wnd = Variable(data=wnd, name='petwnd', units='kg/m^2/s', axes=dataset['ps'].axes)
    else:
        pet = evaluate('( 0.0352512 * D * (Rn + G) + ( g * u2 * (es - ea) * 0.9 / T ) ) / ( D + g * (1 + 0.34 * u2) ) / 86400')
    # N.B.: units have been converted to SI (mm/day -> 1/86400 kg/m^2/s, kPa -> 1000 Pa, and Celsius to K)
    pet = Variable(data=pet, name='pet', units='kg/m^2/s', axes=dataset['ps'].axes)
    assert 'waterflx' not in dataset or pet.units == dataset['waterflx'].units, pet
    # return new variable(s)
    return (pet, rad, wnd) if lterms else pet
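# --- usage sketch (not part of the original module) ---
# Compute Penman-Monteith PET with the radiation and wind terms broken out; 'wrf_clim'
# is a placeholder for a dataset providing the surface fields tested for above
# (pressure, temperature, humidity, wind, radiation and ground heat flux).
pet, petrad, petwnd = computePotEvapPM(wrf_clim, lterms=True, lmeans=False)
for v in (pet, petrad, petwnd): wrf_clim.addVariable(v)  # all in kg/m^2/s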
def loadHGS_StnTS(station=None, varlist=None, varatts=None, folder=None, name=None, title=None,
                  start_date=None, end_date=None, run_period=15, period=None, lskipNaN=False,
                  lcheckComplete=True, basin=None, WSC_station=None, basin_list=None, filename=None,
                  prefix=None, scalefactors=None, **kwargs):
    ''' Get a properly formatted HGS dataset with monthly time-series at station locations;
        as in the hgsrun module, the capitalized kwargs can be used to construct folders and/or names '''
    if folder is None or ( filename is None and station is None ): raise ArgumentError
    # try to find meta data for gage station from WSC
    HGS_station = station
    if basin is not None and basin_list is not None:
        station_name = station
        station = getGageStation(basin=basin, station=station if WSC_station is None else WSC_station,
                                 basin_list=basin_list) # only works with registered basins
        if station_name is None: station_name = station.name # backup, in case we don't have a HGS station name
        metadata = station.getMetaData() # load station meta data
        if metadata is None: raise GageStationError(name)
    else:
        metadata = dict(); station = None; station_name = None
    # prepare name expansion arguments (all capitalized)
    expargs = dict(ROOT_FOLDER=root_folder, STATION=HGS_station, NAME=name, TITLE=title,
                   PREFIX=prefix, BASIN=basin, WSC_STATION=WSC_station)
    for key,value in metadata.items():
        if isinstance(value,basestring):
            expargs['WSC_'+key.upper()] = value # in particular, this includes WSC_ID
    if 'WSC_ID' in expargs:
        if expargs['WSC_ID'][0] == '0': expargs['WSC_ID0'] = expargs['WSC_ID'][1:]
        else: raise DatasetError('Expected leading zero in WSC station ID: {}'.format(expargs['WSC_ID']))
    # exparg preset keys will get overwritten if capitalized versions are defined
    for key,value in kwargs.items():
        KEY = key.upper() # we only use capitalized keywords, and non-capitalized keywords are only used/converted
        if KEY == key or KEY not in kwargs:
            expargs[KEY] = value # if no capitalized version is defined
    # read folder and infer prefix, if necessary
    folder = folder.format(**expargs)
    if not os.path.exists(folder): raise IOError(folder)
    if expargs['PREFIX'] is None:
        with open('{}/{}'.format(folder,prefix_file), 'r') as pfx:
            expargs['PREFIX'] = prefix = ''.join(pfx.readlines()).strip()
    # now assemble file name for station timeseries
    filename = filename.format(**expargs)
    filepath = '{}/{}'.format(folder,filename)
    if not os.path.exists(filepath): raise IOError(filepath)
    if station_name is None:
        station_name = filename[filename.index('hydrograph.')+1:-4] if station is None else station
    # set meta data (and allow keyword expansion of name and title)
    metadata['problem'] = prefix
    metadata['station_name'] = metadata.get('long_name', station_name)
    if name is not None: name = name.format(**expargs) # name expansion with capitalized keyword arguments
    else: name = 'HGS_{:s}'.format(station_name)
    metadata['name'] = name; expargs['Name'] = name.title() # name in title format
    if title is None: title = '{{Name:s}} (HGS, {problem:s})'.format(**metadata)
    title = title.format(**expargs) # name expansion with capitalized keyword arguments
    metadata['long_name'] = metadata['title'] = title
    # now determine start date for date_parser
    if end_date is None:
        if start_date and run_period: end_date = start_date + run_period
        elif period: end_date = period[1]
        else: raise ArgumentError("Need to specify either 'start_date' & 'run_period' or 'period' to infer 'end_date'.")
    end_year,end_month,end_day = convertDate(end_date)
    if start_date is None:
        if end_date and run_period: start_date = end_date - run_period
        elif period: start_date = period[0]
        else: raise ArgumentError("Need to specify either 'end_date' & 'run_period' or 'period' to infer 'start_date'.")
    start_year,start_month,start_day = convertDate(start_date)
    if start_day != 1 or end_day != 1:
        raise NotImplementedError('Currently only monthly data is supported.')
    # import functools
    # date_parser = functools.partial(date_parser, year=start_year, month=start_month, day=start_day)
    # # now load data using pandas ascii reader
    # data_frame = pd.read_table(filepath, sep='\s+', header=2, dtype=np.float64, index_col=['time'],
    #                            date_parser=date_parser, names=ascii_varlist)
    # # resample to monthly data
    # data_frame = data_frame.resample(resampling).agg(np.mean)
    # data = data_frame[flowvar].values
    # parse header
    if varlist is None: varlist = variable_list[:] # default list
    with open(filepath, 'r') as f:
        line = f.readline(); lline = line.lower() # 1st line
        if not "hydrograph" in lline: raise GageStationError(line,filepath)
        # parse variables and determine columns
        line = f.readline(); lline = line.lower() # 2nd line
        if not "variables" in lline: raise GageStationError(line)
        variable_order = [v.strip('"').lower() for v in line[line.find('"'):].strip().split(',')]
    # figure out varlist and data columns
    if variable_order[0] == 'time': del variable_order[0] # only keep variables
    else: raise GageStationError(variable_order)
    variable_order = [hgs_variables[v] for v in variable_order] # replace HGS names with GeoPy names
    vardict = {v:i+1 for i,v in enumerate(variable_order)} # column mapping; +1 because time was removed
    variable_order = [v for v in variable_order if v in varlist or flow_to_flux[v] in varlist]
    usecols = tuple(vardict[v] for v in variable_order) # variable columns that need to be loaded (except time, which is col 0)
    assert 0 not in usecols, usecols
    # load data as tab separated values
    data = np.genfromtxt(filepath, dtype=np.float64, delimiter=None, skip_header=3, usecols=(0,)+usecols)
    assert data.shape[1] == len(usecols)+1, data.shape
    if lskipNaN:
        data = data[np.isnan(data).sum(axis=1)==0,:]
    elif np.any( np.isnan(data) ):
        raise DataError("Missing values (NaN) encountered in hydrograph file; use 'lskipNaN' to ignore.\n('{:s}')".format(filepath))
    time_series = data[:,0]; flow_data = data[:,1:]
    assert flow_data.shape == (len(time_series),len(usecols)), flow_data.shape
    # original time deltas in seconds
    time_diff = time_series.copy(); time_diff[1:] = np.diff(time_series) # time period between time steps
    assert np.all( time_diff > 0 ), filepath
    time_diff = time_diff.reshape((len(time_diff),1)) # reshape to make sure broadcasting works
    # integrate flow over time steps before resampling
    flow_data[1:,:] -= np.diff(flow_data, axis=0)/2. # get average flow between time steps
    flow_data *= time_diff # integrate flow in time interval by multiplying average flow with time period
    flow_data = np.cumsum(flow_data, axis=0) # integrate by summing up total flow per time interval
    # generate regular monthly time steps
    start_datetime = np.datetime64(dt.datetime(year=start_year, month=start_month, day=start_day), 'M')
    end_datetime = np.datetime64(dt.datetime(year=end_year, month=end_month, day=end_day), 'M')
    time_monthly = np.arange(start_datetime, end_datetime+np.timedelta64(1, 'M'), dtype='datetime64[M]')
    assert time_monthly[0] == start_datetime, time_monthly[0]
    assert time_monthly[-1] == end_datetime, time_monthly[-1]
    # convert monthly time series to regular array of seconds since start date
    time_monthly = ( time_monthly.astype('datetime64[s]') - start_datetime.astype('datetime64[s]') ) / np.timedelta64(1,'s')
    assert time_monthly[0] == 0, time_monthly[0]
    # interpolate integrated flow to new time axis
    #flow_data = np.interp(time_monthly, xp=time_series[:,0], fp=flow_data[:,0],).reshape((len(time_monthly),1))
    time_series = np.concatenate(([0],time_series), axis=0) # integrated flow at time zero must be zero...
    flow_data = np.concatenate(([[0,]*len(usecols)],flow_data), axis=0) # ... this is probably better than interpolation
    # N.B.: we are adding zeros here so we don't have to extrapolate to the left; on the right we just fill in NaN's
    if ( time_monthly[-1] - time_series[-1] ) > 3*86400. and lcheckComplete:
        warn("Data record ends more than 3 days before end of period: {} days".format((time_monthly[-1]-time_series[-1])/86400.))
    elif (time_monthly[-1]-time_series[-1]) > 5*86400.:
        if lcheckComplete:
            raise DataError("Data record ends more than 5 days before end of period: {} days".format((time_monthly[-1]-time_series[-1])/86400.))
        else:
            warn("Data record ends more than 5 days before end of period: {} days".format((time_monthly[-1]-time_series[-1])/86400.))
    flow_interp = si.interp1d(x=time_series, y=flow_data, kind='linear', axis=0, copy=False,
                              bounds_error=False, fill_value=np.NaN, assume_sorted=True)
    flow_data = flow_interp(time_monthly) # evaluate with call
    # compute monthly flow rate from interpolated integrated flow
    flow_data = np.diff(flow_data, axis=0) / np.diff(time_monthly, axis=0).reshape((len(time_monthly)-1,1))
    flow_data *= 1000 # convert from m^3/s to kg/s
    # construct time axis
    start_time = 12*(start_year - 1979) + start_month -1
    end_time = 12*(end_year - 1979) + end_month -1
    time = Axis(name='time', units='month', atts=dict(long_name='Month since 1979-01'),
                coord=np.arange(start_time, end_time)) # not including the last, e.g. 1979-01 to 1980-01 is 12 month
    assert len(time_monthly) == end_time-start_time+1
    assert flow_data.shape == (len(time),len(variable_order)), (flow_data.shape,len(time),len(variable_order))
    # construct dataset
    dataset = Dataset(atts=metadata)
    dataset.station = station # add gage station object, if available (else None)
    for i,flowvar in enumerate(variable_order):
        data = flow_data[:,i]
        fluxvar = flow_to_flux[flowvar]
        if flowvar in varlist:
            flowatts = variable_attributes[flowvar]
            # convert variables and put into dataset (monthly time series)
            if flowatts['units'] != 'kg/s':
                raise VariableError("Hydrograph data is read as kg/s; flow variable does not match.\n{}".format(flowatts))
            dataset += Variable(data=data, axes=(time,), **flowatts)
        if fluxvar in varlist and 'shp_area' in metadata:
            # compute surface flux variable based on drainage area
            fluxatts = variable_attributes[fluxvar]
            if fluxatts['units'] == 'kg/s' and fluxatts['units'] != 'kg/m^2/s': raise VariableError(fluxatts)
            data = data / metadata['shp_area'] # need to make a copy
            dataset += Variable(data=data, axes=(time,), **fluxatts)
    # apply analysis period
    if period is not None: dataset = dataset(years=period)
    # adjust scalefactors, if necessary
    if scalefactors:
        if isinstance(scalefactors,dict):
            dataset = updateScalefactor(dataset, varlist=scalefactors, scalefactor=None)
        elif isNumber(scalefactors):
            scalelist = ('discharge','seepage','flow')
            dataset = updateScalefactor(dataset, varlist=scalelist, scalefactor=scalefactors)
        else: raise TypeError(scalefactors)
    # return completed dataset
    return dataset
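# --- usage sketch (not part of the original module) ---
# Load a monthly HGS hydrograph for a registered WSC basin; basin name, folder layout,
# prefix, station label and file name are placeholders for an actual hgsrun setup.
hgs = loadHGS_StnTS(station='Station_XY', basin='GRW', basin_list=basin_list,
                    folder='{ROOT_FOLDER:s}/GRW/grw1/', prefix='grw1',
                    filename='{PREFIX:s}o.hydrograph.Station_XY.dat',
                    start_date=1979, run_period=15, lskipNaN=True)
print(hgs['discharge'][:])  # monthly mean discharge in kg/s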
def loadGageStation(basin=None, station=None, varlist=None, varatts=None, mode='climatology',
                    aggregation=None, filetype='monthly', folder=None, name=None, period=None,
                    basin_list=None, lcheck=True, lexpand=True, lfill=True, lflatten=True,
                    lkgs=True, scalefactors=None, title=None):
    ''' function to load hydrograph climatologies and timeseries for a given basin '''
    ## resolve input
    if mode == 'timeseries' and aggregation:
        raise ArgumentError('Timeseries does not support aggregation.')
    # get GageStation instance
    station = getGageStation(basin=basin, station=station, name=name, folder=folder,
                             river=None, basin_list=basin_list, lcheck=True)
    # variable attributes
    if varlist is None: varlist = variable_list
    elif not isinstance(varlist,(list,tuple)): raise TypeError
    varlist = list(varlist) # make copy of varlist to avoid interference
    if varatts is None:
        if aggregation is None: varatts = variable_attributes_kgs if lkgs else variable_attributes_mms
        else: varatts = agg_varatts_kgs if lkgs else agg_varatts_mms
    elif not isinstance(varatts,dict): raise TypeError
    ## read csv data
    # time series data and time coordinates
    lexpand = True; lfill = True
    if mode == 'climatology': lexpand = False; lfill = False; lflatten = False
    data, time = station.getTimeseriesData(units='kg/s' if lkgs else 'm^3/s', lcheck=True, lexpand=lexpand,
                                           lfill=lfill, period=period, lflatten=lflatten)
    # station meta data
    metadata = station.getMetaData(lcheck=True)
    den = metadata['shp_area'] if lkgs else ( metadata['shp_area'] / 1000. )
    ## create dataset for station
    dataset = Dataset(name='WSC', title=title or metadata['Station Name'], varlist=[], atts=metadata,)
    if mode.lower() in ('timeseries','time-series'):
        time = time.flatten(); data = data.flatten() # just to make sure...
        # make time axis based on time coordinate from csv file
        timeAxis = Axis(name='time', units='month', coord=time, # time series centered at 1979-01
                        atts=dict(long_name='Month since 1979-01'))
        dataset += timeAxis
        # load mean discharge
        dataset += Variable(axes=[timeAxis], data=data, atts=varatts['discharge'])
        # load mean runoff
        doa = data / den
        dataset += Variable(axes=[timeAxis], data=doa, atts=varatts['runoff'])
    elif mode == 'climatology':
        # N.B.: this is primarily for backwards compatibility; it should not be used anymore...
        # make common time axis for climatology
        te = 12 # length of time axis: 12 month
        climAxis = Axis(name='time', units='month', length=12, coord=np.arange(1,te+1,1)) # monthly climatology
        dataset.addAxis(climAxis, copy=False)
        # extract variables (min/max/mean are separate variables)
        # N.B.: this is mainly for backwards compatibility
        doa = data / den
        if aggregation is None or aggregation.lower() == 'mean':
            # load mean discharge
            tmpdata = nf.nanmean(data, axis=0)
            tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['discharge'])
            dataset.addVariable(tmpvar, copy=False)
            # load mean runoff
            tmpdata = nf.nanmean(doa, axis=0)
            tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['runoff'])
            dataset.addVariable(tmpvar, copy=False)
        if aggregation is None or aggregation.lower() == 'std':
            # load discharge standard deviation
            tmpdata = nf.nanstd(data, axis=0, ddof=1) # very few values means large uncertainty!
            tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['discstd'])
            dataset.addVariable(tmpvar, copy=False)
            # load runoff standard deviation
            tmpdata = nf.nanstd(doa, axis=0, ddof=1)
            tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['roff_std'])
            dataset.addVariable(tmpvar, copy=False)
        if aggregation is None or aggregation.lower() == 'sem':
            # load discharge standard error of the mean
            tmpdata = nf.nansem(data, axis=0, ddof=1) # very few values means large uncertainty!
            tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['discsem'])
            dataset.addVariable(tmpvar, copy=False)
            # load runoff standard error of the mean
            tmpdata = nf.nansem(doa, axis=0, ddof=1)
            tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['roff_sem'])
            dataset.addVariable(tmpvar, copy=False)
        if aggregation is None or aggregation.lower() == 'max':
            # load maximum discharge
            tmpdata = nf.nanmax(data, axis=0)
            tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['discmax'])
            dataset.addVariable(tmpvar, copy=False)
            # load maximum runoff
            tmpdata = nf.nanmax(doa, axis=0)
            tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['roff_max'])
            dataset.addVariable(tmpvar, copy=False)
        if aggregation is None or aggregation.lower() == 'min':
            # load minimum discharge
            tmpdata = nf.nanmin(data, axis=0)
            tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['discmin'])
            dataset.addVariable(tmpvar, copy=False)
            # load minimum runoff
            tmpdata = nf.nanmin(doa, axis=0)
            tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['roff_min'])
            dataset.addVariable(tmpvar, copy=False)
    else:
        raise NotImplementedError, "Time axis mode '{}' is not supported.".format(mode)
    # adjust scalefactors, if necessary
    if scalefactors:
        if isinstance(scalefactors,dict):
            dataset = updateScalefactor(dataset, varlist=scalefactors, scalefactor=None)
        elif isNumber(scalefactors):
            scalelist = ('discharge','StdDisc','SEMDisc','MaxDisc','MinDisc',)
            dataset = updateScalefactor(dataset, varlist=scalelist, scalefactor=scalefactors)
        else:
            raise TypeError(scalefactors)
    # return station dataset
    return dataset
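# --- usage sketch (not part of the original module) ---
# Load a WSC gage record as a monthly time-series in kg/s; basin and station names are
# placeholders for entries in the caller's basin_list.
wsc = loadGageStation(basin='GRW', station='Grand River at Brantford', mode='timeseries',
                      basin_list=basin_list, lkgs=True)
print(wsc['discharge'][:])  # monthly mean discharge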