Example #1
 def setUp(self):
   ''' create two test variables '''
   # create axis and variable instances (make *copies* of data and attributes!)
   x1 = np.linspace(0,10,15); 
   x2 = np.linspace(2,8,18);
   if self.ldatetime:
       start_datetime, end_datetime = pd.to_datetime('1981-05-01'), pd.to_datetime('1981-05-16')
       t1 = np.arange(start_datetime, end_datetime, dtype='datetime64[D]') 
       xax1 = Axis(name='Time1-Axis', units='X Time', coord=t1) 
       t2 = np.arange(start_datetime, end_datetime+np.timedelta64(3, 'D'), dtype='datetime64[D]')
       xax2 = Axis(name='Time2-Axis', units='X Time', coord=t2)
   else:
       xax1 = Axis(name='X1-Axis', units='X Units', coord=x1)
       xax2 = Axis(name='X2-Axis', units='X Units', coord=x2)
   var0 = Variable(axes=(xax1,), data=np.sin(x1), atts=dict(name='relative', units=''))
   var1 = Variable(axes=(xax1,), data=x1.copy(), atts=dict(name='blue', units='units'))
   self.var0 = var0; self.var1 = var1; self.xax1 = xax1
   var2 = Variable(name='purple',units='units',axes=(xax2,), data=(x2**2)/5.)
   self.var2 = var2; self.xax2 = xax2
   # create error variables with random noise
   noise1 = np.random.rand(len(xax1))*var1.data_array.std()/2.
   err1 = Variable(axes=(xax1,), data=noise1, atts=dict(name='blue_std', units='units'))
   noise2 = np.random.rand(len(xax2))*var2.data_array.std()/2.
   err2 = Variable(name='purple',units='units',axes=(xax2,), data=noise2)
   self.err1 = err1; self.err2 = err2
   # add to list
   self.vars = [var1, var2]
   self.errs = [err1, err2]
   self.axes = [xax1, xax2]
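Note on the ldatetime branch above: np.arange over datetime64 values produces the daily time coordinates that are passed to the Axis constructor. A minimal, self-contained sketch of that pattern (plain NumPy/pandas only):

import numpy as np
import pandas as pd

start, end = pd.to_datetime('1981-05-01'), pd.to_datetime('1981-05-16')
t1 = np.arange(start, end, dtype='datetime64[D]')                            # 15 daily steps, end exclusive
t2 = np.arange(start, end + np.timedelta64(3, 'D'), dtype='datetime64[D]')   # 18 daily steps
assert len(t1) == 15 and len(t2) == 18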
Example #2
 def setUp(self):
     ''' create two test variables '''
     # create axis and variable instances (make *copies* of data and attributes!)
     x1 = np.random.randn(180)
     xax1 = Axis(name='X1-Axis', units='X Units', length=len(x1))
     var1 = Variable(axes=(xax1, ),
                     data=x1.copy(),
                     atts=dict(name='blue', units='units'))
     self.var1 = var1
     self.xax1 = xax1
     x2 = np.random.randn(180)
     xax2 = Axis(name='X2-Axis', units='X Units', length=len(x2))
     var2 = Variable(name='purple', units='units', axes=(xax2, ), data=x2)
     self.var2 = var2
     self.xax2 = xax2
     # actual normal distribution
     self.dist = 'norm'
     distvar = VarRV(name=self.dist,
                     units='units',
                     dist=self.dist,
                     params=(0, 1))
     self.distVar = distvar
     # add to list
     self.vars = [var1, var2]
     self.axes = [xax1, xax2]
Example #3
 def setUp(self):
   ''' create a 2D test variable '''
   # create axis and variable instances (make *copies* of data and attributes!)
   xax = Axis(name='X-Axis', units='X Units', coord=np.linspace(0,10,15))
   yax = Axis(name='Y-Axis', units='Y Units', coord=np.linspace(2,8,18))
   xx,yy = np.meshgrid(yax[:],xax[:],) # create mesh (transposed w.r.t. values)
   var0 = Variable(axes=(xax,yax), data=np.sin(xx)*np.cos(yy), atts=dict(name='Color', units='Color Units'))
   var1 = Variable(axes=(xax,yax), data=np.cos(xx)*np.sin(yy), atts=dict(name='Contour', units='Contour Units'))
   self.var0 = var0; self.var1 = var1; self.xax = xax; self.yax = yax
   # add to list
   self.axes = [xax, yax]
   self.vars = [var0, var1]
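Note on the meshgrid call in Example #3: the axes are passed in swapped order with the default 'xy' indexing, which is why the comment says the mesh is transposed with respect to the values; the first output actually carries the second axis' coordinates. A small NumPy-only check of that equivalence:

import numpy as np

x = np.linspace(0, 10, 15)   # first axis, length 15
y = np.linspace(2, 8, 18)    # second axis, length 18

xx, yy = np.meshgrid(y, x)               # default 'xy' indexing, swapped arguments; shape (15, 18)
X, Y = np.meshgrid(x, y, indexing='ij')  # 'ij' indexing, natural order: X[i, j] == x[i], Y[i, j] == y[j]
assert xx.shape == (15, 18)
assert np.allclose(xx, Y) and np.allclose(yy, X)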
Example #4
 def setUp(self):
   ''' create a reference and two test variables for Taylor plot'''
   self.thetamin = 0.; self.Rmin = 0.; self.thetamax = np.pi/2.; self.Rmax = 2.
   # create axis and variable instances (make *copies* of data and attributes!)
   self.x1 = np.linspace(0,10,11); self.xax1 = Axis(name='X1-Axis', units='X Units', coord=self.x1)
   self.data0 = np.sin(self.x1)
   self.var0 = Variable(axes=(self.xax1,), data=self.data0, atts=dict(name='Reference', units='units'))
   # create error variables with random noise
   self.data1 = self.data0 + ( np.random.rand(len(self.xax1))-0.5 )*0.5
   self.var1 = Variable(axes=(self.xax1,), data=self.data1, atts=dict(name='Blue', units='units'))
   self.data2 = self.data0 + ( np.random.rand(len(self.xax1))-0.5 )*1.5
   self.var2 = Variable(axes=(self.xax1,), data=self.data2, atts=dict(name='Red', units='units'))
   self.data3 = 1. + np.random.rand(len(self.xax1))*1.5
   self.var3 = Variable(axes=(self.xax1,), data=self.data3, atts=dict(name='Random', units='units'))
   # add to list
   self.vars = [self.var0, self.var1, self.var2, self.var3]
   self.data = [self.data0, self.data1, self.data2, self.data3]
   self.axes = [self.xax1,]
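For context on Example #4: a Taylor plot places each test variable by its (normalized) standard deviation and its correlation with the reference; a minimal NumPy sketch of the two statistics, independent of any plotting class:

import numpy as np

x = np.linspace(0, 10, 11)
ref = np.sin(x)                                     # reference series
test = ref + (np.random.rand(len(x)) - 0.5) * 0.5   # noisy test series

sigma = test.std() / ref.std()         # normalized standard deviation (radial coordinate)
corr = np.corrcoef(ref, test)[0, 1]    # correlation with the reference
theta = np.arccos(corr)                # azimuthal coordinate on the Taylor diagram
print(sigma, corr, theta)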
Example #5
 def testIrregularSurfacePlot(self):
   ''' test a color/surface plot with irregular coordinate variables '''
   fig,ax = getFigAx(1, name=sys._getframe().f_code.co_name[4:], **figargs) # use test method name as title
   assert fig.__class__.__name__ == 'MyFigure'
   assert fig.axes_class.__name__ == 'MyAxes'
   assert not isinstance(ax,(list,tuple)) # should return a "naked" axes
   var0 = self.var0
   # create coordinate variables
   xax,yax = var0.axes
   xx,yy = np.meshgrid(xax[:],yax[:], indexing='ij')
   xax = Variable(name='X Coordinate', units='X Units', data=xx, axes=var0.axes)
   yax = Variable(name='Y Coordinate', units='Y Units', data=yy, axes=var0.axes)
   # create plot
   plt = ax.surfacePlot(var0, flipxy=False, clog=False, xax=xax, yax=yax,
                        llabel=True, lprint=True, clim=var0.limits(),)
   assert plt
   # add label
   ax.addLabel(label=0, loc=4, lstroke=False, lalphabet=True, size=None, prop=None)
Example #6
 def setUp(self):
   ''' create two test variables '''
   # define plot ranges
   self.thetamin = 0.; self.Rmin = 0.; self.thetamax = 2*np.pi; self.Rmax = 2.
   # create theta axis and variable instances (values are radius values, I believe)
   theta1 = np.linspace(self.thetamin,self.thetamax,361)
   thax1 = Axis(atts=dict(name='$\\theta$-Axis', units='Radians'), coord=theta1) 
   var0 = Variable(axes=(thax1,), data=np.sin(theta1), atts=dict(name='Blue', units='units'))
   tmp = theta1.copy()*(self.Rmax-self.Rmin)/(self.thetamax-self.thetamin)
   var1 = Variable(axes=(thax1,), data=tmp, atts=dict(name='Red', units='units'))
   self.var0 = var0; self.var1 = var1; self.xax1 = theta1
   # create error variables with random noise
   noise0 = np.random.rand(len(thax1))*var0.data_array.std()/2.
   err0 = Variable(axes=(thax1,), data=noise0, atts=dict(name='Blue Noise', units='units'))
   noise1 = np.random.rand(len(thax1))*var1.data_array.std()/2.
   err1 = Variable(axes=(thax1,), data=noise1, atts=dict(name='Red Noise', units='units'))
   self.err1 = err1; self.err0 = err0
   # add to list
   self.vars = [var0, var1]
   self.errs = [err0, err1]
   self.axes = [thax1,]
Example #7
def addLengthAndNamesOfMonth(dataset, noleap=False, length=None, names=None):
  ''' Function to add the names and length of month to a NetCDF dataset. '''
  if not isinstance(dataset,Dataset): raise TypeError
  # attributes
  lenatts = dict(name='length_of_month', units='days',long_name='Length of Month')
  stratts = dict(name='name_of_month', units='', long_name='Name of the Month')
  # data
  if length is None: # leap year or no leap year
    if noleap: length = days_per_month_365
    else: length = days_per_month
  if names is None: names = name_of_month
  # create variables
  if isinstance(dataset, DatasetNetCDF) and 'w' in dataset.mode: 
    dataset.addVariable(Variable(axes=(dataset.time,), data=length, atts=lenatts), asNC=True)
    dataset.addVariable(Variable(axes=(dataset.time,), data=names, atts=stratts), asNC=True)
  else:
    # N.B.: char/string arrays are currently not supported as Variables
    dataset.addVariable(Variable(axes=(dataset.time,), data=length, atts=lenatts))
    dataset.addVariable(Variable(axes=(dataset.time,), data=names, atts=stratts))
  # return length variable
  return dataset.variables[lenatts['name']]
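addLengthAndNamesOfMonth relies on the module-level constants days_per_month, days_per_month_365 and name_of_month. The following is only a plausible sketch of what those constants contain (leap years folded into a climatological February length); the module's actual definitions may differ:

import numpy as np

name_of_month = ['January', 'February', 'March', 'April', 'May', 'June',
                 'July', 'August', 'September', 'October', 'November', 'December']
days_per_month_365 = np.array([31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31])  # no-leap calendar
days_per_month = days_per_month_365.astype(float)
days_per_month[1] = 28.2425  # average February length in the Gregorian calendar (assumption)

assert days_per_month_365.sum() == 365
assert np.isclose(days_per_month.sum(), 365.2425)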
Example #8
 def setUp(self):
     ''' create two test variables '''
     # create axis and variable instances (make *copies* of data and attributes!)
     x1 = np.linspace(0, 10, 11)
     xax1 = Axis(name='X1-Axis', units='X Units', coord=x1)
     var0 = Variable(axes=(xax1, ),
                     data=np.sin(x1),
                     atts=dict(name='relative', units=''))
     var1 = Variable(axes=(xax1, ),
                     data=x1.copy(),
                     atts=dict(name='blue', units='units'))
     self.var0 = var0
     self.var1 = var1
     self.xax1 = xax1
     x2 = np.linspace(2, 8, 13)
     xax2 = Axis(name='X2-Axis', units='X Units', coord=x2)
     var2 = Variable(name='purple',
                     units='units',
                     axes=(xax2, ),
                     data=(x2**2) / 5.)
     self.var2 = var2
     self.xax2 = xax2
     # create error variables with random noise
     noise1 = np.random.rand(len(xax1)) * var1.data_array.std() / 2.
     err1 = Variable(axes=(xax1, ),
                     data=noise1,
                     atts=dict(name='blue_std', units='units'))
     noise2 = np.random.rand(len(xax2)) * var2.data_array.std() / 2.
     err2 = Variable(name='purple',
                     units='units',
                     axes=(xax2, ),
                     data=noise2)
     self.err1 = err1
     self.err2 = err2
     # add to list
     self.vars = [var1, var2]
     self.errs = [err1, err2]
     self.axes = [xax1, xax2]
Example #9
def addLandMask(dataset, varname='precip', maskname='landmask', atts=None):
  ''' Add a landmask variable with meta data from a masked variable to a dataset. '''
  # check
  if not isinstance(dataset,Dataset): raise TypeError
  if dataset.hasVariable(maskname): 
    raise DatasetError, "The Dataset '%s' already has a field called '%s'."%(dataset.name,maskname)
  # attributes and meta data
  if atts is None:
    atts = default_varatts[maskname].copy()
    atts['long_name'] = 'Geographic Mask for Climatology Fields' 
    atts['description'] = 'data are valid where this mask is zero'  
  # axes and data
  var = dataset.variables[varname]
  axes = var.axes[-2:] # last two axes (i.e. map axes)
  data = var.getMask().__getitem__((0,)*(var.ndim-2)+(slice(None),)*2)
  if 'gdal' in dataset.__dict__ and dataset.gdal:
    if dataset.xlon not in axes or dataset.ylat not in axes: raise AxisError
  if not all([ax.name in ('x','y','lon','lat') for ax in axes]): raise AxisError
  # create variable and add to dataset
  if isinstance(dataset, DatasetNetCDF) and 'w' in dataset.mode: 
    dataset.addVariable(Variable(axes=axes, name=maskname, data=data, atts=atts), asNC=True)
  else: dataset.addVariable(Variable(axes=axes, name=maskname, data=data, atts=atts))
  # return mask variable
  return dataset.variables[maskname]
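The 2D mask in addLandMask is extracted by indexing the first element along every non-map axis and keeping full slices over the last two (map) axes; a NumPy-only illustration of that index tuple:

import numpy as np

mask = np.zeros((12, 5, 90, 180), dtype=bool)    # e.g. (time, level, lat, lon)
idx = (0,) * (mask.ndim - 2) + (slice(None),) * 2
mask2d = mask[idx]                               # equivalent to mask.__getitem__(idx) above
assert mask2d.shape == (90, 180)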
Example #10
def computeNetRadiation(dataset,
                        asVar=True,
                        lA=True,
                        lrad=True,
                        name='netrad'):
    ''' function to compute net radiation at surface for Penman-Monteith equation
      (http://www.fao.org/docrep/x0490e/x0490e07.htm#radiation)
  '''
    if lrad and 'SWDNB' in dataset and 'LWDNB' in dataset and 'SWUPB' in dataset and 'LWUPB' in dataset:
        data = radiation(dataset['SWDNB'][:], dataset['LWDNB'][:],
                         dataset['SWUPB'][:],
                         dataset['LWUPB'][:])  # downward total net radiation
    elif 'SWD' in dataset and 'GLW' in dataset and 'e' in dataset:
        if not lA: A = 0.23  # reference Albedo for grass
        elif lA and 'A' in dataset: A = dataset['A'][:]
        else:
            raise VariableError, "Actual Albedo is not available for radiation calculation."
        if 'TSmin' in dataset and 'TSmax' in dataset:
            Ts = dataset['TSmin'][:]
            TSmax = dataset['TSmax'][:]
        elif 'TSmean' in dataset:
            Ts = dataset['TSmean'][:]
            TSmax = None
        elif 'Ts' in dataset:
            Ts = dataset['Ts'][:]
            TSmax = None
        else:
            raise VariableError, "Either 'Ts' or 'TSmean' are required to compute net radiation for PET calculation."
        data = radiation_black(A, dataset['SWD'][:], dataset['GLW'][:],
                               dataset['e'][:], Ts,
                               TSmax)  # downward total net radiation
    else:
        raise VariableError, "Cannot determine net radiation calculation."
    # cast as Variable
    if asVar:
        var = Variable(data=data,
                       name=name,
                       units='W/m^2',
                       axes=dataset['SWD'].axes)
    else:
        var = data
    # return new variable
    return var
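The radiation() helper used in the first branch is not shown here; following the FAO reference in the docstring, the surface net radiation from the four WRF flux components is presumably the downward-minus-upward balance. A hedged sketch of that balance (not necessarily the library's actual implementation):

def net_radiation_sketch(swdnb, lwdnb, swupb, lwupb):
    # plausible surface net radiation balance in W/m^2 from the four flux
    # components used above; the actual radiation() helper may differ
    return (swdnb - swupb) + (lwdnb - lwupb)

print(net_radiation_sketch(swdnb=300., lwdnb=350., swupb=60., lwupb=400.))  # 190.0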
Example #11
def computeVaporDeficit(dataset):
    ''' function to compute water vapor deficit for Penman-Monteith PET
      (http://www.fao.org/docrep/x0490e/x0490e07.htm#air%20humidity)
  '''
    if 'Q2' in dataset: ea = dataset['Q2'][:]  # actual vapor pressure
    elif 'q2' in dataset and 'ps' in dataset:  # water vapor mixing ratio
        ea = dataset['q2'][:] * dataset['ps'][:] * 28.96 / 18.02
    else:
        raise VariableError, "Cannot determine 2m water vapor pressure for PET calculation."
    # get saturation water vapor
    if 'Tmin' in dataset and 'Tmax' in dataset:
        es = e_sat(dataset['Tmin'][:], dataset['Tmax'][:])
        # else: Es = e_sat(T) # backup, but not very accurate
    else:
        raise VariableError, "'Tmin' and 'Tmax' are required to compute saturation water vapor pressure for PET calculation."
    var = Variable(data=es - ea,
                   name='vapdef',
                   units='Pa',
                   axes=dataset['Tmin'].axes)
    # return new variable
    return var
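computeVaporDeficit depends on a helper e_sat(Tmin, Tmax). Following the FAO-56 reference linked in the docstring, saturation vapor pressure is commonly computed per temperature with the Tetens/FAO formula and averaged over the daily extremes; a hedged sketch in Pa (matching the 'vapdef' units), assuming temperatures in Kelvin (the module's actual e_sat may differ):

import numpy as np

def e_sat_sketch(Tmin, Tmax=None):
    # FAO-56 saturation vapor pressure in Pa, temperatures in Kelvin (sketch only)
    def es(T):
        Tc = T - 273.15
        return 1000. * 0.6108 * np.exp(17.27 * Tc / (Tc + 237.3))
    if Tmax is None: return es(Tmin)
    return (es(Tmin) + es(Tmax)) / 2.   # average of the daily extremes

print(e_sat_sketch(np.array([283.15]), np.array([293.15])))  # ~ [1783.] Pa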
Example #12
def loadGPCC_LTM(name=dataset_name,
                 varlist=None,
                 resolution='025',
                 varatts=ltmvaratts,
                 filelist=None,
                 folder=ltmfolder):
    ''' Get a properly formatted dataset the monthly accumulated GPCC precipitation climatology. '''
    # prepare input
    if resolution not in ('025', '05', '10', '25'):
        raise DatasetError, "Selected resolution '%s' is not available!" % resolution
    # translate varlist
    if varlist is None: varlist = varatts.keys()
    if varlist and varatts: varlist = translateVarNames(varlist, varatts)
    # load variables separately
    if 'p' in varlist:
        dataset = DatasetNetCDF(name=name,
                                folder=folder,
                                filelist=['normals_v2011_%s.nc' % resolution],
                                varlist=['p'],
                                varatts=varatts,
                                ncformat='NETCDF4_CLASSIC')
    if 's' in varlist:
        gauges = nc.Dataset(folder + 'normals_gauges_v2011_%s.nc' % resolution,
                            mode='r',
                            format='NETCDF4_CLASSIC')
        stations = Variable(data=gauges.variables['p'][0, :, :],
                            axes=(dataset.lat, dataset.lon),
                            **varatts['s'])
        # consolidate dataset
        dataset.addVariable(stations, asNC=False, copy=True)
    dataset = addGDALtoDataset(dataset,
                               projection=None,
                               geotransform=None,
                               gridfolder=grid_folder)
    # N.B.: projection should be auto-detected as geographic
    # return formatted dataset
    return dataset
Example #13
def loadKister_StnTS(station=None,
                     well=None,
                     folder=None,
                     varlist='default',
                     varatts=None,
                     name='observations',
                     title=None,
                     basin=None,
                     start_date=None,
                     end_date=None,
                     sampling=None,
                     period=None,
                     date_range=None,
                     llastIncl=True,
                     WSC_station=None,
                     basin_list=None,
                     filenames=None,
                     time_axis='datetime',
                     scalefactors=None,
                     metadata=None,
                     lkgs=False,
                     ntime=None,
                     **kwargs):
    ''' load EnKF ensemble data as formatted GeoPy Dataset '''
    if folder and not os.path.exists(folder): raise IOError(folder)
    # default values
    if isinstance(varlist, str) and varlist == 'default':
        varlist = []
        if station: varlist += ['discharge']
        if well: varlist += ['head']
    if varatts is None: varatts = variable_attributes.copy()
    # figure out time axis
    if date_range: start_date, end_date, sampling = date_range
    time = timeAxis(start_date=start_date,
                    end_date=end_date,
                    sampling=sampling,
                    date_range=date_range,
                    time_axis=time_axis,
                    llastIncl=llastIncl,
                    ntime=ntime,
                    varatts=varatts)
    ntime = len(time)
    # load WSC station meta data
    pass
    # initialize Dataset
    dataset = Dataset(name=name,
                      title=title if title else name.title(),
                      atts=metadata)
    # load well data
    if 'head' in varlist:
        if not well: raise ArgumentError
        if folder:
            filepath = os.path.join(folder, well)  # default output folder
        else:
            filepath = well
        data = readKister(filepath=filepath,
                          period=(start_date, end_date),
                          resample=sampling,
                          lvalues=True)
        assert ntime == len(data), data.shape
        atts = varatts['head']
        dataset += Variable(atts=atts, data=data, axes=(time, ))
    # load discharge/hydrograph data
    if 'discharge' in varlist:
        if not station: raise ArgumentError
        if folder:
            filepath = os.path.join(folder, station)  # default output folder
        else:
            filepath = station
        data = readKister(filepath=filepath,
                          period=(start_date, end_date),
                          resample=sampling,
                          lvalues=True)
        assert ntime == len(data), data.shape
        atts = varatts['discharge']
        if lkgs:
            data *= 1000.
            if atts['units'] == 'm^3/s': atts['units'] = 'kg/s'
        dataset += Variable(atts=atts, data=data, axes=(time, ))
    # return formatted Dataset
    if scalefactors is not None and scalefactors != 1:
        raise NotImplementedError
    return dataset
Example #14
def loadEnKF_StnTS(folder=None,
                   varlist='all',
                   varatts=None,
                   name='enkf',
                   title='EnKF',
                   basin=None,
                   start_date=None,
                   end_date=None,
                   sampling=None,
                   period=None,
                   date_range=None,
                   llastIncl=True,
                   WSC_station=None,
                   basin_list=None,
                   filenames=None,
                   prefix=None,
                   time_axis='datetime',
                   scalefactors=None,
                   metadata=None,
                   lkgs=False,
                   out_dir='out/',
                   yaml_file='../input_data/obs_meta.yaml',
                   lYAML=True,
                   nreal=None,
                   ntime=None,
                   **kwargs):
    ''' load EnKF ensemble data as formatted GeoPy Dataset '''
    out_folder = os.path.join(folder, 'out/')  # default output folder
    if not os.path.exists(out_folder): raise IOError(out_folder)
    # default values
    if isinstance(varlist, str) and varlist == 'hydro':
        varlist = Hydro.varlist
    elif isinstance(varlist, str) and varlist == 'obs':
        varlist = Obs.varlist
    elif isinstance(varlist, str) and varlist == 'all':
        varlist = Hydro.varlist + Obs.varlist
    elif not isinstance(varlist, (tuple, list)):
        raise TypeError(varlist)
    if varatts is None: varatts = variable_attributes.copy()
    varmap = {
        varatt['name']: enkf_name
        for enkf_name, varatt in list(varatts.items())
    }
    varlist = [varmap[var] for var in varlist]
    # load WSC station meta data
    pass
    # initialize Dataset
    dataset = Dataset(name=name,
                      title=title if title else name.title(),
                      atts=metadata)
    ensemble = None
    time = None
    observation = None
    # load observation/innovation data
    if any([var in Obs.atts for var in varlist]):
        # load data
        vardata = loadObs(varlist=[var for var in varlist if var in Obs.atts],
                          folder=out_folder,
                          lpandas=False)
        ntime, nobs, nreal = list(vardata.values())[0].shape
        # create Axes
        if time is None:
            # figure out time axis
            time = timeAxis(start_date=start_date,
                            end_date=end_date,
                            sampling=sampling,
                            date_range=date_range,
                            time_axis=time_axis,
                            llastIncl=llastIncl,
                            ntime=ntime,
                            varatts=varatts)
        elif len(time) != ntime:
            raise AxisError(time)
        if ensemble is None:
            # construct ensemble axis
            ensemble = Axis(atts=varatts['ensemble'],
                            coord=np.arange(1, nreal + 1))
        elif len(ensemble) != nreal:
            raise AxisError(ensemble)
        if observation is None:
            # construct observation axis
            observation = Axis(atts=varatts['observation'],
                               coord=np.arange(1, nobs + 1))
        elif len(observation) != nobs:
            raise AxisError(observation)
        # create variables
        for varname, data in list(vardata.items()):
            dataset += Variable(atts=varatts[varname],
                                data=data,
                                axes=(time, observation, ensemble))
        # load YAML data, if available
        if lYAML:
            # load YAML file
            yaml_path = os.path.join(out_folder, yaml_file)
            if not os.path.exists(yaml_path): raise IOError(yaml_path)
            with open(yaml_path, 'r') as yf:
                obs_meta = yaml.load(yf, Loader=yaml.SafeLoader)
            if obs_meta is None: raise IOError(yaml_path)  # not a YAML file?
            # create variables for constant (per-observation) metadata
            for cvar, cval in list(obs_meta[0].items()):
                if isinstance(cval, str): dtype, missing = np.string_, ''
                elif isinstance(cval, (np.integer, int)):
                    dtype, missing = np.int_, 0
                elif isinstance(cval, (np.inexact, float)):
                    dtype, missing = np.float_, np.NaN
                else:
                    dtype = None  # skip
                if dtype:
                    data = np.asarray([
                        missing if obs[cvar] is None else obs[cvar]
                        for obs in obs_meta
                    ],
                                      dtype=dtype)
                    if cvar in varatts: atts = varatts[cvar]
                    else: atts = dict(name=cvar, units='')
                    dataset += Variable(atts=atts,
                                        data=data,
                                        axes=(observation, ))
    elif ntime is None:
        # try to infer time dimension from backup.info file
        backup_info = os.path.join(folder, 'backup.info')
        if os.path.exists(backup_info):
            with open(backup_info, 'r') as bf:
                ntime = int(bf.readline())
    # load discharge/hydrograph data
    if 'discharge' in varlist:
        data = loadHydro(folder=out_folder, nreal=nreal, ntime=ntime)
        ntime, nreal = data.shape
        if time is None:
            # figure out time axis
            time = timeAxis(start_date=start_date,
                            end_date=end_date,
                            sampling=sampling,
                            date_range=date_range,
                            time_axis=time_axis,
                            llastIncl=llastIncl,
                            ntime=ntime,
                            varatts=varatts)
        elif len(time) != ntime:
            raise AxisError(time)
        if ensemble is None:
            # construct ensemble axis
            ensemble = Axis(atts=varatts['ensemble'],
                            coord=np.arange(1, nreal + 1))
        elif len(ensemble) != nreal:
            raise AxisError(ensemble)
        atts = varatts['discharge']
        if lkgs:
            data *= 1000.
            if atts['units'] == 'm^3/s': atts['units'] = 'kg/s'
        dataset += Variable(atts=atts, data=data, axes=(time, ensemble))
    # return formatted Dataset
    if scalefactors is not None and scalefactors != 1:
        raise NotImplementedError
    return dataset
Example #15
def computeClimatology(experiment,
                       filetype,
                       domain,
                       periods=None,
                       offset=0,
                       griddef=None,
                       varlist=None,
                       ldebug=False,
                       loverwrite=False,
                       lparallel=False,
                       pidstr='',
                       logger=None):
    ''' worker function to compute climatologies for given file parameters. '''
    # input type checks
    if not isinstance(experiment, Exp): raise TypeError
    if not isinstance(filetype, str): raise TypeError
    if not isinstance(domain, (np.integer, int)): raise TypeError
    if periods is not None and not (isinstance(periods, (tuple, list))
                                    and isInt(periods)):
        raise TypeError
    if not isinstance(offset, (np.integer, int)): raise TypeError
    if not isinstance(loverwrite, (bool, np.bool_)): raise TypeError
    if griddef is not None and not isinstance(griddef, GridDefinition):
        raise TypeError

    #if pidstr == '[proc01]': raise TypeError # to test error handling

    # load source
    dataset_name = experiment.name
    fileclass = fileclasses[filetype]  # used for target file name
    tsfile = fileclass.tsfile.format(domain, '')
    expfolder = experiment.avgfolder
    filepath = '{:s}/{:s}'.format(expfolder, tsfile)
    logger.info('\n\n{0:s}   ***   Processing Experiment {1:<15s}   ***   '.
                format(pidstr, "'{:s}'".format(dataset_name)) +
                '\n{0:s}   ***   {1:^37s}   ***   \n'.format(
                    pidstr, "'{:s}'".format(tsfile)))

    # check file and read begin/enddates
    if not os.path.exists(filepath):
        #raise IOError, "Source file '{:s}' does not exist!".format(filepath)
        # print message and skip
        skipmsg = "\n{:s}   >>>   File '{:s}' in dataset '{:s}' is missing --- skipping!".format(
            pidstr, tsfile, dataset_name)
        skipmsg += "\n{:s}   >>>   ('{:s}')\n".format(pidstr, filepath)
        logger.warning(skipmsg)
        # N.B.: this can cause a lot of error messages, when not all files are present
    else:  # if monthly source file exists
        import netCDF4 as nc
        ncfile = nc.Dataset(filepath, mode='r')
        begintuple = ncfile.begin_date.split('-')
        endtuple = ncfile.end_date.split('-')
        ncfile.close()
        # N.B.: at this point we don't want to initialize a full GDAL-enabled dataset, since we don't even
        #       know if we need it, and it creates a lot of overhead

        # determine age of source file
        if not loverwrite:
            sourceage = datetime.fromtimestamp(os.path.getmtime(filepath))

        # figure out start date
        filebegin = int(begintuple[0])  # first element is the year
        fileend = int(endtuple[0])  # first element is the year
        begindate = offset + filebegin
        if not (filebegin <= begindate <= fileend): raise DateError
        # handle cases where the first month in the record is not January
        firstmonth = int(begintuple[1])  # second element is the month
        shift = firstmonth - 1  # will be zero for January (01)

        ## loop over periods
        if periods is None: periods = [begindate - fileend]
        #   periods.sort(reverse=True) # reverse, so that largest chunk is done first
        source = None  # will later be assigned to the source dataset
        for period in periods:

            # figure out period
            enddate = begindate + period
            if filebegin > enddate:
                raise DateError('End date earlier than begin date.')
            if enddate - 1 > fileend:  # if filebegin is 1979 and the simulation is 10 years, fileend will be 1988, not 1989!
                # if end date is not available, skip period
                endmsg = "\n{:s}   ---   Invalid Period for '{:s}': End Date {:4d} not in File!   ---   \n".format(
                    pidstr, dataset_name, enddate)
                endmsg += "{:s}   ---   ('{:s}')\n".format(pidstr, filepath)
                logger.info(endmsg)

            else:  ## perform averaging for selected period

                # determine if sink file already exists, and what to do about it
                periodstr = '{0:4d}-{1:4d}'.format(begindate, enddate)
                gridstr = '' if griddef is None or griddef.name == 'WRF' else '_' + griddef.name
                filename = fileclass.climfile.format(domain, gridstr,
                                                     '_' + periodstr)
                if ldebug: filename = 'test_' + filename
                if lparallel: tmppfx = 'tmp_wrfavg_{:s}_'.format(pidstr[1:-1])
                else: tmppfx = 'tmp_wrfavg_'
                tmpfilename = tmppfx + filename
                assert os.path.exists(expfolder)
                filepath = expfolder + filename
                tmpfilepath = expfolder + tmpfilename
                lskip = False  # else just go ahead
                if os.path.exists(filepath):
                    if not loverwrite:
                        age = datetime.fromtimestamp(
                            os.path.getmtime(filepath))
                        # if sink file is newer than source file, skip (do not recompute)
                        if age > sourceage and os.path.getsize(filepath) > 1e6:
                            lskip = True
                        # N.B.: NetCDF files smaller than 1MB are usually incomplete header fragments from a previous crash
                        #print sourceage, age
                    if not lskip: os.remove(filepath)

                # depending on last modification time of file or overwrite setting, start computation, or skip
                if lskip:
                    # print message
                    skipmsg = "\n{:s}   >>>   Skipping: file '{:s}' in dataset '{:s}' already exists and is newer than source file.".format(
                        pidstr, filename, dataset_name)
                    skipmsg += "\n{:s}   >>>   ('{:s}')\n".format(
                        pidstr, filepath)
                    logger.info(skipmsg)
                else:

                    if griddef is None: lregrid = False
                    else: lregrid = True

                    ## begin actual computation
                    beginmsg = "\n{:s}   <<<   Computing '{:s}' (d{:02d}) Climatology from {:s}".format(
                        pidstr, dataset_name, domain, periodstr)
                    if not lregrid: beginmsg += "  >>>   \n"
                    else:
                        beginmsg += " ('{:s}' grid)  >>>   \n".format(
                            griddef.name)
                    logger.info(beginmsg)

                    ## actually load datasets
                    if source is None:
                        source = loadWRF_TS(
                            experiment=experiment,
                            filetypes=[filetype],
                            domains=domain)  # comes out as a tuple...
                    if not lparallel and ldebug:
                        logger.info('\n' + str(source) + '\n')

                    # prepare sink
                    if os.path.exists(tmpfilepath):
                        os.remove(tmpfilepath)  # remove old temp files
                    sink = DatasetNetCDF(name='WRF Climatology',
                                         folder=expfolder,
                                         filelist=[tmpfilename],
                                         atts=source.atts.copy(),
                                         mode='w')
                    sink.atts.period = periodstr
                    #           if lregrid: addGDALtoDataset(sink, griddef=griddef)

                    # initialize processing
                    CPU = CentralProcessingUnit(
                        source,
                        sink,
                        varlist=varlist,
                        tmp=lregrid,
                        feedback=ldebug)  # no need for lat/lon

                    # start processing climatology
                    if shift != 0:
                        logger.info(
                            '{0:s}   (shifting climatology by {1:d} month, to start with January)   \n'
                            .format(pidstr, shift))
                    CPU.Climatology(period=period,
                                    offset=offset,
                                    shift=shift,
                                    flush=False)
                    # N.B.: immediate flushing should not be necessary for climatologies, since they are much smaller!

                    # reproject and resample (regrid) dataset
                    if lregrid:
                        CPU.Regrid(griddef=griddef, flush=True)
                        logger.info('{:s}   ---   {:s}   ---   \n'.format(
                            pidstr, griddef.name))
                        logger.debug('{:s}   ---   {:s}   ---   \n'.format(
                            pidstr, str(griddef)))

                    # sync temporary storage with output dataset (sink)
                    CPU.sync(flush=True)

                    # add Geopotential Height Variance
                    if 'GHT_Var' in sink and 'Z_var' not in sink:
                        data_array = (sink['GHT_Var'].data_array -
                                      sink['Z'].data_array**2)**0.5
                        atts = dict(
                            name='Z_var',
                            units='m',
                            long_name=
                            'Square Root of Geopotential Height Variance')
                        sink += Variable(axes=sink['Z'].axes,
                                         data=data_array,
                                         atts=atts)

                    # add (relative) Vorticity Variance
                    if 'Vorticity_Var' in sink and 'zeta_var' not in sink:
                        data_array = (sink['Vorticity_Var'].data_array -
                                      sink['zeta'].data_array**2)**0.5
                        atts = dict(
                            name='zeta_var',
                            units='1/s',
                            long_name=
                            'Square Root of Relative Vorticity Variance')
                        sink += Variable(axes=sink['zeta'].axes,
                                         data=data_array,
                                         atts=atts)

                    # add names and length of months
                    sink.axisAnnotation('name_of_month',
                                        name_of_month,
                                        'time',
                                        atts=dict(
                                            name='name_of_month',
                                            units='',
                                            long_name='Name of the Month'))
                    if not sink.hasVariable('length_of_month'):
                        sink += Variable(name='length_of_month',
                                         units='days',
                                         axes=(sink.time, ),
                                         data=days_per_month,
                                         atts=dict(
                                             name='length_of_month',
                                             units='days',
                                             long_name='Length of Month'))

                    # close... and write results to file
                    sink.sync()
                    sink.close()
                    writemsg = "\n{:s}   >>>   Writing to file '{:s}' in dataset {:s}".format(
                        pidstr, filename, dataset_name)
                    writemsg += "\n{:s}   >>>   ('{:s}')\n".format(
                        pidstr, filepath)
                    logger.info(writemsg)
                    # rename file to proper name
                    if os.path.exists(filepath):
                        os.remove(filepath)  # remove old file
                    os.rename(tmpfilepath,
                              filepath)  # this will overwrite the old file

                    # print dataset
                    if not lparallel and ldebug:
                        logger.info('\n' + str(sink) + '\n')

                    # clean up (not sure if this is necessary, but there seems to be a memory leak...)
                    del sink, CPU
                    gc.collect()  # get rid of these guys immediately

        # clean up and return
        if source is not None:
            source.unload()
            del source
        # N.B.: source is only loaded once for all periods

    # N.B.: garbage is collected in multi-processing wrapper as well
    # return
    return 0  # so far, there is no measure of success, hence, if there is no crash...
Example #16
                                         asNC=True,
                                         copy=True,
                                         deepcopy=True)

                # add names and length of months
                sink.axisAnnotation('name_of_month',
                                    name_of_month,
                                    'time',
                                    atts=dict(name='name_of_month',
                                              units='',
                                              long_name='Name of the Month'))
                if not sink.hasVariable('length_of_month'):
                    sink += Variable(name='length_of_month',
                                     units='days',
                                     axes=(sink.time, ),
                                     data=days_per_month,
                                     atts=dict(name='length_of_month',
                                               units='days',
                                               long_name='Length of Month'))

                # apply higher resolution mask
                if griddef is not None:
                    sink.mask(sink.landmask,
                              maskSelf=False,
                              varlist=None,
                              skiplist=['prismmask', 'lon2d', 'lat2d'],
                              invert=False,
                              merge=True)

                # finalize changes
                sink.sync()
Example #17
def rasterVariable(name=None,
                   units=None,
                   axes=None,
                   atts=None,
                   plot=None,
                   dtype=None,
                   projection=None,
                   griddef=None,
                   file_pattern=None,
                   lgzip=None,
                   lgdal=True,
                   lmask=True,
                   fillValue=None,
                   lskipMissing=True,
                   path_params=None,
                   offset=0,
                   scalefactor=1,
                   transform=None,
                   time_axis=None,
                   lfeedback=False,
                   **kwargs):
    ''' function to read multi-dimensional raster data and construct a GDAL-enabled Variable object '''

    # print status
    if lfeedback: print "Loading variable '{}': ".format(name),  # no newline

    ## figure out axes arguments and load data
    # figure out axes (list/tuple of axes has to be ordered correctly!)
    axes_list = [ax.name for ax in axes[:-2]]
    # N.B.: the last two axes are the two horizontal map axes (x&y); they can be None and will be inferred from raster
    # N.B.: coordinate values can be overridden with keyword arguments, but length must be consistent
    # figure out coordinates for axes
    for ax in axes[:-2]:
        if ax.name in kwargs:
            # just make sure the dimensions match, but use keyword argument
            if not len(kwargs[ax.name]) == len(ax):
                raise AxisError(
                    "Length of Variable axis and raster file dimension have to be equal."
                )
        else:
            # use Axis coordinates and add to kwargs for readRasterArray call
            kwargs[ax.name] = tuple(ax.coord)
    # load raster data
    if lfeedback: print("'{}'".format(file_pattern))
    data, geotransform = readRasterArray(file_pattern,
                                         lgzip=lgzip,
                                         lgdal=lgdal,
                                         dtype=dtype,
                                         lmask=lmask,
                                         fillValue=fillValue,
                                         lgeotransform=True,
                                         axes=axes_list,
                                         lna=False,
                                         lskipMissing=lskipMissing,
                                         path_params=path_params,
                                         lfeedback=lfeedback,
                                         **kwargs)
    # shift and rescale
    if offset != 0: data += offset
    if scalefactor != 1: data *= scalefactor
    ## create Variable object and add GDAL
    # check map axes and generate if necessary
    xlon, ylat = getAxes(
        geotransform,
        xlen=data.shape[-1],
        ylen=data.shape[-2],
        projected=griddef.isProjected if griddef else bool(projection))
    axes = list(axes)
    if axes[-1] is None: axes[-1] = xlon
    elif len(axes[-1]) != len(xlon): raise AxisError(axes[-1])
    if axes[-2] is None: axes[-2] = ylat
    elif len(axes[-2]) != len(ylat): raise AxisError(axes[-2])
    # create regular Variable with data in memory
    var = Variable(name=name,
                   units=units,
                   axes=axes,
                   data=data,
                   dtype=dtype,
                   mask=None,
                   fillValue=fillValue,
                   atts=atts,
                   plot=plot)
    # apply transform (if any), now that we have axes etc.
    if transform is not None: var = transform(var=var, time_axis=time_axis)
    # add GDAL functionality
    if griddef is not None:
        # perform some consistency checks ...
        if projection is None:
            projection = griddef.projection
        elif projection != griddef.projection:
            raise ArgumentError(
                "Conflicting projection and GridDef!\n {} != {}".format(
                    projection, griddef.projection))
        if not np.isclose(geotransform, griddef.geotransform).all():
            raise ArgumentError(
                "Conflicting geotransform (from raster) and GridDef!\n {} != {}"
                .format(geotransform, griddef.geotransform))
        # ... and use provided geotransform (due to issues with numerical precision, this is usually better)
        geotransform = griddef.geotransform  # if we don't pass the geotransform explicitly, it will be recomputed from the axes
    # add GDAL functionality
    var = addGDALtoVar(var,
                       griddef=griddef,
                       projection=projection,
                       geotransform=geotransform,
                       gridfolder=None)

    # return final, GDAL-enabled variable
    return var
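getAxes presumably reconstructs the horizontal coordinate axes from the GDAL-style affine geotransform returned by readRasterArray; a minimal NumPy sketch of that mapping, assuming a cell-center convention (the actual helper may differ):

import numpy as np

def axes_from_geotransform(geotransform, xlen, ylen):
    # cell-center coordinates from a GDAL-style geotransform (x0, dx, 0, y0, 0, dy); sketch only
    x0, dx, _, y0, _, dy = geotransform
    xlon = x0 + dx * (np.arange(xlen) + 0.5)
    ylat = y0 + dy * (np.arange(ylen) + 0.5)
    return xlon, ylat

xlon, ylat = axes_from_geotransform((-180., 0.5, 0., 90., 0., -0.5), xlen=720, ylen=360)
assert xlon[0] == -179.75 and ylat[0] == 89.75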
Example #18
 # add meta data
 meta_dicts = [
     loadMetadata(well, conservation_authority=conservation_authority)
     for well in wells
 ]
 for key in meta_dicts[0].keys():
     if key in varatts: atts = varatts[key]
     elif key.lower() in varatts: atts = varatts[key.lower()]
     else: atts = dict(name=key, units='')
     if atts['units']:
         data = np.asarray([wmd[key] for wmd in meta_dicts],
                           dtype=np.float64)
     else:
         data = np.asarray([wmd[key] for wmd in meta_dicts])
     try:
         dataset += Variable(data=data, axes=(well_ax, ), **atts)
     except:
         pass
 # add names
 dataset += Variable(data=wells,
                     axes=(well_ax, ),
                     name='well_name',
                     units='',
                     atts=dict(long_name='Short Well Name'))
 for varname in ('d_piezo', 'well_name', 'depth'):
     print('')
     print((dataset[varname]))
     print((dataset[varname][:]))
 # add well heads
 data = np.zeros((
     len(well_ax),
Example #19
 # add landmask
 print('   ===   landmask   ===   ')
 tmpatts = dict(
     name='landmask',
     units='',
     long_name='Landmask for Climatology Fields',
     description='where this mask is non-zero, no data is available')
 # find a masked variable
 for var in sink.variables.values():
     if var.masked and var.gdal:
         mask = var.getMapMask()
         break
 # add variable to dataset
 sink.addVariable(Variable(name='landmask',
                           units='',
                           axes=(sink.lat, sink.lon),
                           data=mask,
                           atts=tmpatts),
                  asNC=True)
 sink.mask(sink.landmask)
 # add names and length of months
 sink.axisAnnotation('name_of_month',
                     name_of_month,
                     'time',
                     atts=dict(name='name_of_month',
                               units='',
                               long_name='Name of the Month'))
 #print '   ===   month   ===   '
 sink.addVariable(Variable(name='length_of_month',
                           units='days',
                           axes=(sink.time, ),
Example #20
def computePotEvapPM(dataset, lterms=True, lmeans=False):
    ''' function to compute potential evapotranspiration (according to Penman-Monteith method:
      https://en.wikipedia.org/wiki/Penman%E2%80%93Monteith_equation,
      http://www.fao.org/docrep/x0490e/x0490e06.htm#formulation%20of%20the%20penman%20monteith%20equation)
  '''
    # get net radiation at surface
    if 'netrad' in dataset: Rn = dataset['netrad'][:]  # net radiation
    elif 'Rn' in dataset: Rn = dataset['Rn'][:]  # alias
    else: Rn = computeNetRadiation(dataset, asVar=False)  # try to compute
    # heat flux in and out of the ground
    if 'grdflx' in dataset:
        G = dataset['grdflx'][:]  # heat release by the soil
    else:
        raise VariableError, "Cannot determine soil heat flux for PET calculation."
    # get wind speed
    if 'U2' in dataset: u2 = dataset['U2'][:]
    elif lmeans and 'U10' in dataset: u2 = wind(dataset['U10'][:], z=10)
    elif 'u10' in dataset and 'v10' in dataset:
        u2 = wind(u=dataset['u10'][:], v=dataset['v10'][:], z=10)
    else:
        raise VariableError, "Cannot determine 2m wind speed for PET calculation."
    # get psychrometric variables
    if 'ps' in dataset: p = dataset['ps'][:]
    else:
        raise VariableError, "Cannot determine surface air pressure for PET calculation."
    g = gamma(p)  # psychrometric constant (pressure-dependent)
    if 'Q2' in dataset: ea = dataset['Q2'][:]
    elif 'q2' in dataset:
        ea = dataset['q2'][:] * dataset['ps'][:] * 28.96 / 18.02
    else:
        raise VariableError, "Cannot determine 2m water vapor pressure for PET calculation."
    # get temperature
    if lmeans and 'Tmean' in dataset: T = dataset['Tmean'][:]
    elif 'T2' in dataset: T = dataset['T2'][:]
    else:
        raise VariableError, "Cannot determine 2m mean temperature for PET calculation."
    # get saturation water vapor
    if 'Tmin' in dataset and 'Tmax' in dataset:
        es = e_sat(dataset['Tmin'][:], dataset['Tmax'][:])
        # else: Es = e_sat(T) # backup, but not very accurate
    else:
        raise VariableError, "'Tmin' and 'Tmax' are required to compute saturation water vapor pressure for PET calculation."
    D = Delta(T)  # slope of saturation vapor pressure w.r.t. temperature
    # compute potential evapotranspiration according to Penman-Monteith method
    # (http://www.fao.org/docrep/x0490e/x0490e06.htm#fao%20penman%20monteith%20equation)
    if lterms:
        Dgu = evaluate(
            '( D + g * (1 + 0.34 * u2) ) * 86400')  # common denominator
        rad = evaluate('0.0352512 * D * (Rn + G) / Dgu')  # radiation term
        wnd = evaluate(
            'g * u2 * (es - ea) * 0.9 / T / Dgu')  # wind term (vapor deficit)
        pet = evaluate(
            '( 0.0352512 * D * (Rn + G) + ( g * u2 * (es - ea) * 0.9 / T ) ) / ( D + g * (1 + 0.34 * u2) ) / 86400'
        )
        import numpy as np
        assert np.allclose(pet, rad + wnd, equal_nan=True)
        rad = Variable(data=rad,
                       name='petrad',
                       units='kg/m^2/s',
                       axes=dataset['ps'].axes)
        wnd = Variable(data=wnd,
                       name='petwnd',
                       units='kg/m^2/s',
                       axes=dataset['ps'].axes)
    else:
        pet = evaluate(
            '( 0.0352512 * D * (Rn + G) + ( g * u2 * (es - ea) * 0.9 / T ) ) / ( D + g * (1 + 0.34 * u2) ) / 86400'
        )
    # N.B.: units have been converted to SI (mm/day -> 1/86400 kg/m^2/s, kPa -> 1000 Pa, and Celsius to K)
    pet = Variable(data=pet,
                   name='pet',
                   units='kg/m^2/s',
                   axes=dataset['ps'].axes)
    assert 'waterflx' not in dataset or pet.units == dataset[
        'waterflx'].units, pet
    # return new variable(s)
    return (pet, rad, wnd) if lterms else pet
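The numeric coefficients in the evaluate() expressions above follow from the FAO-56 Penman-Monteith form once the inputs are kept in SI units; a short sanity check of that unit bookkeeping (interpretation based on the FAO reference in the docstring):

# Rn, G are in W/m^2; converting to MJ/m^2/day requires a factor of 0.0864,
# and the FAO radiation-term coefficient is 0.408 mm/day per MJ/m^2/day:
assert abs(0.408 * 0.0864 - 0.0352512) < 1e-12
# the FAO wind-term coefficient is 900 with (es - ea) in kPa; here (es - ea) is in Pa:
assert 900. / 1000. == 0.9
# the final division by 86400 converts mm/day (= kg/m^2/day) to kg/m^2/s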
Example #21
def loadHGS_StnTS(station=None, varlist=None, varatts=None, folder=None, name=None, title=None,
                  start_date=None, end_date=None, run_period=15, period=None, lskipNaN=False, lcheckComplete=True,
                  basin=None, WSC_station=None, basin_list=None, filename=None, prefix=None, 
                  scalefactors=None, **kwargs):
  ''' Get a properly formatted WRF dataset with monthly time-series at station locations; as in
      the hgsrun module, the capitalized kwargs can be used to construct folders and/or names '''
  if folder is None or ( filename is None and station is None ): raise ArgumentError
  # try to find meta data for gage station from WSC
  HGS_station = station
  if basin is not None and basin_list is not None:
    station_name = station
    station = getGageStation(basin=basin, station=station if WSC_station is None else WSC_station, 
                             basin_list=basin_list) # only works with registered basins
    if station_name is None: station_name = station.name # backup, in case we don't have a HGS station name
    metadata = station.getMetaData() # load station meta data
    if metadata is None: raise GageStationError(name)
  else: 
    metadata = dict(); station = None; station_name =  None    
  # prepare name expansion arguments (all capitalized)
  expargs = dict(ROOT_FOLDER=root_folder, STATION=HGS_station, NAME=name, TITLE=title,
                 PREFIX=prefix, BASIN=basin, WSC_STATION=WSC_station)
  for key,value in metadata.items():
      if isinstance(value, str):
          expargs['WSC_'+key.upper()] = value # in particular, this includes WSC_ID
  if 'WSC_ID' in expargs: 
      if expargs['WSC_ID'][0] == '0': expargs['WSC_ID0'] = expargs['WSC_ID'][1:]
      else: raise DatasetError('Expected leading zero in WSC station ID: {}'.format(expargs['WSC_ID']))
  # exparg preset keys will get overwritten if capitalized versions are defined
  for key,value in kwargs.items():
    KEY = key.upper() # we only use capitalized keywords, and non-capitalized keywords are only used/converted
    if KEY == key or KEY not in kwargs: expargs[KEY] = value # if no capitalized version is defined
  # read folder and infer prefix, if necessary
  folder = folder.format(**expargs)
  if not os.path.exists(folder): raise IOError(folder)
  if expargs['PREFIX'] is None:
    with open('{}/{}'.format(folder,prefix_file), 'r') as pfx:
      expargs['PREFIX'] = prefix = ''.join(pfx.readlines()).strip()      
  # now assemble file name for station timeseries
  filename = filename.format(**expargs)
  filepath = '{}/{}'.format(folder,filename)
  if not os.path.exists(filepath): raise IOError(filepath)
  if station_name is None: 
      station_name = filename[filename.index('hydrograph.')+1:-4] if station is None else station
  # set meta data (and allow keyword expansion of name and title)
  metadata['problem'] = prefix
  metadata['station_name'] = metadata.get('long_name', station_name)
  if name is not None: name = name.format(**expargs) # name expansion with capitalized keyword arguments
  else: name = 'HGS_{:s}'.format(station_name)
  metadata['name'] = name; expargs['Name'] = name.title() # name in title format
  if title is None: title = '{{Name:s}} (HGS, {problem:s})'.format(**metadata)
  title = title.format(**expargs) # name expansion with capitalized keyword arguments
  metadata['long_name'] = metadata['title'] = title
  # now determine start data for date_parser
  if end_date is None: 
      if start_date and run_period: end_date = start_date + run_period 
      elif period: end_date = period[1]
      else: raise ArgumentError("Need to specify either 'start_date' & 'run_period' or 'period' to infer 'end_date'.")
  end_year,end_month,end_day = convertDate(end_date)
  if start_date is None: 
      if end_date and run_period: start_date = end_date - run_period 
      elif period: start_date = period[0]
      else: raise ArgumentError("Need to specify either 'end_date' & 'run_period' or 'period' to infer 'start_date'.")
  start_year,start_month,start_day = convertDate(start_date)
  if start_day != 1 or end_day != 1: 
    raise NotImplementedError('Currently only monthly data is supported.')
#   import functools
#   date_parser = functools.partial(date_parser, year=start_year, month=start_month, day=start_day)
#   # now load data using pandas ascii reader
#   data_frame = pd.read_table(filepath, sep='\s+', header=2, dtype=np.float64, index_col=['time'], 
#                              date_parser=date_parser, names=ascii_varlist)
#   # resample to monthly data
#   data_frame = data_frame.resample(resampling).agg(np.mean)
#       data = data_frame[flowvar].values
  # parse header
  if varlist is None: varlist = variable_list[:] # default list 
  with open(filepath, 'r') as f:
      line = f.readline(); lline = line.lower() # 1st line
      if not "hydrograph" in lline: raise GageStationError(line,filepath)
      # parse variables and determine columns
      line = f.readline(); lline = line.lower() # 2nd line
      if not "variables" in lline: raise GageStationError(line)
      variable_order = [v.strip('"').lower() for v in line[line.find('"'):].strip().split(',')]
  # figure out varlist and data columns
  if variable_order[0] == 'time': del variable_order[0] # only keep variables
  else: raise GageStationError(variable_order)
  variable_order = [hgs_variables[v] for v in variable_order] # replace HGS names with GeoPy names
  vardict = {v:i+1 for i,v in enumerate(variable_order)} # column mapping; +1 because time was removed
  variable_order = [v for v in variable_order if v in varlist or flow_to_flux[v] in varlist]
  usecols = tuple(vardict[v] for v in variable_order) # variable columns that need to loaded (except time, which is col 0)
  assert 0 not in usecols, usecols
  # load data as tab separated values
  data = np.genfromtxt(filepath, dtype=np.float64, delimiter=None, skip_header=3, usecols = (0,)+usecols)
  assert data.shape[1] == len(usecols)+1, data.shape
  if lskipNaN:
      data = data[np.isnan(data).sum(axis=1)==0,:]
  elif np.any( np.isnan(data) ):
      raise DataError("Missing values (NaN) encountered in hydrograph file; use 'lskipNaN' to ignore.\n('{:s}')".format(filepath))    
  time_series = data[:,0]; flow_data = data[:,1:]
  assert flow_data.shape == (len(time_series),len(usecols)), flow_data.shape
  # original time deltas in seconds
  time_diff = time_series.copy(); time_diff[1:] = np.diff(time_series) # time period between time steps
  assert np.all( time_diff > 0 ), filepath
  time_diff = time_diff.reshape((len(time_diff),1)) # reshape to make sure broadcasting works
  # integrate flow over time steps before resampling
  flow_data[1:,:] -= np.diff(flow_data, axis=0)/2. # get average flow between time steps
  flow_data *= time_diff # integrate flow in time interval by multiplying average flow with time period
  flow_data = np.cumsum(flow_data, axis=0) # integrate by summing up total flow per time interval
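  # N.B.: 'flow_data' now holds the cumulative discharge (volume or mass) since the first record;
  #       monthly mean flow rates are recovered below by differencing this cumulative series at
  #       consecutive month boundaries and dividing by the interval length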
  # generate regular monthly time steps
  start_datetime = np.datetime64(dt.datetime(year=start_year, month=start_month, day=start_day), 'M')
  end_datetime = np.datetime64(dt.datetime(year=end_year, month=end_month, day=end_day), 'M')
  time_monthly = np.arange(start_datetime, end_datetime+np.timedelta64(1, 'M'), dtype='datetime64[M]')
  assert time_monthly[0] == start_datetime, time_monthly[0]
  assert time_monthly[-1] == end_datetime, time_monthly[-1] 
  # convert monthly time series to regular array of seconds since start date
  time_monthly = ( time_monthly.astype('datetime64[s]') - start_datetime.astype('datetime64[s]') ) / np.timedelta64(1,'s')
  assert time_monthly[0] == 0, time_monthly[0]
  # interpolate integrated flow to new time axis
  #flow_data = np.interp(time_monthly, xp=time_series[:,0], fp=flow_data[:,0],).reshape((len(time_monthly),1))
  time_series = np.concatenate(([0],time_series), axis=0) # integrated flow at time zero must be zero...
  flow_data = np.concatenate(([[0,]*len(usecols)],flow_data), axis=0) # ... this is probably better than interpolation
  # N.B.: we are adding zeros here so we don't have to extrapolate to the left; on the right we just fill in NaN's
  # check how far the record falls short of the end of the requested period
  record_gap = time_monthly[-1] - time_series[-1]
  if record_gap > 5*86400.:
      if lcheckComplete:
        raise DataError("Data record ends more than 5 days before end of period: {} days".format(record_gap/86400.))
      else:
        warn("Data record ends more than 5 days before end of period: {} days".format(record_gap/86400.))
  elif record_gap > 3*86400. and lcheckComplete:
      warn("Data record ends more than 3 days before end of period: {} days".format(record_gap/86400.))
  flow_interp = si.interp1d(x=time_series, y=flow_data, kind='linear', axis=0, copy=False, 
                            bounds_error=False, fill_value=np.NaN, assume_sorted=True) 
  flow_data = flow_interp(time_monthly) # evaluate with call
  # compute monthly flow rate from interpolated integrated flow
  flow_data = np.diff(flow_data, axis=0) / np.diff(time_monthly, axis=0).reshape((len(time_monthly)-1,1))
  flow_data *= 1000 # convert from m^3/s to kg/s (assuming a water density of 1000 kg/m^3)
  # construct time axis
  start_time = 12*(start_year - 1979) + start_month -1
  end_time = 12*(end_year - 1979) + end_month -1
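  # e.g. January 1980 maps to month 12 on this axis: 12*(1980-1979) + 1 - 1 = 12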
  time = Axis(name='time', units='month', atts=dict(long_name='Month since 1979-01'), 
              coord=np.arange(start_time, end_time)) # not including the end month, e.g. 1979-01 to 1980-01 is 12 months
  assert len(time_monthly) == end_time-start_time+1
  assert flow_data.shape == (len(time),len(variable_order)), (flow_data.shape,len(time),len(variable_order))
  # construct dataset
  dataset = Dataset(atts=metadata)
  dataset.station = station # add gage station object, if available (else None)
  for i,flowvar in enumerate(variable_order):
      data = flow_data[:,i]
      fluxvar = flow_to_flux[flowvar]
      if flowvar in varlist:
        flowatts = variable_attributes[flowvar]
        # convert variables and put into dataset (monthly time series)
        if flowatts['units'] != 'kg/s': 
          raise VariableError("Hydrograph data is read as kg/s; flow variable does not match.\n{}".format(flowatts))
        dataset += Variable(data=data, axes=(time,), **flowatts)
      if fluxvar in varlist and 'shp_area' in metadata:
        # compute surface flux variable based on drainage area
        fluxatts = variable_attributes[fluxvar]
        if fluxatts['units'] != 'kg/m^2/s': raise VariableError("Flux variable is expected in units of kg/m^2/s.\n{}".format(fluxatts))
        data = data / metadata['shp_area'] # divide by drainage area (division creates a copy, so the flow variable is not modified)
        dataset += Variable(data=data, axes=(time,), **fluxatts)
  # apply analysis period
  if period is not None:
      dataset = dataset(years=period)
  # adjust scalefactors, if necessary
  if scalefactors:
      if isinstance(scalefactors,dict):
          dataset = updateScalefactor(dataset, varlist=scalefactors, scalefactor=None)
      elif isNumber(scalefactors):
          scalelist = ('discharge','seepage','flow')
          dataset = updateScalefactor(dataset, varlist=scalelist, scalefactor=scalefactors)
      else: 
          raise TypeError(scalefactors) 
  # return completed dataset
  return dataset
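# The monthly resampling above follows an integrate-interpolate-difference scheme: the irregular
# flow record is integrated to a cumulative volume, the cumulative volume is interpolated to
# regular month boundaries, and differencing then yields conservative monthly mean flow rates.
# The following self-contained sketch illustrates the same idea with synthetic data; it uses only
# NumPy/SciPy, and all names ('resample_to_monthly' etc.) are illustrative, not part of the module above.
import numpy as np
import scipy.interpolate as si

def resample_to_monthly(time_s, flow, bounds_s):
    ''' conservative resampling sketch: integrate, interpolate, difference '''
    # time between records (the first interval starts at t=0)
    dt = time_s.copy(); dt[1:] = np.diff(time_s)
    # average flow over each interval (midpoint between consecutive records)
    avg = flow.copy(); avg[1:] -= np.diff(flow)/2.
    # cumulative volume since t=0, with a leading zero so it is defined at the origin
    volume = np.concatenate(([0.], np.cumsum(avg*dt)))
    time_s = np.concatenate(([0.], time_s))
    # interpolate the cumulative volume to the regular month boundaries
    f = si.interp1d(time_s, volume, kind='linear', bounds_error=False, fill_value=np.nan)
    # monthly mean flow = volume difference divided by interval length
    return np.diff(f(bounds_s)) / np.diff(bounds_s)

# usage with synthetic data: an irregular record covering a bit more than three 30-day "months"
t = np.sort(np.random.uniform(1., 95*86400., 200)) # sample times in seconds
q = 1. + 0.1*np.sin(t/86400.)                      # flow rate in m^3/s
bounds = np.arange(4)*30*86400.                    # month boundaries at 0, 30, 60, 90 days
print(resample_to_monthly(t, q, bounds))           # three monthly mean flow rates (close to 1.0)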
Beispiel #22
0
def loadGageStation(basin=None, station=None, varlist=None, varatts=None, mode='climatology', 
                    aggregation=None, filetype='monthly', folder=None, name=None, period=None,
                    basin_list=None, lcheck=True, lexpand=True, lfill=True, lflatten=True,
                    lkgs=True, scalefactors=None, title=None):
  ''' function to load hydrograph climatologies and timeseries for a given basin '''
  ## resolve input
  if mode == 'timeseries' and aggregation: 
    raise ArgumentError('Timeseries does not support aggregation.')
  # get GageStation instance
  station = getGageStation(basin=basin, station=station, name=name, folder=folder, 
                           river=None, basin_list=basin_list, lcheck=True)
  # variable attributes
  if varlist is None: varlist = variable_list
  elif not isinstance(varlist,(list,tuple)): raise TypeError(varlist)
  varlist = list(varlist) # make copy of varlist to avoid interference
  if varatts is None: 
    if aggregation is None: varatts = variable_attributes_kgs if lkgs else variable_attributes_mms
    else: varatts = agg_varatts_kgs if lkgs else agg_varatts_mms
  elif not isinstance(varatts,dict): raise TypeError(varatts)
  
  ## read csv data
  # time series data and time coordinates
  lexpand = True; lfill = True # N.B.: these flags are enforced here and override the keyword arguments
  if mode == 'climatology': lexpand = False; lfill = False; lflatten = False
  data, time = station.getTimeseriesData(units='kg/s' if lkgs else 'm^3/s', lcheck=True, lexpand=lexpand, 
                                         lfill=lfill, period=period, lflatten=lflatten)
  # station meta data
  metadata = station.getMetaData(lcheck=True)
  den = metadata['shp_area'] if lkgs else ( metadata['shp_area'] / 1000. )
  ## create dataset for station
  dataset = Dataset(name='WSC', title=title or metadata['Station Name'], varlist=[], atts=metadata,) 
  if mode.lower() in ('timeseries','time-series'): 
    time = time.flatten(); data = data.flatten() # just to make sure...
    # make time axis based on time coordinate from csv file
    timeAxis = Axis(name='time', units='month', coord=time, # time series centered at 1979-01
                    atts=dict(long_name='Month since 1979-01'))
    dataset += timeAxis
    # load mean discharge
    dataset += Variable(axes=[timeAxis], data=data, atts=varatts['discharge'])
    # load mean runoff
    doa = data / den 
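    # N.B.: 'den' is derived from the drainage area ('shp_area') in the station metadata,
    #       so 'doa' is the area-averaged runoff corresponding to the mean discharge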
    dataset += Variable(axes=[timeAxis], data=doa, atts=varatts['runoff'])
  elif mode == 'climatology': 
    # N.B.: this is primarily for backwards compatibility; it should not be used anymore...
    # make common time axis for climatology
    te = 12 # length of time axis: 12 months
    climAxis = Axis(name='time', units='month', length=12, coord=np.arange(1,te+1,1)) # monthly climatology
    dataset.addAxis(climAxis, copy=False)
    # extract variables (min/max/mean are separate variables)
    # N.B.: this is mainly for backwards compatibility
    doa = data / den
    if aggregation is None or aggregation.lower() == 'mean':
      # load mean discharge
      tmpdata = nf.nanmean(data, axis=0)
      tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['discharge'])
      dataset.addVariable(tmpvar, copy=False)
      # load mean runoff
      tmpdata = nf.nanmean(doa, axis=0)
      tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['runoff'])
      dataset.addVariable(tmpvar, copy=False)
    if aggregation is None or aggregation.lower() == 'std':
      # load  discharge standard deviation
      tmpdata = nf.nanstd(data, axis=0, ddof=1) # very few values means large uncertainty!
      tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['discstd'])
      dataset.addVariable(tmpvar, copy=False)
      # load  runoff standard deviation
      tmpdata = nf.nanstd(doa, axis=0, ddof=1)
      tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['roff_std'])
      dataset.addVariable(tmpvar, copy=False)
    if aggregation is None or aggregation.lower() == 'sem':
      # load discharge standard error of the mean
      tmpdata = nf.nansem(data, axis=0, ddof=1) # very few values means large uncertainty!
      tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['discsem'])
      dataset.addVariable(tmpvar, copy=False)
      # load runoff standard error of the mean
      tmpdata = nf.nansem(doa, axis=0, ddof=1)
      tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['roff_sem'])
      dataset.addVariable(tmpvar, copy=False)
    if aggregation is None or aggregation.lower() == 'max':
      # load maximum discharge
      tmpdata = nf.nanmax(data, axis=0)
      tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['discmax'])
      dataset.addVariable(tmpvar, copy=False)
      # load maximum runoff
      tmpdata = nf.nanmax(doa, axis=0)
      tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['roff_max'])
      dataset.addVariable(tmpvar, copy=False)
    if aggregation is None or aggregation.lower() == 'min':
      # load minimum discharge
      tmpdata = nf.nanmin(data, axis=0)
      tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['discmin'])
      dataset.addVariable(tmpvar, copy=False)
      # load minimum runoff
      tmpdata = nf.nanmin(doa, axis=0)
      tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['roff_min'])
      dataset.addVariable(tmpvar, copy=False)
  else: 
    raise NotImplementedError("Time axis mode '{}' is not supported.".format(mode))
  # adjust scalefactors, if necessary
  if scalefactors:
      if isinstance(scalefactors,dict):
          dataset = updateScalefactor(dataset, varlist=scalefactors, scalefactor=None)
      elif isNumber(scalefactors):
          scalelist = ('discharge','StdDisc','SEMDisc','MaxDisc','MinDisc',)
          dataset = updateScalefactor(dataset, varlist=scalelist, scalefactor=scalefactors)
      else: 
          raise TypeError(scalefactors) 
  # return station dataset
  return dataset
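# The climatology branch above reduces the monthly record with nan-aware statistics; the 'nf.*'
# calls appear to be nan-aware reductions analogous to NumPy's nan functions. A minimal
# standalone sketch of that kind of reduction, using plain NumPy and hypothetical data:
import numpy as np

# hypothetical monthly discharge record: 30 years x 12 months, with a few gaps (NaN)
monthly = np.random.rand(30, 12) * 100.
monthly[np.random.rand(30, 12) < 0.05] = np.nan

# climatological statistics over the year axis (axis=0), analogous to the aggregations above
clim_mean = np.nanmean(monthly, axis=0)          # mean annual cycle (12 values)
clim_std  = np.nanstd(monthly, axis=0, ddof=1)   # inter-annual standard deviation
n_valid   = np.sum(np.isfinite(monthly), axis=0) # number of valid years per month
clim_sem  = clim_std / np.sqrt(n_valid)          # one common definition of the standard error
clim_max  = np.nanmax(monthly, axis=0)           # climatological monthly maximum
clim_min  = np.nanmin(monthly, axis=0)           # climatological monthly minimum
print(clim_mean, clim_sem)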