# Example no. 1
0
def loadShapeObservations(obs=None, seasons=None, basins=None, provs=None, shapes=None, stations=None, varlist=None, slices=None,
                          aggregation='mean', dataset_mode='time-series', lWSC=True, WSC_period=None, shapetype=None, 
                          variable_list=None, basin_list=None, lforceList=True, obs_ts=None, obs_clim=None, 
                          name=None, title=None, obs_list=None, ensemble_list=None, ensemble_product='inner', **kwargs):
  ''' Convenience function to load shape observations based on 'aggregation' and 'varlist' (mainly adds WSC gage data).
  
      Processing steps:
        1. every entry of 'varlist' is resolved through 'variable_list' (entries found there contribute their 
           '.vars'; unknown entries raise VariableError if 'lforceList' is set, otherwise they are used verbatim);
           the names in 'shp_params' are always included
        2. slices are configured with _configSlices; if any of 'basins', 'stations', 'provs', 'shapes' or 'slices'
           appear in 'ensemble_list', they are expanded with expandArgumentList first
        3. an entry of 'obs' that is an alias (in 'obs_aliases') or not in 'timeseries_datasets' is treated as the
           default dataset: if aggregation is 'mean', no seasons are requested and 'obs_clim' is given, the 
           climatology 'obs_clim' is loaded separately and re-inserted at the original position; otherwise the 
           entry is replaced by 'obs_ts'
        4. the remaining datasets are loaded with loadEnsemble; an empty Ensemble (using 'name' and 'title') is 
           created if nothing is left to load or an EmptyDatasetError is raised
        5. if 'lWSC' is set, 'basins' is a single string and any resolved variable is in 'WSC_vars', a gage 
           station dataset is loaded and appended to the ensemble (a GageStationError is silently ignored)
  
      Returns the resulting observation ensemble.
      
      Raises TypeError if 'obs' is not None, a string, tuple or list, VariableError for unknown variable lists 
      (with 'lforceList'), and ArgumentError for unsupported or inconsistent ensemble expansion arguments.
      
      N.B.: 'ensemble_list', 'obs' (if passed as a list) and list-valued entries of 'kwargs', 'slices' and 
            'shapetype' that are part of the ensemble expansion may be modified in place. 
  '''
  if obs_list is None: obs_list = observational_datasets
  if name is None: name = 'obs'
  if title is None: title = 'Observations'
  # variables for which ensemble expansion is not supported
  not_supported = ('season','seasons','varlist','mode','dataset_mode','provs','basins','shapes',) 
  # resolve variable list (no need to maintain order)
  if isinstance(varlist,str): varlist = [varlist]
  variables = set(shp_params)
  # N.B.: the loop variable must not be called 'name', otherwise it clobbers the ensemble name used further below
  for vl_name in varlist: 
      if vl_name in variable_list: variables.update(variable_list[vl_name].vars)
      elif lforceList: raise VariableError("Variable list '{}' does not exist.".format(vl_name))
      else: variables.add(vl_name)
  variables = list(variables)
  # determine if we need gage dataset
  lWSC = isinstance(basins,str) and any([var in WSC_vars for var in variables]) and lWSC # doesn't work if multiple basins are loaded
  # default obs list
  if obs is None: obs = ['Observations',]
  elif isinstance(obs,str): obs = [obs]
  elif isinstance(obs,tuple): obs = list(obs)
  elif not isinstance(obs,list): raise TypeError(obs)
  # configure slicing (extract basin/province/shape and period)
  expand_vars = ('basins','stations','provs','shapes','slices') # variables that need to be added to slices (and expanded first)
  if ensemble_list: expand_list = [varname for varname in expand_vars if varname in ensemble_list]
  # N.B.: 'expand_list' is only defined if 'ensemble_list' is non-empty; the short-circuit below relies on that
  if ensemble_list and expand_list:
      local_vars = locals(); exp_args = dict()
      for varname in expand_vars: # copy variables to expand right away
          exp_args[varname] = local_vars[varname]
      for varname in expand_list: # remove entries from ensemble expansion
          if  varname != 'slices': ensemble_list.remove(varname) # only 'slices' will continue to be expanded
      if 'slices' not in ensemble_list: ensemble_list.append('slices')
      slices = [_configSlices(**arg_dict) for arg_dict in expandArgumentList(expand_list=expand_list, lproduct=ensemble_product, **exp_args)]
  else:
      slices = _configSlices(slices=slices, basins=basins, provs=provs, shapes=shapes, stations=stations, period=None)
  # substitute default observational dataset and seperate aggregation methods
  iobs = None; clim_ens = None
  for i,obs_name in reverse_enumerate(obs):
      # N.B.: we need to iterate in reverse order, so that deleting items does not interfere with the indexing
      if obs_name in obs_aliases or obs_name not in timeseries_datasets:
          if iobs is not None: raise ArgumentError("Can only resolve one default dataset: {}".format(obs))
          if aggregation == 'mean' and seasons is None and obs_clim is not None: 
              # remove dataset entry from list (and all the arguments)
              del obs[i]; iobs = i # remember position of default obs in ensemble              
              clim_args = kwargs.copy(); slc = slices; shp = shapetype
              # clean up variables for ensemble expansion, if necessary
              if ensemble_list and ensemble_product.lower() == 'inner':
                  if 'names' in ensemble_list:
                      # names are expanded: pull the entry belonging to the default dataset out of every expanded list
                      obs_names = [obs_clim]
                      for arg in ensemble_list:
                          if arg in ('slices','shape'): pass # dealt with separately
                          elif arg in not_supported:
                              raise ArgumentError("Expansion of keyword '{:s}' is currently not supported in ensemble expansion.".format(arg))
                          elif arg in kwargs: 
                              clim_args[arg] = kwargs[arg][iobs]; del kwargs[arg][iobs]
                          else: 
                              raise ArgumentError("Keyword '{:s}' not found in keyword arguments.".format(arg))
                      if 'slices' in ensemble_list: slc = slices[iobs]; del slices[iobs]
                      if 'shape' in ensemble_list: shp = shapetype[iobs]; del shapetype[iobs]
                      clim_len = 1 # expect length of climatology ensemble
                  else: 
                      obs_names = obs_clim # no name expansion
                      clim_len = None # expect length of climatology ensemble
                      # NOTE(review): the 'slices'/'shape' tests below check membership in 'ensemble_list' rather 
                      #               than comparing against 'arg' - confirm that this is intended
                      for arg in ensemble_list:
                          if arg in not_supported:
                              raise ArgumentError("Expansion of keyword '{:s}' is currently not supported in ensemble expansion.".format(arg))
                          elif 'slices' in ensemble_list: l = len(slc) 
                          elif 'shape' in ensemble_list: l = len(shp)
                          elif arg in clim_args: l = len(clim_args[arg])
                          else: raise ArgumentError("Keyword '{:s}' not found in keyword arguments.".format(arg))
                          if clim_len is None: clim_len = l
                          elif l != clim_len: raise ArgumentError(arg,l,clim_len)
              elif ensemble_list and ensemble_product.lower() == 'outer':
                  # outer product: expected ensemble size is the product of the lengths of all expanded arguments
                  # NOTE(review): 'slices' and 'shape' are not part of 'clim_args' - confirm they cannot occur here
                  clim_len = 1
                  for arg in ensemble_list:
                      if arg != 'names':
                        assert isinstance(clim_args[arg],(list,tuple)), clim_args[arg] 
                        clim_len *= len(clim_args[arg])
                  obs_names = [obs_clim] if 'names' in ensemble_list else obs_clim
              else:
                  obs_names = [obs_clim]; clim_len = 1
              # now load climtology instead of time-series and skip aggregation
              try:
                  clim_ens = loadEnsemble(names=obs_names, season=seasons, aggregation=None, slices=slc, varlist=variables, 
                                          ldataset=False, dataset_mode='climatology', shape=shp,
                                          ensemble_list=ensemble_list, ensemble_product=ensemble_product, 
                                          obs_list=obs_list, basin_list=basin_list, **clim_args)
                  assert len(clim_ens) == clim_len, clim_ens
              except EmptyDatasetError: pass
          else: 
              obs[i] = obs_ts # trivial: just substitute default name and load time-series
  # prepare and load ensemble of observations
  if len(obs) > 0:
      if len(obs) == 1 and ensemble_list and 'names' not in ensemble_list: obs = obs[0]
      try:
          obsens = loadEnsemble(names=obs, season=seasons, aggregation=aggregation, slices=slices,
                                varlist=variables, ldataset=False, dataset_mode=dataset_mode, 
                                shape=shapetype, obs_list=obs_list, basin_list=basin_list, 
                                ensemble_list=ensemble_list, ensemble_product=ensemble_product, **kwargs)          
      except EmptyDatasetError:
          obsens = Ensemble(name=name, title=title, obs_list=obs_list, basetype=Dataset)
  else: 
      obsens = Ensemble(name=name, title=title, obs_list=obs_list, basetype=Dataset)
  # add default obs back in if they were removed earlier
  if clim_ens is not None:
      for clim_ds in clim_ens[::-1]: # add observations in correct order: adding backwards allows successive insertion ...
          obsens.insertMember(iobs,clim_ds) # ... at the point where the name block starts
  # load stream gage data from WSC; should not interfere with anything else; append to ensemble
  if lWSC: # another special case: river hydrographs
      from datasets.WSC import GageStationError, loadGageStation
      try:
          if aggregation is not None and seasons is None: dataset_mode = 'climatology' # handled differently with gage data
          if WSC_period is None: WSC_period = kwargs.get('obs_period',kwargs.get('period',None))
          dataset = loadGageStation(basin=basins, varlist=['runoff'], aggregation=aggregation, period=WSC_period, 
                                    mode=dataset_mode, filetype='monthly', basin_list=basin_list, lfill=True, lexpand=True) # always load runoff/discharge
          if seasons:
              if aggregation:
                  # method name is 'seasonal' + capitalized aggregation (all-uppercase names are kept as they are);
                  # only derive it if there is an aggregation, otherwise None.isupper() would fail
                  method = aggregation if aggregation.isupper() else aggregation.title() 
                  dataset = getattr(dataset,'seasonal'+method)(season=seasons, taxis='time')
              else: dataset = dataset.seasonalSample(season=seasons)
          if slices is not None: dataset = dataset(**slices) # slice immediately
          obsens += dataset.load()
      except GageStationError: 
          pass # just ignore, if gage station data is missing 
  # return ensembles (will be wrapped in a list, if BatchLoad is used)
  return obsens
# Example no. 2
0
def loadShapeEnsemble(names=None, seasons=None, basins=None, provs=None, shapes=None, varlist=None, 
                      aggregation='mean', slices=None, shapetype=None, filetypes=None, 
                      period=None, obs_period=None, WSC_period=None, name=None, title=None,
                      variable_list=None, WRF_exps=None, CESM_exps=None, WRF_ens=None, CESM_ens=None, 
                      basin_list=None, lforceList=True, obs_list=None, obs_ts=None, obs_clim=None, 
                      ensemble_list=None, ensemble_product='inner', **kwargs):
  ''' Convenience function to load shape ensembles (in Ensemble container) and/or observations.
  
      The entries of 'names' are split into observational datasets (those found in 'obs_list' or 'obs_aliases') 
      and all others. Observations are loaded through loadShapeObservations, the remaining datasets through 
      loadEnsemble; additional kwargs are passed on to both loaders. If both groups are present, the observations 
      are inserted into the loaded ensemble at their original positions in 'names', and any additional observation 
      datasets are added right after the last one. If only observations were requested, the observation ensemble 
      is returned.
      
      For an 'inner' ensemble product that expands 'names' along with other arguments, the other expanded argument
      lists are subset to the entries belonging to the respective group before they are passed on.
      
      The returned ensemble gets a 'resolution' attribute: the common 'resstr' attribute of the members loaded by
      loadEnsemble, or None if they differ or none of them has one.
      
      'name' and 'title' are the name and title of the resulting ensemble (not of the individual datasets). 
  '''
  names = list(names) # make a new list (copy)
  # separate observations
  if obs_list is None: obs_list = observational_datasets
  obs_names = []; iobs = []; ens_names = []; iens = []
  # N.B.: the loop variable must not be called 'name', otherwise it clobbers the ensemble name passed on below
  for i,ds_name in enumerate(names):
      if ds_name in obs_list or ds_name in obs_aliases:
          obs_names.append(ds_name); iobs.append(i)          
      else: 
          ens_names.append(ds_name); iens.append(i)
  assert len(iens) == len(ens_names) and len(iobs) == len(obs_names) 
  if len(obs_names) > 0:       
      # assemble arguments
      # N.B.: 'lforceList' is passed on, so that variable lists are resolved the same way as for the model ensemble
      obs_args = dict(obs=obs_names, seasons=seasons, basins=basins, provs=provs, shapes=shapes, varlist=varlist, 
                      slices=slices, aggregation=aggregation, shapetype=shapetype, 
                      period=period, obs_period=obs_period, obs_ts=obs_ts, obs_clim=obs_clim, 
                      variable_list=variable_list, basin_list=basin_list, WSC_period=WSC_period,
                      lforceList=lforceList, 
                      ensemble_list=ensemble_list, ensemble_product=ensemble_product, **kwargs)
      # check if we have to modify to preserve ensemble_list expansion
      if ensemble_list and ensemble_product == 'inner' and 'names' in ensemble_list and len(ensemble_list) > 1: 
          for key in ensemble_list:
              if key != 'names':
                  # only keep the entries that belong to observational datasets
                  ens_list = obs_args[key]
                  obs_args[key] = [ens_list[i] for i in iobs]
      # observations for basins require special treatment to merge basin averages with gage values
      # load observations by redirecting to appropriate loader function
      obsens = loadShapeObservations(name=name, title=title, obs_list=obs_list, **obs_args)
  else: obsens = []
  if len(ens_names) > 0: # has to be a list
      # prepare arguments
      variables, filetypes = _resolveVarlist(varlist=varlist, filetypes=filetypes, 
                                             params=shp_params, variable_list=variable_list, lforceList=lforceList)
      # configure slicing (extract basin/province/shape and period)
      slices = _configSlices(slices=slices, basins=basins, provs=provs, shapes=shapes, period=period)
      # assemble arguments
      ens_args = dict(names=ens_names, season=seasons, slices=slices, varlist=variables, shape=shapetype, 
                      aggregation=aggregation, period=period, obs_period=obs_period, 
                      WRF_exps=WRF_exps, CESM_exps=CESM_exps, WRF_ens=WRF_ens, CESM_ens=CESM_ens, filetypes=filetypes, 
                      ensemble_list=ensemble_list, ensemble_product=ensemble_product, **kwargs)
      # check if we have to remove obs datasets to preserve ensemble_list expansion
      if ensemble_list and ensemble_product == 'inner' and 'names' in ensemble_list and len(ensemble_list) > 1: 
          for key in ensemble_list:
              if key != 'names':
                  # only keep the entries that belong to non-observational datasets
                  ens_list = ens_args[key]
                  ens_args[key] = [ens_list[i] for i in iens]
      # load ensemble (no iteration here)
      shpens = loadEnsemble(name=name, title=title, obs_list=obs_list, **ens_args)
  else: shpens = Ensemble(name=name, title=title, basetype='Dataset')
  # get resolution tag (will be added below)
  res = None
  for member in shpens:
      if 'resstr' in member.atts:
          if res is None: res = member.atts['resstr']
          elif res != member.atts['resstr']:
              res = None; break # no common resolution
  # return ensembles (will be wrapped in a list, if BatchLoad is used)
  if len(obsens) > 0 and len(shpens) > 0:
      for obs_name,i in zip(obs_names,iobs): 
          shpens.insertMember(i,obsens[obs_name]) # add known observations in correct order
          del obsens[obs_name] # remove the ones we already know from list, so we can deal with the rest
      j = i + 1 # add remaining obs datasets after last one ('i' is the position of the last known observation)
      for i,obs_ds in enumerate(obsens): shpens.insertMember(j+i,obs_ds)
  elif len(obsens) > 0 and len(shpens) == 0:
      shpens = obsens
  shpens.resolution = res # add resolution tag now, to make sure it is there 
  return shpens