def loadShapeObservations(obs=None, seasons=None, basins=None, provs=None, shapes=None, stations=None,
                          varlist=None, slices=None, aggregation='mean', dataset_mode='time-series',
                          lWSC=True, WSC_period=None, shapetype=None, variable_list=None, basin_list=None,
                          lforceList=True, obs_ts=None, obs_clim=None, name=None, title=None, obs_list=None,
                          ensemble_list=None, ensemble_product='inner', **kwargs):
    ''' Convenience function to load shape observations based on 'aggregation' and 'varlist'
        (mainly adds WSC gage data).

        Arguments:
          obs              : dataset name, or list/tuple of names; defaults to ['Observations'];
                             a list passed by the caller is modified in place
          seasons          : passed to loadEnsemble as 'season'; also used for seasonal gage aggregation
          basins, provs, shapes, stations, slices :
                             passed to _configSlices to build the slice arguments
          varlist          : name or list of names; entries found in 'variable_list' are expanded via
                             their '.vars' attribute; None is treated as an empty list
          aggregation      : aggregation mode passed to loadEnsemble and loadGageStation
          dataset_mode     : dataset mode passed to loadEnsemble
          lWSC, WSC_period : whether to append gage station data (only if 'basins' is a single string
                             and a variable from WSC_vars is requested), and the period to load
          shapetype        : passed to loadEnsemble as 'shape'
          variable_list    : container of named variable lists
          lforceList       : raise VariableError if a 'varlist' entry is not in 'variable_list'
          obs_ts, obs_clim : dataset names substituted for the default/alias entry (time-series or
                             climatology, respectively)
          name, title      : name and title of the fallback Ensemble (default 'obs' / 'Observations')
          obs_list         : observational datasets; defaults to observational_datasets
          ensemble_list, ensemble_product :
                             ensemble expansion settings passed to loadEnsemble; 'ensemble_list' is
                             modified in place when slice-type arguments are expanded
          kwargs           : passed to loadEnsemble

        Returns the ensemble of observations (will be wrapped in a list, if BatchLoad is used).

        Raises VariableError, ArgumentError or TypeError for invalid arguments (see code).
    '''
    if obs_list is None:
        obs_list = observational_datasets
    if name is None:
        name = 'obs'
    if title is None:
        title = 'Observations'
    # variables for which ensemble expansion is not supported
    not_supported = ('season', 'seasons', 'varlist', 'mode', 'dataset_mode', 'provs', 'basins', 'shapes',)
    # resolve variable list (no need to maintain order)
    if varlist is None:
        varlist = []  # the default must not crash the loop below
    elif isinstance(varlist, str):
        varlist = [varlist]
    variables = set(shp_params)
    # N.B.: the loop variable must not be called 'name', or it would overwrite the ensemble name
    for var_name in varlist:
        if var_name in variable_list:
            variables.update(variable_list[var_name].vars)
        elif lforceList:
            raise VariableError("Variable list '{}' does not exist.".format(var_name))
        else:
            variables.add(var_name)
    variables = list(variables)
    # determine if we need gage dataset (doesn't work if multiple basins are loaded)
    lWSC = isinstance(basins, str) and any(var in WSC_vars for var in variables) and lWSC
    # default obs list
    if obs is None:
        obs = ['Observations', ]
    elif isinstance(obs, str):
        obs = [obs]
    elif isinstance(obs, tuple):
        obs = list(obs)
    elif not isinstance(obs, list):
        raise TypeError(obs)
    # configure slicing (extract basin/province/shape and period)
    # variables that need to be added to slices (and expanded first)
    expand_vars = ('basins', 'stations', 'provs', 'shapes', 'slices')
    expand_list = [varname for varname in expand_vars if varname in ensemble_list] if ensemble_list else []
    if expand_list:
        # copy variables to expand right away
        exp_args = dict(basins=basins, stations=stations, provs=provs, shapes=shapes, slices=slices)
        # remove entries from ensemble expansion; only 'slices' will continue to be expanded
        # NOTE(review): this modifies the caller's 'ensemble_list' in place - confirm this is intended
        for varname in expand_list:
            if varname != 'slices':
                ensemble_list.remove(varname)
        if 'slices' not in ensemble_list:
            ensemble_list.append('slices')
        slices = [_configSlices(**arg_dict)
                  for arg_dict in expandArgumentList(expand_list=expand_list, lproduct=ensemble_product,
                                                     **exp_args)]
    else:
        slices = _configSlices(slices=slices, basins=basins, provs=provs, shapes=shapes,
                               stations=stations, period=None)
    # substitute default observational dataset and separate aggregation methods
    iobs = None
    clim_ens = None
    # N.B.: we need to iterate in reverse order, so that deleting items does not interfere with the indexing
    for i, obs_name in reverse_enumerate(obs):
        if obs_name in obs_aliases or obs_name not in timeseries_datasets:
            if iobs is not None:
                raise ArgumentError("Can only resolve one default dataset: {}".format(obs))
            if aggregation == 'mean' and seasons is None and obs_clim is not None:
                # remove dataset entry from list (and all the arguments)
                del obs[i]
                iobs = i  # remember position of default obs in ensemble
                clim_args = kwargs.copy()
                slc = slices
                shp = shapetype
                # clean up variables for ensemble expansion, if necessary
                if ensemble_list and ensemble_product.lower() == 'inner':
                    if 'names' in ensemble_list:
                        obs_names = [obs_clim]
                        for arg in ensemble_list:
                            if arg in ('slices', 'shape'):
                                pass  # dealt with separately
                            elif arg in not_supported:
                                raise ArgumentError("Expansion of keyword '{:s}' is currently not supported in ensemble expansion.".format(arg))
                            elif arg in kwargs:
                                # move the entry for the default obs into the climatology arguments
                                clim_args[arg] = kwargs[arg][iobs]
                                del kwargs[arg][iobs]
                            else:
                                raise ArgumentError("Keyword '{:s}' not found in keyword arguments.".format(arg))
                        if 'slices' in ensemble_list:
                            slc = slices[iobs]
                            del slices[iobs]
                        if 'shape' in ensemble_list:
                            shp = shapetype[iobs]
                            del shapetype[iobs]
                        clim_len = 1  # expected length of climatology ensemble
                    else:
                        obs_names = obs_clim  # no name expansion
                        clim_len = None  # expected length of climatology ensemble
                        for arg in ensemble_list:
                            # NOTE(review): the 'slices'/'shape' tests check the whole ensemble_list, not
                            #               'arg', so other arguments are not length-checked if either
                            #               is present - confirm whether 'arg == ...' was intended
                            if arg in not_supported:
                                raise ArgumentError("Expansion of keyword '{:s}' is currently not supported in ensemble expansion.".format(arg))
                            elif 'slices' in ensemble_list:
                                l = len(slc)
                            elif 'shape' in ensemble_list:
                                l = len(shp)
                            elif arg in clim_args:
                                l = len(clim_args[arg])
                            else:
                                raise ArgumentError("Keyword '{:s}' not found in keyword arguments.".format(arg))
                            if clim_len is None:
                                clim_len = l
                            elif l != clim_len:
                                raise ArgumentError(arg, l, clim_len)
                elif ensemble_list and ensemble_product.lower() == 'outer':
                    clim_len = 1
                    for arg in ensemble_list:
                        if arg != 'names':
                            assert isinstance(clim_args[arg], (list, tuple)), clim_args[arg]
                            clim_len *= len(clim_args[arg])
                    obs_names = [obs_clim] if 'names' in ensemble_list else obs_clim
                else:
                    obs_names = [obs_clim]
                    clim_len = 1
                # now load climatology instead of time-series and skip aggregation
                try:
                    clim_ens = loadEnsemble(names=obs_names, season=seasons, aggregation=None, slices=slc,
                                            varlist=variables, ldataset=False, dataset_mode='climatology',
                                            shape=shp, ensemble_list=ensemble_list,
                                            ensemble_product=ensemble_product, obs_list=obs_list,
                                            basin_list=basin_list, **clim_args)
                    assert len(clim_ens) == clim_len, clim_ens
                except EmptyDatasetError:
                    pass
            else:
                obs[i] = obs_ts  # trivial: just substitute default name and load time-series
    # prepare and load ensemble of observations
    if len(obs) > 0:
        if len(obs) == 1 and ensemble_list and 'names' not in ensemble_list:
            obs = obs[0]
        try:
            obsens = loadEnsemble(names=obs, season=seasons, aggregation=aggregation, slices=slices,
                                  varlist=variables, ldataset=False, dataset_mode=dataset_mode,
                                  shape=shapetype, obs_list=obs_list, basin_list=basin_list,
                                  ensemble_list=ensemble_list, ensemble_product=ensemble_product, **kwargs)
        except EmptyDatasetError:
            obsens = Ensemble(name=name, title=title, obs_list=obs_list, basetype=Dataset)
    else:
        obsens = Ensemble(name=name, title=title, obs_list=obs_list, basetype=Dataset)
    # add default obs back in if they were removed earlier
    if clim_ens is not None:
        # add observations in correct order: adding backwards allows successive insertion
        # at the point where the name block starts
        for clim_ds in clim_ens[::-1]:
            obsens.insertMember(iobs, clim_ds)
    # load stream gage data from WSC; should not interfere with anything else; append to ensemble
    if lWSC:  # another special case: river hydrographs
        from datasets.WSC import GageStationError, loadGageStation
        try:
            if aggregation is not None and seasons is None:
                dataset_mode = 'climatology'  # handled differently with gage data
            if WSC_period is None:
                WSC_period = kwargs.get('obs_period', kwargs.get('period', None))
            # always load runoff/discharge
            dataset = loadGageStation(basin=basins, varlist=['runoff'], aggregation=aggregation,
                                      period=WSC_period, mode=dataset_mode, filetype='monthly',
                                      basin_list=basin_list, lfill=True, lexpand=True)
            if seasons:
                if aggregation:
                    # only derive the method name when there is an aggregation mode, so that
                    # aggregation=None reaches the sampling branch instead of raising AttributeError
                    method = aggregation if aggregation.isupper() else aggregation.title()
                    dataset = getattr(dataset, 'seasonal' + method)(season=seasons, taxis='time')
                else:
                    dataset = dataset.seasonalSample(season=seasons)
            if slices is not None:
                dataset = dataset(**slices)  # slice immediately
            obsens += dataset.load()
        except GageStationError:
            pass  # just ignore, if gage station data is missing
    # return ensembles (will be wrapped in a list, if BatchLoad is used)
    return obsens
def loadShapeEnsemble(names=None, seasons=None, basins=None, provs=None, shapes=None, varlist=None,
                      aggregation='mean', slices=None, shapetype=None, filetypes=None, period=None,
                      obs_period=None, WSC_period=None, name=None, title=None, variable_list=None,
                      WRF_exps=None, CESM_exps=None, WRF_ens=None, CESM_ens=None, basin_list=None,
                      lforceList=True, obs_list=None, obs_ts=None, obs_clim=None, ensemble_list=None,
                      ensemble_product='inner', **kwargs):
    ''' Convenience function to load shape ensembles (in Ensemble container) or observations;
        kwargs are passed to loadEnsembleTS.

        The entries of 'names' are split into observations (entries found in 'obs_list' or
        obs_aliases) and other datasets; observations are loaded through loadShapeObservations,
        everything else through loadEnsemble, and the observations are then inserted back into
        the ensemble at their original positions.

        Arguments:
          names            : iterable of dataset names (copied, not modified)
          name, title      : name and title passed on to the ensemble loaders
          obs_list         : observational datasets; defaults to observational_datasets
          ensemble_list, ensemble_product :
                             ensemble expansion settings; for an 'inner' product that includes
                             'names', the other expanded arguments are split along with the names
          (the remaining arguments are passed on to loadShapeObservations, _resolveVarlist,
           _configSlices and loadEnsemble)

        Returns the combined ensemble with a 'resolution' attribute: the common 'resstr' of the
        non-observational members, or None if there is no common resolution.
    '''
    names = list(names)  # make a new list (copy)
    # separate observations
    if obs_list is None:
        obs_list = observational_datasets
    obs_names = []
    iobs = []
    ens_names = []
    iens = []
    # N.B.: the loop variable must not be called 'name', or it would overwrite the ensemble name
    for idx, dataset_name in enumerate(names):
        if dataset_name in obs_list or dataset_name in obs_aliases:
            obs_names.append(dataset_name)
            iobs.append(idx)
        else:
            ens_names.append(dataset_name)
            iens.append(idx)
    assert len(iens) == len(ens_names) and len(iobs) == len(obs_names)
    # expanded arguments have to be split between observations and other datasets
    lsplit = bool(ensemble_list and ensemble_product == 'inner'
                  and 'names' in ensemble_list and len(ensemble_list) > 1)
    if obs_names:
        # assemble arguments
        obs_args = dict(obs=obs_names, seasons=seasons, basins=basins, provs=provs, shapes=shapes,
                        varlist=varlist, slices=slices, aggregation=aggregation, shapetype=shapetype,
                        period=period, obs_period=obs_period, obs_ts=obs_ts, obs_clim=obs_clim,
                        variable_list=variable_list, basin_list=basin_list, WSC_period=WSC_period,
                        ensemble_list=ensemble_list, ensemble_product=ensemble_product, **kwargs)
        # check if we have to modify to preserve ensemble_list expansion
        if lsplit:
            for key in ensemble_list:
                if key != 'names':
                    full_list = obs_args[key]
                    obs_args[key] = [full_list[k] for k in iobs]
        # observations for basins require special treatment to merge basin averages with gage values
        # load observations by redirecting to appropriate loader function
        obsens = loadShapeObservations(name=name, title=title, obs_list=obs_list, **obs_args)
    else:
        obsens = []
    if ens_names:  # has to be a list
        # prepare arguments
        variables, filetypes = _resolveVarlist(varlist=varlist, filetypes=filetypes, params=shp_params,
                                               variable_list=variable_list, lforceList=lforceList)
        # configure slicing (extract basin/province/shape and period)
        slices = _configSlices(slices=slices, basins=basins, provs=provs, shapes=shapes, period=period)
        # assemble arguments
        ens_args = dict(names=ens_names, season=seasons, slices=slices, varlist=variables,
                        shape=shapetype, aggregation=aggregation, period=period, obs_period=obs_period,
                        WRF_exps=WRF_exps, CESM_exps=CESM_exps, WRF_ens=WRF_ens, CESM_ens=CESM_ens,
                        filetypes=filetypes, ensemble_list=ensemble_list,
                        ensemble_product=ensemble_product, **kwargs)
        # check if we have to remove obs datasets to preserve ensemble_list expansion
        # N.B.: 'ensemble_list' is evaluated again here, after the observations were loaded
        if ensemble_list and ensemble_product == 'inner' and 'names' in ensemble_list and len(ensemble_list) > 1:
            for key in ensemble_list:
                if key != 'names':
                    full_list = ens_args[key]
                    ens_args[key] = [full_list[k] for k in iens]
        # load ensemble (no iteration here)
        shpens = loadEnsemble(name=name, title=title, obs_list=obs_list, **ens_args)
    else:
        shpens = Ensemble(name=name, title=title, basetype='Dataset')
    # get resolution tag (will be added below)
    res = None
    for member in shpens:
        if 'resstr' in member.atts:
            if res is None:
                res = member.atts['resstr']
            elif res != member.atts['resstr']:
                res = None
                break  # no common resolution
    # return ensembles (will be wrapped in a list, if BatchLoad is used)
    if len(obsens) > 0 and len(shpens) > 0:
        for obs_name, idx in zip(obs_names, iobs):
            shpens.insertMember(idx, obsens[obs_name])  # add known observations in correct order
            del obsens[obs_name]  # remove the ones we already know, so we can deal with the rest
        offset = idx + 1  # add remaining obs datasets after last one
        for k, obs_ds in enumerate(obsens):
            shpens.insertMember(offset + k, obs_ds)
    elif len(obsens) > 0 and len(shpens) == 0:
        shpens = obsens
    shpens.resolution = res  # add resolution tag now, to make sure it is there
    return shpens