def __call__(self, load_list=None, lproduct='outer', inner_list=None, outer_list=None,
             lensemble=None, ens_name=None, ens_title=None, **kwargs):
    ''' Wrap the original loader function: expand the argument list, execute load_fct over
        the expanded argument list, and return a list or Ensemble of datasets.

        Arguments:
          load_list  : list of keyword names to expand into multiple load calls (or None)
          lproduct   : how to combine expanded arguments ('outer' product or 'inner' zip)
          inner_list : keywords to expand as inner product (passed to expandArgumentList)
          outer_list : keywords to expand as outer product (passed to expandArgumentList)
          lensemble  : return an Ensemble instead of a list; defaults to True iff ens_name is given
          ens_name   : name for the returned Ensemble (implies lensemble, unless overridden)
          ens_title  : title for the returned Ensemble
          **kwargs   : arguments passed through to the wrapped load function
    '''
    if load_list is None and inner_list is None and outer_list is None:
        # normal operation: no expansion - just call the wrapped loader once
        datasets = self.load_fct(**kwargs)
    else:
        # expansion required; default to returning an Ensemble if an ensemble name was given
        if lensemble is None: lensemble = ens_name is not None
        # expand the argument list into one keyword dict per dataset to be loaded
        kwargs_list = expandArgumentList(expand_list=load_list, lproduct=lproduct,
                                        inner_list=inner_list, outer_list=outer_list, **kwargs)
        # load one dataset per expanded argument set
        # N.B.: use a fresh name for the loop variable - the original shadowed **kwargs
        datasets = [self.load_fct(**load_args) for load_args in kwargs_list]
        # optionally wrap the loaded datasets in an Ensemble container
        if lensemble:
            datasets = Ensemble(members=datasets, name=ens_name, title=ens_title, basetype='Dataset')
    # return list or Ensemble of datasets
    return datasets
def loadEnsembleTS(names=None, name=None, title=None, varlist=None, aggregation=None, season=None,
                   prov=None, slices=None, obsslices=None, years=None, reduction=None, shape=None,
                   station=None, constraints=None, filetypes=None, domain=None, ldataset=False,
                   lcheckVar=False, lwrite=False, ltrimT=True, name_tags=None,
                   dataset_mode='time-series', lminmax=False, master=None, lall=True,
                   ensemble_list=None, ensemble_product='inner', lensembleAxis=False,
                   WRF_exps=None, CESM_exps=None, WRF_ens=None, CESM_ens=None, **kwargs):
    ''' A convenience function to load an ensemble of time-series, based on certain criteria;
        works with either stations or regions (shapes); seasonal/climatological aggregation is
        also supported.

        Key arguments (others are passed through to loadDataset / aggregation methods):
          names            : dataset name(s) to load; expanded via 'ensemble_list'
          varlist          : variables to load; station/shape parameters are appended as needed
          ldataset         : if True, return a single Dataset instead of an Ensemble
          ensemble_list    : which keyword arguments to expand into multiple datasets
          ensemble_product : 'inner' or 'outer' expansion of 'ensemble_list' arguments
          aggregation      : name of a climatology/statistics method (e.g. 'mean')
          season           : seasonal selection for aggregation or sampling
          reduction        : dict mapping axis name to an operation (method name or coordinate value)
        Raises ArgumentError if 'ldataset' is combined with 'ensemble_list', and
        EmptyDatasetError if nothing was loaded.
    '''
    # prepare variable list: make a copy, then append parameters required for station/shape selection
    if varlist is not None:
        varlist = list(varlist)[:] # copy list
        if station:
            for var in stn_params: # necessary to select stations
                if var not in varlist: varlist.append(var)
        if shape:
            for var in shp_params: # necessary to select shapes
                if var not in varlist: varlist.append(var)
    # prepare ensemble and arguments (a single dataset cannot be expanded into an ensemble)
    if ldataset and ensemble_list: raise ArgumentError
    elif not ldataset: ensemble = Ensemble(name=name, title=title, basetype='Dataset')
    # expand argument list: by default only 'names' is expanded (one dataset per name)
    if ensemble_list is None: ensemble_list = ['names'] if not ldataset else None
    loadargs = expandArgumentList(names=names, station=station, prov=prov, shape=shape,
                                  varlist=varlist, mode=dataset_mode, filetypes=filetypes,
                                  domains=domain, lwrite=lwrite, slices=slices,
                                  obsslices=obsslices, name_tags=name_tags, ltrimT=ltrimT,
                                  years=years, expand_list=ensemble_list,
                                  lproduct=ensemble_product, lensembleAxis=lensembleAxis)
    for loadarg in loadargs:
        # clean up arguments: pop per-dataset values that need special handling below
        name = loadarg.pop('names',None); name_tag = loadarg.pop('name_tags',None)
        slcs = loadarg.pop('slices',None); obsslcs = loadarg.pop('obsslices',None)
        # load individual dataset
        dataset = loadDataset(name=name, WRF_exps=WRF_exps, CESM_exps=CESM_exps,
                              WRF_ens=WRF_ens, CESM_ens=CESM_ens, **loadarg)
        # rename dataset: a tag starting with '_' is appended, otherwise it replaces the name
        if name_tag is not None:
            if name_tag[0] == '_': dataset.name += name_tag
            else: dataset.name = name_tag
        # apply slicing; observational datasets (name 'obs*' or all-uppercase) get extra slices
        if obsslcs and ( dataset.name[:3].lower() == 'obs' or dataset.name.isupper() ):
            if slcs is None: slcs = obsslcs
            else: slcs.update(**obsslcs) # add special slices for obs
            # N.B.: currently VarNC's can only be sliced once, because we can't combine slices yet
        if slcs: dataset = dataset(lminmax=lminmax, **slcs) # slice immediately
        if not ldataset: ensemble += dataset.load() # load data and add to ensemble
    # if input was not a list, just return the (last) dataset
    if ldataset: ensemble = dataset.load() # load data
    # select specific stations (if applicable)
    if not ldataset and station and constraints:
        from datasets.EC import selectStations
        ensemble = selectStations(ensemble, stnaxis='station', master=master, linplace=False,
                                  lall=lall, lcheckVar=lcheckVar, **constraints)
    # make sure all members have cluster meta data: copy station/shape parameter variables
    # from any member that has them to all members that don't
    # NOTE(review): this loop also runs when ldataset=True, i.e. 'ensemble' is a single Dataset;
    #               iterating a Dataset here presumably yields its variables - confirm intended
    for varname in stn_params + shp_params:
        # find a valid instance of this parameter variable among the members
        var = None
        for ds in ensemble:
            if varname in ds: var = ds[varname]; break
        # give it to those who have not
        if var is not None:
            var.load() # load data and add as regular variable (not VarNC)
            for ds in ensemble:
                if varname not in ds: ds.addVariable(var.copy())
    # apply general reduction operations: a string selects a method (e.g. 'mean'),
    # a number selects a coordinate value along the given axis
    if reduction is not None:
        for ax,op in reduction.iteritems():
            if isinstance(op, basestring): ensemble = getattr(ensemble,op)(axis=ax)
            elif isinstance(op, (int,np.integer,float,np.inexact)): ensemble = ensemble(**{ax:op})
    # extract seasonal/climatological values/extrema; fail early if nothing was loaded
    if (ldataset and len(ensemble)==0): raise EmptyDatasetError, varlist
    if not ldataset and any([len(ds)==0 for ds in ensemble]): raise EmptyDatasetError, ensemble
    # N.B.: the operations below should work with Ensembles as well as Datasets
    if aggregation:
        # method name: all-uppercase aggregations are kept as-is, others are title-cased
        method = aggregation if aggregation.isupper() else aggregation.title()
        if season is None: ensemble = getattr(ensemble,'clim'+method)(taxis='time', **kwargs)
        else: ensemble = getattr(ensemble,'seasonal'+method)(season=season, taxis='time', **kwargs)
    elif season: # sampling by season, but no aggregation
        ensemble = ensemble.seasonalSample(season=season)
    # return ensemble (or single dataset, if ldataset=True)
    return ensemble
def selectElements(datasets, axis, testFct=None, master=None, linplace=False, lall=False):
    ''' Extract common points that meet a specific criterion from a list of datasets.
        The test function has to accept the following input: index, dataset, axis.

        Arguments:
          datasets : list/tuple/Ensemble of Dataset instances sharing the named axis
          axis     : Axis instance or axis name along which common points are selected
          testFct  : optional predicate (index, dataset, axis) -> bool; if None, all
                     coordinate values common to every dataset are selected
          master   : dataset (index or name) used to evaluate the test; by default the
                     dataset with the shortest axis is used (more efficient)
          linplace : slice datasets in place (currently not implemented)
          lall     : evaluate the test on every dataset (slower); mutually exclusive with master
        Returns the sliced datasets (an Ensemble is returned as an Ensemble).
    '''
    if linplace: raise NotImplementedError, "Option 'linplace' does not work currently."
    # check input
    if not isinstance(datasets, (list,tuple,Ensemble)): raise TypeError
    if not all(isinstance(dataset,Dataset) for dataset in datasets): raise TypeError
    if not isCallable(testFct) and testFct is not None: raise TypeError
    if isinstance(axis, Axis): axis = axis.name # normalize to axis name
    if not isinstance(axis, basestring): raise TypeError
    if lall and master is not None: raise ArgumentError, "The options 'lall' and 'imaster' are mutually exclusive!"
    # save some ensemble parameters for later (the sliced members are re-wrapped at the end)
    lnotest = testFct is None
    lens = isinstance(datasets,Ensemble)
    if lens:
        enskwargs = dict(basetype=datasets.basetype, idkey=datasets.idkey,
                         name=datasets.name, title=datasets.title)
    # use dataset with shortest axis as master sample (more efficient)
    axes = [dataset.getAxis(axis) for dataset in datasets]
    if master is None:
        imaster = np.argmin([len(ax) for ax in axes]) # find shortest axis
    elif isinstance(master,basestring):
        # translate name of dataset into index
        imaster = None
        for i,dataset in enumerate(datasets):
            if dataset.name == master:
                imaster = i; break
        if imaster is None: raise ArgumentError, "Master '{:s}' not found in datasets".format(master)
    else: imaster = master
    # validate the master index
    if not imaster is None and not isinstance(imaster,(int,np.integer)): raise TypeError, imaster
    elif imaster >= len(datasets) or imaster < 0: raise ValueError
    maxis = axes.pop(imaster) # extract master axis for loop; 'axes' now holds only the others
    if lall:
        # move the master dataset to the front, so its order matches the index tuples below
        tmpds = tuple(datasets)
        if imaster != 0: tmpds = (tmpds[imaster],)+tmpds[:imaster]+tmpds[imaster+1:]
        test_fct = lambda i,ds: testFct(i, ds, axis) # prepare test function arguments
    else:
        test_fct = lambda i: testFct(i, datasets[imaster], axis)
    # loop over coordinate axis of the master dataset
    itpls = [] # list of valid index tuples (master index first, then the other datasets)
    for i,x in enumerate(maxis.coord):
        # only coordinate values present in every other axis qualify
        if all([x in ax.coord for ax in axes]): # only the other axes
            if lnotest:
                # no condition: just find and add indices
                itpls.append((i,)+tuple(ax.coord.searchsorted(x) for ax in axes))
            elif lall:
                # check test condition on all datasets (slower)
                tmpidx = (i,)+tuple(ax.coord.searchsorted(x) for ax in axes)
                if all(test_fct(ii,ds) for ii,ds in zip(tmpidx,tmpds)):
                    # add corresponding indices in each dataset to list
                    itpls.append(tmpidx)
            else:
                # check test condition on only one dataset (faster, default)
                if test_fct(i):
                    # add corresponding indices in each dataset to list
                    itpls.append((i,)+tuple(ax.coord.searchsorted(x) for ax in axes))
            # N.B.: since we can expect exact matches, plain searchsorted is fastest (side='left')
    # check if there is anything left...
    if len(itpls) == 0: raise DatasetError, "Aborting: no data points match all criteria!"
    # construct axis indices for each dataset (need to remember to move shortest axis back in line)
    idxs = [[] for ds in datasets] # create unique empty lists (not aliased!)
    for itpl in itpls:
        for i,idx in enumerate(itpl): idxs[i].append(idx)
    # the first element of every tuple belongs to the master dataset - move it back to its slot
    idxs.insert(imaster,idxs.pop(0)) # move first element back in line (where shortest axis was)
    idxs = [np.asarray(idxlst, dtype='int') for idxlst in idxs]
    # slice datasets using only positive results
    datasets = [ds(lidx=True, linplace=linplace, **{axis:idx}) for ds,idx in zip(datasets,idxs)]
    if lens: datasets = Ensemble(*datasets, **enskwargs) # restore Ensemble wrapper
    # return datasets
    return datasets
def loadShapeObservations(obs=None, seasons=None, basins=None, provs=None, shapes=None,
                          varlist=None, slices=None, aggregation='mean', shapetype=None,
                          period=None, variable_list=None, **kwargs):
    ''' Convenience function to load shape observations; the main purpose is to select sensible
        default datasets based on 'varlist', if no 'obs' are specified.

        Arguments:
          obs           : observational dataset name(s); defaults to 'Observations', which
                          triggers the variable-based defaults below
          varlist       : variable names, or keys into 'variable_list' (expanded to their .vars)
          aggregation   : climatological aggregation method (default 'mean')
          shapetype     : shape dataset type (default 'shpavg')
          period        : climatology period; also used to configure slicing
        Returns an Ensemble of loaded observational datasets (possibly empty).
        NOTE(review): 'varlist' is iterated unconditionally below, so passing varlist=None
                      raises TypeError - confirm whether a default list should be supplied.
    '''
    # prepare arguments
    if shapetype is None: shapetype = 'shpavg' # really only one in use
    # resolve variable list (no need to maintain order)
    if isinstance(varlist, basestring): varlist = [varlist]
    variables = set(shp_params) # shape parameters are always needed
    for name in varlist:
        # entries that name a variable set in 'variable_list' are expanded; others taken literally
        if name in variable_list: variables.update(variable_list[name].vars)
        else: variables.add(name)
    variables = list(variables)
    # figure out default datasets, based on the requested variables
    if obs is None: obs = 'Observations'
    # NOTE(review): lCRU is never set to True anywhere in this function, so the 'if lCRU:'
    #               branch below is dead code as written - confirm whether a condition is missing
    lUnity = lCRU = lWSC = False
    if obs[:3].lower() in ('obs', 'wsc'):
        # Unity covers CRU variables for plain climatological means
        if any(var in CRU_vars for var in variables):
            if aggregation == 'mean' and seasons is None:
                lUnity = True
                obs = []
        # WSC gage stations cover basin runoff variables
        if basins and any([var in WSC_vars for var in variables]):
            if aggregation.lower() in ('mean', 'std', 'sem', 'min', 'max') and seasons is None:
                lWSC = True
                obs = []
    if not isinstance(obs, (list, tuple)): obs = (obs, )
    # configure slicing (extract basin/province/shape and period)
    slices = _configSlices(slices=slices, basins=basins, provs=provs, shapes=shapes, period=period)
    if slices is not None:
        noyears = slices.copy()
        noyears.pop('years', None) # slices for climatologies (no year selection)
    # prepare and load ensemble of observations
    obsens = Ensemble(name='obs', title='Observations', basetype=Dataset)
    if len(obs) > 0:
        # regular operations with user-defined dataset(s)
        try:
            ensemble = loadEnsembleTS(names=obs, season=seasons, aggregation=aggregation,
                                      slices=slices, varlist=variables, shape=shapetype,
                                      ldataset=False, **kwargs)
            for ens in ensemble: obsens += ens
        except EmptyDatasetError: pass # an empty result simply contributes nothing
    if lUnity:
        # load Unity data instead of averaging CRU data
        if period is None: period = (1979, 1994) # default Unity climatology period
        dataset = loadDataset(name='Unity', varlist=variables, mode='climatology',
                              period=period, shape=shapetype)
        if slices is not None: dataset = dataset(**noyears) # slice immediately
        obsens += dataset.load()
    if lCRU:
        # this is basically regular operations with CRU as default
        obsens += loadEnsembleTS(names='CRU', season=seasons, aggregation=aggregation,
                                 slices=slices, varlist=variables, shape=shapetype,
                                 ldataset=True, **kwargs)
    if lWSC:
        # another special case: river hydrographs from WSC gage stations
        # from datasets.WSC import loadGageStation, GageStationError
        # NOTE(review): the import above is commented out - loadGageStation/GageStationError
        #               must come from module scope; confirm they are imported at file level
        try:
            dataset = loadGageStation(basin=basins, varlist=['runoff'], aggregation=aggregation,
                                      mode='climatology', filetype='monthly')
            if slices is not None: dataset = dataset(**noyears) # slice immediately
            obsens += dataset.load()
        except GageStationError:
            pass # just ignore, if gage station data is missing
    # return ensemble (will be wrapped in a list, if BatchLoad is used)
    return obsens
def rescaleDistributions(datasets, reference=None, target=None, lscale=False, suffixes=None, lglobal=False): ''' Rescale datasets, so that the mean of each variable matches the corresponding variable in the reference dataset; if a target is specified, the target scale factors are applied to all datasets, if target is None, each dataset is rescaled individually. ''' if not isinstance(datasets, (list, tuple, Ensemble)): raise TypeError if isinstance(datasets, Ensemble) and isinstance(reference, basestring): reference = datasets[reference] elif not isinstance(reference, Dataset): raise TypeError if target is None or target == 'auto': pass # every dataset is scaled individually or based on suffixes elif isinstance(datasets, Ensemble) and isinstance(target, basestring): target = datasets[target] elif not isinstance(target, Dataset): raise TypeError, target if suffixes is None: suffixes = ('-2050', '2100') # suffixes for scaling heuristic # determine scale factor def scaleFactor(reference, target, lscale=False, lglobal=False): ''' internal function to compute rescaling factors for common variables ''' scalefactors = dict( ) # return dict with scalefactors for all applicable variables for varname, refvar in reference.variables.iteritems(): if varname in target and isinstance( refvar, VarRV): # only varaibles that appear in both sets tgtvar = target.variables[varname] iloc = 1 if refvar.shape[-1] == 3 else 0 # insert dummy ensemble axis, if necessary refvar = refvar.insertAxes(new_axes=tgtvar.axes, lcopy=True, asVar=True, linplace=False) if refvar.axes[-1].name.startswith('params'): refdata = refvar.data_array.take(iloc, axis=-1) else: raise AxisError, refvar.axes[-1] if refvar.ndim < tgtvar.ndim: # N.B.: this is necessary, because WRF (target) can have an extra ensemble dimension that obs # typically don't have; then we just replicate the obs for each ensemble element from warnings import warn if lglobal: warn( "Scalefactors are being averaged over extra target dimensions 
(e.g. 'ensemble' axis)" ) dimdiff = tgtvar.ndim - refvar.ndim if refvar.shape != tgtvar.shape[dimdiff:]: raise AxisError, "{:s} != {:s}".format(tgtvar, refvar) refdata = refdata.reshape((1, ) * dimdiff + refvar.shape[:-1]) elif refvar.shape != tgtvar.shape: raise AxisError, "{:s} != {:s}".format(tgtvar, refvar) tgtdata = tgtvar.data_array.take(iloc, axis=-1) if lglobal: loc = np.mean(refdata) / np.mean(tgtdata) else: loc = refdata / tgtdata if lscale: iscale = 2 if refvar.shape[-1] == 3 else 1 if lglobal: scale = np.mean(refvar.data_array.take( iscale, axis=-1)) / np.mean( tgtvar.data_array.take(iscale, axis=-1)) else: scale = refvar.data_array.take( iscale, axis=-1) / tgtvar.data_array.take(iscale, axis=-1) scalefactors[varname] = loc, (scale / loc) else: scalefactors[varname] = loc return scalefactors # return dict with scale factors for variables # compute general scalefactors if target == 'auto': scalefactor_collection = dict() elif target is not None: scalefactors = scaleFactor(reference, target, lscale=lscale, lglobal=lglobal) # loop over datasets rescaled_datasets = [] for dataset in datasets: if dataset == reference: # determine variables that can be scaled (VarRV's) varlist = [ varname for varname, var in dataset.variables.iteritems() if isinstance(var, VarRV) ] rescaled_dataset = dataset.copy(varlist=varlist) # add mock scale factors for consistency for var in rescaled_dataset.variables.itervalues(): var.atts['loc_factor'] = 1 var.atts['scale_factor'] = 1 var.atts['shape_factor'] = 1 else: # generate new dataset (without variables, and in-memory) if isinstance(dataset, DatasetNetCDF): rescaled_dataset = dataset.copy(varlist=[], asNC=False) else: rescaled_dataset = dataset.copy(varlist=[]) # individual scaling if target is None or target == 'auto': parent = None if target == 'auto' and dataset.name.endswith(suffixes): for suffix in suffixes: if dataset.name.endswith( suffix): # check, which suffix, and remove it parent = dataset.name[:-(len(suffix) + 1)] 
break if parent and '-' not in parent: parent += '-1' # convention for WRF names if parent and parent in scalefactor_collection: scalefactors = scalefactor_collection[ parent] # use scale factors from parent else: # scale individually scalefactors = scaleFactor(reference, dataset, lscale=lscale, lglobal=lglobal) if target == 'auto': scalefactor_collection[ dataset.name] = scalefactors # for later use # loop over variables for varname, scalefactor in scalefactors.iteritems(): if varname in dataset: # rescale and add variable to new dataset var = dataset.variables[varname] if lscale: rsvar = var.rescale(loc=scalefactor[0], scale=scalefactor[1]) else: rsvar = var.rescale(loc=scalefactor) rescaled_dataset.addVariable(rsvar) # add dataset to list rescaled_datasets.append(rescaled_dataset) # put everythign into Ensemble, if input was Ensemble if isinstance(datasets, Ensemble): rescaled_datasets = Ensemble(*rescaled_datasets, name=datasets.ens_name, title=datasets.ens_title) # return datasets/ensemble return rescaled_datasets