Code example #1
File: common.py  Project: EdwardBetts/GeoPy
 def __call__(self, load_list=None, lproduct='outer', inner_list=None, outer_list=None, 
              lensemble=None, ens_name=None, ens_title=None, **kwargs):
   ''' wrap original function: expand argument list, execute load_fct over argument list, 
       and return a list or Ensemble of datasets '''
   # decide what to do
   if load_list is None and inner_list is None and outer_list is None:
     # normal operation: no expansion
     datasets = self.load_fct(**kwargs)
   else:
     # expansion required
      # by default, build an Ensemble if an ensemble name was given
      lensemble = ens_name is not None if lensemble is None else lensemble
     # figure out arguments
     kwargs_list = expandArgumentList(expand_list=load_list, lproduct=lproduct, 
                                      inner_list=inner_list, outer_list=outer_list, **kwargs)
     # load datasets
     datasets = []
     for kwargs in kwargs_list:    
       # load dataset
       datasets.append(self.load_fct(**kwargs))    
     # construct ensemble
     if lensemble:
       datasets = Ensemble(members=datasets, name=ens_name, title=ens_title, basetype='Dataset')
   # return list or ensemble of datasets
   return datasets
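
Both this wrapper and loadEnsembleTS below delegate the argument expansion to expandArgumentList. A minimal sketch of the expected behavior in 'outer' product mode follows; this is a simplified stand-in for illustration, not the GeoPy original:

import itertools

def expand_argument_list_sketch(expand_list=None, lproduct='outer', **kwargs):
    ''' simplified illustration: expand the list-valued arguments named in
        expand_list into one kwargs dict per combination (outer product) '''
    assert lproduct == 'outer'  # this sketch only covers the outer-product mode
    expand_list = expand_list or []
    fixed = {key: val for key, val in kwargs.items() if key not in expand_list}
    combos = itertools.product(*[kwargs[key] for key in expand_list])
    return [dict(fixed, **dict(zip(expand_list, combo))) for combo in combos]

# two names x two periods -> four argument sets, each retaining domain=2
args = expand_argument_list_sketch(expand_list=['name', 'period'],
                                   name=['A', 'B'],
                                   period=[(1979, 1994), (1994, 2009)],
                                   domain=2)
assert len(args) == 4 and all(arg['domain'] == 2 for arg in args)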
Code example #2
File: common.py  Project: EdwardBetts/GeoPy
def loadEnsembleTS(names=None, name=None, title=None, varlist=None, aggregation=None, season=None, prov=None, 
                   slices=None, obsslices=None, years=None, reduction=None, shape=None, station=None, 
                   constraints=None, filetypes=None, domain=None, ldataset=False, lcheckVar=False, 
                   lwrite=False, ltrimT=True, name_tags=None, dataset_mode='time-series', lminmax=False,
                   master=None, lall=True, ensemble_list=None, ensemble_product='inner', lensembleAxis=False,
                   WRF_exps=None, CESM_exps=None, WRF_ens=None, CESM_ens=None, **kwargs):
  ''' a convenience function to load an ensemble of time-series, based on certain criteria; works 
      with either stations or regions; seasonal/climatological aggregation is also supported '''
  # prepare variable list
  if varlist is not None:
    varlist = list(varlist)[:] # copy list
    if station: 
      for var in stn_params: # necessary to select stations
        if var not in varlist: varlist.append(var)
    if shape: 
      for var in shp_params: # necessary to select shapes
        if var not in varlist: varlist.append(var)
  # prepare ensemble and arguments
  if ldataset and ensemble_list: raise ArgumentError, "Options 'ldataset' and 'ensemble_list' are mutually exclusive!"
  elif not ldataset: ensemble = Ensemble(name=name, title=title, basetype='Dataset')
  # expand argument list
  if ensemble_list is None: ensemble_list = ['names'] if not ldataset else None
  loadargs = expandArgumentList(names=names, station=station, prov=prov, shape=shape, varlist=varlist, 
                                mode=dataset_mode, filetypes=filetypes, domains=domain, lwrite=lwrite,
                                slices=slices, obsslices=obsslices, name_tags=name_tags, ltrimT=ltrimT,
                                years=years, expand_list=ensemble_list, lproduct=ensemble_product,
                                lensembleAxis=lensembleAxis)
  for loadarg in loadargs:
    # clean up arguments
    name = loadarg.pop('names',None); name_tag = loadarg.pop('name_tags',None)
    slcs = loadarg.pop('slices',None); obsslcs = loadarg.pop('obsslices',None)    
    # load individual dataset
    dataset = loadDataset(name=name, WRF_exps=WRF_exps, CESM_exps=CESM_exps, WRF_ens=WRF_ens, CESM_ens=CESM_ens, **loadarg)
    if name_tag is not None: 
      if name_tag[0] == '_': dataset.name += name_tag
      else: dataset.name = name_tag
    # apply slicing
    if obsslcs and ( dataset.name[:3].lower() == 'obs' or dataset.name.isupper() ):
      if slcs is None: slcs = obsslcs
      else: slcs.update(**obsslcs) # add special slices for obs
      # N.B.: currently VarNC's can only be sliced once, because we can't combine slices yet
    if slcs: dataset = dataset(lminmax=lminmax, **slcs) # slice immediately 
    if not ldataset: ensemble += dataset.load() # load data and add to ensemble
  # if input was not a list, just return dataset
  if ldataset: ensemble = dataset.load() # load data
  # select specific stations (if applicable)
  if not ldataset and station and constraints:
    from datasets.EC import selectStations
    ensemble = selectStations(ensemble, stnaxis='station', master=master, linplace=False, lall=lall,
                              lcheckVar=lcheckVar, **constraints)
  # make sure all datasets have the station/shape (cluster) meta data
  for varname in stn_params + shp_params:
    # find valid instance
    var = None
    for ds in ensemble: 
      if varname in ds: var = ds[varname]; break
    # add it to those datasets that don't have it
    if var is not None:
      var.load() # load data and add as regular variable (not VarNC)
      for ds in ensemble: 
        if varname not in ds: ds.addVariable(var.copy()) 
  # apply general reduction operations
  if reduction is not None:
    for ax,op in reduction.iteritems():
      if isinstance(op, basestring): ensemble = getattr(ensemble,op)(axis=ax)
      elif isinstance(op, (int,np.integer,float,np.inexact)): ensemble = ensemble(**{ax:op})
  # extract seasonal/climatological values/extrema
  if (ldataset and len(ensemble)==0): raise EmptyDatasetError, varlist
  if not ldataset and any([len(ds)==0 for ds in ensemble]): raise EmptyDatasetError, ensemble
  # N.B.: the operations below should work with Ensembles as well as Datasets 
  if aggregation:
    method = aggregation if aggregation.isupper() else aggregation.title() 
    if season is None:
      ensemble = getattr(ensemble,'clim'+method)(taxis='time', **kwargs)
    else:
      ensemble = getattr(ensemble,'seasonal'+method)(season=season, taxis='time', **kwargs)
  elif season: # but not aggregation
    ensemble = ensemble.seasonalSample(season=season)
  # return dataset
  return ensemble
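
A hedged usage sketch for loadEnsembleTS: the experiment names and the station dataset name below are hypothetical placeholders, and WRF_exps/WRF_ens/CESM_exps/CESM_ens must be the experiment dictionaries of the calling project:

# hypothetical call: load precipitation time-series for two WRF experiments
# at precipitation stations and reduce them to summer means
ens = loadEnsembleTS(names=['ctrl-1', 'new-1'], varlist=['precip'],
                     station='ecprecip', aggregation='mean', season='summer',
                     WRF_exps=WRF_exps, CESM_exps=CESM_exps,
                     WRF_ens=WRF_ens, CESM_ens=CESM_ens)
print(ens)  # an Ensemble of two seasonally aggregated station datasets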
Code example #3
File: common.py  Project: EdwardBetts/GeoPy
def selectElements(datasets, axis, testFct=None, master=None, linplace=False, lall=False):
  ''' Extract common points that meet a specific criterion from a list of datasets. 
      The test function has to accept the following input: index, dataset, axis'''
  if linplace: raise NotImplementedError, "Option 'linplace' does not work currently."
  # check input
  if not isinstance(datasets, (list,tuple,Ensemble)): raise TypeError
  if not all(isinstance(dataset,Dataset) for dataset in datasets): raise TypeError 
  if not isCallable(testFct) and testFct is not None: raise TypeError
  if isinstance(axis, Axis): axis = axis.name
  if not isinstance(axis, basestring): raise TypeError
  if lall and master is not None: raise ArgumentError, "The options 'lall' and 'master' are mutually exclusive!"
  # save some ensemble parameters for later  
  lnotest = testFct is None
  lens = isinstance(datasets,Ensemble)
  if lens:
    enskwargs = dict(basetype=datasets.basetype, idkey=datasets.idkey, 
                     name=datasets.name, title=datasets.title) 
  # use dataset with shortest axis as master sample (more efficient)
  axes = [dataset.getAxis(axis) for dataset in datasets]
  if master is None: imaster = np.argmin([len(ax) for ax in axes]) # find shortest axis
  elif isinstance(master,basestring): 
    # translate name of dataset into index
    imaster = None
    for i,dataset in enumerate(datasets): 
      if dataset.name == master: 
        imaster = i; break
    if imaster is None: raise ArgumentError, "Master '{:s}' not found in datasets".format(master)
  else: imaster = master
  if imaster is not None and not isinstance(imaster,(int,np.integer)): raise TypeError, imaster
  elif imaster >= len(datasets) or imaster < 0: raise ValueError
  maxis = axes.pop(imaster) # extract shortest axis for loop
  if lall: 
    tmpds = tuple(datasets)
    if imaster != 0: tmpds = (tmpds[imaster],)+tmpds[:imaster]+tmpds[imaster+1:]
    test_fct = lambda i,ds: testFct(i, ds, axis) # prepare test function arguments
  else: 
    test_fct = lambda i: testFct(i, datasets[imaster], axis) 
  # loop over coordinate axis
  itpls = [] # list of valid index tuples
  for i,x in enumerate(maxis.coord):
    # check other axes
    if all([x in ax.coord for ax in axes]): # only the other axes
      # no condition
      if lnotest:
        # just find and add indices
        itpls.append((i,)+tuple(ax.coord.searchsorted(x) for ax in axes))
      # check condition using shortest dataset
      elif lall: 
        # check test condition on all datasets (slower)
        tmpidx = (i,)+tuple(ax.coord.searchsorted(x) for ax in axes)
        if all(test_fct(ii,ds) for ii,ds in zip(tmpidx,tmpds)):
          # add corresponding indices in each dataset to list
          itpls.append(tmpidx)
      else:
        # check test condition on only one dataset (faster, default)
        if test_fct(i):
          # add corresponding indices in each dataset to list
          itpls.append((i,)+tuple(ax.coord.searchsorted(x) for ax in axes))
          # N.B.: since we can expect exact matches, plain searchsorted is fastest (side='left') 
  # check if there is anything left...
  if len(itpls) == 0: raise DatasetError, "Aborting: no data points match all criteria!"
  # construct axis indices for each dataset (need to remember to move shortest axis back in line)
  idxs = [[] for ds in datasets] # create unique empty lists
  for itpl in itpls:
    for i,idx in enumerate(itpl): idxs[i].append(idx)
  idxs.insert(imaster,idxs.pop(0)) # move first element back in line (where shortest axis was)
  idxs = [np.asarray(idxlst, dtype='int') for idxlst in idxs]      
  # slice datasets using only positive results  
  datasets = [ds(lidx=True, linplace=linplace, **{axis:idx}) for ds,idx in zip(datasets,idxs)]
  if lens: datasets = Ensemble(*datasets, **enskwargs)
  # return datasets
  return datasets
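
A sketch of a typical call: keep only the stations whose record length meets a threshold, tested on the master (shortest) dataset only. The 'station' axis and the 'stn_rec_len' variable are assumptions about the datasets, not part of selectElements itself:

def hasLongRecord(index, dataset, axis):
    # illustrative criterion: at least 50 years of record at this station
    return dataset['stn_rec_len'][index] >= 50

selected = selectElements(datasets, axis='station', testFct=hasLongRecord,
                          master=None, lall=False)
# all returned datasets now share the same subset of station coordinates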
Code example #4
File: load.py  Project: xiefengy/WRF-Projects
def loadShapeObservations(obs=None,
                          seasons=None,
                          basins=None,
                          provs=None,
                          shapes=None,
                          varlist=None,
                          slices=None,
                          aggregation='mean',
                          shapetype=None,
                          period=None,
                          variable_list=None,
                          **kwargs):
    ''' convenience function to load shape observations; the main purpose is to select sensible
      defaults based on 'varlist' if no 'obs' are specified '''
    # prepare arguments
    if shapetype is None: shapetype = 'shpavg'  # really only one in use
    # resolve variable list (no need to maintain order)
    if isinstance(varlist, basestring): varlist = [varlist]
    elif varlist is None: varlist = []  # guard against an unset variable list
    variables = set(shp_params)
    for name in varlist:
        if variable_list and name in variable_list: variables.update(variable_list[name].vars)
        else: variables.add(name)
    variables = list(variables)
    # figure out default datasets
    if obs is None: obs = 'Observations'
    lUnity = lCRU = lWSC = False
    if obs[:3].lower() in ('obs', 'wsc'):
        if any(var in CRU_vars for var in variables):
            if aggregation == 'mean' and seasons is None:
                lUnity = True
                obs = []
        if basins and any([var in WSC_vars for var in variables]):
            if aggregation.lower() in ('mean', 'std', 'sem', 'min',
                                       'max') and seasons is None:
                lWSC = True
                obs = []
    if not isinstance(obs, (list, tuple)): obs = (obs, )
    # configure slicing (extract basin/province/shape and period)
    slices = _configSlices(slices=slices,
                           basins=basins,
                           provs=provs,
                           shapes=shapes,
                           period=period)
    if slices is not None:
        noyears = slices.copy()
        noyears.pop('years', None)  # slices for climatologies
    # prepare and load ensemble of observations
    obsens = Ensemble(name='obs', title='Observations', basetype=Dataset)
    if len(obs) > 0:  # regular operations with user-defined dataset
        try:
            ensemble = loadEnsembleTS(names=obs,
                                      season=seasons,
                                      aggregation=aggregation,
                                      slices=slices,
                                      varlist=variables,
                                      shape=shapetype,
                                      ldataset=False,
                                      **kwargs)
            for ens in ensemble:
                obsens += ens
        except EmptyDatasetError:
            pass
    if lUnity:  # load Unity data instead of averaging CRU data
        if period is None: period = (1979, 1994)
        dataset = loadDataset(name='Unity',
                              varlist=variables,
                              mode='climatology',
                              period=period,
                              shape=shapetype)
        if slices is not None:
            dataset = dataset(**noyears)  # slice immediately
        obsens += dataset.load()
    if lCRU:  # this is basically regular operations with CRU as default
        obsens += loadEnsembleTS(names='CRU',
                                 season=seasons,
                                 aggregation=aggregation,
                                 slices=slices,
                                 varlist=variables,
                                 shape=shapetype,
                                 ldataset=True,
                                 **kwargs)
    if lWSC:  # another special case: river hydrographs
        from datasets.WSC import loadGageStation, GageStationError  # needed below
        try:
            dataset = loadGageStation(basin=basins,
                                      varlist=['runoff'],
                                      aggregation=aggregation,
                                      mode='climatology',
                                      filetype='monthly')
            if slices is not None:
                dataset = dataset(**noyears)  # slice immediately
            obsens += dataset.load()
        except GageStationError:
            pass  # just ignore, if gage station data is missing
    # return ensembles (will be wrapped in a list, if BatchLoad is used)
    return obsens
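
An illustrative call (the basin and variable names are placeholders, and my_variable_list is a hypothetical mapping from plot-variable names to objects with a 'vars' attribute, as the function assumes):

obs = loadShapeObservations(obs='Observations', basins=['ARB'], varlist=['runoff'],
                            aggregation='mean', period=(1979, 1994),
                            variable_list=my_variable_list)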
Code example #5
def rescaleDistributions(datasets,
                         reference=None,
                         target=None,
                         lscale=False,
                         suffixes=None,
                         lglobal=False):
    ''' Rescale datasets, so that the mean of each variable matches the corresponding variable in the
      reference dataset; if a target is specified, the target scale factors are applied to all
      datasets, if target is None, each dataset is rescaled individually. '''
    if not isinstance(datasets, (list, tuple, Ensemble)): raise TypeError
    if isinstance(datasets, Ensemble) and isinstance(reference, basestring):
        reference = datasets[reference]
    elif not isinstance(reference, Dataset):
        raise TypeError
    if target is None or target == 'auto':
        pass  # every dataset is scaled individually or based on suffixes
    elif isinstance(datasets, Ensemble) and isinstance(target, basestring):
        target = datasets[target]
    elif not isinstance(target, Dataset):
        raise TypeError, target
    if suffixes is None:
        suffixes = ('2050', '2100')  # suffixes for the scaling heuristic; the '-' separator is stripped separately below

    # determine scale factor
    def scaleFactor(reference, target, lscale=False, lglobal=False):
        ''' internal function to compute rescaling factors for common variables '''
        scalefactors = dict()  # dict with scale factors for all applicable variables
        for varname, refvar in reference.variables.iteritems():
            if varname in target and isinstance(refvar, VarRV):  # only variables that appear in both sets
                tgtvar = target.variables[varname]
                iloc = 1 if refvar.shape[-1] == 3 else 0  # index of the location parameter on the 'params' axis
                # insert dummy ensemble axis, if necessary
                refvar = refvar.insertAxes(new_axes=tgtvar.axes,
                                           lcopy=True,
                                           asVar=True,
                                           linplace=False)
                if refvar.axes[-1].name.startswith('params'):
                    refdata = refvar.data_array.take(iloc, axis=-1)
                else:
                    raise AxisError, refvar.axes[-1]
                if refvar.ndim < tgtvar.ndim:
                    # N.B.: this is necessary, because WRF (target) can have an extra ensemble dimension that obs
                    #       typically don't have; then we just replicate the obs for each ensemble element
                    from warnings import warn
                    if lglobal:
                        warn(
                            "Scalefactors are being averaged over extra target dimensions (e.g. 'ensemble' axis)"
                        )
                    dimdiff = tgtvar.ndim - refvar.ndim
                    if refvar.shape != tgtvar.shape[dimdiff:]:
                        raise AxisError, "{:s} != {:s}".format(tgtvar, refvar)
                    refdata = refdata.reshape((1, ) * dimdiff +
                                              refvar.shape[:-1])
                elif refvar.shape != tgtvar.shape:
                    raise AxisError, "{:s} != {:s}".format(tgtvar, refvar)
                tgtdata = tgtvar.data_array.take(iloc, axis=-1)
                if lglobal: loc = np.mean(refdata) / np.mean(tgtdata)
                else: loc = refdata / tgtdata
                if lscale:
                    iscale = 2 if refvar.shape[-1] == 3 else 1
                    if lglobal:
                        scale = np.mean(refvar.data_array.take(
                            iscale, axis=-1)) / np.mean(
                                tgtvar.data_array.take(iscale, axis=-1))
                    else:
                        scale = refvar.data_array.take(
                            iscale, axis=-1) / tgtvar.data_array.take(iscale,
                                                                      axis=-1)
                    scalefactors[varname] = loc, (scale / loc)
                else:
                    scalefactors[varname] = loc
        return scalefactors  # return dict with scale factors for variables

    # compute general scalefactors
    if target == 'auto':
        scalefactor_collection = dict()
    elif target is not None:
        scalefactors = scaleFactor(reference,
                                   target,
                                   lscale=lscale,
                                   lglobal=lglobal)
    # loop over datasets
    rescaled_datasets = []
    for dataset in datasets:
        if dataset == reference:
            # determine variables that can be scaled (VarRV's)
            varlist = [
                varname for varname, var in dataset.variables.iteritems()
                if isinstance(var, VarRV)
            ]
            rescaled_dataset = dataset.copy(varlist=varlist)
            # add mock scale factors for consistency
            for var in rescaled_dataset.variables.itervalues():
                var.atts['loc_factor'] = 1
                var.atts['scale_factor'] = 1
                var.atts['shape_factor'] = 1
        else:
            # generate new dataset (without variables, and in-memory)
            if isinstance(dataset, DatasetNetCDF):
                rescaled_dataset = dataset.copy(varlist=[], asNC=False)
            else:
                rescaled_dataset = dataset.copy(varlist=[])
            # individual scaling
            if target is None or target == 'auto':
                parent = None
                if target == 'auto' and dataset.name.endswith(suffixes):
                    for suffix in suffixes:
                        if dataset.name.endswith(
                                suffix):  # check, which suffix, and remove it
                            parent = dataset.name[:-(len(suffix) + 1)]
                            break
                    if parent and '-' not in parent:
                        parent += '-1'  # convention for WRF names
                if parent and parent in scalefactor_collection:
                    scalefactors = scalefactor_collection[
                        parent]  # use scale factors from parent
                else:  # scale individually
                    scalefactors = scaleFactor(reference,
                                               dataset,
                                               lscale=lscale,
                                               lglobal=lglobal)
                    if target == 'auto':
                        scalefactor_collection[
                            dataset.name] = scalefactors  # for later use
            # loop over variables
            for varname, scalefactor in scalefactors.iteritems():
                if varname in dataset:
                    # rescale and add variable to new dataset
                    var = dataset.variables[varname]
                    if lscale:
                        rsvar = var.rescale(loc=scalefactor[0],
                                            scale=scalefactor[1])
                    else:
                        rsvar = var.rescale(loc=scalefactor)
                    rescaled_dataset.addVariable(rsvar)
        # add dataset to list
        rescaled_datasets.append(rescaled_dataset)
    # put everything into an Ensemble, if input was an Ensemble
    if isinstance(datasets, Ensemble):
        rescaled_datasets = Ensemble(*rescaled_datasets,
                                     name=datasets.ens_name,
                                     title=datasets.ens_title)
    # return datasets/ensemble
    return rescaled_datasets
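
A sketch of the intended call pattern (dataset names are hypothetical): with target='auto', each projection dataset whose name ends in one of the suffixes reuses the scale factors computed for its parent run, so historical and future periods are shifted consistently:

# 'ens' is assumed to be an Ensemble containing, e.g., 'Observations',
# 'ctrl-1', 'ctrl-1-2050', and 'ctrl-1-2100'
rescaled = rescaleDistributions(ens, reference='Observations',
                                target='auto', lscale=True)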