Ejemplo n.º 1
0
 def __call__(self, load_list=None, lproduct='outer', inner_list=None, outer_list=None,
              lensemble=None, ens_name=None, ens_title=None, **kwargs):
   ''' Call the wrapped load function, optionally once per expanded argument set.

       If none of load_list, inner_list and outer_list is given, the load function is called
       once with kwargs and its result is returned unchanged. Otherwise the keyword arguments
       are expanded with expandArgumentList, the load function is called for every resulting
       argument set, and the results are returned as a list, or as an Ensemble if lensemble
       is set (lensemble defaults to True when ens_name is given). '''
   # shortcut: nothing to expand, just forward the call
   if load_list is None and inner_list is None and outer_list is None:
     return self.load_fct(**kwargs)
   # naming the ensemble implicitly requests one
   if lensemble is None: lensemble = ens_name is not None
   # one keyword dictionary per dataset to be loaded
   arg_sets = expandArgumentList(expand_list=load_list, lproduct=lproduct,
                                 inner_list=inner_list, outer_list=outer_list, **kwargs)
   members = [self.load_fct(**arg_set) for arg_set in arg_sets]
   # plain list, unless an ensemble was requested
   if not lensemble: return members
   return Ensemble(members=members, name=ens_name, title=ens_title, basetype='Dataset')
Ejemplo n.º 2
0
def loadEnsembleTS(names=None, name=None, title=None, varlist=None, aggregation=None, season=None, prov=None,
                   slices=None, obsslices=None, years=None, reduction=None, shape=None, station=None,
                   constraints=None, filetypes=None, domain=None, ldataset=False, lcheckVar=False,
                   lwrite=False, ltrimT=True, name_tags=None, dataset_mode='time-series', lminmax=False,
                   master=None, lall=True, ensemble_list=None, ensemble_product='inner', lensembleAxis=False,
                   WRF_exps=None, CESM_exps=None, WRF_ens=None, CESM_ens=None, **kwargs):
  ''' Load a set of time-series datasets and return them as an Ensemble (or, with ldataset=True,
      load and return a single Dataset); works with stations as well as shapes/regions.

      After loading, datasets can be sliced ('slices', plus 'obsslices' for observations),
      filtered by station 'constraints', reduced along axes ('reduction'), and aggregated
      seasonally or climatologically ('aggregation' and 'season'); remaining kwargs are passed
      on to the aggregation method. '''
  # use a private copy of the variable list and make sure the selection parameters are included
  if varlist is not None:
    varlist = list(varlist)
    for lselect,params in ((station,stn_params), (shape,shp_params)):
      if not lselect: continue
      for param in params:
        if param not in varlist: varlist.append(param)
  # a single dataset and argument expansion are mutually exclusive
  if ldataset:
    if ensemble_list: raise ArgumentError()
  else:
    ensemble = Ensemble(name=name, title=title, basetype='Dataset')
    if ensemble_list is None: ensemble_list = ['names'] # default: expand over dataset names
  # generate one set of load arguments per dataset
  arglist = expandArgumentList(names=names, station=station, prov=prov, shape=shape, varlist=varlist,
                               mode=dataset_mode, filetypes=filetypes, domains=domain, lwrite=lwrite,
                               slices=slices, obsslices=obsslices, name_tags=name_tags, ltrimT=ltrimT,
                               years=years, expand_list=ensemble_list, lproduct=ensemble_product,
                               lensembleAxis=lensembleAxis)
  for args in arglist:
    # extract the arguments that are handled here and not by loadDataset
    dsname = args.pop('names',None); tag = args.pop('name_tags',None)
    dsslices = args.pop('slices',None); obsonly = args.pop('obsslices',None)
    dataset = loadDataset(name=dsname, WRF_exps=WRF_exps, CESM_exps=CESM_exps, WRF_ens=WRF_ens,
                          CESM_ens=CESM_ens, **args)
    # a tag with leading underscore is appended, any other tag replaces the name
    if tag is not None:
      if tag[0] != '_': dataset.name = tag
      else: dataset.name += tag
    # datasets named 'obs...' or in all upper-case also get the obs-specific slices
    if obsonly and ( dataset.name[:3].lower() == 'obs' or dataset.name.isupper() ):
      dsslices = dict() if dsslices is None else dsslices.copy()
      dsslices.update(**obsonly)
      # N.B.: currently VarNC's can only be sliced once, because we can't combine slices yet
    if dsslices: dataset = dataset(lminmax=lminmax, **dsslices) # slice before loading
    if not ldataset: ensemble += dataset.load()
  # in single-dataset mode the loaded dataset takes the place of the ensemble
  if ldataset: ensemble = dataset.load()
  # station selection based on constraints (ensembles only)
  if not ldataset and station and constraints:
    from datasets.EC import selectStations
    ensemble = selectStations(ensemble, stnaxis='station', master=master, linplace=False, lall=lall,
                              lcheckVar=lcheckVar, **constraints)
  # propagate station/shape meta data to members that lack them
  for varname in stn_params + shp_params:
    # first instance of the variable found among the members (if any)
    var = next((ds[varname] for ds in ensemble if varname in ds), None)
    if var is None: continue
    var.load() # load data, so that it is added as regular variable (not VarNC)
    for ds in ensemble:
      if varname not in ds: ds.addVariable(var.copy())
  # general reduction operations: strings name a method, numbers are passed to the slicing call
  if reduction is not None:
    for ax,op in reduction.iteritems():
      if isinstance(op, basestring): ensemble = getattr(ensemble,op)(axis=ax)
      elif isinstance(op, (int,np.integer,float,np.inexact)): ensemble = ensemble(**{ax:op})
  # refuse to continue with empty datasets
  if ldataset:
    if len(ensemble) == 0: raise EmptyDatasetError(varlist)
  elif any([len(ds)==0 for ds in ensemble]): raise EmptyDatasetError(ensemble)
  # N.B.: the operations below should work with Ensembles as well as Datasets
  if aggregation:
    # method names are of the form climMean/seasonalMean (all upper-case names are kept)
    method = aggregation if aggregation.isupper() else aggregation.title()
    if season is not None:
      ensemble = getattr(ensemble,'seasonal'+method)(season=season, taxis='time', **kwargs)
    else:
      ensemble = getattr(ensemble,'clim'+method)(taxis='time', **kwargs)
  elif season: # season without aggregation: just extract the seasonal sample
    ensemble = ensemble.seasonalSample(season=season)
  return ensemble
Ejemplo n.º 3
0
def loadEnsembleTS(names=None, name=None, title=None, varlist=None, aggregation=None, season=None, prov=None,
                   slices=None, obsslices=None, years=None, reduction=None, shape=None, station=None,
                   constraints=None, filetypes=None, domain=None, ldataset=False, lcheckVar=False,
                   lwrite=False, ltrimT=True, name_tags=None, dataset_mode='time-series', lminmax=False,
                   master=None, lall=True, ensemble_list=None, ensemble_product='inner', lensembleAxis=False,
                   WRF_exps=None, CESM_exps=None, WRF_ens=None, CESM_ens=None, **kwargs):
  ''' A convenience function to load an ensemble of time-series, based on certain criteria; works
      with either stations or regions; seasonal/climatological aggregation is also supported.

      Datasets are loaded one per expanded argument set (by default one per entry in 'names'),
      optionally sliced ('slices'; datasets whose name starts with 'obs' or is all upper-case
      additionally get 'obsslices') and collected in an Ensemble; with ldataset=True a single
      Dataset is loaded and returned instead. Entries in 'reduction' map an axis name to either
      the name of a method (called with axis=...) or a number (passed to the slicing call).
      Remaining kwargs are passed on to the aggregation method.

      Raises ArgumentError if ldataset is combined with ensemble_list, and EmptyDatasetError if
      a loaded dataset has length zero. '''
  # prepare ensemble
  if varlist is not None:
    varlist = list(varlist) # copy list, so that the caller's list is not modified
    if station:
      for var in stn_params: # necessary to select stations
        if var not in varlist: varlist.append(var)
    if shape:
      for var in shp_params: # necessary to select shapes
        if var not in varlist: varlist.append(var)
  # prepare ensemble and arguments
  if ldataset and ensemble_list: raise ArgumentError()
  elif not ldataset: ensemble = Ensemble(name=name, title=title, basetype='Dataset')
  # expand argument list
  if ensemble_list is None: ensemble_list = ['names'] if not ldataset else None
  loadargs = expandArgumentList(names=names, station=station, prov=prov, shape=shape, varlist=varlist,
                                mode=dataset_mode, filetypes=filetypes, domains=domain, lwrite=lwrite,
                                slices=slices, obsslices=obsslices, name_tags=name_tags, ltrimT=ltrimT,
                                years=years, expand_list=ensemble_list, lproduct=ensemble_product,
                                lensembleAxis=lensembleAxis)
  for loadarg in loadargs:
    # clean up arguments (these are handled here and not passed to loadDataset)
    name = loadarg.pop('names',None); name_tag = loadarg.pop('name_tags',None)
    slcs = loadarg.pop('slices',None); obsslcs = loadarg.pop('obsslices',None)
    # load individual dataset
    dataset = loadDataset(name=name, WRF_exps=WRF_exps, CESM_exps=CESM_exps, WRF_ens=WRF_ens, CESM_ens=CESM_ens, **loadarg)
    if name_tag is not None:
      # a tag with leading underscore is appended, any other tag replaces the name
      if name_tag[0] == '_': dataset.name += name_tag
      else: dataset.name = name_tag
    # apply slicing
    if obsslcs and ( dataset.name[:3].lower() == 'obs' or dataset.name.isupper() ):
      # merge into a fresh dict: updating the popped dict in place would also modify the
      # slices of any other dataset (or of the caller) that refers to the same dict object
      slcs = dict() if slcs is None else slcs.copy()
      slcs.update(**obsslcs) # add special slices for obs
      # N.B.: currently VarNC's can only be sliced once, because we can't combine slices yet
    if slcs: dataset = dataset(lminmax=lminmax, **slcs) # slice immediately
    if not ldataset: ensemble += dataset.load() # load data and add to ensemble
  # if input was not a list, just return dataset
  if ldataset: ensemble = dataset.load() # load data
  # select specific stations (if applicable)
  if not ldataset and station and constraints:
    from datasets.EC import selectStations
    ensemble = selectStations(ensemble, stnaxis='station', master=master, linplace=False, lall=lall,
                              lcheckVar=lcheckVar, **constraints)
  # make sure all have cluster meta data
  for varname in stn_params + shp_params:
    # find valid instance
    var = None
    for ds in ensemble:
      if varname in ds: var = ds[varname]; break
    # give to those who have not
    if var is not None:
      var.load() # load data and add as regular variable (not VarNC)
      for ds in ensemble:
        if varname not in ds: ds.addVariable(var.copy())
  # apply general reduction operations
  if reduction is not None:
    for ax,op in reduction.iteritems():
      if isinstance(op, basestring): ensemble = getattr(ensemble,op)(axis=ax)
      elif isinstance(op, (int,np.integer,float,np.inexact)): ensemble = ensemble(**{ax:op})
  # abort, if there is no data left (exceptions are raised in call form, which is valid
  # syntax in Python 2 as well as Python 3)
  if (ldataset and len(ensemble)==0): raise EmptyDatasetError(varlist)
  if not ldataset and any([len(ds)==0 for ds in ensemble]): raise EmptyDatasetError(ensemble)
  # extract seasonal/climatological values/extrema
  # N.B.: the operations below should work with Ensembles as well as Datasets
  if aggregation:
    method = aggregation if aggregation.isupper() else aggregation.title()
    if season is None:
      ensemble = getattr(ensemble,'clim'+method)(taxis='time', **kwargs)
    else:
      ensemble = getattr(ensemble,'seasonal'+method)(season=season, taxis='time', **kwargs)
  elif season: # but not aggregation
    ensemble = ensemble.seasonalSample(season=season)
  # return dataset
  return ensemble
Ejemplo n.º 4
0
def selectElements(datasets, axis, testFct=None, master=None, linplace=False, lall=False):
  ''' Extract the points along an axis that are common to all datasets and, optionally, meet
      a specific criterion.

      Arguments:
        datasets : list, tuple or Ensemble of Dataset instances
        axis     : Axis instance or name of the axis along which points are matched
        testFct  : optional callable, invoked as testFct(index, dataset, axis name); only
                   points for which it returns a true value are retained
        master   : name or index of the dataset whose axis is looped over; defaults to the
                   dataset with the shortest axis
        linplace : currently not supported (raises NotImplementedError)
        lall     : apply testFct to every dataset instead of only the master; cannot be
                   combined with 'master'

      Returns the sliced datasets as a list, or as an Ensemble if an Ensemble was passed.

      Raises TypeError/ValueError for invalid arguments, ArgumentError for conflicting options
      or an unknown master name, and DatasetError if no point matches all criteria. '''
  # N.B.: exceptions are raised in call form, which is valid in Python 2 as well as Python 3
  if linplace: raise NotImplementedError("Option 'linplace' does not work currently.")
  # check input
  if not isinstance(datasets, (list,tuple,Ensemble)): raise TypeError
  if not all(isinstance(dataset,Dataset) for dataset in datasets): raise TypeError
  if not isCallable(testFct) and testFct is not None: raise TypeError
  if isinstance(axis, Axis): axis = axis.name
  if not isinstance(axis, basestring): raise TypeError
  if lall and master is not None:
    raise ArgumentError("The options 'lall' and 'master' are mutually exclusive!")
  # save some ensemble parameters for later
  lnotest = testFct is None
  lens = isinstance(datasets,Ensemble)
  if lens:
    enskwargs = dict(basetype=datasets.basetype, idkey=datasets.idkey,
                     name=datasets.name, title=datasets.title)
  # use dataset with shortest axis as master sample (more efficient)
  axes = [dataset.getAxis(axis) for dataset in datasets]
  if master is None: imaster = np.argmin([len(ax) for ax in axes]) # find shortest axis
  elif isinstance(master,basestring):
    # translate name of dataset into index
    imaster = None
    for i,dataset in enumerate(datasets):
      if dataset.name == master:
        imaster = i; break
    if imaster is None: raise ArgumentError("Master '{:s}' not found in datasets".format(master))
  else: imaster = master
  if imaster is not None and not isinstance(imaster,(int,np.integer)): raise TypeError(imaster)
  elif imaster >= len(datasets) or imaster < 0: raise ValueError(imaster)
  maxis = axes.pop(imaster) # extract master axis for loop; 'axes' now holds only the others
  if lall:
    # reorder datasets to match the index tuples below (master first, then the others)
    tmpds = tuple(datasets)
    if imaster != 0: tmpds = (tmpds[imaster],)+tmpds[:imaster]+tmpds[imaster+1:]
    test_fct = lambda i,ds: testFct(i, ds, axis) # prepare test function arguments
  else:
    test_fct = lambda i: testFct(i, datasets[imaster], axis)
  # loop over coordinate axis
  itpls = [] # list of valid index tuples (master index first)
  for i,x in enumerate(maxis.coord):
    # check other axes
    if all([x in ax.coord for ax in axes]): # only the other axes
      # no condition
      if lnotest:
        # just find and add indices
        itpls.append((i,)+tuple(ax.coord.searchsorted(x) for ax in axes))
      # check condition using shortest dataset
      elif lall:
        # check test condition on all datasets (slower)
        tmpidx = (i,)+tuple(ax.coord.searchsorted(x) for ax in axes)
        if all(test_fct(ii,ds) for ii,ds in zip(tmpidx,tmpds)):
          # add corresponding indices in each dataset to list
          itpls.append(tmpidx)
      else:
        # check test condition on only one dataset (faster, default)
        if test_fct(i):
          # add corresponding indices in each dataset to list
          itpls.append((i,)+tuple(ax.coord.searchsorted(x) for ax in axes))
          # N.B.: since we can expect exact matches, plain searchsorted is fastest (side='left')
  # check if there is anything left...
  if len(itpls) == 0: raise DatasetError("Aborting: no data points match all criteria!")
  # construct axis indices for each dataset (need to remember to move shortest axis back in line)
  idxs = [[] for ds in datasets] # create unique empty lists
  for itpl in itpls:
    for i,idx in enumerate(itpl): idxs[i].append(idx)
  idxs.insert(imaster,idxs.pop(0)) # move first element back in line (where shortest axis was)
  idxs = [np.asarray(idxlst, dtype='int') for idxlst in idxs]
  # slice datasets using only positive results
  datasets = [ds(lidx=True, linplace=linplace, **{axis:idx}) for ds,idx in zip(datasets,idxs)]
  if lens: datasets = Ensemble(*datasets, **enskwargs)
  # return datasets
  return datasets
Ejemplo n.º 5
0
def generateStatistics(varname, ens, fit, scl=None, reference=None, mode='Ratio', plot_labels=None,
                       nsamples=None, bootstrap_axis='bootstrap', lflatten=False, sample_axis='time',
                       lcrossval=True):
  ''' Perform goodness-of-fit and K-S tests and compute the ratio (or shift) of means relative
      to a reference; return the results as a formatted table in a string.

      Arguments:
        varname        : name of the variable that is extracted from each dataset
        ens            : ensemble of datasets containing the samples
        fit            : datasets containing the fitted distributions (required; a false value
                         raises NotImplementedError)
        scl            : optional datasets containing the rescaled distributions
        reference      : None (first member), an index, an ID string, or a list/tuple of names;
                         with a list, the reference switches whenever the next listed name is
                         encountered in the loop
        mode           : 'Ratio' or 'Shift' (case-insensitive); selects the last table column
        plot_labels    : optional dict, mapping member IDs to the labels used in the table
        nsamples, lcrossval  : passed on to the fittest method of the distributions
        bootstrap_axis : name of the axis of which only the first element is used
        lflatten, sample_axis : passed on to ks_2samp (sample_axis as axis index) '''
  # some average diagnostics
  idkey = 'dataset_name' if ens.basetype is Dataset else 'name'
  varlist = Ensemble(*[ds[varname] for ds in ens if ds is not None and varname in ds], idkey=idkey)
  if not all(varlist[0].ndim==ndim for ndim in varlist.ndim):
    new_axes = varlist[np.argmax(varlist.ndim)].axes
    varlist = varlist.insertAxes(new_axes=new_axes, lcheckAxis=False)
  mvars = varlist.mean() # means of the samples, used for the ratio/shift column
  lratio = mode.lower() == 'ratio'
  lshift = mode.lower() == 'shift'
  if plot_labels is None: plot_labels = dict()
  # figure out fillValue (np.nan, because the alias np.NaN was removed in NumPy 2.0)
  if np.issubdtype(varlist[0].dtype, np.floating): fillValue = np.nan
  elif np.issubdtype(varlist[0].dtype, np.integer): fillValue = 0
  else: raise TypeError(varlist[0].dtype)
  # define reference
  if isinstance(reference,(list,tuple)):
    reflist0 = list(reference); reference = reference[0]
  else: reflist0 = [] # dummy list
  if reference is None: iref0 = 0
  elif isinstance(reference,(int,np.integer)): iref0 = reference
  elif isinstance(reference,str): iref0 = varlist.idkeys.index(reference)
  else: raise ArgumentError
  # sample/exp header; defined up front, because its width is also needed in the 'scl'
  # section, which can be reached without the fit table below having been written
  headline = 'Sample'; lhead = len(headline)
  # goodness of fit, reported on plot panels
  if fit:
    fitlist = Ensemble(*[ds[varname] for ds in fit if ds is not None and varname in ds], idkey=idkey)
    if any(fitlist.hasAxis(bootstrap_axis)): fitlist = fitlist(**{bootstrap_axis:0, 'lcheckAxis':False})
    if not all(fitlist[0].ndim==ndim for ndim in fitlist.ndim):
      new_axes = fitlist[np.argmax(fitlist.ndim)].axes
      fitlist = fitlist.insertAxes(new_axes=new_axes, lcheckAxis=False)
    assert not isinstance(reference,str) or iref0 == fitlist.idkeys.index(reference), reference
    if any([isinstance(dist,VarRV) for dist in fitlist]) or not scl:
      names = [plot_labels.get(getattr(dist,idkey),getattr(dist,idkey)) for dist in fitlist]
      lnames = max([len(name) for name in names]) # allocate line space
      header = headline + ' '*max(lnames-lhead,0) # pad header to the width of the name column
      string = '{:s}  Fit  {:s}\n'.format(header,mode.title())
      namestr = '{{:>{:d}s}}  {{:s}}  '.format(max(lhead,lnames))
      iref = iref0; reflist = reflist0[:] # copy list
      for i,dist,var,name,mvar in zip(range(len(fitlist)),fitlist,varlist,names,mvars):
        if isinstance(dist,VarRV) or not scl:
          if isinstance(dist,VarRV):
            pval = dist.fittest(var, nsamples=nsamples, asVar=False, lcrossval=lcrossval)
            pval = '{:3.2f}'.format(float(np.median(pval))) # median reduces the p-values to a scalar
            # for some reason masked array scalars appear string-type, rather than numbers...
          else: pval = '  - '
          if len(reflist) > 0 and name == reflist[0]: # assign new reference
            iref = i; del reflist[0] # pop element
          if isinstance(mvar,np.ma.core.MaskedConstant) or isinstance(mvars[iref],np.ma.core.MaskedConstant):
            string += namestr.format(name,' N/A\n')
          elif lratio: string += (namestr+'{:3.2f}\n').format(name,pval,(mvar/mvars[iref]).mean())
          elif lshift: string += (namestr+'{:+2.1f}\n').format(name,pval,(mvar-mvars[iref]).mean())
    else: string = ''
  else: raise NotImplementedError
  if scl:
    scllist = Ensemble(*[ds[varname] for ds in scl if ds is not None and varname in ds], idkey=idkey)
    bs_axes = scllist.axisIndex(bootstrap_axis, lcheck=False) # return None, if not present
    if bs_axes is None: bs_axes = [None]*len(scllist)
    scllist = scllist(**{bootstrap_axis:0, 'lcheckAxis':False})
    if not all(scllist[0].ndim==ndim for ndim in scllist.ndim):
      new_axes = scllist[np.argmax(scllist.ndim)].axes
      scllist = scllist.insertAxes(new_axes=new_axes, lcheckAxis=False)
    assert not isinstance(reference,str) or iref0 == scllist.idkeys.index(reference), reference
    if len(scllist) != len(varlist): raise AxisError(scllist)
    # compute means
    mvars = []
    for svr,var in zip(scllist,varlist):
      if isinstance(svr,VarRV): mvar = svr.stats(moments='mv', asVar=False)[...,0] # only first moment
      else: mvar = var.mean()*svr.atts.get('loc_factor',1.)
      mvars.append(mvar)
    # figure out label width and prepare header
    if len(varlist) > 1: # otherwise no comparison...
      names = [plot_labels.get(getattr(dist,idkey),getattr(dist,idkey)) for dist in scllist]
      lnames = max([len(name) for name in names]) # allocate line space
      namestr = '{{:>{:d}s}}  {{:s}}  '.format(max(lhead,lnames))
      tmphead = 'Fit to {:s}:' if scl == fit else 'Rescaled to {:s}:' # new heading
      tmphead += ' '*(max(lnames-len(names[iref0]),0)+5)+'\n'
      string += tmphead.format(names[iref0])
      # prepare first reference sample for K-S test
      scale,shape = scllist[iref0].atts.get('scale_factor', 1),scllist[iref0].atts.get('shape_factor', 1)
      if not (scale is None or scale == 1) and not (shape is None or shape == 1):
        raise NotImplementedError("Cannot rescale scale/variance and shape parameters of reference sample!")
      refsmpl = varlist[iref0].getArray(unmask=True, fillValue=fillValue) # only once
      loc0 = scllist[iref0].atts.get('loc_factor', 1)
      refsmpl = _rescaleSample(refsmpl, loc0, bs_axis=bs_axes[iref0]) # apply rescaling (varies, depending on loc-type)
      # start loop
      iref = iref0; reflist = reflist0[:] # copy list
      for i,dist,varsmpl,mvar,bs_axis in zip(range(len(varlist)),scllist,varlist,mvars,bs_axes):
        name = getattr(dist,idkey)
        if len(reflist) > 0 and name == reflist[0]: # assign new reference
          iref = i; del reflist[0] # pop element
          # prepare subsequent reference sample for K-S test
          scale,shape = dist.atts.get('scale_factor', 1),dist.atts.get('shape_factor', 1)
          if not (scale is None or scale == 1) and not (shape is None or shape == 1):
            raise NotImplementedError("Cannot rescale scale/variance and shape parameters of reference sample!")
          refsmpl = varsmpl.getArray(unmask=True, fillValue=fillValue) # only once
          if not varsmpl.atts.get('rescaled',False):
            refsmpl = _rescaleSample(refsmpl, dist.atts.get('loc_factor', 1), bs_axis=bs_axis) # apply rescaling (varies, depending on loc-type)
        elif i != iref:
          scale,shape = dist.atts.get('scale_factor', 1),dist.atts.get('shape_factor', 1)
          # perform K-S test
          if (scale is None or scale == 1) and (shape is None or shape == 1):
            # K-S test between actual samples is more realistic, and rescaling of mean is simple
            smpl = varsmpl.getArray(unmask=True, fillValue=fillValue) # only once
            if not varsmpl.atts.get('rescaled',False):
              smpl = _rescaleSample(smpl, dist.atts.get('loc_factor', 1), bs_axis=bs_axis) # apply rescaling (varies, depending on loc-type)
            pval = ks_2samp(refsmpl, smpl, asVar=False, lflatten=lflatten,
                            axis_idx=varsmpl.axisIndex(sample_axis, lcheck=False))
            pval = '{:3.2f}'.format(float(np.median(pval)))
          else:
            # no straight-forward way to rescale samples, so have to compare distribution with
            # reference sample, which means more noise (since the distribution will be randomly sampled)
            if isinstance(dist,VarRV): pval = '{:3.2f}'.format(float(dist.kstest(refsmpl).mean()))
            else: pval = '  - '
          # add column with ratio/difference of means after rescaling
          if name in plot_labels: name = plot_labels[name]
          if isinstance(mvar,np.ma.core.MaskedConstant) or isinstance(mvars[iref],np.ma.core.MaskedConstant):
            string += namestr.format(name,' N/A\n')
          elif lratio: string += (namestr+'{:3.2f}\n').format(name,pval,(mvar/mvars[iref]).mean())
          elif lshift: string += (namestr+'{:+2.1f}\n').format(name,pval,(mvar-mvars[iref]).mean())
  # return formatted table in string
  return string
Ejemplo n.º 6
0
def loadShapeObservations(obs=None,
                          seasons=None,
                          basins=None,
                          provs=None,
                          shapes=None,
                          varlist=None,
                          slices=None,
                          aggregation='mean',
                          shapetype=None,
                          period=None,
                          variable_list=None,
                          **kwargs):
    ''' Convenience function to load shape observations; the main function is to select sensible
        defaults based on 'varlist', if no 'obs' are specified.

        Arguments:
          obs           : name of a dataset or list/tuple of names; defaults to 'Observations';
                          a single name starting with 'obs' or 'wsc' (case-insensitive) enables
                          the automatic selection of Unity and/or gage station data
          seasons, aggregation : passed on to loadEnsembleTS; they also control the defaults
          basins, provs, shapes, period, slices : combined into slices by _configSlices
          varlist       : variable name or list of names; names found in 'variable_list' are
                          replaced by the '.vars' of the corresponding entry
          shapetype     : shape type passed to the loaders (default: 'shpavg')
          variable_list : optional mapping of names to objects with a 'vars' attribute
          kwargs        : passed on to loadEnsembleTS

        Returns an Ensemble with the observational datasets that could be loaded. '''
    # prepare arguments
    if shapetype is None: shapetype = 'shpavg'  # really only one in use
    # resolve variable list (no need to maintain order); missing arguments are treated as
    # empty, so that the default values of 'varlist' and 'variable_list' do not fail
    if varlist is None: varlist = []
    elif isinstance(varlist, basestring): varlist = [varlist]
    if variable_list is None: variable_list = dict()
    variables = set(shp_params)
    for name in varlist:
        if name in variable_list: variables.update(variable_list[name].vars)
        else: variables.add(name)
    variables = list(variables)
    # figure out default datasets
    if obs is None: obs = 'Observations'
    lUnity = lCRU = lWSC = False
    # N.B.: the default selection only applies to a single name; a list/tuple of datasets
    #       has no lower() method and is always loaded as specified
    if isinstance(obs, basestring) and obs[:3].lower() in ('obs', 'wsc'):
        if any(var in CRU_vars for var in variables):
            if aggregation == 'mean' and seasons is None:
                lUnity = True
                obs = []
        if basins and any([var in WSC_vars for var in variables]):
            if aggregation.lower() in ('mean', 'std', 'sem', 'min',
                                       'max') and seasons is None:
                lWSC = True
                obs = []
    if not isinstance(obs, (list, tuple)): obs = (obs, )
    # configure slicing (extract basin/province/shape and period)
    slices = _configSlices(slices=slices,
                           basins=basins,
                           provs=provs,
                           shapes=shapes,
                           period=period)
    if slices is not None:
        noyears = slices.copy()
        noyears.pop('years', None)  # slices for climatologies
    # prepare and load ensemble of observations
    obsens = Ensemble(name='obs', title='Observations', basetype=Dataset)
    if len(obs) > 0:  # regular operations with user-defined dataset
        try:
            ensemble = loadEnsembleTS(names=obs,
                                      season=seasons,
                                      aggregation=aggregation,
                                      slices=slices,
                                      varlist=variables,
                                      shape=shapetype,
                                      ldataset=False,
                                      **kwargs)
            for ens in ensemble:
                obsens += ens
        except EmptyDatasetError:
            pass  # empty datasets are simply left out
    if lUnity:  # load Unity data instead of averaging CRU data
        if period is None: period = (1979, 1994)
        dataset = loadDataset(name='Unity',
                              varlist=variables,
                              mode='climatology',
                              period=period,
                              shape=shapetype)
        if slices is not None:
            dataset = dataset(**noyears)  # slice immediately
        obsens += dataset.load()
    # NOTE(review): lCRU is never set to True in this function, so the branch below is
    #               currently unreachable; it is kept in case it gets enabled again
    if lCRU:  # this is basically regular operations with CRU as default
        obsens += loadEnsembleTS(names='CRU',
                                 season=seasons,
                                 aggregation=aggregation,
                                 slices=slices,
                                 varlist=variables,
                                 shape=shapetype,
                                 ldataset=True,
                                 **kwargs)
    if lWSC:  # another special case: river hydrographs
        try:
            dataset = loadGageStation(basin=basins,
                                      varlist=['runoff'],
                                      aggregation=aggregation,
                                      mode='climatology',
                                      filetype='monthly')
            if slices is not None:
                dataset = dataset(**noyears)  # slice immediately
            obsens += dataset.load()
        except GageStationError:
            pass  # just ignore, if gage station data is missing
    # return ensembles (will be wrapped in a list, if BatchLoad is used)
    return obsens
Ejemplo n.º 7
0
def rescaleDistributions(datasets,
                         reference=None,
                         target=None,
                         lscale=False,
                         suffixes=None,
                         lglobal=False):
    ''' Rescale datasets, so that the mean of each variable matches the corresponding variable in the
      reference dataset; if a target is specified, the target scale factors are applied to all
      datasets, if target is None, each dataset is rescaled individually.

      Arguments:
        datasets  : list, tuple or Ensemble of datasets to rescale
        reference : the reference Dataset, or its name, if datasets is an Ensemble
        target    : None (every dataset gets its own scale factors), 'auto' (like None, but a
                    dataset with one of the name suffixes reuses the scale factors of its parent
                    dataset, if these were computed earlier in the loop), or a Dataset (or its
                    name, if datasets is an Ensemble), whose scale factors are applied to all
        lscale    : also rescale the 'scale' parameter and not only the 'loc' parameter
        suffixes  : name suffixes for the 'auto' heuristic; a suffix may or may not include the
                    hyphen that separates it from the parent name (default: '-2050' and '2100')
        lglobal   : use the ratio of the array means instead of element-wise ratios

      Only variables of type VarRV that are present in both datasets are rescaled; the reference
      dataset itself is copied with unit factors. Returns a list of new datasets, or an Ensemble,
      if the input was an Ensemble; raises TypeError for invalid arguments and AxisError, if the
      axes of reference and target variables are not compatible. '''
    from warnings import warn
    # string type for name lookups (basestring only exists in Python 2)
    try:
        string_types = basestring
    except NameError:
        string_types = str
    # check and resolve arguments
    if not isinstance(datasets, (list, tuple, Ensemble)):
        raise TypeError(datasets)
    if isinstance(datasets, Ensemble) and isinstance(reference, string_types):
        reference = datasets[reference]
    elif not isinstance(reference, Dataset):
        raise TypeError(reference)
    if target is None or target == 'auto':
        pass  # every dataset is scaled individually or based on suffixes
    elif isinstance(datasets, Ensemble) and isinstance(target, string_types):
        target = datasets[target]
    elif not isinstance(target, Dataset):
        raise TypeError(target)
    # suffixes for scaling heuristic; str.endswith only accepts a tuple of suffixes
    if suffixes is None:
        suffixes = ('-2050', '2100')
    elif isinstance(suffixes, string_types):
        suffixes = (suffixes, )
    else:
        suffixes = tuple(suffixes)

    def parentName(name):
        ''' internal function to infer the name of the parent dataset from a suffixed name;
            returns a false value, if the name does not have one of the suffixes '''
        parent = None
        for suffix in suffixes:
            if name.endswith(suffix):
                # remove the suffix and the hyphen separating it from the parent name
                parent = name[:len(name) - len(suffix)]
                # N.B.: only remove the separator, if it is not already part of the suffix,
                #       otherwise one character too many is cut off (e.g. with '-2050')
                if not suffix.startswith('-'):
                    parent = parent[:-1]
                break
        if parent and '-' not in parent:
            parent += '-1'  # convention for WRF names
        return parent

    # determine scale factor
    def scaleFactor(reference, target, lscale=False, lglobal=False):
        ''' internal function to compute rescaling factors for common variables '''
        scalefactors = dict()  # scale factors for all applicable variables
        for varname, refvar in reference.variables.items():
            # only variables that appear in both sets
            if varname in target and isinstance(refvar, VarRV):
                tgtvar = target.variables[varname]
                # position of the 'loc' parameter on the last axis
                iloc = 1 if refvar.shape[-1] == 3 else 0
                # insert dummy ensemble axis, if necessary
                refvar = refvar.insertAxes(new_axes=tgtvar.axes,
                                           lcopy=True,
                                           asVar=True,
                                           linplace=False)
                if not refvar.axes[-1].name.startswith('params'):
                    raise AxisError(refvar.axes[-1])
                refdata = refvar.data_array.take(iloc, axis=-1)
                if refvar.ndim < tgtvar.ndim:
                    # N.B.: this is necessary, because WRF (target) can have an extra ensemble dimension that obs
                    #       typically don't have; then we just replicate the obs for each ensemble element
                    if lglobal:
                        warn(
                            "Scalefactors are being averaged over extra target dimensions (e.g. 'ensemble' axis)"
                        )
                    dimdiff = tgtvar.ndim - refvar.ndim
                    if refvar.shape != tgtvar.shape[dimdiff:]:
                        raise AxisError("{!s} != {!s}".format(tgtvar, refvar))
                    # prepend singleton dimensions, so that the division below broadcasts
                    refdata = refdata.reshape((1, ) * dimdiff +
                                              refvar.shape[:-1])
                elif refvar.shape != tgtvar.shape:
                    raise AxisError("{!s} != {!s}".format(tgtvar, refvar))
                tgtdata = tgtvar.data_array.take(iloc, axis=-1)
                if lglobal:
                    loc = np.mean(refdata) / np.mean(tgtdata)
                else:
                    loc = refdata / tgtdata
                if lscale:
                    # position of the 'scale' parameter on the last axis
                    iscale = 2 if refvar.shape[-1] == 3 else 1
                    refscale = refvar.data_array.take(iscale, axis=-1)
                    tgtscale = tgtvar.data_array.take(iscale, axis=-1)
                    if lglobal:
                        scale = np.mean(refscale) / np.mean(tgtscale)
                    else:
                        scale = refscale / tgtscale
                    # the scale factor is stored relative to the loc factor
                    scalefactors[varname] = loc, (scale / loc)
                else:
                    scalefactors[varname] = loc
        return scalefactors  # return dict with scale factors for variables

    # compute general scalefactors
    if target == 'auto':
        scalefactor_collection = dict()  # scale factors by dataset name
    elif target is not None:
        scalefactors = scaleFactor(reference,
                                   target,
                                   lscale=lscale,
                                   lglobal=lglobal)
    # loop over datasets
    rescaled_datasets = []
    for dataset in datasets:
        if dataset == reference:
            # determine variables that can be scaled (VarRV's)
            varlist = [
                varname for varname, var in dataset.variables.items()
                if isinstance(var, VarRV)
            ]
            rescaled_dataset = dataset.copy(varlist=varlist)
            # add mock scale factors for consistency
            for var in rescaled_dataset.variables.values():
                var.atts['loc_factor'] = 1
                var.atts['scale_factor'] = 1
                var.atts['shape_factor'] = 1
        else:
            # generate new dataset (without variables, and in-memory)
            if isinstance(dataset, DatasetNetCDF):
                rescaled_dataset = dataset.copy(varlist=[], asNC=False)
            else:
                rescaled_dataset = dataset.copy(varlist=[])
            # individual scaling
            if target is None or target == 'auto':
                parent = parentName(dataset.name) if target == 'auto' else None
                if parent and parent in scalefactor_collection:
                    # use scale factors from parent
                    scalefactors = scalefactor_collection[parent]
                else:  # scale individually
                    scalefactors = scaleFactor(reference,
                                               dataset,
                                               lscale=lscale,
                                               lglobal=lglobal)
                    if target == 'auto':
                        # for later use
                        scalefactor_collection[dataset.name] = scalefactors
            # loop over variables
            for varname, scalefactor in scalefactors.items():
                if varname in dataset:
                    # rescale and add variable to new dataset
                    var = dataset.variables[varname]
                    if lscale:
                        rsvar = var.rescale(loc=scalefactor[0],
                                            scale=scalefactor[1])
                    else:
                        rsvar = var.rescale(loc=scalefactor)
                    rescaled_dataset.addVariable(rsvar)
        # add dataset to list
        rescaled_datasets.append(rescaled_dataset)
    # put everything into Ensemble, if input was Ensemble
    if isinstance(datasets, Ensemble):
        rescaled_datasets = Ensemble(*rescaled_datasets,
                                     name=datasets.ens_name,
                                     title=datasets.ens_title)
    # return datasets/ensemble
    return rescaled_datasets
Ejemplo n.º 8
0
def loadShapeObservations(obs=None, seasons=None, basins=None, provs=None, shapes=None, stations=None, varlist=None, slices=None,
                          aggregation='mean', dataset_mode='time-series', lWSC=True, WSC_period=None, shapetype=None, 
                          variable_list=None, basin_list=None, lforceList=True, obs_ts=None, obs_clim=None, 
                          name=None, title=None, obs_list=None, ensemble_list=None, ensemble_product='inner', **kwargs):
  ''' convenience function to load shape observations based on 'aggregation' and 'varlist' (mainly add WSC gage data) 
  
      The entries of varlist are resolved through variable_list (or used as variable names, if 
      lforceList is False). A default/alias entry in obs is either replaced by obs_ts (time-series) 
      or, if aggregation is 'mean', no seasons are requested and obs_clim is given, loaded separately 
      from the obs_clim climatology and inserted at the same position; at most one such entry can be 
      resolved. If basins is a single string, lWSC is set and a WSC variable is requested, gage 
      station data are appended to the ensemble. name and title are used for the Ensemble that is
      created, if no observations are loaded; remaining kwargs are passed on to loadEnsemble.
      
      N.B.: lists passed as obs and ensemble_list, and expanded lists in kwargs, are modified in place.
      
      Returns an Ensemble of observational datasets; raises VariableError for unknown variable 
      lists, TypeError for an invalid obs argument, and ArgumentError for unsupported or 
      inconsistent ensemble expansion. '''
  if obs_list is None: obs_list = observational_datasets
  if name is None: name = 'obs'
  if title is None: title = 'Observations'
  # variables for which ensemble expansion is not supported
  not_supported = ('season','seasons','varlist','mode','dataset_mode','provs','basins','shapes',) 
  # resolve variable list (no need to maintain order)
  if isinstance(varlist,str): varlist = [varlist]
  variables = set(shp_params)
  for vname in varlist: # N.B.: the loop variable must not be 'name', which is the name of the ensemble
      if vname in variable_list: variables.update(variable_list[vname].vars)
      elif lforceList: raise VariableError("Variable list '{}' does not exist.".format(vname))
      else: variables.add(vname)
  variables = list(variables)
  # determine if we need gage dataset
  lWSC = isinstance(basins,str) and any(var in WSC_vars for var in variables) and lWSC # doesn't work if multiple basins are loaded
  # default obs list
  if obs is None: obs = ['Observations',]
  elif isinstance(obs,str): obs = [obs]
  elif isinstance(obs,tuple): obs = list(obs)
  elif not isinstance(obs,list): raise TypeError(obs)
  # configure slicing (extract basin/province/shape and period)
  expand_vars = ('basins','stations','provs','shapes','slices') # variables that need to be added to slices (and expanded first)
  expand_list = [varname for varname in expand_vars if varname in ensemble_list] if ensemble_list else []
  if expand_list:
      # copy variables to expand right away
      exp_args = dict(basins=basins, stations=stations, provs=provs, shapes=shapes, slices=slices)
      for varname in expand_list: # remove entries from ensemble expansion (modifies ensemble_list in place)
          if varname != 'slices': ensemble_list.remove(varname) # only 'slices' will continue to be expanded
      if 'slices' not in ensemble_list: ensemble_list.append('slices')
      slices = [_configSlices(**arg_dict) for arg_dict in expandArgumentList(expand_list=expand_list, lproduct=ensemble_product, **exp_args)]
  else:
      slices = _configSlices(slices=slices, basins=basins, provs=provs, shapes=shapes, stations=stations, period=None)
  # substitute default observational dataset and seperate aggregation methods
  iobs = None; clim_ens = None
  for i,obs_name in reverse_enumerate(obs):
      # N.B.: we need to iterate in reverse order, so that deleting items does not interfere with the indexing
      if obs_name in obs_aliases or obs_name not in timeseries_datasets:
          if iobs is not None: raise ArgumentError("Can only resolve one default dataset: {}".format(obs))
          if aggregation == 'mean' and seasons is None and obs_clim is not None: 
              # remove dataset entry from list (and all the arguments)
              del obs[i]; iobs = i # remember position of default obs in ensemble              
              clim_args = kwargs.copy(); slc = slices; shp = shapetype
              # clean up variables for ensemble expansion, if necessary
              if ensemble_list and ensemble_product.lower() == 'inner':
                  if 'names' in ensemble_list:
                      obs_names = [obs_clim]
                      # NOTE(review): 'names' itself is not skipped here (unlike in the 'outer' branch below) 
                      #               and ends up in the final else, unless it is in kwargs - confirm intent
                      for arg in ensemble_list:
                          if arg in ('slices','shape'): pass # dealt with separately
                          elif arg in not_supported:
                              raise ArgumentError("Expansion of keyword '{:s}' is currently not supported in ensemble expansion.".format(arg))
                          elif arg in kwargs: 
                              # move the entry for the default obs from the time-series to the climatology arguments
                              clim_args[arg] = kwargs[arg][iobs]; del kwargs[arg][iobs]
                          else: 
                              raise ArgumentError("Keyword '{:s}' not found in keyword arguments.".format(arg))
                      if 'slices' in ensemble_list: slc = slices[iobs]; del slices[iobs]
                      if 'shape' in ensemble_list: shp = shapetype[iobs]; del shapetype[iobs]
                      clim_len = 1 # expect length of climatology ensemble
                  else: 
                      obs_names = obs_clim # no name expansion
                      clim_len = None # expect length of climatology ensemble
                      for arg in ensemble_list:
                          # N.B.: the length has to be determined for the current argument, not for 
                          #       'slices' or 'shape', whenever one of these is in the ensemble list
                          if arg in not_supported:
                              raise ArgumentError("Expansion of keyword '{:s}' is currently not supported in ensemble expansion.".format(arg))
                          elif arg == 'slices': arg_len = len(slc) 
                          elif arg == 'shape': arg_len = len(shp)
                          elif arg in clim_args: arg_len = len(clim_args[arg])
                          else: raise ArgumentError("Keyword '{:s}' not found in keyword arguments.".format(arg))
                          # with an inner product, all expanded arguments need to have the same length
                          if clim_len is None: clim_len = arg_len
                          elif arg_len != clim_len: raise ArgumentError(arg,arg_len,clim_len)
              elif ensemble_list and ensemble_product.lower() == 'outer':
                  clim_len = 1
                  for arg in ensemble_list:
                      if arg != 'names':
                        # 'slices' and 'shape' are not part of the keyword arguments
                        if arg == 'slices': arg_list = slc
                        elif arg == 'shape': arg_list = shp
                        else: arg_list = clim_args[arg]
                        assert isinstance(arg_list,(list,tuple)), arg_list 
                        clim_len *= len(arg_list)
                  obs_names = [obs_clim] if 'names' in ensemble_list else obs_clim
              else:
                  obs_names = [obs_clim]; clim_len = 1
              # now load climtology instead of time-series and skip aggregation
              try:
                  clim_ens = loadEnsemble(names=obs_names, season=seasons, aggregation=None, slices=slc, varlist=variables, 
                                          ldataset=False, dataset_mode='climatology', shape=shp,
                                          ensemble_list=ensemble_list, ensemble_product=ensemble_product, 
                                          obs_list=obs_list, basin_list=basin_list, **clim_args)
                  assert len(clim_ens) == clim_len, clim_ens
              except EmptyDatasetError: pass
          else: 
              obs[i] = obs_ts # trivial: just substitute default name and load time-series
  # prepare and load ensemble of observations
  if len(obs) > 0:
      if len(obs) == 1 and ensemble_list and 'names' not in ensemble_list: obs = obs[0]
      try:
          obsens = loadEnsemble(names=obs, season=seasons, aggregation=aggregation, slices=slices,
                                varlist=variables, ldataset=False, dataset_mode=dataset_mode, 
                                shape=shapetype, obs_list=obs_list, basin_list=basin_list, 
                                ensemble_list=ensemble_list, ensemble_product=ensemble_product, **kwargs)          
      except EmptyDatasetError:
          obsens = Ensemble(name=name, title=title, obs_list=obs_list, basetype=Dataset)
  else: 
      obsens = Ensemble(name=name, title=title, obs_list=obs_list, basetype=Dataset)
  # add default obs back in if they were removed earlier
  if clim_ens is not None:
      for clim_ds in clim_ens[::-1]: # add observations in correct order: adding backwards allows successive insertion ...
          obsens.insertMember(iobs,clim_ds) # ... at the point where the name block starts
  # load stream gage data from WSC; should not interfere with anything else; append to ensemble
  if lWSC: # another special case: river hydrographs
      from datasets.WSC import GageStationError, loadGageStation
      try:
          if aggregation is not None and seasons is None: dataset_mode = 'climatology' # handled differently with gage data
          if WSC_period is None: WSC_period = kwargs.get('obs_period',kwargs.get('period',None))
          dataset = loadGageStation(basin=basins, varlist=['runoff'], aggregation=aggregation, period=WSC_period, 
                                    mode=dataset_mode, filetype='monthly', basin_list=basin_list, lfill=True, lexpand=True) # always load runoff/discharge
          if seasons:
              if aggregation: 
                  # N.B.: the method name can only be constructed, if there is an aggregation method
                  method = aggregation if aggregation.isupper() else aggregation.title() 
                  dataset = getattr(dataset,'seasonal'+method)(season=seasons, taxis='time')
              else: dataset = dataset.seasonalSample(season=seasons)
          if slices is not None: dataset = dataset(**slices) # slice immediately
          obsens += dataset.load()
      except GageStationError: 
          pass # just ignore, if gage station data is missing 
  # return ensembles (will be wrapped in a list, if BatchLoad is used)
  return obsens
Ejemplo n.º 9
0
def loadShapeEnsemble(names=None, seasons=None, basins=None, provs=None, shapes=None, varlist=None, 
                      aggregation='mean', slices=None, shapetype=None, filetypes=None, 
                      period=None, obs_period=None, WSC_period=None, name=None, title=None,
                      variable_list=None, WRF_exps=None, CESM_exps=None, WRF_ens=None, CESM_ens=None, 
                      basin_list=None, lforceList=True, obs_list=None, obs_ts=None, obs_clim=None, 
                      ensemble_list=None, ensemble_product='inner', **kwargs):
  ''' convenience function to load shape ensembles (in Ensemble container) or observations
  
      The entries of names are split into observations (names in obs_list or obs_aliases), which are 
      loaded with loadShapeObservations, and all other datasets, which are loaded with loadEnsemble; 
      the observations are then inserted into the ensemble at their original positions, and 
      observations that are not found under their original name are added after the last one. 
      name and title refer to the ensemble, not to its members; kwargs are passed on to both loader 
      functions. With an inner product expansion that includes 'names', the other expanded argument 
      lists are split between the two loaders in the same way as the names.
      
      Returns the Ensemble (or the observations, if nothing else was loaded), with the common 
      resolution string of the non-observational members (or None) in the attribute 'resolution'. '''
  if isinstance(names,str): names = [names] # a single name (list() would split it into characters)
  names = list(names) # make a new list (copy)
  # separate observations
  if obs_list is None: obs_list = observational_datasets
  obs_names = []; iobs = []; ens_names = []; iens = []
  for i,ds_name in enumerate(names): # N.B.: the loop variable must not be 'name' (name of the ensemble)
      if ds_name in obs_list or ds_name in obs_aliases:
          obs_names.append(ds_name); iobs.append(i)          
      else: 
          ens_names.append(ds_name); iens.append(i)
  assert len(iens) == len(ens_names) and len(iobs) == len(obs_names) 
  if len(obs_names) > 0:       
      # assemble arguments
      obs_args = dict(obs=obs_names, seasons=seasons, basins=basins, provs=provs, shapes=shapes, varlist=varlist, 
                      slices=slices, aggregation=aggregation, shapetype=shapetype, 
                      period=period, obs_period=obs_period, obs_ts=obs_ts, obs_clim=obs_clim, 
                      variable_list=variable_list, basin_list=basin_list, WSC_period=WSC_period,
                      ensemble_list=ensemble_list, ensemble_product=ensemble_product, **kwargs)
      # check if we have to modify to preserve ensemble_list expansion
      if ensemble_list and ensemble_product == 'inner' and 'names' in ensemble_list and len(ensemble_list) > 1: 
          for key in ensemble_list:
              if key != 'names':
                  ens_list = obs_args[key]
                  obs_args[key] = [ens_list[i] for i in iobs] # only keep entries that belong to observations
      # observations for basins require special treatment to merge basin averages with gage values
      # load observations by redirecting to appropriate loader function
      # N.B.: loadShapeObservations can remove or substitute entries of obs_names in place
      obsens = loadShapeObservations(name=name, title=title, obs_list=obs_list, **obs_args)
  else: obsens = []
  if len(ens_names) > 0: # has to be a list
      # prepare arguments
      variables, filetypes = _resolveVarlist(varlist=varlist, filetypes=filetypes, 
                                             params=shp_params, variable_list=variable_list, lforceList=lforceList)
      # configure slicing (extract basin/province/shape and period)
      slices = _configSlices(slices=slices, basins=basins, provs=provs, shapes=shapes, period=period)
      # assemble arguments
      ens_args = dict(names=ens_names, season=seasons, slices=slices, varlist=variables, shape=shapetype, 
                      aggregation=aggregation, period=period, obs_period=obs_period, 
                      WRF_exps=WRF_exps, CESM_exps=CESM_exps, WRF_ens=WRF_ens, CESM_ens=CESM_ens, filetypes=filetypes, 
                      ensemble_list=ensemble_list, ensemble_product=ensemble_product, **kwargs)
      # check if we have to remove obs datasets to preserve ensemble_list expansion
      if ensemble_list and ensemble_product == 'inner' and 'names' in ensemble_list and len(ensemble_list) > 1: 
          for key in ensemble_list:
              if key != 'names':
                  ens_list = ens_args[key]
                  ens_args[key] = [ens_list[i] for i in iens] # only keep entries that do not belong to observations
      # load ensemble (no iteration here)
      shpens = loadEnsemble(name=name, title=title, obs_list=obs_list, **ens_args)
  else: shpens = Ensemble(name=name, title=title, basetype='Dataset')
  # get resolution tag (will be added below)
  res = None
  for member in shpens:
      if 'resstr' in member.atts:
          if res is None: res = member.atts['resstr']
          elif res != member.atts['resstr']:
              res = None; break # no common resolution
  # return ensembles (will be wrapped in a list, if BatchLoad is used)
  if len(obsens) > 0 and len(shpens) > 0:
      ilast = len(names) - 1 # fallback: add after the last dataset, if no known observations are left
      for obs_name,i in zip(obs_names,iobs): 
          shpens.insertMember(i,obsens[obs_name]) # add known observations in correct order
          del obsens[obs_name] # remove the ones we already know from list, so we can deal with the rest
          ilast = i
      j = ilast + 1 # add remaining obs datasets after last one
      for k,obs in enumerate(obsens): shpens.insertMember(j+k,obs)
  elif len(obsens) > 0 and len(shpens) == 0:
      shpens = obsens
  shpens.resolution = res # add resolution tag now, to make sure it is there 
  return shpens