Example #1
 def __call__(self,
              lparallel=False,
              NP=None,
              inner_list=None,
              outer_list=None,
              callback=None,
              **kwargs):
     ''' this method is called instead of a class or instance method; it applies the arguments 
     'kwargs' to each ensemble member; it also supports argument expansion with inner and 
     outer product (prior to application to ensemble) and parallelization using multiprocessing '''
     # expand kwargs to ensemble list
     kwargs_list = expandArgumentList(inner_list=inner_list,
                                      outer_list=outer_list,
                                      **kwargs)
     if len(kwargs_list) == 1:
         kwargs_list = kwargs_list * len(self.klass.members)
     elif len(kwargs_list) != len(self.klass.members):
         raise ArgumentError(
             'Length of expanded argument list does not match ensemble size! {} ~= {}'
             .format(len(kwargs_list), len(self.klass.members)))
     # loop over ensemble members and execute function
     if lparallel:
         # parallelize method execution using multiprocessing
         pool = multiprocessing.Pool(processes=NP)  # initialize worker pool
         if callback is not None and not callable(callback):
             raise TypeError(callback)
         # N.B.: the callback function is passed a result from the apply_method function,
         #       which returns a tuple of the form (member, exit_code)
         # define work loads (function and its arguments) and start tasks
         results = [
             pool.apply_async(apply_method, (member, self.attr),
                              kwargs,
                              callback=callback)
             for member, kwargs in zip(self.klass.members, kwargs_list)
         ]
         # N.B.: Beware Pickling!!!
         pool.close()
         pool.join()  # wait to finish
         # retrieve and assemble results
         results = [result.get() for result in results]
         # divide members and results (apply_method returns both, in case members were modified)
         self.klass.members = [result[0] for result in results]
         results = [result[1] for result in results]
     else:
         # get instance methods
         methods = [
             getattr(member, self.attr) for member in self.klass.members
         ]
         # just apply sequentially
         results = [
             method(**kwargs)
             for method, kwargs in zip(methods, kwargs_list)
         ]
     if len(results) != len(self.klass.members):
         raise ArgumentError(
             'Length of results list does not match ensemble size! {} ~= {}'
             .format(len(results), len(self.klass.members)))
     return tuple(results)
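
Note: the calls above rely on expandArgumentList to turn list-valued keyword arguments into a list of keyword dictionaries, one per ensemble member. A minimal sketch of the assumed expansion semantics, with hypothetical argument values (the actual function is defined elsewhere in this codebase):

# hypothetical values; outer_list expands arguments via Cartesian product
kwargs_list = expandArgumentList(outer_list=['a','b'], a=[1,2], b=['x','y'], c=0)
# -> 4 dicts: {'a':1,'b':'x','c':0}, {'a':1,'b':'y','c':0}, {'a':2,'b':'x','c':0}, ...
# inner_list expands element-wise (like zip), so the listed arguments must have equal length
kwargs_list = expandArgumentList(inner_list=['a','b'], a=[1,2], b=['x','y'], c=0)
# -> 2 dicts: {'a':1,'b':'x','c':0}, {'a':2,'b':'y','c':0}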
Example #2
 def __call__(self, load_list=None, lproduct='outer', inner_list=None, outer_list=None, 
              lensemble=None, ens_name=None, ens_title=None, **kwargs):
   ''' wrap original function: expand argument list, execute load_fct over argument list, 
       and return a list or Ensemble of datasets '''
   # decide what to do
   if load_list is None and inner_list is None and outer_list is None:
     # normal operation: no expansion
     datasets = self.load_fct(**kwargs)
   else:
     # expansion required
     lensemble = ens_name is not None if lensemble is None else lensemble
     # figure out arguments
     kwargs_list = expandArgumentList(expand_list=load_list, lproduct=lproduct, 
                                      inner_list=inner_list, outer_list=outer_list, **kwargs)
     # load datasets
     datasets = []
     for kwargs in kwargs_list:    
       # load dataset
       datasets.append(self.load_fct(**kwargs))    
     # construct ensemble
     if lensemble:
       datasets = Ensemble(members=datasets, name=ens_name, title=ens_title, basetype='Dataset')
   # return list or ensemble of datasets
   return datasets
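
For context, this __call__ wrapper presumably decorates a dataset-loading function; a hypothetical invocation could look like the following (the loader name and experiment names are made up for illustration):

# hypothetical usage of a wrapped loader (names are made up)
ensemble = load_WRF(load_list=['experiment'], lproduct='outer',
                    experiment=['ctrl-1','ctrl-2'], period=15,
                    lensemble=True, ens_name='WRF ensemble')
# load_fct is called once per expanded argument set; with lensemble=True the
# two datasets are returned as an Ensemble named 'WRF ensemble'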
Example #3
 def __call__(self, lparallel=False, NP=None, inner_list=None, outer_list=None, callback=None, **kwargs):
   ''' this method is called instead of a class or instance method; it applies the arguments 
       'kwargs' to each ensemble member; it also supports argument expansion with inner and 
       outer product (prior to application to ensemble) and parallelization using multiprocessing '''
   # expand kwargs to ensemble list
   kwargs_list = expandArgumentList(inner_list=inner_list, outer_list=outer_list, **kwargs)
   if len(kwargs_list) == 1: kwargs_list = kwargs_list * len(self.klass.members)
   elif len(kwargs_list) != len(self.klass.members): 
     raise ArgumentError('Length of expanded argument list does not match ensemble size! {} ~= {}'.format(
                         len(kwargs_list),len(self.klass.members)))
   # loop over ensemble members and execute function
   if lparallel:
     # parallelize method execution using multiprocessing
     pool = multiprocessing.Pool(processes=NP) # initialize worker pool
     if callback is not None and not callable(callback): raise TypeError(callback)
     # N.B.: the callback function is passed a result from the apply_method function, 
     #       which returns a tuple of the form (member, exit_code)
     # define work loads (function and its arguments) and start tasks      
     results = [pool.apply_async(apply_method, (member,self.attr), kwargs, callback=callback) 
                                     for member,kwargs in zip(self.klass.members,kwargs_list)]          
     # N.B.: Beware Pickling!!!
     pool.close(); pool.join() # wait to finish
     # retrieve and assemble results 
     results = [result.get() for result in results]
     # divide members and results (apply_method returns both, in case members were modified)
     self.klass.members = [result[0] for result in results]
     results = [result[1] for result in results]
   else:
     # get instance methods
     methods = [getattr(member,self.attr) for member in self.klass.members]
     # just apply sequentially
     results = [method(**kwargs) for method,kwargs in zip(methods,kwargs_list)]
   if len(results) != len(self.klass.members): 
     raise ArgumentError('Length of results list does not match ensemble size! {} ~= {}'.format(
                         len(results),len(self.klass.members)))
   return tuple(results)
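
The module-level worker apply_method used in the parallel branch is not shown on this page; based on the comments above (the callback receives a (member, exit_code)-style tuple, and the member is returned in case it was modified), it plausibly looks like this sketch:

# plausible sketch of the worker; it must live at module level so that
# multiprocessing can pickle it
def apply_method(member, attr, **kwargs):
    result = getattr(member, attr)(**kwargs)  # call the instance method on the member
    return member, result  # also return the member, in case it was modified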
Example #4
def loadEnsembleTS(names=None, name=None, title=None, varlist=None, aggregation=None, season=None, prov=None, 
                   slices=None, obsslices=None, years=None, reduction=None, shape=None, station=None, 
                   constraints=None, filetypes=None, domain=None, ldataset=False, lcheckVar=False, 
                   lwrite=False, ltrimT=True, name_tags=None, dataset_mode='time-series', lminmax=False,
                   master=None, lall=True, ensemble_list=None, ensemble_product='inner', lensembleAxis=False,
                   WRF_exps=None, CESM_exps=None, WRF_ens=None, CESM_ens=None, **kwargs):
  ''' a convenience function to load an ensemble of time-series, based on certain criteria; works 
      with either stations or regions; seasonal/climatological aggregation is also supported '''
  # prepare ensemble
  if varlist is not None:
    varlist = list(varlist)[:] # copy list
    if station: 
      for var in stn_params: # necessary to select stations
        if var not in varlist: varlist.append(var)
    if shape: 
      for var in shp_params: # necessary to select shapes
        if var not in varlist: varlist.append(var)
  # prepare ensemble and arguments
  if ldataset and ensemble_list: raise ArgumentError("'ldataset' and 'ensemble_list' are mutually exclusive")
  elif not ldataset: ensemble = Ensemble(name=name, title=title, basetype='Dataset')
  # expand argument list
  if ensemble_list is None: ensemble_list = ['names'] if not ldataset else None
  loadargs = expandArgumentList(names=names, station=station, prov=prov, shape=shape, varlist=varlist, 
                                mode=dataset_mode, filetypes=filetypes, domains=domain, lwrite=lwrite,
                                slices=slices, obsslices=obsslices, name_tags=name_tags, ltrimT=ltrimT,
                                years=years, expand_list=ensemble_list, lproduct=ensemble_product,
                                lensembleAxis=lensembleAxis)
  for loadarg in loadargs:
    # clean up arguments
    name = loadarg.pop('names',None); name_tag = loadarg.pop('name_tags',None)
    slcs = loadarg.pop('slices',None); obsslcs = loadarg.pop('obsslices',None)    
    # load individual dataset
    dataset = loadDataset(name=name, WRF_exps=WRF_exps, CESM_exps=CESM_exps, WRF_ens=WRF_ens, CESM_ens=CESM_ens, **loadarg)
    if name_tag is not None: 
      if name_tag[0] == '_': dataset.name += name_tag
      else: dataset.name = name_tag
    # apply slicing
    if obsslcs and ( dataset.name[:3].lower() == 'obs' or dataset.name.isupper() ):
      slcs = dict() if slcs is None else slcs.copy()
      slcs.update(**obsslcs) # add special slices for obs
      # N.B.: currently VarNC's can only be sliced once, because we can't combine slices yet
    if slcs: dataset = dataset(lminmax=lminmax, **slcs) # slice immediately 
    if not ldataset: ensemble += dataset.load() # load data and add to ensemble
  # if input was not a list, just return dataset
  if ldataset: ensemble = dataset.load() # load data
  # select specific stations (if applicable)
  if not ldataset and station and constraints:
    from datasets.EC import selectStations
    ensemble = selectStations(ensemble, stnaxis='station', master=master, linplace=False, lall=lall,
                              lcheckVar=lcheckVar, **constraints)
  # make sure all have cluster meta data  
  for varname in stn_params + shp_params:
    # find valid instance
    var = None
    for ds in ensemble: 
      if varname in ds: var = ds[varname]; break
    # give to those who have not
    if var is not None:
      var.load() # load data and add as regular variable (not VarNC)
      for ds in ensemble: 
        if varname not in ds: ds.addVariable(var.copy()) 
  # apply general reduction operations
  if reduction is not None:
    for ax,op in reduction.items():
      if isinstance(op, str): ensemble = getattr(ensemble,op)(axis=ax)
      elif isinstance(op, (int,np.integer,float,np.inexact)): ensemble = ensemble(**{ax:op})
  # extract seasonal/climatological values/extrema
  if (ldataset and len(ensemble)==0): raise EmptyDatasetError(varlist)
  if not ldataset and any([len(ds)==0 for ds in ensemble]): raise EmptyDatasetError(ensemble)
  # N.B.: the operations below should work with Ensembles as well as Datasets 
  if aggregation:
    method = aggregation if aggregation.isupper() else aggregation.title() 
    if season is None:
      ensemble = getattr(ensemble,'clim'+method)(taxis='time', **kwargs)
    else:
      ensemble = getattr(ensemble,'seasonal'+method)(season=season, taxis='time', **kwargs)
  elif season: # but not aggregation
    ensemble = ensemble.seasonalSample(season=season)
  # return dataset
  return ensemble
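
A hypothetical call, to make the expansion mechanics concrete (the experiment names are made up; WRF_exps/WRF_ens are the experiment catalogs that loadDataset expects):

# hypothetical call (experiment names are made up)
ens = loadEnsembleTS(names=['ctrl-1','ctrl-2'], varlist=['precip'], shape='shpavg',
                     aggregation='mean', season='summer', ensemble_list=['names'],
                     WRF_exps=WRF_exps, CESM_exps=None, WRF_ens=WRF_ens, CESM_ens=None)
# 'names' is expanded, so each experiment becomes one member of the returned
# Ensemble; seasonalMean(season='summer') is then applied to the whole Ensemble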
Example #5
def loadEnsembleTS(names=None, name=None, title=None, varlist=None, aggregation=None, season=None, prov=None, 
                   slices=None, obsslices=None, years=None, reduction=None, shape=None, station=None, 
                   constraints=None, filetypes=None, domain=None, ldataset=False, lcheckVar=False, 
                   lwrite=False, ltrimT=True, name_tags=None, dataset_mode='time-series', lminmax=False,
                   master=None, lall=True, ensemble_list=None, ensemble_product='inner', lensembleAxis=False,
                   WRF_exps=None, CESM_exps=None, WRF_ens=None, CESM_ens=None, **kwargs):
  ''' a convenience function to load an ensemble of time-series, based on certain criteria; works 
      with either stations or regions; seasonal/climatological aggregation is also supported '''
  # prepare ensemble
  if varlist is not None:
    varlist = list(varlist)[:] # copy list
    if station: 
      for var in stn_params: # necessary to select stations
        if var not in varlist: varlist.append(var)
    if shape: 
      for var in shp_params: # necessary to select shapes
        if var not in varlist: varlist.append(var)
  # prepare ensemble and arguments
  if ldataset and ensemble_list: raise ArgumentError("'ldataset' and 'ensemble_list' are mutually exclusive")
  elif not ldataset: ensemble = Ensemble(name=name, title=title, basetype='Dataset')
  # expand argument list
  if ensemble_list is None: ensemble_list = ['names'] if not ldataset else None
  loadargs = expandArgumentList(names=names, station=station, prov=prov, shape=shape, varlist=varlist, 
                                mode=dataset_mode, filetypes=filetypes, domains=domain, lwrite=lwrite,
                                slices=slices, obsslices=obsslices, name_tags=name_tags, ltrimT=ltrimT,
                                years=years, expand_list=ensemble_list, lproduct=ensemble_product,
                                lensembleAxis=lensembleAxis)
  for loadarg in loadargs:
    # clean up arguments
    name = loadarg.pop('names',None); name_tag = loadarg.pop('name_tags',None)
    slcs = loadarg.pop('slices',None); obsslcs = loadarg.pop('obsslices',None)    
    # load individual dataset
    dataset = loadDataset(name=name, WRF_exps=WRF_exps, CESM_exps=CESM_exps, WRF_ens=WRF_ens, CESM_ens=CESM_ens, **loadarg)
    if name_tag is not None: 
      if name_tag[0] == '_': dataset.name += name_tag
      else: dataset.name = name_tag
    # apply slicing
    if obsslcs and ( dataset.name[:3].lower() == 'obs' or dataset.name.isupper() ):
      if slcs is None: slcs = obsslcs
      else: slcs.update(**obsslcs) # add special slices for obs
      # N.B.: currently VarNC's can only be sliced once, because we can't combine slices yet
    if slcs: dataset = dataset(lminmax=lminmax, **slcs) # slice immediately 
    if not ldataset: ensemble += dataset.load() # load data and add to ensemble
  # if input was not a list, just return dataset
  if ldataset: ensemble = dataset.load() # load data
  # select specific stations (if applicable)
  if not ldataset and station and constraints:
    from datasets.EC import selectStations
    ensemble = selectStations(ensemble, stnaxis='station', master=master, linplace=False, lall=lall,
                              lcheckVar=lcheckVar, **constraints)
  # make sure all have cluster meta data  
  for varname in stn_params + shp_params:
    # find valid instance
    var = None
    for ds in ensemble: 
      if varname in ds: var = ds[varname]; break
    # give to those who have not
    if var is not None:
      var.load() # load data and add as regular variable (not VarNC)
      for ds in ensemble: 
        if varname not in ds: ds.addVariable(var.copy()) 
  # apply general reduction operations
  if reduction is not None:
    for ax,op in reduction.items():
      if isinstance(op, str): ensemble = getattr(ensemble,op)(axis=ax)
      elif isinstance(op, (int,np.integer,float,np.inexact)): ensemble = ensemble(**{ax:op})
  # extract seasonal/climatological values/extrema
  if (ldataset and len(ensemble)==0): raise EmptyDatasetError(varlist)
  if not ldataset and any([len(ds)==0 for ds in ensemble]): raise EmptyDatasetError(ensemble)
  # N.B.: the operations below should work with Ensembles as well as Datasets 
  if aggregation:
    method = aggregation if aggregation.isupper() else aggregation.title() 
    if season is None:
      ensemble = getattr(ensemble,'clim'+method)(taxis='time', **kwargs)
    else:
      ensemble = getattr(ensemble,'seasonal'+method)(season=season, taxis='time', **kwargs)
  elif season: # but not aggregation
    ensemble = ensemble.seasonalSample(season=season)
  # return dataset
  return ensemble
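
The reduction argument shared by both variants maps axis names to operations: a string selects an Ensemble method to apply along that axis, while a number triggers a selection call. A hypothetical illustration:

# hypothetical values for the 'reduction' argument
reduction = {'station': 'mean'}  # applies ensemble.mean(axis='station')
reduction = {'year': 1990}       # applies ensemble(year=1990), i.e. a coordinate selection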
Example #6
 def __init__(self, inner_list=None, outer_list=None, **kwargs):
     ''' initialize an ensemble of HGS simulations based on HGS arguments and project descriptors;
     all keyword arguments are automatically expanded based on inner/outer product rules, defined
     using the inner_list/outer_list arguments; the expanded argument lists are used to initialize
     the individual ensemble members; note that a string substitution is applied to all folder 
     variables (incl. 'rundir') prior to constructing the HGS instance, i.e. rundir.format(**kwargs) '''
     self.lreport = kwargs.get('lreport', self.lreport)
     self.loverwrite = kwargs.get('loverwrite', self.loverwrite)
     self.lindicator = kwargs.get('lindicator', self.lindicator)
     self.lrunfailed = kwargs.get('lrunfailed', self.lrunfailed)
     self.lrestart = kwargs.get('lrestart', self.lrestart)
     # expand argument list (plain, nothing special)
     kwargs_list = expandArgumentList(inner_list=inner_list,
                                      outer_list=outer_list,
                                      **kwargs)
     # loop over ensemble members
     self.members = []
     self.rundirs = []
     self.hgsargs = []  # ensemble lists
     for kwargs in kwargs_list:
         # isolate folder variables and perform variable substitution
         for folder_type in ('rundir', 'template_folder', 'input_folder',
                             'pet_folder', 'precip_inc', 'pet_inc',
                             'ic_files'):
             if folder_type in kwargs:
                 folder = kwargs[folder_type]
                 if isinstance(folder, str):
                     # perform keyword substitution with all available arguments
                     if folder_type == 'ic_files':
                         # we need to preserve '{FILETYPE}' for later
                         kwargs[folder_type] = folder.format(
                             FILETYPE='{FILETYPE}', **kwargs)
                     else:
                         kwargs[folder_type] = folder.format(**kwargs)
                 elif folder is None:
                     pass
                 else:
                     raise TypeError(folder)
         # check rundir
         rundir = kwargs['rundir']
         kwargs['restart'] = False  # this keyword argument should be controlled by the Ensemble handler
         if rundir in self.rundirs:
             raise ArgumentError("Multiple occurrences of run directory:\n '{}'".format(rundir))
         # figure out skipping
         if os.path.exists(rundir):
             if self.loverwrite:
                 if self.lreport:
                     print("Overwriting existing experiment folder '{:s}'.".format(rundir))
                 lskip = False
             elif self.lindicator and os.path.exists('{}/SCHEDULED'.format(rundir)):
                 if self.lreport:
                     print("Skipping experiment folder '{:s}' (scheduled).".format(rundir))
                 lskip = True
             elif self.lindicator and os.path.exists('{}/IN_PROGRESS'.format(rundir)):
                 if self.lrestart:
                     shutil.move(os.path.join(rundir, 'IN_PROGRESS'),
                                 os.path.join(rundir, 'RESTARTED'))
                     if self.lreport:
                         print("Restarting experiment in folder '{:s}' (was in progress).".format(rundir))
                     lskip = False
                     kwargs['restart'] = True
                 else:
                     if self.lreport:
                         print("Skipping experiment folder '{:s}' (in progress).".format(rundir))
                     lskip = True
             elif self.lindicator and os.path.exists('{}/COMPLETED'.format(rundir)):
                 if self.lreport:
                     print("Skipping experiment folder '{:s}' (completed).".format(rundir))
                 lskip = True
             elif self.lindicator and os.path.exists('{}/FAILED'.format(rundir)):
                 # this should be the last option, so as to prevent overwriting data
                 if self.lrunfailed:
                     if self.lreport:
                         print("Overwriting failed experiment folder '{:s}'.".format(rundir))
                     lskip = False  # rundir will be deleted
                 else:
                     if self.lreport:
                         print("Skipping experiment folder '{:s}' (failed).".format(rundir))
                     lskip = True
             else:  # no/unknown indicator file
                 if self.lreport:
                     print("Overwriting existing experiment folder '{:s}'.".format(rundir))
                 lskip = False  # rundir will be deleted
         else:
             if self.lreport:
                 print("Creating new experiment folder '{:s}'.".format(rundir))
             lskip = False
         if not lskip:
             self.rundirs.append(rundir)
             # isolate HGS constructor arguments
             hgsargs = inspect.getfullargspec(HGS.__init__).args  # names of positional and keyword arguments
             hgsargs = {arg: kwargs[arg] for arg in hgsargs if arg in kwargs}
             self.hgsargs.append(hgsargs)
             # initialize HGS instance
             hgs = HGS(**hgsargs)
             self.members.append(hgs)
     # final check
     if len(self.members) == 0:
         raise EnsembleError("No experiments to run (empty list).")
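
To illustrate the folder-variable substitution described in the docstring, a hypothetical construction of such an ensemble (the class name, paths, and parameter names are made up):

# hypothetical construction (class name, paths, and parameters are made up)
ens = HGSEnsemble(outer_list=['scenario'], scenario=['hist','2050','2100'],
                  rundir='/scratch/hgs/{scenario}', input_folder='/data/input')
# '{scenario}' in rundir is filled via rundir.format(**kwargs), so the three
# members run in /scratch/hgs/hist, /scratch/hgs/2050 and /scratch/hgs/2100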
Example #7
def readRasterArray(file_pattern, lgzip=None, lgdal=True, dtype=np.float32, lmask=True, fillValue=None, lfeedback=False,
                    lgeotransform=True, axes=None, lna=False, lskipMissing=False, path_params=None, **kwargs):
    ''' function to load a multi-dimensional numpy array from several structured ASCII raster files '''
    
    if axes is None: raise NotImplementedError
    #TODO: implement automatic detection of axes arguments and axes order
    
    ## expand path argument and figure out dimensions
    
    # collect axes arguments
    shape = []; axes_kwargs = dict()
    for ax in axes:
        if ax not in kwargs: raise AxisError(ax)
        coord = kwargs.pop(ax)
        shape.append(len(coord))
        axes_kwargs[ax] = coord
    assert len(axes) == len(shape) == len(axes_kwargs)
    shape = tuple(shape)
    #TODO: add handling of embedded inner product expansion
    
    # argument expansion using outer product
    file_kwargs_list = expandArgumentList(outer_list=axes, **axes_kwargs)
    assert np.prod(shape) == len(file_kwargs_list)
    
    ## load data from raster files and assemble array
    path_params = dict() if path_params is None else path_params.copy() # will be modified
    
    # find first valid 2D raster to determine shape
    i0 = 0 
    path_params.update(file_kwargs_list[i0]) # update axes parameters
    filepath = file_pattern.format(**path_params) # construct file name
    if not os.path.exists(filepath): 
        if lskipMissing: # find first valid
            while not os.path.exists(filepath):
                i0 += 1 # go to next raster file
                if i0 >= len(file_kwargs_list): 
                  raise IOError("No valid input raster files found!\n'{}'".format(filepath))
                if lfeedback: print(' ', end='')
                path_params.update(file_kwargs_list[i0]) # update axes parameters
                filepath = file_pattern.format(**path_params) # nest in line
        else: # or raise error
            raise IOError(filepath)
      
    # read first 2D raster file
    data2D = readASCIIraster(filepath, lgzip=lgzip, lgdal=lgdal, dtype=dtype, lna=True,
                             lmask=lmask, fillValue=fillValue, lgeotransform=lgeotransform, **kwargs)
    if lgeotransform: data2D, geotransform0, na = data2D
    else: data2D, na = data2D # we might still need na, but no need to check if it is the same
    shape2D = data2D.shape # get 2D raster shape for later use
    
    # allocate data array
    list_shape = (np.prod(shape),)+shape2D # assume 3D shape to concatenate 2D rasters
    if lmask:
        data = ma.empty(list_shape, dtype=dtype)
        if fillValue is None: data._fill_value = data2D._fill_value 
        else: data._fill_value = fillValue
        data.mask = True # initialize everything as masked 
    else: data = np.empty(list_shape, dtype=dtype) # allocate the array
    assert data.shape[0] == len(file_kwargs_list), (data.shape, len(file_kwargs_list))
    # insert (up to) first raster before continuing
    if lskipMissing and i0 > 0:
      data[:i0,:,:] = ma.masked if lmask else fillValue # mask all invalid rasters up to first valid raster
    data[i0,:,:] = data2D # add first (valid) raster
    
    # loop over remaining 2D raster files
    for i,file_kwargs in enumerate(file_kwargs_list[i0:]):
        
        path_params.update(file_kwargs) # update axes parameters
        filepath = file_pattern.format(**path_params) # construct file name
        if os.path.exists(filepath):
            if lfeedback: print('.', end='') # indicate valid raster with dot
            # read 2D raster file
            data2D = readASCIIraster(filepath, lgzip=lgzip, lgdal=lgdal, dtype=dtype, lna=False,
                                     lmask=lmask, fillValue=fillValue, lgeotransform=lgeotransform, **kwargs)
            # check geotransform
            if lgeotransform: 
                data2D, geotransform = data2D
                if not geotransform == geotransform0:
                    raise AxisError(geotransform) # to make sure all geotransforms are identical!
            else: geotransform = None
            # size information
            if not shape2D == data2D.shape:
                raise AxisError(data2D.shape) # to make sure all raster shapes are identical!
            # insert 2D raster into 3D array
            data[i+i0,:,:] = data2D # raster shape has to match
        elif lskipMissing:
            # fill with masked values
            data[i+i0,:,:] = ma.masked # mask missing raster
            if lfeedback: print(' ', end='') # indicate missing raster with blank
        else:
          raise IOError(filepath)

    # complete feedback with linebreak
    if lfeedback: print('')
    
    # reshape and check dimensions
    assert i+i0 == data.shape[0]-1, (i,i0)
    data = data.reshape(shape+shape2D) # now we have the full shape
    gc.collect() # remove duplicate data
    
    # return data and optional meta data
    if lgeotransform or lna:
        return_data = (data,)
        if lgeotransform: return_data += (geotransform,)
        if lna: return_data += (na,)
    else: 
        return_data = data
    return return_data
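
A hypothetical call, assuming twelve monthly rasters addressed through a 'month' axis in the file pattern (the pattern and coordinate values are made up):

# hypothetical call (file pattern and coordinates are made up)
data, geotransform, na = readRasterArray('clim_{month:02d}.asc', axes=('month',),
                                         month=list(range(1,13)),
                                         lgeotransform=True, lna=True)
# -> masked array of shape (12,)+shape2D, plus the geotransform and no-data value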
Example #8
 def __init__(self, inner_list=None, outer_list=None, **kwargs):
   ''' initialize an ensemble of HGS simulations based on HGS arguments and project descriptors;
       all keyword arguments are automatically expanded based on inner/outer product rules, defined
       using the inner_list/outer_list arguments; the expanded argument lists are used to initialize
       the individual ensemble members; note that a string substitution is applied to all folder 
       variables (incl. 'rundir') prior to constructing the HGS instance, i.e. rundir.format(**kwargs) '''
   self.lreport    = kwargs.get('lreport',self.lreport)
   self.loverwrite = kwargs.get('loverwrite',self.loverwrite)
   self.lindicator = kwargs.get('lindicator',self.lindicator)
   self.lrunfailed = kwargs.get('lrunfailed',self.lrunfailed)
   self.lrestart   = kwargs.get('lrestart',self.lrestart)
   # expand argument list (plain, nothing special)
   kwargs_list = expandArgumentList(inner_list=inner_list, outer_list=outer_list, **kwargs)
   # loop over ensemble members
   self.members = []; self.rundirs = []; self.hgsargs = [] # ensemble lists
   for kwargs in kwargs_list:
     # isolate folder variables and perform variable substitution
     for folder_type in ('rundir','template_folder','input_folder','pet_folder','precip_inc','pet_inc','ic_files'):
         if folder_type in kwargs:
             folder = kwargs[folder_type]
             if isinstance(folder,str):
               # perform keyword substitution with all available arguments
                if folder_type == 'ic_files':
                   # we need to preserve '{FILETYPE}' for later 
                   kwargs[folder_type] = folder.format(FILETYPE='{FILETYPE}', **kwargs)
               else: kwargs[folder_type] = folder.format(**kwargs)
             elif folder is None: pass
             else: raise TypeError(folder)
     # check rundir
     rundir = kwargs['rundir']
     kwargs['restart'] = False # this keyword argument should be controlled by the Ensemble handler
     if rundir in self.rundirs:
       raise ArgumentError("Multiple occurence of run directory:\n '{}'".format(rundir))
     # figure out skipping      
     if os.path.exists(rundir):
       if self.loverwrite:
         if self.lreport: print("Overwriting existing experiment folder '{:s}'.".format(rundir))
         lskip = False
       elif self.lindicator and os.path.exists('{}/SCHEDULED'.format(rundir)):
         if self.lreport: print("Skipping experiment folder '{:s}' (scheduled).".format(rundir))
         lskip = True
       elif self.lindicator and os.path.exists('{}/IN_PROGRESS'.format(rundir)):
         if self.lrestart:
           shutil.move(os.path.join(rundir,'IN_PROGRESS'), os.path.join(rundir,'RESTARTED'))
           if self.lreport: print("Restarting experiment in folder '{:s}' (was in progress).".format(rundir))
           lskip = False
           kwargs['restart'] = True
         else:
           if self.lreport: print("Skipping experiment folder '{:s}' (in progress).".format(rundir))
           lskip = True
       elif self.lindicator and os.path.exists('{}/COMPLETED'.format(rundir)):
         if self.lreport: print("Skipping experiment folder '{:s}' (completed).".format(rundir))
         lskip = True
       elif self.lindicator and os.path.exists('{}/FAILED'.format(rundir)):
         # this should be the last option, so as to prevent overwriting data
         if self.lrunfailed:
           if self.lreport: print("Overwriting failed experiment folder '{:s}'.".format(rundir))
           lskip = False # rundir will be deleted
         else:
           if self.lreport: print("Skipping experiment folder '{:s}' (failed).".format(rundir))
           lskip = True
       else: # no/unknown indicator file
         if self.lreport: print("Overwriting existing experiment folder '{:s}'.".format(rundir))
         lskip = False # rundir will be deleted
     else:
       if self.lreport: print("Creating new experiment folder '{:s}'.".format(rundir))
       lskip = False
     if not lskip:
       self.rundirs.append(rundir)
       # isolate HGS constructor arguments
       hgsargs = inspect.getfullargspec(HGS.__init__).args # names of positional and keyword arguments
       hgsargs = {arg:kwargs[arg] for arg in hgsargs if arg in kwargs} 
       self.hgsargs.append(hgsargs)
       # initialize HGS instance      
       hgs = HGS(**hgsargs)
       self.members.append(hgs)
   # final check
   if len(self.members) == 0: 
     raise EnsembleError("No experiments to run (empty list).")
Example #9
def loadShapeObservations(obs=None, seasons=None, basins=None, provs=None, shapes=None, stations=None, varlist=None, slices=None,
                          aggregation='mean', dataset_mode='time-series', lWSC=True, WSC_period=None, shapetype=None, 
                          variable_list=None, basin_list=None, lforceList=True, obs_ts=None, obs_clim=None, 
                          name=None, title=None, obs_list=None, ensemble_list=None, ensemble_product='inner', **kwargs):
  ''' convenience function to load shape observations based on 'aggregation' and 'varlist' (mainly add WSC gage data) '''
  if obs_list is None: obs_list = observational_datasets
  if name is None: name = 'obs'
  if title is None: title = 'Observations'
  # variables for which ensemble expansion is not supported
  not_supported = ('season','seasons','varlist','mode','dataset_mode','provs','basins','shapes',) 
  # resolve variable list (no need to maintain order)
  if isinstance(varlist,str): varlist = [varlist]
  variables = set(shp_params)
  for varname in varlist: # use a separate loop variable, so the 'name' argument is not clobbered
      if varname in variable_list: variables.update(variable_list[varname].vars)
      elif lforceList: raise VariableError("Variable list '{}' does not exist.".format(varname))
      else: variables.add(varname)
  variables = list(variables)
  # determine if we need gage dataset
  lWSC = isinstance(basins,str) and any([var in WSC_vars for var in variables]) and lWSC # doesn't work if multiple basins are loaded
  # default obs list
  if obs is None: obs = ['Observations',]
  elif isinstance(obs,str): obs = [obs]
  elif isinstance(obs,tuple): obs = list(obs)
  elif not isinstance(obs,list): raise TypeError(obs)
  # configure slicing (extract basin/province/shape and period)
  expand_vars = ('basins','stations','provs','shapes','slices') # variables that need to be added to slices (and expanded first)
  if ensemble_list: expand_list = [varname for varname in expand_vars if varname in ensemble_list]
  if ensemble_list and expand_list:
      local_vars = locals(); exp_args = dict()
      for varname in expand_vars: # copy variables to expand right away
          exp_args[varname] = local_vars[varname]
      for varname in expand_list: # remove entries from ensemble expansion
          if  varname != 'slices': ensemble_list.remove(varname) # only 'slices' will continue to be expanded
      if 'slices' not in ensemble_list: ensemble_list.append('slices')
      slices = [_configSlices(**arg_dict) for arg_dict in expandArgumentList(expand_list=expand_list, lproduct=ensemble_product, **exp_args)]
  else:
      slices = _configSlices(slices=slices, basins=basins, provs=provs, shapes=shapes, stations=stations, period=None)
  # substitute default observational dataset and separate aggregation methods
  iobs = None; clim_ens = None
  for i,obs_name in reverse_enumerate(obs):
      # N.B.: we need to iterate in reverse order, so that deleting items does not interfere with the indexing
      if obs_name in obs_aliases or obs_name not in timeseries_datasets:
          if iobs is not None: raise ArgumentError("Can only resolve one default dataset: {}".format(obs))
          if aggregation == 'mean' and seasons is None and obs_clim is not None: 
              # remove dataset entry from list (and all the arguments)
              del obs[i]; iobs = i # remember position of default obs in ensemble              
              clim_args = kwargs.copy(); slc = slices; shp = shapetype
              # clean up variables for ensemble expansion, if necessary
              if ensemble_list and ensemble_product.lower() == 'inner':
                  if 'names' in ensemble_list:
                      obs_names = [obs_clim]
                      for arg in ensemble_list:
                          if arg in ('slices','shape'): pass # dealt with separately
                          elif arg in not_supported:
                              raise ArgumentError("Expansion of keyword '{:s}' is currently not supported in ensemble expansion.".format(arg))
                          elif arg in kwargs: 
                              clim_args[arg] = kwargs[arg][iobs]; del kwargs[arg][iobs]
                          else: 
                              raise ArgumentError("Keyword '{:s}' not found in keyword arguments.".format(arg))
                      if 'slices' in ensemble_list: slc = slices[iobs]; del slices[iobs]
                      if 'shape' in ensemble_list: shp = shapetype[iobs]; del shapetype[iobs]
                      clim_len = 1 # expect length of climatology ensemble
                  else: 
                      obs_names = obs_clim # no name expansion
                      clim_len = None # expect length of climatology ensemble
                      for arg in ensemble_list:
                          if arg in not_supported:
                              raise ArgumentError("Expansion of keyword '{:s}' is currently not supported in ensemble expansion.".format(arg))
                          elif 'slices' in ensemble_list: l = len(slc) 
                          elif 'shape' in ensemble_list: l = len(shp)
                          elif arg in clim_args: l = len(clim_args[arg])
                          else: raise ArgumentError("Keyword '{:s}' not found in keyword arguments.".format(arg))
                          if clim_len is None: clim_len = l
                          elif l != clim_len: raise ArgumentError(arg,l,clim_len)
              elif ensemble_list and ensemble_product.lower() == 'outer':
                  clim_len = 1
                  for arg in ensemble_list:
                      if arg != 'names':
                        assert isinstance(clim_args[arg],(list,tuple)), clim_args[arg] 
                        clim_len *= len(clim_args[arg])
                  obs_names = [obs_clim] if 'names' in ensemble_list else obs_clim
              else:
                  obs_names = [obs_clim]; clim_len = 1
               # now load climatology instead of time-series and skip aggregation
              try:
                  clim_ens = loadEnsemble(names=obs_names, season=seasons, aggregation=None, slices=slc, varlist=variables, 
                                          ldataset=False, dataset_mode='climatology', shape=shp,
                                          ensemble_list=ensemble_list, ensemble_product=ensemble_product, 
                                          obs_list=obs_list, basin_list=basin_list, **clim_args)
                  assert len(clim_ens) == clim_len, clim_ens
              except EmptyDatasetError: pass
          else: 
              obs[i] = obs_ts # trivial: just substitute default name and load time-series
  # prepare and load ensemble of observations
  if len(obs) > 0:
      if len(obs) == 1 and ensemble_list and 'names' not in ensemble_list: obs = obs[0]
      try:
          obsens = loadEnsemble(names=obs, season=seasons, aggregation=aggregation, slices=slices,
                                varlist=variables, ldataset=False, dataset_mode=dataset_mode, 
                                shape=shapetype, obs_list=obs_list, basin_list=basin_list, 
                                ensemble_list=ensemble_list, ensemble_product=ensemble_product, **kwargs)          
      except EmptyDatasetError:
          obsens = Ensemble(name=name, title=title, obs_list=obs_list, basetype=Dataset)
  else: 
      obsens = Ensemble(name=name, title=title, obs_list=obs_list, basetype=Dataset)
  # add default obs back in if they were removed earlier
  if clim_ens is not None:
      for clim_ds in clim_ens[::-1]: # add observations in correct order: adding backwards allows successive insertion ...
          obsens.insertMember(iobs,clim_ds) # ... at the point where the name block starts
  # load stream gage data from WSC; should not interfere with anything else; append to ensemble
  if lWSC: # another special case: river hydrographs
      from datasets.WSC import GageStationError, loadGageStation
      try:
          if aggregation is not None and seasons is None: dataset_mode = 'climatology' # handled differently with gage data
          if WSC_period is None: WSC_period = kwargs.get('obs_period',kwargs.get('period',None))
          dataset = loadGageStation(basin=basins, varlist=['runoff'], aggregation=aggregation, period=WSC_period, 
                                    mode=dataset_mode, filetype='monthly', basin_list=basin_list, lfill=True, lexpand=True) # always load runoff/discharge
          if seasons:
              method = aggregation if aggregation.isupper() else aggregation.title() 
              if aggregation: dataset = getattr(dataset,'seasonal'+method)(season=seasons, taxis='time')
              else: dataset = dataset.seasonalSample(season=seasons)
          if slices is not None: dataset = dataset(**slices) # slice immediately
          obsens += dataset.load()
      except GageStationError: 
          pass # just ignore, if gage station data is missing 
  # return ensembles (will be wrapped in a list, if BatchLoad is used)
  return obsens
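
The helper reverse_enumerate used above is not shown on this page; it presumably yields (index, item) pairs from the end of the list to the beginning, so that items can be deleted during iteration without invalidating the remaining indices. A plausible sketch:

# plausible sketch of the helper (not shown on this page)
def reverse_enumerate(iterable):
    # enumerate from the last element to the first, keeping the original indices
    return zip(range(len(iterable)-1, -1, -1), reversed(iterable))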
Example #10
def readRasterArray(file_pattern,
                    lgzip=None,
                    lgdal=True,
                    dtype=np.float32,
                    lmask=True,
                    fillValue=None,
                    lfeedback=False,
                    lgeotransform=True,
                    axes=None,
                    lna=False,
                    lskipMissing=False,
                    path_params=None,
                    **kwargs):
    ''' function to load a multi-dimensional numpy array from several structured ASCII raster files '''

    if axes is None: raise NotImplementedError
    #TODO: implement automatic detection of axes arguments and axes order

    ## expand path argument and figure out dimensions

    # collect axes arguments
    shape = []
    axes_kwargs = dict()
    for ax in axes:
        if ax not in kwargs: raise AxisError(ax)
        coord = kwargs.pop(ax)
        shape.append(len(coord))
        axes_kwargs[ax] = coord
    assert len(axes) == len(shape) == len(axes_kwargs)
    shape = tuple(shape)
    #TODO: add handling of embedded inner product expansion

    # argument expansion using outer product
    file_kwargs_list = expandArgumentList(outer_list=axes, **axes_kwargs)
    assert np.prod(shape) == len(file_kwargs_list)

    ## load data from raster files and assemble array
    path_params = dict() if path_params is None else path_params.copy()  # will be modified

    # find first valid 2D raster to determine shape
    i0 = 0
    path_params.update(file_kwargs_list[i0])  # update axes parameters
    filepath = file_pattern.format(**path_params)  # construct file name
    if not os.path.exists(filepath):
        if lskipMissing:  # find first valid
            while not os.path.exists(filepath):
                i0 += 1  # go to next raster file
                if i0 >= len(file_kwargs_list):
                    raise IOError(
                        "No valid input raster files found!\n'{}'".format(
                            filepath))
                if lfeedback: print(' ', end='')
                path_params.update(
                    file_kwargs_list[i0])  # update axes parameters
                filepath = file_pattern.format(**path_params)  # nest in line
        else:  # or raise error
            raise IOError(filepath)

    # read first 2D raster file
    data2D = readASCIIraster(filepath,
                             lgzip=lgzip,
                             lgdal=lgdal,
                             dtype=dtype,
                             lna=True,
                             lmask=lmask,
                             fillValue=fillValue,
                             lgeotransform=lgeotransform,
                             **kwargs)
    if lgeotransform: data2D, geotransform0, na = data2D
    else:
        data2D, na = data2D  # we might still need na, but no need to check if it is the same
    shape2D = data2D.shape  # get 2D raster shape for later use

    # allocate data array
    list_shape = (np.prod(shape),) + shape2D  # assume 3D shape to concatenate 2D rasters
    if lmask:
        data = ma.empty(list_shape, dtype=dtype)
        if fillValue is None: data._fill_value = data2D._fill_value
        else: data._fill_value = fillValue
        data.mask = True  # initialize everything as masked
    else:
        data = np.empty(list_shape, dtype=dtype)  # allocate the array
    assert data.shape[0] == len(file_kwargs_list), (data.shape,
                                                    len(file_kwargs_list))
    # insert (up to) first raster before continuing
    if lskipMissing and i0 > 0:
        data[:i0, :, :] = ma.masked if lmask else fillValue  # mask all invalid rasters up to first valid raster
    data[i0, :, :] = data2D  # add first (valid) raster

    # loop over remaining 2D raster files
    for i, file_kwargs in enumerate(file_kwargs_list[i0:]):

        path_params.update(file_kwargs)  # update axes parameters
        filepath = file_pattern.format(**path_params)  # construct file name
        if os.path.exists(filepath):
            if lfeedback: print('.', end='')  # indicate valid raster with dot
            # read 2D raster file
            data2D = readASCIIraster(filepath,
                                     lgzip=lgzip,
                                     lgdal=lgdal,
                                     dtype=dtype,
                                     lna=False,
                                     lmask=lmask,
                                     fillValue=fillValue,
                                     lgeotransform=lgeotransform,
                                     **kwargs)
            # check geotransform
            if lgeotransform:
                data2D, geotransform = data2D
                if not geotransform == geotransform0:
                    raise AxisError(geotransform)  # to make sure all geotransforms are identical!
            else:
                geotransform = None
            # size information
            if not shape2D == data2D.shape:
                raise AxisError(data2D.shape)  # to make sure all raster shapes are identical!
            # insert 2D raster into 3D array
            data[i + i0, :, :] = data2D  # raster shape has to match
        elif lskipMissing:
            # fill with masked values
            data[i + i0, :, :] = ma.masked  # mask missing raster
            if lfeedback: print(' ', end='')  # indicate missing raster with blank
        else:
            raise IOError(filepath)

    # complete feedback with linebreak
    if lfeedback: print('')

    # reshape and check dimensions
    assert i + i0 == data.shape[0] - 1, (i, i0)
    data = data.reshape(shape + shape2D)  # now we have the full shape
    gc.collect()  # remove duplicate data

    # return data and optional meta data
    if lgeotransform or lna:
        return_data = (data, )
        if lgeotransform: return_data += (geotransform, )
        if lna: return_data += (na, )
    else:
        return_data = data
    return return_data