def __call__(self, lparallel=False, NP=None, inner_list=None, outer_list=None, callback=None, **kwargs):
  ''' This method is called instead of a class or instance method; it applies the arguments
      'kwargs' to each ensemble member; it also supports argument expansion with inner and
      outer product (prior to application to the ensemble) and parallelization using
      multiprocessing. '''
  # expand kwargs to ensemble list
  kwargs_list = expandArgumentList(inner_list=inner_list, outer_list=outer_list, **kwargs)
  if len(kwargs_list) == 1:
    kwargs_list = kwargs_list * len(self.klass.members)
  elif len(kwargs_list) != len(self.klass.members):
    raise ArgumentError('Length of expanded argument list does not match ensemble size! {} != {}'.format(
                        len(kwargs_list), len(self.klass.members)))
  # loop over ensemble members and execute function
  if lparallel:
    # parallelize method execution using multiprocessing
    pool = multiprocessing.Pool(processes=NP) # initialize worker pool
    if callback is not None and not callable(callback): raise TypeError(callback)
    # N.B.: the callback function is passed a result from the apply_method function,
    #       which returns a tuple of the form (member, exit_code)
    # define work loads (function and its arguments) and start tasks
    results = [pool.apply_async(apply_method, (member, self.attr), kwargs, callback=callback)
               for member, kwargs in zip(self.klass.members, kwargs_list)]
    # N.B.: beware pickling!
    pool.close(); pool.join() # wait for workers to finish
    # retrieve and assemble results
    results = [result.get() for result in results]
    # separate members and results (apply_method returns both, in case members were modified)
    self.klass.members = [result[0] for result in results]
    results = [result[1] for result in results]
  else:
    # get instance methods
    methods = [getattr(member, self.attr) for member in self.klass.members]
    # just apply sequentially
    results = [method(**kwargs) for method, kwargs in zip(methods, kwargs_list)]
  if len(results) != len(self.klass.members):
    raise ArgumentError('Length of results list does not match ensemble size! {} != {}'.format(
                        len(results), len(self.klass.members)))
  return tuple(results)
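# Illustrative usage sketch (not part of the original module): assuming the enclosing
# Ensemble class dispatches member-method calls through this wrapper, a hypothetical
# method 'computeStats' can be applied to a three-member ensemble; the outer-product
# expansion of 'percentile' yields one argument set per member, and lparallel=True
# distributes the calls over three worker processes.
def _example_ensemble_apply(ens):
  results = ens.computeStats(lparallel=True, NP=3, outer_list=['percentile'],
                             percentile=[0.50, 0.90, 0.95])
  return results # tuple with one result per ensemble member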
def __call__(self, load_list=None, lproduct='outer', inner_list=None, outer_list=None,
             lensemble=None, ens_name=None, ens_title=None, **kwargs):
  ''' Wrap the original function: expand the argument list, execute load_fct over the
      argument list, and return a list or Ensemble of datasets. '''
  # decide what to do
  if load_list is None and inner_list is None and outer_list is None:
    # normal operation: no expansion
    datasets = self.load_fct(**kwargs)
  else:
    # expansion required
    lensemble = ens_name is not None if lensemble is None else lensemble
    # figure out arguments
    kwargs_list = expandArgumentList(expand_list=load_list, lproduct=lproduct,
                                     inner_list=inner_list, outer_list=outer_list, **kwargs)
    # load datasets
    datasets = []
    for kwargs in kwargs_list:
      # load dataset
      datasets.append(self.load_fct(**kwargs))
    # construct ensemble
    if lensemble:
      datasets = Ensemble(members=datasets, name=ens_name, title=ens_title, basetype='Dataset')
  # return list or ensemble of datasets
  return datasets
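# Illustrative sketch (hypothetical loader and dataset names): if a load function
# 'loadObs' were decorated with this wrapper, expanding 'names' over two datasets
# returns an Ensemble, because ens_name is given; with ens_name and lensemble both
# unset, a plain list of datasets would be returned instead.
def _example_batch_load():
  ens = loadObs(load_list=['names'], names=['GPCC', 'CRU'], varlist=['precip'],
                ens_name='obs', ens_title='Observations')
  return ens # Ensemble with two Dataset members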
def loadEnsembleTS(names=None, name=None, title=None, varlist=None, aggregation=None, season=None,
                   prov=None, slices=None, obsslices=None, years=None, reduction=None, shape=None,
                   station=None, constraints=None, filetypes=None, domain=None, ldataset=False,
                   lcheckVar=False, lwrite=False, ltrimT=True, name_tags=None, dataset_mode='time-series',
                   lminmax=False, master=None, lall=True, ensemble_list=None, ensemble_product='inner',
                   lensembleAxis=False, WRF_exps=None, CESM_exps=None, WRF_ens=None, CESM_ens=None, **kwargs):
  ''' A convenience function to load an ensemble of time-series based on certain criteria;
      works with either stations or regions; seasonal/climatological aggregation is also supported. '''
  # prepare variable list
  if varlist is not None:
    varlist = list(varlist)[:] # copy list
    if station:
      for var in stn_params: # necessary to select stations
        if var not in varlist: varlist.append(var)
    if shape:
      for var in shp_params: # necessary to select shapes
        if var not in varlist: varlist.append(var)
  # prepare ensemble and arguments
  if ldataset and ensemble_list:
    raise ArgumentError("Argument expansion via 'ensemble_list' is not supported when ldataset=True.")
  elif not ldataset:
    ensemble = Ensemble(name=name, title=title, basetype='Dataset')
  # expand argument list
  if ensemble_list is None:
    ensemble_list = ['names'] if not ldataset else None
  loadargs = expandArgumentList(names=names, station=station, prov=prov, shape=shape, varlist=varlist,
                                mode=dataset_mode, filetypes=filetypes, domains=domain, lwrite=lwrite,
                                slices=slices, obsslices=obsslices, name_tags=name_tags, ltrimT=ltrimT,
                                years=years, expand_list=ensemble_list, lproduct=ensemble_product,
                                lensembleAxis=lensembleAxis)
  for loadarg in loadargs:
    # clean up arguments
    name = loadarg.pop('names', None); name_tag = loadarg.pop('name_tags', None)
    slcs = loadarg.pop('slices', None); obsslcs = loadarg.pop('obsslices', None)
    # load individual dataset
    dataset = loadDataset(name=name, WRF_exps=WRF_exps, CESM_exps=CESM_exps,
                          WRF_ens=WRF_ens, CESM_ens=CESM_ens, **loadarg)
    if name_tag is not None:
      if name_tag[0] == '_': dataset.name += name_tag
      else: dataset.name = name_tag
    # apply slicing
    if obsslcs and (dataset.name[:3].lower() == 'obs' or dataset.name.isupper()):
      slcs = dict() if slcs is None else slcs.copy()
      slcs.update(**obsslcs) # add special slices for obs
      # N.B.: currently VarNC's can only be sliced once, because we can't combine slices yet
    if slcs: dataset = dataset(lminmax=lminmax, **slcs) # slice immediately
    if not ldataset: ensemble += dataset.load() # load data and add to ensemble
  # if input was not a list, just return the dataset
  if ldataset: ensemble = dataset.load() # load data
  # select specific stations (if applicable)
  if not ldataset and station and constraints:
    from datasets.EC import selectStations
    ensemble = selectStations(ensemble, stnaxis='station', master=master, linplace=False,
                              lall=lall, lcheckVar=lcheckVar, **constraints)
  # make sure all datasets have the cluster meta data
  for varname in stn_params + shp_params:
    # find a valid instance
    var = None
    for ds in ensemble:
      if varname in ds:
        var = ds[varname]; break
    # give it to those that don't have it
    if var is not None:
      var.load() # load data and add as regular variable (not VarNC)
      for ds in ensemble:
        if varname not in ds: ds.addVariable(var.copy())
  # apply general reduction operations
  if reduction is not None:
    for ax, op in reduction.iteritems():
      if isinstance(op, basestring): ensemble = getattr(ensemble, op)(axis=ax)
      elif isinstance(op, (int, np.integer, float, np.inexact)): ensemble = ensemble(**{ax: op})
  # extract seasonal/climatological values/extrema
  if ldataset and len(ensemble) == 0: raise EmptyDatasetError(varlist)
  if not ldataset and any([len(ds) == 0 for ds in ensemble]): raise EmptyDatasetError(ensemble)
  # N.B.: the operations below should work with Ensembles as well as Datasets
  if aggregation:
    method = aggregation if aggregation.isupper() else aggregation.title()
    if season is None:
      ensemble = getattr(ensemble, 'clim' + method)(taxis='time', **kwargs)
    else:
      ensemble = getattr(ensemble, 'seasonal' + method)(season=season, taxis='time', **kwargs)
  elif season: # seasonal sampling without aggregation
    ensemble = ensemble.seasonalSample(season=season)
  # return dataset or ensemble
  return ensemble
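# Usage sketch (experiment names and station type are hypothetical): load a
# three-member WRF ensemble of station time-series and reduce it to summer means;
# by default 'names' is the expanding argument (ensemble_list=['names']), so one
# dataset is loaded per name; WRF_exps and WRF_ens are the project's experiment
# dictionaries, passed through to loadDataset.
def _example_loadEnsembleTS(WRF_exps, WRF_ens):
  ens = loadEnsembleTS(names=['ctrl-1', 'ctrl-2', 'ctrl-3'], name='wrf-ens',
                       varlist=['precip', 'T2'], station='ecprecip', filetypes=['hydro'],
                       aggregation='mean', season='summer', domain=2,
                       WRF_exps=WRF_exps, WRF_ens=WRF_ens)
  return ens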
def __init__(self, inner_list=None, outer_list=None, **kwargs):
  ''' Initialize an ensemble of HGS simulations based on HGS arguments and project descriptors;
      all keyword arguments are automatically expanded based on inner/outer product rules,
      defined using the inner_list/outer_list arguments; the expanded argument lists are used
      to initialize the individual ensemble members; note that a string substitution is applied
      to all folder variables (incl. 'rundir') prior to constructing the HGS instance,
      i.e. rundir.format(**kwargs). '''
  self.lreport = kwargs.get('lreport', self.lreport)
  self.loverwrite = kwargs.get('loverwrite', self.loverwrite)
  self.lindicator = kwargs.get('lindicator', self.lindicator)
  self.lrunfailed = kwargs.get('lrunfailed', self.lrunfailed)
  self.lrestart = kwargs.get('lrestart', self.lrestart)
  # expand argument list (plain, nothing special)
  kwargs_list = expandArgumentList(inner_list=inner_list, outer_list=outer_list, **kwargs)
  # loop over ensemble members
  self.members = []; self.rundirs = []; self.hgsargs = [] # ensemble lists
  for kwargs in kwargs_list:
    # isolate folder variables and perform variable substitution
    for folder_type in ('rundir', 'template_folder', 'input_folder', 'pet_folder',
                        'precip_inc', 'pet_inc', 'ic_files'):
      if folder_type in kwargs:
        folder = kwargs[folder_type]
        if isinstance(folder, str):
          # perform keyword substitution with all available arguments
          if folder_type == 'ic_files': # we need to preserve '{FILETYPE}' for later
            kwargs[folder_type] = folder.format(FILETYPE='{FILETYPE}', **kwargs)
          else:
            kwargs[folder_type] = folder.format(**kwargs)
        elif folder is None: pass
        else: raise TypeError(folder)
    # check rundir
    rundir = kwargs['rundir']
    kwargs['restart'] = False # this keyword argument should be controlled by the Ensemble handler
    if rundir in self.rundirs:
      raise ArgumentError("Multiple occurrence of run directory:\n '{}'".format(rundir))
    # figure out skipping
    if os.path.exists(rundir):
      if self.loverwrite:
        if self.lreport: print("Overwriting existing experiment folder '{:s}'.".format(rundir))
        lskip = False
      elif self.lindicator and os.path.exists('{}/SCHEDULED'.format(rundir)):
        if self.lreport: print("Skipping experiment folder '{:s}' (scheduled).".format(rundir))
        lskip = True
      elif self.lindicator and os.path.exists('{}/IN_PROGRESS'.format(rundir)):
        if self.lrestart:
          shutil.move(os.path.join(rundir, 'IN_PROGRESS'), os.path.join(rundir, 'RESTARTED'))
          if self.lreport: print("Restarting experiment in folder '{:s}' (was in progress).".format(rundir))
          lskip = False
          kwargs['restart'] = True
        else:
          if self.lreport: print("Skipping experiment folder '{:s}' (in progress).".format(rundir))
          lskip = True
      elif self.lindicator and os.path.exists('{}/COMPLETED'.format(rundir)):
        if self.lreport: print("Skipping experiment folder '{:s}' (completed).".format(rundir))
        lskip = True
      elif self.lindicator and os.path.exists('{}/FAILED'.format(rundir)):
        # this should be the last option, so as to prevent overwriting data
        if self.lrunfailed:
          if self.lreport: print("Overwriting failed experiment folder '{:s}'.".format(rundir))
          lskip = False # rundir will be deleted
        else:
          if self.lreport: print("Skipping experiment folder '{:s}' (failed).".format(rundir))
          lskip = True
      else: # no/unknown indicator file
        if self.lreport: print("Overwriting existing experiment folder '{:s}'.".format(rundir))
        lskip = False # rundir will be deleted
    else:
      if self.lreport: print("Creating new experiment folder '{:s}'.".format(rundir))
      lskip = False
    if not lskip:
      self.rundirs.append(rundir)
      # isolate HGS constructor arguments
      hgsargs = inspect.getargspec(HGS.__init__).args # getargspec returns (args, varargs, keywords, defaults); we only need argument names
      hgsargs = {arg: kwargs[arg] for arg in hgsargs if arg in kwargs}
      self.hgsargs.append(hgsargs)
      # initialize HGS instance
      hgs = HGS(**hgsargs)
      self.members.append(hgs)
  # final check
  if len(self.members) == 0:
    raise EnsembleError("No experiments to run (empty list).")
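# Illustrative sketch (the ensemble class name 'EnsHGS' and the folder layout are
# assumptions): the outer product of two run names and two PET scenarios defines
# four members; '{runname}' and '{pet}' in the rundir template are filled in by
# rundir.format(**kwargs) before each HGS instance is constructed, and duplicate
# run directories would raise an ArgumentError.
def _example_hgs_ensemble():
  ens = EnsHGS(outer_list=['runname', 'pet'], runname=['wet', 'dry'], pet=['pet-a', 'pet-b'],
               rundir='/scratch/hgs/{runname}_{pet}', lreport=True, loverwrite=False)
  return ens # one HGS instance per run directory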
def readRasterArray(file_pattern, lgzip=None, lgdal=True, dtype=np.float32, lmask=True,
                    fillValue=None, lfeedback=False, lgeotransform=True, axes=None, lna=False,
                    lskipMissing=False, path_params=None, **kwargs):
  ''' function to load a multi-dimensional numpy array from several structured ASCII raster files '''
  if axes is None: raise NotImplementedError
  #TODO: implement automatic detection of axes arguments and axes order
  ## expand path argument and figure out dimensions
  # collect axes arguments
  shape = []; axes_kwargs = dict()
  for ax in axes:
    if ax not in kwargs: raise AxisError(ax)
    coord = kwargs.pop(ax)
    shape.append(len(coord))
    axes_kwargs[ax] = coord
  assert len(axes) == len(shape) == len(axes_kwargs)
  shape = tuple(shape)
  #TODO: add handling of embedded inner product expansion
  # argument expansion using outer product
  file_kwargs_list = expandArgumentList(outer_list=axes, **axes_kwargs)
  assert np.prod(shape) == len(file_kwargs_list)
  ## load data from raster files and assemble array
  path_params = dict() if path_params is None else path_params.copy() # will be modified
  # find first valid 2D raster to determine shape
  i0 = 0
  path_params.update(file_kwargs_list[i0]) # update axes parameters
  filepath = file_pattern.format(**path_params) # construct file name
  if not os.path.exists(filepath):
    if lskipMissing: # find first valid raster
      while not os.path.exists(filepath):
        i0 += 1 # go to next raster file
        if i0 >= len(file_kwargs_list):
          raise IOError("No valid input raster files found!\n'{}'".format(filepath))
        if lfeedback: print ' ', # print space to indicate missing raster
        path_params.update(file_kwargs_list[i0]) # update axes parameters
        filepath = file_pattern.format(**path_params) # construct next file name
    else: # or raise error
      raise IOError(filepath)
  # read first 2D raster file
  data2D = readASCIIraster(filepath, lgzip=lgzip, lgdal=lgdal, dtype=dtype, lna=True,
                           lmask=lmask, fillValue=fillValue, lgeotransform=lgeotransform, **kwargs)
  if lgeotransform: data2D, geotransform0, na = data2D
  else: data2D, na = data2D # we might still need na, but no need to check if it is the same
  shape2D = data2D.shape # get 2D raster shape for later use
  # allocate data array
  list_shape = (np.prod(shape),) + shape2D # assume 3D shape to concatenate 2D rasters
  if lmask:
    data = ma.empty(list_shape, dtype=dtype)
    if fillValue is None: data._fill_value = data2D._fill_value
    else: data._fill_value = fillValue
    data.mask = True # initialize everything as masked
  else:
    data = np.empty(list_shape, dtype=dtype) # allocate the array
  assert data.shape[0] == len(file_kwargs_list), (data.shape, len(file_kwargs_list))
  # insert (up to) first raster before continuing
  if lskipMissing and i0 > 0:
    data[:i0,:,:] = ma.masked if lmask else fillValue # mask all invalid rasters up to first valid raster
  data[i0,:,:] = data2D # add first (valid) raster
  # loop over remaining 2D raster files
  for i, file_kwargs in enumerate(file_kwargs_list[i0:]):
    path_params.update(file_kwargs) # update axes parameters
    filepath = file_pattern.format(**path_params) # construct file name
    if os.path.exists(filepath):
      if lfeedback: print '.', # print dot to indicate valid raster
      # read 2D raster file
      data2D = readASCIIraster(filepath, lgzip=lgzip, lgdal=lgdal, dtype=dtype, lna=False,
                               lmask=lmask, fillValue=fillValue, lgeotransform=lgeotransform, **kwargs)
      # check geotransform
      if lgeotransform:
        data2D, geotransform = data2D
        if not geotransform == geotransform0:
          raise AxisError(geotransform) # to make sure all geotransforms are identical!
      else: geotransform = None
      # check size
      if not shape2D == data2D.shape:
        raise AxisError(data2D.shape) # to make sure all raster shapes are identical!
      # insert 2D raster into 3D array
      data[i+i0,:,:] = data2D # raster shape has to match
    elif lskipMissing:
      # fill with masked values
      data[i+i0,:,:] = ma.masked # mask missing raster
      if lfeedback: print ' ', # print space to indicate missing raster
    else:
      raise IOError(filepath)
  # complete feedback with linebreak
  if lfeedback: print ''
  # reshape and check dimensions
  assert i + i0 == data.shape[0] - 1, (i, i0)
  data = data.reshape(shape + shape2D) # now we have the full shape
  gc.collect() # remove duplicate data
  # return data and optional meta data
  if lgeotransform or lna:
    return_data = (data,)
    if lgeotransform: return_data += (geotransform,)
    if lna: return_data += (na,)
  else:
    return_data = data
  return return_data
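# Usage sketch (file pattern and axes values are hypothetical): assemble a
# (2, 12, ny, nx) array from monthly ASCII rasters for two scenarios; the 'scenario'
# and 'month' arguments are expanded as an outer product in the order given by 'axes',
# and each expanded argument set fills the file pattern via str.format.
def _example_readRasterArray():
  data, geotransform = readRasterArray('rasters/{scenario}/precip_{month:02d}.asc',
                                       axes=['scenario', 'month'],
                                       scenario=['hist', 'proj'], month=range(1, 13))
  return data, geotransform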
def loadShapeObservations(obs=None, seasons=None, basins=None, provs=None, shapes=None, stations=None,
                          varlist=None, slices=None, aggregation='mean', dataset_mode='time-series',
                          lWSC=True, WSC_period=None, shapetype=None, variable_list=None, basin_list=None,
                          lforceList=True, obs_ts=None, obs_clim=None, name=None, title=None,
                          obs_list=None, ensemble_list=None, ensemble_product='inner', **kwargs):
  ''' convenience function to load shape observations based on 'aggregation' and 'varlist'
      (mainly adds WSC gage data) '''
  if obs_list is None: obs_list = observational_datasets
  if name is None: name = 'obs'
  if title is None: title = 'Observations'
  # variables for which ensemble expansion is not supported
  not_supported = ('season','seasons','varlist','mode','dataset_mode','provs','basins','shapes',)
  # resolve variable list (no need to maintain order)
  if isinstance(varlist, str): varlist = [varlist]
  variables = set(shp_params)
  for varname in varlist: # N.B.: use 'varname' here, so as not to shadow the 'name' argument
    if varname in variable_list: variables.update(variable_list[varname].vars)
    elif lforceList: raise VariableError("Variable list '{}' does not exist.".format(varname))
    else: variables.add(varname)
  variables = list(variables)
  # determine if we need the gage dataset
  lWSC = isinstance(basins, str) and any([var in WSC_vars for var in variables]) and lWSC
  # N.B.: this doesn't work if multiple basins are loaded
  # default obs list
  if obs is None: obs = ['Observations',]
  elif isinstance(obs, str): obs = [obs]
  elif isinstance(obs, tuple): obs = list(obs)
  elif not isinstance(obs, list): raise TypeError(obs)
  # configure slicing (extract basin/province/shape and period)
  expand_vars = ('basins','stations','provs','shapes','slices') # variables that need to be added to slices (and expanded first)
  if ensemble_list: expand_list = [varname for varname in expand_vars if varname in ensemble_list]
  if ensemble_list and expand_list:
    local_vars = locals(); exp_args = dict()
    for varname in expand_vars: # copy variables to expand right away
      exp_args[varname] = local_vars[varname]
    for varname in expand_list: # remove entries from ensemble expansion
      if varname != 'slices': ensemble_list.remove(varname)
    # only 'slices' will continue to be expanded
    if 'slices' not in ensemble_list: ensemble_list.append('slices')
    slices = [_configSlices(**arg_dict) for arg_dict in
              expandArgumentList(expand_list=expand_list, lproduct=ensemble_product, **exp_args)]
  else:
    slices = _configSlices(slices=slices, basins=basins, provs=provs, shapes=shapes, stations=stations, period=None)
  # substitute default observational dataset and separate aggregation methods
  iobs = None; clim_ens = None
  for i, obs_name in reverse_enumerate(obs):
    # N.B.: we need to iterate in reverse order, so that deleting items does not interfere with the indexing
    if obs_name in obs_aliases or obs_name not in timeseries_datasets:
      if iobs is not None: raise ArgumentError("Can only resolve one default dataset: {}".format(obs))
      if aggregation == 'mean' and seasons is None and obs_clim is not None:
        # remove dataset entry from list (and all the arguments)
        del obs[i]; iobs = i # remember position of default obs in ensemble
        clim_args = kwargs.copy(); slc = slices; shp = shapetype
        # clean up variables for ensemble expansion, if necessary
        if ensemble_list and ensemble_product.lower() == 'inner':
          if 'names' in ensemble_list:
            obs_names = [obs_clim]
            for arg in ensemble_list:
              if arg in ('names','slices','shape'): pass # dealt with separately
              elif arg in not_supported:
                raise ArgumentError("Expansion of keyword '{:s}' is currently not supported in ensemble expansion.".format(arg))
              elif arg in kwargs:
                clim_args[arg] = kwargs[arg][iobs]; del kwargs[arg][iobs]
              else:
                raise ArgumentError("Keyword '{:s}' not found in keyword arguments.".format(arg))
            if 'slices' in ensemble_list: slc = slices[iobs]; del slices[iobs]
            if 'shape' in ensemble_list: shp = shapetype[iobs]; del shapetype[iobs]
            clim_len = 1 # expected length of climatology ensemble
          else:
            obs_names = obs_clim # no name expansion
            clim_len = None # expected length of climatology ensemble
            for arg in ensemble_list:
              if arg in not_supported:
                raise ArgumentError("Expansion of keyword '{:s}' is currently not supported in ensemble expansion.".format(arg))
              elif arg == 'slices': l = len(slc)
              elif arg == 'shape': l = len(shp)
              elif arg in clim_args: l = len(clim_args[arg])
              else:
                raise ArgumentError("Keyword '{:s}' not found in keyword arguments.".format(arg))
              if clim_len is None: clim_len = l
              elif l != clim_len: raise ArgumentError(arg, l, clim_len)
        elif ensemble_list and ensemble_product.lower() == 'outer':
          clim_len = 1
          for arg in ensemble_list:
            if arg != 'names':
              assert isinstance(clim_args[arg], (list,tuple)), clim_args[arg]
              clim_len *= len(clim_args[arg])
          obs_names = [obs_clim] if 'names' in ensemble_list else obs_clim
        else:
          obs_names = [obs_clim]; clim_len = 1
        # now load climatology instead of time-series and skip aggregation
        try:
          clim_ens = loadEnsemble(names=obs_names, season=seasons, aggregation=None, slices=slc,
                                  varlist=variables, ldataset=False, dataset_mode='climatology',
                                  shape=shp, ensemble_list=ensemble_list, ensemble_product=ensemble_product,
                                  obs_list=obs_list, basin_list=basin_list, **clim_args)
          assert len(clim_ens) == clim_len, clim_ens
        except EmptyDatasetError: pass
      else:
        obs[i] = obs_ts # trivial: just substitute default name and load time-series
  # prepare and load ensemble of observations
  if len(obs) > 0:
    if len(obs) == 1 and ensemble_list and 'names' not in ensemble_list: obs = obs[0]
    try:
      obsens = loadEnsemble(names=obs, season=seasons, aggregation=aggregation, slices=slices,
                            varlist=variables, ldataset=False, dataset_mode=dataset_mode,
                            shape=shapetype, obs_list=obs_list, basin_list=basin_list,
                            ensemble_list=ensemble_list, ensemble_product=ensemble_product, **kwargs)
    except EmptyDatasetError:
      obsens = Ensemble(name=name, title=title, obs_list=obs_list, basetype=Dataset)
  else:
    obsens = Ensemble(name=name, title=title, obs_list=obs_list, basetype=Dataset)
  # add default obs back in, if they were removed earlier
  if clim_ens is not None:
    for clim_ds in clim_ens[::-1]:
      # add observations in correct order: adding backwards allows successive insertion ...
      obsens.insertMember(iobs, clim_ds) # ... at the point where the name block starts
  # load stream gage data from WSC; should not interfere with anything else; append to ensemble
  if lWSC: # another special case: river hydrographs
    from datasets.WSC import GageStationError, loadGageStation
    try:
      if aggregation is not None and seasons is None: dataset_mode = 'climatology' # handled differently with gage data
      if WSC_period is None: WSC_period = kwargs.get('obs_period', kwargs.get('period', None))
      dataset = loadGageStation(basin=basins, varlist=['runoff'], aggregation=aggregation,
                                period=WSC_period, mode=dataset_mode, filetype='monthly',
                                basin_list=basin_list, lfill=True, lexpand=True) # always load runoff/discharge
      if seasons:
        method = aggregation if aggregation.isupper() else aggregation.title()
        if aggregation: dataset = getattr(dataset, 'seasonal' + method)(season=seasons, taxis='time')
        else: dataset = dataset.seasonalSample(season=seasons)
      if slices is not None: dataset = dataset(**slices) # slice immediately
      obsens += dataset.load()
    except GageStationError:
      pass # just ignore, if gage station data is missing
  # return ensembles (will be wrapped in a list, if BatchLoad is used)
  return obsens
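# Usage sketch (dataset, basin, and variable-list names are hypothetical): load
# seasonal mean shape-averaged observations for a single basin, assuming 'CRU' is a
# known time-series dataset and 'runoff' is defined in variable_list and counts among
# the WSC variables; because 'basins' is a single string, WSC gage data is appended
# automatically (lWSC); variable_list and basin_list come from the project settings.
def _example_loadShapeObservations(variable_list, basin_list):
  obsens = loadShapeObservations(obs='CRU', basins='ARB', seasons='summer',
                                 varlist=['runoff'], aggregation='mean',
                                 variable_list=variable_list, basin_list=basin_list)
  return obsens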