def testLoadStandardDeviation(self):
    ''' test station data load functions (ensemble and list) '''
    from datasets.common import loadEnsembleTS
    # just a random function call that exposes a bug in Numpy's nanfunctions.py
    frb_slices = dict(shape_name='FRB', years=(1979, 1994))
    loadEnsembleTS(names='CRU', season=None, aggregation='SEM',
                   slices=frb_slices, varlist=['precip'], shape='shpavg',
                   ldataset=True)
def testBasicLoadEnsembleTS(self):
    ''' test station data load functions (ensemble and list) '''
    from datasets.common import loadEnsembleTS
    # test list expansion of ensembles loading
    dataset_names = ['EC', 'phys-ens']
    variables = ['MaxPrecip_1d']
    provinces = ['BC', 'AB']
    seasons = ['summer', 'winter']
    modes = ['max', 'min']
    station_constraints = dict(min_len=50, lat=(50, 55), max_zerr=300,
                               prov=('AB', 'BC'))
    enslst = loadEnsembleTS(names=dataset_names, prov=provinces,
                            season=seasons, mode=modes, station='ecprecip',
                            constraints=station_constraints, varlist=variables,
                            filetypes=['hydro'], domain=2,
                            load_list=[('mode', 'season'), 'prov'],
                            lproduct='outer', lwrite=False,
                            lensembleAxis=True)
    # (mode,season) expand jointly, 'prov' separately: 2 x 2 = 4 ensembles
    assert len(enslst) == 4
    for ens in enslst:
        assert isinstance(ens, Ensemble)
        assert ens.basetype.__name__ == 'Dataset'
        assert ens.hasVariable(variables[0])
        assert ens.hasAxis('ensemble')
        assert 'EC' in ens
    # test simple ensemble with basins
    dataset_names = ['GPCC', 'phys-ens_d01', 'max-ens-2100']
    variables = ['precip']
    shpens = loadEnsembleTS(names=dataset_names, season=None, shape='shpavg',
                            aggregation=None, slices=dict(shape_name='ARB'),
                            varlist=variables, filetypes=['hydro'],
                            obsslices=dict(years=(1939, 1945)))
    assert isinstance(shpens, Ensemble)
    assert shpens.basetype.__name__ == 'Dataset'
    assert all(shpens.hasVariable(variables[0]))
    assert dataset_names[0] in shpens
    assert len(shpens[dataset_names[0]].time) == 72  # time-series
    assert len(shpens[dataset_names[-1]].time) == 720  # ensemble
    for ds in shpens:
        assert 'ARB' == ds.atts.shape_name
def testBasicLoadEnsembleTS(self):
    ''' test station data load functions (ensemble and list) '''
    from datasets.common import loadEnsembleTS
    # NOTE(review): this is a second copy of testBasicLoadEnsembleTS in the same
    # file; at import time the later definition shadows the earlier one -- confirm intent.
    # test list expansion of ensembles loading
    names = ['EC', 'phys-ens']
    varlist = ['MaxPrecip_1d']
    prov = ['BC', 'AB']
    season = ['summer', 'winter']
    mode = ['max', 'min']
    # station constraints: record length, latitude band, elevation error, provinces
    constraints = dict(min_len=50, lat=(50, 55), max_zerr=300, prov=('AB', 'BC'))
    # (mode,season) are expanded jointly, 'prov' separately; with lproduct='outer'
    # this yields 2 (mode/season pairs) x 2 (provinces) = 4 ensembles
    enslst = loadEnsembleTS(names=names, prov=prov, season=season, mode=mode,
                            station='ecprecip', constraints=constraints,
                            varlist=varlist, filetypes=['hydro'], domain=2,
                            load_list=[('mode', 'season'), 'prov', ],
                            lproduct='outer', lwrite=False, lensembleAxis=True)
    assert len(enslst) == 4
    assert all(isinstance(ens, Ensemble) for ens in enslst)
    assert all(ens.basetype.__name__ == 'Dataset' for ens in enslst)
    assert all(ens.hasVariable(varlist[0]) for ens in enslst)
    assert all(ens.hasAxis('ensemble') for ens in enslst)
    assert all('EC' in ens for ens in enslst)
    # test simple ensemble with basins
    names = ['GPCC', 'phys-ens_d01', 'max-ens-2100']
    varlist = ['precip']
    aggregation = None
    slices = dict(shape_name='ARB')
    # presumably 'obsslices' applies only to observational datasets -- verify in loadEnsembleTS
    obsslices = dict(years=(1939, 1945))
    shpens = loadEnsembleTS(names=names, season=None, shape='shpavg',
                            aggregation=aggregation, slices=slices, varlist=varlist,
                            filetypes=['hydro'], obsslices=obsslices)
    assert isinstance(shpens, Ensemble)
    assert shpens.basetype.__name__ == 'Dataset'
    assert all(shpens.hasVariable(varlist[0]))
    assert names[0] in shpens
    assert len(shpens[names[0]].time) == 72  # time-series
    assert len(shpens[names[-1]].time) == 720  # ensemble
    assert all('ARB' == ds.atts.shape_name for ds in shpens)
def loadShapeObservations(obs=None, seasons=None, basins=None, provs=None, shapes=None,
                          varlist=None, slices=None, aggregation='mean', shapetype=None,
                          period=None, variable_list=None, **kwargs):
    ''' convenience function to load shape observations; the main function is to select
        sensible defaults based on 'varlist', if no 'obs' are specified

        obs:           name(s) of observational dataset(s); defaults to 'Observations'
        basins/provs/shapes/period: folded into 'slices' via _configSlices
        varlist:       high-level variable names, expanded via 'variable_list'
        aggregation:   temporal aggregation mode (e.g. 'mean', 'SEM')
        shapetype:     shape dataset type; defaults to 'shpavg'
        kwargs:        passed through to loadEnsembleTS
        returns an Ensemble of Dataset objects
    '''
    # prepare arguments
    if shapetype is None: shapetype = 'shpavg' # really only one in use
    # resolve variable list (no need to maintain order)
    # assumes varlist is not None -- TODO confirm callers always supply it
    if isinstance(varlist,basestring): varlist = [varlist]
    variables = set(shp_params)  # always include the shape parameters
    for name in varlist:
        # expand known variable bundles; pass unknown names through verbatim
        if name in variable_list: variables.update(variable_list[name].vars)
        else: variables.add(name)
    variables = list(variables)
    # figure out default datasets
    if obs is None: obs = 'Observations'
    lUnity = lCRU = lWSC = False
    # NOTE(review): lCRU is initialized here but never set to True anywhere in this
    # function, so the 'if lCRU:' branch below is currently dead code -- confirm intent
    if obs[:3].lower() in ('obs','wsc'):
        # generic 'Observations'/'WSC' request: substitute specific datasets below
        if any(var in CRU_vars for var in variables):
            if aggregation == 'mean' and seasons is None:
                lUnity = True; obs = []
        if basins and any([var in WSC_vars for var in variables]):
            if aggregation.lower() in ('mean','std','sem','min','max') and seasons is None:
                lWSC = True; obs = []
    if not isinstance(obs,(list,tuple)): obs = (obs,)
    # configure slicing (extract basin/province/shape and period)
    slices = _configSlices(slices=slices, basins=basins, provs=provs, shapes=shapes, period=period)
    if slices is not None:
        noyears = slices.copy(); noyears.pop('years',None) # slices for climatologies
    # prepare and load ensemble of observations
    obsens = Ensemble(name='obs',title='Observations', basetype=Dataset)
    if len(obs) > 0: # regular operations with user-defined dataset
        try:
            ensemble = loadEnsembleTS(names=obs, season=seasons, aggregation=aggregation,
                                      slices=slices, varlist=variables, shape=shapetype,
                                      ldataset=False, **kwargs)
            for ens in ensemble: obsens += ens
        except EmptyDatasetError: pass  # no matching data is not an error here
    if lUnity: # load Unity data instead of averaging CRU data
        if period is None: period = (1979,1994)
        dataset = loadDataset(name='Unity', varlist=variables, mode='climatology',
                              period=period, shape=shapetype)
        if slices is not None: dataset = dataset(**noyears) # slice immediately
        obsens += dataset.load()
    if lCRU: # this is basically regular operations with CRU as default
        obsens += loadEnsembleTS(names='CRU', season=seasons, aggregation=aggregation,
                                 slices=slices, varlist=variables, shape=shapetype,
                                 ldataset=True, **kwargs)
    if lWSC: # another special case: river hydrographs
        # from datasets.WSC import loadGageStation, GageStationError
        try:
            dataset = loadGageStation(basin=basins, varlist=['runoff'], aggregation=aggregation,
                                      mode='climatology', filetype='monthly')
            if slices is not None: dataset = dataset(**noyears) # slice immediately
            obsens += dataset.load()
        except GageStationError: pass # just ignore, if gage station data is missing
    # return ensembles (will be wrapped in a list, if BatchLoad is used)
    return obsens
def loadStationEnsemble(seasons=None, provs=None, clusters=None, varlist=None, aggregation='mean',
                        constraints=None, filetypes=None, cluster_name=None, stationtype=None,
                        load_list=None, lproduct='outer', WRF_exps=None, CESM_exps=None,
                        WRF_ens=None, CESM_ens=None, variable_list=None, default_constraints=None,
                        **kwargs):
    ''' convenience function to load station data for ensembles (in Ensemble container);
        kwargs are passed to loadEnsembleTS

        'provs' or 'clusters' are either folded into 'constraints' as a single filter, or,
        if the corresponding keyword ('prov'/'cluster') appears in 'load_list', expanded
        into a list of constraint dicts that loadEnsembleTS iterates over.
    '''
    load_list = [] if load_list is None else load_list[:] # use a copy, since the list may be modified
    #XXX: move this into helper function with Batch-decorator to allow batch-loading of varlists
    # figure out varlist: infer the station type from the variable name, if necessary
    if isinstance(varlist,basestring) and not stationtype:
        if varlist.lower().find('prec') >= 0: stationtype = 'ecprecip'
        elif varlist.lower().find('temp') >= 0: stationtype = 'ectemp'
        else: raise ArgumentError, varlist
    if not isinstance(stationtype,basestring): raise ArgumentError, stationtype # not inferred
    if clusters and not cluster_name: raise ArgumentError
    params = stn_params + [cluster_name] if cluster_name else stn_params # need to load cluster_name!
    variables, filetypes = _resolveVarlist(varlist=varlist, filetypes=filetypes,
                                           params=params, variable_list=variable_list)
    # prepare arguments
    if provs or clusters:
        if constraints is None: constraints = default_constraints.copy()
        constraint_list = []
        # NOTE(review): this guard checks for 'provs'/'clusters' (plural) in load_list,
        # but the expansion code below keys on 'prov'/'cluster' (singular) -- confirm
        # which spelling callers actually use; as written the guard may never trigger
        if 'provs' in load_list and 'clusters' in load_list:
            raise ArgumentError, "Cannot expand 'provs' and 'clusters' at the same time."
        # figure out proper handling of provinces
        if provs:
            if 'prov' not in load_list:
                # no expansion: apply all provinces as one combined constraint
                constraints['prov'] = provs; provs = None
            else:
                # expansion: one constraint dict per province
                if len(constraint_list) > 0: raise ArgumentError, "Cannot expand multiple keyword-constraints at once."
                for prov in provs:
                    tmp = constraints.copy()
                    tmp['prov'] = prov
                    constraint_list.append(tmp)
                load_list[load_list.index('prov')] = 'constraints'
                constraints = constraint_list; provs = None
        # and analogously, handling of clusters!
        if clusters:
            if 'cluster' not in load_list:
                constraints['cluster'] = clusters; clusters = None
            else:
                if len(constraint_list) > 0: raise ArgumentError, "Cannot expand multiple keyword-constraints at once."
                for cluster in clusters:
                    tmp = constraints.copy()
                    tmp['cluster'] = cluster
                    if cluster_name: tmp['cluster_name'] = cluster_name # will be expanded next to cluster index
                    constraint_list.append(tmp)
                load_list[load_list.index('cluster')] = 'constraints'
                constraints = constraint_list; clusters = None
    # load ensemble (no iteration here)
    stnens = loadEnsembleTS(season=seasons, prov=provs, station=stationtype, varlist=variables,
                            aggregation=aggregation, constraints=constraints, filetypes=filetypes,
                            WRF_exps=WRF_exps, CESM_exps=CESM_exps, WRF_ens=WRF_ens, CESM_ens=CESM_ens,
                            load_list=load_list, lproduct=lproduct, lcheckVar=False, **kwargs)
    # return ensembles (will be wrapped in a list, if BatchLoad is used)
    return stnens
def loadShapeEnsemble(seasons=None, basins=None, provs=None, shapes=None, varlist=None,
                      aggregation='mean', slices=None, shapetype=None, filetypes=None, period=None,
                      variable_list=None, WRF_exps=None, CESM_exps=None, WRF_ens=None, CESM_ens=None,
                      **kwargs):
    ''' convenience function to load shape ensembles (in Ensemble container);
        kwargs are passed to loadEnsembleTS '''
    # fall back to the shape-average type (the only one currently in use)
    if shapetype is None:
        shapetype = 'shpavg'
    # translate the high-level variable list into dataset variables and file types
    variables, filetypes = _resolveVarlist(varlist=varlist, filetypes=filetypes,
                                           params=shp_params, variable_list=variable_list)
    # fold basin/province/shape selections and the period into the slicing dict
    slices = _configSlices(slices=slices, basins=basins, provs=provs,
                           shapes=shapes, period=period)
    # delegate the actual loading to loadEnsembleTS (no iteration here)
    shape_ensemble = loadEnsembleTS(season=seasons, slices=slices, varlist=variables,
                                    shape=shapetype, aggregation=aggregation,
                                    filetypes=filetypes, WRF_exps=WRF_exps,
                                    CESM_exps=CESM_exps, WRF_ens=WRF_ens,
                                    CESM_ens=CESM_ens, **kwargs)
    # return ensemble (will be wrapped in a list, if BatchLoad is used)
    return shape_ensemble
def testAdvancedLoadEnsembleTS(self):
    ''' test station data load functions (ensemble and list) '''
    from datasets.common import loadEnsembleTS
    lwrite = False
    # test ensemble (inner) list expansion
    names = 'CRU'; varlist = ['precip']; slices = dict(shape_name='FRB')
    obsslices = [dict(years=(1914,1918)), dict(years=(1939,1945))]
    name_tags = ['_1914','_1939']
    # 'obsslices' and 'name_tags' are expanded in lock-step (inner expansion)
    shpens = loadEnsembleTS(names=names, shape='shpavg', name_tags=name_tags, obsslices=obsslices,
                            slices=slices, varlist=varlist, filetypes=['hydro'], aggregation=None,
                            season=None, ensemble_list=['obsslices', 'name_tags'])
    assert isinstance(shpens, Ensemble)
    assert shpens.basetype.__name__ == 'Dataset'
    assert all(shpens.hasVariable(varlist[0]))
    assert all('CRU' == ds.name[:3] for ds in shpens)
    assert len(shpens['CRU_1914'].time) == 48 # 4-year monthly time-series
    assert len(shpens['CRU_1939'].time) == 72 # 6-year monthly time-series
    assert all('FRB' == ds.atts.shape_name for ds in shpens)
    # test ensemble (inner) list expansion with outer list expansion
    varlist = ['MaxPrecip_1d']; constraints = dict(min_len=50, lat=(50,55), max_zerr=300,)
    # inner expansion
    names = ['EC', 'EC', 'erai-max']; name_tags = ['_1990','_1940','WRF_1990']
    obsslices = [dict(years=(1929,1945)), dict(years=(1979,1995)), dict()]
    # outer expansion
    prov = ['BC','AB']; season = ['summer','winter']; mode = ['max']
    # load data
    enslst = loadEnsembleTS(names=names, prov=prov, season=season, mode=mode, station='ecprecip',
                            constraints=constraints, name_tags=name_tags, obsslices=obsslices,
                            domain=2, filetypes=['hydro'], varlist=varlist, ensemble_product='inner',
                            ensemble_list=['names','name_tags','obsslices',], lwrite=lwrite,
                            load_list=['mode','season','prov',], lproduct='outer',)
    assert len(enslst) == 4 # 2 seasons x 2 provinces (only one mode)
    assert all(isinstance(ens, Ensemble) for ens in enslst)
    assert all(ens.basetype.__name__ == 'Dataset' for ens in enslst)
    assert all(ens.hasVariable(varlist[0]) for ens in enslst)
    assert all('EC_1990' in ens for ens in enslst)
    assert all('EC_1940' in ens for ens in enslst)
    assert all('WRF_1990' in ens for ens in enslst)
    # add CVDP data
    cvdp = loadEnsembleTS(names=names, prov=prov, season=season, mode=mode, name_tags=name_tags,
                          obsslices=obsslices, varlist=['PDO'], ensemble_product='inner',
                          ensemble_list=['names','name_tags','obsslices',], lwrite=lwrite,
                          load_list=['mode','season','prov',], lproduct='outer', dataset_mode='CVDP')
    # BUGFIX: PDO lives in the newly loaded 'cvdp' ensembles; 'enslst' was loaded with
    # varlist=['MaxPrecip_1d'] and only acquires PDO below, so asserting on 'enslst' here
    # could never hold
    assert all(ens.hasVariable('PDO') for ens in cvdp)
    # add PDO time-series to datasets
    for ts,cv in zip(enslst,cvdp): ts.addVariable(cv.PDO, lautoTrim=True)
    # BUGFIX: this check was a bare expression whose result was discarded; assert it
    assert all(ens.hasVariable('PDO') for ens in enslst)
    # test slicing by PDO
    ds = enslst[0]['WRF_1990']
    assert ds(PDO=(-1,0.), lminmax=True)
    ## some debugging test
    # NetCDF datasets to add cluster_id to
    wrfensnc = ['max-ctrl','max-ens-A','max-ens-B','max-ens-C', # Ensembles don't have unique NetCDF files
                'max-ctrl-2050','max-ens-A-2050','max-ens-B-2050','max-ens-C-2050',
                'max-ctrl-2100','max-ens-A-2100','max-ens-B-2100','max-ens-C-2100',]
    wrfensnc = loadEnsembleTS(names=wrfensnc, name='WRF_NC', title=None, varlist=None,
                              station='ecprecip', filetypes=['hydro'], domain=2, lwrite=lwrite)
    # climatology
    constraints = dict()
    constraints['min_len'] = 10 # for valid climatology
    constraints['lat'] = (45,60)
    #constraints['max_zerr'] = 100 # can't use this, because we are loading EC data separately from WRF
    constraints['prov'] = ('BC','AB')
    wrfens = loadEnsembleTS(names=['max-ens','max-ens-2050','max-ens-2100'], name='WRF', title=None,
                            varlist=None, aggregation='mean', station='ecprecip',
                            constraints=constraints, filetypes=['hydro'], domain=2, lwrite=False)
    wrfens = wrfens.copy(asNC=False) # read-only DatasetNetCDF can't add new variables (not as VarNC, anyway...)
def loadShapeObservations(obs=None, seasons=None, basins=None, provs=None, shapes=None,
                          varlist=None, slices=None, aggregation='mean', shapetype=None,
                          period=None, variable_list=None, **kwargs):
    ''' convenience function to load shape observations; the main function is to select
        sensible defaults based on 'varlist', if no 'obs' are specified

        returns an Ensemble of Dataset objects; kwargs are passed to loadEnsembleTS
    '''
    # NOTE(review): this is a second copy of loadShapeObservations in the same file;
    # the later definition shadows the earlier one at import time -- confirm intent.
    # prepare arguments
    if shapetype is None: shapetype = 'shpavg' # really only one in use
    # resolve variable list (no need to maintain order)
    # assumes varlist is not None -- TODO confirm callers always supply it
    if isinstance(varlist, basestring): varlist = [varlist]
    variables = set(shp_params)  # always include the shape parameters
    for name in varlist:
        # expand known variable bundles; pass unknown names through verbatim
        if name in variable_list: variables.update(variable_list[name].vars)
        else: variables.add(name)
    variables = list(variables)
    # figure out default datasets
    if obs is None: obs = 'Observations'
    lUnity = lCRU = lWSC = False
    # NOTE(review): lCRU is never set to True in this function, so the 'if lCRU:'
    # branch below is currently dead code -- confirm intent
    if obs[:3].lower() in ('obs', 'wsc'):
        # generic 'Observations'/'WSC' request: substitute specific datasets below
        if any(var in CRU_vars for var in variables):
            if aggregation == 'mean' and seasons is None:
                lUnity = True
                obs = []
        if basins and any([var in WSC_vars for var in variables]):
            if aggregation.lower() in ('mean', 'std', 'sem', 'min', 'max') and seasons is None:
                lWSC = True
                obs = []
    if not isinstance(obs, (list, tuple)): obs = (obs, )
    # configure slicing (extract basin/province/shape and period)
    slices = _configSlices(slices=slices, basins=basins, provs=provs, shapes=shapes, period=period)
    if slices is not None:
        noyears = slices.copy()
        noyears.pop('years', None)  # slices for climatologies
    # prepare and load ensemble of observations
    obsens = Ensemble(name='obs', title='Observations', basetype=Dataset)
    if len(obs) > 0:  # regular operations with user-defined dataset
        try:
            ensemble = loadEnsembleTS(names=obs, season=seasons, aggregation=aggregation,
                                      slices=slices, varlist=variables, shape=shapetype,
                                      ldataset=False, **kwargs)
            for ens in ensemble: obsens += ens
        except EmptyDatasetError: pass  # no matching data is not an error here
    if lUnity:  # load Unity data instead of averaging CRU data
        if period is None: period = (1979, 1994)
        dataset = loadDataset(name='Unity', varlist=variables, mode='climatology',
                              period=period, shape=shapetype)
        if slices is not None: dataset = dataset(**noyears)  # slice immediately
        obsens += dataset.load()
    if lCRU:  # this is basically regular operations with CRU as default
        obsens += loadEnsembleTS(names='CRU', season=seasons, aggregation=aggregation,
                                 slices=slices, varlist=variables, shape=shapetype,
                                 ldataset=True, **kwargs)
    if lWSC:  # another special case: river hydrographs
        # from datasets.WSC import loadGageStation, GageStationError
        try:
            dataset = loadGageStation(basin=basins, varlist=['runoff'], aggregation=aggregation,
                                      mode='climatology', filetype='monthly')
            if slices is not None: dataset = dataset(**noyears)  # slice immediately
            obsens += dataset.load()
        except GageStationError: pass  # just ignore, if gage station data is missing
    # return ensembles (will be wrapped in a list, if BatchLoad is used)
    return obsens
def loadStationEnsemble(seasons=None, provs=None, clusters=None, varlist=None, aggregation='mean',
                        constraints=None, filetypes=None, cluster_name=None, stationtype=None,
                        load_list=None, lproduct='outer', WRF_exps=None, CESM_exps=None,
                        WRF_ens=None, CESM_ens=None, variable_list=None, default_constraints=None,
                        **kwargs):
    ''' convenience function to load station data for ensembles (in Ensemble container);
        kwargs are passed to loadEnsembleTS '''
    # NOTE(review): this is a second copy of loadStationEnsemble in the same file;
    # the later definition shadows the earlier one at import time -- confirm intent.
    load_list = [] if load_list is None else load_list[:]  # use a copy, since the list may be modified
    #XXX: move this into helper function with Batch-decorator to allow batch-loading of varlists
    # figure out varlist: infer the station type from the variable name, if necessary
    if isinstance(varlist, basestring) and not stationtype:
        if varlist.lower().find('prec') >= 0: stationtype = 'ecprecip'
        elif varlist.lower().find('temp') >= 0: stationtype = 'ectemp'
        else: raise ArgumentError, varlist
    if not isinstance(stationtype, basestring): raise ArgumentError, stationtype  # not inferred
    if clusters and not cluster_name: raise ArgumentError
    params = stn_params + [ cluster_name ] if cluster_name else stn_params  # need to load cluster_name!
    variables, filetypes = _resolveVarlist(varlist=varlist, filetypes=filetypes,
                                           params=params, variable_list=variable_list)
    # prepare arguments
    if provs or clusters:
        if constraints is None: constraints = default_constraints.copy()
        constraint_list = []
        # NOTE(review): this guard checks 'provs'/'clusters' (plural) in load_list, but
        # the expansion code below keys on 'prov'/'cluster' (singular) -- confirm which
        # spelling callers actually use; as written the guard may never trigger
        if 'provs' in load_list and 'clusters' in load_list:
            raise ArgumentError, "Cannot expand 'provs' and 'clusters' at the same time."
        # figure out proper handling of provinces
        if provs:
            if 'prov' not in load_list:
                # no expansion: apply all provinces as one combined constraint
                constraints['prov'] = provs
                provs = None
            else:
                # expansion: one constraint dict per province
                if len(constraint_list) > 0: raise ArgumentError, "Cannot expand multiple keyword-constraints at once."
                for prov in provs:
                    tmp = constraints.copy()
                    tmp['prov'] = prov
                    constraint_list.append(tmp)
                load_list[load_list.index('prov')] = 'constraints'
                constraints = constraint_list
                provs = None
        # and analogously, handling of clusters!
        if clusters:
            if 'cluster' not in load_list:
                constraints['cluster'] = clusters
                clusters = None
            else:
                if len(constraint_list) > 0: raise ArgumentError, "Cannot expand multiple keyword-constraints at once."
                for cluster in clusters:
                    tmp = constraints.copy()
                    tmp['cluster'] = cluster
                    if cluster_name: tmp['cluster_name'] = cluster_name  # will be expanded next to cluster index
                    constraint_list.append(tmp)
                load_list[load_list.index('cluster')] = 'constraints'
                constraints = constraint_list
                clusters = None
    # load ensemble (no iteration here)
    stnens = loadEnsembleTS(season=seasons, prov=provs, station=stationtype, varlist=variables,
                            aggregation=aggregation, constraints=constraints, filetypes=filetypes,
                            WRF_exps=WRF_exps, CESM_exps=CESM_exps, WRF_ens=WRF_ens, CESM_ens=CESM_ens,
                            load_list=load_list, lproduct=lproduct, lcheckVar=False, **kwargs)
    # return ensembles (will be wrapped in a list, if BatchLoad is used)
    return stnens
def testAdvancedLoadEnsembleTS(self):
    ''' test station data load functions (ensemble and list) '''
    from datasets.common import loadEnsembleTS
    lwrite = False
    # test ensemble (inner) list expansion
    names = 'CRU'
    varlist = ['precip']
    slices = dict(shape_name='FRB')
    obsslices = [dict(years=(1914, 1918)), dict(years=(1939, 1945))]
    name_tags = ['_1914', '_1939']
    # 'obsslices' and 'name_tags' are expanded in lock-step (inner expansion)
    shpens = loadEnsembleTS(names=names, shape='shpavg', name_tags=name_tags,
                            obsslices=obsslices, slices=slices, varlist=varlist,
                            filetypes=['hydro'], aggregation=None, season=None,
                            ensemble_list=['obsslices', 'name_tags'])
    assert isinstance(shpens, Ensemble)
    assert shpens.basetype.__name__ == 'Dataset'
    assert all(shpens.hasVariable(varlist[0]))
    assert all('CRU' == ds.name[:3] for ds in shpens)
    assert len(shpens['CRU_1914'].time) == 48  # 4-year monthly time-series
    assert len(shpens['CRU_1939'].time) == 72  # 6-year monthly time-series
    assert all('FRB' == ds.atts.shape_name for ds in shpens)
    # test ensemble (inner) list expansion with outer list expansion
    varlist = ['MaxPrecip_1d']
    constraints = dict(min_len=50, lat=(50, 55), max_zerr=300, )
    # inner expansion
    names = ['EC', 'EC', 'erai-max']
    name_tags = ['_1990', '_1940', 'WRF_1990']
    obsslices = [dict(years=(1929, 1945)), dict(years=(1979, 1995)), dict()]
    # outer expansion
    prov = ['BC', 'AB']
    season = ['summer', 'winter']
    mode = ['max']
    # load data
    enslst = loadEnsembleTS(names=names, prov=prov, season=season, mode=mode,
                            station='ecprecip', constraints=constraints,
                            name_tags=name_tags, obsslices=obsslices, domain=2,
                            filetypes=['hydro'], varlist=varlist,
                            ensemble_product='inner',
                            ensemble_list=['names', 'name_tags', 'obsslices', ],
                            lwrite=lwrite,
                            load_list=['mode', 'season', 'prov', ],
                            lproduct='outer', )
    assert len(enslst) == 4  # 2 seasons x 2 provinces (only one mode)
    assert all(isinstance(ens, Ensemble) for ens in enslst)
    assert all(ens.basetype.__name__ == 'Dataset' for ens in enslst)
    assert all(ens.hasVariable(varlist[0]) for ens in enslst)
    assert all('EC_1990' in ens for ens in enslst)
    assert all('EC_1940' in ens for ens in enslst)
    assert all('WRF_1990' in ens for ens in enslst)
    # add CVDP data
    cvdp = loadEnsembleTS(names=names, prov=prov, season=season, mode=mode,
                          name_tags=name_tags, obsslices=obsslices,
                          varlist=['PDO'], ensemble_product='inner',
                          ensemble_list=['names', 'name_tags', 'obsslices', ],
                          lwrite=lwrite,
                          load_list=['mode', 'season', 'prov', ],
                          lproduct='outer', dataset_mode='CVDP')
    # BUGFIX: PDO lives in the newly loaded 'cvdp' ensembles; 'enslst' was loaded with
    # varlist=['MaxPrecip_1d'] and only acquires PDO below, so asserting on 'enslst'
    # here could never hold
    assert all(ens.hasVariable('PDO') for ens in cvdp)
    # add PDO time-series to datasets
    for ts, cv in zip(enslst, cvdp):
        ts.addVariable(cv.PDO, lautoTrim=True)
    # BUGFIX: this check was a bare expression whose result was discarded; assert it
    assert all(ens.hasVariable('PDO') for ens in enslst)
    # test slicing by PDO
    ds = enslst[0]['WRF_1990']
    assert ds(PDO=(-1, 0.), lminmax=True)
    ## some debugging test
    # NetCDF datasets to add cluster_id to
    wrfensnc = [
        'max-ctrl', 'max-ens-A', 'max-ens-B', 'max-ens-C',  # Ensembles don't have unique NetCDF files
        'max-ctrl-2050', 'max-ens-A-2050', 'max-ens-B-2050', 'max-ens-C-2050',
        'max-ctrl-2100', 'max-ens-A-2100', 'max-ens-B-2100', 'max-ens-C-2100',
    ]
    wrfensnc = loadEnsembleTS(names=wrfensnc, name='WRF_NC', title=None,
                              varlist=None, station='ecprecip',
                              filetypes=['hydro'], domain=2, lwrite=lwrite)
    # climatology
    constraints = dict()
    constraints['min_len'] = 10  # for valid climatology
    constraints['lat'] = (45, 60)
    #constraints['max_zerr'] = 100 # can't use this, because we are loading EC data separately from WRF
    constraints['prov'] = ('BC', 'AB')
    wrfens = loadEnsembleTS(
        names=['max-ens', 'max-ens-2050', 'max-ens-2100'], name='WRF',
        title=None, varlist=None, aggregation='mean', station='ecprecip',
        constraints=constraints, filetypes=['hydro'], domain=2, lwrite=False)
    wrfens = wrfens.copy(
        asNC=False
    )  # read-only DatasetNetCDF can't add new variables (not as VarNC, anyway...)