Example #1
def loadNARR_LTM(name=dataset_name, varlist=None, grid=None, interval='monthly', varatts=None, filelist=None, folder=ltmfolder):
  ''' Get a properly formatted dataset of daily or monthly NARR climatologies (LTM). '''
  if grid is None:
    # load from original time-series files 
    if folder is None: folder = orig_ts_folder
    # prepare input
    if varatts is None: varatts = ltmvaratts.copy()
    if varlist is None: varlist = ltmvarlist
    if interval == 'monthly': 
      pfx = '.mon.ltm.nc'; tlen = 12
    elif interval == 'daily': 
      pfx = '.day.ltm.nc'; tlen = 365
    else: raise DatasetError, "Selected interval '%s' is not supported!"%interval
    # translate varlist
    if varlist and varatts: varlist = translateVarNames(varlist, varatts)  
    # axes dictionary, primarily to override time axis 
    axes = dict(time=Axis(name='time',units='day',coord=(1,tlen,tlen)),load=True)
    if filelist is None: # generate default filelist
      filelist = [special[var]+pfx if var in special else var+pfx for var in varlist if var not in nofile]
    # load dataset
    dataset = DatasetNetCDF(name=name, folder=folder, filelist=filelist, varlist=varlist, varatts=varatts, 
                            axes=axes, atts=projdict, multifile=False, ncformat='NETCDF4_CLASSIC')
    # add projection
    projection = getProjFromDict(projdict, name='{0:s} Coordinate System'.format(name))
    dataset = addGDALtoDataset(dataset, projection=projection, geotransform=None, folder=grid_folder)
  else:
    # load from neatly formatted and regridded time-series files
    if folder is None: folder = avgfolder
    raise NotImplementedError, "Need to implement loading neatly formatted and regridded time-series!"
  # return formatted dataset
  return dataset
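A minimal usage sketch for the loader above; the import path (datasets.NARR) follows the GeoPy package layout and is an assumption, while the folder and variable-attribute defaults come from the module itself:

from datasets.NARR import loadNARR_LTM  # assumed module path

# monthly climatology on the native NARR grid (grid=None loads the original LTM files)
narr_ltm = loadNARR_LTM(interval='monthly')
print(narr_ltm)             # inspect variables and axes
print(len(narr_ltm.time))   # 12 for 'monthly', 365 for 'daily'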
Example #2
def loadGPCC_TS(name=dataset_name, grid=None, varlist=None, resolution='25', varatts=None, filelist=None, 
                folder=None, lautoregrid=None):
  ''' Get a properly formatted dataset with the monthly GPCC time-series. '''
  if grid is None:
    # load from original time-series files 
    if folder is None: folder = orig_ts_folder
    # prepare input  
    if resolution not in ('05', '10', '25'): raise DatasetError, "Selected resolution '%s' is not available!"%resolution
    # translate varlist
    if varatts is None: varatts = tsvaratts.copy()
    if varlist is None: varlist = varatts.keys()
    if varlist and varatts: varlist = translateVarNames(varlist, varatts)
    if filelist is None: # generate default filelist
      filelist = []
      if 'p' in varlist: filelist.append(orig_ts_file.format('precip',resolution))
      if 's' in varlist: filelist.append(orig_ts_file.format('statio',resolution))
    # load dataset
    dataset = DatasetNetCDF(name=name, folder=folder, filelist=filelist, varlist=varlist, varatts=varatts, multifile=False, ncformat='NETCDF4_CLASSIC')
    # replace time axis with number of month since Jan 1979 
    data = np.arange(0,len(dataset.time),1, dtype='int16') + (1901-1979)*12 # month since 1979 (Jan 1979 = 0)
    timeAxis = Axis(name='time', units='month', coord=data, atts=dict(long_name='Month since 1979-01'))
    dataset.replaceAxis(dataset.time, timeAxis, asNC=False, deepcopy=False)
    # add GDAL info
    dataset = addGDALtoDataset(dataset, projection=None, geotransform=None)
    # N.B.: projection should be auto-detected as geographic
  else:
    # load from neatly formatted and regridded time-series files
    if folder is None: folder = avgfolder
    grid, resolution = checkGridRes(grid, resolution, period=None, lclim=False)
    dataset = loadObservations(name=name, folder=folder, projection=None, resolution=resolution, grid=grid, 
                               period=None, varlist=varlist, varatts=varatts, filepattern=tsfile, 
                               filelist=filelist, lautoregrid=lautoregrid, mode='time-series')
  # return formatted dataset
  return dataset
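The (1901-1979)*12 offset above encodes the time convention shared by these loaders: the time coordinate counts months since January 1979 (Jan 1979 = 0), while the GPCC and CRU records begin in January 1901; datasets that already start in 1979 (e.g. NARR, CFSR) need no offset. A small sketch of the arithmetic (the helper names are illustrative only):

def month_index(year, month, start_year=1901):
  ''' array index of (year, month) in a monthly series starting in January of start_year '''
  return (year - start_year)*12 + (month - 1)

def time_coord(year, month):
  ''' time-axis value (months since 1979-01) for a given calendar month '''
  return (year - 1979)*12 + (month - 1)

assert time_coord(1979, 1) == 0      # Jan 1979 is the origin
assert time_coord(1901, 1) == -936   # first GPCC/CRU record, i.e. (1901-1979)*12
assert month_index(1980, 6) == 953   # June 1980 in the raw data array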
Example #3
File: CRU.py Project: aerler/GeoPy
def loadCRU_TS(name=dataset_name, grid=None, varlist=None, resolution=None, varatts=None, filelist=None, 
               folder=None, lautoregrid=None):
  ''' Get a properly formatted CRU dataset with monthly mean time-series. '''
  if grid is None:
    # load from original time-series files 
    if folder is None: folder = orig_ts_folder
    # translate varlist
    if varatts is None: varatts = tsvaratts.copy()
    if varlist is None: varlist = varatts.keys()
    if varlist and varatts: varlist = translateVarNames(varlist, varatts)
    # assemble filelist
    if filelist is None: # generate default filelist
      filelist = [orig_ts_file.format(var) for var in varlist if var not in nofile]
    # load dataset
    dataset = DatasetNetCDF(name=name, folder=folder, filelist=filelist, varlist=varlist, varatts=varatts, 
                            multifile=False, ncformat='NETCDF4_CLASSIC')
    # replace time axis with number of month since Jan 1979 
    data = np.arange(0,len(dataset.time),1, dtype='int16') + (1901-1979)*12 # month since 1979 (Jan 1979 = 0)
    timeAxis = Axis(name='time', units='month', coord=data, atts=dict(long_name='Month since 1979-01'))
    dataset.replaceAxis(dataset.time, timeAxis, asNC=False, deepcopy=False)
    # add projection  
    dataset = addGDALtoDataset(dataset, projection=None, geotransform=None, gridfolder=grid_folder)
    # N.B.: projection should be auto-detected as geographic    
  else:
    # load from neatly formatted and regridded time-series files
    if folder is None: folder = avgfolder
    dataset = loadObservations(name=name, folder=folder, projection=None, resolution=None, grid=grid, 
                               period=None, varlist=varlist, varatts=varatts, filepattern=tsfile, 
                               filelist=filelist, lautoregrid=lautoregrid, mode='time-series')
  # return formatted dataset
  return dataset
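Passing a grid name takes the else branch and reads pre-regridded files via loadObservations; a hedged sketch of both call styles (the module path datasets.CRU and the grid name 'my_grid' are assumptions, and the grid must be known to the project):

from datasets.CRU import loadCRU_TS  # assumed module path

cru_native = loadCRU_TS()                                   # original 0.5 deg CRU files
cru_regrid = loadCRU_TS(grid='my_grid', lautoregrid=True)   # hypothetical pre-defined target grid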
Example #4
def loadNARR_TS(name=dataset_name, grid=None, varlist=None, resolution=None, varatts=None, filelist=None, 
               folder=None, lautoregrid=None):
  ''' Get a properly formatted NARR dataset with monthly mean time-series. '''
  if grid is None:
    # load from original time-series files 
    if folder is None: folder = orig_ts_folder
    # translate varlist
    if varatts is None: varatts = tsvaratts.copy()
    if varlist is None: varlist = tsvarlist
    if varlist and varatts: varlist = translateVarNames(varlist, varatts)
    if filelist is None: # generate default filelist
      filelist = [orig_ts_file.format(special[var]) if var in special else orig_ts_file.format(var) for var in varlist 
                  if var not in nofile and var in varatts]
    # load dataset
    dataset = DatasetNetCDF(name=name, folder=folder, filelist=filelist, varlist=varlist, varatts=varatts, 
                            atts=projdict, multifile=False, ncformat='NETCDF4_CLASSIC')
    # replace time axis with number of month since Jan 1979 
    data = np.arange(0,len(dataset.time),1, dtype='int16') # month since 1979 (Jan 1979 = 0)
    timeAxis = Axis(name='time', units='month', coord=data, atts=dict(long_name='Month since 1979-01'))
    dataset.replaceAxis(dataset.time, timeAxis, asNC=False, deepcopy=False)
    # add projection
    projection = getProjFromDict(projdict, name='{0:s} Coordinate System'.format(name))
    dataset = addGDALtoDataset(dataset, projection=projection, geotransform=None, gridfolder=grid_folder)
  else:
    # load from neatly formatted and regridded time-series files
    if folder is None: folder = avgfolder
    dataset = loadObservations(name=name, folder=folder, projection=None, resolution=None, grid=grid, 
                               period=None, varlist=varlist, varatts=varatts, filepattern=tsfile, 
                               filelist=filelist, lautoregrid=lautoregrid, mode='time-series')
  # return formatted dataset
  return dataset
Example #5
def loadPCIC_LTM(name=dataset_name,
                 varlist=None,
                 varatts=ltmvaratts,
                 filelist=None,
                 folder=ltmfolder):
    ''' Get a properly formatted dataset of the monthly PCIC PRISM climatology. '''
    # translate varlist
    if varlist is None: varlist = varatts.keys()
    if varlist and varatts: varlist = translateVarNames(varlist, varatts)
    # generate file list
    filelist = [
        ltmfile.format(var) for var in varlist
        if var not in ('time', 'lat', 'lon')
    ]
    # load variables separately
    dataset = DatasetNetCDF(name=name,
                            folder=folder,
                            filelist=filelist,
                            varlist=varlist,
                            varatts=varatts,
                            ncformat='NETCDF4')
    dataset = addGDALtoDataset(dataset,
                               projection=None,
                               geotransform=None,
                               gridfolder=grid_folder)
    # N.B.: projection should be auto-detected as geographic
    # return formatted dataset
    return dataset
Example #6
File: GPCC.py Project: aerler/GeoPy
def loadGPCC_LTM(
    name=dataset_name, varlist=None, resolution="025", varatts=ltmvaratts, filelist=None, folder=ltmfolder
):
    """ Get a properly formatted dataset the monthly accumulated GPCC precipitation climatology. """
    # prepare input
    if resolution not in ("025", "05", "10", "25"):
        raise DatasetError, "Selected resolution '%s' is not available!" % resolution
    # translate varlist
    if varlist is None:
        varlist = varatts.keys()
    if varlist and varatts:
        varlist = translateVarNames(varlist, varatts)
    # load variables separately
    if "p" in varlist:
        dataset = DatasetNetCDF(
            name=name,
            folder=folder,
            filelist=["normals_v2011_%s.nc" % resolution],
            varlist=["p"],
            varatts=varatts,
            ncformat="NETCDF4_CLASSIC",
        )
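    # N.B.: the station branch below attaches to dataset.lat/dataset.lon, so precipitation ("p") must be loaded first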
    if "s" in varlist:
        gauges = nc.Dataset(folder + "normals_gauges_v2011_%s.nc" % resolution, mode="r", format="NETCDF4_CLASSIC")
        stations = Variable(data=gauges.variables["p"][0, :, :], axes=(dataset.lat, dataset.lon), **varatts["s"])
        # consolidate dataset
        dataset.addVariable(stations, asNC=False, copy=True)
    dataset = addGDALtoDataset(dataset, projection=None, geotransform=None, gridfolder=grid_folder)
    # N.B.: projection should be auto-detected as geographic
    # return formatted dataset
    return dataset
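A brief usage sketch (the module path datasets.GPCC is an assumption); note that the '025' (0.25 deg) resolution appears only in the climatology loader above, not in the loadGPCC_TS time-series loader:

from datasets.GPCC import loadGPCC_LTM  # assumed module path

gpcc_clim = loadGPCC_LTM(resolution='05', varlist=['p', 's'])  # 0.5 deg precipitation plus gauge counts
print(gpcc_clim.variables.keys())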
Example #7
def loadCFSR_TS(name=dataset_name, grid=None, varlist=None, varatts=None, resolution='hires', 
                filelist=None, folder=None, lautoregrid=None):
  ''' Get a properly formatted CFSR dataset with monthly mean time-series. '''
  if grid is None:
    # load from original time-series files 
    if folder is None: folder = orig_ts_folder
    # translate varlist
    if varatts is None: varatts = tsvaratts.copy()
    if varlist is None:
      if resolution == 'hires' or resolution == '03' or resolution == '031': varlist = varlist_hires
      elif resolution == 'lowres' or resolution == '05': varlist = varlist_lowres     
    if varlist and varatts: varlist = translateVarNames(varlist, varatts)
    if filelist is None: # generate default filelist
      if resolution == 'hires' or resolution == '03' or resolution == '031': 
        files = [hiresfiles[var] for var in varlist if var in hiresfiles]
      elif resolution == 'lowres' or resolution == '05': 
        files = [lowresfiles[var] for var in varlist if var in lowresfiles]
    # load dataset
    dataset = DatasetNetCDF(name=name, folder=folder, filelist=files, varlist=varlist, varatts=varatts, 
                            check_override=['time'], multifile=False, ncformat='NETCDF4_CLASSIC')
    # load static data
    if filelist is None: # generate default filelist
      if resolution == 'hires' or resolution == '03' or resolution == '031': 
        files = [hiresstatic[var] for var in varlist if var in hiresstatic]
      elif resolution == 'lowres' or resolution == '05': 
        files = [lowresstatic[var] for var in varlist if var in lowresstatic]
      # load constants, if any (and with singleton time axis)
      if len(files) > 0:
        staticdata = DatasetNetCDF(name=name, folder=folder, filelist=files, varlist=varlist, varatts=varatts, 
                                   axes=dict(lon=dataset.lon, lat=dataset.lat), multifile=False, 
                                   check_override=['time'], ncformat='NETCDF4_CLASSIC')
        # N.B.: need to override the axes, so that the datasets are consistent
        if len(staticdata.variables) > 0:
          for var in staticdata.variables.values(): 
            if not dataset.hasVariable(var.name):
              var.squeeze() # remove time dimension
              dataset.addVariable(var, copy=False) # no need to copy... but we can't write to the netcdf file!
    # replace time axis with number of month since Jan 1979 
    data = np.arange(0,len(dataset.time),1, dtype='int16') # month since 1979 (Jan 1979 = 0)
    timeAxis = Axis(name='time', units='month', coord=data, atts=dict(long_name='Month since 1979-01'))
    dataset.replaceAxis(dataset.time, timeAxis, asNC=False, deepcopy=False)
    # add projection  
    dataset = addGDALtoDataset(dataset, projection=None, geotransform=None, gridfolder=grid_folder)
    # N.B.: projection should be auto-detected as geographic
  else:
    # load from neatly formatted and regridded time-series files
    if folder is None: folder = avgfolder
    grid, resolution = checkGridRes(grid, resolution)
    dataset = loadObservations(name=name, folder=folder, projection=None, resolution=resolution, grid=grid, 
                               period=None, varlist=varlist, varatts=varatts, filepattern=tsfile, 
                               filelist=filelist, lautoregrid=lautoregrid, mode='time-series')
  # return formatted dataset
  return dataset
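The resolution keyword selects between the two file sets ('hires'/'03'/'031' versus 'lowres'/'05'); a hedged usage sketch, assuming the module path datasets.CFSR:

from datasets.CFSR import loadCFSR_TS  # assumed module path

cfsr_hi = loadCFSR_TS(resolution='hires')   # ~0.3 deg files (varlist_hires)
cfsr_lo = loadCFSR_TS(resolution='lowres')  # 0.5 deg files (varlist_lowres)
print(cfsr_hi.time.coord[:3])               # months since 1979-01: [0 1 2]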
Example #8
def loadPCIC_LTM(name=dataset_name, varlist=None, varatts=ltmvaratts, filelist=None, folder=ltmfolder):
  ''' Get a properly formatted dataset of the monthly PCIC PRISM climatology. '''
  # translate varlist
  if varlist is None: varlist = varatts.keys()
  if varlist and varatts: varlist = translateVarNames(varlist, varatts)
  # generate file list
  filelist = [ltmfile.format(var) for var in varlist if var not in ('time','lat','lon')]
  # load variables separately
  dataset = DatasetNetCDF(name=name, folder=folder, filelist=filelist, varlist=varlist, varatts=varatts, ncformat='NETCDF4')
  dataset = addGDALtoDataset(dataset, projection=None, geotransform=None, gridfolder=grid_folder)
  # N.B.: projection should be auto-detected as geographic    
  # return formatted dataset
  return dataset
Example #9
def loadGPCC_LTM(name=dataset_name,
                 varlist=None,
                 resolution='025',
                 varatts=ltmvaratts,
                 filelist=None,
                 folder=ltmfolder):
    ''' Get a properly formatted dataset of the monthly accumulated GPCC precipitation climatology. '''
    # prepare input
    if resolution not in ('025', '05', '10', '25'):
        raise DatasetError, "Selected resolution '%s' is not available!" % resolution
    # translate varlist
    if varlist is None: varlist = varatts.keys()
    if varlist and varatts: varlist = translateVarNames(varlist, varatts)
    # load variables separately
    if 'p' in varlist:
        dataset = DatasetNetCDF(name=name,
                                folder=folder,
                                filelist=['normals_v2011_%s.nc' % resolution],
                                varlist=['p'],
                                varatts=varatts,
                                ncformat='NETCDF4_CLASSIC')
    if 's' in varlist:
        gauges = nc.Dataset(folder + 'normals_gauges_v2011_%s.nc' % resolution,
                            mode='r',
                            format='NETCDF4_CLASSIC')
        stations = Variable(data=gauges.variables['p'][0, :, :],
                            axes=(dataset.lat, dataset.lon),
                            **varatts['s'])
        # consolidate dataset
        dataset.addVariable(stations, asNC=False, copy=True)
    dataset = addGDALtoDataset(dataset,
                               projection=None,
                               geotransform=None,
                               gridfolder=grid_folder)
    # N.B.: projection should be auto-detected as geographic
    # return formatted dataset
    return dataset
Example #10
def loadCFSR_TS(name=dataset_name,
                grid=None,
                varlist=None,
                varatts=None,
                resolution='hires',
                filelist=None,
                folder=None,
                lautoregrid=None):
    ''' Get a properly formatted CFSR dataset with monthly mean time-series. '''
    if grid is None:
        # load from original time-series files
        if folder is None: folder = orig_ts_folder
        # translate varlist
        if varatts is None: varatts = tsvaratts.copy()
        if varlist is None:
            if resolution == 'hires' or resolution == '03' or resolution == '031':
                varlist = varlist_hires
            elif resolution == 'lowres' or resolution == '05':
                varlist = varlist_lowres
        if varlist and varatts: varlist = translateVarNames(varlist, varatts)
        if filelist is None:  # generate default filelist
            if resolution == 'hires' or resolution == '03' or resolution == '031':
                files = [
                    hiresfiles[var] for var in varlist if var in hiresfiles
                ]
            elif resolution == 'lowres' or resolution == '05':
                files = [
                    lowresfiles[var] for var in varlist if var in lowresfiles
                ]
        # load dataset
        dataset = DatasetNetCDF(name=name,
                                folder=folder,
                                filelist=files,
                                varlist=varlist,
                                varatts=varatts,
                                check_override=['time'],
                                multifile=False,
                                ncformat='NETCDF4_CLASSIC')
        # load static data
        if filelist is None:  # generate default filelist
            if resolution == 'hires' or resolution == '03' or resolution == '031':
                files = [
                    hiresstatic[var] for var in varlist if var in hiresstatic
                ]
            elif resolution == 'lowres' or resolution == '05':
                files = [
                    lowresstatic[var] for var in varlist if var in lowresstatic
                ]
            # create singleton time axis
            staticdata = DatasetNetCDF(name=name,
                                       folder=folder,
                                       filelist=files,
                                       varlist=varlist,
                                       varatts=varatts,
                                       axes=dict(lon=dataset.lon,
                                                 lat=dataset.lat),
                                       multifile=False,
                                       check_override=['time'],
                                       ncformat='NETCDF4_CLASSIC')
            # N.B.: need to override the axes, so that the datasets are consistent
            if len(staticdata.variables) > 0:
                for var in staticdata.variables.values():
                    if not dataset.hasVariable(var.name):
                        var.squeeze()  # remove time dimension
                        dataset.addVariable(var, copy=False)  # no need to copy... but we can't write to the netcdf file!
        # replace time axis with number of month since Jan 1979
        data = np.arange(0, len(dataset.time), 1,
                         dtype='int16')  # month since 1979 (Jan 1979 = 0)
        timeAxis = Axis(name='time',
                        units='month',
                        coord=data,
                        atts=dict(long_name='Month since 1979-01'))
        dataset.replaceAxis(dataset.time, timeAxis, asNC=False, deepcopy=False)
        # add projection
        dataset = addGDALtoDataset(dataset,
                                   projection=None,
                                   geotransform=None,
                                   gridfolder=grid_folder)
        # N.B.: projection should be auto-detected as geographic
    else:
        # load from neatly formatted and regridded time-series files
        if folder is None: folder = avgfolder
        grid, resolution = checkGridRes(grid, resolution)
        dataset = loadObservations(name=name,
                                   folder=folder,
                                   projection=None,
                                   resolution=resolution,
                                   grid=grid,
                                   period=None,
                                   varlist=varlist,
                                   varatts=varatts,
                                   filepattern=tsfile,
                                   filelist=filelist,
                                   lautoregrid=lautoregrid,
                                   mode='time-series')
    # return formatted dataset
    return dataset
Example #11
  def __init__(self):
    self.name = 'const' 
    self.atts = dict(orog    = dict(name='zs', units='m')) # surface altitude (remaining 'const' entries truncated in this excerpt)

# axes (don't have their own file)
class Axes(FileType):
  ''' A mock-filetype for axes. '''
  def __init__(self):
    self.atts = dict(time        = dict(name='time', units='days', offset=-47116, atts=dict(long_name='Month since 1979')), # time coordinate (days since 1979-01-01)
                     # NOTE THAT THE CMIP5 DATASETS HAVE DIFFERENT TIME OFFSETS BETWEEN MEMBERS !!!
                     # N.B.: the time coordinate is only used for the monthly time-series data, not the LTM
                     #       the time offset is chosen such that 1979 begins with the origin (time=0)
                     lon           = dict(name='lon', units='deg E'), # west-east coordinate
                     lat           = dict(name='lat', units='deg N'), # south-north coordinate
                     plev = dict(name='lev', units='')) # hybrid pressure coordinate
    self.vars = self.atts.keys()

# Time-Series (monthly)
def loadCMIP5_TS(experiment=None, name=None, grid=None, filetypes=None, varlist=None, varatts=None,  
                translateVars=None, lautoregrid=None, load3D=False, ignore_list=None, lcheckExp=True,
                lreplaceTime=True, lwrite=False, exps=None):
  ''' Get a properly formatted CMIP5 dataset with a monthly time-series (wrapper for loadCMIP5_All). '''
  return loadCMIP5_All(experiment=experiment, name=name, grid=grid, period=None, station=None, 
                      filetypes=filetypes, varlist=varlist, varatts=varatts, translateVars=translateVars, 
                      lautoregrid=lautoregrid, load3D=load3D, ignore_list=ignore_list, mode='time-series', 
                      lcheckExp=lcheckExp, lreplaceTime=lreplaceTime, lwrite=lwrite, exps=exps)

# load minimally pre-processed CESM climatology files 
def loadCMIP5(experiment=None, name=None, grid=None, period=None, filetypes=None, varlist=None, 
             varatts=None, translateVars=None, lautoregrid=None, load3D=False, ignore_list=None, 
             lcheckExp=True, lreplaceTime=True, lencl=False, lwrite=False, exps=None):
  ''' Get a properly formatted monthly CMIP5 climatology as NetCDFDataset. '''
  return loadCMIP5_All(experiment=experiment, name=name, grid=grid, period=period, station=None, 
                      filetypes=filetypes, varlist=varlist, varatts=varatts, translateVars=translateVars, 
                      lautoregrid=lautoregrid, load3D=load3D, ignore_list=ignore_list, exps=exps, 
                      mode='climatology', lcheckExp=lcheckExp, lreplaceTime=lreplaceTime, lwrite=lwrite)


# load any of the various pre-processed CESM climatology and time-series files 
def loadCMIP5_All(experiment=None, name=None, grid=None, station=None, shape=None, period=None, 
                 varlist=None, varatts=None, translateVars=None, lautoregrid=None, load3D=False, 
                 ignore_list=None, mode='climatology', cvdp_mode=None, lcheckExp=True, exps=None,
                 lreplaceTime=True, filetypes=None, lencl=False, lwrite=False, check_vars=None):
  ''' Get any of the monthly CMIP5 files as a properly formatted NetCDFDataset. '''
  # period
  if isinstance(period,(tuple,list)):
    if not all(isNumber(period)): raise ValueError
  elif isinstance(period,basestring): period = [int(prd) for prd in period.split('-')]
  elif isinstance(period,(int,np.integer)) or period is None : pass # handled later
  else: raise DateError, "Illegal period definition: {:s}".format(str(period))
  # prepare input  
  lclim = False; lts = False; lcvdp = False; ldiag = False # mode switches
  if mode.lower() == 'climatology': # post-processed climatology files
    lclim = True
    folder,experiment,name = getFolderName(name=name, experiment=experiment, folder=None, mode='avg', exps=exps)    
    if period is None: raise DateError, 'Currently CESM Climatologies have to be loaded with the period explicitly specified.'
  elif mode.lower() in ('time-series','timeseries'): # concatenated time-series files
    lts = True
    folder,experiment,name = getFolderName(name=name, experiment=experiment, folder=None, mode='avg', exps=exps)
    lclim = False; period = None; periodstr = None # to indicate time-series (but for safety, the input must be more explicit)
    if lautoregrid is None: lautoregrid = False # this can take very long!
  elif mode.lower() == 'cvdp': # concatenated time-series files
    lcvdp = True
    folder,experiment,name = getFolderName(name=name, experiment=experiment, folder=None, mode='cvdp', 
                                           cvdp_mode=cvdp_mode, exps=exps)
    if period is None:
      if not isinstance(experiment,Exp): raise DatasetError, 'Periods can only be inferred for registered datasets.'
      period = (experiment.beginyear, experiment.endyear)  
  elif mode.lower() == 'diag': # concatenated time-series files
    ldiag = True
    folder,experiment,name = getFolderName(name=name, experiment=experiment, folder=None, mode='diag', exps=exps)
    raise NotImplementedError, "Loading AMWG diagnostic files is not supported yet."
  else: raise NotImplementedError,"Unsupported mode: '{:s}'".format(mode)  
  # cast/copy varlist
  if isinstance(varlist,basestring): varlist = [varlist] # cast as list
  elif varlist is not None: varlist = list(varlist) # make copy to avoid interference
  # handle stations and shapes
  if station and shape: raise ArgumentError
  elif station or shape: 
    if grid is not None: raise NotImplementedError, 'Currently CESM station data can only be loaded from the native grid.'
    if lcvdp: raise NotImplementedError, 'CVDP data is not available as station data.'
    if lautoregrid: raise GDALError, 'Station data can not be regridded, since it is not map data.'   
    lstation = bool(station); lshape = bool(shape)
    # add station/shape parameters
    if varlist:
      params = stn_params if lstation else shp_params
      for param in params:
        if param not in varlist: varlist.append(param)
  else:
    lstation = False; lshape = False
  # period  
  if isinstance(period,(int,np.integer)):
    if not isinstance(experiment,Exp): raise DatasetError, 'Integer periods are only supported for registered datasets.'
    period = (experiment.beginyear, experiment.beginyear+period)
  if lclim: periodstr = '_{0:4d}-{1:4d}'.format(*period)
  elif lcvdp: periodstr = '{0:4d}-{1:4d}'.format(period[0],period[1]-1)
  else: periodstr = ''
  # N.B.: the period convention in CVDP is that the end year is included
  # generate filelist and attributes based on filetypes and domain
  if filetypes is None: filetypes = ['atm','lnd']
  elif isinstance(filetypes,(list,tuple,set,basestring)):
    if isinstance(filetypes,basestring): filetypes = [filetypes]
    else: filetypes = list(filetypes)
    # interpret/replace WRF filetypes (for convenience)
    tmp = []
    for ft in filetypes:
      if ft in ('const','drydyn3d','moist3d','rad','plev3d','srfc','xtrm','hydro'):
        if 'atm' not in tmp: tmp.append('atm')
      elif ft in ('lsm','snow'):
        if 'lnd' not in tmp: tmp.append('lnd')
      elif ft in ('aux',): pass # currently not supported
#       elif ft in (,):
#         if 'atm' not in tmp: tmp.append('atm')
#         if 'lnd' not in tmp: tmp.append('lnd')        
      else: tmp.append(ft)
    filetypes = tmp; del tmp
    if 'axes' not in filetypes: filetypes.append('axes')    
  else: raise TypeError  
  atts = dict(); filelist = []; typelist = []
  for filetype in filetypes:
    fileclass = fileclasses[filetype]
    if lclim and fileclass.climfile is not None: filelist.append(fileclass.climfile)
    elif lts and fileclass.tsfile is not None: filelist.append(fileclass.tsfile)
    elif lcvdp and fileclass.cvdpfile is not None: filelist.append(fileclass.cvdpfile)
    elif ldiag and fileclass.diagfile is not None: filelist.append(fileclass.diagfile)
    typelist.append(filetype)
    atts.update(fileclass.atts) 
  # figure out ignore list  
  if ignore_list is None: ignore_list = set(ignore_list_2D)
  elif isinstance(ignore_list,(list,tuple)): ignore_list = set(ignore_list)
  elif not isinstance(ignore_list,set): raise TypeError
  if not load3D: ignore_list.update(ignore_list_3D)
  if lautoregrid is None: lautoregrid = not load3D # don't auto-regrid 3D variables - takes too long!
  # translate varlist
  if varatts is not None: atts.update(varatts)
  lSST = False
  if varlist is not None:
    varlist = list(varlist) 
    if 'SST' in varlist: # special handling of name SST variable, as it is part of Ts
      varlist.remove('SST')
      if not 'Ts' in varlist: varlist.append('Ts')
      lSST = True # Ts is renamed to SST below
    if translateVars is None: varlist = list(varlist) + translateVarNames(varlist, atts) # also add translations, just in case
    elif translateVars is True: varlist = translateVarNames(varlist, atts) 
    # N.B.: DatasetNetCDF never applies translation!
  # NetCDF file mode
  ncmode = 'rw' if lwrite else 'r'   
  # get grid or station-set name
  if lstation:
    # the station name can be inserted as the grid name
    gridstr = '_'+station.lower(); # only use lower case for filenames
    griddef = None
  elif lshape:
    # the shape-set name can be inserted as the grid name
    gridstr = '_'+shape.lower(); # only use lower case for filenames
    griddef = None
  else:
    if grid is None or grid == experiment.grid: 
      gridstr = ''; griddef = None
    else: 
      gridstr = '_'+grid.lower() # only use lower case for filenames
      griddef = loadPickledGridDef(grid=grid, res=None, filename=None, folder=grid_folder, check=True)
  # insert grid name and period
  filenames = []
  for filetype,fileformat in zip(typelist,filelist):
    if lclim: filename = fileformat.format(gridstr,periodstr) # put together specific filename for climatology
    elif lts: filename = fileformat.format(gridstr) # or for time-series
    elif lcvdp: filename = fileformat.format(experiment.name if experiment else name,periodstr) # not implemented: gridstr
    elif ldiag: raise NotImplementedError
    else: raise DatasetError
    filenames.append(filename) # append to list (passed to DatasetNetCDF later)
    # check existence
    filepath = '{:s}/{:s}'.format(folder,filename)
    if not os.path.exists(filepath):
      nativename = fileformat.format('',periodstr) # original filename (before regridding)
      nativepath = '{:s}/{:s}'.format(folder,nativename)
      if os.path.exists(nativepath):
        if lautoregrid: 
          from processing.regrid import performRegridding # causes circular reference if imported earlier
          griddef = loadPickledGridDef(grid=grid, res=None, folder=grid_folder)
          dataargs = dict(experiment=experiment, filetypes=[filetype], period=period)
          print("The '{:s}' (CESM) dataset for the grid ('{:s}') is not available:\n Attempting regridding on-the-fly.".format(name,filename,grid))
          if performRegridding('CESM','climatology' if lclim else 'time-series', griddef, dataargs): # default kwargs
            raise IOError, "Automatic regridding failed!"
          print("Output: '{:s}'".format(name,filename,grid,filepath))            
        else: raise IOError, "The '{:s}' (CESM) dataset '{:s}' for the selected grid ('{:s}') is not available - use the regrid module to generate it.".format(name,filename,grid) 
      else: raise IOError, "The '{:s}' (CESM) dataset file '{:s}' does not exits!\n({:s})".format(name,filename,folder)
   
  # load dataset
  #print varlist, filenames
  if experiment: title = experiment.title
  else: title = name
  dataset = DatasetNetCDF(name=name, folder=folder, filelist=filenames, varlist=varlist, axes=None, 
                          varatts=atts, title=title, multifile=False, ignore_list=ignore_list, 
                          ncformat='NETCDF4', squeeze=True, mode=ncmode, check_vars=check_vars)
  # replace time axis
  if lreplaceTime:
    if lts or lcvdp:
      # check time axis and center at 1979-01 (zero-based)
      if experiment is None: ys = period[0]; ms = 1
      else: ys,ms,ds = [int(t) for t in experiment.begindate.split('-')]; assert ds == 1
      if dataset.hasAxis('time'):
        ts = (ys-1979)*12 + (ms-1); te = ts+len(dataset.time) # month since 1979 (Jan 1979 = 0)
        atts = dict(long_name='Month since 1979-01')
        timeAxis = Axis(name='time', units='month', coord=np.arange(ts,te,1, dtype='int16'), atts=atts)
        dataset.replaceAxis(dataset.time, timeAxis, asNC=False, deepcopy=False)
      if dataset.hasAxis('year'):
        ts = ys-1979; te = ts+len(dataset.year) # years since 1979 (1979 = 0)
        atts = dict(long_name='Years since 1979-01')
        yearAxis = Axis(name='year', units='year', coord=np.arange(ts,te,1, dtype='int16'), atts=atts)
        dataset.replaceAxis(dataset.year, yearAxis, asNC=False, deepcopy=False)
    elif lclim:
      if dataset.hasAxis('time') and not dataset.time.units.lower() in monthlyUnitsList:
        atts = dict(long_name='Month of the Year')
        timeAxis = Axis(name='time', units='month', coord=np.arange(1,13, dtype='int16'), atts=atts)
        assert len(dataset.time) == len(timeAxis), dataset.time
        dataset.replaceAxis(dataset.time, timeAxis, asNC=False, deepcopy=False)
      elif dataset.hasAxis('year'): raise NotImplementedError, dataset
  # rename SST
  if lSST: dataset['SST'] = dataset.Ts
  # correct ordinal number of shape (should start at 1, not 0)
  if lshape:
    # mask all shapes that are incomplete in dataset
    if lencl and 'shp_encl' in dataset: dataset.mask(mask='shp_encl', invert=True)   
    if dataset.hasAxis('shapes'): raise AxisError, "Axis 'shapes' should be renamed to 'shape'!"
    if not dataset.hasAxis('shape'): raise AxisError
    if dataset.shape.coord[0] == 0: dataset.shape.coord += 1
  # check
  if len(dataset) == 0: raise DatasetError, 'Dataset is empty - check source file or variable list!'
  # add projection, if applicable
  if not ( lstation or lshape ):
    dataset = addGDALtoDataset(dataset, griddef=griddef, gridfolder=grid_folder, lwrap360=True, geolocator=True)
  # return formatted dataset
  return dataset

## Dataset API

dataset_name = 'CMIP5' # dataset name
root_folder # root folder of the dataset
avgfolder # root folder for monthly averages
outfolder # root folder for direct WRF output
ts_file_pattern = 'cmip5{0:s}{1:s}_monthly.nc' # filename pattern: filetype, grid
clim_file_pattern = 'cmip5{0:s}{1:s}_clim{2:s}.nc' # filename pattern: filetype, grid, period
data_folder = root_folder # folder for user data
grid_def = {'':None} # there are too many... 
grid_res = {'':1.} # approximate grid resolution at 45 degrees latitude
default_grid = None 
# functions to access specific datasets
loadLongTermMean = None # CMIP5 doesn't have that...
loadClimatology = loadCMIP5 # pre-processed, standardized climatology
loadTimeSeries = loadCMIP5_TS # time-series data
#loadStationClimatology = loadCESM_Stn # pre-processed, standardized climatology at stations
#loadStationTimeSeries = loadCESM_StnTS # time-series data at stations
#loadShapeClimatology = loadCESM_Shp # climatologies without associated grid (e.g. provinces or basins) 
#loadShapeTimeSeries = loadCESM_ShpTS # time-series without associated grid (e.g. provinces or basins)


## (ab)use main execution for quick test
if __name__ == '__main__':
  
  # set mode/parameters
#   mode = 'test_climatology'
#   mode = 'test_timeseries'
#   mode = 'test_ensemble'
#   mode = 'test_point_climatology'
#   mode = 'test_point_timeseries'
#   mode = 'test_point_ensemble'
#   mode = 'test_cvdp'
  mode = 'pickle_grid'
#     mode = 'shift_lon'
#   experiments = ['Ctrl-1', 'Ctrl-A', 'Ctrl-B', 'Ctrl-C']
#   experiments += ['Ctrl-2050', 'Ctrl-A-2050', 'Ctrl-B-2050', 'Ctrl-C-2050']
  experiments = ('Ctrl-1',)
  periods = (15,)
  filetypes = ('atm',) # ['atm','lnd','ice']
  grids = ('cesm1x1',)*len(experiments) # grb1_d01
#   pntset = 'shpavg'
  pntset = 'ecprecip'

  from projects.CESM_experiments import Exp, CESM_exps, ensembles
  # N.B.: importing Exp through CESM_experiments is necessary, otherwise some isinstance() calls fail

  # pickle grid definition
  if mode == 'pickle_grid':
    
    for grid,experiment in zip(grids,experiments):
      
      print('')
      print('   ***   Pickling Grid Definition for {0:s}   ***   '.format(grid))
      print('')
      
      # load GridDefinition
      dataset = loadCMIP5(experiment=CESM_exps[experiment], grid=None, filetypes=['lnd'], period=(1979,1989))
      griddef = dataset.griddef
      #del griddef.xlon, griddef.ylat      
      print griddef
      griddef.name = grid
      print('   Loading Definition from \'{0:s}\''.format(dataset.name))
      # save pickle
      filename = '{0:s}/{1:s}'.format(grid_folder,griddef_pickle.format(grid))
      if os.path.exists(filename): os.remove(filename) # overwrite
      filehandle = open(filename, 'w')
      pickle.dump(griddef, filehandle)
      filehandle.close()
      
      print('   Saving Pickle to \'{0:s}\''.format(filename))
      print('')
      
      # load pickle to make sure it is right
      del griddef
      griddef = loadPickledGridDef(grid, res=None, folder=grid_folder)
      print(griddef)
      print('')
      print griddef.wrap360
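For reference, a hedged usage sketch of the wrappers defined above; the experiment name, filetypes, and the CESM_exps registry mirror the test section and stand in for whatever is registered in the project:

from projects.CESM_experiments import CESM_exps  # assumed experiment registry (see test section above)

cmip5_ts = loadCMIP5_TS(experiment='Ctrl-1', filetypes=['atm'], exps=CESM_exps)
print(cmip5_ts.time.coord[:3])     # months since 1979-01
cmip5_clim = loadCMIP5(experiment='Ctrl-1', period=15, filetypes=['atm'], exps=CESM_exps)
print(len(cmip5_clim.time))        # 12-month climatology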
      
Example #12
def loadGPCC_TS(name=dataset_name,
                grid=None,
                varlist=None,
                resolution='25',
                varatts=None,
                filelist=None,
                folder=None,
                lautoregrid=None):
    ''' Get a properly formatted dataset with the monthly GPCC time-series. '''
    if grid is None:
        # load from original time-series files
        if folder is None: folder = orig_ts_folder
        # prepare input
        if resolution not in ('05', '10', '25'):
            raise DatasetError, "Selected resolution '%s' is not available!" % resolution
        # translate varlist
        if varatts is None: varatts = tsvaratts.copy()
        if varlist is None: varlist = varatts.keys()
        if varlist and varatts: varlist = translateVarNames(varlist, varatts)
        if filelist is None:  # generate default filelist
            filelist = []
            if 'p' in varlist:
                filelist.append(orig_ts_file.format('precip', resolution))
            if 's' in varlist:
                filelist.append(orig_ts_file.format('statio', resolution))
        # load dataset
        dataset = DatasetNetCDF(name=name,
                                folder=folder,
                                filelist=filelist,
                                varlist=varlist,
                                varatts=varatts,
                                multifile=False,
                                ncformat='NETCDF4_CLASSIC')
        # replace time axis with number of month since Jan 1979
        data = np.arange(0, len(dataset.time), 1, dtype='int16') + (
            1901 - 1979) * 12  # month since 1979 (Jan 1979 = 0)
        timeAxis = Axis(name='time',
                        units='month',
                        coord=data,
                        atts=dict(long_name='Month since 1979-01'))
        dataset.replaceAxis(dataset.time, timeAxis, asNC=False, deepcopy=False)
        # add GDAL info
        dataset = addGDALtoDataset(dataset, projection=None, geotransform=None)
        # N.B.: projection should be auto-detected as geographic
    else:
        # load from neatly formatted and regridded time-series files
        if folder is None: folder = avgfolder
        grid, resolution = checkGridRes(grid,
                                        resolution,
                                        period=None,
                                        lclim=False)
        dataset = loadObservations(name=name,
                                   folder=folder,
                                   projection=None,
                                   resolution=resolution,
                                   grid=grid,
                                   period=None,
                                   varlist=varlist,
                                   varatts=varatts,
                                   filepattern=tsfile,
                                   filelist=filelist,
                                   lautoregrid=lautoregrid,
                                   mode='time-series')
    # return formatted dataset
    return dataset
Example #13
def loadCRU_TS(name=dataset_name,
               grid=None,
               varlist=None,
               resolution=None,
               varatts=None,
               filelist=None,
               folder=None,
               lautoregrid=None):
    ''' Get a properly formatted CRU dataset with monthly mean time-series. '''
    if grid is None:
        # load from original time-series files
        if folder is None: folder = orig_ts_folder
        # translate varlist
        if varatts is None: varatts = tsvaratts.copy()
        if varlist is None: varlist = varatts.keys()
        if varlist and varatts: varlist = translateVarNames(varlist, varatts)
        # assemble filelist
        if filelist is None:  # generate default filelist
            filelist = [
                orig_ts_file.format(var) for var in varlist
                if var not in nofile
            ]
        # load dataset
        dataset = DatasetNetCDF(name=name,
                                folder=folder,
                                filelist=filelist,
                                varlist=varlist,
                                varatts=varatts,
                                multifile=False,
                                ncformat='NETCDF4_CLASSIC')
        # replace time axis with number of month since Jan 1979
        data = np.arange(0, len(dataset.time), 1, dtype='int16') + (
            1901 - 1979) * 12  # month since 1979 (Jan 1979 = 0)
        timeAxis = Axis(name='time',
                        units='month',
                        coord=data,
                        atts=dict(long_name='Month since 1979-01'))
        dataset.replaceAxis(dataset.time, timeAxis, asNC=False, deepcopy=False)
        # add projection
        dataset = addGDALtoDataset(dataset,
                                   projection=None,
                                   geotransform=None,
                                   gridfolder=grid_folder)
        # N.B.: projection should be auto-detected as geographic
    else:
        # load from neatly formatted and regridded time-series files
        if folder is None: folder = avgfolder
        dataset = loadObservations(name=name,
                                   folder=folder,
                                   projection=None,
                                   resolution=None,
                                   grid=grid,
                                   period=None,
                                   varlist=varlist,
                                   varatts=varatts,
                                   filepattern=tsfile,
                                   filelist=filelist,
                                   lautoregrid=lautoregrid,
                                   mode='time-series')
    # return formatted dataset
    return dataset