Example #1
def getCommonGrid(grid, res=None, lfilepath=False):
  ''' return definitions of commonly used grids (either from datasets or pickles) '''
  # try pickle first
  griddef = loadPickledGridDef(grid=grid, res=res, folder=grid_folder, 
                               check=False, lfilepath=lfilepath)
  # alternatively look in known datasets
  if griddef is None:
    try:
      dataset = import_module(grid)
      if res is None: griddef = dataset.default_grid
      else: griddef = dataset.grid_def[res]
      if lfilepath: griddef.filepath = None # monkey-patch...
    except ImportError:
      griddef = None
#       assert (elon-slon) % dlon == 0 
#       lon = np.linspace(slon+dlon/2,elon-dlon/2,(elon-slon)/dlon)
#       assert (elat-slat) % dlat == 0
#       lat = np.linspace(slat+dlat/2,elat-dlat/2,(elat-slat)/dlat)
#       # add new geographic coordinate axes for projected map
#       xlon = Axis(coord=lon, atts=dict(grid='lon', long_name='longitude', units='deg E'))
#       ylat = Axis(coord=lat, atts=dict(grid='lat', long_name='latitude', units='deg N'))
#       gridstr = '{0:s}_{1:s}'.format(grid,res) if res is not None else grid
  # return grid definition object (or None, if none found)
  return griddef
Example #2
def getCommonGrid(grid, res=None):
  ''' return definitions of commonly used grids (either from datasets or pickles) '''
  # try pickle first
  griddef = loadPickledGridDef(grid=grid, res=res, folder=grid_folder, check=False)
  # alternatively look in known datasets
  if griddef is None:
    try:
      dataset = import_module(grid)
      if res is None:
        griddef = dataset.default_grid
      else:
        griddef = dataset.grid_def[res]
    except ImportError:
      griddef = None
#       assert (elon-slon) % dlon == 0 
#       lon = np.linspace(slon+dlon/2,elon-dlon/2,(elon-slon)/dlon)
#       assert (elat-slat) % dlat == 0
#       lat = np.linspace(slat+dlat/2,elat-dlat/2,(elat-slat)/dlat)
#       # add new geographic coordinate axes for projected map
#       xlon = Axis(coord=lon, atts=dict(grid='lon', long_name='longitude', units='deg E'))
#       ylat = Axis(coord=lat, atts=dict(grid='lat', long_name='latitude', units='deg N'))
#       gridstr = '{0:s}_{1:s}'.format(grid,res) if res is not None else grid
  # return grid definition object (or None, if none found)
  return griddef
Example #3
def loadObservations(name=None, folder=None, period=None, grid=None, station=None, shape=None, lencl=False, 
                     varlist=None, varatts=None, filepattern=None, filelist=None, resolution=None, 
                     projection=None, geotransform=None, axes=None, lautoregrid=None, mode='climatology'):
  ''' A function to load standardized observational datasets. '''
  # prepare input
  if mode.lower() == 'climatology': # post-processed climatology files
    # transform period
    if period is None or period == '':
      if name not in ('PCIC','PRISM','GPCC','NARR'): 
        raise ValueError("A period is required to load observational climatologies.")
    elif isinstance(period,basestring):
      period = tuple([int(prd) for prd in period.split('-')]) 
    elif not isinstance(period,(int,np.integer)) and not ( isinstance(period,tuple) and len(period) == 2 ):
      raise TypeError(period)
  elif mode.lower() in ('time-series','timeseries'): # concatenated time-series files
    period = None # to indicate time-series (but for safety, the input must be more explicit)
    if lautoregrid is None: lautoregrid = False # this can take very long!
  # cast/copy varlist
  if isinstance(varlist,basestring): varlist = [varlist] # cast as list
  elif varlist is not None: varlist = list(varlist) # make copy to avoid interference
  # figure out station and shape options
  if station and shape: raise ArgumentError()
  elif station or shape: 
    if grid is not None: raise NotImplementedError('Currently observational station data can only be loaded from the native grid.')
    if lautoregrid: raise GDALError('Station data can not be regridded, since it is not map data.')
    lstation = bool(station); lshape = bool(shape)
    grid = station if lstation else shape
    # add station/shape parameters
    if varlist:
      params = stn_params if lstation else shp_params
      for param in params:
        if param not in varlist: varlist.append(param)    
  else:
    lstation = False; lshape = False
  # varlist (varlist = None means all variables)
  if varatts is None: varatts = default_varatts.copy()
  if varlist is not None: varlist = translateVarNames(varlist, varatts)
  # filelist
  if filelist is None: 
    filename = getFileName(name=name, resolution=resolution, period=period, grid=grid, filepattern=filepattern)
    # check existence
    filepath = '{:s}/{:s}'.format(folder,filename)
    if not os.path.exists(filepath):
      nativename = getFileName(name=name, resolution=resolution, period=period, grid=None, filepattern=filepattern)
      nativepath = '{:s}/{:s}'.format(folder,nativename)
      if os.path.exists(nativepath):
        if lautoregrid: 
          from processing.regrid import performRegridding # causes circular reference if imported earlier
          griddef = loadPickledGridDef(grid=grid, res=None, folder=grid_folder)
          dataargs = dict(period=period, resolution=resolution)
          performRegridding(name, 'climatology',griddef, dataargs) # default kwargs
        else: raise IOError("The dataset '{:s}' for the selected grid ('{:s}') is not available - use the regrid module to generate it.".format(filename,grid) )
      else: raise IOError("The dataset file '{:s}' does not exits!\n('{:s}')".format(filename,filepath))
  # load dataset
  dataset = DatasetNetCDF(name=name, folder=folder, filelist=[filename], varlist=varlist, varatts=varatts, 
                          axes=axes, multifile=False, ncformat='NETCDF4')
  # mask all shapes that are incomplete in dataset
  if shape and lencl and 'shp_encl' in dataset: 
    dataset.load() # need to load data before masking; is cheap for shape averages, anyway
    dataset.mask(mask='shp_encl', invert=True, skiplist=shp_params)
  # correct ordinal number of shape (should start at 1, not 0)
  if lshape:
    if dataset.hasAxis('shapes'): raise AxisError("Axis 'shapes' should be renamed to 'shape'!")
    if not dataset.hasAxis('shape'): 
      raise AxisError()
    if dataset.shape.coord[0] == 0: dataset.shape.coord += 1
  # figure out grid
  if not lstation and not lshape:
    if grid is None or grid == name:
      dataset = addGDALtoDataset(dataset, projection=projection, geotransform=geotransform, gridfolder=grid_folder)
    elif isinstance(grid,basestring): # load from pickle file
  #     griddef = loadPickledGridDef(grid=grid, res=None, filename=None, folder=grid_folder)
      # add GDAL functionality to dataset 
      dataset = addGDALtoDataset(dataset, griddef=grid, gridfolder=grid_folder)
    else: raise TypeError(grid)
    # N.B.: projection should be auto-detected, if geographic (lat/lon)
  return dataset
Example #4
        
        if griddef is None:
          print('GridDefinition object for {0:s} not found!'.format(gridstr))         
        else:
          # save pickle
          filename = '{0:s}/{1:s}'.format(grid_folder,griddef_pickle.format(gridstr))
          filehandle = open(filename, 'wb') # pickles should be written in binary mode
          pickle.dump(griddef, filehandle)
          filehandle.close()
          
          print('   Saving Pickle to \'{0:s}\''.format(filename))
          print('')
          
          # load pickle to make sure it is right
          del griddef
          griddef = loadPickledGridDef(grid, res=res, folder=grid_folder)
          print(griddef)
        print('')

  ## create a new grid
  elif mode == 'create_grid':
    
    # parameters for UTM 17
    name = 'grw1' # 1km resolution
    geotransform = [500.e3,1.e3,0,4740.e3,0,1.e3]; size = (132,162)
#     name = 'grw2' # 5km resolution
#     geotransform = [500.e3,5.e3,0,4740.e3,0,5.e3]; size = (27,33)
    projection = "+proj=utm +zone=17 +north +ellps=WGS84 +datum=WGS84 +units=m +no_defs"
    # N.B.: [x_0, dx, 0, y_0, 0, dy]
    #       GT(0),GT(3) are the coordinates of the bottom left corner
    #       GT(1) & GT(5) are pixel width and height
Example #5
      sink = DatasetNetCDF(name='GPCC Climatology', folder=avgfolder, filelist=[filename], atts=source.atts, mode='w')
#       sink = addGDALtoDataset(sink, griddef=source.griddef)
      
      # initialize processing
      CPU = CentralProcessingUnit(source, sink, tmp=True)

      if period is not None:
        # determine averaging interval
        offset = source.time.getIndex(period[0]-1979)/12 # origin of monthly time-series is at January 1979 
        # start processing climatology
        CPU.Climatology(period=period[1]-period[0], offset=offset, flush=False)
#         CPU.sync(flush=True)

      # get NARR coordinates
      if grid != 'GPCC': # string comparison requires '!=', not 'is not'
        griddef = loadPickledGridDef(grid=grid, res=None, folder=grid_folder)
        #new_grid = import_module(grid[0:4]).__dict__[grid+'_grid']
#       if grid == 'NARR':
#         from datasets.NARR import NARR_grid
        # reproject and resample (regrid) dataset
        CPU.Regrid(griddef=griddef, flush=False)
            
#       # shift longitude axis by 180 degrees  left (i.e. -180 - 180 -> 0 - 360)
#       print('')
#       print('   +++   processing shift longitude   +++   ') 
#       CPU.Shift(lon=-180, flush=True)
#       print('\n')
#
#       # shift longitude axis by 180 degrees  left (i.e. -180 - 180 -> 0 - 360)
#       print('')
#       print('   +++   processing shift/roll   +++   ') 
Example #6
    ## operational config for SON2
#     project = 'SON'
#     start_date = '2011-01-01'; end_date = None
#     grid_name  = 'son2'
    ## operational config for ASB2
#     project = 'ASB'
#     start_date = '2010-01-01'; end_date = None
#     grid_name  = 'asb2'

    ## define target grid/projection
    # projection/UTM zone
    tgt_size = None; tgt_geotrans = None # valid for native grid
    if project == 'WRF':
        # load pickled GridDef
        from geodata.gdal import loadPickledGridDef
        griddef = loadPickledGridDef(grid=grid_name, encoding='latin1')
        print(griddef)
        tgt_crs = genProj(griddef.projection.ExportToProj4(), name=grid_name)
        tgt_geotrans = griddef.geotransform; tgt_size = griddef.size
    elif project == 'SnoDAS':
        tgt_crs = None # native grid
    elif project.upper() in ('SON','GRW'):
        # southern Ontario projection
        tgt_crs = genProj("+proj=utm +zone=17 +north +ellps=WGS84 +datum=WGS84 +units=m +no_defs", name=grid_name)
    elif project.upper() == 'CMB':
        # CMB projection (UTM zone 11)
        tgt_crs = genProj("+proj=utm +zone=11 +north +ellps=WGS84 +datum=WGS84 +units=m +no_defs", name=grid_name)
    elif project.upper() == 'ASB':
        # Assiniboine projection
        tgt_crs = genProj("+proj=utm +zone=14 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs", name=grid_name)
    # grid definition (mostly UTM grids for HGS)
Example #7
def loadObservations(name=None, folder=None, period=None, grid=None, station=None, shape=None, lencl=False, 
                     varlist=None, varatts=None, filepattern=None, filelist=None, resolution=None, 
                     projection=None, geotransform=None, axes=None, lautoregrid=None, mode='climatology'):
  ''' A function to load standardized observational datasets. '''
  # prepare input
  if mode.lower() == 'climatology': # post-processed climatology files
    # transform period
    if period is None or period == '':
      if name not in ('PCIC','PRISM','GPCC','NARR'): 
        raise ValueError("A period is required to load observational climatologies.")
    elif isinstance(period,basestring):
      period = tuple([int(prd) for prd in period.split('-')]) 
    elif not isinstance(period,(int,np.integer)) and not ( isinstance(period,tuple) and len(period) == 2 ):
      raise TypeError(period)
  elif mode.lower() in ('time-series','timeseries'): # concatenated time-series files
    period = None # to indicate time-series (but for safety, the input must be more explicit)
    if lautoregrid is None: lautoregrid = False # this can take very long!
  # cast/copy varlist
  if isinstance(varlist,basestring): varlist = [varlist] # cast as list
  elif varlist is not None: varlist = list(varlist) # make copy to avoid interference
  # figure out station and shape options
  if station and shape: raise ArgumentError()
  elif station or shape: 
    if grid is not None: raise NotImplementedError('Currently observational station data can only be loaded from the native grid.')
    if lautoregrid: raise GDALError('Station data can not be regridded, since it is not map data.')
    lstation = bool(station); lshape = bool(shape)
    grid = station if lstation else shape
    # add station/shape parameters
    if varlist:
      params = stn_params if lstation else shp_params
      for param in params:
        if param not in varlist: varlist.append(param)    
  else:
    lstation = False; lshape = False
  # varlist (varlist = None means all variables)
  if varatts is None: varatts = default_varatts.copy()
  if varlist is not None: varlist = translateVarNames(varlist, varatts)
  # filelist
  if filelist is None: 
    filename = getFileName(name=name, resolution=resolution, period=period, grid=grid, filepattern=filepattern)
    # check existence
    filepath = '{:s}/{:s}'.format(folder,filename)
    if not os.path.exists(filepath):
      nativename = getFileName(name=name, resolution=resolution, period=period, grid=None, filepattern=filepattern)
      nativepath = '{:s}/{:s}'.format(folder,nativename)
      if os.path.exists(nativepath):
        if lautoregrid: 
          from processing.regrid import performRegridding # causes circular reference if imported earlier
          griddef = loadPickledGridDef(grid=grid, res=None, folder=grid_folder)
          dataargs = dict(period=period, resolution=resolution)
          performRegridding(name, 'climatology',griddef, dataargs) # default kwargs
        else: raise IOError("The dataset '{:s}' for the selected grid ('{:s}') is not available - use the regrid module to generate it.".format(filename,grid))
      else: raise IOError("The dataset file '{:s}' does not exist!\n('{:s}')".format(filename,filepath))
  # load dataset
  dataset = DatasetNetCDF(name=name, folder=folder, filelist=[filename], varlist=varlist, varatts=varatts, 
                          axes=axes, multifile=False, ncformat='NETCDF4')
  # mask all shapes that are incomplete in dataset
  if shape and lencl and 'shp_encl' in dataset: 
    dataset.load() # need to load data before masking; is cheap for shape averages, anyway
    dataset.mask(mask='shp_encl', invert=True, skiplist=shp_params)
  # correct ordinal number of shape (should start at 1, not 0)
  if lshape:
    if dataset.hasAxis('shapes'): raise AxisError("Axis 'shapes' should be renamed to 'shape'!")
    if not dataset.hasAxis('shape'): 
      raise AxisError()
    if dataset.shape.coord[0] == 0: dataset.shape.coord += 1
  # figure out grid
  if not lstation and not lshape:
    if grid is None or grid == name:
      dataset = addGDALtoDataset(dataset, projection=projection, geotransform=geotransform, gridfolder=grid_folder)
    elif isinstance(grid,basestring): # load from pickle file
  #     griddef = loadPickledGridDef(grid=grid, res=None, filename=None, folder=grid_folder)
      # add GDAL functionality to dataset 
      dataset = addGDALtoDataset(dataset, griddef=grid, gridfolder=grid_folder)
    else: raise TypeError(grid)
    # N.B.: projection should be auto-detected, if geographic (lat/lon)
  return dataset
Example #8
      
        print('')        
        if res is None:
          gridstr = grid
          print('   ***   Pickling Grid Definition for {0:s}   ***   '.format(grid))
        else:
          gridstr = '{0:s}_{1:s}'.format(grid,res)  
          print('   ***   Pickling Grid Definition for {0:s} Resolution {1:s}   ***   '.format(grid,res))
        print('')
        
        # load GridDefinition      
        griddef = getCommonGrid(grid,res)         
        
        if griddef is None:
          print('GridDefinition object for {0:s} not found!'.format(gridstr))         
        else:
          # save pickle
          filename = '{0:s}/{1:s}'.format(grid_folder,griddef_pickle.format(gridstr))
          filehandle = open(filename, 'wb') # pickles should be written in binary mode
          pickle.dump(griddef, filehandle)
          filehandle.close()
          
          print('   Saving Pickle to \'{0:s}\''.format(filename))
          print('')
          
          # load pickle to make sure it is right
          del griddef
          griddef = loadPickledGridDef(grid, res=res, folder=grid_folder)
          print(griddef)
        print('')
Example #9
                                         'T2', 'Tmin', 'Tmax', 'Q2', 'pet',
                                         'cldfrc', 'wetfrq', 'frzfrq'
                                     ],
                                     lautoregrid=True)
                    cruclim = loadCRU(period=(1979, 2009),
                                      grid=grid,
                                      varlist=[
                                          'T2', 'Tmin', 'Tmax', 'Q2', 'pet',
                                          'cldfrc', 'wetfrq', 'frzfrq'
                                      ],
                                      lautoregrid=True)

                # grid definition
                try:
                    griddef = loadPickledGridDef(grid=grid,
                                                 res=None,
                                                 folder=grid_folder)
                    grid_name = griddef.name
                except IOError:
                    griddef = None
                    grid_name = grid
                periodstr = '{0:4d}-{1:4d}'.format(*period)

                print(
                    '\n   ***   Merging Climatology from {0:s} on {1:s} Grid  ***   \n'
                    .format(
                        periodstr,
                        grid,
                    ))
                ## prepare target dataset
                filename = getFileName(grid=grid_name,
Example #10
  def __init__(self):
    self.name = 'const' 
    self.atts = dict(orog    = dict(name='zs', units='m')) # surface altitude
    
# axes (don't have their own file)
class Axes(FileType):
  ''' A mock-filetype for axes. '''
  def __init__(self):
    self.atts = dict(time        = dict(name='time', units='days', offset=-47116, atts=dict(long_name='Month since 1979')), # time coordinate (days since 1979-01-01)
                     # NOTE THAT THE CMIP5 DATASETS HAVE DIFFERENT TIME OFFSETS BETWEEN MEMBERS !!!
                     # N.B.: the time coordinate is only used for the monthly time-series data, not the LTM;
                     #       the time offset is chosen such that 1979 begins with the origin (time=0)
                     lon           = dict(name='lon', units='deg E'), # west-east coordinate
                     lat           = dict(name='lat', units='deg N'), # south-north coordinate
                     plev = dict(name='lev', units='')) # hybrid pressure coordinate
    self.vars = self.atts.keys()

# Time-Series (monthly)
def loadCMIP5_TS(experiment=None, name=None, grid=None, filetypes=None, varlist=None, varatts=None,  
                translateVars=None, lautoregrid=None, load3D=False, ignore_list=None, lcheckExp=True,
                lreplaceTime=True, lwrite=False, exps=None):
  ''' Get a properly formatted CMIP5 dataset with a monthly time-series. (wrapper for loadCMIP5_All)'''
  return loadCMIP5_All(experiment=experiment, name=name, grid=grid, period=None, station=None, 
                      filetypes=filetypes, varlist=varlist, varatts=varatts, translateVars=translateVars, 
                      lautoregrid=lautoregrid, load3D=load3D, ignore_list=ignore_list, mode='time-series', 
                      lcheckExp=lcheckExp, lreplaceTime=lreplaceTime, lwrite=lwrite, exps=exps)

# load minimally pre-processed CMIP5 climatology files 
def loadCMIP5(experiment=None, name=None, grid=None, period=None, filetypes=None, varlist=None, 
             varatts=None, translateVars=None, lautoregrid=None, load3D=False, ignore_list=None, 
             lcheckExp=True, lreplaceTime=True, lencl=False, lwrite=False, exps=None):
  ''' Get a properly formatted monthly CMIP5 climatology as NetCDFDataset. '''
  return loadCMIP5_All(experiment=experiment, name=name, grid=grid, period=period, station=None, 
                      filetypes=filetypes, varlist=varlist, varatts=varatts, translateVars=translateVars, 
                      lautoregrid=lautoregrid, load3D=load3D, ignore_list=ignore_list, exps=exps, 
                      mode='climatology', lcheckExp=lcheckExp, lreplaceTime=lreplaceTime, lwrite=lwrite)


# load any of the various pre-processed CMIP5 climatology and time-series files 
def loadCMIP5_All(experiment=None, name=None, grid=None, station=None, shape=None, period=None, 
                 varlist=None, varatts=None, translateVars=None, lautoregrid=None, load3D=False, 
                 ignore_list=None, mode='climatology', cvdp_mode=None, lcheckExp=True, exps=None,
                 lreplaceTime=True, filetypes=None, lencl=False, lwrite=False, check_vars=None):
  ''' Get any of the monthly CMIP5 files as a properly formatted NetCDFDataset. '''
  # period
  if isinstance(period,(tuple,list)):
    if not all(isNumber(period)): raise ValueError(period)
  elif isinstance(period,basestring): period = [int(prd) for prd in period.split('-')]
  elif isinstance(period,(int,np.integer)) or period is None : pass # handled later
  else: raise DateError("Illegal period definition: {:s}".format(str(period)))
  # prepare input  
  lclim = False; lts = False; lcvdp = False; ldiag = False # mode switches
  if mode.lower() == 'climatology': # post-processed climatology files
    lclim = True
    folder,experiment,name = getFolderName(name=name, experiment=experiment, folder=None, mode='avg', exps=exps)    
    if period is None: raise DateError('Currently CESM Climatologies have to be loaded with the period explicitly specified.')
  elif mode.lower() in ('time-series','timeseries'): # concatenated time-series files
    lts = True
    folder,experiment,name = getFolderName(name=name, experiment=experiment, folder=None, mode='avg', exps=exps)
    lclim = False; period = None; periodstr = None # to indicate time-series (but for safety, the input must be more explicit)
    if lautoregrid is None: lautoregrid = False # this can take very long!
  elif mode.lower() == 'cvdp': # concatenated time-series files
    lcvdp = True
    folder,experiment,name = getFolderName(name=name, experiment=experiment, folder=None, mode='cvdp', 
                                           cvdp_mode=cvdp_mode, exps=exps)
    if period is None:
      if not isinstance(experiment,Exp): raise DatasetError('Periods can only be inferred for registered datasets.')
      period = (experiment.beginyear, experiment.endyear)  
  elif mode.lower() == 'diag': # concatenated time-series files
    ldiag = True
    folder,experiment,name = getFolderName(name=name, experiment=experiment, folder=None, mode='diag', exps=exps)
    raise NotImplementedError, "Loading AMWG diagnostic files is not supported yet."
  else: raise NotImplementedError,"Unsupported mode: '{:s}'".format(mode)  
  # cast/copy varlist
  if isinstance(varlist,basestring): varlist = [varlist] # cast as list
  elif varlist is not None: varlist = list(varlist) # make copy to avoid interference
  # handle stations and shapes
  if station and shape: raise ArgumentError()
  elif station or shape: 
    if grid is not None: raise NotImplementedError('Currently CESM station data can only be loaded from the native grid.')
    if lcvdp: raise NotImplementedError('CVDP data is not available as station data.')
    if lautoregrid: raise GDALError('Station data can not be regridded, since it is not map data.')
    lstation = bool(station); lshape = bool(shape)
    # add station/shape parameters
    if varlist:
      params = stn_params if lstation else shp_params
      for param in params:
        if param not in varlist: varlist.append(param)
  else:
    lstation = False; lshape = False
  # period  
  if isinstance(period,(int,np.integer)):
    if not isinstance(experiment,Exp): raise DatasetError('Integer periods are only supported for registered datasets.')
    period = (experiment.beginyear, experiment.beginyear+period)
  if lclim: periodstr = '_{0:4d}-{1:4d}'.format(*period)
  elif lcvdp: periodstr = '{0:4d}-{1:4d}'.format(period[0],period[1]-1)
  else: periodstr = ''
  # N.B.: the period convention in CVDP is that the end year is included
  # generate filelist and attributes based on filetypes and domain
  if filetypes is None: filetypes = ['atm','lnd']
  elif isinstance(filetypes,(list,tuple,set,basestring)):
    if isinstance(filetypes,basestring): filetypes = [filetypes]
    else: filetypes = list(filetypes)
    # interpret/replace WRF filetypes (for convenience)
    tmp = []
    for ft in filetypes:
      if ft in ('const','drydyn3d','moist3d','rad','plev3d','srfc','xtrm','hydro'):
        if 'atm' not in tmp: tmp.append('atm')
      elif ft in ('lsm','snow'):
        if 'lnd' not in tmp: tmp.append('lnd')
      elif ft in ('aux',): pass # currently not supported
#       elif ft in (,):
#         if 'atm' not in tmp: tmp.append('atm')
#         if 'lnd' not in tmp: tmp.append('lnd')        
      else: tmp.append(ft)
    filetypes = tmp; del tmp
    if 'axes' not in filetypes: filetypes.append('axes')    
  else: raise TypeError(filetypes)  
  atts = dict(); filelist = []; typelist = []
  for filetype in filetypes:
    fileclass = fileclasses[filetype]
    if lclim and fileclass.climfile is not None: filelist.append(fileclass.climfile)
    elif lts and fileclass.tsfile is not None: filelist.append(fileclass.tsfile)
    elif lcvdp and fileclass.cvdpfile is not None: filelist.append(fileclass.cvdpfile)
    elif ldiag and fileclass.diagfile is not None: filelist.append(fileclass.diagfile)
    typelist.append(filetype)
    atts.update(fileclass.atts) 
  # figure out ignore list  
  if ignore_list is None: ignore_list = set(ignore_list_2D)
  elif isinstance(ignore_list,(list,tuple)): ignore_list = set(ignore_list)
  elif not isinstance(ignore_list,set): raise TypeError(ignore_list)
  if not load3D: ignore_list.update(ignore_list_3D)
  if lautoregrid is None: lautoregrid = not load3D # don't auto-regrid 3D variables - takes too long!
  # translate varlist
  if varatts is not None: atts.update(varatts)
  lSST = False
  if varlist is not None:
    varlist = list(varlist) 
    if 'SST' in varlist: # special handling of name SST variable, as it is part of Ts
      varlist.remove('SST')
      if not 'Ts' in varlist: varlist.append('Ts')
      lSST = True # Ts is renamed to SST below
    if translateVars is None: varlist = list(varlist) + translateVarNames(varlist, atts) # also add translations, just in case
    elif translateVars is True: varlist = translateVarNames(varlist, atts) 
    # N.B.: DatasetNetCDF never applies translation!
  # NetCDF file mode
  ncmode = 'rw' if lwrite else 'r'   
  # get grid or station-set name
  if lstation:
    # the station name can be inserted as the grid name
    gridstr = '_'+station.lower(); # only use lower case for filenames
    griddef = None
  elif lshape:
    # the shape name can be inserted as the grid name
    gridstr = '_'+shape.lower(); # only use lower case for filenames
    griddef = None
  else:
    if grid is None or grid == experiment.grid: 
      gridstr = ''; griddef = None
    else: 
      gridstr = '_'+grid.lower() # only use lower case for filenames
      griddef = loadPickledGridDef(grid=grid, res=None, filename=None, folder=grid_folder, check=True)
  # insert grid name and period
  filenames = []
  for filetype,fileformat in zip(typelist,filelist):
    if lclim: filename = fileformat.format(gridstr,periodstr) # put together specific filename for climatology
    elif lts: filename = fileformat.format(gridstr) # or for time-series
    elif lcvdp: filename = fileformat.format(experiment.name if experiment else name,periodstr) # not implemented: gridstr
    elif ldiag: raise NotImplementedError
    else: raise DatasetError
    filenames.append(filename) # append to list (passed to DatasetNetCDF later)
    # check existence
    filepath = '{:s}/{:s}'.format(folder,filename)
    if not os.path.exists(filepath):
      nativename = fileformat.format('',periodstr) # original filename (before regridding)
      nativepath = '{:s}/{:s}'.format(folder,nativename)
      if os.path.exists(nativepath):
        if lautoregrid: 
          from processing.regrid import performRegridding # causes circular reference if imported earlier
          griddef = loadPickledGridDef(grid=grid, res=None, folder=grid_folder)
          dataargs = dict(experiment=experiment, filetypes=[filetype], period=period)
          print("The '{:s}' (CESM) dataset for the grid ('{:s}') is not available:\n Attempting regridding on-the-fly.".format(name,filename,grid))
          if performRegridding('CESM','climatology' if lclim else 'time-series', griddef, dataargs): # default kwargs
            raise IOError, "Automatic regridding failed!"
          print("Output: '{:s}'".format(name,filename,grid,filepath))            
        else: raise IOError, "The '{:s}' (CESM) dataset '{:s}' for the selected grid ('{:s}') is not available - use the regrid module to generate it.".format(name,filename,grid) 
      else: raise IOError, "The '{:s}' (CESM) dataset file '{:s}' does not exits!\n({:s})".format(name,filename,folder)
   
  # load dataset
  #print varlist, filenames
  if experiment: title = experiment.title
  else: title = name
  dataset = DatasetNetCDF(name=name, folder=folder, filelist=filenames, varlist=varlist, axes=None, 
                          varatts=atts, title=title, multifile=False, ignore_list=ignore_list, 
                          ncformat='NETCDF4', squeeze=True, mode=ncmode, check_vars=check_vars)
  # replace time axis
  if lreplaceTime:
    if lts or lcvdp:
      # check time axis and center at 1979-01 (zero-based)
      if experiment is None: ys = period[0]; ms = 1
      else: ys,ms,ds = [int(t) for t in experiment.begindate.split('-')]; assert ds == 1
      if dataset.hasAxis('time'):
        ts = (ys-1979)*12 + (ms-1); te = ts+len(dataset.time) # month since 1979 (Jan 1979 = 0)
        atts = dict(long_name='Month since 1979-01')
        timeAxis = Axis(name='time', units='month', coord=np.arange(ts,te,1, dtype='int16'), atts=atts)
        dataset.replaceAxis(dataset.time, timeAxis, asNC=False, deepcopy=False)
      if dataset.hasAxis('year'):
        ts = ys-1979; te = ts+len(dataset.year) # years since 1979 (1979 = 0)
        atts = dict(long_name='Years since 1979-01')
        yearAxis = Axis(name='year', units='year', coord=np.arange(ts,te,1, dtype='int16'), atts=atts)
        dataset.replaceAxis(dataset.year, yearAxis, asNC=False, deepcopy=False)
    elif lclim:
      if dataset.hasAxis('time') and not dataset.time.units.lower() in monthlyUnitsList:
        atts = dict(long_name='Month of the Year')
        timeAxis = Axis(name='time', units='month', coord=np.arange(1,13, dtype='int16'), atts=atts)
        assert len(dataset.time) == len(timeAxis), dataset.time
        dataset.replaceAxis(dataset.time, timeAxis, asNC=False, deepcopy=False)
      elif dataset.hasAxis('year'): raise NotImplementedError(dataset)
  # rename SST
  if lSST: dataset['SST'] = dataset.Ts
  # correct ordinal number of shape (should start at 1, not 0)
  if lshape:
    # mask all shapes that are incomplete in dataset
    if lencl and 'shp_encl' in dataset: dataset.mask(mask='shp_encl', invert=True)   
    if dataset.hasAxis('shapes'): raise AxisError("Axis 'shapes' should be renamed to 'shape'!")
    if not dataset.hasAxis('shape'): raise AxisError()
    if dataset.shape.coord[0] == 0: dataset.shape.coord += 1
  # check
  if len(dataset) == 0: raise DatasetError('Dataset is empty - check source file or variable list!')
  # add projection, if applicable
  if not ( lstation or lshape ):
    dataset = addGDALtoDataset(dataset, griddef=griddef, gridfolder=grid_folder, lwrap360=True, geolocator=True)
  # return formatted dataset
  return dataset

## Dataset API

dataset_name = 'CMIP5' # dataset name
root_folder # root folder of the dataset
avgfolder # root folder for monthly averages
outfolder # root folder for direct WRF output
ts_file_pattern = 'cmip5{0:s}{1:s}_monthly.nc' # filename pattern: filetype, grid
clim_file_pattern = 'cmip5{0:s}{1:s}_clim{2:s}.nc' # filename pattern: filetype, grid, period
data_folder = root_folder # folder for user data
grid_def = {'':None} # there are too many... 
grid_res = {'':1.} # approximate grid resolution at 45 degrees latitude
default_grid = None 
# functions to access specific datasets
loadLongTermMean = None # CMIP5 doesn't have that...
loadClimatology = loadCMIP5 # pre-processed, standardized climatology
loadTimeSeries = loadCMIP5_TS # time-series data
#loadStationClimatology = loadCESM_Stn # pre-processed, standardized climatology at stations
#loadStationTimeSeries = loadCESM_StnTS # time-series data at stations
#loadShapeClimatology = loadCESM_Shp # climatologies without associated grid (e.g. provinces or basins) 
#loadShapeTimeSeries = loadCESM_ShpTS # time-series without associated grid (e.g. provinces or basins)


## (ab)use main execution for quick test
if __name__ == '__main__':
  
  # set mode/parameters
#   mode = 'test_climatology'
#   mode = 'test_timeseries'
#   mode = 'test_ensemble'
#   mode = 'test_point_climatology'
#   mode = 'test_point_timeseries'
#   mode = 'test_point_ensemble'
#   mode = 'test_cvdp'
  mode = 'pickle_grid'
#     mode = 'shift_lon'
#   experiments = ['Ctrl-1', 'Ctrl-A', 'Ctrl-B', 'Ctrl-C']
#   experiments += ['Ctrl-2050', 'Ctrl-A-2050', 'Ctrl-B-2050', 'Ctrl-C-2050']
  experiments = ('Ctrl-1',)
  periods = (15,)
  filetypes = ('atm',) # ['atm','lnd','ice']
  grids = ('cesm1x1',)*len(experiments) # grb1_d01
#   pntset = 'shpavg'
  pntset = 'ecprecip'

  from projects.CESM_experiments import Exp, CESM_exps, ensembles
  # N.B.: importing Exp through CESM_experiments is necessary, otherwise some isinstance() calls fail

  # pickle grid definition
  if mode == 'pickle_grid':
    
    for grid,experiment in zip(grids,experiments):
      
      print('')
      print('   ***   Pickling Grid Definition for {0:s}   ***   '.format(grid))
      print('')
      
      # load GridDefinition
      dataset = loadCMIP5(experiment=CESM_exps[experiment], grid=None, filetypes=['lnd'], period=(1979,1989))
      griddef = dataset.griddef
      #del griddef.xlon, griddef.ylat      
      print(griddef)
      griddef.name = grid
      print('   Loading Definition from \'{0:s}\''.format(dataset.name))
      # save pickle
      filename = '{0:s}/{1:s}'.format(grid_folder,griddef_pickle.format(grid))
      if os.path.exists(filename): os.remove(filename) # overwrite
      filehandle = open(filename, 'wb') # pickles should be written in binary mode
      pickle.dump(griddef, filehandle)
      filehandle.close()
      
      print('   Saving Pickle to \'{0:s}\''.format(filename))
      print('')
      
      # load pickle to make sure it is right
      del griddef
      griddef = loadPickledGridDef(grid, res=None, folder=grid_folder)
      print(griddef)
      print('')
      print(griddef.wrap360)
      
Example #11
    grid_name = 'son2'
    ##
    #     project = 'SNW'
    #     grid_name  = 'snw2'
    ## operational config for ASB2
    #     project = 'ASB'
    #     grid_name  = 'asb2'

    ## define target grid/projection
    # projection/UTM zone
    tgt_size = None
    tgt_geotrans = None  # valid for native grid
    if project == 'WRF':
        # load pickled GridDef
        from geodata.gdal import loadPickledGridDef
        griddef = loadPickledGridDef(grid=grid_name, encoding='latin1')
        print(griddef)
        tgt_crs = genCRS(griddef.projection.ExportToProj4(), name=grid_name)
        tgt_geotrans = griddef.geotransform
        tgt_size = griddef.size
    elif project == 'Geo':
        # generic geographic lat/lon
        tgt_crs = genCRS(name=grid_name)
    elif project == 'ARB':
        # Projection for ARB model
        tgt_crs = genCRS(
            '+proj=laea +lat_0=45 +lon_0=-100 +x_0=0 +y_0=0 +ellps=sphere +units=m +no_defs',
            name=grid_name)
    elif project == 'Hugo':
        # Hugo's projection for Quebec
        tgt_crs = genCRS(
Example #12
def loadXArray(varname=None, varlist=None, folder=None, grid=None, bias_correction=None, resolution=None, varatts=None, 
               filename_pattern=None, default_varlist=None, resampling=None, mask_and_scale=True, varmap=None,
               lgeoref=True, geoargs=None, chunks=None, lautoChunk=False, lskip=False, **kwargs):
    ''' function to open a dataset where variables are stored in separate files and non-native grids are stored in subfolders;
        this mainly applies to high-resolution, high-frequency (daily) observations (e.g. SnoDAS); datasets are opened using xarray '''
    if grid: 
        folder = '{}/{}'.format(folder,grid) # non-native grids are stored in sub-folders
        # auto-detect resampling folders 
        if resampling is None:
            old_folder = os.getcwd()
            os.chdir(folder)
            # inspect folder
            nc_file = False; default_folder = False; folder_list = []
            for item in os.listdir():
                if os.path.isfile(item):
                    if item.endswith('.nc'): nc_file = True
                elif os.path.isdir(item):
                    if item.lower() == 'default': default_folder = item
                    folder_list.append(item)
                else:
                    raise IOError(item)
            os.chdir(old_folder) # return
            # evaluate findings
            if nc_file: resampling = None
            elif default_folder: resampling = default_folder
            elif len(folder_list) == 1: resampling = folder_list[0]
        if resampling: 
            folder = '{}/{}'.format(folder,resampling) # different resampling options are stored in subfolders
    # load variables
    if bias_correction is None and 'resolution' in kwargs: bias_correction = kwargs['resolution'] # allow backdoor
    if varname and varlist: raise ValueError(varname,varlist)
    elif varname:
        varlist = [varname] # load a single variable
    elif varlist is None:
        varlist = default_varlist
    # apply varmap in reverse to varlist
    if varmap is None and varatts is not None:
        varmap = {name:atts.get('name',name) for name,atts in varatts.items()}
    if varmap is not None:
        ravmap = {value:key for key,value in varmap.items()}
        varlist = [ravmap.get(varname,varname) for varname in varlist]
    # construct dataset
    xds = None
    for varname in varlist:
        if grid: varname = '{}_{}'.format(varname,grid) # also append non-native grid name to varname
        if bias_correction: varname = '{}_{}'.format(bias_correction,varname) # prepend bias correction method
        filename = filename_pattern.format(VAR=varname, RES=resolution).lower()
        filepath = '{}/{}'.format(folder,filename)
        if os.path.exists(filepath):
            # load dataset
            ds = xr.open_dataset(filepath, chunks=chunks, mask_and_scale=mask_and_scale, **kwargs)
            # N.B.: the use of open_mfdataset is problematic, because it does not play nicely with chunking - 
            #       by default it loads everything as one chunk, and it only respects chunking, if chunks are 
            #       specified explicitly at the initial load time (later chunking seems to have no effect!)
            # merge into new dataset
            if xds is None: xds = ds
            else: xds.update(ds)
        elif not lskip:
            raise IOError("The dataset file '{}' was not found in folder:\n '{}'".format(filename,folder))
    # rewrite chunking, if desired (this happens here, so we can infer chunking from dimension sizes)
    if lautoChunk:
        xds = autoChunkXArray(xds, chunks=chunks)
    # rename and apply attributes
    if varatts or varmap:
        xds = updateVariableAttrs(xds, varatts=varatts, varmap=varmap)
    # add projection info
    if lgeoref:
        if geoargs is not None:
            # check
            if 'proj4' in xds.attrs and 'proj4' in geoargs:
                if xds.attrs['proj4'] != geoargs['proj4']:
                    raise ValueError(xds.attrs['proj4'])
            # custom options 
            xds = addGeoReference(xds, **geoargs)
        # default options            
        elif 'proj4' in xds.attrs: 
            # read projection string
            xds = addGeoReference(xds, proj4_string=xds.attrs['proj4'])
        elif grid:
            # load griddef from pickle
            from geodata.gdal import loadPickledGridDef
            griddef = loadPickledGridDef(grid=grid)
            xds = addGeoReference(xds, proj4_string=griddef.projection.ExportToProj4(),) 
        else: 
            # use default lat/lon, if it works...
            xds = addGeoReference(xds,) 
    return xds