Beispiel #1
0
def loadCRU_TS(name=dataset_name, grid=None, varlist=None, resolution=None, varatts=None, filelist=None, 
               folder=None, lautoregrid=None):
  ''' Load the CRU monthly mean time-series as a formatted dataset.
  
      If no grid is given, the original time-series files are opened (folder defaults to 
      orig_ts_folder), the time axis is replaced by a month count relative to January 1979, 
      and GDAL information is added. If a grid is given, loading is delegated to 
      loadObservations (folder defaults to avgfolder). 
      N.B.: the resolution argument is accepted but not used by this function. '''
  if grid is not None:
    # regridded data: read the pre-processed time-series files
    src_folder = avgfolder if folder is None else folder
    return loadObservations(name=name, folder=src_folder, projection=None, resolution=None, 
                            grid=grid, period=None, varlist=varlist, varatts=varatts, 
                            filepattern=tsfile, filelist=filelist, lautoregrid=lautoregrid, 
                            mode='time-series')
  # native grid: read the original time-series files
  src_folder = orig_ts_folder if folder is None else folder
  # fall back to the default attributes and variables, then translate the variable names
  if varatts is None: 
    varatts = tsvaratts.copy()
  if varlist is None: 
    varlist = varatts.keys()
  if varlist and varatts: 
    varlist = translateVarNames(varlist, varatts)
  # default file list: one file per variable, except for those listed in nofile
  if filelist is None:
    filelist = [orig_ts_file.format(varname) for varname in varlist 
                if varname not in nofile]
  # open the files
  cru = DatasetNetCDF(name=name, folder=src_folder, filelist=filelist, varlist=varlist, 
                      varatts=varatts, multifile=False, ncformat='NETCDF4_CLASSIC')
  # new time coordinate in months since Jan 1979 (Jan 1979 = 0); the offset shifts the 
  # first record to January 1901
  offset = (1901-1979)*12
  months = np.arange(0, len(cru.time), 1, dtype='int16') + offset
  month_axis = Axis(name='time', units='month', coord=months, 
                    atts=dict(long_name='Month since 1979-01'))
  cru.replaceAxis(cru.time, month_axis, asNC=False, deepcopy=False)
  # add GDAL info; no projection is passed (original note: should be auto-detected as geographic)
  return addGDALtoDataset(cru, projection=None, geotransform=None, gridfolder=grid_folder)
Beispiel #2
0
def loadGPCC_TS(name=dataset_name, grid=None, varlist=None, resolution='25', varatts=None, filelist=None, 
                folder=None, lautoregrid=None):
  ''' Get a properly formatted dataset with the monthly GPCC time-series. 
  
      If no grid is given, the original time-series files are opened (folder defaults to 
      orig_ts_folder); the resolution has to be one of '05', '10' or '25', otherwise a 
      DatasetError is raised. The time axis is replaced by a month count relative to 
      January 1979 and GDAL information is added. 
      If a grid is given, grid and resolution are passed through checkGridRes and loading 
      is delegated to loadObservations (folder defaults to avgfolder). '''
  if grid is None:
    # load from original time-series files 
    if folder is None: folder = orig_ts_folder
    # prepare input  
    if resolution not in ('05', '10', '25'): 
      # call-form of raise: valid in Python 2 and 3 (the statement form is Python 2 only)
      raise DatasetError("Selected resolution '%s' is not available!"%resolution)
    # translate varlist
    if varatts is None: varatts = tsvaratts.copy()
    if varlist is None: varlist = varatts.keys()
    if varlist and varatts: varlist = translateVarNames(varlist, varatts)
    if filelist is None: # generate default filelist
      # one file per variable: 'p' is read from the 'precip' file, 's' from the 'statio' file
      filelist = []
      if 'p' in varlist: filelist.append(orig_ts_file.format('precip',resolution))
      if 's' in varlist: filelist.append(orig_ts_file.format('statio',resolution))
    # load dataset
    dataset = DatasetNetCDF(name=name, folder=folder, filelist=filelist, varlist=varlist, varatts=varatts, 
                            multifile=False, ncformat='NETCDF4_CLASSIC')
    # replace time axis with number of month since Jan 1979 
    # N.B.: the offset shifts the first record to January 1901 (Jan 1979 = 0)
    data = np.arange(0,len(dataset.time),1, dtype='int16') + (1901-1979)*12 
    timeAxis = Axis(name='time', units='month', coord=data, atts=dict(long_name='Month since 1979-01'))
    dataset.replaceAxis(dataset.time, timeAxis, asNC=False, deepcopy=False)
    # add GDAL info
    dataset = addGDALtoDataset(dataset, projection=None, geotransform=None)
    # N.B.: projection should be auto-detected as geographic
  else:
    # load from neatly formatted and regridded time-series files
    if folder is None: folder = avgfolder
    grid, resolution = checkGridRes(grid, resolution, period=None, lclim=False)
    dataset = loadObservations(name=name, folder=folder, projection=None, resolution=resolution, grid=grid, 
                               period=None, varlist=varlist, varatts=varatts, filepattern=tsfile, 
                               filelist=filelist, lautoregrid=lautoregrid, mode='time-series')
  # return formatted dataset
  return dataset
Beispiel #3
0
def loadNARR_TS(name=dataset_name, grid=None, varlist=None, resolution=None, varatts=None, filelist=None, 
               folder=None, lautoregrid=None):
  ''' Load the NARR monthly mean time-series as a formatted dataset.
  
      If no grid is given, the original time-series files are opened (folder defaults to 
      orig_ts_folder), the time axis is replaced by a zero-based month count, and the 
      projection built from projdict is attached. If a grid is given, loading is delegated 
      to loadObservations (folder defaults to avgfolder). 
      N.B.: the resolution argument is accepted but not used by this function. '''
  if grid is not None:
    # regridded data: read the pre-processed time-series files
    src_folder = avgfolder if folder is None else folder
    return loadObservations(name=name, folder=src_folder, projection=None, resolution=None, 
                            grid=grid, period=None, varlist=varlist, varatts=varatts, 
                            filepattern=tsfile, filelist=filelist, lautoregrid=lautoregrid, 
                            mode='time-series')
  # native grid: read the original time-series files
  src_folder = orig_ts_folder if folder is None else folder
  # fall back to the default attributes and variables, then translate the variable names
  if varatts is None: 
    varatts = tsvaratts.copy()
  if varlist is None: 
    varlist = tsvarlist
  if varlist and varatts: 
    varlist = translateVarNames(varlist, varatts)
  # default file list: skip variables in nofile or without attributes; variables listed 
  # in special use their special file name instead of the variable name
  if filelist is None:
    filelist = []
    for varname in varlist:
      if varname in nofile or varname not in varatts: 
        continue
      filelist.append(orig_ts_file.format(special[varname] if varname in special else varname))
  # open the files
  narr = DatasetNetCDF(name=name, folder=src_folder, filelist=filelist, varlist=varlist, 
                       varatts=varatts, atts=projdict, multifile=False, 
                       ncformat='NETCDF4_CLASSIC')
  # new time coordinate in months since Jan 1979 (Jan 1979 = 0)
  months = np.arange(0, len(narr.time), 1, dtype='int16')
  month_axis = Axis(name='time', units='month', coord=months, 
                    atts=dict(long_name='Month since 1979-01'))
  narr.replaceAxis(narr.time, month_axis, asNC=False, deepcopy=False)
  # attach the projection defined by projdict
  narr_proj = getProjFromDict(projdict, name='{0:s} Coordinate System'.format(name))
  return addGDALtoDataset(narr, projection=narr_proj, geotransform=None, gridfolder=grid_folder)
Beispiel #4
0
def loadGPCC_LTM(
    name=dataset_name, varlist=None, resolution="025", varatts=ltmvaratts, filelist=None, folder=ltmfolder
):
    """Get a properly formatted dataset of the monthly accumulated GPCC precipitation climatology.

    The precipitation variable 'p' is read from 'normals_v2011_<resolution>.nc'; if 's' is
    requested as well, it is read from 'normals_gauges_v2011_<resolution>.nc' and attached
    to the same dataset using the lat/lon axes of the precipitation dataset.

    Arguments:
      name       -- name passed on to DatasetNetCDF
      varlist    -- variables to load (default: all keys of varatts); translated with
                    translateVarNames
      resolution -- one of '025', '05', '10', '25'
      varatts    -- variable attributes; varatts['s'] is expanded into the Variable constructor
      filelist   -- accepted for signature compatibility, but not used (file names are fixed)
      folder     -- folder containing the files; the gauge file path is built by plain
                    string concatenation, so the folder has to end with a path separator

    Raises DatasetError if the resolution is not available or if 'p' is not among the
    requested variables (the gauge variable can not be loaded on its own, because it
    borrows its axes from the precipitation dataset).
    """
    # prepare input
    if resolution not in ("025", "05", "10", "25"):
        # call-form of raise: valid in Python 2 and 3 (the statement form is Python 2 only)
        raise DatasetError("Selected resolution '%s' is not available!" % resolution)
    # translate varlist
    if varlist is None:
        varlist = varatts.keys()
    if varlist and varatts:
        varlist = translateVarNames(varlist, varatts)
    # everything below depends on the precipitation dataset; previously a variable list
    # without 'p' failed later with an unbound local variable
    if "p" not in varlist:
        raise DatasetError("Variable 'p' is required, because it provides the axes of the dataset!")
    # load variables separately
    dataset = DatasetNetCDF(
        name=name,
        folder=folder,
        filelist=["normals_v2011_%s.nc" % resolution],
        varlist=["p"],
        varatts=varatts,
        ncformat="NETCDF4_CLASSIC",
    )
    if "s" in varlist:
        gauges = nc.Dataset(folder + "normals_gauges_v2011_%s.nc" % resolution, mode="r", format="NETCDF4_CLASSIC")
        try:
            # slicing reads the first time step into memory, so the file can be closed afterwards
            gauge_data = gauges.variables["p"][0, :, :]
        finally:
            gauges.close()
        stations = Variable(data=gauge_data, axes=(dataset.lat, dataset.lon), **varatts["s"])
        # consolidate dataset
        dataset.addVariable(stations, asNC=False, copy=True)
    dataset = addGDALtoDataset(dataset, projection=None, geotransform=None, gridfolder=grid_folder)
    # N.B.: projection should be auto-detected as geographic
    # return formatted dataset
    return dataset
Beispiel #5
0
def loadNARR_LTM(name=dataset_name, varlist=None, grid=None, interval='monthly', varatts=None, filelist=None, folder=ltmfolder):
  ''' Get a properly formatted dataset of daily or monthly NARR climatologies (LTM). 
  
      Arguments:
        name     -- name passed on to DatasetNetCDF and used in the projection name
        varlist  -- variables to load (default: ltmvarlist); translated with translateVarNames
        grid     -- has to be None; loading regridded data is not implemented
        interval -- 'monthly' (12 time steps) or 'daily' (365 time steps)
        varatts  -- variable attributes (default: a copy of ltmvaratts)
        filelist -- files to open (default: variable name, or its entry in special, plus 
                    the interval suffix, for every variable that is not listed in nofile)
        folder   -- folder containing the files (default: ltmfolder)
        
      Raises NotImplementedError if a grid is given and DatasetError if the interval is 
      not supported. '''
  if grid is not None:
    # loading neatly formatted and regridded files is not available yet
    # call-form of raise: valid in Python 2 and 3 (the statement form is Python 2 only)
    raise NotImplementedError("Need to implement loading neatly formatted and regridded time-series!")
  # load from original files 
  # N.B.: this fallback only applies if folder=None is passed explicitly
  if folder is None: folder = orig_ts_folder
  # prepare input
  if varatts is None: varatts = ltmvaratts.copy()
  if varlist is None: varlist = ltmvarlist
  if interval == 'monthly': 
    pfx = '.mon.ltm.nc'; tlen = 12
  elif interval == 'daily': 
    pfx = '.day.ltm.nc'; tlen = 365
  else: raise DatasetError("Selected interval '%s' is not supported!"%interval)
  # translate varlist
  if varlist and varatts: varlist = translateVarNames(varlist, varatts)  
  # axes dictionary, primarily to override time axis 
  # NOTE(review): load=True ends up as an entry of the axes dictionary, not as an argument 
  #               of Axis - possibly a misplaced parenthesis; left unchanged, please confirm
  axes = dict(time=Axis(name='time',units='day',coord=(1,tlen,tlen)),load=True)
  if filelist is None: # generate default filelist
    filelist = [special[var]+pfx if var in special else var+pfx for var in varlist if var not in nofile]
  # load dataset
  dataset = DatasetNetCDF(name=name, folder=folder, filelist=filelist, varlist=varlist, varatts=varatts, 
                          axes=axes, atts=projdict, multifile=False, ncformat='NETCDF4_CLASSIC')
  # add projection
  projection = getProjFromDict(projdict, name='{0:s} Coordinate System'.format(name))
  # N.B.: keyword is gridfolder, as in all other calls to addGDALtoDataset (was: folder)
  dataset = addGDALtoDataset(dataset, projection=projection, geotransform=None, gridfolder=grid_folder)
  # return formatted dataset
  return dataset
Beispiel #6
0
def loadPCIC_LTM(name=dataset_name,
                 varlist=None,
                 varatts=ltmvaratts,
                 filelist=None,
                 folder=ltmfolder):
    ''' Get a properly formatted dataset of the monthly PCIC PRISM climatology. 
    
        Arguments:
          name     -- name passed on to DatasetNetCDF
          varlist  -- variables to load (default: all keys of varatts); translated with 
                      translateVarNames
          varatts  -- variable attributes
          filelist -- files to open; if None, one file per variable is generated from 
                      ltmfile, skipping the coordinate variables 'time', 'lat' and 'lon'
          folder   -- folder containing the files
    '''
    # translate varlist
    if varlist is None: varlist = varatts.keys()
    if varlist and varatts: varlist = translateVarNames(varlist, varatts)
    # generate file list, unless the caller supplied one
    # N.B.: an explicit filelist used to be silently overwritten here
    if filelist is None:
        filelist = [
            ltmfile.format(var) for var in varlist
            if var not in ('time', 'lat', 'lon')
        ]
    # load variables separately
    dataset = DatasetNetCDF(name=name,
                            folder=folder,
                            filelist=filelist,
                            varlist=varlist,
                            varatts=varatts,
                            ncformat='NETCDF4')
    dataset = addGDALtoDataset(dataset,
                               projection=None,
                               geotransform=None,
                               gridfolder=grid_folder)
    # N.B.: projection should be auto-detected as geographic
    # return formatted dataset
    return dataset
Beispiel #7
0
def loadGPCC_LTM(name=dataset_name,
                 varlist=None,
                 resolution='025',
                 varatts=ltmvaratts,
                 filelist=None,
                 folder=ltmfolder):
    ''' Get a properly formatted dataset of the monthly accumulated GPCC precipitation climatology. 
    
        The precipitation variable 'p' is read from 'normals_v2011_<resolution>.nc'; if 's' is 
        requested as well, it is read from 'normals_gauges_v2011_<resolution>.nc' and attached 
        to the same dataset using the lat/lon axes of the precipitation dataset.
        
        Arguments:
          name       -- name passed on to DatasetNetCDF
          varlist    -- variables to load (default: all keys of varatts); translated with 
                        translateVarNames
          resolution -- one of '025', '05', '10', '25'
          varatts    -- variable attributes; varatts['s'] is expanded into the Variable constructor
          filelist   -- accepted for signature compatibility, but not used (file names are fixed)
          folder     -- folder containing the files; the gauge file path is built by plain 
                        string concatenation, so the folder has to end with a path separator
                        
        Raises DatasetError if the resolution is not available or if 'p' is not among the 
        requested variables (the gauge variable can not be loaded on its own, because it 
        borrows its axes from the precipitation dataset). 
    '''
    # prepare input
    if resolution not in ('025', '05', '10', '25'):
        # call-form of raise: valid in Python 2 and 3 (the statement form is Python 2 only)
        raise DatasetError("Selected resolution '%s' is not available!" % resolution)
    # translate varlist
    if varlist is None: varlist = varatts.keys()
    if varlist and varatts: varlist = translateVarNames(varlist, varatts)
    # everything below depends on the precipitation dataset; previously a variable list
    # without 'p' failed later with an unbound local variable
    if 'p' not in varlist:
        raise DatasetError("Variable 'p' is required, because it provides the axes of the dataset!")
    # load variables separately
    dataset = DatasetNetCDF(name=name,
                            folder=folder,
                            filelist=['normals_v2011_%s.nc' % resolution],
                            varlist=['p'],
                            varatts=varatts,
                            ncformat='NETCDF4_CLASSIC')
    if 's' in varlist:
        gauges = nc.Dataset(folder + 'normals_gauges_v2011_%s.nc' % resolution,
                            mode='r',
                            format='NETCDF4_CLASSIC')
        try:
            # slicing reads the first time step into memory, so the file can be closed afterwards
            gauge_data = gauges.variables['p'][0, :, :]
        finally:
            gauges.close()
        stations = Variable(data=gauge_data,
                            axes=(dataset.lat, dataset.lon),
                            **varatts['s'])
        # consolidate dataset
        dataset.addVariable(stations, asNC=False, copy=True)
    dataset = addGDALtoDataset(dataset,
                               projection=None,
                               geotransform=None,
                               gridfolder=grid_folder)
    # N.B.: projection should be auto-detected as geographic
    # return formatted dataset
    return dataset
Beispiel #8
0
def loadCFSR_TS(name=dataset_name, grid=None, varlist=None, varatts=None, resolution='hires', 
                filelist=None, folder=None, lautoregrid=None):
  ''' Get a properly formatted CFSR dataset with monthly mean time-series. 
  
      If no grid is given, the original time-series files are opened (folder defaults to 
      orig_ts_folder). The resolution selects the default variable list and the default 
      files: 'hires', '03' and '031' select the high-resolution set, 'lowres' and '05' the 
      low-resolution set; anything else raises a ValueError. If filelist is given, it is 
      used as is and no static data are loaded; otherwise the default files are opened and 
      static fields are added (after removing singleton dimensions) for variables that are 
      not already in the dataset. Finally, the time axis is replaced by a zero-based month 
      count and GDAL information is added. 
      If a grid is given, grid and resolution are passed through checkGridRes and loading 
      is delegated to loadObservations (folder defaults to avgfolder). '''
  if grid is None:
    # load from original time-series files 
    if folder is None: folder = orig_ts_folder
    # classify the resolution once
    # N.B.: an unsupported resolution used to fail later with an unbound local variable
    if resolution in ('hires', '03', '031'): lhires = True
    elif resolution in ('lowres', '05'): lhires = False
    else: raise ValueError("Selected resolution '{}' is not available!".format(resolution))
    # translate varlist
    if varatts is None: varatts = tsvaratts.copy()
    if varlist is None: varlist = varlist_hires if lhires else varlist_lowres
    if varlist and varatts: varlist = translateVarNames(varlist, varatts)
    if filelist is None: # generate default filelist
      filemap = hiresfiles if lhires else lowresfiles
      files = [filemap[var] for var in varlist if var in filemap]
    else: 
      # N.B.: an explicit filelist used to cause an unbound local variable below
      files = filelist
    # load dataset
    dataset = DatasetNetCDF(name=name, folder=folder, filelist=files, varlist=varlist, varatts=varatts, 
                            check_override=['time'], multifile=False, ncformat='NETCDF4_CLASSIC')
    # load static data (only with the default file list)
    if filelist is None: 
      staticmap = hiresstatic if lhires else lowresstatic
      files = [staticmap[var] for var in varlist if var in staticmap]
      # load constants, if any (and with singleton time axis)
      if len(files) > 0:
        staticdata = DatasetNetCDF(name=name, folder=folder, filelist=files, varlist=varlist, varatts=varatts, 
                                   axes=dict(lon=dataset.lon, lat=dataset.lat), multifile=False, 
                                   check_override=['time'], ncformat='NETCDF4_CLASSIC')
        # N.B.: need to override the axes, so that the datasets are consistent
        if len(staticdata.variables) > 0:
          for var in staticdata.variables.values(): 
            if not dataset.hasVariable(var.name):
              var.squeeze() # remove time dimension
              dataset.addVariable(var, copy=False) # no need to copy... but we can't write to the netcdf file!
    # replace time axis with number of month since Jan 1979 
    data = np.arange(0,len(dataset.time),1, dtype='int16') # month since 1979 (Jan 1979 = 0)
    timeAxis = Axis(name='time', units='month', coord=data, atts=dict(long_name='Month since 1979-01'))
    dataset.replaceAxis(dataset.time, timeAxis, asNC=False, deepcopy=False)
    # add projection  
    dataset = addGDALtoDataset(dataset, projection=None, geotransform=None, gridfolder=grid_folder)
    # N.B.: projection should be auto-detected as geographic
  else:
    # load from neatly formatted and regridded time-series files
    if folder is None: folder = avgfolder
    grid, resolution = checkGridRes(grid, resolution)
    dataset = loadObservations(name=name, folder=folder, projection=None, resolution=resolution, grid=grid, 
                               period=None, varlist=varlist, varatts=varatts, filepattern=tsfile, 
                               filelist=filelist, lautoregrid=lautoregrid, mode='time-series')
  # return formatted dataset
  return dataset
Beispiel #9
0
                  
    elif mode == 'average_timeseries':   
      
      # load source
      periodstr = '{0:4d}-{1:4d}'.format(*period)
      print('\n')
      print('   ***   Processing Resolution %s from %s   ***   '%(res,periodstr))
      print('\n')
      source = loadCFSR_TS(resolution=res)
      print(source)
      print('\n')
      # prepare sink
      filename = avgfile.format('_'+res,'_'+periodstr)
      if os.path.exists(avgfolder+filename): os.remove(avgfolder+filename)
      sink = DatasetNetCDF(name='CFSR Climatology', folder=avgfolder, filelist=[filename], atts=source.atts, mode='w')
      sink.atts.period = periodstr 
      
      # determine averaging interval
      offset = source.time.getIndex(period[0]-1979)/12 # origin of monthly time-series is at January 1979 
      # initialize processing
      CPU = CentralProcessingUnit(source, sink, tmp=True)
      
      # start processing climatology
      CPU.Climatology(period=period[1]-period[0], offset=offset, flush=False)
      
      # shift longitude axis by 180 degrees left (i.e. 0 - 360 -> -180 - 180)
      CPU.Shift(lon=-180, flush=False)
      
      # sync temporary storage with output (sink variable; do not flush!)
      CPU.sync(flush=False)
Beispiel #10
0
def performRegridding(dataset, mode, griddef, dataargs, loverwrite=False, varlist=None, lwrite=True, 
                      lreturn=False, ldebug=False, lparallel=False, pidstr='', logger=None):
  ''' Worker function to perform regridding for a given dataset and target grid. 
  
      Arguments:
        dataset    -- name of the dataset (string); also compared to the grid name to decide 
                      whether regridding is necessary
        mode       -- 'climatology' or 'time-series' (anything else raises NotImplementedError)
        griddef    -- GridDefinition instance of the target grid
        dataargs   -- dict with dataset arguments, passed on to getMetaData
        loverwrite -- if False, an existing target file is kept (and the computation skipped), 
                      provided it is newer than the source, larger than 1e6 bytes and not 
                      older than the grid definition file (if the grid has one)
        varlist    -- passed on to CentralProcessingUnit
        lwrite     -- write to a NetCDF file in avgfolder; otherwise the sink is created in memory
        lreturn    -- return the sink dataset; in this case no temporary file is used
        ldebug     -- prefix the file name with 'test_', enable feedback and log source and sink
        lparallel  -- parallel mode: requires lwrite=True and lreturn=False (IOError otherwise)
        pidstr     -- prefix for log messages; pidstr[1:-1] is used in the temporary file name 
                      in parallel mode
        logger     -- logging.Logger instance, name of a logger, or None (see below)
        
      Returns the sink dataset if lreturn is True and 0 otherwise; if the computation is 
      skipped, nothing is returned explicitly (i.e. the result is None). 
      Raises TypeError for arguments of the wrong type and IOError if avgfolder does not exist. '''
  # input checking
  if not isinstance(dataset,basestring): raise TypeError
  if not isinstance(dataargs,dict): raise TypeError # all dataset arguments are kwargs 
  if not isinstance(griddef,GridDefinition): raise TypeError
  if lparallel: 
    if not lwrite: raise IOError, 'Can only write to disk in parallel mode (i.e. lwrite = True).'
    if lreturn: raise IOError, 'Can not return datasets in parallel mode (i.e. lreturn = False).'
  
  # logging
  if logger is None: # no logger given
    # N.B.: getLogger() without a name returns the root logger, and another StreamHandler is 
    #       added to it every time this function is called without a logger
    logger = logging.getLogger() # root logger
    logger.addHandler(logging.StreamHandler())
  else:
    if isinstance(logger,basestring): 
      logger = logging.getLogger(name=logger) # connect to existing one
    elif not isinstance(logger,logging.Logger): 
      raise TypeError, 'Expected logger ID/handle in logger KW; got {}'.format(str(logger))

  ## extract meta data from arguments
  # N.B.: dataargs is replaced by the object returned from getMetaData, which is accessed 
  #       via attributes from here on
  dataargs, loadfct, srcage, datamsgstr = getMetaData(dataset, mode, dataargs)
  dataset_name = dataargs.dataset_name; periodstr = dataargs.periodstr; avgfolder = dataargs.avgfolder

  # get filename for target dataset and do some checks
  filename = getTargetFile(dataset=dataset, mode=mode, dataargs=dataargs, lwrite=lwrite, 
                           grid=griddef.name.lower(), period=None, filetype=None) 
    
  # prepare target dataset
  if ldebug: filename = 'test_' + filename
  if not os.path.exists(avgfolder): raise IOError, "Dataset folder '{:s}' does not exist!".format(avgfolder)
  lskip = False # else just go ahead
  if lwrite:
    if lreturn: tmpfilename = filename # no temporary file if dataset is passed on (can't rename the file while it is open!)
    else: 
      if lparallel: tmppfx = 'tmp_regrid_{:s}_'.format(pidstr[1:-1])
      # N.B.: the string below has no replacement field, so format() leaves it unchanged
      else: tmppfx = 'tmp_regrid_'.format(pidstr[1:-1])
      tmpfilename = tmppfx + filename      
    # N.B.: paths are built by plain string concatenation, i.e. avgfolder has to end with a separator
    filepath = avgfolder + filename
    tmpfilepath = avgfolder + tmpfilename
    if os.path.exists(filepath): 
      if not loverwrite: 
        age = datetime.fromtimestamp(os.path.getmtime(filepath))
        # if source file is newer than sink file or if sink file is a stub, recompute, otherwise skip
        if age > srcage and os.path.getsize(filepath) > 1e6: 
          lskip = True
          # also recompute, if the grid definition file is newer than the sink file
          if hasattr(griddef, 'filepath') and griddef.filepath is not None:
            gridage = datetime.fromtimestamp(os.path.getmtime(griddef.filepath))
            if age < gridage: lskip = False
        # N.B.: NetCDF files smaller than 1MB are usually incomplete header fragments from a previous crashed run
      if not lskip: os.remove(filepath) # recompute
  
  # depending on last modification time of file or overwrite setting, start computation, or skip
  if lskip:        
    # print message
    skipmsg =  "\n{:s}   >>>   Skipping: file '{:s}' in dataset '{:s}' already exists and is newer than source file.".format(pidstr,filename,dataset_name)
    skipmsg += "\n{:s}   >>>   ('{:s}')\n".format(pidstr,filepath)
    logger.info(skipmsg)              
  else:
          
    ## actually load datasets
    source = loadfct() # load source 
    # check period
    if 'period' in source.atts and dataargs.periodstr != source.atts.period: # a NetCDF attribute
      raise DateError, "Specifed period is inconsistent with netcdf records: '{:s}' != '{:s}'".format(periodstr,source.atts.period)

    # print message
    if mode == 'climatology': opmsgstr = 'Regridding Climatology ({:s}) to {:s} Grid'.format(periodstr, griddef.name)
    elif mode == 'time-series': opmsgstr = 'Regridding Time-series to {:s} Grid'.format(griddef.name)
    else: raise NotImplementedError, "Unrecognized Mode: '{:s}'".format(mode)        
    # print feedback to logger
    logger.info('\n{0:s}   ***   {1:^65s}   ***   \n{0:s}   ***   {2:^65s}   ***   \n'.format(pidstr,datamsgstr,opmsgstr))
    if not lparallel and ldebug: logger.info('\n'+str(source)+'\n')
    
    ## create new sink/target file
    # set attributes (copied from the source, then overridden for the new file)
    atts=source.atts.copy()
    atts['period'] = periodstr; atts['name'] = dataset_name; atts['grid'] = griddef.name
    if mode == 'climatology': atts['title'] = '{:s} Climatology on {:s} Grid'.format(dataset_name, griddef.name)
    elif mode == 'time-series':  atts['title'] = '{:s} Time-series on {:s} Grid'.format(dataset_name, griddef.name)
      
    # make new dataset
    if lwrite: # write to NetCDF file 
      if os.path.exists(tmpfilepath): os.remove(tmpfilepath) # remove old temp files 
      sink = DatasetNetCDF(folder=avgfolder, filelist=[tmpfilename], atts=atts, mode='w')
    else: sink = Dataset(atts=atts) # only create dataset in memory
    
    # initialize processing
    CPU = CentralProcessingUnit(source, sink, varlist=varlist, tmp=False, feedback=ldebug)
  
    # perform regridding (if target grid is different from native grid!)
    # N.B.: the grid counts as native, if its name is identical to the dataset name
    if griddef.name != dataset:
      # reproject and resample (regrid) dataset
      CPU.Regrid(griddef=griddef, flush=True)

    # get results    
    CPU.sync(flush=True)
    
    # add geolocators
    sink = addGeoLocator(sink, griddef=griddef, lgdal=True, lreplace=True, lcheck=True)
    # N.B.: WRF datasets come with their own geolocator arrays - we need to replace those!
    
    # add length and names of month (without leap years for WRF and CESM)
    if mode == 'climatology' and not sink.hasVariable('length_of_month') and sink.hasVariable('time'): 
      addLengthAndNamesOfMonth(sink, noleap=True if dataset.upper() in ('WRF','CESM') else False) 
    
    # print dataset
    if not lparallel and ldebug:
      logger.info('\n'+str(sink)+'\n')   
    # write results to file
    if lwrite:
      sink.sync()
      writemsg =  "\n{:s}   >>>   Writing to file '{:s}' in dataset {:s}".format(pidstr,filename,dataset_name)
      writemsg += "\n{:s}   >>>   ('{:s}')\n".format(pidstr,filepath)
      logger.info(writemsg)      
      
      # rename file to proper name
      if not lreturn:
        sink.unload(); sink.close(); del sink # destroy all references 
        if os.path.exists(filepath): os.remove(filepath) # remove old file
        os.rename(tmpfilepath,filepath) # this would also overwrite the old file...
      # N.B.: there is no temporary file if the dataset is returned, because an open file can't be renamed
        
    # clean up and return
    source.unload(); del source, CPU
    if lreturn:      
      return sink # return dataset for further use (netcdf file still open!)
    else:            
      return 0 # "exit code"
Beispiel #11
0
def loadCRU_TS(name=dataset_name,
               grid=None,
               varlist=None,
               resolution=None,
               varatts=None,
               filelist=None,
               folder=None,
               lautoregrid=None):
    ''' Load the CRU monthly mean time-series as a formatted dataset.

        If no grid is given, the original time-series files are opened (folder defaults to
        orig_ts_folder), the time axis is replaced by a month count relative to January 1979,
        and GDAL information is added. If a grid is given, loading is delegated to
        loadObservations (folder defaults to avgfolder).
        N.B.: the resolution argument is accepted but not used by this function. '''
    if grid is not None:
        # regridded data: read the pre-processed time-series files
        src_folder = avgfolder if folder is None else folder
        return loadObservations(name=name,
                                folder=src_folder,
                                projection=None,
                                resolution=None,
                                grid=grid,
                                period=None,
                                varlist=varlist,
                                varatts=varatts,
                                filepattern=tsfile,
                                filelist=filelist,
                                lautoregrid=lautoregrid,
                                mode='time-series')
    # native grid: read the original time-series files
    src_folder = orig_ts_folder if folder is None else folder
    # fall back to the default attributes and variables, then translate the variable names
    if varatts is None:
        varatts = tsvaratts.copy()
    if varlist is None:
        varlist = varatts.keys()
    if varlist and varatts:
        varlist = translateVarNames(varlist, varatts)
    # default file list: one file per variable, except for those listed in nofile
    if filelist is None:
        filelist = [
            orig_ts_file.format(varname) for varname in varlist
            if varname not in nofile
        ]
    # open the files
    cru = DatasetNetCDF(name=name,
                        folder=src_folder,
                        filelist=filelist,
                        varlist=varlist,
                        varatts=varatts,
                        multifile=False,
                        ncformat='NETCDF4_CLASSIC')
    # new time coordinate in months since Jan 1979 (Jan 1979 = 0); the offset shifts the
    # first record to January 1901
    offset = (1901 - 1979) * 12
    months = np.arange(0, len(cru.time), 1, dtype='int16') + offset
    month_axis = Axis(name='time',
                      units='month',
                      coord=months,
                      atts=dict(long_name='Month since 1979-01'))
    cru.replaceAxis(cru.time, month_axis, asNC=False, deepcopy=False)
    # add GDAL info; no projection is passed (original note: should be auto-detected as geographic)
    return addGDALtoDataset(cru,
                            projection=None,
                            geotransform=None,
                            gridfolder=grid_folder)
Beispiel #12
0
def loadGPCC_TS(name=dataset_name,
                grid=None,
                varlist=None,
                resolution='25',
                varatts=None,
                filelist=None,
                folder=None,
                lautoregrid=None):
    ''' Get a properly formatted dataset with the monthly GPCC time-series.

        If no grid is given, the original time-series files are opened (folder defaults to
        orig_ts_folder); the resolution has to be one of '05', '10' or '25', otherwise a
        DatasetError is raised. The time axis is replaced by a month count relative to
        January 1979 and GDAL information is added.
        If a grid is given, grid and resolution are passed through checkGridRes and loading
        is delegated to loadObservations (folder defaults to avgfolder). '''
    if grid is None:
        # load from original time-series files
        if folder is None: folder = orig_ts_folder
        # prepare input
        if resolution not in ('05', '10', '25'):
            # call-form of raise: valid in Python 2 and 3 (the statement form is Python 2 only)
            raise DatasetError("Selected resolution '%s' is not available!" % resolution)
        # translate varlist
        if varatts is None: varatts = tsvaratts.copy()
        if varlist is None: varlist = varatts.keys()
        if varlist and varatts: varlist = translateVarNames(varlist, varatts)
        if filelist is None:  # generate default filelist
            # one file per variable: 'p' is read from the 'precip' file, 's' from the 'statio' file
            filelist = []
            if 'p' in varlist:
                filelist.append(orig_ts_file.format('precip', resolution))
            if 's' in varlist:
                filelist.append(orig_ts_file.format('statio', resolution))
        # load dataset
        dataset = DatasetNetCDF(name=name,
                                folder=folder,
                                filelist=filelist,
                                varlist=varlist,
                                varatts=varatts,
                                multifile=False,
                                ncformat='NETCDF4_CLASSIC')
        # replace time axis with number of month since Jan 1979
        # N.B.: the offset shifts the first record to January 1901 (Jan 1979 = 0)
        data = np.arange(0, len(dataset.time), 1, dtype='int16') + (
            1901 - 1979) * 12
        timeAxis = Axis(name='time',
                        units='month',
                        coord=data,
                        atts=dict(long_name='Month since 1979-01'))
        dataset.replaceAxis(dataset.time, timeAxis, asNC=False, deepcopy=False)
        # add GDAL info
        dataset = addGDALtoDataset(dataset, projection=None, geotransform=None)
        # N.B.: projection should be auto-detected as geographic
    else:
        # load from neatly formatted and regridded time-series files
        if folder is None: folder = avgfolder
        grid, resolution = checkGridRes(grid,
                                        resolution,
                                        period=None,
                                        lclim=False)
        dataset = loadObservations(name=name,
                                   folder=folder,
                                   projection=None,
                                   resolution=resolution,
                                   grid=grid,
                                   period=None,
                                   varlist=varlist,
                                   varatts=varatts,
                                   filepattern=tsfile,
                                   filelist=filelist,
                                   lautoregrid=lautoregrid,
                                   mode='time-series')
    # return formatted dataset
    return dataset
Beispiel #13
0
def performRegridding(dataset,
                      mode,
                      griddef,
                      dataargs,
                      loverwrite=False,
                      varlist=None,
                      lwrite=True,
                      lreturn=False,
                      ldebug=False,
                      lparallel=False,
                      pidstr='',
                      logger=None):
    ''' Worker function to regrid a given dataset to a target grid.

        Arguments:
          dataset    -- name of the dataset (string); also compared against the grid name to
                        decide whether any regridding is necessary
          mode       -- 'climatology' or 'time-series'
          griddef    -- GridDefinition instance describing the target grid
          dataargs   -- dict of dataset arguments; passed on to getMetaData
          loverwrite -- if False, an existing target file that is newer than the source (and
                        the grid definition file) and larger than 1MB is not recomputed
          varlist    -- passed on to CentralProcessingUnit
          lwrite     -- write the result to a NetCDF file in the dataset's avgfolder; otherwise
                        the sink dataset is only created in memory
          lreturn    -- return the sink dataset (the NetCDF file is then written in place and
                        stays open)
          ldebug     -- prefix the target file with 'test_' and log the datasets
          lparallel  -- parallel mode; requires lwrite=True and lreturn=False
          pidstr     -- prefix for log messages; pidstr[1:-1] is used in the temporary file
                        name in parallel mode
          logger     -- logging.Logger instance or logger name; None uses the root logger

        Returns the sink dataset if lreturn is True and 0 otherwise; if the computation is
        skipped, nothing (None) is returned.

        Raises TypeError (invalid argument types), IOError (invalid option combination or
        missing dataset folder), DateError (period mismatch with the source file) and
        NotImplementedError (unrecognized mode). '''
    # input checking
    if not isinstance(dataset, basestring):
        raise TypeError("Expected a dataset name (string); got {}".format(
            type(dataset)))
    if not isinstance(dataargs, dict):
        # all dataset arguments are kwargs
        raise TypeError("Expected dataset arguments as dict; got {}".format(
            type(dataargs)))
    if not isinstance(griddef, GridDefinition):
        raise TypeError("Expected a GridDefinition instance; got {}".format(
            type(griddef)))
    if lparallel:
        if not lwrite:
            raise IOError(
                'Can only write to disk in parallel mode (i.e. lwrite = True).')
        if lreturn:
            raise IOError(
                'Can not return datasets in parallel mode (i.e. lreturn = False).'
            )

    # logging
    if logger is None:
        logger = logging.getLogger()  # root logger
        # N.B.: getLogger() always returns the same root logger, so a handler must only be
        #       added once, otherwise every call would duplicate all log output
        if not logger.handlers:
            logger.addHandler(logging.StreamHandler())
    elif isinstance(logger, basestring):
        logger = logging.getLogger(name=logger)  # connect to existing one
    elif not isinstance(logger, logging.Logger):
        raise TypeError(
            'Expected logger ID/handle in logger KW; got {}'.format(
                str(logger)))

    ## extract meta data from arguments
    dataargs, loadfct, srcage, datamsgstr = getMetaData(
        dataset, mode, dataargs)
    dataset_name = dataargs.dataset_name
    periodstr = dataargs.periodstr
    avgfolder = dataargs.avgfolder

    # get filename for target dataset and do some checks
    filename = getTargetFile(
        dataset=dataset,
        mode=mode,
        dataargs=dataargs,
        lwrite=lwrite,
        grid=griddef.name.lower(),
    )

    # prepare target dataset
    if ldebug: filename = 'test_' + filename
    if not os.path.exists(avgfolder):
        raise IOError(
            "Dataset folder '{:s}' does not exist!".format(avgfolder))
    lskip = False  # else just go ahead
    if lwrite:
        if lreturn:
            # no temporary file if dataset is passed on (can't rename the file while it is open!)
            tmpfilename = filename
        elif lparallel:
            # include the process ID, so that parallel workers do not share temporary files
            tmpfilename = 'tmp_regrid_{:s}_'.format(pidstr[1:-1]) + filename
        else:
            tmpfilename = 'tmp_regrid_' + filename
        filepath = avgfolder + filename
        tmpfilepath = avgfolder + tmpfilename
        if os.path.exists(filepath) and not loverwrite:
            age = datetime.fromtimestamp(os.path.getmtime(filepath))
            # if source file is newer than sink file or if sink file is a stub, recompute, otherwise skip
            if age > srcage and os.path.getsize(filepath) > 1e6:
                lskip = True
                # also recompute, if the grid definition file changed after the sink file was written
                if getattr(griddef, 'filepath', None) is not None:
                    gridage = datetime.fromtimestamp(
                        os.path.getmtime(griddef.filepath))
                    if age < gridage: lskip = False
            # N.B.: NetCDF files smaller than 1MB are usually incomplete header fragments from a
            #       previous crashed run

    # depending on last modification time of file or overwrite setting, start computation, or skip
    if lskip:
        # print message
        skipmsg = "\n{:s}   >>>   Skipping: file '{:s}' in dataset '{:s}' already exists and is newer than source file.".format(
            pidstr, filename, dataset_name)
        skipmsg += "\n{:s}   >>>   ('{:s}')\n".format(pidstr, filepath)
        logger.info(skipmsg)
    else:

        ## actually load datasets
        source = loadfct()  # load source
        # check period
        if 'period' in source.atts and periodstr != source.atts.period:  # a NetCDF attribute
            raise DateError(
                "Specifed period is inconsistent with netcdf records: '{:s}' != '{:s}'"
                .format(periodstr, source.atts.period))

        # print message
        if mode == 'climatology':
            opmsgstr = 'Regridding Climatology ({:s}) to {:s} Grid'.format(
                periodstr, griddef.name)
        elif mode == 'time-series':
            opmsgstr = 'Regridding Time-series to {:s} Grid'.format(
                griddef.name)
        else:
            raise NotImplementedError(
                "Unrecognized Mode: '{:s}'".format(mode))
        # print feedback to logger
        logger.info(
            '\n{0:s}   ***   {1:^65s}   ***   \n{0:s}   ***   {2:^65s}   ***   \n'
            .format(pidstr, datamsgstr, opmsgstr))
        if not lparallel and ldebug: logger.info('\n' + str(source) + '\n')

        ## create new sink/target file
        # set attributes
        atts = source.atts.copy()
        atts['period'] = periodstr
        atts['name'] = dataset_name
        atts['grid'] = griddef.name
        if mode == 'climatology':
            atts['title'] = '{:s} Climatology on {:s} Grid'.format(
                dataset_name, griddef.name)
        elif mode == 'time-series':
            atts['title'] = '{:s} Time-series on {:s} Grid'.format(
                dataset_name, griddef.name)

        # make new dataset
        if lwrite:  # write to NetCDF file
            if os.path.exists(tmpfilepath):
                os.remove(tmpfilepath)  # remove old temp files
            sink = DatasetNetCDF(folder=avgfolder,
                                 filelist=[tmpfilename],
                                 atts=atts,
                                 mode='w')
        else:
            sink = Dataset(atts=atts)  # only create dataset in memory

        # initialize processing
        CPU = CentralProcessingUnit(source,
                                    sink,
                                    varlist=varlist,
                                    tmp=False,
                                    feedback=ldebug)

        # perform regridding (if target grid is different from native grid!)
        if griddef.name != dataset:
            # reproject and resample (regrid) dataset
            CPU.Regrid(griddef=griddef, flush=True)

        # get results
        CPU.sync(flush=True)

        # add geolocators
        sink = addGeoLocator(sink,
                             griddef=griddef,
                             lgdal=True,
                             lreplace=True,
                             lcheck=True)
        # N.B.: WRF datasets come with their own geolocator arrays - we need to replace those!

        # add length and names of month
        if (mode == 'climatology' and sink.hasVariable('time')
                and not sink.hasVariable('length_of_month')):
            # WRF and CESM are treated as no-leap calendars
            addLengthAndNamesOfMonth(
                sink, noleap=dataset.upper() in ('WRF', 'CESM'))

        # print dataset
        if not lparallel and ldebug:
            logger.info('\n' + str(sink) + '\n')
        # write results to file
        if lwrite:
            sink.sync()
            writemsg = "\n{:s}   >>>   Writing to file '{:s}' in dataset {:s}".format(
                pidstr, filename, dataset_name)
            writemsg += "\n{:s}   >>>   ('{:s}')\n".format(pidstr, filepath)
            logger.info(writemsg)

            # rename file to proper name
            if not lreturn:
                sink.unload()
                sink.close()
                del sink  # destroy all references
                if os.path.exists(filepath):
                    os.remove(filepath)  # remove old file
                os.rename(
                    tmpfilepath,
                    filepath)  # this would also overwrite the old file...
            # N.B.: there is no temporary file if the dataset is returned, because an open file can't be renamed

        # clean up and return
        source.unload()
        del source, CPU
        if lreturn:
            return sink  # return dataset for further use (netcdf file still open!)
        else:
            return 0  # "exit code"
Beispiel #14
0
def performExtraction(dataset,
                      mode,
                      stnfct,
                      dataargs,
                      loverwrite=False,
                      varlist=None,
                      lwrite=True,
                      lreturn=False,
                      ldebug=False,
                      lparallel=False,
                      pidstr='',
                      logger=None):
    ''' Worker function to extract point (station) data from a gridded dataset.

        Arguments:
          dataset    -- name of the dataset (string)
          mode       -- 'climatology' or 'time-series'
          stnfct     -- callable without arguments that returns the station Dataset, which
                        serves as template for the extraction
          dataargs   -- dict of dataset arguments; passed on to getMetaData
          loverwrite -- if False, an existing target file that is newer than the source and
                        larger than 100kB is not recomputed
          varlist    -- passed on to CentralProcessingUnit
          lwrite     -- write the result to a NetCDF file in the dataset's avgfolder; otherwise
                        the sink dataset is only created in memory
          lreturn    -- return the sink dataset (the NetCDF file is then written in place and
                        stays open)
          ldebug     -- prefix the target file with 'test_' and log the datasets
          lparallel  -- parallel mode; requires lwrite=True and lreturn=False
          pidstr     -- prefix for log messages; pidstr[1:-1] is used in the temporary file
                        name in parallel mode
          logger     -- logging.Logger instance or logger name; None uses the root logger

        Returns the sink dataset if lreturn is True and 0 otherwise; if the computation is
        skipped, nothing (None) is returned.

        Raises TypeError (invalid argument types), IOError (invalid option combination or
        missing dataset folder), DateError (period mismatch with the source file) and
        NotImplementedError (unrecognized mode). '''
    # input checking
    if not isinstance(dataset, basestring):
        raise TypeError("Expected a dataset name (string); got {}".format(
            type(dataset)))
    if not isinstance(dataargs, dict):
        # all dataset arguments are kwargs
        raise TypeError("Expected dataset arguments as dict; got {}".format(
            type(dataargs)))
    if not callable(stnfct):
        # function to load station dataset
        raise TypeError(
            "Expected a callable that loads the station dataset; got {}".
            format(type(stnfct)))
    if lparallel:
        if not lwrite:
            raise IOError(
                'In parallel mode we can only write to disk (i.e. lwrite = True).'
            )
        if lreturn:
            raise IOError(
                'Can not return datasets in parallel mode (i.e. lreturn = False).'
            )

    # logging
    if logger is None:
        logger = logging.getLogger()  # root logger
        # N.B.: getLogger() always returns the same root logger, so a handler must only be
        #       added once, otherwise every call would duplicate all log output
        if not logger.handlers:
            logger.addHandler(logging.StreamHandler())
    elif isinstance(logger, basestring):
        logger = logging.getLogger(name=logger)  # connect to existing one
    elif not isinstance(logger, logging.Logger):
        raise TypeError(
            'Expected logger ID/handle in logger KW; got {}'.format(
                str(logger)))

    # validate mode up front; below, "not lclim" always means time-series
    if mode == 'climatology': lclim = True
    elif mode == 'time-series': lclim = False
    else:
        raise NotImplementedError("Unrecognized Mode: '{:s}'".format(mode))

    ## extract meta data from arguments
    dataargs, loadfct, srcage, datamsgstr = getMetaData(
        dataset, mode, dataargs)
    dataset_name = dataargs.dataset_name
    periodstr = dataargs.periodstr
    avgfolder = dataargs.avgfolder

    # load template dataset
    stndata = stnfct()  # load station dataset from function
    if not isinstance(stndata, Dataset):
        raise TypeError(
            "Station loading function has to return a Dataset; got {}".format(
                type(stndata)))
    # N.B.: the loading function is necessary, because DatasetNetCDF instances do not pickle well

    # get filename for target dataset and do some checks
    filename = getTargetFile(dataset=dataset,
                             mode=mode,
                             dataargs=dataargs,
                             lwrite=lwrite,
                             station=stndata.name)

    if ldebug: filename = 'test_' + filename
    if not os.path.exists(avgfolder):
        raise IOError(
            "Dataset folder '{:s}' does not exist!".format(avgfolder))
    lskip = False  # else just go ahead
    if lwrite:
        if lreturn:
            # no temporary file if dataset is passed on (can't rename the file while it is open!)
            tmpfilename = filename
        elif lparallel:
            # include the process ID, so that parallel workers do not share temporary files
            tmpfilename = 'tmp_exstns_{:s}_'.format(pidstr[1:-1]) + filename
        else:
            tmpfilename = 'tmp_exstns_' + filename
        filepath = avgfolder + filename
        tmpfilepath = avgfolder + tmpfilename
        if os.path.exists(filepath) and not loverwrite:
            age = datetime.fromtimestamp(os.path.getmtime(filepath))
            # if source file is newer than sink file or if sink file is a stub, recompute, otherwise skip
            if age > srcage and os.path.getsize(filepath) > 1e5:
                lskip = True
            # N.B.: NetCDF files smaller than 100kB are usually incomplete header fragments from a
            #       previous crashed run

    # depending on last modification time of file or overwrite setting, start computation, or skip
    if lskip:
        # print message
        skipmsg = "\n{:s}   >>>   Skipping: file '{:s}' in dataset '{:s}' already exists and is newer than source file.".format(
            pidstr, filename, dataset_name)
        skipmsg += "\n{:s}   >>>   ('{:s}')\n".format(pidstr, filepath)
        logger.info(skipmsg)
    else:

        ## actually load datasets
        source = loadfct()  # load source
        # check period
        if 'period' in source.atts and periodstr != source.atts.period:  # a NetCDF attribute
            raise DateError(
                "Specifed period is inconsistent with netcdf records: '{:s}' != '{:s}'"
                .format(periodstr, source.atts.period))

        # print message
        if lclim:
            opmsgstr = "Extracting '{:s}'-type Point Data from Climatology ({:s})".format(
                stndata.name, periodstr)
        else:
            opmsgstr = "Extracting '{:s}'-type Point Data from Time-series".format(
                stndata.name)
        # print feedback to logger
        logger.info(
            '\n{0:s}   ***   {1:^65s}   ***   \n{0:s}   ***   {2:^65s}   ***   \n'
            .format(pidstr, datamsgstr, opmsgstr))
        if not lparallel and ldebug: logger.info('\n' + str(source) + '\n')

        ## create new sink/target file
        # set attributes
        atts = source.atts.copy()
        # an empty period string is recorded as 'time-series'
        atts['period'] = periodstr if periodstr else 'time-series'
        atts['name'] = dataset_name
        atts['station'] = stndata.name
        atts['title'] = '{:s} (Stations) from {:s} {:s}'.format(
            stndata.title, dataset_name, mode.title())
        # make new dataset
        if lwrite:  # write to NetCDF file
            if os.path.exists(tmpfilepath):
                os.remove(tmpfilepath)  # remove old temp files
            sink = DatasetNetCDF(folder=avgfolder,
                                 filelist=[tmpfilename],
                                 atts=atts,
                                 mode='w')
        else:
            sink = Dataset(atts=atts)  # only create dataset in memory

        # initialize processing
        CPU = CentralProcessingUnit(source,
                                    sink,
                                    varlist=varlist,
                                    tmp=False,
                                    feedback=ldebug)

        # extract data at station locations
        CPU.Extract(template=stndata, flush=True)
        # get results
        CPU.sync(flush=True)

        # print dataset
        if not lparallel and ldebug:
            logger.info('\n' + str(sink) + '\n')
        # write results to file
        if lwrite:
            sink.sync()
            writemsg = "\n{:s}   >>>   Writing to file '{:s}' in dataset {:s}".format(
                pidstr, filename, dataset_name)
            writemsg += "\n{:s}   >>>   ('{:s}')\n".format(pidstr, filepath)
            logger.info(writemsg)

            # rename file to proper name
            if not lreturn:
                sink.unload()
                sink.close()
                del sink  # destroy all references
                if os.path.exists(filepath):
                    os.remove(filepath)  # remove old file
                os.rename(tmpfilepath, filepath)
            # N.B.: there is no temporary file if the dataset is returned, because an open file can't be renamed

        # clean up and return
        source.unload()
        del source
        if lreturn:
            return sink  # return dataset for further use (netcdf file still open!)
        else:
            return 0  # "exit code"
Beispiel #15
0
            filename = getFileName(grid=grid_name,
                                   period=None,
                                   name=None,
                                   filepattern=tsfile)
            filepath = avgfolder + filename
            print(' Saving data to: \'{0:s}\'\n'.format(filepath))
            assert os.path.exists(avgfolder)
            if os.path.exists(filepath): os.remove(filepath)  # remove old file
            # set attributes
            atts = dict()  # collect attributes, but add prefixes
            atts = uclim.atts.copy()
            atts['title'] = 'Corrected Time-sries on {:s} Grid'.format(
                grid_name)
            # make new dataset
            sink = DatasetNetCDF(folder=avgfolder,
                                 filelist=[filename],
                                 atts=atts,
                                 mode='w')
            # sync and write data so far
            sink.sync()

            ## correct data (create variables)
            for varname, var in uclim.variables.iteritems():
                print ''
                print varname
                # correct time-series variables
                if var.hasAxis('time'):
                    if varname in CRU_vars:
                        tsvar = cruts[varname]
                        climvar = cruclim[varname]
                        assert tsvar.axisIndex('time') == 1, tsvar
                        assert climvar.axisIndex(
Beispiel #16
0
   
 grid_name = grid
 periodstr = '{0:4d}-{1:4d}'.format(*period)        
 print('\n   ***   Merging Shape-Averaged Time-Series on {:s} Grid  ***   \n'.format(grid,))
 ## prepare target dataset 
 filename = getFileName(grid=grid_name, period=None, name=None, filepattern=tsfile)
 filepath = avgfolder + filename
 print(' Saving data to: \'{0:s}\'\n'.format(filepath))
 assert os.path.exists(avgfolder)
 if os.path.exists(filepath): os.remove(filepath) # remove old file
 # set attributes   
 atts=dict() # collect attributes, but add prefixes
 atts = uclim.atts.copy()
 atts['title'] = 'Corrected Time-sries on {:s} Grid'.format(grid_name)
 # make new dataset
 sink = DatasetNetCDF(folder=avgfolder, filelist=[filename], atts=atts, mode='w')
 # sync and write data so far 
 sink.sync()       
         
 ## correct data (create variables)
 for varname,var in uclim.variables.iteritems():
   print ''
   print varname
   # correct time-series variables
   if var.hasAxis('time'):
     if varname in CRU_vars:
       tsvar = cruts[varname]; climvar = cruclim[varname] 
       assert tsvar.axisIndex('time') == 1, tsvar            
       assert climvar.axisIndex('time') == 1 and var.axisIndex('time') == 1, climvar
       assert len(tsvar.axes[1])%12 == 0, len(tsvar.axes[1])
       assert tsvar.axes[1].coord[0]%12 == 0, tsvar.axes[1].coord[0]
Beispiel #17
0
def loadCFSR_TS(name=dataset_name,
                grid=None,
                varlist=None,
                varatts=None,
                resolution='hires',
                filelist=None,
                folder=None,
                lautoregrid=None):
    ''' Get a properly formatted CFSR dataset with monthly mean time-series.

        Without a grid, the original time-series files are loaded: the default file list and
        the default variable list are selected by resolution ('hires', '03' or '031' for the
        high-resolution and 'lowres' or '05' for the low-resolution files) and static fields
        are merged into the dataset. If an explicit filelist is passed, only those files are
        loaded and no static data is added. With a grid, the regridded time-series files are
        loaded via loadObservations.

        Raises ValueError, if the default file list is requested for an unknown resolution. '''
    if grid is None:
        # load from original time-series files
        if folder is None: folder = orig_ts_folder
        # translate varlist
        if varatts is None: varatts = tsvaratts.copy()
        lhires = resolution in ('hires', '03', '031')
        llowres = resolution in ('lowres', '05')
        if varlist is None:
            if lhires: varlist = varlist_hires
            elif llowres: varlist = varlist_lowres
        if varlist and varatts: varlist = translateVarNames(varlist, varatts)
        # assemble filelist
        ldefault = filelist is None
        if ldefault:  # generate default filelist
            if lhires: datafiles, staticfiles = hiresfiles, hiresstatic
            elif llowres: datafiles, staticfiles = lowresfiles, lowresstatic
            else:
                raise ValueError(
                    "Unsupported resolution '{}' (expected 'hires'/'03'/'031' or 'lowres'/'05')."
                    .format(resolution))
            files = [datafiles[var] for var in varlist if var in datafiles]
        else:
            files = filelist  # use the files explicitly requested by the caller
        # load dataset
        dataset = DatasetNetCDF(name=name,
                                folder=folder,
                                filelist=files,
                                varlist=varlist,
                                varatts=varatts,
                                check_override=['time'],
                                multifile=False,
                                ncformat='NETCDF4_CLASSIC')
        # load static data (only known for the default files)
        if ldefault:
            staticlist = [
                staticfiles[var] for var in varlist if var in staticfiles
            ]
            staticdata = DatasetNetCDF(name=name,
                                       folder=folder,
                                       filelist=staticlist,
                                       varlist=varlist,
                                       varatts=varatts,
                                       axes=dict(lon=dataset.lon,
                                                 lat=dataset.lat),
                                       multifile=False,
                                       check_override=['time'],
                                       ncformat='NETCDF4_CLASSIC')
            # N.B.: need to override the axes, so that the datasets are consistent
            for var in staticdata.variables.values():
                if not dataset.hasVariable(var.name):
                    var.squeeze()  # remove time dimension
                    # no need to copy... but we can't write to the netcdf file!
                    dataset.addVariable(var, copy=False)
        # replace time axis with number of month since Jan 1979
        data = np.arange(0, len(dataset.time), 1,
                         dtype='int16')  # month since 1979 (Jan 1979 = 0)
        timeAxis = Axis(name='time',
                        units='month',
                        coord=data,
                        atts=dict(long_name='Month since 1979-01'))
        dataset.replaceAxis(dataset.time, timeAxis, asNC=False, deepcopy=False)
        # add projection
        dataset = addGDALtoDataset(dataset,
                                   projection=None,
                                   geotransform=None,
                                   gridfolder=grid_folder)
        # N.B.: projection should be auto-detected as geographic
    else:
        # load from neatly formatted and regridded time-series files
        if folder is None: folder = avgfolder
        grid, resolution = checkGridRes(grid, resolution)
        dataset = loadObservations(name=name,
                                   folder=folder,
                                   projection=None,
                                   resolution=resolution,
                                   grid=grid,
                                   period=None,
                                   varlist=varlist,
                                   varatts=varatts,
                                   filepattern=tsfile,
                                   filelist=filelist,
                                   lautoregrid=lautoregrid,
                                   mode='time-series')
    # return formatted dataset
    return dataset
Beispiel #18
0
            # load source
            periodstr = '{0:4d}-{1:4d}'.format(*period)
            print('\n')
            print('   ***   Processing Resolution %s from %s   ***   ' %
                  (res, periodstr))
            print('\n')
            source = loadCFSR_TS(resolution=res)
            print(source)
            print('\n')
            # prepare sink
            filename = avgfile.format('_' + res, '_' + periodstr)
            if os.path.exists(avgfolder + filename):
                os.remove(avgfolder + filename)
            sink = DatasetNetCDF(name='CFSR Climatology',
                                 folder=avgfolder,
                                 filelist=[filename],
                                 atts=source.atts,
                                 mode='w')
            sink.atts.period = periodstr

            # determine averaging interval
            offset = source.time.getIndex(
                period[0] -
                1979) / 12  # origin of monthly time-series is at January 1979
            # initialize processing
            CPU = CentralProcessingUnit(source, sink, tmp=True)

            # start processing climatology
            CPU.Climatology(period=period[1] - period[0],
                            offset=offset,
                            flush=False)
Beispiel #19
0
  def __init__(self):
    self.name = 'const' 
    self.atts = dict(orog    = dict(name='zs', units='m'), # surface altitude
    
# axes (don't have their own file)
class Axes(FileType):
  ''' Mock file-type that only carries the attribute definitions of the coordinate axes. '''
  def __init__(self):
    # time coordinate (days since 1979-01-01)
    # NOTE THAT THE CMIP5 DATASET HAVE DIFFERENT TIME OFFSETS BETWEEN MEMBERS !!!
    # N.B.: the time coordinate is only used for the monthly time-series data, not the LTM;
    #       the time offset is chosen such that 1979 begins with the origin (time=0)
    # NOTE(review): units are 'days', but the long_name says 'Month since 1979' - confirm which is intended
    time_atts = dict(name='time', units='days', offset=-47116, atts=dict(long_name='Month since 1979'))
    lon_atts = dict(name='lon', units='deg E') # west-east coordinate
    lat_atts = dict(name='lat', units='deg N') # south-north coordinate
    plev_atts = dict(name='lev', units='') # hybrid pressure coordinate
    # attribute definitions, keyed by the axis name used in the source files
    self.atts = dict(time=time_atts, lon=lon_atts, lat=lat_atts, plev=plev_atts)
    # names of all axes handled by this mock file-type
    self.vars = self.atts.keys()

# Time-Series (monthly)
def loadCMIP5_TS(experiment=None, name=None, grid=None, filetypes=None, varlist=None, varatts=None,
                translateVars=None, lautoregrid=None, load3D=False, ignore_list=None, lcheckExp=True,
                lreplaceTime=True, lwrite=False, exps=None):
  ''' Get a properly formatted CMIP5 dataset with a monthly time-series (thin wrapper around
      loadCMIP5_All, which is called with mode='time-series' and without period or station). '''
  # options that are simply passed through to the generic loader
  kwargs = dict(experiment=experiment, name=name, grid=grid, filetypes=filetypes, varlist=varlist,
                varatts=varatts, translateVars=translateVars, lautoregrid=lautoregrid, load3D=load3D,
                ignore_list=ignore_list, lcheckExp=lcheckExp, lreplaceTime=lreplaceTime, lwrite=lwrite,
                exps=exps)
  # options that are fixed for time-series
  kwargs.update(mode='time-series', period=None, station=None)
  return loadCMIP5_All(**kwargs)

# load minimally pre-processed CMIP5 climatology files
def loadCMIP5(experiment=None, name=None, grid=None, period=None, filetypes=None, varlist=None,
             varatts=None, translateVars=None, lautoregrid=None, load3D=False, ignore_list=None,
             lcheckExp=True, lreplaceTime=True, lencl=False, lwrite=False, exps=None):
  ''' Get a properly formatted monthly CMIP5 climatology (thin wrapper around loadCMIP5_All,
      which is called with mode='climatology' and without station). '''
  # NOTE(review): 'lencl' is accepted, but it is not forwarded to loadCMIP5_All - confirm that this is intended
  # options that are simply passed through to the generic loader
  kwargs = dict(experiment=experiment, name=name, grid=grid, period=period, filetypes=filetypes,
                varlist=varlist, varatts=varatts, translateVars=translateVars, lautoregrid=lautoregrid,
                load3D=load3D, ignore_list=ignore_list, exps=exps, lcheckExp=lcheckExp,
                lreplaceTime=lreplaceTime, lwrite=lwrite)
  # options that are fixed for climatologies
  kwargs.update(mode='climatology', station=None)
  return loadCMIP5_All(**kwargs)


# load any of the various pre-processed CESM climatology and time-series files 
def loadCMIP5_All(experiment=None, name=None, grid=None, station=None, shape=None, period=None, 
                 varlist=None, varatts=None, translateVars=None, lautoregrid=None, load3D=False, 
                 ignore_list=None, mode='climatology', cvdp_mode=None, lcheckExp=True, exps=None,
                 lreplaceTime=True, filetypes=None, lencl=False, lwrite=False, check_vars=None):
  ''' Load any of the pre-processed monthly files as a properly formatted DatasetNetCDF. 
  
      Arguments:
        experiment, name, exps : handed to getFolderName, which returns the folder, the experiment 
                        and the name that are used from then on
        grid          : name of the target grid; None or the experiment's own grid selects the 
                        native files (no grid tag in the filename)
        station, shape : name of a station or shape set (mutually exclusive); the lower-case name 
                        takes the place of the grid tag in the filename
        period        : (begin,end) tuple/list, 'begin-end' string, an integer number of years 
                        (registered experiments only) or None; mandatory for climatologies, reset 
                        to None for time-series, inferred from the experiment in 'cvdp' mode
        varlist       : variable name or list of names; 'SST' is loaded via 'Ts' and added to the 
                        dataset under the name 'SST'
        varatts       : additional variable attributes, merged over those of the file classes
        translateVars : None: load original names plus their translations; True: translations 
                        only; anything else: use varlist as is
        lautoregrid   : regrid on-the-fly if only the native file exists; defaults to False for 
                        time-series and to 'not load3D' otherwise
        load3D        : if False, ignore_list_3D is added to the ignore list
        ignore_list   : list/tuple/set of variables to skip (default: ignore_list_2D)
        mode          : 'climatology', 'time-series' (or 'timeseries'), 'cvdp' or 'diag'
        cvdp_mode     : handed to getFolderName in 'cvdp' mode
        lcheckExp     : accepted for interface compatibility; not used in this function
        lreplaceTime  : replace the time/year axes by standardized axes
        filetypes     : file type or list of file types (default: ['atm','lnd']); some WRF file 
                        type names are mapped to 'atm' or 'lnd' for convenience
        lencl         : for shape data, mask using the 'shp_encl' variable, if present
        lwrite        : open the NetCDF files in 'rw' instead of 'r' mode
        check_vars    : handed to DatasetNetCDF
        
      Raises: ValueError, TypeError, DateError, DatasetError, ArgumentError, GDALError, AxisError, 
      NotImplementedError ('diag' and unknown modes, unsupported combinations) and IOError (missing 
      files or failed regridding).
      
      N.B.: with lreplaceTime in 'time-series' mode the start date is taken from experiment.begindate; 
            if experiment is None, period[0] is used, but period is None for time-series. '''
  # period
  if isinstance(period,(tuple,list)):
    if not all(isNumber(period)): raise ValueError
  elif isinstance(period,basestring): period = [int(prd) for prd in period.split('-')]
  elif isinstance(period,(int,np.integer)) or period is None : pass # handled later
  else: raise DateError("Illegal period definition: {:s}".format(str(period)))
  # prepare input  
  lclim = False; lts = False; lcvdp = False; ldiag = False # mode switches
  if mode.lower() == 'climatology': # post-processed climatology files
    lclim = True
    folder,experiment,name = getFolderName(name=name, experiment=experiment, folder=None, mode='avg', exps=exps)    
    if period is None: raise DateError('Currently CESM Climatologies have to be loaded with the period explicitly specified.')
  elif mode.lower() in ('time-series','timeseries'): # concatenated time-series files
    lts = True
    folder,experiment,name = getFolderName(name=name, experiment=experiment, folder=None, mode='avg', exps=exps)
    lclim = False; period = None; periodstr = None # to indicate time-series (but for safety, the input must be more explicit)
    if lautoregrid is None: lautoregrid = False # this can take very long!
  elif mode.lower() == 'cvdp': # CVDP files
    lcvdp = True
    folder,experiment,name = getFolderName(name=name, experiment=experiment, folder=None, mode='cvdp', 
                                           cvdp_mode=cvdp_mode, exps=exps)
    if period is None:
      if not isinstance(experiment,Exp): raise DatasetError('Periods can only be inferred for registered datasets.')
      period = (experiment.beginyear, experiment.endyear)  
  elif mode.lower() == 'diag': # diagnostic files
    ldiag = True
    folder,experiment,name = getFolderName(name=name, experiment=experiment, folder=None, mode='diag', exps=exps)
    raise NotImplementedError("Loading AMWG diagnostic files is not supported yet.")
  else: raise NotImplementedError("Unsupported mode: '{:s}'".format(mode))
  # cast/copy varlist
  if isinstance(varlist,basestring): varlist = [varlist] # cast as list
  elif varlist is not None: varlist = list(varlist) # make copy to avoid interference
  # handle stations and shapes
  if station and shape: raise ArgumentError
  elif station or shape: 
    if grid is not None: raise NotImplementedError('Currently CESM station data can only be loaded from the native grid.')
    if lcvdp: raise NotImplementedError('CVDP data is not available as station data.')
    if lautoregrid: raise GDALError('Station data can not be regridded, since it is not map data.')
    lstation = bool(station); lshape = bool(shape)
    # add station/shape parameters
    if varlist:
      params = stn_params if lstation else shp_params
      for param in params:
        if param not in varlist: varlist.append(param)
  else:
    lstation = False; lshape = False
  # period  
  if isinstance(period,(int,np.integer)):
    if not isinstance(experiment,Exp): raise DatasetError('Integer periods are only supported for registered datasets.')
    period = (experiment.beginyear, experiment.beginyear+period)
  if lclim: periodstr = '_{0:4d}-{1:4d}'.format(*period)
  elif lcvdp: periodstr = '{0:4d}-{1:4d}'.format(period[0],period[1]-1)
  else: periodstr = ''
  # N.B.: the period convention in CVDP is that the end year is included
  # generate filelist and attributes based on filetypes and domain
  if filetypes is None: filetypes = ['atm','lnd']
  elif isinstance(filetypes,(list,tuple,set,basestring)):
    if isinstance(filetypes,basestring): filetypes = [filetypes]
    else: filetypes = list(filetypes)
    # interpret/replace WRF filetypes (for convenience)
    tmp = []
    for ft in filetypes:
      if ft in ('const','drydyn3d','moist3d','rad','plev3d','srfc','xtrm','hydro'):
        if 'atm' not in tmp: tmp.append('atm')
      elif ft in ('lsm','snow'):
        if 'lnd' not in tmp: tmp.append('lnd')
      elif ft == 'aux': pass # currently not supported
      # N.B.: this has to be an equality test: "ft in ('aux')" is a substring test on the string 'aux'
      else: tmp.append(ft)
    filetypes = tmp; del tmp
    if 'axes' not in filetypes: filetypes.append('axes')    
  else: raise TypeError  
  atts = dict(); filelist = []; typelist = []
  for filetype in filetypes:
    fileclass = fileclasses[filetype]
    if lclim and fileclass.climfile is not None: filelist.append(fileclass.climfile)
    elif lts and fileclass.tsfile is not None: filelist.append(fileclass.tsfile)
    elif lcvdp and fileclass.cvdpfile is not None: filelist.append(fileclass.cvdpfile)
    elif ldiag and fileclass.diagfile is not None: filelist.append(fileclass.diagfile)
    typelist.append(filetype)
    atts.update(fileclass.atts) 
  # figure out ignore list  
  if ignore_list is None: ignore_list = set(ignore_list_2D)
  elif isinstance(ignore_list,(list,tuple)): ignore_list = set(ignore_list)
  elif not isinstance(ignore_list,set): raise TypeError
  if not load3D: ignore_list.update(ignore_list_3D)
  if lautoregrid is None: lautoregrid = not load3D # don't auto-regrid 3D variables - takes too long!
  # translate varlist
  if varatts is not None: atts.update(varatts)
  lSST = False
  if varlist is not None:
    varlist = list(varlist) 
    if 'SST' in varlist: # special handling of name SST variable, as it is part of Ts
      varlist.remove('SST')
      if not 'Ts' in varlist: varlist.append('Ts')
      lSST = True # Ts is added as SST below
    if translateVars is None: varlist = list(varlist) + translateVarNames(varlist, atts) # also add translations, just in case
    elif translateVars is True: varlist = translateVarNames(varlist, atts) 
    # N.B.: DatasetNetCDF does never apply translation!
  # NetCDF file mode
  ncmode = 'rw' if lwrite else 'r'   
  # get grid or station-set name
  if lstation:
    # the station name can be inserted as the grid name
    gridstr = '_'+station.lower(); # only use lower case for filenames
    griddef = None
  elif lshape:
    # the shape name can be inserted as the grid name
    gridstr = '_'+shape.lower(); # only use lower case for filenames
    griddef = None
  else:
    # N.B.: experiment may not carry a grid attribute (e.g. if it is None), hence the getattr
    if grid is None or grid == getattr(experiment, 'grid', None): 
      gridstr = ''; griddef = None
    else: 
      gridstr = '_'+grid.lower() # only use lower case for filenames
      griddef = loadPickledGridDef(grid=grid, res=None, filename=None, folder=grid_folder, check=True)
  # insert grid name and period
  filenames = []
  for filetype,fileformat in zip(typelist,filelist):
    if lclim: filename = fileformat.format(gridstr,periodstr) # put together specific filename for climatology
    elif lts: filename = fileformat.format(gridstr) # or for time-series
    elif lcvdp: filename = fileformat.format(experiment.name if experiment else name,periodstr) # not implemented: gridstr
    elif ldiag: raise NotImplementedError
    else: raise DatasetError
    filenames.append(filename) # append to list (passed to DatasetNetCDF later)
    # check existence
    filepath = '{:s}/{:s}'.format(folder,filename)
    if not os.path.exists(filepath):
      nativename = fileformat.format('',periodstr) # original filename (before regridding)
      nativepath = '{:s}/{:s}'.format(folder,nativename)
      if os.path.exists(nativepath):
        if lautoregrid: 
          from processing.regrid import performRegridding # causes circular reference if imported earlier
          griddef = loadPickledGridDef(grid=grid, res=None, folder=grid_folder)
          dataargs = dict(experiment=experiment, filetypes=[filetype], period=period)
          print("The '{:s}' (CESM) dataset for the grid ('{:s}') is not available:\n Attempting regridding on-the-fly.".format(name,grid))
          # a true return value is treated as failure (presumably a non-zero exit code)
          if performRegridding('CESM','climatology' if lclim else 'time-series', griddef, dataargs): # default kwargs
            raise IOError("Automatic regridding failed!")
          print("Output: '{:s}'".format(filepath))            
        else: raise IOError("The '{:s}' (CESM) dataset '{:s}' for the selected grid ('{:s}') is not available - use the regrid module to generate it.".format(name,filename,grid))
      else: raise IOError("The '{:s}' (CESM) dataset file '{:s}' does not exits!\n({:s})".format(name,filename,folder))
   
  # load dataset
  if experiment: title = experiment.title
  else: title = name
  dataset = DatasetNetCDF(name=name, folder=folder, filelist=filenames, varlist=varlist, axes=None, 
                          varatts=atts, title=title, multifile=False, ignore_list=ignore_list, 
                          ncformat='NETCDF4', squeeze=True, mode=ncmode, check_vars=check_vars)
  # replace time axis
  if lreplaceTime:
    if lts or lcvdp:
      # check time axis and center at 1979-01 (zero-based)
      if experiment is None: ys = period[0]; ms = 1
      else: ys,ms,ds = [int(t) for t in experiment.begindate.split('-')]; assert ds == 1
      if dataset.hasAxis('time'):
        ts = (ys-1979)*12 + (ms-1); te = ts+len(dataset.time) # month since 1979 (Jan 1979 = 0)
        axatts = dict(long_name='Month since 1979-01')
        timeAxis = Axis(name='time', units='month', coord=np.arange(ts,te,1, dtype='int16'), atts=axatts)
        dataset.replaceAxis(dataset.time, timeAxis, asNC=False, deepcopy=False)
      if dataset.hasAxis('year'):
        ts = ys-1979; te = ts+len(dataset.year) # years since 1979 (1979 = 0)
        axatts = dict(long_name='Years since 1979-01')
        yearAxis = Axis(name='year', units='year', coord=np.arange(ts,te,1, dtype='int16'), atts=axatts)
        dataset.replaceAxis(dataset.year, yearAxis, asNC=False, deepcopy=False)
    elif lclim:
      if dataset.hasAxis('time') and not dataset.time.units.lower() in monthlyUnitsList:
        axatts = dict(long_name='Month of the Year')
        timeAxis = Axis(name='time', units='month', coord=np.arange(1,13, dtype='int16'), atts=axatts)
        assert len(dataset.time) == len(timeAxis), dataset.time
        dataset.replaceAxis(dataset.time, timeAxis, asNC=False, deepcopy=False)
      elif dataset.hasAxis('year'): raise NotImplementedError(dataset)
  # add SST under its own name
  if lSST: dataset['SST'] = dataset.Ts
  # correct ordinal number of shape (should start at 1, not 0)
  if lshape:
    # mask all shapes that are incomplete in dataset
    if lencl and 'shp_encl' in dataset: dataset.mask(mask='shp_encl', invert=True)   
    if dataset.hasAxis('shapes'): raise AxisError("Axis 'shapes' should be renamed to 'shape'!")
    if not dataset.hasAxis('shape'): raise AxisError
    if dataset.shape.coord[0] == 0: dataset.shape.coord += 1
  # check
  if len(dataset) == 0: raise DatasetError('Dataset is empty - check source file or variable list!')
  # add projection, if applicable
  if not ( lstation or lshape ):
    dataset = addGDALtoDataset(dataset, griddef=griddef, gridfolder=grid_folder, lwrap360=True, geolocator=True)
  # return formatted dataset
  return dataset

## Dataset API

# Module-level metadata and loader aliases (the section header calls this the "Dataset API").
# NOTE(review): several comments and names below still refer to CESM or WRF although the dataset 
#               name is 'CMIP5' - presumably left over from a template; confirm before relying on them.
dataset_name = 'CMIP5' # dataset name
# N.B.: the next three lines are bare name references, not assignments: they have no effect if the 
#       names were defined earlier in the file and raise a NameError at import time otherwise
root_folder # root folder of the dataset
avgfolder # root folder for monthly averages
outfolder # root folder for direct model output (the original comment said "WRF" - TODO confirm)
ts_file_pattern = 'cmip5{0:s}{1:s}_monthly.nc' # filename pattern: filetype, grid
clim_file_pattern = 'cmip5{0:s}{1:s}_clim{2:s}.nc' # filename pattern: filetype, grid, period
data_folder = root_folder # folder for user data
grid_def = {'':None} # only an empty placeholder entry ("there are too many...")
grid_res = {'':1.} # approximate grid resolution at 45 degrees latitude
default_grid = None 
# functions to access specific datasets
loadLongTermMean = None # no long-term mean loader is provided (the original comment said "WRF doesn't have that...")
# NOTE(review): loadCESM and loadCESM_TS are not defined in the part of the file shown here, where 
#               the loaders are called loadCMIP5 and loadCMIP5_TS - confirm which ones are intended
loadClimatology = loadCESM # pre-processed, standardized climatology
loadTimeSeries = loadCESM_TS # time-series data
# the station and shape loaders are currently disabled
#loadStationClimatology = loadCESM_Stn # pre-processed, standardized climatology at stations
#loadStationTimeSeries = loadCESM_StnTS # time-series data at stations
#loadShapeClimatology = loadCESM_Shp # climatologies without associated grid (e.g. provinces or basins) 
#loadShapeTimeSeries = loadCESM_ShpTS # time-series without associated grid (e.g. provinces or basins)


## (ab)use main execution for quick test
if __name__ == '__main__':
  
  # set mode/parameters (uncomment exactly one mode)
#   mode = 'test_climatology'
#   mode = 'test_timeseries'
#   mode = 'test_ensemble'
#   mode = 'test_point_climatology'
#   mode = 'test_point_timeseries'
#   mode = 'test_point_ensemble'
#   mode = 'test_cvdp'
  mode = 'pickle_grid'
#     mode = 'shift_lon'
#   experiments = ['Ctrl-1', 'Ctrl-A', 'Ctrl-B', 'Ctrl-C']
#   experiments += ['Ctrl-2050', 'Ctrl-A-2050', 'Ctrl-B-2050', 'Ctrl-C-2050']
  experiments = ('Ctrl-1',)
  periods = (15,)
  filetypes = ('atm',) # ['atm','lnd','ice']
  grids = ('cesm1x1',)*len(experiments) # grb1_d01
#   pntset = 'shpavg'
  pntset = 'ecprecip'

  from projects.CESM_experiments import Exp, CESM_exps, ensembles
  # N.B.: importing Exp through CESM_experiments is necessary, otherwise some isinstance() calls fail

  # pickle grid definition: load a dataset on its native grid, store its grid definition under the 
  # given grid name and read the pickle back in as a check
  if mode == 'pickle_grid':
    
    for grid,experiment in zip(grids,experiments):
      
      print('')
      print('   ***   Pickling Grid Definition for {0:s}   ***   '.format(grid))
      print('')
      
      # load GridDefinition
      dataset = loadCESM(experiment=CESM_exps[experiment], grid=None, filetypes=['lnd'], period=(1979,1989))
      griddef = dataset.griddef
      #del griddef.xlon, griddef.ylat      
      print(griddef)
      griddef.name = grid
      print('   Loading Definition from \'{0:s}\''.format(dataset.name))
      # save pickle
      filename = '{0:s}/{1:s}'.format(grid_folder,griddef_pickle.format(grid))
      if os.path.exists(filename): os.remove(filename) # overwrite
      # N.B.: pickles are binary data, hence 'wb'; the with-statement also closes the file on errors
      with open(filename, 'wb') as filehandle:
        pickle.dump(griddef, filehandle)
      
      print('   Saving Pickle to \'{0:s}\''.format(filename))
      print('')
      
      # load pickle to make sure it is right
      del griddef
      griddef = loadPickledGridDef(grid, res=None, folder=grid_folder)
      print(griddef)
      print('')
      print(griddef.wrap360)
      
Beispiel #20
0
  # generate averaged climatology
  elif mode == 'average_timeseries':
    
    # load source
    periodstr = '%4i-%4i'%period
    print('\n')
    print('   ***   Processing Grid %s from %s   ***   '%(grid,periodstr))
    print('\n')
    source = loadNARR_TS()
    print(source)
    print('\n')
    # prepare sink
    gridstr = '' if grid is 'NARR' else '_'+grid
    filename = avgfile.format(gridstr,'_'+periodstr)
    if os.path.exists(avgfolder+filename): os.remove(avgfolder+filename)
    sink = DatasetNetCDF(name='NARR Climatology', folder=avgfolder, filelist=[filename], atts=source.atts, mode='w')
    sink.atts.period = periodstr 
    
    # determine averaging interval
    offset = source.time.getIndex(period[0]-1979)/12 # origin of monthly time-series is at January 1979 
    # initialize processing
#     CPU = CentralProcessingUnit(source, sink, varlist=['precip', 'T2'], tmp=True) # no need for lat/lon
    CPU = CentralProcessingUnit(source, sink, varlist=None, tmp=True) # no need for lat/lon
    
    # start processing climatology
    CPU.Climatology(period=period[1]-period[0], offset=offset, flush=False)
    
    # sync temporary storage with output
    CPU.sync(flush=True)

#     # make new masks
Beispiel #21
0
      
    # load source
    periodstr = '%4i-%4i'%period
    print('\n')
    print('   ***   Processing Time-series from %s   ***   '%(periodstr,))
    print('\n')
    source = loadCRU_TS()
    source = source(time=timeSlice(period)) # only get relevant time-slice    
    print(source)
    assert period[0] != 1979 or source.time.coord[0] == 0
    assert len(source.time) == (period[1]-period[0])*12
    print('\n')
    # prepare sink
    filename = avgfile.format('','_'+periodstr,)
    if os.path.exists(avgfolder+filename): os.remove(avgfolder+filename)
    sink = DatasetNetCDF(name='CRU Climatology', folder=avgfolder, filelist=[filename], atts=source.atts, mode='w')
    sink.atts.period = periodstr 
    
    # determine averaging interval
    offset = source.time.getIndex(period[0]-1979)/12 # origin of monthly time-series is at January 1979 
    # initialize processing
#     CPU = CentralProcessingUnit(source, sink, varlist=['wetfrq'])
    CPU = CentralProcessingUnit(source, sink)
    # start processing      
    print('')
    print('   +++   processing   +++   ') 
    CPU.Climatology(period=period[1]-period[0], offset=offset, flush=False)
    # sync temporary storage with output
    CPU.sync(flush=False)   
    print('\n')
Beispiel #22
0
      # load source
      periodstr = 'Climatology' if period is None else '{0:4d}-{1:4d}'.format(*period)
      print('\n\n   ***   Processing Resolution %s from %s   ***   \n\n'%(res,periodstr))
      if period is None: source = loadGPCC_LTM(varlist=None,resolution=res) # ['stations','precip']
      else: source = loadGPCC_TS(varlist=None,resolution=res)
      source = source(time=timeSlice(period))
      #source.load()
      print(source)
      print('\n')
            
      # prepare sink
      gridstr = res if grid == 'GPCC' else grid
      filename = getFileName(grid=gridstr, period=period, name='GPCC', filepattern=avgfile)
      if os.path.exists(avgfolder+filename): os.remove(avgfolder+filename)
      atts =dict(period=periodstr, name='GPCC', title='GPCC Climatology') 
      sink = DatasetNetCDF(name='GPCC Climatology', folder=avgfolder, filelist=[filename], atts=source.atts, mode='w')
#       sink = addGDALtoDataset(sink, griddef=source.griddef)
      
      # initialize processing
      CPU = CentralProcessingUnit(source, sink, tmp=True)

      if period is not None:
        # determine averaging interval
        offset = source.time.getIndex(period[0]-1979)/12 # origin of monthly time-series is at January 1979 
        # start processing climatology
        CPU.Climatology(period=period[1]-period[0], offset=offset, flush=False)
#         CPU.sync(flush=True)

      # get NARR coordinates
      if grid is not 'GPCC':
        griddef = loadPickledGridDef(grid=grid, res=None, folder=grid_folder)
Beispiel #23
0
def loadObservations(name=None, folder=None, period=None, grid=None, station=None, shape=None, lencl=False, 
                     varlist=None, varatts=None, filepattern=None, filelist=None, resolution=None, 
                     projection=None, geotransform=None, axes=None, lautoregrid=None, mode='climatology'):
  ''' A function to load standardized observational datasets as DatasetNetCDF. 
  
      Arguments:
        name, resolution, filepattern : used (with period and grid) by getFileName to construct 
                        the filename, if no filelist is given
        folder        : folder that contains the file(s)
        period        : 'begin-end' string, integer or (begin,end) tuple; in 'climatology' mode it 
                        may only be omitted for PCIC, PRISM, GPCC and NARR; reset to None for 
                        time-series
        grid          : name of the target grid; None or the dataset name selects the native grid
        station, shape : name of a station or shape set (mutually exclusive); takes the place of 
                        the grid name in the filename
        lencl         : for shape data, mask using the 'shp_encl' variable, if present
        varlist       : variable name or list of names (None means all variables); names are 
                        translated using varatts
        varatts       : variable attributes (default: copy of default_varatts)
        filelist      : explicit list of files; if given, it is used as is and the existence check 
                        and on-the-fly regridding are skipped
        projection, geotransform : handed to addGDALtoDataset for the native grid
        axes          : handed to DatasetNetCDF
        lautoregrid   : regrid on-the-fly if only the native file exists (default for time-series: False)
        mode          : 'climatology' or 'time-series' ('timeseries'); other values are not rejected 
                        and skip the period handling
                        
      Raises: ValueError/TypeError (invalid period), ArgumentError (station and shape), 
      NotImplementedError, GDALError, IOError (file not available) and AxisError (shape axis). '''
  # prepare input
  lts = mode.lower() in ('time-series','timeseries')
  if mode.lower() == 'climatology': # post-processed climatology files
    # transform period
    if period is None or period == '':
      if name not in ('PCIC','PRISM','GPCC','NARR'): 
        raise ValueError("A period is required to load observational climatologies.")
    elif isinstance(period,basestring):
      period = tuple([int(prd) for prd in period.split('-')]) 
    elif not isinstance(period,(int,np.integer)) and not ( isinstance(period,tuple) and len(period) == 2 ): 
      # N.B.: anything that is neither an integer nor a 2-tuple is rejected
      raise TypeError(period)
  elif lts: # concatenated time-series files
    period = None # to indicate time-series (but for safety, the input must be more explicit)
    if lautoregrid is None: lautoregrid = False # this can take very long!
  # cast/copy varlist
  if isinstance(varlist,basestring): varlist = [varlist] # cast as list
  elif varlist is not None: varlist = list(varlist) # make copy to avoid interference
  # figure out station and shape options
  if station and shape: raise ArgumentError()
  elif station or shape: 
    if grid is not None: raise NotImplementedError('Currently observational station data can only be loaded from the native grid.')
    if lautoregrid: raise GDALError('Station data can not be regridded, since it is not map data.')
    lstation = bool(station); lshape = bool(shape)
    grid = station if lstation else shape
    # add station/shape parameters
    if varlist:
      params = stn_params if lstation else shp_params
      for param in params:
        if param not in varlist: varlist.append(param)    
  else:
    lstation = False; lshape = False
  # varlist (varlist = None means all variables)
  if varatts is None: varatts = default_varatts.copy()
  if varlist is not None: varlist = translateVarNames(varlist, varatts)
  # filelist
  if filelist is None: 
    filename = getFileName(name=name, resolution=resolution, period=period, grid=grid, filepattern=filepattern)
    # check existence
    filepath = '{:s}/{:s}'.format(folder,filename)
    if not os.path.exists(filepath):
      nativename = getFileName(name=name, resolution=resolution, period=period, grid=None, filepattern=filepattern)
      nativepath = '{:s}/{:s}'.format(folder,nativename)
      if os.path.exists(nativepath):
        if lautoregrid: 
          from processing.regrid import performRegridding # causes circular reference if imported earlier
          griddef = loadPickledGridDef(grid=grid, res=None, folder=grid_folder)
          dataargs = dict(period=period, resolution=resolution)
          # regrid the kind of file that was requested (time-series or climatology)
          performRegridding(name, 'time-series' if lts else 'climatology', griddef, dataargs) # default kwargs
        else: raise IOError("The dataset '{:s}' for the selected grid ('{:s}') is not available - use the regrid module to generate it.".format(filename,grid) )
      else: raise IOError("The dataset file '{:s}' does not exits!\n('{:s}')".format(filename,filepath))
    filelist = [filename]
  # load dataset
  # N.B.: a filelist supplied by the caller is used as is (no filename is constructed in that case)
  dataset = DatasetNetCDF(name=name, folder=folder, filelist=filelist, varlist=varlist, varatts=varatts, 
                          axes=axes, multifile=False, ncformat='NETCDF4')
  # mask all shapes that are incomplete in dataset
  if shape and lencl and 'shp_encl' in dataset: 
    dataset.load() # need to load data before masking; is cheap for shape averages, anyway
    dataset.mask(mask='shp_encl', invert=True, skiplist=shp_params)
  # correct ordinal number of shape (should start at 1, not 0)
  if lshape:
    if dataset.hasAxis('shapes'): raise AxisError("Axis 'shapes' should be renamed to 'shape'!")
    if not dataset.hasAxis('shape'): 
      raise AxisError()
    if dataset.shape.coord[0] == 0: dataset.shape.coord += 1
  # figure out grid
  if not lstation and not lshape:
    if grid is None or grid == name:
      dataset = addGDALtoDataset(dataset, projection=projection, geotransform=geotransform, gridfolder=grid_folder)
    elif isinstance(grid,basestring): # load from pickle file
      # add GDAL functionality to dataset (the grid name is passed as griddef) 
      dataset = addGDALtoDataset(dataset, griddef=grid, gridfolder=grid_folder)
    else: raise TypeError(dataset)
    # N.B.: projection should be auto-detected, if geographic (lat/lon)
  return dataset
Beispiel #24
0
def performExtraction(dataset, mode, stnfct, dataargs, loverwrite=False, varlist=None, lwrite=True, lreturn=False,
                      ldebug=False, lparallel=False, pidstr='', logger=None):
  ''' worker function to extract point data from gridded dataset 
  
      Arguments:
        dataset     : name of the source dataset (string), handed to getMetaData
        mode        : 'climatology' or 'time-series'
        stnfct      : callable without arguments that returns the station (template) Dataset
        dataargs    : dict with the dataset arguments, handed to getMetaData
        loverwrite  : recompute even if the target file exists and is newer than the source
        varlist     : variables to process, handed to CentralProcessingUnit
        lwrite      : write the result to a NetCDF file (via a temporary file, unless lreturn)
        lreturn     : return the resulting dataset instead of an exit code
        ldebug      : prefix the filename with 'test_' and log more details
        lparallel   : parallel mode; requires lwrite and excludes lreturn
        pidstr      : process ID string used as prefix for log messages
        logger      : logging.Logger, name of a logger, or None to use the root logger
        
      Returns the sink dataset if lreturn is set and 0 otherwise; if the computation is skipped, 
      because an up-to-date target file exists, nothing is returned (None).
      
      Raises TypeError (invalid arguments), IOError (invalid write/return options or missing folder), 
      NotImplementedError (unknown mode) and DateError (inconsistent period). '''  
  # input checking
  if not isinstance(dataset,basestring): raise TypeError
  if not isinstance(dataargs,dict): raise TypeError # all dataset arguments are kwargs 
  if not callable(stnfct): raise TypeError # function to load station dataset
  if lparallel: 
    if not lwrite: raise IOError('In parallel mode we can only write to disk (i.e. lwrite = True).')
    if lreturn: raise IOError('Can not return datasets in parallel mode (i.e. lreturn = False).')
  
  # logging
  if logger is None: # fall back to root logger
    logger = logging.getLogger()
    # N.B.: only attach a handler if there is none yet; otherwise every call would add another 
    #       handler to the root logger and each message would be printed multiple times
    if not logger.handlers: logger.addHandler(logging.StreamHandler())
  else:
    if isinstance(logger,basestring): 
      logger = logging.getLogger(name=logger) # connect to existing one
    elif not isinstance(logger,logging.Logger): 
      raise TypeError('Expected logger ID/handle in logger KW; got {}'.format(str(logger)))

  lclim = False; lts = False
  if mode == 'climatology': lclim = True
  elif mode == 'time-series': lts = True
  else: raise NotImplementedError("Unrecognized Mode: '{:s}'".format(mode))
  
  ## extract meta data from arguments
  module, dataargs, loadfct, filepath, datamsgstr = getMetaData(dataset, mode, dataargs)
  dataset_name = dataargs.dataset_name; periodstr = dataargs.periodstr; avgfolder = dataargs.avgfolder

  # load template dataset
  stndata = stnfct() # load station dataset from function
  if not isinstance(stndata, Dataset): raise TypeError
  # N.B.: the loading function is necessary, because DatasetNetCDF instances do not pickle well 
            
  # determine age of source file (only needed to decide whether the target file is up-to-date)
  if not loverwrite: sourceage = datetime.fromtimestamp(os.path.getmtime(filepath))    
          
  # get filename for target dataset and do some checks
  filename = getTargetFile(stndata.name, dataset, mode, module, dataargs, lwrite)
  if ldebug: filename = 'test_' + filename
  if not os.path.exists(avgfolder): raise IOError("Dataset folder '{:s}' does not exist!".format(avgfolder))
  lskip = False # else just go ahead
  if lwrite:
    if lreturn: 
      tmpfilename = filename # no temporary file if dataset is passed on (can't rename the file while it is open!)
    else: 
      if lparallel: tmppfx = 'tmp_exstns_{:s}_'.format(pidstr[1:-1])
      else: tmppfx = 'tmp_exstns_'
      tmpfilename = tmppfx + filename      
    filepath = avgfolder + filename # N.B.: from here on filepath refers to the target file
    tmpfilepath = avgfolder + tmpfilename
    if os.path.exists(filepath): 
      if not loverwrite: 
        age = datetime.fromtimestamp(os.path.getmtime(filepath))
        # if source file is newer than sink file or if sink file is a stub, recompute, otherwise skip
        if age > sourceage and os.path.getsize(filepath) > 1e5: lskip = True
        # N.B.: NetCDF files smaller than 100kB are usually incomplete header fragments from a previous crashed
      if not lskip: os.remove(filepath) # recompute
  
  # depending on last modification time of file or overwrite setting, start computation, or skip
  if lskip:        
    # print message
    skipmsg =  "\n{:s}   >>>   Skipping: file '{:s}' in dataset '{:s}' already exists and is newer than source file.".format(pidstr,filename,dataset_name)
    skipmsg += "\n{:s}   >>>   ('{:s}')\n".format(pidstr,filepath)
    logger.info(skipmsg)              
  else:
          
    ## actually load datasets
    source = loadfct() # load source 
    # check period
    if 'period' in source.atts and dataargs.periodstr != source.atts.period: # a NetCDF attribute
      raise DateError("Specifed period is inconsistent with netcdf records: '{:s}' != '{:s}'".format(periodstr,source.atts.period))
  
    # print message
    if lclim: opmsgstr = "Extracting '{:s}'-type Point Data from Climatology ({:s})".format(stndata.name, periodstr)
    elif lts: opmsgstr = "Extracting '{:s}'-type Point Data from Time-series".format(stndata.name)
    else: raise NotImplementedError("Unrecognized Mode: '{:s}'".format(mode))
    # print feedback to logger
    logger.info('\n{0:s}   ***   {1:^65s}   ***   \n{0:s}   ***   {2:^65s}   ***   \n'.format(pidstr,datamsgstr,opmsgstr))
    if not lparallel and ldebug: logger.info('\n'+str(source)+'\n')  
    
    ## create new sink/target file
    # set attributes   
    atts=source.atts.copy()
    atts['period'] = dataargs.periodstr if dataargs.periodstr else 'time-series' 
    atts['name'] = dataset_name; atts['station'] = stndata.name
    atts['title'] = '{:s} (Stations) from {:s} {:s}'.format(stndata.title,dataset_name,mode.title())
    # make new dataset
    if lwrite: # write to NetCDF file 
      if os.path.exists(tmpfilepath): os.remove(tmpfilepath) # remove old temp files 
      sink = DatasetNetCDF(folder=avgfolder, filelist=[tmpfilename], atts=atts, mode='w')
    else: sink = Dataset(atts=atts) # only create dataset in memory
    
    # initialize processing
    CPU = CentralProcessingUnit(source, sink, varlist=varlist, tmp=False, feedback=ldebug)
  
    # extract data at station locations
    CPU.Extract(template=stndata, flush=True)
    # get results    
    CPU.sync(flush=True)
    
    # print dataset
    if not lparallel and ldebug:
      logger.info('\n'+str(sink)+'\n')   
    # write results to file
    if lwrite:
      sink.sync()
      writemsg =  "\n{:s}   >>>   Writing to file '{:s}' in dataset {:s}".format(pidstr,filename,dataset_name)
      writemsg += "\n{:s}   >>>   ('{:s}')\n".format(pidstr,filepath)
      logger.info(writemsg)      
      
      # rename file to proper name
      if not lreturn:
        sink.unload(); sink.close(); del sink # destroy all references 
        if os.path.exists(filepath): os.remove(filepath) # remove old file
        os.rename(tmpfilepath,filepath)
      # N.B.: there is no temporary file if the dataset is returned, because an open file can't be renamed
        
    # clean up and return
    source.unload(); del source#, CPU
    if lreturn:      
      return sink # return dataset for further use (netcdf file still open!)
    else:            
      return 0 # "exit code"
Beispiel #25
0
            print('\n')

            # prepare sink
            gridstr = res if grid == 'GPCC' else grid
            filename = getFileName(grid=gridstr,
                                   period=period,
                                   name='GPCC',
                                   filepattern=avgfile)
            if os.path.exists(avgfolder + filename):
                os.remove(avgfolder + filename)
            atts = dict(period=periodstr,
                        name='GPCC',
                        title='GPCC Climatology')
            sink = DatasetNetCDF(name='GPCC Climatology',
                                 folder=avgfolder,
                                 filelist=[filename],
                                 atts=source.atts,
                                 mode='w')
            #       sink = addGDALtoDataset(sink, griddef=source.griddef)

            # initialize processing
            CPU = CentralProcessingUnit(source, sink, tmp=True)

            if period is not None:
                # determine averaging interval
                offset = source.time.getIndex(
                    period[0] - 1979
                ) / 12  # origin of monthly time-series is at January 1979
                # start processing climatology
                CPU.Climatology(period=period[1] - period[0],
                                offset=offset,
Beispiel #26
0
def loadObservations(name=None, folder=None, period=None, grid=None, station=None, shape=None, lencl=False, 
                     varlist=None, varatts=None, filepattern=None, filelist=None, resolution=None, 
                     projection=None, geotransform=None, axes=None, lautoregrid=None, mode='climatology'):
  ''' Load a standardized observational dataset from NetCDF and return it as a DatasetNetCDF.
  
      mode:        'climatology' loads a post-processed climatology; a period is then required, unless 
                   the dataset name is one of PCIC, PRISM, GPCC or NARR. 'time-series' (or 'timeseries') 
                   resets the period to None and disables lautoregrid, unless it was set explicitly.
      period:      an integer, a tuple of length two, or a string of the form 'begin-end' (which is 
                   converted to a tuple of integers).
      station/shape: mutually exclusive; if either is given, it takes the place of the grid name, no 
                   grid may be specified, and the station/shape parameters are appended to varlist.
      lencl:       if a shape is given and the dataset contains 'shp_encl', the dataset is loaded and 
                   masked with it (inverted).
      varlist:     a variable name or a list of names; None means all variables.
      filelist:    if None, the file name is generated with getFileName and its existence is checked; 
                   if the file for the requested grid is missing but the native-grid file exists, it 
                   is regridded when lautoregrid is set. A filelist that is passed explicitly is 
                   handed to DatasetNetCDF as is.
                   
      Raises ValueError for a missing period, TypeError for an invalid period or grid, ArgumentError
      if both station and shape are given, NotImplementedError or GDALError for unsupported 
      station/shape options, IOError if the file does not exist, and AxisError if the shape axis is 
      missing or misnamed. '''
  # prepare input
  if mode.lower() == 'climatology': # post-processed climatology files
    # transform period
    if period is None or period == '':
      if name not in ('PCIC','PRISM','GPCC','NARR'): 
        raise ValueError("A period is required to load observational climatologies.")
    elif isinstance(period,basestring):
      period = tuple([int(prd) for prd in period.split('-')]) 
    elif not isinstance(period,(int,np.integer)) and not ( isinstance(period,tuple) and len(period) == 2 ): 
      # anything that is neither an integer nor a tuple of length two is rejected
      raise TypeError("The period has to be an integer, a tuple of length two, or a string of the form 'begin-end'.")
  elif mode.lower() in ('time-series','timeseries'): # concatenated time-series files
    period = None # to indicate time-series (but for safety, the input must be more explicit)
    if lautoregrid is None: lautoregrid = False # this can take very long!
  # cast/copy varlist
  if isinstance(varlist,basestring): varlist = [varlist] # cast as list
  elif varlist is not None: varlist = list(varlist) # make copy to avoid interference
  # figure out station and shape options
  if station and shape: raise ArgumentError
  elif station or shape: 
    if grid is not None: 
      raise NotImplementedError('Currently observational station data can only be loaded from the native grid.')
    if lautoregrid: 
      raise GDALError('Station data can not be regridded, since it is not map data.')
    lstation = bool(station); lshape = bool(shape)
    grid = station if lstation else shape # the station/shape name is used in place of the grid name
    # add station/shape parameters
    if varlist:
      params = stn_params if lstation else shp_params
      for param in params:
        if param not in varlist: varlist.append(param)    
  else:
    lstation = False; lshape = False
  # varlist (varlist = None means all variables)
  if varatts is None: varatts = default_varatts.copy()
  if varlist is not None: varlist = translateVarNames(varlist, varatts)
  # filelist
  if filelist is None: 
    filename = getFileName(name=name, resolution=resolution, period=period, grid=grid, filepattern=filepattern)
    # check existence
    filepath = '{:s}/{:s}'.format(folder,filename)
    if not os.path.exists(filepath):
      nativename = getFileName(name=name, resolution=resolution, period=period, grid=None, filepattern=filepattern)
      nativepath = '{:s}/{:s}'.format(folder,nativename)
      if os.path.exists(nativepath):
        if lautoregrid: 
          from processing.regrid import performRegridding # causes circular reference if imported earlier
          griddef = loadPickledGridDef(grid=grid, res=None, folder=grid_folder)
          dataargs = dict(period=period, resolution=resolution)
          # NOTE(review): the regridding mode is hard-coded to 'climatology', regardless of mode - confirm
          performRegridding(name, 'climatology',griddef, dataargs) # default kwargs
        else: 
          raise IOError("The dataset '{:s}' for the selected grid ('{:s}') is not available - use the regrid module to generate it.".format(filename,grid))
      else: 
        raise IOError("The dataset file '{:s}' does not exits!\n('{:s}')".format(filename,filepath))
    filelist = [filename]
  # N.B.: an explicitly passed filelist is used as is (without checking for existence)
  # load dataset
  dataset = DatasetNetCDF(name=name, folder=folder, filelist=filelist, varlist=varlist, varatts=varatts, 
                          axes=axes, multifile=False, ncformat='NETCDF4')
  # mask all shapes that are incomplete in dataset
  if shape and lencl and 'shp_encl' in dataset: 
    dataset.load() # need to load data before masking; is cheap for shape averages, anyway
    dataset.mask(mask='shp_encl', invert=True, skiplist=shp_params)
  # correct ordinal number of shape (should start at 1, not 0)
  if lshape:
    if dataset.hasAxis('shapes'): raise AxisError("Axis 'shapes' should be renamed to 'shape'!")
    if not dataset.hasAxis('shape'): 
      raise AxisError
    if dataset.shape.coord[0] == 0: dataset.shape.coord += 1
  # figure out grid
  if not lstation and not lshape:
    if grid is None or grid == name:
      dataset = addGDALtoDataset(dataset, projection=projection, geotransform=geotransform, gridfolder=grid_folder)
    elif isinstance(grid,basestring): # load from pickle file
      # add GDAL functionality to dataset 
      dataset = addGDALtoDataset(dataset, griddef=grid, gridfolder=grid_folder)
    else: raise TypeError
    # N.B.: projection should be auto-detected, if geographic (lat/lon)
  return dataset
Beispiel #27
0
def computeClimatology(experiment,
                       filetype,
                       domain,
                       periods=None,
                       offset=0,
                       griddef=None,
                       varlist=None,
                       ldebug=False,
                       loverwrite=False,
                       lparallel=False,
                       pidstr='',
                       logger=None):
    ''' Worker function to compute climatologies for given file parameters.

        The monthly time-series file of the experiment (selected by filetype and domain) is averaged
        into one climatology file per period; the result is written to a temporary file in the
        experiment's avgfolder, which is renamed to its proper name once it is complete.

        experiment: an Exp instance; its name and avgfolder are used
        filetype:   key into fileclasses, which provides the file name patterns
        domain:     integer domain number
        periods:    list/tuple of averaging periods in years; None selects the entire record
        offset:     number of years between the begin of the file and the begin of the climatology
        griddef:    a GridDefinition, if the climatology is to be regridded (otherwise None)
        varlist:    passed on to the CentralProcessingUnit
        ldebug:     prefix output files with 'test_' and log more information
        loverwrite: recompute climatologies, even if the existing file is newer than the source
        lparallel:  include the process ID (pidstr without first and last character) in temp file names
        pidstr:     string that is prepended to log messages
        logger:     logger for all messages; a standard library logger is used if None

        Raises TypeError for invalid arguments and DateError for dates outside of the file.
        Returns 0 (also if the source file is missing or periods are skipped). '''
    # input type checks
    if not isinstance(experiment, Exp): raise TypeError
    if not isinstance(filetype, basestring): raise TypeError
    if not isinstance(domain, (np.integer, int)): raise TypeError
    if periods is not None and not (isinstance(periods, (tuple, list))
                                    and isInt(periods)):
        raise TypeError
    if not isinstance(offset, (np.integer, int)): raise TypeError
    # N.B.: np.bool_ is used, because the alias np.bool is not available in all NumPy versions
    if not isinstance(loverwrite, (bool, np.bool_)): raise TypeError
    if griddef is not None and not isinstance(griddef, GridDefinition):
        raise TypeError
    # the logger is optional, but it is used unconditionally below
    if logger is None:
        import logging
        logger = logging.getLogger(__name__)

    # load source
    dataset_name = experiment.name
    fileclass = fileclasses[filetype]  # used for target file name
    tsfile = fileclass.tsfile.format(domain, '')
    expfolder = experiment.avgfolder
    filepath = '{:s}/{:s}'.format(expfolder, tsfile)
    logger.info('\n\n{0:s}   ***   Processing Experiment {1:<15s}   ***   '.
                format(pidstr, "'{:s}'".format(dataset_name)) +
                '\n{0:s}   ***   {1:^37s}   ***   \n'.format(
                    pidstr, "'{:s}'".format(tsfile)))

    # check file: if the monthly source file is missing, print message and skip
    if not os.path.exists(filepath):
        skipmsg = "\n{:s}   >>>   File '{:s}' in dataset '{:s}' is missing --- skipping!".format(
            pidstr, tsfile, dataset_name)
        skipmsg += "\n{:s}   >>>   ('{:s}')\n".format(pidstr, filepath)
        logger.warning(skipmsg)
        # N.B.: this can cause a lot of error messages, when not all files are present
        return 0

    # read begin/enddates
    import netCDF4 as nc
    ncfile = nc.Dataset(filepath, mode='r')
    try:
        begintuple = ncfile.begin_date.split('-')
        endtuple = ncfile.end_date.split('-')
    finally:
        ncfile.close()  # also close the file, if an attribute is missing
    # N.B.: at this point we don't want to initialize a full GDAL-enabled dataset, since we don't even
    #       know if we need it, and it creates a lot of overhead

    # determine age of source file (only needed to decide if existing files can be skipped)
    if not loverwrite:
        sourceage = datetime.fromtimestamp(os.path.getmtime(filepath))

    # figure out start date
    filebegin = int(begintuple[0])  # first element is the year
    fileend = int(endtuple[0])  # first element is the year
    begindate = offset + filebegin
    if not (filebegin <= begindate <= fileend): raise DateError
    # handle cases where the first month in the record is not January
    firstmonth = int(begintuple[1])  # second element is the month
    shift = firstmonth - 1  # will be zero for January (01)

    ## loop over periods
    # N.B.: fileend is the last year in the file, so the longest period that passes the end date
    #       check below is fileend-begindate+1 years; this is used as default
    if periods is None: periods = [fileend - begindate + 1]
    source = None  # will later be assigned to the source dataset
    for period in periods:

        # figure out period
        enddate = begindate + period
        if filebegin > enddate:
            raise DateError('End date earlier than begin date.')
        if enddate - 1 > fileend:  # if filebegin is 1979 and the simulation is 10 years, fileend will be 1988, not 1989!
            # if end date is not available, skip period
            endmsg = "\n{:s}   ---   Invalid Period for '{:s}': End Date {:4d} not in File!   ---   \n".format(
                pidstr, dataset_name, enddate)
            endmsg += "{:s}   ---   ('{:s}')\n".format(pidstr, filepath)
            logger.info(endmsg)
            continue

        ## perform averaging for selected period

        # determine if sink file already exists, and what to do about it
        periodstr = '{0:4d}-{1:4d}'.format(begindate, enddate)
        gridstr = '' if griddef is None or griddef.name == 'WRF' else '_' + griddef.name
        filename = fileclass.climfile.format(domain, gridstr, '_' + periodstr)
        if ldebug: filename = 'test_' + filename
        if lparallel: tmppfx = 'tmp_wrfavg_{:s}_'.format(pidstr[1:-1])
        else: tmppfx = 'tmp_wrfavg_'
        tmpfilename = tmppfx + filename
        assert os.path.exists(expfolder)
        # N.B.: the sink path has its own name, so that the path of the source file remains
        #       available for messages in later iterations
        sinkpath = expfolder + filename
        tmpfilepath = expfolder + tmpfilename
        lskip = False  # else just go ahead
        if os.path.exists(sinkpath):
            if not loverwrite:
                age = datetime.fromtimestamp(os.path.getmtime(sinkpath))
                # if sink file is newer than source file, skip (do not recompute)
                if age > sourceage and os.path.getsize(sinkpath) > 1e6:
                    lskip = True
                # N.B.: NetCDF files smaller than 1MB are usually incomplete header fragments from a previous crash
            if not lskip: os.remove(sinkpath)

        # depending on last modification time of file or overwrite setting, start computation, or skip
        if lskip:
            # print message
            skipmsg = "\n{:s}   >>>   Skipping: file '{:s}' in dataset '{:s}' already exists and is newer than source file.".format(
                pidstr, filename, dataset_name)
            skipmsg += "\n{:s}   >>>   ('{:s}')\n".format(pidstr, sinkpath)
            logger.info(skipmsg)
            continue

        lregrid = griddef is not None

        ## begin actual computation
        beginmsg = "\n{:s}   <<<   Computing '{:s}' (d{:02d}) Climatology from {:s}".format(
            pidstr, dataset_name, domain, periodstr)
        if not lregrid: beginmsg += "  >>>   \n"
        else: beginmsg += " ('{:s}' grid)  >>>   \n".format(griddef.name)
        logger.info(beginmsg)

        ## actually load datasets
        if source is None:
            source = loadWRF_TS(experiment=experiment,
                                filetypes=[filetype],
                                domains=domain)  # comes out as a tuple...
        if not lparallel and ldebug:
            logger.info('\n' + str(source) + '\n')

        # prepare sink
        if os.path.exists(tmpfilepath):
            os.remove(tmpfilepath)  # remove old temp files
        sink = DatasetNetCDF(name='WRF Climatology',
                             folder=expfolder,
                             filelist=[tmpfilename],
                             atts=source.atts.copy(),
                             mode='w')
        sink.atts.period = periodstr

        # initialize processing
        CPU = CentralProcessingUnit(source,
                                    sink,
                                    varlist=varlist,
                                    tmp=lregrid,
                                    feedback=ldebug)  # no need for lat/lon

        # start processing climatology
        if shift != 0:
            logger.info(
                '{0:s}   (shifting climatology by {1:d} month, to start with January)   \n'
                .format(pidstr, shift))
        CPU.Climatology(period=period, offset=offset, shift=shift, flush=False)
        # N.B.: immediate flushing should not be necessary for climatologies, since they are much smaller!

        # reproject and resample (regrid) dataset
        if lregrid:
            CPU.Regrid(griddef=griddef, flush=True)
            logger.info('{:s}   ---   {:s}   ---   \n'.format(pidstr, griddef.name))
            logger.debug('{:s}   ---   {:s}   ---   \n'.format(pidstr, str(griddef)))

        # sync temporary storage with output dataset (sink)
        CPU.sync(flush=True)

        # add Geopotential Height Variance
        if 'GHT_Var' in sink and 'Z_var' not in sink:
            data_array = (sink['GHT_Var'].data_array -
                          sink['Z'].data_array**2)**0.5
            atts = dict(name='Z_var',
                        units='m',
                        long_name='Square Root of Geopotential Height Variance')
            sink += Variable(axes=sink['Z'].axes, data=data_array, atts=atts)

        # add (relative) Vorticity Variance
        if 'Vorticity_Var' in sink and 'zeta_var' not in sink:
            data_array = (sink['Vorticity_Var'].data_array -
                          sink['zeta'].data_array**2)**0.5
            atts = dict(name='zeta_var',
                        units='1/s',
                        long_name='Square Root of Relative Vorticity Variance')
            sink += Variable(axes=sink['zeta'].axes, data=data_array, atts=atts)

        # add names and length of months
        sink.axisAnnotation('name_of_month',
                            name_of_month,
                            'time',
                            atts=dict(name='name_of_month',
                                      units='',
                                      long_name='Name of the Month'))
        if not sink.hasVariable('length_of_month'):
            sink += Variable(name='length_of_month',
                             units='days',
                             axes=(sink.time, ),
                             data=days_per_month,
                             atts=dict(name='length_of_month',
                                       units='days',
                                       long_name='Length of Month'))

        # close... and write results to file
        sink.sync()
        sink.close()
        writemsg = "\n{:s}   >>>   Writing to file '{:s}' in dataset {:s}".format(
            pidstr, filename, dataset_name)
        writemsg += "\n{:s}   >>>   ('{:s}')\n".format(pidstr, sinkpath)
        logger.info(writemsg)
        # rename file to proper name
        if os.path.exists(sinkpath):
            os.remove(sinkpath)  # remove old file
        os.rename(tmpfilepath, sinkpath)  # this will overwrite the old file

        # print dataset
        if not lparallel and ldebug:
            logger.info('\n' + str(sink) + '\n')

        # clean up (not sure if this is necessary, but there seems to be a memory leak...
        del sink, CPU
        gc.collect()  # get rid of these guys immediately

    # clean up and return
    if source is not None:
        source.unload()
        del source
    # N.B.: source is only loaded once for all periods

    # N.B.: garbage is collected in multi-processing wrapper as well
    # return
    return 0  # so far, there is no measure of success, hence, if there is no crash...
Beispiel #28
0
def computeClimatology(experiment, filetype, domain, periods=None, offset=0, griddef=None, varlist=None, 
                       ldebug=False, loverwrite=False, lparallel=False, pidstr='', logger=None):
  ''' Worker function to compute climatologies for given file parameters.
  
      The monthly time-series file of the experiment (selected by filetype and domain) is averaged 
      into one climatology file per period; the result is written to a temporary file in the 
      experiment's avgfolder, which is renamed to its proper name once it is complete.
      
      experiment: an Exp instance; its name and avgfolder are used
      filetype:   key into fileclasses, which provides the file name patterns
      domain:     integer domain number
      periods:    list/tuple of averaging periods in years; None selects the entire record
      offset:     number of years between the begin of the file and the begin of the climatology
      griddef:    a GridDefinition, if the climatology is to be regridded (otherwise None)
      varlist:    passed on to the CentralProcessingUnit
      ldebug:     prefix output files with 'test_' and log more information
      loverwrite: recompute climatologies, even if the existing file is newer than the source
      lparallel:  include the process ID (pidstr without first and last character) in temp file names
      pidstr:     string that is prepended to log messages
      logger:     logger for all messages; a standard library logger is used if None
      
      Raises TypeError for invalid arguments and DateError for dates outside of the file.
      Returns 0 (also if the source file is missing or periods are skipped). '''
  # input type checks
  if not isinstance(experiment,Exp): raise TypeError
  if not isinstance(filetype,basestring): raise TypeError
  if not isinstance(domain,(np.integer,int)): raise TypeError
  if periods is not None and not (isinstance(periods,(tuple,list)) and isInt(periods)): raise TypeError
  if not isinstance(offset,(np.integer,int)): raise TypeError
  # N.B.: np.bool_ is used, because the alias np.bool is not available in all NumPy versions
  if not isinstance(loverwrite,(bool,np.bool_)): raise TypeError  
  if griddef is not None and not isinstance(griddef,GridDefinition): raise TypeError
  # the logger is optional, but it is used unconditionally below
  if logger is None:
    import logging
    logger = logging.getLogger(__name__)
  
  # load source
  dataset_name = experiment.name
  fileclass = fileclasses[filetype] # used for target file name
  tsfile = fileclass.tsfile.format(domain,'')
  expfolder = experiment.avgfolder
  filepath = '{:s}/{:s}'.format(expfolder, tsfile)
  logger.info('\n\n{0:s}   ***   Processing Experiment {1:<15s}   ***   '.format(pidstr,"'{:s}'".format(dataset_name)) +
        '\n{0:s}   ***   {1:^37s}   ***   \n'.format(pidstr,"'{:s}'".format(tsfile)))
  
  # check file: if the monthly source file is missing, print message and skip
  if not os.path.exists(filepath): 
    skipmsg =  "\n{:s}   >>>   File '{:s}' in dataset '{:s}' is missing --- skipping!".format(pidstr,tsfile,dataset_name)
    skipmsg += "\n{:s}   >>>   ('{:s}')\n".format(pidstr,filepath)
    logger.warning(skipmsg) 
    # N.B.: this can cause a lot of error messages, when not all files are present
    return 0
  
  # read begin/enddates
  import netCDF4 as nc
  ncfile = nc.Dataset(filepath,mode='r')
  try:
    begintuple = ncfile.begin_date.split('-')
    endtuple = ncfile.end_date.split('-')
  finally:
    ncfile.close() # also close the file, if an attribute is missing
  # N.B.: at this point we don't want to initialize a full GDAL-enabled dataset, since we don't even
  #       know if we need it, and it creates a lot of overhead
  
  # determine age of source file (only needed to decide if existing files can be skipped)
  if not loverwrite: sourceage = datetime.fromtimestamp(os.path.getmtime(filepath))
  
  # figure out start date
  filebegin = int(begintuple[0]) # first element is the year
  fileend = int(endtuple[0]) # first element is the year
  begindate = offset + filebegin
  if not ( filebegin <= begindate <= fileend ): raise DateError  
  # handle cases where the first month in the record is not January
  firstmonth = int(begintuple[1]) # second element is the month
  shift = firstmonth-1 # will be zero for January (01)
  
  ## loop over periods
  # N.B.: fileend is the last year in the file, so the longest period that passes the end date 
  #       check below is fileend-begindate+1 years; this is used as default
  if periods is None: periods = [fileend-begindate+1]
  source = None # will later be assigned to the source dataset
  for period in periods:       
    
    # figure out period
    enddate = begindate + period     
    if filebegin > enddate: raise DateError('End date earlier than begin date.')
    if enddate-1 > fileend: # if filebegin is 1979 and the simulation is 10 years, fileend will be 1988, not 1989!
      # if end date is not available, skip period
      endmsg = "\n{:s}   ---   Invalid Period for '{:s}': End Date {:4d} not in File!   ---   \n".format(pidstr,dataset_name,enddate)
      endmsg += "{:s}   ---   ('{:s}')\n".format(pidstr,filepath)
      logger.info(endmsg)
      continue
    
    ## perform averaging for selected period
    
    # determine if sink file already exists, and what to do about it      
    periodstr = '{0:4d}-{1:4d}'.format(begindate,enddate)
    gridstr = '' if griddef is None or griddef.name == 'WRF' else '_'+griddef.name
    filename = fileclass.climfile.format(domain,gridstr,'_'+periodstr)
    if ldebug: filename = 'test_' + filename
    if lparallel: tmppfx = 'tmp_wrfavg_{:s}_'.format(pidstr[1:-1])
    else: tmppfx = 'tmp_wrfavg_'
    tmpfilename = tmppfx + filename
    assert os.path.exists(expfolder)
    # N.B.: the sink path has its own name, so that the path of the source file remains 
    #       available for messages in later iterations
    sinkpath = expfolder+filename
    tmpfilepath = expfolder+tmpfilename
    lskip = False # else just go ahead
    if os.path.exists(sinkpath): 
      if not loverwrite: 
        age = datetime.fromtimestamp(os.path.getmtime(sinkpath))
        # if sink file is newer than source file, skip (do not recompute)
        if age > sourceage and os.path.getsize(sinkpath) > 1e6: lskip = True
        # N.B.: NetCDF files smaller than 1MB are usually incomplete header fragments from a previous crash
      if not lskip: os.remove(sinkpath) 
    
    # depending on last modification time of file or overwrite setting, start computation, or skip
    if lskip:        
      # print message
      skipmsg =  "\n{:s}   >>>   Skipping: file '{:s}' in dataset '{:s}' already exists and is newer than source file.".format(pidstr,filename,dataset_name)
      skipmsg += "\n{:s}   >>>   ('{:s}')\n".format(pidstr,sinkpath)
      logger.info(skipmsg)              
      continue
    
    lregrid = griddef is not None
    
    ## begin actual computation
    beginmsg = "\n{:s}   <<<   Computing '{:s}' (d{:02d}) Climatology from {:s}".format(
                pidstr,dataset_name,domain,periodstr)
    if not lregrid: beginmsg += "  >>>   \n" 
    else: beginmsg += " ('{:s}' grid)  >>>   \n".format(griddef.name)
    logger.info(beginmsg)
    
    ## actually load datasets
    if source is None:
      source = loadWRF_TS(experiment=experiment, filetypes=[filetype], domains=domain) # comes out as a tuple... 
    if not lparallel and ldebug: logger.info('\n'+str(source)+'\n')
    
    # prepare sink
    if os.path.exists(tmpfilepath): os.remove(tmpfilepath) # remove old temp files
    sink = DatasetNetCDF(name='WRF Climatology', folder=expfolder, filelist=[tmpfilename], atts=source.atts.copy(), mode='w')
    sink.atts.period = periodstr 
    
    # initialize processing
    CPU = CentralProcessingUnit(source, sink, varlist=varlist, tmp=lregrid, feedback=ldebug) # no need for lat/lon
    
    # start processing climatology
    if shift != 0: 
      logger.info('{0:s}   (shifting climatology by {1:d} month, to start with January)   \n'.format(pidstr,shift))
    CPU.Climatology(period=period, offset=offset, shift=shift, flush=False)
    # N.B.: immediate flushing should not be necessary for climatologies, since they are much smaller!
    
    # reproject and resample (regrid) dataset
    if lregrid:
      CPU.Regrid(griddef=griddef, flush=True)
      # N.B.: the message is formatted in one piece; applying '%' to only the last fragment of a 
      #       concatenated string raises a TypeError
      logger.info('{:s}   ---   {:s}   ---   \n'.format(pidstr,griddef.name))
    
    # sync temporary storage with output dataset (sink)
    CPU.sync(flush=True)
    
    # add Geopotential Height Variance
    if 'GHT_Var' in sink and 'Z_var' not in sink:
      data_array = ( sink['GHT_Var'].data_array - sink['Z'].data_array**2 )**0.5
      atts = dict(name='Z_var',units='m',long_name='Square Root of Geopotential Height Variance')
      sink += Variable(axes=sink['Z'].axes, data=data_array, atts=atts)
      
    # add (relative) Vorticity Variance
    if 'Vorticity_Var' in sink and 'zeta_var' not in sink:
      data_array = ( sink['Vorticity_Var'].data_array - sink['zeta'].data_array**2 )**0.5
      atts = dict(name='zeta_var',units='1/s',long_name='Square Root of Relative Vorticity Variance')
      sink += Variable(axes=sink['zeta'].axes, data=data_array, atts=atts)
      
    # add names and length of months
    sink.axisAnnotation('name_of_month', name_of_month, 'time', 
                        atts=dict(name='name_of_month', units='', long_name='Name of the Month'))        
    if not sink.hasVariable('length_of_month'):
      sink += Variable(name='length_of_month', units='days', axes=(sink.time,), data=days_per_month,
                    atts=dict(name='length_of_month',units='days',long_name='Length of Month'))
    
    # close... and write results to file
    sink.sync()
    sink.close()
    writemsg =  "\n{:s}   >>>   Writing to file '{:s}' in dataset {:s}".format(pidstr,filename,dataset_name)
    writemsg += "\n{:s}   >>>   ('{:s}')\n".format(pidstr,sinkpath)
    logger.info(writemsg)      
    # rename file to proper name
    if os.path.exists(sinkpath): os.remove(sinkpath) # remove old file
    os.rename(tmpfilepath,sinkpath) # this will overwrite the old file
    
    # print dataset
    if not lparallel and ldebug:
      logger.info('\n'+str(sink)+'\n')
    
    # clean up (not sure if this is necessary, but there seems to be a memory leak...   
    del sink, CPU; gc.collect() # get rid of these guys immediately
    
  # clean up and return
  if source is not None: source.unload(); del source
  # N.B.: source is only loaded once for all periods    

  # N.B.: garbage is collected in multi-processing wrapper as well
  # return
  return 0 # so far, there is no measure of success, hence, if there is no crash...
Beispiel #29
0
        source = loadCRU_TS()
        source = source(time=timeSlice(period))  # only get relevant time-slice
        print(source)
        assert period[0] != 1979 or source.time.coord[0] == 0
        assert len(source.time) == (period[1] - period[0]) * 12
        print('\n')
        # prepare sink
        filename = avgfile.format(
            '',
            '_' + periodstr,
        )
        if os.path.exists(avgfolder + filename):
            os.remove(avgfolder + filename)
        sink = DatasetNetCDF(name='CRU Climatology',
                             folder=avgfolder,
                             filelist=[filename],
                             atts=source.atts,
                             mode='w')
        sink.atts.period = periodstr

        # determine averaging interval
        offset = source.time.getIndex(
            period[0] -
            1979) / 12  # origin of monthly time-series is at January 1979
        # initialize processing
        #     CPU = CentralProcessingUnit(source, sink, varlist=['wetfrq'])
        CPU = CentralProcessingUnit(source, sink)
        # start processing
        print('')
        print('   +++   processing   +++   ')
        CPU.Climatology(period=period[1] - period[0],