Example #1
def loadCFSR_TS(name=dataset_name, grid=None, varlist=None, varatts=None, resolution='hires', 
                filelist=None, folder=None, lautoregrid=None):
  ''' Get a properly formatted CFSR dataset with monthly mean time-series. '''
  if grid is None:
    # load from original time-series files 
    if folder is None: folder = orig_ts_folder
    # translate varlist
    if varatts is None: varatts = tsvaratts.copy()
    if varlist is None:
      if resolution == 'hires' or resolution == '03' or resolution == '031': varlist = varlist_hires
      elif resolution == 'lowres' or resolution == '05': varlist = varlist_lowres     
    if varlist and varatts: varlist = translateVarNames(varlist, varatts)
    if filelist is None: # generate default filelist
      if resolution == 'hires' or resolution == '03' or resolution == '031': 
        files = [hiresfiles[var] for var in varlist if var in hiresfiles]
      elif resolution == 'lowres' or resolution == '05': 
        files = [lowresfiles[var] for var in varlist if var in lowresfiles]
    # load dataset
    dataset = DatasetNetCDF(name=name, folder=folder, filelist=files, varlist=varlist, varatts=varatts, 
                            check_override=['time'], multifile=False, ncformat='NETCDF4_CLASSIC')
    # load static data
    if filelist is None: # generate default filelist
      if resolution == 'hires' or resolution == '03' or resolution == '031': 
        files = [hiresstatic[var] for var in varlist if var in hiresstatic]
      elif resolution == 'lowres' or resolution == '05': 
        files = [lowresstatic[var] for var in varlist if var in lowresstatic]
      # load constants, if any (and with singleton time axis)
      if len(files) > 0:
        staticdata = DatasetNetCDF(name=name, folder=folder, filelist=files, varlist=varlist, varatts=varatts, 
                                   axes=dict(lon=dataset.lon, lat=dataset.lat), multifile=False, 
                                   check_override=['time'], ncformat='NETCDF4_CLASSIC')
        # N.B.: need to override the axes, so that the datasets are consistent
        if len(staticdata.variables) > 0:
          for var in staticdata.variables.values(): 
            if not dataset.hasVariable(var.name):
              var.squeeze() # remove time dimension
              dataset.addVariable(var, copy=False) # no need to copy... but we can't write to the netcdf file!
    # replace time axis with number of month since Jan 1979 
    data = np.arange(0,len(dataset.time),1, dtype='int16') # month since 1979 (Jan 1979 = 0)
    timeAxis = Axis(name='time', units='month', coord=data, atts=dict(long_name='Month since 1979-01'))
    dataset.replaceAxis(dataset.time, timeAxis, asNC=False, deepcopy=False)
    # add projection  
    dataset = addGDALtoDataset(dataset, projection=None, geotransform=None, gridfolder=grid_folder)
    # N.B.: projection should be auto-detected as geographic
  else:
    # load from neatly formatted and regridded time-series files
    if folder is None: folder = avgfolder
    grid, resolution = checkGridRes(grid, resolution)
    dataset = loadObservations(name=name, folder=folder, projection=None, resolution=resolution, grid=grid, 
                               period=None, varlist=varlist, varatts=varatts, filepattern=tsfile, 
                               filelist=filelist, lautoregrid=lautoregrid, mode='time-series')
  # return formatted dataset
  return dataset
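
A minimal usage sketch for the loader above; it only exercises the resolution strings handled in the example ('hires'/'03'/'031' and 'lowres'/'05') and assumes the module-level defaults (orig_ts_folder, varlist_hires, varlist_lowres, tsvaratts) are configured:

# hypothetical usage; not part of the original example
cfsr_hi = loadCFSR_TS(resolution='hires')   # native high-resolution monthly time-series
print(cfsr_hi)                              # inspect variables and the month axis (Jan 1979 = 0)
cfsr_lo = loadCFSR_TS(resolution='05')      # 0.5 degree (low-resolution) variable set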
Example #2
def computeClimatology(experiment, filetype, domain, periods=None, offset=0, griddef=None, varlist=None, 
                       ldebug=False, loverwrite=False, lparallel=False, pidstr='', logger=None):
  ''' worker function to compute climatologies for given file parameters. '''
  # input type checks
  if not isinstance(experiment,Exp): raise TypeError
  if not isinstance(filetype,basestring): raise TypeError
  if not isinstance(domain,(np.integer,int)): raise TypeError
  if periods is not None and not (isinstance(periods,(tuple,list)) and isInt(periods)): raise TypeError
  if not isinstance(offset,(np.integer,int)): raise TypeError
  if not isinstance(loverwrite,(bool,np.bool)): raise TypeError  
  if griddef is not None and not isinstance(griddef,GridDefinition): raise TypeError
  
  #if pidstr == '[proc01]': raise TypeError # to test error handling

  # load source
  dataset_name = experiment.name
  fileclass = fileclasses[filetype] # used for target file name
  tsfile = fileclass.tsfile.format(domain,'')
  expfolder = experiment.avgfolder
  filepath = '{:s}/{:s}'.format(expfolder, tsfile)
  logger.info('\n\n{0:s}   ***   Processing Experiment {1:<15s}   ***   '.format(pidstr,"'{:s}'".format(dataset_name)) +
        '\n{0:s}   ***   {1:^37s}   ***   \n'.format(pidstr,"'{:s}'".format(tsfile)))
  
  # check file and read begin/enddates
  if not os.path.exists(filepath): 
    #raise IOError, "Source file '{:s}' does not exist!".format(filepath)
    # print message and skip
    skipmsg =  "\n{:s}   >>>   File '{:s}' in dataset '{:s}' is missing --- skipping!".format(pidstr,tsfile,dataset_name)
    skipmsg += "\n{:s}   >>>   ('{:s}')\n".format(pidstr,filepath)
    logger.warning(skipmsg) 
    # N.B.: this can cause a lot of error messages, when not all files are present
  else: # if monthly source file exists
    import netCDF4 as nc
    ncfile = nc.Dataset(filepath,mode='r')
    begintuple = ncfile.begin_date.split('-')
    endtuple = ncfile.end_date.split('-')
    ncfile.close()
    # N.B.: at this point we don't want to initialize a full GDAL-enabled dataset, since we don't even
    #       know if we need it, and it creates a lot of overhead
    
    # determine age of source file
    if not loverwrite: sourceage = datetime.fromtimestamp(os.path.getmtime(filepath))
  
    # figure out start date
    filebegin = int(begintuple[0]) # first element is the year
    fileend = int(endtuple[0]) # first element is the year
    begindate = offset + filebegin
    if not ( filebegin <= begindate <= fileend ): raise DateError  
    # handle cases where the first month in the record is not January
    firstmonth = int(begintuple[1]) # second element is the month
    shift = firstmonth-1 # will be zero for January (01)
    
    ## loop over periods
    if periods is None: periods = [fileend-begindate] # default: one period spanning the whole file
    #   periods.sort(reverse=True) # reverse, so that largest chunk is done first
    source = None # will later be assigned to the source dataset
    for period in periods:       
              
      # figure out period
      enddate = begindate + period     
      if filebegin > enddate: raise DateError, 'End date earlier than begin date.'
      if enddate-1 > fileend: # if filebegin is 1979 and the simulation is 10 years, fileend will be 1988, not 1989!
        # if end date is not available, skip period
        endmsg = "\n{:s}   ---   Invalid Period for '{:s}': End Date {:4d} not in File!   ---   \n".format(pidstr,dataset_name,enddate)
        endmsg += "{:s}   ---   ('{:s}')\n".format(pidstr,filepath)
        logger.info(endmsg)
        
      else: ## perform averaging for selected period
  
        # determine if sink file already exists, and what to do about it      
        periodstr = '{0:4d}-{1:4d}'.format(begindate,enddate)
        gridstr = '' if griddef is None or griddef.name == 'WRF' else '_'+griddef.name
        filename = fileclass.climfile.format(domain,gridstr,'_'+periodstr)
        if ldebug: filename = 'test_' + filename
        if lparallel: tmppfx = 'tmp_wrfavg_{:s}_'.format(pidstr[1:-1])
        else: tmppfx = 'tmp_wrfavg_'
        tmpfilename = tmppfx + filename
        assert os.path.exists(expfolder)
        filepath = expfolder+filename
        tmpfilepath = expfolder+tmpfilename
        lskip = False # else just go ahead
        if os.path.exists(filepath): 
          if not loverwrite: 
            age = datetime.fromtimestamp(os.path.getmtime(filepath))
            # if sink file is newer than source file, skip (do not recompute)
            if age > sourceage and os.path.getsize(filepath) > 1e6: lskip = True
            # N.B.: NetCDF files smaller than 1MB are usually incomplete header fragments from a previous crash
            #print sourceage, age
          if not lskip: os.remove(filepath) 
        
        # depending on last modification time of file or overwrite setting, start computation, or skip
        if lskip:        
          # print message
          skipmsg =  "\n{:s}   >>>   Skipping: file '{:s}' in dataset '{:s}' already exists and is newer than source file.".format(pidstr,filename,dataset_name)
          skipmsg += "\n{:s}   >>>   ('{:s}')\n".format(pidstr,filepath)
          logger.info(skipmsg)              
        else:
           
          ## begin actual computation
          beginmsg = "\n{:s}   <<<   Computing '{:s}' (d{:02d}) Climatology from {:s}".format(
                      pidstr,dataset_name,domain,periodstr)
          if griddef is None: beginmsg += "  >>>   \n" 
          else: beginmsg += " ('{:s}' grid)  >>>   \n".format(griddef.name)
          logger.info(beginmsg)
  
          ## actually load datasets
          if source is None:
            source = loadWRF_TS(experiment=experiment, filetypes=[filetype], domains=domain) # comes out as a tuple... 
          if not lparallel and ldebug: logger.info('\n'+str(source)+'\n')
  
          # prepare sink
          if os.path.exists(tmpfilepath): os.remove(tmpfilepath) # remove old temp files
          sink = DatasetNetCDF(name='WRF Climatology', folder=expfolder, filelist=[tmpfilename], atts=source.atts.copy(), mode='w')
          sink.atts.period = periodstr 
          
          # initialize processing
          if griddef is None: lregrid = False
          else: lregrid = True
          CPU = CentralProcessingUnit(source, sink, varlist=varlist, tmp=lregrid, feedback=ldebug) # no need for lat/lon
          
          # start processing climatology
          if shift != 0: 
            logger.info('{0:s}   (shifting climatology by {1:d} month, to start with January)   \n'.format(pidstr,shift))
          CPU.Climatology(period=period, offset=offset, shift=shift, flush=False)
          # N.B.: immediate flushing should not be necessary for climatologies, since they are much smaller!
          
          # reproject and resample (regrid) dataset
          if lregrid:
            CPU.Regrid(griddef=griddef, flush=True)
            logger.info('{:s}   ---   {:s}   ---   \n'.format(pidstr, str(griddef.geotransform)))
          
          # sync temporary storage with output dataset (sink)
          CPU.sync(flush=True)
          
          # add Geopotential Height Variance
          if 'GHT_Var' in sink and 'Z_var' not in sink:
            data_array = ( sink['GHT_Var'].data_array - sink['Z'].data_array**2 )**0.5
            atts = dict(name='Z_var',units='m',long_name='Square Root of Geopotential Height Variance')
            sink += Variable(axes=sink['Z'].axes, data=data_array, atts=atts)
            
          # add (relative) Vorticity Variance
          if 'Vorticity_Var' in sink and 'zeta_var' not in sink:
            data_array = ( sink['Vorticity_Var'].data_array - sink['zeta'].data_array**2 )**0.5
            atts = dict(name='zeta_var',units='1/s',long_name='Square Root of Relative Vorticity Variance')
            sink += Variable(axes=sink['zeta'].axes, data=data_array, atts=atts)
            
          # add names and length of months
          sink.axisAnnotation('name_of_month', name_of_month, 'time', 
                              atts=dict(name='name_of_month', units='', long_name='Name of the Month'))        
          if not sink.hasVariable('length_of_month'):
            sink += Variable(name='length_of_month', units='days', axes=(sink.time,), data=days_per_month,
                          atts=dict(name='length_of_month',units='days',long_name='Length of Month'))
          
          # close... and write results to file
          sink.sync()
          sink.close()
          writemsg =  "\n{:s}   >>>   Writing to file '{:s}' in dataset {:s}".format(pidstr,filename,dataset_name)
          writemsg += "\n{:s}   >>>   ('{:s}')\n".format(pidstr,filepath)
          logger.info(writemsg)      
          # rename file to proper name
          if os.path.exists(filepath): os.remove(filepath) # remove old file
          os.rename(tmpfilepath,filepath) # this will overwrite the old file
          
          # print dataset
          if not lparallel and ldebug:
            logger.info('\n'+str(sink)+'\n')
          
          # clean up (not sure if this is necessary, but there seems to be a memory leak...)
          del sink, CPU; gc.collect() # get rid of these guys immediately
          
    # clean up and return
    if source is not None: source.unload(); del source
    # N.B.: source is only loaded once for all periods    

  # N.B.: garbage is collected in multi-processing wrapper as well
  # return
  return 0 # so far, there is no measure of success, hence, if there is no crash...
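
A hedged usage sketch for the worker above; the Exp instance, the 'srfc' file type, and the period lengths are assumptions based on the input checks, not part of the original example:

# hypothetical call: 10- and 15-year climatologies for domain 1 of an Exp instance 'experiment'
log = logging.getLogger('wrfavg')  # logging as used in the regridding examples below
exitcode = computeClimatology(experiment, 'srfc', 1, periods=[10, 15], offset=0,
                              loverwrite=False, ldebug=True, logger=log)
assert exitcode == 0  # the worker only reports success by returning 0 (errors raise)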
Example #3
def performRegridding(dataset,
                      mode,
                      griddef,
                      dataargs,
                      loverwrite=False,
                      varlist=None,
                      lwrite=True,
                      lreturn=False,
                      ldebug=False,
                      lparallel=False,
                      pidstr='',
                      logger=None):
    ''' worker function to perform regridding for a given dataset and target grid '''
    # input checking
    if not isinstance(dataset, basestring): raise TypeError
    if not isinstance(dataargs, dict):
        raise TypeError  # all dataset arguments are kwargs
    if not isinstance(griddef, GridDefinition): raise TypeError
    if lparallel:
        if not lwrite:
            raise IOError, 'Can only write to disk in parallel mode (i.e. lwrite = True).'
        if lreturn:
            raise IOError, 'Can not return datasets in parallel mode (i.e. lreturn = False).'

    # logging
    if logger is None:  # make new logger
        logger = logging.getLogger()  # new logger
        logger.addHandler(logging.StreamHandler())
    else:
        if isinstance(logger, basestring):
            logger = logging.getLogger(name=logger)  # connect to existing one
        elif not isinstance(logger, logging.Logger):
            raise TypeError, 'Expected logger ID/handle in logger KW; got {}'.format(
                str(logger))

    ## extract meta data from arguments
    dataargs, loadfct, srcage, datamsgstr = getMetaData(
        dataset, mode, dataargs)
    dataset_name = dataargs.dataset_name
    periodstr = dataargs.periodstr
    avgfolder = dataargs.avgfolder

    # get filename for target dataset and do some checks
    filename = getTargetFile(
        dataset=dataset,
        mode=mode,
        dataargs=dataargs,
        lwrite=lwrite,
        grid=griddef.name.lower(),
    )

    # prepare target dataset
    if ldebug: filename = 'test_' + filename
    if not os.path.exists(avgfolder):
        raise IOError, "Dataset folder '{:s}' does not exist!".format(
            avgfolder)
    lskip = False  # else just go ahead
    if lwrite:
        if lreturn:
            tmpfilename = filename  # no temporary file if dataset is passed on (can't rename the file while it is open!)
        else:
            if lparallel: tmppfx = 'tmp_regrid_{:s}_'.format(pidstr[1:-1])
            else: tmppfx = 'tmp_regrid_'
            tmpfilename = tmppfx + filename
        filepath = avgfolder + filename
        tmpfilepath = avgfolder + tmpfilename
        if os.path.exists(filepath):
            if not loverwrite:
                age = datetime.fromtimestamp(os.path.getmtime(filepath))
                # if source file is newer than sink file or if sink file is a stub, recompute, otherwise skip
                if age > srcage and os.path.getsize(filepath) > 1e6:
                    lskip = True
                    if hasattr(griddef,
                               'filepath') and griddef.filepath is not None:
                        gridage = datetime.fromtimestamp(
                            os.path.getmtime(griddef.filepath))
                        if age < gridage: lskip = False
                # N.B.: NetCDF files smaller than 1MB are usually incomplete header fragments from a previous crash

    # depending on last modification time of file or overwrite setting, start computation, or skip
    if lskip:
        # print message
        skipmsg = "\n{:s}   >>>   Skipping: file '{:s}' in dataset '{:s}' already exists and is newer than source file.".format(
            pidstr, filename, dataset_name)
        skipmsg += "\n{:s}   >>>   ('{:s}')\n".format(pidstr, filepath)
        logger.info(skipmsg)
    else:

        ## actually load datasets
        source = loadfct()  # load source
        # check period
        if 'period' in source.atts and dataargs.periodstr != source.atts.period:  # a NetCDF attribute
            raise DateError, "Specifed period is inconsistent with netcdf records: '{:s}' != '{:s}'".format(
                periodstr, source.atts.period)

        # print message
        if mode == 'climatology':
            opmsgstr = 'Regridding Climatology ({:s}) to {:s} Grid'.format(
                periodstr, griddef.name)
        elif mode == 'time-series':
            opmsgstr = 'Regridding Time-series to {:s} Grid'.format(
                griddef.name)
        else:
            raise NotImplementedError, "Unrecognized Mode: '{:s}'".format(mode)
        # print feedback to logger
        logger.info(
            '\n{0:s}   ***   {1:^65s}   ***   \n{0:s}   ***   {2:^65s}   ***   \n'
            .format(pidstr, datamsgstr, opmsgstr))
        if not lparallel and ldebug: logger.info('\n' + str(source) + '\n')

        ## create new sink/target file
        # set attributes
        atts = source.atts.copy()
        atts['period'] = periodstr
        atts['name'] = dataset_name
        atts['grid'] = griddef.name
        if mode == 'climatology':
            atts['title'] = '{:s} Climatology on {:s} Grid'.format(
                dataset_name, griddef.name)
        elif mode == 'time-series':
            atts['title'] = '{:s} Time-series on {:s} Grid'.format(
                dataset_name, griddef.name)

        # make new dataset
        if lwrite:  # write to NetCDF file
            if os.path.exists(tmpfilepath):
                os.remove(tmpfilepath)  # remove old temp files
            sink = DatasetNetCDF(folder=avgfolder,
                                 filelist=[tmpfilename],
                                 atts=atts,
                                 mode='w')
        else:
            sink = Dataset(atts=atts)  # only create dataset in memory

        # initialize processing
        CPU = CentralProcessingUnit(source,
                                    sink,
                                    varlist=varlist,
                                    tmp=False,
                                    feedback=ldebug)

        # perform regridding (if target grid is different from native grid!)
        if griddef.name != dataset:
            # reproject and resample (regrid) dataset
            CPU.Regrid(griddef=griddef, flush=True)

        # get results
        CPU.sync(flush=True)

        # add geolocators
        sink = addGeoLocator(sink,
                             griddef=griddef,
                             lgdal=True,
                             lreplace=True,
                             lcheck=True)
        # N.B.: WRF datasets come with their own geolocator arrays - we need to replace those!

        # add length and names of month
        if mode == 'climatology' and not sink.hasVariable(
                'length_of_month') and sink.hasVariable('time'):
            addLengthAndNamesOfMonth(
                sink,
                noleap=True if dataset.upper() in ('WRF', 'CESM') else False)

        # print dataset
        if not lparallel and ldebug:
            logger.info('\n' + str(sink) + '\n')
        # write results to file
        if lwrite:
            sink.sync()
            writemsg = "\n{:s}   >>>   Writing to file '{:s}' in dataset {:s}".format(
                pidstr, filename, dataset_name)
            writemsg += "\n{:s}   >>>   ('{:s}')\n".format(pidstr, filepath)
            logger.info(writemsg)

            # rename file to proper name
            if not lreturn:
                sink.unload()
                sink.close()
                del sink  # destroy all references
                if os.path.exists(filepath):
                    os.remove(filepath)  # remove old file
                os.rename(
                    tmpfilepath,
                    filepath)  # this would also overwrite the old file...
            # N.B.: there is no temporary file if the dataset is returned, because an open file can't be renamed

        # clean up and return
        source.unload()
        del source, CPU
        if lreturn:
            return sink  # return dataset for further use (netcdf file still open!)
        else:
            return 0  # "exit code"
Example #4
def performRegridding(dataset, mode, griddef, dataargs, loverwrite=False, varlist=None, lwrite=True, 
                      lreturn=False, ldebug=False, lparallel=False, pidstr='', logger=None):
  ''' worker function to perform regridding for a given dataset and target grid '''
  # input checking
  if not isinstance(dataset,basestring): raise TypeError
  if not isinstance(dataargs,dict): raise TypeError # all dataset arguments are kwargs 
  if not isinstance(griddef,GridDefinition): raise TypeError
  if lparallel: 
    if not lwrite: raise IOError, 'Can only write to disk in parallel mode (i.e. lwrite = True).'
    if lreturn: raise IOError, 'Can not return datasets in parallel mode (i.e. lreturn = False).'
  
  # logging
  if logger is None: # make new logger     
    logger = logging.getLogger() # new logger
    logger.addHandler(logging.StreamHandler())
  else:
    if isinstance(logger,basestring): 
      logger = logging.getLogger(name=logger) # connect to existing one
    elif not isinstance(logger,logging.Logger): 
      raise TypeError, 'Expected logger ID/handle in logger KW; got {}'.format(str(logger))

  ## extract meta data from arguments
  dataargs, loadfct, srcage, datamsgstr = getMetaData(dataset, mode, dataargs)
  dataset_name = dataargs.dataset_name; periodstr = dataargs.periodstr; avgfolder = dataargs.avgfolder

  # get filename for target dataset and do some checks
  filename = getTargetFile(dataset=dataset, mode=mode, dataargs=dataargs, lwrite=lwrite, 
                           grid=griddef.name.lower(), period=None, filetype=None) 
    
  # prepare target dataset
  if ldebug: filename = 'test_' + filename
  if not os.path.exists(avgfolder): raise IOError, "Dataset folder '{:s}' does not exist!".format(avgfolder)
  lskip = False # else just go ahead
  if lwrite:
    if lreturn: tmpfilename = filename # no temporary file if dataset is passed on (can't rename the file while it is open!)
    else: 
      if lparallel: tmppfx = 'tmp_regrid_{:s}_'.format(pidstr[1:-1])
      else: tmppfx = 'tmp_regrid_'
      tmpfilename = tmppfx + filename      
    filepath = avgfolder + filename
    tmpfilepath = avgfolder + tmpfilename
    if os.path.exists(filepath): 
      if not loverwrite: 
        age = datetime.fromtimestamp(os.path.getmtime(filepath))
        # if source file is newer than sink file or if sink file is a stub, recompute, otherwise skip
        if age > srcage and os.path.getsize(filepath) > 1e6: 
          lskip = True
          if hasattr(griddef, 'filepath') and griddef.filepath is not None:
            gridage = datetime.fromtimestamp(os.path.getmtime(griddef.filepath))
            if age < gridage: lskip = False
        # N.B.: NetCDF files smaller than 1MB are usually incomplete header fragments from a previous crash
      if not lskip: os.remove(filepath) # recompute
  
  # depending on last modification time of file or overwrite setting, start computation, or skip
  if lskip:        
    # print message
    skipmsg =  "\n{:s}   >>>   Skipping: file '{:s}' in dataset '{:s}' already exists and is newer than source file.".format(pidstr,filename,dataset_name)
    skipmsg += "\n{:s}   >>>   ('{:s}')\n".format(pidstr,filepath)
    logger.info(skipmsg)              
  else:
          
    ## actually load datasets
    source = loadfct() # load source 
    # check period
    if 'period' in source.atts and dataargs.periodstr != source.atts.period: # a NetCDF attribute
      raise DateError, "Specifed period is inconsistent with netcdf records: '{:s}' != '{:s}'".format(periodstr,source.atts.period)

    # print message
    if mode == 'climatology': opmsgstr = 'Regridding Climatology ({:s}) to {:s} Grid'.format(periodstr, griddef.name)
    elif mode == 'time-series': opmsgstr = 'Regridding Time-series to {:s} Grid'.format(griddef.name)
    else: raise NotImplementedError, "Unrecognized Mode: '{:s}'".format(mode)        
    # print feedback to logger
    logger.info('\n{0:s}   ***   {1:^65s}   ***   \n{0:s}   ***   {2:^65s}   ***   \n'.format(pidstr,datamsgstr,opmsgstr))
    if not lparallel and ldebug: logger.info('\n'+str(source)+'\n')
    
    ## create new sink/target file
    # set attributes   
    atts=source.atts.copy()
    atts['period'] = periodstr; atts['name'] = dataset_name; atts['grid'] = griddef.name
    if mode == 'climatology': atts['title'] = '{:s} Climatology on {:s} Grid'.format(dataset_name, griddef.name)
    elif mode == 'time-series':  atts['title'] = '{:s} Time-series on {:s} Grid'.format(dataset_name, griddef.name)
      
    # make new dataset
    if lwrite: # write to NetCDF file 
      if os.path.exists(tmpfilepath): os.remove(tmpfilepath) # remove old temp files 
      sink = DatasetNetCDF(folder=avgfolder, filelist=[tmpfilename], atts=atts, mode='w')
    else: sink = Dataset(atts=atts) # only create dataset in memory
    
    # initialize processing
    CPU = CentralProcessingUnit(source, sink, varlist=varlist, tmp=False, feedback=ldebug)
  
    # perform regridding (if target grid is different from native grid!)
    if griddef.name != dataset:
      # reproject and resample (regrid) dataset
      CPU.Regrid(griddef=griddef, flush=True)

    # get results    
    CPU.sync(flush=True)
    
    # add geolocators
    sink = addGeoLocator(sink, griddef=griddef, lgdal=True, lreplace=True, lcheck=True)
    # N.B.: WRF datasets come with their own geolocator arrays - we need to replace those!
    
    # add length and names of month
    if mode == 'climatology' and not sink.hasVariable('length_of_month') and sink.hasVariable('time'): 
      addLengthAndNamesOfMonth(sink, noleap=True if dataset.upper() in ('WRF','CESM') else False) 
    
    # print dataset
    if not lparallel and ldebug:
      logger.info('\n'+str(sink)+'\n')   
    # write results to file
    if lwrite:
      sink.sync()
      writemsg =  "\n{:s}   >>>   Writing to file '{:s}' in dataset {:s}".format(pidstr,filename,dataset_name)
      writemsg += "\n{:s}   >>>   ('{:s}')\n".format(pidstr,filepath)
      logger.info(writemsg)      
      
      # rename file to proper name
      if not lreturn:
        sink.unload(); sink.close(); del sink # destroy all references 
        if os.path.exists(filepath): os.remove(filepath) # remove old file
        os.rename(tmpfilepath,filepath) # this would also overwrite the old file...
      # N.B.: there is no temporary file if the dataset is returned, because an open file can't be renamed
        
    # clean up and return
    source.unload(); del source, CPU
    if lreturn:      
      return sink # return dataset for further use (netcdf file still open!)
    else:            
      return 0 # "exit code"
Example #5
      # determine averaging interval
      offset = source.time.getIndex(period[0]-1979)/12 # origin of monthly time-series is at January 1979 
      # initialize processing
      CPU = CentralProcessingUnit(source, sink, tmp=True)
      
      # start processing climatology
      CPU.Climatology(period=period[1]-period[0], offset=offset, flush=False)
      
      # shift longitude axis by 180 degrees left (i.e. 0 - 360 -> -180 - 180)
      CPU.Shift(lon=-180, flush=False)
      
      # sync temporary storage with output (sink variable; do not flush!)
      CPU.sync(flush=False)

      # make new masks
      if sink.hasVariable('landmask'):
        sink.mask(sink.landmask, maskSelf=False, varlist=['snow','snowh','zs'], invert=True, merge=False)

      # add names and length of months
      sink.axisAnnotation('name_of_month', name_of_month, 'time', 
                          atts=dict(name='name_of_month', units='', long_name='Name of the Month'))
      #print '   ===   month   ===   '
#       sink += VarNC(sink.dataset, name='length_of_month', units='days', axes=(sink.time,), data=days_per_month,
#                     atts=dict(name='length_of_month',units='days',long_name='Length of Month'))
      
      # close...
      sink.sync()
      sink.close()
      # print dataset
      print('')
      print(sink)     
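
The fragment above (like Example #10 below) shows the typical CentralProcessingUnit pattern; a condensed sketch of the same call sequence, assuming 'source' and 'sink' datasets already exist:

# hypothetical condensed pipeline: climatology -> longitude shift -> sync results to the sink
CPU = CentralProcessingUnit(source, sink, tmp=True)   # intermediate results go to temporary storage
CPU.Climatology(period=30, offset=0, flush=False)     # e.g. a 30-year monthly climatology
CPU.Shift(lon=-180, flush=False)                      # recenter longitudes from 0..360 to -180..180
CPU.sync(flush=True)                                  # copy processed variables into the sink dataset
sink.sync(); sink.close()                             # write the (NetCDF) sink to disk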
Example #6
def computeClimatology(experiment,
                       filetype,
                       domain,
                       periods=None,
                       offset=0,
                       griddef=None,
                       varlist=None,
                       ldebug=False,
                       loverwrite=False,
                       lparallel=False,
                       pidstr='',
                       logger=None):
    ''' worker function to compute climatologies for given file parameters. '''
    # input type checks
    if not isinstance(experiment, Exp): raise TypeError
    if not isinstance(filetype, basestring): raise TypeError
    if not isinstance(domain, (np.integer, int)): raise TypeError
    if periods is not None and not (isinstance(periods, (tuple, list))
                                    and isInt(periods)):
        raise TypeError
    if not isinstance(offset, (np.integer, int)): raise TypeError
    if not isinstance(loverwrite, (bool, np.bool)): raise TypeError
    if griddef is not None and not isinstance(griddef, GridDefinition):
        raise TypeError

    #if pidstr == '[proc01]': raise TypeError # to test error handling

    # load source
    dataset_name = experiment.name
    fileclass = fileclasses[filetype]  # used for target file name
    tsfile = fileclass.tsfile.format(domain, '')
    expfolder = experiment.avgfolder
    filepath = '{:s}/{:s}'.format(expfolder, tsfile)
    logger.info('\n\n{0:s}   ***   Processing Experiment {1:<15s}   ***   '.
                format(pidstr, "'{:s}'".format(dataset_name)) +
                '\n{0:s}   ***   {1:^37s}   ***   \n'.format(
                    pidstr, "'{:s}'".format(tsfile)))

    # check file and read begin/enddates
    if not os.path.exists(filepath):
        #raise IOError, "Source file '{:s}' does not exist!".format(filepath)
        # print message and skip
        skipmsg = "\n{:s}   >>>   File '{:s}' in dataset '{:s}' is missing --- skipping!".format(
            pidstr, tsfile, dataset_name)
        skipmsg += "\n{:s}   >>>   ('{:s}')\n".format(pidstr, filepath)
        logger.warning(skipmsg)
        # N.B.: this can cause a lot of error messages, when not all files are present
    else:  # if monthly source file exists
        import netCDF4 as nc
        ncfile = nc.Dataset(filepath, mode='r')
        begintuple = ncfile.begin_date.split('-')
        endtuple = ncfile.end_date.split('-')
        ncfile.close()
        # N.B.: at this point we don't want to initialize a full GDAL-enabled dataset, since we don't even
        #       know if we need it, and it creates a lot of overhead

        # determine age of source file
        if not loverwrite:
            sourceage = datetime.fromtimestamp(os.path.getmtime(filepath))

        # figure out start date
        filebegin = int(begintuple[0])  # first element is the year
        fileend = int(endtuple[0])  # first element is the year
        begindate = offset + filebegin
        if not (filebegin <= begindate <= fileend): raise DateError
        # handle cases where the first month in the record is not January
        firstmonth = int(begintuple[1])  # second element is the month
        shift = firstmonth - 1  # will be zero for January (01)

        ## loop over periods
        if periods is None: periods = [fileend - begindate]  # default: one period spanning the whole file
        #   periods.sort(reverse=True) # reverse, so that largest chunk is done first
        source = None  # will later be assigned to the source dataset
        for period in periods:

            # figure out period
            enddate = begindate + period
            if filebegin > enddate:
                raise DateError, 'End date earlier than begin date.'
            if enddate - 1 > fileend:  # if filebegin is 1979 and the simulation is 10 years, fileend will be 1988, not 1989!
                # if end date is not available, skip period
                endmsg = "\n{:s}   ---   Invalid Period for '{:s}': End Date {:4d} not in File!   ---   \n".format(
                    pidstr, dataset_name, enddate)
                endmsg += "{:s}   ---   ('{:s}')\n".format(pidstr, filepath)
                logger.info(endmsg)

            else:  ## perform averaging for selected period

                # determine if sink file already exists, and what to do about it
                periodstr = '{0:4d}-{1:4d}'.format(begindate, enddate)
                gridstr = '' if griddef is None or griddef.name == 'WRF' else '_' + griddef.name
                filename = fileclass.climfile.format(domain, gridstr,
                                                     '_' + periodstr)
                if ldebug: filename = 'test_' + filename
                if lparallel: tmppfx = 'tmp_wrfavg_{:s}_'.format(pidstr[1:-1])
                else: tmppfx = 'tmp_wrfavg_'
                tmpfilename = tmppfx + filename
                assert os.path.exists(expfolder)
                filepath = expfolder + filename
                tmpfilepath = expfolder + tmpfilename
                lskip = False  # else just go ahead
                if os.path.exists(filepath):
                    if not loverwrite:
                        age = datetime.fromtimestamp(
                            os.path.getmtime(filepath))
                        # if sink file is newer than source file, skip (do not recompute)
                        if age > sourceage and os.path.getsize(filepath) > 1e6:
                            lskip = True
                        # N.B.: NetCDF files smaller than 1MB are usually incomplete header fragments from a previous crash
                        #print sourceage, age
                    if not lskip: os.remove(filepath)

                # depending on last modification time of file or overwrite setting, start computation, or skip
                if lskip:
                    # print message
                    skipmsg = "\n{:s}   >>>   Skipping: file '{:s}' in dataset '{:s}' already exists and is newer than source file.".format(
                        pidstr, filename, dataset_name)
                    skipmsg += "\n{:s}   >>>   ('{:s}')\n".format(
                        pidstr, filepath)
                    logger.info(skipmsg)
                else:

                    if griddef is None: lregrid = False
                    else: lregrid = True

                    ## begin actual computation
                    beginmsg = "\n{:s}   <<<   Computing '{:s}' (d{:02d}) Climatology from {:s}".format(
                        pidstr, dataset_name, domain, periodstr)
                    if not lregrid: beginmsg += "  >>>   \n"
                    else:
                        beginmsg += " ('{:s}' grid)  >>>   \n".format(
                            griddef.name)
                    logger.info(beginmsg)

                    ## actually load datasets
                    if source is None:
                        source = loadWRF_TS(
                            experiment=experiment,
                            filetypes=[filetype],
                            domains=domain)  # comes out as a tuple...
                    if not lparallel and ldebug:
                        logger.info('\n' + str(source) + '\n')

                    # prepare sink
                    if os.path.exists(tmpfilepath):
                        os.remove(tmpfilepath)  # remove old temp files
                    sink = DatasetNetCDF(name='WRF Climatology',
                                         folder=expfolder,
                                         filelist=[tmpfilename],
                                         atts=source.atts.copy(),
                                         mode='w')
                    sink.atts.period = periodstr
                    #           if lregrid: addGDALtoDataset(sink, griddef=griddef)

                    # initialize processing
                    CPU = CentralProcessingUnit(
                        source,
                        sink,
                        varlist=varlist,
                        tmp=lregrid,
                        feedback=ldebug)  # no need for lat/lon

                    # start processing climatology
                    if shift != 0:
                        logger.info(
                            '{0:s}   (shifting climatology by {1:d} month, to start with January)   \n'
                            .format(pidstr, shift))
                    CPU.Climatology(period=period,
                                    offset=offset,
                                    shift=shift,
                                    flush=False)
                    # N.B.: immediate flushing should not be necessary for climatologies, since they are much smaller!

                    # reproject and resample (regrid) dataset
                    if lregrid:
                        CPU.Regrid(griddef=griddef, flush=True)
                        logger.info('{:s}   ---   {:s}   ---   \n'.format(
                            pidstr, griddef.name))
                        logger.debug('{:s}   ---   {:s}   ---   \n'.format(
                            pidstr, str(griddef)))

                    # sync temporary storage with output dataset (sink)
                    CPU.sync(flush=True)

                    # add Geopotential Height Variance
                    if 'GHT_Var' in sink and 'Z_var' not in sink:
                        data_array = (sink['GHT_Var'].data_array -
                                      sink['Z'].data_array**2)**0.5
                        atts = dict(
                            name='Z_var',
                            units='m',
                            long_name=
                            'Square Root of Geopotential Height Variance')
                        sink += Variable(axes=sink['Z'].axes,
                                         data=data_array,
                                         atts=atts)

                    # add (relative) Vorticity Variance
                    if 'Vorticity_Var' in sink and 'zeta_var' not in sink:
                        data_array = (sink['Vorticity_Var'].data_array -
                                      sink['zeta'].data_array**2)**0.5
                        atts = dict(
                            name='zeta_var',
                            units='1/s',
                            long_name=
                            'Square Root of Relative Vorticity Variance')
                        sink += Variable(axes=sink['zeta'].axes,
                                         data=data_array,
                                         atts=atts)

                    # add names and length of months
                    sink.axisAnnotation('name_of_month',
                                        name_of_month,
                                        'time',
                                        atts=dict(
                                            name='name_of_month',
                                            units='',
                                            long_name='Name of the Month'))
                    if not sink.hasVariable('length_of_month'):
                        sink += Variable(name='length_of_month',
                                         units='days',
                                         axes=(sink.time, ),
                                         data=days_per_month,
                                         atts=dict(
                                             name='length_of_month',
                                             units='days',
                                             long_name='Length of Month'))

                    # close... and write results to file
                    sink.sync()
                    sink.close()
                    writemsg = "\n{:s}   >>>   Writing to file '{:s}' in dataset {:s}".format(
                        pidstr, filename, dataset_name)
                    writemsg += "\n{:s}   >>>   ('{:s}')\n".format(
                        pidstr, filepath)
                    logger.info(writemsg)
                    # rename file to proper name
                    if os.path.exists(filepath):
                        os.remove(filepath)  # remove old file
                    os.rename(tmpfilepath,
                              filepath)  # this will overwrite the old file

                    # print dataset
                    if not lparallel and ldebug:
                        logger.info('\n' + str(sink) + '\n')

                    # clean up (not sure if this is necessary, but there seems to be a memory leak...)
                    del sink, CPU
                    gc.collect()  # get rid of these guys immediately

        # clean up and return
        if source is not None:
            source.unload()
            del source
        # N.B.: source is only loaded once for all periods

    # N.B.: garbage is collected in multi-processing wrapper as well
    # return
    return 0  # so far, there is no measure of success, hence, if there is no crash...
Example #7
                if lshp:
                    for varname in shp_params:
                        var = gpcc025.variables[varname].load()
                        sink.addVariable(var,
                                         asNC=True,
                                         copy=True,
                                         deepcopy=True)

                # add names and length of months
                sink.axisAnnotation('name_of_month',
                                    name_of_month,
                                    'time',
                                    atts=dict(name='name_of_month',
                                              units='',
                                              long_name='Name of the Month'))
                if not sink.hasVariable('length_of_month'):
                    sink += Variable(name='length_of_month',
                                     units='days',
                                     axes=(sink.time, ),
                                     data=days_per_month,
                                     atts=dict(name='length_of_month',
                                               units='days',
                                               long_name='Length of Month'))

                # apply higher resolution mask
                if griddef is not None:
                    sink.mask(sink.landmask,
                              maskSelf=False,
                              varlist=None,
                              skiplist=['prismmask', 'lon2d', 'lat2d'],
                              invert=False,
                              merge=True)
Example #8
        
        ## add remaining CRU data
        for varname in ['Q2','pet','cldfrc','wetfrq','frzfrq']:
          cruprd.variables[varname].load()
          sink.addVariable(cruprd.variables[varname], asNC=True, copy=True, deepcopy=True)
          cruprd.variables[varname].unload()
          sink.variables[varname].atts['source'] = 'CRU'
        
        ## add station meta data
        if lshp:
          for varname in shp_params:
            var = gpcc025.variables[varname].load()
            sink.addVariable(var, asNC=True, copy=True, deepcopy=True)
            
        # add names and length of months
        sink.axisAnnotation('name_of_month', name_of_month, 'time', 
                            atts=dict(name='name_of_month', units='', long_name='Name of the Month'))        
        if not sink.hasVariable('length_of_month'):
          sink += Variable(name='length_of_month', units='days', axes=(sink.time,), data=days_per_month,
                        atts=dict(name='length_of_month',units='days',long_name='Length of Month'))
        
        # apply higher resolution mask
        if griddef is not None:
          sink.mask(sink.landmask, maskSelf=False, varlist=None, skiplist=['prismmask','lon2d','lat2d'], invert=False, merge=True)
            
        # finalize changes
        sink.sync()     
        sink.close()
        print(sink)
        print('\n Writing to: \'{0:s}\'\n'.format(filename))
Example #9
def loadCFSR_TS(name=dataset_name,
                grid=None,
                varlist=None,
                varatts=None,
                resolution='hires',
                filelist=None,
                folder=None,
                lautoregrid=None):
    ''' Get a properly formatted CFSR dataset with monthly mean time-series. '''
    if grid is None:
        # load from original time-series files
        if folder is None: folder = orig_ts_folder
        # translate varlist
        if varatts is None: varatts = tsvaratts.copy()
        if varlist is None:
            if resolution == 'hires' or resolution == '03' or resolution == '031':
                varlist = varlist_hires
            elif resolution == 'lowres' or resolution == '05':
                varlist = varlist_lowres
        if varlist and varatts: varlist = translateVarNames(varlist, varatts)
        if filelist is None:  # generate default filelist
            if resolution == 'hires' or resolution == '03' or resolution == '031':
                files = [
                    hiresfiles[var] for var in varlist if var in hiresfiles
                ]
            elif resolution == 'lowres' or resolution == '05':
                files = [
                    lowresfiles[var] for var in varlist if var in lowresfiles
                ]
        # load dataset
        dataset = DatasetNetCDF(name=name,
                                folder=folder,
                                filelist=files,
                                varlist=varlist,
                                varatts=varatts,
                                check_override=['time'],
                                multifile=False,
                                ncformat='NETCDF4_CLASSIC')
        # load static data
        if filelist is None:  # generate default filelist
            if resolution == 'hires' or resolution == '03' or resolution == '031':
                files = [
                    hiresstatic[var] for var in varlist if var in hiresstatic
                ]
            elif resolution == 'lowres' or resolution == '05':
                files = [
                    lowresstatic[var] for var in varlist if var in lowresstatic
                ]
            # create singleton time axis
            staticdata = DatasetNetCDF(name=name,
                                       folder=folder,
                                       filelist=files,
                                       varlist=varlist,
                                       varatts=varatts,
                                       axes=dict(lon=dataset.lon,
                                                 lat=dataset.lat),
                                       multifile=False,
                                       check_override=['time'],
                                       ncformat='NETCDF4_CLASSIC')
            # N.B.: need to override the axes, so that the datasets are consistent
            if len(staticdata.variables) > 0:
                for var in staticdata.variables.values():
                    if not dataset.hasVariable(var.name):
                        var.squeeze()  # remove time dimension
                        dataset.addVariable(
                            var, copy=False
                        )  # no need to copy... but we can't write to the netcdf file!
        # replace time axis with number of month since Jan 1979
        data = np.arange(0, len(dataset.time), 1,
                         dtype='int16')  # month since 1979 (Jan 1979 = 0)
        timeAxis = Axis(name='time',
                        units='month',
                        coord=data,
                        atts=dict(long_name='Month since 1979-01'))
        dataset.replaceAxis(dataset.time, timeAxis, asNC=False, deepcopy=False)
        # add projection
        dataset = addGDALtoDataset(dataset,
                                   projection=None,
                                   geotransform=None,
                                   gridfolder=grid_folder)
        # N.B.: projection should be auto-detected as geographic
    else:
        # load from neatly formatted and regridded time-series files
        if folder is None: folder = avgfolder
        grid, resolution = checkGridRes(grid, resolution)
        dataset = loadObservations(name=name,
                                   folder=folder,
                                   projection=None,
                                   resolution=resolution,
                                   grid=grid,
                                   period=None,
                                   varlist=varlist,
                                   varatts=varatts,
                                   filepattern=tsfile,
                                   filelist=filelist,
                                   lautoregrid=lautoregrid,
                                   mode='time-series')
    # return formatted dataset
    return dataset
Example #10
            # initialize processing
            CPU = CentralProcessingUnit(source, sink, tmp=True)

            # start processing climatology
            CPU.Climatology(period=period[1] - period[0],
                            offset=offset,
                            flush=False)

            # shift longitude axis by 180 degrees left (i.e. 0 - 360 -> -180 - 180)
            CPU.Shift(lon=-180, flush=False)

            # sync temporary storage with output
            CPU.sync(flush=True)

            # make new masks
            if sink.hasVariable('landmask'):
                sink.mask(sink.landmask,
                          maskSelf=False,
                          varlist=['snow', 'snowh', 'zs'],
                          invert=True,
                          merge=False)

            # add names and length of months
            sink.axisAnnotation('name_of_month',
                                name_of_month,
                                'time',
                                atts=dict(name='name_of_month',
                                          units='',
                                          long_name='Name of the Month'))
            #print '   ===   month   ===   '
            #       sink += VarNC(sink.dataset, name='length_of_month', units='days', axes=(sink.time,), data=days_per_month,