Beispiel #1
0
def getTargetFile(dataset=None, mode=None, dataargs=None, grid=None, shape=None, station=None, period=None, filetype=None,
                  lwrite=True):
  ''' Generate the filename for the target dataset file.

      dataset:  dataset type ('WRF', 'CESM', or an observational dataset)
      mode:     'climatology' or 'time-series'
      dataargs: namespace with dataset meta data (gridstr, periodstr, domain, filetype, obs_res, avgfolder)
      grid/shape/station/period/filetype: optional overrides for the corresponding dataargs values
      lwrite:   whether a file is actually going to be written (otherwise raise DatasetError)
  '''
  # fall back to meta-data defaults
  if grid is None: grid = dataargs.gridstr # also use grid for station/shape type
  if period is None: period = dataargs.periodstr
  if dataset in ('WRF','CESM') and lwrite:
    # prepare some variables
    domain = dataargs.domain
    if filetype is None: filetype = dataargs.filetype
    gstr = '_{}'.format(grid) if grid else ''
    # prepend shape or station type before grid
    if shape and station: raise ArgumentError # shape and station are mutually exclusive
    elif shape: gstr = '_{}{}'.format(shape,gstr)
    elif station: gstr = '_{}{}'.format(station,gstr)
    pstr = '_{}'.format(period) if period else ''
    if dataset == 'WRF':
      import datasets.WRF as WRF
      # fall back to a generic FileType instance for unknown file types
      fileclass = WRF.fileclasses[filetype] if filetype in WRF.fileclasses else WRF.FileType(filetype)
      if mode == 'climatology': filename = fileclass.climfile.format(domain,gstr,pstr)
      elif mode == 'time-series': filename = fileclass.tsfile.format(domain,gstr)
      # N.B.: this check used to sit (unreachably) on the dataset branch, so an
      #       unrecognized mode left 'filename' unbound; also Python-3-compatible raise
      else: raise NotImplementedError("Unsupported Mode: '{:s}'".format(mode))
    elif dataset == 'CESM':
      import datasets.CESM as CESM
      fileclass = CESM.fileclasses[filetype] if filetype in CESM.fileclasses else CESM.FileType(filetype)
      if mode == 'climatology': filename = fileclass.climfile.format(gstr,pstr)
      elif mode == 'time-series': filename = fileclass.tsfile.format(gstr)
      else: raise NotImplementedError("Unsupported Mode: '{:s}'".format(mode))
  elif lwrite: # assume observational datasets
    filename = getFileName(grid=grid, shape=shape, station=station, period=period, name=dataargs.obs_res, filetype=mode)
  else: raise DatasetError(dataset)
  # make sure the target folder exists
  if not os.path.exists(dataargs.avgfolder):
    raise IOError("Dataset folder '{:s}' does not exist!".format(dataargs.avgfolder))
  # return filename
  return filename
Beispiel #2
0
def getTargetFile(dataset=None, mode=None, dataargs=None, lwrite=True, grid=None, period=None, filetype=None):
  ''' Generate the filename for the target dataset file.

      grid, period, and filetype override the corresponding dataargs values.
      Raises DatasetError if lwrite is False or the dataset is not recognized,
      and IOError if the target folder does not exist.
  '''
  # prepare some variables
  domain = dataargs.domain
  if filetype is None: filetype = dataargs.filetype
  if grid is None: grid = dataargs.gridstr # also use grid for station type
  if period is None: period = dataargs.periodstr
  gstr = '_{}'.format(grid) if grid else ''
  pstr = '_{}'.format(period) if period else ''
  # figure out filename
  if dataset == 'WRF' and lwrite:
    if mode == 'climatology': filename = WRF.clim_file_pattern.format(filetype,domain,gstr,pstr)
    elif mode == 'time-series': filename = WRF.ts_file_pattern.format(filetype,domain,gstr)
    else: raise NotImplementedError
  elif dataset == 'CESM' and lwrite:
    if mode == 'climatology': filename = CESM.clim_file_pattern.format(filetype,gstr,pstr)
    elif mode == 'time-series': filename = CESM.ts_file_pattern.format(filetype,gstr)
    else: raise NotImplementedError
  elif ( dataset == dataset.upper() or dataset == 'Unity' ) and lwrite: # observational datasets
    filename = getFileName(grid=grid, period=dataargs.period, name=dataargs.obs_res, filetype=mode)
  elif not lwrite: raise DatasetError
  else:
    # N.B.: previously an unrecognized dataset fell through silently and caused
    #       a NameError on 'filename' below
    raise DatasetError(dataset)
  # make sure the target folder exists
  if not os.path.exists(dataargs.avgfolder):
    raise IOError("Dataset folder '{:s}' does not exist!".format(dataargs.avgfolder))
  # return filename
  return filename
Beispiel #3
0
def getTargetFile(dataset=None,
                  mode=None,
                  dataargs=None,
                  grid=None,
                  shape=None,
                  station=None,
                  period=None,
                  filetype=None,
                  lwrite=True):
    ''' Generate the filename for the target dataset file.

        grid/shape/station/period/filetype override the corresponding values in
        dataargs; shape and station are mutually exclusive. Raises DatasetError
        if lwrite is False and IOError if the target folder does not exist.
    '''
    # fall back to meta-data defaults
    if grid is None:
        grid = dataargs.gridstr  # also use grid for station/shape type
    if period is None: period = dataargs.periodstr
    if dataset in ('WRF', 'CESM') and lwrite:
        # prepare some variables
        domain = dataargs.domain
        if filetype is None: filetype = dataargs.filetype
        gstr = '_{}'.format(grid) if grid else ''
        # prepend shape or station type before grid
        if shape and station: raise ArgumentError  # mutually exclusive
        elif shape: gstr = '_{}{}'.format(shape, gstr)
        elif station: gstr = '_{}{}'.format(station, gstr)
        pstr = '_{}'.format(period) if period else ''
        if dataset == 'WRF':
            import datasets.WRF as WRF
            # fall back to a generic FileType instance for unknown file types
            fileclass = WRF.fileclasses[filetype] if filetype in WRF.fileclasses else WRF.FileType(filetype)
            if mode == 'climatology':
                filename = fileclass.climfile.format(domain, gstr, pstr)
            elif mode == 'time-series':
                filename = fileclass.tsfile.format(domain, gstr)
            else:
                # N.B.: this check used to sit (unreachably) on the dataset branch,
                #       so an unrecognized mode left 'filename' unbound
                raise NotImplementedError("Unsupported Mode: '{:s}'".format(mode))
        elif dataset == 'CESM':
            import datasets.CESM as CESM
            fileclass = CESM.fileclasses[filetype] if filetype in CESM.fileclasses else CESM.FileType(filetype)
            if mode == 'climatology':
                filename = fileclass.climfile.format(gstr, pstr)
            elif mode == 'time-series':
                filename = fileclass.tsfile.format(gstr)
            else:
                raise NotImplementedError("Unsupported Mode: '{:s}'".format(mode))
    elif lwrite:  # assume observational datasets
        filename = getFileName(grid=grid,
                               shape=shape,
                               station=station,
                               period=period,
                               name=dataargs.obs_res,
                               filetype=mode)
    else:
        raise DatasetError(dataset)
    # make sure the target folder exists
    if not os.path.exists(dataargs.avgfolder):
        raise IOError("Dataset folder '{:s}' does not exist!".format(dataargs.avgfolder))
    # return filename
    return filename
Beispiel #4
0
def getTargetFile(name, dataset, mode, module, dataargs, lwrite):
  ''' Generate the filename for the target dataset file (station data).

      name is used as the "grid" designation; module supplies the file name
      patterns for WRF/CESM. Raises DatasetError if lwrite is False or the
      dataset is not recognized, and IOError if the target folder is missing.
  '''
  # extract some variables
  periodstr = dataargs.periodstr; filetype = dataargs.filetype; domain = dataargs.domain
  sstr = '_{}'.format(name) # use name as "grid" designation for station data
  pstr = '_{}'.format(periodstr) if periodstr else ''
  # figure out filename
  if dataset == 'WRF' and lwrite:
    if mode == 'climatology': filename = module.clim_file_pattern.format(filetype,domain,sstr,pstr)
    elif mode == 'time-series': filename = module.ts_file_pattern.format(filetype,domain,sstr)
    else: raise NotImplementedError
  elif dataset == 'CESM' and lwrite:
    if mode == 'climatology': filename = module.clim_file_pattern.format(filetype,sstr,pstr)
    elif mode == 'time-series': filename = module.ts_file_pattern.format(filetype,sstr)
    else: raise NotImplementedError
  elif ( dataset == dataset.upper() or dataset == 'Unity' ) and lwrite: # observational datasets
    filename = getFileName(grid=name, period=dataargs.period, name=dataargs.obs_res, filetype=mode)
  elif not lwrite: raise DatasetError
  else:
    # N.B.: previously an unrecognized dataset fell through silently and caused
    #       a NameError on 'filename' below
    raise DatasetError(dataset)
  # make sure the target folder exists
  if not os.path.exists(dataargs.avgfolder):
    raise IOError("Dataset folder '{:s}' does not exist!".format(dataargs.avgfolder))
  # return filename
  return filename
Beispiel #5
0
def getTargetFile(name, dataset, mode, module, dataargs, lwrite):
    ''' Generate the filename for the target dataset file (station data).

        name is used as the "grid" designation; module supplies the file name
        patterns for WRF/CESM. Raises DatasetError if lwrite is False or the
        dataset is not recognized, and IOError if the target folder is missing.
    '''
    # extract some variables
    periodstr = dataargs.periodstr
    filetype = dataargs.filetype
    domain = dataargs.domain
    sstr = '_{}'.format(name)  # use name as "grid" designation for station data
    pstr = '_{}'.format(periodstr) if periodstr else ''
    # figure out filename
    if dataset == 'WRF' and lwrite:
        if mode == 'climatology':
            filename = module.clim_file_pattern.format(filetype, domain, sstr, pstr)
        elif mode == 'time-series':
            filename = module.ts_file_pattern.format(filetype, domain, sstr)
        else:
            raise NotImplementedError
    elif dataset == 'CESM' and lwrite:
        if mode == 'climatology':
            filename = module.clim_file_pattern.format(filetype, sstr, pstr)
        elif mode == 'time-series':
            filename = module.ts_file_pattern.format(filetype, sstr)
        else:
            raise NotImplementedError
    elif (dataset == dataset.upper()
          or dataset == 'Unity') and lwrite:  # observational datasets
        filename = getFileName(grid=name,
                               period=dataargs.period,
                               name=dataargs.obs_res,
                               filetype=mode)
    elif not lwrite:
        raise DatasetError
    else:
        # N.B.: previously an unrecognized dataset fell through silently and
        #       caused a NameError on 'filename' below
        raise DatasetError(dataset)
    # make sure the target folder exists
    if not os.path.exists(dataargs.avgfolder):
        raise IOError("Dataset folder '{:s}' does not exist!".format(dataargs.avgfolder))
    # return filename
    return filename
Beispiel #6
0
      # NOTE(review): fragment of a larger script (enclosing scope not shown);
      # loads the GPCC long-term-mean climatology, annotates it, and writes it
      # out as a new NetCDF file
      dataset = loadGPCC_LTM(varlist=['stations','precip'],resolution=res)
      # change meta-data
      dataset.name = 'GPCC'
      dataset.title = 'GPCC Long-term Climatology'
      dataset.atts.resolution = res
      # load data into memory
      dataset.load()

      # add landmask
      addLandMask(dataset) # create landmask from precip mask
      dataset.mask(dataset.landmask) # mask all fields using the new landmask
      # add length and names of month
      addLengthAndNamesOfMonth(dataset, noleap=False)

      # figure out a different filename
      filename = getFileName(grid=res, period=None, name='GPCC', filepattern=avgfile)
      print('\n'+filename+'\n')
      # remove any pre-existing file, so writeNetCDF creates a fresh one
      if os.path.exists(avgfolder+filename): os.remove(avgfolder+filename)
      # write data and some annotation
      ncset = writeNetCDF(dataset, avgfolder+filename, close=False)
#       add_var(ncset,'name_of_month', name_of_month, 'time', # add names of month
#                  atts=dict(name='name_of_month', units='', long_name='Name of the Month')) 

      # close...
      ncset.close()
      dataset.close()
      # print dataset before
      print(dataset)
      print('')
Beispiel #7
0
def getMetaData(dataset, mode, dataargs, lone=True):
    ''' Determine dataset type and meta data, as well as path to main source file.

        Arguments:
          dataset:  'WRF', 'CESM', or the name of an observational dataset module
          mode:     'climatology', 'time-series', or '*-mean' (treated as climatology)
          dataargs: dict with dataset-specific arguments (experiment, filetypes, grid, ...)
          lone:     if True, only a single filetype may be processed at a time

        Returns (dataargs, loadfct, srcage, datamsgstr): a namedTuple with meta data,
        an argument-less function that loads the source dataset, the age of the
        source file(s), and a log message describing the dataset.
    '''
    # determine dataset mode
    lclim = False
    lts = False
    if mode == 'climatology': lclim = True
    elif mode == 'time-series': lts = True
    elif mode[-5:] == '-mean':
        lclim = True
        mode = 'climatology'  # only for export to seasonal means (load entire monthly climatology)
    else:
        # N.B.: Python-3-compatible raise (was Python 2 comma syntax)
        raise NotImplementedError("Unrecognized Mode: '{:s}'".format(mode))
    # general arguments (dataset independent)
    varlist = dataargs.get('varlist', None)
    resolution = dataargs.get('resolution', None)
    grid = dataargs.get('grid', None)  # get grid
    period = dataargs.get('period', None)
    # determine meta data based on dataset type
    if dataset == 'WRF':
        import datasets.WRF as WRF
        # WRF datasets
        obs_res = None  # only for obs datasets (not used here)
        exp = dataargs['experiment']  # need that one
        dataset_name = exp.name
        avgfolder = exp.avgfolder
        filetypes = dataargs['filetypes']
        # fall back to generic FileType instances for unknown file types
        fileclasses = WRF.fileclasses.copy()
        for filetype in filetypes:
            if filetype not in fileclasses:
                fileclasses[filetype] = WRF.FileType(filetype)
        domain = dataargs.get('domain', None)
        periodstr, gridstr = getPeriodGridString(period, grid, exp=exp)
        # check arguments
        if period is None and lclim:
            raise DatasetError("A 'period' argument is required to load climatologies!")
        if lone and len(filetypes) > 1:
            raise DatasetError  # process only one file at a time
        if not isinstance(domain, (np.integer, int)): raise DatasetError
        # construct dataset message
        if lone:
            datamsgstr = "Processing WRF '{:s}'-file from Experiment '{:s}' (d{:02d})".format(
                filetypes[0], dataset_name, domain)
        else:
            datamsgstr = "Processing WRF dataset from Experiment '{:s}' (d{:02d})".format(
                dataset_name, domain)
        # figure out age of source file(s)
        srcage = getSourceAge(fileclasses=fileclasses, filetypes=filetypes, exp=exp,
                              domain=domain, periodstr=periodstr, gridstr=gridstr,
                              lclim=lclim, lts=lts)
        # load source data (lconst=True: still want topography)
        if lclim:
            loadfct = partial(WRF.loadWRF, experiment=exp, name=None, domains=domain,
                              grid=grid, varlist=varlist, period=period,
                              filetypes=filetypes, varatts=None, lconst=True,
                              ltrimT=False)
        elif lts:
            loadfct = partial(WRF.loadWRF_TS, experiment=exp, name=None, domains=domain,
                              grid=grid, varlist=varlist, filetypes=filetypes,
                              varatts=None, lconst=True, ltrimT=False)
    elif dataset == 'CESM':
        import datasets.CESM as CESM
        # CESM datasets
        obs_res = None  # only for obs datasets (not used here)
        domain = None  # only for WRF
        exp = dataargs['experiment']
        avgfolder = exp.avgfolder
        dataset_name = exp.name
        periodstr, gridstr = getPeriodGridString(period, grid, exp=exp)
        filetypes = dataargs['filetypes']
        fileclasses = CESM.fileclasses.copy()
        for filetype in filetypes:
            if filetype not in fileclasses:
                fileclasses[filetype] = CESM.FileType(filetype)
        # check arguments
        if period is None and lclim:
            raise DatasetError("A 'period' argument is required to load climatologies!")
        if lone and len(filetypes) > 1:
            raise DatasetError  # process only one file at a time
        # construct dataset message
        if lone:
            datamsgstr = "Processing CESM '{:s}'-file from Experiment '{:s}'".format(
                filetypes[0], dataset_name)
        else:
            datamsgstr = "Processing CESM dataset from Experiment '{:s}'".format(
                dataset_name)
        # figure out age of source file(s)
        srcage = getSourceAge(fileclasses=fileclasses, filetypes=filetypes, exp=exp,
                              domain=None, periodstr=periodstr, gridstr=gridstr,
                              lclim=lclim, lts=lts)
        # load source data
        # N.B.: pop() deliberately strips 'load3D' from the caller's dataargs dict
        load3D = dataargs.pop('load3D', None)  # if 3D fields should be loaded (default: False)
        if lclim:
            loadfct = partial(CESM.loadCESM, experiment=exp, name=None, grid=grid,
                              period=period, varlist=varlist, filetypes=filetypes,
                              varatts=None, load3D=load3D, translateVars=None)
        elif lts:
            loadfct = partial(CESM.loadCESM_TS, experiment=exp, name=None, grid=grid,
                              varlist=varlist, filetypes=filetypes, varatts=None,
                              load3D=load3D, translateVars=None)
    else:
        # assume observational datasets
        filetypes = [None]  # only for CESM & WRF
        domain = None  # only for WRF
        try:
            module = import_module('datasets.{0:s}'.format(dataset))
        except ImportError:
            raise DatasetError(
                "Error loading dataset module '{:s}' from 'datasets' package!".
                format(dataset))
        dataset_name = module.dataset_name
        resolution = dataargs['resolution']
        if resolution: obs_res = '{0:s}_{1:s}'.format(dataset_name, resolution)
        else: obs_res = dataset_name
        # figure out period
        periodstr, gridstr = getPeriodGridString(period, grid, beginyear=1979)
        if period is None and lclim: periodstr = 'LTM'  # long-term mean climatology
        datamsgstr = "Processing Dataset '{:s}'".format(dataset_name)
        # assemble filename to check modification dates (should be only one file)
        filename = getFileName(grid=grid, period=period, name=obs_res, filetype=mode)
        avgfolder = module.avgfolder
        filepath = '{:s}/{:s}'.format(avgfolder, filename)
        # load pre-processed climatology
        kwargs = dict(name=dataset_name, grid=grid, varlist=varlist,
                      resolution=resolution, varatts=None)
        if dataset == 'Unity': kwargs['unity_grid'] = dataargs['unity_grid']
        if lclim and module.loadClimatology is not None:
            loadfct = partial(module.loadClimatology, period=period, **kwargs)
        elif lts and module.loadTimeSeries is not None:
            loadfct = partial(module.loadTimeSeries, **kwargs)
        else:
            raise DatasetError(
                "Unable to identify time aggregation mode; the dataset " +
                "'{}' may not support selected mode '{}'.".format(
                    dataset, mode))
        # check if the source file is actually correct
        if os.path.exists(filepath): filelist = [filepath]
        else:
            source = loadfct()  # don't load dataset, just construct the file list
            filelist = source.filelist
        # figure out age of source file(s)
        srcage = getSourceAge(filelist=filelist, lclim=lclim, lts=lts)
        # N.B.: it would be nice to print a message, but then we would have to make the logger available,
        #       which would be too much trouble
    ## assemble and return meta data
    dataargs = namedTuple(dataset_name=dataset_name, period=period, periodstr=periodstr,
                          avgfolder=avgfolder, filetypes=filetypes, filetype=filetypes[0],
                          domain=domain, obs_res=obs_res, varlist=varlist, grid=grid,
                          gridstr=gridstr, resolution=resolution)
    # return meta data
    return dataargs, loadfct, srcage, datamsgstr
Beispiel #8
0
      # NOTE(review): fragment of a larger script (enclosing scope not shown);
      # loads the NARR long-term-mean climatology, annotates it, and writes it
      # out as a new NetCDF file
      # load dataset
      dataset = loadNARR_LTM()
      # change meta-data
      dataset.name = 'NARR'
      dataset.title = 'NARR Long-term Climatology'
      # load data into memory
      dataset.load()

#       # add landmask
#       addLandMask(dataset) # create landmask from precip mask
#       dataset.mask(dataset.landmask) # mask all fields using the new landmask      
      # add length and names of month
      addLengthAndNamesOfMonth(dataset, noleap=False)

      # figure out a different filename
      filename = getFileName(grid='NARR', period=None, name='NARR', filepattern=avgfile)
      print('\n'+filename+'\n')
      # remove any pre-existing file, so writeNetCDF creates a fresh one
      if os.path.exists(avgfolder+filename): os.remove(avgfolder+filename)
      # write data and some annotation
      ncset = writeNetCDF(dataset, avgfolder+filename, close=False)
      # add month names as a string variable along the time axis
      add_strvar(ncset,'name_of_month', name_of_month, 'time', # add names of month
                 atts=dict(name='name_of_month', units='', long_name='Name of the Month')) 

      # close...
      ncset.close()
      dataset.close()
      # print dataset before
      print(dataset)
      print('')
Beispiel #9
0
        # NOTE(review): fragment of a larger script (enclosing scope not shown);
        # relabels the time axis as months, prints diagnostics, and writes the
        # climatology out as a new NetCDF file
        time = dataset.time
        time.load(data=np.arange(
            1, 13, dtype=time.dtype))  # 1 to 12 (incl.) for climatology
        time.units = 'month'
        time.atts.long_name = 'Month of the Year'
        print(time)
        # print diagnostic
        print(dataset)
        print('')
        for var in dataset:
            #print(var)
            if not var.strvar:  # string variables have no meaningful mean
                print('Mean {0:s}: {1:s} {2:s}'.format(var.atts.long_name,
                                                       str(var.mean()),
                                                       var.units))
            #print('')
        print('')

        ## create new NetCDF file
        # figure out a different filename
        filename = getFileName(name='PCIC', filepattern=avgfile)
        # remove any pre-existing file, so writeNetCDF creates a fresh one
        if os.path.exists(avgfolder + filename):
            os.remove(avgfolder + filename)
        # write data and some annotation
        sink = writeNetCDF(dataset, avgfolder + filename, close=False)
        #     add_strvar(sink,'name_of_month', name_of_month, 'time', # add names of month
        #                atts=dict(name='name_of_month', units='', long_name='Name of the Month'))
        sink.close()  # close...
        print('Saving Climatology to: ' + filename)
        print(avgfolder)
Beispiel #10
0
def getMetaData(dataset, mode, dataargs, lone=True):
  ''' Determine dataset type and meta data, as well as path to main source file.

      Returns (dataargs, loadfct, srcage, datamsgstr): a namedTuple with meta data,
      an argument-less function that loads the source dataset, the age of the
      source file(s), and a log message describing the dataset.
  '''
  # determine dataset mode
  lclim = False; lts = False
  if mode == 'climatology': lclim = True
  elif mode == 'time-series': lts = True
  # N.B.: Python-3-compatible raises throughout (was Python 2 comma syntax)
  else: raise NotImplementedError("Unrecognized Mode: '{:s}'".format(mode))
  # general arguments (dataset independent)
  varlist = dataargs.get('varlist',None)
  grid = dataargs.get('grid',None) # get grid
  period = dataargs.get('period',None)
  # determine meta data based on dataset type
  if dataset == 'WRF':
    # WRF datasets
    obs_res = None # only for obs datasets (not used here)
    exp = dataargs['experiment'] # need that one
    dataset_name = exp.name
    avgfolder = exp.avgfolder
    filetypes = dataargs['filetypes']
    domain = dataargs.get('domain',None)
    periodstr, gridstr = getPeriodGridString(period, grid, exp=exp)
    # check arguments
    if period is None and lclim:
      raise DatasetError("A 'period' argument is required to load climatologies!")
    if lone and len(filetypes) > 1: raise DatasetError # process only one file at a time
    if not isinstance(domain, (np.integer,int)): raise DatasetError
    # construct dataset message
    if lone:
      datamsgstr = "Processing WRF '{:s}'-file from Experiment '{:s}' (d{:02d})".format(filetypes[0], dataset_name, domain)
    else: datamsgstr = "Processing WRF dataset from Experiment '{:s}' (d{:02d})".format(dataset_name, domain)
    # figure out age of source file(s)
    srcage = getSourceAge(fileclasses=WRF.fileclasses, filetypes=filetypes, exp=exp, domain=domain,
                          periodstr=periodstr, gridstr=gridstr, lclim=lclim, lts=lts)
    # load source data (lconst=True: still want topography)
    if lclim:
      loadfct = partial(WRF.loadWRF, experiment=exp, name=None, domains=domain, grid=grid, varlist=varlist,
                        period=period, filetypes=filetypes, varatts=None, lconst=True)
    elif lts:
      loadfct = partial(WRF.loadWRF_TS, experiment=exp, name=None, domains=domain, grid=grid, varlist=varlist,
                        filetypes=filetypes, varatts=None, lconst=True)
  elif dataset == 'CESM':
    # CESM datasets
    obs_res = None # only for obs datasets (not used here)
    domain = None # only for WRF
    exp = dataargs['experiment']
    avgfolder = exp.avgfolder
    dataset_name = exp.name
    periodstr, gridstr = getPeriodGridString(period, grid, exp=exp)
    filetypes = dataargs['filetypes']
    # check arguments
    if period is None and lclim:
      raise DatasetError("A 'period' argument is required to load climatologies!")
    if lone and len(filetypes) > 1: raise DatasetError # process only one file at a time
    # construct dataset message
    if lone:
      datamsgstr = "Processing CESM '{:s}'-file from Experiment '{:s}'".format(filetypes[0], dataset_name)
    else: datamsgstr = "Processing CESM dataset from Experiment '{:s}'".format(dataset_name)
    # figure out age of source file(s)
    srcage = getSourceAge(fileclasses=CESM.fileclasses, filetypes=filetypes, exp=exp, domain=None,
                          periodstr=periodstr, gridstr=gridstr, lclim=lclim, lts=lts)
    # load source data
    # N.B.: pop() deliberately strips 'load3D' from the caller's dataargs dict
    load3D = dataargs.pop('load3D',None) # if 3D fields should be loaded (default: False)
    if lclim:
      loadfct = partial(CESM.loadCESM, experiment=exp, name=None, grid=grid, period=period, varlist=varlist,
                        filetypes=filetypes, varatts=None, load3D=load3D, translateVars=None)
    elif lts:
      loadfct = partial(CESM.loadCESM_TS, experiment=exp, name=None, grid=grid, varlist=varlist,
                        filetypes=filetypes, varatts=None, load3D=load3D, translateVars=None)
  elif dataset == dataset.upper() or dataset == 'Unity':
    # observational datasets (all-caps names by convention)
    filetypes = [None] # only for CESM & WRF
    domain = None # only for WRF
    module = import_module('datasets.{0:s}'.format(dataset))
    dataset_name = module.dataset_name
    resolution = dataargs['resolution']
    if resolution: obs_res = '{0:s}_{1:s}'.format(dataset_name,resolution)
    else: obs_res = dataset_name
    # figure out period
    periodstr, gridstr = getPeriodGridString(period, grid, beginyear=1979)
    if period is None and lclim: periodstr = 'LTM' # long-term mean climatology
    datamsgstr = "Processing Dataset '{:s}'".format(dataset_name)
    # assemble filename to check modification dates (should be only one file)
    filename = getFileName(grid=grid, period=period, name=obs_res, filetype=mode)
    avgfolder = module.avgfolder
    filepath = '{:s}/{:s}'.format(avgfolder,filename)
    # load pre-processed climatology
    if lclim:
      loadfct = partial(module.loadClimatology, name=dataset_name, period=period, grid=grid,
                        varlist=varlist, resolution=resolution, varatts=None)
    elif lts:
      loadfct = partial(module.loadTimeSeries, name=dataset_name, grid=grid, varlist=varlist,
                        resolution=resolution, varatts=None)
    # check if the source file is actually correct
    if os.path.exists(filepath): filelist = [filepath]
    else:
      source = loadfct() # don't load dataset, just construct the file list
      filelist = source.filelist
    # figure out age of source file(s)
    srcage = getSourceAge(filelist=filelist, lclim=lclim, lts=lts)
    # N.B.: it would be nice to print a message, but then we would have to make the logger available,
    #       which would be too much trouble
  else:
    raise DatasetError("Dataset '{:s}' not found!".format(dataset))
  ## assemble and return meta data
  dataargs = namedTuple(dataset_name=dataset_name, period=period, periodstr=periodstr, avgfolder=avgfolder,
                        filetypes=filetypes, filetype=filetypes[0], domain=domain, obs_res=obs_res,
                        varlist=varlist, grid=grid, gridstr=gridstr)
  # return meta data
  return dataargs, loadfct, srcage, datamsgstr
Beispiel #11
0
def getMetaData(dataset, mode, dataargs):
    ''' Determine dataset type and meta data, as well as path to main source file.

        Arguments:
          dataset:  'WRF', 'CESM', 'Unity', or an all-upper-case observational
                    dataset name matching a module under datasets/
          mode:     'climatology' or 'time-series'
          dataargs: dict of dataset-specific arguments (experiment, period, grid,
                    domain, filetypes, resolution, varlist, load3D, ...)
        Returns a tuple (module, dataargs, loadfct, filepath, datamsgstr), where
        the returned dataargs is a record of the resolved meta data.
        Raises NotImplementedError for an unknown mode, DateError for a malformed
        period, DatasetError for an unknown dataset or bad domain/filetypes, and
        IOError if the source file does not exist.
    '''
    # determine dataset mode
    lclim = False
    lts = False
    if mode == 'climatology': lclim = True
    elif mode == 'time-series': lts = True
    else: raise NotImplementedError("Unrecognized Mode: '{:s}'".format(mode))
    # defaults for specific variables
    obs_res = None   # observational resolution tag (only set for obs datasets)
    domain = None    # WRF nest number (only set for WRF)
    filetype = None  # file type being processed (WRF/CESM only)
    varlist = dataargs.get('varlist', None)
    # determine meta data based on dataset type
    if dataset == 'WRF':
        # WRF datasets
        module = import_module('datasets.WRF')
        exp = dataargs['experiment']
        dataset_name = exp.name
        domain = dataargs['domain']
        grid = dataargs.get('grid', None)
        # figure out period: None, a length in years (int), or a (begin, end) tuple
        period = dataargs['period']
        if period is None: pass
        elif isinstance(period, (int, np.integer)):
            # interpret an integer as the period length, starting at the experiment begin date
            beginyear = int(exp.begindate[0:4])
            period = (beginyear, beginyear + period)
        elif len(period) != 2 or not all(isInt(period)):
            # fixed: reject anything that is not a pair of integers
            # (previously 'and all(isInt(period))' let malformed periods through)
            raise DateError
        if period is None: periodstr = ''
        else: periodstr = '{0:4d}-{1:4d}'.format(*period)
        gridstr = grid if grid is not None else ''
        # identify file and domain
        if len(dataargs['filetypes']) > 1:
            raise DatasetError  # process only one file at a time
        filetype = dataargs['filetypes'][0]
        if isinstance(domain, (list, tuple)): domain = domain[0]
        if not isinstance(domain, (np.integer, int)): raise DatasetError
        datamsgstr = "Processing WRF '{:s}'-file from Experiment '{:s}' (d{:02d})".format(
            filetype, dataset_name, domain)
        # assemble filename to check modification dates (should be only one file)
        fileclass = module.fileclasses[filetype]  # avoid WRF & CESM name collision
        pstr = '_' + periodstr if periodstr else ''
        gstr = '_' + gridstr if gridstr else ''
        if lclim:
            filename = fileclass.climfile.format(
                domain, gstr, pstr)  # insert domain number, grid, and period
        elif lts:
            filename = fileclass.tsfile.format(
                domain, gstr)  # insert domain number, and grid
        avgfolder = exp.avgfolder
        # load source data (deferred via partial, so the caller controls when)
        if lclim:
            loadfct = functools.partial(loadWRF,
                                        experiment=exp,
                                        name=None,
                                        domains=domain,
                                        grid=None,
                                        varlist=varlist,
                                        period=period,
                                        filetypes=[filetype],
                                        varatts=None,
                                        lconst=True)  # still want topography...
        elif lts:
            loadfct = functools.partial(loadWRF_TS,
                                        experiment=exp,
                                        name=None,
                                        domains=domain,
                                        grid=None,
                                        varlist=varlist,
                                        filetypes=[filetype],
                                        varatts=None,
                                        lconst=True)  # still want topography...
        filepath = '{:s}/{:s}'.format(avgfolder, filename)
    elif dataset == 'CESM':
        # CESM datasets
        module = import_module('datasets.CESM')
        exp = dataargs['experiment']
        dataset_name = exp.name
        # figure out period (same logic as for WRF above)
        period = dataargs['period']
        if period is None: pass
        elif isinstance(period, (int, np.integer)):
            beginyear = int(exp.begindate[0:4])
            period = (beginyear, beginyear + period)
        elif len(period) != 2 or not all(isInt(period)):
            # fixed: was 'and all(isInt(period))' (see WRF branch)
            raise DateError
        # identify file
        if len(dataargs['filetypes']) > 1:
            raise DatasetError  # process only one file at a time
        filetype = dataargs['filetypes'][0]
        # check period
        if period is None: periodstr = ''
        else: periodstr = '{0:4d}-{1:4d}'.format(*period)
        datamsgstr = "Processing CESM '{:s}'-file from Experiment '{:s}'".format(
            filetype, dataset_name)
        # assemble filename to check modification dates (should be only one file)
        fileclass = module.fileclasses[filetype]  # avoid WRF & CESM name collision
        pstr = '_' + periodstr if periodstr else ''
        if lclim:
            filename = fileclass.climfile.format(
                '', pstr)  # no domain/grid token for CESM
        elif lts:
            filename = fileclass.tsfile.format('')
        avgfolder = exp.avgfolder
        # load source data
        # N.B.: pop() deliberately removes 'load3D' from the caller's dataargs
        #       dict (a visible side effect) so it is not passed on downstream
        load3D = dataargs.pop(
            'load3D', None)  # if 3D fields should be loaded (default: False)
        if lclim:
            loadfct = functools.partial(loadCESM,
                                        experiment=exp,
                                        name=None,
                                        grid=None,
                                        period=period,
                                        varlist=varlist,
                                        filetypes=[filetype],
                                        varatts=None,
                                        load3D=load3D,
                                        translateVars=None)
        elif lts:
            loadfct = functools.partial(loadCESM_TS,
                                        experiment=exp,
                                        name=None,
                                        grid=None,
                                        varlist=varlist,
                                        filetypes=[filetype],
                                        varatts=None,
                                        load3D=load3D,
                                        translateVars=None)
        filepath = '{:s}/{:s}'.format(avgfolder, filename)
    elif dataset == dataset.upper() or dataset == 'Unity':
        # observational datasets (modules named in all caps, plus the merged 'Unity' set)
        module = import_module('datasets.{0:s}'.format(dataset))
        dataset_name = module.dataset_name
        resolution = dataargs['resolution']
        if resolution: obs_res = '{0:s}_{1:s}'.format(dataset_name, resolution)
        else: obs_res = dataset_name
        # figure out period
        period = dataargs['period']
        if period is None: pass
        elif isinstance(period, (int, np.integer)):
            period = (1979, 1979 + period)  # they all begin in 1979
        elif len(period) != 2 or not all(isInt(period)):
            # fixed: was 'and not all(...)', which missed 2-tuples of non-ints
            raise DateError
        datamsgstr = "Processing Dataset '{:s}'".format(dataset_name)
        # check period
        if period is None:
            if mode == 'climatology': periodstr = 'Long-Term Mean'
            else: periodstr = ''
        else: periodstr = '{0:4d}-{1:4d}'.format(*period)
        # assemble filename to check modification dates (should be only one file)
        filename = getFileName(grid=None,
                               period=period,
                               name=obs_res,
                               filetype=mode)
        avgfolder = module.avgfolder
        # load pre-processed climatology
        if lclim:
            loadfct = functools.partial(module.loadClimatology,
                                        name=dataset_name,
                                        period=period,
                                        grid=None,
                                        varlist=varlist,
                                        resolution=resolution,
                                        varatts=None,
                                        folder=module.avgfolder,
                                        filelist=None)
        elif lts:
            loadfct = functools.partial(module.loadTimeSeries,
                                        name=dataset_name,
                                        grid=None,
                                        varlist=varlist,
                                        resolution=resolution,
                                        varatts=None,
                                        folder=None,
                                        filelist=None)
        # check if the source file is actually correct
        filepath = '{:s}/{:s}'.format(avgfolder, filename)
        if not os.path.exists(filepath):
            source = loadfct(
            )  # no varlist - obs don't have many variables anyways
            filepath = source.filelist[0]
            # N.B.: it would be nice to print a message, but then we would have to make the logger available,
            #       which would be too much trouble
    else:
        raise DatasetError("Dataset '{:s}' not found!".format(dataset))
    ## assemble and return meta data
    if not os.path.exists(filepath):
        raise IOError("Source file '{:s}' does not exist!".format(filepath))
    # repack resolved meta data into a record (namedTuple is presumably a project
    # helper that builds an immutable record - TODO confirm)
    dataargs = namedTuple(dataset_name=dataset_name,
                          period=period,
                          periodstr=periodstr,
                          avgfolder=avgfolder,
                          filetype=filetype,
                          domain=domain,
                          obs_res=obs_res,
                          varlist=varlist)
    # return meta data
    return module, dataargs, loadfct, filepath, datamsgstr
Beispiel #12
0
            ## load source datasets for shape-averaged merging
            # NOTE(review): fragment of a larger script - 'grid', 'pntset',
            #               'period', 'tsfile' and 'avgfolder' come from the
            #               enclosing (unseen) scope
            if grid in ('shpavg', ):
                # regional averages: shape index as grid
                uclim = loadUnity_Shp(shape=pntset, period=period)  # merged climatology
                cruclim = loadCRU_Shp(shape=grid, period=period)  # CRU climatology
                cruts = loadCRU_ShpTS(shape=grid)  # CRU time-series
            else:
                raise NotImplementedError

            grid_name = grid
            periodstr = '{0:4d}-{1:4d}'.format(*period)
            print(
                '\n   ***   Merging Shape-Averaged Time-Series on {:s} Grid  ***   \n'
                .format(grid, ))
            ## prepare target dataset
            filename = getFileName(grid=grid_name,
                                   period=None,
                                   name=None,
                                   filepattern=tsfile)
            filepath = avgfolder + filename
            print(' Saving data to: \'{0:s}\'\n'.format(filepath))
            assert os.path.exists(avgfolder)
            if os.path.exists(filepath): os.remove(filepath)  # remove old file
            # start from the attributes of the merged climatology
            # (fixed: removed a dead 'atts = dict()' that was immediately overwritten)
            atts = uclim.atts.copy()
            # fixed typo in the stored title attribute: 'Time-sries' -> 'Time-series'
            atts['title'] = 'Corrected Time-series on {:s} Grid'.format(
                grid_name)
            # make new dataset (opened for writing; replaces the file removed above)
            sink = DatasetNetCDF(folder=avgfolder,
                                 filelist=[filename],
                                 atts=atts,
                                 mode='w')
Beispiel #13
0
        ## load source datasets
        # NOTE(review): this excerpt is cut off - the 'for' loop at the end has
        #               no body in view; 'grid', 'pntset', 'tsfile' and
        #               'avgfolder' come from the enclosing (unseen) scope
#         period = (1979,2009)
        period = (1979,1994)  # climatology period as (begin year, end year)
        if grid in ('shpavg',):
          # regional averages: shape index as grid
          uclim = loadUnity_Shp(shape=pntset, period=period)  # merged climatology
          cruclim = loadCRU_Shp(shape=grid, period=period)  # CRU climatology
          cruts = loadCRU_ShpTS(shape=grid)  # CRU time-series
        else:
          raise NotImplementedError

        grid_name = grid
        periodstr = '{0:4d}-{1:4d}'.format(*period)
        print('\n   ***   Merging Shape-Averaged Time-Series on {:s} Grid  ***   \n'.format(grid,))
        ## prepare target dataset 
        filename = getFileName(grid=grid_name, period=None, name=None, filepattern=tsfile)
        filepath = avgfolder + filename
        print(' Saving data to: \'{0:s}\'\n'.format(filepath))
        assert os.path.exists(avgfolder)
        if os.path.exists(filepath): os.remove(filepath) # remove old file
        # set attributes   
        atts=dict() # collect attributes, but add prefixes
        # NOTE(review): the dict() above is dead - immediately overwritten below
        atts = uclim.atts.copy()
        # NOTE(review): 'Time-sries' is a typo in the original title string
        atts['title'] = 'Corrected Time-sries on {:s} Grid'.format(grid_name)
        # make new dataset
        sink = DatasetNetCDF(folder=avgfolder, filelist=[filename], atts=atts, mode='w')
        # sync and write data so far 
        sink.sync()

        ## correct data (create variables)
        for varname,var in uclim.variables.iteritems():
Beispiel #14
0
 ## derive average 2m temperature and finalize the monthly climatology
 # NOTE(review): fragment of a larger script - 'dataset', 'avgfile', 'avgfolder',
 #               'name_of_month', 'writeNetCDF' and 'add_strvar' come from the
 #               enclosing (unseen) scope
 T2 = dataset.Tmin + dataset.Tmax # average temperature is just the average between min and max
 T2 /= 2.
 T2.name = 'T2'; T2.atts.long_name='Average 2m Temperature'
 print(T2)
 dataset += T2 # add to dataset
 # rewrite time axis
 time = dataset.time
 time.load(data=np.arange(1,13)) # monthly climatology: months 1 through 12
 time.units = 'month'; time.atts.long_name='Month of the Year'
 print(time)
 # print diagnostic
 print(dataset)
 print('')
 for var in dataset:
   #print(var)
   # print mean of every variable as a quick sanity check
   print('Mean {0:s}: {1:s} {2:s}'.format(var.atts.long_name, str(var.mean()), var.units))
   #print('')
 print('')
     
 ## create new NetCDF file    
 # figure out a different filename
 filename = getFileName(name='PCIC', filepattern=avgfile)
 if os.path.exists(avgfolder+filename): os.remove(avgfolder+filename)      
 # write data and some annotation
 sink = writeNetCDF(dataset, avgfolder+filename, close=False)
 add_strvar(sink,'name_of_month', name_of_month, 'time', # add names of month
            atts=dict(name='name_of_month', units='', long_name='Name of the Month'))          
 sink.close() # close...
 print('Saving Climatology to: '+filename)
 print(avgfolder)
 
Beispiel #15
0
            # finalize GPCC long-term climatology and write it to NetCDF
            # NOTE(review): fragment of a larger loop/script - 'dataset', 'res',
            #               'avgfile', 'avgfolder', 'addLandMask',
            #               'addLengthAndNamesOfMonth' and 'writeNetCDF' come
            #               from the enclosing (unseen) scope
            dataset.name = 'GPCC'
            dataset.title = 'GPCC Long-term Climatology'
            dataset.atts.resolution = res
            # load data into memory
            dataset.load()

            # add landmask
            addLandMask(dataset)  # create landmask from precip mask
            dataset.mask(
                dataset.landmask)  # mask all fields using the new landmask
            # add length and names of month
            addLengthAndNamesOfMonth(dataset, noleap=False)

            # figure out a different filename
            filename = getFileName(grid=res,
                                   period=None,
                                   name='GPCC',
                                   filepattern=avgfile)
            print('\n' + filename + '\n')
            if os.path.exists(avgfolder + filename):
                os.remove(avgfolder + filename)  # replace any existing file
            # write data and some annotation
            ncset = writeNetCDF(dataset, avgfolder + filename, close=False)
            #       add_var(ncset,'name_of_month', name_of_month, 'time', # add names of month
            #                  atts=dict(name='name_of_month', units='', long_name='Name of the Month'))

            # close...
            ncset.close()
            dataset.close()
            # print dataset before
            print(dataset)
            print('')
Beispiel #16
0
def getMetaData(dataset, mode, dataargs):
  ''' determine dataset type and meta data, as well as path to main source file

      dataset:  'WRF', 'CESM', 'Unity', or an all-upper-case observational
                dataset name matching a module under datasets/
      mode:     'climatology' or 'time-series'
      dataargs: dict with dataset-specific arguments (experiment, period, grid,
                domain, filetypes, resolution, varlist, load3D, ...)
      Returns (module, dataargs, loadfct, filepath, datamsgstr); the returned
      dataargs is a record of the resolved meta data. Raises NotImplementedError
      (unknown mode), DateError (malformed period), DatasetError (unknown
      dataset), and IOError (missing source file).
  '''
  # determine dataset mode
  lclim = False; lts = False
  if mode == 'climatology': lclim = True
  elif mode == 'time-series': lts = True
  else: raise NotImplementedError("Unrecognized Mode: '{:s}'".format(mode))
  # defaults for specific variables
  obs_res = None; domain = None; filetype = None
  varlist = dataargs.get('varlist',None)
  # determine meta data based on dataset type
  if dataset == 'WRF': 
    # WRF datasets
    module = import_module('datasets.WRF')
    exp = dataargs['experiment']    
    dataset_name = exp.name
    domain = dataargs['domain']
    grid = dataargs.get('grid',None)
    # figure out period: None, a length in years (int), or a (begin,end) tuple
    period = dataargs['period']
    if period is None: pass
    elif isinstance(period,(int,np.integer)):
      # interpret an integer as the period length, counted from the experiment begin date
      beginyear = int(exp.begindate[0:4])
      period = (beginyear, beginyear+period)
    # fixed: reject anything that is not a pair of integers (was 'and all(isInt(period))')
    elif len(period) != 2 or not all(isInt(period)): raise DateError
    if period is None: periodstr = '' 
    else: periodstr = '{0:4d}-{1:4d}'.format(*period)
    gridstr = grid if grid is not None else ''      
    # identify file and domain
    if len(dataargs['filetypes']) > 1: raise DatasetError # process only one file at a time
    filetype = dataargs['filetypes'][0]
    if isinstance(domain,(list,tuple)): domain = domain[0]
    if not isinstance(domain, (np.integer,int)): raise DatasetError    
    datamsgstr = "Processing WRF '{:s}'-file from Experiment '{:s}' (d{:02d})".format(filetype, dataset_name, domain)
    # assemble filename to check modification dates (should be only one file)    
    fileclass = module.fileclasses[filetype] # avoid WRF & CESM name collision
    pstr = '_'+periodstr if periodstr else ''
    gstr = '_'+gridstr if gridstr else ''
    if lclim: filename = fileclass.climfile.format(domain,gstr,pstr) # insert domain number, grid, and period
    elif lts: filename = fileclass.tsfile.format(domain,gstr) # insert domain number, and grid
    avgfolder = exp.avgfolder
    # load source data (deferred via partial, so the caller controls when)
    if lclim:
      loadfct = functools.partial(loadWRF, experiment=exp, name=None, domains=domain, grid=None, varlist=varlist,
                                  period=period, filetypes=[filetype], varatts=None, lconst=True) # still want topography...
    elif lts:
      loadfct = functools.partial(loadWRF_TS, experiment=exp, name=None, domains=domain, grid=None, varlist=varlist,
                                  filetypes=[filetype], varatts=None, lconst=True) # still want topography...
    filepath = '{:s}/{:s}'.format(avgfolder,filename)
  elif dataset == 'CESM': 
    # CESM datasets
    module = import_module('datasets.CESM')
    exp = dataargs['experiment']    
    dataset_name = exp.name
    # figure out period (same logic as for WRF above)
    period = dataargs['period']
    if period is None: pass
    elif isinstance(period,(int,np.integer)):
      beginyear = int(exp.begindate[0:4])
      period = (beginyear, beginyear+period)
    # fixed: was 'and all(isInt(period))' (see WRF branch)
    elif len(period) != 2 or not all(isInt(period)): raise DateError
    # identify file
    if len(dataargs['filetypes']) > 1: raise DatasetError # process only one file at a time
    filetype = dataargs['filetypes'][0]        
    # check period
    if period is None: periodstr = ''
    else: periodstr = '{0:4d}-{1:4d}'.format(*period)
    datamsgstr = "Processing CESM '{:s}'-file from Experiment '{:s}'".format(filetype, dataset_name) 
    # assemble filename to check modification dates (should be only one file)    
    fileclass = module.fileclasses[filetype] # avoid WRF & CESM name collision
    pstr = '_'+periodstr if periodstr else ''
    if lclim: filename = fileclass.climfile.format('',pstr) # no domain/grid token for CESM
    elif lts: filename = fileclass.tsfile.format('')
    avgfolder = exp.avgfolder
    # load source data
    # N.B.: pop() deliberately removes 'load3D' from the caller's dataargs dict (a visible side effect)
    load3D = dataargs.pop('load3D',None) # if 3D fields should be loaded (default: False)
    if lclim:
      loadfct = functools.partial(loadCESM, experiment=exp, name=None, grid=None, period=period, varlist=varlist, 
                                  filetypes=[filetype], varatts=None, load3D=load3D, translateVars=None)
    elif lts:
      loadfct = functools.partial(loadCESM_TS, experiment=exp, name=None, grid=None, varlist=varlist,
                                  filetypes=[filetype], varatts=None, load3D=load3D, translateVars=None)     
    filepath = '{:s}/{:s}'.format(avgfolder,filename)
  elif dataset == dataset.upper() or dataset == 'Unity':
    # observational datasets (modules named in all caps, plus the merged 'Unity' set)
    module = import_module('datasets.{0:s}'.format(dataset))      
    dataset_name = module.dataset_name
    resolution = dataargs['resolution']
    if resolution: obs_res = '{0:s}_{1:s}'.format(dataset_name,resolution)
    else: obs_res = dataset_name   
    # figure out period
    period = dataargs['period']    
    if period is None: pass
    elif isinstance(period,(int,np.integer)):
      period = (1979, 1979+period) # they all begin in 1979
    # fixed: was 'and not all(...)', which missed 2-tuples of non-ints
    elif len(period) != 2 or not all(isInt(period)): raise DateError
    datamsgstr = "Processing Dataset '{:s}'".format(dataset_name)
    # check period
    if period is None: 
      if mode == 'climatology': periodstr = 'Long-Term Mean'
      else: periodstr = ''
    else: periodstr = '{0:4d}-{1:4d}'.format(*period)
    # assemble filename to check modification dates (should be only one file)    
    filename = getFileName(grid=None, period=period, name=obs_res, filetype=mode)
    avgfolder = module.avgfolder
    # load pre-processed climatology
    if lclim:
      loadfct = functools.partial(module.loadClimatology, name=dataset_name, period=period, grid=None, varlist=varlist,
                                  resolution=resolution, varatts=None, folder=module.avgfolder, filelist=None)
    elif lts:
      loadfct = functools.partial(module.loadTimeSeries, name=dataset_name, grid=None, varlist=varlist,
                                  resolution=resolution, varatts=None, folder=None, filelist=None)
    # check if the source file is actually correct
    filepath = '{:s}/{:s}'.format(avgfolder,filename)
    if not os.path.exists(filepath): 
      source = loadfct() # no varlist - obs don't have many variables anyways
      filepath = source.filelist[0]
      # N.B.: it would be nice to print a message, but then we would have to make the logger available,
      #       which would be too much trouble
  else:
    raise DatasetError("Dataset '{:s}' not found!".format(dataset))
  ## assemble and return meta data
  if not os.path.exists(filepath): raise IOError("Source file '{:s}' does not exist!".format(filepath))
  # repack resolved meta data into a record (namedTuple is presumably a project
  # helper that builds an immutable record - TODO confirm)
  dataargs = namedTuple(dataset_name=dataset_name, period=period, periodstr=periodstr, avgfolder=avgfolder, 
                        filetype=filetype, domain=domain, obs_res=obs_res, varlist=varlist) 
  # return meta data
  return module, dataargs, loadfct, filepath, datamsgstr