Example #1
def getTargetFile(dataset=None,
                  mode=None,
                  dataargs=None,
                  grid=None,
                  shape=None,
                  station=None,
                  period=None,
                  filetype=None,
                  lwrite=True):
    ''' generate filename for target dataset '''
    # for CESM & WRF
    if grid is None:
        grid = dataargs.gridstr  # also use grid for station/shape type
    if period is None: period = dataargs.periodstr
    if dataset in ('WRF', 'CESM') and lwrite:
        # prepare some variables
        domain = dataargs.domain
        if filetype is None: filetype = dataargs.filetype
        gstr = '_{}'.format(grid) if grid else ''
        # prepend shape or station type before grid
        if shape and station: raise ArgumentError
        elif shape: gstr = '_{}{}'.format(shape, gstr)
        elif station: gstr = '_{}{}'.format(station, gstr)
        pstr = '_{}'.format(period) if period else ''
        if dataset == 'WRF':
            import datasets.WRF as WRF
            fileclass = WRF.fileclasses[filetype] if filetype in WRF.fileclasses else WRF.FileType(filetype)
            if mode == 'climatology':
                filename = fileclass.climfile.format(domain, gstr, pstr)
            elif mode == 'time-series':
                filename = fileclass.tsfile.format(domain, gstr)
        elif dataset == 'CESM':
            import datasets.CESM as CESM
            fileclass = CESM.fileclasses[filetype] if filetype in CESM.fileclasses else CESM.FileType(filetype)
            if mode == 'climatology':
                filename = fileclass.climfile.format(gstr, pstr)
            elif mode == 'time-series':
                filename = fileclass.tsfile.format(gstr)
        else:
            raise NotImplementedError, "Unsupported Mode: '{:s}'".format(mode)
    elif lwrite:  # assume observational datasets
        filename = getFileName(grid=grid,
                               shape=shape,
                               station=station,
                               period=period,
                               name=dataargs.obs_res,
                               filetype=mode)
    else:
        raise DatasetError(dataset)
    if not os.path.exists(dataargs.avgfolder):
        raise IOError, "Dataset folder '{:s}' does not exist!".format(
            dataargs.avgfolder)
    # return filename
    return filename
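The WRF/CESM branches above only fill str.format templates (climfile/tsfile) with the domain, grid, and period strings. A minimal sketch of that expansion, using a hypothetical template string (not the actual WRF FileType definition):

# hypothetical climatology filename template: (domain, gridstr, periodstr)
climfile = 'wrfsrfc_d{:02d}{:s}_clim{:s}.nc'
filename = climfile.format(2, '_grw2', '_1979-1994')
print(filename)  # wrfsrfc_d02_grw2_clim_1979-1994.nc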
Example #2
def loadUnity(name=dataset_name,
              period=None,
              grid=None,
              varlist=None,
              varatts=None,
              folder=avgfolder,
              filelist=None,
              lautoregrid=False,
              resolution=None,
              unity_grid=None):
    ''' Get the pre-processed, unified monthly climatology as a DatasetNetCDF. '''
    #if lautoregrid: warn("Auto-regridding is currently not available for the unified dataset - use the generator routine instead.")
    # a climatology is not available
    if period is None:
        period = (1979, 2009)
        warn(
            'A climatology is not available for the Unified Dataset; loading period {0:4d}-{1:4d}.'
            .format(*period))
    # this dataset has no native/default grid
    if grid is None:
        if unity_grid is None:
            raise DatasetError(
                "The Unified Dataset has no native grid; need to define a default grid 'unity_grid'."
            )
        grid = unity_grid
        warn('The Unified Dataset has no native grid; loading {0:s} grid.'.
             format(grid))
    # load standardized climatology dataset with Unity-specific parameters
    dataset = loadSpecialObs(name=name,
                             folder=folder,
                             period=period,
                             grid=grid,
                             shape=None,
                             station=None,
                             varlist=varlist,
                             varatts=varatts,
                             filepattern=avgfile,
                             filelist=filelist,
                             projection=None,
                             mode='climatology',
                             lautoregrid=False)
    # return formatted dataset
    return dataset
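A hedged usage sketch for the loader above; since the Unified Dataset has no native grid, a grid must be supplied (the grid name and variable list here are illustrative):

# illustrative call; 'grw2' is a hypothetical grid name
dataset = loadUnity(grid='grw2', period=(1979, 2009), varlist=['precip', 'T2'])
print(dataset)  # DatasetNetCDF with the unified monthly climatology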
Example #3
def loadHGS_StnTS(station=None, varlist=None, varatts=None, folder=None, name=None, title=None,
                  start_date=None, end_date=None, run_period=15, period=None, lskipNaN=False, lcheckComplete=True,
                  basin=None, WSC_station=None, basin_list=None, filename=None, prefix=None, 
                  scalefactors=None, **kwargs):
  ''' Get a properly formatted HGS dataset with monthly time-series at station locations; as in
      the hgsrun module, the capitalized kwargs can be used to construct folders and/or names '''
  if folder is None or ( filename is None and station is None ): raise ArgumentError
  # try to find meta data for gage station from WSC
  HGS_station = station
  if basin is not None and basin_list is not None:
    station_name = station
    station = getGageStation(basin=basin, station=station if WSC_station is None else WSC_station, 
                             basin_list=basin_list) # only works with registered basins
    if station_name is None: station_name = station.name # backup, in case we don't have a HGS station name
    metadata = station.getMetaData() # load station meta data
    if metadata is None: raise GageStationError(name)
  else: 
    metadata = dict(); station = None; station_name = None
  # prepare name expansion arguments (all capitalized)
  expargs = dict(ROOT_FOLDER=root_folder, STATION=HGS_station, NAME=name, TITLE=title,
                 PREFIX=prefix, BASIN=basin, WSC_STATION=WSC_station)
  for key,value in metadata.items():
      if isinstance(value,str):
          expargs['WSC_'+key.upper()] = value # in particular, this includes WSC_ID
  if 'WSC_ID' in expargs: 
      if expargs['WSC_ID'][0] == '0': expargs['WSC_ID0'] = expargs['WSC_ID'][1:]
      else: raise DatasetError('Expected leading zero in WSC station ID: {}'.format(expargs['WSC_ID']))
  # exparg preset keys will get overwritten if capitalized versions are defined
  for key,value in kwargs.items():
    KEY = key.upper() # capitalized keywords are used directly
    if KEY == key or KEY not in kwargs: expargs[KEY] = value # lower-case keywords are converted, unless a capitalized version is defined
  # read folder and infer prefix, if necessary
  folder = folder.format(**expargs)
  if not os.path.exists(folder): raise IOError(folder)
  if expargs['PREFIX'] is None:
    with open('{}/{}'.format(folder,prefix_file), 'r') as pfx:
      expargs['PREFIX'] = prefix = ''.join(pfx.readlines()).strip()      
  # now assemble file name for station timeseries
  filename = filename.format(**expargs)
  filepath = '{}/{}'.format(folder,filename)
  if not os.path.exists(filepath): raise IOError(filepath)
  if station_name is None: 
      station_name = filename[filename.index('hydrograph.')+len('hydrograph.'):-4] if station is None else station
  # set meta data (and allow keyword expansion of name and title)
  metadata['problem'] = prefix
  metadata['station_name'] = metadata.get('long_name', station_name)
  if name is not None: name = name.format(**expargs) # name expansion with capitalized keyword arguments
  else: name = 'HGS_{:s}'.format(station_name)
  metadata['name'] = name; expargs['Name'] = name.title() # name in title format
  if title is None: title = '{{Name:s}} (HGS, {problem:s})'.format(**metadata)
  title = title.format(**expargs) # name expansion with capitalized keyword arguments
  metadata['long_name'] = metadata['title'] = title
  # now determine start date for date_parser
  if end_date is None: 
      if start_date and run_period: end_date = start_date + run_period 
      elif period: end_date = period[1]
      else: raise ArgumentError("Need to specify either 'start_date' & 'run_period' or 'period' to infer 'end_date'.")
  end_year,end_month,end_day = convertDate(end_date)
  if start_date is None: 
      if end_date and run_period: start_date = end_date - run_period 
      elif period: start_date = period[0]
      else: raise ArgumentError("Need to specify either 'end_date' & 'run_period' or 'period' to infer 'start_date'.")
  start_year,start_month,start_day = convertDate(start_date)
  if start_day != 1 or end_day != 1: 
    raise NotImplementedError('Currently only monthly data is supported.')
#   import functools
#   date_parser = functools.partial(date_parser, year=start_year, month=start_month, day=start_day)
#   # now load data using pandas ascii reader
#   data_frame = pd.read_table(filepath, sep='\s+', header=2, dtype=np.float64, index_col=['time'], 
#                              date_parser=date_parser, names=ascii_varlist)
#   # resample to monthly data
#   data_frame = data_frame.resample(resampling).agg(np.mean)
#       data = data_frame[flowvar].values
  # parse header
  if varlist is None: varlist = variable_list[:] # default list 
  with open(filepath, 'r') as f:
      line = f.readline(); lline = line.lower() # 1st line
      if not "hydrograph" in lline: raise GageStationError(line,filepath)
      # parse variables and determine columns
      line = f.readline(); lline = line.lower() # 2nd line
      if not "variables" in lline: raise GageStationError(line)
      variable_order = [v.strip('"').lower() for v in line[line.find('"'):].strip().split(',')]
  # figure out varlist and data columns
  if variable_order[0] == 'time': del variable_order[0] # only keep variables
  else: raise GageStationError(variable_order)
  variable_order = [hgs_variables[v] for v in variable_order] # replace HGS names with GeoPy names
  vardict = {v:i+1 for i,v in enumerate(variable_order)} # column mapping; +1 because time was removed
  variable_order = [v for v in variable_order if v in varlist or flow_to_flux[v] in varlist]
  usecols = tuple(vardict[v] for v in variable_order) # variable columns that need to be loaded (except time, which is col 0)
  assert 0 not in usecols, usecols
  # load data as tab separated values
  data = np.genfromtxt(filepath, dtype=np.float64, delimiter=None, skip_header=3, usecols=(0,)+usecols)
  assert data.shape[1] == len(usecols)+1, data.shape
  if lskipNaN:
      data = data[np.isnan(data).sum(axis=1)==0,:]
  elif np.any( np.isnan(data) ):
      raise DataError("Missing values (NaN) encountered in hydrograph file; use 'lskipNaN' to ignore.\n('{:s}')".format(filepath))    
  time_series = data[:,0]; flow_data = data[:,1:]
  assert flow_data.shape == (len(time_series),len(usecols)), flow_data.shape
  # original time deltas in seconds
  time_diff = time_series.copy(); time_diff[1:] = np.diff(time_series) # time period between time steps
  assert np.all( time_diff > 0 ), filepath
  time_diff = time_diff.reshape((len(time_diff),1)) # reshape to make sure broadcasting works
  # integrate flow over time steps before resampling
  flow_data[1:,:] -= np.diff(flow_data, axis=0)/2. # get average flow between time steps
  flow_data *= time_diff # integrate flow in time interval by multiplying average flow with time period
  flow_data = np.cumsum(flow_data, axis=0) # integrate by summing up total flow per time interval
  # generate regular monthly time steps
  start_datetime = np.datetime64(dt.datetime(year=start_year, month=start_month, day=start_day), 'M')
  end_datetime = np.datetime64(dt.datetime(year=end_year, month=end_month, day=end_day), 'M')
  time_monthly = np.arange(start_datetime, end_datetime+np.timedelta64(1, 'M'), dtype='datetime64[M]')
  assert time_monthly[0] == start_datetime, time_monthly[0]
  assert time_monthly[-1] == end_datetime, time_monthly[-1] 
  # convert monthly time series to regular array of seconds since start date
  time_monthly = ( time_monthly.astype('datetime64[s]') - start_datetime.astype('datetime64[s]') ) / np.timedelta64(1,'s')
  assert time_monthly[0] == 0, time_monthly[0]
  # interpolate integrated flow to new time axis
  #flow_data = np.interp(time_monthly, xp=time_series[:,0], fp=flow_data[:,0],).reshape((len(time_monthly),1))
  time_series = np.concatenate(([0],time_series), axis=0) # integrated flow at time zero must be zero...
  flow_data = np.concatenate(([[0,]*len(usecols)],flow_data), axis=0) # ... this is probably better than interpolation
  # N.B.: we are adding zeros here so we don't have to extrapolate to the left; on the right we just fill in NaN's
  # check completeness of the record; test the >5 day gap first, so the >3 day warning does not mask it
  gap = time_monthly[-1] - time_series[-1]
  if gap > 5*86400.:
      msg = "Data record ends more than 5 days before end of period: {} days".format(gap/86400.)
      if lcheckComplete: raise DataError(msg)
      else: warn(msg)
  elif gap > 3*86400. and lcheckComplete:
      warn("Data record ends more than 3 days before end of period: {} days".format(gap/86400.))
  flow_interp = si.interp1d(x=time_series, y=flow_data, kind='linear', axis=0, copy=False, 
                            bounds_error=False, fill_value=np.nan, assume_sorted=True)
  flow_data = flow_interp(time_monthly) # evaluate with call
  # compute monthly flow rate from interpolated integrated flow
  flow_data = np.diff(flow_data, axis=0) / np.diff(time_monthly, axis=0).reshape((len(time_monthly)-1,1))
  flow_data *= 1000 # convert from m^3/s to kg/s (water density: 1000 kg/m^3)
  # construct time axis
  start_time = 12*(start_year - 1979) + start_month -1
  end_time = 12*(end_year - 1979) + end_month -1
  time = Axis(name='time', units='month', atts=dict(long_name='Month since 1979-01'), 
              coord=np.arange(start_time, end_time)) # not including the last, e.g. 1979-01 to 1980-01 is 12 months
  assert len(time_monthly) == end_time-start_time+1
  assert flow_data.shape == (len(time),len(variable_order)), (flow_data.shape,len(time),len(variable_order))
  # construct dataset
  dataset = Dataset(atts=metadata)
  dataset.station = station # add gage station object, if available (else None)
  for i,flowvar in enumerate(variable_order):
      data = flow_data[:,i]
      fluxvar = flow_to_flux[flowvar]
      if flowvar in varlist:
        flowatts = variable_attributes[flowvar]
        # convert variables and put into dataset (monthly time series)
        if flowatts['units'] != 'kg/s': 
          raise VariableError("Hydrograph data is read as kg/s; flow variable does not match.\n{}".format(flowatts))
        dataset += Variable(data=data, axes=(time,), **flowatts)
      if fluxvar in varlist and 'shp_area' in metadata:
        # compute surface flux variable based on drainage area
        fluxatts = variable_attributes[fluxvar]
        if fluxatts['units'] != 'kg/m^2/s': raise VariableError(fluxatts) # flux should be per unit area
        data = data / metadata['shp_area'] # need to make a copy
        dataset += Variable(data=data, axes=(time,), **fluxatts)
  # apply analysis period
  if period is not None:
      dataset = dataset(years=period)
  # adjust scalefactors, if necessary
  if scalefactors:
      if isinstance(scalefactors,dict):
          dataset = updateScalefactor(dataset, varlist=scalefactors, scalefactor=None)
      elif isNumber(scalefactors):
          scalelist = ('discharge','seepage','flow')
          dataset = updateScalefactor(dataset, varlist=scalelist, scalefactor=scalefactors)
      else: 
          raise TypeError(scalefactors) 
  # return completed dataset
  return dataset
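The numerical core above is an integrate-interpolate-differentiate scheme: instantaneous flows are trapezoidally integrated to cumulative volumes, the cumulative curve is linearly interpolated onto regular monthly boundaries, and differencing recovers mean monthly flow rates. A self-contained sketch of the same scheme on synthetic data (all names here are illustrative, not part of the module):

import numpy as np
import scipy.interpolate as si

# synthetic irregular record: time stamps in seconds, instantaneous flow in m^3/s
t = np.array([0., 5.e5, 1.3e6, 2.6e6, 5.2e6])
q = np.array([1.0, 2.0, 1.5, 1.0, 2.5])
# trapezoidal integration to cumulative volume at each time stamp
vol = np.concatenate(([0.], np.cumsum(0.5*(q[1:]+q[:-1])*np.diff(t))))
# regular 'monthly' boundaries (30-day months for this sketch)
tm = np.arange(0., t[-1], 30*86400.)
# interpolate cumulative volume, then difference to recover mean interval flow
vol_m = si.interp1d(t, vol, kind='linear', bounds_error=False, fill_value=np.nan)(tm)
q_monthly = np.diff(vol_m)/np.diff(tm)
print(q_monthly)  # mean flow (m^3/s) over each monthly interval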
Example #4
def getMetaData(dataset, mode, dataargs, lone=True):
    ''' determine dataset type and meta data, as well as path to main source file '''
    # determine dataset mode
    lclim = False
    lts = False
    if mode == 'climatology': lclim = True
    elif mode == 'time-series': lts = True
    elif mode[-5:] == '-mean':
        lclim = True
        mode = 'climatology'  # only for export to seasonal means (load entire monthly climatology)
    else:
        raise NotImplementedError, "Unrecognized Mode: '{:s}'".format(mode)
    # general arguments (dataset independent)
    varlist = dataargs.get('varlist', None)
    resolution = dataargs.get('resolution', None)
    grid = dataargs.get('grid', None)  # get grid
    period = dataargs.get('period', None)
    # determine meta data based on dataset type
    if dataset == 'WRF':
        import datasets.WRF as WRF
        # WRF datasets
        obs_res = None  # only for observational datasets (not used here)
        exp = dataargs['experiment']  # need that one
        dataset_name = exp.name
        avgfolder = exp.avgfolder
        filetypes = dataargs['filetypes']
        fileclasses = WRF.fileclasses.copy()
        for filetype in filetypes:
            if filetype not in fileclasses:
                fileclasses[filetype] = WRF.FileType(filetype)
        domain = dataargs.get('domain', None)
        periodstr, gridstr = getPeriodGridString(period, grid, exp=exp)
        # check arguments
        if period is None and lclim:
            raise DatasetError, "A 'period' argument is required to load climatologies!"
        if lone and len(filetypes) > 1:
            raise DatasetError  # process only one file at a time
        if not isinstance(domain, (np.integer, int)): raise DatasetError
        # construct dataset message
        if lone:
            datamsgstr = "Processing WRF '{:s}'-file from Experiment '{:s}' (d{:02d})".format(
                filetypes[0], dataset_name, domain)
        else:
            datamsgstr = "Processing WRF dataset from Experiment '{:s}' (d{:02d})".format(
                dataset_name, domain)
        # figure out age of source file(s)
        srcage = getSourceAge(fileclasses=fileclasses,
                              filetypes=filetypes,
                              exp=exp,
                              domain=domain,
                              periodstr=periodstr,
                              gridstr=gridstr,
                              lclim=lclim,
                              lts=lts)
        # load source data
        if lclim:
            loadfct = partial(WRF.loadWRF,
                              experiment=exp,
                              name=None,
                              domains=domain,
                              grid=grid,
                              varlist=varlist,
                              period=period,
                              filetypes=filetypes,
                              varatts=None,
                              lconst=True,
                              ltrimT=False)  # still want topography...
        elif lts:
            loadfct = partial(WRF.loadWRF_TS,
                              experiment=exp,
                              name=None,
                              domains=domain,
                              grid=grid,
                              varlist=varlist,
                              filetypes=filetypes,
                              varatts=None,
                              lconst=True,
                              ltrimT=False)  # still want topography...
    elif dataset == 'CESM':
        import datasets.CESM as CESM
        # CESM datasets
        obs_res = None  # only for observational datasets (not used here)
        domain = None  # only for WRF
        exp = dataargs['experiment']
        avgfolder = exp.avgfolder
        dataset_name = exp.name
        periodstr, gridstr = getPeriodGridString(period, grid, exp=exp)
        filetypes = dataargs['filetypes']
        fileclasses = CESM.fileclasses.copy()
        for filetype in filetypes:
            if filetype not in fileclasses:
                fileclasses[filetype] = CESM.FileType(filetype)
        # check arguments
        if period is None and lclim:
            raise DatasetError, "A 'period' argument is required to load climatologies!"
        if lone and len(filetypes) > 1:
            raise DatasetError  # process only one file at a time
        # construct dataset message
        if lone:
            datamsgstr = "Processing CESM '{:s}'-file from Experiment '{:s}'".format(
                filetypes[0], dataset_name)
        else:
            datamsgstr = "Processing CESM dataset from Experiment '{:s}'".format(
                dataset_name)
        # figure out age of source file(s)
        srcage = getSourceAge(fileclasses=fileclasses,
                              filetypes=filetypes,
                              exp=exp,
                              domain=None,
                              periodstr=periodstr,
                              gridstr=gridstr,
                              lclim=lclim,
                              lts=lts)
        # load source data
        load3D = dataargs.pop('load3D', None)  # whether to load 3D fields (default: False)
        if lclim:
            loadfct = partial(CESM.loadCESM,
                              experiment=exp,
                              name=None,
                              grid=grid,
                              period=period,
                              varlist=varlist,
                              filetypes=filetypes,
                              varatts=None,
                              load3D=load3D,
                              translateVars=None)
        elif lts:
            loadfct = partial(CESM.loadCESM_TS,
                              experiment=exp,
                              name=None,
                              grid=grid,
                              varlist=varlist,
                              filetypes=filetypes,
                              varatts=None,
                              load3D=load3D,
                              translateVars=None)
    else:
        # assume observational datasets
        filetypes = [None]  # only for CESM & WRF
        domain = None  # only for WRF
        try:
            module = import_module('datasets.{0:s}'.format(dataset))
        except ImportError:
            raise DatasetError(
                "Error loading dataset module '{:s}' from 'datasets' package!".
                format(dataset))
        dataset_name = module.dataset_name
        resolution = dataargs['resolution']
        if resolution: obs_res = '{0:s}_{1:s}'.format(dataset_name, resolution)
        else: obs_res = dataset_name
        # figure out period
        periodstr, gridstr = getPeriodGridString(period, grid, beginyear=1979)
        if period is None and lclim: periodstr = 'LTM'
        datamsgstr = "Processing Dataset '{:s}'".format(dataset_name)
        # assemble filename to check modification dates (should be only one file)
        filename = getFileName(grid=grid,
                               period=period,
                               name=obs_res,
                               filetype=mode)
        avgfolder = module.avgfolder
        filepath = '{:s}/{:s}'.format(avgfolder, filename)
        # load pre-processed climatology
        kwargs = dict(name=dataset_name,
                      grid=grid,
                      varlist=varlist,
                      resolution=resolution,
                      varatts=None)
        if dataset == 'Unity': kwargs['unity_grid'] = dataargs['unity_grid']
        if lclim and module.loadClimatology is not None:
            loadfct = partial(module.loadClimatology, period=period, **kwargs)
        elif lts and module.loadTimeSeries is not None:
            loadfct = partial(module.loadTimeSeries, **kwargs)
        else:
            raise DatasetError(
                "Unable to identify time aggregation mode; the dataset " +
                "'{}' may not support selected mode '{}'.".format(
                    dataset, mode))
        # check if the source file actually exists
        if os.path.exists(filepath): filelist = [filepath]
        else:
            # don't load the data, just construct the dataset to obtain its file list
            source = loadfct()
            filelist = source.filelist
        # figure out age of source file(s)
        srcage = getSourceAge(filelist=filelist, lclim=lclim, lts=lts)
        # N.B.: it would be nice to print a message, but then we would have to make the logger available,
        #       which would be too much trouble
    ## assemble and return meta data
    dataargs = namedTuple(dataset_name=dataset_name,
                          period=period,
                          periodstr=periodstr,
                          avgfolder=avgfolder,
                          filetypes=filetypes,
                          filetype=filetypes[0],
                          domain=domain,
                          obs_res=obs_res,
                          varlist=varlist,
                          grid=grid,
                          gridstr=gridstr,
                          resolution=resolution)
    # return meta data
    return dataargs, loadfct, srcage, datamsgstr
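The returned loadfct is a zero-argument callable built with functools.partial, so the potentially expensive load can be deferred to the worker that actually needs the data. A minimal sketch of the pattern (the loader and its arguments are hypothetical stand-ins for WRF.loadWRF, CESM.loadCESM, etc.):

from functools import partial

def load_dataset(experiment=None, grid=None, period=None, varlist=None):
    # hypothetical loader standing in for the real dataset loaders
    return 'Dataset({}, {}, {}, {})'.format(experiment, grid, period, varlist)

# bind all arguments now, call later (e.g. in a worker process)
loadfct = partial(load_dataset, experiment='max-ctrl', grid='grw2',
                  period=(1979, 1994), varlist=['precip'])
dataset = loadfct()  # the actual load happens only here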
Example #5
    if isinstance(periods, (np.integer, int)): periods = [periods]
    # check and expand WRF experiment list
    WRF_experiments = getExperimentList(WRF_experiments, WRF_project, 'WRF')
    if isinstance(domains, (np.integer, int)): domains = [domains]
    # check and expand CESM experiment list
    CESM_experiments = getExperimentList(CESM_experiments, CESM_project,
                                         'CESM')
    # expand datasets and resolutions
    if datasets is None: datasets = gridded_datasets
    if unity_grid is None and 'Unity' in datasets:
        if WRF_project:
            unity_grid = import_module(
                'projects.{:s}'.format(WRF_project)).unity_grid
        else:
            raise DatasetError(
                "Dataset 'Unity' has no native grid - please set 'unity_grid'."
            )

    # print an announcement
    if len(WRF_experiments) > 0:
        print('\n Regridding WRF Datasets:')
        print([exp.name for exp in WRF_experiments])
    if len(CESM_experiments) > 0:
        print('\n Regridding CESM Datasets:')
        print([exp.name for exp in CESM_experiments])
    if len(datasets) > 0:
        print('\n And Observational Datasets:')
        print(datasets)
    print('\n To Grid and Resolution:')
    for grid, reses in grids.items():
        print('   {0:s} {1:s}'.format(grid, printList(reses) if reses else ''))
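The loop above expects grids to map each target grid name to a list of source resolutions (or None for the native resolution); an illustrative value (grid and resolution names are hypothetical):

# hypothetical mapping: target grid -> source resolutions (None = native)
grids = {'grw1': ['05', '025'], 'grw2': None}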