Example 1
 def setUp(self):
     ''' create two test variables '''
     # create axis and variable instances (make *copies* of data and attributes!)
     x1 = np.random.randn(180)
     xax1 = Axis(name='X1-Axis', units='X Units', length=len(x1))
     var1 = Variable(axes=(xax1, ),
                     data=x1.copy(),
                     atts=dict(name='blue', units='units'))
     self.var1 = var1
     self.xax1 = xax1
     x2 = np.random.randn(180)
     xax2 = Axis(name='X2-Axis', units='X Units', length=len(x2))
     var2 = Variable(name='purple', units='units', axes=(xax2, ), data=x2)
     self.var2 = var2
     self.xax2 = xax2
     # actual normal distribution
     self.dist = 'norm'
     distvar = VarRV(name=self.dist,
                     units='units',
                     dist=self.dist,
                     params=(0, 1))
     self.distVar = distvar
     # add to list
     self.vars = [var1, var2]
     self.axes = [xax1, xax2]
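
The fixture above only builds the test objects; a minimal sketch of a test method exercising them might look like this (the assertions are illustrative and assume the Axis/Variable behavior shown above, e.g. that an Axis supports len() and a Variable exposes data_array):

 def testFixture(self):
     ''' illustrative sanity check of the fixture (hypothetical, not from the source) '''
     # each variable's data should line up with its axis
     assert self.var1.data_array.shape == (len(self.xax1),)
     assert self.var2.data_array.shape == (len(self.xax2),)
     # the RV variable carries the distribution name set in setUp
     assert self.distVar.name == self.dist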
Example 2
 def setUp(self):
   ''' create two test variables '''
   # create axis and variable instances (make *copies* of data and attributes!)
   x1 = np.linspace(0,10,15)
   x2 = np.linspace(2,8,18)
   if self.ldatetime:
       start_datetime, end_datetime = pd.to_datetime('1981-05-01'), pd.to_datetime('1981-05-16')
       t1 = np.arange(start_datetime, end_datetime, dtype='datetime64[D]') 
       xax1 = Axis(name='Time1-Axis', units='X Time', coord=t1) 
       t2 = np.arange(start_datetime, end_datetime+np.timedelta64(3, 'D'), dtype='datetime64[D]')
       xax2 = Axis(name='Time2-Axis', units='X Time', coord=t2)
   else:
       xax1 = Axis(name='X1-Axis', units='X Units', coord=x1)
       xax2 = Axis(name='X2-Axis', units='X Units', coord=x2)
   var0 = Variable(axes=(xax1,), data=np.sin(x1), atts=dict(name='relative', units=''))
   var1 = Variable(axes=(xax1,), data=x1.copy(), atts=dict(name='blue', units='units'))
   self.var0 = var0; self.var1 = var1; self.xax1 = xax1
   var2 = Variable(name='purple',units='units',axes=(xax2,), data=(x2**2)/5.)
   self.var2 = var2; self.xax2 = xax2
   # create error variables with random noise
   noise1 = np.random.rand(len(xax1))*var1.data_array.std()/2.
   err1 = Variable(axes=(xax1,), data=noise1, atts=dict(name='blue_std', units='units'))
   noise2 = np.random.rand(len(xax2))*var2.data_array.std()/2.
   err2 = Variable(name='purple',units='units',axes=(xax2,), data=noise2)
   self.err1 = err1; self.err2 = err2
   # add to list
   self.vars = [var1, var2]
   self.errs = [err1, err2]
   self.axes = [xax1, xax2]
Example 3
def timeAxis(start_date=None,
             end_date=None,
             sampling=None,
             date_range=None,
             time_axis=None,
             llastIncl=True,
             ntime=None,
             varatts=None):
    ''' figure out type and dimensions of time axis '''
    # check time input
    if date_range: start_date, end_date, sampling = date_range
    if start_date and end_date and sampling:
        start_year, start_month, start_day = convertDate(start_date)
        start_datetime = np.datetime64(
            dt.datetime(year=start_year, month=start_month, day=start_day),
            sampling)
        end_year, end_month, end_day = convertDate(end_date)
        end_datetime = np.datetime64(
            dt.datetime(year=end_year, month=end_month, day=end_day), sampling)
        if llastIncl: end_datetime += np.timedelta64(1, sampling)
        date_range = np.arange(start_datetime,
                               end_datetime,
                               dtype='datetime64[{}]'.format(sampling))
        assert date_range[0] == start_datetime, date_range[0]
        if ntime:
            if ntime > len(date_range):
                raise ArgumentError(date_range)
            else:
                # trim
                date_range = date_range[0:ntime]
        else:
            ntime = len(date_range)
    elif time_axis == 'datetime':
        raise ArgumentError('Insufficient time axis information!')
    # construct time axis
    atts = varatts['time']
    if time_axis.lower() == 'simple':
        time = Axis(atts=atts, coord=np.arange(1, ntime + 1))
    elif time_axis.lower() == 'datetime':
        if sampling.lower() in ('y', '1y'): units = 'year'
        elif sampling.lower() in ('m', '1m'): units = 'month'
        elif sampling.lower() in ('d', '1d'): units = 'day'
        elif sampling.lower() in ('h', '1h'): units = 'hour'
        else: units = sampling
        long_name = '{}s since {}'.format(units.title(), str(
            date_range[0]))  # hope this makes sense...
        atts.update(long_name=long_name, units=units)
        time = Axis(atts=atts, coord=date_range)
    else:
        raise ArgumentError(time_axis)
    # return time axis
    return time
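
A hedged usage sketch for timeAxis (the varatts mapping is an assumption, since the function only reads varatts['time'], and the accepted date format depends on convertDate):

# minimal attribute dictionary; timeAxis only looks up varatts['time']
varatts = dict(time=dict(name='time', units=''))
# simple integer time axis of a given length
time = timeAxis(time_axis='simple', ntime=12, varatts=varatts)
# daily datetime axis for the first half of May 1981 (dates are illustrative)
time = timeAxis(start_date='1981-05-01', end_date='1981-05-15', sampling='D',
                time_axis='datetime', llastIncl=True, varatts=varatts)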
Example 4
 def setUp(self):
   ''' create a 2D test variable '''
   # create axis and variable instances (make *copies* of data and attributes!)
   xax = Axis(name='X-Axis', units='X Units', coord=np.linspace(0,10,15))
   yax = Axis(name='Y-Axis', units='Y Units', coord=np.linspace(2,8,18))
   xx,yy = np.meshgrid(yax[:],xax[:],) # create mesh (transposed w.r.t. values)
   var0 = Variable(axes=(xax,yax), data=np.sin(xx)*np.cos(yy), atts=dict(name='Color', units='Color Units'))
   var1 = Variable(axes=(xax,yax), data=np.cos(xx)*np.sin(yy), atts=dict(name='Contour', units='Contour Units'))
   self.var0 = var0; self.var1 = var1; self.xax = xax; self.yax = yax
   # add to list
   self.axes = [xax, yax]
   self.vars = [var0, var1]
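
The transposed meshgrid call above is easy to misread; a quick shape check (relying only on numpy's documented meshgrid behavior) shows why the data matches the axes=(xax, yax) order:

import numpy as np
a = np.linspace(0,10,15); b = np.linspace(2,8,18)  # same lengths as xax and yax above
xx, yy = np.meshgrid(b, a)  # meshgrid(b, a) yields arrays of shape (len(a), len(b))
assert xx.shape == yy.shape == (15, 18)  # the data shape expected for axes=(xax, yax)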
Example 5
def loadNARR_LTM(name=dataset_name, varlist=None, grid=None, interval='monthly', varatts=None, filelist=None, folder=ltmfolder):
  ''' Get a properly formatted dataset of daily or monthly NARR climatologies (LTM). '''
  if grid is None:
    # load from original time-series files 
    if folder is None: folder = orig_ts_folder
    # prepare input
    if varatts is None: varatts = ltmvaratts.copy()
    if varlist is None: varlist = ltmvarlist
    if interval == 'monthly': 
      pfx = '.mon.ltm.nc'; tlen = 12
    elif interval == 'daily': 
      pfx = '.day.ltm.nc'; tlen = 365
    else: raise DatasetError("Selected interval '%s' is not supported!" % interval)
    # translate varlist
    if varlist and varatts: varlist = translateVarNames(varlist, varatts)  
    # axes dictionary, primarily to override time axis 
    axes = dict(time=Axis(name='time',units='day',coord=(1,tlen,tlen)),load=True)
    if filelist is None: # generate default filelist
      filelist = [special[var]+pfx if var in special else var+pfx for var in varlist if var not in nofile]
    # load dataset
    dataset = DatasetNetCDF(name=name, folder=folder, filelist=filelist, varlist=varlist, varatts=varatts, 
                            axes=axes, atts=projdict, multifile=False, ncformat='NETCDF4_CLASSIC')
    # add projection
    projection = getProjFromDict(projdict, name='{0:s} Coordinate System'.format(name))
    dataset = addGDALtoDataset(dataset, projection=projection, geotransform=None, folder=grid_folder)
  else:
    # load from neatly formatted and regridded time-series files
    if folder is None: folder = avgfolder
    raise NotImplementedError("Need to implement loading neatly formatted and regridded time-series!")
  # return formatted dataset
  return dataset
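
An illustrative call sketch (variable keys are placeholders; the module-level defaults in the signature normally supply folder and variable lists):

# monthly climatology with all defaults
ltm = loadNARR_LTM()
# daily climatology restricted to a single (hypothetical) variable key
ltm = loadNARR_LTM(varlist=['precip'], interval='daily')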
Example 6
def loadNARR_TS(name=dataset_name, grid=None, varlist=None, resolution=None, varatts=None, filelist=None, 
               folder=None, lautoregrid=None):
  ''' Get a properly formatted NARR dataset with monthly mean time-series. '''
  if grid is None:
    # load from original time-series files 
    if folder is None: folder = orig_ts_folder
    # translate varlist
    if varatts is None: varatts = tsvaratts.copy()
    if varlist is None: varlist = tsvarlist
    if varlist and varatts: varlist = translateVarNames(varlist, varatts)
    if filelist is None: # generate default filelist
      filelist = [orig_ts_file.format(special[var]) if var in special else orig_ts_file.format(var) for var in varlist 
                  if var not in nofile and var in varatts]
    # load dataset
    dataset = DatasetNetCDF(name=name, folder=folder, filelist=filelist, varlist=varlist, varatts=varatts, 
                            atts=projdict, multifile=False, ncformat='NETCDF4_CLASSIC')
    # replace time axis with number of month since Jan 1979 
    data = np.arange(0,len(dataset.time),1, dtype='int16') # month since 1979 (Jan 1979 = 0)
    timeAxis = Axis(name='time', units='month', coord=data, atts=dict(long_name='Month since 1979-01'))
    dataset.replaceAxis(dataset.time, timeAxis, asNC=False, deepcopy=False)
    # add projection
    projection = getProjFromDict(projdict, name='{0:s} Coordinate System'.format(name))
    dataset = addGDALtoDataset(dataset, projection=projection, geotransform=None, gridfolder=grid_folder)
  else:
    # load from neatly formatted and regridded time-series files
    if folder is None: folder = avgfolder
    dataset = loadObservations(name=name, folder=folder, projection=None, resolution=None, grid=grid, 
                               period=None, varlist=varlist, varatts=varatts, filepattern=tsfile, 
                               filelist=filelist, lautoregrid=lautoregrid, mode='time-series')
  # return formatted dataset
  return dataset
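
Several loaders in this collection re-index time as 'month since 1979-01'; the mapping between a calendar month and that index is simple arithmetic (a worked check, not from the source):

def month_index(year, month):
    ''' map a calendar month to the 'month since 1979-01' convention (Jan 1979 = 0) '''
    return 12*(year - 1979) + (month - 1)

assert month_index(1979, 1) == 0    # origin
assert month_index(1980, 1) == 12   # one full year later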
Example 7
 def setUp(self):
     ''' create two test variables '''
     # create axis and variable instances (make *copies* of data and attributes!)
     x1 = np.linspace(0, 10, 11)
     xax1 = Axis(name='X1-Axis', units='X Units', coord=x1)
     var0 = Variable(axes=(xax1, ),
                     data=np.sin(x1),
                     atts=dict(name='relative', units=''))
     var1 = Variable(axes=(xax1, ),
                     data=x1.copy(),
                     atts=dict(name='blue', units='units'))
     self.var0 = var0
     self.var1 = var1
     self.xax1 = xax1
     x2 = np.linspace(2, 8, 13)
     xax2 = Axis(name='X2-Axis', units='X Units', coord=x2)
     var2 = Variable(name='purple',
                     units='units',
                     axes=(xax2, ),
                     data=(x2**2) / 5.)
     self.var2 = var2
     self.xax2 = xax2
     # create error variables with random noise
     noise1 = np.random.rand(len(xax1)) * var1.data_array.std() / 2.
     err1 = Variable(axes=(xax1, ),
                     data=noise1,
                     atts=dict(name='blue_std', units='units'))
     noise2 = np.random.rand(len(xax2)) * var2.data_array.std() / 2.
     err2 = Variable(name='purple',
                     units='units',
                     axes=(xax2, ),
                     data=noise2)
     self.err1 = err1
     self.err2 = err2
     # add to list
     self.vars = [var1, var2]
     self.errs = [err1, err2]
     self.axes = [xax1, xax2]
Example 8
 def setUp(self):
   ''' create a reference and two test variables for Taylor plot'''
   self.thetamin = 0.; self.Rmin = 0.; self.thetamax = np.pi/2.; self.Rmax = 2.
   # create axis and variable instances (make *copies* of data and attributes!)
   self.x1 = np.linspace(0,10,11); self.xax1 = Axis(name='X1-Axis', units='X Units', coord=self.x1)
   self.data0 = np.sin(self.x1)
   self.var0 = Variable(axes=(self.xax1,), data=self.data0, atts=dict(name='Reference', units='units'))
   # create error variables with random noise
   self.data1 = self.data0 + ( np.random.rand(len(self.xax1))-0.5 )*0.5
   self.var1 = Variable(axes=(self.xax1,), data=self.data1, atts=dict(name='Blue', units='units'))
   self.data2 = self.data0 + ( np.random.rand(len(self.xax1))-0.5 )*1.5
   self.var2 = Variable(axes=(self.xax1,), data=self.data2, atts=dict(name='Red', units='units'))
   self.data3 = 1. + np.random.rand(len(self.xax1))*1.5
   self.var3 = Variable(axes=(self.xax1,), data=self.data3, atts=dict(name='Random', units='units'))
   # add to list
   self.vars = [self.var0, self.var1, self.var2, self.var3]
   self.data = [self.data0, self.data1, self.data2, self.data3]
   self.axes = [self.xax1,]
Example 9
 def setUp(self):
   ''' create two test variables '''
   # define plot ranges
   self.thetamin = 0.; self.Rmin = 0.; self.thetamax = 2*np.pi; self.Rmax = 2.
   # create theta axis and variable instances (values are radius values, I believe)
   theta1 = np.linspace(self.thetamin,self.thetamax,361)
   thax1 = Axis(atts=dict(name='$\\theta$-Axis', units='Radians'), coord=theta1) 
   var0 = Variable(axes=(thax1,), data=np.sin(theta1), atts=dict(name='Blue', units='units'))
   tmp = theta1.copy()*(self.Rmax-self.Rmin)/(self.thetamax-self.thetamin)
   var1 = Variable(axes=(thax1,), data=tmp, atts=dict(name='Red', units='units'))
   self.var0 = var0; self.var1 = var1; self.xax1 = theta1
   # create error variables with random noise
   noise0 = np.random.rand(len(thax1))*var0.data_array.std()/2.
   err0 = Variable(axes=(thax1,), data=noise0, atts=dict(name='Blue Noise', units='units'))
   noise1 = np.random.rand(len(thax1))*var1.data_array.std()/2.
   err1 = Variable(axes=(thax1,), data=noise1, atts=dict(name='Red Noise', units='units'))
   self.err1 = err1; self.err0 = err0
   # add to list
   self.vars = [var0, var1]
   self.errs = [err0, err1]
   self.axes = [thax1,]
Example 10
def loadHGS_StnTS(station=None, varlist=None, varatts=None, folder=None, name=None, title=None,
                  start_date=None, end_date=None, run_period=15, period=None, lskipNaN=False, lcheckComplete=True,
                  basin=None, WSC_station=None, basin_list=None, filename=None, prefix=None, 
                  scalefactors=None, **kwargs):
  ''' Get a properly formatted HGS dataset with monthly time-series at station locations; as in
      the hgsrun module, the capitalized kwargs can be used to construct folders and/or names '''
  if folder is None or ( filename is None and station is None ): raise ArgumentError
  # try to find meta data for gage station from WSC
  HGS_station = station
  if basin is not None and basin_list is not None:
    station_name = station
    station = getGageStation(basin=basin, station=station if WSC_station is None else WSC_station, 
                             basin_list=basin_list) # only works with registered basins
    if station_name is None: station_name = station.name # backup, in case we don't have a HGS station name
    metadata = station.getMetaData() # load station meta data
    if metadata is None: raise GageStationError(name)
  else: 
    metadata = dict(); station = None; station_name =  None    
  # prepare name expansion arguments (all capitalized)
  expargs = dict(ROOT_FOLDER=root_folder, STATION=HGS_station, NAME=name, TITLE=title,
                 PREFIX=prefix, BASIN=basin, WSC_STATION=WSC_station)
  for key,value in metadata.items():
      if isinstance(value,str):
          expargs['WSC_'+key.upper()] = value # in particular, this includes WSC_ID
  if 'WSC_ID' in expargs: 
      if expargs['WSC_ID'][0] == '0': expargs['WSC_ID0'] = expargs['WSC_ID'][1:]
      else: raise DatasetError('Expected leading zero in WSC station ID: {}'.format(expargs['WSC_ID']))
  # exparg preset keys will get overwritten if capitalized versions are defined
  for key,value in kwargs.items():
    KEY = key.upper() # we only use capitalized keywords, and non-capitalized keywords are only used/converted
    if KEY == key or KEY not in kwargs: expargs[KEY] = value # if no capitalized version is defined
  # read folder and infer prefix, if necessary
  folder = folder.format(**expargs)
  if not os.path.exists(folder): raise IOError(folder)
  if expargs['PREFIX'] is None:
    with open('{}/{}'.format(folder,prefix_file), 'r') as pfx:
      expargs['PREFIX'] = prefix = ''.join(pfx.readlines()).strip()      
  # now assemble file name for station timeseries
  filename = filename.format(**expargs)
  filepath = '{}/{}'.format(folder,filename)
  if not os.path.exists(filepath): raise IOError(filepath)
  if station_name is None: 
      station_name = filename[filename.index('hydrograph.')+len('hydrograph.'):-4] if station is None else station
  # set meta data (and allow keyword expansion of name and title)
  metadata['problem'] = prefix
  metadata['station_name'] = metadata.get('long_name', station_name)
  if name is not None: name = name.format(**expargs) # name expansion with capitalized keyword arguments
  else: name = 'HGS_{:s}'.format(station_name)
  metadata['name'] = name; expargs['Name'] = name.title() # name in title format
  if title is None: title = '{{Name:s}} (HGS, {problem:s})'.format(**metadata)
  title = title.format(**expargs) # name expansion with capitalized keyword arguments
  metadata['long_name'] = metadata['title'] = title
  # now determine start date for date_parser
  if end_date is None: 
      if start_date and run_period: end_date = start_date + run_period 
      elif period: end_date = period[1]
      else: raise ArgumentError("Need to specify either 'start_date' & 'run_period' or 'period' to infer 'end_date'.")
  end_year,end_month,end_day = convertDate(end_date)
  if start_date is None: 
      if end_date and run_period: start_date = end_date - run_period 
      elif period: start_date = period[0]
      else: raise ArgumentError("Need to specify either 'end_date' & 'run_period' or 'period' to infer 'start_date'.")
  start_year,start_month,start_day = convertDate(start_date)
  if start_day != 1 or end_day != 1: 
    raise NotImplementedError('Currently only monthly data is supported.')
#   import functools
#   date_parser = functools.partial(date_parser, year=start_year, month=start_month, day=start_day)
#   # now load data using pandas ascii reader
#   data_frame = pd.read_table(filepath, sep='\s+', header=2, dtype=np.float64, index_col=['time'], 
#                              date_parser=date_parser, names=ascii_varlist)
#   # resample to monthly data
#   data_frame = data_frame.resample(resampling).agg(np.mean)
#       data = data_frame[flowvar].values
  # parse header
  if varlist is None: varlist = variable_list[:] # default list 
  with open(filepath, 'r') as f:
      line = f.readline(); lline = line.lower() # 1st line
      if not "hydrograph" in lline: raise GageStationError(line,filepath)
      # parse variables and determine columns
      line = f.readline(); lline = line.lower() # 2nd line
      if not "variables" in lline: raise GageStationError(line)
      variable_order = [v.strip('"').lower() for v in line[line.find('"'):].strip().split(',')]
  # figure out varlist and data columns
  if variable_order[0] == 'time': del variable_order[0] # only keep variables
  else: raise GageStationError(variable_order)
  variable_order = [hgs_variables[v] for v in variable_order] # replace HGS names with GeoPy names
  vardict = {v:i+1 for i,v in enumerate(variable_order)} # column mapping; +1 because time was removed
  variable_order = [v for v in variable_order if v in varlist or flow_to_flux[v] in varlist]
  usecols = tuple(vardict[v] for v in variable_order) # variable columns that need to be loaded (except time, which is col 0)
  assert 0 not in usecols, usecols
  # load data as tab separated values
  data = np.genfromtxt(filepath, dtype=np.float64, delimiter=None, skip_header=3, usecols = (0,)+usecols)
  assert data.shape[1] == len(usecols)+1, data.shape
  if lskipNaN:
      data = data[np.isnan(data).sum(axis=1)==0,:]
  elif np.any( np.isnan(data) ):
      raise DataError("Missing values (NaN) encountered in hydrograph file; use 'lskipNaN' to ignore.\n('{:s}')".format(filepath))    
  time_series = data[:,0]; flow_data = data[:,1:]
  assert flow_data.shape == (len(time_series),len(usecols)), flow_data.shape
  # original time deltas in seconds
  time_diff = time_series.copy(); time_diff[1:] = np.diff(time_series) # time period between time steps
  assert np.all( time_diff > 0 ), filepath
  time_diff = time_diff.reshape((len(time_diff),1)) # reshape to make sure broadcasting works
  # integrate flow over time steps before resampling
  flow_data[1:,:] -= np.diff(flow_data, axis=0)/2. # get average flow between time steps
  flow_data *= time_diff # integrate flow in time interval by multiplying average flow with time period
  flow_data = np.cumsum(flow_data, axis=0) # integrate by summing up total flow per time interval
  # generate regular monthly time steps
  start_datetime = np.datetime64(dt.datetime(year=start_year, month=start_month, day=start_day), 'M')
  end_datetime = np.datetime64(dt.datetime(year=end_year, month=end_month, day=end_day), 'M')
  time_monthly = np.arange(start_datetime, end_datetime+np.timedelta64(1, 'M'), dtype='datetime64[M]')
  assert time_monthly[0] == start_datetime, time_monthly[0]
  assert time_monthly[-1] == end_datetime, time_monthly[-1] 
  # convert monthly time series to regular array of seconds since start date
  time_monthly = ( time_monthly.astype('datetime64[s]') - start_datetime.astype('datetime64[s]') ) / np.timedelta64(1,'s')
  assert time_monthly[0] == 0, time_monthly[0]
  # interpolate integrated flow to new time axis
  #flow_data = np.interp(time_monthly, xp=time_series[:,0], fp=flow_data[:,0],).reshape((len(time_monthly),1))
  time_series = np.concatenate(([0],time_series), axis=0) # integrated flow at time zero must be zero...
  flow_data = np.concatenate(([[0,]*len(usecols)],flow_data), axis=0) # ... this is probably better than interpolation
  # N.B.: we are adding zeros here so we don't have to extrapolate to the left; on the right we just fill in NaN's
  # check how far the record falls short of the end of the period
  gap = time_monthly[-1] - time_series[-1] # shortfall in seconds at the end of the record
  if gap > 5*86400.:
      msg = "Data record ends more than 5 days before end of period: {} days".format(gap/86400.)
      if lcheckComplete: raise DataError(msg)
      else: warn(msg)
  elif gap > 3*86400. and lcheckComplete:
      warn("Data record ends more than 3 days before end of period: {} days".format(gap/86400.))
  flow_interp = si.interp1d(x=time_series, y=flow_data, kind='linear', axis=0, copy=False, 
                            bounds_error=False, fill_value=np.NaN, assume_sorted=True) 
  flow_data = flow_interp(time_monthly) # evaluate with call
  # compute monthly flow rate from interpolated integrated flow
  flow_data = np.diff(flow_data, axis=0) / np.diff(time_monthly, axis=0).reshape((len(time_monthly)-1,1))
  flow_data *= 1000 # convert from m^3/s to kg/s
  # construct time axis
  start_time = 12*(start_year - 1979) + start_month -1
  end_time = 12*(end_year - 1979) + end_month -1
  time = Axis(name='time', units='month', atts=dict(long_name='Month since 1979-01'), 
              coord=np.arange(start_time, end_time)) # not including the last, e.g. 1979-01 to 1980-01 is 12 months
  assert len(time_monthly) == end_time-start_time+1
  assert flow_data.shape == (len(time),len(variable_order)), (flow_data.shape,len(time),len(variable_order))
  # construct dataset
  dataset = Dataset(atts=metadata)
  dataset.station = station # add gage station object, if available (else None)
  for i,flowvar in enumerate(variable_order):
      data = flow_data[:,i]
      fluxvar = flow_to_flux[flowvar]
      if flowvar in varlist:
        flowatts = variable_attributes[flowvar]
        # convert variables and put into dataset (monthly time series)
        if flowatts['units'] != 'kg/s': 
          raise VariableError("Hydrograph data is read as kg/s; flow variable does not match.\n{}".format(flowatts))
        dataset += Variable(data=data, axes=(time,), **flowatts)
      if fluxvar in varlist and 'shp_area' in metadata:
        # compute surface flux variable based on drainage area
        fluxatts = variable_attributes[fluxvar]
        if fluxatts['units'] == 'kg/s' and fluxatts['units'] != 'kg/m^2/s': raise VariableError(fluxatts)
        data = data / metadata['shp_area'] # need to make a copy
        dataset += Variable(data=data, axes=(time,), **fluxatts)
  # apply analysis period
  if period is not None:
      dataset = dataset(years=period)
  # adjust scalefactors, if necessary
  if scalefactors:
      if isinstance(scalefactors,dict):
          dataset = updateScalefactor(dataset, varlist=scalefactors, scalefactor=None)
      elif isNumber(scalefactors):
          scalelist = ('discharge','seepage','flow')
          dataset = updateScalefactor(dataset, varlist=scalelist, scalefactor=scalefactors)
      else: 
          raise TypeError(scalefactors) 
  # return completed dataset
  return dataset
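
A hedged call sketch (every name and pattern below is a placeholder; the capitalized keys in the folder and filename strings are expanded from expargs as described above, and the accepted date format depends on convertDate):

dataset = loadHGS_StnTS(station='MyStation',                 # hypothetical HGS station name
                        folder='{ROOT_FOLDER:s}/my_run/',    # ROOT_FOLDER comes from a module default
                        filename='{PREFIX:s}o.hydrograph.{STATION:s}.dat',  # placeholder pattern
                        start_date=(1979,1,1), end_date=(1994,1,1))  # exact format depends on convertDate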
Example 11
def loadEnKF_StnTS(folder=None,
                   varlist='all',
                   varatts=None,
                   name='enkf',
                   title='EnKF',
                   basin=None,
                   start_date=None,
                   end_date=None,
                   sampling=None,
                   period=None,
                   date_range=None,
                   llastIncl=True,
                   WSC_station=None,
                   basin_list=None,
                   filenames=None,
                   prefix=None,
                   time_axis='datetime',
                   scalefactors=None,
                   metadata=None,
                   lkgs=False,
                   out_dir='out/',
                   yaml_file='../input_data/obs_meta.yaml',
                   lYAML=True,
                   nreal=None,
                   ntime=None,
                   **kwargs):
    ''' load EnKF ensemble data as formatted GeoPy Dataset '''
    out_folder = os.path.join(folder, 'out/')  # default output folder
    if not os.path.exists(out_folder): raise IOError(out_folder)
    # default values
    if isinstance(varlist, str) and varlist == 'hydro':
        varlist = Hydro.varlist
    elif isinstance(varlist, str) and varlist == 'obs':
        varlist = Obs.varlist
    elif isinstance(varlist, str) and varlist == 'all':
        varlist = Hydro.varlist + Obs.varlist
    elif not isinstance(varlist, (tuple, list)):
        raise TypeError(varlist)
    if varatts is None: varatts = variable_attributes.copy()
    varmap = {
        varatt['name']: enkf_name
        for enkf_name, varatt in list(varatts.items())
    }
    varlist = [varmap[var] for var in varlist]
    # load WSC station meta data
    pass
    # initialize Dataset
    dataset = Dataset(name=name,
                      title=title if title else name.title(),
                      atts=metadata)
    ensemble = None
    time = None
    observation = None
    # load observation/innovation data
    if any([var in Obs.atts for var in varlist]):
        # load data
        vardata = loadObs(varlist=[var for var in varlist if var in Obs.atts],
                          folder=out_folder,
                          lpandas=False)
        ntime, nobs, nreal = list(vardata.values())[0].shape
        # create Axes
        if time is None:
            # figure out time axis
            time = timeAxis(start_date=start_date,
                            end_date=end_date,
                            sampling=sampling,
                            date_range=date_range,
                            time_axis=time_axis,
                            llastIncl=llastIncl,
                            ntime=ntime,
                            varatts=varatts)
        elif len(time) != ntime:
            raise AxisError(time)
        if ensemble is None:
            # construct ensemble axis
            ensemble = Axis(atts=varatts['ensemble'],
                            coord=np.arange(1, nreal + 1))
        elif len(ensemble) != nreal:
            raise AxisError(ensemble)
        if observation is None:
            # construct observation axis
            observation = Axis(atts=varatts['observation'],
                               coord=np.arange(1, nobs + 1))
        elif len(observation) != nobs:
            raise AxisError(observation)
        # create variables
        for varname, data in list(vardata.items()):
            dataset += Variable(atts=varatts[varname],
                                data=data,
                                axes=(time, observation, ensemble))
        # load YAML data, if available
        if lYAML:
            # load YAML file
            yaml_path = os.path.join(out_folder, yaml_file)
            if not os.path.exists(yaml_path): raise IOError(yaml_path)
            with open(yaml_path, 'r') as yf:
                obs_meta = yaml.safe_load(yf)
            if obs_meta is None: raise IOError(yaml_path)  # not a YAML file?
            # create constant variables from the observation meta data
            for cvar, cval in list(obs_meta[0].items()):
                if isinstance(cval, str): dtype, missing = np.string_, ''
                elif isinstance(cval, (np.integer, int)):
                    dtype, missing = np.int_, 0
                elif isinstance(cval, (np.inexact, float)):
                    dtype, missing = np.float_, np.NaN
                else:
                    dtype = None  # skip
                if dtype:
                    data = np.asarray([
                        missing if obs[cvar] is None else obs[cvar]
                        for obs in obs_meta
                    ],
                                      dtype=dtype)
                    if cvar in varatts: atts = varatts[cvar]
                    else: atts = dict(name=cvar, units='')
                    dataset += Variable(atts=atts,
                                        data=data,
                                        axes=(observation, ))
    elif ntime is None:
        # try to infer time dimension from backup.info file
        backup_info = os.path.join(folder, 'backup.info')
        if os.path.exists(backup_info):
            with open(backup_info, 'r') as bf:
                ntime = int(bf.readline())
    # load discharge/hydrograph data
    if 'discharge' in varlist:
        data = loadHydro(folder=out_folder, nreal=nreal, ntime=ntime)
        ntime, nreal = data.shape
        if time is None:
            # figure out time axis
            time = timeAxis(start_date=start_date,
                            end_date=end_date,
                            sampling=sampling,
                            date_range=date_range,
                            time_axis=time_axis,
                            llastIncl=llastIncl,
                            ntime=ntime,
                            varatts=varatts)
        elif len(time) != ntime:
            raise AxisError(time)
        if ensemble is None:
            # construct ensemble axis
            ensemble = Axis(atts=varatts['ensemble'],
                            coord=np.arange(1, nreal + 1))
        elif len(ensemble) != nreal:
            raise AxisError(ensemble)
        atts = varatts['discharge']
        if lkgs:
            data *= 1000.
            if atts['units'] == 'm^3/s': atts['units'] = 'kg/s'
        dataset += Variable(atts=atts, data=data, axes=(time, ensemble))
    # return formatted Dataset
    if scalefactors is not None and scalefactors != 1:
        raise NotImplementedError
    return dataset
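
An illustrative call (the folder and dates are placeholders; varlist accepts 'hydro', 'obs', 'all', or an explicit list, as handled above):

enkf = loadEnKF_StnTS(folder='/path/to/enkf_run',  # hypothetical run directory
                      varlist='all', time_axis='datetime',
                      start_date='1981-05-01', end_date='1981-05-16', sampling='D')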
Example 12
def loadGPCC_TS(name=dataset_name,
                grid=None,
                varlist=None,
                resolution='25',
                varatts=None,
                filelist=None,
                folder=None,
                lautoregrid=None):
    ''' Get a properly formatted dataset with the monthly GPCC time-series. '''
    if grid is None:
        # load from original time-series files
        if folder is None: folder = orig_ts_folder
        # prepare input
        if resolution not in ('05', '10', '25'):
            raise DatasetError("Selected resolution '%s' is not available!" % resolution)
        # translate varlist
        if varatts is None: varatts = tsvaratts.copy()
        if varlist is None: varlist = varatts.keys()
        if varlist and varatts: varlist = translateVarNames(varlist, varatts)
        if filelist is None:  # generate default filelist
            filelist = []
            if 'p' in varlist:
                filelist.append(orig_ts_file.format('precip', resolution))
            if 's' in varlist:
                filelist.append(orig_ts_file.format('statio', resolution))
        # load dataset
        dataset = DatasetNetCDF(name=name,
                                folder=folder,
                                filelist=filelist,
                                varlist=varlist,
                                varatts=varatts,
                                multifile=False,
                                ncformat='NETCDF4_CLASSIC')
        # replace time axis with number of month since Jan 1979
        data = np.arange(0, len(dataset.time), 1, dtype='int16') + (
            1901 - 1979) * 12  # month since 1979 (Jan 1979 = 0)
        timeAxis = Axis(name='time',
                        units='month',
                        coord=data,
                        atts=dict(long_name='Month since 1979-01'))
        dataset.replaceAxis(dataset.time, timeAxis, asNC=False, deepcopy=False)
        # add GDAL info
        dataset = addGDALtoDataset(dataset, projection=None, geotransform=None)
        # N.B.: projection should be auto-detected as geographic
    else:
        # load from neatly formatted and regridded time-series files
        if folder is None: folder = avgfolder
        grid, resolution = checkGridRes(grid,
                                        resolution,
                                        period=None,
                                        lclim=False)
        dataset = loadObservations(name=name,
                                   folder=folder,
                                   projection=None,
                                   resolution=resolution,
                                   grid=grid,
                                   period=None,
                                   varlist=varlist,
                                   varatts=varatts,
                                   filepattern=tsfile,
                                   filelist=filelist,
                                   lautoregrid=lautoregrid,
                                   mode='time-series')
    # return formatted dataset
    return dataset
Example 13
def loadCRU_TS(name=dataset_name,
               grid=None,
               varlist=None,
               resolution=None,
               varatts=None,
               filelist=None,
               folder=None,
               lautoregrid=None):
    ''' Get a properly formatted CRU dataset with monthly mean time-series. '''
    if grid is None:
        # load from original time-series files
        if folder is None: folder = orig_ts_folder
        # translate varlist
        if varatts is None: varatts = tsvaratts.copy()
        if varlist is None: varlist = varatts.keys()
        if varlist and varatts: varlist = translateVarNames(varlist, varatts)
        # assemble filelist
        if filelist is None:  # generate default filelist
            filelist = [
                orig_ts_file.format(var) for var in varlist
                if var not in nofile
            ]
        # load dataset
        dataset = DatasetNetCDF(name=name,
                                folder=folder,
                                filelist=filelist,
                                varlist=varlist,
                                varatts=varatts,
                                multifile=False,
                                ncformat='NETCDF4_CLASSIC')
        # replace time axis with number of month since Jan 1979
        data = np.arange(0, len(dataset.time), 1, dtype='int16') + (
            1901 - 1979) * 12  # month since 1979 (Jan 1979 = 0)
        timeAxis = Axis(name='time',
                        units='month',
                        coord=data,
                        atts=dict(long_name='Month since 1979-01'))
        dataset.replaceAxis(dataset.time, timeAxis, asNC=False, deepcopy=False)
        # add projection
        dataset = addGDALtoDataset(dataset,
                                   projection=None,
                                   geotransform=None,
                                   gridfolder=grid_folder)
        # N.B.: projection should be auto-detected as geographic
    else:
        # load from neatly formatted and regridded time-series files
        if folder is None: folder = avgfolder
        dataset = loadObservations(name=name,
                                   folder=folder,
                                   projection=None,
                                   resolution=None,
                                   grid=grid,
                                   period=None,
                                   varlist=varlist,
                                   varatts=varatts,
                                   filepattern=tsfile,
                                   filelist=filelist,
                                   lautoregrid=lautoregrid,
                                   mode='time-series')
    # return formatted dataset
    return dataset
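
loadGPCC_TS and loadCRU_TS follow the same two-branch pattern: native files when grid is None, otherwise pre-regridded time-series via loadObservations. An illustrative pair of calls (grid name and variable key are placeholders):

gpcc = loadGPCC_TS(resolution='25')                # native 2.5 deg files, default variables
cru = loadCRU_TS(grid='my_grid', varlist=['T2'])   # hypothetical regridded dataset and variable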
Example 14
## convert from XLS files to netcdf
    elif mode == 'convert_XLS':

        # imports
        from glob import glob
        from geodata.base import Dataset, Axis, Variable
        from geodata.netcdf import writeNetCDF

        # load list of well files and generate list of wells
        well_files = glob(os.path.join(data_folder, 'W*.xlsx'))
        well_files.sort()
        wells = [os.path.basename(name[:-5]) for name in well_files]
        print(wells)

        # dataset
        time_ax = Axis(coord=np.arange(12 * (period[1] - period[0])) + 252,
                       **varatts['time'])  # origin: 1979-01
        well_ax = Axis(coord=np.arange(len(wells)) + 1, name='well', units='')
        dataset = Dataset(name=conservation_authority,
                          title=conservation_authority + ' Observation Wells')
        # add meta data
        meta_dicts = [
            loadMetadata(well, conservation_authority=conservation_authority)
            for well in wells
        ]
        for key in meta_dicts[0].keys():
            if key in varatts: atts = varatts[key]
            elif key.lower() in varatts: atts = varatts[key.lower()]
            else: atts = dict(name=key, units='')
            if atts['units']:
                data = np.asarray([wmd[key] for wmd in meta_dicts],
                                  dtype=np.float64)
Example 15
def rasterDataset(name=None,
                  title=None,
                  vardefs=None,
                  axdefs=None,
                  atts=None,
                  projection=None,
                  griddef=None,
                  lgzip=None,
                  lgdal=True,
                  lmask=True,
                  fillValue=None,
                  lskipMissing=True,
                  lgeolocator=True,
                  file_pattern=None,
                  lfeedback=True,
                  **kwargs):
    ''' function to load a set of variables that are stored in raster format in a systematic directory tree into a Dataset
        Variables and Axis are defined as follows:
          vardefs[varname] = dict(name=string, units=string, axes=tuple of strings, atts=dict, plot=dict, dtype=np.dtype, fillValue=value)
          axdefs[axname]   = dict(name=string, units=string, atts=dict, coord=array or list) or None
        The path to raster files is constructed as variable_pattern+axes_pattern, where axes_pattern is defined through the axes
        (as in rasterVariable) and variable_pattern takes the special keyword VAR, which is the variable key in vardefs.
    '''

    ## prepare input data and axes
    if griddef:
        xlon, ylat = griddef.xlon, griddef.ylat
        if projection is None:
            projection = griddef.projection
        elif projection != griddef.projection:
            raise ArgumentError("Conflicting projection and GridDef!")
        geotransform = griddef.geotransform
        isProjected = griddef.isProjected
    else:
        xlon = ylat = geotransform = None
        isProjected = False if projection is None else True
    # construct axes dict
    axes = dict()
    for axname, axdef in axdefs.items():
        if axdef is None:
            axes[axname] = None
        else:
            assert 'coord' in axdef, axdef
            assert ('name' in axdef and 'units' in axdef) or 'atts' in axdef, axdef
            ax = Axis(**axdef)
            axes[ax.name] = ax
    # check for map Axis
    if isProjected:
        if 'x' not in axes: axes['x'] = xlon
        if 'y' not in axes: axes['y'] = ylat
    else:
        if 'lon' not in axes: axes['lon'] = xlon
        if 'lat' not in axes: axes['lat'] = ylat

    ## load raster data into Variable objects
    varlist = []
    for varname, vardef in vardefs.items():
        # check definitions
        assert 'axes' in vardef and 'dtype' in vardef, vardef
        assert ('name' in vardef
                and 'units' in vardef) or 'atts' in vardef, vardef
        # determine relevant axes
        vardef = vardef.copy()
        axes_list = [
            None if ax is None else axes[ax] for ax in vardef.pop('axes')
        ]
        # define path parameters (with varname)
        path_params = vardef.pop('path_params', None)
        path_params = dict() if path_params is None else path_params.copy()
        if 'VAR' not in path_params:
            path_params['VAR'] = varname  # a special key
        # add kwargs and relevant axis indices
        relaxes = [ax.name for ax in axes_list
                   if ax is not None]  # relevant axes
        for key, value in kwargs.items():
            if key not in axes or key in relaxes:
                vardef[key] = value
        # create Variable object
        var = rasterVariable(projection=projection,
                             griddef=griddef,
                             file_pattern=file_pattern,
                             lgzip=lgzip,
                             lgdal=lgdal,
                             lmask=lmask,
                             lskipMissing=lskipMissing,
                             axes=axes_list,
                             path_params=path_params,
                             lfeedback=lfeedback,
                             **vardef)
        # vardef components: name, units, atts, plot, dtype, fillValue
        varlist.append(var)
        # check that map axes are correct
        for ax in var.xlon, var.ylat:
            if axes[ax.name] is None: axes[ax.name] = ax
            elif axes[ax.name] != ax:
                raise AxisError("{} axes are incompatible.".format(ax.name))
        if griddef is None: griddef = var.griddef
        elif griddef != var.griddef:
            raise AxisError("GridDefs are inconsistent.")
        if geotransform is None: geotransform = var.geotransform
        elif geotransform != var.geotransform:
            raise AxisError(
                "Conflicting geotransform (from Variable) and GridDef!\n {} != {}"
                .format(var.geotransform, geotransform))

    ## create Dataset
    dataset = Dataset(name=name,
                      title=title,
                      varlist=varlist,
                      axes=axes,
                      atts=atts)
    # add GDAL functionality
    dataset = addGDALtoDataset(dataset,
                               griddef=griddef,
                               projection=projection,
                               geotransform=geotransform,
                               gridfolder=None,
                               lwrap360=None,
                               geolocator=lgeolocator,
                               lforce=False)
    # N.B.: for some reason we also need to pass the geotransform, otherwise it is recomputed internally and some consistency
    #       checks fail due to machine-precision differences

    # return GDAL-enabled Dataset
    return dataset
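
The docstring above specifies the structure of vardefs and axdefs; a minimal sketch of conforming definitions (all names, the grid definition, and the file pattern are placeholders):

import numpy as np
axdefs = dict(time=dict(name='time', units='month', coord=np.arange(1, 13)))
vardefs = dict(rain=dict(name='precip', units='kg/m^2/s', dtype=np.float32,
                         axes=('time','y','x'), fillValue=-9999.))
dataset = rasterDataset(name='raster_test', vardefs=vardefs, axdefs=axdefs,
                        griddef=my_griddef,          # assumed GridDef instance (supplies x/y axes)
                        file_pattern=file_pattern)   # pattern using the special key VAR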
Example 16
def loadGageStation(basin=None, station=None, varlist=None, varatts=None, mode='climatology', 
                    aggregation=None, filetype='monthly', folder=None, name=None, period=None,
                    basin_list=None, lcheck=True, lexpand=True, lfill=True, lflatten=True,
                    lkgs=True, scalefactors=None, title=None):
  ''' function to load hydrograph climatologies and timeseries for a given basin '''
  ## resolve input
  if mode == 'timeseries' and aggregation: 
    raise ArgumentError('Timeseries does not support aggregation.')
  # get GageStation instance
  station = getGageStation(basin=basin, station=station, name=name, folder=folder, 
                           river=None, basin_list=basin_list, lcheck=True)
  # variable attributes
  if varlist is None: varlist = variable_list
  elif not isinstance(varlist,(list,tuple)): raise TypeError  
  varlist = list(varlist) # make copy of varlist to avoid interference
  if varatts is None: 
    if aggregation is None: varatts = variable_attributes_kgs if lkgs else variable_attributes_mms
    else: varatts = agg_varatts_kgs if lkgs else agg_varatts_mms
  elif not isinstance(varatts,dict): raise TypeError
  
  ## read csv data
  # time series data and time coordinates
  lexpand = True; lfill = True
  if mode == 'climatology': lexpand = False; lfill = False; lflatten = False
  data, time = station.getTimeseriesData(units='kg/s' if lkgs else 'm^3/s', lcheck=True, lexpand=lexpand, 
                                         lfill=lfill, period=period, lflatten=lflatten)
  # station meta data
  metadata = station.getMetaData(lcheck=True)
  den = metadata['shp_area'] if lkgs else ( metadata['shp_area'] / 1000. )
  ## create dataset for station
  dataset = Dataset(name='WSC', title=title or metadata['Station Name'], varlist=[], atts=metadata,) 
  if mode.lower() in ('timeseries','time-series'): 
    time = time.flatten(); data = data.flatten() # just to make sure...
    # make time axis based on time coordinate from csv file
    timeAxis = Axis(name='time', units='month', coord=time, # time series centered at 1979-01
                    atts=dict(long_name='Month since 1979-01'))
    dataset += timeAxis
    # load mean discharge
    dataset += Variable(axes=[timeAxis], data=data, atts=varatts['discharge'])
    # load mean runoff
    doa = data / den 
    dataset += Variable(axes=[timeAxis], data=doa, atts=varatts['runoff'])
  elif mode == 'climatology': 
    # N.B.: this is primarily for backwards compatibility; it should not be used anymore...
    # make common time axis for climatology
    te = 12 # length of time axis: 12 months
    climAxis = Axis(name='time', units='month', length=12, coord=np.arange(1,te+1,1)) # monthly climatology
    dataset.addAxis(climAxis, copy=False)
    # extract variables (min/max/mean are separate variables)
    # N.B.: this is mainly for backwards compatibility
    doa = data / den
    if aggregation is None or aggregation.lower() == 'mean':
      # load mean discharge
      tmpdata = nf.nanmean(data, axis=0)
      tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['discharge'])
      dataset.addVariable(tmpvar, copy=False)
      # load mean runoff
      tmpdata = nf.nanmean(doa, axis=0)
      tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['runoff'])
      dataset.addVariable(tmpvar, copy=False)
    if aggregation is None or aggregation.lower() == 'std':
      # load  discharge standard deviation
      tmpdata = nf.nanstd(data, axis=0, ddof=1) # very few values means large uncertainty!
      tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['discstd'])
      dataset.addVariable(tmpvar, copy=False)
      # load  runoff standard deviation
      tmpdata = nf.nanstd(doa, axis=0, ddof=1)
      tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['roff_std'])
      dataset.addVariable(tmpvar, copy=False)
    if aggregation is None or aggregation.lower() == 'sem':
      # load  discharge standard deviation
      tmpdata = nf.nansem(data, axis=0, ddof=1) # very few values means large uncertainty!
      tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['discsem'])
      dataset.addVariable(tmpvar, copy=False)
      # load  runoff standard deviation
      tmpdata = nf.nansem(doa, axis=0, ddof=1)
      tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['roff_sem'])
      dataset.addVariable(tmpvar, copy=False)
    if aggregation is None or aggregation.lower() == 'max':
      # load maximum discharge
      tmpdata = nf.nanmax(data, axis=0)
      tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['discmax'])
      dataset.addVariable(tmpvar, copy=False)
      # load maximum runoff
      tmpdata = nf.nanmax(doa, axis=0)
      tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['roff_max'])
      dataset.addVariable(tmpvar, copy=False)
    if aggregation is None or aggregation.lower() == 'min':
      # load minimum discharge
      tmpdata = nf.nanmin(data, axis=0)
      tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['discmin'])
      dataset.addVariable(tmpvar, copy=False)
      # load minimum runoff
      tmpdata = nf.nanmin(doa, axis=0)
      tmpvar = Variable(axes=[climAxis], data=tmpdata, atts=varatts['roff_min'])
      dataset.addVariable(tmpvar, copy=False)
  else: 
    raise NotImplementedError("Time axis mode '{}' is not supported.".format(mode))
  # adjust scalefactors, if necessary
  if scalefactors:
      if isinstance(scalefactors,dict):
          dataset = updateScalefactor(dataset, varlist=scalefactors, scalefactor=None)
      elif isNumber(scalefactors):
          scalelist = ('discharge','StdDisc','SEMDisc','MaxDisc','MinDisc',)
          dataset = updateScalefactor(dataset, varlist=scalelist, scalefactor=scalefactors)
      else: 
          raise TypeError(scalefactors) 
  # return station dataset
  return dataset
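
An illustrative call (basin key, station name, and basin_list are placeholders that would come from the WSC basin registry used by getGageStation):

wsc = loadGageStation(basin='MyBasin', station='MyStation', basin_list=basin_list,
                      mode='timeseries', lkgs=True)  # monthly discharge in kg/s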