def loadDBstation(dbfile, stationID, varname, timeinfo=None, filttype=None, cutoff=3600.0, output_meta=False):
    """
    Load station data from a database file

    Inputs:
        dbfile - location of database file
        stationID - Station ID in database (matched with SQL LIKE "%<stationID>",
            i.e. any ID that ends with this string)
        varname - variable name e.g. 'waterlevel', 'discharge', 'salinity'
        timeinfo (optional) - tuple with (starttime,endtime,dt). Format 'yyyymmdd.HHMMSS'
            Use this to interpolate onto a constant time vector
        filttype (optional) - 'low' or 'high'
            Set this to filter data
        cutoff (optional) - filter cutoff period passed to timeseries.filt
            (reported in seconds in the log message)
        output_meta (optional) - if True also return a metadata dictionary

    Returns:
        timeseries object, or the tuple (timeseries, meta) when output_meta is
        True. meta has keys 'longitude', 'latitude', 'elevation', 'StationName'.
        -1 on error (no station matched the query)

    NOTE(review): a second loadDBstation is defined later in this file; if both
    live in the same module the later definition replaces this one on import.
    """
    from netcdfio import queryNC

    outvar = ['NetCDF_Filename', 'NetCDF_GroupID', 'StationName']
    tablename = 'observations'
    # NOTE(review): varname/stationID are interpolated straight into the SQL
    # condition - only pass trusted values.
    condition = 'Variable_Name = "%s" and StationID LIKE "%%%s"' % (varname, stationID)

    # Single-argument print(...) behaves identically on Python 2 and 3
    print('Querying database...')
    print(condition)
    data, query = queryNC(dbfile, outvar, tablename, condition)

    if len(data) == 0:
        print('!!! Warning - Did not find any stations matching query. Returning -1 !!!')
        return -1

    # Only the first matching record is used
    station = data[0]
    ts = timeseries(station['time'], station[varname].squeeze())

    if timeinfo is not None:
        print('Interpolating station data between %s and %s\n' % (timeinfo[0], timeinfo[1]))
        tnew, ynew = ts.interp((timeinfo[0], timeinfo[1], timeinfo[2]))
        ts = timeseries(tnew, ynew)
        ts.dt = timeinfo[2]  # This needs updating

    if filttype is not None:
        print('%s-pass filtering output data. Cutoff period = %f [s].' % (filttype, cutoff))
        yfilt = ts.filt(cutoff, btype=filttype, axis=-1)
        ts.y = yfilt.copy()

    if not output_meta:
        return ts

    # Elevation is optional in the record; default to zero when absent
    if 'elevation' in station:
        ele = station['elevation']
    else:
        ele = 0.0

    meta = {
        'longitude': station['longitude'],
        'latitude': station['latitude'],
        'elevation': ele,
        'StationName': query['StationName'][0],
    }
    return ts, meta
def interpWeatherStations(latlon, tstart, tend, dt, utmzone, dbfile, maxgap=40, showplot=False):
    """
    Temporally interpolate weather station data onto a specified time grid

    Inputs:
        latlon - four bounds used in the database query:
            lon_start >= latlon[0], lon_end <= latlon[1],
            lat_start >= latlon[2], lat_end <= latlon[3]
        tstart, tend - start and end time strings, format 'yyyymmdd'
            (tend itself is not included in the output time grid)
        dt - output time step in hours
        utmzone - UTM zone passed to ll2utm
        dbfile - location of database file
        maxgap (optional) - stations with a run of more than this many
            consecutive non-finite samples inside the output period are dropped
        showplot (optional) - plot the raw and interpolated series of every
            station that is kept

    Returns:
        coords - dictionary with lists 'x_<var>', 'y_<var>' (from ll2utm) and
            'z_<var>' (station 'elevation') for each variable
        output - dictionary keyed by variable name; output[var]['Data'] is an
            array of shape (ntime, number of stations kept)
        nctime - convertTime() of the output time grid

    Raises:
        ValueError if dt is not positive

    NOTE(review): interpWeatherStations is defined a second time later in this
    file; if both live in the same module the later definition replaces this
    one on import.
    """
    if dt <= 0:
        # A non-positive step would never reach timeend in the loop below
        raise ValueError('dt must be a positive number of hours')

    # Convert to datetime format
    timestart = datetime.strptime(tstart, '%Y%m%d')
    timeend = datetime.strptime(tend, '%Y%m%d')

    # Create the time variable
    timeList = []
    tnow = timestart
    while tnow < timeend:
        timeList.append(tnow)
        tnow += timedelta(hours=dt)

    nctime = convertTime(timeList)
    ntime = len(timeList)

    varnames = ['Tair', 'Pair', 'Uwind', 'Vwind', 'RH', 'rain', 'cloud']

    coords = {}
    output = {}

    # Query settings that do not depend on the variable
    outvar = ['NetCDF_Filename', 'NetCDF_GroupID', 'StationName']
    tablename = 'observations'
    timefmt = '%Y-%m-%d %H:%M:%S'

    # Read in the semi-processed data
    for vv in varnames:
        print('Interpolating variable %s...' % vv)

        # Every clause ends in a space so that keywords never run into the
        # preceding literal.
        condition = (
            'Variable_Name = "%s" ' % vv
            + 'and time_start <= "%s" ' % timestart.strftime(timefmt)
            + 'and time_end >= "%s" ' % timeend.strftime(timefmt)
            + 'and lon_start >= %3.6f ' % latlon[0]
            + 'and lon_end <= %3.6f ' % latlon[1]
            + 'and lat_start >= %3.6f ' % latlon[2]
            + 'and lat_end <= %3.6f' % latlon[3]
        )

        data, query = netcdfio.queryNC(dbfile, outvar, tablename, condition)

        # Quality control. The accepted stations are collected in a new list:
        # popping from `data` while iterating over it skips the element after
        # every removal and can remove the wrong station.
        good = []
        for dd in data:
            finite = np.isfinite(np.ravel(dd[vv]))
            timenow = convertTime(dd['time'])
            timegood = timenow[finite]

            # No usable samples at all
            if len(timegood) == 0:
                continue

            # The finite samples must bracket the whole output period
            if nctime[0] <= timegood[0] or nctime[-1] >= timegood[-1]:
                continue

            # Index of the last sample before the first / last output time.
            # Both exist because of the bracketing test above.
            t1 = np.nonzero(timenow < nctime[0])[0][-1]
            t2 = np.nonzero(timenow < nctime[-1])[0][-1]

            # Find the maximum gap size between the two time limits
            gapsize = 0
            gap = 0
            for isgood in finite[t1:t2]:
                if isgood:
                    gap = 0
                else:
                    gap += 1
                    if gap > gapsize:
                        gapsize = gap

            if gapsize > maxgap:
                print('Removing data point - gap size %d is > %d' % (gapsize, maxgap))
                continue

            good.append(dd)
        data = good

        # Work out the number of spatial points of each variable based on
        # quality control
        coords['x_' + vv] = []
        coords['y_' + vv] = []
        coords['z_' + vv] = []

        for dd in data:
            # Convert to utm
            ll = np.hstack((dd['longitude'], dd['latitude']))
            xy = ll2utm(ll, utmzone)
            coords['x_' + vv].append(xy[0][0])
            coords['y_' + vv].append(xy[0][1])
            coords['z_' + vv].append(dd['elevation'])

        varlen = len(data)

        # Initialize the output arrays
        output[vv] = {'Data': np.zeros((ntime, varlen))}

        # Loop through and interpolate each variable onto the time array
        for ctr, dd in enumerate(data):
            # Interpolate the data using the finite samples only
            tmp = np.array(np.ravel(dd[vv]))
            timenow = convertTime(dd['time'])
            ind = np.isfinite(tmp)
            F = interpolate.interp1d(timenow[ind], tmp[ind], kind='linear')
            varinterp = F(nctime)

            output[vv]['Data'][:, ctr] = varinterp

            if showplot:
                plt.figure()
                # pyplot.hold no longer exists in recent Matplotlib releases
                # (holding is always on there), so only call it when present.
                if hasattr(plt, 'hold'):
                    plt.hold('on')
                plt.plot(timenow, tmp)
                plt.plot(nctime, varinterp, 'r')
                plt.title(dd['StationName'] + ' - ' + vv)
                plt.show()

    # Return the data
    return coords, output, nctime
def QueryNC(dbfile, staname=None, yearrange=None, cons=None):
    """
    Query the tidal station data

    Inputs:
        dbfile - location of database file
        staname (optional) - only return records with this StationName
        yearrange (optional) - (startyear, endyear); only return records with
            '<startyear>-01-01 00:00:00' < time_start < '<endyear>-01-01 00:00:00'
        cons (optional) - list of tidal constituent names to extract.
            Default: all names in the 'Tidal_Constituents' attribute of the
            first NetCDF file returned by the query

    Returns:
        amp, phs - arrays built from the 'ssh_amp' / 'ssh_phs' records,
            restricted to the requested constituents
        time, lon, lat, StationName - layout depends on the query:
            staname only    : time is an array (one per record), lon/lat/
                              StationName are taken from the first record
            yearrange only  : lon/lat are arrays and StationName a list (one
                              per record), time is taken from the first record
            otherwise       : time, lon, lat and StationName are all lists
                              with one entry per 'ssh_amp' record
        cons - the constituent names that were used

    Raises:
        StopIteration if a name in cons is not in the file's constituent list
    """
    outvar = ['NetCDF_Filename', 'NetCDF_GroupID', 'StationName', 'StationID']
    tablename = 'observations'

    # Create the query
    # NOTE(review): staname is interpolated straight into the SQL condition -
    # only pass trusted values.
    varname1 = 'ssh_amp'
    varname2 = 'ssh_phs'

    condition = "(Variable_Name = '%s' or Variable_Name = '%s')" % (varname1, varname2)
    if staname is not None:
        condition += " and StationName = '%s'" % staname
    if yearrange is not None:
        t1 = '%s-01-01 00:00:00' % yearrange[0]
        t2 = '%4d-01-01 00:00:00' % yearrange[1]
        # The timestamps are string literals and must be quoted in the SQL
        condition += " and time_start > '%s' and time_start < '%s'" % (t1, t2)

    # ydim names the dimension that varies between the returned records
    if staname is not None and yearrange is None:
        ydim = 'year'
    elif staname is None and yearrange is not None:
        ydim = 'station'
    else:
        ydim = None

    data, query = netcdfio.queryNC(dbfile, outvar, tablename, condition, fastmode=True)

    # Read the constituents from the netcdf file
    ncfile = query['NetCDF_Filename'][0]
    nc = Dataset(ncfile, 'r')
    try:
        names = nc.Tidal_Constituents.split(', ')
    finally:
        # Close the file even if the attribute is missing
        nc.close()

    # Find the constituent indices
    if cons is None:
        cons = names

    ind = []
    for nn in cons:
        # Index of the first matching name
        ind.append(next(i for i, j in enumerate(names) if j == nn))

    # Output the query data into a nicer format
    amp = []
    phs = []
    time = []
    lon = []
    lat = []
    StationName = []
    for ii, dd in enumerate(data):
        if 'ssh_amp' in dd:
            amp.append(dd['ssh_amp'][ind].ravel())

            # Per-record time is not collected for a multi-station query
            if ydim != 'station':
                time.append(dd['time'][0])

            # Per-record position is not collected for a single-station query
            if ydim != 'year':
                lon.append(dd['longitude'])
                lat.append(dd['latitude'])
                StationName.append(query['StationName'][ii])

        if 'ssh_phs' in dd:
            phs.append(dd['ssh_phs'][ind].ravel())

    amp = np.array(amp)
    phs = np.array(phs)

    # Get the time and station coordinates
    if ydim == 'station':
        time = data[0]['time'][0]
        lon = np.array(lon)
        lat = np.array(lat)
    elif ydim == 'year':
        lon = data[0]['longitude']
        lat = data[0]['latitude']
        StationName = query['StationName'][0]
        time = np.array(time)

    return amp, phs, time, lon, lat, StationName, cons
def interpWeatherStations(latlon, tstart, tend, dt, utmzone, dbfile, maxgap=40, showplot=False):
    """
    Temporally interpolate weather station data onto a specified time grid

    Inputs:
        latlon - four bounds used in the database query:
            lon_start >= latlon[0], lon_end <= latlon[1],
            lat_start >= latlon[2], lat_end <= latlon[3]
        tstart, tend - start and end time strings, format 'yyyymmdd'
            (tend itself is not included in the output time grid)
        dt - output time step in hours
        utmzone - UTM zone passed to ll2utm
        dbfile - location of database file
        maxgap (optional) - stations with a run of more than this many
            consecutive non-finite samples inside the output period are dropped
        showplot (optional) - plot the raw and interpolated series of every
            station that is kept

    Returns:
        coords - dictionary with lists 'x_<var>', 'y_<var>' (from ll2utm) and
            'z_<var>' (station 'elevation') for each variable
        output - dictionary keyed by variable name; output[var]['Data'] is an
            array of shape (ntime, number of stations kept)
        nctime - convertTime() of the output time grid

    Raises:
        ValueError if dt is not positive

    NOTE(review): interpWeatherStations is also defined earlier in this file;
    if both live in the same module this later definition is the one in
    effect after import.
    """
    if dt <= 0:
        # A non-positive step would never reach timeend in the loop below
        raise ValueError('dt must be a positive number of hours')

    # Convert to datetime format
    timestart = datetime.strptime(tstart, '%Y%m%d')
    timeend = datetime.strptime(tend, '%Y%m%d')

    # Create the time variable
    timeList = []
    tnow = timestart
    while tnow < timeend:
        timeList.append(tnow)
        tnow += timedelta(hours=dt)

    nctime = convertTime(timeList)
    ntime = len(timeList)

    varnames = ['Tair', 'Pair', 'Uwind', 'Vwind', 'RH', 'rain', 'cloud']

    coords = {}
    output = {}

    # Query settings that do not depend on the variable
    outvar = ['NetCDF_Filename', 'NetCDF_GroupID', 'StationName']
    tablename = 'observations'
    timefmt = '%Y-%m-%d %H:%M:%S'

    # Read in the semi-processed data
    for vv in varnames:
        print('Interpolating variable %s...' % vv)

        # Every clause ends in a space so that keywords never run into the
        # preceding literal.
        condition = (
            'Variable_Name = "%s" ' % vv
            + 'and time_start <= "%s" ' % timestart.strftime(timefmt)
            + 'and time_end >= "%s" ' % timeend.strftime(timefmt)
            + 'and lon_start >= %3.6f ' % latlon[0]
            + 'and lon_end <= %3.6f ' % latlon[1]
            + 'and lat_start >= %3.6f ' % latlon[2]
            + 'and lat_end <= %3.6f' % latlon[3]
        )

        data, query = netcdfio.queryNC(dbfile, outvar, tablename, condition)

        # Quality control. The accepted stations are collected in a new list:
        # popping from `data` while iterating over it skips the element after
        # every removal and can remove the wrong station.
        good = []
        for dd in data:
            finite = np.isfinite(np.ravel(dd[vv]))
            timenow = convertTime(dd['time'])
            timegood = timenow[finite]

            # No usable samples at all
            if len(timegood) == 0:
                continue

            # The finite samples must bracket the whole output period
            if nctime[0] <= timegood[0] or nctime[-1] >= timegood[-1]:
                continue

            # Index of the last sample before the first / last output time.
            # Both exist because of the bracketing test above.
            t1 = np.nonzero(timenow < nctime[0])[0][-1]
            t2 = np.nonzero(timenow < nctime[-1])[0][-1]

            # Find the maximum gap size between the two time limits
            gapsize = 0
            gap = 0
            for isgood in finite[t1:t2]:
                if isgood:
                    gap = 0
                else:
                    gap += 1
                    if gap > gapsize:
                        gapsize = gap

            if gapsize > maxgap:
                print('Removing data point - gap size %d is > %d' % (gapsize, maxgap))
                continue

            good.append(dd)
        data = good

        # Work out the number of spatial points of each variable based on
        # quality control
        coords['x_' + vv] = []
        coords['y_' + vv] = []
        coords['z_' + vv] = []

        for dd in data:
            # Convert to utm
            ll = np.hstack((dd['longitude'], dd['latitude']))
            xy = ll2utm(ll, utmzone)
            coords['x_' + vv].append(xy[0][0])
            coords['y_' + vv].append(xy[0][1])
            coords['z_' + vv].append(dd['elevation'])

        varlen = len(data)

        # Initialize the output arrays
        output[vv] = {'Data': np.zeros((ntime, varlen))}

        # Loop through and interpolate each variable onto the time array
        for ctr, dd in enumerate(data):
            # Interpolate the data using the finite samples only
            tmp = np.array(np.ravel(dd[vv]))
            timenow = convertTime(dd['time'])
            ind = np.isfinite(tmp)
            F = interpolate.interp1d(timenow[ind], tmp[ind], kind='linear')
            varinterp = F(nctime)

            output[vv]['Data'][:, ctr] = varinterp

            if showplot:
                plt.figure()
                # pyplot.hold no longer exists in recent Matplotlib releases
                # (holding is always on there), so only call it when present.
                if hasattr(plt, 'hold'):
                    plt.hold('on')
                plt.plot(timenow, tmp)
                plt.plot(nctime, varinterp, 'r')
                plt.title(dd['StationName'] + ' - ' + vv)
                plt.show()

    # Return the data
    return coords, output, nctime
def loadDBstation(dbfile, stationID, varname, timeinfo=None, filttype=None, cutoff=3600.0, output_meta=False, method='linear'):
    """
    Load station data from a database file

    Inputs:
        dbfile - location of database file
        stationID - Station ID in database (matched with SQL LIKE "%<stationID>",
            i.e. any ID that ends with this string)
        varname - variable name e.g. 'waterlevel', 'discharge', 'salinity'
        timeinfo (optional) - tuple with (starttime,endtime,dt). Format 'yyyymmdd.HHMMSS'
            Use this to interpolate onto a constant time vector
        filttype (optional) - 'low' or 'high'
            Set this to filter data
        cutoff (optional) - filter cutoff period passed to timeseries.filt
            (reported in seconds in the log message)
        output_meta (optional) - if True also return a metadata dictionary
        method (optional) - interpolation method passed to timeseries.interp
            (only used when timeinfo is given)

    Returns:
        timeseries object, or the tuple (timeseries, meta) when output_meta is
        True. meta has keys 'longitude', 'latitude', 'elevation', 'StationName'.
        -1 on error (no station matched the query)

    NaN values in the station data are replaced by zero before the timeseries
    is built.
    """
    from netcdfio import queryNC

    outvar = ['NetCDF_Filename', 'NetCDF_GroupID', 'StationName']
    tablename = 'observations'
    # NOTE(review): varname/stationID are interpolated straight into the SQL
    # condition - only pass trusted values.
    condition = 'Variable_Name = "%s" and StationID LIKE "%%%s"' % (varname, stationID)

    # Single-argument print(...) behaves identically on Python 2 and 3
    print('Querying database...')
    print(condition)
    data, query = queryNC(dbfile, outvar, tablename, condition)

    # Check for an empty result BEFORE touching data[0]; otherwise the
    # documented -1 return can never be reached (IndexError instead).
    if len(data) == 0:
        print('!!! Warning - Did not find any stations matching query. Returning -1 !!!')
        return -1

    # Only the first matching record is used
    station = data[0]
    yout = station[varname].squeeze()

    # Zero nan
    yout[np.isnan(yout)] = 0.0

    ts = timeseries(station['time'], yout)

    if timeinfo is not None:
        print('Interpolating station data between %s and %s\n' % (timeinfo[0], timeinfo[1]))
        tnew, ynew = ts.interp((timeinfo[0], timeinfo[1], timeinfo[2]), method=method)
        ts = timeseries(tnew, ynew)
        ts.dt = timeinfo[2]  # This needs updating

    if filttype is not None:
        print('%s-pass filtering output data. Cutoff period = %f [s].' % (filttype, cutoff))
        yfilt = ts.filt(cutoff, btype=filttype, axis=-1)
        ts.y = yfilt.copy()

    if not output_meta:
        return ts

    # Elevation is optional in the record; default to a one-element zero array
    if 'elevation' in station:
        ele = station['elevation']
    else:
        ele = np.array([0.0])

    meta = {
        'longitude': station['longitude'],
        'latitude': station['latitude'],
        'elevation': ele,
        'StationName': query['StationName'][0],
    }
    return ts, meta