Beispiel #1
0
def loadDBstation(dbfile,stationID,varname,timeinfo=None,filttype=None,cutoff=3600.0,output_meta=False):
    """
    Load station data from a database file
    
    Inputs:
        dbfile - location of database file
        stationID - Station ID in database. The query uses
            'StationID LIKE "%<stationID>"', i.e. it matches any stored ID
            that ends with this value.
        varname - variable name e.g. 'waterlevel', 'discharge', 'salinity'
        
        timeinfo (optional) - tuple with (starttime,endtime,dt). Format 'yyyymmdd.HHMMSS'
            Use this to interpolate onto a constant time vector
        filttype (optional) - 'low' or 'high' 
            Set this to filter data
        cutoff (optional) - filter cutoff period in seconds (only used when
            filttype is set)
        output_meta (optional) - if True, also return a dictionary with the
            keys 'longitude', 'latitude', 'elevation' and 'StationName'
            
    Returns:
        timeseries object, or the tuple (timeseries, meta) when output_meta
        is True
        -1 on error (no station matched the query)
            
    Only the first record returned by the query is used.
    """
    from netcdfio import queryNC
    
    outvar = ['NetCDF_Filename','NetCDF_GroupID','StationName']
    tablename = 'observations'
    # NOTE: the condition is assembled by string formatting, so varname and
    # stationID must come from a trusted source.
    condition = 'Variable_Name = "%s" and StationID LIKE "%%%s"' % (varname,stationID)
    
    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print('Querying database...')
    print(condition)
    data, query = queryNC(dbfile,outvar,tablename,condition)
    
    if len(data)==0:
        print('!!! Warning - Did not find any stations matching query. Returning -1 !!!')
        return -1
    
    station = data[0]
    ts = timeseries(station['time'],station[varname].squeeze())
        
    if timeinfo is not None:
        print('Interpolating station data between %s and %s\n'%(timeinfo[0],timeinfo[1]))
        tnew,ynew = ts.interp((timeinfo[0],timeinfo[1],timeinfo[2]))
        ts = timeseries(tnew,ynew)
        ts.dt = timeinfo[2] # This needs updating
        
    if filttype is not None:
        print('%s-pass filtering output data. Cutoff period = %f [s].'%(filttype,cutoff))
        yfilt = ts.filt(cutoff,btype=filttype,axis=-1)
        ts.y = yfilt.copy()
    
    if not output_meta:
        return ts
    
    # Records without an elevation entry default to 0.0
    if 'elevation' in station:
        ele = station['elevation']
    else:
        ele = 0.0
    meta = {'longitude':station['longitude'],'latitude':station['latitude'],'elevation':ele,'StationName':query['StationName'][0]}
    return ts, meta
Beispiel #2
0
def interpWeatherStations(latlon,tstart,tend,dt,utmzone,dbfile, maxgap=40, showplot=False):
    """
    Temporally interpolate weather station data onto a specified time grid
    
    Inputs:
        latlon - four bounds used in the database query. Records are selected
            with lon_start >= latlon[0], lon_end <= latlon[1],
            lat_start >= latlon[2] and lat_end <= latlon[3]
        tstart, tend - start and end of the output time grid, format 'yyyymmdd'
            (tend itself is excluded from the grid)
        dt - time step of the output grid in hours (must be > 0)
        utmzone - UTM zone passed to ll2utm
        dbfile - location of database file
        maxgap (optional) - longest run of consecutive non-finite samples
            tolerated inside the output window. Stations with a longer run
            are dropped.
        showplot (optional) - plot the raw and interpolated series of every
            station that is kept
    
    Returns:
        coords - dictionary with the lists 'x_<var>', 'y_<var>' (UTM
            coordinates from ll2utm) and 'z_<var>' (elevation) per variable
        output - dictionary with output[<var>]['Data'], an array of shape
            (ntime, nstations) per variable
        nctime - the output time grid as returned by convertTime
    
    Raises:
        ValueError - if dt is not positive (the time grid loop would never end)
    """
    if dt <= 0:
        raise ValueError('dt must be a positive number of hours')
    
    ###
    # Convert to datetime format
    timestart = datetime.strptime(tstart,'%Y%m%d')
    timeend = datetime.strptime(tend,'%Y%m%d')
    
    # Create the time variable
    timeList = []
    tnow=timestart
    while tnow<timeend:
        timeList.append(tnow)
        tnow += timedelta(hours=dt)
    
    nctime = convertTime(timeList)
    ntime = len(timeList)
        
    varnames = ['Tair','Pair','Uwind','Vwind','RH','rain','cloud']
    
    tfmt = '%Y-%m-%d %H:%M:%S'
    outvar = ['NetCDF_Filename','NetCDF_GroupID','StationName']
    tablename = 'observations'
    
    coords={}
    output = {}
    # Read in the semi-processed data
    for vv in varnames:
        print('Interpolating variable %s...'%vv)
        # Every clause ends with a space so that neighbouring clauses do not
        # run into each other.
        condition = 'Variable_Name = "%s" ' % vv + \
            'and time_start <= "%s" ' % timestart.strftime(tfmt) + \
            'and time_end >= "%s" ' % timeend.strftime(tfmt) + \
            'and lon_start >= %3.6f ' % latlon[0] + \
            'and lon_end <= %3.6f ' % latlon[1] + \
            'and lat_start >= %3.6f ' % latlon[2] + \
            'and lat_end <= %3.6f ' % latlon[3]
        
        data, query = netcdfio.queryNC(dbfile,outvar,tablename,condition)
        
        # Quality control. The accepted stations are collected in a new list:
        # popping from 'data' while looping over it skips the element after
        # every removal and can remove the wrong station.
        stations = []
        for dd in data:
            tmp = np.array(np.ravel(dd[vv]))
            ind = np.isfinite(tmp)
            timenow = convertTime(dd['time'])
            timegood = timenow[ind]
            
            # No finite samples at all - nothing to interpolate from
            if len(timegood) == 0:
                continue
            
            # The finite samples must bracket the whole output grid
            if nctime[0] <= timegood[0] or nctime[-1] >= timegood[-1]:
                continue
            
            # Last sample before the start / before the end of the grid.
            # Both exist because the finite samples bracket the grid.
            t1 = np.nonzero(timenow < nctime[0])[0][-1]
            t2 = np.nonzero(timenow < nctime[-1])[0][-1]
            
            # Find the maximum gap size between the two time limits
            gapsize = 0
            gap = 0
            for isgood in ind[t1:t2]:
                if isgood:
                    gap = 0
                else:
                    gap += 1
                    if gap > gapsize:
                        gapsize = gap
            
            if gapsize > maxgap:
                print('Removing data point - gap size %d is > %d'%(gapsize,maxgap))
                continue
            
            stations.append((dd,timenow,tmp,ind))
        
        # Work out the number of spatial points of each variable based on quality control
        coords['x_'+vv] = []
        coords['y_'+vv] = []
        coords['z_'+vv] = []
        
        for dd,timenow,tmp,ind in stations:
            # Convert to utm
            ll = np.hstack((dd['longitude'],dd['latitude']))
            xy = ll2utm(ll,utmzone)
            coords['x_'+vv].append(xy[0][0])
            coords['y_'+vv].append(xy[0][1])
            coords['z_'+vv].append(dd['elevation'])
        
        varlen = len(stations)
        
        # Initialize the output arrays
        output[vv] = {'Data':np.zeros((ntime,varlen))}
        
        # Loop through and interpolate each variable onto the time array
        for ctr,(dd,timenow,tmp,ind) in enumerate(stations):
            # Interpolate the data using the finite samples only
            F = interpolate.interp1d(timenow[ind],tmp[ind],kind='linear')
            varinterp = F(nctime)
            output[vv]['Data'][:,ctr]=varinterp
            
            if showplot:
                # No plt.hold call: it was removed in Matplotlib 3.0 and
                # successive plot calls add to the same axes by default.
                plt.figure()
                plt.plot(timenow,tmp)
                plt.plot(nctime,varinterp,'r')
                plt.title(dd['StationName']+' - '+vv)
                plt.show()
            
    # Return the data
    return coords, output, nctime
Beispiel #3
0
def QueryNC(dbfile,staname=None,yearrange=None,cons=None):
    """
    Query the tidal station data
    
    Inputs:
        dbfile - location of database file
        staname (optional) - select records whose StationName equals this value
        yearrange (optional) - (startyear, endyear). Selects records with
            'startyear-01-01 00:00:00' < time_start < 'endyear-01-01 00:00:00'.
            endyear must be an integer (it is formatted with '%4d').
        cons (optional) - list of tidal constituent names to extract.
            Default: all names listed in the Tidal_Constituents attribute of
            the first netcdf file returned by the query.
    
    Returns:
        amp, phs, time, lon, lat, StationName, cons
        
        amp, phs - arrays built from the 'ssh_amp' / 'ssh_phs' records
        time, lon, lat, StationName - depend on the arguments:
            staname only: time is an array with one entry per amplitude
                record; lon, lat and StationName come from the first record
            yearrange only: lon and lat are arrays and StationName is a list
                with one entry per amplitude record; time comes from the
                first record
            otherwise: all four are lists with one entry per amplitude record
    
    Raises:
        ValueError - if a name in cons is not among the constituents listed
            in the netcdf file
    """
    outvar = ['NetCDF_Filename','NetCDF_GroupID','StationName','StationID']
    tablename = 'observations'
    
    # Create the query
    varname1 = 'ssh_amp'
    varname2 = 'ssh_phs'
    clauses = ["(Variable_Name = '%s' or Variable_Name = '%s')"%(varname1,varname2)]
    if staname is not None:
        clauses.append("StationName = '%s'"%staname)
    if yearrange is not None:
        t1 = '%s-01-01 00:00:00'%yearrange[0]
        t2 = '%4d-01-01 00:00:00'%yearrange[1]
        # The timestamps are string literals and must be quoted in the query
        clauses.append("time_start > '%s'"%t1)
        clauses.append("time_start < '%s'"%t2)
    condition = ' and '.join(clauses)
    
    # ydim names the dimension that varies between the returned records
    if staname is not None and yearrange is None:
        ydim = 'year'
    elif staname is None and yearrange is not None:
        ydim = 'station'
    else:
        ydim = None
        
    data, query = netcdfio.queryNC(dbfile,outvar,tablename,condition,fastmode=True)
    
    # Read the constituents from the netcdf file
    ncfile = query['NetCDF_Filename'][0]
    nc = Dataset(ncfile,'r')
    try:
        names = nc.Tidal_Constituents.split(', ')
    finally:
        # Close the file even if the attribute is missing
        nc.close()
    
    # Find the constituent indices
    if cons is None:
        cons=names
    ind = [names.index(nn) for nn in cons]
     
    # Output the query data into a nicer format 
    amp = []
    phs = []
    times = []
    lon = []
    lat = []
    StationName=[]
    for ii,dd in enumerate(data):
        if 'ssh_amp' in dd:
            amp.append(dd['ssh_amp'][ind].ravel())
            
            # Station information is collected unless a single station was
            # requested, time unless the records differ by station only.
            if ydim != 'year':
                lon.append(dd['longitude'])
                lat.append(dd['latitude'])
                StationName.append(query['StationName'][ii])
            if ydim != 'station':
                times.append(dd['time'][0])
    
        if 'ssh_phs' in dd:
            phs.append(dd['ssh_phs'][ind].ravel())
        
    amp = np.array(amp)
    phs = np.array(phs)
    
    # Get the time and station coordinates
    if ydim == 'station':
        times = data[0]['time'][0]
        lon = np.array(lon)
        lat = np.array(lat)
        
    elif ydim == 'year':
        lon = data[0]['longitude']
        lat = data[0]['latitude']
        StationName = query['StationName'][0]
        times = np.array(times)
        
    return amp, phs, times, lon, lat, StationName, cons
Beispiel #4
0
def interpWeatherStations(latlon,
                          tstart,
                          tend,
                          dt,
                          utmzone,
                          dbfile,
                          maxgap=40,
                          showplot=False):
    """
    Temporally interpolate weather station data onto a specified time grid

    Inputs:
        latlon - four bounds used in the database query. Records are selected
            with lon_start >= latlon[0], lon_end <= latlon[1],
            lat_start >= latlon[2] and lat_end <= latlon[3]
        tstart, tend - start and end of the output time grid, format 'yyyymmdd'
            (tend itself is excluded from the grid)
        dt - time step of the output grid in hours (must be > 0)
        utmzone - UTM zone passed to ll2utm
        dbfile - location of database file
        maxgap (optional) - longest run of consecutive non-finite samples
            tolerated inside the output window. Stations with a longer run
            are dropped.
        showplot (optional) - plot the raw and interpolated series of every
            station that is kept

    Returns:
        coords - dictionary with the lists 'x_<var>', 'y_<var>' (UTM
            coordinates from ll2utm) and 'z_<var>' (elevation) per variable
        output - dictionary with output[<var>]['Data'], an array of shape
            (ntime, nstations) per variable
        nctime - the output time grid as returned by convertTime

    Raises:
        ValueError - if dt is not positive (the time grid loop would never end)
    """
    if dt <= 0:
        raise ValueError('dt must be a positive number of hours')

    ###
    # Convert to datetime format
    timestart = datetime.strptime(tstart, '%Y%m%d')
    timeend = datetime.strptime(tend, '%Y%m%d')

    # Create the time variable
    timeList = []
    tnow = timestart
    while tnow < timeend:
        timeList.append(tnow)
        tnow += timedelta(hours=dt)

    nctime = convertTime(timeList)
    ntime = len(timeList)

    varnames = ['Tair', 'Pair', 'Uwind', 'Vwind', 'RH', 'rain', 'cloud']

    tfmt = '%Y-%m-%d %H:%M:%S'
    outvar = ['NetCDF_Filename', 'NetCDF_GroupID', 'StationName']
    tablename = 'observations'

    coords = {}
    output = {}
    # Read in the semi-processed data
    for vv in varnames:
        print('Interpolating variable %s...' % vv)
        # Every clause ends with a space so that neighbouring clauses do not
        # run into each other.
        condition = 'Variable_Name = "%s" ' % vv + \
            'and time_start <= "%s" ' % timestart.strftime(tfmt) + \
            'and time_end >= "%s" ' % timeend.strftime(tfmt) + \
            'and lon_start >= %3.6f ' % latlon[0] + \
            'and lon_end <= %3.6f ' % latlon[1] + \
            'and lat_start >= %3.6f ' % latlon[2] + \
            'and lat_end <= %3.6f ' % latlon[3]

        data, query = netcdfio.queryNC(dbfile, outvar, tablename, condition)

        # Quality control. The accepted stations are collected in a new list:
        # popping from 'data' while looping over it skips the element after
        # every removal and can remove the wrong station.
        stations = []
        for dd in data:
            tmp = np.array(np.ravel(dd[vv]))
            ind = np.isfinite(tmp)
            timenow = convertTime(dd['time'])
            timegood = timenow[ind]

            # No finite samples at all - nothing to interpolate from
            if len(timegood) == 0:
                continue

            # The finite samples must bracket the whole output grid
            if nctime[0] <= timegood[0] or nctime[-1] >= timegood[-1]:
                continue

            # Last sample before the start / before the end of the grid.
            # Both exist because the finite samples bracket the grid.
            t1 = np.nonzero(timenow < nctime[0])[0][-1]
            t2 = np.nonzero(timenow < nctime[-1])[0][-1]

            # Find the maximum gap size between the two time limits
            gapsize = 0
            gap = 0
            for isgood in ind[t1:t2]:
                if isgood:
                    gap = 0
                else:
                    gap += 1
                    if gap > gapsize:
                        gapsize = gap

            if gapsize > maxgap:
                print('Removing data point - gap size %d is > %d' %
                      (gapsize, maxgap))
                continue

            stations.append((dd, timenow, tmp, ind))

        # Work out the number of spatial points of each variable based on quality control
        coords['x_' + vv] = []
        coords['y_' + vv] = []
        coords['z_' + vv] = []

        for dd, timenow, tmp, ind in stations:
            # Convert to utm
            ll = np.hstack((dd['longitude'], dd['latitude']))
            xy = ll2utm(ll, utmzone)
            coords['x_' + vv].append(xy[0][0])
            coords['y_' + vv].append(xy[0][1])
            coords['z_' + vv].append(dd['elevation'])

        varlen = len(stations)

        # Initialize the output arrays
        output[vv] = {'Data': np.zeros((ntime, varlen))}

        # Loop through and interpolate each variable onto the time array
        for ctr, (dd, timenow, tmp, ind) in enumerate(stations):
            # Interpolate the data using the finite samples only
            F = interpolate.interp1d(timenow[ind], tmp[ind], kind='linear')
            varinterp = F(nctime)
            output[vv]['Data'][:, ctr] = varinterp

            if showplot:
                # No plt.hold call: it was removed in Matplotlib 3.0 and
                # successive plot calls add to the same axes by default.
                plt.figure()
                plt.plot(timenow, tmp)
                plt.plot(nctime, varinterp, 'r')
                plt.title(dd['StationName'] + ' - ' + vv)
                plt.show()

    # Return the data
    return coords, output, nctime
Beispiel #5
0
def loadDBstation(dbfile,
                  stationID,
                  varname,
                  timeinfo=None,
                  filttype=None,
                  cutoff=3600.0,
                  output_meta=False,
                  method='linear'):
    """
    Load station data from a database file
    
    Inputs:
        dbfile - location of database file
        stationID - Station ID in database. The query uses
            'StationID LIKE "%<stationID>"', i.e. it matches any stored ID
            that ends with this value.
        varname - variable name e.g. 'waterlevel', 'discharge', 'salinity'
        
        timeinfo (optional) - tuple with (starttime,endtime,dt). Format 'yyyymmdd.HHMMSS'
            Use this to interpolate onto a constant time vector
        filttype (optional) - 'low' or 'high' 
            Set this to filter data
        cutoff (optional) - filter cutoff period in seconds (only used when
            filttype is set)
        output_meta (optional) - if True, also return a dictionary with the
            keys 'longitude', 'latitude', 'elevation' and 'StationName'
        method (optional) - passed as the 'method' keyword to
            timeseries.interp (only used when timeinfo is set)
            
    Returns:
        timeseries object, or the tuple (timeseries, meta) when output_meta
        is True
        -1 on error (no station matched the query)
            
    Only the first record returned by the query is used. NaN values in the
    data are replaced by 0.0 before the timeseries is built.
    """
    from netcdfio import queryNC

    outvar = ['NetCDF_Filename', 'NetCDF_GroupID', 'StationName']
    tablename = 'observations'
    # NOTE: the condition is assembled by string formatting, so varname and
    # stationID must come from a trusted source.
    condition = 'Variable_Name = "%s" and StationID LIKE "%%%s"' % (varname,
                                                                    stationID)

    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print('Querying database...')
    print(condition)
    data, query = queryNC(dbfile, outvar, tablename, condition)

    # Check for an empty result before touching data[0]; indexing first
    # raises IndexError and the documented -1 is never returned.
    if len(data) == 0:
        print('!!! Warning - Did not find any stations matching query. Returning -1 !!!')
        return -1

    station = data[0]
    yout = station[varname].squeeze()
    # Zero nan (assigned in place)
    yout[np.isnan(yout)] = 0.0

    ts = timeseries(station['time'], yout)

    if timeinfo is not None:
        print('Interpolating station data between %s and %s\n' % (timeinfo[0],
                                                                  timeinfo[1]))
        tnew, ynew = ts.interp((timeinfo[0], timeinfo[1], timeinfo[2]),
                               method=method)
        ts = timeseries(tnew, ynew)
        ts.dt = timeinfo[2]  # This needs updating

    if filttype is not None:
        print('%s-pass filtering output data. Cutoff period = %f [s].' % (
            filttype, cutoff))
        yfilt = ts.filt(cutoff, btype=filttype, axis=-1)
        ts.y = yfilt.copy()

    if not output_meta:
        return ts

    # Records without an elevation entry default to a one-element zero array
    if 'elevation' in station:
        ele = station['elevation']
    else:
        ele = np.array([0.0])
    meta = {
        'longitude': station['longitude'],
        'latitude': station['latitude'],
        'elevation': ele,
        'StationName': query['StationName'][0]
    }
    return ts, meta