# 03.compute_cc.py -- computes the cross-correlation functions of the pending 'CC' jobs

import os
import time, calendar

import numpy as np
from obspy.core import read, utcdatetime, Stream
from obspy.signal import cosTaper
from obspy.signal.filter import lowpass, highpass
from scikits.samplerate import resample

from database_tools import *
from myCorr import myCorr
from whiten import whiten

import logging
# Logging goes to two places with one shared layout: the file
# ./compute_cc.log (opened in 'w' mode, i.e. overwritten at every run) and a
# StreamHandler attached to the root logger. Both accept DEBUG and above.
_LOG_FORMAT = '%(asctime)s [%(levelname)s] %(message)s'

logging.basicConfig(filename="./compute_cc.log",
                    filemode='w',
                    format=_LOG_FORMAT,
                    level=logging.DEBUG)

formatter = logging.Formatter(_LOG_FORMAT)
console = logging.StreamHandler()
console.setFormatter(formatter)
console.setLevel(logging.DEBUG)
logging.getLogger().addHandler(console)

logging.info('*** Starting: Compute CC ***')

#Connection to the DB
db = connect()


def _config_enabled(name):
    """Return True when the configuration entry *name* is 'Y', 'y', '1' or 1."""
    return get_config(db, name) in ['Y','y','1',1]


#Get Configuration
# Keep only the component pairs that are switched on in the configuration,
# in the fixed order listed here.
components_to_compute = [comp
                         for comp in ['ZZ','RR','TT','TR','RT','ZR','RZ','TZ','ZT']
                         if _config_enabled(comp)]

logging.info("Will compute %s" % " ".join(components_to_compute))

allow_large_concats(db)

# Numeric processing parameters, all read from the configuration table.
goal_sampling_rate = float(get_config(db, "cc_sampling_rate")) # was 20.0
goal_duration = float(get_config(db, "analysis_duration")) #was 86400
maxlag = float(get_config(db, "maxlag"))
# Length of one correlation window, in samples at goal_sampling_rate.
min30 = float(get_config(db, "corr_duration")) * goal_sampling_rate
windsorizing= float(get_config(db, "windsorizing"))
resampling_method = get_config(db, "resampling_method")
decimation_factor = int(get_config(db, "decimation_factor"))
preprocess_lowpass = float(get_config(db, "preprocess_lowpass"))
preprocess_highpass = float(get_config(db, "preprocess_highpass"))

# Output switches: keep every individual window CCF and/or the daily stack.
keep_all = _config_enabled('keep_all')
keep_days = _config_enabled('keep_days')

#Process !

def _read_day_trace(files, station, comp, day_start):
    """Read and preprocess one station/component for the day being processed.

    The MiniSEED *files* are read and merged, every contiguous segment is
    demeaned and cosine-tapered, gaps are zero-filled, and the result is
    trimmed/padded to ``[day_start, day_start + goal_duration)``. The trace
    is then low-passed, high-passed and, when its sampling rate differs from
    ``goal_sampling_rate``, resampled or decimated according to
    ``resampling_method``.

    Reads the module-level configuration values ``goal_duration``,
    ``goal_sampling_rate``, ``preprocess_lowpass``, ``preprocess_highpass``,
    ``resampling_method`` and ``decimation_factor``.

    :param files: paths of the MiniSEED files to read (must not be empty).
    :param station: "NET.STA" identifier, only used in log messages.
    :param comp: component letter ('Z', 'E' or 'N'), only used in log messages.
    :param day_start: UTCDateTime of the beginning of the day.
    :returns: 1-D array of samples, padded with one zero if its length is odd.
    """
    logging.debug("%s.%s Reading %i Files" % (station, comp, len(files)))
    stream = Stream()
    for fname in sorted(files):
        stream += read(fname, format="MSEED")

    # First merge + split: yields one trace per contiguous data segment.
    stream.merge()
    stream = stream.split()
    for trace in stream:
        npts = len(trace.data)
        if npts > 2:
            # Work on a float copy: in-place float arithmetic on integer
            # samples either truncates the result or raises, depending on
            # the numpy version.
            data = trace.data.astype(np.float64)
            data -= np.mean(data)
            data *= cosTaper(npts, 0.01)
            trace.data = data
        else:
            # Segments too short to taper are blanked. Same dtype as the
            # other segments so that the merge below accepts them.
            trace.data = np.zeros(npts, dtype=np.float64)

    logging.debug("%s.%s Merging Stream" % (station, comp))
    stream.merge(fill_value=0) #fills gaps with 0s and gives only one 'Trace'
    trace = stream[0]

    day_end = day_start + goal_duration - trace.stats.delta
    logging.debug("%s.%s Slicing Stream to %s:%s" % (station, comp, day_start, day_end))
    trace.trim(day_start, day_end, pad=True, fill_value=0.0)

    sampling_rate = trace.stats.sampling_rate
    logging.debug("%s.%s Lowpass at %.2f Hz" % (station, comp, preprocess_lowpass))
    data = lowpass(trace.data, preprocess_lowpass, sampling_rate, zerophase=True)
    logging.debug("%s.%s Highpass at %.2f Hz" % (station, comp, preprocess_highpass))
    data = highpass(data, preprocess_highpass, sampling_rate, zerophase=True)

    if sampling_rate != goal_sampling_rate:
        if resampling_method == "Resample":
            logging.debug("%s.%s Downsample to %.1f Hz" % (station, comp,goal_sampling_rate))
            data = resample(data, goal_sampling_rate/sampling_rate, 'sinc_best')
        elif resampling_method == "Decimate":
            logging.debug("%s.%s Decimate by a factor of %i" % (station, comp,decimation_factor))
            data = data[::decimation_factor]

    # NOTE: no sub-sample time-shift correction is applied here; the trace is
    # used as returned by the trim above.
    if len(data) % 2 != 0:
        data = np.append(data, 0.)
    return data


def _component_trace(letter, index, tramef, azimuth_rad):
    """Return the day-long trace of one station for component *letter*.

    :param letter: 'Z', 'R' or 'T'.
    :param index: row of the station in the arrays of *tramef*.
    :param tramef: dict mapping 'Z' (and 'E', 'N' when horizontals are used)
        to arrays with one row per station.
    :param azimuth_rad: azimuth of the station pair in radians, used to
        rotate N/E into R/T.
    :raises ValueError: if *letter* is not 'Z', 'R' or 'T'.
    """
    if letter == "Z":
        return tramef['Z'][index]
    north = tramef['N'][index]
    east = tramef['E'][index]
    if letter == "R":
        return north * np.cos(azimuth_rad) + east * np.sin(azimuth_rad)
    if letter == "T":
        return north * np.sin(azimuth_rad) - east * np.cos(azimuth_rad)
    raise ValueError("Unknown component: %s" % letter)


# Main loop: each 'CC' job is one day (goal_day) and a comma-separated list of
# "NET.STA:NET.STA" pairs. For every job the data of all stations involved are
# read and preprocessed once, then each pair/component/filter is correlated
# window by window and stored through add_corr().
while is_next_job(db,type='CC'):
    goal_day, pairs, refs = get_next_job(db,type='CC')

    if ',' in pairs:
        pairs = pairs.split(',')
        refs = refs.split(',')
    else:
        pairs = [pairs,]
        refs = [refs,]

    # Flag every pair of the job as 'I' and collect the stations to load.
    stations = []
    for pair in pairs:
        netsta1, netsta2 = pair.split(':')
        stations.extend([netsta1, netsta2])
        update_job(db, goal_day, pair, 'CC','I')

    # The filter list is needed for every window of every pair: fetch it once
    # per job instead of querying the database inside the inner loops.
    filters = get_filters(db,all=False)

    stations = np.unique(stations)

    logging.info("New CC Job: %s (%i pairs with %i stations)" % (goal_day,len(pairs),len(stations)))
    jt = time.time()

    # datafiles[component][station] -> list of file paths for goal_day.
    datafiles = {'Z': {}, 'E': {}, 'N': {}}
    for station in stations:
        for per_station in datafiles.values():
            per_station[station] = []
        net, sta = station.split('.')
        for entry in get_filenames(db, goal_day, net, sta):
            _net, _sta, chan, path, fname, _start, _end, _duration, _rate = entry
            # Files are sorted on the last letter of the channel code; any
            # orientation other than Z/E/N is ignored.
            if chan[-1] in datafiles:
                datafiles[chan[-1]][station].append(os.path.join(path, fname))

    TimeVec = np.arange(0., goal_duration, 1./goal_sampling_rate)

    # Horizontal components are only loaded when an R or T component was requested.
    wanted = ''.join(components_to_compute)
    if 'R' in wanted or 'T' in wanted:
        comps = ['Z','E','N']
    else:
        comps = ['Z']
    # One zero-initialised row per station; stations/components without data
    # stay at zero.
    tramef = dict((comp, np.zeros((len(stations), len(TimeVec)))) for comp in comps)

    # Start of the processed day; also the time origin (epoch seconds) used
    # to label each correlation window. Taking it from goal_day keeps it
    # defined and correct even when no file is read for this job.
    day_start = utcdatetime.UTCDateTime(goal_day.replace('-',''))
    basetime = calendar.timegm(day_start.utctimetuple())

    for istation, station in enumerate(stations):
        for comp in comps:
            files = datafiles[comp][station]
            if len(files) == 0:
                continue
            # NOTE: the processed trace must have exactly len(TimeVec)
            # samples, otherwise this assignment raises a ValueError.
            tramef[comp][istation] = _read_day_trace(files, station, comp, day_start)

    # print '##### STREAMS ARE ALL PREPARED AT goal Hz #####'
    dt = 1./goal_sampling_rate
    fe = goal_sampling_rate
    # Window length as an integer sample count: numpy rejects float slice bounds.
    window_npts = int(round(min30))
    #Calculate the number of slices
    tranches = int(goal_duration *fe / min30)
    lag_samples = np.ceil(maxlag/dt)

    for pair in pairs:
        orig_pair = pair
        logging.debug('Processing pair: %s' % pair.replace(':',' vs '))
        tt = time.time()
        station1, station2 = pair.split(':')
        idx1 = np.where(stations == station1)[0][0]
        idx2 = np.where(stations == station2)[0][0]

        s1 = get_station(db, station1.split('.')[0],station1.split('.')[1])
        s2 = get_station(db, station2.split('.')[0],station2.split('.')[1])

        # Coordinate system handed to azimuth(): 'DEG' or 'UTM' when both
        # stations use the same one, 'MIX' otherwise.
        if s1.coordinates == s2.coordinates:
            if s1.coordinates == 'DEG':
                coordinates = 'DEG'
            else:
                coordinates = 'UTM'
        else:
            coordinates = 'MIX'
        cplAz = azimuth(coordinates, s1.X, s1.Y, s2.X, s2.Y)
        azimuth_rad = cplAz*np.pi/180.

        for components in components_to_compute:
            # Row 0 is the first station's trace, row 1 the second's. vstack
            # copies, so the clipping below never touches the arrays in tramef.
            trames = np.vstack((_component_trace(components[0], idx1, tramef, azimuth_rad),
                                _component_trace(components[1], idx2, tramef, azimuth_rad)))

            # Per-filter running sum of the window CCFs and number of windows stacked.
            daycorr = {}
            ndaycorr = {}
            for filterdb in filters:
                daycorr[filterdb.ref] = np.zeros(get_maxlag_samples(db,))
                ndaycorr[filterdb.ref] = 0

            for itranche in range(0,tranches):
                first = itranche * window_npts
                # View on trames: the clipping below is done in place and is
                # therefore seen by the following filters of the same window.
                trame2h = trames[:, first:first + window_npts]
                rmsmat = np.std(np.abs(trame2h),axis=1)

                window_start = time.gmtime(basetime+itranche*min30/fe)
                thisdate = time.strftime("%Y-%m-%d", window_start)
                thistime = time.strftime("%H_%M", window_start)

                for filterdb in filters:
                    filterid = filterdb.ref
                    low = float(filterdb.low)
                    high = float(filterdb.high)
                    rms_threshold = filterdb.rms_threshold

                    trames2hWb = np.zeros(np.shape(trame2h))
                    for i in (0, 1):
                        if rmsmat[i] > rms_threshold:
                            if windsorizing != 0:
                                #clipping at windsorizing*rms
                                limit = windsorizing * rmsmat[i]
                                indexes = np.where(np.abs(trame2h[i]) > limit)[0]
                                trame2h[i][indexes] = np.sign(trame2h[i][indexes]) * limit
                            trames2hWb[i] = whiten(trame2h[i],min30, dt, low, high, plot=False)
                        else:
                            # At or below the rms threshold (or NaN): no
                            # clipping and no whitening.
                            trames2hWb[i] = trame2h[i]

                    corr = myCorr(trames2hWb, lag_samples, plot=False)
                    if keep_all:
                        add_corr(db, station1.replace('.','_'), station2.replace('.','_'),filterid, thisdate, thistime, min30/fe, components, corr, fe)

                    # Only finite CCFs take part in the daily stack.
                    if keep_days:
                        if not np.any(np.isnan(corr)) and not np.any(np.isinf(corr)):
                            daycorr[filterid] += corr
                            ndaycorr[filterid] += 1

                    del corr, trames2hWb

            if keep_days:
                # Best effort: a failure to save the daily stack must not stop
                # the job, but it is logged as an error with its traceback.
                try:
                    thisdate = time.strftime("%Y-%m-%d",time.gmtime(basetime))
                    thistime = time.strftime("%H_%M",time.gmtime(basetime))
                    for filterdb in filters:
                        filterid = filterdb.ref
                        corr = daycorr[filterid]
                        ncorr = ndaycorr[filterid]
                        if ncorr > 0:
                            logging.debug("Saving daily CCF for filter %02i (stack of %02i CCF)"%(filterid,ncorr))
                            # Daily CCF = mean of the stacked window CCFs.
                            corr /= ncorr
                            add_corr(db, station1.replace('.','_'), station2.replace('.','_'),filterid, thisdate, thistime, min30/fe, components, corr, fe, day=True,ncorr=ncorr)
                except Exception:
                    logging.exception("Could not save the daily CCF for %s (%s)" % (orig_pair, components))
            del trames, daycorr, ndaycorr

        update_job(db, goal_day, orig_pair,'CC','D')

        logging.debug("Finished processing this pair. It took %.2f seconds"%(time.time()-tt))
    logging.info("Job Finished. It took %.2f seconds" % (time.time() - jt))
logging.info('*** Finished: Compute CC ***')


###EOF