def multiplex(st, Nc=None, trimTolerance=15, template=False, returnlist=False,
              retst=False):
    """
    Multiplex an obspy stream object

    Parameters
    ----------
    st : instance of obspy stream
        The stream containing the data to multiplex.
    Nc : None or int
        If not None the number of channels in stream, else try to determine
    trimTolerance : int
        The number of samples each channel can vary before being rejected
    template : bool
        If True st is a template waveform, therefore an exception will be
        raised if trimTolerance is exceeded
    returnlist : bool
        If True also return np array of un-multiplexed data as a list
    retst : bool
        If True also return the input stream

    Returns
    -------
    List with multiplexed data and other desired waveforms
    """
    if Nc is None:  # count unique channels in stream
        Nc = len(set([x.stats.channel for x in st]))
    if Nc == 1:  # if only one channel do nothing
        C1 = st[0].data
        C = st[0].data
    else:
        chans = [x.data for x in st]  # data on each channel
        minlen = np.array([len(x) for x in chans])
        if max(minlen) - min(minlen) > trimTolerance:
            netsta = st[0].stats.network + '.' + st[0].stats.station
            utc1 = str(st[0].stats.starttime).split('.')[0]
            utc2 = str(st[0].stats.endtime).split('.')[0]
            msg = ('Channel lengths are not within %d on %s from %s to %s' %
                   (trimTolerance, netsta, utc1, utc2))
            if template:
                detex.log(__name__, msg, level='error')
            else:
                msg = msg + ' trimming to shortest channel'
                detex.log(__name__, msg, level='warning', pri=True)
                trimDim = min(minlen)  # trim to smallest dimension
                chansTrimed = [x[:trimDim] for x in chans]
        elif max(minlen) - min(minlen) > 0:  # if chan lengths not all equal
            trimDim = min(minlen)
            chansTrimed = [x[:trimDim] for x in chans]  # trim to shortest
        elif max(minlen) - min(minlen) == 0:  # if chan lengths exactly equal
            chansTrimed = chans
        C = np.vstack(chansTrimed)
        C1 = np.ndarray.flatten(C, order='F')
    out = [C1]  # init output list
    if returnlist:
        out.append(C)
    if retst:
        out.append(st)
    if len(out) == 1:
        return out[0]
    else:
        return out
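# A minimal usage sketch of multiplex (not part of the original module; the
# three-channel stream below is made up and assumes the module-level numpy
# and obspy imports). Flattening with order='F' interleaves the channels
# sample-by-sample, i.e. [z0, n0, e0, z1, n1, e1, ...].
def _demo_multiplex():
    import numpy as np
    import obspy
    traces = []
    for chan in ['BHZ', 'BHN', 'BHE']:
        tr = obspy.Trace(data=np.arange(5, dtype=float))
        tr.stats.station = 'STA1'
        tr.stats.channel = chan
        traces.append(tr)
    st = obspy.Stream(traces=traces)
    C1 = multiplex(st)
    # every 3rd sample of the multiplexed vector belongs to the same channel
    assert np.allclose(C1[0::3], st[0].data)
    return C1
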
def _divideIntoChunks(utc1, utc2, duration, randSamps):
    """
    Take two utc date time objects and create a generator to yield all
    times in between in intervals of duration. If randSamps is not None
    yield a random subsample of the time chunks instead, to make loading
    files easier. Inputs can be in any obspy readable format.
    """
    utc1 = obspy.UTCDateTime(utc1)
    utc2 = obspy.UTCDateTime(utc2)
    # convert to time stamps (epoch time)
    ts1 = utc1.timestamp - utc1.timestamp % duration
    ts2 = utc2.timestamp - utc2.timestamp % duration
    if randSamps is None:
        t = ts1
        while t <= ts2:
            yield obspy.UTCDateTime(t)  # yield a value
            t += duration  # advance by duration
    else:
        utcList = np.arange(utc1.timestamp, utc2.timestamp, duration)
        if randSamps > len(utcList) / 4:
            msg = ('Population too small for %d random samples, taking %d' %
                   (randSamps, len(utcList)))
            detex.log(__name__, msg, level='info')
            randSamps = len(utcList)
        # random.sample needs a sequence, not an ndarray
        ranutc = random.sample(list(utcList), randSamps)
        rsamps = [obspy.UTCDateTime(x) for x in ranutc]
        for samp in rsamps:
            yield samp
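# A usage sketch (hypothetical dates and duration; assumes the module-level
# obspy import): split one day into hour-long chunk boundaries.
def _demo_divide_into_chunks():
    chunks = list(_divideIntoChunks('2015-01-01', '2015-01-02', 3600, None))
    # 25 boundaries: both endpoints fall exactly on hour marks
    print(len(chunks), chunks[0], chunks[-1])
    return chunks
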
def _ensureUnique(cx, cxdf):
    """
    Make sure each coefficient is unique so it can be used as a key to
    reference time lags, if not unique perturb slightly
    """
    se = pd.Series(cx)
    dups = se[se.duplicated()]
    count = 0
    while len(dups) > 0:
        msg = ('Duplicates found in correlation coefficients, '
               'perturbing slightly to get unique values')
        detex.log(__name__, msg, level='warning', pri=True)
        for a in dups.iteritems():
            se[a[0]] = a[1] - abs(.00001 * np.random.rand())
        count += 1
        dups = se[se.duplicated()]
        if count > 10:
            msg = 'cannot make coefficients unique, killing program'
            detex.log(__name__, msg, level='error')
    if count > 1:  # if the cx has been perturbed update cxdf
        for a in range(len(cxdf)):
            sindex = sum(pd.isnull(cxdf.iloc[a]))
            tri1 = _triangular(len(cxdf))
            tri2 = _triangular(len(cxdf) - a)
            tri3 = _triangular(len(cxdf) - (a + 1))
            cxdf.values[a, sindex:] = cx[tri1 - tri2: tri1 - tri3]
    return se.values, cxdf
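# Sketch of the perturbation idea in isolation (hypothetical values, not part
# of the original module): duplicate correlation coefficients are nudged down
# by a tiny random amount so each can serve as a unique dictionary key.
def _demo_perturb_duplicates():
    import numpy as np
    import pandas as pd
    se = pd.Series([0.9, 0.9, 0.7])
    dups = se[se.duplicated()]
    for idx, val in dups.items():
        se[idx] = val - abs(.00001 * np.random.rand())
    assert se.is_unique  # 0.9 and 0.9 no longer collide
    return se
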
def _checkClusterInputs(filt, dtype, trim, decimate):
    """
    Check a few key input parameters to make sure everything is kosher
    """
    if filt is not None and len(filt) != 4:  # check filt
        msg = 'filt must either be None (no filter) or a len 4 list or tuple'
        detex.log(__name__, msg, level='error')
    if dtype != 'double' and dtype != 'single':  # check dtype
        msg = ('dtype must be either "double" or "single" not %s, setting to '
               'double' % dtype)
        dtype = 'double'
        detex.log(__name__, msg, level='warn', pri=True)
    if trim is not None:  # check trim
        if len(trim) != 2:
            msg = 'Trim must be a list or tuple of length 2'
            detex.log(__name__, msg, level='warn', pri=True)
        else:
            if -trim[0] > trim[1]:
                msg = 'Invalid trim parameters'
                detex.log(__name__, msg, level='error')
    if decimate is not None:
        if not isinstance(decimate, int):
            msg = 'decimate must be an int'
            detex.log(__name__, msg, level='error', e=TypeError)
def _checkSTALTA(st, filt, STATime, LTATime, limit):
    """
    Take a stream and make sure its vertical component (or first component
    if no vertical) does not exceed limit given STATime and LTATime.
    Return True if it passes, False if it fails.
    """
    if limit is None:
        return True
    if len(st) < 1:
        return None
    try:
        stz = st.select(component='Z')[0]
    except IndexError:  # if no Z found on trace
        return None
    if len(stz) < 1:
        stz = st[0]
    sz = stz.copy()
    sr = sz.stats.sampling_rate
    ltaSamps = int(LTATime * sr)  # classic_sta_lta expects sample counts
    staSamps = int(STATime * sr)
    cft = classic_sta_lta(sz.data, staSamps, ltaSamps)
    if np.max(cft) <= limit:
        return True
    else:
        sta = sz.stats.station
        t1 = sz.stats.starttime
        t2 = sz.stats.endtime
        msg = ('%s fails sta/lta req of %d between %s and %s' %
               (sta, limit, t1, t2))
        detex.log(__name__, msg, level='warn')
        return False
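# Usage sketch with synthetic data (all parameters hypothetical; assumes the
# module-level obspy/numpy imports and the classic_sta_lta import): a stream
# of stationary Gaussian noise should pass a generous STA/LTA limit.
def _demo_check_stalta():
    import numpy as np
    import obspy
    tr = obspy.Trace(data=np.random.randn(10000))
    tr.stats.channel = 'BHZ'
    tr.stats.sampling_rate = 100.0
    st = obspy.Stream(traces=[tr])
    # 1 s STA, 10 s LTA, trigger limit of 8 (made-up values)
    return _checkSTALTA(st, None, 1.0, 10.0, 8)
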
def _subSamp(Ceval, ind):
    """
    Estimate subsample time delays using cosine-fit interpolation

    Cespedes, I., Huang, Y., Ophir, J. & Spratt, S. Methods for estimation
    of sub-sample time delays of digitized echo signals. Ultrason. Imaging
    17, 142–171 (1995)

    Returns
    -------
    The amount the sample should be shifted (float between -.5 and .5)
    """
    # if max occurs at start or end of CC no extrapolation
    if ind == 0 or ind == len(Ceval) - 1:
        tau = 0.0
    else:
        cb4 = Ceval[ind - 1]
        caf = Ceval[ind + 1]
        cn = Ceval[ind]
        alpha = np.arccos((cb4 + caf) / (2 * cn))
        alsi = np.sin(alpha)
        tau = -(np.arctan((cb4 - caf) / (2 * cn * alsi)) / alpha)
        if abs(tau) > .5:
            msg = 'subsample failing, more than .5 sample shift predicted'
            detex.log(__name__, msg, level='warning', pri=True)
            return ind
    return tau
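# Worked example of the cosine fit (synthetic values, not part of the
# original module): sample a cosine whose true peak falls 0.3 samples past
# an integer index and recover the fractional offset. For a pure cosine the
# three-point fit is exact up to floating point.
def _demo_subsamp():
    import numpy as np
    true_shift = 0.3  # the peak lies 0.3 samples past index 5
    x = np.arange(11)
    Ceval = np.cos((x - 5 - true_shift) * 0.4)
    ind = np.argmax(Ceval)  # integer peak at index 5
    tau = _subSamp(Ceval, ind)
    return ind + tau  # approximately 5.3
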
def writeKMLFromStationKey(df='StationKey.csv', outname='stations.kml'):
    """
    Write a KML file from a station key

    Parameters
    ----------
    df : str or pandas DataFrame
        If str then the path to the station key. If DataFrame then a
        station key loaded with the readKey function with key_type='station'
    outname : str
        Name of the kml file
    """
    if isinstance(df, string_types):
        df = pd.read_csv(df)
    elif not isinstance(df, pd.DataFrame):
        msg = ('Input type not understood, must be path to station key or '
               'dataframe of station key')
        detex.log(__name__, msg, level='error')
    kml = simplekml.Kml(open=1)
    for a in df.iterrows():
        pnt = kml.newpoint()
        pnt.name = str(a[1].STATION)
        pnt.coords = [(a[1].LON, a[1].LAT)]
    kml.save(outname)
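# Hypothetical usage sketch (station names and coordinates are made up;
# assumes simplekml is installed): build a two-station key in memory and
# write it out as KML.
def _demo_write_kml():
    import pandas as pd
    df = pd.DataFrame({'STATION': ['STA1', 'STA2'],
                       'LAT': [40.0, 40.1],
                       'LON': [-111.0, -111.2]})
    writeKMLFromStationKey(df, outname='demo_stations.kml')
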
def _CreateCoeffArray(self, corSeries, name, threshold, sta, offsets, mags,
                      ewf, MPcon, events, ssTD, WFU, UtU):
    """
    Create an array of results for each detection, including time of
    detection, estimated magnitude, etc.
    """
    dpv = 0
    cols = ['DS', 'DS_STALTA', 'STMP', 'Name', 'Sta', 'MSTAMPmin',
            'MSTAMPmax', 'Mag', 'SNR', 'ProEnMag']
    sr = corSeries.SampRate  # sample rate
    start = corSeries.TimeStamp  # start time of data block
    # set array to evaluate for successful triggers
    if self.trigCon == 0:
        Ceval = corSeries.SSdetect.copy()
    elif self.trigCon == 1:
        Ceval = corSeries.STALTA.copy()
    Sar = pd.DataFrame(columns=cols)
    count = 0
    # while any values in the det. stat. vector exceed the threshold
    while Ceval.max() >= threshold[name]:
        trigIndex = Ceval.argmax()
        coef = corSeries.SSdetect[trigIndex]
        times = float(trigIndex) / sr + start
        if self.fillZeros:  # if zeros are being filled dont try STA/LTA
            SLValue = 0.0
        else:
            try:
                SLValue = corSeries.STALTA[trigIndex]
            except TypeError:
                SLValue = 0.0
        Ceval = self._downPlayArrayAroundMax(Ceval, sr, dpv)
        # estimate mags else return NaNs as mag estimates
        if self.estimateMags:  # estimate magnitudes
            M1, M2, SNR = self._estMag(trigIndex, corSeries, MPcon,
                                       mags[name], events[name], WFU[name],
                                       UtU[name], ewf[name], coef, times,
                                       name, sta)
            peMag, stMag = M1, M2
        else:
            peMag, stMag, SNR = np.NaN, np.NaN, np.NaN
        # kill switch to prevent infinite loop (just in case)
        if count > 4000:
            msg = (('over 4000 events found in single data block on %s for '
                    '%s around %s') % (sta, name, times))
            detex.log(__name__, msg, level='error')
        # get predicted origin time ranges
        minof = np.min(offsets[name])
        maxof = np.max(offsets[name])
        MSTAMPmax, MSTAMPmin = times - minof, times - maxof
        Sar.loc[count] = [coef, SLValue, times, name, sta, MSTAMPmin,
                          MSTAMPmax, stMag, SNR, peMag]
        count += 1
    return Sar
def _deleteDetDups(ssDB, trigCon, trigParameter, associateBuffer, starttime,
                   endtime, stations, tableName, PfKey=None):
    """
    Delete detections of the same event, keeping only the detection with
    the highest detection statistic
    """
    sslist = []
    SQLstr = _buildSQL(PfKey, trigCon, trigParameter, starttime, stations,
                       endtime, tableName)
    for sql in SQLstr:
        loadedRes = detex.util.loadSQLite(ssDB, tableName, sql=sql)
        if isinstance(loadedRes, pd.DataFrame):
            sslist.append(loadedRes)
    if len(sslist) < 1:  # if no events found
        return None
    try:
        ssdf = pd.concat(sslist, ignore_index=True)
    except ValueError:
        msg = 'Cant create detResults instance, no detections meet all reqs'
        detex.log(__name__, msg, level='error')
    ssdf.reset_index(drop=True, inplace=True)
    ssdf.sort_values(by=['Sta', 'MSTAMPmin'], inplace=True)
    con1 = ((ssdf.MSTAMPmin - associateBuffer) > ssdf.MSTAMPmax.shift())
    con2 = ssdf.Sta != ssdf.Sta.shift()
    ssdf['Gnum'] = (con1 | con2).cumsum()
    ssdf.sort_values(by=['Gnum', 'DS'], inplace=True)
    ssdf.drop_duplicates(subset='Gnum', keep='last', inplace=True)
    ssdf.reset_index(inplace=True, drop=True)
    return ssdf
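# Sketch of the grouping trick used above (made-up numbers, not part of the
# original module): detections whose time windows overlap on the same station
# share a group number, built by a cumulative sum over a boolean
# "new group starts here" series.
def _demo_group_overlaps():
    import pandas as pd
    df = pd.DataFrame({'Sta': ['A', 'A', 'A'],
                       'MSTAMPmin': [0.0, 5.0, 50.0],
                       'MSTAMPmax': [10.0, 15.0, 60.0]})
    con1 = (df.MSTAMPmin - 1) > df.MSTAMPmax.shift()  # no overlap with prev
    con2 = df.Sta != df.Sta.shift()  # station changed
    df['Gnum'] = (con1 | con2).cumsum()
    # rows 0 and 1 overlap -> same group; row 2 stands alone
    return df
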
def _tryDownloadData(net, sta, chan, loc, utcStart, utcEnd, client):
    # get data, return False if fail
    try:
        st = client.get_waveforms(net, sta, loc, chan, utcStart, utcEnd,
                                  attach_response=True)
        return st
    except Exception:
        msg = ('Download failed for %s.%s %s from %s to %s' %
               (net, sta, chan, str(utcStart), str(utcEnd)))
        detex.log(__name__, msg)
        return False
def _removeInstrumentResposne(st, prefilt, opType):
    st.detrend('linear')  # detrend
    st = _fftprep(st)
    try:
        st.remove_response(output=opType, pre_filt=prefilt)
    except Exception:
        msg = ('RemoveResponse failed for %s.%s, not saving' %
               (st[0].stats.network, st[0].stats.station))
        detex.log(__name__, msg, level='warning')
        st = False
    return st
def _loadDirectoryData(fet, start, end, net, sta, chan, loc):
    """
    Load continuous data from the detex directory structure
    """
    # get times with slight buffer
    t1 = obspy.UTCDateTime(start).timestamp
    t2 = obspy.UTCDateTime(end).timestamp
    buf = 3 * fet.conDatDuration
    dfind = _loadIndexDb(fet.directoryName, net + '.' + sta,
                         t1 - buf, t2 + buf)
    if dfind is None:
        t1p = obspy.UTCDateTime(t1)
        t2p = obspy.UTCDateTime(t2)
        msg = ('data from %s to %s on %s not found in %s' %
               (t1p, t2p, sta, fet.directoryName))
        detex.log(__name__, msg, level='warning', pri=False)
        return None
    # define conditions in which conData should not be loaded
    # con1 and con2 - no overlap (other than 10%)
    tra = t2 - t1  # time range
    con1 = ((dfind.Starttime <= t1) &
            (dfind.Endtime - tra * .1 < t1) &
            (dfind.Starttime < t2) &
            (dfind.Endtime < t2))
    con2 = ((dfind.Starttime > t1) &
            (dfind.Endtime > t1) &
            (dfind.Starttime + tra * .1 > t2) &
            (dfind.Endtime >= t2))
    df = dfind[~(con1 | con2)]
    if len(df) < 1:
        t1p = obspy.UTCDateTime(t1)
        t2p = obspy.UTCDateTime(t2)
        msg = ('data from %s to %s on %s not found in %s' %
               (t1p, t2p, sta, fet.directoryName))
        detex.log(__name__, msg, level='warning', pri=False)
        return None
    st = obspy.core.Stream()
    if len(df.Path) < 1:  # if no event fits description
        return None
    for path, fname in zip(df.Path, df.FileName):
        fil = os.path.join(path, fname)
        st1 = read(fil)
        if st1 is not None:
            st += st1
    # st.trim(starttime=start, endtime=end)
    # check if chan variable is string else iterate
    if isinstance(chan, string_types):
        stout = st.select(channel=chan)
    else:
        stout = obspy.core.Stream()
        for cha in chan:
            stout += st.select(channel=cha)
    loc = '*' if loc in ['???', '??'] else loc  # convert ? to *
    stout = stout.select(location=loc)
    return stout
def get_number_channels(st):
    """
    Take an obspy stream and get the number of unique channels in stream
    (stream must have exactly one station)
    """
    if len(set([x.stats.station for x in st])) > 1:
        msg = 'function only takes streams with exactly 1 station'
        detex.log(__name__, msg, level='error')
    nc = len(set([x.stats.channel for x in st]))
    return nc
def _getTemData(templatekey, stakey, templateDir, formatout, removeResponse,
                prefilt, client, timeBeforeOrigin, timeAfterOrigin, loc,
                opType):
    HD = os.getcwd()  # get current directory
    client = Client(client)
    for temkey in templatekey.iterrows():  # iterate template key rows
        eventID = temkey[1]['NAME']
        SK = stakey
        SK = SK[[not np.isnan(x) for x in SK.LAT]]
        try:
            oTime = obspy.core.UTCDateTime(temkey[1]['TIME'])  # origin time
        except Exception:
            msg = ('%s is a bad entry in TIME column for event %s' %
                   (temkey[1]['TIME'], temkey[1]['NAME']))
            detex.log(__name__, msg, level='error')
            raise Exception(msg)
        # start/end times are origin time minus/plus the requested buffers
        utcStart = oTime - timeBeforeOrigin
        utcEnd = oTime + timeAfterOrigin
        for sk in SK.iterrows():  # iterate each row of station keys
            chans = sk[1]['CHANNELS'].replace('-', ',')
            net = sk[1]['NETWORK']
            sta = sk[1]['STATION']
            st = True
            UTCstr = '%04d-%03dT%02d-%02d-%02d' % (
                oTime.year, oTime.julday, oTime.hour, oTime.minute,
                oTime.second)
            sdir = os.path.join(HD, templateDir, eventID).replace(' ', '')
            svar = ('%s.%s.%s.pkl' % (net, sta, UTCstr)).replace(' ', '')
            if os.path.isfile(os.path.join(sdir, svar)):
                # if file already exists skip process
                st = False
            if st is not False:
                st = _tryDownloadData(net, sta, chans, loc, utcStart,
                                      utcEnd, client)
            if st is not False:
                # _tryDownloadData can return False, so a second check needed
                if (temkey[0] + 1) % 25 == 0:
                    msg = ('%d events downloaded out of %d for Station %s' %
                           (temkey[0] + 1, len(templatekey), sk[1].STATION))
                    detex.log(__name__, msg, pri=True)
                if removeResponse:
                    st = _removeInstrumentResposne(st, prefilt, opType)
                if st is not False:
                    # create waveform sub directory if it does not exist
                    if not os.path.isdir(sdir):
                        os.makedirs(sdir)
                    st.write(os.path.join(sdir, svar), formatout)
def _estMag(self, trigIndex, corSeries, MPcon, mags, events, WFU, UtU, ewf,
            coef, times, name, sta):
    """
    Estimate magnitudes by applying projected subspace mag estimates and
    standard deviation mag estimates as outlined in Chambers et al. 2015.
    """
    WFlen = np.shape(WFU)[1]  # event waveform length
    nc = corSeries.Nc  # number of chans
    # continuous data chunk that triggered subspace
    ConDat = MPcon[trigIndex * nc:trigIndex * nc + WFlen]
    if self.issubspace:
        # continuous data chunk projected into subspace
        ssCon = np.dot(UtU, ConDat)
        # projected energy
        proEn = np.var(ssCon) / np.var(WFU, axis=1)
    # try to estimate pre-event noise level (for estimating SNR)
    if trigIndex * nc > 5 * WFlen:  # take 5x waveform length before event
        pe = MPcon[trigIndex * nc - 5 * WFlen: trigIndex * nc]
        rollingstd = pd.rolling_std(pe, WFlen)[WFlen - 1:]
    else:  # if not enough data take 6x waveform length after event
        pe = MPcon[trigIndex * nc: trigIndex * nc + WFlen + 6 * WFlen]
        rollingstd = pd.rolling_std(pe, WFlen)[WFlen - 1:]
    baseNoise = np.median(rollingstd)  # median of std for noise level
    SNR = np.std(ConDat) / baseNoise  # estimate SNR
    # ensure mags are greater than -15, else assume no mag value for event
    touse = mags > -15
    if self.issubspace:  # if subspace
        if not any(touse):  # if no defined magnitudes available
            msg = (('No magnitudes above -15 usable for detection at %s on'
                    ' station %s and %s') % (times, sta, name))
            detex.log(__name__, msg, level='warn')
            return np.NaN, np.NaN, SNR
        else:
            # correlation coefs between each event and data block
            ecor = [fast_normcorr(x, ConDat)[0] for x in ewf]
            eventCors = np.array(ecor)
            projectedEnergyMags = _estPEMag(mags, proEn, eventCors, touse)
            stdMags = _estSTDMag(mags, ConDat, ewf, eventCors, touse)
    else:  # if singleton
        assert len(mags) == 1
        if np.isnan(mags[0]) or mags[0] < -15:
            projectedEnergyMags = np.NaN
            stdMags = np.NaN
        else:
            # use simple waveform scaling if single
            d1 = np.dot(ConDat, WFU[0])
            d2 = np.dot(WFU[0], WFU[0])
            projectedEnergyMags = mags[0] + d1 / d2
            stdMags = mags[0] + np.log10(np.std(ConDat) / np.std(WFU[0]))
    return projectedEnergyMags, stdMags, SNR
def _getConDataStation(sk, HD, ConDir, removeResponse, prefilt, opType,
                       formatout, secBuf, loc, client, reverse):
    chans = sk[1]['CHANNELS'].replace('-', ',')
    net = sk[1]['NETWORK']
    sta = sk[1]['STATION']
    utcStart = obspy.core.UTCDateTime(sk[1]['STARTTIME'])
    utcStart = utcStart - utcStart.timestamp % 3600  # round to nearest hour
    utcEnd = obspy.core.UTCDateTime(sk[1]['ENDTIME'])
    utcEnd = utcEnd - utcEnd.timestamp % 3600  # round to nearest hour
    # get array with start/stop times by hour
    nhours = int((utcEnd.timestamp - utcStart.timestamp) / 3600) + 1
    timeArray = [utcStart + x * 3600 for x in range(nhours)]
    if reverse:
        timeArray = timeArray[::-1]
    for t in range(len(timeArray) - 1):
        if reverse:
            oTime = timeArray[t + 1]
        else:
            oTime = timeArray[t]
        st = True
        UTCstr = '%04d-%03dT%02d' % (oTime.year, oTime.julday, oTime.hour)
        # save directory for current loop
        sdir = os.path.join(HD, ConDir, net + '.' + sta, str(oTime.year),
                            "%03d" % oTime.julday)
        svar = '%s.%s.%s.pkl' % (net, sta, UTCstr)
        if os.path.isfile(os.path.join(sdir, svar)):
            # if file already exists skip process
            st = False
        if st is not False:
            if reverse:
                st = _tryDownloadData(net, sta, chans, loc, timeArray[t + 1],
                                      timeArray[t] + secBuf, client)
            else:
                st = _tryDownloadData(net, sta, chans, loc, timeArray[t],
                                      timeArray[t + 1] + secBuf, client)
        if st is not False:
            if removeResponse:
                st = _removeInstrumentResposne(st, prefilt, opType)
            # create waveform sub directory if it does not exist
            if not os.path.isdir(sdir):
                os.makedirs(sdir)
            if st is not False:
                try:
                    st.write(os.path.join(sdir, svar), formatout)
                except Exception:
                    msg = 'writing %s in %s failed' % (svar, sdir)
                    detex.log(__name__, msg, level='warning')
def _assignClientFunction(client):
    """
    Take an obspy client (FDSN, NEIC, EW, etc.) and return the correct
    loadFromClient function for getting data.
    """
    if isinstance(client, obspy.clients.fdsn.Client):
        return _loadFromFDSN
    elif isinstance(client, obspy.clients.neic.Client):
        return _loadFromNEIC
    elif isinstance(client, obspy.clients.earthworm.Client):
        return _loadFromEarthworm
    else:
        msg = 'Client type not supported'
        detex.log(__name__, msg, level='error', e=TypeError)
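# Hypothetical usage sketch (requires network access to instantiate the FDSN
# client; the 'IRIS' endpoint is just an example): pick the right loader
# function for a given client type.
def _demo_assign_client():
    from obspy.clients.fdsn import Client
    client = Client('IRIS')
    loader = _assignClientFunction(client)
    return loader  # _loadFromFDSN, ready to be called by the fetcher
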
def read(path):
    """
    Read a file from a path. If IOError or TypeError is raised, try
    prepending os.sep to the path.
    """
    try:
        st = obspy.read(path)
    except (IOError, TypeError):
        try:
            st = obspy.read(os.path.join(os.path.sep, path))
        except (IOError, TypeError):
            msg = 'Cannot read %s, the file may be corrupt, skipping it' % path
            detex.log(__name__, msg, level='warn', pri=True)
            return None
    return st
def _removeInstrumentResponse(fet, st):
    if not fet.removeResponse:  # pass stream back if no response removal
        return st
    st.detrend('linear')  # detrend
    st = _fftprep(st)
    try:
        st.remove_response(output=fet.opType, pre_filt=fet.prefilt)
    except Exception:
        utc1 = str(st[0].stats.starttime).split('.')[0]
        utc2 = str(st[0].stats.endtime).split('.')[0]
        msg = ('RemoveResponse failed for %s.%s from %s to %s, skipping' %
               (st[0].stats.network, st[0].stats.station, utc1, utc2))
        detex.log(__name__, msg, level='warning', pri=True)
        st = None
    return st
def __init__(self, method, client=None, removeResponse=True,
             inventoryArg=None, directoryName=None, opType='VEL',
             prefilt=[.05, .1, 15, 20], conDatDuration=3600, conBuff=120,
             timeBeforeOrigin=1 * 60, timeAfterOrigin=4 * 60, checkData=True,
             fillZeros=False):
    self.__dict__.update(locals())  # instantiate all inputs
    self.inventory = _getInventory(inventoryArg)
    self._checkInputs()
    if self.removeResponse and self.inventory is None:
        if self.method == 'dir':
            msg = ('Cannot remove response without a valid inventoryArg, '
                   'setting removeResponse to False')
            detex.log(__name__, msg, level='warning', pri=True)
            self.removeResponse = False
def _getSampleRates(df):
    """
    Get the sample rates on the main detex DataFrame
    """
    if isinstance(df, pd.DataFrame):
        row = df.iloc[0]
    else:
        row = df
    srs = set([row.Stats[x]['sampling_rate'] for x in row.Events])
    # make sure all sampling rates are the same else error out
    if len(srs) > 1:
        msg = ('Not all samp rates equal for all events on %s, skipping '
               'subspace or singles %s') % (row.Station, df.Names.values)
        detex.log(__name__, msg, level='warn', pri=True)
        return None
    return list(srs)
def readKey(dfkey, key_type='template'):
    """
    Read a template key csv and perform checks for required columns

    Parameters
    ----------
    dfkey : str or pandas DataFrame
        A path to the template key csv or the DataFrame itself
    key_type : str
        "template" for template key or "station" for station key

    Returns
    -------
    A pandas DataFrame if required columns exist, else raise Exception
    """
    # key types and required columns
    key_types = req_columns.keys()
    if key_type not in key_types:
        msg = "unsupported key type, supported types are %s" % key_types
        detex.log(__name__, msg, level='error')
    if isinstance(dfkey, string_types):
        if not os.path.exists(dfkey):
            msg = '%s does not exist, check path' % dfkey
            detex.log(__name__, msg, level='error')
        else:
            df = pd.read_csv(dfkey)
    elif isinstance(dfkey, pd.DataFrame):
        df = dfkey
    else:
        msg = 'Data type of dfkey not understood'
        detex.log(__name__, msg, level='error')
    # check required columns
    if not req_columns[key_type].issubset(df.columns):
        msg = ('Required columns not in %s, required columns for %s key '
               'are %s' % (df.columns, key_type, req_columns[key_type]))
        detex.log(__name__, msg, level='error')
    tdf = df.loc[:, list(req_columns[key_type])]
    condition = [all([x != '' for item, x in row.iteritems()])
                 for num, row in tdf.iterrows()]
    df = df[condition]
    # TODO if column TIME is a UTCDateTime object sorting fails, fix this
    df.sort_values(by=list(req_columns[key_type]), inplace=True)
    df.reset_index(drop=True, inplace=True)
    # specific operations for various key types
    if key_type == 'station':
        df['STATION'] = [str(x) for x in df['STATION']]
        df['NETWORK'] = [str(x) for x in df['NETWORK']]
    return df
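# Hypothetical usage sketch: load and validate a station key from disk. The
# file name is assumed to exist, and the module-level req_columns mapping
# (not shown in this section) defines which columns are required.
def _demo_read_station_key():
    stakey = readKey('StationKey.csv', key_type='station')
    print(stakey[['NETWORK', 'STATION']].head())
    return stakey
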
def loadClusters(filename='clust.pkl'):
    """
    Use pandas.read_pickle to load a pickled cluster
    (instance of detex.subspace.ClusterStream)

    Parameters
    ----------
    filename : str
        Path to the saved cluster instance

    Returns
    -------
    An instance of detex.subspace.ClusterStream
    """
    cl = pd.read_pickle(filename)
    if not isinstance(cl, detex.subspace.ClusterStream):
        msg = '%s is not a ClusterStream instance' % filename
        detex.log(__name__, msg, level='error')
    return cl
def loadSubSpace(filename='subspace.pkl'):
    """
    Use pandas.read_pickle to load a pickled subspace
    (instance of detex.subspace.SubSpaceStream)

    Parameters
    ----------
    filename : str
        Path to the saved subspace instance

    Returns
    -------
    An instance of detex.subspace.SubSpaceStream
    """
    ss = pd.read_pickle(filename)
    if not isinstance(ss, detex.subspace.SubSpace):
        msg = '%s is not a SubSpaceStream instance' % filename
        detex.log(__name__, msg, level='error')
    return ss
def _getChannels(df):
    """
    Get the channels on the main detex DataFrame
    """
    if isinstance(df, pd.DataFrame):
        row = df.iloc[0]
    else:
        row = df
    chansAr = np.array(row.Channels.values())
    chans = set([x for x in chansAr.flat])
    # make sure all channels are the same for each event
    if not all([chans == set(x) for x in row.Channels.values()]):
        msg = ('Not all channels are the same for all events on %s, skipping '
               'subspace or singles %s') % (row.Station, df.Names.values)
        detex.log(__name__, msg, level='warning', pri=True)
        return None
    return list(chans)
def _loadFromEarthworm(fet, start, end, net, sta, chan, loc):
    client = fet.client
    startstr = str(start)
    endstr = str(end)
    st = obspy.Stream()
    if '*' in loc or '?' in loc:  # adjust for earthworm loc codes
        loc = '--'
    for cha in chan:
        try:  # try earthworm client
            st += client.get_waveforms(net, sta, loc, cha, start, end)
        except Exception:
            msg = ('Could not fetch data on %s from %s to %s' %
                   (net + '.' + sta, startstr, endstr))
            detex.log(__name__, msg, level='warning', pri=False)
            st = None
            break  # st is now None, trying more channels would raise
    return st
def _getDSVect(fetcher, stakey, utc1, utc2, filt, deci, dtype, conDatNum,
               Nc, reqlen, sta, lta, ssArrayTD, ssArrayFD, limit=None):
    # get a generator to return streams, ask for 4x more for rejects
    stgen = fetcher.getConData(stakey, utcstart=utc1, utcend=utc2,
                               randSamps=conDatNum * 4)
    count = 0  # normal count
    scount = 0  # success count
    DSmat = []
    for st in stgen:  # loop over random samples of continuous data
        if st is None or len(st) < 1:
            continue  # no need to log, fetcher will do it
        count += 1
        st = detex.construct._applyFilter(st, filt, deci, dtype)
        if st is None or len(st) < 1:
            continue  # no need to log, fetcher will do it
        passSTALTA = _checkSTALTA(st, filt, sta, lta, limit)
        if not passSTALTA:
            continue
        if scount >= conDatNum:  # if we have all we need break
            break
        mpCon = detex.construct.multiplex(st, Nc)
        dsVect = _MPXSSCorr(mpCon, reqlen, ssArrayTD, ssArrayFD, Nc)
        DSmat.append(dsVect)
        scount += 1
    if count == 0:
        msg = 'Could not get any data for %s' % stakey.STATION.iloc[0]
        detex.log(__name__, msg, level='error')
    return DSmat, count, scount
def _loadFromNEIC(fet, start, end, net, sta, chan, loc):
    """
    Use obspy.neic.Client to fetch waveforms
    """
    client = fet.client
    # str reps of utc objects for error messages
    startstr = str(start)
    endstr = str(end)
    st = obspy.Stream()
    for cha in chan:
        try:  # try neic client
            st += client.get_waveforms(net, sta, loc, cha, start, end)
        except Exception:
            msg = ('Could not fetch data on %s from %s to %s' %
                   (net + '.' + sta, startstr, endstr))
            detex.log(__name__, msg, level='warning', pri=False)
            st = None
            break  # st is now None, trying more channels would raise
    return st
def _applyFilter(st, filt, decimate=False, dtype='double', fillZeros=False):
    """
    Apply a filter, decimate, and trim to even start/end times
    """
    if st is None or len(st) < 1:
        msg = '_applyFilter got a stream with 0 length'
        detex.log(__name__, msg, level='warn')
        return obspy.Stream()
    st.sort()
    st1 = st.copy()
    if dtype == 'single':  # cast into single
        for num, tr in enumerate(st):
            st[num].data = tr.data.astype(np.float32)
    nc = list(set([x.stats.channel for x in st]))
    if len(st) > len(nc):  # if data is fragmented only keep largest chunk
        if fillZeros:
            st = _mergeChannelsFill(st)
        else:
            st = _mergeChannels(st)
    if not len(st) == len(nc) or len(st) < 1:
        sta = st1[0].stats.station
        stime = str(st1[0].stats.starttime)
        msg = 'Stream is too fractured around %s on %s' % (stime, sta)
        detex.log(__name__, msg, level='warn')
        return obspy.Stream()
        # st1.write('failed_merge-%s-%s.pkl' % (sta, stime), 'pickle')
        # assert len(st) == len(nc)
    if decimate:
        st.decimate(decimate)
    startTrim = max([x.stats.starttime for x in st])
    endTrim = min([x.stats.endtime for x in st])
    if startTrim > endTrim:  # return empty stream if chans dont overlap
        return obspy.Stream()
    st.trim(starttime=startTrim, endtime=endTrim)
    st = st.split()
    st.detrend('linear')
    if isinstance(filt, list) or isinstance(filt, tuple):
        st.filter('bandpass', freqmin=filt[0], freqmax=filt[1],
                  corners=filt[2], zerophase=filt[3])
    return st
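# Usage sketch on synthetic data (hypothetical channels, sampling rate, and
# filter parameters; assumes module-level obspy/numpy imports): bandpass
# 1-10 Hz with 2 corners, zerophase, on a three-channel noise stream.
def _demo_apply_filter():
    import numpy as np
    import obspy
    traces = []
    for chan in ['BHZ', 'BHN', 'BHE']:
        tr = obspy.Trace(data=np.random.randn(2000))
        tr.stats.channel = chan
        tr.stats.sampling_rate = 50.0
        traces.append(tr)
    st = obspy.Stream(traces=traces)
    return _applyFilter(st, filt=[1, 10, 2, True])
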
def _readVeriFile(veriFile):
    try:
        df = pd.read_csv(veriFile)
    except Exception:
        try:
            df = pd.read_pickle(veriFile)
        except Exception:
            try:
                df = detex.util.loadSQLite(veriFile, 'verify')
            except Exception:
                msg = ('%s could not be read, it must either be a csv, '
                       'pickled dataframe or sqlite database') % veriFile
                detex.log(__name__, msg, level='error')
    reqcols = ['TIME', 'LAT', 'LON', 'MAG', 'DEPTH', 'NAME']  # required cols
    if not set(reqcols).issubset(df.columns):
        msg = ('%s does not have the required columns, it needs '
               'TIME, LAT, LON, MAG, DEPTH, NAME') % veriFile
        detex.log(__name__, msg, level='error')
    return df
def _alignTD(delayDF, srow):
    """
    Loop through the delay DataFrame and apply offsets to create a
    dictionary of aligned arrays
    """
    aligned = {}
    # find the required length for each aligned stream
    TDlengths = len(srow.MPtd[delayDF.Events[0]]) - max(delayDF.SampleDelays)
    for ind, row in delayDF.iterrows():
        orig = srow.MPtd[row.Events]
        orig = orig[row.SampleDelays:]
        orig = orig[:TDlengths]
        aligned[row.Events] = orig
        if len(orig) == 0:
            msg = ('Alignment of multiplexed stream failing on %s, '
                   'try raising ccreq or widening trim window' % srow.Station)
            msg2 = _idAlignProblems(delayDF)
            detex.log(__name__, msg + msg2, level='error')
    return aligned
def _CCX2(mpfd1, mpfd2, mptd1, mptd2, Nc1, Nc2):
    """
    Find the max correlation coefficient and corresponding lag time between
    2 traces. The ffts should already have been calculated.
    """
    if len(Nc1) != len(Nc2):  # make sure same number of channels
        msg = 'Number of channels not equal, cannot perform correlation'
        detex.log(__name__, msg, level='error')
    Nc = len(Nc1)  # number of channels
    if len(mptd1) != len(mptd2) or len(mpfd2) != len(mpfd1):
        msg = 'Lengths not equal on multiplexed data, cannot correlate'
        detex.log(__name__, msg, level='error')
    n = len(mptd1)
    trunc = n // (2 * Nc) - 1  # truncate value
    # trunc = n - 1
    # n = trunc + 1
    mptd2Temp = mptd2.copy()
    mptd2Temp = np.lib.pad(mptd2Temp, (n - 1, n - 1), str('constant'),
                           constant_values=(0, 0))
    a = pd.rolling_mean(mptd2Temp, n)[n - 1:]
    b = pd.rolling_std(mptd2Temp, n)[n - 1:]
    b *= np.sqrt((n - 1.0) / n)
    c = np.real(scipy.fftpack.ifft(np.multiply(np.conj(mpfd1), mpfd2)))
    c1 = np.concatenate([c[-(n - 1):], c[:n]])  # swap end to start
    # slice by # of channels so as not to mix channels in multiplexed stream
    result = ((c1 - mptd1.sum() * a) / (n * b * np.std(mptd1)))[Nc - 1::Nc]
    result = result[trunc:-trunc]
    try:
        maxcc = np.nanmax(result)
        mincc = np.nanmin(result)
        maxind = np.nanargmax(result)
        if maxcc > 1. or mincc < -1.:
            # an inf can show up if some waveforms have been zeroed out;
            # zero the offending entries and search again
            result[(result > 1) | (result < -1)] = 0
            maxcc = np.nanmax(result)
            maxind = np.nanargmax(result)
    except ValueError:  # if it fails skip
        return 0.0, 0.0, 0.0
    subsamp = _subSamp(result, maxind)
    return maxcc, (maxind + 1 + trunc) * Nc - n, subsamp
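# Sanity-check sketch of the FFT identity used above (hypothetical signals,
# not part of the original module): circular cross-correlation of x with y
# equals ifft(conj(fft(x)) * fft(y)) for real inputs.
def _demo_fft_xcorr():
    import numpy as np
    x = np.random.randn(64)
    y = np.random.randn(64)
    fast = np.real(np.fft.ifft(np.conj(np.fft.fft(x)) * np.fft.fft(y)))
    # brute-force circular cross-correlation for comparison
    slow = np.array([np.dot(x, np.roll(y, -k)) for k in range(64)])
    assert np.allclose(fast, slow)
    return fast
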
def _verifyEvents(Dets, Autos, veriFile, veriBuffer, includeAllVeriColumns):
    if not veriFile or not os.path.exists(veriFile):
        msg = 'No veriFile passed or it does not exist, skipping verification'
        detex.log(__name__, msg, pri=True)
        return
    vertem = _readVeriFile(veriFile)
    vertem['STMP'] = [obspy.core.UTCDateTime(x) for x in vertem['TIME']]
    verlist = []
    additionalColumns = list(set(vertem.columns) -
                             set(['TIME', 'LAT', 'LON', 'MAG', 'ProEnMag',
                                  'DEPTH', 'NAME']))
    for vernum, verrow in vertem.iterrows():
        con1 = Dets.MSTAMPmin - veriBuffer / 2. < verrow.STMP
        con2 = Dets.MSTAMPmax + veriBuffer / 2. > verrow.STMP
        con3 = [not x for x in Dets.Verified]
        temDets = Dets[con1 & con2 & con3]
        if len(temDets) > 0:
            # TODO handle this when multiple verifications occur
            trudet = temDets[temDets.DSav == temDets.DSav.max()]
            Dets.loc[trudet.index[0], 'Verified'] = True
            if includeAllVeriColumns:
                for col in additionalColumns:
                    if col not in trudet.columns:
                        trudet[col] = verrow[col]
            trudet['VerMag'] = verrow.MAG
            trudet['VerLat'] = verrow.LAT
            trudet['VerLon'] = verrow.LON
            trudet['VerDepth'] = verrow.DEPTH
            trudet['VerName'] = verrow.NAME
            verlist.append(trudet)
        else:
            con1 = Autos.MSTAMPmin - veriBuffer / 2. < verrow.STMP
            con2 = Autos.MSTAMPmax + veriBuffer / 2. > verrow.STMP
            con3 = [not x for x in Autos.Verified]
            temAutos = Autos[con1 & con2 & con3]
            if len(temAutos) > 0:
                # TODO handle this when multiple verifications occur
                trudet = temAutos[temAutos.DSav == temAutos.DSav.max()]
                Autos.loc[trudet.index[0], 'Verified'] = True
                if includeAllVeriColumns:
                    for col in additionalColumns:
                        if col not in trudet.columns:
                            trudet[col] = verrow[col]
                trudet['VerMag'] = verrow.MAG
                trudet['VerLat'] = verrow.LAT
                trudet['VerLon'] = verrow.LON
                trudet['VerDepth'] = verrow.DEPTH
                trudet['VerName'] = verrow.NAME
                verlist.append(trudet)
    if len(verlist) > 0:
        verifs = pd.concat(verlist, ignore_index=True)
        # sort and drop duplicates so each verify event is verified only once
        verifs.sort_values(by=['Event', 'DSav'], inplace=True)
        verifs.drop_duplicates(subset='Event', inplace=True)
        verifs.drop('Verified', axis=1, inplace=True)
    else:
        verifs = pd.DataFrame()
    return verifs
def _testStreamLengths(TRDF, row, ind):
    lens = np.array([len(x) for x in row.MPtd.values()])
    # trim to smallest length if within 90% of median, else kill key
    le = np.min(lens[lens > np.median(lens) * .9])
    keysToKill = [x for x in row.Events if len(row.MPtd[x]) < le]
    # trim events slightly too long if any
    for key in row.Events:
        trimed = row.MPtd[key][:le]
        TRDF.loc[ind, 'MPtd'][key] = trimed
    # reset keys on TRDF
    tmar = np.array(TRDF.Events[ind])
    tk = [x not in keysToKill for x in TRDF.Events[ind]]
    TRDF.Events[ind] = tmar[np.array(tk)]
    for key in keysToKill:
        msg = ('%s on %s is out of length tolerance, removing' %
               (key, row.Station))
        detex.log(__name__, msg, level='warn', pri=True)
        TRDF.MPtd[ind].pop(key, None)
    return TRDF