wavefiles += glob.glob(os.path.join(data_directory, '.'.join([pick.station, '*']))) wavefiles = list(set(wavefiles)) for wavefile in wavefiles: print ' '.join(['Reading data from', wavefile]) if 'st' not in locals(): st = read(wavefile) else: st += read(wavefile) st = st.merge(fill_value='interpolate') day = st[0].stats.starttime.date # Process the data with our required parameters for tr in st: tr = pre_processing.dayproc(tr, 1.0, 20.0, 3, 100.0,\ debug, day) # Use the template generation function to cut our templates template = template_gen._template_gen(picks, st, length=1.0, swin='all', prepick=0.1, plot=True) # This will generate an obspy.Stream object # Append this Stream to the list of templates templates += [template] template_names.append('_'.join(['tutorial', str(i)])) # Save template for later template.write(os.path.join(data_directory, '_'.join([template_names[i], 'template.ms'])), format='MSEED') # Delete excess information from memory If you are re-using this script # with the same templates you should be able to comment out this loop
stations = list(set([tr.stats.station for tr in st])) # Set how many cores we want to parallel across, we will set this to four # as this is the number of templates, if your machine has fewer than four # cores/CPUs the multiprocessing will wait until there is a free core. # Setting this to be higher than the number of templates will have no # increase in speed as only detections for each template are computed in # parallel. It may also slow your processing by using more memory than # needed, to the extent that swap may be filled. ncores = 4 # Pre-process the data to set frequency band and sampling rate # Note that this is, and MUST BE the same as the parameters used for the # template creation. print("Processing the seismic data") st = pre_processing.dayproc(st, lowcut=2.0, highcut=9.0, filt_order=4, samp_rate=20.0, debug=0, starttime=t1) # Convert from list to stream st = Stream(st) # Now we can conduct the matched-filter detection detections = match_filter.match_filter( template_names=template_names, template_list=templates, st=st, threshold=8.0, threshold_type="MAD", trig_int=6.0, plotvar=True, plotdir=".", cores=ncores, tempdir=False,
def from_contbase(sfile, contbase_list, lowcut, highcut, samp_rate, filt_order,
                  length, prepick, swin, debug=0):
    """
    Generate a template from an s-file and a database of day-long waveforms.

    Picks are read from ``sfile``.  For the first P or S pick found on each
    station/channel, the day directory of every continuous database is
    globbed for waveform files of that station.  The files found are read,
    merged, passed trace-by-trace through ``pre_processing.dayproc`` and
    finally cut around the used picks by ``_template_gen``.

    :type sfile: str
    :param sfile: Path to a seisan nordic type s-file holding the picks.
    :type contbase_list: list of tuple of str
    :param contbase_list: Tuples of the form (path, type, network).  Only
        path and type are read here; type must be one of 'yyyy/mm/dd',
        'Yyyyy/Rjjj.01' or 'yyyymmdd'.  Entries with any other type are
        skipped with a warning.
    :type lowcut: float
    :param lowcut: Low cut (Hz), handed to ``pre_processing.dayproc``.
    :type highcut: float
    :param highcut: High cut (Hz), handed to ``pre_processing.dayproc``.
    :type samp_rate: float
    :param samp_rate: New sampling rate (Hz), handed to
        ``pre_processing.dayproc``.
    :type filt_order: int
    :param filt_order: Filter order, handed to ``pre_processing.dayproc``.
    :type length: float
    :param length: Extract length in seconds, handed to ``_template_gen``.
    :type prepick: float
    :param prepick: Pre-pick time in seconds, handed to ``_template_gen``.
    :type swin: str
    :param swin: Phase selection ('all', 'P' or 'S'), handed to
        ``_template_gen``.
    :type debug: int
    :param debug: Level of debugging output, higher=more.

    :returns: The value returned by ``_template_gen`` for the used picks.
    :raises IOError: If ``sfile`` does not exist, or if no waveform files
        could be found for any of the used picks.
    """
    # Check the input before importing anything heavy.
    import os
    if not os.path.isfile(sfile):
        raise IOError('sfile does not exist')
    import glob
    import warnings
    from eqcorrscan.utils import Sfile_util
    from eqcorrscan.utils import pre_processing
    from obspy import UTCDateTime
    from obspy import read as obsread

    # The event day (midnight) is rebuilt from the date fields of the header.
    header = Sfile_util.readheader(sfile)
    day = UTCDateTime(str(header.time.year) + '-' +
                      str(header.time.month).zfill(2) + '-' +
                      str(header.time.day).zfill(2))

    picks = Sfile_util.readpicks(sfile)
    # Single-argument print() calls behave the same on Python 2 and 3.
    print("I have found the following picks")
    pick_chans = []
    used_picks = []
    # Start from an empty list so that "nothing found" is detectable below
    # instead of surfacing as a NameError.
    wavefiles = []
    for pick in picks:
        stachan = pick.station + pick.channel
        if stachan not in pick_chans and pick.phase in ['P', 'S']:
            pick_chans.append(stachan)
            used_picks.append(pick)
            print(pick)
            for contbase in contbase_list:
                if contbase[1] == 'yyyy/mm/dd':
                    daydir = (str(day.year) + '/' +
                              str(day.month).zfill(2) + '/' +
                              str(day.day).zfill(2))
                elif contbase[1] == 'Yyyyy/Rjjj.01':
                    daydir = ('Y' + str(day.year) + '/R' +
                              str(day.julday).zfill(3) + '.01')
                elif contbase[1] == 'yyyymmdd':
                    daydir = (str(day.year) + str(day.month).zfill(2) +
                              str(day.day).zfill(2))
                else:
                    # Previously this fell through with daydir undefined
                    # (or left over from the preceding database).
                    warnings.warn('Unknown continuous database type: ' +
                                  str(contbase[1]) + ', skipping ' +
                                  str(contbase[0]))
                    continue
                wavefiles += glob.glob(contbase[0] + '/' + daydir + '/*' +
                                       pick.station + '.*')
        elif pick.phase in ['P', 'S']:
            print('Duplicate pick ' + pick.station + ' ' + pick.channel +
                  ' ' + pick.phase + ' ' + str(pick.time))
        elif pick.phase == 'IAML':
            print('Amplitude pick ' + pick.station + ' ' + pick.channel +
                  ' ' + pick.phase + ' ' + str(pick.time))
    picks = used_picks

    # De-duplicate (several picks can share a station) and fix the order.
    wavefiles = sorted(set(wavefiles))
    if not wavefiles:
        raise IOError('No waveform files found for the picks in ' + sfile)

    st = None
    for wavefile in wavefiles:
        print("I am going to read waveform data from: " + wavefile)
        if st is None:
            st = obsread(wavefile)
        else:
            st += obsread(wavefile)

    # Process waveform data
    st.merge(fill_value='interpolate')
    for tr in st:
        # NOTE(review): the return value was never stored back into the
        # stream, so this relies on dayproc working on the trace in place --
        # confirm against pre_processing.dayproc.
        pre_processing.dayproc(tr, lowcut, highcut, filt_order,
                               samp_rate, debug, day)

    # Cut and extract the templates
    st1 = _template_gen(picks, st, length, swin, prepick=prepick)
    return st1
actual_stations.append(station) actual_stations=list(set(actual_stations)) st=st.merge(fill_value='interpolate') # Enforce trace continuity if not 'st' in locals(): print 'No data found for day: '+str(day) elif len(actual_stations) < matchdef.minsta: print 'Data from fewer than '+str(matchdef.minsta)+' stations found, will not detect' else: if not Test: # Process data print 'Processing the data for day '+daydir if matchdef.debug >= 4: for tr in st: tr=pre_processing.dayproc(tr, templatedef.lowcut, templatedef.highcut,\ templatedef.filter_order, templatedef.samp_rate,\ matchdef.debug, day) else: st=Parallel(n_jobs=10)(delayed(pre_processing.dayproc)(tr, templatedef.lowcut,\ templatedef.highcut,\ templatedef.filter_order,\ templatedef.samp_rate,\ matchdef.debug, day)\ for tr in st) if not Prep: # For some reason st is now a list rather than a stream if 'stream_st' in locals(): del stream_st for tr in st: if 'stream_st' in locals(): stream_st+=tr
def plot_detection_wavs(family, tribe, wav_dirs, start=None, end=None,
                        save=False, save_dir=None, no_dets=5):
    """
    Plot a random sample of a family's detections against its template.

    Wrapper on detection_multiplot(): up to ``no_dets`` detections are drawn
    at random from ``family``, every self-detection (detect_val / no_chans
    equal to 1.0) is added if it was not drawn already, and for each day in
    the requested range the day's waveforms are fetched, pre-processed with
    the template's parameters and plotted around each detection.

    :param family: Family of detections; must support len(), iteration and
        provide a ``template`` attribute.
    :param tribe: Container indexed by template name, used to look up the
        template (``lowcut``, ``highcut``, ``filt_order``, ``samp_rate``,
        ``st`` and ``name`` are read from it).
    :param wav_dirs: Iterable of directory prefixes; the year of each day
        is appended to every prefix before calling grab_day_wavs().
    :param start: Optional first day as 'dd/mm/YYYY'.  If not given the
        range is taken from the earliest and latest selected events.
    :param end: Last day as 'dd/mm/YYYY'; required whenever ``start`` is
        given.
    :param save: Passed to detection_multiplot() as ``save``.
    :param save_dir: Directory used to build the plot file name that is
        passed to detection_multiplot() as ``savefile``.
    :param no_dets: Maximum number of randomly selected detections.  It is
        clipped to the size of the family.
    :return: None
    """
    n_family = len(family)
    if n_family == 0:
        print('No detections in family, nothing to plot')
        return
    # Sampling without replacement fails if more items are requested than
    # exist, so never ask for more than the family holds.
    n_random = min(no_dets, n_family)
    rand_inds = set(
        int(ind) for ind in
        np.random.choice(range(n_family), n_random, replace=False))
    chosen = [det for i, det in enumerate(family) if i in rand_inds]
    # Always plot self-detections, but do not plot one twice when it was
    # also part of the random draw.
    self_dets = [det for det in family
                 if det.detect_val / det.no_chans == 1.0]
    for det in self_dets:
        if not any(det is picked for picked in chosen):
            chosen.append(det)
    cat = Catalog(events=[det.event for det in chosen])
    cat.events.sort(key=lambda ev: ev.picks[0].time)
    sub_fam = Family(template=family.template, detections=chosen)
    temp = tribe[sub_fam.template.name]
    if start:
        cat_start = datetime.strptime(start, '%d/%m/%Y')
        cat_end = datetime.strptime(end, '%d/%m/%Y')
    else:
        cat_start = cat[0].picks[0].time.date
        cat_end = cat[-1].picks[0].time.date
    for date in date_generator(cat_start, cat_end):
        dto = UTCDateTime(date)
        dets = [det for det in sub_fam
                if dto < det.detect_time < dto + 86400]
        if not dets:
            print('No detections on: {!s}'.format(dto))
            continue
        print('Running for date: %s' % str(dto))
        # Map each station to the channels picked on it for this day.
        stachans = {}
        for det in dets:
            for pk in det.event.picks:
                sta = pk.waveform_id.station_code
                chan = pk.waveform_id.channel_code
                if sta not in stachans:
                    stachans[sta] = [chan]
                elif chan not in stachans[sta]:
                    stachans[sta].append(chan)
        # Grab day's wav files
        wav_ds = ['%s%d' % (d, dto.year) for d in wav_dirs]
        stream = grab_day_wavs(wav_ds, dto, stachans)
        print('Preprocessing')
        st1 = pre_processing.dayproc(stream, temp.lowcut, temp.highcut,
                                     temp.filt_order, temp.samp_rate,
                                     starttime=dto, num_cores=3)
        for det in dets:
            # 3 s before to 7 s after the detection time.
            det_st = st1.slice(starttime=det.detect_time - 3,
                               endtime=det.detect_time + 7).copy()
            fname = '{}/{}.png'.format(
                save_dir, str(det.event.resource_id).split('/')[-1])
            det_t = 'Template {}: {}'.format(temp.name, det.detect_time)
            detection_multiplot(det_st, temp.st, [det.detect_time],
                                save=save, savefile=fname, title=det_t)
            # Close figures as we go so they do not pile up in memory.
            plt.close('all')
    return
def from_client(catalog, client_id, lowcut, highcut, samp_rate, filt_order,
                length, prepick, swin, debug=0, plot=False):
    """
    Generate templates from waveforms downloaded through an FDSN client.

    For every event in ``catalog`` a full day of data is requested for each
    picked channel, the data are merged and run through
    ``pre_processing.dayproc``, and a template is cut by ``_template_gen``.

    :type catalog: obspy.Catalog
    :param catalog: Catalog class containing desired template events.
    :type client_id: str
    :param client_id: Name handed to the obspy FDSN ``Client`` constructor.
    :type lowcut: float
    :param lowcut: Low cut (Hz), handed to ``pre_processing.dayproc``.
    :type highcut: float
    :param highcut: High cut (Hz), handed to ``pre_processing.dayproc``.
    :type samp_rate: float
    :param samp_rate: New sampling rate (Hz), handed to
        ``pre_processing.dayproc``.
    :type filt_order: int
    :param filt_order: Filter order, handed to ``pre_processing.dayproc``.
    :type length: float
    :param length: Extract length in seconds, handed to ``_template_gen``.
    :type prepick: float
    :param prepick: Pre-pick time in seconds, handed to ``_template_gen``.
    :type swin: str
    :param swin: Phase selection ('all', 'P' or 'S'), handed to
        ``_template_gen``.
    :type debug: int
    :param debug: Level of debugging output, higher=more.  Any value above
        zero also plots the raw and processed streams.
    :type plot: bool
    :param plot: Plot templates or not, handed to ``_template_gen``.

    :returns: list with one entry (the value returned by ``_template_gen``)
        per event for which data could be downloaded.  Events without any
        data are skipped with a warning, so the list can be shorter than
        the catalog.
    """
    import warnings
    # This import section copes with namespace changes between obspy versions
    import obspy
    if int(obspy.__version__.split('.')[0]) >= 1:
        from obspy.clients.fdsn import Client
        from obspy.clients.fdsn.header import FDSNException
    else:
        from obspy.fdsn import Client
        from obspy.fdsn.header import FDSNException
    from eqcorrscan.utils import pre_processing
    from obspy import Stream, UTCDateTime

    client = Client(client_id)
    temp_list = []
    for event in catalog:
        print("Fetching the following traces from " + client_id)
        # A fresh, empty stream per event replaces the old
        # "'st' in locals()" bookkeeping.
        st = Stream()
        starttime = None
        requested = set()
        for pick in event.picks:
            net = pick.waveform_id.network_code
            sta = pick.waveform_id.station_code
            chan = pick.waveform_id.channel_code
            loc = pick.waveform_id.location_code
            pick_date = pick.time.date
            starttime = UTCDateTime(pick_date)
            endtime = starttime + 86400
            # Here we download a full day of data.  We do this so that minor
            # differences in processing due to the effect of resampling do
            # not impinge on our cross-correlations.
            if debug > 0:
                print('start-time: ' + str(starttime))
                print('end-time: ' + str(endtime))
                print('pick-time: ' + str(pick.time))
                # phase_hint and the codes may be None, so avoid plain
                # string concatenation.
                print('pick phase: ' + str(pick.phase_hint))
                print('.'.join([code or '' for code in
                                (net, sta, loc, chan)]))
            # Several picks on one channel would request identical data.
            request = (net, sta, loc, chan, pick_date)
            if request in requested:
                continue
            requested.add(request)
            try:
                st += client.get_waveforms(net, sta, loc, chan,
                                           starttime, endtime)
            except FDSNException:
                warnings.warn('Found no data for this station')
        if len(st) == 0:
            # Nothing to process; this used to end in a NameError.
            warnings.warn('No data downloaded for event: ' +
                          str(event.resource_id) + ', skipping')
            continue
        if debug > 0:
            st.plot()
        print('Pre-processing data for event: ' + str(event.resource_id))
        st.merge(fill_value='interpolate')
        # starttime is the day start of the last pick handled above.
        st1 = pre_processing.dayproc(st, lowcut, highcut, filt_order,
                                     samp_rate, starttime=starttime,
                                     debug=debug, parallel=True)
        if debug > 0:
            st1.plot()
        template = _template_gen(event.picks, st1, length, swin, prepick,
                                 plot)
        temp_list.append(template)
    return temp_list
def run_tutorial(plot=False):
    """
    Run the matched-filter tutorial on the tutorial templates.

    Templates are read from files matching 'tutorial_template_*.ms' in the
    working directory.  For each day spanned by the templates, data for the
    template stations are downloaded from GeoNet, pre-processed, and
    scanned with ``match_filter``.  Detections within one second of a
    stronger detection on the same day are discarded.

    :type plot: bool
    :param plot: Passed to ``match_filter`` as ``plotvar``; if True every
        retained detection is also plotted against its template.

    :returns: list of the retained detections over all days.
    :raises IOError: If no template files are found.
    """
    from eqcorrscan.utils import pre_processing
    from eqcorrscan.utils import plotting
    from eqcorrscan.core import match_filter
    import glob
    # This import section copes with namespace changes between obspy versions
    import obspy
    if int(obspy.__version__.split('.')[0]) >= 1:
        from obspy.clients.fdsn import Client
    else:
        from obspy.fdsn import Client
    from obspy import UTCDateTime, Stream, read

    # First we want to load our templates
    template_names = glob.glob('tutorial_template_*.ms')
    if not template_names:
        raise IOError('Template files not found, have you run the template ' +
                      'creation tutorial?')
    templates = [read(template_name) for template_name in template_names]

    # Unique (station, channel) pairs used by the templates.
    stations = list(set(
        (tr.stats.station, tr.stats.channel)
        for template in templates for tr in template))

    # We are going to look for detections on the day of our template,
    # however, to generalize, we loop through the days between our
    # templates; in this case that is only one day.
    template_days = sorted(
        template[0].stats.starttime.date for template in templates)
    kdays = (template_days[-1] - template_days[0]).days + 1

    # Set how many cores we want to parallel across.  Only detections for
    # each template are computed in parallel, so more cores than templates
    # gives no speed-up and only costs memory.
    ncores = 4

    # One client serves every day; no need to rebuild it per iteration.
    client = Client("GEONET")

    unique_detections = []
    for i in range(kdays):
        t1 = UTCDateTime(template_days[0]) + (86400 * i)
        t2 = t1 + 86400
        # Generate the bulk information to query the GeoNet database.
        # `stations` must stay a list of (station, channel) tuples here:
        # it used to be overwritten with bare station names further down,
        # which broke this request on any second day.
        bulk_info = [
            ('NZ', station[0], '*',
             station[1][0] + 'H' + station[1][-1], t1, t2)
            for station in stations]

        # Note this will take a little while.
        print('Downloading seismic data, this may take a while')
        st = client.get_waveforms_bulk(bulk_info)
        # Merge the stream, it will be downloaded in chunks
        st.merge(fill_value='interpolate')

        # Pre-process the data to set frequency band and sampling rate.
        # Note that this is, and MUST BE the same as the parameters used
        # for the template creation.
        print('Processing the seismic data')
        st = pre_processing.dayproc(st, lowcut=2.0, highcut=9.0,
                                    filt_order=4, samp_rate=20.0, debug=0,
                                    starttime=t1, num_cores=ncores)
        # Convert from list to stream
        st = Stream(st)

        # Now we can conduct the matched-filter detection
        detections = match_filter.match_filter(
            template_names=template_names, template_list=templates, st=st,
            threshold=8.0, threshold_type='MAD', trig_int=6.0,
            plotvar=plot, plotdir='.', cores=ncores, tempdir=False,
            debug=1, plot_format='jpg')

        # Work out how many unique events we have, to compare with the
        # GeoNet catalog of 20 events on this day in this sequence.
        day_detections = []
        for m, master in enumerate(detections):
            keep = True
            for s, slave in enumerate(detections):
                if m == s:
                    continue
                if abs(master.detect_time - slave.detect_time) > 1.0:
                    continue
                # Within 1 s of each other: keep only the strongest.  On an
                # exact tie keep the earlier list entry, so that tied
                # detections no longer eliminate each other.
                if slave.detect_val > master.detect_val or (
                        slave.detect_val == master.detect_val and s < m):
                    keep = False
                    break
            if keep:
                day_detections.append(master)
        unique_detections.extend(day_detections)

        print('We made a total of ' + str(len(unique_detections)) +
              ' detections')

        # Report (and plot) this day's detections against this day's data.
        for detection in day_detections:
            print('Detection at :' + str(detection.detect_time) +
                  ' for template ' + detection.template_name +
                  ' with a cross-correlation sum of: ' +
                  str(detection.detect_val))
            # We can plot these too
            if plot:
                stplot = st.copy()
                template = templates[
                    template_names.index(detection.template_name)]
                lags = sorted([tr.stats.starttime for tr in template])
                maxlag = lags[-1] - lags[0]
                stplot.trim(starttime=detection.detect_time - 10,
                            endtime=detection.detect_time + maxlag + 10)
                plotting.detection_multiplot(
                    stplot, template, [detection.detect_time.datetime])
    return unique_detections
def test_match_filter(samp_rate=20.0, debug=0):
    """
    Check that match_filter recovers events seeded into synthetic data.

    Synthetic templates and seeded, randomised data are generated, both are
    pre-processed, match_filter is run, and every detection is compared
    with the seed times of its template.  A detection within ten samples of
    a seed counts as true, anything else as false.

    :type samp_rate: float
    :param samp_rate: Sampling rate (Hz) used to generate the synthetic
        data, to pre-process data and templates, and to convert detection
        times into sample numbers.
    :type debug: int
    :param debug: Debug level, higher the number the more output.

    :returns: True if there are fewer than 0.25 false detections per true
        detection, otherwise False (including when nothing true was found).
    :rtype: bool
    """
    from eqcorrscan.utils import pre_processing
    from eqcorrscan.utils import EQcorrscan_plotting as plotting
    from eqcorrscan.core import match_filter
    from obspy import UTCDateTime
    import string

    # Generate a random dataset
    templates, data, seeds = generate_synth_data(
        nsta=5, ntemplates=2, nseeds=50, samp_rate=samp_rate,
        t_length=6.0, max_amp=5.0, debug=debug)
    # Notes to the user: If you use more templates you should ensure they
    # are more different, e.g. set the data to have larger moveouts,
    # otherwise similar templates will detect events seeded by another
    # template.

    # Test the pre_processing functions.  The sampling rate must be the one
    # the data were generated with, otherwise the sample arithmetic below
    # no longer lines up with the seed times.
    for tr in data:
        pre_processing.dayproc(tr=tr, lowcut=2.0, highcut=8.0, filt_order=3,
                               samp_rate=samp_rate, debug=0,
                               starttime=UTCDateTime(0))
    if debug > 0:
        data.plot()
    # Filter the templates in the same way as the data
    for template in templates:
        pre_processing.shortproc(st=template, lowcut=2.0, highcut=8.0,
                                 filt_order=3, samp_rate=samp_rate)
        if debug > 0:
            template.plot()

    template_names = list(string.ascii_lowercase)[0:len(templates)]
    detections = match_filter.match_filter(
        template_names=template_names, template_list=templates, st=data,
        threshold=8.0, threshold_type='MAD', trig_int=6.0, plotvar=False,
        plotdir='.', cores=1)

    # Compare the detections to the seeds
    print('This test made ' + str(len(detections)) + ' detections')
    ktrue = 0
    kfalse = 0
    for detection in detections:
        print(detection.template_name)
        i = template_names.index(detection.template_name)
        t_seeds = seeds[i]
        dtime_samples = int(
            (detection.detect_time - UTCDateTime(0)) * samp_rate)
        if dtime_samples in t_seeds['time']:
            j = list(t_seeds['time']).index(dtime_samples)
            print('Detection at SNR of: ' + str(t_seeds['SNR'][j]))
            ktrue += 1
        else:
            sample_diffs = abs(t_seeds['time'] - dtime_samples)
            min_diff = min(sample_diffs)
            if min_diff < 10:
                # A match within ten samples is good enough
                j = list(sample_diffs).index(min_diff)
                print('Detection at SNR of: ' + str(t_seeds['SNR'][j]))
                ktrue += 1
            else:
                print('Detection at sample: ' + str(dtime_samples) +
                      ' does not match anything in seed times:')
                kfalse += 1
            print('Minimum difference in samples is: ' + str(min_diff))

    # Plot the detections
    if debug > 3:
        for i, template in enumerate(templates):
            times = [d.detect_time.datetime for d in detections
                     if d.template_name == template_names[i]]
            print(times)
            plotting.detection_multiplot(data, template, times)

    print(str(ktrue) + ' true detections and ' + str(kfalse) +
          ' false detections')
    # Accept a false-to-true ratio below 0.25.  Written as a product so
    # that zero true detections fails the test rather than dividing by
    # zero, and so that Python 2 integer division cannot floor the ratio.
    return kfalse < 0.25 * ktrue
def from_contbase(sfile, contbase_list, lowcut, highcut, samp_rate, filt_order,
                  length, prepick, swin, debug=0, plot=False):
    """
    Generate a template from an s-file and a database of day-long waveforms.

    Picks are read from ``sfile``.  For the first P or S pick found on each
    station/channel, the day directory of every continuous database is
    globbed for waveform files of that station.  The files found are read,
    merged, passed trace-by-trace through ``pre_processing.dayproc`` and
    finally cut around the used picks by ``_template_gen``.

    :type sfile: str
    :param sfile: Path to a seisan nordic type s-file holding the picks.
    :type contbase_list: list of tuple of str
    :param contbase_list: Tuples of the form (path, type, network).  Only
        path and type are read here; type must be one of 'yyyy/mm/dd',
        'Yyyyy/Rjjj.01' or 'yyyymmdd'.  Entries with any other type are
        skipped with a warning.
    :type lowcut: float
    :param lowcut: Low cut (Hz), handed to ``pre_processing.dayproc``.
    :type highcut: float
    :param highcut: High cut (Hz), handed to ``pre_processing.dayproc``.
    :type samp_rate: float
    :param samp_rate: New sampling rate (Hz), handed to
        ``pre_processing.dayproc``.
    :type filt_order: int
    :param filt_order: Filter order, handed to ``pre_processing.dayproc``.
    :type length: float
    :param length: Extract length in seconds, handed to ``_template_gen``.
    :type prepick: float
    :param prepick: Pre-pick time in seconds, handed to ``_template_gen``.
    :type swin: str
    :param swin: Phase selection ('all', 'P' or 'S'), handed to
        ``_template_gen``.
    :type debug: int
    :param debug: Level of debugging output, higher=more.
    :type plot: bool
    :param plot: Turns template plotting on or off (handed to
        ``_template_gen``).

    :returns: The value returned by ``_template_gen`` for the used picks.
    :raises IOError: If ``sfile`` does not exist, or if no waveform files
        could be found for any of the used picks.
    """
    # Check the input before importing anything heavy.
    import os
    if not os.path.isfile(sfile):
        raise IOError('sfile does not exist')
    import glob
    import warnings
    from eqcorrscan.utils import pre_processing
    from eqcorrscan.utils import Sfile_util
    from obspy import UTCDateTime
    from obspy import read as obsread

    # Read in the header of the sfile.  The day is truncated to midnight:
    # it is used both to locate the day directory and as the start time
    # given to dayproc.
    # NOTE(review): assumes dayproc wants the start of the day rather than
    # the origin time -- confirm against pre_processing.dayproc.
    event = Sfile_util.readheader(sfile)
    day = UTCDateTime(event.origins[0].time.date)

    # Read in pick info
    catalog = Sfile_util.readpicks(sfile)
    picks = catalog[0].picks
    print("I have found the following picks")
    pick_chans = []
    used_picks = []
    # Start from an empty list so that "nothing found" is detectable below
    # instead of surfacing as a NameError.
    wavefiles = []
    for pick in picks:
        station = pick.waveform_id.station_code
        channel = pick.waveform_id.channel_code
        phase = pick.phase_hint
        pcktime = pick.time
        if station + channel not in pick_chans and phase in ['P', 'S']:
            pick_chans.append(station + channel)
            used_picks.append(pick)
            print(pick)
            for contbase in contbase_list:
                # os.path.join takes the components as separate arguments;
                # handing it a single list never built a path.
                if contbase[1] == 'yyyy/mm/dd':
                    daydir = os.path.join(str(day.year),
                                          str(day.month).zfill(2),
                                          str(day.day).zfill(2))
                elif contbase[1] == 'Yyyyy/Rjjj.01':
                    daydir = os.path.join(
                        'Y' + str(day.year),
                        'R' + str(day.julday).zfill(3) + '.01')
                elif contbase[1] == 'yyyymmdd':
                    daydir = day.datetime.strftime('%Y%m%d')
                else:
                    # Previously this fell through with daydir undefined
                    # (or left over from the preceding database).
                    warnings.warn('Unknown continuous database type: ' +
                                  str(contbase[1]) + ', skipping ' +
                                  str(contbase[0]))
                    continue
                wavefiles += glob.glob(os.path.join(
                    contbase[0], daydir, '*' + station + '.*'))
        elif phase in ['P', 'S']:
            print(' '.join(['Duplicate pick', station, channel, phase,
                            str(pcktime)]))
        elif phase == 'IAML':
            print(' '.join(['Amplitude pick', station, channel, phase,
                            str(pcktime)]))
    picks = used_picks

    # De-duplicate (several picks can share a station) and fix the order.
    wavefiles = sorted(set(wavefiles))
    if not wavefiles:
        raise IOError('No waveform files found for the picks in ' + sfile)

    # Read in waveform files
    st = None
    for wavefile in wavefiles:
        print("I am going to read waveform data from: " + wavefile)
        if st is None:
            st = obsread(wavefile)
        else:
            st += obsread(wavefile)

    # Process waveform data
    st.merge(fill_value='interpolate')
    for tr in st:
        # NOTE(review): the return value was never stored back into the
        # stream, so this relies on dayproc working on the trace in place --
        # confirm against pre_processing.dayproc.
        pre_processing.dayproc(tr, lowcut, highcut, filt_order,
                               samp_rate, debug, day)

    # Cut and extract the templates
    st1 = _template_gen(picks, st, length, swin, prepick=prepick, plot=plot)
    return st1
# Read every raw file into a single stream.  The stream is reset explicitly
# so that data left over from an earlier run of this code can never be
# appended to by accident (the old "'st' in locals()" test allowed that).
st = None
for rawfile in raw_files:
    if st is None:
        st = read(rawfile)
    else:
        st += read(rawfile)
if st is None:
    raise IOError('No raw waveform files to read')

# Merge the data to account for miniseed files being written in chunks.
# We need continuous day-long data, so data are padded if there are gaps.
# merge() works in place, so its return value is not needed.
st.merge(fill_value='interpolate')

# Work out what day we are working on, required as we will pad the data to
# be daylong
day = st[0].stats.starttime.date

# Process the data in the same way as the template
for tr in st:
    # NOTE(review): the return value was never stored back into the stream,
    # so this relies on dayproc working on the trace in place -- confirm.
    pre_processing.dayproc(tr, 1.0, 20.0, 3, 100.0, matchdef.debug, day)

# Set directory for match filter output plots
# plot_dir = '/projects/nesi00228/data/plots/'
plot_dir = '/home/chet/data/plot/'

# Compute detections
detections = match_filter.match_filter(template_names, templates, st,
                                       8.0, matchdef.threshtype,
                                       matchdef.trig_int, True, plot_dir,
                                       cores=10)

# We now have a list of detections! We can output these to a file to check
# later: one comma-separated line per detection.
for detection in detections:
    f.write(', '.join([detection.template_name,
                       str(detection.detect_time),
                       str(detection.detect_val),
                       str(detection.threshold),
                       str(detection.no_chans)]) + '\n')
del detections
def template_gen(method, lowcut, highcut, samp_rate, filt_order,
                 length, prepick, swin="all", process_len=86400,
                 all_horiz=False, delayed=True, plot=False, plotdir=None,
                 return_event=False, min_snr=None, parallel=False,
                 num_cores=False, save_progress=False, skip_short_chans=False,
                 **kwargs):
    """
    Generate processed and cut waveforms for use as templates.

    :type method: str
    :param method:
        Template generation method, must be one of ('from_client',
        'from_seishub', 'from_sac', 'from_meta_file'). - Each method requires
        associated arguments, see note below.
    :type lowcut: float
    :param lowcut: Low cut (Hz), if set to None will not apply a lowcut.
    :type highcut: float
    :param highcut: High cut (Hz), if set to None will not apply a highcut.
    :type samp_rate: float
    :param samp_rate: New sampling rate in Hz.
    :type filt_order: int
    :param filt_order: Filter level (number of corners).
    :type length: float
    :param length: Length of template waveform in seconds.
    :type prepick: float
    :param prepick: Pre-pick time in seconds
    :type swin: str
    :param swin:
        P, S, P_all, S_all or all, defaults to all: see note in
        :func:`eqcorrscan.core.template_gen.template_gen`
    :type process_len: int
    :param process_len: Length of data in seconds to download and process.
    :type all_horiz: bool
    :param all_horiz:
        To use both horizontal channels even if there is only a pick on one
        of them.  Defaults to False.
    :type delayed: bool
    :param delayed:
        If True, each channel will begin relative to its own pick-time, if
        set to False, each channel will begin at the same time.
    :type plot: bool
    :param plot: Plot templates or not.
    :type plotdir: str
    :param plotdir:
        The path to save plots to. If `plotdir=None` (default) then the
        figure will be shown on screen.
    :type return_event: bool
    :param return_event:
        Whether to return the event and process length or not.
    :type min_snr: float
    :param min_snr:
        Minimum signal-to-noise ratio for a channel to be included in the
        template, where signal-to-noise ratio is calculated as the ratio of
        the maximum amplitude in the template window to the rms amplitude in
        the whole window given.
    :type parallel: bool
    :param parallel: Whether to process data in parallel or not.
    :type num_cores: int
    :param num_cores:
        Number of cores to try and use, if False and parallel=True, will use
        either all your cores, or as many traces as in the data (whichever is
        smaller).
    :type save_progress: bool
    :param save_progress:
        Whether to save the resulting templates at every data step or not.
        Useful for long-running processes.
    :type skip_short_chans: bool
    :param skip_short_chans:
        Whether to ignore channels that have insufficient length data or not.
        Useful when the quality of data is not known, e.g. when downloading
        old, possibly triggered data from a datacentre.  A channel counts as
        too short when it holds less than 80% of `process_len` seconds of
        data; such channels are removed from the stream before processing.

    :returns:
        List of :class:`obspy.core.stream.Stream` Templates.  If
        `return_event=True` a tuple of (templates, catalog of the events
        used, list of process lengths) is returned instead.
    :rtype: list

    .. note::
        By convention templates are generated with P-phases on the
        vertical channel and S-phases on the horizontal channels, normal
        seismograph naming conventions are assumed, where Z denotes vertical
        and N, E, R, T, 1 and 2 denote horizontal channels, either oriented
        or not.  To this end we will **only** use Z channels if they have a
        P-pick, and will use one or other horizontal channels **only** if
        there is an S-pick on it.

    .. warning::
        If there is no phase_hint included in picks, and swin=all, all
        channels with picks will be used.

    .. note::
        If swin=all, then all picks will be used, not just phase-picks (e.g.
        it will use amplitude picks). If you do not want this then we suggest
        that you remove any picks you do not want to use in your templates
        before using the event.

    .. note::
        *Method specific arguments:*

        - `from_client` requires:
            :param str client_id:
                string passable by obspy to generate Client, or any object
                with a `get_waveforms` method, including a Client instance.
            :param `obspy.core.event.Catalog` catalog:
                Catalog of events to generate template for
            :param float data_pad: Pad length for data-downloads in seconds
        - `from_seishub` requires:
            :param str url: url to seishub database
            :param `obspy.core.event.Catalog` catalog:
                Catalog of events to generate template for
            :param float data_pad: Pad length for data-downloads in seconds
        - `from_sac` requires:
            :param list sac_files:
                obspy.core.stream.Stream of sac waveforms, or list of paths to
                sac waveforms.
            .. note::
                See `eqcorrscan.utils.sac_util.sactoevent` for details on
                how pick information is collected.
        - `from_meta_file` requires:
            :param str meta_file:
                Path to obspy-readable event file, or an obspy Catalog
            :param `obspy.core.stream.Stream` st:
                Stream containing waveform data for template. Note that this
                should be the same length of stream as you will use for the
                continuous detection, e.g. if you detect in day-long files,
                give this a day-long file!
            :param bool process:
                Whether to process the data or not, defaults to True.

    .. note::
        process_len should be set to the same length as used when computing
        detections using match_filter.match_filter, e.g. if you read
        in day-long data for match_filter, process_len should be 86400.

    .. rubric:: Example

    >>> from obspy.clients.fdsn import Client
    >>> from eqcorrscan.core.template_gen import template_gen
    >>> client = Client('NCEDC')
    >>> catalog = client.get_events(eventid='72572665', includearrivals=True)
    >>> # We are only taking two picks for this example to speed up the
    >>> # example, note that you don't have to!
    >>> catalog[0].picks = catalog[0].picks[0:2]
    >>> templates = template_gen(
    ...    method='from_client', catalog=catalog, client_id='NCEDC',
    ...    lowcut=2.0, highcut=9.0, samp_rate=20.0, filt_order=4, length=3.0,
    ...    prepick=0.15, swin='all', process_len=300, all_horiz=True)
    >>> templates[0].plot(equal_scale=False, size=(800,600)) # doctest: +SKIP

    .. figure:: ../../plots/template_gen.from_client.png

    .. rubric:: Example

    >>> from obspy import read
    >>> from eqcorrscan.core.template_gen import template_gen
    >>> # Get the path to the test data
    >>> import eqcorrscan
    >>> import os
    >>> TEST_PATH = os.path.dirname(eqcorrscan.__file__) + '/tests/test_data'
    >>> st = read(TEST_PATH + '/WAV/TEST_/' +
    ...           '2013-09-01-0410-35.DFDPC_024_00')
    >>> quakeml = TEST_PATH + '/20130901T041115.xml'
    >>> templates = template_gen(
    ...    method='from_meta_file', meta_file=quakeml, st=st, lowcut=2.0,
    ...    highcut=9.0, samp_rate=20.0, filt_order=3, length=2, prepick=0.1,
    ...    swin='S', all_horiz=True)
    >>> print(len(templates[0]))
    10
    >>> templates = template_gen(
    ...    method='from_meta_file', meta_file=quakeml, st=st, lowcut=2.0,
    ...    highcut=9.0, samp_rate=20.0, filt_order=3, length=2, prepick=0.1,
    ...    swin='S_all', all_horiz=True)
    >>> print(len(templates[0]))
    15

    .. rubric:: Example

    >>> from eqcorrscan.core.template_gen import template_gen
    >>> import glob
    >>> # Get all the SAC-files associated with one event.
    >>> sac_files = glob.glob(TEST_PATH + '/SAC/2014p611252/*')
    >>> templates = template_gen(
    ...    method='from_sac', sac_files=sac_files, lowcut=2.0, highcut=10.0,
    ...    samp_rate=25.0, filt_order=4, length=2.0, swin='all', prepick=0.1,
    ...    all_horiz=True)
    >>> print(templates[0][0].stats.sampling_rate)
    25.0
    >>> print(len(templates[0]))
    15
    """
    client_map = {'from_client': 'fdsn', 'from_seishub': 'seishub'}
    assert method in ('from_client', 'from_seishub', 'from_meta_file',
                      'from_sac')
    if not isinstance(swin, list):
        swin = [swin]
    process = True
    if method in ['from_client', 'from_seishub']:
        catalog = kwargs.get('catalog', Catalog())
        data_pad = kwargs.get('data_pad', 90)
        # Group catalog into days and only download the data once per day
        sub_catalogs = _group_events(
            catalog=catalog, process_len=process_len, template_length=length,
            data_pad=data_pad)
        if method == 'from_client':
            client_id = kwargs.get('client_id', None)
            if hasattr(client_id, 'get_waveforms'):
                client = client_id
            elif isinstance(client_id, str):
                client = FDSNClient(client_id)
            else:
                raise NotImplementedError(
                    "client_id must be an FDSN client string, or a Client "
                    "with a get_waveforms method"
                )
            available_stations = []
        else:
            client = SeisHubClient(kwargs.get('url', None), timeout=10)
            available_stations = client.waveform.get_station_ids()
    elif method == 'from_meta_file':
        if isinstance(kwargs.get('meta_file'), Catalog):
            catalog = kwargs.get('meta_file')
        elif kwargs.get('meta_file'):
            catalog = read_events(kwargs.get('meta_file'))
        else:
            catalog = kwargs.get('catalog')
        sub_catalogs = [catalog]
        st = kwargs.get('st', Stream())
        process = kwargs.get('process', True)
    elif method == 'from_sac':
        sac_files = kwargs.get('sac_files')
        if isinstance(sac_files, list):
            if isinstance(sac_files[0], (Stream, Trace)):
                # This is a list of streams...
                st = Stream(sac_files[0])
                for sac_file in sac_files[1:]:
                    st += sac_file
            else:
                sac_files = [read(sac_file)[0] for sac_file in sac_files]
                st = Stream(sac_files)
        else:
            st = sac_files
        # Make an event object...
        catalog = Catalog([sactoevent(st)])
        sub_catalogs = [catalog]

    temp_list = []
    process_lengths = []
    catalog_out = Catalog()

    all_channels = bool(
        "P_all" in swin or "S_all" in swin or all_horiz)
    for sub_catalog in sub_catalogs:
        if method in ['from_seishub', 'from_client']:
            Logger.info("Downloading data")
            st = _download_from_client(
                client=client, client_type=client_map[method],
                catalog=sub_catalog, data_pad=data_pad,
                process_len=process_len, available_stations=available_stations,
                all_channels=all_channels)
        Logger.info('Pre-processing data')
        st.merge()
        if len(st) == 0:
            Logger.info("No data")
            continue
        if process:
            data_len = max([len(tr.data) / tr.stats.sampling_rate
                            for tr in st])
            if 80000 < data_len < 90000:
                daylong = True
                starttime = min([tr.stats.starttime for tr in st])
                min_delta = min([tr.stats.delta for tr in st])
                # Cope with the common starttime less than 1 sample before the
                # start of day.
                if (starttime + min_delta).date > starttime.date:
                    starttime = (starttime + min_delta)
                # Data starting more than ten minutes after midnight are not
                # treated as day-long.
                if abs(starttime - UTCDateTime(starttime.date)) > 600:
                    daylong = False
                starttime = starttime.date
            else:
                daylong = False
            # Check if the required amount of data have been downloaded - skip
            # channels if arg set.  Traces that pass are collected so that
            # skipped channels really are dropped from the stream rather than
            # merely left untrimmed.
            kept_traces = []
            for tr in st:
                if np.ma.is_masked(tr.data):
                    # Only count the unmasked (real) samples.
                    _len = np.ma.count(tr.data) * tr.stats.delta
                else:
                    _len = tr.stats.npts * tr.stats.delta
                if _len < process_len * .8:
                    Logger.info(
                        "Data for {0} are too short, skipping".format(
                            tr.id))
                    if skip_short_chans:
                        continue
                # Trim to enforce process-len
                tr.data = tr.data[0:int(process_len * tr.stats.sampling_rate)]
                kept_traces.append(tr)
            st = Stream(kept_traces)
            if len(st) == 0:
                Logger.info("No data")
                continue
            if daylong:
                st = pre_processing.dayproc(
                    st=st, lowcut=lowcut, highcut=highcut,
                    filt_order=filt_order, samp_rate=samp_rate,
                    parallel=parallel, starttime=UTCDateTime(starttime),
                    num_cores=num_cores)
            else:
                st = pre_processing.shortproc(
                    st=st, lowcut=lowcut, highcut=highcut,
                    filt_order=filt_order, parallel=parallel,
                    samp_rate=samp_rate, num_cores=num_cores)
        data_start = min([tr.stats.starttime for tr in st])
        data_end = max([tr.stats.endtime for tr in st])

        for event in sub_catalog:
            if len(event.picks) == 0:
                Logger.warning(
                    'No picks for event {0}'.format(event.resource_id))
                continue
            use_event = True
            # Check that the event is within the data
            for pick in event.picks:
                if not data_start < pick.time < data_end:
                    Logger.warning(
                        "Pick outside of data span: Pick time {0} Start "
                        "time {1} End time: {2}".format(
                            str(pick.time), str(data_start), str(data_end)))
                    use_event = False
            if not use_event:
                Logger.error('Event is not within data time-span')
                continue
            # Report the picks found; picks without a waveform_id are only
            # flagged here, the full pick list is still handed on below.
            Logger.debug("I have found the following picks")
            for pick in event.picks:
                if not pick.waveform_id:
                    Logger.warning(
                        'Pick not associated with waveforms, will not use:'
                        ' {0}'.format(pick))
                    continue
                Logger.debug(pick)
            # Cut and extract the templates
            template = _template_gen(
                event.picks, st, length, swin, prepick=prepick, plot=plot,
                all_horiz=all_horiz, delayed=delayed, min_snr=min_snr,
                plotdir=plotdir)
            process_lengths.append(len(st[0].data) / samp_rate)
            temp_list.append(template)
            catalog_out += event
        if save_progress:
            if not os.path.isdir("eqcorrscan_temporary_templates"):
                os.makedirs("eqcorrscan_temporary_templates")
            for template in temp_list:
                template.write(
                    "eqcorrscan_temporary_templates{0}{1}.ms".format(
                        os.path.sep, template[0].stats.starttime.strftime(
                            "%Y-%m-%dT%H%M%S")),
                    format="MSEED")
        del st
    if return_event:
        return temp_list, catalog_out, process_lengths
    return temp_list
def from_client(catalog, client_id, lowcut, highcut, samp_rate, filt_order,
                length, prepick, swin, debug=0, plot=False):
    """
    Generate multiplexed template from FDSN client.

    Function to generate templates from an FDSN client. Must be given
    an obspy.Catalog class and the client_id as input. The function returns
    a list of obspy.Stream classes containing streams for each desired
    template.

    :type catalog: obspy.core.event.Catalog
    :param catalog: Catalog class containing desired template events
    :type client_id: str
    :param client_id: Name of the client, either url, or Obspy mappable.
    :type lowcut: float
    :param lowcut: Low cut (Hz), if set to None will look in template
        defaults file
    :type highcut: float
    :param highcut: High cut (Hz), if set to None will look in template
        defaults file
    :type samp_rate: float
    :param samp_rate: New sampling rate in Hz, if set to None will look in
        template defaults file
    :type filt_order: int
    :param filt_order: Filter level, if set to None will look in
        template defaults file
    :type length: float
    :param length: Extract length in seconds, if None will look in template
        defaults file.
    :type prepick: float
    :param prepick: Pre-pick time in seconds
    :type swin: str
    :param swin: Either 'all', 'P' or 'S', to select which phases to output.
    :type debug: int
    :param debug: Level of debugging output, higher=more
    :type plot: bool
    :param plot: Plot templates or not.

    :returns: list of obspy.core.stream.Stream, one newly cut template per
        event with picks in the catalog.
    :raises FDSNException: If no data could be downloaded for any pick of
        an event.

    .. rubric:: Example

    >>> import obspy
    >>> if int(obspy.__version__.split('.')[0]) >= 1:
    ...     from obspy.clients.fdsn import Client
    ... else:
    ...     from obspy.fdsn import Client
    >>> from obspy.core.event import Catalog
    >>> from eqcorrscan.core.template_gen import from_client
    >>> client = Client('NCEDC')
    >>> catalog = client.get_events(eventid='72572665', includearrivals=True)
    >>> # We are only taking two picks for this example to speed up the example,
    >>> # note that you don't have to!
    >>> catalog[0].picks = catalog[0].picks[0:2]
    >>> templates = from_client(catalog=catalog, client_id='NCEDC',
    ...                         lowcut=2.0, highcut=9.0, samp_rate=20.0,
    ...                         filt_order=4, length=3.0, prepick=0.15,
    ...                         swin='all')
    Fetching the following traces from NCEDC
    BG.CLV..DPZ
    BK.BKS.00.HHZ
    Pre-processing data for event: quakeml:nc.anss.org/Event/NC/72572665
    >>> templates[0].plot(equal_scale=False, size=(800,600)) # doctest: +SKIP

    .. figure:: ../../plots/template_gen.from_client.png
    """
    # This import section copes with namespace changes between obspy versions
    import obspy
    if int(obspy.__version__.split('.')[0]) >= 1:
        from obspy.clients.fdsn import Client
        from obspy.clients.fdsn.header import FDSNException
    else:
        from obspy.fdsn import Client
        from obspy.fdsn.header import FDSNException
    from eqcorrscan.utils import pre_processing
    from obspy import Stream, UTCDateTime
    import warnings

    client = Client(client_id)
    temp_list = []
    for event in catalog:
        if len(event.picks) == 0:
            # Nothing to download or cut for this event.
            warnings.warn('No picks for event ' + str(event.resource_id))
            continue
        print("Fetching the following traces from " + client_id)
        # Start every event from an explicit empty stream so that failed
        # downloads cannot leave the name unbound.
        st = Stream()
        dropped_pick_stations = 0
        for pick in event.picks:
            net = pick.waveform_id.network_code
            sta = pick.waveform_id.station_code
            chan = pick.waveform_id.channel_code
            loc = pick.waveform_id.location_code
            starttime = UTCDateTime(pick.time.date)
            endtime = starttime + 86400
            # Here we download a full day of data.  We do this so that minor
            # differences in processing during processing due to the effect
            # of resampling do not impinge on our cross-correlations.
            if debug > 0:
                print('start-time: ' + str(starttime))
                print('end-time: ' + str(endtime))
                print('pick-time: ' + str(pick.time))
                # phase_hint may be unset, so stringify before joining.
                print('pick phase: ' + str(pick.phase_hint))
                print('.'.join([net, sta, loc, chan]))
            try:
                st += client.get_waveforms(net, sta, loc, chan,
                                           starttime, endtime)
            except FDSNException:
                warnings.warn('Found no data for this station')
                dropped_pick_stations += 1
        if len(st) == 0 and dropped_pick_stations == len(event.picks):
            raise FDSNException('No data available, is the server down?')
        if debug > 0:
            st.plot()
        print('Pre-processing data for event: ' + str(event.resource_id))
        st.merge(fill_value='interpolate')
        # NOTE: starttime is the day of the last pick handled above; picks
        # spanning midnight are not treated specially here.
        st1 = pre_processing.dayproc(st, lowcut, highcut, filt_order,
                                     samp_rate, starttime=starttime,
                                     debug=debug, parallel=True)
        if debug > 0:
            st1.plot()
        template = _template_gen(event.picks, st1, length, swin, prepick,
                                 plot=plot, debug=debug)
        # Release the day of data before the next event is downloaded.
        del st, st1
        temp_list.append(template)
    return temp_list
def from_seishub(catalog, url, lowcut, highcut, samp_rate, filt_order,
                 length, prepick, swin, debug=0, plot=False):
    """
    Generate multiplexed template from SeisHub database.

    Function to generate templates from a SeisHub database. Must be given
    an obspy.Catalog class and the SeisHub url as input. The function returns
    a list of obspy.Stream classes containing streams for each desired
    template.

    :type catalog: obspy.core.event.Catalog
    :param catalog: Catalog class containing desired template events
    :type url: str
    :param url: url of SeisHub database instance
    :type lowcut: float
    :param lowcut: Low cut (Hz), if set to None will look in template
        defaults file
    :type highcut: float
    :param highcut: High cut (Hz), if set to None will look in template
        defaults file
    :type samp_rate: float
    :param samp_rate: New sampling rate in Hz, if set to None will look in
        template defaults file
    :type filt_order: int
    :param filt_order: Filter level, if set to None will look in
        template defaults file
    :type length: float
    :param length: Extract length in seconds, if None will look in template
        defaults file.
    :type prepick: float
    :param prepick: Pre-pick time in seconds
    :type swin: str
    :param swin: Either 'all', 'P' or 'S', to select which phases to output.
    :type debug: int
    :param debug: Level of debugging output, higher=more
    :type plot: bool
    :param plot: Plot templates or not.

    :returns: list of obspy.core.stream.Stream, one newly cut template per
        event in the catalog.
    :raises IOError: If a pick lacks a network, station or channel code, or
        if no waveforms were found for an event.
    """
    # This import section copes with namespace changes between obspy versions
    import obspy
    if int(obspy.__version__.split('.')[0]) >= 1:
        from obspy.clients.seishub import Client
    else:
        from obspy.seishub import Client
    from eqcorrscan.utils import pre_processing
    from obspy import Stream, UTCDateTime

    client = Client(url, timeout=10)
    temp_list = []
    for event in catalog:
        # Start every event from an explicit empty stream so the "no
        # waveforms" check below works even when no station was found.
        st = Stream()
        print("Fetching the following traces from SeisHub")
        for pick in event.picks:
            waveform_id = pick.waveform_id
            # The pick is stringified explicitly: concatenating the Pick
            # object itself would raise TypeError instead of IOError.
            if not waveform_id.network_code:
                raise IOError('No network code defined for pick: ' +
                              str(pick))
            if not waveform_id.station_code:
                raise IOError('No station code defined for pick: ' +
                              str(pick))
            if not waveform_id.channel_code:
                raise IOError('No channel code defined for pick: ' +
                              str(pick))
            net = waveform_id.network_code
            sta = waveform_id.station_code
            chan = waveform_id.channel_code
            # Fall back to a wildcard when no location code is given.
            loc = waveform_id.location_code or '*'
            starttime = UTCDateTime(pick.time.date)
            endtime = starttime + 86400
            # Here we download a full day of data.  We do this so that minor
            # differences in processing during processing due to the effect
            # of resampling do not impinge on our cross-correlations.
            if debug > 0:
                print('start-time: ' + str(starttime))
                print('end-time: ' + str(endtime))
                print('pick-time: ' + str(pick.time))
                print('.'.join([net, sta, loc, chan]))
            if sta in client.waveform.get_station_ids(network=net):
                st += client.waveform.get_waveform(net, sta, loc, chan,
                                                   starttime, endtime)
            else:
                print('Station not found in SeisHub DB')
        if len(st) == 0:
            raise IOError('No waveforms found')
        if debug > 0:
            st.plot()
        print('Preprocessing data for event: ' + str(event.resource_id))
        st.merge(fill_value='interpolate')
        # NOTE: starttime is the day of the last pick handled above.
        st1 = pre_processing.dayproc(st, lowcut, highcut, filt_order,
                                     samp_rate, starttime=starttime,
                                     debug=debug)
        template = _template_gen(event.picks, st1, length, swin, prepick,
                                 plot=plot, debug=debug)
        # Release the day of data before the next event is downloaded.
        del st, st1
        temp_list.append(template)
    return temp_list
def from_quakeml(quakeml, st, lowcut, highcut, samp_rate, filt_order,
                 length, prepick, swin, debug=0, plot=False):
    """
    Generate a multiplexed template from a local quakeML file.

    Function to generate a template from a local quakeml file
    and an obspy.Stream object.

    :type quakeml: str
    :param quakeml: QuakeML file containing pick information, can contain
        multiple events.
    :type st: obspy.core.stream.Stream
    :param st: Stream containing waveform data for template (hopefully).
        Note that this should be the same length of stream as you will use
        for the continuous detection, e.g. if you detect in day-long files,
        give this a day-long file!
    :type lowcut: float
    :param lowcut: Low cut (Hz), if set to None will look in template
        defaults file
    :type highcut: float
    :param highcut: High cut (Hz), if set to None will look in template
        defaults file
    :type samp_rate: float
    :param samp_rate: New sampling rate in Hz, if set to None will look in
        template defaults file
    :type filt_order: int
    :param filt_order: Filter level, if set to None will look in
        template defaults file
    :type length: float
    :param length: Extract length in seconds, if None will look in template
        defaults file.
    :type prepick: float
    :param prepick: Pre-pick time in seconds
    :type swin: str
    :param swin: Either 'all', 'P' or 'S', to select which phases to output.
    :type debug: int
    :param debug: Level of debugging output, higher=more
    :type plot: bool
    :param plot: Display template plots or not

    :returns: list of obspy.Stream Newly cut templates
    :raises IOError: If the QuakeML file does not exist.

    .. warning:: We suggest giving this function a full day of data, to
        ensure templates are generated with **exactly** the same processing
        as the continuous data.  Not doing this will result in slightly
        reduced cross-correlation values.

    .. rubric:: Example

    >>> from obspy import read
    >>> from eqcorrscan.core.template_gen import from_quakeml
    >>> st = read('eqcorrscan/tests/test_data/WAV/TEST_/' +
    ...           '2013-09-01-0410-35.DFDPC_024_00')
    >>> quakeml = 'eqcorrscan/tests/test_data/20130901T041115.xml'
    >>> templates = from_quakeml(quakeml=quakeml, st=st, lowcut=2.0,
    ...                          highcut=9.0, samp_rate=20.0, filt_order=3,
    ...                          length=2, prepick=0.1, swin='S')
    >>> print(len(templates[0]))
    15
    """
    # Perform some checks first
    import os
    import warnings
    if not os.path.isfile(quakeml):
        raise IOError('QuakeML file does not exist')
    import obspy
    if int(obspy.__version__.split('.')[0]) >= 1:
        from obspy import read_events
    else:
        from obspy import readEvents as read_events
    from obspy import UTCDateTime
    from eqcorrscan.utils import pre_processing

    # Process waveform data
    st.merge(fill_value='interpolate')
    # Work out if the data are daylong or not...
    data_len = max([len(tr.data) / tr.stats.sampling_rate for tr in st])
    daylong = 80000 < data_len < 90000
    if daylong:
        st = pre_processing.dayproc(
            st, lowcut, highcut, filt_order, samp_rate, debug=debug,
            starttime=UTCDateTime(st[0].stats.starttime.date))
    else:
        st = pre_processing.shortproc(st, lowcut, highcut, filt_order,
                                      samp_rate, debug=debug)
    data_start = min([tr.stats.starttime for tr in st])
    data_end = max([tr.stats.endtime for tr in st])
    # The available station.channel pairs are the same for every event, so
    # they are collected once rather than re-appended per event.
    st_stachans = set('.'.join([tr.stats.station, tr.stats.channel])
                      for tr in st)
    # Read QuakeML file into Catalog class
    catalog = read_events(quakeml)
    templates = []
    for event in catalog:
        use_event = True
        # Check that the event is within the data
        for pick in event.picks:
            if not data_start < pick.time < data_end:
                if debug > 0:
                    print('Pick outside of data span:')
                    print('Pick time: ' + str(pick.time))
                    print('Start time: ' + str(data_start))
                    print('End time: ' + str(data_end))
                use_event = False
        if not use_event:
            warnings.warn('Event is not within data time-span')
            continue
        # Read in pick info
        if debug > 0:
            print("I have found the following picks")
        for pick in event.picks:
            station = pick.waveform_id.station_code
            channel = pick.waveform_id.channel_code
            if debug > 0:
                # phase_hint may be unset, so stringify before joining.
                print(' '.join([station, channel, str(pick.phase_hint),
                                str(pick.time)]))
            # Only this event's picks are checked against the waveforms;
            # picks of earlier events are not revisited.
            if '.'.join([station, channel]) not in st_stachans:
                warnings.warn('No data provided for ' + station + '.' +
                              channel)
        # Hand a copy on so the processed stream is reused for every event.
        st1 = st.copy()
        # Cut and extract the templates
        template = _template_gen(event.picks, st1, length, swin,
                                 prepick=prepick, plot=plot, debug=debug)
        templates.append(template)
    return templates
def from_contbase(sfile, contbase_list, lowcut, highcut, samp_rate, filt_order,
                  length, prepick, swin, debug=0, plot=False):
    """
    Generate a template from an s-file and a continuous database.

    Function to read in picks from sfile then generate the template from
    the picks within this and the wavefiles from the continuous database of
    day-long files.  Included is a section to sanity check that the files are
    daylong and that they start at the start of the day.  You should ensure
    this is the case otherwise this may alter your data if your data are
    daylong but the headers are incorrectly set.

    :type sfile: string
    :param sfile: sfilename must be the path to a seisan nordic type s-file
        containing waveform and pick information, all other arguments can
        be numbers save for swin which must be either P, S or all
        (case-sensitive).
    :type contbase_list: List of tuple of string
    :param contbase_list: List of tuples of the form
        ['path', 'type', 'network'].  Where path is the path to the
        continuous database, type is the directory structure, which can be
        either Yyyyy/Rjjj.01, which is the standard IRIS Year, julian day
        structure, yyyy/mm/dd, or, yyyymmdd which is a single directory for
        every day.
    :type lowcut: float
    :param lowcut: Low cut (Hz), if set to None will look in template
        defaults file
    :type highcut: float
    :param highcut: High cut (Hz), if set to None will look in template
        defaults file
    :type samp_rate: float
    :param samp_rate: New sampling rate in Hz, if set to None will look in
        template defaults file
    :type filt_order: int
    :param filt_order: Filter level, if set to None will look in
        template defaults file
    :type length: float
    :param length: Extract length in seconds, if None will look in template
        defaults file.
    :type prepick: float
    :param prepick: Pre-pick time in seconds
    :type swin: str
    :param swin: Either 'all', 'P' or 'S', to select which phases to output.
    :type debug: int
    :param debug: Level of debugging output, higher=more
    :type plot: bool
    :param plot: Turns template plotting on or off.

    :returns: obspy.Stream Newly cut template
    :raises IOError: If the s-file does not exist, a database directory
        structure is not recognised, or no waveform files are found.
    """
    # Perform some checks first
    import os
    if not os.path.isfile(sfile):
        raise IOError('sfile does not exist')
    # import some things
    from eqcorrscan.utils import pre_processing
    from eqcorrscan.utils import sfile_util
    import glob
    from obspy import Stream
    from obspy import read as obsread

    # Read in the header of the sfile
    event = sfile_util.readheader(sfile)
    day = event.origins[0].time
    # Read in pick info
    catalog = sfile_util.readpicks(sfile)
    picks = catalog[0].picks
    print("I have found the following picks")
    pick_chans = set()
    used_picks = []
    # A set removes files matched by more than one pick or database.
    wavefiles = set()
    for pick in picks:
        station = pick.waveform_id.station_code
        channel = pick.waveform_id.channel_code
        phase = pick.phase_hint
        pcktime = pick.time
        if station + channel not in pick_chans and phase in ['P', 'S']:
            pick_chans.add(station + channel)
            used_picks.append(pick)
            print(pick)
            for contbase in contbase_list:
                base_path, structure = contbase[0], contbase[1]
                # os.path.join takes the components as separate arguments,
                # not as a single list.
                if structure == 'yyyy/mm/dd':
                    daydir = os.path.join(str(day.year),
                                          str(day.month).zfill(2),
                                          str(day.day).zfill(2))
                elif structure == 'Yyyyy/Rjjj.01':
                    daydir = os.path.join(
                        'Y' + str(day.year),
                        'R' + str(day.julday).zfill(3) + '.01')
                elif structure == 'yyyymmdd':
                    daydir = day.datetime.strftime('%Y%m%d')
                else:
                    # Fail loudly rather than silently reusing the directory
                    # worked out for a previous database.
                    raise IOError('Unrecognised continuous database '
                                  'structure: ' + str(structure))
                wavefiles.update(glob.glob(
                    os.path.join(base_path, daydir, '*' + station + '.*')))
        elif phase in ['P', 'S']:
            print(' '.join(['Duplicate pick', station, channel,
                            phase, str(pcktime)]))
        elif phase == 'IAML':
            print(' '.join(['Amplitude pick', station, channel,
                            phase, str(pcktime)]))
    picks = used_picks
    if not wavefiles:
        raise IOError('No waveform files found for picks in ' + sfile)
    # Read in waveform file
    st = Stream()
    for wavefile in sorted(wavefiles):
        print("I am going to read waveform data from: " + wavefile)
        st += obsread(wavefile)
    # Process waveform data
    st.merge(fill_value='interpolate')
    for tr in st:
        # The returned trace was never stored back into the stream, so this
        # relies on dayproc modifying tr in place -- TODO confirm.
        pre_processing.dayproc(tr, lowcut, highcut, filt_order,
                               samp_rate, debug, day)
    # Cut and extract the templates
    st1 = _template_gen(picks, st, length, swin, prepick=prepick, plot=plot)
    return st1
def from_quakeml(quakeml, st, lowcut, highcut, samp_rate, filt_order,
                 length, prepick, swin, debug=0, plot=False):
    """
    Generate templates from a local quakeml file and an obspy.Stream.

    :type quakeml: string
    :param quakeml: QuakeML file containing pick information, can contain
        multiple events.
    :type st: class: obspy.Stream
    :param st: Stream containing waveform data for template (hopefully)
    :type lowcut: float
    :param lowcut: Low cut (Hz), if set to None will look in template
        defaults file
    :type highcut: float
    :param highcut: High cut (Hz), if set to None will look in template
        defaults file
    :type samp_rate: float
    :param samp_rate: New sampling rate in Hz, if set to None will look in
        template defaults file
    :type filt_order: int
    :param filt_order: Filter level, if set to None will look in
        template defaults file
    :type length: float
    :param length: Extract length in seconds, if None will look in template
        defaults file.
    :type prepick: float
    :param prepick: Pre-pick time in seconds
    :type swin: str
    :param swin: Either 'all', 'P' or 'S', to select which phases to output.
    :type debug: int
    :param debug: Level of debugging output, higher=more
    :type plot: bool
    :param plot: Display template plots or not

    :returns: list of obspy.Stream Newly cut templates
    :raises IOError: If the QuakeML file does not exist.
    """
    # Perform some checks first
    import os
    import warnings
    if not os.path.isfile(quakeml):
        raise IOError('QuakeML file does not exist')
    import obspy
    if int(obspy.__version__.split('.')[0]) >= 1:
        from obspy import read_events
    else:
        from obspy import readEvents as read_events
    from obspy import UTCDateTime
    from eqcorrscan.utils import pre_processing

    # Process waveform data
    st.merge(fill_value='interpolate')
    for tr in st:
        # The returned trace was never stored back into the stream, so this
        # relies on dayproc modifying tr in place -- TODO confirm.
        pre_processing.dayproc(tr, lowcut, highcut, filt_order,
                               samp_rate, debug,
                               UTCDateTime(tr.stats.starttime.date))
    # The available station.channel pairs are the same for every event, so
    # they are collected once rather than re-appended per event.
    st_stachans = set('.'.join([tr.stats.station, tr.stats.channel])
                      for tr in st)
    # Read QuakeML file into Catalog class
    catalog = read_events(quakeml)
    templates = []
    for event in catalog:
        # Read in pick info
        print("I have found the following picks")
        for pick in event.picks:
            station = pick.waveform_id.station_code
            channel = pick.waveform_id.channel_code
            # phase_hint may be unset, so stringify before joining.
            print(' '.join([station, channel, str(pick.phase_hint),
                            str(pick.time)]))
            # Only this event's picks are checked against the waveforms;
            # iterating the picks directly also avoids the Python-2-only
            # xrange.
            if '.'.join([station, channel]) not in st_stachans:
                warnings.warn('No data provided for ' + station + '.' +
                              channel)
        # Hand a copy on so the processed stream is reused for every event.
        st1 = st.copy()
        # Cut and extract the templates
        template = _template_gen(event.picks, st1, length, swin,
                                 prepick=prepick, plot=plot)
        templates.append(template)
    return templates
def from_quakeml(quakeml, st, lowcut, highcut, samp_rate, filt_order,
                 length, prepick, swin, debug=0, plot=False):
    r"""Function to generate a template from a local quakeml file \
    and an obspy.Stream object.

    The stream is merged and every trace is passed through \
    pre_processing.dayproc, then one template is cut per event found in \
    the QuakeML file.

    :type quakeml: string
    :param quakeml: QuakeML file containing pick information, can contain \
        multiple events.
    :type st: class: obspy.Stream
    :param st: Stream containing waveform data for template (hopefully). \
        Note that this should be the same length of stream as you will use \
        for the continuous detection, e.g. if you detect in day-long files, \
        give this a day-long file! The stream is merged and its traces are \
        replaced by their processed versions, i.e. it is modified by this \
        function.
    :type lowcut: float
    :param lowcut: Low cut (Hz), passed to pre_processing.dayproc
    :type highcut: float
    :param highcut: High cut (Hz), passed to pre_processing.dayproc
    :type samp_rate: float
    :param samp_rate: New sampling rate in Hz, passed to \
        pre_processing.dayproc
    :type filt_order: int
    :param filt_order: Filter level, passed to pre_processing.dayproc
    :type length: float
    :param length: Extract length in seconds, passed to _template_gen
    :type prepick: float
    :param prepick: Pre-pick time in seconds
    :type swin: str
    :param swin: Either 'all', 'P' or 'S', to select which phases to output.
    :type debug: int
    :param debug: Level of debugging output, higher=more
    :type plot: bool
    :param plot: Display template plots or not

    :returns: list of obspy.Stream Newly cut templates, one per event
    :raises IOError: If the QuakeML file does not exist

    .. warning:: We suggest giving this function a full day of data, to \
        ensure templates are generated with **exactly** the same processing \
        as the continuous data.  Not doing this will result in slightly \
        reduced cross-correlation values.
    """
    # Perform some checks first
    import os
    import warnings
    if not os.path.isfile(quakeml):
        raise IOError('QuakeML file does not exist')
    import obspy
    # obspy renamed readEvents to read_events at version 1.0
    if int(obspy.__version__.split('.')[0]) >= 1:
        from obspy import read_events
    else:
        from obspy import readEvents as read_events
    from obspy import UTCDateTime
    from eqcorrscan.utils import pre_processing
    # Process waveform data
    st.merge(fill_value='interpolate')
    for i, tr in enumerate(st):
        # Store the returned trace back in the stream so the processed data
        # are kept whether or not dayproc works in place.
        day_start = UTCDateTime(tr.stats.starttime.date)
        st[i] = pre_processing.dayproc(tr, lowcut, highcut, filt_order,
                                       samp_rate, debug=debug,
                                       starttime=day_start)
    # The waveform data are the same for every event, so the set of
    # available station.channel codes only needs building once.
    st_stachans = {'.'.join([tr.stats.station, tr.stats.channel])
                   for tr in st}
    # Read QuakeML file into Catalog class
    catalog = read_events(quakeml)
    templates = []
    for event in catalog:
        # Read in pick info
        print("I have found the following picks")
        for pick in event.picks:
            # phase_hint is optional in QuakeML, so str() guards against None
            print(' '.join([pick.waveform_id.station_code,
                            pick.waveform_id.channel_code,
                            str(pick.phase_hint), str(pick.time)]))
        # Check to see if all picks of this event have a corresponding
        # waveform; only this event's picks are checked, so warnings for
        # earlier events are not repeated.
        for pick in event.picks:
            stachan = '.'.join([pick.waveform_id.station_code,
                                pick.waveform_id.channel_code])
            if stachan not in st_stachans:
                warnings.warn('No data provided for ' + stachan)
        # Cut from a copy so that every event starts from the full stream
        st1 = st.copy()
        # Cut and extract the templates
        template = _template_gen(event.picks, st1, length, swin,
                                 prepick=prepick, plot=plot)
        templates.append(template)
    return templates
ds.q.channel == chans, ds.q.starttime >= q_start, ds.q.endtime <= q_end): st += station.raw_recording wav_read_stop = timer() print('Reading waveforms took %.3f seconds' % (wav_read_stop - wav_read_start)) merg_strt = timer() st.merge(fill_value='interpolate') merg_stp = timer() print('Merging took %.3f seconds' % (merg_stp - merg_strt)) proc_strt = timer() st1 = pre_processing.dayproc(st, lowcut=1.0, highcut=20.0, filt_order=3, samp_rate=50.0, starttime=dto, debug=2, parallel=True) del st proc_stp = timer() print('Pre-processing took %.3f seconds' % (proc_stp - proc_strt)) # Grab all detections from this day day_dets = [det for det in detect_list if det.detect_time.julday == day] # Select random sample from list of dets for plotting purposes rand_dets = [ day_dets[i] for i in np.random.choice(range(len(day_dets)), 30) ] # Create new catalog for day_dets new_cat = cat_util.detections_2_cat( day_dets,
if glob.glob(fname): if not 'stream' in locals(): stream=obsread(fname) else: stream+=obsread(fname) # Process the stream if not Test: print 'Processing the data' stream=stream.merge(fill_value='interpolate') # Merge stream so that each trace is a single channel to # send to pre-processing if not parallel: for tr in stream: # tr.plot() tr=pre_processing.dayproc(tr, brightedef.lowcut, brightdef.highcut,\ brightdef.filter_order, brightdef.samp_rate,\ templatedef.debug, day) else: stream=Parallel(n_jobs=10)(delayed(pre_processing.dayproc)(tr, brightdef.lowcut,\ brightdef.highcut,\ brightdef.filter_order,\ brightdef.samp_rate,\ templatedef.debug, day)\ for tr in stream) stream=Stream(stream) print stream if not Prep: #stream_copy=stream.copy() # Keep the stream safe print "Running the detection routine" # Check that the data are okay detect_templates, detect_nodes=bright_lights.brightness(stations, \
def cat_2_stefan_SAC(cat, inv, wav_dirs, outdir, start=None, end=None):
    """
    Temp gen function for Stefan SAC files

    For every day in the requested range the waveforms for all picked
    stations are read and passed through pre_processing.dayproc. Then, for
    each event with at least five picks, a 30 s window (5 s before to 25 s
    after the last origin's time) is cut for every station with a P-pick
    and written as one SAC file per channel with event, station and pick
    headers populated.

    :param cat: Catalog of events; its events are sorted in place by the
        time of their last origin.
    :param inv: Inventory used to look up network, station and channel
        metadata via inv.select(station=...).
    :param wav_dirs: Iterable of waveform directory prefixes; the year of
        the day being processed is appended to each.
    :param outdir: Output directory. One sub-directory per event is
        created here and dayproc failures are appended to
        dayproc_errors.txt in it.
    :param start: Optional start date string in '%d/%m/%Y' format. If
        not given, the date range is taken from the first and last events
        in the catalog.
    :param end: End date string in '%d/%m/%Y' format. Only used, and
        then required, when start is given.
    :return: None
    """
    import os
    from obspy import UTCDateTime
    import datetime
    from eqcorrscan.utils import pre_processing

    cat.events.sort(key=lambda x: x.origins[-1].time)
    if start:
        cat_start = datetime.datetime.strptime(start, '%d/%m/%Y')
        cat_end = datetime.datetime.strptime(end, '%d/%m/%Y')
    else:
        cat_start = cat[0].origins[-1].time.date
        cat_end = cat[-1].origins[-1].time.date
    for date in date_generator(cat_start, cat_end):
        dto = UTCDateTime(date)
        print('Processing templates for: %s' % str(dto))
        # Establish which events are in this day
        sch_str_start = 'time >= %s' % str(dto)
        sch_str_end = 'time <= %s' % str(dto + 86400)
        tmp_cat = cat.filter(sch_str_start, sch_str_end)
        if len(tmp_cat) == 0:
            print('No events on: %s' % str(dto))
            continue
        stations = list(set([pk.waveform_id.station_code
                             for ev in tmp_cat for pk in ev.picks]))
        wav_ds = ['%s%d' % (d, dto.year) for d in wav_dirs]
        sta_st = grab_day_wavs_stations(wav_ds, dto, stations)
        print('Processing data:')
        # Process the stream
        try:
            st1 = pre_processing.dayproc(sta_st, lowcut=None, highcut=None,
                                         filt_order=None, samp_rate=100.,
                                         starttime=dto, debug=0,
                                         ignore_length=True, num_cores=2)
        except Exception as e:
            # `except NotImplementedError or Exception` only ever caught
            # NotImplementedError (the `or` is evaluated first).  Exception
            # covers both, so any dayproc failure is logged and the day is
            # skipped instead of aborting the whole run.
            print('Found error in dayproc, noting date and continuing')
            print(e)
            with open('%s/dayproc_errors.txt' % outdir, mode='a') as fo:
                fo.write('%s\n%s\n' % (str(date), e))
            continue
        for event in tmp_cat:
            if len(event.picks) < 5:
                print('Too few picks for event. Continuing.')
                continue
            ev_name = str(event.resource_id).split('/')[-1]
            ev_dir = '%s/%s' % (outdir, ev_name)
            # An existing event directory means this event was already done
            if os.path.exists(ev_dir):
                print('Event already written. Moving to next.')
                continue
            os.mkdir(ev_dir)
            big_o = event.origins[-1]
            ev_time = big_o.time
            tr_starttime = ev_time - 5
            tr_endtime = ev_time + 25
            for pick in event.picks:
                # Only take waveforms for stations with P-picks
                # Take all channels for these stations
                # Stefan will make S-picks himself
                pk_sta = pick.waveform_id.station_code
                if pick.phase_hint != 'P':
                    continue
                # Grab just this station from whole day stream
                sta_wavs = st1.select(station=pk_sta)
                # Copy it out of the way and trim
                work_st = sta_wavs.copy().trim(tr_starttime, tr_endtime)
                if len(work_st) == 0:
                    continue
                rel_origin_t = ev_time - work_st[0].stats.starttime
                # Grab stationXML
                sta_inv = inv.select(station=pk_sta)
                net_info = sta_inv[0]
                sta_info = net_info[0]
                for tr in work_st:
                    stachan = '%s.%s' % (tr.stats.station, tr.stats.channel)
                    print('Populating SAC header for ' + stachan)
                    # For each trace manually set the ref time to origin
                    # Create SAC dictionary
                    tr.stats.sac = {}
                    sac = tr.stats['sac']
                    # Reference times (note microsec --> millisec change)
                    sac['nzyear'] = tr_starttime.year
                    sac['nzjday'] = tr_starttime.julday
                    sac['nzhour'] = tr_starttime.hour
                    sac['nzmin'] = tr_starttime.minute
                    sac['nzsec'] = tr_starttime.second
                    sac['nzmsec'] = int(tr_starttime.microsecond // 1000)
                    # Origin time in relation to relative time
                    sac['o'] = rel_origin_t
                    sac['iztype'] = 9
                    # Event info
                    sac['evdp'] = big_o.depth / 1000
                    sac['evla'] = big_o.latitude
                    sac['evlo'] = big_o.longitude
                    # Network/Station info
                    sac['knetwk'] = net_info.code
                    sac['kstnm'] = sta_info.code
                    sac['stla'] = sta_info.latitude
                    sac['stlo'] = sta_info.longitude
                    sac['stel'] = sta_info.elevation
                    # Channel specific info
                    for chan in sta_info:
                        if chan.code == tr.stats.channel:
                            sac['stdp'] = chan.depth
                            sac['cmpaz'] = chan.azimuth
                            sac['kcmpnm'] = chan.code
                            # SAC cmpinc is deg from vertical (not horiz)
                            if chan.dip == -90.0:
                                sac['cmpinc'] = 180.0
                                sac['lpspol'] = False
                            elif chan.dip == 90.0:
                                sac['cmpinc'] = 0.0
                                sac['lpspol'] = True
                            else:
                                sac['cmpinc'] = 90.0
                    # Assign the pick time and type if exists.  Only P-picks
                    # reach this point (others are skipped above), so only
                    # the "a" header is ever written.
                    if tr.stats.channel == pick.waveform_id.channel_code:
                        print('Writing pick to "a" header')
                        sac['a'] = pick.time - tr.stats.starttime
                        sac['ka'] = pick.phase_hint
                    else:
                        print('No pick on %s' % stachan)
                    filename = '%s/%s/%s%s_%s_%s.sac' % (
                        outdir, ev_name, ev_name, tr.stats.network,
                        tr.stats.station, tr.stats.channel)
                    print('Writing event ' + filename + ' to file...')
                    tr.write(filename, format="SAC")
    return
def test_match_filter(self, samp_rate=20.0, debug=0):
    """
    Function to test the capabilities of match_filter and just check that
    it is working!  Uses synthetic templates and seeded, randomised data.

    Detections are compared with the seed times; a detection within ten
    samples of a seed counts as true.  The test passes when at least one
    true detection is made and the ratio of false to true detections is
    below 0.25.

    :type samp_rate: float
    :param samp_rate: Sampling rate in Hz used for the synthetic data and
        for the processing.
    :type debug: int
    :param debug: Debug level, higher the number the more output.
    """
    from eqcorrscan.utils import pre_processing
    from eqcorrscan.utils import plotting
    from eqcorrscan.core import match_filter
    from eqcorrscan.utils.synth_seis import generate_synth_data
    from obspy import UTCDateTime
    import string
    # Generate a random dataset
    templates, data, seeds = generate_synth_data(nsta=5, ntemplates=2,
                                                 nseeds=50,
                                                 samp_rate=samp_rate,
                                                 t_length=6.0, max_amp=5.0,
                                                 debug=debug)
    # Notes to the user: If you use more templates you should ensure they
    # are more different, e.g. set the data to have larger moveouts,
    # otherwise similar templates will detect events seeded by another
    # template.
    # Test the pre_processing functions
    data = pre_processing.dayproc(st=data, lowcut=2.0, highcut=8.0,
                                  filt_order=3, samp_rate=samp_rate,
                                  debug=0, starttime=UTCDateTime(0))
    if debug > 0:
        data.plot()
    # Filter the data and the templates
    for template in templates:
        pre_processing.shortproc(st=template, lowcut=2.0, highcut=8.0,
                                 filt_order=3, samp_rate=samp_rate)
        if debug > 0:
            template.plot()
    template_names = list(string.ascii_lowercase)[0:len(templates)]
    detections = match_filter.match_filter(template_names=template_names,
                                           template_list=templates,
                                           st=data, threshold=10.0,
                                           threshold_type='MAD',
                                           trig_int=6.0, plotvar=False,
                                           plotdir='.', cores=1, debug=0)
    # Compare the detections to the seeds
    print('This test made ' + str(len(detections)) + ' detections')
    ktrue = 0
    kfalse = 0
    for detection in detections:
        print(detection.template_name)
        i = template_names.index(detection.template_name)
        t_seeds = seeds[i]
        dtime_samples = int((detection.detect_time - UTCDateTime(0)) *
                            samp_rate)
        if dtime_samples in t_seeds['time']:
            j = list(t_seeds['time']).index(dtime_samples)
            print('Detection at SNR of: ' + str(t_seeds['SNR'][j]))
            ktrue += 1
        else:
            diffs = abs(t_seeds['time'] - dtime_samples)
            min_diff = min(diffs)
            if min_diff < 10:
                # If there is a match within ten samples then it is
                # good enough
                j = list(diffs).index(min_diff)
                print('Detection at SNR of: ' + str(t_seeds['SNR'][j]))
                ktrue += 1
            else:
                print('Detection at sample: ' + str(dtime_samples) +
                      ' does not match anything in seed times:')
                kfalse += 1
            print('Minimum difference in samples is: ' + str(min_diff))
    # Plot the detections
    if debug > 3:
        for i, template in enumerate(templates):
            times = [d.detect_time.datetime for d in detections
                     if d.template_name == template_names[i]]
            print(times)
            plotting.detection_multiplot(data, template, times)
    # Set an 'acceptable' ratio of positive to false detections
    print(str(ktrue) + ' true detections and ' + str(kfalse) +
          ' false detections')
    # Fail cleanly (rather than with ZeroDivisionError) if nothing matched
    self.assertTrue(ktrue > 0)
    # float() forces true division: with integer division any run with
    # kfalse < ktrue would give 0 and pass regardless of the real ratio.
    self.assertTrue(float(kfalse) / ktrue < 0.25)
def run():
    """Internal run function so that this can be called from interactive
    python session for debugging.

    Reads the parameters from '../parameters/VSP_parameters.txt', writes
    them to a new time-stamped log file in '../detections', reads the
    templates named in the parameters, and then, for every day from the
    start date up to (not including) the end date, reads and processes the
    archive data and runs match_filter.  One line per detection is
    appended to the log.

    :raises IOError: If a template trace does not have the sampling rate
        given in the parameters.
    """
    from eqcorrscan.utils import pre_processing
    from eqcorrscan.utils.archive_read import read_data
    from eqcorrscan.core.match_filter import match_filter
    from obspy import UTCDateTime
    from eqcorrscan.utils.parameters import read_parameters
    import warnings
    import os
    import datetime as dt
    from obspy import read
    import copy

    # Read parameter files
    par = read_parameters('../parameters/VSP_parameters.txt')
    # Log the input parameters
    log_name = ('EQcorrscan_detection_log_' +
                dt.datetime.now().strftime('%Y.%j.%H:%M:%S') + '.log')
    # The context manager closes the log even if reading, checking or
    # detection raises part-way through.
    with open(os.path.join('..', 'detections', log_name), 'w') as f:
        for parameter, value in par.__dict__.items():
            f.write(parameter + ': ' + str(value) + '\n')
        f.write('\n###################################\n')
        f.write('template, detect-time, cccsum, threshold, '
                'number of channels\n')

        days = (par.enddate.date - par.startdate.date).days
        dates = [par.startdate + (i * 86400) for i in range(days)]

        # Read in templates
        templates = [read(os.path.join('..', 'templates', template))
                     for template in par.template_names]
        # We don't need the full file path in the match-filter routine,
        # just the final 'name'
        template_names_short = [t_name.split(os.sep)[-1]
                                for t_name in par.template_names]
        warnings.warn('Unable to check whether filters are correct in '
                      'templates')
        # Check that the sampling rate is correct...
        for st in templates:
            for tr in st:
                if tr.stats.sampling_rate != par.samp_rate:
                    msg = 'Template sampling rate is not correct: ' + str(tr)
                    raise IOError(msg)
        # Work out which stations and channels we will be using
        stachans = list(set((tr.stats.station, tr.stats.channel)
                            for st in templates for tr in st))
        # Loop through days
        for date in dates:
            # Read in the data
            st = read_data(par.archive, par.arc_type, date.date, stachans)
            # Process the data
            st.merge(fill_value='interpolate')
            st = pre_processing.dayproc(st, lowcut=par.lowcut,
                                        highcut=par.highcut,
                                        filt_order=par.filt_order,
                                        samp_rate=par.samp_rate,
                                        debug=par.debug,
                                        starttime=UTCDateTime(date.date))
            # Will remove templates if they are deemed useless
            # (eg no matching channels), so hand over copies each day
            template_names_short_copy = copy.deepcopy(template_names_short)
            templates_copy = copy.deepcopy(templates)
            # Now conduct matched-filter
            detections = match_filter(
                template_names=template_names_short_copy,
                template_list=templates_copy, st=st,
                threshold=par.threshold,
                threshold_type=par.threshold_type,
                trig_int=par.trigger_interval, plotvar=par.plotvar,
                plotdir=par.plotdir, cores=par.cores, tempdir=par.tempdir,
                debug=par.debug, plot_format=par.plot_format)
            # Log the output
            for detection in detections:
                f.write(', '.join([detection.template_name,
                                   str(detection.detect_time),
                                   str(detection.detect_val),
                                   str(detection.threshold),
                                   str(detection.no_chans) + '\n']))
wavefiles=glob.glob('test_data/tutorial_data/'+\ pick.station+'.*') else: wavefiles+=glob.glob('test_data/tutorial_data/'+\ pick.station+'.*') wavefiles=list(set(wavefiles)) for wavefile in wavefiles: print 'Reading data from '+wavefile if not 'st' in locals(): st=read(wavefile) else: st+=read(wavefile) st=st.merge(fill_value='interpolate') day=st[0].stats.starttime.date for tr in st: tr=pre_processing.dayproc(tr, 1.0, 20.0, 3, 100.0,\ matchdef.debug, day) # Apply a small amoutn of delay before the pick for pick in picks: pick.time=pick.time-0.1 template=template_gen._template_gen(picks, st, 1.0, 'all',\ tempdef=templatedef) # This will generate an obspy.Stream object # Append this Stream to the list of templates templates+=[template] template_names.append('tutorial_'+str(i)) # Plot the template just to check that all is well! template.plot(size=(800,600), equal_scale=False) # Save template for later template.write('test_data/tutorial_data/'+template_names[i]+'_template.ms',\ format='MSEED') i+=1
def plot_network_arrivals(wav_dirs, lowcut, highcut, start, end,
                          sta_list=None, remove_resp=False, inv=None,
                          dto=None, ev=None):
    """
    Plot data for the whole network at a given dto

    This is intended for plotting teleseismic arrivals to check polarities

    The EHZ channel of every station is read for the day containing the
    reference time, processed with pre_processing.dayproc, trimmed to the
    requested window and plotted as one trace per station, each divided
    by its maximum sample and offset vertically.

    :param wav_dirs: Waveform directories, passed to grab_day_wavs.
    :param lowcut: Low cut passed to pre_processing.dayproc.
    :param highcut: High cut passed to pre_processing.dayproc.
    :param start: Offset (added to dto) at which the plot window starts.
    :param end: Offset (added to dto) at which the plot window ends.
    :param sta_list: Stations to plot; defaults to a built-in station list.
    :param remove_resp: If True, remove the instrument response (output
        'DISP') from each trace before plotting.
    :param inv: Inventory used to sort the traces by station latitude and
        for response removal.  If None the traces are left unsorted.
    :param dto: Reference time; ignored when ev is given.
    :param ev: Event whose last origin time is used as the reference time.
    :raises ValueError: If neither dto nor ev is given.
    :return: None
    """
    if not sta_list:
        sta_list = ['ALRZ', 'ARAZ', 'HRRZ', 'NS01', 'NS02', 'NS03', 'NS04',
                    'NS05', 'NS06', 'NS07', 'NS08', 'NS09', 'NS10', 'NS11',
                    'NS12', 'NS13', 'NS14', 'NS15', 'NS16', 'NS18', 'PRRZ',
                    'RT01', 'RT02', 'RT03', 'RT05', 'RT06', 'RT07', 'RT08',
                    'RT09', 'RT10', 'RT11', 'RT12', 'RT13', 'RT14', 'RT15',
                    'RT16', 'RT17', 'RT18', 'RT19', 'RT20', 'RT21', 'RT22',
                    'RT23', 'THQ2', 'WPRZ']
    stachans = {sta: ['EHZ'] for sta in sta_list}
    if ev:
        dto = ev.origins[-1].time
    if dto is None:
        # Without a reference time there is nothing to read or plot
        raise ValueError('Either dto or ev must be provided')
    # Get start of day: build midnight of the same date directly rather
    # than copying dto and overwriting its time fields one by one.
    from obspy import UTCDateTime
    dto_start = UTCDateTime(dto.date)
    st = grab_day_wavs(wav_dirs, dto_start, stachans)
    # pre_filt values handed to remove_response, keyed by station group
    pf_dict = {'MERC': [0.001, 1.0, 35., 45.],
               'WPRZ': [0.001, 0.5, 35., 45.],
               'GEONET': [0.001, 0.01, 40., 48.]}
    if inv is not None:
        # Order the traces by station latitude; this needs the inventory,
        # so without one the read order is kept.
        st.traces.sort(
            key=lambda x: inv.select(station=x.stats.station)[0][0].latitude)
    st1 = pre_processing.dayproc(st, lowcut, highcut, 3, 100.,
                                 starttime=dto, num_cores=4)
    trimmed = st1.trim(starttime=dto + start, endtime=dto + end)
    if remove_resp:
        for tr in trimmed:
            sta = tr.stats.station
            if sta == 'WPRZ':
                prefilt = pf_dict['WPRZ']
            elif sta.endswith('Z'):
                prefilt = pf_dict['GEONET']
            else:
                prefilt = pf_dict['MERC']
            # Cosine taper and demeaning applied by default
            tr.remove_response(inventory=inv, pre_filt=prefilt,
                               output='DISP')
    labels = []
    for tr in trimmed:
        labels.append(tr.stats.station)
        tr.data = tr.data / max(tr.data)
    fig, ax = plt.subplots(figsize=(3, 6))
    vert_steps = np.linspace(0, len(trimmed), len(trimmed))
    for tr, vert_step in zip(trimmed, vert_steps):
        ax.plot(tr.data + vert_step, color='k', linewidth=0.3)
    ax.yaxis.set_ticks(vert_steps)
    ax.set_yticklabels(labels, fontsize=8)
    plt.show()
    return