def test_same_detections_individual_and_parallel(self): """ Check that the same detections are made regardless of whether templates are run together or separately. """ individual_detections = [] for template, template_name in zip(self.templates, self.template_names): individual_detections += match_filter( template_names=[template_name], template_list=[template], st=self.st.copy(), threshold=8.0, threshold_type='MAD', trig_int=6.0, plotvar=False, plotdir='.', cores=1) individual_dict = [] for detection in individual_detections: individual_dict.append({'template_name': detection.template_name, 'time': detection.detect_time, 'cccsum': detection.detect_val}) detections = match_filter(template_names=self.template_names, template_list=self.templates, st=self.st, threshold=8.0, threshold_type='MAD', trig_int=6.0, plotvar=False, plotdir='.', cores=1) self.assertEqual(len(individual_detections), len(detections)) for detection in detections: detection_dict = {'template_name': detection.template_name, 'time': detection.detect_time, 'cccsum': detection.detect_val} self.assertTrue(detection_dict in individual_dict)
def test_non_equal_template_lengths(self): templates = [self.templates[0].copy()] templates[0][0].data = np.concatenate([templates[0][0].data, np.random.randn(10)]) with self.assertRaises(MatchFilterError): match_filter(template_names=[self.template_names[0]], template_list=templates, st=self.st, threshold=8.0, threshold_type='MAD', trig_int=6.0, plotvar=False, plotdir='.', cores=1)
def test_no_matching_data(self): """ No matching data between continuous and templates.""" st = self.st.copy() for tr, staname in zip(st, ['a', 'b', 'c', 'd', 'e']): tr.stats.station = staname with self.assertRaises(IndexError): match_filter(template_names=self.template_names, template_list=self.templates, st=st, threshold=8.0, threshold_type='MAD', trig_int=6.0, plotvar=False, plotdir='.', cores=1)
def test_masked_template(self): templates = [self.templates[0].copy()] tr = templates[0][0].copy() tr.stats.starttime += 3600 templates[0] += tr templates[0].merge() with self.assertRaises(MatchFilterError): match_filter(template_names=[self.template_names[0]], template_list=templates, st=self.st, threshold=8.0, threshold_type='MAD', trig_int=6.0, plotvar=False, plotdir='.', cores=1)
def test_duplicate_cont_data(self): """ Check that error is raised if duplicate channels are present in the continuous data.""" tr = self.st[0].copy() tr.data = np.random.randn(100) st = self.st.copy() + tr with self.assertRaises(MatchFilterError): match_filter(template_names=self.template_names, template_list=self.templates, st=st, threshold=8.0, threshold_type='MAD', trig_int=6.0, plotvar=False, plotdir='.', cores=1)
def predict(self, dataset_path, template_path, catalog_path):
    template = load_stream(template_path)
    stream = load_stream(dataset_path)
    # Fall back to the default threshold if fit() has not been run
    try:
        print(self.beta)
    except AttributeError:
        self.beta = 8.5
    templates = [template]
    template_name = [os.path.split(template_path)[-1].split('.')[0]]
    day_streams = self._split_stream_into_day_streams(stream)
    print('-------')
    print(' + Running template matching method on test set')
    detections = []
    for st in day_streams:
        detections.append(mf.match_filter(
            template_names=template_name, template_list=templates, st=st,
            threshold=self.beta, threshold_type='MAD', trig_int=1.0,
            plotvar=False))
    # Flatten list of detections
    self.detections = [d for detection in detections for d in detection]
def fit(self, dataset_path, template_path, catalog_path):
    """ Detect events in stream for various beta and find the optimal beta
    parameter """
    template = load_stream(template_path)
    stream = load_stream(dataset_path)
    templates = [template]
    template_name = [os.path.split(template_path)[-1].split('.')[0]]
    day_streams = self._split_stream_into_day_streams(stream)
    detection_results = np.zeros(len(self._betas))
    for k in range(len(self._betas)):
        beta = self._betas[k]
        print('------')
        print(' + Running template matching method for beta =', beta)
        detections = []
        for st in day_streams:
            detections.append(mf.match_filter(
                template_names=template_name, template_list=templates,
                st=st, threshold=beta, threshold_type='MAD', trig_int=1.0,
                plotvar=False))
        # Flatten list of detections
        detections = [d for detection in detections for d in detection]
        false_pos, false_neg = self.score(detections, catalog_path)
        print('FP: {}, FN: {}'.format(false_pos, false_neg))
        detection_results[k] = false_pos + false_neg
    self.beta = self._betas[np.argmin(detection_results)]
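# The _split_stream_into_day_streams helper used in predict() and fit() above
# is not shown in these snippets. The sketch below is a hypothetical
# implementation, assuming an obspy Stream and UTC day boundaries; the real
# helper may differ.
from obspy import UTCDateTime


def _split_stream_into_day_streams(stream):
    """Slice a continuous stream into a list of day-long streams (sketch)."""
    start = min(tr.stats.starttime for tr in stream)
    end = max(tr.stats.endtime for tr in stream)
    day_streams = []
    day_start = UTCDateTime(start.year, start.month, start.day)
    while day_start < end:
        # Stream.slice returns a new Stream trimmed to the requested window
        day_streams.append(stream.slice(day_start, day_start + 86400))
        day_start += 86400
    return day_streams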
def test_synth_large(): print('\tGenerating Synthetic data\n\n') templates, data, seeds = synth_seis.generate_synth_data( nsta=10, ntemplates=20, nseeds=5, samp_rate=100, t_length=6, max_amp=5, max_lag=5, debug=0) print('\tRunning the parallel detections\n\n') with Timer() as t: detections = match_filter( template_names=[str(i) for i in range(len(templates))], template_list=templates, st=data, threshold=8, threshold_type='MAD', trig_int=6, plotvar=False, cores=4, output_event=False) print('Parallel run took %f seconds' % t.secs) print('\tRunning the serial detections\n\n') with Timer() as t: detections = match_filter( template_names=[str(i) for i in range(len(templates))], template_list=templates, st=data, threshold=8, threshold_type='MAD', trig_int=6, plotvar=False, cores=None, output_event=False) print('Serial run took %f seconds' % t.secs)
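# The Timer context manager used in the benchmark above is assumed to be a
# small wall-clock timer exposing the elapsed time as .secs; a minimal sketch
# follows.
import time


class Timer(object):
    """Context manager that records elapsed wall-clock time in self.secs."""

    def __enter__(self):
        self.start = time.time()
        return self

    def __exit__(self, *args):
        self.secs = time.time() - self.start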
def test_catalog_extraction(self): detections, det_cat, detection_streams = \ match_filter(template_names=self.template_names, template_list=self.templates, st=self.st, threshold=8.0, threshold_type='MAD', trig_int=6.0, plotvar=False, plotdir='.', cores=1, extract_detections=True, output_cat=True) self.assertEqual(len(detections), 4) self.assertEqual(len(detection_streams), len(detections)) self.assertEqual(len(detection_streams), len(det_cat))
def test_missing_cont_channel(self): """ Remove one channel from continuous data and check that everything still works. """ st = self.st.copy() st.remove(st[-1]) detections, det_cat = match_filter( template_names=self.template_names, template_list=self.templates, st=st, threshold=8.0, threshold_type='MAD', trig_int=6.0, plotvar=False, plotdir='.', cores=1, output_cat=True) self.assertEqual(len(detections), 1) self.assertEqual(detections[0].no_chans, 5) self.assertEqual(len(detections), len(det_cat))
def test_duplicate_channels_in_template(self): """ Test using a template with duplicate channels. """ templates = copy.deepcopy(self.templates) # Do this to test an extra condition in match_filter templates[0].remove(templates[0].select(station='CNGZ')[0]) detections = match_filter(template_names=self.template_names, template_list=templates, st=self.st, threshold=8.0, threshold_type='MAD', trig_int=6.0, plotvar=False, plotdir='.', cores=1) self.assertEqual(len(detections), 1) self.assertEqual(detections[0].no_chans, 6)
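# The self.assert* calls in the tests above imply a unittest.TestCase
# subclass; a minimal, assumed way to run them as a script:
if __name__ == '__main__':
    import unittest
    unittest.main()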
def run(): """Internal run function so that this can be called from interactive \ python session for debugging.""" from eqcorrscan.utils import pre_processing from eqcorrscan.utils.archive_read import read_data from eqcorrscan.core.match_filter import match_filter from obspy import UTCDateTime, Stream from eqcorrscan.utils.parameters import read_parameters import warnings import os import datetime as dt from obspy import read import copy # Read parameter files par = read_parameters('../parameters/VSP_parameters.txt') # Log the input parameters log_name = ('EQcorrscan_detection_log_' + dt.datetime.now().strftime('%Y.%j.%H:%M:%S') + '.log') f = open(os.path.join('..', 'detections', log_name), 'w') for parameter in par.__dict__.keys(): f.write(parameter + ': ' + str(par.__dict__.get(parameter)) + '\n') f.write('\n###################################\n') f.write('template, detect-time, cccsum, threshold, number of channels\n') days = (par.enddate.date - par.startdate.date).days dates = [par.startdate + (i * 86400) for i in range(days)] # Read in templates templates = [ read(os.path.join('..', 'templates', template)) for template in par.template_names ] # We don't need the full file path in the match-filter routine, just the # final 'name' template_names_short = [ t_name.split(os.sep)[-1] for t_name in par.template_names ] warnings.warn('Unable to check whether filters are correct in templates') # Check that the sampling rate is correct... for st in templates: for tr in st: if not tr.stats.sampling_rate == par.samp_rate: msg = 'Template sampling rate is not correct: ' + tr.__str__() raise IOError(msg) # Work out which stations and channels we will be using stachans = [(tr.stats.station, tr.stats.channel) for st in templates for tr in st] stachans = list(set(stachans)) # Loop through days for date in dates: # Read in the data st = read_data(par.archive, par.arc_type, date.date, stachans) # Process the data st.merge(fill_value='interpolate') st = pre_processing.dayproc(st, lowcut=par.lowcut, highcut=par.highcut, filt_order=par.filt_order, samp_rate=par.samp_rate, debug=par.debug, starttime=UTCDateTime(date.date)) # Will remove templates if they are deemed useless # (eg no matching channels) template_names_short_copy = copy.deepcopy(template_names_short) templates_copy = copy.deepcopy(templates) # Now conduct matched-filter detections = match_filter(template_names=template_names_short_copy, template_list=templates_copy, st=st, threshold=par.threshold, threshold_type=par.threshold_type, trig_int=par.trigger_interval, plotvar=par.plotvar, plotdir=par.plotdir, cores=par.cores, tempdir=par.tempdir, debug=par.debug, plot_format=par.plot_format) # Log the output for detection in detections: f.write(', '.join([ detection.template_name, str(detection.detect_time), str(detection.detect_val), str(detection.threshold), str(detection.no_chans) + '\n' ])) f.close()
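# The detection log above is written with manual string joins; the sketch
# below is a hypothetical refactor using the standard-library csv module,
# keeping the same columns while handling quoting and line endings.
import csv


def write_detection_rows(path, detections):
    """Append one row per detection: name, time, cccsum, threshold, n_chans."""
    with open(path, 'a', newline='') as fh:
        writer = csv.writer(fh)
        for detection in detections:
            writer.writerow([detection.template_name,
                             str(detection.detect_time),
                             detection.detect_val, detection.threshold,
                             detection.no_chans])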
print('Merging took %.3f seconds' % (merg_stp - merg_strt))
proc_strt = timer()
st1 = pre_processing.dayproc(st, lowcut=1.0, highcut=20.0, filt_order=3,
                             samp_rate=50.0, starttime=dto, debug=2,
                             parallel=True, as_float32=True)
del st
proc_stp = timer()
print('Pre-processing took %.3f seconds' % (proc_stp - proc_strt))
# RUN MATCH FILTER (looping through chunks of templates due to RAM)
# Assumes at least 40 templates so that chunk_size is non-zero
chunk_size = len(templates) // 40
chunk_temps = [templates[i:i + chunk_size]
               for i in range(0, len(templates), chunk_size)]
chunk_temp_names = [template_names[i:i + chunk_size]
                    for i in range(0, len(template_names), chunk_size)]
for temps, temp_names in zip(chunk_temps, chunk_temp_names):
    detections = match_filter.match_filter(temp_names, temps, st1,
                                           threshold=8.0,
                                           threshold_type='MAD',
                                           trig_int=6.0, plotvar=False,
                                           cores='all', debug=2)
    # Write detections to a file to check later
    for detection in detections:
        det_writer.writerow([detection.template_name, detection.detect_time,
                             detection.detect_val, detection.threshold,
                             detection.no_chans])
    del detections
# Print out runtime
script_end = timer()
print('Instance took %.3f seconds' % (script_end - script_start))
# Work out what day we are working on, required as we will pad the data to be
# day-long
day = st[0].stats.starttime.date
# Process the data in the same way as the template
for tr in st:
    tr = pre_processing.dayproc(tr, 1.0, 20.0, 3, 100.0, debug, day)
# Compute detections
detections = match_filter.match_filter(template_names, templates, st,
                                       threshold, threshtype, trig_int,
                                       plotvar=True, cores=2, tempdir=False,
                                       debug=debug, plot_format='pdf')
# We now have a list of detections! We can output these to a file to check
# later
f = open('tutorial_detections.csv', 'w')
for detection in detections:
    line = ', '.join([detection.template_name, str(detection.detect_time),
                      str(detection.detect_val), str(detection.threshold),
                      str(detection.no_chans)])
    f.write(line + os.linesep)
f.close()
if 'st' not in locals():
    st = read('test_data/tutorial_data/' + stachan[0] + '.*..*' +
              stachan[1][-1] + '.*')
else:
    st += read('test_data/tutorial_data/' + stachan[0] + '.*..*' +
               stachan[1][-1] + '.*')
# Merge the data to account for miniseed files being written in chunks
# We need continuous day-long data, so data are padded if there are gaps
st = st.merge(fill_value='interpolate')
# Work out what day we are working on, required as we will pad the data to be
# day-long
day = st[0].stats.starttime.date
# Process the data in the same way as the template
for tr in st:
    tr = pre_processing.dayproc(tr, 1.0, 20.0, 3, 100.0, matchdef.debug, day)
# Compute detections
detections = match_filter.match_filter(template_names, templates, st,
                                       matchdef.threshold,
                                       matchdef.threshtype,
                                       matchdef.trig_int, True, 'temp_0')
# We now have a list of detections! We can output these to a file to check
# later
f = open('tutorial_detections.csv', 'w')
for detection in detections:
    f.write(detection.template_name + ', ' + str(detection.detect_time) +
            ', ' + str(detection.detect_val) + ', ' +
            str(detection.threshold) + ', ' + str(detection.no_chans) + '\n')
f.close()
def test_match_filter(self, samp_rate=20.0, debug=0): """ Function to test the capabilities of match_filter and just check that \ it is working! Uses synthetic templates and seeded, randomised data. :type debug: int :param debug: Debug level, higher the number the more output. """ from eqcorrscan.utils import pre_processing from eqcorrscan.utils import plotting from eqcorrscan.core import match_filter from eqcorrscan.utils.synth_seis import generate_synth_data from obspy import UTCDateTime import string # Generate a random dataset templates, data, seeds = generate_synth_data(nsta=5, ntemplates=2, nseeds=50, samp_rate=samp_rate, t_length=6.0, max_amp=5.0, debug=debug) # Notes to the user: If you use more templates you should ensure they # are more different, e.g. set the data to have larger moveouts, # otherwise similar templates will detect events seeded by another # template. # Test the pre_processing functions data = pre_processing.dayproc(st=data, lowcut=2.0, highcut=8.0, filt_order=3, samp_rate=samp_rate, debug=0, starttime=UTCDateTime(0)) if debug > 0: data.plot() # Filter the data and the templates for template in templates: pre_processing.shortproc(st=template, lowcut=2.0, highcut=8.0, filt_order=3, samp_rate=samp_rate) if debug > 0: template.plot() template_names = list(string.ascii_lowercase)[0:len(templates)] detections = match_filter.match_filter(template_names=template_names, template_list=templates, st=data, threshold=10.0, threshold_type='MAD', trig_int=6.0, plotvar=False, plotdir='.', cores=1, debug=0) # Compare the detections to the seeds print('This test made ' + str(len(detections)) + ' detections') ktrue = 0 kfalse = 0 for detection in detections: print(detection.template_name) i = template_names.index(detection.template_name) t_seeds = seeds[i] dtime_samples = int((detection.detect_time - UTCDateTime(0)) * samp_rate) if dtime_samples in t_seeds['time']: j = list(t_seeds['time']).index(dtime_samples) print('Detection at SNR of: ' + str(t_seeds['SNR'][j])) ktrue += 1 else: min_diff = min(abs(t_seeds['time'] - dtime_samples)) if min_diff < 10: # If there is a match within ten samples then it is # good enough j = list(abs(t_seeds['time'] - dtime_samples)).index(min_diff) print('Detection at SNR of: ' + str(t_seeds['SNR'][j])) ktrue += 1 else: print('Detection at sample: ' + str(dtime_samples) + ' does not match anything in seed times:') kfalse += 1 print('Minimum difference in samples is: ' + str(min_diff)) # Plot the detections if debug > 3: for i, template in enumerate(templates): times = [d.detect_time.datetime for d in detections if d.template_name == template_names[i]] print(times) plotting.detection_multiplot(data, template, times) # Set an 'acceptable' ratio of positive to false detections print(str(ktrue) + ' true detections and ' + str(kfalse) + ' false detections') self.assertTrue(kfalse / ktrue < 0.25)
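# The within-ten-samples seed matching above is repeated in a second variant
# of this test below; a hedged helper (hypothetical, using numpy) expressing
# the same check:
import numpy as np


def nearest_seed(dtime_samples, seed_times, tolerance=10):
    """Return the index of the nearest seed within tolerance samples,
    or None if no seed is close enough (sketch)."""
    diffs = np.abs(np.asarray(seed_times) - dtime_samples)
    j = int(np.argmin(diffs))
    return j if diffs[j] < tolerance else None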
def run_tutorial(plot=False): """Main function to run the tutorial dataset.""" from eqcorrscan.utils import pre_processing from eqcorrscan.utils import plotting from eqcorrscan.core import match_filter import glob # This import section copes with namespace changes between obspy versions import obspy if int(obspy.__version__.split('.')[0]) >= 1: from obspy.clients.fdsn import Client else: from obspy.fdsn import Client from obspy import UTCDateTime, Stream, read # First we want to load our templates template_names = glob.glob('tutorial_template_*.ms') if len(template_names) == 0: raise IOError('Template files not found, have you run the template ' + 'creation tutorial?') templates = [read(template_name) for template_name in template_names] # Work out what stations we have and get the data for them stations = [] for template in templates: for tr in template: stations.append((tr.stats.station, tr.stats.channel)) # Get a unique list of stations stations = list(set(stations)) # We are going to look for detections on the day of our template, however, to # generalize, we will write a loop through the days between our templates, in # this case that is only one day. template_days = [] for template in templates: template_days.append(template[0].stats.starttime.date) template_days = sorted(template_days) kdays = (template_days[-1] - template_days[0]).days + 1 unique_detections = [] for i in range(kdays): t1 = UTCDateTime(template_days[0]) + (86400 * i) t2 = t1 + 86400 # Generate the bulk information to query the GeoNet database bulk_info = [] for station in stations: bulk_info.append(('NZ', station[0], '*', station[1][0] + 'H' + station[1][-1], t1, t2)) # Set up a client to access the GeoNet database client = Client("GEONET") # Note this will take a little while. print('Downloading seismic data, this may take a while') st = client.get_waveforms_bulk(bulk_info) # Merge the stream, it will be downloaded in chunks st.merge(fill_value='interpolate') # Work out what data we actually have to cope with possible lost data stations = list(set([tr.stats.station for tr in st])) # Set how many cores we want to parallel across, we will set this to four # as this is the number of templates, if your machine has fewer than four # cores/CPUs the multiprocessing will wait until there is a free core. # Setting this to be higher than the number of templates will have no # increase in speed as only detections for each template are computed in # parallel. It may also slow your processing by using more memory than # needed, to the extent that swap may be filled. ncores = 4 # Pre-process the data to set frequency band and sampling rate # Note that this is, and MUST BE the same as the parameters used for the # template creation. 
print('Processing the seismic data') st = pre_processing.dayproc(st, lowcut=2.0, highcut=9.0, filt_order=4, samp_rate=20.0, debug=0, starttime=t1, num_cores=ncores) # Convert from list to stream st = Stream(st) # Now we can conduct the matched-filter detection detections = match_filter.match_filter(template_names=template_names, template_list=templates, st=st, threshold=8.0, threshold_type='MAD', trig_int=6.0, plotvar=plot, plotdir='.', cores=ncores, tempdir=False, debug=1, plot_format='jpg') # Now lets try and work out how many unique events we have just to compare # with the GeoNet catalog of 20 events on this day in this sequence for master in detections: keep = True for slave in detections: if not master == slave and\ abs(master.detect_time - slave.detect_time) <= 1.0: # If the events are within 1s of each other then test which # was the 'best' match, strongest detection if not master.detect_val > slave.detect_val: keep = False break if keep: unique_detections.append(master) print('We made a total of ' + str(len(unique_detections)) + ' detections') for detection in unique_detections: print('Detection at :' + str(detection.detect_time) + ' for template ' + detection.template_name + ' with a cross-correlation sum of: ' + str(detection.detect_val)) # We can plot these too if plot: stplot = st.copy() template = templates[template_names.index(detection.template_name)] lags = sorted([tr.stats.starttime for tr in template]) maxlag = lags[-1] - lags[0] stplot.trim(starttime=detection.detect_time - 10, endtime=detection.detect_time + maxlag + 10) plotting.detection_multiplot(stplot, template, [detection.detect_time.datetime]) return unique_detections
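# The nested master/slave loop above reappears in several of these scripts;
# a hedged helper (hypothetical refactor, O(n**2) like the original) that
# keeps only the strongest detection within a given time window:
def get_unique_detections(detections, time_diff=1.0):
    """Keep the strongest of any detections within time_diff seconds."""
    unique = []
    for master in detections:
        keep = True
        for slave in detections:
            if not master == slave and \
                    abs(master.detect_time - slave.detect_time) <= time_diff:
                # Ties and weaker correlation sums are dropped
                if not master.detect_val > slave.detect_val:
                    keep = False
                    break
        if keep:
            unique.append(master)
    return unique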
st += read(data_file)
# Merge the data to account for miniseed files being written in chunks
# We need continuous day-long data, so data are padded if there are gaps
st = st.merge(fill_value='interpolate')
# Work out what day we are working on, required as we will pad the data to be
# day-long
day = st[0].stats.starttime.date
# Process the data in the same way as the template
for tr in st:
    tr = pre_processing.dayproc(tr, 1.0, 20.0, 3, 100.0, debug, day)
# Compute detections
detections = match_filter.match_filter(template_names, templates, st,
                                       threshold, threshtype, trig_int,
                                       plotvar=True, cores=2, tempdir=False,
                                       debug=debug, plot_format='pdf')
# We now have a list of detections! We can output these to a file to check
# later
f = open('tutorial_detections.csv', 'w')
for detection in detections:
    line = ', '.join([detection.template_name, str(detection.detect_time),
                      str(detection.detect_val), str(detection.threshold),
                      str(detection.no_chans)])
    f.write(line)
    print(line)
    f.write(os.linesep)
f.close()
def test_match_filter(self, samp_rate=20.0, debug=0): """ Function to test the capabilities of match_filter and just check that \ it is working! Uses synthetic templates and seeded, randomised data. :type debug: int :param debug: Debug level, higher the number the more output. """ from eqcorrscan.utils import pre_processing from eqcorrscan.utils import plotting from eqcorrscan.core import match_filter from eqcorrscan.utils.synth_seis import generate_synth_data from obspy import UTCDateTime import string # Generate a random dataset templates, data, seeds = generate_synth_data(nsta=5, ntemplates=2, nseeds=50, samp_rate=samp_rate, t_length=6.0, max_amp=5.0, max_lag=12.0, debug=debug) # Notes to the user: If you use more templates you should ensure they # are more different, e.g. set the data to have larger moveouts, # otherwise similar templates will detect events seeded by another # template. # Test the pre_processing functions data = pre_processing.dayproc(st=data, lowcut=2.0, highcut=8.0, filt_order=3, samp_rate=samp_rate, debug=0, starttime=UTCDateTime(0)) if debug > 0: data.plot() # Filter the data and the templates for template in templates: pre_processing.shortproc(st=template, lowcut=2.0, highcut=8.0, filt_order=3, samp_rate=samp_rate) if debug > 0: template.plot() template_names = list(string.ascii_lowercase)[0:len(templates)] detections = match_filter.match_filter(template_names=template_names, template_list=templates, st=data, threshold=10.0, threshold_type='MAD', trig_int=6.0, plotvar=False, plotdir='.', cores=1, debug=0) # Compare the detections to the seeds print('This test made ' + str(len(detections)) + ' detections') ktrue = 0 kfalse = 0 for detection in detections: print(detection) i = template_names.index(detection.template_name) t_seeds = seeds[i] dtime_samples = int( (detection.detect_time - UTCDateTime(0)) * samp_rate) if dtime_samples in t_seeds['time']: j = list(t_seeds['time']).index(dtime_samples) print('Detection at SNR of: ' + str(t_seeds['SNR'][j])) ktrue += 1 else: min_diff = min(abs(t_seeds['time'] - dtime_samples)) if min_diff < 10: # If there is a match within ten samples then it is # good enough j = list(abs(t_seeds['time'] - dtime_samples)).index(min_diff) print('Detection at SNR of: ' + str(t_seeds['SNR'][j])) ktrue += 1 else: print('Detection at sample: ' + str(dtime_samples) + ' does not match anything in seed times:') kfalse += 1 print('Minimum difference in samples is: ' + str(min_diff)) # Plot the detections if debug > 3: for i, template in enumerate(templates): times = [ d.detect_time.datetime for d in detections if d.template_name == template_names[i] ] print(times) plotting.detection_multiplot(data, template, times) # Set an 'acceptable' ratio of positive to false detections print( str(ktrue) + ' true detections and ' + str(kfalse) + ' false detections') self.assertTrue(kfalse / ktrue < 0.25)
    highcut=10, filt_order=4, samp_rate=25, debug=0, starttime=starttime1,
    parallel=True, num_cores=10)
print("MAKING DETECTIONS.", starttime1)
# match filtering
detections = match_filter.match_filter(
    template_names=template_names, template_list=templates, st=std_filter,
    threshold=10, threshold_type='MAD', trig_int=6.0, plotvar=False,
    plotdir='.', cores=10, debug=2)
print("PLOTTING DETECTIONS AND SAVING TO FOLDER.")
# print(detections)
for detection in detections:
    # Only save and plot detections with average CC >= 0.6 on at least
    # three channels
    if (detection.detect_val / detection.no_chans >= 0.6 and
            detection.no_chans >= 3):
        with open('Detections.csv', 'a') as csvfile:
            detwriter = csv.writer(csvfile, delimiter=',', quotechar='|',
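# The acceptance test above keeps detections whose average single-channel
# correlation (detect_val / no_chans) is at least 0.6 on three or more
# channels; a hedged one-liner for the same filter:
def passes_quality(detection, min_avg_cc=0.6, min_chans=3):
    """True if the detection meets the average-CC and channel-count cuts."""
    return (detection.detect_val / detection.no_chans >= min_avg_cc and
            detection.no_chans >= min_chans)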
st.detrend("linear") st.taper(max_percentage=0.01, max_length=10.) if filtType == "bandpass": st.filter(filtType, freqmin=freq[0], freqmax=freq[1]) st.resample(freq[1] * 2) elif filtType == "lowpass": st.filter(filtType, freq=freq[0]) st.resample(freq[0] * 2) elif filtType == "highpass": st.filter(filtType, freq=freq[0]) # start timer and give output timer = time.time() print("Starting scan...") # run eqcorrscan's match filter routine detections = match_filter(template_names=template_names, template_list=templates, st=st, threshold=8, threshold_type="MAD", trig_int=6, cores=20) # stop timer and give output runtime = time.time() - timer print(detections) print("Scanned 1 day of data with " + str(len(templates)) + " templates in " + str(runtime) + " seconds and found " + str(len(detections)) + " detections")
# Pre-process the data to set frequency band and sampling rate # Note that this is, and MUST BE the same as the parameters used for the # template creation. print("Processing the seismic data") st = pre_processing.dayproc(st, lowcut=2.0, highcut=9.0, filt_order=4, samp_rate=20.0, debug=0, starttime=t1) # Convert from list to stream st = Stream(st) # Now we can conduct the matched-filter detection detections = match_filter.match_filter( template_names=template_names, template_list=templates, st=st, threshold=8.0, threshold_type="MAD", trig_int=6.0, plotvar=True, plotdir=".", cores=ncores, tempdir=False, debug=1, plot_format="jpg", ) # Now lets try and work out how many unique events we have just to compare # with the GeoNet catalog of 20 events on this day in this sequence for master in detections: keep = True for slave in detections: if not master == slave and abs(master.detect_time - slave.detect_time) <= 1.0: # If the events are within 1s of each other then test which # was the 'best' match, strongest detection
filt_order=18, samp_rate=100, starttime=st[0].stats.starttime, endtime=st[0].stats.endtime) st = Stream(st) # st.plot() # Read in the templates templates = [] template_names = ['template.ms'] for template_file in template_names: templates.append(read(template_file)) detections = match_filter.match_filter(template_names=template_names, template_list=templates, st=st, threshold=5.5, threshold_type='MAD', trig_int=1, plotvar=True, cores=6) for detection in detections: #detection.write('detections.csv', append=True) detection.write('detections.csv') # plot # multi_trace_plot(st, corr=True, stack='linstack', size=(7, 12), show=True, title=None) times = [] for dc in detections: for pick in dc.event.picks: times.append(pick.time)
def self_test(template, lowcut, highcut, filt_order, samp_rate, threshold,
              threshold_type, trig_int, debug=0):
    """
    :type template: :class: obspy.Stream
    :param template: Template to check for self-detectability
    :type highcut: float
    :param highcut: High cut in Hz for bandpass
    :type lowcut: float
    :param lowcut: Low cut in Hz for bandpass
    :type filt_order: int
    :param filt_order: Corners for bandpass
    :type samp_rate: float
    :param samp_rate: Desired sampling rate in Hz
    :type threshold: float
    :param threshold: A threshold value set based on the threshold_type
    :type threshold_type: str
    :param threshold_type: The type of threshold to be used, can be MAD,
        absolute or av_chan_corr. MAD threshold is calculated as the
        threshold*(median(abs(cccsum))) where cccsum is the cross-correlation
        sum for a given template. absolute threshold is a true absolute
        threshold based on the cccsum value. av_chan_corr is based on the
        mean values of single-channel cross-correlations assuming all data
        are present as required for the template, e.g.
        av_chan_corr_thresh=threshold*(cccsum/len(template)) where template
        is a single template from the input and the length is the number of
        channels within this template.
    :type trig_int: float
    :param trig_int: Minimum gap between detections in seconds.
    :type debug: int
    :param debug: Debug output level, higher=more output.
    """
    import sys
    sys.path.append('..')
    import datetime as dt
    import pre_processing
    import matchdef
    from eqcorrscan.core import match_filter
    from obspy import read
    # Work out the date of the template
    date = template[0].stats.starttime.datetime.date()
    # Read in the appropriate data
    sta_chans = [(tr.stats.station, tr.stats.channel, tr.stats.network)
                 for tr in template]
    for sta_chan in sta_chans:
        base = matchdef.contbase[
            [i for i in range(len(matchdef.contbase))
             if matchdef.contbase[i][2] == sta_chan[2]][0]]
        if base[1] == 'yyyymmdd':
            daydir = date.strftime('%Y%m%d')
            staform = ('*' + sta_chan[0] + '.' + sta_chan[1][0] + '*' +
                       sta_chan[1][1] + '.*')
        elif base[1] == 'Yyyyy/Rjjj.01':
            daydir = date.strftime('Y%Y/R%j.01')
            staform = (sta_chan[0] + '.*.' + sta_chan[1][0] + '*' +
                       sta_chan[1][1] + '.' + date.strftime('%Y.%j'))
        else:
            raise IOError('Not in the correct form: ' + base[1])
        if 'image' not in locals():
            image = read(base[0] + '/' + daydir + '/' + staform)
        else:
            image += read(base[0] + '/' + daydir + '/' + staform)
    # Process the data using pre-processing
    for tr in image:
        tr = pre_processing.dayproc(tr, lowcut, highcut, filt_order,
                                    samp_rate, matchdef.debug, date)
    # image.plot(size=(800,600), equal_scale=False)
    # Apply the detection routine with plot on; template names are passed
    # as a list
    detections = match_filter.match_filter(
        [str(template[0].stats.starttime)], [template], image, threshold,
        threshold_type, trig_int, True)
    for detection in detections:
        print('Detection using template: ' + detection.template_name +
              ' at ' + str(detection.detect_time) + ' with a cccsum of: ' +
              str(detection.detect_val))
def match_synth(sfile, cont_base, freqmin=2.0, freqmax=10.0, samp_rate=100.0,
                threshold=8.0, threshold_type='MAD', trig_int=6.0,
                plotvar=True, save_template=True):
    """
    Function to generate a basic synthetic from a real event, given by an
    s-file, and cross-correlate this with the day of continuous data
    including the event.

    :type sfile: str
    :param sfile: Path to the s-file for the event
    :type cont_base: str
    :param cont_base: Path to the continuous data, should be in
        Yyyyy/Rjjj.01 directories
    :type freqmin: float
    :param freqmin: Low-cut for bandpass in Hz, defaults to 2.0
    :type freqmax: float
    :param freqmax: High-cut for bandpass in Hz, defaults to 10.0
    :type samp_rate: float
    :param samp_rate: Desired sampling rate in Hz, defaults to 100.0
    :type threshold: float
    :param threshold: Threshold for detection in cccsum, defaults to 8.0
    :type threshold_type: str
    :param threshold_type: Type to threshold, either MAD or ABS, defaults
        to MAD
    :type trig_int: float
    :param trig_int: Trigger interval in seconds, defaults to 6.0
    :type plotvar: bool
    :param plotvar: To plot or not, defaults to True

    :returns: detections
    """
    # import matplotlib.pyplot as plt
    from eqcorrscan.core import match_filter, template_gen
    from eqcorrscan.utils import Sfile_util, pre_processing
    import glob
    from obspy import read, Stream, UTCDateTime
    from obspy.signal.cross_correlation import xcorr
    from joblib import Parallel, delayed
    from multiprocessing import cpu_count
    import numpy as np
    # Generate the synthetic
    synth_template = synth_from_sfile(sfile, samp_rate, length=1.0,
                                      PS_ratio=1.68)
    synth_template.filter('bandpass', freqmin=freqmin, freqmax=freqmax)
    for tr in synth_template:
        tr.data = (tr.data * 1000).astype(np.int32)
    # Find the date from the sfile
    event_date = Sfile_util.readheader(sfile).time.datetime
    day = UTCDateTime(event_date.date())
    # Work out which stations we have template info for
    stachans = [(tr.stats.station, tr.stats.channel)
                for tr in synth_template]
    # Read in the day of data
    for stachan in stachans:
        wavfile = glob.glob(cont_base + event_date.strftime('/Y%Y/R%j.01/') +
                            stachan[0] + '.*.' + stachan[1][0] + '?' +
                            stachan[1][-1] + '.' +
                            event_date.strftime('%Y.%j'))
        if len(wavfile) != 0:
            for wavf in wavfile:
                if 'st' not in locals():
                    st = read(wavf)
                else:
                    st += read(wavf)
    st = st.merge(fill_value='interpolate')
    cores = cpu_count()
    if len(st) < cores:
        jobs = len(st)
    else:
        jobs = cores
    st = Parallel(n_jobs=jobs)(
        delayed(pre_processing.dayproc)(tr, freqmin, freqmax, 3, samp_rate,
                                        0, day)
        for tr in st)
    st = Stream(st)
    # Make the real template
    picks = Sfile_util.readpicks(sfile)
    real_template = template_gen._template_gen(picks, st, 1.0, 'all',
                                               prepick=10 / samp_rate)
    for tr in real_template:
        tr.data = tr.data.astype(np.int32)
    if save_template:
        real_template.write('Real_' + sfile.split('/')[-1], format='MSEED',
                            encoding='STEIM2')
    # Shift the synthetic to better align with the real one
    for tr in real_template:
        synth_tr = synth_template.select(station=tr.stats.station,
                                         channel=tr.stats.channel)[0]
        shift, corr = xcorr(tr.data, synth_tr.data, 20)
        print(tr.stats.station + '.' + tr.stats.channel + ' shift=' +
              str(shift) + ' samples corr=' + str(corr))
        if corr < 0:
            synth_tr.data *= -1
        # Apply a pad
        pad = np.zeros(abs(shift))
        if shift < 0:
            synth_tr.data = np.append(synth_tr.data, pad)[abs(shift):]
        elif shift > 0:
            synth_tr.data = np.append(pad, synth_tr.data)[0:-shift]
    if save_template:
        synth_template.write('Synthetic_' + sfile.split('/')[-1],
                             format='MSEED', encoding='STEIM2')
    # Now we have processed data and a template, we can try and detect!
detections = match_filter.match_filter(
    ['Synthetic_' + sfile.split('/')[-1], 'Real_' + sfile.split('/')[-1]],
    [synth_template, real_template], st, threshold, threshold_type,
    trig_int, plotvar, 'synth_temp')
f = open('Synthetic_test.csv', 'w')
f.write('template, detect-time, cccsum, threshold, number of channels\n')
for detection in detections:
    # output detections to file
    f.write(detection.template_name + ', ' + str(detection.detect_time) +
            ', ' + str(detection.detect_val) + ', ' +
            str(detection.threshold) + ', ' + str(detection.no_chans) + '\n')
    print('template: ' + detection.template_name + ' detection at: ' +
          str(detection.detect_time) + ' with a cccsum of: ' +
          str(detection.detect_val))
if detections:
    f.write('\n')
f.close()
def run_tutorial(min_magnitude=2, shift_len=0.2, num_cores=4, min_cc=0.5): """Functional, tested example script for running the lag-calc tutorial.""" if num_cores > cpu_count(): num_cores = cpu_count() client = Client('NCEDC') t1 = UTCDateTime(2004, 9, 28) t2 = t1 + 86400 print('Downloading catalog') catalog = client.get_events(starttime=t1, endtime=t2, minmagnitude=min_magnitude, minlatitude=35.7, maxlatitude=36.1, minlongitude=-120.6, maxlongitude=-120.2, includearrivals=True) # We don't need all the picks, lets take the information from the # five most used stations - note that this is done to reduce computational # costs. catalog = catalog_utils.filter_picks(catalog, channels=['EHZ'], top_n_picks=5) # There is a duplicate pick in event 3 in the catalog - this has the effect # of reducing our detections - check it yourself. for pick in catalog[3].picks: if pick.waveform_id.station_code == 'PHOB' and \ pick.onset == 'emergent': catalog[3].picks.remove(pick) print('Generating templates') templates = template_gen.template_gen(method="from_client", catalog=catalog, client_id='NCEDC', lowcut=2.0, highcut=9.0, samp_rate=50.0, filt_order=4, length=3.0, prepick=0.15, swin='all', process_len=3600) # In this section we generate a series of chunks of data. start_time = UTCDateTime(2004, 9, 28, 17) end_time = UTCDateTime(2004, 9, 28, 20) process_len = 3600 chunks = [] chunk_start = start_time while chunk_start < end_time: chunk_end = chunk_start + process_len if chunk_end > end_time: chunk_end = end_time chunks.append((chunk_start, chunk_end)) chunk_start += process_len all_detections = [] picked_catalog = Catalog() template_names = [ template[0].stats.starttime.strftime("%Y%m%d_%H%M%S") for template in templates ] for t1, t2 in chunks: print('Downloading and processing for start-time: %s' % t1) # Download and process the data bulk_info = [(tr.stats.network, tr.stats.station, '*', tr.stats.channel, t1, t2) for tr in templates[0]] # Just downloading a chunk of data try: st = client.get_waveforms_bulk(bulk_info) except FDSNException: st = Stream() for _bulk in bulk_info: st += client.get_waveforms(*_bulk) st.merge(fill_value='interpolate') st = pre_processing.shortproc(st, lowcut=2.0, highcut=9.0, filt_order=4, samp_rate=50.0, num_cores=num_cores) detections = match_filter.match_filter(template_names=template_names, template_list=templates, st=st, threshold=8.0, threshold_type='MAD', trig_int=6.0, plotvar=False, plotdir='.', cores=num_cores) # Extract unique detections from set. unique_detections = [] for master in detections: keep = True for slave in detections: if not master == slave and\ abs(master.detect_time - slave.detect_time) <= 1.0: # If the events are within 1s of each other then test which # was the 'best' match, strongest detection if not master.detect_val > slave.detect_val: keep = False break if keep: unique_detections.append(master) all_detections += unique_detections picked_catalog += lag_calc.lag_calc(detections=unique_detections, detect_data=st, template_names=template_names, templates=templates, shift_len=shift_len, min_cc=min_cc, interpolate=False, plot=False) # Return all of this so that we can use this function for testing. return all_detections, picked_catalog, templates, template_names
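# One assumed way to invoke the lag-calc tutorial above as a script; the
# magnitude cut-off here is illustrative only.
if __name__ == '__main__':
    from multiprocessing import cpu_count
    all_detections, picked_catalog, templates, template_names = \
        run_tutorial(min_magnitude=4, num_cores=cpu_count())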
    highcut=9.0, filt_order=4, samp_rate=20.0,
    starttime=st[0].stats.starttime.date)
# Reading the templates
templates = []
template_names = ['kaik_eq-WEL.ms', 'kaik_eq-WEL2.ms', 'kaik_eq-WEL3.ms',
                  'kaik_eq-WEL4.ms']
for template_file in template_names:
    templates.append(read(template_file))
detections = match_filter.match_filter(template_names=template_names,
                                       template_list=templates,
                                       st=st_processed, threshold=8,
                                       threshold_type='MAD', trig_int=6,
                                       plotvar=False, cores=4, debug=1)
for detection in detections:
    detection.write('detection-attempt.csv', append=True)
# The code only came up with a single detection, and it falls outside of
# the start and end times, which is strange
            str(k) + '.h5')
        # Occasionally templates are one sample too short; trim to the
        # correct number of samples (this handling is a bit clumsy; redo
        # later)
        for c in range(numChan):
            stTemp[c].data = stTemp[c].data[:trimIdx]
        templates.append(stTemp)
        template_names.append("template_" + str(k))
    except Exception:
        # Skip templates that could not be read or trimmed
        pass
# run eqcorrscan's match filter routine
det = match_filter(template_names=template_names, template_list=templates,
                   st=st, threshold=threshold, threshold_type="MAD",
                   trig_int=tolerance, cores=20)
# append to list
detections.extend(det)
# stop timer and give output
runtime = time.time() - timer
# give some output
if blockSize * (j + 1) >= numTemp:
    print("Scanned " + currentDate.date.strftime("%Y-%m-%d") + " with " +
          str(len(templates)) + " templates (" + str(numTemp) + "/" +
          str(numTemp) + ") in " +
# Merge the data to account for miniseed files being written in chunks
# We need continuous day-long data, so data are padded if there are gaps
st = st.merge(fill_value='interpolate')
# Work out what day we are working on, required as we will pad the data to be
# day-long
day = st[0].stats.starttime.date
# Process the data in the same way as the template
for tr in st:
    tr = pre_processing.dayproc(tr, 1.0, 20.0, 3, 100.0, 1, day)
# Set directory for match filter output plots
plot_dir = '/projects/nesi00228/data/plots/'
# Compute detections
detections = match_filter.match_filter(template_names, templates, st, 8.0,
                                       'MAD', 6.0, False, plot_dir)
# We now have a list of detections! We can output these to a file to check
# later
for detection in detections:
    f.write(detection.template_name + ', ' + str(detection.detect_time) +
            ', ' + str(detection.detect_val) + ', ' +
            str(detection.threshold) + ', ' + str(detection.no_chans) + '\n')
del detections
f.close()
# Print out runtime
print('Script took ', time.time() - start, ' seconds.')
# Instead of saving all of these waveforms, just save the plots as pdf
# wav_dir='/home/chet/data/detections/'
# det_wav = Stream()
groups = 0
detections = []
# Cope with having heaps of templates
if len(all_templates) > 100:
    groups = int(len(all_templates) / 100)
    # Loop to groups + 1 so that the final, shorter chunk is included
    for i in range(groups + 1):
        if i == groups:
            templates = all_templates[i * 100:]
            template_names = all_template_names[i * 100:]
        else:
            templates = all_templates[i * 100:(i + 1) * 100]
            template_names = all_template_names[i * 100:(i + 1) * 100]
        detections += match_filter.match_filter(
            template_names, templates, st, matchdef.threshold,
            matchdef.threshtype, matchdef.trig_int, matchdef.plot,
            matchdef=matchdef, tempdir='temp_' + str(instance))
    for detection in detections:
        # output detections to file
        f.write(detection.template_name + ', ' + str(detection.detect_time) +
                ', ' + str(detection.detect_val) + ', ' +
                str(detection.threshold) + ', ' + str(detection.no_chans) +
                '\n')
        print('template: ' + detection.template_name + ' detection at: ' +
              str(detection.detect_time) + ' with a cccsum of: ' +
              str(detection.detect_val))
    if detections:
        f.write('\n')
    else:
        for tr in st:
            tr.write('test_data/' + tr.stats.station + '-' +
                     tr.stats.channel +
lowcut=2.0, highcut=9.0, filt_order=4, samp_rate=20.0, debug=0, starttime=t1) # Convert from list to stream st = Stream(st) # Now we can conduct the matched-filter detection detections = match_filter.match_filter(template_names=template_names, template_list=templates, st=st, threshold=8.0, threshold_type='MAD', trig_int=6.0, plotvar=True, plotdir='.', cores=ncores, tempdir=False, debug=1, plot_format='jpg') # Now lets try and work out how many unique events we have just to compare # with the GeoNet catalog of 20 events on this day in this sequence for master in detections: keep = True for slave in detections: if not master == slave and\ abs(master.detect_time - slave.detect_time) <= 1.0: # If the events are within 1s of each other then test which # was the 'best' match, strongest detection
def run_tutorial(plot=False, process_len=3600, num_cores=cpu_count(), **kwargs): """Main function to run the tutorial dataset.""" # First we want to load our templates template_names = glob.glob('tutorial_template_*.ms') if len(template_names) == 0: raise IOError('Template files not found, have you run the template ' + 'creation tutorial?') templates = [read(template_name) for template_name in template_names] # Work out what stations we have and get the data for them stations = [] for template in templates: for tr in template: stations.append((tr.stats.station, tr.stats.channel)) # Get a unique list of stations stations = list(set(stations)) # We will loop through the data chunks at a time, these chunks can be any # size, in general we have used 1 day as our standard, but this can be # as short as five minutes (for MAD thresholds) or shorter for other # threshold metrics. However the chunk size should be the same as your # template process_len. # You should test different parameters!!! start_time = UTCDateTime(2016, 1, 4) end_time = UTCDateTime(2016, 1, 5) chunks = [] chunk_start = start_time while chunk_start < end_time: chunk_end = chunk_start + process_len if chunk_end > end_time: chunk_end = end_time chunks.append((chunk_start, chunk_end)) chunk_start += process_len unique_detections = [] # Set up a client to access the GeoNet database client = Client("GEONET") # Note that these chunks do not rely on each other, and could be paralleled # on multiple nodes of a distributed cluster, see the SLURM tutorial for # an example of this. for t1, t2 in chunks: # Generate the bulk information to query the GeoNet database bulk_info = [] for station in stations: bulk_info.append(('NZ', station[0], '*', station[1][0] + 'H' + station[1][-1], t1, t2)) # Note this will take a little while. print('Downloading seismic data, this may take a while') st = client.get_waveforms_bulk(bulk_info) # Merge the stream, it will be downloaded in chunks st.merge() # Pre-process the data to set frequency band and sampling rate # Note that this is, and MUST BE the same as the parameters used for # the template creation. 
print('Processing the seismic data') st = pre_processing.shortproc(st, lowcut=2.0, highcut=9.0, filt_order=4, samp_rate=20.0, num_cores=num_cores, starttime=t1, endtime=t2) # Convert from list to stream st = Stream(st) # Now we can conduct the matched-filter detection detections = match_filter.match_filter(template_names=template_names, template_list=templates, st=st, threshold=8.0, threshold_type='MAD', trig_int=6.0, plotvar=plot, plotdir='.', cores=num_cores, plot_format='png', **kwargs) # Now lets try and work out how many unique events we have just to # compare with the GeoNet catalog of 20 events on this day in this # sequence for master in detections: keep = True for slave in detections: if not master == slave and abs(master.detect_time - slave.detect_time) <= 1.0: # If the events are within 1s of each other then test which # was the 'best' match, strongest detection if not master.detect_val > slave.detect_val: keep = False print('Removed detection at %s with cccsum %s' % (master.detect_time, master.detect_val)) print('Keeping detection at %s with cccsum %s' % (slave.detect_time, slave.detect_val)) break if keep: unique_detections.append(master) print('Detection at :' + str(master.detect_time) + ' for template ' + master.template_name + ' with a cross-correlation sum of: ' + str(master.detect_val)) # We can plot these too if plot: stplot = st.copy() template = templates[template_names.index( master.template_name)] lags = sorted([tr.stats.starttime for tr in template]) maxlag = lags[-1] - lags[0] stplot.trim(starttime=master.detect_time - 10, endtime=master.detect_time + maxlag + 10) plotting.detection_multiplot(stplot, template, [master.detect_time.datetime]) print('We made a total of ' + str(len(unique_detections)) + ' detections') return unique_detections
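# The while-loop that builds (chunk_start, chunk_end) tuples appears in
# several of these tutorials; a hedged generator (hypothetical refactor)
# expressing the same chunking:
def make_chunks(start_time, end_time, process_len):
    """Yield (t1, t2) windows of process_len seconds covering the span."""
    chunk_start = start_time
    while chunk_start < end_time:
        yield chunk_start, min(chunk_start + process_len, end_time)
        chunk_start += process_len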
print('Pre-processing took %.3f seconds' % (proc_stp - proc_strt)) # RUN MATCH FILTER (looping through chunks of templates due to RAM) chunk_temps = partition(templates, 120) chunk_temp_names = partition(template_names, 120) print('Starting correlation runs for %s' % str(day)) i = 0 for temps, temp_names in zip(chunk_temps, chunk_temp_names): i += 1 # Silly counter for debug grp_corr_st = timer() print('On template group %d of %d' % (i, len(chunk_temps))) dets, cat, sts = match_filter.match_filter(temp_names, temps, st1, threshold=8.0, threshold_type='MAD', trig_int=1.0, plotvar=False, cores=12, output_cat=True, extract_detections=True, debug=2) # Append detections to a file for this instance to check later print('Correlations for group %d took %.3f sec, now extracting them' % (i, timer() - grp_corr_st)) extrct_st = timer() with open( '/projects/nesi00228/data/detections/raw_det_txt/%s/%d_dets.txt' % (str(dto.year), instance), mode='a') as fo: det_writer = csv.writer(fo) for det, st in zip(dets, sts):
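# The partition helper used above to split templates into RAM-sized groups
# is not shown; a minimal sketch of the assumed behaviour:
def partition(lst, n):
    """Split lst into consecutive chunks of at most n items (assumed)."""
    return [lst[i:i + n] for i in range(0, len(lst), n)]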
# We need continuous day-long data, so data are padded if there are gaps
st = st.merge(fill_value='interpolate')
# Work out what day we are working on, required as we will pad the data to be
# day-long
day = st[0].stats.starttime.date
# Process the data in the same way as the template
for tr in st:
    tr = pre_processing.dayproc(tr, 1.0, 20.0, 3, 100.0, matchdef.debug, day)
# Set directory for match filter output plots
plot_dir = '/home/chet/data/plot/'
# Compute detections
detections = match_filter.match_filter(template_names, templates, st, 8.0,
                                       matchdef.threshtype,
                                       matchdef.trig_int, True, plot_dir,
                                       cores=4)
# We now have a list of detections! We can output these to a file to check
# later
for detection in detections:
    f.write(detection.template_name + ', ' + str(detection.detect_time) +
            ', ' + str(detection.detect_val) + ', ' +
            str(detection.threshold) + ', ' + str(detection.no_chans) + '\n')
del detections
f.close()
print('Runtime: ', time.time() - start, ' seconds')
# Instead of saving all of these waveforms, just save the plots as pdf
# wav_dir='/home/chet/data/detections/'
# det_wav = Stream()
# for detection in detections:
#     st.plot(starttime=detection.detect_time-2,
#             endtime=detection.detect_time+8,
def run_tutorial(min_magnitude=2, shift_len=0.2, num_cores=4): import obspy if int(obspy.__version__.split('.')[0]) >= 1: from obspy.clients.fdsn import Client else: from obspy.fdsn import Client from obspy.core.event import Catalog from obspy import UTCDateTime from eqcorrscan.core import template_gen, match_filter, lag_calc from eqcorrscan.utils import pre_processing, catalog_utils client = Client('NCEDC') t1 = UTCDateTime(2004, 9, 28) t2 = t1 + 86400 print('Downloading catalog') catalog = client.get_events(starttime=t1, endtime=t2, minmagnitude=min_magnitude, minlatitude=35.7, maxlatitude=36.1, minlongitude=-120.6, maxlongitude=-120.2, includearrivals=True) # We don't need all the picks, lets take the information from the # five most used stations - note that this is done to reduce computational # costs. catalog = catalog_utils.filter_picks(catalog, channels=['EHZ'], top_n_picks=5) print('Generating templates') templates = template_gen.from_client(catalog=catalog, client_id='NCEDC', lowcut=2.0, highcut=9.0, samp_rate=50.0, filt_order=4, length=3.0, prepick=0.15, swin='all', process_len=3600) start_time = UTCDateTime(2004, 9, 28, 17) end_time = UTCDateTime(2004, 9, 28, 20) process_len = 1800 chunks = [] chunk_start = start_time while chunk_start < end_time: chunk_end = chunk_start + process_len if chunk_end > end_time: chunk_end = end_time chunks.append((chunk_start, chunk_end)) chunk_start += process_len all_detections = [] picked_catalog = Catalog() template_names = [ str(template[0].stats.starttime) for template in templates ] for t1, t2 in chunks: print('Downloading and processing for start-time: %s' % t1) # Download and process the data bulk_info = [(tr.stats.network, tr.stats.station, '*', tr.stats.channel[0] + 'H' + tr.stats.channel[1], t1, t2) for tr in templates[0]] # Just downloading a chunk of data st = client.get_waveforms_bulk(bulk_info) st.merge(fill_value='interpolate') st = pre_processing.shortproc(st, lowcut=2.0, highcut=9.0, filt_order=4, samp_rate=50.0, debug=0, num_cores=num_cores) detections = match_filter.match_filter(template_names=template_names, template_list=templates, st=st, threshold=8.0, threshold_type='MAD', trig_int=6.0, plotvar=False, plotdir='.', cores=num_cores) # Extract unique detections from set. unique_detections = [] for master in detections: keep = True for slave in detections: if not master == slave and\ abs(master.detect_time - slave.detect_time) <= 1.0: # If the events are within 1s of each other then test which # was the 'best' match, strongest detection if not master.detect_val > slave.detect_val: keep = False break if keep: unique_detections.append(master) all_detections += unique_detections picked_catalog += lag_calc.lag_calc(detections=unique_detections, detect_data=st, template_names=template_names, templates=templates, shift_len=shift_len, min_cc=0.5, interpolate=True, plot=False) # Return all of this so that we can use this function for testing. return all_detections, picked_catalog, templates, template_names
def run_tutorial(plot=False): """Main function to run the tutorial dataset.""" from eqcorrscan.utils import pre_processing from eqcorrscan.utils import plotting from eqcorrscan.core import match_filter import glob from multiprocessing import cpu_count # This import section copes with namespace changes between obspy versions import obspy if int(obspy.__version__.split('.')[0]) >= 1: from obspy.clients.fdsn import Client else: from obspy.fdsn import Client from obspy import UTCDateTime, Stream, read # First we want to load our templates template_names = glob.glob('tutorial_template_*.ms') if len(template_names) == 0: raise IOError('Template files not found, have you run the template ' + 'creation tutorial?') templates = [read(template_name) for template_name in template_names] # Work out what stations we have and get the data for them stations = [] for template in templates: for tr in template: stations.append((tr.stats.station, tr.stats.channel)) # Get a unique list of stations stations = list(set(stations)) # We will loop through the data chunks at a time, these chunks can be any # size, in general we have used 1 day as our standard, but this can be # as short as five minutes (for MAD thresholds) or shorter for other # threshold metrics. However the chunk size should be the same as your # template process_len. # You should test different parameters!!! start_time = UTCDateTime(2016, 1, 4) end_time = UTCDateTime(2016, 1, 5) process_len = 3600 chunks = [] chunk_start = start_time while chunk_start < end_time: chunk_end = chunk_start + process_len if chunk_end > end_time: chunk_end = end_time chunks.append((chunk_start, chunk_end)) chunk_start += process_len unique_detections = [] detections = [] # Set up a client to access the GeoNet database client = Client("GEONET") # Note that these chunks do not rely on each other, and could be paralleled # on multiple nodes of a distributed cluster, see the SLURM tutorial for # an example of this. for t1, t2 in chunks: # Generate the bulk information to query the GeoNet database bulk_info = [] for station in stations: bulk_info.append(('NZ', station[0], '*', station[1][0] + 'H' + station[1][-1], t1, t2)) # Note this will take a little while. print('Downloading seismic data, this may take a while') st = client.get_waveforms_bulk(bulk_info) # Merge the stream, it will be downloaded in chunks st.merge(fill_value='interpolate') # Set how many cores we want to parallel across, we will set this to four # as this is the number of templates, if your machine has fewer than four # cores/CPUs the multiprocessing will wait until there is a free core. # Setting this to be higher than the number of templates will have no # increase in speed as only detections for each template are computed in # parallel. It may also slow your processing by using more memory than # needed, to the extent that swap may be filled. if cpu_count() < 4: ncores = cpu_count() else: ncores = 4 # Pre-process the data to set frequency band and sampling rate # Note that this is, and MUST BE the same as the parameters used for the # template creation. 
print('Processing the seismic data') st = pre_processing.shortproc(st, lowcut=2.0, highcut=9.0, filt_order=4, samp_rate=20.0, debug=2, num_cores=ncores, starttime=t1, endtime=t2) # Convert from list to stream st = Stream(st) # Now we can conduct the matched-filter detection detections += match_filter.match_filter(template_names=template_names, template_list=templates, st=st, threshold=8.0, threshold_type='MAD', trig_int=6.0, plotvar=plot, plotdir='.', cores=ncores, tempdir=False, debug=1, plot_format='jpg') # Now lets try and work out how many unique events we have just to compare # with the GeoNet catalog of 20 events on this day in this sequence for master in detections: keep = True for slave in detections: if not master == slave and\ abs(master.detect_time - slave.detect_time) <= 1.0: # If the events are within 1s of each other then test which # was the 'best' match, strongest detection if not master.detect_val > slave.detect_val: keep = False break if keep: unique_detections.append(master) print('We made a total of ' + str(len(unique_detections)) + ' detections') for detection in unique_detections: print('Detection at :' + str(detection.detect_time) + ' for template ' + detection.template_name + ' with a cross-correlation sum of: ' + str(detection.detect_val)) # We can plot these too if plot: stplot = st.copy() template = templates[template_names.index(detection.template_name)] lags = sorted([tr.stats.starttime for tr in template]) maxlag = lags[-1] - lags[0] stplot.trim(starttime=detection.detect_time - 10, endtime=detection.detect_time + maxlag + 10) plotting.detection_multiplot(stplot, template, [detection.detect_time.datetime]) return unique_detections
# Convert from list to stream
st = Stream(st)
# Set how many cores we want to parallel across, we will set this to four
# as this is the number of templates, if your machine has fewer than four
# cores/CPUs the multiprocessing will wait until there is a free core.
# Setting this to be higher than the number of templates will give no
# increase in speed as only detections for each template are computed in
# parallel.
ncores = 4
# Now we can conduct the matched-filter detection
detections = match_filter.match_filter(template_names=template_names,
                                       template_list=templates, st=st,
                                       threshold=8.0, threshold_type='MAD',
                                       trig_int=6.0, plotvar=True,
                                       plotdir='.', cores=ncores,
                                       tempdir=False, debug=0,
                                       plot_format='jpg')
# Now lets try and work out how many unique events we have just to compare
# with the GeoNet catalog of 20 events on this day in this sequence
for master in detections:
    keep = True
    for slave in detections:
        if not master == slave and \
                abs(master.detect_time - slave.detect_time) <= 6.0:
            # If the events are within 6s of each other then test which
            # was the 'best' match, strongest detection
            if not master.detect_val > slave.detect_val:
                keep = False