def test_detection_multiplot(self):
    times = [min([pk.time - 0.05 for pk in self.event.picks])]
    times.append(times[0] + 10)
    fig = detection_multiplot(
        stream=self.st, template=self.template, times=times,
        show=False, return_figure=True)
    return fig
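# A minimal standalone sketch of the call under test, assuming a template and
# a pre-processed continuous stream saved as 'template.ms' and 'data.ms'
# (both filenames hypothetical); the data must be processed the same way as
# the template for the overlay to line up.
from obspy import read
from eqcorrscan.utils.plotting import detection_multiplot

template = read('template.ms')
stream = read('data.ms')
# Plot the template over the stream at the earliest template trace time
detect_time = min(tr.stats.starttime for tr in template)
fig = detection_multiplot(stream=stream, template=template,
                          times=[detect_time], show=False,
                          return_figure=True)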
def run_tutorial(plot=False):
    """Main function to run the tutorial dataset."""
    from eqcorrscan.utils import pre_processing
    from eqcorrscan.utils import plotting
    from eqcorrscan.core import match_filter
    import glob
    from multiprocessing import cpu_count

    # This import section copes with namespace changes between obspy versions
    import obspy
    if int(obspy.__version__.split('.')[0]) >= 1:
        from obspy.clients.fdsn import Client
    else:
        from obspy.fdsn import Client
    from obspy import UTCDateTime, Stream, read

    # First we want to load our templates
    template_names = glob.glob('tutorial_template_*.ms')
    if len(template_names) == 0:
        raise IOError('Template files not found, have you run the template '
                      'creation tutorial?')
    templates = [read(template_name) for template_name in template_names]

    # Work out what stations we have and get the data for them
    stations = []
    for template in templates:
        for tr in template:
            stations.append((tr.stats.station, tr.stats.channel))
    # Get a unique list of stations
    stations = list(set(stations))

    # We will loop through the data in chunks. These chunks can be any size;
    # in general we have used one day as our standard, but this can be as
    # short as five minutes (for MAD thresholds) or shorter for other
    # threshold metrics. However, the chunk size should be the same as your
    # template process_len.
    # You should test different parameters!!!
    start_time = UTCDateTime(2016, 1, 4)
    end_time = UTCDateTime(2016, 1, 5)
    process_len = 3600
    chunks = []
    chunk_start = start_time
    while chunk_start < end_time:
        chunk_end = chunk_start + process_len
        if chunk_end > end_time:
            chunk_end = end_time
        chunks.append((chunk_start, chunk_end))
        chunk_start += process_len

    unique_detections = []
    detections = []

    # Set up a client to access the GeoNet database
    client = Client("GEONET")
    # Note that these chunks do not rely on each other, and could be run in
    # parallel on multiple nodes of a distributed cluster, see the SLURM
    # tutorial for an example of this.
    for t1, t2 in chunks:
        # Generate the bulk information to query the GeoNet database
        bulk_info = []
        for station in stations:
            bulk_info.append(('NZ', station[0], '*',
                              station[1][0] + 'H' + station[1][-1], t1, t2))
        # Note this will take a little while.
        print('Downloading seismic data, this may take a while')
        st = client.get_waveforms_bulk(bulk_info)
        # Merge the stream, it will be downloaded in chunks
        st.merge(fill_value='interpolate')

        # Set how many cores we want to parallel across. We will set this to
        # four as this is the number of templates; if your machine has fewer
        # than four cores/CPUs the multiprocessing will wait until there is
        # a free core. Setting this higher than the number of templates gives
        # no increase in speed, as only detections for each template are
        # computed in parallel. It may also slow your processing by using
        # more memory than needed, to the extent that swap may be filled.
        if cpu_count() < 4:
            ncores = cpu_count()
        else:
            ncores = 4

        # Pre-process the data to set frequency band and sampling rate.
        # Note that this is, and MUST BE, the same as the parameters used
        # for the template creation.
        print('Processing the seismic data')
        st = pre_processing.shortproc(
            st, lowcut=2.0, highcut=9.0, filt_order=4, samp_rate=20.0,
            debug=2, num_cores=ncores, starttime=t1, endtime=t2)
        # Convert from list to stream
        st = Stream(st)

        # Now we can conduct the matched-filter detection
        detections += match_filter.match_filter(
            template_names=template_names, template_list=templates, st=st,
            threshold=8.0, threshold_type='MAD', trig_int=6.0, plotvar=plot,
            plotdir='.', cores=ncores, tempdir=False, debug=1,
            plot_format='jpg')

    # Now let's try to work out how many unique events we have, just to
    # compare with the GeoNet catalog of 20 events on this day in this
    # sequence
    for master in detections:
        keep = True
        for slave in detections:
            if not master == slave and \
                    abs(master.detect_time - slave.detect_time) <= 1.0:
                # If the events are within 1 s of each other then test which
                # was the 'best' match, strongest detection
                if not master.detect_val > slave.detect_val:
                    keep = False
                    break
        if keep:
            unique_detections.append(master)
    print('We made a total of ' + str(len(unique_detections)) +
          ' detections')

    for detection in unique_detections:
        print('Detection at :' + str(detection.detect_time) +
              ' for template ' + detection.template_name +
              ' with a cross-correlation sum of: ' +
              str(detection.detect_val))
        # We can plot these too
        if plot:
            stplot = st.copy()
            template = templates[template_names.index(
                detection.template_name)]
            lags = sorted([tr.stats.starttime for tr in template])
            maxlag = lags[-1] - lags[0]
            stplot.trim(starttime=detection.detect_time - 10,
                        endtime=detection.detect_time + maxlag + 10)
            plotting.detection_multiplot(
                stplot, template, [detection.detect_time.datetime])
    return unique_detections
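# A minimal sketch of driving the tutorial above as a script, assuming it is
# saved as a module of its own; the __main__ guard matters because
# match_filter parallelises with multiprocessing.
if __name__ == '__main__':
    unique_detections = run_tutorial(plot=False)
    print('Found %i unique detections' % len(unique_detections))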
import glob

from multiprocessing import cpu_count

from obspy.clients.fdsn import Client
from obspy import UTCDateTime, Stream, read

from eqcorrscan.utils import pre_processing, plotting
from eqcorrscan.core import match_filter


def run_tutorial(plot=False, process_len=3600, num_cores=cpu_count(),
                 **kwargs):
    """Main function to run the tutorial dataset."""
    # First we want to load our templates
    template_names = glob.glob('tutorial_template_*.ms')
    if len(template_names) == 0:
        raise IOError('Template files not found, have you run the template '
                      'creation tutorial?')
    templates = [read(template_name) for template_name in template_names]

    # Work out what stations we have and get the data for them
    stations = []
    for template in templates:
        for tr in template:
            stations.append((tr.stats.station, tr.stats.channel))
    # Get a unique list of stations
    stations = list(set(stations))

    # We will loop through the data in chunks. These chunks can be any size;
    # in general we have used one day as our standard, but this can be as
    # short as five minutes (for MAD thresholds) or shorter for other
    # threshold metrics. However, the chunk size should be the same as your
    # template process_len.
    # You should test different parameters!!!
    start_time = UTCDateTime(2016, 1, 4)
    end_time = UTCDateTime(2016, 1, 5)
    chunks = []
    chunk_start = start_time
    while chunk_start < end_time:
        chunk_end = chunk_start + process_len
        if chunk_end > end_time:
            chunk_end = end_time
        chunks.append((chunk_start, chunk_end))
        chunk_start += process_len

    unique_detections = []
    # Set up a client to access the GeoNet database
    client = Client("GEONET")
    # Note that these chunks do not rely on each other, and could be run in
    # parallel on multiple nodes of a distributed cluster, see the SLURM
    # tutorial for an example of this.
    for t1, t2 in chunks:
        # Generate the bulk information to query the GeoNet database
        bulk_info = []
        for station in stations:
            bulk_info.append(('NZ', station[0], '*',
                              station[1][0] + 'H' + station[1][-1], t1, t2))
        # Note this will take a little while.
        print('Downloading seismic data, this may take a while')
        st = client.get_waveforms_bulk(bulk_info)
        # Merge the stream, it will be downloaded in chunks
        st.merge()

        # Pre-process the data to set frequency band and sampling rate.
        # Note that this is, and MUST BE, the same as the parameters used
        # for the template creation.
        print('Processing the seismic data')
        st = pre_processing.shortproc(
            st, lowcut=2.0, highcut=9.0, filt_order=4, samp_rate=20.0,
            num_cores=num_cores, starttime=t1, endtime=t2)
        # Convert from list to stream
        st = Stream(st)

        # Now we can conduct the matched-filter detection
        detections = match_filter.match_filter(
            template_names=template_names, template_list=templates, st=st,
            threshold=8.0, threshold_type='MAD', trig_int=6.0, plotvar=plot,
            plotdir='.', cores=num_cores, plot_format='png', **kwargs)

        # Now let's try to work out how many unique events we have, just to
        # compare with the GeoNet catalog of 20 events on this day in this
        # sequence
        for master in detections:
            keep = True
            for slave in detections:
                if not master == slave and \
                        abs(master.detect_time -
                            slave.detect_time) <= 1.0:
                    # If the events are within 1 s of each other then test
                    # which was the 'best' match, strongest detection
                    if not master.detect_val > slave.detect_val:
                        keep = False
                        print('Removed detection at %s with cccsum %s' %
                              (master.detect_time, master.detect_val))
                        print('Keeping detection at %s with cccsum %s' %
                              (slave.detect_time, slave.detect_val))
                        break
            if keep:
                unique_detections.append(master)
                print('Detection at :' + str(master.detect_time) +
                      ' for template ' + master.template_name +
                      ' with a cross-correlation sum of: ' +
                      str(master.detect_val))
                # We can plot these too
                if plot:
                    stplot = st.copy()
                    template = templates[template_names.index(
                        master.template_name)]
                    lags = sorted([tr.stats.starttime for tr in template])
                    maxlag = lags[-1] - lags[0]
                    stplot.trim(starttime=master.detect_time - 10,
                                endtime=master.detect_time + maxlag + 10)
                    plotting.detection_multiplot(
                        stplot, template, [master.detect_time.datetime])
    print('We made a total of ' + str(len(unique_detections)) +
          ' detections')
    return unique_detections
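# A hedged usage note for the refactored version above: process_len and
# num_cores are now arguments, and extra keyword arguments are forwarded
# untouched to match_filter.match_filter (the forwarded debug level below
# assumes the installed match_filter still accepts a 'debug' argument, as
# the older tutorial above does).
if __name__ == '__main__':
    detections = run_tutorial(plot=False, process_len=3600, num_cores=2,
                              debug=0)
    print('Found %i unique detections' % len(detections))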
import warnings
from collections import Counter

from obspy import Stream

from eqcorrscan.utils.plotting import detection_multiplot

# Note: 'log' and 'LagCalcError' are module-level names in
# eqcorrscan.core.lag_calc, where this helper lives.


def _prepare_data(detect_data, detections, zipped_templates, delays,
                  shift_len, plot):
    """
    Prepare data for lag_calc - reduce memory here.

    :type detect_data: obspy.core.stream.Stream
    :param detect_data: Stream to extract detection streams from.
    :type detections: list
    :param detections:
        List of :class:`eqcorrscan.core.match_filter.DETECTION` to get
        data for.
    :type zipped_templates: zip
    :param zipped_templates: Zipped list of (template_name, template)
    :type delays: list
    :param delays: List of lists of the delays for each template
    :type shift_len: float
    :param shift_len: Shift length in seconds allowed for picking.
    :type plot: bool
    :param plot:
        Whether to plot the data extracted or not, used for debugging.

    :returns: List of detect_streams to be worked on
    :rtype: list
    """
    # Materialise the zip as a list: a bare zip object would be exhausted
    # after the first trace and every later lookup would find no template.
    zipped_templates = list(zipped_templates)
    detect_streams = []
    for detection in detections:
        # Stream to be saved for new detection
        detect_stream = []
        max_delay = 0
        for tr in detect_data:
            tr_copy = tr.copy()
            # Right now, copying each trace hundreds of times...
            template = [t for t in zipped_templates
                        if str(t[0]) == str(detection.template_name)]
            if len(template) > 0:
                template = template[0]
            else:
                warnings.warn('No template with name: %s' %
                              detection.template_name)
                for t in zipped_templates:
                    print(t)
                continue
            template = template[1].select(station=tr.stats.station,
                                          channel=tr.stats.channel)
            if template:
                # Save template trace length in seconds
                template_len = (len(template[0]) /
                                template[0].stats.sampling_rate)
            else:
                # If there is no template-data match then skip the rest of
                # the trace loop.
                continue
            # Grab the delays for the desired template: [(sta, chan, delay)]
            delay = [delay for delay in delays
                     if delay[0] == detection.template_name][0][1]
            # Now grab the delay for the desired trace for this template
            delay = [d for d in delay if d[0] == tr.stats.station and
                     d[1] == tr.stats.channel][0][2]
            if delay > max_delay:
                max_delay = delay
            detect_stream.append(tr_copy.trim(
                starttime=detection.detect_time - shift_len + delay,
                endtime=detection.detect_time + delay + shift_len +
                template_len))
            del tr_copy
        # Iterate over a shallow copy so removals do not skip traces
        for tr in list(detect_stream):
            if len(tr.data) == 0:
                msg = ('No data in %s.%s for detection at time %s' %
                       (tr.stats.station, tr.stats.channel,
                        detection.detect_time))
                log.debug(msg)
                warnings.warn(msg)
                detect_stream.remove(tr)
            # elif, not if: the trace may already have been removed above
            elif tr.stats.endtime - tr.stats.starttime < template_len:
                msg = ("Insufficient data for %s.%s will not use." %
                       (tr.stats.station, tr.stats.channel))
                log.debug(msg)
                warnings.warn(msg)
                detect_stream.remove(tr)
        # Check for duplicate traces
        stachans = [(tr.stats.station, tr.stats.channel)
                    for tr in detect_stream]
        c_stachans = Counter(stachans)
        for key in c_stachans.keys():
            if c_stachans[key] > 1:
                msg = ('Multiple channels for %s.%s, likely a data issue' %
                       (key[0], key[1]))
                raise LagCalcError(msg)
        if plot:
            background = detect_data.copy().trim(
                starttime=detection.detect_time - (shift_len + 5),
                endtime=detection.detect_time + shift_len + max_delay + 7)
            for tr in background:
                if len(tr.data) == 0:
                    background.remove(tr)
            detection_multiplot(stream=background,
                                template=Stream(detect_stream),
                                times=[detection.detect_time - shift_len],
                                title='Detection Extracted')
        if not len(detect_stream) == 0:
            # Create tuple of (template name, data stream)
            detect_streams.append(
                (detection.template_name, Stream(detect_stream)))
    return detect_streams
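# A hedged sketch of the inputs this helper expects, based on its docstring
# and the lookups above: zipped (name, stream) templates and, per template,
# a list of (station, channel, delay) tuples. 'template.ms', the name
# 'template_1', and the st/detections variables are assumptions.
from obspy import read

template = read('template.ms')
mintime = min(tr.stats.starttime for tr in template)
delays = [('template_1',
           [(tr.stats.station, tr.stats.channel,
             tr.stats.starttime - mintime) for tr in template])]
# Materialise the zip as a list so it can be re-iterated for every trace
zipped_templates = list(zip(['template_1'], [template]))
detect_streams = _prepare_data(
    detect_data=st, detections=detections,
    zipped_templates=zipped_templates, delays=delays, shift_len=0.2,
    plot=False)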
def detections_2_cat(detections, template_dict, stream, temp_prepick,
                     max_lag, cc_thresh, extract_pre_pick=3.0,
                     extract_post_pick=7.0, write_wav=False, debug=0):
    """
    Function to create a catalog from a list of detections, adjusting
    template pick times using cross correlation with the data stream at
    the time of detection.

    :type detections: list of DETECTION objects
    :param detections: Detections which we want to extract and locate.
    :type template_dict: dict
    :param template_dict:
        Dictionary of template name: template stream for the entire
        catalog. Template names must be in the format found in the
        DETECTION objects.
    :type stream: obspy.Stream
    :param stream:
        Stream encompassing the time span of the detections. Will be used
        for pick refinement by cross correlation. Should be fed a stream
        processed in the same way as the streams in template_dict (and in
        the same way that they were processed during matched filtering).
        The waveforms will not be processed here.
    :type write_wav: bool or str
    :param write_wav:
        If False, will not write detection waveforms to miniseed files.
        Otherwise, specify a directory to write the templates to. Will use
        the name template_name_detection_time.mseed.

    :returns: :class:`obspy.Catalog`
    """
    import warnings

    import numpy as np
    import matplotlib.pyplot as plt
    from obspy import UTCDateTime, Catalog, Stream
    from obspy.core.event import (
        ResourceIdentifier, Event, Pick, CreationInfo, Comment,
        WaveformStreamID)
    from obspy.signal.cross_correlation import xcorr

    from eqcorrscan.utils import plotting

    # XXX TODO Scripts haven't been saving the actual detection objects, so
    # XXX TODO we cannot make use of DETECTION.chans. Would be useful.
    # Copy stream out of the way
    st = stream.copy()
    # Create nested dictionary of delays template_name: stachan: delay
    # dict.items() works in both python 2 and 3 but is memory inefficient
    # in 2 as both vars are read into memory as lists
    delays = {}
    for name, temp in template_dict.items():
        sorted_temp = temp.sort(['starttime'])
        mintime = sorted_temp[0].stats.starttime
        delays[name] = {(tr.stats.station, tr.stats.channel):
                        tr.stats.starttime - mintime
                        for tr in sorted_temp}
    # Loop over all detections, saving each as a new event in a catalog
    new_cat = Catalog()
    for detection in detections:
        if write_wav:
            new_stream = Stream()
        if hasattr(detection, 'event'):
            new_event = detection.event
        else:
            rid = ResourceIdentifier(
                id=detection.template_name + '_' +
                detection.detect_time.strftime('%Y%m%dT%H%M%S.%f'),
                prefix='smi:local')
            new_event = Event(resource_id=rid)
            cr_i = CreationInfo(author='EQcorrscan',
                                creation_time=UTCDateTime())
            new_event.creation_info = cr_i
            thresh_str = 'threshold=' + str(detection.threshold)
            ccc_str = 'detect_val=' + str(detection.detect_val)
            det_time_str = 'det_time=%s' % str(detection.detect_time)
            if detection.chans:
                used_chans = 'channels used: ' + ' '.join(
                    [str(pair) for pair in detection.chans])
                new_event.comments.append(Comment(text=used_chans))
            new_event.comments.append(Comment(text=thresh_str))
            new_event.comments.append(Comment(text=ccc_str))
            new_event.comments.append(Comment(text=det_time_str))
        template = template_dict[detection.template_name]
        # Template length in seconds (npts divided by sampling rate; the
        # original multiplied, which gives samples squared per second)
        temp_len = template[0].stats.npts / template[0].stats.sampling_rate
        if template.sort(['starttime'])[0].stats.starttime == \
                detection.detect_time:
            print('Template %s detected itself at %s.' %
                  (detection.template_name, str(detection.detect_time)))
            new_event.resource_id = ResourceIdentifier(
                id=detection.template_name + '_self', prefix='smi:local')
        if debug >= 2:
            print('Plotting detection for template: %s' %
                  detection.template_name)
            plt_st = Stream([
                st.select(station=tr.stats.station,
                          channel=tr.stats.channel)[0].slice(
                    detection.detect_time - extract_pre_pick,
                    detection.detect_time + extract_post_pick)
                for tr in template
                if len(st.select(station=tr.stats.station,
                                 channel=tr.stats.channel)) > 0])
            plotting.detection_multiplot(
                plt_st, template, [detection.detect_time.datetime])
        # Loop over each trace in the template, correcting picks for the
        # new event if need be
        for tr in template:
            sta = tr.stats.station
            chan = tr.stats.channel
            if len(st.select(station=sta, channel=chan)) != 0:
                st_tr = st.select(station=sta, channel=chan)[0]
            else:
                print('No stream for %s: %s' % (sta, chan))
                continue
            st_tr_pick = (detection.detect_time +
                          delays[detection.template_name][(sta, chan)] +
                          temp_prepick)
            i, absval, full_corr = xcorr(
                tr, st_tr.slice(st_tr_pick - temp_prepick,
                                st_tr_pick - temp_prepick + temp_len),
                shift_len=max_lag, full_xcorr=True)
            ccval = max(full_corr)
            index = np.argmax(full_corr) - max_lag
            pk_str = 'ccval=' + str(ccval)
            if index == 0 or index == max_lag * 2:
                msg = ('Correlation correction at max_lag. Consider '
                       'increasing max_lag.')
                warnings.warn(msg)
            if debug >= 3:
                print('Plotting full correlation function')
                print('index: %d' % index)
                print('max_ccval: %.2f' % ccval)
                plt.plot(full_corr)
                plt.show()
                plt.close()
            if ccval > cc_thresh:
                print('Threshold exceeded at %s: %s' % (sta, chan))
                pick_tm = st_tr_pick + (index / tr.stats.sampling_rate)
            else:
                print('Correlation at %s: %s not good enough to correct '
                      'pick' % (sta, chan))
                pick_tm = st_tr_pick
            if tr.stats.channel[-1] in ['Z']:
                phase_hint = 'P'
            elif tr.stats.channel[-1] in ['N', 'E', '1', '2']:
                phase_hint = 'S'
            wv_id = WaveformStreamID(network_code=tr.stats.network,
                                     station_code=tr.stats.station,
                                     channel_code=tr.stats.channel)
            new_event.picks.append(
                Pick(time=pick_tm, waveform_id=wv_id,
                     phase_hint=phase_hint,
                     comments=[Comment(text=pk_str)]))
            if write_wav:
                new_stream.append(
                    st_tr.slice(starttime=pick_tm - extract_pre_pick,
                                endtime=pick_tm + extract_post_pick))
        # Append to new catalog
        new_cat += new_event
        if write_wav:
            filename = '%s%s.mseed' % (write_wav,
                                       str(new_event.resource_id))
            print('Writing new stream for detection to %s' % filename)
            new_stream.write(filename, format='MSEED')
    return new_cat
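# A hedged usage sketch for detections_2_cat, assuming templates saved as
# 'tutorial_template_*.ms', a stream processed identically to them in
# 'day_processed.ms', and a 'detections' list returned by
# match_filter.match_filter (all names hypothetical). max_lag appears to be
# in samples, given the xcorr call above.
import glob
import os

from obspy import read

template_dict = {os.path.splitext(os.path.basename(f))[0]: read(f)
                 for f in glob.glob('tutorial_template_*.ms')}
stream = read('day_processed.ms')
new_cat = detections_2_cat(detections, template_dict, stream,
                           temp_prepick=0.1, max_lag=20, cc_thresh=0.6,
                           write_wav='detection_wavs/', debug=0)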
def test_match_filter(self, samp_rate=20.0, debug=0):
    """
    Function to test the capabilities of match_filter and just check that
    it is working! Uses synthetic templates and seeded, randomised data.

    :type debug: int
    :param debug: Debug level, higher the number the more output.
    """
    from eqcorrscan.utils import pre_processing
    from eqcorrscan.utils import plotting
    from eqcorrscan.core import match_filter
    from eqcorrscan.utils.synth_seis import generate_synth_data
    from obspy import UTCDateTime
    import string
    # Generate a random dataset
    templates, data, seeds = generate_synth_data(
        nsta=5, ntemplates=2, nseeds=50, samp_rate=samp_rate, t_length=6.0,
        max_amp=5.0, debug=debug)
    # Notes to the user: If you use more templates you should ensure they
    # are more different, e.g. set the data to have larger moveouts,
    # otherwise similar templates will detect events seeded by another
    # template.
    # Test the pre_processing functions
    data = pre_processing.dayproc(st=data, lowcut=2.0, highcut=8.0,
                                  filt_order=3, samp_rate=samp_rate,
                                  debug=0, starttime=UTCDateTime(0))
    if debug > 0:
        data.plot()
    # Filter the data and the templates
    for template in templates:
        pre_processing.shortproc(st=template, lowcut=2.0, highcut=8.0,
                                 filt_order=3, samp_rate=samp_rate)
        if debug > 0:
            template.plot()
    template_names = list(string.ascii_lowercase)[0:len(templates)]
    detections = match_filter.match_filter(
        template_names=template_names, template_list=templates, st=data,
        threshold=10.0, threshold_type='MAD', trig_int=6.0, plotvar=False,
        plotdir='.', cores=1, debug=0)
    # Compare the detections to the seeds
    print('This test made ' + str(len(detections)) + ' detections')
    ktrue = 0
    kfalse = 0
    for detection in detections:
        print(detection.template_name)
        i = template_names.index(detection.template_name)
        t_seeds = seeds[i]
        dtime_samples = int((detection.detect_time - UTCDateTime(0)) *
                            samp_rate)
        if dtime_samples in t_seeds['time']:
            j = list(t_seeds['time']).index(dtime_samples)
            print('Detection at SNR of: ' + str(t_seeds['SNR'][j]))
            ktrue += 1
        else:
            min_diff = min(abs(t_seeds['time'] - dtime_samples))
            if min_diff < 10:
                # If there is a match within ten samples then it is
                # good enough
                j = list(abs(t_seeds['time'] -
                             dtime_samples)).index(min_diff)
                print('Detection at SNR of: ' + str(t_seeds['SNR'][j]))
                ktrue += 1
            else:
                print('Detection at sample: ' + str(dtime_samples) +
                      ' does not match anything in seed times:')
                kfalse += 1
                print('Minimum difference in samples is: ' +
                      str(min_diff))
    # Plot the detections
    if debug > 3:
        for i, template in enumerate(templates):
            times = [d.detect_time.datetime for d in detections
                     if d.template_name == template_names[i]]
            print(times)
            plotting.detection_multiplot(data, template, times)
    # Set an 'acceptable' ratio of positive to false detections
    print(str(ktrue) + ' true detections and ' + str(kfalse) +
          ' false detections')
    self.assertTrue(kfalse / ktrue < 0.25)
def run_tutorial(plot=False):
    """Main function to run the tutorial dataset."""
    from eqcorrscan.utils import pre_processing
    from eqcorrscan.utils import plotting
    from eqcorrscan.core import match_filter
    import glob

    # This import section copes with namespace changes between obspy versions
    import obspy
    if int(obspy.__version__.split('.')[0]) >= 1:
        from obspy.clients.fdsn import Client
    else:
        from obspy.fdsn import Client
    from obspy import UTCDateTime, Stream, read

    # First we want to load our templates
    template_names = glob.glob('tutorial_template_*.ms')
    if len(template_names) == 0:
        raise IOError('Template files not found, have you run the template '
                      'creation tutorial?')
    templates = [read(template_name) for template_name in template_names]

    # Work out what stations we have and get the data for them
    stations = []
    for template in templates:
        for tr in template:
            stations.append((tr.stats.station, tr.stats.channel))
    # Get a unique list of stations
    stations = list(set(stations))

    # We are going to look for detections on the day of our template;
    # however, to generalize, we will loop through the days between our
    # templates - in this case that is only one day.
    template_days = []
    for template in templates:
        template_days.append(template[0].stats.starttime.date)
    template_days = sorted(template_days)
    kdays = (template_days[-1] - template_days[0]).days + 1

    unique_detections = []
    for i in range(kdays):
        t1 = UTCDateTime(template_days[0]) + (86400 * i)
        t2 = t1 + 86400

        # Generate the bulk information to query the GeoNet database
        bulk_info = []
        for station in stations:
            bulk_info.append(('NZ', station[0], '*',
                              station[1][0] + 'H' + station[1][-1], t1, t2))

        # Set up a client to access the GeoNet database
        client = Client("GEONET")
        # Note this will take a little while.
        print('Downloading seismic data, this may take a while')
        st = client.get_waveforms_bulk(bulk_info)
        # Merge the stream, it will be downloaded in chunks
        st.merge(fill_value='interpolate')

        # Work out what data we actually have, to cope with possible lost
        # data. NOTE: this rebinding replaces the (station, channel) tuples
        # used to build bulk_info; fine for a single-day run, but it would
        # break the request on a second iteration of the day loop.
        stations = list(set([tr.stats.station for tr in st]))

        # Set how many cores we want to parallel across. We will set this to
        # four as this is the number of templates; if your machine has fewer
        # than four cores/CPUs the multiprocessing will wait until there is
        # a free core. Setting this higher than the number of templates gives
        # no increase in speed, as only detections for each template are
        # computed in parallel. It may also slow your processing by using
        # more memory than needed, to the extent that swap may be filled.
        ncores = 4

        # Pre-process the data to set frequency band and sampling rate.
        # Note that this is, and MUST BE, the same as the parameters used
        # for the template creation.
        print('Processing the seismic data')
        st = pre_processing.dayproc(st, lowcut=2.0, highcut=9.0,
                                    filt_order=4, samp_rate=20.0, debug=0,
                                    starttime=t1, num_cores=ncores)
        # Convert from list to stream
        st = Stream(st)

        # Now we can conduct the matched-filter detection
        detections = match_filter.match_filter(
            template_names=template_names, template_list=templates, st=st,
            threshold=8.0, threshold_type='MAD', trig_int=6.0, plotvar=plot,
            plotdir='.', cores=ncores, tempdir=False, debug=1,
            plot_format='jpg')

        # Now let's try to work out how many unique events we have, just to
        # compare with the GeoNet catalog of 20 events on this day in this
        # sequence
        for master in detections:
            keep = True
            for slave in detections:
                if not master == slave and \
                        abs(master.detect_time -
                            slave.detect_time) <= 1.0:
                    # If the events are within 1 s of each other then test
                    # which was the 'best' match, strongest detection
                    if not master.detect_val > slave.detect_val:
                        keep = False
                        break
            if keep:
                unique_detections.append(master)

    print('We made a total of ' + str(len(unique_detections)) +
          ' detections')

    for detection in unique_detections:
        print('Detection at :' + str(detection.detect_time) +
              ' for template ' + detection.template_name +
              ' with a cross-correlation sum of: ' +
              str(detection.detect_val))
        # We can plot these too
        if plot:
            stplot = st.copy()
            template = templates[template_names.index(
                detection.template_name)]
            lags = sorted([tr.stats.starttime for tr in template])
            maxlag = lags[-1] - lags[0]
            stplot.trim(starttime=detection.detect_time - 10,
                        endtime=detection.detect_time + maxlag + 10)
            plotting.detection_multiplot(
                stplot, template, [detection.detect_time.datetime])
    return unique_detections
raw_dict = {}
for filename in raw_files:
    # replace() rather than rstrip(): rstrip('.mseed') strips a set of
    # characters, not the suffix, and can eat the end of the event id
    uri_name = ('smi:org.gfz-potsdam.de/geofon/' +
                filename.split('/')[-1].split('_')[-1].replace(
                    '.mseed', ''))
    uri = ResourceIdentifier(uri_name)
    raw_dict[uri] = read(filename)

# Grab some catalog of interest
cat_list = glob('/media/chet/hdd/seismic/NZ/catalogs/qml/corr_groups/*029*')
cat = read_events('/media/chet/hdd/seismic/NZ/catalogs/qml/'
                  '2015_nlloc_final_run02_group_refined.xml')

# Plotting with multi_event_singlechan
# Plot a template over raw data? Not sure this works correctly
rid = cat[0].resource_id
temp_st = template_dict[rid]
raw_st = raw_dict[rid]
raw_st.filter('bandpass', freqmin=1.0, freqmax=20)
times = []
for tr in raw_st:
    temp_tr_time = [
        p.time for p in cat[0].picks
        if p.waveform_id.station_code == tr.stats.station and
        p.waveform_id.channel_code == tr.stats.channel]
    if temp_tr_time:
        times.append(temp_tr_time[0])
plotting.detection_multiplot(raw_st, temp_st, times, plot_mode='single')
    cores=6)
for detection in detections:
    # detection.write('detections.csv', append=True)
    detection.write('detections.csv')

# plot
# multi_trace_plot(st, corr=True, stack='linstack', size=(7, 12),
#                  show=True, title=None)
times = []
for dc in detections:
    for pick in dc.event.picks:
        times.append(pick.time)
template = read('template.ms')
template.plot()
detection_multiplot(st, template, times, streamcolour='k',
                    templatecolour='r')

# f, (ax1, ax2, ax3) = plt.subplots(3, 1, sharex=True, sharey=False)
# ax1.plot(st.select(id='YN.ZAT.00.BZ')[0].data, 'k')
# y1min, y1max = ax1.get_ylim()
# ax2.plot(st.select(id='YN.ZAT.00.BE')[0].data, 'k')
# y2min, y2max = ax2.get_ylim()
# ax3.plot(st.select(id='YN.ZAT.00.BN')[0].data, 'k')
# y3min, y3max = ax3.get_ylim()
#
# for detection in detections:
#     t = detection.detect_time
#     tt = (t - st[0].stats['starttime']) / st[0].stats['delta']
#     ax1.vlines(tt, y1min, y1max, color='r', linewidth=2)
#     ax2.vlines(tt, y2min, y2max, color='r', linewidth=2)
#     ax3.vlines(tt, y3min, y3max, color='r', linewidth=2)
def test_match_filter(self, samp_rate=20.0, debug=0):
    """
    Function to test the capabilities of match_filter and just check that
    it is working! Uses synthetic templates and seeded, randomised data.

    :type debug: int
    :param debug: Debug level, higher the number the more output.
    """
    from eqcorrscan.utils import pre_processing
    from eqcorrscan.utils import plotting
    from eqcorrscan.core import match_filter
    from eqcorrscan.utils.synth_seis import generate_synth_data
    from obspy import UTCDateTime
    import string
    # Generate a random dataset
    templates, data, seeds = generate_synth_data(
        nsta=5, ntemplates=2, nseeds=50, samp_rate=samp_rate, t_length=6.0,
        max_amp=5.0, max_lag=12.0, debug=debug)
    # Notes to the user: If you use more templates you should ensure they
    # are more different, e.g. set the data to have larger moveouts,
    # otherwise similar templates will detect events seeded by another
    # template.
    # Test the pre_processing functions
    data = pre_processing.dayproc(st=data, lowcut=2.0, highcut=8.0,
                                  filt_order=3, samp_rate=samp_rate,
                                  debug=0, starttime=UTCDateTime(0))
    if debug > 0:
        data.plot()
    # Filter the data and the templates
    for template in templates:
        pre_processing.shortproc(st=template, lowcut=2.0, highcut=8.0,
                                 filt_order=3, samp_rate=samp_rate)
        if debug > 0:
            template.plot()
    template_names = list(string.ascii_lowercase)[0:len(templates)]
    detections = match_filter.match_filter(
        template_names=template_names, template_list=templates, st=data,
        threshold=10.0, threshold_type='MAD', trig_int=6.0, plotvar=False,
        plotdir='.', cores=1, debug=0)
    # Compare the detections to the seeds
    print('This test made ' + str(len(detections)) + ' detections')
    ktrue = 0
    kfalse = 0
    for detection in detections:
        print(detection)
        i = template_names.index(detection.template_name)
        t_seeds = seeds[i]
        dtime_samples = int((detection.detect_time - UTCDateTime(0)) *
                            samp_rate)
        if dtime_samples in t_seeds['time']:
            j = list(t_seeds['time']).index(dtime_samples)
            print('Detection at SNR of: ' + str(t_seeds['SNR'][j]))
            ktrue += 1
        else:
            min_diff = min(abs(t_seeds['time'] - dtime_samples))
            if min_diff < 10:
                # If there is a match within ten samples then it is
                # good enough
                j = list(abs(t_seeds['time'] -
                             dtime_samples)).index(min_diff)
                print('Detection at SNR of: ' + str(t_seeds['SNR'][j]))
                ktrue += 1
            else:
                print('Detection at sample: ' + str(dtime_samples) +
                      ' does not match anything in seed times:')
                kfalse += 1
                print('Minimum difference in samples is: ' +
                      str(min_diff))
    # Plot the detections
    if debug > 3:
        for i, template in enumerate(templates):
            times = [d.detect_time.datetime for d in detections
                     if d.template_name == template_names[i]]
            print(times)
            plotting.detection_multiplot(data, template, times)
    # Set an 'acceptable' ratio of positive to false detections
    print(str(ktrue) + ' true detections and ' + str(kfalse) +
          ' false detections')
    self.assertTrue(kfalse / ktrue < 0.25)
print('Detection at :' + str(detection.detect_time) + ' for template ' +
      detection.template_name + ' with a cross-correlation sum of: ' +
      str(detection.detect_val))
stplot2 = std_filter.copy()
template = templates[template_names.index(detection.template_name)]
lags2 = sorted([tr.stats.starttime for tr in template])
maxlag2 = lags2[-1] - lags2[0]
starttime = detection.detect_time
stplot2.trim(starttime=starttime - 10, endtime=starttime + maxlag2 + 10)
plotting.detection_multiplot(
    stplot2, template, [detection.detect_time.datetime],
    size=[24.0, 11.77], save=True,
    savefile=os.getcwd() + '/Detection_Plots/Detection_' +
    str(starttime) + '.png')

# Clear streams to keep memory usage low
std1.clear()
std_filter.clear()
std.clear()
st.clear()

# Delete automatically generated template_*.npy files
filelist = glob.glob(os.getcwd() + "/template_*.npy")
for file in filelist:
    os.remove(file)
import warnings
from collections import Counter

import numpy as np
from obspy import Stream

from eqcorrscan.utils.plotting import detection_multiplot

# Note: LagCalcError is defined at module level in eqcorrscan.core.lag_calc,
# where this helper lives.


def _prepare_data(detect_data, detections, template, delays, shift_len,
                  plot):
    """
    Prepare data for lag_calc - reduce memory here.

    :type detect_data: obspy.core.stream.Stream
    :param detect_data: Stream to extract detection streams from.
    :type detections: list
    :param detections:
        List of :class:`eqcorrscan.core.match_filter.Detection` to get
        data for.
    :type template: tuple
    :param template: Tuple of (template_name, template)
    :type delays: dict
    :param delays: Dictionary of delay times in seconds keyed by
        sta.channel.
    :type shift_len: float
    :param shift_len: Shift length in seconds allowed for picking.
    :type plot: bool
    :param plot:
        Whether to plot the data extracted or not, used for debugging.

    :returns: List of detect_streams to be worked on
    :rtype: list
    """
    detect_streams = []
    for detection in detections:
        if detection.template_name != template[0]:
            continue
        # Stream to be saved for new detection
        detect_stream = []
        max_delay = 0
        for tr in detect_data:
            template_tr = template[1].select(station=tr.stats.station,
                                             channel=tr.stats.channel)
            if len(template_tr) >= 1:
                # Save template trace length in seconds
                template_len = (len(template_tr[0]) /
                                template_tr[0].stats.sampling_rate)
            else:
                # If there is no template-data match then skip the rest of
                # the trace loop.
                continue
            # Grab the delay for the desired trace for this template
            delay = delays[tr.stats.station + '.' + tr.stats.channel]
            if delay > max_delay:
                max_delay = delay
            detect_stream.append(tr.slice(
                starttime=detection.detect_time - shift_len + delay,
                endtime=detection.detect_time + delay + shift_len +
                template_len).copy())
        # Iterate over a shallow copy so removals do not skip traces
        for tr in list(detect_stream):
            if len(tr.data) == 0:
                msg = ('No data in %s.%s for detection at time %s' %
                       (tr.stats.station, tr.stats.channel,
                        detection.detect_time))
                warnings.warn(msg)
                detect_stream.remove(tr)
            elif tr.stats.endtime - tr.stats.starttime < (
                    (2 * shift_len) + template_len):
                msg = ("Insufficient data for %s.%s will not use." %
                       (tr.stats.station, tr.stats.channel))
                warnings.warn(msg)
                detect_stream.remove(tr)
            elif np.ma.is_masked(tr.data):
                msg = ("Masked data found for %s.%s, will not use." %
                       (tr.stats.station, tr.stats.channel))
                warnings.warn(msg)
                detect_stream.remove(tr)
        # Check for duplicate traces
        stachans = [(tr.stats.station, tr.stats.channel)
                    for tr in detect_stream]
        c_stachans = Counter(stachans)
        for key in c_stachans.keys():
            if c_stachans[key] > 1:
                msg = ('Multiple channels for %s.%s, likely a data issue' %
                       (key[0], key[1]))
                raise LagCalcError(msg)
        if plot:
            background = detect_data.slice(
                starttime=detection.detect_time - (shift_len + 5),
                endtime=detection.detect_time + shift_len + max_delay +
                7).copy()
            for tr in background:
                if len(tr.data) == 0:
                    background.remove(tr)
            detection_multiplot(stream=background,
                                template=Stream(detect_stream),
                                times=[detection.detect_time - shift_len],
                                title='Detection Extracted')
        if not len(detect_stream) == 0:
            # Make sure there are no masks left over.
            detect_stream = Stream(detect_stream).split()
            # Create tuple of (template name, data stream)
            detect_streams.append(
                (detection.template_name, Stream(detect_stream)))
    return detect_streams
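# A hedged sketch of building the 'delays' argument for the newer signature
# above: relative trace delays keyed by 'station.channel', computed from a
# template stream. 'template.ms', the name 'template_1', and the
# st/detections variables are assumptions.
from obspy import read

template = read('template.ms')
mintime = min(tr.stats.starttime for tr in template)
delays = {tr.stats.station + '.' + tr.stats.channel:
          tr.stats.starttime - mintime for tr in template}
detect_streams = _prepare_data(
    detect_data=st, detections=detections,
    template=('template_1', template), delays=delays, shift_len=0.2,
    plot=False)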
# Now let's try to work out how many unique events we have, just to compare
# with the GeoNet catalog of 20 events on this day in this sequence
for master in detections:
    keep = True
    for slave in detections:
        if not master == slave and \
                abs(master.detect_time - slave.detect_time) <= 1.0:
            # If the events are within 1 s of each other then test which
            # was the 'best' match, strongest detection
            if not master.detect_val > slave.detect_val:
                keep = False
                break
    if keep:
        unique_detections.append(master)
print('We made a total of ' + str(len(unique_detections)) + ' detections')
for detection in unique_detections:
    print('Detection at :' + str(detection.detect_time) +
          ' for template ' + detection.template_name +
          ' with a cross-correlation sum of: ' +
          str(detection.detect_val))
    # We can plot these too
    stplot = st.copy()
    template = templates[template_names.index(detection.template_name)]
    lags = sorted([tr.stats.starttime for tr in template])
    maxlag = lags[-1] - lags[0]
    stplot.trim(starttime=detection.detect_time - 10,
                endtime=detection.detect_time + maxlag + 10)
    plotting.detection_multiplot(stplot, template,
                                 [detection.detect_time.datetime])
for master in detections:
    keep = True
    for slave in detections:
        if not master == slave and abs(master.detect_time -
                                       slave.detect_time) <= 1.0:
            # If the events are within 1s of each other then test which
            # was the 'best' match, strongest detection
            if not master.detect_val > slave.detect_val:
                keep = False
                break
    if keep:
        unique_detections.append(master)
print("We made a total of " + str(len(unique_detections)) + " detections")
for detection in unique_detections:
    print(
        "Detection at :" + str(detection.detect_time) + " for template " +
        detection.template_name + " with a cross-correlation sum of: " +
        str(detection.detect_val)
    )
    # We can plot these too
    stplot = st.copy()
    template = templates[template_names.index(detection.template_name)]
    lags = sorted([tr.stats.starttime for tr in template])
    maxlag = lags[-1] - lags[0]
    stplot.trim(starttime=detection.detect_time - 10,
                endtime=detection.detect_time + maxlag + 10)
    plotting.detection_multiplot(stplot, template,
                                 [detection.detect_time.datetime])
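# The duplicate-removal loop above recurs throughout these examples; a
# hedged sketch of factoring it into a reusable helper (the name and the
# default 1 s window are assumptions, not EQcorrscan API). Equivalent call:
# unique_detections = decluster_detections(detections)
def decluster_detections(detections, tolerance=1.0):
    """Keep only the strongest detection within +/- tolerance seconds."""
    unique = []
    for master in detections:
        keep = True
        for slave in detections:
            if master != slave and \
                    abs(master.detect_time -
                        slave.detect_time) <= tolerance:
                # Keep whichever detection has the larger correlation sum
                if not master.detect_val > slave.detect_val:
                    keep = False
                    break
        if keep:
            unique.append(master)
    return unique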
import numpy as np
import matplotlib.pyplot as plt

from datetime import datetime
from obspy import Catalog, UTCDateTime
from eqcorrscan.core.match_filter import Family
from eqcorrscan.utils import pre_processing
from eqcorrscan.utils.plotting import detection_multiplot

# Note: date_generator and grab_day_wavs are helper functions assumed to be
# defined elsewhere in this script.


def plot_detection_wavs(family, tribe, wav_dirs, start=None, end=None,
                        save=False, save_dir=None, no_dets=5):
    """
    Wrapper on detection_multiplot() for our dataset

    :param family: Family of detections to plot
    :param tribe: Tribe containing the template for this family
    :param wav_dirs: Root directories for the day-long waveform files
    :return: matplotlib.pyplot.Figure
    """
    # Random range of dates in detections
    rand_inds = np.random.choice(range(len(family)), no_dets,
                                 replace=False)
    cat = Catalog(events=[det.event for i, det in enumerate(family)
                          if i in rand_inds])
    # Always plot self_detection
    cat += [det.event for det in family
            if det.detect_val / det.no_chans == 1.0][0]
    cat.events.sort(key=lambda x: x.picks[0].time)
    sub_fam = Family(template=family.template,
                     detections=[det for i, det in enumerate(family)
                                 if i in rand_inds])
    sub_fam.detections.extend([det for det in family
                               if det.detect_val / det.no_chans == 1.0])
    temp = tribe[sub_fam.template.name]
    if start:
        cat_start = datetime.strptime(start, '%d/%m/%Y')
        cat_end = datetime.strptime(end, '%d/%m/%Y')
    else:
        cat_start = cat[0].picks[0].time.date
        cat_end = cat[-1].picks[0].time.date
    for date in date_generator(cat_start, cat_end):
        dto = UTCDateTime(date)
        dets = [det for det in sub_fam
                if dto < det.detect_time < dto + 86400]
        if len(dets) == 0:
            print('No detections on: {!s}'.format(dto))
            continue
        print('Running for date: %s' % str(dto))
        stachans = {}
        for det in dets:
            ev = det.event
            for pk in ev.picks:
                sta = pk.waveform_id.station_code
                chan = pk.waveform_id.channel_code
                if sta not in stachans:
                    stachans[sta] = [chan]
                elif chan not in stachans[sta]:
                    stachans[sta].append(chan)
        # Grab day's wav files
        wav_ds = ['%s%d' % (d, dto.year) for d in wav_dirs]
        stream = grab_day_wavs(wav_ds, dto, stachans)
        print('Preprocessing')
        st1 = pre_processing.dayproc(stream, temp.lowcut, temp.highcut,
                                     temp.filt_order, temp.samp_rate,
                                     starttime=dto, num_cores=3)
        for det in dets:
            det_st = st1.slice(starttime=det.detect_time - 3,
                               endtime=det.detect_time + 7).copy()
            fname = '{}/{}.png'.format(
                save_dir, str(det.event.resource_id).split('/')[-1])
            det_t = 'Template {}: {}'.format(temp.name, det.detect_time)
            detection_multiplot(det_st, temp.st, [det.detect_time],
                                save=save, savefile=fname, title=det_t)
            plt.close('all')
    return
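# A hedged usage sketch for plot_detection_wavs, assuming a Party and Tribe
# previously saved with EQcorrscan's own writers; 'party.tgz', 'tribe.tgz',
# and the waveform root '/data/waveforms_' (the year is appended inside the
# function) are hypothetical paths.
from eqcorrscan.core.match_filter import Party, Tribe

party = Party()
party.read('party.tgz')
tribe = Tribe()
tribe.read('tribe.tgz')
plot_detection_wavs(family=party[0], tribe=tribe,
                    wav_dirs=['/data/waveforms_'], save=True,
                    save_dir='detection_plots', no_dets=5)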