def _median_window(window, window_start, multiplier, starttime,
                   sampling_rate):
    """
    Find peaks in a single window using a median-scaled threshold.

    Internal function to aid parallel processing.

    :type window: numpy.ndarray
    :param window: Data to look for peaks in.
    :type window_start: int
    :param window_start:
        Index of the window start point in the larger array; added to the
        index of every peak that is returned.
    :type multiplier: float
    :param multiplier:
        Multiple of the median of the absolute window values (called MAD in
        this code) to use as threshold.
    :type starttime: obspy.core.utcdatetime.UTCDateTime
    :param starttime: Starttime of window; not read by this function.
    :type sampling_rate: float
    :param sampling_rate: Sampling rate in Hz; not read by this function.

    :returns: (value, index) tuples, index shifted by ``window_start``
    :rtype: list
    """
    median_abs = np.median(np.abs(window))
    cutoff = multiplier * median_abs
    Logger.debug(''.join([
        'Threshold for window is: ', str(cutoff),
        '\nMedian is: ', str(median_abs),
        '\nMax is: ', str(np.max(window))]))
    found = find_peaks2_short(arr=window, thresh=cutoff, trig_int=5)
    if not found:
        return []
    # Re-reference the peak indexes from the window to the larger array.
    return [(peak[0], peak[1] + window_start) for peak in found]
def full_peak_array_py(self, cc_array):
    """
    Run find_peaks2_short on cc_array with ``full_peaks=True``.

    Uses a fixed threshold of 0.2 and ``self.trig_index`` as the trigger
    interval, and returns whatever find_peaks2_short returns.
    """
    options = dict(thresh=0.2, trig_int=self.trig_index, full_peaks=True)
    return find_peaks2_short(arr=cc_array, **options)
def _find_detections(cum_net_resp, nodes, threshold, thresh_type, samp_rate, realstations, length): """ Find detections within the cumulative network response. :type cum_net_resp: numpy.ndarray :param cum_net_resp: Array of cumulative network response for nodes :type nodes: list :param nodes: Nodes associated with the source of energy in the \ cum_net_resp :type threshold: float :param threshold: Threshold value :type thresh_type: str :param thresh_type: Either MAD (Median Absolute Deviation) or abs \ (absolute) or RMS (Root Mean Squared) :type samp_rate: float :param samp_rate: Sampling rate in Hz :type realstations: list :param realstations: List of stations used to make the cumulative network \ response, will be reported in the DETECTION :type length: float :param length: Maximum length of peak to look for in seconds :return: detections as :class: DETECTION .. note:: This is an internal function to ease parallel processing and \ should not be called directly. """ from eqcorrscan.core.match_filter import DETECTION from eqcorrscan.utils import findpeaks cum_net_resp = np.nan_to_num(cum_net_resp) # Force no NaNs if np.isnan(cum_net_resp).any(): raise ValueError("Nans present") print('Mean of data is: ' + str(np.median(cum_net_resp))) print('RMS of data is: ' + str(np.sqrt(np.mean(np.square(cum_net_resp))))) print('MAD of data is: ' + str(np.median(np.abs(cum_net_resp)))) if thresh_type == 'MAD': thresh = (np.median(np.abs(cum_net_resp)) * threshold) elif thresh_type == 'abs': thresh = threshold elif thresh_type == 'RMS': thresh = _rms(cum_net_resp) * threshold print('Threshold is set to: ' + str(thresh)) print('Max of data is: ' + str(max(cum_net_resp))) peaks = findpeaks.find_peaks2_short(cum_net_resp, thresh, length * samp_rate, debug=0) detections = [] if peaks: for peak in peaks: node = nodes[peak[1]] detections.append(DETECTION(str(node[0]) + '_' + str(node[1]) + '_' + str(node[2]), peak[1] / samp_rate, len(realstations), peak[0], thresh, 'brightness', 
realstations)) else: detections = [] print('I have found ' + str(len(peaks)) + ' possible detections') return detections
def peak_array(self, cc_array):
    """
    Run find_peaks2_short on cc_array and return its result.

    Uses a fixed threshold of 0.2, ``self.trig_index`` as the trigger
    interval, no debug output, no starttime and a sampling rate of 200 Hz.
    """
    options = dict(thresh=0.2, trig_int=self.trig_index, debug=0,
                   starttime=None, samp_rate=200.0)
    return find_peaks2_short(arr=cc_array, **options)
def template_remove(tr, template, cc_thresh, windowlength,
                    interp_len, debug=0):
    """
    Look for instances of template in the trace and remove the matches.

    The trace is cross-correlated with the template, peaks above
    ``cc_thresh`` are picked, and each peak is handed to ``_interp_gap``
    to replace the data around it.

    :type tr: obspy.core.Trace
    :param tr: Trace to remove spikes from.
    :type template: obspy.core.Trace
    :param template: Spike template to look for in data.
    :type cc_thresh: float
    :param cc_thresh: Cross-correlation threshold (-1 - 1).
    :type windowlength: float
    :param windowlength: Length of window to look for spikes in in seconds.
    :type interp_len: float
    :param interp_len:
        Window length to remove and fill in seconds; raised to the template
        length (with a warning) if shorter than the template.
    :type debug: int
    :param debug:
        Debug level: above 2 plots raw against despiked data, above 3 also
        plots the cross-correlation.

    :returns: tr, works in place.
    """
    from eqcorrscan.core.match_filter import normxcorr2
    from eqcorrscan.utils.findpeaks import find_peaks2_short
    from obspy import Trace
    from eqcorrscan.utils.timer import Timer
    import matplotlib.pyplot as plt
    import warnings

    # Keep an untouched copy for the debug comparison plot.
    data_in = tr.copy()
    _interp_len = int(tr.stats.sampling_rate * interp_len)
    template_npts = len(template.data)
    if template_npts > _interp_len:
        warnings.warn('Interp_len is less than the length of the template,'
                      'will used the length of the template!')
        _interp_len = template_npts
    if isinstance(template, Trace):
        template = template.data
    half_gap = int(0.5 * _interp_len)
    with Timer() as timer:
        cc = normxcorr2(tr.data.astype(np.float32),
                        template.astype(np.float32))
        if debug > 3:
            plt.plot(cc.flatten(), 'k', label='cross-correlation')
            plt.legend()
            plt.show()
        spike_peaks = find_peaks2_short(
            arr=cc.flatten(), thresh=cc_thresh,
            trig_int=windowlength * tr.stats.sampling_rate)
        for spike in spike_peaks:
            tr.data = _interp_gap(data=tr.data,
                                  peak_loc=spike[1] + half_gap,
                                  interp_len=_interp_len)
    print("Despiking took: %s s" % timer.secs)
    if debug > 2:
        plt.plot(data_in.data, 'r', label='raw')
        plt.plot(tr.data, 'k', label='despiked')
        plt.legend()
        plt.show()
    return tr
def test_peaks_plot(self):
    """Plot two synthetic spikes picked by find_peaks2_short."""
    spiked = self.data.copy()
    # Insert two spikes that sit above the threshold of 10.
    for sample, amplitude in ((30, 100), (60, 40)):
        spiked[sample] = amplitude
    picked = findpeaks.find_peaks2_short(spiked, 10, 3)
    return peaks_plot(
        data=spiked, starttime=UTCDateTime("2008001"), samp_rate=10,
        peaks=picked, show=False, return_figure=True)
def template_remove(tr, template, cc_thresh, windowlength,
                    interp_len, debug=0):
    """
    Look for instances of template in the trace and remove the matches.

    The trace is cross-correlated with the template, peaks above
    ``cc_thresh`` are picked, and each peak is handed to ``_interp_gap``
    to replace the data around it.

    :type tr: obspy.core.trace.Trace
    :param tr: Trace to remove spikes from.
    :type template: obspy.core.trace.Trace
    :param template: Spike template to look for in data.
    :type cc_thresh: float
    :param cc_thresh: Cross-correlation threshold (-1 - 1).
    :type windowlength: float
    :param windowlength: Length of window to look for spikes in in seconds.
    :type interp_len: float
    :param interp_len:
        Window length to remove and fill in seconds; raised to the template
        length (with a warning) if shorter than the template.
    :type debug: int
    :param debug:
        Debug level: above 2 plots raw against despiked data, above 3 also
        plots the cross-correlation.

    :returns: tr, works in place.
    :rtype: :class:`obspy.core.trace.Trace`
    """
    # Keep an untouched copy for the debug comparison plot.
    data_in = tr.copy()
    _interp_len = int(tr.stats.sampling_rate * interp_len)
    template_npts = len(template.data)
    if template_npts > _interp_len:
        warnings.warn('Interp_len is less than the length of the template,'
                      'will used the length of the template!')
        _interp_len = template_npts
    if isinstance(template, Trace):
        template = template.data
    half_gap = int(0.5 * _interp_len)
    with Timer() as timer:
        cc = normxcorr2(image=tr.data.astype(np.float32),
                        template=template.astype(np.float32))
        if debug > 3:
            plt.plot(cc.flatten(), 'k', label='cross-correlation')
            plt.legend()
            plt.show()
        spike_peaks = find_peaks2_short(
            arr=cc.flatten(), thresh=cc_thresh,
            trig_int=windowlength * tr.stats.sampling_rate)
        for spike in spike_peaks:
            tr.data = _interp_gap(data=tr.data,
                                  peak_loc=spike[1] + half_gap,
                                  interp_len=_interp_len)
    print("Despiking took: %s s" % timer.secs)
    if debug > 2:
        plt.plot(data_in.data, 'r', label='raw')
        plt.plot(tr.data, 'k', label='despiked')
        plt.legend()
        plt.show()
    return tr
def test_main_find_peaks(self):
    """Test find_peaks2_short against the stored reference peaks."""
    from eqcorrscan.utils.findpeaks import find_peaks2_short
    import numpy as np
    import os

    here = os.path.abspath(os.path.dirname(__file__))
    testing_path = os.path.join(here, 'test_data')
    ccc = np.load(os.path.join(testing_path, 'test_ccc.npy'))
    found = find_peaks2_short(arr=ccc, thresh=0.2, trig_int=10, debug=0,
                              starttime=False, samp_rate=200.0)
    reference = np.load(os.path.join(testing_path, 'test_peaks.npy'))
    # Check length first as this will be a more obvious issue
    length_msg = 'Peaks are not the same length, has ccc been ' + 'updated?'
    self.assertEqual(len(found), len(reference), msg=length_msg)
    matches = np.array(found) == reference
    self.assertTrue(matches.all())
def _median_window(window, window_start, multiplier, starttime,
                   sampling_rate, debug=0):
    """
    Find peaks in a single window using a median-scaled threshold.

    Internal function to aid parallel processing.

    :type window: numpy.ndarray
    :param window: Data to look for peaks in.
    :type window_start: int
    :param window_start:
        Index of the window start point in the larger array; added to the
        index of every peak that is returned.
    :type multiplier: float
    :param multiplier:
        Multiple of the median of the absolute window values (called MAD in
        this code) to use as threshold.
    :type starttime: obspy.core.utcdatetime.UTCDateTime
    :param starttime: Starttime of window, used in debug plotting.
    :type sampling_rate: float
    :param sampling_rate: Sampling rate in Hz, used for debug plotting.
    :type debug: int
    :param debug:
        Debug level; >= 2 prints the threshold, >= 4 also plots the peaks.

    :returns: (value, index) tuples, index shifted by ``window_start``
    :rtype: list
    """
    from eqcorrscan.utils.findpeaks import find_peaks2_short
    from eqcorrscan.utils.plotting import peaks_plot

    median_abs = np.median(np.abs(window))
    cutoff = multiplier * median_abs
    if debug >= 2:
        print(''.join(['Threshold for window is: ', str(cutoff),
                       '\nMedian is: ', str(median_abs),
                       '\nMax is: ', str(np.max(window))]))
    found = find_peaks2_short(arr=window, thresh=cutoff, trig_int=5,
                              debug=0)
    if not found:
        return []
    if debug >= 4:
        # Plot with window-relative indexes, before they are shifted.
        peaks_plot(window, starttime, sampling_rate, save=False,
                   peaks=found)
    return [(peak[0], peak[1] + window_start) for peak in found]
def template_remove(tr, template, cc_thresh, windowlength, interp_len):
    """
    Look for instances of template in the trace and remove the matches.

    The trace is cross-correlated with the template using the "fftw"
    correlation routine, peaks above ``cc_thresh`` are picked, and each peak
    is handed to ``_interp_gap`` to replace the data around it.

    :type tr: obspy.core.trace.Trace
    :param tr: Trace to remove spikes from.
    :type template: obspy.core.trace.Trace
    :param template: Spike template to look for in data.
    :type cc_thresh: float
    :param cc_thresh: Cross-correlation threshold (-1 - 1).
    :type windowlength: float
    :param windowlength: Length of window to look for spikes in in seconds.
    :type interp_len: float
    :param interp_len:
        Window length to remove and fill in seconds; raised to the template
        length (with a logged warning) if shorter than the template.

    :returns: tr, works in place.
    :rtype: :class:`obspy.core.trace.Trace`
    """
    _interp_len = int(tr.stats.sampling_rate * interp_len)
    template_npts = len(template.data)
    if template_npts > _interp_len:
        Logger.warning('Interp_len is less than the length of the template, '
                       'will used the length of the template!')
        _interp_len = template_npts
    if isinstance(template, Trace):
        # The correlation routine takes a stack of templates, so wrap the
        # single template in an outer dimension.
        template = np.array([template.data])
    half_gap = int(0.5 * _interp_len)
    with Timer() as timer:
        normxcorr = get_array_xcorr("fftw")
        cc, _ = normxcorr(stream=tr.data.astype(np.float32),
                          templates=template.astype(np.float32), pads=[0])
        spike_peaks = find_peaks2_short(
            arr=cc.flatten(), thresh=cc_thresh,
            trig_int=windowlength * tr.stats.sampling_rate)
        for spike in spike_peaks:
            tr.data = _interp_gap(data=tr.data,
                                  peak_loc=spike[1] + half_gap,
                                  interp_len=_interp_len)
    Logger.info("Despiking took: {0:.4f} s".format(timer.secs))
    return tr
def _median_window(window, window_start, multiplier, starttime,
                   sampling_rate, debug=0):
    """
    Internal function to aid parallel processing.

    Thresholds the window at ``multiplier`` times the median of its absolute
    values and returns the peaks found above that threshold.

    :type window: np.ndarray
    :param window: Data to look for peaks in.
    :type window_start: int
    :param window_start:
        Index of window start point in larger array, used for peak indexing.
    :type multiplier: float
    :param multiplier: Multiple of MAD to use as threshold
    :type starttime: obspy.UTCDateTime
    :param starttime: Starttime of window, used in debug plotting.
    :type sampling_rate: float
    :param sampling_rate: Sampling rate in Hz, used for debug plotting
    :type debug: int
    :param debug: debug level, if want plots, >= 4.

    :returns: peaks as (value, index) tuples, index offset by window_start
    """
    from eqcorrscan.utils.findpeaks import find_peaks2_short
    from eqcorrscan.utils.plotting import peaks_plot

    level = np.median(np.abs(window))
    thresh = multiplier * level
    if debug >= 2:
        report = ['Threshold for window is: ' + str(thresh),
                  'Median is: ' + str(level),
                  'Max is: ' + str(np.max(window))]
        print('\n'.join(report))
    raw_peaks = find_peaks2_short(arr=window, thresh=thresh, trig_int=5,
                                  debug=0)
    if not raw_peaks:
        return []
    if debug >= 4:
        peaks_plot(window, starttime, sampling_rate, save=False,
                   peaks=raw_peaks)
    shifted = []
    for value, index in ((peak[0], peak[1]) for peak in raw_peaks):
        shifted.append((value, index + window_start))
    return shifted
def _find_detections(cum_net_resp, nodes, threshold, thresh_type, samp_rate, realstations, length): """ Find detections within the cumulative network response. :type cum_net_resp: numpy.ndarray :param cum_net_resp: Array of cumulative network response for nodes :type nodes: list :param nodes: Nodes associated with the source of energy in the \ cum_net_resp :type threshold: float :param threshold: Threshold value :type thresh_type: str :param thresh_type: Either MAD (Median Absolute Deviation) or abs \ (absolute) or RMS (Root Mean Squared) :type samp_rate: float :param samp_rate: Sampling rate in Hz :type realstations: list :param realstations: List of stations used to make the cumulative network \ response, will be reported in the DETECTION :type length: float :param length: Maximum length of peak to look for in seconds :return: detections as :class: DETECTION .. note:: This is an internal function to ease parallel processing and \ should not be called directly. """ from eqcorrscan.core.match_filter import DETECTION from eqcorrscan.utils import findpeaks cum_net_resp = np.nan_to_num(cum_net_resp) # Force no NaNs if np.isnan(cum_net_resp).any(): raise ValueError("Nans present") print('Mean of data is: ' + str(np.median(cum_net_resp))) print('RMS of data is: ' + str(np.sqrt(np.mean(np.square(cum_net_resp))))) print('MAD of data is: ' + str(np.median(np.abs(cum_net_resp)))) if thresh_type == 'MAD': thresh = (np.median(np.abs(cum_net_resp)) * threshold) elif thresh_type == 'abs': thresh = threshold elif thresh_type == 'RMS': thresh = _rms(cum_net_resp) * threshold print('Threshold is set to: ' + str(thresh)) print('Max of data is: ' + str(max(cum_net_resp))) peaks = findpeaks.find_peaks2_short(cum_net_resp, thresh, length * samp_rate, debug=0) detections = [] if peaks: for peak in peaks: node = nodes[peak[1]] detections.append( DETECTION( str(node[0]) + '_' + str(node[1]) + '_' + str(node[2]), peak[1] / samp_rate, len(realstations), peak[0], thresh, 'brightness', 
realstations)) else: detections = [] print('I have found ' + str(len(peaks)) + ' possible detections') return detections
def _detect(detector, st, threshold, trig_int, moveout=0, min_trig=0,
            process=True, extract_detections=False, cores=1):
    """
    Detect within continuous data using the subspace method.

    Not to be called directly, use the detector.detect method.

    :type detector: eqcorrscan.core.subspace.Detector
    :param detector: Detector to use.
    :type st: obspy.core.stream.Stream
    :param st:
        Un-processed stream to detect within using the subspace detector
    :type threshold: float
    :param threshold: Threshold value for detections between 0-1
    :type trig_int: float
    :param trig_int: Minimum trigger interval in seconds.
    :type moveout: float
    :param moveout:
        Maximum allowable moveout window for non-multiplexed, network
        detection. See note.
    :type min_trig: int
    :param min_trig:
        Minimum number of stations exceeding threshold for non-multiplexed,
        network detection. See note.
    :type process: bool
    :param process:
        Whether or not to process the stream according to the parameters
        defined by the detector. Default is to process the data (True).
    :type extract_detections: bool
    :param extract_detections:
        Whether to extract waveforms for each detection or not, if true will
        return detections and streams.
    :type cores: int
    :param cores: Number of cores passed on to the stream processing step.

    :return:
        list of detections, or a tuple of (detections, detection streams)
        when extract_detections is True
    :rtype: list of eqcorrscan.core.match_filter.Detection
    :raises ValueError:
        If process is False and a trace's sampling rate differs from the
        detector's.
    """
    # time.clock() was removed in Python 3.8; default_timer is the portable
    # wall-clock replacement for timing a block of code.
    from timeit import default_timer

    detections = []
    # First process the stream
    if process:
        Logger.info('Processing Stream')
        stream, stachans = _subspace_process(
            streams=[st.copy()], lowcut=detector.lowcut,
            highcut=detector.highcut, filt_order=detector.filt_order,
            sampling_rate=detector.sampling_rate,
            multiplex=detector.multiplex, stachans=detector.stachans,
            parallel=True, align=False, shift_len=None, reject=False,
            cores=cores)
    else:
        # Check the sampling rate at the very least
        for tr in st:
            if not tr.stats.sampling_rate == detector.sampling_rate:
                raise ValueError('Sampling rates do not match.')
        stream = [st]
        stachans = detector.stachans
    outtic = default_timer()
    # If multiplexed, how many samples do we increment by?
    Nc = len(detector.stachans) if detector.multiplex else 1
    # Here do all ffts
    fft_vars = _do_ffts(detector, stream, Nc)
    Logger.info('Computing detection statistics')
    Logger.info('Preallocating stats matrix')
    n_channels = len(stream[0])
    stats = np.zeros(
        (n_channels, (len(stream[0][0]) // Nc) - (fft_vars[4] // Nc) + 1))
    for i, det_freq, data_freq_sq, data_freq in zip(
            range(n_channels), fft_vars[0], fft_vars[1], fft_vars[2]):
        # Calculate det_statistic in frequency domain
        stats[i] = _det_stat_freq(det_freq, data_freq_sq, data_freq,
                                  fft_vars[3], Nc, fft_vars[4], fft_vars[5])
        Logger.info('Stats matrix is shape %s' % str(stats[i].shape))
    trig_int_samples = detector.sampling_rate * trig_int
    Logger.info('Finding peaks')
    # One list of peaks per row of the statistics matrix.
    peaks = [
        findpeaks.find_peaks2_short(arr=stat_row, thresh=threshold,
                                    trig_int=trig_int_samples)
        for stat_row in stats]
    if not detector.multiplex:
        # Conduct network coincidence triggering
        peaks = findpeaks.coin_trig(
            peaks=peaks, samp_rate=detector.sampling_rate, moveout=moveout,
            min_trig=min_trig, stachans=stachans, trig_int=trig_int)
    else:
        peaks = peaks[0]
    # These comment strings do not depend on the individual peak.
    thresh_str = 'threshold=' + str(threshold)
    used_chans = 'channels used: ' + \
        ' '.join([str(pair) for pair in detector.stachans])
    for peak in peaks:
        detecttime = st[0].stats.starttime + \
            (peak[1] / detector.sampling_rate)
        rid = ResourceIdentifier(
            id=detector.name + '_' + str(detecttime), prefix='smi:local')
        ev = Event(resource_id=rid)
        ev.creation_info = CreationInfo(author='EQcorrscan',
                                        creation_time=UTCDateTime())
        # All detection info in Comments for lack of a better idea
        ccc_str = 'detect_val=' + str(peak[0])
        ev.comments.append(Comment(text=thresh_str))
        ev.comments.append(Comment(text=ccc_str))
        ev.comments.append(Comment(text=used_chans))
        for stachan in detector.stachans:
            tr = st.select(station=stachan[0], channel=stachan[1])
            # Fall back to an empty network code when the channel is not
            # present in the input stream.
            net_code = tr[0].stats.network if tr else ''
            wv_id = WaveformStreamID(network_code=net_code,
                                     station_code=stachan[0],
                                     channel_code=stachan[1])
            # Every pick is given the detection time itself.
            ev.picks.append(Pick(time=detecttime, waveform_id=wv_id))
        detections.append(
            Detection(template_name=detector.name, detect_time=detecttime,
                      no_chans=len(detector.stachans), detect_val=peak[0],
                      threshold=threshold, typeofdet='subspace',
                      threshold_type='abs', threshold_input=threshold,
                      chans=detector.stachans, event=ev))
    outtoc = default_timer()
    Logger.info('Detection took %s seconds' % str(outtoc - outtic))
    if extract_detections:
        detection_streams = extract_from_stream(st, detections)
        return detections, detection_streams
    return detections
def match_filter(template_names, template_list, st, threshold,
                 threshold_type, trig_int, plotvar, plotdir='.', cores=1,
                 debug=0, plot_format='png', output_cat=False,
                 extract_detections=False, arg_check=True):
    r"""
    Main matched-filter detection function.

    Over-arching code to run the correlations of given templates with a
    day of seismic data and output the detections based on a given threshold.
    For a functional example see the tutorials.

    :type template_names: list
    :param template_names:
        List of template names in the same order as template_list
    :type template_list: list
    :param template_list:
        A list of templates of which each template is a Stream of obspy
        traces containing seismic data and header information.
    :type st: obspy.core.stream.Stream
    :param st:
        A Stream object containing all the data available and required for
        the correlations with templates given. For efficiency this should
        contain no excess traces which are not in one or more of the
        templates. This will now remove excess traces internally, but will
        copy the stream and work on the copy, leaving your input stream
        untouched.
    :type threshold: float
    :param threshold: A threshold value set based on the threshold_type
    :type threshold_type: str
    :param threshold_type:
        The type of threshold to be used, can be MAD, absolute or
        av_chan_corr. See Note on thresholding below.
    :type trig_int: float
    :param trig_int: Minimum gap between detections in seconds.
    :type plotvar: bool
    :param plotvar: Turn plotting on or off
    :type plotdir: str
    :param plotdir:
        Path to plotting folder, plots will be output here, defaults to run
        location.
    :type cores: int
    :param cores: Number of cores to use
    :type debug: int
    :param debug:
        Debug output level, the bigger the number, the more the output.
    :type plot_format: str
    :param plot_format: Specify format of output plots if saved
    :type output_cat: bool
    :param output_cat:
        Specifies if matched_filter will output an obspy.Catalog class
        containing events for each detection. Default is False, in which
        case matched_filter will output a list of detection classes, as
        normal.
    :type extract_detections: bool
    :param extract_detections:
        Specifies whether or not to return a list of streams, one stream per
        detection.
    :type arg_check: bool
    :param arg_check:
        Check arguments, defaults to True, but if running in bulk, and you
        are certain of your arguments, then set to False.

    :raises MatchFilterError:
        On invalid arguments, templates containing masked arrays (only
        checked when debug >= 2), templates with traces of differing
        length, duplicate channels in the data, an unknown threshold_type,
        or a zero-length correlation sum.

    .. rubric::
        If neither `output_cat` or `extract_detections` are set to `True`,
        then only the list of
        :class:`eqcorrscan.core.match_filter.DETECTION`'s will be output:

    :return:
        :class:`eqcorrscan.core.match_filter.DETECTION`'s detections for
        each detection made.
    :rtype: list

    .. rubric::
        If `output_cat` is set to `True`, then the
        :class:`obspy.core.event.Catalog` will also be output:

    :return: Catalog containing events for each detection, see above.
    :rtype: :class:`obspy.core.event.Catalog`

    .. rubric::
        If `extract_detections` is set to `True` then the list of
        :class:`obspy.core.stream.Stream`'s will also be output.

    :return:
        list of :class:`obspy.core.stream.Stream`'s for each detection, see
        above.
    :rtype: list

    .. warning::
        Plotting within the match-filter routine uses the Agg backend
        with interactive plotting turned off. This is because the function
        is designed to work in bulk. If you wish to turn interactive
        plotting on you must import matplotlib in your script first, when
        you then import match_filter you will get the warning that this
        call to matplotlib has no effect, which will mean that match_filter
        has not changed the plotting behaviour.

    .. note::
        **Thresholding:**

        **MAD** threshold is calculated as the:

        .. math::

            threshold {\times} (median(abs(cccsum)))

        where :math:`cccsum` is the cross-correlation sum for a given
        template.

        **absolute** threshold is a true absolute threshold based on the
        cccsum value.

        **av_chan_corr** is based on the mean values of single-channel
        cross-correlations assuming all data are present as required for
        the template, e.g:

        .. math::

            av\_chan\_corr\_thresh=threshold \times (cccsum / len(template))

        where :math:`template` is a single template from the input and the
        length is the number of channels within this template.

    .. note::
        The output_cat flag will create an
        :class:`obspy.core.event.Catalog` containing one event for each
        :class:`eqcorrscan.core.match_filter.DETECTION`'s generated by
        match_filter. Each event will contain a number of comments dealing
        with correlation values and channels used for the detection. Each
        channel used for the detection will have a corresponding
        :class:`obspy.core.event.Pick` which will contain time and
        waveform information. **HOWEVER**, the user should note that, at
        present, the pick times do not account for the prepick times
        inherent in each template. For example, if a template trace starts
        0.1 seconds before the actual arrival of that phase, then the pick
        time generated by match_filter for that phase will be 0.1 seconds
        early. We are working on a solution that will involve saving
        templates alongside associated metadata.
    """
    import matplotlib
    matplotlib.use('Agg')
    # time.clock() was removed in Python 3.8; default_timer is the portable
    # wall-clock replacement for timing a block of code.
    from timeit import default_timer

    if arg_check:
        # Check the arguments to be nice - if arguments wrong type the parallel
        # output for the error won't be useful
        if not isinstance(template_names, list):
            raise MatchFilterError('template_names must be of type: list')
        if not isinstance(template_list, list):
            raise MatchFilterError('templates must be of type: list')
        if not len(template_list) == len(template_names):
            raise MatchFilterError('Not the same number of templates as names')
        for template in template_list:
            if not isinstance(template, Stream):
                msg = 'template in template_list must be of type: ' +\
                      'obspy.core.stream.Stream'
                raise MatchFilterError(msg)
        if not isinstance(st, Stream):
            msg = 'st must be of type: obspy.core.stream.Stream'
            raise MatchFilterError(msg)
        if str(threshold_type) not in [str('MAD'), str('absolute'),
                                       str('av_chan_corr')]:
            msg = 'threshold_type must be one of: MAD, absolute, av_chan_corr'
            raise MatchFilterError(msg)

    # Copy the stream here because we will muck about with it
    stream = st.copy()
    templates = copy.deepcopy(template_list)
    _template_names = copy.deepcopy(template_names)
    # Debug option to confirm that the channel names match those in the
    # templates
    if debug >= 2:
        template_stachan = []
        data_stachan = []
        for template in templates:
            for tr in template:
                if isinstance(tr.data, np.ma.core.MaskedArray):
                    raise MatchFilterError('Template contains masked array,'
                                           ' split first')
                template_stachan.append(tr.stats.station + '.' +
                                        tr.stats.channel)
        for tr in stream:
            data_stachan.append(tr.stats.station + '.' + tr.stats.channel)
        template_stachan = list(set(template_stachan))
        data_stachan = list(set(data_stachan))
        if debug >= 3:
            print('I have template info for these stations:')
            print(template_stachan)
            print('I have daylong data for these stations:')
            print(data_stachan)
    # Perform a check that the continuous data are all the same length
    min_start_time = min([tr.stats.starttime for tr in stream])
    max_end_time = max([tr.stats.endtime for tr in stream])
    # NOTE(review): a trace spanning the whole window would normally have
    # sampling_rate * duration + 1 samples, so this comparison may trigger
    # for every trace - confirm before changing, as downstream code may rely
    # on the dtype produced by the concatenation below.
    longest_trace_length = stream[0].stats.sampling_rate * (max_end_time -
                                                            min_start_time)
    for tr in stream:
        if not tr.stats.npts == longest_trace_length:
            msg = 'Data are not equal length, padding short traces'
            warnings.warn(msg)
            start_pad = np.zeros(int(tr.stats.sampling_rate *
                                     (tr.stats.starttime - min_start_time)))
            end_pad = np.zeros(int(tr.stats.sampling_rate *
                                   (max_end_time - tr.stats.endtime)))
            tr.data = np.concatenate([start_pad, tr.data, end_pad])
    # Perform check that all template lengths are internally consistent
    for i, temp in enumerate(template_list):
        if len(set([tr.stats.npts for tr in temp])) > 1:
            msg = ('Template %s contains traces of differing length, this is '
                   'not currently supported' % _template_names[i])
            raise MatchFilterError(msg)
    outtic = default_timer()
    if debug >= 2:
        print('Ensuring all template channels have matches in long data')
    template_stachan = {}
    # Work out what station-channel pairs are in the templates, including
    # duplicate station-channel pairs.  We will use this information to fill
    # all templates with the same station-channel pairs as required by
    # _template_loop.
    for template in templates:
        stachans_in_template = Counter(
            (tr.stats.network, tr.stats.station,
             tr.stats.location, tr.stats.channel) for tr in template)
        for stachan, count in stachans_in_template.items():
            # Keep the largest multiplicity seen in any one template.
            if count > template_stachan.get(stachan, 0):
                template_stachan[stachan] = count
    # Remove un-matched channels from templates.
    _template_stachan = copy.deepcopy(template_stachan)
    for stachan in template_stachan:
        if not stream.select(network=stachan[0], station=stachan[1],
                             location=stachan[2], channel=stachan[3]):
            # Remove stachan from list of dictionary of template_stachans
            _template_stachan.pop(stachan)
            # Remove template traces rather than adding NaN data
            for template in templates:
                for tr in template.select(
                        network=stachan[0], station=stachan[1],
                        location=stachan[2], channel=stachan[3]):
                    template.remove(tr)
    template_stachan = _template_stachan
    # Remove un-needed channels from continuous data.  Iterate over a
    # snapshot: removing from the stream while iterating it skips traces.
    for tr in list(stream):
        if (tr.stats.network, tr.stats.station,
                tr.stats.location, tr.stats.channel) not in template_stachan:
            stream.remove(tr)
    # Check for duplicate channels
    stachans = [(tr.stats.network, tr.stats.station,
                 tr.stats.location, tr.stats.channel) for tr in stream]
    c_stachans = Counter(stachans)
    for key in c_stachans:
        if c_stachans[key] > 1:
            msg = ('Multiple channels for %s.%s.%s.%s, likely a data issue'
                   % (key[0], key[1], key[2], key[3]))
            raise MatchFilterError(msg)
    # Pad out templates to have all channels.  Templates that survive are
    # collected into new lists instead of removing from the lists being
    # iterated, which would skip the entry after each removal.
    kept_templates = []
    kept_names = []
    for template, template_name in zip(templates, _template_names):
        if len(template) == 0:
            msg = ('No channels matching in continuous data for ' +
                   'template' + template_name)
            warnings.warn(msg)
            continue
        for stachan in template_stachan:
            number_of_channels = len(template.select(
                network=stachan[0], station=stachan[1],
                location=stachan[2], channel=stachan[3]))
            if number_of_channels < template_stachan[stachan]:
                missed_channels = template_stachan[stachan] -\
                    number_of_channels
                nulltrace = Trace()
                nulltrace.stats.update(
                    {'network': stachan[0], 'station': stachan[1],
                     'location': stachan[2], 'channel': stachan[3],
                     'sampling_rate': template[0].stats.sampling_rate,
                     'starttime': template[0].stats.starttime})
                # np.nan rather than np.NaN: the latter alias was removed
                # in NumPy 2.0.
                nulltrace.data = np.full(len(template[0].data), np.nan,
                                         dtype=np.float32)
                for dummy in range(missed_channels):
                    template += nulltrace
        template.sort()
        # Quick check that this has all worked
        if len(template) != max([len(t) for t in templates]):
            raise MatchFilterError('Internal error forcing same template '
                                   'lengths, report this error.')
        kept_templates.append(template)
        kept_names.append(template_name)
    templates = kept_templates
    _template_names = kept_names
    if debug >= 2:
        print('Starting the correlation run for this day')
    if debug >= 4:
        for template in templates:
            print(template)
        print(stream)
    [cccsums, no_chans, chans] = _channel_loop(
        templates=templates, stream=stream, cores=cores, debug=debug)
    if len(cccsums[0]) == 0:
        raise MatchFilterError('Correlation has not run, zero length cccsum')
    outtoc = default_timer()
    print(' '.join(['Looping over templates and streams took:',
                    str(outtoc - outtic), 's']))
    if debug >= 2:
        print(' '.join(['The shape of the returned cccsums is:',
                        str(np.shape(cccsums))]))
        print(' '.join(['This is from', str(len(templates)), 'templates']))
        print(' '.join(['Correlated with', str(len(stream)),
                        'channels of data']))
    detections = []
    if output_cat:
        det_cat = Catalog()
    for i, cccsum in enumerate(cccsums):
        template = templates[i]
        if str(threshold_type) == str('MAD'):
            rawthresh = threshold * np.median(np.abs(cccsum))
        elif str(threshold_type) == str('absolute'):
            rawthresh = threshold
        elif str(threshold_type) == str('av_chan_corr'):
            rawthresh = threshold * no_chans[i]
        else:
            # Only reachable when arg_check is False; previously rawthresh
            # was left unbound here.
            msg = 'threshold_type must be one of: MAD, absolute, av_chan_corr'
            raise MatchFilterError(msg)
        cccsum_max = np.max(cccsum)
        cccsum_mean = np.mean(cccsum)
        # Findpeaks returns a list of tuples in the form [(cccsum, sample)]
        print(' '.join(['Threshold is set at:', str(rawthresh)]))
        print(' '.join(['Max of data is:', str(cccsum_max)]))
        print(' '.join(['Mean of data is:', str(cccsum_mean)]))
        if np.abs(cccsum_mean) > 0.05:
            warnings.warn('Mean is not zero! Check this!')
        # Set up a trace object for the cccsum as this is easier to plot and
        # maintains timing
        if plotvar:
            _match_filter_plot(stream=stream, cccsum=cccsum,
                               template_names=_template_names,
                               rawthresh=rawthresh, plotdir=plotdir,
                               plot_format=plot_format, i=i)
        if debug >= 4:
            day_str = stream[0].stats.starttime.datetime.strftime('%Y%j')
            print(' '.join(['Saved the cccsum to:', _template_names[i],
                            day_str]))
            np.save(_template_names[i] + day_str, cccsum)
        tic = default_timer()
        if cccsum_max > rawthresh:
            peaks = findpeaks.find_peaks2_short(
                arr=cccsum, thresh=rawthresh,
                trig_int=trig_int * stream[0].stats.sampling_rate,
                debug=debug, starttime=stream[0].stats.starttime,
                samp_rate=stream[0].stats.sampling_rate)
        else:
            print('No peaks found above threshold')
            peaks = False
        toc = default_timer()
        if debug >= 1:
            print(' '.join(['Finding peaks took:', str(toc - tic), 's']))
        if not peaks:
            continue
        # These values do not depend on the individual peak.
        thresh_str = 'threshold=' + str(rawthresh)
        used_chans = 'channels used: ' +\
            ' '.join([str(pair) for pair in chans[i]])
        min_template_tm = min([tr.stats.starttime for tr in template])
        for peak in peaks:
            detecttime = stream[0].stats.starttime +\
                peak[1] / stream[0].stats.sampling_rate
            # Detect time must be valid QuakeML uri within resource_id.
            # This will write a formatted string which is still
            # readable by UTCDateTime
            rid = ResourceIdentifier(
                id=_template_names[i] + '_' +
                str(detecttime.strftime('%Y%m%dT%H%M%S.%f')),
                prefix='smi:local')
            ev = Event(resource_id=rid)
            cr_i = CreationInfo(author='EQcorrscan',
                                creation_time=UTCDateTime())
            ev.creation_info = cr_i
            # All detection info in Comments for lack of a better idea
            ccc_str = 'detect_val=' + str(peak[0])
            ev.comments.append(Comment(text=thresh_str))
            ev.comments.append(Comment(text=ccc_str))
            ev.comments.append(Comment(text=used_chans))
            for tr in template:
                if (tr.stats.station, tr.stats.channel) not in chans[i]:
                    continue
                # Offset each pick by the trace's delay relative to the
                # earliest trace in the template.
                pick_tm = detecttime + (tr.stats.starttime -
                                        min_template_tm)
                wv_id = WaveformStreamID(network_code=tr.stats.network,
                                         station_code=tr.stats.station,
                                         channel_code=tr.stats.channel)
                ev.picks.append(Pick(time=pick_tm, waveform_id=wv_id))
            detections.append(DETECTION(_template_names[i], detecttime,
                                        no_chans[i], peak[0], rawthresh,
                                        'corr', chans[i], event=ev))
            if output_cat:
                det_cat.append(ev)
    if extract_detections:
        # Extract once, after every template has contributed detections.
        detection_streams = extract_from_stream(stream, detections)
    if output_cat and extract_detections:
        return detections, det_cat, detection_streams
    if output_cat:
        return detections, det_cat
    if extract_detections:
        return detections, detection_streams
    return detections
def match_filter(template_names, template_list, st, threshold,
                 threshold_type, trig_int, plotvar, plotdir='.', cores=1,
                 tempdir=False, debug=0, plot_format='png',
                 output_cat=False, extract_detections=False,
                 arg_check=True):
    """
    Main matched-filter detection function.

    Over-arching code to run the correlations of given templates with a
    day of seismic data and output the detections based on a given
    threshold. For a functional example see the tutorials.

    :type template_names: list
    :param template_names: List of template names in the same order as
        template_list. The list passed in is not modified.
    :type template_list: list
    :param template_list: A list of templates of which each template is a
        Stream of obspy traces containing seismic data and header
        information. A deep copy is worked on.
    :type st: obspy.core.stream.Stream
    :param st: An obspy.Stream object containing all the data available
        and required for the correlations with templates given. Traces
        that are in none of the templates are removed internally, but
        this is done on a copy, leaving the input stream untouched.
    :type threshold: float
    :param threshold: A threshold value set based on the threshold_type
    :type threshold_type: str
    :param threshold_type: The type of threshold to be used, one of MAD,
        absolute or av_chan_corr. MAD is computed as
        threshold * median(abs(cccsum)) where cccsum is the
        cross-correlation sum for a given template. absolute uses
        threshold directly as the cccsum level. av_chan_corr is
        threshold multiplied by the per-template channel count
        (no_chans) reported by _channel_loop.
    :type trig_int: float
    :param trig_int: Minimum gap between detections in seconds.
    :type plotvar: bool
    :param plotvar: Turn plotting on or off
    :type plotdir: str
    :param plotdir: Path to plotting folder, plots will be output here,
        defaults to run location.
    :type tempdir: str
    :param tempdir: Directory to put temporary files, or False. Not
        referenced in the body of this function.
    :type cores: int
    :param cores: Number of cores to use, passed on to _channel_loop
    :type debug: int
    :param debug: Debug output level, the bigger the number, the more
        the output.
    :type plot_format: str
    :param plot_format: Specify format of output plots if saved
    :type output_cat: bool
    :param output_cat: If True an obspy.Catalog holding one event per
        detection is returned as well as the detections.
    :type extract_detections: bool
    :param extract_detections: If True the result of
        extract_from_stream(stream, detections) is returned as well.
    :type arg_check: bool
    :param arg_check: Check arguments, defaults to True, but if running
        in bulk, and you are certain of your arguments, then set to
        False.

    :return: ``detections`` when neither output flag is set;
        ``(detections, det_cat)`` when only output_cat is set;
        ``(detections, detection_streams)`` when only extract_detections
        is set; ``(detections, det_cat, detection_streams)`` when both
        are set. ``detections`` is a list of DETECTION objects.
    :raises IOError: If arg_check is True and an argument has the wrong
        type, or if threshold_type is not one of the accepted values.
    :raises ValueError: If a trace in st is not a full day long, if a
        template has traces of differing length, or if the correlation
        returned a zero-length cccsum.

    .. note:: Plotting within the match-filter routine uses the Agg
        backend with interactive plotting turned off. This is because
        the function is designed to work in bulk. If you wish to turn
        interactive plotting on you must import matplotlib in your
        script first, when you then import match_filter you will get
        the warning that this call to matplotlib has no effect, which
        will mean that match_filter has not changed the plotting
        behaviour.

    .. note:: The output_cat flag will create an obspy.Catalog
        containing one event for each DETECTION generated by
        match_filter. Each event will contain a number of comments
        dealing with correlation values and channels used for the
        detection. Each channel used for the detection will have a
        corresponding Pick which will contain time and waveform
        information. HOWEVER, the user should note that, at present, the
        pick times do not account for the prepick times inherent in each
        template. For example, if a template trace starts 0.1 seconds
        before the actual arrival of that phase, then the pick time
        generated by match_filter for that phase will be 0.1 seconds
        early.
    """
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    plt.ioff()
    import copy
    # time.clock() was removed in Python 3.8; default_timer is available
    # on both Python 2 and Python 3.
    from timeit import default_timer
    from eqcorrscan.utils import plotting
    from eqcorrscan.utils import findpeaks
    from obspy import Trace, Catalog, UTCDateTime, Stream
    from obspy.core.event import Event, Pick, CreationInfo, ResourceIdentifier
    from obspy.core.event import Comment, WaveformStreamID

    threshold_type_msg = ('threshold_type must be one of: MAD, absolute, '
                          'av_chan_corr')
    if arg_check:
        # Check the arguments to be nice - if arguments wrong type the
        # parallel output for the error won't be useful
        if not isinstance(template_names, list):
            raise IOError('template_names must be of type: list')
        if not isinstance(template_list, list):
            raise IOError('templates must be of type: list')
        for template in template_list:
            if not isinstance(template, Stream):
                raise IOError('template in template_list must be of type: '
                              'obspy.core.stream.Stream')
        if not isinstance(st, Stream):
            raise IOError('st must be of type: obspy.core.stream.Stream')
        if threshold_type not in ('MAD', 'absolute', 'av_chan_corr'):
            raise IOError(threshold_type_msg)

    # Work on copies so the caller's stream and templates are untouched
    stream = st.copy()
    templates = copy.deepcopy(template_list)

    # Debug option to confirm that the channel names match those in the
    # templates
    if debug >= 3:
        print('I have template info for these stations:')
        print(list(set(tr.stats.station + '.' + tr.stats.channel
                       for template in templates for tr in template)))
        print('I have daylong data for these stations:')
        print(list(set(tr.stats.station + '.' + tr.stats.channel
                       for tr in stream)))

    # Perform a check that the daylong vectors are daylong
    for tr in stream:
        if not tr.stats.sampling_rate * 86400 == tr.stats.npts:
            raise ValueError(' '.join(['Data are not daylong for',
                                       tr.stats.station, tr.stats.channel]))

    # Perform check that all template lengths are internally consistent
    for i, temp in enumerate(template_list):
        if len(set(tr.stats.npts for tr in temp)) > 1:
            raise ValueError('Template %s contains traces of differing '
                             'length!! THIS WILL CAUSE ISSUES'
                             % template_names[i])

    outtic = default_timer()
    if debug >= 2:
        print('Ensuring all template channels have matches in daylong data')
    # Every (station, channel) pair that appears in any template. This is
    # collected before template traces are stripped below, so it also
    # holds pairs for which the stream has no data.
    template_stachan = list(set((tr.stats.station, tr.stats.channel)
                                for template in templates
                                for tr in template))
    for station, channel in template_stachan:
        if stream.select(station=station, channel=channel):
            continue
        # Remove template traces rather than adding NaN data
        for template in templates:
            for tr in template.select(station=station, channel=channel):
                template.remove(tr)

    # Remove un-needed channels; iterate over a snapshot because traces
    # are removed from the stream inside the loop.
    for tr in list(stream):
        if (tr.stats.station, tr.stats.channel) not in template_stachan:
            stream.remove(tr)

    # Drop templates left with no traces and pad the others out to have
    # all channels. New lists are built instead of removing items from the
    # lists being iterated, which skipped entries with a lazy zip and
    # altered the caller's template_names.
    kept_templates = []
    kept_names = []
    for template, template_name in zip(templates, template_names):
        if len(template) == 0:
            warnings.warn('No channels matching in continuous data for '
                          'template ' + template_name)
            continue
        for station, channel in template_stachan:
            if not template.select(station=station, channel=channel):
                nulltrace = Trace()
                nulltrace.stats.station = station
                nulltrace.stats.channel = channel
                nulltrace.stats.sampling_rate = \
                    template[0].stats.sampling_rate
                nulltrace.stats.starttime = template[0].stats.starttime
                nulltrace.data = np.array([np.nan] * len(template[0].data),
                                          dtype=np.float32)
                template += nulltrace
        kept_templates.append(template)
        kept_names.append(template_name)
    templates = kept_templates
    template_names = kept_names

    if debug >= 2:
        print('Starting the correlation run for this day')
    cccsums, no_chans, chans = _channel_loop(templates, stream, cores, debug)
    if len(cccsums[0]) == 0:
        raise ValueError('Correlation has not run, zero length cccsum')
    outtoc = default_timer()
    print(' '.join(['Looping over templates and streams took:',
                    str(outtoc - outtic), 's']))
    if debug >= 2:
        print(' '.join(['The shape of the returned cccsums is:',
                        str(np.shape(cccsums))]))
        print(' '.join(['This is from', str(len(templates)), 'templates']))
        print(' '.join(['Correlated with', str(len(stream)),
                        'channels of data']))

    detections = []
    if output_cat:
        det_cat = Catalog()
    for i, cccsum in enumerate(cccsums):
        template = templates[i]
        template_name = template_names[i]
        if threshold_type == 'MAD':
            rawthresh = threshold * np.median(np.abs(cccsum))
        elif threshold_type == 'absolute':
            rawthresh = threshold
        elif threshold_type == 'av_chan_corr':
            rawthresh = threshold * no_chans[i]
        else:
            # Only reachable with arg_check=False; fail with a clear
            # message rather than an unbound-variable error.
            raise IOError(threshold_type_msg)
        # Computed once per template and reused below
        cccsum_max = max(cccsum)
        cccsum_mean = np.mean(cccsum)
        print(' '.join(['Threshold is set at:', str(rawthresh)]))
        print(' '.join(['Max of data is:', str(cccsum_max)]))
        print(' '.join(['Mean of data is:', str(cccsum_mean)]))
        if np.abs(cccsum_mean) > 0.05:
            warnings.warn('Mean is not zero! Check this!')

        # Set up a trace object for the cccsum as this is easier to plot
        # and maintains timing
        if plotvar:
            first_stats = stream[0].stats
            stream_plot = copy.deepcopy(stream[0])
            # Downsample for plotting
            stream_plot.decimate(int(first_stats.sampling_rate / 10))
            cccsum_plot = Trace(cccsum)
            cccsum_plot.stats.sampling_rate = first_stats.sampling_rate
            # Resample here to maintain shape better
            cccsum_hist = cccsum_plot.copy()
            cccsum_hist = cccsum_hist.decimate(
                int(first_stats.sampling_rate / 10)).data
            cccsum_plot = plotting.chunk_data(cccsum_plot, 10,
                                              'Maxabs').data
            # Enforce same length
            stream_plot.data = stream_plot.data[0:len(cccsum_plot)]
            cccsum_plot = cccsum_plot[0:len(stream_plot.data)]
            cccsum_hist = cccsum_hist[0:len(stream_plot.data)]
            plotting.triple_plot(
                cccsum_plot, cccsum_hist, stream_plot, rawthresh, True,
                plotdir + '/cccsum_plot_' + template_name + '_' +
                first_stats.starttime.datetime.strftime('%Y-%m-%d') +
                '.' + plot_format)
            if debug >= 4:
                day_stamp = first_stats.starttime.datetime.strftime('%Y%j')
                print(' '.join(['Saved the cccsum to:', template_name,
                                day_stamp]))
                np.save(template_name + day_stamp, cccsum)

        tic = default_timer()
        if debug >= 4:
            np.save('cccsum_' + str(i) + '.npy', cccsum)
        # Findpeaks returns a list of tuples in the form
        # [(cccsum, sample)]
        if cccsum_max > rawthresh:
            min_gap = trig_int * stream[0].stats.sampling_rate
            if debug >= 3:
                peaks = findpeaks.find_peaks2_short(
                    cccsum, rawthresh, min_gap, debug,
                    stream[0].stats.starttime,
                    stream[0].stats.sampling_rate)
            else:
                peaks = findpeaks.find_peaks2_short(cccsum, rawthresh,
                                                    min_gap, debug)
        else:
            print('No peaks found above threshold')
            peaks = []
        toc = default_timer()
        if debug >= 1:
            print(' '.join(['Finding peaks took:', str(toc - tic), 's']))
        if not peaks:
            continue

        # The earliest template start time is the same for every peak
        min_template_tm = min([tr.stats.starttime for tr in template])
        for peak in peaks:
            detecttime = stream[0].stats.starttime +\
                peak[1] / stream[0].stats.sampling_rate
            # Detect time must be valid QuakeML uri within resource_id.
            # This will write a formatted string which is still
            # readable by UTCDateTime
            rid = ResourceIdentifier(
                id=template_name + '_' +
                str(detecttime.strftime('%Y%m%dT%H%M%S.%f')),
                prefix='smi:local')
            ev = Event(resource_id=rid)
            ev.creation_info = CreationInfo(author='EQcorrscan',
                                            creation_time=UTCDateTime())
            # All detection info in Comments for lack of a better idea
            ev.comments.append(
                Comment(text='threshold=' + str(rawthresh)))
            ev.comments.append(
                Comment(text='detect_val=' + str(peak[0])))
            ev.comments.append(
                Comment(text='channels used: ' +
                        ' '.join([str(pair) for pair in chans[i]])))
            for tr in template:
                if (tr.stats.station, tr.stats.channel) not in chans[i]:
                    continue
                # Pick time keeps the trace's offset from the earliest
                # template trace
                pick_tm = detecttime + (tr.stats.starttime -
                                        min_template_tm)
                wv_id = WaveformStreamID(network_code=tr.stats.network,
                                         station_code=tr.stats.station,
                                         channel_code=tr.stats.channel)
                ev.picks.append(Pick(time=pick_tm, waveform_id=wv_id))
            detections.append(DETECTION(template_name, detecttime,
                                        no_chans[i], peak[0], rawthresh,
                                        'corr', chans[i], event=ev))
            if output_cat:
                det_cat.append(ev)

    if extract_detections:
        detection_streams = extract_from_stream(stream, detections)
    if output_cat and extract_detections:
        return detections, det_cat, detection_streams
    if output_cat:
        return detections, det_cat
    if extract_detections:
        return detections, detection_streams
    return detections
def match_filter(template_names, template_list, st, threshold,
                 threshold_type, trig_int, plotvar, plotdir='.', cores=1,
                 tempdir=False, debug=0, plot_format='jpg'):
    """
    Correlate templates with a day of data and return the detections.

    Over-arching code to run the correlations of given templates with a
    day of seismic data and output the detections based on a given
    threshold.

    :type template_names: list
    :param template_names: List of template names in the same order as
        template_list
    :type template_list: list
    :param template_list: A list of templates of which each template is a
        Stream of obspy traces containing seismic data and header
        information. A deep copy is worked on.
    :type st: obspy.Stream
    :param st: An obspy.Stream object containing all the data available
        and required for the correlations with templates given. Traces
        that are in none of the templates are removed internally, but
        this is done on a copy, leaving the input stream untouched.
    :type threshold: float
    :param threshold: A threshold value set based on the threshold_type
    :type threshold_type: str
    :param threshold_type: The type of threshold to be used, can be MAD,
        absolute or av_chan_corr. MAD is computed as
        threshold * median(abs(cccsum)) where cccsum is the
        cross-correlation sum for a given template. absolute uses
        threshold directly as the cccsum level. av_chan_corr is
        threshold multiplied by the per-template channel count
        (no_chans) reported by _channel_loop. Any other value prints a
        notice and falls back to MAD.
    :type trig_int: float
    :param trig_int: Minimum gap between detections in seconds.
    :type plotvar: bool
    :param plotvar: Turn plotting on or off
    :type plotdir: str
    :param plotdir: Path to plotting folder, plots will be output here,
        defaults to run location.
    :type tempdir: str
    :param tempdir: Directory to put temporary files, or False. Not
        referenced in the body of this function.
    :type cores: int
    :param cores: Number of cores to use, passed on to _channel_loop
    :type debug: int
    :param debug: Debug output level, the bigger the number, the more
        the output.
    :type plot_format: str
    :param plot_format: File extension used for saved plots

    :return: List of DETECTION objects, one per peak found above the
        threshold.
    :raises ValueError: If a trace in st is not a full day long, or if
        the correlation returned a zero-length cccsum.

    .. note:: Plotting within the match-filter routine uses the Agg
        backend with interactive plotting turned off. This is because
        the function is designed to work in bulk. If you wish to turn
        interactive plotting on you must import matplotlib in your
        script first, when you then import match_filter you will get
        the warning that this call to matplotlib has no effect, which
        will mean that match_filter has not changed the plotting
        behaviour.
    """
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    plt.ioff()
    import copy
    # time.clock() was removed in Python 3.8; default_timer is available
    # on both Python 2 and Python 3.
    from timeit import default_timer
    from eqcorrscan.utils import EQcorrscan_plotting
    from eqcorrscan.utils import findpeaks
    from obspy import Trace

    # Work on copies so the caller's stream and templates are untouched
    stream = st.copy()
    templates = copy.deepcopy(template_list)

    # Debug option to confirm that the channel names match those in the
    # templates
    if debug >= 3:
        print('I have template info for these stations:')
        print(list(set(tr.stats.station + '.' + tr.stats.channel
                       for template in templates for tr in template)))
        print('I have daylong data for these stations:')
        print(list(set(tr.stats.station + '.' + tr.stats.channel
                       for tr in stream)))

    # Perform a check that the daylong vectors are daylong
    for tr in stream:
        if not tr.stats.sampling_rate * 86400 == tr.stats.npts:
            raise ValueError(' '.join(['Data are not daylong for',
                                       tr.stats.station, tr.stats.channel]))

    outtic = default_timer()
    if debug >= 2:
        print('Ensuring all template channels have matches in daylong data')
    # Every (station, channel) pair that appears in any template. This is
    # collected before template traces are stripped below, so it also
    # holds pairs for which the stream has no data.
    template_stachan = list(set((tr.stats.station, tr.stats.channel)
                                for template in templates
                                for tr in template))
    for station, channel in template_stachan:
        if stream.select(station=station, channel=channel):
            continue
        # Remove template traces rather than adding NaN data
        for template in templates:
            for tr in template.select(station=station, channel=channel):
                template.remove(tr)

    # Remove un-needed channels; iterate over a snapshot because traces
    # are removed from the stream inside the loop.
    for tr in list(stream):
        if (tr.stats.station, tr.stats.channel) not in template_stachan:
            stream.remove(tr)

    # Also pad out templates to have all channels
    for template in templates:
        for station, channel in template_stachan:
            if not template.select(station=station, channel=channel):
                nulltrace = Trace()
                nulltrace.stats.station = station
                nulltrace.stats.channel = channel
                nulltrace.stats.sampling_rate = \
                    template[0].stats.sampling_rate
                nulltrace.stats.starttime = template[0].stats.starttime
                nulltrace.data = np.array([np.nan] * len(template[0].data),
                                          dtype=np.float32)
                template += nulltrace

    if debug >= 2:
        print('Starting the correlation run for this day')
    cccsums, no_chans = _channel_loop(templates, stream, cores, debug)
    if len(cccsums[0]) == 0:
        raise ValueError('Correlation has not run, zero length cccsum')
    outtoc = default_timer()
    print(' '.join(['Looping over templates and streams took:',
                    str(outtoc - outtic), 's']))
    if debug >= 2:
        print(' '.join(['The shape of the returned cccsums is:',
                        str(np.shape(cccsums))]))
        print(' '.join(['This is from', str(len(templates)), 'templates']))
        print(' '.join(['Correlated with', str(len(stream)),
                        'channels of data']))

    detections = []
    for i, cccsum in enumerate(cccsums):
        if threshold_type == 'MAD':
            rawthresh = threshold * np.median(np.abs(cccsum))
        elif threshold_type == 'absolute':
            rawthresh = threshold
        elif threshold_type == 'av_chan_corr':
            # The original compared `threshold` (a number) against the
            # string, so this branch never ran; its body would also have
            # produced an array rather than a scalar threshold.
            rawthresh = threshold * no_chans[i]
        else:
            print('You have not selected the correct threshold type, I '
                  'will use MAD as I like it')
            # MAD is median based; the original used the mean here
            rawthresh = threshold * np.median(np.abs(cccsum))
        # Computed once per template and reused below
        cccsum_max = max(cccsum)
        cccsum_mean = np.mean(cccsum)
        print(' '.join(['Threshold is set at:', str(rawthresh)]))
        print(' '.join(['Max of data is:', str(cccsum_max)]))
        print(' '.join(['Mean of data is:', str(cccsum_mean)]))
        if np.abs(cccsum_mean) > 0.05:
            warnings.warn('Mean is not zero! Check this!')

        # Set up a trace object for the cccsum as this is easier to plot
        # and maintains timing
        if plotvar:
            first_stats = stream[0].stats
            stream_plot = copy.deepcopy(stream[0])
            # Downsample for plotting
            stream_plot.decimate(int(first_stats.sampling_rate / 10))
            cccsum_plot = Trace(cccsum)
            cccsum_plot.stats.sampling_rate = first_stats.sampling_rate
            # Resample here to maintain shape better
            cccsum_hist = cccsum_plot.copy()
            cccsum_hist = cccsum_hist.decimate(
                int(first_stats.sampling_rate / 10)).data
            cccsum_plot = EQcorrscan_plotting.chunk_data(cccsum_plot, 10,
                                                         'Maxabs').data
            # Enforce same length
            stream_plot.data = stream_plot.data[0:len(cccsum_plot)]
            cccsum_plot = cccsum_plot[0:len(stream_plot.data)]
            cccsum_hist = cccsum_hist[0:len(stream_plot.data)]
            EQcorrscan_plotting.triple_plot(
                cccsum_plot, cccsum_hist, stream_plot, rawthresh, True,
                plotdir + '/cccsum_plot_' + template_names[i] + '_' +
                first_stats.starttime.datetime.strftime('%Y-%m-%d') +
                '.' + plot_format)
            if debug >= 4:
                day_stamp = first_stats.starttime.datetime.strftime('%Y%j')
                print(' '.join(['Saved the cccsum to:', template_names[i],
                                day_stamp]))
                np.save(template_names[i] + day_stamp, cccsum)

        tic = default_timer()
        if debug >= 4:
            np.save('cccsum_' + str(i) + '.npy', cccsum)
        # Findpeaks returns a list of tuples in the form
        # [(cccsum, sample)]
        if cccsum_max > rawthresh:
            min_gap = trig_int * stream[0].stats.sampling_rate
            if debug >= 3:
                peaks = findpeaks.find_peaks2_short(
                    cccsum, rawthresh, min_gap, debug,
                    stream[0].stats.starttime,
                    stream[0].stats.sampling_rate)
            else:
                peaks = findpeaks.find_peaks2_short(cccsum, rawthresh,
                                                    min_gap, debug)
        else:
            print('No peaks found above threshold')
            peaks = []
        toc = default_timer()
        if debug >= 1:
            print(' '.join(['Finding peaks took:', str(toc - tic), 's']))
        if peaks:
            for peak in peaks:
                # Peak sample index converted to an absolute time
                detecttime = stream[0].stats.starttime +\
                    peak[1] / stream[0].stats.sampling_rate
                detections.append(DETECTION(template_names[i], detecttime,
                                            no_chans[i], peak[0],
                                            rawthresh, 'corr'))
    return detections
def _find_detections(cum_net_resp, nodes, threshold, thresh_type,
                     samp_rate, realstations, length):
    """
    Find detections within the cumulative network response.

    :type cum_net_resp: numpy.ndarray
    :param cum_net_resp: Array of cumulative network response for nodes.
        NaNs are replaced using numpy.nan_to_num before any statistics
        are computed.
    :type nodes: list
    :param nodes: Nodes associated with the source of energy in the
        cum_net_resp; indexed by the sample index of each peak.
    :type threshold: float
    :param threshold: Threshold value
    :type thresh_type: str
    :param thresh_type: Either MAD (Median Absolute Deviation) or abs
        (absolute) or RMS (Root Mean Squared)
    :type samp_rate: float
    :param samp_rate: Sampling rate in Hz
    :type realstations: list
    :param realstations: List of stations used to make the cumulative
        network response, will be reported in the
        :class:`eqcorrscan.core.match_filter.Detection`
    :type length: float
    :param length: Maximum length of peak to look for in seconds;
        multiplied by samp_rate and passed as the third argument to
        find_peaks2_short.

    :returns: Detections as
        :class:`eqcorrscan.core.match_filter.Detection` objects. Each
        detect_time is the peak sample index divided by samp_rate.
    :rtype: list
    :raises ValueError: If thresh_type is not one of MAD, abs or RMS.
    """
    # Force no NaNs. After this call no NaN can remain, so the follow-up
    # isnan check of the original was unreachable and has been dropped.
    cum_net_resp = np.nan_to_num(cum_net_resp)
    abs_median = np.median(np.abs(cum_net_resp))
    # This value is a median; the original label called it the mean
    print('Median of data is: ' + str(np.median(cum_net_resp)))
    print('RMS of data is: ' +
          str(np.sqrt(np.mean(np.square(cum_net_resp)))))
    print('MAD of data is: ' + str(abs_median))
    if thresh_type == 'MAD':
        thresh = abs_median * threshold
    elif thresh_type == 'abs':
        thresh = threshold
    elif thresh_type == 'RMS':
        thresh = _rms(cum_net_resp) * threshold
    else:
        # Previously fell through and failed later on an unbound name
        raise ValueError('thresh_type must be one of: MAD, abs, RMS')
    print('Threshold is set to: ' + str(thresh))
    print('Max of data is: ' + str(np.max(cum_net_resp)))
    peaks = findpeaks.find_peaks2_short(cum_net_resp, thresh,
                                        length * samp_rate, debug=0)
    detections = []
    if peaks:
        for peak in peaks:
            # peak is (value, sample index)
            node = nodes[peak[1]]
            node_name = '_'.join([str(node[0]), str(node[1]),
                                  str(node[2])])
            detect_time = peak[1] / samp_rate
            detections.append(
                Detection(template_name=node_name,
                          detect_time=detect_time,
                          no_chans=len(realstations),
                          detect_val=peak[0],
                          threshold=thresh,
                          typeofdet='brightness',
                          chans=realstations,
                          id=node_name + str(detect_time),
                          threshold_type=thresh_type,
                          threshold_input=threshold))
    # Count the detections rather than calling len() on `peaks`, which is
    # only known to be falsy when nothing was found.
    print('I have found ' + str(len(detections)) + ' possible detections')
    return detections
def match_filter(template_names, template_list, st, threshold,
                 threshold_type, trig_int, plotvar, plotdir='.', cores=1,
                 tempdir=False, debug=0, plot_format='jpg'):
    """
    Correlate templates with a day of data and return the detections.

    Over-arching code to run the correlations of given templates with a
    day of seismic data and output the detections based on a given
    threshold.

    :type template_names: list
    :param template_names: List of template names in the same order as
        template_list
    :type template_list: list
    :param template_list: A list of templates of which each template is a
        Stream of obspy traces containing seismic data and header
        information. A deep copy is worked on.
    :type st: obspy.Stream
    :param st: An obspy.Stream object containing all the data available
        and required for the correlations with templates given. Traces
        that are in none of the templates are removed internally, but
        this is done on a copy, leaving the input stream untouched.
    :type threshold: float
    :param threshold: A threshold value set based on the threshold_type
    :type threshold_type: str
    :param threshold_type: The type of threshold to be used, can be MAD,
        absolute or av_chan_corr. MAD is computed as
        threshold * median(abs(cccsum)) where cccsum is the
        cross-correlation sum for a given template. absolute uses
        threshold directly as the cccsum level. av_chan_corr is
        threshold multiplied by the per-template channel count
        (no_chans) reported by _channel_loop. Any other value prints a
        notice and falls back to MAD.
    :type trig_int: float
    :param trig_int: Minimum gap between detections in seconds.
    :type plotvar: bool
    :param plotvar: Turn plotting on or off
    :type plotdir: str
    :param plotdir: Path to plotting folder, plots will be output here,
        defaults to run location.
    :type tempdir: str
    :param tempdir: Directory to put temporary files, or False. Not
        referenced in the body of this function.
    :type cores: int
    :param cores: Number of cores to use, passed on to _channel_loop
    :type debug: int
    :param debug: Debug output level, the bigger the number, the more
        the output.
    :type plot_format: str
    :param plot_format: File extension used for saved plots

    :return: List of DETECTION objects, one per peak found above the
        threshold.
    :raises ValueError: If a trace in st is not a full day long, or if
        the correlation returned a zero-length cccsum.

    .. note:: Plotting within the match-filter routine uses the Agg
        backend with interactive plotting turned off. This is because
        the function is designed to work in bulk. If you wish to turn
        interactive plotting on you must import matplotlib in your
        script first, when you then import match_filter you will get
        the warning that this call to matplotlib has no effect, which
        will mean that match_filter has not changed the plotting
        behaviour.
    """
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    plt.ioff()
    import copy
    # time.clock() was removed in Python 3.8; default_timer is available
    # on both Python 2 and Python 3.
    from timeit import default_timer
    from eqcorrscan.utils import EQcorrscan_plotting
    from eqcorrscan.utils import findpeaks
    from obspy import Trace

    # Work on copies so the caller's stream and templates are untouched
    stream = st.copy()
    templates = copy.deepcopy(template_list)

    # Debug option to confirm that the channel names match those in the
    # templates
    if debug >= 3:
        print('I have template info for these stations:')
        print(list(set(tr.stats.station + '.' + tr.stats.channel
                       for template in templates for tr in template)))
        print('I have daylong data for these stations:')
        print(list(set(tr.stats.station + '.' + tr.stats.channel
                       for tr in stream)))

    # Perform a check that the daylong vectors are daylong
    for tr in stream:
        if not tr.stats.sampling_rate * 86400 == tr.stats.npts:
            raise ValueError(' '.join(['Data are not daylong for',
                                       tr.stats.station, tr.stats.channel]))

    outtic = default_timer()
    if debug >= 2:
        print('Ensuring all template channels have matches in daylong data')
    # Every (station, channel) pair that appears in any template. This is
    # collected before template traces are stripped below, so it also
    # holds pairs for which the stream has no data.
    template_stachan = list(set((tr.stats.station, tr.stats.channel)
                                for template in templates
                                for tr in template))
    for station, channel in template_stachan:
        if stream.select(station=station, channel=channel):
            continue
        # Remove template traces rather than adding NaN data
        for template in templates:
            for tr in template.select(station=station, channel=channel):
                template.remove(tr)

    # Remove un-needed channels; iterate over a snapshot because traces
    # are removed from the stream inside the loop.
    for tr in list(stream):
        if (tr.stats.station, tr.stats.channel) not in template_stachan:
            stream.remove(tr)

    # Also pad out templates to have all channels
    for template in templates:
        for station, channel in template_stachan:
            if not template.select(station=station, channel=channel):
                nulltrace = Trace()
                nulltrace.stats.station = station
                nulltrace.stats.channel = channel
                nulltrace.stats.sampling_rate = \
                    template[0].stats.sampling_rate
                nulltrace.stats.starttime = template[0].stats.starttime
                nulltrace.data = np.array([np.nan] * len(template[0].data),
                                          dtype=np.float32)
                template += nulltrace

    if debug >= 2:
        print('Starting the correlation run for this day')
    cccsums, no_chans = _channel_loop(templates, stream, cores, debug)
    if len(cccsums[0]) == 0:
        raise ValueError('Correlation has not run, zero length cccsum')
    outtoc = default_timer()
    print(' '.join(['Looping over templates and streams took:',
                    str(outtoc - outtic), 's']))
    if debug >= 2:
        print(' '.join(['The shape of the returned cccsums is:',
                        str(np.shape(cccsums))]))
        print(' '.join(['This is from', str(len(templates)), 'templates']))
        print(' '.join(['Correlated with', str(len(stream)),
                        'channels of data']))

    detections = []
    for i, cccsum in enumerate(cccsums):
        if threshold_type == 'MAD':
            rawthresh = threshold * np.median(np.abs(cccsum))
        elif threshold_type == 'absolute':
            rawthresh = threshold
        elif threshold_type == 'av_chan_corr':
            # The original compared `threshold` (a number) against the
            # string, so this branch never ran; its body would also have
            # produced an array rather than a scalar threshold.
            rawthresh = threshold * no_chans[i]
        else:
            print('You have not selected the correct threshold type, I '
                  'will use MAD as I like it')
            # MAD is median based; the original used the mean here
            rawthresh = threshold * np.median(np.abs(cccsum))
        # Computed once per template and reused below
        cccsum_max = max(cccsum)
        cccsum_mean = np.mean(cccsum)
        print(' '.join(['Threshold is set at:', str(rawthresh)]))
        print(' '.join(['Max of data is:', str(cccsum_max)]))
        print(' '.join(['Mean of data is:', str(cccsum_mean)]))
        if np.abs(cccsum_mean) > 0.05:
            warnings.warn('Mean is not zero! Check this!')

        # Set up a trace object for the cccsum as this is easier to plot
        # and maintains timing
        if plotvar:
            first_stats = stream[0].stats
            stream_plot = copy.deepcopy(stream[0])
            # Downsample for plotting
            stream_plot.decimate(int(first_stats.sampling_rate / 10))
            cccsum_plot = Trace(cccsum)
            cccsum_plot.stats.sampling_rate = first_stats.sampling_rate
            # Resample here to maintain shape better
            cccsum_hist = cccsum_plot.copy()
            cccsum_hist = cccsum_hist.decimate(
                int(first_stats.sampling_rate / 10)).data
            cccsum_plot = EQcorrscan_plotting.chunk_data(cccsum_plot, 10,
                                                         'Maxabs').data
            # Enforce same length
            stream_plot.data = stream_plot.data[0:len(cccsum_plot)]
            cccsum_plot = cccsum_plot[0:len(stream_plot.data)]
            cccsum_hist = cccsum_hist[0:len(stream_plot.data)]
            EQcorrscan_plotting.triple_plot(
                cccsum_plot, cccsum_hist, stream_plot, rawthresh, True,
                plotdir + '/cccsum_plot_' + template_names[i] + '_' +
                first_stats.starttime.datetime.strftime('%Y-%m-%d') +
                '.' + plot_format)
            if debug >= 4:
                day_stamp = first_stats.starttime.datetime.strftime('%Y%j')
                print(' '.join(['Saved the cccsum to:', template_names[i],
                                day_stamp]))
                np.save(template_names[i] + day_stamp, cccsum)

        tic = default_timer()
        if debug >= 4:
            np.save('cccsum_' + str(i) + '.npy', cccsum)
        # Findpeaks returns a list of tuples in the form
        # [(cccsum, sample)]
        if cccsum_max > rawthresh:
            min_gap = trig_int * stream[0].stats.sampling_rate
            if debug >= 3:
                peaks = findpeaks.find_peaks2_short(
                    cccsum, rawthresh, min_gap, debug,
                    stream[0].stats.starttime,
                    stream[0].stats.sampling_rate)
            else:
                peaks = findpeaks.find_peaks2_short(cccsum, rawthresh,
                                                    min_gap, debug)
        else:
            print('No peaks found above threshold')
            peaks = []
        toc = default_timer()
        if debug >= 1:
            print(' '.join(['Finding peaks took:', str(toc - tic), 's']))
        if peaks:
            for peak in peaks:
                # Peak sample index converted to an absolute time
                detecttime = stream[0].stats.starttime +\
                    peak[1] / stream[0].stats.sampling_rate
                detections.append(DETECTION(template_names[i], detecttime,
                                            no_chans[i], peak[0],
                                            rawthresh, 'corr'))
    return detections
def match_filter(template_names, template_list, st, threshold,
                 threshold_type, trig_int, plotvar, plotdir='.', cores=1,
                 tempdir=False, debug=0, plot_format='png',
                 output_cat=False, extract_detections=False,
                 arg_check=True):
    """
    Main matched-filter detection function.

    Over-arching code to run the correlations of given templates with a
    day of seismic data and output the detections based on a given
    threshold. For a functional example see the tutorials.

    The inputs are not modified: the stream, the templates and the list of
    template names are all copied before channels/templates without
    matching data are pruned.

    :type template_names: list
    :param template_names: List of template names in the same order as
        template_list
    :type template_list: list
    :param template_list: A list of templates of which each template is a
        Stream of obspy traces containing seismic data and header
        information.
    :type st: obspy.core.stream.Stream
    :param st: An obspy.Stream object containing all the data available and
        required for the correlations with templates given. Excess traces
        which are not in one or more of the templates are removed
        internally from a copy, leaving your input stream untouched.
    :type threshold: float
    :param threshold: A threshold value set based on the threshold_type
    :type threshold_type: str
    :param threshold_type: The type of threshold to be used, can be MAD,
        absolute or av_chan_corr. MAD threshold is calculated as
        threshold * median(abs(cccsum)) where cccsum is the
        cross-correlation sum for a given template. absolute threshold is
        a true absolute threshold based on the cccsum value. av_chan_corr
        is calculated as threshold * (number of channels used for the
        template).
    :type trig_int: float
    :param trig_int: Minimum gap between detections in seconds.
    :type plotvar: bool
    :param plotvar: Turn plotting on or off
    :type plotdir: str
    :param plotdir: Path to plotting folder, plots will be output here,
        defaults to run location.
    :type tempdir: str
    :param tempdir: Directory to put temporary files, or False. Not used
        within this function body.
    :type cores: int
    :param cores: Number of cores to use
    :type debug: int
    :param debug: Debug output level, the bigger the number, the more the
        output.
    :type plot_format: str
    :param plot_format: Specify format of output plots if saved
    :type output_cat: bool
    :param output_cat: Specifies if matched_filter will output an
        obspy.Catalog class containing events for each detection. Default
        is False, in which case matched_filter will output a list of
        detection classes, as normal.
    :type extract_detections: bool
    :param extract_detections: Specifies whether or not to return a list of
        streams, one stream per detection.
    :type arg_check: bool
    :param arg_check: Check arguments, defaults to True, but if running in
        bulk, and you are certain of your arguments, then set to False.

    :return: list of DETECTION objects; followed by an obspy.Catalog when
        output_cat is True; followed by the extracted detection streams
        when extract_detections is True.
    :raises IOError: if an argument has the wrong type or threshold_type is
        not one of MAD, absolute, av_chan_corr.
    :raises ValueError: if a template contains traces of differing length,
        or if the correlation returns a zero-length cccsum.

    .. note:: Plotting within the match-filter routine uses the Agg backend
        with interactive plotting turned off. This is because the function
        is designed to work in bulk. If you wish to turn interactive
        plotting on you must import matplotlib in your script first, when
        you then import match_filter you will get the warning that this
        call to matplotlib has no effect, which will mean that
        match_filter has not changed the plotting behaviour.

    .. note:: The output_cat flag will create an obspy.Catalog containing
        one event for each DETECTION generated by match_filter. Each event
        will contain a number of comments dealing with correlation values
        and channels used for the detection. Each channel used for the
        detection will have a corresponding Pick which will contain time
        and waveform information. HOWEVER, the user should note that, at
        present, the pick times do not account for the prepick times
        inherent in each template. For example, if a template trace starts
        0.1 seconds before the actual arrival of that phase, then the pick
        time generated by match_filter for that phase will be 0.1 seconds
        early.
    """
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    plt.ioff()
    import copy
    from eqcorrscan.utils import plotting
    from eqcorrscan.utils import findpeaks
    from obspy import Trace, Catalog, UTCDateTime, Stream
    from obspy.core.event import Event, Pick, CreationInfo, ResourceIdentifier
    from obspy.core.event import Comment, WaveformStreamID
    import time

    # time.clock was removed in Python 3.8; prefer perf_counter when it
    # exists and only fall back to clock on interpreters that lack it.
    timer = getattr(time, 'perf_counter', None) or time.clock
    valid_threshold_msg = ('threshold_type must be one of: MAD, absolute, '
                           'av_chan_corr')

    if arg_check:
        # Check the arguments to be nice - if arguments wrong type the
        # parallel output for the error won't be useful
        if not isinstance(template_names, list):
            raise IOError('template_names must be of type: list')
        if not isinstance(template_list, list):
            raise IOError('templates must be of type: list')
        for template in template_list:
            if not isinstance(template, Stream):
                msg = 'template in template_list must be of type: ' +\
                      'obspy.core.stream.Stream'
                raise IOError(msg)
        if not isinstance(st, Stream):
            msg = 'st must be of type: obspy.core.stream.Stream'
            raise IOError(msg)
        if str(threshold_type) not in [str('MAD'), str('absolute'),
                                       str('av_chan_corr')]:
            raise IOError(valid_threshold_msg)

    # Copy the inputs here because we will muck about with them; the names
    # are copied too so that pruning never alters the caller's list.
    stream = st.copy()
    templates = copy.deepcopy(template_list)
    template_names = list(template_names)

    # Debug option to confirm that the channel names match those in the
    # templates
    if debug >= 2:
        template_stachan = []
        data_stachan = []
        for template in templates:
            for tr in template:
                template_stachan.append(tr.stats.station + '.' +
                                        tr.stats.channel)
        for tr in stream:
            data_stachan.append(tr.stats.station + '.' + tr.stats.channel)
        template_stachan = list(set(template_stachan))
        data_stachan = list(set(data_stachan))
        if debug >= 3:
            print('I have template info for these stations:')
            print(template_stachan)
            print('I have daylong data for these stations:')
            print(data_stachan)

    # Perform a check that the daylong vectors are all the same length
    # NOTE(review): this compares npts against rate * duration; confirm
    # whether the expected sample count should include the end sample.
    min_start_time = min([tr.stats.starttime for tr in stream])
    max_end_time = max([tr.stats.endtime for tr in stream])
    longest_trace_length = stream[0].stats.sampling_rate * (max_end_time -
                                                            min_start_time)
    for tr in stream:
        if not tr.stats.npts == longest_trace_length:
            msg = 'Data are not equal length, padding short traces'
            warnings.warn(msg)
            start_pad = np.zeros(int(tr.stats.sampling_rate *
                                     (tr.stats.starttime - min_start_time)))
            end_pad = np.zeros(int(tr.stats.sampling_rate *
                                   (max_end_time - tr.stats.endtime)))
            tr.data = np.concatenate([start_pad, tr.data, end_pad])

    # Perform check that all template lengths are internally consistent
    for i, temp in enumerate(template_list):
        if len(set([tr.stats.npts for tr in temp])) > 1:
            msg = ('Template %s contains traces of differing length!! THIS '
                   'WILL CAUSE ISSUES' % template_names[i])
            raise ValueError(msg)

    # Call the _channel_loop function to do all the correlation work
    outtic = timer()
    if debug >= 2:
        print('Ensuring all template channels have matches in long data')
    template_stachan = []
    for template in templates:
        for tr in template:
            template_stachan += [(tr.stats.station, tr.stats.channel)]
    template_stachan = list(set(template_stachan))
    for stachan in template_stachan:
        if not stream.select(station=stachan[0], channel=stachan[1]):
            # Remove template traces rather than adding NaN data
            for template in templates:
                # select() returns a new Stream, so removing from
                # `template` while looping over the selection is safe.
                for tr in template.select(station=stachan[0],
                                          channel=stachan[1]):
                    template.remove(tr)
    # Remove un-needed channels. Iterate over a snapshot: removing from the
    # Stream that is being iterated would skip the trace after each removal.
    for tr in list(stream):
        if (tr.stats.station, tr.stats.channel) not in template_stachan:
            stream.remove(tr)
    # Also pad out templates to have all channels. Surviving templates and
    # names are collected into new lists instead of removing from the lists
    # being iterated, which would skip entries and could drop the wrong name.
    kept_templates = []
    kept_names = []
    for template, template_name in zip(templates, template_names):
        if len(template) == 0:
            msg = ('No channels matching in continuous data for ' +
                   'template ' + template_name)
            warnings.warn(msg)
            continue
        for stachan in template_stachan:
            if not template.select(station=stachan[0], channel=stachan[1]):
                nulltrace = Trace()
                nulltrace.stats.station = stachan[0]
                nulltrace.stats.channel = stachan[1]
                nulltrace.stats.sampling_rate = \
                    template[0].stats.sampling_rate
                nulltrace.stats.starttime = template[0].stats.starttime
                nulltrace.data = np.array([np.nan] * len(template[0].data),
                                          dtype=np.float32)
                template += nulltrace
        kept_templates.append(template)
        kept_names.append(template_name)
    templates = kept_templates
    template_names = kept_names

    if debug >= 2:
        print('Starting the correlation run for this day')
    [cccsums, no_chans, chans] = _channel_loop(templates=templates,
                                               stream=stream,
                                               cores=cores,
                                               debug=debug)
    if len(cccsums[0]) == 0:
        raise ValueError('Correlation has not run, zero length cccsum')
    outtoc = timer()
    print(' '.join(['Looping over templates and streams took:',
                    str(outtoc - outtic), 's']))
    if debug >= 2:
        print(' '.join(['The shape of the returned cccsums is:',
                        str(np.shape(cccsums))]))
        print(' '.join(['This is from', str(len(templates)), 'templates']))
        print(' '.join(['Correlated with', str(len(stream)),
                        'channels of data']))

    detections = []
    if output_cat:
        det_cat = Catalog()
    # Timing reference for every detection is the first trace of the stream.
    samp_rate = stream[0].stats.sampling_rate
    data_start = stream[0].stats.starttime
    day_stamp = data_start.datetime.strftime('%Y%j')
    for i, cccsum in enumerate(cccsums):
        template = templates[i]
        if str(threshold_type) == str('MAD'):
            rawthresh = threshold * np.median(np.abs(cccsum))
        elif str(threshold_type) == str('absolute'):
            rawthresh = threshold
        elif str(threshold_type) == str('av_chan_corr'):
            rawthresh = threshold * no_chans[i]
        else:
            # Only reachable with arg_check=False; fail clearly rather than
            # with an unbound-variable error further down.
            raise IOError(valid_threshold_msg)
        cccsum_max = max(cccsum)
        # Findpeaks returns a list of tuples in the form [(cccsum, sample)]
        print(' '.join(['Threshold is set at:', str(rawthresh)]))
        print(' '.join(['Max of data is:', str(cccsum_max)]))
        print(' '.join(['Mean of data is:', str(np.mean(cccsum))]))
        if np.abs(np.mean(cccsum)) > 0.05:
            warnings.warn('Mean is not zero!  Check this!')
        # Set up a trace object for the cccsum as this is easier to plot and
        # maintains timing
        if plotvar:
            stream_plot = copy.deepcopy(stream[0])
            # Downsample for plotting
            stream_plot.decimate(int(samp_rate / 10))
            cccsum_plot = Trace(cccsum)
            cccsum_plot.stats.sampling_rate = samp_rate
            # Resample here to maintain shape better
            cccsum_hist = cccsum_plot.copy()
            cccsum_hist = cccsum_hist.decimate(int(samp_rate / 10)).data
            cccsum_plot = plotting.chunk_data(cccsum_plot, 10,
                                              'Maxabs').data
            # Enforce same length
            stream_plot.data = stream_plot.data[0:len(cccsum_plot)]
            cccsum_plot = cccsum_plot[0:len(stream_plot.data)]
            cccsum_hist = cccsum_hist[0:len(stream_plot.data)]
            plotting.triple_plot(cccsum_plot, cccsum_hist,
                                 stream_plot, rawthresh, True,
                                 plotdir + '/cccsum_plot_' +
                                 template_names[i] + '_' +
                                 data_start.datetime.strftime('%Y-%m-%d') +
                                 '.' + plot_format)
        if debug >= 4:
            print(' '.join(['Saved the cccsum to:', template_names[i],
                            day_stamp]))
            np.save(template_names[i] + day_stamp, cccsum)
        tic = timer()
        if debug >= 4:
            np.save('cccsum_' + str(i) + '.npy', cccsum)
        if debug >= 3 and cccsum_max > rawthresh:
            peaks = findpeaks.find_peaks2_short(cccsum, rawthresh,
                                                trig_int * samp_rate, debug,
                                                data_start, samp_rate)
        elif cccsum_max > rawthresh:
            peaks = findpeaks.find_peaks2_short(cccsum, rawthresh,
                                                trig_int * samp_rate, debug)
        else:
            print('No peaks found above threshold')
            peaks = False
        toc = timer()
        if debug >= 1:
            print(' '.join(['Finding peaks took:', str(toc - tic), 's']))
        if peaks:
            # Earliest template trace start; pick times are offset from it.
            min_template_tm = min([tr.stats.starttime for tr in template])
            used_chans = 'channels used: ' +\
                         ' '.join([str(pair) for pair in chans[i]])
            thresh_str = 'threshold=' + str(rawthresh)
            for peak in peaks:
                detecttime = data_start + peak[1] / samp_rate
                # Detect time must be valid QuakeML uri within resource_id.
                # This will write a formatted string which is still
                # readable by UTCDateTime
                rid = ResourceIdentifier(
                    id=template_names[i] + '_' +
                    str(detecttime.strftime('%Y%m%dT%H%M%S.%f')),
                    prefix='smi:local')
                ev = Event(resource_id=rid)
                cr_i = CreationInfo(author='EQcorrscan',
                                    creation_time=UTCDateTime())
                ev.creation_info = cr_i
                # All detection info in Comments for lack of a better idea
                ccc_str = 'detect_val=' + str(peak[0])
                ev.comments.append(Comment(text=thresh_str))
                ev.comments.append(Comment(text=ccc_str))
                ev.comments.append(Comment(text=used_chans))
                for tr in template:
                    if (tr.stats.station, tr.stats.channel) not in chans[i]:
                        continue
                    pick_tm = detecttime + (tr.stats.starttime -
                                            min_template_tm)
                    wv_id = WaveformStreamID(
                        network_code=tr.stats.network,
                        station_code=tr.stats.station,
                        channel_code=tr.stats.channel)
                    ev.picks.append(Pick(time=pick_tm, waveform_id=wv_id))
                detections.append(DETECTION(template_names[i],
                                            detecttime,
                                            no_chans[i], peak[0], rawthresh,
                                            'corr', chans[i], event=ev))
                if output_cat:
                    det_cat.append(ev)
    if extract_detections:
        detection_streams = extract_from_stream(stream, detections)
    del stream, templates
    if output_cat and extract_detections:
        return detections, det_cat, detection_streams
    if output_cat:
        return detections, det_cat
    if extract_detections:
        return detections, detection_streams
    return detections
def match_filter(template_names, templates, stream, threshold,
                 threshold_type, trig_int, plotvar, cores=1, tempdir=False,
                 debug=0):
    """
    Over-arching code to run the correlations of given templates with a day
    of seismic data and output the detections based on a given threshold.

    Note that `stream` and every Stream in `templates` are modified in
    place: NaN-filled traces are appended for any station/channel pair that
    is present in some template but missing from the stream or template.

    :type template_names: list
    :param template_names: Names of the templates, in the same order as
        templates; used for output file names and in each DETECTION.
    :type templates: list :class: 'obspy.Stream'
    :param templates: A list of templates of which each template is a
        Stream of obspy traces containing seismic data and header
        information.
    :type stream: :class: 'obspy.Stream'
    :param stream: An obspy.Stream object containing all the data available
        and required for the correlations with templates given. Every
        trace must hold exactly sampling_rate * 86400 samples.
    :type threshold: float
    :param threshold: A threshold value set based on the threshold_type
    :type threshold_type: str
    :param threshold_type: The type of threshold to be used, can be MAD,
        absolute or av_chan_corr. MAD threshold is calculated as
        threshold * median(abs(cccsum)) where cccsum is the
        cross-correlation sum for a given template. absolute threshold is
        a true absolute threshold based on the cccsum value. av_chan_corr
        is calculated as threshold * (number of channels used for the
        template). Any other value falls back to MAD with a printed
        notice.
    :type trig_int: float
    :param trig_int: Minimum gap between detections in seconds.
    :type plotvar: bool
    :param plotvar: If True, save a plot of the cccsum and the cccsum
        itself for every template.
    :type tempdir: String or False
    :param tempdir: Directory to put temporary files, or False. Only passed
        on when the internal match_internal switch is enabled.
    :type cores: int
    :param cores: Number of cores to use
    :type debug: int
    :param debug: Debug output level, the bigger the number, the more the
        output

    :return: list of DETECTION objects, one per peak above threshold.
    :raises ValueError: if a trace is not daylong, or if the correlation
        returns a zero-length cccsum.
    """
    from eqcorrscan.utils import findpeaks, EQcorrscan_plotting
    import time
    import copy
    from obspy import Trace

    # time.clock was removed in Python 3.8; prefer perf_counter when it
    # exists and only fall back to clock on interpreters that lack it.
    timer = getattr(time, 'perf_counter', None) or time.clock

    match_internal = False  # Set to True if memory is an issue, if True,
    # will only use about the same amount of memory as the seismic data
    # take up.  If False, it will use 20-100GB per instance

    # Debug option to confirm that the channel names match those in the
    # templates
    if debug >= 2:
        template_stachan = []
        data_stachan = []
        for template in templates:
            for tr in template:
                template_stachan.append(tr.stats.station + "." +
                                        tr.stats.channel)
        for tr in stream:
            data_stachan.append(tr.stats.station + "." + tr.stats.channel)
        template_stachan = list(set(template_stachan))
        data_stachan = list(set(data_stachan))
        if debug >= 3:
            print("I have template info for these stations:")
            print(template_stachan)
            print("I have daylong data for these stations:")
            print(data_stachan)

    # Perform a check that the daylong vectors are daylong
    for tr in stream:
        if not tr.stats.sampling_rate * 86400 == tr.stats.npts:
            raise ValueError("Data are not daylong for " + tr.stats.station +
                             "." + tr.stats.channel)

    # Call the _channel_loop function to do all the correlation work
    outtic = timer()
    # Every station/channel used by any template gets a trace in both the
    # stream and each template; missing ones are filled with NaN so that
    # they return a NaN ccc_sum.
    if debug >= 2:
        print("Ensuring all template channels have matches in daylong data")
    template_stachan = []
    for template in templates:
        for tr in template:
            template_stachan += [(tr.stats.station, tr.stats.channel)]
    template_stachan = list(set(template_stachan))
    for stachan in template_stachan:
        if not stream.select(station=stachan[0], channel=stachan[1]):
            # Add a trace of NaN's
            nulltrace = Trace()
            nulltrace.stats.station = stachan[0]
            nulltrace.stats.channel = stachan[1]
            nulltrace.stats.sampling_rate = stream[0].stats.sampling_rate
            nulltrace.stats.starttime = stream[0].stats.starttime
            nulltrace.data = np.array([np.nan] * len(stream[0].data),
                                      dtype=np.float32)
            stream += nulltrace
    # Also pad out templates to have all channels
    for template in templates:
        for stachan in template_stachan:
            if not template.select(station=stachan[0], channel=stachan[1]):
                nulltrace = Trace()
                nulltrace.stats.station = stachan[0]
                nulltrace.stats.channel = stachan[1]
                nulltrace.stats.sampling_rate = \
                    template[0].stats.sampling_rate
                nulltrace.stats.starttime = template[0].stats.starttime
                nulltrace.data = np.array([np.nan] * len(template[0].data),
                                          dtype=np.float32)
                template += nulltrace

    if debug >= 2:
        print("Starting the correlation run for this day")
    if match_internal:
        [cccsums, no_chans] = run_channel_loop(templates, stream, tempdir)
    else:
        [cccsums, no_chans] = _channel_loop(templates, stream, cores, debug)
    if len(cccsums[0]) == 0:
        raise ValueError("Correlation has not run, zero length cccsum")
    outtoc = timer()
    print("Looping over templates and streams took: " +
          str(outtoc - outtic) + " s")
    if debug >= 2:
        print("The shape of the returned cccsums is: " +
              str(np.shape(cccsums)))
        print("This is from " + str(len(templates)) + " templates")
        print("Correlated with " + str(len(stream)) + " channels of data")

    detections = []
    for i, cccsum in enumerate(cccsums):
        if threshold_type == "MAD":
            rawthresh = threshold * np.median(np.abs(cccsum))
        elif threshold_type == "absolute":
            rawthresh = threshold
        elif threshold_type == "av_chan_corr":
            # Previously this tested `threshold` (the number) against the
            # string, so the branch could never run.  Scale by the number
            # of channels used so the threshold stays a scalar.
            rawthresh = threshold * no_chans[i]
        else:
            print("You have not selected the correct threshold type, " +
                  "I will use MAD as I like it")
            # Use the median so the fallback really is MAD, as announced.
            rawthresh = threshold * np.median(np.abs(cccsum))
        # Findpeaks returns a list of tuples in the form [(cccsum, sample)]
        print("Threshold is set at: " + str(rawthresh))
        print("Max of data is: " + str(max(cccsum)))
        print("Mean of data is: " + str(np.mean(cccsum)))
        if np.abs(np.mean(cccsum)) > 0.05:
            warnings.warn("Mean is not zero!  Check this!")
        # Set up a trace object for the cccsum as this is easier to plot and
        # maintains timing
        if plotvar:
            stream_plot = copy.deepcopy(stream[0])
            # Downsample for plotting
            stream_plot.decimate(int(stream[0].stats.sampling_rate / 20))
            cccsum_plot = Trace(cccsum)
            cccsum_plot.stats.sampling_rate = stream[0].stats.sampling_rate
            # Resample here to maintain shape better
            cccsum_hist = cccsum_plot.copy()
            cccsum_hist = cccsum_hist.decimate(
                int(stream[0].stats.sampling_rate / 20)).data
            cccsum_plot = EQcorrscan_plotting.chunk_data(cccsum_plot, 20,
                                                         "Maxabs").data
            # Enforce same length
            stream_plot.data = stream_plot.data[0:len(cccsum_plot)]
            cccsum_plot = cccsum_plot[0:len(stream_plot.data)]
            cccsum_hist = cccsum_hist[0:len(stream_plot.data)]
            EQcorrscan_plotting.triple_plot(
                cccsum_plot, cccsum_hist, stream_plot, rawthresh, True,
                "plot/cccsum_plot_" + template_names[i] + "_" +
                str(stream[0].stats.starttime.year) + "-" +
                str(stream[0].stats.starttime.month) + "-" +
                str(stream[0].stats.starttime.day) + ".jpg")
            np.save(template_names[i] +
                    stream[0].stats.starttime.datetime.strftime("%Y%j"),
                    cccsum)
        tic = timer()
        if debug >= 4:
            np.save("cccsum_" + str(i) + ".npy", cccsum)
        if debug >= 3 and max(cccsum) > rawthresh:
            peaks = findpeaks.find_peaks2_short(
                cccsum, rawthresh,
                trig_int * stream[0].stats.sampling_rate, debug,
                stream[0].stats.starttime, stream[0].stats.sampling_rate)
        elif max(cccsum) > rawthresh:
            peaks = findpeaks.find_peaks2_short(
                cccsum, rawthresh,
                trig_int * stream[0].stats.sampling_rate, debug)
        else:
            print("No peaks found above threshold")
            peaks = False
        toc = timer()
        if debug >= 1:
            print("Finding peaks took: " + str(toc - tic) + " s")
        if peaks:
            for peak in peaks:
                # peak is (cccsum value, sample index); convert the sample
                # index to an absolute time using the first trace's timing.
                detecttime = stream[0].stats.starttime + \
                    peak[1] / stream[0].stats.sampling_rate
                detections.append(DETECTION(template_names[i], detecttime,
                                            no_chans[i], peak[0], rawthresh,
                                            "corr"))
    return detections
def _detect(detector, st, threshold, trig_int, moveout=0, min_trig=0,
            process=True, extract_detections=False, debug=0):
    """
    Detect within continuous data using the subspace method.

    Not to be called directly, use the detector.detect method.

    :type detector: eqcorrscan.core.subspace.Detector
    :param detector: Detector to use.
    :type st: obspy.core.stream.Stream
    :param st: Un-processed stream to detect within using the subspace
        detector
    :type threshold: float
    :param threshold: Threshold value for detections between 0-1
    :type trig_int: float
    :param trig_int: Minimum trigger interval in seconds.
    :type moveout: float
    :param moveout: Maximum allowable moveout window for non-multiplexed,
        network detection.
    :type min_trig: int
    :param min_trig: Minimum number of stations exceeding threshold for
        non-multiplexed, network detection.
    :type process: bool
    :param process: Whether or not to process the stream according to the
        parameters defined by the detector. Default is to process the
        data (True). When False only the sampling rates are checked.
    :type extract_detections: bool
    :param extract_detections: Whether to extract waveforms for each
        detection or not, if true will return detections and streams.
    :type debug: int
    :param debug: Debug output level from 0-5.

    :return: list of detections, or (detections, detection_streams) when
        extract_detections is True.
    :rtype: list of eqcorrscan.core.match_filter.DETECTION
    :raises ValueError: if process is False and a trace's sampling rate
        differs from the detector's.
    """
    from eqcorrscan.core import subspace_statistic

    # time.clock was removed in Python 3.8; prefer perf_counter when it
    # exists and only fall back to clock on interpreters that lack it.
    timer = getattr(time, 'perf_counter', None) or time.clock

    detections = []
    # First process the stream
    if process:
        if debug > 0:
            print('Processing Stream')
        stream, stachans = _subspace_process(
            streams=[st.copy()], lowcut=detector.lowcut,
            highcut=detector.highcut, filt_order=detector.filt_order,
            sampling_rate=detector.sampling_rate,
            multiplex=detector.multiplex, stachans=detector.stachans,
            parallel=True, align=False, shift_len=None, reject=False)
    else:
        # Check the sampling rate at the very least
        for tr in st:
            if not tr.stats.sampling_rate == detector.sampling_rate:
                raise ValueError('Sampling rates do not match.')
        stream = [st]
        stachans = detector.stachans
    outtic = timer()
    if debug > 0:
        print('Computing detection statistics')
    # One row of statistics per channel of the first (only) stream.
    stats = np.zeros(
        (len(stream[0]), len(stream[0][0]) - len(detector.data[0][0]) + 1),
        dtype=np.float32)
    for i, (det_channel, in_channel) in enumerate(zip(detector.data,
                                                      stream[0])):
        # Hard typing in Cython loop requires float32 type.
        stats[i] = subspace_statistic.det_statistic(
            detector=det_channel.astype(np.float32),
            data=in_channel.data.astype(np.float32))
        if debug > 0:
            print(stats[i].shape)
        if debug > 3:
            plt.plot(stats[i])
            plt.show()
    # Multiplexed data interleave all channels, so the trigger interval in
    # samples scales with the number of station/channel pairs.
    if detector.multiplex:
        trig_int_samples = (len(detector.stachans) *
                            detector.sampling_rate * trig_int)
    else:
        trig_int_samples = detector.sampling_rate * trig_int
    if debug > 0:
        print('Finding peaks')
    peaks = [findpeaks.find_peaks2_short(arr=channel_stat, thresh=threshold,
                                         trig_int=trig_int_samples,
                                         debug=debug)
             for channel_stat in stats]
    if not detector.multiplex:
        # Conduct network coincidence triggering
        peaks = findpeaks.coin_trig(
            peaks=peaks, samp_rate=detector.sampling_rate, moveout=moveout,
            min_trig=min_trig, stachans=stachans, trig_int=trig_int)
    else:
        peaks = peaks[0]
    if len(peaks) > 0:
        # These do not depend on the individual peak.
        thresh_str = 'threshold=' + str(threshold)
        used_chans = 'channels used: ' +\
            ' '.join([str(pair) for pair in detector.stachans])
        for peak in peaks:
            if detector.multiplex:
                detecttime = st[0].stats.starttime + (
                    peak[1] / (detector.sampling_rate *
                               len(detector.stachans)))
            else:
                detecttime = st[0].stats.starttime + (
                    peak[1] / detector.sampling_rate)
            rid = ResourceIdentifier(
                id=detector.name + '_' + str(detecttime),
                prefix='smi:local')
            ev = Event(resource_id=rid)
            cr_i = CreationInfo(author='EQcorrscan',
                                creation_time=UTCDateTime())
            ev.creation_info = cr_i
            # All detection info in Comments for lack of a better idea
            ccc_str = 'detect_val=' + str(peak[0])
            ev.comments.append(Comment(text=thresh_str))
            ev.comments.append(Comment(text=ccc_str))
            ev.comments.append(Comment(text=used_chans))
            for stachan in detector.stachans:
                tr = st.select(station=stachan[0], channel=stachan[1])
                # Fall back to an empty network code when the input stream
                # has no trace for this station/channel.
                if tr:
                    net_code = tr[0].stats.network
                else:
                    net_code = ''
                pick_tm = detecttime
                wv_id = WaveformStreamID(network_code=net_code,
                                         station_code=stachan[0],
                                         channel_code=stachan[1])
                ev.picks.append(Pick(time=pick_tm, waveform_id=wv_id))
            detections.append(
                DETECTION(detector.name, detecttime, len(detector.stachans),
                          peak[0], threshold, 'subspace', detector.stachans,
                          event=ev))
    outtoc = timer()
    print('Detection took %s seconds' % str(outtoc - outtic))
    if extract_detections:
        detection_streams = extract_from_stream(st, detections)
        return detections, detection_streams
    return detections