Python find_peaks2_short Examples

Programming Language: Python

Namespace/Package Name: eqcorrscan.utils.findpeaks

Method/Function: find_peaks2_short

Examples at hotexamples.com: 22

Python find_peaks2_short - 22 examples found. These are the top-rated real-world Python examples of eqcorrscan.utils.findpeaks.find_peaks2_short extracted from open source projects. You can rate examples to help us improve the quality of examples.

Example #1

Show file

def _median_window(window, window_start, multiplier, starttime, sampling_rate):
    """
    Pick peaks in a single window using a threshold derived from the window.

    The threshold is ``multiplier`` times the median of the absolute values
    of ``window``.  Intended as a per-window worker for parallel processing.

    :type window: numpy.ndarray
    :param window: Data to look for peaks in.
    :type window_start: int
    :param window_start: Index of the window start point in the larger \
        array; added to the second element of every peak.
    :type multiplier: float
    :param multiplier: Factor applied to the median absolute value to get \
        the threshold.
    :type starttime: obspy.core.utcdatetime.UTCDateTime
    :param starttime: Start time of the window (not referenced in this body).
    :type sampling_rate: float
    :param sampling_rate: Sampling rate in Hz (not referenced in this body).

    :returns: Peaks with their second element shifted by ``window_start``; \
        an empty list when nothing was found.
    :rtype: list
    """
    median_abs = np.median(np.abs(window))
    thresh = multiplier * median_abs
    message = ('Threshold for window is: ' + str(thresh) + '\nMedian is: ' +
               str(median_abs) + '\nMax is: ' + str(np.max(window)))
    Logger.debug(message)
    found = find_peaks2_short(arr=window, thresh=thresh, trig_int=5)
    if not found:
        return []
    # Re-index from window-relative to positions in the larger array.
    return [(peak[0], peak[1] + window_start) for peak in found]

Example #2

Show file

File: find_peaks_test.py Project: xtyangpsp/EQcorrscan

 def full_peak_array_py(self, cc_array):
     """
     Return the find_peaks2_short result for cc_array with full_peaks=True.

     Uses a fixed threshold of 0.2 and self.trig_index as the trigger
     interval.
     """
     return find_peaks2_short(
         arr=cc_array, thresh=0.2, trig_int=self.trig_index, full_peaks=True)

Example #3

Show file

File: bright_lights.py Project: xiansch/EQcorrscan

def _find_detections(cum_net_resp, nodes, threshold, thresh_type,
                     samp_rate, realstations, length):
    """
    Find detections within the cumulative network response.

    :type cum_net_resp: numpy.ndarray
    :param cum_net_resp: Array of cumulative network response for nodes
    :type nodes: list
    :param nodes: Nodes associated with the source of energy in the \
        cum_net_resp
    :type threshold: float
    :param threshold: Threshold value
    :type thresh_type: str
    :param thresh_type: Either MAD (Median Absolute Deviation) or abs \
        (absolute) or RMS (Root Mean Squared)
    :type samp_rate: float
    :param samp_rate: Sampling rate in Hz
    :type realstations: list
    :param realstations: List of stations used to make the cumulative network \
        response, will be reported in the DETECTION
    :type length: float
    :param length: Maximum length of peak to look for in seconds; multiplied \
        by samp_rate before being handed to the peak finder.

    :return: detections as :class: DETECTION
    :rtype: list
    :raises ValueError: If thresh_type is not one of 'MAD', 'abs' or 'RMS'.

    .. note:: This is an internal function to ease parallel processing and \
        should not be called directly.
    """
    from eqcorrscan.core.match_filter import DETECTION
    from eqcorrscan.utils import findpeaks

    # nan_to_num replaces every NaN, so no separate NaN check is needed after.
    cum_net_resp = np.nan_to_num(cum_net_resp)
    mad = np.median(np.abs(cum_net_resp))
    # NOTE(review): the label says "Mean" but the value printed is the median.
    print('Mean of data is: ' + str(np.median(cum_net_resp)))
    print('RMS of data is: ' + str(np.sqrt(np.mean(np.square(cum_net_resp)))))
    print('MAD of data is: ' + str(mad))
    if thresh_type == 'MAD':
        thresh = mad * threshold
    elif thresh_type == 'abs':
        thresh = threshold
    elif thresh_type == 'RMS':
        thresh = _rms(cum_net_resp) * threshold
    else:
        # Previously fell through and failed later with an unbound `thresh`.
        raise ValueError("thresh_type must be one of 'MAD', 'abs' or 'RMS', "
                         "got: " + str(thresh_type))
    print('Threshold is set to: ' + str(thresh))
    print('Max of data is: ' + str(np.max(cum_net_resp)))
    peaks = findpeaks.find_peaks2_short(cum_net_resp, thresh,
                                        length * samp_rate, debug=0)
    n_stations = len(realstations)
    detections = []
    for peak in peaks:
        # peak[1] is used both as the index into nodes and, divided by the
        # sampling rate, as the second DETECTION argument.
        node = nodes[peak[1]]
        node_name = str(node[0]) + '_' + str(node[1]) + '_' + str(node[2])
        detections.append(DETECTION(node_name, peak[1] / samp_rate,
                                    n_stations, peak[0], thresh,
                                    'brightness', realstations))
    print('I have found ' + str(len(peaks)) + ' possible detections')
    return detections

Example #4

Show file

 def peak_array(self, cc_array):
     """
     Return the find_peaks2_short result for cc_array.

     Uses a fixed threshold of 0.2, self.trig_index as the trigger
     interval, debug output off, no start time and a 200.0 sample rate.
     """
     options = dict(thresh=0.2, trig_int=self.trig_index, debug=0,
                    starttime=None, samp_rate=200.0)
     return find_peaks2_short(arr=cc_array, **options)

Example #5

Show file

File: despike.py Project: xiansch/EQcorrscan

def template_remove(tr, template, cc_thresh, windowlength,
                    interp_len, debug=0):
    """
    Look for instances of template in the trace and remove the matches.

    The trace data are cross-correlated with the template, peaks above
    cc_thresh are located, and the data around each peak are replaced via
    _interp_gap.

    :type tr: obspy.core.Trace
    :param tr: Trace to remove spikes from.
    :type template: obspy.core.Trace
    :param template: Spike template to look for in data.
    :type cc_thresh: float
    :param cc_thresh: Cross-correlation threshold (-1 - 1).
    :type windowlength: float
    :param windowlength: Length of window to look for spikes in in seconds.
    :type interp_len: float
    :param interp_len: Window length to remove and fill in seconds.  If this \
        is shorter than the template, the template length is used instead \
        and a warning is issued.
    :type debug: int
    :param debug: Debug level; > 2 plots raw against despiked data, > 3 \
        also plots the cross-correlation.

    :returns: tr, works in place.
    """
    from eqcorrscan.core.match_filter import normxcorr2
    from eqcorrscan.utils.findpeaks import find_peaks2_short
    from obspy import Trace
    from eqcorrscan.utils.timer import Timer
    import matplotlib.pyplot as plt
    import warnings

    # The untouched copy is only read by the debug plot at the end, so avoid
    # duplicating the trace when that plot is not requested.
    data_in = tr.copy() if debug > 2 else None
    _interp_len = int(tr.stats.sampling_rate * interp_len)
    if _interp_len < len(template.data):
        warnings.warn('Interp_len is less than the length of the template, '
                      'will used the length of the template!')
        _interp_len = len(template.data)
    if isinstance(template, Trace):
        template = template.data
    # Constant shift of half the fill length applied to every peak index.
    half_len = int(0.5 * _interp_len)
    with Timer() as t:
        cc = normxcorr2(tr.data.astype(np.float32),
                        template.astype(np.float32))
        if debug > 3:
            plt.plot(cc.flatten(), 'k', label='cross-correlation')
            plt.legend()
            plt.show()
        peaks = find_peaks2_short(arr=cc.flatten(), thresh=cc_thresh,
                                  trig_int=windowlength * tr.stats.
                                  sampling_rate)
        for peak in peaks:
            tr.data = _interp_gap(data=tr.data,
                                  peak_loc=peak[1] + half_len,
                                  interp_len=_interp_len)
    print("Despiking took: %s s" % t.secs)
    if debug > 2:
        plt.plot(data_in.data, 'r', label='raw')
        plt.plot(tr.data, 'k', label='despiked')
        plt.legend()
        plt.show()
    return tr

Example #6

Show file

 def test_peaks_plot(self):
     """Build a peaks_plot figure from a copy of self.data with two spikes."""
     spiked = self.data.copy()
     # Two artificial spikes, both above the threshold of 10 used below.
     for index, amplitude in ((30, 100), (60, 40)):
         spiked[index] = amplitude
     detected = findpeaks.find_peaks2_short(spiked, 10, 3)
     return peaks_plot(data=spiked, starttime=UTCDateTime("2008001"),
                       samp_rate=10, peaks=detected, show=False,
                       return_figure=True)

Example #7

Show file

def template_remove(tr,
                    template,
                    cc_thresh,
                    windowlength,
                    interp_len,
                    debug=0):
    """
    Look for instances of template in the trace and remove the matches.

    The trace data are cross-correlated with the template, peaks above
    cc_thresh are located, and the data around each peak are replaced via
    _interp_gap.

    :type tr: obspy.core.trace.Trace
    :param tr: Trace to remove spikes from.
    :type template: obspy.core.trace.Trace
    :param template: Spike template to look for in data.
    :type cc_thresh: float
    :param cc_thresh: Cross-correlation threshold (-1 - 1).
    :type windowlength: float
    :param windowlength: Length of window to look for spikes in in seconds.
    :type interp_len: float
    :param interp_len: Window length to remove and fill in seconds.  If this \
        is shorter than the template, the template length is used instead \
        and a warning is issued.
    :type debug: int
    :param debug: Debug level; > 2 plots raw against despiked data, > 3 \
        also plots the cross-correlation.

    :returns: tr, works in place.
    :rtype: :class:`obspy.core.trace.Trace`
    """
    # The untouched copy is only read by the debug plot at the end, so avoid
    # duplicating the trace when that plot is not requested.
    data_in = tr.copy() if debug > 2 else None
    _interp_len = int(tr.stats.sampling_rate * interp_len)
    if _interp_len < len(template.data):
        warnings.warn('Interp_len is less than the length of the template, '
                      'will used the length of the template!')
        _interp_len = len(template.data)
    if isinstance(template, Trace):
        template = template.data
    # Constant shift of half the fill length applied to every peak index.
    half_len = int(0.5 * _interp_len)
    with Timer() as t:
        cc = normxcorr2(image=tr.data.astype(np.float32),
                        template=template.astype(np.float32))
        if debug > 3:
            plt.plot(cc.flatten(), 'k', label='cross-correlation')
            plt.legend()
            plt.show()
        peaks = find_peaks2_short(arr=cc.flatten(),
                                  thresh=cc_thresh,
                                  trig_int=windowlength *
                                  tr.stats.sampling_rate)
        for peak in peaks:
            tr.data = _interp_gap(data=tr.data,
                                  peak_loc=peak[1] + half_len,
                                  interp_len=_interp_len)
    print("Despiking took: %s s" % t.secs)
    if debug > 2:
        plt.plot(data_in.data, 'r', label='raw')
        plt.plot(tr.data, 'k', label='despiked')
        plt.legend()
        plt.show()
    return tr

Example #8

Show file

File: find_peaks_test.py Project: xiansch/EQcorrscan

 def test_main_find_peaks(self):
     """Check find_peaks2_short output against the stored reference peaks."""
     from eqcorrscan.utils.findpeaks import find_peaks2_short
     import numpy as np
     import os
     data_dir = os.path.join(
         os.path.abspath(os.path.dirname(__file__)), 'test_data')
     ccc = np.load(os.path.join(data_dir, 'test_ccc.npy'))
     found = find_peaks2_short(arr=ccc, thresh=0.2, trig_int=10,
                               debug=0, starttime=False, samp_rate=200.0)
     reference = np.load(os.path.join(data_dir, 'test_peaks.npy'))
     # A length mismatch is the easiest failure to diagnose, so test it first.
     length_msg = 'Peaks are not the same length, has ccc been updated?'
     self.assertEqual(len(found), len(reference), msg=length_msg)
     self.assertTrue((np.array(found) == reference).all())

Example #9

Show file

File: despike.py Project: woxin5295/EQcorrscan

def _median_window(window,
                   window_start,
                   multiplier,
                   starttime,
                   sampling_rate,
                   debug=0):
    """
    Pick peaks in a single window using a threshold derived from the window.

    The threshold is ``multiplier`` times the median of the absolute values
    of ``window``.  Intended as a per-window worker for parallel processing.

    :type window: numpy.ndarray
    :param window: Data to look for peaks in.
    :type window_start: int
    :param window_start: Index of the window start point in the larger \
        array; added to the second element of every peak.
    :type multiplier: float
    :param multiplier: Factor applied to the median absolute value to get \
        the threshold.
    :type starttime: obspy.core.utcdatetime.UTCDateTime
    :param starttime: Start time of the window, passed to the debug plot.
    :type sampling_rate: float
    :param sampling_rate: Sampling rate in Hz, passed to the debug plot.
    :type debug: int
    :param debug: Debug level: >= 2 prints the threshold statistics, >= 4 \
        also plots the peaks when any were found.

    :returns: Peaks with their second element shifted by ``window_start``; \
        an empty list when nothing was found.
    :rtype: list
    """
    from eqcorrscan.utils.findpeaks import find_peaks2_short
    from eqcorrscan.utils.plotting import peaks_plot

    median_abs = np.median(np.abs(window))
    thresh = multiplier * median_abs
    if debug >= 2:
        report = ('Threshold for window is: ' + str(thresh) +
                  '\nMedian is: ' + str(median_abs) +
                  '\nMax is: ' + str(np.max(window)))
        print(report)
    found = find_peaks2_short(arr=window, thresh=thresh, trig_int=5, debug=0)
    if not found:
        return []
    if debug >= 4:
        peaks_plot(window, starttime, sampling_rate, save=False, peaks=found)
    # Re-index from window-relative to positions in the larger array.
    return [(peak[0], peak[1] + window_start) for peak in found]

Example #10

Show file

 def test_main_find_peaks(self):
     """Compare find_peaks2_short output with the saved expected peaks."""
     from eqcorrscan.utils.findpeaks import find_peaks2_short
     import numpy as np
     import os
     here = os.path.abspath(os.path.dirname(__file__))
     testing_dir = os.path.join(here, 'test_data')
     ccc_file = os.path.join(testing_dir, 'test_ccc.npy')
     ccc = np.load(ccc_file)
     peak_kwargs = dict(thresh=0.2, trig_int=10, debug=0, starttime=False,
                        samp_rate=200.0)
     found = find_peaks2_short(arr=ccc, **peak_kwargs)
     peaks_file = os.path.join(testing_dir, 'test_peaks.npy')
     reference = np.load(peaks_file)
     # Compare lengths before contents: a count mismatch is easier to read.
     self.assertEqual(
         len(found), len(reference),
         msg='Peaks are not the same length, has ccc been updated?')
     matches = np.array(found) == reference
     self.assertTrue(matches.all())

Example #11

Show file

def template_remove(tr, template, cc_thresh, windowlength, interp_len):
    """
    Remove every match of a spike template from a trace.

    The trace data are cross-correlated with the template using the "fftw"
    array correlation routine, peaks above cc_thresh are located, and the
    data around each peak are replaced via _interp_gap.

    :type tr: obspy.core.trace.Trace
    :param tr: Trace to remove spikes from.
    :type template: obspy.core.trace.Trace
    :param template: Spike template to look for in data.
    :type cc_thresh: float
    :param cc_thresh: Cross-correlation threshold (-1 - 1).
    :type windowlength: float
    :param windowlength: Length of window to look for spikes in, in seconds.
    :type interp_len: float
    :param interp_len: Window length to remove and fill, in seconds.  If \
        this is shorter than the template, the template length is used \
        instead and a warning is logged.

    :returns: tr, works in place.
    :rtype: :class:`obspy.core.trace.Trace`
    """
    fill_len = int(tr.stats.sampling_rate * interp_len)
    template_len = len(template.data)
    if fill_len < template_len:
        Logger.warning('Interp_len is less than the length of the template, '
                       'will used the length of the template!')
        fill_len = template_len
    if isinstance(template, Trace):
        # Wrap the samples in an outer list to give a 2-D templates array.
        template = np.array([template.data])
    with Timer() as t:
        normxcorr = get_array_xcorr("fftw")
        cc, _ = normxcorr(stream=tr.data.astype(np.float32),
                          templates=template.astype(np.float32),
                          pads=[0])
        trig_samples = windowlength * tr.stats.sampling_rate
        peaks = find_peaks2_short(
            arr=cc.flatten(), thresh=cc_thresh, trig_int=trig_samples)
        # Constant shift of half the fill length applied to every peak index.
        shift = int(0.5 * fill_len)
        for peak in peaks:
            tr.data = _interp_gap(
                data=tr.data, peak_loc=peak[1] + shift, interp_len=fill_len)
    Logger.info("Despiking took: {0:.4f} s".format(t.secs))
    return tr

Example #12

Show file

File: despike.py Project: xiansch/EQcorrscan

def _median_window(window, window_start, multiplier, starttime, sampling_rate,
                   debug=0):
    """Find peaks in one window above a multiple of its median absolute value.

    Intended as a per-window worker for parallel processing.

    :type window: np.ndarray
    :param window: Data to look for peaks in.
    :type window_start: int
    :param window_start: Index of the window start point in the larger \
        array; added to the second element of every peak.
    :type multiplier: float
    :param multiplier: Factor applied to the median absolute value to get \
        the threshold.
    :type starttime: obspy.UTCDateTime
    :param starttime: Start time of the window, passed to the debug plot.
    :type sampling_rate: float
    :param sampling_rate: Sampling rate in Hz, passed to the debug plot.
    :type debug: int
    :param debug: Debug level: >= 2 prints the threshold statistics, >= 4 \
        also plots the peaks when any were found.

    :returns: Peaks with their second element shifted by ``window_start``; \
        an empty list when nothing was found.
    """
    from eqcorrscan.utils.findpeaks import find_peaks2_short
    from eqcorrscan.utils.plotting import peaks_plot

    abs_median = np.median(np.abs(window))
    thresh = multiplier * abs_median
    verbose = debug >= 2
    if verbose:
        summary = 'Threshold for window is: ' + str(thresh)
        summary += '\nMedian is: ' + str(abs_median)
        summary += '\nMax is: ' + str(np.max(window))
        print(summary)
    candidates = find_peaks2_short(
        arr=window, thresh=thresh, trig_int=5, debug=0)
    if not candidates:
        return []
    if debug >= 4:
        peaks_plot(
            window, starttime, sampling_rate, save=False, peaks=candidates)
    shifted = []
    for peak in candidates:
        # Re-index from window-relative to positions in the larger array.
        shifted.append((peak[0], peak[1] + window_start))
    return shifted

Example #13

Show file

File: bright_lights.py Project: emilyws1/EQcorrscan

def _find_detections(cum_net_resp, nodes, threshold, thresh_type, samp_rate,
                     realstations, length):
    """
    Find detections within the cumulative network response.

    :type cum_net_resp: numpy.ndarray
    :param cum_net_resp: Array of cumulative network response for nodes
    :type nodes: list
    :param nodes: Nodes associated with the source of energy in the \
        cum_net_resp
    :type threshold: float
    :param threshold: Threshold value
    :type thresh_type: str
    :param thresh_type: Either MAD (Median Absolute Deviation) or abs \
        (absolute) or RMS (Root Mean Squared)
    :type samp_rate: float
    :param samp_rate: Sampling rate in Hz
    :type realstations: list
    :param realstations: List of stations used to make the cumulative network \
        response, will be reported in the DETECTION
    :type length: float
    :param length: Maximum length of peak to look for in seconds; multiplied \
        by samp_rate before being handed to the peak finder.

    :return: detections as :class: DETECTION
    :rtype: list
    :raises ValueError: If thresh_type is not one of 'MAD', 'abs' or 'RMS'.

    .. note:: This is an internal function to ease parallel processing and \
        should not be called directly.
    """
    from eqcorrscan.core.match_filter import DETECTION
    from eqcorrscan.utils import findpeaks

    # nan_to_num replaces every NaN, so no separate NaN check is needed after.
    cum_net_resp = np.nan_to_num(cum_net_resp)
    mad = np.median(np.abs(cum_net_resp))
    # NOTE(review): the label says "Mean" but the value printed is the median.
    print('Mean of data is: ' + str(np.median(cum_net_resp)))
    print('RMS of data is: ' + str(np.sqrt(np.mean(np.square(cum_net_resp)))))
    print('MAD of data is: ' + str(mad))
    if thresh_type == 'MAD':
        thresh = mad * threshold
    elif thresh_type == 'abs':
        thresh = threshold
    elif thresh_type == 'RMS':
        thresh = _rms(cum_net_resp) * threshold
    else:
        # Previously fell through and failed later with an unbound `thresh`.
        raise ValueError("thresh_type must be one of 'MAD', 'abs' or 'RMS', "
                         "got: " + str(thresh_type))
    print('Threshold is set to: ' + str(thresh))
    print('Max of data is: ' + str(np.max(cum_net_resp)))
    peaks = findpeaks.find_peaks2_short(cum_net_resp,
                                        thresh,
                                        length * samp_rate,
                                        debug=0)
    n_stations = len(realstations)
    detections = []
    for peak in peaks:
        # peak[1] is used both as the index into nodes and, divided by the
        # sampling rate, as the second DETECTION argument.
        node = nodes[peak[1]]
        detections.append(
            DETECTION(
                str(node[0]) + '_' + str(node[1]) + '_' + str(node[2]),
                peak[1] / samp_rate, n_stations, peak[0], thresh,
                'brightness', realstations))
    print('I have found ' + str(len(peaks)) + ' possible detections')
    return detections

Example #14

Show file

def _detect(detector,
            st,
            threshold,
            trig_int,
            moveout=0,
            min_trig=0,
            process=True,
            extract_detections=False,
            cores=1):
    """
    Detect within continuous data using the subspace method.

    Not to be called directly, use the detector.detect method.

    :type detector: eqcorrscan.core.subspace.Detector
    :param detector: Detector to use.
    :type st: obspy.core.stream.Stream
    :param st: Un-processed stream to detect within using the subspace \
        detector
    :type threshold: float
    :param threshold: Threshold value for detections between 0-1
    :type trig_int: float
    :param trig_int: Minimum trigger interval in seconds.
    :type moveout: float
    :param moveout: Maximum allowable moveout window for non-multiplexed,
        network detection.  See note.
    :type min_trig: int
    :param min_trig: Minimum number of stations exceeding threshold for \
        non-multiplexed, network detection. See note.
    :type process: bool
    :param process: Whether or not to process the stream according to the \
        parameters defined by the detector.  Default is to process the \
        data (True).
    :type extract_detections: bool
    :param extract_detections: Whether to extract waveforms for each \
        detection or not, if true will return detections and streams.
    :type cores: int
    :param cores: Number of cores handed to the stream processing step; \
        only used when process is True.

    :return: list of detections, or a tuple of (detections, \
        detection_streams) when extract_detections is True.
    :rtype: list of eqcorrscan.core.match_filter.Detection
    :raises ValueError: If process is False and a trace's sampling rate \
        differs from the detector's.
    """
    detections = []
    # First process the stream
    if process:
        Logger.info('Processing Stream')
        stream, stachans = _subspace_process(
            streams=[st.copy()],
            lowcut=detector.lowcut,
            highcut=detector.highcut,
            filt_order=detector.filt_order,
            sampling_rate=detector.sampling_rate,
            multiplex=detector.multiplex,
            stachans=detector.stachans,
            parallel=True,
            align=False,
            shift_len=None,
            reject=False,
            cores=cores)
    else:
        # Check the sampling rate at the very least
        for tr in st:
            if not tr.stats.sampling_rate == detector.sampling_rate:
                raise ValueError('Sampling rates do not match.')
        stream = [st]
        stachans = detector.stachans
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed intervals.
    outtic = time.perf_counter()
    # If multiplexed, how many samples do we increment by?
    if detector.multiplex:
        Nc = len(detector.stachans)
    else:
        Nc = 1
    # Here do all ffts
    fft_vars = _do_ffts(detector, stream, Nc)
    Logger.info('Computing detection statistics')
    Logger.info('Preallocating stats matrix')
    stats = np.zeros(
        (len(stream[0]), (len(stream[0][0]) // Nc) - (fft_vars[4] // Nc) + 1))
    for det_freq, data_freq_sq, data_freq, i in zip(fft_vars[0], fft_vars[1],
                                                    fft_vars[2],
                                                    np.arange(len(stream[0]))):
        # Calculate det_statistic in frequency domain
        stats[i] = _det_stat_freq(det_freq, data_freq_sq, data_freq,
                                  fft_vars[3], Nc, fft_vars[4], fft_vars[5])
        Logger.info('Stats matrix is shape %s' % str(stats[i].shape))
    trig_int_samples = detector.sampling_rate * trig_int
    Logger.info('Finding peaks')
    # stats has one row per entry of stream[0], so iterate the rows directly.
    peaks = [
        findpeaks.find_peaks2_short(arr=stat_row,
                                    thresh=threshold,
                                    trig_int=trig_int_samples)
        for stat_row in stats
    ]
    if not detector.multiplex:
        # Conduct network coincidence triggering
        peaks = findpeaks.coin_trig(peaks=peaks,
                                    samp_rate=detector.sampling_rate,
                                    moveout=moveout,
                                    min_trig=min_trig,
                                    stachans=stachans,
                                    trig_int=trig_int)
    else:
        peaks = peaks[0]
    # These comment strings do not depend on the individual peak.
    thresh_str = 'threshold=' + str(threshold)
    used_chans = 'channels used: ' +\
        ' '.join([str(pair) for pair in detector.stachans])
    for peak in peaks:
        detecttime = st[0].stats.starttime + \
            (peak[1] / detector.sampling_rate)
        rid = ResourceIdentifier(id=detector.name + '_' + str(detecttime),
                                 prefix='smi:local')
        ev = Event(resource_id=rid)
        cr_i = CreationInfo(author='EQcorrscan',
                            creation_time=UTCDateTime())
        ev.creation_info = cr_i
        # All detection info in Comments for lack of a better idea
        ccc_str = 'detect_val=' + str(peak[0])
        ev.comments.append(Comment(text=thresh_str))
        ev.comments.append(Comment(text=ccc_str))
        ev.comments.append(Comment(text=used_chans))
        for stachan in detector.stachans:
            tr = st.select(station=stachan[0], channel=stachan[1])
            if tr:
                net_code = tr[0].stats.network
            else:
                net_code = ''
            # Every pick is given the detection time itself.
            pick_tm = detecttime
            wv_id = WaveformStreamID(network_code=net_code,
                                     station_code=stachan[0],
                                     channel_code=stachan[1])
            ev.picks.append(Pick(time=pick_tm, waveform_id=wv_id))
        detections.append(
            Detection(template_name=detector.name,
                      detect_time=detecttime,
                      no_chans=len(detector.stachans),
                      detect_val=peak[0],
                      threshold=threshold,
                      typeofdet='subspace',
                      threshold_type='abs',
                      threshold_input=threshold,
                      chans=detector.stachans,
                      event=ev))
    outtoc = time.perf_counter()
    Logger.info('Detection took %s seconds' % str(outtoc - outtic))
    if extract_detections:
        detection_streams = extract_from_stream(st, detections)
        return detections, detection_streams
    return detections

Example #15

Show file

File: match_filter.py Project: woxin5295/EQcorrscan

def match_filter(template_names, template_list, st, threshold,
                 threshold_type, trig_int, plotvar, plotdir='.', cores=1,
                 debug=0, plot_format='png', output_cat=False,
                 extract_detections=False, arg_check=True):
    """
    Main matched-filter detection function.

    Over-arching code to run the correlations of given templates with a \
    day of seismic data and output the detections based on a given threshold.
    For a functional example see the tutorials.

    :type template_names: list
    :param template_names: List of template names in the same order as \
        template_list
    :type template_list: list
    :param template_list: A list of templates of which each template is a \
        Stream of obspy traces containing seismic data and header information.
    :type st: obspy.core.stream.Stream
    :param st: A Stream object containing all the data available and \
        required for the correlations with templates given.  For efficiency \
        this should contain no excess traces which are not in one or more of \
        the templates.  This will now remove excess traces internally, but \
        will copy the stream and work on the copy, leaving your input stream \
        untouched.
    :type threshold: float
    :param threshold: A threshold value set based on the threshold_type
    :type threshold_type: str
    :param threshold_type: The type of threshold to be used, can be MAD, \
        absolute or av_chan_corr.  See Note on thresholding below.
    :type trig_int: float
    :param trig_int: Minimum gap between detections in seconds.
    :type plotvar: bool
    :param plotvar: Turn plotting on or off
    :type plotdir: str
    :param plotdir: Path to plotting folder, plots will be output here, \
        defaults to run location.
    :type cores: int
    :param cores: Number of cores to use
    :type debug: int
    :param debug: Debug output level, the bigger the number, the more the \
        output.
    :type plot_format: str
    :param plot_format: Specify format of output plots if saved
    :type output_cat: bool
    :param output_cat: Specifies if matched_filter will output an \
        obspy.Catalog class containing events for each detection. Default \
        is False, in which case matched_filter will output a list of \
        detection classes, as normal.
    :type extract_detections: bool
    :param extract_detections: Specifies whether or not to return a list of \
        streams, one stream per detection.
    :type arg_check: bool
    :param arg_check: Check arguments, defaults to True, but if running in \
        bulk, and you are certain of your arguments, then set to False.

    .. rubric::
        If neither `output_cat` nor `extract_detections` are set to `True`,
        then only the list of :class:`eqcorrscan.core.match_filter.DETECTION`'s
        will be output:
    :return: :class:`eqcorrscan.core.match_filter.DETECTION`'s detections for
        each detection made.
    :rtype: list
    .. rubric::
        If `output_cat` is set to `True`, then the
        :class:`obspy.core.event.Catalog` will also be output:
    :return: Catalog containing events for each detection, see above.
    :rtype: :class:`obspy.core.event.Catalog`
    .. rubric::
        If `extract_detections` is set to `True` then the list of
        :class:`obspy.core.stream.Stream`'s will also be output.
    :return:
        list of :class:`obspy.core.stream.Stream`'s for each detection, see
        above.
    :rtype: list

    :raises MatchFilterError: If the arguments fail validation, a template \
        holds a masked array (only checked when debug >= 2), a template has \
        traces of differing length, the continuous data contain duplicate \
        channels, the threshold_type is not recognised, or the correlation \
        returns a zero length cccsum.

    .. warning::
        Plotting within the match-filter routine uses the Agg backend
        with interactive plotting turned off.  This is because the function
        is designed to work in bulk.  If you wish to turn interactive
        plotting on you must import matplotlib in your script first, when you
        then import match_filter you will get the warning that this call to
        matplotlib has no effect, which will mean that match_filter has not
        changed the plotting behaviour.

    .. note::
        **Thresholding:**

        **MAD** threshold is calculated as the:

        .. math::

            threshold {\\times} (median(abs(cccsum)))

        where :math:`cccsum` is the cross-correlation sum for a given template.

        **absolute** threshold is a true absolute threshold based on the
        cccsum value.

        **av_chan_corr** is based on the mean values of single-channel
        cross-correlations assuming all data are present as required for the
        template, e.g:

        .. math::

            av\\_chan\\_corr\\_thresh=threshold \\times (cccsum / len(template))

        where :math:`template` is a single template from the input and the
        length is the number of channels within this template.

    .. note::
        The output_cat flag will create an :class:`obspy.core.event.Catalog`
        containing one event for each
        :class:`eqcorrscan.core.match_filter.DETECTION`'s generated by
        match_filter. Each event will contain a number of comments dealing
        with correlation values and channels used for the detection. Each
        channel used for the detection will have a corresponding
        :class:`obspy.core.event.Pick` which will contain time and
        waveform information. **HOWEVER**, the user should note that, at
        present, the pick times do not account for the
        prepick times inherent in each template. For example, if a template
        trace starts 0.1 seconds before the actual arrival of that phase,
        then the pick time generated by match_filter for that phase will be
        0.1 seconds early. We are working on a solution that will involve
        saving templates alongside associated metadata.
    """
    import matplotlib
    matplotlib.use('Agg')
    # time.clock was removed in Python 3.8; use perf_counter where it exists
    # and only fall back to clock on interpreters that lack it.
    timer = getattr(time, 'perf_counter', None) or time.clock
    if arg_check:
        # Check the arguments to be nice - if arguments wrong type the parallel
        # output for the error won't be useful
        if not isinstance(template_names, list):
            raise MatchFilterError('template_names must be of type: list')
        if not isinstance(template_list, list):
            raise MatchFilterError('templates must be of type: list')
        if not len(template_list) == len(template_names):
            raise MatchFilterError('Not the same number of templates as names')
        for template in template_list:
            if not isinstance(template, Stream):
                msg = 'template in template_list must be of type: ' +\
                      'obspy.core.stream.Stream'
                raise MatchFilterError(msg)
        if not isinstance(st, Stream):
            msg = 'st must be of type: obspy.core.stream.Stream'
            raise MatchFilterError(msg)
        if str(threshold_type) not in [str('MAD'), str('absolute'),
                                       str('av_chan_corr')]:
            msg = 'threshold_type must be one of: MAD, absolute, av_chan_corr'
            raise MatchFilterError(msg)

    # Copy the stream here because we will muck about with it
    stream = st.copy()
    templates = copy.deepcopy(template_list)
    _template_names = copy.deepcopy(template_names)
    # Debug option to confirm that the channel names match those in the
    # templates
    if debug >= 2:
        template_stachan = []
        data_stachan = []
        for template in templates:
            for tr in template:
                if isinstance(tr.data, np.ma.core.MaskedArray):
                    raise MatchFilterError('Template contains masked array,'
                                           ' split first')
                template_stachan.append(tr.stats.station + '.' +
                                        tr.stats.channel)
        for tr in stream:
            data_stachan.append(tr.stats.station + '.' + tr.stats.channel)
        template_stachan = list(set(template_stachan))
        data_stachan = list(set(data_stachan))
        if debug >= 3:
            print('I have template info for these stations:')
            print(template_stachan)
            print('I have daylong data for these stations:')
            print(data_stachan)
    # Perform a check that the continuous data are all the same length
    min_start_time = min([tr.stats.starttime for tr in stream])
    max_end_time = max([tr.stats.endtime for tr in stream])
    # NOTE(review): a trace spanning the whole window presumably has one more
    # sample than rate * duration, so this test may flag every trace and pad
    # it with zero-length arrays - confirm before tightening the comparison.
    longest_trace_length = stream[0].stats.sampling_rate * (max_end_time -
                                                            min_start_time)
    for tr in stream:
        if not tr.stats.npts == longest_trace_length:
            msg = 'Data are not equal length, padding short traces'
            warnings.warn(msg)
            start_pad = np.zeros(int(tr.stats.sampling_rate *
                                     (tr.stats.starttime - min_start_time)))
            end_pad = np.zeros(int(tr.stats.sampling_rate *
                                   (max_end_time - tr.stats.endtime)))
            tr.data = np.concatenate([start_pad, tr.data, end_pad])
    # Perform check that all template lengths are internally consistent
    for template_name, temp in zip(_template_names, templates):
        if len(set([tr.stats.npts for tr in temp])) > 1:
            msg = ('Template %s contains traces of differing length, this is '
                   'not currently supported' % template_name)
            raise MatchFilterError(msg)
    outtic = timer()
    if debug >= 2:
        print('Ensuring all template channels have matches in long data')
    # Work out what station-channel pairs are in the templates, including
    # duplicate station-channel pairs.  We will use this information to fill
    # all templates with the same station-channel pairs as required by
    # _template_loop.  The value stored for each pair is the largest number
    # of times it occurs within any single template.
    template_stachan = {}
    for template in templates:
        stachans_in_template = Counter(
            (tr.stats.network, tr.stats.station,
             tr.stats.location, tr.stats.channel) for tr in template)
        for stachan, count in stachans_in_template.items():
            if count > template_stachan.get(stachan, 0):
                template_stachan[stachan] = count
    # Remove un-matched channels from templates.  Iterate over a snapshot of
    # the keys because entries are deleted from the dictionary as we go.
    for stachan in list(template_stachan):
        if stream.select(network=stachan[0], station=stachan[1],
                         location=stachan[2], channel=stachan[3]):
            continue
        del template_stachan[stachan]
        # Remove template traces rather than adding NaN data
        for template in templates:
            for tr in template.select(network=stachan[0],
                                      station=stachan[1],
                                      location=stachan[2],
                                      channel=stachan[3]):
                template.remove(tr)
    # Remove un-needed channels from continuous data.  A snapshot is iterated
    # so that removing traces cannot disturb the iteration.
    for tr in list(stream):
        if (tr.stats.network, tr.stats.station,
                tr.stats.location, tr.stats.channel) not in template_stachan:
            stream.remove(tr)
    # Check for duplicate channels
    c_stachans = Counter((tr.stats.network, tr.stats.station,
                          tr.stats.location, tr.stats.channel)
                         for tr in stream)
    for key, count in c_stachans.items():
        if count > 1:
            msg = ('Multiple channels for %s.%s.%s.%s, likely a data issue'
                   % (key[0], key[1], key[2], key[3]))
            raise MatchFilterError(msg)
    # Drop templates left with no channels.  New lists are built rather than
    # removing entries from the lists being iterated: removing in place made
    # the loop skip the template following each removed one, leaving it
    # un-padded and un-checked.
    kept_templates = []
    kept_names = []
    for template, template_name in zip(templates, _template_names):
        if len(template) == 0:
            msg = ('No channels matching in continuous data for ' +
                   'template ' + template_name)
            warnings.warn(msg)
            continue
        kept_templates.append(template)
        kept_names.append(template_name)
    templates = kept_templates
    _template_names = kept_names
    # Pad out templates to have all channels
    for template in templates:
        for stachan in template_stachan:
            number_of_channels = len(template.select(network=stachan[0],
                                                     station=stachan[1],
                                                     location=stachan[2],
                                                     channel=stachan[3]))
            if number_of_channels < template_stachan[stachan]:
                missed_channels = (template_stachan[stachan] -
                                   number_of_channels)
                nulltrace = Trace()
                nulltrace.stats.update(
                    {'network': stachan[0], 'station': stachan[1],
                     'location': stachan[2], 'channel': stachan[3],
                     'sampling_rate': template[0].stats.sampling_rate,
                     'starttime': template[0].stats.starttime})
                # np.nan rather than np.NaN: the latter alias no longer
                # exists in NumPy 2.0.
                nulltrace.data = np.full(len(template[0].data), np.nan,
                                         dtype=np.float32)
                for dummy in range(missed_channels):
                    template += nulltrace
        template.sort()
        # Quick check that this has all worked
        if len(template) != max([len(t) for t in templates]):
            raise MatchFilterError('Internal error forcing same template '
                                   'lengths, report this error.')
    if debug >= 2:
        print('Starting the correlation run for this day')
    if debug >= 4:
        for template in templates:
            print(template)
        print(stream)
    [cccsums, no_chans, chans] = _channel_loop(templates=templates,
                                               stream=stream,
                                               cores=cores,
                                               debug=debug)
    if len(cccsums[0]) == 0:
        raise MatchFilterError('Correlation has not run, zero length cccsum')
    outtoc = timer()
    print(' '.join(['Looping over templates and streams took:',
                    str(outtoc - outtic), 's']))
    if debug >= 2:
        print(' '.join(['The shape of the returned cccsums is:',
                        str(np.shape(cccsums))]))
        print(' '.join(['This is from', str(len(templates)), 'templates']))
        print(' '.join(['Correlated with', str(len(stream)),
                        'channels of data']))
    detections = []
    if output_cat:
        det_cat = Catalog()
    for i, cccsum in enumerate(cccsums):
        template = templates[i]
        if str(threshold_type) == str('MAD'):
            rawthresh = threshold * np.median(np.abs(cccsum))
        elif str(threshold_type) == str('absolute'):
            rawthresh = threshold
        elif str(threshold_type) == str('av_chan_corr'):
            rawthresh = threshold * no_chans[i]
        else:
            # Only reachable when arg_check is False; fail with a clear
            # message instead of an unbound-variable error further down.
            msg = 'threshold_type must be one of: MAD, absolute, av_chan_corr'
            raise MatchFilterError(msg)
        # Findpeaks returns a list of tuples in the form [(cccsum, sample)]
        print(' '.join(['Threshold is set at:', str(rawthresh)]))
        print(' '.join(['Max of data is:', str(max(cccsum))]))
        print(' '.join(['Mean of data is:', str(np.mean(cccsum))]))
        if np.abs(np.mean(cccsum)) > 0.05:
            warnings.warn('Mean is not zero!  Check this!')
        # Set up a trace object for the cccsum as this is easier to plot and
        # maintains timing
        if plotvar:
            _match_filter_plot(stream=stream, cccsum=cccsum,
                               template_names=_template_names,
                               rawthresh=rawthresh, plotdir=plotdir,
                               plot_format=plot_format, i=i)
        if debug >= 4:
            print(' '.join(['Saved the cccsum to:', _template_names[i],
                            stream[0].stats.starttime.datetime.
                           strftime('%Y%j')]))
            np.save(_template_names[i] +
                    stream[0].stats.starttime.datetime.strftime('%Y%j'),
                    cccsum)
        tic = timer()
        if max(cccsum) > rawthresh:
            peaks = findpeaks.find_peaks2_short(
                arr=cccsum, thresh=rawthresh,
                trig_int=trig_int * stream[0].stats.sampling_rate, debug=debug,
                starttime=stream[0].stats.starttime,
                samp_rate=stream[0].stats.sampling_rate)
        else:
            print('No peaks found above threshold')
            peaks = False
        toc = timer()
        if debug >= 1:
            print(' '.join(['Finding peaks took:', str(toc - tic), 's']))
        if not peaks:
            continue
        # Earliest trace start in the template; the picks below are offset
        # from the detection time relative to this.
        min_template_tm = min([tr.stats.starttime for tr in template])
        for peak in peaks:
            detecttime = stream[0].stats.starttime +\
                peak[1] / stream[0].stats.sampling_rate
            # Detect time must be valid QuakeML uri within resource_id.
            # This will write a formatted string which is still
            # readable by UTCDateTime
            rid = ResourceIdentifier(id=_template_names[i] + '_' +
                                     str(detecttime.
                                         strftime('%Y%m%dT%H%M%S.%f')),
                                     prefix='smi:local')
            ev = Event(resource_id=rid)
            cr_i = CreationInfo(author='EQcorrscan',
                                creation_time=UTCDateTime())
            ev.creation_info = cr_i
            # All detection info in Comments for lack of a better idea
            thresh_str = 'threshold=' + str(rawthresh)
            ccc_str = 'detect_val=' + str(peak[0])
            used_chans = 'channels used: ' +\
                         ' '.join([str(pair) for pair in chans[i]])
            ev.comments.append(Comment(text=thresh_str))
            ev.comments.append(Comment(text=ccc_str))
            ev.comments.append(Comment(text=used_chans))
            for tr in template:
                # Only channels that took part in the correlation get a pick
                if (tr.stats.station, tr.stats.channel) not in chans[i]:
                    continue
                pick_tm = detecttime + (tr.stats.starttime -
                                        min_template_tm)
                wv_id = WaveformStreamID(network_code=tr.stats.network,
                                         station_code=tr.stats.station,
                                         channel_code=tr.stats.channel)
                ev.picks.append(Pick(time=pick_tm, waveform_id=wv_id))
            detections.append(DETECTION(_template_names[i],
                                        detecttime,
                                        no_chans[i], peak[0], rawthresh,
                                        'corr', chans[i], event=ev))
            if output_cat:
                det_cat.append(ev)
    # Cut the detection waveforms once, after every template has been
    # processed, rather than repeating the extraction for each template.
    if extract_detections:
        detection_streams = extract_from_stream(stream, detections)
    del stream, templates
    if output_cat and extract_detections:
        return detections, det_cat, detection_streams
    if output_cat:
        return detections, det_cat
    if extract_detections:
        return detections, detection_streams
    return detections

Example #16

Show file

File: match_filter.py Project: xiansch/EQcorrscan

def match_filter(template_names, template_list, st, threshold,
                 threshold_type, trig_int, plotvar, plotdir='.', cores=1,
                 tempdir=False, debug=0, plot_format='png',
                 output_cat=False, extract_detections=False,
                 arg_check=True):
    """
    Main matched-filter detection function.
    Over-arching code to run the correlations of given templates with a \
    day of seismic data and output the detections based on a given threshold.
    For a functional example see the tutorials.

    :type template_names: list
    :param template_names: List of template names in the same order as \
        template_list
    :type template_list: list
    :param template_list: A list of templates of which each template is a \
        Stream of obspy traces containing seismic data and header information.
    :type st: obspy.core.stream.Stream
    :param st: An obspy.Stream object containing all the data available and \
        required for the correlations with templates given.  For efficiency \
        this should contain no excess traces which are not in one or more of \
        the templates.  This will now remove excess traces internally, but \
        will copy the stream and work on the copy, leaving your input stream \
        untouched.
    :type threshold: float
    :param threshold: A threshold value set based on the threshold_type
    :type threshold_type: str
    :param threshold_type: The type of threshold to be used, can be MAD, \
        absolute or av_chan_corr.    MAD threshold is calculated as the \
        threshold*(median(abs(cccsum))) where cccsum is the cross-correlation \
        sum for a given template. absolute threhsold is a true absolute \
        threshold based on the cccsum value av_chan_corr is based on the mean \
        values of single-channel cross-correlations assuming all data are \
        present as required for the template, \
        e.g. av_chan_corr_thresh=threshold*(cccsum/len(template)) where \
        template is a single template from the input and the length is the \
        number of channels within this template.
    :type trig_int: float
    :param trig_int: Minimum gap between detections in seconds.
    :type plotvar: bool
    :param plotvar: Turn plotting on or off
    :type plotdir: str
    :param plotdir: Path to plotting folder, plots will be output here, \
        defaults to run location.
    :type tempdir: str
    :param tempdir: Directory to put temporary files, or False
    :type cores: int
    :param cores: Number of cores to use
    :type debug: int
    :param debug: Debug output level, the bigger the number, the more the \
        output.
    :type plot_format: str
    :param plot_format: Specify format of output plots if saved
    :type output_cat: bool
    :param output_cat: Specifies if matched_filter will output an \
        obspy.Catalog class containing events for each detection. Default \
        is False, in which case matched_filter will output a list of \
        detection classes, as normal.
    :type extract_detections: bool
    :param extract_detections: Specifies whether or not to return a list of \
        streams, one stream per detection.
    :type arg_check: bool
    :param arg_check: Check arguments, defaults to True, but if running in \
        bulk, and you are certain of your arguments, then set to False.

    :return: :class: 'DETECTIONS' detections for each channel formatted as \
        :class: 'obspy.UTCDateTime' objects.
    :return: :class: obspy.Catalog containing events for each detection.
    :return: list of :class: obspy.Stream objects for each detection.

    .. note:: Plotting within the match-filter routine uses the Agg backend \
        with interactive plotting turned off.  This is because the function \
        is designed to work in bulk.  If you wish to turn interactive \
        plotting on you must import matplotlib in your script first, when you \
        them import match_filter you will get the warning that this call to \
        matplotlib has no effect, which will mean that match_filter has not \
        changed the plotting behaviour.

    .. note:: The output_cat flag will create an :class: obspy.Catalog \
        containing one event for each :class: 'DETECTIONS' generated by \
        match_filter. Each event will contain a number of comments dealing \
        with correlation values and channels used for the detection. Each \
        channel used for the detection will have a corresponding :class: Pick \
        which will contain time and waveform information. HOWEVER, the user \
        should note that, at present, the pick times do not account for the \
        prepick times inherent in each template. For example, if a template \
        trace starts 0.1 seconds before the actual arrival of that phase, \
        then the pick time generated by match_filter for that phase will be \
        0.1 seconds early. We are looking towards a solution which will \
        involve saving templates alongside associated metadata.
    """
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    plt.ioff()
    import copy
    from eqcorrscan.utils import plotting
    from eqcorrscan.utils import findpeaks
    from obspy import Trace, Catalog, UTCDateTime, Stream
    from obspy.core.event import Event, Pick, CreationInfo, ResourceIdentifier
    from obspy.core.event import Comment, WaveformStreamID
    import time

    if arg_check:
        # Check the arguments to be nice - if arguments wrong type the parallel
        # output for the error won't be useful
        if not type(template_names) == list:
            raise IOError('template_names must be of type: list')
        if not type(template_list) == list:
            raise IOError('templates must be of type: list')
        for template in template_list:
            if not type(template) == Stream:
                msg = 'template in template_list must be of type: ' +\
                      'obspy.core.stream.Stream'
                raise IOError(msg)
        if not type(st) == Stream:
            msg = 'st must be of type: obspy.core.stream.Stream'
            raise IOError(msg)
        if threshold_type not in ['MAD', 'absolute', 'av_chan_corr']:
            msg = 'threshold_type must be one of: MAD, absolute, av_chan_corr'
            raise IOError(msg)

    # Copy the stream here because we will muck about with it
    stream = st.copy()
    templates = copy.deepcopy(template_list)
    # Debug option to confirm that the channel names match those in the
    # templates
    if debug >= 2:
        template_stachan = []
        data_stachan = []
        for template in templates:
            for tr in template:
                template_stachan.append(tr.stats.station + '.' +
                                        tr.stats.channel)
        for tr in stream:
            data_stachan.append(tr.stats.station + '.' + tr.stats.channel)
        template_stachan = list(set(template_stachan))
        data_stachan = list(set(data_stachan))
        if debug >= 3:
            print('I have template info for these stations:')
            print(template_stachan)
            print('I have daylong data for these stations:')
            print(data_stachan)
    # Perform a check that the daylong vectors are daylong
    for tr in stream:
        if not tr.stats.sampling_rate * 86400 == tr.stats.npts:
            msg = ' '.join(['Data are not daylong for', tr.stats.station,
                            tr.stats.channel])
            raise ValueError(msg)
    # Perform check that all template lengths are internally consistent
    for i, temp in enumerate(template_list):
        if len(set([tr.stats.npts for tr in temp])) > 1:
            msg = 'Template %s contains traces of differing length!! THIS \
                  WILL CAUSE ISSUES' % template_names[i]
            raise ValueError(msg)
    # Call the _template_loop function to do all the correlation work
    outtic = time.clock()
    # Edit here from previous, stable, but slow match_filter
    # Would be worth testing without an if statement, but with every station in
    # the possible template stations having data, but for those without real
    # data make the data NaN to return NaN ccc_sum
    # Note: this works
    if debug >= 2:
        print('Ensuring all template channels have matches in daylong data')
    template_stachan = []
    for template in templates:
        for tr in template:
            template_stachan += [(tr.stats.station, tr.stats.channel)]
    template_stachan = list(set(template_stachan))
    # Copy this here to keep it safe
    for stachan in template_stachan:
        if not stream.select(station=stachan[0], channel=stachan[1]):
            # Remove template traces rather than adding NaN data
            for template in templates:
                if template.select(station=stachan[0], channel=stachan[1]):
                    for tr in template.select(station=stachan[0],
                                              channel=stachan[1]):
                        template.remove(tr)
    # Remove un-needed channels
    for tr in stream:
        if not (tr.stats.station, tr.stats.channel) in template_stachan:
            stream.remove(tr)
    # Also pad out templates to have all channels
    for template, template_name in zip(templates, template_names):
        if len(template) == 0:
            msg = ('No channels matching in continuous data for ' +
                   'template' + template_name)
            warnings.warn(msg)
            templates.remove(template)
            template_names.remove(template_name)
            continue
        for stachan in template_stachan:
            if not template.select(station=stachan[0], channel=stachan[1]):
                nulltrace = Trace()
                nulltrace.stats.station = stachan[0]
                nulltrace.stats.channel = stachan[1]
                nulltrace.stats.sampling_rate = template[0].stats.sampling_rate
                nulltrace.stats.starttime = template[0].stats.starttime
                nulltrace.data = np.array([np.NaN] * len(template[0].data),
                                          dtype=np.float32)
                template += nulltrace
    if debug >= 2:
        print('Starting the correlation run for this day')
    [cccsums, no_chans, chans] = _channel_loop(templates, stream, cores, debug)
    if len(cccsums[0]) == 0:
        raise ValueError('Correlation has not run, zero length cccsum')
    outtoc = time.clock()
    print(' '.join(['Looping over templates and streams took:',
                    str(outtoc - outtic), 's']))
    if debug >= 2:
        print(' '.join(['The shape of the returned cccsums is:',
                        str(np.shape(cccsums))]))
        print(' '.join(['This is from', str(len(templates)), 'templates']))
        print(' '.join(['Correlated with', str(len(stream)),
                        'channels of data']))
    detections = []
    if output_cat:
        det_cat = Catalog()
    for i, cccsum in enumerate(cccsums):
        template = templates[i]
        if threshold_type == 'MAD':
            rawthresh = threshold * np.median(np.abs(cccsum))
        elif threshold_type == 'absolute':
            rawthresh = threshold
        elif threshold_type == 'av_chan_corr':
            rawthresh = threshold * no_chans[i]
        # Findpeaks returns a list of tuples in the form [(cccsum, sample)]
        print(' '.join(['Threshold is set at:', str(rawthresh)]))
        print(' '.join(['Max of data is:', str(max(cccsum))]))
        print(' '.join(['Mean of data is:', str(np.mean(cccsum))]))
        if np.abs(np.mean(cccsum)) > 0.05:
            warnings.warn('Mean is not zero!  Check this!')
        # Set up a trace object for the cccsum as this is easier to plot and
        # maintains timing
        if plotvar:
            stream_plot = copy.deepcopy(stream[0])
            # Downsample for plotting
            stream_plot.decimate(int(stream[0].stats.sampling_rate / 10))
            cccsum_plot = Trace(cccsum)
            cccsum_plot.stats.sampling_rate = stream[0].stats.sampling_rate
            # Resample here to maintain shape better
            cccsum_hist = cccsum_plot.copy()
            cccsum_hist = cccsum_hist.decimate(int(stream[0].stats.
                                                   sampling_rate / 10)).data
            cccsum_plot = plotting.chunk_data(cccsum_plot, 10,
                                              'Maxabs').data
            # Enforce same length
            stream_plot.data = stream_plot.data[0:len(cccsum_plot)]
            cccsum_plot = cccsum_plot[0:len(stream_plot.data)]
            cccsum_hist = cccsum_hist[0:len(stream_plot.data)]
            plotting.triple_plot(cccsum_plot, cccsum_hist,
                                 stream_plot, rawthresh, True,
                                 plotdir + '/cccsum_plot_' +
                                 template_names[i] + '_' +
                                 stream[0].stats.starttime.
                                 datetime.strftime('%Y-%m-%d') +
                                 '.' + plot_format)
            if debug >= 4:
                print(' '.join(['Saved the cccsum to:', template_names[i],
                                stream[0].stats.starttime.datetime.
                                strftime('%Y%j')]))
                np.save(template_names[i] +
                        stream[0].stats.starttime.datetime.strftime('%Y%j'),
                        cccsum)
        tic = time.clock()
        if debug >= 4:
            np.save('cccsum_' + str(i) + '.npy', cccsum)
        if debug >= 3 and max(cccsum) > rawthresh:
            peaks = findpeaks.find_peaks2_short(cccsum, rawthresh,
                                                trig_int * stream[0].stats.
                                                sampling_rate, debug,
                                                stream[0].stats.starttime,
                                                stream[0].stats.sampling_rate)
        elif max(cccsum) > rawthresh:
            peaks = findpeaks.find_peaks2_short(cccsum, rawthresh,
                                                trig_int * stream[0].stats.
                                                sampling_rate, debug)
        else:
            print('No peaks found above threshold')
            peaks = False
        toc = time.clock()
        if debug >= 1:
            print(' '.join(['Finding peaks took:', str(toc - tic), 's']))
        if peaks:
            for peak in peaks:
                detecttime = stream[0].stats.starttime +\
                    peak[1] / stream[0].stats.sampling_rate
                # Detect time must be valid QuakeML uri within resource_id.
                # This will write a formatted string which is still readable by UTCDateTime
                rid = ResourceIdentifier(id=template_names[i] + '_' +
                                         str(detecttime.strftime('%Y%m%dT%H%M%S.%f')),
                                         prefix='smi:local')
                ev = Event(resource_id=rid)
                cr_i = CreationInfo(author='EQcorrscan',
                                    creation_time=UTCDateTime())
                ev.creation_info = cr_i
                # All detection info in Comments for lack of a better idea
                thresh_str = 'threshold=' + str(rawthresh)
                ccc_str = 'detect_val=' + str(peak[0])
                used_chans = 'channels used: ' +\
                             ' '.join([str(pair) for pair in chans[i]])
                ev.comments.append(Comment(text=thresh_str))
                ev.comments.append(Comment(text=ccc_str))
                ev.comments.append(Comment(text=used_chans))
                min_template_tm = min([tr.stats.starttime for tr in template])
                for tr in template:
                    if (tr.stats.station, tr.stats.channel) not in chans[i]:
                        continue
                    else:
                        pick_tm = detecttime + (tr.stats.starttime - min_template_tm)
                        wv_id = WaveformStreamID(network_code=tr.stats.network,
                                                 station_code=tr.stats.station,
                                                 channel_code=tr.stats.channel)
                        ev.picks.append(Pick(time=pick_tm, waveform_id=wv_id))
                detections.append(DETECTION(template_names[i],
                                            detecttime,
                                            no_chans[i], peak[0], rawthresh,
                                            'corr', chans[i], event=ev))
                if output_cat:
                    det_cat.append(ev)
        if extract_detections:
            detection_streams = extract_from_stream(stream, detections)
    del stream, templates
    if output_cat and not extract_detections:
        return detections, det_cat
    elif not extract_detections:
        return detections
    elif extract_detections and not output_cat:
        return detections, detection_streams
    else:
        return detections, det_cat, detection_streams

Example #17

Show file

File: match_filter.py Project: WeijunW/EQcorrscan

def match_filter(template_names, template_list, st, threshold,
                 threshold_type, trig_int, plotvar, plotdir='.', cores=1,
                 tempdir=False, debug=0, plot_format='jpg'):
    """
    Correlate templates with a day of data and return threshold detections.

    Works on copies of ``st`` and ``template_list``.  The channel sets are
    reconciled first: template traces with no matching data are removed,
    data traces that appear in no template are removed, and every template
    is padded with NaN traces so that all templates share the same
    station/channel pairs.  ``_channel_loop`` then computes one
    cross-correlation sum (cccsum) per template, and every peak above the
    threshold becomes a ``DETECTION``.

    :type template_names: list
    :param template_names: List of template names in the same order as
        template_list
    :type template_list: list :class: 'obspy.Stream'
    :param template_list: A list of templates of which each template is a
        Stream of obspy traces containing seismic data and header information.
    :type st: :class: 'obspy.Stream'
    :param st: Stream of day-long data to correlate with the templates.
        It is copied, so the input stream is left untouched.
    :type threshold: float
    :param threshold: A threshold value set based on the threshold_type
    :type threshold_type: str
    :param threshold_type: One of 'MAD' (threshold * median(abs(cccsum))),
        'absolute' (threshold used as given) or 'av_chan_corr'
        (threshold * number of channels reported by ``_channel_loop`` for
        the template).  Any other value prints a notice and falls back to
        the MAD threshold.
    :type trig_int: float
    :param trig_int: Minimum gap between detections in seconds.
    :type plotvar: bool
    :param plotvar: Turn plotting on or off
    :type plotdir: str
    :param plotdir: Path to plotting folder, plots will be output here,
        defaults to run location.
    :type cores: int
    :param cores: Number of cores to use, passed to ``_channel_loop``.
    :type tempdir: String or False
    :param tempdir: Directory to put temporary files, or False.  Not used
        within this function body.
    :type debug: int
    :param debug: Debug output level, the bigger the number, the more the
        output.
    :type plot_format: str
    :param plot_format: File extension used for the saved cccsum plots.

    :return: List of ``DETECTION`` objects, one per peak above threshold.
    :raises ValueError: If a trace is not exactly one day long
        (sampling_rate * 86400 samples) or the correlation sums are empty.

    .. rubric:: Note
        Plotting within the match-filter routine uses the Agg backend with
        interactive plotting turned off.  This is because the function is
        designed to work in bulk.  If you wish to turn interactive plotting
        on you must import matplotlib in your script first; when you then
        import match_filter you will get the warning that this call to
        matplotlib has no effect, which will mean that match_filter has not
        changed the plotting behaviour.
    """
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    plt.ioff()
    import copy
    from eqcorrscan.utils import EQcorrscan_plotting
    from eqcorrscan.utils import findpeaks
    from obspy import Trace
    import time

    # time.clock was removed in Python 3.8; use perf_counter where available
    # and only fall back to clock on interpreters that lack it.
    timer = getattr(time, 'perf_counter', None) or time.clock

    # Work on copies because both the stream and the templates are edited
    stream = st.copy()
    templates = copy.deepcopy(template_list)
    # Debug option to confirm that the channel names match those in the
    # templates
    if debug >= 3:
        template_ids = list(set(tr.stats.station + '.' + tr.stats.channel
                                for template in templates
                                for tr in template))
        data_ids = list(set(tr.stats.station + '.' + tr.stats.channel
                            for tr in stream))
        print('I have template info for these stations:')
        print(template_ids)
        print('I have daylong data for these stations:')
        print(data_ids)
    # Perform a check that the daylong vectors are daylong
    for tr in stream:
        if tr.stats.sampling_rate * 86400 != tr.stats.npts:
            msg = ' '.join(['Data are not daylong for', tr.stats.station,
                            tr.stats.channel])
            raise ValueError(msg)
    outtic = timer()
    if debug >= 2:
        print('Ensuring all template channels have matches in daylong data')
    # Unique (station, channel) pairs over all templates
    stachans = set()
    for template in templates:
        for tr in template:
            stachans.add((tr.stats.station, tr.stats.channel))
    template_stachan = list(stachans)
    # Remove template traces that have no matching data, rather than adding
    # NaN data to the stream
    for station, channel in template_stachan:
        if not stream.select(station=station, channel=channel):
            for template in templates:
                for tr in template.select(station=station, channel=channel):
                    template.remove(tr)
    # Remove un-needed channels; iterate over a snapshot because the stream
    # is modified inside the loop
    for tr in list(stream):
        if (tr.stats.station, tr.stats.channel) not in template_stachan:
            stream.remove(tr)
    # Also pad out templates to have all channels
    for template in templates:
        for station, channel in template_stachan:
            if not template.select(station=station, channel=channel):
                nulltrace = Trace()
                nulltrace.stats.station = station
                nulltrace.stats.channel = channel
                nulltrace.stats.sampling_rate = template[0].stats.sampling_rate
                nulltrace.stats.starttime = template[0].stats.starttime
                # np.nan rather than the np.NaN alias, which NumPy 2 removed
                nulltrace.data = np.full(len(template[0].data), np.nan,
                                         dtype=np.float32)
                template += nulltrace
    if debug >= 2:
        print('Starting the correlation run for this day')
    [cccsums, no_chans] = _channel_loop(templates, stream, cores, debug)
    if len(cccsums[0]) == 0:
        raise ValueError('Correlation has not run, zero length cccsum')
    outtoc = timer()
    print(' '.join(['Looping over templates and streams took:',
                    str(outtoc - outtic), 's']))
    if debug >= 2:
        print(' '.join(['The shape of the returned cccsums is:',
                        str(np.shape(cccsums))]))
        print(' '.join(['This is from', str(len(templates)), 'templates']))
        print(' '.join(['Correlated with', str(len(stream)),
                        'channels of data']))
    detections = []
    for i, cccsum in enumerate(cccsums):
        # Reduce the (potentially day-long) array once instead of calling the
        # builtin max() on it several times
        cccsum_max = np.max(cccsum)
        cccsum_mean = np.mean(cccsum)
        if threshold_type == 'MAD':
            rawthresh = threshold * np.median(np.abs(cccsum))
        elif threshold_type == 'absolute':
            rawthresh = threshold
        elif threshold_type == 'av_chan_corr':
            # Scalar threshold: average single-channel correlation times the
            # number of channels used for this template
            rawthresh = threshold * no_chans[i]
        else:
            print('You have not selected the correct threshold type, I will '
                  'use MAD as I like it')
            rawthresh = threshold * np.median(np.abs(cccsum))
        # Findpeaks returns a list of tuples in the form [(cccsum, sample)]
        print(' '.join(['Threshold is set at:', str(rawthresh)]))
        print(' '.join(['Max of data is:', str(cccsum_max)]))
        print(' '.join(['Mean of data is:', str(cccsum_mean)]))
        if np.abs(cccsum_mean) > 0.05:
            warnings.warn('Mean is not zero!  Check this!')
        # Set up a trace object for the cccsum as this is easier to plot and
        # maintains timing
        if plotvar:
            ref_stats = stream[0].stats
            decimation = int(ref_stats.sampling_rate / 10)
            stream_plot = copy.deepcopy(stream[0])
            # Downsample for plotting
            stream_plot.decimate(decimation)
            cccsum_plot = Trace(cccsum)
            cccsum_plot.stats.sampling_rate = ref_stats.sampling_rate
            # Resample here to maintain shape better
            cccsum_hist = cccsum_plot.copy()
            cccsum_hist = cccsum_hist.decimate(decimation).data
            cccsum_plot = EQcorrscan_plotting.chunk_data(cccsum_plot, 10,
                                                         'Maxabs').data
            # Enforce same length
            stream_plot.data = stream_plot.data[0:len(cccsum_plot)]
            cccsum_plot = cccsum_plot[0:len(stream_plot.data)]
            cccsum_hist = cccsum_hist[0:len(stream_plot.data)]
            plot_file = (plotdir + '/cccsum_plot_' + template_names[i] +
                         '_' +
                         ref_stats.starttime.datetime.strftime('%Y-%m-%d') +
                         '.' + plot_format)
            EQcorrscan_plotting.triple_plot(cccsum_plot, cccsum_hist,
                                            stream_plot, rawthresh, True,
                                            plot_file)
            if debug >= 4:
                julday = ref_stats.starttime.datetime.strftime('%Y%j')
                print(' '.join(['Saved the cccsum to:', template_names[i],
                                julday]))
                np.save(template_names[i] + julday, cccsum)
        tic = timer()
        if debug >= 4:
            np.save('cccsum_' + str(i) + '.npy', cccsum)
        if cccsum_max > rawthresh:
            ref_stats = stream[0].stats
            # trig_int is in seconds; the peak finder is given samples
            min_gap = trig_int * ref_stats.sampling_rate
            if debug >= 3:
                # Extra arguments are only passed at high debug levels
                peaks = findpeaks.find_peaks2_short(cccsum, rawthresh,
                                                    min_gap, debug,
                                                    ref_stats.starttime,
                                                    ref_stats.sampling_rate)
            else:
                peaks = findpeaks.find_peaks2_short(cccsum, rawthresh,
                                                    min_gap, debug)
        else:
            print('No peaks found above threshold')
            peaks = False
        toc = timer()
        if debug >= 1:
            print(' '.join(['Finding peaks took:', str(toc - tic), 's']))
        if peaks:
            for peak in peaks:
                # peak is (cccsum value, sample index)
                detecttime = stream[0].stats.starttime +\
                    peak[1] / stream[0].stats.sampling_rate
                detections.append(DETECTION(template_names[i],
                                            detecttime,
                                            no_chans[i], peak[0], rawthresh,
                                            'corr'))
    return detections

Example #18

Show file

File: bright_lights.py Project: mikehagerty/EQcorrscan

def _find_detections(cum_net_resp, nodes, threshold, thresh_type, samp_rate,
                     realstations, length):
    """
    Find detections within the cumulative network response.

    :type cum_net_resp: numpy.ndarray
    :param cum_net_resp: Array of cumulative network response for nodes
    :type nodes: list
    :param nodes: Nodes associated with the source of energy in the \
        cum_net_resp; indexed by the sample index of each peak, and the \
        first three elements of a node are used to build the detection name.
    :type threshold: float
    :param threshold: Threshold value
    :type thresh_type: str
    :param thresh_type: Either MAD (Median Absolute Deviation) or abs \
        (absolute) or RMS (Root Mean Squared)
    :type samp_rate: float
    :param samp_rate: Sampling rate in Hz
    :type realstations: list
    :param realstations:
        List of stations used to make the cumulative network response, will be
        reported in the :class:`eqcorrscan.core.match_filter.Detection`
    :type length: float
    :param length: Maximum length of peak to look for in seconds

    :returns:
        Detections as :class:`eqcorrscan.core.match_filter.Detection` objects.
    :rtype: list
    :raises ValueError: If thresh_type is not one of 'MAD', 'abs' or 'RMS'.
    """
    # nan_to_num replaces every NaN, so no separate NaN check is required
    cum_net_resp = np.nan_to_num(cum_net_resp)
    mad = np.median(np.abs(cum_net_resp))
    print('Median of data is: ' + str(np.median(cum_net_resp)))
    print('RMS of data is: ' + str(np.sqrt(np.mean(np.square(cum_net_resp)))))
    print('MAD of data is: ' + str(mad))
    if thresh_type == 'MAD':
        thresh = mad * threshold
    elif thresh_type == 'abs':
        thresh = threshold
    elif thresh_type == 'RMS':
        thresh = _rms(cum_net_resp) * threshold
    else:
        # Fail with a clear message instead of an unbound-variable error
        raise ValueError("thresh_type must be one of 'MAD', 'abs' or 'RMS', "
                         "not " + repr(thresh_type))
    print('Threshold is set to: ' + str(thresh))
    print('Max of data is: ' + str(np.max(cum_net_resp)))
    # length is in seconds; the peak finder is given samples
    peaks = findpeaks.find_peaks2_short(cum_net_resp,
                                        thresh,
                                        length * samp_rate,
                                        debug=0)
    detections = []
    if peaks:
        for peak in peaks:
            # peak is (value, sample index); the index selects the node
            node = nodes[peak[1]]
            node_name = '_'.join([str(node[0]), str(node[1]), str(node[2])])
            detect_time = peak[1] / samp_rate
            detections.append(
                Detection(template_name=node_name,
                          detect_time=detect_time,
                          no_chans=len(realstations),
                          detect_val=peak[0],
                          threshold=thresh,
                          typeofdet='brightness',
                          chans=realstations,
                          id=node_name + str(detect_time),
                          threshold_type=thresh_type,
                          threshold_input=threshold))
    # One detection is made per peak, so this count equals the number of
    # peaks while staying valid when the peak finder returns a falsy value
    print('I have found ' + str(len(detections)) + ' possible detections')
    return detections

Example #19

Show file

File: match_filter.py Project: iceseismic/EQcorrscan

def match_filter(template_names,
                 template_list,
                 st,
                 threshold,
                 threshold_type,
                 trig_int,
                 plotvar,
                 plotdir='.',
                 cores=1,
                 tempdir=False,
                 debug=0,
                 plot_format='jpg'):
    """
    Correlate templates with a day of data and return threshold detections.

    Works on copies of ``st`` and ``template_list``.  The channel sets are
    reconciled first: template traces with no matching data are removed,
    data traces that appear in no template are removed, and every template
    is padded with NaN traces so that all templates share the same
    station/channel pairs.  ``_channel_loop`` then computes one
    cross-correlation sum (cccsum) per template, and every peak above the
    threshold becomes a ``DETECTION``.

    :type template_names: list
    :param template_names: List of template names in the same order as
        template_list
    :type template_list: list :class: 'obspy.Stream'
    :param template_list: A list of templates of which each template is a
        Stream of obspy traces containing seismic data and header information.
    :type st: :class: 'obspy.Stream'
    :param st: Stream of day-long data to correlate with the templates.
        It is copied, so the input stream is left untouched.
    :type threshold: float
    :param threshold: A threshold value set based on the threshold_type
    :type threshold_type: str
    :param threshold_type: One of 'MAD' (threshold * median(abs(cccsum))),
        'absolute' (threshold used as given) or 'av_chan_corr'
        (threshold * number of channels reported by ``_channel_loop`` for
        the template).  Any other value prints a notice and falls back to
        the MAD threshold.
    :type trig_int: float
    :param trig_int: Minimum gap between detections in seconds.
    :type plotvar: bool
    :param plotvar: Turn plotting on or off
    :type plotdir: str
    :param plotdir: Path to plotting folder, plots will be output here,
        defaults to run location.
    :type cores: int
    :param cores: Number of cores to use, passed to ``_channel_loop``.
    :type tempdir: String or False
    :param tempdir: Directory to put temporary files, or False.  Not used
        within this function body.
    :type debug: int
    :param debug: Debug output level, the bigger the number, the more the
        output.
    :type plot_format: str
    :param plot_format: File extension used for the saved cccsum plots.

    :return: List of ``DETECTION`` objects, one per peak above threshold.
    :raises ValueError: If a trace is not exactly one day long
        (sampling_rate * 86400 samples) or the correlation sums are empty.

    .. rubric:: Note
        Plotting within the match-filter routine uses the Agg backend with
        interactive plotting turned off.  This is because the function is
        designed to work in bulk.  If you wish to turn interactive plotting
        on you must import matplotlib in your script first; when you then
        import match_filter you will get the warning that this call to
        matplotlib has no effect, which will mean that match_filter has not
        changed the plotting behaviour.
    """
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    plt.ioff()
    import copy
    from eqcorrscan.utils import EQcorrscan_plotting
    from eqcorrscan.utils import findpeaks
    from obspy import Trace
    import time

    # time.clock was removed in Python 3.8; use perf_counter where available
    # and only fall back to clock on interpreters that lack it.
    timer = getattr(time, 'perf_counter', None) or time.clock

    # Work on copies because both the stream and the templates are edited
    stream = st.copy()
    templates = copy.deepcopy(template_list)
    # Debug option to confirm that the channel names match those in the
    # templates
    if debug >= 3:
        template_ids = list(set(tr.stats.station + '.' + tr.stats.channel
                                for template in templates
                                for tr in template))
        data_ids = list(set(tr.stats.station + '.' + tr.stats.channel
                            for tr in stream))
        print('I have template info for these stations:')
        print(template_ids)
        print('I have daylong data for these stations:')
        print(data_ids)
    # Perform a check that the daylong vectors are daylong
    for tr in stream:
        if tr.stats.sampling_rate * 86400 != tr.stats.npts:
            msg = ' '.join([
                'Data are not daylong for', tr.stats.station, tr.stats.channel
            ])
            raise ValueError(msg)
    outtic = timer()
    if debug >= 2:
        print('Ensuring all template channels have matches in daylong data')
    # Unique (station, channel) pairs over all templates
    stachans = set()
    for template in templates:
        for tr in template:
            stachans.add((tr.stats.station, tr.stats.channel))
    template_stachan = list(stachans)
    # Remove template traces that have no matching data, rather than adding
    # NaN data to the stream
    for station, channel in template_stachan:
        if not stream.select(station=station, channel=channel):
            for template in templates:
                for tr in template.select(station=station, channel=channel):
                    template.remove(tr)
    # Remove un-needed channels; iterate over a snapshot because the stream
    # is modified inside the loop
    for tr in list(stream):
        if (tr.stats.station, tr.stats.channel) not in template_stachan:
            stream.remove(tr)
    # Also pad out templates to have all channels
    for template in templates:
        for station, channel in template_stachan:
            if not template.select(station=station, channel=channel):
                nulltrace = Trace()
                nulltrace.stats.station = station
                nulltrace.stats.channel = channel
                nulltrace.stats.sampling_rate = template[0].stats.sampling_rate
                nulltrace.stats.starttime = template[0].stats.starttime
                # np.nan rather than the np.NaN alias, which NumPy 2 removed
                nulltrace.data = np.full(
                    len(template[0].data), np.nan, dtype=np.float32)
                template += nulltrace
    if debug >= 2:
        print('Starting the correlation run for this day')
    [cccsums, no_chans] = _channel_loop(templates, stream, cores, debug)
    if len(cccsums[0]) == 0:
        raise ValueError('Correlation has not run, zero length cccsum')
    outtoc = timer()
    print(' '.join([
        'Looping over templates and streams took:',
        str(outtoc - outtic), 's'
    ]))
    if debug >= 2:
        print(' '.join(
            ['The shape of the returned cccsums is:',
             str(np.shape(cccsums))]))
        print(' '.join(['This is from', str(len(templates)), 'templates']))
        print(' '.join(
            ['Correlated with',
             str(len(stream)), 'channels of data']))
    detections = []
    for i, cccsum in enumerate(cccsums):
        # Reduce the (potentially day-long) array once instead of calling the
        # builtin max() on it several times
        cccsum_max = np.max(cccsum)
        cccsum_mean = np.mean(cccsum)
        if threshold_type == 'MAD':
            rawthresh = threshold * np.median(np.abs(cccsum))
        elif threshold_type == 'absolute':
            rawthresh = threshold
        elif threshold_type == 'av_chan_corr':
            # Scalar threshold: average single-channel correlation times the
            # number of channels used for this template
            rawthresh = threshold * no_chans[i]
        else:
            print('You have not selected the correct threshold type, I will '
                  'use MAD as I like it')
            rawthresh = threshold * np.median(np.abs(cccsum))
        # Findpeaks returns a list of tuples in the form [(cccsum, sample)]
        print(' '.join(['Threshold is set at:', str(rawthresh)]))
        print(' '.join(['Max of data is:', str(cccsum_max)]))
        print(' '.join(['Mean of data is:', str(cccsum_mean)]))
        if np.abs(cccsum_mean) > 0.05:
            warnings.warn('Mean is not zero!  Check this!')
        # Set up a trace object for the cccsum as this is easier to plot and
        # maintains timing
        if plotvar:
            ref_stats = stream[0].stats
            decimation = int(ref_stats.sampling_rate / 10)
            stream_plot = copy.deepcopy(stream[0])
            # Downsample for plotting
            stream_plot.decimate(decimation)
            cccsum_plot = Trace(cccsum)
            cccsum_plot.stats.sampling_rate = ref_stats.sampling_rate
            # Resample here to maintain shape better
            cccsum_hist = cccsum_plot.copy()
            cccsum_hist = cccsum_hist.decimate(decimation).data
            cccsum_plot = EQcorrscan_plotting.chunk_data(
                cccsum_plot, 10, 'Maxabs').data
            # Enforce same length
            stream_plot.data = stream_plot.data[0:len(cccsum_plot)]
            cccsum_plot = cccsum_plot[0:len(stream_plot.data)]
            cccsum_hist = cccsum_hist[0:len(stream_plot.data)]
            plot_file = (
                plotdir + '/cccsum_plot_' + template_names[i] + '_' +
                ref_stats.starttime.datetime.strftime('%Y-%m-%d') + '.' +
                plot_format)
            EQcorrscan_plotting.triple_plot(
                cccsum_plot, cccsum_hist, stream_plot, rawthresh, True,
                plot_file)
            if debug >= 4:
                julday = ref_stats.starttime.datetime.strftime('%Y%j')
                print(' '.join([
                    'Saved the cccsum to:', template_names[i], julday
                ]))
                np.save(template_names[i] + julday, cccsum)
        tic = timer()
        if debug >= 4:
            np.save('cccsum_' + str(i) + '.npy', cccsum)
        if cccsum_max > rawthresh:
            ref_stats = stream[0].stats
            # trig_int is in seconds; the peak finder is given samples
            min_gap = trig_int * ref_stats.sampling_rate
            if debug >= 3:
                # Extra arguments are only passed at high debug levels
                peaks = findpeaks.find_peaks2_short(
                    cccsum, rawthresh, min_gap, debug, ref_stats.starttime,
                    ref_stats.sampling_rate)
            else:
                peaks = findpeaks.find_peaks2_short(
                    cccsum, rawthresh, min_gap, debug)
        else:
            print('No peaks found above threshold')
            peaks = False
        toc = timer()
        if debug >= 1:
            print(' '.join(['Finding peaks took:', str(toc - tic), 's']))
        if peaks:
            for peak in peaks:
                # peak is (cccsum value, sample index)
                detecttime = stream[0].stats.starttime +\
                    peak[1] / stream[0].stats.sampling_rate
                detections.append(
                    DETECTION(template_names[i], detecttime, no_chans[i],
                              peak[0], rawthresh, 'corr'))
    return detections

Example #20

Show file

File: match_filter.py Project: emilyws1/EQcorrscan

def match_filter(template_names, template_list, st, threshold,
                 threshold_type, trig_int, plotvar, plotdir='.', cores=1,
                 tempdir=False, debug=0, plot_format='png',
                 output_cat=False, extract_detections=False,
                 arg_check=True):
    """
    Main matched-filter detection function.

    Over-arching code to run the correlations of given templates with a \
    day of seismic data and output the detections based on a given threshold.
    For a functional example see the tutorials.

    :type template_names: list
    :param template_names: List of template names in the same order as \
        template_list.  This list is not modified; templates that end up \
        with no usable channels are dropped from an internal copy only.
    :type template_list: list
    :param template_list: A list of templates of which each template is a \
        Stream of obspy traces containing seismic data and header information.
    :type st: obspy.core.stream.Stream
    :param st: An obspy.Stream object containing all the data available and \
        required for the correlations with templates given.  For efficiency \
        this should contain no excess traces which are not in one or more of \
        the templates.  This will now remove excess traces internally, but \
        will copy the stream and work on the copy, leaving your input stream \
        untouched.
    :type threshold: float
    :param threshold: A threshold value set based on the threshold_type
    :type threshold_type: str
    :param threshold_type: The type of threshold to be used, can be MAD, \
        absolute or av_chan_corr.    MAD threshold is calculated as the \
        threshold*(median(abs(cccsum))) where cccsum is the cross-correlation \
        sum for a given template. absolute threshold is a true absolute \
        threshold based on the cccsum value.  av_chan_corr is based on the \
        mean values of single-channel cross-correlations assuming all data \
        are present as required for the template, \
        e.g. av_chan_corr_thresh=threshold*(cccsum/len(template)) where \
        template is a single template from the input and the length is the \
        number of channels within this template; in the code the raw \
        threshold compared against cccsum is threshold * no_chans for that \
        template.
    :type trig_int: float
    :param trig_int: Minimum gap between detections in seconds.
    :type plotvar: bool
    :param plotvar: Turn plotting on or off
    :type plotdir: str
    :param plotdir: Path to plotting folder, plots will be output here, \
        defaults to run location.
    :type tempdir: str
    :param tempdir: Directory to put temporary files, or False.  Not \
        referenced anywhere in this function body.
    :type cores: int
    :param cores: Number of cores to use
    :type debug: int
    :param debug: Debug output level, the bigger the number, the more the \
        output.
    :type plot_format: str
    :param plot_format: Specify format of output plots if saved
    :type output_cat: bool
    :param output_cat: Specifies if matched_filter will output an \
        obspy.Catalog class containing events for each detection. Default \
        is False, in which case matched_filter will output a list of \
        detection classes, as normal.
    :type extract_detections: bool
    :param extract_detections: Specifies whether or not to return a list of \
        streams, one stream per detection.
    :type arg_check: bool
    :param arg_check: Check arguments, defaults to True, but if running in \
        bulk, and you are certain of your arguments, then set to False.

    :return: The list of DETECTION objects on its own when neither \
        output_cat nor extract_detections is set; otherwise a tuple that \
        starts with that list, followed by the obspy Catalog (if output_cat) \
        and then the result of extract_from_stream (if extract_detections).
    :raises IOError: If an argument has the wrong type or threshold_type is \
        not one of MAD, absolute, av_chan_corr.
    :raises ValueError: If a template contains traces of differing length, \
        or if the correlation returns a zero-length cccsum.

    .. note:: Plotting within the match-filter routine uses the Agg backend \
        with interactive plotting turned off.  This is because the function \
        is designed to work in bulk.  If you wish to turn interactive \
        plotting on you must import matplotlib in your script first, when you \
        then import match_filter you will get the warning that this call to \
        matplotlib has no effect, which will mean that match_filter has not \
        changed the plotting behaviour.

    .. note:: The output_cat flag will create an :class: obspy.Catalog \
        containing one event for each :class: 'DETECTIONS' generated by \
        match_filter. Each event will contain a number of comments dealing \
        with correlation values and channels used for the detection. Each \
        channel used for the detection will have a corresponding :class: Pick \
        which will contain time and waveform information. HOWEVER, the user \
        should note that, at present, the pick times do not account for the \
        prepick times inherent in each template. For example, if a template \
        trace starts 0.1 seconds before the actual arrival of that phase, \
        then the pick time generated by match_filter for that phase will be \
        0.1 seconds early. We are looking towards a solution which will \
        involve saving templates alongside associated metadata.
    """
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    plt.ioff()
    import copy
    from eqcorrscan.utils import plotting
    from eqcorrscan.utils import findpeaks
    from obspy import Trace, Catalog, UTCDateTime, Stream
    from obspy.core.event import Event, Pick, CreationInfo, ResourceIdentifier
    from obspy.core.event import Comment, WaveformStreamID
    import time

    # time.clock was removed in Python 3.8; use perf_counter where it exists
    # and only fall back to clock on interpreters that lack it.
    timer = getattr(time, 'perf_counter', None) or time.clock

    if arg_check:
        # Check the arguments to be nice - if arguments wrong type the parallel
        # output for the error won't be useful
        if not isinstance(template_names, list):
            raise IOError('template_names must be of type: list')
        if not isinstance(template_list, list):
            raise IOError('templates must be of type: list')
        for template in template_list:
            if not isinstance(template, Stream):
                msg = 'template in template_list must be of type: ' +\
                      'obspy.core.stream.Stream'
                raise IOError(msg)
        if not isinstance(st, Stream):
            msg = 'st must be of type: obspy.core.stream.Stream'
            raise IOError(msg)
        if str(threshold_type) not in [str('MAD'), str('absolute'),
                                       str('av_chan_corr')]:
            msg = 'threshold_type must be one of: MAD, absolute, av_chan_corr'
            raise IOError(msg)

    # Copy the stream here because we will muck about with it
    stream = st.copy()
    templates = copy.deepcopy(template_list)
    # Debug option to confirm that the channel names match those in the
    # templates
    if debug >= 2:
        template_stachan = []
        data_stachan = []
        for template in templates:
            for tr in template:
                template_stachan.append(tr.stats.station + '.' +
                                        tr.stats.channel)
        for tr in stream:
            data_stachan.append(tr.stats.station + '.' + tr.stats.channel)
        template_stachan = list(set(template_stachan))
        data_stachan = list(set(data_stachan))
        if debug >= 3:
            print('I have template info for these stations:')
            print(template_stachan)
            print('I have daylong data for these stations:')
            print(data_stachan)
    # Perform a check that the daylong vectors are all the same length
    min_start_time = min([tr.stats.starttime for tr in stream])
    max_end_time = max([tr.stats.endtime for tr in stream])
    # NOTE(review): if endtime is the time of the last sample, this product
    # is npts - 1 for a trace spanning the whole window, so the equality
    # below may never hold and the warning may fire for every trace (with
    # zero-length pads) - confirm before changing.
    longest_trace_length = stream[0].stats.sampling_rate * (max_end_time -
                                                            min_start_time)
    for tr in stream:
        if not tr.stats.npts == longest_trace_length:
            msg = 'Data are not equal length, padding short traces'
            warnings.warn(msg)
            start_pad = np.zeros(int(tr.stats.sampling_rate *
                                     (tr.stats.starttime - min_start_time)))
            end_pad = np.zeros(int(tr.stats.sampling_rate *
                                   (max_end_time - tr.stats.endtime)))
            tr.data = np.concatenate([start_pad, tr.data, end_pad])
    # Perform check that all template lengths are internally consistent
    for i, temp in enumerate(template_list):
        if len(set([tr.stats.npts for tr in temp])) > 1:
            msg = 'Template %s contains traces of differing length!! THIS \
                  WILL CAUSE ISSUES' % template_names[i]
            raise ValueError(msg)
    # Call the _template_loop function to do all the correlation work
    outtic = timer()
    # Edit here from previous, stable, but slow match_filter
    # Would be worth testing without an if statement, but with every station in
    # the possible template stations having data, but for those without real
    # data make the data NaN to return NaN ccc_sum
    # Note: this works
    if debug >= 2:
        print('Ensuring all template channels have matches in long data')
    template_stachan = []
    for template in templates:
        for tr in template:
            template_stachan += [(tr.stats.station, tr.stats.channel)]
    template_stachan = list(set(template_stachan))
    # Copy this here to keep it safe
    for stachan in template_stachan:
        if not stream.select(station=stachan[0], channel=stachan[1]):
            # Remove template traces rather than adding NaN data
            for template in templates:
                if template.select(station=stachan[0], channel=stachan[1]):
                    for tr in template.select(station=stachan[0],
                                              channel=stachan[1]):
                        template.remove(tr)
    # Remove un-needed channels
    for tr in stream:
        if not (tr.stats.station, tr.stats.channel) in template_stachan:
            stream.remove(tr)
    # Also pad out templates to have all channels.
    # Build filtered lists instead of removing from the lists being iterated:
    # with a lazy zip (Python 3) a removal makes the loop skip the following
    # template, and removing from template_names would alter the caller's
    # list.
    kept_templates = []
    kept_names = []
    for template, template_name in zip(templates, template_names):
        if len(template) == 0:
            msg = ('No channels matching in continuous data for ' +
                   'template ' + template_name)
            warnings.warn(msg)
            continue
        for stachan in template_stachan:
            if not template.select(station=stachan[0], channel=stachan[1]):
                nulltrace = Trace()
                nulltrace.stats.station = stachan[0]
                nulltrace.stats.channel = stachan[1]
                nulltrace.stats.sampling_rate = template[0].stats.sampling_rate
                nulltrace.stats.starttime = template[0].stats.starttime
                # np.nan is the spelling that exists in every NumPy release
                nulltrace.data = np.array([np.nan] * len(template[0].data),
                                          dtype=np.float32)
                template += nulltrace
        kept_templates.append(template)
        kept_names.append(template_name)
    templates = kept_templates
    template_names = kept_names
    if debug >= 2:
        print('Starting the correlation run for this day')
    [cccsums, no_chans, chans] = _channel_loop(templates=templates,
                                               stream=stream,
                                               cores=cores,
                                               debug=debug)
    if len(cccsums[0]) == 0:
        raise ValueError('Correlation has not run, zero length cccsum')
    outtoc = timer()
    print(' '.join(['Looping over templates and streams took:',
                    str(outtoc - outtic), 's']))
    if debug >= 2:
        print(' '.join(['The shape of the returned cccsums is:',
                        str(np.shape(cccsums))]))
        print(' '.join(['This is from', str(len(templates)), 'templates']))
        print(' '.join(['Correlated with', str(len(stream)),
                        'channels of data']))
    detections = []
    if output_cat:
        det_cat = Catalog()
    for i, cccsum in enumerate(cccsums):
        template = templates[i]
        if str(threshold_type) == str('MAD'):
            rawthresh = threshold * np.median(np.abs(cccsum))
        elif str(threshold_type) == str('absolute'):
            rawthresh = threshold
        elif str(threshold_type) == str('av_chan_corr'):
            rawthresh = threshold * no_chans[i]
        else:
            # Only reachable when arg_check is False; fail with a clear
            # message rather than a NameError on the undefined rawthresh.
            msg = 'threshold_type must be one of: MAD, absolute, av_chan_corr'
            raise IOError(msg)
        # Findpeaks returns a list of tuples in the form [(cccsum, sample)]
        print(' '.join(['Threshold is set at:', str(rawthresh)]))
        print(' '.join(['Max of data is:', str(max(cccsum))]))
        print(' '.join(['Mean of data is:', str(np.mean(cccsum))]))
        if np.abs(np.mean(cccsum)) > 0.05:
            warnings.warn('Mean is not zero!  Check this!')
        # Set up a trace object for the cccsum as this is easier to plot and
        # maintains timing
        if plotvar:
            stream_plot = copy.deepcopy(stream[0])
            # Downsample for plotting
            stream_plot.decimate(int(stream[0].stats.sampling_rate / 10))
            cccsum_plot = Trace(cccsum)
            cccsum_plot.stats.sampling_rate = stream[0].stats.sampling_rate
            # Resample here to maintain shape better
            cccsum_hist = cccsum_plot.copy()
            cccsum_hist = cccsum_hist.decimate(int(stream[0].stats.
                                                   sampling_rate / 10)).data
            cccsum_plot = plotting.chunk_data(cccsum_plot, 10,
                                              'Maxabs').data
            # Enforce same length
            stream_plot.data = stream_plot.data[0:len(cccsum_plot)]
            cccsum_plot = cccsum_plot[0:len(stream_plot.data)]
            cccsum_hist = cccsum_hist[0:len(stream_plot.data)]
            plotting.triple_plot(cccsum_plot, cccsum_hist,
                                 stream_plot, rawthresh, True,
                                 plotdir + '/cccsum_plot_' +
                                 template_names[i] + '_' +
                                 stream[0].stats.starttime.
                                 datetime.strftime('%Y-%m-%d') +
                                 '.' + plot_format)
            if debug >= 4:
                print(' '.join(['Saved the cccsum to:', template_names[i],
                                stream[0].stats.starttime.datetime.
                                strftime('%Y%j')]))
                np.save(template_names[i] +
                        stream[0].stats.starttime.datetime.strftime('%Y%j'),
                        cccsum)
        tic = timer()
        if debug >= 4:
            np.save('cccsum_' + str(i) + '.npy', cccsum)
        if debug >= 3 and max(cccsum) > rawthresh:
            peaks = findpeaks.find_peaks2_short(cccsum, rawthresh,
                                                trig_int * stream[0].stats.
                                                sampling_rate, debug,
                                                stream[0].stats.starttime,
                                                stream[0].stats.sampling_rate)
        elif max(cccsum) > rawthresh:
            peaks = findpeaks.find_peaks2_short(cccsum, rawthresh,
                                                trig_int * stream[0].stats.
                                                sampling_rate, debug)
        else:
            print('No peaks found above threshold')
            peaks = False
        toc = timer()
        if debug >= 1:
            print(' '.join(['Finding peaks took:', str(toc - tic), 's']))
        if peaks:
            for peak in peaks:
                # peak is (cccsum value, sample index); convert the index to
                # an absolute time using the data start and sampling rate.
                detecttime = stream[0].stats.starttime +\
                    peak[1] / stream[0].stats.sampling_rate
                # Detect time must be valid QuakeML uri within resource_id.
                # This will write a formatted string which is still
                # readable by UTCDateTime
                rid = ResourceIdentifier(id=template_names[i] + '_' +
                                         str(detecttime.
                                             strftime('%Y%m%dT%H%M%S.%f')),
                                         prefix='smi:local')
                ev = Event(resource_id=rid)
                cr_i = CreationInfo(author='EQcorrscan',
                                    creation_time=UTCDateTime())
                ev.creation_info = cr_i
                # All detection info in Comments for lack of a better idea
                thresh_str = 'threshold=' + str(rawthresh)
                ccc_str = 'detect_val=' + str(peak[0])
                used_chans = 'channels used: ' +\
                             ' '.join([str(pair) for pair in chans[i]])
                ev.comments.append(Comment(text=thresh_str))
                ev.comments.append(Comment(text=ccc_str))
                ev.comments.append(Comment(text=used_chans))
                min_template_tm = min([tr.stats.starttime for tr in template])
                for tr in template:
                    if (tr.stats.station, tr.stats.channel) not in chans[i]:
                        continue
                    else:
                        # Offset each pick by the trace's delay relative to
                        # the earliest trace in the template.
                        pick_tm = detecttime + (tr.stats.starttime -
                                                min_template_tm)
                        wv_id = WaveformStreamID(network_code=tr.stats.network,
                                                 station_code=tr.stats.station,
                                                 channel_code=tr.stats.channel)
                        ev.picks.append(Pick(time=pick_tm, waveform_id=wv_id))
                detections.append(DETECTION(template_names[i],
                                            detecttime,
                                            no_chans[i], peak[0], rawthresh,
                                            'corr', chans[i], event=ev))
                if output_cat:
                    det_cat.append(ev)
    # Extract once, after every template has been processed: detections is
    # cumulative, so a call per template only repeats work and only the last
    # result is returned (assumes extract_from_stream does not modify its
    # inputs - TODO confirm).
    if extract_detections:
        detection_streams = extract_from_stream(stream, detections)
    del stream, templates
    if output_cat and not extract_detections:
        return detections, det_cat
    elif not extract_detections:
        return detections
    elif extract_detections and not output_cat:
        return detections, detection_streams
    else:
        return detections, det_cat, detection_streams

Example #21

Show file

File: match_filter.py Project: gitter-badger/EQcorrscan

def match_filter(
    template_names, templates, stream, threshold, threshold_type, trig_int, plotvar, cores=1, tempdir=False, debug=0
):
    """
    Over-arching code to run the correlations of given templates with a day of
    seismic data and output the detections based on a given threshold.

    :type template_names: list
    :param template_names: Names of the templates, indexed in the same order\
        as templates; used for output file names and detection labels.
    :type templates: list :class: 'obspy.Stream'
    :param templates: A list of templates of which each template is a Stream of\
        obspy traces containing seismic data and header information.  NaN\
        traces are added to each template for channels it lacks.
    :type stream: :class: 'obspy.Stream'
    :param stream: An obspy.Stream object containing all the data available and\
        required for the correlations with templates given.  For efficiency this\
        should contain no excess traces which are not in one or more of the\
        templates.  NaN traces are added for template channels with no data\
        (no copy is taken here, so the caller's stream is presumably\
        modified - verify against obspy's in-place addition).
    :type threshold: float
    :param threshold: A threshold value set based on the threshold_type
    :type threshold_type: str
    :param threshold_type: The type of threshold to be used, can be MAD,\
        absolute or av_chan_corr.    MAD threshold is calculated as the\
        threshold*(median(abs(cccsum))) where cccsum is the cross-correlation\
        sum for a given template. absolute threshold is a true absolute\
        threshold based on the cccsum value.  av_chan_corr is based on the\
        mean values of single-channel cross-correlations: the raw threshold\
        compared against cccsum is threshold * no_chans for that template.\
        Any other value prints a notice and falls back to MAD.
    :type trig_int: float
    :param trig_int: Minimum gap between detections in seconds.
    :type plotvar: bool
    :param plotvar: If true, triple_plot is called for each template with an\
        output path under plot/ and the cccsum array is saved with np.save.
    :type tempdir: String or False
    :param tempdir: Directory to put temporary files, or False.  Only passed\
        on when the internal match_internal flag is True (hard-coded False).
    :type cores: int
    :param cores: Number of cores to use
    :type debug: int
    :param debug: Debug output level, the bigger the number, the more the output

    :return: :class: 'DETECTIONS' detections for each channel formatted as\
    :class: 'obspy.UTCDateTime' objects.
    :raises ValueError: If a trace is not exactly one day long at its\
        sampling rate, or if the correlation returns a zero-length cccsum.

    """
    from eqcorrscan.utils import findpeaks, EQcorrscan_plotting
    import time
    import copy
    from obspy import Trace

    # time.clock was removed in Python 3.8; use perf_counter where it exists
    # and only fall back to clock on interpreters that lack it.
    timer = getattr(time, "perf_counter", None) or time.clock

    match_internal = False  # Set to True if memory is an issue, if True, will only
    # use about the same amount of memory as the seismic dat
    # take up.  If False, it will use 20-100GB per instance
    # Debug option to confirm that the channel names match those in the templates
    if debug >= 2:
        template_stachan = []
        data_stachan = []
        for template in templates:
            for tr in template:
                template_stachan.append(tr.stats.station + "." + tr.stats.channel)
        for tr in stream:
            data_stachan.append(tr.stats.station + "." + tr.stats.channel)
        template_stachan = list(set(template_stachan))
        data_stachan = list(set(data_stachan))
        if debug >= 3:
            print("I have template info for these stations:")
            print(template_stachan)
            print("I have daylong data for these stations:")
            print(data_stachan)
    # Perform a check that the daylong vectors are daylong
    for tr in stream:
        if not tr.stats.sampling_rate * 86400 == tr.stats.npts:
            raise ValueError("Data are not daylong for " + tr.stats.station + "." + tr.stats.channel)
    # Call the _template_loop function to do all the correlation work
    outtic = timer()
    # Edit here from previous, stable, but slow match_filter
    # Would be worth testing without an if statement, but with every station in
    # the possible template stations having data, but for those without real
    # data make the data NaN to return NaN ccc_sum
    if debug >= 2:
        print("Ensuring all template channels have matches in daylong data")
    template_stachan = []
    for template in templates:
        for tr in template:
            template_stachan += [(tr.stats.station, tr.stats.channel)]
    template_stachan = list(set(template_stachan))
    # Copy this here to keep it safe
    for stachan in template_stachan:
        if not stream.select(station=stachan[0], channel=stachan[1]):
            # Add a trace of NaN's
            nulltrace = Trace()
            nulltrace.stats.station = stachan[0]
            nulltrace.stats.channel = stachan[1]
            nulltrace.stats.sampling_rate = stream[0].stats.sampling_rate
            nulltrace.stats.starttime = stream[0].stats.starttime
            # np.nan is the spelling that exists in every NumPy release
            nulltrace.data = np.array([np.nan] * len(stream[0].data), dtype=np.float32)
            stream += nulltrace
    # Also pad out templates to have all channels
    for template in templates:
        for stachan in template_stachan:
            if not template.select(station=stachan[0], channel=stachan[1]):
                nulltrace = Trace()
                nulltrace.stats.station = stachan[0]
                nulltrace.stats.channel = stachan[1]
                nulltrace.stats.sampling_rate = template[0].stats.sampling_rate
                nulltrace.stats.starttime = template[0].stats.starttime
                nulltrace.data = np.array([np.nan] * len(template[0].data), dtype=np.float32)
                template += nulltrace

    if debug >= 2:
        print("Starting the correlation run for this day")
    if match_internal:
        [cccsums, no_chans] = run_channel_loop(templates, stream, tempdir)
    else:
        [cccsums, no_chans] = _channel_loop(templates, stream, cores, debug)
    if len(cccsums[0]) == 0:
        raise ValueError("Correlation has not run, zero length cccsum")
    outtoc = timer()
    print("Looping over templates and streams took: " + str(outtoc - outtic) + " s")
    if debug >= 2:
        print("The shape of the returned cccsums is: " + str(np.shape(cccsums)))
        print("This is from " + str(len(templates)) + " templates")
        print("Correlated with " + str(len(stream)) + " channels of data")
    detections = []
    for i, cccsum in enumerate(cccsums):
        if threshold_type == "MAD":
            rawthresh = threshold * np.median(np.abs(cccsum))
        elif threshold_type == "absolute":
            rawthresh = threshold
        elif threshold_type == "av_chan_corr":
            # Compare threshold_type (not the numeric threshold) so this
            # branch is reachable.  The threshold must be a scalar: scaling
            # by no_chans[i] (presumably the channel count for template i -
            # verify against _channel_loop) keeps the comparisons with
            # max(cccsum) below well defined, whereas dividing the cccsum
            # array itself would produce an array.
            rawthresh = threshold * no_chans[i]
        else:
            print("You have not selected the correct threshold type, I will use MAD as I like it")
            # Use the median so the fallback matches the MAD branch above
            rawthresh = threshold * np.median(np.abs(cccsum))
        # Findpeaks returns a list of tuples in the form [(cccsum, sample)]
        print("Threshold is set at: " + str(rawthresh))
        print("Max of data is: " + str(max(cccsum)))
        print("Mean of data is: " + str(np.mean(cccsum)))
        if np.abs(np.mean(cccsum)) > 0.05:
            warnings.warn("Mean is not zero!  Check this!")
        # Set up a trace object for the cccsum as this is easier to plot and
        # maintains timing
        if plotvar:
            stream_plot = copy.deepcopy(stream[0])
            # Downsample for plotting
            stream_plot.decimate(int(stream[0].stats.sampling_rate / 20))
            cccsum_plot = Trace(cccsum)
            cccsum_plot.stats.sampling_rate = stream[0].stats.sampling_rate
            # Resample here to maintain shape better
            cccsum_hist = cccsum_plot.copy()
            cccsum_hist = cccsum_hist.decimate(int(stream[0].stats.sampling_rate / 20)).data
            cccsum_plot = EQcorrscan_plotting.chunk_data(cccsum_plot, 20, "Maxabs").data
            # Enforce same length
            stream_plot.data = stream_plot.data[0 : len(cccsum_plot)]
            cccsum_plot = cccsum_plot[0 : len(stream_plot.data)]
            cccsum_hist = cccsum_hist[0 : len(stream_plot.data)]
            EQcorrscan_plotting.triple_plot(
                cccsum_plot,
                cccsum_hist,
                stream_plot,
                rawthresh,
                True,
                "plot/cccsum_plot_"
                + template_names[i]
                + "_"
                + str(stream[0].stats.starttime.year)
                + "-"
                + str(stream[0].stats.starttime.month)
                + "-"
                + str(stream[0].stats.starttime.day)
                + ".jpg",
            )
            np.save(template_names[i] + stream[0].stats.starttime.datetime.strftime("%Y%j"), cccsum)
        tic = timer()
        if debug >= 4:
            np.save("cccsum_" + str(i) + ".npy", cccsum)
        if debug >= 3 and max(cccsum) > rawthresh:
            peaks = findpeaks.find_peaks2_short(
                cccsum,
                rawthresh,
                trig_int * stream[0].stats.sampling_rate,
                debug,
                stream[0].stats.starttime,
                stream[0].stats.sampling_rate,
            )
        elif max(cccsum) > rawthresh:
            peaks = findpeaks.find_peaks2_short(cccsum, rawthresh, trig_int * stream[0].stats.sampling_rate, debug)
        else:
            print("No peaks found above threshold")
            peaks = False
        toc = timer()
        if debug >= 1:
            print("Finding peaks took: " + str(toc - tic) + " s")
        if peaks:
            for peak in peaks:
                # peak is (cccsum value, sample index); convert the index to
                # an absolute time using the data start and sampling rate.
                detecttime = stream[0].stats.starttime + peak[1] / stream[0].stats.sampling_rate
                detections.append(DETECTION(template_names[i], detecttime, no_chans[i], peak[0], rawthresh, "corr"))

    return detections

Example #22

Show file

def _detect(detector,
            st,
            threshold,
            trig_int,
            moveout=0,
            min_trig=0,
            process=True,
            extract_detections=False,
            debug=0):
    """
    Detect within continuous data using the subspace method.

    Not to be called directly, use the detector.detect method.

    :type detector: eqcorrscan.core.subspace.Detector
    :param detector: Detector to use.
    :type st: obspy.core.stream.Stream
    :param st: Un-processed stream to detect within using the subspace \
        detector
    :type threshold: float
    :param threshold: Threshold value for detections between 0-1
    :type trig_int: float
    :param trig_int: Minimum trigger interval in seconds.
    :type moveout: float
    :param moveout: Maximum allowable moveout window for non-multiplexed, \
        network detection. Only passed on to the coincidence trigger, \
        i.e. only used when detector.multiplex is False.
    :type min_trig: int
    :param min_trig: Minimum number of stations exceeding threshold for \
        non-multiplexed, network detection. Only passed on to the \
        coincidence trigger, i.e. only used when detector.multiplex is False.
    :type process: bool
    :param process: Whether or not to process the stream according to the \
        parameters defined by the detector.  Default is to process the \
        data (True).
    :type extract_detections: bool
    :param extract_detections: Whether to extract waveforms for each \
        detection or not, if true will return detections and streams.
    :type debug: int
    :param debug: Debug output level from 0-5.

    :return: list of detections, or, if extract_detections is True, a \
        tuple of (detections, detection_streams).
    :rtype: list of eqcorrscan.core.match_filter.DETECTION

    :raises ValueError: If process is False and a trace in st does not \
        have the same sampling rate as the detector.
    """
    # time.clock() was removed in Python 3.8; default_timer is available on
    # both Python 2 and 3 and measures elapsed (wall-clock) time on Python 3.
    from timeit import default_timer

    from eqcorrscan.core import subspace_statistic
    detections = []
    # First process the stream
    if process:
        if debug > 0:
            print('Processing Stream')
        stream, stachans = _subspace_process(
            streams=[st.copy()],
            lowcut=detector.lowcut,
            highcut=detector.highcut,
            filt_order=detector.filt_order,
            sampling_rate=detector.sampling_rate,
            multiplex=detector.multiplex,
            stachans=detector.stachans,
            parallel=True,
            align=False,
            shift_len=None,
            reject=False)
    else:
        # Check the sampling rate at the very least
        for tr in st:
            if tr.stats.sampling_rate != detector.sampling_rate:
                raise ValueError('Sampling rates do not match.')
        stream = [st]
        stachans = detector.stachans
    # Timing starts after any pre-processing, so the reported duration only
    # covers the statistic computation, peak finding and event construction.
    outtic = default_timer()
    if debug > 0:
        print('Computing detection statistics')
    stats = np.zeros(
        (len(stream[0]), len(stream[0][0]) - len(detector.data[0][0]) + 1),
        dtype=np.float32)
    for i, (det_channel, in_channel) in enumerate(
            zip(detector.data, stream[0])):
        # Hard typing in Cython loop requires float32 type.
        stats[i] = subspace_statistic.det_statistic(
            detector=det_channel.astype(np.float32),
            data=in_channel.data.astype(np.float32))
        if debug > 0:
            print(stats[i].shape)
        if debug > 3:
            plt.plot(stats[i])
            plt.show()
    # Number of statistic samples per second of data: multiplexed data
    # interleave every station-channel, so the effective rate is scaled by
    # the number of station-channel pairs.
    if detector.multiplex:
        samples_per_second = detector.sampling_rate * len(detector.stachans)
    else:
        samples_per_second = detector.sampling_rate
    trig_int_samples = samples_per_second * trig_int
    if debug > 0:
        print('Finding peaks')
    peaks = [findpeaks.find_peaks2_short(arr=channel_stats,
                                         thresh=threshold,
                                         trig_int=trig_int_samples,
                                         debug=debug)
             for channel_stats in stats]
    if not detector.multiplex:
        # Conduct network coincidence triggering
        peaks = findpeaks.coin_trig(peaks=peaks,
                                    samp_rate=detector.sampling_rate,
                                    moveout=moveout,
                                    min_trig=min_trig,
                                    stachans=stachans,
                                    trig_int=trig_int)
    else:
        peaks = peaks[0]
    if len(peaks) > 0:
        # Everything below that does not depend on the individual peak is
        # computed once rather than once per detection.
        thresh_str = 'threshold=' + str(threshold)
        used_chans = 'channels used: ' +\
            ' '.join([str(pair) for pair in detector.stachans])
        # Look up the network code for each station-channel pair; fall back
        # to an empty code when the stream has no matching trace.
        pick_ids = []
        for stachan in detector.stachans:
            tr = st.select(station=stachan[0], channel=stachan[1])
            net_code = tr[0].stats.network if tr else ''
            pick_ids.append((net_code, stachan[0], stachan[1]))
        for peak in peaks:
            # peak is (detection value, sample index); convert the index to
            # an absolute time relative to the start of the first trace.
            detecttime = st[0].stats.starttime + (
                peak[1] / samples_per_second)
            rid = ResourceIdentifier(id=detector.name + '_' + str(detecttime),
                                     prefix='smi:local')
            ev = Event(resource_id=rid)
            cr_i = CreationInfo(author='EQcorrscan',
                                creation_time=UTCDateTime())
            ev.creation_info = cr_i
            # All detection info in Comments for lack of a better idea
            ccc_str = 'detect_val=' + str(peak[0])
            ev.comments.append(Comment(text=thresh_str))
            ev.comments.append(Comment(text=ccc_str))
            ev.comments.append(Comment(text=used_chans))
            # Every channel gets a pick at the detection time; a fresh
            # WaveformStreamID is built per pick so events share no state.
            for net_code, sta_code, chan_code in pick_ids:
                wv_id = WaveformStreamID(network_code=net_code,
                                         station_code=sta_code,
                                         channel_code=chan_code)
                ev.picks.append(Pick(time=detecttime, waveform_id=wv_id))
            detections.append(
                DETECTION(detector.name,
                          detecttime,
                          len(detector.stachans),
                          peak[0],
                          threshold,
                          'subspace',
                          detector.stachans,
                          event=ev))
    outtoc = default_timer()
    print('Detection took %s seconds' % str(outtoc - outtic))
    if extract_detections:
        detection_streams = extract_from_stream(st, detections)
        return detections, detection_streams
    return detections