Exemple #1
0
 def test_chunk_data(self):
     desired_rate = 10
     trout = chunk_data(self.st[0], samp_rate=desired_rate, state="Mean")
     assert trout.stats.sampling_rate == desired_rate
     assert np.allclose(trout.data.mean(), self.st[0].data.mean(),
                        atol=.00000000001)
     trout = chunk_data(self.st[0], samp_rate=desired_rate, state="Min")
     assert trout.data.mean() < self.st[0].data.mean()
     assert trout.stats.sampling_rate == desired_rate
     trout = chunk_data(self.st[0], samp_rate=desired_rate, state="Max")
     assert trout.stats.sampling_rate == desired_rate
     assert trout.data.mean() > self.st[0].data.mean()
     trout = chunk_data(self.st[0], samp_rate=desired_rate, state="Maxabs")
     assert trout.stats.sampling_rate == desired_rate
Exemple #2
0
def _match_filter_plot(stream, cccsum, template_names, rawthresh, plotdir,
                       plot_format, i):
    """
    Plotting function to match_filter.

    :param stream:
    :param cccsum:
    :param template_names:
    :param rawthresh:
    :param plotdir:
    :param plot_format:
    :param i:
    :param debug:
    :return:
    """
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    plt.ioff()
    from eqcorrscan.utils import plotting
    stream_plot = copy.deepcopy(stream[0])
    # Downsample for plotting
    stream_plot.decimate(int(stream[0].stats.sampling_rate / 10))
    cccsum_plot = Trace(cccsum)
    cccsum_plot.stats.sampling_rate = stream[0].stats.sampling_rate
    # Resample here to maintain shape better
    cccsum_hist = cccsum_plot.copy()
    cccsum_hist = cccsum_hist.decimate(int(stream[0].stats.
                                           sampling_rate / 10)).data
    cccsum_plot = plotting.chunk_data(cccsum_plot, 10, 'Maxabs').data
    # Enforce same length
    stream_plot.data = stream_plot.data[0:len(cccsum_plot)]
    cccsum_plot = cccsum_plot[0:len(stream_plot.data)]
    cccsum_hist = cccsum_hist[0:len(stream_plot.data)]
    plot_name = (plotdir + os.sep + 'cccsum_plot_' + template_names[i] + '_' +
                 stream[0].stats.starttime.datetime.strftime('%Y-%m-%d') +
                 '.' + plot_format)
    plotting.triple_plot(cccsum=cccsum_plot, cccsum_hist=cccsum_hist,
                         trace=stream_plot, threshold=rawthresh, save=True,
                         savefile=plot_name)
Exemple #3
0
def match_filter(template_names,
                 template_list,
                 st,
                 threshold,
                 threshold_type,
                 trig_int,
                 plotvar,
                 plotdir='.',
                 cores=1,
                 tempdir=False,
                 debug=0,
                 plot_format='jpg'):
    r"""Over-arching code to run the correlations of given templates with a \
    day of seismic data and output the detections based on a given threshold.

    :type template_names: list
    :param template_names: List of template names in the same order as \
        template_list
    :type template_list: list :class: 'obspy.Stream'
    :param template_list: A list of templates of which each template is a \
        Stream of obspy traces containing seismic data and header information.
    :type st: :class: 'obspy.Stream'
    :param st: An obspy.Stream object containing all the data available and \
        required for the correlations with templates given.  For efficiency \
        this should contain no excess traces which are not in one or more of \
        the templates.  This will now remove excess traces internally, but \
        will copy the stream and work on the copy, leaving your input stream \
        untouched.
    :type threshold: float
    :param threshold: A threshold value set based on the threshold_type
    :type threshold_type: str
    :param threshold_type: The type of threshold to be used, can be MAD, \
        absolute or av_chan_corr.    MAD threshold is calculated as the \
        threshold*(median(abs(cccsum))) where cccsum is the cross-correlation \
        sum for a given template. absolute threhsold is a true absolute \
        threshold based on the cccsum value av_chan_corr is based on the mean \
        values of single-channel cross-correlations assuming all data are \
        present as required for the template, \
        e.g. av_chan_corr_thresh=threshold*(cccsum/len(template)) where \
        template is a single template from the input and the length is the \
        number of channels within this template.
    :type trig_int: float
    :param trig_int: Minimum gap between detections in seconds.
    :type plotvar: bool
    :param plotvar: Turn plotting on or off
    :type plotdir: str
    :param plotdir: Path to plotting folder, plots will be output here, \
        defaults to run location.
    :type tempdir: String or False
    :param tempdir: Directory to put temporary files, or False
    :type cores: int
    :param cores: Number of cores to use
    :type debug: int
    :param debug: Debug output level, the bigger the number, the more the \
        output.

    :return: :class: 'DETECTIONS' detections for each channel formatted as \
        :class: 'obspy.UTCDateTime' objects.

    .. note:: Plotting within the match-filter routine uses the Agg backend \
        with interactive plotting turned off.  This is because the function \
        is designed to work in bulk.  If you wish to turn interactive \
        plotting on you must import matplotlib in your script first, when you \
        them import match_filter you will get the warning that this call to \
        matplotlib has no effect, which will mean that match_filter has not \
        changed the plotting behaviour.
    """
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    plt.ioff()
    import copy
    from eqcorrscan.utils import plotting
    from eqcorrscan.utils import findpeaks
    from obspy import Trace
    import time

    # Copy the stream here because we will muck about with it
    stream = st.copy()
    templates = copy.deepcopy(template_list)
    # Debug option to confirm that the channel names match those in the
    # templates
    if debug >= 2:
        template_stachan = []
        data_stachan = []
        for template in templates:
            for tr in template:
                template_stachan.append(tr.stats.station + '.' +
                                        tr.stats.channel)
        for tr in stream:
            data_stachan.append(tr.stats.station + '.' + tr.stats.channel)
        template_stachan = list(set(template_stachan))
        data_stachan = list(set(data_stachan))
        if debug >= 3:
            print('I have template info for these stations:')
            print(template_stachan)
            print('I have daylong data for these stations:')
            print(data_stachan)
    # Perform a check that the daylong vectors are daylong
    for tr in stream:
        if not tr.stats.sampling_rate * 86400 == tr.stats.npts:
            msg = ' '.join([
                'Data are not daylong for', tr.stats.station, tr.stats.channel
            ])
            raise ValueError(msg)
    # Call the _template_loop function to do all the correlation work
    outtic = time.clock()
    # Edit here from previous, stable, but slow match_filter
    # Would be worth testing without an if statement, but with every station in
    # the possible template stations having data, but for those without real
    # data make the data NaN to return NaN ccc_sum
    # Note: this works
    if debug >= 2:
        print('Ensuring all template channels have matches in daylong data')
    template_stachan = []
    for template in templates:
        for tr in template:
            template_stachan += [(tr.stats.station, tr.stats.channel)]
    template_stachan = list(set(template_stachan))
    # Copy this here to keep it safe
    for stachan in template_stachan:
        if not stream.select(station=stachan[0], channel=stachan[1]):
            # Remove template traces rather than adding NaN data
            for template in templates:
                if template.select(station=stachan[0], channel=stachan[1]):
                    for tr in template.select(station=stachan[0],
                                              channel=stachan[1]):
                        template.remove(tr)
    # Remove un-needed channels
    for tr in stream:
        if not (tr.stats.station, tr.stats.channel) in template_stachan:
            stream.remove(tr)
    # Also pad out templates to have all channels
    for template, template_name in zip(templates, template_names):
        if len(template) == 0:
            msg = ('No channels matching in continuous data for ' +
                   'template' + template_name)
            warnings.warn(msg)
            templates.remove(template)
            template_names.remove(template_name)
            continue
        for stachan in template_stachan:
            if not template.select(station=stachan[0], channel=stachan[1]):
                nulltrace = Trace()
                nulltrace.stats.station = stachan[0]
                nulltrace.stats.channel = stachan[1]
                nulltrace.stats.sampling_rate = template[0].stats.sampling_rate
                nulltrace.stats.starttime = template[0].stats.starttime
                nulltrace.data = np.array([np.NaN] * len(template[0].data),
                                          dtype=np.float32)
                template += nulltrace
    if debug >= 2:
        print('Starting the correlation run for this day')
    [cccsums, no_chans, chans] = _channel_loop(templates, stream, cores, debug)
    if len(cccsums[0]) == 0:
        raise ValueError('Correlation has not run, zero length cccsum')
    outtoc = time.clock()
    print(' '.join([
        'Looping over templates and streams took:',
        str(outtoc - outtic), 's'
    ]))
    if debug >= 2:
        print(' '.join(
            ['The shape of the returned cccsums is:',
             str(np.shape(cccsums))]))
        print(' '.join(['This is from', str(len(templates)), 'templates']))
        print(' '.join(
            ['Correlated with',
             str(len(stream)), 'channels of data']))
    detections = []
    for i, cccsum in enumerate(cccsums):
        template = templates[i]
        if threshold_type == 'MAD':
            rawthresh = threshold * np.median(np.abs(cccsum))
        elif threshold_type == 'absolute':
            rawthresh = threshold
        elif threshold == 'av_chan_corr':
            rawthresh = threshold * (cccsum / len(template))
        else:
            print('You have not selected the correct threshold type, I will' +
                  'use MAD as I like it')
            rawthresh = threshold * np.mean(np.abs(cccsum))
        # Findpeaks returns a list of tuples in the form [(cccsum, sample)]
        print(' '.join(['Threshold is set at:', str(rawthresh)]))
        print(' '.join(['Max of data is:', str(max(cccsum))]))
        print(' '.join(['Mean of data is:', str(np.mean(cccsum))]))
        if np.abs(np.mean(cccsum)) > 0.05:
            warnings.warn('Mean is not zero!  Check this!')
        # Set up a trace object for the cccsum as this is easier to plot and
        # maintins timing
        if plotvar:
            stream_plot = copy.deepcopy(stream[0])
            # Downsample for plotting
            stream_plot.decimate(int(stream[0].stats.sampling_rate / 10))
            cccsum_plot = Trace(cccsum)
            cccsum_plot.stats.sampling_rate = stream[0].stats.sampling_rate
            # Resample here to maintain shape better
            cccsum_hist = cccsum_plot.copy()
            cccsum_hist = cccsum_hist.decimate(
                int(stream[0].stats.sampling_rate / 10)).data
            cccsum_plot = plotting.chunk_data(cccsum_plot, 10, 'Maxabs').data
            # Enforce same length
            stream_plot.data = stream_plot.data[0:len(cccsum_plot)]
            cccsum_plot = cccsum_plot[0:len(stream_plot.data)]
            cccsum_hist = cccsum_hist[0:len(stream_plot.data)]
            plotting.triple_plot(
                cccsum_plot, cccsum_hist, stream_plot, rawthresh, True,
                plotdir + '/cccsum_plot_' + template_names[i] + '_' +
                stream[0].stats.starttime.datetime.strftime('%Y-%m-%d') + '.' +
                plot_format)
            if debug >= 4:
                print(' '.join([
                    'Saved the cccsum to:', template_names[i],
                    stream[0].stats.starttime.datetime.strftime('%Y%j')
                ]))
                np.save(
                    template_names[i] +
                    stream[0].stats.starttime.datetime.strftime('%Y%j'),
                    cccsum)
        tic = time.clock()
        if debug >= 4:
            np.save('cccsum_' + str(i) + '.npy', cccsum)
        if debug >= 3 and max(cccsum) > rawthresh:
            peaks = findpeaks.find_peaks2_short(
                cccsum, rawthresh, trig_int * stream[0].stats.sampling_rate,
                debug, stream[0].stats.starttime,
                stream[0].stats.sampling_rate)
        elif max(cccsum) > rawthresh:
            peaks = findpeaks.find_peaks2_short(
                cccsum, rawthresh, trig_int * stream[0].stats.sampling_rate,
                debug)
        else:
            print('No peaks found above threshold')
            peaks = False
        toc = time.clock()
        if debug >= 1:
            print(' '.join(['Finding peaks took:', str(toc - tic), 's']))
        if peaks:
            for peak in peaks:
                detecttime = stream[0].stats.starttime +\
                    peak[1] / stream[0].stats.sampling_rate
                detections.append(
                    DETECTION(template_names[i], detecttime, no_chans[i],
                              peak[0], rawthresh, 'corr', chans[i]))
    del stream, templates
    return detections
Exemple #4
0
def match_filter(template_names, template_list, st, threshold,
                 threshold_type, trig_int, plotvar, plotdir='.', cores=1,
                 tempdir=False, debug=0, plot_format='png',
                 output_cat=False, extract_detections=False,
                 arg_check=True):
    """
    Main matched-filter detection function.
    Over-arching code to run the correlations of given templates with a \
    day of seismic data and output the detections based on a given threshold.
    For a functional example see the tutorials.

    :type template_names: list
    :param template_names: List of template names in the same order as \
        template_list
    :type template_list: list
    :param template_list: A list of templates of which each template is a \
        Stream of obspy traces containing seismic data and header information.
    :type st: obspy.core.stream.Stream
    :param st: An obspy.Stream object containing all the data available and \
        required for the correlations with templates given.  For efficiency \
        this should contain no excess traces which are not in one or more of \
        the templates.  This will now remove excess traces internally, but \
        will copy the stream and work on the copy, leaving your input stream \
        untouched.
    :type threshold: float
    :param threshold: A threshold value set based on the threshold_type
    :type threshold_type: str
    :param threshold_type: The type of threshold to be used, can be MAD, \
        absolute or av_chan_corr.    MAD threshold is calculated as the \
        threshold*(median(abs(cccsum))) where cccsum is the cross-correlation \
        sum for a given template. absolute threhsold is a true absolute \
        threshold based on the cccsum value av_chan_corr is based on the mean \
        values of single-channel cross-correlations assuming all data are \
        present as required for the template, \
        e.g. av_chan_corr_thresh=threshold*(cccsum/len(template)) where \
        template is a single template from the input and the length is the \
        number of channels within this template.
    :type trig_int: float
    :param trig_int: Minimum gap between detections in seconds.
    :type plotvar: bool
    :param plotvar: Turn plotting on or off
    :type plotdir: str
    :param plotdir: Path to plotting folder, plots will be output here, \
        defaults to run location.
    :type tempdir: str
    :param tempdir: Directory to put temporary files, or False
    :type cores: int
    :param cores: Number of cores to use
    :type debug: int
    :param debug: Debug output level, the bigger the number, the more the \
        output.
    :type plot_format: str
    :param plot_format: Specify format of output plots if saved
    :type output_cat: bool
    :param output_cat: Specifies if matched_filter will output an \
        obspy.Catalog class containing events for each detection. Default \
        is False, in which case matched_filter will output a list of \
        detection classes, as normal.
    :type extract_detections: bool
    :param extract_detections: Specifies whether or not to return a list of \
        streams, one stream per detection.
    :type arg_check: bool
    :param arg_check: Check arguments, defaults to True, but if running in \
        bulk, and you are certain of your arguments, then set to False.

    :return: :class: 'DETECTIONS' detections for each channel formatted as \
        :class: 'obspy.UTCDateTime' objects.
    :return: :class: obspy.Catalog containing events for each detection.
    :return: list of :class: obspy.Stream objects for each detection.

    .. note:: Plotting within the match-filter routine uses the Agg backend \
        with interactive plotting turned off.  This is because the function \
        is designed to work in bulk.  If you wish to turn interactive \
        plotting on you must import matplotlib in your script first, when you \
        them import match_filter you will get the warning that this call to \
        matplotlib has no effect, which will mean that match_filter has not \
        changed the plotting behaviour.

    .. note:: The output_cat flag will create an :class: obspy.Catalog \
        containing one event for each :class: 'DETECTIONS' generated by \
        match_filter. Each event will contain a number of comments dealing \
        with correlation values and channels used for the detection. Each \
        channel used for the detection will have a corresponding :class: Pick \
        which will contain time and waveform information. HOWEVER, the user \
        should note that, at present, the pick times do not account for the \
        prepick times inherent in each template. For example, if a template \
        trace starts 0.1 seconds before the actual arrival of that phase, \
        then the pick time generated by match_filter for that phase will be \
        0.1 seconds early. We are looking towards a solution which will \
        involve saving templates alongside associated metadata.
    """
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    plt.ioff()
    import copy
    from eqcorrscan.utils import plotting
    from eqcorrscan.utils import findpeaks
    from obspy import Trace, Catalog, UTCDateTime, Stream
    from obspy.core.event import Event, Pick, CreationInfo, ResourceIdentifier
    from obspy.core.event import Comment, WaveformStreamID
    import time

    if arg_check:
        # Check the arguments to be nice - if arguments wrong type the parallel
        # output for the error won't be useful
        if not type(template_names) == list:
            raise IOError('template_names must be of type: list')
        if not type(template_list) == list:
            raise IOError('templates must be of type: list')
        for template in template_list:
            if not type(template) == Stream:
                msg = 'template in template_list must be of type: ' +\
                      'obspy.core.stream.Stream'
                raise IOError(msg)
        if not type(st) == Stream:
            msg = 'st must be of type: obspy.core.stream.Stream'
            raise IOError(msg)
        if str(threshold_type) not in [str('MAD'), str('absolute'),
                                       str('av_chan_corr')]:
            msg = 'threshold_type must be one of: MAD, absolute, av_chan_corr'
            raise IOError(msg)

    # Copy the stream here because we will muck about with it
    stream = st.copy()
    templates = copy.deepcopy(template_list)
    # Debug option to confirm that the channel names match those in the
    # templates
    if debug >= 2:
        template_stachan = []
        data_stachan = []
        for template in templates:
            for tr in template:
                template_stachan.append(tr.stats.station + '.' +
                                        tr.stats.channel)
        for tr in stream:
            data_stachan.append(tr.stats.station + '.' + tr.stats.channel)
        template_stachan = list(set(template_stachan))
        data_stachan = list(set(data_stachan))
        if debug >= 3:
            print('I have template info for these stations:')
            print(template_stachan)
            print('I have daylong data for these stations:')
            print(data_stachan)
    # Perform a check that the daylong vectors are all the same length
    min_start_time = min([tr.stats.starttime for tr in stream])
    max_end_time = max([tr.stats.endtime for tr in stream])
    longest_trace_length = stream[0].stats.sampling_rate * (max_end_time -
                                                            min_start_time)
    for tr in stream:
        if not tr.stats.npts == longest_trace_length:
            msg = 'Data are not equal length, padding short traces'
            warnings.warn(msg)
            start_pad = np.zeros(int(tr.stats.sampling_rate *
                                     (tr.stats.starttime - min_start_time)))
            end_pad = np.zeros(int(tr.stats.sampling_rate *
                                   (max_end_time - tr.stats.endtime)))
            tr.data = np.concatenate([start_pad, tr.data, end_pad])
    # Perform check that all template lengths are internally consistent
    for i, temp in enumerate(template_list):
        if len(set([tr.stats.npts for tr in temp])) > 1:
            msg = 'Template %s contains traces of differing length!! THIS \
                  WILL CAUSE ISSUES' % template_names[i]
            raise ValueError(msg)
    # Call the _template_loop function to do all the correlation work
    outtic = time.clock()
    # Edit here from previous, stable, but slow match_filter
    # Would be worth testing without an if statement, but with every station in
    # the possible template stations having data, but for those without real
    # data make the data NaN to return NaN ccc_sum
    # Note: this works
    if debug >= 2:
        print('Ensuring all template channels have matches in long data')
    template_stachan = []
    for template in templates:
        for tr in template:
            template_stachan += [(tr.stats.station, tr.stats.channel)]
    template_stachan = list(set(template_stachan))
    # Copy this here to keep it safe
    for stachan in template_stachan:
        if not stream.select(station=stachan[0], channel=stachan[1]):
            # Remove template traces rather than adding NaN data
            for template in templates:
                if template.select(station=stachan[0], channel=stachan[1]):
                    for tr in template.select(station=stachan[0],
                                              channel=stachan[1]):
                        template.remove(tr)
    # Remove un-needed channels
    for tr in stream:
        if not (tr.stats.station, tr.stats.channel) in template_stachan:
            stream.remove(tr)
    # Also pad out templates to have all channels
    for template, template_name in zip(templates, template_names):
        if len(template) == 0:
            msg = ('No channels matching in continuous data for ' +
                   'template' + template_name)
            warnings.warn(msg)
            templates.remove(template)
            template_names.remove(template_name)
            continue
        for stachan in template_stachan:
            if not template.select(station=stachan[0], channel=stachan[1]):
                nulltrace = Trace()
                nulltrace.stats.station = stachan[0]
                nulltrace.stats.channel = stachan[1]
                nulltrace.stats.sampling_rate = template[0].stats.sampling_rate
                nulltrace.stats.starttime = template[0].stats.starttime
                nulltrace.data = np.array([np.NaN] * len(template[0].data),
                                          dtype=np.float32)
                template += nulltrace
    if debug >= 2:
        print('Starting the correlation run for this day')
    [cccsums, no_chans, chans] = _channel_loop(templates=templates,
                                               stream=stream,
                                               cores=cores,
                                               debug=debug)
    if len(cccsums[0]) == 0:
        raise ValueError('Correlation has not run, zero length cccsum')
    outtoc = time.clock()
    print(' '.join(['Looping over templates and streams took:',
                    str(outtoc - outtic), 's']))
    if debug >= 2:
        print(' '.join(['The shape of the returned cccsums is:',
                        str(np.shape(cccsums))]))
        print(' '.join(['This is from', str(len(templates)), 'templates']))
        print(' '.join(['Correlated with', str(len(stream)),
                        'channels of data']))
    detections = []
    if output_cat:
        det_cat = Catalog()
    for i, cccsum in enumerate(cccsums):
        template = templates[i]
        if str(threshold_type) == str('MAD'):
            rawthresh = threshold * np.median(np.abs(cccsum))
        elif str(threshold_type) == str('absolute'):
            rawthresh = threshold
        elif str(threshold_type) == str('av_chan_corr'):
            rawthresh = threshold * no_chans[i]
        # Findpeaks returns a list of tuples in the form [(cccsum, sample)]
        print(' '.join(['Threshold is set at:', str(rawthresh)]))
        print(' '.join(['Max of data is:', str(max(cccsum))]))
        print(' '.join(['Mean of data is:', str(np.mean(cccsum))]))
        if np.abs(np.mean(cccsum)) > 0.05:
            warnings.warn('Mean is not zero!  Check this!')
        # Set up a trace object for the cccsum as this is easier to plot and
        # maintains timing
        if plotvar:
            stream_plot = copy.deepcopy(stream[0])
            # Downsample for plotting
            stream_plot.decimate(int(stream[0].stats.sampling_rate / 10))
            cccsum_plot = Trace(cccsum)
            cccsum_plot.stats.sampling_rate = stream[0].stats.sampling_rate
            # Resample here to maintain shape better
            cccsum_hist = cccsum_plot.copy()
            cccsum_hist = cccsum_hist.decimate(int(stream[0].stats.
                                                   sampling_rate / 10)).data
            cccsum_plot = plotting.chunk_data(cccsum_plot, 10,
                                              'Maxabs').data
            # Enforce same length
            stream_plot.data = stream_plot.data[0:len(cccsum_plot)]
            cccsum_plot = cccsum_plot[0:len(stream_plot.data)]
            cccsum_hist = cccsum_hist[0:len(stream_plot.data)]
            plotting.triple_plot(cccsum_plot, cccsum_hist,
                                 stream_plot, rawthresh, True,
                                 plotdir + '/cccsum_plot_' +
                                 template_names[i] + '_' +
                                 stream[0].stats.starttime.
                                 datetime.strftime('%Y-%m-%d') +
                                 '.' + plot_format)
            if debug >= 4:
                print(' '.join(['Saved the cccsum to:', template_names[i],
                                stream[0].stats.starttime.datetime.
                                strftime('%Y%j')]))
                np.save(template_names[i] +
                        stream[0].stats.starttime.datetime.strftime('%Y%j'),
                        cccsum)
        tic = time.clock()
        if debug >= 4:
            np.save('cccsum_' + str(i) + '.npy', cccsum)
        if debug >= 3 and max(cccsum) > rawthresh:
            peaks = findpeaks.find_peaks2_short(cccsum, rawthresh,
                                                trig_int * stream[0].stats.
                                                sampling_rate, debug,
                                                stream[0].stats.starttime,
                                                stream[0].stats.sampling_rate)
        elif max(cccsum) > rawthresh:
            peaks = findpeaks.find_peaks2_short(cccsum, rawthresh,
                                                trig_int * stream[0].stats.
                                                sampling_rate, debug)
        else:
            print('No peaks found above threshold')
            peaks = False
        toc = time.clock()
        if debug >= 1:
            print(' '.join(['Finding peaks took:', str(toc - tic), 's']))
        if peaks:
            for peak in peaks:
                detecttime = stream[0].stats.starttime +\
                    peak[1] / stream[0].stats.sampling_rate
                # Detect time must be valid QuakeML uri within resource_id.
                # This will write a formatted string which is still
                # readable by UTCDateTime
                rid = ResourceIdentifier(id=template_names[i] + '_' +
                                         str(detecttime.
                                             strftime('%Y%m%dT%H%M%S.%f')),
                                         prefix='smi:local')
                ev = Event(resource_id=rid)
                cr_i = CreationInfo(author='EQcorrscan',
                                    creation_time=UTCDateTime())
                ev.creation_info = cr_i
                # All detection info in Comments for lack of a better idea
                thresh_str = 'threshold=' + str(rawthresh)
                ccc_str = 'detect_val=' + str(peak[0])
                used_chans = 'channels used: ' +\
                             ' '.join([str(pair) for pair in chans[i]])
                ev.comments.append(Comment(text=thresh_str))
                ev.comments.append(Comment(text=ccc_str))
                ev.comments.append(Comment(text=used_chans))
                min_template_tm = min([tr.stats.starttime for tr in template])
                for tr in template:
                    if (tr.stats.station, tr.stats.channel) not in chans[i]:
                        continue
                    else:
                        pick_tm = detecttime + (tr.stats.starttime -
                                                min_template_tm)
                        wv_id = WaveformStreamID(network_code=tr.stats.network,
                                                 station_code=tr.stats.station,
                                                 channel_code=tr.stats.channel)
                        ev.picks.append(Pick(time=pick_tm, waveform_id=wv_id))
                detections.append(DETECTION(template_names[i],
                                            detecttime,
                                            no_chans[i], peak[0], rawthresh,
                                            'corr', chans[i], event=ev))
                if output_cat:
                    det_cat.append(ev)
        if extract_detections:
            detection_streams = extract_from_stream(stream, detections)
    del stream, templates
    if output_cat and not extract_detections:
        return detections, det_cat
    elif not extract_detections:
        return detections
    elif extract_detections and not output_cat:
        return detections, detection_streams
    else:
        return detections, det_cat, detection_streams
Exemple #5
0
def match_filter(template_names, template_list, st, threshold,
                 threshold_type, trig_int, plotvar, plotdir='.', cores=1,
                 tempdir=False, debug=0, plot_format='jpg'):
    r"""Over-arching code to run the correlations of given templates with a \
    day of seismic data and output the detections based on a given threshold.

    :type template_names: list
    :param template_names: List of template names in the same order as \
        template_list
    :type template_list: list :class: 'obspy.Stream'
    :param template_list: A list of templates of which each template is a \
        Stream of obspy traces containing seismic data and header information.
    :type st: :class: 'obspy.Stream'
    :param st: An obspy.Stream object containing all the data available and \
        required for the correlations with templates given.  For efficiency \
        this should contain no excess traces which are not in one or more of \
        the templates.  This will now remove excess traces internally, but \
        will copy the stream and work on the copy, leaving your input stream \
        untouched.
    :type threshold: float
    :param threshold: A threshold value set based on the threshold_type
    :type threshold_type: str
    :param threshold_type: The type of threshold to be used, can be MAD, \
        absolute or av_chan_corr.    MAD threshold is calculated as the \
        threshold*(median(abs(cccsum))) where cccsum is the cross-correlation \
        sum for a given template. absolute threhsold is a true absolute \
        threshold based on the cccsum value av_chan_corr is based on the mean \
        values of single-channel cross-correlations assuming all data are \
        present as required for the template, \
        e.g. av_chan_corr_thresh=threshold*(cccsum/len(template)) where \
        template is a single template from the input and the length is the \
        number of channels within this template.
    :type trig_int: float
    :param trig_int: Minimum gap between detections in seconds.
    :type plotvar: bool
    :param plotvar: Turn plotting on or off
    :type plotdir: str
    :param plotdir: Path to plotting folder, plots will be output here, \
        defaults to run location.
    :type tempdir: String or False
    :param tempdir: Directory to put temporary files, or False
    :type cores: int
    :param cores: Number of cores to use
    :type debug: int
    :param debug: Debug output level, the bigger the number, the more the \
        output.

    :return: :class: 'DETECTIONS' detections for each channel formatted as \
        :class: 'obspy.UTCDateTime' objects.

    .. note:: Plotting within the match-filter routine uses the Agg backend \
        with interactive plotting turned off.  This is because the function \
        is designed to work in bulk.  If you wish to turn interactive \
        plotting on you must import matplotlib in your script first, when you \
        them import match_filter you will get the warning that this call to \
        matplotlib has no effect, which will mean that match_filter has not \
        changed the plotting behaviour.
    """
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    plt.ioff()
    import copy
    from eqcorrscan.utils import plotting
    from eqcorrscan.utils import findpeaks
    from obspy import Trace
    import time

    # Copy the stream here because we will muck about with it
    stream = st.copy()
    templates = copy.deepcopy(template_list)
    # Debug option to confirm that the channel names match those in the
    # templates
    if debug >= 2:
        template_stachan = []
        data_stachan = []
        for template in templates:
            for tr in template:
                template_stachan.append(tr.stats.station + '.' +
                                        tr.stats.channel)
        for tr in stream:
            data_stachan.append(tr.stats.station + '.' + tr.stats.channel)
        template_stachan = list(set(template_stachan))
        data_stachan = list(set(data_stachan))
        if debug >= 3:
            print('I have template info for these stations:')
            print(template_stachan)
            print('I have daylong data for these stations:')
            print(data_stachan)
    # Perform a check that the daylong vectors are daylong
    for tr in stream:
        if not tr.stats.sampling_rate * 86400 == tr.stats.npts:
            msg = ' '.join(['Data are not daylong for', tr.stats.station,
                            tr.stats.channel])
            raise ValueError(msg)
    # Call the _template_loop function to do all the correlation work
    outtic = time.clock()
    # Edit here from previous, stable, but slow match_filter
    # Would be worth testing without an if statement, but with every station in
    # the possible template stations having data, but for those without real
    # data make the data NaN to return NaN ccc_sum
    # Note: this works
    if debug >= 2:
        print('Ensuring all template channels have matches in daylong data')
    template_stachan = []
    for template in templates:
        for tr in template:
            template_stachan += [(tr.stats.station, tr.stats.channel)]
    template_stachan = list(set(template_stachan))
    # Copy this here to keep it safe
    for stachan in template_stachan:
        if not stream.select(station=stachan[0], channel=stachan[1]):
            # Remove template traces rather than adding NaN data
            for template in templates:
                if template.select(station=stachan[0], channel=stachan[1]):
                    for tr in template.select(station=stachan[0],
                                              channel=stachan[1]):
                        template.remove(tr)
    # Remove un-needed channels
    for tr in stream:
        if not (tr.stats.station, tr.stats.channel) in template_stachan:
            stream.remove(tr)
    # Also pad out templates to have all channels
    for template, template_name in zip(templates, template_names):
        if len(template) == 0:
            msg = ('No channels matching in continuous data for ' +
                   'template' + template_name)
            warnings.warn(msg)
            templates.remove(template)
            template_names.remove(template_name)
            continue
        for stachan in template_stachan:
            if not template.select(station=stachan[0], channel=stachan[1]):
                nulltrace = Trace()
                nulltrace.stats.station = stachan[0]
                nulltrace.stats.channel = stachan[1]
                nulltrace.stats.sampling_rate = template[0].stats.sampling_rate
                nulltrace.stats.starttime = template[0].stats.starttime
                nulltrace.data = np.array([np.NaN] * len(template[0].data),
                                          dtype=np.float32)
                template += nulltrace
    if debug >= 2:
        print('Starting the correlation run for this day')
    [cccsums, no_chans, chans] = _channel_loop(templates, stream, cores, debug)
    if len(cccsums[0]) == 0:
        raise ValueError('Correlation has not run, zero length cccsum')
    outtoc = time.clock()
    print(' '.join(['Looping over templates and streams took:',
                    str(outtoc - outtic), 's']))
    if debug >= 2:
        print(' '.join(['The shape of the returned cccsums is:',
                        str(np.shape(cccsums))]))
        print(' '.join(['This is from', str(len(templates)), 'templates']))
        print(' '.join(['Correlated with', str(len(stream)),
                        'channels of data']))
    detections = []
    for i, cccsum in enumerate(cccsums):
        template = templates[i]
        if threshold_type == 'MAD':
            rawthresh = threshold * np.median(np.abs(cccsum))
        elif threshold_type == 'absolute':
            rawthresh = threshold
        elif threshold == 'av_chan_corr':
            rawthresh = threshold * (cccsum / len(template))
        else:
            print('You have not selected the correct threshold type, I will' +
                  'use MAD as I like it')
            rawthresh = threshold * np.mean(np.abs(cccsum))
        # Findpeaks returns a list of tuples in the form [(cccsum, sample)]
        print(' '.join(['Threshold is set at:', str(rawthresh)]))
        print(' '.join(['Max of data is:', str(max(cccsum))]))
        print(' '.join(['Mean of data is:', str(np.mean(cccsum))]))
        if np.abs(np.mean(cccsum)) > 0.05:
            warnings.warn('Mean is not zero!  Check this!')
        # Set up a trace object for the cccsum as this is easier to plot and
        # maintins timing
        if plotvar:
            stream_plot = copy.deepcopy(stream[0])
            # Downsample for plotting
            stream_plot.decimate(int(stream[0].stats.sampling_rate / 10))
            cccsum_plot = Trace(cccsum)
            cccsum_plot.stats.sampling_rate = stream[0].stats.sampling_rate
            # Resample here to maintain shape better
            cccsum_hist = cccsum_plot.copy()
            cccsum_hist = cccsum_hist.decimate(int(stream[0].stats.
                                                   sampling_rate / 10)).data
            cccsum_plot = plotting.chunk_data(cccsum_plot, 10,
                                              'Maxabs').data
            # Enforce same length
            stream_plot.data = stream_plot.data[0:len(cccsum_plot)]
            cccsum_plot = cccsum_plot[0:len(stream_plot.data)]
            cccsum_hist = cccsum_hist[0:len(stream_plot.data)]
            plotting.triple_plot(cccsum_plot, cccsum_hist,
                                 stream_plot, rawthresh, True,
                                 plotdir + '/cccsum_plot_' +
                                 template_names[i] + '_' +
                                 stream[0].stats.starttime.
                                 datetime.strftime('%Y-%m-%d') +
                                 '.' + plot_format)
            if debug >= 4:
                print(' '.join(['Saved the cccsum to:', template_names[i],
                                stream[0].stats.starttime.datetime.
                                strftime('%Y%j')]))
                np.save(template_names[i] +
                        stream[0].stats.starttime.datetime.strftime('%Y%j'),
                        cccsum)
        tic = time.clock()
        if debug >= 4:
            np.save('cccsum_' + str(i) + '.npy', cccsum)
        if debug >= 3 and max(cccsum) > rawthresh:
            peaks = findpeaks.find_peaks2_short(cccsum, rawthresh,
                                                trig_int * stream[0].stats.
                                                sampling_rate, debug,
                                                stream[0].stats.starttime,
                                                stream[0].stats.sampling_rate)
        elif max(cccsum) > rawthresh:
            peaks = findpeaks.find_peaks2_short(cccsum, rawthresh,
                                                trig_int * stream[0].stats.
                                                sampling_rate, debug)
        else:
            print('No peaks found above threshold')
            peaks = False
        toc = time.clock()
        if debug >= 1:
            print(' '.join(['Finding peaks took:', str(toc - tic), 's']))
        if peaks:
            for peak in peaks:
                detecttime = stream[0].stats.starttime +\
                    peak[1] / stream[0].stats.sampling_rate
                detections.append(DETECTION(template_names[i],
                                            detecttime,
                                            no_chans[i], peak[0], rawthresh,
                                            'corr', chans[i]))
    del stream, templates
    return detections
Exemple #6
0
def match_filter(template_names, template_list, st, threshold,
                 threshold_type, trig_int, plotvar, plotdir='.', cores=1,
                 tempdir=False, debug=0, plot_format='png',
                 output_cat=False, extract_detections=False,
                 arg_check=True):
    """
    Main matched-filter detection function.
    Over-arching code to run the correlations of given templates with a \
    day of seismic data and output the detections based on a given threshold.
    For a functional example see the tutorials.

    :type template_names: list
    :param template_names: List of template names in the same order as \
        template_list
    :type template_list: list
    :param template_list: A list of templates of which each template is a \
        Stream of obspy traces containing seismic data and header information.
    :type st: obspy.core.stream.Stream
    :param st: An obspy.Stream object containing all the data available and \
        required for the correlations with templates given.  For efficiency \
        this should contain no excess traces which are not in one or more of \
        the templates.  This will now remove excess traces internally, but \
        will copy the stream and work on the copy, leaving your input stream \
        untouched.
    :type threshold: float
    :param threshold: A threshold value set based on the threshold_type
    :type threshold_type: str
    :param threshold_type: The type of threshold to be used, can be MAD, \
        absolute or av_chan_corr.    MAD threshold is calculated as the \
        threshold*(median(abs(cccsum))) where cccsum is the cross-correlation \
        sum for a given template. absolute threhsold is a true absolute \
        threshold based on the cccsum value av_chan_corr is based on the mean \
        values of single-channel cross-correlations assuming all data are \
        present as required for the template, \
        e.g. av_chan_corr_thresh=threshold*(cccsum/len(template)) where \
        template is a single template from the input and the length is the \
        number of channels within this template.
    :type trig_int: float
    :param trig_int: Minimum gap between detections in seconds.
    :type plotvar: bool
    :param plotvar: Turn plotting on or off
    :type plotdir: str
    :param plotdir: Path to plotting folder, plots will be output here, \
        defaults to run location.
    :type tempdir: str
    :param tempdir: Directory to put temporary files, or False
    :type cores: int
    :param cores: Number of cores to use
    :type debug: int
    :param debug: Debug output level, the bigger the number, the more the \
        output.
    :type plot_format: str
    :param plot_format: Specify format of output plots if saved
    :type output_cat: bool
    :param output_cat: Specifies if matched_filter will output an \
        obspy.Catalog class containing events for each detection. Default \
        is False, in which case matched_filter will output a list of \
        detection classes, as normal.
    :type extract_detections: bool
    :param extract_detections: Specifies whether or not to return a list of \
        streams, one stream per detection.
    :type arg_check: bool
    :param arg_check: Check arguments, defaults to True, but if running in \
        bulk, and you are certain of your arguments, then set to False.

    :return: :class: 'DETECTIONS' detections for each channel formatted as \
        :class: 'obspy.UTCDateTime' objects.
    :return: :class: obspy.Catalog containing events for each detection.
    :return: list of :class: obspy.Stream objects for each detection.

    .. note:: Plotting within the match-filter routine uses the Agg backend \
        with interactive plotting turned off.  This is because the function \
        is designed to work in bulk.  If you wish to turn interactive \
        plotting on you must import matplotlib in your script first, when you \
        them import match_filter you will get the warning that this call to \
        matplotlib has no effect, which will mean that match_filter has not \
        changed the plotting behaviour.

    .. note:: The output_cat flag will create an :class: obspy.Catalog \
        containing one event for each :class: 'DETECTIONS' generated by \
        match_filter. Each event will contain a number of comments dealing \
        with correlation values and channels used for the detection. Each \
        channel used for the detection will have a corresponding :class: Pick \
        which will contain time and waveform information. HOWEVER, the user \
        should note that, at present, the pick times do not account for the \
        prepick times inherent in each template. For example, if a template \
        trace starts 0.1 seconds before the actual arrival of that phase, \
        then the pick time generated by match_filter for that phase will be \
        0.1 seconds early. We are looking towards a solution which will \
        involve saving templates alongside associated metadata.
    """
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    plt.ioff()
    import copy
    from eqcorrscan.utils import plotting
    from eqcorrscan.utils import findpeaks
    from obspy import Trace, Catalog, UTCDateTime, Stream
    from obspy.core.event import Event, Pick, CreationInfo, ResourceIdentifier
    from obspy.core.event import Comment, WaveformStreamID
    import time

    if arg_check:
        # Check the arguments to be nice - if arguments wrong type the parallel
        # output for the error won't be useful
        if not type(template_names) == list:
            raise IOError('template_names must be of type: list')
        if not type(template_list) == list:
            raise IOError('templates must be of type: list')
        for template in template_list:
            if not type(template) == Stream:
                msg = 'template in template_list must be of type: ' +\
                      'obspy.core.stream.Stream'
                raise IOError(msg)
        if not type(st) == Stream:
            msg = 'st must be of type: obspy.core.stream.Stream'
            raise IOError(msg)
        if threshold_type not in ['MAD', 'absolute', 'av_chan_corr']:
            msg = 'threshold_type must be one of: MAD, absolute, av_chan_corr'
            raise IOError(msg)

    # Copy the stream here because we will muck about with it
    stream = st.copy()
    templates = copy.deepcopy(template_list)
    # Debug option to confirm that the channel names match those in the
    # templates
    if debug >= 2:
        template_stachan = []
        data_stachan = []
        for template in templates:
            for tr in template:
                template_stachan.append(tr.stats.station + '.' +
                                        tr.stats.channel)
        for tr in stream:
            data_stachan.append(tr.stats.station + '.' + tr.stats.channel)
        template_stachan = list(set(template_stachan))
        data_stachan = list(set(data_stachan))
        if debug >= 3:
            print('I have template info for these stations:')
            print(template_stachan)
            print('I have daylong data for these stations:')
            print(data_stachan)
    # Perform a check that the daylong vectors are daylong
    for tr in stream:
        if not tr.stats.sampling_rate * 86400 == tr.stats.npts:
            msg = ' '.join(['Data are not daylong for', tr.stats.station,
                            tr.stats.channel])
            raise ValueError(msg)
    # Perform check that all template lengths are internally consistent
    for i, temp in enumerate(template_list):
        if len(set([tr.stats.npts for tr in temp])) > 1:
            msg = 'Template %s contains traces of differing length!! THIS \
                  WILL CAUSE ISSUES' % template_names[i]
            raise ValueError(msg)
    # Call the _template_loop function to do all the correlation work
    outtic = time.clock()
    # Edit here from previous, stable, but slow match_filter
    # Would be worth testing without an if statement, but with every station in
    # the possible template stations having data, but for those without real
    # data make the data NaN to return NaN ccc_sum
    # Note: this works
    if debug >= 2:
        print('Ensuring all template channels have matches in daylong data')
    template_stachan = []
    for template in templates:
        for tr in template:
            template_stachan += [(tr.stats.station, tr.stats.channel)]
    template_stachan = list(set(template_stachan))
    # Copy this here to keep it safe
    for stachan in template_stachan:
        if not stream.select(station=stachan[0], channel=stachan[1]):
            # Remove template traces rather than adding NaN data
            for template in templates:
                if template.select(station=stachan[0], channel=stachan[1]):
                    for tr in template.select(station=stachan[0],
                                              channel=stachan[1]):
                        template.remove(tr)
    # Remove un-needed channels
    for tr in stream:
        if not (tr.stats.station, tr.stats.channel) in template_stachan:
            stream.remove(tr)
    # Also pad out templates to have all channels
    for template, template_name in zip(templates, template_names):
        if len(template) == 0:
            msg = ('No channels matching in continuous data for ' +
                   'template' + template_name)
            warnings.warn(msg)
            templates.remove(template)
            template_names.remove(template_name)
            continue
        for stachan in template_stachan:
            if not template.select(station=stachan[0], channel=stachan[1]):
                nulltrace = Trace()
                nulltrace.stats.station = stachan[0]
                nulltrace.stats.channel = stachan[1]
                nulltrace.stats.sampling_rate = template[0].stats.sampling_rate
                nulltrace.stats.starttime = template[0].stats.starttime
                nulltrace.data = np.array([np.NaN] * len(template[0].data),
                                          dtype=np.float32)
                template += nulltrace
    if debug >= 2:
        print('Starting the correlation run for this day')
    [cccsums, no_chans, chans] = _channel_loop(templates, stream, cores, debug)
    if len(cccsums[0]) == 0:
        raise ValueError('Correlation has not run, zero length cccsum')
    outtoc = time.clock()
    print(' '.join(['Looping over templates and streams took:',
                    str(outtoc - outtic), 's']))
    if debug >= 2:
        print(' '.join(['The shape of the returned cccsums is:',
                        str(np.shape(cccsums))]))
        print(' '.join(['This is from', str(len(templates)), 'templates']))
        print(' '.join(['Correlated with', str(len(stream)),
                        'channels of data']))
    detections = []
    if output_cat:
        det_cat = Catalog()
    for i, cccsum in enumerate(cccsums):
        template = templates[i]
        if threshold_type == 'MAD':
            rawthresh = threshold * np.median(np.abs(cccsum))
        elif threshold_type == 'absolute':
            rawthresh = threshold
        elif threshold_type == 'av_chan_corr':
            rawthresh = threshold * no_chans[i]
        # Findpeaks returns a list of tuples in the form [(cccsum, sample)]
        print(' '.join(['Threshold is set at:', str(rawthresh)]))
        print(' '.join(['Max of data is:', str(max(cccsum))]))
        print(' '.join(['Mean of data is:', str(np.mean(cccsum))]))
        if np.abs(np.mean(cccsum)) > 0.05:
            warnings.warn('Mean is not zero!  Check this!')
        # Set up a trace object for the cccsum as this is easier to plot and
        # maintains timing
        if plotvar:
            stream_plot = copy.deepcopy(stream[0])
            # Downsample for plotting
            stream_plot.decimate(int(stream[0].stats.sampling_rate / 10))
            cccsum_plot = Trace(cccsum)
            cccsum_plot.stats.sampling_rate = stream[0].stats.sampling_rate
            # Resample here to maintain shape better
            cccsum_hist = cccsum_plot.copy()
            cccsum_hist = cccsum_hist.decimate(int(stream[0].stats.
                                                   sampling_rate / 10)).data
            cccsum_plot = plotting.chunk_data(cccsum_plot, 10,
                                              'Maxabs').data
            # Enforce same length
            stream_plot.data = stream_plot.data[0:len(cccsum_plot)]
            cccsum_plot = cccsum_plot[0:len(stream_plot.data)]
            cccsum_hist = cccsum_hist[0:len(stream_plot.data)]
            plotting.triple_plot(cccsum_plot, cccsum_hist,
                                 stream_plot, rawthresh, True,
                                 plotdir + '/cccsum_plot_' +
                                 template_names[i] + '_' +
                                 stream[0].stats.starttime.
                                 datetime.strftime('%Y-%m-%d') +
                                 '.' + plot_format)
            if debug >= 4:
                print(' '.join(['Saved the cccsum to:', template_names[i],
                                stream[0].stats.starttime.datetime.
                                strftime('%Y%j')]))
                np.save(template_names[i] +
                        stream[0].stats.starttime.datetime.strftime('%Y%j'),
                        cccsum)
        tic = time.clock()
        if debug >= 4:
            np.save('cccsum_' + str(i) + '.npy', cccsum)
        if debug >= 3 and max(cccsum) > rawthresh:
            peaks = findpeaks.find_peaks2_short(cccsum, rawthresh,
                                                trig_int * stream[0].stats.
                                                sampling_rate, debug,
                                                stream[0].stats.starttime,
                                                stream[0].stats.sampling_rate)
        elif max(cccsum) > rawthresh:
            peaks = findpeaks.find_peaks2_short(cccsum, rawthresh,
                                                trig_int * stream[0].stats.
                                                sampling_rate, debug)
        else:
            print('No peaks found above threshold')
            peaks = False
        toc = time.clock()
        if debug >= 1:
            print(' '.join(['Finding peaks took:', str(toc - tic), 's']))
        if peaks:
            for peak in peaks:
                detecttime = stream[0].stats.starttime +\
                    peak[1] / stream[0].stats.sampling_rate
                # Detect time must be valid QuakeML uri within resource_id.
                # This will write a formatted string which is still readable by UTCDateTime
                rid = ResourceIdentifier(id=template_names[i] + '_' +
                                         str(detecttime.strftime('%Y%m%dT%H%M%S.%f')),
                                         prefix='smi:local')
                ev = Event(resource_id=rid)
                cr_i = CreationInfo(author='EQcorrscan',
                                    creation_time=UTCDateTime())
                ev.creation_info = cr_i
                # All detection info in Comments for lack of a better idea
                thresh_str = 'threshold=' + str(rawthresh)
                ccc_str = 'detect_val=' + str(peak[0])
                used_chans = 'channels used: ' +\
                             ' '.join([str(pair) for pair in chans[i]])
                ev.comments.append(Comment(text=thresh_str))
                ev.comments.append(Comment(text=ccc_str))
                ev.comments.append(Comment(text=used_chans))
                min_template_tm = min([tr.stats.starttime for tr in template])
                for tr in template:
                    if (tr.stats.station, tr.stats.channel) not in chans[i]:
                        continue
                    else:
                        pick_tm = detecttime + (tr.stats.starttime - min_template_tm)
                        wv_id = WaveformStreamID(network_code=tr.stats.network,
                                                 station_code=tr.stats.station,
                                                 channel_code=tr.stats.channel)
                        ev.picks.append(Pick(time=pick_tm, waveform_id=wv_id))
                detections.append(DETECTION(template_names[i],
                                            detecttime,
                                            no_chans[i], peak[0], rawthresh,
                                            'corr', chans[i], event=ev))
                if output_cat:
                    det_cat.append(ev)
        if extract_detections:
            detection_streams = extract_from_stream(stream, detections)
    del stream, templates
    if output_cat and not extract_detections:
        return detections, det_cat
    elif not extract_detections:
        return detections
    elif extract_detections and not output_cat:
        return detections, detection_streams
    else:
        return detections, det_cat, detection_streams
def test_stability():
    """Test various threshold window lengths."""
    from eqcorrscan.core.match_filter import _channel_loop
    from eqcorrscan.utils import pre_processing, catalog_utils, plotting
    from eqcorrscan.core import template_gen
    from obspy.clients.fdsn import Client
    from obspy import UTCDateTime, Trace
    import numpy as np
    import matplotlib.pyplot as plt

    # Do some set-up
    client = Client('NCEDC')
    t1 = UTCDateTime(2004, 9, 28)
    t2 = t1 + 86400
    catalog = client.get_events(starttime=t1,
                                endtime=t2,
                                minmagnitude=2,
                                minlatitude=35.7,
                                maxlatitude=36.1,
                                minlongitude=-120.6,
                                maxlongitude=-120.2,
                                includearrivals=True)
    catalog = catalog_utils.filter_picks(catalog,
                                         channels=['EHZ'],
                                         top_n_picks=5)
    templates = template_gen.from_client(catalog=catalog,
                                         client_id='NCEDC',
                                         lowcut=2.0,
                                         highcut=9.0,
                                         samp_rate=20.0,
                                         filt_order=4,
                                         length=3.0,
                                         prepick=0.15,
                                         swin='all')
    bulk_info = [
        (tr.stats.network, tr.stats.station, '*',
         tr.stats.channel[0] + 'H' + tr.stats.channel[1], t2 - 3600, t2)
        for tr in templates[0]
    ]
    st = client.get_waveforms_bulk(bulk_info)
    st.merge(fill_value='interpolate')
    st = pre_processing.shortproc(st,
                                  lowcut=2.0,
                                  highcut=9.0,
                                  filt_order=4,
                                  samp_rate=20.0,
                                  debug=0,
                                  num_cores=4)
    i = 0
    cccsums, no_chans, chans = _channel_loop(templates, st)

    cccsum = cccsums[0]
    MAD_thresh = 8
    MAD_daylong = MAD_thresh * np.median(np.abs(cccsum))
    MAD_hours = []
    MAD_five_mins = []
    MAD_minutes = []
    for hour in range(24):
        ccc_hour_slice = cccsum[hour * 3600 *
                                st[0].stats.sampling_rate:(hour + 1) * 3600 *
                                st[0].stats.sampling_rate]
        MAD_hour_slice = MAD_thresh * np.median(np.abs(ccc_hour_slice))
        MAD_hours.append(MAD_hour_slice)
        for five_min in range(12):
            ccc_five_slice = ccc_hour_slice[five_min * 300 *
                                            st[0].stats.sampling_rate:
                                            (five_min + 1) * 300 *
                                            st[0].stats.sampling_rate]
            MAD_five_slice = MAD_thresh * np.median(np.abs(ccc_five_slice))
            MAD_five_mins.append(MAD_five_slice)
        for minute in range(60):
            ccc_min_slice = ccc_hour_slice[minute * 60 *
                                           st[0].stats.sampling_rate:(minute +
                                                                      1) * 60 *
                                           st[0].stats.sampling_rate]
            MAD_min_slice = MAD_thresh * np.median(np.abs(ccc_min_slice))
            MAD_minutes.append(MAD_min_slice)
    plotting_cccsum = Trace(cccsum)
    plotting_cccsum.stats.sampling_rate = st[0].stats.sampling_rate
    plotting_cccsum = plotting.chunk_data(plotting_cccsum, 1, 'Maxabs')
    x = np.arange(0, 24, 1.0 / (3600 * plotting_cccsum.stats.sampling_rate))
    x = x[0:len(plotting_cccsum.data)]
    plt.plot(x, plotting_cccsum.data, linewidth=0.7, color='k')
    plt.plot(np.arange(0, 24, 1.0 / 60),
             MAD_minutes,
             label='1 minute MAD',
             color='y')
    plt.plot(np.arange(0, 24, 1.0 / 60), [-1 * m for m in MAD_minutes],
             color='y')
    plt.plot(np.arange(0, 24, 1.0 / 12),
             MAD_five_mins,
             label='5 minute MAD',
             color='b')
    plt.plot(np.arange(0, 24, 1.0 / 12), [-1 * m for m in MAD_five_mins],
             color='b')
    plt.plot(np.arange(24),
             MAD_hours,
             label='Hourly MAD',
             color='r',
             linewidth=1.4)
    plt.plot(np.arange(24), [-1 * m for m in MAD_hours],
             color='r',
             linewidth=1.4)
    plt.plot([0, 24], [MAD_daylong, MAD_daylong],
             label='Day-long MAD',
             linewidth=1.5,
             color='g')
    plt.plot([0, 24], [-1 * MAD_daylong, -1 * MAD_daylong],
             linewidth=1.5,
             color='g')
    plt.legend()
    plt.show()