Exemple #1
0
def _detect(detector,
            st,
            threshold,
            trig_int,
            moveout=0,
            min_trig=0,
            process=True,
            extract_detections=False,
            cores=1):
    """
    Detect within continuous data using the subspace method.

    Not to be called directly, use the detector.detect method.

    :type detector: eqcorrscan.core.subspace.Detector
    :param detector: Detector to use.
    :type st: obspy.core.stream.Stream
    :param st: Un-processed stream to detect within using the subspace \
        detector
    :type threshold: float
    :param threshold: Threshold value for detections between 0-1
    :type trig_int: float
    :param trig_int: Minimum trigger interval in seconds.
    :type moveout: float
    :param moveout: Maximum allowable moveout window for non-multiplexed,
        network detection.  See note.
    :type min_trig: int
    :param min_trig: Minimum number of stations exceeding threshold for \
        non-multiplexed, network detection. See note.
    :type process: bool
    :param process: Whether or not to process the stream according to the \
        parameters defined by the detector.  Default is to process the \
        data (True).
    :type extract_detections: bool
    :param extract_detections: Whether to extract waveforms for each \
        detection or not, if true will return detections and streams.
    :type cores: int
    :param cores: Number of cores handed to the stream processing step; \
        only used when `process` is True.

    :return: list of detections, or a tuple of (detections, \
        detection_streams) when `extract_detections` is True.
    :rtype: list of eqcorrscan.core.match_filter.Detection

    :raises ValueError: If `process` is False and any trace in `st` has a \
        sampling rate different to that of the detector.
    """
    detections = []
    # First process the stream
    if process:
        Logger.info('Processing Stream')
        stream, stachans = _subspace_process(
            streams=[st.copy()],
            lowcut=detector.lowcut,
            highcut=detector.highcut,
            filt_order=detector.filt_order,
            sampling_rate=detector.sampling_rate,
            multiplex=detector.multiplex,
            stachans=detector.stachans,
            parallel=True,
            align=False,
            shift_len=None,
            reject=False,
            cores=cores)
    else:
        # Check the sampling rate at the very least
        for tr in st:
            if not tr.stats.sampling_rate == detector.sampling_rate:
                raise ValueError('Sampling rates do not match.')
        stream = [st]
        stachans = detector.stachans
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for interval timing.
    outtic = time.perf_counter()
    # If multiplexed, how many samples do we increment by?
    Nc = len(detector.stachans) if detector.multiplex else 1
    # Here do all ffts
    fft_vars = _do_ffts(detector, stream, Nc)
    Logger.info('Computing detection statistics')
    Logger.info('Preallocating stats matrix')
    n_rows = len(stream[0])
    stats = np.zeros(
        (n_rows, (len(stream[0][0]) // Nc) - (fft_vars[4] // Nc) + 1))
    for det_freq, data_freq_sq, data_freq, i in zip(
            fft_vars[0], fft_vars[1], fft_vars[2], range(n_rows)):
        # Calculate det_statistic in frequency domain
        stats[i] = _det_stat_freq(det_freq, data_freq_sq, data_freq,
                                  fft_vars[3], Nc, fft_vars[4], fft_vars[5])
        Logger.info('Stats matrix is shape %s' % str(stats[i].shape))
    trig_int_samples = detector.sampling_rate * trig_int
    Logger.info('Finding peaks')
    peaks = [
        findpeaks.find_peaks2_short(arr=stat_row,
                                    thresh=threshold,
                                    trig_int=trig_int_samples)
        for stat_row in stats
    ]
    if not detector.multiplex:
        # Conduct network coincidence triggering
        peaks = findpeaks.coin_trig(peaks=peaks,
                                    samp_rate=detector.sampling_rate,
                                    moveout=moveout,
                                    min_trig=min_trig,
                                    stachans=stachans,
                                    trig_int=trig_int)
    else:
        peaks = peaks[0]
    # Everything below that does not depend on the individual peak is built
    # once here rather than once per detection.
    thresh_str = 'threshold=' + str(threshold)
    used_chans = 'channels used: ' + \
        ' '.join([str(pair) for pair in detector.stachans])
    # (network, station, channel) for every detector channel; the network
    # code is taken from the input stream and left empty if the channel is
    # not present there.
    pick_codes = []
    for stachan in detector.stachans:
        matched = st.select(station=stachan[0], channel=stachan[1])
        net_code = matched[0].stats.network if matched else ''
        pick_codes.append((net_code, stachan[0], stachan[1]))
    for peak in peaks:
        # peak is (detection value, sample index); the index is converted
        # to a time relative to the start of the first input trace.
        detecttime = st[0].stats.starttime + \
            (peak[1] / detector.sampling_rate)
        rid = ResourceIdentifier(id=detector.name + '_' + str(detecttime),
                                 prefix='smi:local')
        ev = Event(resource_id=rid)
        ev.creation_info = CreationInfo(author='EQcorrscan',
                                        creation_time=UTCDateTime())
        # All detection info in Comments for lack of a better idea
        ev.comments.append(Comment(text=thresh_str))
        ev.comments.append(Comment(text='detect_val=' + str(peak[0])))
        ev.comments.append(Comment(text=used_chans))
        # Every channel gets a pick at the detection time itself.
        for net_code, station, channel in pick_codes:
            wv_id = WaveformStreamID(network_code=net_code,
                                     station_code=station,
                                     channel_code=channel)
            ev.picks.append(Pick(time=detecttime, waveform_id=wv_id))
        detections.append(
            Detection(template_name=detector.name,
                      detect_time=detecttime,
                      no_chans=len(detector.stachans),
                      detect_val=peak[0],
                      threshold=threshold,
                      typeofdet='subspace',
                      threshold_type='abs',
                      threshold_input=threshold,
                      chans=detector.stachans,
                      event=ev))
    outtoc = time.perf_counter()
    Logger.info('Detection took %s seconds' % str(outtoc - outtic))
    if extract_detections:
        detection_streams = extract_from_stream(st, detections)
        return detections, detection_streams
    return detections
Exemple #2
0
def match_filter(template_names,
                 template_list,
                 st,
                 threshold,
                 threshold_type,
                 trig_int,
                 plot=False,
                 plotdir=None,
                 xcorr_func=None,
                 concurrency=None,
                 cores=None,
                 plot_format='png',
                 output_cat=False,
                 output_event=True,
                 extract_detections=False,
                 arg_check=True,
                 full_peaks=False,
                 peak_cores=None,
                 spike_test=True,
                 **kwargs):
    """
    Main matched-filter detection function.

    Over-arching code to run the correlations of given templates with a
    day of seismic data and output the detections based on a given threshold.
    For a functional example see the tutorials.

    :type template_names: list
    :param template_names:
        List of template names in the same order as template_list
    :type template_list: list
    :param template_list:
        A list of templates of which each template is a
        :class:`obspy.core.stream.Stream` of obspy traces containing seismic
        data and header information.
    :type st: :class:`obspy.core.stream.Stream`
    :param st:
        A Stream object containing all the data available and
        required for the correlations with templates given.  For efficiency
        this should contain no excess traces which are not in one or more of
        the templates.  This will now remove excess traces internally, but
        will copy the stream and work on the copy, leaving your input stream
        untouched.
    :type threshold: float
    :param threshold: A threshold value set based on the threshold_type
    :type threshold_type: str
    :param threshold_type:
        The type of threshold to be used, can be MAD, absolute or av_chan_corr.
        See Note on thresholding below.
    :type trig_int: float
    :param trig_int:
        Minimum gap between detections from one template in seconds.
        If multiple detections occur within trig_int of one-another, the one
        with the highest cross-correlation sum will be selected.
    :type plot: bool
    :param plot: Turn plotting on or off
    :type plotdir: str
    :param plotdir:
        Path to plotting folder, plots will be output here, defaults to None,
        and plots are shown on screen.
    :type xcorr_func: str or callable
    :param xcorr_func:
        A str of a registered xcorr function or a callable for implementing
        a custom xcorr function. For more information see:
        :func:`eqcorrscan.utils.correlate.register_array_xcorr`
    :type concurrency: str
    :param concurrency:
        The type of concurrency to apply to the xcorr function. Options are
        'multithread', 'multiprocess', 'concurrent'. For more details see
        :func:`eqcorrscan.utils.correlate.get_stream_xcorr`
    :type cores: int
    :param cores: Number of cores to use
    :type plot_format: str
    :param plot_format: Specify format of output plots if saved
    :type output_cat: bool
    :param output_cat:
        Specifies if matched_filter will output an obspy.Catalog class
        containing events for each detection. Default is False, in which case
        matched_filter will output a list of detection classes, as normal.
    :type output_event: bool
    :param output_event:
        Whether to include events in the Detection objects, defaults to True,
        but for large cases you may want to turn this off as Event objects
        can be quite memory intensive.
    :type extract_detections: bool
    :param extract_detections:
        Specifies whether or not to return a list of streams, one stream per
        detection.
    :type arg_check: bool
    :param arg_check:
        Check arguments, defaults to True, but if running in bulk, and you are
        certain of your arguments, then set to False.
    :type full_peaks: bool
    :param full_peaks: See
        :func:`eqcorrscan.utils.findpeaks.find_peaks_compiled`
    :type peak_cores: int
    :param peak_cores:
        Number of processes to use for parallel peak-finding (if different to
        `cores`).
    :type spike_test: bool
    :param spike_test: If set True, raise error when there is a spike in data.
        defaults to True.
    :param kwargs:
        Any further keyword arguments are passed on unchanged to the
        correlation function.  The deprecated keyword `plotvar` is accepted
        here as an alias for `plot`.

    :raises MatchFilterError:
        If `arg_check` is True and an argument fails validation, or if the
        correlation returns a zero-length cross-correlation sum.
    :raises IndexError:
        If no templates remain after the data have been prepared for
        correlation.

    .. Note::
        When using the "fftw" correlation backend the length of the fft
        can be set. See :mod:`eqcorrscan.utils.correlate` for more info.

    .. note::
        **Returns:**

        If neither `output_cat` or `extract_detections` are set to `True`,
        then only the list of :class:`eqcorrscan.core.match_filter.Detection`'s
        will be output:

        :return:
            :class:`eqcorrscan.core.match_filter.Detection` detections for each
            detection made.
        :rtype: list

        If `output_cat` is set to `True`, then the
        :class:`obspy.core.event.Catalog` will also be output:

        :return: Catalog containing events for each detection, see above.
        :rtype: :class:`obspy.core.event.Catalog`

        If `extract_detections` is set to `True` then the list of
        :class:`obspy.core.stream.Stream`'s will also be output.

        :return:
            list of :class:`obspy.core.stream.Stream`'s for each detection, see
            above.
        :rtype: list

    .. note::
        If your data contain gaps these must be padded with zeros before
        using this function. The `eqcorrscan.utils.pre_processing` functions
        will provide gap-filled data in the appropriate format.  Note that if
        you pad your data with zeros before filtering or resampling the gaps
        will not be all zeros after filtering. This will result in the
        calculation of spurious correlations in the gaps.

    .. Note::
        Detections are not corrected for `pre-pick`, the
        detection.detect_time corresponds to the beginning of the earliest
        template channel at detection.

    .. note::
        **Data overlap:**

        Internally this routine shifts and trims the data according to the
        offsets in the template (e.g. if trace 2 starts 2 seconds after trace 1
        in the template then the continuous data will be shifted by 2 seconds
        to align peak correlations prior to summing).  Because of this,
        detections at the start and end of continuous data streams
        **may be missed**.  The maximum time-period that might be missing
        detections is the maximum offset in the template.

        To work around this, if you are conducting matched-filter detections
        through long-duration continuous data, we suggest using some overlap
        (a few seconds, on the order of the maximum offset in the templates)
        in the continuous data.  You will then need to post-process the
        detections (which should be done anyway to remove duplicates).

    .. note::
        **Thresholding:**

        **MAD** threshold is calculated as the:

        .. math::

            threshold {\\times} (median(abs(cccsum)))

        where :math:`cccsum` is the cross-correlation sum for a given template.

        **absolute** threshold is a true absolute threshold based on the
        cccsum value.

        **av_chan_corr** is based on the mean values of single-channel
        cross-correlations assuming all data are present as required for the
        template, e.g:

        .. math::

            av\\_chan\\_corr\\_thresh=threshold \\times (cccsum\\ /\\ len(template))

        where :math:`template` is a single template from the input and the
        length is the number of channels within this template.

    .. note::
        The output_cat flag will create an :class:`obspy.core.event.Catalog`
        containing one event for each
        :class:`eqcorrscan.core.match_filter.Detection`'s generated by
        match_filter. Each event will contain a number of comments dealing
        with correlation values and channels used for the detection. Each
        channel used for the detection will have a corresponding
        :class:`obspy.core.event.Pick` which will contain time and
        waveform information. **HOWEVER**, the user should note that
        the pick times do not account for the prepick times inherent in
        each template. For example, if a template trace starts 0.1 seconds
        before the actual arrival of that phase, then the pick time generated
        by match_filter for that phase will be 0.1 seconds early.

    .. Note::
        xcorr_func can be used as follows:

        .. rubric:: xcorr_func argument example

        >>> import obspy
        >>> import numpy as np
        >>> from eqcorrscan.core.match_filter.matched_filter import (
        ...    match_filter)
        >>> from eqcorrscan.utils.correlate import time_multi_normxcorr
        >>> # define a custom xcorr function
        >>> def custom_normxcorr(templates, stream, pads, *args, **kwargs):
        ...     # Just to keep example short call other xcorr function
        ...     # in practice you would define your own function here
        ...     print('calling custom xcorr function')
        ...     return time_multi_normxcorr(templates, stream, pads)
        >>> # generate some toy templates and stream
        >>> random = np.random.RandomState(42)
        >>> template = obspy.read()
        >>> stream = obspy.read()
        >>> for num, tr in enumerate(stream):  # iter st and embed templates
        ...     data = tr.data
        ...     tr.data = random.randn(6000) * 5
        ...     tr.data[100: 100 + len(data)] = data
        >>> # call match_filter and ensure the custom function is used
        >>> detections = match_filter(
        ...     template_names=['1'], template_list=[template], st=stream,
        ...     threshold=.5, threshold_type='absolute', trig_int=1,
        ...     plot=False,
        ...     xcorr_func=custom_normxcorr)  # doctest:+ELLIPSIS
        calling custom xcorr function...
    """
    from eqcorrscan.core.match_filter.detection import Detection
    from eqcorrscan.utils.plotting import _match_filter_plot

    # Backwards compatibility: `plotvar` was the old name for `plot`.
    if "plotvar" in kwargs:
        Logger.warning("plotvar is depreciated, use plot instead")
        plot = kwargs.get("plotvar")

    if arg_check:
        # Check the arguments to be nice - if arguments wrong type the parallel
        # output for the error won't be useful
        if not isinstance(template_names, list):
            raise MatchFilterError('template_names must be of type: list')
        if not isinstance(template_list, list):
            raise MatchFilterError('templates must be of type: list')
        if not len(template_list) == len(template_names):
            raise MatchFilterError('Not the same number of templates as names')
        for template in template_list:
            if not isinstance(template, Stream):
                msg = 'template in template_list must be of type: ' + \
                      'obspy.core.stream.Stream'
                raise MatchFilterError(msg)
        if not isinstance(st, Stream):
            msg = 'st must be of type: obspy.core.stream.Stream'
            raise MatchFilterError(msg)
        if str(threshold_type) not in ('MAD', 'absolute', 'av_chan_corr'):
            msg = 'threshold_type must be one of: MAD, absolute, av_chan_corr'
            raise MatchFilterError(msg)
        # Every trace, in the data and in the templates, must share the
        # sampling rate of the first data trace.
        for tr in st:
            if not tr.stats.sampling_rate == st[0].stats.sampling_rate:
                raise MatchFilterError(
                    'Sampling rates are not equal %f: %f' %
                    (tr.stats.sampling_rate, st[0].stats.sampling_rate))
        for template in template_list:
            for tr in template:
                if not tr.stats.sampling_rate == st[0].stats.sampling_rate:
                    raise MatchFilterError('Template sampling rate does not '
                                           'match continuous data')
        for template in template_list:
            for tr in template:
                if isinstance(tr.data, np.ma.core.MaskedArray):
                    raise MatchFilterError(
                        'Template contains masked array, split first')
    if spike_test:
        Logger.info("Checking for spikes in data")
        _spike_test(st)
    # Peak-finding only runs in parallel when a core count was given.
    parallel = cores is not None
    if peak_cores is None:
        peak_cores = cores
    # Copy the stream here because we will muck about with it
    Logger.info("Copying data to keep your input safe")
    stream = st.copy()
    templates = [t.copy() for t in template_list]
    _template_names = template_names.copy()  # This can just be a shallow copy

    Logger.info("Reshaping templates")
    stream, templates, _template_names = _prep_data_for_correlation(
        stream=stream, templates=templates, template_names=_template_names)
    if len(templates) == 0:
        raise IndexError("No matching data")
    Logger.info('Starting the correlation run for these data')
    for template in templates:
        Logger.debug(str(template))
    Logger.debug(str(stream))
    multichannel_normxcorr = get_stream_xcorr(xcorr_func, concurrency)
    outtic = default_timer()
    [cccsums, no_chans, chans] = multichannel_normxcorr(templates=templates,
                                                        stream=stream,
                                                        cores=cores,
                                                        **kwargs)
    if len(cccsums[0]) == 0:
        raise MatchFilterError('Correlation has not run, zero length cccsum')
    outtoc = default_timer()
    Logger.info(
        'Looping over templates and streams took: {0:.4f}s'.format(outtoc -
                                                                   outtic))
    Logger.debug('The shape of the returned cccsums is: {0}'.format(
        cccsums.shape))
    Logger.debug('This is from {0} templates correlated with {1} channels of '
                 'data'.format(len(templates), len(stream)))
    detections = []
    if output_cat:
        det_cat = Catalog()
    # One raw threshold per template; anything that is neither "absolute"
    # nor "MAD" is treated as av_chan_corr.
    threshold_kind = str(threshold_type)
    if threshold_kind == "absolute":
        thresholds = [threshold for _ in range(len(cccsums))]
    elif threshold_kind == 'MAD':
        thresholds = [
            threshold * np.median(np.abs(cccsum)) for cccsum in cccsums
        ]
    else:
        thresholds = [threshold * no_chans[i] for i in range(len(cccsums))]
    sampling_rate = stream[0].stats.sampling_rate
    outtic = default_timer()
    all_peaks = multi_find_peaks(arr=cccsums,
                                 thresh=thresholds,
                                 parallel=parallel,
                                 trig_int=int(trig_int * sampling_rate),
                                 full_peaks=full_peaks,
                                 cores=peak_cores)
    outtoc = default_timer()
    Logger.info("Finding peaks took {0:.4f}s".format(outtoc - outtic))
    for i, cccsum in enumerate(cccsums):
        if np.abs(np.mean(cccsum)) > 0.05:
            Logger.warning('Mean is not zero!  Check this!')
        # Plot the correlation sum for this template against its threshold
        if plot:
            _match_filter_plot(stream=stream,
                               cccsum=cccsum,
                               template_names=_template_names,
                               rawthresh=thresholds[i],
                               plotdir=plotdir,
                               plot_format=plot_format,
                               i=i)
        if all_peaks[i]:
            Logger.debug("Found {0} peaks for template {1}".format(
                len(all_peaks[i]), _template_names[i]))
            for peak in all_peaks[i]:
                # peak is (correlation value, sample index); the index is
                # converted to a time relative to the first prepared trace.
                detecttime = (stream[0].stats.starttime +
                              peak[1] / stream[0].stats.sampling_rate)
                detection = Detection(template_name=_template_names[i],
                                      detect_time=detecttime,
                                      no_chans=no_chans[i],
                                      detect_val=peak[0],
                                      threshold=thresholds[i],
                                      typeofdet='corr',
                                      chans=chans[i],
                                      threshold_type=threshold_type,
                                      threshold_input=threshold)
                # The catalog needs events, so they are built whenever
                # either flag asks for them.
                if output_cat or output_event:
                    detection._calculate_event(template_st=templates[i])
                detections.append(detection)
                if output_cat:
                    det_cat.append(detection.event)
        else:
            Logger.debug("Found 0 peaks for template {0}".format(
                _template_names[i]))
    Logger.info("Made {0} detections from {1} templates".format(
        len(detections), len(templates)))
    if extract_detections:
        detection_streams = extract_from_stream(stream, detections)

    # The return shape depends on which optional outputs were requested.
    if output_cat and extract_detections:
        return detections, det_cat, detection_streams
    if output_cat:
        return detections, det_cat
    if extract_detections:
        return detections, detection_streams
    return detections