def test_daylong_checks(self):
     """Test that the data are day-long."""
     self.assertTrue(_check_daylong(self.st[0]))
     not_daylong = self.st[0].copy().trim(self.st[0].stats.starttime,
                                          self.st[0].stats.starttime + 3600)
     not_daylong.data = np.append(
         not_daylong.data,
         np.zeros(3602 * int(self.st[0].stats.sampling_rate)))
     self.assertFalse(_check_daylong(not_daylong))
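
A minimal, self-contained sketch of exercising _check_daylong outside the test class follows; it assumes the helper is importable from eqcorrscan.utils.pre_processing (as referenced later in this listing) and uses an illustrative 100 Hz dummy trace.

import numpy as np
from obspy import Trace

from eqcorrscan.utils.pre_processing import _check_daylong

# Day-long dummy trace at 100 Hz: almost every sample is non-zero.
good = Trace(data=np.random.randn(86400 * 100))
good.stats.sampling_rate = 100
print(_check_daylong(good))   # expected True: data are mostly non-zero

# Zero out most of the samples so that zeros outnumber non-zero values.
bad = good.copy()
bad.data[86400 * 100 // 4:] = 0.0
print(_check_daylong(bad))    # expected False
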
Example #2
    def client_detect(self, client, starttime, endtime, threshold,
                      threshold_type, trig_int, plot=False, plotdir=None,
                      min_gap=None, daylong=False, parallel_process=True,
                      xcorr_func=None, concurrency=None, cores=None,
                      ignore_length=False, ignore_bad_data=False,
                      group_size=None, return_stream=False, full_peaks=False,
                      save_progress=False, process_cores=None, retries=3,
                      **kwargs):
        """
        Detect using a Tribe of templates within a continuous stream.

        :type client: `obspy.clients.*.Client`
        :param client: Any obspy client with a dataselect service.
        :type starttime: :class:`obspy.core.UTCDateTime`
        :param starttime: Start-time for detections.
        :type endtime: :class:`obspy.core.UTCDateTime`
        :param endtime: End-time for detections.
        :type threshold: float
        :param threshold:
            Threshold level; if using `threshold_type='MAD'` then this is the
            multiple of the median absolute deviation.
        :type threshold_type: str
        :param threshold_type:
            The type of threshold to be used, can be MAD, absolute or
            av_chan_corr.  See Note on thresholding below.
        :type trig_int: float
        :param trig_int:
            Minimum gap between detections from one template in seconds.
            If multiple detections occur within trig_int of one-another, the
            one with the highest cross-correlation sum will be selected.
        :type plot: bool
        :param plot: Turn plotting on or off.
        :type plotdir: str
        :param plotdir:
            The path to save plots to. If `plotdir=None` (default) then the
            figure will be shown on screen.
        :type min_gap: float
        :param min_gap:
            Minimum gap allowed in data (in seconds) - used to remove traces
            with known issues.
        :type daylong: bool
        :param daylong:
            Set to True to use the
            :func:`eqcorrscan.utils.pre_processing.dayproc` routine, which
            performs additional checks and is more efficient than other
            methods for day-long data.
        :type parallel_process: bool
        :param parallel_process:
            Whether to run the data pre-processing in parallel.
        :type xcorr_func: str or callable
        :param xcorr_func:
            A str of a registered xcorr function or a callable for implementing
            a custom xcorr function. For more information see:
            :func:`eqcorrscan.utils.correlate.register_array_xcorr`
        :type concurrency: str
        :param concurrency:
            The type of concurrency to apply to the xcorr function. Options are
            'multithread', 'multiprocess', 'concurrent'. For more details see
            :func:`eqcorrscan.utils.correlate.get_stream_xcorr`
        :type cores: int
        :param cores: Number of workers for processing and detection.
        :type ignore_length: bool
        :param ignore_length:
            If using daylong=True, dayproc will check that the data cover at
            least 80% of the day and will raise an error if too much data are
            missing. Set ignore_length=True to skip this check. This is not
            recommended!
        :type ignore_bad_data: bool
        :param ignore_bad_data:
            If False (default), errors will be raised if data are excessively
            gappy or are mostly zeros. If True then no error will be raised,
            but an empty trace will be returned (and not used in detection).
        :type group_size: int
        :param group_size:
            Maximum number of templates to run at once; use this to reduce
            memory consumption. If unset, all templates will be run together.
        :type full_peaks: bool
        :param full_peaks: See `eqcorrscan.utils.findpeaks.find_peaks2_short`
        :type save_progress: bool
        :param save_progress:
            Whether to save the resulting party at every data step or not.
            Useful for long-running processes.
        :type process_cores: int
        :param process_cores:
            Number of processes to use for pre-processing (if different to
            `cores`).
        :type return_stream: bool
        :param return_stream:
            Whether to also return the downloaded stream, useful if you plan
            to use the stream for something else, e.g. lag_calc.
        :type retries: int
        :param retries:
            Number of attempts allowed for downloading - allows for transient
            server issues.

        :return:
            :class:`eqcorrscan.core.match_filter.Party` of Families of
            detections.


        .. Note::
            When using the "fftw" correlation backend the length of the fft
            can be set. See :mod:`eqcorrscan.utils.correlate` for more info.

        .. Note::
            Data are overlapped between loops so that detections are not
            missed at chunk start and end points (see the note for the
            :meth:`eqcorrscan.core.match_filter.Tribe.detect` method).
            Consequently the end-time is not strictly honoured: detections
            may occur after the requested end-time because data must be
            processed in complete chunks of the template process-length.

        .. warning::
            Plotting within the match-filter routine uses the Agg backend
            with interactive plotting turned off.  This is because the function
            is designed to work in bulk.  If you wish to use interactive
            plotting, import matplotlib in your script before importing
            match_filter; you will then see a warning that the matplotlib
            call has no effect, which confirms that match_filter has not
            changed your plotting behaviour.

        .. note::
            **Thresholding:**

            **MAD** threshold is calculated as the:

            .. math::

                threshold {\\times} (median(abs(cccsum)))

            where :math:`cccsum` is the cross-correlation sum for a given
            template.

            **absolute** threshold is a true absolute threshold based on the
            cccsum value.

            **av_chan_corr** is based on the mean values of single-channel
            cross-correlations assuming all data are present as required for
            the template, e.g.:

            .. math::

                av\_chan\_corr\_thresh=threshold \\times (cccsum /
                len(template))

            where :math:`template` is a single template from the input and the
            length is the number of channels within this template.
        """
        from obspy.clients.fdsn.client import FDSNException

        # This uses get_waveforms_bulk to get data - not all client types have
        # this, so we check and monkey patch here.
        if not hasattr(client, "get_waveforms_bulk"):
            assert hasattr(client, "get_waveforms"), (
                f"client {client} must have at least a get_waveforms method")
            Logger.info(f"Client {client} does not have a get_waveforms_bulk "
                        "method, monkey-patching this")
            client = get_waveform_client(client)

        party = Party()
        # Apply a buffer because downloaded data are often not exactly the
        # requested length
        buff = 300
        data_length = max([t.process_length for t in self.templates])
        pad = 0
        for template in self.templates:
            max_delay = (template.st.sort(['starttime'])[-1].stats.starttime -
                         template.st.sort(['starttime'])[0].stats.starttime)
            if max_delay > pad:
                pad = max_delay
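        # pad is the largest moveout within any one template (time between its
        # earliest and latest trace start-times); each data chunk is extended
        # by pad so that template windows at chunk edges are fully covered.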
        download_groups = int(endtime - starttime) / data_length
        template_channel_ids = []
        for template in self.templates:
            for tr in template.st:
                if tr.stats.network not in [None, '']:
                    chan_id = (tr.stats.network,)
                else:
                    chan_id = ('*',)
                if tr.stats.station not in [None, '']:
                    chan_id += (tr.stats.station,)
                else:
                    chan_id += ('*',)
                if tr.stats.location not in [None, '']:
                    chan_id += (tr.stats.location,)
                else:
                    chan_id += ('*',)
                if tr.stats.channel not in [None, '']:
                    if len(tr.stats.channel) == 2:
                        chan_id += (tr.stats.channel[0] + '?' +
                                    tr.stats.channel[-1],)
                    else:
                        chan_id += (tr.stats.channel,)
                else:
                    chan_id += ('*',)
                template_channel_ids.append(chan_id)
        template_channel_ids = list(set(template_channel_ids))
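        # Each entry is a (network, station, location, channel) tuple, with a
        # "*" wildcard wherever the template trace does not set that code,
        # e.g. ("NZ", "WEL", "*", "HHZ") (example values are illustrative).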
        if return_stream:
            stream = Stream()
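        # Round the number of download chunks up so that a final, partial
        # chunk of data is still downloaded and scanned.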
        if int(download_groups) < download_groups:
            download_groups = int(download_groups) + 1
        else:
            download_groups = int(download_groups)
        for i in range(download_groups):
            bulk_info = []
            for chan_id in template_channel_ids:
                bulk_info.append((
                    chan_id[0], chan_id[1], chan_id[2], chan_id[3],
                    starttime + (i * data_length) - (pad + buff),
                    starttime + ((i + 1) * data_length) + (pad + buff)))
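            # Attempt the bulk download up to `retries` times; the for/else
            # below raises only if every attempt fails (no break occurs).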
            for retry_attempt in range(retries):
                try:
                    Logger.info("Downloading data")
                    st = client.get_waveforms_bulk(bulk_info)
                    Logger.info(
                        "Downloaded data for {0} traces".format(len(st)))
                    break
                except FDSNException as e:
                    if "Split the request in smaller" in " ".join(e.args):
                        Logger.warning(
                            "Datacentre does not support large requests: "
                            "splitting request into smaller chunks")
                        st = Stream()
                        for _bulk in bulk_info:
                            try:
                                st += client.get_waveforms_bulk([_bulk])
                            except Exception as e:
                                Logger.error("No data for {0}".format(_bulk))
                                Logger.error(e)
                                continue
                        Logger.info("Downloaded data for {0} traces".format(
                            len(st)))
                        break
                except Exception as e:
                    Logger.error(e)
                    continue
            else:
                raise MatchFilterError(
                    "Could not download data after {0} attempts".format(
                        retries))
            # Get gaps and remove traces as necessary
            if min_gap:
                gaps = st.get_gaps(min_gap=min_gap)
                if len(gaps) > 0:
                    Logger.warning("Large gaps in downloaded data")
                    st.merge()
                    gappy_channels = list(
                        set([(gap[0], gap[1], gap[2], gap[3])
                             for gap in gaps]))
                    _st = Stream()
                    for tr in st:
                        tr_stats = (tr.stats.network, tr.stats.station,
                                    tr.stats.location, tr.stats.channel)
                        if tr_stats in gappy_channels:
                            Logger.warning(
                                "Removing gappy channel: {0}".format(tr))
                        else:
                            _st += tr
                    st = _st
                    st = st.split()
            st.detrend("simple").merge()
            st.trim(starttime=starttime + (i * data_length) - pad,
                    endtime=starttime + ((i + 1) * data_length) + pad)
            # Iterate over static copies so that removing traces from the
            # stream does not skip the trace that follows a removed one.
            for tr in list(st):
                if not _check_daylong(tr):
                    st.remove(tr)
                    Logger.warning(
                        "{0} contains more zeros than non-zero, "
                        "removed".format(tr.id))
            for tr in list(st):
                if tr.stats.endtime - tr.stats.starttime < \
                   0.8 * data_length:
                    st.remove(tr)
                    Logger.warning(
                        "{0} is less than 80% of the required length"
                        ", removed".format(tr.id))
            if return_stream:
                stream += st
            try:
                party += self.detect(
                    stream=st, threshold=threshold,
                    threshold_type=threshold_type, trig_int=trig_int,
                    plot=plot, plotdir=plotdir, daylong=daylong,
                    parallel_process=parallel_process, xcorr_func=xcorr_func,
                    concurrency=concurrency, cores=cores,
                    ignore_length=ignore_length,
                    ignore_bad_data=ignore_bad_data, group_size=group_size,
                    overlap=None, full_peaks=full_peaks,
                    process_cores=process_cores, **kwargs)
                if save_progress:
                    party.write("eqcorrscan_temporary_party")
            except Exception as e:
                Logger.critical(
                    'Error, routine incomplete, returning incomplete Party')
                Logger.error('Error: {0}'.format(e))
                if return_stream:
                    return party, stream
                else:
                    return party
        for family in party:
            if family is not None:
                family.detections = family._uniq().detections
        if return_stream:
            return party, stream
        else:
            return party
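
A hedged usage sketch for client_detect follows; the tribe file name is hypothetical, the GEONET FDSN service is only an example data source, and the threshold values are illustrative rather than recommended.

from obspy import UTCDateTime
from obspy.clients.fdsn import Client

from eqcorrscan.core.match_filter import Tribe

# Hypothetical tribe archive; substitute your own templates.
tribe = Tribe().read("tribe.tgz")
client = Client("GEONET")

party, stream = tribe.client_detect(
    client=client,
    starttime=UTCDateTime(2020, 1, 1),
    endtime=UTCDateTime(2020, 1, 2),
    threshold=8.0, threshold_type="MAD", trig_int=6.0,
    return_stream=True)
print(party)
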
Example #3
def _download_from_client(client,
                          client_type,
                          catalog,
                          data_pad,
                          process_len,
                          available_stations=[],
                          all_channels=False):
    """
    Internal function to handle downloading from either seishub or fdsn client
    """
    st = Stream()
    catalog = Catalog(sorted(catalog, key=lambda e: e.origins[0].time))
    all_waveform_info = []
    for event in catalog:
        for pick in event.picks:
            if not pick.waveform_id:
                Logger.warning(
                    "Pick not associated with waveforms, will not use:"
                    " {0}".format(pick))
                continue
            if all_channels:
                channel_code = pick.waveform_id.channel_code[0:2] + "?"
            else:
                channel_code = pick.waveform_id.channel_code
            all_waveform_info.append(
                (pick.waveform_id.network_code, pick.waveform_id.station_code,
                 channel_code, pick.waveform_id.location_code))
    starttime = UTCDateTime(catalog[0].origins[0].time - data_pad)
    endtime = starttime + process_len
    # Check that endtime is after the last event
    if not endtime > catalog[-1].origins[0].time + data_pad:
        raise TemplateGenError('Events do not fit in processing window')
    all_waveform_info = sorted(list(set(all_waveform_info)))
    dropped_pick_stations = 0
    for waveform_info in all_waveform_info:
        net, sta, chan, loc = waveform_info
        if client_type == 'seishub' and sta not in available_stations:
            Logger.error("Station not found in SeisHub DB")
            dropped_pick_stations += 1
            continue
        Logger.info('Downloading for start-time: {0} end-time: {1}'.format(
            starttime, endtime))
        Logger.debug('.'.join([net, sta, loc, chan]))
        query_params = dict(network=net,
                            station=sta,
                            location=loc,
                            channel=chan,
                            starttime=starttime,
                            endtime=endtime)
        try:
            st += client.get_waveforms(**query_params)
        except Exception as e:
            Logger.error(e)
            Logger.error(
                'Found no data for this station: {0}'.format(query_params))
            dropped_pick_stations += 1
    if not st and dropped_pick_stations == len(event.picks):
        raise Exception('No data available, is the server down?')
    st.merge()
    # Clients download data in chunks, so check that each trace is the
    # desired length
    final_channels = []
    for tr in st:
        tr.trim(starttime, endtime)
        if len(tr.data) == (process_len * tr.stats.sampling_rate) + 1:
            tr.data = tr.data[1:]
        if tr.stats.endtime - tr.stats.starttime < 0.8 * process_len:
            Logger.warning(
                "Data for {0}.{1} is {2} hours long, which is less than 80 "
                "percent of the desired length, will not use".format(
                    tr.stats.station, tr.stats.channel,
                    (tr.stats.endtime - tr.stats.starttime) / 3600))
        elif not pre_processing._check_daylong(tr):
            Logger.warning("Data are mostly zeros, removing trace: {0}".format(
                tr.id))
        else:
            final_channels.append(tr)
    st.traces = final_channels
    return st
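
_download_from_client is internal to EQcorrscan's template generation, so the sketch below is only illustrative: the QuakeML file name is hypothetical and the data_pad / process_len values are typical rather than required.

from obspy import read_events
from obspy.clients.fdsn import Client

# Hypothetical picked catalog and example FDSN data centre.
catalog = read_events("picked_events.xml")
client = Client("GEONET")

st = _download_from_client(
    client=client, client_type="fdsn", catalog=catalog,
    data_pad=90, process_len=86400, all_channels=False)
print(st)
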