Example #1
    def filter(self, dates=None, min_dets=1):
        """
        Return a new Party filtered according to conditions.

        Return a new Party with only detections within a date range and
        only families with a minimum number of detections.

        :type dates: list of obspy.core.UTCDateTime objects
        :param dates: A start and end date for the new Party
        :type min_dets: int
        :param min_dets: Minimum number of detections per family

        .. rubric:: Example

        >>> from obspy import UTCDateTime
        >>> Party().read().filter(dates=[UTCDateTime(2016, 1, 1),
        ...                              UTCDateTime(2017, 1, 1)],
        ...                       min_dets=30) # doctest: +SKIP
        """
        if dates is None:
            raise MatchFilterError('Need a list defining a date range')
        new_party = Party()
        for fam in self.families:
            new_fam = Family(template=fam.template,
                             detections=[
                                 det for det in fam
                                 if dates[0] < det.detect_time < dates[1]
                             ])
            if len(new_fam) >= min_dets:
                new_party.families.append(new_fam)
        return new_party
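The date comparison above is strict, so a detection falling exactly on either boundary is excluded. A minimal sketch of that behaviour (assuming obspy is installed; the detection time is made up):

from obspy import UTCDateTime

dates = [UTCDateTime(2016, 1, 1), UTCDateTime(2017, 1, 1)]
detect_time = UTCDateTime(2016, 1, 1)  # hypothetical detection exactly on the start date
print(dates[0] < detect_time < dates[1])  # False - boundary detections are dropped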
Example #2
def _spike_test(stream, percent=0.99, multiplier=1e7):
    """
    Check for very large spikes in data and raise an error if found.

    :param stream: Stream to look for spikes in.
    :type stream: :class:`obspy.core.stream.Stream`
    :param percent: Percentage as a decimal to calculate range for.
    :type percent: float
    :param multiplier: Multiplier of range to define a spike.
    :type multiplier: float
    """
    import numpy as np
    from eqcorrscan.core.match_filter.matched_filter import MatchFilterError

    list_ids = []
    for tr in stream:
        # Flag the trace if any sample exceeds twice the largest absolute
        # amplitude of the lowest `percent` of samples, times `multiplier`.
        if (tr.data > 2 * np.max(np.sort(
                np.abs(tr.data))[0:int(percent * len(tr.data))]
                                 ) * multiplier).sum() > 0:
            list_ids.append(tr.id)
    if list_ids != []:
        ids = ', '.join(list_ids)
        msg = ('Spikes above ' + str(multiplier) +
               ' of the range of ' + str(percent) +
               ' of the data present, check:\n' + ids + '.\n'
               'This would otherwise likely result in an issue during ' +
               'FFT prior to cross-correlation.\n' +
               'If you think this spike is real please report ' +
               'this as a bug.')
        raise MatchFilterError(msg)
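A minimal sketch of the spike criterion used above on synthetic data (assuming numpy and obspy are installed): a trace is flagged when any sample exceeds twice the largest absolute amplitude of the lowest `percent` fraction of samples, multiplied by `multiplier`.

import numpy as np
from obspy import Stream, Trace

data = np.random.randn(10000)
data[5000] = 1e9  # inject an obvious spike
st = Stream([Trace(data=data)])

percent, multiplier = 0.99, 1e7
for tr in st:
    limit = 2 * np.max(
        np.sort(np.abs(tr.data))[0:int(percent * len(tr.data))]) * multiplier
    print(tr.id, "spike found:", (tr.data > limit).sum() > 0)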
Example #3
def _par_read(dirname, compressed=True):
    """
    Internal function to read a formatted parameter file.

    :type dirname: str
    :param dirname: Directory to read the parameter file from.
    :type compressed: bool
    :param compressed: Whether the directory is compressed or not.
    """
    import os
    import tarfile

    from eqcorrscan.core.match_filter.matched_filter import MatchFilterError
    from eqcorrscan.core.match_filter.template import Template

    templates = []
    if compressed:
        arc = tarfile.open(dirname, "r:*")
        members = arc.getmembers()
        _parfile = [member for member in members
                    if member.name.split(os.sep)[-1] ==
                    'template_parameters.csv']
        if len(_parfile) == 0:
            arc.close()
            raise MatchFilterError(
                'No template parameter file in archive')
        parfile = arc.extractfile(_parfile[0])
    else:
        parfile = open(dirname + '/' + 'template_parameters.csv', 'r')
    for line in parfile:
        # Each line is a comma-separated list of "key: value" pairs; values
        # are cast to int (filt_order) or float where possible, otherwise
        # kept as strings.
        t_in = Template()
        for key_pair in line.rstrip().split(','):
            if key_pair.split(':')[0].strip() == 'name':
                t_in.__dict__[key_pair.split(':')[0].strip()] = \
                    key_pair.split(':')[-1].strip()
            elif key_pair.split(':')[0].strip() == 'filt_order':
                try:
                    t_in.__dict__[key_pair.split(':')[0].strip()] = \
                        int(key_pair.split(':')[-1])
                except ValueError:
                    pass
            else:
                try:
                    t_in.__dict__[key_pair.split(':')[0].strip()] = \
                        float(key_pair.split(':')[-1])
                except ValueError:
                    pass
        templates.append(t_in)
    parfile.close()
    if compressed:
        arc.close()
    return templates
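A hedged sketch of the line format _par_read expects (the key names follow the Template attributes; the values are illustrative): each line is a comma-separated series of "key: value" pairs.

example_line = ("name: test_template, lowcut: 2.0, highcut: 8.0, "
                "samp_rate: 20.0, filt_order: 4, prepick: 0.1, "
                "process_length: 86400.0")
for key_pair in example_line.rstrip().split(','):
    key = key_pair.split(':')[0].strip()
    value = key_pair.split(':')[-1].strip()
    print(key, '->', value)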
Example #4
    def __init__(self,
                 name=None,
                 st=None,
                 lowcut=None,
                 highcut=None,
                 samp_rate=None,
                 filt_order=None,
                 process_length=None,
                 prepick=None,
                 event=None):
        # Template names are restricted to lower-case letters, digits and
        # underscores.
        name_regex = re.compile(r"^[a-z_0-9]+$")
        if name is not None and not re.match(name_regex, name):
            raise ValueError("Invalid name: '%s' - Must satisfy the regex "
                             "'%s'." % (name, name_regex.pattern))
        if name is None:
            temp_name = "unnamed"
        else:
            temp_name = name
        self.name = name
        self.st = st
        self.lowcut = lowcut
        self.highcut = highcut
        self.samp_rate = samp_rate
        # All traces in the template must share the stated sampling rate.
        if st and samp_rate is not None:
            for tr in st:
                if not tr.stats.sampling_rate == self.samp_rate:
                    raise MatchFilterError(
                        'Sampling rates do not match in data.')
        self.filt_order = filt_order
        self.process_length = process_length
        self.prepick = prepick
        # Tag the linked event with a comment so the template can be traced
        # back to the event it was built from.
        if event is not None:
            if "eqcorrscan_template_" + temp_name not in \
                    [c.text for c in event.comments]:
                event.comments.append(
                    Comment(
                        text="eqcorrscan_template_" + temp_name,
                        creation_info=CreationInfo(agency='eqcorrscan',
                                                   author=getpass.getuser())))
        self.event = event
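A small sketch of the name constraint enforced in __init__ above (the candidate names are made up): template names must match ^[a-z_0-9]+$.

import re

name_regex = re.compile(r"^[a-z_0-9]+$")
for candidate in ("2016_event_01", "Template-1", "my template"):
    print(candidate, "valid:", bool(re.match(name_regex, candidate)))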
Example #5
    def client_detect(self, client, starttime, endtime, threshold,
                      threshold_type, trig_int, plot=False, plotdir=None,
                      min_gap=None, daylong=False, parallel_process=True,
                      xcorr_func=None, concurrency=None, cores=None,
                      ignore_length=False, ignore_bad_data=False,
                      group_size=None, return_stream=False, full_peaks=False,
                      save_progress=False, process_cores=None, retries=3,
                      **kwargs):
        """
        Detect using a Tribe of templates within a continuous stream.

        :type client: `obspy.clients.*.Client`
        :param client: Any obspy client with a dataselect service.
        :type starttime: :class:`obspy.core.UTCDateTime`
        :param starttime: Start-time for detections.
        :type endtime: :class:`obspy.core.UTCDateTime`
        :param endtime: End-time for detections.
        :type threshold: float
        :param threshold:
            Threshold level; if using `threshold_type='MAD'` then this will be
            the multiple of the median absolute deviation.
        :type threshold_type: str
        :param threshold_type:
            The type of threshold to be used, can be MAD, absolute or
            av_chan_corr.  See Note on thresholding below.
        :type trig_int: float
        :param trig_int:
            Minimum gap between detections from one template in seconds.
            If multiple detections occur within trig_int of one-another, the
            one with the highest cross-correlation sum will be selected.
        :type plot: bool
        :param plot: Turn plotting on or off.
        :type plotdir: str
        :param plotdir:
            The path to save plots to. If `plotdir=None` (default) then the
            figure will be shown on screen.
        :type min_gap: float
        :param min_gap:
            Minimum gap allowed in data - use to remove traces with known
            issues.
        :type daylong: bool
        :param daylong:
            Set to True to use the
            :func:`eqcorrscan.utils.pre_processing.dayproc` routine, which
            performs additional checks and is more efficient than other
            methods for day-long data.
        :type parallel_process: bool
        :param parallel_process: Whether to run pre-processing in parallel.
        :type xcorr_func: str or callable
        :param xcorr_func:
            A str of a registered xcorr function or a callable for implementing
            a custom xcorr function. For more information see:
            :func:`eqcorrscan.utils.correlate.register_array_xcorr`
        :type concurrency: str
        :param concurrency:
            The type of concurrency to apply to the xcorr function. Options are
            'multithread', 'multiprocess', 'concurrent'. For more details see
            :func:`eqcorrscan.utils.correlate.get_stream_xcorr`
        :type cores: int
        :param cores: Number of workers for processing and detection.
        :type ignore_length: bool
        :param ignore_length:
            If using daylong=True, then dayproc will check that the data
            are present for at least 80% of the day. If you do not want this
            check (which raises an error when too much data are missing) then
            set ignore_length=True.  This is not recommended!
        :type ignore_bad_data: bool
        :param ignore_bad_data:
            If False (default), errors will be raised if data are excessively
            gappy or are mostly zeros. If True then no error will be raised,
            but an empty trace will be returned (and not used in detection).
        :type group_size: int
        :param group_size:
            Maximum number of templates to run at once, use to reduce memory
            consumption, if unset will use all templates.
        :type full_peaks: bool
        :param full_peaks: See `eqcorrscan.utils.findpeaks.find_peaks2_short`
        :type save_progress: bool
        :param save_progress:
            Whether to save the resulting party at every data step or not.
            Useful for long-running processes.
        :type process_cores: int
        :param process_cores:
            Number of processes to use for pre-processing (if different to
            `cores`).
        :type return_stream: bool
        :param return_stream:
            Whether to also output the stream downloaded, useful if you plan
            to use the stream for something else, e.g. lag_calc.
        :type retries: int
        :param retries:
            Number of attempts allowed for downloading - allows for transient
            server issues.

        :return:
            :class:`eqcorrscan.core.match_filter.Party` of Families of
            detections.


        .. Note::
            When using the "fftw" correlation backend the length of the fft
            can be set. See :mod:`eqcorrscan.utils.correlate` for more info.

        .. Note::
            Data are overlapped between loops so that no detections are
            missed at data start-stop points (see the note for the
            :meth:`eqcorrscan.core.match_filter.Tribe.detect` method).
            As a result the end-time is not strictly honoured, and detections
            may occur after the end-time set.  This is because data must be
            processed in whole process-length chunks.

        .. warning::
            Plotting within the match-filter routine uses the Agg backend
            with interactive plotting turned off.  This is because the function
            is designed to work in bulk.  If you wish to turn interactive
            plotting on you must import matplotlib in your script first,
            when you then import match_filter you will get the warning that
            this call to matplotlib has no effect, which will mean that
            match_filter has not changed the plotting behaviour.

        .. note::
            **Thresholding:**

            **MAD** threshold is calculated as:

            .. math::

                threshold {\\times} (median(abs(cccsum)))

            where :math:`cccsum` is the cross-correlation sum for a given
            template.

            **absolute** threshold is a true absolute threshold based on the
            cccsum value.

            **av_chan_corr** is based on the mean values of single-channel
            cross-correlations assuming all data are present as required for
            the template, e.g.:

            .. math::

                av\_chan\_corr\_thresh=threshold \\times (cccsum /
                len(template))

            where :math:`template` is a single template from the input and the
            length is the number of channels within this template.
        """
        from obspy.clients.fdsn.client import FDSNException

        # This uses get_waveforms_bulk to get data - not all client types have
        # this, so we check and monkey patch here.
        if not hasattr(client, "get_waveforms_bulk"):
            assert hasattr(client, "get_waveforms"), (
                f"client {client} must have at least a get_waveforms method")
            Logger.info(f"Client {client} does not have a get_waveforms_bulk "
                        "method, monkey-patching this")
            client = get_waveform_client(client)

        party = Party()
        # Apply a buffer, often data downloaded is not the correct length
        buff = 300
        data_length = max([t.process_length for t in self.templates])
        pad = 0
        for template in self.templates:
            max_delay = (template.st.sort(['starttime'])[-1].stats.starttime -
                         template.st.sort(['starttime'])[0].stats.starttime)
            if max_delay > pad:
                pad = max_delay
        # Number of process-length chunks needed to span the requested period;
        # rounded up below if it does not divide evenly.
        download_groups = int(endtime - starttime) / data_length
        # Build a unique set of (net, sta, loc, chan) ids for the bulk request,
        # using wildcards for blank fields and expanding two-character channel
        # codes (e.g. "EZ" -> "E?Z").
        template_channel_ids = []
        for template in self.templates:
            for tr in template.st:
                if tr.stats.network not in [None, '']:
                    chan_id = (tr.stats.network,)
                else:
                    chan_id = ('*',)
                if tr.stats.station not in [None, '']:
                    chan_id += (tr.stats.station,)
                else:
                    chan_id += ('*',)
                if tr.stats.location not in [None, '']:
                    chan_id += (tr.stats.location,)
                else:
                    chan_id += ('*',)
                if tr.stats.channel not in [None, '']:
                    if len(tr.stats.channel) == 2:
                        chan_id += (tr.stats.channel[0] + '?' +
                                    tr.stats.channel[-1],)
                    else:
                        chan_id += (tr.stats.channel,)
                else:
                    chan_id += ('*',)
                template_channel_ids.append(chan_id)
        template_channel_ids = list(set(template_channel_ids))
        if return_stream:
            stream = Stream()
        if int(download_groups) < download_groups:
            download_groups = int(download_groups) + 1
        else:
            download_groups = int(download_groups)
        for i in range(download_groups):
            bulk_info = []
            for chan_id in template_channel_ids:
                bulk_info.append((
                    chan_id[0], chan_id[1], chan_id[2], chan_id[3],
                    starttime + (i * data_length) - (pad + buff),
                    starttime + ((i + 1) * data_length) + (pad + buff)))
            for retry_attempt in range(retries):
                try:
                    Logger.info("Downloading data")
                    st = client.get_waveforms_bulk(bulk_info)
                    Logger.info(
                        "Downloaded data for {0} traces".format(len(st)))
                    break
                except FDSNException as e:
                    if "Split the request in smaller" in " ".join(e.args):
                        Logger.warning(
                            "Datacentre does not support large requests: "
                            "splitting request into smaller chunks")
                        st = Stream()
                        for _bulk in bulk_info:
                            try:
                                st += client.get_waveforms_bulk([_bulk])
                            except Exception as e:
                                Logger.error("No data for {0}".format(_bulk))
                                Logger.error(e)
                                continue
                        Logger.info("Downloaded data for {0} traces".format(
                            len(st)))
                        break
                except Exception as e:
                    Logger.error(e)
                    continue
            else:
                raise MatchFilterError(
                    "Could not download data after {0} attempts".format(
                        retries))
            # Get gaps and remove traces as necessary
            if min_gap:
                gaps = st.get_gaps(min_gap=min_gap)
                if len(gaps) > 0:
                    Logger.warning("Large gaps in downloaded data")
                    st.merge()
                    gappy_channels = list(
                        set([(gap[0], gap[1], gap[2], gap[3])
                             for gap in gaps]))
                    _st = Stream()
                    for tr in st:
                        tr_stats = (tr.stats.network, tr.stats.station,
                                    tr.stats.location, tr.stats.channel)
                        if tr_stats in gappy_channels:
                            Logger.warning(
                                "Removing gappy channel: {0}".format(tr))
                        else:
                            _st += tr
                    st = _st
                    st.split()
            st.detrend("simple").merge()
            st.trim(starttime=starttime + (i * data_length) - pad,
                    endtime=starttime + ((i + 1) * data_length) + pad)
            # Iterate over copies of the trace list so that removing traces
            # while looping is safe.
            for tr in list(st):
                if not _check_daylong(tr):
                    st.remove(tr)
                    Logger.warning(
                        "{0} contains more zeros than non-zero, "
                        "removed".format(tr.id))
            for tr in list(st):
                if tr.stats.endtime - tr.stats.starttime < \
                   0.8 * data_length:
                    st.remove(tr)
                    Logger.warning(
                        "{0} is less than 80% of the required length"
                        ", removed".format(tr.id))
            if return_stream:
                stream += st
            try:
                party += self.detect(
                    stream=st, threshold=threshold,
                    threshold_type=threshold_type, trig_int=trig_int,
                    plot=plot, plotdir=plotdir, daylong=daylong,
                    parallel_process=parallel_process, xcorr_func=xcorr_func,
                    concurrency=concurrency, cores=cores,
                    ignore_length=ignore_length,
                    ignore_bad_data=ignore_bad_data, group_size=group_size,
                    overlap=None, full_peaks=full_peaks,
                    process_cores=process_cores, **kwargs)
                if save_progress:
                    party.write("eqcorrscan_temporary_party")
            except Exception as e:
                Logger.critical(
                    'Error, routine incomplete, returning incomplete Party')
                Logger.error('Error: {0}'.format(e))
                if return_stream:
                    return party, stream
                else:
                    return party
        for family in party:
            if family is not None:
                family.detections = family._uniq().detections
        if return_stream:
            return party, stream
        else:
            return party
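A hedged usage sketch of client_detect (the client, dates and threshold values are placeholders, and `tribe` is assumed to be an existing Tribe built elsewhere):

from obspy import UTCDateTime
from obspy.clients.fdsn import Client

client = Client("GEONET")  # any client with a dataselect service
party = tribe.client_detect(
    client=client, starttime=UTCDateTime(2016, 1, 1),
    endtime=UTCDateTime(2016, 1, 2), threshold=8.0,
    threshold_type="MAD", trig_int=6.0, plot=False)
print(party)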
Example #6
    def write(self,
              filename,
              format='tar',
              write_detection_catalog=True,
              catalog_format="QUAKEML",
              overwrite=False):
        """
        Write the Party out, selecting the output format.

        :type format: str
        :param format:
            One of either 'tar', 'csv', or any obspy supported
            catalog output. See note below on formats
        :type filename: str
        :param filename: Path to write file to.
        :type write_detection_catalog: bool
        :param write_detection_catalog:
            Whether to write the detection catalog object or not - writing
            large catalog files can be slow, and catalogs can be reconstructed
            from the Tribe.
        :type catalog_format: str
        :param catalog_format:
            What format to write the detection-catalog with. Only Nordic,
            SC3ML, QUAKEML are supported. Note that not all information is
            written for all formats (QUAKEML is the most complete, but is
            slow for IO).
        :type overwrite: bool
        :param overwrite:
            Specifies whether detection-files are overwritten if they exist
            already. By default, no files are overwritten.

        .. NOTE::
            csv format will write out detection objects, all other
            outputs will write the catalog.  These cannot be rebuilt into
            a Family object.  The only format that can be read back into
            Family objects is the 'tar' type.

        .. NOTE::
            We recommend writing to the 'tar' format, which will write out
            all the template information (wavefiles as miniseed and metadata)
            alongside the detections and store these in a tar archive. This
            is readable by other programs and maintains all information
            required for further study.

        .. rubric:: Example

        >>> party = Party().read()
        >>> party.write('test_tar_write', format='tar')
        Party of 4 Families.
        >>> party.write('test_csv_write.csv', format='csv')
        Party of 4 Families.
        >>> party.write('test_quakeml.xml', format='quakeml')
        Party of 4 Families.
        """
        from eqcorrscan.core.match_filter.tribe import Tribe
        from eqcorrscan.core.match_filter import CAT_EXT_MAP

        if catalog_format not in CAT_EXT_MAP.keys():
            raise TypeError("{0} is not supported".format(catalog_format))
        if format.lower() == 'csv':
            if os.path.isfile(filename) and not overwrite:
                raise MatchFilterError('Will not overwrite existing file: %s' %
                                       filename)
            if os.path.isfile(filename) and overwrite:
                os.remove(filename)
            for family in self.families:
                write_detections(fname=filename,
                                 detections=family.detections,
                                 mode="a")
        elif format.lower() == 'tar':
            if not filename.endswith('.tgz'):
                filename = filename + ".tgz"
            if os.path.exists(filename) and not overwrite:
                raise IOError('Will not overwrite existing file: %s' %
                              filename)
            # os.makedirs(filename)
            with temporary_directory() as temp_dir:
                Tribe([f.template for f in self.families
                       ]).write(filename=temp_dir,
                                compress=False,
                                catalog_format=catalog_format)
                if write_detection_catalog:
                    all_cat = Catalog()
                    for family in self.families:
                        all_cat += family.catalog
                    if not len(all_cat) == 0:
                        all_cat.write(join(
                            temp_dir,
                            'catalog.{0}'.format(CAT_EXT_MAP[catalog_format])),
                                      format=catalog_format)
                for i, family in enumerate(self.families):
                    Logger.debug('Writing family %i' % i)
                    name = family.template.name + '_detections.csv'
                    name_to_write = join(temp_dir, name)
                    _write_family(family=family, filename=name_to_write)
                with tarfile.open(filename, "w:gz") as tar:
                    tar.add(temp_dir, arcname=os.path.basename(filename))
        else:
            Logger.warning('Writing only the catalog component, metadata '
                           'will not be preserved')
            self.get_catalog().write(filename=filename, format=format)
        return self
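A hedged round-trip sketch for the 'tar' format noted above, the only format that can be read back into a Party (the filename follows the docstring example; the '.tgz' suffix is appended by write):

party = Party().read()
party.write('test_tar_write', format='tar', overwrite=True)
party_back = Party().read('test_tar_write.tgz')
print(party_back)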
Example #7
    def rethreshold(self,
                    new_threshold,
                    new_threshold_type='MAD',
                    abs_values=False):
        """
        Remove detections from the Party that are below a new threshold.

        .. Note:: The threshold can only be raised, not lowered.

        .. Warning::
            Works in place on Party.

        :type new_threshold: float
        :param new_threshold: New threshold level
        :type new_threshold_type: str
        :param new_threshold_type: Either 'MAD', 'absolute' or 'av_chan_corr'
        :type abs_values: bool
        :param abs_values:
            Whether to compare the absolute value of the detection statistic
            against the new threshold.

        .. rubric:: Examples

        Using the MAD threshold on detections made using the MAD threshold:

        >>> party = Party().read()
        >>> len(party)
        4
        >>> party = party.rethreshold(10.0)
        >>> len(party)
        4
        >>> # Note that all detections are self detections


        Using the absolute thresholding method on the same Party:

        >>> party = Party().read().rethreshold(5.9, 'absolute')
        >>> len(party)
        1


        Using the av_chan_corr method on the same Party:

        >>> party = Party().read().rethreshold(0.9, 'av_chan_corr')
        >>> len(party)
        4
        """
        for family in self.families:
            rethresh_detections = []
            for d in family.detections:
                if new_threshold_type == 'MAD' and d.threshold_type == 'MAD':
                    new_thresh = (d.threshold /
                                  d.threshold_input) * new_threshold
                elif new_threshold_type == 'MAD' and d.threshold_type != 'MAD':
                    raise MatchFilterError('Cannot recalculate MAD level, '
                                           'use another threshold type')
                elif new_threshold_type == 'absolute':
                    new_thresh = new_threshold
                elif new_threshold_type == 'av_chan_corr':
                    new_thresh = new_threshold * d.no_chans
                else:
                    raise MatchFilterError(
                        'new_threshold_type %s is not recognised' %
                        str(new_threshold_type))
                rethresh = False
                if abs_values:
                    if abs(float(d.detect_val)) >= new_thresh:
                        rethresh = True
                else:
                    if float(d.detect_val) >= new_thresh:
                        rethresh = True
                if rethresh:
                    d.threshold = new_thresh
                    d.threshold_input = new_threshold
                    d.threshold_type = new_threshold_type
                    rethresh_detections.append(d)
            family.detections = rethresh_detections
        return self
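A small numeric sketch of the MAD re-scaling used above (the numbers are made up): a detection originally made at threshold_input = 8 x MAD, whose realised threshold was 0.24, is re-thresholded at 10 x MAD.

threshold, threshold_input, new_threshold = 0.24, 8.0, 10.0
new_thresh = (threshold / threshold_input) * new_threshold
print(round(new_thresh, 2))  # 0.3 - detections with detect_val below this are dropped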
Example #8
    def decluster(self, trig_int, timing='detect', metric='avg_cor'):
        """
        De-cluster a Party of detections by enforcing a detection separation.

        De-clustering occurs between events detected by different (or the same)
        templates. If multiple detections occur within trig_int then the
        preferred detection is determined by the metric argument: either the
        average single-station correlation coefficient, calculated as
        Detection.detect_val / Detection.no_chans, or the raw cross-channel
        correlation sum, which is simply Detection.detect_val.

        :type trig_int: float
        :param trig_int: Minimum detection separation in seconds.
        :type metric: str
        :param metric: What metric to sort peaks by. Either 'avg_cor' which
            takes the single station average correlation or 'cor_sum' which
            takes the total correlation sum across all channels.
        :type timing: str
        :param timing:
            Either 'detect' or 'origin' to decluster based on either the
            detection time or the origin time.

        .. Warning::
            Works in place on object, if you need to keep the original safe
            then run this on a copy of the object!

        .. rubric:: Example

        >>> party = Party().read()
        >>> len(party)
        4
        >>> declustered = party.decluster(20)
        >>> len(party)
        3
        """
        if self.__len__() == 0:
            return self
        all_detections = []
        for fam in self.families:
            all_detections.extend(fam.detections)
        if timing == 'detect':
            if metric == 'avg_cor':
                detect_info = [(d.detect_time, d.detect_val / d.no_chans)
                               for d in all_detections]
            elif metric == 'cor_sum':
                detect_info = [(d.detect_time, d.detect_val)
                               for d in all_detections]
            else:
                raise MatchFilterError('metric is not cor_sum or avg_cor')
        elif timing == 'origin':
            if metric == 'avg_cor':
                detect_info = [(_get_origin(d.event).time,
                                d.detect_val / d.no_chans)
                               for d in all_detections]
            elif metric == 'cor_sum':
                detect_info = [(_get_origin(d.event).time, d.detect_val)
                               for d in all_detections]
            else:
                raise MatchFilterError('metric is not cor_sum or avg_cor')
        else:
            raise MatchFilterError('timing is not detect or origin')
        min_det = sorted([d[0] for d in detect_info])[0]
        detect_vals = np.array([d[1] for d in detect_info], dtype=np.float32)
        detect_times = np.array([
            _total_microsec(d[0].datetime, min_det.datetime)
            for d in detect_info
        ])
        # Trig_int must be converted from seconds to micro-seconds
        peaks_out = decluster(peaks=detect_vals,
                              index=detect_times,
                              trig_int=trig_int * 10**6)
        # Need to match both the time and the detection value
        declustered_detections = []
        for ind in peaks_out:
            matching_time_indices = np.where(detect_times == ind[-1])[0]
            matches = matching_time_indices[np.where(
                detect_vals[matching_time_indices] == ind[0])[0][0]]
            declustered_detections.append(all_detections[matches])
        # Convert this list into families
        template_names = list(
            set([d.template_name for d in declustered_detections]))
        new_families = []
        for template_name in template_names:
            template = [
                fam.template for fam in self.families
                if fam.template.name == template_name
            ][0]
            new_families.append(
                Family(template=template,
                       detections=[
                           d for d in declustered_detections
                           if d.template_name == template_name
                       ]))
        self.families = new_families
        return self
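A tiny numeric sketch of the two sorting metrics used above (the values are made up): for a detection with detect_val = 4.5 summed over no_chans = 9 channels,

detect_val, no_chans = 4.5, 9
print("avg_cor:", detect_val / no_chans)  # 0.5 - mean single-channel correlation
print("cor_sum:", detect_val)             # 4.5 - raw cross-channel correlation sum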