def write(self, filename, format='tar', overwrite=False):
    """
    Write this Family to disk in the chosen output format.

    The Family is wrapped in a single-family
    :class:`eqcorrscan.core.match_filter.party.Party` and that Party's
    ``write`` method is called with the same arguments.

    :type filename: str
    :param filename: Path to write file to.
    :type format: str
    :param format:
        One of either 'tar', 'csv', or any obspy supported
        catalog output.
    :type overwrite: bool
    :param overwrite:
        Specifies whether detection-files are overwritten if they exist
        already. By default, no files are overwritten.

    .. Note::
        csv format will write out detection objects, all other
        outputs will write the catalog.  These cannot be rebuilt into
        a Family object.  The only format that can be read back into
        Family objects is the 'tar' type.

    .. Note::
        csv format will append detections to filename, all others
        will overwrite any existing files.

    .. rubric:: Example

    >>> from eqcorrscan import Template, Detection
    >>> from obspy import read
    >>> family = Family(
    ...     template=Template(name='a', st=read()), detections=[
    ...     Detection(template_name='a', detect_time=UTCDateTime(0) + 200,
    ...               no_chans=8, detect_val=4.2, threshold=1.2,
    ...               typeofdet='corr', threshold_type='MAD',
    ...               threshold_input=8.0),
    ...     Detection(template_name='a', detect_time=UTCDateTime(0),
    ...               no_chans=8, detect_val=4.5, threshold=1.2,
    ...               typeofdet='corr', threshold_type='MAD',
    ...               threshold_input=8.0),
    ...     Detection(template_name='a', detect_time=UTCDateTime(0) + 10,
    ...               no_chans=8, detect_val=4.5, threshold=1.2,
    ...               typeofdet='corr', threshold_type='MAD',
    ...               threshold_input=8.0)])
    >>> family.write('test_family')
    """
    # Imported here rather than at module level, as in the original.
    from eqcorrscan.core.match_filter.party import Party

    # All of the writing logic lives on Party; delegate to it.
    single_family_party = Party(families=[self])
    single_family_party.write(
        filename=filename, format=format, overwrite=overwrite)
def client_detect(self, client, starttime, endtime, threshold,
                  threshold_type, trig_int, plot=False, plotdir=None,
                  min_gap=None, daylong=False, parallel_process=True,
                  xcorr_func=None, concurrency=None, cores=None,
                  ignore_length=False, ignore_bad_data=False,
                  group_size=None, return_stream=False, full_peaks=False,
                  save_progress=False, process_cores=None, retries=3,
                  **kwargs):
    """
    Detect using a Tribe of templates within a continuous stream.

    Data are downloaded from `client` in windows of the longest template
    process length, cleaned (gappy, mostly-zero and too-short traces are
    dropped), and each window is passed to :meth:`detect`.

    :type client: `obspy.clients.*.Client`
    :param client:
        Any obspy client with a dataselect service. If the client has no
        ``get_waveforms_bulk`` method it is wrapped so that one is
        available; it must then have at least ``get_waveforms``.
    :type starttime: :class:`obspy.core.UTCDateTime`
    :param starttime: Start-time for detections.
    :type endtime: :class:`obspy.core.UTCDateTime`
    :param endtime: End-time for detections
    :type threshold: float
    :param threshold:
        Threshold level, if using `threshold_type='MAD'` then this will be
        the multiple of the median absolute deviation.
    :type threshold_type: str
    :param threshold_type:
        The type of threshold to be used, can be MAD, absolute or
        av_chan_corr.  See Note on thresholding below.
    :type trig_int: float
    :param trig_int:
        Minimum gap between detections from one template in seconds.
        If multiple detections occur within trig_int of one-another, the
        one with the highest cross-correlation sum will be selected.
    :type plot: bool
    :param plot: Turn plotting on or off.
    :type plotdir: str
    :param plotdir:
        The path to save plots to. If `plotdir=None` (default) then the
        figure will be shown on screen.
    :type min_gap: float
    :param min_gap:
        Minimum gap allowed in data - use to remove traces with known
        issues. Channels with gaps at least this long are removed.
    :type daylong: bool
    :param daylong:
        Set to True to use the
        :func:`eqcorrscan.utils.pre_processing.dayproc` routine, which
        performs additional checks and is more efficient for day-long data
        over other methods.
    :type parallel_process: bool
    :param parallel_process: Passed through unchanged to :meth:`detect`.
    :type xcorr_func: str or callable
    :param xcorr_func:
        A str of a registered xcorr function or a callable for implementing
        a custom xcorr function. For more information see:
        :func:`eqcorrscan.utils.correlate.register_array_xcorr`
    :type concurrency: str
    :param concurrency:
        The type of concurrency to apply to the xcorr function. Options are
        'multithread', 'multiprocess', 'concurrent'. For more details see
        :func:`eqcorrscan.utils.correlate.get_stream_xcorr`
    :type cores: int
    :param cores: Number of workers for processing and detection.
    :type ignore_length: bool
    :param ignore_length:
        If using daylong=True, then dayproc will try check that the data
        are there for at least 80% of the day, if you don't want this check
        (which will raise an error if too much data are missing) then set
        ignore_length=True.  This is not recommended!
    :type ignore_bad_data: bool
    :param ignore_bad_data:
        If False (default), errors will be raised if data are excessively
        gappy or are mostly zeros. If True then no error will be raised,
        but an empty trace will be returned (and not used in detection).
    :type group_size: int
    :param group_size:
        Maximum number of templates to run at once, use to reduce memory
        consumption, if unset will use all templates.
    :type return_stream: bool
    :param return_stream:
        Whether to also output the stream downloaded, useful if you plan
        to use the stream for something else, e.g. lag_calc.
    :type full_peaks: bool
    :param full_peaks: See `eqcorrscan.utils.findpeaks.find_peaks2_short`
    :type save_progress: bool
    :param save_progress:
        Whether to save the resulting party at every data step or not.
        Useful for long-running processes. The party is written to
        "eqcorrscan_temporary_party".
    :type process_cores: int
    :param process_cores:
        Number of processes to use for pre-processing (if different to
        `cores`).
    :type retries: int
    :param retries:
        Number of attempts allowed for downloading - allows for transient
        server issues.
    :param kwargs: Passed through unchanged to :meth:`detect`.

    :return:
        :class:`eqcorrscan.core.match_filter.Party` of Families of
        detections, or a ``(party, stream)`` tuple if `return_stream` is
        True. If :meth:`detect` raises for any window the error is logged
        and the Party gathered so far is returned.
    :raises MatchFilterError:
        If a data window could not be downloaded after `retries` attempts.
    :raises AssertionError:
        If `client` has neither ``get_waveforms_bulk`` nor
        ``get_waveforms``.

    .. Note::
        When using the "fftw" correlation backend the length of the fft
        can be set. See :mod:`eqcorrscan.utils.correlate` for more info.

    .. Note::
        Ensures that data overlap between loops, which will lead to no
        missed detections at data start-stop points (see note for
        :meth:`eqcorrscan.core.match_filter.Tribe.detect` method).
        This will result in end-time not being strictly
        honoured, so detections may occur after the end-time set.  This is
        because data must be run in the correct process-length.

    .. warning::
        Plotting within the match-filter routine uses the Agg backend
        with interactive plotting turned off.  This is because the function
        is designed to work in bulk.  If you wish to turn interactive
        plotting on you must import matplotlib in your script first,
        when you then import match_filter you will get the warning that
        this call to matplotlib has no effect, which will mean that
        match_filter has not changed the plotting behaviour.

    .. note::
        **Thresholding:**

        **MAD** threshold is calculated as the:

        .. math::

            threshold {\\times} (median(abs(cccsum)))

        where :math:`cccsum` is the cross-correlation sum for a given
        template.

        **absolute** threshold is a true absolute threshold based on the
        cccsum value.

        **av_chan_corr** is based on the mean values of single-channel
        cross-correlations assuming all data are present as required for
        the template, e.g:

        .. math::

            av\\_chan\\_corr\\_thresh=threshold \\times (cccsum /
            len(template))

        where :math:`template` is a single template from the input and the
        length is the number of channels within this template.
    """
    import math

    from obspy.clients.fdsn.client import FDSNException

    def _or_wildcard(code):
        # Missing or empty SEED codes are requested with a wildcard.
        return '*' if code in [None, ''] else code

    def _channel_pattern(channel):
        # Two-character channel codes get a single-character wildcard
        # inserted between the two characters.
        if channel in [None, '']:
            return '*'
        if len(channel) == 2:
            return channel[0] + '?' + channel[-1]
        return channel

    # This uses get_waveforms_bulk to get data - not all client types have
    # this, so we check and monkey patch here.
    if not hasattr(client, "get_waveforms_bulk"):
        if not hasattr(client, "get_waveforms"):
            # Raised explicitly (same exception type as the former
            # ``assert``) so the check survives running under ``-O``.
            raise AssertionError(
                f"client {client} must have at least a "
                "get_waveforms method")
        Logger.info(f"Client {client} does not have a get_waveforms_bulk "
                    "method, monkey-patching this")
        client = get_waveform_client(client)

    party = Party()
    # Apply a buffer, often data downloaded is not the correct length
    buff = 300
    data_length = max([t.process_length for t in self.templates])

    # pad is the largest start-time spread between channels of any
    # template; each window is extended by this much at both ends.
    pad = 0
    for template in self.templates:
        # NOTE: sort() is in-place, so template streams are re-ordered
        # here exactly as they were before this rewrite.
        sorted_st = template.st.sort(['starttime'])
        max_delay = (sorted_st[-1].stats.starttime -
                     sorted_st[0].stats.starttime)
        if max_delay > pad:
            pad = max_delay

    # Number of download windows: whole seconds requested divided by the
    # window length, rounded up so the final partial window is included.
    download_groups = math.ceil(int(endtime - starttime) / data_length)

    # Unique (network, station, location, channel) patterns to request.
    template_channel_ids = list({
        (_or_wildcard(tr.stats.network),
         _or_wildcard(tr.stats.station),
         _or_wildcard(tr.stats.location),
         _channel_pattern(tr.stats.channel))
        for template in self.templates for tr in template.st})

    if return_stream:
        stream = Stream()

    for i in range(download_groups):
        window_start = starttime + (i * data_length)
        window_end = starttime + ((i + 1) * data_length)
        bulk_info = [
            (chan_id[0], chan_id[1], chan_id[2], chan_id[3],
             window_start - (pad + buff), window_end + (pad + buff))
            for chan_id in template_channel_ids]

        for _attempt in range(retries):
            try:
                Logger.info("Downloading data")
                st = client.get_waveforms_bulk(bulk_info)
                Logger.info(
                    "Downloaded data for {0} traces".format(len(st)))
                break
            except FDSNException as e:
                # str() each arg so a non-string arg cannot raise here.
                message = " ".join(str(arg) for arg in e.args)
                if "Split the request in smaller" not in message:
                    # Other FDSN errors used to be swallowed silently
                    # before the next attempt; log them instead.
                    Logger.error(e)
                    continue
                Logger.warning(
                    "Datacentre does not support large requests: "
                    "splitting request into smaller chunks")
                st = Stream()
                for _bulk in bulk_info:
                    try:
                        st += client.get_waveforms_bulk([_bulk])
                    except Exception as bulk_error:
                        # Best-effort: a channel without data is logged
                        # and skipped.
                        Logger.error("No data for {0}".format(_bulk))
                        Logger.error(bulk_error)
                Logger.info("Downloaded data for {0} traces".format(
                    len(st)))
                break
            except Exception as e:
                Logger.error(e)
                continue
        else:
            # Loop finished without a successful download (no break).
            raise MatchFilterError(
                "Could not download data after {0} attempts".format(
                    retries))

        # Get gaps and remove traces as necessary
        if min_gap:
            gaps = st.get_gaps(min_gap=min_gap)
            if len(gaps) > 0:
                Logger.warning("Large gaps in downloaded data")
                st.merge()
                gappy_channels = {
                    (gap[0], gap[1], gap[2], gap[3]) for gap in gaps}
                _st = Stream()
                for tr in st:
                    tr_stats = (tr.stats.network, tr.stats.station,
                                tr.stats.location, tr.stats.channel)
                    if tr_stats in gappy_channels:
                        Logger.warning(
                            "Removing gappy channel: {0}".format(tr))
                    else:
                        _st += tr
                # Stream.split() returns a new Stream rather than
                # splitting in place, so the result has to be kept;
                # the previous bare ``st.split()`` call had no effect.
                st = _st.split()
        st.detrend("simple").merge()
        st.trim(starttime=window_start - pad, endtime=window_end + pad)

        # Iterate over snapshots so removing traces inside the loops
        # cannot skip any.
        for tr in list(st):
            if not _check_daylong(tr):
                st.remove(tr)
                Logger.warning(
                    "{0} contains more zeros than non-zero, "
                    "removed".format(tr.id))
        for tr in list(st):
            if tr.stats.endtime - tr.stats.starttime < 0.8 * data_length:
                st.remove(tr)
                Logger.warning(
                    "{0} is less than 80% of the required length"
                    ", removed".format(tr.id))
        if return_stream:
            stream += st
        try:
            # overlap=None: windows already overlap by ``pad`` through
            # the download and trim times above.
            party += self.detect(
                stream=st, threshold=threshold,
                threshold_type=threshold_type, trig_int=trig_int,
                plot=plot, plotdir=plotdir, daylong=daylong,
                parallel_process=parallel_process, xcorr_func=xcorr_func,
                concurrency=concurrency, cores=cores,
                ignore_length=ignore_length,
                ignore_bad_data=ignore_bad_data, group_size=group_size,
                overlap=None, full_peaks=full_peaks,
                process_cores=process_cores, **kwargs)
            if save_progress:
                party.write("eqcorrscan_temporary_party")
        except Exception as e:
            # Deliberate best-effort: return what has been found so far
            # rather than losing earlier windows.
            Logger.critical(
                'Error, routine incomplete, returning incomplete Party')
            Logger.error('Error: {0}'.format(e))
            if return_stream:
                return party, stream
            return party

    # Overlapping windows can yield the same detection twice.
    for family in party:
        if family is not None:
            family.detections = family._uniq().detections
    if return_stream:
        return party, stream
    return party
def detect(self, stream, threshold, threshold_type, trig_int, plot=False,
           plotdir=None, daylong=False, parallel_process=True,
           xcorr_func=None, concurrency=None, cores=None,
           ignore_length=False, ignore_bad_data=False, group_size=None,
           overlap="calculate", full_peaks=False, save_progress=False,
           process_cores=None, **kwargs):
    """
    Detect using a Tribe of templates within a continuous stream.

    Templates are grouped with ``group_templates`` and each group is run
    through ``_group_detect`` on its own copy of `stream`; the resulting
    parties are combined and duplicate detections removed.

    :type stream: `obspy.core.stream.Stream`
    :param stream: Continuous data to detect within using the Template.
    :type threshold: float
    :param threshold:
        Threshold level, if using `threshold_type='MAD'` then this will be
        the multiple of the median absolute deviation.
    :type threshold_type: str
    :param threshold_type:
        The type of threshold to be used, can be MAD, absolute or
        av_chan_corr.  See Note on thresholding below.
    :type trig_int: float
    :param trig_int:
        Minimum gap between detections from one template in seconds.
        If multiple detections occur within trig_int of one-another, the
        one with the highest cross-correlation sum will be selected.
    :type plot: bool
    :param plot: Turn plotting on or off.
    :type plotdir: str
    :param plotdir:
        The path to save plots to. If `plotdir=None` (default) then the
        figure will be shown on screen.
    :type daylong: bool
    :param daylong:
        Set to True to use the
        :func:`eqcorrscan.utils.pre_processing.dayproc` routine, which
        performs additional checks and is more efficient for day-long data
        over other methods.
    :type parallel_process: bool
    :param parallel_process:
        Passed through unchanged to ``_group_detect``.
    :type xcorr_func: str or callable
    :param xcorr_func:
        A str of a registered xcorr function or a callable for implementing
        a custom xcorr function. For more information see:
        :func:`eqcorrscan.utils.correlate.register_array_xcorr`
    :type concurrency: str
    :param concurrency:
        The type of concurrency to apply to the xcorr function. Options are
        'multithread', 'multiprocess', 'concurrent'. For more details see
        :func:`eqcorrscan.utils.correlate.get_stream_xcorr`
    :type cores: int
    :param cores: Number of workers for processing and detection.
    :type ignore_length: bool
    :param ignore_length:
        If using daylong=True, then dayproc will try check that the data
        are there for at least 80% of the day, if you don't want this check
        (which will raise an error if too much data are missing) then set
        ignore_length=True.  This is not recommended!
    :type ignore_bad_data: bool
    :param ignore_bad_data:
        If False (default), errors will be raised if data are excessively
        gappy or are mostly zeros. If True then no error will be raised,
        but an empty trace will be returned (and not used in detection).
    :type group_size: int
    :param group_size:
        Maximum number of templates to run at once, use to reduce memory
        consumption, if unset will use all templates.
    :type overlap: float
    :param overlap:
        Either None, "calculate" or a float of number of seconds to
        overlap detection streams by.  This is to counter the effects of
        the delay-and-stack in calculating cross-correlation sums. Setting
        overlap = "calculate" will work out the appropriate overlap based
        on the maximum lags within templates.
    :type full_peaks: bool
    :param full_peaks: See `eqcorrscan.utils.findpeaks.find_peaks2_short`
    :type save_progress: bool
    :param save_progress:
        Whether to save the resulting party at every data step or not.
        Useful for long-running processes. The party is written to
        "eqcorrscan_temporary_party" after each template group.
    :type process_cores: int
    :param process_cores:
        Number of processes to use for pre-processing (if different to
        `cores`).
    :param kwargs: Passed through unchanged to ``_group_detect``.

    :return:
        :class:`eqcorrscan.core.match_filter.Party` of Families of
        detections.

    .. Note::
        When using the "fftw" correlation backend the length of the fft
        can be set. See :mod:`eqcorrscan.utils.correlate` for more info.

    .. Note::
        `stream` must not be pre-processed. If your data contain gaps
        you should *NOT* fill those gaps before using this method.
        The pre-process functions (called within) will fill the gaps
        internally prior to processing, process the data, then re-fill
        the gaps with zeros to ensure correlations are not incorrectly
        calculated within gaps. If your data have gaps you should pass a
        merged stream without the `fill_value` argument
        (e.g.: `stream = stream.merge()`).

    .. note::
        **Data overlap:**

        Internally this routine shifts and trims the data according to the
        offsets in the template (e.g. if trace 2 starts 2 seconds after
        trace 1 in the template then the continuous data will be shifted
        by 2 seconds to align peak correlations prior to summing).
        Because of this, detections at the start and end of continuous
        data streams **may be missed**.  The maximum time-period that
        might be missing detections is the maximum offset in the template.

        To work around this, if you are conducting matched-filter
        detections through long-duration continuous data, we suggest
        using some overlap (a few seconds, on the order of the maximum
        offset in the templates) in the continuous data.  You will then
        need to post-process the detections (which should be done anyway
        to remove duplicates).  See below note for how `overlap` argument
        affects data internally if `stream` is longer than the processing
        length.

    .. Note::
        If `stream` is longer than processing length, this routine will
        ensure that data overlap between loops, which will lead to no
        missed detections at data start-stop points (see above note).
        This will result in end-time not being strictly
        honoured, so detections may occur after the end-time set.  This is
        because data must be run in the correct process-length.

    .. note::
        **Thresholding:**

        **MAD** threshold is calculated as the:

        .. math::

            threshold {\\times} (median(abs(cccsum)))

        where :math:`cccsum` is the cross-correlation sum for a given
        template.

        **absolute** threshold is a true absolute threshold based on the
        cccsum value.

        **av_chan_corr** is based on the mean values of single-channel
        cross-correlations assuming all data are present as required for
        the template, e.g:

        .. math::

            av\\_chan\\_corr\\_thresh=threshold \\times (cccsum /
            len(template))

        where :math:`template` is a single template from the input and the
        length is the number of channels within this template.
    """
    # Options that are identical for every template group.
    shared_options = dict(
        threshold=threshold, threshold_type=threshold_type,
        trig_int=trig_int, plot=plot, group_size=group_size,
        pre_processed=False, daylong=daylong,
        parallel_process=parallel_process, xcorr_func=xcorr_func,
        concurrency=concurrency, cores=cores,
        ignore_length=ignore_length, overlap=overlap, plotdir=plotdir,
        full_peaks=full_peaks, process_cores=process_cores,
        ignore_bad_data=ignore_bad_data, arg_check=False)

    combined = Party()
    for template_group in group_templates(self.templates):
        # Each group gets its own copy of the data: _group_detect
        # processes the stream it is handed.
        combined += _group_detect(
            templates=template_group, stream=stream.copy(),
            **shared_options, **kwargs)
        if save_progress:
            combined.write("eqcorrscan_temporary_party")

    if len(combined) == 0:
        return combined
    # Drop duplicated detections within each family.
    for family in combined:
        if family is None:
            continue
        family.detections = family._uniq().detections
    return combined
def _group_detect(templates, stream, threshold, threshold_type, trig_int,
                  plot=False, plotdir=None, group_size=None,
                  pre_processed=False, daylong=False, parallel_process=True,
                  xcorr_func=None, concurrency=None, cores=None,
                  ignore_length=False, ignore_bad_data=False,
                  overlap="calculate", full_peaks=False, process_cores=None,
                  **kwargs):
    """
    Pre-process and compute detections for a group of templates.

    Will process the stream object, so if running in a loop, you will want
    to copy the stream before passing it to this function.

    :type templates: list
    :param templates: List of :class:`eqcorrscan.core.match_filter.Template`
    :type stream: `obspy.core.stream.Stream`
    :param stream: Continuous data to detect within using the Template.
    :type threshold: float
    :param threshold:
        Threshold level, if using `threshold_type='MAD'` then this will be
        the multiple of the median absolute deviation.
    :type threshold_type: str
    :param threshold_type:
        The type of threshold to be used, can be MAD, absolute or
        av_chan_corr.  See Note on thresholding below.
    :type trig_int: float
    :param trig_int:
        Minimum gap between detections from one template in seconds.
        If multiple detections occur within trig_int of one-another, the
        one with the highest cross-correlation sum will be selected.
    :type plot: bool
    :param plot: Turn plotting on or off.
    :type plotdir: str
    :param plotdir:
        The path to save plots to. If `plotdir=None` (default) then the
        figure will be shown on screen.
    :type group_size: int
    :param group_size:
        Maximum number of templates to run at once, use to reduce memory
        consumption, if unset will use all templates.
    :type pre_processed: bool
    :param pre_processed:
        Set to True if `stream` has already undergone processing, in this
        case eqcorrscan will only check that the sampling rate is correct.
        Defaults to False, which will use the
        :mod:`eqcorrscan.utils.pre_processing` routines to resample and
        filter the continuous data.
    :type daylong: bool
    :param daylong:
        Set to True to use the
        :func:`eqcorrscan.utils.pre_processing.dayproc` routine, which
        performs additional checks and is more efficient for day-long data
        over other methods.
    :type parallel_process: bool
    :param parallel_process:
        Passed as ``parallel`` to the pre-processing step
        (``_group_process``).
    :type xcorr_func: str or callable
    :param xcorr_func:
        A str of a registered xcorr function or a callable for implementing
        a custom xcorr function. For more details see:
        :func:`eqcorrscan.utils.correlate.register_array_xcorr`
    :type concurrency: str
    :param concurrency:
        The type of concurrency to apply to the xcorr function. Options are
        'multithread', 'multiprocess', 'concurrent'. For more details see
        :func:`eqcorrscan.utils.correlate.get_stream_xcorr`
    :type cores: int
    :param cores: Number of workers for processing and correlation.
    :type ignore_length: bool
    :param ignore_length:
        If using daylong=True, then dayproc will try check that the data
        are there for at least 80% of the day, if you don't want this check
        (which will raise an error if too much data are missing) then set
        ignore_length=True.  This is not recommended!
    :type ignore_bad_data: bool
    :param ignore_bad_data:
        Passed through unchanged to the pre-processing step
        (``_group_process``).
    :type overlap: float
    :param overlap:
        Either None, "calculate" or a float of number of seconds to
        overlap detection streams by.  This is to counter the effects of
        the delay-and-stack in calculating cross-correlation sums. Setting
        overlap = "calculate" will work out the appropriate overlap based
        on the maximum lags within templates. An overlap greater than or
        equal to the process length is ignored with a warning.
    :type full_peaks: bool
    :param full_peaks: See `eqcorrscan.utils.findpeaks.find_peaks_compiled`
    :type process_cores: int
    :param process_cores:
        Number of processes to use for pre-processing (if different to
        `cores`).
    :param kwargs: Passed through unchanged to ``match_filter``.

    :return:
        :class:`eqcorrscan.core.match_filter.Party` of families of
        detections.
    :raises MatchFilterError:
        If the templates were not all processed in the same way.
    :raises NotImplementedError:
        If `overlap` is not None, "calculate" or a float.
    """
    from eqcorrscan.core.match_filter.party import Party
    from eqcorrscan.core.match_filter.family import Family

    master = templates[0]
    # Find the largest start-time spread between channels of any template
    # (used for overlap="calculate") and check that all templates are
    # processed the same.
    lap = 0.0
    for template in templates:
        starts = [t.stats.starttime for t in template.st.sort(['starttime'])]
        moveout = starts[-1] - starts[0]
        if moveout > lap:
            lap = moveout
        if not template.same_processing(master):
            raise MatchFilterError('Templates must be processed the same.')

    if overlap is None:
        overlap = 0.0
    elif not isinstance(overlap, float) and str(overlap) == str("calculate"):
        overlap = lap
    elif not isinstance(overlap, float):
        raise NotImplementedError(
            "%s is not a recognised overlap type" % str(overlap))
    if overlap >= master.process_length:
        Logger.warning(
            f"Overlap of {overlap} s is greater than process "
            f"length ({master.process_length} s), ignoring overlap")
        overlap = 0

    if not pre_processed:
        if process_cores is None:
            process_cores = cores
        streams = _group_process(
            template_group=templates, parallel=parallel_process,
            cores=process_cores, stream=stream, daylong=daylong,
            ignore_length=ignore_length, ignore_bad_data=ignore_bad_data,
            overlap=overlap)
        for _st in streams:
            Logger.debug(f"Processed stream:\n{_st.__str__(extended=True)}")
    else:
        Logger.warning('Not performing any processing on the continuous data.')
        streams = [stream]

    party = Party()
    # Number of template sub-groups, rounded up so a final short group is
    # included.
    if group_size is not None:
        n_groups = int(len(templates) / group_size)
        if n_groups * group_size < len(templates):
            n_groups += 1
    else:
        n_groups = 1

    for st_chunk in streams:
        chunk_start, chunk_end = (min(tr.stats.starttime for tr in st_chunk),
                                  max(tr.stats.endtime for tr in st_chunk))
        Logger.info(
            f'Computing detections between {chunk_start} and {chunk_end}')
        st_chunk.trim(starttime=chunk_start, endtime=chunk_end)
        # Truncate any trace longer than the first trace of the chunk.
        reference_length = len(st_chunk[0])
        for tr in st_chunk:
            if len(tr) > reference_length:
                tr.data = tr.data[0:reference_length]

        for i in range(n_groups):
            if group_size is not None:
                start_group = i * group_size
                # Slicing clamps at the end of the list, so the final
                # (possibly short) group needs no special case.  The old
                # ``if i == n_groups`` clamp could never be true inside
                # ``range(n_groups)``.
                end_group = (i + 1) * group_size
            else:
                start_group, end_group = 0, len(templates)
            template_group = list(templates[start_group:end_group])

            # Keep only the detections from THIS call.  Accumulating them
            # across chunks made earlier detections match again on later
            # chunks, so their picks/origins were shifted by the prepick
            # a second time and they were re-added to the party.
            new_detections = match_filter(
                template_names=[t.name for t in template_group],
                template_list=[t.st for t in template_group], st=st_chunk,
                xcorr_func=xcorr_func, concurrency=concurrency,
                threshold=threshold, threshold_type=threshold_type,
                trig_int=trig_int, plot=plot, plotdir=plotdir, cores=cores,
                full_peaks=full_peaks, peak_cores=process_cores, **kwargs)

            for template in template_group:
                family = Family(template=template, detections=[])
                for detection in new_detections:
                    if detection.template_name != template.name:
                        continue
                    # Shift pick and origin times by the template
                    # prepick.
                    for pick in detection.event.picks:
                        pick.time += template.prepick
                    for origin in detection.event.origins:
                        origin.time += template.prepick
                    family.detections.append(detection)
                party += family
    return party