def test_daylong_checks(self):
    """
    Check `_check_daylong` on good data and on zero-padded data.

    The first trace of the test stream must pass the check as-is; a
    copy cut to one hour and then extended with just over an hour of
    zeros must fail it.
    """
    reference = self.st[0]
    self.assertTrue(_check_daylong(reference))

    # Keep only the first hour of a copy so the fixture is untouched.
    window_start = reference.stats.starttime
    window_end = reference.stats.starttime + 3600
    padded = reference.copy().trim(window_start, window_end)

    # Append slightly more zero samples than there are data samples.
    n_zeros = 3602 * int(reference.stats.sampling_rate)
    padded.data = np.append(padded.data, np.zeros(n_zeros))

    self.assertFalse(_check_daylong(padded))
def client_detect(self, client, starttime, endtime, threshold,
                  threshold_type, trig_int, plot=False, plotdir=None,
                  min_gap=None, daylong=False, parallel_process=True,
                  xcorr_func=None, concurrency=None, cores=None,
                  ignore_length=False, ignore_bad_data=False,
                  group_size=None, return_stream=False, full_peaks=False,
                  save_progress=False, process_cores=None, retries=3,
                  **kwargs):
    """
    Detect using a Tribe of templates within a continuous stream.

    Data are downloaded from `client` in chunks of the longest template
    `process_length`, cleaned, and passed to :meth:`detect` chunk by
    chunk.

    :type client: `obspy.clients.*.Client`
    :param client: Any obspy client with a dataselect service.
    :type starttime: :class:`obspy.core.UTCDateTime`
    :param starttime: Start-time for detections.
    :type endtime: :class:`obspy.core.UTCDateTime`
    :param endtime: End-time for detections
    :type threshold: float
    :param threshold:
        Threshold level, if using `threshold_type='MAD'` then this will be
        the multiple of the median absolute deviation.
    :type threshold_type: str
    :param threshold_type:
        The type of threshold to be used, can be MAD, absolute or
        av_chan_corr.  See Note on thresholding below.
    :type trig_int: float
    :param trig_int:
        Minimum gap between detections from one template in seconds.
        If multiple detections occur within trig_int of one-another, the
        one with the highest cross-correlation sum will be selected.
    :type plot: bool
    :param plot: Turn plotting on or off.
    :type plotdir: str
    :param plotdir:
        The path to save plots to. If `plotdir=None` (default) then the
        figure will be shown on screen.
    :type min_gap: float
    :param min_gap:
        Minimum gap allowed in data - use to remove traces with known
        issues
    :type daylong: bool
    :param daylong:
        Set to True to use the
        :func:`eqcorrscan.utils.pre_processing.dayproc` routine, which
        performs additional checks and is more efficient for day-long data
        over other methods.
    :type parallel_process: bool
    :param parallel_process:
        Passed unchanged to :meth:`detect`.
    :type xcorr_func: str or callable
    :param xcorr_func:
        A str of a registered xcorr function or a callable for implementing
        a custom xcorr function. For more information see:
        :func:`eqcorrscan.utils.correlate.register_array_xcorr`
    :type concurrency: str
    :param concurrency:
        The type of concurrency to apply to the xcorr function. Options are
        'multithread', 'multiprocess', 'concurrent'. For more details see
        :func:`eqcorrscan.utils.correlate.get_stream_xcorr`
    :type cores: int
    :param cores: Number of workers for processing and detection.
    :type ignore_length: bool
    :param ignore_length:
        If using daylong=True, then dayproc will try to check that the
        data are there for at least 80% of the day, if you don't want this
        check (which will raise an error if too much data are missing) then
        set ignore_length=True.  This is not recommended!
    :type ignore_bad_data: bool
    :param ignore_bad_data:
        If False (default), errors will be raised if data are excessively
        gappy or are mostly zeros. If True then no error will be raised,
        but an empty trace will be returned (and not used in detection).
    :type group_size: int
    :param group_size:
        Maximum number of templates to run at once, use to reduce memory
        consumption, if unset will use all templates.
    :type full_peaks: bool
    :param full_peaks: See `eqcorrscan.utils.findpeaks.find_peaks2_short`
    :type save_progress: bool
    :param save_progress:
        Whether to save the resulting party at every data step or not.
        Useful for long-running processes.
    :type process_cores: int
    :param process_cores:
        Number of processes to use for pre-processing (if different to
        `cores`).
    :type return_stream: bool
    :param return_stream:
        Whether to also output the stream downloaded, useful if you plan
        to use the stream for something else, e.g. lag_calc.
    :type retries: int
    :param retries:
        Number of attempts allowed for downloading - allows for transient
        server issues.

    :return:
        :class:`eqcorrscan.core.match_filter.Party` of Families of
        detections. If `return_stream=True` a tuple of
        `(party, stream)` is returned instead.
    :raises AssertionError:
        If `client` has neither a `get_waveforms_bulk` nor a
        `get_waveforms` method.
    :raises MatchFilterError:
        If data could not be downloaded within `retries` attempts.

    .. Note::
        When using the "fftw" correlation backend the length of the fft
        can be set. See :mod:`eqcorrscan.utils.correlate` for more info.

    .. Note::
        Ensures that data overlap between loops, which will lead to no
        missed detections at data start-stop points (see note for
        :meth:`eqcorrscan.core.match_filter.Tribe.detect` method).
        This will result in end-time not being strictly
        honoured, so detections may occur after the end-time set.  This is
        because data must be run in the correct process-length.

    .. warning::
        Plotting within the match-filter routine uses the Agg backend
        with interactive plotting turned off.  This is because the function
        is designed to work in bulk.  If you wish to turn interactive
        plotting on you must import matplotlib in your script first,
        when you then import match_filter you will get the warning that
        this call to matplotlib has no effect, which will mean that
        match_filter has not changed the plotting behaviour.

    .. note::
        **Thresholding:**

        **MAD** threshold is calculated as the:

        .. math::

            threshold {\\times} (median(abs(cccsum)))

        where :math:`cccsum` is the cross-correlation sum for a given
        template.

        **absolute** threshold is a true absolute threshold based on the
        cccsum value.

        **av_chan_corr** is based on the mean values of single-channel
        cross-correlations assuming all data are present as required for
        the template, e.g:

        .. math::

            av\\_chan\\_corr\\_thresh=threshold \\times (cccsum /
            len(template))

        where :math:`template` is a single template from the input and the
        length is the number of channels within this template.
    """
    from obspy.clients.fdsn.client import FDSNException

    # This uses get_waveforms_bulk to get data - not all client types have
    # this, so we check and monkey patch here.
    if not hasattr(client, "get_waveforms_bulk"):
        # Explicit raise (same exception type as the former assert) so the
        # check still runs when Python is started with -O.
        if not hasattr(client, "get_waveforms"):
            raise AssertionError(
                f"client {client} must have at least a get_waveforms method")
        Logger.info(f"Client {client} does not have a get_waveforms_bulk "
                    "method, monkey-patching this")
        client = get_waveform_client(client)

    party = Party()
    buff = 300
    # Apply a buffer, often data downloaded is not the correct length
    data_length = max([t.process_length for t in self.templates])

    # Pad each download by the largest spread of trace start-times found
    # within any single template.
    pad = 0
    for template in self.templates:
        ordered = template.st.sort(['starttime'])
        max_delay = (ordered[-1].stats.starttime -
                     ordered[0].stats.starttime)
        if max_delay > pad:
            pad = max_delay

    download_groups = int(endtime - starttime) / data_length

    # Build the set of (network, station, location, channel) selectors to
    # request, using wildcards for anything the template does not define.
    template_channel_ids = []
    for template in self.templates:
        for tr in template.st:
            if tr.stats.network not in [None, '']:
                chan_id = (tr.stats.network,)
            else:
                chan_id = ('*',)
            if tr.stats.station not in [None, '']:
                chan_id += (tr.stats.station,)
            else:
                chan_id += ('*',)
            if tr.stats.location not in [None, '']:
                chan_id += (tr.stats.location,)
            else:
                chan_id += ('*',)
            if tr.stats.channel not in [None, '']:
                if len(tr.stats.channel) == 2:
                    # Two-character codes get a wildcard middle character.
                    chan_id += (tr.stats.channel[0] + '?' +
                                tr.stats.channel[-1],)
                else:
                    chan_id += (tr.stats.channel,)
            else:
                chan_id += ('*',)
            template_channel_ids.append(chan_id)
    template_channel_ids = list(set(template_channel_ids))

    if return_stream:
        stream = Stream()

    # Round the number of download chunks up to a whole number.
    if int(download_groups) < download_groups:
        download_groups = int(download_groups) + 1
    else:
        download_groups = int(download_groups)

    for i in range(download_groups):
        chunk_start = starttime + (i * data_length)
        chunk_end = starttime + ((i + 1) * data_length)
        bulk_info = []
        for chan_id in template_channel_ids:
            bulk_info.append((
                chan_id[0], chan_id[1], chan_id[2], chan_id[3],
                chunk_start - (pad + buff), chunk_end + (pad + buff)))

        for retry_attempt in range(retries):
            try:
                Logger.info("Downloading data")
                st = client.get_waveforms_bulk(bulk_info)
                Logger.info(
                    "Downloaded data for {0} traces".format(len(st)))
                break
            except FDSNException as e:
                if "Split the request in smaller" in " ".join(e.args):
                    Logger.warning(
                        "Datacentre does not support large requests: "
                        "splitting request into smaller chunks")
                    st = Stream()
                    for _bulk in bulk_info:
                        try:
                            st += client.get_waveforms_bulk([_bulk])
                        except Exception as bulk_error:
                            Logger.error("No data for {0}".format(_bulk))
                            Logger.error(bulk_error)
                            continue
                    Logger.info("Downloaded data for {0} traces".format(
                        len(st)))
                    break
                # Any other FDSN failure: record it before retrying rather
                # than retrying silently.
                Logger.error(e)
                continue
            except Exception as e:
                Logger.error(e)
                continue
        else:
            raise MatchFilterError(
                "Could not download data after {0} attempts".format(
                    retries))

        # Get gaps and remove traces as necessary
        if min_gap:
            gaps = st.get_gaps(min_gap=min_gap)
            if len(gaps) > 0:
                Logger.warning("Large gaps in downloaded data")
                st.merge()
                gappy_channels = list(
                    set([(gap[0], gap[1], gap[2], gap[3])
                         for gap in gaps]))
                _st = Stream()
                for tr in st:
                    tr_stats = (tr.stats.network, tr.stats.station,
                                tr.stats.location, tr.stats.channel)
                    if tr_stats in gappy_channels:
                        Logger.warning(
                            "Removing gappy channel: {0}".format(tr))
                    else:
                        _st += tr
                # Stream.split() returns a new Stream rather than working
                # in place, so the result has to be kept for the merged
                # (masked) traces to be split back into segments before
                # detrending.
                st = _st.split()
        st.detrend("simple").merge()
        st.trim(starttime=chunk_start - pad, endtime=chunk_end + pad)

        # Iterate over snapshots so that removing from st is safe.
        for tr in list(st):
            if not _check_daylong(tr):
                st.remove(tr)
                Logger.warning(
                    "{0} contains more zeros than non-zero, "
                    "removed".format(tr.id))
        for tr in list(st):
            if tr.stats.endtime - tr.stats.starttime < \
               0.8 * data_length:
                st.remove(tr)
                Logger.warning(
                    "{0} is less than 80% of the required length"
                    ", removed".format(tr.id))
        if return_stream:
            stream += st
        try:
            party += self.detect(
                stream=st, threshold=threshold,
                threshold_type=threshold_type, trig_int=trig_int,
                plot=plot, plotdir=plotdir, daylong=daylong,
                parallel_process=parallel_process, xcorr_func=xcorr_func,
                concurrency=concurrency, cores=cores,
                ignore_length=ignore_length,
                ignore_bad_data=ignore_bad_data, group_size=group_size,
                overlap=None, full_peaks=full_peaks,
                process_cores=process_cores, **kwargs)
            if save_progress:
                party.write("eqcorrscan_temporary_party")
        except Exception as e:
            # Best effort: hand back whatever was detected so far.
            Logger.critical(
                'Error, routine incomplete, returning incomplete Party')
            Logger.error('Error: {0}'.format(e))
            if return_stream:
                return party, stream
            else:
                return party

    # Chunks overlap, so de-duplicate the detections in each family.
    for family in party:
        if family is not None:
            family.detections = family._uniq().detections
    if return_stream:
        return party, stream
    else:
        return party
def _download_from_client(client, client_type, catalog, data_pad, process_len,
                          available_stations=[], all_channels=False):
    """
    Internal function to handle downloading from either seishub or fdsn client

    One window of `process_len` seconds, starting `data_pad` seconds
    before the earliest origin in `catalog`, is requested for every
    distinct channel that has a pick with a waveform id.

    :param client: Object providing a `get_waveforms` method.
    :param client_type:
        If `'seishub'`, stations not listed in `available_stations` are
        skipped without a request being made.
    :param catalog:
        Events to download data for; each event needs at least one origin.
    :param data_pad:
        Seconds of data required before the first and after the last
        origin time.
    :param process_len: Length of the download window in seconds.
    :param available_stations:
        Station codes known to the SeisHub database (only read, never
        modified here).
    :param all_channels:
        If True, request `<first two channel characters>?` rather than
        the exact picked channel.

    :return:
        Merged Stream holding only traces that span at least 80% of
        `process_len` and pass `pre_processing._check_daylong`.
    :raises TemplateGenError:
        If the last event (plus `data_pad`) is not inside the window.
    :raises Exception:
        If nothing was downloaded and every channel request was dropped.
    """
    st = Stream()
    catalog = Catalog(sorted(catalog, key=lambda e: e.origins[0].time))
    all_waveform_info = []
    for event in catalog:
        for pick in event.picks:
            if not pick.waveform_id:
                Logger.warning(
                    "Pick not associated with waveforms, will not use:"
                    " {0}".format(pick))
                continue
            if all_channels:
                channel_code = pick.waveform_id.channel_code[0:2] + "?"
            else:
                channel_code = pick.waveform_id.channel_code
            all_waveform_info.append(
                (pick.waveform_id.network_code,
                 pick.waveform_id.station_code,
                 channel_code, pick.waveform_id.location_code))
    starttime = UTCDateTime(catalog[0].origins[0].time - data_pad)
    endtime = starttime + process_len
    # Check that endtime is after the last event
    if not endtime > catalog[-1].origins[0].time + data_pad:
        raise TemplateGenError('Events do not fit in processing window')
    all_waveform_info = sorted(list(set(all_waveform_info)))
    dropped_pick_stations = 0
    for waveform_info in all_waveform_info:
        net, sta, chan, loc = waveform_info
        if client_type == 'seishub' and sta not in available_stations:
            Logger.error("Station not found in SeisHub DB")
            dropped_pick_stations += 1
            continue
        Logger.info('Downloading for start-time: {0} end-time: {1}'.format(
            starttime, endtime))
        Logger.debug('.'.join([net, sta, loc, chan]))
        query_params = dict(network=net, station=sta, location=loc,
                            channel=chan, starttime=starttime,
                            endtime=endtime)
        try:
            st += client.get_waveforms(**query_params)
        except Exception as e:
            # Best effort: one failed channel must not stop the others.
            Logger.error(e)
            Logger.error(
                'Found no data for this station: {0}'.format(query_params))
            dropped_pick_stations += 1
    # dropped_pick_stations counts failed channel requests, so it has to be
    # compared with the number of requested channels; the pick count of
    # whichever event was iterated last is unrelated to it.
    if not st and dropped_pick_stations == len(all_waveform_info):
        raise Exception('No data available, is the server down?')
    st.merge()
    # clients download chunks, we need to check that the data are
    # the desired length
    final_channels = []
    for tr in st:
        tr.trim(starttime, endtime)
        # Drop the leading sample when the trace is exactly one sample
        # longer than the requested window.
        if len(tr.data) == (process_len * tr.stats.sampling_rate) + 1:
            tr.data = tr.data[1:len(tr.data)]
        if tr.stats.endtime - tr.stats.starttime < 0.8 * process_len:
            Logger.warning(
                "Data for {0}.{1} is {2} hours long, which is less than 80 "
                "percent of the desired length, will not use".format(
                    tr.stats.station, tr.stats.channel,
                    (tr.stats.endtime - tr.stats.starttime) / 3600))
        elif not pre_processing._check_daylong(tr):
            Logger.warning("Data are mostly zeros, removing trace: {0}".format(
                tr.id))
        else:
            final_channels.append(tr)
    st.traces = final_channels
    return st