Example 1
    def test_detection_multiplot(self):
        times = [min([pk.time - 0.05 for pk in self.event.picks])]
        times.append(times[0] + 10)
        fig = detection_multiplot(
            stream=self.st, template=self.template, times=times,
            show=False, return_figure=True)
        return fig
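
The test above depends on unittest fixtures (self.st, self.event, self.template). A minimal standalone sketch of the same call, assuming a template and a stream processed with the same parameters have been saved to the hypothetical files template.ms and day_data.ms:

from obspy import read
from eqcorrscan.utils.plotting import detection_multiplot

# Hypothetical filenames; the stream must be processed with the same
# filter and sampling parameters as the template.
template = read('template.ms')
st = read('day_data.ms')
# Align the template at one candidate detection time.
times = [st[0].stats.starttime]
fig = detection_multiplot(stream=st, template=template, times=times,
                          show=False, return_figure=True)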
Example 2
def run_tutorial(plot=False):
    """Main function to run the tutorial dataset."""

    from eqcorrscan.utils import pre_processing
    from eqcorrscan.utils import plotting
    from eqcorrscan.core import match_filter
    import glob
    from multiprocessing import cpu_count

    # This import section copes with namespace changes between obspy versions
    import obspy
    if int(obspy.__version__.split('.')[0]) >= 1:
        from obspy.clients.fdsn import Client
    else:
        from obspy.fdsn import Client
    from obspy import UTCDateTime, Stream, read

    # First we want to load our templates
    template_names = glob.glob('tutorial_template_*.ms')

    if len(template_names) == 0:
        raise IOError('Template files not found, have you run the template ' +
                      'creation tutorial?')

    templates = [read(template_name) for template_name in template_names]

    # Work out what stations we have and get the data for them
    stations = []
    for template in templates:
        for tr in template:
            stations.append((tr.stats.station, tr.stats.channel))
    # Get a unique list of stations
    stations = list(set(stations))

    # We will loop through the data one chunk at a time; these chunks can be
    # any size. In general we have used one day as our standard, but this can
    # be as short as five minutes (for MAD thresholds) or shorter for other
    # threshold metrics. However, the chunk size should be the same as your
    # template process_len.

    # You should test different parameters!!!
    start_time = UTCDateTime(2016, 1, 4)
    end_time = UTCDateTime(2016, 1, 5)
    process_len = 3600
    chunks = []
    chunk_start = start_time
    while chunk_start < end_time:
        chunk_end = chunk_start + process_len
        if chunk_end > end_time:
            chunk_end = end_time
        chunks.append((chunk_start, chunk_end))
        chunk_start += process_len

    unique_detections = []
    detections = []

    # Set up a client to access the GeoNet database
    client = Client("GEONET")

    # Note that these chunks do not rely on each other, and could be processed
    # in parallel on multiple nodes of a distributed cluster; see the SLURM
    # tutorial for an example of this.
    for t1, t2 in chunks:
        # Generate the bulk information to query the GeoNet database
        bulk_info = []
        for station in stations:
            bulk_info.append(('NZ', station[0], '*',
                              station[1][0] + 'H' + station[1][-1], t1, t2))

        # Note this will take a little while.
        print('Downloading seismic data, this may take a while')
        st = client.get_waveforms_bulk(bulk_info)
        # Merge the stream, it will be downloaded in chunks
        st.merge(fill_value='interpolate')

        # Set how many cores we want to parallelize across. We will set this
        # to four as this is the number of templates; if your machine has
        # fewer than four cores/CPUs the multiprocessing will wait until there
        # is a free core. Setting this higher than the number of templates
        # will not increase speed, as only detections for each template are
        # computed in parallel. It may also slow your processing by using
        # more memory than needed, to the extent that swap may be filled.
        if cpu_count() < 4:
            ncores = cpu_count()
        else:
            ncores = 4

        # Pre-process the data to set frequency band and sampling rate
        # Note that these parameters are, and MUST BE, the same as those used
        # for the template creation.
        print('Processing the seismic data')
        st = pre_processing.shortproc(st,
                                      lowcut=2.0,
                                      highcut=9.0,
                                      filt_order=4,
                                      samp_rate=20.0,
                                      debug=2,
                                      num_cores=ncores,
                                      starttime=t1,
                                      endtime=t2)
        # Convert from list to stream
        st = Stream(st)

        # Now we can conduct the matched-filter detection
        detections += match_filter.match_filter(template_names=template_names,
                                                template_list=templates,
                                                st=st,
                                                threshold=8.0,
                                                threshold_type='MAD',
                                                trig_int=6.0,
                                                plotvar=plot,
                                                plotdir='.',
                                                cores=ncores,
                                                tempdir=False,
                                                debug=1,
                                                plot_format='jpg')

    # Now let's try to work out how many unique events we have, just to
    # compare with the GeoNet catalog of 20 events on this day in this
    # sequence.
    for master in detections:
        keep = True
        for slave in detections:
            if not master == slave and\
               abs(master.detect_time - slave.detect_time) <= 1.0:
                # If the events are within 1s of each other then test which
                # was the 'best' match, strongest detection
                if not master.detect_val > slave.detect_val:
                    keep = False
                    break
        if keep:
            unique_detections.append(master)

    print('We made a total of ' + str(len(unique_detections)) + ' detections')

    for detection in unique_detections:
        print('Detection at :' + str(detection.detect_time) +
              ' for template ' + detection.template_name +
              ' with a cross-correlation sum of: ' + str(detection.detect_val))
        # We can plot these too
        if plot:
            stplot = st.copy()
            template = templates[template_names.index(detection.template_name)]
            lags = sorted([tr.stats.starttime for tr in template])
            maxlag = lags[-1] - lags[0]
            stplot.trim(starttime=detection.detect_time - 10,
                        endtime=detection.detect_time + maxlag + 10)
            plotting.detection_multiplot(stplot, template,
                                         [detection.detect_time.datetime])
    return unique_detections
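
The duplicate-removal loop above keeps only the strongest detection within any 1 s window. A standalone sketch of that logic, assuming objects with detect_time and detect_val attributes as in the tutorial:

def decluster(detections, tolerance=1.0):
    """Keep only the strongest of any detections within tolerance seconds."""
    unique = []
    for master in detections:
        keep = True
        for slave in detections:
            if master is not slave and \
                    abs(master.detect_time - slave.detect_time) <= tolerance:
                # Weaker (or tied) correlation sums are discarded.
                if not master.detect_val > slave.detect_val:
                    keep = False
                    break
        if keep:
            unique.append(master)
    return unique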
Example 3
def run_tutorial(plot=False,
                 process_len=3600,
                 num_cores=cpu_count(),
                 **kwargs):
    """Main function to run the tutorial dataset."""
    # First we want to load our templates
    template_names = glob.glob('tutorial_template_*.ms')

    if len(template_names) == 0:
        raise IOError('Template files not found, have you run the template ' +
                      'creation tutorial?')

    templates = [read(template_name) for template_name in template_names]

    # Work out what stations we have and get the data for them
    stations = []
    for template in templates:
        for tr in template:
            stations.append((tr.stats.station, tr.stats.channel))
    # Get a unique list of stations
    stations = list(set(stations))

    # We will loop through the data one chunk at a time; these chunks can be
    # any size. In general we have used one day as our standard, but this can
    # be as short as five minutes (for MAD thresholds) or shorter for other
    # threshold metrics. However, the chunk size should be the same as your
    # template process_len.

    # You should test different parameters!!!
    start_time = UTCDateTime(2016, 1, 4)
    end_time = UTCDateTime(2016, 1, 5)
    chunks = []
    chunk_start = start_time
    while chunk_start < end_time:
        chunk_end = chunk_start + process_len
        if chunk_end > end_time:
            chunk_end = end_time
        chunks.append((chunk_start, chunk_end))
        chunk_start += process_len

    unique_detections = []

    # Set up a client to access the GeoNet database
    client = Client("GEONET")

    # Note that these chunks do not rely on each other, and could be processed
    # in parallel on multiple nodes of a distributed cluster; see the SLURM
    # tutorial for an example of this.
    for t1, t2 in chunks:
        # Generate the bulk information to query the GeoNet database
        bulk_info = []
        for station in stations:
            bulk_info.append(('NZ', station[0], '*',
                              station[1][0] + 'H' + station[1][-1], t1, t2))

        # Note this will take a little while.
        print('Downloading seismic data, this may take a while')
        st = client.get_waveforms_bulk(bulk_info)
        # Merge the stream, it will be downloaded in chunks
        st.merge()

        # Pre-process the data to set frequency band and sampling rate
        # Note that these parameters are, and MUST BE, the same as those used
        # for the template creation.
        print('Processing the seismic data')
        st = pre_processing.shortproc(st,
                                      lowcut=2.0,
                                      highcut=9.0,
                                      filt_order=4,
                                      samp_rate=20.0,
                                      num_cores=num_cores,
                                      starttime=t1,
                                      endtime=t2)
        # Convert from list to stream
        st = Stream(st)

        # Now we can conduct the matched-filter detection
        detections = match_filter.match_filter(template_names=template_names,
                                               template_list=templates,
                                               st=st,
                                               threshold=8.0,
                                               threshold_type='MAD',
                                               trig_int=6.0,
                                               plotvar=plot,
                                               plotdir='.',
                                               cores=num_cores,
                                               plot_format='png',
                                               **kwargs)

        # Now let's try to work out how many unique events we have, just to
        # compare with the GeoNet catalog of 20 events on this day in this
        # sequence.
        for master in detections:
            keep = True
            for slave in detections:
                if not master == slave and abs(master.detect_time -
                                               slave.detect_time) <= 1.0:
                    # If the events are within 1s of each other then test which
                    # was the 'best' match, strongest detection
                    if not master.detect_val > slave.detect_val:
                        keep = False
                        print('Removed detection at %s with cccsum %s' %
                              (master.detect_time, master.detect_val))
                        print('Keeping detection at %s with cccsum %s' %
                              (slave.detect_time, slave.detect_val))
                        break
            if keep:
                unique_detections.append(master)
                print('Detection at :' + str(master.detect_time) +
                      ' for template ' + master.template_name +
                      ' with a cross-correlation sum of: ' +
                      str(master.detect_val))
                # We can plot these too
                if plot:
                    stplot = st.copy()
                    template = templates[template_names.index(
                        master.template_name)]
                    lags = sorted([tr.stats.starttime for tr in template])
                    maxlag = lags[-1] - lags[0]
                    stplot.trim(starttime=master.detect_time - 10,
                                endtime=master.detect_time + maxlag + 10)
                    plotting.detection_multiplot(stplot, template,
                                                 [master.detect_time.datetime])
    print('We made a total of ' + str(len(unique_detections)) + ' detections')
    return unique_detections
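
The chunking pattern above appears in both tutorials; a reusable helper sketch, assuming obspy UTCDateTime arithmetic in seconds:

def make_chunks(start_time, end_time, process_len):
    """Split [start_time, end_time) into chunks of at most process_len s."""
    chunks = []
    chunk_start = start_time
    while chunk_start < end_time:
        chunks.append((chunk_start, min(chunk_start + process_len, end_time)))
        chunk_start += process_len
    return chunks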
Example 4
def _prepare_data(detect_data, detections, zipped_templates, delays, shift_len,
                  plot):
    """
    Prepare data for lag_calc - reduce memory here.

    :type detect_data: obspy.core.stream.Stream
    :param detect_data: Stream to extract detection streams from.
    :type detections: list
    :param detections:
        List of :class:`eqcorrscan.core.match_filter.DETECTION` to get
        data for.
    :type zipped_templates: zip
    :param zipped_templates: Zipped list of (template_name, template)
    :type delays: list
    :param delays: List of lists of the delays for each template
    :type shift_len: float
    :param shift_len: Shift length in seconds allowed for picking.
    :type plot: bool
    :param plot:
        Whether to plot the data extracted or not, used for debugging.

    :returns: List of detect_streams to be worked on
    :rtype: list
    """
    detect_streams = []
    for detection in detections:
        # Stream to be saved for new detection
        detect_stream = []
        max_delay = 0
        for tr in detect_data:
            tr_copy = tr.copy()
            # Right now, copying each trace hundreds of times...
            template = [
                t for t in zipped_templates
                if str(t[0]) == str(detection.template_name)
            ]
            if len(template) > 0:
                template = template[0]
            else:
                warnings.warn('No template with name: %s' %
                              detection.template_name)
                for t in zipped_templates:
                    print(t)
                continue
            template = template[1].select(station=tr.stats.station,
                                          channel=tr.stats.channel)
            if template:
                # Save template trace length in seconds
                template_len = len(template[0]) / \
                    template[0].stats.sampling_rate
            else:
                continue
                # If there is no template-data match then skip the rest
                # of the trace loop.
            # Grab the delays for the desired template: [(sta, chan, delay)]
            delay = [
                delay for delay in delays
                if delay[0] == detection.template_name
            ][0][1]
            # Now grab the delay for the desired trace for this template
            delay = [
                d for d in delay
                if d[0] == tr.stats.station and d[1] == tr.stats.channel
            ][0][2]
            if delay > max_delay:
                max_delay = delay
            detect_stream.append(
                tr_copy.trim(
                    starttime=detection.detect_time - shift_len + delay,
                    endtime=detection.detect_time + delay + shift_len +
                    template_len))
            del tr_copy
        for tr in detect_stream:
            if len(tr.data) == 0:
                msg = ('No data in %s.%s for detection at time %s' %
                       (tr.stats.station, tr.stats.channel,
                        detection.detect_time))
                log.debug(msg)
                warnings.warn(msg)
                detect_stream.remove(tr)
            elif tr.stats.endtime - tr.stats.starttime < template_len:
                msg = ("Insufficient data for %s.%s will not use." %
                       (tr.stats.station, tr.stats.channel))
                log.debug(msg)
                warnings.warn(msg)
                detect_stream.remove(tr)
        # Check for duplicate traces
        stachans = [(tr.stats.station, tr.stats.channel)
                    for tr in detect_stream]
        c_stachans = Counter(stachans)
        for key in c_stachans.keys():
            if c_stachans[key] > 1:
                msg = ('Multiple channels for %s.%s, likely a data issue' %
                       (key[0], key[1]))
                raise LagCalcError(msg)
        if plot:
            background = detect_data.copy().trim(
                starttime=detection.detect_time - (shift_len + 5),
                endtime=detection.detect_time + shift_len + max_delay + 7)
            for tr in background:
                if len(tr.data) == 0:
                    background.remove(tr)
            detection_multiplot(stream=background,
                                template=Stream(detect_stream),
                                times=[detection.detect_time - shift_len],
                                title='Detection Extracted')
        if not len(detect_stream) == 0:
            # Create tuple of (template name, data stream)
            detect_streams.append(
                (detection.template_name, Stream(detect_stream)))
    return detect_streams
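
Example 12 below replaces the nested list lookups for delays with a dictionary keyed by 'station.channel'. A sketch of that conversion, assuming the [(template_name, [(sta, chan, delay), ...])] structure used here:

def delays_to_dict(delays, template_name):
    """Flatten one template's delays into a 'station.channel'-keyed dict."""
    template_delays = [d for d in delays if d[0] == template_name][0][1]
    return {'%s.%s' % (sta, chan): delay
            for sta, chan, delay in template_delays}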
Example 5
def detections_2_cat(detections, template_dict, stream, temp_prepick,
                     max_lag, cc_thresh, extract_pre_pick=3.0,
                     extract_post_pick=7.0, write_wav=False, debug=0):
    r"""Function to create a catalog from a list of detections, adjusting template pick \
    times using cross correlation with data stream at the time of detection.

    :type detections: list of DETECTION objects
    :param detections: Detections which we want to extract and locate.
    :type template_dict: dict
    :param template_dict: Dictionary of template name: template stream for the entire \
        catalog. Template names must be in the format found in the DETECTION objects.
    :type stream: obspy.Stream
    :param stream: stream encompassing time span of the detections. Will be used for pick \
        refinement by cross correlation. Should be fed a stream processed in the same way \
        as the streams in template dict (and in the same way that they were processed \
        during matched filtering). The waveforms will not be processed here.
    :type write_wav: bool or str
    :param write_wav: If false, will not write detection waveforms to miniseed files. \
        Otherwise, specify a directory to write the templates to. Will use name \
        template_name_detection_time.mseed.
    :returns: :class: obspy.Catalog
    """

    from obspy import UTCDateTime, Catalog, Stream
    from obspy.core.event import (
        ResourceIdentifier, Event, Pick, CreationInfo, Comment,
        WaveformStreamID)
    from obspy.signal.cross_correlation import xcorr
    from eqcorrscan.utils import plotting
    # These are used below but were not imported in the original snippet
    import numpy as np
    import warnings
    import matplotlib.pyplot as plt

    # XXX TODO: Scripts haven't been saving the actual detection objects so we
    # XXX TODO: cannot make use of DETECTION.chans. Would be useful.

    # Copy stream out of the way
    st = stream.copy()
    # Create nested dictionary of delays template_name: stachan: delay
    # dict.items() works in both Python 2 and 3, but is memory-inefficient in
    # Python 2 as both variables are read into memory as lists
    delays = {}
    for name, temp in template_dict.items():
        sorted_temp = temp.sort(['starttime'])
        stachans = [(tr.stats.station, tr.stats.channel, tr.stats.network)
                    for tr in sorted_temp]
        mintime = sorted_temp[0].stats.starttime
        delays[name] = {(tr.stats.station, tr.stats.channel): tr.stats.starttime - mintime
                        for tr in sorted_temp}
    # Loop over all detections, saving each as a new event in a catalog
    new_cat = Catalog()
    for detection in detections:
        if write_wav:
            new_stream = Stream()
        if hasattr(detection, 'event'):
            new_event = detection.event
        else:
            rid = ResourceIdentifier(
                id=detection.template_name + '_' +
                detection.detect_time.strftime('%Y%m%dT%H%M%S.%f'),
                prefix='smi:local')
            new_event = Event(resource_id=rid)
            cr_i = CreationInfo(author='EQcorrscan',
                                creation_time=UTCDateTime())
            new_event.creation_info = cr_i
            thresh_str = 'threshold=' + str(detection.threshold)
            ccc_str = 'detect_val=' + str(detection.detect_val)
            det_time_str = 'det_time=%s' % str(detection.detect_time)
            if detection.chans:
                used_chans = 'channels used: ' + \
                             ' '.join([str(pair) for pair in detection.chans])
                new_event.comments.append(Comment(text=used_chans))
            new_event.comments.append(Comment(text=thresh_str))
            new_event.comments.append(Comment(text=ccc_str))
            new_event.comments.append(Comment(text=det_time_str))
        template = template_dict[detection.template_name]
        # Template length in seconds (npts / sampling_rate)
        temp_len = template[0].stats.npts / template[0].stats.sampling_rate
        if template.sort(['starttime'])[0].stats.starttime == \
                detection.detect_time:
            print('Template %s detected itself at %s.' %
                  (detection.template_name, str(detection.detect_time)))
            new_event.resource_id = ResourceIdentifier(
                id=detection.template_name + '_self', prefix='smi:local')
        if debug >= 2:
            print('Plotting detection for template: %s' % detection.template_name)
            plt_st = Stream([
                st.select(station=tr.stats.station,
                          channel=tr.stats.channel)[0].slice(
                    detection.detect_time - extract_pre_pick,
                    detection.detect_time + extract_post_pick)
                for tr in template
                if len(st.select(station=tr.stats.station,
                                 channel=tr.stats.channel)) > 0])
            plotting.detection_multiplot(plt_st, template,
                                         [detection.detect_time.datetime])
        # Loop over each trace in the template, correcting picks for new event if need be
        for tr in template:
            sta = tr.stats.station
            chan = tr.stats.channel
            if len(st.select(station=sta, channel=chan)) != 0:
                st_tr = st.select(station=sta, channel=chan)[0]
            else:
                print('No stream for %s: %s' % (sta, chan))
                continue
            st_tr_pick = (detection.detect_time +
                          delays[detection.template_name][(sta, chan)] +
                          temp_prepick)
            i, absval, full_corr = xcorr(
                tr, st_tr.slice(st_tr_pick - temp_prepick,
                                st_tr_pick - temp_prepick + temp_len),
                shift_len=max_lag, full_xcorr=True)
            ccval = max(full_corr)
            index = np.argmax(full_corr) - max_lag
            pk_str = 'ccval=' + str(ccval)
            if abs(index) == max_lag:
                msg = 'Correlation correction at max_lag. Consider increasing max_lag.'
                warnings.warn(msg)
            if debug >= 3:
                print('Plotting full correlation function')
                print('index: %d' % index)
                print('max_ccval: %.2f' % ccval)
                plt.plot(full_corr)
                plt.show()
                plt.close()
            if ccval > cc_thresh:
                print('Threshold exceeded at %s: %s' % (sta, chan))
                pick_tm = st_tr_pick + (index / tr.stats.sampling_rate)
            else:
                print('Correlation at %s: %s not good enough to correct pick' % (sta, chan))
                pick_tm = st_tr_pick
            if tr.stats.channel[-1] in ['Z']:
                phase_hint = 'P'
            elif tr.stats.channel[-1] in ['N', 'E', '1', '2']:
                phase_hint = 'S'
            else:
                # Avoid carrying a phase_hint over from a previous trace
                phase_hint = None
            wv_id = WaveformStreamID(network_code=tr.stats.network,
                                     station_code=tr.stats.station,
                                     channel_code=tr.stats.channel)
            new_event.picks.append(Pick(time=pick_tm, waveform_id=wv_id, phase_hint=phase_hint,
                                        comments=[Comment(text=pk_str)]))
            if write_wav:
                new_stream.append(st_tr.slice(
                    starttime=pick_tm - extract_pre_pick,
                    endtime=pick_tm + extract_post_pick))
        # Append to new catalog
        new_cat += new_event
        if write_wav:
            filename = '%s%s.mseed' % (write_wav, str(new_event.resource_id))
            print('Writing new stream for detection to %s' % filename)
            new_stream.write(filename, format='MSEED')
    return new_cat
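
obspy's xcorr() used above was deprecated and later removed; a sketch of the same pick correction using correlate() and xcorr_max() (available from obspy 1.1), with max_lag in samples as above:

from obspy.signal.cross_correlation import correlate, xcorr_max

def correct_pick(template_tr, data_tr, pick_time, max_lag, cc_thresh):
    # Correlate the template trace against the sliced data trace, allowing
    # shifts of up to max_lag samples either way.
    cc = correlate(template_tr, data_tr, shift=max_lag)
    shift, ccval = xcorr_max(cc, abs_max=False)
    if ccval > cc_thresh:
        # Convert the best shift from samples to seconds.
        return pick_time + shift / template_tr.stats.sampling_rate, ccval
    return pick_time, ccval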
Example 6
    def test_match_filter(self, samp_rate=20.0, debug=0):
        """
        Function to test the capabilities of match_filter and just check that \
        it is working!  Uses synthetic templates and seeded, randomised data.

        :type debug: int
        :param debug: Debug level, higher the number the more output.
        """
        from eqcorrscan.utils import pre_processing
        from eqcorrscan.utils import plotting
        from eqcorrscan.core import match_filter
        from eqcorrscan.utils.synth_seis import generate_synth_data
        from obspy import UTCDateTime
        import string
        # Generate a random dataset
        templates, data, seeds = generate_synth_data(nsta=5, ntemplates=2,
                                                     nseeds=50,
                                                     samp_rate=samp_rate,
                                                     t_length=6.0, max_amp=5.0,
                                                     debug=debug)
        # Notes to the user: If you use more templates you should ensure they
        # are more different, e.g. set the data to have larger moveouts,
        # otherwise similar templates will detect events seeded by another
        # template.
        # Test the pre_processing functions
        data = pre_processing.dayproc(st=data, lowcut=2.0, highcut=8.0,
                                      filt_order=3, samp_rate=samp_rate,
                                      debug=0, starttime=UTCDateTime(0))
        if debug > 0:
            data.plot()
        # Filter the data and the templates
        for template in templates:
            pre_processing.shortproc(st=template, lowcut=2.0, highcut=8.0,
                                     filt_order=3, samp_rate=samp_rate)
            if debug > 0:
                template.plot()
        template_names = list(string.ascii_lowercase)[0:len(templates)]
        detections = match_filter.match_filter(template_names=template_names,
                                               template_list=templates,
                                               st=data, threshold=10.0,
                                               threshold_type='MAD',
                                               trig_int=6.0,
                                               plotvar=False,
                                               plotdir='.',
                                               cores=1,
                                               debug=0)
        # Compare the detections to the seeds
        print('This test made ' + str(len(detections)) + ' detections')
        ktrue = 0
        kfalse = 0
        for detection in detections:
            print(detection.template_name)
            i = template_names.index(detection.template_name)
            t_seeds = seeds[i]
            dtime_samples = int((detection.detect_time - UTCDateTime(0)) *
                                samp_rate)
            if dtime_samples in t_seeds['time']:
                j = list(t_seeds['time']).index(dtime_samples)
                print('Detection at SNR of: ' + str(t_seeds['SNR'][j]))
                ktrue += 1
            else:
                min_diff = min(abs(t_seeds['time'] - dtime_samples))
                if min_diff < 10:
                    # If there is a match within ten samples then it is
                    # good enough
                    j = list(abs(t_seeds['time'] -
                                 dtime_samples)).index(min_diff)
                    print('Detection at SNR of: ' + str(t_seeds['SNR'][j]))
                    ktrue += 1
                else:
                    print('Detection at sample: ' + str(dtime_samples) +
                          ' does not match anything in seed times:')
                    kfalse += 1
                print('Minimum difference in samples is: ' + str(min_diff))
        # Plot the detections
        if debug > 3:
            for i, template in enumerate(templates):
                times = [d.detect_time.datetime for d in detections
                         if d.template_name == template_names[i]]
                print(times)
                plotting.detection_multiplot(data, template, times)
        # Set an 'acceptable' ratio of positive to false detections
        print(str(ktrue) + ' true detections and ' + str(kfalse) +
              ' false detections')
        self.assertTrue(kfalse / ktrue < 0.25)
Example 7
def run_tutorial(plot=False):
    """Main function to run the tutorial dataset."""

    from eqcorrscan.utils import pre_processing
    from eqcorrscan.utils import plotting
    from eqcorrscan.core import match_filter
    import glob

    # This import section copes with namespace changes between obspy versions
    import obspy
    if int(obspy.__version__.split('.')[0]) >= 1:
        from obspy.clients.fdsn import Client
    else:
        from obspy.fdsn import Client
    from obspy import UTCDateTime, Stream, read

    # First we want to load our templates
    template_names = glob.glob('tutorial_template_*.ms')

    if len(template_names) == 0:
        raise IOError('Template files not found, have you run the template ' +
                      'creation tutorial?')

    templates = [read(template_name) for template_name in template_names]

    # Work out what stations we have and get the data for them
    stations = []
    for template in templates:
        for tr in template:
            stations.append((tr.stats.station, tr.stats.channel))
    # Get a unique list of stations
    stations = list(set(stations))

    # We are going to look for detections on the day of our template; however,
    # to generalize, we will loop through the days between our templates. In
    # this case that is only one day.

    template_days = []
    for template in templates:
        template_days.append(template[0].stats.starttime.date)
    template_days = sorted(template_days)
    kdays = (template_days[-1] - template_days[0]).days + 1

    unique_detections = []

    for i in range(kdays):
        t1 = UTCDateTime(template_days[0]) + (86400 * i)
        t2 = t1 + 86400

        # Generate the bulk information to query the GeoNet database
        bulk_info = []
        for station in stations:
            bulk_info.append(('NZ', station[0], '*',
                              station[1][0] + 'H' + station[1][-1], t1, t2))

        # Set up a client to access the GeoNet database
        client = Client("GEONET")

        # Note this will take a little while.
        print('Downloading seismic data, this may take a while')
        st = client.get_waveforms_bulk(bulk_info)
        # Merge the stream, it will be downloaded in chunks
        st.merge(fill_value='interpolate')

        # Work out what data we actually have to cope with possible lost data
        stations = list(set([tr.stats.station for tr in st]))

        # Set how many cores we want to parallelize across. We will set this
        # to four as this is the number of templates; if your machine has
        # fewer than four cores/CPUs the multiprocessing will wait until there
        # is a free core. Setting this higher than the number of templates
        # will not increase speed, as only detections for each template are
        # computed in parallel. It may also slow your processing by using
        # more memory than needed, to the extent that swap may be filled.
        ncores = 4

        # Pre-process the data to set frequency band and sampling rate
        # Note that these parameters are, and MUST BE, the same as those used
        # for the template creation.
        print('Processing the seismic data')
        st = pre_processing.dayproc(st, lowcut=2.0, highcut=9.0,
                                    filt_order=4, samp_rate=20.0,
                                    debug=0, starttime=t1, num_cores=ncores)
        # Convert from list to stream
        st = Stream(st)

        # Now we can conduct the matched-filter detection
        detections = match_filter.match_filter(template_names=template_names,
                                               template_list=templates,
                                               st=st, threshold=8.0,
                                               threshold_type='MAD',
                                               trig_int=6.0, plotvar=plot,
                                               plotdir='.', cores=ncores,
                                               tempdir=False, debug=1,
                                               plot_format='jpg')

        # Now let's try to work out how many unique events we have, just to
        # compare with the GeoNet catalog of 20 events on this day in this
        # sequence.
        for master in detections:
            keep = True
            for slave in detections:
                if not master == slave and\
                   abs(master.detect_time - slave.detect_time) <= 1.0:
                    # If the events are within 1s of each other then test which
                    # was the 'best' match, strongest detection
                    if not master.detect_val > slave.detect_val:
                        keep = False
                        break
            if keep:
                unique_detections.append(master)

    print('We made a total of ' + str(len(unique_detections)) + ' detections')

    for detection in unique_detections:
        print('Detection at :' + str(detection.detect_time) +
              ' for template ' + detection.template_name +
              ' with a cross-correlation sum of: ' +
              str(detection.detect_val))
        # We can plot these too
        if plot:
            stplot = st.copy()
            template = templates[template_names.index(detection.template_name)]
            lags = sorted([tr.stats.starttime for tr in template])
            maxlag = lags[-1] - lags[0]
            stplot.trim(starttime=detection.detect_time - 10,
                        endtime=detection.detect_time + maxlag + 10)
            plotting.detection_multiplot(stplot, template,
                                         [detection.detect_time.datetime])
    return unique_detections
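
Each tutorial builds its bulk request by expanding the stored (station, channel) pairs into band code + 'H' + component, so the high-gain channel is requested whatever instrument code the template carried (assumed intent). A sketch of that line as a helper:

def bulk_line(station, channel, t1, t2, network='NZ'):
    # e.g. ('FOZ', 'ENZ') -> request channel 'EHZ' for station FOZ
    return (network, station, '*', channel[0] + 'H' + channel[-1], t1, t2)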
Example 8
raw_dict = {}
for filename in raw_files:
    uri_name = 'smi:org.gfz-potsdam.de/geofon/' +\
               filename.split('/')[-1].split('_')[-1].rstrip('.mseed')
    uri = ResourceIdentifier(uri_name)
    raw_dict[uri] = read(filename)

# Grab some catalog of interest
cat_list = glob('/media/chet/hdd/seismic/NZ/catalogs/qml/corr_groups/*029*')
cat = read_events(
    '/media/chet/hdd/seismic/NZ/catalogs/qml/2015_nlloc_final_run02_group_refined.xml'
)

# Plotting with multi_event_singlechan

# Plot a template over raw data? Not sure this works correctly
rid = cat[0].resource_id
temp_st = template_dict[rid]
raw_st = raw_dict[rid]
raw_st.filter('bandpass', freqmin=1.0, freqmax=20)
times = []
for tr in raw_st:
    temp_tr_time = [
        p.time for p in cat[0].picks
        if p.waveform_id.station_code == tr.stats.station
        and p.waveform_id.channel_code == tr.stats.channel
    ]
    if temp_tr_time:
        times.append(temp_tr_time[0])
plotting.detection_multiplot(raw_st, temp_st, times, plot_mode='single')
Example 9
                                       cores=6)

for detection in detections:
    #detection.write('detections.csv', append=True)
    detection.write('detections.csv')

# plot
# multi_trace_plot(st, corr=True, stack='linstack', size=(7, 12), show=True, title=None)

times = []
for dc in detections:
    for pick in dc.event.picks:
        times.append(pick.time)
template = read('template.ms')
template.plot()
detection_multiplot(st, template, times, streamcolour='k', templatecolour='r')

# f, (ax1, ax2, ax3) = plt.subplots(3, 1, sharex=True, sharey=False)
# ax1.plot(st.select(id='YN.ZAT.00.BZ')[0].data, 'k')
# y1min, y1max = ax1.get_ylim()
# ax2.plot(st.select(id='YN.ZAT.00.BE')[0].data, 'k')
# y2min, y2max = ax2.get_ylim()
# ax3.plot(st.select(id='YN.ZAT.00.BN')[0].data, 'k')
# y3min, y3max = ax3.get_ylim()
#
# for detection in detections:
#     t = detection.detect_time
#     tt = (t- st[0].stats['starttime']) / st[0].stats['delta']
#     ax1.vlines(tt, y1min, y1max, color='r', linewidth=2)
#     ax2.vlines(tt, y2min, y2max, color='r', linewidth=2)
#     ax3.vlines(tt, y3min, y3max, color='r', linewidth=2)
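
A compact, runnable variant of the commented matplotlib overlay above, assuming st and detections exist as in the snippet and that the YN.ZAT.00 channel ids are present:

import matplotlib.pyplot as plt

fig, axes = plt.subplots(3, 1, sharex=True)
for ax, comp in zip(axes, ['BZ', 'BE', 'BN']):
    tr = st.select(id='YN.ZAT.00.' + comp)[0]
    ax.plot(tr.data, 'k')
    ymin, ymax = ax.get_ylim()
    for detection in detections:
        # Convert the detection time to a sample index on this trace.
        tt = (detection.detect_time - tr.stats.starttime) / tr.stats.delta
        ax.vlines(tt, ymin, ymax, color='r', linewidth=2)
plt.show()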
Example 10
    def test_match_filter(self, samp_rate=20.0, debug=0):
        """
        Function to test the capabilities of match_filter and just check that \
        it is working!  Uses synthetic templates and seeded, randomised data.

        :type debug: int
        :param debug: Debug level, higher the number the more output.
        """
        from eqcorrscan.utils import pre_processing
        from eqcorrscan.utils import plotting
        from eqcorrscan.core import match_filter
        from eqcorrscan.utils.synth_seis import generate_synth_data
        from obspy import UTCDateTime
        import string
        # Generate a random dataset
        templates, data, seeds = generate_synth_data(nsta=5,
                                                     ntemplates=2,
                                                     nseeds=50,
                                                     samp_rate=samp_rate,
                                                     t_length=6.0,
                                                     max_amp=5.0,
                                                     max_lag=12.0,
                                                     debug=debug)
        # Notes to the user: If you use more templates you should ensure they
        # are more different, e.g. set the data to have larger moveouts,
        # otherwise similar templates will detect events seeded by another
        # template.
        # Test the pre_processing functions
        data = pre_processing.dayproc(st=data,
                                      lowcut=2.0,
                                      highcut=8.0,
                                      filt_order=3,
                                      samp_rate=samp_rate,
                                      debug=0,
                                      starttime=UTCDateTime(0))
        if debug > 0:
            data.plot()
        # Filter the data and the templates
        for template in templates:
            pre_processing.shortproc(st=template,
                                     lowcut=2.0,
                                     highcut=8.0,
                                     filt_order=3,
                                     samp_rate=samp_rate)
            if debug > 0:
                template.plot()
        template_names = list(string.ascii_lowercase)[0:len(templates)]
        detections = match_filter.match_filter(template_names=template_names,
                                               template_list=templates,
                                               st=data,
                                               threshold=10.0,
                                               threshold_type='MAD',
                                               trig_int=6.0,
                                               plotvar=False,
                                               plotdir='.',
                                               cores=1,
                                               debug=0)
        # Compare the detections to the seeds
        print('This test made ' + str(len(detections)) + ' detections')
        ktrue = 0
        kfalse = 0
        for detection in detections:
            print(detection)
            i = template_names.index(detection.template_name)
            t_seeds = seeds[i]
            dtime_samples = int(
                (detection.detect_time - UTCDateTime(0)) * samp_rate)
            if dtime_samples in t_seeds['time']:
                j = list(t_seeds['time']).index(dtime_samples)
                print('Detection at SNR of: ' + str(t_seeds['SNR'][j]))
                ktrue += 1
            else:
                min_diff = min(abs(t_seeds['time'] - dtime_samples))
                if min_diff < 10:
                    # If there is a match within ten samples then it is
                    # good enough
                    j = list(abs(t_seeds['time'] -
                                 dtime_samples)).index(min_diff)
                    print('Detection at SNR of: ' + str(t_seeds['SNR'][j]))
                    ktrue += 1
                else:
                    print('Detection at sample: ' + str(dtime_samples) +
                          ' does not match anything in seed times:')
                    kfalse += 1
                print('Minimum difference in samples is: ' + str(min_diff))
        # Plot the detections
        if debug > 3:
            for i, template in enumerate(templates):
                times = [
                    d.detect_time.datetime for d in detections
                    if d.template_name == template_names[i]
                ]
                print(times)
                plotting.detection_multiplot(data, template, times)
        # Set an 'acceptable' ratio of positive to false detections
        print(
            str(ktrue) + ' true detections and ' + str(kfalse) +
            ' false detections')
        self.assertTrue(kfalse / ktrue < 0.25)
Example 11
                        print('Detection at :' + str(detection.detect_time) +
                              ' for template ' + detection.template_name +
                              ' with a cross-correlation sum of: ' +
                              str(detection.detect_val))
                        stplot2 = std_filter.copy()
                        template = templates[template_names.index(
                            detection.template_name)]
                        lags2 = sorted([tr.stats.starttime for tr in template])
                        maxlag2 = lags2[-1] - lags2[0]
                        starttime = detection.detect_time
                        stplot2.trim(starttime=starttime - 10,
                                     endtime=starttime + maxlag2 + 10)
                        plotting.detection_multiplot(
                            stplot2,
                            template, [detection.detect_time.datetime],
                            size=[24.0, 11.77],
                            save=True,
                            savefile=os.getcwd() +
                            '/Detection_Plots/Detection_' + str(starttime) +
                            '.png')

            # Clear streams to keep memory usage low
            std1.clear()
            std_filter.clear()
            std.clear()
            st.clear()

            # Delete automatically generated template*.npy files
            filelist = glob.glob(os.getcwd() + "/template_*.npy")
            for file in filelist:
                os.remove(file)
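
The savefile path used above assumes a Detection_Plots directory already exists in the working directory; a small guard (sketch, Python 3):

import os

plot_dir = os.path.join(os.getcwd(), 'Detection_Plots')
os.makedirs(plot_dir, exist_ok=True)  # create it if missing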
Example 12
def _prepare_data(detect_data, detections, template, delays, shift_len, plot):
    """
    Prepare data for lag_calc - reduce memory here.

    :type detect_data: obspy.core.stream.Stream
    :param detect_data: Stream to extract detection streams from.
    :type detections: list
    :param detections:
        List of :class:`eqcorrscan.core.match_filter.Detection` to get
        data for.
    :type template: tuple
    :param template: tuple of (template_name, template)
    :type delays: dict
    :param delays:
        Dictionary of delay times in seconds keyed by station.channel.
    :type shift_len: float
    :param shift_len: Shift length in seconds allowed for picking.
    :type plot: bool
    :param plot:
        Whether to plot the data extracted or not, used for debugging.

    :returns: List of detect_streams to be worked on
    :rtype: list
    """
    detect_streams = []
    for detection in detections:
        if detection.template_name != template[0]:
            continue
        # Stream to be saved for new detection
        detect_stream = []
        max_delay = 0
        for tr in detect_data:
            template_tr = template[1].select(station=tr.stats.station,
                                             channel=tr.stats.channel)
            if len(template_tr) >= 1:
                # Save template trace length in seconds
                template_len = (len(template_tr[0]) /
                                template_tr[0].stats.sampling_rate)
            else:
                continue
                # If there is no template-data match then skip the rest
                # of the trace loop.
            # Grab the delays for the desired template: [(sta, chan, delay)]
            # Now grab the delay for the desired trace for this template
            delay = delays[tr.stats.station + '.' + tr.stats.channel]
            if delay > max_delay:
                max_delay = delay
            detect_stream.append(
                tr.slice(starttime=detection.detect_time - shift_len + delay,
                         endtime=detection.detect_time + delay + shift_len +
                         template_len).copy())
        for tr in detect_stream:
            if len(tr.data) == 0:
                msg = ('No data in %s.%s for detection at time %s' %
                       (tr.stats.station, tr.stats.channel,
                        detection.detect_time))
                warnings.warn(msg)
                detect_stream.remove(tr)
            elif tr.stats.endtime - tr.stats.starttime < (
                    2 * shift_len) + template_len:
                msg = ("Insufficient data for %s.%s will not use." %
                       (tr.stats.station, tr.stats.channel))
                warnings.warn(msg)
                detect_stream.remove(tr)
            elif np.ma.is_masked(tr.data):
                msg = ("Masked data found for %s.%s, will not use." %
                       (tr.stats.station, tr.stats.channel))
                warnings.warn(msg)
                detect_stream.remove(tr)
        # Check for duplicate traces
        stachans = [(tr.stats.station, tr.stats.channel)
                    for tr in detect_stream]
        c_stachans = Counter(stachans)
        for key in c_stachans.keys():
            if c_stachans[key] > 1:
                msg = ('Multiple channels for %s.%s, likely a data issue' %
                       (key[0], key[1]))
                raise LagCalcError(msg)
        if plot:
            background = detect_data.slice(
                starttime=detection.detect_time - (shift_len + 5),
                endtime=detection.detect_time + shift_len + max_delay +
                7).copy()
            for tr in background:
                if len(tr.data) == 0:
                    background.remove(tr)
            detection_multiplot(stream=background,
                                template=Stream(detect_stream),
                                times=[detection.detect_time - shift_len],
                                title='Detection Extracted')
        if not len(detect_stream) == 0:
            # Make sure there are no masks left over.
            detect_stream = Stream(detect_stream).split()
            # Create tuple of (template name, data stream)
            detect_streams.append(
                (detection.template_name, Stream(detect_stream)))
    return detect_streams
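
Stream.split() is used above because merging gappy data yields masked arrays, which downstream correlation routines cannot use. A tiny demonstration of that round trip:

import numpy as np
from obspy import Trace, Stream

tr1 = Trace(data=np.random.randn(100))   # 100 s at the default 1 Hz
tr2 = tr1.copy()
tr2.stats.starttime += 150               # leave a 50 s gap
st = Stream([tr1, tr2]).merge()          # merge() masks the gap
assert np.ma.is_masked(st[0].data)
st = st.split()                          # back to two contiguous traces
assert len(st) == 2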
Example 13
# Now let's try to work out how many unique events we have, just to compare
# with the GeoNet catalog of 20 events on this day in this sequence
for master in detections:
    keep = True
    for slave in detections:
        if not master == slave and \
                abs(master.detect_time - slave.detect_time) <= 1.0:
            # If the events are within 1s of each other then test which
            # was the 'best' match, strongest detection
            if not master.detect_val > slave.detect_val:
                keep = False
                break
    if keep:
        unique_detections.append(master)

print('We made a total of ' + str(len(unique_detections)) + ' detections')

for detection in unique_detections:
    print('Detection at :' + str(detection.detect_time) + ' for template ' +
          detection.template_name + ' with a cross-correlation sum of: ' +
          str(detection.detect_val))
    # We can plot these too
    stplot = st.copy()
    template = templates[template_names.index(detection.template_name)]
    lags = sorted([tr.stats.starttime for tr in template])
    maxlag = lags[-1] - lags[0]
    stplot.trim(starttime=detection.detect_time - 10,
                endtime=detection.detect_time + maxlag + 10)
    plotting.detection_multiplot(stplot, template,
                                 [detection.detect_time.datetime])
Example 14
for master in detections:
    keep = True
    for slave in detections:
        if not master == slave and abs(master.detect_time - slave.detect_time) <= 1.0:
            # If the events are within 1s of each other then test which
            # was the 'best' match, strongest detection
            if not master.detect_val > slave.detect_val:
                keep = False
                break
    if keep:
        unique_detections.append(master)

print("We made a total of " + str(len(unique_detections)) + " detections")

for detection in unique_detections:
    print(
        "Detection at :"
        + str(detection.detect_time)
        + " for template "
        + detection.template_name
        + " with a cross-correlation sum of: "
        + str(detection.detect_val)
    )
    # We can plot these too
    stplot = st.copy()
    template = templates[template_names.index(detection.template_name)]
    lags = sorted([tr.stats.starttime for tr in template])
    maxlag = lags[-1] - lags[0]
    stplot.trim(starttime=detection.detect_time - 10, endtime=detection.detect_time + maxlag + 10)
    plotting.detection_multiplot(stplot, template, [detection.detect_time.datetime])
Example 15
def plot_detection_wavs(family, tribe, wav_dirs, start=None, end=None,
                        save=False, save_dir=None, no_dets=5):
    """
    Wrapper on detection_multiplot() for our dataset
    :param cat: catalog of detections
    :param temp_dir: template waveform dict
    :param det_dir: detection waveform dict
    :return: matplotlib.pyplot.Figure
    """

    # Random range of dates in detections
    rand_inds = np.random.choice(range(len(family)), no_dets, replace=False)
    cat = Catalog(events=[det.event for i, det in enumerate(family)
                          if i in rand_inds])
    # Always plot self_detection
    cat += [det.event for det in family
            if det.detect_val / det.no_chans == 1.0][0]
    cat.events.sort(key=lambda x: x.picks[0].time)
    sub_fam = Family(template=family.template,
                     detections=[det for i, det in enumerate(family)
                                 if i in rand_inds])
    sub_fam.detections.extend([det for det in family
                               if det.detect_val / det.no_chans == 1.0])
    temp = tribe[sub_fam.template.name]
    if start:
        cat_start = datetime.strptime(start, '%d/%m/%Y')
        cat_end = datetime.strptime(end, '%d/%m/%Y')
    else:
        cat_start = cat[0].picks[0].time.date
        cat_end = cat[-1].picks[0].time.date
    for date in date_generator(cat_start, cat_end):
        dto = UTCDateTime(date)
        dets = [det for det in sub_fam
                if dto < det.detect_time < dto + 86400]
        if len(dets) == 0:
            print('No detections on: {!s}'.format(dto))
            continue
        print('Running for date: %s' % str(dto))
        stachans = {}
        for det in dets:
            ev = det.event
            for pk in ev.picks:
                sta = pk.waveform_id.station_code
                chan = pk.waveform_id.channel_code
                if sta not in stachans:
                    stachans[sta] = [chan]
                elif chan not in stachans[sta]:
                    stachans[sta].append(chan)
        # Grab day's wav files
        wav_ds = ['%s%d' % (d, dto.year) for d in wav_dirs]
        stream = grab_day_wavs(wav_ds, dto, stachans)
        print('Preprocessing')
        st1 = pre_processing.dayproc(stream, temp.lowcut, temp.highcut,
                                     temp.filt_order, temp.samp_rate,
                                     starttime=dto, num_cores=3)
        for det in dets:
            det_st = st1.slice(starttime=det.detect_time - 3,
                               endtime=det.detect_time + 7).copy()
            fname = '{}/{}.png'.format(
                save_dir,
                str(det.event.resource_id).split('/')[-1])
            det_t = 'Template {}: {}'.format(temp.name, det.detect_time)
            detection_multiplot(det_st, temp.st, [det.detect_time],
                                save=save, savefile=fname, title=det_t)
            plt.close('all')
    return
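
date_generator() and grab_day_wavs() are not defined in this snippet; a minimal sketch of what date_generator() presumably does (yield each calendar date from start to end inclusive):

from datetime import timedelta

def date_generator(start_date, end_date):
    # Presumed helper: yield each date between start_date and end_date.
    for n in range(int((end_date - start_date).days) + 1):
        yield start_date + timedelta(n)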