Example #1
    def test_write_correlations(self):
        """
        Test that the write_correlations function works as it should.
        Hard to test accurately...
        """
        from eqcorrscan.utils.catalog_to_dd import write_correlations
        from eqcorrscan.utils.timer import Timer
        import os
        import glob
        testing_path = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                                    'test_data', 'REA', 'TEST_')
        wavbase = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                               'test_data', 'WAV', 'TEST_')
        sfile_list = glob.glob(os.path.join(testing_path, '*L.S??????'))
        event_ids = list(range(len(sfile_list)))
        # Materialise as a list so that len() can be taken below
        event_list = list(zip(event_ids, sfile_list))
        with Timer() as t:
            write_correlations(event_list, wavbase, extract_len=2,
                               pre_pick=0.5, shift_len=0.2, lowcut=2.0,
                               highcut=10.0, max_sep=1, min_link=8,
                               coh_thresh=0.0, plotvar=False)
        msg = 'Running ' + str(len(event_list)) + ' events took %s s' % t.secs
        print(msg)
        self.assertTrue(os.path.isfile('dt.cc'))
        os.remove('dt.cc')
        if os.path.isfile('dt.cc2'):
            os.remove('dt.cc2')
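
Every example in this collection wraps its work in "with Timer() as t:" and reads the elapsed wall-clock time from t.secs afterwards. The sketch below is an illustrative assumption based on how Timer is used here, not the EQcorrscan implementation itself.

import time


class Timer(object):
    """Minimal timing context manager; exposes elapsed seconds as .secs."""

    def __enter__(self):
        self._start = time.time()
        return self

    def __exit__(self, *args):
        # Record elapsed wall-clock seconds so callers can read t.secs
        # after the with-block, as the examples in this collection do.
        self.secs = time.time() - self._start
        return False  # do not suppress exceptions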
Example #2
def median_filter(tr, multiplier=10, windowlength=0.5, interp_len=0.05):
    """
    Filter out spikes in data above a multiple of MAD of the data.

    Currently only has the ability to replace spikes with linear
    interpolation.  In the future we would aim to fill the gap with something
    more appropriate.  Works in-place on data.

    :type tr: obspy.core.trace.Trace
    :param tr: trace to despike
    :type multiplier: float
    :param multiplier:
        median absolute deviation multiplier to find spikes above.
    :type windowlength: float
    :param windowlength: Length of window, in seconds, to look for spikes in.
    :type interp_len: float
    :param interp_len: Length in seconds to interpolate around spikes.

    :returns: :class:`obspy.core.trace.Trace`

    .. warning::
        Not particularly effective, and may remove earthquake signals, use with
        caution.
    """
    num_cores = cpu_count()
    # Note - might be worth finding spikes in filtered data
    filt = tr.copy()
    filt.detrend('linear')
    try:
        filt.filter('bandpass',
                    freqmin=10.0,
                    freqmax=(tr.stats.sampling_rate / 2) - 1)
    except Exception as e:
        Logger.error("Could not filter due to error: {0}".format(e))
    data = filt.data
    del filt
    # Loop through windows
    _windowlength = int(windowlength * tr.stats.sampling_rate)
    _interp_len = int(interp_len * tr.stats.sampling_rate)
    peaks = []
    with Timer() as t:
        pool = Pool(processes=num_cores)
        results = [
            pool.apply_async(_median_window,
                             args=(data[chunk * _windowlength:(chunk + 1) *
                                        _windowlength], chunk * _windowlength,
                                   multiplier,
                                   tr.stats.starttime + windowlength,
                                   tr.stats.sampling_rate))
            for chunk in range(int(len(data) / _windowlength))
        ]
        pool.close()
        for p in results:
            peaks += p.get()
        pool.join()
        for peak in peaks:
            tr.data = _interp_gap(tr.data, peak[1], _interp_len)
    Logger.debug("Despiking took: %s s" % t.secs)
    return tr
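
The median_filter example above despikes an obspy Trace in place. A sketched call is shown below; the import path eqcorrscan.utils.despike is an assumption for illustration.

import numpy as np
from obspy import Trace
from eqcorrscan.utils.despike import median_filter  # assumed import path

# Build a synthetic trace with one obvious spike, then despike it.
tr = Trace(data=np.random.randn(20000).astype(np.float64))
tr.stats.sampling_rate = 100.0
tr.data[10000] += 100.0
tr = median_filter(tr, multiplier=10, windowlength=0.5, interp_len=0.05)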
Example #3
def test_synth_large():
    print('\tGenerating Synthetic data\n\n')
    templates, data, seeds = synth_seis.generate_synth_data(
        nsta=10, ntemplates=20, nseeds=5, samp_rate=100, t_length=6,
        max_amp=5, max_lag=5, debug=0)
    print('\tRunning the parallel detections\n\n')
    with Timer() as t:
        detections = match_filter(
            template_names=[str(i) for i in range(len(templates))],
            template_list=templates, st=data, threshold=8, threshold_type='MAD',
            trig_int=6, plotvar=False, cores=4, output_event=False)
    print('Parallel run took %f seconds' % t.secs)
    print('\tRunning the serial detections\n\n')
    with Timer() as t:
        detections = match_filter(
            template_names=[str(i) for i in range(len(templates))],
            template_list=templates, st=data, threshold=8, threshold_type='MAD',
            trig_int=6, plotvar=False, cores=None, output_event=False)
    print('Serial run took %f seconds' % t.secs)
Example #4
def template_remove(tr,
                    template,
                    cc_thresh,
                    windowlength,
                    interp_len,
                    debug=0):
    """
    Looks for instances of template in the trace and removes the matches.

    :type tr: obspy.core.trace.Trace
    :param tr: Trace to remove spikes from.
    :type template: obspy.core.trace.Trace
    :param template: Spike template to look for in data.
    :type cc_thresh: float
    :param cc_thresh: Cross-correlation threshold (-1 - 1).
    :type windowlength: float
    :param windowlength: Length of window, in seconds, to look for spikes in.
    :type interp_len: float
    :param interp_len: Window length to remove and fill in seconds.
    :type debug: int
    :param debug: Debug level.

    :returns: tr, works in place.
    :rtype: :class:`obspy.core.trace.Trace`
    """
    data_in = tr.copy()
    _interp_len = int(tr.stats.sampling_rate * interp_len)
    if _interp_len < len(template.data):
        warnings.warn('Interp_len is less than the length of the template, '
                      'will use the length of the template!')
        _interp_len = len(template.data)
    if isinstance(template, Trace):
        template = template.data
    with Timer() as t:
        cc = normxcorr2(image=tr.data.astype(np.float32),
                        template=template.astype(np.float32))
        if debug > 3:
            plt.plot(cc.flatten(), 'k', label='cross-correlation')
            plt.legend()
            plt.show()
        peaks = find_peaks2_short(arr=cc.flatten(),
                                  thresh=cc_thresh,
                                  trig_int=windowlength *
                                  tr.stats.sampling_rate)
        for peak in peaks:
            tr.data = _interp_gap(data=tr.data,
                                  peak_loc=peak[1] + int(0.5 * _interp_len),
                                  interp_len=_interp_len)
    print("Despiking took: %s s" % t.secs)
    if debug > 2:
        plt.plot(data_in.data, 'r', label='raw')
        plt.plot(tr.data, 'k', label='despiked')
        plt.legend()
        plt.show()
    return tr
Example #5
def template_remove(tr, template, cc_thresh, windowlength, interp_len):
    """
    Looks for instances of template in the trace and removes the matches.

    :type tr: obspy.core.trace.Trace
    :param tr: Trace to remove spikes from.
    :type template: obspy.core.trace.Trace
    :param template: Spike template to look for in data.
    :type cc_thresh: float
    :param cc_thresh: Cross-correlation threshold (-1 - 1).
    :type windowlength: float
    :param windowlength: Length of window, in seconds, to look for spikes in.
    :type interp_len: float
    :param interp_len: Window length to remove and fill in seconds.

    :returns: tr, works in place.
    :rtype: :class:`obspy.core.trace.Trace`
    """
    _interp_len = int(tr.stats.sampling_rate * interp_len)
    if _interp_len < len(template.data):
        Logger.warning('Interp_len is less than the length of the template, '
                       'will use the length of the template!')
        _interp_len = len(template.data)
    if isinstance(template, Trace):
        template = np.array([template.data])
    with Timer() as t:
        normxcorr = get_array_xcorr("fftw")
        cc, _ = normxcorr(stream=tr.data.astype(np.float32),
                          templates=template.astype(np.float32),
                          pads=[0])
        peaks = find_peaks2_short(arr=cc.flatten(),
                                  thresh=cc_thresh,
                                  trig_int=windowlength *
                                  tr.stats.sampling_rate)
        for peak in peaks:
            tr.data = _interp_gap(data=tr.data,
                                  peak_loc=peak[1] + int(0.5 * _interp_len),
                                  interp_len=_interp_len)
    Logger.info("Despiking took: {0:.4f} s".format(t.secs))
    return tr
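
A similarly hedged usage sketch for template_remove, again assuming the despike module path and using a synthetic spike-shaped template:

import numpy as np
from obspy import Trace
from eqcorrscan.utils.despike import template_remove  # assumed import path

samp_rate = 100.0
spike_shape = np.exp(-np.arange(50) / 5.0).astype(np.float32)
template = Trace(data=spike_shape)
template.stats.sampling_rate = samp_rate

tr = Trace(data=np.random.randn(20000).astype(np.float32))
tr.stats.sampling_rate = samp_rate
tr.data[5000:5050] += 10.0 * spike_shape  # plant a matching spike
# interp_len (0.6 s = 60 samples) exceeds the 50-sample template, so no warning
tr = template_remove(tr, template=template, cc_thresh=0.7,
                     windowlength=0.5, interp_len=0.6)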
Example #6
def median_filter(tr,
                  multiplier=10,
                  windowlength=0.5,
                  interp_len=0.05,
                  debug=0):
    """
    Filter out spikes in data according to the median absolute deviation of \
    the data.

    Currently only has the ability to replace spikes with linear
    interpolation.  In the future we would aim to fill the gap with something
    more appropriate.  Works in-place on data.

    :type tr: obspy.Trace
    :param tr: trace to despike
    :type multiplier: float
    :param multiplier: median absolute deviation multiplier to find spikes \
        above.
    :type windowlength: float
    :param windowlength: Length of window, in seconds, to look for spikes in.
    :type interp_len: float
    :param interp_len: Length in seconds to interpolate around spikes.

    :returns: obspy.trace
    """
    import matplotlib.pyplot as plt
    from multiprocessing import Pool, cpu_count
    from eqcorrscan.utils.timer import Timer

    num_cores = cpu_count()
    if debug >= 1:
        data_in = tr.copy()
    # Note - might be worth finding spikes in filtered data
    filt = tr.copy()
    filt.detrend('linear')
    filt.filter('bandpass',
                freqmin=10.0,
                freqmax=(tr.stats.sampling_rate / 2) - 1)
    data = filt.data
    del (filt)
    # Loop through windows
    _windowlength = int(windowlength * tr.stats.sampling_rate)
    _interp_len = int(interp_len * tr.stats.sampling_rate)
    peaks = []
    with Timer() as t:
        pool = Pool(processes=num_cores)
        results = [
            pool.apply_async(_median_window,
                             args=(data[chunk * _windowlength:(chunk + 1) *
                                        _windowlength], chunk * _windowlength,
                                   multiplier,
                                   tr.stats.starttime + windowlength,
                                   tr.stats.sampling_rate, debug))
            for chunk in range(int(len(data) / _windowlength))
        ]
        pool.close()
        for p in results:
            peaks += p.get()
        pool.join()
        for peak in peaks:
            tr.data = _interp_gap(tr.data, peak[1], _interp_len)
    print("Despiking took: %s s" % t.secs)
    if debug >= 1:
        plt.plot(data_in.data, 'r', label='raw')
        plt.plot(tr.data, 'k', label='despiked')
        plt.legend()
        plt.show()
    return tr
Example #7
    def test_write_correlations(self):
        """
        Test that the write_correlations function works as it should.
        Hard to test accurately...
        """
        max_shift_len = 0.2
        testing_path = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                                    'test_data', 'REA', 'TEST_')
        wavbase = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                               'test_data', 'WAV', 'TEST_')
        sfile_list = glob.glob(os.path.join(testing_path, '*L.S??????'))
        event_ids = list(range(len(sfile_list)))
        event_list = list(zip(event_ids, sfile_list))
        with Timer() as t:
            write_correlations(event_list, wavbase, extract_len=2,
                               pre_pick=0.5, shift_len=max_shift_len,
                               lowcut=2.0, highcut=10.0, max_sep=1, min_link=8,
                               cc_thresh=0.0, plotvar=False)
        msg = 'Running ' + str(len(list(event_list))) + \
              ' events took %s s' % t.secs
        print(msg)
        self.assertTrue(os.path.isfile('dt.cc'))
        # Generate a complementary dt.ct file and check against that
        write_catalog(event_list=event_list, max_sep=1, min_link=8)
        cc = open('dt.cc', 'r')
        cc_pairs = []
        observations = []
        pair = cc.readline().split()[1:3]
        for line in cc:
            if line[0] == '#':
                # Append old observations to the previous pair and put in pairs
                cc_pairs.append({'pair': pair, 'observations': observations})
                pair = line.split()[1:3]
                observations = []
            else:
                obs = line.split()
                observations.append({'station': obs[0],
                                     'diff_time': float(obs[1]),
                                     'weight': float(obs[2]),
                                     'phase': obs[3]})
        cc.close()
        ct = open('dt.ct', 'r')
        ct_pairs = []
        observations = []
        pair = ct.readline().split()[1:3]
        for line in ct:
            if line[0] == '#':
                # Append old observations to the previous pair and put in pairs
                ct_pairs.append({'pair': pair,
                                 'observations': observations})
                pair = line.split()[1:3]
                observations = []
            else:
                obs = line.split()
                # for sub in line.split('-'):
                #     for item in sub.split():
                #         obs.append(item)
                observations.append({'station': obs[0],
                                     'diff_time': float(obs[1]) -
                                     float(obs[2]),
                                     'weight': float(obs[3]),
                                     'phase': obs[4]})
        ct.close()
        # Everything is in memory, now we need to find matching pairs
        for cc_pair in cc_pairs:
            for ct_pair in ct_pairs:
                if cc_pair['pair'] == ct_pair['pair']:
                    for cc_obs in cc_pair['observations']:
                        for ct_obs in ct_pair['observations']:
                            if cc_obs['station'] == ct_obs['station'] and\
                               cc_obs['phase'] == ct_obs['phase']:
                                corr_correction = abs(ct_obs['diff_time'] -
                                                      cc_obs['diff_time'])
                                self.assertTrue(corr_correction <
                                                max_shift_len)

        os.remove('dt.cc')
        os.remove('dt.ct')
        os.remove('phase.dat')
        if os.path.isfile('dt.cc2'):
            os.remove('dt.cc2')
        if os.path.isfile('dt.ct2'):
            os.remove('dt.ct2')
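
The parsing loops above assume hypoDD-style catalogue files: header lines start with '#' and carry the two event IDs, followed by one observation per line. Illustrative, invented records consistent with the field indices used above:

dt.cc observation lines (station, differential time, cross-correlation weight, phase):
#        1        2 0.0
WVZ    0.1250 0.85 P
FOZ   -0.0320 0.67 S

dt.ct observation lines (station, travel time to event 1, travel time to event 2, weight, phase):
#        1        2
WVZ    4.520   4.395 1.0 P
FOZ    7.810   7.842 1.0 S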
Example #8
def _channel_loop(templates, stream, cores=1, debug=0):
    r"""
    Loop to generate cross channel correaltion sums for a series of templates\
    hands off the actual correlations to a sister function which can be run in\
    parallel.

    :type templates: :class: 'obspy.Stream'
    :param templates: A list of templates, where each one should be an\
    obspy.Stream object containing multiple traces of seismic data and the\
    relevant header information.
    :param stream: A single obspy.Stream object containing daylong seismic\
    data to be correlated through using the templates.  This is in effect the\
    image.
    :type core: int
    :param core: Number of cores to loop over
    :type debug: int
    :param debug: Debug level.

    :return: New list of :class: 'numpy.array' objects.  These will contain\
    the correlation sums for each template for this day of data.
    :return: list of ints as number of channels used for each cross-correlation
    """
    import time
    from multiprocessing import Pool
    from eqcorrscan.utils.timer import Timer
    num_cores = cores
    if len(templates) < num_cores:
        num_cores = len(templates)
    if 'cccs_matrix' in locals():
        del cccs_matrix
    # Initialize cccs_matrix, which will be two arrays of len(templates) arrays
    # where the arrays cccs_matrix[0[:]] will be the cross channel sum for each
    # template.

    # Note: This requires all templates to be the same length, and all channels
    # to be the same length
    cccs_matrix = np.array([
        np.array([
            np.array([0.0] *
                     (len(stream[0].data) - len(templates[0][0].data) + 1))
        ] * len(templates))
    ] * 2)
    # Initialize number of channels array
    no_chans = np.array([0] * len(templates))

    for tr in stream:
        tr_data = tr.data
        station = tr.stats.station
        channel = tr.stats.channel
        if debug >= 1:
            print("Starting parallel run for station " + station +
                  " channel " + channel)
        tic = time.clock()
        with Timer() as t:
            # Send off to sister function
            pool = Pool(processes=num_cores, maxtasksperchild=None)
            results = [
                pool.apply_async(_template_loop,
                                 args=(templates[i], tr_data, station, channel,
                                       debug, i))
                for i in range(len(templates))
            ]
            pool.close()
        if debug >= 1:
            print("--------- TIMER:    Correlation loop took: %s s" % t.secs)
            print(" I have " + str(len(results)) + " results")
        with Timer() as t:
            cccs_list = [p.get() for p in results]
            pool.join()
        if debug >= 1:
            print("--------- TIMER:    Getting results took: %s s" % t.secs)
        with Timer() as t:
            # Sort by placeholder returned from _template_loop
            cccs_list.sort(key=lambda tup: tup[0])
        if debug >= 1:
            print("--------- TIMER:    Sorting took: %s s" % t.secs)
        with Timer() as t:
            cccs_list = [ccc[1] for ccc in cccs_list]
        if debug >= 1:
            print("--------- TIMER:    Extracting arrays took: %s s" % t.secs)
        if debug >= 3:
            print('cccs_list is shaped: ' + str(np.shape(cccs_list)))
        with Timer() as t:
            cccs = np.concatenate(cccs_list, axis=0)
        if debug >= 1:
            print("--------- TIMER:    cccs_list conversion: %s s" % t.secs)
        del cccs_list
        if debug >= 2:
            print('After looping through templates the cccs is shaped: ' +
                  str(np.shape(cccs)))
            print('cccs is using: ' + str(cccs.nbytes / 1000000) +
                  ' MB of memory')
        cccs_matrix[1] = np.reshape(cccs,
                                    (1, len(templates), max(np.shape(cccs))))
        del cccs
        if debug >= 2:
            print('cccs_matrix shaped: ' + str(np.shape(cccs_matrix)))
            print('cccs_matrix is using ' + str(cccs_matrix.nbytes / 1000000) +
                  ' MB of memory')
        # Now we have an array of arrays with the first dimensional index
        # giving the channel, the second dimensional index giving the
        # template and the third dimensional index giving the position
        # in the ccc, e.g.:
        # np.shape(cccsums)=(len(stream), len(templates), len(ccc))

        if debug >= 2:
            print('cccs_matrix as a np.array is shaped: ' +
                  str(np.shape(cccs_matrix)))
        # First work out how many channels were used
        for i in range(0, len(templates)):
            if not np.all(cccs_matrix[1][i] == 0):
                # Check that there are some real numbers in the vector rather
                # than being all 0, which is the default case for no match
                # of image and template names
                no_chans[i] += 1
        # Now sum along the channel axis for each template to give the
        # cccsum values for each template for each day
        with Timer() as t:
            cccsums = cccs_matrix.sum(axis=0).astype(np.float32)
        if debug >= 1:
            print("--------- TIMER:    Summing took %s s" % t.secs)
        if debug >= 2:
            print('cccsums is shaped thus: ' + str(np.shape(cccsums)))
        cccs_matrix[0] = cccsums
        del cccsums
        toc = time.clock()
        if debug >= 1:
            print("--------- TIMER:    Trace loop took " + str(toc - tic) +
                  " s")
    if debug >= 2:
        print('cccs_matrix is shaped: ' + str(np.shape(cccs_matrix)))
    cccsums = cccs_matrix[0]
    return cccsums, no_chans
Example #9
def _template_loop(template, chan, station, channel, debug=0, i=0):
    r"""Sister loop to handle the correlation of a single template (of multiple\
    channels) with a single channel of data.

    :type template: obspy.Stream
    :type chan: np.array
    :type station: string
    :type channel: string
    :type i: int
    :param i: Optional argument, used to keep track of which process is being\
    run.

    :returns: tuple of (i,ccc) with ccc as an ndarray

    .. rubric:: Note

    This function currently assumes only one template-channel per\
    data-channel; while this is normal for a standard matched-filter routine,\
    if we wanted to implement a subspace detector, this would be the function\
    to change, I think.  E.g. where I currently take only the first matching\
    channel, we could loop through all the matching channels and then sum the\
    correlation sums - however I don't really understand how you detect based\
    on that.  More reading of the Harris document required.
    """
    from eqcorrscan.utils.timer import Timer

    ccc = np.array([np.nan] * (len(chan) - len(template[0].data) + 1),
                   dtype=np.float16)
    ccc = ccc.reshape((1, len(ccc)))  # Set default value for
    # cross-channel correlation in case there are no data that match our
    # channels.

    with Timer() as t:
        # While each bit of this loop isn't slow, looping through the if
        # statement when I don't need to adds up, I should work this out
        # earlier
        template_data = template.select(station=station, channel=channel)
        # I will for now assume that you only have one template per-channel
        template_data = template_data[0]
        delay = template_data.stats.starttime - \
            template.sort(['starttime'])[0].stats.starttime
        pad = np.array([0] *
                       int(round(delay * template_data.stats.sampling_rate)))
        image = np.append(chan, pad)[len(pad):]
        ccc = (normxcorr2(template_data.data, image))
        ccc = ccc.astype(np.float16)
        # Convert to float16 to save memory for large problems - lose some
        # accuracy which will affect detections very close to threshold
    if debug >= 2 and t.secs > 4:
        print("Single if statement took %s s" % t.secs)
        if not template_data:
            print("Didn't even correlate!")
        print(station + ' ' + channel)
    elif debug >= 2:
        print("If statement without correlation took %s s" % t.secs)
    if debug >= 3:
        print('********* DEBUG:  ' + station + '.' +
              channel + ' ccc MAX: ' + str(np.max(ccc[0])))
        print('********* DEBUG:  ' + station + '.' +
              channel + ' ccc MEAN: ' + str(np.mean(ccc[0])))
    if np.isinf(np.mean(ccc[0])):
        warnings.warn('Mean of ccc is infinite, check!')
        if debug >= 3:
            np.save('inf_cccmean_ccc.npy', ccc[0])
            np.save('inf_cccmean_template.npy', template_data.data)
            np.save('inf_cccmean_image.npy', image)
    if debug >= 3:
        print('shape of ccc: ' + str(np.shape(ccc)))
        print('A single ccc is using: ' + str(ccc.nbytes / 1000000) + 'MB')
        print('ccc type is: ' + str(type(ccc)))
    if debug >= 3:
        print('shape of ccc: ' + str(np.shape(ccc)))
        print("Parallel worker " + str(i) + " complete")
    return (i, ccc)
Example #10
    def test_write_correlations(self):
        """
        Test that the write_correlations function works as it should.
        Hard to test accurately...
        """
        max_shift_len = 0.2
        with Timer() as t:
            write_correlations(self.event_list,
                               self.wavbase,
                               extract_len=2,
                               pre_pick=0.5,
                               shift_len=max_shift_len,
                               lowcut=2.0,
                               highcut=10.0,
                               max_sep=self.maximum_separation,
                               min_link=self.minimum_links,
                               cc_thresh=0.0,
                               plotvar=False)
        msg = 'Running ' + str(len(list(self.event_list))) + \
              ' events took %s s' % t.secs
        print(msg)
        self.assertTrue(os.path.isfile('dt.cc'))
        cc = open('dt.cc', 'r')
        cc_pairs = []
        observations = []
        pair = cc.readline().split()[1:3]
        for line in cc:
            if line[0] == '#':
                # Append old observations to the previous pair and put in pairs
                cc_pairs.append({'pair': pair, 'observations': observations})
                pair = line.split()[1:3]
                observations = []
            else:
                obs = line.split()
                observations.append({
                    'station': obs[0],
                    'diff_time': float(obs[1]),
                    'weight': float(obs[2]),
                    'phase': obs[3]
                })
        cc.close()
        ct = open('dt.ct', 'r')
        ct_pairs = []
        observations = []
        pair = ct.readline().split()[1:3]
        for line in ct:
            if line[0] == '#':
                # Append old observations to the previous pair and put in pairs
                ct_pairs.append({'pair': pair, 'observations': observations})
                pair = line.split()[1:3]
                observations = []
            else:
                obs = line.split()
                # for sub in line.split('-'):
                #     for item in sub.split():
                #         obs.append(item)
                observations.append({
                    'station': obs[0],
                    'diff_time': float(obs[1]) - float(obs[2]),
                    'weight': float(obs[3]),
                    'phase': obs[4]
                })
        ct.close()
        # Everything is in memory, now we need to find matching pairs
        for cc_pair in cc_pairs:
            for ct_pair in ct_pairs:
                if cc_pair['pair'] == ct_pair['pair']:
                    for cc_obs in cc_pair['observations']:
                        for ct_obs in ct_pair['observations']:
                            if cc_obs['station'] == ct_obs['station'] and\
                               cc_obs['phase'] == ct_obs['phase']:
                                corr_correction = abs(ct_obs['diff_time'] -
                                                      cc_obs['diff_time'])
                                self.assertTrue(
                                    corr_correction < max_shift_len)
Example #11
def median_filter(tr,
                  multiplier=10,
                  windowlength=0.5,
                  interp_len=0.05,
                  debug=0):
    """
    Filter out spikes in data above a multiple of MAD of the data.

    Currently only has the ability to replace spikes with linear
    interpolation.  In the future we would aim to fill the gap with something
    more appropriate.  Works in-place on data.

    :type tr: obspy.core.trace.Trace
    :param tr: trace to despike
    :type multiplier: float
    :param multiplier:
        median absolute deviation multiplier to find spikes above.
    :type windowlength: float
    :param windowlength: Length of window, in seconds, to look for spikes in.
    :type interp_len: float
    :param interp_len: Length in seconds to interpolate around spikes.
    :type debug: int
    :param debug: Debug output level between 0 and 5, higher is more output.

    :returns: :class:`obspy.core.trace.Trace`

    .. warning::
        Not particularly effective, and may remove earthquake signals, use with
        caution.
    """
    num_cores = cpu_count()
    if debug >= 1:
        data_in = tr.copy()
    # Note - might be worth finding spikes in filtered data
    filt = tr.copy()
    filt.detrend('linear')
    filt.filter('bandpass',
                freqmin=10.0,
                freqmax=(tr.stats.sampling_rate / 2) - 1)
    data = filt.data
    del (filt)
    # Loop through windows
    _windowlength = int(windowlength * tr.stats.sampling_rate)
    _interp_len = int(interp_len * tr.stats.sampling_rate)
    peaks = []
    with Timer() as t:
        pool = Pool(processes=num_cores)
        results = [
            pool.apply_async(_median_window,
                             args=(data[chunk * _windowlength:(chunk + 1) *
                                        _windowlength], chunk * _windowlength,
                                   multiplier,
                                   tr.stats.starttime + windowlength,
                                   tr.stats.sampling_rate, debug))
            for chunk in range(int(len(data) / _windowlength))
        ]
        pool.close()
        for p in results:
            peaks += p.get()
        pool.join()
        for peak in peaks:
            tr.data = _interp_gap(tr.data, peak[1], _interp_len)
    print("Despiking took: %s s" % t.secs)
    if debug >= 1:
        plt.plot(data_in.data, 'r', label='raw')
        plt.plot(tr.data, 'k', label='despiked')
        plt.legend()
        plt.show()
    return tr
Example #12
def bench(
    n_templates: Iterable,
    n_channels: int,
    process_length: float,
    template_length: float,
    sampling_rate: float,
    reruns: int = 3,
    outfile: str = None,
):
    """
    Benchmark the matched-filter detection process for a given configuration.

    Profiles time and memory use - note that this only profiles the EQcorrscan
    Tribe.detect method, not the full real-time process. Use this to give you
    an idea of how many templates you can run within real-time.

    Parameters
    ----------
    n_templates
        Numbers of templates to profile using
    n_channels
        Number of channels of data to profile using
    process_length
        Length of data to profile using in seconds
    template_length
        Length of templates in seconds
    sampling_rate
        Sampling-rate in Hz
    reruns
        Number of times to re-run profiling - the average of these runs will
        be reported
    outfile
        File to write JSON results to; if not given, a timestamped filename
        is used
    """
    import matplotlib.pyplot as plt

    timings, memory = dict(), dict()
    for template_set in n_templates:
        tribe = make_synthetic_tribe(n_channels=n_channels,
                                     n_templates=template_set,
                                     process_length=process_length,
                                     template_length=template_length,
                                     sampling_rate=sampling_rate)
        st = make_synthetic_data(
            n_channels=n_channels,
            length=process_length,
            sampling_rate=sampling_rate,
            nslc=list({tuple(tr.id.split('.'))
                       for tr in tribe[0].st}))
        print(f"Running for {template_set} templates")
        time = 0.0
        mem = 0.0
        for _ in range(reruns):
            with Timer() as t:
                mem_use = memory_usage(proc=(tribe.detect, (),
                                             dict(stream=Stream(st),
                                                  threshold=8,
                                                  threshold_type="MAD",
                                                  trig_int=2,
                                                  parallel_process=False)),
                                       interval=0.05,
                                       multiprocess=True,
                                       include_children=True)
            time += t.secs
            mem += max(mem_use)
        time /= reruns
        mem /= reruns
        print(f"It took {time:.3f} s and used {mem:.3f} MB to run "
              f"{template_set} templates")
        timings.update({template_set: time})
        memory.update({template_set: mem})
    fig = plot_time_memory(timings,
                           memory,
                           process_length=process_length,
                           show=False)
    fig.suptitle(
        f"RTEQcorrscan benchmark: {n_channels} channels of {process_length} "
        f"s\n{psutil.cpu_count()} CPU cores, max clock: "
        f"{psutil.cpu_freq().max} Hz")
    plt.show()

    # Reshape for output
    times_mems = {
        key: {
            "time": timings[key],
            "memory": memory[key]
        }
        for key in timings.keys()
    }
    now = UTCDateTime.now().strftime("%Y%m%dT%H%M%S")
    outfile = outfile or f"rteqcorrscan-bench_{now}"
    with open(outfile, "w") as f:
        json.dump(fp=f, obj=times_mems)
    print(f"Written results to {outfile}")
Example #13
def _template_loop(template, chan, station, channel, do_subspace=False,
                   debug=0, i=0):
    r"""Sister loop to handle the correlation of a single template (of \
    multiple channels) with a single channel of data.

    :type template: obspy.Stream or list of obspy.Stream if subspace is True
    :type chan: np.array
    :type station: string
    :type channel: string
    :type do_subspace: bool
    :param do_subspace: Flag for running subspace detection. Defaults to False.
    :type i: int
    :param i: Optional argument, used to keep track of which process is being \
        run.

    :returns: tuple of (i, ccc) with ccc as an ndarray
    """
    from eqcorrscan.utils.timer import Timer
    from eqcorrscan.core import subspace
    if do_subspace:
        temp_len = len(template[0][0].data)
    else:
        temp_len = len(template[0].data)
    cstat = np.array([np.nan] * (len(chan) - temp_len + 1), dtype=np.float16)
    cstat = cstat.reshape((1, len(cstat)))           # Set default value for
    # cross-channel correlation in case there are no data that match our
    # channels.
    with Timer() as t:
        # While each bit of this loop isn't slow, looping through the if
        # statement when I don't need to adds up, I should work this out
        # earlier
        if do_subspace:
            sin_vecs = [st.select(station=station, channel=channel)[0].data
                        for st in template
                        if len(st.select(station=station,
                                         channel=channel)) != 0]
            # Convert trace data to np array
            detector = np.asarray(sin_vecs)
            cstat = subspace.det_statistic(detector, data=chan)
            cstat = cstat.reshape((1, len(cstat)))
            # Do not convert subspace statistic to float16 due to overrunning
            # 16 bit precision in the mean calculation. np.isinf(np.mean())=T
            # cstat = cstat.astype(np.float16)
        else:
            template_data = template.select(station=station,
                                            channel=channel)
            # I will for now assume that you only have one template per-channel
            template_data = template_data[0]
            delay = template_data.stats.starttime - \
                template.sort(['starttime'])[0].stats.starttime
            pad = np.array([0] * int(round(delay *
                                           template_data.stats.sampling_rate)))
            image = np.append(chan, pad)[len(pad):]
            cstat = (normxcorr2(template_data.data, image))
            cstat = cstat.astype(np.float16)
        # Convert to float16 to save memory for large problems - lose some
        # accuracy which will affect detections very close to threshold
        #
        # There is an interesting issue found in the tests that sometimes what
        # should be a perfect correlation results in a max of ccc of 0.99999994
        # Converting to float16 'corrects' this to 1.0 - bad workaround.
    if debug >= 3:
        print('********* DEBUG:  ' + station + '.' +
              channel + ' ccc MAX: ' + str(np.max(cstat[0])))
        print('********* DEBUG:  ' + station + '.' +
              channel + ' ccc MEAN: ' + str(np.mean(cstat[0])))
    if np.isinf(np.mean(cstat[0])):
        warnings.warn('Mean of ccc is infinite, check!')
        if debug >= 3:
            np.save('inf_cccmean_ccc_%02d.npy' % i, cstat[0])
            if do_subspace:
                np.save('inf_cccmean_template_%02d.npy' % i, sin_vecs)
                np.save('inf_cccmean_image_%02d.npy' % i, chan)
            else:
                np.save('inf_cccmean_template_%02d.npy' % i, template_data.data)
                np.save('inf_cccmean_image_%02d.npy' % i, image)
        # Replace the statistic with zeros so downstream thresholding
        # ignores this channel
        cstat = np.zeros_like(cstat)
    if debug >= 3:
        print('shape of ccc: ' + str(np.shape(cstat)))
        print('A single ccc is using: ' + str(cstat.nbytes / 1000000) + 'MB')
        print('ccc type is: ' + str(type(cstat)))
    if debug >= 3:
        print('shape of ccc: ' + str(np.shape(cstat)))
        print("Parallel worker " + str(i) + " complete")
    return (i, cstat)
Example #14
def _channel_loop(templates, stream, cores=1, debug=0):
    """
    Internal loop for parallel processing.

    Loop to generate cross channel correlation sums for a series of templates
    hands off the actual correlations to a sister function which can be run
    in parallel.

    :type templates: list
    :param templates:
        A list of templates, where each one should be an obspy.Stream object
        containing multiple traces of seismic data and the relevant header
        information.
    :type stream: obspy.core.stream.Stream
    :param stream:
        A single Stream object to be correlated with the templates.  This is
        in effect the image in normxcorr2 and cv2.
    :type cores: int
    :param cores: Number of cores to loop over
    :type debug: int
    :param debug: Debug level.

    :returns:
        New list of :class:`numpy.ndarray` objects.  These will contain
        the correlation sums for each template for this day of data.
    :rtype: list
    :returns:
        list of ints as number of channels used for each cross-correlation.
    :rtype: list
    :returns:
        list of list of tuples of station, channel for all cross-correlations.
    :rtype: list

    .. Note::
        Each template must contain the same channels as every other template,
        the stream must also contain the same channels (note that if there
        are duplicate channels in the template you do not need duplicate
        channels in the stream).
    """
    num_cores = cores
    if len(templates) < num_cores:
        num_cores = len(templates)
    # Initialize cccs_matrix, which will be two arrays of len(templates) arrays
    # where the arrays cccs_matrix[0[:]] will be the cross channel sum for each
    # template.

    # Note: This requires all templates to be the same length, and all channels
    # to be the same length
    temp_len = len(templates[0][0].data)
    cccs_matrix = np.array([np.array([np.array([0.0] * (len(stream[0].data) -
                                     temp_len + 1))] *
                            len(templates))] * 2, dtype=np.float32)
    # Initialize number of channels array
    no_chans = np.array([0] * len(templates))
    chans = [[] for _ in range(len(templates))]

    # Match-filter enforces that each template is the same length...
    for stream_ind in range(len(templates[0])):
        station = templates[0][stream_ind].stats.station
        channel = templates[0][stream_ind].stats.channel
        tr = stream.select(station=station, channel=channel)[0]
        if debug >= 1:
            print("Starting parallel run for station " + station +
                  " channel " + channel)
        tic = time.clock()
        with Timer() as t:
            # Send off to sister function
            pool = Pool(processes=num_cores)
            results = [pool.apply_async(_template_loop,
                                        args=(templates[i], tr.data,
                                              stream_ind, debug, i))
                       for i in range(len(templates))]
            pool.close()
        if debug >= 1:
            print("--------- TIMER:    Correlation loop took: %s s" % t.secs)
            print(" I have " + str(len(results)) + " results")
        with Timer() as t:
            cccs_list = [p.get() for p in results]
            pool.join()
        if debug >= 1:
            print("--------- TIMER:    Getting results took: %s s" % t.secs)
        with Timer() as t:
            # Sort by placeholder returned from _template_loop
            cccs_list.sort(key=lambda tup: tup[0])
        if debug >= 1:
            print("--------- TIMER:    Sorting took: %s s" % t.secs)
        with Timer() as t:
            cccs_list = [ccc[1] for ccc in cccs_list]
        if debug >= 1:
            print("--------- TIMER:    Extracting arrays took: %s s" % t.secs)
        if debug >= 3:
            print('cccs_list is shaped: ' + str(np.shape(cccs_list)))
        with Timer() as t:
            cccs = np.concatenate(cccs_list, axis=0)
        if debug >= 1:
            print("--------- TIMER:    cccs_list conversion: %s s" % t.secs)
        del cccs_list
        if debug >= 2:
            print('After looping through templates the cccs is shaped: ' +
                  str(np.shape(cccs)))
            print('cccs is using: ' + str(cccs.nbytes / 1000000) +
                  ' MB of memory')
        cccs_matrix[1] = np.reshape(cccs, (1, len(templates),
                                    max(np.shape(cccs))))
        del cccs
        if debug >= 2:
            print('cccs_matrix shaped: ' + str(np.shape(cccs_matrix)))
            print('cccs_matrix is using ' + str(cccs_matrix.nbytes / 1000000) +
                  ' MB of memory')
        # Now we have an array of arrays with the first dimensional index
        # giving the channel, the second dimensional index giving the
        # template and the third dimensional index giving the position
        # in the ccc, e.g.:
        # np.shape(cccsums)=(len(stream), len(templates), len(ccc))

        if debug >= 2:
            print('cccs_matrix as a np.array is shaped: ' +
                  str(np.shape(cccs_matrix)))
        # First work out how many channels were used
        for i in range(0, len(templates)):
            if not np.all(cccs_matrix[1][i] == 0):
                # Check that there are some real numbers in the vector rather
                # than being all 0, which is the default case for no match
                # of image and template names
                no_chans[i] += 1
                chans[i].append((tr.stats.station, tr.stats.channel))
        # Now sum along the channel axis for each template to give the
        # cccsum values for each template for each day
        with Timer() as t:
            cccsums = cccs_matrix.sum(axis=0).astype(np.float32)
        if debug >= 1:
            print("--------- TIMER:    Summing took %s s" % t.secs)
        if debug >= 2:
            print('cccsums is shaped thus: ' + str(np.shape(cccsums)))
        cccs_matrix[0] = cccsums
        del cccsums
        toc = time.clock()
        if debug >= 1:
            print("--------- TIMER:    Trace loop took " + str(toc - tic) +
                  " s")
    if debug >= 2:
        print('cccs_matrix is shaped: ' + str(np.shape(cccs_matrix)))
    cccsums = cccs_matrix[0]
    return cccsums, no_chans, chans
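
The comments in the two _channel_loop examples describe cccs_matrix as a (2, n_templates, n_samples) array in which index 1 receives the current channel's correlations and index 0 accumulates the cross-channel sum. A tiny, self-contained numpy illustration of that accumulation step (invented shapes and values):

import numpy as np

n_templates, n_samples = 3, 10
cccs_matrix = np.zeros((2, n_templates, n_samples), dtype=np.float32)
for _channel in range(4):
    # Stand-in for the per-channel correlations returned by _template_loop
    cccs_matrix[1] = np.random.randn(n_templates, n_samples).astype(np.float32)
    # Fold the new channel into the running sum held at index 0
    cccs_matrix[0] = cccs_matrix.sum(axis=0).astype(np.float32)
cccsums = cccs_matrix[0]  # final cross-channel correlation sums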