Example No. 1
 def test_same_detections_individual_and_parallel(self):
     """
     Check that the same detections are made regardless of whether templates
     are run together or separately.
     """
     individual_detections = []
     for template, template_name in zip(self.templates,
                                        self.template_names):
         individual_detections += match_filter(
             template_names=[template_name], template_list=[template],
             st=self.st.copy(), threshold=8.0, threshold_type='MAD',
             trig_int=6.0, plotvar=False, plotdir='.', cores=1)
     individual_dict = []
     for detection in individual_detections:
         individual_dict.append({'template_name': detection.template_name,
                                 'time': detection.detect_time,
                                 'cccsum': detection.detect_val})
     detections = match_filter(template_names=self.template_names,
                               template_list=self.templates, st=self.st,
                               threshold=8.0, threshold_type='MAD',
                               trig_int=6.0, plotvar=False, plotdir='.',
                               cores=1)
     self.assertEqual(len(individual_detections), len(detections))
     for detection in detections:
         detection_dict = {'template_name': detection.template_name,
                           'time': detection.detect_time,
                           'cccsum': detection.detect_val}
         self.assertTrue(detection_dict in individual_dict)
Example No. 2
 def test_non_equal_template_lengths(self):
     templates = [self.templates[0].copy()]
     templates[0][0].data = np.concatenate([templates[0][0].data,
                                            np.random.randn(10)])
     with self.assertRaises(MatchFilterError):
         match_filter(template_names=[self.template_names[0]],
                      template_list=templates, st=self.st,
                      threshold=8.0, threshold_type='MAD', trig_int=6.0,
                      plotvar=False, plotdir='.', cores=1)
Example No. 3
 def test_no_matching_data(self):
     """ No matching data between continuous and templates."""
     st = self.st.copy()
     for tr, staname in zip(st, ['a', 'b', 'c', 'd', 'e']):
         tr.stats.station = staname
     with self.assertRaises(IndexError):
         match_filter(template_names=self.template_names,
                      template_list=self.templates, st=st,
                      threshold=8.0, threshold_type='MAD', trig_int=6.0,
                      plotvar=False, plotdir='.', cores=1)
Example No. 4
 def test_masked_template(self):
     templates = [self.templates[0].copy()]
     tr = templates[0][0].copy()
     tr.stats.starttime += 3600
     templates[0] += tr
     templates[0].merge()
     with self.assertRaises(MatchFilterError):
         match_filter(template_names=[self.template_names[0]],
                      template_list=templates, st=self.st,
                      threshold=8.0, threshold_type='MAD', trig_int=6.0,
                      plotvar=False, plotdir='.', cores=1)
Example No. 5
 def test_duplicate_cont_data(self):
     """ Check that error is raised if duplicate channels are present in
     the continuous data."""
     tr = self.st[0].copy()
     tr.data = np.random.randn(100)
     st = self.st.copy() + tr
     with self.assertRaises(MatchFilterError):
         match_filter(template_names=self.template_names,
                      template_list=self.templates, st=st, threshold=8.0,
                      threshold_type='MAD', trig_int=6.0, plotvar=False,
                      plotdir='.', cores=1)
Example No. 6
    def predict(self, dataset_path, template_path,
                catalog_path):
        template = load_stream(template_path)
        stream = load_stream(dataset_path)
        try:
            print(self.beta)
        except AttributeError:
            self.beta = 8.5
        templates = [template]
        template_name = [os.path.split(template_path)[-1].split('.')[0]]

        day_streams = self._split_stream_into_day_streams(stream)
        print('-------')
        print(' + Running template matching method on test set')
        detections = []
        for st in day_streams:
            detections.append(mf.match_filter(template_names=template_name,
                                              template_list=templates,
                                              st=st, threshold=self.beta,
                                              threshold_type='MAD',
                                              trig_int=1.0,
                                              plotvar=False))

        # Flatten list of detections
        self.detections = [d for detection in detections for d in detection]
Example No. 7
    def fit(self, dataset_path, template_path,
            catalog_path):
        """ Detect events in stream for various beta and find the optimal
        beta parameter
        """
        template = load_stream(template_path)
        stream = load_stream(dataset_path)
        templates = [template]
        template_name = [os.path.split(template_path)[-1].split('.')[0]]

        day_streams = self._split_stream_into_day_streams(stream)
        detection_results = np.zeros(len(self._betas))
        for k in range(len(self._betas)):
            beta = self._betas[k]
            print('------')
            print(' + Running template matching method for beta =', beta)
            detections = []
            for st in day_streams:
                detections.append(mf.match_filter(template_names=template_name,
                                                  template_list=templates,
                                                  st=st, threshold=beta,
                                                  threshold_type='MAD',
                                                  trig_int=1.0,
                                                  plotvar=False))
            # Flatten list of detections
            detections = [d for detection in detections for d in detection]
            false_pos, false_neg = self.score(detections, catalog_path)
            print('FP: {}, FN: {}'.format(false_pos, false_neg))
            detection_results[k] = false_pos + false_neg
        self.beta = self._betas[np.argmin(detection_results)]
Example No. 8
def test_synth_large():
    print('\tGenerating Synthetic data\n\n')
    templates, data, seeds = synth_seis.generate_synth_data(
        nsta=10, ntemplates=20, nseeds=5, samp_rate=100, t_length=6,
        max_amp=5, max_lag=5, debug=0)
    print('\tRunning the parallel detections\n\n')
    with Timer() as t:
        detections = match_filter(
            template_names=[str(i) for i in range(len(templates))],
            template_list=templates, st=data, threshold=8, threshold_type='MAD',
            trig_int=6, plotvar=False, cores=4, output_event=False)
    print('Parallel run took %f seconds' % t.secs)
    print('\tRunning the serial detections\n\n')
    with Timer() as t:
        detections = match_filter(
            template_names=[str(i) for i in range(len(templates))],
            template_list=templates, st=data, threshold=8, threshold_type='MAD',
            trig_int=6, plotvar=False, cores=None, output_event=False)
    print('Serial run took %f seconds' % t.secs)
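The Timer context manager used above is not defined in this snippet. A minimal stand-in, assuming all it needs to expose is the elapsed wall-clock time as the secs attribute, could look like this:

import time


class Timer(object):
    """Sketch of a wall-clock timer usable as a context manager."""

    def __enter__(self):
        # Record the start time on entry to the with-block.
        self.start = time.time()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Store the elapsed time in seconds when the block exits.
        self.secs = time.time() - self.start

With this definition, the pattern "with Timer() as t:" followed by t.secs behaves as used in test_synth_large above.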
Example No. 9
 def test_catalog_extraction(self):
     detections, det_cat, detection_streams = \
         match_filter(template_names=self.template_names,
                      template_list=self.templates, st=self.st,
                      threshold=8.0, threshold_type='MAD',
                      trig_int=6.0, plotvar=False, plotdir='.',
                      cores=1, extract_detections=True, output_cat=True)
     self.assertEqual(len(detections), 4)
     self.assertEqual(len(detection_streams), len(detections))
     self.assertEqual(len(detection_streams), len(det_cat))
Example No. 10
 def test_missing_cont_channel(self):
     """ Remove one channel from continuous data and check that everything
     still works. """
     st = self.st.copy()
     st.remove(st[-1])
     detections, det_cat = match_filter(
         template_names=self.template_names, template_list=self.templates,
         st=st, threshold=8.0, threshold_type='MAD', trig_int=6.0,
         plotvar=False, plotdir='.', cores=1, output_cat=True)
     self.assertEqual(len(detections), 1)
     self.assertEqual(detections[0].no_chans, 5)
     self.assertEqual(len(detections), len(det_cat))
Example No. 11
 def test_duplicate_channels_in_template(self):
     """
     Test using a template with duplicate channels.
     """
     templates = copy.deepcopy(self.templates)
     # Do this to test an extra condition in match_filter
     templates[0].remove(templates[0].select(station='CNGZ')[0])
     detections = match_filter(template_names=self.template_names,
                               template_list=templates, st=self.st,
                               threshold=8.0, threshold_type='MAD',
                               trig_int=6.0, plotvar=False, plotdir='.',
                               cores=1)
     self.assertEqual(len(detections), 1)
     self.assertEqual(detections[0].no_chans, 6)
Example No. 12
def run():
    """Internal run function so that this can be called from interactive \
    python session for debugging."""
    from eqcorrscan.utils import pre_processing
    from eqcorrscan.utils.archive_read import read_data
    from eqcorrscan.core.match_filter import match_filter
    from obspy import UTCDateTime, Stream
    from eqcorrscan.utils.parameters import read_parameters
    import warnings
    import os
    import datetime as dt
    from obspy import read
    import copy

    # Read parameter files
    par = read_parameters('../parameters/VSP_parameters.txt')
    # Log the input parameters
    log_name = ('EQcorrscan_detection_log_' +
                dt.datetime.now().strftime('%Y.%j.%H:%M:%S') + '.log')
    f = open(os.path.join('..', 'detections', log_name), 'w')
    for parameter in par.__dict__.keys():
        f.write(parameter + ': ' + str(par.__dict__.get(parameter)) + '\n')
    f.write('\n###################################\n')
    f.write('template, detect-time, cccsum, threshold, number of channels\n')

    days = (par.enddate.date - par.startdate.date).days
    dates = [par.startdate + (i * 86400) for i in range(days)]

    # Read in templates
    templates = [
        read(os.path.join('..', 'templates', template))
        for template in par.template_names
    ]
    # We don't need the full file path in the match-filter routine, just the
    # final 'name'
    template_names_short = [
        t_name.split(os.sep)[-1] for t_name in par.template_names
    ]
    warnings.warn('Unable to check whether filters are correct in templates')
    # Check that the sampling rate is correct...
    for st in templates:
        for tr in st:
            if not tr.stats.sampling_rate == par.samp_rate:
                msg = 'Template sampling rate is not correct: ' + tr.__str__()
                raise IOError(msg)
    # Work out which stations and channels we will be using
    stachans = [(tr.stats.station, tr.stats.channel) for st in templates
                for tr in st]
    stachans = list(set(stachans))
    # Loop through days
    for date in dates:
        # Read in the data
        st = read_data(par.archive, par.arc_type, date.date, stachans)
        # Process the data
        st.merge(fill_value='interpolate')
        st = pre_processing.dayproc(st,
                                    lowcut=par.lowcut,
                                    highcut=par.highcut,
                                    filt_order=par.filt_order,
                                    samp_rate=par.samp_rate,
                                    debug=par.debug,
                                    starttime=UTCDateTime(date.date))
        # Will remove templates if they are deemed useless
        # (eg no matching channels)
        template_names_short_copy = copy.deepcopy(template_names_short)
        templates_copy = copy.deepcopy(templates)
        # Now conduct matched-filter
        detections = match_filter(template_names=template_names_short_copy,
                                  template_list=templates_copy,
                                  st=st,
                                  threshold=par.threshold,
                                  threshold_type=par.threshold_type,
                                  trig_int=par.trigger_interval,
                                  plotvar=par.plotvar,
                                  plotdir=par.plotdir,
                                  cores=par.cores,
                                  tempdir=par.tempdir,
                                  debug=par.debug,
                                  plot_format=par.plot_format)
        # Log the output
        for detection in detections:
            f.write(', '.join([
                detection.template_name,
                str(detection.detect_time),
                str(detection.detect_val),
                str(detection.threshold),
                str(detection.no_chans) + '\n'
            ]))
    f.close()
Example No. 13
        print('Merging took %.3f seconds' % (merg_stp - merg_strt))
        proc_strt = timer()
        st1 = pre_processing.dayproc(st, lowcut=1.0, highcut=20.0,
                                     filt_order=3, samp_rate=50.0,
                                     starttime=dto, debug=2, parallel=True,
                                     as_float32=True)
        del st
        proc_stp = timer()
        print('Pre-processing took %.3f seconds' % (proc_stp - proc_strt))
        # RUN MATCH FILTER (looping through chunks of templates due to RAM)
        chunk_size = len(templates) // 40
        chunk_temps = [templates[i:i+chunk_size]
                       for i in range(0, len(templates), chunk_size)]
        chunk_temp_names = [template_names[i:i+chunk_size]
                            for i in range(0, len(template_names), chunk_size)]
        for temps, temp_names in zip(chunk_temps, chunk_temp_names):
            detections = match_filter.match_filter(temp_names, temps, st1,
                                                   threshold=8.0,
                                                   threshold_type='MAD',
                                                   trig_int=6.0, plotvar=False,
                                                   cores='all', debug=2)
            # Write detections to a file to check later
            for detection in detections:
                det_writer.writerow([detection.template_name,
                                     detection.detect_time, detection.detect_val,
                                     detection.threshold, detection.no_chans])
            del detections
# Print out runtime
script_end = timer()
print('Instance took %.3f seconds' % (script_end - script_start))
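Note that the chunking above assumes at least 40 templates; with fewer, chunk_size would be zero and the range() calls would raise a ValueError. A defensive sketch of the same idea (chunk_templates is a hypothetical helper, not part of the original script):

def chunk_templates(templates, template_names, n_chunks=40):
    """Split templates and their names into roughly n_chunks slices."""
    chunk_size = max(1, len(templates) // n_chunks)
    chunk_temps = [templates[i:i + chunk_size]
                   for i in range(0, len(templates), chunk_size)]
    chunk_temp_names = [template_names[i:i + chunk_size]
                        for i in range(0, len(template_names), chunk_size)]
    return chunk_temps, chunk_temp_names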
Example No. 14
# Work out what day we are working on, required as we will pad the data to be daylong
day = st[0].stats.starttime.date

# Process the data in the same way as the template
for tr in st:
    tr = pre_processing.dayproc(tr, 1.0, 20.0, 3, 100.0,\
                                debug, day)

# Compute detections
detections = match_filter.match_filter(template_names,
                                       templates,
                                       st,
                                       threshold,
                                       threshtype,
                                       trig_int,
                                       plotvar=True,
                                       cores=2,
                                       tempdir=False,
                                       debug=debug,
                                       plot_format='pdf')

# We now have a list of detections! We can output these to a file to check later
f = open('tutorial_detections.csv', 'w')
for detection in detections:
    line = ', '.join([
        detection.template_name,
        str(detection.detect_time),
        str(detection.detect_val),
        str(detection.threshold),
        str(detection.no_chans)])
    f.write(line + '\n')
f.close()
Example No. 15
    if 'st' not in locals():
        st = read('test_data/tutorial_data/' + stachan[0] + '.*..*' +
                  stachan[1][-1] + '.*')
    else:
        st += read('test_data/tutorial_data/' + stachan[0] + '.*..*' +
                   stachan[1][-1] + '.*')

# Merge the data to account for miniseed files being written in chunks
# We need continuous day-long data, so data are padded if there are gaps
st = st.merge(fill_value='interpolate')

# Work out what day we are working on, required as we will pad the data to be daylong
day = st[0].stats.starttime.date

# Process the data in the same way as the template
for tr in st:
    tr = pre_processing.dayproc(tr, 1.0, 20.0, 3, 100.0,
                                matchdef.debug, day)

# Compute detections
detections = match_filter.match_filter(template_names, templates, st,
                                       matchdef.threshold, matchdef.threshtype,
                                       matchdef.trig_int, True,
                                       'temp_0')

# We now have a list of detections! We can output these to a file to check later
f = open('tutorial_detections.csv', 'w')
for detection in detections:
    f.write(detection.template_name + ', ' + str(detection.detect_time) +
            ', ' + str(detection.detect_val) + ', ' + str(detection.threshold) +
            ', ' + str(detection.no_chans) + '\n')
f.close()
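The locals() check used above to build up the stream is fragile. A sketch of the same read loop that accumulates into an empty obspy Stream instead (assuming, as in the truncated code above, that the surrounding loop iterates over (station, channel) pairs called stachans):

from obspy import Stream, read

st = Stream()
for stachan in stachans:
    # Append every matching miniseed file for this station/channel pair.
    st += read('test_data/tutorial_data/' + stachan[0] + '.*..*' +
               stachan[1][-1] + '.*')
st = st.merge(fill_value='interpolate')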
Example No. 16
    def test_match_filter(self, samp_rate=20.0, debug=0):
        """
        Function to test the capabilities of match_filter and just check that \
        it is working!  Uses synthetic templates and seeded, randomised data.

        :type debug: int
        :param debug: Debug level, higher the number the more output.
        """
        from eqcorrscan.utils import pre_processing
        from eqcorrscan.utils import plotting
        from eqcorrscan.core import match_filter
        from eqcorrscan.utils.synth_seis import generate_synth_data
        from obspy import UTCDateTime
        import string
        # Generate a random dataset
        templates, data, seeds = generate_synth_data(nsta=5, ntemplates=2,
                                                     nseeds=50,
                                                     samp_rate=samp_rate,
                                                     t_length=6.0, max_amp=5.0,
                                                     debug=debug)
        # Notes to the user: If you use more templates you should ensure they
        # are more different, e.g. set the data to have larger moveouts,
        # otherwise similar templates will detect events seeded by another
        # template.
        # Test the pre_processing functions
        data = pre_processing.dayproc(st=data, lowcut=2.0, highcut=8.0,
                                      filt_order=3, samp_rate=samp_rate,
                                      debug=0, starttime=UTCDateTime(0))
        if debug > 0:
            data.plot()
        # Filter the data and the templates
        for template in templates:
            pre_processing.shortproc(st=template, lowcut=2.0, highcut=8.0,
                                     filt_order=3, samp_rate=samp_rate)
            if debug > 0:
                template.plot()
        template_names = list(string.ascii_lowercase)[0:len(templates)]
        detections = match_filter.match_filter(template_names=template_names,
                                               template_list=templates,
                                               st=data, threshold=10.0,
                                               threshold_type='MAD',
                                               trig_int=6.0,
                                               plotvar=False,
                                               plotdir='.',
                                               cores=1,
                                               debug=0)
        # Compare the detections to the seeds
        print('This test made ' + str(len(detections)) + ' detections')
        ktrue = 0
        kfalse = 0
        for detection in detections:
            print(detection.template_name)
            i = template_names.index(detection.template_name)
            t_seeds = seeds[i]
            dtime_samples = int((detection.detect_time - UTCDateTime(0)) *
                                samp_rate)
            if dtime_samples in t_seeds['time']:
                j = list(t_seeds['time']).index(dtime_samples)
                print('Detection at SNR of: ' + str(t_seeds['SNR'][j]))
                ktrue += 1
            else:
                min_diff = min(abs(t_seeds['time'] - dtime_samples))
                if min_diff < 10:
                    # If there is a match within ten samples then it is
                    # good enough
                    j = list(abs(t_seeds['time'] -
                                 dtime_samples)).index(min_diff)
                    print('Detection at SNR of: ' + str(t_seeds['SNR'][j]))
                    ktrue += 1
                else:
                    print('Detection at sample: ' + str(dtime_samples) +
                          ' does not match anything in seed times:')
                    kfalse += 1
                print('Minimum difference in samples is: ' + str(min_diff))
        # Plot the detections
        if debug > 3:
            for i, template in enumerate(templates):
                times = [d.detect_time.datetime for d in detections
                         if d.template_name == template_names[i]]
                print(times)
                plotting.detection_multiplot(data, template, times)
        # Set an 'acceptable' ratio of positive to false detections
        print(str(ktrue) + ' true detections and ' + str(kfalse) +
              ' false detections')
        self.assertTrue(kfalse / ktrue < 0.25)
Example No. 17
def run_tutorial(plot=False):
    """Main function to run the tutorial dataset."""

    from eqcorrscan.utils import pre_processing
    from eqcorrscan.utils import plotting
    from eqcorrscan.core import match_filter
    import glob

    # This import section copes with namespace changes between obspy versions
    import obspy
    if int(obspy.__version__.split('.')[0]) >= 1:
        from obspy.clients.fdsn import Client
    else:
        from obspy.fdsn import Client
    from obspy import UTCDateTime, Stream, read

    # First we want to load our templates
    template_names = glob.glob('tutorial_template_*.ms')

    if len(template_names) == 0:
        raise IOError('Template files not found, have you run the template ' +
                      'creation tutorial?')

    templates = [read(template_name) for template_name in template_names]

    # Work out what stations we have and get the data for them
    stations = []
    for template in templates:
        for tr in template:
            stations.append((tr.stats.station, tr.stats.channel))
    # Get a unique list of stations
    stations = list(set(stations))

    # We are going to look for detections on the day of our template, however, to
    # generalize, we will write a loop through the days between our templates, in
    # this case that is only one day.

    template_days = []
    for template in templates:
        template_days.append(template[0].stats.starttime.date)
    template_days = sorted(template_days)
    kdays = (template_days[-1] - template_days[0]).days + 1

    unique_detections = []

    for i in range(kdays):
        t1 = UTCDateTime(template_days[0]) + (86400 * i)
        t2 = t1 + 86400

        # Generate the bulk information to query the GeoNet database
        bulk_info = []
        for station in stations:
            bulk_info.append(('NZ', station[0], '*',
                              station[1][0] + 'H' + station[1][-1], t1, t2))

        # Set up a client to access the GeoNet database
        client = Client("GEONET")

        # Note this will take a little while.
        print('Downloading seismic data, this may take a while')
        st = client.get_waveforms_bulk(bulk_info)
        # Merge the stream, it will be downloaded in chunks
        st.merge(fill_value='interpolate')

        # Work out what data we actually have to cope with possible lost data
        stations = list(set([tr.stats.station for tr in st]))

        # Set how many cores we want to parallel across, we will set this to four
        # as this is the number of templates, if your machine has fewer than four
        # cores/CPUs the multiprocessing will wait until there is a free core.
        # Setting this to be higher than the number of templates will have no
        # increase in speed as only detections for each template are computed in
        # parallel.  It may also slow your processing by using more memory than
        # needed, to the extent that swap may be filled.
        ncores = 4

        # Pre-process the data to set frequency band and sampling rate
        # Note that this is, and MUST BE the same as the parameters used for the
        # template creation.
        print('Processing the seismic data')
        st = pre_processing.dayproc(st, lowcut=2.0, highcut=9.0,
                                    filt_order=4, samp_rate=20.0,
                                    debug=0, starttime=t1, num_cores=ncores)
        # Convert from list to stream
        st = Stream(st)

        # Now we can conduct the matched-filter detection
        detections = match_filter.match_filter(template_names=template_names,
                                               template_list=templates,
                                               st=st, threshold=8.0,
                                               threshold_type='MAD',
                                               trig_int=6.0, plotvar=plot,
                                               plotdir='.', cores=ncores,
                                               tempdir=False, debug=1,
                                               plot_format='jpg')

        # Now let's try to work out how many unique events we have, just to
        # compare with the GeoNet catalog of 20 events on this day in this sequence
        for master in detections:
            keep = True
            for slave in detections:
                if not master == slave and\
                   abs(master.detect_time - slave.detect_time) <= 1.0:
                    # If the events are within 1s of each other then test which
                    # was the 'best' match, strongest detection
                    if not master.detect_val > slave.detect_val:
                        keep = False
                        break
            if keep:
                unique_detections.append(master)

    print('We made a total of ' + str(len(unique_detections)) + ' detections')

    for detection in unique_detections:
        print('Detection at :' + str(detection.detect_time) +
              ' for template ' + detection.template_name +
              ' with a cross-correlation sum of: ' +
              str(detection.detect_val))
        # We can plot these too
        if plot:
            stplot = st.copy()
            template = templates[template_names.index(detection.template_name)]
            lags = sorted([tr.stats.starttime for tr in template])
            maxlag = lags[-1] - lags[0]
            stplot.trim(starttime=detection.detect_time - 10,
                        endtime=detection.detect_time + maxlag + 10)
            plotting.detection_multiplot(stplot, template,
                                         [detection.detect_time.datetime])
    return unique_detections
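The pairwise loop in run_tutorial that keeps only the strongest detection among those within 1 s of each other is worth factoring out if it is reused elsewhere. A sketch of that rule as a helper (remove_duplicate_detections is hypothetical, not EQcorrscan API):

def remove_duplicate_detections(detections, tolerance=1.0):
    """Keep only the strongest detection among any that occur within
    tolerance seconds of one another (same rule as the loop above)."""
    unique = []
    for master in detections:
        keep = True
        for slave in detections:
            if (master != slave and
                    abs(master.detect_time - slave.detect_time) <= tolerance):
                # Prefer the detection with the larger cross-correlation sum.
                if not master.detect_val > slave.detect_val:
                    keep = False
                    break
        if keep:
            unique.append(master)
    return unique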
Example No. 18
        st += read(data_file)

# Merge the data to account for miniseed files being written in chunks
# We need continuous day-long data, so data are padded if there are gaps
st = st.merge(fill_value='interpolate')

# Work out what day we are working on, required as we will pad the data to be daylong
day = st[0].stats.starttime.date

# Process the data in the same way as the template
for tr in st:
    tr = pre_processing.dayproc(tr, 1.0, 20.0, 3, 100.0,\
                                debug, day)

# Compute detections
detections = match_filter.match_filter(template_names, templates, st,
                                       threshold, threshtype, trig_int,
                                       plotvar=True, cores=2, tempdir=False,
                                       debug=debug, plot_format='pdf')

# We now have a list of detections! We can output these to a file to check later
f = open('tutorial_detections.csv', 'w')
for detection in detections:
    line = ', '.join([detection.template_name, str(detection.detect_time),
                      str(detection.detect_val), str(detection.threshold),
                      str(detection.no_chans)])
    f.write(line)
    print(line)
    f.write(os.linesep)
f.close()
Example No. 19
    def test_match_filter(self, samp_rate=20.0, debug=0):
        """
        Function to test the capabilities of match_filter and just check that \
        it is working!  Uses synthetic templates and seeded, randomised data.

        :type debug: int
        :param debug: Debug level, higher the number the more output.
        """
        from eqcorrscan.utils import pre_processing
        from eqcorrscan.utils import plotting
        from eqcorrscan.core import match_filter
        from eqcorrscan.utils.synth_seis import generate_synth_data
        from obspy import UTCDateTime
        import string
        # Generate a random dataset
        templates, data, seeds = generate_synth_data(nsta=5,
                                                     ntemplates=2,
                                                     nseeds=50,
                                                     samp_rate=samp_rate,
                                                     t_length=6.0,
                                                     max_amp=5.0,
                                                     max_lag=12.0,
                                                     debug=debug)
        # Notes to the user: If you use more templates you should ensure they
        # are more different, e.g. set the data to have larger moveouts,
        # otherwise similar templates will detect events seeded by another
        # template.
        # Test the pre_processing functions
        data = pre_processing.dayproc(st=data,
                                      lowcut=2.0,
                                      highcut=8.0,
                                      filt_order=3,
                                      samp_rate=samp_rate,
                                      debug=0,
                                      starttime=UTCDateTime(0))
        if debug > 0:
            data.plot()
        # Filter the data and the templates
        for template in templates:
            pre_processing.shortproc(st=template,
                                     lowcut=2.0,
                                     highcut=8.0,
                                     filt_order=3,
                                     samp_rate=samp_rate)
            if debug > 0:
                template.plot()
        template_names = list(string.ascii_lowercase)[0:len(templates)]
        detections = match_filter.match_filter(template_names=template_names,
                                               template_list=templates,
                                               st=data,
                                               threshold=10.0,
                                               threshold_type='MAD',
                                               trig_int=6.0,
                                               plotvar=False,
                                               plotdir='.',
                                               cores=1,
                                               debug=0)
        # Compare the detections to the seeds
        print('This test made ' + str(len(detections)) + ' detections')
        ktrue = 0
        kfalse = 0
        for detection in detections:
            print(detection)
            i = template_names.index(detection.template_name)
            t_seeds = seeds[i]
            dtime_samples = int(
                (detection.detect_time - UTCDateTime(0)) * samp_rate)
            if dtime_samples in t_seeds['time']:
                j = list(t_seeds['time']).index(dtime_samples)
                print('Detection at SNR of: ' + str(t_seeds['SNR'][j]))
                ktrue += 1
            else:
                min_diff = min(abs(t_seeds['time'] - dtime_samples))
                if min_diff < 10:
                    # If there is a match within ten samples then it is
                    # good enough
                    j = list(abs(t_seeds['time'] -
                                 dtime_samples)).index(min_diff)
                    print('Detection at SNR of: ' + str(t_seeds['SNR'][j]))
                    ktrue += 1
                else:
                    print('Detection at sample: ' + str(dtime_samples) +
                          ' does not match anything in seed times:')
                    kfalse += 1
                print('Minimum difference in samples is: ' + str(min_diff))
        # Plot the detections
        if debug > 3:
            for i, template in enumerate(templates):
                times = [
                    d.detect_time.datetime for d in detections
                    if d.template_name == template_names[i]
                ]
                print(times)
                plotting.detection_multiplot(data, template, times)
        # Set an 'acceptable' ratio of positive to false detections
        print(
            str(ktrue) + ' true detections and ' + str(kfalse) +
            ' false detections')
        self.assertTrue(kfalse / ktrue < 0.25)
Example No. 20
                                                highcut=10,
                                                filt_order=4,
                                                samp_rate=25,
                                                debug=0,
                                                starttime=starttime1,
                                                parallel=True,
                                                num_cores=10)
            print("MAKING DETECTIONS.", starttime1)

            #match filtering
            detections = match_filter.match_filter(
                template_names=template_names,
                template_list=templates,
                st=std_filter,
                threshold=10,
                threshold_type='MAD',
                trig_int=6.0,
                plotvar=False,
                plotdir='.',
                cores=10,
                debug=2)

            print("PLOTTING DETECTIONS AND SAVING TO FOLDER.")
            #            print(detections)

            for detection in detections:
                # Only save and plot detections with avg CC >= 0.6 on at least 3 channels
                if (detection.detect_val / detection.no_chans >= 0.6
                        and detection.no_chans >= 3):
                    with open('Detections.csv', 'a') as csvfile:
                        detwriter = csv.writer(csvfile,
                                               delimiter=',',
                                               quotechar='|',
Example No. 21
st.detrend("linear")
st.taper(max_percentage=0.01, max_length=10.)
if filtType == "bandpass":
    st.filter(filtType, freqmin=freq[0], freqmax=freq[1])
    st.resample(freq[1] * 2)
elif filtType == "lowpass":
    st.filter(filtType, freq=freq[0])
    st.resample(freq[0] * 2)
elif filtType == "highpass":
    st.filter(filtType, freq=freq[0])

# start timer and give output
timer = time.time()
print("Starting scan...")

# run eqcorrscan's match filter routine
detections = match_filter(template_names=template_names,
                          template_list=templates,
                          st=st,
                          threshold=8,
                          threshold_type="MAD",
                          trig_int=6,
                          cores=20)

# stop timer and give output
runtime = time.time() - timer
print(detections)
print("Scanned 1 day of data with " + str(len(templates)) + " templates in " +
      str(runtime) + " seconds and found " + str(len(detections)) +
      " detections")
Example No. 22
    # Pre-process the data to set frequency band and sampling rate
    # Note that this is, and MUST BE the same as the parameters used for the
    # template creation.
    print("Processing the seismic data")
    st = pre_processing.dayproc(st, lowcut=2.0, highcut=9.0, filt_order=4,
                                samp_rate=20.0, debug=0, starttime=t1)
    # Convert from list to stream
    st = Stream(st)

    # Now we can conduct the matched-filter detection
    detections = match_filter.match_filter(
        template_names=template_names,
        template_list=templates,
        st=st,
        threshold=8.0,
        threshold_type="MAD",
        trig_int=6.0,
        plotvar=True,
        plotdir=".",
        cores=ncores,
        tempdir=False,
        debug=1,
        plot_format="jpg",
    )

    # Now let's try to work out how many unique events we have, just to
    # compare with the GeoNet catalog of 20 events on this day in this sequence
    for master in detections:
        keep = True
        for slave in detections:
            if not master == slave and abs(master.detect_time - slave.detect_time) <= 1.0:
                # If the events are within 1s of each other then test which
                # was the 'best' match, strongest detection
Example No. 23
                              filt_order=18,
                              samp_rate=100,
                              starttime=st[0].stats.starttime,
                              endtime=st[0].stats.endtime)
st = Stream(st)
# st.plot()
# Read in the templates
templates = []
template_names = ['template.ms']
for template_file in template_names:
    templates.append(read(template_file))

detections = match_filter.match_filter(template_names=template_names,
                                       template_list=templates,
                                       st=st,
                                       threshold=5.5,
                                       threshold_type='MAD',
                                       trig_int=1,
                                       plotvar=True,
                                       cores=6)

for detection in detections:
    #detection.write('detections.csv', append=True)
    detection.write('detections.csv')

# plot
# multi_trace_plot(st, corr=True, stack='linstack', size=(7, 12), show=True, title=None)

times = []
for dc in detections:
    for pick in dc.event.picks:
        times.append(pick.time)
Example No. 24
def self_test(template, low_cut, high_cut, filt_order, samp_rate,
              threshold, thresh_type, trig_int, debug=0):
    """
    :type template: :class: obspy.Stream
    :param template: Template to check for self-detectability
    :type high_cut: float
    :param high_cut: High cut in Hz for bandpass
    :type low_cut: float
    :param low_cut: Low cut in Hz for bandpass
    :type filt_order: int
    :param filt_order: Corners for bandpass
    :type samp_rate: float
    :param samp_rate: Desired sampling rate in Hz
    :type threshold: float
    :param threshold: A threshold value set based on the thresh_type
    :type thresh_type: str
    :param thresh_type: The type of threshold to be used, can be MAD,
        absolute or av_chan_corr.  MAD threshold is calculated as
        threshold * median(abs(cccsum)), where cccsum is the cross-correlation
        sum for a given template.  absolute is a true absolute threshold on the
        cccsum value.  av_chan_corr is based on the mean of the single-channel
        cross-correlations, assuming all channels required by the template are
        present, e.g. av_chan_corr_thresh = threshold * (cccsum / len(template)),
        where template is a single template from the input and its length is
        the number of channels within this template.
    :type trig_int: float
    :param trig_int: Minimum gap between detections in seconds.
    :type debug: int
    :param debug: Debug output level, higher=more output.
    """
    import sys
    sys.path.append('..')
    import datetime as dt
    import pre_processing
    from eqcorrscan.core import match_filter
    from obspy import read
    # Work out the date of the template
    date=template[0].stats.starttime.datetime.date()
    # Read in the appropriate data
    sta_chans=[(tr.stats.station,tr.stats.channel,tr.stats.network) for tr in template]
    for sta_chan in sta_chans:
        base = matchdef.contbase[[i for i in range(len(matchdef.contbase))
                                  if matchdef.contbase[i][2] == sta_chan[2]][0]]
        if base[1]=='yyyymmdd':
            daydir=date.strftime('%Y%m%d')
            staform='*'+sta_chan[0]+'.'+sta_chan[1][0]+'*'+sta_chan[1][1]+'.*'
        elif base[1]=='Yyyyy/Rjjj.01':
            daydir=date.strftime('Y%Y/R%j.01')
            staform=sta_chan[0]+'.*.'+sta_chan[1][0]+'*'+sta_chan[1][1]+\
                    '.'+date.strftime('%Y.%j')
        else:
            raise IOError('Not in the correct form: ' + base[1])
        if not 'image' in locals():
            image=read(base[0]+'/'+daydir+'/'+staform)
        else:
            image+=read(base[0]+'/'+daydir+'/'+staform)
    # Process the data using pre-processing
    for tr in image:
        tr = pre_processing.dayproc(tr, low_cut, high_cut, filt_order, samp_rate,
                                    matchdef.debug, date)
    # image.plot(size=(800,600), equal_scale=False)
    # Apply the detection routine with plot on
    detections = match_filter.match_filter(str(template[0].stats.starttime),
                                           [template], image, threshold,
                                           thresh_type, trig_int,
                                           True)
    for detection in detections:
        print('Detection using template: ' + detection.template_name + ' at ' +
              str(detection.detect_time) + ' with a cccsum of: ' +
              str(detection.detect_val))
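The docstring above describes three threshold conventions; a small sketch of the arithmetic, using a made-up cccsum array rather than EQcorrscan internals, makes the differences concrete:

import numpy as np

cccsum = 0.05 * np.random.randn(360000)  # fake cross-correlation sum for one template
threshold = 8.0
n_chans = 6  # number of channels in the template

mad_thresh = threshold * np.median(np.abs(cccsum))  # 'MAD': scaled median absolute cccsum
abs_thresh = threshold                              # 'absolute': applied directly to cccsum
av_chan_corr_thresh = threshold * n_chans           # 'av_chan_corr': average per-channel correlation scaled by channel count

A detection would then be declared wherever cccsum exceeds the chosen value, subject to the trig_int spacing between triggers.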
Example No. 25
def match_synth(sfile, cont_base, freqmin=2.0, freqmax=10.0, samp_rate=100.0,\
                threshold=8.0, threshold_type='MAD', trig_int=6.0, plotvar=True,\
                save_template=True):
    """
    Function to generate a basic synthetic from a real event, given by an s-file
    and cross-correlate this with the day of continuous data including the event

    :type sfile: str
    :param sfile: Path to the s-file for the event
    :type cont_base: str
    :param cont_base: Path to the continuous data, should be in Yyyyy/Rjjj.01\
                directories
    :type freqmin: float
    :param freqmin: Low-cut for bandpass in Hz, defaults to 2.0
    :type freqmax: float
    :param freqmax: High-cut for bandpass in Hz, defaults to 10.0
    :type samp_rate: float
    :param samp_rate: Desired sampling rate in Hz, defaults to 100.0
    :type threshold: float
    :param threshold: Threshold for detection in cccsum, defaults to 8.0
    :type threshold_type: str
    :param threshold_type: Type of threshold, either MAD or ABS, defaults to MAD
    :type trig_int: float
    :param trig_int: Trigger interval in seconds, defaults to 6.0
    :type plotvar: bool
    :param plotvar: To plot or not, defaults to true

    :returns: detections
    """
    # import matplotlib.pyplot as plt
    from eqcorrscan.core import match_filter, template_gen
    from eqcorrscan.utils import Sfile_util, pre_processing
    import glob
    from obspy import read, Stream, UTCDateTime
    from obspy.signal.cross_correlation import xcorr
    from joblib import Parallel, delayed
    from multiprocessing import cpu_count
    import numpy as np
    # Generate the synthetic
    synth_template=synth_from_sfile(sfile, samp_rate, length=1.0,\
                                    PS_ratio=1.68)
    synth_template.filter('bandpass', freqmin=freqmin, freqmax=freqmax)
    for tr in synth_template:
        tr.data=(tr.data*1000).astype(np.int32)
    # Find the date from the sfile
    event_date=Sfile_util.readheader(sfile).time.datetime
    day=UTCDateTime(event_date.date())
    # Work out which stations we have template info for
    stachans=[(tr.stats.station, tr.stats.channel) for tr in synth_template]
    # Read in the day of data
    for stachan in stachans:
        wavfile=glob.glob(cont_base+event_date.strftime('/Y%Y/R%j.01/')+\
                            stachan[0]+'.*.'+stachan[1][0]+'?'+stachan[1][-1]+\
                            '.'+event_date.strftime('%Y.%j'))
        if len(wavfile) != 0:
            for wavf in wavfile:
                if not 'st' in locals():
                    st=read(wavf)
                else:
                    st+=read(wavf)
    st=st.merge(fill_value='interpolate')
    cores=cpu_count()
    if len(st) < cores:
        jobs=len(st)
    else:
        jobs=cores
    st=Parallel(n_jobs=jobs)(delayed(pre_processing.dayproc)(tr, freqmin,\
                                                             freqmax, 3,\
                                                             samp_rate, 0,\
                                                             day)
                            for tr in st)
    st=Stream(st)
    # Make the real template
    picks=Sfile_util.readpicks(sfile)
    real_template=template_gen._template_gen(picks, st, 1.0, 'all',\
                                            prepick=10/samp_rate)
    for tr in real_template:
        tr.data=tr.data.astype(np.int32)
    if save_template:
        real_template.write('Real_'+sfile.split('/')[-1], format='MSEED',\
                            encoding='STEIM2')
    # Shift the synthetic to better align with the real one
    for tr in real_template:
        synth_tr=synth_template.select(station=tr.stats.station,\
                                        channel=tr.stats.channel)[0]
        shift, corr = xcorr(tr.data, synth_tr.data, 20)
        print(tr.stats.station + '.' + tr.stats.channel +
              ' shift=' + str(shift) + ' samples corr=' + str(corr))
        if corr < 0:
            synth_tr.data*=-1
        # Apply a pad
        pad=np.zeros(abs(shift))
        if shift < 0:
            synth_tr.data=np.append(synth_tr.data, pad)[abs(shift):]
        elif shift > 0:
            synth_tr.data=np.append(pad, synth_tr.data)[0:-shift]
    if save_template:
        synth_template.write('Synthetic_'+sfile.split('/')[-1],
                            format='MSEED', encoding='STEIM2')
    # Now we have processed data and a template, we can try and detect!
    detections=match_filter.match_filter(['Synthetic_'+sfile.split('/')[-1],
                                        'Real_'+sfile.split('/')[-1]],\
                                        [synth_template, real_template],\
                                        st, threshold, \
                                        threshold_type, trig_int,\
                                        plotvar, 'synth_temp')
    f=open('Synthetic_test.csv', 'w')
    f.write('template, detect-time, cccsum, threshold, number of channels\n')
    for detection in detections:
        # output detections to file
        f.write(detection.template_name+', '+str(detection.detect_time)+\
                ', '+str(detection.detect_val)+', '+str(detection.threshold)+\
                ', '+str(detection.no_chans)+'\n')
        print('template: ' + detection.template_name + ' detection at: ' +
              str(detection.detect_time) + ' with a cccsum of: ' +
              str(detection.detect_val))
    if detections:
        f.write('\n')
    f.close()
Example No. 26
def run_tutorial(min_magnitude=2, shift_len=0.2, num_cores=4, min_cc=0.5):
    """Functional, tested example script for running the lag-calc tutorial."""
    if num_cores > cpu_count():
        num_cores = cpu_count()
    client = Client('NCEDC')
    t1 = UTCDateTime(2004, 9, 28)
    t2 = t1 + 86400
    print('Downloading catalog')
    catalog = client.get_events(starttime=t1,
                                endtime=t2,
                                minmagnitude=min_magnitude,
                                minlatitude=35.7,
                                maxlatitude=36.1,
                                minlongitude=-120.6,
                                maxlongitude=-120.2,
                                includearrivals=True)
    # We don't need all the picks, lets take the information from the
    # five most used stations - note that this is done to reduce computational
    # costs.
    catalog = catalog_utils.filter_picks(catalog,
                                         channels=['EHZ'],
                                         top_n_picks=5)
    # There is a duplicate pick in event 3 in the catalog - this has the effect
    # of reducing our detections - check it yourself.
    for pick in catalog[3].picks:
        if pick.waveform_id.station_code == 'PHOB' and \
                        pick.onset == 'emergent':
            catalog[3].picks.remove(pick)
    print('Generating templates')
    templates = template_gen.template_gen(method="from_client",
                                          catalog=catalog,
                                          client_id='NCEDC',
                                          lowcut=2.0,
                                          highcut=9.0,
                                          samp_rate=50.0,
                                          filt_order=4,
                                          length=3.0,
                                          prepick=0.15,
                                          swin='all',
                                          process_len=3600)
    # In this section we generate a series of chunks of data.
    start_time = UTCDateTime(2004, 9, 28, 17)
    end_time = UTCDateTime(2004, 9, 28, 20)
    process_len = 3600
    chunks = []
    chunk_start = start_time
    while chunk_start < end_time:
        chunk_end = chunk_start + process_len
        if chunk_end > end_time:
            chunk_end = end_time
        chunks.append((chunk_start, chunk_end))
        chunk_start += process_len

    all_detections = []
    picked_catalog = Catalog()
    template_names = [
        template[0].stats.starttime.strftime("%Y%m%d_%H%M%S")
        for template in templates
    ]
    for t1, t2 in chunks:
        print('Downloading and processing for start-time: %s' % t1)
        # Download and process the data
        bulk_info = [(tr.stats.network, tr.stats.station, '*',
                      tr.stats.channel, t1, t2) for tr in templates[0]]
        # Just downloading a chunk of data
        try:
            st = client.get_waveforms_bulk(bulk_info)
        except FDSNException:
            st = Stream()
            for _bulk in bulk_info:
                st += client.get_waveforms(*_bulk)
        st.merge(fill_value='interpolate')
        st = pre_processing.shortproc(st,
                                      lowcut=2.0,
                                      highcut=9.0,
                                      filt_order=4,
                                      samp_rate=50.0,
                                      num_cores=num_cores)
        detections = match_filter.match_filter(template_names=template_names,
                                               template_list=templates,
                                               st=st,
                                               threshold=8.0,
                                               threshold_type='MAD',
                                               trig_int=6.0,
                                               plotvar=False,
                                               plotdir='.',
                                               cores=num_cores)
        # Extract unique detections from set.
        unique_detections = []
        for master in detections:
            keep = True
            for slave in detections:
                if not master == slave and\
                   abs(master.detect_time - slave.detect_time) <= 1.0:
                    # If the events are within 1s of each other then test which
                    # was the 'best' match, strongest detection
                    if not master.detect_val > slave.detect_val:
                        keep = False
                        break
            if keep:
                unique_detections.append(master)
        all_detections += unique_detections

        picked_catalog += lag_calc.lag_calc(detections=unique_detections,
                                            detect_data=st,
                                            template_names=template_names,
                                            templates=templates,
                                            shift_len=shift_len,
                                            min_cc=min_cc,
                                            interpolate=False,
                                            plot=False)
    # Return all of this so that we can use this function for testing.
    return all_detections, picked_catalog, templates, template_names
Example No. 27
                                      highcut=9.0,
                                      filt_order=4,
                                      samp_rate=20.0,
                                      starttime=st[0].stats.starttime.date)

# Reading the templates

templates = []
template_names = [
    'kaik_eq-WEL.ms', 'kaik_eq-WEL2.ms', 'kaik_eq-WEL3.ms', 'kaik_eq-WEL4.ms'
]

for template_file in template_names:
    templates.append(read(template_file))

detections = match_filter.match_filter(template_names=template_names,
                                       template_list=templates,
                                       st=st_processed,
                                       threshold=8,
                                       threshold_type='MAD',
                                       trig_int=6,
                                       plotvar=False,
                                       cores=4,
                                       debug=1)

for detection in detections:
    detection.write('detection-attempt.csv', append=True)

# The code only came up with a single detection, and it falls
# outside of the start and end times, which is strange
Example No. 28
def self_test(template, low_cut, high_cut, filt_order, samp_rate,
              threshold, thresh_type, trig_int, debug=0):
    """
    :type template: :class: obspy.Stream
    :param template: Template to check for self-detectability
    :type high_cut: float
    :param high_cut: High cut in Hz for bandpass
    :type low_cut: float
    :param low_cut: Low cut in Hz for bandpass
    :type filt_order: int
    :param filt_order: Corners for bandpass
    :type samp_rate: float
    :param samp_rate: Desired sampling rate in Hz
    :type threshold: float
    :param threshold: A threshold value set based on the thresh_type
    :type thresh_type: str
    :param thresh_type: The type of threshold to be used, can be MAD,
        absolute or av_chan_corr.  MAD threshold is calculated as
        threshold * median(abs(cccsum)), where cccsum is the cross-correlation
        sum for a given template.  absolute is a true absolute threshold on the
        cccsum value.  av_chan_corr is based on the mean of the single-channel
        cross-correlations, assuming all channels required by the template are
        present, e.g. av_chan_corr_thresh = threshold * (cccsum / len(template)),
        where template is a single template from the input and its length is
        the number of channels within this template.
    :type trig_int: float
    :param trig_int: Minimum gap between detections in seconds.
    :type debug: int
    :param debug: Debug output level, higher=more output.
    """
    import sys
    sys.path.append('..')
    import datetime as dt
    import pre_processing
    from eqcorrscan.core import match_filter
    from obspy import read
    # Work out the date of the template
    date = template[0].stats.starttime.datetime.date()
    # Read in the appropriate data
    sta_chans = [(tr.stats.station, tr.stats.channel, tr.stats.network)
                 for tr in template]
    for sta_chan in sta_chans:
        base = matchdef.contbase[[i for i in range(len(matchdef.contbase))
                                  if matchdef.contbase[i][2] == sta_chan[2]][0]]
        if base[1] == 'yyyymmdd':
            daydir = date.strftime('%Y%m%d')
            staform = '*' + sta_chan[0] + '.' + sta_chan[1][
                0] + '*' + sta_chan[1][1] + '.*'
        elif base[1] == 'Yyyyy/Rjjj.01':
            daydir = date.strftime('Y%Y/R%j.01')
            staform = (sta_chan[0] + '.*.' + sta_chan[1][0] + '*' +
                       sta_chan[1][1] + '.' + date.strftime('%Y.%j'))
        else:
            raise IOError('Directory naming is not in a recognised form: ' +
                          base[1])
        if 'image' not in locals():
            image = read(base[0] + '/' + daydir + '/' + staform)
        else:
            image += read(base[0] + '/' + daydir + '/' + staform)
    # Process the data using pre-processing
    for tr in image:
        tr = pre_processing.dayproc(tr, low_cut, high_cut, filt_order,
                                    samp_rate, matchdef.debug, date)
    # image.plot(size=(800,600), equal_scale=False)
    # Apply the detection routine with plot on
    detections = match_filter.match_filter(str(template[0].stats.starttime),
                                           [template], image, threshold,
                                           thresh_type, trig_int, True)
    for detection in detections:
        print('Detection using template: ' + detection.template_name +
              ' at ' + str(detection.detect_time) + ' with a cccsum of: ' +
              str(detection.detect_val))
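
The docstring above describes three threshold types; here is a minimal, hedged sketch of how they translate into trigger levels on the summed cross-correlation, using toy values (cccsum and n_chans are illustrative stand-ins, not EQcorrscan internals):

import numpy as np

# Toy summed cross-correlation trace and template size, for illustration only
cccsum = 0.01 * np.random.randn(86400 * 20)   # one day of samples at 20 Hz
n_chans = 6                                   # channels in the template

# 'MAD': the supplied value multiplies the median absolute cccsum value
mad_thresh = 8.0 * np.median(np.abs(cccsum))

# 'absolute': the supplied value is used directly as the level on cccsum
abs_thresh = 8.0

# 'av_chan_corr': the supplied value is a mean single-channel correlation
# (e.g. 0.3); on the summed correlation this corresponds to value * n_chans,
# assuming every channel required by the template is present
av_chan_corr_thresh = 0.3 * n_chans

print(mad_thresh, abs_thresh, av_chan_corr_thresh)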
Ejemplo n.º 29
0
                                        str(k) + '.h5')

                    # Occasionally templates are one sample too short; trim
                    # them to the expected number of samples. This handling
                    # is a bit clumsy (a tidier alternative is sketched after
                    # this example).
                    for c in range(numChan):
                        stTemp[c].data = stTemp[c].data[:trimIdx]
                    templates.append(stTemp)
                    template_names.append("template_" + str(k))
                except Exception:
                    # Skip templates that cannot be read or trimmed
                    pass

            # run eqcorrscan's match filter routine
            det = match_filter(template_names=template_names,
                               template_list=templates,
                               st=st,
                               threshold=threshold,
                               threshold_type="MAD",
                               trig_int=tolerance,
                               cores=20)

            # append to list
            detections.extend(det)

            # stop timer and give output
            runtime = time.time() - timer

            # give some output
            if blockSize * (j + 1) >= numTemp:
                print("Scanned " + currentDate.date.strftime("%Y-%m-%d") +
                      " with " + str(len(templates)) + " templates (" +
                      str(numTemp) + "/" + str(numTemp) + ") in " +
Ejemplo n.º 30
0
    # Merge the data to account for miniseed files being written in chunks
    # We need continuous day-long data, so data are padded if there are gaps
    st = st.merge(fill_value='interpolate')

    # Work out what day we are working on; required as we will pad the data
    # to be day-long (a day-length check is sketched after this example)
    day = st[0].stats.starttime.date

    # Process the data in the same way as the template
    for tr in st:
        tr = pre_processing.dayproc(tr, 1.0, 20.0, 3, 100.0, 1, day)

    # Set directory for match filter output plots
    plot_dir = '/projects/nesi00228/data/plots/'
    # Compute detections
    detections = match_filter.match_filter(template_names, templates, st,
                                           8.0, 'MAD', 6.0, False, plot_dir)

    # We now have a list of detections! We can output these to a file to check later
    for detection in detections:
        f.write(detection.template_name+', '+str(detection.detect_time) +
                ', '+str(detection.detect_val)+', '+str(detection.threshold) +
                ', '+str(detection.no_chans)+'\n')
    del detections
f.close()

# Print out runtime
print('Script took ' + str(time.time() - start) + ' seconds.')

# Instead of saving all of these waveforms, just save the plots as pdf
    # wav_dir='/home/chet/data/detections/'
    # det_wav = Stream()
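
Since dayproc expects gap-free, day-long traces, here is a minimal hedged sketch of checking that a merged stream really spans the whole day before processing (the input path is illustrative; the 100 Hz rate matches the dayproc call above):

from obspy import read

st = read('/path/to/one/day/of/data.ms')     # illustrative input only
st = st.merge(fill_value='interpolate')      # fill gaps as in the example above

samp_rate = 100.0                            # Hz, matching the dayproc call
expected_npts = int(samp_rate * 86400)       # samples in a full UTC day
for tr in st:
    if tr.stats.npts != expected_npts:
        print('%s is not day-long: %d of %d samples'
              % (tr.id, tr.stats.npts, expected_npts))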
Ejemplo n.º 31
0
            detections = []
            # Cope with having heaps of templates: run them through
            # match_filter in groups of at most 100, using ceiling division
            # so that the final partial group is also processed
            group_size = 100
            n_groups = -(-len(all_templates) // group_size)
            for i in range(n_groups):
                templates = all_templates[i * group_size:(i + 1) * group_size]
                template_names = all_template_names[
                    i * group_size:(i + 1) * group_size]

                detections+=match_filter.match_filter(template_names, templates, st,
                                                 matchdef.threshold, matchdef.threshtype,
                                                 matchdef.trig_int,  matchdef.plot,
                                                 matchdef=matchdef,
                                                 tempdir='temp_'+str(instance))

            for detection in detections:
                # Output detections to file
                f.write(detection.template_name + ', ' +
                        str(detection.detect_time) + ', ' +
                        str(detection.detect_val) + ', ' +
                        str(detection.threshold) + ', ' +
                        str(detection.no_chans) + '\n')
                print('template: ' + detection.template_name +
                      ' detection at: ' + str(detection.detect_time) +
                      ' with a cccsum of: ' + str(detection.detect_val))
            if detections:
                f.write('\n')
        else:
            for tr in st:
                tr.write('test_data/'+tr.stats.station+'-'+tr.stats.channel+\
Ejemplo n.º 32
0
                                lowcut=2.0,
                                highcut=9.0,
                                filt_order=4,
                                samp_rate=20.0,
                                debug=0,
                                starttime=t1)
    # Convert from list to stream
    st = Stream(st)

    # Now we can conduct the matched-filter detection
    detections = match_filter.match_filter(template_names=template_names,
                                           template_list=templates,
                                           st=st,
                                           threshold=8.0,
                                           threshold_type='MAD',
                                           trig_int=6.0,
                                           plotvar=True,
                                           plotdir='.',
                                           cores=ncores,
                                           tempdir=False,
                                           debug=1,
                                           plot_format='jpg')

    # Now let's try to work out how many unique events we have, just to
    # compare with the GeoNet catalog of 20 events on this day in this
    # sequence
    for master in detections:
        keep = True
        for slave in detections:
            if not master == slave and\
               abs(master.detect_time - slave.detect_time) <= 1.0:
                # If the events are within 1s of each other then test which
                # was the 'best' match, strongest detection
Ejemplo n.º 33
0
def run_tutorial(plot=False,
                 process_len=3600,
                 num_cores=cpu_count(),
                 **kwargs):
    """Main function to run the tutorial dataset."""
    # First we want to load our templates
    template_names = glob.glob('tutorial_template_*.ms')

    if len(template_names) == 0:
        raise IOError('Template files not found, have you run the template ' +
                      'creation tutorial?')

    templates = [read(template_name) for template_name in template_names]

    # Work out what stations we have and get the data for them
    stations = []
    for template in templates:
        for tr in template:
            stations.append((tr.stats.station, tr.stats.channel))
    # Get a unique list of stations
    stations = list(set(stations))

    # We will loop through the data in chunks. These chunks can be any size;
    # in general we have used one day as our standard, but this can be as
    # short as five minutes (for MAD thresholds) or shorter for other
    # threshold metrics. However, the chunk size should be the same as your
    # template process_len.

    # You should test different parameters!!!
    start_time = UTCDateTime(2016, 1, 4)
    end_time = UTCDateTime(2016, 1, 5)
    chunks = []
    chunk_start = start_time
    while chunk_start < end_time:
        chunk_end = chunk_start + process_len
        if chunk_end > end_time:
            chunk_end = end_time
        chunks.append((chunk_start, chunk_end))
        chunk_start += process_len

    unique_detections = []

    # Set up a client to access the GeoNet database
    client = Client("GEONET")

    # Note that these chunks do not rely on each other, and could be run in
    # parallel on multiple nodes of a distributed cluster; see the SLURM
    # tutorial for an example of this.
    for t1, t2 in chunks:
        # Generate the bulk information to query the GeoNet database
        bulk_info = []
        for station in stations:
            bulk_info.append(('NZ', station[0], '*',
                              station[1][0] + 'H' + station[1][-1], t1, t2))

        # Note this will take a little while.
        print('Downloading seismic data, this may take a while')
        st = client.get_waveforms_bulk(bulk_info)
        # Merge the stream, it will be downloaded in chunks
        st.merge()

        # Pre-process the data to set frequency band and sampling rate
        # Note that these are, and MUST BE, the same as the parameters used
        # for the template creation.
        print('Processing the seismic data')
        st = pre_processing.shortproc(st,
                                      lowcut=2.0,
                                      highcut=9.0,
                                      filt_order=4,
                                      samp_rate=20.0,
                                      num_cores=num_cores,
                                      starttime=t1,
                                      endtime=t2)
        # Convert from list to stream
        st = Stream(st)

        # Now we can conduct the matched-filter detection
        detections = match_filter.match_filter(template_names=template_names,
                                               template_list=templates,
                                               st=st,
                                               threshold=8.0,
                                               threshold_type='MAD',
                                               trig_int=6.0,
                                               plotvar=plot,
                                               plotdir='.',
                                               cores=num_cores,
                                               plot_format='png',
                                               **kwargs)

        # Now let's try to work out how many unique events we have, just to
        # compare with the GeoNet catalog of 20 events on this day in this
        # sequence
        for master in detections:
            keep = True
            for slave in detections:
                if not master == slave and abs(master.detect_time -
                                               slave.detect_time) <= 1.0:
                    # If the events are within 1s of each other then test which
                    # was the 'best' match, strongest detection
                    if not master.detect_val > slave.detect_val:
                        keep = False
                        print('Removed detection at %s with cccsum %s' %
                              (master.detect_time, master.detect_val))
                        print('Keeping detection at %s with cccsum %s' %
                              (slave.detect_time, slave.detect_val))
                        break
            if keep:
                unique_detections.append(master)
                print('Detection at :' + str(master.detect_time) +
                      ' for template ' + master.template_name +
                      ' with a cross-correlation sum of: ' +
                      str(master.detect_val))
                # We can plot these too
                if plot:
                    stplot = st.copy()
                    template = templates[template_names.index(
                        master.template_name)]
                    lags = sorted([tr.stats.starttime for tr in template])
                    maxlag = lags[-1] - lags[0]
                    stplot.trim(starttime=master.detect_time - 10,
                                endtime=master.detect_time + maxlag + 10)
                    plotting.detection_multiplot(stplot, template,
                                                 [master.detect_time.datetime])
    print('We made a total of ' + str(len(unique_detections)) + ' detections')
    return unique_detections
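
A hedged usage sketch for the tutorial function above, assuming the module-level imports this example relies on (glob, read, Client, Stream, UTCDateTime, pre_processing, plotting and match_filter) are in place:

if __name__ == '__main__':
    # Run the matched-filter tutorial over hour-long chunks without plotting
    tutorial_detections = run_tutorial(plot=False, process_len=3600)
    print('Returned %d unique detections' % len(tutorial_detections))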
Ejemplo n.º 34
0
 print('Pre-processing took %.3f seconds' % (proc_stp - proc_strt))
 # RUN MATCH FILTER (looping through chunks of templates due to RAM;
 # a sketch of the partition helper used here follows this example)
 chunk_temps = partition(templates, 120)
 chunk_temp_names = partition(template_names, 120)
 print('Starting correlation runs for %s' % str(day))
 i = 0
 for temps, temp_names in zip(chunk_temps, chunk_temp_names):
     i += 1  # Silly counter for debug
     grp_corr_st = timer()
     print('On template group %d of %d' % (i, len(chunk_temps)))
     dets, cat, sts = match_filter.match_filter(temp_names,
                                                temps,
                                                st1,
                                                threshold=8.0,
                                                threshold_type='MAD',
                                                trig_int=1.0,
                                                plotvar=False,
                                                cores=12,
                                                output_cat=True,
                                                extract_detections=True,
                                                debug=2)
     # Append detections to a file for this instance to check later
     print('Correlations for group %d took %.3f sec, now extracting them' %
           (i, timer() - grp_corr_st))
     extrct_st = timer()
     with open(
             '/projects/nesi00228/data/detections/raw_det_txt/%s/%d_dets.txt'
             % (str(dto.year), instance),
             mode='a') as fo:
         det_writer = csv.writer(fo)
         for det, st in zip(dets, sts):
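
partition() is not defined in this snippet; here is a minimal hedged sketch of a compatible helper, assuming it simply splits a sequence into consecutive chunks of at most n items:

def partition(seq, n):
    """Split seq into consecutive chunks of at most n items."""
    return [seq[i:i + n] for i in range(0, len(seq), n)]

# e.g. partition(list(range(5)), 2) -> [[0, 1], [2, 3], [4]]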
Ejemplo n.º 35
0
    # We need continuous day-long data, so data are padded if there are gaps
    st = st.merge(fill_value='interpolate')

    # Work out what day we are working on; required as we will pad the data
    # to be day-long
    day = st[0].stats.starttime.date

    # Process the data in the same way as the template
    for tr in st:
        tr = pre_processing.dayproc(tr, 1.0, 20.0, 3, 100.0,
                                    matchdef.debug, day)

    # Set directory for match filter output plots
    plot_dir = '/home/chet/data/plot/'
    # Compute detections
    detections = match_filter.match_filter(template_names, templates, st,
                                           8.0, matchdef.threshtype,
                                           matchdef.trig_int, True, plot_dir,
                                           cores=4)

    # We now have a list of detections! We can output these to a file to check later
    for detection in detections:
        f.write(detection.template_name + ', ' + str(detection.detect_time) +
                ', ' + str(detection.detect_val) + ', ' +
                str(detection.threshold) + ', ' + str(detection.no_chans) +
                '\n')
    del detections
f.close()
print('Runtime: ' + str(time.time() - start) + ' seconds')
# Instead of saving all of these waveforms, just save the plots as pdf
# wav_dir='/home/chet/data/detections/'
# det_wav = Stream()
# for detection in detections:
#     st.plot(starttime=detection.detect_time-2, endtime=detection.detect_time+8, \
Ejemplo n.º 36
0
def run_tutorial(min_magnitude=2, shift_len=0.2, num_cores=4):
    import obspy
    if int(obspy.__version__.split('.')[0]) >= 1:
        from obspy.clients.fdsn import Client
    else:
        from obspy.fdsn import Client
    from obspy.core.event import Catalog
    from obspy import UTCDateTime
    from eqcorrscan.core import template_gen, match_filter, lag_calc
    from eqcorrscan.utils import pre_processing, catalog_utils

    client = Client('NCEDC')
    t1 = UTCDateTime(2004, 9, 28)
    t2 = t1 + 86400
    print('Downloading catalog')
    catalog = client.get_events(starttime=t1,
                                endtime=t2,
                                minmagnitude=min_magnitude,
                                minlatitude=35.7,
                                maxlatitude=36.1,
                                minlongitude=-120.6,
                                maxlongitude=-120.2,
                                includearrivals=True)
    # We don't need all the picks; let's take the information from the five
    # most-used stations. Note that this is done to reduce computational
    # costs.
    catalog = catalog_utils.filter_picks(catalog,
                                         channels=['EHZ'],
                                         top_n_picks=5)
    print('Generating templates')
    templates = template_gen.from_client(catalog=catalog,
                                         client_id='NCEDC',
                                         lowcut=2.0,
                                         highcut=9.0,
                                         samp_rate=50.0,
                                         filt_order=4,
                                         length=3.0,
                                         prepick=0.15,
                                         swin='all',
                                         process_len=3600)
    start_time = UTCDateTime(2004, 9, 28, 17)
    end_time = UTCDateTime(2004, 9, 28, 20)
    process_len = 1800
    chunks = []
    chunk_start = start_time
    while chunk_start < end_time:
        chunk_end = chunk_start + process_len
        if chunk_end > end_time:
            chunk_end = end_time
        chunks.append((chunk_start, chunk_end))
        chunk_start += process_len

    all_detections = []
    picked_catalog = Catalog()
    template_names = [
        str(template[0].stats.starttime) for template in templates
    ]
    for t1, t2 in chunks:
        print('Downloading and processing for start-time: %s' % t1)
        # Download and process the data
        bulk_info = [(tr.stats.network, tr.stats.station, '*',
                      tr.stats.channel[0] + 'H' + tr.stats.channel[1], t1, t2)
                     for tr in templates[0]]
        # Just downloading a chunk of data
        st = client.get_waveforms_bulk(bulk_info)
        st.merge(fill_value='interpolate')
        st = pre_processing.shortproc(st,
                                      lowcut=2.0,
                                      highcut=9.0,
                                      filt_order=4,
                                      samp_rate=50.0,
                                      debug=0,
                                      num_cores=num_cores)
        detections = match_filter.match_filter(template_names=template_names,
                                               template_list=templates,
                                               st=st,
                                               threshold=8.0,
                                               threshold_type='MAD',
                                               trig_int=6.0,
                                               plotvar=False,
                                               plotdir='.',
                                               cores=num_cores)
        # Extract unique detections from set.
        unique_detections = []
        for master in detections:
            keep = True
            for slave in detections:
                if not master == slave and\
                   abs(master.detect_time - slave.detect_time) <= 1.0:
                    # If the events are within 1s of each other then test which
                    # was the 'best' match, strongest detection
                    if not master.detect_val > slave.detect_val:
                        keep = False
                        break
            if keep:
                unique_detections.append(master)
        all_detections += unique_detections

        picked_catalog += lag_calc.lag_calc(detections=unique_detections,
                                            detect_data=st,
                                            template_names=template_names,
                                            templates=templates,
                                            shift_len=shift_len,
                                            min_cc=0.5,
                                            interpolate=True,
                                            plot=False)
    # Return all of this so that we can use this function for testing.
    return all_detections, picked_catalog, templates, template_names
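
A hedged usage sketch for the function above, writing the lag-calc picked catalog out as QuakeML (the output filename is illustrative):

if __name__ == '__main__':
    all_detections, picked_catalog, templates, template_names = \
        run_tutorial(min_magnitude=2, shift_len=0.2, num_cores=4)
    print('Made %d detections' % len(all_detections))
    # Save the re-picked events for later relocation or magnitude work
    picked_catalog.write('picked_catalog.xml', format='QUAKEML')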
Ejemplo n.º 37
0
def run_tutorial(plot=False):
    """Main function to run the tutorial dataset."""

    from eqcorrscan.utils import pre_processing
    from eqcorrscan.utils import plotting
    from eqcorrscan.core import match_filter
    import glob
    from multiprocessing import cpu_count

    # This import section copes with namespace changes between obspy versions
    import obspy
    if int(obspy.__version__.split('.')[0]) >= 1:
        from obspy.clients.fdsn import Client
    else:
        from obspy.fdsn import Client
    from obspy import UTCDateTime, Stream, read

    # First we want to load our templates
    template_names = glob.glob('tutorial_template_*.ms')

    if len(template_names) == 0:
        raise IOError('Template files not found, have you run the template ' +
                      'creation tutorial?')

    templates = [read(template_name) for template_name in template_names]

    # Work out what stations we have and get the data for them
    stations = []
    for template in templates:
        for tr in template:
            stations.append((tr.stats.station, tr.stats.channel))
    # Get a unique list of stations
    stations = list(set(stations))

    # We will loop through the data in chunks. These chunks can be any size;
    # in general we have used one day as our standard, but this can be as
    # short as five minutes (for MAD thresholds) or shorter for other
    # threshold metrics. However, the chunk size should be the same as your
    # template process_len.

    # You should test different parameters!!!
    start_time = UTCDateTime(2016, 1, 4)
    end_time = UTCDateTime(2016, 1, 5)
    process_len = 3600
    chunks = []
    chunk_start = start_time
    while chunk_start < end_time:
        chunk_end = chunk_start + process_len
        if chunk_end > end_time:
            chunk_end = end_time
        chunks.append((chunk_start, chunk_end))
        chunk_start += process_len

    unique_detections = []
    detections = []

    # Set up a client to access the GeoNet database
    client = Client("GEONET")

    # Note that these chunks do not rely on each other, and could be run in
    # parallel on multiple nodes of a distributed cluster; see the SLURM
    # tutorial for an example of this.
    for t1, t2 in chunks:
        # Generate the bulk information to query the GeoNet database
        bulk_info = []
        for station in stations:
            bulk_info.append(('NZ', station[0], '*',
                              station[1][0] + 'H' + station[1][-1], t1, t2))

        # Note this will take a little while.
        print('Downloading seismic data, this may take a while')
        st = client.get_waveforms_bulk(bulk_info)
        # Merge the stream, it will be downloaded in chunks
        st.merge(fill_value='interpolate')

        # Set how many cores we want to parallel across. We will set this to
        # four as this is the number of templates; if your machine has fewer
        # than four cores/CPUs the multiprocessing will wait until there is a
        # free core. Setting this higher than the number of templates gives no
        # increase in speed, as only the detections for each template are
        # computed in parallel. It may also slow your processing by using more
        # memory than needed, to the extent that swap may be filled.
        if cpu_count() < 4:
            ncores = cpu_count()
        else:
            ncores = 4

        # Pre-process the data to set frequency band and sampling rate
        # Note that these are, and MUST BE, the same as the parameters used
        # for the template creation.
        print('Processing the seismic data')
        st = pre_processing.shortproc(st,
                                      lowcut=2.0,
                                      highcut=9.0,
                                      filt_order=4,
                                      samp_rate=20.0,
                                      debug=2,
                                      num_cores=ncores,
                                      starttime=t1,
                                      endtime=t2)
        # Convert from list to stream
        st = Stream(st)

        # Now we can conduct the matched-filter detection
        detections += match_filter.match_filter(template_names=template_names,
                                                template_list=templates,
                                                st=st,
                                                threshold=8.0,
                                                threshold_type='MAD',
                                                trig_int=6.0,
                                                plotvar=plot,
                                                plotdir='.',
                                                cores=ncores,
                                                tempdir=False,
                                                debug=1,
                                                plot_format='jpg')

    # Now let's try to work out how many unique events we have, just to
    # compare with the GeoNet catalog of 20 events on this day in this
    # sequence
    for master in detections:
        keep = True
        for slave in detections:
            if not master == slave and\
               abs(master.detect_time - slave.detect_time) <= 1.0:
                # If the events are within 1s of each other then test which
                # was the 'best' match, strongest detection
                if not master.detect_val > slave.detect_val:
                    keep = False
                    break
        if keep:
            unique_detections.append(master)

    print('We made a total of ' + str(len(unique_detections)) + ' detections')

    for detection in unique_detections:
        print('Detection at :' + str(detection.detect_time) +
              ' for template ' + detection.template_name +
              ' with a cross-correlation sum of: ' + str(detection.detect_val))
        # We can plot these too
        if plot:
            stplot = st.copy()
            template = templates[template_names.index(detection.template_name)]
            lags = sorted([tr.stats.starttime for tr in template])
            maxlag = lags[-1] - lags[0]
            stplot.trim(starttime=detection.detect_time - 10,
                        endtime=detection.detect_time + maxlag + 10)
            plotting.detection_multiplot(stplot, template,
                                         [detection.detect_time.datetime])
    return unique_detections
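
A hedged usage sketch for the tutorial function above, writing the unique detections out with Detection.write as in the earlier examples (the output filename is illustrative):

if __name__ == '__main__':
    tutorial_detections = run_tutorial(plot=False)
    for detection in tutorial_detections:
        detection.write('tutorial_detections.csv', append=True)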
Ejemplo n.º 38
0
    # Convert from list to stream
    st = Stream(st)

    # Set how many cores we want to parallel across. We will set this to four
    # as this is the number of templates; if your machine has fewer than four
    # cores/CPUs the multiprocessing will wait until there is a free core.
    # Setting this higher than the number of templates gives no increase in
    # speed, as only the detections for each template are computed in
    # parallel.
    ncores = 4

    # Now we can conduct the matched-filter detection
    detections = match_filter.match_filter(template_names=template_names,
                                           template_list=templates,
                                           st=st, threshold=8.0,
                                           threshold_type='MAD',
                                           trig_int=6.0, plotvar=True,
                                           plotdir='.', cores=ncores,
                                           tempdir=False, debug=0,
                                           plot_format='jpg')

    # Now let's try to work out how many unique events we have, just to
    # compare with the GeoNet catalog of 20 events on this day in this
    # sequence
    for master in detections:
        keep = True
        for slave in detections:
            if not master == slave and\
               abs(master.detect_time - slave.detect_time) <= 6.0:
                # If the events are within 6s of each other then test which
                # was the 'best' match, strongest detection
                if not master.detect_val > slave.detect_val:
                    keep = False