def test_filter_error(self):
     """Check that we don't allow filtering above the nyquist."""
     with self.assertRaises(IOError):
         shortproc(self.short_stream.copy(),
                   lowcut=0.1,
                   highcut=0.6,
                   filt_order=4,
                   samp_rate=1,
                   parallel=False,
                   num_cores=False,
                   starttime=None,
                   endtime=None)
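This test relies on shortproc rejecting a highcut at or above the Nyquist frequency. A minimal sketch of the arithmetic, with illustrative names (not part of the test class):

samp_rate = 1.0            # Hz, as passed to shortproc above
nyquist = samp_rate / 2.0  # 0.5 Hz
highcut = 0.6              # requested highcut exceeds the Nyquist frequency
assert highcut > nyquist   # hence shortproc is expected to raise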
Example #2
 def test_parallel_core_unset(self):
     """Test the parallel implementation without num_cores set."""
     processed = shortproc(
         self.short_stream.copy(), lowcut=0.1, highcut=0.4, filt_order=4,
         samp_rate=1, debug=0, parallel=True, num_cores=False,
         starttime=None, endtime=None)
     self.assertEqual(len(processed), self.nchans)
     for tr in processed:
         self.assertEqual(self.instart, tr.stats.starttime)
         self.assertEqual(self.inend, tr.stats.endtime)
Example #3
 def test_shortproc(self):
     """Test the short-proc processing method."""
     processed = shortproc(
         self.short_stream.copy(), lowcut=0.1, highcut=0.4, filt_order=4,
         samp_rate=1, debug=0, parallel=False, num_cores=False,
         starttime=None, endtime=None)
     self.assertEqual(len(processed), self.nchans)
     for tr in processed:
         self.assertEqual(self.instart, tr.stats.starttime)
         self.assertEqual(self.inend, tr.stats.endtime)
Example #4
 def test_shortproc_set_end(self):
     """Check that shortproc trims properly."""
     processed = shortproc(
         self.short_stream.copy(), lowcut=0.1, highcut=0.4, filt_order=4,
         samp_rate=1, debug=0, parallel=False, num_cores=False,
         starttime=None, endtime=self.short_stream[0].stats.endtime - 2)
     self.assertEqual(len(processed), self.nchans)
     for tr in processed:
         self.assertEqual(self.instart, tr.stats.starttime)
         self.assertEqual(self.inend - 2, tr.stats.endtime)
Example #5
 def test_trace_as_argument(self):
     """
     Check that we can cope with a trace, and that a trace is returned.
     """
     processed = shortproc(
         self.short_stream.copy()[0], lowcut=0.1, highcut=0.4, filt_order=4,
         samp_rate=1, debug=0, parallel=False, num_cores=False,
         starttime=None, endtime=None)
     self.assertTrue(isinstance(processed, Trace))
     self.assertEqual(self.instart, processed.stats.starttime)
     self.assertEqual(self.inend, processed.stats.endtime)
Example #6
def read_gappy_real_data():
    """ These data SUCK - gap followed by spike, and long period trend.
    Super fugly"""
    from obspy.clients.fdsn import Client
    from obspy import UTCDateTime
    from eqcorrscan.utils.pre_processing import shortproc

    client = Client("GEONET")
    st = client.get_waveforms(
        network="NZ", station="DUWZ", location="20", channel="BNZ",
        starttime=UTCDateTime(2016, 12, 31, 23, 58, 56),
        endtime=UTCDateTime(2017, 1, 1, 0, 58, 56))
    st = shortproc(
        st=st.merge(), lowcut=2, highcut=20, filt_order=4, samp_rate=50)
    return st
Example #7
 def setUpClass(cls):
     print('\t\t\t Downloading data')
     client = Client('NCEDC')
     t1 = UTCDateTime(2004, 9, 28, 17)
     t2 = t1 + 3600
     process_len = 3600
     # t1 = UTCDateTime(2004, 9, 28)
     # t2 = t1 + 80000
     # process_len = 80000
     catalog = client.get_events(starttime=t1, endtime=t2,
                                 minmagnitude=4,
                                 minlatitude=35.7, maxlatitude=36.1,
                                 minlongitude=-120.6,
                                 maxlongitude=-120.2,
                                 includearrivals=True)
     catalog = catalog_utils.filter_picks(catalog, channels=['EHZ'],
                                          top_n_picks=5)
     cls.templates = template_gen.from_client(catalog=catalog,
                                              client_id='NCEDC',
                                              lowcut=2.0, highcut=9.0,
                                              samp_rate=50.0, filt_order=4,
                                              length=3.0, prepick=0.15,
                                              swin='all',
                                              process_len=process_len)
     for template in cls.templates:
         template.sort()
     # Download and process the day-long data
     template_stachans = []
     for template in cls.templates:
         for tr in template:
             template_stachans.append((tr.stats.network,
                                       tr.stats.station,
                                       tr.stats.channel))
     template_stachans = list(set(template_stachans))
     bulk_info = [(stachan[0], stachan[1], '*',
                   stachan[2][0] + 'H' + stachan[2][1],
                   t1, t1 + process_len)
                  for stachan in template_stachans]
     # Just downloading an hour of data
     st = client.get_waveforms_bulk(bulk_info)
     st.merge(fill_value='interpolate')
     cls.st = pre_processing.shortproc(st, lowcut=2.0, highcut=9.0,
                                       filt_order=4, samp_rate=50.0,
                                       debug=0, num_cores=1)
     cls.template_names = [str(template[0].stats.starttime)
                           for template in cls.templates]
Example #8
def get_real_multichannel_data():
    from obspy.clients.fdsn import Client
    from obspy import UTCDateTime
    from eqcorrscan.utils.pre_processing import shortproc

    t1 = UTCDateTime("2016-01-04T12:00:00.000000Z")
    t2 = t1 + 600
    bulk = [('NZ', 'POWZ', '*', 'EHZ', t1, t2),
            ('NZ', 'HOWZ', '*', 'EHZ', t1, t2)]
    client = Client("GEONET")
    st = client.get_waveforms_bulk(bulk)
    st = shortproc(st.merge(),
                   lowcut=2.0,
                   highcut=9.0,
                   filt_order=4,
                   samp_rate=20.0,
                   starttime=t1,
                   endtime=t2)
    return st
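A hedged usage line for the helper above; with complete data this should return processed EHZ traces from POWZ and HOWZ, resampled to 20 Hz:

st = get_real_multichannel_data()
print(st)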
Example #9
    def test_short_match_filter(self):
        """Test using short streams of data."""
        from obspy.clients.fdsn import Client
        from obspy import UTCDateTime
        from eqcorrscan.core import template_gen, match_filter
        from eqcorrscan.utils import pre_processing, catalog_utils

        client = Client('NCEDC')
        t1 = UTCDateTime(2004, 9, 28)
        t2 = t1 + 86400
        catalog = client.get_events(starttime=t1, endtime=t2,
                                    minmagnitude=4,
                                    minlatitude=35.7, maxlatitude=36.1,
                                    minlongitude=-120.6,
                                    maxlongitude=-120.2,
                                    includearrivals=True)
        catalog = catalog_utils.filter_picks(catalog, channels=['EHZ'],
                                             top_n_picks=5)
        templates = template_gen.from_client(catalog=catalog,
                                             client_id='NCEDC',
                                             lowcut=2.0, highcut=9.0,
                                             samp_rate=50.0, filt_order=4,
                                             length=3.0, prepick=0.15,
                                             swin='all', process_len=3600)
        # Download and process the day-long data
        bulk_info = [(tr.stats.network, tr.stats.station, '*',
                      tr.stats.channel[0] + 'H' + tr.stats.channel[1],
                      t2 - 3600, t2) for tr in templates[0]]
        # Just downloading an hour of data
        st = client.get_waveforms_bulk(bulk_info)
        st.merge(fill_value='interpolate')
        st = pre_processing.shortproc(st, lowcut=2.0, highcut=9.0,
                                      filt_order=4, samp_rate=50.0,
                                      debug=0, num_cores=4)
        template_names = [str(template[0].stats.starttime)
                          for template in templates]
        detections = match_filter.match_filter(template_names=template_names,
                                               template_list=templates,
                                               st=st, threshold=8.0,
                                               threshold_type='MAD',
                                               trig_int=6.0, plotvar=False,
                                               plotdir='.', cores=4)
Example #10
 def setUpClass(cls):
     client = Client('GEONET')
     t1 = UTCDateTime(2016, 9, 4)
     t2 = t1 + 86400
     catalog = get_geonet_events(startdate=t1, enddate=t2, minmag=4,
                                 minlat=-49, maxlat=-35, minlon=175.0,
                                 maxlon=185.0)
     catalog = catalog_utils.filter_picks(catalog, channels=['EHZ'],
                                          top_n_picks=5)
     for event in catalog:
         extra_pick = Pick()
         extra_pick.phase_hint = 'S'
         extra_pick.time = event.picks[0].time + 10
         extra_pick.waveform_id = event.picks[0].waveform_id
         event.picks.append(extra_pick)
     cls.templates = template_gen.from_client(catalog=catalog,
                                              client_id='GEONET',
                                              lowcut=2.0, highcut=9.0,
                                              samp_rate=50.0, filt_order=4,
                                              length=3.0, prepick=0.15,
                                              swin='all', process_len=3600)
     # Download and process the day-long data
     bulk_info = [(tr.stats.network, tr.stats.station, '*',
                   tr.stats.channel[0] + 'H' + tr.stats.channel[1],
                   t1 + (4 * 3600), t1 + (5 * 3600))
                  for tr in cls.templates[0]]
     # Just downloading an hour of data
     print('Downloading data')
     st = client.get_waveforms_bulk(bulk_info)
     st.merge(fill_value='interpolate')
     st.trim(t1 + (4 * 3600), t1 + (5 * 3600)).sort()
     # This is slow?
     print('Processing continuous data')
     cls.st = pre_processing.shortproc(st, lowcut=2.0, highcut=9.0,
                                       filt_order=4, samp_rate=50.0,
                                       debug=0, num_cores=1)
     cls.st.trim(t1 + (4 * 3600), t1 + (5 * 3600)).sort()
     cls.template_names = [str(template[0].stats.starttime)
                           for template in cls.templates]
Example #11
def make_dist_mat(directory, highcut, lowcut, samp_rate,
                  filt_order, raw_prepick, corr_prepick,
                  length, shift, outfile, cores):
    """
    Take a directory of templates, process the waveforms and compute
    correlation clustering prior to creating a subspace.
    :param directory: Directory of template mseeds
    :param highcut: filter highcut
    :param lowcut: filter lowcut
    :param samp_rate: output sample rate
    :param filt_order: filter corners
    :param raw_prepick: Prepick time of template files
    :param corr_prepick: Output prepick before correlations
    :param length: Length of temp to be correlating
    :param shift: Shift length in secs allowed during correlations
    :param outfile: Filename for output distance matrix
    :param cores: Number of cores to use for processing and correlation
    :return:
    """

    temp_files = glob('%s/*' % directory)
    temp_list = [(shortproc(read(tmp), lowcut=lowcut, highcut=highcut,
                            samp_rate=samp_rate, filt_order=filt_order,
                            parallel=True, num_cores=cores),
                  tmp.split('/')[-1].split('.')[0])
                 for tmp in temp_files]
    front_clip = raw_prepick - corr_prepick
    back_clip = front_clip + length
    for temp in temp_list:
        for tr in temp[0]:
            tr.trim(starttime=tr.stats.starttime + front_clip,
                    endtime=tr.stats.starttime + back_clip)
    temp_sts = [x[0] for x in temp_list]
    print('Starting distance matrix computations')
    dist_mat = distance_matrix(temp_sts, allow_shift=True, shift_len=shift,
                               cores=cores)
    print('Saving matrix to %s' % outfile)
    np.save(outfile, dist_mat)
    return
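For reference, a hedged usage sketch of make_dist_mat; the paths and parameter values below are placeholders, not anything from the original source:

make_dist_mat(directory='templates_mseed',  # placeholder directory of mseeds
              highcut=9.0, lowcut=2.0, samp_rate=50.0, filt_order=4,
              raw_prepick=0.5,   # pre-pick the template files were cut with
              corr_prepick=0.1,  # pre-pick to keep before correlating
              length=3.0,        # seconds of waveform to correlate
              shift=0.1,         # allowed shift (s) during correlation
              outfile='dist_mat.npy', cores=4)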
Example #12
    def test_match_filter(self, samp_rate=20.0, debug=0):
        """
        Function to test the capabilities of match_filter and just check that \
        it is working!  Uses synthetic templates and seeded, randomised data.

        :type debug: int
        :param debug: Debug level, higher the number the more output.
        """
        from eqcorrscan.utils import pre_processing
        from eqcorrscan.utils import plotting
        from eqcorrscan.core import match_filter
        from eqcorrscan.utils.synth_seis import generate_synth_data
        from obspy import UTCDateTime
        import string
        # Generate a random dataset
        templates, data, seeds = generate_synth_data(nsta=5, ntemplates=2,
                                                     nseeds=50,
                                                     samp_rate=samp_rate,
                                                     t_length=6.0, max_amp=5.0,
                                                     debug=debug)
        # Notes to the user: If you use more templates you should ensure they
        # are more different, e.g. set the data to have larger moveouts,
        # otherwise similar templates will detect events seeded by another
        # template.
        # Test the pre_processing functions
        data = pre_processing.dayproc(st=data, lowcut=2.0, highcut=8.0,
                                      filt_order=3, samp_rate=samp_rate,
                                      debug=0, starttime=UTCDateTime(0))
        if debug > 0:
            data.plot()
        # Filter the data and the templates
        for template in templates:
            pre_processing.shortproc(st=template, lowcut=2.0, highcut=8.0,
                                     filt_order=3, samp_rate=samp_rate)
            if debug > 0:
                template.plot()
        template_names = list(string.ascii_lowercase)[0:len(templates)]
        detections = match_filter.match_filter(template_names=template_names,
                                               template_list=templates,
                                               st=data, threshold=10.0,
                                               threshold_type='MAD',
                                               trig_int=6.0,
                                               plotvar=False,
                                               plotdir='.',
                                               cores=1,
                                               debug=0)
        # Compare the detections to the seeds
        print('This test made ' + str(len(detections)) + ' detections')
        ktrue = 0
        kfalse = 0
        for detection in detections:
            print(detection.template_name)
            i = template_names.index(detection.template_name)
            t_seeds = seeds[i]
            dtime_samples = int((detection.detect_time - UTCDateTime(0)) *
                                samp_rate)
            if dtime_samples in t_seeds['time']:
                j = list(t_seeds['time']).index(dtime_samples)
                print('Detection at SNR of: ' + str(t_seeds['SNR'][j]))
                ktrue += 1
            else:
                min_diff = min(abs(t_seeds['time'] - dtime_samples))
                if min_diff < 10:
                    # If there is a match within ten samples then it is
                    # good enough
                    j = list(abs(t_seeds['time'] -
                                 dtime_samples)).index(min_diff)
                    print('Detection at SNR of: ' + str(t_seeds['SNR'][j]))
                    ktrue += 1
                else:
                    print('Detection at sample: ' + str(dtime_samples) +
                          ' does not match anything in seed times:')
                    kfalse += 1
                print('Minimum difference in samples is: ' + str(min_diff))
        # Plot the detections
        if debug > 3:
            for i, template in enumerate(templates):
                times = [d.detect_time.datetime for d in detections
                         if d.template_name == template_names[i]]
                print(times)
                plotting.detection_multiplot(data, template, times)
        # Set an 'acceptable' ratio of positive to false detections
        print(str(ktrue) + ' true detections and ' + str(kfalse) +
              ' false detections')
        self.assertTrue(kfalse / ktrue < 0.25)
Example #13
        temp_name = file1.split("/")[-1:][0][:-3]
        template_names.append(temp_name)
        temp_tup.append((temp_name, temp_data))
    else:
        temp_data = read(file1)
        templates += [temp_data]
        temp_name = file1.split("/")[-1:][0][:-3]
        template_names.append(temp_name)
        temp_tup.append((temp_name, temp_data))


# Extract the station info from the templates
for template in templates:
    # Filter and downsample the data
    template = pre_processing.shortproc(template, 1.0, 20.0, 3, 100.0,
                                        matchdef.debug)
    if 'stachans' not in locals():
        stachans = [(tr.stats.station, tr.stats.channel) for tr in template]
    else:
        stachans += [(tr.stats.station, tr.stats.channel) for tr in template]

# Make this a unique list
stachans = list(set(stachans))

# Read in the continuous data for these station, channel combinations
raw_files = []
raw_dir = '/Volumes/GeoPhysics_07/users-data/matsonga/MRP_PROJ/data/mastersData/sac'
# Recursively search a directory for files matching the desired day and stachan
for root, dirnames, filenames in os.walk(raw_dir):
    for stachan in stachans:
        for filename in fnmatch.filter(filenames, 'NZ.'+stachan[0]+'*' +
Example #14
def extract_from_stack(stack,
                       template,
                       length,
                       pre_pick,
                       pre_pad,
                       Z_include=False,
                       pre_processed=True,
                       samp_rate=None,
                       lowcut=None,
                       highcut=None,
                       filt_order=3):
    """
    Extract a multiplexed template from a stack of detections.

    Function to extract a new template from a stack of previous detections.
    Requires the stack, the template used to make the detections for the \
    stack, and we need to know if the stack has been pre-processed.

    :type stack: obspy.core.stream.Stream
    :param stack: Waveform stack from detections.  Can be of any length and \
        can have delays already included, or not.
    :type template: obspy.core.stream.Stream
    :param template: Template used to make the detections in the stack. Will \
        use the delays of this for the new template.
    :type length: float
    :param length: Length of new template in seconds
    :type pre_pick: float
    :param pre_pick: Extract additional data before the detection, seconds
    :type pre_pad: float
    :param pre_pad: Pad used in seconds when extracting the data, e.g. the \
        time before the detection extracted.  If using \
        clustering.extract_detections this is half the length of the extracted \
        waveform.
    :type Z_include: bool
    :param Z_include: If True will include any Z-channels even if there is \
        no template for this channel, as long as there is a template for this \
        station at a different channel.  If this is False and Z channels are \
        included in the template Z channels will be included in the \
        new_template anyway.
    :type pre_processed: bool
    :param pre_processed: Have the data been pre-processed, if True (default) \
        then we will only cut the data here.
    :type samp_rate: float
    :param samp_rate: If pre_processed=False then this is required, desired \
        sampling rate in Hz, defaults to None.
    :type lowcut: float
    :param lowcut: If pre_processed=False then this is required, lowcut in \
        Hz, defaults to None.
    :type highcut: float
    :param highcut: If pre_processed=False then this is required, highcut in \
        Hz, defaults to None.
    :type filt_order: int
    :param filt_order:
        If pre_processed=False then this is required, filter order, defaults
        to 3.

    :returns: Newly cut template.
    :rtype: :class:`obspy.core.stream.Stream`
    """
    new_template = stack.copy()
    # Copy the data before we trim it to keep the stack safe
    # Get the earliest time in the template as this is when the detection is
    # taken.
    mintime = min([tr.stats.starttime for tr in template])
    # Generate a list of tuples of (station, channel, delay) with delay in
    # seconds
    delays = [(tr.stats.station, tr.stats.channel[-1],
               tr.stats.starttime - mintime) for tr in template]

    #  Process the data if necessary
    if not pre_processed:
        new_template = pre_processing.shortproc(st=new_template,
                                                lowcut=lowcut,
                                                highcut=highcut,
                                                filt_order=filt_order,
                                                samp_rate=samp_rate)
    # Loop through the stack and trim!
    out = Stream()
    for tr in new_template:
        # Find the matching delay
        delay = [
            d[2] for d in delays
            if d[0] == tr.stats.station and d[1] == tr.stats.channel[-1]
        ]
        if Z_include and len(delay) == 0:
            delay = [d[2] for d in delays if d[0] == tr.stats.station]
        if len(delay) == 0:
            Logger.error("No matching template channel found for stack channel"
                         " {0}.{1}".format(tr.stats.station, tr.stats.channel))
        else:
            for d in delay:
                out += tr.copy().trim(
                    starttime=tr.stats.starttime + d + pre_pad - pre_pick,
                    endtime=tr.stats.starttime + d + pre_pad + length -
                    pre_pick)
    return out
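A hedged example call for the function above; `stack` and `detection_template` are assumed to be obspy Streams produced by an earlier detection and stacking step, and the processing parameters are illustrative:

new_template = extract_from_stack(
    stack=stack,                  # assumed Stream of stacked detections
    template=detection_template,  # assumed Stream used to detect them
    length=3.0, pre_pick=0.15, pre_pad=30.0,
    pre_processed=False,          # filter and resample the stack here
    samp_rate=50.0, lowcut=2.0, highcut=9.0, filt_order=4)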
Example #15
def template_gen(method,
                 lowcut,
                 highcut,
                 samp_rate,
                 filt_order,
                 length,
                 prepick,
                 swin="all",
                 process_len=86400,
                 all_horiz=False,
                 delayed=True,
                 plot=False,
                 plotdir=None,
                 return_event=False,
                 min_snr=None,
                 parallel=False,
                 num_cores=False,
                 save_progress=False,
                 skip_short_chans=False,
                 **kwargs):
    """
    Generate processed and cut waveforms for use as templates.

    :type method: str
    :param method:
        Template generation method, must be one of ('from_client',
        'from_seishub', 'from_sac', 'from_meta_file'). Each method requires
        associated arguments, see note below.
    :type lowcut: float
    :param lowcut: Low cut (Hz), if set to None will not apply a lowcut.
    :type highcut: float
    :param highcut: High cut (Hz), if set to None will not apply a highcut.
    :type samp_rate: float
    :param samp_rate: New sampling rate in Hz.
    :type filt_order: int
    :param filt_order: Filter level (number of corners).
    :type length: float
    :param length: Length of template waveform in seconds.
    :type prepick: float
    :param prepick: Pre-pick time in seconds
    :type swin: str
    :param swin:
        P, S, P_all, S_all or all, defaults to all: see note in
        :func:`eqcorrscan.core.template_gen.template_gen`
    :type process_len: int
    :param process_len: Length of data in seconds to download and process.
    :type all_horiz: bool
    :param all_horiz:
        To use both horizontal channels even if there is only a pick on one of
        them.  Defaults to False.
    :type delayed: bool
    :param delayed: If True, each channel will begin relative to its own \
        pick-time, if set to False, each channel will begin at the same time.
    :type plot: bool
    :param plot: Plot templates or not.
    :type plotdir: str
    :param plotdir:
        The path to save plots to. If `plotdir=None` (default) then the figure
        will be shown on screen.
    :type return_event: bool
    :param return_event: Whether to return the event and process length or not.
    :type min_snr: float
    :param min_snr:
        Minimum signal-to-noise ratio for a channel to be included in the
        template, where signal-to-noise ratio is calculated as the ratio of
        the maximum amplitude in the template window to the rms amplitude in
        the whole window given.
    :type parallel: bool
    :param parallel: Whether to process data in parallel or not.
    :type num_cores: int
    :param num_cores:
        Number of cores to try and use, if False and parallel=True, will use
        either all your cores, or as many traces as in the data (whichever is
        smaller).
    :type save_progress: bool
    :param save_progress:
        Whether to save the resulting templates at every data step or not.
        Useful for long-running processes.
    :type skip_short_chans: bool
    :param skip_short_chans:
        Whether to ignore channels that have insufficient length data or not.
        Useful when the quality of data is not known, e.g. when downloading
        old, possibly triggered data from a datacentre.

    :returns: List of :class:`obspy.core.stream.Stream` Templates
    :rtype: list

    .. note::
        *Method specific arguments:*

        - `from_client` requires:
            :param str client_id:
                string passable by obspy to generate Client, or a Client
                instance
            :param `obspy.core.event.Catalog` catalog:
                Catalog of events to generate template for
            :param float data_pad: Pad length for data-downloads in seconds
        - `from_seishub` requires:
            :param str url: url to seishub database
            :param `obspy.core.event.Catalog` catalog:
                Catalog of events to generate template for
            :param float data_pad: Pad length for data-downloads in seconds
        - `from_sac` requires:
            :param list sac_files:
                obspy.core.stream.Stream of sac waveforms, or list of paths to
                sac waveforms.
            .. note::
                See `eqcorrscan.utils.sac_util.sactoevent` for details on
                how pick information is collected.
        - `from_meta_file` requires:
            :param str meta_file:
                Path to obspy-readable event file, or an obspy Catalog
            :param `obspy.core.stream.Stream` st:
                Stream containing waveform data for template. Note that this
                should be the same length of stream as you will use for the
                continuous detection, e.g. if you detect in day-long files,
                give this a day-long file!
            :param bool process:
                Whether to process the data or not, defaults to True.

    .. note::
        process_len should be set to the same length as used when computing
        detections using match_filter.match_filter, e.g. if you read
        in day-long data for match_filter, process_len should be 86400.

    .. rubric:: Example

    >>> from obspy.clients.fdsn import Client
    >>> from eqcorrscan.core.template_gen import template_gen
    >>> client = Client('NCEDC')
    >>> catalog = client.get_events(eventid='72572665', includearrivals=True)
    >>> # We are only taking two picks for this example to speed up the
    >>> # example, note that you don't have to!
    >>> catalog[0].picks = catalog[0].picks[0:2]
    >>> templates = template_gen(
    ...    method='from_client', catalog=catalog, client_id='NCEDC',
    ...    lowcut=2.0, highcut=9.0, samp_rate=20.0, filt_order=4, length=3.0,
    ...    prepick=0.15, swin='all', process_len=300, all_horiz=True)
    >>> templates[0].plot(equal_scale=False, size=(800,600)) # doctest: +SKIP

    .. figure:: ../../plots/template_gen.from_client.png

    .. rubric:: Example

    >>> from obspy import read
    >>> from eqcorrscan.core.template_gen import template_gen
    >>> # Get the path to the test data
    >>> import eqcorrscan
    >>> import os
    >>> TEST_PATH = os.path.dirname(eqcorrscan.__file__) + '/tests/test_data'
    >>> st = read(TEST_PATH + '/WAV/TEST_/' +
    ...           '2013-09-01-0410-35.DFDPC_024_00')
    >>> quakeml = TEST_PATH + '/20130901T041115.xml'
    >>> templates = template_gen(
    ...    method='from_meta_file', meta_file=quakeml, st=st, lowcut=2.0,
    ...    highcut=9.0, samp_rate=20.0, filt_order=3, length=2, prepick=0.1,
    ...    swin='S', all_horiz=True)
    >>> print(len(templates[0]))
    10
    >>> templates = template_gen(
    ...    method='from_meta_file', meta_file=quakeml, st=st, lowcut=2.0,
    ...    highcut=9.0, samp_rate=20.0, filt_order=3, length=2, prepick=0.1,
    ...    swin='S_all', all_horiz=True)
    >>> print(len(templates[0]))
    15

    .. rubric:: Example

    >>> from eqcorrscan.core.template_gen import template_gen
    >>> import glob
    >>> # Get all the SAC-files associated with one event.
    >>> sac_files = glob.glob(TEST_PATH + '/SAC/2014p611252/*')
    >>> templates = template_gen(
    ...    method='from_sac', sac_files=sac_files, lowcut=2.0, highcut=10.0,
    ...    samp_rate=25.0, filt_order=4, length=2.0, swin='all', prepick=0.1,
    ...    all_horiz=True)
    >>> print(templates[0][0].stats.sampling_rate)
    25.0
    >>> print(len(templates[0]))
    15
    """
    client_map = {'from_client': 'fdsn', 'from_seishub': 'seishub'}
    assert method in ('from_client', 'from_seishub', 'from_meta_file',
                      'from_sac')
    if not isinstance(swin, list):
        swin = [swin]
    process = True
    if method in ['from_client', 'from_seishub']:
        catalog = kwargs.get('catalog', Catalog())
        data_pad = kwargs.get('data_pad', 90)
        # Group catalog into days and only download the data once per day
        sub_catalogs = _group_events(catalog=catalog,
                                     process_len=process_len,
                                     template_length=length,
                                     data_pad=data_pad)
        if method == 'from_client':
            if isinstance(kwargs.get('client_id'), str):
                client = FDSNClient(kwargs.get('client_id', None))
            else:
                client = kwargs.get('client_id', None)
            available_stations = []
        else:
            client = SeisHubClient(kwargs.get('url', None), timeout=10)
            available_stations = client.waveform.get_station_ids()
    elif method == 'from_meta_file':
        if isinstance(kwargs.get('meta_file'), Catalog):
            catalog = kwargs.get('meta_file')
        elif kwargs.get('meta_file'):
            catalog = read_events(kwargs.get('meta_file'))
        elif kwargs.get('catalog'):
            catalog = kwargs.get('catalog')
        sub_catalogs = [catalog]
        st = kwargs.get('st', Stream())
        process = kwargs.get('process', True)
    elif method == 'from_sac':
        sac_files = kwargs.get('sac_files')
        if isinstance(sac_files, list):
            if isinstance(sac_files[0], (Stream, Trace)):
                # This is a list of streams...
                st = Stream(sac_files[0])
                for sac_file in sac_files[1:]:
                    st += sac_file
            else:
                sac_files = [read(sac_file)[0] for sac_file in sac_files]
                st = Stream(sac_files)
        else:
            st = sac_files
        # Make an event object...
        catalog = Catalog([sactoevent(st)])
        sub_catalogs = [catalog]

    temp_list = []
    process_lengths = []

    if "P_all" in swin or "S_all" in swin or all_horiz:
        all_channels = True
    else:
        all_channels = False
    for sub_catalog in sub_catalogs:
        if method in ['from_seishub', 'from_client']:
            Logger.info("Downloading data")
            st = _download_from_client(client=client,
                                       client_type=client_map[method],
                                       catalog=sub_catalog,
                                       data_pad=data_pad,
                                       process_len=process_len,
                                       available_stations=available_stations,
                                       all_channels=all_channels)
        Logger.info('Pre-processing data')
        st.merge()
        if len(st) == 0:
            Logger.info("No data")
            continue
        if process:
            data_len = max(
                [len(tr.data) / tr.stats.sampling_rate for tr in st])
            if 80000 < data_len < 90000:
                daylong = True
                starttime = min([tr.stats.starttime for tr in st])
                min_delta = min([tr.stats.delta for tr in st])
                # Cope with the common starttime less than 1 sample before the
                #  start of day.
                if (starttime + min_delta).date > starttime.date:
                    starttime = (starttime + min_delta)
                # Check if this is stupid:
                if abs(starttime - UTCDateTime(starttime.date)) > 600:
                    print(abs(starttime - UTCDateTime(starttime.date)))
                    daylong = False
                starttime = starttime.date
            else:
                daylong = False
            # Check if the required amount of data have been downloaded - skip
            # channels if arg set.
            if skip_short_chans:
                _st = Stream()
                for tr in st:
                    if np.ma.is_masked(tr.data):
                        _len = np.ma.count(tr.data) * tr.stats.delta
                    else:
                        _len = tr.stats.npts * tr.stats.delta
                    if _len < process_len * .8:
                        Logger.info(
                            "Data for {0} are too short, skipping".format(
                                tr.id))
                    else:
                        _st += tr
                st = _st
                if len(st) == 0:
                    Logger.info("No data")
                    continue
            if daylong:
                st = pre_processing.dayproc(st=st,
                                            lowcut=lowcut,
                                            highcut=highcut,
                                            filt_order=filt_order,
                                            samp_rate=samp_rate,
                                            parallel=parallel,
                                            starttime=UTCDateTime(starttime),
                                            num_cores=num_cores)
            else:
                st = pre_processing.shortproc(st=st,
                                              lowcut=lowcut,
                                              highcut=highcut,
                                              filt_order=filt_order,
                                              parallel=parallel,
                                              samp_rate=samp_rate,
                                              num_cores=num_cores)
        data_start = min([tr.stats.starttime for tr in st])
        data_end = max([tr.stats.endtime for tr in st])

        for event in sub_catalog:
            stations, channels, st_stachans = ([], [], [])
            if len(event.picks) == 0:
                Logger.warning('No picks for event {0}'.format(
                    event.resource_id))
                continue
            use_event = True
            # Check that the event is within the data
            for pick in event.picks:
                if not data_start < pick.time < data_end:
                    Logger.warning(
                        "Pick outside of data span: Pick time {0} Start "
                        "time {1} End time: {2}".format(
                            str(pick.time), str(data_start), str(data_end)))
                    use_event = False
            if not use_event:
                Logger.error('Event is not within data time-span')
                continue
            # Read in pick info
            Logger.debug("I have found the following picks")
            for pick in event.picks:
                if not pick.waveform_id:
                    Logger.warning(
                        'Pick not associated with waveforms, will not use:'
                        ' {0}'.format(pick))
                    continue
                Logger.debug(pick)
                stations.append(pick.waveform_id.station_code)
                channels.append(pick.waveform_id.channel_code)
            # Check to see if all picks have a corresponding waveform
            for tr in st:
                st_stachans.append('.'.join(
                    [tr.stats.station, tr.stats.channel]))
            # Cut and extract the templates
            template = _template_gen(event.picks,
                                     st,
                                     length,
                                     swin,
                                     prepick=prepick,
                                     plot=plot,
                                     all_horiz=all_horiz,
                                     delayed=delayed,
                                     min_snr=min_snr,
                                     plotdir=plotdir)
            process_lengths.append(len(st[0].data) / samp_rate)
            temp_list.append(template)
        if save_progress:
            if not os.path.isdir("eqcorrscan_temporary_templates"):
                os.makedirs("eqcorrscan_temporary_templates")
            for template in temp_list:
                template.write(
                    "eqcorrscan_temporary_templates{0}{1}.ms".format(
                        os.path.sep, template[0].stats.starttime),
                    format="MSEED")
        del st
    if return_event:
        return temp_list, catalog, process_lengths
    return temp_list
Example #16
def extract_from_stack(stack, template, length, pre_pick, pre_pad,
                       Z_include=False, pre_processed=True, samp_rate=False,
                       lowcut=False, highcut=False, filt_order=False):
    r"""Function to extract a new template from a stack of previous detections.
    Requires the stack, the template used to make the detections for the \
    stack, and we need to know if the stack has been pre-processed.

    :type stack: :class:obspy.Stream
    :param stack: Waveform stack from detections.  Can be of any length and \
        can have delays already included, or not.
    :type template: :class:obspy.Stream
    :param template: Template used to make the detections in the stack. Will \
        use the delays of this for the new template.
    :type length: float
    :param length: Length of new template in seconds
    :type pre_pick: float
    :param pre_pick: Extract additional data before the detection, seconds
    :type pre_pad: float
    :param pre_pad: Pad used in seconds when extracting the data, e.g. the \
        time before the detection extracted.  If using \
        clustering.extract_detections this is half the length of the extracted \
        waveform.
    :type Z_include: bool
    :param Z_include: If True will include any Z-channels even if there is \
        no template for this channel, as long as there is a template for this \
        station at a different channel.  If this is False and Z channels are \
        included in the template Z channels will be included in the \
        new_template anyway.
    :type pre_processed: bool
    :param pre_processed: Have the data been pre-processed, if True (default) \
        then we will only cut the data here.
    :type samp_rate: float
    :param samp_rate: If pre_processed=False then this is required, desired \
        sampling rate in Hz, defaults to False.
    :type lowcut: float
    :param lowcut: If pre_processed=False then this is required, lowcut in \
        Hz, defaults to False.
    :type highcut: float
    :param highcut: If pre_processed=False then this is required, highcut in \
        Hz, defaults to False
    :type filt_order: int
    :param filt_order: If pre_processed=False then this is required, filter \
        order, defaults to False

    :returns: obspy.Stream Newly cut template
    """
    from eqcorrscan.utils import pre_processing
    import warnings
    new_template = stack.copy()
    # Copy the data before we trim it to keep the stack safe
    # Get the earliest time in the template as this is when the detection is
    # taken.
    mintime = min([tr.stats.starttime for tr in template])
    # Generate a list of tuples of (station, channel, delay) with delay in
    # seconds
    delays = [(tr.stats.station, tr.stats.channel[-1],
               tr.stats.starttime - mintime) for tr in template]
    # Process the data if necessary - do this once, before trimming
    if not pre_processed:
        new_template = pre_processing.shortproc(new_template, lowcut,
                                                highcut, filt_order,
                                                samp_rate, 0)
    # Loop through the stack and trim!  Iterate over a static list so that
    # traces can be removed from the stream safely inside the loop.
    for tr in list(new_template):
        # Find the matching delay
        delay = [d[2] for d in delays if d[0] == tr.stats.station and
                 d[1] == tr.stats.channel[-1]]
        if Z_include and len(delay) == 0:
            delay = [d[2] for d in delays if d[0] == tr.stats.station]
        if len(delay) == 0:
            msg = ' '.join(['No matching template channel found for stack',
                            'channel', tr.stats.station, tr.stats.channel])
            warnings.warn(msg)
            new_template.remove(tr)
        elif len(delay) > 1:
            msg = ' '.join(['Multiple delays found for stack channel',
                            tr.stats.station, tr.stats.channel])
            warnings.warn(msg)
        else:
            tr.trim(starttime=tr.stats.starttime + delay[0] + pre_pad -
                    pre_pick,
                    endtime=tr.stats.starttime + delay[0] + pre_pad + length -
                    pre_pick)
    return new_template
Example #17
 if 'st' not in locals():
     print('No data for this day from pyasdf?!')
     continue
 else:
     print('Merging stream...')
     st.merge(fill_value='interpolate')
     day_st = st.copy()
     for event in day_cat:
         ev_name = str(event.resource_id).split('/')[2]
         origin_time = event.origins[0].time
         print('Trimming data around event time...')
         day_st.trim(origin_time - 120, origin_time + 120)
         print('Preprocessing data for day: ' + str(starttime.date))
         temp_st = pre_processing.shortproc(day_st,
                                            lowcut=1.0,
                                            highcut=20.0,
                                            filt_order=3,
                                            samp_rate=100,
                                            debug=0)
         del day_st
         print('Feeding stream to _template_gen...')
         template = _template_gen(event.picks,
                                  temp_st,
                                  length=4,
                                  swin='all',
                                  prepick=0.5)
         print('Writing event ' + ev_name + ' to file...')
         template.write('/media/rotnga_data/templates/2015/' +
                        ev_name + '.mseed',
                        format="MSEED")
         del temp_st, template
     del day_cat
Example #18
ynst = read("2014080316.YN.mseed").sort(['starttime']).trim()

st = ynst.select(station='ZAT')
# st += ynst.select(station='QIJ')
# st += ynst.select(station='PGE')
# st += ynst.select(station='DOC')
# st += ynst.select(station='XUW')

st.simulate(paz_remove=paz, paz_simulate=paz_1hz)
st.detrend()
# st.filter('bandpass', freqmin=20, freqmax=30,corners=4)
st = pre_processing.shortproc(st,
                              lowcut=2,
                              highcut=9,
                              filt_order=18,
                              samp_rate=100,
                              starttime=st[0].stats.starttime,
                              endtime=st[0].stats.endtime)

st = Stream(st)
templates = multi_template_gen(catalog, st, 5.19, plot=True)

#t = UTCDateTime("2014-08-03T08:30:19.095000")
#st = st.slice(t-10, t+36000)

#st.plot()

for i, t in enumerate(templates):
    print(t)
    # Write each template to its own file rather than overwriting template.ms
    t.write('template_%02i.ms' % i, format="MSEED")
Example #19
def party_relative_mags(party, self_files, shift_len, align_len, svd_len,
                        reject, wav_dir, min_amps, m, c, calibrate=False,
                        method='PCA', plot_svd=False):
    """
    Calculate the relative moments for detections in a Family using
    mag_calc.svd_moments()

    :param party: Party of detections
    :param self_files: List of self-detection wav files (in order of families)
    :param shift_len: Maximum shift length used in waveform alignment
    :param align_len: Length of waveform used for correlation in alignment
    :param svd_len: Length of waveform used in relative amplitude calc
    :param reject: Min cc threshold for accepted measurement
    :param wav_dir: Root directory of waveforms
    :param min_amps: Minimum number of relative measurements per pair
    :param m: m in Mw = (m * ML) + c regression between Ml and Mw
    :param c: c in Mw = (m * ML) + c regression between Ml and Mw
    :param calibrate: Flag for calibration to a priori Ml's
    :param method: 'PCA' or 'LSQR'
    :param plot_svd: Bool to plot results of svd relative amplitude calcs
    :return:
    """
    pty = party.copy()
    # sort self files and parties by template name
    pty.families.sort(key=lambda x: x.template.name)
    self_files.sort()
    ev_files = glob('{}/*'.format(wav_dir))
    ev_files.sort()
    # Use splitext rather than rstrip('.ms'): rstrip strips any trailing '.',
    # 'm' or 's' characters, which mangles names ending in those letters
    ev_files = {os.path.splitext(os.path.basename(f))[0]: f for f in ev_files}
    for i, fam in enumerate(pty.families):
        temp_wav = read(self_files[i])
        print('Starting work on family %s' % fam.template.name)
        if len(fam) == 0:
            print('No detections. Moving on.')
            continue
        temp = fam.template
        prepick = temp.prepick
        det_ids = [d.id for d in fam]
        # Read in waveforms for detections in family
        streams = [read(ev_files[id]) for id in det_ids]
        # Add template wav as the first element
        streams.insert(0, temp_wav)
        print('Template Stream: %s' % str(streams[0]))
        if len(streams[0]) == 0:
            print('Template %s waveforms did not get written. Investigate.' %
                  temp.name)
            continue
        # Process streams then copy to both ccc_streams and svd_streams
        print('Shortproc-ing streams')
        breakit = False
        for st in streams:
            # rms = [tr for tr in st if tr.stats.sampling_rate < temp.samp_rate]
            # for rm in rms:
            #     st.traces.remove(rm)
            try:
                shortproc(st=st, lowcut=temp.lowcut,
                          highcut=temp.highcut, filt_order=temp.filt_order,
                          samp_rate=temp.samp_rate)
            except ValueError:
                breakit = True
        if breakit:
            print('Something wrong in shortproc. Skip family')
            continue
        # Remove all traces with no picks before copying
        for str_ind, st in enumerate(streams):
            if str_ind == 0:
                event = temp.event
            else:
                event = fam.detections[str_ind-1].event
            rms = []
            for tr in st:
                try:
                    [pk for pk in event.picks
                     if pk.waveform_id.get_seed_string() == tr.id][0]
                except IndexError:
                    rms.append(tr)
            for rm in rms:
                st.traces.remove(rm)
        print('Copying streams')
        wrk_streams = copy.deepcopy(streams)
        svd_streams = copy.deepcopy(streams)  # For svd
        ccc_streams = copy.deepcopy(streams)
        event_list = [temp.event] + [d.event for d in fam.detections]
        try:
            # work out cccoh for each event with template
            cccohs = cc_coh_dets(streams=ccc_streams, events=event_list,
                                 length=svd_len, corr_prepick=prepick,
                                 shift=shift_len)
        except (AssertionError, ValueError) as e:
            # Issue with trimming above?
            print(e)
            continue
        for eind, st in enumerate(wrk_streams):
            if eind == 0:
                event = temp.event
            else:
                event = fam.detections[eind-1].event
            for tr in st:
                pk = [pk for pk in event.picks
                      if pk.waveform_id.get_seed_string() == tr.id][0]
                tr.trim(starttime=pk.time - prepick - shift_len,
                        endtime=pk.time + shift_len + align_len)
        st_seeds = list(set([tr.id for st in wrk_streams for tr in st]))
        st_seeds.sort()
        # Align streams with just P arrivals, then use longer st for svd
        print('Now aligning svd_streams')
        shift_inds = int(shift_len * fam.template.samp_rate)
        for st_seed in st_seeds:
            trs = []
            for i, st in enumerate(wrk_streams):
                if len(st.select(id=st_seed)) > 0:
                    trs.append((i, st.select(id=st_seed)[0]))
            inds, traces = zip(*trs)
            shifts, ccs = stacking.align_traces(trace_list=list(traces),
                                                shift_len=shift_inds,
                                                positive=True,
                                                master=traces[0].copy())
            # We now have shifts based on P correlation, shift and trim
            # larger wavs for svd
            for j, shift in enumerate(shifts):
                st = svd_streams[inds[j]]
                if inds[j] == 0:
                    event = temp.event
                else:
                    event = fam.detections[inds[j]-1].event
                if ccs[j] < reject:
                    svd_streams[inds[j]].remove(st.select(id=st_seed)[0])
                    print('Removing trace due to low cc value: %s' % ccs[j])
                    continue
                pk = [pk for pk in event.picks
                      if pk.waveform_id.get_seed_string() == st_seed][0]
                strt_tr = pk.time - prepick - shift
                st.select(id=st_seed)[0].trim(strt_tr, strt_tr + svd_len)
        if method == 'LSQR':
            print('Using least-squares method')
            event_list = []
            for st_id in st_seeds:
                st_list = []
                for stind, st in enumerate(svd_streams):
                    if len(st.select(id=st_id)) > 0:
                        st_list.append(stind)
                event_list.append(st_list)
            # event_list = np.asarray(event_list).tolist()
            u, sigma, v, sta_chans = svd(stream_list=svd_streams, full=True)
            try:
                M, events_out = svd_moments(u, sigma, v, sta_chans, event_list)
            except IOError as e:
                print('Family %s raised error %s' % (fam.template.name, e))
                return
        elif method == 'PCA':
            print('Using principal component method')
            M, events_out = svd_relative_amps(fam, svd_streams, min_amps,
                                              plot=plot_svd)
            print(M, events_out)
            if len(M) == 0:
                print('No amplitudes calculated, skipping')
                continue
        else:
            print('{} not valid argument for mag calc method'.format(method))
            return
        # If we have a Mag for template, calibrate moments
        if calibrate and len(fam.template.event.magnitudes) > 0:
            print('Converting relative amps to magnitudes')
            # Convert the template magnitude to seismic moment
            temp_mag = fam.template.event.magnitudes[-1].mag
            temp_Mw = ML_to_Mw(temp_mag, m, c)
            temp_mo = Mw_to_M0(temp_Mw)
            # Extrapolate from the template moment - relative moment
            # relationship to get the moment for relative moment = 1.0
            norm_mo = temp_mo / M[0]
            # Template is the first event in the list
            # Now these are weights which we can multiply the moments by
            moments = np.multiply(M, norm_mo)
            # Now convert to Mw
            Mw = [Mw_to_M0(mo, inverse=True) for mo in moments]
            # Convert to local
            Ml = [ML_to_Mw(mm, m, c, inverse=True) for mm in Mw]
            # Normalize moments to the template magnitude
            # Add calibrated mags to detection events
            for jabba, eind in enumerate(events_out):
                # Skip template waveform
                if eind == 0:
                    continue
                fam.detections[eind].event.magnitudes = [
                    Magnitude(mag=Mw[jabba], magnitude_type='Mw')]
                fam.detections[eind].event.comments.append(
                    Comment(text=str(cccohs[eind])))
                fam.detections[eind].event.magnitudes.append(
                    Magnitude(mag=Ml[jabba], magnitude_type='ML'))
                fam.detections[eind].event.preferred_magnitude_id = (
                    fam.detections[eind].event.magnitudes[-1].resource_id.id)
    return pty, cccohs
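The m and c arguments above parameterise the Mw = (m * ML) + c regression used for calibration. A sketch of what the ML_to_Mw helper is assumed to do (not the library's implementation):

def ml_to_mw(mag, m, c, inverse=False):
    """Convert ML to Mw via Mw = m * ML + c; invert to recover ML."""
    if inverse:
        return (mag - c) / m  # here `mag` is an Mw value
    return m * mag + c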
Example #20
def run_tutorial(plot=False,
                 process_len=3600,
                 num_cores=cpu_count(),
                 **kwargs):
    """Main function to run the tutorial dataset."""
    # First we want to load our templates
    template_names = glob.glob('tutorial_template_*.ms')

    if len(template_names) == 0:
        raise IOError('Template files not found, have you run the template ' +
                      'creation tutorial?')

    templates = [read(template_name) for template_name in template_names]

    # Work out what stations we have and get the data for them
    stations = []
    for template in templates:
        for tr in template:
            stations.append((tr.stats.station, tr.stats.channel))
    # Get a unique list of stations
    stations = list(set(stations))

    # We will loop through the data in chunks; these chunks can be any size -
    # in general we have used 1 day as our standard, but this can be as short
    # as five minutes (for MAD thresholds) or shorter for other threshold
    # metrics. However, the chunk size should be the same as your template
    # process_len.

    # You should test different parameters!!!
    start_time = UTCDateTime(2016, 1, 4)
    end_time = UTCDateTime(2016, 1, 5)
    chunks = []
    chunk_start = start_time
    while chunk_start < end_time:
        chunk_end = chunk_start + process_len
        if chunk_end > end_time:
            chunk_end = end_time
        chunks.append((chunk_start, chunk_end))
        chunk_start += process_len

    unique_detections = []

    # Set up a client to access the GeoNet database
    client = Client("GEONET")

    # Note that these chunks do not rely on each other, and could be run in
    # parallel on multiple nodes of a distributed cluster - see the SLURM
    # tutorial for an example of this.
    for t1, t2 in chunks:
        # Generate the bulk information to query the GeoNet database
        bulk_info = []
        for station in stations:
            bulk_info.append(('NZ', station[0], '*',
                              station[1][0] + 'H' + station[1][-1], t1, t2))

        # Note this will take a little while.
        print('Downloading seismic data, this may take a while')
        st = client.get_waveforms_bulk(bulk_info)
        # Merge the stream, it will be downloaded in chunks
        st.merge()

        # Pre-process the data to set frequency band and sampling rate
        # Note that this is, and MUST BE the same as the parameters used for
        # the template creation.
        print('Processing the seismic data')
        st = pre_processing.shortproc(st,
                                      lowcut=2.0,
                                      highcut=9.0,
                                      filt_order=4,
                                      samp_rate=20.0,
                                      num_cores=num_cores,
                                      starttime=t1,
                                      endtime=t2)
        # Convert from list to stream
        st = Stream(st)

        # Now we can conduct the matched-filter detection
        detections = match_filter.match_filter(template_names=template_names,
                                               template_list=templates,
                                               st=st,
                                               threshold=8.0,
                                               threshold_type='MAD',
                                               trig_int=6.0,
                                               plotvar=plot,
                                               plotdir='.',
                                               cores=num_cores,
                                               plot_format='png',
                                               **kwargs)

        # Now let's try and work out how many unique events we have, just to
        # compare with the GeoNet catalog of 20 events on this day in this
        # sequence.
        for master in detections:
            keep = True
            for slave in detections:
                if not master == slave and abs(master.detect_time -
                                               slave.detect_time) <= 1.0:
                    # If the events are within 1s of each other, test which
                    # was the 'best' match, i.e. the strongest detection
                    if not master.detect_val > slave.detect_val:
                        keep = False
                        print('Removed detection at %s with cccsum %s' %
                              (master.detect_time, master.detect_val))
                        print('Keeping detection at %s with cccsum %s' %
                              (slave.detect_time, slave.detect_val))
                        break
            if keep:
                unique_detections.append(master)
                print('Detection at: ' + str(master.detect_time) +
                      ' for template ' + master.template_name +
                      ' with a cross-correlation sum of: ' +
                      str(master.detect_val))
                # We can plot these too
                if plot:
                    stplot = st.copy()
                    template = templates[template_names.index(
                        master.template_name)]
                    lags = sorted([tr.stats.starttime for tr in template])
                    maxlag = lags[-1] - lags[0]
                    stplot.trim(starttime=master.detect_time - 10,
                                endtime=master.detect_time + maxlag + 10)
                    plotting.detection_multiplot(stplot, template,
                                                 [master.detect_time.datetime])
    print('We made a total of ' + str(len(unique_detections)) + ' detections')
    return unique_detections
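# The tutorial scripts normally finish with a main guard so they can be run
# directly; a sketch (the plot and num_cores values here are illustrative
# assumptions, not part of the example above):
if __name__ == '__main__':
    run_tutorial(plot=False, num_cores=4)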
Example #21
0
def stack_party(party,
                sac_dir,
                method='linear',
                filt_params=None,
                align=True,
                shift_len=0.1,
                prepick=2.,
                postpick=5.,
                reject=0.7,
                normalize=False,
                plot=False,
                outdir=None):
    """
    Return a stream for the linear stack of the templates in a multiplet.

    The approach here is to first stack all of the detections in a family
    over the rejection ccc threshold and THEN stack the Family stacks into
    the final stack for the multiplet. This avoids attempting to correlate
    detections from different Families with each other, which is nonsensical.

    :param party: Party for the multiplet we're interested in
    :param sac_dir: Directory of SAC files made for Stefan
    :param method: Stacking method: 'linear' or 'PWS'
    :param filt_params: (optional) Dictionary of filter parameters to use
        before aligning waveforms. Keys must be 'highcut', 'lowcut',
        'filt_order', and 'samp_rate'
    :param align: Whether or not to align the waveforms
    :param shift_len: Allowed shift in aligning in seconds
    :param prepick: Seconds before the pick to trim each trace
    :param postpick: Seconds after the pick to trim each trace
    :param reject: Correlation coefficient cutoff in aligning
    :param normalize: Whether to normalize before stacking
    :param plot: Alignment plot flag
    :param outdir: (optional) Directory to save the Family stacks to as
        miniSEED files
    :return: Dictionary of Family stacks keyed by template event resource_id
    """

    sac_dirs = glob('{}/2*'.format(sac_dir))
    fam_stacks = {}
    for fam in party:
        fam_id = fam.template.event.resource_id
        print('For Family {}'.format(fam_id))
        eids = [str(ev.resource_id).split('/')[-1] for ev in fam.catalog]
        raws = []
        for s_dir in sac_dirs:
            if s_dir.split('/')[-1] in eids:
                raws.append(
                    read('{}/*'.format(s_dir)).merge(fill_value='interpolate'))
        # Guard against empty detection directories (issue not yet resolved)
        lens = [len(raw) for raw in raws]
        if len(lens) == 0: continue
        if max(lens) == 0: continue
        print('Removing all traces without 3001 samples')
        for st in raws:
            for tr in st.copy():
                if len(tr.data) != 3001:
                    st.remove(tr)
        if filt_params:
            for raw in raws:
                shortproc(raw,
                          lowcut=filt_params['lowcut'],
                          highcut=filt_params['highcut'],
                          filt_order=filt_params['filt_order'],
                          samp_rate=filt_params['samp_rate'])
        print('Now trimming around pick times')
        z_streams = []
        for raw in raws:
            z_stream = Stream()
            for tr in raw.copy():
                if 'a' in tr.stats.sac:
                    strt = tr.stats.starttime
                    z_stream += tr.trim(
                        starttime=strt + tr.stats.sac['a'] - prepick,
                        endtime=strt + tr.stats.sac['a'] + postpick)
            if len(z_stream) > 0:
                z_streams.append(z_stream)
        # At the moment, the picks are based on P-arrival correlation already!
        if align:
            z_streams = align_design(z_streams,
                                     shift_len=shift_len,
                                     reject=reject,
                                     multiplex=False,
                                     no_missed=False,
                                     plot=plot)
        if method == 'linear':
            fam_stacks[fam_id] = linstack(z_streams, normalize=normalize)
        elif method == 'PWS':
            fam_stacks[fam_id] = PWS_stack(z_streams, normalize=normalize)
    if plot:
        # Plot up the stacks of the Families first
        for id, fam_stack in fam_stacks.items():
            fam_stack.plot(equal_scale=False)
    if outdir:
        if not os.path.isdir(outdir):
            os.mkdir(outdir)
        for id, fam_stack in fam_stacks.items():
            filename = '{}/Family_{}_stack.mseed'.format(
                outdir,
                str(id).split('/')[-1])
            fam_stack.write(filename, format='MSEED')
    return fam_stacks
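# Hypothetical usage sketch for stack_party: the party file, SAC directory,
# and filter values below are placeholders, and Party is assumed to be
# eqcorrscan.core.match_filter.Party (which provides Party().read()).
def stack_party_example():
    from eqcorrscan.core.match_filter import Party

    party = Party().read('multiplet_party.tgz')  # placeholder file
    fam_stacks = stack_party(
        party, sac_dir='/path/to/sac_event_dirs', method='linear',
        filt_params={'lowcut': 1.0, 'highcut': 20.0, 'filt_order': 3,
                     'samp_rate': 50.0},
        shift_len=0.1, reject=0.7, outdir='family_stacks')
    return fam_stacks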
Example #22
0
def cluster_cat(catalog,
                corr_thresh,
                corr_params=None,
                raw_wav_dir=None,
                dist_mat=False,
                out_cat=None,
                show=False,
                method='average'):
    """
    Cross correlate all templates in a tribe and return separate tribes for
    each cluster
    :param tribe: Tribe to cluster
    :param corr_thresh: Correlation threshold for clustering
    :param corr_params: Dictionary of filter parameters. Must include keys:
        lowcut, highcut, samp_rate, filt_order, pre_pick, length, shift_len,
        cores
    :param raw_wav_dir: Directory of waveforms to take from
    :param dist_mat: If there's a precomputed distance matrix, use this
        instead of doing all the correlations
    :param out_cat: Output catalog corresponding to the events
    :param show: Show the dendrogram? Careful as this can exceed max recursion
    :param wavs: Should we even bother with processing waveforms? Otherwise
        will just populate the tribe with an empty Stream
    :return:

    .. Note: Functionality here is pilaged from align design as we don't
        want the multiplexed portion of that function.
    """

    if corr_params and raw_wav_dir:
        shift_len = corr_params['shift_len']
        lowcut = corr_params['lowcut']
        highcut = corr_params['highcut']
        samp_rate = corr_params['samp_rate']
        filt_order = corr_params['filt_order']
        pre_pick = corr_params['pre_pick']
        length = corr_params['length']
        cores = corr_params['cores']
        raw_wav_files = glob('%s/*' % raw_wav_dir)
        raw_wav_files.sort()
        all_wavs = [wav.split('/')[-1].split('_')[-3] for wav in raw_wav_files]
        print(all_wavs[0])
        names = [
            ev.resource_id.id.split('/')[-1] for ev in catalog
            if ev.resource_id.id.split('/')[-1] in all_wavs
        ]
        print(names[0])
        wavs = [
            wav for wav in raw_wav_files
            if wav.split('/')[-1].split('_')[-3] in names
        ]
        print(wavs[0])
        new_cat = Catalog(events=[
            ev for ev in catalog if ev.resource_id.id.split('/')[-1] in names
        ])
        print('Processing temps')
        temp_list = [(shortproc(read('{}/*'.format(tmp)),
                                lowcut=lowcut,
                                highcut=highcut,
                                samp_rate=samp_rate,
                                filt_order=filt_order,
                                parallel=True,
                                num_cores=cores),
                      ev.resource_id.id.split('/')[-1])
                     for tmp, ev in zip(wavs, new_cat)]
        print('Clipping traces')
        rm_temps = []
        rm_ev = []  # Events to remove alongside their templates
        for i, temp in enumerate(temp_list):
            print('Clipping template %s' % new_cat[i].resource_id.id)
            rm_ts = []  # Make a list of traces with no pick to remove
            for tr in temp[0]:
                pk = [
                    pk for pk in new_cat[i].picks
                    if pk.waveform_id.station_code == tr.stats.station
                    and pk.waveform_id.channel_code == tr.stats.channel
                ]
                if len(pk) == 0:
                    rm_ts.append(tr)
                else:
                    tr.trim(starttime=pk[0].time - shift_len - pre_pick,
                            endtime=pk[0].time - pre_pick + length + shift_len)
            # Remove pickless traces
            for rm in rm_ts:
                temp[0].traces.remove(rm)
            # If trace lengths are internally inconsistent, remove template
            # and its event so catalog and distance matrix stay aligned
            if len(list(set([len(tr) for tr in temp[0]]))) > 1:
                rm_temps.append(temp)
                rm_ev.append(new_cat[i])
            # If template is now length 0, remove it and associated event
            elif len(temp[0]) == 0:
                rm_temps.append(temp)
                rm_ev.append(new_cat[i])
        for t in rm_temps:
            temp_list.remove(t)
        # Remove the corresponding events as well so catalog and distmat
        # are the same shape
        for rme in rm_ev:
            new_cat.events.remove(rme)
        print(new_cat)
        if out_cat:
            new_cat.write(out_cat, format="QUAKEML")
    print('Clustering')
    if isinstance(dist_mat, np.ndarray):
        print('Assuming the tribe provided is the same shape as dist_mat')
        # Dummy streams
        temp_list = [(Stream(), ev) for ev in catalog]
        groups = cluster_from_dist_mat(dist_mat=dist_mat,
                                       temp_list=temp_list,
                                       show=show,
                                       corr_thresh=corr_thresh,
                                       method=method)
    else:
        groups = clustering.cluster(temp_list,
                                    show=show,
                                    corr_thresh=corr_thresh,
                                    shift_len=shift_len * 2,
                                    save_corrmat=True,
                                    cores=cores)
    group_tribes = []
    group_cats = []
    if corr_params:
        for group in groups:
            group_tribes.append(
                Tribe(templates=[
                    Template(st=tmp[0],
                             name=tmp[1].resource_id.id.split('/')[-1],
                             event=tmp[1],
                             highcut=highcut,
                             lowcut=lowcut,
                             samp_rate=samp_rate,
                             filt_order=filt_order,
                             prepick=pre_pick) for tmp in group
                ]))
            group_cats.append(Catalog(events=[tmp[1] for tmp in group]))
    else:
        for group in groups:
            group_tribes.append(
                Tribe(templates=[
                    Template(st=tmp[0],
                             name=tmp[1].resource_id.id.split('/')[-1],
                             event=tmp[1].event,
                             highcut=None,
                             lowcut=None,
                             samp_rate=None,
                             filt_order=None,
                             prepick=None) for tmp in group
                ]))
            group_cats.append(Catalog(events=[tmp[1] for tmp in group]))
    return group_tribes, group_cats
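# Hypothetical usage sketch for cluster_cat; the catalog, waveform directory,
# and parameter values are placeholders, and corr_params keys mirror the
# docstring above.
def cluster_cat_example(catalog):
    corr_params = dict(lowcut=1.0, highcut=20.0, samp_rate=50.0,
                       filt_order=3, pre_pick=0.5, length=4.0,
                       shift_len=0.2, cores=4)
    group_tribes, group_cats = cluster_cat(
        catalog, corr_thresh=0.40, corr_params=corr_params,
        raw_wav_dir='/path/to/raw_wavs', out_cat='clustered_cat.xml',
        show=False, method='average')
    return group_tribes, group_cats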
Example #23
0
def run_tutorial(min_magnitude=2, shift_len=0.2, num_cores=4):
    import obspy
    if int(obspy.__version__.split('.')[0]) >= 1:
        from obspy.clients.fdsn import Client
    else:
        from obspy.fdsn import Client
    from obspy.core.event import Catalog
    from obspy import UTCDateTime
    from eqcorrscan.core import template_gen, match_filter, lag_calc
    from eqcorrscan.utils import pre_processing, catalog_utils

    client = Client('NCEDC')
    t1 = UTCDateTime(2004, 9, 28)
    t2 = t1 + 86400
    print('Downloading catalog')
    catalog = client.get_events(starttime=t1,
                                endtime=t2,
                                minmagnitude=min_magnitude,
                                minlatitude=35.7,
                                maxlatitude=36.1,
                                minlongitude=-120.6,
                                maxlongitude=-120.2,
                                includearrivals=True)
    # We don't need all the picks; let's take the information from the
    # five most used stations. Note that this is done to reduce computational
    # costs.
    catalog = catalog_utils.filter_picks(catalog,
                                         channels=['EHZ'],
                                         top_n_picks=5)
    print('Generating templates')
    templates = template_gen.from_client(catalog=catalog,
                                         client_id='NCEDC',
                                         lowcut=2.0,
                                         highcut=9.0,
                                         samp_rate=50.0,
                                         filt_order=4,
                                         length=3.0,
                                         prepick=0.15,
                                         swin='all',
                                         process_len=3600)
    start_time = UTCDateTime(2004, 9, 28, 17)
    end_time = UTCDateTime(2004, 9, 28, 20)
    process_len = 1800
    chunks = []
    chunk_start = start_time
    while chunk_start < end_time:
        chunk_end = chunk_start + process_len
        if chunk_end > end_time:
            chunk_end = end_time
        chunks.append((chunk_start, chunk_end))
        chunk_start += process_len

    all_detections = []
    picked_catalog = Catalog()
    template_names = [
        str(template[0].stats.starttime) for template in templates
    ]
    for t1, t2 in chunks:
        print('Downloading and processing for start-time: %s' % t1)
        # Download and process the data
        bulk_info = [(tr.stats.network, tr.stats.station, '*',
                      tr.stats.channel[0] + 'H' + tr.stats.channel[1], t1, t2)
                     for tr in templates[0]]
        # Just downloading a chunk of data
        st = client.get_waveforms_bulk(bulk_info)
        st.merge(fill_value='interpolate')
        st = pre_processing.shortproc(st,
                                      lowcut=2.0,
                                      highcut=9.0,
                                      filt_order=4,
                                      samp_rate=50.0,
                                      debug=0,
                                      num_cores=num_cores)
        detections = match_filter.match_filter(template_names=template_names,
                                               template_list=templates,
                                               st=st,
                                               threshold=8.0,
                                               threshold_type='MAD',
                                               trig_int=6.0,
                                               plotvar=False,
                                               plotdir='.',
                                               cores=num_cores)
        # Extract unique detections from set.
        unique_detections = []
        for master in detections:
            keep = True
            for slave in detections:
                if not master == slave and\
                   abs(master.detect_time - slave.detect_time) <= 1.0:
                    # If the events are within 1s of each other, test which
                    # was the 'best' match, i.e. the strongest detection
                    if not master.detect_val > slave.detect_val:
                        keep = False
                        break
            if keep:
                unique_detections.append(master)
        all_detections += unique_detections

        picked_catalog += lag_calc.lag_calc(detections=unique_detections,
                                            detect_data=st,
                                            template_names=template_names,
                                            templates=templates,
                                            shift_len=shift_len,
                                            min_cc=0.5,
                                            interpolate=True,
                                            plot=False)
    # Return all of this so that we can use this function for testing.
    return all_detections, picked_catalog, templates, template_names
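# The de-duplication loop above appears in both tutorials; a hypothetical
# stand-alone version of the same pattern (keep only the strongest of any
# detections within min_sep seconds of each other) might look like this.
# Note that, as written, two detections with exactly equal detect_val will
# remove each other.
def get_unique_detections(detections, min_sep=1.0):
    unique_detections = []
    for master in detections:
        keep = True
        for slave in detections:
            if master != slave and \
               abs(master.detect_time - slave.detect_time) <= min_sep and \
               master.detect_val <= slave.detect_val:
                # A nearby detection is at least as strong; drop this one
                keep = False
                break
        if keep:
            unique_detections.append(master)
    return unique_detections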
Example #24
0
    def test_match_filter(self, samp_rate=20.0, debug=0):
        """
        Function to test the capabilities of match_filter and just check that \
        it is working!  Uses synthetic templates and seeded, randomised data.

        :type debug: int
        :param debug: Debug level, higher the number the more output.
        """
        from eqcorrscan.utils import pre_processing
        from eqcorrscan.utils import plotting
        from eqcorrscan.core import match_filter
        from eqcorrscan.utils.synth_seis import generate_synth_data
        from obspy import UTCDateTime
        import string
        # Generate a random dataset
        templates, data, seeds = generate_synth_data(nsta=5,
                                                     ntemplates=2,
                                                     nseeds=50,
                                                     samp_rate=samp_rate,
                                                     t_length=6.0,
                                                     max_amp=5.0,
                                                     max_lag=12.0,
                                                     debug=debug)
        # Notes to the user: If you use more templates you should ensure they
        # are more different, e.g. set the data to have larger moveouts,
        # otherwise similar templates will detect events seeded by another
        # template.
        # Test the pre_processing functions
        data = pre_processing.dayproc(st=data,
                                      lowcut=2.0,
                                      highcut=8.0,
                                      filt_order=3,
                                      samp_rate=samp_rate,
                                      debug=0,
                                      starttime=UTCDateTime(0))
        if debug > 0:
            data.plot()
        # Filter the data and the templates
        for template in templates:
            pre_processing.shortproc(st=template,
                                     lowcut=2.0,
                                     highcut=8.0,
                                     filt_order=3,
                                     samp_rate=samp_rate)
            if debug > 0:
                template.plot()
        template_names = list(string.ascii_lowercase)[0:len(templates)]
        detections = match_filter.match_filter(template_names=template_names,
                                               template_list=templates,
                                               st=data,
                                               threshold=10.0,
                                               threshold_type='MAD',
                                               trig_int=6.0,
                                               plotvar=False,
                                               plotdir='.',
                                               cores=1,
                                               debug=0)
        # Compare the detections to the seeds
        print('This test made ' + str(len(detections)) + ' detections')
        ktrue = 0
        kfalse = 0
        for detection in detections:
            print(detection)
            i = template_names.index(detection.template_name)
            t_seeds = seeds[i]
            dtime_samples = int(
                (detection.detect_time - UTCDateTime(0)) * samp_rate)
            if dtime_samples in t_seeds['time']:
                j = list(t_seeds['time']).index(dtime_samples)
                print('Detection at SNR of: ' + str(t_seeds['SNR'][j]))
                ktrue += 1
            else:
                min_diff = min(abs(t_seeds['time'] - dtime_samples))
                if min_diff < 10:
                    # If there is a match within ten samples then it is
                    # good enough
                    j = list(abs(t_seeds['time'] -
                                 dtime_samples)).index(min_diff)
                    print('Detection at SNR of: ' + str(t_seeds['SNR'][j]))
                    ktrue += 1
                else:
                    print('Detection at sample: ' + str(dtime_samples) +
                          ' does not match anything in seed times:')
                    kfalse += 1
                print('Minimum difference in samples is: ' + str(min_diff))
        # Plot the detections
        if debug > 3:
            for i, template in enumerate(templates):
                times = [
                    d.detect_time.datetime for d in detections
                    if d.template_name == template_names[i]
                ]
                print(times)
                plotting.detection_multiplot(data, template, times)
        # Set an 'acceptable' ratio of positive to false detections
        print(
            str(ktrue) + ' true detections and ' + str(kfalse) +
            ' false detections')
        self.assertTrue(kfalse / ktrue < 0.25)
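# Worked example of the time-to-sample conversion used above: with data
# starting at UTCDateTime(0) and samp_rate = 20.0 Hz, a detection one
# minute into the data falls at sample 1200.
from obspy import UTCDateTime

samp_rate = 20.0
detect_time = UTCDateTime(0) + 60.0
assert int((detect_time - UTCDateTime(0)) * samp_rate) == 1200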
Example #25
0
def from_quakeml(quakeml, st, lowcut, highcut, samp_rate, filt_order,
                 length, prepick, swin, debug=0, plot=False):
    """
    Generate a multiplexed template from a local quakeML file.

    Function to generate a template from a local quakeml file \
    and an obspy.Stream object.

    :type quakeml: str
    :param quakeml: QuakeML file containing pick information, can contain \
        multiple events.
    :type st: obspy.core.stream.Stream
    :param st: Stream containing waveform data for template (hopefully). \
        Note that this should be the same length of stream as you will use \
        for the continuous detection, e.g. if you detect in day-long files, \
        give this a day-long file!
    :type lowcut: float
    :param lowcut: Low cut (Hz), if set to None will look in template \
            defaults file
    :type highcut: float
    :param highcut: High cut (Hz), if set to None will look in template \
            defaults file
    :type samp_rate: float
    :param samp_rate: New sampling rate in Hz, if set to None will look in \
            template defaults file
    :type filt_order: int
    :param filt_order: Filter level, if set to None will look in \
            template defaults file
    :type length: float
    :param length: Extract length in seconds, if None will look in template \
            defaults file.
    :type prepick: float
    :param prepick: Pre-pick time in seconds
    :type swin: str
    :param swin: Either 'all', 'P' or 'S', to select which phases to output.
    :type debug: int
    :param debug: Level of debugging output, higher=more
    :type plot: bool
    :param plot: Display template plots or not

    :returns: list of obspy.core.stream.Stream newly cut templates

    .. warning:: We suggest giving this function a full day of data, to \
        ensure templates are generated with **exactly** the same processing \
        as the continuous data.  Not doing this will result in slightly \
        reduced cross-correlation values.

    .. rubric:: Example

    >>> from obspy import read
    >>> from eqcorrscan.core.template_gen import from_quakeml
    >>> st = read('eqcorrscan/tests/test_data/WAV/TEST_/' +
    ...           '2013-09-01-0410-35.DFDPC_024_00')
    >>> quakeml = 'eqcorrscan/tests/test_data/20130901T041115.xml'
    >>> templates = from_quakeml(quakeml=quakeml, st=st, lowcut=2.0,
    ...                          highcut=9.0, samp_rate=20.0, filt_order=3,
    ...                          length=2, prepick=0.1, swin='S')
    >>> print(len(templates[0]))
    15
    """
    # Perform some checks first
    import os
    import warnings
    if not os.path.isfile(quakeml):
        raise IOError('QuakeML file does not exist')
    import obspy
    if int(obspy.__version__.split('.')[0]) >= 1:
        from obspy import read_events
    else:
        from obspy import readEvents as read_events
    from obspy import UTCDateTime
    from eqcorrscan.utils import pre_processing
    stations = []
    channels = []
    st_stachans = []
    # Process waveform data
    st.merge(fill_value='interpolate')
    # Work out if the data are daylong or not...
    data_len = max([len(tr.data)/tr.stats.sampling_rate for tr in st])
    if 80000 < data_len < 90000:
        daylong = True
    else:
        daylong = False
    if daylong:
        st = pre_processing.dayproc(
            st, lowcut, highcut, filt_order, samp_rate, debug=debug,
            starttime=UTCDateTime(st[0].stats.starttime.date))
    else:
        st = pre_processing.shortproc(st, lowcut, highcut, filt_order,
                                      samp_rate, debug=debug)
    data_start = min([tr.stats.starttime for tr in st])
    data_end = max([tr.stats.endtime for tr in st])
    # Read QuakeML file into Catalog class
    catalog = read_events(quakeml)
    templates = []
    for event in catalog:
        use_event = True
        # Check that the event is within the data
        for pick in event.picks:
            if not data_start < pick.time < data_end:
                if debug > 0:
                    print('Pick outside of data span:')
                    print('Pick time: ' + str(pick.time))
                    print('Start time: ' + str(data_start))
                    print('End time: ' + str(data_end))
                use_event = False
        if not use_event:
            warnings.warn('Event is not within data time-span')
            continue
        # Read in pick info
        if debug > 0:
            print("I have found the following picks")
        for pick in event.picks:
            if debug > 0:
                print(' '.join([pick.waveform_id.station_code,
                                pick.waveform_id.channel_code,
                                pick.phase_hint, str(pick.time)]))
            stations.append(pick.waveform_id.station_code)
            channels.append(pick.waveform_id.channel_code)
        # Check to see if all picks have a corresponding waveform
        for tr in st:
            st_stachans.append('.'.join([tr.stats.station, tr.stats.channel]))
        for i in range(len(stations)):
            if not '.'.join([stations[i], channels[i]]) in st_stachans:
                warnings.warn('No data provided for ' + stations[i] + '.' +
                              channels[i])
        st1 = st.copy()
        # Cut and extract the templates
        template = _template_gen(event.picks, st1, length, swin,
                                 prepick=prepick, plot=plot, debug=debug)
        templates.append(template)
    return templates
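# The day-long check in from_quakeml, pulled out as a minimal sketch: a
# nominal day is 86400 s, so anything between 80000 s and 90000 s of data
# is treated as day-long and routed to dayproc rather than shortproc.
def is_daylong(st):
    """Sketch of the length heuristic used above."""
    data_len = max(len(tr.data) / tr.stats.sampling_rate for tr in st)
    return 80000 < data_len < 90000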
Example #26
0
def from_sac(sac_files, lowcut, highcut, samp_rate, filt_order, length, swin,
             prepick=0.05, debug=0, plot=False):
    """
    Generate a multiplexed template from a list of SAC files.

    Function to read picks and waveforms from SAC data, and generate a \
    template from these. Usually sac_files is a list of all single-channel \
    SAC files for a given event, a single, multi-channel template will be \
    created from these traces.

    **All files listed in sac_files should be associated with a single event.**

    :type sac_files: list
    :param sac_files: obspy.core.stream.Stream of sac waveforms, or
        list of paths to sac waveforms.
    :type lowcut: float
    :param lowcut: Low cut (Hz), if set to None will look in template \
            defaults file
    :type highcut: float
    :param highcut: High cut (Hz), if set to None will look in template \
            defaults file
    :type samp_rate: float
    :param samp_rate: New sampling rate in Hz, if set to None will look in \
            template defaults file
    :type filt_order: int
    :param filt_order: Filter level, if set to None will look in \
            template defaults file
    :type swin: str
    :param swin: Either 'all', 'P' or 'S', to select which phases to output.
    :type length: float
    :param length: Extract length in seconds, if None will look in template \
            defaults file.
    :type prepick: float
    :param prepick: Length to extract prior to the pick in seconds.
    :type debug: int
    :param debug: Debug level, higher number=more output.
    :type plot: bool
    :param plot: Turns template plotting on or off.

    :returns: obspy.core.stream.Stream newly cut template

    .. note:: This functionality is not supported for obspy versions below \
        1.0.0 as reference times are not read in by SACIO, which are needed \
        for defining pick times.

    .. rubric:: Example

    >>> from eqcorrscan.core.template_gen import from_sac
    >>> import glob
    >>> # Get all the SAC-files associated with one event.
    >>> sac_files = glob.glob('eqcorrscan/tests/test_data/SAC/2014p611252/*')
    >>> template = from_sac(sac_files=sac_files, lowcut=2.0, highcut=10.0,
    ...                     samp_rate=25.0, filt_order=4, length=2.0,
    ...                     swin='all', prepick=0.1)
    >>> print(template[0].stats.sampling_rate)
    25.0
    >>> print(len(template))
    15
    """
    from obspy import read, Stream
    from eqcorrscan.utils.sac_util import sactoevent
    from eqcorrscan.utils import pre_processing
    # Check whether sac_files is a stream or a list
    if isinstance(sac_files, list):
        if isinstance(sac_files[0], str):
            # Python 3: all strings are unicode
            sac_files = [read(sac_file)[0] for sac_file in sac_files]
        if isinstance(sac_files[0], Stream):
            # This is a list of streams...
            st = sac_files[0]
            for sac_file in sac_files[1:]:
                st += sac_file
        else:
            # A list of traces
            st = Stream(sac_files)
    elif isinstance(sac_files, Stream):
        st = sac_files
    # Make an event object...
    event = sactoevent(st, debug=debug)
    # Process the data
    st.merge(fill_value='interpolate')
    st = pre_processing.shortproc(st, lowcut, highcut, filt_order,
                                  samp_rate, debug)
    template = _template_gen(picks=event.picks, st=st, length=length,
                             swin=swin, prepick=prepick, plot=plot,
                             debug=debug)
    return template
Example #27
0
def from_sac(sac_files, lowcut, highcut, samp_rate, filt_order, length, swin,
             prepick=0.05, debug=0, plot=False):
    """Function to read picks and waveforms from SAC data, and generate a \
    template from these. Usually sac_files is a list of all single-channel \
    SAC files for a given event, a single, multi-channel template will be \
    created from these traces.

    :type sac_files: list or stream
    :param sac_files: List or stream of sac waveforms, or list of paths to \
        sac waveforms.
    :type lowcut: float
    :param lowcut: Low cut (Hz), if set to None will look in template \
            defaults file
    :type highcut: float
    :param highcut: High cut (Hz), if set to None will look in template \
            defaults file
    :type samp_rate: float
    :param samp_rate: New sampling rate in Hz, if set to None will look in \
            template defaults file
    :type filt_order: int
    :param filt_order: Filter level, if set to None will look in \
            template defaults file
    :type swin: str
    :param swin: Either 'all', 'P' or 'S', to select which phases to output.
    :type length: float
    :param length: Extract length in seconds, if None will look in template \
            defaults file.
    :type prepick: float
    :param prepick: Length to extract prior to the pick in seconds.
    :type debug: int
    :param debug: Debug level, higher number=more output.
    :type plot: bool
    :param plot: Turns template plotting on or off.

    :returns: obspy.core.stream.Stream newly cut template

    .. note:: This functionality is not supported for obspy versions below \
        1.0.0 as reference times are not read in by SACIO, which are needed \
        for defining pick times.
    """
    from obspy import read, Stream
    from eqcorrscan.utils.sac_util import sactoevent
    from eqcorrscan.utils import pre_processing
    # Check whether sac_files is a stream or a list
    if isinstance(sac_files, list):
        if isinstance(sac_files[0], str):
            # Python 3: all strings are unicode
            sac_files = [read(sac_file)[0] for sac_file in sac_files]
        if isinstance(sac_files[0], Stream):
            # This is a list of streams...
            st = sac_files[0]
            for sac_file in sac_files[1:]:
                st += sac_file
        else:
            # A list of traces
            st = Stream(sac_files)
    elif isinstance(sac_files, Stream):
        st = sac_files
    # Make an event object...
    event = sactoevent(st, debug=debug)
    # Process the data
    st.merge(fill_value='interpolate')
    st = pre_processing.shortproc(st, lowcut, highcut, filt_order,
                                  samp_rate, debug)
    template = _template_gen(picks=event.picks, st=st, length=length,
                             swin=swin, prepick=prepick, plot=plot,
                             debug=debug)
    return template
Example #28
0
def from_sfile(sfile, lowcut, highcut, samp_rate, filt_order, length, swin,
               debug=0):
    """
    Function to read in picks from sfile then generate the template from the
    picks within this and the wavefile found in the pick file.

    :type sfile: string
    :param sfile: sfilename must be the \
        path to a seisan nordic type s-file containing waveform and pick \
        information.
    :type lowcut: float
    :param lowcut: Low cut (Hz), if set to None will look in template\
            defaults file
    :type highcut: float
    :param highcut: High cut (Hz), if set to None will look in template\
            defaults file
    :type samp_rate: float
    :param samp_rate: New sampling rate in Hz, if set to None will look in\
            template defaults file
    :type filt_order: int
    :param filt_order: Filter level, if set to None will look in\
            template defaults file
    :type swin: str
    :param swin: Either 'all', 'P' or 'S', to select which phases to output.
    :type length: float
    :param length: Extract length in seconds, if None will look in template\
            defaults file.
    :type debug: int
    :param debug: Debug level, higher number=more output.
    """
    # Perform some checks first
    import os
    if not os.path.isfile(sfile):
        raise IOError('sfile does not exist')

    from eqcorrscan.utils import Sfile_util
    # Read in the header of the sfile
    wavefiles = Sfile_util.readwavename(sfile)
    pathparts = sfile.split('/')[0:-1]
    wavpath = ''
    for part in pathparts:
        if part == 'REA':
            part = 'WAV'
        wavpath += part + '/'
    from obspy import read as obsread
    from eqcorrscan.utils import pre_processing
    # Read in waveform file
    for wavefile in wavefiles:
        print("I am going to read waveform data from: " + wavpath + wavefile)
        if 'st' in locals():
            st += obsread(wavpath + wavefile)
        else:
            st = obsread(wavpath + wavefile)
    for tr in st:
        if tr.stats.sampling_rate < samp_rate:
            print('Sampling rate of data is lower than sampling rate asked '
                  'for')
            print('As this is not good practice for correlations I will '
                  'not do this')
            raise ValueError("Trace: " + tr.stats.station +
                             " sampling rate: " + str(tr.stats.sampling_rate))
    # Read in pick info
    picks = Sfile_util.readpicks(sfile)
    print("I have found the following picks")
    for pick in picks:
        print(pick.station + ' ' + pick.channel + ' ' + pick.phase + ' ' +
              str(pick.time))

    # Process waveform data
    st = pre_processing.shortproc(st, lowcut, highcut, filt_order,
                                  samp_rate, debug)
    st1 = _template_gen(picks, st, length, swin)
    return st1
Example #29
0
    call('NLLoc /Users/home/hoppche/NLLoc/mrp/run/nlloc_mrp.in', shell=True)
    # Now reading NLLoc output back into catalog as new origin
    out_w_ext = glob(outfile + '*.grid0.loc.hyp')
    new_o = read_nlloc_hyp(out_w_ext[0],
                           coordinate_converter=my_conversion,
                           picks=ev.picks)
    ev.origins.append(new_o[0].origins[0])
    ev.preferred_origin_id = str(new_o[0].origins[0].resource_id)

# Cut templates for each new event based on new picks
for event in refined_cat:
    ev_name = str(event.resource_id).split('/')[2]
    st = template_dict[event.resource_id]
    st1 = pre_processing.shortproc(st,
                                   lowcut=1.0,
                                   highcut=20.0,
                                   filt_order=3,
                                   samp_rate=50,
                                   debug=0)
    print('Feeding stream to _template_gen...')
    template = template_gen._template_gen(event.picks,
                                          st1,
                                          length=4.0,
                                          swin='all',
                                          prepick=0.5)
    print('Writing event ' + ev_name + ' to file...')
    template.write('/media/chet/hdd/seismic/NZ/templates/rotnga_2015/' +
                   'refined_picks/' + ev_name + '_50Hz.mseed',
                   format="MSEED")
    del st, st1, template
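# my_conversion is defined earlier in the original script (not shown in this
# excerpt). read_nlloc_hyp expects a coordinate_converter that maps NLLoc
# grid coordinates (x, y, z) to (longitude, latitude, depth); a purely
# hypothetical converter with a placeholder grid origin might look like:
import numpy as np

def my_conversion_sketch(x, y, z):
    origin_lon, origin_lat = 176.2, -38.6  # placeholder grid origin
    new_x = origin_lon + (x / (111.111 * np.cos(np.radians(origin_lat))))
    new_y = origin_lat + (y / 111.111)  # ~111.111 km per degree of latitude
    return new_x, new_y, z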
Example #30
0
def from_sfile(sfile, lowcut, highcut, samp_rate, filt_order, length, swin,
               prepick=0.05, debug=0, plot=False):
    r"""Function to read in picks from sfile then generate the template from \
    the picks within this and the wavefile found in the pick file.

    :type sfile: string
    :param sfile: sfilename must be the \
        path to a seisan nordic type s-file containing waveform and pick \
        information.
    :type lowcut: float
    :param lowcut: Low cut (Hz), if set to None will look in template \
            defaults file
    :type highcut: float
    :param highcut: High cut (Hz), if set to None will look in template \
            defaults file
    :type samp_rate: float
    :param samp_rate: New sampling rate in Hz, if set to None will look in \
            template defaults file
    :type filt_order: int
    :param filt_order: Filter level, if set to None will look in \
            template defaults file
    :type swin: str
    :param swin: Either 'all', 'P' or 'S', to select which phases to output.
    :type length: float
    :param length: Extract length in seconds, if None will look in template \
            defaults file.
    :type prepick: float
    :param prepick: Length to extract prior to the pick in seconds.
    :type debug: int
    :param debug: Debug level, higher number=more output.
    :type plot: bool
    :param plot: Turns template plotting on or off.

    :returns: obspy.core.stream.Stream newly cut template

    .. warning:: This will use whatever data is pointed to in the s-file; if \
        this is not the continuous data, we recommend using other functions. \
        Differences in processing between short files and day-long files \
        (inherent to resampling) will produce lower cross-correlations.
    """
    # Perform some checks first
    import os
    if not os.path.isfile(sfile):
        raise IOError('sfile does not exist')

    from eqcorrscan.utils import pre_processing
    from eqcorrscan.utils import sfile_util
    from obspy import read as obsread
    # Read in the header of the sfile
    wavefiles = sfile_util.readwavename(sfile)
    pathparts = sfile.split('/')[0:-1]
    new_path_parts = []
    for part in pathparts:
        if part == 'REA':
            part = 'WAV'
        new_path_parts.append(part)
    # The * unpacks the list so os.path.join receives separate arguments
    wavpath = os.path.join(*new_path_parts) + '/'
    # In case of absolute paths (not handled with .split() --> .join())
    if sfile[0] == '/':
        wavpath = '/' + wavpath
    # Read in waveform file
    for wavefile in wavefiles:
        print(''.join(["I am going to read waveform data from: ", wavpath,
                       wavefile]))
        if 'st' not in locals():
            st = obsread(wavpath + wavefile)
        else:
            st += obsread(wavpath + wavefile)
    for tr in st:
        if tr.stats.sampling_rate < samp_rate:
            print('Sampling rate of data is lower than sampling rate asked ' +
                  'for')
            print('Not good practice for correlations: I will not do this')
            raise ValueError("Trace: " + tr.stats.station +
                             " sampling rate: " + str(tr.stats.sampling_rate))
    # Read in pick info
    catalog = sfile_util.readpicks(sfile)
    # Read the list of Picks for this event
    picks = catalog[0].picks
    print("I have found the following picks")
    for pick in picks:
        print(' '.join([pick.waveform_id.station_code,
                        pick.waveform_id.channel_code, pick.phase_hint,
                        str(pick.time)]))

    # Process waveform data
    st.merge(fill_value='interpolate')
    st = pre_processing.shortproc(st, lowcut, highcut, filt_order,
                                  samp_rate, debug)
    st1 = _template_gen(picks=picks, st=st, length=length, swin=swin,
                        prepick=prepick, plot=plot, debug=debug)
    return st1
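# Hypothetical usage sketch for from_sfile; the s-file path below is a
# placeholder and must sit in a REA directory with the waveforms in the
# parallel WAV directory, as described in the warning above:
def from_sfile_example():
    template = from_sfile('REA/TEST_/01-0410-35L.S201309', lowcut=2.0,
                          highcut=9.0, samp_rate=20.0, filt_order=4,
                          length=3.0, swin='all', prepick=0.1)
    return template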
Example #31
0
def party_relative_mags(party, self_files, shift_len, align_len, svd_len,
                        reject, sac_dir, min_amps, calibrate=False,
                        method='PCA'):
    """
    Calculate the relative moments for detections in a Family using
    mag_calc.svd_moments()

    :param party: Party of detections
    :param self_files: List of csv files listing self-detection ids
    :param shift_len: Maximum shift length used in waveform alignment
    :param align_len: Length of waveform used for correlation in alignment
    :param svd_len: Length of waveform used in relative amplitude calc
    :param reject: Min cc threshold for accepted measurement
    :param sac_dir: Root directory of waveforms
    :param min_amps: Minimum number of relative measurements per pair
    :param calibrate: Flag for calibration to a priori Ml's
    :param method: 'PCA' or 'LSQR'
    :return:
    """

    # First read-in self detection names
    selfs = []
    for self_file in self_files:
        with open(self_file, 'r') as f:
            rdr = csv.reader(f)
            for row in rdr:
                selfs.append(str(row[0]))
    for fam in party.families:
        print('Starting work on family %s' % fam.template.name)
        if len(fam) == 1:
            print('Only self-detection. Moving on.')
            continue
        temp = fam.template
        prepick = temp.prepick
        events = [det.event for det in fam.detections]
        # Here we'll read in the waveforms and trim from Stefan's directory
        # of SAC files so as not to duplicate data
        ev_dirs = ['%s%s' % (sac_dir, str(ev.resource_id).split('/')[-1])
                   for ev in events]
        streams = []
        if len([i for i, ev_dir in enumerate(ev_dirs)
                    if ev_dir.split('/')[-1] in selfs]) == 0:
            print('Family %s has no self detection. Investigate'
                  % fam.template.name)
            continue
        self_ind = [i for i, ev_dir in enumerate(ev_dirs)
                    if ev_dir.split('/')[-1] in selfs][0]
        # Read in Z components of events which we wrote for stefan
        # Many of these ev_dirs will not exist!
        for i, ev_dir in enumerate(ev_dirs):
            raw_st = Stream()
            print('Reading %s' % ev_dir)
            for wav_file in glob('%s/*Z.sac' % ev_dir):
                print('...file %s' % wav_file)
                raw_tr = read(wav_file)[0]
                start = raw_tr.stats.starttime + raw_tr.stats.sac['a'] - 3.
                end = start + 10
                raw_tr.trim(starttime=start, endtime=end)
                raw_st.traces.append(raw_tr)
            streams.append(raw_st)
        # Move the self detection to the first element
        streams.insert(0, streams.pop(self_ind))
        print('Moved self detection to top of list')
        print('Template Stream: %s' % str(streams[0]))
        if len(streams[0]) == 0:
            print('Template %s waveforms did not get written to SAC.' %
                  temp.name)
            continue
        # Front/back clip hardcoded relative to wavs starting 3 s before pick
        front_clip = 3.0 - shift_len - 0.05 - prepick
        back_clip = front_clip + align_len + (2 * shift_len) + 0.05
        wrk_streams = [] # For aligning
        # Process streams then copy to both ccc_streams and svd_streams
        bad_streams = []
        for i, st in enumerate(list(streams)):
            try:
                shortproc(st=streams[i], lowcut=temp.lowcut,
                          highcut=temp.highcut, filt_order=temp.filt_order,
                          samp_rate=temp.samp_rate)
                wrk_streams.append(st.copy())
            except ValueError as e:
                print('ValueError reads:')
                print(str(e))
                print('Attempting to remove bad trace at {}'.format(
                    str(e).split(' ')[-1]))
                bad_tr = str(e).split(' ')[-1][:-1] # Eliminate trailing "'"
                print('Sta and chan names: {}'.format(bad_tr.split('.')))
                try:
                    tr = streams[i].select(station=bad_tr.split('.')[0],
                                           channel=bad_tr.split('.')[1])[0]
                    streams[i].traces.remove(tr)
                    shortproc(st=streams[i], lowcut=temp.lowcut,
                              highcut=temp.highcut,
                              filt_order=temp.filt_order,
                              samp_rate=temp.samp_rate)
                    wrk_streams.append(st.copy())
                except IndexError as e:
                    print(str(e))
                    print('Funkiness. Removing entire stream')
                    bad_streams.append(st)
        if len(bad_streams) > 0:
            for bst in bad_streams:
                streams.remove(bst)
        svd_streams = copy.deepcopy(streams) # For svd
        ccc_streams = copy.deepcopy(streams)
        # work out cccoh for each event with template
        cccohs = cc_coh_dets(streams=ccc_streams, shift=shift_len,
                             length=svd_len, wav_prepick=3.,
                             corr_prepick=0.05)
        for st in wrk_streams:
            for tr in st:
                tr.trim(starttime=tr.stats.starttime + front_clip,
                        endtime=tr.stats.starttime + back_clip)
        st_chans = list(set([(tr.stats.station, tr.stats.channel)
                             for st in wrk_streams for tr in st]))
        st_chans.sort()
        # Align streams with just P arrivals, then use longer st for svd
        print('Now aligning svd_streams')
        shift_inds = int(shift_len * fam.template.samp_rate)
        for st_chan in st_chans:
            trs = []
            for i, st in enumerate(wrk_streams):
                if len(st.select(station=st_chan[0], channel=st_chan[-1])) > 0:
                    trs.append((i, st.select(station=st_chan[0],
                                             channel=st_chan[-1])[0]))
            inds, traces = zip(*trs)
            shifts, ccs = stacking.align_traces(trace_list=list(traces),
                                                shift_len=shift_inds,
                                                positive=True,
                                                master=traces[0].copy())
            # We now have shifts based on P correlation, shift and trim
            # larger wavs for svd
            for j, shift in enumerate(shifts):
                st = svd_streams[inds[j]]
                if ccs[j] < reject:
                    svd_streams[inds[j]].remove(st.select(
                        station=st_chan[0], channel=st_chan[-1])[0])
                    print('Removing trace due to low cc value: %s' % ccs[j])
                    continue
                strt_tr = st.select(
                    station=st_chan[0], channel=st_chan[-1])[0].stats.starttime
                strt_tr += (3.0 - prepick - shift)
                st.select(station=st_chan[0],
                          channel=st_chan[-1])[0].trim(strt_tr,
                                                       strt_tr + svd_len)
        if method == 'LSQR':
            print('Using least-squares method')
            event_list = []
            for stachan in st_chans:
                st_list = []
                for i, st in enumerate(svd_streams):
                    if len(st.select(station=stachan[0],
                                     channel=stachan[-1])) > 0:
                        st_list.append(i)
                event_list.append(st_list)
            # event_list = np.asarray(event_list).tolist()
            u, sigma, v, sta_chans = svd(stream_list=svd_streams, full=True)
            try:
                M, events_out = svd_moments(u, sigma, v, sta_chans, event_list)
            except IOError as e:
                print('Family %s raised error %s' % (fam.template.name, e))
                continue
        elif method == 'PCA':
            print('Using principal component method')
            # Now loop over all detections and do svd for each matching
            # chan with temp
            events_out = []
            template = svd_streams[0]
            M = []
            for i, st in enumerate(svd_streams):
                if len(st) == 0:
                    print('Event not located, skipping')
                    continue
                ev_r_amps = []
                # For each pair of template:detection (including temp:temp)
                for tr in template:
                    if len(st.select(station=tr.stats.station,
                                     channel=tr.stats.channel)) > 0:
                        det_tr = st.select(station=tr.stats.station,
                                           channel=tr.stats.channel)[0]
                        # Convoluted way of getting two 'vert' vectors
                        data_mat = np.vstack((tr.data, det_tr.data)).T
                        U, sig, Vt = scipy.linalg.svd(data_mat,
                                                      full_matrices=True)
                        # Vt is 2x2 for two events
                        # Per Shelly et al., 2016 eq. 4
                        ev_r_amps.append(Vt[0][1] / Vt[0][0])
                if len(ev_r_amps) < min_amps:
                    print('Fewer than %d amplitude picks, skipping.'
                          % min_amps)
                    continue
                M.append(np.median(ev_r_amps))
                events_out.append(i)
        # If we have a Mag for template, calibrate moments
        if calibrate and len(fam.template.event.magnitudes) > 0:
            # Convert the template magnitude to seismic moment
            temp_mag = fam.template.event.magnitudes[-1].mag
            temp_mo = local_to_moment(temp_mag)
            # Extrapolate from the template moment - relative moment
            # relationship to get the moment for relative moment = 1.0
            norm_mo = temp_mo / M[0]
            # Template is the first event in the list
            # Now these are weights which we can multiply the moments by
            moments = np.multiply(M, norm_mo)
            # Now convert to Mw
            Mw = [2.0 / 3.0 * (np.log10(m) - 9.0) for m in moments]
            Mw2, evs2 = remove_outliers(Mw, events_out)
            # Convert to local
            Ml = [0.88 * m + 0.73 for m in Mw2]
            # Normalize moments to template magnitude
            # Add calibrated mags to detection events
            for i, eind in enumerate(evs2):
                fam.detections[eind-1].event.magnitudes = [
                    Magnitude(mag=Mw2[i], magnitude_type='Mw')]
                fam.detections[eind-1].event.comments.append(
                    Comment(text=str(cccohs[eind-1])))
                fam.detections[eind-1].event.magnitudes.append(
                    Magnitude(mag=Ml[i], magnitude_type='ML'))
            fam.catalog = Catalog(events=[det.event for det in fam.detections])
    return party, cccohs
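# A worked sketch of the calibration arithmetic above. local_to_moment is
# stood in for by the inverse of the Mw conversion used in the code
# (Mw = 2/3 * (log10(M0) - 9.0), M0 in N m); treating the template ML as Mw
# here is an assumption for illustration only.
import numpy as np

def calibrate_moments_sketch(temp_mag, rel_moments):
    temp_mo = 10.0 ** (1.5 * temp_mag + 9.0)  # stand-in for local_to_moment
    norm_mo = temp_mo / rel_moments[0]        # template is element 0
    moments = np.multiply(rel_moments, norm_mo)
    return [2.0 / 3.0 * (np.log10(m) - 9.0) for m in moments]

# e.g. calibrate_moments_sketch(3.0, [1.0, 0.5]) gives Mw of 3.0 for the
# template and ~2.8 for a detection with half its relative moment.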
def custom_template_gen(method,
                        lowcut,
                        highcut,
                        samp_rate,
                        filt_order,
                        length,
                        prepick,
                        swin="all",
                        process_len=86400,
                        all_horiz=False,
                        delayed=True,
                        plot=False,
                        plotdir=None,
                        return_event=False,
                        min_snr=None,
                        parallel=False,
                        num_cores=False,
                        save_progress=False,
                        skip_short_chans=False,
                        **kwargs):
    """
    Generate processed and cut waveforms for use as templates.

    :type method: str
    :param method:
        Template generation method, must be one of ('from_client',
        'from_seishub', 'from_sac', 'from_meta_file'). Each method requires
        associated arguments, see note below.
    :type lowcut: float
    :param lowcut: Low cut (Hz), if set to None will not apply a lowcut.
    :type highcut: float
    :param highcut: High cut (Hz), if set to None will not apply a highcut.
    :type samp_rate: float
    :param samp_rate: New sampling rate in Hz.
    :type filt_order: int
    :param filt_order: Filter level (number of corners).
    :type length: float
    :param length: Length of template waveform in seconds.
    :type prepick: float
    :param prepick: Pre-pick time in seconds
    :type swin: str
    :param swin:
        P, S, P_all, S_all or all, defaults to all: see note in
        :func:`eqcorrscan.core.template_gen.template_gen`
    :type process_len: int
    :param process_len: Length of data in seconds to download and process.
    :type all_horiz: bool
    :param all_horiz:
        To use both horizontal channels even if there is only a pick on one of
        them.  Defaults to False.
    :type delayed: bool
    :param delayed: If True, each channel will begin relative to its own \
        pick-time, if set to False, each channel will begin at the same time.
    :type plot: bool
    :param plot: Plot templates or not.
    :type plotdir: str
    :param plotdir:
        The path to save plots to. If `plotdir=None` (default) then the figure
        will be shown on screen.
    :type return_event: bool
    :param return_event: Whether to return the event and process length or not.
    :type min_snr: float
    :param min_snr:
        Minimum signal-to-noise ratio for a channel to be included in the
        template, where signal-to-noise ratio is calculated as the ratio of
        the maximum amplitude in the template window to the rms amplitude in
        the whole window given.
    :type parallel: bool
    :param parallel: Whether to process data in parallel or not.
    :type num_cores: int
    :param num_cores:
        Number of cores to try and use, if False and parallel=True, will use
        either all your cores, or as many traces as in the data (whichever is
        smaller).
    :type save_progress: bool
    :param save_progress:
        Whether to save the resulting templates at every data step or not.
        Useful for long-running processes.
    :type skip_short_chans: bool
    :param skip_short_chans:
        Whether to ignore channels that have insufficient length data or not.
        Useful when the quality of data is not known, e.g. when downloading
        old, possibly triggered data from a datacentre.

    :returns: List of :class:`obspy.core.stream.Stream` Templates
    :rtype: list

    """

    client_map = {'from_client': 'fdsn', 'from_seishub': 'seishub'}
    assert method in ('from_client', 'from_seishub', 'from_meta_file',
                      'from_sac')
    if not isinstance(swin, list):
        swin = [swin]
    process = True
    if method in ['from_client', 'from_seishub']:
        catalog = kwargs.get('catalog', Catalog())
        data_pad = kwargs.get('data_pad', 90)
        # Group catalog into days and only download the data once per day
        sub_catalogs = _group_events(catalog=catalog,
                                     process_len=process_len,
                                     template_length=length,
                                     data_pad=data_pad)
        if method == 'from_client':
            if isinstance(kwargs.get('client_id'), str):
                client = FDSNClient(kwargs.get('client_id', None))
            else:
                client = kwargs.get('client_id', None)
            available_stations = []
        else:
            client = SeisHubClient(kwargs.get('url', None), timeout=10)
            available_stations = client.waveform.get_station_ids()
    elif method == 'from_meta_file':
        if isinstance(kwargs.get('meta_file'), Catalog):
            catalog = kwargs.get('meta_file')
        elif kwargs.get('meta_file'):
            catalog = read_events(kwargs.get('meta_file'))
        else:
            catalog = kwargs.get('catalog')
        sub_catalogs = [catalog]
        st = kwargs.get('st', Stream())
        process = kwargs.get('process', True)
    elif method == 'from_sac':
        sac_files = kwargs.get('sac_files')
        if isinstance(sac_files, list):
            if isinstance(sac_files[0], (Stream, Trace)):
                # This is a list of streams...
                st = Stream(sac_files[0])
                for sac_file in sac_files[1:]:
                    st += sac_file
            else:
                sac_files = [read(sac_file)[0] for sac_file in sac_files]
                st = Stream(sac_files)
        else:
            st = sac_files
        # Make an event object...
        catalog = Catalog([sactoevent(st)])
        sub_catalogs = [catalog]

    temp_list = []
    process_lengths = []
    catalog_out = Catalog()

    if "P_all" in swin or "S_all" in swin or all_horiz:
        all_channels = True
    else:
        all_channels = False
    for sub_catalog in sub_catalogs:
        if method in ['from_seishub', 'from_client']:
            Logger.info("Downloading data")
            st = _download_from_client(client=client,
                                       client_type=client_map[method],
                                       catalog=sub_catalog,
                                       data_pad=data_pad,
                                       process_len=process_len,
                                       available_stations=available_stations,
                                       all_channels=all_channels)
        Logger.info('Pre-processing data')
        st.merge()
        if len(st) == 0:
            Logger.info("No data")
            continue
        if process:
            data_len = max(
                [len(tr.data) / tr.stats.sampling_rate for tr in st])
            if 80000 < data_len < 90000:
                daylong = True
                starttime = min([tr.stats.starttime for tr in st])
                min_delta = min([tr.stats.delta for tr in st])
                # Cope with the common starttime less than 1 sample before the
                #  start of day.
                if (starttime + min_delta).date > starttime.date:
                    starttime = (starttime + min_delta)
                # Check if this is stupid:
                if abs(starttime - UTCDateTime(starttime.date)) > 600:
                    daylong = False
                starttime = starttime.date
            else:
                daylong = False
            # Check if the required amount of data have been downloaded - skip
            # channels if arg set.
            for tr in st:
                if np.ma.is_masked(tr.data):
                    _len = np.ma.count(tr.data) * tr.stats.delta
                else:
                    _len = tr.stats.npts * tr.stats.delta
                if _len < process_len * .8:
                    Logger.info("Data for {0} are too short, skipping".format(
                        tr.id))
                    if skip_short_chans:
                        continue
                # Trim to enforce process-len
                tr.data = tr.data[0:int(process_len * tr.stats.sampling_rate)]
            if len(st) == 0:
                Logger.info("No data")
                continue
            if daylong:
                st = pre_processing.dayproc(st=st,
                                            lowcut=lowcut,
                                            highcut=highcut,
                                            filt_order=filt_order,
                                            samp_rate=samp_rate,
                                            parallel=parallel,
                                            starttime=UTCDateTime(starttime),
                                            num_cores=num_cores)
            else:
                st = pre_processing.shortproc(st=st,
                                              lowcut=lowcut,
                                              highcut=highcut,
                                              filt_order=filt_order,
                                              parallel=parallel,
                                              samp_rate=samp_rate,
                                              num_cores=num_cores)
        data_start = min([tr.stats.starttime for tr in st])
        data_end = max([tr.stats.endtime for tr in st])

        for event in sub_catalog:
            stations, channels, st_stachans = ([], [], [])
            if len(event.picks) == 0:
                Logger.warning('No picks for event {0}'.format(
                    event.resource_id))
                continue
            use_event = True
            # Check that the event is within the data
            for pick in event.picks:
                if not data_start < pick.time < data_end:
                    Logger.warning(
                        "Pick outside of data span: Pick time {0} Start "
                        "time {1} End time: {2}".format(
                            str(pick.time), str(data_start), str(data_end)))
                    use_event = False
            if not use_event:
                Logger.error('Event is not within data time-span')
                continue
            # Read in pick info
            Logger.debug("I have found the following picks")
            for pick in event.picks:
                if not pick.waveform_id:
                    Logger.warning(
                        'Pick not associated with waveforms, will not use:'
                        ' {0}'.format(pick))
                    continue
                Logger.debug(pick)
                stations.append(pick.waveform_id.station_code)
                channels.append(pick.waveform_id.channel_code)
            # Check to see if all picks have a corresponding waveform
            for tr in st:
                st_stachans.append('.'.join(
                    [tr.stats.station, tr.stats.channel]))
            # Cut and extract the templates
            template = _template_gen(event.picks,
                                     st,
                                     length,
                                     swin,
                                     prepick=prepick,
                                     plot=plot,
                                     all_horiz=all_horiz,
                                     delayed=delayed,
                                     min_snr=min_snr,
                                     plotdir=plotdir)
            process_lengths.append(len(st[0].data) / samp_rate)
            temp_list.append(template)
            catalog_out += event
        if save_progress:
            if not os.path.isdir("eqcorrscan_temporary_templates"):
                os.makedirs("eqcorrscan_temporary_templates")
            for template in temp_list:
                template.write(
                    "eqcorrscan_temporary_templates{0}{1}.ms".format(
                        os.path.sep, template[0].stats.starttime.strftime(
                            "%Y-%m-%dT%H%M%S")),
                    format="MSEED")
        del st
    if return_event:
        return temp_list, catalog_out, process_lengths
    return temp_list
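A hedged usage sketch of custom_template_gen with the 'from_meta_file' method, assuming a QuakeML catalog and a pre-read waveform file are available locally (both file names are hypothetical):

from obspy import read

templates = custom_template_gen(
    method='from_meta_file', lowcut=2.0, highcut=9.0, samp_rate=20.0,
    filt_order=4, length=6.0, prepick=0.2, swin='all',
    meta_file='events.xml',      # hypothetical QuakeML file
    st=read('day_of_data.ms'),   # hypothetical continuous data file
    process=True, min_snr=4.0)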
Example #33
import os
from glob import glob

from obspy import read, UTCDateTime
from eqcorrscan.utils import pre_processing

os.chdir(temp_dir)  # temp_dir is assumed to be defined earlier in the script
ms_files = glob('*.ms')
ms_files.sort()
templates = []
template_names = []
for file1 in ms_files:
    templates.append(read(file1))
    template_names.append(file1[:-3])

# Extract the station info from the templates
stachans = []
for i, template in enumerate(templates):
    # Filter and downsample the data
    templates[i] = pre_processing.shortproc(
        template, lowcut=1.0, highcut=20.0, filt_order=3, samp_rate=100.0,
        debug=1)
    stachans += [(tr.stats.station, tr.stats.channel)
                 for tr in templates[i]]

# Make this a unique list
stachans = list(set(stachans))

# Read in the continuous data for these station, channel combinations
raw_dir = '/Volumes/GeoPhysics_07/users-data/matsonga/MRP_PROJ/data/mastersData/sac'
# Recursively search a directory for files matching the desired day and stachan
start_day = UTCDateTime(2012, 6, 11).julday
end_day = UTCDateTime(2012, 6, 12).julday
days = range(start_day, end_day + 1)
Example #34
def from_sfile(sfile, lowcut, highcut, samp_rate, filt_order, length, swin,
               prepick=0.05, debug=0, plot=False):
    r"""Function to read in picks from sfile then generate the template from \
    the picks within this and the wavefile found in the pick file.

    :type sfile: string
    :param sfile: sfilename must be the \
        path to a seisan nordic type s-file containing waveform and pick \
        information.
    :type lowcut: float
    :param lowcut: Low cut (Hz), if set to None will look in template \
            defaults file
    :type highcut: float
    :param highcut: High cut (Hz), if set to None will look in template \
            defaults file
    :type samp_rate: float
    :param samp_rate: New sampling rate in Hz, if set to None will look in \
            template defaults file
    :type filt_order: int
    :param filt_order: Filter level, if set to None will look in \
            template defaults file
    :type swin: str
    :param swin: Either 'all', 'P' or 'S', to select which phases to output.
    :type length: float
    :param length: Extract length in seconds, if None will look in template \
            defaults file.
    :type prepick: float
    :param prepick: Length to extract prior to the pick in seconds.
    :type debug: int
    :param debug: Debug level, higher number=more output.
    :type plot: bool
    :param plot: Turns template plotting on or off.

    :returns: obspy.Stream Newly cut template

    .. warning:: This will use whatever data is pointed to in the s-file, if \
        this is not the continuous data, we recommend using other functions. \
        Differences in processing between short files and day-long files \
        (inherent to resampling) will produce lower cross-correlations.
    """
    # Perform some checks first
    import os
    if not os.path.isfile(sfile):
        raise IOError('sfile does not exist')

    from eqcorrscan.utils import pre_processing
    from eqcorrscan.utils import sfile_util
    from obspy import read as obsread
    # Read in the header of the sfile
    wavefiles = sfile_util.readwavename(sfile)
    pathparts = sfile.split('/')[0:-1]
    new_path_parts = []
    for part in pathparts:
        if part == 'REA':
            part = 'WAV'
        new_path_parts.append(part)
    # * argument to allow .join() to accept a list
    wavpath = os.path.join(*new_path_parts) + '/'
    # In case of absolute paths (not handled with .split() --> .join())
    if sfile[0] == '/':
        wavpath = '/' + wavpath
    # Read in waveform file
    for wavefile in wavefiles:
        print(''.join(["I am going to read waveform data from: ", wavpath,
                       wavefile]))
        if 'st' not in locals():
            st = obsread(wavpath + wavefile)
        else:
            st += obsread(wavpath + wavefile)
    for tr in st:
        if tr.stats.sampling_rate < samp_rate:
            print('Sampling rate of data is lower than sampling rate asked ' +
                  'for')
            print('Not good practice for correlations: I will not do this')
            raise ValueError("Trace: " + tr.stats.station +
                             " sampling rate: " + str(tr.stats.sampling_rate))
    # Read in pick info
    catalog = sfile_util.readpicks(sfile)
    # Read the list of Picks for this event
    picks = catalog[0].picks
    print("I have found the following picks")
    for pick in picks:
        print(' '.join([pick.waveform_id.station_code,
                        pick.waveform_id.channel_code, pick.phase_hint,
                        str(pick.time)]))

    # Process waveform data
    st.merge(fill_value='interpolate')
    st = pre_processing.shortproc(st, lowcut, highcut, filt_order,
                                  samp_rate, debug)
    st1 = _template_gen(picks=picks, st=st, length=length, swin=swin,
                        prepick=prepick, plot=plot)
    return st1
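A minimal call sketch, assuming the Nordic s-file shipped with the EQcorrscan test data (any s-file referencing a readable wavefile would do):

template = from_sfile(
    sfile='eqcorrscan/tests/test_data/REA/TEST_/01-0411-15L.S201309',
    lowcut=5.0, highcut=15.0, samp_rate=50.0, filt_order=4,
    length=6.0, swin='P', prepick=0.2)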
Example #35
def cluster_tribe(tribe,
                  raw_wav_dir,
                  lowcut,
                  highcut,
                  samp_rate,
                  filt_order,
                  pre_pick,
                  length,
                  shift_len,
                  corr_thresh,
                  cores,
                  dist_mat=False,
                  show=False):
    """
    Cross correlate all templates in a tribe and return separate tribes for
    each cluster
    :param tribe:
    :return:

    .. Note: Functionality here is pilaged from align design as we don't
        want the multiplexed portion of that function.
    """

    tribe.sort()
    raw_wav_files = glob('%s/*' % raw_wav_dir)
    raw_wav_files.sort()
    all_wavs = [wav.split('/')[-1].split('.')[0] for wav in raw_wav_files]
    names = [t.name for t in tribe if t.name in all_wavs]
    wavs = [
        wav for wav in raw_wav_files
        if wav.split('/')[-1].split('.')[0] in names
    ]
    new_tribe = Tribe()
    new_tribe.templates = [temp for temp in tribe if temp.name in names]
    print('Processing temps')
    temp_list = [(shortproc(read(tmp),
                            lowcut=lowcut,
                            highcut=highcut,
                            samp_rate=samp_rate,
                            filt_order=filt_order,
                            parallel=True,
                            num_cores=cores), template)
                 for tmp, template in zip(wavs, new_tribe)]
    print('Clipping traces')
    for temp in temp_list:
        print('Clipping template %s' % temp[1].name)
        for tr in temp[0]:
            pk = [
                pk for pk in temp[1].event.picks
                if pk.waveform_id.station_code == tr.stats.station
                and pk.waveform_id.channel_code == tr.stats.channel
            ][0]
            tr.trim(starttime=pk.time - shift_len - pre_pick,
                    endtime=pk.time - pre_pick + length + shift_len)
    trace_lengths = [
        tr.stats.endtime - tr.stats.starttime for st in temp_list
        for tr in st[0]
    ]
    clip_len = min(trace_lengths) - (2 * shift_len)
    stachans = list(
        set([(tr.stats.station, tr.stats.channel) for st in temp_list
             for tr in st[0]]))
    print('Aligning traces')
    for stachan in stachans:
        trace_list = []
        trace_ids = []
        for i, st in enumerate(temp_list):
            tr = st[0].select(station=stachan[0], channel=stachan[1])
            if len(tr) > 0:
                trace_list.append(tr[0])
                trace_ids.append(i)
            if len(tr) > 1:
                warnings.warn('Too many matches for %s %s' %
                              (stachan[0], stachan[1]))
        shift_len_samples = int(shift_len * trace_list[0].stats.sampling_rate)
        shifts, cccs = stacking.align_traces(trace_list=trace_list,
                                             shift_len=shift_len_samples,
                                             positive=True)
        for i, shift in enumerate(shifts):
            st = temp_list[trace_ids[i]][0]
            start_t = st.select(station=stachan[0],
                                channel=stachan[1])[0].stats.starttime
            start_t += shift_len
            start_t -= shift
            st.select(station=stachan[0],
                      channel=stachan[1])[0].trim(start_t, start_t + clip_len)
    print('Clustering')
    if isinstance(dist_mat, np.ndarray):
        groups = cluster_from_dist_mat(dist_mat=dist_mat,
                                       temp_list=temp_list,
                                       show=show,
                                       corr_thresh=corr_thresh)
    else:
        groups = clustering.cluster(temp_list,
                                    show=show,
                                    corr_thresh=corr_thresh,
                                    allow_shift=False,
                                    save_corrmat=True,
                                    cores=cores)
    group_tribes = []
    for group in groups:
        group_tribes.append(
            Tribe(templates=[
                Template(st=tmp[0],
                         name=tmp[1].name,
                         event=tmp[1].event,
                         highcut=highcut,
                         lowcut=lowcut,
                         samp_rate=samp_rate,
                         filt_order=filt_order,
                         prepick=pre_pick) for tmp in group
            ]))
    return group_tribes
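A hedged usage sketch, assuming a Tribe archive on disk and a directory of raw waveform files named after each template (both paths are hypothetical):

from eqcorrscan.core.match_filter import Tribe

tribe = Tribe().read('tribe.tgz')  # hypothetical tribe archive
group_tribes = cluster_tribe(
    tribe, raw_wav_dir='raw_wavs', lowcut=2.0, highcut=9.0,
    samp_rate=20.0, filt_order=4, pre_pick=0.2, length=6.0,
    shift_len=0.5, corr_thresh=0.4, cores=4)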
Example #36
def from_sac(sac_files, lowcut, highcut, samp_rate, filt_order, length, swin,
             prepick=0.05, debug=0, plot=False):
    """Function to read picks and waveforms from SAC data, and generate a \
    template from these.

    :type sac_files: list or stream
    :param sac_files: List or stream of sac waveforms, or list of paths to \
        sac waveforms.
    :type lowcut: float
    :param lowcut: Low cut (Hz), if set to None will look in template \
            defaults file
    :type highcut: float
    :param highcut: High cut (Hz), if set to None will look in template \
            defaults file
    :type samp_rate: float
    :param samp_rate: New sampling rate in Hz, if set to None will look in \
            template defaults file
    :type filt_order: int
    :param filt_order: Filter level, if set to None will look in \
            template defaults file
    :type swin: str
    :param swin: Either 'all', 'P' or 'S', to select which phases to output.
    :type length: float
    :param length: Extract length in seconds, if None will look in template \
            defaults file.
    :type prepick: float
    :param prepick: Length to extract prior to the pick in seconds.
    :type debug: int
    :param debug: Debug level, higher number=more output.
    :type plot: bool
    :param plot: Turns template plotting on or off.

    :returns: obspy.Stream Newly cut template

    .. note:: This functionality is not supported for obspy versions below \
        1.0.0 as reference times are not read in by SACIO; these are needed \
        for defining pick times.
    """
    from obspy import read, Stream
    from eqcorrscan.utils.sac_util import sactoevent
    from eqcorrscan.utils import pre_processing
    # Check whether sac_files is a stream or a list
    if isinstance(sac_files, list):
        if isinstance(sac_files[0], str):
            # A list of paths - read each file in
            sac_files = [read(sac_file)[0] for sac_file in sac_files]
        if isinstance(sac_files[0], Stream):
            # This is a list of streams...
            st = sac_files[0]
            for sac_file in sac_files[1:]:
                st += sac_file
        else:
            st = Stream(sac_files)
    elif isinstance(sac_files, Stream):
        st = sac_files
    # Make an event object...
    event = sactoevent(st)
    # Process the data
    st.merge(fill_value='interpolate')
    st = pre_processing.shortproc(st, lowcut, highcut, filt_order,
                                  samp_rate, debug)
    template = _template_gen(picks=event.picks, st=st, length=length,
                             swin=swin, prepick=prepick, plot=plot)
    return template
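A minimal call sketch, assuming a directory of single-event SAC files with picks stored in their headers (the glob pattern is hypothetical):

from glob import glob

sac_files = glob('event_sac/*.SAC')  # hypothetical SAC file paths
template = from_sac(sac_files, lowcut=2.0, highcut=9.0, samp_rate=20.0,
                    filt_order=4, length=6.0, swin='all', prepick=0.1)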
Example #37
def run_tutorial(plot=False):
    """Main function to run the tutorial dataset."""

    from eqcorrscan.utils import pre_processing
    from eqcorrscan.utils import plotting
    from eqcorrscan.core import match_filter
    import glob
    from multiprocessing import cpu_count

    # This import section copes with namespace changes between obspy versions
    import obspy
    if int(obspy.__version__.split('.')[0]) >= 1:
        from obspy.clients.fdsn import Client
    else:
        from obspy.fdsn import Client
    from obspy import UTCDateTime, Stream, read

    # First we want to load our templates
    template_names = glob.glob('tutorial_template_*.ms')

    if len(template_names) == 0:
        raise IOError('Template files not found, have you run the template ' +
                      'creation tutorial?')

    templates = [read(template_name) for template_name in template_names]

    # Work out what stations we have and get the data for them
    stations = []
    for template in templates:
        for tr in template:
            stations.append((tr.stats.station, tr.stats.channel))
    # Get a unique list of stations
    stations = list(set(stations))

    # We will loop through the data chunks at a time, these chunks can be any
    # size, in general we have used 1 day as our standard, but this can be
    # as short as five minutes (for MAD thresholds) or shorter for other
    # threshold metrics. However the chunk size should be the same as your
    # template process_len.

    # You should test different parameters!!!
    start_time = UTCDateTime(2016, 1, 4)
    end_time = UTCDateTime(2016, 1, 5)
    process_len = 3600
    chunks = []
    chunk_start = start_time
    while chunk_start < end_time:
        chunk_end = chunk_start + process_len
        if chunk_end > end_time:
            chunk_end = end_time
        chunks.append((chunk_start, chunk_end))
        chunk_start += process_len

    unique_detections = []
    detections = []

    # Set up a client to access the GeoNet database
    client = Client("GEONET")

    # Note that these chunks do not rely on each other, and could be run in
    # parallel across multiple nodes of a distributed cluster; see the SLURM
    # tutorial for an example of this.
    for t1, t2 in chunks:
        # Generate the bulk information to query the GeoNet database
        bulk_info = []
        for station in stations:
            bulk_info.append(('NZ', station[0], '*',
                              station[1][0] + 'H' + station[1][-1], t1, t2))

        # Note this will take a little while.
        print('Downloading seismic data, this may take a while')
        st = client.get_waveforms_bulk(bulk_info)
        # Merge the stream, it will be downloaded in chunks
        st.merge(fill_value='interpolate')

        # Set how many cores we want to run in parallel across; we will set
        # this to four as this is the number of templates. If your machine has
        # fewer than four cores/CPUs the multiprocessing will wait until there
        # is a free core. Setting this higher than the number of templates
        # will not increase speed, as only detections for each template are
        # computed in parallel. It may also slow your processing by using more
        # memory than needed, to the extent that swap may be filled.
        if cpu_count() < 4:
            ncores = cpu_count()
        else:
            ncores = 4

        # Pre-process the data to set frequency band and sampling rate.
        # Note that these parameters are, and MUST BE, the same as those used
        # for template creation.
        print('Processing the seismic data')
        st = pre_processing.shortproc(st,
                                      lowcut=2.0,
                                      highcut=9.0,
                                      filt_order=4,
                                      samp_rate=20.0,
                                      debug=2,
                                      num_cores=ncores,
                                      starttime=t1,
                                      endtime=t2)
        # Convert from list to stream
        st = Stream(st)

        # Now we can conduct the matched-filter detection
        detections += match_filter.match_filter(template_names=template_names,
                                                template_list=templates,
                                                st=st,
                                                threshold=8.0,
                                                threshold_type='MAD',
                                                trig_int=6.0,
                                                plotvar=plot,
                                                plotdir='.',
                                                cores=ncores,
                                                tempdir=False,
                                                debug=1,
                                                plot_format='jpg')

    # Now let's work out how many unique events we have, to compare with the
    # GeoNet catalog of 20 events on this day in this sequence.
    for master in detections:
        keep = True
        for slave in detections:
            if not master == slave and\
               abs(master.detect_time - slave.detect_time) <= 1.0:
                # If the events are within 1s of each other then test which
                # was the 'best' match, strongest detection
                if not master.detect_val > slave.detect_val:
                    keep = False
                    break
        if keep:
            unique_detections.append(master)

    print('We made a total of ' + str(len(unique_detections)) + ' detections')

    for detection in unique_detections:
        print('Detection at :' + str(detection.detect_time) +
              ' for template ' + detection.template_name +
              ' with a cross-correlation sum of: ' + str(detection.detect_val))
        # We can plot these too
        if plot:
            stplot = st.copy()
            template = templates[template_names.index(detection.template_name)]
            lags = sorted([tr.stats.starttime for tr in template])
            maxlag = lags[-1] - lags[0]
            stplot.trim(starttime=detection.detect_time - 10,
                        endtime=detection.detect_time + maxlag + 10)
            plotting.detection_multiplot(stplot, template,
                                         [detection.detect_time.datetime])
    return unique_detections
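To run the tutorial as a script, a simple main guard is enough:

if __name__ == '__main__':
    # Run the matched-filter tutorial; set plot=True to view detections
    unique_detections = run_tutorial(plot=False)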
Example #38
def from_sfile(sfile, lowcut, highcut, samp_rate, filt_order, length, swin,
               prepick=0.05, debug=0, plot=False):
    """
    Generate multiplexed template from a Nordic (Seisan) s-file.
    Function to read in picks from sfile then generate the template from \
    the picks within this and the wavefile found in the pick file.

    :type sfile: str
    :param sfile: sfilename must be the \
        path to a seisan nordic type s-file containing waveform and pick \
        information.
    :type lowcut: float
    :param lowcut: Low cut (Hz), if set to None will look in template \
            defaults file
    :type highcut: float
    :param highcut: High cut (Hz), if set to None will look in template \
            defaults file
    :type samp_rate: float
    :param samp_rate: New sampling rate in Hz, if set to None will look in \
            template defaults file
    :type filt_order: int
    :param filt_order: Filter level, if set to None will look in \
            template defaults file
    :type swin: str
    :param swin: Either 'all', 'P' or 'S', to select which phases to output.
    :type length: float
    :param length: Extract length in seconds, if None will look in template \
            defaults file.
    :type prepick: float
    :param prepick: Length to extract prior to the pick in seconds.
    :type debug: int
    :param debug: Debug level, higher number=more output.
    :type plot: bool
    :param plot: Turns template plotting on or off.

    :returns: obspy.core.stream.Stream Newly cut template

    .. warning:: This will use whatever data is pointed to in the s-file, if \
        this is not the continuous data, we recommend using other functions. \
        Differences in processing between short files and day-long files \
        (inherent to resampling) will produce lower cross-correlations.

    .. rubric:: Example

    >>> from eqcorrscan.core.template_gen import from_sfile
    >>> sfile = 'eqcorrscan/tests/test_data/REA/TEST_/01-0411-15L.S201309'
    >>> template = from_sfile(sfile=sfile, lowcut=5.0, highcut=15.0,
    ...                       samp_rate=50.0, filt_order=4, swin='P',
    ...                       prepick=0.2, length=6)
    >>> print(len(template))
    15
    >>> print(template[0].stats.sampling_rate)
    50.0
    >>> template.plot(equal_scale=False, size=(800,600)) # doctest: +SKIP

    .. plot::

        from eqcorrscan.core.template_gen import from_sfile
        import os
        sfile = os.path.realpath('../../..') + \
            '/tests/test_data/REA/TEST_/01-0411-15L.S201309'
        template = from_sfile(sfile=sfile, lowcut=5.0, highcut=15.0,
                              samp_rate=50.0, filt_order=4, swin='P',
                              prepick=0.2, length=6)
        template.plot(equal_scale=False, size=(800, 600))
    """
    # Perform some checks first
    import os
    if not os.path.isfile(sfile):
        raise IOError('sfile does not exist')

    from eqcorrscan.utils import pre_processing
    from eqcorrscan.utils import sfile_util
    from obspy import read as obsread
    # Read in the header of the sfile
    wavefiles = sfile_util.readwavename(sfile)
    pathparts = sfile.split('/')[0:-1]
    new_path_parts = []
    for part in pathparts:
        if part == 'REA':
            part = 'WAV'
        new_path_parts.append(part)
    main_wav_parts = []
    for part in new_path_parts:
        main_wav_parts.append(part)
        if part == 'WAV':
            break
    mainwav = os.path.join(*main_wav_parts) + os.path.sep
    # * argument to allow .join() to accept a list
    wavpath = os.path.join(*new_path_parts) + os.path.sep
    # In case of absolute paths (not handled with .split() --> .join())
    if sfile[0] == os.path.sep:
        wavpath = os.path.sep + wavpath
        mainwav = os.path.sep + mainwav
    # Read in waveform file
    for wavefile in wavefiles:
        if debug > 0:
            print(''.join(["I am going to read waveform data from: ", wavpath,
                           wavefile]))
        if 'st' not in locals():
            if os.path.isfile(wavpath + wavefile):
                st = obsread(wavpath + wavefile)
            elif os.path.isfile(wavefile):
                st = obsread(wavefile)
            else:
                # Read from the main WAV directory
                st = obsread(mainwav + wavefile)
        else:
            if os.path.isfile(wavpath + wavefile):
                st += obsread(wavpath + wavefile)
            elif os.path.isfile(wavefile):
                st += obsread(wavefile)
            else:
                st += obsread(mainwav + wavefile)
    for tr in st:
        if tr.stats.sampling_rate < samp_rate:
            print('Sampling rate of data is lower than sampling rate asked ' +
                  'for')
            print('Not good practice for correlations: I will not do this')
            raise ValueError("Trace: " + tr.stats.station +
                             " sampling rate: " + str(tr.stats.sampling_rate))
    # Read in pick info
    event = sfile_util.readpicks(sfile)
    # Read the list of Picks for this event
    picks = event.picks
    if debug > 0:
        print("I have found the following picks")
        for pick in picks:
            print(' '.join([pick.waveform_id.station_code,
                            pick.waveform_id.channel_code, pick.phase_hint,
                            str(pick.time)]))
    # Process waveform data
    st.merge(fill_value='interpolate')
    st = pre_processing.shortproc(st, lowcut, highcut, filt_order,
                                  samp_rate, debug)
    st1 = _template_gen(picks=picks, st=st, length=length, swin=swin,
                        prepick=prepick, plot=plot, debug=debug)
    return st1