Example #1
 def test_unclustered(self):
     """Test clustering on unclustered data..."""
     testing_path = os.path.join(self.testing_path, 'WAV', 'TEST_')
     stream_files = glob.glob(os.path.join(testing_path, '*DFDPC*'))[0:10]
     stream_list = [(read(stream_file), i)
                    for i, stream_file in enumerate(stream_files)]
     for st in stream_list:
         for tr in st[0]:
             if tr.stats.sampling_rate != 100.0:
                 ratio = tr.stats.sampling_rate / 100
                 if int(ratio) == ratio:
                     tr.decimate(int(ratio))
                 else:
                     tr.resample(100)
     shortest_tr = min(
         [tr.stats.npts for st in stream_list for tr in st[0]])
     for st in stream_list:
         for tr in st[0]:
             tr.data = tr.data[0:shortest_tr]
     groups = cluster(template_list=stream_list,
                      show=False,
                      corr_thresh=0.3)
     self.assertEqual(len(groups), 10)  # They shouldn't cluster at all
     # Test setting a number of cores
     groups_2 = cluster(template_list=stream_list,
                        show=False,
                        corr_thresh=0.3,
                        cores=2,
                        save_corrmat=True)
     self.assertTrue(os.path.isfile('dist_mat.npy'))
     os.remove('dist_mat.npy')
     self.assertEqual(len(groups_2), 10)  # They shouldn't cluster at all
     self.assertEqual(groups, groups_2)
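
With save_corrmat=True the distance matrix is written to dist_mat.npy in the
working directory (asserted above). A later run can reload it rather than
re-correlating; a minimal sketch, assuming the file was kept instead of
removed:

import numpy as np

# Pairwise distance matrix derived from the correlations
dist_mat = np.load('dist_mat.npy')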
Example #2
 def test_clustered(self):
     """Test clustering on clustered data..."""
     groups = cluster(template_list=[
         (st, i) for i, st in enumerate(self.stream_list)
     ],
                      show=False,
                      corr_thresh=0.3)
     self.assertEqual(len(groups), 9)
Example #3
 def test_unclustered(self):
     """Test clustering on unclustered data..."""
     testing_path = os.path.join(self.testing_path, 'WAV', 'TEST_')
     stream_files = glob.glob(os.path.join(testing_path, '*DFDPC*'))[0:10]
     stream_list = [(read(stream_file), i)
                    for i, stream_file in enumerate(stream_files)]
     groups = cluster(template_list=stream_list, show=False,
                      corr_thresh=0.3)
     self.assertEqual(len(groups), 10)  # They shouldn't cluster at all
     # Test setting a number of cores
     groups_2 = cluster(template_list=stream_list, show=False,
                        corr_thresh=0.3, cores=2, debug=2,
                        save_corrmat=True)
     self.assertTrue(os.path.isfile('dist_mat.npy'))
     os.remove('dist_mat.npy')
     self.assertEqual(len(groups_2), 10)  # They shouldn't cluster at all
     self.assertEqual(groups, groups_2)
Example #4
 def test_unclustered(self):
     """Test clustering on unclustered data..."""
     from obspy import read
     import glob
     import os
     from eqcorrscan.utils.clustering import cluster
     testing_path = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                                 'test_data', 'WAV', 'TEST_')
     stream_files = glob.glob(os.path.join(testing_path, '*'))[0:10]
     stream_list = [(read(stream_file), i)
                    for i, stream_file in enumerate(stream_files)]
     groups = cluster(template_list=stream_list, show=False,
                      corr_thresh=0.3)
     self.assertEqual(len(groups), 10)  # They shouldn't cluster at all
Example #5
 def test_clustered(self):
     """Test clustering on clustered data..."""
     testing_path = os.path.join(self.testing_path, 'similar_events')
     stream_files = glob.glob(os.path.join(testing_path, '*'))
     stream_list = [(read(stream_file), i)
                    for i, stream_file in enumerate(stream_files)]
     for stream in stream_list:
         for tr in list(stream[0]):  # copy the trace list: removing while iterating skips traces
             if tr.stats.station not in ['WHAT2', 'WV04', 'GCSZ']:
                 stream[0].remove(tr)
                 continue
             tr.detrend('simple')
             tr.filter('bandpass', freqmin=5.0, freqmax=15.0)
             tr.trim(tr.stats.starttime + 40, tr.stats.endtime - 45)
     groups = cluster(template_list=stream_list, show=False,
                      corr_thresh=0.3)
     self.assertEqual(len(groups), 9)  # They should cluster reasonably
Example #6
 def test_clustered(self):
     """Test clustering on clustered data..."""
     from obspy import read
     import glob
     import os
     from eqcorrscan.utils.clustering import cluster
     testing_path = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                                 'test_data', 'similar_events')
     stream_files = glob.glob(os.path.join(testing_path, '*'))
     stream_list = [(read(stream_file), i)
                    for i, stream_file in enumerate(stream_files)]
     for stream in stream_list:
         for tr in list(stream[0]):  # copy the trace list: removing while iterating skips traces
             if tr.stats.station not in ['WHAT2', 'WV04', 'GCSZ']:
                 stream[0].remove(tr)
                 continue
             tr.detrend('simple')
             tr.filter('bandpass', freqmin=5.0, freqmax=15.0)
             tr.trim(tr.stats.starttime + 40, tr.stats.endtime - 45)
     groups = cluster(template_list=stream_list, show=False,
                      corr_thresh=0.3)
     self.assertEqual(len(groups), 9)  # They should cluster reasonably
Example #7
from glob import glob
import warnings

import numpy as np
from obspy import read
from eqcorrscan.core.match_filter import Tribe, Template
from eqcorrscan.utils import stacking, clustering
from eqcorrscan.utils.pre_processing import shortproc

# cluster_from_dist_mat is assumed to be defined alongside this function;
# it is not part of eqcorrscan's public clustering API.


def cluster_tribe(tribe,
                  raw_wav_dir,
                  lowcut,
                  highcut,
                  samp_rate,
                  filt_order,
                  pre_pick,
                  length,
                  shift_len,
                  corr_thresh,
                  cores,
                  dist_mat=False,
                  show=False):
    """
    Cross correlate all templates in a tribe and return separate tribes for
    each cluster
    :param tribe:
    :return:

    .. Note: Functionality here is pilaged from align design as we don't
        want the multiplexed portion of that function.
    """

    tribe.sort()
    raw_wav_files = glob('%s/*' % raw_wav_dir)
    raw_wav_files.sort()
    all_wavs = [wav.split('/')[-1].split('.')[0] for wav in raw_wav_files]
    names = [t.name for t in tribe if t.name in all_wavs]
    wavs = [
        wav for wav in raw_wav_files
        if wav.split('/')[-1].split('.')[0] in names
    ]
    new_tribe = Tribe()
    new_tribe.templates = [temp for temp in tribe if temp.name in names]
    print('Processing temps')
    temp_list = [(shortproc(read(tmp),
                            lowcut=lowcut,
                            highcut=highcut,
                            samp_rate=samp_rate,
                            filt_order=filt_order,
                            parallel=True,
                            num_cores=cores), template)
                 for tmp, template in zip(wavs, new_tribe)]
    print('Clipping traces')
    for temp in temp_list:
        print('Clipping template %s' % temp[1].name)
        for tr in temp[0]:
            pk = [
                pk for pk in temp[1].event.picks
                if pk.waveform_id.station_code == tr.stats.station
                and pk.waveform_id.channel_code == tr.stats.channel
            ][0]
            tr.trim(starttime=pk.time - shift_len - pre_pick,
                    endtime=pk.time - pre_pick + length + shift_len)
    trace_lengths = [
        tr.stats.endtime - tr.stats.starttime for st in temp_list
        for tr in st[0]
    ]
    clip_len = min(trace_lengths) - (2 * shift_len)
    stachans = list(
        set([(tr.stats.station, tr.stats.channel) for st in temp_list
             for tr in st[0]]))
    print('Aligning traces')
    for stachan in stachans:
        trace_list = []
        trace_ids = []
        for i, st in enumerate(temp_list):
            tr = st[0].select(station=stachan[0], channel=stachan[1])
            if len(tr) > 0:
                trace_list.append(tr[0])
                trace_ids.append(i)
            if len(tr) > 1:
                warnings.warn('Too many matches for %s %s' %
                              (stachan[0], stachan[1]))
        shift_len_samples = int(shift_len * trace_list[0].stats.sampling_rate)
        shifts, cccs = stacking.align_traces(trace_list=trace_list,
                                             shift_len=shift_len_samples,
                                             positive=True)
        for i, shift in enumerate(shifts):
            st = temp_list[trace_ids[i]][0]
            start_t = st.select(station=stachan[0],
                                channel=stachan[1])[0].stats.starttime
            start_t += shift_len
            start_t -= shift
            st.select(station=stachan[0],
                      channel=stachan[1])[0].trim(start_t, start_t + clip_len)
    print('Clustering')
    if isinstance(dist_mat, np.ndarray):
        groups = cluster_from_dist_mat(dist_mat=dist_mat,
                                       temp_list=temp_list,
                                       show=show,
                                       corr_thresh=corr_thresh)
    else:
        groups = clustering.cluster(temp_list,
                                    show=show,
                                    corr_thresh=corr_thresh,
                                    allow_shift=False,
                                    save_corrmat=True,
                                    cores=cores)
    group_tribes = []
    for group in groups:
        group_tribes.append(
            Tribe(templates=[
                Template(st=tmp[0],
                         name=tmp[1].name,
                         event=tmp[1].event,
                         highcut=highcut,
                         lowcut=lowcut,
                         samp_rate=samp_rate,
                         filt_order=filt_order,
                         prepick=pre_pick) for tmp in group
            ]))
    return group_tribes
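
A hypothetical call for orientation; my_tribe and all parameter values are
illustrative placeholders, not recommended settings:

group_tribes = cluster_tribe(
    my_tribe, raw_wav_dir='raw_wavs', lowcut=2.0, highcut=10.0,
    samp_rate=50.0, filt_order=4, pre_pick=0.5, length=4.0,
    shift_len=0.25, corr_thresh=0.3, cores=4)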
Example #8
              index=False,
              header=False)


for st in inv.networks[0].stations:
    stream_list_l = [
        (Stream(traces=[tr]), i) for i, tr in enumerate(
            stream.select(component='L', station=st.code).trim2(
                -5, 20, 'onset'))]
    stream_list = [
        (Stream(traces=[tr]), i) for i, tr in enumerate(
            stream.select(component='Q', station=st.code).trim2(
                -5, 20, 'onset'))]
    try:
        groups = cluster(template_list=stream_list,
                         show=False,
                         corr_thresh=0.3,
                         cores=4)
        if len(groups) < len(stream_list):
            group_max = max(groups, key=len)  # largest cluster
            # Entries of stream_list_l are (single-trace Stream, index)
            # tuples, so pull the first trace out of each matched Stream.
            group_max_l = Stream(
                traces=[stream_list_l[g[1]][0][0] for g in group_max])
            group_streams = [st_tuple[0] for st_tuple in group_max]
            group_streams_l = [Stream(traces=[tr]) for tr in group_max_l]
            stack = PWS_stack(streams=group_streams)
Example #9
     continue
 elif len(cat) < cpu_count():
     cores = len(cat)
 elif len(cat) >= cpu_count():
     cores = 'all'
 # Note: str.rstrip strips a character set, not a suffix, so use replace
 grp_num = space_cat.split('/')[-1].split('_')[-1].replace('.xml', '')
 template_list = [(template_dict[ev.resource_id], ev.resource_id)
                  for ev in cat]
 plt_name = '/media/chet/hdd/seismic/NZ/catalogs/corr_figs/1_sec_temps/' +\
            'spacegrp_%s_dend_0.20.png' % grp_num
 corr_mat = '/media/chet/hdd/seismic/NZ/catalogs/corr_figs/1_sec_temps/' +\
            'spacegrp_%s_mat.npy' % grp_num
 groups = clustering.cluster(template_list,
                             corr_thresh=0.30,
                             allow_shift=True,
                             shift_len=25,
                             save_corrmat=True,
                             cores=cores,
                             debug=2)
 for i, grp in enumerate(groups):
     corrgrp_cat = Catalog()
     f_name_root = '/media/chet/hdd/seismic/NZ/catalogs/'
     f_name = 'spacegrp_%s_corrgrp_%03d' % (grp_num, i)
     for e in cat:
         for temp_st in grp:
             if e.resource_id == temp_st[1]:
                 corrgrp_cat.append(e)
     corrgrp_cat.write(f_name_root + 'qml/corr_groups/1_sec_temps/' +
                       f_name + '.xml',
                       format="QUAKEML")
     corrgrp_cat.write(f_name_root + 'shp/corr_groups/1_sec_temps/' +
Example #10
from glob import glob

import numpy as np
from obspy import read, Stream, Catalog
from eqcorrscan.core.match_filter import Tribe, Template
from eqcorrscan.utils import clustering
from eqcorrscan.utils.pre_processing import shortproc

# cluster_from_dist_mat is assumed to be defined alongside this function;
# it is not part of eqcorrscan's public clustering API.


def cluster_cat(catalog,
                corr_thresh,
                corr_params=None,
                raw_wav_dir=None,
                dist_mat=False,
                out_cat=None,
                show=False,
                method='average'):
    """
    Cross correlate all templates in a tribe and return separate tribes for
    each cluster
    :param tribe: Tribe to cluster
    :param corr_thresh: Correlation threshold for clustering
    :param corr_params: Dictionary of filter parameters. Must include keys:
        lowcut, highcut, samp_rate, filt_order, pre_pick, length, shift_len,
        cores
    :param raw_wav_dir: Directory of waveforms to take from
    :param dist_mat: If there's a precomputed distance matrix, use this
        instead of doing all the correlations
    :param out_cat: Output catalog corresponding to the events
    :param show: Show the dendrogram? Careful as this can exceed max recursion
    :param wavs: Should we even bother with processing waveforms? Otherwise
        will just populate the tribe with an empty Stream
    :return:

    .. Note: Functionality here is pilaged from align design as we don't
        want the multiplexed portion of that function.
    """

    if corr_params and raw_wav_dir:
        shift_len = corr_params['shift_len']
        lowcut = corr_params['lowcut']
        highcut = corr_params['highcut']
        samp_rate = corr_params['samp_rate']
        filt_order = corr_params['filt_order']
        pre_pick = corr_params['pre_pick']
        length = corr_params['length']
        cores = corr_params['cores']
        raw_wav_files = glob('%s/*' % raw_wav_dir)
        raw_wav_files.sort()
        all_wavs = [wav.split('/')[-1].split('_')[-3] for wav in raw_wav_files]
        print(all_wavs[0])
        names = [
            ev.resource_id.id.split('/')[-1] for ev in catalog
            if ev.resource_id.id.split('/')[-1] in all_wavs
        ]
        print(names[0])
        wavs = [
            wav for wav in raw_wav_files
            if wav.split('/')[-1].split('_')[-3] in names
        ]
        print(wavs[0])
        new_cat = Catalog(events=[
            ev for ev in catalog if ev.resource_id.id.split('/')[-1] in names
        ])
        print('Processing temps')
        temp_list = [(shortproc(read('{}/*'.format(tmp)),
                                lowcut=lowcut,
                                highcut=highcut,
                                samp_rate=samp_rate,
                                filt_order=filt_order,
                                parallel=True,
                                num_cores=cores),
                      ev.resource_id.id.split('/')[-1])
                     for tmp, ev in zip(wavs, new_cat)]
        print('Clipping traces')
        rm_temps = []
        rm_ev = []  # Events to drop with their templates; must persist
        # across iterations, so initialize outside the loop
        for i, temp in enumerate(temp_list):
            print('Clipping template %s' % new_cat[i].resource_id.id)
            rm_ts = []  # Make a list of traces with no pick to remove
            for tr in temp[0]:
                pk = [
                    pk for pk in new_cat[i].picks
                    if pk.waveform_id.station_code == tr.stats.station
                    and pk.waveform_id.channel_code == tr.stats.channel
                ]
                if len(pk) == 0:
                    rm_ts.append(tr)
                else:
                    tr.trim(starttime=pk[0].time - shift_len - pre_pick,
                            endtime=pk[0].time - pre_pick + length + shift_len)
            # Remove pickless traces
            for rm in rm_ts:
                temp[0].traces.remove(rm)
            # If trace lengths are internally inconsistent, remove template
            if len(list(set([len(tr) for tr in temp[0]]))) > 1:
                rm_temps.append(temp)
            # If template is now length 0, remove it and associated event
            if len(temp[0]) == 0:
                rm_temps.append(temp)
                rm_ev.append(new_cat[i])
        for t in rm_temps:
            temp_list.remove(t)
        # Remove the corresponding events as well so catalog and distmat
        # are the same shape
        for rme in rm_ev:
            new_cat.events.remove(rme)
    print(new_cat)
    if out_cat:
        new_cat.write(out_cat, format="QUAKEML")
    print('Clustering')
    if isinstance(dist_mat, np.ndarray):
        print('Assuming the tribe provided is the same shape as dist_mat')
        # Dummy streams
        temp_list = [(Stream(), ev) for ev in catalog]
        groups = cluster_from_dist_mat(dist_mat=dist_mat,
                                       temp_list=temp_list,
                                       show=show,
                                       corr_thresh=corr_thresh,
                                       method=method)
    else:
        groups = clustering.cluster(temp_list,
                                    show=show,
                                    corr_thresh=corr_thresh,
                                    shift_len=shift_len * 2,
                                    save_corrmat=True,
                                    cores=cores)
    group_tribes = []
    group_cats = []
    if corr_params:
        for group in groups:
            group_tribes.append(
                Tribe(templates=[
                    Template(st=tmp[0],
                             name=tmp[1].resource_id.id.split('/')[-1],
                             event=tmp[1],
                             highcut=highcut,
                             lowcut=lowcut,
                             samp_rate=samp_rate,
                             filt_order=filt_order,
                             prepick=pre_pick) for tmp in group
                ]))
            group_cats.append(Catalog(events=[tmp[1] for tmp in group]))
    else:
        for group in groups:
            group_tribes.append(
                Tribe(templates=[
                    Template(st=tmp[0],
                             name=tmp[1].resource_id.id.split('/')[-1],
                             event=tmp[1],  # tmp[1] is the Event itself here
                             highcut=None,
                             lowcut=None,
                             samp_rate=None,
                             filt_order=None,
                             prepick=None) for tmp in group
                ]))
            group_cats.append(Catalog(events=[tmp[1] for tmp in group]))
    return group_tribes, group_cats
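
A hypothetical invocation; my_catalog and all values are illustrative, and
corr_params must carry exactly the keys listed in the docstring:

corr_params = dict(lowcut=2.0, highcut=10.0, samp_rate=50.0, filt_order=4,
                   pre_pick=0.5, length=4.0, shift_len=0.25, cores=4)
group_tribes, group_cats = cluster_cat(
    my_catalog, corr_thresh=0.3, corr_params=corr_params,
    raw_wav_dir='raw_wavs', out_cat='clustered_cat.xml')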
Example #11
from glob import glob

import numpy as np
from obspy import read

from eqcorrscan.utils import clustering

temp_dir = '/media/rotnga_data/templates/2015_dayproc/*'
temp_files = glob(temp_dir)
temp_files.sort()

template_list = []
files_wo_data = []
for filename in temp_files:
    try:
        template_list.append(read(filename))
    except TypeError:
        print('No actual data in this file')
        files_wo_data.append(filename)
# Run the hierarchical clustering function
groups = clustering.cluster(template_list, show=False, corr_thresh=0.28,
                            save_corrmat=True, debug=2)

"""
Now compute the SVD (or empirical approximation) for each family
of MORE THAN ONE event
Use SVD() or empirical_SVD()
"""
# First, empirical_SVD
first_subspace = []
second_subspace = []
for group in groups:
    if len(group) > 1:
        [first, second] = clustering.empirical_SVD(group)
        # Account for np.diff() returning an array one sample shorter
        # than the original
        for tr in second:
            tr.data = np.concatenate(([0.0], tr.data))
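
The comment block above also mentions clustering.SVD() as an alternative to
empirical_SVD(). A hedged sketch only: the return values below follow older
EQcorrscan releases (newer ones renamed the function to svd), so check the
docs of your installed version before relying on this order:

for group in groups:
    if len(group) > 1:
        # Assumed signature and return order -- verify against your version
        SVectors, SValues, Uvectors, stachans = clustering.SVD(
            stream_list=group)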