def test_filter_error(self):
    """Reject a highcut above the Nyquist frequency.

    The stream is processed at a sampling rate of 1 Hz, so a highcut of
    0.6 Hz lies above the 0.5 Hz Nyquist and shortproc is expected to
    raise IOError.
    """
    bad_filter = dict(
        lowcut=0.1, highcut=0.6, filt_order=4, samp_rate=1,
        parallel=False, num_cores=False, starttime=None, endtime=None)
    with self.assertRaises(IOError):
        shortproc(self.short_stream.copy(), **bad_filter)
def test_parallel_core_unset(self):
    """Run shortproc in parallel with num_cores left unset (False).

    The processed stream must keep the expected number of channels and
    every trace must retain the input start and end times.
    """
    settings = dict(
        lowcut=0.1, highcut=0.4, filt_order=4, samp_rate=1, debug=0,
        parallel=True, num_cores=False, starttime=None, endtime=None)
    result = shortproc(self.short_stream.copy(), **settings)
    self.assertEqual(len(result), self.nchans)
    for trace in result:
        self.assertEqual(self.instart, trace.stats.starttime)
        self.assertEqual(self.inend, trace.stats.endtime)
def test_shortproc(self):
    """Run shortproc serially and check the basic output.

    The processed stream must keep the expected number of channels and
    every trace must retain the input start and end times.
    """
    settings = dict(
        lowcut=0.1, highcut=0.4, filt_order=4, samp_rate=1, debug=0,
        parallel=False, num_cores=False, starttime=None, endtime=None)
    result = shortproc(self.short_stream.copy(), **settings)
    self.assertEqual(len(result), self.nchans)
    for trace in result:
        self.assertEqual(self.instart, trace.stats.starttime)
        self.assertEqual(self.inend, trace.stats.endtime)
def test_shortproc_set_end(self):
    """Check that shortproc trims to a requested end time.

    The end time passed in is two seconds before the end of the first
    input trace; every processed trace must end there while keeping the
    original start time.
    """
    result = shortproc(
        self.short_stream.copy(),
        lowcut=0.1,
        highcut=0.4,
        filt_order=4,
        samp_rate=1,
        debug=0,
        parallel=False,
        num_cores=False,
        starttime=None,
        endtime=self.short_stream[0].stats.endtime - 2,
    )
    self.assertEqual(len(result), self.nchans)
    for trace in result:
        self.assertEqual(self.instart, trace.stats.starttime)
        self.assertEqual(self.inend - 2, trace.stats.endtime)
def test_trace_as_argument(self):
    """Check that a single Trace goes in and a single Trace comes out.

    The first trace of the copied stream is passed to shortproc; the
    result must be a Trace with unchanged start and end times.
    """
    single_trace = self.short_stream.copy()[0]
    result = shortproc(
        single_trace, lowcut=0.1, highcut=0.4, filt_order=4, samp_rate=1,
        debug=0, parallel=False, num_cores=False, starttime=None,
        endtime=None)
    self.assertIsInstance(result, Trace)
    self.assertEqual(self.instart, result.stats.starttime)
    self.assertEqual(self.inend, result.stats.endtime)
def read_gappy_real_data():
    """
    Download and pre-process one hour of awkward real data.

    Requests channel NZ.DUWZ.20.BNZ from the GEONET FDSN client for
    2016-12-31T23:58:56 to 2017-01-01T00:58:56, merges the result and runs
    it through shortproc (lowcut=2, highcut=20, filt_order=4, samp_rate=50).
    The original author notes that these data contain a gap followed by a
    spike, as well as a long-period trend.

    :returns: Whatever shortproc returns for the merged stream.
    """
    from obspy.clients.fdsn import Client
    from obspy import UTCDateTime
    from eqcorrscan.utils.pre_processing import shortproc

    client = Client("GEONET")
    window_start = UTCDateTime(2016, 12, 31, 23, 58, 56)
    window_end = UTCDateTime(2017, 1, 1, 0, 58, 56)
    raw = client.get_waveforms(
        network="NZ", station="DUWZ", location="20", channel="BNZ",
        starttime=window_start, endtime=window_end)
    return shortproc(
        st=raw.merge(), lowcut=2, highcut=20, filt_order=4, samp_rate=50)
def setUpClass(cls):
    """Build NCEDC templates and one hour of processed continuous data.

    Stores on the class: ``templates`` (each sorted), ``st`` (the processed
    continuous stream) and ``template_names`` (start time of the first
    trace of every template, as a string).
    """
    print('\t\t\t Downloading data')
    client = Client('NCEDC')
    t1 = UTCDateTime(2004, 9, 28, 17)
    t2 = t1 + 3600
    # A longer window (t1 = UTCDateTime(2004, 9, 28), 80000 s) was used
    # previously; one hour keeps the download small.
    process_len = 3600
    events = client.get_events(
        starttime=t1, endtime=t2, minmagnitude=4, minlatitude=35.7,
        maxlatitude=36.1, minlongitude=-120.6, maxlongitude=-120.2,
        includearrivals=True)
    events = catalog_utils.filter_picks(
        events, channels=['EHZ'], top_n_picks=5)
    cls.templates = template_gen.from_client(
        catalog=events, client_id='NCEDC', lowcut=2.0, highcut=9.0,
        samp_rate=50.0, filt_order=4, length=3.0, prepick=0.15,
        swin='all', process_len=process_len)
    for template in cls.templates:
        template.sort()
    # Unique (network, station, channel) combinations used by any template
    unique_stachans = list({
        (tr.stats.network, tr.stats.station, tr.stats.channel)
        for template in cls.templates for tr in template})
    # The requested channel code is rebuilt as <band>H<component>
    bulk_info = [
        (net, sta, '*', chan[0] + 'H' + chan[1], t1, t1 + process_len)
        for net, sta, chan in unique_stachans]
    # Just downloading an hour of data
    continuous = client.get_waveforms_bulk(bulk_info)
    continuous.merge(fill_value='interpolate')
    cls.st = pre_processing.shortproc(
        continuous, lowcut=2.0, highcut=9.0, filt_order=4, samp_rate=50.0,
        debug=0, num_cores=1)
    cls.template_names = [
        str(template[0].stats.starttime) for template in cls.templates]
def get_real_multichannel_data():
    """
    Download and pre-process ten minutes of data from two GeoNet stations.

    Requests the EHZ channel of NZ.POWZ and NZ.HOWZ (any location code) for
    the 600 s starting at 2016-01-04T12:00:00, merges the stream and runs
    shortproc (lowcut=2.0, highcut=9.0, filt_order=4, samp_rate=20.0) with
    the request window passed as starttime and endtime.

    :returns: Whatever shortproc returns for the merged stream.
    """
    from obspy.clients.fdsn import Client
    from obspy import UTCDateTime
    from eqcorrscan.utils.pre_processing import shortproc

    t1 = UTCDateTime("2016-01-04T12:00:00.000000Z")
    t2 = t1 + 600
    bulk = [('NZ', station, '*', 'EHZ', t1, t2)
            for station in ('POWZ', 'HOWZ')]
    raw = Client("GEONET").get_waveforms_bulk(bulk)
    return shortproc(
        raw.merge(), lowcut=2.0, highcut=9.0, filt_order=4, samp_rate=20.0,
        starttime=t1, endtime=t2)
def test_short_match_filter(self):
    """Run match_filter on an hour of real data.

    Templates are generated from NCEDC events and run against the last hour
    of the same day.  The test makes no assertions on the detections; it
    only checks that the whole chain runs without raising.
    """
    from obspy.clients.fdsn import Client
    from obspy import UTCDateTime
    from eqcorrscan.core import template_gen, match_filter
    from eqcorrscan.utils import pre_processing, catalog_utils

    client = Client('NCEDC')
    t1 = UTCDateTime(2004, 9, 28)
    t2 = t1 + 86400
    events = client.get_events(
        starttime=t1, endtime=t2, minmagnitude=4, minlatitude=35.7,
        maxlatitude=36.1, minlongitude=-120.6, maxlongitude=-120.2,
        includearrivals=True)
    events = catalog_utils.filter_picks(
        events, channels=['EHZ'], top_n_picks=5)
    templates = template_gen.from_client(
        catalog=events, client_id='NCEDC', lowcut=2.0, highcut=9.0,
        samp_rate=50.0, filt_order=4, length=3.0, prepick=0.15,
        swin='all', process_len=3600)
    # Request the channels of the first template; the channel code is
    # rebuilt as <band>H<component>.  Only the final hour is downloaded.
    bulk_info = []
    for tr in templates[0]:
        chan = tr.stats.channel
        bulk_info.append(
            (tr.stats.network, tr.stats.station, '*',
             chan[0] + 'H' + chan[1], t2 - 3600, t2))
    st = client.get_waveforms_bulk(bulk_info)
    st.merge(fill_value='interpolate')
    st = pre_processing.shortproc(
        st, lowcut=2.0, highcut=9.0, filt_order=4, samp_rate=50.0,
        debug=0, num_cores=4)
    template_names = [
        str(template[0].stats.starttime) for template in templates]
    detections = match_filter.match_filter(
        template_names=template_names, template_list=templates, st=st,
        threshold=8.0, threshold_type='MAD', trig_int=6.0, plotvar=False,
        plotdir='.', cores=4)
def setUpClass(cls):
    """Build GEONET templates and one hour of processed continuous data.

    Every event gets an extra 'S' pick, 10 s after its first pick and on
    the same waveform id, before the templates are generated.  Stores
    ``templates``, ``st`` and ``template_names`` on the class.
    """
    client = Client('GEONET')
    t1 = UTCDateTime(2016, 9, 4)
    t2 = t1 + 86400
    events = get_geonet_events(
        startdate=t1, enddate=t2, minmag=4, minlat=-49, maxlat=-35,
        minlon=175.0, maxlon=185.0)
    events = catalog_utils.filter_picks(
        events, channels=['EHZ'], top_n_picks=5)
    for event in events:
        first_pick = event.picks[0]
        s_pick = Pick()
        s_pick.phase_hint = 'S'
        s_pick.time = first_pick.time + 10
        s_pick.waveform_id = first_pick.waveform_id
        event.picks.append(s_pick)
    cls.templates = template_gen.from_client(
        catalog=events, client_id='GEONET', lowcut=2.0, highcut=9.0,
        samp_rate=50.0, filt_order=4, length=3.0, prepick=0.15,
        swin='all', process_len=3600)
    # Request the channels of the first template for the fifth hour of the
    # day; the channel code is rebuilt as <band>H<component>.
    bulk_info = []
    for tr in cls.templates[0]:
        chan = tr.stats.channel
        bulk_info.append(
            (tr.stats.network, tr.stats.station, '*',
             chan[0] + 'H' + chan[1],
             t1 + (4 * 3600), t1 + (5 * 3600)))
    print('Downloading data')
    continuous = client.get_waveforms_bulk(bulk_info)
    continuous.merge(fill_value='interpolate')
    continuous.trim(t1 + (4 * 3600), t1 + (5 * 3600)).sort()
    # This is slow?
    print('Processing continuous data')
    cls.st = pre_processing.shortproc(
        continuous, lowcut=2.0, highcut=9.0, filt_order=4, samp_rate=50.0,
        debug=0, num_cores=1)
    cls.st.trim(t1 + (4 * 3600), t1 + (5 * 3600)).sort()
    cls.template_names = [
        str(template[0].stats.starttime) for template in cls.templates]
def make_dist_mat(directory, highcut, lowcut, samp_rate, filt_order,
                  raw_prepick, corr_prepick, length, shift, outfile, cores):
    """
    Compute and save a correlation distance matrix for a template directory.

    Every file in ``directory`` is read, pre-processed with shortproc,
    trimmed to the correlation window and handed to distance_matrix.  The
    resulting matrix is written with ``np.save``.

    :param directory: Directory of template waveform files; every entry in
        the directory is read.
    :param highcut: filter highcut
    :param lowcut: filter lowcut
    :param samp_rate: output sample rate
    :param filt_order: filter corners
    :param raw_prepick: Prepick time of template files
    :param corr_prepick: Output prepick before correlations
    :param length: Length of temp to be correlating
    :param shift: Shift length in secs allowed during correlations
    :param outfile: Filename for output distance matrix
    :param cores: Number of cores passed to both shortproc and
        distance_matrix
    :return: None; the matrix is only written to ``outfile``.
    """
    # glob returns entries in arbitrary order; sort so that the rows and
    # columns of the matrix follow a reproducible (alphabetical) file order.
    temp_files = sorted(glob('%s/*' % directory))
    temp_sts = [
        shortproc(read(tmp), lowcut=lowcut, highcut=highcut,
                  samp_rate=samp_rate, filt_order=filt_order, parallel=True,
                  num_cores=cores)
        for tmp in temp_files]
    # Window kept for correlation, measured from the start of each trace
    front_clip = raw_prepick - corr_prepick
    back_clip = front_clip + length
    for st in temp_sts:
        for tr in st:
            # Both limits are evaluated from the untrimmed start time
            tr.trim(starttime=tr.stats.starttime + front_clip,
                    endtime=tr.stats.starttime + back_clip)
    print('Starting distance matrix computations')
    dist_mat = distance_matrix(temp_sts, allow_shift=True, shift_len=shift,
                               cores=cores)
    print('Saving matrix to %s' % outfile)
    np.save(outfile, dist_mat)
def test_match_filter(self, samp_rate=20.0, debug=0):
    """
    Check that match_filter recovers events seeded into synthetic data.

    Uses synthetic templates and seeded, randomised data.  A detection
    counts as true when it falls on, or within ten samples of, a seed time
    of the detecting template.  The test requires at least one true
    detection and fewer than one false detection for every four true ones.

    :type samp_rate: float
    :param samp_rate: Sampling rate in Hz used for the synthetic data.
    :type debug: int
    :param debug: Debug level, higher the number the more output.
    """
    from eqcorrscan.utils import pre_processing
    from eqcorrscan.utils import plotting
    from eqcorrscan.core import match_filter
    from eqcorrscan.utils.synth_seis import generate_synth_data
    from obspy import UTCDateTime
    import string

    # Generate a random dataset
    templates, data, seeds = generate_synth_data(
        nsta=5, ntemplates=2, nseeds=50, samp_rate=samp_rate, t_length=6.0,
        max_amp=5.0, debug=debug)
    # Notes to the user: If you use more templates you should ensure they
    # are more different, e.g. set the data to have larger moveouts,
    # otherwise similar templates will detect events seeded by another
    # template.
    # Test the pre_processing functions
    data = pre_processing.dayproc(
        st=data, lowcut=2.0, highcut=8.0, filt_order=3, samp_rate=samp_rate,
        debug=0, starttime=UTCDateTime(0))
    if debug > 0:
        data.plot()
    # Filter the templates with the same band as the data.  The return
    # value is not used, as in the original test.
    for template in templates:
        pre_processing.shortproc(
            st=template, lowcut=2.0, highcut=8.0, filt_order=3,
            samp_rate=samp_rate)
        if debug > 0:
            template.plot()
    template_names = list(string.ascii_lowercase)[0:len(templates)]
    detections = match_filter.match_filter(
        template_names=template_names, template_list=templates, st=data,
        threshold=10.0, threshold_type='MAD', trig_int=6.0, plotvar=False,
        plotdir='.', cores=1, debug=0)
    # Compare the detections to the seeds
    print('This test made ' + str(len(detections)) + ' detections')
    ktrue = 0
    kfalse = 0
    for detection in detections:
        print(detection.template_name)
        i = template_names.index(detection.template_name)
        t_seeds = seeds[i]
        dtime_samples = int((detection.detect_time - UTCDateTime(0)) *
                            samp_rate)
        if dtime_samples in t_seeds['time']:
            j = list(t_seeds['time']).index(dtime_samples)
            print('Detection at SNR of: ' + str(t_seeds['SNR'][j]))
            ktrue += 1
        else:
            min_diff = min(abs(t_seeds['time'] - dtime_samples))
            if min_diff < 10:
                # If there is a match within ten samples then it is
                # good enough
                j = list(abs(t_seeds['time'] -
                             dtime_samples)).index(min_diff)
                print('Detection at SNR of: ' + str(t_seeds['SNR'][j]))
                ktrue += 1
            else:
                print('Detection at sample: ' + str(dtime_samples) +
                      ' does not match anything in seed times:')
                kfalse += 1
            print('Minimum difference in samples is: ' + str(min_diff))
    # Plot the detections
    if debug > 3:
        for i, template in enumerate(templates):
            times = [d.detect_time.datetime for d in detections
                     if d.template_name == template_names[i]]
            print(times)
            plotting.detection_multiplot(data, template, times)
    # Set an 'acceptable' ratio of positive to false detections
    print(str(ktrue) + ' true detections and ' + str(kfalse) +
          ' false detections')
    # Fail cleanly, rather than with ZeroDivisionError, when nothing
    # matched a seed.
    self.assertGreater(ktrue, 0)
    # float() keeps this a true ratio even under integer division, where
    # kfalse / ktrue would floor to 0 and always pass.
    self.assertLess(kfalse / float(ktrue), 0.25)
temp_name = file1.split("/")[-1:][0][:-3] template_names.append(temp_name) temp_tup.append((temp_name, temp_data)) else: temp_data = read(file1) templates += [temp_data] template_names.append(file1.split("/")[-1:][0][:-3]) temp_name = file1.split("/")[-1:][0][:-3] template_names.append(temp_name) temp_tup.append((temp_name, temp_data)) # Extract the station info from the templates for template in templates: #Filter and downsample sample data template = pre_processing.shortproc(template, 1.0, 20.0, 3, 100.0, matchdef.debug) if not 'stachans' in locals(): stachans = [(tr.stats.station, tr.stats.channel) for tr in template] else: stachans += [(tr.stats.station, tr.stats.channel) for tr in template] # Make this a unique list stachans = list(set(stachans)) # Read in the continuous data for these station, channel combinations raw_files = [] raw_dir = '/Volumes/GeoPhysics_07/users-data/matsonga/MRP_PROJ/data/mastersData/sac' #Recursively search a directory for specific files amtching desired day and stachan for root, dirnames, filenames in os.walk(raw_dir): for stachan in stachans: for filename in fnmatch.filter(filenames, 'NZ.'+stachan[0]+'*' +
def extract_from_stack(stack, template, length, pre_pick, pre_pad,
                       Z_include=False, pre_processed=True, samp_rate=None,
                       lowcut=None, highcut=None, filt_order=3):
    """
    Extract a multiplexed template from a stack of detections.

    Cuts a new template out of a stack of previous detections, re-using the
    relative channel delays of the template that made those detections.
    The stack itself is copied and left untouched.

    :type stack: obspy.core.stream.Stream
    :param stack:
        Waveform stack from detections. Can be of any length and can have
        delays already included, or not.
    :type template: obspy.core.stream.Stream
    :param template:
        Template used to make the detections in the stack. Delays are
        measured relative to the earliest trace start time in this stream.
    :type length: float
    :param length: Length of new template in seconds
    :type pre_pick: float
    :param pre_pick: Extract additional data before the detection, seconds
    :type pre_pad: float
    :param pre_pad:
        Pad used in seconds when extracting the data, e.g. the time before
        the detection extracted. If using clustering.extract_detections
        this is half the length of the extracted waveform.
    :type Z_include: bool
    :param Z_include:
        If True, a stack channel with no template trace on the same station
        and component falls back to every template delay found for that
        station. Channels that do match a template trace are always used.
    :type pre_processed: bool
    :param pre_processed:
        Have the data been pre-processed, if True (default) then we will
        only cut the data here.
    :type samp_rate: float
    :param samp_rate:
        Only used if pre_processed=False: desired sampling rate in Hz,
        defaults to None.
    :type lowcut: float
    :param lowcut:
        Only used if pre_processed=False: lowcut in Hz, defaults to None.
    :type highcut: float
    :param highcut:
        Only used if pre_processed=False: highcut in Hz, defaults to None.
    :type filt_order: int
    :param filt_order:
        Only used if pre_processed=False: filter order, defaults to 3.

    :returns: Newly cut template.
    :rtype: :class:`obspy.core.stream.Stream`
    """
    # Work on a copy so that the caller's stack is never trimmed
    working = stack.copy()
    # The earliest template start marks the detection time
    detect_time = min(tr.stats.starttime for tr in template)
    # (station, component, delay in seconds) for every template trace
    delays = [
        (tr.stats.station, tr.stats.channel[-1],
         tr.stats.starttime - detect_time)
        for tr in template]
    if not pre_processed:
        working = pre_processing.shortproc(
            st=working, lowcut=lowcut, highcut=highcut,
            filt_order=filt_order, samp_rate=samp_rate)
    out = Stream()
    for tr in working:
        station = tr.stats.station
        component = tr.stats.channel[-1]
        lags = [lag for sta, comp, lag in delays
                if sta == station and comp == component]
        if Z_include and not lags:
            # No trace on this component: use any delay from the station
            lags = [lag for sta, _, lag in delays if sta == station]
        if not lags:
            Logger.error("No matching template channel found for stack channel"
                         " {0}.{1}".format(tr.stats.station, tr.stats.channel))
            continue
        # One output trace per matching delay
        for lag in lags:
            origin = tr.stats.starttime
            out += tr.copy().trim(
                starttime=origin + lag + pre_pad - pre_pick,
                endtime=origin + lag + pre_pad + length - pre_pick)
    return out
def template_gen(method, lowcut, highcut, samp_rate, filt_order,
                 length, prepick, swin="all", process_len=86400,
                 all_horiz=False, delayed=True, plot=False, plotdir=None,
                 return_event=False, min_snr=None, parallel=False,
                 num_cores=False, save_progress=False, skip_short_chans=False,
                 **kwargs):
    """
    Generate processed and cut waveforms for use as templates.

    :type method: str
    :param method:
        Template generation method, must be one of ('from_client',
        'from_seishub', 'from_sac', 'from_meta_file'). - Each method requires
        associated arguments, see note below.
    :type lowcut: float
    :param lowcut: Low cut (Hz), if set to None will not apply a lowcut.
    :type highcut: float
    :param highcut: High cut (Hz), if set to None will not apply a highcut.
    :type samp_rate: float
    :param samp_rate: New sampling rate in Hz.
    :type filt_order: int
    :param filt_order: Filter level (number of corners).
    :type length: float
    :param length: Length of template waveform in seconds.
    :type prepick: float
    :param prepick: Pre-pick time in seconds
    :type swin: str
    :param swin:
        P, S, P_all, S_all or all, defaults to all: see note in
        :func:`eqcorrscan.core.template_gen.template_gen`
    :type process_len: int
    :param process_len: Length of data in seconds to download and process.
    :type all_horiz: bool
    :param all_horiz:
        To use both horizontal channels even if there is only a pick on one
        of them.  Defaults to False.
    :type delayed: bool
    :param delayed:
        If True, each channel will begin relative to its own pick-time, if
        set to False, each channel will begin at the same time.
    :type plot: bool
    :param plot: Plot templates or not.
    :type plotdir: str
    :param plotdir:
        The path to save plots to. If `plotdir=None` (default) then the
        figure will be shown on screen.
    :type return_event: bool
    :param return_event:
        Whether to return the event and process length or not.
    :type min_snr: float
    :param min_snr:
        Minimum signal-to-noise ratio for a channel to be included in the
        template, where signal-to-noise ratio is calculated as the ratio of
        the maximum amplitude in the template window to the rms amplitude in
        the whole window given.
    :type parallel: bool
    :param parallel: Whether to process data in parallel or not.
    :type num_cores: int
    :param num_cores:
        Number of cores to try and use, if False and parallel=True, will use
        either all your cores, or as many traces as in the data (whichever is
        smaller).
    :type save_progress: bool
    :param save_progress:
        Whether to save the resulting templates at every data step or not.
        Useful for long-running processes. Templates are written as
        miniseed into the directory "eqcorrscan_temporary_templates";
        templates containing no traces are not written.
    :type skip_short_chans: bool
    :param skip_short_chans:
        Whether to ignore channels that have insufficient length data or not.
        Useful when the quality of data is not known, e.g. when downloading
        old, possibly triggered data from a datacentre

    :returns:
        List of :class:`obspy.core.stream.Stream` Templates. If
        return_event is True, a tuple of (templates, catalog, process
        lengths) is returned instead.
    :rtype: list

    .. note::
        *Method specific arguments:*

        - `from_client` requires:
            :param str client_id:
                string passable by obspy to generate Client, or a Client
                instance
            :param `obspy.core.event.Catalog` catalog:
                Catalog of events to generate template for
            :param float data_pad: Pad length for data-downloads in seconds
        - `from_seishub` requires:
            :param str url: url to seishub database
            :param `obspy.core.event.Catalog` catalog:
                Catalog of events to generate template for
            :param float data_pad: Pad length for data-downloads in seconds
        - `from_sac` requires:
            :param list sac_files:
                obspy.core.stream.Stream of sac waveforms, or list of paths to
                sac waveforms.
            .. note::
                See `eqcorrscan.utils.sac_util.sactoevent` for details on
                how pick information is collected.
        - `from_meta_file` requires:
            :param str meta_file:
                Path to obspy-readable event file, or an obspy Catalog
            :param `obspy.core.stream.Stream` st:
                Stream containing waveform data for template. Note that this
                should be the same length of stream as you will use for the
                continuous detection, e.g. if you detect in day-long files,
                give this a day-long file!
            :param bool process:
                Whether to process the data or not, defaults to True.

    .. note::
        process_len should be set to the same length as used when computing
        detections using match_filter.match_filter, e.g. if you read
        in day-long data for match_filter, process_len should be 86400.

    .. rubric:: Example

    >>> from obspy.clients.fdsn import Client
    >>> from eqcorrscan.core.template_gen import template_gen
    >>> client = Client('NCEDC')
    >>> catalog = client.get_events(eventid='72572665', includearrivals=True)
    >>> # We are only taking two picks for this example to speed up the
    >>> # example, note that you don't have to!
    >>> catalog[0].picks = catalog[0].picks[0:2]
    >>> templates = template_gen(
    ...    method='from_client', catalog=catalog, client_id='NCEDC',
    ...    lowcut=2.0, highcut=9.0, samp_rate=20.0, filt_order=4, length=3.0,
    ...    prepick=0.15, swin='all', process_len=300, all_horiz=True)
    >>> templates[0].plot(equal_scale=False, size=(800,600)) # doctest: +SKIP

    .. figure:: ../../plots/template_gen.from_client.png

    .. rubric:: Example

    >>> from obspy import read
    >>> from eqcorrscan.core.template_gen import template_gen
    >>> # Get the path to the test data
    >>> import eqcorrscan
    >>> import os
    >>> TEST_PATH = os.path.dirname(eqcorrscan.__file__) + '/tests/test_data'
    >>> st = read(TEST_PATH + '/WAV/TEST_/' +
    ...           '2013-09-01-0410-35.DFDPC_024_00')
    >>> quakeml = TEST_PATH + '/20130901T041115.xml'
    >>> templates = template_gen(
    ...    method='from_meta_file', meta_file=quakeml, st=st, lowcut=2.0,
    ...    highcut=9.0, samp_rate=20.0, filt_order=3, length=2, prepick=0.1,
    ...    swin='S', all_horiz=True)
    >>> print(len(templates[0]))
    10
    >>> templates = template_gen(
    ...    method='from_meta_file', meta_file=quakeml, st=st, lowcut=2.0,
    ...    highcut=9.0, samp_rate=20.0, filt_order=3, length=2, prepick=0.1,
    ...    swin='S_all', all_horiz=True)
    >>> print(len(templates[0]))
    15

    .. rubric:: Example

    >>> from eqcorrscan.core.template_gen import template_gen
    >>> import glob
    >>> # Get all the SAC-files associated with one event.
    >>> sac_files = glob.glob(TEST_PATH + '/SAC/2014p611252/*')
    >>> templates = template_gen(
    ...    method='from_sac', sac_files=sac_files, lowcut=2.0, highcut=10.0,
    ...    samp_rate=25.0, filt_order=4, length=2.0, swin='all', prepick=0.1,
    ...    all_horiz=True)
    >>> print(templates[0][0].stats.sampling_rate)
    25.0
    >>> print(len(templates[0]))
    15
    """
    client_map = {'from_client': 'fdsn', 'from_seishub': 'seishub'}
    assert method in ('from_client', 'from_seishub', 'from_meta_file',
                      'from_sac'), "Unknown method: {0}".format(method)
    if not isinstance(swin, list):
        swin = [swin]
    process = True
    if method in ['from_client', 'from_seishub']:
        catalog = kwargs.get('catalog', Catalog())
        data_pad = kwargs.get('data_pad', 90)
        # Group catalog into days and only download the data once per day
        sub_catalogs = _group_events(
            catalog=catalog, process_len=process_len, template_length=length,
            data_pad=data_pad)
        if method == 'from_client':
            if isinstance(kwargs.get('client_id'), str):
                client = FDSNClient(kwargs.get('client_id', None))
            else:
                client = kwargs.get('client_id', None)
            available_stations = []
        else:
            client = SeisHubClient(kwargs.get('url', None), timeout=10)
            available_stations = client.waveform.get_station_ids()
    elif method == 'from_meta_file':
        if isinstance(kwargs.get('meta_file'), Catalog):
            catalog = kwargs.get('meta_file')
        elif kwargs.get('meta_file'):
            catalog = read_events(kwargs.get('meta_file'))
        elif kwargs.get('catalog'):
            catalog = kwargs.get('catalog')
        sub_catalogs = [catalog]
        st = kwargs.get('st', Stream())
        process = kwargs.get('process', True)
    elif method == 'from_sac':
        sac_files = kwargs.get('sac_files')
        if isinstance(sac_files, list):
            if isinstance(sac_files[0], (Stream, Trace)):
                # This is a list of streams...
                st = Stream(sac_files[0])
                for sac_file in sac_files[1:]:
                    st += sac_file
            else:
                sac_files = [read(sac_file)[0] for sac_file in sac_files]
                st = Stream(sac_files)
        else:
            st = sac_files
        # Make an event object...
        catalog = Catalog([sactoevent(st)])
        sub_catalogs = [catalog]

    temp_list = []
    process_lengths = []

    all_channels = bool(
        "P_all" in swin or "S_all" in swin or all_horiz)
    for sub_catalog in sub_catalogs:
        if method in ['from_seishub', 'from_client']:
            Logger.info("Downloading data")
            st = _download_from_client(
                client=client, client_type=client_map[method],
                catalog=sub_catalog, data_pad=data_pad,
                process_len=process_len, available_stations=available_stations,
                all_channels=all_channels)
        Logger.info('Pre-processing data')
        st.merge()
        if len(st) == 0:
            Logger.info("No data")
            continue
        if process:
            data_len = max([len(tr.data) / tr.stats.sampling_rate
                            for tr in st])
            if 80000 < data_len < 90000:
                daylong = True
                starttime = min([tr.stats.starttime for tr in st])
                min_delta = min([tr.stats.delta for tr in st])
                # Cope with the common starttime less than 1 sample before the
                #  start of day.
                if (starttime + min_delta).date > starttime.date:
                    starttime = (starttime + min_delta)
                # Data starting more than ten minutes into the day are not
                # treated as day-long.
                day_offset = abs(starttime - UTCDateTime(starttime.date))
                if day_offset > 600:
                    Logger.info(
                        "Data start {0} s from the start of the day, not "
                        "processing as day-long data".format(day_offset))
                    daylong = False
                starttime = starttime.date
            else:
                daylong = False
            # Check if the required amount of data have been downloaded - skip
            # channels if arg set.
            if skip_short_chans:
                _st = Stream()
                for tr in st:
                    # Masked samples (gaps) do not count towards the length
                    if np.ma.is_masked(tr.data):
                        _len = np.ma.count(tr.data) * tr.stats.delta
                    else:
                        _len = tr.stats.npts * tr.stats.delta
                    if _len < process_len * .8:
                        Logger.info(
                            "Data for {0} are too short, skipping".format(
                                tr.id))
                    else:
                        _st += tr
                st = _st
                if len(st) == 0:
                    Logger.info("No data")
                    continue
            if daylong:
                st = pre_processing.dayproc(
                    st=st, lowcut=lowcut, highcut=highcut,
                    filt_order=filt_order, samp_rate=samp_rate,
                    parallel=parallel, starttime=UTCDateTime(starttime),
                    num_cores=num_cores)
            else:
                st = pre_processing.shortproc(
                    st=st, lowcut=lowcut, highcut=highcut,
                    filt_order=filt_order, parallel=parallel,
                    samp_rate=samp_rate, num_cores=num_cores)
        data_start = min([tr.stats.starttime for tr in st])
        data_end = max([tr.stats.endtime for tr in st])

        for event in sub_catalog:
            if len(event.picks) == 0:
                Logger.warning(
                    'No picks for event {0}'.format(event.resource_id))
                continue
            use_event = True
            # Check that the event is within the data
            for pick in event.picks:
                if not data_start < pick.time < data_end:
                    Logger.warning(
                        "Pick outside of data span: Pick time {0} Start "
                        "time {1} End time: {2}".format(
                            str(pick.time), str(data_start), str(data_end)))
                    use_event = False
            if not use_event:
                Logger.error('Event is not within data time-span')
                continue
            # Report the picks that will be handed to _template_gen
            Logger.debug("I have found the following picks")
            for pick in event.picks:
                if not pick.waveform_id:
                    Logger.warning(
                        'Pick not associated with waveforms, will not use:'
                        ' {0}'.format(pick))
                    continue
                Logger.debug(pick)
            # Cut and extract the templates
            template = _template_gen(
                event.picks, st, length, swin, prepick=prepick, plot=plot,
                all_horiz=all_horiz, delayed=delayed, min_snr=min_snr,
                plotdir=plotdir)
            process_lengths.append(len(st[0].data) / samp_rate)
            temp_list.append(template)
        if save_progress:
            if not os.path.isdir("eqcorrscan_temporary_templates"):
                os.makedirs("eqcorrscan_temporary_templates")
            for template in temp_list:
                if len(template) == 0:
                    # The file name comes from the first trace, so a
                    # template without traces cannot be named or written.
                    Logger.debug("Empty template, not saving")
                    continue
                template.write(
                    "eqcorrscan_temporary_templates{0}{1}.ms".format(
                        os.path.sep, template[0].stats.starttime),
                    format="MSEED")
        del st
    if return_event:
        return temp_list, catalog, process_lengths
    return temp_list
def extract_from_stack(stack, template, length, pre_pick, pre_pad,
                       Z_include=False, pre_processed=True, samp_rate=False,
                       lowcut=False, highcut=False, filt_order=False):
    """Function to extract a new template from a stack of previous detections.

    Requires the stack, the template used to make the detections for the
    stack, and we need to know if the stack has been pre-processed.  The
    stack is copied, so the caller's stream is not modified.

    :type stack: :class:obspy.Stream
    :param stack: Waveform stack from detections. Can be of any length and
        can have delays already included, or not.
    :type template: :class:obspy.Stream
    :param template: Template used to make the detections in the stack. Will
        use the delays of this for the new template.
    :type length: float
    :param length: Length of new template in seconds
    :type pre_pick: float
    :param pre_pick: Extract additional data before the detection, seconds
    :type pre_pad: float
    :param pre_pad: Pad used in seconds when extracting the data, e.g. the
        time before the detection extracted. If using
        clustering.extract_detections this is half the length of the
        extracted waveform.
    :type Z_include: bool
    :param Z_include: If True, a stack channel with no template trace on the
        same station and component falls back to the template delays found
        for that station. Channels that do match a template trace are
        always used.
    :type pre_processed: bool
    :param pre_processed: Have the data been pre-processed, if True (default)
        then we will only cut the data here.
    :type samp_rate: float
    :param samp_rate: If pre_processed=False then this is required, desired
        sampling rate in Hz, defaults to False.
    :type lowcut: float
    :param lowcut: If pre_processed=False then this is required, lowcut in
        Hz, defaults to False.
    :type highcut: float
    :param highcut: If pre_processed=False then this is required, highcut in
        Hz, defaults to False
    :type filt_order: int
    :param filt_order: If pre_processed=False then this is required, filter
        order, defaults to False

    :returns: obspy.Stream Newly cut template.  Channels with no matching
        template delay are removed (with a warning); channels matching more
        than one delay are left untrimmed (with a warning).
    """
    import warnings

    new_template = stack.copy()
    # Copy the data before we trim it to keep the stack safe
    # Get the earliest time in the template as this is when the detection is
    # taken.
    mintime = min([tr.stats.starttime for tr in template])
    # Generate a list of tuples of (station, channel, delay) with delay in
    # seconds
    delays = [(tr.stats.station, tr.stats.channel[-1],
               tr.stats.starttime - mintime) for tr in template]
    # Process the data once, before cutting.  Previously this ran inside the
    # loop below, re-filtering the whole stream for every trace.  An empty
    # stack is skipped, as it was never processed before either.
    if not pre_processed and len(new_template) > 0:
        # Imported here so the cut-only path needs no processing module
        from eqcorrscan.utils import pre_processing
        new_template = pre_processing.shortproc(
            new_template, lowcut, highcut, filt_order, samp_rate, 0)
    # Loop through a snapshot of the traces: removing from the stream while
    # iterating over it directly would skip the trace after each removal.
    for tr in list(new_template):
        # Find the matching delay
        delay = [d[2] for d in delays if d[0] == tr.stats.station and
                 d[1] == tr.stats.channel[-1]]
        if Z_include and len(delay) == 0:
            delay = [d[2] for d in delays if d[0] == tr.stats.station]
        if len(delay) == 0:
            msg = ' '.join(['No matching template channel found for stack',
                            'channel', tr.stats.station, tr.stats.channel])
            warnings.warn(msg)
            new_template.remove(tr)
        elif len(delay) > 1:
            msg = ' '.join(['Multiple delays found for stack channel',
                            tr.stats.station, tr.stats.channel])
            warnings.warn(msg)
        else:
            # Both limits are evaluated from the untrimmed start time
            tr.trim(starttime=tr.stats.starttime + delay[0] + pre_pad -
                    pre_pick,
                    endtime=tr.stats.starttime + delay[0] + pre_pad + length -
                    pre_pick)
    return new_template
if not 'st' in locals(): print('No data for this day from pyasdf?!') continue else: print('Merging stream...') st.merge(fill_value='interpolate') day_st = st.copy() for event in day_cat: ev_name = str(event.resource_id).split('/')[2] origin_time = event.origins[0].time print('Trimming data around event time...') day_st.trim(origin_time - 120, origin_time + 120) print('Preprocessing data for day: ' + str(starttime.date)) temp_st = pre_processing.shortproc(day_st, lowcut=1.0, highcut=20.0, filt_order=3, samp_rate=100, debug=0) del day_st print('Feeding stream to _template_gen...') template = _template_gen(event.picks, temp_st, length=4, swin='all', prepick=0.5) print('Writing event ' + ev_name + ' to file...') template.write('/media/rotnga_data/templates/2015/' + ev_name + '.mseed', format="MSEED") del temp_st, template del day_cat
# Read the YN-network file, order the traces by start time and trim
# (trim is called without limits here).
ynst = (
    read("2014080316.YN.mseed")
    .sort(['starttime'])
    .trim()
)
# Only station ZAT is used; QIJ, PGE, DOC and XUW were previously added
# with st += ynst.select(station=...).
st = ynst.select(station='ZAT')
# presumably swaps the response described by paz for the one described by
# paz_1hz; both are defined elsewhere -- confirm against their definitions
st.simulate(paz_remove=paz, paz_simulate=paz_1hz)
st.detrend()
# A direct 20-30 Hz bandpass (corners=4) was tried here previously;
# shortproc now does the filtering.
window_start = st[0].stats.starttime
window_end = st[0].stats.endtime
st = pre_processing.shortproc(
    st,
    lowcut=2,
    highcut=9,
    filt_order=18,
    samp_rate=100,
    starttime=window_start,
    endtime=window_end,
)
st = Stream(st)
templates = multi_template_gen(catalog, st, 5.19, plot=True)
# A 10 s / 36000 s slice around UTCDateTime("2014-08-03T08:30:19.095000")
# was previously plotted here for inspection.
# NOTE(review): every template is written to the same file name, so only
# the last template survives on disk -- confirm whether that is intended.
for t in templates:
    print(t)
    t.write('template.ms', format="MSEED")
def party_relative_mags(party, self_files, shift_len, align_len, svd_len,
                        reject, wav_dir, min_amps, m, c, calibrate=False,
                        method='PCA', plot_svd=False):
    """
    Calculate relative moments/magnitudes for the detections in each Family.

    For every family the template waveform and the detection waveforms are
    processed with shortproc, stripped of traces without picks, aligned by
    cross-correlation and passed to either svd_moments ('LSQR') or
    svd_relative_amps ('PCA').  If requested, and the template event has a
    magnitude, the relative values are calibrated against it and written to
    the detection events.

    :param party: Party of detections (a copy is made and worked on)
    :param self_files: List of self-detection wav files (in order of
        families). NOTE: this list is sorted in place.
    :param shift_len: Maximum shift length used in waveform alignment
    :param align_len: Length of waveform used for correlation in alignment
    :param svd_len: Length of waveform used in relative amplitude calc
    :param reject: Min cc threshold for accepted measurement
    :param wav_dir: Root directory of waveforms; files are looked up by their
        basename with a trailing '.ms' suffix removed
    :param min_amps: Minimum number of relative measurements per pair
    :param m: m in Mw = (m * ML) + c regression between Ml and Mw
    :param c: c in Mw = (m * ML) + c regression between Ml and Mw
    :param calibrate: Flag for calibration to a priori Ml's
    :param method: 'PCA' or 'LSQR'
    :param plot_svd: Bool to plot results of svd relative amplitude calcs
    :return: Tuple of (party copy, cccohs of the last family for which they
        were computed, or None if they were never computed).  Returns None
        if `method` is invalid or svd_moments raises IOError.
    """
    pty = party.copy()
    # sort self files and parties by template name
    pty.families.sort(key=lambda x: x.template.name)
    self_files.sort()
    # Map detection id -> waveform file.  Only a literal '.ms' suffix is
    # removed: str.rstrip('.ms') would strip any trailing '.', 'm' or 's'
    # characters and mangle names such as 'foo_rms.ms'.
    ev_files = {}
    for path in sorted(glob('{}/*'.format(wav_dir))):
        name = os.path.basename(path)
        if name.endswith('.ms'):
            name = name[:-len('.ms')]
        ev_files[name] = path
    # Defined up-front so the final return cannot hit an unbound name when
    # every family is skipped (or the party is empty).
    cccohs = None
    for i, fam in enumerate(pty.families):
        temp_wav = read(self_files[i])
        print('Starting work on family %s' % fam.template.name)
        if len(fam) == 0:
            print('No detections. Moving on.')
            continue
        temp = fam.template
        prepick = temp.prepick
        det_ids = [d.id for d in fam]
        # Read in waveforms for detections in family
        streams = [read(ev_files[det_id]) for det_id in det_ids]
        # Add template wav as the first element
        streams.insert(0, temp_wav)
        print('Template Stream: %s' % str(streams[0]))
        if len(streams[0]) == 0:
            print('Template %s waveforms did not get written. Investigate.'
                  % temp.name)
            continue
        # Process streams then copy to both ccc_streams and svd_streams
        print('Shortproc-ing streams')
        breakit = False
        for st in streams:
            try:
                shortproc(st=st, lowcut=temp.lowcut, highcut=temp.highcut,
                          filt_order=temp.filt_order,
                          samp_rate=temp.samp_rate)
            except ValueError:
                breakit = True
        if breakit:
            print('Something wrong in shortproc. Skip family')
            continue
        # Remove all traces with no picks before copying.  Stream 0 is the
        # template, stream k (k > 0) belongs to detection k - 1.
        for str_ind, st in enumerate(streams):
            if str_ind == 0:
                event = temp.event
            else:
                event = fam.detections[str_ind - 1].event
            picked_ids = set(pk.waveform_id.get_seed_string()
                             for pk in event.picks)
            rms = [tr for tr in st if tr.id not in picked_ids]
            for rm in rms:
                st.traces.remove(rm)
        print('Copying streams')
        wrk_streams = copy.deepcopy(streams)
        svd_streams = copy.deepcopy(streams)  # For svd
        ccc_streams = copy.deepcopy(streams)
        event_list = [temp.event] + [d.event for d in fam.detections]
        try:
            # work out cccoh for each event with template
            cccohs = cc_coh_dets(streams=ccc_streams, events=event_list,
                                 length=svd_len, corr_prepick=prepick,
                                 shift=shift_len)
        except (AssertionError, ValueError) as e:
            # Issue with trimming above?
            print(e)
            continue
        # Cut the working copies to the alignment window around each pick
        for eind, st in enumerate(wrk_streams):
            if eind == 0:
                event = temp.event
            else:
                event = fam.detections[eind - 1].event
            for tr in st:
                pk = [pk for pk in event.picks
                      if pk.waveform_id.get_seed_string() == tr.id][0]
                tr.trim(starttime=pk.time - prepick - shift_len,
                        endtime=pk.time + shift_len + align_len)
        st_seeds = list(set([tr.id for st in wrk_streams for tr in st]))
        st_seeds.sort()
        # Align streams with just P arrivals, then use longer st for svd
        print('Now aligning svd_streams')
        shift_inds = int(shift_len * fam.template.samp_rate)
        for st_seed in st_seeds:
            trs = []
            for st_ind, st in enumerate(wrk_streams):
                if len(st.select(id=st_seed)) > 0:
                    trs.append((st_ind, st.select(id=st_seed)[0]))
            inds, traces = zip(*trs)
            shifts, ccs = stacking.align_traces(trace_list=list(traces),
                                                shift_len=shift_inds,
                                                positive=True,
                                                master=traces[0].copy())
            # We now have shifts based on P correlation, shift and trim
            # larger wavs for svd
            for j, shift in enumerate(shifts):
                st = svd_streams[inds[j]]
                if inds[j] == 0:
                    event = temp.event
                else:
                    event = fam.detections[inds[j] - 1].event
                if ccs[j] < reject:
                    svd_streams[inds[j]].remove(st.select(id=st_seed)[0])
                    print('Removing trace due to low cc value: %s' % ccs[j])
                    continue
                pk = [pk for pk in event.picks
                      if pk.waveform_id.get_seed_string() == st_seed][0]
                strt_tr = pk.time - prepick - shift
                st.select(id=st_seed)[0].trim(strt_tr, strt_tr + svd_len)
        if method == 'LSQR':
            print('Using least-squares method')
            # For every channel, list the stream indices that still hold it
            event_list = []
            for st_id in st_seeds:
                st_list = []
                for stind, st in enumerate(svd_streams):
                    if len(st.select(id=st_id)) > 0:
                        st_list.append(stind)
                event_list.append(st_list)
            u, sigma, v, sta_chans = svd(stream_list=svd_streams, full=True)
            try:
                M, events_out = svd_moments(u, sigma, v, sta_chans,
                                            event_list)
            except IOError as e:
                print('Family %s raised error %s' % (fam.template.name, e))
                return
        elif method == 'PCA':
            print('Using principal component method')
            M, events_out = svd_relative_amps(fam, svd_streams, min_amps,
                                              plot=plot_svd)
            print(M, events_out)
            if len(M) == 0:
                print('No amplitudes calculated, skipping')
                continue
        else:
            print('{} not valid argument for mag calc method'.format(method))
            return
        # If we have a Mag for template, calibrate moments
        if calibrate and len(fam.template.event.magnitudes) > 0:
            print('Converting relative amps to magnitudes')
            # Convert the template magnitude to seismic moment
            temp_mag = fam.template.event.magnitudes[-1].mag
            temp_Mw = ML_to_Mw(temp_mag, m, c)
            temp_mo = Mw_to_M0(temp_Mw)
            # Extrapolate from the template moment - relative moment
            # relationship to get the moment for relative moment = 1.0
            # NOTE(review): M[0] is used as the template's relative moment;
            # this assumes the template is the first entry - confirm.
            norm_mo = temp_mo / M[0]
            # Now these are weights which we can multiple the moments by
            moments = np.multiply(M, norm_mo)
            # Now convert to Mw
            Mw = [Mw_to_M0(mo, inverse=True) for mo in moments]
            # Convert to local
            Ml = [ML_to_Mw(mm, m, c, inverse=True) for mm in Mw]
            # Add calibrated mags to detection events
            # NOTE(review): elsewhere in this function stream index k maps
            # to fam.detections[k - 1]; here eind is used un-shifted.
            # Presumably an off-by-one - confirm against the meaning of
            # events_out before changing.
            for jabba, eind in enumerate(events_out):
                # Skip template waveform
                if eind == 0:
                    continue
                fam.detections[eind].event.magnitudes = [
                    Magnitude(mag=Mw[jabba], magnitude_type='Mw')]
                fam.detections[eind].event.comments.append(
                    Comment(text=str(cccohs[eind])))
                fam.detections[eind].event.magnitudes.append(
                    Magnitude(mag=Ml[jabba], magnitude_type='ML'))
                fam.detections[eind].event.preferred_magnitude_id = (
                    fam.detections[eind].event.magnitudes[-1].resource_id.id)
    return pty, cccohs
def run_tutorial(plot=False, process_len=3600, num_cores=cpu_count(),
                 **kwargs):
    """
    Run matched-filter detection over the tutorial dataset.

    Reads the 'tutorial_template_*.ms' templates from the working directory,
    downloads data from GeoNet for 2016-01-04 in chunks of `process_len`
    seconds, processes each chunk and runs match_filter on it.

    :param plot: Plot each retained detection (also passed to match_filter
        as plotvar).
    :param process_len: Length in seconds of each data chunk.
    :param num_cores: Number of cores handed to shortproc and match_filter.
    :param kwargs: Extra keyword arguments forwarded to match_filter.

    :returns: List of detections kept after removing near-simultaneous
        weaker detections.
    :raises IOError: If no template files are found.
    """
    # Load the templates written by the template-creation tutorial
    template_names = glob.glob('tutorial_template_*.ms')
    if not template_names:
        raise IOError('Template files not found, have you run the template ' +
                      'creation tutorial?')
    templates = [read(name) for name in template_names]

    # Unique (station, channel) pairs present in any template
    stations = list(set([(tr.stats.station, tr.stats.channel)
                         for template in templates for tr in template]))

    # Split the day into independent chunks.  The chunk length should match
    # the process_len used when the templates were made.
    start_time = UTCDateTime(2016, 1, 4)
    end_time = UTCDateTime(2016, 1, 5)
    chunks = []
    window_start = start_time
    while window_start < end_time:
        window_end = window_start + process_len
        # Never run past the end of the requested period
        window_end = end_time if window_end > end_time else window_end
        chunks.append((window_start, window_end))
        window_start += process_len

    unique_detections = []

    # Client used to fetch the continuous data from GeoNet
    client = Client("GEONET")

    # The chunks are independent of one another, so they could be spread
    # over several machines.
    for t1, t2 in chunks:
        # One bulk request line per station/channel
        bulk_info = [('NZ', sta, '*', chan[0] + 'H' + chan[-1], t1, t2)
                     for sta, chan in stations]

        print('Downloading seismic data, this may take a while')
        st = client.get_waveforms_bulk(bulk_info)
        # Data arrive in pieces, stitch them together
        st.merge()

        # Processing parameters MUST match those used for the templates
        print('Processing the seismic data')
        st = pre_processing.shortproc(st, lowcut=2.0, highcut=9.0,
                                      filt_order=4, samp_rate=20.0,
                                      num_cores=num_cores, starttime=t1,
                                      endtime=t2)
        st = Stream(st)

        detections = match_filter.match_filter(
            template_names=template_names, template_list=templates, st=st,
            threshold=8.0, threshold_type='MAD', trig_int=6.0, plotvar=plot,
            plotdir='.', cores=num_cores, plot_format='png', **kwargs)

        # Keep only the strongest of any detections within 1 s of each other
        for master in detections:
            keep = True
            for slave in detections:
                if master == slave:
                    continue
                if not abs(master.detect_time - slave.detect_time) <= 1.0:
                    continue
                if master.detect_val > slave.detect_val:
                    continue
                # A competing detection is at least as strong: drop master
                keep = False
                print('Removed detection at %s with cccsum %s'
                      % (master.detect_time, master.detect_val))
                print('Keeping detection at %s with cccsum %s'
                      % (slave.detect_time, slave.detect_val))
                break
            if not keep:
                continue
            unique_detections.append(master)
            print('Detection at :' + str(master.detect_time) +
                  ' for template ' + master.template_name +
                  ' with a cross-correlation sum of: ' +
                  str(master.detect_val))
            if plot:
                # Show the data around the detection against its template
                stplot = st.copy()
                template = templates[
                    template_names.index(master.template_name)]
                lags = sorted([tr.stats.starttime for tr in template])
                maxlag = lags[-1] - lags[0]
                stplot.trim(starttime=master.detect_time - 10,
                            endtime=master.detect_time + maxlag + 10)
                plotting.detection_multiplot(stplot, template,
                                             [master.detect_time.datetime])
    print('We made a total of ' + str(len(unique_detections)) + ' detections')
    return unique_detections
def stack_party(party, sac_dir, method='linear', filt_params=None, align=True,
                shift_len=0.1, prepick=2., postpick=5., reject=0.7,
                normalize=False, plot=False, outdir=None):
    """
    Stack the detections of every Family in a party, one stack per Family.

    For each family the SAC directories matching its catalog event ids are
    read, traces that do not have 3001 samples are dropped, the data are
    optionally filtered, cut around the SAC 'a' pick, optionally aligned and
    then stacked.

    :param party: Party for the multiplet we're interested in
    :param sac_dir: Directory holding one sub-directory of SAC files per
        event (sub-directories matching '2*' are considered)
    :param method: Stacking method: 'linear' or 'PWS'
    :param filt_params: (optional) Dictionary of filter parameters to use
        before aligning waveforms. Keys must be 'highcut', 'lowcut',
        'filt_order', and 'samp_rate'
    :param align: Whether or not to align the waveforms
    :param shift_len: Allowed shift in aligning in seconds
    :param prepick: Seconds kept before the 'a' pick
    :param postpick: Seconds kept after the 'a' pick
    :param reject: Correlation coefficient cutoff in aligning
    :param normalize: Whether to normalize before stacking
    :param plot: Alignment plot flag; also plots every family stack
    :param outdir: (optional) Directory the stacks are written to as
        miniseed; created if it does not exist
    :return: Dictionary of stacks keyed by the family template's resource id
    """
    sac_dirs = glob('{}/2*'.format(sac_dir))
    fam_stacks = {}
    for fam in party:
        fam_id = fam.template.event.resource_id
        print('For Family {}'.format(fam_id))
        eids = [str(ev.resource_id).split('/')[-1] for ev in fam.catalog]
        # Read every event directory that belongs to this family
        raws = [read('{}/*'.format(s_dir)).merge(fill_value='interpolate')
                for s_dir in sac_dirs if s_dir.split('/')[-1] in eids]
        # Nothing to do when no directory matched or all streams are empty
        if not raws or max(len(raw) for raw in raws) == 0:
            continue
        print('Removing all traces without 3001 samples')
        for stream in raws:
            # Iterate over a copy so that removal is safe
            for trace in stream.copy():
                if len(trace.data) != 3001:
                    stream.remove(trace)
        if filt_params:
            for raw in raws:
                shortproc(raw, lowcut=filt_params['lowcut'],
                          highcut=filt_params['highcut'],
                          filt_order=filt_params['filt_order'],
                          samp_rate=filt_params['samp_rate'])
        print('Now trimming around pick times')
        z_streams = []
        for raw in raws:
            z_stream = Stream()
            for trace in raw.copy():
                if 'a' not in trace.stats.sac:
                    continue
                strt = trace.stats.starttime
                pick_offset = trace.stats.sac['a']
                z_stream += trace.trim(
                    starttime=strt + pick_offset - prepick,
                    endtime=strt + pick_offset + postpick)
            if len(z_stream) > 0:
                z_streams.append(z_stream)
        # At the moment, the picks are based on P-arrival correlation already!
        if align:
            z_streams = align_design(z_streams, shift_len=shift_len,
                                     reject=reject, multiplex=False,
                                     no_missed=False, plot=plot)
        if method == 'linear':
            fam_stacks[fam_id] = linstack(z_streams, normalize=normalize)
        elif method == 'PWS':
            fam_stacks[fam_id] = PWS_stack(z_streams, normalize=normalize)
    if plot:
        # Plot up the stacks of the Families first
        for fam_stack in fam_stacks.values():
            fam_stack.plot(equal_scale=False)
    if outdir:
        if not os.path.isdir(outdir):
            os.mkdir(outdir)
        for stack_id, fam_stack in fam_stacks.items():
            filename = '{}/Family_{}_stack.mseed'.format(
                outdir, str(stack_id).split('/')[-1])
            fam_stack.write(filename, format='MSEED')
    return fam_stacks
def cluster_cat(catalog, corr_thresh, corr_params=None, raw_wav_dir=None,
                dist_mat=False, out_cat=None, show=False, method='average'):
    """
    Cluster the events of a catalog and return one Tribe per cluster.

    When `corr_params` and `raw_wav_dir` are given, waveforms are read,
    processed with shortproc and clipped around the picks before being
    clustered by cross-correlation.  When `dist_mat` is an array it is used
    in place of the correlations.

    :param catalog: Catalog of events to cluster
    :param corr_thresh: Correlation threshold for clustering
    :param corr_params: Dictionary of filter parameters. Must include keys:
        lowcut, highcut, samp_rate, filt_order, pre_pick, length, shift_len,
        cores
    :param raw_wav_dir: Directory of waveforms to take from
    :param dist_mat: If there's a precomputed distance matrix (numpy array),
        use this instead of doing all the correlations
    :param out_cat: Path the catalog of events that have waveforms is
        written to (QuakeML); used only when waveforms are processed
    :param show: Show the dendrogram? Careful as this can exceed max recursion
    :param method: Linkage method passed to cluster_from_dist_mat
    :return: Tuple of (list of Tribes, list of Catalogs), one per group

    .. Note: Functionality here is pilaged from align design as we don't
        want the multiplexed portion of that function.
    """
    if corr_params and raw_wav_dir:
        shift_len = corr_params['shift_len']
        lowcut = corr_params['lowcut']
        highcut = corr_params['highcut']
        samp_rate = corr_params['samp_rate']
        filt_order = corr_params['filt_order']
        pre_pick = corr_params['pre_pick']
        length = corr_params['length']
        cores = corr_params['cores']
        raw_wav_files = glob('%s/*' % raw_wav_dir)
        raw_wav_files.sort()
        # The event id is the third-from-last '_'-separated field of the name
        all_wavs = [wav.split('/')[-1].split('_')[-3]
                    for wav in raw_wav_files]
        print(all_wavs[0])
        names = [
            ev.resource_id.id.split('/')[-1] for ev in catalog
            if ev.resource_id.id.split('/')[-1] in all_wavs
        ]
        print(names[0])
        wavs = [
            wav for wav in raw_wav_files
            if wav.split('/')[-1].split('_')[-3] in names
        ]
        print(wavs[0])
        new_cat = Catalog(events=[
            ev for ev in catalog
            if ev.resource_id.id.split('/')[-1] in names
        ])
        print('Processing temps')
        temp_list = [(shortproc(read('{}/*'.format(tmp)), lowcut=lowcut,
                                highcut=highcut, samp_rate=samp_rate,
                                filt_order=filt_order, parallel=True,
                                num_cores=cores),
                      ev.resource_id.id.split('/')[-1])
                     for tmp, ev in zip(wavs, new_cat)]
        print('Clipping traces')
        rm_temps = []
        # Collected over ALL templates.  Resetting this inside the loop meant
        # that only the last template's event could ever be removed.
        rm_ev = []
        for i, temp in enumerate(temp_list):
            print('Clipping template %s' % new_cat[i].resource_id.id)
            rm_ts = []  # Make a list of traces with no pick to remove
            for tr in temp[0]:
                pk = [
                    pk for pk in new_cat[i].picks
                    if pk.waveform_id.station_code == tr.stats.station
                    and pk.waveform_id.channel_code == tr.stats.channel
                ]
                if len(pk) == 0:
                    rm_ts.append(tr)
                else:
                    tr.trim(starttime=pk[0].time - shift_len - pre_pick,
                            endtime=pk[0].time - pre_pick + length + shift_len)
            # Remove pickless traces
            for rm in rm_ts:
                temp[0].traces.remove(rm)
            # If trace lengths are internally inconsistent, remove template
            # NOTE(review): the matching event is not removed in this case,
            # which presumably leaves catalog and templates out of step.
            if len(list(set([len(tr) for tr in temp[0]]))) > 1:
                rm_temps.append(temp)
            # If template is now length 0, remove it and associated event
            if len(temp[0]) == 0:
                rm_temps.append(temp)
                rm_ev.append(new_cat[i])
        for t in rm_temps:
            temp_list.remove(t)
        # Remove the corresponding events as well so catalog and distmat
        # are the same shape
        for rme in rm_ev:
            new_cat.events.remove(rme)
        print(new_cat)
        new_cat.write(out_cat, format="QUAKEML")
    print('Clustering')
    if isinstance(dist_mat, np.ndarray):
        print('Assuming the tribe provided is the same shape as dist_mat')
        # Dummy streams
        temp_list = [(Stream(), ev) for ev in catalog]
        groups = cluster_from_dist_mat(dist_mat=dist_mat, temp_list=temp_list,
                                       show=show, corr_thresh=corr_thresh,
                                       method=method)
    else:
        # NOTE(review): temp_list, shift_len and cores only exist when
        # corr_params and raw_wav_dir were both given.
        groups = clustering.cluster(temp_list, show=show,
                                    corr_thresh=corr_thresh,
                                    shift_len=shift_len * 2,
                                    save_corrmat=True, cores=cores)
    group_tribes = []
    group_cats = []
    if corr_params:
        for group in groups:
            group_tribes.append(
                Tribe(templates=[
                    Template(st=tmp[0],
                             name=tmp[1].resource_id.id.split('/')[-1],
                             event=tmp[1], highcut=highcut, lowcut=lowcut,
                             samp_rate=samp_rate, filt_order=filt_order,
                             prepick=pre_pick)
                    for tmp in group
                ]))
            group_cats.append(Catalog(events=[tmp[1] for tmp in group]))
    else:
        for group in groups:
            group_tribes.append(
                Tribe(templates=[
                    Template(st=tmp[0],
                             name=tmp[1].resource_id.id.split('/')[-1],
                             event=tmp[1].event, highcut=None, lowcut=None,
                             samp_rate=None, filt_order=None, prepick=None)
                    for tmp in group
                ]))
            group_cats.append(Catalog(events=[tmp[1] for tmp in group]))
    return group_tribes, group_cats
def run_tutorial(min_magnitude=2, shift_len=0.2, num_cores=4):
    """
    Run the lag-calc tutorial on one day of NCEDC data.

    Downloads a catalog for 2004-09-28, makes templates from it, runs
    matched-filter detection over three hours of data in 1800 s chunks and
    re-picks the unique detections with lag_calc.

    :param min_magnitude: Minimum magnitude of catalog events requested.
    :param shift_len: Shift length passed to lag_calc.
    :param num_cores: Number of cores for shortproc and match_filter.

    :returns: Tuple of (all unique detections, picked catalog, templates,
        template names), so the function can be used for testing.
    """
    import obspy
    # The FDSN client moved between obspy major versions
    if int(obspy.__version__.split('.')[0]) >= 1:
        from obspy.clients.fdsn import Client
    else:
        from obspy.fdsn import Client
    from obspy.core.event import Catalog
    from obspy import UTCDateTime
    from eqcorrscan.core import template_gen, match_filter, lag_calc
    from eqcorrscan.utils import pre_processing, catalog_utils

    client = Client('NCEDC')
    day_start = UTCDateTime(2004, 9, 28)
    day_end = day_start + 86400
    print('Downloading catalog')
    catalog = client.get_events(starttime=day_start, endtime=day_end,
                                minmagnitude=min_magnitude,
                                minlatitude=35.7, maxlatitude=36.1,
                                minlongitude=-120.6, maxlongitude=-120.2,
                                includearrivals=True)
    # Keep only the picks from the five most used stations to reduce the
    # computational cost.
    catalog = catalog_utils.filter_picks(catalog, channels=['EHZ'],
                                         top_n_picks=5)

    print('Generating templates')
    templates = template_gen.from_client(catalog=catalog, client_id='NCEDC',
                                         lowcut=2.0, highcut=9.0,
                                         samp_rate=50.0, filt_order=4,
                                         length=3.0, prepick=0.15,
                                         swin='all', process_len=3600)

    # Build the list of (start, end) windows to process
    start_time = UTCDateTime(2004, 9, 28, 17)
    end_time = UTCDateTime(2004, 9, 28, 20)
    process_len = 1800
    chunks = []
    window_start = start_time
    while window_start < end_time:
        window_end = window_start + process_len
        window_end = end_time if window_end > end_time else window_end
        chunks.append((window_start, window_end))
        window_start += process_len

    all_detections = []
    picked_catalog = Catalog()
    # Templates are named after the start time of their first trace
    template_names = [str(template[0].stats.starttime)
                      for template in templates]
    for t_start, t_end in chunks:
        print('Downloading and processing for start-time: %s' % t_start)
        # Download and process the data for the channels of the first
        # template
        bulk_info = []
        for tr in templates[0]:
            chan = tr.stats.channel
            bulk_info.append((tr.stats.network, tr.stats.station, '*',
                              chan[0] + 'H' + chan[1], t_start, t_end))
        # Just downloading a chunk of data
        st = client.get_waveforms_bulk(bulk_info)
        st.merge(fill_value='interpolate')
        st = pre_processing.shortproc(st, lowcut=2.0, highcut=9.0,
                                      filt_order=4, samp_rate=50.0, debug=0,
                                      num_cores=num_cores)
        detections = match_filter.match_filter(
            template_names=template_names, template_list=templates, st=st,
            threshold=8.0, threshold_type='MAD', trig_int=6.0, plotvar=False,
            plotdir='.', cores=num_cores)

        # Extract unique detections from set: a detection is dropped when
        # another one within 1 s is at least as strong.
        unique_detections = []
        for master in detections:
            for slave in detections:
                if master == slave:
                    continue
                if not abs(master.detect_time - slave.detect_time) <= 1.0:
                    continue
                if not master.detect_val > slave.detect_val:
                    break
            else:
                # No competing detection beat this one
                unique_detections.append(master)
        all_detections += unique_detections

        picked_catalog += lag_calc.lag_calc(detections=unique_detections,
                                            detect_data=st,
                                            template_names=template_names,
                                            templates=templates,
                                            shift_len=shift_len, min_cc=0.5,
                                            interpolate=True, plot=False)
    # Return all of this so that we can use this function for testing.
    return all_detections, picked_catalog, templates, template_names
def test_match_filter(self, samp_rate=20.0, debug=0):
    """
    Check that match_filter recovers events seeded into synthetic data.

    Uses synthetic templates and seeded, randomised data.  A detection counts
    as true when it falls within ten samples of a seed time of its template;
    the test passes when there is at least one true detection and fewer than
    one false detection for every four true ones.

    :type samp_rate: float
    :param samp_rate: Sampling rate in Hz of the synthetic data.
    :type debug: int
    :param debug: Debug level, higher the number the more output.
    """
    from eqcorrscan.utils import pre_processing
    from eqcorrscan.utils import plotting
    from eqcorrscan.core import match_filter
    from eqcorrscan.utils.synth_seis import generate_synth_data
    from obspy import UTCDateTime
    import string
    # Generate a random dataset
    templates, data, seeds = generate_synth_data(nsta=5, ntemplates=2,
                                                 nseeds=50,
                                                 samp_rate=samp_rate,
                                                 t_length=6.0, max_amp=5.0,
                                                 max_lag=12.0,
                                                 debug=debug)
    # Notes to the user: If you use more templates you should ensure they
    # are more different, e.g. set the data to have larger moveouts,
    # otherwise similar templates will detect events seeded by another
    # template.
    # Test the pre_processing functions
    data = pre_processing.dayproc(st=data, lowcut=2.0, highcut=8.0,
                                  filt_order=3, samp_rate=samp_rate,
                                  debug=0, starttime=UTCDateTime(0))
    if debug > 0:
        data.plot()
    # Filter the data and the templates
    for template in templates:
        pre_processing.shortproc(st=template, lowcut=2.0, highcut=8.0,
                                 filt_order=3, samp_rate=samp_rate)
        if debug > 0:
            template.plot()
    template_names = list(string.ascii_lowercase)[0:len(templates)]
    detections = match_filter.match_filter(template_names=template_names,
                                           template_list=templates,
                                           st=data, threshold=10.0,
                                           threshold_type='MAD',
                                           trig_int=6.0,
                                           plotvar=False,
                                           plotdir='.',
                                           cores=1,
                                           debug=0)
    # Compare the detections to the seeds
    print('This test made ' + str(len(detections)) + ' detections')
    ktrue = 0
    kfalse = 0
    for detection in detections:
        print(detection)
        i = template_names.index(detection.template_name)
        t_seeds = seeds[i]
        # Detection time expressed in samples since the data start
        dtime_samples = int(
            (detection.detect_time - UTCDateTime(0)) * samp_rate)
        if dtime_samples in t_seeds['time']:
            j = list(t_seeds['time']).index(dtime_samples)
            print('Detection at SNR of: ' + str(t_seeds['SNR'][j]))
            ktrue += 1
        else:
            min_diff = min(abs(t_seeds['time'] - dtime_samples))
            if min_diff < 10:
                # If there is a match within ten samples then it is
                # good enough
                j = list(abs(t_seeds['time'] -
                             dtime_samples)).index(min_diff)
                print('Detection at SNR of: ' + str(t_seeds['SNR'][j]))
                ktrue += 1
            else:
                print('Detection at sample: ' + str(dtime_samples) +
                      ' does not match anything in seed times:')
                kfalse += 1
            print('Minimum difference in samples is: ' + str(min_diff))
    # Plot the detections
    if debug > 3:
        for i, template in enumerate(templates):
            times = [
                d.detect_time.datetime for d in detections
                if d.template_name == template_names[i]
            ]
            print(times)
            plotting.detection_multiplot(data, template, times)
    # Set an 'acceptable' ratio of positive to false detections
    print(
        str(ktrue) + ' true detections and ' + str(kfalse) +
        ' false detections')
    # Written as a product rather than kfalse / ktrue: the division raised
    # ZeroDivisionError when nothing was detected and, under Python 2 integer
    # division, floored to 0 and passed for any kfalse < ktrue.  With no true
    # detections this now fails the assertion, as it should.
    self.assertTrue(kfalse < 0.25 * ktrue)
def from_quakeml(quakeml, st, lowcut, highcut, samp_rate, filt_order,
                 length, prepick, swin, debug=0, plot=False):
    """
    Generate multiplexed templates from a local QuakeML file.

    The stream is merged and processed (with dayproc if it is roughly a day
    long, otherwise with shortproc) and one template is cut per event whose
    picks all lie within the data.

    :type quakeml: str
    :param quakeml: QuakeML file containing pick information, can contain
        multiple events.
    :type st: obspy.core.stream.Stream
    :param st: Stream containing waveform data for template (hopefully).
        Note that this should be the same length of stream as you will use
        for the continuous detection, e.g. if you detect in day-long files,
        give this a day-long file!  The stream is merged in place.
    :type lowcut: float
    :param lowcut: Low cut (Hz) passed to the processing routine
    :type highcut: float
    :param highcut: High cut (Hz) passed to the processing routine
    :type samp_rate: float
    :param samp_rate: New sampling rate in Hz
    :type filt_order: int
    :param filt_order: Filter order passed to the processing routine
    :type length: float
    :param length: Extract length in seconds
    :type prepick: float
    :param prepick: Pre-pick time in seconds
    :type swin: str
    :param swin: Either 'all', 'P' or 'S', to select which phases to output.
    :type debug: int
    :param debug: Level of debugging output, higher=more
    :type plot: bool
    :param plot: Display template plots or not

    :returns: list of obspy.Stream Newly cut templates
    :raises IOError: If the QuakeML file does not exist.

    .. warning:: We suggest giving this function a full day of data, to
        ensure templates are generated with **exactly** the same processing
        as the continuous data.  Not doing this will result in slightly
        reduced cross-correlation values.

    .. rubric:: Example

    >>> from obspy import read
    >>> from eqcorrscan.core.template_gen import from_quakeml
    >>> st = read('eqcorrscan/tests/test_data/WAV/TEST_/' +
    ...           '2013-09-01-0410-35.DFDPC_024_00')
    >>> quakeml = 'eqcorrscan/tests/test_data/20130901T041115.xml'
    >>> templates = from_quakeml(quakeml=quakeml, st=st, lowcut=2.0,
    ...                          highcut=9.0, samp_rate=20.0, filt_order=3,
    ...                          length=2, prepick=0.1, swin='S')
    >>> print(len(templates[0]))
    15
    """
    # Perform some checks first
    import os
    import warnings
    if not os.path.isfile(quakeml):
        raise IOError('QuakeML file does not exist')
    import obspy
    if int(obspy.__version__.split('.')[0]) >= 1:
        from obspy import read_events
    else:
        from obspy import readEvents as read_events
    from obspy import UTCDateTime
    from eqcorrscan.utils import pre_processing
    # Process waveform data
    st.merge(fill_value='interpolate')
    # Work out if the data are daylong or not...
    data_len = max([len(tr.data) / tr.stats.sampling_rate for tr in st])
    daylong = 80000 < data_len < 90000
    if daylong:
        st = pre_processing.dayproc(st, lowcut, highcut, filt_order,
                                    samp_rate, debug=debug,
                                    starttime=UTCDateTime(
                                        st[0].stats.starttime.date))
    else:
        st = pre_processing.shortproc(st, lowcut, highcut, filt_order,
                                      samp_rate, debug=debug)
    data_start = min([tr.stats.starttime for tr in st])
    data_end = max([tr.stats.endtime for tr in st])
    # Station.channel codes available in the data.  The stream is only ever
    # copied below, so this is worked out once rather than per event.
    st_stachans = set('.'.join([tr.stats.station, tr.stats.channel])
                      for tr in st)
    # Read QuakeML file into Catalog class
    catalog = read_events(quakeml)
    templates = []
    for event in catalog:
        use_event = True
        # Check that the event is within the data
        for pick in event.picks:
            if not data_start < pick.time < data_end:
                if debug > 0:
                    print('Pick outside of data span:')
                    print('Pick time: ' + str(pick.time))
                    print('Start time: ' + str(data_start))
                    print('End time: ' + str(data_end))
                use_event = False
        if not use_event:
            warnings.warn('Event is not within data time-span')
            continue
        # Read in pick info.  These lists are rebuilt for every event so
        # that the missing-data check below only reports this event's picks
        # instead of repeating those of all earlier events.
        stations = []
        channels = []
        if debug > 0:
            print("I have found the following picks")
        for pick in event.picks:
            if debug > 0:
                print(' '.join([pick.waveform_id.station_code,
                                pick.waveform_id.channel_code,
                                pick.phase_hint, str(pick.time)]))
            stations.append(pick.waveform_id.station_code)
            channels.append(pick.waveform_id.channel_code)
        # Check to see if all picks have a corresponding waveform
        for station, channel in zip(stations, channels):
            if '.'.join([station, channel]) not in st_stachans:
                warnings.warn('No data provided for ' + station + '.' +
                              channel)
        # Cut from a copy so every event starts from the full data
        st1 = st.copy()
        # Cut and extract the templates
        template = _template_gen(event.picks, st1, length, swin,
                                 prepick=prepick, plot=plot, debug=debug)
        templates.append(template)
    return templates
def from_sac(sac_files, lowcut, highcut, samp_rate, filt_order, length, swin,
             prepick=0.05, debug=0, plot=False):
    """
    Generate a multiplexed template from a list of SAC files.

    Function to read picks and waveforms from SAC data, and generate a
    template from these. Usually sac_files is a list of all single-channel
    SAC files for a given event, a single, multi-channel template will be
    created from these traces.

    **All files listed in sac_files should be associated with a single
    event.**

    :type sac_files: list
    :param sac_files: obspy.core.stream.Stream of sac waveforms, a list of
        Streams or traces, or a list of paths to sac waveforms.
    :type lowcut: float
    :param lowcut: Low cut (Hz) passed to shortproc
    :type highcut: float
    :param highcut: High cut (Hz) passed to shortproc
    :type samp_rate: float
    :param samp_rate: New sampling rate in Hz
    :type filt_order: int
    :param filt_order: Filter order passed to shortproc
    :type swin: str
    :param swin: Either 'all', 'P' or 'S', to select which phases to output.
    :type length: float
    :param length: Extract length in seconds
    :type prepick: float
    :param prepick: Length to extract prior to the pick in seconds.
    :type debug: int
    :param debug: Debug level, higher number=more output.
    :type plot: bool
    :param plot: Turns template plotting on or off.

    :returns: obspy.core.stream.Stream Newly cut template
    :raises TypeError: If sac_files is neither a list nor a Stream.

    .. note:: This functionality is not supported for obspy versions below
        1.0.0 as references times are not read in by SACIO, which are needed
        for defining pick times.

    .. rubric:: Example

    >>> from eqcorrscan.core.template_gen import from_sac
    >>> import glob
    >>> # Get all the SAC-files associated with one event.
    >>> sac_files = glob.glob('eqcorrscan/tests/test_data/SAC/2014p611252/*')
    >>> template = from_sac(sac_files=sac_files, lowcut=2.0, highcut=10.0,
    ...                     samp_rate=25.0, filt_order=4, length=2.0,
    ...                     swin='all', prepick=0.1)
    >>> print(template[0].stats.sampling_rate)
    25.0
    >>> print(len(template))
    15
    """
    from obspy import read, Stream
    from eqcorrscan.utils.sac_util import sactoevent
    from eqcorrscan.utils import pre_processing
    # ``unicode`` only exists on Python 2; referencing it directly raises
    # NameError on Python 3 as soon as an entry is not a ``str``.
    try:
        string_types = (str, unicode)
    except NameError:
        string_types = (str,)
    # Check whether sac_files is a stream or a list
    if isinstance(sac_files, list):
        if isinstance(sac_files[0], string_types):
            # Paths: keep the first trace of every file
            sac_files = [read(sac_file)[0] for sac_file in sac_files]
        if isinstance(sac_files[0], Stream):
            # This is a list of streams: gather their traces in a new
            # Stream so the caller's first stream is not extended in place.
            st = Stream()
            for sac_stream in sac_files:
                st += sac_stream
        else:
            # Only wrap the list when it holds traces; wrapping a list of
            # Streams would produce a Stream of Streams.
            st = Stream(sac_files)
    elif isinstance(sac_files, Stream):
        st = sac_files
    else:
        raise TypeError('sac_files must be a list or an obspy Stream')
    # Make an event object...
    event = sactoevent(st, debug=debug)
    # Process the data
    st.merge(fill_value='interpolate')
    st = pre_processing.shortproc(st, lowcut, highcut, filt_order,
                                  samp_rate, debug)
    template = _template_gen(picks=event.picks, st=st, length=length,
                             swin=swin, prepick=prepick, plot=plot,
                             debug=debug)
    return template
def from_sac(sac_files, lowcut, highcut, samp_rate, filt_order, length, swin,
             prepick=0.05, debug=0, plot=False):
    """
    Read picks and waveforms from SAC data and generate a template.

    Usually sac_files is a list of all single-channel SAC files for a given
    event; a single, multi-channel template is created from these traces.

    :type sac_files: list or stream
    :param sac_files:
        List or stream of sac waveforms, or list of paths to sac waveforms.
    :type lowcut: float
    :param lowcut: Low cut (Hz), handed to pre_processing.shortproc.
    :type highcut: float
    :param highcut: High cut (Hz), handed to pre_processing.shortproc.
    :type samp_rate: float
    :param samp_rate: New sampling rate in Hz.
    :type filt_order: int
    :param filt_order: Filter level.
    :type swin: str
    :param swin: Either 'all', 'P' or 'S', to select which phases to output.
    :type length: float
    :param length: Extract length in seconds.
    :type prepick: float
    :param prepick: Length to extract prior to the pick in seconds.
    :type debug: int
    :param debug: Debug level, higher number=more output.
    :type plot: bool
    :param plot: Turns template plotting on or off.

    :returns: obspy.Stream Newly cut template
    :raises TypeError: If sac_files is neither a list nor a Stream.

    .. note:: This functionality is not supported for obspy versions below
        1.0.0 as references times are not read in by SACIO, which are needed
        for defining pick times.
    """
    from obspy import read, Stream
    from eqcorrscan.utils.sac_util import sactoevent
    from eqcorrscan.utils import pre_processing

    # `unicode` is Python 2 only; guard it so Python 3 does not raise
    # NameError when the first list element is not a `str`.
    try:
        path_types = (str, unicode)  # noqa: F821
    except NameError:
        path_types = (str,)

    # Check whether sac_files is a stream or a list
    if isinstance(sac_files, list):
        if isinstance(sac_files[0], path_types):
            # Paths: keep the first trace of every file
            sac_files = [read(sac_file)[0] for sac_file in sac_files]
        if isinstance(sac_files[0], Stream):
            # List of streams: merge them into a new Stream instead of
            # growing the caller's first stream and then discarding it.
            st = Stream()
            for sac_stream in sac_files:
                st += sac_stream
        else:
            st = Stream(sac_files)
    elif isinstance(sac_files, Stream):
        st = sac_files
    else:
        raise TypeError('sac_files must be a list or an obspy Stream')
    # Make an event object...
    event = sactoevent(st, debug=debug)
    # Process the data
    st.merge(fill_value='interpolate')
    st = pre_processing.shortproc(st, lowcut, highcut, filt_order,
                                  samp_rate, debug)
    template = _template_gen(picks=event.picks, st=st, length=length,
                             swin=swin, prepick=prepick, plot=plot,
                             debug=debug)
    return template
def from_sfile(sfile, lowcut, highcut, samp_rate, filt_order, length, swin,
               debug=0):
    """
    Read picks from an s-file and cut a template from its waveform files.

    The waveform files named in the s-file are read from the directory
    obtained by replacing every 'REA' component of the s-file's path with
    'WAV'.

    :type sfile: string
    :param sfile:
        Path to a seisan nordic type s-file containing waveform and pick
        information.
    :type lowcut: float
    :param lowcut: Low cut (Hz), handed to pre_processing.shortproc.
    :type highcut: float
    :param highcut: High cut (Hz), handed to pre_processing.shortproc.
    :type samp_rate: float
    :param samp_rate: New sampling rate in Hz.
    :type filt_order: int
    :param filt_order: Filter level.
    :type swin: str
    :param swin: Either 'all', 'P' or 'S', to select which phases to output.
    :type length: float
    :param length: Extract length in seconds.
    :type debug: int
    :param debug: Debug level, higher number=more output.

    :returns: The value returned by _template_gen for the picks and data.
    :raises IOError:
        If sfile does not exist or lists no waveform files.
    :raises ValueError:
        If any trace has a sampling rate lower than samp_rate.
    """
    # Perform some checks first
    import os
    if not os.path.isfile(sfile):
        raise IOError('sfile does not exist')
    from eqcorrscan.utils import Sfile_util
    from eqcorrscan.utils import pre_processing
    from obspy import read as obsread
    # Read in the header of the sfile
    wavefiles = Sfile_util.readwavename(sfile)
    if not wavefiles:
        # Previously this fell through to an unbound `st` below
        raise IOError('No waveform files are listed in sfile')
    # Waveforms live beside the s-file, with 'REA' swapped for 'WAV'
    wavpath = ''
    for part in sfile.split('/')[:-1]:
        if part == 'REA':
            part = 'WAV'
        wavpath += part + '/'
    # Read in waveform file
    st = None
    for wavefile in wavefiles:
        # Single-argument print() behaves the same on Python 2 and 3
        print("I am going to read waveform data from: " + wavpath + wavefile)
        if st is None:
            st = obsread(wavpath + wavefile)
        else:
            st += obsread(wavpath + wavefile)
    for tr in st:
        if tr.stats.sampling_rate < samp_rate:
            print('Sampling rate of data is lower than sampling rate asked '
                  'for')
            print('As this is not good practice for correlations I will not '
                  'do this')
            raise ValueError("Trace: " + tr.stats.station +
                             " sampling rate: " +
                             str(tr.stats.sampling_rate))
    # Read in pick info
    picks = Sfile_util.readpicks(sfile)
    print("I have found the following picks")
    for pick in picks:
        print(pick.station + ' ' + pick.channel + ' ' + pick.phase + ' ' +
              str(pick.time))
    # Process waveform data
    st = pre_processing.shortproc(st, lowcut, highcut, filt_order,
                                  samp_rate, debug)
    st1 = _template_gen(picks, st, length, swin)
    return st1
call('NLLoc /Users/home/hoppche/NLLoc/mrp/run/nlloc_mrp.in', shell=True) # Now reading NLLoc output back into catalog as new origin out_w_ext = glob(outfile + '*.grid0.loc.hyp') new_o = read_nlloc_hyp(out_w_ext[0], coordinate_converter=my_conversion, picks=ev.picks) ev.origins.append(new_o[0].origins[0]) ev.preferred_origin_id = str(new_o[0].origins[0].resource_id) # Cut templates for each new event based on new picks for event in refined_cat: ev_name = str(event.resource_id).split('/')[2] st = template_dict[event.resource_id] st1 = pre_processing.shortproc(st, lowcut=1.0, highcut=20.0, filt_order=3, samp_rate=50, debug=0) print('Feeding stream to _template_gen...') template = template_gen._template_gen(event.picks, st1, length=4.0, swin='all', prepick=0.5) print('Writing event ' + ev_name + ' to file...') template.write('/media/chet/hdd/seismic/NZ/templates/rotnga_2015/' + 'refined_picks/' + ev_name + '_50Hz.mseed', format="MSEED") del st, st1, template
def from_sfile(sfile, lowcut, highcut, samp_rate, filt_order, length, swin,
               prepick=0.05, debug=0, plot=False):
    """
    Read picks from an s-file and cut a template from its waveform files.

    The waveform files named in the s-file are read from the directory
    obtained by replacing every 'REA' component of the s-file's path with
    'WAV'.

    :type sfile: string
    :param sfile:
        Path to a seisan nordic type s-file containing waveform and pick
        information.
    :type lowcut: float
    :param lowcut: Low cut (Hz), handed to pre_processing.shortproc.
    :type highcut: float
    :param highcut: High cut (Hz), handed to pre_processing.shortproc.
    :type samp_rate: float
    :param samp_rate: New sampling rate in Hz.
    :type filt_order: int
    :param filt_order: Filter level.
    :type swin: str
    :param swin: Either 'all', 'P' or 'S', to select which phases to output.
    :type length: float
    :param length: Extract length in seconds.
    :type prepick: float
    :param prepick: Length to extract prior to the pick in seconds.
    :type debug: int
    :param debug: Debug level, higher number=more output.
    :type plot: bool
    :param plot: Turns template plotting on or off.

    :returns: obspy.Stream Newly cut template
    :raises IOError:
        If sfile does not exist or lists no waveform files.
    :raises ValueError:
        If any trace has a sampling rate lower than samp_rate.

    .. warning:: This will use whatever data is pointed to in the s-file, if
        this is not the continuous data, we recommend using other functions.
        Differences in processing between short files and day-long files
        (inherent to resampling) will produce lower cross-correlations.
    """
    # Perform some checks first
    import os
    if not os.path.isfile(sfile):
        raise IOError('sfile does not exist')
    from eqcorrscan.utils import pre_processing
    from eqcorrscan.utils import sfile_util
    from obspy import read as obsread
    # Read in the header of the sfile
    wavefiles = sfile_util.readwavename(sfile)
    if not wavefiles:
        # Previously this fell through to an unbound `st` below
        raise IOError('No waveform files are listed in sfile')
    # Waveforms live beside the s-file, with 'REA' swapped for 'WAV'
    new_path_parts = ['WAV' if part == 'REA' else part
                      for part in sfile.split('/')[:-1]]
    if new_path_parts:
        # * argument to allow .join() to accept a list
        wavpath = os.path.join(*new_path_parts) + '/'
    else:
        # A bare filename has no directory part; os.path.join() with no
        # arguments raises TypeError, so read from the working directory.
        wavpath = ''
    # In case of absolute paths (not handled with .split() --> .join())
    if sfile[0] == '/':
        wavpath = '/' + wavpath
    # Read in waveform file
    st = None
    for wavefile in wavefiles:
        print(''.join(["I am going to read waveform data from: ", wavpath,
                       wavefile]))
        if st is None:
            st = obsread(wavpath + wavefile)
        else:
            st += obsread(wavpath + wavefile)
    for tr in st:
        if tr.stats.sampling_rate < samp_rate:
            print('Sampling rate of data is lower than sampling rate asked ' +
                  'for')
            print('Not good practice for correlations: I will not do this')
            raise ValueError("Trace: " + tr.stats.station +
                             " sampling rate: " +
                             str(tr.stats.sampling_rate))
    # Read in pick info
    catalog = sfile_util.readpicks(sfile)
    # Read the list of Picks for this event
    picks = catalog[0].picks
    print("I have found the following picks")
    for pick in picks:
        print(' '.join([pick.waveform_id.station_code,
                        pick.waveform_id.channel_code, pick.phase_hint,
                        str(pick.time)]))
    # Process waveform data
    st.merge(fill_value='interpolate')
    st = pre_processing.shortproc(st, lowcut, highcut, filt_order,
                                  samp_rate, debug)
    st1 = _template_gen(picks=picks, st=st, length=length, swin=swin,
                        prepick=prepick, plot=plot, debug=debug)
    return st1
def party_relative_mags(party, self_files, shift_len, align_len, svd_len,
                        reject, sac_dir, min_amps, calibrate=False,
                        method='PCA'):
    """
    Calculate relative moments for the detections of each Family in a Party.

    For every family the Z-component SAC files of its events are read,
    processed with the template's filter parameters, aligned per
    station/channel, and relative amplitudes are measured either with
    svd_moments ('LSQR') or with a two-trace SVD per channel ('PCA').

    :param party: Party of detections
    :param self_files:
        Iterable of csv file paths; the first column of every row is taken
        as the name of a self detection.
    :param shift_len: Maximum shift length used in waveform alignment
    :param align_len: Length of waveform used for correlation in alignment
    :param svd_len: Length of waveform used in relative amplitude calc
    :param reject: Min cc threshold for accepted measurement
    :param sac_dir: Root directory of waveforms
    :param min_amps: Minimum number of relative measurements per pair
    :param calibrate: Flag for calibration to a priori Ml's
    :param method: 'PCA' or 'LSQR'
    :return:
        Tuple of (party, cccohs). cccohs is the cc_coh_dets result of the
        last family that reached that step, or None if no family did.
    """
    # First read-in self detection names
    self_names = set()
    for self_file in self_files:
        with open(self_file, 'r') as f:
            for row in csv.reader(f):
                self_names.add(str(row[0]))
    # Defined up front so the final return cannot hit an unbound name when
    # every family is skipped (or the party is empty).
    cccohs = None
    for fam in party.families:
        print('Starting work on family %s' % fam.template.name)
        if len(fam) == 1:
            print('Only self-detection. Moving on.')
            continue
        temp = fam.template
        prepick = temp.prepick
        events = [det.event for det in fam.detections]
        # Here we'll read in the waveforms and trim from stefan's directory
        # of SAC files so as not to duplicate data
        ev_dirs = ['%s%s' % (sac_dir, str(ev.resource_id).split('/')[-1])
                   for ev in events]
        streams = []
        self_inds = [i for i, ev_dir in enumerate(ev_dirs)
                     if ev_dir.split('/')[-1] in self_names]
        if len(self_inds) == 0:
            print('Family %s has no self detection. Investigate'
                  % fam.template.name)
            continue
        self_ind = self_inds[0]
        # Read in Z components of events which we wrote for stefan
        # Many of these ev_dirs will not exist!
        for ev_dir in ev_dirs:
            raw_st = Stream()
            print('Reading %s' % ev_dir)
            for wav_file in glob('%s/*Z.sac' % ev_dir):
                print('...file %s' % wav_file)
                raw_tr = read(wav_file)[0]
                # Window from 3 s before the SAC 'a' marker, 10 s long
                start = raw_tr.stats.starttime + raw_tr.stats.sac['a'] - 3.
                end = start + 10
                raw_tr.trim(starttime=start, endtime=end)
                raw_st.traces.append(raw_tr)
            streams.append(raw_st)
        print('Moved self detection to top of list')
        # Move the self detection to the first element
        streams.insert(0, streams.pop(self_ind))
        print('Template Stream: %s' % str(streams[0]))
        if len(streams[0]) == 0:
            print('Template %s waveforms did not get written to SAC.' %
                  temp.name)
            continue
        # Front/back clip hardcoded relative to wavs starting 3 s before pick
        front_clip = 3.0 - shift_len - 0.05 - prepick
        back_clip = front_clip + align_len + (2 * shift_len) + 0.05
        wrk_streams = []  # For aligning
        # Process streams then copy to both ccc_streams and svd_streams
        bad_streams = []
        for i, st in enumerate(list(streams)):
            try:
                shortproc(st=streams[i], lowcut=temp.lowcut,
                          highcut=temp.highcut, filt_order=temp.filt_order,
                          samp_rate=temp.samp_rate)
                wrk_streams.append(st.copy())
            except ValueError as proc_err:
                print('ValueError reads:')
                print(str(proc_err))
                print('Attempting to remove bad trace at {}'.format(
                    str(proc_err).split(' ')[-1]))
                # Eliminate trailing "'"
                bad_tr = str(proc_err).split(' ')[-1][:-1]
                print('Sta and chan names: {}'.format(bad_tr.split('.')))
                try:
                    tr = streams[i].select(station=bad_tr.split('.')[0],
                                           channel=bad_tr.split('.')[1])[0]
                    streams[i].traces.remove(tr)
                    shortproc(st=streams[i], lowcut=temp.lowcut,
                              highcut=temp.highcut,
                              filt_order=temp.filt_order,
                              samp_rate=temp.samp_rate)
                    wrk_streams.append(st.copy())
                except IndexError as select_err:
                    print(str(select_err))
                    print('Funkyness. Removing entire stream')
                    bad_streams.append(st)
        if len(bad_streams) > 0:
            for bst in bad_streams:
                streams.remove(bst)
        svd_streams = copy.deepcopy(streams)  # For svd
        ccc_streams = copy.deepcopy(streams)
        # work out cccoh for each event with template
        cccohs = cc_coh_dets(streams=ccc_streams, shift=shift_len,
                             length=svd_len, wav_prepick=3.,
                             corr_prepick=0.05)
        for st in wrk_streams:
            for tr in st:
                tr.trim(starttime=tr.stats.starttime + front_clip,
                        endtime=tr.stats.starttime + back_clip)
        st_chans = list(set([(tr.stats.station, tr.stats.channel)
                             for st in wrk_streams for tr in st]))
        st_chans.sort()
        # Align streams with just P arrivals, then use longer st for svd
        print('Now aligning svd_streams')
        shift_inds = int(shift_len * fam.template.samp_rate)
        for st_chan in st_chans:
            trs = []
            for i, st in enumerate(wrk_streams):
                if len(st.select(station=st_chan[0],
                                 channel=st_chan[-1])) > 0:
                    trs.append((i, st.select(station=st_chan[0],
                                             channel=st_chan[-1])[0]))
            inds, traces = zip(*trs)
            shifts, ccs = stacking.align_traces(trace_list=list(traces),
                                                shift_len=shift_inds,
                                                positive=True,
                                                master=traces[0].copy())
            # We now have shifts based on P correlation, shift and trim
            # larger wavs for svd
            for j, shift in enumerate(shifts):
                st = svd_streams[inds[j]]
                if ccs[j] < reject:
                    svd_streams[inds[j]].remove(st.select(
                        station=st_chan[0], channel=st_chan[-1])[0])
                    print('Removing trace due to low cc value: %s' % ccs[j])
                    continue
                strt_tr = st.select(
                    station=st_chan[0],
                    channel=st_chan[-1])[0].stats.starttime
                strt_tr += (3.0 - prepick - shift)
                st.select(station=st_chan[0],
                          channel=st_chan[-1])[0].trim(strt_tr,
                                                       strt_tr + svd_len)
        if method == 'LSQR':
            print('Using least-squares method')
            event_list = []
            for stachan in st_chans:
                st_list = []
                for i, st in enumerate(svd_streams):
                    if len(st.select(station=stachan[0],
                                     channel=stachan[-1])) > 0:
                        st_list.append(i)
                event_list.append(st_list)
            # event_list = np.asarray(event_list).tolist()
            u, sigma, v, sta_chans = svd(stream_list=svd_streams, full=True)
            try:
                M, events_out = svd_moments(u, sigma, v, sta_chans,
                                            event_list)
            except IOError as e:
                print('Family %s raised error %s' % (fam.template.name, e))
                continue
        elif method == 'PCA':
            print('Using principal component method')
            # Now loop over all detections and do svd for each matching
            # chan with temp
            events_out = []
            template = svd_streams[0]
            M = []
            for i, st in enumerate(svd_streams):
                if len(st) == 0:
                    print('Event not located, skipping')
                    continue
                ev_r_amps = []
                # For each pair of template:detection (including temp:temp)
                for tr in template:
                    if len(st.select(station=tr.stats.station,
                                     channel=tr.stats.channel)) > 0:
                        det_tr = st.select(station=tr.stats.station,
                                           channel=tr.stats.channel)[0]
                        # Convoluted way of getting two 'vert' vectors
                        data_mat = np.vstack((tr.data, det_tr.data)).T
                        U, sig, Vt = scipy.linalg.svd(data_mat,
                                                      full_matrices=True)
                        # Vt is 2x2 for two events
                        # Per Shelly et al., 2016 eq. 4
                        ev_r_amps.append(Vt[0][1] / Vt[0][0])
                if len(ev_r_amps) < min_amps:
                    # Report the real threshold rather than a fixed "4"
                    print('Fewer than %s amplitude picks, skipping.'
                          % min_amps)
                    continue
                M.append(np.median(ev_r_amps))
                events_out.append(i)
        # If we have a Mag for template, calibrate moments
        if calibrate and len(fam.template.event.magnitudes) > 0:
            # Convert the template magnitude to seismic moment
            temp_mag = fam.template.event.magnitudes[-1].mag
            temp_mo = local_to_moment(temp_mag)
            # Extrapolate from the template moment - relative moment
            # relationship to get the moment for relative moment = 1.0.
            # The self detection was moved to index 0 of the stream list,
            # so M[0] is taken as the template's relative moment.
            norm_mo = temp_mo / M[0]
            # Now these are weights which we can multiple the moments by
            moments = np.multiply(M, norm_mo)
            # Now convert to Mw
            Mw = [2.0 / 3.0 * (np.log10(m) - 9.0) for m in moments]
            Mw2, evs2 = remove_outliers(Mw, events_out)
            # Convert to local
            Ml = [0.88 * m + 0.73 for m in Mw2]
            # Add calibrated mags to detection events
            # NOTE(review): the `eind - 1` offset maps stream indices back
            # to fam.detections; after the self detection is moved to the
            # front and bad streams are dropped this mapping may be off --
            # confirm.
            for i, eind in enumerate(evs2):
                fam.detections[eind-1].event.magnitudes = [
                    Magnitude(mag=Mw2[i], magnitude_type='Mw')]
                fam.detections[eind-1].event.comments.append(
                    Comment(text=str(cccohs[eind-1])))
                fam.detections[eind-1].event.magnitudes.append(
                    Magnitude(mag=Ml[i], magnitude_type='ML'))
            fam.catalog = Catalog(
                events=[det.event for det in fam.detections])
    return party, cccohs
def custom_template_gen(method, lowcut, highcut, samp_rate, filt_order,
                        length, prepick, swin="all", process_len=86400,
                        all_horiz=False, delayed=True, plot=False,
                        plotdir=None, return_event=False, min_snr=None,
                        parallel=False, num_cores=False, save_progress=False,
                        skip_short_chans=False, **kwargs):
    """
    Generate processed and cut waveforms for use as templates.

    Depending on ``method`` the following keyword arguments are read from
    ``kwargs``: ``catalog`` and ``data_pad`` (plus ``client_id`` for
    'from_client' or ``url`` for 'from_seishub'); ``meta_file``,
    ``catalog``, ``st`` and ``process`` for 'from_meta_file'; ``sac_files``
    for 'from_sac'.

    :type method: str
    :param method:
        Template generation method, must be one of ('from_client',
        'from_seishub', 'from_sac', 'from_meta_file').
    :type lowcut: float
    :param lowcut: Low cut (Hz), if set to None will not apply a lowcut.
    :type highcut: float
    :param highcut: High cut (Hz), if set to None will not apply a highcut.
    :type samp_rate: float
    :param samp_rate: New sampling rate in Hz.
    :type filt_order: int
    :param filt_order: Filter level (number of corners).
    :type length: float
    :param length: Length of template waveform in seconds.
    :type prepick: float
    :param prepick: Pre-pick time in seconds
    :type swin: str
    :param swin:
        P, S, P_all, S_all or all, defaults to all: see note in
        :func:`eqcorrscan.core.template_gen.template_gen`
    :type process_len: int
    :param process_len: Length of data in seconds to download and process.
    :type all_horiz: bool
    :param all_horiz:
        To use both horizontal channels even if there is only a pick on one
        of them.  Defaults to False.
    :type delayed: bool
    :param delayed:
        If True, each channel will begin relative to its own pick-time, if
        set to False, each channel will begin at the same time.
    :type plot: bool
    :param plot: Plot templates or not.
    :type plotdir: str
    :param plotdir:
        The path to save plots to. If `plotdir=None` (default) then the
        figure will be shown on screen.
    :type return_event: bool
    :param return_event: Whether to return the event and process length or not.
    :type min_snr: float
    :param min_snr:
        Minimum signal-to-noise ratio for a channel to be included in the
        template, where signal-to-noise ratio is calculated as the ratio of
        the maximum amplitude in the template window to the rms amplitude in
        the whole window given.
    :type parallel: bool
    :param parallel: Whether to process data in parallel or not.
    :type num_cores: int
    :param num_cores:
        Number of cores to try and use, if False and parallel=True, will use
        either all your cores, or as many traces as in the data (whichever
        is smaller).
    :type save_progress: bool
    :param save_progress:
        Whether to save the resulting templates at every data step or not.
        Useful for long-running processes.
    :type skip_short_chans: bool
    :param skip_short_chans:
        Whether to ignore channels that have insufficient length data or
        not. Useful when the quality of data is not known, e.g. when
        downloading old, possibly triggered data from a datacentre

    :returns:
        List of :class:`obspy.core.stream.Stream` Templates; when
        return_event is True, a tuple of (templates, catalog of used
        events, process lengths) instead.
    :rtype: list
    """
    client_map = {'from_client': 'fdsn', 'from_seishub': 'seishub'}
    assert method in ('from_client', 'from_seishub', 'from_meta_file',
                      'from_sac')
    swin = swin if isinstance(swin, list) else [swin]
    downloading = method in ('from_client', 'from_seishub')
    process = True
    if downloading:
        catalog = kwargs.get('catalog', Catalog())
        data_pad = kwargs.get('data_pad', 90)
        # Group catalog into days and only download the data once per day
        sub_catalogs = _group_events(
            catalog=catalog, process_len=process_len, template_length=length,
            data_pad=data_pad)
        if method == 'from_client':
            client_id = kwargs.get('client_id', None)
            # A string names an FDSN service; anything else is used as-is
            client = (FDSNClient(client_id) if isinstance(client_id, str)
                      else client_id)
            available_stations = []
        else:
            client = SeisHubClient(kwargs.get('url', None), timeout=10)
            available_stations = client.waveform.get_station_ids()
    elif method == 'from_meta_file':
        meta_file = kwargs.get('meta_file')
        if isinstance(meta_file, Catalog):
            catalog = meta_file
        elif meta_file:
            catalog = read_events(meta_file)
        else:
            catalog = kwargs.get('catalog')
        sub_catalogs = [catalog]
        st = kwargs.get('st', Stream())
        process = kwargs.get('process', True)
    elif method == 'from_sac':
        sac_files = kwargs.get('sac_files')
        if not isinstance(sac_files, list):
            st = sac_files
        elif isinstance(sac_files[0], (Stream, Trace)):
            # This is a list of streams...
            st = Stream(sac_files[0])
            for extra in sac_files[1:]:
                st += extra
        else:
            # A list of paths: keep the first trace of every file
            sac_files = [read(sac_file)[0] for sac_file in sac_files]
            st = Stream(sac_files)
        # Make an event object...
        catalog = Catalog([sactoevent(st)])
        sub_catalogs = [catalog]

    temp_list = []
    process_lengths = []
    catalog_out = Catalog()

    all_channels = bool("P_all" in swin or "S_all" in swin or all_horiz)
    for sub_catalog in sub_catalogs:
        if downloading:
            Logger.info("Downloading data")
            st = _download_from_client(
                client=client, client_type=client_map[method],
                catalog=sub_catalog, data_pad=data_pad,
                process_len=process_len,
                available_stations=available_stations,
                all_channels=all_channels)
        Logger.info('Pre-processing data')
        st.merge()
        if len(st) == 0:
            Logger.info("No data")
            continue
        if process:
            data_len = max(len(tr.data) / tr.stats.sampling_rate
                           for tr in st)
            daylong = 80000 < data_len < 90000
            if daylong:
                starttime = min(tr.stats.starttime for tr in st)
                min_delta = min(tr.stats.delta for tr in st)
                # Cope with the common starttime less than 1 sample before
                # the start of day.
                if (starttime + min_delta).date > starttime.date:
                    starttime = (starttime + min_delta)
                # Check if this is stupid:
                if abs(starttime - UTCDateTime(starttime.date)) > 600:
                    daylong = False
                starttime = starttime.date
            # Check if the required amount of data have been downloaded -
            # skip channels if arg set.
            for tr in st:
                n_samples = (np.ma.count(tr.data)
                             if np.ma.is_masked(tr.data) else tr.stats.npts)
                _len = n_samples * tr.stats.delta
                if _len < process_len * .8:
                    Logger.info("Data for {0} are too short, skipping".format(
                        tr.id))
                    if skip_short_chans:
                        continue
                # Trim to enforce process-len
                tr.data = tr.data[0:int(process_len * tr.stats.sampling_rate)]
            if len(st) == 0:
                Logger.info("No data")
                continue
            if daylong:
                st = pre_processing.dayproc(
                    st=st, lowcut=lowcut, highcut=highcut,
                    filt_order=filt_order, samp_rate=samp_rate,
                    parallel=parallel, starttime=UTCDateTime(starttime),
                    num_cores=num_cores)
            else:
                st = pre_processing.shortproc(
                    st=st, lowcut=lowcut, highcut=highcut,
                    filt_order=filt_order, parallel=parallel,
                    samp_rate=samp_rate, num_cores=num_cores)
        data_start = min(tr.stats.starttime for tr in st)
        data_end = max(tr.stats.endtime for tr in st)

        for event in sub_catalog:
            # Collected for bookkeeping; not read again in this function
            stations, channels, st_stachans = [], [], []
            if len(event.picks) == 0:
                Logger.warning('No picks for event {0}'.format(
                    event.resource_id))
                continue
            # Check that the event is within the data; every offending pick
            # is reported before the event is rejected
            use_event = True
            for pick in event.picks:
                if not data_start < pick.time < data_end:
                    Logger.warning(
                        "Pick outside of data span: Pick time {0} Start "
                        "time {1} End time: {2}".format(
                            str(pick.time), str(data_start), str(data_end)))
                    use_event = False
            if not use_event:
                Logger.error('Event is not within data time-span')
                continue
            # Read in pick info
            Logger.debug("I have found the following picks")
            for pick in event.picks:
                if not pick.waveform_id:
                    Logger.warning(
                        'Pick not associated with waveforms, will not use:'
                        ' {0}'.format(pick))
                    continue
                Logger.debug(pick)
                stations.append(pick.waveform_id.station_code)
                channels.append(pick.waveform_id.channel_code)
            # Check to see if all picks have a corresponding waveform
            for tr in st:
                st_stachans.append(
                    '.'.join([tr.stats.station, tr.stats.channel]))
            # Cut and extract the templates
            template = _template_gen(
                event.picks, st, length, swin, prepick=prepick, plot=plot,
                all_horiz=all_horiz, delayed=delayed, min_snr=min_snr,
                plotdir=plotdir)
            process_lengths.append(len(st[0].data) / samp_rate)
            temp_list.append(template)
            catalog_out += event
        if save_progress:
            if not os.path.isdir("eqcorrscan_temporary_templates"):
                os.makedirs("eqcorrscan_temporary_templates")
            # Every template gathered so far is (re)written at each step
            for template in temp_list:
                stamp = template[0].stats.starttime.strftime(
                    "%Y-%m-%dT%H%M%S")
                template.write(
                    "eqcorrscan_temporary_templates{0}{1}.ms".format(
                        os.path.sep, stamp),
                    format="MSEED")
        del st
    if return_event:
        return temp_list, catalog_out, process_lengths
    return temp_list
# Script excerpt: load every '*.ms' template in temp_dir, process it, and
# collect the station/channel pairs that the continuous data must cover.
os.chdir(temp_dir)
ms_files = sorted(glob('*.ms'))
# Template names are the file names without the '.ms' extension
template_names = [ms_file[:-3] for ms_file in ms_files]
templates = [read(ms_file) for ms_file in ms_files]
# Filter and downsample sample data. The processed stream is stored back
# into the list; rebinding the loop variable alone would drop whatever
# shortproc returns.
for i, template in enumerate(templates):
    templates[i] = pre_processing.shortproc(template, 1.0, 20.0, 3, 100.0,
                                            debug=1)
# Extract the station info from the templates as a unique list
stachans = list(set([(tr.stats.station, tr.stats.channel)
                     for template in templates for tr in template]))
# Read in the continuous data for these station, channel combinations
raw_dir = '/Volumes/GeoPhysics_07/users-data/matsonga/MRP_PROJ/data/mastersData/sac'
# Recursively search a directory for specific files matching desired day
# and stachan. Plain decimal literals are used because a leading zero
# (`06`) is a SyntaxError on Python 3.
start_day = UTCDateTime(2012, 6, 11).julday
end_day = UTCDateTime(2012, 6, 12).julday
days = range(start_day, end_day + 1)
def from_sfile(sfile, lowcut, highcut, samp_rate, filt_order, length, swin,
               prepick=0.05, debug=0, plot=False):
    """
    Read picks from an s-file and cut a template from its waveform files.

    The waveform files named in the s-file are read from the directory
    obtained by replacing every 'REA' component of the s-file's path with
    'WAV'.

    :type sfile: string
    :param sfile:
        Path to a seisan nordic type s-file containing waveform and pick
        information.
    :type lowcut: float
    :param lowcut: Low cut (Hz), handed to pre_processing.shortproc.
    :type highcut: float
    :param highcut: High cut (Hz), handed to pre_processing.shortproc.
    :type samp_rate: float
    :param samp_rate: New sampling rate in Hz.
    :type filt_order: int
    :param filt_order: Filter level.
    :type swin: str
    :param swin: Either 'all', 'P' or 'S', to select which phases to output.
    :type length: float
    :param length: Extract length in seconds.
    :type prepick: float
    :param prepick: Length to extract prior to the pick in seconds.
    :type debug: int
    :param debug: Debug level, higher number=more output.
    :type plot: bool
    :param plot: Turns template plotting on or off.

    :returns: obspy.Stream Newly cut template
    :raises IOError:
        If sfile does not exist or lists no waveform files.
    :raises ValueError:
        If any trace has a sampling rate lower than samp_rate.

    .. warning:: This will use whatever data is pointed to in the s-file, if
        this is not the continuous data, we recommend using other functions.
        Differences in processing between short files and day-long files
        (inherent to resampling) will produce lower cross-correlations.
    """
    # Perform some checks first
    import os
    if not os.path.isfile(sfile):
        raise IOError('sfile does not exist')
    from eqcorrscan.utils import pre_processing
    from eqcorrscan.utils import sfile_util
    from obspy import read as obsread
    # Read in the header of the sfile
    wavefiles = sfile_util.readwavename(sfile)
    if not wavefiles:
        # Previously this fell through to an unbound `st` below
        raise IOError('No waveform files are listed in sfile')
    # Waveforms live beside the s-file, with 'REA' swapped for 'WAV'
    new_path_parts = ['WAV' if part == 'REA' else part
                      for part in sfile.split('/')[:-1]]
    if new_path_parts:
        # * argument to allow .join() to accept a list
        wavpath = os.path.join(*new_path_parts) + '/'
    else:
        # A bare filename has no directory part; os.path.join() with no
        # arguments raises TypeError, so read from the working directory.
        wavpath = ''
    # In case of absolute paths (not handled with .split() --> .join())
    if sfile[0] == '/':
        wavpath = '/' + wavpath
    # Read in waveform file
    st = None
    for wavefile in wavefiles:
        print(''.join(["I am going to read waveform data from: ", wavpath,
                       wavefile]))
        if st is None:
            st = obsread(wavpath + wavefile)
        else:
            st += obsread(wavpath + wavefile)
    for tr in st:
        if tr.stats.sampling_rate < samp_rate:
            print('Sampling rate of data is lower than sampling rate asked ' +
                  'for')
            print('Not good practice for correlations: I will not do this')
            raise ValueError("Trace: " + tr.stats.station +
                             " sampling rate: " +
                             str(tr.stats.sampling_rate))
    # Read in pick info
    catalog = sfile_util.readpicks(sfile)
    # Read the list of Picks for this event
    picks = catalog[0].picks
    print("I have found the following picks")
    for pick in picks:
        print(' '.join([pick.waveform_id.station_code,
                        pick.waveform_id.channel_code, pick.phase_hint,
                        str(pick.time)]))
    # Process waveform data
    st.merge(fill_value='interpolate')
    st = pre_processing.shortproc(st, lowcut, highcut, filt_order,
                                  samp_rate, debug)
    st1 = _template_gen(picks=picks, st=st, length=length, swin=swin,
                        prepick=prepick, plot=plot)
    return st1
def cluster_tribe(tribe, raw_wav_dir, lowcut, highcut, samp_rate, filt_order,
                  pre_pick, length, shift_len, corr_thresh, cores,
                  dist_mat=False, show=False):
    """
    Cross correlate all templates in a tribe and return separate tribes for
    each cluster

    Waveforms are read from raw_wav_dir, processed, clipped around each
    pick, aligned per station/channel and then clustered.

    :param tribe: Tribe of templates; sorted in place by this function.
    :param raw_wav_dir:
        Directory of waveform files; a file belongs to the template whose
        name equals the file name up to its first '.'.
    :param lowcut: Low cut (Hz) for processing and for the new Templates.
    :param highcut: High cut (Hz) for processing and for the new Templates.
    :param samp_rate: Sampling rate (Hz) for processing and new Templates.
    :param filt_order: Filter order for processing and new Templates.
    :param pre_pick: Seconds before the pick at which each clip starts.
    :param length: Seconds of waveform kept after the pre-pick start.
    :param shift_len: Maximum alignment shift in seconds.
    :param corr_thresh: Correlation threshold handed to the clustering.
    :param cores: Number of cores for processing and clustering.
    :param dist_mat:
        Pre-computed distance matrix (numpy array); when given it is used
        via cluster_from_dist_mat instead of clustering.cluster.
    :param show: Passed through to the clustering routine.
    :return: List of Tribes, one per cluster.

    .. Note: Functionality here is pilaged from align design as we don't
        want the multiplexed portion of that function.
    """
    tribe.sort()
    raw_wav_files = glob('%s/*' % raw_wav_dir)
    raw_wav_files.sort()
    # Pair each template with its waveform file by name. Relying on two
    # independently sorted lists having the same order misaligns the pairs
    # whenever file-name order differs from template-name order or two
    # files share a stem; the first file (in sorted order) wins here.
    wav_by_name = {}
    for wav in raw_wav_files:
        wav_by_name.setdefault(wav.split('/')[-1].split('.')[0], wav)
    new_tribe = Tribe()
    new_tribe.templates = [temp for temp in tribe
                           if temp.name in wav_by_name]
    print('Processing temps')
    temp_list = [(shortproc(read(wav_by_name[template.name]), lowcut=lowcut,
                            highcut=highcut, samp_rate=samp_rate,
                            filt_order=filt_order, parallel=True,
                            num_cores=cores),
                  template)
                 for template in new_tribe]
    print('Clipping traces')
    for temp in temp_list:
        print('Clipping template %s' % temp[1].name)
        for tr in temp[0]:
            # First pick matching this trace; IndexError if there is none
            pk = [pk for pk in temp[1].event.picks
                  if pk.waveform_id.station_code == tr.stats.station
                  and pk.waveform_id.channel_code == tr.stats.channel][0]
            tr.trim(starttime=pk.time - shift_len - pre_pick,
                    endtime=pk.time - pre_pick + length + shift_len)
    trace_lengths = [tr.stats.endtime - tr.stats.starttime
                     for st in temp_list for tr in st[0]]
    # Shortest clip minus the shift allowance on both ends
    clip_len = min(trace_lengths) - (2 * shift_len)
    stachans = list(set([(tr.stats.station, tr.stats.channel)
                         for st in temp_list for tr in st[0]]))
    print('Aligning traces')
    for stachan in stachans:
        trace_list = []
        trace_ids = []
        for i, st in enumerate(temp_list):
            tr = st[0].select(station=stachan[0], channel=stachan[1])
            if len(tr) > 0:
                trace_list.append(tr[0])
                trace_ids.append(i)
            if len(tr) > 1:
                warnings.warn('Too many matches for %s %s'
                              % (stachan[0], stachan[1]))
        shift_len_samples = int(shift_len *
                                trace_list[0].stats.sampling_rate)
        shifts, cccs = stacking.align_traces(
            trace_list=trace_list, shift_len=shift_len_samples,
            positive=True)
        for i, shift in enumerate(shifts):
            st = temp_list[trace_ids[i]][0]
            start_t = st.select(station=stachan[0],
                                channel=stachan[1])[0].stats.starttime
            start_t += shift_len
            start_t -= shift
            st.select(station=stachan[0],
                      channel=stachan[1])[0].trim(start_t,
                                                  start_t + clip_len)
    print('Clustering')
    if isinstance(dist_mat, np.ndarray):
        groups = cluster_from_dist_mat(dist_mat=dist_mat,
                                       temp_list=temp_list, show=show,
                                       corr_thresh=corr_thresh)
    else:
        groups = clustering.cluster(temp_list, show=show,
                                    corr_thresh=corr_thresh,
                                    allow_shift=False, save_corrmat=True,
                                    cores=cores)
    group_tribes = []
    for group in groups:
        group_tribes.append(
            Tribe(templates=[Template(st=tmp[0], name=tmp[1].name,
                                      event=tmp[1].event, highcut=highcut,
                                      lowcut=lowcut, samp_rate=samp_rate,
                                      filt_order=filt_order,
                                      prepick=pre_pick)
                             for tmp in group]))
    return group_tribes
def from_sac(sac_files, lowcut, highcut, samp_rate, filt_order, length, swin,
             prepick=0.05, debug=0, plot=False):
    """
    Read picks and waveforms from SAC data and generate a template from them.

    :type sac_files: list or stream
    :param sac_files: List or stream of sac waveforms, or list of paths to
        sac waveforms. A list may hold paths, traces or streams; its type is
        decided from the first element.
    :type lowcut: float
    :param lowcut: Low cut (Hz), if set to None will look in template
        defaults file
    :type highcut: float
    :param highcut: High cut (Hz), if set to None will look in template
        defaults file
    :type samp_rate: float
    :param samp_rate: New sampling rate in Hz, if set to None will look in
        template defaults file
    :type filt_order: int
    :param filt_order: Filter level, if set to None will look in
        template defaults file
    :type swin: str
    :param swin: Either 'all', 'P' or 'S', to select which phases to output.
    :type length: float
    :param length: Extract length in seconds, if None will look in template
        defaults file.
    :type prepick: float
    :param prepick: Length to extract prior to the pick in seconds.
    :type debug: int
    :param debug: Debug level, higher number=more output.
    :type plot: bool
    :param plot: Turns template plotting on or off.

    :returns: obspy.Stream Newly cut template
    :raises IndexError: If sac_files is an empty list.
    :raises TypeError: If sac_files is neither a list nor a Stream.

    .. note:: This functionality is not supported for obspy versions below
        1.0.0 as reference times are not read in by SACIO, which are needed
        for defining pick times.
    """
    from obspy import read, Stream
    from eqcorrscan.utils.sac_util import sactoevent
    from eqcorrscan.utils import pre_processing

    # `unicode` only exists on Python 2; referencing it directly raises
    # NameError on Python 3 whenever the first element is not a str.
    try:
        string_types = (str, unicode)
    except NameError:
        string_types = (str,)

    # Normalise the supported inputs into a single Stream
    if isinstance(sac_files, Stream):
        st = sac_files
    elif isinstance(sac_files, list):
        if len(sac_files) == 0:
            # Same exception type as indexing an empty list, clearer message
            raise IndexError('sac_files is an empty list, nothing to read')
        if isinstance(sac_files[0], string_types):
            # Paths: keep the first trace of each file
            sac_files = [read(sac_file)[0] for sac_file in sac_files]
        if isinstance(sac_files[0], Stream):
            # List of streams: gather their traces into a new Stream so the
            # caller's first stream is not extended in place.
            st = Stream()
            for sac_stream in sac_files:
                st += sac_stream
        else:
            # List of traces
            st = Stream(sac_files)
    else:
        raise TypeError('sac_files must be a list or an obspy Stream, got ' +
                        type(sac_files).__name__)

    # Make an event object from the SAC headers
    event = sactoevent(st)
    # Process the data
    st.merge(fill_value='interpolate')
    st = pre_processing.shortproc(st, lowcut, highcut, filt_order,
                                  samp_rate, debug)
    # debug is forwarded so output verbosity matches from_sfile
    template = _template_gen(picks=event.picks, st=st, length=length,
                             swin=swin, prepick=prepick, plot=plot,
                             debug=debug)
    return template
def run_tutorial(plot=False):
    """
    Run the matched-filter tutorial on one day of GeoNet data.

    Templates are read from 'tutorial_template_*.ms' in the working
    directory, continuous data are downloaded and processed in hour-long
    chunks, and detections lying within one second of a stronger detection
    are discarded.

    :type plot: bool
    :param plot: If True, plot each unique detection and pass plotting on to
        match_filter.

    :returns: List of unique detections.
    :raises IOError: If no template files are found.
    """
    from eqcorrscan.utils import pre_processing
    from eqcorrscan.utils import plotting
    from eqcorrscan.core import match_filter
    import glob
    from multiprocessing import cpu_count

    # The FDSN client lives in a different namespace before obspy 1.0
    import obspy
    if int(obspy.__version__.split('.')[0]) >= 1:
        from obspy.clients.fdsn import Client
    else:
        from obspy.fdsn import Client
    from obspy import UTCDateTime, Stream, read

    # Load the templates made by the template-creation tutorial
    template_names = glob.glob('tutorial_template_*.ms')
    if not template_names:
        raise IOError('Template files not found, have you run the template ' +
                      'creation tutorial?')
    templates = [read(template_name) for template_name in template_names]

    # Unique (station, channel) pairs used by any template
    stations = list(set([(tr.stats.station, tr.stats.channel)
                         for template in templates for tr in template]))

    # Data are handled in chunks of process_len seconds. Chunks can be any
    # size (we generally use one day, MAD thresholds work down to about five
    # minutes), but the chunk size should match the template process_len.
    # You should test different parameters!!!
    start_time = UTCDateTime(2016, 1, 4)
    end_time = UTCDateTime(2016, 1, 5)
    process_len = 3600
    chunks = []
    chunk_start = start_time
    while chunk_start < end_time:
        chunk_end = chunk_start + process_len
        # The final chunk is clipped to the end of the period
        chunks.append(
            (chunk_start, end_time if chunk_end > end_time else chunk_end))
        chunk_start += process_len

    unique_detections = []
    detections = []

    # Client used to query the GeoNet database
    client = Client("GEONET")

    # Chunks are independent of each other and could be run in parallel on a
    # distributed cluster, see the SLURM tutorial for an example.
    for t1, t2 in chunks:
        # Bulk request: one entry per station/channel for this chunk
        bulk_info = [
            ('NZ', sta, '*', chan[0] + 'H' + chan[-1], t1, t2)
            for sta, chan in stations]

        # Note this will take a little while.
        print('Downloading seismic data, this may take a while')
        st = client.get_waveforms_bulk(bulk_info)
        # The download arrives in pieces, so merge them
        st.merge(fill_value='interpolate')

        # Use at most four cores, one per template. More cores than
        # templates gives no speed-up and may use enough extra memory to
        # fill swap.
        ncores = min(cpu_count(), 4)

        # Processing parameters MUST match those used to make the templates
        print('Processing the seismic data')
        st = pre_processing.shortproc(st, lowcut=2.0, highcut=9.0,
                                      filt_order=4, samp_rate=20.0,
                                      debug=2, num_cores=ncores,
                                      starttime=t1, endtime=t2)
        # Convert from list to stream
        st = Stream(st)

        # Matched-filter detection for this chunk
        detections += match_filter.match_filter(
            template_names=template_names, template_list=templates, st=st,
            threshold=8.0, threshold_type='MAD', trig_int=6.0,
            plotvar=plot, plotdir='.', cores=ncores, tempdir=False,
            debug=1, plot_format='jpg')

    # Work out how many unique events we have, to compare with the GeoNet
    # catalog of 20 events on this day in this sequence.
    for master in detections:
        for slave in detections:
            if master == slave:
                continue
            if abs(master.detect_time - slave.detect_time) <= 1.0:
                # Within 1s of each other: keep only the strongest detection
                if not master.detect_val > slave.detect_val:
                    break
        else:
            # No other detection within 1s matched or beat this one
            unique_detections.append(master)

    print('We made a total of ' + str(len(unique_detections)) + ' detections')

    for detection in unique_detections:
        print('Detection at :' + str(detection.detect_time) +
              ' for template ' + detection.template_name +
              ' with a cross-correlation sum of: ' +
              str(detection.detect_val))
        # We can plot these too
        if plot:
            # NOTE(review): st holds only the last processed chunk here, so
            # detections from earlier chunks may plot without data -
            # confirm whether this is intended.
            stplot = st.copy()
            template = templates[
                template_names.index(detection.template_name)]
            lags = sorted([tr.stats.starttime for tr in template])
            maxlag = lags[-1] - lags[0]
            stplot.trim(starttime=detection.detect_time - 10,
                        endtime=detection.detect_time + maxlag + 10)
            plotting.detection_multiplot(stplot, template,
                                         [detection.detect_time.datetime])
    return unique_detections
def _sfile_wav_dirs(sfile):
    """
    Derive the waveform directories implied by an s-file path.

    Every 'REA' directory in the path is replaced by 'WAV'. The first
    returned path is the full mirrored directory, the second is that
    directory truncated after the first 'WAV' component. Both end with a
    path separator, or are empty strings when sfile has no directory part.
    """
    import os

    dir_parts = ['WAV' if part == 'REA' else part
                 for part in sfile.split('/')[:-1]]
    if not dir_parts:
        # Bare file name: os.path.join() with no arguments raises TypeError,
        # and there is no directory to mirror anyway.
        return '', ''
    main_wav_parts = []
    for part in dir_parts:
        main_wav_parts.append(part)
        if part == 'WAV':
            break
    # * argument to allow .join() to accept a list
    wavpath = os.path.join(*dir_parts) + os.path.sep
    mainwav = os.path.join(*main_wav_parts) + os.path.sep
    # In case of absolute paths (not handled with .split() --> .join())
    if sfile[0] == os.path.sep:
        wavpath = os.path.sep + wavpath
        mainwav = os.path.sep + mainwav
    return wavpath, mainwav


def from_sfile(sfile, lowcut, highcut, samp_rate, filt_order, length, swin,
               prepick=0.05, debug=0, plot=False):
    """
    Generate multiplexed template from a Nordic (Seisan) s-file.

    Function to read in picks from sfile then generate the template from \
    the picks within this and the wavefile found in the pick file.

    :type sfile: str
    :param sfile: sfilename must be the \
        path to a seisan nordic type s-file containing waveform and pick \
        information.
    :type lowcut: float
    :param lowcut: Low cut (Hz), if set to None will look in template \
        defaults file
    :type highcut: float
    :param highcut: High cut (Hz), if set to None will look in template \
        defaults file
    :type samp_rate: float
    :param samp_rate: New sampling rate in Hz, if set to None will look in \
        template defaults file
    :type filt_order: int
    :param filt_order: Filter level, if set to None will look in \
        template defaults file
    :type swin: str
    :param swin: Either 'all', 'P' or 'S', to select which phases to output.
    :type length: float
    :param length: Extract length in seconds, if None will look in template \
        defaults file.
    :type prepick: float
    :param prepick: Length to extract prior to the pick in seconds.
    :type debug: int
    :param debug: Debug level, higher number=more output.
    :type plot: bool
    :param plot: Turns template plotting on or off.

    :returns: obspy.core.stream.Stream Newly cut template
    :raises IOError: If sfile does not exist, or names no waveform files.
    :raises ValueError: If any trace is sampled below samp_rate.

    .. warning:: This will use whatever data is pointed to in the s-file, if \
        this is not the continuous data, we recommend using other functions. \
        Differences in processing between short files and day-long files \
        (inherent to resampling) will produce lower cross-correlations.

    .. rubric:: Example

    >>> from eqcorrscan.core.template_gen import from_sfile
    >>> sfile = 'eqcorrscan/tests/test_data/REA/TEST_/01-0411-15L.S201309'
    >>> template = from_sfile(sfile=sfile, lowcut=5.0, highcut=15.0,
    ...                       samp_rate=50.0, filt_order=4, swin='P',
    ...                       prepick=0.2, length=6)
    >>> print(len(template))
    15
    >>> print(template[0].stats.sampling_rate)
    50.0
    >>> template.plot(equal_scale=False, size=(800,600)) # doctest: +SKIP

    .. plot::

        from eqcorrscan.core.template_gen import from_sfile
        import os
        sfile = os.path.realpath('../../..') + \
            '/tests/test_data/REA/TEST_/01-0411-15L.S201309'
        template = from_sfile(sfile=sfile, lowcut=5.0, highcut=15.0,
                              samp_rate=50.0, filt_order=4, swin='P',
                              prepick=0.2, length=6)
        template.plot(equal_scale=False, size=(800, 600))
    """
    # Perform some checks first
    import os
    if not os.path.isfile(sfile):
        raise IOError('sfile does not exist')

    from eqcorrscan.utils import pre_processing
    from eqcorrscan.utils import sfile_util
    from obspy import read as obsread

    # Read in the header of the sfile
    wavefiles = sfile_util.readwavename(sfile)
    wavpath, mainwav = _sfile_wav_dirs(sfile)

    # Read in waveform files, accumulating everything into one stream
    st = None
    for wavefile in wavefiles:
        if debug > 0:
            print(''.join(["I am going to read waveform data from: ", wavpath,
                           wavefile]))
        # Look next to the s-file's mirrored WAV directory first, then treat
        # the name as a path in its own right, then fall back to the main
        # WAV directory.
        if os.path.isfile(wavpath + wavefile):
            wav_location = wavpath + wavefile
        elif os.path.isfile(wavefile):
            wav_location = wavefile
        else:
            wav_location = mainwav + wavefile
        if st is None:
            st = obsread(wav_location)
        else:
            st += obsread(wav_location)
    if st is None:
        # Without this, the code below would fail with an unbound `st`
        raise IOError('No waveform files are listed in the sfile')

    for tr in st:
        if tr.stats.sampling_rate < samp_rate:
            print('Sampling rate of data is lower than sampling rate asked ' +
                  'for')
            print('Not good practice for correlations: I will not do this')
            raise ValueError("Trace: " + tr.stats.station +
                             " sampling rate: " + str(tr.stats.sampling_rate))

    # Read in pick info
    event = sfile_util.readpicks(sfile)
    # Read the list of Picks for this event
    picks = event.picks
    if debug > 0:
        print("I have found the following picks")
        for pick in picks:
            print(' '.join([pick.waveform_id.station_code,
                            pick.waveform_id.channel_code, pick.phase_hint,
                            str(pick.time)]))

    # Process waveform data
    st.merge(fill_value='interpolate')
    st = pre_processing.shortproc(st, lowcut, highcut, filt_order,
                                  samp_rate, debug)
    st1 = _template_gen(picks=picks, st=st, length=length, swin=swin,
                        prepick=prepick, plot=plot, debug=debug)
    return st1