def test_write_correlations(self):
    """
    Test that the write_correlations function works as it should.

    Hard to test accurately...
    """
    from eqcorrscan.utils.catalog_to_dd import write_correlations
    from eqcorrscan.utils.timer import Timer
    import os
    import glob

    testing_path = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                                'test_data', 'REA', 'TEST_')
    wavbase = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                           'test_data', 'WAV', 'TEST_')
    sfile_list = glob.glob(os.path.join(testing_path, '*L.S??????'))
    event_ids = list(range(len(sfile_list)))
    event_list = list(zip(event_ids, sfile_list))
    with Timer() as t:
        write_correlations(event_list, wavbase, extract_len=2, pre_pick=0.5,
                           shift_len=0.2, lowcut=2.0, highcut=10.0,
                           max_sep=1, min_link=8, coh_thresh=0.0,
                           plotvar=False)
    msg = 'Running ' + str(len(event_list)) + ' events took %s s' % t.secs
    print(msg)
    self.assertTrue(os.path.isfile('dt.cc'))
    os.remove('dt.cc')
    if os.path.isfile('dt.cc2'):
        os.remove('dt.cc2')
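# Every snippet in this collection times a block with
# ``eqcorrscan.utils.timer.Timer`` via ``with Timer() as t: ...`` and then
# reads ``t.secs``.  Below is a minimal, hypothetical sketch of such a
# context manager, written only to illustrate the pattern; it is not the
# EQcorrscan implementation and the class name is made up.
import time


class _SketchTimer(object):
    """Context manager that records elapsed wall-clock seconds in ``secs``."""

    def __init__(self, verbose=False):
        self.verbose = verbose

    def __enter__(self):
        self.start = time.time()
        return self

    def __exit__(self, *args):
        # Elapsed wall-clock time, mirroring the ``t.secs`` attribute used
        # throughout the functions in this collection.
        self.secs = time.time() - self.start
        if self.verbose:
            print('Elapsed time: %f s' % self.secs)
        return False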
def median_filter(tr, multiplier=10, windowlength=0.5, interp_len=0.05):
    """
    Filter out spikes in data above a multiple of MAD of the data.

    Currently only has the ability to replace spikes with linear
    interpolation. In the future we would aim to fill the gap with something
    more appropriate. Works in-place on data.

    :type tr: obspy.core.trace.Trace
    :param tr: trace to despike
    :type multiplier: float
    :param multiplier:
        median absolute deviation multiplier to find spikes above.
    :type windowlength: float
    :param windowlength: Length of window to look for spikes in in seconds.
    :type interp_len: float
    :param interp_len: Length in seconds to interpolate around spikes.

    :returns: :class:`obspy.core.trace.Trace`

    .. warning::
        Not particularly effective, and may remove earthquake signals, use
        with caution.
    """
    num_cores = cpu_count()
    # Note - might be worth finding spikes in filtered data
    filt = tr.copy()
    filt.detrend('linear')
    try:
        filt.filter('bandpass', freqmin=10.0,
                    freqmax=(tr.stats.sampling_rate / 2) - 1)
    except Exception as e:
        Logger.error("Could not filter due to error: {0}".format(e))
    data = filt.data
    del filt
    # Loop through windows
    _windowlength = int(windowlength * tr.stats.sampling_rate)
    _interp_len = int(interp_len * tr.stats.sampling_rate)
    peaks = []
    with Timer() as t:
        pool = Pool(processes=num_cores)
        results = [pool.apply_async(
            _median_window,
            args=(data[chunk * _windowlength:(chunk + 1) * _windowlength],
                  chunk * _windowlength, multiplier,
                  tr.stats.starttime + windowlength, tr.stats.sampling_rate))
            for chunk in range(int(len(data) / _windowlength))]
        pool.close()
        for p in results:
            peaks += p.get()
        pool.join()
        for peak in peaks:
            tr.data = _interp_gap(tr.data, peak[1], _interp_len)
    Logger.debug("Despiking took: %s s" % t.secs)
    return tr
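# Hedged usage sketch for ``median_filter`` above: despike a single obspy
# Trace in place.  It assumes the module-level helpers used by the function
# (``_median_window``, ``_interp_gap``, ``Pool``, ``cpu_count``, ``Logger``,
# ``Timer``) are importable, as in eqcorrscan.utils.despike; the obspy
# example waveform is used only as a stand-in for real data.
from obspy import read

tr = read()[0]  # obspy's bundled example waveform
tr_despiked = median_filter(tr, multiplier=10, windowlength=0.5,
                            interp_len=0.05)
# ``median_filter`` works in place, so ``tr`` and ``tr_despiked`` refer to
# the same Trace object after the call.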
def test_synth_large():
    print('\tGenerating Synthetic data\n\n')
    templates, data, seeds = synth_seis.generate_synth_data(
        nsta=10, ntemplates=20, nseeds=5, samp_rate=100, t_length=6,
        max_amp=5, max_lag=5, debug=0)
    print('\tRunning the parallel detections\n\n')
    with Timer() as t:
        detections = match_filter(
            template_names=[str(i) for i in range(len(templates))],
            template_list=templates, st=data, threshold=8,
            threshold_type='MAD', trig_int=6, plotvar=False, cores=4,
            output_event=False)
    print('Parallel run took %f seconds' % t.secs)
    print('\tRunning the serial detections\n\n')
    with Timer() as t:
        detections = match_filter(
            template_names=[str(i) for i in range(len(templates))],
            template_list=templates, st=data, threshold=8,
            threshold_type='MAD', trig_int=6, plotvar=False, cores=None,
            output_event=False)
    print('Serial run took %f seconds' % t.secs)
def template_remove(tr, template, cc_thresh, windowlength, interp_len,
                    debug=0):
    """
    Looks for instances of template in the trace and removes the matches.

    :type tr: obspy.core.trace.Trace
    :param tr: Trace to remove spikes from.
    :type template: obspy.core.trace.Trace
    :param template: Spike template to look for in data.
    :type cc_thresh: float
    :param cc_thresh: Cross-correlation threshold (-1 - 1).
    :type windowlength: float
    :param windowlength: Length of window to look for spikes in in seconds.
    :type interp_len: float
    :param interp_len: Window length to remove and fill in seconds.
    :type debug: int
    :param debug: Debug level.

    :returns: tr, works in place.
    :rtype: :class:`obspy.core.trace.Trace`
    """
    data_in = tr.copy()
    _interp_len = int(tr.stats.sampling_rate * interp_len)
    if _interp_len < len(template.data):
        warnings.warn('Interp_len is less than the length of the template, '
                      'will use the length of the template!')
        _interp_len = len(template.data)
    if isinstance(template, Trace):
        template = template.data
    with Timer() as t:
        cc = normxcorr2(image=tr.data.astype(np.float32),
                        template=template.astype(np.float32))
        if debug > 3:
            plt.plot(cc.flatten(), 'k', label='cross-correlation')
            plt.legend()
            plt.show()
        peaks = find_peaks2_short(
            arr=cc.flatten(), thresh=cc_thresh,
            trig_int=windowlength * tr.stats.sampling_rate)
        for peak in peaks:
            tr.data = _interp_gap(
                data=tr.data, peak_loc=peak[1] + int(0.5 * _interp_len),
                interp_len=_interp_len)
    print("Despiking took: %s s" % t.secs)
    if debug > 2:
        plt.plot(data_in.data, 'r', label='raw')
        plt.plot(tr.data, 'k', label='despiked')
        plt.legend()
        plt.show()
    return tr
def template_remove(tr, template, cc_thresh, windowlength, interp_len):
    """
    Looks for instances of template in the trace and removes the matches.

    :type tr: obspy.core.trace.Trace
    :param tr: Trace to remove spikes from.
    :type template: obspy.core.trace.Trace
    :param template: Spike template to look for in data.
    :type cc_thresh: float
    :param cc_thresh: Cross-correlation threshold (-1 - 1).
    :type windowlength: float
    :param windowlength: Length of window to look for spikes in in seconds.
    :type interp_len: float
    :param interp_len: Window length to remove and fill in seconds.

    :returns: tr, works in place.
    :rtype: :class:`obspy.core.trace.Trace`
    """
    _interp_len = int(tr.stats.sampling_rate * interp_len)
    if _interp_len < len(template.data):
        Logger.warning('Interp_len is less than the length of the template, '
                       'will use the length of the template!')
        _interp_len = len(template.data)
    if isinstance(template, Trace):
        template = np.array([template.data])
    with Timer() as t:
        normxcorr = get_array_xcorr("fftw")
        cc, _ = normxcorr(stream=tr.data.astype(np.float32),
                          templates=template.astype(np.float32), pads=[0])
        peaks = find_peaks2_short(
            arr=cc.flatten(), thresh=cc_thresh,
            trig_int=windowlength * tr.stats.sampling_rate)
        for peak in peaks:
            tr.data = _interp_gap(
                data=tr.data, peak_loc=peak[1] + int(0.5 * _interp_len),
                interp_len=_interp_len)
    Logger.info("Despiking took: {0:.4f} s".format(t.secs))
    return tr
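# Hedged usage sketch for the ``template_remove`` variant above: cut a short
# section of the trace to use as a spike template and remove every window
# that correlates with it above 0.7.  The slice times and threshold are
# illustrative only, and the helpers the function relies on
# (``get_array_xcorr``, ``find_peaks2_short``, ``_interp_gap``, ``Logger``,
# ``Timer``) are assumed to be available as in eqcorrscan.utils.despike.
from obspy import read

tr = read()[0]
spike_template = tr.copy().trim(tr.stats.starttime + 2,
                                tr.stats.starttime + 2.5)
tr = template_remove(tr, template=spike_template, cc_thresh=0.7,
                     windowlength=0.5, interp_len=0.5)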
def median_filter(tr, multiplier=10, windowlength=0.5, interp_len=0.05,
                  debug=0):
    """
    Filter out spikes in data according to the median absolute deviation of
    the data.

    Currently only has the ability to replace spikes with linear
    interpolation. In the future we would aim to fill the gap with something
    more appropriate. Works in-place on data.

    :type tr: obspy.Trace
    :param tr: trace to despike
    :type multiplier: float
    :param multiplier: median absolute deviation multiplier to find spikes
        above.
    :type windowlength: float
    :param windowlength: Length of window to look for spikes in in seconds.
    :type interp_len: float
    :param interp_len: Length in seconds to interpolate around spikes.

    :returns: obspy.trace
    """
    import matplotlib.pyplot as plt
    from multiprocessing import Pool, cpu_count
    from eqcorrscan.utils.timer import Timer

    num_cores = cpu_count()
    if debug >= 1:
        data_in = tr.copy()
    # Note - might be worth finding spikes in filtered data
    filt = tr.copy()
    filt.detrend('linear')
    filt.filter('bandpass', freqmin=10.0,
                freqmax=(tr.stats.sampling_rate / 2) - 1)
    data = filt.data
    del filt
    # Loop through windows
    _windowlength = int(windowlength * tr.stats.sampling_rate)
    _interp_len = int(interp_len * tr.stats.sampling_rate)
    peaks = []
    with Timer() as t:
        pool = Pool(processes=num_cores)
        results = [pool.apply_async(
            _median_window,
            args=(data[chunk * _windowlength:(chunk + 1) * _windowlength],
                  chunk * _windowlength, multiplier,
                  tr.stats.starttime + windowlength,
                  tr.stats.sampling_rate, debug))
            for chunk in range(int(len(data) / _windowlength))]
        pool.close()
        for p in results:
            peaks += p.get()
        pool.join()
        for peak in peaks:
            tr.data = _interp_gap(tr.data, peak[1], _interp_len)
    print("Despiking took: %s s" % t.secs)
    if debug >= 1:
        plt.plot(data_in.data, 'r', label='raw')
        plt.plot(tr.data, 'k', label='despiked')
        plt.legend()
        plt.show()
    return tr
def test_write_correlations(self):
    """
    Test that the write_correlations function works as it should.

    Hard to test accurately...
    """
    max_shift_len = 0.2
    testing_path = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                                'test_data', 'REA', 'TEST_')
    wavbase = os.path.join(os.path.abspath(os.path.dirname(__file__)),
                           'test_data', 'WAV', 'TEST_')
    sfile_list = glob.glob(os.path.join(testing_path, '*L.S??????'))
    event_ids = list(range(len(sfile_list)))
    event_list = list(zip(event_ids, sfile_list))
    with Timer() as t:
        write_correlations(event_list, wavbase, extract_len=2, pre_pick=0.5,
                           shift_len=max_shift_len, lowcut=2.0, highcut=10.0,
                           max_sep=1, min_link=8, cc_thresh=0.0,
                           plotvar=False)
    msg = 'Running ' + str(len(list(event_list))) + \
        ' events took %s s' % t.secs
    print(msg)
    self.assertTrue(os.path.isfile('dt.cc'))
    # Generate a complementary dt.ct file and check against that
    write_catalog(event_list=event_list, max_sep=1, min_link=8)
    cc = open('dt.cc', 'r')
    cc_pairs = []
    observations = []
    pair = cc.readline().split()[1:3]
    for line in cc:
        if line[0] == '#':
            # Append old observations to the previous pair and put in pairs
            cc_pairs.append({'pair': pair, 'observations': observations})
            pair = line.split()[1:3]
            observations = []
        else:
            obs = line.split()
            observations.append({'station': obs[0],
                                 'diff_time': float(obs[1]),
                                 'weight': float(obs[2]),
                                 'phase': obs[3]})
    cc.close()
    ct = open('dt.ct', 'r')
    ct_pairs = []
    observations = []
    pair = ct.readline().split()[1:3]
    for line in ct:
        if line[0] == '#':
            # Append old observations to the previous pair and put in pairs
            ct_pairs.append({'pair': pair, 'observations': observations})
            pair = line.split()[1:3]
            observations = []
        else:
            obs = line.split()
            # for sub in line.split('-'):
            #     for item in sub.split():
            #         obs.append(item)
            observations.append({'station': obs[0],
                                 'diff_time': float(obs[1]) - float(obs[2]),
                                 'weight': float(obs[3]),
                                 'phase': obs[4]})
    ct.close()
    # Everything is in memory, now we need to find matching pairs
    for cc_pair in cc_pairs:
        for ct_pair in ct_pairs:
            if cc_pair['pair'] == ct_pair['pair']:
                for cc_obs in cc_pair['observations']:
                    for ct_obs in ct_pair['observations']:
                        if cc_obs['station'] == ct_obs['station'] and\
                                cc_obs['phase'] == ct_obs['phase']:
                            corr_correction = abs(ct_obs['diff_time'] -
                                                  cc_obs['diff_time'])
                            self.assertTrue(corr_correction < max_shift_len)
    os.remove('dt.cc')
    os.remove('dt.ct')
    os.remove('phase.dat')
    if os.path.isfile('dt.cc2'):
        os.remove('dt.cc2')
    if os.path.isfile('dt.ct2'):
        os.remove('dt.ct2')
def _channel_loop(templates, stream, cores=1, debug=0):
    r"""
    Loop to generate cross channel correlation sums for a series of
    templates. Hands off the actual correlations to a sister function which
    can be run in parallel.

    :type templates: list of :class:`obspy.Stream`
    :param templates: A list of templates, where each one should be an
        obspy.Stream object containing multiple traces of seismic data and
        the relevant header information.
    :param stream: A single obspy.Stream object containing daylong seismic
        data to be correlated through using the templates. This is in effect
        the image.
    :type cores: int
    :param cores: Number of cores to loop over
    :type debug: int
    :param debug: Debug level.

    :return: New list of :class:`numpy.array` objects. These will contain
        the correlation sums for each template for this day of data.
    :return: list of ints as number of channels used for each
        cross-correlation.
    """
    import time
    from multiprocessing import Pool
    from eqcorrscan.utils.timer import Timer

    num_cores = cores
    if len(templates) < num_cores:
        num_cores = len(templates)
    if 'cccs_matrix' in locals():
        del cccs_matrix
    # Initialize cccs_matrix, which will be two arrays of len(templates)
    # arrays, where the arrays cccs_matrix[0[:]] will be the cross channel
    # sum for each template.
    # Note: This requires all templates to be the same length, and all
    # channels to be the same length
    cccs_matrix = np.array(
        [np.array([np.array([0.0] * (len(stream[0].data) -
                                     len(templates[0][0].data) + 1))] *
                  len(templates))] * 2)
    # Initialize number of channels array
    no_chans = np.array([0] * len(templates))
    for tr in stream:
        tr_data = tr.data
        station = tr.stats.station
        channel = tr.stats.channel
        if debug >= 1:
            print("Starting parallel run for station " + station +
                  " channel " + channel)
        tic = time.clock()
        with Timer() as t:
            # Send off to sister function
            pool = Pool(processes=num_cores, maxtasksperchild=None)
            results = [pool.apply_async(
                _template_loop,
                args=(templates[i], tr_data, station, channel, debug, i))
                for i in range(len(templates))]
            pool.close()
        if debug >= 1:
            print("--------- TIMER: Correlation loop took: %s s" % t.secs)
            print(" I have " + str(len(results)) + " results")
        with Timer() as t:
            cccs_list = [p.get() for p in results]
            pool.join()
        if debug >= 1:
            print("--------- TIMER: Getting results took: %s s" % t.secs)
        with Timer() as t:
            # Sort by placeholder returned from _template_loop
            cccs_list.sort(key=lambda tup: tup[0])
        if debug >= 1:
            print("--------- TIMER: Sorting took: %s s" % t.secs)
        with Timer() as t:
            cccs_list = [ccc[1] for ccc in cccs_list]
        if debug >= 1:
            print("--------- TIMER: Extracting arrays took: %s s" % t.secs)
        if debug >= 3:
            print('cccs_list is shaped: ' + str(np.shape(cccs_list)))
        with Timer() as t:
            cccs = np.concatenate(cccs_list, axis=0)
        if debug >= 1:
            print("--------- TIMER: cccs_list conversion: %s s" % t.secs)
        del cccs_list
        if debug >= 2:
            print('After looping through templates the cccs is shaped: ' +
                  str(np.shape(cccs)))
            print('cccs is using: ' + str(cccs.nbytes / 1000000) +
                  ' MB of memory')
        cccs_matrix[1] = np.reshape(cccs, (1, len(templates),
                                           max(np.shape(cccs))))
        del cccs
        if debug >= 2:
            print('cccs_matrix shaped: ' + str(np.shape(cccs_matrix)))
            print('cccs_matrix is using ' +
                  str(cccs_matrix.nbytes / 1000000) + ' MB of memory')
        # Now we have an array of arrays with the first dimensional index
        # giving the channel, the second dimensional index giving the
        # template and the third dimensional index giving the position
        # in the ccc, e.g.:
        # np.shape(cccsums)=(len(stream), len(templates), len(ccc))
        if debug >= 2:
            print('cccs_matrix as a np.array is shaped: ' +
                  str(np.shape(cccs_matrix)))
        # First work out how many channels were used
        for i in range(0, len(templates)):
            if not np.all(cccs_matrix[1][i] == 0):
                # Check that there are some real numbers in the vector rather
                # than being all 0, which is the default case for no match
                # of image and template names
                no_chans[i] += 1
        # Now sum along the channel axis for each template to give the
        # cccsum values for each template for each day
        with Timer() as t:
            cccsums = cccs_matrix.sum(axis=0).astype(np.float32)
        if debug >= 1:
            print("--------- TIMER: Summing took %s s" % t.secs)
        if debug >= 2:
            print('cccsums is shaped thus: ' + str(np.shape(cccsums)))
        cccs_matrix[0] = cccsums
        del cccsums
        toc = time.clock()
        if debug >= 1:
            print("--------- TIMER: Trace loop took " + str(toc - tic) +
                  " s")
    if debug >= 2:
        print('cccs_matrix is shaped: ' + str(np.shape(cccs_matrix)))
    cccsums = cccs_matrix[0]
    return cccsums, no_chans
def _template_loop(template, chan, station, channel, debug=0, i=0):
    r"""
    Sister loop to handle the correlation of a single template (of multiple
    channels) with a single channel of data.

    :type template: obspy.Stream
    :type chan: np.array
    :type station: string
    :type channel: string
    :type i: int
    :param i: Optional argument, used to keep track of which process is
        being run.

    :returns: tuple of (i, ccc) with ccc as an ndarray

    .. rubric:: Note

    This function currently assumes only one template-channel per
    data-channel, while this is normal for a standard matched-filter
    routine, if we wanted to implement a subspace detector, this would be
    the function to change, I think. E.g. where I currently take only the
    first matching channel, we could loop through all the matching channels
    and then sum the correlation sums - however I don't really understand
    how you detect based on that. More reading of the Harris document
    required.
    """
    from eqcorrscan.utils.timer import Timer

    ccc = np.array([np.nan] * (len(chan) - len(template[0].data) + 1),
                   dtype=np.float16)
    ccc = ccc.reshape((1, len(ccc)))
    # Set default value for cross-channel correlation in case there are no
    # data that match our channels.
    with Timer() as t:
        # While each bit of this loop isn't slow, looping through the if
        # statement when I don't need to adds up, I should work this out
        # earlier
        template_data = template.select(station=station, channel=channel)
        # I will for now assume that you only have one template per-channel
        template_data = template_data[0]
        delay = template_data.stats.starttime - \
            template.sort(['starttime'])[0].stats.starttime
        pad = np.array([0] * int(round(delay *
                                       template_data.stats.sampling_rate)))
        image = np.append(chan, pad)[len(pad):]
        ccc = (normxcorr2(template_data.data, image))
        ccc = ccc.astype(np.float16)
        # Convert to float16 to save memory for large problems - lose some
        # accuracy which will affect detections very close to threshold
    if debug >= 2 and t.secs > 4:
        print("Single if statement took %s s" % t.secs)
        if not template_data:
            print("Didn't even correlate!")
            print(station + ' ' + channel)
    elif debug >= 2:
        print("If statement without correlation took %s s" % t.secs)
    if debug >= 3:
        print('********* DEBUG: ' + station + '.' + channel +
              ' ccc MAX: ' + str(np.max(ccc[0])))
        print('********* DEBUG: ' + station + '.' + channel +
              ' ccc MEAN: ' + str(np.mean(ccc[0])))
    if np.isinf(np.mean(ccc[0])):
        warnings.warn('Mean of ccc is infinite, check!')
        if debug >= 3:
            np.save('inf_cccmean_ccc.npy', ccc[0])
            np.save('inf_cccmean_template.npy', template_data.data)
            np.save('inf_cccmean_image.npy', image)
    if debug >= 3:
        print('shape of ccc: ' + str(np.shape(ccc)))
        print('A single ccc is using: ' + str(ccc.nbytes / 1000000) + 'MB')
        print('ccc type is: ' + str(type(ccc)))
    if debug >= 3:
        print('shape of ccc: ' + str(np.shape(ccc)))
        print("Parallel worker " + str(i) + " complete")
    return (i, ccc)
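# The loop above leans on ``normxcorr2``.  As a rough illustration of what
# that call returns, here is a plain-numpy sketch of fully-normalised
# cross-correlation: a (1, len(image) - len(template) + 1) array with values
# in [-1, 1].  The function name is made up and this naive O(n*m) reference
# is not the library routine.
import numpy as np


def _sketch_normxcorr(template, image):
    """Naive normalised cross-correlation of two 1-D numpy arrays."""
    template = template - template.mean()
    norm_t = np.sqrt((template ** 2).sum())
    n_corr = len(image) - len(template) + 1
    ccc = np.zeros(n_corr, dtype=np.float32)
    for k in range(n_corr):
        window = image[k:k + len(template)]
        window = window - window.mean()
        denom = norm_t * np.sqrt((window ** 2).sum())
        if denom != 0:
            ccc[k] = (template * window).sum() / denom
    return ccc.reshape(1, n_corr)  # match the (1, n) shape used above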
def test_write_correlations(self):
    """
    Test that the write_correlations function works as it should.

    Hard to test accurately...
    """
    max_shift_len = 0.2
    with Timer() as t:
        write_correlations(
            self.event_list, self.wavbase, extract_len=2, pre_pick=0.5,
            shift_len=max_shift_len, lowcut=2.0, highcut=10.0,
            max_sep=self.maximum_separation, min_link=self.minimum_links,
            cc_thresh=0.0, plotvar=False)
    msg = 'Running ' + str(len(list(self.event_list))) + \
        ' events took %s s' % t.secs
    print(msg)
    self.assertTrue(os.path.isfile('dt.cc'))
    cc = open('dt.cc', 'r')
    cc_pairs = []
    observations = []
    pair = cc.readline().split()[1:3]
    for line in cc:
        if line[0] == '#':
            # Append old observations to the previous pair and put in pairs
            cc_pairs.append({'pair': pair, 'observations': observations})
            pair = line.split()[1:3]
            observations = []
        else:
            obs = line.split()
            observations.append({
                'station': obs[0], 'diff_time': float(obs[1]),
                'weight': float(obs[2]), 'phase': obs[3]})
    cc.close()
    ct = open('dt.ct', 'r')
    ct_pairs = []
    observations = []
    pair = ct.readline().split()[1:3]
    for line in ct:
        if line[0] == '#':
            # Append old observations to the previous pair and put in pairs
            ct_pairs.append({'pair': pair, 'observations': observations})
            pair = line.split()[1:3]
            observations = []
        else:
            obs = line.split()
            # for sub in line.split('-'):
            #     for item in sub.split():
            #         obs.append(item)
            observations.append({
                'station': obs[0],
                'diff_time': float(obs[1]) - float(obs[2]),
                'weight': float(obs[3]), 'phase': obs[4]})
    ct.close()
    # Everything is in memory, now we need to find matching pairs
    for cc_pair in cc_pairs:
        for ct_pair in ct_pairs:
            if cc_pair['pair'] == ct_pair['pair']:
                for cc_obs in cc_pair['observations']:
                    for ct_obs in ct_pair['observations']:
                        if cc_obs['station'] == ct_obs['station'] and\
                                cc_obs['phase'] == ct_obs['phase']:
                            corr_correction = abs(
                                ct_obs['diff_time'] - cc_obs['diff_time'])
                            self.assertTrue(
                                corr_correction < max_shift_len)
def median_filter(tr, multiplier=10, windowlength=0.5, interp_len=0.05,
                  debug=0):
    """
    Filter out spikes in data above a multiple of MAD of the data.

    Currently only has the ability to replace spikes with linear
    interpolation. In the future we would aim to fill the gap with something
    more appropriate. Works in-place on data.

    :type tr: obspy.core.trace.Trace
    :param tr: trace to despike
    :type multiplier: float
    :param multiplier:
        median absolute deviation multiplier to find spikes above.
    :type windowlength: float
    :param windowlength: Length of window to look for spikes in in seconds.
    :type interp_len: float
    :param interp_len: Length in seconds to interpolate around spikes.
    :type debug: int
    :param debug: Debug output level between 0 and 5, higher is more output.

    :returns: :class:`obspy.core.trace.Trace`

    .. warning::
        Not particularly effective, and may remove earthquake signals, use
        with caution.
    """
    num_cores = cpu_count()
    if debug >= 1:
        data_in = tr.copy()
    # Note - might be worth finding spikes in filtered data
    filt = tr.copy()
    filt.detrend('linear')
    filt.filter('bandpass', freqmin=10.0,
                freqmax=(tr.stats.sampling_rate / 2) - 1)
    data = filt.data
    del filt
    # Loop through windows
    _windowlength = int(windowlength * tr.stats.sampling_rate)
    _interp_len = int(interp_len * tr.stats.sampling_rate)
    peaks = []
    with Timer() as t:
        pool = Pool(processes=num_cores)
        results = [pool.apply_async(
            _median_window,
            args=(data[chunk * _windowlength:(chunk + 1) * _windowlength],
                  chunk * _windowlength, multiplier,
                  tr.stats.starttime + windowlength,
                  tr.stats.sampling_rate, debug))
            for chunk in range(int(len(data) / _windowlength))]
        pool.close()
        for p in results:
            peaks += p.get()
        pool.join()
        for peak in peaks:
            tr.data = _interp_gap(tr.data, peak[1], _interp_len)
    print("Despiking took: %s s" % t.secs)
    if debug >= 1:
        plt.plot(data_in.data, 'r', label='raw')
        plt.plot(tr.data, 'k', label='despiked')
        plt.legend()
        plt.show()
    return tr
def bench(
    n_templates: Iterable,
    n_channels: int,
    process_length: float,
    template_length: float,
    sampling_rate: float,
    reruns: int = 3,
    outfile: str = None,
):
    """
    Benchmark the matched-filter detection process for a given configuration.

    Profiles time and memory use - note that this only profiles the
    EQcorrscan Tribe.detect method, not the full real-time process. Use this
    to give you an idea of how many templates you can run within real-time.

    Parameters
    ----------
    n_templates
        Numbers of templates to profile using
    n_channels
        Number of channels of data to profile using
    process_length
        Length of data to profile using in seconds
    template_length
        Length of templates in seconds
    sampling_rate
        Sampling-rate in Hz
    reruns
        Number of times to re-run profiling - the average of these runs will
        be reported
    outfile
        File to write the timing and memory results to as JSON; if None, a
        time-stamped default name is used.
    """
    import matplotlib.pyplot as plt

    timings, memory = dict(), dict()
    for template_set in n_templates:
        tribe = make_synthetic_tribe(
            n_channels=n_channels, n_templates=template_set,
            process_length=process_length, template_length=template_length,
            sampling_rate=sampling_rate)
        st = make_synthetic_data(
            n_channels=n_channels, length=process_length,
            sampling_rate=sampling_rate,
            nslc=list({tuple(tr.id.split('.')) for tr in tribe[0].st}))
        print(f"Running for {template_set} templates")
        time = 0.0
        mem = 0.0
        for _ in range(reruns):
            with Timer() as t:
                mem_use = memory_usage(
                    proc=(tribe.detect, (),
                          dict(stream=Stream(st), threshold=8,
                               threshold_type="MAD", trig_int=2,
                               parallel_process=False)),
                    interval=0.05, multiprocess=True, include_children=True)
            time += t.secs
            mem += max(mem_use)
        time /= reruns
        mem /= reruns
        print(f"It took {time:.3f} s and used {mem:.3f} MB to run "
              f"{template_set} templates")
        timings.update({template_set: time})
        memory.update({template_set: mem})
    fig = plot_time_memory(
        timings, memory, process_length=process_length, show=False)
    fig.suptitle(
        f"RTEQcorrscan benchmark: {n_channels} channels of {process_length} "
        f"s\n{psutil.cpu_count()} CPU cores, max clock: "
        f"{psutil.cpu_freq().max} Hz")
    plt.show()
    # Reshape for output
    times_mems = {
        key: {"time": timings[key], "memory": memory[key]}
        for key in timings.keys()}
    now = UTCDateTime.now().strftime("%Y%m%dT%H%M%S")
    outfile = outfile or f"rteqcorrscan-bench_{now}"
    with open(outfile, "w") as f:
        json.dump(fp=f, obj=times_mems)
    print(f"Written results to {outfile}")
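# Hedged usage sketch for ``bench`` above: a deliberately tiny configuration
# so the benchmark finishes quickly.  The parameter values and output-file
# name are illustrative only, and the helpers ``bench`` calls
# (``make_synthetic_tribe``, ``make_synthetic_data``, ``plot_time_memory``,
# ``memory_usage``) are assumed to be importable from the same module.
if __name__ == "__main__":
    bench(n_templates=(5, 10), n_channels=3, process_length=600.0,
          template_length=4.0, sampling_rate=50.0, reruns=1,
          outfile="bench_smoke_test.json")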
def _template_loop(template, chan, station, channel, do_subspace=False,
                   debug=0, i=0):
    r"""
    Sister loop to handle the correlation of a single template (of multiple
    channels) with a single channel of data.

    :type template: obspy.Stream or list of obspy.Stream if subspace is True
    :type chan: np.array
    :type station: string
    :type channel: string
    :type do_subspace: bool
    :param do_subspace: Flag for running subspace detection. Defaults to
        False.
    :type i: int
    :param i: Optional argument, used to keep track of which process is
        being run.

    :returns: tuple of (i, ccc) with ccc as an ndarray
    """
    from eqcorrscan.utils.timer import Timer
    from eqcorrscan.core import subspace

    if do_subspace:
        temp_len = len(template[0][0].data)
    else:
        temp_len = len(template[0].data)
    cstat = np.array([np.nan] * (len(chan) - temp_len + 1), dtype=np.float16)
    cstat = cstat.reshape((1, len(cstat)))
    # Set default value for cross-channel correlation in case there are no
    # data that match our channels.
    with Timer() as t:
        # While each bit of this loop isn't slow, looping through the if
        # statement when I don't need to adds up, I should work this out
        # earlier
        if do_subspace:
            sin_vecs = [st.select(station=station, channel=channel)[0].data
                        for st in template
                        if len(st.select(station=station,
                                         channel=channel)) != 0]
            # Convert trace data to np array
            detector = np.asarray(sin_vecs)
            cstat = subspace.det_statistic(detector, data=chan)
            cstat = cstat.reshape((1, len(cstat)))
            # Do not convert subspace statistic to float16 due to overrunning
            # 16 bit precision in the mean calculation. np.isinf(np.mean())=T
            # cstat = cstat.astype(np.float16)
        else:
            template_data = template.select(station=station, channel=channel)
            # I will for now assume that you only have one template
            # per-channel
            template_data = template_data[0]
            delay = template_data.stats.starttime - \
                template.sort(['starttime'])[0].stats.starttime
            pad = np.array([0] * int(round(
                delay * template_data.stats.sampling_rate)))
            image = np.append(chan, pad)[len(pad):]
            cstat = (normxcorr2(template_data.data, image))
            cstat = cstat.astype(np.float16)
            # Convert to float16 to save memory for large problems - lose
            # some accuracy which will affect detections very close to
            # threshold
            #
            # There is an interesting issue found in the tests that sometimes
            # what should be a perfect correlation results in a max of ccc of
            # 0.99999994.  Converting to float16 'corrects' this to 1.0 - bad
            # workaround.
    if debug >= 3:
        print('********* DEBUG: ' + station + '.' + channel +
              ' ccc MAX: ' + str(np.max(cstat[0])))
        print('********* DEBUG: ' + station + '.' + channel +
              ' ccc MEAN: ' + str(np.mean(cstat[0])))
    if np.isinf(np.mean(cstat[0])):
        warnings.warn('Mean of ccc is infinite, check!')
        if debug >= 3:
            np.save('inf_cccmean_ccc_%02d.npy' % i, cstat[0])
            if do_subspace:
                np.save('inf_cccmean_template_%02d.npy' % i, sin_vecs)
                np.save('inf_cccmean_image_%02d.npy' % i, chan)
            else:
                np.save('inf_cccmean_template_%02d.npy' % i,
                        template_data.data)
                np.save('inf_cccmean_image_%02d.npy' % i, image)
        cstat = np.zeros(len(cstat[0]))
        cstat = cstat.reshape((1, len(cstat)))  # Returns zeros
    if debug >= 3:
        print('shape of ccc: ' + str(np.shape(cstat)))
        print('A single ccc is using: ' + str(cstat.nbytes / 1000000) + 'MB')
        print('ccc type is: ' + str(type(cstat)))
    if debug >= 3:
        print('shape of ccc: ' + str(np.shape(cstat)))
        print("Parallel worker " + str(i) + " complete")
    return (i, cstat)
def _channel_loop(templates, stream, cores=1, debug=0):
    """
    Internal loop for parallel processing.

    Loop to generate cross channel correlation sums for a series of
    templates hands off the actual correlations to a sister function which
    can be run in parallel.

    :type templates: list
    :param templates: A list of templates, where each one should be an
        obspy.Stream object containing multiple traces of seismic data and
        the relevant header information.
    :type stream: obspy.core.stream.Stream
    :param stream: A single Stream object to be correlated with the
        templates. This is in effect the image in normxcorr2 and cv2.
    :type cores: int
    :param cores: Number of cores to loop over
    :type debug: int
    :param debug: Debug level.

    :returns: New list of :class:`numpy.ndarray` objects. These will contain
        the correlation sums for each template for this day of data.
    :rtype: list
    :returns: list of ints as number of channels used for each
        cross-correlation.
    :rtype: list
    :returns: list of list of tuples of station, channel for all
        cross-correlations.
    :rtype: list

    .. Note::
        Each template must contain the same channels as every other
        template, the stream must also contain the same channels (note that
        if there are duplicate channels in the template you do not need
        duplicate channels in the stream).
    """
    num_cores = cores
    if len(templates) < num_cores:
        num_cores = len(templates)
    # Initialize cccs_matrix, which will be two arrays of len(templates)
    # arrays, where the arrays cccs_matrix[0[:]] will be the cross channel
    # sum for each template.
    # Note: This requires all templates to be the same length, and all
    # channels to be the same length
    temp_len = len(templates[0][0].data)
    cccs_matrix = np.array(
        [np.array([np.array([0.0] * (len(stream[0].data) - temp_len + 1))] *
                  len(templates))] * 2, dtype=np.float32)
    # Initialize number of channels array
    no_chans = np.array([0] * len(templates))
    chans = [[] for _ in range(len(templates))]
    # Match-filter enforces that each template is the same length...
    for stream_ind in range(len(templates[0])):
        station = templates[0][stream_ind].stats.station
        channel = templates[0][stream_ind].stats.channel
        tr = stream.select(station=station, channel=channel)[0]
        if debug >= 1:
            print("Starting parallel run for station " + station +
                  " channel " + channel)
        tic = time.clock()
        with Timer() as t:
            # Send off to sister function
            pool = Pool(processes=num_cores)
            results = [pool.apply_async(
                _template_loop,
                args=(templates[i], tr.data, stream_ind, debug, i))
                for i in range(len(templates))]
            pool.close()
        if debug >= 1:
            print("--------- TIMER: Correlation loop took: %s s" % t.secs)
            print(" I have " + str(len(results)) + " results")
        with Timer() as t:
            cccs_list = [p.get() for p in results]
            pool.join()
        if debug >= 1:
            print("--------- TIMER: Getting results took: %s s" % t.secs)
        with Timer() as t:
            # Sort by placeholder returned from _template_loop
            cccs_list.sort(key=lambda tup: tup[0])
        if debug >= 1:
            print("--------- TIMER: Sorting took: %s s" % t.secs)
        with Timer() as t:
            cccs_list = [ccc[1] for ccc in cccs_list]
        if debug >= 1:
            print("--------- TIMER: Extracting arrays took: %s s" % t.secs)
        if debug >= 3:
            print('cccs_list is shaped: ' + str(np.shape(cccs_list)))
        with Timer() as t:
            cccs = np.concatenate(cccs_list, axis=0)
        if debug >= 1:
            print("--------- TIMER: cccs_list conversion: %s s" % t.secs)
        del cccs_list
        if debug >= 2:
            print('After looping through templates the cccs is shaped: ' +
                  str(np.shape(cccs)))
            print('cccs is using: ' + str(cccs.nbytes / 1000000) +
                  ' MB of memory')
        cccs_matrix[1] = np.reshape(cccs, (1, len(templates),
                                           max(np.shape(cccs))))
        del cccs
        if debug >= 2:
            print('cccs_matrix shaped: ' + str(np.shape(cccs_matrix)))
            print('cccs_matrix is using ' +
                  str(cccs_matrix.nbytes / 1000000) + ' MB of memory')
        # Now we have an array of arrays with the first dimensional index
        # giving the channel, the second dimensional index giving the
        # template and the third dimensional index giving the position
        # in the ccc, e.g.:
        # np.shape(cccsums)=(len(stream), len(templates), len(ccc))
        if debug >= 2:
            print('cccs_matrix as a np.array is shaped: ' +
                  str(np.shape(cccs_matrix)))
        # First work out how many channels were used
        for i in range(0, len(templates)):
            if not np.all(cccs_matrix[1][i] == 0):
                # Check that there are some real numbers in the vector rather
                # than being all 0, which is the default case for no match
                # of image and template names
                no_chans[i] += 1
                chans[i].append((tr.stats.station, tr.stats.channel))
        # Now sum along the channel axis for each template to give the
        # cccsum values for each template for each day
        with Timer() as t:
            cccsums = cccs_matrix.sum(axis=0).astype(np.float32)
        if debug >= 1:
            print("--------- TIMER: Summing took %s s" % t.secs)
        if debug >= 2:
            print('cccsums is shaped thus: ' + str(np.shape(cccsums)))
        cccs_matrix[0] = cccsums
        del cccsums
        toc = time.clock()
        if debug >= 1:
            print("--------- TIMER: Trace loop took " + str(toc - tic) +
                  " s")
    if debug >= 2:
        print('cccs_matrix is shaped: ' + str(np.shape(cccs_matrix)))
    cccsums = cccs_matrix[0]
    return cccsums, no_chans, chans
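# Illustration of the channel-summing step performed by ``_channel_loop``
# above: per-channel correlation vectors for one template are stacked and
# summed so that arrivals aligned across channels reinforce each other while
# noise averages out.  Synthetic numbers only; this is not library code.
import numpy as np

rng = np.random.default_rng(42)
n_channels, n_samples = 4, 1000
per_channel_ccc = rng.normal(scale=0.1, size=(n_channels, n_samples))
per_channel_ccc[:, 600] = 0.9  # a detection aligned on every channel
cccsum = per_channel_ccc.sum(axis=0)
# The aligned sample stands far above the noise floor of the summed trace
assert cccsum.argmax() == 600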