def brightness(stations, nodes, lags, stream, threshold, thresh_type,
               template_length, template_saveloc, coherence_thresh,
               coherence_stations=['all'], coherence_clip=False, gap=2.0,
               clip_level=100, instance=0, pre_pick=0.2, plotvar=False,
               plotsave=True, cores=1, debug=0, mem_issue=False):
    """
    Calculate the brightness function for a single day.

    Written to calculate the brightness function for a single day of data,
    using moveouts from a 3D travel-time grid. The energy stack of every
    node is computed in a worker pool, the per-sample maximum over all nodes
    forms the cumulative network response, detections are made in that
    response, and each detection is cut into a template which is kept only
    if it passes the coherence threshold.

    .. Note::
        Data in stream must be all of the same length and have the same
        sampling rates, see :func:`eqcorrscan.utils.pre_processing.dayproc`

    :type stations: list
    :param stations:
        List of station names from in the form where stations[i] refers to
        nodes[i][:] and lags[i][:]
    :type nodes: list
    :param nodes:
        List of node points where nodes[i] refers to stations[i] and
        nodes[:][:][0] is latitude in degrees, nodes[:][:][1] is longitude
        in degrees, nodes[:][:][2] is depth in km.
    :type lags: numpy.ndarray
    :param lags:
        Array of arrays where lags[i][:] refers to stations[i]. lags[i][j]
        should be the delay to the nodes[i][j] for stations[i] in seconds.
    :type stream: obspy.core.stream.Stream
    :param stream: Data through which to look for detections.
    :type threshold: float
    :param threshold:
        Threshold value for detection of template within the brightness
        function.
    :type thresh_type: str
    :param thresh_type:
        Either MAD or abs where MAD is the Median Absolute Deviation and
        abs is an absolute brightness.
    :type template_length: float
    :param template_length: Length of template to extract in seconds
    :type template_saveloc: str
    :param template_saveloc: Path of where to save the templates.
    :type coherence_thresh: tuple
    :param coherence_thresh:
        Threshold for removing incoherent peaks in the network response,
        those below this will not be used as templates. Must be in the
        form of (a,b) where the coherence is given by: :math:`a-kchan/b`
        where kchan is the number of channels used to compute the
        coherence.
    :type coherence_stations: list
    :param coherence_stations:
        List of stations to use in the coherence thresholding - defaults to
        `all` which uses all the stations. The list is only passed on, never
        modified here.
    :type coherence_clip: tuple
    :param coherence_clip:
        Start and end in seconds of data to window around, defaults to
        False, which uses all the data given.
    :type gap: float
    :param gap: Minimum inter-event time in seconds for detections.
    :type clip_level: float
    :param clip_level:
        Multiplier applied to the mean deviation of the energy as an upper
        limit, used to remove spikes (earthquakes, lightning, electrical
        spikes) from the energy stack.
    :type instance: int
    :param instance:
        Optional, used for tracking when using a distributed computing
        system. Also names the temporary directory ``tmp<instance>`` that
        is created when `mem_issue` is set.
    :type pre_pick: float
    :param pre_pick:
        Seconds before the detection time to include in template (the value
        is added to every generated pick time).
    :type plotvar: bool
    :param plotvar: Turn plotting on or off
    :type plotsave: bool
    :param plotsave:
        Save or show plots, if `False` will try and show the plots on
        screen - as this is designed for bulk use this is set to `True` to
        save any plots rather than show them if you create them - changes
        the backend of matplotlib, so if is set to `False` you will see NO
        PLOTS!
    :type cores: int
    :param cores: Number of cores to use, defaults to 1.
    :type debug: int
    :param debug: Debug level from 0-5, higher is more output.
    :type mem_issue: bool
    :param mem_issue:
        Set to True to write temporary variables to disk rather than store
        in memory, slow.

    :return:
        ``(templates, nodes)``: the list of templates as
        :class:`obspy.core.stream.Stream` objects, and the de-duplicated
        list of nodes (tuples of the three strings parsed from the
        detection name) that produced a coherent template.
    :rtype: tuple

    :raises BrightnessError: If `stream` holds more than 130 traces.
    """
    if plotsave:
        import matplotlib
        matplotlib.use('Agg')
        import matplotlib.pyplot as plt
        plt.ioff()
    from eqcorrscan.utils import plotting
    from eqcorrscan.utils.debug_log import debug_print
    # Check that we actually have the correct stations: keep the names of
    # the stations that have at least one trace in the stream.
    realstations = [station for station in stations
                    if stream.select(station=station)]
    stream_copy = stream.copy()
    # Force convert to int16
    for tr in stream_copy:
        # int16 max range is +/- 32767
        peak = np.max(np.abs(tr.data))
        if peak > 32767:
            # Make sure that the data aren't clipped if they are high gain:
            # scale the data so the largest sample sits at the int16 limit.
            tr.data = 32767 * (tr.data / peak)
        tr.data = tr.data.astype(np.int16)
    # The internal _node_loop converts energy to int16 too to conserve
    # memory, to do this it forces the maximum of a single energy trace to
    # be 500 and normalises to this level - this only works for fewer than
    # 65 channels of data
    # NOTE(review): the comment above says 65 channels, the guard below
    # allows up to 130 traces - confirm which limit is intended.
    if len(stream_copy) > 130:
        raise BrightnessError(
            'Too many streams, either re-code and cope with either more memory'
            ' usage, or less precision, or reduce data volume')
    # Loop through each node in the input
    print('Computing the energy stacks')
    # Parallel run: never ask for more workers than nodes or CPUs.
    num_cores = min(cores, len(nodes), cpu_count())
    if mem_issue and not os.path.isdir('tmp' + str(instance)):
        os.makedirs('tmp' + str(instance))
    pool = Pool(processes=num_cores)
    results = [
        pool.apply_async(
            _node_loop, (stations, ), {
                'lags': lags[:, i],
                'stream': stream,
                'i': i,
                'clip_level': clip_level,
                'mem_issue': mem_issue,
                'instance': instance
            }) for i in range(len(nodes))
    ]
    pool.close()
    if not mem_issue:
        print('Computing the cumulative network response from memory')
        energy = [p.get() for p in results]
        pool.join()
        # Workers may finish out of order; restore node order by the index
        # each worker returned as the first tuple element.
        energy.sort(key=lambda tup: tup[0])
        energy = np.concatenate([node[1] for node in energy], axis=0)
        print(energy.shape)
    else:
        pool.join()
        del results

    # Now compute the cumulative network response and then detect possible
    # events
    if not mem_issue:
        print(energy.shape)
        indices = np.argmax(energy, axis=0)  # Indices of maximum energy
        print(indices.shape)
        # For every sample take the energy of the brightest node. The cast
        # keeps the float dtype the response has always had.
        cum_net_resp = energy[indices, np.arange(len(indices))].astype(
            np.float64)
        peak_nodes = [nodes[node_index] for node_index in indices]
        del energy, indices
    else:
        print('Reading the temp files and computing network response')
        node_splits = int(len(nodes) // num_cores)
        print(node_splits)
        # Equal-sized chunks of node indices, one per worker; the last
        # worker also takes whatever remains after the integer division.
        indices = [
            list(np.arange(node_splits * i, node_splits * (i + 1)))
            for i in range(num_cores)
        ]
        indices[-1] += list(np.arange(node_splits * num_cores, len(nodes)))
        pool = Pool(processes=num_cores)
        results = [
            pool.apply_async(_cum_net_resp, args=(indices[i], instance))
            for i in range(num_cores)
        ]
        pool.close()
        results = [p.get() for p in results]
        pool.join()
        responses = [result[0] for result in results]
        print(np.shape(responses))
        node_indices = [result[1] for result in results]
        cum_net_resp = np.array(responses)
        indices = np.argmax(cum_net_resp, axis=0)
        print(indices.shape)
        print(cum_net_resp.shape)
        cum_net_resp = np.array(
            [cum_net_resp[indices[i]][i] for i in range(len(indices))])
        peak_nodes = [
            nodes[node_indices[indices[i]][i]] for i in range(len(indices))
        ]
        del indices, node_indices
    if plotvar:
        cum_net_trace = Stream(
            Trace(data=cum_net_resp,
                  header=Stats({
                      'station': 'NR',
                      'channel': '',
                      'network': 'Z',
                      'location': '',
                      'starttime': stream[0].stats.starttime,
                      'sampling_rate': stream[0].stats.sampling_rate
                  })))
        cum_net_trace += stream.select(channel='*N')
        cum_net_trace += stream.select(channel='*1')
        cum_net_trace.sort(['network', 'station', 'channel'])

    # Find detection within this network response
    print('Finding detections in the cumulative network response')
    detections = _find_detections(cum_net_resp, peak_nodes, threshold,
                                  thresh_type,
                                  stream[0].stats.sampling_rate,
                                  realstations, gap)
    del cum_net_resp
    templates = []
    nodesout = []
    good_detections = []
    if detections:
        print('Converting detections into templates')
        for j, detection in enumerate(detections):
            debug_print(
                'Converting for detection %i of %i' % (j, len(detections)),
                3, debug)
            # Create an event for each detection
            event = Event()
            # Set up some header info for the event
            event.event_descriptions.append(EventDescription())
            event.event_descriptions[0].text = 'Brightness detection'
            event.creation_info = CreationInfo(agency_id='EQcorrscan')
            copy_of_stream = deepcopy(stream_copy)
            # Convert detections to obspy.core.event type -
            # name of detection template is the node.
            name_parts = detection.template_name.split('_')
            node = (name_parts[0], name_parts[1], name_parts[2])
            # Look up node in nodes and find the associated lags
            index = nodes.index(
                (float(node[0]), float(node[1]), float(node[2])))
            detect_lags = lags[:, index]
            ksta = Comment(text='Number of stations=' +
                           str(len(detect_lags)))
            event.origins.append(Origin())
            event.origins[0].comments.append(ksta)
            event.origins[0].time = copy_of_stream[0].stats.starttime +\
                detect_lags[0] + detection.detect_time
            event.origins[0].latitude = node[0]
            event.origins[0].longitude = node[1]
            event.origins[0].depth = node[2]
            # One pick per channel of every station that has data.
            for i, detect_lag in enumerate(detect_lags):
                station = stations[i]
                st = copy_of_stream.select(station=station)
                for tr in st:
                    _waveform_id = WaveformStreamID(
                        station_code=tr.stats.station,
                        channel_code=tr.stats.channel,
                        network_code=tr.stats.network)
                    event.picks.append(
                        Pick(waveform_id=_waveform_id,
                             time=tr.stats.starttime + detect_lag +
                             detection.detect_time + pre_pick,
                             onset='emergent',
                             evaluation_mode='automatic'))
            debug_print('Generating template for detection: %i' % j,
                        0, debug)
            template = template_gen(picks=event.picks, st=copy_of_stream,
                                    length=template_length, swin='all')
            template_name = template_saveloc + '/' +\
                str(template[0].stats.starttime) + '.ms'
            # In the interests of RAM conservation we write then read
            # Check coherency here!
            temp_coher, kchan = coherence(template, coherence_stations,
                                          coherence_clip)
            coh_thresh = float(coherence_thresh[0]) - kchan / \
                float(coherence_thresh[1])
            coherent = False
            if temp_coher > coh_thresh:
                template.write(template_name, format="MSEED")
                print('Written template as: ' + template_name)
                print('---------------------------------coherence LEVEL: ' +
                      str(temp_coher))
                coherent = True
            else:
                debug_print(
                    'Template was incoherent, coherence level: ' +
                    str(temp_coher), 0, debug)
            # Free the working copies before reading the template back.
            del copy_of_stream, template
            if coherent:
                templates.append(obsread(template_name))
                nodesout += [node]
                good_detections.append(detection)
            else:
                debug_print('No template for you', 0, debug)
    if plotvar:
        good_detections = [(cum_net_trace[-1].stats.starttime +
                            detection.detect_time).datetime
                           for detection in good_detections]
        if not plotsave:
            plotting.NR_plot(cum_net_trace[0:-1],
                             Stream(cum_net_trace[-1]),
                             detections=good_detections,
                             size=(18.5, 10),
                             title='Network response')
        else:
            savefile = 'plots/' +\
                cum_net_trace[0].stats.starttime.datetime.strftime('%Y%m%d') +\
                '_NR_timeseries.pdf'
            plotting.NR_plot(cum_net_trace[0:-1],
                             Stream(cum_net_trace[-1]),
                             detections=good_detections,
                             size=(18.5, 10), save=True, savefile=savefile,
                             title='Network response')
    nodesout = list(set(nodesout))
    return templates, nodesout
def brightness(stations, nodes, lags, stream, threshold, thresh_type,
               template_length, template_saveloc, coherence_thresh,
               coherence_stations=['all'], coherence_clip=False, gap=2.0,
               clip_level=100, instance=0, pre_pick=0.2, plotsave=True,
               cores=1, plotvar=True):
    """
    Calculate the brightness function in terms of energy for a day of data.

    The brightness is computed over the entire network for a given grid of
    nodes: the energy stack of every node is computed in a worker pool and
    held in memory, the per-sample maximum over all nodes forms the
    cumulative network response, detections are made in that response, and
    each detection is cut into a template which is kept only if it passes
    the coherence threshold.

    Note data in stream must be all of the same length and have the same
    sampling rates.

    :type stations: list
    :param stations: List of station names from in the form where
        stations[i] refers to nodes[i][:] and lags[i][:]
    :type nodes: list, tuple
    :param nodes: List of node points where nodes[i] refers to stations[i]
        and nodes[:][:][0] is latitude in degrees, nodes[:][:][1] is
        longitude in degrees, nodes[:][:][2] is depth in km.
    :type lags: :class:`numpy.ndarray`
    :param lags: Array of arrays where lags[i][:] refers to stations[i].
        lags[i][j] should be the delay to the nodes[i][j] for stations[i]
        in seconds.
    :type stream: :class:`obspy.Stream`
    :param stream: Data through which to look for detections.
    :type threshold: float
    :param threshold: Threshold value for detection of template within the
        brightness function
    :type thresh_type: str
    :param thresh_type: Either MAD or abs where MAD is the Median Absolute
        Deviation and abs is an absolute brightness.
    :type template_length: float
    :param template_length: Length of template to extract in seconds
    :type template_saveloc: str
    :param template_saveloc: Path of where to save the templates.
    :type coherence_thresh: tuple of floats
    :param coherence_thresh: Threshold for removing incoherent peaks in the
        network response, those below this will not be used as templates.
        Must be in the form of (a,b) where the coherence is given by:
        a-kchan/b where kchan is the number of channels used to compute
        the coherence
    :type coherence_stations: list
    :param coherence_stations: List of stations to use in the coherence
        thresholding - defaults to 'all' which uses all the stations. The
        list is only passed on, never modified here.
    :type coherence_clip: tuple
    :param coherence_clip: Start and end in seconds of data to window
        around, defaults to False, which uses all the data given.
    :type gap: float
    :param gap: Minimum inter-event time in seconds for detections
    :type clip_level: float
    :param clip_level: Multiplier applied to the mean deviation of the
        energy as an upper limit, used to remove spikes (earthquakes,
        lightning, electrical spikes) from the energy stack.
    :type instance: int
    :param instance: Passed straight through to the node worker.
    :type pre_pick: float
    :param pre_pick: Seconds before the detection time to include in
        template (the value is added to every generated pick time).
    :type plotsave: bool
    :param plotsave: Save or show plots, if False will try and show the
        plots on screen - as this is designed for bulk use this is set to
        True to save any plots rather than show them if you create them -
        changes the backend of matplotlib, so if is set to False you will
        see NO PLOTS!
    :type cores: int
    :param cores: Number of cores to use, defaults to 1.
    :type plotvar: bool
    :param plotvar: Turn plotting of the network response on or off,
        defaults to True (the previous, fixed behaviour).

    :return: ``(templates, nodes)``: the list of templates as
        :class:`obspy.Stream` objects, and the de-duplicated list of nodes
        (tuples of the three strings parsed from the detection name) that
        produced a coherent template.
    :rtype: tuple

    :raises OverflowError: If `stream` holds more than 130 traces.
    """
    from eqcorrscan.core.template_gen import _template_gen
    if plotsave:
        import matplotlib
        matplotlib.use('Agg')
        import matplotlib.pyplot as plt
        plt.ioff()
    from multiprocessing import Pool, cpu_count
    from copy import deepcopy
    from obspy import read as obsread
    # Stream is a class exported by the obspy package, not a submodule, so
    # it has to be imported with ``from``.
    from obspy import Stream
    from obspy.core.event import Event, Pick, WaveformStreamID, Origin
    from obspy.core.event import EventDescription, CreationInfo, Comment
    from eqcorrscan.utils import plotting
    # Check that we actually have the correct stations: keep the names of
    # the stations that have at least one trace in the stream.
    realstations = [station for station in stations
                    if stream.select(station=station)]
    stream_copy = stream.copy()
    # Force convert to int16
    for tr in stream_copy:
        # int16 max range is +/- 32767
        peak = np.max(np.abs(tr.data))
        if peak > 32767:
            # Make sure that the data aren't clipped if they are high gain:
            # scale the data so the largest sample sits at the int16 limit.
            tr.data = 32767 * (tr.data / peak)
        tr.data = tr.data.astype(np.int16)
    # The internal _node_loop converts energy to int16 too to conserve
    # memory, to do this it forces the maximum of a single energy trace to
    # be 500 and normalises to this level - this only works for fewer than
    # 65 channels of data
    # NOTE(review): the comment above says 65 channels, the guard below
    # allows up to 130 traces - confirm which limit is intended.
    if len(stream_copy) > 130:
        raise OverflowError('Too many streams, either re-code and cope with '
                            'either more memory usage, or less precision, or '
                            'reduce data volume')
    # This variant always keeps the energy stacks in memory; the flag is
    # still handed to the node worker.
    mem_issue = False
    # Loop through each node in the input
    print('Computing the energy stacks')
    # Parallel run: never ask for more workers than nodes or CPUs.
    num_cores = min(cores, len(nodes), cpu_count())
    pool = Pool(processes=num_cores)
    results = [
        pool.apply_async(_node_loop,
                         args=(stations, lags[:, i], stream, i, clip_level,
                               mem_issue, instance))
        for i in range(len(nodes))
    ]
    pool.close()
    print('Computing the cumulative network response from memory')
    energy = [p.get() for p in results]
    pool.join()
    # Workers may finish out of order; restore node order by the index each
    # worker returned as the first tuple element.
    energy.sort(key=lambda tup: tup[0])
    energy = np.concatenate([node[1] for node in energy], axis=0)
    print(energy.shape)
    # Now compute the cumulative network response and then detect possible
    # events
    print(energy.shape)
    indeces = np.argmax(energy, axis=0)  # Indices of maximum energy
    print(indeces.shape)
    # For every sample take the energy of the brightest node. The cast keeps
    # the float dtype the response has always had.
    cum_net_resp = energy[indeces, np.arange(len(indeces))].astype(
        np.float64)
    peak_nodes = [nodes[node_index] for node_index in indeces]
    del energy, indeces
    if plotvar:
        # Reuse the header of the first trace for the response trace.
        cum_net_trace = deepcopy(stream[0])
        cum_net_trace.data = cum_net_resp
        cum_net_trace.stats.station = 'NR'
        cum_net_trace.stats.channel = ''
        cum_net_trace.stats.network = 'Z'
        cum_net_trace.stats.location = ''
        cum_net_trace.stats.starttime = stream[0].stats.starttime
        cum_net_trace = Stream(cum_net_trace)
        cum_net_trace += stream.select(channel='*N')
        cum_net_trace += stream.select(channel='*1')
        cum_net_trace.sort(['network', 'station', 'channel'])

    # Find detection within this network response
    print('Finding detections in the cumulatve network response')
    detections = _find_detections(cum_net_resp, peak_nodes, threshold,
                                  thresh_type,
                                  stream[0].stats.sampling_rate,
                                  realstations, gap)
    del cum_net_resp
    templates = []
    nodesout = []
    good_detections = []
    if detections:
        print('Converting detections in to templates')
        for j, detection in enumerate(detections):
            print('Converting for detection ' + str(j) + ' of ' +
                  str(len(detections)))
            # Create an event for each detection
            event = Event()
            # Set up some header info for the event
            event.event_descriptions.append(EventDescription())
            event.event_descriptions[0].text = 'Brightness detection'
            event.creation_info = CreationInfo(agency_id='EQcorrscan')
            copy_of_stream = deepcopy(stream_copy)
            # Convert detections to obspy.core.event type -
            # name of detection template is the node.
            name_parts = detection.template_name.split('_')
            node = (name_parts[0], name_parts[1], name_parts[2])
            print(node)
            # Look up node in nodes and find the associated lags. The name
            # yields strings, so try the numeric form first and fall back
            # to the raw strings for callers that store nodes that way.
            try:
                index = nodes.index(tuple(float(value) for value in node))
            except ValueError:
                index = nodes.index(node)
            detect_lags = lags[:, index]
            ksta = Comment(text='Number of stations=' +
                           str(len(detect_lags)))
            event.origins.append(Origin())
            event.origins[0].comments.append(ksta)
            event.origins[0].time = copy_of_stream[0].stats.starttime +\
                detect_lags[0] + detection.detect_time
            event.origins[0].latitude = node[0]
            event.origins[0].longitude = node[1]
            event.origins[0].depth = node[2]
            # One pick per channel of every station that has data.
            for i, detect_lag in enumerate(detect_lags):
                station = stations[i]
                st = copy_of_stream.select(station=station)
                for tr in st:
                    # NOTE(review): the network code is fixed to 'NA'
                    # rather than taken from the trace - confirm intended.
                    _waveform_id = WaveformStreamID(
                        station_code=tr.stats.station,
                        channel_code=tr.stats.channel,
                        network_code='NA')
                    event.picks.append(
                        Pick(waveform_id=_waveform_id,
                             time=tr.stats.starttime + detect_lag +
                             detection.detect_time + pre_pick,
                             onset='emergent',
                             evaluation_mode='automatic'))
            print('Generating template for detection: ' + str(j))
            template = (_template_gen(event.picks, copy_of_stream,
                                      template_length, 'all'))
            template_name = template_saveloc + '/' +\
                str(template[0].stats.starttime) + '.ms'
            # In the interests of RAM conservation we write then read
            # Check coherency here!
            temp_coher, kchan = coherence(template, coherence_stations,
                                          coherence_clip)
            coh_thresh = float(coherence_thresh[0]) - kchan / \
                float(coherence_thresh[1])
            if temp_coher > coh_thresh:
                template.write(template_name, format="MSEED")
                print('Written template as: ' + template_name)
                print('---------------------------------coherence LEVEL: ' +
                      str(temp_coher))
                coherant = True
            else:
                print('Template was incoherant, coherence level: ' +
                      str(temp_coher))
                coherant = False
            # Free the working copies before reading the template back.
            del copy_of_stream, template
            if coherant:
                templates.append(obsread(template_name))
                nodesout += [node]
                good_detections.append(detection)
            else:
                print('No template for you')
    if plotvar:
        good_detections = [(cum_net_trace[-1].stats.starttime +
                            detection.detect_time).datetime
                           for detection in good_detections]
        if not plotsave:
            plotting.NR_plot(cum_net_trace[0:-1],
                             Stream(cum_net_trace[-1]),
                             detections=good_detections,
                             size=(18.5, 10),
                             title='Network response')
        else:
            savefile = 'plots/' +\
                cum_net_trace[0].stats.starttime.datetime.strftime('%Y%m%d') +\
                '_NR_timeseries.pdf'
            # NOTE(review): the output path is passed as ``save`` here -
            # confirm against the signature of plotting.NR_plot.
            plotting.NR_plot(cum_net_trace[0:-1],
                             Stream(cum_net_trace[-1]),
                             detections=good_detections,
                             size=(18.5, 10), save=savefile,
                             title='Network response')
    nodesout = list(set(nodesout))
    return templates, nodesout