def remove_unreferenced(catalog: Union[Catalog, Event]) -> Catalog:
    """
    Drop arrivals, amplitudes, station magnitudes and station-magnitude
    contributions whose parent object is no longer part of the event.

    Every event is copied before it is pruned, so the input is not modified.
    Pruning cascades: amplitudes are kept only if their pick exists, station
    magnitudes only if their amplitude survived, and contributions only if
    their station magnitude survived.

    :param catalog: Catalog, or a single Event, to clean.
    :returns: New Catalog holding the pruned copies of the events.
    """
    def _referenced(items, id_attr, valid_ids):
        # Keep only the items whose reference attribute points at a known id.
        return [item for item in items if getattr(item, id_attr) in valid_ids]

    events = Catalog([catalog]) if isinstance(catalog, Event) else catalog
    cleaned = Catalog()
    for source_event in events:
        event = source_event.copy()

        # Arrivals must point at a pick that is present in this event.
        known_picks = {pick.resource_id for pick in event.picks}
        for origin in event.origins:
            origin.arrivals = _referenced(
                origin.arrivals, "pick_id", known_picks)

        # Amplitudes must point at a present pick as well.
        event.amplitudes = _referenced(
            event.amplitudes, "pick_id", known_picks)
        known_amplitudes = {amp.resource_id for amp in event.amplitudes}

        # Station magnitudes must point at a surviving amplitude.
        event.station_magnitudes = _referenced(
            event.station_magnitudes, "amplitude_id", known_amplitudes)
        known_station_mags = {
            sta_mag.resource_id for sta_mag in event.station_magnitudes}

        # Contributions must point at a surviving station magnitude.
        for magnitude in event.magnitudes:
            magnitude.station_magnitude_contributions = _referenced(
                magnitude.station_magnitude_contributions,
                "station_magnitude_id", known_station_mags)

        cleaned.append(event)
    return cleaned
def test_fail_writing(self):
    """
    Check that _write_nordic raises NordicParsingError for invalid input and
    still writes a file for an origin with no location.
    """
    test_event = full_test_event()
    # Add the event to a catalogue which can be used for QuakeML testing
    test_cat = Catalog()
    test_cat += test_event
    test_ev = test_cat[0]
    test_cat.append(full_test_event())

    # Arguments shared by every call; each case overrides at most one.
    defaults = dict(filename=None, userid='TEST', evtype='L', outdir='.',
                    wavefiles='test', explosion=True, overwrite=True)
    failing_calls = [
        # Raises error due to multiple events in catalog
        (test_cat, {}),
        # Raises error due to too long userid
        (test_ev, {'userid': 'TESTICLE'}),
        # Raises error due to unrecognised event type
        (test_ev, {'evtype': 'U'}),
        # Raises error due to no output directory
        (test_ev, {'outdir': 'albatross'}),
    ]
    for target, override in failing_calls:
        with self.assertRaises(NordicParsingError):
            _write_nordic(target, **dict(defaults, **override))

    # An event without any origin cannot be written
    without_origins = test_ev.copy()
    without_origins.origins = []
    with self.assertRaises(NordicParsingError):
        _write_nordic(without_origins, **defaults)

    # An origin without a time cannot be written either
    without_time = test_ev.copy()
    without_time.origins[0].time = None
    with self.assertRaises(NordicParsingError):
        _write_nordic(without_time, **defaults)

    # Write a near empty origin
    sparse_origin = test_ev.copy()
    for attribute in ('latitude', 'longitude', 'depth'):
        setattr(sparse_origin.origins[0], attribute, None)
    with NamedTemporaryFile() as tf:
        _write_nordic(sparse_origin, **dict(defaults, filename=tf.name))
        self.assertTrue(os.path.isfile(tf.name))
def test_fail_writing(self):
    """
    Check that _write_nordic raises NordicParsingError for invalid input and
    still writes a file for an origin with no location.
    """
    test_event = full_test_event()
    # Add the event to a catalogue which can be used for QuakeML testing
    test_cat = Catalog()
    test_cat += test_event
    test_ev = test_cat[0]
    test_cat.append(full_test_event())

    # Arguments shared by every call; each case overrides at most one.
    defaults = dict(filename=None, userid='TEST', evtype='L', outdir='.',
                    wavefiles='test', explosion=True, overwrite=True)
    failing_calls = [
        # Raises error due to multiple events in catalog
        (test_cat, {}),
        # Raises error due to too long userid
        (test_ev, {'userid': 'TESTICLE'}),
        # Raises error due to unrecognised event type
        (test_ev, {'evtype': 'U'}),
        # Raises error due to no output directory
        (test_ev, {'outdir': 'albatross'}),
    ]
    for target, override in failing_calls:
        with self.assertRaises(NordicParsingError):
            _write_nordic(target, **dict(defaults, **override))

    # An event without any origin cannot be written
    without_origins = test_ev.copy()
    without_origins.origins = []
    with self.assertRaises(NordicParsingError):
        _write_nordic(without_origins, **defaults)

    # An origin without a time cannot be written either
    without_time = test_ev.copy()
    without_time.origins[0].time = None
    with self.assertRaises(NordicParsingError):
        _write_nordic(without_time, **defaults)

    # Write a near empty origin
    sparse_origin = test_ev.copy()
    for attribute in ('latitude', 'longitude', 'depth'):
        setattr(sparse_origin.origins[0], attribute, None)
    with NamedTemporaryFile() as tf:
        _write_nordic(sparse_origin, **dict(defaults, filename=tf.name))
        self.assertTrue(os.path.isfile(tf.name))
def write(self, filename, compress=True, catalog_format="QUAKEML"):
    """
    Write the tribe to disk as a directory of files, optionally tarred.

    The output directory is ``filename`` with its extension removed; it is
    created if missing and reused if it already exists. Template parameters
    are written via ``_par_write``, template events are gathered into one
    catalog file and every template stream is written as miniSEED.

    :type filename: str
    :param filename: Output name. With ``compress=True`` a ".tgz" suffix is
        appended when missing.
    :type compress: bool
    :param compress:
        If True, gzip-tar the output directory and delete the directory
        afterwards; if False, leave the files in the directory.
    :type catalog_format: str
    :param catalog_format:
        Format used for the template-event catalog; must be a key of
        ``CAT_EXT_MAP``.

    :raises TypeError: if ``catalog_format`` is not in ``CAT_EXT_MAP``.
    :returns: The tribe itself.

    .. rubric:: Example

    >>> tribe = Tribe(templates=[Template(name='c', st=read())])
    >>> tribe.write('test_tribe')
    Tribe of 1 templates
    """
    from eqcorrscan.core.match_filter import CAT_EXT_MAP

    if catalog_format not in CAT_EXT_MAP:
        raise TypeError("{0} is not supported".format(catalog_format))

    out_dir = os.path.splitext(filename)[0]
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    self._par_write(out_dir)

    # Gather template events, keeping the name comment in step with the
    # template name.
    events = Catalog()
    for template in self.templates:
        if template.event is None:
            continue
        for comment in template.event.comments:
            if comment.text and comment.text.startswith(
                    "eqcorrscan_template_"):
                comment.text = "eqcorrscan_template_{0}".format(
                    template.name)
        events.append(template.event)
    if len(events) > 0:
        catalog_name = 'tribe_cat.{0}'.format(CAT_EXT_MAP[catalog_format])
        events.write(
            os.path.join(out_dir, catalog_name), format=catalog_format)

    for template in self.templates:
        stream_path = os.path.join(out_dir, '{0}.ms'.format(template.name))
        template.st.write(stream_path, format='MSEED')

    if compress:
        if not filename.endswith(".tgz"):
            Logger.info("Appending '.tgz' to filename.")
            filename += ".tgz"
        with tarfile.open(filename, "w:gz") as tar:
            tar.add(out_dir, arcname=os.path.basename(out_dir))
        # The archive now holds everything, so drop the working directory.
        shutil.rmtree(out_dir)
    return self
def read_quakemigrate(run_dir, units, run_subname="", local_mag_ph="S"):
    """
    Reads the .event and .picks outputs, and .amps outputs if available, from
    a QuakeMigrate run into an obspy Catalog object.

    NOTE: if a station_corrections dict was used to calculate the
    network-averaged local magnitude, this information will not be included
    in the obspy event object. There might therefore be a discrepancy between
    the mean of the StationMagnitudes and the event magnitude.

    Parameters
    ----------
    run_dir : str
        Path to QuakeMigrate run directory.
    units : {"km", "m"}
        Grid projection coordinates for QM LUT (determines units of depths
        and uncertainties in the .event files).
    run_subname : str, optional
        Run_subname string (if applicable).
    local_mag_ph : {"S", "P"}, optional
        Amplitude measurement used to calculate local magnitudes.
        (Default "S")

    Returns
    -------
    cat : `obspy.Catalog` object
        Catalog containing events in the specified QuakeMigrate run
        directory. The catalog is empty when
        ``<run_dir>/locate/<run_subname>/events`` does not exist or contains
        no ``*.event`` files.

    """
    locate_dir = pathlib.Path(run_dir) / "locate" / run_subname
    events_dir = locate_dir / "events"

    # A missing events directory is handled like an empty one. Previously
    # ``event_files`` was left unbound in that case and the loop below
    # raised UnboundLocalError.
    event_files = events_dir.glob("*.event") if events_dir.is_dir() else ()

    cat = Catalog()
    for eventf in event_files:
        event = _read_single_event(eventf, locate_dir, units, local_mag_ph)
        # Files for which the reader returns None are skipped.
        if event is not None:
            cat.append(event)

    cat.creation_info.creation_time = UTCDateTime()
    cat.creation_info.version = "ObsPy %s" % __version__
    return cat
def remove_bad_picks(cat, st_dev):
    """
    Remove events that have one or more bad picks.

    An event is dropped when any arrival of its preferred origin has a
    ``time_residual`` greater than ``st_dev``.

    :param cat: Iterable of events (e.g. an obspy Catalog).
    :param st_dev: Residual threshold; arrivals above it count as bad.
    :returns: New Catalog holding only the events without bad arrivals.
    """
    filtered_cat = Catalog()
    for event in cat:
        pref_o = event.preferred_origin()
        if any(arr.time_residual > st_dev for arr in pref_o.arrivals):
            continue
        filtered_cat.append(event)
    # The filtered catalog used to be built and then discarded.
    return filtered_cat
def space_cluster(catalog, d_thresh, show=True):
    """
    Cluster a catalog by distance only.

    Computes the matrix of physical distances between events with
    ``dist_mat_km`` and uses :mod:`scipy.cluster.hierarchy` (average linkage,
    cut at ``d_thresh`` with the 'distance' criterion) for the clustering.

    :type catalog: obspy.Catalog
    :param catalog: Catalog of events to be clustered
    :type d_thresh: float
    :param d_thresh: Maximum inter-event distance threshold
    :type show: bool
    :param show: If True, plot the dendrogram and call ``plt.show()``.

    :returns: list of Catalog classes, one per cluster, ordered by ascending
        cluster id; events keep their catalog order within each cluster.
    """
    from scipy.spatial.distance import squareform
    from scipy.cluster.hierarchy import linkage, dendrogram, fcluster
    import matplotlib.pyplot as plt
    from obspy import Catalog

    # Compute the distance matrix and linkage
    dist_mat = dist_mat_km(catalog)
    dist_vec = squareform(dist_mat)
    Z = linkage(dist_vec, method='average')

    # Cluster the linkage using the given threshold as the cutoff;
    # cluster_ids[i] is the cluster label of catalog[i].
    cluster_ids = fcluster(Z, t=d_thresh, criterion='distance')

    if show:
        # Plot the dendrogram...if it's not way too huge
        dendrogram(Z, color_threshold=d_thresh, distance_sort='ascending')
        plt.show()

    # Group in a single pass. The previous nested scan was only correct if
    # iterating the set of labels happened to yield them in ascending order.
    members = {}
    for event_index, cluster_id in enumerate(cluster_ids):
        if cluster_id not in members:
            members[cluster_id] = Catalog()
        members[cluster_id].append(catalog[event_index])
    return [members[cluster_id] for cluster_id in sorted(members)]
def space_cluster(catalog, d_thresh, show=True):
    """
    Cluster a catalog by distance only.

    Computes the matrix of physical distances between events with
    ``dist_mat_km`` and uses :mod:`scipy.cluster.hierarchy` (average linkage,
    cut at ``d_thresh`` with the 'distance' criterion) for the clustering.

    :type catalog: obspy.Catalog
    :param catalog: Catalog of events to be clustered
    :type d_thresh: float
    :param d_thresh: Maximum inter-event distance threshold
    :type show: bool
    :param show: If True, plot the dendrogram and call ``plt.show()``.

    :returns: list of Catalog classes, one per cluster, ordered by ascending
        cluster id; events keep their catalog order within each cluster.
    """
    from scipy.spatial.distance import squareform
    from scipy.cluster.hierarchy import linkage, dendrogram, fcluster
    import matplotlib.pyplot as plt
    from obspy import Catalog

    # Compute the distance matrix and linkage
    dist_mat = dist_mat_km(catalog)
    dist_vec = squareform(dist_mat)
    Z = linkage(dist_vec, method='average')

    # Cluster the linkage using the given threshold as the cutoff;
    # cluster_ids[i] is the cluster label of catalog[i].
    cluster_ids = fcluster(Z, t=d_thresh, criterion='distance')

    if show:
        # Plot the dendrogram...if it's not way too huge
        dendrogram(Z, color_threshold=d_thresh, distance_sort='ascending')
        plt.show()

    # Group in a single pass. The previous nested scan was only correct if
    # iterating the set of labels happened to yield them in ascending order.
    members = {}
    for event_index, cluster_id in enumerate(cluster_ids):
        if cluster_id not in members:
            members[cluster_id] = Catalog()
        members[cluster_id].append(catalog[event_index])
    return [members[cluster_id] for cluster_id in sorted(members)]
def get_catalog(detections):
    """
    Collect the events attached to a list of detections into one catalog.

    :type detections: list
    :param detections:
        list of :class:`eqcorrscan.core.match_filter.DETECTION`; the
        ``event`` attribute of each one is appended in order.

    :returns: Catalog of detected events.
    :rtype: :class:`obspy.core.event.Catalog`

    .. warning::
        Will only work if the detections have an event associated with them.
        This will not be the case if detections have been written to csv
        format using :func:`eqcorrscan.core.match_filter.DETECTION.write`
        and read back in.
    """
    detected_events = Catalog()
    for event in (det.event for det in detections):
        detected_events.append(event)
    return detected_events
def cat_stat_ev_avg(cat):
    """
    Filter a catalog on each event's mean arrival-time residual.

    For every event the mean ``time_residual`` over the arrivals of its
    preferred origin is computed. The mean and standard deviation of these
    per-event means are printed, and events whose mean residual is below
    that standard deviation are kept.

    :param cat: Iterable of events (e.g. an obspy Catalog).
    :returns: tuple of (filtered Catalog, list of per-event mean residuals in
        catalog order, mean of that list, standard deviation of that list).
    :raises ZeroDivisionError: if a preferred origin has no arrivals.
    """
    def _mean_residual(event):
        # Mean over all arrivals. The old code divided by a leaked
        # comprehension index: a NameError on Python 3 and one less than
        # the arrival count on Python 2.
        arrivals = event.preferred_origin().arrivals
        return sum(arr.time_residual for arr in arrivals) / len(arrivals)

    avg_arr_res = [_mean_residual(event) for event in cat]
    mean_avg_ev_res = np.mean(avg_arr_res)
    std_avg_ev_res = np.std(avg_arr_res)
    print('Catalog mean avg event arr. time residual of: %0.3f'
          % mean_avg_ev_res)
    print('Catalog st_dev avg event arr residual of: %0.3f'
          % std_avg_ev_res)

    filtered_cat = Catalog()
    # Reuse the per-event means so the returned list is not overwritten by
    # the last event's scalar.
    for event, event_avg in zip(cat, avg_arr_res):
        if event_avg < std_avg_ev_res:
            filtered_cat.append(event)
    return filtered_cat, avg_arr_res, mean_avg_ev_res, std_avg_ev_res
# Make picks for detections from template picks det_picks = [] for p in template_event.picks: delay_template = p.time - min_template_starttime det_pick_time = detect_time + delay_template pick = Pick(time=det_pick_time, phase_hint=p.phase_hint, waveform_id=p.waveform_id) det_picks.append(pick) # figure out origin time for detection pick1_temp = template_event.picks[0] origin_det = template_event.origins[0].copy() pick1_det = [ p for p in det_picks if p.waveform_id == pick1_temp.waveform_id ][0] origin_det.time = pick1_det.time - (pick1_temp.time - template_event.origins[0].time) # Create and save event for detection event = Event(picks=det_picks, origins=[origin_det]) event.preferred_origin_id = event.origins[0].resource_id catalog.append(event) catalog_dir = os.path.join(os.getcwd(), "families_events") catalog_fname = "catalog_" + family_name.split(".")[0] + ".xml" catalog_file = os.path.join(catalog_dir, catalog_fname) Logger.info("Now writing catalogue to file %s" % catalog_file) catalog.write(catalog_file, format="QUAKEML")
def _plot_first_singular_vectors(tid, svd_dict, stream_list):
    """Plot each channel's weighted first singular vector over its data."""
    import matplotlib.pyplot as plt

    for stachan in svd_dict:
        chan_svd = svd_dict[stachan]
        if 'svectors' not in chan_svd or len(chan_svd['svectors']) == 0:
            continue
        events = chan_svd['events']
        if len(events) < 5:
            # Channels with fewer than five events are not plotted.
            continue
        fig, axes = plt.subplots(len(events), 1, sharex=True,
                                 figsize=(14, 24), squeeze=False)
        first_SV = chan_svd['svectors'][0]
        first_SVal = chan_svd['svalues'][0]
        SV_y = first_SV * first_SVal
        # First column of the U matrix: one weight per plotted event.
        svd_wts = np.atleast_2d(
            np.asarray(chan_svd['uvectors']))[:, 0].reshape(-1).tolist()
        station, channel = stachan.split('.')[0], stachan.split('.')[1]
        for i, ev_ind in enumerate(events):
            data_tr = stream_list[ev_ind].select(
                station=station, channel=channel)[0]
            SV_x = np.arange(len(SV_y)) / data_tr.stats.sampling_rate
            axes[i, 0].plot(SV_x, SV_y * svd_wts[i], color='r')
            axes[i, 0].plot(SV_x, data_tr.data, color='k')
            axes[i, 0].text(0.9, 0.15, str(svd_wts[i]),
                            bbox=dict(facecolor='white', alpha=0.95),
                            transform=axes[i, 0].transAxes)
            axes[i, 0].text(
                0.7, 0.85,
                data_tr.stats.starttime.datetime.strftime(
                    '%Y/%m/%d %H:%M:%S'),
                bbox=dict(facecolor='white', alpha=0.95),
                transform=axes[i, 0].transAxes)
        fig.suptitle('%s\nChannel: %s First SVal: %f'
                     % (str(tid), stachan, first_SVal))
        fig.show()


def relative_mag_calc(cat, template_dict, n_SVs=4, plot=False, debug=1):
    """
    Compute SVD-based relative magnitudes for the detections of each template.

    For every template the aligned detection streams are decomposed with
    ``clustering.svd``, relative moments are computed with
    ``mag_calc.SVD_moments`` and converted to magnitudes relative to the
    template's self detection, whose magnitude is ``det_dict['temp_mag']``.

    :param cat: Indexable catalog; events are looked up with the catalog
        indices stored in ``det_dict['aligned_inds']``.
    :param template_dict: Mapping of template id to a detection dictionary.
        The code reads the keys 'aligned_inds', 'aligned', 'temp_ind' and
        'temp_mag', and requires one key whose last '_'-separated token is
        'self'.
    :param n_SVs: Number of singular vectors passed to
        ``mag_calc.SVD_moments``; templates with no more than this many
        aligned streams are skipped.
    :param plot: If True, plot the first singular vector against the data
        for a random sample of one in twenty templates.
    :param debug: Debug level forwarded to ``mag_calc.SVD_moments``.
    :returns: New Catalog of event copies, each with one Magnitude appended.
    """
    new_cat = Catalog()

    # Random sample of template ids for plotting. The indices are drawn
    # once; the old comprehension re-drew them for every key, so the sample
    # size was not fixed.
    samp_ids = set()
    if plot and len(template_dict) > 0:
        chosen = set(np.random.choice(
            len(template_dict), len(template_dict) // 20,
            replace=False).tolist())
        samp_ids = {tid for i, tid in enumerate(template_dict)
                    if i in chosen}

    # .items() works on Python 2 and 3, unlike .iteritems().
    for tid, det_dict in template_dict.items():
        # Perform some checks on the dictionary first
        if len(det_dict) <= 1:
            print('%s has <= one detection. No magnitude will be calculated.'
                  % str(tid))
            continue
        print('Working on detections for template: %s' % str(tid))
        key_suffixes = [str(key).split('/')[-1].split('_')[-1]
                        for key in det_dict.keys()]
        if 'self' not in key_suffixes:
            print(
                'Self detection not located in catalog. Moving to next template.'
            )
            continue
        inds = det_dict['aligned_inds']
        stream_list = det_dict['aligned']

        # Do SVD
        if len(stream_list) <= n_SVs:
            warnings.warn(
                'Fewer streams then nSVs passed to SVD. Moving to next template'
            )
            continue
        svd_dict = clustering.svd(stream_list, full=True)
        if plot and tid in samp_ids:
            _plot_first_singular_vectors(tid, svd_dict, stream_list)

        # Feed output vectors and values to mag_calc.SVD_moments
        M, events_out = mag_calc.SVD_moments(svd_dict, n_SVs, debug=debug)

        # Find rel_amp of self detection
        # NOTE(review): M is indexed here by the position in ``inds`` but
        # further down by the position in ``events_out``; confirm which one
        # mag_calc.SVD_moments intends.
        try:
            rel_amp_t = [
                M[i] for i, cat_ind in enumerate(inds)
                if i in events_out and cat_ind == det_dict['temp_ind']
            ][0]
        except Exception:
            # Unlike a bare except, this lets KeyboardInterrupt and
            # SystemExit through.
            warnings.warn(
                'Relative amp not calculated for template in this case....investigate'
            )
            continue

        # Convert relative values to template values
        Mls = [
            np.log10(rel_amp_i / rel_amp_t) + det_dict['temp_mag']
            for rel_amp_i in M
        ]
        if len(Mls) != len(events_out):
            warnings.warn('Not same number of local mags and out events')
        for i, cat_ind in enumerate(inds):
            if i not in events_out:
                continue
            if cat_ind == det_dict['temp_ind']:
                # The template itself keeps its catalogued magnitude.
                mag = det_dict['temp_mag']
                author = 'SeisComp'
            else:
                Mls_ind = next(
                    k for k, ev in enumerate(events_out) if ev == i)
                mag = Mls[Mls_ind]
                author = 'eqcorrscan.utils.mag_calc.SVD_moment'
            event = cat[cat_ind].copy()
            event.magnitudes.append(
                Magnitude(mag=mag,
                          creation_info=CreationInfo(author=author)))
            new_cat.append(event)
    return new_cat
def filter_events(catalog, method='avg_residual', plot=False):
    """
    Function to remove events with unsatisfactory picks from a Catalog.

    For each event the preferred origin is used, falling back to the first
    origin when no preferred origin is set.

    :type catalog: :class: 'obspy.Catalog'
    :param catalog: Catalog from which to remove events
    :type method: str
    :param method: Method used to determine which events to remove from
        catalog. Options are 'avg_residual' and 'single_arrival'. Defaults to
        'avg_residual', which keeps events whose mean absolute arrival-time
        residual lies strictly within one standard deviation of the
        catalog-wide mean of that quantity. 'single_arrival' removes events
        containing one or more arrivals whose time residual lies outside one
        standard deviation of all arrival-time residuals in the catalog. Any
        other value yields an empty catalog.
    :type plot: bool
    :param plot: If True, will plot the distributions of all arrival-time
        residuals and of the per-event average residuals.

    :returns: class: obspy.Catalog
    :raises ZeroDivisionError: if an origin has no arrivals.
    """
    from obspy import Catalog

    # Extract all arrivals for each preferred origin
    arr_time_resds = extract_all_residuals(catalog)

    # Mean absolute arrival-time residual per event. The old code divided
    # by a leaked comprehension index: a NameError on Python 3 and one less
    # than the arrival count on Python 2.
    avg_arr_res = []
    for event in catalog:
        pref_o = event.preferred_origin() or event.origins[0]
        arrivals = pref_o.arrivals
        avg_arr_res.append(
            sum(abs(arr.time_residual) for arr in arrivals) / len(arrivals))

    # Plot the histograms
    if plot:
        f, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, sharex=True)
        ax1.set_title('Arrival-time residuals')
        sns.boxplot(data=arr_time_resds, ax=ax1, orient="h", width=0.1)
        sns.distplot(arr_time_resds, ax=ax2)
        ax3.set_title('Event average arrival-time residuals')
        sns.boxplot(data=avg_arr_res, ax=ax3, orient="h", width=0.1)
        sns.distplot(avg_arr_res, ax=ax4)
        plt.show()
        plt.close()

    # Create new, filtered catalog
    filtered_cat = Catalog()
    if method == 'avg_residual':
        mean_avg = np.mean(avg_arr_res)
        std_avg = np.std(avg_arr_res)
        # Compare the same absolute-mean quantity the statistics were built
        # from. The old loop recomputed a signed mean here.
        for event, event_avg in zip(catalog, avg_arr_res):
            if mean_avg - std_avg < event_avg < mean_avg + std_avg:
                filtered_cat.append(event)
    elif method == 'single_arrival':
        mean_res = np.mean(arr_time_resds)
        std_res = np.std(arr_time_resds)
        lower, upper = mean_res - std_res, mean_res + std_res
        for event in catalog:
            pref_o = event.preferred_origin() or event.origins[0]
            if any(arr.time_residual < lower or arr.time_residual > upper
                   for arr in pref_o.arrivals):
                continue
            filtered_cat.append(event)
    return filtered_cat
def origins_pruning(xml_name,
                    output_fn='origenes_preferidos.xml',
                    check_db=False,
                    quadrant="None"):
    """Delete all origins that are not the preferred origin in a SeisComP
    event xml file and write the surviving events to a new xml file.

    Events are dropped when they fail ``pass_origin_quality``, when
    ``check_db`` is set and the event already exists in the database, or when
    ``quadrant`` is given and the event lies outside that region.

    Parameters
    ----------
    xml_name : str
        Name of events type SeisComP3 xml file.
    output_fn : str
        Name of output SeisComP3 xml file.
    check_db : bool
        If True, skip events for which ``Watcher.exist_in_db()`` is true.
    quadrant : str
        Region passed to ``Watcher.check_in_region``; the string "None"
        disables the region check.

    Notes
    -----
    Exits the process with status 1 when ``xml_name`` does not exist.
    """
    change_xml_version(xml_name)
    print(
        '\n\nRemoving origins that are not the prefered one in the xml %s\n'
        % xml_name)
    try:
        cat = obs.read_events(xml_name, id_prefix='', format='SC3ML')
    except FileNotFoundError:
        print('\n\t No existe el archivo %s, se salta este proceso\n' %
              xml_name)
        sys.exit(1)

    cat2 = Catalog()
    # For each event in the events xml
    for ev in cat:
        magnitude = ev.preferred_magnitude().mag
        pref_orig = ev.preferred_origin()
        if not pass_origin_quality(pref_orig, magnitude):
            # Report in red that the event failed the quality filter
            print(
                f'\033[91m Evento {pref_orig.time} no pasó el filtro de calidad \033[0m'
            )
            continue
        # With check_db, events already in the database are dropped; with a
        # quadrant, events outside of it are dropped.
        if check_db or quadrant != "None":
            watcher = Watcher(pref_orig)
            # Keep the description as text: encoding it to bytes made the
            # messages below print a b'...' literal.
            region = ev.event_descriptions[0].text
            if check_db and watcher.exist_in_db():
                print(
                    f'\n\n\t El evento\033[91m {pref_orig.time} - {region}\033[0m ya existe en la base de datos, se elimina del xml\n\n'
                )
                continue
            if quadrant != "None" and not watcher.check_in_region(quadrant):
                print(f'region {region}')
                print(
                    f'\n\n\t El evento\033[91m {pref_orig.time} : {pref_orig.latitude}, {pref_orig.longitude} : {region}\033[0m fuera del cuadrante {quadrant}, se elimina del xml\n\n'
                )
                continue
        # Keep the preferred origin itself rather than assuming it is the
        # last one; fall back to the last origin if no id matches.
        preferred_only = [
            origin for origin in ev.origins
            if origin.resource_id == ev.preferred_origin_id
        ]
        ev.origins = preferred_only or ev.origins[-1:]
        cat2.append(ev)

    # Write the xml containing only the preferred origins
    cat2.write(output_fn,
               format='SC3ML',
               validate=True,
               event_removal=True,
               verbose=True)
    remove_id_prefix(output_fn)
    print(
        '\n\tArchivo con origenes preferidos para migrar a SeisComP3:\n\n\t %s\n'
        % output_fn)
def read_nlloc_hyp(filename, coordinate_converter=None, picks=None, **kwargs):
    """
    Parse a NonLinLoc Hypocenter-Phase file into a
    :class:`~obspy.core.event.Catalog`, one event per ``NLLOC`` ...
    ``END_NLLOC`` section.

    .. note::

        Coordinate conversion from coordinate frame of NonLinLoc model files /
        location run to WGS84 has to be specified explicitly by the user if
        necessary.

    .. note::

        An example can be found on the :mod:`~obspy.io.nlloc` submodule front
        page in the documentation pages.

    :param filename: File or file-like object in text mode. Anything without
        a ``read`` method is first tried as a path; if it cannot be opened,
        it is treated as the file content itself.
    :type coordinate_converter: func
    :param coordinate_converter: Function to convert (x, y, z)
        coordinates of NonLinLoc output to geographical coordinates
        (longitude, latitude, depth in kilometers).
        If left ``None``, the geographical coordinates in the "GEOGRAPHIC" line
        of NonLinLoc output are used.
        The function should accept three arguments x, y, z (each of type
        :class:`numpy.ndarray`) and return a tuple of three
        :class:`numpy.ndarray` (lon, lat, depth in kilometers).
    :type picks: list of :class:`~obspy.core.event.Pick`
    :param picks: Original picks used to generate the NonLinLoc location.
        If provided, the output event will include the original picks and the
        arrivals in the output origin will link to them correctly (with their
        ``pick_id`` attribute). If not provided, the output event will include
        (the rather basic) pick information that can be reconstructed from the
        NonLinLoc hypocenter-phase file.
    :rtype: :class:`~obspy.core.event.Catalog`
    :raises Exception: if the ``NLLOC`` and ``END_NLLOC`` marker lines are
        unbalanced or not strictly alternating.
    """
    if hasattr(filename, "read"):
        data = filename.read()
        if hasattr(data, "decode"):
            data = data.decode("UTF-8")
    else:
        # Try it as a path first; otherwise assume it is the content itself.
        try:
            with open(filename, "rb") as fh:
                data = fh.read()
            data = data.decode("UTF-8")
        except Exception:
            try:
                data = filename.decode("UTF-8")
            except Exception:
                data = str(filename)
            data = data.strip()

    # Only non-blank lines matter for the parsing below.
    lines = [text for text in data.splitlines() if text.strip()]

    # Picks originally used in the location, if the caller supplied them.
    original_picks = [] if picks is None else picks

    cat = Catalog()

    # Line numbers of the section delimiters.
    lines_start = []
    lines_end = []
    for number, text in enumerate(lines):
        if text.startswith("NLLOC "):
            lines_start.append(number)
        if text.startswith("END_NLLOC"):
            lines_end.append(number)

    if len(lines_start) != len(lines_end):
        msg = ("NLLOC HYP file '{}' seems corrupt, number of 'NLLOC' lines "
               "does not match number of 'END_NLLOC' lines").format(filename)
        raise Exception(msg)

    # Interleave start/end positions; they must be strictly increasing.
    start_end_indices = [
        position
        for section in zip(lines_start, lines_end)
        for position in section]
    if any(np.diff(start_end_indices) < 1):
        msg = ("NLLOC HYP file '{}' seems corrupt, inconsistent "
               "positioning of 'NLLOC' and 'END_NLLOC' lines "
               "detected.").format(filename)
        raise Exception(msg)

    for first, last in zip(lines_start, lines_end):
        cat.append(_read_single_hypocenter(
            lines[first:last + 1],
            coordinate_converter=coordinate_converter,
            original_picks=original_picks))

    cat.creation_info.creation_time = UTCDateTime()
    cat.creation_info.version = "ObsPy %s" % __version__
    return cat
# Extract just the for group in groups: if len(group) > 7: big_group_ids.append(list(zip(*group)[1])) big_group_streams.append(list(zip(*group)[0])) for i, group_ids in enumerate(big_group_ids): file_names = '/home/chet/data/mrp_data/catalogs/2015/final/thresh_' +\ str(corr_thresh) + '_group_' + str(i) temp_cat = Catalog() with open(file_names + '.csv', 'wb') as f: csvwriter = csv.writer(f, delimiter=',') for event in cat: ev_name = str(event.resource_id).split('/')[-1:][0] if ev_name in group_ids: x = str(event.preferred_origin().longitude) y = str(event.preferred_origin().latitude) z = str(event.preferred_origin().depth) csvwriter.writerow([x, y, z]) temp_cat.append(event) temp_cat.write(file_names + '.shp', format="SHAPEFILE") # Below we'll plot picks over templates for given indices ev_id = '2015sora495962' res_id = ResourceIdentifier('smi:org.gfz-potsdam.de/geofon/2015sora495962') for event in cat: if event.resource_id == res_id: test_ev = event for i, group_id in enumerate(big_group_ids): if group_id == ev_id: pretty_template_plot(big_group_streams[i], picks=test_ev.picks)
'spacegrp_%s_mat.npy' % grp_num groups = clustering.cluster(template_list, corr_thresh=0.30, allow_shift=True, shift_len=25, save_corrmat=True, cores=cores, debug=2) for i, grp in enumerate(groups): corrgrp_cat = Catalog() f_name_root = '/media/chet/hdd/seismic/NZ/catalogs/' f_name = 'spacegrp_%s_corrgrp_%03d' % (grp_num, i) for e in cat: for temp_st in grp: if e.resource_id == temp_st[1]: corrgrp_cat.append(e) corrgrp_cat.write(f_name_root + 'qml/corr_groups/1_sec_temps/' + f_name + '.xml', format="QUAKEML") corrgrp_cat.write(f_name_root + 'shp/corr_groups/1_sec_temps/' + f_name + '.shp', format="SHAPEFILE") # Also trying correlation cluster for whole catalog cat = read_events( '/media/chet/hdd/seismic/NZ/catalogs/qml/2015_nlloc_final_run02_group_refined.xml' ) template_list = [(template_dict[ev.resource_id], ev.resource_id) for ev in cat] plt_name = '/media/chet/hdd/seismic/NZ/catalogs/corr_figs/4_sec_temps/' +\ 'entire_cat_cluster_dend_shift25.png' corr_mat = '/media/chet/hdd/seismic/NZ/catalogs/corr_figs/1_sec_temps/' +\
from obspy import read_events, Catalog #Make list of catalog parts cat_list = glob('/home/chet/data/mrp_data/sherburn_catalog/quake-ml/' + 'rotnga/final_cat/*part*') #Search through all events in catalog and output list of names new_cat = Catalog() for catalog in cat_list: #Read in catalog cat = read_events(catalog) for event in cat: lat = event.preferred_origin().latitude lon = event.preferred_origin().longitude if lat > -38.661 and lat < -38.483 and lon > 176.094 and lon < 176.296: new_cat.append(event) else: print('Event outside bounding box...') #Write catalog to various formats #VELEST format new_cat.write( '/home/chet/data/mrp_data/catalogs/2015/final/cnv/rotnga2015.cnv', format="CNV") #Shapefile new_cat.write('/home/chet/data/mrp_data/catalogs/2015/final/shp/rotnga2015', format="SHAPEFILE") #Loop to write single event NLLOC files for event in new_cat: ev_name = str(event.resource_id).split('/')[2] event.write('/home/chet/data/mrp_data/catalogs/2015/final/nlloc/' +
def space_cluster(catalog, d_thresh, show=True):
    """
    Cluster a catalog by distance only.

    Computes the matrix of physical distances between events with
    ``dist_mat_km`` and uses the :mod:`scipy.cluster.hierarchy` module
    (average linkage, cut at ``d_thresh`` with the 'distance' criterion) to
    perform the clustering.

    :type catalog: obspy.core.event.Catalog
    :param catalog: Catalog of events to be clustered
    :type d_thresh: float
    :param d_thresh: Maximum inter-event distance threshold
    :type show: bool
    :param show: If True, plot the dendrogram and call ``plt.show()``.

    :returns:
        list of :class:`obspy.core.event.Catalog` objects, one per cluster,
        ordered by ascending cluster id; events keep their catalog order
        within each cluster.
    :rtype: list

    >>> from eqcorrscan.utils.clustering import space_cluster
    >>> from obspy.clients.fdsn import Client
    >>> from obspy import UTCDateTime
    >>> client = Client("NCEDC")
    >>> starttime = UTCDateTime("2002-01-01")
    >>> endtime = UTCDateTime("2002-02-01")
    >>> cat = client.get_events(starttime=starttime, endtime=endtime,
    ...                         minmagnitude=2)
    >>> groups = space_cluster(catalog=cat, d_thresh=2, show=False)

    >>> from eqcorrscan.utils.clustering import space_cluster
    >>> from obspy.clients.fdsn import Client
    >>> from obspy import UTCDateTime
    >>> client = Client("https://earthquake.usgs.gov")
    >>> starttime = UTCDateTime("2002-01-01")
    >>> endtime = UTCDateTime("2002-02-01")
    >>> cat = client.get_events(starttime=starttime, endtime=endtime,
    ...                         minmagnitude=6)
    >>> groups = space_cluster(catalog=cat, d_thresh=1000, show=False)
    """
    # Compute the distance matrix and linkage
    dist_mat = dist_mat_km(catalog)
    dist_vec = squareform(dist_mat)
    Z = linkage(dist_vec, method='average')

    # Cluster the linkage using the given threshold as the cutoff;
    # cluster_ids[i] is the cluster label of catalog[i].
    cluster_ids = fcluster(Z, t=d_thresh, criterion='distance')

    if show:
        # Plot the dendrogram...if it's not way too huge
        dendrogram(Z, color_threshold=d_thresh, distance_sort='ascending')
        plt.show()

    # Group in a single pass. The previous nested scan was only correct if
    # iterating the set of labels happened to yield them in ascending order.
    members = {}
    for event_index, cluster_id in enumerate(cluster_ids):
        if cluster_id not in members:
            members[cluster_id] = Catalog()
        members[cluster_id].append(catalog[event_index])
    return [members[cluster_id] for cluster_id in sorted(members)]
def match_filter(template_names, template_list, st, threshold,
                 threshold_type, trig_int, plot=False, plotdir=None,
                 xcorr_func=None, concurrency=None, cores=None,
                 plot_format='png', output_cat=False, output_event=True,
                 extract_detections=False, arg_check=True, full_peaks=False,
                 peak_cores=None, spike_test=True, **kwargs):
    """
    Main matched-filter detection function.

    Over-arching code to run the correlations of given templates with a
    day of seismic data and output the detections based on a given threshold.
    For a functional example see the tutorials.

    :type template_names: list
    :param template_names:
        List of template names in the same order as template_list
    :type template_list: list
    :param template_list:
        A list of templates of which each template is a
        :class:`obspy.core.stream.Stream` of obspy traces containing seismic
        data and header information.
    :type st: :class:`obspy.core.stream.Stream`
    :param st:
        A Stream object containing all the data available and
        required for the correlations with templates given.  For efficiency
        this should contain no excess traces which are not in one or more of
        the templates.  This will now remove excess traces internally, but
        will copy the stream and work on the copy, leaving your input stream
        untouched.
    :type threshold: float
    :param threshold: A threshold value set based on the threshold_type
    :type threshold_type: str
    :param threshold_type:
        The type of threshold to be used, can be MAD, absolute or
        av_chan_corr.  See Note on thresholding below.
    :type trig_int: float
    :param trig_int:
        Minimum gap between detections from one template in seconds.
        If multiple detections occur within trig_int of one-another, the one
        with the highest cross-correlation sum will be selected.
    :type plot: bool
    :param plot: Turn plotting on or off
    :type plotdir: str
    :param plotdir:
        Path to plotting folder, plots will be output here, defaults to None,
        and plots are shown on screen.
    :type xcorr_func: str or callable
    :param xcorr_func:
        A str of a registered xcorr function or a callable for implementing
        a custom xcorr function. For more information see:
        :func:`eqcorrscan.utils.correlate.register_array_xcorr`
    :type concurrency: str
    :param concurrency:
        The type of concurrency to apply to the xcorr function. Options are
        'multithread', 'multiprocess', 'concurrent'. For more details see
        :func:`eqcorrscan.utils.correlate.get_stream_xcorr`
    :type cores: int
    :param cores: Number of cores to use
    :type plot_format: str
    :param plot_format: Specify format of output plots if saved
    :type output_cat: bool
    :param output_cat:
        Specifies if matched_filter will output an obspy.Catalog class
        containing events for each detection. Default is False, in which case
        matched_filter will output a list of detection classes, as normal.
    :type output_event: bool
    :param output_event:
        Whether to include events in the Detection objects, defaults to True,
        but for large cases you may want to turn this off as Event objects
        can be quite memory intensive.
    :type extract_detections: bool
    :param extract_detections:
        Specifies whether or not to return a list of streams, one stream per
        detection.
    :type arg_check: bool
    :param arg_check:
        Check arguments, defaults to True, but if running in bulk, and you are
        certain of your arguments, then set to False.
    :type full_peaks: bool
    :param full_peaks:
        See :func:`eqcorrscan.utils.findpeaks.find_peaks_compiled`
    :type peak_cores: int
    :param peak_cores:
        Number of processes to use for parallel peak-finding (if different to
        `cores`).
    :type spike_test: bool
    :param spike_test: If set True, raise error when there is a spike in data.
        defaults to True.
    :param kwargs:
        Any further keyword arguments are passed on to the cross-correlation
        function. The deprecated ``plotvar`` keyword, if given, overrides
        ``plot``.

    .. Note::
        When using the "fftw" correlation backend the length of the fft
        can be set. See :mod:`eqcorrscan.utils.correlate` for more info.

    .. note::
        **Returns:**

        If neither `output_cat` or `extract_detections` are set to `True`,
        then only the list of :class:`eqcorrscan.core.match_filter.Detection`'s
        will be output:

        :return:
            :class:`eqcorrscan.core.match_filter.Detection` detections for
            each detection made.
        :rtype: list

        If `output_cat` is set to `True`, then the
        :class:`obspy.core.event.Catalog` will also be output:

        :return: Catalog containing events for each detection, see above.
        :rtype: :class:`obspy.core.event.Catalog`

        If `extract_detections` is set to `True` then the list of
        :class:`obspy.core.stream.Stream`'s will also be output.

        :return:
            list of :class:`obspy.core.stream.Stream`'s for each detection,
            see above.
        :rtype: list

    .. note::
        If your data contain gaps these must be padded with zeros before
        using this function. The `eqcorrscan.utils.pre_processing` functions
        will provide gap-filled data in the appropriate format.  Note that if
        you pad your data with zeros before filtering or resampling the gaps
        will not be all zeros after filtering. This will result in the
        calculation of spurious correlations in the gaps.

    .. Note::
        Detections are not corrected for `pre-pick`, the
        detection.detect_time corresponds to the beginning of the earliest
        template channel at detection.

    .. note::
        **Data overlap:**

        Internally this routine shifts and trims the data according to the
        offsets in the template (e.g. if trace 2 starts 2 seconds after trace 1
        in the template then the continuous data will be shifted by 2 seconds
        to align peak correlations prior to summing).  Because of this,
        detections at the start and end of continuous data streams
        **may be missed**.  The maximum time-period that might be missing
        detections is the maximum offset in the template.

        To work around this, if you are conducting matched-filter detections
        through long-duration continuous data, we suggest using some overlap
        (a few seconds, on the order of the maximum offset in the templates)
        in the continuous data.  You will then need to post-process the
        detections (which should be done anyway to remove duplicates).

    .. note::
        **Thresholding:**

        **MAD** threshold is calculated as the:

        .. math::

            threshold {\\times} (median(abs(cccsum)))

        where :math:`cccsum` is the cross-correlation sum for a given template.

        **absolute** threshold is a true absolute threshold based on the
        cccsum value.

        **av_chan_corr** is based on the mean values of single-channel
        cross-correlations assuming all data are present as required for the
        template, e.g:

        .. math::

            av\\_chan\\_corr\\_thresh=threshold \\times (cccsum\\ /\\ len(template))

        where :math:`template` is a single template from the input and the
        length is the number of channels within this template.

    .. note::
        The output_cat flag will create an :class:`obspy.core.event.Catalog`
        containing one event for each
        :class:`eqcorrscan.core.match_filter.Detection`'s generated by
        match_filter. Each event will contain a number of comments dealing
        with correlation values and channels used for the detection. Each
        channel used for the detection will have a corresponding
        :class:`obspy.core.event.Pick` which will contain time and
        waveform information. **HOWEVER**, the user should note that
        the pick times do not account for the prepick times inherent in
        each template. For example, if a template trace starts 0.1 seconds
        before the actual arrival of that phase, then the pick time generated
        by match_filter for that phase will be 0.1 seconds early.

    .. Note::
        xcorr_func can be used as follows:

        .. rubric:: xcorr_func argument example

        >>> import obspy
        >>> import numpy as np
        >>> from eqcorrscan.core.match_filter.matched_filter import (
        ...    match_filter)
        >>> from eqcorrscan.utils.correlate import time_multi_normxcorr
        >>> # define a custom xcorr function
        >>> def custom_normxcorr(templates, stream, pads, *args, **kwargs):
        ...     # Just to keep example short call other xcorr function
        ...     # in practice you would define your own function here
        ...     print('calling custom xcorr function')
        ...     return time_multi_normxcorr(templates, stream, pads)
        >>> # generate some toy templates and stream
        >>> random = np.random.RandomState(42)
        >>> template = obspy.read()
        >>> stream = obspy.read()
        >>> for num, tr in enumerate(stream):  # iter st and embed templates
        ...     data = tr.data
        ...     tr.data = random.randn(6000) * 5
        ...     tr.data[100: 100 + len(data)] = data
        >>> # call match_filter ane ensure the custom function is used
        >>> detections = match_filter(
        ...     template_names=['1'], template_list=[template], st=stream,
        ...     threshold=.5, threshold_type='absolute', trig_int=1,
        ...     plotvar=False,
        ...     xcorr_func=custom_normxcorr)  # doctest:+ELLIPSIS
        calling custom xcorr function...
    """
    from eqcorrscan.core.match_filter.detection import Detection
    from eqcorrscan.utils.plotting import _match_filter_plot

    # Backwards compatibility: the old `plotvar` keyword overrides `plot`.
    # Note that `plotvar` is left in kwargs and is therefore also forwarded
    # to the xcorr function below.
    if "plotvar" in kwargs.keys():
        Logger.warning("plotvar is depreciated, use plot instead")
        plot = kwargs.get("plotvar")
    if arg_check:
        # Check the arguments to be nice - if arguments wrong type the parallel
        # output for the error won't be useful
        if not isinstance(template_names, list):
            raise MatchFilterError('template_names must be of type: list')
        if not isinstance(template_list, list):
            raise MatchFilterError('templates must be of type: list')
        if not len(template_list) == len(template_names):
            raise MatchFilterError('Not the same number of templates as names')
        for template in template_list:
            if not isinstance(template, Stream):
                msg = 'template in template_list must be of type: ' + \
                      'obspy.core.stream.Stream'
                raise MatchFilterError(msg)
        if not isinstance(st, Stream):
            msg = 'st must be of type: obspy.core.stream.Stream'
            raise MatchFilterError(msg)
        if str(threshold_type) not in [
                str('MAD'), str('absolute'), str('av_chan_corr')
        ]:
            msg = 'threshold_type must be one of: MAD, absolute, av_chan_corr'
            raise MatchFilterError(msg)
        # Every continuous trace and every template trace must share the
        # sampling rate of the first continuous trace.
        for tr in st:
            if not tr.stats.sampling_rate == st[0].stats.sampling_rate:
                raise MatchFilterError(
                    'Sampling rates are not equal %f: %f' %
                    (tr.stats.sampling_rate, st[0].stats.sampling_rate))
        for template in template_list:
            for tr in template:
                if not tr.stats.sampling_rate == st[0].stats.sampling_rate:
                    raise MatchFilterError('Template sampling rate does not '
                                           'match continuous data')
    # Masked (gappy) template data are rejected.
    for template in template_list:
        for tr in template:
            if isinstance(tr.data, np.ma.core.MaskedArray):
                raise MatchFilterError(
                    'Template contains masked array, split first')
    if spike_test:
        Logger.info("Checking for spikes in data")
        _spike_test(st)
    # Peak-finding runs in parallel only when a core count was given.
    if cores is not None:
        parallel = True
    else:
        parallel = False
    # Peak-finding falls back to the correlation core count.
    if peak_cores is None:
        peak_cores = cores
    # Copy the stream here because we will muck about with it
    Logger.info("Copying data to keep your input safe")
    stream = st.copy()
    templates = [t.copy() for t in template_list]
    _template_names = template_names.copy()  # This can just be a shallow copy

    Logger.info("Reshaping templates")
    stream, templates, _template_names = _prep_data_for_correlation(
        stream=stream, templates=templates, template_names=_template_names)
    if len(templates) == 0:
        raise IndexError("No matching data")
    Logger.info('Starting the correlation run for these data')
    for template in templates:
        Logger.debug(template.__str__())
    Logger.debug(stream.__str__())
    # Resolve the correlation routine from the requested backend and
    # concurrency, then time the correlation run.
    multichannel_normxcorr = get_stream_xcorr(xcorr_func, concurrency)
    outtic = default_timer()
    [cccsums, no_chans, chans] = multichannel_normxcorr(
        templates=templates, stream=stream, cores=cores, **kwargs)
    if len(cccsums[0]) == 0:
        raise MatchFilterError('Correlation has not run, zero length cccsum')
    outtoc = default_timer()
    Logger.info(
        'Looping over templates and streams took: {0:.4f}s'.format(
            outtoc - outtic))
    Logger.debug('The shape of the returned cccsums is: {0}'.format(
        cccsums.shape))
    Logger.debug('This is from {0} templates correlated with {1} channels of '
                 'data'.format(len(templates), len(stream)))
    detections = []
    # det_cat only exists when output_cat is set; every later use is guarded
    # by the same flag.
    if output_cat:
        det_cat = Catalog()
    # One raw threshold per template. Any threshold_type other than
    # "absolute" or "MAD" falls through to the av_chan_corr form.
    if str(threshold_type) == str("absolute"):
        thresholds = [threshold for _ in range(len(cccsums))]
    elif str(threshold_type) == str('MAD'):
        thresholds = [
            threshold * np.median(np.abs(cccsum)) for cccsum in cccsums
        ]
    else:
        thresholds = [threshold * no_chans[i] for i in range(len(cccsums))]
    # NOTE(review): this repeats the peak_cores defaulting done above and
    # has no further effect.
    if peak_cores is None:
        peak_cores = cores
    outtic = default_timer()
    # trig_int is converted from seconds to samples using the sampling rate
    # of the first trace.
    all_peaks = multi_find_peaks(
        arr=cccsums, thresh=thresholds, parallel=parallel,
        trig_int=int(trig_int * stream[0].stats.sampling_rate),
        full_peaks=full_peaks, cores=peak_cores)
    outtoc = default_timer()
    Logger.info("Finding peaks took {0:.4f}s".format(outtoc - outtic))
    for i, cccsum in enumerate(cccsums):
        if np.abs(np.mean(cccsum)) > 0.05:
            Logger.warning('Mean is not zero! Check this!')
        # Set up a trace object for the cccsum as this is easier to plot and
        # maintains timing
        if plot:
            _match_filter_plot(
                stream=stream, cccsum=cccsum, template_names=_template_names,
                rawthresh=thresholds[i], plotdir=plotdir,
                plot_format=plot_format, i=i)
        if all_peaks[i]:
            Logger.debug("Found {0} peaks for template {1}".format(
                len(all_peaks[i]), _template_names[i]))
            for peak in all_peaks[i]:
                # peak[1] is used as a sample offset from the start of the
                # first trace and peak[0] as the detection value.
                detecttime = (
                    stream[0].stats.starttime +
                    peak[1] / stream[0].stats.sampling_rate)
                detection = Detection(
                    template_name=_template_names[i], detect_time=detecttime,
                    no_chans=no_chans[i], detect_val=peak[0],
                    threshold=thresholds[i], typeofdet='corr', chans=chans[i],
                    threshold_type=threshold_type, threshold_input=threshold)
                # The event is needed for the catalog even if the caller
                # did not ask for events on the detections.
                if output_cat or output_event:
                    detection._calculate_event(template_st=templates[i])
                detections.append(detection)
                if output_cat:
                    det_cat.append(detection.event)
        else:
            Logger.debug("Found 0 peaks for template {0}".format(
                _template_names[i]))
    Logger.info("Made {0} detections from {1} templates".format(
        len(detections), len(templates)))
    if extract_detections:
        detection_streams = extract_from_stream(stream, detections)
    # Drop the local references to the working copies.
    del stream, templates

    # The shape of the return value depends on the two output flags.
    if output_cat and not extract_detections:
        return detections, det_cat
    elif not extract_detections:
        return detections
    elif extract_detections and not output_cat:
        return detections, detection_streams
    else:
        return detections, det_cat, detection_streams
def match_filter(template_names, template_list, st, threshold,
                 threshold_type, trig_int, plotvar, plotdir='.', cores=1,
                 debug=0, plot_format='png', output_cat=False,
                 extract_detections=False, arg_check=True):
    """
    Main matched-filter detection function.

    Over-arching code to run the correlations of given templates with a
    day of seismic data and output the detections based on a given threshold.
    For a functional example see the tutorials.

    :type template_names: list
    :param template_names: List of template names in the same order as
        template_list
    :type template_list: list
    :param template_list: A list of templates of which each template is a
        Stream of obspy traces containing seismic data and header
        information.
    :type st: obspy.core.stream.Stream
    :param st: A Stream object containing all the data available and
        required for the correlations with templates given.  For efficiency
        this should contain no excess traces which are not in one or more of
        the templates.  This will now remove excess traces internally, but
        will copy the stream and work on the copy, leaving your input stream
        untouched.
    :type threshold: float
    :param threshold: A threshold value set based on the threshold_type
    :type threshold_type: str
    :param threshold_type: The type of threshold to be used, can be MAD,
        absolute or av_chan_corr.  See Note on thresholding below.
    :type trig_int: float
    :param trig_int: Minimum gap between detections in seconds.
    :type plotvar: bool
    :param plotvar: Turn plotting on or off
    :type plotdir: str
    :param plotdir: Path to plotting folder, plots will be output here,
        defaults to run location.
    :type cores: int
    :param cores: Number of cores to use
    :type debug: int
    :param debug: Debug output level, the bigger the number, the more the
        output.
    :type plot_format: str
    :param plot_format: Specify format of output plots if saved
    :type output_cat: bool
    :param output_cat: Specifies if matched_filter will output an
        obspy.Catalog class containing events for each detection. Default
        is False, in which case matched_filter will output a list of
        detection classes, as normal.
    :type extract_detections: bool
    :param extract_detections: Specifies whether or not to return a list of
        streams, one stream per detection.
    :type arg_check: bool
    :param arg_check: Check arguments, defaults to True, but if running in
        bulk, and you are certain of your arguments, then set to False.

    :raises MatchFilterError: If the arguments fail the checks, a template
        contains traces of differing length, the continuous data contain
        duplicate channels, or the correlation returns nothing.

    .. rubric::
        If neither `output_cat` or `extract_detections` are set to `True`,
        then only the list of :class:`eqcorrscan.core.match_filter.DETECTION`'s
        will be output:

    :return: :class:`eqcorrscan.core.match_filter.DETECTION`'s detections for
        each detection made.
    :rtype: list

    .. rubric::
        If `output_cat` is set to `True`, then the
        :class:`obspy.core.event.Catalog` will also be output:

    :return: Catalog containing events for each detection, see above.
    :rtype: :class:`obspy.core.event.Catalog`

    .. rubric::
        If `extract_detections` is set to `True` then the list of
        :class:`obspy.core.stream.Stream`'s will also be output.

    :return:
        list of :class:`obspy.core.stream.Stream`'s for each detection, see
        above.
    :rtype: list

    .. warning::
        Plotting within the match-filter routine uses the Agg backend
        with interactive plotting turned off.  This is because the function
        is designed to work in bulk.  If you wish to turn interactive
        plotting on you must import matplotlib in your script first, when you
        then import match_filter you will get the warning that this call to
        matplotlib has no effect, which will mean that match_filter has not
        changed the plotting behaviour.

    .. note::
        **Thresholding:**

        **MAD** threshold is calculated as the:

        .. math::

            threshold {\\times} (median(abs(cccsum)))

        where :math:`cccsum` is the cross-correlation sum for a given template.

        **absolute** threshold is a true absolute threshold based on the
        cccsum value.

        **av_chan_corr** is based on the mean values of single-channel
        cross-correlations assuming all data are present as required for the
        template, e.g:

        .. math::

            av\\_chan\\_corr\\_thresh=threshold \\times (cccsum / len(template))

        where :math:`template` is a single template from the input and the
        length is the number of channels within this template.

    .. note::
        The output_cat flag will create an :class:`obspy.core.event.Catalog`
        containing one event for each
        :class:`eqcorrscan.core.match_filter.DETECTION`'s generated by
        match_filter. Each event will contain a number of comments dealing
        with correlation values and channels used for the detection. Each
        channel used for the detection will have a corresponding
        :class:`obspy.core.event.Pick` which will contain time and
        waveform information. **HOWEVER**, the user should note that, at
        present, the pick times do not account for the prepick times inherent
        in each template. For example, if a template trace starts 0.1 seconds
        before the actual arrival of that phase, then the pick time generated
        by match_filter for that phase will be 0.1 seconds early. We are
        working on a solution that will involve saving templates alongside
        associated metadata.
    """
    import matplotlib
    matplotlib.use('Agg')
    if arg_check:
        # Check the arguments to be nice - if arguments wrong type the parallel
        # output for the error won't be useful
        if not isinstance(template_names, list):
            raise MatchFilterError('template_names must be of type: list')
        if not isinstance(template_list, list):
            raise MatchFilterError('templates must be of type: list')
        if not len(template_list) == len(template_names):
            raise MatchFilterError('Not the same number of templates as names')
        for template in template_list:
            if not isinstance(template, Stream):
                msg = 'template in template_list must be of type: ' +\
                      'obspy.core.stream.Stream'
                raise MatchFilterError(msg)
        if not isinstance(st, Stream):
            msg = 'st must be of type: obspy.core.stream.Stream'
            raise MatchFilterError(msg)
        if str(threshold_type) not in [str('MAD'), str('absolute'),
                                       str('av_chan_corr')]:
            msg = 'threshold_type must be one of: MAD, absolute, av_chan_corr'
            raise MatchFilterError(msg)

    # Copy the stream here because we will muck about with it
    stream = st.copy()
    templates = copy.deepcopy(template_list)
    _template_names = copy.deepcopy(template_names)
    # Debug option to confirm that the channel names match those in the
    # templates
    # NOTE(review): the masked-array check only runs when debug >= 2; it
    # looks like it was meant to run unconditionally - confirm before moving.
    if debug >= 2:
        template_stachan = []
        data_stachan = []
        for template in templates:
            for tr in template:
                if isinstance(tr.data, np.ma.core.MaskedArray):
                    raise MatchFilterError('Template contains masked array,'
                                           ' split first')
                template_stachan.append(tr.stats.station + '.' +
                                        tr.stats.channel)
        for tr in stream:
            data_stachan.append(tr.stats.station + '.' + tr.stats.channel)
        template_stachan = list(set(template_stachan))
        data_stachan = list(set(data_stachan))
        if debug >= 3:
            print('I have template info for these stations:')
            print(template_stachan)
            print('I have daylong data for these stations:')
            print(data_stachan)
    # Perform a check that the continuous data are all the same length
    min_start_time = min([tr.stats.starttime for tr in stream])
    max_end_time = max([tr.stats.endtime for tr in stream])
    longest_trace_length = stream[0].stats.sampling_rate * (max_end_time -
                                                            min_start_time)
    for tr in stream:
        if not tr.stats.npts == longest_trace_length:
            msg = 'Data are not equal length, padding short traces'
            warnings.warn(msg)
            start_pad = np.zeros(int(tr.stats.sampling_rate *
                                     (tr.stats.starttime - min_start_time)))
            end_pad = np.zeros(int(tr.stats.sampling_rate *
                                   (max_end_time - tr.stats.endtime)))
            tr.data = np.concatenate([start_pad, tr.data, end_pad])
    # Perform check that all template lengths are internally consistent
    for i, temp in enumerate(template_list):
        if len(set([tr.stats.npts for tr in temp])) > 1:
            msg = ('Template %s contains traces of differing length, this is '
                   'not currently supported' % _template_names[i])
            raise MatchFilterError(msg)
    # time.clock() was removed in Python 3.8; perf_counter is the
    # recommended replacement for interval timing.
    outtic = time.perf_counter()
    if debug >= 2:
        print('Ensuring all template channels have matches in long data')
    template_stachan = {}
    # Work out what station-channel pairs are in the templates, including
    # duplicate station-channel pairs.  We will use this information to fill
    # all templates with the same station-channel pairs as required by
    # _template_loop.
    for template in templates:
        stachans_in_template = Counter(
            (tr.stats.network, tr.stats.station,
             tr.stats.location, tr.stats.channel) for tr in template)
        # Keep, per channel, the largest count seen in any one template.
        for stachan, count in stachans_in_template.items():
            if count > template_stachan.get(stachan, 0):
                template_stachan[stachan] = count
    # Remove un-matched channels from templates.
    _template_stachan = copy.deepcopy(template_stachan)
    for stachan in template_stachan.keys():
        if not stream.select(network=stachan[0], station=stachan[1],
                             location=stachan[2], channel=stachan[3]):
            # Remove stachan from list of dictionary of template_stachans
            _template_stachan.pop(stachan)
            # Remove template traces rather than adding NaN data
            for template in templates:
                if template.select(network=stachan[0], station=stachan[1],
                                   location=stachan[2], channel=stachan[3]):
                    for tr in template.select(
                            network=stachan[0], station=stachan[1],
                            location=stachan[2], channel=stachan[3]):
                        template.remove(tr)
    template_stachan = _template_stachan
    # Remove un-needed channels from continuous data.  Iterate over a
    # snapshot so that removal cannot disturb the iteration.
    for tr in list(stream):
        if not (tr.stats.network, tr.stats.station,
                tr.stats.location, tr.stats.channel) in \
                template_stachan.keys():
            stream.remove(tr)
    # Check for duplicate channels
    stachans = [(tr.stats.network, tr.stats.station,
                 tr.stats.location, tr.stats.channel) for tr in stream]
    c_stachans = Counter(stachans)
    for key in c_stachans.keys():
        if c_stachans[key] > 1:
            msg = ('Multiple channels for %s.%s.%s.%s, likely a data issue'
                   % (key[0], key[1], key[2], key[3]))
            raise MatchFilterError(msg)
    # Drop templates left with no channels.  Build new lists rather than
    # removing from the lists being iterated, which would skip the template
    # following each removed one.
    kept_templates = []
    kept_names = []
    for template, template_name in zip(templates, _template_names):
        if len(template) == 0:
            msg = ('No channels matching in continuous data for ' +
                   'template ' + template_name)
            warnings.warn(msg)
            continue
        kept_templates.append(template)
        kept_names.append(template_name)
    templates = kept_templates
    _template_names = kept_names
    # Pad out templates to have all channels
    for template in templates:
        for stachan in template_stachan.keys():
            number_of_channels = len(template.select(
                network=stachan[0], station=stachan[1],
                location=stachan[2], channel=stachan[3]))
            if number_of_channels < template_stachan[stachan]:
                missed_channels = template_stachan[stachan] -\
                    number_of_channels
                nulltrace = Trace()
                nulltrace.stats.update(
                    {'network': stachan[0], 'station': stachan[1],
                     'location': stachan[2], 'channel': stachan[3],
                     'sampling_rate': template[0].stats.sampling_rate,
                     'starttime': template[0].stats.starttime})
                # np.NaN was removed in NumPy 2.0; np.nan works everywhere.
                nulltrace.data = np.array([np.nan] * len(template[0].data),
                                          dtype=np.float32)
                for _ in range(missed_channels):
                    template += nulltrace
        template.sort()
        # Quick check that this has all worked
        if len(template) != max([len(t) for t in templates]):
            raise MatchFilterError('Internal error forcing same template '
                                   'lengths, report this error.')
    if debug >= 2:
        print('Starting the correlation run for this day')
    if debug >= 4:
        for template in templates:
            print(template)
        print(stream)
    [cccsums, no_chans, chans] = _channel_loop(templates=templates,
                                               stream=stream,
                                               cores=cores,
                                               debug=debug)
    if len(cccsums[0]) == 0:
        raise MatchFilterError('Correlation has not run, zero length cccsum')
    outtoc = time.perf_counter()
    print(' '.join(['Looping over templates and streams took:',
                    str(outtoc - outtic), 's']))
    if debug >= 2:
        print(' '.join(['The shape of the returned cccsums is:',
                        str(np.shape(cccsums))]))
        print(' '.join(['This is from', str(len(templates)), 'templates']))
        print(' '.join(['Correlated with', str(len(stream)),
                        'channels of data']))
    detections = []
    # det_cat only exists when output_cat is set; every later use is guarded
    # by the same flag.
    if output_cat:
        det_cat = Catalog()
    for i, cccsum in enumerate(cccsums):
        template = templates[i]
        if str(threshold_type) == str('MAD'):
            rawthresh = threshold * np.median(np.abs(cccsum))
        elif str(threshold_type) == str('absolute'):
            rawthresh = threshold
        elif str(threshold_type) == str('av_chan_corr'):
            rawthresh = threshold * no_chans[i]
        # Findpeaks returns a list of tuples in the form [(cccsum, sample)]
        print(' '.join(['Threshold is set at:', str(rawthresh)]))
        print(' '.join(['Max of data is:', str(max(cccsum))]))
        print(' '.join(['Mean of data is:', str(np.mean(cccsum))]))
        if np.abs(np.mean(cccsum)) > 0.05:
            warnings.warn('Mean is not zero!  Check this!')
        # Set up a trace object for the cccsum as this is easier to plot and
        # maintains timing
        if plotvar:
            _match_filter_plot(stream=stream, cccsum=cccsum,
                               template_names=_template_names,
                               rawthresh=rawthresh, plotdir=plotdir,
                               plot_format=plot_format, i=i)
        if debug >= 4:
            print(' '.join(['Saved the cccsum to:', _template_names[i],
                            stream[0].stats.starttime.datetime.
                            strftime('%Y%j')]))
            np.save(_template_names[i] +
                    stream[0].stats.starttime.datetime.strftime('%Y%j'),
                    cccsum)
        tic = time.perf_counter()
        if max(cccsum) > rawthresh:
            peaks = findpeaks.find_peaks2_short(
                arr=cccsum, thresh=rawthresh,
                trig_int=trig_int * stream[0].stats.sampling_rate,
                debug=debug, starttime=stream[0].stats.starttime,
                samp_rate=stream[0].stats.sampling_rate)
        else:
            print('No peaks found above threshold')
            peaks = False
        toc = time.perf_counter()
        if debug >= 1:
            print(' '.join(['Finding peaks took:', str(toc - tic), 's']))
        if peaks:
            for peak in peaks:
                # peak[1] is used as a sample offset from the start of the
                # first trace and peak[0] as the detection value.
                detecttime = stream[0].stats.starttime +\
                    peak[1] / stream[0].stats.sampling_rate
                # Detect time must be valid QuakeML uri within resource_id.
                # This will write a formatted string which is still
                # readable by UTCDateTime
                rid = ResourceIdentifier(
                    id=_template_names[i] + '_' +
                    str(detecttime.strftime('%Y%m%dT%H%M%S.%f')),
                    prefix='smi:local')
                ev = Event(resource_id=rid)
                cr_i = CreationInfo(author='EQcorrscan',
                                    creation_time=UTCDateTime())
                ev.creation_info = cr_i
                # All detection info in Comments for lack of a better idea
                thresh_str = 'threshold=' + str(rawthresh)
                ccc_str = 'detect_val=' + str(peak[0])
                used_chans = 'channels used: ' +\
                    ' '.join([str(pair) for pair in chans[i]])
                ev.comments.append(Comment(text=thresh_str))
                ev.comments.append(Comment(text=ccc_str))
                ev.comments.append(Comment(text=used_chans))
                min_template_tm = min([tr.stats.starttime
                                       for tr in template])
                # One pick per template trace that was used, offset by that
                # trace's delay relative to the earliest template trace.
                for tr in template:
                    if (tr.stats.station, tr.stats.channel) not in chans[i]:
                        continue
                    else:
                        pick_tm = detecttime + (tr.stats.starttime -
                                                min_template_tm)
                        wv_id = WaveformStreamID(
                            network_code=tr.stats.network,
                            station_code=tr.stats.station,
                            channel_code=tr.stats.channel)
                        ev.picks.append(Pick(time=pick_tm,
                                             waveform_id=wv_id))
                detections.append(DETECTION(_template_names[i],
                                            detecttime,
                                            no_chans[i], peak[0], rawthresh,
                                            'corr', chans[i], event=ev))
                if output_cat:
                    det_cat.append(ev)
    if extract_detections:
        detection_streams = extract_from_stream(stream, detections)
    # Drop the local references to the working copies.
    del stream, templates
    # The shape of the return value depends on the two output flags.
    if output_cat and not extract_detections:
        return detections, det_cat
    elif not extract_detections:
        return detections
    elif extract_detections and not output_cat:
        return detections, detection_streams
    else:
        return detections, det_cat, detection_streams
def read_nlloc_hyp(filename, coordinate_converter=None, picks=None, **kwargs):
    """
    Read a NonLinLoc Hypocenter-Phase file into a
    :class:`~obspy.core.event.Catalog`.

    Every ``NLLOC`` ... ``END_NLLOC`` section of the input becomes one event
    in the returned catalog.

    .. note::

        Coordinate conversion from coordinate frame of NonLinLoc model files /
        location run to WGS84 has to be specified explicitly by the user if
        necessary.

    .. note::

        An example can be found on the :mod:`~obspy.io.nlloc` submodule front
        page in the documentation pages.

    :param filename: File or file-like object in text mode. If it is not
        file-like and cannot be opened as a path, the argument itself is
        used as the file content.
    :type coordinate_converter: func
    :param coordinate_converter: Function to convert (x, y, z) coordinates
        of NonLinLoc output to geographical coordinates and depth
        (longitude, latitude, depth in kilometers). If left ``None``, the
        geographical coordinates in the "GEOGRAPHIC" line of NonLinLoc
        output are used. The function should accept three arguments x, y, z
        (each of type :class:`numpy.ndarray`) and return a tuple of three
        :class:`numpy.ndarray` (lon, lat, depth in kilometers).
    :type picks: list of :class:`~obspy.core.event.Pick`
    :param picks: Original picks used to generate the NonLinLoc location.
        If provided, the output event will include the original picks and
        the arrivals in the output origin will link to them correctly (with
        their ``pick_id`` attribute). If not provided, the output event will
        include (the rather basic) pick information that can be
        reconstructed from the NonLinLoc hypocenter-phase file.
    :param kwargs: Accepted but not used by this function.
    :raises Exception: If the ``NLLOC`` and ``END_NLLOC`` lines do not pair
        up consistently.
    :rtype: :class:`~obspy.core.event.Catalog`
    """
    if hasattr(filename, "read"):
        # File-like object: take its content, decoding bytes if needed.
        data = filename.read()
        if hasattr(data, "decode"):
            data = data.decode("UTF-8")
    else:
        # Try the argument as a path first; failing that, treat the
        # argument itself as the file content.
        try:
            with open(filename, "rb") as fh:
                data = fh.read()
            data = data.decode("UTF-8")
        except Exception:
            try:
                data = filename.decode("UTF-8")
            except Exception:
                data = str(filename)
            data = data.strip()

    # Blank lines carry no information, so discard them up front.
    rows = [row for row in data.splitlines() if row.strip()]

    # Picks originally used in the location, if the caller supplied them.
    original_picks = [] if picks is None else picks

    cat = Catalog()

    # Record the positions of the section start and end markers.
    section_starts = []
    section_ends = []
    for position, row in enumerate(rows):
        if row.startswith("NLLOC "):
            section_starts.append(position)
        if row.startswith("END_NLLOC"):
            section_ends.append(position)

    if len(section_starts) != len(section_ends):
        msg = ("NLLOC HYP file '{}' seems corrupt, number of 'NLLOC' lines "
               "does not match number of 'END_NLLOC' lines").format(filename)
        raise Exception(msg)

    # Interleaved as start, end, start, end, ... the positions must be
    # strictly increasing, otherwise sections overlap or are out of order.
    sections = list(zip(section_starts, section_ends))
    interleaved = [position for section in sections for position in section]
    if any(np.diff(interleaved) < 1):
        msg = ("NLLOC HYP file '{}' seems corrupt, inconsistent "
               "positioning of 'NLLOC' and 'END_NLLOC' lines "
               "detected.").format(filename)
        raise Exception(msg)

    for first, last in sections:
        cat.append(_read_single_hypocenter(
            rows[first:last + 1],
            coordinate_converter=coordinate_converter,
            original_picks=original_picks))

    cat.creation_info.creation_time = UTCDateTime()
    cat.creation_info.version = "ObsPy %s" % __version__
    return cat
def read_nlloc_hyp(filename, coordinate_converter=None, picks=None, **kwargs):
    """
    Reads a NonLinLoc Hypocenter-Phase file to a
    :class:`~obspy.core.event.Catalog` object.

    The input is scanned for ``NLLOC`` ... ``END_NLLOC`` blocks; each block is
    passed to ``_read_single_hypocenter`` and appended to the catalog as one
    event. Lines found outside such a block are skipped with a warning.

    .. note::

        Coordinate conversion from coordinate frame of NonLinLoc model files /
        location run to WGS84 has to be specified explicitly by the user if
        necessary.

    .. note::

        An example can be found on the :mod:`~obspy.io.nlloc` submodule front
        page in the documentation pages.

    :param filename: Object with a ``read()`` method (returning text or
        bytes), or a path to a file. If the path cannot be opened/read, the
        argument itself is used as the hypocenter data (bytes are decoded,
        anything else goes through ``str()``).
    :type coordinate_converter: func
    :param coordinate_converter: Function to convert (x, y, z)
        coordinates of NonLinLoc output to geographical coordinates
        (longitude, latitude, depth in kilometers).
        If left ``None``, the geographical coordinates in the "GEOGRAPHIC"
        line of NonLinLoc output are used.
        The function should accept three arguments x, y, z (each of type
        :class:`numpy.ndarray`) and return a tuple of three
        :class:`numpy.ndarray` (lon, lat, depth in kilometers).
    :type picks: list of :class:`~obspy.core.event.Pick`
    :param picks: Original picks used to generate the NonLinLoc location.
        If provided, the output event will include the original picks and the
        arrivals in the output origin will link to them correctly (with their
        ``pick_id`` attribute). If not provided, the output event will include
        (the rather basic) pick information that can be reconstructed from the
        NonLinLoc hypocenter-phase file.
    :param kwargs: Accepted but not used by this function.
    :rtype: :class:`~obspy.core.event.Catalog`
    :raises RuntimeError: If an ``NLLOC`` line is not followed by an
        ``END_NLLOC`` line.
    """
    if not hasattr(filename, "read"):
        # Check if it exists, otherwise assume its a string.
        # ``except Exception`` (not a bare ``except``) so that
        # KeyboardInterrupt / SystemExit are never swallowed here.
        try:
            with open(filename, "rt") as fh:
                data = fh.read()
        except Exception:
            try:
                data = filename.decode()
            except Exception:
                data = str(filename)
            data = data.strip()
    else:
        data = filename.read()
        if hasattr(data, "decode"):
            data = data.decode()

    # split lines and remove empty ones
    lines = [line for line in data.splitlines() if line.strip()]

    # remember picks originally used in location, if provided
    original_picks = picks
    if original_picks is None:
        original_picks = []

    cat = Catalog()
    # Walk through the lines with a cursor instead of repeatedly popping from
    # / re-slicing the list. Besides avoiding quadratic copying this makes
    # sure we never index into an exhausted list, e.g. when stray lines
    # follow the last 'END_NLLOC' or no 'NLLOC ' line exists at all.
    total = len(lines)
    pos = 0
    while pos < total:
        if not lines[pos].startswith("NLLOC "):
            msg = ("Ignoring an unexpected line in NLLOC_HYP "
                   "file:\n'{}'".format(lines[pos]))
            warnings.warn(msg)
            pos += 1
            continue
        # lines[pos] opens a hypocenter block, look for its closing line
        for end in range(pos, total):
            if lines[end].startswith("END_NLLOC"):
                break
        else:
            msg = ("NLLOC HYP file seems corrupt,"
                   " could not detect 'END_NLLOC' line.")
            raise RuntimeError(msg)
        event = _read_single_hypocenter(
            lines[pos:end + 1], coordinate_converter=coordinate_converter,
            original_picks=original_picks)
        cat.append(event)
        pos = end + 1
    cat.creation_info.creation_time = UTCDateTime()
    cat.creation_info.version = "ObsPy %s" % __version__
    return cat
def surf_events_to_cat(loc_file, pick_file):
    """
    Take location files (hypoinverse formatted) and picks (format TBD)
    and creates a single obspy catalog for later use and dissemination.

    The location file is read as comma-separated text. Its first line is
    skipped and every following line yields one Event, provided that
    ``parse_picks(pick_file)`` contains an entry for the line's event id;
    otherwise the line is skipped and a notice is printed.

    Columns read from each line: ``0`` event id, ``1`` origin time,
    ``2``/``3``/``4`` the ``hmc_east``/``hmc_north``/``hmc_elev`` coordinates,
    ``-5`` azimuthal gap, ``-3`` rms, ``-2`` horizontal error and ``-1``
    vertical error.

    :param loc_file: File path
    :param pick_file: File path
    :return: obspy.core.Catalog (empty if the location file holds no
        usable lines)
    """
    # Read/parse location file and create Events for each
    surf_cat = Catalog()
    # Parse the pick file to a dictionary
    pick_dict = parse_picks(pick_file)
    with open(loc_file, 'r') as f:
        # Skip the first line (presumably a header -- confirm). The default
        # argument keeps a completely empty file from raising StopIteration.
        next(f, None)
        for ln in f:
            line = ln.strip('\n').split(',')
            eid = line[0]
            if eid not in pick_dict:
                print('No picks for this location, skipping for now.')
                continue
            ot = UTCDateTime(line[1])
            hmc_east = float(line[2])
            hmc_north = float(line[3])
            hmc_elev = float(line[4])
            gap = float(line[-5])
            rms = float(line[-3])
            err_xy = float(line[-2])
            err_z = float(line[-1])
            # NOTE(review): a new converter is built for every line; it could
            # probably be created once before the loop if SURF_converter()
            # is stateless -- confirm.
            converter = SURF_converter()
            lon, lat, elev = converter.to_lonlat(
                (hmc_east, hmc_north, hmc_elev))
            # NOTE(review): 130 looks like a reference elevation used to turn
            # elevation into depth -- confirm value and units.
            o = Origin(time=ot, longitude=lon, latitude=lat,
                       depth=130 - elev)
            o.origin_uncertainty = OriginUncertainty()
            o.quality = OriginQuality()
            ou = o.origin_uncertainty
            oq = o.quality
            # NOTE(review): the 1e3 factors presumably convert km to m --
            # confirm against the location file's units.
            ou.horizontal_uncertainty = err_xy * 1e3
            ou.preferred_description = "horizontal uncertainty"
            o.depth_errors.uncertainty = err_z * 1e3
            oq.standard_error = rms
            oq.azimuthal_gap = gap
            # Keep the original local coordinates and event id alongside the
            # converted origin.
            extra = AttribDict({
                'hmc_east': {
                    'value': hmc_east,
                    'namespace': 'smi:local/hmc'
                },
                'hmc_north': {
                    'value': hmc_north,
                    'namespace': 'smi:local/hmc'
                },
                'hmc_elev': {
                    'value': hmc_elev,
                    'namespace': 'smi:local/hmc'
                },
                'hmc_eid': {
                    'value': eid,
                    'namespace': 'smi:local/hmc'
                }
            })
            o.extra = extra
            # Resource id is derived from the origin time only.
            rid = ResourceIdentifier(id=ot.strftime('%Y%m%d%H%M%S%f'))
            # Dummy magnitude of 1. for all events until further notice
            mag = Magnitude(mag=1., mag_errors=QuantityError(uncertainty=1.))
            ev = Event(origins=[o], magnitudes=[mag],
                       picks=pick_dict[eid], resource_id=rid)
            surf_cat.append(ev)
    return surf_cat