def example_data_set(tmpdir):
    """
    Fixture creating a small example file.
    """
    asdf_filename = os.path.join(tmpdir.strpath, "test.h5")
    data_path = os.path.join(data_dir, "small_sample_data_set")

    data_set = ASDFDataSet(asdf_filename)

    for filename in glob.glob(os.path.join(data_path, "*.xml")):
        if "quake.xml" in filename:
            data_set.add_quakeml(filename)
        else:
            data_set.add_stationxml(filename)

    for filename in glob.glob(os.path.join(data_path, "*.mseed")):
        data_set.add_waveforms(filename, tag="raw_recording",
                               event_id=data_set.events[0])

    # Flush and finish writing.
    del data_set

    # Return filename and path to tempdir, no need to always create a
    # new one.
    return Namespace(filename=asdf_filename, tmpdir=tmpdir.strpath)
def combine_asdf(base_asdf_path, append_asdf_path, output_asdf_path):
    """
    Merge the waveforms of append_asdf into base_asdf and write the result
    to a new ASDF file.
    """
    base_asdf = ASDFDataSet(base_asdf_path, mode="r", mpi=False)
    append_asdf = ASDFDataSet(append_asdf_path, mode="r", mpi=False)
    output_asdf = ASDFDataSet(output_asdf_path, mpi=False)

    # * add events
    events = base_asdf.events
    event = events[0]
    output_asdf.add_quakeml(events)

    # * add waveforms and stationxml
    # firstly we add the base asdf
    rep_net_sta = base_asdf.waveforms.list()[0]
    tag_default = base_asdf.waveforms[rep_net_sta].get_waveform_tags()[0]
    for each_net_sta in base_asdf.waveforms.list():
        tag = base_asdf.waveforms[each_net_sta].get_waveform_tags()[0]
        assert tag == tag_default
        st = base_asdf.waveforms[each_net_sta][tag]
        inv = base_asdf.waveforms[each_net_sta]["StationXML"]
        output_asdf.add_waveforms(st, tag=tag, event_id=event)
        output_asdf.add_stationxml(inv)

    # secondly we add the append asdf
    for each_net_sta in append_asdf.waveforms.list():
        tag = append_asdf.waveforms[each_net_sta].get_waveform_tags()[0]
        assert tag == tag_default
        st = append_asdf.waveforms[each_net_sta][tag]
        inv = append_asdf.waveforms[each_net_sta]["StationXML"]
        output_asdf.add_waveforms(st, tag=tag, event_id=event)
        output_asdf.add_stationxml(inv)

    del base_asdf
    del append_asdf
    del output_asdf
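# A minimal usage sketch for combine_asdf above. The three file paths are
# hypothetical placeholders, not files that ship with this code; the call
# merges the waveforms and StationXML of "append.h5" into a copy of
# "base.h5" and writes the result to "combined.h5".
if __name__ == "__main__":
    combine_asdf("base.h5", "append.h5", "combined.h5")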
def write_new_synt_asdf(self, file_prefix):
    new_synt_dict = self._sort_new_synt()

    for tag, win_array in new_synt_dict.items():
        filename = "%s.%s.h5" % (file_prefix, tag)
        if os.path.exists(filename):
            os.remove(filename)
            logger.info("Output file exists, removed: %s" % filename)
        else:
            logger.info("Output new synt asdf: %s" % filename)

        ds = ASDFDataSet(filename, mode='w')
        added_list = []
        for window in win_array:
            synt_id = window.datalist['new_synt'].id
            # Skip duplicate observed location ids. For example,
            # II.AAK.00.BHZ and II.AAK.10.BHZ will be treated as different
            # traces, but the synt and new synt will be the same, so we
            # only add one of them.
            if synt_id in added_list:
                continue
            else:
                added_list.append(synt_id)
            ds.add_waveforms(window.datalist['new_synt'], tag=tag)

        # add stationxml
        _staxml_asdf = self._asdf_file_dict['synt']
        ds_sta = ASDFDataSet(_staxml_asdf)
        self.__add_staxml_from_other_asdf(ds, ds_sta)
        ds.flush()
def test_detailed_event_association_is_persistent_through_processing(
        example_data_set):
    """
    Processing a file with an associated event and storing it again should
    keep the association for all the possible event tags.
    """
    data_set = ASDFDataSet(example_data_set.filename)

    # Store a new waveform.
    event = data_set.events[0]
    origin = event.origins[0]
    magnitude = event.magnitudes[0]
    focmec = event.focal_mechanisms[0]

    tr = obspy.read()[0]
    tr.stats.network = "BW"
    tr.stats.station = "RJOB"

    data_set.add_waveforms(tr, tag="random", event_id=event,
                           origin_id=origin, focal_mechanism_id=focmec,
                           magnitude_id=magnitude)

    new_st = data_set.waveforms.BW_RJOB.random
    new_st.taper(max_percentage=0.05, type="cosine")

    data_set.add_waveforms(new_st, tag="processed")
    processed_st = data_set.waveforms.BW_RJOB.processed
    assert event.resource_id == processed_st[0].stats.asdf.event_id
    assert origin.resource_id == processed_st[0].stats.asdf.origin_id
    assert magnitude.resource_id == processed_st[0].stats.asdf.magnitude_id
    assert focmec.resource_id == \
        processed_st[0].stats.asdf.focal_mechanism_id
def write_new_syn_file(self, file_format="sac", outputdir=".",
                       eventname=None, suffix=None):
    """
    Write out a new synthetic file based on the new cmtsolution.
    """
    if not os.path.exists(outputdir):
        os.makedirs(outputdir)

    # sort the new synthetic data
    new_synt_dict = {}
    for window in self.window:
        tag = window.tag['synt']
        if tag not in new_synt_dict.keys():
            new_synt_dict[tag] = []
        new_synt_dict[tag].append(window)

    if file_format.upper() == "SAC":
        for tag, win_array in new_synt_dict.items():
            if eventname is None:
                targetdir = os.path.join(outputdir, tag)
            else:
                targetdir = os.path.join(outputdir,
                                         "%s_%s" % (eventname, tag))
            if not os.path.exists(targetdir):
                os.makedirs(targetdir)
            for window in win_array:
                sta = window.station
                nw = window.network
                component = window.component
                location = window.location
                filename = "%s.%s.%s.%s.sac" \
                    % (sta, nw, location, component)
                outputfn = os.path.join(targetdir, filename)
                new_synt = window.datalist['new_synt']
                new_synt.write(outputfn, format='SAC')
    elif file_format.upper() == "ASDF":
        for tag, win_array in new_synt_dict.items():
            if eventname is None:
                outputfn = os.path.join(outputdir, "new_synt.%s.h5" % tag)
            else:
                if suffix is None:
                    outputfn = os.path.join(
                        outputdir, "%s.new_synt.%s.h5" % (eventname, tag))
                else:
                    outputfn = os.path.join(
                        outputdir,
                        "%s.%s.new_synt.%s.h5" % (eventname, suffix, tag))
            if os.path.exists(outputfn):
                os.remove(outputfn)
            ds = ASDFDataSet(outputfn)
            for window in win_array:
                ds.add_waveforms(window.datalist['new_synt'], tag=tag)
            # add stationxml
    else:
        raise NotImplementedError
def test_data_set_creation(tmpdir):
    """
    Test data set creation with a small test dataset.

    It tests that the stuff that goes in is correctly saved and can be
    retrieved again.
    """
    asdf_filename = os.path.join(tmpdir.strpath, "test.h5")
    data_path = os.path.join(data_dir, "small_sample_data_set")

    data_set = ASDFDataSet(asdf_filename)

    for filename in glob.glob(os.path.join(data_path, "*.mseed")):
        data_set.add_waveforms(filename, tag="raw_recording")

    for filename in glob.glob(os.path.join(data_path, "*.xml")):
        if "quake.xml" in filename:
            data_set.add_quakeml(filename)
        else:
            data_set.add_stationxml(filename)

    # Flush and finish writing.
    del data_set

    # Open once again
    data_set = ASDFDataSet(asdf_filename)

    # ObsPy is tested enough to make this comparison meaningful.
    for station in (("AE", "113A"), ("TA", "POKR")):
        # Test the waveforms.
        stream_asdf = \
            getattr(data_set.waveforms, "%s_%s" % station).raw_recording
        stream_file = obspy.read(os.path.join(
            data_path, "%s.%s.*.mseed" % station))
        # Delete the file format specific stats attributes. These are
        # meaningless inside ASDF data sets.
        for trace in stream_file:
            del trace.stats.mseed
            del trace.stats._format
        for trace in stream_asdf:
            del trace.stats.asdf
            del trace.stats._format
        assert stream_asdf == stream_file

        # Test the inventory data.
        inv_asdf = \
            getattr(data_set.waveforms, "%s_%s" % station).StationXML
        inv_file = obspy.read_inventory(
            os.path.join(data_path, "%s.%s..BH*.xml" % station))
        assert inv_file == inv_asdf

    # Test the event.
    cat_file = obspy.read_events(os.path.join(data_path, "quake.xml"))
    cat_asdf = data_set.events
    assert cat_file == cat_asdf
def test_coordinate_extraction_but_no_stationxml(tmpdir):
    """
    Tests what happens if no stationxml is defined for a station.
    """
    asdf_filename = os.path.join(tmpdir.strpath, "test.h5")
    data_path = os.path.join(data_dir, "small_sample_data_set")

    data_set = ASDFDataSet(asdf_filename)
    for filename in glob.glob(os.path.join(data_path, "*.mseed")):
        data_set.add_waveforms(filename, tag="raw_recording")

    # If no stationxml exists it should just return an empty dictionary.
    assert data_set.get_all_coordinates() == {}
def test_event_association_is_persistent_through_processing(example_data_set):
    """
    Processing a file with an associated event and storing it again should
    keep the association.
    """
    data_set = ASDFDataSet(example_data_set.filename)
    st = data_set.waveforms.TA_POKR.raw_recording
    event_id = st[0].stats.asdf.event_id

    st.taper(max_percentage=0.05, type="cosine")

    data_set.add_waveforms(st, tag="processed")
    processed_st = data_set.waveforms.TA_POKR.processed
    assert event_id == processed_st[0].stats.asdf.event_id
def convert_to_asdf(filelist, asdf_fn, quakemlfile, staxml_filelist=None,
                    tag=None):
    """
    Convert files (SAC or MiniSEED) to ASDF.
    """
    nfiles = len(filelist)
    if nfiles == 0:
        print("No file specified. Return...")
        return

    if os.path.exists(asdf_fn):
        raise Exception("File '%s' exists." % asdf_fn)

    ds = ASDFDataSet(asdf_fn)

    # Add event
    if quakemlfile is not None and os.path.exists(quakemlfile):
        print("Event info added")
        ds.add_quakeml(quakemlfile)
        event = ds.events[0]
    else:
        raise ValueError("No Event file")

    # Add waveforms.
    print("Adding Waveform data")
    for _i, filename in enumerate(filelist):
        if os.path.exists(filename):
            ds.add_waveforms(filename, tag=tag, event_id=event)
        else:
            print("File does not exist, %i of %i: %s"
                  % (_i + 1, nfiles, filename))

    # Add StationXML files.
    if staxml_filelist is not None and len(staxml_filelist) > 0:
        for _i, filename in enumerate(staxml_filelist):
            if os.path.exists(filename):
                ds.add_stationxml(filename)
    else:
        print("No stationxml added")
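# Hedged usage sketch for convert_to_asdf above: the globbed directories, the
# QuakeML file name and the waveform tag are illustrative assumptions only.
import glob

sac_files = glob.glob("./SAC/*.SAC")
staxml_files = glob.glob("./StationXML/*.xml")
convert_to_asdf(sac_files, "observed.h5", "quake.xml",
                staxml_filelist=staxml_files, tag="raw_recording")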
def test_adding_waveforms_with_provenance_id(tmpdir):
    asdf_filename = os.path.join(tmpdir.strpath, "test.h5")
    data_path = os.path.join(data_dir, "small_sample_data_set")
    data_set = ASDFDataSet(asdf_filename)
    for filename in glob.glob(os.path.join(data_path, "*.mseed")):
        data_set.add_waveforms(filename, tag="raw_recording",
                               provenance_id="{http://example.org}test")

    data_set.__del__()
    del data_set

    new_data_set = ASDFDataSet(asdf_filename)
    st = new_data_set.waveforms.TA_POKR.raw_recording
    for tr in st:
        assert tr.stats.asdf.provenance_id == "{http://example.org}test"

    new_data_set.__del__()
    del new_data_set
def test_stationxml_is_invalid_tag_name(tmpdir):
    """
    StationXML is an invalid waveform tag.
    """
    filename = os.path.join(tmpdir.strpath, "example.h5")
    data_set = ASDFDataSet(filename)

    st = obspy.read()

    with pytest.raises(ValueError):
        data_set.add_waveforms(st, tag="StationXML")
    with pytest.raises(ValueError):
        data_set.add_waveforms(st, tag="stationxml")

    # Adding with a proper tag works just fine.
    data_set.add_waveforms(st, tag="random_waveform")
def test_saving_event_id(tmpdir):
    """
    Tests that the event_id can be saved and retrieved automatically.
    """
    data_path = os.path.join(data_dir, "small_sample_data_set")
    filename = os.path.join(tmpdir.strpath, "example.h5")
    event = obspy.read_events(os.path.join(data_path, "quake.xml"))[0]

    # Add the event object, and associate the waveform with it.
    data_set = ASDFDataSet(filename)
    data_set.add_quakeml(event)
    waveform = obspy.read(os.path.join(data_path, "TA.*.mseed")).sort()
    data_set.add_waveforms(waveform, "raw_recording", event_id=event)
    st = data_set.waveforms.TA_POKR.raw_recording
    for tr in st:
        assert tr.stats.asdf.event_id.get_referred_object() == event
    del data_set
    os.remove(filename)

    # Add as a string.
    data_set = ASDFDataSet(filename)
    data_set.add_quakeml(event)
    waveform = obspy.read(os.path.join(data_path, "TA.*.mseed")).sort()
    data_set.add_waveforms(waveform, "raw_recording",
                           event_id=str(event.resource_id.id))
    st = data_set.waveforms.TA_POKR.raw_recording
    for tr in st:
        assert tr.stats.asdf.event_id.get_referred_object() == event
    del data_set
    os.remove(filename)

    # Add as a resource identifier object.
    data_set = ASDFDataSet(filename)
    data_set.add_quakeml(event)
    waveform = obspy.read(os.path.join(data_path, "TA.*.mseed")).sort()
    data_set.add_waveforms(waveform, "raw_recording",
                           event_id=event.resource_id)
    st = data_set.waveforms.TA_POKR.raw_recording
    for tr in st:
        assert tr.stats.asdf.event_id.get_referred_object() == event
    del data_set
    os.remove(filename)
import glob
import os

from pyasdf import ASDFDataSet

filename = "synthetic.h5"

if os.path.exists(filename):
    raise Exception("File '%s' exists." % filename)

ds = ASDFDataSet(filename)

# Add event
ds.add_quakeml(
    "./GCMT_event_SOUTH_SANDWICH_ISLANDS_REGION_Mag_5.6_2010-3-11-6.xml")
event = ds.events[0]

# Add waveforms.
filenames = glob.glob("./SYNTHETIC_SAC/*.sem")
for _i, filename in enumerate(filenames):
    print("Adding SAC file %i of %i..." % (_i + 1, len(filenames)))
    ds.add_waveforms(filename, tag="synthetic", event_id=event)

# Add StationXML files.
filenames = glob.glob("./StationXML/*.xml")
for _i, filename in enumerate(filenames):
    print("Adding StationXML file %i of %i..." % (_i + 1, len(filenames)))
    ds.add_stationxml(filename)
import glob
import os

from pyasdf import ASDFDataSet

filename = "observed.h5"

if os.path.exists(filename):
    raise Exception("File '%s' exists." % filename)

ds = ASDFDataSet(filename)

# Add event
ds.add_quakeml(
    "./GCMT_event_SOUTH_SANDWICH_ISLANDS_REGION_Mag_5.6_2010-3-11-6.xml")
event = ds.events[0]

# Add waveforms.
filenames = glob.glob("./SAC/*.SAC")
for _i, filename in enumerate(filenames):
    print("Adding SAC file %i of %i..." % (_i + 1, len(filenames)))
    ds.add_waveforms(filename, tag="raw_recording", event_id=event)

# Add StationXML files.
filenames = glob.glob("./StationXML/*.xml")
for _i, filename in enumerate(filenames):
    print("Adding StationXML file %i of %i..." % (_i + 1, len(filenames)))
    ds.add_stationxml(filename)
import glob
import os

from pyasdf import ASDFDataSet

filename = "observed.h5"

if os.path.exists(filename):
    raise Exception("File '%s' exists." % filename)

ds = ASDFDataSet(filename)

# Add event
ds.add_quakeml(
    "./GCMT_event_SOUTH_SANDWICH_ISLANDS_REGION_Mag_5.6_2010-3-11-6.xml")
event = ds.events[0]

# Add waveforms.
filenames = glob.glob("./SAC/*.SAC")
for _i, filename in enumerate(filenames):
    print("Adding SAC file %i of %i..." % (_i + 1, len(filenames)))
    # We associate the waveform with the previous event. This is optional
    # but recommended if the association is meaningful.
    ds.add_waveforms(filename, tag="raw_recording", event_id=event)

# Add StationXML files.
filenames = glob.glob("./StationXML/*.xml")
for _i, filename in enumerate(filenames):
    print("Adding StationXML file %i of %i..." % (_i + 1, len(filenames)))
    ds.add_stationxml(filename)
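# Hedged read-back sketch: once one of the conversion scripts above has run,
# the resulting file can be reopened and the waveform/event association
# inspected. "II_AAK" is a hypothetical station code used only for
# illustration; tr.stats.asdf.event_id is the attribute exercised by the
# tests earlier in this collection.
from pyasdf import ASDFDataSet

ds = ASDFDataSet("observed.h5", mode="r")
st = ds.waveforms.II_AAK.raw_recording
print(st[0].stats.asdf.event_id)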
    event_id = get_event_id()
    events[coords] = event_id

    # keep track of channels
    coords = get_station_coords(sac_header)
    name = trace.id
    channels[name] = coords

    # keep track of recording times
    s, e = trace.stats.starttime, trace.stats.endtime
    if s.timestamp < starttime:
        starttime = s
    if e.timestamp > endtime:
        endtime = e

    ds.add_waveforms(trace, args.tag, event_id, labels=[sac_filename])

# add events
catalog = obspy.core.event.Catalog()
for event_coords, event_id in events.items():
    latitude, longitude, depth, origin_time = event_coords
    origin = obspy.core.event.Origin(time=origin_time, longitude=longitude,
                                     latitude=latitude, depth=depth)
    catalog.append(
        obspy.core.event.Event(resource_id=event_id, origins=[origin]))
ds.add_quakeml(catalog)

# add stations
for group1, group2 in sort_by_station(channels):
    else:
        source_id = get_source_id()
        sources[coords] = source_id

    # use trace index in place of network/station/channel
    trace.stats.station = _i

    # keep track of receivers
    coords = get_receiver_coords(trace)
    if coords in sources:
        source_id = sources[coords]
    else:
        source_id = get_source_id()
        sources[coords] = source_id

    ds.add_waveforms(trace, args.tag, source_id)

# add events
# longitude = source_coord_x, latitude = source_coord_y
catalog = obspy.core.event.Catalog()
for source_coords, source_id in sources.items():
    latitude, longitude, depth = source_coords
    # build the origin from the unpacked source coordinates
    origin = obspy.core.event.Origin(latitude=latitude, longitude=longitude,
                                     depth=depth)
    catalog.append(
        obspy.core.event.Event(resource_id=source_id, origins=[origin]))
ds.add_quakeml(catalog)

# add su_headers as auxiliary data
ds.add_auxiliary_data_file(dump(headers), path='SUHeaders')