def _path_or_event_bank(path): """ Return either a path (str) to a function, or an EventBank if path is a directory. """ if Path(path).is_dir(): return obsplus.EventBank(path).update_index() return str(path)
def catalog_to_directory(
    cat: Union[str, Catalog, ev.Event, Path],
    path: Union[str, Path],
    file_format: str = "quakeml",
    ext="xml",
    event_bank_index=True,
    check_duplicates=True,
) -> None:
    """
    Parse a catalog and save each event to a time-based directory structure.

    The location of each file is computed by ``get_event_path``; the
    intended layout is:
        YYYY/MM/DD/YYYY-MM-DDThh-mm-ss-RID
    where RID is the last 5 characters of the event's resource id.

    When ``check_duplicates`` is True, any file in the target folder whose
    name ends with the same 5 characters (plus ``ext``) is read, and if its
    first event has exactly the same resource id, that existing file path
    is reused (and overwritten). This avoids changing the path of an event
    when its origin time changes slightly.

    Parameters
    ----------
    cat
        The obspy catalog, a single event, or a path to a file readable by
        ``obspy.read_events``.
    path
        A path to the output directory. If it does not exist it will be
        created.
    file_format
        Any obspy event format that can be written.
    ext
        The extension to add to each file. A leading dot is added if
        missing.
    event_bank_index
        If True, create an event bank index on the newly created directory.
    check_duplicates
        If True, look for an already-saved event with the same resource id
        and write to its path rather than creating a new file.
    """
    if isinstance(cat, (str, Path)):
        cat = obspy.read_events(str(cat))
    # ensure events is iterable and ext has a dot before it
    cat = [cat] if not isinstance(cat, obspy.Catalog) else cat
    ext = "." + ext if not ext.startswith(".") else ext
    # make sure directory exists
    path = Path(path)
    path.mkdir(parents=True, exist_ok=True)
    # iterate each event, get a time and resource id and save to disk
    for event in cat:
        event_path = Path(get_event_path(event, str(path), ext=ext))
        # the folder that will hold this event must exist before we glob
        # it or write into it (the base directory alone is not enough)
        event_path.parent.mkdir(parents=True, exist_ok=True)
        # determine if another event exists with same id, if so use its path
        if check_duplicates:
            rid = str(event.resource_id)[-5:]
            candidates = list(event_path.parent.glob(f"*{rid}{ext}"))
            for duplicate_path in candidates:
                existing_event = obspy.read_events(str(duplicate_path))[0]
                if existing_event.resource_id == event.resource_id:
                    event_path = duplicate_path
                    break
        event.write(str(event_path), file_format)
    if event_bank_index:
        obsplus.EventBank(path).update_index()
def test_future_version(self, ebank_high_version):
    """Opening a bank indexed by a future version must warn exactly once."""
    bank_path = ebank_high_version.bank_path
    with pytest.warns(UserWarning) as record:
        obsplus.EventBank(bank_path)
    assert len(record) == 1
    # the text of the single captured warning should mention the newer version
    first_warning = record.list[0]
    assert "a newer version of ObsPlus" in first_warning.message.args[0]
def test_issue_30(self, crandall_dataset):
    """
    Event ids given as a numpy array must be accepted by get_events.
    See #30.
    """
    bank = obsplus.EventBank(crandall_dataset.event_path)
    # take the first two event ids found in the index
    event_ids = bank.read_index()["event_id"].values[0:2]
    # hand them to the query wrapped in a numpy array
    found = bank.get_events(eventid=np.array(event_ids))
    assert len(found) == 2
def test_from_path(self, tmpdir):
    """
    catalog_to_directory should accept a path to an events file, given
    either as a Path instance or as a string.
    """
    base = Path(tmpdir)
    events_file = base / "events.xml"
    cat = obspy.read_events()
    # a slightly invalid uri is used, just ignore
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        cat.write(str(events_file), "quakeml")
    # first a Path instance, then the equivalent string
    cases = (
        (events_file, base / "catalog_dir1"),
        (str(events_file), base / "catalog_dir2"),
    )
    for source, out_dir in cases:
        catalog_to_directory(source, out_dir)
        assert out_dir.exists()
        assert not obsplus.EventBank(out_dir).read_index().empty
def test_files_created(self, tmpdir):
    """
    Each event of the default catalog should get its own file, and the
    bank index file should exist as well.
    """
    cat = obspy.read_events()
    catalog_to_directory(cat, tmpdir)
    root = Path(tmpdir)
    # one xml file per event, searched recursively
    xml_count = len(list(root.rglob("*.xml")))
    assert xml_count == len(cat)
    index_file = Path(obsplus.EventBank(root).index_path)
    assert index_file.exists()
def _catalog_to_client(path): """ Turn a str or directory into a client. If a single file, try to read as events, if directory init event bank. """ path = Path(path) # ensure we are working with a path assert path.exists() if path.is_dir(): return get_event_client(obsplus.EventBank(path)) else: return get_event_client(obspy.read_events(str(path)))
def test_init_with_banks(self, bingham_dataset):
    """
    A Fetcher built from a wave bank, an event bank and a station client
    should expose non-empty event and station dataframes.
    """
    waveform_bank = obsplus.WaveBank(bingham_dataset.waveform_path)
    event_bank = obsplus.EventBank(bingham_dataset.event_path)
    # bring both indices up to date before handing the banks over
    waveform_bank.update_index()
    event_bank.update_index()
    station_client = bingham_dataset.station_client
    fetcher = Fetcher(
        waveforms=waveform_bank, events=event_bank, stations=station_client
    )
    for frame in (fetcher.event_df, fetcher.station_df):
        assert isinstance(frame, pd.DataFrame)
        assert not frame.empty
def test_init_with_banks(self, bingham_dataset):
    """A Fetcher should initialize when given banks for all its inputs."""
    waveform_source = obsplus.WaveBank(bingham_dataset.waveform_path)
    wbank = waveform_source.update_index()
    event_source = obsplus.EventBank(bingham_dataset.event_path)
    ebank = event_source.update_index()
    sbank = bingham_dataset.station_client
    # ignore warnings (endtimes of inv are out of range)
    with suppress_warnings():
        fetcher = Fetcher(waveforms=wbank, events=ebank, stations=sbank)
    # both derived dataframes must be real, populated DataFrames
    for frame in (fetcher.event_df, fetcher.station_df):
        assert isinstance(frame, pd.DataFrame)
        assert not frame.empty
def ebank_high_version(self, tmpdir, monkeypatch):
    """Return the default bank whose index was written under a high version."""
    target_version = self.high_version_str
    # temporarily patch the obsplus version so the high version string is
    # what gets saved to disk with the index
    monkeypatch.setattr(obsplus, "__last_version__", target_version)
    cat = obspy.read_events()
    bank = obsplus.EventBank(tmpdir)
    ebank = bank.put_events(cat, update_index=False)
    # write index
    with suppress_warnings():
        ebank.update_index()
    # restore the real version before handing the bank to the test
    monkeypatch.undo()
    assert ebank._index_version == target_version
    assert obsplus.__last_version__ != target_version
    return ebank
def test_get_events_empty_bank(self, tmp_path):
    """
    Calling get_events on a bank without an index should still return
    every event stored in the directory.
    """
    expected_cat = obspy.read_events()
    catalog_to_directory(expected_cat, tmp_path, event_bank_index=False)
    expected = {str(event.resource_id): event for event in expected_cat}
    # get a bank and make sure no index file is present
    bank = obsplus.EventBank(tmp_path)
    index_file = Path(bank.index_path)
    if index_file.exists():
        index_file.unlink()
    # compare by resource id since the order of events can change
    returned = {str(event.resource_id): event for event in bank.get_events()}
    assert returned == expected
def bar_ebank(self, tmpdir, monkeypatch):
    """
    Return an event bank specifically for testing ProgressBar.

    Each event of the default catalog is written straight into ``tmpdir``
    as a quakeml file named after the last 5 characters of its resource
    id, and the bank is returned without an index having been built.
    """
    cat = obspy.read_events()
    path = Path(tmpdir)
    for event in cat:
        file_name = str(event.resource_id)[-5:] + ".xml"
        event.write(str(path / file_name), "quakeml")
    ebank = obsplus.EventBank(path)
    # set the interval and min files to 1 to ensure bar gets called
    monkeypatch.setattr(ebank, "_bar_update_interval", 1)
    monkeypatch.setattr(ebank, "_min_files_for_bar", 1)
    # there must be no index yet so that there are files left to update.
    # NOTE(review): check index_path (used elsewhere to locate the index
    # file) rather than index_name, which looks like a bare file name and
    # would be resolved against the current working directory - confirm.
    assert not Path(ebank.index_path).exists()
    return ebank
def ebank_with_event_no_time(self, tmp_path_factory):
    """
    Build an event bank in which the first event's only origin is empty
    (and therefore carries no time).
    """
    bank_dir = Path(tmp_path_factory.mktemp("basic"))
    cat = obspy.read_events()
    # swap the first event's origins for a single blank one
    first_event = cat[0]
    first_event.origins.clear()
    blank_origin = ev.Origin()
    first_event.origins.append(blank_origin)
    first_event.preferred_origin_id = blank_origin.resource_id
    # write every event to its own numbered file
    for index, event in enumerate(cat):
        event.write(bank_dir / f"{index}.xml", format="quakeml")
    # init, update, return bank
    bank = obsplus.EventBank(bank_dir)
    return bank.update_index()
def test_events_different_time_same_id_not_duplicated(self, tmpdir):
    """
    Re-saving an event whose origin time changed but whose id did not
    should not add an entry to the bank; the stored event should carry
    the new time.
    """
    cat = obspy.read_events()
    event = cat[0]
    first_id = str(event.resource_id)
    bank = obsplus.EventBank(Path(tmpdir)).put_events(cat)
    index_before = bank.read_index().set_index("event_id")
    # nudge the preferred origin time of the first event
    origin = get_preferred(event, "origin")
    origin.time += 10
    # save to disk again
    bank.put_events(event)
    # the number of indexed events must be unchanged
    index_after = bank.read_index()
    assert len(index_before) == len(index_after)
    # the stored copy should be unique and have the updated time
    fetched = bank.get_events(event_id=first_id)
    assert len(fetched) == 1
    stored_origin = get_preferred(fetched[0], "origin")
    assert stored_origin.time == origin.time
def download_events(self):
    """
    Read ``events.xml`` from the source directory and put its events
    into the event bank at ``self.event_path``.
    """
    events_file = self.source_path / "events.xml"
    cat = obspy.read_events(str(events_file))
    bank = obsplus.EventBank(self.event_path)
    bank.put_events(cat)
def download_events(self):
    """Put obspy's default example events into the event directory."""
    default_cat = obspy.read_events()
    bank = obsplus.EventBank(self.event_path)
    bank.put_events(default_cat)
def download_events(self):
    """Load events.xml from the source path and store it in the event bank."""
    source_file = str(self.source_path / "events.xml")
    cat = obspy.read_events(source_file)
    event_bank = obsplus.EventBank(self.event_path)
    event_bank.put_events(cat)