def __call__(
    self,
    time_arg: event_time_type,
    time_before: Optional[float] = None,
    time_after: Optional[float] = None,
    *args,
    **kwargs,
) -> obspy.Stream:
    """
    Return the waveforms surrounding a reference time.

    Parameters
    ----------
    time_arg
        The object the reference time is extracted from (through
        ``get_reference_time``). Can be a one length events, an event,
        a float, or a UTCDateTime object.
    time_before
        The time before the reference time to include in the waveforms.
        When None, ``self.time_before`` is used.
    time_after
        The time after the reference time to include in the waveforms.
        When None, ``self.time_after`` is used.
    *args
        Accepted but not used by this method.
    **kwargs
        Passed on to ``self.get_waveforms``.

    Returns
    -------
    obspy.Stream
    """
    # resolve both paddings, falling back to the instance level defaults
    lead = to_timedelta64(time_before, default=self.time_before)
    lag = to_timedelta64(time_after, default=self.time_after)
    assert not (lead is None or lag is None)
    # extract the reference time and build the window around it
    reference_time = to_datetime64(get_reference_time(time_arg))
    return self.get_waveforms(
        starttime=reference_time - lead,
        endtime=reference_time + lag,
        **kwargs,
    )
def bing_pick_bulk(self, bingham_catalog):
    """
    Create a dataframe of request windows from the bingham picks.

    The returned frame holds the ``NSLC`` columns of the pick dataframe
    plus ``starttime`` and ``endtime`` columns bracketing each pick time.

    Parameters
    ----------
    bingham_catalog
        The catalog whose picks are converted with ``obsplus.picks_to_df``.

    Returns
    -------
    The new dataframe; the pick dataframe itself is left unmodified.
    """
    picks = obsplus.picks_to_df(bingham_catalog)
    # Take an explicit copy: adding columns to a bare column selection of
    # ``picks`` makes pandas emit a SettingWithCopyWarning.
    df = picks[list(NSLC)].copy()
    # window offsets around each pick (presumably seconds; verify against
    # to_timedelta64)
    df["starttime"] = picks["time"] - to_timedelta64(1.011)
    df["endtime"] = picks["time"] + to_timedelta64(7.011)
    return df
def _add_starttime_end(df):
    """
    Add the time window start and end to the pick dataframe.

    The ``starttime``, ``endtime`` and ``travel_time`` columns are written
    to ``df`` in place. P windows are clipped so they do not run past the
    earliest S time found for the same network/station, and window lengths
    are then constrained by ``max_duration`` / ``min_duration``.

    ``reftime``, ``max_duration`` and ``min_duration`` are not parameters;
    they are read from the enclosing scope.

    Parameters
    ----------
    df
        Dataframe with (at least) the ``time``, ``twindow_ref``,
        ``twindow_start``, ``twindow_end`` and ``phase_hint`` columns plus
        the network and station columns.

    Returns
    -------
    The same dataframe, modified in place.
    """
    # fill references with start times of phases if empty
    df.loc[df["twindow_ref"].isnull(), "twindow_ref"] = df["time"]
    # Fill NaNs and convert to timedelta
    # Double type conversion is necessary to get everything into the same dtype
    twindow_start = (
        df["twindow_start"].fillna(0.0).astype("float64").astype("timedelta64[ns]")
    )
    twindow_end = (
        df["twindow_end"].fillna(0.0).astype("float64").astype("timedelta64[ns]")
    )
    # Determine start/end times of phase windows
    df["starttime"] = df["twindow_ref"] - twindow_start
    df["endtime"] = df["twindow_ref"] + twindow_end
    # add travel time
    df["travel_time"] = df["time"] - reftime
    # get earliest s phase by station
    _s_start = df.groupby(list(NSLC[:2])).apply(_get_earliest_s_time)
    s_start = _s_start.rename("s_start").to_frame().reset_index()
    # merge back into pick_df, use either defined window or S phase, whichever
    # is smaller.
    dd2 = df.merge(s_start, on=["network", "station"], how="left")
    # get dataframe indices for P
    # NOTE(review): the merge result has a fresh index, so indexing it with
    # labels taken from ``df`` assumes ``df`` carries a default integer
    # index -- confirm with the callers.
    p_inds = df[df.phase_hint == "P"].index
    # make sure P end times don't exceed s start times
    endtime_or_s_start = dd2[["s_start", "endtime"]].min(axis=1, skipna=True)
    df.loc[p_inds, "endtime"] = endtime_or_s_start[p_inds]
    duration = abs(df["endtime"] - df["starttime"])
    # Make sure all value are under phase duration time, else truncate them
    if max_duration is not None:
        max_dur = to_timedelta64(_get_extrema_like_df(df, max_duration))
        larger_than_max = duration > max_dur
        # Truncate with the same limit the mask was computed from, so the
        # max branch mirrors the min branch below.
        df.loc[larger_than_max, "endtime"] = df["starttime"] + max_dur
    # Make sure all values are at least min_phase_duration, else expand them
    if min_duration is not None:
        min_dur = to_timedelta64(_get_extrema_like_df(df, min_duration))
        small_than_min = duration < min_dur
        df.loc[small_than_min, "endtime"] = df["starttime"] + min_dur
    # sanity checks
    assert (df["endtime"] >= df["starttime"]).all()
    assert not (df["starttime"].isnull()).any()
    return df
def yield_event_waveforms(
    self,
    time_before: Optional[float] = None,
    time_after: Optional[float] = None,
    reference: Union[str, Callable] = "origin",
    raise_on_fail: bool = True,
) -> Tuple[str, Stream]:
    """
    Yield event_id and streams for each event in the events.

    Parameters
    ----------
    time_before
        The time before (in seconds) the reference that will be included
        in the waveforms if possible. Defaults to ``self.time_before``.
    time_after
        The time after (in seconds) the reference that will be included
        in the waveforms if possible. Defaults to ``self.time_after``.
    reference
        A str that indicates how the starttime of the trace should be
        determined; it is matched case-insensitively against the keys of
        ``self.reference_funcs``. The following are supported:
            origin - use the origin time of the event for each channel
            p - use the first p times as the start of the station traces
            s - use the first s times as the start of the station traces
        If a station doesn't have p or s picks and "p" or "s" is used,
        its streams will not be returned.
        NOTE(review): the annotation also allows a callable, but this
        implementation only handles string keys.
    raise_on_fail
        If True, re-raise an exception if one is caught during waveform
        fetching, else continue to the next event.

    Yields
    ------
    EventStream
        One ``EventStream(event_id, stream)`` per event whose waveforms
        were fetched.
    """
    # Normalize once so the validation and the lookup use the same key;
    # previously "Origin" passed the check and then raised a KeyError.
    reference = reference.lower()
    assert reference in self.reference_funcs
    tb = to_timedelta64(time_before, default=self.time_before)
    ta = to_timedelta64(time_after, default=self.time_after)
    assert (tb is not None) and (ta is not None)
    # get reference times
    event_ids = self.event_df.event_id.values
    reference_func = self.reference_funcs[reference]
    reftimes = {x: reference_func(self, x) for x in event_ids}
    # if using a wavebank preload index over entire time-span for speedup;
    # skipped when there are no events because min/max of nothing raises.
    if isinstance(self.waveform_client, WaveBank) and reftimes:
        mt = min(x.min() if hasattr(x, "min") else x for x in reftimes.values())
        mx = max(x.max() if hasattr(x, "max") else x for x in reftimes.values())
        # NOTE(review): the preloaded span is not padded by tb/ta --
        # confirm read_index covers the edges of the requested windows.
        index = self.waveform_client.read_index(starttime=mt, endtime=mx)
        get_bulk_wf = partial(self._get_bulk_wf, index=index)
    else:
        get_bulk_wf = self._get_bulk_wf
    # iterate each event in the events and yield the waveform
    for event_id in event_ids:
        # make sure ser is either a single datetime or a series of datetimes
        ti_ = to_datetime64(reftimes[event_id])
        bulk_args = self._get_bulk_arg(starttime=ti_ - tb, endtime=ti_ + ta)
        # only the fetch is guarded; the yield stays outside the try so the
        # handler is limited to errors raised while getting waveforms
        try:
            stream = get_bulk_wf(bulk_args)
        except Exception:
            if raise_on_fail:
                raise
            continue
        yield EventStream(event_id, stream)
class TestGetGaps:
    """ Tests for the gap and uptime dataframes returned by a WaveBank """

    # time span and sampling rate handed to ArchiveDirectory
    start = UTC("2017-09-18")
    end = UTC("2017-09-28")
    sampling_rate = 1
    # (gap start, gap end) pairs handed to ArchiveDirectory
    gaps = [
        (UTC("2017-09-18T18-00-00"), UTC("2017-09-18T19-00-00")),
        (UTC("2017-09-18T20-00-00"), UTC("2017-09-18T20-00-15")),
        (UTC("2017-09-20T01-25-35"), UTC("2017-09-20T01-25-40")),
        (UTC("2017-09-21T05-25-35"), UTC("2017-09-25T10-36-42")),
    ]
    # length of every gap, first as-is and then converted with to_timedelta64
    durations = np.array([gap_end - gap_start for gap_start, gap_end in gaps])
    durations_timedelta = np.array(
        [to_timedelta64(float(gap_length)) for gap_length in durations]
    )
    overlap = 0

    def _make_gappy_archive(self, path):
        """ Build the archive described by the class attributes in path. """
        archive = ArchiveDirectory(
            path,
            self.start,
            self.end,
            self.sampling_rate,
            gaps=self.gaps,
            overlap=self.overlap,
        )
        archive.create_directory()
        return path

    # fixtures
    @pytest.fixture(scope="class")
    def gappy_dir(self, class_tmp_dir):
        """ return a directory that contains an archive with gaps """
        archive_path = join(class_tmp_dir, "temp1")
        self._make_gappy_archive(archive_path)
        return class_tmp_dir

    @pytest.fixture(scope="class")
    def gappy_bank(self, gappy_dir):
        """ return a WaveBank, with a rebuilt index, on the gappy data """
        bank = WaveBank(gappy_dir)
        # drop any existing index so it is rebuilt after gaps are introduced
        if os.path.exists(bank.index_path):
            os.remove(bank.index_path)
        bank.update_index()
        return bank

    @pytest.fixture()
    def gappy_and_contiguous_bank(self, tmp_path):
        """ return a bank holding both the gappy archive and obspy's example stream """
        # start with the archive that has gaps
        self._make_gappy_archive(tmp_path)
        # then add the example traces under their own station code
        stream = obspy.read()
        for num, tr in enumerate(stream):
            tr.stats.station = "GOOD"
            out_path = str(tmp_path / f"good_{num}.mseed")
            tr.write(out_path, "mseed")
        bank = WaveBank(tmp_path)
        return bank.update_index()

    @pytest.fixture(scope="class")
    def empty_bank(self):
        """ yield a WaveBank initiated on an empty temporary directory """
        with tempfile.TemporaryDirectory() as td:
            yield WaveBank(td)

    @pytest.fixture(scope="class")
    def gap_df(self, gappy_bank):
        """ return the gap dataframe of the gappy bank """
        return gappy_bank.get_gaps_df()

    @pytest.fixture(scope="class")
    def uptime_df(self, gappy_bank):
        """ return the uptime dataframe of the gappy bank """
        return gappy_bank.get_uptime_df()

    @pytest.fixture()
    def uptime_default(self, default_wbank):
        """ return the uptime dataframe of the default wave bank """
        return default_wbank.get_uptime_df()

    # tests
    def test_gaps_length(self, gap_df, gappy_bank):
        """ every defined gap should show up, with the right length, per channel """
        assert isinstance(gap_df, pd.DataFrame)
        assert not gap_df.empty
        by_channel = gap_df.groupby(["network", "station", "location", "channel"])
        sampling_period = gap_df["sampling_period"].iloc[0]
        tolerance = 1.5 * sampling_period
        for _, channel_gaps in by_channel:
            assert len(channel_gaps) == len(self.gaps)
            misfit = abs(channel_gaps["gap_duration"] - self.durations_timedelta)
            assert (misfit < tolerance).all()

    def test_gappy_uptime_df(self, uptime_df):
        """ the uptime df should be a dataframe with the expected availability """
        assert isinstance(uptime_df, pd.DataFrame)
        total_gap = sum(gap_end - gap_start for gap_start, gap_end in self.gaps)
        span = self.end - self.start
        expected_availability = (span - total_gap) / span
        deviation = abs(uptime_df["availability"] - expected_availability)
        assert (deviation < 0.001).all()

    def test_uptime_default(self, uptime_default):
        """
        The uptime of the basic bank (no gaps) should have the expected
        times/channels.
        """
        stream = obspy.read()
        assert not uptime_default.empty, "uptime df is empty"
        assert len(uptime_default) == len(stream)
        expected_ids = {tr.id for tr in stream}
        found_ids = set(obsplus.utils.get_seed_id_series(uptime_default))
        assert expected_ids == found_ids
        assert (uptime_default["gap_duration"] == EMPTYTD64).all()

    def test_empty_directory(self, empty_bank):
        """ an empty bank should return an empty gap df with the expected columns """
        gap_df = empty_bank.get_gaps_df()
        assert len(gap_df) == 0
        assert set(WaveBank.gap_columns) <= set(gap_df.columns)

    def test_kemmerer_uptime(self, kem_fetcher):
        """ uptime and duration of the kemmerer bank should agree within a second """
        uptime_df = kem_fetcher.waveform_client.get_uptime_df()
        difference = abs(uptime_df["uptime"] - uptime_df["duration"])
        assert (difference < np.timedelta64(1, "s")).all()

    def test_gappy_and_contiguous_uptime(self, gappy_and_contiguous_bank):
        """
        With both gappy and contiguous streams in the bank, the uptime df
        should cover the same seed ids as the index and contain no nulls.
        """
        bank = gappy_and_contiguous_bank
        index = bank.read_index()
        uptime = bank.get_uptime_df()
        # the same seed ids must be in the index and in the uptime df
        index_seeds = set(obsplus.utils.get_seed_id_series(index))
        uptime_seeds = set(obsplus.utils.get_seed_id_series(uptime))
        assert index_seeds == uptime_seeds
        assert not uptime.isnull().any().any()