예제 #1
파일: fetcher.py 프로젝트: niowniow/obsplus
    def __call__(
        self,
        time_arg: event_time_type,
        time_before: Optional[float] = None,
        time_after: Optional[float] = None,
        **kwargs,
    ) -> obspy.Stream:
        """
        Using a reference time, return waveforms that encompass that time.

        Parameters
        ----------
        time_arg
            The argument that will indicate a start time. Can be a
            one-length events object, an event, a float, or a UTCDateTime
            object.
        time_before
            The time before time_arg to include in the waveforms.
            ``self.time_before`` is handed to ``to_timedelta64`` as the
            default.
        time_after
            The time after time_arg to include in the waveforms.
            ``self.time_after`` is handed to ``to_timedelta64`` as the
            default.
        **kwargs
            Forwarded unchanged to ``self.get_waveforms``.

        Returns
        -------
        obspy.Stream
            The result of ``self.get_waveforms`` for the window
            ``[time - time_before, time + time_after]``.
        """
        tbefore = to_timedelta64(time_before, default=self.time_before)
        tafter = to_timedelta64(time_after, default=self.time_after)
        # Both offsets are required to build the window below.
        assert (tbefore is not None) and (tafter is not None)
        # get the reference time from the object
        time = to_datetime64(get_reference_time(time_arg))
        t1 = time - tbefore
        t2 = time + tafter
        return self.get_waveforms(starttime=t1, endtime=t2, **kwargs)
예제 #2
 def bing_pick_bulk(self, bingham_catalog):
     """
     Create a dataframe of time windows from the bingham picks.

     The result holds the columns named in ``NSLC`` for every pick, plus a
     ``starttime`` and an ``endtime`` column placed around the pick time.
     """
     picks = obsplus.picks_to_df(bingham_catalog)
     # Take an explicit copy: the new columns are then added to an
     # independent frame instead of to a column selection of ``picks``,
     # which is what makes pandas emit SettingWithCopyWarning.
     df = picks[list(NSLC)].copy()
     # NOTE(review): 1.011 / 7.011 are presumably offsets in seconds - confirm
     # against to_timedelta64.
     df["starttime"] = picks["time"] - to_timedelta64(1.011)
     df["endtime"] = picks["time"] + to_timedelta64(7.011)
     return df
예제 #3
 def _add_starttime_end(df):
     """
     Add the time window start and end to ``df``.

     Writes the ``starttime``, ``endtime`` and ``travel_time`` columns (and
     fills empty ``twindow_ref`` values) on ``df`` itself, then returns it.

     NOTE(review): ``reftime``, ``max_duration`` and ``min_duration`` are not
     bound inside this function; presumably they are supplied by an
     enclosing scope - confirm.
     """
     # fill references with start times of phases if empty
     df.loc[df["twindow_ref"].isnull(), "twindow_ref"] = df["time"]
     # Fill NaNs and convert to timedelta
     # Double type conversion is necessary to get everything into the same dtype
     # NOTE(review): the final dtype is assumed to be "timedelta64[ns]" - confirm.
     twindow_start = (
         df["twindow_start"]
         .fillna(0.0)
         .astype("float64")
         .astype("timedelta64[ns]")
     )
     twindow_end = (
         df["twindow_end"]
         .fillna(0.0)
         .astype("float64")
         .astype("timedelta64[ns]")
     )
     # Determine start/end times of phase windows
     df["starttime"] = df["twindow_ref"] - twindow_start
     df["endtime"] = df["twindow_ref"] + twindow_end
     # add travel time
     df["travel_time"] = df["time"] - reftime
     # get earliest s phase by station
     _s_start = df.groupby(list(NSLC[:2])).apply(_get_earliest_s_time)
     s_start = _s_start.rename("s_start").to_frame().reset_index()
     # merge back into pick_df, use either defined window or S phase, whichever
     # is smaller.
     dd2 = df.merge(s_start, on=["network", "station"], how="left")
     # get dataframe indices for P
     p_inds = df[df.phase_hint == "P"].index
     # make sure P end times don't exceed s start times
     endtime_or_s_start = dd2[["s_start", "endtime"]].min(axis=1, skipna=True)
     df.loc[p_inds, "endtime"] = endtime_or_s_start[p_inds]
     duration = abs(df["endtime"] - df["starttime"])
     # Make sure all value are under phase duration time, else truncate them
     if max_duration is not None:
         max_dur = to_timedelta64(_get_extrema_like_df(df, max_duration))
         larger_than_max = duration > max_dur
         # Truncate with the same converted value used for the comparison,
         # mirroring the min-duration branch below.
         df.loc[larger_than_max, "endtime"] = df["starttime"] + max_dur
     # Make sure all values are at least min_phase_duration, else expand them
     if min_duration is not None:
         min_dur = to_timedelta64(_get_extrema_like_df(df, min_duration))
         small_than_min = duration < min_dur
         df.loc[small_than_min, "endtime"] = df["starttime"] + min_dur
     # sanity checks
     assert (df["endtime"] >= df["starttime"]).all()
     assert not (df["starttime"].isnull()).any()
     return df
예제 #4
파일: fetcher.py 프로젝트: niowniow/obsplus
    def yield_event_waveforms(
        self,
        time_before: Optional[float] = None,
        time_after: Optional[float] = None,
        reference: Union[str, Callable] = "origin",
        raise_on_fail: bool = True,
    ) -> Tuple[str, Stream]:
        """
        Yield event_id and streams for each event in the events.

        Parameters
        ----------
        time_before
            The time before (in seconds) the reference that will be included
            in the waveforms if possible.
        time_after
            The time after (in seconds) the reference that will be included
            in the waveforms if possible.
        reference
            A str that indicates how the starttime of the trace should be
            determined. The following are supported:
                origin - use the origin time of the event for each channel
                p - use the first p times as the start of the station traces
                s - use the first s times as the start of the station traces
            If a station doesn't have p or s picks and "p" or "s" is used,
            its streams will not be returned.
        raise_on_fail
            If True, re-raise an exception if one is caught during waveform
            fetching, else continue to the next event.

        Yields
        ------
        EventStream
            Built from the event id and the waveforms fetched for it.
        """
        # Normalise once so the membership check and the lookup below use the
        # same key; previously only the check was case-insensitive.
        # NOTE(review): the annotation also allows a Callable, but a callable
        # has no ``lower`` - confirm whether callables are really supported.
        reference = reference.lower()
        assert reference in self.reference_funcs
        tb = to_timedelta64(time_before, default=self.time_before)
        ta = to_timedelta64(time_after, default=self.time_after)
        assert (tb is not None) and (ta is not None)
        # get reference times
        event_ids = self.event_df.event_id.values
        get_reference = self.reference_funcs[reference]
        reftimes = {x: get_reference(self, x) for x in event_ids}
        # if using a wavebank preload index over entire time-span for speedup
        # (skipped when there are no events, where min/max would raise)
        if isinstance(self.waveform_client, WaveBank) and reftimes:
            mt = min(
                x.min() if hasattr(x, "min") else x for x in reftimes.values()
            )
            mx = max(
                x.max() if hasattr(x, "max") else x for x in reftimes.values()
            )
            # NOTE(review): the preloaded span is [mt, mx] while requests
            # below cover [t - tb, t + ta] - confirm read_index returns
            # enough of the index for the padded windows.
            index = self.waveform_client.read_index(starttime=mt, endtime=mx)
            get_bulk_wf = partial(self._get_bulk_wf, index=index)
        else:
            get_bulk_wf = self._get_bulk_wf
        # iterate each event in the events and yield the waveform
        for event_id in event_ids:
            # make sure ser is either a single datetime or a series of datetimes
            ti_ = to_datetime64(reftimes[event_id])
            bulk_args = self._get_bulk_arg(starttime=ti_ - tb,
                                           endtime=ti_ + ta)
            try:
                yield EventStream(event_id, get_bulk_wf(bulk_args))
            except Exception:
                # Best effort by design: skip this event unless asked to fail.
                if raise_on_fail:
                    raise
예제 #5
class TestGetGaps:
    """ test that the get_gaps method returns info about gaps """

    start = UTC("2017-09-18")
    end = UTC("2017-09-28")
    sampling_rate = 1

    # (start, end) pairs of the gaps the tests below expect to find
    gaps = [
        (UTC("2017-09-18T18-00-00"), UTC("2017-09-18T19-00-00")),
        (UTC("2017-09-18T20-00-00"), UTC("2017-09-18T20-00-15")),
        (UTC("2017-09-20T01-25-35"), UTC("2017-09-20T01-25-40")),
        (UTC("2017-09-21T05-25-35"), UTC("2017-09-25T10-36-42")),
    ]

    # length of each gap (end minus start)
    durations = np.array([y - x for x, y in gaps])

    # the same lengths converted with to_timedelta64, for comparison against
    # the ``gap_duration`` column
    durations_timedelta = np.array([to_timedelta64(float(x)) for x in durations])

    overlap = 0

    def _make_gappy_archive(self, path):
        """ Create the gappy archive defined by params in class. """
        # NOTE(review): nothing is written to ``path`` here; the code that
        # builds the archive appears to be missing - confirm against the
        # original test module.
        return path

    # fixtures
    # NOTE(review): the methods in this section take pytest fixtures as
    # arguments and are requested by name in the tests below, but no fixture
    # decorators are visible on them - confirm they are registered.
    def gappy_dir(self, class_tmp_dir):
        """ create a directory that has gaps in it """
        self._make_gappy_archive(join(class_tmp_dir, "temp1"))
        return class_tmp_dir

    def gappy_bank(self, gappy_dir):
        """ init a sbank on the gappy data """
        bank = WaveBank(gappy_dir)
        # make sure index is updated after gaps are introduced: drop any
        # stale index, then rebuild it
        # NOTE(review): assumes ``index_path`` points at a file - confirm.
        if os.path.exists(bank.index_path):
            os.remove(bank.index_path)
        bank.update_index()
        return bank

    def gappy_and_contiguous_bank(self, tmp_path):
        """ Create a directory with gaps and continuous data """
        # first create directory with gaps
        self._make_gappy_archive(tmp_path)
        # then write data with no gaps
        st = obspy.read()
        for num, tr in enumerate(st):
            tr.stats.station = "GOOD"
            tr.write(str(tmp_path / f"good_{num}.mseed"), "mseed")
        return WaveBank(tmp_path).update_index()

    def empty_bank(self):
        """ create a Sbank object initiated on an empty directory """
        # yield inside the context so the directory exists while in use
        with tempfile.TemporaryDirectory() as td:
            bank = WaveBank(td)
            yield bank

    def gap_df(self, gappy_bank):
        """ return a gap df from the gappy bank"""
        return gappy_bank.get_gaps_df()

    def uptime_df(self, gappy_bank):
        """ return the uptime dataframe from the gappy bank """
        return gappy_bank.get_uptime_df()

    def uptime_default(self, default_wbank):
        """ return the uptime from the default stream bank. """
        return default_wbank.get_uptime_df()

    # tests
    def test_gaps_length(self, gap_df, gappy_bank):
        """ ensure each of the gaps shows up in df """
        assert isinstance(gap_df, pd.DataFrame)
        assert not gap_df.empty
        group = gap_df.groupby(["network", "station", "location", "channel"])
        sampling_period = gap_df["sampling_period"].iloc[0]
        for gnum, df in group:
            assert len(df) == len(self.gaps)
            # allow each gap to be off by up to 1.5 sampling periods
            dif = abs(df["gap_duration"] - self.durations_timedelta)
            assert (dif < (1.5 * sampling_period)).all()

    def test_gappy_uptime_df(self, uptime_df):
        """ ensure the uptime df is of correct type and accurate """
        assert isinstance(uptime_df, pd.DataFrame)
        gap_duration = sum([x[1] - x[0] for x in self.gaps])
        duration = self.end - self.start
        uptime_percent = (duration - gap_duration) / duration
        assert (abs(uptime_df["availability"] - uptime_percent) < 0.001).all()

    def test_uptime_default(self, uptime_default):
        """
        Ensure the uptime of the basic bank (no gaps) has expected times/channels.
        """
        df = uptime_default
        st = obspy.read()
        assert not df.empty, "uptime df is empty"
        assert len(df) == len(st)
        assert {tr.id for tr in st} == set(obsplus.utils.get_seed_id_series(df))
        assert (df["gap_duration"] == EMPTYTD64).all()

    def test_empty_directory(self, empty_bank):
        """ ensure an empty bank get_gaps returns an empty df with expected
        columns """
        gaps = empty_bank.get_gaps_df()
        assert not len(gaps)
        assert set(WaveBank.gap_columns).issubset(set(gaps.columns))

    def test_kemmerer_uptime(self, kem_fetcher):
        """ ensure the kemmerer bank returns an uptime df"""
        bank = kem_fetcher.waveform_client
        df = bank.get_uptime_df()
        diff = abs(df["uptime"] - df["duration"])
        tolerance = np.timedelta64(1, "s")
        assert (diff < tolerance).all()

    def test_gappy_and_contiguous_uptime(self, gappy_and_contiguous_bank):
        """
        Ensure when there are gappy streams and contiguous streams
        get_uptime still returns correct results.
        """
        wbank = gappy_and_contiguous_bank
        index = wbank.read_index()
        uptime = wbank.get_uptime_df()
        # make sure the same seed ids are in the index as uptime df
        seeds_from_index = set(obsplus.utils.get_seed_id_series(index))
        seeds_from_uptime = set(obsplus.utils.get_seed_id_series(uptime))
        assert seeds_from_index == seeds_from_uptime
        assert not uptime.isnull().any().any()