Example #1
File: fetcher.py Project: niowniow/obsplus
    def __call__(
        self,
        time_arg: event_time_type,
        time_before: Optional[float] = None,
        time_after: Optional[float] = None,
        *args,
        **kwargs,
    ) -> obspy.Stream:
        """
        Using a reference time, return a waveform stream that encompasses that time.

        Parameters
        ----------
        time_arg
            The argument that indicates a reference time. Can be a catalog
            of length one, an event, a float, or a UTCDateTime object.
        time_before
            The time before time_arg to include in waveforms
        time_after
            The time after time_arg to include in waveforms

        Returns
        -------
        obspy.Stream
        """
        tbefore = to_timedelta64(time_before, default=self.time_before)
        tafter = to_timedelta64(time_after, default=self.time_after)
        assert (tbefore is not None) and (tafter is not None)
        # get the reference time from the object
        time = to_datetime64(get_reference_time(time_arg))
        t1 = time - tbefore
        t2 = time + tafter
        return self.get_waveforms(starttime=t1, endtime=t2, **kwargs)
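
For context, a minimal usage sketch of this __call__ interface (hedged: the
load_dataset/get_fetcher chain and the "bingham_test" dataset name are
assumptions about obsplus's public API, not part of the example above):

    import obspy
    import obsplus

    # Assumed setup: obtain a Fetcher from a bundled test dataset
    # (the dataset name is illustrative).
    fetcher = obsplus.load_dataset("bingham_test").get_fetcher()
    # Request 10 s before and 60 s after a reference time.
    time = obspy.UTCDateTime("2013-04-11T00:00:00")
    st = fetcher(time, time_before=10, time_after=60)
    assert isinstance(st, obspy.Stream)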
Example #2
 def bing_pick_bulk(self, bingham_catalog):
     """ Create a dataframe from the bingham picks. """
     picks = obsplus.picks_to_df(bingham_catalog)
     df = picks[list(NSLC)].copy()  # copy to avoid SettingWithCopyWarning
     df["starttime"] = picks["time"] - to_timedelta64(1.011)
     df["endtime"] = picks["time"] + to_timedelta64(7.011)
     return df
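
The pattern above relies on to_timedelta64 turning float seconds into numpy
timedelta64 values that pandas can add to a datetime64 column. A minimal
sketch of the same arithmetic with plain pandas/numpy (assuming
to_timedelta64(1.011) behaves like a ~1011 ms offset):

    import numpy as np
    import pandas as pd

    # datetime64 column plus/minus timedelta64 offsets, mirroring the
    # starttime/endtime columns built above.
    times = pd.Series(pd.to_datetime(["2013-04-11T00:00:00"]))
    starttime = times - np.timedelta64(1011, "ms")  # ~ to_timedelta64(1.011)
    endtime = times + np.timedelta64(7011, "ms")    # ~ to_timedelta64(7.011)
    print(starttime[0], endtime[0])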
Example #3
 def _add_starttime_end(df):
     """ Add the time window start and end """
     # Note: reftime, max_duration, min_duration, _get_earliest_s_time, and
     # _get_extrema_like_df come from the enclosing scope/module in the
     # original source; they are not defined in this snippet.
     # fill references with start times of phases if empty
     df.loc[df["twindow_ref"].isnull(), "twindow_ref"] = df["time"]
     # Fill NaNs and convert to timedelta
     # Double type conversion is necessary to get everything into the same dtype
     twindow_start = (
         df["twindow_start"].fillna(0.0).astype("float64").astype("timedelta64[ns]")
     )
     twindow_end = (
         df["twindow_end"].fillna(0.0).astype("float64").astype("timedelta64[ns]")
     )
     # Determine start/end times of phase windows
     df["starttime"] = df["twindow_ref"] - twindow_start
     df["endtime"] = df["twindow_ref"] + twindow_end
     # add travel time
     df["travel_time"] = df["time"] - reftime
     # get earliest s phase by station
     _s_start = df.groupby(list(NSLC[:2])).apply(_get_earliest_s_time)
     s_start = _s_start.rename("s_start").to_frame().reset_index()
     # merge back into pick_df, use either defined window or S phase, whichever
     # is smaller.
     dd2 = df.merge(s_start, on=["network", "station"], how="left")
     # get dataframe indices for P
     p_inds = df[df.phase_hint == "P"].index
     # make sure P end times don't exceed s start times
     endtime_or_s_start = dd2[["s_start", "endtime"]].min(axis=1, skipna=True)
     df.loc[p_inds, "endtime"] = endtime_or_s_start[p_inds]
     duration = abs(df["endtime"] - df["starttime"])
     # Make sure all durations are under max_duration, else truncate them
     if max_duration is not None:
         max_dur = to_timedelta64(_get_extrema_like_df(df, max_duration))
         larger_than_max = duration > max_dur
         df.loc[larger_than_max, "endtime"] = df["starttime"] + max_dur
     # Make sure all durations are at least min_duration, else expand them
     if min_duration is not None:
         min_dur = to_timedelta64(_get_extrema_like_df(df, min_duration))
         smaller_than_min = duration < min_dur
         df.loc[smaller_than_min, "endtime"] = df["starttime"] + min_dur
     # sanity checks
     assert (df["endtime"] >= df["starttime"]).all()
     assert not (df["starttime"].isnull()).any()
     return df
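
If the window columns hold float seconds, the fillna-then-convert step above
can be expressed more explicitly with pd.to_timedelta; a minimal sketch (the
seconds unit is an assumption, not stated in the snippet):

    import numpy as np
    import pandas as pd

    # NaNs become zero-length windows; floats are interpreted as seconds.
    twindow = pd.Series([0.2, np.nan, 1.5])
    twindow_td = pd.to_timedelta(twindow.fillna(0.0), unit="s")
    print(twindow_td.dtype)  # timedelta64[ns]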
Example #4
File: fetcher.py Project: niowniow/obsplus
    def yield_event_waveforms(
        self,
        time_before: Optional[float] = None,
        time_after: Optional[float] = None,
        reference: Union[str, Callable] = "origin",
        raise_on_fail: bool = True,
    ) -> Tuple[str, Stream]:
        """
        Yield event_id and streams for each event in the events.

        Parameters
        ----------
        time_before
            The time before (in seconds) the reference that will be included
            in the waveforms if possible.
        time_after
            The time after (in seconds) the reference that will be included
            in the waveforms if possible.
        reference
            A str that indicates how the starttime of the trace should be
            determined. The following are supported:
                origin - use the origin time of the event for each channel
                p - use the first p times as the start of the station traces
                s - use the first s times as the start of the station traces
            If a station doesn't have p or s picks and "p" or "s" is used,
            its streams will not be returned.
        raise_on_fail
            If True, re-raise any exception caught during waveform fetching,
            else continue to the next event.

        Yields
        ------
        EventStream
            A tuple of (event_id, stream).
        """
        assert reference.lower() in self.reference_funcs
        tb = to_timedelta64(time_before, default=self.time_before)
        ta = to_timedelta64(time_after, default=self.time_after)
        assert (tb is not None) and (ta is not None)
        # get reference times
        event_ids = self.event_df.event_id.values
        reftimes = {
            x: self.reference_funcs[reference](self, x)
            for x in event_ids
        }
        # if using a wavebank preload index over entire time-span for speedup
        if isinstance(self.waveform_client, WaveBank):
            mt = min([
                x.min() if hasattr(x, "min") else x for x in reftimes.values()
            ])
            mx = max([
                x.max() if hasattr(x, "max") else x for x in reftimes.values()
            ])
            index = self.waveform_client.read_index(starttime=mt, endtime=mx)
            get_bulk_wf = partial(self._get_bulk_wf, index=index)
        else:
            get_bulk_wf = self._get_bulk_wf
        # iterate each event in the events and yield the waveform
        for event_id in event_ids:
            # make sure ser is either a single datetime or a series of datetimes
            ti_ = to_datetime64(reftimes[event_id])
            bulk_args = self._get_bulk_arg(starttime=ti_ - tb,
                                           endtime=ti_ + ta)
            try:
                yield EventStream(event_id, get_bulk_wf(bulk_args))
            except Exception:
                if raise_on_fail:
                    raise
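
A hedged usage sketch for this generator, unpacking each yielded
(event_id, stream) pair (the dataset name is illustrative, as before):

    import obsplus

    fetcher = obsplus.load_dataset("bingham_test").get_fetcher()
    for event_id, stream in fetcher.yield_event_waveforms(time_before=1, time_after=10):
        print(event_id, len(stream))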
Example #5
class TestGetGaps:
    """ test that the get_gaps method returns info about gaps """

    start = UTC("2017-09-18")
    end = UTC("2017-09-28")
    sampling_rate = 1

    gaps = [
        (UTC("2017-09-18T18-00-00"), UTC("2017-09-18T19-00-00")),
        (UTC("2017-09-18T20-00-00"), UTC("2017-09-18T20-00-15")),
        (UTC("2017-09-20T01-25-35"), UTC("2017-09-20T01-25-40")),
        (UTC("2017-09-21T05-25-35"), UTC("2017-09-25T10-36-42")),
    ]

    durations = np.array([y - x for x, y in gaps])

    durations_timedelta = np.array([to_timedelta64(float(x)) for x in durations])

    overlap = 0

    def _make_gappy_archive(self, path):
        """ Create the gappy archive defined by params in class. """
        ArchiveDirectory(
            path,
            self.start,
            self.end,
            self.sampling_rate,
            gaps=self.gaps,
            overlap=self.overlap,
        ).create_directory()
        return path

    # fixtures
    @pytest.fixture(scope="class")
    def gappy_dir(self, class_tmp_dir):
        """ create a directory that has gaps in it """
        self._make_gappy_archive(join(class_tmp_dir, "temp1"))
        return class_tmp_dir

    @pytest.fixture(scope="class")
    def gappy_bank(self, gappy_dir):
        """ init a sbank on the gappy data """
        bank = WaveBank(gappy_dir)
        # make sure index is updated after gaps are introduced
        if os.path.exists(bank.index_path):
            os.remove(bank.index_path)
        bank.update_index()
        return bank

    @pytest.fixture()
    def gappy_and_contiguous_bank(self, tmp_path):
        """ Create a directory with gaps and continuous data """
        # first create directory with gaps
        self._make_gappy_archive(tmp_path)
        # then write contiguous data with no gaps
        st = obspy.read()
        for num, tr in enumerate(st):
            tr.stats.station = "GOOD"
            tr.write(str(tmp_path / f"good_{num}.mseed"), "mseed")
        return WaveBank(tmp_path).update_index()

    @pytest.fixture(scope="class")
    def empty_bank(self):
        """ create a Sbank object initated on an empty directory """
        with tempfile.TemporaryDirectory() as td:
            bank = WaveBank(td)
            yield bank

    @pytest.fixture(scope="class")
    def gap_df(self, gappy_bank):
        """ return a gap df from the gappy bank"""
        return gappy_bank.get_gaps_df()

    @pytest.fixture(scope="class")
    def uptime_df(self, gappy_bank):
        """ return the uptime dataframe from the gappy bank """
        return gappy_bank.get_uptime_df()

    @pytest.fixture()
    def uptime_default(self, default_wbank):
        """ return the uptime from the default stream bank. """
        return default_wbank.get_uptime_df()

    # tests
    def test_gaps_length(self, gap_df, gappy_bank):
        """ ensure each of the gaps shows up in df """
        assert isinstance(gap_df, pd.DataFrame)
        assert not gap_df.empty
        group = gap_df.groupby(["network", "station", "location", "channel"])
        sampling_period = gap_df["sampling_period"].iloc[0]
        for gnum, df in group:
            assert len(df) == len(self.gaps)
            dif = abs(df["gap_duration"] - self.durations_timedelta)
            assert (dif < (1.5 * sampling_period)).all()

    def test_gappy_uptime_df(self, uptime_df):
        """ ensure the uptime df is of correct type and accurate """
        assert isinstance(uptime_df, pd.DataFrame)
        gap_duration = sum([x[1] - x[0] for x in self.gaps])
        duration = self.end - self.start
        uptime_percent = (duration - gap_duration) / duration
        assert (abs(uptime_df["availability"] - uptime_percent) < 0.001).all()

    def test_uptime_default(self, uptime_default):
        """
        Ensure the uptime of the basic bank (no gaps) has expected times/channels.
        """
        df = uptime_default
        st = obspy.read()
        assert not df.empty, "uptime df is empty"
        assert len(df) == len(st)
        assert {tr.id for tr in st} == set(obsplus.utils.get_seed_id_series(df))
        assert (df["gap_duration"] == EMPTYTD64).all()

    def test_empty_directory(self, empty_bank):
        """ ensure an empty bank get_gaps returns and empty df with expected
        columns """
        gaps = empty_bank.get_gaps_df()
        assert not len(gaps)
        assert set(WaveBank.gap_columns).issubset(set(gaps.columns))

    def test_kemmerer_uptime(self, kem_fetcher):
        """ ensure the kemmerer bank returns an uptime df"""
        bank = kem_fetcher.waveform_client
        df = bank.get_uptime_df()
        diff = abs(df["uptime"] - df["duration"])
        tolerance = np.timedelta64(1, "s")
        assert (diff < tolerance).all()

    def test_gappy_and_contiguous_uptime(self, gappy_and_contiguous_bank):
        """
        Ensure when there are gappy streams and contiguous streams
        get_uptime still returns correct results.
        """
        wbank = gappy_and_contiguous_bank
        index = wbank.read_index()
        uptime = wbank.get_uptime_df()
        # make sure the same seed ids are in the index as uptime df
        seeds_from_index = set(obsplus.utils.get_seed_id_series(index))
        seeds_from_uptime = set(obsplus.utils.get_seed_id_series(uptime))
        assert seeds_from_index == seeds_from_uptime
        assert not uptime.isnull().any().any()
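
Outside a test harness, the gap/uptime queries exercised above can be used
directly on a bank; a minimal sketch, assuming a directory of waveform files
at "waveforms/" (the path is hypothetical):

    import obsplus

    bank = obsplus.WaveBank("waveforms")
    bank.update_index()
    gaps = bank.get_gaps_df()      # one row per gap, with a gap_duration column
    uptime = bank.get_uptime_df()  # includes an availability fraction per channel
    print(len(gaps), uptime["availability"].mean())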