Example #1
    def __call__(self, starttime, endtime, buffer, **kwargs):
        """Get start and end times, perform in-kernel lookup."""
        # get defaults if starttime or endtime is none
        starttime = None if pd.isnull(starttime) else starttime
        endtime = None if pd.isnull(endtime) else endtime
        starttime = to_datetime64(starttime or SMALLDT64)
        endtime = to_datetime64(endtime or LARGEDT64)
        # find out if the query falls within one of the cached time ranges
        con1 = self.cache.t1 <= starttime
        con2 = self.cache.t2 >= endtime
        con3 = self.cache.kwargs == self._kwargs_to_str(kwargs)
        cached_index = self.cache[con1 & con2 & con3]
        if not len(cached_index):  # query is not cached; get it from hdf5 file
            where = _get_kernel_query(starttime.astype(np.int64),
                                      endtime.astype(np.int64), int(buffer))
            raw_index = self._get_index(where, **kwargs)
            # replace the string "None" with actual None
            ic = self.bank.index_str
            raw_index.loc[:, ic] = raw_index.loc[:, ic].replace(["None"], [None])
            # convert data types used by bank back to those seen by user
            index = raw_index.astype(dict(self.bank._dtypes_output))
            self._set_cache(index, starttime, endtime, kwargs)
        else:
            index = cached_index.iloc[0]["cindex"]
        # trim index to the requested window, keeping rows that overlap it
        con1 = index["starttime"] >= (endtime + buffer)
        con2 = index["endtime"] <= (starttime - buffer)
        return index[~(con1 | con2)]
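The final trim keeps any row whose time span overlaps the buffered query window. A minimal, self-contained sketch of the same interval-overlap idiom (toy data, plain pandas/numpy, no obsplus dependency):

import numpy as np
import pandas as pd

index = pd.DataFrame({
    "starttime": pd.to_datetime(["2020-01-01", "2020-01-03"]),
    "endtime": pd.to_datetime(["2020-01-02", "2020-01-04"]),
})
start, stop = np.datetime64("2020-01-01T12"), np.datetime64("2020-01-02T12")
buffer = np.timedelta64(0, "s")
con1 = index["starttime"] >= (stop + buffer)   # starts after the window
con2 = index["endtime"] <= (start - buffer)    # ends before the window
print(index[~(con1 | con2)])  # only the first row overlaps and is kept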
Example #2
def waveform_df():
    """ Create a dataframe with the basic required columns. """
    st = obspy.read()
    cols = list(NSLC) + ["starttime", "endtime"]
    df = pd.DataFrame([tr.stats for tr in st])[cols]
    df["starttime"] = to_datetime64(df["starttime"])
    df["endtime"] = to_datetime64(df["endtime"])
    return df
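Note that obspy.read() with no arguments loads obspy's bundled example stream (three BW.RJOB channels), so this fixture needs no external data. A rough stand-in showing the same column handling without obspy, where pd.to_datetime plays the role of to_datetime64 (minus its handling of UTCDateTime objects and nulls):

import pandas as pd

stats = [{"network": "BW", "station": "RJOB", "location": "", "channel": "EHZ",
          "starttime": "2009-08-24T00:20:03", "endtime": "2009-08-24T00:20:32"}]
df = pd.DataFrame(stats)
df["starttime"] = pd.to_datetime(df["starttime"])
df["endtime"] = pd.to_datetime(df["endtime"])
print(df.dtypes)  # time columns are now datetime64[ns]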
Example #3
    def _get_bulk_args(self,
                       starttime=None,
                       endtime=None,
                       **kwargs) -> bulk_waveform_arg_type:
        """
        Get the bulk waveform arguments based on given start/end times.

        This method also takes into account data availability as contained
        in the station data.

        Parameters
        ----------
        starttime
            Start times for query.
        endtime
            End times for query.

        Returns
        -------
        List of tuples of the form:
            [(network, station, location, channel, starttime, endtime)]
        """
        station_df = self.station_df.copy()
        inv = station_df[filter_index(station_df, **kwargs)]
        # replace None/Nan with larger number
        inv.loc[inv["end_date"].isnull(), "end_date"] = LARGEDT64
        inv["end_date"] = inv["end_date"].astype("datetime64[ns]")
        # get start/end of the inventory
        inv_start = inv["start_date"].min()
        inv_end = inv["end_date"].max()
        # remove stations/channels that don't have data for the requested time
        min_time = to_datetime64(starttime, default=inv_start).min()
        max_time = to_datetime64(endtime, default=inv_end).max()
        con1 = inv["start_date"] > max_time
        con2 = inv["end_date"] < min_time
        df = inv[~(con1 | con2)].set_index("seed_id")[list(NSLC)]
        if df.empty:  # return empty list if no data found
            return []
        if isinstance(starttime, pd.Series):
            # Have to get clever here to make sure only active stations get used
            # and indices are not duplicated.
            new_start = starttime.loc[set(starttime.index).intersection(
                df.index)]
            new_end = endtime.loc[set(endtime.index).intersection(df.index)]
            df["starttime"] = new_start.loc[~new_start.index.duplicated()]
            df["endtime"] = new_end.loc[~new_end.index.duplicated()]
        else:
            df["starttime"] = starttime
            df["endtime"] = endtime
        # remove any rows that don't have defined start/end times
        out = df[~(df["starttime"].isnull() | df["endtime"].isnull())]
        # ensure we have UTCDateTime objects
        out["starttime"] = [to_utc(x) for x in out["starttime"]]
        out["endtime"] = [to_utc(x) for x in out["endtime"]]
        # convert to list of tuples and return
        return [tuple(x) for x in out.to_records(index=False)]
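The returned tuples follow the bulk-request convention used by obspy clients' get_waveforms_bulk. A small sketch of just the final conversion step on a toy dataframe (the column values here are made up; the real method fills the time columns with UTCDateTime objects):

import pandas as pd

df = pd.DataFrame(
    [("UU", "TMU", "", "HHZ"), ("UU", "SRU", "", "HHZ")],
    columns=["network", "station", "location", "channel"],
)
df["starttime"] = "2020-01-01T00:00:00"
df["endtime"] = "2020-01-01T01:00:00"
bulk = [tuple(x) for x in df.to_records(index=False)]
print(bulk[0])  # ('UU', 'TMU', '', 'HHZ', '2020-01-01T00:00:00', ...)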
Example #4
def _filter_starttime_endtime(df, starttime=None, endtime=None):
    """Filter dataframe on starttime and endtime."""
    bool_index = np.ones(len(df), dtype=bool)
    t1 = to_datetime64(starttime) if starttime is not None else SMALLDT64
    t2 = to_datetime64(endtime) if endtime is not None else LARGEDT64
    # get time columns
    start_col = getattr(df, "starttime", getattr(df, "start_date", None))
    end_col = getattr(df, "endtime", getattr(df, "end_date", None))
    in_time = ~((end_col < t1) | (start_col > t2))
    return np.logical_and(bool_index, in_time.values)
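Because the time columns are looked up with getattr, the same mask works for waveform indices (starttime/endtime) and station dataframes (start_date/end_date). A quick illustration of that fallback:

import pandas as pd

df = pd.DataFrame({"start_date": pd.to_datetime(["2020-01-01"]),
                   "end_date": pd.to_datetime(["2020-06-01"])})
start_col = getattr(df, "starttime", getattr(df, "start_date", None))
print(start_col.name)  # 'start_date': fell back to the station-style column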
Example #5
    def yield_waveforms(
        self,
        network: Optional[str] = None,
        station: Optional[str] = None,
        location: Optional[str] = None,
        channel: Optional[str] = None,
        starttime: Optional[utc_able_type] = None,
        endtime: Optional[utc_able_type] = None,
        duration: float = 3600.0,
        overlap: Optional[float] = None,
    ) -> Stream:
        """
        Yield time-series segments.

        Parameters
        ----------
        {get_waveforms_params}
        duration : float
            The duration of the streams to yield. All selected channels
            will be included in the waveforms.
        overlap : float
            If duration is used, the amount of overlap between yielded
            streams, added to the end of each waveform.

        Notes
        -----
        All string parameters can use posix style matching with * and ? chars.

        Total duration of yielded streams = duration + overlap.
        """
        # get times as datetime64, filling in defaults when not given
        starttime = to_datetime64(starttime, 0.0)
        endtime = to_datetime64(endtime, "2999-01-01")
        # read in the whole index df
        index = self.read_index(
            network=network,
            station=station,
            location=location,
            channel=channel,
            starttime=starttime,
            endtime=endtime,
        )
        # adjust start/end times
        starttime = max(starttime, index.starttime.min())
        endtime = min(endtime, index.endtime.max())
        # chunk time and iterate over chunks
        time_chunks = make_time_chunks(starttime, endtime, duration, overlap)
        for t1, t2 in time_chunks:
            t1, t2 = to_datetime64(t1), to_datetime64(t2)
            con1 = (index.starttime - self.buffer) > t2
            con2 = (index.endtime + self.buffer) < t1
            ind = index[~(con1 | con2)]
            if not len(ind):
                continue
            yield self._index2stream(ind, t1, t2)
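make_time_chunks is an obsplus utility; the rough stand-in below is an assumption about its behavior, not the real implementation, and only illustrates the intended windowing: each yielded window spans duration + overlap seconds and windows advance by duration.

import numpy as np

def time_chunks(t1, t2, duration, overlap=0.0):
    """Yield (start, stop) windows covering [t1, t2]; a hypothetical stand-in."""
    step = np.timedelta64(int(duration * 1e9), "ns")
    over = np.timedelta64(int((overlap or 0.0) * 1e9), "ns")
    start, stop = np.datetime64(t1, "ns"), np.datetime64(t2, "ns")
    while start < stop:
        yield start, min(start + step + over, stop)
        start += step

print(list(time_chunks("2020-01-01T00:00", "2020-01-01T02:00", 3600, 60)))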
Example #6
def _get_times(starttime, endtime):
    """Return starttimes and endtimes."""
    # get defaults if starttime or endtime is none
    starttime = None if pd.isnull(starttime) else starttime
    endtime = None if pd.isnull(endtime) else endtime
    starttime = to_datetime64(starttime or SMALLDT64)
    endtime = to_datetime64(endtime or LARGEDT64)
    if starttime is not None and endtime is not None:
        if starttime > endtime:
            msg = "starttime cannot be greater than endtime."
            raise ValueError(msg)
    return starttime, endtime
Example #7
    def __call__(
        self,
        time_arg: event_time_type,
        time_before: Optional[float] = None,
        time_after: Optional[float] = None,
        *args,
        **kwargs,
    ) -> obspy.Stream:
        """
        Using a reference time, return waveforms that encompass that time.

        Parameters
        ----------
        time_arg
            The argument that will indicate a start time. Can be a
            length-one Catalog, an Event, a float, or a UTCDateTime object.
        time_before
            The time before time_arg to include in waveforms
        time_after
            The time after time_arg to include in waveforms

        Returns
        -------
        obspy.Stream
        """
        tbefore = to_timedelta64(time_before, default=self.time_before)
        tafter = to_timedelta64(time_after, default=self.time_after)
        assert (tbefore is not None) and (tafter is not None)
        # get the reference time from the object
        time = to_datetime64(get_reference_time(time_arg))
        t1 = time - tbefore
        t2 = time + tafter
        return self.get_waveforms(starttime=to_utc(t1),
                                  endtime=to_utc(t2),
                                  **kwargs)
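The window math is plain datetime64/timedelta64 arithmetic; to_timedelta64 converts the seconds (or falls back to the instance defaults) and to_utc converts back to the UTCDateTime objects obspy expects. The core step in isolation:

import numpy as np

time = np.datetime64("2020-01-01T12:00:00", "ns")
tbefore, tafter = np.timedelta64(10, "s"), np.timedelta64(30, "s")
t1, t2 = time - tbefore, time + tafter
print(t1, t2)  # 11:59:50 and 12:00:30 around the reference time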
Example #8
    def _index_from_iterable(self, iterable, update_time):
        """Iterate over an event iterable and dump to database."""
        events, update_times, paths = [], [], []
        max_mem = self._max_events_in_memory  # avoids MRO lookup on each loop
        events_remain = False

        for cat, mtime, path in iterable:
            if cat is None:
                continue
            for event in cat:
                events.append(event)
                update_times.append(mtime)
                paths.append(path)
            if len(events) >= max_mem:  # max limit exceeded, dump to db
                events_remain = True
                break
        # add new events to database
        df = obsplus.events.pd._default_cat_to_df(events)
        df["updated"] = to_datetime64(update_times)
        df["path"] = _remove_base_path(pd.Series(paths, dtype=object))
        if len(df):
            df = _time_cols_to_ints(df)
            df_to_write = self._prepare_dataframe(df, EVENT_TYPES_INPUT)
            self._write_update(df_to_write, update_time)
        return events_remain
Example #9
    def test_with_nulls(self):
        """Test for handling nulls."""
        test_input = (np.NaN, None, "", 15)
        out = np.array(to_datetime64(test_input))
        # first make sure empty values worked
        assert pd.isnull(out[:3]).all()
        assert out[-1].astype(np.int64) == obspy.UTCDateTime(15)._ns
Example #10
def _get_absolute_time(time: Union[time_types, np.ndarray],
                       ref_time: np.ndarray) -> np.ndarray:
    """
    Get an absolute time from a possible reference time.

    Parameters
    ----------
    time
        Can either be an absolute time, or a timedelta with respect to
        ref_time.
    ref_time
        The object time is referenced to.
    """
    def _is_time_delta(obj):
        """ return True if an object is a timedelta like thing. """
        if isinstance(obj, (int, float)):
            return True
        dtype = getattr(obj, "dtype", None)
        if np.issubdtype(dtype, np.timedelta64):
            return True
        is_int = np.issubdtype(dtype, np.integer)
        is_float = np.issubdtype(dtype, np.floating)
        if is_int or is_float:
            return True
        return False

    # if time looks like a timedelta, add it to ref_time; else treat as absolute
    if _is_time_delta(time):
        dt = ref_time + to_timedelta64(time)
    else:
        dt = to_datetime64(time)
    return np.broadcast_to(dt, np.shape(ref_time))
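Both branches in isolation, showing why the broadcast at the end gives a result shaped like ref_time whether time was relative or absolute:

import numpy as np

ref_time = np.array([np.datetime64("2020-01-01"), np.datetime64("2020-01-02")])
# relative input: adding a timedelta keeps ref_time's shape
print(ref_time + np.timedelta64(3600, "s"))
# absolute input: a single datetime is broadcast to ref_time's shape
dt = np.datetime64("2020-06-01")
print(np.broadcast_to(dt, np.shape(ref_time)))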
Example #11
    def test_npdatetime64_as_input(self):
        """This should also work on np.datetime64."""
        test_input = np.array((np.datetime64(1000, "s"),
                               np.datetime64(100, "ns")))
        out = to_datetime64(test_input)
        assert isinstance(out, np.ndarray)
        assert (test_input == out).all()
Example #12
    def test_creation_time(self, amplitude, amp_series):
        """Ensure creation time was included."""
        assert amp_series["creation_time"] == to_datetime64(
            amplitude.creation_info.creation_time
        )
        assert amp_series["author"] == amplitude.creation_info.author
        assert amp_series["agency_id"] == amplitude.creation_info.agency_id
Example #13
    def test_trim_out_of_existence(self, stream_wf):
        """Tests for trimming out all data."""
        far_out = to_datetime64("2200-01-01")
        wf = stream_wf.trim(starttime=far_out)
        assert len(wf) == 0
        data, stats = wf.data, wf.stats
        assert len(data) == len(stats) == 0
Example #14
    def test_pandas_timestamp(self):
        """Timestamps should also work."""
        kwargs = dict(year=2019, month=10, day=11, hour=12)
        ts = pd.Timestamp(**kwargs)
        out = to_datetime64((ts,))
        expected_out = (ts.to_datetime64(),)
        assert out == expected_out
Example #15
    def test_simple(self):
        """Test converting simple UTCDateTime-able things."""
        test_input = ("2019-01-10 11-12",
                      obspy.UTCDateTime("2019-01-10T12-12"), 100)
        expected_ns = np.array([obspy.UTCDateTime(x)._ns for x in test_input])
        dt64s = to_datetime64(test_input)
        new_ns = np.array(dt64s).astype(np.int64)
        assert np.equal(expected_ns, new_ns).all()
Example #16
    def test_set_picks_dataframe(self, add_picks_from_df, pick_df):
        """Verify that it is possible to attach picks from a DataFrame/csv file."""
        assert len(add_picks_from_df) == len(pick_df)
        # Make sure the resulting data is what you expect
        newest = (add_picks_from_df.data.reset_index()
                  .set_index(PDF_IND).iloc[-1])
        assert isinstance(newest["time"], pd.Timestamp)
        pick_time = pick_df.set_index(PDF_IND).loc[newest.name]["time"]
        assert newest.time == to_datetime64(pick_time)
        assert_not_nan(newest.pick_id)
Example #17
def _get_waveform_df(stream: wave_type) -> pd.DataFrame:
    """
    Convert a stream or sequence of traces into a dataframe.

    Parameters
    ----------
    stream
        The streams to index

    Notes
    -----
    This is private because it is probably not quite polished enough to include
    in the public API. More thought is needed how to do this properly.
    """
    stats_columns = list(NSLC) + ["starttime", "endtime", "sampling_rate"]
    trace_contents = [{i: tr.stats[i] for i in stats_columns} for tr in stream]
    df = pd.DataFrame(trace_contents, columns=stats_columns)
    # ensure time columns have the proper dtype
    df["starttime"] = to_datetime64(df["starttime"])
    df["endtime"] = to_datetime64(df["endtime"])
    df["sampling_period"] = to_timedelta64(1 / df["sampling_rate"])
    df["seed_id"] = get_seed_id_series(df)
    df["trace"] = [ObjectWrapper(tr) for tr in stream]
    return df
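The sampling_period column stores 1/sampling_rate as a timedelta64 so later time arithmetic stays in datetime units. The equivalent with plain pandas, where pd.to_timedelta stands in for obsplus's to_timedelta64:

import pandas as pd

sampling_rate = pd.Series([100.0, 40.0])
sampling_period = pd.to_timedelta(1 / sampling_rate, unit="s")
print(sampling_period)  # 10 ms and 25 ms as timedelta64[ns]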
Example #18
    def test_multi_indexed_dataframe(self, add_picks_from_multi_df,
                                     pick_df_multi):
        """
        Verify that it is possible to use a multi-indexed dataframe with the
        pick information
        """
        # Things may be starting to get more complicated than I
        # actually want to deal with here
        assert len(add_picks_from_multi_df) == len(pick_df_multi)
        # Make sure the input data is what you expect
        newest = (add_picks_from_multi_df.data.reset_index()
                  .set_index(PDF_IND).iloc[-1])
        pick_time = (pick_df_multi.reset_index().set_index(PDF_IND)
                     .loc[newest.name]["time"])
        assert newest.time == to_datetime64(pick_time)
        assert_not_nan(newest.pick_id)
Example #19
    def test_npdatetime64_too_large(self):
        """Test np.datetime64s larger than can fit into int64"""
        too_big = np.array([
            np.datetime64("2300-01-01"),
            np.datetime64("2020-01-01"),
            np.datetime64("2500-01-01"),
        ])
        with pytest.warns(UserWarning):
            out = to_datetime64(too_big)
        years = out.astype("M8[Y]")
        assert np.array_equal(
            years,
            [
                np.datetime64("2262", "Y"),
                np.datetime64("2020", "Y"),
                np.datetime64("2262", "Y"),
            ],
        )
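The clipping to 2262 happens because numpy's nanosecond-precision datetime64, which pandas also uses, tops out in April 2262:

import pandas as pd

print(pd.Timestamp.max)  # 2262-04-11 23:47:16.854775807, the int64-ns ceiling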
Example #20
    def test_df_overwrite(self, add_picks_from_df, pick_df):
        """
        Verify that my hack for preserving pick_ids when overwriting picks
        with a df works as expected
        """
        pick_df = deepcopy(pick_df)
        resource_ids = add_picks_from_df.data.pick_id
        pick_df["time"] = (to_datetime64(pick_df["time"])
                           + pd.to_timedelta(1, unit="s"))
        with pytest.warns(UserWarning, match="existing"):
            out = add_picks_from_df.set_picks(pick_df)
        # Make sure the existing picks were overwritten, not appended
        assert len(out) == len(pick_df)
        # Make sure the times got updated
        pick_df["seed_id_less"] = pick_df["seed_id"].str[:-1]
        pick_df.set_index(list(_INDEX_NAMES), inplace=True)
        assert out.data["time"].sort_values().equals(
            pick_df["time"].sort_values())
        # Make sure the resource_ids haven't changed
        assert (add_picks_from_df.data.pick_id == resource_ids).all()
Example #21
def _get_phase_reference_time(fetcher: Fetcher, phase):
    """
    Get reference times to specified phases, apply over all channels in a
    station.
    """
    pha = phase.upper()
    # ensure the pick_df and inventory df exist
    pick_df = fetcher.picks_df
    inv_df = fetcher.station_df
    assert pick_df is not None and inv_df is not None
    # filter dataframes for phase of interest
    has_phase = (pick_df["phase_hint"].str.upper() == pha).any()
    assert has_phase, f"no {phase} picks found"
    pick_df = pick_df[pick_df["phase_hint"] == pha]
    # merge inventory and pick df together, ensure time is datetime64
    columns = ["time", "station", "event_id"]
    merge = pd.merge(inv_df, pick_df[columns], on="station", how="left")
    merge["time"] = to_datetime64(merge["time"])
    assert merge["seed_id"].astype(bool).all()
    return merge.set_index("seed_id")[["time", "event_id"]]
Example #22
    def test_time_dtype(self, time_df):
        """Test time dtype."""
        out1 = upd.cast_dtypes(time_df, {"time": "ops_datetime"})["time"]
        out2 = to_datetime64(time_df["time"])
        assert (out1 == out2).all()
Example #23
    def test_utc_too_large(self):
        """Test a time larger than can fit into int64."""
        too_big = obspy.UTCDateTime("2600-01-01")
        with pytest.warns(UserWarning):
            out = to_datetime64(too_big)
        assert pd.Timestamp(out).year == 2262
Example #24
    def last_updated(self) -> Optional[np.datetime64]:
        """
        Get the last time (UTC) that the bank was updated.
        """
        return to_datetime64(self.last_updated_timestamp)
Example #25
    def test_zero(self):
        """Tests for input values as 0 or 0.0"""
        dt1 = to_datetime64(0)
        dt2 = to_datetime64(0.0)
        assert dt1.astype(np.int64) == dt2.astype(np.int64) == 0
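Integer and float zero both map to zero nanoseconds past the epoch:

import numpy as np

print(np.datetime64(0, "ns"))  # 1970-01-01T00:00:00.000000000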
Example #26
    def yield_event_waveforms(
        self,
        time_before: Optional[float] = None,
        time_after: Optional[float] = None,
        reference: Union[str, Callable] = "origin",
        raise_on_fail: bool = True,
    ) -> Tuple[str, Stream]:
        """
        Yield event_id and streams for each event.

        Parameters
        ----------
        time_before
            The time before (in seconds) the reference that will be included
            in the waveforms if possible.
        time_after
            The time after (in seconds) the reference that will be included
            in the waveforms if possible.
        reference
            A str that indicates how the starttime of the trace should be
            determined. The following are supported:
                origin - use the origin time of the event
                p - use the first p time as the start for each station
                s - use the first s times as the start for each station
            If "p" or "s" is used only streams corresponding to stations with
            the appropriate phase pick will be returned.
        raise_on_fail
            If True, re raise an exception if one is caught during waveform
            fetching, else continue to next event.

        Notes
        -----
        Streams will not be yielded for any event for which a reference time
        cannot be obtained. For example, if reference='S' only events with some
        S picks will be yielded.
        """
        def _check_yield_event_waveform_(reference, ta, tb):
            if reference.lower() not in self.reference_funcs:
                msg = (f"reference of {reference} is not supported. Supported "
                       f"reference arguments are {list(self.reference_funcs)}")
                raise ValueError(msg)
            if not (np.abs(tb) + np.abs(ta)) > np.timedelta64(0, "s"):
                msg = (
                    "time_before and/or time_after must be specified in either "
                    "the Fetcher's init or the yield_event_waveforms call")
                raise ValueError(msg)

        tb = to_timedelta64(time_before, default=self.time_before)
        ta = to_timedelta64(time_after, default=self.time_after)
        _check_yield_event_waveform_(reference, ta, tb)
        # get reference times
        ref_func = self.reference_funcs[reference.lower()]
        reftime_df = ref_func(self)
        # if using a wavebank preload index over entire time-span for speedup
        if isinstance(self.waveform_client, WaveBank) and len(reftime_df):
            mt = reftime_df["time"].min() - tb
            mx = reftime_df["time"].max() + ta
            index = self.waveform_client.read_index(starttime=mt, endtime=mx)
            get_bulk_wf = partial(self._get_bulk_wf, index=index)
        else:
            get_bulk_wf = self._get_bulk_wf
        # iterate each event in the events and yield the waveform
        for event_id, df in reftime_df.groupby("event_id"):
            # make sure time is either a single datetime or a series of datetimes
            time = to_datetime64(df["time"])
            t1, t2 = time - tb, time + ta
            bulk_args = self._get_bulk_args(starttime=t1, endtime=t2)
            try:
                yield EventStream(event_id, get_bulk_wf(bulk_args))
            except Exception:
                if raise_on_fail:
                    raise
                else:
                    msg = f"Fetcher failed to get waveforms for {event_id}."
                    warnings.warn(msg)
Example #27
    def test_nullish_values_returns_default(self):
        """Nullish values should return default values."""
        out1 = to_datetime64(None)
        assert pd.isnull(out1)
        assert out1 is not None
Example #28
    def test_tuple_and_list(self):
        """Tests for tuples and lists."""
        input1 = ["2020-01-03", obspy.UTCDateTime("2020-01-01").timestamp]
        out1 = to_datetime64(input1)
        out2 = to_datetime64(tuple(input1))
        assert np.all(out1 == out2)
Example #29
    def test_series_to_datetimes(self):
        """Series should be convertible to datetimes, return series"""
        ser = pd.Series([10, "2010-01-01"])
        out = to_datetime64(ser)
        assert isinstance(out, pd.Series)
Example #30
def merge_traces(st: trace_sequence, inplace=False) -> obspy.Stream:
    """
    An efficient function to merge overlapping data for a stream.

    This function is equivalent to calling merge(1) and split() then returning
    the resulting trace. This means only traces that have overlaps or adjacent
    times will be merged, otherwise they will remain separate traces.

    Parameters
    ----------
    st
        The input stream to merge.
    inplace
        If True st is modified in place.

    Returns
    -------
    A stream with merged traces.
    """
    def _make_trace_df(traces: trace_sequence) -> pd.DataFrame:
        """Create a dataframe from a sequence of traces."""
        # create dataframe of traces and stats (use ns for all time values)
        sortby = ["seed_id", "sampling_rate", "starttime", "endtime"]
        index = (_get_waveform_df(traces).sort_values(sortby)
                 .reset_index(drop=True))
        # create column marking if a trace should merge into the previous one
        shifted_index = index.shift(1)  # shift all values forward one row
        seed_ids_match = index["seed_id"] == shifted_index["seed_id"]
        index_samp = index["sampling_period"]
        samps_match = index_samp == shifted_index["sampling_period"]
        # ~ is used rather than a flipped comparison so that NaT comparisons,
        # which evaluate to False, count as overlaps (e.g. each first row)
        overlap = ~(shifted_index["endtime"] < (index["starttime"] - index_samp))
        index["merge_group"] = (~(seed_ids_match & samps_match & overlap)).cumsum()
        return index

    # checks for early bail out, no merging if one trace or all unique ids
    if len(st) < 2 or len({tr.id for tr in st}) == len(st):
        return st
    if not inplace:
        st = copy.deepcopy(st)
    df = _make_trace_df(st)
    # get a series of properties by group
    group = df.groupby("merge_group")
    t1, t2 = group["starttime"].min(), group["endtime"].max()
    sampling_periods = group["sampling_period"].max()
    gsize = group.size()  # number of traces in each group
    gnum_one = gsize[gsize == 1].index  # groups with 1 trace
    gnum_gt_one = gsize[gsize > 1].index  # group num with > 1 trace
    # use this to avoid pandas groupbys
    merged_traces = [
        x.data for x in df[df["merge_group"].isin(gnum_one)]["trace"]
    ]
    for gnum in gnum_gt_one:  # any groups w/ more than one trace
        ind = df.merge_group == gnum
        gtraces = [x.data for x in df.trace[ind]]  # unpack traces
        dtype = _get_dtype(gtraces)
        # create list of time, y values, and marker for when values are filled
        sampling_period = sampling_periods[gnum].to_timedelta64()
        start, stop = t1[gnum].to_datetime64(), t2[gnum].to_datetime64()
        t = np.arange(start=start,
                      stop=stop + sampling_period,
                      step=sampling_period)
        y = np.empty(np.shape(t), dtype=dtype)
        has_filled = np.zeros(len(t), dtype=np.int32)
        for tr in gtraces:
            start_ind = np.searchsorted(t, to_datetime64(tr.stats.starttime))
            y[start_ind:start_ind + len(tr.data)] = tr.data
            has_filled[start_ind:start_ind + len(tr.data)] = 1
        gtraces[0].data = y
        merged_traces.append(gtraces[0])
        assert np.all(has_filled), "some values not filled in!"
    return obspy.Stream(traces=merged_traces)
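The fill step relies on the time axis t being sorted, so np.searchsorted gives the index where each trace's samples begin. The core move in isolation:

import numpy as np

t = np.arange(np.datetime64("2020-01-01T00:00:00"),
              np.datetime64("2020-01-01T00:00:10"),
              np.timedelta64(1, "s"))
y = np.zeros(len(t))
data = np.ones(3)
start = np.searchsorted(t, np.datetime64("2020-01-01T00:00:04"))
y[start:start + len(data)] = data
print(y)  # ones land at indices 4-6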