Ejemplo n.º 1
0
    def get_waveforms(
        self,
        network=None,
        station=None,
        location=None,
        channel=None,
        starttime=None,
        endtime=None,
    ) -> obspy.Stream:
        """
        Get waveforms from the cache, read from disk and cache if needed.

        See obplus.WaveBank.get_waveforms for param descriptions.
        """
        mask = filter_index(
            self.index, network, station, location, channel, starttime, endtime
        )
        callables = self.index[mask].set_index("unique_key")["st_call"]
        # keep only the first callable for each unique key
        callables = callables[~callables.index.duplicated()]
        # nothing matched the query: hand back an empty stream
        if not len(callables):
            return obspy.Stream()
        # invoke each cached callable and merge the resulting streams
        stream = reduce(add, (call() for call in callables))
        if starttime is None and endtime is None:
            return stream
        # substitute far-out defaults for whichever bound was not given
        return stream.trim(starttime=starttime or 0, endtime=endtime or 32503680000)
Ejemplo n.º 2
0
 def _func(time, ind, df):
     """ return waveforms from df of bulk parameters """
     wildcard_chars = {"*", "?", "[", "]"}
     t1, t2 = time
     # drop index rows whose time span lies entirely outside [t1, t2]
     # (the negated form keeps rows with NaN start/end times)
     outside = (ind["starttime"] > t2) | (ind["endtime"] < t1)
     ind = ind[~outside]
     # selector over the remaining index rows; start with everything selected
     keep = np.ones(len(ind))
     # restrict the bulk dataframe to requests for this exact time window
     df = df[(df.t1 == t1) & (df.t2 == t2)]
     # flag bulk rows that use wildcards or other matching features
     has_wildcard = np.array(
         [_column_contains(df[col], wildcard_chars) for col in NSLC]
     ).any(axis=0)
     df_match, df_no_match = df[has_wildcard], df[~has_wildcard]
     # wildcard rows require the slower filter_index path
     if not df_match.empty:
         records = df_match.to_records(index=False)
         matched = np.array([filter_index(ind, *tuple(r)[:4]) for r in records])
         keep = np.logical_and(keep, matched.any(axis=0))
     # non-wildcard rows can use fast seed-id set membership
     if not df_no_match.empty:
         wanted = set(get_seed_id_series(df_no_match))
         keep = np.logical_and(keep, get_seed_id_series(ind).isin(wanted))
     # NOTE: self is captured from the enclosing scope (this is a closure)
     return self._index2stream(ind[keep], t1, t2)
Ejemplo n.º 3
0
 def _func(time, ind, df, st):
     """ return waveforms from df of bulk parameters """
     wildcard_chars = {"*", "?", "[", "]"}
     # selector over index rows; start with everything selected
     keep = np.ones(len(ind))
     t1, t2 = time
     # restrict the bulk dataframe to requests for this exact time window
     df = df[(df.t1 == t1) & (df.t2 == t2)]
     # flag bulk rows that use wildcards or other matching features
     has_wildcard = np.array(
         [_column_contains(df[col], wildcard_chars) for col in NSLC]
     ).any(axis=0)
     df_match, df_no_match = df[has_wildcard], df[~has_wildcard]
     # wildcard rows require the slower filter_index path
     if not df_match.empty:
         records = df_match.to_records(index=False)
         matched = np.array([filter_index(ind, *tuple(r)[:4]) for r in records])
         keep = np.logical_and(keep, matched.any(axis=0))
     # non-wildcard rows can use fast nslc set membership
     if not df_no_match.empty:
         wanted = set(get_nslc_series(df_no_match))
         keep = np.logical_and(keep, get_nslc_series(ind).isin(wanted))
     # collect the flagged traces, then trim to the requested window
     selected = obspy.Stream([tr for tr, flag in zip(st, keep) if flag])
     return selected.slice(starttime=UTC(t1), endtime=UTC(t2))
Ejemplo n.º 4
0
 def test_filter_index(self, crandall_dataset):
     """ Tests for filtering index with filter index function. """
     # this is mainly here to test the time filtering, because the bank
     # operations pass this off to the HDF5 kernel.
     index = crandall_dataset.waveform_client.read_index(network="UU")
     # pick a window guaranteed to overlap some rows: mean start to max end
     t1 = index.starttime.mean()
     t2 = index.endtime.max()
     kwargs = dict(network="UU", station="*", location="*", channel="*")
     bool_ind = filter_index(index, starttime=t1, endtime=t2, **kwargs)
     # at least one row must survive the filter
     # (the original `(~np.logical_not(bool_ind)).any()` double negation
     # reduces to exactly this)
     assert bool_ind.any()
Ejemplo n.º 5
0
def stream_bulk_split(st: Stream,
                      bulk: List[waveform_request_type]) -> List[Stream]:
    """
    Split a stream into a list of streams that meet requirements in bulk.

    This is similar to the get_waveforms_bulk methods of waveform_client, but
    rather than merging any overlapping data it is returned in a list of traces.

    Parameters
    ----------
    st
        A stream object
    bulk
        A bulk request. Wildcards not currently supported on str params.

    Returns
    -------
    List of streams, each meeting the corresponding request in bulk.

    Raises
    ------
    ValueError
        If any bulk argument does not have exactly six elements.
    """
    # return nothing if empty bulk or stream args
    bulk = _get_bulk(bulk)
    if not bulk or len(st) == 0:
        return []
    # get dataframe of stream contents
    sdf = _stream_data_to_df(st)
    # iterate stream, return output
    out = []
    for barg in bulk:
        # validate with an explicit exception; assert is stripped under -O
        if len(barg) != 6:
            raise ValueError(f"{barg} is not a valid bulk arg, must have len 6")
        need = filter_index(sdf, *barg)
        traces = [tr for tr, bo in zip(st, need) if bo]
        new_st = obspy.Stream(traces)
        t1, t2 = to_utc(barg[-2]), to_utc(barg[-1])
        new = new_st.slice(starttime=t1, endtime=t2)
        # no data in the requested window; append an empty stream placeholder
        if new is None or not len(new):
            out.append(obspy.Stream())
            continue
        new = merge_traces(new)
        out.append(new)
    assert len(out) == len(bulk), "output is not the same len as stream list"
    return out
Ejemplo n.º 6
0
    def read_index(
        self,
        network: Optional[str] = None,
        station: Optional[str] = None,
        location: Optional[str] = None,
        channel: Optional[str] = None,
        starttime: Optional[utc_time_type] = None,
        endtime: Optional[utc_time_type] = None,
        **kwargs,
    ) -> pd.DataFrame:
        """
        Return a dataframe of the index, optionally applying filters.

        Parameters
        ----------
        {waveform_params}
        kwargs
            kwargs are passed to pandas.read_hdf function

        Raises
        ------
        ValueError
            If starttime is greater than endtime.
        """
        self.ensure_bank_path_exists()
        if starttime is not None and endtime is not None:
            if starttime > endtime:
                # include the offending values in the message for debuggability
                msg = (
                    f"starttime ({starttime}) cannot be greater than "
                    f"endtime ({endtime})"
                )
                raise ValueError(msg)
        # build the index file on first use
        if not os.path.exists(self.index_path):
            self.update_index()
        # if no file was created (dealing with empty bank) return empty index
        if not os.path.exists(self.index_path):
            return pd.DataFrame(columns=self.index_columns)
        # grab index from cache (time filtering happens at the HDF5 level)
        index = self._index_cache(starttime,
                                  endtime,
                                  buffer=self.buffer,
                                  **kwargs)
        # apply NSLC filters and return
        filt = filter_index(index,
                            network=network,
                            station=station,
                            location=location,
                            channel=channel)
        return index[filt]
Ejemplo n.º 7
0
    def _get_bulk_arg(self, starttime=None, endtime=None, **kwargs) -> list:
        """ get the argument passed to get_waveforms_bulk, see
        obspy.fdsn.client for more info """
        station_df = self.station_df.copy()
        inv = station_df[filter_index(station_df, **kwargs)]
        # replace None/NaN end dates with a far-future time so comparisons work
        inv.loc[inv["end_date"].isnull(), "end_date"] = LARGEDT64
        inv["end_date"] = inv["end_date"].astype("datetime64[ns]")
        # remove station/channels that dont have data for requested time
        starttime = to_datetime64(starttime, default=inv["start_date"].min())
        endtime = to_datetime64(endtime, default=inv["end_date"].max())
        con1 = inv["start_date"] > endtime
        con2 = inv["end_date"] < starttime
        inv = inv[~(con1 | con2)]
        # copy so the .loc assignments below write to an independent frame
        # rather than a slice of inv (avoids SettingWithCopyWarning / lost
        # writes under copy-on-write)
        df = inv[list(NSLC)].copy()
        if df.empty:  # return empty list if no data found
            return []
        df.loc[:, "starttime"] = starttime
        df.loc[:, "endtime"] = endtime
        # remove any rows that don't have defined start/end times
        out = df[(~df["starttime"].isnull()) & (~df["endtime"].isnull())]
        # convert to list of tuples and return
        return [tuple(x) for x in out.to_records(index=False)]
Ejemplo n.º 8
0
 def _get_bulk_arg(self, starttime=None, endtime=None, **kwargs) -> list:
     """ get the argument passed to get_waveforms_bulk, see
     obspy.fdsn.client for more info """
     station_df = self.station_df.copy()
     inv = station_df[filter_index(station_df, **kwargs)]
     # replace None/NaN end dates with a far-future time so comparisons work
     null_inds = inv["end_date"].isnull()
     inv.loc[null_inds, "end_date"] = far_out_time
     # remove station/channels that dont have data for requested time
     starttime = starttime if starttime is not None else inv.start_date.min()
     endtime = endtime if endtime is not None else inv.end_date.max()
     con1 = inv["start_date"] > endtime
     con2 = inv["end_date"] < starttime
     inv = inv[~(con1 | con2)]
     # copy so the .loc assignments below write to an independent frame
     # rather than a slice of inv (avoids SettingWithCopyWarning / lost
     # writes under copy-on-write)
     df = inv[list(NSLC)].copy()
     if df.empty:  # return empty list if no data found
         return []
     df.loc[:, "starttime"] = starttime
     df.loc[:, "endtime"] = endtime
     # remove any rows that don't have defined start/end times
     df = df[(~df.starttime.isnull()) & (~df.endtime.isnull())]
     # convert to list of tuples and return
     return [tuple(x) for x in df.to_records(index=False)]