def get_waveforms_bulk(stream: Stream, bulk: bulk_waveform_arg_type, **kwargs) -> Stream:
    """
    Get a large number of waveforms with a bulk request.

    Parameters
    ----------
    stream
        A stream object.
    bulk
        A list of any number of tuples containing the following:
        (network, station, location, channel, starttime, endtime).
    """
    # Summarize the stream contents into a dataframe so requests can be
    # matched against it by column filtering.
    index = _get_waveform_df(stream)
    # Normalize the bulk argument into a dataframe (times as datetime64).
    request_df = get_waveform_bulk_df(bulk)
    # No requests at all -> an empty stream.
    if not len(request_df):
        return obspy.Stream()
    # Handle each unique (starttime, endtime) pair exactly once; string
    # column conditions are applied inside the filter helper.
    unique_times = np.unique(request_df[["starttime", "endtime"]].values, axis=0)
    out_traces = []
    for start, end in unique_times:
        matched = _filter_index_to_bulk((start, end), index_df=index, bulk_df=request_df)
        sliced = obspy.Stream(traces=[row.data for row in matched["trace"]]).slice(
            starttime=to_utc(start), endtime=to_utc(end)
        )
        out_traces += sliced.traces
    return merge_traces(obspy.Stream(traces=out_traces))
def get_waveforms_bulk(
    self,
    bulk: bulk_waveform_arg_type,
    index: Optional[pd.DataFrame] = None,
    **kwargs,
) -> Stream:
    """
    Get a large number of waveforms with a bulk request.

    Parameters
    ----------
    bulk
        A list of any number of lists containing the following:
        (network, station, location, channel, starttime, endtime).
    index
        A dataframe returned by read_index. Enables calling code to only
        read the index from disk once for repetitive calls.
    """
    request_df = get_waveform_bulk_df(bulk)
    # Nothing requested -> an empty stream.
    if not len(request_df):
        return obspy.Stream()
    # Restrict the index to the overall temporal extent of the request.
    t_min = request_df["starttime"].min()
    t_max = request_df["endtime"].max()
    if index is None:
        sub_index = self.read_index(starttime=t_min, endtime=t_max)
    else:
        # Keep rows that overlap [t_min, t_max] at all.
        overlaps = ~((index.starttime > t_max) | (index.endtime < t_min))
        sub_index = index[overlaps]
    # For each unique (starttime, endtime) pair apply the remaining
    # filtering conditions and collect the resulting traces.
    traces = []
    for t1, t2 in np.unique(request_df[["starttime", "endtime"]].values, axis=0):
        matched = _filter_index_to_bulk((t1, t2), sub_index, request_df)
        traces.extend(self._index2stream(matched, t1, t2, merge=False).traces)
    return merge_traces(obspy.Stream(traces=traces), inplace=True)
def test_dataframe_extra_column(self, bulk_df):
    """The dataframe should work even with out of order/extra columns."""
    modified = bulk_df.copy()
    modified["bob"] = "lightening"
    # reverse column order
    modified = modified[list(modified.columns)[::-1]]
    result = get_waveform_bulk_df(modified)
    assert isinstance(result, pd.DataFrame)
    assert len(result) == len(self.bulk1) == len(bulk_df)
    # the new columns should have been dropped
    expected_cols = list(WAVEFORM_REQUEST_DTYPES)
    assert list(result.columns) == expected_cols
def test_missing_column_raises(self, bulk_df):
    """A missing column should raise."""
    incomplete = bulk_df.drop(columns=["network"])
    with pytest.raises(ValidationError, match="network"):
        get_waveform_bulk_df(incomplete)
def test_dataframe(self, bulk_df):
    """Ensure a dataframe with no extra columns works."""
    result = get_waveform_bulk_df(bulk_df)
    assert isinstance(result, pd.DataFrame)
    assert len(result) == len(bulk_df) == len(self.bulk1)
def test_tuple(self):
    """Ensure standard tuples produce bulk df."""
    result = get_waveform_bulk_df(self.bulk1)
    assert isinstance(result, pd.DataFrame)
    assert len(result) == len(self.bulk1)
def bulk_df(self):
    """Create a dataframe from bulk."""
    frame = pd.DataFrame(self.bulk1, columns=list(WAVEFORM_REQUEST_DTYPES))
    return get_waveform_bulk_df(frame)