def get_waveforms_bulk(stream: Stream, bulk: bulk_waveform_arg_type, **kwargs) -> Stream:
    """
    Get a large number of waveforms with a bulk request.

    Parameters
    ----------
    stream
        A stream object.
    bulk
        A list of any number of tuples containing the following:
        (network, station, location, channel, starttime, endtime).
    """
    # summarize stream contents in a dataframe for fast filtering
    index = _get_waveform_df(stream)
    # normalize the bulk arguments; times become datetime64
    request_df = get_waveform_bulk_df(bulk)
    if not len(request_df):
        # nothing was requested; return an empty stream
        return obspy.Stream()
    # collect traces for each unique (starttime, endtime) pair
    out_traces = []
    for start, end in np.unique(request_df[["starttime", "endtime"]].values, axis=0):
        matched = _filter_index_to_bulk((start, end), index_df=index, bulk_df=request_df)
        sliced = obspy.Stream(traces=[row.data for row in matched["trace"]]).slice(
            starttime=to_utc(start), endtime=to_utc(end)
        )
        out_traces.extend(sliced.traces)
    return merge_traces(obspy.Stream(traces=out_traces))
def test_merge_bingham_st(self, bingham_stream):
    """Ensure the bingham stream can be merged"""
    merged = merge_traces(bingham_stream, inplace=False)
    columns = list(NSLC) + ["starttime", "endtime", "gap_time", "gap_samps"]
    gaps = pd.DataFrame(merged.get_gaps(), columns=columns)
    # a negative gap time would indicate an overlap survived the merge
    assert (gaps["gap_time"] > 0).all()
def get_waveforms_bulk(
    self,
    bulk: bulk_waveform_arg_type,
    index: Optional[pd.DataFrame] = None,
    **kwargs,
) -> Stream:
    """
    Get a large number of waveforms with a bulk request.

    Parameters
    ----------
    bulk
        A list of any number of lists containing the following:
        (network, station, location, channel, starttime, endtime).
    index
        A dataframe returned by read_index. Enables calling code to only
        read the index from disk once for repetitive calls.
    """
    request_df = get_waveform_bulk_df(bulk)
    if not len(request_df):
        return obspy.Stream()
    # restrict the index to the overall time range of the request
    t1 = request_df["starttime"].min()
    t2 = request_df["endtime"].max()
    if index is None:
        ind = self.read_index(starttime=t1, endtime=t2)
    else:
        ind = index[~((index.starttime > t2) | (index.endtime < t1))]
    # apply the remaining filters per unique time window and collect traces
    all_traces = []
    for utime in np.unique(request_df[["starttime", "endtime"]].values, axis=0):
        sub = _filter_index_to_bulk(utime, ind, request_df)
        st = self._index2stream(sub, utime[0], utime[1], merge=False)
        all_traces.extend(st.traces)
    return merge_traces(obspy.Stream(traces=all_traces), inplace=True)
def test_traces_with_different_sampling_rates(self):
    """traces with different sampling_rates should be left alone."""
    first = obspy.read()
    second = obspy.read()
    # double the sampling rates in the second stream so nothing can merge
    for trace in second:
        trace.stats.sampling_rate = trace.stats.sampling_rate * 2
    combined = first + second
    assert merge_traces(combined) == combined
def test_traces_with_overlap(self):
    """Trace with overlap should be merged together."""
    st1 = obspy.read()
    st2 = obspy.read()
    # shift the second copy forward so each trace pair overlaps in time
    for original, shifted in zip(st1, st2):
        shifted.stats.starttime = original.stats.starttime + 10
    combined = st1 + st2
    merged = merge_traces(combined)
    # should be equivalent to obspy's interpolating merge then split
    assert merged == combined.merge(1).split()
def test_adjacent_traces(self):
    """Traces that are one sample away in time should be merged together."""
    st1 = obspy.read()
    st2 = obspy.read()
    # position each copy exactly one sample after its counterpart
    for leading, trailing in zip(st1, st2):
        trailing.stats.starttime = leading.stats.endtime + 1.0 / trailing.stats.sampling_rate
    combined = st1 + st2
    merged = merge_traces(combined)
    assert len(merged) == 3
    # result should match obspy's interpolating merge followed by split
    assert merged == combined.merge(1).split()
def test_array_data_type(self):
    """The array datatype should not change."""
    # floats should remain floats
    st1 = obspy.read()
    st2 = obspy.read()
    merged_floats = merge_traces(st1 + st2)
    for out_tr, in_tr in zip(merged_floats, st1):
        assert out_tr.data.dtype == in_tr.data.dtype
    # ints should remain ints
    st3 = self.convert_stream_dtype(st1, np.int32)
    st4 = self.convert_stream_dtype(st1, np.int32)
    merged_ints = merge_traces(st3 + st4)
    for tr in merged_ints:
        assert tr.data.dtype == np.int32
    # mixing one int and one float stream should upcast to float64
    merged_mixed = merge_traces(st1 + st3)
    for tr in merged_mixed:
        assert tr.data.dtype == np.float64
    # the order of the input traces shouldn't matter for dtypes
    merged_mixed_reversed = merge_traces(st3 + st1)
    for tr in merged_mixed_reversed:
        assert tr.data.dtype == np.float64
def _prep_output_stream(self, st, starttime=None, endtime=None) -> obspy.Stream:
    """
    Prepare waveforms object for output by trimming to desired times,
    merging channels, and attaching responses.
    """
    if not len(st):
        return st
    # fall back to the extents of the stream when limits are not provided
    starttime = starttime or min(tr.stats.starttime for tr in st)
    endtime = endtime or max(tr.stats.endtime for tr in st)
    st.trim(starttime=to_utc(starttime), endtime=to_utc(endtime))
    return merge_traces(st, inplace=True).sort()
def put_waveforms(self, stream: Union[obspy.Stream, obspy.Trace], name=None, update_index=True):
    """
    Add the waveforms in a waveforms to the bank.

    Parameters
    ----------
    stream
        An obspy waveforms object to add to the bank
    name
        Name of file, if None it will be determined based on contents
    update_index
        Flag to indicate whether or not to update the waveform index
        after writing the new events. Default is True.
    """
    self.ensure_bank_path_exists(create=True)
    # treat a bare trace as a one-trace iterable
    traces = [stream] if isinstance(stream, obspy.Trace) else stream
    # group the traces by the file path each belongs in
    grouped = defaultdict(list)
    for tr in traces:
        summary = _summarize_trace(
            tr,
            name=name,
            path_struct=self.path_structure,
            name_struct=self.name_structure,
        )
        grouped[self.bank_path / summary["path"]].append(tr)
    # write out each unique path
    written_paths = []
    for path, tr_list in grouped.items():
        # make the parent directories if they don't exist
        path.parent.mkdir(exist_ok=True, parents=True)
        out = obspy.Stream(traces=tr_list).split()
        # fold in any waveforms already stored at this path
        if path.exists():
            out += obspy.read(str(path))
        # polish the stream and write it back to disk
        out = merge_traces(out, inplace=True)
        out.write(str(path), format="mseed")
        written_paths.append(path)
    # the bank contents changed, so refresh the index if requested
    if grouped and update_index:
        self.update_index(paths=written_paths)
def test_merge_high_sampling_rate(self, gapped_high_sample_stream):
    """Ensure high sampling rate overlapped data still work."""
    # the pass condition is simply that merge_traces raises no assertion error
    merge_traces(gapped_high_sample_stream)
def test_identical_streams(self):
    """ensure passing identical streams performs de-duplication."""
    base = obspy.read()
    # surround the base stream with identical copies of the default stream
    duplicated = obspy.read() + base + obspy.read()
    assert merge_traces(duplicated) == base