def _func(time, ind, df, st):
    """Return waveforms from df of bulk parameters."""
    match_chars = {"*", "?", "[", "]"}
    ar = np.ones(len(ind))  # indices of ind to use to load data
    t1, t2 = time[0], time[1]
    df = df[(df.t1 == time[0]) & (df.t2 == time[1])]
    # determine which columns use any matching or other select features
    uses_matches = [_column_contains(df[x], match_chars) for x in NSLC]
    match_ar = np.array(uses_matches).any(axis=0)
    df_match = df[match_ar]
    df_no_match = df[~match_ar]
    # handle columns that need matches (more expensive)
    if not df_match.empty:
        match_bulk = df_match.to_records(index=False)
        mar = np.array([filter_index(ind, *tuple(b)[:4]) for b in match_bulk])
        ar = np.logical_and(ar, mar.any(axis=0))
    # handle columns that do not need matches
    if not df_no_match.empty:
        nslc1 = set(get_seed_id_series(df_no_match))
        nslc2 = get_seed_id_series(ind)
        ar = np.logical_and(ar, nslc2.isin(nslc1))
    # get a list of used traces, combine and trim
    st = obspy.Stream([x for x, y in zip(st, ar) if y])
    return st.slice(starttime=to_utc(t1), endtime=to_utc(t2))
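# ``_column_contains`` above flags rows whose NSLC columns contain wildcard
# characters. A minimal sketch of such a helper is shown below; it is inferred
# from the call site (a per-row boolean used with ``.any(axis=0)``), not
# necessarily the actual implementation. Assumes ``import re`` at module top.
def _column_contains(ser: pd.Series, chars: set) -> np.ndarray:
    """Return a boolean array, True where ser contains any char in chars."""
    # escape regex metacharacters; "*", "?", "[", and "]" are all special
    pattern = "|".join(re.escape(c) for c in chars)
    return ser.astype(str).str.contains(pattern).values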
def _index2stream(
    self, index, starttime=None, endtime=None, attach_response=False
) -> Stream:
    """Return the waveforms in the index."""
    # get abs path to each data file
    files: pd.Series = (self.bank_path + index.path).unique()
    # make sure start and endtimes are in UTCDateTime
    starttime = to_utc(starttime) if starttime else None
    endtime = to_utc(endtime) if endtime else None
    # iterate the files to read and try to load into waveforms
    kwargs = dict(format=self.format, starttime=starttime, endtime=endtime)
    func = partial(_try_read_stream, **kwargs)
    stt = obspy.Stream()
    # chunksize must be an integer; ensure at least 1
    chunksize = max(len(files) // self._max_workers, 1)
    for st in self._map(func, files, chunksize=chunksize):
        if st is not None:
            stt += st
    # sort out nullish nslc codes
    stt = replace_null_nlsc_codes(stt)
    # filter out any traces not in index (this can happen when files hold
    # multiple traces).
    nslc = set(get_seed_id_series(index))
    stt.traces = [x for x in stt if x.id in nslc]
    # trim, merge, attach response
    stt = self._prep_output_stream(stt, starttime, endtime, attach_response)
    return stt
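# ``_try_read_stream`` is mapped over the index files above, and the loop
# tolerates None return values, suggesting a reader that swallows read errors.
# A minimal module-level sketch under that assumption (not necessarily the
# actual implementation):
def _try_read_stream(path, format=None, **kwargs):
    """Try to read a waveform file, returning None if it cannot be read."""
    try:
        return obspy.read(path, format=format, **kwargs)
    except Exception:
        try:
            # fall back to letting obspy guess the file format
            return obspy.read(path, **kwargs)
        except Exception:
            return None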
def stream_bulk_split(st: Stream, bulk: List[waveform_request_type]) -> List[Stream]:
    """
    Split a stream into a list of streams that meet requirements in bulk.

    This is similar to the get_waveforms_bulk method of waveform_client, but
    rather than merging any overlapping data, each request's data is returned
    in its own stream.

    Parameters
    ----------
    st
        A stream object.
    bulk
        A bulk request. Wildcards are not currently supported on str params.

    Returns
    -------
    A list of streams, each meeting the corresponding request in bulk.
    """
    # return nothing if empty bulk or stream args
    bulk = _get_bulk(bulk)
    if not bulk or len(st) == 0:
        return []
    # get dataframe of stream contents
    sdf = _stream_data_to_df(st)
    # iterate stream, return output
    out = []
    for barg in bulk:
        assert len(barg) == 6, f"{barg} is not a valid bulk arg, must have len 6"
        need = filter_index(sdf, *barg)
        traces = [tr for tr, bo in zip(st, need) if bo]
        new_st = obspy.Stream(traces)
        t1, t2 = to_utc(barg[-2]), to_utc(barg[-1])
        new = new_st.slice(starttime=t1, endtime=t2)
        if new is None or not len(new):
            out.append(obspy.Stream())
            continue
        new = merge_traces(new)
        out.append(new)
    assert len(out) == len(bulk), "output is not the same length as bulk"
    return out
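# A short usage sketch (hypothetical values): split obspy's bundled example
# stream against two bulk requests. The helper name below is illustrative
# only; each element of the result corresponds to one request.
def _example_stream_bulk_split():
    st = obspy.read()  # obspy's example stream: BW.RJOB..EHZ/EHN/EHE
    t1, t2 = st[0].stats.starttime, st[0].stats.endtime
    bulk = [
        ("BW", "RJOB", "", "EHZ", t1, t2),
        ("BW", "RJOB", "", "EHN", t1, t2),
    ]
    streams = stream_bulk_split(st, bulk)
    assert len(streams) == len(bulk)  # one output stream per request
    return streams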
def test_starttime_origin_time_seperation(self, dar_attached_picks):
    """Ensure the trace start times and event origin times aren't too far apart."""
    dar = dar_attached_picks
    cat = dar.attrs["events"]
    for ev in cat:
        rid = ev.resource_id
        time = to_datetime64(ev.origins[-1].time)
        dd = dar[dar.stream_id == rid]
        assert (dd.origin_time.values - time == EMPTYTD64).all()
        assert ((dd.starttime.values - to_utc(time).timestamp) < 100).all()