Example #1
def _func(time, ind, df, st):
    """Return waveforms from a df of bulk parameters."""
    match_chars = {"*", "?", "[", "]"}
    ar = np.ones(len(ind), dtype=bool)  # boolean mask of rows in ind to load
    t1, t2 = time[0], time[1]
    df = df[(df.t1 == t1) & (df.t2 == t2)]
    # determine which columns use any matching or other select features
    uses_matches = [_column_contains(df[x], match_chars) for x in NSLC]
    match_ar = np.array(uses_matches).any(axis=0)
    df_match = df[match_ar]
    df_no_match = df[~match_ar]
    # handle columns that need matches (more expensive)
    if not df_match.empty:
        match_bulk = df_match.to_records(index=False)
        mar = np.array([filter_index(ind, *tuple(b)[:4]) for b in match_bulk])
        ar = np.logical_and(ar, mar.any(axis=0))
    # handle columns that do not need matches
    if not df_no_match.empty:
        nslc1 = set(get_seed_id_series(df_no_match))
        nslc2 = get_seed_id_series(ind)
        ar = np.logical_and(ar, nslc2.isin(nslc1))
    # get a list of used traces, combine and trim
    st = obspy.Stream([x for x, y in zip(st, ar) if y])
    return st.slice(starttime=to_utc(t1), endtime=to_utc(t2))
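The _column_contains helper used above is not shown in this example. A minimal sketch of what it plausibly does (flag rows of a string column that contain any wildcard character) might look like the following; the implementation here is an assumption, not the library's actual code:

import re

def _column_contains(ser, chars):
    """Hypothetical sketch: True for rows of ser containing any char in chars."""
    pattern = "|".join(re.escape(c) for c in chars)  # e.g. \*|\?|\[|\]
    return ser.astype(str).str.contains(pattern)

Each call returns one boolean Series per NSLC column, which is why the caller can stack the results with np.array(uses_matches).any(axis=0).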
Example #2
def _index2stream(self, index, starttime=None, endtime=None,
                  attach_response=False) -> Stream:
    """Return the waveforms in the index."""
    # get the absolute path to each data file
    files: np.ndarray = (self.bank_path + index.path).unique()
    # make sure start and end times are UTCDateTime instances
    starttime = to_utc(starttime) if starttime else None
    endtime = to_utc(endtime) if endtime else None
    # iterate the files to read and try to load into waveforms
    kwargs = dict(format=self.format, starttime=starttime, endtime=endtime)
    func = partial(_try_read_stream, **kwargs)
    stt = obspy.Stream()
    # chunksize must be a positive int for the worker map
    chunksize = max(len(files) // self._max_workers, 1)
    for st in self._map(func, files, chunksize=chunksize):
        if st is not None:
            stt += st
    # sort out nullish nslc codes
    stt = replace_null_nlsc_codes(stt)
    # filter out any traces not in the index (this can happen when files
    # hold multiple traces)
    nslc = set(get_seed_id_series(index))
    stt.traces = [x for x in stt if x.id in nslc]
    # trim, merge, attach response
    stt = self._prep_output_stream(stt, starttime, endtime, attach_response)
    return stt
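_try_read_stream is applied to each file through partial and, given the "if st is not None" check above, is expected to swallow read errors rather than raise. A minimal sketch under that assumption (the real function may also log or warn):

import obspy

def _try_read_stream(path, **kwargs):
    """Hypothetical sketch: read a waveform file, returning None on failure."""
    try:
        return obspy.read(path, **kwargs)
    except Exception:
        return None

The kwargs built above (format, starttime, endtime) are all keyword arguments obspy.read accepts, so the partial application works unchanged.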
Example #3
def stream_bulk_split(st: Stream,
                      bulk: List[waveform_request_type]) -> List[Stream]:
    """
    Split a stream into a list of streams that meet requirements in bulk.

    This is similar to the get_waveforms_bulk methods of waveform_client, but
    rather than merging overlapping data, each request is returned as its own
    stream.

    Parameters
    ----------
    st
        A stream object
    bulk
        A bulk request. Wildcards not currently supported on str params.

    Returns
    -------
    List of streams, each meeting the corresponding request in bulk.
    """
    # return nothing if empty bulk or stream args
    bulk = _get_bulk(bulk)
    if not bulk or len(st) == 0:
        return []

    # get dataframe of stream contents
    sdf = _stream_data_to_df(st)
    # iterate stream, return output
    out = []
    for barg in bulk:
        assert len(barg) == 6, f"{barg} is not a valid bulk arg, must have len 6"
        need = filter_index(sdf, *barg)
        traces = [tr for tr, bo in zip(st, need) if bo]
        new_st = obspy.Stream(traces)
        t1, t2 = to_utc(barg[-2]), to_utc(barg[-1])
        new = new_st.slice(starttime=t1, endtime=t2)
        if new is None or not len(new):
            out.append(obspy.Stream())
            continue
        new = merge_traces(new)
        out.append(new)
    assert len(out) == len(bulk), "output is not the same length as bulk list"
    return out
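A quick usage sketch, assuming each request is a (network, station, location, channel, starttime, endtime) tuple as in obspy's get_waveforms_bulk, and using obspy's bundled example stream:

import obspy

st = obspy.read()  # obspy's example stream: BW.RJOB..EHZ/EHN/EHE
t1 = st[0].stats.starttime
bulk = [
    ("BW", "RJOB", "", "EHZ", t1, t1 + 10),
    ("BW", "RJOB", "", "EHN", t1 + 5, t1 + 15),
]
streams = stream_bulk_split(st, bulk)
assert len(streams) == len(bulk)  # one (possibly empty) Stream per request

Each returned Stream is already sliced to its request window and merged, so every element corresponds positionally to its bulk request.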
Example #4
def test_starttime_origin_time_seperation(self, dar_attached_picks):
    """Ensure trace start times are not too far from event origin times."""
    dar = dar_attached_picks
    cat = dar.attrs["events"]
    for ev in cat:
        rid = ev.resource_id
        time = to_datetime64(ev.origins[-1].time)
        dd = dar[dar.stream_id == rid]
        assert (dd.origin_time.values - time == EMPTYTD64).all()
        assert ((dd.starttime.values - to_utc(time).timestamp) < 100).all()
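For reference, the mixed arithmetic in the last assertion works because UTCDateTime.timestamp is a float of seconds since the epoch; assuming the starttime coordinate also stores float epoch seconds, the check requires every trace to start within 100 s of the origin. A self-contained illustration of that arithmetic:

import numpy as np
import obspy

origin = obspy.UTCDateTime("2017-09-18T00:00:00")  # arbitrary example time
starttimes = np.array([origin.timestamp + 5.0, origin.timestamp + 50.0])
assert ((starttimes - origin.timestamp) < 100).all()  # all within 100 s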