Example #1
 # note: _func is a closure defined inside a WaveBank method, so ``self``
 # used below is captured from the enclosing scope.
 def _func(time, ind, df):
     """Return waveforms from a df of bulk parameters."""
     match_chars = {"*", "?", "[", "]"}
     t1, t2 = time[0], time[1]
     # filter index based on start/end times
     in_time = ~((ind["starttime"] > t2) | (ind["endtime"] < t1))
     ind = ind[in_time]
     # create indices used to load data
     ar = np.ones(len(ind), dtype=bool)  # boolean mask of ind rows to load
     df = df[(df.t1 == time[0]) & (df.t2 == time[1])]
     # determine which columns use any matching or other select features
     uses_matches = [_column_contains(df[x], match_chars) for x in NSLC]
     match_ar = np.array(uses_matches).any(axis=0)
     df_match = df[match_ar]
     df_no_match = df[~match_ar]
     # handle columns that need matches (more expensive)
     if not df_match.empty:
         match_bulk = df_match.to_records(index=False)
         mar = np.array([filter_index(ind, *tuple(b)[:4]) for b in match_bulk])
         ar = np.logical_and(ar, mar.any(axis=0))
     # handle columns that do not need matches
     if not df_no_match.empty:
         nslc1 = set(get_nslc_series(df_no_match))
         nslc2 = get_nslc_series(ind)
         ar = np.logical_and(ar, nslc2.isin(nslc1))
     return self._index2stream(ind[ar], t1, t2)
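
These extracts rely on helpers imported at module level in obsplus (NSLC, _column_contains, filter_index, get_nslc_series). As a point of reference, here is a minimal sketch of what _column_contains might look like, inferred from how it is called above; this is an assumption, not the library's actual code:

import re
import pandas as pd

# assumed column-name constant, matching how NSLC is used above
NSLC = ("network", "station", "location", "channel")

def _column_contains(ser: pd.Series, chars) -> pd.Series:
    """Return True for rows whose value contains any character in chars (assumed)."""
    pattern = "|".join(re.escape(c) for c in chars)
    return ser.astype(str).str.contains(pattern)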
Example #2
 def _func(time, ind, df, st):
     """ return waveforms from df of bulk parameters """
     match_chars = {"*", "?", "[", "]"}
     ar = np.ones(len(ind), dtype=bool)  # boolean mask of ind rows to load
     t1, t2 = time[0], time[1]
     df = df[(df.t1 == time[0]) & (df.t2 == time[1])]
     # determine which columns use any matching or other select features
     uses_matches = [_column_contains(df[x], match_chars) for x in NSLC]
     match_ar = np.array(uses_matches).any(axis=0)
     df_match = df[match_ar]
     df_no_match = df[~match_ar]
     # handle columns that need matches (more expensive)
     if not df_match.empty:
         match_bulk = df_match.to_records(index=False)
         mar = np.array([filter_index(ind, *tuple(b)[:4]) for b in match_bulk])
         ar = np.logical_and(ar, mar.any(axis=0))
     # handle columns that do not need matches
     if not df_no_match.empty:
         nslc1 = set(get_nslc_series(df_no_match))
         nslc2 = get_nslc_series(ind)
         ar = np.logical_and(ar, nslc2.isin(nslc1))
     # get a list of used traces, combine and trim
     st = obspy.Stream([x for x, y in zip(st, ar) if y])
     return st.slice(starttime=UTC(t1), endtime=UTC(t2))  # UTC: obspy.UTCDateTime alias
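
For context, a closure like _func is presumably applied once per unique (t1, t2) window of the bulk request. A hedged sketch of such a driver loop follows; the structure is assumed, not obsplus's actual dispatch code:

out = obspy.Stream()
for t1t2 in df[["t1", "t2"]].drop_duplicates().itertuples(index=False):
    out += _func(t1t2, ind, df, st)  # _func filters df down to the window itself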
Example #3
 def _index2stream(self,
                   index,
                   starttime=None,
                   endtime=None,
                   attach_response=False) -> Stream:
     """ return the waveforms in the index """
     # get the absolute path to each data file
     files = (self.bank_path + index.path).unique()
     # iterate the files to read and try to load into waveforms
     stt = obspy.Stream()
     kwargs = dict(
         format=self.format,
         starttime=obspy.UTCDateTime(starttime) if starttime else None,
         endtime=obspy.UTCDateTime(endtime) if endtime else None,
     )
     for st in (_try_read_stream(x, **kwargs) for x in files):
         if st is not None and len(st):
             stt += st
     # sort out nullish nslc codes
     stt = replace_null_nlsc_codes(stt)
     # filter out any traces not in index (this can happen when files hold
     # multiple traces).
     nslc = set(get_nslc_series(index))
     stt.traces = [x for x in stt if x.id in nslc]
     # trim, merge, attach response
     stt = self._prep_output_stream(stt, starttime, endtime, attach_response)
     return stt
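
_try_read_stream is an internal fault-tolerant reader. A minimal sketch of the behavior implied above, where unreadable files yield None instead of raising, might look like this (assumed, simplified):

import warnings
import obspy

def _try_read_stream(path, **kwargs):
    """Try to read a waveform file with obspy; return None on failure (assumed)."""
    try:
        return obspy.read(path, **kwargs)
    except Exception:
        warnings.warn(f"obspy failed to read {path}")
        return None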
Example #4
def archive_to_sds(
    bank: Union[Path, str, "obsplus.WaveBank"],
    sds_path: Union[Path, str],
    starttime: Optional[UTCDateTime] = None,
    endtime: Optional[UTCDateTime] = None,
    overlap: float = 30,
    type_code: str = "D",
    stream_processor: Optional[callable] = None,
):
    """
    Create a SeisComP Data Structure (SDS) archive from a waveform source.

    Parameters
    ----------
    bank
        A wavebank or path to such.
    sds_path
        The path for the new sds archive to be created.
    starttime
        If not None, the earliest time for which to convert data from the bank.
    endtime
        If not None, the latest time for which to convert data from the bank.
    overlap
        The overlap, in seconds, to use between adjacent files.
    type_code
        The single-character SDS data type code (e.g. "D" for waveform data).
    stream_processor
        A callable that takes a single stream as input and returns a single
        stream. It may return an empty stream in order to skip a chunk.

    Notes
    -----
    see: https://www.seiscomp3.org/doc/applications/slarchive/SDS.html
    """
    sds_path = Path(sds_path)
    # create a fetcher object for yielding continuous waveforms
    bank = obsplus.WaveBank(bank)
    bank.update_index()
    # get starttime/endtimes
    index = bank.read_index()
    ts1 = index.starttime.min() if not starttime else starttime
    t1 = _nearest_day(ts1)
    t2 = obspy.UTCDateTime(index.endtime.max() if not endtime else endtime)
    nslcs = get_nslc_series(index).unique()
    # iterate over nslc and get data for selected channel
    for nslc in nslcs:
        nslc_dict = {n: v for n, v in zip(NSLC, nslc.split("."))}
        # yield waveforms in desired chunks
        ykwargs = dict(starttime=t1, endtime=t2, overlap=overlap, duration=86400)
        ykwargs.update(nslc_dict)
        for st in bank.yield_waveforms(**ykwargs):
            if stream_processor:  # apply stream processor if needed.
                st = stream_processor(st)
            if st:
                path = _get_sds_filename(st, sds_path, type_code, **nslc_dict)
                st.write(str(path), "mseed")
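
A hypothetical invocation, with the paths invented for illustration:

import obspy

archive_to_sds(
    bank="path/to/wavebank",         # any directory obsplus.WaveBank can index
    sds_path="path/to/sds_archive",  # destination root of the SDS tree
    starttime=obspy.UTCDateTime("2019-01-01"),
    endtime=obspy.UTCDateTime("2019-02-01"),
)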
Example #5
def make_origins(
    events: catalog_or_event,
    inventory: obspy.Inventory,
    depth: float = 1.0,
    phase_hints: Optional[Iterable] = ("P", "p"),
) -> catalog_or_event:
    """
    Iterate a catalog or single events and ensure each has an origin.

    If no origins are found for an event, create one with the time set to
    the earliest pick and the location set to the location of the station
    associated with that pick. Events are modified in place.

    This may be useful for event-location routines that need a starting
    location.

    Parameters
    ----------
    events
        The events to scan and add origins where necessary.
    inventory
        An inventory object which contains all the stations referenced in
        quakeml elements of events.
    depth
        The default depth for created origins. Should be in meters. See the
        obspy docs for Origin or the quakeml standard for more details.
    phase_hints
        List of acceptable phase hints to use for identifying the earliest
        pick. By default will only search for "P" or "p" phase hints.

    Returns
    -------
    Either a Catalog or Event object (same as input).
    """
    # ensure input is an iterable of events
    cat = [events] if isinstance(events, Event) else events
    # load inv dataframe and make sure it has a seed_id column
    df = obsplus.stations_to_df(inventory)
    nslc_series = get_nslc_series(df)
    for event in cat:
        if not event.origins:  # make new origin
            picks = event.picks_to_df()
            picks = picks.loc[
                (~(picks.evaluation_status == "rejected"))
                & (picks.phase_hint.isin(phase_hints))
            ]
            if not len(picks):
                raise ValueError(f"{event} has no acceptable picks to create origin")
            # get first pick, determine time/station used
            first_pick = picks.loc[picks.time == picks.time.min()].iloc[0]
            seed_id = first_pick.seed_id
            # find channel corresponding to pick
            df_chan = df[nslc_series == seed_id]
            if not len(df_chan):
                raise KeyError(f"{seed_id} not found in inventory")
            ser = df_chan.iloc[0]
            # create origin
            ori = _create_first_pick_origin(first_pick, ser, depth=depth)
            event.origins.append(ori)
    return events
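
A hypothetical usage, assuming a QuakeML catalog and a matching station inventory on disk:

import obspy

cat = obspy.read_events("events.xml")       # hypothetical file names
inv = obspy.read_inventory("stations.xml")
cat = make_origins(cat, inv, depth=500.0)   # depth in meters, per the docstring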
Example #6
 def _check_only_one_response_method(self, df):
     """Raise if both response methods are specified."""
     valid_nrl_cols = ~df[self._nrl_response_cols].isnull().all(axis=1)
     valid_client_cols = ~df[self._client_col].isnull()
     both_types_used = valid_nrl_cols & valid_client_cols
     if both_types_used.any():
         bad_nslc = get_nslc_series(df[both_types_used])
         msg = (f"The following channels specify both a NRL and station "
                f"client response methods, choose one or the other:\n "
                f"{bad_nslc}.")
         raise AmbiguousResponseError(msg)
Example #7
 def test_seed_id(self, pick_df):
     """ ensure valid seed_ids were created. """
     # recreate seed_id and make sure columns are equal
     df = pick_df
     seed = get_nslc_series(pick_df)
     assert (seed == df["seed_id"]).all()
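
The test above implies that get_nslc_series simply joins the network, station, location, and channel columns into seed-id strings. An equivalent construction might look like this; it is a sketch under that assumption, not obsplus's actual implementation:

def get_nslc_series(df, sep="."):
    """Join NSLC columns of df into seed-id strings (assumed behavior)."""
    cols = ["network", "station", "location", "channel"]
    return df[cols].astype(str).apply(sep.join, axis=1)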