def _func(time, ind, df):
    """ return waveforms from df of bulk parameters """
    match_chars = {"*", "?", "[", "]"}
    t1, t2 = time[0], time[1]
    # filter index based on start/end times
    in_time = ~((ind["starttime"] > t2) | (ind["endtime"] < t1))
    ind = ind[in_time]
    # create indices used to load data
    ar = np.ones(len(ind))  # indices of ind to use to load data
    df = df[(df.t1 == time[0]) & (df.t2 == time[1])]
    # determine which columns use any matching or other select features
    uses_matches = [_column_contains(df[x], match_chars) for x in NSLC]
    match_ar = np.array(uses_matches).any(axis=0)
    df_match = df[match_ar]
    df_no_match = df[~match_ar]
    # handle columns that need matches (more expensive)
    if not df_match.empty:
        match_bulk = df_match.to_records(index=False)
        mar = np.array([filter_index(ind, *tuple(b)[:4]) for b in match_bulk])
        ar = np.logical_and(ar, mar.any(axis=0))
    # handle columns that do not need matches
    if not df_no_match.empty:
        nslc1 = set(get_nslc_series(df_no_match))
        nslc2 = get_nslc_series(ind)
        ar = np.logical_and(ar, nslc2.isin(nslc1))
    return self._index2stream(ind[ar], t1, t2)
def _func(time, ind, df, st):
    """ return waveforms from df of bulk parameters """
    match_chars = {"*", "?", "[", "]"}
    ar = np.ones(len(ind))  # indices of ind to use to load data
    t1, t2 = time[0], time[1]
    df = df[(df.t1 == time[0]) & (df.t2 == time[1])]
    # determine which columns use any matching or other select features
    uses_matches = [_column_contains(df[x], match_chars) for x in NSLC]
    match_ar = np.array(uses_matches).any(axis=0)
    df_match = df[match_ar]
    df_no_match = df[~match_ar]
    # handle columns that need matches (more expensive)
    if not df_match.empty:
        match_bulk = df_match.to_records(index=False)
        mar = np.array([filter_index(ind, *tuple(b)[:4]) for b in match_bulk])
        ar = np.logical_and(ar, mar.any(axis=0))
    # handle columns that do not need matches
    if not df_no_match.empty:
        nslc1 = set(get_nslc_series(df_no_match))
        nslc2 = get_nslc_series(ind)
        ar = np.logical_and(ar, nslc2.isin(nslc1))
    # get a list of used traces, combine and trim
    st = obspy.Stream([x for x, y in zip(st, ar) if y])
    return st.slice(starttime=UTC(t1), endtime=UTC(t2))
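# Illustrative sketch (not part of the bank code above): both `_func` variants
# share the same idea -- split the bulk-request rows into rows whose NSLC fields
# contain glob characters ("*", "?", "[", "]") and rows that are plain strings,
# because plain ids can be resolved with cheap set membership while globs need
# per-pattern matching. The helper and column names below are assumptions made
# for this toy example only.
import fnmatch

import pandas as pd

NSLC_COLS = ("network", "station", "location", "channel")


def select_index_ids(bulk_df: pd.DataFrame, index_ids: pd.Series) -> pd.Series:
    """Return a boolean mask over index_ids selected by the bulk rows."""
    # join each bulk row into a seed-id style string, e.g. "UU.TMU..HHZ"
    bulk_ids = bulk_df[list(NSLC_COLS)].astype(str).agg(".".join, axis=1)
    # rows containing a glob character need pattern matching (more expensive)
    is_glob = bulk_ids.str.contains(r"[\*\?\[\]]", regex=True)
    # cheap path: exact ids resolved by set membership
    mask = index_ids.isin(set(bulk_ids[~is_glob]))
    # expensive path: each wildcard pattern is matched against every index id
    for pattern in bulk_ids[is_glob]:
        mask |= index_ids.map(lambda x: fnmatch.fnmatch(x, pattern))
    return mask


# example: an exact row selects only the matching id, a "*" row would select more
# select_index_ids(pd.DataFrame([("UU", "TMU", "", "HHZ")], columns=NSLC_COLS),
#                  pd.Series(["UU.TMU..HHZ", "TA.M17A..BHZ"]))  # -> True, False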
def _index2stream(
    self, index, starttime=None, endtime=None, attach_response=False
) -> Stream:
    """ return the waveforms in the index """
    # get abs path to each data file
    files: pd.Series = (self.bank_path + index.path).unique()
    # iterate the files to read and try to load into waveforms
    stt = obspy.Stream()
    kwargs = dict(
        format=self.format,
        starttime=obspy.UTCDateTime(starttime) if starttime else None,
        endtime=obspy.UTCDateTime(endtime) if endtime else None,
    )
    for st in (_try_read_stream(x, **kwargs) for x in files):
        if st is not None and len(st):
            stt += st
    # sort out nullish nslc codes
    stt = replace_null_nlsc_codes(stt)
    # filter out any traces not in index (this can happen when files hold
    # multiple traces).
    nslc = set(get_nslc_series(index))
    stt.traces = [x for x in stt if x.id in nslc]
    # trim, merge, attach response
    stt = self._prep_output_stream(stt, starttime, endtime, attach_response)
    return stt
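# A minimal sketch of the tolerant-read pattern used by `_index2stream` above:
# `_try_read_stream` is assumed to behave roughly like this helper, i.e. try to
# read one file with obspy and return None rather than raising when the file
# cannot be parsed. The helper name and exact behavior are assumptions, not a
# copy of the library internals.
import warnings

import obspy


def _read_or_none(path, **kwargs):
    """Read one waveform file, returning None (with a warning) on failure."""
    try:
        return obspy.read(path, **kwargs)
    except Exception as exc:  # broad on purpose: any unreadable file is skipped
        warnings.warn(f"could not read {path}: {exc}")
        return None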
def archive_to_sds(
    bank: Union[Path, str, "obsplus.WaveBank"],
    sds_path: Union[Path, str],
    starttime: Optional[UTCDateTime] = None,
    endtime: Optional[UTCDateTime] = None,
    overlap: float = 30,
    type_code: str = "D",
    stream_processor: Optional[callable] = None,
):
    """
    Create a SeisComP Data Structure (SDS) archive from a waveform source.

    Parameters
    ----------
    bank
        A wavebank or path to such.
    sds_path
        The path for the new sds archive to be created.
    starttime
        If not None, the starttime to convert data from bank.
    endtime
        If not None, the endtime to convert data from bank.
    overlap
        The overlap, in seconds, to use for each file.
    type_code
        The str indicating the data type.
    stream_processor
        A callable that will take a single stream as input and return a
        single stream. May return an empty stream to skip a stream.

    Notes
    -----
    see: https://www.seiscomp3.org/doc/applications/slarchive/SDS.html
    """
    sds_path = Path(sds_path)
    # create a fetcher object for yielding continuous waveforms
    bank = obsplus.WaveBank(bank)
    bank.update_index()
    # get starttime/endtimes
    index = bank.read_index()
    ts1 = index.starttime.min() if not starttime else starttime
    t1 = _nearest_day(ts1)
    t2 = obspy.UTCDateTime(index.endtime.max() if not endtime else endtime)
    nslcs = get_nslc_series(index).unique()
    # iterate over nslc and get data for selected channel
    for nslc in nslcs:
        nslc_dict = {n: v for n, v in zip(NSLC, nslc.split("."))}
        # yield waveforms in desired chunks
        ykwargs = dict(starttime=t1, endtime=t2, overlap=overlap, duration=86400)
        ykwargs.update(nslc_dict)
        for st in bank.yield_waveforms(**ykwargs):
            if stream_processor:  # apply stream processor if needed.
                st = stream_processor(st)
            if st:
                path = _get_sds_filename(st, sds_path, type_code, **nslc_dict)
                st.write(str(path), "mseed")
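# A minimal usage sketch for archive_to_sds, assuming a WaveBank directory at
# "waveforms/" and write access to "sds_archive/"; both paths and the simple
# detrending stream processor are illustrative only.
def _detrend_processor(st):
    """Example stream processor: detrend each chunk before it is archived."""
    return st.detrend("linear")


archive_to_sds(
    "waveforms", "sds_archive", overlap=30, stream_processor=_detrend_processor
)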
def make_origins(
    events: catalog_or_event,
    inventory: obspy.Inventory,
    depth: float = 1.0,
    phase_hints: Optional[Iterable] = ("P", "p"),
) -> catalog_or_event:
    """
    Iterate a catalog or a single event and ensure each event has an origin.

    If no origins are found for an event, create one with the time set to
    the earliest pick and the location set to the location of the station
    recording that pick. Events are modified in place.

    This may be useful for location codes that need a starting location.

    Parameters
    ----------
    events
        The events to scan and add origins where necessary.
    inventory
        An inventory object which contains all the stations referenced in
        quakeml elements of events.
    depth
        The default depth for created origins. Should be in meters. See the
        obspy docs for Origin or the quakeml standard for more details.
    phase_hints
        List of acceptable phase hints to use for identifying the earliest
        pick. By default will only search for "P" or "p" phase hints.

    Returns
    -------
    Either a Catalog or Event object (same as input).
    """
    # ensure input is an iterable of events
    cat = [events] if isinstance(events, Event) else events
    # load inv dataframe and make sure it has a seed_id column
    df = obsplus.stations_to_df(inventory)
    nslc_series = get_nslc_series(df)
    for event in cat:
        if not event.origins:  # make new origin
            picks = event.picks_to_df()
            picks = picks.loc[
                (~(picks.evaluation_status == "rejected"))
                & (picks.phase_hint.isin(phase_hints))
            ]
            if not len(picks):
                raise ValueError(f"{event} has no acceptable picks to create origin")
            # get first pick, determine time/station used
            first_pick = picks.loc[picks.time == picks.time.min()].iloc[0]
            seed_id = first_pick.seed_id
            # find channel corresponding to pick
            df_chan = df[nslc_series == seed_id]
            if not len(df_chan):
                raise KeyError(f"{seed_id} not found in inventory")
            ser = df_chan.iloc[0]
            # create origin
            ori = _create_first_pick_origin(first_pick, ser, depth=depth)
            event.origins.append(ori)
    return events
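# A minimal usage sketch for make_origins, assuming "catalog.xml" and
# "stations.xml" are local files holding the events (with picks) and the
# stations those picks reference; both file names are hypothetical.
import obspy

cat = obspy.read_events("catalog.xml")
inv = obspy.read_inventory("stations.xml")
cat_with_origins = make_origins(cat, inv, depth=2_000.0)  # depth in meters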
def _check_only_one_response_method(self, df):
    """Raise if both response methods are specified."""
    valid_nrl_cols = ~df[self._nrl_response_cols].isnull().all(axis=1)
    valid_client_cols = ~df[self._client_col].isnull()
    both_types_used = valid_nrl_cols & valid_client_cols
    if both_types_used.any():
        bad_nslc = get_nslc_series(df[both_types_used])
        msg = (
            "The following channels specify both NRL and station client "
            "response methods; choose one or the other:\n"
            f"{bad_nslc}."
        )
        raise AmbiguousResponseError(msg)
def test_seed_id(self, pick_df):
    """ ensure valid seed_ids were created. """
    # recreate seed_id and make sure columns are equal
    df = pick_df
    seed = get_nslc_series(pick_df)
    assert (seed == df["seed_id"]).all()