def crandall_stream(crandall_event, crandall_ds, crandall_inventory):
    """
    Return the waveforms for the crandall event with the response removed.

    The stream is fetched from the dataset's waveform client, linearly
    detrended, and converted to velocity using the supplied inventory.
    """
    origin = obsplus.get_reference_time(crandall_event)
    # the window spans reference time - 5 through reference time + 60
    stream = crandall_ds.waveform_client.get_waveforms(
        starttime=origin - 5,
        endtime=origin + 60,
    )
    stream.detrend("linear")
    stream.remove_response(
        crandall_inventory,
        pre_filt=[0.1, 0.2, 40, 50],
        output="VEL",
    )
    return stream
def test_event_streams(self, bingham_dataset, bingham_stream_dict):
    """Each event id must map to a non-empty stream starting near its reference time."""
    catalog = bingham_dataset.event_client.get_events()
    events_by_id = {str(event.resource_id): event for event in catalog}
    for event_id, stream in bingham_stream_dict.items():
        assert len(stream), f"no data for event:{event_id}"
        ref_stamp = get_reference_time(events_by_id[event_id]).timestamp
        earliest_stamp = min(
            trace.stats.starttime.timestamp for trace in stream
        )
        # earliest trace start must be within 12 (timestamp units) of the reference
        assert abs(earliest_stamp - ref_stamp) < 12
def test_all_channels_included(self, node_dataset):
    """Every pick time should be expanded onto all three channels of its instrument."""
    # grab the first event in the dataset
    event = node_dataset.event_client.get_events()[0]
    # fetch a stream around the event to learn which channel ids exist
    ref_time = obsplus.get_reference_time(event)
    window_start = ref_time - 1
    window_end = ref_time + 15
    stream = node_dataset.waveform_client.get_waveforms(
        starttime=window_start, endtime=window_end
    )
    id_sequence = {trace.id for trace in stream}
    out = utils.get_phase_window_df(event=event, channel_codes=id_sequence)
    # each distinct pick time must have exactly three unique seed ids
    for _pick_time, group in out.groupby("time"):
        assert len(group) == 3
        seed_ids = group["seed_id"]
        assert len(seed_ids) == len(set(seed_ids)) == 3
    # no (phase, channel) pair may appear twice
    assert not out.duplicated(["phase_hint", "seed_id"]).any()
def test_bad_type_raises(self):
    """None passes through as None; an unsupported type raises a TypeError."""
    result = get_reference_time(None)
    assert result is None
    with pytest.raises(TypeError):
        get_reference_time({})
def test_stream(self):
    """The reference time of a stream is its earliest trace start time."""
    stream = obspy.read()
    ref_time = get_reference_time(stream)
    earliest_start = min(trace.stats.starttime for trace in stream)
    assert ref_time == earliest_start
def test_event_with_picks(self, event_only_picks):
    """An event that has picks but no origin should yield the expected pick-based time."""
    observed = get_reference_time(event_only_picks)
    expected = UTCDateTime("2015-01-01")
    assert expected == observed
def test_empty_event_raises(self):
    """An event carrying no information must raise a ValueError."""
    empty_event = ev.Event()
    with pytest.raises(ValueError):
        get_reference_time(empty_event)
def time_outputs(self, request):
    """
    Meta fixture collecting all the input types.

    Resolves the fixture named by ``request.param`` and returns the
    reference time computed from its value.
    """
    return get_reference_time(request.getfixturevalue(request.param))
def get_phase_window_df(  # noqa: C901
    event: ev.Event,
    max_duration: Optional[Union[float, int, Mapping]] = None,
    min_duration: Optional[Union[float, int, Mapping]] = None,
    channel_codes: Optional[Union[Collection, pd.Series]] = None,
    buffer_ratio: Optional[float] = None,
    restrict_to_arrivals: bool = True,
) -> pd.DataFrame:
    """
    Return a dataframe of phase picks for the event.

    Does the following:
        1) Removes the rejected picks.
        2) Defines pick time windows using either
            a) a corresponding amplitude whose type matches the phase hint of
               the pick and has a time window
            b) the start of the phase to the arrival time plus min_duration.

    Parameters
    ----------
    event
        The seismic event
    max_duration
        The maximum duration (in seconds) of a phase. Can either be a scalar,
        or a mapping whose keys are seed_ids and values are applied to that
        specific channel.
    min_duration
        The minimum duration (in seconds) of a phase. Can either be a scalar,
        or a mapping whose keys are seed_ids and values are applied to that
        specific channel.
    channel_codes
        If provided, supplies the needed information to expand the dataframe
        to include an entry for each channel on a station for a given pick.
        For example, a P pick that occurs on a HHZ channel will also get an
        entry on HHE and HHN (assuming they are in the list). Each element may
        be either a full seed id string or a ``(seed_id_less, seed_id)`` pair.
    buffer_ratio
        If not None, the ratio of the total duration of the phase windows
        which should be added to BOTH the start and end of the phase window.
    restrict_to_arrivals
        If True, only use picks for which there is an arrival on the
        preferred origin.

    Returns
    -------
    A dataframe restricted to the columns of ``PHASE_WINDOW_DF_DTYPES``.

    Raises
    ------
    NoPhaseInformationError
        If no picks remain after filtering.
    """
    reftime = to_datetime64(get_reference_time(event))

    def _get_earliest_s_time(df):
        """Return the earliest S pick time in df."""
        return df[df.phase_hint == "S"].time.min()

    def _get_extrema_like_df(df, extrema_arg):
        """
        Get min or max argument with the same length as df.

        This is done so each row's duration can be compared to some row
        specific value.
        """
        if isinstance(extrema_arg, pd.Series):
            # NOTE(review): assumes the series has a "seed_id_less" index
            # level in addition to the seed id -- confirm against callers.
            return df["seed_id"].map(extrema_arg.droplevel("seed_id_less"))
        elif isinstance(extrema_arg, Mapping):
            return df["seed_id"].map(extrema_arg)
        else:
            return np.ones(len(df)) * extrema_arg

    def _get_picks_df(restrict_to_arrivals):
        """Get the picks dataframe, remove picks flagged as rejected."""
        pdf = obsplus.picks_to_df(event)
        # seed id without the trailing component character
        pdf["seed_id_less"] = pdf["seed_id"].str[:-1]
        if restrict_to_arrivals:
            adf = obsplus.arrivals_to_df(event)
            pdf = pdf.loc[pdf["resource_id"].isin(adf["pick_id"])]
        # remove rejected picks
        pdf = pdf[pdf.evaluation_status != "rejected"]
        # Toss any picks from stations that have S-picks that are earlier
        # than P-picks
        if {"P", "S"}.issubset(pdf["phase_hint"]):
            phs = pdf.groupby("phase_hint")
            p_picks = phs.get_group("P")
            s_picks = phs.get_group("S")
            both = set(p_picks["seed_id_less"]).intersection(
                s_picks["seed_id_less"]
            )
            p_picks = (
                p_picks.loc[p_picks["seed_id_less"].isin(both)]
                .set_index("seed_id_less")
                .sort_index()
            )
            s_picks = (
                s_picks.loc[s_picks["seed_id_less"].isin(both)]
                .set_index("seed_id_less")
                .sort_index()
            )
            mask = p_picks["time"] > s_picks["time"]
            if mask.any():
                bad_p = p_picks.loc[mask]
                bad_s = s_picks.loc[mask]
                warnings.warn(
                    "S-pick is earlier than P-pick for one or more picks. "
                    "Skipping phases."
                )
                pdf = pdf.loc[
                    ~pdf["resource_id"].isin(bad_s["resource_id"])
                    & ~pdf["resource_id"].isin(bad_p["resource_id"])
                ]
        if not len(pdf):
            raise NoPhaseInformationError(
                f"No valid phases for event:\n{event}"
            )
        # rename the resource_id column for later merging
        pdf = pdf.rename(columns={"resource_id": "pick_id"})
        return pdf

    def _add_amplitudes(df):
        """Add amplitudes to picks"""
        dtypes = {
            "pick_id": str,
            "twindow_start": "timedelta64[ns]",
            "twindow_end": "timedelta64[ns]",
            "twindow_ref": "datetime64[ns]",
        }
        amp_df = event.amplitudes_to_df()
        # Drop rejected amplitudes
        amp_df = amp_df.loc[amp_df["evaluation_status"] != "rejected"]
        if amp_df.empty:
            # no data, init empty df with expected cols
            amp_df = pd.DataFrame(columns=list(dtypes)).astype(dtypes)
        else:
            # rename the amplitude time-window columns to the window names
            amp_df = amp_df.rename(
                columns={
                    "time_begin": "twindow_start",
                    "time_end": "twindow_end",
                    "reference": "twindow_ref",
                }
            )
        # Make sure time-related things are set correctly
        amp_df = amp_df[list(dtypes)].astype(dtypes)
        # Note: the amplitude list can be longer than the pick list because of
        # the logic for dropping picks earlier; amplitude-only rows have no
        # pick time and are dropped.
        merged = df.merge(
            amp_df, left_on="pick_id", right_on="pick_id", how="outer"
        ).dropna(subset=["time"])
        assert len(merged) == len(df)
        return _add_starttime_end(merged)

    def _add_starttime_end(df):
        """Add the time window start and end"""
        # fill references with start times of phases if empty
        df.loc[df["twindow_ref"].isnull(), "twindow_ref"] = df["time"]
        # Fill NaTs w/ 0 second timedelta
        twindow_start = df["twindow_start"].fillna(np.timedelta64(0, "ns"))
        twindow_end = df["twindow_end"].fillna(np.timedelta64(0, "ns"))
        # Determine start/end times of phase windows
        df["starttime"] = df["twindow_ref"] - twindow_start
        df["endtime"] = df["twindow_ref"] + twindow_end
        # add travel time
        df["travel_time"] = df["time"] - reftime
        # get earliest s phase by station
        _s_start = df.groupby(list(NSLC[:2])).apply(_get_earliest_s_time)
        s_start = _s_start.rename("s_start").to_frame().reset_index()
        # merge back into pick_df, use either defined window or S phase,
        # whichever is smaller.
        dd2 = df.merge(s_start, on=["network", "station"], how="left")
        # get dataframe indices for P
        p_inds = df[df.phase_hint == "P"].index
        # make sure P end times don't exceed s start times
        # NOTE(review): dd2 comes out of a merge while p_inds are labels of
        # df; this lookup presumes the two indices line up -- confirm when df
        # has a non-default index (e.g. after rows were dropped).
        endtime_or_s_start = dd2[["s_start", "endtime"]].min(
            axis=1, skipna=True
        )
        df.loc[p_inds, "endtime"] = endtime_or_s_start[p_inds]
        duration = abs(df["endtime"] - df["starttime"])
        # Make sure all value are under phase duration time, else truncate
        if max_duration is not None:
            max_dur = to_timedelta64(_get_extrema_like_df(df, max_duration))
            larger_than_max = duration > max_dur
            # use the per-row maximum so mapping/series inputs are honored,
            # mirroring the min_duration handling below
            df.loc[larger_than_max, "endtime"] = df["starttime"] + max_dur
        # Make sure all values are at least min_phase_duration, else expand
        if min_duration is not None:
            min_dur = to_timedelta64(_get_extrema_like_df(df, min_duration))
            small_than_min = duration < min_dur
            df.loc[small_than_min, "endtime"] = df["starttime"] + min_dur
        # sanity checks
        assert (df["endtime"] >= df["starttime"]).all()
        assert not (df["starttime"].isnull()).any()
        return df

    def _duplicate_on_same_stations(df):
        """
        Duplicate all the entries to get the 3 components for each station
        """
        # make a dict of channel[:-1] and matching channels
        assert channel_codes is not None
        code_lest_1 = defaultdict(list)
        for code in channel_codes:
            if isinstance(code, str):
                # a plain seed id: derive the component-less key from it
                seed_id_less, seed_id = code[:-1], code
            else:
                # a (seed_id_less, seed_id) pair
                seed_id_less, seed_id = code[0], code[1]
            code_lest_1[seed_id_less].append(seed_id)
        # create expanded df
        new_inds = [
            x for y in df["seed_id"].unique() for x in code_lest_1[y[:-1]]
        ]
        # get seed_id columns and merge back together
        df_new = pd.DataFrame(new_inds, columns=["seed_id"])
        df_new["seed_id_less"] = df_new["seed_id"].str[:-1]
        seed_id = expand_seed_id(df_new["seed_id"])
        df_new = df_new.join(seed_id)
        # now merge in old dataframe for full expand
        right_cols = list(PHASE_WINDOW_INTERMEDIATE_DTYPES)
        out = pd.merge(df_new, df[right_cols], on="seed_id_less", how="left")
        return out.drop_duplicates()

    def _apply_buffer(df):
        """Widen each window by buffer_ratio times its length on both ends."""
        # add buffers on either end of waveform for tapering
        buff = (df["endtime"] - df["starttime"]) * buffer_ratio
        df["starttime"] = df["starttime"] - buff
        df["endtime"] = df["endtime"] + buff
        return df

    # read picks in and filter out rejected picks
    dd = _add_amplitudes(_get_picks_df(restrict_to_arrivals))
    # return columns; copy so later column assignment never targets a view
    out = dd[list(PHASE_WINDOW_DF_DTYPES)].copy()
    # add buffer to window start/end
    if buffer_ratio is not None:
        out = _apply_buffer(out)
    # if channel codes are provided, make a duplicate of each phase window row
    # for each channel on the same station. An explicit length check is used
    # because the truth value of a pandas Series/Index is ambiguous.
    if channel_codes is not None and len(channel_codes) > 0:
        out = _duplicate_on_same_stations(out)[list(PHASE_WINDOW_DF_DTYPES)]
    return out
def _get_time(event):
    """Return ``{"time": <reference time>}``, using NaN when none can be found."""
    try:
        ref_time = obsplus.get_reference_time(event)
    except ValueError:
        # no valid starttime
        ref_time = np.nan
    return {"time": ref_time}