Example #1
0
def crandall_stream(crandall_event, crandall_ds, crandall_inventory):
    """
    Fetch the waveforms around the crandall event with the response removed.

    The window runs from 5 seconds before to 60 seconds after the event's
    reference time. The stream is linearly detrended, then the response is
    removed using ``crandall_inventory`` with ``output="VEL"``.
    """
    reference = obsplus.get_reference_time(crandall_event)
    window = dict(starttime=reference - 5, endtime=reference + 60)
    stream = crandall_ds.waveform_client.get_waveforms(**window)
    stream.detrend("linear")
    stream.remove_response(
        crandall_inventory,
        pre_filt=[0.1, 0.2, 40, 50],
        output="VEL",
    )
    return stream
Example #2
0
 def test_event_streams(self, bingham_dataset, bingham_stream_dict):
     """Each event id must map to a non-empty stream starting near the event."""
     events_by_id = {
         str(event.resource_id): event
         for event in bingham_dataset.event_client.get_events()
     }
     for event_id, stream in bingham_stream_dict.items():
         assert len(stream), f"no data for event:{event_id}"
         reference = get_reference_time(events_by_id[event_id]).timestamp
         earliest = min(tr.stats.starttime.timestamp for tr in stream)
         # earliest trace start must be within 12 seconds of the reference time
         assert abs(earliest - reference) < 12
Example #3
0
 def test_all_channels_included(self, node_dataset):
     """Every pick time must expand to three distinct channels."""
     event = node_dataset.event_client.get_events()[0]
     # fetch a stream around the event to collect the available trace ids
     reference = obsplus.get_reference_time(event)
     stream = node_dataset.waveform_client.get_waveforms(
         starttime=reference - 1,
         endtime=reference + 15,
     )
     seed_ids = {tr.id for tr in stream}
     windows = utils.get_phase_window_df(event=event, channel_codes=seed_ids)
     # each distinct pick time should carry exactly three unique seed ids
     for _, group in windows.groupby("time"):
         assert len(group) == 3
         assert len(group["seed_id"]) == len(set(group["seed_id"])) == 3
     # a phase/seed_id combination may appear only once
     assert not windows.duplicated(["phase_hint", "seed_id"]).any()
Example #4
0
 def test_bad_type_raises(self):
     """None is passed through as None; an unsupported type raises TypeError."""
     result = get_reference_time(None)
     assert result is None
     with pytest.raises(TypeError):
         get_reference_time({})
Example #5
0
 def test_stream(self):
     """The reference time of a stream equals its earliest trace start."""
     stream = obspy.read()
     reference = get_reference_time(stream)
     earliest = min(tr.stats.starttime for tr in stream)
     assert reference == earliest
Example #6
0
 def test_event_with_picks(self, event_only_picks):
     """An event with picks but no origin should resolve to 2015-01-01."""
     assert UTCDateTime("2015-01-01") == get_reference_time(event_only_picks)
Example #7
0
 def test_empty_event_raises(self):
     """A default-constructed event must raise ValueError."""
     blank_event = ev.Event()
     with pytest.raises(ValueError):
         get_reference_time(blank_event)
Example #8
0
 def time_outputs(self, request):
     """Resolve the fixture named by ``request.param``; return its reference time."""
     return get_reference_time(request.getfixturevalue(request.param))
Example #9
0
def get_phase_window_df(  # noqa: C901
    event: ev.Event,
    max_duration: Optional[Union[float, int, Mapping]] = None,
    min_duration: Optional[Union[float, int, Mapping]] = None,
    channel_codes: Optional[Union[Collection, pd.Series]] = None,
    buffer_ratio: Optional[float] = None,
    restrict_to_arrivals: bool = True,
) -> pd.DataFrame:
    """
    Return a dataframe of phase picks for the event. Does the following:

    1) Removes the rejected picks.
    2) Defines pick time windows using either
        a) a corresponding amplitude whose type matches the phase hint of the pick
           and has a time window
        b) the start of the phase to the arrival time plus min_duration.

    Parameters
    ----------
    event
        The seismic event
    max_duration
        The maximum duration (in seconds) of a phase. Can either be a scalar,
        or a mapping whose keys are seed_ids and values are applied to that
        specific channel. A pandas Series is also handled; its index must
        have a ``seed_id_less`` level, which is dropped before mapping.
    min_duration
        The minimum duration (in seconds) of a phase. Can either be a scalar,
        or a mapping whose keys are seed_ids and values are applied to that
        specific channel. A pandas Series is handled as for max_duration.
    channel_codes
        If provided, supplies the needed information to expand the dataframe
        to include an entry for each channel on a station for a given pick.
        For example, this is used so a P pick that occurs on a HHZ channel
        will also have an entry on HHE and HHN (assuming they are in the
        list). String entries are treated as seed ids and grouped by the id
        with its last character removed.
    buffer_ratio
        If not None, the ratio of the total duration of the phase windows
        which should be added to BOTH the start and end of the phase window.
    restrict_to_arrivals
        If True, only use picks for which there is an arrival on the preferred
        origin.

    Returns
    -------
    A dataframe restricted to the columns named in PHASE_WINDOW_DF_DTYPES.

    Raises
    ------
    NoPhaseInformationError
        If no picks remain after filtering.
    """
    reftime = to_datetime64(get_reference_time(event))

    def _get_earliest_s_time(df):
        return df[df.phase_hint == "S"].time.min()

    def _get_extrema_like_df(df, extrema_arg):
        """
        get min or max argument with the same length as df.
        This is done so each rows duration can be compared to some
        row specific value.
        """

        if isinstance(extrema_arg, pd.Series):
            return df["seed_id"].map(extrema_arg.droplevel("seed_id_less"))
        elif isinstance(extrema_arg, Mapping):
            return df["seed_id"].map(extrema_arg)
        else:
            return np.ones(len(df)) * extrema_arg

    def _get_picks_df(restrict_to_arrivals):
        """Get the picks dataframe, remove picks flagged as rejected."""
        pdf = obsplus.picks_to_df(event)
        # seed id with the last character (the component) dropped
        pdf["seed_id_less"] = pdf["seed_id"].str[:-1]
        if restrict_to_arrivals:
            adf = obsplus.arrivals_to_df(event)
            pdf = pdf.loc[pdf["resource_id"].isin(adf["pick_id"])]
        # remove rejected picks
        pdf = pdf[pdf.evaluation_status != "rejected"]
        # Toss any picks from stations that have S-picks that are earlier than P-picks
        if {"P", "S"}.issubset(pdf["phase_hint"]):
            phs = pdf.groupby("phase_hint")
            p_picks = phs.get_group("P")
            s_picks = phs.get_group("S")
            both = set(p_picks["seed_id_less"]).intersection(
                s_picks["seed_id_less"])
            p_picks = (p_picks.loc[p_picks["seed_id_less"].isin(
                both)].set_index("seed_id_less").sort_index())
            s_picks = (s_picks.loc[s_picks["seed_id_less"].isin(
                both)].set_index("seed_id_less").sort_index())
            mask = p_picks["time"] > s_picks["time"]
            bad_p = p_picks.loc[mask]
            bad_s = s_picks.loc[mask]
            if mask.any():
                warnings.warn(
                    "S-pick is earlier than P-pick for one or more picks. "
                    "Skipping phases.")
            pdf = pdf.loc[~pdf["resource_id"].isin(bad_s["resource_id"])
                          & ~pdf["resource_id"].isin(bad_p["resource_id"])]
        if not len(pdf):
            raise NoPhaseInformationError(
                f"No valid phases for event:\n{event}")
        # rename the resource_id column for later merging
        pdf.rename(columns={"resource_id": "pick_id"}, inplace=True)
        return pdf

    def _add_amplitudes(df):
        """Add amplitudes to picks"""
        dtypes = {
            "pick_id": str,
            "twindow_start": "timedelta64[ns]",
            "twindow_end": "timedelta64[ns]",
            "twindow_ref": "datetime64[ns]",
        }
        amp_df = event.amplitudes_to_df()
        # Drop rejected amplitudes
        amp_df = amp_df.loc[amp_df["evaluation_status"] != "rejected"]
        if amp_df.empty:  # no data, init empty df with expected cols
            amp_df = pd.DataFrame(columns=list(dtypes)).astype(dtypes)
        else:
            # merge picks/amps together and calculate windows
            amp_df.rename(
                columns={
                    "time_begin": "twindow_start",
                    "time_end": "twindow_end",
                    "reference": "twindow_ref",
                },
                inplace=True,
            )
        # merge and return
        amp_df = amp_df[list(dtypes)].astype(
            dtypes)  # Make sure time-related things are set correctly
        # Note: the amplitude list can be longer than the pick list because of
        # the logic for dropping picks earlier
        merged = df.merge(amp_df,
                          left_on="pick_id",
                          right_on="pick_id",
                          how="outer").dropna(subset=["time"])
        assert len(merged) == len(df)
        return _add_starttime_end(merged)

    def _add_starttime_end(df):
        """Add the time window start and end"""
        # fill references with start times of phases if empty
        df.loc[df["twindow_ref"].isnull(), "twindow_ref"] = df["time"]
        # Fill NaTs w/ 0 second timedelta
        twindow_start = df["twindow_start"].fillna(np.timedelta64(0, "ns"))
        twindow_end = df["twindow_end"].fillna(np.timedelta64(0, "ns"))
        # Determine start/end times of phase windows
        df["starttime"] = df["twindow_ref"] - twindow_start
        df["endtime"] = df["twindow_ref"] + twindow_end
        # add travel time
        df["travel_time"] = df["time"] - reftime
        # get earliest s phase by station
        _s_start = df.groupby(list(NSLC[:2])).apply(_get_earliest_s_time)
        s_start = _s_start.rename("s_start").to_frame().reset_index()
        # merge back into pick_df, use either defined window or S phase, whichever
        # is smaller.
        dd2 = df.merge(s_start, on=["network", "station"], how="left")
        # get dataframe indices for P
        p_inds = df[df.phase_hint == "P"].index
        # make sure P end times don't exceed s start times
        endtime_or_s_start = dd2[["s_start", "endtime"]].min(axis=1,
                                                             skipna=True)
        df.loc[p_inds, "endtime"] = endtime_or_s_start[p_inds]
        duration = abs(df["endtime"] - df["starttime"])
        # Make sure all value are under phase duration time, else truncate them
        if max_duration is not None:
            max_dur = to_timedelta64(_get_extrema_like_df(df, max_duration))
            larger_than_max = duration > max_dur
            # use the per-row limit (not the raw argument) so Mapping/Series
            # inputs truncate each channel by its own value, mirroring the
            # min_duration branch below
            df.loc[larger_than_max, "endtime"] = df["starttime"] + max_dur
        # Make sure all values are at least min_phase_duration, else expand them
        if min_duration is not None:
            min_dur = to_timedelta64(_get_extrema_like_df(df, min_duration))
            small_than_min = duration < min_dur
            df.loc[small_than_min, "endtime"] = df["starttime"] + min_dur
        # sanity checks
        assert (df["endtime"] >= df["starttime"]).all()
        assert not (df["starttime"].isnull()).any()
        return df

    def _duplicate_on_same_stations(df):
        """
        Duplicate all the entries to get the 3 components for each station
        """
        # make a dict of channel[:-1] and matching channels
        assert channel_codes is not None
        code_lest_1 = defaultdict(list)
        for code in channel_codes:
            if isinstance(code, str):
                # a seed id string: group it under the id minus its last char
                code_lest_1[code[:-1]].append(code)
            else:
                # non-string entries keep the previous (key, code) handling
                code_lest_1[code[0]].append(code[1])
        # create expanded df
        new_inds = [
            x for y in df["seed_id"].unique() for x in code_lest_1[y[:-1]]
        ]
        # get seed_id columns and merge back together
        df_new = pd.DataFrame(new_inds, columns=["seed_id"])
        df_new["seed_id_less"] = df_new["seed_id"].str[:-1]
        seed_id = expand_seed_id(df_new["seed_id"])
        df_new = df_new.join(seed_id)
        # now merge in old dataframe for full expand
        right_cols = list(PHASE_WINDOW_INTERMEDIATE_DTYPES)
        out = pd.merge(df_new, df[right_cols], on="seed_id_less", how="left")
        # several picks can share a seed_id_less, which repeats rows; drop them
        return out.drop_duplicates()

    def _apply_buffer(df):
        # add buffers on either end of waveform for tapering
        buff = (df["endtime"] - df["starttime"]) * buffer_ratio
        df["starttime"] = df["starttime"] - buff
        df["endtime"] = df["endtime"] + buff
        return df

    # read picks in and filter out rejected picks
    dd = _add_amplitudes(_get_picks_df(restrict_to_arrivals))
    # return columns
    out = dd[list(PHASE_WINDOW_DF_DTYPES)]
    # add buffer to window start/end
    if buffer_ratio is not None:
        out = _apply_buffer(out)
    # if channel codes are provided, make a duplicate of each phase window row
    # for each channel on the same station. Test length explicitly because the
    # truth value of a pandas Series is ambiguous and raises.
    if channel_codes is not None and len(channel_codes):
        out = _duplicate_on_same_stations(out)[list(PHASE_WINDOW_DF_DTYPES)]
    return out
Example #10
0
def _get_time(event):
    """Return ``{"time": reference_time}``, using NaN when a ValueError is raised."""
    try:
        reference = obsplus.get_reference_time(event)
    except ValueError:  # no valid starttime
        return {"time": np.nan}
    return {"time": reference}