Example #1
def test_from_dataframe_int_location_codes(self, bingham_dataset):
    """
    Test a dataframe whose location column has an int dtype.
    Casting to int can drop leading zeros from location codes.
    """
    events = bingham_dataset.event_client.get_events()
    df1 = obsplus.picks_to_df(events)
    df = df1.copy()
    df["location"] = df["location"].astype(int)
    df2 = picks_to_df(df)
    assert (df2["location"] == df1["location"]).all()
    # ensure the seed ids are still consistent
    seed_id_loc = df2["seed_id"].str.split(".", expand=True)[2]
    assert (seed_id_loc == df2["location"]).all()
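For context, a minimal pandas-only sketch of the zero-dropping behavior this test guards against (nothing obsplus-specific):

import pandas as pd

loc = pd.Series(["01", "10"])           # two-character location codes
as_int = loc.astype(int)                # 1 and 10 -- the leading zero is lost
print(as_int.astype(str).tolist())      # ['1', '10'], no longer valid location codes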
Example #2
def check_picks(cat: Catalog):
    """
    Checks for errors with phase picks

    This function will check for duplicate picks on each station (i.e. more
    than one P or S per station) as well as if there are any S picks before
    P picks on each station.

    Parameters
    ----------
    cat
        Obspy catalog to validate

    """
    def fn(df):
        # No duplicates
        assert not any(df.phase_hint.duplicated())

        # Check p before s
        if ps.issubset(df.phase_hint):
            p_pick = df.loc[df.phase_hint == "P"].iloc[0]
            s_pick = df.loc[df.phase_hint == "S"].iloc[0]
            assert p_pick.time < s_pick.time

    ps = {"P", "S"}

    pdf = obsplus.picks_to_df(cat)
    pdf = pdf.loc[(pdf.evaluation_status != "rejected")
                  & (pdf.phase_hint.isin(ps))]
    gb = pdf.groupby(["event_id", "station"])
    gb.apply(fn)
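A minimal sketch of calling the validator; "my_catalog.xml" is a placeholder path, and any failed check surfaces as an AssertionError raised inside fn:

import obspy

cat = obspy.read_events("my_catalog.xml")  # placeholder path
try:
    check_picks(cat)
except AssertionError:
    print("catalog has duplicate or out-of-order picks")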
Example #3
def check_pick_order(event: Event):
    """
    Ensure:
        1. There are no S picks before P picks on any station
        2. There are no amplitude picks before P picks on any station
    """
    def pick_order(g, sp, ap):
        # get sub dfs with phases of interest
        p_picks = g[g["phase_hint"].str.upper() == "P"]
        s_picks = g[g["phase_hint"].str.upper() == "S"]
        amp_picks = g[g["phase_hint"].str.endswith("AML")]
        # each group should have at most one P and one S pick
        assert len(p_picks) <= 1 and len(s_picks) <= 1
        # check that P precedes S; if not, record the offending group name
        if len(p_picks) and len(s_picks):
            stime, ptime = s_picks.iloc[0]["time"], p_picks.iloc[0]["time"]
            if not (pd.isnull(ptime) or pd.isnull(stime)) and stime < ptime:
                sp.append(g.name)
        # next check all amplitude picks are after P
        if len(p_picks) and len(amp_picks):
            ptime = p_picks.iloc[0]["time"]
            bad_amp_picks = amp_picks[amp_picks["time"] < ptime]
            ap.extend(list(bad_amp_picks["seed_id"]))

    # get dataframe of picks, filter out rejected
    pdf = obsplus.picks_to_df(event)
    pdf = pdf.loc[pdf.evaluation_status != "rejected"]
    # get series of network.station.location
    ns = get_seed_id_series(pdf, subset=NSLC[:3])
    # Checking that picks are in acceptable order
    gb, sp, ap = pdf.groupby(ns), [], []
    gb.apply(pick_order, sp, ap)
    assert len(sp) == 0, f"S pick found before P pick:\nstation/s: {sp}"
    assert len(ap) == 0, f"amplitude pick found before P pick:\nseed_id/s: {ap}"
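The gb.apply(pick_order, sp, ap) call works because pandas forwards extra positional arguments to the applied function; a self-contained toy version of the same accumulator pattern:

import pandas as pd

df = pd.DataFrame({"station": ["A", "A", "B"], "time": [1, 2, 3]})
names = []

def collect(g, out):
    out.append(g.name)  # g.name holds the group key

df.groupby("station").apply(collect, names)
print(names)  # ['A', 'B'] on modern pandas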
Example #4
def bing_pick_bulk(self, bingham_catalog):
    """Create a dataframe from the bingham_test picks."""
    picks = obsplus.picks_to_df(bingham_catalog)
    df = picks[list(NSLC)].copy()  # copy to avoid pandas chained-assignment warnings
    df["starttime"] = picks["time"] - to_timedelta64(1.011)
    df["endtime"] = picks["time"] + to_timedelta64(7.011)
    return df
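The resulting frame has the (network, station, location, channel, starttime, endtime) layout commonly fed to bulk waveform requests; a hedged usage sketch, where wbank stands in for an assumed client exposing get_waveforms_bulk:

bulk = list(df.itertuples(index=False, name=None))  # rows become plain tuples
st = wbank.get_waveforms_bulk(bulk)                 # wbank is an assumption, e.g. a WaveBank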
Example #5
def check_duplicate_picks(event: Event):
    """
    Ensure there are no picks with the same phases on the same channels.
    """

    def dup_picks(df, phase_hint, subset):
        """Function for checking for duplications."""
        seed_id = get_seed_id_series(df, subset=subset)
        bad = seed_id[seed_id.duplicated()].tolist()
        assert len(bad) == 0, (
            f"Duplicate {phase_hint} picks found\n"
            f"event_id: {event_id}, seed_id/s: {bad}"
        )

    # A dict of {phase: columns that can't be duplicated}
    phase_duplicates = {"P": NSLC[:-1], "p": NSLC[:-1], "S": NSLC[:-1], "s": NSLC[:-1]}
    # first get dataframe of picks, filter out rejected
    pdf = obsplus.picks_to_df(event)
    pdf = pdf.loc[pdf.evaluation_status != "rejected"]
    # event id used in assertion messages
    event_id = str(event.resource_id)
    # Go through each of phase duplicates and compare against the pick dataframe
    pick_gb = pdf.groupby("phase_hint")
    for ph, cols in phase_duplicates.items():
        if ph not in pick_gb.groups:
            continue
        dup_picks(pick_gb.get_group(ph), ph, subset=cols)
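For reference, NSLC is obsplus's tuple of seed id pieces, so subset=NSLC[:-1] means duplicates are judged per network/station/location, ignoring the channel:

from obsplus.constants import NSLC

print(NSLC)       # ('network', 'station', 'location', 'channel')
print(NSLC[:-1])  # ('network', 'station', 'location')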
Example #6
def check_pick_order(event: Event):
    """
    Ensure:
        1. There are no S picks before P picks on any station
        2. There are no amplitude picks before P picks on any station
    """
    pdf = obsplus.picks_to_df(event)
    pdf = pdf.loc[pdf.evaluation_status != "rejected"]

    def pick_order(g, sp, ap, event_id=event.resource_id.id):
        # get sub dfs with phases of interest
        p_picks = g[g["phase_hint"].str.upper() == "P"]
        s_picks = g[g["phase_hint"].str.upper() == "S"]
        amp_picks = g[g["phase_hint"].str.endswith("AML")]
        # each group should have at most one P and one S pick
        assert len(p_picks) <= 1 and len(s_picks) <= 1
        # check that P precedes S; if not, record the offending group name
        if len(p_picks) and len(s_picks):
            if s_picks.iloc[0]["time"] < p_picks.iloc[0]["time"]:
                sp.append(g.name)
        # next check all amplitude picks are after P
        if len(p_picks) and len(amp_picks):
            ptime = p_picks.iloc[0]["time"]
            bad_amp_picks = amp_picks[amp_picks["time"] < ptime]
            ap.extend(list(bad_amp_picks["seed_id"]))

    # Checking that picks are in acceptable order
    gb, sp, ap = pdf.groupby("station"), [], []
    gb.apply(pick_order, sp, ap)
    assert len(sp) == 0, f"S pick found before P pick:\nstation/s: {sp}"
    assert len(ap) == 0, f"amplitude pick found before P pick:\nseed_id/s: {ap}"
Example #7
def _get_picks_df():
    """Get the picks dataframe, remove picks flagged as rejected."""
    pdf = obsplus.picks_to_df(event)
    # remove rejected picks
    pdf = pdf[pdf.evaluation_status != "rejected"]
    # add seed_id column  # TODO change this to seed_id
    pdf["seed_id"] = obsplus.utils.get_nslc_series(pdf)
    return pdf
Example #8
def test_none_onset(self):
    """
    Make sure Nones in the data get handled properly.
    """
    waveform_id = ev.WaveformStreamID(station_code="A")
    pick = ev.Pick(time=UTCDateTime(), waveform_id=waveform_id)
    df = picks_to_df(pick)
    assert df.onset.iloc[0] == ""
    assert df.polarity.iloc[0] == ""
Example #9
def test_picks_one_digit_location(self):
    """
    Ensure one-digit location codes are preserved.
    """
    picks = pick_generator(["UU.TMU.1.HHZ", "UU.TMU.01.HHZ"])
    df = picks_to_df(picks)
    # there should be two unique location codes and seed ids
    assert len(set(df["location"])) == 2
    assert len(set(df["seed_id"])) == 2
Example #10
def picks_df(self):
    """Return a dataframe from the picks (if possible)."""
    if self._picks_df is None:
        try:
            df = picks_to_df(self.event_client)
        except TypeError:
            self._picks_df = None
        else:
            self._picks_df = df
    return self._picks_df
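A sketch of how this lazily cached accessor might sit on its host class; the class name is an assumption, only the caching pattern is from the snippet:

from obsplus import picks_to_df

class EventClientWrapper:  # hypothetical host class
    def __init__(self, event_client):
        self.event_client = event_client
        self._picks_df = None  # cache, filled on first successful call

Note that a failed conversion leaves the cache as None, so unsupported inputs are retried (and fail again) on every access.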
Example #11
def test_s_phases(self, bingham_dataset):
    """Make sure only stations that have S picks are returned."""
    fetcher = bingham_dataset.get_fetcher()
    picks = obsplus.picks_to_df(fetcher.event_client.get_events())
    # There should be some S picks
    assert (picks["phase_hint"].str.lower() == "s").any()
    func = fetcher.yield_event_waveforms
    out = dict(func(self.time_before, self.time_after, reference="S"))
    for event_id, st in out.items():  # avoid shadowing the builtin id()
        assert isinstance(st, obspy.Stream)
Example #12
def test_dot_in_location_code(self):
    """Ensure a dot in the location code causes a ValueError."""
    waveform_id = ev.WaveformStreamID(
        network_code="UU",
        station_code="TMU",
        location_code="1.0",
        channel_code="HHZ",
    )
    pick = ev.Pick(time=obspy.UTCDateTime("2020-01-01"), waveform_id=waveform_id)
    with pytest.raises(ValueError):
        _ = obsplus.picks_to_df([pick])
Example #13
def picks_no_origin(self):
    """Create an event that has picks but no origin."""
    t0 = UTCDateTime("2016-01-01T10:12:15.222")

    def wave_id(seed_str):
        return ev.WaveformStreamID(seed_string=seed_str)

    picks = [
        ev.Pick(time=t0 + 2, waveform_id=wave_id("UU.TMU..HHZ")),
        ev.Pick(time=t0 + 1.2, waveform_id=wave_id("UU.BOB.01.ELZ")),
        ev.Pick(time=t0 + 3.2, waveform_id=wave_id("UU.TEX..EHZ")),
    ]
    return picks_to_df(ev.Event(picks=picks))
Example #14
def check_p_lims(event: Event, p_lim=None):
    """
    Check for P picks that aren't within p_lim of the median pick (if provided)
    """
    if p_lim is not None:
        df = obsplus.picks_to_df(event)
        df = df.loc[(df.evaluation_status != "rejected")
                    & (df.phase_hint == "P")]
        med = df.time.median()
        bad = df.loc[abs(df.time - med) > p_lim]
        assert len(bad) == 0, ("Outlying P pick found:\n"
                               f"event_id: {event.resource_id.id}, "
                               f"seed_id/s: {bad.seed_id.tolist()}")
Example #15
def test_read_uncertainty(self):
    """
    Tests that uncertainties in time_errors attribute are read. See #55.
    """
    kwargs = dict(lower_uncertainty=1, upper_uncertainty=2, uncertainty=12)
    time_error = ev.QuantityError(**kwargs)
    waveform_id = ev.WaveformStreamID(station_code="A")
    pick = ev.Pick(time=UTCDateTime(), time_errors=time_error, waveform_id=waveform_id)
    df = picks_to_df(pick)
    assert set(kwargs).issubset(df.columns)
    assert len(df) == 1
    ser = df.iloc[0]
    assert all(ser[i] == kwargs[i] for i in kwargs)
Example #16
def check_amps_on_z_component(event: Event,
                              no_z_amps=False,
                              phase_hints=("AML", "IAML")):
    """
    Check for amplitude picks on Z channels (if no_z_amps is True).
    """
    if not no_z_amps:
        return
    df = obsplus.picks_to_df(event)
    con1 = df.evaluation_status != "rejected"
    con2 = df.phase_hint.isin(phase_hints)
    con3 = df["channel"].str.endswith("Z")
    _df = df.loc[con1 & con2 & con3]
    assert len(_df) == 0, ("Amplitude pick on Z axis found:\n"
                           f"event_id: {str(event.resource_id)}, "
                           f"seed_id/s: {_df['seed_id'].tolist()}")
Example #17
def check_duplicate_picks(event: Event):
    """
    Ensure there are no picks with the same phases on the same channels.
    """
    # A dict of {phase: column that can't be duplicated}
    phase_duplicates = {"IAML": "seed_id", "AML": "seed_id"}

    # first get dataframe of picks
    pdf = obsplus.picks_to_df(event)
    pdf = pdf.loc[pdf.evaluation_status != "rejected"]
    event_id = str(event.resource_id)

    def dup_picks(phase_hint, on="station"):
        """ function for checking """
        df = pdf.loc[pdf.phase_hint == phase_hint]
        bad = df.loc[df[on].duplicated()][on].tolist()
        assert len(bad) == 0, (f"Duplicate {phase_hint} picks found\n"
                               f"event_id: {event_id}, "
                               f"{on}/s: {bad}")

    for phase_hint in pdf["phase_hint"].unique():
        dup_picks(phase_hint, on=phase_duplicates.get(phase_hint, "station"))
Example #18
def check_duplicate_picks(event: Event):
    """
    Ensure there are no picks with the same phases on the same channels.
    """
    def dup_picks(df, phase_hint, subset):
        """Function for checking for duplications."""
        seed_id = get_seed_id_series(df, subset=subset)
        bad = seed_id[seed_id.duplicated()].tolist()
        assert len(bad) == 0, (f"Duplicate {phase_hint} picks found\n"
                               f"event_id: {event_id}, seed_id/s: {bad}")

    # A dict of {phase: columns that can't be duplicated}
    phase_duplicates = {"IAML": None, "AML": None}
    # first get dataframe of picks, filter out rejected
    pdf = obsplus.picks_to_df(event)
    pdf = pdf.loc[pdf.evaluation_status != "rejected"]
    # event id used in assertion messages
    event_id = str(event.resource_id)
    for phase_hint, sub_df in pdf.groupby("phase_hint"):
        # default to comparing network, station, location
        subset = phase_duplicates.get(phase_hint, NSLC[:-1])
        dup_picks(sub_df, phase_hint, subset=subset)
Example #19
def _get_picks_df(restrict_to_arrivals):
    """Get the picks dataframe, remove picks flagged as rejected."""
    pdf = obsplus.picks_to_df(event)
    # seed id with the component stripped from the channel
    pdf["seed_id_less"] = pdf["seed_id"].str[:-1]
    if restrict_to_arrivals:
        adf = obsplus.arrivals_to_df(event)
        pdf = pdf.loc[pdf["resource_id"].isin(adf["pick_id"])]
    # remove rejected picks
    pdf = pdf[pdf.evaluation_status != "rejected"]
    # Toss any picks from stations whose S picks are earlier than their P picks
    if {"P", "S"}.issubset(pdf["phase_hint"]):
        phs = pdf.groupby("phase_hint")
        p_picks = phs.get_group("P")
        s_picks = phs.get_group("S")
        both = set(p_picks["seed_id_less"]).intersection(s_picks["seed_id_less"])
        p_picks = (
            p_picks.loc[p_picks["seed_id_less"].isin(both)]
            .set_index("seed_id_less")
            .sort_index()
        )
        s_picks = (
            s_picks.loc[s_picks["seed_id_less"].isin(both)]
            .set_index("seed_id_less")
            .sort_index()
        )
        mask = p_picks["time"] > s_picks["time"]
        bad_p = p_picks.loc[mask]
        bad_s = s_picks.loc[mask]
        if mask.any():
            warnings.warn(
                "S-pick is earlier than P-pick for one or more picks. "
                "Skipping phases."
            )
        pdf = pdf.loc[
            ~pdf["resource_id"].isin(bad_s["resource_id"])
            & ~pdf["resource_id"].isin(bad_p["resource_id"])
        ]
    if not len(pdf):
        raise NoPhaseInformationError(f"No valid phases for event:\n{event}")
    # rename the resource_id column for later merging
    pdf.rename(columns={"resource_id": "pick_id"}, inplace=True)
    return pdf
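The P/S comparison above relies on pandas index alignment: both frames are indexed by the channel-less seed id and sorted, so the > compares rows station by station. A toy illustration:

import pandas as pd

p = pd.Series([1.0, 5.0], index=["UU.BOB.01.EL", "UU.TMU..HH"])
s = pd.Series([2.0, 4.0], index=["UU.BOB.01.EL", "UU.TMU..HH"])
print(p > s)  # UU.BOB.01.EL False, UU.TMU..HH True: P after S flags that station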
Example #20
def test_unique_event_time_no_origin(self, bingham_cat_only_picks):
    """Ensure events with no origin don't all return the same time."""
    df = picks_to_df(bingham_cat_only_picks)
    assert len(df["event_time"].unique()) == len(df["event_id"].unique())
Example #21
def pick_df(self, request):
    """Collect everything in the pick dataframe."""
    df = picks_to_df(request.getfixturevalue(request.param))
    return df
Example #22
def pick_df(self):
    """Return the pick dataframe of Bingham."""
    ds = obsplus.load_dataset("bingham_test")
    cat = ds.event_client.get_events()
    return obsplus.picks_to_df(cat)
Example #23
def pick_df(self, request):
    """Collect all the supported inputs from parametrization."""
    return picks_to_df(request.param)
Example #24
def pick_dataframe(self, catalog):
    """Return a dataframe of picks."""
    return picks_to_df(catalog)
Example #25
    def _get_associated_event_id(new_picks, old_picks):
        """Return the associated event id"""
        new_med = new_picks.groupby("event_id")["time"].apply(_get_pick_median)
        old_med = _get_pick_median(old_picks["time"])
        diffs = abs(new_med - old_med)
        # check min tolerance (time diffs are in ns); if exceeded, return None
        if diffs.min() / 1_000_000_000 > median_tolerance:
            return None
        return diffs.idxmin()

    # Get list-like of events from new_catalog
    new_cat = new_catalog if isinstance(new_catalog, Catalog) else [new_catalog]
    assert len(new_cat) > 0
    # Get dataframes of event info
    new_pick_df = obsplus.picks_to_df(new_cat)
    old_pick_df = obsplus.picks_to_df(event)
    eid = _get_associated_event_id(new_pick_df, old_pick_df)
    new_event = {str(x.resource_id): x for x in new_cat}.get(eid)
    # The association failed, just return original event
    if new_event is None:
        return event
    return merge_events(event, new_event, delete_old=delete_old)


# ---------- silly hash functions for getting around resource_ids (sorta)


def _hash_wids(objs, extra_attr=None):
    out = OrderedDict()
    for obj in objs:
Example #26
def test_from_file(self, event_file):
    """Test reading phase picks from files."""
    df = picks_to_df(event_file)
    assert isinstance(df, pd.DataFrame)
    assert len(df)
Example #27
def test_from_event_bank(self, default_ebank):
    """Ensure event banks can be used to get dataframes."""
    df = picks_to_df(default_ebank)
    assert isinstance(df, pd.DataFrame)
Example #28
def test_from_event_directory(self, event_directory):
    """Test extracting info from an event directory."""
    df = picks_to_df(event_directory)
    assert len(df)
    assert isinstance(df, pd.DataFrame)
Example #29
from pathlib import Path

import obspy

import obsplus
from obsplus.events.validate import validate_catalog
from obspy.core.event import ResourceIdentifier

if __name__ == "__main__":
    cat_path = Path("/media/data/Gits/obsplus/tests/test_data/qml2merge/"
                    "2016-10-15T02-27-50/2016-10-15T02-27-50_2.xml")
    assert cat_path.exists()

    cat = obspy.read_events(str(cat_path))

    # remove all amplitudes
    picks = cat[0].picks
    cat[0].picks = [x for x in picks if x.phase_hint != "IAML"]

    # get duplicated event ids
    pdf = obsplus.picks_to_df(cat)
    pdf = pdf[pdf["evaluation_status"] != "rejected"]
    duplicated_ids = set(pdf[pdf["station"].duplicated()]["resource_id"])

    # next mark duplicated as rejected and prune the event
    for duplicated_id in duplicated_ids:
        rid = ResourceIdentifier(duplicated_id)
        obj = rid.get_referred_object()
        obj.evaluation_status = "rejected"

    cat_out = obsplus.events.utils.prune_events(cat)

    validate_catalog(cat_out)

    cat.write(str(cat_path), "quakeml")
Example #30
def dataframe_output(self, tcat):
    """Return read_picks result from reading a dataframe."""
    df = picks_to_df(tcat)
    return picks_to_dataframe(df)
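The fixture exercises a dataframe round trip: as Example #1 also shows, picks_to_df accepts a pick dataframe as input. A hedged equivalence check (tcat is the catalog fixture assumed above):

df1 = picks_to_df(tcat)
df2 = picks_to_df(df1)  # feeding the frame back through should be stable
assert set(df1.columns) == set(df2.columns)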