def test_from_dataframe_int_location_codes(self, bingham_dataset):
    """
    Tests for a dataframe with an int column defining location code.
    These can drop necessary 0s.
    """
    events = bingham_dataset.event_client.get_events()
    df1 = obsplus.picks_to_df(events)
    df = df1.copy()
    df["location"] = df["location"].astype(int)
    df2 = picks_to_df(df)
    assert (df2["location"] == df1["location"]).all()
    # ensure the seed ids are still consistent
    seed_id_loc = df2["seed_id"].str.split(".", expand=True)[2]
    assert (seed_id_loc == df2["location"]).all()
def check_picks(cat: Catalog):
    """
    Checks for errors with phase picks.

    This function will check for duplicate picks on each station (i.e. more
    than one P or S per station) as well as if there are any S picks before
    P picks on each station.

    Parameters
    ----------
    cat
        Obspy catalog to validate
    """

    def fn(df):
        # no duplicate phase hints on a station
        assert not any(df.phase_hint.duplicated())
        # check P comes before S
        if ps.issubset(df.phase_hint):
            p_pick = df.loc[df.phase_hint == "P"].iloc[0]
            s_pick = df.loc[df.phase_hint == "S"].iloc[0]
            assert p_pick.time < s_pick.time

    ps = {"P", "S"}
    pdf = obsplus.picks_to_df(cat)
    pdf = pdf.loc[(pdf.evaluation_status != "rejected") & (pdf.phase_hint.isin(ps))]
    gb = pdf.groupby(["event_id", "station"])
    gb.apply(fn)
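A minimal usage sketch for the validator above; the catalog construction, seed id, and pick times are invented for illustration and are not from the original source:

# Illustrative only: build a tiny catalog with one P and one S pick and run
# check_picks on it; swapping the two pick times would raise an AssertionError.
import obspy
import obspy.core.event as ev

t0 = obspy.UTCDateTime("2019-01-01T00:00:00")
wid = ev.WaveformStreamID(seed_string="UU.TMU..HHZ")
picks = [
    ev.Pick(time=t0 + 1.0, phase_hint="P", waveform_id=wid),
    ev.Pick(time=t0 + 2.5, phase_hint="S", waveform_id=wid),
]
cat = obspy.Catalog(events=[ev.Event(picks=picks)])
check_picks(cat)  # passes silently when picks are consistent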
def check_pick_order(event: Event):
    """
    Ensure:
        1. There are no S picks before P picks on any station
        2. There are no amplitude picks before P picks on any station
    """

    def pick_order(g, sp, ap):
        # get sub dfs with phases of interest
        p_picks = g[g["phase_hint"].str.upper() == "P"]
        s_picks = g[g["phase_hint"].str.upper() == "S"]
        amp_picks = g[g["phase_hint"].str.endswith("AML")]
        # there should be at most one P and one S pick
        assert len(p_picks) <= 1 and len(s_picks) <= 1
        # first check that P is earlier than S; if not, record the group name
        if len(p_picks) and len(s_picks):
            stime, ptime = s_picks.iloc[0]["time"], p_picks.iloc[0]["time"]
            if (stime < ptime) and not (pd.isnull(ptime) | pd.isnull(stime)):
                sp.append(g.name)
        # next check that all amplitude picks are after P
        if len(p_picks) and len(amp_picks):
            ptime = p_picks.iloc[0]["time"]
            bad_amp_picks = amp_picks[amp_picks["time"] < ptime]
            ap.extend(list(bad_amp_picks["seed_id"]))

    # get dataframe of picks, filter out rejected
    pdf = obsplus.picks_to_df(event)
    pdf = pdf.loc[pdf.evaluation_status != "rejected"]
    # get series of network.station.location
    ns = get_seed_id_series(pdf, subset=NSLC[:3])
    # check that picks are in an acceptable order
    gb, sp, ap = pdf.groupby(ns), [], []
    gb.apply(pick_order, sp, ap)
    assert len(sp) == 0, f"S pick found before P pick:\nstation/s: {sp}"
    assert len(ap) == 0, f"amplitude pick found before P pick:\nseed_id/s: {ap}"
def bing_pick_bulk(self, bingham_catalog):
    """Create a bulk waveform-request dataframe from the bingham_test picks."""
    picks = obsplus.picks_to_df(bingham_catalog)
    df = picks[list(NSLC)].copy()
    df["starttime"] = picks["time"] - to_timedelta64(1.011)
    df["endtime"] = picks["time"] + to_timedelta64(7.011)
    return df
def check_duplicate_picks(event: Event):
    """Ensure there are no picks with the same phases on the same channels."""

    def dup_picks(df, phase_hint, subset):
        """Check for duplicated picks within the given subset of NSLC columns."""
        seed_id = get_seed_id_series(df, subset=subset)
        bad = seed_id[seed_id.duplicated()].tolist()
        assert len(bad) == 0, (
            f"Duplicate {phase_hint} picks found\n"
            f"event_id: {event_id}, "
            f"seed_id/s: {bad}"
        )

    # A dict of {phase: columns that can't be duplicated}
    phase_duplicates = {
        "P": NSLC[:-1],
        "p": NSLC[:-1],
        "S": NSLC[:-1],
        "s": NSLC[:-1],
    }
    # first get dataframe of picks, filter out rejected
    pdf = obsplus.picks_to_df(event)
    pdf = pdf.loc[pdf.evaluation_status != "rejected"]
    # get the event id for error messages
    event_id = str(event.resource_id)
    # go through each phase/column combination and compare against the pick dataframe
    pick_gb = pdf.groupby("phase_hint")
    for ph, cols in phase_duplicates.items():
        if ph not in pick_gb.groups:
            continue
        dup_picks(pick_gb.get_group(ph), ph, subset=cols)
def check_pick_order(event: Event):
    """
    Ensure:
        1. There are no S picks before P picks on any station
        2. There are no amplitude picks before P picks on any station
    """
    pdf = obsplus.picks_to_df(event)
    pdf = pdf.loc[pdf.evaluation_status != "rejected"]

    def pick_order(g, sp, ap, event_id=event.resource_id.id):
        # get sub dfs with phases of interest
        p_picks = g[g["phase_hint"].str.upper() == "P"]
        s_picks = g[g["phase_hint"].str.upper() == "S"]
        amp_picks = g[g["phase_hint"].str.endswith("AML")]
        # there should be at most one P and one S pick
        assert len(p_picks) <= 1 and len(s_picks) <= 1
        # first check that P is earlier than S; if not, record the station name
        if len(p_picks) and len(s_picks):
            if s_picks.iloc[0]["time"] < p_picks.iloc[0]["time"]:
                sp.append(g.name)
        # next check that all amplitude picks are after P
        if len(p_picks) and len(amp_picks):
            ptime = p_picks.iloc[0]["time"]
            bad_amp_picks = amp_picks[amp_picks["time"] < ptime]
            ap.extend(list(bad_amp_picks["seed_id"]))

    # check that picks are in an acceptable order
    gb, sp, ap = pdf.groupby("station"), [], []
    gb.apply(pick_order, sp, ap)
    assert len(sp) == 0, f"S pick found before P pick:\nstation/s: {sp}"
    assert len(ap) == 0, f"amplitude pick found before P pick:\nseed_id/s: {ap}"
def _get_picks_df():
    """Get the picks dataframe, remove picks flagged as rejected."""
    pdf = obsplus.picks_to_df(event)
    # remove rejected picks
    pdf = pdf[pdf.evaluation_status != "rejected"]
    # add seed_id column
    # TODO change this to seed_id
    pdf["seed_id"] = obsplus.utils.get_nslc_series(pdf)
    return pdf
def test_none_onset(self):
    """Make sure Nones in the data get handled properly."""
    waveform_id = ev.WaveformStreamID(station_code="A")
    pick = ev.Pick(time=UTCDateTime(), waveform_id=waveform_id)
    df = picks_to_df(pick)
    assert df.onset.iloc[0] == ""
    assert df.polarity.iloc[0] == ""
def test_picks_one_digit_location(self):
    """Ensure 1 digit location codes get preserved."""
    picks = pick_generator(["UU.TMU.1.HHZ", "UU.TMU.01.HHZ"])
    df = picks_to_df(picks)
    # there should be two unique location codes and seed_ids
    assert len(set(df["location"])) == 2
    assert len(set(df["seed_id"])) == 2
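The pick_generator helper used above is not shown in this snippet; a minimal stand-in, assuming it simply builds one pick per seed-id string, might look like this:

# Hypothetical helper (not the original implementation): create one Pick per
# NSLC seed-id string, spacing the pick times one second apart.
import obspy
import obspy.core.event as ev

def pick_generator(seed_ids):
    picks = []
    for i, seed_id in enumerate(seed_ids):
        wid = ev.WaveformStreamID(seed_string=seed_id)
        picks.append(ev.Pick(time=obspy.UTCDateTime(i), waveform_id=wid, phase_hint="P"))
    return picks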
def picks_df(self):
    """Return a dataframe from the picks (if possible)."""
    if self._picks_df is None:
        try:
            df = picks_to_df(self.event_client)
        except TypeError:
            self._picks_df = None
        else:
            self._picks_df = df
    return self._picks_df
def test_s_phases(self, bingham_dataset):
    """Make sure only stations that have S picks are returned."""
    fetcher = bingham_dataset.get_fetcher()
    picks = obsplus.picks_to_df(fetcher.event_client.get_events())
    # there should be some S picks
    assert (picks["phase_hint"].str.lower() == "s").any()
    func = fetcher.yield_event_waveforms
    out = dict(func(self.time_before, self.time_after, reference="S"))
    for _, st in out.items():
        assert isinstance(st, obspy.Stream)
def test_dot_in_location_code(self):
    """Ensure a dot in the location code causes a ValueError."""
    waveform_id = ev.WaveformStreamID(
        network_code="UU",
        station_code="TMU",
        location_code="1.0",
        channel_code="HHZ",
    )
    pick = ev.Pick(time=obspy.UTCDateTime("2020-01-01"), waveform_id=waveform_id)
    with pytest.raises(ValueError):
        _ = obsplus.picks_to_df([pick])
def picks_no_origin(self):
    """Create an event that has picks but no origin."""
    t0 = UTCDateTime("2016-01-01T10:12:15.222")

    def wave_id(seed_str):
        return ev.WaveformStreamID(seed_string=seed_str)

    picks = [
        ev.Pick(time=t0 + 2, waveform_id=wave_id("UU.TMU..HHZ")),
        ev.Pick(time=t0 + 1.2, waveform_id=wave_id("UU.BOB.01.ELZ")),
        ev.Pick(time=t0 + 3.2, waveform_id=wave_id("UU.TEX..EHZ")),
    ]
    return picks_to_df(ev.Event(picks=picks))
def check_p_lims(event: Event, p_lim=None):
    """Check for P picks that aren't within p_lim of the median pick (if provided)."""
    if p_lim is not None:
        df = obsplus.picks_to_df(event)
        df = df.loc[(df.evaluation_status != "rejected") & (df.phase_hint == "P")]
        med = df.time.median()
        bad = df.loc[abs(df.time - med) > p_lim]
        assert len(bad) == 0, (
            "Outlying P pick found:\n"
            f"event_id: {event.resource_id.id}, "
            f"seed_id/s: {bad.seed_id.tolist()}"
        )
def test_read_uncertainty(self):
    """
    Tests that uncertainties in time_errors attribute are read. See #55.
    """
    kwargs = dict(lower_uncertainty=1, upper_uncertainty=2, uncertainty=12)
    time_error = ev.QuantityError(**kwargs)
    waveform_id = ev.WaveformStreamID(station_code="A")
    pick = ev.Pick(time=UTCDateTime(), time_errors=time_error, waveform_id=waveform_id)
    df = picks_to_df(pick)
    assert set(kwargs).issubset(df.columns)
    assert len(df) == 1
    ser = df.iloc[0]
    assert all([ser[i] == kwargs[i] for i in kwargs])
def check_amps_on_z_component(
    event: Event, no_z_amps=False, phase_hints=("AML", "IAML")
):
    """Check for amplitude picks on Z channels (if no_z_amps is True)."""
    if not no_z_amps:
        return
    df = obsplus.picks_to_df(event)
    con1 = df.evaluation_status != "rejected"
    con2 = df.phase_hint.isin(phase_hints)
    con3 = df["channel"].str.endswith("Z")
    _df = df.loc[con1 & con2 & con3]
    assert len(_df) == 0, (
        "Amplitude pick on Z axis found:\n"
        f"event_id: {str(event.resource_id)}, "
        f"seed_id/s: {_df['seed_id'].tolist()}"
    )
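A small sketch of how this check behaves; the seed id and pick time are invented, and an amplitude pick on a ...Z channel should trip the assertion when no_z_amps is True:

# Illustrative only: one IAML pick on a vertical (Z) channel.
import obspy
import obspy.core.event as ev

wid = ev.WaveformStreamID(seed_string="UU.TMU..HHZ")
pick = ev.Pick(time=obspy.UTCDateTime("2019-01-01"), phase_hint="IAML", waveform_id=wid)
event = ev.Event(picks=[pick])
check_amps_on_z_component(event, no_z_amps=True)  # raises AssertionError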
def check_duplicate_picks(event: Event):
    """Ensure there are no picks with the same phases on the same channels."""
    # A dict of {phase: column that can't be duplicated}
    phase_duplicates = {"IAML": "seed_id", "AML": "seed_id"}
    # first get dataframe of picks
    pdf = obsplus.picks_to_df(event)
    pdf = pdf.loc[pdf.evaluation_status != "rejected"]
    event_id = str(event.resource_id)

    def dup_picks(phase_hint, on="station"):
        """Check for duplicated picks of the given phase on the given column."""
        df = pdf.loc[pdf.phase_hint == phase_hint]
        bad = df.loc[df[on].duplicated()][on].tolist()
        assert len(bad) == 0, (
            f"Duplicate {phase_hint} picks found\n"
            f"event_id: {event_id}, "
            f"{on}/s: {bad}"
        )

    for phase_hint in pdf["phase_hint"].unique():
        dup_picks(phase_hint, on=phase_duplicates.get(phase_hint, "station"))
def check_duplicate_picks(event: Event):
    """Ensure there are no picks with the same phases on the same channels."""

    def dup_picks(df, phase_hint, subset):
        """Check for duplicated picks within the given subset of NSLC columns."""
        seed_id = get_seed_id_series(df, subset=subset)
        bad = seed_id[seed_id.duplicated()].tolist()
        assert len(bad) == 0, (
            f"Duplicate {phase_hint} picks found\n"
            f"event_id: {event_id}, "
            f"seed_id/s: {bad}"
        )

    # A dict of {phase: columns that can't be duplicated}
    phase_duplicates = {"IAML": None, "AML": None}
    # first get dataframe of picks, filter out rejected
    pdf = obsplus.picks_to_df(event)
    pdf = pdf.loc[pdf.evaluation_status != "rejected"]
    # get the event id for error messages
    event_id = str(event.resource_id)
    for phase_hint, sub_df in pdf.groupby("phase_hint"):
        # default to comparing network, station, location
        subset = phase_duplicates.get(phase_hint, NSLC[:-1])
        dup_picks(sub_df, phase_hint, subset=subset)
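Usage sketch for this variant, with made-up station codes; two P picks on the same network/station/location but different channels count as duplicates because the default subset is NSLC[:-1]:

# Illustrative only: duplicate P picks on station UU.TMU trigger the assertion.
import obspy
import obspy.core.event as ev

t0 = obspy.UTCDateTime("2019-01-01")
picks = [
    ev.Pick(time=t0 + 1, phase_hint="P",
            waveform_id=ev.WaveformStreamID(seed_string="UU.TMU..HHZ")),
    ev.Pick(time=t0 + 2, phase_hint="P",
            waveform_id=ev.WaveformStreamID(seed_string="UU.TMU..HHN")),
]
check_duplicate_picks(ev.Event(picks=picks))  # raises AssertionError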
def _get_picks_df(restrict_to_arrivals):
    """Get the picks dataframe, remove picks flagged as rejected."""
    pdf = obsplus.picks_to_df(event)
    # add a seed id column that drops the component from the channel
    pdf["seed_id_less"] = pdf["seed_id"].str[:-1]
    if restrict_to_arrivals:
        adf = obsplus.arrivals_to_df(event)
        pdf = pdf.loc[pdf["resource_id"].isin(adf["pick_id"])]
    # remove rejected picks
    pdf = pdf[pdf.evaluation_status != "rejected"]
    # toss any picks from stations that have S picks earlier than P picks
    if {"P", "S"}.issubset(pdf["phase_hint"]):
        phs = pdf.groupby("phase_hint")
        p_picks = phs.get_group("P")
        s_picks = phs.get_group("S")
        both = set(p_picks["seed_id_less"]).intersection(s_picks["seed_id_less"])
        p_picks = (
            p_picks.loc[p_picks["seed_id_less"].isin(both)]
            .set_index("seed_id_less")
            .sort_index()
        )
        s_picks = (
            s_picks.loc[s_picks["seed_id_less"].isin(both)]
            .set_index("seed_id_less")
            .sort_index()
        )
        mask = p_picks["time"] > s_picks["time"]
        bad_p = p_picks.loc[mask]
        bad_s = s_picks.loc[mask]
        if mask.any():
            warnings.warn(
                "S-pick is earlier than P-pick for one or more picks. "
                "Skipping phases."
            )
        pdf = pdf.loc[
            ~pdf["resource_id"].isin(bad_s["resource_id"])
            & ~pdf["resource_id"].isin(bad_p["resource_id"])
        ]
    if not len(pdf):
        raise NoPhaseInformationError(f"No valid phases for event:\n{event}")
    # rename the resource_id column for later merging
    pdf.rename(columns={"resource_id": "pick_id"}, inplace=True)
    return pdf
def test_unique_event_time_no_origin(self, bingham_cat_only_picks):
    """Ensure events with no origin don't all return the same time."""
    df = picks_to_df(bingham_cat_only_picks)
    assert len(df["event_time"].unique()) == len(df["event_id"].unique())
def pick_df(self, request):
    """Collect everything in the pick dataframe."""
    df = picks_to_df(request.getfixturevalue(request.param))
    return df
def pick_df(self):
    """Return the pick dataframe of Bingham."""
    ds = obsplus.load_dataset("bingham_test")
    cat = ds.event_client.get_events()
    return obsplus.picks_to_df(cat)
def pick_df(self, request):
    """Collect all the supported, parametrized input types."""
    return picks_to_df(request.param)
def pick_dataframe(self, catalog):
    """Return a dataframe of picks."""
    return picks_to_df(catalog)
def _get_associated_event_id(new_picks, old_picks):
    """Return the associated event id."""
    new_med = new_picks.groupby("event_id")["time"].apply(_get_pick_median)
    old_med = _get_pick_median(old_picks["time"])
    diffs = abs(new_med - old_med)
    # check against the minimum tolerance; if exceeded return None
    if diffs.min() / 1_000_000_000 > median_tolerance:
        return None
    return diffs.idxmin()

# Get list-like of events from new_catalog
new_cat = new_catalog if isinstance(new_catalog, Catalog) else [new_catalog]
assert len(new_catalog) > 0
# Get dataframes of pick info
new_pick_df = obsplus.picks_to_df(new_cat)
old_pick_df = obsplus.picks_to_df(event)
eid = _get_associated_event_id(new_pick_df, old_pick_df)
new_event = {str(x.resource_id): x for x in new_catalog}.get(eid)
# The association failed, just return original event
if new_event is None:
    return event
return merge_events(event, new_event, delete_old=delete_old)


# ---------- silly hash functions for getting around resource_ids (sorta)


def _hash_wids(objs, extra_attr=None):
    out = OrderedDict()
    for obj in objs:
def test_from_file(self, event_file):
    """Test for reading phase picks from files."""
    df = picks_to_df(event_file)
    assert isinstance(df, pd.DataFrame)
    assert len(df)
def test_from_event_bank(self, default_ebank):
    """Ensure event banks can be used to get dataframes."""
    df = picks_to_df(default_ebank)
    assert isinstance(df, pd.DataFrame)
def test_from_event_directory(self, event_directory):
    """Test extracting info from an event directory."""
    df = picks_to_df(event_directory)
    assert len(df)
    assert isinstance(df, pd.DataFrame)
from pathlib import Path

import obspy
from obspy.core.event import ResourceIdentifier

import obsplus
from obsplus.events.validate import validate_catalog

if __name__ == "__main__":
    cat_path = Path(
        "/media/data/Gits/obsplus/tests/test_data/qml2merge/"
        "2016-10-15T02-27-50/2016-10-15T02-27-50_2.xml"
    )
    assert cat_path.exists()
    cat = obspy.read_events(str(cat_path))
    # remove all amplitudes
    picks = cat[0].picks
    cat[0].picks = [x for x in picks if not x.phase_hint == "IAML"]
    # get resource ids of picks on duplicated stations
    pdf = obsplus.picks_to_df(cat)
    pdf = pdf[pdf["evaluation_status"] != "rejected"]
    duplicated_ids = set(pdf[pdf["station"].duplicated()]["resource_id"])
    # next mark duplicated picks as rejected and prune the event
    for duplicated_id in duplicated_ids:
        rid = ResourceIdentifier(duplicated_id)
        obj = rid.get_referred_object()
        obj.evaluation_status = "rejected"
    cat_out = obsplus.events.utils.prune_events(cat)
    validate_catalog(cat_out)
    cat.write(str(cat_path), "quakeml")
def dataframe_output(self, tcat):
    """Return read_picks result from reading a dataframe."""
    df = picks_to_df(tcat)
    return picks_to_dataframe(df)