def __call__(
    self,
    time_arg: event_time_type,
    time_before: Optional[float] = None,
    time_after: Optional[float] = None,
    *args,
    **kwargs,
) -> obspy.Stream:
    """
    Using a reference time, return a waveform stream that encompasses that time.

    Parameters
    ----------
    time_arg
        The argument that will indicate a start time. Can be a length-one
        Catalog, an Event, a float, or a UTCDateTime object.
    time_before
        The time before time_arg to include in the waveforms.
    time_after
        The time after time_arg to include in the waveforms.

    Returns
    -------
    obspy.Stream
    """
    tbefore = to_timedelta64(time_before, default=self.time_before)
    tafter = to_timedelta64(time_after, default=self.time_after)
    assert (tbefore is not None) and (tafter is not None)
    # get the reference time from the object
    time = to_datetime64(get_reference_time(time_arg))
    t1 = time - tbefore
    t2 = time + tafter
    return self.get_waveforms(starttime=to_utc(t1), endtime=to_utc(t2), **kwargs)
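# Hedged usage sketch (not part of the source above): assuming an
# obsplus.Fetcher built from the "bingham_test" dataset referenced in the
# fixtures below, __call__ can be used like this:
import obsplus

ds = obsplus.load_dataset("bingham_test")
fetcher = ds.get_fetcher()
event = ds.event_client.get_events()[0]
# waveforms from 10 s before to 30 s after the event's reference (origin) time
st = fetcher(event, time_before=10, time_after=30)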
def test_no_precision_lost(self):
    """There should be no precision lost in converting to a timedelta."""
    td = np.timedelta64(1_111_111_111, "ns")
    out = to_timedelta64(td)
    assert out == td
    # and also in negative
    assert (-td) == to_timedelta64(-td)
def __init__(
    self,
    waveforms: fetcher_waveform_type,
    stations: Optional[fetcher_event_type] = None,
    events: Optional[fetcher_event_type] = None,
    picks: Optional[pd.DataFrame] = None,
    stream_processor: Optional[stream_proc_type] = None,
    time_before: Optional[float] = None,
    time_after: Optional[float] = None,
    event_query: Optional[dict] = None,
):
    # if waveforms is already a Fetcher just update __dict__ and return
    if isinstance(waveforms, Fetcher):
        self.__dict__.update(waveforms.__dict__)
        return
    # get clients for each data type
    self.set_waveforms(waveforms)
    self.set_events(events)
    self.set_stations(stations)
    self._picks_input = picks
    # stream processor for applying filters and such
    self.stream_processor = stream_processor
    # set event time/query parameters
    self.time_before = to_timedelta64(time_before)
    self.time_after = to_timedelta64(time_after)
    self.event_query = event_query or {}
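# A Fetcher can also be assembled directly from clients; a minimal sketch,
# assuming the dataset client attributes below exist (standard obsplus
# dataset conventions):
import obsplus
from obsplus import Fetcher

ds = obsplus.load_dataset("bingham_test")
fetcher = Fetcher(
    waveforms=ds.waveform_client,
    events=ds.event_client,
    stations=ds.station_client,
    time_before=10,
    time_after=30,
)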
def bing_pick_bulk(self, bingham_catalog):
    """Create a dataframe from the bingham_test picks."""
    picks = obsplus.picks_to_df(bingham_catalog)
    df = picks[list(NSLC)]
    df["starttime"] = picks["time"] - to_timedelta64(1.011)
    df["endtime"] = picks["time"] + to_timedelta64(7.011)
    return df
def set_rel_time_windows(self, **time_windows) -> Optional["StatsGroup"]:
    """
    Method for applying relative time windows to the StatsGroup.

    Parameters
    ----------
    time_windows
        The time windows are set on a per-phase basis for arbitrary phase
        types through the following format: phase=(before_pick, after_pick).
        For example, P=(0.1, 1), S=(0.5, 2), Noise=(0, 5). Note that phase
        names are limited to valid attribute names (alphanumeric, cannot
        start with a number).

    Other Parameters
    ----------------
    inplace
        Flag indicating whether the StatsGroup should be modified in place
        or a copy should be returned.
    """
    # TODO: I'm going to gloss over this for right now because it doesn't
    #  affect my use case, but this might be overwriting user-provided
    #  start and end times?
    # Loop over each of the provided phases
    for ph, tw in time_windows.items():
        if not isinstance(tw, Sequence) or isinstance(tw, str):
            raise TypeError(
                f"time windows must be tuples of start and end times: {ph}"
            )
        if not len(tw) == 2:
            raise ValueError(f"time windows must be a tuple of floats: {ph}={tw}")
        # Get all of the picks that have a matching phase
        phase_ind = self.data.index.get_level_values("phase_hint") == ph
        # If none of the picks match, issue a warning and move on
        if not phase_ind.any():
            warnings.warn(f"No picks matching phase type: {ph}")
            continue
        if (tw[0] + tw[1]) < 0:
            raise ValueError(f"Time after must occur after time before: {ph}")
        time_before = to_timedelta64(tw[0])
        time_after = to_timedelta64(tw[1])
        # Otherwise, set the time windows
        if (
            self.data.loc[phase_ind, "starttime"].notnull().any()
            or self.data.loc[phase_ind, "endtime"].notnull().any()
        ):
            warnings.warn(
                "Overwriting existing time windows for one or more phases."
            )
        self.data.loc[phase_ind, "starttime"] = (
            self.data.loc[phase_ind, "time"] - time_before
        )
        self.data.loc[phase_ind, "endtime"] = (
            self.data.loc[phase_ind, "time"] + time_after
        )
    self.data = self._update_meta(self.data)
    return self
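# Usage sketch matching the docstring example; `stats_group` is a
# hypothetical existing StatsGroup with P, S, and Noise picks:
sg = stats_group.set_rel_time_windows(P=(0.1, 1), S=(0.5, 2), Noise=(0, 5))
# each P window now runs from 0.1 s before to 1 s after its pick time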
def abs_time_windows(self, node_stats_group_no_tws):
    """create absolute time windows"""
    time_before = 0.2
    time_after = 1
    phase = node_stats_group_no_tws.data.droplevel("seed_id_less").iloc[-1]
    return {
        phase.name: (
            phase.time - to_timedelta64(time_before),
            phase.time + to_timedelta64(time_after),
        )
    }
def _get_event_phase_window(
    self, event, dist_df, sampling_rate, restrict_to_arrivals: bool
):
    """Get the pick time, window start, and window end for all phases."""
    # determine min duration based on min samples and sec/dist
    # min duration based on required number of samples
    min_samples = get_default_param("min_samples", obj=self)
    min_dur_samps = min_samples / sampling_rate
    # min duration based on distances
    seconds_per_m = get_default_param("seconds_per_meter", obj=self)
    dist = dist_df.loc[str(event.resource_id), "hyp_distance_m"]
    min_dur_dist = pd.Series(dist * seconds_per_m, index=dist.index)
    # the minimum duration is the max of the min-sample and min-distance
    # requirements
    min_duration = to_timedelta64(np.maximum(min_dur_dist, min_dur_samps))
    # get dataframe
    if not len(event.picks):
        raise NoPhaseInformationError()
    df = get_phase_window_df(
        event,
        min_duration=min_duration,
        channel_codes=set(min_duration.index),
        restrict_to_arrivals=restrict_to_arrivals,
    )
    # make sure there are no NaNs
    assert not df.isnull().any().any()
    return df
def _get_absolute_time(
    time: Union[time_types, np.ndarray], ref_time: np.ndarray
) -> np.ndarray:
    """
    Get an absolute time from a possible reference time.

    Parameters
    ----------
    time
        Can either be an absolute time, or a timedelta with respect to
        ref_time.
    ref_time
        The object time is referenced to.
    """

    def _is_time_delta(obj):
        """Return True if an object is a timedelta-like thing."""
        if isinstance(obj, (int, float)):
            return True
        dtype = getattr(obj, "dtype", None)
        if np.issubdtype(dtype, np.timedelta64):
            return True
        is_int = np.issubdtype(dtype, np.integer)
        is_float = np.issubdtype(dtype, np.floating)
        if is_int or is_float:
            return True
        return False

    # First try converting to datetime64, if that fails convert to timedelta.
    if _is_time_delta(time):
        dt = ref_time + to_timedelta64(time)
    else:
        dt = to_datetime64(time)
    return np.broadcast_to(dt, np.shape(ref_time))
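# Worked example of both branches, assuming _get_absolute_time and the
# obsplus time utilities it calls are in scope:
import numpy as np

ref = np.array(
    [np.datetime64("2020-01-01T00:00:00"), np.datetime64("2020-01-01T00:01:00")]
)
# a float is treated as a relative offset in seconds from each reference time
print(_get_absolute_time(2.5, ref))  # 00:00:02.5 and 00:01:02.5
# a datetime-like input is treated as absolute and broadcast to ref's shape
print(_get_absolute_time(np.datetime64("2020-06-01"), ref))  # 2020-06-01, twice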
def test_identity_function_on_delta_series(self):
    """Delta series should simply return an equal delta series."""
    deltas = np.timedelta64(10_000_100, "us") * np.arange(10)
    ser = pd.Series(deltas)
    out = to_timedelta64(ser)
    assert ser.equals(out)
    assert out is not ser
def test_series(self):
    """Ensure an entire series can be converted to timedeltas."""
    ser = pd.Series([0, 2.22, 3, 5])
    out = to_timedelta64(ser)
    assert all(isinstance(x, (np.timedelta64, pd.Timedelta)) for x in out)
    assert isinstance(out, pd.Series)
def _get_noise_windows(self, phase_df, df):
    """
    Get noise window rows by first looking for a noise phase; if there is
    none, just use the start of the trace.
    """
    # init a df for each unique channel that needs a noise spectrum
    noise_df = phase_df[~phase_df["seed_id"].duplicated()].copy()
    noise_df["phase_hint"] = "Noise"
    # If no noise phase is defined, use the start of the traces
    if df.empty:
        # get parameters for determining noise window start and stop
        noise_end = to_timedelta64(
            get_default_param("noise_end_before_p", obj=self)
        )
        min_noise_dur = to_timedelta64(
            get_default_param("noise_min_duration", obj=self)
        )
        largest_window = (phase_df["endtime"] - phase_df["starttime"]).max()
        # Necessary to do it this way because max and np.max can't
        # handle NaN/NaT properly
        min_duration = pd.Series([min_noise_dur, largest_window]).max()
        # set start and stop for the noise window
        noise_df["endtime"] = phase_df["starttime"].min() - noise_end
        noise_df["starttime"] = noise_df["endtime"] - min_duration
    else:
        # else use either the noise window defined for a specific station
        # or, if a station has None, the noise window with the earliest
        # start time
        ser_min = df.loc[df["starttime"].idxmin()]
        t1, t2 = ser_min["starttime"], ser_min["endtime"]
        # drop columns on df and noise_df to facilitate the merge
        df = df[["network", "station", "starttime", "endtime"]]
        noise_df = noise_df.drop(columns=["starttime", "endtime"])
        noise_df = noise_df.merge(df, how="left", on=["network", "station"])
        # fill nan
        noise_df = noise_df.fillna({"starttime": t1, "endtime": t2})
    # set time to the midpoint between start and end
    noise_df["time"] = (
        noise_df["starttime"] + (noise_df["endtime"] - noise_df["starttime"]) / 2
    )
    # noise_df["time"] = noise_df[["starttime", "endtime"]].mean(axis=1)
    out = noise_df.set_index(list(_INDEX_NAMES))
    # drop any duplicate indices
    return out.loc[~out.index.duplicated()]
def add_time_buffer(
    self,
    start: Optional[Union[float, pd.Series]] = None,
    end: Optional[Union[float, pd.Series]] = None,
) -> "StatsGroup":
    """
    Method for adding a time buffer to the start and end of windows.

    Parameters
    ----------
    start
        The time, in seconds, to subtract from the start of the window.
    end
        The time, in seconds, to add to the end of the window.
    """
    df = self.data.copy()
    if start is not None:
        df.loc[:, "starttime"] = df["starttime"] - to_timedelta64(start)
    if end is not None:
        df.loc[:, "endtime"] = df["endtime"] + to_timedelta64(end)
    return self.new_from_dict(data=df)
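# Usage sketch (`stats_group` is a hypothetical StatsGroup): widen every
# window by 0.5 s on the front and 1.0 s on the back.
buffered = stats_group.add_time_buffer(start=0.5, end=1.0)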
def test_add_time_buffer(self, node_stats_group):
    """
    Ensure time can be added to the start and end of the node_stats_group.
    """
    # Not quite sure what's going on in this test...
    # Add times, start and end
    df = node_stats_group.data
    start = 1
    end = pd.Series(2, index=df.index)
    sg = node_stats_group.add_time_buffer(start=start, end=end)
    # Make sure a copy did occur
    assert sg is not node_stats_group
    # Make sure time offset is correct
    df2 = sg.data
    # Make sure to only get records with non-NaN start and end times
    df3 = df2.loc[df2["starttime"].notnull() & df2["endtime"].notnull()]
    df4 = df.loc[df3.index]
    assert ((df3["starttime"] + to_timedelta64(1)) == df4["starttime"]).all()
    assert ((df3["endtime"] - to_timedelta64(2)) == df4["endtime"]).all()
def test_set_rel_time_windows(self, add_rel_time_windows):
    """Make sure it is possible to set relative time windows."""
    df = add_rel_time_windows.data
    assert not df.starttime.isnull().any()
    assert not df.endtime.isnull().any()
    assert (df.endtime > df.starttime).all()
    # Make sure the time window is as expected for each provided phase type
    for phase in self.relative_windows:
        pick = df.xs(phase, level="phase_hint").iloc[0]
        assert pd.Timestamp(pick.starttime, unit="ns") == (
            pick.time - to_timedelta64(self.relative_windows[phase][0])
        )
        assert pd.Timestamp(pick.endtime, unit="ns") == (
            pick.time + to_timedelta64(self.relative_windows[phase][1])
        )
        # Make sure the times are semi-plausible
        assert pick.starttime > pd.Timestamp(1800, 1, 1)
        assert pick.endtime > pd.Timestamp(1800, 1, 1)
def _get_gap_dfs(df, min_gap):
    """Function to apply to each group of seed_id dataframes."""
    # get the min gap
    if min_gap is None:
        min_gap = 1.5 * df["sampling_period"].iloc[0]
    else:
        min_gap = to_timedelta64(min_gap)
    # get df for determining gaps
    dd = (
        df.drop_duplicates()
        .sort_values(["starttime", "endtime"])
        .reset_index(drop=True)
    )
    shifted_starttimes = dd.starttime.shift(-1)
    cum_max = np.maximum.accumulate(dd["endtime"] + min_gap)
    gap_index = cum_max < shifted_starttimes
    # create a dataframe of gaps
    df = dd[gap_index].copy()  # copy to avoid SettingWithCopyWarning
    df["starttime"] = dd.endtime[gap_index]
    df["endtime"] = shifted_starttimes[gap_index]
    df["gap_duration"] = df["endtime"] - df["starttime"]
    return df
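# The cumulative-maximum trick above is worth seeing in isolation. This
# self-contained pandas sketch (illustrative, not obsplus code) flags a gap
# wherever the running max endtime, padded by the tolerance, still falls
# before the next row's starttime:
import numpy as np
import pandas as pd

times = pd.to_datetime(
    ["2020-01-01 00:00:00", "2020-01-01 00:00:10", "2020-01-01 00:01:00"]
)
traces = pd.DataFrame(
    {"starttime": times, "endtime": times + pd.Timedelta(seconds=10)}
)
tolerance = pd.Timedelta(seconds=1)
cum_max = np.maximum.accumulate(traces["endtime"] + tolerance)
gap_index = cum_max < traces["starttime"].shift(-1)
print(traces[gap_index])  # flags the 40 s hole between 00:00:20 and 00:01:00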
def _get_waveform_df(stream: wave_type) -> pd.DataFrame:
    """
    Convert a stream or sequence of traces into a dataframe.

    Parameters
    ----------
    stream
        The stream (or sequence of traces) to index.

    Notes
    -----
    This is private because it is probably not quite polished enough to
    include in the public API. More thought is needed on how to do this
    properly.
    """
    stats_columns = list(NSLC) + ["starttime", "endtime", "sampling_rate"]
    trace_contents = [{i: tr.stats[i] for i in stats_columns} for tr in stream]
    df = pd.DataFrame(trace_contents, columns=stats_columns)
    # ensure time columns have proper dtypes
    df["starttime"] = to_datetime64(df["starttime"])
    df["endtime"] = to_datetime64(df["endtime"])
    df["sampling_period"] = to_timedelta64(1 / df["sampling_rate"])
    df["seed_id"] = get_seed_id_series(df)
    df["trace"] = [ObjectWrapper(tr) for tr in stream]
    return df
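# Quick sanity check, assuming the helper and its obsplus imports are in
# scope; obspy.read() with no arguments returns obspy's three-trace example
# stream:
import obspy

st = obspy.read()
df = _get_waveform_df(st)
print(df[["seed_id", "starttime", "endtime", "sampling_period"]])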
def test_identity_function_on_delta_array(self):
    """Delta array should simply return a delta array."""
    deltas = np.timedelta64(10_000_100, "us") * np.arange(10)
    out = to_timedelta64(deltas)
    assert np.all(deltas == out)
def test_array(self):
    """Test the return values from an array."""
    ar = np.array([0, 2.22, 3, 5])
    out = to_timedelta64(ar)
    assert all(isinstance(x, np.timedelta64) for x in out)
def test_float(self):
    """Test converting floats to time deltas (interpreted as seconds)."""
    vals = [1.23322, 10.2323, -1232.22]
    out = [to_timedelta64(x) for x in vals]
    assert all(isinstance(x, np.timedelta64) for x in out)
def yield_event_waveforms(
    self,
    time_before: Optional[float] = None,
    time_after: Optional[float] = None,
    reference: Union[str, Callable] = "origin",
    raise_on_fail: bool = True,
) -> Tuple[str, Stream]:
    """
    Yield event_id and streams for each event.

    Parameters
    ----------
    time_before
        The time before (in seconds) the reference that will be included
        in the waveforms if possible.
    time_after
        The time after (in seconds) the reference that will be included
        in the waveforms if possible.
    reference
        A str that indicates how the starttime of the trace should be
        determined. The following are supported:
            origin - use the origin time of the event
            p - use the first p time as the start for each station
            s - use the first s time as the start for each station
        If "p" or "s" is used, only streams corresponding to stations with
        the appropriate phase pick will be returned.
    raise_on_fail
        If True, re-raise any exception caught during waveform fetching,
        else continue to the next event.

    Notes
    -----
    Streams will not be yielded for any event for which a reference time
    cannot be obtained. For example, if reference='S' only events with
    some S picks will be yielded.
    """

    def _check_yield_event_waveform_(reference, ta, tb):
        if reference.lower() not in self.reference_funcs:
            msg = (
                f"reference of {reference} is not supported. Supported "
                f"reference arguments are {list(self.reference_funcs)}"
            )
            raise ValueError(msg)
        if not (np.abs(tb) + np.abs(ta)) > np.timedelta64(0, "s"):
            msg = (
                "time_before and/or time_after must be specified in either "
                "Fetcher's init or the yield_event_waveforms call"
            )
            raise ValueError(msg)

    tb = to_timedelta64(time_before, default=self.time_before)
    ta = to_timedelta64(time_after, default=self.time_after)
    _check_yield_event_waveform_(reference, ta, tb)
    # get reference times
    ref_func = self.reference_funcs[reference.lower()]
    reftime_df = ref_func(self)
    # if using a wavebank, preload the index over the entire time span
    # for a speedup
    if isinstance(self.waveform_client, WaveBank) and len(reftime_df):
        mt = reftime_df["time"].min() - tb
        mx = reftime_df["time"].max() + ta
        index = self.waveform_client.read_index(starttime=mt, endtime=mx)
        get_bulk_wf = partial(self._get_bulk_wf, index=index)
    else:
        get_bulk_wf = self._get_bulk_wf
    # iterate over each event in the events and yield the waveforms
    for event_id, df in reftime_df.groupby("event_id"):
        # make sure time is either a single datetime or a series of datetimes
        time = to_datetime64(df["time"])
        t1, t2 = time - tb, time + ta
        bulk_args = self._get_bulk_args(starttime=t1, endtime=t2)
        try:
            yield EventStream(event_id, get_bulk_wf(bulk_args))
        except Exception:
            if raise_on_fail:
                raise
            else:
                msg = f"Fetcher failed to get waveforms for {event_id}."
                warnings.warn(msg)
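# Usage sketch, reusing the hypothetical `fetcher` from the sketches above;
# each yielded EventStream unpacks to an (event_id, stream) pair:
for event_id, stream in fetcher.yield_event_waveforms(time_before=5, time_after=60):
    print(event_id, len(stream))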
def test_time_delta(self, time_df):
    """Test casting to the timedelta dtype."""
    out1 = upd.cast_dtypes(time_df, {"delta": "ops_timedelta"})["delta"]
    out2 = to_timedelta64(time_df["delta"])
    assert (out1 == out2).all()
def test_tuple_and_list(self):
    """tests for tuples and lists."""
    input1 = [2, -3, 4.5]
    out1 = to_timedelta64(input1)
    out2 = to_timedelta64(tuple(input1))
    assert np.all(out1 == out2)
def test_nullish_values_returns_default(self):
    """Nullish values should return default values."""
    default = np.timedelta64(0, "s")
    out1 = to_timedelta64(None, default=default)
    assert out1 == default
def test_whole_number(self):
    """test converting a number to a timedelta."""
    vals = [1, 2, 1000, 23, -122]
    out = [to_timedelta64(x) for x in vals]
    assert all(isinstance(x, np.timedelta64) for x in out)
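# Taken together, the tests above pin down the conversion contract. A few
# assertions restating it (the import path is an assumption based on recent
# obsplus layouts):
import numpy as np
from obsplus.utils.time import to_timedelta64

# numbers are interpreted as seconds
assert to_timedelta64(1.5) == np.timedelta64(1_500_000_000, "ns")
# sequences convert element-wise
assert np.all(to_timedelta64([1, 2]) == np.array([1, 2], dtype="timedelta64[s]"))
# None falls back to the supplied default
assert to_timedelta64(None, default=np.timedelta64(0, "s")) == np.timedelta64(0, "s")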