def create_directory(self):
    """
    Create the directory with gaps in it.

    One miniSEED file is written into ``self.path`` for every time chunk
    between ``self.starttime`` and ``self.endtime``. Chunks overlapping an
    entry of ``self.gaps`` get their stream from ``get_gap_stream``; all
    other chunks get it from ``create_stream``. A chunk is skipped when
    ``get_gap_stream`` raises ``ValueError``.
    """
    # Tabulate the gaps, with start/end expressed as timestamps.
    if self.gaps is None:
        gap_df = pd.DataFrame(columns=["start", "end"])
    else:
        gap_df = pd.DataFrame(self.gaps, columns=["start", "end"])
        for column in ("start", "end"):
            gap_df[column] = gap_df[column].apply(lambda x: x.timestamp)

    assert self.starttime and self.endtime, "needs defined times"

    chunks = make_time_chunks(
        self.starttime, self.endtime, self.duration, self.overlap
    )
    for t1, t2 in chunks:
        # A gap is irrelevant to this chunk when it starts at/after the
        # chunk end or ends at/before the chunk start.
        outside_chunk = (gap_df.start >= t2) | (gap_df.end <= t1)
        overlapping = gap_df[~outside_chunk]
        if overlapping.empty:
            st = self.create_stream(t1, t2)
        else:
            try:
                st = self.get_gap_stream(t1, t2, overlapping)
            except ValueError:
                # No stream could be produced for this chunk; write nothing.
                continue
        # Name the file after the chunk start: drop everything after the
        # first "." and swap ":" for "-".
        file_name = str(t1).split(".")[0].replace(":", "-") + ".mseed"
        st.write(join(self.path, file_name), "mseed")
def yield_waveforms(
    self,
    network: Optional[str] = None,
    station: Optional[str] = None,
    location: Optional[str] = None,
    channel: Optional[str] = None,
    starttime: Optional[utc_able_type] = None,
    endtime: Optional[utc_able_type] = None,
    duration: float = 3600.0,
    overlap: Optional[float] = None,
) -> Stream:
    """
    Yield time-series segments.

    Parameters
    ----------
    {get_waveforms_params}
    duration : float
        The duration of the streams to yield. All selected channels
        will be included in the waveforms.
    overlap : float
        If duration is used, the amount of overlap in yielded streams,
        added to the end of the waveforms.

    Notes
    -----
    All string parameters can use posix style matching with * and ? chars.

    Total duration of yielded streams = duration + overlap.
    """
    # NOTE(review): the docstring placeholder above is presumably filled in
    # by a decorator outside this view - keep it intact.
    # Convert the requested window to datetime64; the second argument is
    # presumably the fallback used when no time was given - TODO confirm.
    starttime = to_datetime64(starttime, 0.0)
    endtime = to_datetime64(endtime, "2999-01-01")
    # Load the full index for the requested channels and window.
    query = dict(
        network=network,
        station=station,
        location=location,
        channel=channel,
        starttime=starttime,
        endtime=endtime,
    )
    index = self.read_index(**query)
    # Shrink the window to the time span the index actually covers.
    starttime = max(starttime, index.starttime.min())
    endtime = min(endtime, index.endtime.max())
    # Walk the window chunk by chunk, yielding a stream for each chunk
    # that has at least one index row within reach.
    for chunk_start, chunk_end in make_time_chunks(
        starttime, endtime, duration, overlap
    ):
        t1, t2 = to_datetime64(chunk_start), to_datetime64(chunk_end)
        # With self.buffer as padding, a row is out of reach when it
        # starts after the chunk ends or ends before the chunk starts.
        starts_too_late = (index.starttime - self.buffer) > t2
        ends_too_early = (index.endtime + self.buffer) < t1
        selected = index[~(starts_too_late | ends_too_early)]
        if len(selected):
            yield self._index2stream(selected, t1, t2)
def yield_waveforms(
    self,
    network: Optional[str] = None,
    station: Optional[str] = None,
    location: Optional[str] = None,
    channel: Optional[str] = None,
    starttime: Optional[obspy.UTCDateTime] = None,
    endtime: Optional[obspy.UTCDateTime] = None,
    duration: float = 3600.0,
    overlap: Optional[float] = None,
) -> Stream:
    """
    Yield time-series segments from the waveform client.

    Parameters
    ----------
    {get_waveforms_params}
    duration : float
        The duration of the streams to yield. All selected channels
        will be included in the waveforms.
    overlap : float
        If duration is used, the amount of overlap in yielded streams,
        added to the end of the waveforms.

    Notes
    -----
    All string parameters can use posix style matching with * and ? chars.

    Total duration of yielded streams = duration + overlap.

    If no starttime or endtime is provided the min/max indicated by the
    stations will be used.
    """
    # WaveBank ships its own yield_waveforms, but the fetcher has to work
    # with any client, so the chunking is done here on top of get_waveforms.
    if not starttime:
        # Fall back to the earliest start date in the station table.
        starttime = self.station_df["start_date"].min()
    starttime = to_utc(starttime)
    if not endtime:
        # Fall back to the latest end date in the station table.
        endtime = self.station_df["end_date"].max()
    endtime = to_utc(endtime)
    # The channel selection is the same for every chunk; only times vary.
    selection = dict(
        network=network, station=station, location=location, channel=channel
    )
    for t1, t2 in make_time_chunks(starttime, endtime, duration, overlap):
        yield self.get_waveforms(starttime=t1, endtime=t2, **selection)
def build_archive(self, st, starttime=None, endtime=None):
    """
    Build the archive.

    The stream is cut into one-hour chunks and every chunk is written as
    miniSEED to ``waveform_path/<net>/<sta>/<loc>/<cha>/<chunk start>.mseed``.
    Each file holds only the traces whose network, station, location and
    channel codes match the directory it is written to.

    Parameters
    ----------
    st
        The stream to archive. Each chunk is trimmed from a copy of it.
    starttime
        Start of the time span to archive; ``self.starttime`` is used when
        this is not given.
    endtime
        End of the time span to archive; ``self.endtime`` is used when
        this is not given.
    """
    starttime = starttime or self.starttime
    endtime = endtime or self.endtime
    # The ids present depend only on the input stream, so collect them once
    # rather than per chunk. Group on the full code tuple, not just the
    # channel: selecting on channel alone lumps together traces of every
    # station sharing that channel code and files them under whichever
    # station happens to come first.
    seed_ids = sorted(
        {
            (
                tr.stats.network,
                tr.stats.station,
                tr.stats.location,
                tr.stats.channel,
            )
            for tr in st
        }
    )
    for utc1, utc2 in make_time_chunks(starttime, endtime, duration=3600):
        # Trim a copy so the caller's stream is left as it was.
        stt = st.copy()
        stt.trim(starttime=utc1, endtime=utc2)
        # File name is the chunk start: drop everything after the first
        # "." and swap ":" for "-".
        file_name = str(utc1).split(".")[0].replace(":", "-") + ".mseed"
        for net, sta, loc, cha in seed_ids:
            ts = stt.select(
                network=net, station=sta, location=loc, channel=cha
            )
            if not len(ts):
                # Nothing recorded for this id in the current chunk.
                continue
            fpath = self.waveform_path / net / sta / loc / cha / file_name
            fpath.parent.mkdir(parents=True, exist_ok=True)
            ts.write(str(fpath), "mseed")