def gen_stream(infiles):
    '''Make an obspy stream and order it by distance

    Every file in ``infiles`` is read as SAC; the first trace of each file
    gets ``stats.distance`` set to ``stats.sac.dist * 1000`` (presumably
    km -> m, confirm against the SAC headers in use) and is collected.

    Returns a tuple ``(sortedstream, sorted_dist)``: a stream whose traces
    are ordered by ascending ``stats.sac.dist`` and the matching list of
    ``stats.sac.dist`` values.
    '''
    instream = obspy.Stream()
    print('Loading traces ...')
    for sacfile in infiles:
        tr = obspy.read(sacfile, format='SAC')[0]
        tr.stats.distance = tr.stats.sac.dist * 1000
        instream += tr

    print('Sorted distances')
    # Sort the traces themselves (stable sort) instead of looking each
    # distance up with list.index(): the lookup always returned the first
    # match, so traces sharing a distance were duplicated/dropped.
    ordered = sorted(instream, key=lambda trace: trace.stats.sac.dist)
    sortedstream = obspy.Stream(traces=ordered)
    sorted_dist = [trace.stats.sac.dist for trace in ordered]
    return sortedstream, sorted_dist
def _mergeChannels(st):
    """
    Find the longest continuous data chunk and discard the rest.

    A copy of ``st`` is merged (gaps filled with 0.0) and trimmed to the
    time span common to all traces. The sample-wise product of all traces
    is then used to locate gaps: any sample that is 0.0 in at least one
    trace is treated as missing.

    Returns
    -------
    obspy.Stream
        * the merged, trimmed copy when no zero samples are found;
        * the input ``st`` itself, trimmed IN PLACE to the longest gap-free
          window, when zero samples are found;
        * an empty stream when ``st`` is empty, the common time span is
          empty, or no gap-free window can be determined.
    """
    if len(st) == 0:
        # max()/min() below would raise on an empty sequence
        return obspy.Stream()
    st1 = st.copy()
    st1.merge(fill_value=0.0)
    start = max(x.stats.starttime for x in st1)
    end = min(x.stats.endtime for x in st1)
    try:
        st1.trim(starttime=start, endtime=end)
    except ValueError:
        # if stream too fractured end is larger than start
        return obspy.Stream()
    ar_len = min(len(x.data) for x in st1)
    ar = np.ones(ar_len)
    for tr in st1:
        # slice so that traces differing by a sample still multiply
        ar *= tr.data[:ar_len]
    if not (ar == 0.0).any():
        return st1
    trace = obspy.Trace(data=np.ma.masked_where(ar == 0.0, ar))
    trace.stats.starttime = start
    trace.stats.sampling_rate = st1[0].stats.sampling_rate
    try:
        st2 = trace.split()
    except Exception:
        # best effort: an unsplittable product trace means no usable data
        return obspy.Stream()
    if len(st2) == 0:
        # every sample is masked, there is no continuous chunk at all
        return obspy.Stream()
    # max() returns the first of several equally long chunks
    longest = max(st2, key=lambda x: x.stats.endtime - x.stats.starttime)
    st.trim(starttime=longest.stats.starttime,
            endtime=longest.stats.endtime)
    return st
def get_waveforms_bulk(
    self,
    bulk: bulk_waveform_arg_type,
    index: Optional[pd.DataFrame] = None,
    **kwargs,
) -> Stream:
    """
    Fetch many waveforms at once from a bulk request.

    Parameters
    ----------
    bulk
        A list of any number of lists containing the following:
        (network, station, location, channel, starttime, endtime).
    index
        A dataframe returned by read_index. Passing it lets calling code
        read the index from disk only once for repetitive calls.

    Returns
    -------
    A merged stream; empty when the bulk request yields no rows.
    """
    request_df = get_waveform_bulk_df(bulk)
    if len(request_df) == 0:
        return obspy.Stream()
    # restrict the index to the overall time span covered by the request
    earliest = request_df["starttime"].min()
    latest = request_df["endtime"].max()
    if index is None:
        candidates = self.read_index(starttime=earliest, endtime=latest)
    else:
        outside = (index.starttime > latest) | (index.endtime < earliest)
        candidates = index[~outside]
    # handle each distinct (starttime, endtime) pair separately
    time_pairs = np.unique(
        request_df[["starttime", "endtime"]].values, axis=0
    )
    collected = []
    for pair in time_pairs:
        sub_index = _filter_index_to_bulk(pair, candidates, request_df)
        stream = self._index2stream(sub_index, pair[0], pair[1], merge=False)
        collected.extend(stream.traces)
    return merge_traces(obspy.Stream(traces=collected), inplace=True)
def get_waveforms_bulk(stream: Stream, bulk: bulk_waveform_arg_type, **kwargs) -> Stream:
    """
    Fetch many waveforms at once from an in-memory stream.

    Parameters
    ----------
    stream
        A stream object.
    bulk
        A list of any number of tuples containing the following:
        (network, station, location, channel, starttime, endtime).

    Returns
    -------
    A merged stream; empty when the bulk request yields no rows.
    """
    # dataframe describing the stream contents
    index = _get_waveform_df(stream)
    # dataframe of the bulk arguments
    request_df = get_waveform_bulk_df(bulk)
    if len(request_df) == 0:
        # nothing requested: return an empty stream
        return obspy.Stream()
    # one pass per distinct (starttime, endtime) pair
    time_pairs = np.unique(
        request_df[["starttime", "endtime"]].values, axis=0
    )
    collected = []
    for pair in time_pairs:
        t1, t2 = pair
        sub = _filter_index_to_bulk(
            (t1, t2), index_df=index, bulk_df=request_df
        )
        # each "trace" entry exposes the trace through its ``data``
        # attribute -- presumably a wrapper object; verify in
        # _get_waveform_df
        matching = obspy.Stream(traces=[item.data for item in sub["trace"]])
        window = matching.slice(starttime=to_utc(t1), endtime=to_utc(t2))
        collected.extend(window.traces)
    return merge_traces(obspy.Stream(traces=collected))
def _slide_and_correlate_traces(day, next_day, length, overlap, discard,
                                max_lag, outkey, task):
    """Helper function for parallel correlating

    Slides a window of ``length`` (stepping by ``length - overlap``) from
    ``day`` on, and correlates the two traces of ``task`` inside every
    window in which both are present. ``task`` unpacks to
    ``(tr1, tr2, dist, azi, baz)``; the last three values and ``outkey``
    are stored in the stats of each correlation trace.

    When ``discard`` is truthy, windows whose data availability (valid
    samples relative to the window length) is below ``discard`` are skipped.

    Returns a stream holding one correlation trace per accepted window.
    """
    tr1, tr2, dist, azi, baz = task
    step = length - overlap
    xstream = obspy.Stream()
    for t1 in IterTime(day, next_day - length + overlap, dt=step):
        sub = obspy.Stream([tr1, tr2]).slice(t1, t1 + length)
        if len(sub) < 2:
            continue
        latest_start = max(tr.stats.starttime for tr in sub)
        earliest_end = min(tr.stats.endtime for tr in sub)
        if latest_start > earliest_end:
            # this should not happen
            continue
        sub.trim(latest_start, earliest_end)
        if discard:
            fractions = []
            for tr in sub:
                # masked arrays provide count(); plain arrays are complete
                if hasattr(tr.data, 'count'):
                    nvalid = tr.data.count()
                else:
                    nvalid = len(tr)
                fractions.append(nvalid / tr.stats.sampling_rate / length)
            avail = min(fractions)
            if avail < discard:
                msg = ('discard trace combination %s-%s for time %s '
                       '(availability %.1f%% < %.1f%% desired)')
                log.debug(msg, sub[0].id, sub[1].id, str(latest_start)[:19],
                          100 * avail, 100 * discard)
                continue
        for tr in sub:
            _fill_array(tr.data, fill_value=0.)
            tr.data = np.ma.getdata(tr.data)
        xtr = correlate_traces(sub[0], sub[1], max_lag)
        xtr.stats.starttime = t1
        xtr.stats.key = outkey
        xtr.stats.dist = dist
        xtr.stats.azi = azi
        xtr.stats.baz = baz
        xstream += xtr
    return xstream
def fix_cut(self):
    """
    Checks to see if any trace is cut into multiple smaller traces based
    off their unique IDs.

    All traces sharing an ID are added together; missing data between the
    cut traces is filled by linear interpolation (for every segment, not
    only the first two). Afterwards every trace whose number of samples is
    not ``self.seconds * self.sr + 1`` is extended to span
    ``self.starttime`` .. ``self.endtime`` by adding a one-sample trace at
    each end, with gaps filled with 0.

    The stream is modified in place: it is cleared and refilled with the
    repaired traces.
    """
    # Group trace positions by ID, keeping first-appearance order.
    id_order = []
    positions_by_id = {}
    for position, trace in enumerate(self):
        if trace.id not in positions_by_id:
            positions_by_id[trace.id] = []
            id_order.append(trace.id)
        positions_by_id[trace.id].append(position)

    merged = op.Stream()
    for trace_id in id_order:
        positions = positions_by_id[trace_id]
        combined = self[positions[0]]
        for position in positions[1:]:
            # plain ``+=`` would fall back to the default fill_value and
            # leave masked gaps, so pass the interpolation option each time
            combined = combined.__add__(self[position],
                                        fill_value='interpolate')
        merged += combined

    expected_npts = self.seconds * self.sr + 1
    wf = merged.copy()
    repaired = op.Stream()
    for t in wf:
        if t.stats.npts == expected_npts:
            repaired += t
            continue
        st1 = t.stats.__deepcopy__()
        st1.starttime = self.starttime
        st1.npts = 1
        st2 = st1.__deepcopy__()
        st2.starttime = self.endtime
        # NOTE(review): the one-sample end markers hold the value 1, not 0;
        # kept as in the original -- confirm this is intended.
        t1 = op.Trace(data=np.ones(1, dtype=t.data.dtype), header=st1)
        t2 = op.Trace(data=np.ones(1, dtype=t.data.dtype), header=st2)
        newt = (t.__add__(t1, fill_value=0)).__add__(t2, fill_value=0)
        repaired += newt

    self.clear()
    self += repaired
def _plotPreparation_eventMode(self):
    """
    Load and prepare the waveforms of the selected event for plotting.

    Reads the R, T and Z files of every station in ``self.stationid`` from
    ``self.baseurl + self.event_selected``, copies the component chosen by
    ``self.direction`` into ``self.st``, normalizes and bandpass filters
    it, fills the per-trace arrays returned by ``self._calarrivals`` and
    finally derives ``self.shift_values`` and ``self.scale`` from
    ``self.y_axis_type`` and ``self.y_range``.

    Raises
    ------
    ValueError
        If ``self.direction`` is not "Vertical", "Radial" or "Tangential".
    """
    baseurl = self.baseurl + self.event_selected
    self.r = obspy.Stream()
    self.t = obspy.Stream()
    self.z = obspy.Stream()
    for station in self.stationid:
        self.r += obspy.read(baseurl + "/*" + station + "*R")
        self.t += obspy.read(baseurl + "/*" + station + "*T")
        self.z += obspy.read(baseurl + "/*" + station + "*Z")
    assert len(self.r) == len(self.t)
    assert len(self.r) == len(self.z)
    ntraces = len(self.r)

    position = list(self.eventid).index(self.event_selected)
    # builtin ``object``/``float``: the np.object/np.float aliases were
    # removed from NumPy
    self.time = np.zeros(ntraces, dtype=object)
    self.time[:] = self.UTCDateTime[position]

    components = {
        "Vertical": self.z,
        "Radial": self.r,
        "Tangential": self.t,
    }
    if self.direction not in components:
        raise ValueError(
            "unknown direction {!r}".format(self.direction))
    self.st = components[self.direction].copy()
    self.st = self.st.normalize(global_max=self.global_normal)
    self.st.filter("bandpass", freqmin=1. / self.filter_band[1],
                   freqmax=1. / self.filter_band[0])

    for name in ("distance_kms", "distance_degrees", "azimuths",
                 "antiAzimuths", "parrivals", "sarrivals",
                 "appro_line_distances", "depths"):
        setattr(self, name, np.zeros(ntraces, dtype=float))
    for i in range(ntraces):
        (self.distance_kms[i], self.distance_degrees[i], self.azimuths[i],
         self.antiAzimuths[i], self.parrivals[i], self.sarrivals[i],
         self.appro_line_distances[i],
         self.depths[i]) = self._calarrivals(self.st[i])

    # an unrecognised y axis type keeps an all-zero shift
    shift_sources = {
        "Epicenter Distance": self.distance_degrees,
        "Euclidean Distance": self.appro_line_distances,
        "Depth": self.depths,
        "Azimuth": self.azimuths,
    }
    self.shift_values = shift_sources.get(
        self.y_axis_type, np.zeros(ntraces, dtype=float))

    if np.max(self.shift_values) < self.y_range[1]:
        span = np.max(self.shift_values) - np.min(self.shift_values)
    else:
        span = self.y_range[1] - self.y_range[0]
    self.scale = span / 15 * self.scale
def get_waveforms_bulk(st: Stream, bulk: List[str], **kwargs) -> Stream:
    """
    Get a large number of waveforms with a bulk request.

    Parameters
    ----------
    st
        The stream to select waveforms from.
    bulk
        A list of any number of lists containing the following:
        (network, station, location, channel, starttime, endtime).

    Returns
    -------
    A stream with every trace (sliced to the requested window) that
    satisfies at least one of the bulk requests; empty if ``bulk`` is
    empty or None.
    """
    if not bulk:  # return empty waveforms if empty list or None
        return obspy.Stream()

    def _func(time, ind, df, st):
        """ return waveforms from df of bulk parameters for one window """
        match_chars = {"*", "?", "[", "]"}
        t1, t2 = time[0], time[1]
        df = df[(df.t1 == t1) & (df.t2 == t2)]
        # determine which requests use any matching (wildcard) characters
        uses_matches = [_column_contains(df[x], match_chars) for x in NSLC]
        match_ar = np.array(uses_matches).any(axis=0)
        df_match = df[match_ar]
        df_no_match = df[~match_ar]
        # A trace is wanted if ANY request asks for it, so the wildcard and
        # the exact results are OR-ed; AND-ing them returned only the
        # intersection when both kinds of request shared a time window.
        selected = np.zeros(len(ind), dtype=bool)
        # handle requests that need matches (more expensive)
        if not df_match.empty:
            match_bulk = df_match.to_records(index=False)
            mar = np.array(
                [filter_index(ind, *tuple(b)[:4]) for b in match_bulk])
            selected |= np.asarray(mar.any(axis=0), dtype=bool)
        # handle requests that do not need matches
        if not df_no_match.empty:
            nslc1 = set(get_nslc_series(df_no_match))
            nslc2 = get_nslc_series(ind)
            selected |= np.asarray(nslc2.isin(nslc1), dtype=bool)
        # get a list of used traces, combine and trim
        st = obspy.Stream([x for x, y in zip(st, selected) if y])
        return st.slice(starttime=UTC(t1), endtime=UTC(t2))

    # get a dataframe of stream contents
    index = _stream_data_to_df(st)
    # get a dataframe of the bulk arguments, convert time to float
    df = pd.DataFrame(bulk, columns=list(NSLC) + ["utc1", "utc2"])
    df["t1"] = df["utc1"].apply(float)
    df["t2"] = df["utc2"].apply(float)
    t1, t2 = df["t1"].min(), df["t2"].max()
    # filter index and streams to be as short as possible
    needed = ~((index.starttime > t2) | (index.endtime < t1))
    ind = index[needed]
    st = obspy.Stream([tr for tr, bo in zip(st, needed.values) if bo])
    # groupby.apply calls two times for each time set, avoid this.
    unique_times = np.unique(df[["t1", "t2"]].values, axis=0)
    streams = [_func(time, df=df, ind=ind, st=st) for time in unique_times]
    return reduce(add, streams)
def multiTemplateMatch(stTempLow, stLow, threshLow, stTempHigh, stHigh, threshHigh, numComp, tolerance, distance):
    """
    Run template matching in a low and a high frequency band on every
    channel and keep detections seen on enough components.

    For each channel index, ``correlation_detector`` is called once per
    band. A low frequency trigger is kept when some high frequency trigger
    lies within ``tolerance`` of it (difference low - high in
    ``[-tolerance, tolerance)``). After all channels are processed, the
    kept trigger times are sorted and a time is reported as a detection
    when ``numComp`` consecutive kept triggers fall within ``tolerance``.

    Returns the list of detection times.
    """
    detectionsTemp = []
    detections = []

    # iterate through each channel
    for s in range(len(stTempLow)):
        # call the template matching function in each band
        detectionsLow, _ = correlation_detector(
            obspy.Stream(stLow[s]), obspy.Stream(stTempLow[s]),
            threshLow, distance)
        detectionsHigh, _ = correlation_detector(
            obspy.Stream(stHigh[s]), obspy.Stream(stTempHigh[s]),
            threshHigh, distance)

        # get all high frequency trigger times
        detHighTimes = [det.get("time") for det in detectionsHigh]

        # loop through all low frequency triggers
        for det in detectionsLow:
            detLowTime = det.get("time")
            diffs = np.array([detLowTime - t for t in detHighTimes],
                             dtype=float)
            # explicit range test instead of NaN-masking followed by the
            # builtin min(), whose result depends on where the NaNs sit
            if np.any((diffs >= -tolerance) & (diffs < tolerance)):
                detectionsTemp.append(detLowTime)

    # sort detections chronologically
    detectionsTemp.sort()

    # save detections if they show up on the desired number of components;
    # the last valid window starts at len - numComp, hence the "+ 1"
    for d in range(len(detectionsTemp) - numComp + 1):
        if detectionsTemp[d + numComp - 1] - detectionsTemp[d] < tolerance:
            detections.append(detectionsTemp[d])

    return detections
def stream_bulk_split(st: Stream, bulk: List[waveform_request_type], fill_value: Any = None) -> List[Stream]:
    """
    Split a stream into one stream per request in bulk.

    Unlike the get_waveforms_bulk methods of waveform clients, the data
    selected for different requests are kept apart instead of being
    combined into a single stream.

    Parameters
    ----------
    st
        A stream object
    bulk
        A bulk request. Wildcards not currently supported on str params.
    fill_value
        If not None fill any missing data in time range with this value.

    Returns
    -------
    List of streams, each meeting the corresponding request in bulk.
    An empty list is returned when either the request or the stream is
    empty.
    """
    bulk = _get_bulk(bulk)
    if not bulk or len(st) == 0:
        return []
    # dataframe describing the stream contents
    sdf = _stream_data_to_df(st)
    out = []
    for request in bulk:
        assert len(
            request) == 6, f"{request} is not a valid bulk arg, must have len 6"
        wanted = filter_index(sdf, *request)
        chosen = obspy.Stream([tr for tr, keep in zip(st, wanted) if keep])
        t1 = to_utc(request[-2])
        t2 = to_utc(request[-1])
        window = chosen.slice(starttime=t1, endtime=t2)
        # pad out to the full requested window when a fill value is given
        if fill_value is not None:
            window = window.trim(starttime=t1, endtime=t2,
                                 fill_value=fill_value, pad=True)
        if window is None or len(window) == 0:
            out.append(obspy.Stream())
        else:
            out.append(merge_traces(window))
    assert len(out) == len(bulk), "output is not the same len as stream list"
    return out
def _removeUnusedTraces(self, verbose=True):
    '''
    Remove traces which are neither in template or data

    Keeps, in ``self.template`` and ``self.data``, only the traces whose
    code (``self.stations['nscT']`` / ``self.stations['nscD']``) appears
    in ``self.stations['CommonTr']``. With ``verbose`` the removed codes
    are printed.
    '''
    # Check something
    assert (hasattr(self, 'stations')), 'You need to compute stations info first'

    def _keep_common(stream, codes, label):
        '''Return a new stream with the traces of stream listed in CommonTr'''
        keep = np.isin(codes, self.stations['CommonTr'])
        kept = obspy.Stream()
        for i in np.where(keep)[0]:
            kept.append(stream[i])
        if verbose:
            removed = codes[~keep]
            if removed.size == 0:
                print('No traces removed in {}'.format(label))
            else:
                for r in removed:
                    print('Trace {} removed from {}'.format(r, label))
        return kept

    # Build both filtered streams before touching the attributes, so a
    # failure on the data side leaves the template untouched
    st1i = _keep_common(self.template, self.stations['nscT'], 'template')
    st2i = _keep_common(self.data, self.stations['nscD'], 'data')

    # Put them in current streams
    self.template = st1i
    self.data = st2i

    # All done
    return
def slice_to_plot_traces(value_1, value_2, phase_name, before=20, after=50):
    """
    for all the components, we slice for the given phase_name

    The arrival time of ``phase_name`` is looked up in ``value_1.info``
    (through ``name_mapper_time``). Every obs/syn waveform of the z, r and
    t components of both inputs is cut to
    ``[eventtime + arrival - before, eventtime + arrival + after]`` and
    copied; the four cut waveforms of each component are then normalized
    together by their common maximum.

    Returns ``(sliced_value_1, sliced_value_2)``, or ``(None, None)`` when
    the arrival time is None.
    """
    phase_name_used = name_mapper_time[phase_name]
    thetime = value_1.info[phase_name_used]
    if thetime is None:
        return None, None
    eventtime = value_1.info["eventtime"]
    win_start = eventtime + thetime - before
    win_end = eventtime + thetime + after

    # "obs_z", "syn_z", "obs_r", "syn_r", "obs_t", "syn_t"
    sliced_value_1 = to_plot_trace(None, None, None, None, None, None, None)
    sliced_value_2 = to_plot_trace(None, None, None, None, None, None, None)
    sliced_value_1.info = value_1.info
    sliced_value_2.info = value_2.info

    pairs = ((value_1, sliced_value_1), (value_2, sliced_value_2))
    for comp in ("z", "r", "t"):
        normalize_st = obspy.Stream()
        for kind in ("obs", "syn"):
            attr = kind + "_" + comp
            for source, target in pairs:
                # the slice is copied so that normalizing below does not
                # alter the data of the input objects
                piece = getattr(source, attr).slice(win_start, win_end).copy()
                setattr(target, attr, piece)
                normalize_st += piece
        # normalize_st shares its traces with the sliced values, so this
        # scales the stored pieces in place
        normalize_st.normalize(global_max=True)

    return sliced_value_1, sliced_value_2
def test_continuous_segments_combined(self):
    """
    Check the continuous segments computed from traces spread over two
    files: two 1 Hz traces that join up, followed by a trace that joins up
    in time but has a different sampling rate (0.5 Hz).
    """
    tr_1 = obspy.Trace(data=np.arange(10, dtype=np.int32),
                       header={"starttime": obspy.UTCDateTime(0)})
    tr_2 = obspy.Trace(data=np.arange(10, dtype=np.int32),
                       header={"starttime": obspy.UTCDateTime(10)})
    tr_3 = obspy.Trace(data=np.arange(10, dtype=np.int32),
                       header={"starttime": obspy.UTCDateTime(20),
                               "sampling_rate": 0.5})
    st = obspy.Stream(traces=[tr_1, tr_3])
    st2 = obspy.Stream(traces=[tr_2])
    with NamedTemporaryFile() as tf1, NamedTemporaryFile() as tf2:
        st.write(tf1.name, format="mseed")
        st2.write(tf2.name, format="mseed")
        md = MSEEDMetadata(files=[tf1.name, tf2.name])
    c_seg = md.meta["c_segments"]
    self.assertEqual(len(c_seg), 2)

    expected_segments = [
        # the two joined 1 Hz traces
        {
            "start_time": obspy.UTCDateTime(0),
            "end_time": obspy.UTCDateTime(20),
            "segment_length": 20,
            "sample_min": 0,
            "sample_max": 9,
            "num_samples": 20,
            "sample_median": 4.5,
            "sample_lower_quartile": 2.0,
            "sample_upper_quartile": 7.0,
            "sample_rate": 1.0,
        },
        # Not continuous because of different sampling_rate (0.5)
        {
            "start_time": obspy.UTCDateTime(20),
            "end_time": obspy.UTCDateTime(40),
            "segment_length": 20,
            "sample_min": 0,
            "sample_max": 9,
            "num_samples": 10,
            "sample_median": 4.5,
            "sample_lower_quartile": 2.25,
            "sample_upper_quartile": 6.75,
            "sample_rate": 0.5,
        },
    ]
    for segment, expected in zip(c_seg, expected_segments):
        for key, value in expected.items():
            self.assertEqual(segment[key], value)
def perwhiten(dt, wlen, cuttime1, cuttime2, reftime, f1,f2,f3,f4):
    """
    Cut, time-normalize and whiten every trace of the global stream ``st``.

    Traces that do not cover ``reftime + cuttime1`` .. ``reftime +
    cuttime2`` are skipped. The others are trimmed to that window (in
    place), divided by their absolute value (``wlen == 0``) or by a
    smoothed absolute value (``wlen > 0``), passed through ``whiten`` and
    appended to the global stream ``fft_all``, which is rebuilt on each
    call.

    Returns the number of traces stored in ``fft_all``.
    Raises ValueError when a trace is processed with a negative ``wlen``.
    """
    global fft_all, st
    nft = int(next_pow_2((cuttime2 - cuttime1)/dt))
    nwlen = int(wlen/dt)
    fft_all = obspy.Stream()
    for tr in st:
        # ------- cut waveform ------
        window_start = reftime + cuttime1
        window_end = reftime + cuttime2
        if tr.stats.starttime > window_start or tr.stats.endtime < window_end:
            continue
        tr.trim(window_start, window_end)
        # ---------- normalize ----------
        if wlen == 0:
            envelope = np.abs(tr.data)
        elif wlen > 0:
            envelope = smooth(np.abs(tr.data), half_len=nwlen)
        else:
            raise ValueError("Half window length must be greater than zero")
        tr.data /= envelope
        # ----------- whiten -----------
        spectrum, delta = whiten(tr.data, nft, dt, f1, f2, f3, f4)
        tr.data = spectrum
        tr.stats.delta = delta
        # ------- store the spectrum --------
        fft_all.append(tr)
    return len(fft_all)
def select_station_components(st, zchan):
    """
    Pick the Z, N and E traces that share the channel prefix of ``zchan``.

    Parameters
    ----------
    st: obspy stream
        Original data.
    zchan: string
        Z component name; its last character is replaced by each
        component letter to build the id passed to ``st.select``.

    Returns
    -------
    st_select: obspy stream
        The first matching trace of each component, in the order Z, N, E.
        Components without a match are left out.
    """
    prefix = zchan[:-1]
    st_select = obspy.Stream()
    for letter in ("Z", "N", "E"):
        matches = st.select(id=prefix + letter)
        if len(matches) > 0:
            st_select.append(matches[0])
    return st_select
def get_waveforms(
    self,
    network=None,
    station=None,
    location=None,
    channel=None,
    starttime=None,
    endtime=None,
) -> obspy.Stream:
    """
    Get waveforms from the cache, read from disk and cache if needed.

    See obplus.WaveBank.get_waveforms for param descriptions.

    The streams of all index rows matching the request (one per unique
    key) are added together. If ``starttime`` and/or ``endtime`` is given
    the result is trimmed to it; a missing (or falsy) bound is left open.
    """
    filt = filter_index(self.index, network, station, location, channel,
                        starttime, endtime)
    ser = self.index[filt].set_index("unique_key")["st_call"]
    # drop duplicates
    ser = ser[~ser.index.duplicated()]
    # no waveforms found, return empty waveforms
    if not len(ser):
        return obspy.Stream()
    st = reduce(add, (x() for x in ser))
    if starttime is None and endtime is None:
        return st

    def _bound(value):
        """Return value as UTCDateTime, or None for an open bound."""
        if not value:
            return None
        if isinstance(value, obspy.UTCDateTime):
            return value
        return obspy.UTCDateTime(value)

    # Stream.trim needs UTCDateTime (or None) bounds; the former integer
    # placeholders 0 / 32503680000 are not valid arguments for it.
    # NOTE(review): trim works in place on the traces returned by the
    # st_call callables -- confirm those are not the cached objects.
    return st.trim(starttime=_bound(starttime), endtime=_bound(endtime))
def _assert_reftek130_test_stream(self, st_reftek):
    """
    Compare a stream read from reftek 130 data with the miniseed files in
    ``self.mseed_files``, which were converted with the "rt_mseed" utility
    from Trimble/Reftek.

    Note that rt_mseed fills in network as "XX", location as "01" and
    channels as "001", "002", "003".

    The traces of ``st_reftek`` are modified: the "reftek130" and
    "_format" stats entries are removed and the stream is sorted.
    """
    st_mseed = obspy.Stream()
    for path in self.mseed_files:
        st_mseed += obspy.read(path, "MSEED")
    # rt_mseed apparently does not fill in the band+instrument code that
    # the reftek reader sets, so align it here for the comparison
    for expected in st_mseed:
        expected.stats.channel = "EH" + expected.stats.channel[-1]
        expected.stats.pop("_format")
        expected.stats.pop("mseed")
    # reftek130 low-level headers are only checked for presence here
    for got in st_reftek:
        self.assertIn("reftek130", got.stats)
        # XXX TODO check reftek specific headers
        got.stats.pop("reftek130")
        got.stats.pop("_format", None)
    # bring both streams into the same order
    st_reftek = st_reftek.sort()
    st_mseed = st_mseed.sort()
    # same number of traces
    self.assertEqual(len(st_reftek), len(st_mseed))
    # headers first ...
    for got, expected in zip(st_reftek, st_mseed):
        self.assertEqual(got.stats, expected.stats)
    # ... then the samples
    for got, expected in zip(st_reftek, st_mseed):
        np.testing.assert_array_equal(got.data, expected.data)
def __init__(self, pathname, datestr, suffix, switchEN=False, reverseE=False, reverseN=False):
    """
    Read the three component files matching ``pathname/*datestr*suffix``.

    :param pathname: directory holding the waveform files
    :param datestr: date part of the file names
    :param suffix: trailing part of the file names
    :param switchEN: exchange the channel codes of the E(2) and N(1) traces
    :param reverseE: flip the polarity of the E(2) trace
    :param reverseN: flip the polarity of the N(1) trace
    :raises ValueError: if the files do not hold exactly 3 traces
    """
    self.datestr = datestr
    self.st = obspy.read(join(pathname, '*' + datestr + '*' + suffix))
    if len(self.st) != 3:
        raise ValueError(
            'Sismogram must be in 3 components, but there are {} of {}'.
            format(len(self.st), datestr))
    if reverseE:
        self.st.select(channel='*[E2]')[0].data *= -1
    if reverseN:
        self.st.select(channel='*[N1]')[0].data *= -1
    if switchEN:
        # Grab both traces before renaming: selecting again after the
        # first rename can return the already renamed trace and undo
        # the swap.
        tr_e = self.st.select(channel='*[E2]')[0]
        tr_n = self.st.select(channel='*[N1]')[0]
        tr_e.stats.channel, tr_n.stats.channel = (
            tr_n.stats.channel, tr_e.stats.channel)
    self.st.sort()
    self.rf = obspy.Stream()
    self.timeoffset = 0
    self.rms = np.array([0])
    self.it = 0
    self.PArrival = None
    self.PRaypara = None
    self.SArrival = None
    self.SRaypara = None
def gapped_high_sample_stream(self):
    """
    Build a stream of two traces with a high sampling rate (6000 Hz) from
    the same channel, the second one starting right after the first.
    """
    seed_codes = {
        "network": "XI",
        "station": "00037",
        "location": "00",
        "channel": "FL1",
    }
    # first trace
    header_1 = dict(
        sampling_rate=6000.0,
        starttime=UTCDateTime(2017, 9, 23, 18, 50, 29, 715100),
        endtime=UTCDateTime(2017, 9, 23, 18, 50, 31, 818933),
        **seed_codes
    )
    first = obspy.Trace(data=np.random.rand(12624), header=header_1)
    # second trace
    header_2 = dict(
        sampling_rate=6000.0,
        delta=0.00016666666666666666,
        starttime=UTCDateTime(2017, 9, 23, 18, 50, 31, 819100),
        endtime=UTCDateTime(2017, 9, 23, 18, 50, 31, 973933),
        npts=930,
        calib=1.0,
        **seed_codes
    )
    second = obspy.Trace(data=np.random.rand(930), header=header_2)
    return obspy.Stream(traces=[first, second])
def gappy_stream(self):
    """Return a simple two-trace stream with exactly one gap in it.

    The first trace runs from ``self.start`` towards ``self.gap_start``,
    the second from ``self.gap_end`` towards ``self.end``.
    """
    header = {
        "network": "UU",
        "station": "ELU",
        "location": "01",
        "channel": "ELZ",
        "sampling_rate": 1,
        "starttime": self.start,
    }
    # trace before the gap
    n_before = int(self.gap_start - self.start)
    samples_before = np.random.rand(n_before)
    before = obspy.Trace(data=samples_before, header=header)
    assert before.stats.endtime <= self.gap_start
    # trace after the gap
    n_after = int(self.end - self.gap_end)
    samples_after = np.random.rand(n_after)
    header_after = dict(header)
    header_after["starttime"] = self.gap_end
    after = obspy.Trace(data=samples_after, header=header_after)
    assert after.stats.starttime >= self.gap_end
    # put them together and verify the gap is really there
    st = obspy.Stream(traces=[before, after])
    assert len(st.get_gaps()) == 1
    return st
def filter_st(st):
    """
    Keep the traces of a single band code, preferring HH over BH over SH.

    Only traces whose location code is "" or "00" and whose channel starts
    with the chosen band code are kept. An empty stream is returned when
    ``st`` is empty or holds none of the three band codes. (Mixed cases
    such as only 2 HH but 3 BH traces are not treated specially.)
    """
    newst = obspy.Stream()

    def _loc_and_channel(trace):
        # ids are expected to have exactly four dot separated parts
        _, _, loc, cha = trace.id.split(".")
        return loc, cha

    present = {_loc_and_channel(trace)[1][:2] for trace in st}
    band_code = next(
        (code for code in ("HH", "BH", "SH") if code in present), None)
    if band_code is None:
        return newst

    for trace in st:
        loc, cha = _loc_and_channel(trace)
        if loc in ("", "00") and cha[:2] == band_code:
            newst += trace
    return newst
def test_empty_stream(self):
    """ an empty stream among the inputs must be left out of the array """
    streams = {0: obspy.read(), 1: obspy.read(), 2: obspy.Stream()}
    result = obspy_to_array(streams)
    assert isinstance(result, xr.DataArray)
    # key 2 maps to the empty stream, so it must not show up
    stream_ids = result.stream_id.values
    assert 2 not in stream_ids
def _stretch_wrapper(groupnames, fname, outkey, filter=None, **kwargs):
    r"""
    Wrapper around `~yam.stretch.stretch()`

    :param groupnames: names of the groups to load the correlations from
    :param fname: file to load correlations from
    :param outkey: key stored in the ``attrs`` of the stretch result
    :param filter: filter correlations before stretching
        (bandpass, tuple with min and max frequency)
    :param \*\*kwargs: all other kwargs are passed to
        `~yam.stretch.stretch()` function
    :return: the stretch result, or None if stretching returned None
    """
    traces = []
    with h5py.File(fname, 'r') as f:
        for name in groupnames:
            traces.append(obspyh5.dataset2trace(f[name]))
    stream = obspy.Stream(traces)
    for tr in stream:
        tr.data = np.require(tr.data, float)
    if filter:
        _filter(stream, filter)
    stretchres = yam.stretch.stretch(stream, **kwargs)
    if stretchres is None:
        return stretchres
    stretchres['attrs']['key'] = outkey
    return stretchres
def __init__(self, cfg_file, staname, suffix, daterange1, daterange2):
    """
    Set up the processing of one station from a configuration file.

    Reads the paths ([path] section) and numeric parameters ([para]
    section) from ``cfg_file`` into the module level ``config`` object,
    loads the first trace of every file matching ``suffix`` in the
    station data directory, takes the station coordinates from the SAC
    header of the first trace, searches the earthquakes for the given
    date range and assigns the files to them.
    """
    self.model = TauPyModel(model="iasp91")
    self.daterange1 = daterange1
    self.daterange2 = daterange2
    config.read(cfg_file)

    # station specific directories
    for attr in ('data_path', 'out_path', 'RF_path'):
        setattr(self, attr, os.path.join(config.get('path', attr), staname))
    self.evt_list = config.get('path', 'evt_list')
    self.image_path = config.get('path', 'image_path')

    # numeric parameters, stored under the name of their option
    float_options = (
        'gate_mw', 'gate_dis1', 'gate_dis2', 'time_before', 'time_after',
        'tolerance', 'offset', 'gate_noise', 'gauss', 'freqmin',
        'freqmax', 'sampling',
    )
    for option in float_options:
        setattr(self, option, config.getfloat('para', option))
    window = self.time_before + self.time_after
    self.newlength = (window / self.sampling) + 1

    # waveforms: first trace of every matching file
    self.zfiles = glob.glob(os.path.join(self.data_path, suffix))
    self.st = obspy.Stream()
    for zfile_name in self.zfiles:
        self.st.append(obspy.read(zfile_name)[0])
    print(len(self.st))

    first_sac = self.st[0].stats.sac
    self.stalat = first_sac.stla
    self.stalon = first_sac.stlo
    self.eq_lst = searcheq(self.stalat, self.stalon, self.daterange1,
                           self.daterange2, self.gate_dis1, self.gate_dis2,
                           self.gate_mw, self.evt_list)
    self.eq = assignFiles(self.zfiles, self.st, self.eq_lst, self.tolerance)
def readdata(t1, t2, dbname='cascadia', pad=7200.):
    '''
    Get waveforms between two dates (obspy.UTCDateTime) t1 and t2.

    Queries the database ``dbname`` for Waveform rows that start no
    earlier than ``t1 - pad`` and end no later than ``t2 + pad`` (``pad``
    in seconds, as it is added to the timestamps) and returns the first
    trace of each row's waveform in one obspy Stream.
    '''
    # NOTE(review): the session is never closed here -- confirm whether
    # opendatabase() expects the caller to release it.
    session = opendatabase(dbname)
    q = session.query(Waveform)
    res = q.filter((Waveform.starttime >= t1.timestamp - pad) &
                   (Waveform.endtime <= t2.timestamp + pad)).all()

    # Put everything in a Stream
    S = obspy.Stream()
    for r in res:
        S.append(r.waveform()[0])

    # All done
    return S
def select_station_components(st, zchan):
    """
    Collect the three component traces that belong to one channel.

    Parameters
    ----------
    st: Obspy stream
        Waveform data.
    zchan: string
        Z component name at a certain station; the last character is
        swapped for "Z", "N" and "E" in turn to form the id to select.

    Returns
    -------
    st_select: Obspy stream.
        First matching trace per component (order Z, N, E); a component
        with no match is skipped.
    """
    selections = (st.select(id=zchan[:-1] + comp) for comp in ("Z", "N", "E"))
    first_traces = [found[0] for found in selections if len(found) != 0]
    return obspy.Stream(traces=first_traces)
def create_stream(self, starttime, endtime, seed_ids=None, sampling_rate=None):
    """
    Create a stream of random data, one trace per seed id.

    ``seed_ids`` and ``sampling_rate`` fall back to ``self.seed_ids`` and
    ``self.sampling_rate``. Every trace starts at ``starttime`` and holds
    ``int((endtime - starttime) * sampling_rate)`` normally distributed
    samples.
    """
    t1 = obspy.UTCDateTime(starttime)
    t2 = obspy.UTCDateTime(endtime)
    sr = sampling_rate or self.sampling_rate
    npts = int((t2.timestamp - t1.timestamp) * sr)
    traces = []
    for seed in seed_ids or self.seed_ids:
        network, station, location, channel = seed.split(".")
        header = dict(
            sampling_rate=sr,
            starttime=t1,
            network=network,
            station=station,
            location=location,
            channel=channel,
        )
        samples = np.random.randn(npts)
        traces.append(obspy.Trace(data=samples, header=header))
    return obspy.Stream(traces=traces)
def __init__(self, pathname, datestr, suffix='SAC'):
    """Class for processing event data with 3 components, which read SAC
    files of ``pathname*datastr*suffix``

    :param pathname: Directory to SAC files
    :type pathname: string
    :param datestr: date part in filename, e.g., ``2021.122.12.23.40``
    :type datestr: string
    :param suffix: suffix for SAC files, defaults to 'SAC'
    :type suffix: str, optional
    :raises ValueError: if fewer or more than 3 traces are read
    """
    self.datestr = datestr
    self.filestr = join(pathname, '*' + datestr + '*' + suffix)
    self.st = obspy.read(self.filestr)

    ncomp = len(self.st)
    if ncomp < 3:
        channel = ' '.join([tr.stats.channel for tr in self.st])
        raise ValueError(
            'Sismogram must be in 3 components, but there are only channel {} of {}'
            .format(channel, datestr))
    if ncomp > 3:
        raise ValueError(
            '{} has more than 3 components, please select to delete redundant seismic components'
            .format(datestr))

    self.st.sort()
    self.rf = obspy.Stream()
    self.timeoffset = 0
    self.rms = np.array([0])
    self.it = 0
    self.trigger_shift = 0
    self.inc_correction = 0
    self.set_comp()
def select_by_sampling_rate(st, threshold=15):
    """
    Keep the traces whose sampling rate is not below the threshold.

    Parameters
    ----------
    st: obspy stream
        seismic data
    threshold: int
        sampling rate threshold; traces with a lower rate are dropped

    Returns
    -------
    st_new: obspy stream
        new stream holding the kept traces (a summary line with the trace
        counts before and after is printed)
    """
    kept = [tr for tr in st if not tr.stats.sampling_rate < threshold]
    st_new = obspy.Stream(traces=kept)
    counts = (threshold, len(st), len(st_new))
    print("Number of traces change(>%d Hz): %d --> %d" % counts)
    return st_new
def _func(time, ind, df, st):
    """
    return waveforms from df of bulk parameters

    ``time`` is a (t1, t2) pair; only the rows of ``df`` with exactly
    these times are considered. ``ind`` describes the traces of ``st``
    row by row. A trace is returned, sliced to the window, when it
    satisfies at least one of those requests, whether the request uses
    wildcard characters or an exact code.
    """
    match_chars = {"*", "?", "[", "]"}
    t1, t2 = time[0], time[1]
    df = df[(df.t1 == t1) & (df.t2 == t2)]
    # determine which requests use any matching (wildcard) characters
    uses_matches = [_column_contains(df[x], match_chars) for x in NSLC]
    match_ar = np.array(uses_matches).any(axis=0)
    df_match = df[match_ar]
    df_no_match = df[~match_ar]
    # Requests are alternatives, so the wildcard and the exact results are
    # OR-ed; AND-ing them kept only the intersection when both kinds of
    # request were present for the same time window.
    selected = np.zeros(len(ind), dtype=bool)
    # handle requests that need matches (more expensive)
    if not df_match.empty:
        match_bulk = df_match.to_records(index=False)
        mar = np.array(
            [filter_index(ind, *tuple(b)[:4]) for b in match_bulk])
        selected |= np.asarray(mar.any(axis=0), dtype=bool)
    # handle requests that do not need matches
    if not df_no_match.empty:
        nslc1 = set(get_nslc_series(df_no_match))
        nslc2 = get_nslc_series(ind)
        selected |= np.asarray(nslc2.isin(nslc1), dtype=bool)
    # get a list of used traces, combine and trim
    st = obspy.Stream([x for x, y in zip(st, selected) if y])
    return st.slice(starttime=UTC(t1), endtime=UTC(t2))