def stack(stream, length=None, move=None):
    """
    Stack traces in stream by correlation id

    :param stream: |Stream| object with correlations
    :param length: time span of one trace in the stack in seconds
        (alternatively a string consisting of a number and a unit --
        ``'d'`` for days and ``'h'`` for hours -- can be specified,
        i.e. ``'3d'`` stacks together all traces inside a three days time
        window, default: None, which stacks together all traces)
    :param move: define a moving stack, float or string, default: None --
        no moving stack, if specified move usually is smaller than length
        to get an overlap in the stacked traces
    :return: |Stream| object with stacked correlations
    """
    stream.sort()
    stream_stack = obspy.Stream()
    ids = {_corr_id(tr) for tr in stream}
    ids.discard(None)
    for id_ in ids:
        traces = [tr for tr in stream if _corr_id(tr) == id_]
        if length is None:
            data = np.mean([tr.data for tr in traces], dtype=float, axis=0)
            tr_stack = obspy.Trace(data, header=traces[0].stats)
            tr_stack.stats.key = tr_stack.stats.key + '_s'
            if 'num' in traces[0].stats:
                tr_stack.stats.num = sum(tr.stats.num for tr in traces)
            else:
                tr_stack.stats.num = len(traces)
            stream_stack.append(tr_stack)
        else:
            t1 = traces[0].stats.starttime
            lensec = _time2sec(length)
            movesec = _time2sec(move) if move else lensec
            if (lensec % (24 * 3600) == 0 or
                    isinstance(length, str) and 'd' in length):
                t1 = UTC(t1.year, t1.month, t1.day)
            elif (lensec % 3600 == 0 or
                    isinstance(length, str) and 'm' in length):
                t1 = UTC(t1.year, t1.month, t1.day, t1.hour)
            t2 = max(t1, traces[-1].stats.endtime - lensec)
            for t in IterTime(t1, t2, dt=movesec):
                sel = [tr for tr in traces
                       if -0.1 <= tr.stats.starttime - t <= lensec + 0.1]
                if len(sel) == 0:
                    continue
                data = np.mean([tr.data for tr in sel], dtype=float, axis=0)
                tr_stack = obspy.Trace(data, header=sel[0].stats)
                key_add = ('_s%s' % length +
                           (move is not None) * ('m%s' % move))
                tr_stack.stats.key = tr_stack.stats.key + key_add
                tr_stack.stats.starttime = t
                if 'num' in traces[0].stats:
                    tr_stack.stats.num = sum(tr.stats.num for tr in sel)
                else:
                    tr_stack.stats.num = len(sel)
                stream_stack.append(tr_stack)
    return stream_stack
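# Usage sketch (not part of the original source): assumes stack() above is in
# scope (e.g. importable from yam.stack) and that corr_stream already holds
# correlation traces produced by yam; the file name is illustrative only.
import obspy

corr_stream = obspy.read('correlations.mseed')             # hypothetical input
total_stack = stack(corr_stream)                           # one stack per correlation id
moving_stack = stack(corr_stream, length='3d', move='1d')  # 3-day windows, 1-day step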
def date2time(sta_sdate, sta_edate):
    logger = logging.getLogger(__name__)
    smonth = f'0{sta_sdate.month}' if sta_sdate.month < 10 else f'{sta_sdate.month}'
    emonth = f'0{sta_edate.month}' if sta_edate.month < 10 else f'{sta_edate.month}'
    sday = f'0{sta_sdate.day}' if sta_sdate.day < 10 else f'{sta_sdate.day}'
    eday = f'0{sta_edate.day}' if sta_edate.day < 10 else f'{sta_edate.day}'
    stime = f'{sta_sdate.year}-{smonth}-{sday}'
    etime = f'{sta_edate.year}-{emonth}-{eday}'
    return UTC(stime), UTC(etime)
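# Minimal usage sketch (assumes date2time() above is in scope): it drops the
# time-of-day part and returns day-resolution UTCDateTime bounds.
from obspy import UTCDateTime as UTC

stime, etime = date2time(UTC('2009-03-07T12:34:56'), UTC('2011-04-01'))
assert stime == UTC('2009-03-07') and etime == UTC('2011-04-01')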
def _filter_starttime_endtime(df, starttime=None, endtime=None):
    """ Filter dataframe on starttime and endtime. """
    bool_index = np.ones(len(df), dtype=bool)
    t1 = UTC(starttime).timestamp if starttime is not None else -1 * np.inf
    t2 = UTC(endtime).timestamp if endtime is not None else np.inf
    # get time columns
    start_col = getattr(df, "starttime", getattr(df, "start_date", None))
    end_col = getattr(df, "endtime", getattr(df, "end_date", None))
    in_time = ~((end_col < t1) | (start_col > t2))
    return np.logical_and(bool_index, in_time.values)
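# Minimal sketch of the intended use (assumes the helper above is in scope and,
# as in obsplus, that the index DataFrame stores POSIX timestamps):
import pandas as pd
from obspy import UTCDateTime as UTC

df = pd.DataFrame({
    'starttime': [UTC('2017-01-01').timestamp, UTC('2017-01-03').timestamp],
    'endtime':   [UTC('2017-01-02').timestamp, UTC('2017-01-04').timestamp],
})
mask = _filter_starttime_endtime(df, starttime='2017-01-02T12:00:00')
assert mask.tolist() == [False, True]   # only the second row overlaps the window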
def _trim_time_period(stream, time_period):
    """Restrict traces of stream to given time period"""
    if time_period is None:
        return
    starttime, endtime = time_period
    traces = [
        tr for tr in stream
        if (starttime is None or tr.stats.starttime >= UTC(starttime)) and
           (endtime is None or tr.stats.starttime < UTC(endtime))
    ]
    stream.traces = traces
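# Usage sketch (assumes _trim_time_period() above is in scope); the obspy demo
# stream starts on 2009-08-24, so the second call empties it in place.
import obspy

st = obspy.read()
_trim_time_period(st, ('2009-01-01', '2010-01-01'))
assert len(st) == 3
_trim_time_period(st, (None, '2009-01-01'))
assert len(st) == 0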
def custom_get_waveforms(network, station, location, channel, starttime,
                         endtime, quality=None, minimumlength=None,
                         longestonly=None, filename=None,
                         attach_response=False, **kwargs):
    with pyasdf.ASDFDataSet('/g/data/ha3/Passive/_ANU/7X(2009-2011)/ASDF/7X(2009-2011).h5',
                            mode='r') as asdfDataSet:
        st = Stream()
        # ignoring channel for now as all the 7D network waveforms have only BH? channels
        filteredList = [i for i in asdfDataSet.waveforms[network + '.' + station].list()
                        if 'raw_recording' in i and
                        UTC(i.split("__")[1]) < starttime and
                        UTC(i.split("__")[2]) > endtime]
        for t in filteredList:
            st += asdfDataSet.waveforms[network + '.' + station][t]
        return st
def test_downsample_and_shift(self):
    tr = read()[0]
    t = tr.stats.starttime = UTC('2018-01-01T00:00:10.000000Z')
    # decimate
    tr2 = _downsample_and_shift(tr.copy(), 50.)
    self.assertEqual(tr2.stats.sampling_rate, 50)
    # interpolate
    tr2 = _downsample_and_shift(tr.copy(), 40.)
    self.assertEqual(tr2.stats.sampling_rate, 40)
    # decimate and time shift
    tr2 = tr.copy()
    tr2.stats.starttime += 0.002
    tr2 = _downsample_and_shift(tr2, 50.)
    self.assertEqual(tr2.stats.sampling_rate, 50)
    self.assertEqual(tr2.stats.starttime, t)
    tr2 = tr.copy()
    tr2.stats.starttime -= 0.002
    tr2 = _downsample_and_shift(tr2, 50.)
    self.assertEqual(tr2.stats.sampling_rate, 50)
    self.assertEqual(tr2.stats.starttime, t)
    # interpolate and time shift
    tr2 = tr.copy()
    tr2.stats.starttime += 0.002
    tr2 = _downsample_and_shift(tr2, 40.)
    self.assertEqual(tr2.stats.sampling_rate, 40)
    self.assertEqual(tr2.stats.starttime - tr2.stats.delta, t)
    tr2 = tr.copy()
    tr2.stats.starttime -= 0.002
    tr2 = _downsample_and_shift(tr2, 40.)
    self.assertEqual(tr2.stats.sampling_rate, 40)
    self.assertEqual(tr2.stats.starttime, t)
def test_preprocess(self):
    stream = read()
    day = UTC('2018-01-02')
    for tr in stream:
        tr.stats.starttime = day
    tr = stream[1]
    tr.id = 'GR.FUR..BH' + tr.stats.channel[-1]
    tr.stats.sampling_rate = 80.
    tr = stream[2]
    tr.id = 'GR.WET..BH' + tr.stats.channel[-1]
    tr.stats.sampling_rate = 50.
    stream = stream.cutout(day + 0.01, day + 10)
    stream = stream.cutout(day + 14, day + 16.05)
    norm = ('clip', 'spectral_whitening', 'mute_envelope', '1bit')
    # see https://docs.scipy.org/doc/numpy-1.13.0/release.html#
    # assigning-to-slices-views-of-maskedarray
    ignore_msg = r'setting an item on a masked array which has a shared'
    with np.warnings.catch_warnings():
        np.warnings.filterwarnings('ignore', ignore_msg)
        preprocess(stream, day=day, inventory=read_inventory(),
                   remove_response=True, filter=None, normalization=norm,
                   time_norm_options=None, spectral_whitening_options=None,
                   decimate=5)
    for tr in stream:
        self.assertEqual(tr.stats.sampling_rate, 10)
    for tr in stream:
        self.assertEqual(set(tr.data._data), {-1, 0, 1})
        mask = np.ma.getmask(tr.data)
        np.testing.assert_equal(tr.data[mask]._data, 0)
        self.assertGreater(np.count_nonzero(mask), 0)
    self.assertEqual(len(stream), 3)
def test_write_pha_minimal(self):
    ori = Origin(time=UTC(0), latitude=42, longitude=43, depth=10000)
    pick = Pick(time=UTC(10), phase_hint='S',
                waveform_id=WaveformStreamID(station_code='STA'))
    del ori.latitude_errors
    del ori.longitude_errors
    del ori.depth_errors
    cat = Catalog([Event(origins=[ori], picks=[pick])])
    with NamedTemporaryFile() as tf:
        tempfile = tf.name
        with self.assertWarnsRegex(UserWarning, 'Missing mag'):
            cat.write(tempfile, 'HYPODDPHA')
        cat2 = read_events(tempfile)
    self.assertEqual(len(cat2), 1)
    self.assertEqual(len(cat2[0].picks), 1)
def test_shift(self):
    tr = read()[0]
    dt = tr.stats.delta
    t = tr.stats.starttime = UTC('2018-01-01T00:00:10.000000Z')
    tr2 = tr.copy()
    _downsample_and_shift(tr2)
    self.assertEqual(tr2, tr)
    tr2 = tr.copy()
    tr2.stats.starttime = t + 0.1 * dt
    _downsample_and_shift(tr2)
    self.assertEqual(tr2.stats.starttime, t)
    tr2 = tr.copy()
    tr2.stats.starttime = t - 0.1 * dt
    _downsample_and_shift(tr2)
    self.assertEqual(tr2.stats.starttime, t)
    tr2 = tr.copy()
    tr2.stats.starttime = t - 0.49 * dt
    _downsample_and_shift(tr2)
    self.assertEqual(tr2.stats.starttime, t)
    tr2 = tr.copy()
    tr2.stats.starttime = t - 0.0001 * dt
    _downsample_and_shift(tr2)
    self.assertEqual(tr2.stats.starttime, t)
    # shift cumulatively by +1 sample
    tr2 = tr.copy()
    tr2.stats.starttime += 0.3 * dt
    _downsample_and_shift(tr2)
    tr2.stats.starttime += 0.3 * dt
    _downsample_and_shift(tr2)
    tr2.stats.starttime += 0.4 * dt
    _downsample_and_shift(tr2)
    self.assertEqual(tr2.stats.starttime, t)
    np.testing.assert_allclose(tr2.data[201:-200], tr.data[200:-201],
                               rtol=1e-2, atol=1)
    cc = correlate(tr2.data, tr.data, 1000)
    shift, cc_max = xcorr_max(cc)
    self.assertEqual(shift, 1)
    self.assertGreater(cc_max, 0.995)
    # shift cumulatively by -1 sample
    tr2 = tr.copy()
    tr2.stats.starttime -= 0.3 * dt
    _downsample_and_shift(tr2)
    tr2.stats.starttime -= 0.3 * dt
    _downsample_and_shift(tr2)
    tr2.stats.starttime -= 0.4 * dt
    _downsample_and_shift(tr2)
    self.assertEqual(tr2.stats.starttime, t)
    np.testing.assert_allclose(tr2.data[200:-201], tr.data[201:-200],
                               rtol=1e-2, atol=2)
    cc = correlate(tr2.data, tr.data, 1000)
    shift, cc_max = xcorr_max(cc)
    self.assertEqual(shift, -1)
    self.assertGreater(cc_max, 0.995)
def get_cmap(extend=False):
    bounds = [b.matplotlib_date for b in get_bounds()]
    if extend:
        bounds[-1] = UTC('2018-05-26').matplotlib_date
    colors = get_colors()[:len(bounds) - 1]
    cmap = ListedColormap(colors, name='AccentL')
    norm = BoundaryNorm(bounds, ncolors=len(colors))
    return cmap, norm
def _create_event(ser):
    """ create an event from a row from the event dataframe """
    event = oe.Event(
        resource_id=rid(ser.evid),
        creation_info=oe.CreationInfo(
            agency_id=ser.auth, creation_time=UTC(ser.lddate)
        ),
        preferred_origin_id=str(ser.prefor),
    )
    return event
def get_waveforms(
    stream: Stream,
    network: str = "*",
    station: str = "*",
    location: str = "*",
    channel: str = "*",
    starttime: Optional[UTC] = None,
    endtime: Optional[UTC] = None,
):
    """
    A subset of the Client.get_waveforms method.

    Simply makes successive calls to Stream.select and Stream.trim under the
    hood. Matching is available on all str parameters.

    Parameters
    ----------
    network
        The network code
    station
        The station code
    location
        Location code
    channel
        Channel code
    starttime
        Starttime for query
    endtime
        Endtime for query

    Returns
    -------
    Stream
    """
    stream = stream.copy()
    st = stream.select(network=network, station=station, location=location,
                       channel=channel)
    st = st.trim(starttime=UTC(starttime or SMALL_UTC),
                 endtime=UTC(endtime or BIG_UTC))
    return st
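# Hedged example (assumes get_waveforms() above plus its SMALL_UTC/BIG_UTC
# constants are in scope, as in obsplus): treat an in-memory Stream like a
# client and cut a 5 s window from the obspy demo data.
import obspy

st = obspy.read()   # BW.RJOB demo stream
t0 = st[0].stats.starttime
out = get_waveforms(st, network='BW', station='RJOB', channel='EHZ',
                    starttime=t0 + 5, endtime=t0 + 10)
assert len(out) == 1
assert out[0].stats.npts == 5 * out[0].stats.sampling_rate + 1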
def _create_pick(ser):
    """ create picks """
    ser = ser[(ser != -1) & ~(ser.isnull())]
    co = oe.CreationInfo(
        agency_id=ser.get('auth'),
        creation_time=UTC(ser.get('lddate')),
    )
    seed_str = _get_seed_str(ser)
    wid = oe.WaveformStreamID(seed_string=seed_str)
    pick = oe.Pick(
        time=UTC(ser.time),
        resource_id=rid(ser.arid),
        creation_info=co,
        waveform_id=wid,
    )
    return pick
def create_config(conf='conf.json', tutorial=False, less_data=False):
    """Create JSON config file and download tutorial data if requested"""
    shutil.copyfile(resource_filename('yam', 'conf_example.json'), conf)
    temp_dir = os.path.join(tempfile.gettempdir(), 'yam_example_data')
    template = os.path.join(temp_dir, 'example_data')
    station_template = os.path.join(temp_dir, 'example_inventory')
    try:
        num_files = (len([name for name in os.listdir(template)]),
                     len([name for name in os.listdir(station_template)]))
    except FileNotFoundError:
        num_files = (0, 0)
    if tutorial and (num_files[0] < (9 if less_data else 54) or
                     num_files[1] < 3):
        print('Download example data from Geofon')
        from obspy import UTCDateTime as UTC
        from obspy.clients.fdsn.mass_downloader import (GlobalDomain,
                                                        Restrictions,
                                                        MassDownloader)
        domain = GlobalDomain()
        restrictions = Restrictions(
            starttime=UTC('2010-02-04' if less_data else '2010-02-01'),
            endtime=UTC('2010-02-06' if less_data else '2010-02-15'),
            network='CX', station='PATCX', location=None,
            channel_priorities=["BH[ZN]"], chunklength_in_sec=86400,
            reject_channels_with_gaps=False, minimum_length=0.5)
        mdl = MassDownloader(providers=['GFZ'])
        kw = dict(threads_per_client=1, download_chunk_size_in_mb=200)
        mdl.download(domain, restrictions, template, station_template, **kw)
        restrictions.station = 'PB06'
        if not less_data:
            restrictions.endtime = UTC('2010-02-12')
        mdl.download(domain, restrictions, template, station_template, **kw)
        restrictions.station = 'PB01'
        restrictions.endtime = UTC('2010-02-04 08:00:00')
        restrictions.channel_priorities = ["BHZ"]
        mdl.download(domain, restrictions, template, station_template, **kw)
        if not less_data:
            restrictions.starttime = UTC('2010-02-08 00:00:00')
            restrictions.endtime = UTC('2010-02-09 23:55:00')
            restrictions.channel_priorities = ["BHZ"]
            mdl.download(domain, restrictions, template, station_template,
                         **kw)
    if tutorial:
        dest_dir = os.path.dirname(conf)
        dest_dir_data = os.path.join(dest_dir, 'example_data')
        dest_dir_inv = os.path.join(dest_dir, 'example_inventory')
        if not os.path.exists(dest_dir_data):
            if less_data:
                ignore = shutil.ignore_patterns('*2010020[123]T000000Z__*',
                                                '*2010020[6-9]T000000Z__*',
                                                '*2010021?T000000Z__*')
            else:
                ignore = None
            shutil.copytree(template, dest_dir_data, ignore=ignore)
        if not os.path.exists(dest_dir_inv):
            shutil.copytree(station_template, dest_dir_inv)
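# Usage sketch (not part of the original source): roughly what yam's
# `yam create --tutorial` CLI command triggers; the config file name here is
# illustrative only.
create_config('tutorial_conf.json', tutorial=True, less_data=True)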
def _download_bingham(self):
    """ Use obspy's mass downloader to get station/waveforms data. """
    bank = WaveBank(self.waveform_path)
    domain = CircularDomain(
        self.latitude,
        self.longitude,
        minradius=0,
        maxradius=kilometers2degrees(self.max_dist),
    )
    chan_priorities = ["HH[ZNE]", "BH[ZNE]", "EL[ZNE]", "EN[ZNE]"]
    cat = obspy.read_events(str(self.source_path / "events.xml"))
    df = events_to_df(cat)
    for _, row in df.iterrows():
        starttime = row.time - self.time_before
        endtime = row.time + self.time_after
        restrictions = Restrictions(
            starttime=UTC(starttime),
            endtime=UTC(endtime),
            minimum_length=0.90,
            minimum_interstation_distance_in_m=100,
            channel_priorities=chan_priorities,
            location_priorities=["", "00", "01", "--"],
        )
        kwargs = dict(
            domain=domain,
            restrictions=restrictions,
            mseed_storage=str(self.waveform_path),
            stationxml_storage=str(self.station_path),
        )
        MassDownloader(providers=[self._download_client]).download(**kwargs)
        # ensure data were downloaded
        bank.update_index()
        assert not bank.read_index(starttime=starttime, endtime=endtime).empty
    # update wavebank
    WaveBank(self.waveform_path).update_index()
def multi_download(client, inv, net, stn, slat, slon, elat, elon, evdp,
                   evtime, em, emt, fcat, stalons, stalats, staNetNames,
                   phase='P', locations=[""]):
    logger = logging.getLogger(__name__)
    strm = None
    j = 0
    msg = None
    model = TauPyModel('iasp91')
    arrivals = model.get_travel_times_geo(float(evdp), slat, slon,
                                          float(elat), float(elon),
                                          phase_list=[phase])
    if phase == 'P':
        t1 = UTC(str(evtime)) + int(arrivals[0].time - 50)
        t2 = UTC(str(evtime)) + int(arrivals[0].time + 110)
        # t1 = UTC(str(evtime)) + int(arrivals[0].time - 25)
        # t2 = UTC(str(evtime)) + int(arrivals[0].time + 75)
    elif phase == 'SKS':
        t1 = UTC(str(evtime)) + int(arrivals[0].time - 80)
        t2 = UTC(str(evtime)) + int(arrivals[0].time + 80)
    # sel_inv = inv.select(network=net).select(station=stn)[0][0]
    # if not sel_inv.is_active(starttime=t1, endtime=t2):
    #     # logger.warning(f"------> Station not active during {evtime}")
    #     msg = f"Station not active during {evtime}"
    #     return strm, 0, msg
    # process_id = os.getpid()
    pharr = UTC(str(evtime)) + arrivals[0].time
    while not strm:
        client_local = Client(client[j])
        stats_args = {"_format": 'H5',
                      "onset": UTC(str(evtime)) + arrivals[0].time,
                      "event_latitude": elat,
                      "event_longitude": elon,
                      "event_depth": evdp,
                      "event_magnitude": em,
                      "event_time": UTC(str(evtime)),
                      "phase": phase,
                      "station_latitude": slat,
                      "station_longitude": slon,
                      "inclination": arrivals[0].incident_angle,
                      "slowness": arrivals[0].ray_param_sec_degree}
        if phase == 'P':
            for loc in locations:
                with Timeout(5):
                    strm = retrieve_waveform(client_local, net, stn, t1, t2,
                                             stats_dict=stats_args,
                                             cha="BHE,BHN,BHZ", loc=loc,
                                             pharr=pharr, phasenm=phase)
                if strm:
                    break
        elif phase == 'SKS':
            for loc in locations:
                # print(f"Location: {loc}")
                with Timeout(5):
                    strm = retrieve_waveform(client_local, net, stn, t1, t2,
                                             stats_dict=stats_args,
                                             cha="BHE,BHN,BHZ",
                                             attach_response=True, loc=loc,
                                             pharr=pharr, phasenm=phase)
                if strm:
                    break  # break the locations loop
        if strm:
            fcat.write('{} | {:9.4f}, {:9.4f} | {:5.1f} | {:5.1f} {:4s} | {}\n'
                       .format(evtime, elat, elon, evdp, em, emt, client[j]))
            stalons.append(slon)
            stalats.append(slat)
            staNetNames.append(f"{net}_{stn}")
            # print("stream obtained\n")
            msg = f"Data {evtime}"
            res = 1
            break
        elif j == len(client) - 1:
            res = 0
            msg = f"No data {evtime}"
            break
        j += 1
    return strm, res, msg
def main():
    base_dir = '/g/data/ha3/Passive/_ANU/7B(1993-1995)'
    asdf_file = os.path.join(base_dir, 'ASDF', '7B(1993-1995).h5')
    out_dir = os.path.join(base_dir, 'small_mseed_DATA')
    inv = read_inventory(os.path.join(base_dir, '7B.xml'))
    asdf = ASDFDataSet(asdf_file, mode='r')
    for sta in inv.networks[0].stations:
        if asdf.waveforms.__contains__(inv.networks[0].code + '.' + sta.code):
            for i in asdf.waveforms[inv.networks[0].code + '.' + sta.code].list():
                if i.endswith('raw_recording'):
                    start_time = UTC(i.split("__")[1])
                    st = asdf.waveforms[inv.networks[0].code + '.' + sta.code][i]
                    medn = np.median(st[0].data)
                    while (abs(st[0].data[np.argmax(st[0].data)]) > 1e8 or
                           abs(st[0].data[np.argmin(st[0].data)]) > 1e8):
                        if abs(st[0].data[np.argmax(st[0].data)]) > 1e8:
                            st[0].data[np.argmax(st[0].data)] = \
                                abs(medn) if st[0].data[np.argmax(st[0].data)] > 0 else -abs(medn)
                        if abs(st[0].data[np.argmin(st[0].data)]) > 1e8:
                            st[0].data[np.argmin(st[0].data)] = \
                                abs(medn) if st[0].data[np.argmin(st[0].data)] > 0 else -abs(medn)
                    while start_time + 86400 < UTC(i.split("__")[2]):
                        tr = st[0].copy()
                        create_chunk(out_dir, tr, start_time, start_time + 86400, sta)
                        start_time += 86400
                    if start_time < UTC(i.split("__")[2]):
                        tr = st[0].copy()
                        create_chunk(out_dir, tr, start_time, UTC(i.split("__")[2]), sta)
def _create_origin(ser):
    """ create an origin and attach to event """
    event = get_object(ser.evid)
    origin = oe.Origin(
        resource_id=rid(ser.orid),
        time=UTC(ser.time),
        latitude=ser.lat,
        longitude=ser.lon,
        depth=ser.depth * 1000,  # convert to m
    )
    # temporarily attach event reference to origin
    origin.__dict__['event'] = event
    event.origins.append(origin)
def test_stretch(self):
    h = {'sampling_rate': 100}
    # TODO: allow to call stretch without these headers
    h['network1'] = h['network2'] = 'NET'
    h['station1'] = h['station2'] = h['network'] = h['location'] = 'STA'
    h['location1'] = h['location2'] = ''
    h['channel1'] = h['channel2'] = h['location'] = h['channel'] = 'HHZ'
    h['dist'] = h['azi'] = h['baz'] = 0
    vel_changes = [0, 1, -1]
    traces = []
    dt = 24 * 3600
    t0 = UTC()
    for i, v in enumerate(vel_changes):
        mul = 1 + v / 100
        # there is a small difference, because the routines from MIIC
        # use the following approximation for stretching:
        # mul = np.exp(v / 100)
        t = np.linspace(-10 * mul, 10 * mul, 10001)
        data = np.cos(2 * np.pi * t)
        h['starttime'] = t0 + i * dt
        tr = Trace(data, header=h)
        traces.append(tr)
    d = stretch(Stream(traces), reftr=traces[0], str_range=1.1, nstr=2201,
                time_windows=[[1], 4], sides='both')
    expect = np.array(vel_changes)[:, np.newaxis]
    np.testing.assert_allclose(d['velchange_vs_time'], expect, atol=0.008)
    # routine from miic uses approximation exp(dv/v) = 1 + dv/v
    corrected = -np.log(d['velchange_vs_time'] / -100 + 1) * 100
    np.testing.assert_allclose(corrected, expect, rtol=1e-4)
    # test writing and reading
    with tempfile.TemporaryDirectory(prefix='yam_') as tmpdir:
        fname = os.path.join(tmpdir, 'stretch.h5')
        d['attrs']['key'] = 'test'
        write_dict(d, fname)
        d2 = read_dicts(fname)[0]
        for key in d:
            if key == 'sim_mat':
                np.testing.assert_allclose(d2[key], d[key], rtol=1e-3)
            elif isinstance(d2[key], np.ndarray):
                np.testing.assert_equal(d2[key], d[key])
            else:
                self.assertEqual(d2[key], d[key])
        d2['attrs']['key'] = 'test2'
        write_dict(d2, fname)
def test_stretch(self):
    h = {'sampling_rate': 100}
    h['network1'] = h['network2'] = 'NET'
    h['station1'] = h['station2'] = h['network'] = h['location'] = 'STA'
    h['location1'] = h['location2'] = ''
    h['channel1'] = h['channel2'] = h['location'] = h['channel'] = 'HHZ'
    h['dist'] = h['azi'] = h['baz'] = 0
    vel_changes = [0, 1, -1]
    traces = []
    dt = 24 * 3600
    t0 = UTC()
    for i, v in enumerate(vel_changes):
        mul = 1 + v / 100
        t = np.linspace(-10 * mul, 10 * mul, 10001)
        data = np.cos(2 * np.pi * t)
        h['starttime'] = t0 + i * dt
        tr = Trace(data, header=h)
        traces.append(tr)
    d = stretch(Stream(traces), max_stretch=1.1, num_stretch=2201, tw=(1, 5),
                sides='both', reftr=traces[0])
    expect = np.array(vel_changes)
    np.testing.assert_allclose(d['velchange_vs_time'], expect)
    np.testing.assert_allclose(d['corr_vs_time'], (1, 1, 1))
    self.assertAlmostEqual(d['velchange_values'][-1], 1.1)
    self.assertEqual(len(d['velchange_values']), 2201)
    # test writing and reading
    with tempfile.TemporaryDirectory(prefix='yam_') as tmpdir:
        fname = os.path.join(tmpdir, 'stretch.h5')
        d['attrs']['key'] = 'test'
        write_dict(d, fname)
        d2 = read_dicts(fname)[0]
        for key in d:
            if key == 'sim_mat':
                np.testing.assert_allclose(d2[key], d[key], rtol=1e-3)
            elif isinstance(d2[key], np.ndarray):
                np.testing.assert_equal(d2[key], d[key])
            else:
                self.assertEqual(d2[key], d[key])
        d2['attrs']['key'] = 'test2'
        write_dict(d2, fname)
# Copyright 2013-2016 Tom Eulenfeld, MIT license
from obspy import read_events, read_inventory, Stream, UTCDateTime as UTC
from obspy.clients.fdsn import Client
from rf.rfstream import obj2stats, rfstats
from rf import RFStream

evname = './example_events.xml'
invname = './example_inventory.xml'
wavname = './example_data.mseed'
wavname2 = './minimal_example.sac'
lon, lat = -70, -21
t1, t2 = UTC('2011-02-01'), UTC('2011-06-01')
seedid = 'CX.PB01..BH?'


def get_events():
    try:
        return read_events(evname)
    except:
        pass
    client = Client()
    events = client.get_events(starttime=t1, endtime=t2, latitude=lat,
                               longitude=lon, minradius=30, maxradius=90,
                               minmagnitude=6., maxmagnitude=6.5)
def iter_matching_event(lat, lon, ddeg, time):
    for a in ev:
        if (abs(a['lat'] - lat) < ddeg and abs(a['lon'] - lon) < ddeg and
                abs(time - UTC(a['time'])) < 5):
            yield a
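# Sketch with a hand-made event list (the function expects a module-level list
# `ev` of dicts with 'lat', 'lon' and 'time' entries; the values here are
# illustrative only):
from obspy import UTCDateTime as UTC

ev = [{'lat': 37.40, 'lon': -121.48, 'time': '2005-02-05T18:43:30.31'}]
matches = list(iter_matching_event(37.5, -121.5, 0.5, UTC('2005-02-05T18:43:32')))
assert len(matches) == 1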
# Date: 2005/02/05  Time: 18:43:30.3100 GMT
# Lat= 37.40  Lon= -121.48  Depth= 7.20
# Moment Tensor Depth= 11.00
# Expo= +22 -0.193150 -2.045900 2.239000 0.137150 0.253480 0.837600
# Md= 4.25  Ml= 4.42  Mw= 4.18  Scalar Moment= 2.317000E+22
# Fault plane: strike= 56  dip=88  slip= 6
# Fault plane: strike=326  dip=84  slip= 178
pattern = (r'Date:\s*([\d/]+)\s*Time:\s*([\d:.]+).*?'
           r'Lat=\s*([.\d-]+)\s*Lon=\s*([.\d-]+).+?Mw=\s*([\d.]+)')
for match in re.findall(pattern, text1, flags=re.DOTALL):
    date, time, lat, lon, mag = match
    lat = float(lat)
    lon = float(lon)
    mag = float(mag)
    utc = UTC(date.replace('/', '_') + ' ' + time)
    for a in iter_matching_event(lat, lon, 0.5, utc):
        v = (a['time'], 0, dic(a, 'Berkeley', mag))
        events.append(v)

# 2005/01/28 22:37:07 34.71 -111.00 3 4.0 Arizona Y 20050128223707 Mechanism
for line in text2.split('\n'):
    if line.startswith('#'):
        continue
    date, time, lat, lon, dep, mag, *bla = line.split()
    utc = UTC(date.replace('/', '_') + ' ' + time)
    lat = float(lat)
    try:
        lon = float(lon)
    except:
        lon = float(lon + dep)
    return st


# ---+----------Main---------------------------------
if __name__ == '__main__':
    # we use centre of Australia to calculate radius and gather events
    # from 15 to 90 degrees
    lonlat = [133.88, -23.69]

    # Change parameters below
    data = os.path.join('DATA', '')
    invfile = data + '7X-inventory.xml'
    datafile = data + '7X-event_waveforms_for_rf.h5'

    start_time = '2009-12-01 00:00:00'
    end_time = '2011-04-01 00:00:00'
    inventory = read_inventory(invfile)

    # ----------------- End ----------------------

    catalog = get_events(lonlat, UTC(start_time), UTC(end_time))

    stream = RFStream()
    with tqdm() as pbar:
        for s in iter_event_data(catalog, inventory, custom_get_waveforms,
                                 pbar=pbar):
            # extend once per event stream (the flattened original looped over
            # every trace and extended the full stream each time, duplicating
            # traces)
            stream.extend(s)

    stream.write(datafile, 'H5')
def obtain_events(self, catalogxmlloc, catalogtxtloc, minmagnitude=5.5,
                  maxmagnitude=9.5):
    ## Check for the station information
    if os.path.exists(self.inventorytxtfile):
        invent_df = pd.read_csv(self.inventorytxtfile, sep="|",
                                keep_default_na=False, na_values=[""])
        total_stations = invent_df.shape[0]
        if invent_df.shape[0] == 0:
            self.logger.error("No data available, exiting...")
            sys.exit()
    else:
        self.logger.error("No data available, exiting...")
        sys.exit()

    tot_evnt_stns = 0
    if not self.inv:
        self.logger.info("Reading station inventory to obtain events catalog")
        try:
            # Read the station inventory
            self.inv = read_inventory(self.inventoryfile, format="STATIONXML")
        except Exception as exception:
            self.logger.error("No available data", exc_info=True)
            sys.exit()
    # list all the events during the station active time
    self.staNamesNet, staLats, staLons = [], [], []
    count = 1
    for net in self.inv:
        for sta in net:
            network = net.code  # network name
            station = sta.code  # station name
            print("\n")
            self.logger.info(
                f"{count}/{total_stations} Retrieving event info for {network}-{station}")
            count += 1
            self.staNamesNet.append(f"{network}_{station}")

            sta_lat = sta.latitude  # station latitude
            staLats.append(sta_lat)
            sta_lon = sta.longitude  # station longitude
            staLons.append(sta_lon)

            sta_sdate = sta.start_date  # station start date
            sta_edate = sta.end_date  # station end date
            # sta_edate_str = sta_edate
            if not sta_edate:
                sta_edate = UTC("2599-12-31T23:59:59")
                # sta_edate_str = "2599-12-31T23:59:59"

            # station start and end time in UTC
            stime, etime = date2time(sta_sdate, sta_edate)

            # xml catalog
            catalogxml = catalogxmlloc + f'{network}-{station}-{sta_sdate.year}-{sta_edate.year}-{self.method}-{self.method}_events.xml'
            # self.allcatalogxml.append(catalogxml)
            # txt catalog
            catalogtxt = catalogtxtloc + f'{network}-{station}-{sta_sdate.year}-{sta_edate.year}-events-info-{self.method}.txt'
            if not os.path.exists(catalogxml) and not os.path.exists(catalogtxt):
                self.logger.info(
                    f"Obtaining catalog: {self.method}: {network}-{station}-{sta_sdate.year}-{sta_edate.year}")
                kwargs = {'starttime': stime, 'endtime': etime,
                          'latitude': sta_lat, 'longitude': sta_lon,
                          'minradius': self.minradius,
                          'maxradius': self.maxradius,
                          'minmagnitude': minmagnitude,
                          'maxmagnitude': maxmagnitude}
                client = Client('IRIS')

                try:
                    catalog = client.get_events(**kwargs)
                except:
                    self.logger.warning(
                        "ConnectionResetError while obtaining the events from the client - IRIS")
                    continue
                catalog.write(catalogxml, 'QUAKEML')  # writing xml catalog

                tot_evnt_stns += len(catalog)

                evtimes, evlats, evlons, evdps, evmgs, evmgtps = [], [], [], [], [], []
                self.logger.info("Writing the event data into a text file")

                with open(catalogtxt, 'w') as f:
                    f.write('evtime,evlat,evlon,evdp,evmg\n')
                    for cat in catalog:
                        try:
                            try:
                                evtime, evlat, evlon, evdp, evmg, evmgtp = (
                                    cat.origins[0].time,
                                    cat.origins[0].latitude,
                                    cat.origins[0].longitude,
                                    cat.origins[0].depth / 1000,
                                    cat.magnitudes[0].mag,
                                    cat.magnitudes[0].magnitude_type)
                            except:
                                evtime, evlat, evlon, evdp, evmg, evmgtp = (
                                    cat.origins[0].time,
                                    cat.origins[0].latitude,
                                    cat.origins[0].longitude,
                                    cat.origins[0].depth / 1000,
                                    cat.magnitudes[0].mag,
                                    "Mww")
                            evtimes.append(str(evtime))
                            evlats.append(float(evlat))
                            evlons.append(float(evlon))
                            evdps.append(float(evdp))
                            evmgs.append(float(evmg))
                            evmgtps.append(str(evmgtp))
                            # writing txt catalog
                            f.write('{},{:.4f},{:.4f},{:.1f},{:.1f}\n'.format(
                                evtime, evlat, evlon, evdp, evmg))
                        except Exception as exception:
                            self.logger.warning(f"Unable to write for {evtime}")
                self.logger.info(
                    "Finished writing the event data into a text and xml file")
            else:
                self.logger.info(
                    f"{catalogxml.split('/')[-1]} and {catalogtxt.split('/')[-1]} already exists!")
class TestGetGaps:
    """ test that the get_gaps method returns info about gaps """

    start = UTC("2017-09-18")
    end = UTC("2017-09-28")
    sampling_rate = 1

    gaps = [
        (UTC("2017-09-18T18-00-00"), UTC("2017-09-18T19-00-00")),
        (UTC("2017-09-18T20-00-00"), UTC("2017-09-18T20-00-15")),
        (UTC("2017-09-20T01-25-35"), UTC("2017-09-20T01-25-40")),
        (UTC("2017-09-21T05-25-35"), UTC("2017-09-25T10-36-42")),
    ]

    durations = np.array([y - x for x, y in gaps])
    overlap = 0

    def _make_gappy_archive(self, path):
        """ Create the gappy archive defined by params in class. """
        ArchiveDirectory(
            path,
            self.start,
            self.end,
            self.sampling_rate,
            gaps=self.gaps,
            overlap=self.overlap,
        ).create_directory()
        return path

    # fixtures
    @pytest.fixture(scope="class")
    def gappy_dir(self, class_tmp_dir):
        """ create a directory that has gaps in it """
        self._make_gappy_archive(join(class_tmp_dir, "temp1"))
        return class_tmp_dir

    @pytest.fixture(scope="class")
    def gappy_bank(self, gappy_dir):
        """ init a sbank on the gappy data """
        bank = WaveBank(gappy_dir)
        # make sure index is updated after gaps are introduced
        if os.path.exists(bank.index_path):
            os.remove(bank.index_path)
        bank._index_cache = obsplus.bank.utils._IndexCache(bank, 5)
        bank.update_index()
        return bank

    @pytest.fixture()
    def gappy_and_contiguous_bank(self, tmp_path):
        """ Create a directory with gaps and continuous data """
        # first create directory with gaps
        self._make_gappy_archive(tmp_path)
        # then write data with no gaps
        st = obspy.read()
        for num, tr in enumerate(st):
            tr.stats.station = "GOOD"
            tr.write(str(tmp_path / f"good_{num}.mseed"), "mseed")
        return WaveBank(tmp_path).update_index()

    @pytest.fixture(scope="class")
    def empty_bank(self):
        """ create a Sbank object initiated on an empty directory """
        with tempfile.TemporaryDirectory() as td:
            bank = WaveBank(td)
            yield bank

    @pytest.fixture(scope="class")
    def gap_df(self, gappy_bank):
        """ return a gap df from the gappy bank"""
        return gappy_bank.get_gaps_df()

    @pytest.fixture(scope="class")
    def uptime_df(self, gappy_bank):
        """ return the uptime dataframe from the gappy bank """
        return gappy_bank.get_uptime_df()

    @pytest.fixture()
    def uptime_default(self, default_wbank):
        """ return the uptime from the default stream bank. """
        return default_wbank.get_uptime_df()

    # tests
    def test_gaps_length(self, gap_df):
        """ ensure each of the gaps shows up in df """
        assert isinstance(gap_df, pd.DataFrame)
        assert not gap_df.empty
        group = gap_df.groupby(["network", "station", "location", "channel"])
        for gnum, df in group:
            assert len(df) == len(self.gaps)
            dif = abs(df.gap_duration - self.durations)
            assert (dif < (1.5 * self.sampling_rate)).all()

    def test_gappy_uptime_df(self, uptime_df):
        """ ensure the uptime df is of correct type and accurate """
        assert isinstance(uptime_df, pd.DataFrame)
        gap_duration = sum([x[1] - x[0] for x in self.gaps])
        duration = self.end - self.start
        uptime_percent = (duration - gap_duration) / duration
        assert (abs(uptime_df["availability"] - uptime_percent) < 0.001).all()

    def test_uptime_default(self, uptime_default):
        """
        Ensure the uptime of the basic bank (no gaps) has expected
        times/channels.
        """
        df = uptime_default
        st = obspy.read()
        assert not df.empty, "uptime df is empty"
        assert len(df) == len(st)
        assert {tr.id for tr in st} == set(obsplus.utils.get_nslc_series(df))
        assert (df["gap_duration"] == 0).all()

    def test_empty_directory(self, empty_bank):
        """ ensure an empty bank get_gaps returns an empty df with expected
        columns """
        gaps = empty_bank.get_gaps_df()
        assert not len(gaps)
        assert set(WaveBank.gap_columns).issubset(set(gaps.columns))

    def test_kemmerer_uptime(self, kem_fetcher):
        """ ensure the kemmerer bank returns an uptime df"""
        bank = kem_fetcher.waveform_client
        df = bank.get_uptime_df()
        assert (df["uptime"] == df["duration"]).all()

    def test_gappy_and_contiguous_uptime(self, gappy_and_contiguous_bank):
        """
        Ensure when there are gappy streams and contiguous streams
        get_uptime still returns correct results.
        """
        wbank = gappy_and_contiguous_bank
        index = wbank.read_index()
        uptime = wbank.get_uptime_df()
        # make sure the same seed ids are in the index as uptime df
        seeds_from_index = set(obsplus.utils.get_nslc_series(index))
        seeds_from_uptime = set(obsplus.utils.get_nslc_series(uptime))
        assert seeds_from_index == seeds_from_uptime
        assert not uptime.isnull().any().any()
class TestSummarizeStreams:
    """tests for summarizing streams."""

    start = UTC("2017-09-20T01-00-00")
    end = UTC("2017-09-20T02-00-00")
    gap_start = UTC("2017-09-20T01-25-35")
    gap_end = UTC("2017-09-20T01-25-40")

    def clean_dataframe(self, df):
        """Function to fix some common issues with the dataframe."""
        for id_code in NSLC:
            df[id_code] = (
                df[id_code].astype(str).str.replace("b'", "").str.replace("'", "")
            )
        for time_col in ["starttime", "endtime"]:
            df[time_col] = df[time_col].astype("datetime64[ns]")
        return df[sorted(df.columns)]

    @pytest.fixture
    def gappy_stream(self):
        """Create a very simple mseed with one gap, return it."""
        stats = dict(
            network="UU",
            station="ELU",
            location="01",
            channel="ELZ",
            sampling_rate=1,
            starttime=self.start,
        )
        len1 = int(self.gap_start - self.start)
        # create first trace
        ar1 = np.random.rand(len1)
        tr1 = obspy.Trace(data=ar1, header=stats)
        assert tr1.stats.endtime <= self.gap_start
        # create second trace
        len2 = int(self.end - self.gap_end)
        ar2 = np.random.rand(len2)
        stats2 = dict(stats)
        stats2.update({"starttime": self.gap_end})
        tr2 = obspy.Trace(data=ar2, header=stats2)
        # assemble traces make sure gap is there
        assert tr2.stats.starttime >= self.gap_end
        st = obspy.Stream(traces=[tr1, tr2])
        gaps = st.get_gaps()
        assert len(gaps) == 1
        return st

    @pytest.fixture
    def gappy_mseed_path(self, gappy_stream, tmp_path):
        """Return a path to the saved mseed file with gaps."""
        out_path = tmp_path / "out.mseed"
        gappy_stream.write(str(out_path), format="mseed")
        return out_path

    def test_summarize_mseed(self, gappy_stream, gappy_mseed_path):
        """
        Summarize mseed should return the same answer as the generic
        summary function.
        """
        summary_1 = summarize_mseed(str(gappy_mseed_path))
        df1 = self.clean_dataframe(pd.DataFrame(summary_1))
        summary_2 = summarize_generic_stream(str(gappy_mseed_path))
        df2 = self.clean_dataframe(pd.DataFrame(summary_2))
        assert len(df1) == len(df2)
        assert (df1 == df2).all().all()
inv = read_inventory(os.path.join(base_dir, '7B.xml'))
asdf = ASDFDataSet(asdf_file, mode='r')


def create_chunk(trace, st_time, end_time, sta):
    trace.trim(starttime=st_time, endtime=end_time)
    st_out = Stream(traces=[trace, ])
    # note: the original referenced the module-level `tr` here instead of the
    # `trace` argument for the day-of-year directory
    dest_dir = os.path.join(out_dir,
                            str(trace.stats.starttime.timetuple().tm_year),
                            str(trace.stats.starttime.timetuple().tm_yday))
    if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)
    st_out.write(os.path.join(dest_dir,
                              sta.code + '_' + trace.stats.channel + '_' +
                              str(st_time) + '_' + str(end_time) + '.ms'),
                 format='MSEED')


for sta in inv.networks[0].stations:
    if asdf.waveforms.__contains__(inv.networks[0].code + '.' + sta.code):
        for i in asdf.waveforms[inv.networks[0].code + '.' + sta.code].list():
            if i.endswith('raw_recording'):
                start_time = UTC(i.split("__")[1])
                st = asdf.waveforms[inv.networks[0].code + '.' + sta.code][i]
                medn = np.median(st[0].data)
                while (abs(st[0].data[np.argmax(st[0].data)]) > 1e8 or
                       abs(st[0].data[np.argmin(st[0].data)]) > 1e8):
                    if abs(st[0].data[np.argmax(st[0].data)]) > 1e8:
                        st[0].data[np.argmax(st[0].data)] = \
                            abs(medn) if st[0].data[np.argmax(st[0].data)] > 0 else -abs(medn)
                    if abs(st[0].data[np.argmin(st[0].data)]) > 1e8:
                        st[0].data[np.argmin(st[0].data)] = \
                            abs(medn) if st[0].data[np.argmin(st[0].data)] > 0 else -abs(medn)
                while start_time + 86400 < UTC(i.split("__")[2]):
                    tr = st[0].copy()
                    create_chunk(tr, start_time, start_time + 86400, sta)
                    start_time += 86400
                if start_time < UTC(i.split("__")[2]):
                    tr = st[0].copy()
                    create_chunk(tr, start_time, UTC(i.split("__")[2]), sta)
def test_correlate(self):
    stream = read()
    stream2 = stream.copy()
    stream3 = stream.copy()
    for tr in stream2:
        tr.id = 'GR.FUR..BH' + tr.stats.channel[-1]
        tr.stats.sampling_rate = 80.
    for tr in stream3:
        tr.id = 'GR.WET..BH' + tr.stats.channel[-1]
        tr.stats.sampling_rate = 50.
    stream = stream + stream2 + stream3
    day = UTC('2018-01-02')
    for tr in stream:
        tr.stats.starttime = day
    # create some gaps
    stream = stream.cutout(day + 0.01, day + 10)
    stream = stream.cutout(day + 14, day + 16.05)

    # prepare mock objects for call to yam_correlate
    def data(starttime, endtime, **kwargs):
        return stream.select(**kwargs).slice(starttime, endtime)

    io = {'data': data, 'data_format': None, 'inventory': read_inventory()}
    res = yam_correlate(io, day, 'outkey', keep_correlations=True)
    self.assertEqual(len(res['corr']), 6)
    # by default only 'ZZ' combinations
    for tr in res['corr']:
        self.assertEqual(tr.stats.station[-1], 'Z')
        self.assertEqual(tr.stats.channel[-1], 'Z')
        if len(set(tr.id.split('.'))) == 2:  # autocorr
            np.testing.assert_allclose(xcorr_max(tr.data), (0, 1.))
    res = yam_correlate(
        io, day, 'outkey',
        station_combinations=('GR.FUR-GR.WET', 'RJOB-RJOB'),
        component_combinations=('ZZ', 'NE', 'NR'),
        keep_correlations=True,
        stack='1d', njobs=self.njobs)
    self.assertEqual(len(res['corr']), 7)
    self.assertEqual(len(res['stack']), 7)
    ids = ['RJOB.EHE.RJOB.EHN', 'RJOB.EHZ.RJOB.EHZ',
           'FUR.BHE.WET.BHN', 'FUR.BHN.WET.BHE',
           'FUR.BHR.WET.BHN', 'FUR.BHN.WET.BHR',
           'FUR.BHZ.WET.BHZ']
    for tr in res['corr']:
        self.assertIn(tr.id, ids)
        if len(set(tr.id.split('.'))) == 2:  # autocorr
            np.testing.assert_allclose(xcorr_max(tr.data), (0, 1.))
    res = yam_correlate(
        io, day, 'outkey', only_auto_correlation=True,
        station_combinations=('GR.FUR-GR.WET', 'RJOB-RJOB'),
        component_combinations=['ZN', 'RT'], njobs=self.njobs,
        keep_correlations=True, remove_response=True)
    self.assertEqual(len(res['corr']), 1)
    tr = res['corr'][0]
    self.assertEqual(tr.stats.station[-1], 'N')
    self.assertEqual(tr.stats.channel[-1], 'Z')
    stream.traces = [tr for tr in stream if tr.stats.channel[-1] != 'N']
    res = yam_correlate(
        io, day, 'outkey',
        station_combinations=('GR.FUR-GR.WET', 'RJOB-RJOB'),
        component_combinations=('NT', 'NR'), discard=0.0,
        keep_correlations=True)
    self.assertEqual(res, None)
def main(inventory_file, waveform_database, event_catalog_file,
         event_trace_datafile, start_time, end_time, taup_model,
         distance_range, magnitude_range, catalog_only=False):

    log = logging.getLogger(__name__)
    log.setLevel(logging.INFO)

    waveform_db_is_web = is_url(waveform_database) \
        or waveform_database in obspy.clients.fdsn.header.URL_MAPPINGS
    if not waveform_db_is_web:
        assert os.path.exists(waveform_database), \
            "Cannot find waveform database file {}".format(waveform_database)
    log.info("Using waveform data source: {}".format(waveform_database))

    min_dist_deg = distance_range[0]
    max_dist_deg = distance_range[1]
    min_mag = magnitude_range[0]
    max_mag = magnitude_range[1]

    inventory = read_inventory(inventory_file)
    log.info("Loaded inventory {}".format(inventory_file))

    # Compute reference lonlat from the inventory.
    channels = inventory.get_contents()['channels']
    lonlat_coords = []
    for ch in channels:
        coords = inventory.get_coordinates(ch)
        lonlat_coords.append((coords['longitude'], coords['latitude']))
    lonlat_coords = np.array(lonlat_coords)
    lonlat = np.mean(lonlat_coords, axis=0)
    log.info("Inferred reference coordinates {}".format(lonlat))

    # If start and end time not provided, infer from date range of inventory.
    if not start_time:
        start_time = inventory[0].start_date
        for net in inventory:
            start_time = min(start_time, net.start_date)
        log.info("Inferred start time {}".format(start_time))
    # end if
    if not end_time:
        end_time = inventory[0].end_date
        if end_time is None:
            end_time = UTC.now()
        for net in inventory:
            end_time = max(end_time, net.end_date)
        log.info("Inferred end time {}".format(end_time))
    # end if

    start_time = UTC(start_time)
    end_time = UTC(end_time)
    event_catalog_file = timestamp_filename(event_catalog_file, start_time,
                                            end_time)
    event_trace_datafile = timestamp_filename(event_trace_datafile,
                                              start_time, end_time)
    assert not os.path.exists(event_trace_datafile), \
        "Output file {} already exists, please remove!".format(event_trace_datafile)
    log.info("Traces will be written to: {}".format(event_trace_datafile))

    exit_after_catalog = catalog_only
    catalog = get_events(lonlat, start_time, end_time, event_catalog_file,
                         (min_dist_deg, max_dist_deg), (min_mag, max_mag),
                         exit_after_catalog)

    if waveform_db_is_web:
        log.info("Use fresh query results from web")
        client = Client(waveform_database)
        waveform_getter = client.get_waveforms
    else:
        # Form closure to allow waveform source file to be derived from a
        # setting (or command line input)
        asdf_dataset = FederatedASDFDataSet(waveform_database, logger=log)

        def closure_get_waveforms(network, station, location, channel,
                                  starttime, endtime):
            return asdf_get_waveforms(asdf_dataset, network, station,
                                      location, channel, starttime, endtime)

        waveform_getter = closure_get_waveforms
    # end if

    with tqdm(smoothing=0) as pbar:
        stream_count = 0
        for s in iter_event_data(catalog, inventory, waveform_getter,
                                 tt_model=taup_model, pbar=pbar):
            # Write traces to output file in append mode so that arbitrarily
            # large file can be processed. If the file already exists, then
            # existing streams will be overwritten rather than duplicated.
            # Check first if rotation for unaligned *H1, *H2 channels to
            # *HN, *HE is required.
            if not s:
                continue
            # end if
            if s.select(component='1') and s.select(component='2'):
                try:
                    s.rotate('->ZNE', inventory=inventory)
                except ValueError as e:
                    log.error('Unable to rotate to ZNE with error:\n{}'.format(str(e)))
                    continue
                # end try
            # end if
            # Order the traces in ZNE ordering. This is required so that
            # normalization can be specified in terms of an integer index,
            # i.e. the default of 0 in rf library will normalize against
            # the Z component.
            s.traces = sorted(s.traces, key=zne_order)
            # Assert the ordering of traces in the stream is ZNE.
            assert s[0].stats.channel[-1] == 'Z'
            assert s[1].stats.channel[-1] == 'N'
            assert s[2].stats.channel[-1] == 'E'
            # Iterator returns rf.RFStream. Write traces from obspy.Stream
            # to decouple from RFStream.
            grp_id = '.'.join(s.traces[0].id.split('.')[0:3])
            event_time = str(s.traces[0].meta.event_time)[0:19]
            pbar.set_description("{} -- {}".format(grp_id, event_time))
            out_stream = obspy.Stream([tr for tr in s])
            assert out_stream[0].stats.channel[-1] == 'Z'
            assert out_stream[1].stats.channel[-1] == 'N'
            assert out_stream[2].stats.channel[-1] == 'E'
            write_h5_event_stream(event_trace_datafile, out_stream, mode='a')
            stream_count += 1
        # end for

        if stream_count == 0:
            log.warning("No traces found!")
        else:
            log.info("Wrote {} streams to output file".format(stream_count))