def sort_shift(df, st, dayst, sr):
    """Sort both streams and derive a per-trace sample shift from manual picks.

    ``st`` and ``dayst`` are sorted in place.  When ``st`` holds more traces
    than ``dayst``, every trace of ``st`` whose station/channel pair selects
    nothing from ``dayst`` is removed from ``st``.  The event's phase table is
    then fetched, restricted to rows whose ``Status`` is ``'manual'``, and the
    delay of each pick after the origin time is converted to a whole number of
    samples at rate ``sr``.

    :param df: Event record providing ``'Date'``, ``'Time'``, ``'Regional'``
        and ``'ID'``.
    :param st: Stream-like object (presumably an obspy ``Stream`` of
        templates -- confirm with caller); sorted and possibly pruned in place.
    :param dayst: Stream-like object compared against ``st``; sorted in place.
    :param sr: Sampling rate used to convert seconds into samples.
    :return: ``(shifts, st, dayst, phases)``; ``shifts`` is an integer array
        with one entry per trace of ``st`` (zero where no pick matched).
    """
    st.sort()
    dayst.sort()

    # Pruning only happens when st is the longer of the two streams.
    if len(st) > len(dayst):
        for trace in st:
            counterpart = dayst.select(station=trace.stats.station,
                                       channel=trace.stats.channel)
            if len(counterpart) == 0:
                st.remove(trace)

    # Number of samples between the event origin time and each pick.
    origintime = UTCDateTime(df['Date'] + 'T' + df['Time'])
    network_code = df['Regional']
    event_id = network_code + str(df['ID'])
    event = get_event_by_id(event_id, includesuperseded=True)
    phases = get_phase_dataframe(event, catalog=network_code)
    phases = phases[phases['Status'] == 'manual']

    shifts = np.zeros(len(st), dtype=int)
    for _, pick in phases.iterrows():
        channel_parts = pick['Channel'].split('.')
        sta, comp = channel_parts[1], channel_parts[2]
        arrival = UTCDateTime(pick['Arrival Time'])
        shift = int(np.round((arrival - origintime) * sr))
        for position, trace in enumerate(st):
            if trace.stats.station == sta and trace.stats.channel == comp:
                print(" ".join([sta, comp, str(shift)]))
                shifts[position] = shift
    return shifts, st, dayst, phases
def test_magnitude_dataframe():
    """Replay the recorded request and check the size of the returned table.

    NOTE(review): despite the name, this test calls ``get_phase_dataframe``.
    """
    cassette_dir, _ = get_datadir()
    cassette_path = os.path.join(cassette_dir, 'dataframes_magnitude.yaml')
    with vcr.use_cassette(cassette_path):
        # 2016 NZ event
        event = get_event_by_id('us1000778i')
        table = get_phase_dataframe(event, catalog='us')
        assert len(table) == 174
def test_magnitude_dataframe():
    """Replay the recorded request and check the size of the returned table.

    NOTE(review): despite the name, this test calls ``get_phase_dataframe``.
    """
    data_root = get_datadir()
    cassette_path = os.path.join(data_root, 'vcr_magnitude_dataframe.yaml')
    with vcr.use_cassette(cassette_path):
        # 2016 NZ event
        event = get_event_by_id('us1000778i')
        table = get_phase_dataframe(event, catalog='us')
        assert len(table) == 174
def test_phase_dataframe():
    """Fetch the phase table for one event through a VCR cassette.

    The cassette is opened with ``record_mode="new_episodes"`` and the
    resulting table is expected to contain 174 rows.
    """
    cassette_dir, _ = get_datadir()
    cassette_path = os.path.join(cassette_dir, "dataframes_phase.yaml")
    with vcr.use_cassette(cassette_path, record_mode="new_episodes"):
        # 2016 NZ event
        event = get_event_by_id("us1000778i")
        table = get_phase_dataframe(event, catalog="us")
        assert len(table) == 174
def retrieve_usgs_catalog(**kwargs):
    """
    Retrieve a full USGS catalog, phase picks included.

    Wraps obspy.clients.fdsn.Client and libcomcat (usgs): the events come from
    the FDSN client, while picks/arrivals (otherwise not supported by the usgs
    fdsn implementation) are rebuilt from the libcomcat phase dataframe.  When
    an event has a moment-tensor or focal-mechanism product, its first focal
    mechanism replaces ``ev.focal_mechanisms``.

    :param kwargs: Will be passed to the Client (e.g. minlongitude,
        maxmagnitude etc...)
    :return: obspy.core.events.Catalog
    """
    fdsn_client = Client('https://earthquake.usgs.gov')
    cat = fdsn_client.get_events(**kwargs)

    # Enrich every event with the phase information held by libcomcat.
    for ev in cat:
        resource = ev.resource_id.id
        print(resource)
        # The event id sits between the second-to-last '=' and the next '&'.
        eid = resource.split('=')[-2].split('&')[0]
        detail = get_event_by_id(eid, includesuperseded=True)
        phase_df = get_phase_dataframe(detail)
        origin = ev.preferred_origin()

        for _, row in phase_df.iterrows():
            seed_parts = row['Channel'].split('.')
            # The last field is the location code; '--' stands for "empty".
            loc_code = '' if seed_parts[-1] == '--' else seed_parts[-1]
            wf_id = WaveformStreamID(
                network_code=seed_parts[0],
                station_code=seed_parts[1],
                location_code=loc_code,
                channel_code=seed_parts[2],
            )
            # NOTE(review): 'method' is forwarded exactly as before -- confirm
            # that Pick actually stores this keyword.
            pick = Pick(
                time=UTCDateTime(row['Arrival Time']),
                method=row['Status'],
                waveform_id=wf_id,
                phase_hint=row['Phase'],
            )
            ev.picks.append(pick)
            origin.arrivals.append(
                Arrival(
                    pick_id=pick.resource_id.id,
                    phase=pick.phase_hint,
                    azimuth=row['Azimuth'],
                    distance=row['Distance'],
                    time_residual=row['Residual'],
                    time_weight=row['Weight'],
                )
            )

        # Focal mechanisms: a moment tensor always wins when available.
        if 'moment-tensor' in detail.products:
            product_name = 'moment-tensor'
        elif 'focal-mechanism' in detail.products:
            product_name = 'focal-mechanism'
        else:
            continue
        product = detail.getProducts(product_name)[0]
        mt_xml = product.getContentBytes('quakeml.xml')[0]
        mt_ev = read_events(
            io.TextIOWrapper(io.BytesIO(mt_xml), encoding='utf-8'))
        mechanism = mt_ev[0].focal_mechanisms[0]
        mechanism.triggering_origin_id = ev.preferred_origin().resource_id.id
        ev.focal_mechanisms = [mechanism]
    return cat
def make_template(df, sr):
    """Download a 10 s template trace around every manual phase pick.

    The event's phase table is restricted to rows whose ``Status`` is
    ``'manual'`` and de-duplicated on ``('Channel', 'Phase')``.  Each pick
    time is snapped to the sampling grid defined by ``sr``; waveforms are then
    requested from IRIS, detrended, band-pass filtered (2-7 Hz), interpolated
    to ``sr`` and trimmed to the window from 1 s before to 9 s after the
    snapped pick.

    :param df: Event record providing ``'Regional'`` and ``'ID'``.
    :param sr: Target sampling rate of the templates.
    :return: Stream holding the traces of every pick that could be downloaded.
    """
    client = Client("IRIS")

    regional = df['Regional']
    eventid = regional + str(df['ID'])
    detail = get_event_by_id(eventid, includesuperseded=True)
    phases = get_phase_dataframe(detail, catalog=regional)
    phases = phases[phases['Status'] == 'manual']
    print(phases)
    phases = phases[~phases.duplicated(keep='first',
                                       subset=['Channel', 'Phase'])]
    print(phases)

    st = Stream()
    for _, pick in phases.iterrows():
        channel_parts = pick['Channel'].split('.')
        net = channel_parts[0]
        sta = channel_parts[1]
        comp = channel_parts[2]
        arr = UTCDateTime(pick['Arrival Time'])

        # Snap the sub-second part of the pick onto the sampling grid.
        snapped_us = int(
            np.round(arr.microsecond / (1 / sr * 10**6)) * 1 / sr * 10**6)
        if snapped_us == 1000000:
            # Rounding reached the next full second.  Add one second to the
            # timestamp instead of incrementing `second`, which would become
            # the invalid value 60 for a pick in second 59.
            arr.microsecond = 0
            arr = arr + 1
        else:
            arr.microsecond = snapped_us

        t1 = arr - 1
        t2 = arr + 9
        window = (net + " " + sta + " " + comp + " " + str(t1) + " "
                  + str(t2))
        try:
            # Request 2 s of margin on both sides for filtering/interpolation.
            tr = client.get_waveforms(net, sta, "*", comp, t1 - 2, t2 + 2)
        except Exception:
            # Best effort: a failed download only skips this pick.  Narrower
            # than a bare except so Ctrl-C still interrupts the run.
            print("No data for " + window)
            continue

        print("Data available for " + window)
        tr.detrend()
        tr.trim(starttime=t1 - 2, endtime=t2 + 2, nearest_sample=1, pad=1,
                fill_value=0)
        tr.filter("bandpass", freqmin=2, freqmax=7)
        tr.interpolate(sampling_rate=sr, starttime=t1)
        tr.trim(starttime=t1, endtime=t2, nearest_sample=1, pad=1,
                fill_value=0)
        st += tr
    return st
def check_phase_info(df):
    """Report whether phase information can be retrieved for an event.

    :param df: Event record providing ``'Regional'`` and ``'ID'``.
    :return: ``1`` when both the event detail and its phase dataframe could be
        fetched, ``0`` when either lookup raised an exception.
    """
    regional = df['Regional']
    eventid = regional + str(df['ID'])
    try:
        detail = get_event_by_id(eventid, includesuperseded=True)
        # Only reached when the detail lookup succeeded, so `detail` is
        # always bound here.
        get_phase_dataframe(detail, catalog=regional)
    except Exception:
        # Any lookup failure means "no phase info"; KeyboardInterrupt and
        # SystemExit are deliberately not swallowed.
        return 0
    return 1
def test_nan_mags():
    """Build the phase table for event ``us2000arrw``, tolerating ParsingError.

    Any other exception raised while building the table fails the test.
    """
    event = get_event_by_id('us2000arrw')
    try:
        get_phase_dataframe(event)
    except ParsingError:
        # A parsing failure is an accepted outcome for this event.
        pass
def make_template(df, sampling_rate, filter=[0.2, 8], tcs_length=[1, 9]):
    '''
    Download templates by event ID and collect per-trace phase metadata.

    original function by: Amanda Thomas
    modified by: Tim Lin
     -merge data with interpolate data point  2020.09
     -add multiple attempts when download has unexpected issue  2020.09

    Manual picks of the event (de-duplicated on Channel/Phase) are snapped to
    the sampling grid, downloaded from IRIS (up to 5 attempts, 2 s apart),
    merged, linearly detrended, optionally band-pass filtered, interpolated to
    ``sampling_rate`` and trimmed to the requested window around the pick.

    ``UTCDateTime`` and ``np`` are not imported here and must be available at
    module level.

    :param df: Event record providing 'Regional', 'ID', 'Lon', 'Lat', 'Depth'
        and 'Magnitude'.
    :param sampling_rate: Target sampling rate of the templates.
    :param filter: ``[freqmin, freqmax]`` of the band-pass filter; a falsy
        value disables filtering.
    :param tcs_length: ``[seconds before, seconds after]`` the pick kept in
        each template.
    :return: ``(st, All_info)`` -- the template Stream and a dict with the
        keys 'net_sta_comp', 'phase', 'arrival', 'travel', 'OT_template',
        'tcs_length', 'filter', 'eqloc', 'eqmag' and 'evid'.
    :raises AssertionError: if a processed request does not end up as exactly
        one trace.
    '''
    import copy
    import time

    from libcomcat.dataframes import get_phase_dataframe
    from libcomcat.search import get_event_by_id
    from obspy import Stream
    from obspy.clients.fdsn import Client

    max_attempts = 5

    client = Client("IRIS")
    regional = df['Regional']
    eventid = regional + str(df['ID'])
    detail = get_event_by_id(eventid, includesuperseded=True)
    OT = UTCDateTime(detail.time)  # event origin time
    phases = get_phase_dataframe(detail, catalog=regional)
    phases = phases[phases['Status'] == 'manual']
    phases = phases[~phases.duplicated(keep='first',
                                       subset=['Channel', 'Phase'])]

    st = Stream()
    sav_net_sta_comp = []
    sav_phase = []
    sav_arr = []
    sav_travel = []
    for _, pick in phases.iterrows():
        elems = pick['Channel'].split('.')
        net = elems[0]
        sta = elems[1]
        comp = elems[2]
        location = elems[3]
        if location == '--':
            location = ''  # sometimes the location uses -- instead of ''
        Phase = pick['Phase']  # P or S wave, kept for later relocation
        arr = UTCDateTime(pick['Arrival Time'])

        # Snap the sub-second part of the pick onto the sampling grid.
        snapped_us = int(
            np.round(arr.microsecond / (1 / sampling_rate * 10**6))
            * 1 / sampling_rate * 10**6)
        if snapped_us == 1000000:
            # Rounding reached the next full second: carry it over.
            arr.microsecond = 0
            arr = arr + 1
        else:
            arr.microsecond = snapped_us

        t1 = arr - tcs_length[0]
        t2 = arr + tcs_length[1]

        # The download occasionally fails for unexpected reasons, so retry.
        tr = None
        for _attempt in range(max_attempts):
            try:
                # Be careful: with a wildcard location this could return more
                # than one trace (i.e. '00', '01', ...), hence the explicit
                # location code.
                tr = client.get_waveforms(net, sta, location, comp,
                                          t1 - 2, t2 + 2)
                break
            except Exception:
                # Narrower than a bare except so Ctrl-C still aborts the run.
                time.sleep(2)  # wait 2 sec and try again later
        if tr is None:
            continue  # every attempt failed; skip this pick

        tr.merge(method=1, interpolation_samples=-1, fill_value='interpolate')
        tr.detrend('linear')
        tr.trim(starttime=t1 - 2, endtime=t2 + 2, nearest_sample=1, pad=1,
                fill_value=0)
        if filter:
            tr.filter("bandpass", freqmin=filter[0], freqmax=filter[1])
        tr.interpolate(sampling_rate=sampling_rate, starttime=t1,
                       method='linear')
        tr.trim(starttime=t1, endtime=t2, nearest_sample=1, pad=1,
                fill_value=0)
        assert len(
            tr) == 1, 'Unexpecting error when query:%s %s %s %s %s %s' % (
                net, sta, location, comp, t1 - 2, t2 + 2)
        st += tr.copy()

        # Save name, time and phase for later relocation: the .ms file only
        # gives the start time, not whether the pick is a P or an S wave.
        sav_net_sta_comp.append(net + '.' + sta + '.' + comp + '.' + location)
        sav_phase.append(Phase)
        # Arrival time in ISO format, accurate to 0.01 sec.
        sav_arr.append(arr.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-4])
        sav_travel.append(arr - OT)  # travel time relative to the origin

    # IMPORTANT NOTE: the order of these arrays is NOT necessarily the same as
    # st.  When there are both P and S data for the same net.sta.chn.loc, the
    # order gets mixed up once written to a .ms file, so treat this info with
    # extra caution.  2020.11.12 update: bulk_make_template runs an additional
    # re-order (All_info_reorder) on All_info, so data generated through
    # download_tools.bulk_make_template is in the same order as the .ms file.
    All_info = {}
    All_info['net_sta_comp'] = np.array(sav_net_sta_comp)
    All_info['phase'] = np.array(sav_phase)
    All_info['arrival'] = np.array(sav_arr)  # absolute arrival time
    All_info['travel'] = np.array(sav_travel)  # phase travel time (sec)
    All_info['OT_template'] = OT.strftime('%Y-%m-%dT%H:%M:%S.%f')[:-4]
    # Shallow copies: the returned dict must not alias the shared default
    # lists, otherwise a caller editing All_info would change the defaults of
    # every later call.
    All_info['tcs_length'] = copy.copy(tcs_length)
    All_info['filter'] = copy.copy(filter)
    All_info['eqloc'] = [df['Lon'], df['Lat'], df['Depth']]
    All_info['eqmag'] = df['Magnitude']
    All_info['evid'] = eventid
    return st, All_info
def main():
    """Command-line entry point: save phase dataframes for one or many events.

    With ``args.eventid`` set, the phase data of that single event is saved
    and the process exits (status 0 on success, 1 on failure).  Otherwise the
    catalog is searched with the bounding-box / radius, time, magnitude,
    catalog and contributor options, and the phase data of every matching
    event that has a ``phase-data`` product is saved into ``args.directory``.
    Events whose phase data cannot be retrieved are reported and skipped.
    """
    parser = get_parser()
    args = parser.parse_args()

    setup_logger(args.logfile, args.loglevel)

    if args.eventid:
        # NOTE(review): this path saves before args.directory is created
        # below -- confirm whether save_dataframe creates it.
        detail = get_event_by_id(args.eventid, catalog=args.catalog)
        try:
            df = get_phase_dataframe(detail, args.catalog)
            filename = save_dataframe(
                df, args.directory, detail, args.format, catalog=args.catalog)
            print('Saved phase data for %s to %s' % (detail.id, filename))
            sys.exit(0)
        except Exception as e:
            fmt = ('Could not extract the phase data due to the '
                   'following error: \n"%s"\n\nExiting.')
            print(fmt % (str(e)))
            sys.exit(1)

    if args.bounds and args.radius:
        print('Please specify either a bounding box OR radius search.')
        sys.exit(1)

    if not os.path.isdir(args.directory):
        os.makedirs(args.directory)

    latitude = None
    longitude = None
    radiuskm = None
    lonmin = latmin = lonmax = latmax = None
    if args.radius:
        latitude = args.radius[0]
        longitude = args.radius[1]
        radiuskm = args.radius[2]

    if args.bounds:
        lonmin, lonmax, latmin, latmax = args.bounds
        # fix longitude bounds when crossing dateline
        if lonmin > lonmax and lonmax >= -180:
            lonmin -= 360

    minmag = 0.0
    maxmag = 9.9
    if args.magRange:
        minmag = args.magRange[0]
        maxmag = args.magRange[1]

    events = search(starttime=args.startTime,
                    endtime=args.endTime,
                    updatedafter=args.after,
                    minlatitude=latmin,
                    maxlatitude=latmax,
                    minlongitude=lonmin,
                    maxlongitude=lonmax,
                    latitude=latitude,
                    longitude=longitude,
                    maxradiuskm=radiuskm,
                    catalog=args.catalog,
                    contributor=args.contributor,
                    maxmagnitude=maxmag,
                    minmagnitude=minmag)

    if not len(events):
        print('No events found matching your search criteria. Exiting.')
        sys.exit(0)

    for event in events:
        if not event.hasProduct('phase-data'):
            continue
        try:
            detail = event.getDetailEvent()
            try:
                df = get_phase_dataframe(detail, args.catalog)
            except Exception as e:
                fmt = ('Could not get phase dataframe for '
                       'event %s. Error "%s". Continuing.')
                print(fmt % (detail.id, str(e)))
                # Skip saving: `df` is either unbound or still holds the
                # previous event's data at this point.
                continue
            filename = save_dataframe(
                df, args.directory, detail, args.format, catalog=args.catalog)
            print('Saved phase data for %s to %s' % (event.id, filename))
        except Exception as e:
            print('Failed to retrieve phase data for event %s. Error "%s"... continuing.' % (
                event.id, str(e)))
            continue
def make_training_data(df, sr, winsize, phase):
    """Build fixed-length waveform windows and P/S labels from manual picks.

    For every manual pick of the event (optionally restricted to one phase
    type) a window of ``winsize`` seconds centred on the pick is downloaded,
    interpolated to ``sr`` and trimmed.  With ``phase == 'N'`` all manual
    picks are used but each window is moved 120 s earlier.  Windows with the
    wrong number of samples, containing only zeros, or containing NaNs are
    discarded before the remaining traces are detrended.

    :param df: Event record providing ``'Regional'`` (``'uw'`` -> IRIS,
        ``'nc'`` -> NCEDC) and ``'ID'``.
    :param sr: Target sampling rate.
    :param winsize: Window length in seconds.
    :param phase: Phase name to keep, or ``'N'`` to keep every manual pick and
        shift the windows.
    :return: ``(stout, pors)`` -- an array of shape
        ``(n_traces, sr * winsize + 1)`` and a label array holding 1 for
        traces tagged ``'S'`` and 0 otherwise.
    """
    # np.zeros needs an integer size even when sr or winsize is a float.
    n_samples = int(round(sr * winsize)) + 1

    regional = df['Regional']
    if regional == 'uw':
        client = Client("IRIS")
    elif regional == "nc":
        client = Client("NCEDC")
    else:
        # Previously `client` stayed unbound here and the resulting NameError
        # was reported as "No data" for every pick; return the same empty
        # result explicitly instead.
        print("No data center configured for regional network "
              + str(regional))
        return np.zeros((0, n_samples)), np.zeros(0)

    eventid = regional + str(df['ID'])
    detail = get_event_by_id(eventid, includesuperseded=True)
    phases = get_phase_dataframe(detail, catalog=regional)
    phases = phases[phases['Status'] == 'manual']
    if phase != 'N':
        phases = phases[phases['Phase'] == phase]
    print(phases)

    st = Stream()
    for _, pick in phases.iterrows():
        channel_parts = pick['Channel'].split('.')
        net = channel_parts[0]
        sta = channel_parts[1]
        comp = channel_parts[2]
        pick_phase = pick['Phase']
        arr = UTCDateTime(pick['Arrival Time'])

        t1 = arr - winsize / 2
        t2 = arr + winsize / 2
        if phase == 'N':
            t1 -= 120
            t2 -= 120

        window = (net + " " + sta + " " + comp + " " + str(t1) + " "
                  + str(t2))
        try:
            # Ask for 1 s of margin on both sides of the window.
            tr = client.get_waveforms(net, sta, "*", comp, t1 - 1, t2 + 1)
        except Exception:
            print("No data for " + window)
            continue

        print("Data available for " + window)
        try:
            tr.interpolate(sampling_rate=sr, starttime=t1)
        except Exception:
            # The untrimmed data is still appended; the length filter below
            # is what rejects it.
            print("Data interp issues")
        else:
            tr.trim(starttime=t1, endtime=t2, nearest_sample=1, pad=1,
                    fill_value=0)
        if len(tr) > 0:
            # The location field is reused to carry the phase label.
            tr[0].stats.location = pick_phase
        st += tr

    # Iterate over a snapshot so that removing traces is always safe.
    for tr in list(st):
        wrong_length = len(tr.data) != n_samples
        # "All zeros" means no non-zero sample; the former sum == length test
        # matched all-ones data instead.
        all_zero = not np.any(tr.data)
        has_nan = bool(np.isnan(tr.data).any())
        if wrong_length or all_zero or has_nan:
            st.remove(tr)
    st.detrend()

    stout = np.zeros((len(st), n_samples))
    pors = np.zeros(len(st))
    for ii, tr in enumerate(st):
        stout[ii, :] = tr.data
        # 0 marks 'P' (and anything unlabelled), 1 marks 'S'.
        if tr.stats.location == 'S':
            pors[ii] = 1
    return stout, pors