def preprocess(db, stations, comps, goal_day, params, responses=None):
    """
    Fetches data for each ``stations`` and each ``comps`` using the
    data_availability table in the database.

    To correct for instrument responses, make sure to set ``remove_response``
    to "Y" in the config and to provide the ``responses`` DataFrame.

    For every station/component pair the function reads the files listed in
    the data availability table, checks sample alignment, merges short gaps
    (up to ``params.preprocess_max_gap`` seconds), drops traces that are too
    short or sampled below the goal rate, then detrends, tapers, high-passes
    and (when needed) low-passes and resamples each trace to
    ``params.goal_sampling_rate``. The instrument response is removed last
    when ``params.remove_response`` is set.

    .. note::
        With ``resampling_method == "Decimate"`` and a non-integer decimation
        factor, the function logs a warning and terminates the process through
        :func:`sys.exit`.

    :Example:

    >>> from msnoise.api import connect, get_params, preload_instrument_responses
    >>> from msnoise.preprocessing import preprocess
    >>> db = connect()
    >>> params = get_params(db)
    >>> responses = preload_instrument_responses(db)
    >>> st = preprocess(db, ["YA.UV06.00","YA.UV10.00"], ["Z",], "2010-09-01", params, responses)
    >>> st
     2 Trace(s) in Stream:
    YA.UV06.00.HHZ | 2010-09-01T00:00:00.000000Z - 2010-09-01T23:59:59.950000Z | 20.0 Hz, 1728000 samples
    YA.UV10.00.HHZ | 2010-09-01T00:00:00.000000Z - 2010-09-01T23:59:59.950000Z | 20.0 Hz, 1728000 samples

    :type db: :class:`sqlalchemy.orm.session.Session`
    :param db: A :class:`~sqlalchemy.orm.session.Session` object, as
        obtained by :func:`msnoise.api.connect`.
    :type stations: list of str
    :param stations: a list of station names, in the format NET.STA.LOC
        (each entry is split on "." into exactly three parts).
    :type comps: list of str
    :param comps: a list of component names, in Z,N,E,1,2.
    :type goal_day: str
    :param goal_day: the day of data to load, ISO 8601 format: e.g.
        2016-12-31.
    :type params: class
    :param params: an object containing the config parameters, as obtained by
        :func:`msnoise.api.get_params`.
    :type responses: :class:`pandas.DataFrame`
    :param responses: a DataFrame containing the instrument responses, as
        obtained by :func:`msnoise.api.preload_instrument_responses`.
    :rtype: :class:`obspy.core.stream.Stream`
    :return: A Stream object containing all traces.
    """
    # The requested day is the same for every station: parse it once.
    gd = datetime.datetime.strptime(goal_day, '%Y-%m-%d')
    day_start = UTCDateTime(gd)
    day_end = day_start + 86400

    # Multiplexed traces only carry network and station codes, so they are
    # matched against the NET.STA prefix of the requested NET.STA.LOC names.
    wanted_netsta = {".".join(name.split(".")[:2]) for name in stations}

    datafiles = {}
    output = Stream()
    multiplex_cache = {}  # multiplexed file name -> pre-filtered Stream

    # ---- Pass 1: collect, per station and component, what has to be read.
    for station in stations:
        net, sta, loc = station.split('.')
        datafiles[station] = {}
        for comp in comps:
            datafiles[station][comp] = []

        entries = get_data_availability(db, net=net, sta=sta, loc=loc,
                                        starttime=gd, endtime=gd)
        for entry in entries:
            fullpath = os.path.join(entry.path, entry.file)
            if entry.sta != "MULTIPLEX":
                if entry.chan[-1] not in comps:
                    continue
                datafiles[station][entry.chan[-1]].append(fullpath)
                continue

            print("Mutliplex mode, reading the files")
            multiplexed = sorted(glob.glob(fullpath))
            for comp in comps:
                for fn in multiplexed:
                    if fn not in multiplex_cache:
                        raw = read(fn, format=params.archive_format or None)
                        kept = [
                            tr for tr in raw
                            if "%s.%s" % (tr.stats.network,
                                          tr.stats.station) in wanted_netsta
                            and tr.stats.channel[-1] in comps
                        ]
                        multiplex_cache[fn] = Stream(traces=kept)
                    datafiles[station][comp].append(multiplex_cache[fn])

    # ---- Pass 2: read and process each station/component.
    for station in stations:
        net, sta, loc = station.split(".")
        for comp in comps:
            files = datafiles[station][comp]
            if len(files) == 0:
                continue

            logger.debug("%s.%s Reading %i Files" %
                         (station, comp, len(files)))
            traces = []
            for file in files:
                if isinstance(file, Stream):
                    st = file.select(network=net, station=sta,
                                     component=comp).copy()
                else:
                    try:
                        st = read(file, dtype=np.float64,
                                  starttime=day_start,
                                  endtime=day_end,
                                  station=sta,
                                  format=params.archive_format or None)
                    except Exception:
                        # Best effort: an unreadable file is skipped, but the
                        # traceback is kept in the debug log.
                        logger.debug("ERROR reading file %s" % file,
                                     exc_info=True)
                        continue

                # Two-character channel codes: the component letter is stored
                # in the location field, so move it back into the channel.
                for tr in st:
                    if len(tr.stats.channel) == 2:
                        tr.stats.channel += tr.stats.location
                        tr.stats.location = "00"

                tmp = st.select(network=net, station=sta, component=comp)
                if not len(tmp):
                    # No match: force the expected network code and retry.
                    for tr in st:
                        tr.stats.network = net
                    st = st.select(network=net, station=sta, component=comp)
                else:
                    st = tmp

                for tr in st:
                    tr.data = tr.data.astype(np.float64)
                    tr.stats.network = tr.stats.network.upper()
                    tr.stats.station = tr.stats.station.upper()
                    tr.stats.channel = tr.stats.channel.upper()
                    if tr.stats.location == "":
                        tr.stats.location = "--"
                    traces.append(tr)

            stream = Stream(traces=traces)
            if not len(stream):
                continue

            # Round-trip the traces through an in-memory MiniSEED buffer
            # before sorting (kept from the original implementation).
            f = io.BytesIO()
            stream.write(f, format='MSEED')
            f.seek(0)
            stream = read(f, format="MSEED")
            stream.sort()
            if not len(stream):
                continue

            logger.debug("%s Checking sample alignment" % stream[0].id)
            for i, trace in enumerate(stream):
                stream[i] = check_and_phase_shift(
                    trace, params.preprocess_taper_length)

            logger.debug("%s Checking Gaps" % stream[0].id)
            gaps = getGaps(stream)
            if len(gaps) > 0:
                max_gap = params.preprocess_max_gap * \
                    stream[0].stats.sampling_rate
                while len(gaps):
                    too_long = 0
                    for gap in gaps:
                        if int(gap[-1]) > max_gap:
                            too_long += 1
                            continue
                        # Merge the two traces around the first short gap,
                        # then recompute the gap list (indices have shifted).
                        second = stream[gap[1]]
                        try:
                            stream[gap[0]] = stream[gap[0]].__add__(
                                second, method=1, fill_value="interpolate")
                        except Exception:
                            logger.debug("%s could not be merged, dropping it"
                                         % second.id, exc_info=True)
                        stream.remove(second)
                        break
                    if too_long == len(gaps):
                        break
                    gaps = getGaps(stream)

            stream = stream.split()
            # Iterating an ObsPy Stream walks a copy of its trace list, so
            # removing traces inside these loops is safe.
            for tr in stream:
                if tr.stats.sampling_rate < (params.goal_sampling_rate - 1):
                    stream.remove(tr)

            taper_length = params.preprocess_taper_length  # seconds
            for trace in stream:
                if trace.stats.npts < (4 * taper_length *
                                       trace.stats.sampling_rate):
                    stream.remove(trace)
                else:
                    trace.detrend(type="demean")
                    trace.detrend(type="linear")
                    trace.taper(max_percentage=None, max_length=taper_length)

            if not len(stream):
                logger.debug(" has only too small traces, skipping...")
                continue

            for trace in stream:
                logger.debug("%s Highpass at %.2f Hz" %
                             (trace.id, params.preprocess_highpass))
                trace.filter("highpass", freq=params.preprocess_highpass,
                             zerophase=True, corners=4)

                if trace.stats.sampling_rate == params.goal_sampling_rate:
                    continue

                logger.debug("%s Lowpass at %.2f Hz" %
                             (trace.id, params.preprocess_lowpass))
                trace.filter("lowpass", freq=params.preprocess_lowpass,
                             zerophase=True, corners=8)

                if params.resampling_method == "Resample":
                    logger.debug("%s Downsample to %.1f Hz" %
                                 (trace.id, params.goal_sampling_rate))
                    trace.data = resample(
                        trace.data,
                        params.goal_sampling_rate / trace.stats.sampling_rate,
                        'sinc_fastest')
                elif params.resampling_method == "Decimate":
                    decimation_factor = trace.stats.sampling_rate / \
                        params.goal_sampling_rate
                    if int(decimation_factor) != decimation_factor:
                        logger.warning(
                            "%s CANNOT be decimated by an integer factor, consider using Resample or Lanczos methods"
                            " Trace sampling rate = %i ; Desired CC sampling rate = %i" %
                            (trace.id, trace.stats.sampling_rate,
                             params.goal_sampling_rate))
                        sys.stdout.flush()
                        sys.exit()
                    logger.debug("%s Decimate by a factor of %i" %
                                 (trace.id, decimation_factor))
                    trace.data = trace.data[::int(decimation_factor)]
                elif params.resampling_method == "Lanczos":
                    logger.debug("%s Downsample to %.1f Hz" %
                                 (trace.id, params.goal_sampling_rate))
                    trace.data = np.array(trace.data)
                    trace.interpolate(method="lanczos",
                                      sampling_rate=params.goal_sampling_rate,
                                      a=1.0)
                trace.stats.sampling_rate = params.goal_sampling_rate

            if params.remove_response:
                logger.debug('%s Removing instrument response' % stream[0].id)
                try:
                    stream.attach_response(responses)
                    stream.remove_response(
                        pre_filt=params.response_prefilt, taper=False)
                except Exception:
                    logger.error("Bad or no instrument response "
                                 "information for %s, skipping" % stream[0].id)
                    continue

            for tr in stream:
                tr.data = tr.data.astype(np.float32)
            output += stream

    clean_scipy_cache()
    return output
# Script fragment: download a 120 s window of data from an FDSN server,
# convert it to displacement and prepare a figure for plotting.
# `stime`, `net`, `sta`, `loc` and `chan` are defined earlier in the script.

# End of the time window: 120 seconds after `stime`.
etime = stime + 120

# Client() is built with its default arguments; fetch the station metadata
# down to response level for the requested channel(s).
client = Client()
inv = client.get_stations(network=net, station=sta, starttime=stime, endtime=etime, channel=chan, level="response")

# Download the waveforms, remove the mean, merge segments (gaps are filled
# with zeros) and deconvolve the instrument response to displacement.
st = Stream()
st += client.get_waveforms(net, sta, loc, chan, stime, etime)
st.detrend('constant')
st.merge(fill_value=0)
st.attach_response(inv)
st.remove_response(output="DISP")
#st.rotate(method="->ZNE",inventory=inv)

# Band-pass between 0.5 and 5 Hz, then keep the first trace only.
st.filter("bandpass", freqmin=.5, freqmax=5)
tr = st[0]

# Time axis in seconds relative to the first sample of the trace.
t = np.linspace(0, (tr.stats.npts - 1) / tr.stats.sampling_rate, num=tr.stats.npts)

# Figure set-up: axis labels, limits and title.
# NOTE(review): the y label says mm, but no m -> mm scaling is visible in this
# fragment after remove_response(output="DISP") -- confirm that the plotted
# data are scaled before plotting.
fig = plt.figure(1, figsize=(12, 12))
plt.ylabel('Displacement (mm)', fontsize=14)
plt.xlim([0, 120])
plt.ylim([-.02, .02])
plt.xlabel('seconds after origin', fontsize=14)
plt.title('%s-%s-%s-%s, 2019-08-16, Hutchinson, KS Earthquake' % (net, sta, loc, chan), fontsize=14)
def getChannelWaveformFiles(network, station, location, channel, starttime, endtime, removeTrend, performInstrumentCorrection, applyScale, deconFilter1, deconFilter2, deconFilter3, deconFilter4, waterLevel, unit, client, fileTag, respdir=None, inventory=None):
    """Read waveform files for one channel and optionally remove the response.

    The files matching ``fileTag`` are read between ``starttime`` and
    ``endtime`` and the traces matching network/station/location/channel are
    kept. When ``performInstrumentCorrection`` is true, a response is attached
    to every trace -- first from ``respdir`` (through ``getResponseFromFile``)
    and, if none is found there, from ``client`` -- and then removed from the
    resulting stream. Traces for which no response could be obtained are left
    out of the returned stream.

    Parameters
    ----------
    network, station, location, channel : str
        Codes of the traces to keep; a location of ``"--"`` selects traces
        with an empty location code.
    starttime, endtime
        Time window, in any form accepted by ``UTCDateTime``.
    removeTrend : number
        When greater than zero, the stream is demeaned.
    performInstrumentCorrection : bool
        Attach and remove the instrument response.
    applyScale : bool
        Passed as ``apply_calib`` to ``read`` when no instrument correction
        is requested.
    deconFilter1, deconFilter2, deconFilter3, deconFilter4 : float
        Corners of the deconvolution pre-filter; when all four are <= 0 no
        pre-filter is applied.
    waterLevel : float
        Passed as ``water_level`` to ``remove_response``.
    unit : str
        Passed as ``output`` to ``remove_response``.
    client
        Object providing ``get_stations`` used as fallback source of
        responses, or ``None`` to disable the fallback.
    fileTag : str
        Path or pattern handed to ``read``.
    respdir : str, optional
        Directory searched first for response files.
    inventory : optional
        Handed to, and updated by, ``getResponseFromFile``.

    Returns
    -------
    tuple or None
        ``(inventory, stream)`` on success, ``(None, None)`` when selecting or
        correcting the traces fails, and a bare ``None`` when the files cannot
        be read.
    """
    debug = False

    thisStartTime = UTCDateTime(starttime)
    thisEndTime = UTCDateTime(endtime)

    # stream holds the final stream
    stream = Stream()

    #
    # read in the files to a stream
    #
    try:
        print("[INFO] checking:", fileTag)
        if performInstrumentCorrection:
            # Calibration is left untouched: the full response is removed below.
            applyCalib = False
        else:
            print("[INFO] Apply scaling")
            applyCalib = applyScale
        streamIn = read(fileTag, starttime=thisStartTime, endtime=thisEndTime, nearest_sample=True, apply_calib=applyCalib)
    except Exception as e:
        print(str(e))
        print("[ERROR] getChannelWaveformFiles", network, station, location, channel, starttime, endtime)
        # NOTE(review): this path returns a bare None while every other path
        # returns a 2-tuple; kept as-is so existing callers are not affected,
        # but callers that unpack the result will fail here -- confirm.
        return None

    try:
        #
        # select the desired streams only
        #
        selectLocation = "" if location == "--" else location
        streamOut = streamIn.select(network=network, station=station, location=selectLocation, channel=channel)

        for trace in streamOut:
            if not performInstrumentCorrection:
                print("[INFO] Response not removed")
                stream += trace
                continue

            #
            # get the network, station, location, channel and time span of
            # this trace straight from its header
            #
            net = trace.stats.network
            sta = trace.stats.station
            loc = trace.stats.location
            chan = trace.stats.channel
            if len(loc) == 0:
                loc = "--"
            traceStart = trace.stats.starttime
            traceEnd = trace.stats.endtime

            #
            # if respdir is defined, first look into the user's respdir for
            # stationXML files, if not found get it from FDSN
            #
            inv = None
            if respdir is not None:
                print("[INFO] Getting response from", respdir)
                thisloc = '' if loc == '--' else loc
                inventory, inv = getResponseFromFile(inventory, respdir, net, sta, thisloc, chan, starttime, debug)
                if inv is not None:
                    if debug:
                        print("[INFO]: Attaching", inv)
                    trace.attach_response(inv)
                    stream += trace
                else:
                    print("NO RESPONSE FILE:", net, sta, loc, chan, traceStart)

            if inv is None and client is not None:
                #
                # The FDSN webservices return StationXML metadata.
                #
                print("[INFO] Getting response from IRIS")
                try:
                    inv = client.get_stations(network=net, station=sta, location=loc, channel=chan, starttime=traceStart, endtime=traceEnd, level="response")
                    trace.attach_response(inv)
                    stream += trace
                    if debug:
                        print("[INFO] Response attached:", inv)
                except Exception as e:
                    # Both times are bound before the try block, so reporting
                    # the failure can no longer raise a NameError itself.
                    print(str(e))
                    print("NO RESPONSE:", net, sta, loc, chan, traceStart, traceEnd)
                    continue

        if removeTrend > 0:
            stream.detrend("demean")

        #
        # remove the instrument response
        #
        if performInstrumentCorrection:
            print("[INFO] PERFORM INSTRUMENT CORRECTION", unit)
            if deconFilter1 <= 0 and deconFilter2 <= 0 and deconFilter3 <= 0 and deconFilter4 <= 0:
                print("[INFO] NO DECON FILTER APPLIED")
                preFilt = None
            else:
                preFilt = [deconFilter1, deconFilter2, deconFilter3, deconFilter4]
            stream.remove_response(output=unit, pre_filt=preFilt, zero_mean=False, taper=False, water_level=waterLevel)

    except Exception as e:
        print(str(e))
        print("[ERROR] get_waveforms", network, station, location, channel, starttime, endtime)
        return (None, None)

    return (inventory, stream)
st2 += client.get_waveforms("BK", "*", "*", "HH*", start, end, attach_response=True) #%% ## Get the sampling rate: samprate = st2[0].stats['sampling_rate'] print('Remove Response') ## Make the prefilt for the instrment response - AT.SIT is @ 50Hz so 25 is nyquist prefilt = (0.005, 0.006, ((samprate / 2) - 5), (samprate / 2) ) ## this is 20 to 25 at the end st2.remove_response( output='VEL', pre_filt=prefilt) ## The units of data are now Velocity, m/s #%% print('Plot Raw data') ## Plot this unfiltered (except for instrument response) data: plt.figure(figsize=(12, 6)) plt.plot(st2[0].times(), st2[0].data) plt.xlabel('Time from ') plt.ylabel('Velocity (m/s)') plt.title('Instrument Response Removed, Unfiltered, \n AT station SIT') #plt.savefig(data_dir + 'at.sit_unfilt.png') print('Write unfiltered response removed') ## Write the data (instr resp removed) to a SAC file, m/s #st2.write(data_dir+'at.sit_unfilt.sac',format='SAC') #%% print('Filter')
def main(args=None):
    """
    Download event waveforms for every selected station of a station
    database and store them under ``EVENTS/<station key>/``.

    For each station, an event catalogue is requested from the FDSN server
    for the search period and magnitude range. For each event whose
    great-circle distance to the station lies strictly between
    ``args.mindist`` and ``args.maxdist`` (degrees), the seismic channels
    (and the pressure channel when "P" is in ``args.channels``) are
    downloaded for a 7200 s window starting at the origin time, detrended,
    low-passed, resampled to ``args.new_sampling_rate`` and quality-checked.
    Responses are then removed, each component is written to a SAC file, and
    an ``EventStream`` is saved to ``<timestamp>.event.pkl``.

    Events whose ``.event.pkl`` file already exists are skipped unless
    ``args.ovr`` is set. Download failures are reported and the event is
    skipped.

    :param args: parsed arguments as returned by ``get_event_arguments``;
        when ``None`` the arguments are parsed by calling that function.
        Attributes read here: ``indb``, ``stkeys``, ``UserAuth``, ``Server``,
        ``startT``, ``endT``, ``minmag``, ``maxmag``, ``reverse``,
        ``mindist``, ``maxdist``, ``channels``, ``ovr``,
        ``new_sampling_rate``, ``pre_filt`` and ``units``.
    :return: None
    """
    if args is None:
        # Run Input Parser
        args = get_event_arguments()

    # Load Database
    # stdb>0.1.3
    try:
        db, stkeys = stdb.io.load_db(fname=args.indb, keys=args.stkeys)

    # stdb=0.1.3
    except Exception:
        db = stdb.io.load_db(fname=args.indb)

        # Construct station key loop (sorted() returns a new list; the
        # result has to be kept for the keys to actually be ordered)
        allkeys = sorted(db.keys())

        # Extract key subset
        if len(args.stkeys) > 0:
            stkeys = []
            for skey in args.stkeys:
                stkeys.extend([s for s in allkeys if skey in s])
        else:
            stkeys = allkeys

    # Length of the downloaded time window, in seconds
    window = 7200.

    # The client only depends on `args`: it is created when the first
    # station is processed and then re-used for the following ones.
    client = None

    # Loop over station keys
    for stkey in list(stkeys):

        # Extract station information from dictionary
        sta = db[stkey]

        # Define path to see if it exists
        eventpath = Path('EVENTS') / Path(stkey)
        if not eventpath.is_dir():
            print('Path to ' + str(eventpath) + ' doesn`t exist - creating it')
            eventpath.mkdir(parents=True)

        # Establish client
        if client is None:
            if len(args.UserAuth) == 0:
                client = Client(args.Server)
            else:
                client = Client(args.Server, user=args.UserAuth[0],
                                password=args.UserAuth[1])

        # Get catalogue search start and end times
        tstart = sta.startdate if args.startT is None else args.startT
        tend = sta.enddate if args.endT is None else args.endT
        if tstart > sta.enddate or tend < sta.startdate:
            continue

        # Temporary print locations: empty location codes become "--"
        tlocs = sta.location
        if len(tlocs) == 0:
            tlocs = ['']
        tlocs = [tloc if len(tloc) > 0 else "--" for tloc in tlocs]
        sta.location = tlocs

        # Update Display
        print(" ")
        print(" ")
        print("|===============================================|")
        print("|===============================================|")
        print("| {0:>8s} |".format(sta.station))
        print("|===============================================|")
        print("|===============================================|")
        print("| Station: {0:>2s}.{1:5s} |".format(
            sta.network, sta.station))
        print("| Channel: {0:2s}; Locations: {1:15s} |".format(
            sta.channel, ",".join(tlocs)))
        print("| Lon: {0:7.2f}; Lat: {1:6.2f} |".format(
            sta.longitude, sta.latitude))
        print("| Start time: {0:19s} |".format(
            sta.startdate.strftime("%Y-%m-%d %H:%M:%S")))
        print("| End time: {0:19s} |".format(
            sta.enddate.strftime("%Y-%m-%d %H:%M:%S")))
        print("|-----------------------------------------------|")
        print("| Searching Possible events: |")
        print("| Start: {0:19s} |".format(
            tstart.strftime("%Y-%m-%d %H:%M:%S")))
        print("| End: {0:19s} |".format(
            tend.strftime("%Y-%m-%d %H:%M:%S")))
        if args.maxmag is None:
            print("| Mag: >{0:3.1f}".format(args.minmag) + " |")
        else:
            print("| Mag: {0:3.1f} - {1:3.1f}".format(
                args.minmag, args.maxmag) + " |")
        print("| ... |")

        # Get catalogue using deployment start and end
        cat = client.get_events(starttime=tstart, endtime=tend,
                                minmagnitude=args.minmag,
                                maxmagnitude=args.maxmag)

        # Total number of events in Catalogue
        nevK = 0
        nevtT = len(cat)
        print("| Found {0:5d}".format(nevtT) + " possible events |")

        # Select order of processing
        if args.reverse:
            ievs = range(0, nevtT)
        else:
            ievs = range(nevtT - 1, -1, -1)

        # Read through catalogue
        for iev in ievs:

            # Extract event
            ev = cat[iev]
            origin = ev.origins[0]
            otime = origin.time
            dep = origin.depth
            lon = origin.longitude
            lat = origin.latitude
            epi_dist, az, baz = epi(lat, lon, sta.latitude, sta.longitude)
            epi_dist /= 1000.
            gac = k2d(epi_dist)
            mag = ev.magnitudes[0].mag
            if mag is None:
                mag = -9.

            # Keep only events inside the requested distance range
            if not (args.mindist < gac < args.maxdist):
                continue

            # Display Event Info
            nevK += 1
            if args.reverse:
                inum = iev + 1
            else:
                # Position in processing order, counted from 1
                inum = nevtT - iev
            print(" ")
            print("****************************************************")
            print("* #{0:d} ({1:d}/{2:d}): {3:13s}".format(
                nevK, inum, nevtT, otime.strftime("%Y%m%d_%H%M%S")))
            print("* Origin Time: " + otime.strftime("%Y-%m-%d %H:%M:%S"))
            print("* Lat: {0:6.2f}; Lon: {1:7.2f}".format(lat, lon))
            print("* Dep: {0:6.2f}; Mag: {1:3.1f}".format(
                dep / 1000., mag))
            print("* Dist: {0:7.2f} km; {1:7.2f} deg".format(
                epi_dist, gac))

            t1 = otime
            t2 = t1 + window

            # Time stamp: year.julday.hour.minute, zero padded
            tstamp = "{0:04d}.{1:03d}.{2:02d}.{3:02d}".format(
                otime.year, otime.julday, otime.hour, otime.minute)

            # Define file names (to check if files already exist)
            filename = eventpath / (tstamp + '.event.pkl')
            file1 = eventpath / (tstamp + '.1.SAC')
            file2 = eventpath / (tstamp + '.2.SAC')
            fileZ = eventpath / (tstamp + '.Z.SAC')
            fileP = eventpath / (tstamp + '.P.SAC')

            print()
            print("* Channels selected: " +
                  str(args.channels) + ' and vertical')

            # If data file exists, continue
            if filename.exists() and not args.ovr:
                print("*")
                print("* " + str(filename))
                print("* -> File already exists, continuing")
                continue

            want_h = "H" in args.channels
            want_p = "P" in args.channels
            band = sta.channel.upper()

            # Number of channels and comma-separated channel list for Client
            if want_p and not want_h:
                ncomp = 2
                channels = band + 'Z'
            else:
                ncomp = 4 if want_p else 3
                channels = ",".join(band + comp for comp in "12Z")

            # Get waveforms from client
            try:
                print("* " + tstamp + " ")
                print("* -> Downloading Seismic data... ")
                sth = client.get_waveforms(network=sta.network,
                                           station=sta.station,
                                           location=sta.location[0],
                                           channel=channels,
                                           starttime=t1, endtime=t2,
                                           attach_response=True)
                print("* ...done")
            except Exception:
                print(" Error: Unable to download ?H? components - "
                      "continuing")
                continue

            if want_p:
                try:
                    print("* -> Downloading Pressure data...")
                    stp = client.get_waveforms(network=sta.network,
                                               station=sta.station,
                                               location=sta.location[0],
                                               channel='?DH',
                                               starttime=t1, endtime=t2,
                                               attach_response=True)
                    # The confirmation text differs between the two pressure
                    # modes in the original code; it is kept unchanged.
                    print("* ...done" if not want_h else " ...done")
                except Exception:
                    print(" Error: Unable to download ?DH component - "
                          "continuing")
                    continue

                if len(stp) > 1:
                    # Only the first two traces are compared
                    print("WARNING: There are more than one ?DH trace")
                    print("* -> Keeping the highest sampling rate")
                    if stp[0].stats.sampling_rate > \
                            stp[1].stats.sampling_rate:
                        stp = Stream(traces=stp[0])
                    else:
                        stp = Stream(traces=stp[1])
                st = sth + stp
            else:
                st = sth

            # Detrend, filter
            st.detrend('demean')
            st.detrend('linear')
            st.filter('lowpass', freq=0.5 * args.new_sampling_rate,
                      corners=2, zerophase=True)
            st.resample(args.new_sampling_rate)

            # Check streams
            is_ok, st = utils.QC_streams(t1, t2, st)
            if not is_ok:
                continue

            sth = st.select(component='1') + st.select(component='2') + \
                st.select(component='Z')

            # Remove responses
            print("* -> Removing responses - Seismic data")
            sth.remove_response(pre_filt=args.pre_filt, output=args.units)

            # Extract traces - Z
            trZ = sth.select(component='Z')[0]
            trZ = utils.update_stats(trZ, sta.latitude, sta.longitude,
                                     sta.elevation, 'Z')
            trZ.write(str(fileZ), format='SAC')

            # Extract traces - H
            if want_h:
                tr1 = sth.select(component='1')[0]
                tr2 = sth.select(component='2')[0]
                tr1 = utils.update_stats(tr1, sta.latitude, sta.longitude,
                                         sta.elevation, '1')
                tr2 = utils.update_stats(tr2, sta.latitude, sta.longitude,
                                         sta.elevation, '2')
                tr1.write(str(file1), format='SAC')
                tr2.write(str(file2), format='SAC')

            # Extract traces - P
            if want_p:
                stp = st.select(component='H')
                print("* -> Removing responses - Pressure data")
                stp.remove_response(pre_filt=args.pre_filt)
                trP = stp[0]
                trP = utils.update_stats(trP, sta.latitude, sta.longitude,
                                         sta.elevation, 'P')
                trP.write(str(fileP), format='SAC')
            else:
                stp = Stream()

            # Save the event streams and metadata
            eventstream = EventStream(sta, sth, stp, tstamp, lat, lon, otime,
                                      window, args.new_sampling_rate, ncomp)
            eventstream.save(filename)