def test_get_waveform(buffer_mb):
    """Check that the number of fetched traces matches the reported count.

    For every (network, station, location, channel) row returned by
    ``get_stations`` over a wide time window, the length of the stream
    returned by ``get_waveforms`` must equal ``get_waveform_count``.

    :param buffer_mb: value forwarded as ``single_item_read_limit_in_mb``;
        its integer part also names the per-run log file in ``tempdir``.
    """
    start, end = '1900-01-01T00:00:00', '2100-01-01T00:00:00'

    formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    handler = logging.FileHandler('%s/a_%d.txt' % (tempdir, int(buffer_mb)), mode='w')
    handler.setFormatter(formatter)

    # logging.getLogger('test') returns the same logger object on every call,
    # so the handler added here must be detached again once the test is done.
    logger = logging.getLogger('test')
    logger.setLevel(logging.DEBUG)
    logger.addHandler(handler)

    try:
        fds = FederatedASDFDataSet(asdf_file_list, logger=logger,
                                   single_item_read_limit_in_mb=buffer_mb)
        rows = np.array(fds.get_stations(start, end))

        for net, sta, loc, cha in rows[:, 0:4]:
            wc = fds.get_waveform_count(net, sta, loc, cha, start, end)
            stream = fds.get_waveforms(net, sta, loc, cha, start, end,
                                       trace_count_threshold=1e4)

            assert wc == len(stream)
            logger.info('%s.%s: %d traces fetched', net, sta, len(stream))
        # end for
    finally:
        # Without this, each invocation leaves an open file handle behind and
        # later invocations would also write into earlier runs' log files.
        logger.removeHandler(handler)
        handler.close()
    # end try
def test_get_coordinates():
    """Every distinct (network, station) pair must have a coordinate entry."""
    dataset = FederatedASDFDataSet(asdf_file_list)
    station_rows = np.array(
        dataset.get_stations('1900-01-01T00:00:00', '2100-01-01T00:00:00'))

    # The first two columns of each row are network and station codes;
    # reduce the rows to the distinct pairs.
    unique_stations = {(net, sta) for net, sta in station_rows[:, 0:2]}

    # we should have coordinates for each station
    assert len(dataset.unique_coordinates) == len(unique_stations)
def test_get_stations():
    """The station query over a wide time window must yield eight stations."""
    dataset = FederatedASDFDataSet(asdf_file_list)
    station_rows = np.array(
        dataset.get_stations('1900-01-01T00:00:00', '2100-01-01T00:00:00'))

    # Reduce the rows to distinct (network, station) pairs
    unique_stations = {(net, sta) for net, sta in station_rows[:, 0:2]}

    # There are eight stations in the h5 file
    assert len(unique_stations) == 8
def test_get_local_net_sta_list():
    """The local station list must cover every distinct station in a serial run."""
    dataset = FederatedASDFDataSet(asdf_file_list)
    local_entries = list(dataset.local_net_sta_list())

    station_rows = np.array(
        dataset.get_stations('1900-01-01T00:00:00', '2100-01-01T00:00:00'))

    # Get a list of unique stations
    unique_stations = {(net, sta) for net, sta in station_rows[:, 0:2]}

    # On serial runs, all stations should be allocated to rank 0
    assert len(local_entries) == len(unique_stations)
def test_db_integrity():
    """Cross-check per-channel waveform counts against the raw database.

    The sum of ``get_waveform_count`` over every row returned by
    ``get_stations`` must equal the number of rows in the ``wdb`` table of
    the underlying sqlite database.
    """
    start, end = '1900-01-01T00:00:00', '2100-01-01T00:00:00'

    fds = FederatedASDFDataSet(asdf_file_list)

    # get number of waveforms from the db directly; close the connection
    # explicitly so the test does not leave a database handle open
    conn = sqlite3.connect(fds.fds.db_fn)
    try:
        query = 'select count(*) from wdb;'
        db_waveform_count = conn.execute(query).fetchall()[0][0]
    finally:
        conn.close()
    # end try

    # fetch waveform counts for each unique combination of net, sta, loc, cha
    waveform_count = 0
    rows = fds.get_stations(start, end)
    for n, s, l, c, _, _ in rows:
        # Same ISO-8601 (dash-separated) window as the station query above
        waveform_count += fds.get_waveform_count(n, s, l, c, start, end)
    # end for

    assert waveform_count == db_waveform_count
def process(asdf_source, start_time, end_time, net, sta, cha, output_basename):
    """
    ASDF_SOURCE: Text file containing a list of paths to ASDF files\n
    START_TIME: Start time in UTCDateTime format\n
    END_TIME: End time in UTCDateTime format\n
    NET: Network name\n
    STA: Station name ('*' for all stations; note that * must be in quotation marks)\n
    CHA: Channel name ('*' for all channels; note that * must be in quotation marks) \n
    OUTPUT_BASENAME: Basename of output file

    Example usage:
    mpirun -np 112 python plot_data_quality.py asdf_files.txt 1980:01:01 2020:01:01 OA '*' '*' data_quality.oa
    """
    # Normalise the inputs: times become UTCDateTime objects and the '*'
    # wildcard is translated to None before being passed to get_stations.
    start_time = UTCDateTime(start_time)
    end_time = UTCDateTime(end_time)
    sta = None if sta == '*' else sta
    cha = None if cha == '*' else cha

    comm = MPI.COMM_WORLD
    nproc = comm.Get_size()
    rank = comm.Get_rank()

    log = setup_logger(name=output_basename, log_file='%s.log' % output_basename)
    fds = FederatedASDFDataSet(asdf_source, logger=log)

    # Rank 0 builds the sorted station list and splits it into one sublist
    # per process; every rank then receives the full list of sublists.
    stations = []
    if rank == 0:
        matched = fds.get_stations(start_time, end_time, network=net, station=sta, channel=cha)
        stations = split_list(sorted(matched), nproc)
    # end if
    stations = comm.bcast(stations, root=0)

    # Each rank works on its own sublist; results are collected on rank 0.
    local_results = process_data(rank, fds, sorted(stations[rank]), start_time, end_time)
    gathered = comm.gather(local_results, root=0)

    if rank != 0:
        return
    # end if

    # flatten sublists for each proc
    flat_results = [item for sublist in gathered for item in sublist]
    flat_stations = [item for sublist in stations for item in sublist]
    plot_results(flat_stations, flat_results, output_basename)
def test_get_global_time_range():
    """The aggregate time range over all stations must match the known bounds."""
    dataset = FederatedASDFDataSet(asdf_file_list)
    station_rows = np.array(
        dataset.get_stations('1900-01-01T00:00:00', '2100-01-01T00:00:00'))

    # Distinct (network, station) pairs
    unique_stations = {(net, sta) for net, sta in station_rows[:, 0:2]}

    # Collect the per-station range reported by the data set
    range_starts = []
    range_ends = []
    for net, sta in unique_stations:
        t_first, t_last = dataset.get_global_time_range(net, sta)
        range_starts.append(t_first)
        range_ends.append(t_last)
    # end for

    overall_start = UTCDateTime(np.array(range_starts).min())
    overall_end = UTCDateTime(np.array(range_ends).max())

    # Ensure aggregate min/max to corresponding values in the db
    assert overall_start == UTCDateTime('2000-01-01T00:00:00.000000Z')
    assert overall_end == UTCDateTime('2002-01-01T00:00:00.000000Z')
def process(asdf_source, event_folder, output_path, min_magnitude, restart, save_quality_plots):
    """
    ASDF_SOURCE: Text file containing a list of paths to ASDF files
    EVENT_FOLDER: Path to folder containing event files\n
    OUTPUT_PATH: Output folder \n
    """
    # NOTE(review): the docstring above is kept verbatim because its '\n'
    # escapes suggest it is surfaced as command-line help -- confirm.
    #
    # Purpose: for every (network, station, start, end) entry returned by
    # fds.local_net_sta_list(), step through the time range in windows of at
    # most one day, and for each catalogue event whose origin time falls in
    # the window try to pick P and S arrivals. Picks are appended to
    # 'p_arrivals.<rank>.txt' and 's_arrivals.<rank>.txt' in output_path.
    #
    # Parameters:
    #   asdf_source        -- passed to FederatedASDFDataSet
    #   event_folder       -- passed to CatalogCSV
    #   output_path        -- folder receiving the config, pick and plot output
    #   min_magnitude      -- events with a smaller (or missing) magnitude are
    #                         skipped
    #   restart            -- truthy: append to existing pick files and do not
    #                         rewrite the header; also given to ProgressTracker
    #   save_quality_plots -- truthy: create '<output_path>/plots' and hand it
    #                         to extract_p / extract_s
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    if rank == 0:
        # output config parameters
        fn = 'pick.%s.cfg' % (datetime.now().strftime('%Y-%m-%d-%H-%M-%S'))
        fn = os.path.join(output_path, fn)

        with open(fn, 'w+') as f:
            f.write('Parameter Values:\n\n')
            f.write('%25s\t\t: %s\n' % ('ASDF_SOURCE', asdf_source))
            f.write('%25s\t\t: %s\n' % ('EVENT_FOLDER', event_folder))
            f.write('%25s\t\t: %s\n' % ('OUTPUT_PATH', output_path))
            f.write('%25s\t\t: %s\n' % ('MIN_MAGNITUDE', min_magnitude))
            f.write('%25s\t\t: %s\n' % ('RESTART_MODE', 'TRUE' if restart else 'FALSE'))
            f.write('%25s\t\t: %s\n' % ('SAVE_PLOTS', 'TRUE' if save_quality_plots else 'FALSE'))
        # end with
    # end if

    # ==================================================
    # Create output-folder for snr-plots
    # ==================================================
    plot_output_folder = None
    if save_quality_plots:
        plot_output_folder = os.path.join(output_path, 'plots')
        if rank == 0 and not os.path.exists(plot_output_folder):
            os.mkdir(plot_output_folder)
        # end if
        # All ranks wait here until rank 0 has had the chance to create the folder
        comm.Barrier()
    # end if

    # ==================================================
    # Read catalogue and retrieve origin times
    # ==================================================
    cat = CatalogCSV(event_folder)
    events = cat.get_events()
    originTimestamps = cat.get_preferred_origin_timestamps()

    # ==================================================
    # Create lists of pickers for both p- and s-arrivals
    # ==================================================
    sigmalist = np.arange(8, 3, -1)
    pickerlist_p = []
    pickerlist_s = []
    for sigma in sigmalist:
        picker_p = aicdpicker.AICDPicker(t_ma=5, nsigma=sigma, t_up=1, nr_len=5,
                                         nr_coeff=2, pol_len=10, pol_coeff=10,
                                         uncert_coeff=3)
        picker_s = aicdpicker.AICDPicker(t_ma=15, nsigma=sigma, t_up=1, nr_len=5,
                                         nr_coeff=2, pol_len=10, pol_coeff=10,
                                         uncert_coeff=3)
        pickerlist_p.append(picker_p)
        pickerlist_s.append(picker_s)
    # end for

    # ==================================================
    # Define theoretical model
    # Instantiate data-access object
    # Retrieve estimated workload
    # ==================================================
    taupyModel = TauPyModel(model='iasp91')
    fds = FederatedASDFDataSet(asdf_source, use_json_db=False, logger=None)
    workload = getWorkloadEstimate(fds, originTimestamps)

    # ==================================================
    # Define output header and open output files
    # depending on the mode of operation (fresh/restart)
    # ==================================================
    header = '#eventID originTimestamp mag originLon originLat originDepthKm net sta cha pickTimestamp stationLon stationLat az baz distance ttResidual snr qualityMeasureCWT domFreq qualityMeasureSlope bandIndex nSigma\n'
    ofnp = os.path.join(output_path, 'p_arrivals.%d.txt' % (rank))
    ofns = os.path.join(output_path, 's_arrivals.%d.txt' % (rank))
    file_mode = 'a+' if restart else 'w+'

    # The with-statement guarantees both files are closed even if picking fails
    with open(ofnp, file_mode) as ofp, open(ofns, file_mode) as ofs:
        if not restart:
            ofp.write(header)
            ofs.write(header)
        # end if

        progTracker = ProgressTracker(output_folder=output_path, restart_mode=restart)
        totalTraceCount = 0
        for nc, sc, start_time, end_time in fds.local_net_sta_list():
            day = 24 * 3600
            curr = start_time
            traceCountP = 0
            pickCountP = 0
            traceCountS = 0
            pickCountS = 0
            sw_start = datetime.now()
            step = day
            while curr < end_time:
                # Shorten the final window so it does not run past end_time
                if curr + step > end_time:
                    step = end_time - curr
                # end if

                # NOTE(review): events and stations are selected over
                # [curr, curr + day] while waveforms are fetched over
                # [curr, curr + step]; the two differ in the final, shortened
                # window. Left unchanged -- confirm whether this is intended.
                eventIndices = np.where((originTimestamps >= curr.timestamp) &
                                        (originTimestamps <= (curr + day).timestamp))[0]

                if eventIndices.shape[0] > 0:
                    totalTraceCount += 1
                    stations = fds.get_stations(curr, curr + day, network=nc, station=sc)
                    stations_zch = [s for s in stations if 'Z' in s[3]]  # only Z channels
                    stations_nch = [s for s in stations if 'N' in s[3] or '1' in s[3]]  # only N channels
                    stations_ech = [s for s in stations if 'E' in s[3] or '2' in s[3]]  # only E channels

                    for codes in stations_zch:
                        if not progTracker.increment():
                            continue
                        # end if

                        st = fds.get_waveforms(codes[0], codes[1], codes[2], codes[3],
                                               curr, curr + step,
                                               automerge=True, trace_count_threshold=200)
                        if len(st) == 0:
                            continue
                        # end if
                        dropBogusTraces(st)

                        slon, slat = codes[4], codes[5]
                        for ei in eventIndices:
                            event = events[ei]
                            po = event.preferred_origin
                            da = gps2dist_azimuth(po.lat, po.lon, slat, slon)

                            mag = _event_magnitude(event)
                            if np.isnan(mag) or mag < min_magnitude:
                                continue
                            # end if

                            result = extract_p(taupyModel, pickerlist_p, event, slon, slat, st,
                                               plot_output_folder=plot_output_folder)
                            if result:
                                _write_picks(ofp, result, event, mag,
                                             codes[0], codes[1], codes[3],
                                             slon, slat, da, sigmalist)
                                pickCountP += 1
                            # end if

                            # With no horizontal channels at all, attempt the
                            # S pick on the vertical-channel stream instead
                            if not stations_nch and not stations_ech:
                                result = extract_s(taupyModel, pickerlist_s, event, slon, slat,
                                                   st, None, da[2],
                                                   plot_output_folder=plot_output_folder)
                                if result:
                                    _write_picks(ofs, result, event, mag,
                                                 codes[0], codes[1], codes[3],
                                                 slon, slat, da, sigmalist)
                                    pickCountS += 1
                                # end if
                            # end if
                        # end for

                        traceCountP += len(st)
                    # end for

                    # Horizontal channels are processed in N/E pairs, and only
                    # when both lists have the same (non-zero) length
                    if len(stations_nch) > 0 and len(stations_nch) == len(stations_ech):
                        for codesn, codese in zip(stations_nch, stations_ech):
                            if not progTracker.increment():
                                continue
                            # end if

                            stn = fds.get_waveforms(codesn[0], codesn[1], codesn[2], codesn[3],
                                                    curr, curr + step,
                                                    automerge=True, trace_count_threshold=200)
                            ste = fds.get_waveforms(codese[0], codese[1], codese[2], codese[3],
                                                    curr, curr + step,
                                                    automerge=True, trace_count_threshold=200)

                            dropBogusTraces(stn)
                            dropBogusTraces(ste)

                            if len(stn) == 0:
                                continue
                            if len(ste) == 0:
                                continue

                            slon, slat = codesn[4], codesn[5]
                            for ei in eventIndices:
                                event = events[ei]
                                po = event.preferred_origin
                                da = gps2dist_azimuth(po.lat, po.lon, slat, slon)

                                mag = _event_magnitude(event)
                                if np.isnan(mag) or mag < min_magnitude:
                                    continue
                                # end if

                                result = extract_s(taupyModel, pickerlist_s, event, slon, slat,
                                                   stn, ste, da[2],
                                                   plot_output_folder=plot_output_folder)
                                if result:
                                    # These picks are written with the fixed
                                    # channel label '00T'
                                    _write_picks(ofs, result, event, mag,
                                                 codesn[0], codesn[1], '00T',
                                                 slon, slat, da, sigmalist)
                                    pickCountS += 1
                                # end if
                            # end for

                            traceCountS += (len(stn) + len(ste))
                        # end for
                    # end if
                # end if

                curr += step
            # wend

            sw_stop = datetime.now()
            totalTime = (sw_stop - sw_start).total_seconds()

            gc.collect()
            # print() with a single pre-formatted string behaves the same on
            # Python 2 and Python 3
            print('(Rank %d: %5.2f%%, %d/%d) Processed %d traces and found %d p-arrivals and %d s-arrivals for '
                  'network %s station %s in %f s. Memory usage: %5.2f MB.' %
                  (rank, (float(totalTraceCount) / float(workload) * 100) if workload > 0 else 100,
                   totalTraceCount, workload,
                   traceCountP + traceCountS, pickCountP, pickCountS, nc, sc, totalTime,
                   round(psutil.Process().memory_info().rss / 1024. / 1024., 2)))
        # end for
    # end with

    print('Processing complete on rank %d' % (rank))

    del fds


# One output row; the column order matches the header written by process()
_PICK_LINE_FORMAT = ('%s %f %f %f %f %f '
                     '%s %s %s %f %f %f '
                     '%f %f %f '
                     '%f %f %f %f %f '
                     '%d %d\n')


def _event_magnitude(event):
    """Return the event's preferred magnitude value, falling back to the first
    magnitude of its preferred origin; NaN when neither yields a value."""
    mag = None
    if event.preferred_magnitude:
        mag = event.preferred_magnitude.magnitude_value
    elif len(event.preferred_origin.magnitude_list):
        mag = event.preferred_origin.magnitude_list[0].magnitude_value
    # end if

    # np.nan is spelled this way in every NumPy release (np.NaN is gone in 2.0)
    return np.nan if mag is None else mag


def _write_picks(out_file, result, event, mag, net, sta, cha, slon, slat, da, sigmalist):
    """Write one line per pick in `result` to `out_file`, then flush it.

    `result` is the 5-tuple (picklist, residuallist, snrlist, bandindex,
    pickerindex) returned by extract_p / extract_s; `da` is the value returned
    by gps2dist_azimuth for the event origin and the station.
    """
    picklist, residuallist, snrlist, bandindex, pickerindex = result
    po = event.preferred_origin
    arcdistance = kilometers2degrees(da[0] / 1e3)

    for ip, pick in enumerate(picklist):
        out_file.write(_PICK_LINE_FORMAT % (
            event.public_id, po.utctime.timestamp, mag, po.lon, po.lat, po.depthkm,
            net, sta, cha, pick.timestamp, slon, slat,
            da[1], da[2], arcdistance,
            residuallist[ip], snrlist[ip, 0], snrlist[ip, 1], snrlist[ip, 2], snrlist[ip, 3],
            bandindex, sigmalist[pickerindex]))
    # end for
    out_file.flush()
from obspy import UTCDateTime, read_events, read_inventory
from obspy.taup.taup_geo import calc_dist
from obspy.clients.iris import Client as IrisClient
from obspy.clients.fdsn import Client
from obspy.taup import TauPyModel
from obspy.signal.trigger import trigger_onset, z_detect, classic_sta_lta, recursive_sta_lta, ar_pick
from obspy.signal.rotate import rotate_ne_rt
from obspy.core.event import Pick, CreationInfo, WaveformStreamID, ResourceIdentifier, Arrival, Event, Origin, Arrival, \
    OriginQuality, Magnitude, Comment

# Ad-hoc check against a data set described by a hard-coded index file:
# list the rows for station 'QLP', then fetch one channel's waveforms.
fds = FederatedASDFDataSet(
    '/g/data/ha3/Passive/SHARED_DATA/Index/asdf_files.txt', logger=None)

# Station rows for 'QLP' within a one-day window in May 2009
stations = fds.get_stations('2009-05-17T00:00:00', '2009-05-18T00:00:00', station='QLP')
print(stations)

# Waveforms for AU.QLP, empty location code, channel BHE, over one day in
# March 2011 (note: a different window from the station query above)
s = fds.get_waveforms('AU', 'QLP', '', 'BHE', '2011-03-15T00:00:00', '2011-03-16T00:00:00', trace_count_threshold=10)
print(s)

# Only a message is printed here; no plotting call is visible in this part
# of the file.
if len(s) > 0:
    print("Plotting traces")