# Project-local helper assumed in scope: get_config().
from obspy import Stream
from obspy.clients.filesystem.sds import Client


def read_sds(window):
    config = get_config()
    station = window['station']
    starttime = window['starttime']
    # Pad the end time slightly so the last sample is included.
    endtime = window['endtime'] + 0.1
    client = Client(sds_root=config['SDS_ROOT'])
    stream = client.get_waveforms(network="*", station=station,
                                  location="*", channel="*",
                                  starttime=starttime, endtime=endtime)
    stream.sort(keys=['channel'], reverse=True)
    # Group traces by instrument type, i.e. the first two characters of
    # the channel code (e.g. 'EH', 'HH').
    stream_list = {}
    for trace in stream:
        geophone_type = trace.stats.channel[0:2]
        if not stream_list.get(geophone_type):
            stream_list[geophone_type] = Stream(trace)
        else:
            stream_list[geophone_type].append(trace)
    return stream_list
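# A minimal, hypothetical usage sketch for read_sds(window) above; it
# assumes get_config() resolves to a real SDS archive and that station
# 'TEST' exists in it:
from obspy import UTCDateTime

window = {
    'station': 'TEST',
    'starttime': UTCDateTime('2020-01-01T00:00:00'),
    'endtime': UTCDateTime('2020-01-01T00:00:30'),
}
for instrument_type, grouped in read_sds(window).items():
    print(instrument_type, len(grouped))  # e.g. 'EH' -> 3 traces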
# Project-local helpers assumed in scope: signal_preprocessing(), trim_trace().
import numpy as np
from obspy.clients.filesystem.sds import Client


def read_sds(pick, sds_root, phase="P", component="Z", trace_length=30,
             sample_rate=100):
    client = Client(sds_root=sds_root)
    if pick.phase_hint != phase:
        return
    if pick.waveform_id.channel_code[-1] != component:
        return
    # Shift the window start randomly so the pick falls at a random
    # position inside the trace_length-second window.
    t = pick.time
    t = t - trace_length + np.random.random_sample() * trace_length
    net = "*"
    sta = pick.waveform_id.station_code
    loc = "*"
    chan = "??" + component
    st = client.get_waveforms(net, sta, loc, chan, t, t + trace_length + 1)
    if st.traces:
        trace = st.traces[0]
        trace = signal_preprocessing(trace)
        points = trace_length * sample_rate + 1
        trim_trace(trace, points)
        return trace
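# Hedged usage sketch for the pick-based read_sds() above; the pick values
# and SDS path are made up for illustration:
from obspy import UTCDateTime
from obspy.core.event import Pick, WaveformStreamID

pick = Pick(time=UTCDateTime('2020-01-01T00:00:10'),
            phase_hint='P',
            waveform_id=WaveformStreamID(station_code='TEST',
                                         channel_code='EHZ'))
trace = read_sds(pick, sds_root='/path/to/SDS')
if trace is not None:
    print(trace.id, trace.stats.npts)  # expected npts: 30 * 100 + 1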
# Project-local helpers assumed in scope: signal_preprocessing(), trim_trace().
import os
import shutil

from obspy.clients.filesystem.sds import Client


def write_station_dataset(dataset_output_dir, sds_root, nslc, start_time,
                          end_time, trace_length=30, sample_rate=100,
                          remove_dir=False):
    if remove_dir:
        shutil.rmtree(dataset_output_dir, ignore_errors=True)
    os.makedirs(dataset_output_dir, exist_ok=True)
    client = Client(sds_root=sds_root)
    net, sta, loc, chan = nslc
    t = start_time
    counter = 0
    while t < end_time:
        stream = client.get_waveforms(net, sta, loc, chan, t,
                                      t + trace_length + 1)
        stream = signal_preprocessing(stream)
        points = trace_length * sample_rate + 1
        # Iterate over a static copy of the trace list so that removing a
        # trace does not skip the one after it.
        for trace in list(stream):
            try:
                trim_trace(trace, points)
            except IndexError as err:
                print(err)
                stream.remove(trace)
                continue
            finally:
                # Clear attached picks whether or not trimming succeeded.
                trace.picks = []
            time_stamp = trace.stats.starttime.isoformat()
            trace.write(dataset_output_dir + '/' + time_stamp +
                        trace.get_id() + ".pkl", format="PICKLE")
            counter += 1
        t += trace_length
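# Hypothetical invocation of write_station_dataset() above; the NSLC tuple
# and paths are placeholders:
from obspy import UTCDateTime

write_station_dataset('dataset/TEST', '/path/to/SDS',
                      ('*', 'TEST', '*', 'EHZ'),
                      UTCDateTime('2020-01-01T00:00:00'),
                      UTCDateTime('2020-01-01T01:00:00'),
                      remove_dir=True)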
def process(self, window, *args, **kwargs):
    station = window['station']
    starttime = window['starttime']
    endtime = window['endtime']
    client = Client(sds_root=self.sds_root)
    stream = client.get_waveforms(network="*", station=station,
                                  location="*", channel="*",
                                  starttime=starttime, endtime=endtime)
    stream.sort(keys=['channel'], reverse=True)
    # Group traces by instrument type (first two characters of the channel
    # code) and yield one Stream per type.
    seismometer_list = {}
    for trace in stream:
        current_type = trace.stats.channel[0:2]
        if not seismometer_list.get(current_type):
            seismometer_list[current_type] = Stream(trace)
        else:
            seismometer_list[current_type].append(trace)
    for value in seismometer_list.values():
        yield value
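# Sketch of consuming the process() generator above; `pipeline` stands in
# for an instance of the class that defines it, with a valid sds_root:
from obspy import UTCDateTime

window = {'station': 'TEST',
          'starttime': UTCDateTime('2020-01-01T00:00:00'),
          'endtime': UTCDateTime('2020-01-01T00:01:00')}
for grouped_stream in pipeline.process(window):
    print(grouped_stream)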
def test_read_from_sds_with_wildcarded_seed_ids(self):
    """
    Test reading data with wildcarded SEED IDs.
    """
    year, doy = 2015, 1
    t = UTCDateTime("%d-%03dT00:00:00" % (year, doy))
    with TemporarySDSDirectory(year=year, doy=doy) as temp_sds:
        # test different wildcard combinations in SEED ID
        client = Client(temp_sds.tempdir)
        for wildcarded_seed_id, num_matching_ids in zip(
                ("AB.ZZZ3..HH?", "AB.ZZZ3..HH*", "*.*..HHZ", "*.*.*.HHZ",
                 "*.*.*.*"),
                (3, 3, 4, 8, 48)):
            net, sta, loc, cha = wildcarded_seed_id.split(".")
            st = client.get_waveforms(net, sta, loc, cha, t - 200, t + 200)
            self.assertEqual(len(st), num_matching_ids)
        # test with SDS type wildcards: recreate the client with a
        # wildcarded sds_type
        for type_wildcard in ("*", "?"):
            client = Client(temp_sds.tempdir, sds_type=type_wildcard)
            st = client.get_waveforms(net, sta, loc, cha, t - 200, t + 200)
            self.assertEqual(len(st), num_matching_ids)
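# For reference, the SDS layout these wildcards match against is
# <root>/YEAR/NET/STA/CHAN.TYPE/NET.STA.LOC.CHAN.TYPE.YEAR.DOY, e.g.
# 2015/AB/ZZZ3/HHZ.D/AB.ZZZ3..HHZ.D.2015.001. A client pinned to the
# default waveform data type would be (sketch, archive path hypothetical):
from obspy.clients.filesystem.sds import Client

client = Client('/path/to/SDS', sds_type='D')  # 'D' = waveform data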
# Project-local helpers assumed in scope: signal_preprocessing(), trim_trace().
import numpy as np
from obspy import Stream
from obspy.clients.filesystem.sds import Client


def read_sds(event, sds_root, phase="P", component="Z", trace_length=30,
             sample_rate=100, random_time=0):
    stream = Stream()
    client = Client(sds_root=sds_root)
    for pick in event.picks:
        if pick.phase_hint != phase:
            print("Skip " + pick.phase_hint + " phase pick")
            continue
        if pick.waveform_id.channel_code[-1] != component:
            print(pick.waveform_id.channel_code)
            continue
        t = event.origins[0].time
        # If the pick is well after the origin, start the window shortly
        # before the pick instead.
        if pick.time > t + trace_length:
            t = pick.time - trace_length + 5
            print("origin: " + t.isoformat() +
                  " pick: " + pick.time.isoformat())
        if random_time:
            # Jitter the window start by up to +/- random_time seconds.
            t = t - random_time + np.random.random_sample() * random_time * 2
        net = "*"
        sta = pick.waveform_id.station_code
        loc = "*"
        chan = "??" + component
        st = client.get_waveforms(net, sta, loc, chan, t,
                                  t + trace_length + 1)
        if st.traces:
            trace = st.traces[0]
            trace = signal_preprocessing(trace)
            points = trace_length * sample_rate + 1
            try:
                trim_trace(trace, points)
            except Exception as err:
                print(err)
                continue
            # Append the preprocessed, trimmed trace.
            stream += trace
        else:
            print("No trace in ", t.isoformat(), net, sta, loc, chan)
    return stream
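# Hedged sketch of calling the event-based read_sds() above on a catalog
# read from a QuakeML file (paths are placeholders):
from obspy import read_events

catalog = read_events('/path/to/events.xml')
for event in catalog:
    stream = read_sds(event, '/path/to/SDS', random_time=5)
    print(len(stream), 'traces collected')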
def test_read_from_sds(self):
    """
    Test reading data across year and day breaks from SDS directory
    structure. Also tests looking for data on the wrong side of a day
    break (data usually get written for some seconds into the previous or
    next file around midnight).
    """
    # test for one specific SEED ID, without wildcards
    seed_id = "AB.XYZ..HHZ"
    net, sta, loc, cha = seed_id.split(".")
    # use three different day breaks in the testing:
    #  - normal day break during one year
    #    (same directory, separate filenames)
    #  - day break at end of year
    #    (separate directories, separate filenames)
    #  - leap-year
    #  - non-leap-year
    for year, doy in ((2015, 123), (2015, 1), (2012, 1)):
        t = UTCDateTime("%d-%03dT00:00:00" % (year, doy))
        with TemporarySDSDirectory(year=year, doy=doy) as temp_sds:
            # normal test reading across the day break
            client = Client(temp_sds.tempdir)
            st = client.get_waveforms(net, sta, loc, cha, t - 20, t + 20)
            self.assertEqual(len(st), 1)
            self.assertEqual(st[0].stats.starttime, t - 20)
            self.assertEqual(st[0].stats.endtime, t + 20)
            self.assertEqual(len(st[0]), 5)
            # test merge option
            st = client.get_waveforms(net, sta, loc, cha, t - 200, t + 200,
                                      merge=False)
            self.assertEqual(len(st), 2)
            st = client.get_waveforms(net, sta, loc, cha, t - 200, t + 200,
                                      merge=None)
            self.assertEqual(len(st), 2)
            st = client.get_waveforms(net, sta, loc, cha, t - 200, t + 200,
                                      merge=0)
            self.assertEqual(len(st), 1)
            # test reading data from a single day file
            # (data is in the file where it's expected)
            st = client.get_waveforms(net, sta, loc, cha, t - 80, t - 30)
            self.assertEqual(len(st), 1)
            # test reading data from a single day file
            # (data is in the dayfile of the previous day)
            st = client.get_waveforms(net, sta, loc, cha, t + 20, t + 40)
            self.assertEqual(len(st), 1)
            # test that format autodetection with `format=None` works
            client = Client(temp_sds.tempdir, format=None)
            st = client.get_waveforms(net, sta, loc, cha, t - 200, t + 200)
            self.assertEqual(len(st), 1)
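# The merge keyword exercised above controls whether day-file segments are
# stitched: merge=False or merge=None returns the raw per-file traces,
# while an integer is passed through to Stream.merge() as the method
# (0 = plain merge). Standalone sketch, archive path and SEED ID
# hypothetical:
from obspy import UTCDateTime
from obspy.clients.filesystem.sds import Client

client = Client('/path/to/SDS')
t = UTCDateTime('2015-05-03T00:00:00')
raw = client.get_waveforms('AB', 'XYZ', '', 'HHZ', t - 200, t + 200,
                           merge=False)  # one Trace per day file
merged = client.get_waveforms('AB', 'XYZ', '', 'HHZ', t - 200, t + 200,
                              merge=0)   # day break stitched together
print(len(raw), len(merged))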
import argparse
import glob
import os
import pickle

import numpy as np
from matplotlib.cm import get_cmap
from obspy import UTCDateTime
from obspy.clients.filesystem.sds import Client
from obspy.imaging.cm import pqlx

# Project-local imports assumed: QC and the station/instrument definitions
# from station_dictionnary.py and instruments.py.


def main():
    # Arguments
    argu_parser = argparse.ArgumentParser(
        description="Run the main code to compute quality sheets for a set "
                    "of STATIONS")
    argu_parser.add_argument(
        "-s", "--stations", nargs='*', required=True,
        help="List of station names, separated by spaces, that must be in "
             "the station_dictionnary file. Example: MEUD00 OBP10")
    argu_parser.add_argument(
        "-b", "--starttime", default=UTCDateTime(2000, 1, 1),
        type=UTCDateTime,
        help="Start time for processing. Various formats accepted. "
             "Examples: 2012,2,1 / 2012-02-01 / 2012,032 / 2012032 ... "
             "See UTCDateTime for a complete list. Default is 2000-1-1")
    argu_parser.add_argument(
        "-e", "--endtime", default=UTCDateTime(2055, 9, 16),
        type=UTCDateTime,
        help="End time for processing. Various formats accepted. "
             "Examples: 2012,2,1 / 2012-02-01 / 2012,032 / 2012032 ... "
             "See UTCDateTime for a complete list. Default is 2055-9-16")
    argu_parser.add_argument(
        "-c", "--channels", nargs='+',
        help="Process only CHANNELS. Do not use this option if you want to "
             "process all available channels listed in station_dictionnary. "
             "Separate CHANNELS with spaces. No wildcards. Default is all "
             "channels")
    argu_parser.add_argument(
        "-pkl", "--path_pkl", default='./PKL',
        help="Output directory for pkl files. Default is ./PKL")
    argu_parser.add_argument(
        "-plt", "--path_plt", default='./PLT',
        help="Output directory for plt files. Default is ./PLT")
    argu_parser.add_argument(
        "-force_paz", default=False, action='store_true',
        help="Use this option if you don't want to use the dataless file "
             "specified in station_dictionnary. Only the PAZ response "
             "computed from sensor and digitizer will be used. For "
             "debugging only; a dataless file is recommended")
    argu_parser.add_argument(
        "--color_map", default='pqlx',
        help="Color map for PPSD. Default is pqlx")
    args = argu_parser.parse_args()

    # List of stations
    STA = args.stations
    chan_proc = args.channels

    # Time span
    start = args.starttime
    stop = args.endtime

    # Color map
    if args.color_map == 'pqlx':
        cmap = pqlx
    elif args.color_map == 'viridis_white':
        from obspy.imaging.cm import viridis_white
        cmap = viridis_white
    elif args.color_map == 'viridis_white_r':
        from obspy.imaging.cm import viridis_white_r
        cmap = viridis_white_r
    else:
        cmap = get_cmap(args.color_map)

    # Output paths
    PATH_PKL = os.path.abspath(args.path_pkl)
    PATH_PLT = os.path.abspath(args.path_plt)

    # Create the PKL and PLT directories if they don't exist
    if not os.path.exists(PATH_PKL):
        os.makedirs(PATH_PKL)
    if not os.path.exists(PATH_PLT):
        os.makedirs(PATH_PLT)

    # ----------
    # Check that all stations are in station_dictionnary
    for dict_station_name in STA:
        try:
            eval(dict_station_name)
        except NameError:
            print(dict_station_name + " is not in station_dictionnary.py")
            exit()
        try:
            eval(dict_station_name)['network']
        except KeyError:
            print(dict_station_name +
                  " does not have a network in station_dictionnary.py")
            exit()
        try:
            eval(dict_station_name)['station']
        except KeyError:
            print(dict_station_name +
                  " does not have a station in station_dictionnary.py")
            exit()
        try:
            eval(dict_station_name)['locid']
        except KeyError:
            print(dict_station_name +
                  " does not have a locid in station_dictionnary.py")
            exit()
        try:
            dataless_file = eval(dict_station_name)['dataless_file']
        except KeyError:
            pass
        else:
            # if there is a dataless_file, check that the file exists
            if not os.path.isfile(dataless_file):
                print(dict_station_name + " does not have a valid "
                      "dataless_file in station_dictionnary.py:")
                print(dataless_file + ' does not exist')
                exit()
        try:
            eval(dict_station_name)['path_data']
        except KeyError:
            print(dict_station_name +
                  " does not have a path_data in station_dictionnary.py")
            exit()
        if not os.path.exists(eval(dict_station_name)['path_data']):
            print(dict_station_name + " does not have a valid path_data in "
                  "station_dictionnary.py:")
            print(eval(dict_station_name)['path_data'] + ' does not exist')
            exit()

    # Loop over stations
    for dict_station_name in STA:
        net = eval(dict_station_name)['network']
        sta = eval(dict_station_name)['station']
        locid = eval(dict_station_name)['locid']
        sds_path = eval(dict_station_name)['path_data']
        try:
            title_comment = eval(dict_station_name)['title_comment']
        except KeyError:
            title_comment = False

        # Use only channels listed in station_dictionnary; keep the
        # per-station list separate so one station's channels do not leak
        # into the next iteration
        if isinstance(chan_proc, list):
            chan_proc_sta = np.intersect1d(
                chan_proc, eval(dict_station_name)['channels'])
        else:
            chan_proc_sta = eval(dict_station_name)['channels']

        # Look for a dataless
        dataless = None
        if not args.force_paz:
            try:
                dataless = glob.glob(
                    eval(dict_station_name)['dataless_file'])[0]
            except (KeyError, IndexError):
                print("No valid dataless found for " + net + "." + sta +
                      "." + locid)
            else:
                print("Using dataless file: " + dataless)

        paz = None
        if dataless is None:
            # Look for a PAZ
            try:
                sismo = eval(eval(dict_station_name)['sensor'].lower())
                acq = eval(eval(dict_station_name)['digitizer'].lower())
            except (KeyError, NameError):
                print("No PAZ found for " + net + "." + sta + "." + locid)
            else:
                paz = {'gain': sismo['gain'],
                       'poles': sismo['poles'],
                       'zeros': sismo['zeros'],
                       'sensitivity': sismo['sensitivity'] / acq['lsb']}
                print("PAZ from instruments.py:")
                print(paz)

        # Exit if neither a dataless file nor a PAZ is available
        if dataless is None and paz is None:
            print("You must provide a dataless file or a sensor and a "
                  "digitizer from instruments.py")
            exit()
        # Exit if both a dataless file and a PAZ are provided
        if dataless is not None and paz is not None:
            print("You must provide a dataless file or a sensor and a "
                  "digitizer from instruments.py, but not both!")
            exit()

        print("SDS archive is " + str(sds_path))

        # Loop over channels
        for chan in chan_proc_sta:
            # Try to load a pickle file for this sta/net/chan
            filename_pkl = PATH_PKL + '/' + net + "." + sta + "." + \
                locid + "." + chan + ".pkl"
            is_pickle = False
            try:
                with open(filename_pkl, 'rb') as pkl_file:
                    quality_check = pickle.load(pkl_file)
            except (IOError, pickle.UnpicklingError):
                print("No pickle file found for " + net + "." + sta + "." +
                      locid + "." + chan +
                      " (looked for " + filename_pkl + ")")
            else:
                print("Using the pickle file: " + filename_pkl)
                is_pickle = True

            sds_client = Client(sds_path)
            print("Reading %s.%s.%s.%s in SDS archive from %s to %s"
                  % (net, sta, locid, chan, start, stop))
            all_streams = sds_client.get_waveforms(
                net, sta, locid, chan, start, stop)
            if len(all_streams.traces) == 0:
                print("No data found for %s.%s.%s.%s in SDS archive "
                      "from %s to %s" % (net, sta, locid, chan, start, stop))
                exit()

            print("Processing data")
            # Initiate the QC
            if is_pickle is False:
                quality_check = QC(all_streams[0].stats, dataless=dataless,
                                   paz=paz, skip_on_gaps=True)
                is_pickle = True

            # Remove the minutes before the next full hour (useful when
            # computing statistics per hour)
            # mst = min start time
            mst = min([temp.stats.starttime for temp in all_streams])
            mst = UTCDateTime(mst.year, mst.month, mst.day,
                              mst.hour, 0, 0) + 3600.
            all_streams.trim(starttime=mst, nearest_sample=False)
            # Trim to stop when asked
            all_streams.trim(endtime=stop)

            # Remove the minutes after the last full hour (useful when
            # computing statistics per hour)
            # met = max end time
            met = max([temp.stats.endtime for temp in all_streams])
            met = UTCDateTime(met.year, met.month, met.day, met.hour, 0, 0)
            all_streams.trim(endtime=met, nearest_sample=False)

            # Add all streams to the QC
            quality_check.add(all_streams)

            # Save. This could be moved two loop levels out to save a
            # little time (saving and loading)
            if is_pickle:
                print(filename_pkl)
                quality_check.save(filename_pkl)
                print(filename_pkl + " updated")
            else:
                print("!!!!! Nothing saved/created for " + net + "." + sta +
                      "." + locid)

            # Plot
            if is_pickle and len(quality_check.times_used) > 0:
                filename_plt = PATH_PLT + '/' + net + "." + sta + "." + \
                    locid + "." + chan + ".png"
                quality_check.plot(cmap=cmap, filename=filename_plt,
                                   show_percentiles=True, starttime=start,
                                   endtime=stop, title_comment=title_comment)
                print(filename_plt + " updated")
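# Standalone illustration of the hour-boundary trimming above: snap the
# earliest start time up to the next full hour and the latest end time
# down to the previous full hour (times are made up):
from obspy import UTCDateTime

mst = UTCDateTime('2012-02-01T10:23:45')
mst = UTCDateTime(mst.year, mst.month, mst.day, mst.hour, 0, 0) + 3600.
met = UTCDateTime('2012-02-03T17:42:00')
met = UTCDateTime(met.year, met.month, met.day, met.hour, 0, 0)
print(mst)  # 2012-02-01T11:00:00.000000Z
print(met)  # 2012-02-03T17:00:00.000000Z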
from obspy import Stream, UTCDateTime
from obspy.clients.filesystem.sds import Client
from obspy.signal.trigger import *
from mpl_toolkits import mplot3d
import pandas as pd
from sklearn import cluster

sdsRoot = "/mnt/DATA/DATA"
client = Client(sds_root=sdsRoot)
client.nslc = client.get_all_nslc(sds_type="D")
t = UTCDateTime("201602060356")
stream = Stream()
counter = 0
for net, sta, loc, chan in client.nslc:
    counter += 1
    st = client.get_waveforms(net, sta, loc, chan, t, t + 60)
    try:
        print(net, sta, loc, chan)
        # Use the running counter as a pseudo-distance so the traces can
        # be stacked like a record section.
        st.traces[0].stats.distance = counter
        stream += st
    except IndexError:
        # No data returned for this channel in the requested window.
        pass

# stream.normalize()
# stream.detrend()


def normalized(x):
    # Scale to zero mean and unit peak-to-peak amplitude.
    x = (x - x.mean()) / (x.max() - x.min())
    return x
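# Quick sanity check of the normalized() helper above on a small array
# (illustrative only):
import numpy as np

x = np.array([0., 2., 4.])
print(normalized(x))  # [-0.5  0.   0.5]: zero mean, unit peak-to-peak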
import sys

from obspy import UTCDateTime
from obspy.clients.filesystem.sds import Client

# Earlier argv parsing is not shown; client_root, start_time, end_time,
# network, station and channel presumably come from sys.argv[1:7].
# print(sys.argv[4])
# print(sys.argv[5])
# print(sys.argv[6])
# print(sys.argv[7])
# print(sys.argv[8])
# print(sys.argv[9])
# exit(0)
dpi = int(sys.argv[7])
sizex = int(sys.argv[8])
sizey = int(sys.argv[9])
client = Client(client_root)  # e.g. "/mnt/ide/seed"
tStart = UTCDateTime(start_time)  # e.g. "2020-06-08T06:30:00.000"
tEnd = UTCDateTime(end_time)  # e.g. "2020-06-08T09:30:00.000"
traces = client.get_waveforms(network, station, "*", channel, tStart, tEnd)
for tr in traces:
    trId = tr.get_id()
    fileName = 'out_' + trId + '.png'
    # os.remove('out_' + trId + '.png')
    # TODO: the files are not deleted, a permissions problem
    tr.plot(
        type='dayplot',
        dpi=dpi,
        x_labels_size=int(8 * 100 / int(dpi)),
        y_labels_size=int(8 * 100 / int(dpi)),
        title_size=int(1000 / int(dpi)),
        size=(sizex, sizey),
        # bgcolor='black',
        # grid_color='white',
        outfile=fileName)
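# A standalone, hypothetical sketch of the same dayplot call on synthetic
# data (one day of 1 Hz noise), useful for checking the dpi-scaled label
# sizes without an SDS archive:
import numpy as np
from obspy import Trace, UTCDateTime

dpi = 100
tr = Trace(data=np.random.randn(86400),
           header={'sampling_rate': 1.0,
                   'starttime': UTCDateTime('2020-06-08T00:00:00')})
tr.plot(type='dayplot', dpi=dpi,
        x_labels_size=int(8 * 100 / dpi),
        y_labels_size=int(8 * 100 / dpi),
        title_size=int(1000 / dpi),
        size=(800, 600),
        outfile='out_synthetic.png')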
# Excerpt: station_name, loc_id, chan, start, end, t1, t2, freqmin, freqmax
# and sds_client are defined earlier in the original script.
import matplotlib.pyplot as plt

nz = []
ez = []
en = []
dates = []
fig, ax = plt.subplots()
fig.suptitle(station_name + ' ' + loc_id + ' ' + chan + ' ' +
             start.strftime("%Y.%j") + "." + end.strftime("%Y.%j"),
             fontsize=20)
while t1 < end:
    try:
        st = sds_client.get_waveforms(network='G', station=station_name,
                                      location=loc_id, channel=chan + "Z",
                                      starttime=t1, endtime=t2)
        tr = st[0]
        tr.filter('bandpass', freqmin=freqmin, freqmax=freqmax)
        # Standard deviation of the bandpassed vertical component, used
        # as an RMS-amplitude proxy.
        z = tr.std()
        try:
            st = sds_client.get_waveforms(network='G',
                                          station=station_name,
                                          location=loc_id,
                                          channel=chan + "N",
                                          starttime=t1, endtime=t2)
            tr = st[0]
            tr.filter('bandpass', freqmin=freqmin, freqmax=freqmax)
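# The truncated loop above collects, per time window, the standard
# deviation of each bandpassed component as an RMS-amplitude proxy. A
# minimal single-window version using ObsPy's bundled example trace
# (filter band illustrative):
from obspy import read

tr = read()[0]  # example trace shipped with ObsPy
tr.filter('bandpass', freqmin=1.0, freqmax=5.0)
print(tr.std())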
import os
import sys
from glob import glob

from obspy import Stream, UTCDateTime, read
from obspy.clients.filesystem.sds import Client


def read_traces(config):
    basepath = config.data_dir
    if config.data_day:
        basepath = os.path.join(basepath, config.data_day)
        if config.data_hours:
            basepath = os.path.join(basepath, config.data_hours)

    kwargs = {}
    if config.data_format:
        kwargs['format'] = config.data_format

    tmpst = Stream()
    if config.dataarchive_type == 'SDS':
        client = Client(basepath)
        start_t = UTCDateTime(config.start_time)
        end_t = UTCDateTime(config.end_time)
        tmpst = client.get_waveforms(config.data_network, "*", "*", "*",
                                     start_t, end_t)
    else:
        for filename in glob(os.path.join(basepath, '*')):
            try:
                if config.start_time:
                    tmpst += read(filename,
                                  starttime=UTCDateTime(config.start_time),
                                  endtime=UTCDateTime(config.end_time),
                                  **kwargs)
                else:
                    tmpst += read(filename, **kwargs)
            except Exception:
                continue

    # Get the intersection between the list of available stations
    # and the list of requested stations:
    tmpst_select = Stream()
    for ch in config.channel:
        tmpst_select += tmpst.select(channel=ch)
    tmpst_stations = [tr.stats.station for tr in tmpst_select]
    stations = sorted(set(tmpst_stations) & set(config.stations))

    # Retain only the requested channels and stations:
    st = Stream(tr for tr in tmpst_select if tr.stats.station in stations)
    if not st:
        print('Could not read any trace!')
        sys.exit(1)
    st.sort()

    # Check the sampling rate
    config.delta = None
    for tr in st:
        tr.detrend(type='constant')
        tr.taper(type='hann', max_percentage=0.005, side='left')
        sampling_rate = tr.stats.sampling_rate
        # Resample data, if requested
        if config.sampl_rate_data:
            if sampling_rate >= config.sampl_rate_data:
                dec_ct = int(sampling_rate / config.sampl_rate_data)
                tr.decimate(dec_ct, strict_length=False, no_filter=True)
            else:
                raise ValueError(
                    'Sampling frequency for trace %s is lower than %s' %
                    (tr.id, config.sampl_rate_data))
        delta = tr.stats.delta
        if config.delta is None:
            config.delta = delta
        elif delta != config.delta:
            raise ValueError(
                'Trace %s has different delta: %s (expected: %s)' %
                (tr.id, delta, config.delta))
    # Recompute the sampling rate after resampling
    config.sampl_rate_data = st[0].stats.sampling_rate
    print('Number of traces in stream = ', len(st))

    # Check for a common starttime and endtime of the traces
    st_starttime = max([tr.stats.starttime for tr in st])
    st_endtime = min([tr.stats.endtime for tr in st])
    if config.start_time:
        st.trim(max(st_starttime, UTCDateTime(config.start_time)),
                min(st_endtime, UTCDateTime(config.end_time)))
    else:
        st.trim(st_starttime, st_endtime)

    # --- cut the data to the selected length dt -----------------------------
    if config.cut_data:
        st.trim(st[0].stats.starttime + config.cut_start,
                st[0].stats.starttime + config.cut_start + config.cut_delta)
    else:
        config.cut_start = 0.
    config.starttime = st[0].stats.starttime

    # Attach the station list and trace ids to the config object
    config.stations = stations
    config.trids = [tr.id for tr in st]
    return st
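# Hedged sketch of driving read_traces() above with a minimal config
# object; the attribute set mirrors only the fields the function touches
# and every value is a placeholder:
from types import SimpleNamespace

config = SimpleNamespace(
    data_dir='/path/to/SDS', data_day=None, data_hours=None,
    data_format=None, dataarchive_type='SDS', data_network='IV',
    start_time='2020-01-01T00:00:00', end_time='2020-01-01T00:10:00',
    channel=['EHZ'], stations=['ST01', 'ST02'],
    sampl_rate_data=50, cut_data=False)
st = read_traces(config)
print(config.delta, config.trids)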