def write(stream_files, subfolder):
    if not os.path.exists(os.path.join(output_dir, subfolder)):
        os.makedirs(os.path.join(output_dir, subfolder))
    if not os.path.exists(os.path.join(output_dir, subfolder, cfg.output_tfrecords_dir_positives)):
        os.makedirs(os.path.join(output_dir, subfolder, cfg.output_tfrecords_dir_positives))

    # Write event waveforms and cluster_id in .tfrecords
    #output_name = "positives.tfrecords"
    output_name = args.file_name
    output_path = os.path.join(output_dir, subfolder,
                               cfg.output_tfrecords_dir_positives, output_name)
    writer = DataWriter(output_path)

    for stream_file in stream_files:
        stream_path = os.path.join(dataset_dir, cfg.mseed_event_dir, stream_file)
        #print "[tfrecords positives] Loading Stream {}".format(stream_file)
        st_event = read(stream_path)
        #print '[tfrecords positives] Preprocessing stream'
        st_event = preprocess_stream(st_event)

        # Select only the specified channels
        st_event_select = utils.select_components(st_event, cfg)

        # LOCATION CLUSTERS
        cluster_id = 0  # we work with only one location for the moment (cluster id = 0)
        if cat is not None:
            stream_start_time = st_event[0].stats.starttime
            stream_end_time = st_event[-1].stats.endtime
            station = st_event[0].stats.station
            lat, lon, depth = cat.getLatLongDepth(stream_start_time, stream_end_time, station)
            c = clusters.nearest_cluster(lat, lon, depth)
            cluster_id = c.id
            print("[tfrecords positives] Assigning cluster " + str(cluster_id) + " to event.")
        #cluster_id = filtered_catalog.cluster_id.values[event_n]

        n_traces = len(st_event_select)
        if utils.check_stream(st_event_select, cfg):
            #print("[tfrecords positives] Writing sample with dimensions "
            #      + str(cfg.WINDOW_SIZE) + "x" + str(st_event[0].stats.sampling_rate)
            #      + "x" + str(n_traces))
            # Write tfrecords
            writer.write(st_event_select, cluster_id)

    # Cleanup writer
    print("[tfrecords positives] Number of windows written={}".format(writer._written))
    writer.close()
def write(stream_files, subfolder):
    if not os.path.exists(os.path.join(output_dir, subfolder)):
        os.makedirs(os.path.join(output_dir, subfolder))
    if not os.path.exists(os.path.join(output_dir, subfolder, cfg.output_tfrecords_dir_negatives)):
        os.makedirs(os.path.join(output_dir, subfolder, cfg.output_tfrecords_dir_negatives))

    # Write noise waveforms and cluster_id=-1 in .tfrecords
    output_name = args.file_name
    output_path = os.path.join(output_dir, subfolder,
                               cfg.output_tfrecords_dir_negatives, output_name)
    writer = DataWriter(output_path)

    for stream_file in stream_files:
        stream_path = os.path.join(dataset_dir, cfg.mseed_noise_dir, stream_file)
        #print "[tfrecords negatives] Loading Stream {}".format(stream_file)
        st_event = read(stream_path)
        #print '[tfrecords negatives] Preprocessing stream'

        # Filter if requested
        if cfg.filterfreq:
            st_event = utils.filter_stream(st_event)

        # Select only the specified channels
        st_event_select = utils.select_components(st_event, cfg)

        #cluster_id = filtered_catalog.cluster_id.values[event_n]
        cluster_id = -1  # noise windows are labelled -1

        n_traces = len(st_event_select)
        if utils.check_stream(st_event_select, cfg):
            #print("[tfrecords negatives] Writing sample with dimensions "
            #      + str(cfg.WINDOW_SIZE) + "x" + str(st_event[0].stats.sampling_rate)
            #      + "x" + str(n_traces))
            # Write tfrecords
            writer.write(st_event_select, cluster_id)

    # Cleanup writer
    print("[tfrecords negatives] Number of windows written={}".format(writer._written))
    writer.close()
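# --- Illustration (not from the original repository) --------------------
# The two write() helpers above repeat the "if not os.path.exists: os.makedirs"
# pattern for each nested directory. A minimal sketch of how that could be
# consolidated; the helper name is ours, not part of the project:
import os

def ensure_dir(*parts):
    """Join the path parts and create the directory if it does not exist yet."""
    path = os.path.join(*parts)
    if not os.path.exists(path):
        os.makedirs(path)
    return path

# e.g. output_path = os.path.join(
#     ensure_dir(output_dir, subfolder, cfg.output_tfrecords_dir_positives),
#     args.file_name)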
def main(args):
    setproctitle.setproctitle('quakenet_predict_from_tfrecords')

    # Create dir to store tfrecords
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    # Load stream
    stream_path = args.stream_path
    stream_file = os.path.split(stream_path)[-1]
    print "+ Loading Stream {}".format(stream_file)
    stream = read(stream_path)
    print '+ Preprocessing stream'
    stream = preprocess_stream(stream)

    # Dictionary of nb of events per tfrecords
    metadata = {}
    output_metadata = os.path.join(args.output_dir, "metadata.json")

    # Csv of start and end times
    times_csv = {"start_time": [], "end_time": []}

    # Write event waveforms and cluster_id=-1 in .tfrecords
    output_name = stream_file.split(".mseed")[0] + ".tfrecords"
    output_path = os.path.join(args.output_dir, output_name)
    writer = DataWriter(output_path)

    # Create window generator
    win_gen = stream.slide(window_length=args.window_size,
                           step=args.window_step,
                           include_partial_windows=False)
    if args.max_windows is None:
        total_time = stream[-1].stats.endtime - stream[0].stats.starttime
        max_windows = (total_time - args.window_size) / args.window_step
        print "total time {}, win_size {}, win_step {}".format(
            total_time, args.window_size, args.window_step)
    else:
        max_windows = args.max_windows

    start_time = time.time()
    for idx, win in tqdm(enumerate(win_gen), total=int(max_windows),
                         unit="window", leave=False):
        # If there is no trace, skip this waveform
        n_traces = len(win)
        if n_traces == 0:
            continue
        # Check the trace is complete
        if len(win) == 3:
            n_samples = min(len(win[0].data), len(win[1].data))
            n_samples = min(n_samples, len(win[2].data))
        else:
            n_samples = 10
        n_pts = win[0].stats.sampling_rate * args.window_size + 1

        # There is no event: write the window with label -1
        if (len(win) == 3) and (n_pts == n_samples):
            # Write tfrecords
            writer.write(win, -1)
            # Write start and end times in csv
            times_csv["start_time"].append(win[0].stats.starttime)
            times_csv["end_time"].append(win[0].stats.endtime)
            # Plot events
            if args.plot:
                trace = win[0]
                viz_dir = os.path.join(args.output_dir, "viz",
                                       stream_file.split(".mseed")[0])
                if not os.path.exists(viz_dir):
                    os.makedirs(viz_dir)
                trace.plot(outfile=os.path.join(viz_dir,
                                                "window_{}.png".format(idx)))

        # if idx % 1000 == 0 and idx != 0:
        #     print "{} windows created".format(idx)
        if idx == max_windows:
            break

    # Cleanup writer
    print("Number of windows written={}".format(writer._written))
    writer.close()

    # Write metadata
    metadata[stream_file.split(".mseed")[0]] = writer._written
    write_json(metadata, output_metadata)

    # Write start and end times
    df = pd.DataFrame.from_dict(times_csv)
    output_times = os.path.join(args.output_dir, "catalog_times.csv")
    df.to_csv(output_times)

    print "Last window analyzed ends on", win[0].stats.endtime
    print "Time to create tfrecords: {}s".format(time.time() - start_time)
def main(_):
    # Create dir to store tfrecords
    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    # Load stream
    stream_path = FLAGS.stream_path
    print stream_path
    stream_file = os.path.split(stream_path)[-1]
    print "+ Loading Stream {}".format(stream_file)
    stream = read(stream_path)
    print '+ Preprocessing stream'
    stream = preprocess_stream(stream)
    #stream.resample(10.0)

    # Dictionary of nb of events per tfrecords
    metadata = {}
    output_metadata = os.path.join(FLAGS.output_dir, "metadata.json")

    # Load Catalog
    print "+ Loading Catalog"
    cat = load_catalog(FLAGS.catalog)
    starttime = stream[0].stats.starttime.timestamp
    endtime = stream[-1].stats.endtime.timestamp
    print "starttime", UTCDateTime(starttime)
    print "endtime", UTCDateTime(endtime)
    #print stream[0].stats
    #m2 = re.search(cat.stname.values[:], stream_file.split(".")[1])
    #print m2.group()
    cat = filter_catalog(cat, starttime, endtime)
    #cat = cat[(cat.stname == str(stream_file.split(".")[1]))]
    #cat = cat[(cat.stname == str(stream_file.split(".")[1])) or
    #          (cat.stname == str(stream_file.split(".")[1][:-1]))]
    #print cat
    print "First event in filtered catalog", cat.Date.values[0], cat.Time.values[0]
    print "Last event in filtered catalog", cat.Date.values[-1], cat.Time.values[-1]
    cat_event_times = cat.utc_timestamp.values

    # Write noise waveforms and cluster_id=-1 in .tfrecords
    n_tfrecords = 0
    output_name = "noise_" + stream_file.split(".mseed")[0] + \
                  "_" + str(n_tfrecords) + ".tfrecords"
    output_path = os.path.join(FLAGS.output_dir, output_name)
    writer = DataWriter(output_path)

    # Create window generator
    win_gen = stream.slide(window_length=FLAGS.window_size,
                           step=FLAGS.window_step,
                           include_partial_windows=False)
    # Create window generator and shuffle the order, 2017/12/4
    #win_gen = [tr for tr in stream.slide(window_length=FLAGS.window_size,
    #                                     step=FLAGS.window_step,
    #                                     include_partial_windows=False)]
    #random.shuffle(win_gen)

    if FLAGS.max_windows is None:
        total_time = stream[0].stats.endtime - stream[0].stats.starttime
        max_windows = (total_time - FLAGS.window_size) / FLAGS.window_step
    else:
        max_windows = FLAGS.max_windows

    # Create adjacent windows in the stream. Check there is no event inside
    # using the catalog and then write in a tfrecords with label=-1
    n_tfrecords = 0
    for idx, win in enumerate(win_gen):
        # If there is no trace, skip this waveform
        n_traces = len(win)
        if n_traces == 0:
            continue
        # Check the trace is complete
        if len(win) == 3:
            n_samples = min(len(win[0].data), len(win[1].data))
            n_samples = min(n_samples, len(win[2].data))
            # Get rid of super small amplitude windows, 2017/12/6
            ampl_e, ampl_n, ampl_z = filter_small_ampitude(win, n_samples)
            if ampl_e > 0.3 or ampl_n > 0.3 or ampl_z > 0.3:
                continue
            #a = remove_repeat(win, n_samples)
            #if a[0] > 0.3 or a[1] > 0.3 or a[2] > 0.3:
            #    continue
        else:
            n_samples = 10
            ampl_e = np.max(np.abs(win[0].data))
            if ampl_e < 1e-10:
                continue
        n_pts = win[0].stats.sampling_rate * FLAGS.window_size + 1

        # Check if there is an event in the window
        window_start = win[0].stats.starttime.timestamp
        window_end = win[-1].stats.endtime.timestamp
        # Extend the window so that more events can be avoided, 2017/12/07
        window_start_extend = window_start - FLAGS.window_step
        window_end_extend = window_end + FLAGS.window_step
        after_start = cat_event_times > window_start_extend
        before_end = cat_event_times < window_end_extend
        #print window_start_extend, window_end_extend
        try:
            cat_idx = np.where(after_start == before_end)[0][0]
            event_time = cat_event_times[cat_idx]
            is_event = True
            assert window_start_extend < cat.utc_timestamp.values[cat_idx]
            assert window_end_extend > cat.utc_timestamp.values[cat_idx]
            print "avoiding event {}, {}".format(cat.Date.values[cat_idx],
                                                 cat.Time.values[cat_idx])
        except IndexError:
            # There is no event in the window
            is_event = False
            if (len(win) == 3) and (n_pts == n_samples):
                # Write tfrecords
                #writer.write(win.normalize(), -1)
                writer.write(win.copy().normalize(), -1)
                #writer.write(win.copy().resample(10).filter('bandpass', freqmin=0.5, freqmax=20).normalize(),
                #             -1)
                # Plot events
                if FLAGS.plot:
                    import matplotlib
                    matplotlib.use('Agg')
                    #trace = win[0].filter('bandpass', freqmin=0.5, freqmax=20)
                    trace = win[0]
                    viz_dir = os.path.join(
                        FLAGS.output_dir, "viz", stream_file.split(".mseed")[0])
                    if not os.path.exists(viz_dir):
                        os.makedirs(viz_dir)
                    #trace.resample(10).plot(outfile=os.path.join(viz_dir,
                    #    "noise_{}.png".format(str(window_start))))
                    trace.plot(outfile=os.path.join(
                        viz_dir,
                        # changed 2017/11/25: name windows by station and start time
                        #"event_{}_cluster_{}.png".format(idx, cluster_id)))
                        "noise_{}_{}.png".format(
                            win[0].stats.station,
                            str(win[0].stats.starttime).replace(':', '_'))))

        if idx % 1000 == 0 and idx != 0:
            print "{} windows created".format(idx)
            # Save num windows created in metadata
            metadata[output_name] = writer._written
            print "creating a new tfrecords"
            n_tfrecords += 1
            output_name = "noise_" + stream_file.split(".mseed")[0] + \
                          "_" + str(n_tfrecords) + ".tfrecords"
            output_path = os.path.join(FLAGS.output_dir, output_name)
            writer = DataWriter(output_path)

        if idx == max_windows:
            break

    # Cleanup writer
    print("Number of windows written={}".format(writer._written))
    writer.close()

    # Write metadata
    metadata[stream_file.split(".mseed")[0]] = writer._written
    write_json(metadata, output_metadata)
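# --- Illustration (not from the original repository) --------------------
# filter_small_ampitude() used above is defined elsewhere in the project.
# Judging only from how its three return values are compared with 0.3 before a
# window is skipped, one plausible reading is that it returns, per component,
# the fraction of samples whose amplitude is essentially zero. The sketch below
# is a hypothetical stand-in under that assumption, not the project's actual
# implementation.
import numpy as np

def fraction_of_near_zero_samples(win, n_samples, eps=1e-10):
    """Return, for each of the three traces, the fraction of near-zero samples."""
    fractions = []
    for trace in win[:3]:
        data = np.asarray(trace.data[:n_samples], dtype=float)
        fractions.append(float(np.mean(np.abs(data) < eps)))
    return tuple(fractions)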
def main(_):
    # Create dir to store tfrecords
    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    # Load stream
    stream_path = FLAGS.stream_path
    stream_file = os.path.split(stream_path)[-1]
    print("+ Loading Stream {}".format(stream_file))
    stream = read(stream_path)
    print('+ Preprocessing stream')
    stream = preprocess_stream(stream)

    # Dictionary of nb of events per tfrecords
    metadata = {}
    output_metadata = os.path.join(FLAGS.output_dir, "metadata.json")

    # Load Catalog
    print("+ Loading Catalog")
    cat = load_catalog(FLAGS.catalog)
    starttime = stream[0].stats.starttime.timestamp
    endtime = stream[-1].stats.endtime.timestamp
    print("starttime", UTCDateTime(starttime))
    print("endtime", UTCDateTime(endtime))
    cat = filter_catalog(cat, starttime, endtime)
    print("First event in filtered catalog", cat.Date.values[0], cat.Time.values[0])
    print("Last event in filtered catalog", cat.Date.values[-1], cat.Time.values[-1])
    cat_event_times = cat.utc_timestamp.values

    # Write noise waveforms and cluster_id=-1 in .tfrecords
    n_tfrecords = 0
    output_name = "noise_" + stream_file.split(".mseed")[0] + \
                  "_" + str(n_tfrecords) + ".tfrecords"
    output_path = os.path.join(FLAGS.output_dir, output_name)
    writer = DataWriter(output_path)

    # Create window generator
    win_gen = stream.slide(window_length=FLAGS.window_size,
                           step=FLAGS.window_step,
                           include_partial_windows=False)
    if FLAGS.max_windows is None:
        total_time = stream[0].stats.endtime - stream[0].stats.starttime
        max_windows = (total_time - FLAGS.window_size) / FLAGS.window_step
    else:
        max_windows = FLAGS.max_windows

    # Create adjacent windows in the stream. Check there is no event inside
    # using the catalog and then write in a tfrecords with label=-1
    n_tfrecords = 0
    for idx, win in enumerate(win_gen):
        # If there is no trace, skip this waveform
        n_traces = len(win)
        if n_traces == 0:
            continue
        # Check the trace is complete
        if len(win) == 3:
            n_samples = min(len(win[0].data), len(win[1].data))
            n_samples = min(n_samples, len(win[2].data))
        else:
            n_samples = 10
        n_pts = win[0].stats.sampling_rate * FLAGS.window_size + 1

        # Check if there is an event in the window
        window_start = win[0].stats.starttime.timestamp
        window_end = win[-1].stats.endtime.timestamp
        after_start = cat_event_times > window_start
        before_end = cat_event_times < window_end
        try:
            cat_idx = np.where(after_start == before_end)[0][0]
            event_time = cat_event_times[cat_idx]
            is_event = True
            assert window_start < cat.utc_timestamp.values[cat_idx]
            assert window_end > cat.utc_timestamp.values[cat_idx]
            print("avoiding event {}, {}".format(cat.Date.values[cat_idx],
                                                 cat.Time.values[cat_idx]))
        except IndexError:
            # There is no event in the window
            is_event = False
            if (len(win) == 3) and (n_pts == n_samples):
                # Write tfrecords
                writer.write(win, -1)
                # Plot events
                if FLAGS.plot:
                    trace = win[0]
                    viz_dir = os.path.join(FLAGS.output_dir, "viz",
                                           stream_file.split(".mseed")[0])
                    if not os.path.exists(viz_dir):
                        os.makedirs(viz_dir)
                    trace.plot(outfile=os.path.join(
                        viz_dir, "noise_{}.png".format(idx)))

        if idx % 1000 == 0 and idx != 0:
            print("{} windows created".format(idx))
            # Save num windows created in metadata
            metadata[output_name] = writer._written
            print("creating a new tfrecords")
            n_tfrecords += 1
            output_name = "noise_" + stream_file.split(".mseed")[0] + \
                          "_" + str(n_tfrecords) + ".tfrecords"
            output_path = os.path.join(FLAGS.output_dir, output_name)
            writer = DataWriter(output_path)

        if idx == max_windows:
            break

    # Cleanup writer
    print("Number of windows written={}".format(writer._written))
    writer.close()

    # Write metadata
    metadata[stream_file.split(".mseed")[0]] = writer._written
    write_json(metadata, output_metadata)
def main(args):
    if not os.path.exists(args.outroot):
        os.makedirs(args.outroot)

    # Copy some files
    copy(os.path.join(args.inroot, 'params.pkl'), args.outroot)
    copy(os.path.join(args.inroot, 'event_channel_dict.pkl'), args.outroot)

    for dataset in ['train', 'validate', 'test']:
        for datatype in ['events', 'noise']:
            inpath = os.path.join(args.inroot, dataset, datatype)
            outpath = os.path.join(args.outroot, dataset, datatype)
            if not os.path.exists(outpath):
                os.makedirs(outpath)
            mseedpath = os.path.join(outpath, 'mseed')
            if not os.path.exists(mseedpath):
                os.makedirs(mseedpath)
            mseedpath = os.path.join(outpath, 'mseed_raw')
            if not os.path.exists(mseedpath):
                os.makedirs(mseedpath)
            if datatype == 'events':
                xmlpath = os.path.join(outpath, 'xml')
                if not os.path.exists(xmlpath):
                    os.makedirs(xmlpath)

            # inroot example: output/MN/streams
            # inpath example: output/MN/streams/train/events
            for dirpath, dirnames, filenames in os.walk(inpath):
                for name in filenames:
                    if name.endswith(".tfrecords"):
                        filename_root = name.replace('.tfrecords', '')
                        print 'Processing:', name, os.path.join(outpath, filename_root + '.tfrecords')

                        # Copy some files
                        copy(os.path.join(inpath, 'mseed_raw', filename_root + '.mseed'),
                             os.path.join(outpath, 'mseed_raw'))
                        if datatype == 'events':
                            copy(os.path.join(inpath, 'xml', filename_root + '.xml'),
                                 os.path.join(outpath, 'xml'))

                        # Read raw mseed
                        stream = read(os.path.join(inpath, 'mseed_raw', filename_root + '.mseed'),
                                      format='MSEED')
                        # Store absolute maximum
                        stream_max = np.absolute(stream.max()).max()
                        # Normalize by absolute maximum
                        stream.normalize(global_max=True)
                        # Write new processed miniseed
                        streamfile = os.path.join(outpath, 'mseed', filename_root + '.mseed')
                        stream.write(streamfile, format='MSEED', encoding='FLOAT32')

                        n_traces = 3
                        win_size = 10001

                        # Read old tfrecords
                        # https://www.kaggle.com/mpekalski/reading-tfrecord
                        record_iterator = tf.python_io.tf_record_iterator(
                            path=os.path.join(inpath, filename_root + '.tfrecords'))
                        for string_record in record_iterator:
                            example = tf.train.Example()
                            example.ParseFromString(string_record)
                            distance_id = int(example.features.feature['distance_id'].int64_list.value[0])
                            magnitude_id = int(example.features.feature['magnitude_id'].int64_list.value[0])
                            depth_id = int(example.features.feature['depth_id'].int64_list.value[0])
                            azimuth_id = int(example.features.feature['azimuth_id'].int64_list.value[0])
                            distance = float(example.features.feature['distance'].float_list.value[0])
                            magnitude = float(example.features.feature['magnitude'].float_list.value[0])
                            depth = float(example.features.feature['depth'].float_list.value[0])
                            azimuth = float(example.features.feature['azimuth'].float_list.value[0])
                            print 'id', distance_id, 'im', magnitude_id, 'ide', depth_id, 'iaz', azimuth_id, \
                                'd', distance, 'm', magnitude, 'de', depth, 'az', azimuth

                        # filename_queue = tf.train.string_input_producer(
                        #     [os.path.join(inpath, filename_root + '.tfrecords')], shuffle=False)
                        # reader = tf.TFRecordReader()
                        # example_key, serialized_example = reader.read(filename_queue)
                        # # data_pipeline._parse_example()
                        # features = tf.parse_single_example(
                        #     serialized_example,
                        #     features={
                        #         'window_size': tf.FixedLenFeature([], tf.int64),
                        #         'n_traces': tf.FixedLenFeature([], tf.int64),
                        #         'data': tf.FixedLenFeature([], tf.string),
                        #         #'stream_max': tf.FixedLenFeature([], tf.float32),
                        #         'distance_id': tf.FixedLenFeature([], tf.int64),
                        #         'magnitude_id': tf.FixedLenFeature([], tf.int64),
                        #         'depth_id': tf.FixedLenFeature([], tf.int64),
                        #         'azimuth_id': tf.FixedLenFeature([], tf.int64),
                        #         'distance': tf.FixedLenFeature([], tf.float32),
                        #         'magnitude': tf.FixedLenFeature([], tf.float32),
                        #         'depth': tf.FixedLenFeature([], tf.float32),
                        #         'azimuth': tf.FixedLenFeature([], tf.float32),
                        #         'start_time': tf.FixedLenFeature([], tf.int64),
                        #         'end_time': tf.FixedLenFeature([], tf.int64)})
                        # features['name'] = example_key
                        # # END - data_pipeline._parse_example()
                        #
                        # print "features['distance_id']", features['distance_id']
                        # print 'distance_id shape', tf.shape(features['distance_id'])
                        # with tf.Session() as sess:
                        #     print sess.run(features['distance_id'])
                        # #print 'distance_id', distance_id
                        # #print 'distance_id shape', tf.shape(distance_id)
                        # magnitude_id = features['magnitude_id']
                        # depth_id = features['depth_id']
                        # azimuth_id = features['azimuth_id']
                        # distance = features['distance']
                        # magnitude = features['magnitude']
                        # depth = features['depth']
                        # azimuth = features['azimuth']

                        # Write new tfrecords with the labels taken from the old record
                        writer = DataWriter(os.path.join(outpath, filename_root + '.tfrecords'))
                        writer.write(stream, stream_max, distance_id, magnitude_id, depth_id, azimuth_id,
                                     distance, magnitude, depth, azimuth)
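# --- Usage sketch (not from the original repository) --------------------
# A minimal standalone snippet for inspecting the scalar labels stored in one
# of these .tfrecords files, mirroring the tf.python_io loop above. It assumes
# TensorFlow 1.x and records written by DataWriter with the feature names shown
# in the commented-out parser; the example path is hypothetical.
import tensorflow as tf

def dump_labels(tfrecords_path):
    """Print the distance/magnitude/depth/azimuth labels stored in each record."""
    for string_record in tf.python_io.tf_record_iterator(path=tfrecords_path):
        example = tf.train.Example()
        example.ParseFromString(string_record)
        feats = example.features.feature
        print 'distance', feats['distance'].float_list.value[0], \
            'magnitude', feats['magnitude'].float_list.value[0], \
            'depth', feats['depth'].float_list.value[0], \
            'azimuth', feats['azimuth'].float_list.value[0]

# dump_labels('output/MN/streams/train/events/example.tfrecords')  # hypothetical path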
def main(_):
    stream_files = [file for file in os.listdir(FLAGS.stream_dir)
                    if fnmatch.fnmatch(file, '*')]
    print "List of streams to analyze", stream_files

    # Create dir to store tfrecords
    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    # Dictionary of nb of events per tfrecords
    metadata = {}
    output_metadata = os.path.join(FLAGS.output_dir, "metadata.json")

    # Load Catalog
    print "+ Loading Catalog"
    cat = load_catalog(FLAGS.catalog)
    cat = filter_catalog(cat)

    for stream_file in stream_files:
        # Load stream
        stream_path = os.path.join(FLAGS.stream_dir, stream_file)
        print "+ Loading Stream {}".format(stream_file)
        stream = read(stream_path)
        print '+ Preprocessing stream'
        stream = preprocess_stream(stream)

        # Filter catalog according to the loaded stream
        start_date = stream[0].stats.starttime
        end_date = stream[-1].stats.endtime
        print("-- Start Date={}, End Date={}".format(start_date, end_date))
        filtered_catalog = cat[((cat.utc_timestamp >= start_date)
                                & (cat.utc_timestamp < end_date))]

        # Propagation time from source to station
        travel_time = get_travel_time(filtered_catalog)

        # Write event waveforms and cluster_id in .tfrecords
        output_name = stream_file.split(".mseed")[0] + ".tfrecords"
        output_path = os.path.join(FLAGS.output_dir, output_name)
        writer = DataWriter(output_path)
        print("+ Creating tfrecords for {} events".format(filtered_catalog.shape[0]))

        # Loop over all events in the considered stream
        for event_n in range(filtered_catalog.shape[0]):
            event_time = filtered_catalog.utc_timestamp.values[event_n]
            event_time += travel_time[event_n]
            st_event = stream.slice(UTCDateTime(event_time),
                                    UTCDateTime(event_time) + FLAGS.window_size).copy()
            cluster_id = filtered_catalog.cluster_id.values[event_n]
            n_traces = len(st_event)
            # If there is no trace, skip this waveform
            if n_traces == 0:
                continue
            n_samples = len(st_event[0].data)
            n_pts = st_event[0].stats.sampling_rate * FLAGS.window_size + 1
            if (len(st_event) == 3) and (n_pts == n_samples):
                # Write tfrecords
                writer.write(st_event, cluster_id)

                # Save window and cluster_id
                if FLAGS.save_mseed:
                    output_label = "label_{}_lat_{:.3f}_lon_{:.3f}.mseed".format(
                        cluster_id,
                        filtered_catalog.latitude.values[event_n],
                        filtered_catalog.longitude.values[event_n])
                    output_mseed_dir = os.path.join(FLAGS.output_dir, "mseed")
                    if not os.path.exists(output_mseed_dir):
                        os.makedirs(output_mseed_dir)
                    output_mseed = os.path.join(output_mseed_dir, output_label)
                    st_event.write(output_mseed, format="MSEED")

                # Plot events
                if FLAGS.plot:
                    trace = st_event[0]
                    viz_dir = os.path.join(FLAGS.output_dir, "viz",
                                           stream_file.split(".mseed")[0])
                    if not os.path.exists(viz_dir):
                        os.makedirs(viz_dir)
                    trace.plot(outfile=os.path.join(
                        viz_dir, "event_{}.png".format(event_n)))
            else:
                print "Missing waveform for event:", UTCDateTime(event_time)

        # Cleanup writer
        print("Number of events written={}".format(writer._written))
        writer.close()
        # Write metadata
        metadata[stream_file.split(".mseed")[0]] = writer._written
        write_json(metadata, output_metadata)
def write(stream_files, subfolder):
    if not os.path.exists(os.path.join(output_dir, subfolder)):
        os.makedirs(os.path.join(output_dir, subfolder))
    if not os.path.exists(os.path.join(output_dir, subfolder, cfg.output_tfrecords_dir_positives)):
        os.makedirs(os.path.join(output_dir, subfolder, cfg.output_tfrecords_dir_positives))

    # Write event waveforms and cluster_id in .tfrecords
    #output_name = "positives.tfrecords"
    output_name = args.file_name
    output_path = os.path.join(output_dir, subfolder,
                               cfg.output_tfrecords_dir_positives, output_name)
    writer = DataWriter(output_path)

    for stream_file in stream_files:
        stream_path = os.path.join(dataset_dir, cfg.mseed_event_dir, stream_file)
        #print "[tfrecords positives] Loading Stream {}".format(stream_file)
        st_event = read(stream_path)
        #print '[tfrecords positives] Preprocessing stream'

        # Filter if requested
        if cfg.filterfreq:
            st_event = utils.filter_stream(st_event)

        # Select only the specified channels
        st_event_select = utils.select_components(st_event, cfg)

        # LOCATION CLUSTERS
        lat = 0
        lon = 0
        depth = 0
        cluster_id = 0  # we work with only one location for the moment (cluster id = 0)
        if cat is not None:
            stream_start_time = st_event[0].stats.starttime
            stream_end_time = st_event[-1].stats.endtime
            station = st_event[0].stats.station
            lat, lon, depth = cat.getLatLongDepth(stream_start_time, stream_end_time, station)
            c = clusters.nearest_cluster(lat, lon, depth)
            if c is not None:  # can be None in case of polygon-based clustering
                cluster_id = c.id
            else:
                cluster_id = -1  # signals that the earthquake has to be discarded
            print("[tfrecords positives] Assigning cluster " + str(cluster_id) +
                  " to event (lat = " + str(lat) + ", lon = " + str(lon) + ").")
        #cluster_id = filtered_catalog.cluster_id.values[event_n]

        if cluster_id >= 0:  # no clustering or a valid cluster
            n_traces = len(st_event_select)
            if utils.check_stream(st_event_select, cfg):
                #print("[tfrecords positives] Writing sample with dimensions "
                #      + str(cfg.WINDOW_SIZE) + "x" + str(st_event[0].stats.sampling_rate)
                #      + "x" + str(n_traces))
                # Write tfrecords
                #DEBUG: STA/LTA
                #df = st_event_select[0].stats.sampling_rate
                #cft = classic_sta_lta(st_event_select[0], int(5 * df), int(10 * df))
                #for trig in cft:
                #    if trig != .0:
                #        print(trig)
                writer.write(st_event_select, cluster_id)
        else:
            print("[tfrecords positives] \033[91m WARNING!!\033[0m Discarding point as no "
                  "cluster found for the given lat=" + str(lat) + ", lon=" + str(lon) +
                  ", depth=" + str(depth))

    # Cleanup writer
    print("[tfrecords positives] Number of windows written={}".format(writer._written))
    writer.close()
def main(_):
    if FLAGS.stretch_data:
        print "ADD NOISE AND STRETCH DATA"
    if FLAGS.compress_data:
        print "ADD NOISE AND COMPRESS DATA"
    if FLAGS.shift_data:
        print "ADD NOISE AND SHIFT DATA"

    # Make dirs
    output_dir = os.path.split(FLAGS.output)[0]
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    if FLAGS.plot:
        if not os.path.exists(os.path.join(output_dir, "true_data")):
            os.makedirs(os.path.join(output_dir, "true_data"))
        if not os.path.exists(os.path.join(output_dir, "augmented_data")):
            os.makedirs(os.path.join(output_dir, "augmented_data"))

    cfg = config.Config()
    cfg.batch_size = 1
    cfg.n_epochs = 1
    data_pipeline = DataPipeline(FLAGS.tfrecords, config=cfg, is_training=False)
    samples = data_pipeline.samples
    labels = data_pipeline.labels

    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        tf.initialize_local_variables().run()
        threads = tf.train.start_queue_runners(coord=coord)

        output_tfrecords = FLAGS.output
        writer = DataWriter(output_tfrecords)
        n_examples = 0
        while True:
            try:
                sample, label = sess.run([samples, labels])
                sample = np.squeeze(sample, axis=0)
                label = label[0]

                noised_sample = add_noise_to_signal(np.copy(sample))
                if FLAGS.compress_data:
                    noised_sample = compress_signal(noised_sample)
                if FLAGS.stretch_data:
                    noised_sample = stretch_signal(noised_sample)
                if FLAGS.shift_data:
                    noised_sample = shift_signal(noised_sample)

                if FLAGS.plot:
                    plot_true_and_augmented_data(sample, noised_sample, label, n_examples)

                stream = convert_np_to_stream(noised_sample)
                writer.write(stream, label)
                n_examples += 1

            except KeyboardInterrupt:
                print 'stopping data augmentation'
                break

            except tf.errors.OutOfRangeError:
                print 'Augmentation completed ({} epochs, {} examples seen).'\
                    .format(cfg.n_epochs, n_examples - 1)
                break

        writer.close()
        coord.request_stop()
        coord.join(threads)
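# --- Illustration (not from the original repository) --------------------
# add_noise_to_signal(), compress_signal(), stretch_signal() and shift_signal()
# are helpers defined elsewhere in the project. As an illustration only, a
# hypothetical Gaussian-noise variant of the first one might look like the
# sketch below; the noise_level parameter, the scaling by each channel's
# standard deviation, and the (n_points, n_channels) layout are assumptions,
# not the project's actual implementation.
import numpy as np

def add_gaussian_noise(sample, noise_level=0.1, rng=None):
    """Return a copy of `sample` with zero-mean Gaussian noise added per channel."""
    rng = rng or np.random.RandomState()
    noisy = np.array(sample, dtype=np.float32, copy=True)
    for ch in range(noisy.shape[-1]):       # assumes shape (n_points, n_channels)
        std = noisy[:, ch].std() or 1.0     # avoid zero std on flat traces
        noisy[:, ch] += rng.normal(0.0, noise_level * std, size=noisy.shape[0])
    return noisy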
def main(args):
    random.seed(datetime.now())

    if args.n_distances < 1:
        args.n_distances = None
    # print distance classifications
    if args.n_distances != None:
        print 'dist_class, dist_deg, dist_km'
        for dclass in range(0, args.n_distances, 1):
            dist_deg = util.classification2distance(dclass, args.n_distances)
            dist_km = geo.degrees2kilometers(dist_deg)
            print "{} {:.2f} {:.1f}".format(dclass, dist_deg, dist_km)
        print ''

    if args.n_magnitudes < 1:
        args.n_magnitudes = None
    # print magnitude classifications
    if args.n_magnitudes != None:
        print 'mag_class, mag'
        for mclass in range(0, args.n_magnitudes, 1):
            mag = util.classification2magnitude(mclass, args.n_magnitudes)
            print "{} {:.2f}".format(mclass, mag)
        print ''

    if args.n_depths < 1:
        args.n_depths = None
    # print depth classifications
    if args.n_depths != None:
        print 'depth_class, depth'
        for dclass in range(0, args.n_depths, 1):
            depth = util.classification2depth(dclass, args.n_depths)
            print "{} {:.1f}".format(dclass, depth)
        print ''

    if args.n_azimuths < 1:
        args.n_azimuths = None
    # print azimuth classifications
    if args.n_azimuths != None:
        print 'azimuth_class, azimuth'
        for aclass in range(0, args.n_azimuths, 1):
            azimuth = util.classification2azimuth(aclass, args.n_azimuths)
            print "{} {:.1f}".format(aclass, azimuth)
        print ''

    if not os.path.exists(args.outpath):
        os.makedirs(args.outpath)

    # save arguments
    with open(os.path.join(args.outpath, 'params.pkl'), 'w') as file:
        file.write(pickle.dumps(args))  # use `pickle.loads` to do the reverse

    for dataset in ['train', 'validate', 'test']:
        for datatype in ['events', 'noise']:
            datapath = os.path.join(args.outpath, dataset, datatype)
            if not os.path.exists(datapath):
                os.makedirs(datapath)
            mseedpath = os.path.join(datapath, 'mseed')
            if not os.path.exists(mseedpath):
                os.makedirs(mseedpath)
            mseedpath = os.path.join(datapath, 'mseed_raw')
            if not os.path.exists(mseedpath):
                os.makedirs(mseedpath)
            if datatype == 'events':
                xmlpath = os.path.join(datapath, 'xml')
                if not os.path.exists(xmlpath):
                    os.makedirs(xmlpath)

    # read catalog of events
    #filenames = args.event_files_path + os.sep + '*.xml'
    catalog_dict = {}
    catalog_all = []
    for dirpath, dirnames, filenames in os.walk(args.event_files_path):
        for name in filenames:
            if name.endswith(".xml"):
                file = os.path.join(dirpath, name)
                catalog = read_events(file)
                target_count = int(args.event_fraction * float(catalog.count()))
                print catalog.count(), 'events:', 'read from:', file, \
                    'will use:', target_count, 'since args.event_fraction=', args.event_fraction
                if (args.event_fraction < 1.0):
                    while catalog.count() > target_count:
                        del catalog[random.randint(0, catalog.count() - 1)]
                if not args.systematic:
                    tokens = name.split('_')
                    net_sta = tokens[0] + '_' + tokens[1]
                    if not net_sta in catalog_dict:
                        catalog_dict[net_sta] = catalog
                    else:
                        catalog_dict[net_sta] += catalog
                    # sort catalog by date
                    catalog_dict[net_sta] = Catalog(
                        sorted(catalog_dict[net_sta], key=lambda e: e.origins[0].time))
                else:
                    catalog_all += catalog

    # read list of channels to use
    inventory_full = read_inventory(args.channel_file)
    inventory_full = inventory_full.select(channel=args.channel_prefix + 'Z',
                                           sampling_rate=args.sampling_rate)
    #print(inventory)

    client = fdsn.Client(args.base_url)

    # get existing already processed event channel dictionary
    try:
        with open(os.path.join(args.outpath, 'event_channel_dict.pkl'), 'r') as file:
            event_channel_dict = pickle.load(file)
    except IOError:
        event_channel_dict = {}
    print 'Existing event_channel_dict size:', len(event_channel_dict)

    n_noise = int(0.5 + float(args.n_streams) * args.noise_fraction)
    n_events = args.n_streams - n_noise
    n_validate = int(0.5 + float(n_events) * args.validation_fraction)
    n_test = int(0.5 + float(n_events) * args.test_fraction)
    n_train = n_events - n_validate - n_test
    n_count = 0
    n_streams = 0

    if args.systematic:
        event_ndx = 0
        net_ndx = 0
        sta_ndx = 0
        channel_ndx = -1

    # distance_id_count = {}
    # max_num_for_distance_id = {}
    # if args.n_distances != None:
    #     # train
    #     distance_id_count['train'] = [0] * args.n_distances
    #     max_num_for_distance_id['train'] = 1 + int(2.0 * float(n_train) / float(args.n_distances))
    #     print 'Maximum number events for each distance bin train:', max_num_for_distance_id['train']
    #     # validate
    #     distance_id_count['validate'] = [0] * args.n_distances
    #     max_num_for_distance_id['validate'] = 1 + int(2.0 * float(n_validate) / float(args.n_distances))
    #     print 'Maximum number events for each distance bin validate:', max_num_for_distance_id['validate']
    #     # test
    #     distance_id_count['test'] = [0] * args.n_distances
    #     max_num_for_distance_id['test'] = 1 + int(2.0 * float(n_test) / float(args.n_distances))
    #     print 'Maximum number events for each distance bin test:', max_num_for_distance_id['test']

    while args.systematic or n_streams < args.n_streams:
        try:
            # choose event or noise
            is_noise = n_streams >= n_events

            # reset validate/test counts if switching from event to noise
            if n_streams == n_events:
                n_validate = int(0.5 + float(n_noise) * args.validation_fraction)
                n_test = int(0.5 + float(n_noise) * args.test_fraction)
                n_train = n_noise - n_validate - n_test
                n_count = 0

            # set out paths
            if is_noise:
                datatype = 'noise'
            else:
                datatype = 'events'
            if n_count < n_train:
                dataset = 'train'
            elif n_count < n_train + n_validate:
                dataset = 'validate'
            else:
                dataset = 'test'
            datapath = os.path.join(args.outpath, dataset, datatype)

            # get random channel from Inventory
            #inventory = inventory_full.select(time=origin.time)
            inventory = inventory_full
            if args.systematic:
                try:
                    catalog, event_ndx, event, origin, channel, net_ndx, net, sta_ndx, sta, channel_ndx \
                        = get_systematic_channel(inventory, catalog_all, is_noise,
                                                 event_ndx, net_ndx, sta_ndx, channel_ndx)
                except ValueError:
                    break
            else:
                try:
                    catalog, event_ndx, event, origin, channel, net_ndx, net, sta_ndx, sta, channel_ndx \
                        = get_random_channel(inventory, catalog_dict, is_noise)
                except ValueError:
                    continue

            distance_id = 0
            distance = -999.0
            magnitude = -999.0
            depth = -999.0
            azimuth = -999.0
            if not is_noise:
                dist_meters, azim, bazim = geo.gps2dist_azimuth(
                    channel.latitude, channel.longitude,
                    origin.latitude, origin.longitude,
                    a=geo.WGS84_A, f=geo.WGS84_F)
                distance = geo.kilometer2degrees(dist_meters / 1000.0, radius=6371)
                azimuth = azim
                magnitude = event.preferred_magnitude().mag
                depth = origin.depth / 1000.0
                if args.n_distances != None:
                    distance_id = util.distance2classification(distance, args.n_distances)
                    # if distance_id_count[dataset][distance_id] >= max_num_for_distance_id[dataset]:
                    #     print 'Skipping event_channel: distance bin', distance_id, 'for', dataset, \
                    #         'already full:', distance_id_count[dataset][distance_id], '/', \
                    #         max_num_for_distance_id[dataset]
                    #     continue
                print ''
                print 'Event:', origin.time.isoformat(), event.event_descriptions[0].text, \
                    ', Dist(deg): {:.2f} Dist(km): {:.1f} ID: {}'.format(
                        distance, geo.degrees2kilometers(distance), distance_id), \
                    ', Mag: {:.2f}'.format(magnitude), \
                    ', Depth(km): {:.1f}'.format(depth), \
                    ', Az(deg): {:.1f}'.format(azimuth)

            print 'Retrieving channels:', (n_streams + 1), '/ ', args.n_streams, \
                (', NOISE, ' if is_noise else ', EVENT, '), 'event', event_ndx, origin.time, \
                ', net', net_ndx, ', sta', sta_ndx, ', chan', channel_ndx, \
                ', ', net.code, sta.code, channel.code, channel.location_code, \
                channel.sample_rate

            # check station was available at origin.time
            if not sta.is_active(time=origin.time):
                print 'Skipping event_channel: station not active at origin.time:'
                continue

            #key = str(event_ndx) + '_' + str(net_ndx) + '_' + str(sta_ndx) + '_' + str(channel_ndx) + '_' + str(is_noise)
            key = str(event_ndx) + '_' + net.code + '_' + sta.code + '_' + channel.code + '_' + str(is_noise)
            if key in event_channel_dict:
                print 'Skipping event_channel: already processed.'
                continue
            event_channel_dict[key] = 1

            # get start time for waveform request
            ttime = get_first_P_travel_time(origin, channel)
            arrival_time = origin.time + ttime
            if is_noise:
                # get start time of next event
                event2 = catalog[event_ndx + 1]
                origin2 = event2.preferred_origin()
                # check that origins are at least min time apart
                if origin2.time - origin.time < MIN_INTER_EVENT_TIME:
                    print 'Skipping noise event_channel: inter event time too small: ', \
                        str(origin2.time - origin.time), origin2.time, origin.time
                    continue
                ttime2 = get_first_P_travel_time(origin2, channel)
                arrival_time2 = origin2.time + ttime2
                arrival_time = (arrival_time + ((arrival_time2 - arrival_time) / 2.0)) - args.window_start

            start_time = arrival_time - args.window_start

            # request data for 3 channels
            #for orientation in ['Z', 'N', 'E', '1', '2']:
            #    req_chan = args.channel_prefix + orientation
            channel_name = net.code + '_' + sta.code + '_' + channel.location_code + '_' + args.channel_prefix
            padded_start_time = start_time - WINDOW_PADDING_FDSN
            padded_end_time = start_time + args.window_length + 2.0 * WINDOW_PADDING_FDSN
            chan_param = args.channel_prefix + '?'

            # kluge to get url used for data request
            kwargs = {'network': net.code, 'station': sta.code,
                      'location': channel.location_code, 'channel': chan_param,
                      'starttime': padded_start_time, 'endtime': padded_end_time}
            #url = client._create_url_from_parameters('dataselect', DEFAULT_PARAMETERS['dataselect'], **kwargs)
            url = fdsn.client.build_url(client.base_url, 'dataselect',
                                        client.major_versions['dataselect'], "query",
                                        parameters=kwargs)
            print '  java net.alomax.seisgram2k.SeisGram2K', '"', url, '"'

            try:
                stream = client.get_waveforms(
                    net.code, sta.code, channel.location_code, chan_param,
                    padded_start_time, padded_end_time,
                    attach_response=True)
            except fdsn.header.FDSNException as ex:
                print 'Skipping channel:', channel_name, 'FDSNException:', ex,
                continue

            print stream
            # TEST
            #for trace in stream:
            #    print '==========> trace.stats', trace.stats

            # check some things
            if (len(stream) != 3):
                print 'Skipping channel: len(stream) != 3:', channel_name
                continue
            ntrace = 0
            for trace in stream:
                if (len(trace) < 1):
                    print 'Skipping trace: len(trace) < 1:', channel_name
                    continue
                if (trace.stats.starttime > start_time
                        or trace.stats.endtime < start_time + args.window_length):
                    print 'Skipping trace: does not contain required time window:', channel_name
                    continue
                ntrace += 1
            if (ntrace != 3):
                print 'Skipping channel: ntrace != 3:', channel_name
                continue

            # pre-process streams
            # sort so that channels will be ingested in NN always in same order ENZ
            stream.sort(['channel'])
            # detrend - this is meant to be equivalent to detrend or a long period
            # low-pass (e.g. at 100sec) applied to real-time data
            stream.detrend(type='linear')
            for trace in stream:
                # correct for required sampling rate
                if abs(trace.stats.sampling_rate - args.sampling_rate) / args.sampling_rate > 0.01:
                    trace.resample(args.sampling_rate)

            # apply high-pass filter if requested
            if args.hp_filter_freq > 0.0:
                stream.filter('highpass', freq=args.hp_filter_freq,
                              corners=args.hp_filter_corners)

            # check signal to noise ratio; if it fails, repeat on 1sec hp data to capture
            # local/regional events in longer period microseismic noise
            sn_type = 'BRB'
            first_pass = True
            while True:
                if is_noise:
                    snrOK = True
                else:
                    snrOK = False
                for trace in stream:
                    # slice with 1sec margin of error for arrival time to:
                    # 1) avoid increasing noise amplitude with signal,
                    # 2) avoid missing first P in signal
                    if (first_pass):
                        signal_slice = trace.slice(starttime=arrival_time - 1.0,
                                                   endtime=arrival_time - 1.0 + args.snr_window_length)
                        noise_slice = trace.slice(endtime=arrival_time - 1.0)
                    else:
                        # highpass at 1sec
                        filt_trace = trace.copy()
                        filt_trace.filter('highpass', freq=1.0, corners=4)
                        signal_slice = filt_trace.slice(starttime=arrival_time - 1.0,
                                                        endtime=arrival_time - 1.0 + args.snr_window_length)
                        noise_slice = filt_trace.slice(endtime=arrival_time - 1.0)
                        sn_type = '1HzHP'
                    # check signal to noise around arrival_time
                    # ratio of std
                    asignal = signal_slice.std()
                    anoise = noise_slice.std()
                    snr = asignal / anoise
                    print trace.id, sn_type, 'snr:', snr, 'std_signal:', asignal, 'std_noise:', anoise
                    # ratio of peak amplitudes (DO NOT USE, GIVES UNSTABLE RESULTS!)
                    #asignal = signal_slice.max()
                    #anoise = noise_slice.max()
                    #snr = np.absolute(asignal / anoise)
                    #print trace.id, sn_type, 'snr:', snr, 'amax_signal:', asignal, 'amax_noise:', anoise
                    if is_noise:
                        snrOK = snrOK and snr <= MAX_SNR_NOISE
                        if not snrOK:
                            break
                    else:
                        snrOK = snrOK or snr >= args.snr_accept
                if (first_pass and not snrOK and args.hp_filter_freq < 0.0):
                    first_pass = False
                    continue
                else:
                    break
            if (not snrOK):
                if is_noise:
                    print 'Skipping channel:', sn_type, 'snr >', MAX_SNR_NOISE, \
                        'on one or more traces:', channel_name
                else:
                    print 'Skipping channel:', sn_type, 'snr < args.snr_accept:', \
                        args.snr_accept, 'on all traces:', channel_name
                continue

            # trim data to required window
            # try to make sure samples and start/end times align as closely as possible to first trace
            trace = stream.traces[0]
            trace = trace.slice(starttime=start_time,
                                endtime=start_time + args.window_length,
                                nearest_sample=True)
            start_time = trace.stats.starttime
            stream = stream.slice(starttime=start_time,
                                  endtime=start_time + args.window_length,
                                  nearest_sample=True)
            cstart_time = '%04d.%02d.%02d.%02d.%02d.%02d.%03d' % \
                (start_time.year, start_time.month, start_time.day,
                 start_time.hour, start_time.minute, start_time.second,
                 start_time.microsecond // 1000)

            # process each trace
            try:
                for trace in stream:
                    # correct for overall sensitivity or gain
                    trace.normalize(trace.stats.response.instrument_sensitivity.value)
                    trace.data = trace.data.astype(np.float32)
                    # write miniseed
                    #tracefile = os.path.join(datapath, 'mseed', trace.id + '.' + cstart_time + '.mseed')
                    #trace.write(tracefile, format='MSEED', encoding='FLOAT32')
                    #print 'Channel written:', tracefile, trace.count(), 'samples'
            except AttributeError as err:
                print 'Skipping channel:', channel_name, ': Error applying trace.normalize():', err

            filename_root = channel_name + '.' + cstart_time

            # write raw miniseed
            streamfile = os.path.join(datapath, 'mseed_raw', filename_root + '.mseed')
            stream.write(streamfile, format='MSEED', encoding='FLOAT32')
            print 'Stream written:', stream.count(), 'traces:'
            print '  java net.alomax.seisgram2k.SeisGram2K', streamfile

            # store absolute maximum
            stream_max = np.absolute(stream.max()).max()
            # normalize by absolute maximum
            stream.normalize(global_max=True)

            # 20180521 AJL
            # spherical coordinates
            # raw data always in same order ENZ
            # tensor indexing is [traces, datapoints, comps]
            if args.spherical:
                rad2deg = 180.0 / math.pi
                # calculate modulus
                temp_square = np.add(np.square(stream.traces[0].data),
                                     np.add(np.square(stream.traces[1].data),
                                            np.square(stream.traces[2].data)))
                temp_modulus = np.sqrt(temp_square)
                # calculate azimuth
                temp_azimuth = np.add(
                    np.multiply(np.arctan2(stream.traces[0].data, stream.traces[1].data), rad2deg),
                    180.0)
                # calculate inclination
                temp_inclination = np.multiply(
                    np.arcsin(np.divide(stream.traces[2].data, temp_modulus)), rad2deg)
                # reset stream data to spherical coordinates
                stream.traces[0].data = temp_inclination
                stream.traces[1].data = temp_azimuth
                temp_modulus = np.multiply(temp_modulus, 100.0)  # increase scale for plotting purposes
                stream.traces[2].data = temp_modulus

            # put absolute maximum normalization in first element of data array, to seed NN magnitude estimation
            # 20180816 AJL - do not mix max with data
            #for trace in stream:
            #    trace.data[0] = stream_max
            print 'stream_max', stream_max

            # write processed miniseed
            streamfile = os.path.join(datapath, 'mseed', filename_root + '.mseed')
            stream.write(streamfile, format='MSEED', encoding='FLOAT32')
            print 'Stream written:', stream.count(), 'traces:'
            print '  java net.alomax.seisgram2k.SeisGram2K', streamfile

            # write event waveforms and distance_id in .tfrecords
            magnitude_id = 0
            depth_id = 0
            azimuth_id = 0
            if not is_noise:
                # if args.n_distances != None:
                #     distance_id_count[dataset][distance_id] += 1
                if args.n_magnitudes != None:
                    magnitude_id = util.magntiude2classification(magnitude, args.n_magnitudes)
                if args.n_depths != None:
                    depth_id = util.depth2classification(depth, args.n_depths)
                if args.n_azimuths != None:
                    azimuth_id = util.azimuth2classification(azimuth, args.n_azimuths)
            else:
                distance_id = -1
                distance = 0.0

            output_name = filename_root + '.tfrecords'
            output_path = os.path.join(datapath, output_name)
            writer = DataWriter(output_path)
            writer.write(stream, stream_max, distance_id, magnitude_id, depth_id, azimuth_id,
                         distance, magnitude, depth, azimuth)

            if not is_noise:
                print '==== Event stream tfrecords written:', output_name, \
                    'Dist(deg): {:.2f} Dist(km): {:.1f} ID: {}'.format(
                        distance, geo.degrees2kilometers(distance), distance_id), \
                    ', Mag: {:.2f} ID: {}'.format(magnitude, magnitude_id), \
                    ', Depth(km): {:.1f} ID: {}'.format(depth, depth_id), \
                    ', Az(deg): {:.1f} ID: {}'.format(azimuth, azimuth_id)
            else:
                print '==== Noise stream tfrecords written:', output_name, \
                    'ID: Dist {}, Mag {}, Depth {}, Az {}'.format(
                        distance_id, magnitude_id, depth_id, azimuth_id)

            # write event data
            if not is_noise:
                filename = os.path.join(datapath, 'xml', filename_root + '.xml')
                event.write(filename, 'QUAKEML')

            n_streams += 1
            n_count += 1

        except KeyboardInterrupt:
            print 'Stopping: KeyboardInterrupt'
            break

        except Exception as ex:
            print 'Skipping stream: Exception:', ex
            traceback.print_exc()
            continue

    print n_streams, 'streams:', 'written to:', args.outpath

    # save event_channel_dict
    with open(os.path.join(args.outpath, 'event_channel_dict.pkl'), 'w') as file:
        file.write(pickle.dumps(event_channel_dict))
def main(_):
    stream_files = [file for file in os.listdir(FLAGS.stream_dir)
                    if fnmatch.fnmatch(file, '*.mseed')]
    print "List of streams to analyze", stream_files

    # Create dir to store tfrecords
    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    # Dictionary of nb of events per tfrecords
    metadata = {}
    output_metadata = os.path.join(FLAGS.output_dir, "metadata.json")

    # Load Catalog
    print "+ Loading Catalog"
    for stream_file in stream_files:
        cat = load_catalog(FLAGS.catalog)
        #cat = filter_catalog(cat, stream_file.split(".mseed")[0])

        # Load stream
        stream_path = os.path.join(FLAGS.stream_dir, stream_file)
        print "+ Loading Stream {}".format(stream_file)
        stream = read(stream_path)
        print '+ Preprocessing stream'
        stream = preprocess_stream(stream)

        # Filter catalog according to the loaded stream
        start_date = stream[0].stats.starttime
        end_date = stream[-1].stats.endtime
        print("-- Start Date={}, End Date={}".format(start_date, end_date))
        filtered_catalog = cat[((cat.utc_timestamp >= start_date)
                                & (cat.utc_timestamp < end_date))]
        #print(1111, cat)

        # Propagation time from source to station
        #travel_time = get_travel_time(filtered_catalog)

        # Write event waveforms and cluster_id in .tfrecords
        output_name = stream_file.split(".mseed")[0] + ".tfrecords"
        output_path = os.path.join(FLAGS.output_dir, output_name)
        writer = DataWriter(output_path)
        print("+ Creating tfrecords for {} events".format(filtered_catalog.shape[0]))

        # Loop over all events in the considered stream
        for event_n in range(filtered_catalog.shape[0]):
            event_time = filtered_catalog.utc_timestamp.values[event_n]
            #event_time += travel_time[event_n]
            st_event = stream.slice(UTCDateTime(event_time),
                                    UTCDateTime(event_time) + FLAGS.window_size).copy()
            cluster_id = filtered_catalog.cluster_id.values[event_n]
            #cluster_id = 1
            n_traces = len(st_event)
            # If there is no trace, skip this waveform
            if n_traces == 0:
                continue
            n_samples = len(st_event[0].data)
            n_pts = st_event[0].stats.sampling_rate * FLAGS.window_size + 1
            if (len(st_event) == 3) and (n_pts == n_samples):
                # Use filter_small_ampitude to get rid of super small amplitude, 2017/12/6
                ampl_e, ampl_n, ampl_z = filter_small_ampitude(st_event, n_samples)
                if ampl_e > 0.3 or ampl_n > 0.3 or ampl_z > 0.3:
                    continue
                a = remove_repeat(st_event, n_samples)
                if a[0] > 0.3 or a[1] > 0.3 or a[2] > 0.3:
                    continue
                # Write tfrecords
                writer.write(st_event.copy().resample(10).normalize(), cluster_id)
                #writer.write(st_event.copy().resample(10).filter('bandpass', freqmin=0.5, freqmax=20).normalize(), cluster_id)
                #print(len(st_event[0]))

                # Save window and cluster_id
                if FLAGS.save_mseed:
                    output_label = "{}_{}.mseed".format(
                        st_event[0].stats.station,
                        str(st_event[0].stats.starttime).replace(':', '_'))
                    output_mseed_dir = os.path.join(FLAGS.output_dir, "mseed")
                    if not os.path.exists(output_mseed_dir):
                        os.makedirs(output_mseed_dir)
                    output_mseed = os.path.join(output_mseed_dir, output_label)
                    st_event.write(output_mseed, format="MSEED")

                # Plot events
                if FLAGS.plot:
                    trace = st_event[0].filter('bandpass', freqmin=0.5, freqmax=20)
                    #trace = st_event[0]
                    viz_dir = os.path.join(FLAGS.output_dir, "viz",
                                           stream_file.split(".mseed")[0])
                    if not os.path.exists(viz_dir):
                        os.makedirs(viz_dir)
                    trace.plot(outfile=os.path.join(
                        viz_dir,
                        # changed 2017/11/25: name windows by station and start time
                        #"event_{}_cluster_{}.png".format(idx, cluster_id)))
                        "event_{}_{}.png".format(
                            st_event[0].stats.station,
                            str(st_event[0].stats.starttime).replace(':', '_'))))
            else:
                print "Missing waveform for event:", UTCDateTime(event_time)

        # Cleanup writer
        print("Number of events written={}".format(writer._written))
        writer.close()
        # Write metadata
        metadata[stream_file.split(".mseed")[0]] = writer._written
        write_json(metadata, output_metadata)
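# --- Usage sketch (not from the original repository) --------------------
# After any of the scripts above has run, metadata.json maps each stream (or
# tfrecords file) name to the number of windows written. A minimal check,
# assuming the same output directory that was passed to the script; the example
# path is hypothetical.
import json
import os

def print_window_counts(output_dir):
    """Print the per-stream window counts recorded in metadata.json."""
    with open(os.path.join(output_dir, "metadata.json")) as f:
        metadata = json.load(f)
    for name, n_windows in sorted(metadata.items()):
        print "{}: {} windows".format(name, n_windows)

# print_window_counts("output/tfrecords")  # hypothetical path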