def write(stream_files, subfolder):
    if not os.path.exists(os.path.join(output_dir, subfolder)):
        os.makedirs(os.path.join(output_dir, subfolder))
    if not os.path.exists(os.path.join(output_dir, subfolder,
                                       cfg.output_tfrecords_dir_positives)):
        os.makedirs(os.path.join(output_dir, subfolder,
                                 cfg.output_tfrecords_dir_positives))

    # Write event waveforms and cluster_id to .tfrecords.
    output_name = args.file_name
    output_path = os.path.join(output_dir, subfolder,
                               cfg.output_tfrecords_dir_positives, output_name)
    writer = DataWriter(output_path)

    for stream_file in stream_files:
        stream_path = os.path.join(dataset_dir, cfg.mseed_event_dir, stream_file)
        st_event = read(stream_path)
        st_event = preprocess_stream(st_event)

        # Select only the specified channels.
        st_event_select = utils.select_components(st_event, cfg)

        # Location clusters: default to a single location (cluster_id = 0).
        cluster_id = 0
        if cat is not None:
            stream_start_time = st_event[0].stats.starttime
            stream_end_time = st_event[-1].stats.endtime
            station = st_event[0].stats.station
            lat, lon, depth = cat.getLatLongDepth(stream_start_time,
                                                  stream_end_time, station)
            c = clusters.nearest_cluster(lat, lon, depth)
            cluster_id = c.id
            print("[tfrecords positives] Assigning cluster " + str(cluster_id)
                  + " to event.")

        if utils.check_stream(st_event_select, cfg):
            # Write the window and its cluster label to the tfrecords file.
            writer.write(st_event_select, cluster_id)

    # Cleanup writer.
    print("[tfrecords positives] Number of windows written={}".format(
        writer._written))
    writer.close()
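# Hypothetical sketch of what one DataWriter record could contain: a serialized
# tf.train.Example holding the flattened window and its cluster_id. The real
# DataWriter in this repo may store different field names; 'window' and
# 'cluster_id' here are assumptions for illustration only.
import numpy as np
import tensorflow as tf

def make_example(window_data, cluster_id):
    # window_data: float32 array of shape (win_size, n_channels).
    features = tf.train.Features(feature={
        'window': tf.train.Feature(float_list=tf.train.FloatList(
            value=np.asarray(window_data, np.float32).ravel())),
        'cluster_id': tf.train.Feature(int64_list=tf.train.Int64List(
            value=[int(cluster_id)])),
    })
    return tf.train.Example(features=features)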
def write(stream_files, subfolder):
    if not os.path.exists(os.path.join(output_dir, subfolder)):
        os.makedirs(os.path.join(output_dir, subfolder))
    if not os.path.exists(os.path.join(output_dir, subfolder,
                                       cfg.output_tfrecords_dir_negatives)):
        os.makedirs(os.path.join(output_dir, subfolder,
                                 cfg.output_tfrecords_dir_negatives))

    # Write noise waveforms and cluster_id to .tfrecords.
    output_name = args.file_name
    output_path = os.path.join(output_dir, subfolder,
                               cfg.output_tfrecords_dir_negatives, output_name)
    writer = DataWriter(output_path)

    for stream_file in stream_files:
        stream_path = os.path.join(dataset_dir, cfg.mseed_noise_dir, stream_file)
        st_event = read(stream_path)

        # Filter the stream if a filter frequency is configured.
        if cfg.filterfreq:
            st_event = utils.filter_stream(st_event)

        # Select only the specified channels.
        st_event_select = utils.select_components(st_event, cfg)

        # Noise windows carry the label -1.
        cluster_id = -1
        if utils.check_stream(st_event_select, cfg):
            writer.write(st_event_select, cluster_id)

    # Cleanup writer.
    print("[tfrecords negatives] Number of windows written={}".format(
        writer._written))
    writer.close()
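# A minimal usage sketch, not from the source: list the noise .mseed files and
# split them into train and test tfrecords via the write() above. The helper
# name split_and_write and the 90/10 ratio are assumptions; dataset_dir and
# cfg come from the surrounding script.
import os
import random

def split_and_write(train_fraction=0.9):
    noise_dir = os.path.join(dataset_dir, cfg.mseed_noise_dir)
    stream_files = sorted(f for f in os.listdir(noise_dir)
                          if f.endswith(".mseed"))
    random.shuffle(stream_files)
    n_train = int(len(stream_files) * train_fraction)
    write(stream_files[:n_train], "train")
    write(stream_files[n_train:], "test")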
def processMseed(stream_path, cat, output_dir, plot, onlyStation):
    stream_file = os.path.basename(stream_path)

    # Load and preprocess the stream.
    print("[obtain training windows] Loading Stream {}".format(stream_file))
    stream = read(stream_path)
    print("[obtain training windows] Preprocessing stream")
    stream = utils.preprocess_stream(stream)
    stream_start_time = stream[0].stats.starttime
    stream_end_time = stream[-1].stats.endtime
    print(stream)

    # Slice the input stream horizontally, one station at a time.
    z_streams = stream.select(component="Z")
    for z_stream in z_streams:
        station = z_stream.stats.station
        print("[obtain training windows] found station " + station + ".")
        if onlyStation is not None and onlyStation != station:
            continue
        print("[obtain training windows] ---------- Station " + station + " ---------")
        substream = stream.select(station=station)

        ptime = getPtime(cat, stream_start_time, stream_end_time, station,
                         phase_hint="P")
        if ptime is not None:
            print("[obtain training windows] Found event in the catalog: " + str(ptime))
            event_window_start = ptime - cfg.pwave_window / 2
            event_window_end = ptime + cfg.pwave_window / 2

            # Save the full single-station stream.
            full_mseed_path = (os.path.join(output_dir, cfg.mseed_dir) + "/"
                               + utils.fileNameWithoutExtension(stream_file)
                               + "_" + station + ".mseed")
            print("[obtain training windows] Extracting full stream and saving into "
                  + full_mseed_path)
            if plot:
                customPlot(substream, ptime,
                           os.path.join(output_dir, cfg.png_dir) + "/"
                           + utils.fileNameWithoutExtension(stream_file)
                           + "_" + station + ".png")
            substream.write(full_mseed_path, format="MSEED")

            # Slice the input stream vertically, by time. The rule below is
            # shown as a standalone helper after this function.
            sys.stdout.write("[obtain training windows] Extracting positive and "
                             "negative windows and saving into " + output_dir + ":\n")
            win_gen = substream.slide(window_length=cfg.window_size,
                                      step=cfg.window_stride,
                                      include_partial_windows=False)
            total_time = substream[0].stats.endtime - substream[0].stats.starttime
            max_windows = (total_time - cfg.window_size) / cfg.window_stride
            num_negatives = 0
            num_positives = 0
            num_errors = 0
            num_skipped = 0
            for idx, win in enumerate(win_gen):
                if utils.check_stream(win, cfg, False):
                    window_start = win[0].stats.starttime.timestamp
                    window_end = win[-1].stats.endtime.timestamp
                    # Event window: [ptime - pwave_window/2, ptime + pwave_window/2].
                    if (window_start <= event_window_start) and (window_end >= event_window_end):
                        # Positive: the window fully contains the event window.
                        win.write(os.path.join(output_dir, cfg.mseed_event_dir) + "/"
                                  + utils.fileNameWithoutExtension(stream_file)
                                  + "_" + station + "_" + str(idx) + ".mseed",
                                  format="MSEED")
                        if plot:
                            win.plot(outfile=os.path.join(output_dir, cfg.png_event_dir)
                                     + "/" + utils.fileNameWithoutExtension(stream_file)
                                     + "_" + station + "_" + str(idx) + ".png")
                        sys.stdout.write("\033[92m.\033[0m")
                        num_positives += 1
                    elif (window_end < event_window_start - cfg.window_avoid_negatives_before) or \
                         (window_start > event_window_end + cfg.window_avoid_negatives_after):
                        # Negative: the window is safely clear of the event window.
                        win.write(os.path.join(output_dir, cfg.mseed_noise_dir) + "/"
                                  + utils.fileNameWithoutExtension(stream_file)
                                  + "_" + station + "_noise" + str(idx) + ".mseed",
                                  format="MSEED")
                        if plot:
                            win.plot(outfile=os.path.join(output_dir, cfg.png_noise_dir)
                                     + "/" + utils.fileNameWithoutExtension(stream_file)
                                     + "_" + station + "_noise" + str(idx) + ".png")
                        sys.stdout.write("\033[91m.\033[0m")
                        sys.stdout.flush()
                        num_negatives += 1
                    else:
                        # Skipped: overlaps the event window only partially.
                        sys.stdout.write(".")
                        num_skipped += 1
                else:
                    sys.stdout.write("\033[93m.\033[0m")
                    sys.stdout.flush()
                    num_errors += 1
            print("\n[obtain training windows] " + str(num_positives)
                  + " positive windows obtained.")
            print("[obtain training windows] " + str(num_negatives)
                  + " negative windows obtained.")
            print("[obtain training windows] " + str(num_skipped)
                  + " windows skipped (neither clear positive nor negative).")
            print("[obtain training windows] " + str(num_errors)
                  + " windows discarded because of errors (config debug=True for details).")
        else:
            print("[obtain training windows] \033[93m WARNING\033[0m No event detected at station "
                  + station + " from " + stream_path + ".")
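# The labeling rule used above, factored into a pure function for clarity.
# This is a sketch mirroring the source logic; the name label_window and the
# string return values are illustrative, not part of the repo.
def label_window(window_start, window_end, event_window_start, event_window_end,
                 avoid_before, avoid_after):
    """Return 'positive', 'negative', or 'skip' for a time window.

    A window is positive only if it fully contains the event window; it is
    negative only if it ends sufficiently before, or starts sufficiently
    after, the event window; anything in between is skipped.
    """
    if window_start <= event_window_start and window_end >= event_window_end:
        return "positive"
    if (window_end < event_window_start - avoid_before or
            window_start > event_window_end + avoid_after):
        return "negative"
    return "skip"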
def main(args):
    setproctitle.setproctitle('quakenet_predict')

    if not os.path.exists(cfg.CHECKPOINT_DIR):
        print("\033[91m ERROR!!\033[0m Missing directory " + cfg.CHECKPOINT_DIR
              + ". Run step 4 first.")
        sys.exit(0)
    ckpt = tf.train.get_checkpoint_state(cfg.CHECKPOINT_DIR)

    # Remove previous output directory.
    if os.path.exists(cfg.OUTPUT_PREDICT_BASE_DIR):
        shutil.rmtree(cfg.OUTPUT_PREDICT_BASE_DIR)
    os.makedirs(cfg.OUTPUT_PREDICT_BASE_DIR)
    os.makedirs(os.path.join(cfg.OUTPUT_PREDICT_BASE_DIR, "viz"))
    if cfg.save_sac:
        os.makedirs(os.path.join(cfg.OUTPUT_PREDICT_BASE_DIR, "sac"))

    # Load and preprocess the stream.
    stream_path = args.stream_path
    stream_file = os.path.split(stream_path)[-1]
    print("+ Loading Stream {}".format(stream_file))
    stream = read(stream_path)
    print("+ Preprocessing stream")
    stream = utils.preprocess_stream(stream)

    # Create the catalog name in which the events are stored.
    catalog_name = os.path.split(stream_file)[-1].split(".mseed")[0] + ".csv"
    output_catalog = os.path.join(cfg.OUTPUT_PREDICT_BASE_DIR, catalog_name)
    print("Catalog created to store events", output_catalog)

    # Dictionary to store info on detected events.
    events_dic = {"start_time": [],
                  "end_time": [],
                  "cluster_id": [],
                  "clusters_prob": []}

    total_time_in_sec = stream[0].stats.endtime - stream[0].stats.starttime

    # Stream data enters the graph through a placeholder.
    samples = {
        'data': tf.placeholder(tf.float32,
                               shape=(1, cfg.win_size, 3),
                               name='input_data'),
        'cluster_id': tf.placeholder(tf.int64,
                                     shape=(1,),
                                     name='input_label')
    }

    # Set up the model and validation metrics.
    model = models.get(cfg.model, samples, cfg, cfg.CHECKPOINT_DIR,
                       is_training=False)

    with tf.Session() as sess:
        model.load(sess)
        print('Predicting using model at step {}'.format(
            sess.run(model.global_step)))
        step = tf.train.global_step(sess, model.global_step)

        n_events = 0
        time_start = time.time()
        try:
            fromTime = UTCDateTime(2015, 2, 5, 5, 40, 18, 10000)
            win = stream.slice(fromTime, fromTime + cfg.WINDOW_SIZE).copy()
            win.plot(outfile=cfg.OUTPUT_PREDICT_BASE_DIR + "/" + stream_file
                     + "_input_window_.png")
            idx = 0

            # Fetch class_proba and label.
            to_fetch = [samples['data'],
                        model.layers['class_prob'],
                        model.layers['class_prediction']]

            # Feed the window and a fake cluster_id (required by the net,
            # although it is the quantity being predicted).
            if utils.check_stream(win, cfg):
                feed_dict = {samples['data']: utils.fetch_window_data(win, cfg),
                             samples['cluster_id']: np.array([0])}
                sample, class_prob_, cluster_id = sess.run(to_fetch, feed_dict)
            else:
                print("\033[91m ERROR!!\033[0m Incomplete data.")
                sys.exit(0)

            # Keep only the cluster probabilities, drop the noise probability.
            clusters_prob = class_prob_[0, 1::]
            cluster_id -= 1  # label for noise = -1, cluster labels in {0..n_clusters}

            is_event = cluster_id[0] > -1
            if is_event:
                n_events += 1
                events_dic["start_time"].append(win[0].stats.starttime)
                events_dic["end_time"].append(win[0].stats.endtime)
                events_dic["cluster_id"].append(cluster_id[0])
                events_dic["clusters_prob"].append(list(clusters_prob))

            if idx % 1000 == 0:
                print("Analyzing {} records".format(win[0].stats.starttime))

            if is_event:
                win_filtered = win.copy()
                win_filtered.plot(outfile=os.path.join(
                    cfg.OUTPUT_PREDICT_BASE_DIR, "viz",
                    "event_{}_cluster_{}.png".format(idx, cluster_id)))

            print("found {} events".format(n_events))

        except KeyboardInterrupt:
            print('Interrupted at time {}.'.format(win[0].stats.starttime))
            print("processed {} windows, found {} events".format(idx + 1, n_events))
            print("Run time: ", time.time() - time_start)

    df = pd.DataFrame.from_dict(events_dic)
    df.to_csv(output_catalog)

    customPlot(stream, cfg.OUTPUT_PREDICT_BASE_DIR + "/" + stream_file + ".png",
               events_dic["start_time"])
    print("Run time: ", time.time() - time_start)
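# A minimal sketch (assuming the same sess, samples, model, cfg, utils and np
# as in main above) extending the single fixed window to a sliding scan of the
# whole stream. The non-overlapping step equal to cfg.window_size is an
# illustrative choice, not taken from the source.
n_events = 0
to_fetch = [model.layers['class_prob'], model.layers['class_prediction']]
for idx, win in enumerate(stream.slide(window_length=cfg.window_size,
                                       step=cfg.window_size)):
    if not utils.check_stream(win, cfg):
        continue
    feed_dict = {samples['data']: utils.fetch_window_data(win, cfg),
                 samples['cluster_id']: np.array([0])}
    class_prob_, cluster_id = sess.run(to_fetch, feed_dict)
    cluster_id = cluster_id - 1  # shift so that noise maps to -1
    if cluster_id[0] > -1:
        n_events += 1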
def processMseed(stream_path, cat, output_dir, plot, onlyStation):
    stream_file = os.path.basename(stream_path)

    # Load and preprocess the stream.
    print("[obtain training windows] Loading Stream {}".format(stream_file))
    stream = read(stream_path)
    print("[obtain training windows] Preprocessing stream")
    stream = preprocess_stream(stream)
    stream_start_time = stream[0].stats.starttime
    stream_end_time = stream[-1].stats.endtime

    # Slice the input stream horizontally, one station at a time.
    z_streams = stream.select(component="Z")
    for z_stream in z_streams:
        station = z_stream.stats.station
        if onlyStation is not None and onlyStation != station:
            continue
        print("[obtain training windows] ---------- Station " + station + " ---------")
        substream = stream.select(station=station)

        ptime = cat.getPtime(stream_start_time, stream_end_time, station)
        if ptime is not None:
            print("[obtain training windows] Found event in the catalog: " + str(ptime))
            event_window_start = ptime - cfg.pwave_window / 2
            event_window_end = ptime + cfg.pwave_window / 2

            # Save the full single-station stream.
            full_mseed_path = (os.path.join(output_dir, cfg.mseed_dir) + "/"
                               + utils.fileNameWithoutExtension(stream_file)
                               + "_" + station + ".mseed")
            print("[obtain training windows] Extracting full stream and saving into "
                  + full_mseed_path)
            if plot:
                customPlot(substream, ptime,
                           os.path.join(output_dir, cfg.png_dir) + "/"
                           + utils.fileNameWithoutExtension(stream_file)
                           + "_" + station + ".png")
            substream.write(full_mseed_path, format="MSEED")

            # Slice the input stream vertically, by time; see the window-count
            # arithmetic sketch after this function.
            sys.stdout.write("[obtain training windows] Extracting positive and "
                             "negative windows and saving into " + output_dir + ":\n")
            win_gen = substream.slide(window_length=cfg.window_size,
                                      step=cfg.window_step_negatives,
                                      include_partial_windows=False)
            total_time = substream[0].stats.endtime - substream[0].stats.starttime
            max_windows = (total_time - cfg.window_size) / cfg.window_step_negatives
            num_negatives = 0
            num_positives = 0
            num_errors = 0
            num_skipped = 0
            for idx, win in enumerate(win_gen):
                if utils.check_stream(win, cfg, False):
                    window_start = win[0].stats.starttime.timestamp
                    window_end = win[-1].stats.endtime.timestamp
                    # Event window: [ptime - pwave_window/2, ptime + pwave_window/2].
                    if (window_start <= event_window_start) and (window_end >= event_window_end):
                        # Positive: the window fully contains the event window.
                        win.write(os.path.join(output_dir, cfg.mseed_event_dir) + "/"
                                  + utils.fileNameWithoutExtension(stream_file)
                                  + "_" + station + "_" + str(idx) + ".mseed",
                                  format="MSEED")
                        if plot:
                            win.plot(outfile=os.path.join(output_dir, cfg.png_event_dir)
                                     + "/" + utils.fileNameWithoutExtension(stream_file)
                                     + "_" + station + "_" + str(idx) + ".png")
                        sys.stdout.write("\033[92m.\033[0m")
                        num_positives += 1
                    elif (window_end < event_window_start - cfg.window_avoid_negatives) or \
                         (window_start > event_window_end + cfg.window_avoid_negatives):
                        # Negative: the window is safely clear of the event window.
                        win.write(os.path.join(output_dir, cfg.mseed_noise_dir) + "/"
                                  + utils.fileNameWithoutExtension(stream_file)
                                  + "_" + station + "_noise" + str(idx) + ".mseed",
                                  format="MSEED")
                        if plot:
                            win.plot(outfile=os.path.join(output_dir, cfg.png_noise_dir)
                                     + "/" + utils.fileNameWithoutExtension(stream_file)
                                     + "_" + station + "_noise" + str(idx) + ".png")
                        sys.stdout.write("\033[91m.\033[0m")
                        sys.stdout.flush()
                        num_negatives += 1
                    else:
                        # Skipped: overlaps the event window only partially.
                        sys.stdout.write(".")
                        num_skipped += 1
                else:
                    sys.stdout.write("\033[93m.\033[0m")
                    sys.stdout.flush()
                    num_errors += 1
            print("\n[obtain training windows] " + str(num_positives)
                  + " positive windows obtained.")
            print("[obtain training windows] " + str(num_negatives)
                  + " negative windows obtained.")
            print("[obtain training windows] " + str(num_skipped)
                  + " windows skipped (neither clear positive nor negative).")
            print("[obtain training windows] " + str(num_errors)
                  + " windows discarded because of errors (config debug=True for details).")
        else:
            print("[obtain training windows] \033[93m WARNING\033[0m No event detected at station "
                  + station + " from " + stream_path + ".")
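# Illustrative arithmetic, not from the source: with an hour of data, a 10 s
# window and a 5 s step (all assumed values), the slide generator yields
# roughly (3600 - 10) / 5 = 718 full windows, matching max_windows above.
total_time = 3600.0   # seconds of data in the substream (assumed)
window_size = 10.0    # cfg.window_size (assumed)
window_step = 5.0     # cfg.window_step_negatives (assumed)
max_windows = (total_time - window_size) / window_step
print(int(max_windows))  # -> 718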
def predict(path, stream_file, sess, model, samples, cat):
    global truePositives
    global falsePositives
    global trueNegatives
    global falseNegatives
    global perf_total_predictions
    global perf_total_time

    # Load and preprocess the stream.
    stream_path = os.path.join(path, stream_file)
    stream_file = os.path.split(stream_path)[-1]
    stream_file_without_extension = os.path.split(stream_file)[-1].split(".mseed")[0]
    print("[classify] Loading Stream {}".format(stream_file))
    stream = read(stream_path)
    print("[classify] Preprocessing stream")
    stream = utils.preprocess_stream(stream)

    # Select only the specified channels.
    stream_select = utils.select_components(stream, cfg)

    outputSubdir = os.path.join(output_dir, stream_file_without_extension)
    if os.path.exists(outputSubdir):
        shutil.rmtree(outputSubdir)
    os.makedirs(outputSubdir)
    outputSubdirSubplots = os.path.join(outputSubdir, "subPlots")
    os.makedirs(outputSubdirSubplots)
    os.makedirs(os.path.join(outputSubdir, "viz"))
    if cfg.save_sac:
        os.makedirs(os.path.join(outputSubdir, "sac"))

    # Create the catalog name in which the events are stored.
    catalog_name = stream_file_without_extension + ".csv"
    output_catalog = os.path.join(output_dir, catalog_name)
    print("[classify] Catalog created to store events", output_catalog)

    # Dictionaries to store info on detected events and evaluation outcomes.
    events_dic = {"start_time": [], "end_time": [],
                  "cluster_id": [], "clusters_prob": []}
    true_positive_dic = {"start_time": [], "end_time": [],
                         "cluster_id": [], "clusters_prob": []}
    false_positive_dic = {"start_time": [], "end_time": [],
                          "cluster_id": [], "clusters_prob": []}
    false_negative_dic = {"start_time": [], "end_time": [],
                          "cluster_id": [], "clusters_prob": []}
    missed_dic = {"start_time": [], "end_time": [],
                  "cluster_id": [], "clusters_prob": []}

    # Windows generator.
    win_gen = stream_select.slide(window_length=cfg.window_size,
                                  step=cfg.window_step_predict,
                                  include_partial_windows=False)
    total_time_in_sec = stream_select[0].stats.endtime - stream_select[0].stats.starttime
    max_windows = (total_time_in_sec - cfg.window_size) / cfg.window_step_predict

    step = tf.train.global_step(sess, model.global_step)
    n_events = 0
    time_start = time.time()

    # Ground truth: the event window around the catalog P-arrival time.
    station = stream[0].stats.station
    stream_start_time = stream[0].stats.starttime
    stream_end_time = stream[-1].stats.endtime
    ptime = cat.getPtime(stream_start_time, stream_end_time, station)
    event_window_start = ptime - cfg.pwave_window / 2
    event_window_end = ptime + cfg.pwave_window / 2
    print("ptime=" + str(ptime))
    print("event_window_start=" + str(event_window_start)
          + "<->event_window_end=" + str(event_window_end))

    try:
        for idx, win in enumerate(win_gen):
            # Check the ground truth: a window is a real positive only if it
            # fully contains the event window.
            isPositive = False
            if cat is not None:
                window_start = win[0].stats.starttime
                window_end = win[-1].stats.endtime
                print("window: " + str(window_start) + "-" + str(window_end))
                if (window_start <= event_window_start) and (window_end >= event_window_end):
                    isPositive = True
                    print("REAL POSITIVE")
                else:
                    isPositive = False
                    print("REAL NEGATIVE")

            # Fetch class_proba and label.
            to_fetch = [samples['data'],
                        model.layers['class_prob'],
                        model.layers['class_prediction']]

            # Feed the window and a fake cluster_id (required by the net,
            # although it is the quantity being predicted).
            if utils.check_stream(win, cfg):
                feed_dict = {samples['data']: utils.fetch_window_data(win, cfg),
                             samples['cluster_id']: np.array([0])}
                perf_start_time = time.time()
                sample, class_prob_, cluster_id = sess.run(to_fetch, feed_dict)
                perf_elapsed_time = time.time() - perf_start_time
                perf_total_time = perf_total_time + perf_elapsed_time
                perf_total_predictions = perf_total_predictions + 1
                print("Prediction time = " + str(perf_elapsed_time))
                print("Average time per prediction in secs = "
                      + str(perf_total_time / perf_total_predictions))
            else:
                missed_dic["start_time"].append(win[0].stats.starttime)
                continue

            # Keep only the cluster probabilities, drop the noise probability.
            clusters_prob = class_prob_[0, 1::]
            cluster_id -= 1  # label for noise = -1, cluster labels in {0..n_clusters}

            is_event = cluster_id[0] > -1
            if is_event:
                print("PREDICTED POSITIVE")
                n_events += 1
            else:
                print("PREDICTED NEGATIVE")

            if is_event:
                events_dic["start_time"].append(win[0].stats.starttime)
                events_dic["end_time"].append(win[0].stats.endtime)
                events_dic["cluster_id"].append(cluster_id[0])
                events_dic["clusters_prob"].append(list(clusters_prob))
                if evaluation and isPositive:
                    # Hit (true positive).
                    sys.stdout.write("\033[92mP\033[0m")
                    sys.stdout.flush()
                    truePositives = truePositives + 1
                    true_positive_dic["start_time"].append(win[0].stats.starttime)
                    true_positive_dic["end_time"].append(win[0].stats.endtime)
                    true_positive_dic["cluster_id"].append(cluster_id[0])
                    true_positive_dic["clusters_prob"].append(list(clusters_prob))
                elif evaluation:
                    # Miss (false positive).
                    sys.stdout.write("\033[91mP\033[0m")
                    sys.stdout.flush()
                    falsePositives = falsePositives + 1
                    false_positive_dic["start_time"].append(win[0].stats.starttime)
                    false_positive_dic["end_time"].append(win[0].stats.endtime)
                    false_positive_dic["cluster_id"].append(cluster_id[0])
                    false_positive_dic["clusters_prob"].append(list(clusters_prob))
            else:
                if evaluation and isPositive:
                    # Miss (false negative).
                    sys.stdout.write("\033[91mN\033[0m")
                    sys.stdout.flush()
                    falseNegatives = falseNegatives + 1
                    false_negative_dic["start_time"].append(win[0].stats.starttime)
                    false_negative_dic["end_time"].append(win[0].stats.endtime)
                    false_negative_dic["cluster_id"].append(cluster_id[0])
                    false_negative_dic["clusters_prob"].append(list(clusters_prob))
                elif evaluation:
                    # Hit (true negative).
                    sys.stdout.write("\033[92mN\033[0m")
                    sys.stdout.flush()
                    trueNegatives = trueNegatives + 1

            if cfg.save_sac and is_event:
                win_filtered = win.copy()
                win_filtered.write(os.path.join(output_dir, "sac",
                                                "event_{}_cluster_{}.sac".format(idx, cluster_id)),
                                   format="SAC")

            if idx >= max_windows:
                print("[classify] stopped after {} windows".format(max_windows))
                print("[classify] found {} events".format(n_events))
                break

    except KeyboardInterrupt:
        print("[classify] Interrupted at time {}.".format(win[0].stats.starttime))
        print("[classify] processed {} windows, found {} events".format(idx + 1, n_events))
        print("[classify] Run time: ", time.time() - time_start)

    df = pd.DataFrame.from_dict(events_dic)
    df.to_csv(output_catalog)

    # Plot everything.
    customPlot(stream, output_dir + "/" + stream_file + "_" + str(idx) + ".png",
               events_dic["start_time"], missed_dic["start_time"],
               true_positive_dic["start_time"], false_positive_dic["start_time"],
               false_negative_dic["start_time"])

    print("\n[classify] Run time: ", time.time() - time_start)
    return events_dic
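# A minimal summary sketch, assuming the global counters above have been
# accumulated over all classified streams. The metric formulas are standard
# precision/recall/F1; the print_metrics name is illustrative, not from the repo.
def print_metrics():
    detected = truePositives + falsePositives
    actual = truePositives + falseNegatives
    precision = truePositives / float(detected) if detected else 0.0
    recall = truePositives / float(actual) if actual else 0.0
    f1 = (2 * precision * recall / (precision + recall)
          if (precision + recall) else 0.0)
    print("[classify] precision={:.3f} recall={:.3f} f1={:.3f}".format(
        precision, recall, f1))
    if perf_total_predictions:
        print("[classify] avg prediction time={:.4f}s".format(
            perf_total_time / perf_total_predictions))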
def write(stream_files, subfolder):
    if not os.path.exists(os.path.join(output_dir, subfolder)):
        os.makedirs(os.path.join(output_dir, subfolder))
    if not os.path.exists(os.path.join(output_dir, subfolder,
                                       cfg.output_tfrecords_dir_positives)):
        os.makedirs(os.path.join(output_dir, subfolder,
                                 cfg.output_tfrecords_dir_positives))

    # Write event waveforms and cluster_id to .tfrecords.
    output_name = args.file_name
    output_path = os.path.join(output_dir, subfolder,
                               cfg.output_tfrecords_dir_positives, output_name)
    writer = DataWriter(output_path)

    for stream_file in stream_files:
        stream_path = os.path.join(dataset_dir, cfg.mseed_event_dir, stream_file)
        st_event = read(stream_path)

        # Filter the stream if a filter frequency is configured.
        if cfg.filterfreq:
            st_event = utils.filter_stream(st_event)

        # Select only the specified channels.
        st_event_select = utils.select_components(st_event, cfg)

        # Location clusters: default to a single location (cluster_id = 0).
        lat = 0
        lon = 0
        depth = 0
        cluster_id = 0
        if cat is not None:
            stream_start_time = st_event[0].stats.starttime
            stream_end_time = st_event[-1].stats.endtime
            station = st_event[0].stats.station
            lat, lon, depth = cat.getLatLongDepth(stream_start_time,
                                                  stream_end_time, station)
            c = clusters.nearest_cluster(lat, lon, depth)
            if c is not None:  # can be None with polygon-based clustering
                cluster_id = c.id
            else:
                cluster_id = -1  # signals that the event has to be discarded
            print("[tfrecords positives] Assigning cluster " + str(cluster_id)
                  + " to event (lat = " + str(lat) + ", lon = " + str(lon) + ").")

        if cluster_id >= 0:  # no clustering, or a valid cluster
            if utils.check_stream(st_event_select, cfg):
                writer.write(st_event_select, cluster_id)
        else:
            print("[tfrecords positives] \033[91m WARNING!!\033[0m Discarding "
                  "point as no cluster found for the given lat=" + str(lat)
                  + ", lon=" + str(lon) + ", depth=" + str(depth))

    # Cleanup writer.
    print("[tfrecords positives] Number of windows written={}".format(
        writer._written))
    writer.close()
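# Hypothetical sketch of a centroid-based clusters.nearest_cluster. The real
# module may use polygon membership instead (it can return None, as handled
# above), so the Cluster class, the distance metric, and the function
# signature here are all assumptions for illustration.
import math

class Cluster(object):
    def __init__(self, id, lat, lon, depth):
        self.id, self.lat, self.lon, self.depth = id, lat, lon, depth

def nearest_cluster(lat, lon, depth, cluster_list):
    # Pick the cluster whose centroid minimizes a simple 3-D distance;
    # return None when no clusters are defined.
    if not cluster_list:
        return None
    def dist(c):
        return math.sqrt((c.lat - lat) ** 2 + (c.lon - lon) ** 2
                         + (c.depth - depth) ** 2)
    return min(cluster_list, key=dist)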