def write(stream_files, subfolder):
    """Serialize event (positive) waveforms plus their cluster id to tfrecords.

    Output goes to <output_dir>/<subfolder>/<cfg.output_tfrecords_dir_positives>/
    <args.file_name>; streams failing utils.check_stream are skipped.
    """
    base_dir = os.path.join(output_dir, subfolder)
    if not os.path.exists(base_dir):
        os.makedirs(base_dir)
    positives_dir = os.path.join(base_dir, cfg.output_tfrecords_dir_positives)
    if not os.path.exists(positives_dir):
        os.makedirs(positives_dir)

    # Write event waveforms and cluster_id in .tfrecords
    #output_name = "positives.tfrecords"
    output_path = os.path.join(positives_dir, args.file_name)
    writer = DataWriter(output_path)

    for stream_file in stream_files:
        mseed_path = os.path.join(dataset_dir, cfg.mseed_event_dir, stream_file)
        #print "[tfrecords positives] Loading Stream {}".format(stream_file)
        st_event = preprocess_stream(read(mseed_path))

        # Select only the specified channels
        st_event_select = utils.select_components(st_event, cfg)

        # LOCATION CLUSTERS
        # We work with only one location for the moment (cluster id = 0);
        # with a catalog, assign the nearest cluster to the event location.
        cluster_id = 0
        if cat is not None:
            header = st_event[0].stats
            lat, lon, depth = cat.getLatLongDepth(header.starttime,
                                                  st_event[-1].stats.endtime,
                                                  header.station)
            cluster_id = clusters.nearest_cluster(lat, lon, depth).id
            print("[tfrecords positives] Assigning cluster " + str(cluster_id) + " to event.")
        #cluster_id = filtered_catalog.cluster_id.values[event_n]

        if utils.check_stream(st_event_select, cfg):
            # Write tfrecords
            writer.write(st_event_select, cluster_id)

    # Cleanup writer
    print("[tfrecords positives] Number of windows written={}".format(writer._written))
    writer.close()
def write(stream_files, subfolder):
    """Serialize noise (negative) waveforms to a .tfrecords file.

    Output goes to <output_dir>/<subfolder>/<cfg.output_tfrecords_dir_negatives>/
    <args.file_name>; streams failing utils.check_stream are skipped.
    Negative windows carry no location, so they are labeled cluster_id = -1.

    Args:
        stream_files: iterable of miniSEED file names located under
            <dataset_dir>/<cfg.mseed_noise_dir>.
        subfolder: name of the output subdirectory under output_dir.
    """
    if not os.path.exists(os.path.join(output_dir, subfolder)):
        os.makedirs(os.path.join(output_dir, subfolder))
    negatives_dir = os.path.join(output_dir, subfolder, cfg.output_tfrecords_dir_negatives)
    if not os.path.exists(negatives_dir):
        os.makedirs(negatives_dir)

    # Write noise waveforms and cluster_id in .tfrecords
    # BUGFIX: original read "output_name = output_name = args.file_name" —
    # a duplicated assignment target, clearly unintended.
    output_name = args.file_name
    output_path = os.path.join(negatives_dir, output_name)
    writer = DataWriter(output_path)

    for stream_file in stream_files:
        stream_path = os.path.join(dataset_dir, cfg.mseed_noise_dir, stream_file)
        #print "[tfrecords negatives] Loading Stream {}".format(stream_file)
        st_event = read(stream_path)

        # Optional frequency filtering, mirroring the positives writer.
        if cfg.filterfreq:
            st_event = utils.filter_stream(st_event)

        # Select only the specified channels
        st_event_select = utils.select_components(st_event, cfg)

        # Noise windows have no location cluster: label them -1.
        cluster_id = -1
        if utils.check_stream(st_event_select, cfg):
            # Write tfrecords
            writer.write(st_event_select, cluster_id)

    # Cleanup writer
    print("[tfrecords negatives] Number of windows written={}".format(writer._written))
    writer.close()
def predict(path, stream_file, sess, model, samples, cat): global truePositives global falsePositives global trueNegatives global falseNegatives global perf_total_predictions global perf_total_time # Load stream stream_path = path+"/"+stream_file #TODO join stream_file = os.path.split(stream_path)[-1] stream_file_without_extension = os.path.split(stream_file)[-1].split(".mseed")[0] print "[classify] Loading Stream {}".format(stream_file) stream = read(stream_path) print '[classify] Preprocessing stream' stream = utils.preprocess_stream(stream) #Select only the specified channels stream_select = utils.select_components(stream, cfg) outputSubdir = os.path.join(output_dir, stream_file_without_extension) if os.path.exists(outputSubdir): shutil.rmtree(outputSubdir) os.makedirs(outputSubdir) outputSubdirSubplots = os.path.join(outputSubdir, "subPlots") os.makedirs(outputSubdirSubplots) os.makedirs(os.path.join(output_dir+"/"+stream_file_without_extension,"viz")) if cfg.save_sac: os.makedirs(os.path.join(output_dir+"/"+stream_file_without_extension,"sac")) #if args.metadata_path is not None: #This is groundtruth data # print("Reading metadata file "+args.metadata_path) # obspyCatalogMeta = seisobs.seis2cat(args.metadata_path) # # TODO: change and look at all streams # stream_path = args.stream_path # stream_file = os.path.split(stream_path)[-1] # print " + Loading stream {}".format(stream_file) # stream = load_stream(stream_path) # print " + Preprocess stream" # stream = preprocess_stream(stream) # print " -- Stream is ready, starting detection" # Create catalog name in which the events are stored catalog_name = os.path.split(stream_file)[-1].split(".mseed")[0] + ".csv" output_catalog = os.path.join(output_dir, catalog_name) print '[classify] Catalog created to store events', output_catalog # Dictonary to store info on detected events events_dic ={"start_time": [], "end_time": [], "cluster_id": [], "clusters_prob": []} true_positive_dic ={"start_time": [], "end_time": [], 
"cluster_id": [], "clusters_prob": []} false_positive_dic ={"start_time": [], "end_time": [], "cluster_id": [], "clusters_prob": []} false_negative_dic ={"start_time": [], "end_time": [], "cluster_id": [], "clusters_prob": []} missed_dic ={"start_time": [], "end_time": [], "cluster_id": [], "clusters_prob": []} # Windows generator win_gen = stream_select.slide(window_length=cfg.window_size, step=cfg.window_step_predict, include_partial_windows=False) total_time_in_sec = stream_select[0].stats.endtime - stream_select[0].stats.starttime max_windows = (total_time_in_sec - cfg.window_size) / cfg.window_step_predict step = tf.train.global_step(sess, model.global_step) n_events = 0 time_start = time.time() #if stations is not None: # station = stream_select[-1].stats.station # stationLAT, stationLONG, stationDEPTH = utils.station_coordinates(station, stations) station = stream[0].stats.station stream_start_time = stream[0].stats.starttime stream_end_time = stream[-1].stats.endtime ptime = cat.getPtime(stream_start_time, stream_end_time, station) event_window_start = ptime - cfg.pwave_window/2 event_window_end = ptime + cfg.pwave_window/2 print("ptime="+str(ptime)) print("event_window_start="+str(event_window_start)+"<->event_window_end="+str(event_window_end)) try: for idx, win in enumerate(win_gen): #Check the groundtruth isPositive = False if cat is not None: #print("win[0].stats.starttime ="+str(win[0].stats.starttime)) #print("win[0].stats.endtime ="+str(win[0].stats.endtime)) #print("cat.start_time[0] ="+str(cat.start_time[0])) #print("cat.end_time[0] ="+str(cat.end_time[0])) window_start = win[0].stats.starttime window_end = win[-1].stats.endtime print("window: "+str(window_start)+"-"+str(window_end)) if (window_start <= event_window_start) and (window_end >= event_window_end): #positive isPositive = True print("REAL POSITIVE") else:# negative isPositive = False print("REAL NEGATIVE") #if stations is not None: # isPositive = utils.isPositive(window_start, 
window_end, cat, stationLAT, stationLONG, stationDEPTH, cfg.mean_velocity) #else: # isPositive = utils.isPositive(window_start, window_end, cat) #Event window: [timeP-cfg.pwave_window..timeP+cfg.pwave_window] #Do not use negatives #for i in range(0, len(cat.start_time)): # if (UTCDateTime(cat.start_time[i]) >= UTCDateTime(win[0].stats.starttime)) and (UTCDateTime(cat.end_time[i]) <= UTCDateTime(win[0].stats.endtime)):# and (cat.end_time[0] <= win[0].stats.endtime): # isPositive = True # #print("\033[92m isPositive = True\033[0m") # else: # isPositive = False # Fetch class_proba and label to_fetch = [samples['data'], model.layers['class_prob'], model.layers['class_prediction']] # Feed window and fake cluster_id (needed by the net) but # will be predicted if utils.check_stream(win, cfg): feed_dict = {samples['data']: utils.fetch_window_data(win, cfg), samples['cluster_id']: np.array([0])} perf_start_time = time.time() sample, class_prob_, cluster_id = sess.run(to_fetch, feed_dict) perf_end_time = time.time() perf_elapsed_time = perf_end_time - perf_start_time perf_total_time = perf_total_time + perf_elapsed_time perf_total_predictions = perf_total_predictions + 1 print("Prediction time = "+str(perf_elapsed_time)) print("Average time per prediction in secs = "+str(perf_total_time/perf_total_predictions)) else: missed_dic["start_time"].append(win[0].stats.starttime) continue # # Keep only clusters proba, remove noise proba clusters_prob = class_prob_[0,1::] cluster_id -= 1 # label for noise = -1, label for cluster \in {0:n_clusters} is_event = cluster_id[0] > -1 if is_event: print("PREDICTED POSITIVE") n_events += 1 else: print("PREDICTED NEGATIVE") # print "event {} ,cluster id {}".format(is_event,class_prob_) if is_event: events_dic["start_time"].append(win[0].stats.starttime) events_dic["end_time"].append(win[0].stats.endtime) events_dic["cluster_id"].append(cluster_id[0]) events_dic["clusters_prob"].append(list(clusters_prob)) if evaluation and isPositive: 
#sys.stdout.write("\033[92m HIT\033[0m (positive)\n") sys.stdout.write("\033[92mP\033[0m") sys.stdout.flush() truePositives = truePositives+1 true_positive_dic["start_time"].append(win[0].stats.starttime) true_positive_dic["end_time"].append(win[0].stats.endtime) true_positive_dic["cluster_id"].append(cluster_id[0]) true_positive_dic["clusters_prob"].append(list(clusters_prob)) #break elif evaluation: #sys.stdout.write("\033[91m MISS\033[0m (false positive)\n") sys.stdout.write("\033[91mP\033[0m") sys.stdout.flush() falsePositives = falsePositives+1 false_positive_dic["start_time"].append(win[0].stats.starttime) false_positive_dic["end_time"].append(win[0].stats.endtime) false_positive_dic["cluster_id"].append(cluster_id[0]) false_positive_dic["clusters_prob"].append(list(clusters_prob)) else: if evaluation and isPositive: #sys.stdout.write("\033[91m MISS\033[0m (false negative)\n") sys.stdout.write("\033[91mN\033[0m") sys.stdout.flush() falseNegatives = falseNegatives+1 false_negative_dic["start_time"].append(win[0].stats.starttime) false_negative_dic["end_time"].append(win[0].stats.endtime) false_negative_dic["cluster_id"].append(cluster_id[0]) false_negative_dic["clusters_prob"].append(list(clusters_prob)) elif evaluation: #sys.stdout.write("\033[92m HIT\033[0m (negative)\n") sys.stdout.write("\033[92mN\033[0m") sys.stdout.flush() trueNegatives = trueNegatives+1 #if idx % 1000 ==0: # print "\n[classify] Analyzing {} records".format(win[0].stats.starttime) if is_event: win_filtered = win.copy() #win_filtered.plot(outfile=os.path.join(output_dir+"/"+stream_file_without_extension,"viz", # "event_{}_cluster_{}.png".format(idx,cluster_id))) if cfg.save_sac and is_event: win_filtered = win.copy() win_filtered.write(os.path.join(output_dir,"sac", "event_{}_cluster_{}.sac".format(idx,cluster_id)), format="SAC") if idx >= max_windows: print "[classify] stopped after {} windows".format(max_windows) print "[classify] found {} events".format(n_events) break except 
KeyboardInterrupt: print '[classify] Interrupted at time {}.'.format(win[0].stats.starttime) print "[classify] processed {} windows, found {} events".format(idx+1,n_events) print "[classify] Run time: ", time.time() - time_start df = pd.DataFrame.from_dict(events_dic) df.to_csv(output_catalog) #Plot everything customPlot(stream, output_dir+"/"+stream_file+"_"+str(idx)+".png", events_dic["start_time"], missed_dic["start_time"], true_positive_dic["start_time"], false_positive_dic["start_time"], false_negative_dic["start_time"]) #Plot only 10min sections with events #max_secs_to_show = 600 #win_gen = stream_select.slide(window_length=max_secs_to_show, # step=max_secs_to_show, # include_partial_windows=False) #for idx, win in enumerate(win_gen): # customPlot(win, outputSubdirSubplots+"/win_"+str(idx)+".png", events_dic["start_time"], missed_dic["start_time"]) #win = substream.slice(UTCDateTime(timeP), UTCDateTime(timeP) + cfg.window_size).copy() print "\n[classify] Run time: ", time.time() - time_start return events_dic
def write(stream_files, subfolder):
    """Serialize positive (event) waveforms with their cluster ids to tfrecords.

    Events whose location falls outside every cluster (cluster lookup returns
    None) are labeled -1 and discarded with a warning.
    """
    target_root = os.path.join(output_dir, subfolder)
    if not os.path.exists(target_root):
        os.makedirs(target_root)
    tfrecords_dir = os.path.join(target_root, cfg.output_tfrecords_dir_positives)
    if not os.path.exists(tfrecords_dir):
        os.makedirs(tfrecords_dir)

    # Write event waveforms and cluster_id in .tfrecords
    #output_name = "positives.tfrecords"
    writer = DataWriter(os.path.join(tfrecords_dir, args.file_name))

    for stream_file in stream_files:
        mseed_path = os.path.join(dataset_dir, cfg.mseed_event_dir, stream_file)
        #print "[tfrecords positives] Loading Stream {}".format(stream_file)
        st_event = read(mseed_path)

        # Optional frequency filtering
        if cfg.filterfreq:
            st_event = utils.filter_stream(st_event)

        # Select only the specified channels
        st_event_select = utils.select_components(st_event, cfg)

        # LOCATION CLUSTERS
        # We work with only one location for the moment (cluster id = 0);
        # with a catalog, assign the nearest cluster to the event location.
        lat = 0
        lon = 0
        depth = 0
        cluster_id = 0
        if cat is not None:
            header = st_event[0].stats
            lat, lon, depth = cat.getLatLongDepth(header.starttime,
                                                  st_event[-1].stats.endtime,
                                                  header.station)
            c = clusters.nearest_cluster(lat, lon, depth)
            # c can be None in case of polygons-based clustering; -1 signals
            # that the earthquake has to be discarded.
            cluster_id = c.id if c is not None else -1
            print("[tfrecords positives] Assigning cluster "+str(cluster_id)+" to event (lat = "+str(lat)+", lon = "+str(lon)+").")
        #cluster_id = filtered_catalog.cluster_id.values[event_n]

        if cluster_id >= 0:  # no clustering or a valid cluster
            if utils.check_stream(st_event_select, cfg):
                # Write tfrecords
                writer.write(st_event_select, cluster_id)
        else:
            print ("[tfrecords positives] \033[91m WARNING!!\033[0m Discarding point as no cluster found for the given lat="+str(lat)+", lon="+str(lon)+", depth="+str(depth))

    # Cleanup writer
    print("[tfrecords positives] Number of windows written={}".format(writer._written))
    writer.close()