def score(self, detections, catalog_path, lag_allowed=1.0):
    """Count false and missed detections against a reference catalog.

    Parameters:
        detections: list. Detected events; each item exposes
            ``detect_time.timestamp`` (float seconds since epoch).
        catalog_path: str. Path to the csv catalog of reference events.
        lag_allowed: float. Maximum |detected - cataloged| time difference
            (seconds) for a detection to be considered true.

    Returns:
        false_pos: int. Number of detections matching no cataloged event.
        false_neg: int. Number of cataloged events with no detection.
    """
    catalog = load_catalog(catalog_path)
    # NOTE(review): assumes the loaded catalog exposes a `utc_timestamp`
    # column of float epoch seconds — confirm against load_catalog.
    event_times = np.asarray(catalog.utc_timestamp, dtype=float)
    detection_times = [d.detect_time.timestamp for d in detections]
    if not detection_times:
        # No detections at all: every cataloged event is missed.
        return 0, len(event_times)
    # A detection is true if ANY cataloged event lies within lag_allowed.
    # (Replaces the original xrange/index loop: xrange is Python-2-only and
    # the inner comparison vectorizes cleanly with numpy.)
    matched = np.array([np.any(np.abs(event_times - t) <= lag_allowed)
                        for t in detection_times])
    false_pos = int((~matched).sum())
    false_neg = len(event_times) - int(matched.sum())
    return false_pos, false_neg
def main(_):
    """Plot one window per cataloged event found inside FLAGS.stream.

    Reads the stream and catalog named by FLAGS, keeps catalog rows whose
    utc_timestamp falls inside the stream's time span, then slices a
    FLAGS.window_size window at each (travel-time-shifted) event and saves
    a PNG of the first trace into FLAGS.output.
    """
    if not os.path.exists(FLAGS.output):
        os.makedirs(FLAGS.output)
    # Load Catalog
    cat_path = FLAGS.catalog
    cat = load_catalog(cat_path)
    cat = filter_catalog(cat)
    # Load stream
    stream_path = FLAGS.stream
    print " + Loading stream"
    stream = read(stream_path)
    if FLAGS.with_preprocessing:
        print " + Preprocessing stream"
        stream = preprocess_stream(stream)
    # Filter catalog according to the loaded stream
    start_date = stream[0].stats.starttime
    end_date = stream[-1].stats.endtime
    print(" + Loaded Stream with Start Date={} and End Date={}".format(start_date, end_date))
    filtered_catalog = cat[
        ((cat.utc_timestamp >= start_date)
         & (cat.utc_timestamp < end_date))]
    # Propagation delay from source to station, added to each origin time.
    travel_time = get_travel_time(filtered_catalog)
    print(" + Plotting {} events".format(filtered_catalog.shape[0]))
    for event_n in range(filtered_catalog.shape[0]):
        event_time = filtered_catalog.utc_timestamp.values[event_n] + travel_time[event_n]
        cluster_id = filtered_catalog.cluster_id.values[event_n]
        st_event = stream.slice(
            UTCDateTime(event_time),
            UTCDateTime(event_time) + FLAGS.window_size).copy()
        # Only plot complete 3-component windows; otherwise dump the
        # partial stream for inspection.
        if len(st_event) == 3:
            trace = st_event[0]
            name_png = "event_{}_clusterid_{}.png".format(event_n, cluster_id)
            trace.plot(outfile=os.path.join(FLAGS.output, name_png))
        else:
            print st_event
def main(args):
    """Cut detection windows out of a stream and save plots and/or SAC files.

    For every row of the detection catalog (start_time/end_time columns),
    slices the corresponding window from the stream; writes a PNG per
    window when args.plot is set and a SAC file when args.save_sac is set.
    Pre-existing output subdirectories are wiped first.
    """
    # Remove previous output directory
    output_viz = os.path.join(args.output, "viz")
    output_sac = os.path.join(args.output, "sac")
    if args.plot:
        if os.path.exists(output_viz):
            shutil.rmtree(output_viz)
        os.makedirs(output_viz)
    if args.save_sac:
        if os.path.exists(output_sac):
            shutil.rmtree(output_sac)
        os.makedirs(output_sac)
    # Read stream
    print "+ Loading stream"
    st = load_stream(args.stream_path)
    # Read catalog
    print "+ Loading catalog"
    cat = load_catalog(args.catalog_path)
    # Look events in catalog and plot windows
    print "+ Creating windows with detected events from ConvNetQuake"
    for event in tqdm(range(cat.shape[0]), total=cat.shape[0],
                      unit="events", leave=False):
        win_start = UTCDateTime(cat.iloc[event].start_time)
        win_end = UTCDateTime(cat.iloc[event].end_time)
        win = st.slice(win_start, win_end).copy()
        if args.plot:
            win.plot(
                outfile=os.path.join(output_viz, "event_{}.png".format(event)))
        if args.save_sac:
            # SAC cannot represent masked (gappy) data: fill masked
            # samples before writing.
            for tr in win:
                if isinstance(tr.data, np.ma.masked_array):
                    tr.data = tr.data.filled()
            win.write(os.path.join(output_sac, "event_{}_.sac".format(event)),
                      format="SAC")
def load_catalog(self, catalog_path):
    """Load the catalog at *catalog_path* and report it in the status bar."""
    # Delegate the actual parsing to the shared data_io helper.
    self.catalog = data_io.load_catalog(catalog_path)
    # Show only the file name, not the full path, in the UI message.
    catalog_name = os.path.split(catalog_path)[-1]
    self.statusBar.showMessage('Loaded catalog {}.'.format(catalog_name))
def main(_):
    """Write event-free ("noise") windows of FLAGS.stream_path to tfrecords.

    Slides fixed-size windows over the stream, skips windows that are
    incomplete, have (near-)zero amplitude, or overlap a cataloged event
    (with a window_step margin on both sides), and writes the survivors
    with label -1. Rotates to a new tfrecords file every 1000 windows and
    records per-file window counts in metadata.json.
    """
    # Create dir to store tfrecords
    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)
    # Load stream
    stream_path = FLAGS.stream_path
    print stream_path
    stream_file = os.path.split(stream_path)[-1]
    print "+ Loading Stream {}".format(stream_file)
    stream = read(stream_path)
    print '+ Preprocessing stream'
    stream = preprocess_stream(stream)
    #stream.resample(10.0)
    # Dictionary of nb of events per tfrecords
    metadata = {}
    output_metadata = os.path.join(FLAGS.output_dir, "metadata.json")
    # Load Catalog
    print "+ Loading Catalog"
    cat = load_catalog(FLAGS.catalog)
    starttime = stream[0].stats.starttime.timestamp
    endtime = stream[-1].stats.endtime.timestamp
    print "startime", UTCDateTime(starttime)
    print "endtime", UTCDateTime(endtime)
    #print stream[0].stats
    #m2 = re.search(cat.stname.values[:], stream_file.split(".")[1])
    #print m2.group()
    cat = filter_catalog(cat, starttime, endtime)
    #cat = cat[(cat.stname == str(stream_file.split(".")[1]))]
    # cat = cat[(cat.stname == str(stream_file.split(".")[1])) or
    #           (cat.stname == str(stream_file.split(".")[1][:-1]))]
    #print cat
    print "First event in filtered catalog", cat.Date.values[0], cat.Time.values[0]
    print "Last event in filtered catalog", cat.Date.values[-1], cat.Time.values[-1]
    cat_event_times = cat.utc_timestamp.values
    # Write event waveforms and cluster_id=-1 in .tfrecords
    n_tfrecords = 0
    output_name = "noise_" + stream_file.split(".mseed")[0] + \
        "_" + str(n_tfrecords) + ".tfrecords"
    output_path = os.path.join(FLAGS.output_dir, output_name)
    writer = DataWriter(output_path)
    # Create window generator
    win_gen = stream.slide(window_length=FLAGS.window_size,
                           step=FLAGS.window_step,
                           include_partial_windows=False)
    #Create window generator and shuffle the order,2017/12/4
    #win_gen = [tr for tr in stream.slide(window_length=FLAGS.window_size,
    #                                     step=FLAGS.window_step,
    #                                     include_partial_windows=False)]
    #random.shuffle(win_gen)
    if FLAGS.max_windows is None:
        # Upper bound on the number of sliding windows in the stream.
        total_time = stream[0].stats.endtime - stream[0].stats.starttime
        max_windows = (total_time - FLAGS.window_size) / FLAGS.window_step
    else:
        max_windows = FLAGS.max_windows
    # Create adjacent windows in the stream. Check there is no event inside
    # using the catalog and then write in a tfrecords with label=-1
    n_tfrecords = 0
    for idx, win in enumerate(win_gen):
        # If there is not trace skip this waveform
        n_traces = len(win)
        if n_traces == 0:
            continue
        # Check trace is complete
        if len(win) == 3:
            # Shortest component length; compared to n_pts below to
            # require a full-length 3-component window.
            n_samples = min(len(win[0].data), len(win[1].data))
            n_samples = min(n_samples, len(win[2].data))
            #to get rid of super small amplitude,2017/12/6
            ampl_e, ampl_n, ampl_z = filter_small_ampitude(win, n_samples)
            if ampl_e > 0.3 or ampl_n > 0.3 or ampl_z > 0.3:
                continue
            #a = remove_repeat(win, n_samples)
            #if a[0] > 0.3 or a[1] > 0.3 or a[2] > 0.3:
            #    continue
        else:
            # NOTE(review): this branch sets n_sample (not n_samples), so
            # such windows always fail the n_pts == n_samples test below.
            n_sample = 10
            ampl_e = max(abs(win[0:-1].data))
            if ampl_e < 1e-10:
                continue
        # Expected sample count for a complete window.
        n_pts = win[0].stats.sampling_rate * FLAGS.window_size + 1
        # Check if there is an event in the window
        window_start = win[0].stats.starttime.timestamp
        window_end = win[-1].stats.endtime.timestamp
        ##add window extend to broaden the window,so that more events can be avoid,2017/12/07
        window_start_extend = window_start - FLAGS.window_step
        window_end_extend = window_end + FLAGS.window_step
        after_start = cat_event_times > window_start_extend
        before_end = cat_event_times < window_end_extend
        #print window_start_extend,window_end_extend
        try:
            # An event is inside the extended window iff both masks agree.
            # IndexError from the [0][0] lookup means "no event" — handled
            # below; such windows are still written as noise.
            cat_idx = np.where(after_start == before_end)[0][0]
            event_time = cat_event_times[cat_idx]
            is_event = True
            assert window_start_extend < cat.utc_timestamp.values[cat_idx]
            assert window_end_extend > cat.utc_timestamp.values[cat_idx]
            print "avoiding event {}, {}".format(cat.Date.values[cat_idx],
                                                 cat.Time.values[cat_idx])
        except IndexError:
            # there is no event
            is_event = False
        if (len(win) == 3) and (n_pts == n_samples):
            # Write tfrecords
            #writer.write(win.normalize(), -1)
            writer.write(win.copy().normalize(), -1)
            #writer.write(win.copy().resample(10).filter('bandpass', freqmin=0.5, freqmax=20).normalize(),
            #             -1)
            # Plot events
            if FLAGS.plot:
                import matplotlib
                matplotlib.use('Agg')
                #trace = win[0].filter('bandpass', freqmin=0.5, freqmax=20)
                trace = win[0]
                viz_dir = os.path.join(
                    FLAGS.output_dir, "viz", stream_file.split(".mseed")[0])
                if not os.path.exists(viz_dir):
                    os.makedirs(viz_dir)
                # trace.resample(10).plot(outfile=os.path.join(viz_dir,
                #     "noise_{}.png".format(str(window_start))))
                trace.plot(outfile=os.path.join(
                    viz_dir,
                    ####changed at 2017/11/25,use max cluster_prob instead of cluster_id
                    # "event_{}_cluster_{}.png".format(idx,cluster_id)))
                    "noise_{}_{}.png".format(
                        win[0].stats.station,
                        str(win[0].stats.starttime).replace(':', '_'))))
        if idx % 1000 == 0 and idx != 0:
            print "{} windows created".format(idx)
            # Save num windows created in metadata, then rotate to a fresh
            # tfrecords file.
            metadata[output_name] = writer._written
            print "creating a new tfrecords"
            n_tfrecords += 1
            output_name = "noise_" + stream_file.split(".mseed")[0] + \
                "_" + str(n_tfrecords) + ".tfrecords"
            output_path = os.path.join(FLAGS.output_dir, output_name)
            writer = DataWriter(output_path)
        if idx == max_windows:
            break
    # Cleanup writer
    print("Number of windows written={}".format(writer._written))
    writer.close()
    # Write metadata
    metadata[stream_file.split(".mseed")[0]] = writer._written
    write_json(metadata, output_metadata)
def main(_):
    """Write event-free ("noise") windows of FLAGS.stream_path to tfrecords.

    Slides fixed-size windows over the preprocessed stream, skips windows
    that are incomplete or overlap a cataloged event, and writes the
    survivors with label -1. Rotates to a new tfrecords file every 1000
    windows and records per-file window counts in metadata.json.
    """
    # Create dir to store tfrecords
    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)
    # Load stream
    stream_path = FLAGS.stream_path
    stream_file = os.path.split(stream_path)[-1]
    print("+ Loading Stream {}".format(stream_file))
    stream = read(stream_path)
    print('+ Preprocessing stream')
    stream = preprocess_stream(stream)
    # Dictionary of nb of events per tfrecords
    metadata = {}
    output_metadata = os.path.join(FLAGS.output_dir, "metadata.json")
    # Load Catalog
    print("+ Loading Catalog")
    cat = load_catalog(FLAGS.catalog)
    starttime = stream[0].stats.starttime.timestamp
    endtime = stream[-1].stats.endtime.timestamp
    print("startime", UTCDateTime(starttime))
    print("endtime", UTCDateTime(endtime))
    cat = filter_catalog(cat, starttime, endtime)
    print("First event in filtered catalog", cat.Date.values[0],
          cat.Time.values[0])
    print("Last event in filtered catalog", cat.Date.values[-1],
          cat.Time.values[-1])
    cat_event_times = cat.utc_timestamp.values
    # Write event waveforms and cluster_id=-1 in .tfrecords
    n_tfrecords = 0
    output_name = "noise_" + stream_file.split(".mseed")[0] + \
        "_" + str(n_tfrecords) + ".tfrecords"
    output_path = os.path.join(FLAGS.output_dir, output_name)
    writer = DataWriter(output_path)
    # Create window generator
    win_gen = stream.slide(window_length=FLAGS.window_size,
                           step=FLAGS.window_step,
                           include_partial_windows=False)
    if FLAGS.max_windows is None:
        # Upper bound on the number of sliding windows in the stream.
        total_time = stream[0].stats.endtime - stream[0].stats.starttime
        max_windows = (total_time - FLAGS.window_size) / FLAGS.window_step
    else:
        max_windows = FLAGS.max_windows
    # Create adjacent windows in the stream. Check there is no event inside
    # using the catalog and then write in a tfrecords with label=-1
    n_tfrecords = 0
    for idx, win in enumerate(win_gen):
        # If there is not trace skip this waveform
        n_traces = len(win)
        if n_traces == 0:
            continue
        # Check trace is complete
        if len(win) == 3:
            # Shortest component length; compared to n_pts below to
            # require a full-length 3-component window.
            n_samples = min(len(win[0].data), len(win[1].data))
            n_samples = min(n_samples, len(win[2].data))
        else:
            # NOTE(review): sets n_sample (not n_samples), so such windows
            # always fail the n_pts == n_samples test below.
            n_sample = 10
        # Expected sample count for a complete window.
        n_pts = win[0].stats.sampling_rate * FLAGS.window_size + 1
        # Check if there is an event in the window
        window_start = win[0].stats.starttime.timestamp
        window_end = win[-1].stats.endtime.timestamp
        after_start = cat_event_times > window_start
        before_end = cat_event_times < window_end
        try:
            # An event is inside the window iff both masks agree.
            # IndexError from the [0][0] lookup means "no event"; such
            # windows are still written as noise below.
            cat_idx = np.where(after_start == before_end)[0][0]
            event_time = cat_event_times[cat_idx]
            is_event = True
            assert window_start < cat.utc_timestamp.values[cat_idx]
            assert window_end > cat.utc_timestamp.values[cat_idx]
            print("avoiding event {}, {}".format(cat.Date.values[cat_idx],
                                                 cat.Time.values[cat_idx]))
        except IndexError:
            # there is no event
            is_event = False
        if (len(win) == 3) and (n_pts == n_samples):
            # Write tfrecords
            writer.write(win, -1)
            # Plot events
            if FLAGS.plot:
                trace = win[0]
                viz_dir = os.path.join(FLAGS.output_dir, "viz",
                                       stream_file.split(".mseed")[0])
                if not os.path.exists(viz_dir):
                    os.makedirs(viz_dir)
                trace.plot(outfile=os.path.join(
                    viz_dir, "noise_{}.png".format(idx)))
        if idx % 1000 == 0 and idx != 0:
            print("{} windows created".format(idx))
            # Save num windows created in metadata, then rotate to a fresh
            # tfrecords file.
            metadata[output_name] = writer._written
            print("creating a new tfrecords")
            n_tfrecords += 1
            output_name = "noise_" + stream_file.split(".mseed")[0] + \
                "_" + str(n_tfrecords) + ".tfrecords"
            output_path = os.path.join(FLAGS.output_dir, output_name)
            writer = DataWriter(output_path)
        if idx == max_windows:
            break
    # Cleanup writer
    print("Number of windows written={}".format(writer._written))
    writer.close()
    # Write metadata
    metadata[stream_file.split(".mseed")[0]] = writer._written
    write_json(metadata, output_metadata)
def main(_):
    """Write one tfrecords of event windows per stream in FLAGS.stream_dir.

    For each stream file: keep catalog rows inside the stream's time span,
    slice a FLAGS.window_size window at each travel-time-shifted event,
    and write complete 3-component windows with their cluster_id.
    Optionally also saves each window as MSEED and/or a PNG plot. Per-file
    event counts are accumulated in metadata.json.
    """
    stream_files = [file for file in os.listdir(FLAGS.stream_dir)
                    if fnmatch.fnmatch(file, '*')]
    print "List of streams to anlayze", stream_files
    # Create dir to store tfrecords
    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)
    # Dictionary of nb of events per tfrecords
    metadata = {}
    output_metadata = os.path.join(FLAGS.output_dir, "metadata.json")
    # Load Catalog
    print "+ Loading Catalog"
    cat = load_catalog(FLAGS.catalog)
    cat = filter_catalog(cat)
    for stream_file in stream_files:
        # Load stream
        stream_path = os.path.join(FLAGS.stream_dir, stream_file)
        print "+ Loading Stream {}".format(stream_file)
        stream = read(stream_path)
        print '+ Preprocessing stream'
        stream = preprocess_stream(stream)
        # Filter catalog according to the loaded stream
        start_date = stream[0].stats.starttime
        end_date = stream[-1].stats.endtime
        print("-- Start Date={}, End Date={}".format(start_date, end_date))
        filtered_catalog = cat[((cat.utc_timestamp >= start_date)
                                & (cat.utc_timestamp < end_date))]
        # Propagation time from source to station
        travel_time = get_travel_time(filtered_catalog)
        # Write event waveforms and cluster_id in .tfrecords
        output_name = stream_file.split(".mseed")[0] + ".tfrecords"
        output_path = os.path.join(FLAGS.output_dir, output_name)
        writer = DataWriter(output_path)
        print("+ Creating tfrecords for {} events".format(
            filtered_catalog.shape[0]))
        # Loop over all events in the considered stream
        for event_n in range(filtered_catalog.shape[0]):
            # Window starts at origin time plus propagation delay.
            event_time = filtered_catalog.utc_timestamp.values[event_n]
            event_time += travel_time[event_n]
            st_event = stream.slice(
                UTCDateTime(event_time),
                UTCDateTime(event_time) + FLAGS.window_size).copy()
            cluster_id = filtered_catalog.cluster_id.values[event_n]
            n_traces = len(st_event)
            # If there is not trace skip this waveform
            if n_traces == 0:
                continue
            n_samples = len(st_event[0].data)
            # Expected sample count for a complete window.
            n_pts = st_event[0].stats.sampling_rate * FLAGS.window_size + 1
            if (len(st_event) == 3) and (n_pts == n_samples):
                # Write tfrecords
                writer.write(st_event, cluster_id)
                # Save window and cluster_id
                if FLAGS.save_mseed:
                    output_label = "label_{}_lat_{:.3f}_lon_{:.3f}.mseed".format(
                        cluster_id,
                        filtered_catalog.latitude.values[event_n],
                        filtered_catalog.longitude.values[event_n])
                    output_mseed_dir = os.path.join(FLAGS.output_dir, "mseed")
                    if not os.path.exists(output_mseed_dir):
                        os.makedirs(output_mseed_dir)
                    output_mseed = os.path.join(output_mseed_dir, output_label)
                    st_event.write(output_mseed, format="MSEED")
                # Plot events
                if FLAGS.plot:
                    trace = st_event[0]
                    viz_dir = os.path.join(FLAGS.output_dir, "viz",
                                           stream_file.split(".mseed")[0])
                    if not os.path.exists(viz_dir):
                        os.makedirs(viz_dir)
                    trace.plot(outfile=os.path.join(
                        viz_dir, "event_{}.png".format(event_n)))
            else:
                print "Missing waveform for event:", UTCDateTime(event_time)
        # Cleanup writer
        print("Number of events written={}".format(writer._written))
        writer.close()
        # Write metadata
        metadata[stream_file.split(".mseed")[0]] = writer._written
        write_json(metadata, output_metadata)
def main(_):
    """Write one tfrecords of event windows per *.mseed in FLAGS.stream_dir.

    For each stream file: keep catalog rows inside the stream's time span,
    slice a FLAGS.window_size window at each event origin time (no travel
    time applied in this variant), drop windows with tiny/repeated
    amplitudes, and write the resampled, normalized 3-component windows
    with their cluster_id. Optionally also saves MSEED and/or PNG output.
    Per-file event counts are accumulated in metadata.json.
    """
    stream_files = [file for file in os.listdir(FLAGS.stream_dir)
                    if fnmatch.fnmatch(file, '*.mseed')]
    print "List of streams to anlayze", stream_files
    # Create dir to store tfrecords
    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)
    # Dictionary of nb of events per tfrecords
    metadata = {}
    output_metadata = os.path.join(FLAGS.output_dir, "metadata.json")
    # Load Catalog
    print "+ Loading Catalog"
    for stream_file in stream_files:
        # NOTE(review): the catalog is re-loaded for every stream file —
        # presumably so each iteration starts from the unfiltered catalog.
        cat = load_catalog(FLAGS.catalog)
        #cat = filter_catalog(cat,stream_file.split(".mseed")[0])
        # Load stream
        stream_path = os.path.join(FLAGS.stream_dir, stream_file)
        print "+ Loading Stream {}".format(stream_file)
        stream = read(stream_path)
        print '+ Preprocessing stream'
        stream = preprocess_stream(stream)
        # Filter catalog according to the loaded stream
        start_date = stream[0].stats.starttime
        end_date = stream[-1].stats.endtime
        print("-- Start Date={}, End Date={}".format(start_date, end_date))
        filtered_catalog = cat[((cat.utc_timestamp >= start_date)
                                & (cat.utc_timestamp < end_date))]
        #print(1111, cat)
        # Propagation time from source to station
        #travel_time = get_travel_time(filtered_catalog)
        # Write event waveforms and cluster_id in .tfrecords
        output_name = stream_file.split(".mseed")[0] + ".tfrecords"
        output_path = os.path.join(FLAGS.output_dir, output_name)
        writer = DataWriter(output_path)
        print("+ Creating tfrecords for {} events".format(
            filtered_catalog.shape[0]))
        # Loop over all events in the considered stream
        for event_n in range(filtered_catalog.shape[0]):
            event_time = filtered_catalog.utc_timestamp.values[event_n]
            # event_time += travel_time[event_n]
            st_event = stream.slice(
                UTCDateTime(event_time),
                UTCDateTime(event_time) + FLAGS.window_size).copy()
            cluster_id = filtered_catalog.cluster_id.values[event_n]
            #cluster_id =1
            n_traces = len(st_event)
            # If there is no trace skip this waveform
            if n_traces == 0:
                continue
            n_samples = len(st_event[0].data)
            # Expected sample count for a complete window.
            n_pts = st_event[0].stats.sampling_rate * FLAGS.window_size + 1
            if (len(st_event) == 3) and (n_pts == n_samples):
                # Write tfrecords
                # use filter_small_ampitude to get rid of super small amplitude,2017/12/6
                ampl_e, ampl_n, ampl_z = filter_small_ampitude(
                    st_event, n_samples)
                if ampl_e > 0.3 or ampl_n > 0.3 or ampl_z > 0.3:
                    continue
                # Skip windows dominated by repeated samples.
                a = remove_repeat(st_event, n_samples)
                if a[0] > 0.3 or a[1] > 0.3 or a[2] > 0.3:
                    continue
                writer.write(st_event.copy().resample(10).normalize(),
                             cluster_id)
                #writer.write(st_event.copy().resample(10).filter('bandpass', freqmin=0.5, freqmax=20).normalize(), cluster_id)
                #print (len(st_event[0]))
                # Save window and cluster_id
                if FLAGS.save_mseed:
                    # Colons are not filesystem-safe in all contexts;
                    # replaced with underscores in the file name.
                    output_label = "{}_{}.mseed".format(
                        st_event[0].stats.station,
                        str(st_event[0].stats.starttime).replace(':', '_'))
                    output_mseed_dir = os.path.join(FLAGS.output_dir, "mseed")
                    if not os.path.exists(output_mseed_dir):
                        os.makedirs(output_mseed_dir)
                    output_mseed = os.path.join(output_mseed_dir, output_label)
                    st_event.write(output_mseed, format="MSEED")
                # Plot events
                if FLAGS.plot:
                    trace = st_event[0].filter('bandpass', freqmin=0.5,
                                               freqmax=20)
                    #trace = st_event[0]
                    viz_dir = os.path.join(FLAGS.output_dir, "viz",
                                           stream_file.split(".mseed")[0])
                    if not os.path.exists(viz_dir):
                        os.makedirs(viz_dir)
                    trace.plot(outfile=os.path.join(
                        viz_dir,
                        ####changed at 2017/11/25,use max cluster_prob instead of cluster_id
                        # "event_{}_cluster_{}.png".format(idx,cluster_id)))
                        "event_{}_{}.png".format(
                            st_event[0].stats.station,
                            str(st_event[0].stats.starttime).replace(':', '_'))))
            else:
                print "Missing waveform for event:", UTCDateTime(event_time)
        # Cleanup writer
        print("Number of events written={}".format(writer._written))
        writer.close()
        # Write metadata
        metadata[stream_file.split(".mseed")[0]] = writer._written
        write_json(metadata, output_metadata)