def main(args):
    global evaluation

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    if not os.path.exists(checkpoint_dir):
        print("[classify] \033[91m ERROR!!\033[0m Missing directory " + checkpoint_dir + ". Run step 4 first.")
        sys.exit(1)

    ckpt = tf.train.get_checkpoint_state(checkpoint_dir)

    # Load the model only once and reuse the session for every stream file
    samples = {
        'data': tf.placeholder(tf.float32,
                               shape=(1, cfg.win_size, cfg.n_traces),
                               name='input_data'),
        'cluster_id': tf.placeholder(tf.int64,
                                     shape=(1,),
                                     name='input_label')
    }
    model = models.get(cfg.model, samples, cfg, checkpoint_dir, is_training=False)

    sess = tf.Session()
    model.load(sess)
    print("[classify] Evaluating using model at step {}".format(
        sess.run(model.global_step)))

    stream_files = [file for file in os.listdir(stream_path)
                    if fnmatch.fnmatch(file, args.pattern)]
    if len(stream_files) == 0:
        print("[classify] \033[91m ERROR!!\033[0m No files match the file pattern " + args.pattern + ".")
        sys.exit(1)

    for stream_file in stream_files:
        stream_file_without_extension = os.path.split(stream_file)[-1].split(".mseed")[0]

        if args.catalog_path is None:
            # Look for ground-truth metadata next to the stream file
            metadata_path = os.path.join(stream_path, stream_file_without_extension + ".csv")
            if os.path.isfile(metadata_path):
                print("[classify] Found groundtruth metadata in " + metadata_path + ".")
                cat = pd.read_csv(metadata_path)
                evaluation = True
            else:
                print("[classify] Groundtruth metadata not found in " + metadata_path + ".")
                cat = None
        else:
            # Load the ground-truth catalog given on the command line
            cat = catalog.Catalog()
            cat.import_json(args.catalog_path)
            evaluation = True

        predictions = predict(stream_path, stream_file, sess, model, samples, cat)

    sess.close()

    if evaluation:
        print("[classify] true positives = " + str(truePositives))
        print("[classify] false positives = " + str(falsePositives))
        print("[classify] true negatives = " + str(trueNegatives))
        print("[classify] false negatives = " + str(falseNegatives))

        if truePositives + falsePositives > 0:
            print("[classify] precision = " + str(100 * float(truePositives) / (truePositives + falsePositives)) + "%")
        else:
            print("[classify] cannot compute precision as truePositives+falsePositives == 0")
        if truePositives + falseNegatives > 0:
            print("[classify] recall = " + str(100 * float(truePositives) / (truePositives + falseNegatives)) + "%")
        else:
            print("[classify] cannot compute recall as truePositives+falseNegatives == 0")
        if truePositives + falsePositives + trueNegatives + falseNegatives > 0:
            print("[classify] accuracy = " + str(100 * float(truePositives + trueNegatives) / (truePositives + falsePositives + trueNegatives + falseNegatives)) + "%")
        else:
            print("[classify] cannot compute accuracy as truePositives+falsePositives+trueNegatives+falseNegatives == 0")
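# The precision/recall/accuracy block above could be factored into a small
# helper. This is a minimal sketch, not part of the original script; the
# counters passed in are the module-level globals assumed above.
def report_scores(tp, fp, tn, fn):
    """Print precision, recall and accuracy, guarding against empty denominators."""
    def ratio(num, den):
        return "{:.2f}%".format(100.0 * num / den) if den > 0 else "undefined (zero denominator)"
    print("[classify] precision = " + ratio(float(tp), tp + fp))
    print("[classify] recall    = " + ratio(float(tp), tp + fn))
    print("[classify] accuracy  = " + ratio(float(tp + tn), tp + fp + tn + fn))

# Example: report_scores(truePositives, falsePositives, trueNegatives, falseNegatives)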
def main(args):
    if args.n_clusters is None:
        raise ValueError('Define the number of clusters with --n_clusters')
    if not args.noise and not args.events:
        raise ValueError('Define if evaluating accuracy on noise or events')

    # Directory in which the evaluation summaries are written
    if args.noise:
        summary_dir = os.path.join(args.checkpoint_dir, "noise")
    if args.events:
        summary_dir = os.path.join(args.checkpoint_dir, "events")

    if args.save_false:
        false_start = []
        false_end = []
        false_origintime = []
        false_dir = os.path.join("output", "false_predictions")
        if not os.path.exists(false_dir):
            os.makedirs(false_dir)

    # Wait until a checkpoint is available (unless running a one-shot evaluation)
    while True:
        ckpt = tf.train.get_checkpoint_state(args.checkpoint_dir)
        if args.eval_interval < 0 or ckpt:
            print('Evaluating model')
            break
        print('Waiting for training job to save a checkpoint')
        time.sleep(args.eval_interval)

    cfg = config.Config()
    if args.noise:
        cfg.batch_size = 128
    if args.events:
        cfg.batch_size = 1
    if args.save_false:
        cfg.batch_size = 1
    cfg.n_epochs = 1
    cfg.add = 1
    cfg.n_clusters = args.n_clusters
    cfg.n_clusters += 1

    while True:
        try:
            # data pipeline
            data_pipeline = DataPipeline(args.dataset, config=cfg, is_training=False)
            samples = {
                'data': data_pipeline.samples,
                'cluster_id': data_pipeline.labels,
                'start_time': data_pipeline.start_time,
                'end_time': data_pipeline.end_time
            }

            # set up model and validation metrics
            model = models.get(args.model, samples, cfg,
                               args.checkpoint_dir, is_training=False)
            metrics = model.validation_metrics()

            # Validation summary writer
            summary_writer = tf.summary.FileWriter(summary_dir, None)

            with tf.Session() as sess:
                coord = tf.train.Coordinator()
                tf.initialize_local_variables().run()
                threads = tf.train.start_queue_runners(sess=sess, coord=coord)

                model.load(sess, args.step)
                print('Evaluating at step {}'.format(sess.run(model.global_step)))
                step = tf.train.global_step(sess, model.global_step)

                mean_metrics = {}
                for key in metrics:
                    mean_metrics[key] = 0

                n = 0
                pred_labels = np.empty(1)
                true_labels = np.empty(1)
                while True:
                    try:
                        to_fetch = [
                            metrics,
                            model.layers["class_prediction"],
                            samples["cluster_id"],
                            samples["start_time"],
                            samples["end_time"]
                        ]
                        metrics_, batch_pred_label, batch_true_label, starttime, endtime = sess.run(to_fetch)

                        pred_labels = np.append(pred_labels, batch_pred_label)
                        true_labels = np.append(true_labels, batch_true_label)

                        # Save times of false predictions
                        if args.save_false and batch_pred_label != batch_true_label:
                            print("---False prediction---")
                            print(starttime, endtime)
                            false_origintime.append((starttime[0] + endtime[0]) / 2)
                            false_end.append(endtime)
                            false_start.append(starttime)

                        for key in metrics:
                            mean_metrics[key] += cfg.batch_size * metrics_[key]
                        n += cfg.batch_size

                        mess = model.validation_metrics_message(metrics_)
                        print('{:03d} | '.format(n) + mess)

                    except KeyboardInterrupt:
                        print('stopping evaluation')
                        break

                    except tf.errors.OutOfRangeError:
                        print('Evaluation completed ({} epochs).'.format(cfg.n_epochs))
                        print("{} windows seen".format(n))
                        break

                if n > 0:
                    for key in metrics:
                        mean_metrics[key] /= n
                        summary = tf.Summary(value=[
                            tf.Summary.Value(tag='{}/val'.format(key),
                                             simple_value=mean_metrics[key])
                        ])
                        if args.save_summary:
                            summary_writer.add_summary(summary, global_step=step)

                summary_writer.flush()

                mess = model.validation_metrics_message(mean_metrics)
                print('Average | ' + mess)

                if args.eval_interval < 0:
                    print('End of evaluation')
                    break

            tf.reset_default_graph()
            print('Sleeping for {}s'.format(args.eval_interval))
            time.sleep(args.eval_interval)

        finally:
            print('joining data threads')
            coord.request_stop()

            if args.save_false:
                false_preds = {}
                false_preds["start_time"] = false_start
                false_preds["end_time"] = false_end
                false_preds["origintime"] = false_origintime
                df = pd.DataFrame(false_preds)
                df.to_csv(os.path.join(false_dir, "false_preds.csv"))

            # Drop the dummy first element created by np.empty(1)
            pred_labels = pred_labels[1::]
            true_labels = true_labels[1::]
            print("---Confusion Matrix----")
            print(confusion_matrix(true_labels, pred_labels))
            coord.join(threads)
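# A possible addition (not in the original script): once pred_labels and
# true_labels are available, sklearn can also print per-class precision and
# recall next to the confusion matrix shown above.
from sklearn.metrics import classification_report

def print_classification_report(true_labels, pred_labels):
    """Per-class precision/recall/F1; noise (-1) appears as its own class."""
    print(classification_report(true_labels, pred_labels))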
def main(args):
    setproctitle.setproctitle('quakenet_predict')

    ckpt = tf.train.get_checkpoint_state(args.checkpoint_dir)
    cfg = config.Config()
    cfg.batch_size = 1
    cfg.n_clusters = args.n_clusters
    cfg.add = 1
    cfg.n_clusters += 1

    # Remove previous output directory
    if os.path.exists(args.output):
        shutil.rmtree(args.output)
    os.makedirs(args.output)
    if args.plot:
        os.makedirs(os.path.join(args.output, "viz"))
    if args.save_sac:
        os.makedirs(os.path.join(args.output, "sac"))

    # Load stream
    stream_path = args.stream_path
    stream_file = os.path.split(stream_path)[-1]
    print("+ Loading Stream {}".format(stream_file))
    stream = read(stream_path)
    print('+ Preprocessing stream')
    stream = preprocess_stream(stream)

    # Create catalog name in which the events are stored
    catalog_name = os.path.split(stream_file)[-1].split(".mseed")[0] + ".csv"
    output_catalog = os.path.join(args.output, catalog_name)
    print('Catalog created to store events', output_catalog)

    # Dictionary to store info on detected events
    events_dic = {
        "start_time": [],
        "end_time": [],
        "cluster_id": [],
        "clusters_prob": []
    }

    # Windows generator
    win_gen = stream.slide(window_length=args.window_size,
                           step=args.window_step,
                           include_partial_windows=False)
    if args.max_windows is None:
        total_time_in_sec = stream[0].stats.endtime - stream[0].stats.starttime
        max_windows = (total_time_in_sec - args.window_size) / args.window_step
    else:
        max_windows = args.max_windows

    # stream data with a placeholder
    samples = {
        'data': tf.placeholder(tf.float32,
                               shape=(cfg.batch_size, 1001, 3),
                               name='input_data'),
        'cluster_id': tf.placeholder(tf.int64,
                                     shape=(cfg.batch_size,),
                                     name='input_label')
    }

    # set up model and validation metrics
    model = models.get(args.model, samples, cfg,
                       args.checkpoint_dir, is_training=False)

    with tf.Session() as sess:
        model.load(sess, args.step)
        print('Predicting using model at step {}'.format(
            sess.run(model.global_step)))
        step = tf.train.global_step(sess, model.global_step)

        n_events = 0
        time_start = time.time()
        try:
            for idx, win in enumerate(win_gen):
                # Fetch class probabilities and predicted label
                to_fetch = [
                    samples['data'],
                    model.layers['class_prob'],
                    model.layers['class_prediction']
                ]
                # Feed the window and a dummy cluster_id (required by the
                # feed_dict); the label itself is what gets predicted
                if data_is_complete(win):
                    feed_dict = {
                        samples['data']: fetch_window_data(win),
                        samples['cluster_id']: np.array([0])
                    }
                    sample, class_prob_, cluster_id = sess.run(to_fetch, feed_dict)
                else:
                    continue

                # Keep only cluster probabilities, remove noise probability
                clusters_prob = class_prob_[0, 1::]
                cluster_id -= 1

                # label for noise = -1, label for cluster in {0: n_clusters}
                is_event = cluster_id[0] > -1
                if is_event:
                    n_events += 1
                    events_dic["start_time"].append(win[0].stats.starttime)
                    events_dic["end_time"].append(win[0].stats.endtime)
                    events_dic["cluster_id"].append(cluster_id[0])
                    events_dic["clusters_prob"].append(list(clusters_prob))

                if idx % 1000 == 0:
                    print("Analyzing window starting at {}".format(
                        win[0].stats.starttime))

                if args.plot and is_event:
                    win_filtered = win.copy()
                    win_filtered.plot(outfile=os.path.join(
                        args.output, "viz",
                        "event_{}_cluster_{}.png".format(idx, cluster_id)))

                if args.save_sac and is_event:
                    win_filtered = win.copy()
                    win_filtered.write(os.path.join(
                        args.output, "sac",
                        "event_{}_cluster_{}.sac".format(idx, cluster_id)),
                        format="SAC")

                if idx >= max_windows:
                    print("stopped after {} windows".format(max_windows))
                    print("found {} events".format(n_events))
                    break

        except KeyboardInterrupt:
            print('Interrupted at time {}.'.format(win[0].stats.starttime))
            print("processed {} windows, found {} events".format(idx + 1, n_events))
            print("Run time: ", time.time() - time_start)

    # Dump dictionary into csv file
    df = pd.DataFrame.from_dict(events_dic)
    df.to_csv(output_catalog)

    print("Run time: ", time.time() - time_start)
def main(args):
    setproctitle.setproctitle('quakenet_eval')

    if args.n_clusters is None:
        raise ValueError('Define the number of clusters with --n_clusters')

    ckpt = tf.train.get_checkpoint_state(args.checkpoint_dir)
    cfg = config.Config()
    cfg.batch_size = 1
    cfg.n_clusters = args.n_clusters
    cfg.add = 1
    cfg.n_clusters += 1
    cfg.n_epochs = 1

    # Remove previous output directory
    if os.path.exists(args.output):
        shutil.rmtree(args.output)
    os.makedirs(args.output)
    if args.plot:
        os.makedirs(os.path.join(args.output, "viz"))

    # data pipeline
    data_pipeline = DataPipeline(args.dataset, config=cfg, is_training=False)
    samples = {
        'data': data_pipeline.samples,
        'cluster_id': data_pipeline.labels,
        'start_time': data_pipeline.start_time,
        'end_time': data_pipeline.end_time
    }

    # set up model and validation metrics
    model = models.get(args.model, samples, cfg,
                       args.checkpoint_dir, is_training=False)

    if args.max_windows is None:
        max_windows = 2**31
    else:
        max_windows = args.max_windows

    # Dictionary to store info on detected events
    events_dic = {
        "start_time": [],
        "end_time": [],
        "utc_timestamp": [],
        "cluster_id": [],
        "clusters_prob": []
    }

    # Create catalog name in which the events are stored
    output_catalog = os.path.join(args.output, 'catalog_detection.csv')
    print('Catalog created to store events', output_catalog)

    # Run ConvNetQuake
    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        tf.initialize_local_variables().run()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        model.load(sess, args.step)
        print('Predicting using model at step {}'.format(
            sess.run(model.global_step)))
        step = tf.train.global_step(sess, model.global_step)

        n_events = 0
        idx = 0
        time_start = time.time()
        while True:
            try:
                # Fetch class probabilities and predicted label
                to_fetch = [
                    samples['data'],
                    model.layers['class_prob'],
                    model.layers['class_prediction'],
                    samples['start_time'],
                    samples['end_time']
                ]
                sample, class_prob_, cluster_id, start_time, end_time = sess.run(to_fetch)

                # Keep only cluster probabilities, remove noise probability
                clusters_prob = class_prob_[0, 1::]
                cluster_id -= 1

                # label for noise = -1, label for cluster in {0: n_clusters}
                is_event = cluster_id[0] > -1
                if is_event:
                    n_events += 1

                idx += 1
                if idx % 1000 == 0:
                    print("processed {} windows".format(idx))

                if is_event:
                    events_dic["start_time"].append(UTCDateTime(start_time))
                    events_dic["end_time"].append(UTCDateTime(end_time))
                    events_dic["utc_timestamp"].append((start_time + end_time) / 2.0)
                    events_dic["cluster_id"].append(cluster_id[0])
                    events_dic["clusters_prob"].append(list(clusters_prob))

                if idx >= max_windows:
                    print("stopped after {} windows".format(max_windows))
                    print("found {} events".format(n_events))
                    break

            except KeyboardInterrupt:
                print("processed {} windows, found {} events".format(idx + 1, n_events))
                print("Run time: ", time.time() - time_start)
                break

            except tf.errors.OutOfRangeError:
                print('Evaluation completed ({} epochs).'.format(cfg.n_epochs))
                break

        print('joining data threads')
        m, s = divmod(time.time() - time_start, 60)
        print("Prediction took {} min {} seconds".format(m, s))
        coord.request_stop()
        coord.join(threads)

    # Dump dictionary into csv file
    df = pd.DataFrame.from_dict(events_dic)
    df.to_csv(output_catalog)
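# Usage sketch (not part of the original script): read back the detection
# catalog written above and convert the mid-window timestamps to UTCDateTime.
# The default path below is hypothetical; the real file lives in args.output.
import pandas as pd
from obspy.core.utcdatetime import UTCDateTime

def load_detection_catalog(path="output/catalog_detection.csv"):
    """Load the detection catalog and derive per-event origin times."""
    cat = pd.read_csv(path)
    origin_times = [UTCDateTime(t) for t in cat["utc_timestamp"]]
    print("{} detected events".format(len(origin_times)))
    return cat, origin_times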
def main(args):
    setproctitle.setproctitle('quakenet_debug')

    if not os.path.exists(args.output):
        os.makedirs(args.output)

    if args.n_clusters is None:
        raise ValueError('Define the number of clusters with --n_clusters')

    cfg = config.Config()
    cfg.batch_size = 1
    cfg.n_epochs = 1
    cfg.add = 2
    cfg.n_clusters = args.n_clusters
    cfg.n_clusters += 1

    # data pipeline
    data_pipeline = dp.DataPipeline(args.dataset, cfg, False)
    samples = {
        'data': data_pipeline.samples,
        'cluster_id': data_pipeline.labels
    }

    # model
    model_name = args.model
    model = models.get(model_name, samples, cfg,
                       args.checkpoint_dir, is_training=False)

    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        tf.initialize_local_variables().run()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        model.load(sess)
        step = sess.run(model.global_step)
        print('Debugging at step {}'.format(step))

        # Tensors registered by the model in the graph collections
        activations = tf.get_collection(tf.GraphKeys.ACTIVATIONS)
        weights = tf.get_collection(tf.GraphKeys.WEIGHTS)
        biases = tf.get_collection(tf.GraphKeys.BIASES)

        toget = {}
        toget['0_input'] = model.inputs['data']
        for i, a in enumerate(activations):
            name = a.name.replace('/', '_').replace(':', '_')
            toget['{}_{}'.format(i + 1, name)] = a

        for it in range(10):
            print('running session')
            fetched = sess.run(toget)
            print(fetched)
            print(it)
            for f in fetched:
                d = fetched[f]
                d = np.squeeze(d, axis=0)
                plt.figure()
                if len(d.shape) == 2:
                    for i in range(d.shape[1]):
                        plt.plot(d[:, i])
                plt.savefig(os.path.join(args.output, '{}_{}.pdf'.format(it, f)))
                plt.clf()

        coord.request_stop()
        coord.join(threads)
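# The debug loop above only plots whatever the model registered in the
# tf.GraphKeys.ACTIVATIONS / WEIGHTS / BIASES collections. A minimal sketch of
# how a layer implementation might register its tensors (an assumption about
# the repository's layer wrappers, not their actual code):
import tensorflow as tf

def register_layer(activation, weights=None, biases=None):
    """Add layer tensors to the graph collections read by the debug script."""
    tf.add_to_collection(tf.GraphKeys.ACTIVATIONS, activation)
    if weights is not None:
        tf.add_to_collection(tf.GraphKeys.WEIGHTS, weights)
    if biases is not None:
        tf.add_to_collection(tf.GraphKeys.BIASES, biases)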
def eval(args, positivesOrNegatives):
    global truePositives
    global falsePositives
    global trueNegatives
    global falseNegatives
    global locationHit
    global locationMiss

    # Select the positive (event) or negative (noise) tfrecords directory
    if positivesOrNegatives:
        datasetDir = os.path.join(args.tfrecords_dir,
                                  cfg.output_tfrecords_dir_positives)
    else:
        datasetDir = os.path.join(args.tfrecords_dir,
                                  cfg.output_tfrecords_dir_negatives)

    cfg.batch_size = 1
    cfg.n_epochs = 1
    cfg.add = 1

    try:
        # data pipeline
        data_pipeline = DataPipeline(datasetDir, config=cfg, is_training=False)
        samples = {
            'data': data_pipeline.samples,
            'cluster_id': data_pipeline.labels,
            "start_time": data_pipeline.start_time,
            "end_time": data_pipeline.end_time
        }

        # set up model and validation metrics
        model = models.get(cfg.model, samples, cfg,
                           checkpoint_dir, is_training=False)
        metrics = model.validation_metrics()

        with tf.Session() as sess:
            coord = tf.train.Coordinator()
            tf.initialize_local_variables().run()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            model.load(sess)
            print('Evaluating at step {}'.format(sess.run(model.global_step)))

            mean_metrics = {}
            for key in metrics:
                mean_metrics[key] = 0

            n = 0
            pred_labels = np.empty(1)
            true_labels = np.empty(1)
            while True:
                try:
                    to_fetch = [
                        metrics,
                        model.layers["class_prediction"],
                        samples["cluster_id"],
                        samples["start_time"],
                        samples["end_time"]
                    ]
                    metrics_, batch_pred_label, batch_true_label, starttime, endtime = sess.run(to_fetch)

                    if positivesOrNegatives:
                        # Positive (event) windows
                        # NOTE: the predicted label is 0 for noise (-1) and >= 1 for cluster 0..n
                        if batch_true_label[0] >= 0 and batch_pred_label[0] >= 1:
                            truePositives += 1
                            print("TRUE POSITIVE: batch_true_label = "
                                  + str(batch_true_label[0])
                                  + "; batch_pred_label[0] = "
                                  + str(batch_pred_label[0]))
                            # Correct cluster (location) if the shifted prediction matches
                            if batch_true_label[0] == batch_pred_label[0] - 1:
                                locationHit += 1
                            else:
                                locationMiss += 1
                        else:
                            # Event window not recognised as an event
                            falsePositives += 1
                    else:
                        # Negative (noise) windows
                        if batch_true_label[0] == -1 and batch_pred_label[0] == 0:
                            trueNegatives += 1
                        else:
                            # Noise window classified as an event
                            falseNegatives += 1

                    batch_pred_label -= 1
                    pred_labels = np.append(pred_labels, batch_pred_label)
                    true_labels = np.append(true_labels, batch_true_label)

                    for key in metrics:
                        mean_metrics[key] += cfg.batch_size * metrics_[key]
                    n += cfg.batch_size

                except KeyboardInterrupt:
                    print('stopping evaluation')
                    break

                except tf.errors.OutOfRangeError:
                    print('Evaluation completed ({} epochs).'.format(cfg.n_epochs))
                    print("{} windows seen".format(n))
                    break

            if n > 0:
                for key in metrics:
                    mean_metrics[key] /= n
                    summary = tf.Summary(value=[
                        tf.Summary.Value(tag='{}/val'.format(key),
                                         simple_value=mean_metrics[key])
                    ])

            mess = model.validation_metrics_message(mean_metrics)

            coord.request_stop()

    finally:
        pass

    # Drop the dummy first element created by np.empty(1)
    pred_labels = pred_labels[1::]
    true_labels = true_labels[1::]
    coord.join(threads)
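# Usage sketch (not in the original): the classify step is assumed to call
# eval() once per window type and then report the module-level counters, e.g.
#
#   eval(args, True)     # positive (event) windows
#   eval(args, False)    # negative (noise) windows
#   print("location hits = {}, misses = {}".format(locationHit, locationMiss))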