Example #1
def main(args):
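    # Assumed context, not shown in this snippet: module-level imports
    # (os, sys, fnmatch, tensorflow as tf, pandas as pd, models, catalog)
    # and globals (cfg, output_dir, checkpoint_dir, stream_path, and the
    # truePositives/falsePositives/trueNegatives/falseNegatives counters
    # updated by the eval() helper shown later).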
    global evaluation

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    if not os.path.exists(checkpoint_dir):
        print ("[classify] \033[91m ERROR!!\033[0m Missing directory "+checkpoint_dir+". Run step 4 first.")
        sys.exit(0)
    ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
    
    #Load model just once
    samples = {
            'data': tf.placeholder(tf.float32,
                                   shape=(1, cfg.win_size, cfg.n_traces),
                                   name='input_data'),
            'cluster_id': tf.placeholder(tf.int64,
                                         shape=(1,),
                                         name='input_label')
    }
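    # Single-window inference: batch size is fixed to 1, and the cluster_id
    # placeholder is required by the graph but is not used as ground truth
    # at prediction time.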
    model = models.get(cfg.model, samples, cfg,
                       checkpoint_dir,
                       is_training=False)
    sess = tf.Session()
    model.load(sess)
    print('[classify] Evaluating using model at step {}'.format(
            sess.run(model.global_step)))

    stream_files = [f for f in os.listdir(stream_path)
                    if fnmatch.fnmatch(f, args.pattern)]

    if len(stream_files) == 0:
        print("[classify] \033[91m ERROR!!\033[0m No files match the file pattern "+args.pattern+".")
        sys.exit(1)

    for stream_file in stream_files:
        stream_file_without_extension = os.path.split(stream_file)[-1].split(".mseed")[0]
        if args.catalog_path is None:
            metadata_path = os.path.join(stream_path, stream_file_without_extension+".csv")
            if os.path.isfile(metadata_path):
                print("[classify] Found groundtruth metadata in "+metadata_path+".")  
                cat = pd.read_csv(metadata_path)
                evaluation = True
            else:
                print("[classify] Not found groundtruth metadata in "+metadata_path+".")
                cat = None
        else:
            # Load metadata from the user-provided catalog
            cat = catalog.Catalog()
            cat.import_json(args.catalog_path)
            evaluation = True
        predictions = predict(stream_path, stream_file, sess, model, samples, cat)
    sess.close()

    if evaluation:
        print("[classify] true positives = "+str(truePositives))
        print("[classify] false positives = "+str(falsePositives))
        print("[classify] true negatives = "+str(trueNegatives))
        print("[classify] false negatives = "+str(falseNegatives))

        if truePositives+falsePositives>0:
            print("[classify] precission = "+str(100*float(truePositives)/(truePositives+falsePositives))+"%")
        else:
            print("[classify] cannot compute precission as truePositives+falsePositives == 0")

        if truePositives+falseNegatives>0:
            print("[classify] recall = "+str(100*float(truePositives)/(truePositives+falseNegatives))+"%")
        else:
            print("[classify] cannot compute recall as truePositives+falseNegatives == 0")

        if truePositives+falsePositives+trueNegatives+falseNegatives>0:
            print("[classify] accuracy = "+str(100*float(truePositives+trueNegatives)/(truePositives+falsePositives+trueNegatives+falseNegatives))+"%")
        else:
            print("[classify] cannot compute accuracy as truePositives+falsePositives+trueNegatives+falseNegatives == 0")
Example #2
def main(args):
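    # Assumed context, not shown in this snippet: os, time, numpy as np,
    # pandas as pd, tensorflow as tf, the repo's config, models and
    # DataPipeline modules, and sklearn's confusion_matrix at module level.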

    if args.n_clusters is None:
        raise ValueError('Define the number of clusters with --n_clusters')
    if not args.noise and not args.events:
        raise ValueError("Define if evaluating accuracy on noise or events")

    # Directory in which the evaluation summaries are written
    if args.noise:
        summary_dir = os.path.join(args.checkpoint_dir, "noise")
    if args.events:
        summary_dir = os.path.join(args.checkpoint_dir, "events")
    if args.save_false:
        false_start = []
        false_end = []
        false_origintime = []
        false_dir = os.path.join("output", "false_predictions")
        if not os.path.exists(false_dir):
            os.makedirs(false_dir)

    while True:
        ckpt = tf.train.get_checkpoint_state(args.checkpoint_dir)
        if args.eval_interval < 0 or ckpt:
            print('Evaluating model')
            break
        print('Waiting for training job to save a checkpoint')
        time.sleep(args.eval_interval)

    cfg = config.Config()
    if args.noise:
        cfg.batch_size = 128
    if args.events:
        cfg.batch_size = 1
    if args.save_false:
        cfg.batch_size = 1
    cfg.n_epochs = 1
    cfg.add = 1
    cfg.n_clusters = args.n_clusters
    cfg.n_clusters += 1
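    # +1 for the extra noise class: noise is label -1 in the data and
    # class 0 in the network output.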

    while True:
        try:
            # data pipeline
            data_pipeline = DataPipeline(args.dataset,
                                         config=cfg,
                                         is_training=False)
            samples = {
                'data': data_pipeline.samples,
                'cluster_id': data_pipeline.labels,
                "start_time": data_pipeline.start_time,
                "end_time": data_pipeline.end_time
            }

            # set up model and validation metrics
            model = models.get(args.model,
                               samples,
                               cfg,
                               args.checkpoint_dir,
                               is_training=False)
            metrics = model.validation_metrics()
            # Validation summary writer
            summary_writer = tf.summary.FileWriter(summary_dir, None)

            with tf.Session() as sess:
                coord = tf.train.Coordinator()
                tf.initialize_local_variables().run()
                threads = tf.train.start_queue_runners(sess=sess, coord=coord)
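                # TF1 queue-based input: initializing local variables sets up
                # the pipeline's epoch counters, and the queue-runner threads
                # fill the input queues in the background.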

                model.load(sess, args.step)
                print('Evaluating at step {}'.format(
                    sess.run(model.global_step)))

                step = tf.train.global_step(sess, model.global_step)
                mean_metrics = {}
                for key in metrics:
                    mean_metrics[key] = 0

                n = 0
                pred_labels = np.empty(1)
                true_labels = np.empty(1)
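                # np.empty(1) seeds each array with one dummy element; it is
                # stripped off below before the confusion matrix is computed.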
                while True:
                    try:
                        to_fetch = [
                            metrics, model.layers["class_prediction"],
                            samples["cluster_id"], samples["start_time"],
                            samples["end_time"]
                        ]
                        metrics_, batch_pred_label, batch_true_label, starttime, endtime = sess.run(
                            to_fetch)

                        pred_labels = np.append(pred_labels, batch_pred_label)
                        true_labels = np.append(true_labels, batch_true_label)

                        # Save times of false preds
                        if args.save_false and \
                                batch_pred_label != batch_true_label:
                            print("---False prediction---")
                            print(starttime, endtime)
                            false_origintime.append(
                                (starttime[0] + endtime[0]) / 2)
                            false_end.append(endtime)
                            false_start.append(starttime)

                        for key in metrics:
                            mean_metrics[key] += cfg.batch_size * metrics_[key]
                        n += cfg.batch_size

                        mess = model.validation_metrics_message(metrics_)
                        print('{:03d} | '.format(n) + mess)

                    except KeyboardInterrupt:
                        print('stopping evaluation')
                        break

                    except tf.errors.OutOfRangeError:
                        print('Evaluation completed ({} epochs).'.format(
                            cfg.n_epochs))
                        print("{} windows seen".format(n))
                        break

                if n > 0:
                    for key in metrics:
                        mean_metrics[key] /= n
                        summary = tf.Summary(value=[
                            tf.Summary.Value(tag='{}/val'.format(key),
                                             simple_value=mean_metrics[key])
                        ])
                        if args.save_summary:
                            summary_writer.add_summary(summary,
                                                       global_step=step)

                summary_writer.flush()

                mess = model.validation_metrics_message(mean_metrics)
                print('Average | ' + mess)

                if args.eval_interval < 0:
                    print('End of evaluation')
                    break

            tf.reset_default_graph()
            print('Sleeping for {}s'.format(args.eval_interval))
            time.sleep(args.eval_interval)

        finally:
            print('joining data threads')
            coord.request_stop()

    if args.save_false:
        false_preds = {}
        false_preds["start_time"] = false_start
        false_preds["end_time"] = false_end
        false_preds["origintime"] = false_origintime
        df = pd.DataFrame(false_preds)
        df.to_csv(os.path.join(false_dir, "false_preds.csv"))
    pred_labels = pred_labels[1:]
    true_labels = true_labels[1:]
    # np.save("output/pred_labels_noise.npy",pred_labels)
    # np.save("output/true_labels_noise.npy",true_labels)
    print("---Confusion Matrix----")
    print(confusion_matrix(true_labels, pred_labels))

    coord.join(threads)
Example #3
def main(args):
    setproctitle.setproctitle('quakenet_predict')
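    # Assumed context: obspy's read(), the repo's preprocess_stream,
    # data_is_complete and fetch_window_data helpers, plus os, shutil, time,
    # numpy as np, pandas as pd and tensorflow as tf at module level.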

    ckpt = tf.train.get_checkpoint_state(args.checkpoint_dir)

    cfg = config.Config()
    cfg.batch_size = 1
    cfg.n_clusters = args.n_clusters
    cfg.add = 1
    cfg.n_clusters += 1

    # Remove previous output directory
    if os.path.exists(args.output):
        shutil.rmtree(args.output)
    os.makedirs(args.output)
    if args.plot:
        os.makedirs(os.path.join(args.output, "viz"))
    if args.save_sac:
        os.makedirs(os.path.join(args.output, "sac"))

    # Load stream
    stream_path = args.stream_path
    stream_file = os.path.split(stream_path)[-1]
    print("+ Loading Stream {}".format(stream_file))
    stream = read(stream_path)
    print('+ Preprocessing stream')
    stream = preprocess_stream(stream)


    # Create catalog name in which the events are stored
    catalog_name = os.path.split(stream_file)[-1].split(".mseed")[0] + ".csv"
    output_catalog = os.path.join(args.output, catalog_name)
    print('Catalog created to store events', output_catalog)

    # Dictionary to store info on detected events
    events_dic = {
        "start_time": [],
        "end_time": [],
        "cluster_id": [],
        "clusters_prob": []
    }

    # Windows generator
    win_gen = stream.slide(window_length=args.window_size,
                           step=args.window_step,
                           include_partial_windows=False)
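    # obspy's Stream.slide yields fixed-length windows (in seconds) advanced
    # by window_step; partial windows at the end of the stream are skipped.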
    if args.max_windows is None:
        total_time_in_sec = stream[0].stats.endtime - stream[0].stats.starttime
        max_windows = (total_time_in_sec - args.window_size) / args.window_step
    else:
        max_windows = args.max_windows

    # stream data with a placeholder
    samples = {
        'data':
        tf.placeholder(tf.float32,
                       shape=(cfg.batch_size, 1001, 3),
                       name='input_data'),
        'cluster_id':
        tf.placeholder(tf.int64, shape=(cfg.batch_size, ), name='input_label')
    }

    # set up model and validation metrics
    model = models.get(args.model,
                       samples,
                       cfg,
                       args.checkpoint_dir,
                       is_training=False)

    with tf.Session() as sess:

        model.load(sess, args.step)
        print('Predicting using model at step {}'.format(
            sess.run(model.global_step)))

        step = tf.train.global_step(sess, model.global_step)

        n_events = 0
        time_start = time.time()
        try:
            for idx, win in enumerate(win_gen):

                # Fetch class_proba and label
                to_fetch = [
                    samples['data'], model.layers['class_prob'],
                    model.layers['class_prediction']
                ]
                # Feed window and fake cluster_id (needed by the net) but
                # will be predicted
                if data_is_complete(win):
                    feed_dict = {
                        samples['data']: fetch_window_data(win),
                        samples['cluster_id']: np.array([0])
                    }
                    sample, class_prob_, cluster_id = sess.run(
                        to_fetch, feed_dict)
                else:
                    continue

                # Keep only cluster probabilities, drop the noise probability
                clusters_prob = class_prob_[0, 1:]
                cluster_id -= 1

                # label for noise = -1, label for cluster \in {0:n_clusters}

                is_event = cluster_id[0] > -1
                if is_event:
                    n_events += 1
                    events_dic["start_time"].append(win[0].stats.starttime)
                    events_dic["end_time"].append(win[0].stats.endtime)
                    events_dic["cluster_id"].append(cluster_id[0])
                    events_dic["clusters_prob"].append(list(clusters_prob))

                if idx % 1000 == 0:
                    print("Analyzing window starting at {}".format(
                        win[0].stats.starttime))

                if args.plot and is_event:
                    win_filtered = win.copy()
                    win_filtered.plot(outfile=os.path.join(
                        args.output, "viz", "event_{}_cluster_{}.png".format(
                            idx, cluster_id)))

                if args.save_sac and is_event:
                    win_filtered = win.copy()
                    win_filtered.write(os.path.join(
                        args.output, "sac",
                        "event_{}_cluster_{}.sac".format(idx, cluster_id)),
                                       format="SAC")

                if idx >= max_windows:
                    print("stopped after {} windows".format(max_windows))
                    print("found {} events".format(n_events))
                    break

        except KeyboardInterrupt:
            print('Interrupted at time {}.'.format(win[0].stats.starttime))
            print("processed {} windows, found {} events".format(
                idx + 1, n_events))
            print("Run time: ", time.time() - time_start)

    # Dump dictionary into csv file
    df = pd.DataFrame.from_dict(events_dic)
    df.to_csv(output_catalog)

    print("Run time: ", time.time() - time_start)
Example #4
def main(args):
    setproctitle.setproctitle('quakenet_eval')
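    # Assumed context: obspy's UTCDateTime, the repo's DataPipeline, config
    # and models modules, plus os, shutil, time, pandas as pd and
    # tensorflow as tf at module level.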

    if args.n_clusters is None:
        raise ValueError('Define the number of clusters with --n_clusters')

    ckpt = tf.train.get_checkpoint_state(args.checkpoint_dir)

    cfg = config.Config()
    cfg.batch_size = 1
    cfg.n_clusters = args.n_clusters
    cfg.add = 1
    cfg.n_clusters += 1
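    # +1 for the noise class, as in the other scripts above.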
    cfg.n_epochs = 1

    # Remove previous output directory
    if os.path.exists(args.output):
        shutil.rmtree(args.output)
    os.makedirs(args.output)
    if args.plot:
        os.makedirs(os.path.join(args.output,"viz"))

    # data pipeline
    data_pipeline = DataPipeline(args.dataset, config=cfg,
                                 is_training=False)
    samples = {
        'data': data_pipeline.samples,
        'cluster_id': data_pipeline.labels,
        'start_time': data_pipeline.start_time,
        'end_time': data_pipeline.end_time}

    # set up model and validation metrics
    model = models.get(args.model, samples, cfg,
                        args.checkpoint_dir,
                        is_training=False)

    if args.max_windows is None:
        max_windows = 2**31
    else:
        max_windows = args.max_windows

    # Dictionary to store info on detected events
    events_dic = {"start_time": [],
                  "end_time": [],
                  "utc_timestamp": [],
                  "cluster_id": [],
                  "clusters_prob": []}

    # Create catalog name in which the events are stored
    output_catalog = os.path.join(args.output, 'catalog_detection.csv')
    print('Catalog created to store events', output_catalog)


    # Run ConvNetQuake
    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        tf.initialize_local_variables().run()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        model.load(sess, args.step)
        print('Predicting using model at step {}'.format(
                sess.run(model.global_step)))

        step = tf.train.global_step(sess, model.global_step)

        n_events = 0
        idx = 0
        time_start = time.time()
        while True:
            try:
                # Fetch class_proba and label
                to_fetch = [samples['data'],
                            model.layers['class_prob'],
                            model.layers['class_prediction'],
                            samples['start_time'],
                            samples['end_time']]
                sample, class_prob_, cluster_id, start_time, end_time = sess.run(to_fetch)

                # Keep only cluster probabilities, drop the noise probability
                clusters_prob = class_prob_[0, 1:]
                cluster_id -= 1

                # label for noise = -1, label for cluster \in {0:n_clusters}

                is_event = cluster_id[0] > -1
                if is_event:
                    n_events += 1

                idx += 1
                if idx % 1000 == 0:
                    print("processed {} windows".format(idx))

                if is_event:
                    events_dic["start_time"].append(UTCDateTime(start_time))
                    events_dic["end_time"].append(UTCDateTime(end_time))
                    events_dic["utc_timestamp"].append((start_time +
                                                        end_time)/2.0)
                    events_dic["cluster_id"].append(cluster_id[0])
                    events_dic["clusters_prob"].append(list(clusters_prob))

                if idx >= max_windows:
                    print "stopped after {} windows".format(max_windows)
                    print "found {} events".format(n_events)
                    break

            except KeyboardInterrupt:
                print("processed {} windows, found {} events".format(idx+1, n_events))
                print("Run time: ", time.time() - time_start)
                break

            except tf.errors.OutOfRangeError:
                print('Evaluation completed ({} epochs).'.format(cfg.n_epochs))
                break

        print('joining data threads')
        m, s = divmod(time.time() - time_start, 60)
        print("Prediction took {} min {} seconds".format(m, s))
        coord.request_stop()
        coord.join(threads)


    # Dump dictionary into csv file
    df = pd.DataFrame.from_dict(events_dic)
    df.to_csv(output_catalog)
Example #5
def main(args):
  setproctitle.setproctitle('quakenet_debug')
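  # Assumed context: os, numpy as np, matplotlib.pyplot as plt,
  # tensorflow as tf, and the repo's config, models and data pipeline (dp)
  # modules at module level.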

  if not os.path.exists(args.output):
    os.makedirs(args.output)

  if args.n_clusters is None:
    raise ValueError('Define the number of clusters with --n_clusters')

  cfg = config.Config()
  cfg.batch_size = 1
  cfg.n_epochs = 1
  cfg.add = 2
  cfg.n_clusters = args.n_clusters
  cfg.n_clusters += 1

  # data pipeline
  data_pipeline = dp.DataPipeline(args.dataset, cfg, False)

  samples = {
    'data': data_pipeline.samples,
    'cluster_id': data_pipeline.labels
    }

  # model
  model_name = args.model
  model = models.get(model_name, samples,
                     cfg, args.checkpoint_dir, is_training=False)

  with tf.Session() as sess:
    coord = tf.train.Coordinator()
    tf.initialize_local_variables().run()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    model.load(sess)
    step = sess.run(model.global_step)
    print('Debugging at step {}'.format(step))

    activations = tf.get_collection(tf.GraphKeys.ACTIVATIONS)
    weights = tf.get_collection(tf.GraphKeys.WEIGHTS)
    biases = tf.get_collection(tf.GraphKeys.BIASES)
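    # Collections the model's layers register their tensors in; iterating
    # over the activations lets us dump each layer's response per sample.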

    toget = {}
    toget['0_input'] = model.inputs['data']
    for i, a in enumerate(activations):
      name = a.name.replace('/', '_').replace(':', '_')
      toget['{}_{}'.format(i+1, name)] = a

    for it in range(10):
      print('running session')
      fetched = sess.run(toget)
      print(fetched)

      print(it)
      for f in fetched:
        d = fetched[f]
        d = np.squeeze(d, axis=0)

        plt.figure()
        if len(d.shape) == 2:
          for i in range(d.shape[1]):
            plt.plot(d[:, i])
        plt.savefig(os.path.join(args.output, '{}_{}.pdf'.format(it, f)))
        plt.clf()

    coord.request_stop()
    coord.join(threads)
def eval(args, positivesOrNegatives):
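    # Evaluates one half of the test set: positive (event) windows when
    # positivesOrNegatives is True, negative (noise) windows otherwise.
    # Assumes module-level cfg and checkpoint_dir plus the global counters
    # declared below.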
    global truePositives
    global falsePositives
    global trueNegatives
    global falseNegatives
    global locationHit
    global locationMiss

    datasetDir = None

    if positivesOrNegatives:
        datasetDir = os.path.join(args.tfrecords_dir,
                                  cfg.output_tfrecords_dir_positives)
    else:
        datasetDir = os.path.join(args.tfrecords_dir,
                                  cfg.output_tfrecords_dir_negatives)

    cfg.batch_size = 1
    cfg.n_epochs = 1
    cfg.add = 1

    try:
        # data pipeline
        data_pipeline = DataPipeline(datasetDir, config=cfg, is_training=False)
        samples = {
            'data': data_pipeline.samples,
            'cluster_id': data_pipeline.labels,
            "start_time": data_pipeline.start_time,
            "end_time": data_pipeline.end_time
        }

        #print("data_pipeline.samples="+str(data_pipeline.samples))

        # set up model and validation metrics
        model = models.get(cfg.model,
                           samples,
                           cfg,
                           checkpoint_dir,
                           is_training=False)

        metrics = model.validation_metrics()

        with tf.Session() as sess:
            coord = tf.train.Coordinator()
            tf.initialize_local_variables().run()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            model.load(sess)
            print('Evaluating at step {}'.format(sess.run(model.global_step)))

            mean_metrics = {}
            for key in metrics:
                mean_metrics[key] = 0

            n = 0
            pred_labels = np.empty(1)
            true_labels = np.empty(1)
            while True:
                try:
                    to_fetch = [
                        metrics, model.layers["class_prediction"],
                        samples["cluster_id"], samples["start_time"],
                        samples["end_time"]
                    ]
                    metrics_, batch_pred_label, batch_true_label, starttime, endtime = sess.run(
                        to_fetch)

                    if positivesOrNegatives:
                        # Positive (event) windows.
                        # NOTE: pred label is 0 for noise (-1) and 1 for cluster 0
                        if batch_true_label[0] >= 0 and batch_pred_label[0] >= 1:
                            truePositives += 1
                            print("TRUE POSITIVE: batch_true_label = " +
                                  str(batch_true_label[0]) +
                                  "; batch_pred_label[0] = " +
                                  str(batch_pred_label[0]))
                            if batch_true_label[0] == batch_pred_label[0] - 1:
                                locationHit += 1
                            else:
                                locationMiss += 1
                        else:
                            # An event window classified as noise is a missed
                            # detection, i.e. a false negative.
                            falseNegatives += 1
                    else:
                        # Negative (noise) windows.
                        if batch_true_label[0] == -1 and batch_pred_label[0] == 0:
                            trueNegatives += 1
                        else:
                            # A noise window classified as an event is a
                            # false positive.
                            falsePositives += 1

                    #print("batch_true_label="+str(batch_true_label))
                    #print("batch_pred_label="+str(batch_pred_label))
                    batch_pred_label -= 1
                    pred_labels = np.append(pred_labels, batch_pred_label)
                    true_labels = np.append(true_labels, batch_true_label)

                    for key in metrics:
                        mean_metrics[key] += cfg.batch_size * metrics_[key]
                    n += cfg.batch_size

                except KeyboardInterrupt:
                    print('stopping evaluation')
                    break

                except tf.errors.OutOfRangeError:
                    print('Evaluation completed ({} epochs).'.format(
                        cfg.n_epochs))
                    print("{} windows seen".format(n))
                    break

            if n > 0:
                for key in metrics:
                    mean_metrics[key] /= n

            mess = model.validation_metrics_message(mean_metrics)
            print('Average | ' + mess)
            coord.request_stop()
    finally:
        pass

    pred_labels = pred_labels[1:]
    true_labels = true_labels[1:]
    print("---Confusion Matrix----")
    print(confusion_matrix(true_labels, pred_labels))

    coord.join(threads)