Example #1
def main(args):
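    """Evaluate a trained model on noise or event windows.

    `args` is an argparse-style namespace; the attributes read below are
    n_clusters, noise, events, save_false, checkpoint_dir, eval_interval,
    dataset, model, step and save_summary.
    """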

    if args.n_clusters is None:
        raise ValueError('Define the number of clusters with --n_clusters')
    if not args.noise and not args.events:
        raise ValueError("Define if evaluating accuracy on noise or events")

    # Directory in which the evaluation summaries are written
    if args.noise:
        summary_dir = os.path.join(args.checkpoint_dir, "noise")
    if args.events:
        summary_dir = os.path.join(args.checkpoint_dir, "events")
    if args.save_false:
        false_start = []
        false_end = []
        false_origintime = []
        false_dir = os.path.join("output", "false_predictions")
        if not os.path.exists(false_dir):
            os.makedirs(false_dir)

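    # Block until the training job has produced a checkpoint; a negative
    # eval_interval means a single, immediate evaluation.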
    while True:
        ckpt = tf.train.get_checkpoint_state(args.checkpoint_dir)
        if args.eval_interval < 0 or ckpt:
            print 'Evaluating model'
            break
        print 'Waiting for training job to save a checkpoint'
        time.sleep(args.eval_interval)

    cfg = config.Config()
    if args.noise:
        cfg.batch_size = 256
    if args.events:
        cfg.batch_size = 1
    if args.save_false:
        cfg.batch_size = 1
    cfg.n_epochs = 1
    cfg.add = 1
    cfg.n_clusters = args.n_clusters
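    # reserve one extra class for the noise label; predictions are shifted
    # by -1 at evaluation time so noise maps to -1 and clusters to 0..n-1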
    cfg.n_clusters += 1

    while True:
        try:
            # data pipeline
            data_pipeline = DataPipeline(args.dataset,
                                         config=cfg,
                                         is_training=False)
            samples = {
                'data': data_pipeline.samples,
                'cluster_id': data_pipeline.labels,
                "start_time": data_pipeline.start_time,
                "end_time": data_pipeline.end_time
            }

            # set up model and validation metrics
            model = models.get(args.model,
                               samples,
                               cfg,
                               args.checkpoint_dir,
                               is_training=False)
            metrics = model.validation_metrics()
            # Validation summary writer
            summary_writer = tf.train.SummaryWriter(summary_dir, None)

            with tf.Session() as sess:
                coord = tf.train.Coordinator()
                tf.initialize_local_variables().run()
                threads = tf.train.start_queue_runners(sess=sess, coord=coord)

                model.load(sess, args.step)
                print 'Evaluating at step {}'.format(
                    sess.run(model.global_step))

                step = tf.train.global_step(sess, model.global_step)
                mean_metrics = {}
                for key in metrics:
                    mean_metrics[key] = 0

                n = 0
                pred_labels = np.empty(1)
                true_labels = np.empty(1)
                while True:
                    try:
                        to_fetch = [
                            metrics, model.layers["class_prediction"],
                            samples["cluster_id"], samples["start_time"],
                            samples["end_time"]
                        ]
                        metrics_, batch_pred_label, batch_true_label, starttime, endtime = sess.run(
                            to_fetch)

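                        # shift predictions: noise -> -1, clusters -> 0..n-1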
                        batch_pred_label -= 1
                        pred_labels = np.append(pred_labels, batch_pred_label)
                        true_labels = np.append(true_labels, batch_true_label)

                        # Save times of false preds
                        if args.save_false and \
                                batch_pred_label != batch_true_label:
                            print "---False prediction---"
                            print UTCDateTime(starttime), UTCDateTime(endtime)
                            false_origintime.append(
                                (starttime[0] + endtime[0]) / 2)
                            false_end.append(UTCDateTime(endtime))
                            false_start.append(UTCDateTime(starttime))

                        # print  true_labels
                        for key in metrics:
                            mean_metrics[key] += cfg.batch_size * metrics_[key]
                        n += cfg.batch_size

                        mess = model.validation_metrics_message(metrics_)
                        print '{:03d} | '.format(n) + mess

                    except KeyboardInterrupt:
                        print 'stopping evaluation'
                        break

                    except tf.errors.OutOfRangeError:
                        print 'Evaluation completed ({} epochs).'.format(
                            cfg.n_epochs)
                        print "{} windows seen".format(n)
                        break

                if n > 0:
                    for key in metrics:
                        mean_metrics[key] /= n
                        summary = tf.Summary(value=[
                            tf.Summary.Value(tag='{}/val'.format(key),
                                             simple_value=mean_metrics[key])
                        ])
                        if args.save_summary:
                            summary_writer.add_summary(summary,
                                                       global_step=step)

                summary_writer.flush()

                mess = model.validation_metrics_message(mean_metrics)
                print 'Average | ' + mess

                if args.eval_interval < 0:
                    print 'End of evaluation'
                    break

            tf.reset_default_graph()
            print 'Sleeping for {}s'.format(args.eval_interval)
            time.sleep(args.eval_interval)

        finally:
            print 'joining data threads'
            coord.request_stop()

    if args.save_false:
        false_preds = {}
        false_preds["start_time"] = false_start
        false_preds["end_time"] = false_end
        false_preds["origintime"] = false_origintime
        # false_preds = np.array((false_start, false_end)).transpose()[0]
        # print 'shape', false_preds.shape
        df = pd.DataFrame(false_preds)
        df.to_csv(os.path.join(false_dir, "false_preds.csv"))
    # drop the dummy first element used to initialise the arrays
    pred_labels = pred_labels[1:]
    true_labels = true_labels[1:]
    # np.save("output/pred_labels_noise.npy",pred_labels)
    # np.save("output/true_labels_noise.npy",true_labels)
    print "---Confusion Matrix----"
    print confusion_matrix(true_labels, pred_labels)

    coord.join(threads)
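
For reference, a minimal sketch of a command-line wrapper for this entry
point; the flag names mirror the attributes read by main() above, while
the types and defaults are illustrative assumptions.

import argparse

def parse_args():
    # Flags mirror the attributes read by main() above; the types and
    # defaults are assumptions, not part of the original code.
    parser = argparse.ArgumentParser()
    parser.add_argument('--checkpoint_dir', required=True)
    parser.add_argument('--dataset', required=True)
    parser.add_argument('--model', required=True)
    parser.add_argument('--n_clusters', type=int, default=None)
    parser.add_argument('--step', type=int, default=None)
    parser.add_argument('--eval_interval', type=int, default=-1)
    parser.add_argument('--noise', action='store_true')
    parser.add_argument('--events', action='store_true')
    parser.add_argument('--save_false', action='store_true')
    parser.add_argument('--save_summary', action='store_true')
    return parser.parse_args()

if __name__ == '__main__':
    main(parse_args())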
Example #2
def main(_):
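    """Read windows from FLAGS.tfrecords, add noise (plus optional
    stretch/compression/shift) and write the augmented copies to
    FLAGS.output."""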

    if FLAGS.stretch_data:
        print "ADD NOISE AND STRETCH DATA"
    if FLAGS.compress_data:
        print "ADD NOISE AND COMPRESS DATA"
    if FLAGS.shift_data:
        print "ADD NOISE AND SHIFT DATA"

    # Make dirs
    output_dir = os.path.split(FLAGS.output)[0]
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    if FLAGS.plot:
        if not os.path.exists(os.path.join(output_dir, "true_data")):
            os.makedirs(os.path.join(output_dir, "true_data"))
        if not os.path.exists(os.path.join(output_dir, "augmented_data")):
            os.makedirs(os.path.join(output_dir, "augmented_data"))

    cfg = config.Config()
    cfg.batch_size = 1
    cfg.n_epochs = 1

    data_pipeline = DataPipeline(FLAGS.tfrecords,
                                 config=cfg,
                                 is_training=False)
    samples = data_pipeline.samples
    labels = data_pipeline.labels

    with tf.Session() as sess:
        coord = tf.train.Coordinator()
        tf.initialize_local_variables().run()
        threads = tf.train.start_queue_runners(coord=coord)

        output_tfrecords = FLAGS.output
        writer = DataWriter(output_tfrecords)
        n_examples = 0
        while True:
            try:
                sample, label = sess.run([samples, labels])
                sample = np.squeeze(sample, axis=0)
                label = label[0]

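                # Augment a noisy copy of the sample; the original stays
                # untouched for the side-by-side plot below.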
                noised_sample = add_noise_to_signal(np.copy(sample))
                if FLAGS.compress_data:
                    noised_sample = compress_signal(noised_sample)
                if FLAGS.stretch_data:
                    noised_sample = stretch_signal(noised_sample)
                if FLAGS.shift_data:
                    noised_sample = shift_signal(noised_sample)

                if FLAGS.plot:
                    plot_true_and_augmented_data(sample, noised_sample, label,
                                                 n_examples)

                stream = convert_np_to_stream(noised_sample)
                writer.write(stream, label)

                n_examples += 1

            except KeyboardInterrupt:
                print 'stopping data augmentation'
                break

            except tf.errors.OutOfRangeError:
                print 'Augmentation completed ({} epochs, {} examples seen).'\
                    .format(cfg.n_epochs, n_examples)
                break

        writer.close()
        coord.request_stop()
        coord.join(threads)
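
The FLAGS object used above is presumably defined at module level; a
sketch using tf.app.flags (the flag API contemporary with this TensorFlow
version), where the help strings and defaults are assumptions.

import tensorflow as tf

flags = tf.app.flags
flags.DEFINE_string('tfrecords', None, 'input tfrecords to augment')
flags.DEFINE_string('output', None, 'path for the augmented tfrecords')
flags.DEFINE_boolean('stretch_data', False, 'add noise and stretch')
flags.DEFINE_boolean('compress_data', False, 'add noise and compress')
flags.DEFINE_boolean('shift_data', False, 'add noise and shift')
flags.DEFINE_boolean('plot', False, 'plot true vs. augmented windows')
FLAGS = flags.FLAGS

if __name__ == '__main__':
    tf.app.run()  # dispatches to main(_) defined above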
Example #3
def main(args):
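    """Run detection over args.dataset and write the detected events to
    <args.output>/catalog_detection.csv."""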
    setproctitle.setproctitle('quakenet_eval')

    if args.n_clusters is None:
        raise ValueError('Define the number of clusters with --n_clusters')

    ckpt = tf.train.get_checkpoint_state(args.checkpoint_dir)

    cfg = config.Config()
    cfg.batch_size = 1
    cfg.n_clusters = args.n_clusters
    cfg.add = 1
    cfg.n_clusters += 1
    cfg.n_epochs = 1

    # Remove previous output directory
    if os.path.exists(args.output):
        shutil.rmtree(args.output)
    os.makedirs(args.output)
    if args.plot:
        os.makedirs(os.path.join(args.output, "viz"))

    # data pipeline
    data_pipeline = DataPipeline(args.dataset, config=cfg, is_training=False)
    samples = {
        'data': data_pipeline.samples,
        'cluster_id': data_pipeline.labels,
        'start_time': data_pipeline.start_time,
        'end_time': data_pipeline.end_time
    }

    # set up model and validation metrics
    model = models.get(args.model,
                       samples,
                       cfg,
                       args.checkpoint_dir,
                       is_training=False)

    if args.max_windows is None:
        max_windows = 2**31
    else:
        max_windows = args.max_windows

    # Dictionary to store info on detected events
    events_dic = {
        "start_time": [],
        "end_time": [],
        "utc_timestamp": [],
        "cluster_id": [],
        "clusters_prob": []
    }

    # Create catalog name in which the events are stored
    output_catalog = os.path.join(args.output, 'catalog_detection.csv')
    print 'Catalog created to store events:', output_catalog

    # Run ConvNetQuake
    with tf.Session(config=tf_config) as sess:
        coord = tf.train.Coordinator()
        tf.initialize_local_variables().run()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        model.load(sess, args.step)
        print 'Predicting using model at step {}'.format(
            sess.run(model.global_step))

        step = tf.train.global_step(sess, model.global_step)

        n_events = 0
        idx = 0
        time_start = time.time()
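        # Stream windows through the network until the input queue is
        # exhausted (or max_windows is reached).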
        while True:
            try:
                # Fetch class_proba and label
                to_fetch = [
                    samples['data'], model.layers['class_prob'],
                    model.layers['class_prediction'], samples['start_time'],
                    samples['end_time']
                ]
                sample, class_prob_, cluster_id, start_time, end_time = sess.run(
                    to_fetch)

                # keep only the cluster probabilities, drop the noise probability
                clusters_prob = class_prob_[0, 1::]
                cluster_id -= 1

                # labels: noise = -1, clusters in {0, ..., n_clusters - 1}

                is_event = cluster_id[0] > -1
                if is_event:
                    n_events += 1

                idx += 1
                if idx % 1000 == 0:
                    print "processed {} windows".format(idx)

                if is_event:
                    events_dic["start_time"].append(UTCDateTime(start_time))
                    events_dic["end_time"].append(UTCDateTime(end_time))
                    events_dic["utc_timestamp"].append(
                        (start_time + end_time) / 2.0)
                    events_dic["cluster_id"].append(cluster_id[0])
                    events_dic["clusters_prob"].append(list(clusters_prob))

                if idx >= max_windows:
                    print "stopped after {} windows".format(max_windows)
                    print "found {} events".format(n_events)
                    break

            except KeyboardInterrupt:
                print "processed {} windows, found {} events".format(
                    idx + 1, n_events)
                print "Run time: ", time.time() - time_start
                break

            except tf.errors.OutOfRangeError:
                print 'Evaluation completed ({} epochs).'.format(cfg.n_epochs)
                break

        print 'joining data threads'
        m, s = divmod(time.time() - time_start, 60)
        print "Prediction took {} min {} seconds".format(m, s)
        coord.request_stop()
        coord.join(threads)

    # Dump dictionary into csv file
    df = pd.DataFrame.from_dict(events_dic)
    df.to_csv(output_catalog)
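
The resulting catalog is a plain CSV that can be inspected with pandas; a
minimal sketch (the path below is illustrative, the column names come from
events_dic above).

import pandas as pd

catalog = pd.read_csv('output/catalog_detection.csv')  # illustrative path
print catalog[['start_time', 'end_time', 'cluster_id']].head()
print 'events per cluster:'
print catalog['cluster_id'].value_counts()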
Example #4
def eval(args, positivesOrNegatives):
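    """Evaluate one tfrecords split (positives if positivesOrNegatives is
    True, negatives otherwise) and update the global TP/FP/TN/FN and
    location counters."""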
    global truePositives
    global falsePositives
    global trueNegatives
    global falseNegatives
    global locationHit
    global locationMiss
    #summary_dir = os.path.join(output_dir, "eval_summary_events")

    datasetDir = None

    if positivesOrNegatives:
        datasetDir = os.path.join(args.tfrecords_dir,
                                  cfg.output_tfrecords_dir_positives)
    else:
        datasetDir = os.path.join(args.tfrecords_dir,
                                  cfg.output_tfrecords_dir_negatives)

    #print(datasetDir)

    cfg.batch_size = 1
    cfg.n_epochs = 1
    cfg.add = 1

    try:
        # data pipeline
        data_pipeline = DataPipeline(datasetDir, config=cfg, is_training=False)
        samples = {
            'data': data_pipeline.samples,
            'cluster_id': data_pipeline.labels,
            "start_time": data_pipeline.start_time,
            "end_time": data_pipeline.end_time
        }

        #print("data_pipeline.samples="+str(data_pipeline.samples))

        # set up model and validation metrics
        model = models.get(cfg.model,
                           samples,
                           cfg,
                           checkpoint_dir,
                           is_training=False)

        metrics = model.validation_metrics()
        # Validation summary writer
        #summary_writer = tf.train.SummaryWriter(summary_dir, None)

        with tf.Session() as sess:
            coord = tf.train.Coordinator()
            tf.initialize_local_variables().run()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            model.load(sess)
            print 'Evaluating at step {}'.format(sess.run(model.global_step))

            #step = tf.train.global_step(sess, model.global_step)
            mean_metrics = {}
            for key in metrics:
                mean_metrics[key] = 0

            n = 0
            pred_labels = np.empty(1)
            true_labels = np.empty(1)
            while True:
                try:
                    to_fetch = [
                        metrics, model.layers["class_prediction"],
                        samples["cluster_id"], samples["start_time"],
                        samples["end_time"]
                    ]
                    metrics_, batch_pred_label, batch_true_label, starttime, endtime = sess.run(
                        to_fetch)

                    if positivesOrNegatives:
                        # Positive windows.
                        # NOTE: raw predictions are 0 for noise and i + 1 for
                        # cluster i (the -1 shift happens below).
                        if batch_true_label[0] >= 0 and batch_pred_label[0] >= 1:
                            truePositives += 1
                            print("TRUE POSITIVE: batch_true_label = " +
                                  str(batch_true_label[0]) +
                                  "; batch_pred_label[0] = " +
                                  str(batch_pred_label[0]))
                            # was the correct cluster predicted?
                            if batch_true_label[0] == batch_pred_label[0] - 1:
                                locationHit += 1
                            else:
                                locationMiss += 1
                            #sys.stdout.write("\033[92mP\033[0m")
                        else:
                            # event window missed by the model
                            falsePositives += 1
                            #sys.stdout.write("\033[91mP\033[0m")
                    else:
                        # Negative (noise) windows
                        if batch_true_label[0] == -1 and batch_pred_label[0] == 0:
                            trueNegatives += 1
                            #sys.stdout.write("\033[92mN\033[0m")
                        else:
                            # noise window flagged as an event
                            falseNegatives += 1
                            #sys.stdout.write("\033[91mN\033[0m")

                    #print("batch_true_label="+str(batch_true_label))
                    #print("batch_pred_label="+str(batch_pred_label))
                    batch_pred_label -= 1
                    pred_labels = np.append(pred_labels, batch_pred_label)
                    true_labels = np.append(true_labels, batch_true_label)

                    # print  true_labels
                    for key in metrics:
                        mean_metrics[key] += cfg.batch_size * metrics_[key]
                    n += cfg.batch_size

                    #mess = model.validation_metrics_message(metrics_)
                    #print '{:03d} | '.format(n)+mess

                except KeyboardInterrupt:
                    print 'stopping evaluation'
                    break

                except tf.errors.OutOfRangeError:
                    print 'Evaluation completed ({} epochs).'.format(
                        cfg.n_epochs)
                    print "{} windows seen".format(n)
                    break

            if n > 0:
                for key in metrics:
                    mean_metrics[key] /= n
                    summary = tf.Summary(value=[
                        tf.Summary.Value(tag='{}/val'.format(key),
                                         simple_value=mean_metrics[key])
                    ])
                    #if args.save_summary:
                    #    summary_writer.add_summary(summary, global_step=step)

            #summary_writer.flush()

            mess = model.validation_metrics_message(mean_metrics)
            #print 'Average | '+mess
            coord.request_stop()
    finally:
        pass
        #print 'joining data threads'

    # drop the dummy first element used to initialise the arrays
    pred_labels = pred_labels[1:]
    true_labels = true_labels[1:]
    # np.save("output/pred_labels_noise.npy",pred_labels)
    # np.save("output/true_labels_noise.npy",true_labels)
    #print "---Confusion Matrix----"
    #print confusion_matrix(true_labels, pred_labels)

    coord.join(threads)
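
Once both splits have been evaluated, the global counters can be folded
into summary scores. A minimal sketch; summarize() is a hypothetical
helper, and note the counter convention used in eval() above:
falsePositives counts missed event windows, falseNegatives counts noise
windows flagged as events.

def summarize():
    # max(..., 1) guards against division by zero on an empty split
    detection_rate = float(truePositives) / max(truePositives + falsePositives, 1)
    false_alarms = float(falseNegatives) / max(trueNegatives + falseNegatives, 1)
    location_acc = float(locationHit) / max(locationHit + locationMiss, 1)
    print 'detection rate on event windows = {:.3f}'.format(detection_rate)
    print 'false alarm rate on noise windows = {:.3f}'.format(false_alarms)
    print 'location accuracy = {:.3f}'.format(location_acc)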