Example no. 1
    def predict(self, dataset_path, template_path,
                catalog_path):
        template = load_stream(template_path)
        stream = load_stream(dataset_path)
        # Use the beta selected by fit(); fall back to a default threshold otherwise
        if not hasattr(self, 'beta'):
            self.beta = 8.5
        templates = [template]
        template_name = [os.path.split(template_path)[-1].split('.')[0]]

        day_streams = self._split_stream_into_day_streams(stream)
        print '-------'
        print ' + Running template matching method on test set'
        detections = []
        for st in day_streams:
            detections.append(mf.match_filter(template_names=template_name,
                                              template_list=templates,
                                              st=st, threshold=self.beta,
                                              threshold_type='MAD',
                                              trig_int=1.0,
                                              plotvar=False))

        # Flatten list of detections
        self.detections = [d for detection in detections for d in detection]
Example no. 2
    def fit(self, dataset_path, template_path,
            catalog_path):
        """ Detect events in stream for various beta and find the optimal
        beta parameter
        """
        template = load_stream(template_path)
        stream = load_stream(dataset_path)
        templates = [template]
        template_name = [os.path.split(template_path)[-1].split('.')[0]]

        day_streams = self._split_stream_into_day_streams(stream)
        detection_results = np.zeros(len(self._betas))
        for k, beta in enumerate(self._betas):
            print '------'
            print ' + Running template matching method for beta =', beta
            detections = []
            for st in day_streams:
                detections.append(mf.match_filter(template_names=template_name,
                                                  template_list=templates,
                                                  st=st, threshold=beta,
                                                  threshold_type='MAD',
                                                  trig_int=1.0,
                                                  plotvar=False))
            # Flatten list of detections
            detections = [d for detection in detections for d in detection]
            false_pos, false_neg = self.score(detections, catalog_path)
            print 'FP: {}, FN: {}'.format(false_pos, false_neg)
            detection_results[k] = false_pos + false_neg
        self.beta = self._betas[np.argmin(detection_results)]
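A minimal usage sketch for the fit/predict pair above; the class name TemplateMatchingDetector and the file paths are assumptions for illustration, not taken from the source:

detector = TemplateMatchingDetector()
# Grid-search the MAD threshold beta on a training stream with a known catalog
detector.fit('data/train_stream.mseed', 'data/template.mseed', 'data/train_catalog.csv')
# Detect events on a held-out stream using the selected beta
detector.predict('data/test_stream.mseed', 'data/template.mseed', 'data/test_catalog.csv')
print 'beta =', detector.beta, 'detections =', len(detector.detections)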
Example no. 3
def test_load_stream():
    tpath = path.join(settings.TEST_DATA_DIR,
                      'event_stream.mseed')
    stream = data_io.load_stream(tpath)
    assert(len(stream) == 3)
    assert(stream[0].stats.sampling_rate == 100)
    assert(stream[0].stats.npts == 1001)
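The test above exercises data_io.load_stream; a minimal sketch of such a loader, assuming it is essentially a thin wrapper around obspy.read (the real implementation may also preprocess, merge, or sort traces):

import obspy

def load_stream(path):
    """Read a waveform file (e.g. miniSEED) and return its obspy Stream."""
    stream = obspy.read(path)
    # Hypothetical: sort by channel so the trace order is deterministic
    stream.sort(keys=['channel'])
    return stream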
Example no. 4
def fetch_window_and_label(stream_name):
    """Load window stream, extract data and label"""
    stream = load_stream(stream_name)
    data = np.empty((1001, 3))
    for i in range(3):
        data[:, i] = stream[i].data.astype(np.float32)
    data = np.expand_dims(data, 0)
    stream_name = os.path.split(stream_name)[-1]
    label = np.empty((1,))
    label[0] = stream_name.split("_")[1]
    return data, label
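A hypothetical call, assuming window files are named like event_<label>_<timestamp>.mseed, which is what the label parsing above implies:

data, label = fetch_window_and_label('windows/event_1_2016-10-05.mseed')
# data has shape (1, 1001, 3): one window, 1001 samples, 3 channels
# label is a length-1 array holding the value parsed from the file name, here 1.0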
Example no. 5
def main(args):
    # Remove previous output directory
    output_viz = os.path.join(args.output, "viz")
    output_sac = os.path.join(args.output, "sac")
    if args.plot:
        if os.path.exists(output_viz):
            shutil.rmtree(output_viz)
        os.makedirs(output_viz)
    if args.save_sac:
        if os.path.exists(output_sac):
            shutil.rmtree(output_sac)
        os.makedirs(output_sac)

    # Read stream
    print "+ Loading stream"
    st = load_stream(args.stream_path)
    # Read catalog
    print "+ Loading catalog"
    cat = load_catalog(args.catalog_path)

    # Loop over events in the catalog and plot/save their windows
    print "+ Creating windows with detected events from ConvNetQuake"
    for event in tqdm(range(cat.shape[0]),
                      total=cat.shape[0],
                      unit="events",
                      leave=False):
        win_start = UTCDateTime(cat.iloc[event].start_time)
        win_end = UTCDateTime(cat.iloc[event].end_time)
        win = st.slice(win_start, win_end).copy()
        if args.plot:
            win.plot(
                outfile=os.path.join(output_viz, "event_{}.png".format(event)))
        if args.save_sac:
            for tr in win:
                if isinstance(tr.data, np.ma.masked_array):
                    tr.data = tr.data.filled()
            win.write(os.path.join(output_sac, "event_{}_.sac".format(event)),
                      format="SAC")
Example no. 6
def make_synthetic_data(templates_dir,
                        trace_duration,
                        output_path,
                        max_amplitude,
                        stream_nb=0,
                        random_scalings=1):
    """ Insert a template and add noise to create a stream. 
    Also create a label stream with 0 if no event and 1 if there
    is an event"""

    template_streams = []
    template_names = []
    for f in os.listdir(templates_dir):
        f = os.path.join(templates_dir, f)
        if os.path.isfile(f) and '.mseed' in f:
            template_streams.append(data_io.load_stream(f))
            template_names.append(f)

    if len(template_streams) < 1:
        raise ValueError(
            'Invalid path "{}", contains no .mseed templates'.format(
                templates_dir))

    print '+ Creating synthetic data from {} template_streams'.format(
        len(template_streams))

    # TODO(mika): check that all the template_streams have the same meta
    sampling_rate = template_streams[0][0].stats.sampling_rate
    n_channels = len(template_streams[0])
    stream_info = {}
    stream_info['station'] = template_streams[0][0].stats.station
    stream_info['network'] = template_streams[0][0].stats.network
    stream_info['channels'] = [tr.stats.channel for tr in template_streams[0]]

    out_npts = int(sampling_rate * trace_duration)

    # Centered, normalized and windowed template signal
    templates = get_normalized_templates(template_streams)
    original_dtype = templates[0].dtype
    # TODO: check range of original data, what should it be?

    print '  Sampling rate {} Hz'.format(sampling_rate)

    # Sample-accurate intervals between events (1 min to 1 h delay).
    # Slightly oversample so that we cover the full trace length.
    low = 60 * sampling_rate
    high = 60 * 60 * sampling_rate
    mean = (low + high) / 2.0
    intervals = np.random.randint(low=low,
                                  high=high,
                                  size=int(1.5 * out_npts / mean))

    # Keep only events in the time interval of the synthetic data
    events = np.cumsum(intervals)
    maxpts = max([t.shape[1] for t in templates])  # Max length of a template
    events = events[events < out_npts - maxpts]

    # Random scaling and template choice for each event
    if random_scalings == 1:
        scales = max_amplitude * np.random.uniform(size=len(events))
    else:
        scales = max_amplitude * np.ones(len(events))
    template_ids = np.random.randint(0, len(templates), size=len(events))

    # Noise floor (WGN)
    # TODO(mika): replace with more realistic Earth noise
    # TODO(tibo): extract a few seconds of recording in CA
    noise = np.random.normal(size=(n_channels, out_npts))

    # Create a signal with noise
    signal = np.copy(noise)

    # Create label stream
    label = np.zeros((1, out_npts))

    # Accumulate signal and noise energies over event windows to measure the SNR
    A_noise = 0
    A_signal = 0

    # Add events to the signal and set the label to 1 wherever there is an event
    for e, s, tid in zip(events, scales, template_ids):
        # TODO: add a random shift to model the propagation time
        t = templates[tid]
        npts = t.shape[1]
        signal[:, e:e + npts] += s * t
        label[:, e:e + npts] = 1

        A_noise += np.sum(np.square(noise[:, e:e + npts]))
        A_signal += np.sum(np.square(s * t))

    snr = A_signal / A_noise
    snr = 10 * np.log10(snr)

    print "+ Converting back to {}".format(original_dtype)
    signal = signal.astype(original_dtype)
    label = label.astype(original_dtype)
    out_stream = data_conversion.array2stream(signal, sampling_rate,
                                              stream_info)
    out_label = data_conversion.array2stream(label, sampling_rate, stream_info)

    print '+ Generated {} events in {}s'.format(len(events), trace_duration)
    print '+ SNR over event windows: {:.1f} dB'.format(snr)

    # Prepare catalog
    starttime = out_stream[0].stats.starttime
    events_time = [starttime + e * 1.0 / sampling_rate for e in events]

    meta = {
        'sampling_rate': sampling_rate,
        'n_events': len(events),
        'snr': snr,
        'max_amplitude': max_amplitude,
        'templates': template_names,
    }

    # Save catalog and stream
    catalog_fmt = 'catalog_{:03d}.csv'
    stream_fmt = 'stream_{:03d}.mseed'
    label_fmt = 'label_{:03d}.mseed'
    meta_fmt = 'meta_{:03d}.json'
    catalog_path = os.path.join(output_path, catalog_fmt.format(stream_nb))
    stream_path = os.path.join(output_path, stream_fmt.format(stream_nb))
    meta_path = os.path.join(output_path, meta_fmt.format(stream_nb))
    label_path = os.path.join(output_path, label_fmt.format(stream_nb))

    print '+ Saving to disk'
    data_io.write_catalog(events_time, catalog_path)
    data_io.write_stream(out_stream, stream_path)
    data_io.write_stream(out_label, label_path)
    with open(meta_path, 'w') as f:
        json.dump(meta, f, indent=2)
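A hypothetical invocation of the generator above; the directory layout and parameter values are assumptions chosen for illustration:

# Build one hour of synthetic data from the .mseed templates in 'templates/',
# scaling each inserted event by a random factor of up to max_amplitude.
make_synthetic_data(templates_dir='templates/',
                    trace_duration=3600,
                    output_path='synthetic/',
                    max_amplitude=5.0,
                    stream_nb=0,
                    random_scalings=1)
# Writes stream_000.mseed, label_000.mseed, catalog_000.csv and meta_000.json
# into 'synthetic/'.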
Example no. 7
    def run(self):
        self.stream = data_io.load_stream(self.stream_path)
Example no. 8
def main(args):
    setproctitle.setproctitle('quakenet_predict')

    ckpt = tf.train.get_checkpoint_state(args.checkpoint_dir)

    cfg = config.Config()
    cfg.batch_size = 1
    cfg.n_clusters = args.n_clusters
    cfg.add = 1
    cfg.n_clusters += 1

    # Remove previous output directory
    if os.path.exists(args.output):
        shutil.rmtree(args.output)
    os.makedirs(args.output)
    if args.plot:
        os.makedirs(os.path.join(args.output, "viz"))
        os.makedirs(os.path.join(args.output, "viz_not"))
    if args.save_sac:
        os.makedirs(os.path.join(args.output, "sac"))

    # Load stream
    #stream_path = args.stream_path
    #stream_file = os.path.split(stream_path)[-1]
    #print "+ Loading Stream {}".format(stream_file)
    #stream = read(stream_path)
    #print '+ Preprocessing stream'
    #stream = preprocess_stream(stream)

    # Changed 2017/12/07: read a directory of .mseed files instead of a single stream
    stream_path = args.stream_path
    try:
        stream_files = [
            file for file in os.listdir(stream_path)
            if fnmatch.fnmatch(file, '*.mseed')
        ]
    except OSError:
        # stream_path points at a single file rather than a directory
        stream_files = [os.path.split(stream_path)[-1]]
        stream_path = os.path.split(stream_path)[0]
    # stream data with a placeholder
    samples = {
        'data':
        tf.placeholder(tf.float32,
                       shape=(cfg.batch_size, cfg.win_size, 3),
                       name='input_data'),
        'cluster_id':
        tf.placeholder(tf.int64, shape=(cfg.batch_size, ), name='input_label')
    }

    # set up model and validation metrics
    model = models.get(args.model,
                       samples,
                       cfg,
                       args.checkpoint_dir,
                       is_training=False)
    with tf.Session() as sess:
        model.load(sess, args.step)
        print 'Predicting using model at step {}'.format(
            sess.run(model.global_step))

        step = tf.train.global_step(sess, model.global_step)

        n_events = 0
        time_start = time.time()
        for stream_file in stream_files:
            stream_path1 = os.path.join(stream_path, stream_file)
            print " + Loading stream {}".format(stream_file)
            stream = load_stream(stream_path1)
            #print stream[0],stream[1].stats
            print " + Preprocess stream"
            stream = preprocess_stream(stream)
            print " -- Stream is ready, starting detection"

            # Create catalog name in which the events are stored
            catalog_name = os.path.split(stream_file)[-1].split(
                ".mseed")[0] + ".csv"
            output_catalog = os.path.join(args.output, catalog_name)
            print 'Catalog created to store events', output_catalog

            # Dictionary to store info on detected events
            events_dic = {
                "start_time": [],
                "end_time": [],
                "cluster_id": [],
                "clusters_prob": []
            }
            # Windows generator
            # win_gen = stream.slide(window_length=args.window_size,
            #                step=args.window_step,
            #                include_partial_windows=False)
            if args.save_sac:
                first_slice = stream.slice(
                    stream[0].stats.starttime,
                    stream[0].stats.starttime + args.window_size)
                first_slice.write(os.path.join(
                    args.output, "sac", "{}_{}_{}.sac".format(
                        stream[0].stats.station, '0',
                        str(stream[0].stats.starttime).replace(':', '_'))),
                                  format="SAC")
            if args.max_windows is None:
                total_time_in_sec = stream[0].stats.endtime - stream[
                    0].stats.starttime
                max_windows = (total_time_in_sec -
                               args.window_size) / args.window_step
            else:
                max_windows = args.max_windows
            try:
                lists = [0]
                #lists = np.arange(0,30,5)
                for i in lists:

                    win_gen = stream.slide(window_length=args.window_size,
                                           step=args.window_step,
                                           offset=i,
                                           include_partial_windows=False)
                    for idx, win in enumerate(win_gen):
                        if data_is_complete(win):
                            ampl_e, ampl_n, ampl_z = filter_small_ampitude(win)
                            if ampl_e > 0.3 or ampl_n > 0.3 or ampl_z > 0.3:
                                continue
                            # Skip windows whose peak amplitude is below 600
                            # on every channel
                            ampm_e = max(abs(win[0].data))
                            ampm_n = max(abs(win[1].data))
                            ampm_z = max(abs(win[2].data))
                            if ampm_e < 600 and ampm_n < 600 and ampm_z < 600:
                                continue
                            # Fetch class_proba and label
                            to_fetch = [
                                samples['data'], model.layers['class_prob'],
                                model.layers['class_prediction']
                            ]

                            # Feed the window and a fake cluster_id (required by
                            # the net); the actual cluster will be predicted
                            feed_dict = {
                                samples['data']:
                                fetch_window_data(win.copy().normalize()),
                                samples['cluster_id']:
                                np.array([0])
                            }
                            sample, class_prob_, cluster_id = sess.run(
                                to_fetch, feed_dict)
                        else:
                            continue

                        # Keep only cluster probabilities; drop the noise probability
                        clusters_prob = class_prob_[0, 1::]
                        cluster_id -= 1

                        # label for noise = -1, cluster labels are in {0, ..., n_clusters - 1}

                        is_event = cluster_id[0] > -1
                        #print cluster_id[0],is_event
                        # Highest cluster probability; only windows above 0.1 are saved
                        probs = '{:.5f}'.format(max(list(clusters_prob)))
                        save_event = (float(probs) - 0.1) >= 0
                        #print probs,save_event
                        if is_event:
                            n_events += 1
                            print "event {} ,cluster id {}".format(
                                is_event, class_prob_)
                            events_dic["start_time"].append(
                                win[0].stats.starttime)
                            events_dic["end_time"].append(win[0].stats.endtime)
                            events_dic["cluster_id"].append(cluster_id[0])
                            events_dic["clusters_prob"].append(
                                list(clusters_prob))

                        if idx % 1000 == 0:
                            print "Analyzing window starting at {}".format(
                                win[0].stats.starttime)

                        if args.plot:
                            import matplotlib
                            matplotlib.use('Agg')
                            win_filtered = win.copy()

                            if is_event:
                                # if args.plot:

                                # win_filtered.filter("bandpass",freqmin=4.0, freqmax=16.0)
                                win_filtered.plot(outfile=os.path.join(
                                    args.output,
                                    "viz",
                                    # Changed 2017/11/25: use max cluster_prob instead of cluster_id
                                    # "event_{}_cluster_{}.png".format(idx, cluster_id)))
                                    "{}_{}_{}.png".format(
                                        win[0].stats.station, str(probs),
                                        str(win[0].stats.starttime).replace(
                                            ':', '_'))))
                            else:

                                win_filtered.plot(outfile=os.path.join(
                                    args.output, "viz_not",
                                    "{}_{}_{}.png".format(
                                        win[0].stats.station, str(probs),
                                        str(win[0].stats.starttime).replace(
                                            ':', '_'))))

                        if args.save_sac and save_event:
                            # win_filtered = win.copy()
                            save_start = win[0].stats.starttime - 12
                            save_end = win[0].stats.endtime + 10
                            win_filtered = stream.slice(save_start, save_end)
                            win_filtered.write(os.path.join(
                                args.output, "sac", "{}_{}_{}.sac".format(
                                    win[0].stats.station, str(probs),
                                    str(win[0].stats.starttime).replace(
                                        ':', '_'))),
                                               format="SAC")

                        if idx >= max_windows:
                            print "stopped after {} windows".format(
                                max_windows)
                            print "found {} events".format(n_events)
                            break

            except KeyboardInterrupt:
                print 'Interrupted at time {}.'.format(win[0].stats.starttime)
                print "processed {} windows, found {} events".format(
                    idx + 1, n_events)
                print "Run time: ", time.time() - time_start

            # Dump dictionary into csv file
            # TODO
            df = pd.DataFrame.from_dict(events_dic)
            df.to_csv(output_catalog)

    print "Run time: ", time.time() - time_start
def evaluate():
    """
    Eval unet using specified args:
    """

    #data_files, data_size = load_datafiles(FLAGS.tfrecords_prefix)

    setproctitle.setproctitle('quakenet')

    tf.set_random_seed(1234)

    cfg = config.Config()
    cfg.batch_size = 1
    cfg.add = 1
    cfg.n_clusters = FLAGS.num_classes
    cfg.n_clusters += 1

    # stream data with a placeholder
    samples = {
        'data':
        tf.placeholder(tf.float32,
                       shape=(cfg.batch_size, cfg.win_size, 3),
                       name='input_data')
    }
    stream_path = FLAGS.stream_path
    try:
        #stream_files = [file for file in os.listdir(stream_path) if
        #                fnmatch.fnmatch(file, '*.mseed')]
        stream_files = [
            file for file in tree(stream_path)
            if fnmatch.fnmatch(file, '*.mseed')
        ]
    except Exception:
        # stream_path points at a single file rather than a directory
        stream_files = [stream_path]
        print("stream_files", stream_files)
    #data_files, data_size = load_datafiles(stream_path)
    n_events = 0
    time_start = time.time()
    print(" + Loading stream files {}".format(stream_files))

    events_dic = {
        "slice_start_time": [],
        "P_pick": [],
        "stname": [],
        "utc_timestamp_p": [],
        "utc_timestamp_s": [],
        "S_pick": []
    }
    with tf.Session() as sess:

        logits = unet.build_30s(samples['data'], FLAGS.num_classes, False)
        time_start = time.time()
        catalog_name = "PS_pick_blocks.csv"
        output_catalog = os.path.join(FLAGS.output_dir, catalog_name)
        print('Catalog created to store events', output_catalog)
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        sess.run(init_op)
        saver = tf.train.Saver()

        if not tf.gfile.Exists(FLAGS.checkpoint_path + '.meta'):
            raise ValueError("Can't find checkpoint file")
        else:
            print('[INFO    ]\tFound checkpoint file, restoring model.')
            saver.restore(sess, FLAGS.checkpoint_path)
        coord = tf.train.Coordinator()

        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        for stream_file in stream_files:
            #stream_path1 = os.path.join(stream_path, stream_file)
            print(" + Loading stream {}".format(stream_file))
            #stream = load_stream(stream_path1)
            stream = load_stream(stream_file)
            stream = stream.normalize()
            #print stream[0],stream[1].stats
            print(" + Preprocess stream")
            # stream = preprocess_stream(stream)
            print(" -- Stream is ready, starting detection")
            try:
                #lists = [0]
                lists = np.arange(0, 30, 10)
                for i in lists:
                    win_gen = stream.slide(window_length=FLAGS.window_size,
                                           step=FLAGS.window_step,
                                           offset=i,
                                           include_partial_windows=False)
                    #print(win_gen)
                    for idx, win in enumerate(win_gen):
                        #win.resample(10)
                        if data_is_complete(win):
                            predicted_images = unet.predict(
                                logits, cfg.batch_size, FLAGS.image_size)
                            to_fetch = [predicted_images, samples['data']]

                            # Feed the current window to the network
                            feed_dict = {
                                samples['data']:
                                fetch_window_data(win.copy().normalize(), 3)
                            }
                            #samples_data=fetch_window_data(win.copy().normalize()
                            predicted_images_value, images_value = sess.run(
                                to_fetch, feed_dict)
                            clusters_p = np.where(
                                predicted_images_value[0, :] == 1)
                            clusters_s = np.where(
                                predicted_images_value[0, :] == 2)
                            p_boxes = group_consecutives(clusters_p[0])
                            s_boxes = group_consecutives(clusters_s[0])
                            tp = []
                            ts = []
                            tpstamp = []
                            tsstamp = []
                            if len(p_boxes) > 1:
                                for ip in range(len(p_boxes)):
                                    #print (len(p_boxes),p_boxes,p_boxes[ip])
                                    tpmean = float(
                                        min(p_boxes[ip]) / 200.00 +
                                        max(p_boxes[ip]) / 200.00)
                                    tp.append(tpmean)
                                    tpstamp = UTCDateTime(
                                        win[0].stats.starttime +
                                        tpmean).timestamp
                            if len(s_boxes) > 1:
                                for iss in range(len(s_boxes)):
                                    tsmean = float(
                                        min(s_boxes[iss]) / 200.00 +
                                        max(s_boxes[iss]) / 200.00)
                                    ts.append(tsmean)
                                    tsstamp = UTCDateTime(
                                        win[0].stats.starttime +
                                        tsmean).timestamp
                            if len(p_boxes) > 1 or len(s_boxes) > 1:
                                events_dic["slice_start_time"].append(
                                    win[0].stats.starttime)
                                events_dic["stname"].append(
                                    win[0].stats.station)
                                events_dic["P_pick"].append(tp)
                                events_dic["S_pick"].append(ts)
                                events_dic["utc_timestamp_p"].append(tpstamp)
                                events_dic["utc_timestamp_s"].append(tsstamp)
                            #print (p_boxes,s_boxes)
                            win_filtered = win.copy()
                            lab = win_filtered[2].copy()
                            lab.stats.channel = "LAB"
                            # lab =win[0].copy()

                            print("predicted_images_value",
                                  predicted_images_value.shape)
                            lab.data[...] = predicted_images_value[0, :]
                            win_filtered += lab
                            if FLAGS.save_sac:
                                output_sac = os.path.join(
                                    FLAGS.output_dir, "sac",
                                    "{}_{}.sac".format(
                                        win_filtered[0].stats.station,
                                        str(win_filtered[0].stats.starttime).
                                        replace(':', '_')))
                                print(output_sac, win_filtered)
                                win_filtered.write(output_sac, format="SAC")
                            if FLAGS.plot:
                                win_filtered.plot(outfile=os.path.join(
                                    FLAGS.output_dir, "viz",
                                    "{}_{}.png".format(
                                        win_filtered[0].stats.station,
                                        str(win_filtered[0].stats.starttime).
                                        replace(':', '_'))))
                            # Wait for threads to finish.
                            coord.join(threads)
            except KeyboardInterrupt:
                print('Interrupted at time {}.'.format(win[0].stats.starttime))
                print("processed {} windows, found {} events".format(
                    idx + 1, n_events))
                print("Run time: ", time.time() - time_start)
        df = pd.DataFrame.from_dict(events_dic)
        df.to_csv(output_catalog)

    print("Run time: ", time.time() - time_start)