Example #1
    def score(self, detections, catalog_path, lag_allowed=1.0):
        """ Calculate the number of False and Missed detections
        Parameters:
            detections: list. List of timestamps of detected events
            catalog_path: csv file. Path to the catalog of events and their
                          timestamps
            lag_allowed: float. time lag between a cataloged and detected
            event to be considered as a true detection
        Returns:
            false_pos: int. Number of false detections
            false_neg: int. Number of missed detections
        """
        catalog = load_catalog(catalog_path)
        events = catalog.utc_timestamp

        detection_times = [detection.detect_time.timestamp
                           for detection in detections]
        detection_results = [False] * len(detection_times)
        for d in range(len(detection_times)):
            detected_event = detection_times[d]
            for event_time in events:
                if np.abs(detected_event - event_time) <= lag_allowed:
                    detection_results[d] = True
                    break  # one matching catalog event is enough
        if len(detection_times) > 0:
            false_pos = (~np.array(detection_results)).sum()
            false_neg = len(events) - sum(detection_results)
            return false_pos, false_neg
        else:
            return 0, len(events)
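
The matching loop above is O(detections x events). For reference, here is a minimal, self-contained sketch of the same scoring idea on plain float timestamps (load_catalog and the detection objects are stubbed out). Note one deliberate difference: the original counts misses as len(events) minus the number of true detections, while this sketch counts catalog events with no matching detection, which avoids double-counting when two detections hit the same event.

import numpy as np

def score_timestamps(detected, cataloged, lag_allowed=1.0):
    """Count false positives and false negatives between two timestamp lists."""
    detected = np.asarray(detected, dtype=float)
    cataloged = np.asarray(cataloged, dtype=float)
    if detected.size == 0:
        return 0, len(cataloged)
    # detection i matches event j if they are within lag_allowed seconds
    matched = np.abs(detected[:, None] - cataloged[None, :]) <= lag_allowed
    # a detection with no matching event is a false positive
    false_pos = int((~matched.any(axis=1)).sum())
    # an event with no matching detection is a false negative (missed)
    false_neg = int((~matched.any(axis=0)).sum())
    return false_pos, false_neg

print(score_timestamps([10.2, 55.0], [10.0, 30.0]))  # (1, 1)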
Example #2
def main(_):

    if not os.path.exists(FLAGS.output):
        os.makedirs(FLAGS.output)

    # Load Catalog
    cat_path = FLAGS.catalog
    cat = load_catalog(cat_path)
    cat = filter_catalog(cat)

    # Load stream
    stream_path = FLAGS.stream
    print " + Loading stream"
    stream = read(stream_path)
    if FLAGS.with_preprocessing:
        print " + Preprocessing stream"
        stream = preprocess_stream(stream)

    # Filter catalog according to the loaded stream
    start_date = stream[0].stats.starttime
    end_date = stream[-1].stats.endtime
    print(" + Loaded Stream with Start Date={} and End Date={}".format(start_date, end_date))

    filtered_catalog = cat[
        ((cat.utc_timestamp >= start_date)
         & (cat.utc_timestamp < end_date))]

    travel_time = get_travel_time(filtered_catalog)

    print(" + Plotting {} events".format(filtered_catalog.shape[0]))
    for event_n in range(filtered_catalog.shape[0]):
        event_time = filtered_catalog.utc_timestamp.values[event_n] + travel_time[event_n]
        cluster_id = filtered_catalog.cluster_id.values[event_n]
        st_event = stream.slice(
            UTCDateTime(event_time),
            UTCDateTime(event_time) + FLAGS.window_size).copy()
        if len(st_event) == 3:
            trace = st_event[0]
            name_png = "event_{}_clusterid_{}.png".format(event_n, cluster_id)
            trace.plot(outfile=os.path.join(FLAGS.output, name_png))
        else:
            print(st_event)
Example #3
def main(args):
    # Remove previous output directory
    output_viz = os.path.join(args.output, "viz")
    output_sac = os.path.join(args.output, "sac")
    if args.plot:
        if os.path.exists(output_viz):
            shutil.rmtree(output_viz)
        os.makedirs(output_viz)
    if args.save_sac:
        if os.path.exists(output_sac):
            shutil.rmtree(output_sac)
        os.makedirs(output_sac)

    # Read stream
    print "+ Loading stream"
    st = load_stream(args.stream_path)
    # Read catalog
    print "+ Loading catalog"
    cat = load_catalog(args.catalog_path)

    # Look up events in the catalog and plot windows
    print("+ Creating windows with detected events from ConvNetQuake")
    for event in tqdm(range(cat.shape[0]),
                      total=cat.shape[0],
                      unit="events",
                      leave=False):
        win_start = UTCDateTime(cat.iloc[event].start_time)
        win_end = UTCDateTime(cat.iloc[event].end_time)
        win = st.slice(win_start, win_end).copy()
        if args.plot:
            win.plot(
                outfile=os.path.join(output_viz, "event_{}.png".format(event)))
        if args.save_sac:
            for tr in win:
                if isinstance(tr.data, np.ma.masked_array):
                    tr.data = tr.data.filled()
            win.write(os.path.join(output_sac, "event_{}_.sac".format(event)),
                      format="SAC")
Example #4
    def load_catalog(self, catalog_path):
        self.catalog = data_io.load_catalog(catalog_path)
        self.statusBar.showMessage('Loaded catalog {}.'.format(
            os.path.split(catalog_path)[-1]))
Example #5
def main(_):

    # Create dir to store tfrecords
    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    # Load stream
    stream_path = FLAGS.stream_path
    stream_file = os.path.split(stream_path)[-1]
    print("+ Loading Stream {}".format(stream_file))
    stream = read(stream_path)
    print('+ Preprocessing stream')
    stream = preprocess_stream(stream)
    # Dictionary of the number of events per tfrecords file
    metadata = {}
    output_metadata = os.path.join(FLAGS.output_dir, "metadata.json")

    # Load Catalog
    print("+ Loading Catalog")
    cat = load_catalog(FLAGS.catalog)
    starttime = stream[0].stats.starttime.timestamp
    endtime = stream[-1].stats.endtime.timestamp
    print("starttime", UTCDateTime(starttime))
    print("endtime", UTCDateTime(endtime))
    cat = filter_catalog(cat, starttime, endtime)
    print("First event in filtered catalog", cat.Date.values[0], cat.Time.values[0])
    print("Last event in filtered catalog", cat.Date.values[-1], cat.Time.values[-1])
    cat_event_times = cat.utc_timestamp.values

    # Write noise waveforms with cluster_id=-1 to .tfrecords
    n_tfrecords = 0
    output_name = "noise_" + stream_file.split(".mseed")[0] + \
                  "_" + str(n_tfrecords) + ".tfrecords"
    output_path = os.path.join(FLAGS.output_dir, output_name)
    writer = DataWriter(output_path)

    # Create window generator
    win_gen = stream.slide(window_length=FLAGS.window_size,
                           step=FLAGS.window_step,
                           include_partial_windows=False)

    if FLAGS.max_windows is None:
        total_time = stream[0].stats.endtime - stream[0].stats.starttime
        max_windows = (total_time - FLAGS.window_size) / FLAGS.window_step
    else:
        max_windows = FLAGS.max_windows

    # Create adjacent windows in the stream, check there is no event inside
    # using the catalog, and then write them to a tfrecords file with label=-1
    for idx, win in enumerate(win_gen):

        # If there is no trace, skip this window
        n_traces = len(win)
        if n_traces == 0:
            continue
        # Check the three-component window is complete
        if len(win) == 3:
            n_samples = min(len(win[0].data), len(win[1].data))
            n_samples = min(n_samples, len(win[2].data))
            # Discard windows with very small amplitudes (2017/12/06)
            ampl_e, ampl_n, ampl_z = filter_small_ampitude(win, n_samples)
            if ampl_e > 0.3 or ampl_n > 0.3 or ampl_z > 0.3:
                continue
        else:
            # Fewer than three components: check the first trace's amplitude
            ampl_e = max(abs(win[0].data))
            if ampl_e < 1e-10:
                continue
        n_pts = win[0].stats.sampling_rate * FLAGS.window_size + 1
        # Check if there is an event in the window
        window_start = win[0].stats.starttime.timestamp
        window_end = win[-1].stats.endtime.timestamp
        # Extend the window by one step so that nearby events are also
        # avoided (2017/12/07)
        window_start_extend = window_start - FLAGS.window_step
        window_end_extend = window_end + FLAGS.window_step
        after_start = cat_event_times > window_start_extend
        before_end = cat_event_times < window_end_extend
        try:
            # Both masks are True exactly when an event falls inside the
            # extended window; an IndexError below means there is none
            cat_idx = np.where(after_start & before_end)[0][0]
            event_time = cat_event_times[cat_idx]
            is_event = True
            assert window_start_extend < cat.utc_timestamp.values[cat_idx]
            assert window_end_extend > cat.utc_timestamp.values[cat_idx]
            print("avoiding event {}, {}".format(cat.Date.values[cat_idx],
                                                 cat.Time.values[cat_idx]))
        except IndexError:
            # There is no event in this window
            is_event = False
            if (len(win) == 3) and (n_pts == n_samples):
                # Write the noise window with label -1
                writer.write(win.copy().normalize(), -1)
                # Plot noise windows
                if FLAGS.plot:
                    import matplotlib
                    matplotlib.use('Agg')
                    trace = win[0]
                    viz_dir = os.path.join(
                        FLAGS.output_dir, "viz", stream_file.split(".mseed")[0])
                    if not os.path.exists(viz_dir):
                        os.makedirs(viz_dir)
                    name_png = "noise_{}_{}.png".format(
                        win[0].stats.station,
                        str(win[0].stats.starttime).replace(':', '_'))
                    trace.plot(outfile=os.path.join(viz_dir, name_png))
        if idx % 1000 == 0 and idx != 0:
            print("{} windows created".format(idx))
            # Save the number of windows created in the metadata
            metadata[output_name] = writer._written
            print("creating a new tfrecords file")
            n_tfrecords += 1
            output_name = "noise_" + stream_file.split(".mseed")[0] + \
                          "_" + str(n_tfrecords) + ".tfrecords"
            output_path = os.path.join(FLAGS.output_dir, output_name)
            writer = DataWriter(output_path)

        if idx == max_windows:
            break

    # Cleanup writer
    print("Number of windows  written={}".format(writer._written))
    writer.close()

    # Write metadata
    metadata[stream_file.split(".mseed")[0]] = writer._written
    write_json(metadata, output_metadata)
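
The event-avoidance test in the loop above reduces to asking whether any catalog timestamp falls inside the (padded) window. The original's np.where(after_start == before_end) only works because the two masks can never both be False when the window start precedes its end, so == coincides with logical AND; the rewrite above uses & to say this directly. A minimal, self-contained sketch of the test with made-up float timestamps:

import numpy as np

def window_has_event(event_times, win_start, win_end, pad=0.0):
    """True if any catalog timestamp lies inside the padded window."""
    event_times = np.asarray(event_times, dtype=float)
    inside = (event_times > win_start - pad) & (event_times < win_end + pad)
    return bool(inside.any())

events = [100.0, 250.0, 400.0]
print(window_has_event(events, 240.0, 260.0))            # True: 250.0 inside
print(window_has_event(events, 300.0, 350.0))            # False: no event
print(window_has_event(events, 390.0, 395.0, pad=10.0))  # True via padding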
Example #6
def main(_):

    # Create dir to store tfrecords
    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    # Load stream
    stream_path = FLAGS.stream_path
    stream_file = os.path.split(stream_path)[-1]
    print("+ Loading Stream {}".format(stream_file))
    stream = read(stream_path)
    print('+ Preprocessing stream')
    stream = preprocess_stream(stream)

    # Dictionary of the number of events per tfrecords file
    metadata = {}
    output_metadata = os.path.join(FLAGS.output_dir, "metadata.json")

    # Load Catalog
    print("+ Loading Catalog")
    cat = load_catalog(FLAGS.catalog)
    starttime = stream[0].stats.starttime.timestamp
    endtime = stream[-1].stats.endtime.timestamp
    print("startime", UTCDateTime(starttime))
    print("endtime", UTCDateTime(endtime))
    cat = filter_catalog(cat, starttime, endtime)
    print("First event in filtered catalog", cat.Date.values[0],
          cat.Time.values[0])
    print("Last event in filtered catalog", cat.Date.values[-1],
          cat.Time.values[-1])
    cat_event_times = cat.utc_timestamp.values

    # Write noise waveforms with cluster_id=-1 to .tfrecords
    n_tfrecords = 0
    output_name = "noise_" + stream_file.split(".mseed")[0] + \
                  "_" + str(n_tfrecords) + ".tfrecords"
    output_path = os.path.join(FLAGS.output_dir, output_name)
    writer = DataWriter(output_path)

    # Create window generator
    win_gen = stream.slide(window_length=FLAGS.window_size,
                           step=FLAGS.window_step,
                           include_partial_windows=False)
    if FLAGS.max_windows is None:
        total_time = stream[0].stats.endtime - stream[0].stats.starttime
        max_windows = (total_time - FLAGS.window_size) / FLAGS.window_step
    else:
        max_windows = FLAGS.max_windows

    # Create adjacent windows in the stream, check there is no event inside
    # using the catalog, and then write them to a tfrecords file with label=-1
    for idx, win in enumerate(win_gen):

        # If there is no trace, skip this window
        n_traces = len(win)
        if n_traces == 0:
            continue
        # Check the three-component window is complete
        if len(win) == 3:
            n_samples = min(len(win[0].data), len(win[1].data))
            n_samples = min(n_samples, len(win[2].data))
        else:
            # Fewer than three components; the completeness check below fails
            n_samples = -1
        n_pts = win[0].stats.sampling_rate * FLAGS.window_size + 1
        # Check if there is an event in the window
        window_start = win[0].stats.starttime.timestamp
        window_end = win[-1].stats.endtime.timestamp
        after_start = cat_event_times > window_start
        before_end = cat_event_times < window_end
        try:
            # Both masks are True exactly when an event falls inside the
            # window; an IndexError below means there is none
            cat_idx = np.where(after_start & before_end)[0][0]
            event_time = cat_event_times[cat_idx]
            is_event = True
            assert window_start < cat.utc_timestamp.values[cat_idx]
            assert window_end > cat.utc_timestamp.values[cat_idx]
            print("avoiding event {}, {}".format(cat.Date.values[cat_idx],
                                                 cat.Time.values[cat_idx]))
        except IndexError:
            # there is no event
            is_event = False
            if (len(win) == 3) and (n_pts == n_samples):
                # Write tfrecords
                writer.write(win, -1)
                # Plot events
                if FLAGS.plot:
                    trace = win[0]
                    viz_dir = os.path.join(FLAGS.output_dir, "viz",
                                           stream_file.split(".mseed")[0])
                    if not os.path.exists(viz_dir):
                        os.makedirs(viz_dir)
                    trace.plot(outfile=os.path.join(
                        viz_dir, "noise_{}.png".format(idx)))
        if idx % 1000 == 0 and idx != 0:
            print("{} windows created".format(idx))
            # Save num windows created in metadata
            metadata[output_name] = writer._written
            print("creating a new tfrecords")
            n_tfrecords += 1
            output_name = "noise_" + stream_file.split(".mseed")[0] + \
                          "_" + str(n_tfrecords) + ".tfrecords"
            output_path = os.path.join(FLAGS.output_dir, output_name)
            writer = DataWriter(output_path)

        if idx == max_windows:
            break

    # Cleanup writer
    print("Number of windows  written={}".format(writer._written))
    writer.close()

    # Write metadata
    metadata[stream_file.split(".mseed")[0]] = writer._written
    write_json(metadata, output_metadata)
Example #7
def main(_):

    stream_files = [
        file for file in os.listdir(FLAGS.stream_dir)
        if fnmatch.fnmatch(file, '*')
    ]
    print "List of streams to anlayze", stream_files

    # Create dir to store tfrecords
    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    # Dictionary of the number of events per tfrecords file
    metadata = {}
    output_metadata = os.path.join(FLAGS.output_dir, "metadata.json")

    # Load Catalog
    print "+ Loading Catalog"
    cat = load_catalog(FLAGS.catalog)
    cat = filter_catalog(cat)

    for stream_file in stream_files:

        # Load stream
        stream_path = os.path.join(FLAGS.stream_dir, stream_file)
        print "+ Loading Stream {}".format(stream_file)
        stream = read(stream_path)
        print '+ Preprocessing stream'
        stream = preprocess_stream(stream)

        # Filter catalog according to the loaded stream
        start_date = stream[0].stats.starttime
        end_date = stream[-1].stats.endtime
        print("-- Start Date={}, End Date={}".format(start_date, end_date))

        filtered_catalog = cat[((cat.utc_timestamp >= start_date)
                                & (cat.utc_timestamp < end_date))]

        # Propagation time from source to station
        travel_time = get_travel_time(filtered_catalog)

        # Write event waveforms and cluster_id in .tfrecords
        output_name = stream_file.split(".mseed")[0] + ".tfrecords"
        output_path = os.path.join(FLAGS.output_dir, output_name)
        writer = DataWriter(output_path)
        print("+ Creating tfrecords for {} events".format(
            filtered_catalog.shape[0]))
        # Loop over all events in the considered stream
        for event_n in range(filtered_catalog.shape[0]):
            event_time = filtered_catalog.utc_timestamp.values[event_n]
            event_time += travel_time[event_n]
            st_event = stream.slice(
                UTCDateTime(event_time),
                UTCDateTime(event_time) + FLAGS.window_size).copy()
            cluster_id = filtered_catalog.cluster_id.values[event_n]
            n_traces = len(st_event)
            # If there is no trace, skip this waveform
            if n_traces == 0:
                continue
            n_samples = len(st_event[0].data)
            n_pts = st_event[0].stats.sampling_rate * FLAGS.window_size + 1
            if (len(st_event) == 3) and (n_pts == n_samples):
                # Write tfrecords
                writer.write(st_event, cluster_id)
                # Save window and cluster_id
                if FLAGS.save_mseed:
                    output_label = "label_{}_lat_{:.3f}_lon_{:.3f}.mseed".format(
                        cluster_id, filtered_catalog.latitude.values[event_n],
                        filtered_catalog.longitude.values[event_n])
                    output_mseed_dir = os.path.join(FLAGS.output_dir, "mseed")
                    if not os.path.exists(output_mseed_dir):
                        os.makedirs(output_mseed_dir)
                    output_mseed = os.path.join(output_mseed_dir, output_label)
                    st_event.write(output_mseed, format="MSEED")
                # Plot events
                if FLAGS.plot:
                    trace = st_event[0]
                    viz_dir = os.path.join(FLAGS.output_dir, "viz",
                                           stream_file.split(".mseed")[0])
                    if not os.path.exists(viz_dir):
                        os.makedirs(viz_dir)
                    trace.plot(outfile=os.path.join(
                        viz_dir, "event_{}.png".format(event_n)))
            else:
                print "Missing waveform for event:", UTCDateTime(event_time)

        # Cleanup writer
        print("Number of events written={}".format(writer._written))
        writer.close()
        # Write metadata
        metadata[stream_file.split(".mseed")[0]] = writer._written
        write_json(metadata, output_metadata)
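
The completeness check above (n_pts == n_samples) rests on simple arithmetic: a slice of T seconds at sampling rate f contains f*T + 1 samples when both endpoints are included. A small self-contained sketch of that check (the values are made up):

def expected_samples(sampling_rate, window_size):
    """Samples in a window_size-second slice, both endpoints included."""
    return int(sampling_rate * window_size) + 1

# A 10-second window at 100 Hz should hold 1001 samples
assert expected_samples(100.0, 10.0) == 1001

def is_complete(trace_lengths, sampling_rate, window_size):
    """True if all three components have exactly the expected length."""
    n_pts = expected_samples(sampling_rate, window_size)
    return len(trace_lengths) == 3 and all(n == n_pts for n in trace_lengths)

print(is_complete([1001, 1001, 1001], 100.0, 10.0))  # True
print(is_complete([1001, 998, 1001], 100.0, 10.0))   # False: gappy component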
Example #8
def main(_):

    stream_files = [
        file for file in os.listdir(FLAGS.stream_dir)
        if fnmatch.fnmatch(file, '*.mseed')
    ]
    print "List of streams to anlayze", stream_files

    # Create dir to store tfrecords
    if not os.path.exists(FLAGS.output_dir):
        os.makedirs(FLAGS.output_dir)

    # Dictionary of the number of events per tfrecords file
    metadata = {}
    output_metadata = os.path.join(FLAGS.output_dir, "metadata.json")

    # Load Catalog
    print "+ Loading Catalog"

    for stream_file in stream_files:
        cat = load_catalog(FLAGS.catalog)

        # Load stream
        stream_path = os.path.join(FLAGS.stream_dir, stream_file)
        print "+ Loading Stream {}".format(stream_file)
        stream = read(stream_path)
        print '+ Preprocessing stream'
        stream = preprocess_stream(stream)

        # Filter catalog according to the loaded stream
        start_date = stream[0].stats.starttime
        end_date = stream[-1].stats.endtime
        print("-- Start Date={}, End Date={}".format(start_date, end_date))

        filtered_catalog = cat[((cat.utc_timestamp >= start_date)
                                & (cat.utc_timestamp < end_date))]
        # The travel-time correction from source to station is not applied
        # in this variant

        # Write event waveforms and cluster_id in .tfrecords
        output_name = stream_file.split(".mseed")[0] + ".tfrecords"
        output_path = os.path.join(FLAGS.output_dir, output_name)
        writer = DataWriter(output_path)
        print("+ Creating tfrecords for {} events".format(
            filtered_catalog.shape[0]))
        # Loop over all events in the considered stream
        for event_n in range(filtered_catalog.shape[0]):
            event_time = filtered_catalog.utc_timestamp.values[event_n]
            # event_time += travel_time[event_n]
            st_event = stream.slice(
                UTCDateTime(event_time),
                UTCDateTime(event_time) + FLAGS.window_size).copy()
            cluster_id = filtered_catalog.cluster_id.values[event_n]
            n_traces = len(st_event)
            # If there is no trace, skip this waveform
            if n_traces == 0:
                continue

            n_samples = len(st_event[0].data)
            n_pts = st_event[0].stats.sampling_rate * FLAGS.window_size + 1
            if (len(st_event) == 3) and (n_pts == n_samples):
                # Write tfrecords
                # Use filter_small_ampitude to discard windows with very
                # small amplitudes (2017/12/06)
                ampl_e, ampl_n, ampl_z = filter_small_ampitude(
                    st_event, n_samples)
                if ampl_e > 0.3 or ampl_n > 0.3 or ampl_z > 0.3:
                    continue
                a = remove_repeat(st_event, n_samples)
                if a[0] > 0.3 or a[1] > 0.3 or a[2] > 0.3:
                    continue
                writer.write(st_event.copy().resample(10).normalize(),
                             cluster_id)
                # Save window and cluster_id
                if FLAGS.save_mseed:
                    output_label = "{}_{}.mseed".format(
                        st_event[0].stats.station,
                        str(st_event[0].stats.starttime).replace(':', '_'))

                    output_mseed_dir = os.path.join(FLAGS.output_dir, "mseed")
                    if not os.path.exists(output_mseed_dir):
                        os.makedirs(output_mseed_dir)
                    output_mseed = os.path.join(output_mseed_dir, output_label)
                    st_event.write(output_mseed, format="MSEED")

                # Plot events
                if FLAGS.plot:
                    trace = st_event[0].filter('bandpass',
                                               freqmin=0.5,
                                               freqmax=20)
                    viz_dir = os.path.join(FLAGS.output_dir, "viz",
                                           stream_file.split(".mseed")[0])
                    if not os.path.exists(viz_dir):
                        os.makedirs(viz_dir)
                    name_png = "event_{}_{}.png".format(
                        st_event[0].stats.station,
                        str(st_event[0].stats.starttime).replace(':', '_'))
                    trace.plot(outfile=os.path.join(viz_dir, name_png))
            else:
                print "Missing waveform for event:", UTCDateTime(event_time)

        # Cleanup writer
        print("Number of events written={}".format(writer._written))
        writer.close()
        # Write metadata
        metadata[stream_file.split(".mseed")[0]] = writer._written
        write_json(metadata, output_metadata)
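
write_json is a helper from the repository. Assuming it simply dumps the metadata dict to disk, a minimal stand-in with the standard library might look like this (a hypothetical reimplementation, not the repo's actual code):

import json

def write_json(metadata, output_path):
    # Hypothetical stand-in: dump the metadata dict as indented JSON
    with open(output_path, "w") as f:
        json.dump(metadata, f, indent=2, sort_keys=True)

# Example: number of windows written per tfrecords file
write_json({"noise_stream_0.tfrecords": 1000}, "metadata.json")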