Example #1
def write(stream_files, subfolder):

    # Create <output_dir>/<subfolder>/<positives dir>; os.makedirs also
    # builds any missing intermediate directories.
    positives_dir = os.path.join(output_dir, subfolder,
                                 cfg.output_tfrecords_dir_positives)
    if not os.path.exists(positives_dir):
        os.makedirs(positives_dir)

    # Write event waveforms and their cluster_id into a .tfrecords file
    # output_name = "positives.tfrecords"
    output_name = args.file_name
    output_path = os.path.join(positives_dir, output_name)
    writer = DataWriter(output_path)
    for stream_file in stream_files:

        # Load and preprocess the event stream
        stream_path = os.path.join(dataset_dir, cfg.mseed_event_dir,
                                   stream_file)
        st_event = read(stream_path)
        st_event = preprocess_stream(st_event)

        # Select only the specified channels
        st_event_select = utils.select_components(st_event, cfg)

        # Location clusters: a single location is used for the moment,
        # so events default to cluster_id = 0.
        cluster_id = 0
        if cat is not None:
            stream_start_time = st_event[0].stats.starttime
            stream_end_time = st_event[-1].stats.endtime
            station = st_event[0].stats.station
            lat, lon, depth = cat.getLatLongDepth(stream_start_time,
                                                  stream_end_time, station)
            c = clusters.nearest_cluster(lat, lon, depth)
            cluster_id = c.id
            print("[tfrecords positives] Assigning cluster " +
                  str(cluster_id) + " to event.")
        #cluster_id = filtered_catalog.cluster_id.values[event_n]

        n_traces = len(st_event_select)
        if utils.check_stream(st_event_select, cfg):
            #print("[tfrecords positives] Writing sample with dimensions "+str(cfg.WINDOW_SIZE)+"x"+str(st_event[0].stats.sampling_rate)+"x"+str(n_traces))
            # Write tfrecords
            writer.write(st_event_select, cluster_id)

    # Cleanup writer
    print("[tfrecords positives] Number of windows written={}".format(
        writer._written))
    writer.close()
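A note on context: write relies on module-level globals (output_dir, dataset_dir, cfg, args, cat, clusters) that are set up elsewhere in the script. As a minimal usage sketch, assuming those globals exist and that the event .mseed files live under dataset_dir/cfg.mseed_event_dir (the listing loop below is illustrative, not from the original):

# Hypothetical driver code; the directory layout and the "train"
# subfolder name are assumptions, not part of the original script.
event_dir = os.path.join(dataset_dir, cfg.mseed_event_dir)
stream_files = [f for f in os.listdir(event_dir) if f.endswith(".mseed")]
write(stream_files, "train")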
Example #2
def write(stream_files, subfolder):

    # Create <output_dir>/<subfolder>/<negatives dir>; os.makedirs also
    # builds any missing intermediate directories.
    negatives_dir = os.path.join(output_dir, subfolder,
                                 cfg.output_tfrecords_dir_negatives)
    if not os.path.exists(negatives_dir):
        os.makedirs(negatives_dir)

    # Write noise waveforms and their cluster_id into a .tfrecords file
    output_name = args.file_name
    output_path = os.path.join(negatives_dir, output_name)
    writer = DataWriter(output_path)
    for stream_file in stream_files:
        # Load the noise stream
        stream_path = os.path.join(dataset_dir, cfg.mseed_noise_dir,
                                   stream_file)
        st_event = read(stream_path)

        # Optional frequency filtering
        if cfg.filterfreq:
            st_event = utils.filter_stream(st_event)

        # Select only the specified channels
        st_event_select = utils.select_components(st_event, cfg)

        # Noise windows are labeled with cluster_id = -1
        cluster_id = -1
        n_traces = len(st_event_select)
        if utils.check_stream(st_event_select, cfg):
            #print("[tfrecords negatives] Writing sample with dimensions "+str(cfg.WINDOW_SIZE)+"x"+str(st_event[0].stats.sampling_rate)+"x"+str(n_traces))
            # Write tfrecords
            writer.write(st_event_select, cluster_id)

    # Cleanup writer
    print("[tfrecords negatives] Number of windows written={}".format(
        writer._written))
    writer.close()
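The cfg.filterfreq branch delegates to utils.filter_stream, whose body is not shown in these examples. A minimal sketch of what such a helper could look like using ObsPy's built-in bandpass filter; the freqmin/freqmax parameters are assumptions, not the project's actual configuration:

def filter_stream(st, freqmin=0.5, freqmax=20.0):
    # Hypothetical sketch: work on a copy so the raw stream is untouched,
    # remove the mean, then apply a zero-phase Butterworth bandpass.
    st_filtered = st.copy()
    st_filtered.detrend("demean")
    st_filtered.filter("bandpass", freqmin=freqmin, freqmax=freqmax,
                       corners=4, zerophase=True)
    return st_filtered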
Example #3
def predict(path, stream_file, sess, model, samples, cat):
    global truePositives
    global falsePositives
    global trueNegatives
    global falseNegatives

    global perf_total_predictions
    global perf_total_time

    # Load stream
    stream_path = os.path.join(path, stream_file)
    stream_file = os.path.split(stream_path)[-1]
    stream_file_without_extension = stream_file.split(".mseed")[0]
    print("[classify] Loading Stream {}".format(stream_file))
    stream = read(stream_path)
    print("[classify] Preprocessing stream")
    stream = utils.preprocess_stream(stream)


    # Select only the specified channels
    stream_select = utils.select_components(stream, cfg)

    # Recreate the per-stream output directory from scratch
    outputSubdir = os.path.join(output_dir, stream_file_without_extension)
    if os.path.exists(outputSubdir):
        shutil.rmtree(outputSubdir)
    os.makedirs(outputSubdir)
    outputSubdirSubplots = os.path.join(outputSubdir, "subPlots")
    os.makedirs(outputSubdirSubplots)
    os.makedirs(os.path.join(outputSubdir, "viz"))
    if cfg.save_sac:
        os.makedirs(os.path.join(outputSubdir, "sac"))

    #if args.metadata_path is not None: #This is groundtruth data
    #    print("Reading metadata file "+args.metadata_path)
    #    obspyCatalogMeta = seisobs.seis2cat(args.metadata_path) 


    # Create catalog name in which the events are stored
    catalog_name = stream_file_without_extension + ".csv"
    output_catalog = os.path.join(output_dir, catalog_name)
    print("[classify] Catalog created to store events: {}".format(output_catalog))

    # Dictionaries to store info on detected/evaluated events
    events_dic = {"start_time": [],
                  "end_time": [],
                  "cluster_id": [],
                  "clusters_prob": []}

    true_positive_dic = {"start_time": [],
                         "end_time": [],
                         "cluster_id": [],
                         "clusters_prob": []}

    false_positive_dic = {"start_time": [],
                          "end_time": [],
                          "cluster_id": [],
                          "clusters_prob": []}

    false_negative_dic = {"start_time": [],
                          "end_time": [],
                          "cluster_id": [],
                          "clusters_prob": []}

    missed_dic = {"start_time": [],
                  "end_time": [],
                  "cluster_id": [],
                  "clusters_prob": []}

    # Sliding-window generator over the stream
    win_gen = stream_select.slide(window_length=cfg.window_size,
                                  step=cfg.window_step_predict,
                                  include_partial_windows=False)

    total_time_in_sec = stream_select[0].stats.endtime - stream_select[0].stats.starttime
    max_windows = (total_time_in_sec - cfg.window_size) / cfg.window_step_predict

    step = tf.train.global_step(sess, model.global_step)

    n_events = 0
    time_start = time.time()

    #if stations is not None:
    #    station = stream_select[-1].stats.station
    #    stationLAT, stationLONG, stationDEPTH = utils.station_coordinates(station, stations)

    # Ground-truth event window: centered on the catalog P-wave arrival time
    station = stream[0].stats.station
    stream_start_time = stream[0].stats.starttime
    stream_end_time = stream[-1].stats.endtime
    ptime = cat.getPtime(stream_start_time, stream_end_time, station)
    event_window_start = ptime - cfg.pwave_window / 2
    event_window_end = ptime + cfg.pwave_window / 2

    print("ptime={}".format(ptime))
    print("event_window_start={} <-> event_window_end={}".format(
        event_window_start, event_window_end))

    try:
        for idx, win in enumerate(win_gen):
            # Check the ground truth: a window is a real positive only if it
            # fully contains the event window around the P-wave arrival.
            isPositive = False
            if cat is not None:
                window_start = win[0].stats.starttime
                window_end = win[-1].stats.endtime

                print("window: {}-{}".format(window_start, window_end))

                if window_start <= event_window_start and window_end >= event_window_end:
                    isPositive = True
                    print("REAL POSITIVE")
                else:
                    isPositive = False
                    print("REAL NEGATIVE")
                         
            # Fetch class probabilities and predicted label
            to_fetch = [samples['data'],
                        model.layers['class_prob'],
                        model.layers['class_prediction']]
            # Feed the window plus a fake cluster_id (the net requires one,
            # but it is exactly what gets predicted)
            if utils.check_stream(win, cfg):
                feed_dict = {samples['data']: utils.fetch_window_data(win, cfg),
                             samples['cluster_id']: np.array([0])}
                perf_start_time = time.time()
                sample, class_prob_, cluster_id = sess.run(to_fetch, feed_dict)
                perf_end_time = time.time()
                perf_elapsed_time = perf_end_time - perf_start_time
                perf_total_time += perf_elapsed_time
                perf_total_predictions += 1
                print("Prediction time = {}".format(perf_elapsed_time))
                print("Average time per prediction in secs = {}".format(
                    perf_total_time / perf_total_predictions))
            else:
                missed_dic["start_time"].append(win[0].stats.starttime)
                continue

            # Keep only the cluster probabilities, drop the noise probability
            clusters_prob = class_prob_[0, 1:]
            # Shift labels so that noise = -1 and clusters are in {0..n_clusters-1}
            cluster_id -= 1

            is_event = cluster_id[0] > -1

            if is_event:
                print("PREDICTED POSITIVE")
                n_events += 1
            else:
                print("PREDICTED NEGATIVE")

            # print "event {} ,cluster id {}".format(is_event,class_prob_)
            if is_event:
                events_dic["start_time"].append(win[0].stats.starttime)
                events_dic["end_time"].append(win[0].stats.endtime)
                events_dic["cluster_id"].append(cluster_id[0])
                events_dic["clusters_prob"].append(list(clusters_prob))
                if evaluation and isPositive:
                    # HIT (true positive)
                    sys.stdout.write("\033[92mP\033[0m")
                    sys.stdout.flush()
                    truePositives += 1
                    true_positive_dic["start_time"].append(win[0].stats.starttime)
                    true_positive_dic["end_time"].append(win[0].stats.endtime)
                    true_positive_dic["cluster_id"].append(cluster_id[0])
                    true_positive_dic["clusters_prob"].append(list(clusters_prob))
                elif evaluation:
                    # MISS (false positive)
                    sys.stdout.write("\033[91mP\033[0m")
                    sys.stdout.flush()
                    falsePositives += 1
                    false_positive_dic["start_time"].append(win[0].stats.starttime)
                    false_positive_dic["end_time"].append(win[0].stats.endtime)
                    false_positive_dic["cluster_id"].append(cluster_id[0])
                    false_positive_dic["clusters_prob"].append(list(clusters_prob))
            else:
                if evaluation and isPositive:
                    # MISS (false negative)
                    sys.stdout.write("\033[91mN\033[0m")
                    sys.stdout.flush()
                    falseNegatives += 1
                    false_negative_dic["start_time"].append(win[0].stats.starttime)
                    false_negative_dic["end_time"].append(win[0].stats.endtime)
                    false_negative_dic["cluster_id"].append(cluster_id[0])
                    false_negative_dic["clusters_prob"].append(list(clusters_prob))
                elif evaluation:
                    # HIT (true negative)
                    sys.stdout.write("\033[92mN\033[0m")
                    sys.stdout.flush()
                    trueNegatives += 1

            #if idx % 1000 ==0:
            #    print "\n[classify] Analyzing {} records".format(win[0].stats.starttime)

            if is_event:
                win_filtered = win.copy()
                #win_filtered.plot(outfile=os.path.join(outputSubdir, "viz",
                #                  "event_{}_cluster_{}.png".format(idx, cluster_id[0])))

            if cfg.save_sac and is_event:
                # The "sac" directory was created inside outputSubdir above
                win_filtered = win.copy()
                win_filtered.write(os.path.join(outputSubdir, "sac",
                                   "event_{}_cluster_{}.sac".format(idx, cluster_id[0])),
                                   format="SAC")

            if idx >= max_windows:
                print("[classify] stopped after {} windows".format(max_windows))
                print("[classify] found {} events".format(n_events))
                break

    except KeyboardInterrupt:
        print("[classify] Interrupted at time {}.".format(win[0].stats.starttime))
        print("[classify] processed {} windows, found {} events".format(idx + 1, n_events))
        print("[classify] Run time: {}".format(time.time() - time_start))

    # Save the detected events to the output catalog
    df = pd.DataFrame.from_dict(events_dic)
    df.to_csv(output_catalog)

    # Plot everything
    customPlot(stream,
               os.path.join(output_dir, stream_file + "_" + str(idx) + ".png"),
               events_dic["start_time"], missed_dic["start_time"],
               true_positive_dic["start_time"], false_positive_dic["start_time"],
               false_negative_dic["start_time"])

    # Plot only 10-minute sections with events
    #max_secs_to_show = 600
    #win_gen = stream_select.slide(window_length=max_secs_to_show,
    #                              step=max_secs_to_show,
    #                              include_partial_windows=False)
    #for idx, win in enumerate(win_gen):
    #    customPlot(win, outputSubdirSubplots+"/win_"+str(idx)+".png", events_dic["start_time"], missed_dic["start_time"])

    print("\n[classify] Run time: {}".format(time.time() - time_start))

    return events_dic
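predict only accumulates the global truePositives/falsePositives/trueNegatives/falseNegatives counters; turning them into summary metrics is left to the caller. A small sketch of that final step (this helper is not part of the original code):

def print_evaluation_metrics():
    # Hypothetical summary helper; guards against division by zero.
    tp, fp, fn = truePositives, falsePositives, falseNegatives
    precision = tp / float(tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / float(tp + fn) if (tp + fn) > 0 else 0.0
    f1 = (2 * precision * recall / (precision + recall)
          if (precision + recall) > 0 else 0.0)
    print("precision={:.3f} recall={:.3f} F1={:.3f}".format(
        precision, recall, f1))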
def write(stream_files, subfolder):

    # Create <output_dir>/<subfolder>/<positives dir>; os.makedirs also
    # builds any missing intermediate directories.
    positives_dir = os.path.join(output_dir, subfolder,
                                 cfg.output_tfrecords_dir_positives)
    if not os.path.exists(positives_dir):
        os.makedirs(positives_dir)

    # Write event waveforms and their cluster_id into a .tfrecords file
    # output_name = "positives.tfrecords"
    output_name = args.file_name
    output_path = os.path.join(positives_dir, output_name)
    writer = DataWriter(output_path)
    for stream_file in stream_files:

        # Load the event stream
        stream_path = os.path.join(dataset_dir, cfg.mseed_event_dir,
                                   stream_file)
        st_event = read(stream_path)

        # Optional frequency filtering
        if cfg.filterfreq:
            st_event = utils.filter_stream(st_event)

        # Select only the specified channels
        st_event_select = utils.select_components(st_event, cfg)

        # Location clusters: default to cluster_id = 0 (single location)
        lat = 0
        lon = 0
        depth = 0
        cluster_id = 0
        if cat is not None:
            stream_start_time = st_event[0].stats.starttime
            stream_end_time = st_event[-1].stats.endtime
            station = st_event[0].stats.station
            lat, lon, depth = cat.getLatLongDepth(stream_start_time,
                                                  stream_end_time, station)
            c = clusters.nearest_cluster(lat, lon, depth)
            if c is not None:  # can be None with polygon-based clustering
                cluster_id = c.id
            else:
                cluster_id = -1  # signals that the earthquake must be discarded
            print("[tfrecords positives] Assigning cluster {} to event "
                  "(lat = {}, lon = {}).".format(cluster_id, lat, lon))
        #cluster_id = filtered_catalog.cluster_id.values[event_n]

        if cluster_id >= 0:  # no clustering, or a valid cluster
            n_traces = len(st_event_select)
            if utils.check_stream(st_event_select, cfg):
                # Write tfrecords
                writer.write(st_event_select, cluster_id)
        else:
            print("[tfrecords positives] \033[91m WARNING!!\033[0m "
                  "Discarding point as no cluster found for the given "
                  "lat={}, lon={}, depth={}".format(lat, lon, depth))

    # Cleanup writer
    print("[tfrecords positives] Number of windows written={}".format(writer._written))
    writer.close()
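clusters.nearest_cluster is project code that is not shown here. For centroid-based clustering it could be as simple as a nearest-centroid lookup, as in the sketch below; the Cluster type, the cluster_list argument, and the naive Euclidean distance are all assumptions (the polygon-based variant that can return None is not covered):

import math
from collections import namedtuple

Cluster = namedtuple("Cluster", ["id", "lat", "lon", "depth"])

def nearest_cluster(lat, lon, depth, cluster_list):
    # Hypothetical sketch: real code would use proper geographic
    # distances rather than raw coordinate differences.
    def dist(c):
        return math.sqrt((c.lat - lat) ** 2 + (c.lon - lon) ** 2 +
                         (c.depth - depth) ** 2)
    return min(cluster_list, key=dist) if cluster_list else None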