def test_calcVincentyInverse(self):
    """
    Tests for the Vincenty's Inverse formulae.

    Three groups of inputs are checked:

    * point pairs for which ``calcVincentyInverse`` is expected to raise
      ``StopIteration``,
    * point pairs on the equator with known reference distances and
      azimuths,
    * latitudes of +/-91 degrees, which are expected to raise
      ``ValueError``.
    """
    # The following will raise StopIteration exceptions because of two
    # nearly antipodal points.
    # NOTE: the last entry is not antipodal, but the original test asserts
    # StopIteration for it as well, so it is kept in this group.
    non_converging = [
        (15.26804251, 2.93007342, -14.80522806, -177.2299081),
        (27.3562106, 72.2382356, -27.55995499, -107.78571981),
        (27.4675551, 17.28133229, -27.65771704, -162.65420626),
        (0, 0, 0, 13),
    ]
    for args in non_converging:
        self.assertRaises(StopIteration, calcVincentyInverse, *args)

    # Working examples: (arguments, expected distance). The second and
    # third result values are expected to be 90 and 270 for all of them.
    working = [
        ((0, 0.2, 0, 20), 2204125.9174282863),
        ((0, 0, 0, 10), 1113194.9077920639),
        ((0, 0, 0, 17), 1892431.3432465086),
    ]
    for args, expected_dist in working:
        res = calcVincentyInverse(*args)
        self.assertAlmostEqual(res[0], expected_dist)
        self.assertAlmostEqual(res[1], 90.0)
        self.assertAlmostEqual(res[2], 270.0)

    # Out of bounds latitudes, for the first and for the second point.
    out_of_bounds = [
        (91, 0, 0, 0),
        (-91, 0, 0, 0),
        (0, 0, 91, 0),
        (0, 0, -91, 0),
    ]
    for args in out_of_bounds:
        self.assertRaises(ValueError, calcVincentyInverse, *args)
def test_calcVincentyInverse2(self):
    """
    Test calcVincentyInverse() method with test data from Geocentric Datum
    of Australia. (see http://www.icsm.gov.au/gda/gdatm/gdav2.3.pdf)

    The reference coordinates and azimuths are written as
    degrees + minutes / 60 + seconds / 3600. The computed distance has to
    match the reference within a relative error of 1e-5 and the azimuths
    within an absolute error of 1e-5. Shifting the longitudes by multiples
    of 360 must not change the result.
    """
    # test data:
    # Point 1: Flinders Peak, Point 2: Buninyong
    lat1 = -(37 + (57 / 60.) + (3.72030 / 3600.))
    lon1 = 144 + (25 / 60.) + (29.52440 / 3600.)
    lat2 = -(37 + (39 / 60.) + (10.15610 / 3600.))
    lon2 = 143 + (55 / 60.) + (35.38390 / 3600.)
    dist = 54972.271
    alpha12 = 306 + (52 / 60.) + (5.37 / 3600.)
    alpha21 = 127 + (10 / 60.) + (25.07 / 3600.)

    # calculate result
    calc_dist, calc_alpha12, calc_alpha21 = calcVincentyInverse(
        lat1, lon1, lat2, lon2)

    # calculate deviations from test data
    dist_err_rel = abs(dist - calc_dist) / dist
    alpha12_err = abs(alpha12 - calc_alpha12)
    alpha21_err = abs(alpha21 - calc_alpha21)

    # assertLess reports the offending value on failure, unlike
    # assertEqual(value < tolerance, True).
    self.assertLess(dist_err_rel, 1.0e-5)
    self.assertLess(alpha12_err, 1.0e-5)
    self.assertLess(alpha21_err, 1.0e-5)

    # calculate result with +- 360 for lon values; separate names are used
    # so the reference values above are not overwritten.
    shifted_dist, shifted_alpha12, shifted_alpha21 = calcVincentyInverse(
        lat1, lon1 + 360, lat2, lon2 - 720)
    self.assertAlmostEqual(shifted_dist, calc_dist)
    self.assertAlmostEqual(shifted_alpha12, calc_alpha12)
    self.assertAlmostEqual(shifted_alpha21, calc_alpha21)
def test_calcVincentyInverse2(self):
    """
    Test calcVincentyInverse() method with test data from Geocentric Datum
    of Australia. (see http://www.icsm.gov.au/gda/gdatm/gdav2.3.pdf)

    The reference coordinates and azimuths are written as
    degrees + minutes / 60 + seconds / 3600. The computed distance has to
    match the reference within a relative error of 1e-5 and the azimuths
    within an absolute error of 1e-5. Shifting the longitudes by multiples
    of 360 must not change the result.
    """
    # test data:
    # Point 1: Flinders Peak, Point 2: Buninyong
    lat1 = -(37 + (57 / 60.) + (3.72030 / 3600.))
    lon1 = 144 + (25 / 60.) + (29.52440 / 3600.)
    lat2 = -(37 + (39 / 60.) + (10.15610 / 3600.))
    lon2 = 143 + (55 / 60.) + (35.38390 / 3600.)
    dist = 54972.271
    alpha12 = 306 + (52 / 60.) + (5.37 / 3600.)
    alpha21 = 127 + (10 / 60.) + (25.07 / 3600.)

    # calculate result
    calc_dist, calc_alpha12, calc_alpha21 = calcVincentyInverse(
        lat1, lon1, lat2, lon2)

    # calculate deviations from test data
    dist_err_rel = abs(dist - calc_dist) / dist
    alpha12_err = abs(alpha12 - calc_alpha12)
    alpha21_err = abs(alpha21 - calc_alpha21)

    # assertLess reports the offending value on failure, unlike
    # assertEqual(value < tolerance, True).
    self.assertLess(dist_err_rel, 1.0e-5)
    self.assertLess(alpha12_err, 1.0e-5)
    self.assertLess(alpha21_err, 1.0e-5)

    # calculate result with +- 360 for lon values; separate names are used
    # so the reference values above are not overwritten.
    shifted_dist, shifted_alpha12, shifted_alpha21 = calcVincentyInverse(
        lat1, lon1 + 360, lat2, lon2 - 720)
    self.assertAlmostEqual(shifted_dist, calc_dist)
    self.assertAlmostEqual(shifted_alpha12, calc_alpha12)
    self.assertAlmostEqual(shifted_alpha21, calc_alpha21)
def test_calcVincentyInverse(self):
    """
    Tests for the Vincenty's Inverse formulae.

    Three groups of inputs are checked:

    * point pairs for which ``calcVincentyInverse`` is expected to raise
      ``StopIteration``,
    * point pairs on the equator with known reference distances and
      azimuths,
    * latitudes of +/-91 degrees, which are expected to raise
      ``ValueError``.
    """
    # The following will raise StopIteration exceptions because of two
    # nearly antipodal points.
    # NOTE: the last entry is not antipodal, but the original test asserts
    # StopIteration for it as well, so it is kept in this group.
    non_converging = [
        (15.26804251, 2.93007342, -14.80522806, -177.2299081),
        (27.3562106, 72.2382356, -27.55995499, -107.78571981),
        (27.4675551, 17.28133229, -27.65771704, -162.65420626),
        (0, 0, 0, 13),
    ]
    for args in non_converging:
        self.assertRaises(StopIteration, calcVincentyInverse, *args)

    # Working examples: (arguments, expected distance). The second and
    # third result values are expected to be 90 and 270 for all of them.
    working = [
        ((0, 0.2, 0, 20), 2204125.9174282863),
        ((0, 0, 0, 10), 1113194.9077920639),
        ((0, 0, 0, 17), 1892431.3432465086),
    ]
    for args, expected_dist in working:
        res = calcVincentyInverse(*args)
        self.assertAlmostEqual(res[0], expected_dist)
        self.assertAlmostEqual(res[1], 90.0)
        self.assertAlmostEqual(res[2], 270.0)

    # Out of bounds latitudes, for the first and for the second point.
    out_of_bounds = [
        (91, 0, 0, 0),
        (-91, 0, 0, 0),
        (0, 0, 91, 0),
        (0, 0, -91, 0),
    ]
    for args in out_of_bounds:
        self.assertRaises(ValueError, calcVincentyInverse, *args)
def calculate_preliminiaries(self):
    """
    Calculates the envelope, STA/LTA and the finds the local extrema.

    Sets ``self.synthetic_envelope``, ``self.stalta``, ``self.peaks`` and
    ``self.troughs``. If ``self.ttimes`` is non-empty, the troughs are
    additionally restricted to the sample range between
    ``max_time_before_first_arrival`` before the first travel time and the
    arrival time of the slowest allowed surface wave plus ``max_period``;
    the peaks are restricted to lie strictly inside the remaining troughs.
    """
    logger.info("Calculating envelope of synthetics.")
    self.synthetic_envelope = envelope(self.synthetic.data)
    logger.info("Calculating STA/LTA.")
    self.stalta = sta_lta(self.synthetic_envelope,
                          self.observed.stats.delta,
                          self.config.min_period)
    self.peaks, self.troughs = utils.find_local_extrema(self.stalta)

    # Both peaks and troughs are needed for everything below. The previous
    # check, ``not len(peaks) and len(troughs)``, only returned when there
    # were no peaks but some troughs, so empty troughs fell through and
    # crashed on ``self.troughs[0]``.
    if not len(self.peaks) or not len(self.troughs):
        return

    if self.ttimes:
        delta = self.observed.stats.delta
        offset = self.event.origin_time - self.observed.stats.starttime

        min_time = self.ttimes[0]["time"] - \
            self.config.max_time_before_first_arrival + offset
        min_idx = int(min_time / delta)

        dist_in_km = geodetics.calcVincentyInverse(
            self.station.latitude, self.station.longitude,
            self.event.latitude, self.event.longitude)[0] / 1000.0
        max_time = dist_in_km / self.config.min_surface_wave_velocity + \
            offset + self.config.max_period
        max_idx = int(max_time / delta)

        # Reject all peaks and troughs before the minimal allowed start
        # time and after the maximum allowed end time.
        first_trough, last_trough = self.troughs[0], self.troughs[-1]
        self.troughs = self.troughs[(self.troughs >= min_idx) &
                                    (self.troughs <= max_idx)]

        if not len(self.troughs):
            # No trough is left inside the allowed range, so no peak can
            # be enclosed by troughs either. Previously this case raised
            # an IndexError further down.
            self.peaks = self.peaks[:0]
            return

        # If troughs have been removed, re-add them at the boundaries.
        if first_trough != self.troughs[0]:
            self.troughs = np.concatenate([
                np.array([min_idx], dtype=self.troughs.dtype),
                self.troughs])
        if last_trough != self.troughs[-1]:
            self.troughs = np.concatenate([
                self.troughs,
                np.array([max_idx], dtype=self.troughs.dtype)])

        # Make sure peaks are inside the troughs!
        min_trough, max_trough = self.troughs[0], self.troughs[-1]
        self.peaks = self.peaks[(self.peaks > min_trough) &
                                (self.peaks < max_trough)]
def calculate_preliminiaries(self):
    """
    Calculates the envelope, STA/LTA and the finds the local extrema.

    Sets ``self.synthetic_envelope``, ``self.stalta``, ``self.peaks`` and
    ``self.troughs``. If ``self.ttimes`` is non-empty, the troughs are
    additionally restricted to the sample range between
    ``max_time_before_first_arrival`` before the first travel time and the
    arrival time of the slowest allowed surface wave plus ``max_period``;
    the peaks are restricted to lie strictly inside the remaining troughs.
    """
    logger.info("Calculating envelope of synthetics.")
    self.synthetic_envelope = envelope(self.synthetic.data)
    logger.info("Calculating STA/LTA.")
    self.stalta = sta_lta(self.synthetic_envelope,
                          self.observed.stats.delta,
                          self.config.min_period)
    self.peaks, self.troughs = utils.find_local_extrema(self.stalta)

    # Both peaks and troughs are needed for everything below. The previous
    # check, ``not len(peaks) and len(troughs)``, only returned when there
    # were no peaks but some troughs, so empty troughs fell through and
    # crashed on ``self.troughs[0]``.
    if not len(self.peaks) or not len(self.troughs):
        return

    if self.ttimes:
        delta = self.observed.stats.delta
        offset = self.event.origin_time - self.observed.stats.starttime

        min_time = self.ttimes[0]["time"] - \
            self.config.max_time_before_first_arrival + offset
        min_idx = int(min_time / delta)

        dist_in_km = geodetics.calcVincentyInverse(
            self.station.latitude, self.station.longitude,
            self.event.latitude, self.event.longitude)[0] / 1000.0
        max_time = dist_in_km / self.config.min_surface_wave_velocity + \
            offset + self.config.max_period
        max_idx = int(max_time / delta)

        # Reject all peaks and troughs before the minimal allowed start
        # time and after the maximum allowed end time.
        first_trough, last_trough = self.troughs[0], self.troughs[-1]
        self.troughs = self.troughs[(self.troughs >= min_idx) &
                                    (self.troughs <= max_idx)]

        if not len(self.troughs):
            # No trough is left inside the allowed range, so no peak can
            # be enclosed by troughs either. Previously this case raised
            # an IndexError further down.
            self.peaks = self.peaks[:0]
            return

        # If troughs have been removed, re-add them at the boundaries.
        if first_trough != self.troughs[0]:
            self.troughs = np.concatenate([
                np.array([min_idx], dtype=self.troughs.dtype),
                self.troughs])
        if last_trough != self.troughs[-1]:
            self.troughs = np.concatenate([
                self.troughs,
                np.array([max_idx], dtype=self.troughs.dtype)])

        # Make sure peaks are inside the troughs!
        min_trough, max_trough = self.troughs[0], self.troughs[-1]
        self.peaks = self.peaks[(self.peaks > min_trough) &
                                (self.peaks < max_trough)]
def reject_on_traveltimes(self):
    """
    Discard windows that lie entirely outside the plausible arrival range.

    A window is kept if it ends no earlier than one minimum period before
    the first arrival and starts no later than the arrival of a wave
    travelling with the minimum allowed surface wave velocity. Both limits
    are expressed relative to the start time of the observed trace.

    Only call if station and event information is available!
    """
    distance_in_m = geodetics.calcVincentyInverse(
        self.station.latitude, self.station.longitude,
        self.event.latitude, self.event.longitude)[0]
    dist_in_km = distance_in_m / 1000.0

    # Shift from event origin time to the time axis of the observed trace.
    offset = self.event.origin_time - self.observed.stats.starttime

    earliest = self.ttimes[0]["time"] - self.config.min_period + offset
    latest = dist_in_km / self.config.min_surface_wave_velocity + offset

    retained = []
    for window in self.windows:
        if (window.relative_endtime >= earliest) and \
                (window.relative_starttime <= latest):
            retained.append(window)
    self.windows = retained

    logger.info("Rejection based on travel times retained %i windows." %
                len(self.windows))
def reject_on_traveltimes(self):
    """
    Discard windows that lie entirely outside the plausible arrival range.

    A window is kept if it ends no earlier than one minimum period before
    the first arrival and starts no later than the arrival of a wave
    travelling with the minimum allowed surface wave velocity. Both limits
    are expressed relative to the start time of the observed trace.

    Only call if station and event information is available!
    """
    distance_in_m = geodetics.calcVincentyInverse(
        self.station.latitude, self.station.longitude,
        self.event.latitude, self.event.longitude)[0]
    dist_in_km = distance_in_m / 1000.0

    # Shift from event origin time to the time axis of the observed trace.
    offset = self.event.origin_time - self.observed.stats.starttime

    earliest = self.ttimes[0]["time"] - self.config.min_period + offset
    latest = dist_in_km / self.config.min_surface_wave_velocity + offset

    retained = []
    for window in self.windows:
        if (window.relative_endtime >= earliest) and \
                (window.relative_starttime <= latest):
            retained.append(window)
    self.windows = retained

    logger.info("Rejection based on travel times retained %i windows." %
                len(self.windows))
def plot_data_for_station(station, available_data, event, get_data_callback,
                          domain_bounds):
    """
    Plots all data for a station in an interactive plot.

    The figure contains one axes per component (Z, N, E), check boxes to
    toggle raw, processed and synthetic data, and a map showing the event
    and the station. The call blocks in ``plt.show()``.

    :type station: dict
    :param station: A dictionary containing the keys 'id', 'latitude',
        'longitude', 'elevation_in_m', and 'local_depth_in_m' describing the
        current station.
    :type available_data: dict
    :param available_data: The available processed and synthetic data. The
        raw data is always assumed to be available.
    :type event: dict
    :param event: A dictionary describing the current event.
    :type get_data_callback: function
    :param get_data_callback: Callback function returning an ObsPy Stream
        object.

        get_data_callback("raw")
        get_data_callback("synthetic", iteration_name)
        get_data_callback("processed", processing_tag)

    :type domain_bounds: dict
    :param domain_bounds: The domain bounds.
    """
    import datetime
    import matplotlib.dates
    from matplotlib.widgets import CheckButtons
    from obspy.core.util.geodetics import calcVincentyInverse
    import textwrap

    # Setup the figure, the window and plot title.
    fig = plt.figure(figsize=(14, 9))
    # Set the title through the figure manager; the canvas-level
    # set_window_title() is not available in recent matplotlib releases.
    manager = getattr(fig.canvas, "manager", None)
    if manager is not None:
        manager.set_window_title("Data for station %s" % station["id"])
    fig.text(0.5, 0.99, "Station %s" % station["id"],
             verticalalignment="top", horizontalalignment="center")

    # Add one axis for each component. Share all axes.
    z_axis = fig.add_axes([0.30, 0.65, 0.68, 0.3])
    n_axis = fig.add_axes([0.30, 0.35, 0.68, 0.3], sharex=z_axis,
                          sharey=z_axis)
    e_axis = fig.add_axes([0.30, 0.05, 0.68, 0.3], sharex=z_axis,
                          sharey=z_axis)
    all_axes = [z_axis, n_axis, e_axis]

    # Set grid, autoscale and hide all tick labels (some will be made visible
    # later on).
    for axes in all_axes:
        plt.setp(axes.get_xticklabels(), visible=False)
        plt.setp(axes.get_yticklabels(), visible=False)
        # Positional argument: the keyword was renamed from ``b`` to
        # ``visible`` between matplotlib releases.
        axes.grid(True)
        axes.autoscale(enable=True)
        axes.set_xlim(0.0, 12345.0)

    # Axes for the data selection check boxes.
    raw_check_axes = fig.add_axes([0.01, 0.8, 0.135, 0.15])
    synth_check_axes = fig.add_axes([0.155, 0.8, 0.135, 0.15])
    proc_check_axes = fig.add_axes([0.01, 0.5, 0.28, 0.29])

    # The map axes
    map_axes = fig.add_axes([0.01, 0.05, 0.28, 0.40])

    # Fill the check box axes. Long processing tags are wrapped to fit into
    # the box; the line breaks are removed again in checked().
    raw_check = CheckButtons(raw_check_axes, ["raw"], [True])
    proc_check = CheckButtons(
        proc_check_axes,
        ["\n".join(textwrap.wrap(_i, width=30))
         for _i in available_data["processed"]],
        [False] * len(available_data["processed"]))
    synth_check = CheckButtons(synth_check_axes, available_data["synthetic"],
                               [False] * len(available_data["synthetic"]))

    for check in [raw_check, proc_check, synth_check]:
        plt.setp(check.labels, fontsize=10)

    raw_check_axes.text(
        0.02, 0.97, "Raw Data", transform=raw_check_axes.transAxes,
        verticalalignment="top", horizontalalignment="left", fontsize=10)
    proc_check_axes.text(
        0.02, 0.97, "Processed Data", transform=proc_check_axes.transAxes,
        verticalalignment="top", horizontalalignment="left", fontsize=10)
    synth_check_axes.text(
        0.02, 0.97, "Synthetic Data", transform=synth_check_axes.transAxes,
        verticalalignment="top", horizontalalignment="left", fontsize=10)

    bounds = domain_bounds["bounds"]
    map_object = plot_domain(
        bounds["minimum_latitude"], bounds["maximum_latitude"],
        bounds["minimum_longitude"], bounds["maximum_longitude"],
        bounds["boundary_width_in_degree"],
        rotation_axis=domain_bounds["rotation_axis"],
        rotation_angle_in_degree=domain_bounds["rotation_angle"],
        plot_simulation_domain=False, zoom=True, ax=map_axes)

    plot_stations_for_event(map_object=map_object,
                            station_dict={station["id"]: station},
                            event_info=event)
    # Plot the beachball for one event.
    plot_events([event], map_object=map_object, beachball_size=0.05)
    dist = calcVincentyInverse(
        event["latitude"], event["longitude"], station["latitude"],
        station["longitude"])[0] / 1000.0

    map_axes.set_title("Epicentral distance: %.1f km | Mag: %.1f %s" %
                       (dist, event["magnitude"], event["magnitude_type"]),
                       fontsize=10)

    # Handles of everything currently drawn, so it can be removed again when
    # a check box is toggled off. "raw" is None while the raw data is hidden.
    PLOT_OBJECTS = {
        "raw": None,
        "synthetics": {},
        "processed": {}
    }

    def plot(plot_type, label=None):
        # Fetch the requested data and draw one line per trace into the
        # axes of the matching component.
        if plot_type == "raw":
            st = get_data_callback("raw")
            PLOT_OBJECTS["raw"] = []
            save_at = PLOT_OBJECTS["raw"]
        elif plot_type == "synthetic":
            st = get_data_callback("synthetic", label)
            PLOT_OBJECTS["synthetics"][label] = []
            save_at = PLOT_OBJECTS["synthetics"][label]
        elif plot_type == "processed":
            st = get_data_callback("processed", label)
            PLOT_OBJECTS["processed"][label] = []
            save_at = PLOT_OBJECTS["processed"][label]

        # Loop over all traces.
        for tr in st:
            # Normalize data.
            # NOTE(review): a constant trace leads to a division by zero
            # here - confirm whether such traces can reach this function.
            tr.data = np.require(tr.data, dtype="float32")
            tr.data -= tr.data.min()
            tr.data /= tr.data.max()
            tr.data -= tr.data.mean()
            tr.data /= np.abs(tr.data).max() * 1.1

            # Figure out the correct axis.
            component = tr.stats.channel[-1].upper()
            if component == "N":
                axis = n_axis
            elif component == "E":
                axis = e_axis
            elif component == "Z":
                axis = z_axis
            else:
                raise NotImplementedError

            if plot_type == "synthetic":
                # Synthetics are placed relative to the event origin time.
                time_axis = matplotlib.dates.drange(
                    event["origin_time"].datetime,
                    (event["origin_time"] + tr.stats.delta * (tr.stats.npts))
                    .datetime,
                    datetime.timedelta(seconds=tr.stats.delta))
                zorder = 2
                color = "red"
            elif plot_type == "raw":
                time_axis = matplotlib.dates.drange(
                    tr.stats.starttime.datetime,
                    (tr.stats.endtime + tr.stats.delta).datetime,
                    datetime.timedelta(seconds=tr.stats.delta))
                zorder = 0
                color = "0.8"
            elif plot_type == "processed":
                time_axis = matplotlib.dates.drange(
                    tr.stats.starttime.datetime,
                    (tr.stats.endtime + tr.stats.delta).datetime,
                    datetime.timedelta(seconds=tr.stats.delta))
                zorder = 1
                color = "0.2"
            else:
                msg = "Plot type '%s' not known" % plot_type
                raise ValueError(msg)

            # The time axis is cut to the number of data samples.
            save_at.append(axis.plot_date(time_axis[:len(tr.data)], tr.data,
                           color=color, zorder=zorder, marker="",
                           linestyle="-"))
            axis.set_ylim(-1.0, 1.0)
            axis.xaxis.set_major_formatter(
                matplotlib.dates.DateFormatter("%H:%M:%S"))

            if component == "E":
                # Best effort: only the lowest axes shows time tick labels.
                try:
                    plt.setp(axis.get_xticklabels(), visible=True)
                except Exception:
                    pass

            if plot_type != "raw":
                axis.set_xlim(time_axis[0], time_axis[-1])
            # Adjust the limit only if there are no synthetics and processed
            # if plotting raw data.
            elif not PLOT_OBJECTS["synthetics"] and \
                    not PLOT_OBJECTS["processed"]:
                axis.set_xlim(time_axis[0], time_axis[-1])

    for label, axis in zip(("Vertical", "North", "East"),
                           (z_axis, n_axis, e_axis)):
        axis.text(0.98, 0.95, label,
                  verticalalignment="top", horizontalalignment="right",
                  bbox=dict(facecolor="white", alpha=0.5, pad=5),
                  transform=axis.transAxes, fontsize=11)

    def _checked_raw(label):
        checked(label, "raw")

    def _checked_proc(label):
        checked(label, "proc")

    def _checked_synth(label):
        checked(label, "synth")

    def checked(label, check_box):
        # Toggle: remove the lines if the data is currently plotted,
        # otherwise plot it.
        if check_box == "raw":
            if PLOT_OBJECTS["raw"] is not None:
                for _i in PLOT_OBJECTS["raw"]:
                    for _j in _i:
                        _j.remove()
                PLOT_OBJECTS["raw"] = None
            else:
                PLOT_OBJECTS["raw"] = []
                plot("raw")
        elif check_box == "synth":
            if label in PLOT_OBJECTS["synthetics"]:
                for _i in PLOT_OBJECTS["synthetics"][label]:
                    for _j in _i:
                        _j.remove()
                del PLOT_OBJECTS["synthetics"][label]
            else:
                PLOT_OBJECTS["synthetics"][label] = []
                plot("synthetic", label=label)
        elif check_box == "proc":
            # Previously broken up.
            label = label.replace("\n", "")
            if label in PLOT_OBJECTS["processed"]:
                for _i in PLOT_OBJECTS["processed"][label]:
                    for _j in _i:
                        _j.remove()
                del PLOT_OBJECTS["processed"][label]
            else:
                PLOT_OBJECTS["processed"][label] = []
                plot("processed", label=label)

        # Redrawing is best effort, but must not swallow KeyboardInterrupt
        # or SystemExit as a bare ``except`` would.
        try:
            fig.canvas.draw()
        except Exception:
            pass

    raw_check.on_clicked(_checked_raw)
    proc_check.on_clicked(_checked_proc)
    synth_check.on_clicked(_checked_synth)

    # Always plot the raw data by default.
    _checked_raw("raw")

    try:
        fig.canvas.draw()
    except Exception:
        pass

    # One has call plt.show() to activate the main loop of the new figure.
    # Otherwise events will not work.
    plt.gcf().patch.set_alpha(0.0)
    plt.show()
def select_windows(data_trace, synthetic_trace, ev_lat, ev_lng, ev_depth_in_km,
                   st_lat, st_lng, minimum_period, maximum_period):
    """
    Window selection algorithm for picking windows suitable for misfit
    calculation based on phase differences.

    Progress and rejection reasons are printed to stdout.

    :param data_trace: The data trace. Its ``stats.channel`` and ``data``
        attributes are used (presumably an ObsPy Trace - confirm against
        the callers).
    :param synthetic_trace: The synthetic trace. Its ``stats.delta``,
        ``stats.npts`` and ``data`` attributes are used. The data array is
        indexed with the same sample indices as ``data_trace.data``.
    :param ev_lat: The event latitude.
    :param ev_lng: The event longitude.
    :param ev_depth_in_km: The event depth in km.
    :param st_lat: The station latitude.
    :param st_lng: The station longitude.
    :param minimum_period: The minimum period, in the same time unit as
        ``synthetic_trace.stats.delta``.
    :param maximum_period: The maximum period; only used as an upper bound
        for the minimum window length.
    :return: A list of ``(start_index, stop_index)`` sample index tuples,
        one per selected window. Empty if the traces are rejected as a
        whole or no window passes all criteria.
    """
    print("* ---------------------------")
    print("* autoselect " + data_trace.stats.channel)

    # =========================================================================
    # set a couple of selection parameters - might become part of the input in
    # future versions
    # =========================================================================

    # Minimum normalised correlation coefficient of the complete traces.
    min_cc = 0.0
    # Maximum relative noise level for the whole trace. Measured from maximum
    # amplitudes before and after the first arrival.
    max_noise = 0.3
    # Maximum relative noise level for individual windows.
    max_noise_window = 0.4
    # All arrivals later than those corresponding to the threshold velocity
    # [km/s] will be excluded.
    threshold_velocity = 2.4
    # Maximum allowable time shift within a window, as a fraction of the
    # minimum period.
    threshold_shift = 0.2
    # Minimum normalised correlation coefficient within a window.
    threshold_correlation = 0.5
    # Minimum length of the time windows relative to the minimum period.
    min_length_period = 1.5
    # Minimum number of extrema in an individual time window (excluding the
    # edges).
    min_peaks_troughs = 2
    # Maximum energy ratio between data and synthetics within a time window.
    max_energy_ratio = 3.0

    # =========================================================================
    # initialisations
    # =========================================================================

    dt = synthetic_trace.stats.delta
    npts = synthetic_trace.stats.npts
    dist_in_deg = geodetics.locations2degrees(st_lat, st_lng, ev_lat, ev_lng)
    dist_in_km = geodetics.calcVincentyInverse(
        st_lat, st_lng, ev_lat, ev_lng)[0] / 1000.0
    tts = getTravelTimes(dist_in_deg, ev_depth_in_km, model="ak135")
    first_tt_arrival = min([_i["time"] for _i in tts])

    # Number of samples in the sliding window. Currently, the length of the
    # window is set to a multiple of the dominant period of the synthetics.
    # Make sure it is an uneven number; just to have an easy midpoint
    # definition.
    window_length = int(round(float(2 * minimum_period) / dt))
    if not window_length % 2:
        window_length += 1

    # Allocate arrays to collect the time dependent values.
    sliding_time_shift = np.zeros(npts, dtype="float32")
    max_cc_coeff = np.zeros(npts, dtype="float32")

    taper = np.hanning(window_length)

    # =========================================================================
    # check if whole seismograms are sufficiently correlated and estimate noise
    # level
    # =========================================================================

    synth = synthetic_trace.data
    data = data_trace.data

    # compute correlation coefficient
    norm = np.sqrt(np.sum(data ** 2)) * np.sqrt(np.sum(synth ** 2))
    cc = np.sum(data * synth) / norm
    print("** correlation coefficient: " + str(cc))

    # estimate noise level from waveforms prior to the first arrival
    # (the first 50 samples are skipped). np.ptp() is used instead of the
    # ndarray.ptp() method, which is no longer available in NumPy 2.
    first_arrival_idx = int(np.ceil(
        (first_tt_arrival - minimum_period * 0.5) / dt))
    noise_absolute = np.ptp(data[50:first_arrival_idx])
    noise_relative = noise_absolute / np.ptp(data)
    print("** absolute noise level: " + str(noise_absolute) + " m/s")
    print("** relative noise level: " + str(noise_relative))

    # rejection criteria
    accept = True
    if cc < min_cc:
        print("** no windows selected, correlation " + str(cc) +
              " is below threshold value of " + str(min_cc))
        accept = False
    if noise_relative > max_noise:
        print("** no windows selected, noise level " + str(noise_relative) +
              " is above threshold value of " + str(max_noise))
        accept = False

    if accept is False:
        print("* autoselect done")
        return []

    # =========================================================================
    # compute sliding time shifts and correlation coefficients
    # =========================================================================

    # Windows with a peak-to-peak amplitude below this value are skipped.
    # The value does not depend on the window, so compute it only once.
    min_window_ptp = np.ptp(synthetic_trace.data) * 0.001

    for start_idx, end_idx, midpoint_idx in _window_generator(npts,
                                                              window_length):

        # Slice windows. Create a copy to be able to taper without affecting
        # the original time series.
        data_window = data_trace.data[start_idx: end_idx].copy() * taper
        synthetic_window = \
            synthetic_trace.data[start_idx: end_idx].copy() * taper

        # Skip windows that have essentially no energy to avoid instabilities.
        if np.ptp(synthetic_window) < min_window_ptp:
            continue

        # Calculate the time shift. Here this is defined as the shift of the
        # synthetics relative to the data. So a value of 2, for instance, means
        # that the synthetics are 2 timesteps later than the data.
        cc = np.correlate(data_window, synthetic_window, mode="full")

        time_shift = cc.argmax() - window_length + 1
        # Express the time shift in fraction of the minimum period.
        sliding_time_shift[midpoint_idx] = (time_shift * dt) / minimum_period

        # Normalized cross correlation.
        max_cc_value = cc.max() / np.sqrt((synthetic_window ** 2).sum() *
                                          (data_window ** 2).sum())
        max_cc_coeff[midpoint_idx] = max_cc_value

    # =========================================================================
    # compute the initial mask, i.e. intervals (windows) where no measurements
    # are made.
    # =========================================================================

    # Step 1: Initialise masked arrays. The mask will be set to True where no
    # windows are chosen.
    time_windows = np.ma.ones(npts)
    time_windows.mask = np.zeros(npts)

    # Step 2: Mask everything more than half a dominant period before the
    # first theoretical arrival.
    time_windows.mask[:first_arrival_idx] = True

    # Step 3: Mask everything after the threshold arrival time - computed
    # from the threshold velocity.
    time_windows.mask[int(np.floor(dist_in_km / threshold_velocity / dt)):] = \
        True

    # Step 4: Mask everything with an absolute travel time shift of more than
    # threshold_shift times the dominant period.
    time_windows.mask[np.abs(sliding_time_shift) > threshold_shift] = True

    # Step 5: Mask the area around every "travel time shift jump" (based on
    # the traveltime time difference). The width of the area is currently
    # chosen to be a tenth of a dominant period to each side.
    sample_buffer = int(np.ceil(minimum_period / dt * 0.1))
    indices = np.ma.where(np.abs(np.diff(sliding_time_shift)) > 0.1)[0]
    for index in indices:
        # Clamp the lower bound: a negative start would be interpreted
        # relative to the end of the array, usually yielding an empty slice
        # so that jumps close to the trace start would not be masked.
        time_windows.mask[max(index - sample_buffer, 0):
                          index + sample_buffer] = True

    # Step 6: Mask all areas where the normalized cross correlation
    # coefficient is under threshold_correlation.
    time_windows.mask[max_cc_coeff < threshold_correlation] = True

    # =========================================================================
    # Make the final window selection.
    # =========================================================================

    min_length = min(
        minimum_period / dt * min_length_period, maximum_period / dt)
    final_windows = []

    # Older NumPy releases return None instead of an empty list if everything
    # is masked; treat that as "no candidate windows".
    candidates = np.ma.flatnotmasked_contiguous(time_windows) or []

    # loop through all the time windows
    for i in candidates:

        window_npts = i.stop - i.start
        synthetic_window = synthetic_trace.data[i.start: i.stop]
        data_window = data_trace.data[i.start: i.stop]

        # Step 7: Throw away all windows with a length of less than
        # min_length_period the dominant period.
        if window_npts < min_length:
            continue

        # Step 8: Exclude windows without a real peak or trough (except for the
        # edges).
        data_p, data_t, _ = find_local_extrema(data_window, 0)
        synth_p, synth_t, _ = find_local_extrema(synthetic_window, 0)
        if np.min([len(synth_p), len(synth_t), len(data_p), len(data_t)]) < \
                min_peaks_troughs:
            continue

        # Step 9: Peak and trough matching algorithm. The mask starts out
        # fully True; the span between the neighbours of every matched
        # synthetic extremum is then unmasked - first for the peaks, then
        # for the troughs.
        window_mask = np.ones(window_npts, dtype="bool")

        for data_extrema, synth_extrema in ((data_p, synth_p),
                                            (data_t, synth_t)):
            closest = find_closest(data_extrema, synth_extrema)
            diffs = np.diff(closest)

            for idx in np.where(diffs == 1)[0]:
                if idx > 0:
                    start = synth_extrema[idx - 1]
                else:
                    start = 0
                if idx < (len(synth_extrema) - 1):
                    end = synth_extrema[idx + 1]
                else:
                    end = -1
                window_mask[start: end] = False

        window_mask = np.ma.masked_array(window_mask, mask=window_mask)
        if window_mask.mask.all():
            continue

        # Step 10: Check if the time windows have sufficiently similar energy
        # and are above the noise
        for j in np.ma.flatnotmasked_contiguous(window_mask):
            # Again assert a certain minimal length.
            if (j.stop - j.start) < min_length:
                continue

            # Compare the energy in the data window and the synthetic window.
            data_energy = (data_window[j.start: j.stop] ** 2).sum()
            synth_energy = (synthetic_window[j.start: j.stop] ** 2).sum()
            energies = sorted([data_energy, synth_energy])
            if energies[1] > max_energy_ratio * energies[0]:
                continue

            # Check that amplitudes in the data are above the noise
            if noise_absolute / np.ptp(data_window[j.start: j.stop]) > \
                    max_noise_window:
                continue

            # Convert the window-relative indices back to trace indices.
            final_windows.append((i.start + j.start, i.start + j.stop))

    print("* autoselect done")

    return final_windows
def select_windows(data_trace, synthetic_trace, event_latitude,
                   event_longitude, event_depth_in_km,
                   station_latitude, station_longitude, minimum_period,
                   maximum_period,
                   min_cc=0.10, max_noise=0.10, max_noise_window=0.4,
                   min_velocity=2.4, threshold_shift=0.30,
                   threshold_correlation=0.75, min_length_period=1.5,
                   min_peaks_troughs=2, max_energy_ratio=10.0,
                   min_envelope_similarity=0.2,
                   verbose=False, plot=False):
    """
    Window selection algorithm for picking windows suitable for misfit
    calculation based on phase differences.

    Returns a list of windows which might be empty due to various reasons.

    The function first applies global rejection criteria to the whole trace
    pair (overall correlation and noise level) and then runs a sequence of
    elimination stages on a per-sample mask: travel time bounds, low energy
    in the sliding window, sliding correlation coefficient, sliding time
    shift, time shift jumps, envelope similarity, minimum length, peak and
    trough matching, peak/trough count, energy ratio and noise level. For a
    more detailed description, please see the LASIF paper.

    :param data_trace: The data trace.
    :type data_trace: :class:`~obspy.core.trace.Trace`
    :param synthetic_trace: The synthetic trace.
    :type synthetic_trace: :class:`~obspy.core.trace.Trace`
    :param event_latitude: The event latitude.
    :type event_latitude: float
    :param event_longitude: The event longitude.
    :type event_longitude: float
    :param event_depth_in_km: The event depth in km.
    :type event_depth_in_km: float
    :param station_latitude: The station latitude.
    :type station_latitude: float
    :param station_longitude: The station longitude.
    :type station_longitude: float
    :param minimum_period: The minimum period of the data in seconds.
    :type minimum_period: float
    :param maximum_period: The maximum period of the data in seconds.
    :type maximum_period: float
    :param min_cc: Minimum normalised correlation coefficient of the
        complete traces. The traces are only rejected on this criterion if
        the relative noise level additionally exceeds ``max_noise / 3``.
    :type min_cc: float
    :param max_noise: Maximum relative noise level for the whole trace.
        The noise level is the maximum absolute data amplitude in a window
        before the first arrival divided by the maximum absolute amplitude
        of the whole data trace.
    :type max_noise: float
    :param max_noise_window: Maximum relative noise level for individual
        windows: the absolute noise level divided by the peak-to-peak
        amplitude of the data within the window.
    :type max_noise_window: float
    :param min_velocity: All arrivals later than those corresponding to the
        threshold velocity [km/s] will be excluded.
    :type min_velocity: float
    :param threshold_shift: Maximum allowable time shift within a window,
        as a fraction of the minimum period.
    :type threshold_shift: float
    :param threshold_correlation: Minimum normalised correlation
        coefficient within a window.
    :type threshold_correlation: float
    :param min_length_period: Minimum length of the time windows relative
        to the minimum period. The effective minimum length is additionally
        capped at one maximum period.
    :type min_length_period: float
    :param min_peaks_troughs: Minimum number of peaks and of troughs that
        both the data and the synthetics must have within an individual
        time window.
    :type min_peaks_troughs: int
    :param max_energy_ratio: Maximum energy ratio between data and
        synthetics within a time window. Don't make this too small!
    :type max_energy_ratio: float
    :param min_envelope_similarity: The minimum similarity of the envelopes
        of both data and synthetics. This essentially assures that the
        amplitudes of data and synthetics can not diverge too much within a
        window. It is a bit like the inverse of the ratio of both envelopes
        so a value of 0.2 makes sure neither amplitude can be more than 5
        times larger than the other.
    :type min_envelope_similarity: float
    :param verbose: No output by default. If set, progress messages are
        emitted through ``_log_window_selection``.
    :type verbose: bool
    :param plot: Create a plot of the algorithm while it does its work.
    :type plot: bool

    :returns: List of ``(start, stop)`` tuples. Both values are computed as
        ``data_trace.stats.starttime + sample_index *
        data_trace.stats.delta``. The list is empty if the traces are
        rejected by the global criteria or if no window survives all
        elimination stages.
    :rtype: list
    """
    # Shortcuts to frequently accessed variables.
    data_starttime = data_trace.stats.starttime
    data_delta = data_trace.stats.delta
    dt = data_trace.stats.delta
    npts = data_trace.stats.npts
    synth = synthetic_trace.data
    data = data_trace.data
    times = data_trace.times()

    # Fill cache if necessary. The import is lazy so the model is only
    # loaded once it is actually needed.
    if not TAUPY_MODEL_CACHE:
        from obspy.taup import TauPyModel  # NOQA
        TAUPY_MODEL_CACHE["model"] = TauPyModel("AK135")
    model = TAUPY_MODEL_CACHE["model"]

    # -------------------------------------------------------------------------
    # Geographical calculations and the time of the first arrival.
    # -------------------------------------------------------------------------
    dist_in_deg = geodetics.locations2degrees(station_latitude,
                                              station_longitude,
                                              event_latitude, event_longitude)
    dist_in_km = geodetics.calcVincentyInverse(
        station_latitude, station_longitude, event_latitude,
        event_longitude)[0] / 1000.0

    # Get only a couple of P phases which should be the first arrival
    # for every epicentral distance. It's quite a bit faster than calculating
    # the arrival times for every phase.
    # Assumes the first sample is the centroid time of the event.
    tts = model.get_travel_times(source_depth_in_km=event_depth_in_km,
                                 distance_in_degree=dist_in_deg,
                                 phase_list=["ttp"])
    # Sort just as a safety measure.
    tts = sorted(tts, key=lambda x: x.time)
    first_tt_arrival = tts[0].time

    # -------------------------------------------------------------------------
    # Window settings
    # -------------------------------------------------------------------------
    # Number of samples in the sliding window. Currently, the length of the
    # window is set to two times the minimum period.
    # Make sure it is an uneven number; just to have a trivial midpoint
    # definition and one sample does not matter much in any case.
    window_length = int(round(float(2 * minimum_period) / dt))
    if not window_length % 2:
        window_length += 1

    # Use a Hanning window. No particular reason for it but it's a
    # well-behaved window and has nice spectral properties.
    taper = np.hanning(window_length)

    # =========================================================================
    # check if whole seismograms are sufficiently correlated and estimate
    # noise level
    # =========================================================================

    # Overall Correlation coefficient.
    norm = np.sqrt(np.sum(data ** 2)) * np.sqrt(np.sum(synth ** 2))
    cc = np.sum(data * synth) / norm
    if verbose:
        _log_window_selection(data_trace.id,
                              "Correlation Coefficient: %.4f" % cc)

    # Estimate noise level from waveforms prior to the first arrival. The
    # noise window nominally spans from 2.5 to 0.5 minimum periods before
    # the first arrival.
    idx_end = int(np.ceil((first_tt_arrival - 0.5 * minimum_period) / dt))
    idx_end = max(10, idx_end)
    idx_start = int(np.ceil((first_tt_arrival - 2.5 * minimum_period) / dt))
    idx_start = max(10, idx_start)

    # Guarantee a non-empty noise window.
    if idx_start >= idx_end:
        idx_start = max(0, idx_end - 10)

    abs_data = np.abs(data)
    noise_absolute = abs_data[idx_start:idx_end].max()
    noise_relative = noise_absolute / abs_data.max()

    if verbose:
        _log_window_selection(data_trace.id,
                              "Absolute Noise Level: %e" % noise_absolute)
        _log_window_selection(data_trace.id,
                              "Relative Noise Level: %e" % noise_relative)

    # Basic global rejection criteria. ``accept_traces`` is either True or
    # the message describing why the traces have been rejected.
    accept_traces = True
    if (cc < min_cc) and (noise_relative > max_noise / 3.0):
        msg = "Correlation %.4f is below threshold of %.4f" % (cc, min_cc)
        if verbose:
            _log_window_selection(data_trace.id, msg)
        accept_traces = msg

    if noise_relative > max_noise:
        msg = "Noise level %.3f is above threshold of %.3f" % (
            noise_relative, max_noise)
        if verbose:
            _log_window_selection(data_trace.id, msg)
        accept_traces = msg

    # Calculate the envelope of both data and synthetics. This is to make sure
    # that the amplitude of both is not too different over time and is
    # used as another selector. Only calculated if the trace is generally
    # accepted as it is fairly slow.
    if accept_traces is True:
        data_env = obspy.signal.filter.envelope(data)
        synth_env = obspy.signal.filter.envelope(synth)

    # -------------------------------------------------------------------------
    # Initial Plot setup.
    # -------------------------------------------------------------------------
    # All the plot calls are interleaved. I realize this is really ugly but
    # the alternative would be to either have two functions (one with plots,
    # one without) or split the plotting function in various subfunctions,
    # neither of which are acceptable in my opinion. The impact on
    # performance is minimal if plotting is turned off: all imports are lazy
    # and a couple of conditionals are cheap.
    if plot:
        import matplotlib.pylab as plt  # NOQA
        import matplotlib.patheffects as PathEffects  # NOQA

        # White outline used for all text labels so that they remain
        # readable on top of the plotted lines.
        text_outline = [
            PathEffects.withStroke(linewidth=3, foreground="white")]

        if accept_traces is True:
            plt.figure(figsize=(18, 12))
            plt.subplots_adjust(left=0.05, bottom=0.05, right=0.98, top=0.95,
                                wspace=None, hspace=0.0)
            grid = (31, 1)

            # Axes showing the data.
            data_plot = plt.subplot2grid(grid, (0, 0), rowspan=8)
        else:
            # Only show one axes if the traces are not accepted.
            plt.figure(figsize=(18, 3))

        # Plot envelopes if needed.
        if accept_traces is True:
            plt.plot(times, data_env, color="black", alpha=0.5, lw=0.4,
                     label="data envelope")
            plt.plot(synthetic_trace.times(), synth_env, color="#e41a1c",
                     alpha=0.4, lw=0.5, label="synthetics envelope")

        plt.plot(times, data, color="black", label="data", lw=1.5)
        plt.plot(synthetic_trace.times(), synth, color="#e41a1c",
                 label="synthetics", lw=1.5)

        # Symmetric around y axis.
        middle = data.mean()
        d_max, d_min = data.max(), data.min()
        r = max(d_max - middle, middle - d_min) * 1.1
        ylim = (middle - r, middle + r)
        xlim = (times[0], times[-1])
        plt.ylim(*ylim)
        plt.xlim(*xlim)

        offset = (xlim[1] - xlim[0]) * 0.005
        plt.vlines(first_tt_arrival, ylim[0], ylim[1], colors="#ff7f00", lw=2)
        plt.text(first_tt_arrival + offset,
                 ylim[1] - (ylim[1] - ylim[0]) * 0.02,
                 "first arrival", verticalalignment="top",
                 horizontalalignment="left", color="#ee6e00",
                 path_effects=text_outline)

        plt.vlines(first_tt_arrival - minimum_period / 2.0, ylim[0], ylim[1],
                   colors="#ff7f00", lw=2)
        plt.text(first_tt_arrival - minimum_period / 2.0 - offset,
                 ylim[0] + (ylim[1] - ylim[0]) * 0.02,
                 "first arrival - min period / 2", verticalalignment="bottom",
                 horizontalalignment="right", color="#ee6e00",
                 path_effects=text_outline)

        for velocity in [6, 5, 4, 3, min_velocity]:
            tt = dist_in_km / velocity
            plt.vlines(tt, ylim[0], ylim[1], colors="gray", lw=2)
            if velocity == min_velocity:
                hal = "right"
                o_s = -1.0 * offset
            else:
                hal = "left"
                o_s = offset
            plt.text(tt + o_s, ylim[0] + (ylim[1] - ylim[0]) * 0.02,
                     str(velocity) + " km/s", verticalalignment="bottom",
                     horizontalalignment=hal, color="0.15")
        plt.vlines(dist_in_km / min_velocity + minimum_period / 2.0,
                   ylim[0], ylim[1], colors="gray", lw=2)
        plt.text(dist_in_km / min_velocity + minimum_period / 2.0 - offset,
                 ylim[1] - (ylim[1] - ylim[0]) * 0.02,
                 "min surface velocity + min period / 2",
                 verticalalignment="top",
                 horizontalalignment="right", color="0.15",
                 path_effects=text_outline)

        plt.hlines(noise_absolute, xlim[0], xlim[1], linestyle="--",
                   color="gray")
        plt.hlines(-noise_absolute, xlim[0], xlim[1], linestyle="--",
                   color="gray")
        plt.text(offset, noise_absolute + (ylim[1] - ylim[0]) * 0.01,
                 "noise level", verticalalignment="bottom",
                 horizontalalignment="left", color="0.15",
                 path_effects=text_outline)
        plt.legend(loc="lower right", fancybox=True, framealpha=0.5,
                   fontsize="small")
        plt.gca().xaxis.set_ticklabels([])

        # Plot the basic global information.
        ax = plt.gca()
        txt = (
            "Total CC Coeff: %.4f\nAbsolute Noise: %e\nRelative Noise: %.3f"
            % (cc, noise_absolute, noise_relative))
        ax.text(0.01, 0.95, txt, transform=ax.transAxes,
                fontdict=dict(fontsize="small", ha='left', va='top'),
                bbox=dict(boxstyle="round", fc="w", alpha=0.8))
        plt.suptitle("Channel %s" % data_trace.id, fontsize="larger")

        # Show plot and return if not accepted.
        if accept_traces is not True:
            txt = "Rejected: %s" % (accept_traces)
            ax.text(0.99, 0.95, txt, transform=ax.transAxes,
                    fontdict=dict(fontsize="small", ha='right', va='top'),
                    bbox=dict(boxstyle="round", fc="red", alpha=1.0))
            plt.show()

    if accept_traces is not True:
        return []

    # Initialise masked arrays. The mask will be set to True where no
    # windows are chosen.
    time_windows = np.ma.ones(npts)
    time_windows.mask = False
    if plot:
        old_time_windows = time_windows.copy()

    # Elimination Stage 1: Eliminate everything half a period before or
    # after the minimum and maximum travel times, respectively.
    min_idx = int((first_tt_arrival - (minimum_period / 2.0)) / dt)
    max_idx = int(math.ceil(
        (dist_in_km / min_velocity + minimum_period / 2.0) / dt))
    time_windows.mask[:min_idx + 1] = True
    time_windows.mask[max_idx:] = True
    if plot:
        plt.subplot2grid(grid, (8, 0), rowspan=1)
        _plot_mask(time_windows, old_time_windows,
                   name="TRAVELTIME ELIMINATION")
        old_time_windows = time_windows.copy()

    # -------------------------------------------------------------------------
    # Compute sliding time shifts and correlation coefficients for time
    # frames that passed the traveltime elimination stage.
    # -------------------------------------------------------------------------
    # Allocate arrays to collect the time dependent values. Everything is
    # masked initially; assigning a value to a sample unmasks it.
    sliding_time_shift = np.ma.zeros(npts, dtype="float32")
    sliding_time_shift.mask = True
    max_cc_coeff = np.ma.zeros(npts, dtype="float32")
    max_cc_coeff.mask = True

    # Loop invariant: windows whose tapered synthetics have a peak-to-peak
    # amplitude below this value are considered to carry no energy.
    # np.ptp() is used instead of the ndarray.ptp() method as the latter
    # has been removed in NumPy 2.0.
    min_synth_ptp = np.ptp(synth) * 0.001

    for start_idx, end_idx, midpoint_idx in _window_generator(npts,
                                                              window_length):
        if not min_idx < midpoint_idx < max_idx:
            continue

        # Slice windows. Create a copy to be able to taper without affecting
        # the original time series.
        data_window = data[start_idx:end_idx].copy() * taper
        synthetic_window = synth[start_idx:end_idx].copy() * taper

        # Elimination Stage 2: Skip windows that have essentially no energy
        # to avoid instabilities. No windows can be picked in these.
        if np.ptp(synthetic_window) < min_synth_ptp:
            time_windows.mask[midpoint_idx] = True
            continue

        # Calculate the time shift. Here this is defined as the shift of the
        # synthetics relative to the data. So a value of 2, for instance, means
        # that the synthetics are 2 timesteps later than the data.
        xcorr = np.correlate(data_window, synthetic_window, mode="full")

        time_shift = xcorr.argmax() - window_length + 1
        # Express the time shift in fraction of the minimum period.
        sliding_time_shift[midpoint_idx] = (time_shift * dt) / minimum_period

        # Normalized cross correlation.
        max_cc_value = xcorr.max() / np.sqrt(
            (synthetic_window ** 2).sum() * (data_window ** 2).sum())
        max_cc_coeff[midpoint_idx] = max_cc_value

    if plot:
        plt.subplot2grid(grid, (9, 0), rowspan=1)
        _plot_mask(time_windows, old_time_windows,
                   name="NO ENERGY IN CC WINDOW")

        # Axes with the time shifts.
        plt.subplot2grid(grid, (15, 0), rowspan=4)
        plt.hlines(0, xlim[0], xlim[1], color="lightgray")
        plt.hlines(-threshold_shift, xlim[0], xlim[1], color="gray",
                   linestyle="--")
        plt.hlines(threshold_shift, xlim[0], xlim[1], color="gray",
                   linestyle="--")
        plt.text(5, -threshold_shift - (2) * 0.03,
                 "threshold", verticalalignment="top",
                 horizontalalignment="left", color="0.15",
                 path_effects=text_outline)
        plt.plot(times, sliding_time_shift, color="#377eb8",
                 label="Time shift in fraction of minimum period", lw=1.5)
        ylim = plt.ylim()
        plt.yticks([-0.75, 0, 0.75])
        plt.xticks([300, 600, 900, 1200, 1500, 1800])
        plt.ylim(ylim[0], ylim[1] + ylim[1] - ylim[0])
        plt.ylim(-1.0, 1.0)
        plt.xlim(xlim)
        plt.gca().xaxis.set_ticklabels([])
        plt.legend(loc="lower right", fancybox=True, framealpha=0.5,
                   fontsize="small")

        # Axes with the CC coeffs.
        plt.subplot2grid(grid, (10, 0), rowspan=4)
        plt.hlines(threshold_correlation, xlim[0], xlim[1], color="0.15",
                   linestyle="--")
        plt.hlines(1, xlim[0], xlim[1], color="lightgray")
        plt.hlines(0, xlim[0], xlim[1], color="lightgray")
        plt.text(5, threshold_correlation + (1.4) * 0.01,
                 "threshold", verticalalignment="bottom",
                 horizontalalignment="left", color="0.15",
                 path_effects=text_outline)
        plt.plot(times, max_cc_coeff, color="#4daf4a",
                 label="Maximum CC coefficient", lw=1.5)
        plt.ylim(-0.2, 1.2)
        plt.yticks([0, 0.5, 1])
        plt.xticks([300, 600, 900, 1200, 1500, 1800])
        plt.xlim(xlim)
        plt.gca().xaxis.set_ticklabels([])
        plt.legend(loc="lower right", fancybox=True, framealpha=0.5,
                   fontsize="small")

    # Elimination Stage 3: Mark all areas where the normalized cross
    # correlation coefficient is under threshold_correlation as negative
    if plot:
        old_time_windows = time_windows.copy()
    time_windows.mask[max_cc_coeff < threshold_correlation] = True
    if plot:
        plt.subplot2grid(grid, (14, 0), rowspan=1)
        _plot_mask(time_windows, old_time_windows,
                   name="CORRELATION COEFF THRESHOLD ELIMINATION")

    # Elimination Stage 4: Mark everything with an absolute travel time
    # shift of more than threshold_shift times the minimum period as
    # negative
    if plot:
        old_time_windows = time_windows.copy()
    time_windows.mask[np.ma.abs(sliding_time_shift) > threshold_shift] = True
    if plot:
        plt.subplot2grid(grid, (19, 0), rowspan=1)
        _plot_mask(time_windows, old_time_windows,
                   name="TIME SHIFT THRESHOLD ELIMINATION")

    # Elimination Stage 5: Mark the area around every "travel time shift
    # jump" (based on the traveltime time difference) negative. The width of
    # the area is currently chosen to be a tenth of a minimum period to
    # each side.
    if plot:
        old_time_windows = time_windows.copy()
    sample_buffer = int(np.ceil(minimum_period / dt * 0.1))
    indices = np.ma.where(
        np.ma.abs(np.ma.diff(sliding_time_shift)) > 0.1)[0]
    for index in indices:
        # Clamp the lower bound: a negative start index would wrap around
        # to the end of the array and result in an empty slice so that
        # nothing would be masked for jumps close to the trace start.
        time_windows.mask[
            max(0, index - sample_buffer):index + sample_buffer] = True
    if plot:
        plt.subplot2grid(grid, (20, 0), rowspan=1)
        _plot_mask(time_windows, old_time_windows,
                   name="TIME SHIFT JUMPS ELIMINATION")

    # Clip both to avoid large numbers by division.
    stacked = np.vstack([
        np.ma.clip(synth_env, synth_env.max() * min_envelope_similarity * 0.5,
                   synth_env.max()),
        np.ma.clip(data_env, data_env.max() * min_envelope_similarity * 0.5,
                   data_env.max())])
    # Ratio of the smaller to the larger envelope, thus always <= 1.
    ratio = stacked.min(axis=0) / stacked.max(axis=0)

    # Elimination Stage 6: Make sure the amplitudes of both don't vary too
    # much.
    if plot:
        old_time_windows = time_windows.copy()
    time_windows.mask[ratio < min_envelope_similarity] = True
    if plot:
        plt.subplot2grid(grid, (25, 0), rowspan=1)
        _plot_mask(time_windows, old_time_windows,
                   name="ENVELOPE AMPLITUDE SIMILARITY ELIMINATION")

    if plot:
        plt.subplot2grid(grid, (21, 0), rowspan=4)
        plt.hlines(min_envelope_similarity, xlim[0], xlim[1], color="gray",
                   linestyle="--")
        plt.text(5, min_envelope_similarity + (2) * 0.03,
                 "threshold", verticalalignment="bottom",
                 horizontalalignment="left", color="0.15",
                 path_effects=text_outline)
        plt.plot(times, ratio, color="#9B59B6",
                 label="Envelope amplitude similarity", lw=1.5)
        plt.yticks([0, 0.2, 0.4, 0.6, 0.8, 1.0])
        plt.ylim(0.05, 1.05)
        plt.xticks([300, 600, 900, 1200, 1500, 1800])
        plt.xlim(xlim)
        plt.gca().xaxis.set_ticklabels([])
        plt.legend(loc="lower right", fancybox=True, framealpha=0.5,
                   fontsize="small")

    # First minimum window length elimination stage. This is cheap and if
    # not done it can easily destabilize the peak-and-trough marching stage
    # which would then have to deal with way more edge cases.
    if plot:
        old_time_windows = time_windows.copy()
    # Minimum window length in samples; it does not change between the
    # different stages using it.
    min_length = \
        min(minimum_period / dt * min_length_period, maximum_period / dt)
    for i in flatnotmasked_contiguous(time_windows):
        # Throw away all windows shorter than the minimum length.
        if (i.stop - i.start) < min_length:
            time_windows.mask[i.start:i.stop] = True
    if plot:
        plt.subplot2grid(grid, (26, 0), rowspan=1)
        _plot_mask(time_windows, old_time_windows,
                   name="MINIMUM WINDOW LENGTH ELIMINATION 1")

    # -------------------------------------------------------------------------
    # Peak and trough marching algorithm
    # -------------------------------------------------------------------------
    final_windows = []
    for i in flatnotmasked_contiguous(time_windows):
        # Cut respective windows.
        window_npts = i.stop - i.start
        synthetic_window = synth[i.start:i.stop]
        data_window = data[i.start:i.stop]

        # Find extrema in the data and the synthetics.
        data_p, data_t = find_local_extrema(data_window)
        synth_p, synth_t = find_local_extrema(synthetic_window)

        # Everything is masked (True) to begin with. Regions between
        # matching extrema are unmasked below.
        window_mask = np.ones(window_npts, dtype="bool")

        # The very same matching is applied to first the peaks and then the
        # troughs.
        for data_extrema, synth_extrema in ((data_p, synth_p),
                                            (data_t, synth_t)):
            closest = find_closest(data_extrema, synth_extrema)
            diffs = np.diff(closest)

            for idx in np.where(diffs == 1)[0]:
                if idx > 0:
                    start = synth_extrema[idx - 1]
                else:
                    start = 0
                if idx < (len(synth_extrema) - 1):
                    end = synth_extrema[idx + 1]
                else:
                    end = -1
                window_mask[start:end] = False

        window_mask = np.ma.masked_array(window_mask,
                                         mask=window_mask)
        if window_mask.mask.all():
            continue

        # Convert the window-relative indices back to trace indices.
        for j in flatnotmasked_contiguous(window_mask):
            final_windows.append((i.start + j.start, i.start + j.stop))

    if plot:
        old_time_windows = time_windows.copy()
    time_windows.mask[:] = True
    for start, stop in final_windows:
        time_windows.mask[start:stop] = False
    if plot:
        plt.subplot2grid(grid, (27, 0), rowspan=1)
        _plot_mask(time_windows, old_time_windows,
                   name="PEAK AND TROUGH MARCHING ELIMINATION")

    # Loop through all the time windows, remove windows not satisfying the
    # minimum number of peaks and troughs per window. Acts mainly as a
    # safety guard. The copy is required independent of the plotting as the
    # mask is modified while looping over the windows.
    old_time_windows = time_windows.copy()
    for i in flatnotmasked_contiguous(old_time_windows):
        synthetic_window = synth[i.start:i.stop]
        data_window = data[i.start:i.stop]
        data_p, data_t = find_local_extrema(data_window)
        synth_p, synth_t = find_local_extrema(synthetic_window)
        if np.min([len(synth_p), len(synth_t), len(data_p), len(data_t)]) < \
                min_peaks_troughs:
            time_windows.mask[i.start:i.stop] = True
    if plot:
        plt.subplot2grid(grid, (28, 0), rowspan=1)
        _plot_mask(time_windows, old_time_windows,
                   name="PEAK/TROUGH COUNT ELIMINATION")

    # Second minimum window length elimination stage.
    if plot:
        old_time_windows = time_windows.copy()
    for i in flatnotmasked_contiguous(time_windows):
        # Throw away all windows shorter than the minimum length.
        if (i.stop - i.start) < min_length:
            time_windows.mask[i.start:i.stop] = True
    if plot:
        plt.subplot2grid(grid, (29, 0), rowspan=1)
        _plot_mask(time_windows, old_time_windows,
                   name="MINIMUM WINDOW LENGTH ELIMINATION 2")

    # Final step, eliminating windows with little energy.
    final_windows = []
    for j in flatnotmasked_contiguous(time_windows):
        # Again assert a certain minimal length.
        if (j.stop - j.start) < min_length:
            continue

        # Compare the energy in the data window and the synthetic window.
        data_energy = (data[j.start:j.stop] ** 2).sum()
        synth_energy = (synth[j.start:j.stop] ** 2).sum()
        energies = sorted([data_energy, synth_energy])
        if energies[1] > max_energy_ratio * energies[0]:
            if verbose:
                _log_window_selection(
                    data_trace.id,
                    "Deselecting window due to energy ratio between "
                    "data and synthetics.")
            continue

        # Check that amplitudes in the data are above the noise
        if noise_absolute / np.ptp(data[j.start:j.stop]) > \
                max_noise_window:
            if verbose:
                _log_window_selection(
                    data_trace.id,
                    "Deselecting window due having no amplitude above the "
                    "signal to noise ratio.")
            # Actually skip the window. Without this the window would be
            # selected despite having been reported as deselected.
            continue

        final_windows.append((j.start, j.stop))

    if plot:
        old_time_windows = time_windows.copy()
    time_windows.mask[:] = True
    for start, stop in final_windows:
        time_windows.mask[start:stop] = False
    if plot:
        plt.subplot2grid(grid, (30, 0), rowspan=1)
        _plot_mask(time_windows, old_time_windows,
                   name="LITTLE ENERGY ELIMINATION")

    if verbose:
        _log_window_selection(
            data_trace.id,
            "Done, Selected %i window(s)" % len(final_windows))

    # Final step is to convert the index value windows to actual times.
    windows = []
    for start, stop in final_windows:
        start = data_starttime + start * data_delta
        stop = data_starttime + stop * data_delta
        windows.append((start, stop))

    if plot:
        # Plot the final windows to the data axes.
        import matplotlib.transforms as mtransforms  # NOQA
        ax = data_plot
        # x values in data coordinates, y values in axes coordinates so the
        # shaded areas always span the full height of the axes.
        trans = mtransforms.blended_transform_factory(ax.transData,
                                                      ax.transAxes)
        for start, stop in final_windows:
            ax.fill_between([start * data_delta, stop * data_delta], 0, 1,
                            facecolor="#CDDC39", alpha=0.5, transform=trans)

        plt.show()

    return windows
def select_windows(data_trace, synthetic_trace, event_latitude, event_longitude, event_depth_in_km, station_latitude, station_longitude, minimum_period, maximum_period, min_cc=0.10, max_noise=0.10, max_noise_window=0.4, min_velocity=2.4, threshold_shift=0.30, threshold_correlation=0.75, min_length_period=1.5, min_peaks_troughs=2, max_energy_ratio=10.0, min_envelope_similarity=0.2, verbose=False, plot=False): """ Window selection algorithm for picking windows suitable for misfit calculation based on phase differences. Returns a list of windows which might be empty due to various reasons. This function is really long and a lot of things. For a more detailed description, please see the LASIF paper. :param data_trace: The data trace. :type data_trace: :class:`~obspy.core.trace.Trace` :param synthetic_trace: The synthetic trace. :type synthetic_trace: :class:`~obspy.core.trace.Trace` :param event_latitude: The event latitude. :type event_latitude: float :param event_longitude: The event longitude. :type event_longitude: float :param event_depth_in_km: The event depth in km. :type event_depth_in_km: float :param station_latitude: The station latitude. :type station_latitude: float :param station_longitude: The station longitude. :type station_longitude: float :param minimum_period: The minimum period of the data in seconds. :type minimum_period: float :param maximum_period: The maximum period of the data in seconds. :type maximum_period: float :param min_cc: Minimum normalised correlation coefficient of the complete traces. :type min_cc: float :param max_noise: Maximum relative noise level for the whole trace. Measured from maximum amplitudes before and after the first arrival. :type max_noise: float :param max_noise_window: Maximum relative noise level for individual windows. :type max_noise_window: float :param min_velocity: All arrivals later than those corresponding to the threshold velocity [km/s] will be excluded. 
:type min_velocity: float :param threshold_shift: Maximum allowable time shift within a window, as a fraction of the minimum period. :type threshold_shift: float :param threshold_correlation: Minimum normalised correlation coeeficient within a window. :type threshold_correlation: float :param min_length_period: Minimum length of the time windows relative to the minimum period. :type min_length_period: float :param min_peaks_troughs: Minimum number of extrema in an individual time window (excluding the edges). :type min_peaks_troughs: float :param max_energy_ratio: Maximum energy ratio between data and synthetics within a time window. Don't make this too small! :type max_energy_ratio: float :param min_envelope_similarity: The minimum similarity of the envelopes of both data and synthetics. This essentially assures that the amplitudes of data and synthetics can not diverge too much within a window. It is a bit like the inverse of the ratio of both envelopes so a value of 0.2 makes sure neither amplitude can be more then 5 times larger than the other. :type min_envelope_similarity: float :param verbose: No output by default. :type verbose: bool :param plot: Create a plot of the algortihm while it does its work. :type plot: bool """ # Shortcuts to frequently accessed variables. data_starttime = data_trace.stats.starttime data_delta = data_trace.stats.delta dt = data_trace.stats.delta npts = data_trace.stats.npts synth = synthetic_trace.data data = data_trace.data times = data_trace.times() # Fill cache if necessary. if not TAUPY_MODEL_CACHE: from obspy.taup import TauPyModel # NOQA TAUPY_MODEL_CACHE["model"] = TauPyModel("AK135") model = TAUPY_MODEL_CACHE["model"] # ------------------------------------------------------------------------- # Geographical calculations and the time of the first arrival. 
# ------------------------------------------------------------------------- dist_in_deg = geodetics.locations2degrees(station_latitude, station_longitude, event_latitude, event_longitude) dist_in_km = geodetics.calcVincentyInverse( station_latitude, station_longitude, event_latitude, event_longitude)[0] / 1000.0 # Get only a couple of P phases which should be the first arrival # for every epicentral distance. Its quite a bit faster than calculating # the arrival times for every phase. # Assumes the first sample is the centroid time of the event. tts = model.get_travel_times(source_depth_in_km=event_depth_in_km, distance_in_degree=dist_in_deg, phase_list=["ttp"]) # Sort just as a safety measure. tts = sorted(tts, key=lambda x: x.time) first_tt_arrival = tts[0].time # ------------------------------------------------------------------------- # Window settings # ------------------------------------------------------------------------- # Number of samples in the sliding window. Currently, the length of the # window is set to a multiple of the dominant period of the synthetics. # Make sure it is an uneven number; just to have a trivial midpoint # definition and one sample does not matter much in any case. window_length = int(round(float(2 * minimum_period) / dt)) if not window_length % 2: window_length += 1 # Use a Hanning window. No particular reason for it but its a well-behaved # window and has nice spectral properties. taper = np.hanning(window_length) # ========================================================================= # check if whole seismograms are sufficiently correlated and estimate # noise level # ========================================================================= # Overall Correlation coefficient. norm = np.sqrt(np.sum(data ** 2)) * np.sqrt(np.sum(synth ** 2)) cc = np.sum(data * synth) / norm if verbose: _log_window_selection(data_trace.id, "Correlation Coefficient: %.4f" % cc) # Estimate noise level from waveforms prior to the first arrival. 
idx_end = int(np.ceil((first_tt_arrival - 0.5 * minimum_period) / dt)) idx_end = max(10, idx_end) idx_start = int(np.ceil((first_tt_arrival - 2.5 * minimum_period) / dt)) idx_start = max(10, idx_start) if idx_start >= idx_end: idx_start = max(0, idx_end - 10) abs_data = np.abs(data) noise_absolute = abs_data[idx_start:idx_end].max() noise_relative = noise_absolute / abs_data.max() if verbose: _log_window_selection(data_trace.id, "Absolute Noise Level: %e" % noise_absolute) _log_window_selection(data_trace.id, "Relative Noise Level: %e" % noise_relative) # Basic global rejection criteria. accept_traces = True if (cc < min_cc) and (noise_relative > max_noise / 3.0): msg = "Correlation %.4f is below threshold of %.4f" % (cc, min_cc) if verbose: _log_window_selection(data_trace.id, msg) accept_traces = msg if noise_relative > max_noise: msg = "Noise level %.3f is above threshold of %.3f" % ( noise_relative, max_noise) if verbose: _log_window_selection( data_trace.id, msg) accept_traces = msg # Calculate the envelope of both data and synthetics. This is to make sure # that the amplitude of both is not too different over time and is # used as another selector. Only calculated if the trace is generally # accepted as it is fairly slow. if accept_traces is True: data_env = obspy.signal.filter.envelope(data) synth_env = obspy.signal.filter.envelope(synth) # ------------------------------------------------------------------------- # Initial Plot setup. # ------------------------------------------------------------------------- # All the plot calls are interleaved. I realize this is really ugly but # the alternative would be to either have two functions (one with plots, # one without) or split the plotting function in various subfunctions, # neither of which are acceptable in my opinion. The impact on # performance is minimal if plotting is turned off: all imports are lazy # and a couple of conditionals are cheap. 
if plot: import matplotlib.pylab as plt # NOQA import matplotlib.patheffects as PathEffects # NOQA if accept_traces is True: plt.figure(figsize=(18, 12)) plt.subplots_adjust(left=0.05, bottom=0.05, right=0.98, top=0.95, wspace=None, hspace=0.0) grid = (31, 1) # Axes showing the data. data_plot = plt.subplot2grid(grid, (0, 0), rowspan=8) else: # Only show one axes it the traces are not accepted. plt.figure(figsize=(18, 3)) # Plot envelopes if needed. if accept_traces is True: plt.plot(times, data_env, color="black", alpha=0.5, lw=0.4, label="data envelope") plt.plot(synthetic_trace.times(), synth_env, color="#e41a1c", alpha=0.4, lw=0.5, label="synthetics envelope") plt.plot(times, data, color="black", label="data", lw=1.5) plt.plot(synthetic_trace.times(), synth, color="#e41a1c", label="synthetics", lw=1.5) # Symmetric around y axis. middle = data.mean() d_max, d_min = data.max(), data.min() r = max(d_max - middle, middle - d_min) * 1.1 ylim = (middle - r, middle + r) xlim = (times[0], times[-1]) plt.ylim(*ylim) plt.xlim(*xlim) offset = (xlim[1] - xlim[0]) * 0.005 plt.vlines(first_tt_arrival, ylim[0], ylim[1], colors="#ff7f00", lw=2) plt.text(first_tt_arrival + offset, ylim[1] - (ylim[1] - ylim[0]) * 0.02, "first arrival", verticalalignment="top", horizontalalignment="left", color="#ee6e00", path_effects=[ PathEffects.withStroke(linewidth=3, foreground="white")]) plt.vlines(first_tt_arrival - minimum_period / 2.0, ylim[0], ylim[1], colors="#ff7f00", lw=2) plt.text(first_tt_arrival - minimum_period / 2.0 - offset, ylim[0] + (ylim[1] - ylim[0]) * 0.02, "first arrival - min period / 2", verticalalignment="bottom", horizontalalignment="right", color="#ee6e00", path_effects=[ PathEffects.withStroke(linewidth=3, foreground="white")]) for velocity in [6, 5, 4, 3, min_velocity]: tt = dist_in_km / velocity plt.vlines(tt, ylim[0], ylim[1], colors="gray", lw=2) if velocity == min_velocity: hal = "right" o_s = -1.0 * offset else: hal = "left" o_s = offset plt.text(tt + o_s, 
ylim[0] + (ylim[1] - ylim[0]) * 0.02, str(velocity) + " km/s", verticalalignment="bottom", horizontalalignment=hal, color="0.15") plt.vlines(dist_in_km / min_velocity + minimum_period / 2.0, ylim[0], ylim[1], colors="gray", lw=2) plt.text(dist_in_km / min_velocity + minimum_period / 2.0 - offset, ylim[1] - (ylim[1] - ylim[0]) * 0.02, "min surface velocity + min period / 2", verticalalignment="top", horizontalalignment="right", color="0.15", path_effects=[ PathEffects.withStroke(linewidth=3, foreground="white")]) plt.hlines(noise_absolute, xlim[0], xlim[1], linestyle="--", color="gray") plt.hlines(-noise_absolute, xlim[0], xlim[1], linestyle="--", color="gray") plt.text(offset, noise_absolute + (ylim[1] - ylim[0]) * 0.01, "noise level", verticalalignment="bottom", horizontalalignment="left", color="0.15", path_effects=[ PathEffects.withStroke(linewidth=3, foreground="white")]) plt.legend(loc="lower right", fancybox=True, framealpha=0.5, fontsize="small") plt.gca().xaxis.set_ticklabels([]) # Plot the basic global information. ax = plt.gca() txt = ( "Total CC Coeff: %.4f\nAbsolute Noise: %e\nRelative Noise: %.3f" % (cc, noise_absolute, noise_relative)) ax.text(0.01, 0.95, txt, transform=ax.transAxes, fontdict=dict(fontsize="small", ha='left', va='top'), bbox=dict(boxstyle="round", fc="w", alpha=0.8)) plt.suptitle("Channel %s" % data_trace.id, fontsize="larger") # Show plot and return if not accepted. if accept_traces is not True: txt = "Rejected: %s" % (accept_traces) ax.text(0.99, 0.95, txt, transform=ax.transAxes, fontdict=dict(fontsize="small", ha='right', va='top'), bbox=dict(boxstyle="round", fc="red", alpha=1.0)) plt.show() if accept_traces is not True: return [] # Initialise masked arrays. The mask will be set to True where no # windows are chosen. 
time_windows = np.ma.ones(npts) time_windows.mask = False if plot: old_time_windows = time_windows.copy() # Elimination Stage 1: Eliminate everything half a period before or # after the minimum and maximum travel times, respectively. # theoretical arrival as positive. min_idx = int((first_tt_arrival - (minimum_period / 2.0)) / dt) max_idx = int(math.ceil(( dist_in_km / min_velocity + minimum_period / 2.0) / dt)) time_windows.mask[:min_idx + 1] = True time_windows.mask[max_idx:] = True if plot: plt.subplot2grid(grid, (8, 0), rowspan=1) _plot_mask(time_windows, old_time_windows, name="TRAVELTIME ELIMINATION") old_time_windows = time_windows.copy() # ------------------------------------------------------------------------- # Compute sliding time shifts and correlation coefficients for time # frames that passed the traveltime elimination stage. # ------------------------------------------------------------------------- # Allocate arrays to collect the time dependent values. sliding_time_shift = np.ma.zeros(npts, dtype="float32") sliding_time_shift.mask = True max_cc_coeff = np.ma.zeros(npts, dtype="float32") max_cc_coeff.mask = True for start_idx, end_idx, midpoint_idx in _window_generator(npts, window_length): if not min_idx < midpoint_idx < max_idx: continue # Slice windows. Create a copy to be able to taper without affecting # the original time series. data_window = data[start_idx: end_idx].copy() * taper synthetic_window = \ synth[start_idx: end_idx].copy() * taper # Elimination Stage 2: Skip windows that have essentially no energy # to avoid instabilities. No windows can be picked in these. if synthetic_window.ptp() < synth.ptp() * 0.001: time_windows.mask[midpoint_idx] = True continue # Calculate the time shift. Here this is defined as the shift of the # synthetics relative to the data. So a value of 2, for instance, means # that the synthetics are 2 timesteps later then the data. 
cc = np.correlate(data_window, synthetic_window, mode="full") time_shift = cc.argmax() - window_length + 1 # Express the time shift in fraction of the minimum period. sliding_time_shift[midpoint_idx] = (time_shift * dt) / minimum_period # Normalized cross correlation. max_cc_value = cc.max() / np.sqrt((synthetic_window ** 2).sum() * (data_window ** 2).sum()) max_cc_coeff[midpoint_idx] = max_cc_value if plot: plt.subplot2grid(grid, (9, 0), rowspan=1) _plot_mask(time_windows, old_time_windows, name="NO ENERGY IN CC WINDOW") # Axes with the CC coeffs plt.subplot2grid(grid, (15, 0), rowspan=4) plt.hlines(0, xlim[0], xlim[1], color="lightgray") plt.hlines(-threshold_shift, xlim[0], xlim[1], color="gray", linestyle="--") plt.hlines(threshold_shift, xlim[0], xlim[1], color="gray", linestyle="--") plt.text(5, -threshold_shift - (2) * 0.03, "threshold", verticalalignment="top", horizontalalignment="left", color="0.15", path_effects=[ PathEffects.withStroke(linewidth=3, foreground="white")]) plt.plot(times, sliding_time_shift, color="#377eb8", label="Time shift in fraction of minimum period", lw=1.5) ylim = plt.ylim() plt.yticks([-0.75, 0, 0.75]) plt.xticks([300, 600, 900, 1200, 1500, 1800]) plt.ylim(ylim[0], ylim[1] + ylim[1] - ylim[0]) plt.ylim(-1.0, 1.0) plt.xlim(xlim) plt.gca().xaxis.set_ticklabels([]) plt.legend(loc="lower right", fancybox=True, framealpha=0.5, fontsize="small") plt.subplot2grid(grid, (10, 0), rowspan=4) plt.hlines(threshold_correlation, xlim[0], xlim[1], color="0.15", linestyle="--") plt.hlines(1, xlim[0], xlim[1], color="lightgray") plt.hlines(0, xlim[0], xlim[1], color="lightgray") plt.text(5, threshold_correlation + (1.4) * 0.01, "threshold", verticalalignment="bottom", horizontalalignment="left", color="0.15", path_effects=[ PathEffects.withStroke(linewidth=3, foreground="white")]) plt.plot(times, max_cc_coeff, color="#4daf4a", label="Maximum CC coefficient", lw=1.5) plt.ylim(-0.2, 1.2) plt.yticks([0, 0.5, 1]) plt.xticks([300, 600, 900, 1200, 1500, 
1800]) plt.xlim(xlim) plt.gca().xaxis.set_ticklabels([]) plt.legend(loc="lower right", fancybox=True, framealpha=0.5, fontsize="small") # Elimination Stage 3: Mark all areas where the normalized cross # correlation coefficient is under threshold_correlation as negative if plot: old_time_windows = time_windows.copy() time_windows.mask[max_cc_coeff < threshold_correlation] = True if plot: plt.subplot2grid(grid, (14, 0), rowspan=1) _plot_mask(time_windows, old_time_windows, name="CORRELATION COEFF THRESHOLD ELIMINATION") # Elimination Stage 4: Mark everything with an absolute travel time # shift of more than # threshold_shift times the dominant period as # negative if plot: old_time_windows = time_windows.copy() time_windows.mask[np.ma.abs(sliding_time_shift) > threshold_shift] = True if plot: plt.subplot2grid(grid, (19, 0), rowspan=1) _plot_mask(time_windows, old_time_windows, name="TIME SHIFT THRESHOLD ELIMINATION") # Elimination Stage 5: Mark the area around every "travel time shift # jump" (based on the traveltime time difference) negative. The width of # the area is currently chosen to be a tenth of a dominant period to # each side. if plot: old_time_windows = time_windows.copy() sample_buffer = int(np.ceil(minimum_period / dt * 0.1)) indices = np.ma.where(np.ma.abs(np.ma.diff(sliding_time_shift)) > 0.1)[0] for index in indices: time_windows.mask[index - sample_buffer: index + sample_buffer] = True if plot: plt.subplot2grid(grid, (20, 0), rowspan=1) _plot_mask(time_windows, old_time_windows, name="TIME SHIFT JUMPS ELIMINATION") # Clip both to avoid large numbers by division. stacked = np.vstack([ np.ma.clip(synth_env, synth_env.max() * min_envelope_similarity * 0.5, synth_env.max()), np.ma.clip(data_env, data_env.max() * min_envelope_similarity * 0.5, data_env.max())]) # Ratio. ratio = stacked.min(axis=0) / stacked.max(axis=0) # Elimination Stage 6: Make sure the amplitudes of both don't vary too # much. 
if plot: old_time_windows = time_windows.copy() time_windows.mask[ratio < min_envelope_similarity] = True if plot: plt.subplot2grid(grid, (25, 0), rowspan=1) _plot_mask(time_windows, old_time_windows, name="ENVELOPE AMPLITUDE SIMILARITY ELIMINATION") if plot: plt.subplot2grid(grid, (21, 0), rowspan=4) plt.hlines(min_envelope_similarity, xlim[0], xlim[1], color="gray", linestyle="--") plt.text(5, min_envelope_similarity + (2) * 0.03, "threshold", verticalalignment="bottom", horizontalalignment="left", color="0.15", path_effects=[ PathEffects.withStroke(linewidth=3, foreground="white")]) plt.plot(times, ratio, color="#9B59B6", label="Envelope amplitude similarity", lw=1.5) plt.yticks([0, 0.2, 0.4, 0.6, 0.8, 1.0]) plt.ylim(0.05, 1.05) plt.xticks([300, 600, 900, 1200, 1500, 1800]) plt.xlim(xlim) plt.gca().xaxis.set_ticklabels([]) plt.legend(loc="lower right", fancybox=True, framealpha=0.5, fontsize="small") # First minimum window length elimination stage. This is cheap and if # not done it can easily destabilize the peak-and-trough marching stage # which would then have to deal with way more edge cases. if plot: old_time_windows = time_windows.copy() min_length = \ min(minimum_period / dt * min_length_period, maximum_period / dt) for i in flatnotmasked_contiguous(time_windows): # Step 7: Throw away all windows with a length of less then # min_length_period the dominant periodele if (i.stop - i.start) < min_length: time_windows.mask[i.start: i.stop] = True if plot: plt.subplot2grid(grid, (26, 0), rowspan=1) _plot_mask(time_windows, old_time_windows, name="MINIMUM WINDOW LENGTH ELIMINATION 1") # ------------------------------------------------------------------------- # Peak and trough marching algorithm # ------------------------------------------------------------------------- final_windows = [] for i in flatnotmasked_contiguous(time_windows): # Cut respective windows. 
window_npts = i.stop - i.start synthetic_window = synth[i.start: i.stop] data_window = data[i.start: i.stop] # Find extrema in the data and the synthetics. data_p, data_t = find_local_extrema(data_window) synth_p, synth_t = find_local_extrema(synthetic_window) window_mask = np.ones(window_npts, dtype="bool") closest_peaks = find_closest(data_p, synth_p) diffs = np.diff(closest_peaks) for idx in np.where(diffs == 1)[0]: if idx > 0: start = synth_p[idx - 1] else: start = 0 if idx < (len(synth_p) - 1): end = synth_p[idx + 1] else: end = -1 window_mask[start: end] = False closest_troughs = find_closest(data_t, synth_t) diffs = np.diff(closest_troughs) for idx in np.where(diffs == 1)[0]: if idx > 0: start = synth_t[idx - 1] else: start = 0 if idx < (len(synth_t) - 1): end = synth_t[idx + 1] else: end = -1 window_mask[start: end] = False window_mask = np.ma.masked_array(window_mask, mask=window_mask) if window_mask.mask.all(): continue for j in flatnotmasked_contiguous(window_mask): final_windows.append((i.start + j.start, i.start + j.stop)) if plot: old_time_windows = time_windows.copy() time_windows.mask[:] = True for start, stop in final_windows: time_windows.mask[start:stop] = False if plot: plt.subplot2grid(grid, (27, 0), rowspan=1) _plot_mask(time_windows, old_time_windows, name="PEAK AND TROUGH MARCHING ELIMINATION") # Loop through all the time windows, remove windows not satisfying the # minimum number of peaks and troughs per window. Acts mainly as a # safety guard. 
old_time_windows = time_windows.copy() for i in flatnotmasked_contiguous(old_time_windows): synthetic_window = synth[i.start: i.stop] data_window = data[i.start: i.stop] data_p, data_t = find_local_extrema(data_window) synth_p, synth_t = find_local_extrema(synthetic_window) if np.min([len(synth_p), len(synth_t), len(data_p), len(data_t)]) < \ min_peaks_troughs: time_windows.mask[i.start: i.stop] = True if plot: plt.subplot2grid(grid, (28, 0), rowspan=1) _plot_mask(time_windows, old_time_windows, name="PEAK/TROUGH COUNT ELIMINATION") # Second minimum window length elimination stage. if plot: old_time_windows = time_windows.copy() min_length = \ min(minimum_period / dt * min_length_period, maximum_period / dt) for i in flatnotmasked_contiguous(time_windows): # Step 7: Throw away all windows with a length of less then # min_length_period the dominant period. if (i.stop - i.start) < min_length: time_windows.mask[i.start: i.stop] = True if plot: plt.subplot2grid(grid, (29, 0), rowspan=1) _plot_mask(time_windows, old_time_windows, name="MINIMUM WINDOW LENGTH ELIMINATION 2") # Final step, eliminating windows with little energy. final_windows = [] for j in flatnotmasked_contiguous(time_windows): # Again assert a certain minimal length. if (j.stop - j.start) < min_length: continue # Compare the energy in the data window and the synthetic window. 
data_energy = (data[j.start: j.stop] ** 2).sum() synth_energy = (synth[j.start: j.stop] ** 2).sum() energies = sorted([data_energy, synth_energy]) if energies[1] > max_energy_ratio * energies[0]: if verbose: _log_window_selection( data_trace.id, "Deselecting window due to energy ratio between " "data and synthetics.") continue # Check that amplitudes in the data are above the noise if noise_absolute / data[j.start: j.stop].ptp() > \ max_noise_window: if verbose: _log_window_selection( data_trace.id, "Deselecting window due having no amplitude above the " "signal to noise ratio.") final_windows.append((j.start, j.stop)) if plot: old_time_windows = time_windows.copy() time_windows.mask[:] = True for start, stop in final_windows: time_windows.mask[start:stop] = False if plot: plt.subplot2grid(grid, (30, 0), rowspan=1) _plot_mask(time_windows, old_time_windows, name="LITTLE ENERGY ELIMINATION") if verbose: _log_window_selection( data_trace.id, "Done, Selected %i window(s)" % len(final_windows)) # Final step is to convert the index value windows to actual times. windows = [] for start, stop in final_windows: start = data_starttime + start * data_delta stop = data_starttime + stop * data_delta windows.append((start, stop)) if plot: # Plot the final windows to the data axes. import matplotlib.transforms as mtransforms # NOQA ax = data_plot trans = mtransforms.blended_transform_factory(ax.transData, ax.transAxes) for start, stop in final_windows: ax.fill_between([start * data_delta, stop * data_delta], 0, 1, facecolor="#CDDC39", alpha=0.5, transform=trans) plt.show() return windows