def rf_quality_metrics_queue(oqueue, station_id, station_stream3c, similarity_eps, drop_z=True):
    """Compute RF quality metrics for a station and queue the resulting traces for downstream processing.

    The work is delegated to compute_rf_quality_metrics(). If that call returns None, nothing is
    queued. Otherwise the returned component streams are interleaved trace by trace into a single
    rf.RFStream, which is put on the output queue.

    :param oqueue: Output queue where filtered streams are queued
    :type oqueue: queue or multiprocessing.Manager.Queue
    :param station_id: Station ID
    :type station_id: str
    :param station_stream3c: 3-channel stream
    :type station_stream3c: list(rf.RFStream) with 3 components
    :param similarity_eps: Distance threshold used for DBSCAN clustering
    :type similarity_eps: float
    :param drop_z: If True, only the primary and transverse component streams are queued. If False,
        the Z component stream is interleaved as well. Defaults to True.
    :type drop_z: bool, optional
    """
    streams_qual = compute_rf_quality_metrics(station_id, station_stream3c, similarity_eps)
    if streams_qual is None:
        # Metrics could not be computed, so there is nothing to pass downstream.
        return
    # end if

    z_stream, p_stream, t_stream = streams_qual
    components = (p_stream, t_stream) if drop_z else (z_stream, p_stream, t_stream)
    # zip() groups the i-th trace of each component together so related channels stay adjacent.
    interleaved = [tr for group in zip(*components) for tr in group]
    oqueue.put(rf.RFStream(interleaved))
def get_eq_waveforms(station_list, sta_inv, eq_cat, filt_kws=None, **kwargs):
    """
    Gather 3-component earthquake waveforms for each station/event pair into one rf.RFStream.

    For every station and every event, waveforms are requested through read_passive(). Only
    results with exactly 3 traces are used. Each accepted trace gets the metadata computed by
    rf.rfstats() merged into its stats. Events for which rf.rfstats() returns None are skipped,
    including when this happens for the very first candidate event.

    :param station_list: list of stations to retrieve data from
    :param sta_inv: obspy network inventory object containing station information
    :param eq_cat: obspy inventory object containing earthquake catalog
    :param filt_kws: filter settings forwarded unchanged to read_passive(), defaults to None
    :param kwargs: extra keyword arguments forwarded to read_passive(). If 'phase_list' is present,
        its first entry is used as the phase for rf.rfstats(), otherwise 'P' is used.
    :return: stream with the traces of all accepted events, or None if no event was accepted
    """
    eq_stream = None
    for stat in station_list:
        op_stat = get_station_info(stat, sta_inv, xml=True)
        stat_dict = get_station_info(stat, sta_inv)
        for event in eq_cat:
            cat_id = event.resource_id
            test_id = str(cat_id).split("=")[1].split("&")[0]
            print("\nSearching for event {0} at station {1}".format(test_id, stat))
            waveforms = read_passive(op_stat, event, filt_kws=filt_kws, **kwargs)
            if waveforms is None:
                continue
            if len(waveforms) != 3:
                print("Imported stream does not have 3 traces ({0})... skipping event".format(len(waveforms)))
                continue

            phase = kwargs['phase_list'][0] if 'phase_list' in kwargs else 'P'
            stats = rf.rfstats(station=stat_dict, event=event, phase=phase, dist_range=(30, 90))
            if stats is None:
                # No metadata available for this station/event pair, so the event cannot be used.
                print("No rfstats calculated... skipping event")
                continue

            event_stream = rf.RFStream(waveforms)
            for tr in event_stream:
                tr.stats.update(stats)
            if eq_stream is None:
                eq_stream = event_stream
            else:
                eq_stream.extend(event_stream)

    if eq_stream is None:
        print("No earthquake waveforms found...")
        return None
    return eq_stream
def _compute_rf(data, config, log):
    """Compute receiver functions from per-event seismogram arrays, borrowing headers from source waveforms.

    For each event, the header of the Z-component source trace with matching event ID is adapted
    (start time, sampling rate, sample count) using config["su_energy_opts"] and attached to the
    event data. Row 1 of each event array is used as the Z component and row 0 as the R component.
    The assembled stream is bandpass filtered and then deconvolved using rf_iter_deconv.

    :param data: Event seismograms indexed as (event, component, sample). The first dimension must
        match the number of event IDs in the config.
    :type data: numpy.array
    :param config: Solution configuration. Reads keys "event_ids", "waveform_file", "station_id"
        (formatted as "NET.STA.LOC") and "su_energy_opts" (with "time_window" and "sampling_rate").
    :type config: dict
    :param log: Logging instance
    :type log: logging.Logger
    :return: Stream of receiver functions. An empty stream is returned when the event IDs or the
        source waveform file are unavailable.
    :rtype: rf.RFStream
    """
    st = rf.RFStream()

    event_ids = config.get("event_ids")
    src_file = config.get("waveform_file")
    if event_ids is None:
        log.error("Unable to generate RF without event IDs")
        return st
    # end if
    if src_file is None:
        log.error("Unable to generate RF without path to source file")
        return st
    # end if
    if not os.path.isfile(src_file):
        log.error("Source file {} for trace metadata not found, cannot generate RF".format(src_file))
        # Return the empty stream like the other guards so callers always receive an RFStream.
        return st
    # end if

    net, sta, loc = config["station_id"].split('.')
    src_waveforms = read_h5_stream(src_file, net, sta, loc)

    assert data.shape[0] == len(event_ids)
    for i, event_data in enumerate(data):
        evid = event_ids[i]
        src_stream = rf.RFStream([tr for tr in src_waveforms if tr.stats.event_id == evid])

        # Z component
        z_header = src_stream.select(component='Z')[0].stats
        su_opts = config["su_energy_opts"]
        z_header.starttime = z_header.onset + su_opts["time_window"][0]
        z_header.sampling_rate = su_opts["sampling_rate"]
        z_header.delta = 1.0 / z_header.sampling_rate
        z_header.npts = event_data.shape[1]
        # Sanity check that the adapted header spans the configured time window.
        assert np.isclose(float(z_header.endtime - z_header.starttime),
                          su_opts["time_window"][1] - su_opts["time_window"][0])
        tr = rf.rfstream.RFTrace(event_data[1, :], header=z_header)
        st += tr

        # R component
        r_header = z_header.copy()
        r_header.channel = z_header.channel[:-1] + 'R'
        tr = rf.rfstream.RFTrace(event_data[0, :], header=r_header)
        st += tr
    # end for

    st.filter('bandpass', freqmin=0.05, freqmax=1.0, corners=2, zerophase=True)

    normalize = 0  # Use Z-component for normalization
    st.rf(rotate=None, method='P', deconvolve='func', func=rf_iter_deconv,
          normalize=normalize, min_fit_threshold=75.0)

    return st
def remove_small_s2n(stream, ratio):
    """Keep only traces whose signal-to-noise RMS ratio and epicentral distance are large enough.

    The noise window spans -5 to -2 and the signal window spans -1 to 2, both relative to
    'onset'. A trace is kept when signal RMS divided by noise RMS exceeds `ratio` and its
    `stats.distance` is greater than 35.

    :param stream: Stream of traces to filter
    :type stream: rf.RFStream
    :param ratio: Minimum signal-to-noise RMS ratio (exclusive) for a trace to be kept
    :type ratio: float
    :return: New stream holding the traces that passed; the input stream is not modified
    :rtype: rf.RFStream
    """
    noise = stream.slice2(-5, -2, 'onset')
    signal = stream.slice2(-1, 2, 'onset')
    newstream = rf.RFStream()
    # Index-based lookup pairs each original trace with its own noise/signal slice.
    for i, tr in enumerate(stream):
        signal_rms = np.sqrt(np.mean(np.square(signal[i].data)))
        noise_rms = np.sqrt(np.mean(np.square(noise[i].data)))
        if signal_rms / noise_rms > ratio and tr.stats.distance > 35.:
            newstream.append(tr)
    return newstream
def filter_crosscorr_coeff(rf_stream, time_window=(-2, 25), threshold_cc=0.70, min_fraction=0.15,
                           apply_moveout=False):
    """Select traces that correlate well with a sufficient fraction of the traces in the stream.

    The correlation coefficient between every pair of traces is computed within a time window
    around onset. A trace is retained if the fraction of coefficients in its row that reach
    threshold_cc is at least min_fraction. Criteria are based on C.Sippl (2016)
    [see http://dx.doi.org/10.1016/j.tecto.2016.03.031]

    :param rf_stream: Stream of RF traces to filter, should be **for a single component of a single station**
    :type rf_stream: rf.RFStream
    :param time_window: Time window to filter by, defaults to (-2, 25)
    :type time_window: tuple, optional
    :param threshold_cc: Threshold cross-correlation coefficient, defaults to 0.70.
        Denoted Xi in Sippl, who used value 0.80.
    :type threshold_cc: float, optional
    :param min_fraction: Minimum fraction of coefficients above threshold_cc, defaults to 0.15.
        Denoted tau in Sippl, who used value 0.15.
    :type min_fraction: float, optional
    :param apply_moveout: Whether to apply moveout correction to Ps phase prior to computing
        correlation coefficients.
    :type apply_moveout: bool
    :return: Filtered stream of RF traces
    :rtype: rf.RFStream
    """
    assert_homogenous_stream(rf_stream, filter_crosscorr_coeff.__name__)

    # With fewer than 3 traces a similarity filter is not meaningful, so hand back the input as is.
    if len(rf_stream) < 3:
        return rf_stream
    # end if

    # Work on a trimmed copy so the correlation only reflects the period around and after onset,
    # while the caller's traces remain untouched.
    windowed = rf_stream.copy().trim2(*time_window, reftime='onset')
    if not windowed:
        return windowed
    # end if

    if apply_moveout:
        windowed.moveout()
    # end if

    # One row per trace. The resulting coefficient matrix is symmetric, and row i describes how
    # trace i correlates with every trace in the stream.
    waveforms = np.array([tr.data for tr in windowed])
    corr = np.corrcoef(waveforms)

    num_similar = np.sum(corr >= threshold_cc, axis=1)
    similar_fraction = num_similar / len(windowed)
    keep = similar_fraction >= min_fraction

    # The mask is applied to the original (untrimmed) traces.
    survivors = [tr for idx, tr in enumerate(rf_stream) if keep[idx]]
    return rf.RFStream(survivors)
def calculate_rfs(eq_stream, filt_kw=None, deconvolve='time', moveout=True, savefile=None, **kwargs):
    """
    Compute receiver functions event by event from a stream of earthquake waveforms.

    The input is copied and iterated in groups of 3 components sharing an onset. A group is
    skipped if any trace does not hold 90 seconds worth of samples (one extra sample tolerated),
    if its traces do not share a start time, or if it does not have exactly 3 traces. Accepted
    groups are trimmed to (-25, 75) around onset, rotated ZNE->LQT, deconvolved and optionally
    moveout corrected.

    :param eq_stream: obspy stream object containing earthquake waveforms
    :param filt_kw: dict object containing parameters for an obspy filter
    :param deconvolve: string to select deconvolution method ('time' or 'freq')
    :param moveout: correct time delays of receiver function results for moveout (True or False)
    :param savefile: save result to hdf5 file (string with the full file path to file)
    :param kwargs: extra keyword arguments forwarded to the stream's rf() method
    :return: stream containing the receiver functions of all accepted events
    """
    rf_stream = rf.RFStream()
    working_stream = eq_stream.copy()
    event_groups = rf.IterMultipleComponents(working_stream, 'onset', number_components=3)
    for stream3c in tqdm(event_groups):
        npts_ok = True
        start_ok = True
        ref_t = None
        for idx, tr in enumerate(stream3c):
            npts = tr.stats.npts
            # stream should have sr*streamlength datapoints
            expected_npts = tr.stats.sampling_rate * 90
            if npts - expected_npts == 1:
                # Tolerate a single extra sample.
                expected_npts += 1
            if expected_npts != npts:
                print("Expected npts {0}, actual number {1}... skipping event".format(expected_npts, npts))
                npts_ok = False

            start_t = tr.stats.starttime
            if idx == 0:
                ref_t = start_t
            elif start_t != ref_t:
                print("Inconsistent start times ({0} and {1}) in traces... skipping event".format(ref_t, start_t))
                start_ok = False

        if not (npts_ok and start_ok) or len(stream3c) != 3:
            continue

        stream3c.trim2(-25, 75, 'onset')
        stream3c.rotate('ZNE->LQT')
        stream3c.rf(deconvolve=deconvolve, filter=filt_kw, **kwargs)
        if moveout:
            stream3c.moveout()
        rf_stream.extend(stream3c)

    if savefile is not None:
        rf_stream.write(savefile, 'H5')
    return rf_stream
def plot_rf_stack(rf_stream, time_window=(-10.0, 25.0), trace_height=0.2, stack_height=0.8, save_file=None,
                  **kwargs):
    """Wrapper function of rf.RFStream.plot_rf() to help do RF plotting with consistent formatting and layout.

    Traces whose length differs from the most common trace length are left out of the plot, and a
    warning stating how many traces were removed is logged.

    :param rf_stream: RFStream to plot
    :type rf_stream: rf.RFStream
    :param time_window: Time window to plot, defaults to (-10.0, 25.0)
    :type time_window: tuple, optional
    :param trace_height: Height of a single trace (reduce to cram RFs closer together), defaults to 0.2
    :type trace_height: float, optional
    :param stack_height: Height of mean (stacked) RF at top of plot, defaults to 0.8
    :type stack_height: float, optional
    :param save_file: File to save resulting image into, defaults to None
    :type save_file: str to valid file path, optional
    :param kwargs: Extra keyword arguments forwarded to rf.RFStream.plot_rf()
    :return: Figure handle to the stack plot
    :rtype: matplotlib.figure.Figure
    """
    # Ensure traces are stackable by ignoring those that don't conform to the predominant data shape
    all_trace_lens = np.array([len(tr) for tr in rf_stream])
    most_common_len, _ = stats.mode(all_trace_lens, axis=None)
    stackable_stream = rf.RFStream([tr for tr in rf_stream if len(tr) == most_common_len])
    # Report the number of traces dropped, not the number that remain.
    num_removed = len(rf_stream) - len(stackable_stream)
    if num_removed > 0:
        logging.warning('Removed {} traces from RF plot to make it stackable!'.format(num_removed))
    # end if

    fig = stackable_stream.plot_rf(fillcolors=('#000000', '#a0a0a0'), trim=time_window,
                                   trace_height=trace_height, stack_height=stack_height,
                                   fname=save_file, show_vlines=True, **kwargs)
    return fig
# original file will be interpolated to 100Hz o_stream = o_stream.trim2(-5, 60, 'onset') station_list = [] # here we collect station names but maybe ID is more appropriate in case of having the same station names in different deployments for i in xrange(len(q_stream)): station_list.append(q_stream[i].stats.station.encode('utf-8')) station_list = np.unique(np.array(station_list)) print "Gathered ", len(station_list), " stations" # here we go with the main loop over stations out_file = rf.RFStream() for i in xrange(station_list.shape[0]): print "Station ", station_list[i], i + 1, " of ", station_list.shape[0] traces = q_stream.select(station=station_list[i]).copy() # we choose short RF to simplify and speed up the processing traces = traces.trim2(-5, 20, 'onset') # but keep original traces as they are to use them at the end o_traces = o_stream.select(station=station_list[i]) swipe = [] o_swipe = [] for trace in traces:
def plot_aux_data(soln, config, log, scale):
    """
    Plot auxiliary data for a solution: energy histograms, ranked per-event energy and receiver functions.

    The figure has two panels in the top row (energy distribution of samples and clusters, and
    ranked per-event energy with tables of best and worst event IDs) and a column of receiver
    function plots below, one per solution, for the first configured layer found in the
    solution's subsurface data.

    :param soln: Solution container
    :type soln: Customized scipy.optimize.OptimizeResult
    :param config: Solution configuration
    :type config: dict
    :param log: Logging instance
    :type log: logging.Logger
    :param scale: Overall image scaling factor
    :type scale: float
    :return: Matplotlib figure containing the plotted data
    :rtype: matplotlib.figure.Figure
    """
    def _apply_label_sizes(axes, size):
        # Use one font size for tick labels and both axis labels.
        axes.tick_params(labelsize=size)
        axes.xaxis.label.set_size(size)
        axes.yaxis.label.set_size(size)
    # end func

    def _leading_unique_ids(ranked_pairs):
        # Walk (energy, event ID) pairs in rank order until 3 distinct event IDs are collected.
        unique_ids = set()
        for _, evid in ranked_pairs:
            unique_ids.add(evid)
            if len(unique_ids) >= 3:
                break
            # end if
        # end for
        return unique_ids
    # end func

    side = 6.4 * scale
    f = plt.figure(constrained_layout=False, figsize=(side, side))
    f.suptitle(config["station_id"], y=0.96, fontsize=16)
    gs = f.add_gridspec(2, 1, left=0.1, right=0.9, bottom=0.1, top=0.87, hspace=0.3, wspace=0.3,
                        height_ratios=[1, 2])
    gs_top = gs[0].subgridspec(1, 2)
    ax0 = f.add_subplot(gs_top[0, 0])
    ax1 = f.add_subplot(gs_top[0, 1])

    hist_alpha = 0.5
    soln_alpha = 0.3
    axis_font_size = 6 * scale
    title_font_size = 6 * scale
    nbins = 100

    # Plot energy distribution of samples and solution clusters
    sample_counts, bins = np.histogram(soln.sample_funvals, bins=nbins)
    sample_counts = sample_counts.astype(float) / np.max(sample_counts)
    ax0.bar(bins[:-1], sample_counts, width=np.diff(bins), align='edge', color='#808080', alpha=hist_alpha)
    for cluster_idx, cluster_energies in enumerate(soln.cluster_funvals):
        # Clusters share the bin edges of the sample histogram so the bars line up.
        cluster_counts, _ = np.histogram(cluster_energies, bins)
        cluster_counts = cluster_counts.astype(float) / np.max(cluster_counts)
        ax0.bar(bins[:-1], cluster_counts, width=np.diff(bins), align='edge',
                color='C' + str(cluster_idx), alpha=soln_alpha)
    # end for
    ax0.set_title('Energy distribution of random samples and solution clusters', fontsize=title_font_size)
    ax0.set_xlabel('$E_{SU}$ energy (arb. units)')
    ax0.set_ylabel('Normalized counts')
    _apply_label_sizes(ax0, axis_font_size)

    # Plot sorted per-event upwards S-wave energy at top of mantle per solution.
    # Collect event IDs of best and worst fit traces and present as tables of event IDs.
    event_ids = config["event_ids"]
    best_candidates = []
    worst_candidates = []
    for soln_idx, esu in enumerate(soln.esu):
        assert len(esu) == len(event_ids)
        ranked = sorted(zip(esu, event_ids))
        best_candidates.extend(ranked[:3])
        worst_candidates.extend(ranked[-3:])
        ax1.plot([pair[0] for pair in ranked], color='C' + str(soln_idx), alpha=soln_alpha)
    # end for
    best_events_set = _leading_unique_ids(sorted(best_candidates))
    worst_events_set = _leading_unique_ids(sorted(worst_candidates, reverse=True))

    _tab1 = table(ax1, cellText=[[e] for e in best_events_set], colLabels=['BEST'],
                  cellLoc='left', colWidths=[0.35], loc='upper left', edges='horizontal',
                  fontsize=8, alpha=0.6)  # alpha broken in matplotlib.table!
    _tab2 = table(ax1, cellText=[[e] for e in worst_events_set], colLabels=['WORST'],
                  cellLoc='left', colWidths=[0.35], loc='upper right', edges='horizontal',
                  fontsize=8, alpha=0.6)
    ax1.set_title('Ranked per-event energy for each solution point', fontsize=title_font_size)
    ax1.set_xlabel('Rank (out of # source events)')
    ax1.set_ylabel('Event $E_{SU}$ energy (arb. units)')
    _apply_label_sizes(ax1, axis_font_size)

    # Plot receiver function at base of selected layers
    axis_font_size = 6 * scale
    max_solutions = config["solver"].get("max_solutions", 3)
    for layer in config["layers"]:
        lname = layer["name"]
        if not (soln.subsurface and lname in soln.subsurface):
            continue
        # end if
        base_seismogms = soln.subsurface[lname]
        # Generate RF and plot.
        gs_bot = gs[1].subgridspec(max_solutions, 1, hspace=0.4)
        for soln_idx, seismogm in enumerate(base_seismogms):
            soln_rf = _compute_rf(seismogm, config, log)
            assert isinstance(soln_rf, rf.RFStream)
            # Remove any traces for which deconvolution failed. An empty trace marks the whole
            # event as failed, so every trace with the same event ID is dropped.
            exclude_ids = {tr.stats.event_id for tr in soln_rf if len(tr) == 0}
            soln_rf = rf.RFStream([tr for tr in soln_rf if tr.stats.event_id not in exclude_ids])

            axn = f.add_subplot(gs_bot[soln_idx])
            if soln_rf:
                rf_R = soln_rf.select(component='R')
                rf_R = rf_R.trim2(RF_TRIM_WINDOW[0], RF_TRIM_WINDOW[1], reftime='onset')
                num_RFs = len(rf_R)
                times = rf_R[0].times() + RF_TRIM_WINDOW[0]
                stacked_data = rf_R.stack()[0].data
                axn.plot(times, stacked_data, color='C' + str(soln_idx), alpha=soln_alpha, linewidth=2)
                axn.text(0.95, 0.95, 'N = {}'.format(num_RFs), fontsize=10, ha='right', va='top',
                         transform=axn.transAxes)
                axn.set_xlabel('Time (sec)')
                axn.grid(color='#80808080', linestyle=':')
            else:
                axn.annotate('Empty RF plot', (0.5, 0.5), xycoords='axes fraction', ha='center')
            # end if
            panel_title = ' '.join([config["station_id"], lname, 'base RF', '(soln {})'.format(soln_idx)])
            axn.set_title(panel_title, fontsize=title_font_size, y=0.92, va='top')
            _apply_label_sizes(axn, axis_font_size)
        # end for
        break  # TODO: Figure out how to add more layers if needed
    # end for

    return f
def rf_inversion_export(input_h5_file, output_folder, network_code, component='R', resample_freq=6.25,
                        trim_window=(-5.0, 20.0), moveout=True):
    """Export receiver function to text format for ingestion into Fortran RF inversion code.

    :param input_h5_file: Input hdf5 file containing receiver function data
    :type input_h5_file: str or Path
    :param output_folder: Folder in which to export text files, one per channel per station.
        Will be appended with network code.
    :type output_folder: str or Path
    :param network_code: Network to which this RF data belongs, used to disambiguate and track folders.
    :type network_code: str
    :param component: The channel component to export, defaults to 'R'
    :type component: str, optional
    :param resample_freq: Sampling rate (Hz) of the output files, defaults to 6.25 Hz
    :type resample_freq: float, optional
    :param trim_window: Time window to export relative to onset, defaults to (-5.0, 20.0). If data needs
        to be resampled, the samples are anchored to the start of this time window.
    :type trim_window: tuple, optional
    :param moveout: Whether to apply moveout correction prior to exporting, defaults to True
    :type moveout: bool, optional
    """
    # Process for each station:
    # 1. Load hdf5 file containing RFs
    # 2. Filter to desired component.
    # 3. Quality filter to those that meet criteria (Sippl cross-correlation similarity)
    # 4. Moveout and stack the RFs
    # 5. Resample (lanczos) and trim RF
    # 6. Export one file per station in (time, amplitude format)

    # Convert to str first: a Path cannot be concatenated with a str suffix.
    output_folder = str(output_folder) + "_" + network_code
    os.makedirs(output_folder, exist_ok=True)

    data = rf_util.read_h5_rf(input_h5_file)
    data = data.select(component=component)

    rf_util.label_rf_quality_simple_amplitude('ZRT', data, snr_cutoff=2.0, rms_amp_cutoff=0.2,
                                              max_amp_cutoff=2.0)
    data = rf.RFStream([tr for tr in data if tr.stats.predicted_quality == 'a'])

    data_dict = rf_util.rf_to_dict(data)

    for sta, ch_dict in data_dict:
        for cha, ch_traces in ch_dict.items():
            similar_traces = rf_util.filter_crosscorr_coeff(rf.RFStream(ch_traces))
            if not similar_traces:
                continue
            if moveout:
                similar_traces.moveout()
            # end if
            stack = similar_traces.stack()
            # `trace` is the stacked trace held by `stack`, so the in-place interpolate and trim
            # calls below are reflected in it.
            trace = stack[0]
            exact_start_time = trace.stats.onset + trim_window[0]
            stack.interpolate(sampling_rate=resample_freq, method='lanczos', a=10, starttime=exact_start_time)
            stack.trim2(*trim_window, reftime='onset')

            # Express sample times relative to onset rather than trace start.
            times = trace.times() - (trace.stats.onset - trace.stats.starttime)
            # TODO: Remove hardwired scaling factor.
            # This scaling factor only applies to iterative deconvolution with default Gaussian width
            # factor of 2.5. Once we upgrade to rf library version >= 0.9.0, we can remove this hardwired
            # setting and instead have it determined programatically from rf processing metadata stored
            # in the trace stats structure.
            # The scaling factor originates in the amplitude attenuation effect of the filtering applied
            # in iterative deconv, see table at end of this page:
            # http://eqseis.geosc.psu.edu/~cammon/HTML/RftnDocs/seq01.html
            # The values in this reference table are derived as the integral of the area under the
            # Gaussian in the frequency domain. Analytically, this amounts to simply dividing by scaling
            # factor of a/sqrt(pi), where 'a' here is the Gaussian width used in iterative deconvolution.
            iterdeconv_scaling = 2.5 / np.sqrt(np.pi)
            column_data = np.array([times, trace.data / iterdeconv_scaling]).T
            fname = os.path.join(output_folder, "_".join([network_code, sta, cha]) + "_rf.dat")
            np.savetxt(fname, column_data, fmt=('%5.2f', '%.8f'))
        # end for
    # end for
def synthesize_rf_dataset(H, V_p, V_s, inclinations, distances, ds, log=None, include_t3=False,
                          amplitudes=None, baz=0.0):
    """Synthesize an RF R-component data set over a range of inclinations and distances.

    For each inclination, arrival times of the converted phase and its multiples are computed
    for a single layer of thickness H, a synthetic signal is generated at 100 Hz over the window
    (-5, 50) sec and then decimated towards the requested sampling rate.

    :param H: Moho depth (km)
    :type H: float
    :param V_p: P body wave velocity in uppermost layer
    :type V_p: float
    :param V_s: S body wave velocity in uppermost layer
    :type V_s: float
    :param inclinations: Array of inclinations for which to create RFs
    :type inclinations: numpy.array(float)
    :param distances: Array of teleseismic distances corresponding to inclinations
    :type distances: numpy.array(float)
    :param ds: Final sampling rate (Hz) for the downsampled output signal
    :type ds: float
    :param log: Logger to send output to, defaults to None
    :type log: logger, optional
    :param include_t3: If True, include the third expected multiple PpSs+PsPs
    :type include_t3: bool, optional
    :param amplitudes: Custom amplitudes to apply to the multiples. Must have 3 entries, or 4 with a
        non-positive last entry when include_t3 is True. Defaults to [1, 0.5, 0.4] with -0.3
        appended when include_t3 is True.
    :type amplitudes: list(float), optional
    :param baz: Back azimuth for metadata
    :type baz: float, optional
    :return: Stream containing synthetic RFs, and the arrival times (with leading 0 for the direct
        arrival) computed for the last inclination, or None if no inclinations were given
    :rtype: tuple(rf.RFStream, list(float) or None)
    """
    assert len(inclinations) == len(distances), "Must provide 1:1 inclination and distance pairs"

    window = (-5.0, 50.0)  # sec
    fs = 100.0  # Hz
    k = V_p / V_s
    traces = []
    arrivals = None
    for i, inc_deg in enumerate(inclinations):
        # Horizontal slowness for this inclination
        p = np.sin(np.deg2rad(inc_deg)) / V_p
        s_term = np.sqrt((k * k / V_p / V_p) - p * p)
        p_term = np.sqrt(1.0 / V_p / V_p - p * p)
        t1 = H * (s_term - p_term)
        t2 = H * (s_term + p_term)
        arrivals = [t1, t2, t1 + t2] if include_t3 else [t1, t2]

        if log is not None:
            log.info("Inclination {:3g} arrival times: {}".format(inc_deg, arrivals))

        # Direct arrival at time zero goes first
        arrivals = [0] + arrivals

        if amplitudes is not None:
            assert len(amplitudes) == 3 + int(include_t3)
            # t3 amplitude should be negative
            assert (not include_t3) or (amplitudes[3] <= 0)
        else:
            amplitudes = [1, 0.5, 0.4] + ([-0.3] if include_t3 else [])
        # end if

        _, synth_signal = generate_synth_rf(arrivals, amplitudes, fs_hz=fs, window_sec=window)

        # Make sure time difference of events is at least 1 second, since onset time is used as part of
        # logic for identifying related channels in rf.RFStream.
        start = obspy.UTCDateTime.now() + float(i)
        duration = float(window[1] - window[0])
        header = {
            'network': 'SY',
            'station': 'TST',
            'location': 'GA',
            'channel': 'HHR',
            'sampling_rate': fs,
            'starttime': start,
            'endtime': start + duration,
            'onset': start - window[0],
            'station_latitude': -19.0,
            'station_longitude': 137.0,  # arbitrary (approx location of OA deployment)
            'slowness': p * KM_PER_DEG,
            'inclination': inc_deg,
            'back_azimuth': baz,
            'distance': float(distances[i])
        }
        synth_trace = rf.rfstream.RFTrace(data=synth_signal.copy(), header=header)
        decimation_factor = int(np.round(fs / ds))
        traces.append(synth_trace.decimate(decimation_factor, no_filter=True))
    # end for

    return rf.RFStream(traces), arrivals
def main():
    """Main entry function for RF picking tool.

    Asks the user for an RF file and an output folder, then for each station shows an RF stack
    plot of the chosen channel in which traces can be selected with a rectangle selector. When
    the plot is closed, the event IDs of the selected traces are written to a text file named
    <network>.<station>.<channel>_event_mask.txt in the output folder. If either dialog is
    cancelled, the function logs this and returns without doing anything.
    """
    infile = filedialog.askopenfilename(initialdir=".", title="Select RF file",
                                        filetypes=(("h5 files", "*.h5"), ))
    if not infile:
        # Dialog was cancelled, there is no file to load.
        log.info("No RF file selected, exiting")
        return
    # end if
    output_folder = filedialog.askdirectory(initialdir=os.path.split(infile)[0], title='Select output folder')
    if not output_folder:
        # Dialog was cancelled; creating an empty path would raise.
        log.info("No output folder selected, exiting")
        return
    # end if
    if not os.path.isdir(output_folder):
        log.info("Creating output folder {}".format(output_folder))
        os.makedirs(output_folder, exist_ok=True)
    # end if
    log.info("Output files will be emitted to {}".format(output_folder))

    log.info("Loading %s", infile)
    data_all = rf_util.read_h5_rf(infile)
    data_dict = rf_util.rf_to_dict(data_all)

    stations = sorted(list(data_dict.keys()))

    # Assuming same rotation type for all RFs. This is consistent with the existing workflow.
    rf_type = data_all[0].stats.rotation

    for st in stations:
        station_db = data_dict[st]

        # Choose RF channel
        channel = rf_util.choose_rf_source_channel(rf_type, station_db)
        channel_data = station_db[channel]
        # Check assumption
        for tr in channel_data:
            assert tr.stats.rotation == rf_type, 'Mismatching RF rotation type'

        # Label and filter quality
        rf_util.label_rf_quality_simple_amplitude(rf_type, channel_data)
        rf_stream = rf.RFStream(
            [tr for tr in channel_data if tr.stats.predicted_quality == 'a']).sort(['back_azimuth'])
        if not rf_stream:
            log.info("No data survived filtering for %s, skipping", st)
            continue

        # Plot RF stack of primary component
        fig = rf_plot_utils.plot_rf_stack(rf_stream)
        fig.set_size_inches(8, 9)
        fig.suptitle("Channel {}".format(rf_stream[0].stats.channel))
        ax0 = fig.axes[0]

        # Make sure we draw once first before capturing blit background
        fig.canvas.draw()
        # Disallow resizing to avoid having to handle blitting with resized window.
        win = fig.canvas.window()
        win.setFixedSize(win.size())
        blit_background = fig.canvas.copy_from_bbox(ax0.bbox)

        # One flag per trace, passed to the selection callbacks and read back after the plot closes.
        mask = np.array([False] * len(rf_stream))
        rect_select = RectangleSelector(ax0, lambda e0, e1: on_select(e0, e1, mask), useblit=True,
                                        rectprops=dict(fill=False, edgecolor='red'))
        cid = fig.canvas.mpl_connect(
            'button_release_event',
            lambda e: on_release(e, ax0, mask, blit_background, rect_select))
        plt.show()
        fig.canvas.mpl_disconnect(cid)
        rect_select = None

        selected_event_ids = [tr.stats.event_id for i, tr in enumerate(rf_stream) if mask[i]]
        log.info("{} streams selected".format(len(selected_event_ids)))
        log.info("Selected event ids:")
        log.info(selected_event_ids)

        network = rf_stream[0].stats.network
        outfile = os.path.join(output_folder, '.'.join([network, st, channel]) + '_event_mask.txt')
        log.info("Writing mask to file {}".format(outfile))
        if os.path.exists(outfile):
            log.warning("Overwriting existing file {} !".format(outfile))
        with open(outfile, 'w') as f:
            f.write('\n'.join(selected_event_ids))
    # end for
def compute_rf_quality_metrics(station_id, station_stream3c, similarity_eps):
    """Top level function for adding quality metrics to trace metadata.

    Events containing NaNs in any RF trace are discarded, as are traces whose length differs from
    the most common trace length. Metrics are then computed on the primary component stream and
    stored in the stats of its traces: S/N ratios, spectral entropy, log10 amplitude metrics,
    event-vs-noise statistical deltas, spectral moment deltas and ratios, maximum coherence and a
    similarity group ID.

    :param station_id: Station ID
    :type station_id: str
    :param station_stream3c: 3-channel stream
    :type station_stream3c: list(rf.RFStream) with 3 components
    :param similarity_eps: Distance threshold used for DBSCAN clustering
    :type similarity_eps: float
    :return: Triplet of RF streams with Z, R or Q, and T components with populated
        quality metrics. Otherwise return None in case of failure.
    """
    logger = logging.getLogger(__name__)

    # Filter out traces with NaNs - simplifies downstream code so that can don't have to worry about NaNs.
    # We use the fact that traces are bundled into 3-channel triplets here to discard all or none of the related
    # channels for an event.
    nonan_streams = []
    for stream in station_stream3c:
        skip_stream = False
        for tr in stream:
            if tr.stats.type == 'rf' and np.any(np.isnan(tr.data)):
                logger.warning("NaN data found in station {} trace\n{}\n, skipping!".format(station_id, tr))
                skip_stream = True
                break
            # end if
        # end for
        if skip_stream:
            continue
        nonan_streams.append(stream)
    # end for
    if len(nonan_streams) < len(station_stream3c):
        num_supplied = len(station_stream3c)
        num_discarded = num_supplied - len(nonan_streams)
        logger.info("Discarded {}/{} events from station {} due to NaNs in at least one channel"
                    .format(num_discarded, num_supplied, station_id))
    # end if

    # Early exit if nothing left
    if not nonan_streams:
        logger.warning("nonan_streams empty after filtering out nan traces! {}. Skipping station {}"
                       .format(nonan_streams, station_id))
        return None
    # end if

    # Flatten the traces into a single RFStream for subsequent processing
    rf_streams = rf.RFStream([tr for stream in nonan_streams for tr in stream if tr.stats.type == 'rf'])

    # Subsequent functions process the data in bulk square matrices, so it is essential all traces are the same length.
    # If not, processing will fail due to incompatible data structure. So here we filter out traces that do not have
    # the expected length. Expected length is assumed to be the most common length amongst all the traces.
    num_traces_before = len(rf_streams)
    if num_traces_before == 0:
        # Without any traces there is no most common length to determine.
        logger.warning("No traces of type 'rf' found for station {}, skipping!".format(station_id))
        return None
    # end if
    all_trace_lens = np.array([len(tr) for tr in rf_streams])
    # np.unique returns sorted lengths with their counts; argmax picks the first (i.e. smallest)
    # length among those tied for the highest count.
    unique_lens, len_counts = np.unique(all_trace_lens, return_counts=True)
    expected_len = int(unique_lens[np.argmax(len_counts)])
    if expected_len <= 1:
        logger.warning("Cannot compute quality metrics on trace length {} <= 1! Skipping station {}"
                       .format(expected_len, station_id))
        return None
    # end if
    keep_traces = []
    for tr in rf_streams:
        if len(tr) != expected_len:
            logger.error("Trace {} of station {} has inconsistent sample length {} (expected {}), discarding!"
                         .format(tr, station_id, len(tr), expected_len))
        else:
            keep_traces.append(tr)
        # end if
    # end for
    streams = rf.RFStream(keep_traces)
    num_traces_after = len(streams)
    if num_traces_after < num_traces_before:
        num_discarded = num_traces_before - num_traces_after
        logger.warning("Discarded {}/{} traces due to inconsistent trace length"
                       .format(num_discarded, num_traces_before))
    # end if

    # Extract RF type, the primary polarized component and transverse component (ignore source stream)
    rf_type, p_stream, t_stream, z_stream = get_rf_stream_components(streams)
    if rf_type is None:
        logger.error("Unrecognized RF type for station {}. File might not be RF file!".format(station_id))
        return None
    # end if

    # Note that we only compute quality metrics on the p_stream. The filtering of t_stream traces should match
    # the filtering of p_stream traces, so t_stream does not need independent metrics.

    # Compute S/N ratios for primary component RFs
    rf_util.compute_rf_snr(p_stream)

    # Compute spectral entropy for primary component RFs
    sp_entropy = spectral_entropy(p_stream)
    for i, tr in enumerate(p_stream):
        md_dict = {'entropy': sp_entropy[i]}
        tr.stats.update(md_dict)
    # end for

    # Compute log10 of amplitude metrics, as these are more useful than straight amplitudes for quality classifier
    for tr in p_stream:
        tr.stats['log10_amp_max'] = np.log10(tr.stats['amp_max'])
        tr.stats['log10_amp_rms'] = np.log10(tr.stats['amp_rms'])
        tr.stats['log10_z_amp_max'] = np.log10(tr.stats['z_amp_max'])
        tr.stats['log10_z_amp_rms'] = np.log10(tr.stats['z_amp_rms'])
    # end for

    # Define time windows relative to onset for computing statistical ratios
    EVENT_SIGNAL_WINDOW = (-5.0, 25.0)
    NOISE_SIGNAL_WINDOW = (None, -5.0)

    event_signal = p_stream.copy().slice2(*EVENT_SIGNAL_WINDOW, reftime='onset').taper(0.5, max_length=1.0)
    noise_signal = p_stream.copy().slice2(*NOISE_SIGNAL_WINDOW, reftime='onset').taper(0.5, max_length=1.0)
    rf_util.compute_extra_rf_stats(event_signal)
    rf_util.compute_extra_rf_stats(noise_signal)
    for _i, _tr in enumerate(p_stream):
        _tr.stats['delta_mean_log10_cplx_amp'] = (event_signal[_i].stats.mean_log10_cplx_amp -
                                                  noise_signal[_i].stats.mean_log10_cplx_amp)
        _tr.stats['delta_log10_amp_20pc'] = (event_signal[_i].stats.log10_amp_20pc -
                                             noise_signal[_i].stats.log10_amp_20pc)
        _tr.stats['delta_log10_amp_80pc'] = (event_signal[_i].stats.log10_amp_80pc -
                                             noise_signal[_i].stats.log10_amp_80pc)
        _tr.stats['delta_log10_rms_amp'] = (event_signal[_i].stats.log10_rms_amp -
                                            noise_signal[_i].stats.log10_rms_amp)
    # end for

    # Compute ratios of spectral histogram statistics
    noise_data = np.array([tr.data for tr in noise_signal])
    event_data = np.array([tr.data for tr in event_signal])
    noise_bins, noise_power = signal.welch(noise_data, detrend='linear')
    event_bins, event_power = signal.welch(event_data, detrend='linear')
    # Compute moments of the frequency distribution. Only use lower frequency bands up to 1/4 Nyquist.
    noise_bins = noise_bins[0:32]
    noise_power = noise_power[:, 0:32]
    event_bins = event_bins[0:32]
    event_power = event_power[:, 0:32]
    noise_m0 = np.sum(noise_power, axis=1)
    event_m0 = np.sum(event_power, axis=1)
    spectral_m0_ratio = np.log10(event_m0 / noise_m0)
    noise_m1 = np.sum(noise_power * noise_bins, axis=1)
    event_m1 = np.sum(event_power * event_bins, axis=1)
    spectral_m1_ratio = np.log10(event_m1 / noise_m1)
    noise_m2 = np.sum(noise_power * noise_bins**2, axis=1)
    event_m2 = np.sum(event_power * event_bins**2, axis=1)
    spectral_m2_ratio = np.log10(event_m2 / noise_m2)
    for i, tr in enumerate(p_stream):
        md_dict = {
            'm0_delta': event_m0[i] - noise_m0[i],
            'm1_delta': event_m1[i] - noise_m1[i],
            'm2_delta': event_m2[i] - noise_m2[i],
            'm0_ratio': spectral_m0_ratio[i],
            'm1_ratio': spectral_m1_ratio[i],
            'm2_ratio': spectral_m2_ratio[i]
        }
        tr.stats.update(md_dict)
    # end for

    # Compute coherence metric within targeted normalized frequency band.
    # Note that settings here are relative to the sampling rate. If the sampling
    # rate changes and you want the same absolute frequency range to be used for
    # coherence, then these settings need to be updated.
    fn_low = 0.15
    fn_high = 0.3
    max_coherence = compute_max_coherence(p_stream, fn_low, fn_high)
    for i, tr in enumerate(p_stream):
        md_dict = {'max_coherence': max_coherence[i]}
        tr.stats.update(md_dict)
    # end for

    # TODO: Compute phase weighting vector per station per 2D (back_azimuth, distance) bin

    # Perform clustering for all traces in a station, and assign group IDs.
    # This will be super expensive when there are a lot of events, as the distance calculation grows as N^2.
    clustering_stream = p_stream.copy()
    clustering_stream = clustering_stream.trim2(-5.0, 25.0, 'onset')
    swipe = np.array([tr.data for tr in clustering_stream])
    if swipe.shape[0] > 1:
        ind = rf_group_by_similarity(swipe, similarity_eps)
    else:
        ind = np.array([0])
    # end if
    # NOTE(review): this logs the highest group label, which is presumably one less than the
    # number of groups if labels are 0-based - confirm against rf_group_by_similarity.
    num_groups = np.amax(ind)
    logger.info("Station {}: detected {} clusters".format(station_id, num_groups))
    # Apply group. Negative labels are stored as None.
    for i, tr in enumerate(p_stream):
        md_dict = {'rf_group': ind[i] if ind[i] >= 0 else None}
        tr.stats.update(md_dict)
    # end for

    # TODO: Research techniques for grouping waveforms using singular value decomposition (SVD), possibly of
    # the complex waveform (from Hilbert transform) to determine the primary phase and amplitude components.
    # High similarity to the strongest eigenvectors might indicate waves in the primary group (group 0 in DBSCAN)
    # without the N^2 computational cost of DBSCAN.

    return (z_stream, p_stream, t_stream)
def main():
    '''
    @package extract_rf
    This code contains different approaches to extract RFs from H5 file in stacked form. Output is prepared
    for trans-dimensional inversion in ASCII format

    Currently there are two methods of stacking
    1. rf stacked by similarity
    2. all rf stacked

    Note the parameters of gaussian pulse and its width where
    Value of "a" | Frequency (hz) at which G(f) = 0.1 | Approximate Pulse Width (s)

    10                      4.8                                0.50
    5                       2.4                                0.75
    2.5                     1.2                                1.00
    1.25                    0.6                                1.50
    1.0                     0.5                                1.67 (5/3)
    0.625                   0.3                                2.10
    0.5                     0.24                               2.36
    0.4                     0.2                                2.64
    0.2                     0.1                                3.73

    Workflow implemented below:
    reads the (hard-coded) RF H5 file, lists every station with its highest similarity group number,
    prompts for a station name (any name not in the list means "all stations"), then for each selected
    station with more than one R-component trace: filters and resamples the traces, builds a plain stack
    and a phase weighted stack, plots them together with per-group phase weighted stacks, and writes the
    phase weighted stack to "<network>-INV/<network>-<station>-rf2-ph_weighted.dat" as "time amplitude"
    rows. When a single station is chosen the figure is shown interactively, otherwise it is saved as PDF
    under "<network>-INV/PDF/".
    '''

    print("Reading the input file...")
    # Input file
    stream = rf.read_rf('/g/data/ha3/am7399/shared/OA-ZRT-R-cleaned.h5', 'H5')
    print("Reading is done...")

    net = stream[0].stats.network
    # output directory
    out_dir = net + "-INV/"

    # inversion programs use 1Hz pulse width, therefore higher corner should be not lower than that
    filter_type = 'bandpass'
    freqmin = 0.1
    freqmax = 1.0

    # Trimming window
    tstart = -5.
    tend = 40.

    # here we collect station names
    station_list = np.array([tr.stats.station for tr in stream])
    group_list = np.array([tr.stats.rf_group for tr in stream])

    # we need to find the largest number of groups for each unique station
    gidx = np.argsort(-group_list)
    group_list = group_list[gidx]
    station_list = station_list[gidx]

    # unique will return first occurrence of the station sorted in descending order of group number
    station_list, idx = np.unique(station_list, return_index=True)
    group_list = group_list[idx]

    print("Gathered ", len(station_list), " stations")
    for sta_name, sta_group in zip(station_list, group_list):
        print(sta_name, sta_group)

    sstat = []
    estat = input("Station to extract [All]: ")

    if station_list[estat == station_list].shape[0] == 0:
        # Unknown (or empty) answer: process every station and save figures instead of showing them
        sstat = station_list
        plot = False
    else:
        sstat.append(estat)
        plot = True

    for estat in sstat:
        station = stream.select(station=estat, component='R').moveout()
        # we use a zero-phase-shift band-pass filter using 2 corners. This is done in two runs forward and
        # backward, so we end up with 4 corners de facto.

        if len(station) > 1:
            for trace in station:
                # preserve original amplitude to rescale later to preserve proportions relative to source
                if trace.stats.amax > 0:
                    amp_max = trace.stats.amax
                    print("*")
                else:
                    amp_max = np.max(trace.data)

                trace.taper(0.01)
                # 6.25 is the frequency hardwired into the inversion program
                trace = trace.filter(filter_type, freqmin=freqmin, freqmax=freqmax, corners=2,
                                     zerophase=True).interpolate(6.25)
                trace.data = trace.data * (amp_max / np.amax(trace.data))
            # end for

            # first we get stacks - normal and phase weighted
            copy_st = station.copy()
            stacked = station.copy().stack()
            stacked.trim2(tstart, tend, 'onset')

            # np.arange replaces the Python-2-only xrange(); the resulting time axis is identical
            time_s = stacked[0].stats.delta * np.arange(stacked[0].stats.npts) + tstart
            amp_max = np.max(stacked[0].data)

            phase_w = phase_weights(station)
            ph_weighted = copy_st.stack()
            ph_weighted[0].data = ph_weighted[0].data * phase_w
            # Note - weighting changes the real amplitude and it must be rescaled back to origin
            ph_weighted.trim2(tstart, tend, 'onset')
            time_p = ph_weighted[0].stats.delta * np.arange(ph_weighted[0].stats.npts) + tstart

            # Zero everything up to the last non-positive sample that precedes the onset.
            # Guarded so that a pre-onset window without any non-positive sample no longer
            # raises ValueError from np.max() on an empty index array.
            pre_onset = ph_weighted[0].data[time_p < 0.]
            nonpositive_idx = np.where(pre_onset <= 0.)[0]
            if nonpositive_idx.size > 0:
                ph_weighted[0].data[:np.max(nonpositive_idx) + 1] = 0.
            # end if

            ph_weighted[0].data = ph_weighted[0].data * (amp_max / np.max(ph_weighted[0].data))

            # then we take the same for each similarity groups
            groups = find_rf_group_ids(station)
            max_grp = np.max(groups)
            print("Max grp ", max_grp)

            # however first we define general plotting scheme and plot previous results
            fig = plt.figure(figsize=(11.69, 8.27))
            columns = 2
            # builtin int() replaces np.int, which was removed in NumPy 1.24
            rows = int(np.ceil(float(max_grp) / float(columns))) + 1
            # NOTE(review): GridSpec signature is (nrows, ncols); argument order kept as in the
            # original so the existing figure layout does not change.
            grid = gridspec.GridSpec(columns, rows, wspace=0.2, hspace=0.2)

            ax = plt.subplot(grid[0])
            ax.plot(time_s, stacked[0].data)
            ax.set_title(estat + ' Stacked')

            ax = plt.subplot(grid[1])
            ax.plot(time_p, ph_weighted[0].data)
            ax.set_title('Phase weighted stack')

            frame = 2
            # NOTE(review): this visits groups 0 .. max_grp - 1 only; confirm whether group ids
            # are 0-based, in which case the group with id max_grp is never plotted.
            for i in range(max_grp):
                grp_stream = rf.RFStream()
                for trace in station:
                    if trace.stats.rf_group == i:
                        grp_stream.append(trace)
                    # end if
                # end for
                print("Group: ", i, " number of records: ", len(grp_stream))

                grp_stacked = grp_stream.copy().stack()
                grp_stck_max = np.max(np.abs(grp_stacked[0].data))

                phase_w = phase_weights(grp_stream)
                grp_stacked_wght = grp_stacked[0].data * phase_w
                # rescale the weighted stack back to the amplitude of the plain group stack
                grp_stacked_wght = grp_stacked_wght * (grp_stck_max / np.max(np.abs(grp_stacked_wght)))
                grp_time = grp_stacked[0].stats.delta * np.arange(grp_stacked[0].stats.npts) + tstart

                ax = plt.subplot(grid[i + frame])
                ax.plot(grp_time, grp_stacked_wght)
                ax.set_title('Group ' + str(i))
            # end for

            # Make sure both the output folder and its PDF subfolder exist. Checking the
            # subfolder (makedirs also creates the parent) covers the case where out_dir
            # already exists but out_dir/PDF does not.
            pdf_dir = out_dir + 'PDF'
            if not os.path.exists(pdf_dir):
                os.makedirs(pdf_dir)
            # end if

            if plot:
                plt.show()
            else:
                fig.savefig(out_dir + 'PDF/' + net + '-' + estat + '-rf2-ph_weighted.pdf', format='PDF')
                plt.close('all')
            # end if

            # The context manager closes the file; no explicit close() is needed.
            with open(out_dir + net + '-' + estat + '-rf2-ph_weighted.dat', 'w') as text_file:
                for i in range(time_p.shape[0]):
                    text_file.write(str(time_p[i]) + ' ' + str(ph_weighted[0].data[i]) + '\n')
                # end for
            # end with
        # end if
    # end for
# however first we define general plotting scheme and plot previous results fig = plt.figure(figsize=(11.69, 8.27)) columns = 2 rows = np.int(np.ceil(float(max_grp) / float(columns))) + 1 grid = gridspec.GridSpec(columns, rows, wspace=0.2, hspace=0.2) ax = plt.subplot(grid[0]) ax.plot(time_s, stacked[0].data) ax.set_title(estat + ' Stacked') ax = plt.subplot(grid[1]) ax.plot(time_p, ph_weighted[0].data) ax.set_title('Phase weighted stack') frame = 2 for i in xrange(max_grp): grp_stream = rf.RFStream() for trace in station: if trace.stats.rf_group == i: grp_stream.append(trace) print "Group: ", i, " number of records: ", len(grp_stream) grp_stacked = grp_stream.copy().stack() grp_stck_max = np.max(np.abs(grp_stacked.copy()[0].data)) # grp_stck_max=amp_max phase_w = phase_weights(grp_stream) grp_stacked_wght = grp_stacked.copy()[0].data * phase_w grp_stacked_wght = grp_stacked_wght * ( grp_stck_max / np.max(np.abs(grp_stacked_wght))) grp_time = grp_stacked[0].stats.delta * np.array( list(xrange(grp_stacked[0].stats.npts))) + tstart
def main(input_file, output_file, event_mask_folder='', apply_amplitude_filter=False,
         apply_similarity_filter=False, hk_weights=DEFAULT_HK_WEIGHTS,
         hk_solution_labels=DEFAULT_HK_SOLN_LABEL, hk_hpf_freq=None, hk_vp=DEFAULT_Vp,
         save_hk_solution=False):
    # docstring redundant since CLI options are already documented.
    #
    # Overview: loads RF traces from input_file, optionally restricts them per channel by
    # event mask files / amplitude quality label / cross-correlation similarity, and writes
    # one group of PDF pages per station to output_file (pinwheel plot, primary stack,
    # transverse stack when available, H-k stack, and optionally a high-pass filtered H-k
    # stack). When save_hk_solution is set, the H-k maxima are also written to a CSV file
    # named after output_file.

    log.setLevel(logging.INFO)

    # Load traces and regroup them into the hierarchical (station -> channel) container
    log.info("Loading input file {}".format(input_file))
    data_dict = rf_util.rf_to_dict(rf_util.read_h5_rf(input_file))

    # Optional per-channel whitelist of event ids, read from files in event_mask_folder whose
    # names start with "<code>_event_mask.txt"
    event_mask_dict = None
    if event_mask_folder and os.path.isdir(event_mask_folder):
        log.info("Applying event mask from folder {}".format(event_mask_folder))
        mask_name_re = re.compile(r"([A-Za-z0-9\.]{5,})_event_mask\.txt")
        event_mask_dict = {}
        for fname in os.listdir(event_mask_folder):
            fpath = os.path.join(event_mask_folder, fname)
            if not os.path.isfile(fpath):
                continue
            # end if
            matched = mask_name_re.match(fname)
            if not matched:
                continue
            # end if
            with open(fpath, 'r') as mask_file:
                event_mask_dict[matched[1]] = {line.strip() for line in mask_file.readlines()}
            # end with
        # end for
    # end if

    if event_mask_dict:
        log.info("Loaded {} event masks".format(len(event_mask_dict)))
    # end if

    # Page geometry (inches) shared by all stack plots
    fixed_stack_height_inches = 0.8
    y_pad_inches = 1.6
    total_trace_height_inches = paper_size_A4[1] - fixed_stack_height_inches - y_pad_inches
    max_trace_height = 0.2
    paper_landscape = (paper_size_A4[1], paper_size_A4[0])

    # Quieten logging while the per-station plots are generated
    log.setLevel(logging.WARNING)

    with PdfPages(output_file) as pdf:
        # Would like to use Tex, but lack desktop PC privileges to update packages to what is required
        plt.rc('text', usetex=False)
        pbar = tqdm.tqdm(total=len(data_dict))
        network = data_dict.network
        rf_type = data_dict.rotation
        hk_soln = {}
        station_coords = {}

        for st in sorted(data_dict.keys()):
            station_db = data_dict[st]
            pbar.update()
            pbar.set_description("{}.{}".format(network, st))

            # Choose RF channel
            channel = rf_util.choose_rf_source_channel(rf_type, station_db)
            channel_data = station_db[channel]
            if not channel_data:
                continue
            # end if
            full_code = '.'.join([network, st, channel])

            # Name of the matching transverse channel: same code with last character set to 'T'
            t_channel_chars = list(channel)
            t_channel_chars[-1] = 'T'
            t_channel = ''.join(t_channel_chars)

            rf_stream = rf.RFStream(channel_data).sort(['back_azimuth'])

            if event_mask_dict and full_code in event_mask_dict:
                # Select events from external source
                allowed_events = event_mask_dict[full_code]
                kept = [tr for tr in rf_stream if tr.stats.event_id in allowed_events]
                rf_stream = rf.RFStream(kept).sort(['back_azimuth'])
            # end if

            if apply_amplitude_filter:
                # Label and filter quality
                rf_util.label_rf_quality_simple_amplitude(rf_type, rf_stream)
                kept = [tr for tr in rf_stream if tr.stats.predicted_quality == 'a']
                rf_stream = rf.RFStream(kept).sort(['back_azimuth'])
            # end if

            if not rf_stream:
                continue
            # end if

            if apply_similarity_filter and len(rf_stream) >= 3:
                rf_stream = rf_util.filter_crosscorr_coeff(rf_stream)
            # end if

            if not rf_stream:
                continue
            # end if

            # Find matching T-component data
            events = [tr.stats.event_id for tr in rf_stream]
            transverse_data = station_db[t_channel]
            t_stream = rf.RFStream(
                [tr for tr in transverse_data if tr.stats.event_id in events]).sort(['back_azimuth'])

            # Plot pinwheel of primary and transverse components
            fig = rf_plot_utils.plot_rf_wheel([rf_stream, t_stream], fontscaling=0.8)
            fig.set_size_inches(*paper_size_A4)
            plt.tight_layout()
            plt.subplots_adjust(hspace=0.15, top=0.95, bottom=0.15)
            ax = fig.gca()
            # Print the processing history of the first trace in the page margin
            fig.text(-0.32, -0.32, "\n".join(rf_stream[0].stats.processing), fontsize=6,
                     transform=ax.transAxes)
            pdf.savefig(dpi=300, papertype='a4', orientation='portrait')
            plt.close()

            num_traces = len(rf_stream)
            assert len(t_stream) == num_traces or not t_stream

            # Plot RF stack of primary component
            trace_ht = min(total_trace_height_inches / num_traces, max_trace_height)
            fig = rf_plot_utils.plot_rf_stack(rf_stream, trace_height=trace_ht,
                                              stack_height=fixed_stack_height_inches,
                                              fig_width=paper_size_A4[0])
            fig.suptitle("Channel {}".format(rf_stream[0].stats.channel))
            # Customize layout to pack to top of page while preserving RF plots aspect ratios
            _rf_layout_A4(fig)
            # Save to new page in file
            pdf.savefig(dpi=300, papertype='a4', orientation='portrait')
            plt.close()

            # Plot RF stack of transverse component
            if t_stream:
                fig = rf_plot_utils.plot_rf_stack(t_stream, trace_height=trace_ht,
                                                  stack_height=fixed_stack_height_inches,
                                                  fig_width=paper_size_A4[0])
                fig.suptitle("Channel {}".format(t_stream[0].stats.channel))
                # Customize layout to pack to top of page while preserving RF plots aspect ratios
                _rf_layout_A4(fig)
                # Save to new page in file
                pdf.savefig(dpi=300, papertype='a4', orientation='portrait')
                plt.close()
            # end if

            # Plot H-k stack using primary RF component
            fig, maxima = _produce_hk_stacking(rf_stream, weighting=hk_weights,
                                               labelling=hk_solution_labels, V_p=hk_vp)
            # The unfiltered solution is only recorded when no high-pass repeat will follow
            if save_hk_solution and hk_hpf_freq is None:
                hk_soln[st] = maxima
                station_coords[st] = (channel_data[0].stats.station_latitude,
                                      channel_data[0].stats.station_longitude)
            # end if

            fig.set_size_inches(*paper_landscape)
            pdf.savefig(dpi=300, papertype='a4', orientation='landscape')
            plt.close()

            if hk_hpf_freq is not None:
                # Repeat H-k stack with high pass filtering
                hpf_options = {'type': 'highpass', 'freq': hk_hpf_freq,
                               'corners': 1, 'zerophase': True}
                fig, maxima = _produce_hk_stacking(rf_stream, weighting=hk_weights,
                                                   labelling=hk_solution_labels, V_p=hk_vp,
                                                   filter_options=hpf_options)
                if save_hk_solution:
                    hk_soln[st] = maxima
                    station_coords[st] = (channel_data[0].stats.station_latitude,
                                          channel_data[0].stats.station_longitude)
                # end if
                fig.set_size_inches(*paper_landscape)
                pdf.savefig(dpi=300, papertype='a4', orientation='landscape')
                plt.close()
            # end if
        # end for
        pbar.close()
    # end with

    # Save H-k solutions to CSV file
    if hk_soln:
        assert len(hk_soln) == len(station_coords)

        # One row per station: latitude, longitude, then the (H, k) pairs in ascending sorted order
        flattened = {}
        for st, hks in hk_soln.items():
            ordered_pairs = sorted(tuple(hk) for hk in hks)
            flattened[st] = np.array(
                list(station_coords[st]) + list(itertools.chain.from_iterable(ordered_pairs)))
        # end for
        hk_soln.update(flattened)

        df = pd.DataFrame.from_dict(hk_soln, orient='index')
        hk_colnames = []
        for i in range((len(df.columns) - 2) // 2):
            hk_colnames.append('H{}'.format(i))
            hk_colnames.append('k{}'.format(i))
        # end for
        df.columns = ['Latitude', 'Longitude'] + hk_colnames
        df.index.name = 'Station'

        # CSV is written next to the PDF, same base name with .csv extension
        csv_fname = os.path.splitext(output_file)[0] + '.csv'
        df.to_csv(csv_fname)
    # end if
def run_batch(transect_file, rf_waveform_file, fed_db_file, amplitude_filter=False, similarity_filter=False,
              stack_scale=0.4, width=30.0, spacing=2.0, max_depth=200.0, channel='R', output_folder='',
              colormap='seismic', annotators=None):
    """Generate CCP stack plots for every transect listed in a transect file.

    The first line of the transect file is the network code. Each following non-blank line
    holds two comma-separated station codes marking the ends of one transect. For each
    transect a main figure and a map figure are produced by `run` and, when not None, saved as
    PDF files in the output folder.

    :param transect_file: File containing specification of network and station locations of ends of transects
    :type transect_file: str or Path
    :param rf_waveform_file: HDF5 file of QA'd receiver functions for the network matching the transect file
    :type rf_waveform_file: str or Path
    :param fed_db_file: Name of file with which to initialize FederatedASDFDataBase
    :type fed_db_file: str or Path
    :param amplitude_filter: Whether to use amplitude-based filtering of waveforms before plotting.
    :type amplitude_filter: bool
    :param similarity_filter: Whether to use RF waveform similarity filtering of waveforms before plotting.
    :type similarity_filter: bool
    :param stack_scale: Max value to represent on color scale of CCP plot
    :type stack_scale: float
    :param width: Width of transect (km)
    :type width: float
    :param spacing: Discretization size (km) for RF ray sampling
    :type spacing: float
    :param max_depth: Maximum depth of slice below the transect line (km)
    :type max_depth: float
    :param channel: Channel component ID to source for the RF amplitude
    :type channel: str length 1
    :param output_folder: Folder in which the PDF files are saved; created if it does not exist
    :type output_folder: str
    :param colormap: Colormap name forwarded to `run`
    :type colormap: str
    :param annotators: Optional callables, each invoked as ``annotator(hf_main, metadata)`` after a
        transect has been generated
    :type annotators: iterable of callable or None
    :return: None
    """
    print("Reading HDF5 file...")
    all_traces = rf.read_rf(rf_waveform_file, 'H5')
    rf_stream = all_traces.select(component=channel)

    rf_type = rf_stream[0].stats.rotation
    if amplitude_filter:
        # Label and filter quality: keep only traces labelled 'a'
        rf_util.label_rf_quality_simple_amplitude(rf_type, rf_stream)
        good_traces = [tr for tr in rf_stream if tr.stats.predicted_quality == 'a']
        rf_stream = rf.RFStream(good_traces)
    # end if

    # For similarity filtering, similarity filtering must applied to one station at a time.
    if similarity_filter:
        data_dict = rf_util.rf_to_dict(rf_stream)
        rf_stream = rf.RFStream()
        # NOTE(review): relies on iterating data_dict yielding (station, channel dict) pairs - confirm
        for _sta, ch_dict in data_dict:
            for ch_traces in ch_dict.values():
                if len(ch_traces) < 3:
                    # Too few traces to filter by similarity; keep them all
                    rf_stream += rf.RFStream(ch_traces)
                    continue
                # end if
                # Use short time window that cuts off by 10 sec, since we're only interested in Ps phase here.
                rf_stream += rf_util.filter_crosscorr_coeff(rf.RFStream(ch_traces), time_window=(-2, 10),
                                                            apply_moveout=True)
            # end for
        # end for
    # end if

    # Set to None to disable spectral filtering
    spectral_filter = {'type': 'highpass', 'freq': 0.2, 'corners': 1, 'zerophase': True}
    if spectral_filter is not None:
        rf_stream.filter(**spectral_filter)
    # end if

    db = FederatedASDFDataSet.FederatedASDFDataSet(fed_db_file)
    sta_coords = db.unique_coordinates

    if output_folder and not os.path.isdir(output_folder):
        assert not os.path.isfile(output_folder)
        os.makedirs(output_folder, exist_ok=True)
    # end if

    with open(transect_file, 'r') as f:
        net = f.readline().strip()
        for transect in f.readlines():
            if not transect.strip():
                continue
            # end if
            sta_start, sta_end = transect.split(',')
            sta_start, sta_end = sta_start.strip(), sta_end.strip()

            # Look up end point coordinates by full "<network>.<station>" code
            start = np.array(sta_coords['.'.join([net, sta_start])])
            end = np.array(sta_coords['.'.join([net, sta_end])])

            # Offset ends slightly to make sure we don't lose end stations due to truncation error.
            # Note: for simplicity this treats lat/lon like cartesian coords, but this is approximate
            # and will break down near poles, for long transects, or if transect crosses the antimeridian.
            dirn = end - start
            dirn = dirn / np.linalg.norm(dirn)
            lead_offset = LEAD_INOUT_DIST_KM * dirn / KM_PER_DEG
            start -= lead_offset
            end += lead_offset

            # `run` is given the end points with the two coordinate components swapped
            start_latlon = (start[1], start[0])
            end_latlon = (end[1], end[0])

            title = 'Network {} CCP R-stacking (profile {}-{})'.format(net, sta_start, sta_end)
            hf_main, hf_map, metadata = run(rf_stream, start_latlon, end_latlon, width, spacing, max_depth,
                                            channel, stacked_scale=stack_scale, title=title,
                                            colormap=colormap, background_model='ak135_60')

            metadata['transect_start'] = start
            metadata['transect_end'] = end
            metadata['transect_dirn'] = dirn

            if annotators is not None:
                for ant in annotators:
                    ant(hf_main, metadata)
                # end for
            # end if

            outfile_base = '{}-ZRT-R_CCP_stack_{}-{}_{}km_spacing'.format(net, sta_start, sta_end, spacing)
            outfile = os.path.join(output_folder, outfile_base + '.pdf')
            outfile_map = os.path.join(output_folder, outfile_base + '_MAP.pdf')

            if hf_main is not None:
                hf_main.savefig(outfile, dpi=300)
                plt.close(hf_main)
            # endif

            if hf_map is not None:
                hf_map.savefig(outfile_map, dpi=300)
                plt.close(hf_map)
            # endif
        # end for
    # end with
def main(input_file, output_file, event_mask_folder='', apply_amplitude_filter=False,
         apply_similarity_filter=False, hk_weights=DEFAULT_HK_WEIGHTS):
    # Overview: loads RF traces from input_file, optionally restricts them per channel by
    # event mask files / amplitude quality label / cross-correlation similarity, and writes
    # four PDF pages per station to output_file: pinwheel plot, primary component stack,
    # transverse component stack and H-k stack. Stations without usable primary traces or
    # without matching transverse traces are skipped.
    #
    # input_file: RF file readable by rf_util.read_h5_rf
    # output_file: name of the multi-page PDF to create
    # event_mask_folder: optional folder of "<code>_event_mask.txt" files, one event id per line
    # apply_amplitude_filter: keep only traces whose predicted_quality label is 'a'
    # apply_similarity_filter: apply rf_util.filter_crosscorr_coeff to each station's traces
    # hk_weights: weighting passed to _produce_hk_stacking
    #
    # (Kept as comments rather than a docstring so the function's __doc__ is unchanged.)

    # Read source file
    data_all = rf_util.read_h5_rf(input_file)

    # Convert to hierarchical dictionary format
    data_dict = rf_util.rf_to_dict(data_all)

    event_mask_dict = None
    if event_mask_folder and os.path.isdir(event_mask_folder):
        mask_files = [fname for fname in os.listdir(event_mask_folder)
                      if os.path.isfile(os.path.join(event_mask_folder, fname))]
        pattern = re.compile(r"([A-Za-z0-9\.]{5,})_event_mask\.txt")
        event_mask_dict = dict()
        for fname in mask_files:
            match_result = pattern.match(fname)
            if not match_result:
                continue
            # end if
            code = match_result[1]
            # Use a distinct name for the file handle: the original rebound the loop
            # variable to the (subsequently closed) file object.
            with open(os.path.join(event_mask_folder, fname), 'r') as mask_file:
                event_mask_dict[code] = set(line.strip() for line in mask_file)
            # end with
        # end for
    # end if

    if event_mask_dict:
        print("Loaded {} event masks".format(len(event_mask_dict)))
    # end if

    # Plot all data to PDF file
    fixed_stack_height_inches = 0.8
    y_pad_inches = 1.6
    total_trace_height_inches = paper_size_A4[1] - fixed_stack_height_inches - y_pad_inches
    max_trace_height = 0.2
    paper_landscape = (paper_size_A4[1], paper_size_A4[0])

    with PdfPages(output_file) as pdf:
        # Would like to use Tex, but lack desktop PC privileges to update packages to what is required
        plt.rc('text', usetex=False)
        pbar = tqdm.tqdm(total=len(data_dict))
        network = data_dict.network
        rf_type = data_dict.rotation
        for st in sorted(data_dict.keys()):
            station_db = data_dict[st]

            pbar.update()
            pbar.set_description("{}.{}".format(network, st))

            # Choose RF channel
            channel = rf_util.choose_rf_source_channel(rf_type, station_db)
            channel_data = station_db[channel]
            full_code = '.'.join([network, st, channel])

            # Matching transverse channel: same code with last character set to 'T'
            t_channel = list(channel)
            t_channel[-1] = 'T'
            t_channel = ''.join(t_channel)

            rf_stream = rf.RFStream(channel_data).sort(['back_azimuth'])
            if event_mask_dict and full_code in event_mask_dict:
                # Select events from external source
                event_mask = event_mask_dict[full_code]
                rf_stream = rf.RFStream(
                    [tr for tr in rf_stream if tr.stats.event_id in event_mask]).sort(['back_azimuth'])
            # end if
            if apply_amplitude_filter:
                # Label and filter quality
                rf_util.label_rf_quality_simple_amplitude(rf_type, rf_stream)
                rf_stream = rf.RFStream(
                    [tr for tr in rf_stream if tr.stats.predicted_quality == 'a']).sort(['back_azimuth'])
            # end if

            # Nothing left for this station: skip before handing an empty stream to the
            # similarity filter. The station would be skipped below in any case.
            if not rf_stream:
                continue
            # end if

            if apply_similarity_filter:
                rf_stream = rf_util.filter_crosscorr_coeff(rf_stream)
            # end if
            if not rf_stream:
                continue
            # end if

            # Find matching T-component data. A set gives constant-time membership tests.
            events = set(tr.stats.event_id for tr in rf_stream)
            transverse_data = station_db[t_channel]
            t_stream = rf.RFStream(
                [tr for tr in transverse_data if tr.stats.event_id in events]).sort(['back_azimuth'])
            if not t_stream:
                continue
            # end if

            # Plot pinwheel of primary and transverse components
            fig = rf_plot_utils.plot_rf_wheel([rf_stream, t_stream], fontscaling=0.8)
            fig.set_size_inches(*paper_size_A4)
            plt.tight_layout()
            plt.subplots_adjust(hspace=0.15, top=0.95, bottom=0.15)
            ax = fig.gca()
            # Print the processing history of the first trace in the page margin
            fig.text(-0.32, -0.32, "\n".join(rf_stream[0].stats.processing), fontsize=6,
                     transform=ax.transAxes)
            pdf.savefig(dpi=300, papertype='a4', orientation='portrait')
            plt.close()

            num_traces = len(rf_stream)
            assert len(t_stream) == num_traces

            # Plot RF stack of primary component
            trace_ht = min(total_trace_height_inches / num_traces, max_trace_height)
            fig = rf_plot_utils.plot_rf_stack(rf_stream, trace_height=trace_ht,
                                              stack_height=fixed_stack_height_inches,
                                              fig_width=paper_size_A4[0])
            fig.suptitle("Channel {}".format(rf_stream[0].stats.channel))
            # Customize layout to pack to top of page while preserving RF plots aspect ratios
            _rf_layout_A4(fig)
            # Save to new page in file
            pdf.savefig(dpi=300, papertype='a4', orientation='portrait')
            plt.close()

            # Plot RF stack of transverse component
            fig = rf_plot_utils.plot_rf_stack(t_stream, trace_height=trace_ht,
                                              stack_height=fixed_stack_height_inches,
                                              fig_width=paper_size_A4[0])
            fig.suptitle("Channel {}".format(t_stream[0].stats.channel))
            # Customize layout to pack to top of page while preserving RF plots aspect ratios
            _rf_layout_A4(fig)
            # Save to new page in file
            pdf.savefig(dpi=300, papertype='a4', orientation='portrait')
            plt.close()

            # Plot H-k stack using primary RF component
            fig = _produce_hk_stacking(rf_stream, weighting=hk_weights)
            fig.set_size_inches(*paper_landscape)
            pdf.savefig(dpi=300, papertype='a4', orientation='landscape')
            plt.close()
        # end for
        pbar.close()
    # end with
def test_rf_integration():
    """Check that the custom iterative deconvolution plugs into the rf library.

    Synthetic Z (source) and R (response) traces are built from a known radial RF with a
    little seeded random noise. RFs are then computed through the rf library using frequency
    domain, time domain (when available) and the custom `rf_iter_deconv` function. The test
    asserts that the custom path reproduces a direct call to `iter_deconv_pulsetrain`, and
    that its significant peaks also appear in the results of the other methods.
    """
    import rf

    # Synthesize known RF.
    inclinations = np.array([20.0, 15.0, 10.0])
    distances = np.array([60.0, 70.0, 80.0])
    amplitudes = [1, 0.4, 0.2]
    rf_radial, F_s = _generate_rf_radial(inclinations, distances, amplitudes)

    first_stats = rf_radial[0].stats
    time_shift = first_stats.onset - first_stats.starttime
    times = rf_radial[0].times() - time_shift

    # Generate synthetic vertical seismic trace.
    g = _generate_synthetic_source(times)

    # Collect test stream data with R and Z components into a RFStream object.
    rf_stream = rf.RFStream()
    # Fixed seed: the order of the random draws below must not change
    np.random.seed(20190925)
    g_noise_scale = 5.0e-3 * np.abs(g).max()
    g_funcs = []
    f_funcs = []
    for event_idx, radial_tr in enumerate(rf_radial):
        # Assign trackable event id
        radial_tr.stats.event_id = event_idx

        # Add some random noise to the source signal
        g_noisy = g + np.random.normal(scale=g_noise_scale, size=g.shape)
        g_funcs.append(g_noisy)

        # Create source RFTrace
        src_tr = radial_tr.copy()
        src_tr.data = g_noisy.copy()
        src_tr.stats.channel = 'HHZ'

        # Synthesize response signal and add some noise
        f_clean = _generate_radial_from_src(radial_tr.data, g_noisy, times)
        f_noise_scale = 5.0e-3 * np.abs(f_clean).max()
        f_noisy = f_clean + np.random.normal(scale=f_noise_scale, size=f_clean.shape)
        f_funcs.append(f_noisy)

        # Create response RFTrace
        rsp_tr = radial_tr.copy()
        rsp_tr.data = f_noisy.copy()

        rf_stream += src_tr
        rf_stream += rsp_tr
    # end for

    # Use rf library to compute comparative signals using time and freq domain deconvolution
    rf_freq = rf_stream.copy().rf(method='P', rotate=None, deconvolve='freq').select(component='R')
    rf_time = None
    try:
        rf_time = rf_stream.copy().rf(method='P', rotate=None, deconvolve='time').select(component='R')
    except NameError:
        import warnings
        # If Toeplitz not present on platform, rf may be unable to perform time-domain deconvolution
        warnings.warn("Unable to test default time-domain deconvolution from rf library")
        rf_time = None
    # end try

    # Call rf generator on rf.RFStream using our custom deconvolution function
    rf_iter = rf_stream.copy().rf(method='P', rotate=None, deconvolve='func', func=rf_iter_deconv,
                                  normalize=0).select(component='R')

    # Perform deconv directly and compare with rf_iter to check that rf calls used our custom function.
    for event_idx, (response, source) in enumerate(zip(f_funcs, g_funcs)):
        x, _, _, _, fit = iter_deconv_pulsetrain(response, source, F_s, time_shift)
        assert np.isclose(100.0, fit, rtol=1e-2)
        # Infer scaling factor due to normalization from max point, and use it to normalize x.
        iter_tr = rf_iter[event_idx]
        norm_factor = np.nanmax(x) / np.nanmax(iter_tr.data)
        x /= norm_factor
        assert iter_tr.stats.event_id == event_idx
        assert np.allclose(iter_tr.data, x, rtol=1e-3, atol=5e-3)
    # end for

    # Check that the local maxima of RF peaks found for different techniques all agree.
    # We expect exact agreement since the test data is very simple.

    def _significant_peak_indices(arr):
        # Interior samples that are strict local maxima and exceed the RMS of the whole array.
        # Returned indices are relative to arr[1:-1].
        is_local_max = (arr[1:-1] > arr[0:-2]) & (arr[1:-1] > arr[2:])
        rms = np.sqrt(np.mean(np.square(arr)))
        return np.nonzero(is_local_max & (arr[1:-1] > rms))[0]
    # end func

    for trace_idx, iter_tr in enumerate(rf_iter):
        # Due to wiggles producing spurious local maxima for certain deconvolution methods (expected),
        # we only check that the maxima generated using iterative deconv are in common with other
        # techniques, rather than expect exact matches in the location of all local maxima.
        expected_peaks = _significant_peak_indices(iter_tr.data)

        freq_peaks = _significant_peak_indices(rf_freq[trace_idx].data)
        assert np.all(np.isin(expected_peaks, freq_peaks))

        if rf_time is not None:
            time_peaks = _significant_peak_indices(rf_time[trace_idx].data)
            assert np.all(np.isin(expected_peaks, time_peaks))
        # end if
    # end for