def main(input_file, output_file, event_mask_folder='', apply_amplitude_filter=False,
         apply_similarity_filter=False, hk_weights=DEFAULT_HK_WEIGHTS):
    """Render per-station receiver function (RF) plots to a multi-page PDF file.

    For every station that still has data after the optional filters, four pages are
    written: a pinwheel plot of the primary and transverse components, an RF stack of
    the primary component, an RF stack of the transverse component, and an H-k stack
    of the primary component. Stations with no surviving primary traces, or with no
    matching transverse traces, are skipped.

    :param input_file: RF file to load, passed to ``rf_util.read_h5_rf``
    :type input_file: str or Path
    :param output_file: Name of the PDF file to generate
    :type output_file: str or Path
    :param event_mask_folder: Optional folder containing files named
        ``<net.sta.cha>_event_mask.txt``, each listing one event id per line. When a mask
        exists for a channel, only the listed events are kept.
    :type event_mask_folder: str, optional
    :param apply_amplitude_filter: Whether to keep only traces labelled as quality 'a' by
        ``rf_util.label_rf_quality_simple_amplitude``
    :type apply_amplitude_filter: bool, optional
    :param apply_similarity_filter: Whether to filter traces with
        ``rf_util.filter_crosscorr_coeff``
    :type apply_similarity_filter: bool, optional
    :param hk_weights: Weighting forwarded to the H-k stacking routine
    """
    # Read source file
    data_all = rf_util.read_h5_rf(input_file)

    # Convert to hierarchical dictionary format
    data_dict = rf_util.rf_to_dict(data_all)

    event_mask_dict = None
    if event_mask_folder and os.path.isdir(event_mask_folder):
        mask_files = [
            fname for fname in os.listdir(event_mask_folder)
            if os.path.isfile(os.path.join(event_mask_folder, fname))
        ]
        pattern = re.compile(r"([A-Za-z0-9\.]{5,})_event_mask\.txt")
        event_mask_dict = dict()
        for fname in mask_files:
            match_result = pattern.match(fname)
            if not match_result:
                continue
            code = match_result[1]
            # Use a distinct name for the file handle so that the loop variable
            # holding the file name is not clobbered.
            with open(os.path.join(event_mask_folder, fname), 'r') as mask_file:
                event_mask_dict[code] = {line.strip() for line in mask_file}
            # end with
        # end for
    # end if

    if event_mask_dict:
        print("Loaded {} event masks".format(len(event_mask_dict)))
    # end if

    # Plot all data to PDF file
    fixed_stack_height_inches = 0.8
    y_pad_inches = 1.6
    total_trace_height_inches = paper_size_A4[1] - fixed_stack_height_inches - y_pad_inches
    max_trace_height = 0.2
    with PdfPages(output_file) as pdf:
        # Would like to use Tex, but lack desktop PC privileges to update packages to what is required
        plt.rc('text', usetex=False)
        pbar = tqdm.tqdm(total=len(data_dict))
        network = data_dict.network
        rf_type = data_dict.rotation
        for st in sorted(data_dict.keys()):
            station_db = data_dict[st]

            pbar.update()
            pbar.set_description("{}.{}".format(network, st))

            # Choose RF channel
            channel = rf_util.choose_rf_source_channel(rf_type, station_db)
            channel_data = station_db[channel]
            full_code = '.'.join([network, st, channel])

            # The transverse channel code is the primary code with its last letter replaced by 'T'
            t_channel = list(channel)
            t_channel[-1] = 'T'
            t_channel = ''.join(t_channel)

            rf_stream = rf.RFStream(channel_data).sort(['back_azimuth'])
            if event_mask_dict and full_code in event_mask_dict:
                # Select events from external source
                event_mask = event_mask_dict[full_code]
                rf_stream = rf.RFStream(
                    [tr for tr in rf_stream if tr.stats.event_id in event_mask]).sort(['back_azimuth'])
            # end if
            if apply_amplitude_filter:
                # Label and filter quality
                rf_util.label_rf_quality_simple_amplitude(rf_type, rf_stream)
                rf_stream = rf.RFStream(
                    [tr for tr in rf_stream if tr.stats.predicted_quality == 'a']).sort(['back_azimuth'])
            # end if
            if apply_similarity_filter:
                rf_stream = rf_util.filter_crosscorr_coeff(rf_stream)
            # end if
            if not rf_stream:
                continue

            # Find matching T-component data. A set gives constant-time membership tests,
            # instead of scanning a list once per transverse trace.
            events = {tr.stats.event_id for tr in rf_stream}
            transverse_data = station_db[t_channel]
            t_stream = rf.RFStream(
                [tr for tr in transverse_data if tr.stats.event_id in events]).sort(['back_azimuth'])
            if not t_stream:
                continue

            # Plot pinwheel of primary and transverse components
            fig = rf_plot_utils.plot_rf_wheel([rf_stream, t_stream], fontscaling=0.8)
            fig.set_size_inches(*paper_size_A4)
            plt.tight_layout()
            plt.subplots_adjust(hspace=0.15, top=0.95, bottom=0.15)
            ax = fig.gca()
            # Annotate the page with the processing history recorded on the first trace
            fig.text(-0.32, -0.32, "\n".join(rf_stream[0].stats.processing), fontsize=6,
                     transform=ax.transAxes)
            pdf.savefig(dpi=300, papertype='a4', orientation='portrait')
            plt.close()

            num_traces = len(rf_stream)
            assert len(t_stream) == num_traces

            # Plot RF stack of primary component
            trace_ht = min(total_trace_height_inches / num_traces, max_trace_height)
            fig = rf_plot_utils.plot_rf_stack(rf_stream, trace_height=trace_ht,
                                              stack_height=fixed_stack_height_inches,
                                              fig_width=paper_size_A4[0])
            fig.suptitle("Channel {}".format(rf_stream[0].stats.channel))
            # Customize layout to pack to top of page while preserving RF plots aspect ratios
            _rf_layout_A4(fig)
            # Save to new page in file
            pdf.savefig(dpi=300, papertype='a4', orientation='portrait')
            plt.close()

            # Plot RF stack of transverse component
            fig = rf_plot_utils.plot_rf_stack(t_stream, trace_height=trace_ht,
                                              stack_height=fixed_stack_height_inches,
                                              fig_width=paper_size_A4[0])
            fig.suptitle("Channel {}".format(t_stream[0].stats.channel))
            # Customize layout to pack to top of page while preserving RF plots aspect ratios
            _rf_layout_A4(fig)
            # Save to new page in file
            pdf.savefig(dpi=300, papertype='a4', orientation='portrait')
            plt.close()

            # Plot H-k stack using primary RF component
            fig = _produce_hk_stacking(rf_stream, weighting=hk_weights)
            paper_landscape = (paper_size_A4[1], paper_size_A4[0])
            fig.set_size_inches(*paper_landscape)
            pdf.savefig(dpi=300, papertype='a4', orientation='landscape')
            plt.close()
        # end for
        pbar.close()
    # end with
def rf_inversion_export(input_h5_file, output_folder, network_code, component='R', resample_freq=6.25,
                        trim_window=(-5.0, 20.0), moveout=True):
    """Export receiver function to text format for ingestion into Fortran RF inversion code.

    :param input_h5_file: Input hdf5 file containing receiver function data
    :type input_h5_file: str or Path
    :param output_folder: Folder in which to export text files, one per channel per station.
        Will be appended with network code.
    :type output_folder: str or Path
    :param network_code: Network to which this RF data belongs, used to disambiguate and track folders.
    :type network_code: str
    :param component: The channel component to export, defaults to 'R'
    :type component: str, optional
    :param resample_freq: Sampling rate (Hz) of the output files, defaults to 6.25 Hz
    :type resample_freq: float, optional
    :param trim_window: Time window to export relative to onset, defaults to (-5.0, 20.0). If data needs
        to be resampled, the samples are anchored to the start of this time window.
    :type trim_window: tuple, optional
    :param moveout: Whether to apply moveout correction prior to exporting, defaults to True
    :type moveout: bool, optional
    """
    # Process for each station:
    # 1. Load hdf5 file containing RFs
    # 2. Filter to desired component.
    # 3. Quality filter to those that meet criteria (Sippl cross-correlation similarity)
    # 4. Moveout and stack the RFs
    # 5. Resample (lanczos) and trim RF
    # 6. Export one file per station in (time, amplitude format)

    # Convert to str first so that Path inputs (as documented) can take the network suffix.
    output_folder = str(output_folder) + "_" + network_code
    # exist_ok=True already makes this a no-op when the folder is present.
    os.makedirs(output_folder, exist_ok=True)

    data = rf_util.read_h5_rf(input_h5_file)
    data = data.select(component=component)

    rf_util.label_rf_quality_simple_amplitude('ZRT', data, snr_cutoff=2.0, rms_amp_cutoff=0.2,
                                              max_amp_cutoff=2.0)
    data = rf.RFStream([tr for tr in data if tr.stats.predicted_quality == 'a'])

    data_dict = rf_util.rf_to_dict(data)

    # Iterating the RF dictionary yields (station, channel dictionary) pairs
    for sta, ch_dict in data_dict:
        for cha, ch_traces in ch_dict.items():
            similar_traces = rf_util.filter_crosscorr_coeff(rf.RFStream(ch_traces))
            if not similar_traces:
                continue
            if moveout:
                similar_traces.moveout()
            # end if
            stack = similar_traces.stack()
            # Anchor the resampled time axis to the start of the requested window
            exact_start_time = stack[0].stats.onset + trim_window[0]
            stack.interpolate(sampling_rate=resample_freq, method='lanczos', a=10,
                              starttime=exact_start_time)
            stack.trim2(*trim_window, reftime='onset')

            # Fetch the trace only after resampling and trimming so that the exported samples
            # are guaranteed to be the processed ones.
            trace = stack[0]
            # Express sample times relative to onset rather than to trace start
            times = trace.times() - (trace.stats.onset - trace.stats.starttime)
            # TODO: Remove hardwired scaling factor.
            # This scaling factor only applies to iterative deconvolution with default Gaussian width
            # factor of 2.5. Once we upgrade to rf library version >= 0.9.0, we can remove this hardwired
            # setting and instead have it determined programatically from rf processing metadata stored
            # in the trace stats structure.
            # The scaling factor originates in the amplitude attenuation effect of the filtering applied
            # in iterative deconv, see table at end of this page:
            # http://eqseis.geosc.psu.edu/~cammon/HTML/RftnDocs/seq01.html
            # The values in this reference table are derived as the integral of the area under the
            # Gaussian in the frequency domain. Analytically, this amounts to simply dividing by scaling
            # factor of a/sqrt(pi), where 'a' here is the Gaussian width used in iterative deconvolution.
            iterdeconv_scaling = 2.5 / np.sqrt(np.pi)
            column_data = np.array([times, trace.data / iterdeconv_scaling]).T
            fname = os.path.join(output_folder, "_".join([network_code, sta, cha]) + "_rf.dat")
            np.savetxt(fname, column_data, fmt=('%5.2f', '%.8f'))
        # end for
    # end for
def main(input_file, output_file, event_mask_folder='', apply_amplitude_filter=False,
         apply_similarity_filter=False, hk_weights=DEFAULT_HK_WEIGHTS,
         hk_solution_labels=DEFAULT_HK_SOLN_LABEL, hk_hpf_freq=None, hk_vp=DEFAULT_Vp,
         save_hk_solution=False):
    """Plot receiver functions per station to a PDF file and optionally export H-k solutions.

    Per-parameter documentation is deliberately omitted here since the CLI options
    are already documented. When ``save_hk_solution`` is set, the H-k maxima are also written
    to a CSV file named after ``output_file`` with the extension replaced by ``.csv``. If
    ``hk_hpf_freq`` is given, the saved maxima are those of the high-pass filtered H-k stack.
    """
    log.setLevel(logging.INFO)

    # Load the source file and regroup it into hierarchical dictionary format
    log.info("Loading input file {}".format(input_file))
    rf_by_station = rf_util.rf_to_dict(rf_util.read_h5_rf(input_file))

    # Optionally collect externally supplied event selections, keyed by net.sta.cha code
    masks = None
    if event_mask_folder and os.path.isdir(event_mask_folder):
        log.info("Applying event mask from folder {}".format(event_mask_folder))
        mask_name_re = re.compile(r"([A-Za-z0-9\.]{5,})_event_mask\.txt")
        masks = {}
        for fname in os.listdir(event_mask_folder):
            mask_path = os.path.join(event_mask_folder, fname)
            if not os.path.isfile(mask_path):
                continue
            hit = mask_name_re.match(fname)
            if not hit:
                continue
            with open(mask_path, 'r') as mask_file:
                masks[hit[1]] = {line.strip() for line in mask_file.readlines()}
            # end with
        # end for
    # end if

    if masks:
        log.info("Loaded {} event masks".format(len(masks)))
    # end if

    # Page layout settings for the stack plots
    max_trace_height = 0.2
    y_pad_inches = 1.6
    fixed_stack_height_inches = 0.8
    total_trace_height_inches = paper_size_A4[1] - fixed_stack_height_inches - y_pad_inches

    log.setLevel(logging.WARNING)

    hk_soln = {}
    station_coords = {}
    with PdfPages(output_file) as pdf:
        # Would like to use Tex, but lack desktop PC privileges to update packages to what is required
        plt.rc('text', usetex=False)
        pbar = tqdm.tqdm(total=len(rf_by_station))
        network = rf_by_station.network
        rf_type = rf_by_station.rotation
        for sta in sorted(rf_by_station.keys()):
            sta_db = rf_by_station[sta]

            pbar.update()
            pbar.set_description("{}.{}".format(network, sta))

            # Pick the channel that serves as the primary RF component
            channel = rf_util.choose_rf_source_channel(rf_type, sta_db)
            channel_data = sta_db[channel]
            if not channel_data:
                continue
            # end if
            full_code = '.'.join([network, sta, channel])

            # Transverse channel code: same as the primary code, but ending in 'T'
            code_chars = list(channel)
            code_chars[-1] = 'T'
            t_channel = ''.join(code_chars)

            primary = rf.RFStream(channel_data).sort(['back_azimuth'])
            if masks and full_code in masks:
                # Keep only the events selected by the external mask
                allowed_events = masks[full_code]
                kept = [tr for tr in primary if tr.stats.event_id in allowed_events]
                primary = rf.RFStream(kept).sort(['back_azimuth'])
            # end if
            if apply_amplitude_filter:
                # Label quality, then keep only traces labelled 'a'
                rf_util.label_rf_quality_simple_amplitude(rf_type, primary)
                kept = [tr for tr in primary if tr.stats.predicted_quality == 'a']
                primary = rf.RFStream(kept).sort(['back_azimuth'])
            # end if
            if not primary:
                continue
            # Similarity filtering is only attempted when there are at least 3 traces
            if apply_similarity_filter and len(primary) >= 3:
                primary = rf_util.filter_crosscorr_coeff(primary)
            # end if
            if not primary:
                continue

            # Gather the transverse traces belonging to the surviving events
            wanted_ids = [tr.stats.event_id for tr in primary]
            matching_t = [tr for tr in sta_db[t_channel] if tr.stats.event_id in wanted_ids]
            transverse = rf.RFStream(matching_t).sort(['back_azimuth'])

            # Page 1: pinwheel of primary and transverse components
            fig = rf_plot_utils.plot_rf_wheel([primary, transverse], fontscaling=0.8)
            fig.set_size_inches(*paper_size_A4)
            plt.tight_layout()
            plt.subplots_adjust(hspace=0.15, top=0.95, bottom=0.15)
            wheel_ax = fig.gca()
            processing_notes = "\n".join(primary[0].stats.processing)
            fig.text(-0.32, -0.32, processing_notes, fontsize=6, transform=wheel_ax.transAxes)
            pdf.savefig(dpi=300, papertype='a4', orientation='portrait')
            plt.close()

            num_traces = len(primary)
            # Transverse data may be absent altogether, otherwise it must pair up one-to-one
            assert not transverse or len(transverse) == num_traces

            # Page 2: RF stack of primary component
            trace_ht = min(total_trace_height_inches / num_traces, max_trace_height)
            fig = rf_plot_utils.plot_rf_stack(primary, trace_height=trace_ht,
                                              stack_height=fixed_stack_height_inches,
                                              fig_width=paper_size_A4[0])
            fig.suptitle("Channel {}".format(primary[0].stats.channel))
            # Pack layout to top of page while preserving RF plots aspect ratios
            _rf_layout_A4(fig)
            pdf.savefig(dpi=300, papertype='a4', orientation='portrait')
            plt.close()

            # Page 3: RF stack of transverse component, when available
            if transverse:
                fig = rf_plot_utils.plot_rf_stack(transverse, trace_height=trace_ht,
                                                  stack_height=fixed_stack_height_inches,
                                                  fig_width=paper_size_A4[0])
                fig.suptitle("Channel {}".format(transverse[0].stats.channel))
                # Pack layout to top of page while preserving RF plots aspect ratios
                _rf_layout_A4(fig)
                pdf.savefig(dpi=300, papertype='a4', orientation='portrait')
                plt.close()
            # end if

            # Page 4: H-k stack using primary RF component
            fig, maxima = _produce_hk_stacking(primary, weighting=hk_weights,
                                               labelling=hk_solution_labels, V_p=hk_vp)
            # The unfiltered solution is only recorded when no high pass variant is requested
            if save_hk_solution and hk_hpf_freq is None:
                hk_soln[sta] = maxima
                first_stats = channel_data[0].stats
                station_coords[sta] = (first_stats.station_latitude, first_stats.station_longitude)
            # end if
            paper_landscape = (paper_size_A4[1], paper_size_A4[0])
            fig.set_size_inches(*paper_landscape)
            pdf.savefig(dpi=300, papertype='a4', orientation='landscape')
            plt.close()

            if hk_hpf_freq is not None:
                # Page 5: repeat H-k stack with high pass filtering
                hpf_options = {
                    'type': 'highpass',
                    'freq': hk_hpf_freq,
                    'corners': 1,
                    'zerophase': True
                }
                fig, maxima = _produce_hk_stacking(primary, weighting=hk_weights,
                                                   labelling=hk_solution_labels, V_p=hk_vp,
                                                   filter_options=hpf_options)
                if save_hk_solution:
                    hk_soln[sta] = maxima
                    first_stats = channel_data[0].stats
                    station_coords[sta] = (first_stats.station_latitude,
                                           first_stats.station_longitude)
                # end if
                fig.set_size_inches(*paper_landscape)
                pdf.savefig(dpi=300, papertype='a4', orientation='landscape')
                plt.close()
            # end if
        # end for
        pbar.close()
    # end with

    # Save H-k solutions to CSV file
    if hk_soln:
        assert len(hk_soln) == len(station_coords)

        # One row per station: coordinates followed by the solutions in ascending sorted order
        rows = {}
        for sta, solutions in hk_soln.items():
            ordered = sorted(tuple(soln) for soln in solutions)
            flattened = list(itertools.chain.from_iterable(ordered))
            rows[sta] = np.array(list(station_coords[sta]) + flattened)
        # end for

        df = pd.DataFrame.from_dict(rows, orient='index')
        # Columns beyond the two coordinate columns come in (H, k) pairs
        num_pairs = (len(df.columns) - 2) // 2
        colnames = ['Latitude', 'Longitude']
        for i in range(num_pairs):
            colnames.append('H{}'.format(i))
            colnames.append('k{}'.format(i))
        # end for
        df.columns = colnames
        df.index.name = 'Station'
        csv_fname = os.path.splitext(output_file)[0] + '.csv'
        df.to_csv(csv_fname)
    # end if
def run_batch(transect_file, rf_waveform_file, fed_db_file, amplitude_filter=False, similarity_filter=False,
              stack_scale=0.4, width=30.0, spacing=2.0, max_depth=200.0, channel='R', output_folder='',
              colormap='seismic', annotators=None):
    """Run CCP generation in batch mode along a series of transects.

    The transect file holds the network code on its first line. Every following non-blank
    line holds a comma separated pair of station codes marking the two ends of a transect.
    For each transect, up to two PDF files are saved into ``output_folder``: the CCP stack
    and an accompanying map (file name suffix ``_MAP``).

    :param transect_file: File containing specification of network and station locations of ends
        of transects
    :type transect_file: str or Path
    :param rf_waveform_file: HDF5 file of QA'd receiver functions for the network matching the
        transect file
    :type rf_waveform_file: str or Path
    :param fed_db_file: Name of file with which to initialize FederatedASDFDataBase
    :type fed_db_file: str or Path
    :param amplitude_filter: Whether to use amplitude-based filtering of waveforms before plotting.
    :type amplitude_filter: bool
    :param similarity_filter: Whether to use RF waveform similarity filtering of waveforms before
        plotting.
    :type similarity_filter: bool
    :param stack_scale: Max value to represent on color scale of CCP plot
    :type stack_scale: float
    :param width: Width of transect (km)
    :type width: float
    :param spacing: Discretization size (km) for RF ray sampling
    :type spacing: float
    :param max_depth: Maximum depth of slice below the transect line (km)
    :type max_depth: float
    :param channel: Channel component ID to source for the RF amplitude
    :type channel: str length 1
    :param output_folder: Folder in which to save the generated PDF files. Created if it does not
        exist. Defaults to '', in which case the files are written using bare file names.
    :type output_folder: str
    :param colormap: Colormap name forwarded to the CCP plotting routine, defaults to 'seismic'
    :type colormap: str
    :param annotators: Optional callables, each invoked as ``annotator(main_figure, metadata)``
        after a transect has been processed and before its figures are saved.
    :type annotators: iterable of callable, optional
    :return: None
    """
    print("Reading HDF5 file...")
    rf_stream = rf.read_rf(rf_waveform_file, 'H5').select(component=channel)

    rf_type = rf_stream[0].stats.rotation
    if amplitude_filter:
        # Label and filter quality
        rf_util.label_rf_quality_simple_amplitude(rf_type, rf_stream)
        rf_stream = rf.RFStream([tr for tr in rf_stream if tr.stats.predicted_quality == 'a'])
    # end if

    # Similarity filtering must be applied to one station at a time.
    if similarity_filter:
        data_dict = rf_util.rf_to_dict(rf_stream)
        rf_stream = rf.RFStream()
        for _sta, ch_dict in data_dict:
            for _cha, ch_traces in ch_dict.items():
                if len(ch_traces) >= 3:
                    # Use short time window that cuts off by 10 sec, since we're only interested in
                    # Ps phase here.
                    filtered_traces = rf_util.filter_crosscorr_coeff(rf.RFStream(ch_traces),
                                                                     time_window=(-2, 10),
                                                                     apply_moveout=True)
                    rf_stream += filtered_traces
                else:
                    # Too few traces to filter by similarity, so keep them all
                    rf_stream += rf.RFStream(ch_traces)
                # end if
            # end for
        # end for
    # end if

    # Set to None to disable spectral filtering
    spectral_filter = {'type': 'highpass', 'freq': 0.2, 'corners': 1, 'zerophase': True}
    if spectral_filter is not None:
        rf_stream.filter(**spectral_filter)
    # end if

    db = FederatedASDFDataSet.FederatedASDFDataSet(fed_db_file)
    sta_coords = db.unique_coordinates

    if output_folder and not os.path.isdir(output_folder):
        assert not os.path.isfile(output_folder)
        os.makedirs(output_folder, exist_ok=True)
    # end if

    with open(transect_file, 'r') as f:
        net = f.readline().strip()
        for transect in f:
            if not transect.strip():
                continue
            sta_start, sta_end = transect.split(',')
            sta_start = sta_start.strip()
            sta_end = sta_end.strip()
            start = '.'.join([net, sta_start])
            end = '.'.join([net, sta_end])
            # Force float arrays so that the in-place offsets below are always valid
            start = np.array(sta_coords[start], dtype=float)
            end = np.array(sta_coords[end], dtype=float)
            # Offset ends slightly to make sure we don't lose end stations due to truncation error.
            # Note: for simplicity this treats lat/lon like cartesian coords, but this is approximate
            # and will break down near poles, for long transects, or if transect crosses the
            # antimeridian.
            dirn = (end - start)
            dirn_length = np.linalg.norm(dirn)
            if dirn_length == 0:
                # A transect between co-located stations has no direction. Normalizing would
                # divide by zero and propagate NaN coordinates into the CCP computation.
                print("Skipping transect {}-{}: end stations are co-located".format(sta_start, sta_end))
                continue
            # end if
            dirn = dirn / dirn_length
            start -= LEAD_INOUT_DIST_KM * dirn / KM_PER_DEG
            end += LEAD_INOUT_DIST_KM * dirn / KM_PER_DEG
            # Coordinate pairs are swapped here; presumably the database stores (lon, lat) - TODO confirm
            start_latlon = (start[1], start[0])
            end_latlon = (end[1], end[0])

            title = 'Network {} CCP R-stacking (profile {}-{})'.format(net, sta_start, sta_end)
            hf_main, hf_map, metadata = run(rf_stream, start_latlon, end_latlon, width, spacing, max_depth,
                                            channel, stacked_scale=stack_scale, title=title,
                                            colormap=colormap, background_model='ak135_60')

            metadata['transect_start'] = start
            metadata['transect_end'] = end
            metadata['transect_dirn'] = dirn

            if annotators is not None:
                for ant in annotators:
                    ant(hf_main, metadata)
                # end for
            # end if

            outfile_base = '{}-ZRT-R_CCP_stack_{}-{}_{}km_spacing'.format(net, sta_start, sta_end, spacing)
            outfile = os.path.join(output_folder, outfile_base + '.pdf')
            outfile_map = os.path.join(output_folder, outfile_base + '_MAP.pdf')

            if hf_main is not None:
                hf_main.savefig(outfile, dpi=300)
                plt.close(hf_main)
            # endif

            if hf_map is not None:
                hf_map.savefig(outfile_map, dpi=300)
                plt.close(hf_map)
            # endif
        # end for
    # end with
def main():
    """Main entry function for RF picking tool.

    Prompts for an RF file and an output folder. Then, station by station, shows the RF stack
    of the quality filtered primary channel and lets the user select traces by dragging a
    rectangle. When the plot window is closed, the event ids of the selected traces are written
    to ``<net>.<sta>.<cha>_event_mask.txt`` in the output folder, one id per line.

    Returns without doing anything if either dialog is cancelled.
    """
    infile = filedialog.askopenfilename(initialdir=".", title="Select RF file",
                                        filetypes=(("h5 files", "*.h5"), ))
    if not infile:
        # Dialog was cancelled; an empty path cannot be loaded.
        log.warning("No RF file selected, nothing to do")
        return
    # end if
    output_folder = filedialog.askdirectory(initialdir=os.path.split(infile)[0],
                                            title='Select output folder')
    if not output_folder:
        # Dialog was cancelled; os.makedirs('') would raise.
        log.warning("No output folder selected, nothing to do")
        return
    # end if
    if not os.path.isdir(output_folder):
        log.info("Creating output folder {}".format(output_folder))
        os.makedirs(output_folder, exist_ok=True)
    # end if
    log.info("Output files will be emitted to {}".format(output_folder))

    log.info("Loading %s", infile)
    data_all = rf_util.read_h5_rf(infile)
    data_dict = rf_util.rf_to_dict(data_all)

    stations = sorted(data_dict.keys())

    # Assuming same rotation type for all RFs. This is consistent with the existing workflow.
    rf_type = data_all[0].stats.rotation

    for st in stations:
        station_db = data_dict[st]

        # Choose RF channel
        channel = rf_util.choose_rf_source_channel(rf_type, station_db)
        channel_data = station_db[channel]
        # Check assumption
        for tr in channel_data:
            assert tr.stats.rotation == rf_type, 'Mismatching RF rotation type'

        # Label and filter quality
        rf_util.label_rf_quality_simple_amplitude(rf_type, channel_data)
        rf_stream = rf.RFStream(
            [tr for tr in channel_data if tr.stats.predicted_quality == 'a']).sort(['back_azimuth'])
        if not rf_stream:
            log.info("No data survived filtering for %s, skipping", st)
            continue

        # Plot RF stack of primary component
        fig = rf_plot_utils.plot_rf_stack(rf_stream)
        fig.set_size_inches(8, 9)
        fig.suptitle("Channel {}".format(rf_stream[0].stats.channel))
        ax0 = fig.axes[0]

        # Make sure we draw once first before capturing blit background
        fig.canvas.draw()
        # Disallow resizing to avoid having to handle blitting with resized window.
        # NOTE(review): canvas.window() is backend specific (looks like Qt) - confirm backend in use.
        win = fig.canvas.window()
        win.setFixedSize(win.size())
        blit_background = fig.canvas.copy_from_bbox(ax0.bbox)

        # One selection flag per trace, updated by the selection callbacks
        mask = np.zeros(len(rf_stream), dtype=bool)
        # NOTE(review): newer Matplotlib releases renamed `rectprops` to `props` - confirm
        # against the Matplotlib version this tool is pinned to.
        rect_select = RectangleSelector(ax0, lambda e0, e1: on_select(e0, e1, mask), useblit=True,
                                        rectprops=dict(fill=False, edgecolor='red'))
        cid = fig.canvas.mpl_connect('button_release_event',
                                     lambda e: on_release(e, ax0, mask, blit_background, rect_select))
        # Blocks until the user closes the plot window
        plt.show()
        fig.canvas.mpl_disconnect(cid)
        rect_select = None

        selected_event_ids = [tr.stats.event_id for tr, picked in zip(rf_stream, mask) if picked]
        log.info("{} streams selected".format(len(selected_event_ids)))
        log.info("Selected event ids:")
        log.info(selected_event_ids)

        network = rf_stream[0].stats.network
        outfile = os.path.join(output_folder, '.'.join([network, st, channel]) + '_event_mask.txt')
        log.info("Writing mask to file {}".format(outfile))
        if os.path.exists(outfile):
            log.warning("Overwriting existing file {} !".format(outfile))
        with open(outfile, 'w') as f:
            f.write('\n'.join(selected_event_ids))
        # end with
    # end for