Example #1
0
def main(input_file, output_file, network, reverb_stations=None):
    """
    Run dereverberation over the R-component receiver functions of selected
    stations and write the result to a new file.

    This is not a fully fledged sediment compensation method, and
    conventional stacking applied afterwards may not reflect true depths
    to signal features.

    Only the traces selected for ``network`` with component 'R' are written
    to the output file.

    :param input_file: RF file to load in obspyh5 format with rf indexing
    :param output_file: RF file to write in obspyh5 format with rf indexing
    :param network: Network code of data to load from input file
    :param reverb_stations: Open file-like object listing station codes
        (one per line) to filter. If None, no station is processed.
    """
    station_codes = ([] if reverb_stations is None else
                     reverb_stations.read().splitlines())

    radial_rfs = rf_util.read_h5_rf(input_file).select(network=network,
                                                       component='R')

    progress = tqdm(station_codes, total=len(station_codes))
    for code in progress:
        progress.set_description(code)
        station_rfs = radial_rfs.select(station=code)
        # NOTE(review): presumably select() shares trace objects with
        # radial_rfs, so the helper's changes reach the written stream -
        # confirm against _dereverb_yu_autocorr.
        if station_rfs:
            _dereverb_yu_autocorr(station_rfs)
        # end if
    # end for
    radial_rfs.write(output_file, format='h5')
Example #2
0
def main(input_file,
         output_file,
         event_mask_folder='',
         apply_amplitude_filter=False,
         apply_similarity_filter=False,
         hk_weights=DEFAULT_HK_WEIGHTS):
    """Generate a multi-page PDF report of receiver functions, station by station.

    For each station with usable data, four pages are written: a pinwheel
    plot of the primary and transverse components, an RF stack of the primary
    component, an RF stack of the transverse component, and an H-k stacking
    plot of the primary component.

    :param input_file: RF file to load, passed to ``rf_util.read_h5_rf``
    :param output_file: Path of the PDF file to write
    :param event_mask_folder: Optional folder containing files named
        ``<NET.STA.CHA>_event_mask.txt`` with one event id per line. When a
        mask exists for a channel, only the listed events are kept.
    :param apply_amplitude_filter: If True, keep only traces whose
        ``predicted_quality`` is labelled 'a' by
        ``rf_util.label_rf_quality_simple_amplitude``
    :param apply_similarity_filter: If True, filter traces with
        ``rf_util.filter_crosscorr_coeff``
    :param hk_weights: Forwarded as ``weighting`` to ``_produce_hk_stacking``
    """
    # Read source file
    data_all = rf_util.read_h5_rf(input_file)

    # Convert to hierarchical dictionary format
    data_dict = rf_util.rf_to_dict(data_all)

    event_mask_dict = None
    if event_mask_folder and os.path.isdir(event_mask_folder):
        pattern = re.compile(r"([A-Za-z0-9\.]{5,})_event_mask\.txt")
        event_mask_dict = dict()
        for fname in os.listdir(event_mask_folder):
            mask_path = os.path.join(event_mask_folder, fname)
            if not os.path.isfile(mask_path):
                continue
            match_result = pattern.match(fname)
            if not match_result:
                continue
            code = match_result[1]
            # Use a dedicated name for the handle so that the loop variable
            # holding the file name is not shadowed.
            with open(mask_path, 'r') as mask_file:
                event_mask_dict[code] = {line.strip() for line in mask_file}
            # end with
        # end for
    # end if

    if event_mask_dict:
        print("Loaded {} event masks".format(len(event_mask_dict)))
    # end if

    # Plot all data to PDF file
    fixed_stack_height_inches = 0.8
    y_pad_inches = 1.6
    total_trace_height_inches = (paper_size_A4[1] - fixed_stack_height_inches
                                 - y_pad_inches)
    max_trace_height = 0.2

    # NOTE(review): `papertype` is passed to savefig below; it appears to be
    # deprecated in newer Matplotlib releases - confirm against the pinned
    # Matplotlib version before upgrading.
    with PdfPages(output_file) as pdf:
        # Would like to use Tex, but lack desktop PC privileges to update packages to what is required
        plt.rc('text', usetex=False)
        pbar = tqdm.tqdm(total=len(data_dict))
        network = data_dict.network
        rf_type = data_dict.rotation
        for st in sorted(data_dict.keys()):
            station_db = data_dict[st]

            pbar.update()
            pbar.set_description("{}.{}".format(network, st))

            # Choose RF channel
            channel = rf_util.choose_rf_source_channel(rf_type, station_db)
            channel_data = station_db[channel]
            full_code = '.'.join([network, st, channel])

            # Transverse channel code: same as primary with last letter 'T'
            t_channel = channel[:-1] + 'T'

            rf_stream = rf.RFStream(channel_data).sort(['back_azimuth'])
            if event_mask_dict and full_code in event_mask_dict:
                # Select events from external source
                event_mask = event_mask_dict[full_code]
                rf_stream = rf.RFStream([
                    tr for tr in rf_stream if tr.stats.event_id in event_mask
                ]).sort(['back_azimuth'])
            # end if
            if apply_amplitude_filter:
                # Label and filter quality
                rf_util.label_rf_quality_simple_amplitude(rf_type, rf_stream)
                rf_stream = rf.RFStream([
                    tr for tr in rf_stream if tr.stats.predicted_quality == 'a'
                ]).sort(['back_azimuth'])
            # end if

            # Nothing left to plot; also avoids handing an empty stream to
            # the similarity filter.
            if not rf_stream:
                continue
            if apply_similarity_filter:
                rf_stream = rf_util.filter_crosscorr_coeff(rf_stream)
            # end if
            if not rf_stream:
                continue

            # Find matching T-component data. A set gives constant-time
            # membership tests while scanning the transverse traces.
            events = {tr.stats.event_id for tr in rf_stream}
            transverse_data = station_db[t_channel]
            t_stream = rf.RFStream([
                tr for tr in transverse_data if tr.stats.event_id in events
            ]).sort(['back_azimuth'])
            if not t_stream:
                continue

            # Plot pinwheel of primary and transverse components
            fig = rf_plot_utils.plot_rf_wheel([rf_stream, t_stream],
                                              fontscaling=0.8)
            fig.set_size_inches(*paper_size_A4)
            plt.tight_layout()
            plt.subplots_adjust(hspace=0.15, top=0.95, bottom=0.15)
            ax = fig.gca()
            # Annotate the page with the processing history of the first trace
            fig.text(-0.32,
                     -0.32,
                     "\n".join(rf_stream[0].stats.processing),
                     fontsize=6,
                     transform=ax.transAxes)
            pdf.savefig(dpi=300, papertype='a4', orientation='portrait')
            plt.close()

            num_traces = len(rf_stream)
            assert len(t_stream) == num_traces

            # Plot RF stack of primary component
            trace_ht = min(total_trace_height_inches / num_traces,
                           max_trace_height)
            fig = rf_plot_utils.plot_rf_stack(
                rf_stream,
                trace_height=trace_ht,
                stack_height=fixed_stack_height_inches,
                fig_width=paper_size_A4[0])
            fig.suptitle("Channel {}".format(rf_stream[0].stats.channel))
            # Customize layout to pack to top of page while preserving RF plots aspect ratios
            _rf_layout_A4(fig)
            # Save to new page in file
            pdf.savefig(dpi=300, papertype='a4', orientation='portrait')
            plt.close()

            # Plot RF stack of transverse component
            fig = rf_plot_utils.plot_rf_stack(
                t_stream,
                trace_height=trace_ht,
                stack_height=fixed_stack_height_inches,
                fig_width=paper_size_A4[0])
            fig.suptitle("Channel {}".format(t_stream[0].stats.channel))
            # Customize layout to pack to top of page while preserving RF plots aspect ratios
            _rf_layout_A4(fig)
            # Save to new page in file
            pdf.savefig(dpi=300, papertype='a4', orientation='portrait')
            plt.close()

            # Plot H-k stack using primary RF component
            fig = _produce_hk_stacking(rf_stream, weighting=hk_weights)
            paper_landscape = (paper_size_A4[1], paper_size_A4[0])
            fig.set_size_inches(*paper_landscape)
            pdf.savefig(dpi=300, papertype='a4', orientation='landscape')
            plt.close()

        # end for
        pbar.close()
    # end with
Example #3
0
def rf_inversion_export(input_h5_file,
                        output_folder,
                        network_code,
                        component='R',
                        resample_freq=6.25,
                        trim_window=(-5.0, 20.0),
                        moveout=True):
    """Export receiver function to text format for ingestion into Fortran RF inversion code.

    :param input_h5_file: Input hdf5 file containing receiver function data
    :type input_h5_file: str or Path
    :param output_folder: Folder in which to export text files, one per channel per station.
        Will be appended with network code. Created if it does not exist.
    :type output_folder: str or Path
    :param network_code: Network to which this RF data belongs, used to disambiguate and track folders.
    :type network_code: str
    :param component: The channel component to export, defaults to 'R'
    :type component: str, optional
    :param resample_freq: Sampling rate (Hz) of the output files, defaults to 6.25 Hz
    :type resample_freq: float, optional
    :param trim_window: Time window to export relative to onset, defaults to (-5.0, 20.0). If data needs
        to be resampled, the samples are anchored to the start of this time window.
    :type trim_window: tuple, optional
    :param moveout: Whether to apply moveout correction prior to exporting, defaults to True
    :type moveout: bool, optional
    """
    # Process for each station:
    # 1. Load hdf5 file containing RFs
    # 2. Filter to desired component.
    # 3. Quality filter to those that meet criteria (Sippl cross-correlation similarity)
    # 4. Moveout and stack the RFs
    # 5. Resample (lanczos) and trim RF
    # 6. Export one file per station in (time, amplitude format)

    # Convert to a plain string first: the documented Path input does not
    # support concatenation with str.
    output_folder = os.fspath(output_folder) + "_" + network_code
    os.makedirs(output_folder, exist_ok=True)

    data = rf_util.read_h5_rf(input_h5_file)

    data = data.select(component=component)

    rf_util.label_rf_quality_simple_amplitude('ZRT',
                                              data,
                                              snr_cutoff=2.0,
                                              rms_amp_cutoff=0.2,
                                              max_amp_cutoff=2.0)
    data = rf.RFStream(
        [tr for tr in data if tr.stats.predicted_quality == 'a'])

    data_dict = rf_util.rf_to_dict(data)

    # NOTE(review): this relies on iterating `data_dict` yielding
    # (station, channel-dict) pairs. A plain dict would yield keys only -
    # confirm against the type returned by rf_util.rf_to_dict.
    for sta, ch_dict in data_dict:
        for cha, ch_traces in ch_dict.items():
            similar_traces = rf_util.filter_crosscorr_coeff(
                rf.RFStream(ch_traces))
            if not similar_traces:
                continue
            if moveout:
                similar_traces.moveout()
            # end if
            stack = similar_traces.stack()
            # NOTE(review): `trace` is captured before resampling/trimming, so
            # the export below assumes interpolate() and trim2() modify the
            # stacked trace in place - confirm.
            trace = stack[0]
            exact_start_time = trace.stats.onset + trim_window[0]
            stack.interpolate(sampling_rate=resample_freq,
                              method='lanczos',
                              a=10,
                              starttime=exact_start_time)
            stack.trim2(*trim_window, reftime='onset')

            # Sample times relative to onset rather than to trace start
            times = trace.times() - (trace.stats.onset - trace.stats.starttime)
            # TODO: Remove hardwired scaling factor.
            # This scaling factor only applies to iterative deconvolution with default Gaussian width
            # factor of 2.5. Once we upgrade to rf library version >= 0.9.0, we can remove this hardwired
            # setting and instead have it determined programatically from rf processing metadata stored
            # in the trace stats structure.
            # The scaling factor originates in the amplitude attenuation effect of the filtering applied
            # in iterative deconv, see table at end of this page:
            # http://eqseis.geosc.psu.edu/~cammon/HTML/RftnDocs/seq01.html
            # The values in this reference table are derived as the integral of the area under the
            # Gaussian in the frequency domain. Analytically, this amounts to simply dividing by scaling
            # factor of a/sqrt(pi), where 'a' here is the Gaussian width used in iterative deconvolution.
            iterdeconv_scaling = 2.5 / np.sqrt(np.pi)
            column_data = np.array([times, trace.data / iterdeconv_scaling]).T
            fname = os.path.join(
                output_folder, "_".join([network_code, sta, cha]) + "_rf.dat")
            np.savetxt(fname, column_data, fmt=('%5.2f', '%.8f'))
        # end for
    # end for
Example #4
0
def main(input_file,
         output_file,
         event_mask_folder='',
         apply_amplitude_filter=False,
         apply_similarity_filter=False,
         hk_weights=DEFAULT_HK_WEIGHTS,
         hk_solution_labels=DEFAULT_HK_SOLN_LABEL,
         hk_hpf_freq=None,
         hk_vp=DEFAULT_Vp,
         save_hk_solution=False):
    """Generate a multi-page PDF report of receiver functions and H-k stacks.

    The CLI options document the user-facing meaning of each argument; the
    summary below describes how this function uses them.

    :param input_file: RF file to load, passed to ``rf_util.read_h5_rf``
    :param output_file: Path of the PDF file to write. When H-k solutions are
        saved, the CSV file uses the same path with extension ``.csv``.
    :param event_mask_folder: Optional folder containing files named
        ``<NET.STA.CHA>_event_mask.txt`` with one event id per line. When a
        mask exists for a channel, only the listed events are kept.
    :param apply_amplitude_filter: If True, keep only traces whose
        ``predicted_quality`` is labelled 'a'
    :param apply_similarity_filter: If True, apply
        ``rf_util.filter_crosscorr_coeff`` to streams of at least 3 traces
    :param hk_weights: Forwarded as ``weighting`` to ``_produce_hk_stacking``
    :param hk_solution_labels: Forwarded as ``labelling`` to
        ``_produce_hk_stacking``
    :param hk_hpf_freq: If not None, an additional H-k stack is produced with
        a highpass filter at this frequency, and saved solutions are taken
        from that filtered stack.
    :param hk_vp: Forwarded as ``V_p`` to ``_produce_hk_stacking``
    :param save_hk_solution: If True, collect the H-k maxima per station and
        write them to a CSV file
    """

    log.setLevel(logging.INFO)

    # Read source file
    log.info("Loading input file {}".format(input_file))
    data_all = rf_util.read_h5_rf(input_file)

    # Convert to hierarchical dictionary format
    data_dict = rf_util.rf_to_dict(data_all)

    # Mapping of full channel code -> set of event ids to keep
    event_mask_dict = None
    if event_mask_folder and os.path.isdir(event_mask_folder):
        log.info(
            "Applying event mask from folder {}".format(event_mask_folder))
        mask_files = os.listdir(event_mask_folder)
        mask_files = [
            f for f in mask_files
            if os.path.isfile(os.path.join(event_mask_folder, f))
        ]
        # Captured group is the channel code preceding "_event_mask.txt"
        pattern = r"([A-Za-z0-9\.]{5,})_event_mask\.txt"
        pattern = re.compile(pattern)
        event_mask_dict = dict()
        for f in mask_files:
            match_result = pattern.match(f)
            if not match_result:
                continue
            code = match_result[1]
            with open(os.path.join(event_mask_folder, f), 'r') as _f:
                events = _f.readlines()
                events = set([e.strip() for e in events])
                event_mask_dict[code] = events
            # end with
        # end for
    # end if

    if event_mask_dict:
        log.info("Loaded {} event masks".format(len(event_mask_dict)))
    # end if

    # Plot all data to PDF file
    fixed_stack_height_inches = 0.8
    y_pad_inches = 1.6
    # Vertical space left for traces after reserving the stack plot and padding
    total_trace_height_inches = paper_size_A4[
        1] - fixed_stack_height_inches - y_pad_inches
    max_trace_height = 0.2

    # Raise the log threshold for the plotting phase. The level is not
    # restored afterwards.
    log.setLevel(logging.WARNING)

    with PdfPages(output_file) as pdf:
        # Would like to use Tex, but lack desktop PC privileges to update packages to what is required
        plt.rc('text', usetex=False)
        pbar = tqdm.tqdm(total=len(data_dict))
        network = data_dict.network
        rf_type = data_dict.rotation
        # Per-station H-k maxima and (latitude, longitude), filled only when
        # save_hk_solution is set
        hk_soln = dict()
        station_coords = dict()
        for st in sorted(data_dict.keys()):
            station_db = data_dict[st]

            pbar.update()
            pbar.set_description("{}.{}".format(network, st))

            # Choose RF channel
            channel = rf_util.choose_rf_source_channel(rf_type, station_db)
            channel_data = station_db[channel]
            if not channel_data:
                continue
            # end if
            full_code = '.'.join([network, st, channel])

            # Transverse channel code: same as primary with last letter 'T'
            t_channel = list(channel)
            t_channel[-1] = 'T'
            t_channel = ''.join(t_channel)

            rf_stream = rf.RFStream(channel_data).sort(['back_azimuth'])
            if event_mask_dict and full_code in event_mask_dict:
                # Select events from external source
                event_mask = event_mask_dict[full_code]
                rf_stream = rf.RFStream([
                    tr for tr in rf_stream if tr.stats.event_id in event_mask
                ]).sort(['back_azimuth'])
            # end if
            if apply_amplitude_filter:
                # Label and filter quality
                rf_util.label_rf_quality_simple_amplitude(rf_type, rf_stream)
                rf_stream = rf.RFStream([
                    tr for tr in rf_stream if tr.stats.predicted_quality == 'a'
                ]).sort(['back_azimuth'])
            # end if
            if not rf_stream:
                continue
            # The similarity filter is skipped for streams of fewer than 3 traces
            if apply_similarity_filter and len(rf_stream) >= 3:
                rf_stream = rf_util.filter_crosscorr_coeff(rf_stream)
            # end if
            if not rf_stream:
                continue

            # Find matching T-component data
            events = [tr.stats.event_id for tr in rf_stream]
            transverse_data = station_db[t_channel]
            t_stream = rf.RFStream([
                tr for tr in transverse_data if tr.stats.event_id in events
            ]).sort(['back_azimuth'])

            # Plot pinwheel of primary and transverse components
            fig = rf_plot_utils.plot_rf_wheel([rf_stream, t_stream],
                                              fontscaling=0.8)
            fig.set_size_inches(*paper_size_A4)
            plt.tight_layout()
            plt.subplots_adjust(hspace=0.15, top=0.95, bottom=0.15)
            ax = fig.gca()
            # Annotate the page with the processing history of the first trace
            fig.text(-0.32,
                     -0.32,
                     "\n".join(rf_stream[0].stats.processing),
                     fontsize=6,
                     transform=ax.transAxes)
            # pdf.savefig() is called without a figure argument here and
            # below - presumably it saves pyplot's current figure, so each
            # figure must be closed before the next one is created.
            pdf.savefig(dpi=300, papertype='a4', orientation='portrait')
            plt.close()

            num_traces = len(rf_stream)
            # Transverse data may be absent entirely, but if present it must
            # pair one-to-one with the primary traces
            assert len(t_stream) == num_traces or not t_stream

            # Plot RF stack of primary component
            trace_ht = min(total_trace_height_inches / num_traces,
                           max_trace_height)
            fig = rf_plot_utils.plot_rf_stack(
                rf_stream,
                trace_height=trace_ht,
                stack_height=fixed_stack_height_inches,
                fig_width=paper_size_A4[0])
            fig.suptitle("Channel {}".format(rf_stream[0].stats.channel))
            # Customize layout to pack to top of page while preserving RF plots aspect ratios
            _rf_layout_A4(fig)
            # Save to new page in file
            pdf.savefig(dpi=300, papertype='a4', orientation='portrait')
            plt.close()

            # Plot RF stack of transverse component
            if t_stream:
                fig = rf_plot_utils.plot_rf_stack(
                    t_stream,
                    trace_height=trace_ht,
                    stack_height=fixed_stack_height_inches,
                    fig_width=paper_size_A4[0])
                fig.suptitle("Channel {}".format(t_stream[0].stats.channel))
                # Customize layout to pack to top of page while preserving RF plots aspect ratios
                _rf_layout_A4(fig)
                # Save to new page in file
                pdf.savefig(dpi=300, papertype='a4', orientation='portrait')
                plt.close()
            # end if

            # Plot H-k stack using primary RF component
            fig, maxima = _produce_hk_stacking(rf_stream,
                                               weighting=hk_weights,
                                               labelling=hk_solution_labels,
                                               V_p=hk_vp)
            # Record the unfiltered solution only when no high pass variant
            # is requested; otherwise the filtered solution is recorded below.
            if save_hk_solution and hk_hpf_freq is None:
                hk_soln[st] = maxima
                station_coords[st] = (channel_data[0].stats.station_latitude,
                                      channel_data[0].stats.station_longitude)
            # end if
            paper_landscape = (paper_size_A4[1], paper_size_A4[0])
            fig.set_size_inches(*paper_landscape)
            # plt.tight_layout()
            # plt.subplots_adjust(hspace=0.15, top=0.95, bottom=0.15)
            pdf.savefig(dpi=300, papertype='a4', orientation='landscape')
            plt.close()

            if hk_hpf_freq is not None:
                # Repeat H-k stack with high pass filtering
                fig, maxima = _produce_hk_stacking(
                    rf_stream,
                    weighting=hk_weights,
                    labelling=hk_solution_labels,
                    V_p=hk_vp,
                    filter_options={
                        'type': 'highpass',
                        'freq': hk_hpf_freq,
                        'corners': 1,
                        'zerophase': True
                    })
                if save_hk_solution:
                    hk_soln[st] = maxima
                    station_coords[st] = (
                        channel_data[0].stats.station_latitude,
                        channel_data[0].stats.station_longitude)
                # end if
                fig.set_size_inches(*paper_landscape)
                pdf.savefig(dpi=300, papertype='a4', orientation='landscape')
                plt.close()
            # end if

        # end for
        pbar.close()
    # end with

    # Save H-k solutions to CSV file
    if hk_soln:
        assert len(hk_soln) == len(station_coords)
        # Sort H-k solutions by depth from low to high
        update_dict = {}
        for st, hks in hk_soln.items():
            sorted_hks = sorted([tuple(hk) for hk in hks])
            # Flatten to a single row: lat, lon, then each solution's values
            # in sorted order
            update_dict[st] = np.array(
                list(station_coords[st]) +
                [i for hk in sorted_hks for i in hk])
        # end for
        hk_soln.update(update_dict)

        df = pd.DataFrame.from_dict(hk_soln, orient='index')
        # Column names H0, k0, H1, k1, ... for as many pairs as there are
        # columns beyond the two coordinate columns
        colnames = [('H{}'.format(i), 'k{}'.format(i))
                    for i in range((len(df.columns) - 2) // 2)]
        colnames = ['Latitude', 'Longitude'] + list(
            itertools.chain.from_iterable(colnames))
        df.columns = colnames
        # CSV is written next to the PDF, with the same base name
        csv_fname, _ = os.path.splitext(output_file)
        csv_fname += '.csv'
        df.index.name = 'Station'
        df.to_csv(csv_fname)
Example #5
0
def main():
    """Main entry function for RF picking tool.

    Prompts for an input RF file and an output folder, then for each station
    displays the quality-filtered RF stack of the chosen channel and lets the
    user pick traces interactively. The event ids of the picked traces are
    written to ``<NET>.<STA>.<CHA>_event_mask.txt`` in the output folder, one
    id per line.

    Returns without doing anything if either dialog is cancelled or if the
    input file contains no traces.
    """
    infile = filedialog.askopenfilename(initialdir=".",
                                        title="Select RF file",
                                        filetypes=(("h5 files", "*.h5"), ))
    if not infile:
        # Dialog was cancelled: there is nothing to load.
        log.warning("No RF file selected, exiting")
        return
    # end if
    output_folder = filedialog.askdirectory(
        initialdir=os.path.split(infile)[0], title='Select output folder')
    if not output_folder:
        # Dialog was cancelled: creating an empty path would raise.
        log.warning("No output folder selected, exiting")
        return
    # end if
    if not os.path.isdir(output_folder):
        log.info("Creating output folder {}".format(output_folder))
        os.makedirs(output_folder, exist_ok=True)
    # end if
    log.info("Output files will be emitted to {}".format(output_folder))

    log.info("Loading %s", infile)
    data_all = rf_util.read_h5_rf(infile)
    if not data_all:
        # The rotation type is read from the first trace below, so an empty
        # file cannot be processed.
        log.warning("No RF data found in {}, exiting".format(infile))
        return
    # end if
    data_dict = rf_util.rf_to_dict(data_all)

    stations = sorted(data_dict.keys())

    # Assuming same rotation type for all RFs. This is consistent with the existing workflow.
    rf_type = data_all[0].stats.rotation

    for st in stations:
        station_db = data_dict[st]

        # Choose RF channel
        channel = rf_util.choose_rf_source_channel(rf_type, station_db)
        channel_data = station_db[channel]
        # Check assumption
        for tr in channel_data:
            assert tr.stats.rotation == rf_type, 'Mismatching RF rotation type'
        # end for

        # Label and filter quality
        rf_util.label_rf_quality_simple_amplitude(rf_type, channel_data)
        rf_stream = rf.RFStream([
            tr for tr in channel_data if tr.stats.predicted_quality == 'a'
        ]).sort(['back_azimuth'])
        if not rf_stream:
            log.info("No data survived filtering for %s, skipping", st)
            continue
        # end if

        # Plot RF stack of primary component
        fig = rf_plot_utils.plot_rf_stack(rf_stream)
        fig.set_size_inches(8, 9)
        fig.suptitle("Channel {}".format(rf_stream[0].stats.channel))
        ax0 = fig.axes[0]
        # Make sure we draw once first before capturing blit background
        fig.canvas.draw()
        # Disallow resizing to avoid having to handle blitting with resized window.
        win = fig.canvas.window()
        win.setFixedSize(win.size())
        blit_background = fig.canvas.copy_from_bbox(ax0.bbox)

        # One flag per trace, passed to the selection callbacks
        mask = np.zeros(len(rf_stream), dtype=bool)
        rect_select = RectangleSelector(ax0,
                                        lambda e0, e1: on_select(e0, e1, mask),
                                        useblit=True,
                                        rectprops=dict(fill=False,
                                                       edgecolor='red'))
        cid = fig.canvas.mpl_connect(
            'button_release_event',
            lambda e: on_release(e, ax0, mask, blit_background, rect_select))
        plt.show()

        fig.canvas.mpl_disconnect(cid)
        rect_select = None

        selected_event_ids = [
            tr.stats.event_id for tr, picked in zip(rf_stream, mask) if picked
        ]
        log.info("{} streams selected".format(len(selected_event_ids)))
        log.info("Selected event ids:")
        log.info(selected_event_ids)

        network = rf_stream[0].stats.network
        outfile = os.path.join(
            output_folder,
            '.'.join([network, st, channel]) + '_event_mask.txt')
        log.info("Writing mask to file {}".format(outfile))
        if os.path.exists(outfile):
            log.warning("Overwriting existing file {} !".format(outfile))
        # end if
        with open(outfile, 'w') as f:
            f.write('\n'.join(selected_event_ids))
        # end with
    # end for