Example #1
    def final_model(self):
        """Return tuple of iteration and step count for final accepted model"""
        try:
            return self.steps.index[-1], self.steps[-1][-1]
        except TypeError:
            logger.warning("Inspector has no 'steps' data, returning None")
            return None, None
Example #2
    def initial_model(self):
        """Return tuple of the iteration and step count corresponding to M00"""
        try:
            return self.steps.index[0], self.steps[0][0]
        except TypeError:
            logger.warning("Inspector has no 'steps' data, returning None")
            return None, None
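A minimal usage sketch, assuming `initial_model` and `final_model` are exposed as properties of a populated pyatoa Inspector instance named `insp` (attribute-style access matches Example #4; the labels shown are illustrative):

iteration_0, step_0 = insp.initial_model   # e.g., ("i01", "s00")
iteration_f, step_f = insp.final_model     # e.g., ("i05", "s02")
if iteration_0 is None:
    print("Inspector holds no 'steps' data")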
Example #3
def taper_time_offset(st, taper_percentage=0.05, time_offset_sec=0):
    """
    Taper the leading edge of the waveform. If a time offset is given,
    e.g. 20s before the event origin time (T_0), taper all the way up from
    T=0 to T=T_0, to ensure that there are no impulse-like signals prior to the
    event origin.

    :type st: obspy.core.stream.Stream
    :param st: Stream object to be tapered
    :type taper_percentage: float
    :param taper_percentage: default taper percentage
    :type time_offset_sec: float
    :param time_offset_sec: Any time offset between the start of the stream to
        the event origin time. All time between these two points will be tapered
        to reduce any signals prior to the event origin.
    :rtype: obspy.core.stream.Stream
    :return: tapered Stream object
    """
    taper_amount = st[0].stats.npts * taper_percentage * st[0].stats.delta

    if taper_amount > abs(time_offset_sec):
        logger.warning("taper amount exceeds time offset, taper may affect "
                       "data if source receiver distance is short")
    elif taper_amount < abs(time_offset_sec):
        logger.info(f"adjusting taper to cover time offset {time_offset_sec}")
        taper_percentage = (abs(time_offset_sec) /
                            (st[0].stats.npts * st[0].stats.delta))

    # Get rid of extra long period signals which may adversely affect processing
    st.detrend("simple").taper(taper_percentage, side="left")

    return st
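A self-contained sketch of how the taper might be applied, using a synthetic ObsPy Stream; the 20 s offset and sampling parameters are illustrative and `taper_time_offset` is assumed importable from the module above:

import numpy as np
from obspy import Trace, Stream

# 200 s of 1 Hz data; assume the first 20 s precede the event origin
tr = Trace(data=np.random.randn(200), header={"delta": 1.0})
st = Stream([tr])

# Taper the leading 20 s so no signal remains before the origin time
st = taper_time_offset(st, taper_percentage=0.05, time_offset_sec=20)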
Example #4
    def compare(self, iteration_a=None, step_count_a=None, iteration_b=None,
                step_count_b=None):
        """
        Compare the misfit and number of windows on an event by event basis
        between two evaluations. Provides absolute values as well as
        differences. Final dataframe is sorted by the difference in misfit,
        showing the most and least improved events.

        :type iteration_a: str
        :param iteration_a: initial iteration to use in comparison
        :type step_count_a: str
        :param step_count_a: initial step count to use in comparison
        :type iteration_b: str
        :param iteration_b: final iteration to use in comparison
        :type step_count_b: str
        :param step_count_b: final step count to use in comparison
        :rtype: pandas.core.frame.DataFrame
        :return: a sorted data frame containing the difference of misfit and
            number of windows between final and initial
        """
        # If evaluations are not given, default to the initial and final models
        if iteration_a is None:
            iteration_a, step_count_a = self.initial_model
        if iteration_b is None:
            iteration_b, step_count_b = self.final_model

        # If initial or final models not given, nothing to compare
        if None in [iteration_a, step_count_a, iteration_b, step_count_b]:
            logger.warning("Cannot locate model indices to compare model data")
            return None

        misfit = self.misfit(level="event")
        msft_a = misfit.loc[iteration_a, step_count_a]
        msft_b = misfit.loc[iteration_b, step_count_b]

        # Doesn't really make sense to compare unscaled misfit so drop column
        msft_a = msft_a.drop(["unscaled_misfit"], axis=1).copy()
        msft_b = msft_b.drop(["unscaled_misfit"], axis=1).copy()

        # For renaming and access to renamed columns
        initial = f"{iteration_a}{step_count_a}"
        final = f"{iteration_b}{step_count_b}"

        msft_a.rename({"nwin": f"nwin_{initial}",
                       "misfit": f"misfit_{initial}"},
                      axis="columns", inplace=True)
        msft_b.rename({"nwin": f"nwin_{final}", "misfit": f"misfit_{final}"},
                      axis="columns", inplace=True)

        df = pd.merge(msft_a, msft_b, left_index=True, right_index=True)
        df["diff_misfit"] = df[f"misfit_{final}"] - df[f"misfit_{initial}"]
        df["diff_nwin"] = df[f"nwin_{final}"] - df[f"nwin_{initial}"]

        return df.sort_values(by="diff_misfit")
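A hedged usage sketch, assuming a populated Inspector instance `insp`; the iteration and step count labels are illustrative:

# Compare the initial and final accepted models (the defaults)
df = insp.compare()

# Or compare two specific evaluations explicitly
df = insp.compare(iteration_a="i01", step_count_a="s00",
                  iteration_b="i03", step_count_b="s01")
if df is not None:
    print(df[["diff_misfit", "diff_nwin"]].head())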
Example #5
    def save(self, path="./", fmt="csv", tag=None):
        """
        Save the collected attributes to CSV (or HDF) files for easier re-loading.

        .. note::
            fmt == 'hdf' requires 'pytables' to be installed in the environment

        :type tag: str
        :param tag: tag to use to save files, defaults to the class tag
            but allows for the option of overwriting that
        :type path: str
        :param path: optional path to save to, defaults to cwd
        :type fmt: str
        :param fmt: format of the files to write, default csv
        """
        if tag is None:
            tag = self.tag
        if fmt == "hdf":
            try:
                import tables  # the PyTables package imports as 'tables'
            except ImportError:
                fmt = "csv"
                print("format 'hdf' requires pytables, defaulting to 'csv'")

        if fmt == "csv":
            write_check = 0
            if not self.sources.empty:
                self.sources.to_csv(os.path.join(path, f"{tag}_src.csv"))
                write_check += 1
            if not self.receivers.empty:
                self.receivers.to_csv(os.path.join(path, f"{tag}_rcv.csv"))
                write_check += 1
            if not self.windows.empty:
                self.windows.to_csv(os.path.join(path, f"{tag}.csv"),
                                    index=False)
                write_check += 1
            if write_check == 0:
                logger.warning("Inspector empty, will not write to disk")
        elif fmt == "hdf":
            with pd.HDFStore(os.path.join(path, f"{tag}.hdf")) as s:
                s["sources"] = self.sources
                s["receivers"] = self.receivers
                s["windows"] = self.windows
        else:
            raise NotImplementedError
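A short usage sketch, assuming an Inspector instance `insp` that already holds sources, receivers and windows; the tag is illustrative:

# Write sources, receivers and windows as CSV files tagged 'm00'
insp.save(path="./", fmt="csv", tag="m00")

# Or a single HDF store (falls back to CSV if PyTables is not installed)
insp.save(path="./", fmt="hdf", tag="m00")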
Example #6
    def event_get(self, event_id=None):
        """
        Return event information for a given event id if one is available,
        otherwise search by origin time. Catches FDSN exceptions.

        :rtype event: obspy.core.event.Event or None
        :return event: event object if found, else None.
        """
        if not self.Client:
            return None
        if event_id is None:
            event_id = self.config.event_id

        event, origintime = None, None
        if event_id is not None:
            try:
                # Get events via event id, only available from certain clients
                logger.debug(f"event ID: {event_id}, querying "
                             f"client {self.config.client}")
                event = self.Client.get_events(eventid=event_id)[0]
            except FDSNException:
                pass
        if self.origintime and event is None:
            try:
                # If getting by event id doesn't work, try based on origintime
                logger.debug(f"origintime: {self.origintime}, querying "
                             f"client {self.config.client}")
                event = self.Client.get_events(starttime=self.origintime,
                                               endtime=self.origintime)
                if len(event) > 1:
                    # Getting by origin time may result in multiple events
                    # found in the catalog, this is hard to control and will
                    # probably need to be addressed manually.
                    logger.warning(f"{len(event)} events found, expected 1. "
                                   f"Returning first entry, manual revision "
                                   f"may be required.")
                event = event[0]
            except FDSNException:
                pass
        return event
Example #7
    def write(self, write_to="ds"):
        """
        Write the data collected inside Manager to either a Pyasdf Dataset,
        or to individual files (not implemented).

        :type write_to: str
        :param write_to: choice to write data to, if "ds" writes to a
            pyasdf.asdf_data_set.ASDFDataSet

            * write_to == "ds":
                If gather is skipped but data should still be saved into an
                ASDFDataSet for data storage, this function will
                fill that dataset in the same fashion as the Gatherer class
            * write_to == "/path/to/output":
                write out all the internal data of the manager to a path
        """
        if write_to == "ds":
            if self.event:
                try:
                    self.ds.add_quakeml(self.event)
                except ValueError:
                    logger.warning("Event already present, not added")
            if self.inv:
                try:
                    self.ds.add_stationxml(self.inv)
                except TypeError:
                    logger.warning("StationXML already present, not added")
            # PyASDF has its own warnings if waveform data already present
            if self.st_obs:
                self.ds.add_waveforms(waveform=self.st_obs,
                                      tag=self.config.observed_tag)
            if self.st_syn:
                self.ds.add_waveforms(waveform=self.st_syn,
                                      tag=self.config.synthetic_tag)
            if self.windows:
                self.save_windows()
            if self.adjsrcs:
                self.save_adjsrcs()
        else:
            raise NotImplementedError
Example #8
def zero_pad_then_window(ws, pad_by_fraction_of_npts=.2):
    """
    To address Pyflex throwing ValueErrors when source-receiver distances are
    small, zero pad the front of the waveforms before re-running the window
    selection.

    .. note::
        Sept 1, 2020
        Work in progress, may not actually want to do this to avoid any
        near-source effects?

    :type ws: pyflex.WindowSelector
    :param ws: an already-filled window selector object that should
        be passed in from the Manager object
    :rtype: list of pyflex.Window
    :return: a list of Window objects, or an empty list if no windows found or
        the zero padding didn't work
    """
    raise NotImplementedError

    logger.warning("Pyflex has thrown a ValueError, most likely due to a small "
                   "source-receiver distance. Attempting to zero-pad waveforms "
                   "and re-run window selection")

    # We assume that these traces have already been standardized. These values
    # will be used to ensure that we can undo the zero-padding
    original_origintime = ws.observed.stats.starttime
    original_endtime = ws.observed.stats.endtime
    original_npts = ws.observed.stats.npts

    # Pad by a fraction of the trace length
    pad_width = int(original_npts * pad_by_fraction_of_npts)

    # Pad only the front of the data and shift the start times so that the
    # observed and synthetic traces stay aligned
    ws.observed.data = np.pad(ws.observed.data, (pad_width, 0), mode="constant")
    ws.observed.stats.starttime -= pad_width * ws.observed.stats.delta

    ws.synthetic.data = np.pad(ws.synthetic.data, (pad_width, 0), mode="constant")
    ws.synthetic.stats.starttime -= pad_width * ws.synthetic.stats.delta

    ws.select_windows()
Example #9
    def save_windows(self):
        """
        Convenience function to save collected misfit windows into an 
        ASDFDataSet with some preliminary checks

        Auxiliary data tag is hardcoded as 'MisfitWindows'
        """
        if self.ds is None:
            logger.warning("Manager has no ASDFDataSet, cannot save windows")
        elif not self.windows:
            logger.warning("Manager has no windows to save")
        elif not self.config.save_to_ds:
            logger.warning("config parameter save_to_ds is set False, "
                           "will not save windows")
        else:
            logger.debug("saving misfit windows to ASDFDataSet")
            add_misfit_windows(self.windows,
                               self.ds,
                               path=self.config.aux_path)
Example #10
    def save_adjsrcs(self):
        """
        Convenience function to save collected adjoint sources into an 
        ASDFDataSet with some preliminary checks

        Auxiliary data tag is hardcoded as 'AdjointSources'        
        """
        if self.ds is None:
            logger.warning("Manager has no ASDFDataSet, cannot save "
                           "adjoint sources")
        elif not self.adjsrcs:
            logger.warning("Manager has no adjoint sources to save")
        elif not self.config.save_to_ds:
            logger.warning("config parameter save_to_ds is set False, "
                           "will not save adjoint sources")
        else:
            logger.debug("saving adjoint sources to ASDFDataSet")
            add_adjoint_sources(adjsrcs=self.adjsrcs,
                                ds=self.ds,
                                path=self.config.aux_path,
                                time_offset=self.stats.time_offset_sec)
Example #11
def trim_streams(st_a, st_b, precision=1E-3, force=None):
    """
    Trim two streams to a common start and end time and do some basic
    preprocessing before trimming. Allows the user to force one stream to
    conform to the other. Assumes all traces in a stream share the same timing.
    Prechecks make sure that the streams actually differ before trimming.

    :type st_a: obspy.stream.Stream
    :param st_a: streams to be trimmed
    :type st_b: obspy.stream.Stream
    :param st_b: streams to be trimmed
    :type precision: float
    :param precision: precision to check UTCDateTime differences
    :type force: str
    :param force: "a" or "b"; force trim to the length of "st_a" or to "st_b",
        if not given, trims to the common time
    :rtype: tuple of obspy.stream.Stream
    :return: trimmed stream objects in the same order as input
    """
    # Check if the times are already the same
    if abs(st_a[0].stats.starttime - st_b[0].stats.starttime) < precision and \
            abs(st_a[0].stats.endtime - st_b[0].stats.endtime) < precision:
        logger.debug(f"start and endtimes already match to {precision}")
        return st_a, st_b

    # Force the trim to the start and end times of one of the streams
    if force:
        if force.lower() == "a":
            start_set = st_a[0].stats.starttime
            end_set = st_a[0].stats.endtime
        elif force.lower() == "b":
            start_set = st_b[0].stats.starttime
            end_set = st_b[0].stats.endtime
    # Get the start and end times based on the latest start and earliest end
    else:
        st_trimmed = st_a + st_b
        start_set, end_set = 0, 1E10
        for st in st_trimmed:
            start_hold = st.stats.starttime
            end_hold = st.stats.endtime
            if start_hold > start_set:
                start_set = start_hold
            if end_hold < end_set:
                end_set = end_hold

    # Trim to common start and end times
    st_a_out = st_a.copy()
    st_b_out = st_b.copy()
    for st in [st_a_out, st_b_out]:
        st.trim(start_set, end_set)

    # Trimming doesn't always make the starttimes exactly equal if the precision
    # of the UTCDateTime object is set too high.
    # Artificially shift the starttime of the streams iff the amount shifted
    # is less than the sample spacing (delta)
    for st in [st_a_out, st_b_out]:
        for tr in st:
            dt = start_set - tr.stats.starttime
            if 0 < dt < tr.stats.delta:
                logger.debug(f"shifting {tr.id} starttime by {dt}s")
                tr.stats.starttime = start_set
            elif dt >= tr.stats.delta:
                logger.warning(
                    f"{tr.id} starttime is {dt}s greater than delta")

    return st_a_out, st_b_out
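A self-contained sketch using two synthetic ObsPy streams whose start times differ by a few seconds; all values are illustrative and `trim_streams` is assumed importable from the module above:

import numpy as np
from obspy import Trace, Stream, UTCDateTime

t0 = UTCDateTime("2018-02-18T07:43:48")
tr_a = Trace(data=np.zeros(300), header={"delta": 1.0, "starttime": t0})
tr_b = Trace(data=np.zeros(300), header={"delta": 1.0, "starttime": t0 + 5})

# Trim both streams to their common overlap
st_a, st_b = trim_streams(Stream([tr_a]), Stream([tr_b]))

# Or force stream B to conform to stream A's start and end times
st_a, st_b = trim_streams(Stream([tr_a]), Stream([tr_b]), force="a")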
Example #12
    def window(self,
               fix_windows=False,
               iteration=None,
               step_count=None,
               force=False,
               save=True):
        """
        Evaluate misfit windows using Pyflex. Save windows to ASDFDataSet.
        Allows previously defined windows to be retrieved from ASDFDataSet.

        .. note::
            * Windows are stored as dictionaries of pyflex.Window objects.
            * All windows are saved into the ASDFDataSet, even if retrieved.
            * STA/LTA information is collected and stored internally.

        :type fix_windows: bool
        :param fix_windows: do not pick new windows, but load windows from the
            given dataset from 'iteration' and 'step_count'
        :type iteration: int or str
        :param iteration: if 'fix_windows' is True, look for windows in this
            iteration. If None, will check the latest iteration/step_count
            in the given dataset
        :type step_count: int or str
        :param step_count: if 'fix_windows' is True, look for windows in this
            step_count. If None, will check the latest iteration/step_count
            in the given dataset
        :type force: bool
        :param force: ignore flag checks and run function, useful if e.g.
            external preprocessing is used that doesn't meet flag criteria
        :type save: bool
        :param save: save the gathered windows to an ASDF Dataset
        """
        # Pre-check to see if data has already been standardized
        self.check()

        if self.config.pyflex_preset is None:
            logger.info("pyflex preset is set to 'None', will not window")
            return

        if not self.stats.standardized and not force:
            raise ManagerError("cannot window, waveforms not standardized")

        # Determine how to treat fixed windows
        if fix_windows and not self.ds:
            logger.warning("cannot fix window, no dataset")
            fix_windows = False
        elif fix_windows and (iteration is None or step_count is None):
            # If no iteration/step_count values are given, automatically search
            # the previous step_count for windows in relation to the current
            # iteration/step_count
            iteration = self.config.iteration
            step_count = self.config.step_count
            return_previous = True
        else:
            # If fix windows and iteration/step_count are given, search the
            # dataset for windows under the current iteration/step_count
            return_previous = False

        # Synthetic STA/LTA as Pyflex WindowSelector.calculate_preliminaries()
        for comp in self.config.component_list:
            try:
                self.staltas[comp] = pyflex.stalta.sta_lta(
                    data=envelope(self.st_syn.select(component=comp)[0].data),
                    dt=self.st_syn.select(component=comp)[0].stats.delta,
                    min_period=self.config.min_period)
            except IndexError:
                continue

        # Find misfit windows, from a dataset or through window selection
        if fix_windows:
            self.retrieve_windows(iteration, step_count, return_previous)
        else:
            self.select_windows_plus()

        if save:
            self.save_windows()
        logger.info(f"{self.stats.nwin} window(s) total found")

        return self
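A hedged usage sketch, assuming a pyatoa Manager `mgr` whose waveforms have already been gathered, standardized and preprocessed; the iteration and step count labels are illustrative:

# Pick new windows with Pyflex and save them to the ASDFDataSet
mgr.window()

# Or re-use windows stored under a previous evaluation in the dataset
mgr.window(fix_windows=True, iteration="i01", step_count="s00")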
Example #13
    def preprocess(self, which="both", overwrite=None, **kwargs):
        """
        Preprocess observed and synthetic waveforms in place.
        Default preprocessing tasks: Remove response (observed), rotate, filter,
        convolve with source time function (synthetic).

        .. note::
            Default preprocessing can be overwritten using a
            user-defined function that takes Manager and choice as inputs
            and outputs an ObsPy Stream object.

        .. note::
            Documented kwargs only apply to default preprocessing.

        :type which: str
        :param which: "obs", "syn" or "both" to choose which stream to process
            defaults to both
        :type overwrite: function
        :param overwrite: If a function is provided, it will overwrite the 
            standard preprocessing function. All arguments that are given
            to the standard preprocessing function will be passed as kwargs to
            the new function. This allows for customized preprocessing

        Keyword Arguments
        ::
            int water_level:
                water level for response removal
            float taper_percentage:
                amount to taper ends of waveform
            bool remove_response:
                remove instrument response using the Manager's inventory object.
                Defaults to True
            bool apply_filter:
                filter the waveforms using the Config's min_period and
                max_period parameters. Defaults to True
            bool convolve_with_stf:
                Convolve synthetic data with a Gaussian source time function if
                a half duration is provided.
        """
        if not self.inv and not self.config.synthetics_only:
            raise ManagerError("cannot preprocess, no inventory")
        if overwrite:
            assert (hasattr(overwrite,
                            '__call__')), "overwrite must be function"
            preproc_fx = overwrite
        else:
            preproc_fx = default_process

        # If required, will rotate based on source receiver lat/lon values
        if self.config.rotate_to_rtz:
            if not self.inv:
                logger.warning("cannot rotate components, no inventory")
            else:
                self.gcd, self.baz = gcd_and_baz(event=self.event,
                                                 sta=self.inv[0][0])

        # Preprocess observation waveforms
        if self.st_obs is not None and not self.stats.obs_processed and \
                which.lower() in ["obs", "both"]:
            logger.info("preprocessing observation data")
            self.st_obs = preproc_fx(self, choice="obs", **kwargs)
            self.stats.obs_processed = True

        # Preprocess synthetic waveforms
        if self.st_syn is not None and not self.stats.syn_processed and \
                which.lower() in ["syn", "both"]:
            logger.info("preprocessing synthetic data")
            self.st_syn = preproc_fx(self, choice="syn", **kwargs)
            self.stats.syn_processed = True

        # Set stats
        self.stats.len_obs = len(self.st_obs)
        self.stats.len_syn = len(self.st_syn)

        return self
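The `overwrite` hook expects a function that takes the Manager and a `choice` string and returns an ObsPy Stream. A minimal sketch of such a function, with illustrative processing steps (the function name is hypothetical):

def my_preprocess(mgr, choice="obs", **kwargs):
    """Example user-defined preprocessing: detrend, taper, bandpass."""
    st = mgr.st_obs.copy() if choice == "obs" else mgr.st_syn.copy()
    st.detrend("linear")
    st.taper(max_percentage=0.05)
    st.filter("bandpass", freqmin=1 / mgr.config.max_period,
              freqmax=1 / mgr.config.min_period)
    return st

# Apply the custom function to both observed and synthetic waveforms
mgr.preprocess(which="both", overwrite=my_preprocess)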
Example #14
    def gather(self, code=None, choice=None, event_id=None, **kwargs):
        """
        Gather station dataless and waveform data using the Gatherer class.
        In order, collects observed waveforms, dataless (station metadata),
        and finally synthetics.

        For valid kwargs see methods in :doc:`core.gatherer`

        :type code: str
        :param code: Station code following SEED naming convention.
            This must be in the form NN.SSSS.LL.CCC (N=network, S=station,
            L=location, C=channel). Allows for wildcard naming. By default
            the pyatoa workflow wants three orthogonal components in the N/E/Z
            coordinate system. Example station code: NZ.OPRZ.10.HH?
        :type choice: list
        :param choice: allows user to gather individual bits of data, rather
            than gathering all. Allowed: 'inv', 'st_obs', 'st_syn'
        :raises ManagerError: if any part of the gathering fails.

        Keyword Arguments
        ::
            bool try_fm:
                Try to retrieve and append focal mechanism information to the
                Event object.
            str prefix:
                Prefix for event id when searching for event information,
                can be used to search ordered files e.g., CMTSOLUTION_001
            str suffix:
                Suffix for event id when searching for event information
            str station_level:
                The level of the station metadata if retrieved using the ObsPy
                Client. Defaults to 'response'
            str resp_dir_template:
                Directory structure template to search for response files.
                By default follows the SEED convention:
                'path/to/RESPONSE/{sta}.{net}/'
            str resp_fid_template:
                Response file naming template to search for station dataless.
                By default, follows the SEED convention
                'RESP.{net}.{sta}.{loc}.{cha}'
            str obs_dir_template:
                directory structure to search for observation data. Follows the
                SEED convention: 'path/to/obs_data/{year}/{net}/{sta}/{cha}'
            str obs_fid_template:
                File naming template to search for observation data. Follows the
                SEED convention: '{net}.{sta}.{loc}.{cha}*{year}.{jday:0>3}'
            str syn_cfgpath:
                Config.cfgpaths key to search for synthetic data. Defaults to
                'synthetics', but may need to be set to 'waveforms' in
                certain use cases, e.g. synthetic-synthetic inversions.
            str syn_unit:
                Optional argument to specify the letter used to identify the
                units of the synthetic data: For Specfem3D: ["d", "v", "a", "?"]
                'd' for displacement, 'v' for velocity,  'a' for acceleration.
                Wildcards okay. Defaults to '?'
            str syn_dir_template:
                Directory structure template to search for synthetic waveforms.
                Defaults to empty string
            str syn_fid_template:
                The naming template of synthetic waveforms defaults to:
                "{net}.{sta}.*{cmp}.sem{syn_unit}"
        """
        # Default to gathering all data
        if choice is None:
            choice = ["event", "inv", "st_obs", "st_syn"]
        try:
            # Attempt to gather event information before waveforms/metadata
            if "event" in choice and self.event is None:
                if event_id is None:
                    event_id = self.config.event_id
                self.event = self.gatherer.gather_event(event_id, **kwargs)
            if code is not None:
                logger.info(f"gathering data for {code}")
                if "st_obs" in choice:
                    # Ensure observed waveforms gathered before synthetics and
                    # metadata. If this fails, no point to gathering the rest
                    self.st_obs = self.gatherer.gather_observed(code, **kwargs)
                if "inv" in choice:
                    self.inv = self.gatherer.gather_station(code, **kwargs)
                if "st_syn" in choice:
                    self.st_syn = self.gatherer.gather_synthetic(
                        code, **kwargs)

            return self
        except GathererNoDataException as e:
            # Catch the Gatherer exception and redirect as ManagerError
            # so that it can be caught by flow()
            raise ManagerError("Data Gatherer could not find some data") from e
        except Exception as e:
            # Gathering should be robust, but if something slips through, don't
            # let it kill a workflow, display and raise ManagerError
            logger.warning(e, exc_info=True)
            raise ManagerError("Uncontrolled error in data gathering") from e
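A hedged usage sketch, assuming a Manager `mgr` initialized with a Config (and optionally an ASDFDataSet); the station code mirrors the docstring example:

# Gather event metadata, station metadata and waveforms in one call
mgr.gather(code="NZ.OPRZ.10.HH?")

# Or gather only station metadata and observed waveforms
mgr.gather(code="NZ.OPRZ.10.HH?", choice=["inv", "st_obs"])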
Example #15
    def load(self,
             code,
             path=None,
             ds=None,
             synthetic_tag=None,
             observed_tag=None,
             config=True,
             windows=False,
             adjsrcs=False):
        """
        Populate the manager using a previously populated ASDFDataSet.
        Useful for re-instantiating an existing workflow that has already 
        gathered data and saved it to an ASDFDataSet.

        .. warning::
            Loading any floating point values may result in rounding errors.
            Be careful to round off floating points to the correct place before
            using in future work.

        :type code: str
        :param code: station code in the form NN.SSS (network.station only),
            e.g. NZ.BFZ
        :type path: str
        :param path: if no Config object is given during init, the User
            can specify the config path here to load data from the dataset.
            This skips the need to initiate a separate Config object.
        :type ds: None or pyasdf.asdf_data_set.ASDFDataSet
        :param ds: dataset can be given to load from, will not set the ds
        :type synthetic_tag: str
        :param synthetic_tag: waveform tag of the synthetic data in the dataset
            e.g. 'synthetic_m00s00'. If None given, will use `config` attribute.
        :type observed_tag: str
        :param observed_tag: waveform tag of the observed data in the dataset
            e.g. 'observed'. If None given, will use `config` attribute.
        :type config: bool
        :param config: load config from the dataset, defaults to True but
            can be set False if Config should be instantiated by the User
        :type windows: bool
        :param windows: load misfit windows from the dataset, defaults to False
        :type adjsrcs: bool
        :param adjsrcs: load adjoint sources from the dataset, defaults to False
        """
        # Allows a ds to be provided outside the attribute
        if ds is None:
            if self.ds:
                ds = self.ds
            else:
                raise TypeError("load requires a Dataset")

        # If no Config object in Manager, try to load from dataset
        if config:
            if path is None:
                raise TypeError("load requires valid 'path' argument")
            logger.info(f"loading config from dataset {path}")
            try:
                self.config = Config(ds=ds, path=path)
            except AttributeError:
                logger.warning(f"No Config object in dataset for path {path}")

        assert (self.config is not None), "Config object required for load"
        assert len(code.split('.')) == 2, "'code' must be in form 'NN.SSS'"
        if windows or adjsrcs:
            assert (path is not None), "'path' required to load auxiliary data"
            iter_, step = path.split("/")

        # Reset and populate using the dataset
        self.__init__(config=self.config, ds=ds, event=ds.events[0])
        net, sta = code.split('.')
        sta_tag = f"{net}.{sta}"
        if sta_tag in ds.waveforms.list():
            self.inv = ds.waveforms[sta_tag].StationXML
            self.st_syn = ds.waveforms[sta_tag][synthetic_tag
                                                or self.config.synthetic_tag]
            self.st_obs = ds.waveforms[sta_tag][observed_tag
                                                or self.config.observed_tag]
            if windows:
                self.windows = load_windows(ds, net, sta, iter_, step, False)
            if adjsrcs:
                self.adjsrcs = load_adjsrcs(ds, net, sta, iter_, step)
        else:
            logger.warning(f"no data for {sta_tag} found in dataset")

        self.check()
        return self
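A hedged usage sketch, assuming a dataset written by a previous pyatoa run that already contains an event, a Config and tagged waveforms; the filename and the 'iteration/step_count' path are illustrative:

from pyasdf import ASDFDataSet
from pyatoa import Manager

ds = ASDFDataSet("2018p130600.h5")  # hypothetical dataset from a prior run
mgr = Manager(ds=ds)
mgr.load(code="NZ.BFZ", path="i01/s00", windows=True, adjsrcs=True)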
Example #16
    def fetch_obs_by_dir(self, code, **kwargs):
        """
        Fetch observation waveforms via directory structure on disk.

        .. note::
            Default waveform directory structure assumed to follow SEED
            convention. That is:
            path/to/data/{YEAR}/{NETWORK}/{STATION}/{CHANNEL}*/{FID}
            e.g. path/to/data/2017/NZ/OPRZ/HHZ.D/NZ.OPRZ.10.HHZ.D

        :type code: str
        :param code: Station code following SEED naming convention.
            This must be in the form NN.SSSS.LL.CCC (N=network, S=station,
            L=location, C=channel). Allows for wildcard naming. By default
            the pyatoa workflow wants three orthogonal components in the N/E/Z
            coordinate system. Example station code: NZ.OPRZ.10.HH?
        :rtype stream: obspy.core.stream.Stream or None
        :return stream: stream object containing relevant waveforms, else None

        Keyword Arguments
        ::
            str obs_dir_template:
                directory structure to search for observation data.
                Follows the SEED convention:
                'path/to/obs_data/{year}/{net}/{sta}/{cha}'
            str obs_fid_template:
                File naming template to search for observation data.
                Follows the SEED convention:
                '{net}.{sta}.{loc}.{cha}*{year}.{jday:0>3}'
        """
        obs_dir_template = kwargs.get("obs_dir_template",
                                      "{year}/{net}/{sta}/{cha}*")
        obs_fid_template = kwargs.get(
            "obs_fid_template", "{net}.{sta}.{loc}.{cha}*{year}.{jday:0>3}")

        if self.origintime is None:
            raise AttributeError("'origintime' must be specified")

        net, sta, loc, cha = code.split('.')
        # If waveforms contain midnight, multiple files need to be read
        jdays = overlapping_days(origin_time=self.origintime,
                                 start_pad=self.config.start_pad,
                                 end_pad=self.config.end_pad)

        # Ensure that the paths are a list so that iterating doesn't accidentally
        # try to iterate through a string.
        paths = self.config.paths["waveforms"]
        if not isinstance(paths, list):
            paths = [paths]

        for path_ in paths:
            if not os.path.exists(path_):
                continue
            full_path = os.path.join(path_, obs_dir_template, obs_fid_template)
            pathlist = []
            for jday in jdays:
                pathlist.append(
                    full_path.format(net=net,
                                     sta=sta,
                                     cha=cha,
                                     loc=loc,
                                     jday=jday,
                                     year=self.origintime.year))
            st = Stream()
            for fid in pathlist:
                logger.debug(f"searching for observations: {fid}")
                for filepath in glob.glob(fid):
                    st += read(filepath)
                    logger.info(f"retrieved observations locally:\n{filepath}")
            if len(st) > 0:
                # Take care of gaps in data by converting to masked data
                st.merge()
                st.trim(starttime=self.origintime - self.config.start_pad,
                        endtime=self.origintime + self.config.end_pad)
                # Check if trimming retains data
                if len(st) > 0:
                    return st
                else:
                    logger.warning(
                        "data does not fit origin time +/- pad time")
                    return None
        else:
            return None
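A hedged usage sketch, assuming a Gatherer-like instance `gatherer` with `origintime` and `config.paths['waveforms']` set; the templates restate the documented defaults and the station code is illustrative:

st = gatherer.fetch_obs_by_dir(
    code="NZ.OPRZ.10.HH?",
    obs_dir_template="{year}/{net}/{sta}/{cha}*",
    obs_fid_template="{net}.{sta}.{loc}.{cha}*{year}.{jday:0>3}")
if st is None:
    print("no local observations found")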
Example #17
    def check(self):
        """
        (Re)check the stats of the workflow and data within the Manager.

        Rechecks conditions whenever called, in case something has gone awry
        mid-workflow. Stats should only be set by this function.
        """
        # Give dataset filename if available
        if self.stats.dataset_id is None and self.ds is not None:
            self.stats.dataset_id = os.path.basename(self.ds.filename)

        # Determine the resource identifier for the Event object
        if self.stats.event_id is None and self.event is not None:
            self.stats.event_id = self.event.resource_id.id

        # Get the network and station name from the Inventory object
        if self.stats.inv_name is None and self.inv is not None:
            self.stats.inv_name = ".".join(
                [self.inv[0].code, self.inv[0][0].code])

        # Check if waveforms are Stream objects, and if preprocessed
        if self.st_obs is not None:
            self.stats.len_obs = len(self.st_obs)
            self.stats.obs_processed = is_preprocessed(self.st_obs)
            if self.stats.len_obs > len(self.config.component_list):
                logger.warning("More observed traces than listed components, "
                               "this may need to be reviewed manually")

        if self.st_syn is not None:
            self.stats.len_syn = len(self.st_syn)
            self.stats.syn_processed = is_preprocessed(self.st_syn)
            if self.stats.len_syn > len(self.config.component_list):
                logger.warning("More synthetic traces than listed components, "
                               "this may need to be reviewed manually")

        # Check standardization by comparing waveforms against the first
        if not self.stats.standardized and self.st_obs and self.st_syn:
            for tr in self.st[1:]:
                for atr in ["sampling_rate", "npts", "starttime"]:
                    if getattr(tr.stats, atr) != getattr(
                            self.st[0].stats, atr):
                        break
                else:
                    # All attributes matched for this trace, check the next
                    continue
                # A mismatch was found, waveforms are not standardized
                break
            else:
                self.stats.standardized = True

        # Check for half duration used for source-time-function with synthetics
        if not self.stats.half_dur and self.event is not None:
            try:
                mt = self.event.preferred_focal_mechanism().moment_tensor
                self.stats.half_dur = mt.source_time_function.duration / 2
            except AttributeError:
                pass

        # Count how many misfit windows are contained in the dataset
        if self.stats.nwin is None and self.windows is not None:
            self.stats.nwin = sum([len(_) for _ in self.windows.values()])

        # Determine the unscaled misfit
        if not self.stats.misfit and self.adjsrcs is not None:
            self.stats.misfit = sum([_.misfit for _ in self.adjsrcs.values()])
Example #18
    def fetch_event_by_dir(self,
                           event_id,
                           prefix="",
                           suffix="",
                           format_=None,
                           **kwargs):
        """
        Fetch event information via directory structure on disk. Developed to
        parse CMTSOLUTION and QUAKEML files, but theoretically accepts any 
        format that the ObsPy read_events() function will accept.

        Will search through all given paths until a matching source file is found.

        .. note::
            This function will search for the following path
            /path/to/event_dir/{prefix}{event_id}{suffix}
            
            so, if e.g., searching for a CMTSOLUTION file in the current dir:
            ./CMTSOLUTION_{event_id}

            Wildcards are okay but the function will return the first match

        :type event_id: str
        :param event_id: Unique event identifier to search source file by.
            e.g., a New Zealand earthquake ID '2018p130600'. A prefix or suffix
            will be tacked onto this 
        :rtype event: obspy.core.event.Event or None
        :return event: event object if found, else None.
        :type prefix: str
        :param prefix: Prefix to prepend to event id for file name searching.
            Wildcards are okay.
        :type suffix: str
        :param suffix: Suffix to append to event id for file name searching.
            Wildcards are okay.
        :type format_: str or NoneType
        :param format_: Expected format of the file to read, e.g., 'QUAKEML', 
            passed to ObsPy read_events. NoneType means read_events() will guess
        """
        # Ensure that the paths are a list so that iterating doesn't accidentally
        # try to iterate through a string.
        paths = self.config.paths["events"]
        if not isinstance(paths, list):
            paths = [paths]

        event = None
        for path_ in paths:
            if not os.path.exists(path_):
                continue
            # Search for available event files
            fid = os.path.join(path_, f"{prefix}{event_id}{suffix}")
            for filepath in glob.glob(fid):
                logger.debug(f"searching for event data: {filepath}")
                if os.path.exists(filepath):
                    try:
                        # Allow input of various types of source files
                        if "SOURCE" in prefix:
                            logger.info(
                                f"reading SPECFEM2D SOURCE: {filepath}")
                            cat = [read_specfem2d_source(filepath)]
                        elif "FORCESOLUTION" in prefix:
                            logger.info(f"reading FORCESOLUTION: {filepath}")
                            cat = [read_forcesolution(filepath)]
                        else:
                            logger.info(
                                f"reading source using ObsPy: {filepath}")
                            cat = read_events(filepath, format=format_)

                        if len(cat) != 1:
                            logger.warning(
                                f"{filepath} event file contains more than one "
                                "event, returning 1st entry")
                        event = cat[0]
                        break
                    except Exception as e:
                        logger.warning(f"{filepath} event file read error {e}")

        if event is not None:
            logger.info(f"retrieved local file:\n{filepath}")
        else:
            logger.info("no local event file found")

        return event
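A hedged usage sketch, assuming the same `gatherer` instance with `config.paths['events']` pointing at a directory of CMTSOLUTION files; the event id mirrors the docstring example:

event = gatherer.fetch_event_by_dir(event_id="2018p130600",
                                    prefix="CMTSOLUTION_")
if event is None:
    print("no matching local source file found")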