Example #1
    def syn_waveform_fetch(self, code, **kwargs):
        """
        Mid-level internal fetching function for synthetic waveform data.

        .. note::
            Checks if synthetics are already saved into an ASDFDataSet first.
            If synthetics are freshly generated from Specfem3D, they should have
            been placed into folders separated by event id and model iteration.

        :type code: str
        :param code: Station code following SEED naming convention.
            This must be in the form NN.SSSS.LL.CCC (N=network, S=station,
            L=location, C=channel). Allows for wildcard naming. By default
            the pyatoa workflow wants three orthogonal components in the N/E/Z
            coordinate system. Example station code: NZ.OPRZ.10.HH?
        :rtype: obspy.core.stream.Stream or None
        :return: stream object containing relevant waveforms, or None if no data
            is found
        """
        if self.ds:
            try:
                logger.info("searching ASDFDataSet for synthetics")
                return self.asdf_waveform_fetch(code,
                                                tag=self.config.synthetic_tag)
            except KeyError:
                pass
        logger.info("searching local filesystem for synthetics")
        return self.fetch_syn_by_dir(code, **kwargs)
Example #2
    def gather_observed(self, code, **kwargs):
        """
        Gather observed waveforms as ObsPy streams.
        Check disk, else query webservice. Save to ASDFDataSet if requested.

        :type code: str
        :param code: Station code following SEED naming convention.
            This must be in the form NN.SSSS.LL.CCC (N=network, S=station,
            L=location, C=channel). Allows for wildcard naming. By default
            the pyatoa workflow wants three orthogonal components in the N/E/Z
            coordinate system. Example station code: NZ.OPRZ.10.HH?
        :rtype: obspy.core.stream.Stream
        :return: stream object containing relevant waveforms
        """
        logger.info("gathering observed waveforms")
        st_obs = self.obs_waveform_fetch(code, **kwargs)
        if st_obs is None:
            st_obs = self.obs_waveform_get(code)
            if st_obs is None:
                raise GathererNoDataException(
                    f"no observed waveforms for {code} found")
        logger.info("matching observed waveforms found")
        self._save_waveforms_to_dataset(st_obs, self.config.observed_tag)

        return st_obs
Example #3
def taper_time_offset(st, taper_percentage=0.05, time_offset_sec=0):
    """
    Taper the leading edge of the waveform. If a time offset is given,
    e.g. 20s before the event origin time (T_0), taper all the way up from
    T=0 to T=T_0, to ensure that there are no impulse-like signals prior to the
    event origin.

    :type st: obspy.core.stream.Stream
    :param st: Stream object to be tapered
    :type taper_percentage: float
    :param taper_percentage: fraction of the trace length to taper, defaults
        to 5%
    :type time_offset_sec: float
    :param time_offset_sec: Any time offset between the start of the stream to
        the event origin time. All time between these two points will be tapered
        to reduce any signals prior to the event origin.
    :rtype: obspy.core.stream.Stream
    :return: tapered Stream object
    """
    taper_amount = st[0].stats.npts * taper_percentage * st[0].stats.delta

    if taper_amount > abs(time_offset_sec):
        logger.warning("taper amount exceeds time offset, taper may affect "
                       "data if source receiver distance is short")
    elif taper_amount < abs(time_offset_sec):
        logger.info(f"adjusting taper to cover time offset {time_offset_sec}")
        # Taper percentage is a fraction of trace length: offset / (npts * dt)
        taper_percentage = (abs(time_offset_sec) /
                            (st[0].stats.npts * st[0].stats.delta))

    # Get rid of extra long period signals which may adversely affect processing
    st.detrend("simple").taper(taper_percentage, side="left")

    return st
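
A minimal usage sketch (all names besides taper_time_offset are illustrative;
assumes NumPy and ObsPy are available):

import numpy as np
from obspy import Trace, Stream

# Dummy 100 s stream sampled at 1 Hz; pretend the event origin sits 20 s
# after the trace start
st = Stream([Trace(data=np.random.randn(100), header={"delta": 1.0})])

# Taper the leading 20 s so no spurious energy precedes the origin time
st = taper_time_offset(st, taper_percentage=0.05, time_offset_sec=20)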
Example #4
    def obs_waveform_fetch(self, code, **kwargs):
        """
        Mid-level internal fetching function for observation waveform data.

        :type code: str
        :param code: Station code following SEED naming convention.
            This must be in the form NN.SSSS.LL.CCC (N=network, S=station,
            L=location, C=channel). Allows for wildcard naming. By default
            the pyatoa workflow wants three orthogonal components in the N/E/Z
            coordinate system. Example station code: NZ.OPRZ.10.HH?
        :rtype: obspy.core.stream.Stream or None
        :return: stream object containing relevant waveforms, or None
        """
        if self.ds:
            try:
                # Search the given ASDFDataSet first
                logger.info("searching ASDFDataSet for observations")
                return self.asdf_waveform_fetch(code,
                                                tag=self.config.observed_tag)
            except KeyError:
                pass
        logger.info("searching local filesystem for observations")
        if self.config.synthetics_only:
            return self.fetch_syn_by_dir(code,
                                         syn_cfgpath="waveforms",
                                         **kwargs)
        else:
            return self.fetch_obs_by_dir(code, **kwargs)
Example #5
def reject_on_global_amplitude_ratio(data, windows, ratio=0.2):
    """
    Reject windows whose peak amplitude falls below a given fraction of the
    waveform's global peak amplitude.

    This was created in order to suppress windows containing long period direct 
    arrivals, which were creating high-frequency adjoint sources.

    :type data: np.ndarray
    :param data: data array to query amplitude values from
    :type windows: list of pyflex.window.Window
    :param windows: list of window objects to check
    :type ratio: float
    :param ratio: threshold ratio between the peak value within a given window
        and the global peak value in the data array. Defaults to 0.2
    :rtype: tuple of lists of pyflex.window.Window
    :return: lists of accepted and rejected windows
    """
    accepted_windows, rejected_windows = [], []
    # The global peak is the same for every window, so compute it only once
    waveform_peak = abs_max(data)
    for win in windows:
        window_peak = abs_max(data[win.left:win.right])
        # Check the waveform amplitudes
        if abs(window_peak / waveform_peak) > ratio:
            accepted_windows.append(win)
        else:
            rejected_windows.append(win)
            
    logger.info("rejection based on global amplitude ratio removed "
                f"{len(rejected_windows)} windows"
                )

    return accepted_windows, rejected_windows
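
A sketch of how this filter might slot in after Pyflex window selection;
st_obs and the windows dictionary are assumed to exist from a prior run:

# Keep only windows whose peak reaches >= 20% of the trace's global peak
data = st_obs.select(component="Z")[0].data
accepted, rejected = reject_on_global_amplitude_ratio(data=data,
                                                      windows=windows["Z"],
                                                      ratio=0.2)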
Example #6
def get_geonet_mt(event_id, csv_fid=None):
    """
    Get moment tensor information from an internal CSV file, or from an
    external GitHub repository query.
    Only relevant to the New Zealand tomography problem.
    GeoNet moment tensors are stored with a specific column format.

    :type event_id: str
    :param event_id: unique event identifier
    :type csv_fid: str
    :param csv_fid: optional path to GeoNet CMT solution file that is stored 
        locally on disk, will be accessed before querying web service
    :rtype moment_tensor: dict
    :return moment_tensor: dictionary created from rows of csv file
    """
    reader = None
    if csv_fid is not None:
        try:
            reader = csv.reader(open(csv_fid, 'r'), delimiter=',')
        except FileNotFoundError:
            pass

    if reader is None:
        # Request and open the CSV file. Assumed that GeoNet will keep their
        # moment-tensor information in their GitHub repository
        # Last accessed 23.6.19
        geonet_mt_csv = (
            "https://raw.githubusercontent.com/GeoNet/data/master/"
            "moment-tensor/GeoNet_CMT_solutions.csv")
        response = requests.get(geonet_mt_csv)
        if not response.ok:
            raise FileNotFoundError(f"Response from {geonet_mt_csv} not ok")

        reader = csv.reader(response.text.splitlines(), delimiter=',')

    # Parse the CSV file
    for i, row in enumerate(reader):
        # First row contains header information
        if i == 0:
            tags = row
        # First column gives event ids
        if row[0] == event_id:
            values = []
            # Grab the relevant information from the file
            for t, v in zip(tags, row):
                if t == "Date":
                    values.append(UTCDateTime(v))
                elif t == "PublicID":
                    values.append(v)
                else:
                    values.append(float(v))

            moment_tensor = dict(zip(tags, values))
            logger.info(f"geonet moment tensor found for: {event_id}")
            return moment_tensor
    else:
        raise AttributeError(f"no geonet moment tensor found for: {event_id}")
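
Example call, assuming the event ID exists in the GeoNet catalog (the 'Date'
and 'Mw' keys are taken from the CSV header row):

# A local CSV is checked first if csv_fid is given, else the web copy is used
mt = get_geonet_mt(event_id="2018p130600")
print(mt["Date"], mt["Mw"])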
Example #7
    def gather_obs_multithread(self,
                               codes,
                               max_workers=None,
                               print_exception=False,
                               **kwargs):
        """
        A multithreaded function that fetches all observed data (waveforms and
        StationXMLs) for a given event and stores it to an ASDFDataSet.
        Multithreading provides a significant speed-up for these request-based
        tasks.

        :type codes: list of str
        :param codes: A list of station codes where station codes must be in the
            form NN.SSSS.LL.CCC (N=network, S=station, L=location, C=channel)
        :type max_workers: int
        :param max_workers: number of concurrent threads to use, passed to the
            ThreadPoolExecutor. If left as None, concurrent.futures will
            automatically choose based on the system's number of cores.
        :type print_exception: bool
        :param print_exception: if True, print the full traceback for any
            exception raised inside a thread, not just the message

        Keyword Arguments
        ::
            int return_count:
                if not None, determines how many data items must be collected
                for the station to be saved into the ASDFDataSet.
                e.g. StationXML and 3 component waveforms would equal 4 pieces
                of data, so a return_count == 4 means stations that do not
                return all components and metadata will not be saved to the
                dataset.
        """
        from concurrent.futures import ThreadPoolExecutor, as_completed

        logger.info("mass gathering observation data")

        assert(self.ds is not None), \
            "Mass gathering requires a dataset `ds` for data storage"
        assert(self.Client is not None), \
            "Mass gathering requires a Client for data queries"
        assert(self.origintime is not None), \
            "Mass gathering requires an origintime for data queries"

        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = {
                executor.submit(self._obs_get_multithread, code, **kwargs):
                code
                for code in codes
            }
            for future in as_completed(futures):
                code = futures[future]
                try:
                    status = future.result()
                except Exception as e:
                    print(f"{code} exception: {e}\n")
                    if print_exception:
                        traceback.print_exc()
                else:
                    print(f"{code} data count: {status}")
Example #8
    def retrieve_windows(self, iteration, step_count, return_previous):
        """
        Mid-level window selection function that retrieves windows from a 
        PyASDF Dataset, recalculates window criteria, and attaches window 
        information to Manager. No access to rejected window information.

        :type iteration: int or str
        :param iteration: retrieve windows from the given iteration
        :type step_count: int or str
        :param step_count: retrieve windows from the given step count
            in the given dataset
        :type return_previous: bool
        :param return_previous: if True: return windows from the previous
            step count in relation to the given iteration/step_count.
            if False: return windows from the given iteration/step_count
        """
        logger.info(f"retrieving windows from dataset")

        net, sta, _, _ = self.st_obs[0].get_id().split(".")
        # Function will return empty dictionary if no acceptable windows found
        windows = load_windows(ds=self.ds,
                               net=net,
                               sta=sta,
                               iteration=iteration,
                               step_count=step_count,
                               return_previous=return_previous)

        # Recalculate window criteria for new values for cc, tshift, dlnA etc...
        logger.debug("recalculating window criteria")
        for comp, windows_ in windows.items():
            try:
                d = self.st_obs.select(component=comp)[0].data
                s = self.st_syn.select(component=comp)[0].data
                for w, win in enumerate(windows_):
                    # Post the old and new values to the logger for sanity check
                    logger.debug(f"{comp}{w}_old - "
                                 f"cc:{win.max_cc_value:.2f} / "
                                 f"dt:{win.cc_shift:.1f} / "
                                 f"dlnA:{win.dlnA:.2f}")
                    win._calc_criteria(d, s)
                    logger.debug(f"{comp}{w}_new - "
                                 f"cc:{win.max_cc_value:.2f} / "
                                 f"dt:{win.cc_shift:.1f} / "
                                 f"dlnA:{win.dlnA:.2f}")
            # IndexError thrown when trying to access an empty Stream
            except IndexError:
                continue

        self.windows = windows
        self.stats.nwin = sum(len(_) for _ in self.windows.values())
Example #9
    def event_fetch(self, event_id, **kwargs):
        """
        Mid-level internal fetching function for event information.
        Search ASDFDataSet for corresponding event, else look on disk.

        :type event_id: str
        :param event_id: Unique event identifier to search source file by,
            e.g., a New Zealand earthquake ID '2018p130600'. A prefix or suffix
            may be tacked onto this ID during the file search.
        :rtype event: obspy.core.event.Event or None
        :return event: event object if found, else None.
        """
        if self.ds:
            try:
                logger.info("searching ASDFDataSet for event info")
                return self.asdf_event_fetch()
            except (IndexError, AttributeError):
                pass
        logger.info("searching local filesystem for event info")
        return self.fetch_event_by_dir(event_id, **kwargs)
Example #10
    def _save_waveforms_to_dataset(self, st, tag):
        """
        Save waveforms to the ASDFDataSet with a simple check for existence
        of dataset and save parameter. Passes if waveforms already exist while
        ignoring the PyASDF warning that gets thrown if waveforms exist.

        :type st: obspy.core.stream.Stream
        :param st: Stream object to be saved into the dataset
        :type tag: str
        :param tag: unique identifier to save the waveforms under
        """
        if (self.ds is not None) and self.config.save_to_ds:
            # Catch ASDFWarning that occurs when data already exists
            with warnings.catch_warnings():
                warnings.filterwarnings("error")
                try:
                    self.ds.add_waveforms(waveform=st, tag=tag)
                    logger.info(f"saved to ASDFDataSet with tag '{tag}'")
                except ASDFWarning:
                    pass
Example #11
def append_focal_mechanism(event, client=None, overwrite=False):
    """
    Attempt to find focal mechanism information for a given Event object.

    .. note::
        FDSN fetched events are devoid of a few bits of information that are
        useful for our applications, e.g. moment tensor, focal mechanisms.
        This function will perform the conversions and append the necessary
        information to the event located in the dataset.

    :type event: obspy.core.event.Event
    :param event: Event object to append a focal mechanism to.
    :type client: str
    :param client: name of the client used to gather the event; used to decide
        where to look for moment tensor information, e.g. 'GEONET'
    :type overwrite: bool
    :param overwrite: If the event already has a focal mechanism, this will
        overwrite that focal mechanism
    :raises TypeError: if event is not provided as an obspy.core.event.Event
    """
    if isinstance(event, Event):
        event_id = format_event_name(event)

        # If the event already has a focal mechanism attribute, don't gather
        if hasattr(event, 'focal_mechanisms') and \
                event.focal_mechanisms and not overwrite:
            return event
        if client and client.upper() == "GEONET":
            # Query GeoNet moment tensor catalog if using GeoNet catalog
            from pyatoa.plugins.new_zealand.gather import geonet_mt
            event, _ = geonet_mt(event_id=event_id, event=event, units="nm")
            logger.info("GeoNet moment tensor appended to Event")
        else:
            try:
                # Try to query GCMT web-based catalog for matching event
                event = get_gcmt_moment_tensor(
                    origintime=event.preferred_origin().time,
                    magnitude=event.preferred_magnitude().mag)
            except FileNotFoundError:
                logger.info("no GCMT moment tensor for event found")
    else:
        raise TypeError("'event' must be an ObsPy Event object")

    return event
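
A short usage sketch; the QuakeML file path is a placeholder:

from obspy import read_events

event = read_events("path/to/2018p130600.xml")[0]
# Prefer the GeoNet moment tensor catalog for New Zealand events
event = append_focal_mechanism(event, client="GEONET", overwrite=False)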
Example #12
    def gather_station(self, code, **kwargs):
        """
        Gather station dataless information. Check disk then query webservices.
        Save station information to ASDFDataSet if requested.

        :type code: str
        :param code: Station code following SEED naming convention.
            This must be in the form NN.SSSS.LL.CCC (N=network, S=station,
            L=location, C=channel). Allows for wildcard naming. By default
            the pyatoa workflow wants three orthogonal components in the N/E/Z
            coordinate system. Example station code: NZ.OPRZ.10.HH?
        :rtype: obspy.core.inventory.Inventory
        :return: inventory containing relevant network and stations
        """
        logger.info("gathering StationXML")
        inv = self.station_fetch(code, **kwargs)
        if inv is None:
            inv = self.station_get(code, **kwargs)
            if inv is None:
                raise GathererNoDataException(
                    f"no StationXML for {code} found")
        logger.info("matching StationXML found")
        if (self.ds is not None) and self.config.save_to_ds:
            # !!! This is a temp fix for PyASDF 0.6.1 where re-adding StationXML
            # !!! that contains comments throws a TypeError. Issue #59
            try:
                self.ds.add_stationxml(inv)
                logger.info("saved to ASDFDataSet")
            except TypeError:
                pass

        return inv
Example #13
    def station_fetch(self, code, **kwargs):
        """
        Mid-level internal fetching function for station dataless information.
        Search ASDFDataSet for corresponding dataless, else look on disk.

        :type code: str
        :param code: Station code following SEED naming convention.
            This must be in the form NN.SSSS.LL.CCC (N=network, S=station,
            L=location, C=channel). Allows for wildcard naming. By default
            the pyatoa workflow wants three orthogonal components in the N/E/Z
            coordinate system. Example station code: NZ.OPRZ.10.HH?
        :rtype: obspy.core.inventory.Inventory or None
        :return: inventory containing relevant network and stations, or None
            if no data is found
        """
        if self.ds:
            try:
                logger.info("searching ASDFDataSet for station info")
                return self.asdf_station_fetch(code)
            except (KeyError, AttributeError):
                pass
        logger.info("searching local filesystem for station info")
        return self.fetch_resp_by_dir(code, **kwargs)
Example #14
    def gather_synthetic(self, code, **kwargs):
        """
        Gather synthetic waveforms as ObsPy streams.
        Only possible to check ASDFDataSet and local filesystem.

        :type code: str
        :param code: Station code following SEED naming convention.
            This must be in the form NN.SSSS.LL.CCC (N=network, S=station,
            L=location, C=channel). Allows for wildcard naming. By default
            the pyatoa workflow wants three orthogonal components in the N/E/Z
            coordinate system. Example station code: NZ.OPRZ.10.HH?
        :rtype: obspy.core.stream.Stream
        :return: stream object containing relevant waveforms
        :raises GathererNoDataException: if no synthetic data is found
        """
        logger.info("gathering synthetic waveforms")
        st_syn = self.syn_waveform_fetch(code, **kwargs)
        if st_syn is None:
            raise GathererNoDataException(f"no synthetic waveforms found "
                                          f"for {code}")
        logger.info("matching synthetic waveforms found")
        self._save_waveforms_to_dataset(st_syn, self.config.synthetic_tag)

        return st_syn
Example #15
    def window(self,
               fix_windows=False,
               iteration=None,
               step_count=None,
               force=False,
               save=True):
        """
        Evaluate misfit windows using Pyflex. Save windows to ASDFDataSet.
        Allows previously defined windows to be retrieved from ASDFDataSet.

        .. note::
            * Windows are stored as dictionaries of pyflex.Window objects.
            * All windows are saved into the ASDFDataSet, even if retrieved.
            * STA/LTA information is collected and stored internally.

        :type fix_windows: bool
        :param fix_windows: do not pick new windows, but load windows from the
            given dataset from 'iteration' and 'step_count'
        :type iteration: int or str
        :param iteration: if 'fix_windows' is True, look for windows in this
            iteration. If None, will check the latest iteration/step_count
            in the given dataset
        :type step_count: int or str
        :param step_count: if 'fix_windows' is True, look for windows in this
            step_count. If None, will check the latest iteration/step_count
            in the given dataset
        :type force: bool
        :param force: ignore flag checks and run function, useful if e.g.
            external preprocessing is used that doesn't meet flag criteria
        :type save: bool
        :param save: save the gathered windows to an ASDF Dataset
        """
        # Pre-check to see if data has already been standardized
        self.check()

        if self.config.pyflex_preset is None:
            logger.info("pyflex preset is set to 'None', will not window")
            return

        if not self.stats.standardized and not force:
            raise ManagerError("cannot window, waveforms not standardized")

        # Determine how to treat fixed windows
        if fix_windows and not self.ds:
            logger.warning("cannot fix window, no dataset")
            fix_windows = False
        elif fix_windows and (iteration is None or step_count is None):
            # If no iteration/step_count values are given, automatically search
            # the previous step_count for windows in relation to the current
            # iteration/step_count
            iteration = self.config.iteration
            step_count = self.config.step_count
            return_previous = True
        else:
            # If fix windows and iteration/step_count are given, search the
            # dataset for windows under the current iteration/step_count
            return_previous = False

        # Synthetic STA/LTA as Pyflex WindowSelector.calculate_preliminaries()
        for comp in self.config.component_list:
            try:
                self.staltas[comp] = pyflex.stalta.sta_lta(
                    data=envelope(self.st_syn.select(component=comp)[0].data),
                    dt=self.st_syn.select(component=comp)[0].stats.delta,
                    min_period=self.config.min_period)
            except IndexError:
                continue

        # Find misfit windows, from a dataset or through window selection
        if fix_windows:
            self.retrieve_windows(iteration, step_count, return_previous)
        else:
            self.select_windows_plus()

        if save:
            self.save_windows()
        logger.info(f"{self.stats.nwin} window(s) total found")

        return self
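
Sketch of fixed-window retrieval on an existing Manager mgr whose data are
already standardized and preprocessed; iteration/step values are illustrative:

# Re-use windows picked at iteration 'i01', step 's00' rather than running
# Pyflex selection again
mgr.window(fix_windows=True, iteration="i01", step_count="s00", save=False)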
Example #16
    def preprocess(self, which="both", overwrite=None, **kwargs):
        """
        Preprocess observed and synthetic waveforms in place.
        Default preprocessing tasks: Remove response (observed), rotate, filter,
        convolve with source time function (synthetic).

        .. note::
            Default preprocessing can be overwritten using a
            user-defined function that takes Manager and choice as inputs
            and outputs an ObsPy Stream object.

        .. note::
            Documented kwargs only apply to default preprocessing.

        :type which: str
        :param which: "obs", "syn" or "both" to choose which stream to process,
            defaults to "both"
        :type overwrite: function
        :param overwrite: If a function is provided, it will overwrite the 
            standard preprocessing function. All arguments that are given
            to the standard preprocessing function will be passed as kwargs to
            the new function. This allows for customized preprocessing

        Keyword Arguments
        ::
            int water_level:
                water level for response removal
            float taper_percentage:
                amount to taper ends of waveform
            bool remove_response:
                remove instrument response using the Manager's inventory object.
                Defaults to True
            bool apply_filter:
                filter the waveforms using the Config's min_period and
                max_period parameters. Defaults to True
            bool convolve_with_stf:
                Convolve synthetic data with a Gaussian source time function if
                a half duration is provided.
        """
        if not self.inv and not self.config.synthetics_only:
            raise ManagerError("cannot preprocess, no inventory")
        if overwrite:
            assert callable(overwrite), "overwrite must be a function"
            preproc_fx = overwrite
            preproc_fx = overwrite
        else:
            preproc_fx = default_process

        # If required, will rotate based on source receiver lat/lon values
        if self.config.rotate_to_rtz:
            if not self.inv:
                logger.warning("cannot rotate components, no inventory")
            else:
                self.gcd, self.baz = gcd_and_baz(event=self.event,
                                                 sta=self.inv[0][0])

        # Preprocess observation waveforms
        if self.st_obs is not None and not self.stats.obs_processed and \
                which.lower() in ["obs", "both"]:
            logger.info("preprocessing observation data")
            self.st_obs = preproc_fx(self, choice="obs", **kwargs)
            self.stats.obs_processed = True

        # Preprocess synthetic waveforms
        if self.st_syn is not None and not self.stats.syn_processed and \
                which.lower() in ["syn", "both"]:
            logger.info("preprocessing synthetic data")
            self.st_syn = preproc_fx(self, choice="syn", **kwargs)
            self.stats.syn_processed = True

        # Set stats, guarding against streams that were never provided
        if self.st_obs is not None:
            self.stats.len_obs = len(self.st_obs)
        if self.st_syn is not None:
            self.stats.len_syn = len(self.st_syn)

        return self
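
A minimal sketch of the overwrite hook: a user function that takes the Manager
and a choice string and returns a processed Stream. Everything here is
illustrative, not the default pipeline:

def simple_preproc(mgr, choice, **kwargs):
    """Bare-bones replacement: detrend, taper and bandpass only."""
    st = {"obs": mgr.st_obs, "syn": mgr.st_syn}[choice].copy()
    st.detrend("linear").taper(0.05)
    st.filter("bandpass", freqmin=1 / mgr.config.max_period,
              freqmax=1 / mgr.config.min_period)
    return st

mgr.preprocess(which="both", overwrite=simple_preproc)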
Example #17
    def standardize(self, force=False, standardize_to="syn"):
        """
        Standardize the observed and synthetic traces in place. 
        Ensures Streams have the same starttime, endtime, sampling rate, npts.

        :type force: bool
        :param force: allow the User to force the function to run even if checks
            say that the two Streams are already standardized
        :type standardize_to: str
        :param standardize_to: allows User to set which Stream conforms to the
            other. By default the observed traces conform to the synthetic
            ones, because exports to Specfem should be controlled by the
            synthetic sampling rate, npts, etc.
        """
        self.check()
        if not self.stats.len_obs or not self.stats.len_syn:
            raise ManagerError("cannot standardize, not enough waveform data")
        elif self.stats.standardized and not force:
            logger.info("data already standardized")
            return self
        logger.info("standardizing streams")

        # If observations starttime after synthetic, zero pad the front of obs
        dt_st = self.st_obs[0].stats.starttime - self.st_syn[0].stats.starttime
        if dt_st > 0:
            self.st_obs = zero_pad(self.st_obs,
                                   dt_st,
                                   before=True,
                                   after=False)

        # Match sampling rates
        if standardize_to == "syn":
            self.st_obs.resample(self.st_syn[0].stats.sampling_rate)
        else:
            self.st_syn.resample(self.st_obs[0].stats.sampling_rate)

        # Match start and endtimes
        self.st_obs, self.st_syn = trim_streams(st_a=self.st_obs,
                                                st_b=self.st_syn,
                                                force={
                                                    "obs": "a",
                                                    "syn": "b"
                                                }[standardize_to])

        # Match the number of samples
        self.st_obs, self.st_syn = match_npts(st_a=self.st_obs,
                                              st_b=self.st_syn,
                                              force={
                                                  "obs": "a",
                                                  "syn": "b"
                                              }[standardize_to])

        # Determine if synthetics start before the origintime
        if self.event is not None:
            self.stats.time_offset_sec = (self.st_syn[0].stats.starttime -
                                          self.event.preferred_origin().time)
            logger.debug(f"time offset is {self.stats.time_offset_sec}s")
        else:
            self.stats.time_offset_sec = 0

        self.stats.standardized = True

        return self
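
In a typical flow this call chains directly into preprocessing (hypothetical
Manager mgr with observed and synthetic streams already attached):

# Conform observed traces to the synthetic sampling rate, npts and timing,
# then run the default preprocessing
mgr.standardize(standardize_to="syn").preprocess(which="both")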
Example #18
    def gather(self, code=None, choice=None, event_id=None, **kwargs):
        """
        Gather station dataless and waveform data using the Gatherer class.
        In order: collect observed waveforms, dataless, and finally synthetics.

        For valid kwargs see methods in :doc:`core.gatherer`

        :type code: str
        :param code: Station code following SEED naming convention.
            This must be in the form NN.SSSS.LL.CCC (N=network, S=station,
            L=location, C=channel). Allows for wildcard naming. By default
            the pyatoa workflow wants three orthogonal components in the N/E/Z
            coordinate system. Example station code: NZ.OPRZ.10.HH?
        :type choice: list
        :param choice: allows user to gather individual bits of data, rather
            than gathering all. Allowed: 'event', 'inv', 'st_obs', 'st_syn'
        :type event_id: str
        :param event_id: unique event identifier to gather event information
            for. If None, defaults to the Config's event_id attribute.
        :raises ManagerError: if any part of the gathering fails.

        Keyword Arguments
        ::
            bool try_fm:
                Try to retrieve and append focal mechanism information to the
                Event object.
            str prefix:
                Prefix for event id when searching for event information,
                can be used to search ordered files e.g., CMTSOLUTION_001
            str suffix:
                Suffix for event id when searching for event information
            str station_level:
                The level of the station metadata if retrieved using the ObsPy
                Client. Defaults to 'response'
            str resp_dir_template:
                Directory structure template to search for response files.
                By default follows the SEED convention:
                'path/to/RESPONSE/{sta}.{net}/'
            str resp_fid_template:
                Response file naming template to search for station dataless.
                By default, follows the SEED convention
                'RESP.{net}.{sta}.{loc}.{cha}'
            str obs_dir_template:
                directory structure to search for observation data. Follows the
                SEED convention: 'path/to/obs_data/{year}/{net}/{sta}/{cha}'
            str obs_fid_template:
                File naming template to search for observation data. Follows the
                SEED convention: '{net}.{sta}.{loc}.{cha}*{year}.{jday:0>3}'
            str syn_cfgpath:
                Config.cfgpaths key to search for synthetic data. Defaults to
                'synthetics', but may need to be set to 'waveforms' in certain
                use cases, e.g. synthetic-synthetic inversions.
            str syn_unit:
                Optional argument to specify the letter used to identify the
                units of the synthetic data. For Specfem3D: ["d", "v", "a", "?"]
                where 'd' is displacement, 'v' velocity, and 'a' acceleration.
                Wildcards okay. Defaults to '?'
            str syn_dir_template:
                Directory structure template to search for synthetic waveforms.
                Defaults to empty string
            str syn_fid_template:
                The naming template of synthetic waveforms defaults to:
                "{net}.{sta}.*{cmp}.sem{syn_unit}"
        """
        # Default to gathering all data
        if choice is None:
            choice = ["event", "inv", "st_obs", "st_syn"]
        try:
            # Attempt to gather event information before waveforms/metadata
            if "event" in choice and self.event is None:
                if event_id is None:
                    event_id = self.config.event_id
                self.event = self.gatherer.gather_event(event_id, **kwargs)
            if code is not None:
                logger.info(f"gathering data for {code}")
                if "st_obs" in choice:
                    # Ensure observed waveforms gathered before synthetics and
                    # metadata. If this fails, no point to gathering the rest
                    self.st_obs = self.gatherer.gather_observed(code, **kwargs)
                if "inv" in choice:
                    self.inv = self.gatherer.gather_station(code, **kwargs)
                if "st_syn" in choice:
                    self.st_syn = self.gatherer.gather_synthetic(
                        code, **kwargs)

            return self
        except GathererNoDataException as e:
            # Catch the Gatherer exception and redirect as ManagerError
            # so that it can be caught by flow()
            raise ManagerError("Data Gatherer could not find some data") from e
        except Exception as e:
            # Gathering should be robust, but if something slips through, don't
            # let it kill a workflow; log it and raise a ManagerError instead
            logger.warning(e, exc_info=True)
            raise ManagerError("Uncontrolled error in data gathering") from e
Example #19
    def load(self,
             code,
             path=None,
             ds=None,
             synthetic_tag=None,
             observed_tag=None,
             config=True,
             windows=False,
             adjsrcs=False):
        """
        Populate the manager using a previously populated ASDFDataSet.
        Useful for re-instantiating an existing workflow that has already 
        gathered data and saved it to an ASDFDataSet.

        .. warning::
            Loading any floating point values may result in rounding errors.
            Be careful to round off floating points to the correct place before
            using in future work.

        :type code: str
        :param code: station code in the form 'NN.SSS' (network.station),
            e.g. 'NZ.BFZ'
        :type path: str
        :param path: if no Config object is given during init, the User
            can specify the config path here to load data from the dataset.
            This skips the need to initiate a separate Config object.
        :type ds: None or pyasdf.asdf_data_set.ASDFDataSet
        :param ds: dataset to load from; if given it is used for this load
            only and is not set as the Manager's `ds` attribute
        :type synthetic_tag: str
        :param synthetic_tag: waveform tag of the synthetic data in the dataset
            e.g. 'synthetic_m00s00'. If None given, will use `config` attribute.
        :type observed_tag: str
        :param observed_tag: waveform tag of the observed data in the dataset
            e.g. 'observed'. If None given, will use `config` attribute.
        :type config: bool
        :param config: load config from the dataset, defaults to True but
            can be set False if Config should be instantiated by the User
        :type windows: bool
        :param windows: load misfit windows from the dataset, defaults to False
        :type adjsrcs: bool
        :param adjsrcs: load adjoint sources from the dataset, defaults to False
        """
        # Allows a ds to be provided outside the attribute
        if ds is None:
            ds = self.ds
        if ds is None:
            raise TypeError("load requires a Dataset")

        # If no Config object in Manager, try to load from dataset
        if config:
            if path is None:
                raise TypeError("load requires valid 'path' argument")
            logger.info(f"loading config from dataset {path}")
            try:
                self.config = Config(ds=ds, path=path)
            except AttributeError:
                logger.warning(f"No Config object in dataset for path {path}")

        assert (self.config is not None), "Config object required for load"
        assert len(code.split('.')) == 2, "'code' must be in form 'NN.SSS'"
        if windows or adjsrcs:
            assert (path is not None), "'path' required to load auxiliary data"
            iter_, step = path.split("/")

        # Reset and populate using the dataset
        self.__init__(config=self.config, ds=ds, event=ds.events[0])
        net, sta = code.split('.')
        sta_tag = f"{net}.{sta}"
        if sta_tag in ds.waveforms.list():
            self.inv = ds.waveforms[sta_tag].StationXML
            self.st_syn = ds.waveforms[sta_tag][synthetic_tag
                                                or self.config.synthetic_tag]
            self.st_obs = ds.waveforms[sta_tag][observed_tag
                                                or self.config.observed_tag]
            if windows:
                self.windows = load_windows(ds, net, sta, iter_, step, False)
            if adjsrcs:
                self.adjsrcs = load_adjsrcs(ds, net, sta, iter_, step)
        else:
            logger.warning(f"no data for {sta_tag} found in dataset")

        self.check()
        return self
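
Re-instantiation sketch, assuming a dataset written by a previous run; the
file name is a placeholder and the path follows the 'iteration/step'
convention described above:

from pyasdf import ASDFDataSet

with ASDFDataSet("2018p130600.h5") as ds:
    mgr = Manager(ds=ds)
    mgr.load(code="NZ.BFZ", path="i01/s00", windows=True, adjsrcs=False)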
Example #20
    def select_windows_plus(self):
        """
        Mid-level custom window selection function that calls Pyflex select 
        windows, but includes additional window suppression functionality.
        Includes custom Pyflex addition of outputting rejected windows, which
        will be used internally for plotting.

        .. note::
            Pyflex will throw a ValueError if the arrival of the P-wave
            is too close to the initial portion of the waveform, considered the
            'noise' section. This happens for short source-receiver distances
            (< 100km).

            This error becomes a PyflexError if no event/station attributes
            are provided to the WindowSelector

            We could potentially deal with this by zero-padding the
            waveforms, and running select_windows() again, but for now we just
            raise a ManagerError and allow processing to continue
        """
        logger.info(f"running Pyflex w/ map: {self.config.pyflex_preset}")

        nwin, window_dict, reject_dict = 0, {}, {}
        for comp in self.config.component_list:
            try:
                obs = self.st_obs.select(component=comp)[0]
                syn = self.st_syn.select(component=comp)[0]
            # IndexError thrown when trying to access an empty Stream
            except IndexError:
                continue

            # Pyflex throws a TauP warning from ObsPy #2280, ignore that
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", UserWarning)
                ws = pyflex.WindowSelector(observed=obs,
                                           synthetic=syn,
                                           config=self.config.pyflex_config,
                                           event=self.event,
                                           station=self.inv)
                try:
                    windows = ws.select_windows()
                except (IndexError, pyflex.PyflexError):
                    # see docstring note for why this error is to be addressed
                    raise ManagerError("Cannot window, most likely because "
                                       "the source-receiver distance is too "
                                       "small w.r.t the minimum period")

            # Suppress windows that contain low-amplitude signals
            if self.config.win_amp_ratio > 0:
                windows, ws.rejects["amplitude"] = \
                    reject_on_global_amplitude_ratio(
                        data=obs.data, windows=windows,
                        ratio=self.config.win_amp_ratio)
            # ==================================================================
            # NOTE: Additional windowing criteria may be added here if necessary
            # ==================================================================
            if windows:
                window_dict[comp] = windows
            if ws.rejects:
                reject_dict[comp] = ws.rejects

            # Count windows and tell User
            logger.info(f"{len(windows)} window(s) selected for comp {comp}")
            nwin += len(windows)

        self.windows = window_dict
        self.rejwins = reject_dict
        self.stats.nwin = nwin
Example #21
def get_gcmt_moment_tensor(origintime,
                           magnitude,
                           time_wiggle_sec=120,
                           magnitude_wiggle=0.5):
    """
    Query GCMT moment tensor catalog for moment tensor components

    :type origintime: UTCDateTime or str
    :param origintime: event origin time
    :type magnitude: float
    :param magnitude: centroid moment magnitude for event lookup
    :type time_wiggle_sec: int
    :param time_wiggle_sec: padding on catalog filtering criteria related to
        event origin time
    :type magnitude_wiggle: float
    :param magnitude_wiggle: padding on catalog filter for magnitude
    :rtype: obspy.core.event.Event
    :return: event object for given earthquake
    """
    from urllib.error import HTTPError
    from obspy import UTCDateTime, read_events

    if not isinstance(origintime, UTCDateTime):
        # GCMT filenames are derived from the datetime, so ensure UTCDateTime
        origintime = UTCDateTime(origintime)

    # Determine filename using datetime properties
    month = origintime.strftime('%b').lower()  # e.g. 'jul'
    year_short = origintime.strftime('%y')  # e.g. '19'
    year_long = origintime.strftime('%Y')  # e.g. '2019'

    fid = f"{month}{year_short}.ndk"
    logger.info("querying GCMT database for moment tensor")
    try:
        cat = read_events("https://www.ldeo.columbia.edu/~gcmt/projects/CMT/"
                          f"catalog/NEW_MONTHLY/{year_long}/{fid}")
    except HTTPError:
        cat = read_events("http://www.ldeo.columbia.edu/~gcmt/projects/CMT/"
                          "catalog/NEW_QUICK/qcmt.ndk")

    # GCMT catalogs contain all events for a span of time
    # filter catalogs using ObsPy to find events with our specifications.
    # Magnitudes and origintimes are not always in agreement between agencies
    # So allow for some wiggle room
    cat_filt = cat.filter(
        f"time > {str(origintime - time_wiggle_sec)}",
        f"time < {str(origintime + time_wiggle_sec)}",
        f"magnitude >= {magnitude - magnitude_wiggle}",
        f"magnitude <= {magnitude + magnitude_wiggle}",
    )
    # Filtering may remove all events from catalog, return multiple events, or
    # may return the event of choice
    if not len(cat_filt):
        logger.info(f"no GCMT event found for {origintime} and M{magnitude}")
        raise FileNotFoundError("No events found")
    elif len(cat_filt) > 1:
        logger.info(f"multiple events found for {origintime} and M{magnitude}")
        print(f"{len(cat_filt)} events found, choosing first")
        return cat_filt[0]
    else:
        logger.info("GCMT event found matching criteria")
        return cat_filt[0]
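
Example query; origin time and magnitude are illustrative, and the wiggle
parameters can be widened if no match is returned:

from obspy import UTCDateTime

event = get_gcmt_moment_tensor(origintime=UTCDateTime("2019-07-06T03:19:53"),
                               magnitude=7.1, time_wiggle_sec=300)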
Example #22
    def fetch_obs_by_dir(self, code, **kwargs):
        """
        Fetch observation waveforms via directory structure on disk.

        .. note::
            Default waveform directory structure assumed to follow SEED
            convention. That is:
            path/to/data/{YEAR}/{NETWORK}/{STATION}/{CHANNEL}*/{FID}
            e.g. path/to/data/2017/NZ/OPRZ/HHZ.D/NZ.OPRZ.10.HHZ.D

        :type code: str
        :param code: Station code following SEED naming convention.
            This must be in the form NN.SSSS.LL.CCC (N=network, S=station,
            L=location, C=channel). Allows for wildcard naming. By default
            the pyatoa workflow wants three orthogonal components in the N/E/Z
            coordinate system. Example station code: NZ.OPRZ.10.HH?
        :rtype stream: obspy.core.stream.Stream or None
        :return stream: stream object containing relevant waveforms, else None

        Keyword Arguments
        ::
            str obs_dir_template:
                directory structure to search for observation data.
                Follows the SEED convention:
                'path/to/obs_data/{year}/{net}/{sta}/{cha}'
            str obs_fid_template:
                File naming template to search for observation data.
                Follows the SEED convention:
                '{net}.{sta}.{loc}.{cha}*{year}.{jday:0>3}'
        """
        obs_dir_template = kwargs.get("obs_dir_template",
                                      "{year}/{net}/{sta}/{cha}*")
        obs_fid_template = kwargs.get(
            "obs_fid_template", "{net}.{sta}.{loc}.{cha}*{year}.{jday:0>3}")

        if self.origintime is None:
            raise AttributeError("'origintime' must be specified")

        net, sta, loc, cha = code.split('.')
        # If waveforms contain midnight, multiple files need to be read
        jdays = overlapping_days(origin_time=self.origintime,
                                 start_pad=self.config.start_pad,
                                 end_pad=self.config.end_pad)

        # Ensure that the paths are a list so that iterating doesn't
        # accidentally try to iterate through a string.
        paths = self.config.paths["waveforms"]
        if not isinstance(paths, list):
            paths = [paths]

        for path_ in paths:
            if not os.path.exists(path_):
                continue
            full_path = os.path.join(path_, obs_dir_template, obs_fid_template)
            pathlist = []
            for jday in jdays:
                pathlist.append(
                    full_path.format(net=net,
                                     sta=sta,
                                     cha=cha,
                                     loc=loc,
                                     jday=jday,
                                     year=self.origintime.year))
            st = Stream()
            for fid in pathlist:
                logger.debug(f"searching for observations: {fid}")
                for filepath in glob.glob(fid):
                    st += read(filepath)
                    logger.info(f"retrieved observations locally:\n{filepath}")
            if len(st) > 0:
                # Take care of gaps in data by converting to masked data
                st.merge()
                st.trim(starttime=self.origintime - self.config.start_pad,
                        endtime=self.origintime + self.config.end_pad)
                # Check if trimming retains data
                if len(st) > 0:
                    return st
                else:
                    logger.warning(
                        "data does not fit origin time +/- pad time")
                    return None
        else:
            return None
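
The templates can be overridden per call; here the defaults are spelled out
explicitly for a hypothetical Gatherer instance gtr:

st = gtr.fetch_obs_by_dir(
    "NZ.OPRZ.10.HH?",
    obs_dir_template="{year}/{net}/{sta}/{cha}*",
    obs_fid_template="{net}.{sta}.{loc}.{cha}*{year}.{jday:0>3}",
)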
Example #23
    def __init__(self,
                 config=None,
                 ds=None,
                 event=None,
                 st_obs=None,
                 st_syn=None,
                 inv=None,
                 windows=None,
                 staltas=None,
                 adjsrcs=None,
                 gcd=None,
                 baz=None,
                 gatherer=None):
        """
        Initiate the Manager class with or without pre-defined attributes.

        .. note::
            If `ds` is not given, data can only be gathered via the
            config.paths attribute or using the ObsPy Client service.
            Data will also not be saved.

        :type config: pyatoa.core.config.Config
        :param config: configuration object that contains necessary parameters
            to run through the Pyatoa workflow
        :type ds: pyasdf.asdf_data_set.ASDFDataSet
        :param ds: ASDF data set from which to read and write data
        :type event: obspy.core.event.Event
        :param event: An event object containing relevant earthquake information
        :type st_obs: obspy.core.stream.Stream
        :param st_obs: Stream object containing waveforms of observations
        :type st_syn: obspy.core.stream.Stream
        :param st_syn: Stream object containing waveforms of synthetics
        :type inv: obspy.core.inventory.Inventory
        :param inv: Inventory that should only contain the station of interest,
            its relevant channels, and response information
        :type windows: dict of pyflex.Window objects
        :param windows: misfit windows calculated by Pyflex, stored in a
            dictionary based on component naming
        :type adjsrcs: dict of pyadjoint.AdjointSource objects
        :param adjsrcs: adjoint source waveforms stored in dictionaries
        :type gcd: float
        :param gcd: great circle distance between source and receiver in km
        :type baz: float
        :param baz: Backazimuth between source and receiver in units of degrees
        :type gatherer: pyatoa.core.gatherer.Gatherer
        :param gatherer: A previously instantiated Gatherer class.
            Should not have to be passed in by User, but is used for reset()
        """
        self.ds = ds
        self.inv = inv

        # Instantiate a Config object
        if config is not None:
            self.config = config
        else:
            logger.info("no config provided, initiating default")
            self.config = Config()

        # Ensure any user-provided event is an Event object
        if isinstance(event, obspy.core.event.catalog.Catalog):
            logger.info(f"event given as catalog, taking zeroth entry")
            event = event[0]
        self.event = event

        # Try to get origin time information from the event
        if self.event is not None:
            origintime = self.event.preferred_origin().time
        else:
            origintime = None

        # Instantiate a Gatherer object and pass along info
        if gatherer is None:
            self.gatherer = Gatherer(config=self.config,
                                     ds=self.ds,
                                     origintime=origintime)
        else:
            self.gatherer = gatherer

        # Copy Streams to avoid affecting original data
        if st_obs is not None:
            self.st_obs = st_obs.copy()
        else:
            self.st_obs = None
        if st_syn is not None:
            self.st_syn = st_syn.copy()
        else:
            self.st_syn = None

        # Data produced by the workflow
        self.gcd = gcd
        self.baz = baz
        self.windows = windows
        self.staltas = staltas or {}
        self.adjsrcs = adjsrcs
        self.rejwins = {}

        # Internal statistics to keep track of the workflow progress
        self.stats = ManagerStats()

        # Run internal checks on data
        self.check()
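
Instantiation sketch with pre-loaded data; file paths are placeholders and the
Config keywords are assumed from the attributes used elsewhere in this class:

from obspy import read, read_events, read_inventory

mgr = Manager(config=Config(min_period=10, max_period=30),
              event=read_events("event.xml")[0],
              st_obs=read("observed.mseed"),
              st_syn=read("synthetic.mseed"),
              inv=read_inventory("station.xml"))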
Example #24
    def fetch_resp_by_dir(self, code, **kwargs):
        """
        Fetch station dataless via directory structure on disk.
        Will search through all paths given until StationXML found.

        .. note::
            Default path naming follows SEED convention, that is:
            path/to/dataless/{NET}.{STA}/RESP.{NET}.{STA}.{LOC}.{CHA}
            e.g. path/to/dataless/NZ.BFZ/RESP.NZ.BFZ.10.HHZ

        :type code: str
        :param code: Station code following SEED naming convention.
            This must be in the form NN.SSSS.LL.CCC (N=network, S=station,
            L=location, C=channel). Allows for wildcard naming. By default
            the pyatoa workflow wants three orthogonal components in the N/E/Z
            coordinate system. Example station code: NZ.OPRZ.10.HH?
        :rtype inv: obspy.core.inventory.Inventory or None
        :return inv: inventory containing relevant network and stations

        Keyword Arguments
        ::
            str resp_dir_template:
                Directory structure template to search for response files.
                By default follows the SEED convention,
                'path/to/RESPONSE/{sta}.{net}/'
            str resp_fid_template:
                Response file naming template to search for station dataless.
                By default, follows the SEED convention
                'RESP.{net}.{sta}.{loc}.{cha}'
        """
        resp_dir_template = kwargs.get("resp_dir_template", "{sta}.{net}")
        resp_fid_template = kwargs.get("resp_fid_template",
                                       "RESP.{net}.{sta}.{loc}.{cha}")

        inv = None
        net, sta, loc, cha = code.split('.')

        # Ensure that the paths are a list so that iterating doesn't
        # accidentally try to iterate through a string.
        paths = self.config.paths["responses"]
        if not isinstance(paths, list):
            paths = [paths]

        for path_ in paths:
            if not os.path.exists(path_):
                continue
            # Attempting to instantiate an empty Inventory requires some
            # positional arguments we don't have, so don't do that
            fid = os.path.join(path_, resp_dir_template, resp_fid_template)
            fid = fid.format(net=net, sta=sta, cha=cha, loc=loc)
            logger.debug(f"searching for responses: {fid}")
            for filepath in glob.glob(fid):
                if inv is None:
                    # The first inventory becomes the main inv to return
                    inv = read_inventory(filepath)
                else:
                    # All other inventories are appended to the original
                    inv_append = read_inventory(filepath)
                    # Merge inventories to remove repeated networks
                    inv = merge_inventories(inv, inv_append)
                logger.info(f"retrieved response locally:\n{filepath}")

        return inv
Example #25
    def fetch_event_by_dir(self,
                           event_id,
                           prefix="",
                           suffix="",
                           format_=None,
                           **kwargs):
        """
        Fetch event information via directory structure on disk. Developed to
        parse CMTSOLUTION and QUAKEML files, but theoretically handles any
        format that the ObsPy read_events() function accepts.

        Will search through all paths given until a matching source file found.

        .. note::
            This function will search for the following path
            /path/to/event_dir/{prefix}{event_id}{suffix}
            
            so, if e.g., searching for a CMTSOLUTION file in the current dir:
            ./CMTSOLUTION_{event_id}

            Wildcards are okay but the function will return the first match

        :type event_id: str
        :param event_id: Unique event identifier to search the source file by,
            e.g., a New Zealand earthquake ID '2018p130600'. The prefix and
            suffix are tacked onto this
        :type prefix: str
        :param prefix: Prefix to prepend to the event id for file name
            searching. Wildcards are okay
        :type suffix: str
        :param suffix: Suffix to append to the event id for file name
            searching. Wildcards are okay
        :type format_: str or NoneType
        :param format_: Expected format of the file to read, e.g., 'QUAKEML',
            passed to ObsPy read_events(). NoneType means read_events() will
            guess the format
        :rtype: obspy.core.event.Event or None
        :return: event object if found, else None
        """
        # Ensure that the paths are a list so that iterating doesn't
        # accidentally step through the characters of a string.
        paths = self.config.paths["events"]
        if not isinstance(paths, list):
            paths = [paths]

        event = None
        for path_ in paths:
            if not os.path.exists(path_):
                continue
            # Search for available event files
            fid = os.path.join(path_, f"{prefix}{event_id}{suffix}")
            for filepath in glob.glob(fid):
                logger.debug(f"searching for event data: {filepath}")
                if os.path.exists(filepath):
                    try:
                        # Allow input of various types of source files
                        if "SOURCE" in prefix:
                            logger.info(
                                f"reading SPECFEM2D SOURCE: {filepath}")
                            cat = [read_specfem2d_source(filepath)]
                        elif "FORCESOLUTION" in prefix:
                            logger.info(f"reading FORCESOLUTION: {filepath}")
                            cat = [read_forcesolution(filepath)]
                        else:
                            logger.info(
                                f"reading source using ObsPy: {filepath}")
                            cat = read_events(filepath, format=format_)

                        if len(cat) != 1:
                            logger.warning(
                                f"{filepath} event file contains more than "
                                "one event, returning the first entry")
                        event = cat[0]
                        break
                    except Exception as e:
                        logger.warning(f"{filepath} event file read error: {e}")

        if event is not None:
            logger.info(f"retrieved local file:\n{filepath}")
        else:
            logger.info("no local event file found")

        return event
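
A hedged sketch of fetching a CMTSOLUTION file, following the path pattern in the note above (the instance name `gthr` and the event id are illustrative):

    # Searches each Config.paths["events"] entry for CMTSOLUTION_2018p130600
    event = gthr.fetch_event_by_dir(event_id="2018p130600",
                                    prefix="CMTSOLUTION_")
    if event is None:
        print("no matching source file found")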
Example #26
    def measure(self, force=False, save=True):
        """
        Measure misfit and calculate adjoint sources using PyAdjoint.

        The method for calculating misfit is set in the Config. Pyadjoint
        expects standardized traces with the same spectral content, so this
        function will not run unless the corresponding status flags are set
        (or `force` is passed).

        Returns a dictionary of adjoint sources based on component.
        Saves resultant dictionary to a pyasdf dataset if given.

        .. note::
            Pyadjoint returns an unscaled misfit value for an entire set of
            windows. To return a "total misfit" value as defined by 
            Tape (2010) Eq. 6, the total summed misfit will need to be scaled by 
            the number of misfit windows chosen in Manager.window().

        :type force: bool
        :param force: ignore flag checks and run function, useful if e.g.
            external preprocessing is used that doesn't meet flag criteria
        :type save: bool
        :param save: save adjoint sources to ASDFDataSet
        """
        self.check()

        if self.config.adj_src_type is None:
            logger.info("adjoint source type is 'None', will not measure")
            return

        # Check that data has been filtered and standardized
        if not self.stats.standardized and not force:
            raise ManagerError("cannot measure misfit, not standardized")
        elif not (self.stats.obs_processed and self.stats.syn_processed) \
                and not force:
            raise ManagerError("cannot measure misfit, not filtered")
        elif self.stats.nwin == 0 and not force:
            raise ManagerError("cannot measure misfit, no windows recovered")
        logger.debug(f"running Pyadjoint w/ type: {self.config.adj_src_type}")

        # Create list of windows needed for Pyadjoint
        adjoint_windows = self._format_windows()
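        # (assumed) adjoint_windows maps component -> list of [start, end]
        # window times, the format Pyadjoint's `window` argument expects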

        # Run Pyadjoint to retrieve adjoint source objects
        total_misfit, adjoint_sources = 0, {}
        for comp, adj_win in adjoint_windows.items():
            try:
                adj_src = pyadjoint.calculate_adjoint_source(
                    adj_src_type=self.config.adj_src_type,
                    config=self.config.pyadjoint_config,
                    observed=self.st_obs.select(component=comp)[0],
                    synthetic=self.st_syn.select(component=comp)[0],
                    window=adj_win,
                    plot=False)

                # Re-format component name to reflect SPECFEM convention
                adj_src.component = f"{channel_code(adj_src.dt)}X{comp}"

                # Save adjoint sources in dictionary object. Sum total misfit
                adjoint_sources[comp] = adj_src
                logger.info(f"{adj_src.misfit:.3f} misfit for comp {comp}")
                total_misfit += adj_src.misfit
            except IndexError:
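                # .select() found no trace for this component; skip it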
                continue

        # Save adjoint source internally and to dataset
        self.adjsrcs = adjoint_sources
        if save:
            self.save_adjsrcs()

        # Run check to get total misfit
        self.check()
        logger.info(f"total misfit {self.stats.misfit:.3f}")

        return self
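
A minimal sketch of the surrounding workflow. Manager.window() is referenced in the note above; the other method names and the call order are assumptions about the Manager interface, not taken from this example:

    # Assumed flow: standardize sampling, preprocess, pick windows, measure
    mgr.standardize()
    mgr.preprocess()
    mgr.window()
    mgr.measure(save=True)
    print(mgr.stats.misfit)  # total misfit, summed during the final check()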
Example #27
    def fetch_syn_by_dir(self, code, **kwargs):
        """
        Fetch synthetic waveforms from Specfem3D via directory structure on
        disk, if necessary convert native ASCII format to Stream object.

        :type code: str
        :param code: Station code following SEED naming convention.
            This must be in the form NN.SSSS.LL.CCC (N=network, S=station,
            L=location, C=channel). Allows for wildcard naming. By default
            the pyatoa workflow wants three orthogonal components in the N/E/Z
            coordinate system. Example station code: NZ.OPRZ.10.HH?
        :rtype: obspy.core.stream.Stream or None
        :return: stream object containing relevant waveforms, or None if no
            data is found

        Keyword Arguments
        ::
            str syn_cfgpath:
                Config.paths key to search for synthetic data. Defaults to
                'synthetics', but may need to be set to 'waveforms' in
                certain use cases
            str syn_unit:
                Optional argument to specify the letter used to identify the
                units of the synthetic data. For Specfem3D: ["d", "v", "a", "?"]
                'd' for displacement, 'v' for velocity, 'a' for acceleration.
                Wildcards okay. Defaults to '?'
            str syn_dir_template:
                Directory structure template to search for synthetic waveforms.
                Defaults to an empty string
            str syn_fid_template:
                The naming template of synthetic waveforms. Defaults to
                "{net}.{sta}.*{cmp}.sem{dva}"
        """
        syn_cfgpath = kwargs.get("syn_cfgpath", "synthetics")
        syn_unit = kwargs.get("syn_unit", "?")
        syn_dir_template = kwargs.get("syn_dir_template", "")
        syn_fid_template = kwargs.get("syn_fid_template",
                                      "{net}.{sta}.*{cmp}.sem{dva}")

        if self.origintime is None:
            raise AttributeError("'origintime' must be specified")

        # Generate information necessary to search for data
        net, sta, loc, cha = code.split('.')

        # Ensure that the paths are a list so that iterating doesn't
        # accidentally step through the characters of a string.
        paths = self.config.paths[syn_cfgpath]
        if not isinstance(paths, list):
            paths = [paths]

        for path_ in paths:
            if not os.path.exists(path_):
                continue

            # Build the full search path; `syn_dir_template` may encode an
            # event id subdirectory if synthetics are organized per event
            full_path = os.path.join(path_, syn_dir_template, syn_fid_template)
            logger.debug(f"searching for synthetics: {full_path}")
            st = Stream()
            for filepath in glob.glob(
                    full_path.format(net=net,
                                     sta=sta,
                                     cmp=cha[2:],
                                     dva=syn_unit.lower())):
                try:
                    # Convert the SPECFEM ASCII synthetic to an ObsPy Stream
                    st += read_sem(filepath, self.origintime)
                except UnicodeDecodeError:
                    # If the file is for some reason already miniseed, read
                    # it directly
                    st += read(filepath)
                logger.info(f"retrieved synthetics locally:\n{filepath}")
            if len(st) > 0:
                st.merge()
                st.trim(starttime=self.origintime - self.config.start_pad,
                        endtime=self.origintime + self.config.end_pad)
                return st
        return None
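
A hedged usage sketch (the instance name `mgr`, paths, and station code are illustrative):

    # Assumes Config.paths["synthetics"] points at Specfem3D output
    # containing ASCII files such as NZ.OPRZ.BXZ.semd
    st_syn = mgr.fetch_syn_by_dir(code="NZ.OPRZ.10.HH?", syn_unit="d")
    if st_syn is not None:
        print(st_syn)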