Example #1
0
 def loadwavelet(self, w, dtype="Seismogram", component=2, window=False):
     """
     Load the wavelet vector and store it as ``self.wvector``.

     This mirrors loaddata but stores the result in the wavelet
     workspace.

     :param w: input wavelet; what it must contain depends on dtype.
     :param dtype: one of "Seismogram", "TimeSeries", or "raw_vector".
       For "Seismogram" the component given by ``component`` is extracted
       with ExtractComponent.  For "TimeSeries" the ``data`` attribute of
       w is used.  For "raw_vector" w itself is used as the vector.
     :param component: component of a Seismogram to extract.  Ignored
       unless dtype is "Seismogram".
     :param window: when True the Seismogram/TimeSeries input is passed
       through WindowData using self.dwin.start and self.dwin.end before
       the sample vector is copied.

     :raises RuntimeError: if dtype is "raw_vector" and window is True,
       or if dtype is not one of the three supported names.
     :return: None.  The result is stored in self.wvector as a numpy array.
     """
     if dtype == "raw_vector" and window:
         raise RuntimeError(
             "RFdeconProcessor.loadwavelet:  " +
             "Illegal argument combination\nwindow cannot be true with raw_vector input"
         )
     if dtype not in ("Seismogram", "TimeSeries", "raw_vector"):
         raise RuntimeError("RFdeconProcessor.loadwavelet:  " +
                            " Illegal dtype parameter=" + dtype)
     if dtype == "raw_vector":
         wvector = w
     else:
         # A TimeSeries is used as is.  The original unwindowed TimeSeries
         # branch read an unassigned local (ts) instead of the input w.
         ts = ExtractComponent(w, component) if dtype == "Seismogram" else w
         if window:
             ts = WindowData(ts, self.dwin.start, self.dwin.end)
         wvector = ts.data
     # Have to explicitly convert to ndarray because DoubleVector cannot be serialized.
     self.wvector = np.array(wvector)
Example #2
0
 def _wtva_Seismogram(self, d, fill):
     """
     Plot a three-component datum by delegating to the ensemble plotter.

     Each of the three components of d is extracted with ExtractComponent,
     appended in component order (0, 1, 2) to a new TimeSeriesEnsemble,
     and the ensemble is handed to self._wtva_TimeSeriesEnsemble along
     with fill.

     :param d: three-component datum accepted by ExtractComponent.
     :param fill: passed through unchanged to _wtva_TimeSeriesEnsemble.
     :return: None (the result of the delegated call is not returned).
     """
     bundle = TimeSeriesEnsemble()
     for comp_index in range(3):
         bundle.member.append(ExtractComponent(d, comp_index))
     self._wtva_TimeSeriesEnsemble(bundle, fill)
Example #3
0
def test_ExtractComponent():
    """
    Check that ExtractComponent returns each row of a Seismogram's matrix.

    A live Seismogram is filled with a random 3 x 6 matrix; component k
    extracted with ExtractComponent must have data equal to row k.
    """
    seis = Seismogram()
    seis.live = 1
    seis.data = dmatrix(np.random.rand(3, 6))
    seis.npts = 6
    # Extract all three components first, then compare, as two passes.
    components = [ExtractComponent(seis, k) for k in range(3)]
    for k, extracted in enumerate(components):
        assert (extracted.data == seis.data[k]).all()
Example #4
0
 def loadnoise(self, n, dtype="Seismogram", component=2, window=False):
     """
     Load the noise vector and store it as ``self.nvector``.

     Returns immediately, without touching self.nvector, when
     self.algorithm is "LeastSquares" or "WaterLevel" because those
     methods ignore noise.  That is done silently on the assumption
     that a function wrapper will post an error to elog for this
     nonfatal condition.

     :param n: input noise data; what it must contain depends on dtype.
     :param dtype: one of "Seismogram", "TimeSeries", or "raw_vector".
       For "Seismogram" the component given by ``component`` is extracted
       with ExtractComponent.  For "TimeSeries" the ``data`` attribute of
       n is used.  For "raw_vector" n itself is used as the vector.
     :param component: component of a Seismogram to extract.  Ignored
       unless dtype is "Seismogram".
     :param window: when True the Seismogram/TimeSeries input is passed
       through WindowData using the values fetched from self.md with the
       keys "noise_window_start" and "noise_window_end".

     :raises RuntimeError: if dtype is "raw_vector" and window is True,
       or if dtype is not one of the three supported names.
     :return: None.  The result is stored in self.nvector as a numpy array.
     """
     # Return immediately for methods that ignore noise.
     if self.algorithm in ("LeastSquares", "WaterLevel"):
         return
     if dtype == "raw_vector" and window:
         raise RuntimeError(
             "RFdeconProcessor.loadnoise:  " +
             "Illegal argument combination\nwindow cannot be true with raw_vector input"
         )
     if dtype not in ("Seismogram", "TimeSeries", "raw_vector"):
         raise RuntimeError("RFdeconProcessor.loadnoise:  " +
                            " Illegal dtype parameter=" + dtype)
     if dtype == "raw_vector":
         nvector = n
     else:
         # A TimeSeries is used as is.  The original unwindowed TimeSeries
         # branch read an unassigned local (ts) instead of the input n.
         ts = ExtractComponent(n, component) if dtype == "Seismogram" else n
         if window:
             # IMPORTANT  these two parameters are not required by the
             # ScalarDecon C code but need to be inserted in pf for any
             # algorithm that requires noise data (i.e. multitaper) when
             # the window option is desired
             tws = self.md.get_double("noise_window_start")
             twe = self.md.get_double("noise_window_end")
             ts = WindowData(ts, tws, twe)
         nvector = ts.data
     # Have to explicitly convert to ndarray because DoubleVector cannot be serialized.
     self.nvector = np.array(nvector)
Example #5
0
    def loaddata(self, d, dtype="Seismogram", component=0, window=False):
        """
        Loads data for processing.  When window is set true
        use the internal pf definition of data time window
        and window the data.  The dtype parameter changes the
        behavior of this algorithm significantly depending on
        the setting.   It can be one of the following:
        Seismogram, TimeSeries, or raw_vector.   For the first
        two the data to process will be extracted in a
        pf specified window if window is True.  If window is
        False TimeSeries data will be passed directly and
        Seismogram data will have the data defined by the
        component parameter copied to the internal data
        vector workspace.   If dtype is set to raw_vector
        d is assumed to be a raw numpy vector of doubles or
        the aliased std::vector used in ccore, for example,
        the TimeSeries object's data vector.  Setting dtype
        to raw_vector and window True will result in this
        method throwing a RuntimeError exception as the
        combination is not possible since raw_vector data
        have no time base.

        :param d: input data (contents expected depend upon
          value of dtype parameter).
        :param dtype: string defining the form d is expected
          to be (see details above)
        :param component: component of Seismogram data to
          load as data vector.  Ignored if dtype is raw_vector
          or TimeSeries.
        :param window: boolean controlling internally
          defined windowing.  (see details above)

        :raises RuntimeError: if dtype is raw_vector and window
          is True, or if dtype is not one of the three names
          listed above.
        :return:  None.  The loaded vector is stored in
          self.dvector as a numpy array.
        """
        # First basic sanity checks
        if dtype == "raw_vector" and window:
            raise RuntimeError(
                "RFdeconProcessor.loaddata:  " +
                "Illegal argument combination\nwindow cannot be true with raw_vector input"
            )
        if dtype not in ("Seismogram", "TimeSeries", "raw_vector"):
            raise RuntimeError("RFdeconProcessor.loaddata:  " +
                               " Illegal dtype parameter=" + dtype)
        if dtype == "raw_vector":
            dvector = d
        else:
            # A TimeSeries is used as is.  The original unwindowed
            # TimeSeries branch read an unassigned local (ts) instead of
            # the input d.
            ts = ExtractComponent(d, component) if dtype == "Seismogram" else d
            if window:
                ts = WindowData(ts, self.dwin.start, self.dwin.end)
            dvector = ts.data
        # Have to explicitly convert to ndarray because DoubleVector cannot be serialized.
        self.dvector = np.array(dvector)
Example #6
0
def Seismogram2Stream(sg,
                      chanmap=["E", "N", "Z"],
                      hang=[90.0, 0.0, 0.0],
                      vang=[90.0, 90.0, 0.0]):
    # fixme hang and vang parameters
    """
    Split a mspass Seismogram into an obspy Stream of three Trace objects.

    mspass bundles three-component data in a single matrix container
    while obspy represents the same thing as a Stream, which is a
    wrapper around a list of Trace objects.  This function takes the
    matrix apart one component at a time (ExtractComponent), tags each
    component with the channel name and orientation angles supplied by
    the caller, and converts it with TimeSeries2Trace.  No rotation of
    the data is done here; the angles are only posted as attributes.

    The error log of sg is copied onto each intermediate TimeSeries that
    is handed to TimeSeries2Trace.  NOTE(review): the original author
    warns that error log entries do not survive the conversion because
    obspy has no such concept, so save the input to MongoDB first if the
    log must be preserved.

    :param sg: is the Seismogram object to be converted
    :type sg: :class:`~mspasspy.ccore.Seismogram`
    :param chanmap:  3 element list of channel names to be assigned components
    :type chanmap: list
    :param hang:  3 element list of horizontal angle attributes (azimuth in degrees)
      to be set for each component.  (default is for cardinal directions)
    :type hang: list
    :param vang:  3 element list of vertical angle (theta of spherical coordinates)
      to be set for each component.  (default is for cardinal directions)
    :type vang: list
    :return: obspy Stream object holding 3 Trace objects in mspass
       component order.  The Stream carries a boolean attribute
       dead_mspass.  When sg is not live that attribute is True and the
       Stream holds three default-constructed Trace objects, each also
       flagged with dead_mspass True.
    :rtype: :class:`obspy.core.stream.Stream`
    """
    stream = obspy.core.Stream()
    if not sg.live:
        # Dead input: return three placeholder traces, all flagged dead.
        stream.dead_mspass = True
        for _ in range(3):
            placeholder = obspy.core.Trace()
            placeholder.dead_mspass = True
            stream.append(placeholder)
        return stream

    stream.dead_mspass = False
    parent_id = sg.id()
    parent_log = sg.elog
    for comp in range(3):
        component_ts = ExtractComponent(sg, comp)
        component_ts.put_string(Keywords.chan, chanmap[comp])
        component_ts.put_double(Keywords.channel_hang, hang[comp])
        component_ts.put_double(Keywords.channel_vang, vang[comp])
        # Per the original author's note the extracted component needs the
        # parent's id and error log attached to mesh with TimeSeries2Trace.
        full_ts = TimeSeries(component_ts, parent_id)
        full_ts.elog = parent_log
        stream.append(TimeSeries2Trace(full_ts))
    return stream
Example #7
0
def arrival_snr_QC(
    data_object,
    noise_window=TimeWindow(-130.0, -5.0),
    noise_spectrum_engine=None,
    signal_window=TimeWindow(-5.0, 120.0),
    signal_spectrum_engine=None,
    band_cutoff_snr=2.0,
    # check these are reasonable - don't remember the formula when writing this
    tbp=5.0,
    ntapers=10,
    high_frequency_search_start=5.0,
    poles=3,
    perc=95.0,
    phase_name="P",
    metadata_key="Parrival",
    optional_metrics=[
        "snr_stats",
        "filtered_envelope",
        "filtered_L2",
        "filtered_Linf",
        "filtered_MAD",
        "filtered_perc",
    ],
    save_spectra=False,
    db=None,
    collection="arrival",
    use_measured_arrival_time=False,
    measured_arrival_time_key="Ptime",
    taup_model=None,
    update_mode=False,
    component=2,
    source_collection="source",
    receiver_collection=None,
):
    """
    Compute a series of metrics that can be used for quality control
    filtering of seismic phase data.

    This is the highest level function in this module for computing
    signal-to-noise ratio metrics for processing signals that can be
    defined by a computable or measurable "phase".  Features this
    function adds over lower level functions in this module are:
        1.  An option to save computed metrics to a MongoDB collection
            (defaults as "arrival").  If the update_mode argument is
            set True (default is False) the function looks in the
            data_object for an id stored with the key collection + "_id"
            ("arrival_id" with the default collection) that references
            the ObjectID of an existing entry in the collection where the
            data this function computes is to be saved.
        2.  Adds an option to use a computed or measured arrival as the
            time reference for all windowing.   The lower level snr
            functions in this module require the user do what this
            function does prior to calling the function.  Note one or the
            other is required (i.e. either a computed or a measured time
            will define t0 of the processing).

    The input of arg 0 (data_object) can be either a TimeSeries or
    a Seismogram object.  If a Seismogram object is passed the "component"
    argument is used to extract the specified single channel from the Seismogram
    object and that component is used for processing.  That is necessary
    because all the algorithms used are single channel algorithms.  To
    use this function on all components use a loop over components BUT
    make sure you use a unique value for the argument "metadata_key" for
    each component.  Note this will also produce multiple documents per
    input datum.

    The type of the data_object also has a more subtle implication the
    user must be aware of.  That is, in the MsPASS schema we store receiver coordinates
    in one of two different collections:  "channel" for TimeSeries data and
    "site" for Seismogram data.  When such data are loaded the generic keys
    like lat are always converted to names like channel_lat or site_lat
    for TimeSeries and Seismogram data respectively.   This function uses
    the data type to set that naming.  i.e. if the input is TimeSeries
    it tries to fetch the latitude data as channel_lat while if the input
    is a Seismogram it tries to fetch site_lat.   That is true of all coordinate
    data loaded by normalization from a source and receiver collection.

    The following args are passed directly to FD_snr_estimator:
    noise_window, noise_spectrum_engine, signal_window,
    signal_spectrum_engine, band_cutoff_snr, tbp, ntapers,
    high_frequency_search_start, poles, perc, optional_metrics, and
    save_spectra.  See the docstring for arrival_snr and FD_snr_estimator
    for descriptions of how these arguments should be used.  This top
    level function adds arguments described below.

    If data_object is marked dead on entry it is returned immediately
    without modification.

    :param db:  mspass Database object that is used as a handle to MongoDB.
    Default is None, which the function takes to mean you don't want to
    save the computed values to MongoDB.   In this mode the computed
    metrics will all be posted to a python dict that can be found under the
    key defined by the "metadata_key" argument.   When db is defined the
    contents of that same python dict will be saved to MongoDB in the
    collection defined by the "collection" argument.  If db is run as
    the default None the user is responsible for saving and managing the
    computed snr data.   Be aware a simple later call to db.save_data
    will not produce the same normalized data with the (default) arrival
    collection.

    :param collection:  MongoDB collection name where the results of this
    function will be saved.  If the "update_mode" argument is also set
    True the update section will reference this collection. Default is "arrival".

    :param use_measured_arrival_time:  boolean defining the method used to
    define the time reference for windowing used for snr calculations.
    When True the function fetches a phase arrival time from data_object
    with the key defined by the "measured_arrival_time_key" argument.
    NOTE(review): earlier documentation said a failed fetch kills the
    data_object and posts an error to elog.  The code below only does a
    plain subscript fetch with no handler, so whatever data_object[key]
    does for a missing key is what the caller will see - confirm.
    The default is False, in which case taup_model must be supplied
    because model-based travel times are used instead.

    :param measured_arrival_time_key: is the key used to fetch a
    measured arrival time.   This parameter is ignored if use_measured_arrival_time
    is False.

    :param taup_model: when use_measured_arrival_time is False this argument
    is required.  It defaults as None because there is no way the author
    knows to initialize it to anything valid.  If set it is expected to be an instance
    of the obspy class TauPyModel (https://docs.obspy.org/packages/autogen/obspy.taup.tau.TauPyModel.html#obspy.taup.tau.TauPyModel)
    If use_measured_arrival_time is False and this argument is None a
    MsPASSError exception is thrown (not logged; thrown as a fatal error).
    NOTE(review): this function does not itself verify that the object is
    a TauPyModel; it only calls its get_travel_times method.

    :param update_mode:   When True the function will attempt to extract
    a MongoDB ObjectID from data_object's Metadata using the key
    collection + "_id" ("arrival_id" for the default collection).
    If found it will add the computed data to an existing
    document in the collection defined by the collection argument.  Otherwise
    it logs a complaint, adds a new entry, and posts the ObjectID of the new document
    with that same key.  When False no attempt to fetch
    the id is made and we simply add a record and post its id.  This parameter is
    completely ignored unless the db argument defines a valid Database class.

    :param component: integer (0, 1, or 2) defining which component of a
    Seismogram object to use to compute the requested snr metrics.   This
    parameter is ignored if the input is a TimeSeries.  A value outside
    0-2 with Seismogram input causes a fatal MsPASSError to be thrown.

    :param source_collection:  normalization collection for source data.
    The default is the MsPASS name "source" which means the function will
    try to load the source hypocenter coordinates (when required) as
    source_lat, source_lon, source_depth, and source_time.

    :param receiver_collection:  when set this name will override the
    automatic setting of the expected normalization collection naming
    for receiver data (see above).  The default is None which causes
    the automatic switching to be invoked.  If it is any other string
    the automatic naming will be overridden.

    :return:  the data_object modified by insertion of the snr QC data
    in the object's Metadata
    """
    if data_object.dead():
        return data_object
    if isinstance(data_object, TimeSeries):
        # We need to make a copy of a TimeSeries object to assure the only
        # thing we change is the Metadata we add to the return
        data_to_process = TimeSeries(data_object)
        if receiver_collection:
            rcol = receiver_collection
        else:
            rcol = "channel"
    elif isinstance(data_object, Seismogram):
        if component < 0 or component > 2:
            raise MsPASSError(
                "arrival_snr_QC:  usage error.  " +
                "component parameter passed with illegal value={n}\n".format(
                    n=component) + "Must be 0, 1, or 2",
                ErrorSeverity.Fatal,
            )
        data_to_process = ExtractComponent(data_object, component)
        if receiver_collection:
            rcol = receiver_collection
        else:
            rcol = "site"
    else:
        raise MsPASSError(
            "arrival_snr_QC:   received invalid input data\n" +
            "Input must be either TimeSeries or a Seismogram object",
            ErrorSeverity.Fatal,
        )
    if use_measured_arrival_time:
        # NOTE(review): no handling here for a missing pick; see the
        # docstring note on use_measured_arrival_time.
        arrival_time = data_object[measured_arrival_time_key]
    else:
        # This test is essential or python will throw a more obscure,
        # generic exception
        if taup_model is None:
            raise MsPASSError(
                "arrival_snr_QC:  usage error.  " +
                "taup_model parameter is set None but use_measured_arrival_time is False\n"
                +
                "This gives no way to define processing windows.  See docstring",
                ErrorSeverity.Fatal,
            )
        # Source and receiver coordinates are read from the original datum's
        # Metadata using the collection-prefixed key names.
        source_lat = data_object[source_collection + "_lat"]
        source_lon = data_object[source_collection + "_lon"]
        source_depth = data_object[source_collection + "_depth"]
        source_time = data_object[source_collection + "_time"]
        receiver_lat = data_object[rcol + "_lat"]
        receiver_lon = data_object[rcol + "_lon"]
        delta = locations2degrees(source_lat, source_lon, receiver_lat,
                                  receiver_lon)
        arrival = taup_model.get_travel_times(
            source_depth_in_km=source_depth,
            distance_in_degree=delta,
            phase_list=[phase_name],
        )
        # NOTE(review): only the first returned arrival is used, and
        # arrival[0] assumes at least one arrival came back - confirm the
        # calculator cannot return an empty result for phase_name at delta.
        arrival_time = source_time + arrival[0].time
        taup_arrival_phase = arrival[0].phase.name
        # not sure if this will happen but worth trapping it as a warning if
        # it does
        if phase_name != taup_arrival_phase:
            data_object.elog.log_error(
                "arrival_snr_QC",
                "Requested phase name=" + phase_name +
                " does not match phase name tag returned by obpsy taup calculator="
                + taup_arrival_phase,
                "Complaint",
            )
    # Shift the working copy to arrival-relative time only when it is
    # currently on a UTC time base; otherwise it is left untouched.
    if data_to_process.time_is_UTC():
        data_to_process.ator(arrival_time)
    [snrdata, elog] = FD_snr_estimator(
        data_to_process,
        noise_window,
        noise_spectrum_engine,
        signal_window,
        signal_spectrum_engine,
        band_cutoff_snr,
        tbp,
        ntapers,
        high_frequency_search_start,
        poles,
        perc,
        optional_metrics,
        save_spectra=save_spectra,
    )
    # Any messages from the estimator are appended to the original datum's log
    if elog.size() > 0:
        data_object.elog += elog
    # Window boundaries are stored as arrival_time plus the relative
    # window limits passed in.
    snrdata["phase"] = phase_name
    snrdata["snr_arrival_time"] = arrival_time
    snrdata["snr_signal_window_start"] = arrival_time + signal_window.start
    snrdata["snr_signal_window_end"] = arrival_time + signal_window.end
    snrdata["snr_noise_window_start"] = arrival_time + noise_window.start
    snrdata["snr_noise_window_end"] = arrival_time + noise_window.end

    # These cross-referencing keys may not always be defined when a phase
    # time is based on a pick so we add these cautiously
    scol_id_key = source_collection + "_id"
    rcol_id_key = rcol + "_id"
    if data_object.is_defined(scol_id_key):
        snrdata[scol_id_key] = data_object[scol_id_key]
    if data_object.is_defined(rcol_id_key):
        snrdata[rcol_id_key] = data_object[rcol_id_key]
    # Note we add this result to data_object NOT data_to_process because that
    # is not always the same thing - for a TimeSeries input it is a copy of
    # the original but it may have been altered while for a Seismogram it is
    # an extracted component
    data_object[metadata_key] = snrdata
    if db:
        # The id key is derived from the collection name, e.g. "arrival_id"
        arrival_id_key = collection + "_id"
        dbcol = db[collection]
        if update_mode:
            if data_object.is_defined(arrival_id_key):
                arrival_id = data_object[arrival_id_key]
                filt = {"_id": arrival_id}
                update_clause = {"$set": snrdata}
                dbcol.update_one(filt, update_clause)
            else:
                data_object.elog.log_error(
                    "arrival_snr_QC",
                    "Running in update mode but arrival id key=" +
                    arrival_id_key + " is not defined\n" +
                    "Inserting computed snr data as a new document in collection="
                    + collection,
                    "Complaint",
                )
                # NOTE(review): presumably insert_one adds an "_id" entry to
                # snrdata in place (standard pymongo behavior) - verify
                # whether that matters for the dict stored under metadata_key.
                arrival_id = dbcol.insert_one(snrdata).inserted_id
                data_object[arrival_id_key] = arrival_id
        else:
            arrival_id = dbcol.insert_one(snrdata).inserted_id
            data_object[arrival_id_key] = arrival_id
    return data_object