예제 #1
0
 def loadwavelet(self, w, dtype="Seismogram", component=2, window=False):
     """
     Load the wavelet vector into the wrapped deconvolution processor.

     The logic intentionally mirrors loaddata; only the names differ.
     The vector that is finally passed to self.processor.loadwavelet
     depends on dtype:

     * "Seismogram" - w is passed through alg.ExtractComponent with the
       requested component and the data attribute of the result is used.
     * "TimeSeries" - the data attribute of w is used.
     * "raw_vector" - w is passed through unchanged.

     When window is True the Seismogram/TimeSeries datum is first
     passed through WindowData with self.dwin.

     :param w: input wavelet (expected content depends on dtype).
     :param dtype: one of "Seismogram", "TimeSeries" or "raw_vector".
     :param component: component handed to alg.ExtractComponent.
       Used only when dtype is "Seismogram".
     :param window: when True apply WindowData with self.dwin before
       extracting the sample vector.
     :raises RuntimeError: if dtype is not one of the three accepted
       values or if window is True with dtype "raw_vector".
     :return: None
     """
     if dtype == "raw_vector" and window:
         raise RuntimeError(
             "RFdeconProcessor.loadwavelet:  " +
             "Illegal argument combination\nwindow cannot be true with raw_vector input"
         )
     if dtype not in ("Seismogram", "TimeSeries", "raw_vector"):
         raise RuntimeError("RFdeconProcessor.loadwavelet:  " +
                            " Illegal dtype parameter=" + dtype)
     if dtype == "raw_vector":
         wvector = w
     else:
         # A TimeSeries is used as is.  The original code referenced an
         # unassigned local (ts) for unwindowed TimeSeries input, which
         # always raised UnboundLocalError.
         ts = alg.ExtractComponent(w, component) if dtype == "Seismogram" else w
         if window:
             ts = WindowData(ts, self.dwin)
         wvector = ts.data
     self.processor.loadwavelet(wvector)
예제 #2
0
 def loadwavelet(self, w, dtype="Seismogram", component=2, window=False):
     """
     Cache the wavelet vector on this object as self.wvector.

     The logic intentionally mirrors loaddata; only the names differ.
     The vector stored depends on dtype:

     * "Seismogram" - w is passed through ExtractComponent with the
       requested component and the data attribute of the result is used.
     * "TimeSeries" - the data attribute of w is used.
     * "raw_vector" - w is used unchanged.

     When window is True the Seismogram/TimeSeries datum is first
     passed through WindowData with self.dwin.start and self.dwin.end.

     :param w: input wavelet (expected content depends on dtype).
     :param dtype: one of "Seismogram", "TimeSeries" or "raw_vector".
     :param component: component handed to ExtractComponent.  Used only
       when dtype is "Seismogram".
     :param window: when True apply WindowData before extracting the
       sample vector.
     :raises RuntimeError: if dtype is not one of the three accepted
       values or if window is True with dtype "raw_vector".
     :return: None
     """
     if dtype == "raw_vector" and window:
         raise RuntimeError(
             "RFdeconProcessor.loadwavelet:  " +
             "Illegal argument combination\nwindow cannot be true with raw_vector input"
         )
     if dtype not in ("Seismogram", "TimeSeries", "raw_vector"):
         raise RuntimeError("RFdeconProcessor.loadwavelet:  " +
                            " Illegal dtype parameter=" + dtype)
     if dtype == "raw_vector":
         wvector = w
     else:
         # A TimeSeries is used as is.  The original code referenced an
         # unassigned local (ts) for unwindowed TimeSeries input, which
         # always raised UnboundLocalError.
         ts = ExtractComponent(w, component) if dtype == "Seismogram" else w
         if window:
             ts = WindowData(ts, self.dwin.start, self.dwin.end)
         wvector = ts.data
     # Have to explicitly convert to ndarray because DoubleVector cannot be serialized.
     self.wvector = np.array(wvector)
예제 #3
0
def test_RFdecon():
    """
    Check that RFdecon matches a manual run of RFdeconProcessor.

    Two seismograms are built with identical get_live_seismogram calls
    and the samples of the first are copied into the second so both
    hold the same data.  The first is deconvolved component by
    component through an explicitly driven RFdeconProcessor, the second
    through the RFdecon wrapper, and the outputs are compared sample by
    sample with an absolute tolerance of 1e-6.
    """
    reference = get_live_seismogram(71, 2.0)
    reference.t0 = -5

    duplicate = get_live_seismogram(71, 2.0)
    duplicate.t0 = -5
    for comp in range(3):
        for samp in range(duplicate.npts):
            duplicate.data[comp, samp] = reference.data[comp, samp]

    # Manual path: drive the processor directly
    proc = RFdeconProcessor()
    proc.loaddata(reference)
    proc.loadnoise(reference, window=True)
    proc.loadwavelet(reference, window=True)
    manual = WindowData(reference, proc.dwin.start, proc.dwin.end)
    for comp in range(3):
        proc.loaddata(manual, component=comp)
        decon = proc.apply()
        for samp in range(reference.npts):
            manual.data[comp, samp] = decon[samp]

    # Wrapper path
    wrapped = RFdecon(duplicate)

    for comp in range(3):
        for expected, actual in zip(manual.data[comp], wrapped.data[comp]):
            assert abs(expected - actual) < 1e-6
예제 #4
0
 def loadnoise(self, n, dtype="Seismogram", component=2, window=False):
     """
     Load the noise vector into the wrapped deconvolution processor.

     Returns immediately without doing anything when self.algorithm is
     "LeastSquares" or "WaterLevel", which ignore noise.  That is done
     silently on the assumption that the calling function wrapper posts
     a message about this nonfatal condition.

     Otherwise the vector passed to self.processor.loadnoise depends
     on dtype:

     * "Seismogram" - n is passed through alg.ExtractComponent with the
       requested component and the data attribute of the result is used.
     * "TimeSeries" - the data attribute of n is used.
     * "raw_vector" - n is passed through unchanged.

     When window is True the Seismogram/TimeSeries datum is first
     passed through WindowData with a TimeWindow built from the
     "noise_window_start" and "noise_window_end" values of self.md.

     :param n: input noise data (expected content depends on dtype).
     :param dtype: one of "Seismogram", "TimeSeries" or "raw_vector".
     :param component: component handed to alg.ExtractComponent.
       Used only when dtype is "Seismogram".
     :param window: when True window the data before extracting the
       sample vector.
     :raises RuntimeError: if dtype is not one of the three accepted
       values or if window is True with dtype "raw_vector".
     :return: None
     """
     # Return immediately for methods that ignore noise.
     if self.algorithm in ("LeastSquares", "WaterLevel"):
         return
     if dtype == "raw_vector" and window:
         raise RuntimeError(
             "RFdeconProcessor.loadnoise:  " +
             "Illegal argument combination\nwindow cannot be true with raw_vector input"
         )
     if dtype not in ("Seismogram", "TimeSeries", "raw_vector"):
         raise RuntimeError("RFdeconProcessor.loadnoise:  " +
                            " Illegal dtype parameter=" + dtype)
     win = None
     if window:
         # IMPORTANT:  these two parameters are not required by the
         # ScalarDecon C code but need to be inserted in pf for any
         # algorithm that requires noise data (i.e. multitaper) when the
         # window option is desired
         tws = self.md.get_double("noise_window_start")
         twe = self.md.get_double("noise_window_end")
         win = mspass.TimeWindow(tws, twe)
     if dtype == "raw_vector":
         nvector = n
     else:
         # A TimeSeries is used as is.  The original code referenced an
         # unassigned local (ts) for unwindowed TimeSeries input, which
         # always raised UnboundLocalError.
         ts = alg.ExtractComponent(n, component) if dtype == "Seismogram" else n
         if window:
             ts = WindowData(ts, win)
         nvector = ts.data
     self.processor.loadnoise(nvector)
예제 #5
0
    def loaddata(self, d, dtype="Seismogram", component=0, window=False):
        """
        Loads data for processing.  When window is set true
        use the internal pf definition of data time window
        and window the data.  The dtype parameter changes the
        behavior of this algorithm significantly depending on
        the setting.   It can be one of the following:
        Seismogram, TimeSeries, or raw_vector.   For the first
        two the data to process will be extracted in a
        pf specified window if window is True.  If window is
        False TimeSeries data will be passed directly and
        Seismogram data will have the data defined by the
        component parameter copied to the internal data
        vector workspace.   If dtype is set to raw_vector
        d is assumed to be a raw numpy vector of doubles or
        the aliased std::vector used in ccore, for example,
        in the TimeSeries object's vector.  Setting dtype
        to raw_vector and window True will result in this
        method throwing a RuntimeError exception as the
        combination is not possible since raw_vector data
        have no time base.

        The selected vector is stored as a numpy array in
        self.dvector.

        :param d: input data (contents expected depend upon
          value of dtype parameter).
        :param dtype: string defining the form d is expected
          to be (see details above)
        :param component: component of Seismogram data to
          load as data vector.  Ignored if dtype is raw_vector
          or TimeSeries.
        :param window: boolean controlling internally
          defined windowing.  (see details above)
        :raises RuntimeError: if dtype is not one of the three
          accepted values or if window is True with dtype
          raw_vector.

        :return:  None
        """
        # First basic sanity checks
        if dtype == "raw_vector" and window:
            raise RuntimeError(
                "RFdeconProcessor.loaddata:  " +
                "Illegal argument combination\nwindow cannot be true with raw_vector input"
            )
        if dtype not in ("Seismogram", "TimeSeries", "raw_vector"):
            raise RuntimeError("RFdeconProcessor.loaddata:  " +
                               " Illegal dtype parameter=" + dtype)
        if dtype == "raw_vector":
            dvector = d
        else:
            # A TimeSeries is used as is.  The original code referenced an
            # unassigned local (ts) for unwindowed TimeSeries input, which
            # always raised UnboundLocalError.
            ts = ExtractComponent(d, component) if dtype == "Seismogram" else d
            if window:
                ts = WindowData(ts, self.dwin.start, self.dwin.end)
            dvector = ts.data
        # Have to explicitly convert to ndarray because DoubleVector cannot be serialized.
        self.dvector = np.array(dvector)
예제 #6
0
def RFdecon(
    d,
    alg="LeastSquares",
    pf="RFdeconProcessor.pf",
    wavelet=None,
    noisedata=None,
    wcomp=2,
    ncomp=2,
    object_history=False,
    alg_name="RFdecon",
    alg_id=None,
    dryrun=False,
):
    """
    Use this function to compute conventional receiver functions
    from a single three component seismogram. In this function,
    an instance of wrapper class RFdeconProcessor will be built and
    initialized with alg and pf.

    Default assumes d contains all data sections required to do
    the deconvolution with the wavelet in component 2 (3 for matlab
    and FORTRAN people).  By default the data and noise
    (if required by the algorithm) sections will be extracted from
    the (assumed larger) data section using time windows defined
    internally in the processor pf definition.   For variations (e.g.
    adding tapering to one or more of the time series inputs)
    use the d, wavelet, and (if required) noisedata arguments to load
    each component separately.  Note d is dogmatically required
    to be three component data while optional wavelet and noisedata
    series are passed as plain numpy vectors (i.e. without the
    decoration of a TimeSeries).

    To make use of the extended outputs from RFdeconProcessor
    algorithms (e.g. actual output of the computed operator)
    call those methods after this function returns successfully
    with a three-component seismogram output.  That is possible
    because the processor object caches the most recent wavelet
    and inverse used for the deconvolution.   An exception is
    that all algorithms call their QCmetrics method of processor
    and push them to the headers of the deconvolved output.
    QCmetric attributes are algorithm dependent.

    The ProcessingHistory feature can optionally be enabled by
    setting the object_history argument to True.   When enabled one
    should normally set a unique id for the alg_id argument.

    Errors are reported through the returned object rather than by
    raising:  if loading the wavelet or noise raises a MsPASSError,
    d is killed, the error is posted to its elog and d is returned.
    If the windowed copy of d is dead it is returned immediately.
    Any exception raised while deconvolving kills the (windowed)
    result and posts a message to its elog.

    :param d:  Seismogram input data.  See notes above about
     time span of these data.
    :param alg: The algorithm to be applied, used for initializing
     a RFdeconProcessor object
    :param pf: The pf file to be parsed, used for initializing a
     RFdeconProcessor
    :param wavelet:   vector of doubles (numpy array or the
     std::vector container internal to TimeSeries object) defining
     the wavelet to use to compute deconvolution operator.
     Default is None which assumes processor was set up to use
     a component of d as the wavelet estimate.
    :param noisedata:  vector of doubles (numpy array or the
     std::vector container internal to TimeSeries object) defining
     noise data to use for computing regularization.  Not all RF
     estimation algorithms use noise estimators so this parameter
     is optional.   It can also be extracted from d depending on
     parameter file options.
    :param wcomp:  When defined from Seismogram d the wavelet
     estimate in conventional RFs is one of the components that
     are most P wave dominated. That is always one of three
     things:  Z, L of LQT, or the L component from the output of
     Kennett's free surface transformation operator.  The
     default is 2, which for ccore.Seismogram is always one of
     the above.   This parameter would be changed only if the
     data has undergone some novel transformation not yet invented
     and the best wavelet estimate was not in 2 (3 with FORTRAN
     and matlab numbering).
    :param ncomp: component number to use to compute noise.  This is used
     only if the algorithm in processor requires a noise estimate.
     Normally it should be the same as wcomp and is by default (2).
    :param object_history: boolean to enable or disable saving object
     level history.  Default is False.  Note this functionality is
     implemented via the mspass_func_wrapper decorator.
    :param alg_name:   When history is enabled this is the algorithm name
     assigned to the stamp for applying this algorithm.
     Default ("RFdecon") should normally be just used.
     Note this functionality is implemented via the mspass_func_wrapper decorator.
    :param alg_id:  algorithm id to assign to history record (used only if
     object_history is set True.)
     Note this functionality is implemented via the mspass_func_wrapper decorator.
    :param dryrun:  When true only the arguments are checked for validity.
     When true nothing is calculated and the original data are returned.
     Note this functionality is implemented via the mspass_func_wrapper decorator.

    :return:  Seismogram object containing the RF estimates.
     The orientations are always the same as the input.
    """

    processor = RFdeconProcessor(alg, pf)

    try:
        if wavelet is not None:
            processor.loadwavelet(wavelet, dtype="raw_vector")
        else:
            # Forward wcomp so the documented parameter is honored; the
            # default (2) matches the loadwavelet default used before.
            processor.loadwavelet(d, window=True, component=wcomp)
        if processor.uses_noise:
            # "is not None" is required here:  "!= None" on a numpy array
            # is an elementwise comparison that cannot be used as a
            # truth value.
            if noisedata is not None:
                processor.loadnoise(noisedata, dtype="raw_vector")
            else:
                processor.loadnoise(d, window=True, component=ncomp)
    except MsPASSError as err:
        d.kill()
        d.elog.log_error(err)
        return d
    # We window data before computing RF estimates for efficiency
    # Otherwise we would call the window operator 3 times below
    # WindowData will kill the output if the window doesn't match
    # which is the reason for the test immediately after this call
    result = WindowData(d, processor.dwin.start, processor.dwin.end)
    if result.dead():
        return result
    npts = result.npts
    try:
        for k in range(3):
            processor.loaddata(result, component=k)
            x = processor.apply()
            # overwrite this component's data in the result Seismogram
            # Use some caution handling any size mismatch
            nx = len(x)
            ncopy = min(nx, npts)
            for i in range(ncopy):
                result.data[k, i] = x[i]
            if nx < npts:
                # Output shorter than the window:  zero pad the tail.
                # This is actually an error condition so we log it
                for i in range(ncopy, npts):
                    result.data[k, i] = 0.0
                message = (
                    "Windowing size mismatch.\nData window length = %d which is less than operator length= %d"
                    % (nx, npts))
                result.elog.log_error("RFdecon", message,
                                      ErrorSeverity.Complaint)
    except MsPASSError as err:
        result.kill()
        result.elog.log_error(err)
    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt and SystemExit are not silently swallowed.
        print(
            "RFDecon:  something threw an unexpected exception - this is a bug and needs to be fixed.\nKilling result from RFdecon."
        )
        result.kill()
        result.elog.log_error("RFdecon", "Unexpected exception caught",
                              ErrorSeverity.Invalid)
    return result
예제 #7
0
파일: snr.py 프로젝트: wangyinz/mspass
def snr(
    data_object,
    noise_window=TimeWindow(-130.0, -5.0),
    signal_window=TimeWindow(-5.0, 120.0),
    noise_metric="mad",
    signal_metric="mad",
    perc=95.0,
):
    """
    Compute time-domain based signal-to-noise ratio with a specified metric.

    Signal-to-noise ratio is a fundamental measurement in all forms of
    seismic data processing.   There is, however, not a single unified metric
    that is ideal for all types of signals one may want to analyze.  One class
    of metrics uses some time-domain measure of amplitude in
    a signal and noise window cut from a single waveform segment.  A type
    example is snr of some particular "seismic phase" (P, S, PP, ScS, etc)
    relative to some measure of background noise.  e.g. for P phases it is
    nearly universal to try to estimate snr from some window defined by the
    arrival time of P and a noise window before the time P arrives (pre-event noise).

    This function provides a generic api to measure a large range of metrics
    using one of four choices for measuring the norm of the data in the
    signal and noise windows:
        1.  rms - L2 norm
        2.  mad - median absolute difference, which is essentially the median amplitude in this context
        3.  perc - percentage norm ala seismic unix.  perc is defined as the
            amplitude level where perc percentage of the data have an amplitude
            smaller than this value.  It is computed by ranking (sorting) the
            data, computing the count of that percentage relative to the number of
            amplitude samples, and returning the amplitude of the nearest value
            to that position in the ranked data.
        4.  peak - is the peak value which in linear algebra is the L infinity norm

    Note the user can specify a different norm for the signal and noise windows.
    The perc metric requires specifying what percentage level to use.

    This function will throw a MsPASSError exception if the window parameters
    do not define a time period inside the range of the data_object. You will
    need a custom function if the model of windows inside a larger waveform
    segment does not match your data.

    There is one final detail about an snr calculation that we handle carefully.
    With simulation data it is very common to have error free simulations where
    the "noise" window one would use with real data is all zeros.  An snr calculated
    with this function in that situation would either return inf or NaN depending
    on some picky details.  Neither is good as either can cause downstream
    problems.  For that reason we trap any condition where the noise amplitude
    measure is computed as zero.  If the signal amplitude is also zero we return
    a -1.0.  Otherwise we return a large, constant, positive number.  Neither
    condition will cause an exception to be thrown as that condition is considered
    somewhat to be anticipated.

    :param data_object:  MsPASS atomic data object (TimeSeries or Seismogram)
      to use for computing the snr.  Note that for Seismogram objects the
      metrics always use L2 measures of amplitude of each sample (i.e. vector amplitudes)
      If snr for components of a Seismogram are desired use ExtractComponent and
      apply this function to each component separately.
    :param noise_window: TimeWindow object defining the time range to extract
      from data_object to define the part of the signal considered noise.
      Times can be absolute or relative.  Default is the range -130 to -5 which
      makes sense only as time relative to some phase arrival time.
    :param signal_window:  TimeWindow object defining the time range to
      extract from data_object to define the part of the signal defined as
      signal to use for the required amplitude measure.  Default of -5 to
      120 is consistent with the default noise window (in terms of length) and
      assumes a time relative to a phase arrival time.  For absolute times
      each call to this function may need its own time window.
    :param noise_metric:  string defining one of the four metrics defined above
      ('mad','peak','perc' or 'rms') to use for noise window measurement.
    :param signal_metric:  string defining one of the four metrics defined above
      ('mad','peak','perc' or 'rms') to use for signal window measurement.
    :param perc:  percentage level passed to PercAmplitude.  Used only for
      a window whose metric is 'perc'.  Default is 95.0.
    :raises MsPASSError: if either window is flagged by _window_invalid or
      if noise_metric or signal_metric is not one of the four valid names.
    :return: estimated signal-to-noise ratio as a single float.  Note the
      special returns noted above for any situation where the noise window
      amplitude is 0
    """
    if _window_invalid(data_object, noise_window):
        raise MsPASSError(
            "snr:  noise_window [{wstart} - {wend}] is outside input data range"
            .format(wstart=noise_window.start, wend=noise_window.end),
            ErrorSeverity.Invalid,
        )
    if _window_invalid(data_object, signal_window):
        # Report the signal window here; the original message named and
        # printed the noise window, which misdirected debugging.
        raise MsPASSError(
            "snr:  signal_window [{wstart} - {wend}] is outside input data range"
            .format(wstart=signal_window.start, wend=signal_window.end),
            ErrorSeverity.Invalid,
        )
    n = WindowData(data_object, noise_window.start, noise_window.end)
    s = WindowData(data_object, signal_window.start, signal_window.end)
    if noise_metric == "rms":
        namp = RMSAmplitude(n)
    elif noise_metric == "mad":
        namp = MADAmplitude(n)
    elif noise_metric == "peak":
        namp = PeakAmplitude(n)
    elif noise_metric == "perc":
        namp = PercAmplitude(n, perc)
    else:
        raise MsPASSError(
            "snr:  Illegal noise_metric argument = " + noise_metric,
            ErrorSeverity.Invalid,
        )

    if signal_metric == "rms":
        samp = RMSAmplitude(s)
    elif signal_metric == "mad":
        samp = MADAmplitude(s)
    elif signal_metric == "peak":
        samp = PeakAmplitude(s)
    elif signal_metric == "perc":
        samp = PercAmplitude(s, perc)
    else:
        raise MsPASSError(
            "snr:  Illegal signal_metric argument = " + signal_metric,
            ErrorSeverity.Invalid,
        )

    # _safe_snr_calculation handles the zero-noise cases described above
    return _safe_snr_calculation(samp, namp)
예제 #8
0
파일: snr.py 프로젝트: wangyinz/mspass
def FD_snr_estimator(
    data_object,
    noise_window=TimeWindow(-130.0, -5.0),
    noise_spectrum_engine=None,
    signal_window=TimeWindow(-5.0, 120.0),
    signal_spectrum_engine=None,
    band_cutoff_snr=2.0,
    # check these are reasonable - don't remember the formula when writing this
    tbp=2.5,
    ntapers=4,
    high_frequency_search_start=5.0,
    poles=3,
    perc=95.0,
    optional_metrics=None,
    save_spectra=False,
):
    # optional_metrics=['snr_stats','filtered_envelope','filtered_L2','filtered_Linf','filtered_MAD','filtered_perc']):
    """
    Estimates one or more metrics of signal-to-noise from a TimeSeries object.
    An implicit assumption is that the analysis is centered on a timeable "phase"
    like P, PP, etc.

    This is a python function that can be used to compute one or several
    signal-to-noise ratio estimates based on an estimated bandwidth using
    the C++ function EstimateBandwidth.  The function has a fair number of
    options, but the core metrics computed are the bandwidth estimates
    computed by that function.  It uses a fairly simple search algorithm
    that functions well for most earthquake sources.  For the low end the
    algorithm searches from the first frequency indistinguishable from DC to
    find the lowest frequency for which the snr exceeds a threshold specified
    by the input parameter 'band_cutoff_snr'.   It does a similar search
    from the high end from a point 80% of Nyquist - a good choice for all
    modern digital data that use FIR antialias filters.   Both searches are
    not just defined with just the first frequency to satisfy the snr
    threshold criteria.  Only when a group of frequencies more than 2 times
    the time-bandwidth product exceed the threshold is the band edge
    defined.   The actual band edge is then defined as the first frequency
    exceeding the threshold.  That more elaborate algorithm was used to
    prevent pure lines in either the signal or noise spectrum from
    corrupting the estimates.

    A set of optional metrics can be computed.  All optional metrics use
    the bandwidth estimates in one way or another.   Optional metrics are
    defined by the following keywords passed through a list (actually
    any iterable container will work) of strings defining one or more
    of the keywords. The metrics and a brief description of each follow:

    *snr_stats* computes what are commonly plotted in box plots for the
    snr estimates within the estimated bandwidth:  minimum, maximum,
    0.25 (1/4) point, 0.75 (3/4) point, and the median.   These are set
    with following dict keys:   'snr_band_maximum','snr_band_minimum',
    'snr_band_1/4', 'srn_band_3/4', and 'snr_band_median' respectively.

    *filtered_envelope*, *filtered_L2*, *filtered_Linf*, *filtered_perc*, and *filtered_MAD*:
    All of these optional metrics first copy the data_object and then
    filter the copy with a Butterworth bandpass filter with the number of
    poles specified by the npoles argument and corners at the estimated
    band edge by the EstimateBandwidth function.   The metrics computed
    are time domain snr estimates computed with he filtered data.  They are
    actually computed from functions in this same module that can be
    used independently and have their own docstring description. The
    functions called have the following names in order of the keyword
    list above:  *snr_envelope*, *snr_L2*, *snr_Linv*, and *snr_MAD*.
    When the computed they are set in the output dictionary with the
    following (again in order) keys:  'snr_envelope','snr_L2', 'srn_Linf',
    and 'snr_MAD'.

    :param data_object:  TimeSeries object to be processed. For Seismogram
    objects the assumption is algorithm would be used for a single
    component (e.g longitudinal or vertical for a P phase)

    :param noise_window: defines the time window to use for computing the
    spectrum considered noise. The time span can be either relative or
    UTC (absolute) time but we do not check for consistency.  This low
    level function assumes they are consistent.  If not, the calculations
    are nearly guaranteed to fail.  Type must be mspasspy.ccore.TimeWindow.

    :param signal_window: defines the time window to use that defines what
    you consider "the signal".  The time span can be either relative or
    UTC (absolute) time but we do not check for consistency.  This low
    level function assumes they are consistent.  If not, the calculations
    are nearly guaranteed to fail.  Type must be mspasspy.ccore.TimeWindow.

    :param noise_spectrum_engine: is expected to either by a None type
    or an instance of a ccore object called an MTPowerSpectralEngine.
    When None an instance of MTPowerSpectralEngine is computed for
    each call to this function.   That is a convenience for small
    jobs or when called with data from mixed sample rates and/or variable
    length time windows.   It is very inefficient to use the default
    approach for processing large data sets and really for any use in a
    map operation with dask or spark.  Normal use should be for the user to
    predefine an MtPowerSpectralEngine from the expected window size
    for a given data sample rate and include it in the function call.

    :param signal_spectrum_engine:  is the comparable MTPowerSpectralEngine
    to use to compute the signal power spectrum.   Default is None with the
    same caveat as above for the noise_spectrum_engine.

    :param tbp:  time-bandwidth product to use for computing the set of
    Slepian functions used for the multitaper estimator.  This parameter is
    used only if the noise_spectrum_engine or signal_spectrum_engine
    arguments are set as None.  The default is 2.5

    :param ntapers:  is the number of Slepian functions (tapers) to compute
    for the multitaper estimators. Like tbp it is referenced only if
    noise_spectrum_engine or signal_spectrum_engine are set to None.
    Note the function will throw an exception if the ntaper parameter is
    not consistent with the time-bandwidth product.  That is, the
    maximum number of tapers is round(2*tbp-1).   Default is 4 which is
    consistent with default tbp=2.5

    :param high_frequency_search_start: Used to specify the upper frequency
      used to start the search for the upper end of the bandwidth by
      the function EstimateBandwidth.  Default is 4.0 which reasonable for
      teleseismic P wave data.  Should be change for usage other than
      analysis of teleseimic P phases or you the bandwidth may be
      grossly underestimated.

    :param npoles:   defines number of poles to us for the Butterworth
    bandpass applied for the "filtered" metrics (see above).  Default is 3.

    :param perc:   used only if 'filtered_perc' is in the optional metrics list.
    Specifies the perc parameter as used in seismic unix.  Uses the percentage
    point specified of the sorted abs of all amplitudes.  (Not perc=50.0 is
    identical to MAD)  Default is 95.0 which is 2 sigma for Gaussian noise.

    :param optional_metrics: is an iterable container containing one or more
    of the optional snr metrics discussed above.

    :param store_as_subdocument:  This parameter is included for
    flexibility but should not normally be changed by the user.  As noted
    earlier the outputs of this function are best abstracted as Metadata.
    When this parameter is False the Metadata members are all posted with
    directly to data_object's Metadata container.  If set True the
    internally generated python dict is copied and stored with a key
    defined through the subdocument_key argument.  See use below in
    function arrival_snr.

    :param subdocument_key:  key for storing results as a subdocument.
    This parameter is ignored unless store_as_subdocument is True.
    Default is "snr_data"

    :param save_spectra:   If set True (default is False) the function
    will pickle the computed noise and signal spectra and save the
    strings created along with a set of related metadata defining the
    time range to the output python dict (these will be saved in MongoDB
    when db is defined - see below).   This option should ONLY be used
    for spot checking, discovery of why an snr metric has unexpected
    results using graphics, or a research topic where the spectra would
    be of interest.  It is a very bad idea to turn this option on if
    you are processing a large quantity of data and saving the results
    to MongoDB as it will bloat the arrival collection.  Consider a
    different strategy if that essential for your work.

    :return:  python tuple with two components.  0 is a python dict with
    the computed metrics associated with keys defined above.  1 is a
    mspass.ccore.ErrorLogger object. Any errors in computng any of the
    metrics will be posted to this logger.  Users should then test this
    object using it's size() method and if it the log is not empty (size >0)
    the caller should handle that condition.   For normal use that means
    pushing any messages the log contains to the original data object's
    error log.
    """
    algname = "FN_snr_estimator"
    my_logger = ErrorLogger()
    # For this algorithm we dogmatically demand the input be a TimeSeries
    if not isinstance(data_object, TimeSeries):
        raise MsPASSError(
            "FD_snr_estimator:  Received invalid data object - arg0 data must be a TimeSeries",
            ErrorSeverity.Invalid,
        )
    # MTPowerSpectrum at the moment has an issue with how it handles
    # a user error in specifying time-band product and number of tapers.
    # We put in an explicit trap here and abort if the user makes a mistake
    # to avoid a huge spray of error message
    if ntapers > round(2 * tbp):
        message = (
            algname +
            "(Fatal Error):  ntapers={ntapers} inconsistent with tbp={tbp}\n".
            format(ntapers=ntapers, tbp=tbp))
        message += "ntapers must be >= round(2*tbp)"
        raise MsPASSError(message, ErrorSeverity.Fatal)
    if data_object.dead():
        my_logger.log_error(
            algname,
            "Datum received was set dead - cannot compute anything",
            ErrorSeverity.Invalid,
        )
        return [dict(), my_logger]
    # We enclose all the main code here in a try block and cat any MsPASSErrors
    # they will be posted as log message. Others will not be handled
    # intentionally letting python's error mechanism handle them as
    # unexpected exceptions - MsPASSError can be anticipated for data problems
    snrdata = dict()
    try:
        # First extract the required windows and compute the power spectra
        n = WindowData(data_object, noise_window.start, noise_window.end)
        s = WindowData(data_object, signal_window.start, signal_window.end)
        if noise_spectrum_engine:
            nengine = noise_spectrum_engine
        else:
            nengine = MTPowerSpectrumEngine(n.npts, tbp, ntapers)
        if signal_spectrum_engine:
            sengine = signal_spectrum_engine
        else:
            sengine = MTPowerSpectrumEngine(n.npts, tbp, ntapers)
        N = nengine.apply(n)
        S = sengine.apply(s)
        bwd = EstimateBandwidth(S.df, S, N, band_cutoff_snr, tbp,
                                high_frequency_search_start)
        # These estimates are always computed and posted
        snrdata["low_f_band_edge"] = bwd.low_edge_f
        snrdata["high_f_band_edge"] = bwd.high_edge_f
        snrdata["low_f_band_edge_snr"] = bwd.low_edge_snr
        snrdata["high_f_band_edge_snr"] = bwd.high_edge_snr
        snrdata["spectrum_frequency_range"] = bwd.f_range
        snrdata["bandwidth_fraction"] = bwd.bandwidth_fraction()
        snrdata["bandwidth"] = bwd.bandwidth()
        if save_spectra:
            snrdata["signal_spectrum"] = pickle.dumps(S)
            snrdata["noise_spectrum"] = pickle.dumps(N)
            snrdata["signal_window_start_time"] = signal_window.start
            snrdata["signal_window_end_time"] = signal_window.end
            snrdata["noise_window_start_time"] = noise_window.start
            snrdata["noise_window_end_time"] = noise_window.end

    except MsPASSError as err:
        newmessage = _reformat_mspass_error(
            err,
            "Spectrum calculation and EstimateBandwidth function section failed with the following message\n",
            "No SNR metrics can be computed for this datum",
        )
        my_logger.log_error(algname, newmessage, ErrorSeverity.Invalid)
        return [snrdata, my_logger]

    # For current implementation all the optional metrics require
    # computed a filtered version of the data.  If a new option is
    # desired that does not require filtering the data the logic
    # here will need to be changed to create a more exclusive test

    if len(optional_metrics) > 0:
        # use the mspass butterworth filter for speed - obspy
        # version requires a conversion to Trace objects
        BWfilt = Butterworth(
            False,
            True,
            True,
            poles,
            bwd.low_edge_f,
            poles,
            bwd.high_edge_f,
            data_object.dt,
        )
        filtered_data = TimeSeries(data_object)
        BWfilt.apply(filtered_data)
        nfilt = WindowData(filtered_data, noise_window.start, noise_window.end)
        sfilt = WindowData(filtered_data, signal_window.start,
                           signal_window.end)
        # In this implementation we don't need this any longer so we
        # delete it here.  If options are added beware
        del filtered_data
        # Some minor efficiency would be possible if we avoided
        # duplication of computations when multiple optional metrics
        # are requested, but the fragility that adds to maintenance
        # is not justified
        for metric in optional_metrics:
            if metric == "snr_stats":
                try:
                    stats = BandwidthStatistics(S, N, bwd)
                    # stats is a Metadata container - copy to snrdata
                    for k in stats.keys():
                        snrdata[k] = stats[k]
                except MsPASSError as err:
                    newmessage = _reformat_mspass_error(
                        err,
                        "BandwithStatistics throw the following error\n",
                        "Five snr_stats attributes were not computed",
                    )
                    my_logger.log_error(algname, newmessage, err.severity)
            if metric == "filtered_envelope":
                try:
                    analytic_nfilt = hilbert(nfilt.data)
                    analytic_sfilt = hilbert(sfilt.data)
                    nampvector = np.abs(analytic_nfilt)
                    sampvector = np.abs(analytic_sfilt)
                    namp = np.median(nampvector)
                    samp = np.max(sampvector)
                    snrdata[
                        "snr_envelope_Linf_over_L1"] = _safe_snr_calculation(
                            samp, namp)
                except:
                    my_logger.log_erro(
                        algname,
                        "Error computing filtered_envelope metrics:  snr_envelope_Linf_over_L1 not computed",
                        ErrorSeverity.Complaint,
                    )
            if metric == "filtered_L2":
                try:
                    namp = RMSAmplitude(nfilt)
                    samp = RMSAmplitude(sfilt)
                    snrvalue = _safe_snr_calculation(samp, namp)
                    snrdata["snr_L2"] = snrvalue
                except MsPASSError as err:
                    newmessage = _reformat_mspass_error(
                        err,
                        "Error computing filtered_L2 metric",
                        "snr_L2 attribute was not compouted",
                    )
                    my_logger.log_error(algname, newmessage, err.severity)

            if metric == "filtered_MAD":
                try:
                    namp = MADAmplitude(nfilt)
                    samp = MADAmplitude(sfilt)
                    snrvalue = _safe_snr_calculation(samp, namp)
                    snrdata["snr_MAD"] = snrvalue
                except MsPASSError as err:
                    newmessage = _reformat_mspass_error(
                        err,
                        "Error computing filtered_MAD metric",
                        "snr_MAD attribute was not computed",
                    )
                    my_logger.log_error(algname, newmessage, err.severity)

            if metric == "filtered_Linf":
                try:
                    # the C function expects a fraction - for users a percentage
                    # is clearer
                    namp = PercAmplitude(nfilt, perc / 100.0)
                    samp = PeakAmplitude(sfilt)
                    snrvalue = _safe_snr_calculation(samp, namp)
                    snrdata["snr_Linf"] = snrvalue
                    snrdata["snr_perc"] = perc
                except MsPASSError as err:
                    newmessage = _reformat_mspass_error(
                        err,
                        "Error computing filtered_Linf metric",
                        "snr_Linf attribute was not computed",
                    )
                    my_logger.log_error(algname, newmessage, err.severity)

            if metric == "filtered_perc":
                try:
                    namp = MADAmplitude(nfilt)
                    samp = PercAmplitude(sfilt, perc / 100.0)
                    snrvalue = _safe_snr_calculation(samp, namp)
                    snrdata["snr_perc"] = snrvalue
                    snrdata[
                        "snr_perc"] = perc  # redundant if filter_Linf is also run but tiny cost
                except MsPASSError as err:
                    newmessage = _reformat_mspass_error(
                        err,
                        "Error computing filtered_perc metric",
                        "snr_perf metric was not computed",
                    )
                    my_logger.log_error(algname, newmessage, err.severity)
            else:
                message = "Illegal optional_metrics keyword=" + metric + "\n"
                message += (
                    "If that is a typo expect some metrics will be missing from output"
                )
                my_logger.log_error(algname, message, ErrorSeverity.Complaint)
    return [snrdata, my_logger]
예제 #9
0
def test_windowdata():
    """Exercise WindowData on a TimeSeries and a Seismogram.

    Both inputs are filled with simple ramps so the windowed samples are
    easy to eyeball in the printed output.  The test checks the size and
    time span of the windowed result for each data type, then checks what
    comes back for a killed Seismogram and for calls that pass the
    preserve_history (and instance) keywords.
    """
    npts = 1000
    # Scalar input:  sample value equals sample index
    scalar = TimeSeries()
    setbasics(scalar, npts)
    for idx in range(npts):
        scalar.data[idx] = float(idx)
    # Three-component input:  same ramp offset by 100, 200, 300 per row
    three_c = Seismogram()
    setbasics(three_c, npts)
    for comp in range(3):
        offset = 100 * (comp + 1)
        for idx in range(npts):
            three_c.data[comp, idx] = offset + float(idx)

    window = TimeWindow(2, 3)

    # Scalar case
    cut = WindowData(scalar, window)
    print('t y')
    for idx in range(cut.npts):
        print(cut.time(idx), cut.data[idx])
    assert len(cut.data) == 101
    assert cut.t0 == 2.0
    assert cut.endtime() == 3.0

    # Three-component case
    cut = WindowData(three_c, window)
    print('t x0 x1 x2')
    for idx in range(cut.npts):
        print(cut.time(idx), cut.data[0, idx], cut.data[1, idx],
              cut.data[2, idx])
    assert cut.data.columns() == 101
    assert cut.t0 == 2.0
    assert cut.endtime() == 3.0

    print('testing error handling')
    # A killed datum is expected to come back unwindowed and still dead
    three_c.kill()
    cut = WindowData(three_c, window)
    assert cut.npts == 1000 and not cut.live
    # NOTE(review): presumably asking for history without an instance id
    # is what posts the single log entry asserted here - confirm against
    # the WindowData implementation.
    cut = WindowData(scalar, window, preserve_history=True)
    print('Error message posted')
    print(cut.elog.get_error_log())
    assert cut.elog.size() == 1
    # this still throws an error but the message will be different
    cut = WindowData(scalar, window, preserve_history=True, instance='0')
    print('Error message posted')
    print(cut.elog.get_error_log())
    assert cut.elog.size() == 1