def loadwavelet(self, w, dtype="Seismogram", component=2, window=False):
    """
    Load the wavelet vector into the internal workspace (self.wvector).

    The logic intentionally parallels loaddata; only the names differ.
    The dtype parameter controls how w is interpreted:

    - "Seismogram": the channel selected by component is extracted with
      ExtractComponent and its sample vector is used.
    - "TimeSeries": the sample vector of w is used directly.
    - "raw_vector": w itself is treated as the vector of samples.

    When window is True, Seismogram and TimeSeries inputs are cut with
    WindowData to the interval self.dwin.start to self.dwin.end before
    the samples are copied.

    :param w: input wavelet (expected content depends on dtype).
    :param dtype: one of "Seismogram", "TimeSeries", or "raw_vector".
    :param component: component of a Seismogram to use.  Ignored for
      the other two dtype values.
    :param window: when True apply the internally defined time window.
    :raises RuntimeError: if dtype is not one of the three legal values
      or if window is True with dtype "raw_vector".
    :return: None.  The result is stored in self.wvector.
    """
    if dtype == "raw_vector" and window:
        raise RuntimeError(
            "RFdeconProcessor.loadwavelet: "
            + "Illegal argument combination\nwindow cannot be true with raw_vector input"
        )
    if dtype not in ("Seismogram", "TimeSeries", "raw_vector"):
        raise RuntimeError(
            "RFdeconProcessor.loadwavelet: " + " Illegal dtype parameter=" + dtype
        )
    if dtype == "Seismogram":
        ts = ExtractComponent(w, component)
        if window:
            ts = WindowData(ts, self.dwin.start, self.dwin.end)
        wvector = ts.data
    elif dtype == "TimeSeries":
        # The unwindowed case previously referenced an unassigned local
        # (ts) and always failed; the samples come straight from w.
        ts = WindowData(w, self.dwin.start, self.dwin.end) if window else w
        wvector = ts.data
    else:
        wvector = w
    # Have to explicitly convert to ndarray because DoubleVector cannot
    # be serialized.
    self.wvector = np.array(wvector)
def _wtva_Seismogram(self, d, fill):
    """
    Handle a Seismogram by delegating to the TimeSeriesEnsemble handler.

    The three components of d are extracted one at a time with
    ExtractComponent, collected in component order (0, 1, 2) as members
    of a new TimeSeriesEnsemble, and that ensemble is passed on to
    self._wtva_TimeSeriesEnsemble along with fill.

    :param d: Seismogram object to process.
    :param fill: passed through unchanged to _wtva_TimeSeriesEnsemble.
    """
    ensemble = TimeSeriesEnsemble()
    for comp in range(3):
        ensemble.member.append(ExtractComponent(d, comp))
    self._wtva_TimeSeriesEnsemble(ensemble, fill)
def test_ExtractComponent():
    """
    Check that ExtractComponent returns each row of a Seismogram's data.

    A live Seismogram is filled with a random 3 x 6 matrix; every
    extracted component must match the corresponding matrix row sample
    for sample.
    """
    ncomp, nsamp = 3, 6
    seis = Seismogram()
    seis.live = 1
    seis.data = dmatrix(np.random.rand(ncomp, nsamp))
    seis.npts = nsamp
    # Extract all components first, then compare, as separate passes.
    components = [ExtractComponent(seis, k) for k in range(ncomp)]
    for k, comp in enumerate(components):
        assert (comp.data == seis.data[k]).all()
def loadnoise(self, n, dtype="Seismogram", component=2, window=False):
    """
    Load the noise vector into the internal workspace (self.nvector).

    When self.algorithm is "LeastSquares" or "WaterLevel" the method
    returns immediately without storing anything because those methods
    ignore noise.  That is done silently; the calling wrapper is assumed
    to post an error to elog for this nonfatal condition.

    The dtype parameter controls how n is interpreted:

    - "Seismogram": the channel selected by component is extracted with
      ExtractComponent and its sample vector is used.
    - "TimeSeries": the sample vector of n is used directly.
    - "raw_vector": n itself is treated as the vector of samples.

    When window is True, Seismogram and TimeSeries inputs are cut with
    WindowData to the interval fetched from self.md with the keys
    "noise_window_start" and "noise_window_end".

    :param n: input noise data (expected content depends on dtype).
    :param dtype: one of "Seismogram", "TimeSeries", or "raw_vector".
    :param component: component of a Seismogram to use.  Ignored for
      the other two dtype values.
    :param window: when True apply the noise time window defined in
      self.md.
    :raises RuntimeError: if dtype is not one of the three legal values
      or if window is True with dtype "raw_vector".
    :return: None.  The result is stored in self.nvector.
    """
    # Return immediately for methods that ignore noise.
    if self.algorithm in ("LeastSquares", "WaterLevel"):
        return
    # Basic sanity checks
    if dtype == "raw_vector" and window:
        raise RuntimeError(
            "RFdeconProcessor.loadnoise: "
            + "Illegal argument combination\nwindow cannot be true with raw_vector input"
        )
    if dtype not in ("Seismogram", "TimeSeries", "raw_vector"):
        raise RuntimeError(
            "RFdeconProcessor.loadnoise: " + " Illegal dtype parameter=" + dtype
        )
    if window:
        # IMPORTANT: these two parameters are not required by the
        # ScalarDecon C code but need to be inserted in the pf for any
        # algorithm that requires noise data (i.e. multitaper) when the
        # window option is desired.
        tws = self.md.get_double("noise_window_start")
        twe = self.md.get_double("noise_window_end")
    if dtype == "Seismogram":
        ts = ExtractComponent(n, component)
        if window:
            ts = WindowData(ts, tws, twe)
        nvector = ts.data
    elif dtype == "TimeSeries":
        # The unwindowed case previously referenced an unassigned local
        # (ts) and always failed; the samples come straight from n.
        ts = WindowData(n, tws, twe) if window else n
        nvector = ts.data
    else:
        nvector = n
    # Have to explicitly convert to ndarray because DoubleVector cannot
    # be serialized.
    self.nvector = np.array(nvector)
def loaddata(self, d, dtype="Seismogram", component=0, window=False):
    """
    Loads data for processing.  When window is set True use the
    internal pf definition of the data time window and window the data.
    The dtype parameter changes the behavior of this algorithm
    significantly depending on the setting.  It can be one of the
    following:  Seismogram, TimeSeries, or raw_vector.  For the first
    two the data to process will be extracted in a pf specified window
    if window is True.  If window is False TimeSeries data will be
    passed directly and Seismogram data will have the data defined by
    the component parameter copied to the internal data vector
    workspace.  If dtype is set to raw_vector d is assumed to be a raw
    numpy vector of doubles or the aliased std::vector used in ccore,
    for example, the data vector of a TimeSeries object.  Setting dtype
    to raw_vector and window True will result in this method throwing a
    RuntimeError exception as the combination is not possible since
    raw_vector data have no time base.

    :param d: input data (contents expected depend upon value of dtype
      parameter).
    :param dtype: string defining the form d is expected to be (see
      details above)
    :param component: component of Seismogram data to load as data
      vector.  Ignored if dtype is raw_vector or TimeSeries.
    :param window: boolean controlling internally defined windowing.
      (see details above)
    :raises RuntimeError: if dtype is not one of the three legal values
      or if window is True with dtype "raw_vector".
    :return: None.  The result is stored in self.dvector.
    """
    # First basic sanity checks
    if dtype == "raw_vector" and window:
        raise RuntimeError(
            "RFdeconProcessor.loaddata: "
            + "Illegal argument combination\nwindow cannot be true with raw_vector input"
        )
    if dtype not in ("Seismogram", "TimeSeries", "raw_vector"):
        raise RuntimeError(
            "RFdeconProcessor.loaddata: " + " Illegal dtype parameter=" + dtype
        )
    if dtype == "Seismogram":
        ts = ExtractComponent(d, component)
        if window:
            ts = WindowData(ts, self.dwin.start, self.dwin.end)
        dvector = ts.data
    elif dtype == "TimeSeries":
        # The unwindowed case previously referenced an unassigned local
        # (ts) and always failed; the samples come straight from d.
        ts = WindowData(d, self.dwin.start, self.dwin.end) if window else d
        dvector = ts.data
    else:
        dvector = d
    # Have to explicitly convert to ndarray because DoubleVector cannot
    # be serialized.
    self.dvector = np.array(dvector)
def Seismogram2Stream(sg, chanmap=["E", "N", "Z"], hang=[90.0, 0.0, 0.0], vang=[90.0, 90.0, 0.0]):
    # fixme hang and vang parameters
    """
    Convert a mspass::Seismogram object to an obspy::Stream with the
    3 components split apart.

    mspass and obspy take incompatible approaches to three component
    data.  obspy uses a Stream object that wraps a list of Trace
    objects, while mspass bundles 3C data into a matrix container.
    This function pulls the matrix apart and builds the three Trace
    objects obspy needs.  How the output is bundled afterward is the
    caller's responsibility.

    The error log of sg is copied onto each intermediate TimeSeries
    before conversion.  Whether it survives in the obspy output depends
    on TimeSeries2Trace, so save the input to MongoDB first if the log
    must be preserved.

    :param sg: is the Seismogram object to be converted
    :type sg: :class:`~mspasspy.ccore.Seismogram`
    :param chanmap: 3 element list of channel names to be assigned
      to the components
    :type chanmap: list
    :param hang: 3 element list of horizontal angle attributes (azimuth
      in degrees) to be set for each component.  (default is for
      cardinal directions)
    :type hang: list
    :param vang: 3 element list of vertical angle (theta of spherical
      coordinates) to be set for each component.  (default is for
      cardinal directions)
    :type vang: list
    :return: obspy Stream object holding 3 Trace objects in mspass
      component order.  The Stream carries a boolean attribute
      dead_mspass.  When sg is marked dead that attribute is True and
      the Stream holds three default-constructed Trace objects, each
      also tagged with dead_mspass set True.
    :rtype: :class:`obspy.core.stream.Stream`
    """
    stream = obspy.core.Stream()
    if not sg.live:
        # Dead input: return placeholder traces flagged as dead.
        stream.dead_mspass = True
        for _ in range(3):
            placeholder = obspy.core.Trace()
            placeholder.dead_mspass = True
            stream.append(placeholder)
        return stream

    stream.dead_mspass = False
    parent_id = sg.id()
    parent_log = sg.elog
    for comp in range(3):
        core_ts = ExtractComponent(sg, comp)
        core_ts.put_string(Keywords.chan, chanmap[comp])
        core_ts.put_double(Keywords.channel_hang, hang[comp])
        core_ts.put_double(Keywords.channel_vang, vang[comp])
        # core_ts is a CoreTimeSeries but a few things must be added to
        # make it mesh with TimeSeries2Trace
        full_ts = TimeSeries(core_ts, parent_id)
        full_ts.elog = parent_log
        stream.append(TimeSeries2Trace(full_ts))
    return stream
def arrival_snr_QC(
    data_object,
    noise_window=TimeWindow(-130.0, -5.0),
    noise_spectrum_engine=None,
    signal_window=TimeWindow(-5.0, 120.0),
    signal_spectrum_engine=None,
    band_cutoff_snr=2.0,
    # check these are reasonable - don't remember the formula when writing this
    tbp=5.0,
    ntapers=10,
    high_frequency_search_start=5.0,
    poles=3,
    perc=95.0,
    phase_name="P",
    metadata_key="Parrival",
    optional_metrics=[
        "snr_stats",
        "filtered_envelope",
        "filtered_L2",
        "filtered_Linf",
        "filtered_MAD",
        "filtered_perc",
    ],
    save_spectra=False,
    db=None,
    collection="arrival",
    use_measured_arrival_time=False,
    measured_arrival_time_key="Ptime",
    taup_model=None,
    update_mode=False,
    component=2,
    source_collection="source",
    receiver_collection=None,
):
    """
    Compute a series of metrics that can be used for quality control
    filtering of seismic phase data.

    This is the highest level function in this module for computing
    signal-to-noise ratio metrics for processing signals that can be
    defined by a computable or measurable "phase".  Features this
    function adds over lower level functions in this module are:

    1. An option to save computed metrics to a MongoDB collection
       (defaults as "arrival").  If the update_mode argument is set
       True (default is False) the function expects the data_object to
       contain an id attribute (key is the collection name + "_id",
       i.e. "arrival_id" by default) that references the ObjectID of
       an existing entry in the collection where the data this
       function computes is to be saved.
    2. Adds an option to use a computed or measured arrival as the time
       reference for all windowing.  The lower level snr functions in
       this module require the user do what this function does prior to
       calling the function.  Note one or the other is required (i.e.
       either a computed or a measured time will define t0 of the
       processing).

    The input of arg 0 (data_object) can be either a TimeSeries or a
    Seismogram object.  If a Seismogram object is passed the
    "component" argument is used to extract the specified single
    channel from the Seismogram object and that component is used for
    processing.  That is necessary because all the algorithms used are
    single channel algorithms.  To use this function on all components
    use a loop over components BUT make sure you use a unique value for
    the argument "metadata_key" for each component.  Note this will
    also produce multiple documents per input datum.

    The type of the data_object also has a more subtle implication the
    user must be aware of.  That is, in the MsPASS schema we store
    receiver coordinates in one of two different collections:
    "channel" for TimeSeries data and "site" for Seismogram data.  When
    such data are loaded the generic keys like lat are always converted
    to names like channel_lat or site_lat for TimeSeries and Seismogram
    data respectively.  This function uses the data type to set that
    naming.  i.e. if the input is TimeSeries it tries to fetch the
    latitude data as channel_lat while if the input is a Seismogram it
    tries to fetch site_lat.  That is true of all coordinate data
    loaded by normalization from a source and receiver collection.

    The following args are passed directly to FD_snr_estimator:
    noise_window, noise_spectrum_engine, signal_window,
    signal_spectrum_engine, band_cutoff_snr, tbp, ntapers,
    high_frequency_search_start, poles, perc, optional_metrics, and
    save_spectra.  See the docstring for arrival_snr and
    FD_snr_estimator for descriptions of how these arguments should be
    used.  phase_name and metadata_key are used by this function to tag
    and store the result.  This top level function adds the arguments
    described below.

    :param db: mspass Database object that is used as a handle to
      MongoDB.  Default is None, which the function takes to mean you
      don't want to save the computed values to MongoDB.  In this mode
      the computed metrics will all be posted to a python dict that can
      be found under the key defined by the "metadata_key" argument.
      When db is defined the contents of that same python dict will be
      saved to MongoDB in the collection defined by the "collection"
      argument.  If db is left as the default None the user is
      responsible for saving and managing the computed snr data.  Be
      aware a simple later call to db.save_data will not produce the
      same normalized data with the (default) arrival collection.
    :param collection: MongoDB collection name where the results of
      this function will be saved.  If the "update_mode" argument is
      also set True the update section will reference this collection.
      Default is "arrival".
    :param use_measured_arrival_time: boolean defining the method used
      to define the time reference for windowing used for snr
      calculations.  When True the function will attempt to fetch a
      phase arrival time with the key defined by the
      "measured_arrival_time_key" argument.  In that mode if the fetch
      fails the data_object will be killed and an error posted to elog.
      That somewhat brutal choice was intentional as the expectation is
      if you want to use measured arrival times you don't want data
      where there are no picks.  The default is False, which means a
      model-based time is computed and the taup_model argument must
      then be supplied because there is no way to default a tau-p
      calculator handle.
    :param measured_arrival_time_key: is the key used to fetch a
      measured arrival time.  This parameter is ignored if
      use_measured_arrival_time is False.
    :param taup_model: when use_measured_arrival_time is False this
      argument is required.  It defaults as None because there is no
      way the author knows to initialize it to anything valid.  If set
      it is expected to be an instance of the obspy class TauPyModel
      (https://docs.obspy.org/packages/autogen/obspy.taup.tau.TauPyModel.html#obspy.taup.tau.TauPyModel).
      A MsPASSError exception is thrown (not logged; thrown as a fatal
      error) if use_measured_arrival_time is False and this argument is
      None.  The type of the object is not verified by this function.
      If the calculator returns no arrivals for the requested phase the
      datum is killed and an error is posted to elog.
    :param update_mode: When True the function will attempt to extract
      a MongoDB ObjectID from data_object's Metadata using the key
      formed from the collection name + "_id" ("arrival_id" for the
      default collection).  If found it will add the computed data to
      an existing document in the collection defined by the collection
      argument.  Otherwise it will simply add a new entry and post the
      ObjectID of the new document with the same key.  When False no
      attempt to fetch the arrival id is made and we simply add a
      record.  This parameter is completely ignored unless the db
      argument defines a valid Database class.
    :param component: integer (0, 1, or 2) defining which component of
      a Seismogram object to use to compute the requested snr metrics.
      This parameter is ignored if the input is a TimeSeries.
    :param source_collection: normalization collection for source data.
      The default is the MsPASS name "source" which means the function
      will try to load the source hypocenter coordinates (when
      required) as source_lat, source_lon, source_depth, and
      source_time.
    :param receiver_collection: when set this name will override the
      automatic setting of the expected normalization collection naming
      for receiver coordinates (see above).  The default is None which
      causes the automatic switching to be invoked.  If it is any other
      string the automatic naming will be overridden.

    :raises MsPASSError: (Fatal) if data_object is neither a TimeSeries
      nor a Seismogram, if component is outside 0-2 for Seismogram
      input, or if taup_model is None when use_measured_arrival_time is
      False.
    :return: the data_object modified by insertion of the snr QC data
      in the object's Metadata
    """
    if data_object.dead():
        return data_object
    if isinstance(data_object, TimeSeries):
        # We need to make a copy of a TimeSeries object to assure the only
        # thing we change is the Metadata we add to the return
        data_to_process = TimeSeries(data_object)
        rcol = receiver_collection if receiver_collection else "channel"
    elif isinstance(data_object, Seismogram):
        if component < 0 or component > 2:
            raise MsPASSError(
                "arrival_snr_QC: usage error.  "
                + "component parameter passed with illegal value={n}\n".format(
                    n=component
                )
                + "Must be 0, 1, or 2",
                ErrorSeverity.Fatal,
            )
        data_to_process = ExtractComponent(data_object, component)
        rcol = receiver_collection if receiver_collection else "site"
    else:
        raise MsPASSError(
            "arrival_snr_QC: received invalid input data\n"
            + "Input must be either TimeSeries or a Seismogram object",
            ErrorSeverity.Fatal,
        )

    if use_measured_arrival_time:
        # Honor the documented contract: data without a pick are killed
        # with a log entry rather than aborting with an exception.
        if not data_object.is_defined(measured_arrival_time_key):
            data_object.elog.log_error(
                "arrival_snr_QC",
                "use_measured_arrival_time is True but arrival time key="
                + measured_arrival_time_key
                + " is not defined for this datum",
                ErrorSeverity.Invalid,
            )
            data_object.kill()
            return data_object
        arrival_time = data_object[measured_arrival_time_key]
    else:
        # This test is essential or python will throw a more obscure,
        # generic exception
        if taup_model is None:
            raise MsPASSError(
                "arrival_snr_QC: usage error. "
                + "taup_model parameter is set None but use_measured_arrival_time is False\n"
                + "This gives no way to define processing windows. See docstring",
                ErrorSeverity.Fatal,
            )
        source_lat = data_object[source_collection + "_lat"]
        source_lon = data_object[source_collection + "_lon"]
        source_depth = data_object[source_collection + "_depth"]
        source_time = data_object[source_collection + "_time"]
        receiver_lat = data_object[rcol + "_lat"]
        receiver_lon = data_object[rcol + "_lon"]
        delta = locations2degrees(source_lat, source_lon, receiver_lat, receiver_lon)
        arrival = taup_model.get_travel_times(
            source_depth_in_km=source_depth,
            distance_in_degree=delta,
            phase_list=[phase_name],
        )
        # The calculator returns an empty list when the requested phase
        # does not exist at this distance; without a time reference the
        # windows cannot be defined so the datum is killed.
        if not arrival:
            data_object.elog.log_error(
                "arrival_snr_QC",
                "taup calculator returned no arrivals for phase name="
                + phase_name
                + " at distance(deg)={d}".format(d=delta),
                ErrorSeverity.Invalid,
            )
            data_object.kill()
            return data_object
        arrival_time = source_time + arrival[0].time
        taup_arrival_phase = arrival[0].phase.name
        # not sure if this will happen but worth trapping it as a warning if
        # it does
        if phase_name != taup_arrival_phase:
            data_object.elog.log_error(
                "arrival_snr_QC",
                "Requested phase name="
                + phase_name
                + " does not match phase name tag returned by obpsy taup calculator="
                + taup_arrival_phase,
                ErrorSeverity.Complaint,
            )

    # Shift to a time base relative to the arrival when the data are UTC
    if data_to_process.time_is_UTC():
        data_to_process.ator(arrival_time)
    [snrdata, elog] = FD_snr_estimator(
        data_to_process,
        noise_window,
        noise_spectrum_engine,
        signal_window,
        signal_spectrum_engine,
        band_cutoff_snr,
        tbp,
        ntapers,
        high_frequency_search_start,
        poles,
        perc,
        optional_metrics,
        save_spectra=save_spectra,
    )
    if elog.size() > 0:
        data_object.elog += elog
    snrdata["phase"] = phase_name
    snrdata["snr_arrival_time"] = arrival_time
    snrdata["snr_signal_window_start"] = arrival_time + signal_window.start
    snrdata["snr_signal_window_end"] = arrival_time + signal_window.end
    snrdata["snr_noise_window_start"] = arrival_time + noise_window.start
    snrdata["snr_noise_window_end"] = arrival_time + noise_window.end

    # These cross-referencing keys may not always be defined when a phase
    # time is based on a pick so we add these cautiously
    scol_id_key = source_collection + "_id"
    rcol_id_key = rcol + "_id"
    if data_object.is_defined(scol_id_key):
        snrdata[scol_id_key] = data_object[scol_id_key]
    if data_object.is_defined(rcol_id_key):
        snrdata[rcol_id_key] = data_object[rcol_id_key]
    # Note we add this result to data_object NOT data_to_process because that
    # is not always the same thing - for a TimeSeries input it is a copy of
    # the original but it may have been altered while for a Seismogram it is
    # an extracted component
    data_object[metadata_key] = snrdata
    # Compare against None explicitly: pymongo database handles do not
    # support truth value testing.
    if db is not None:
        arrival_id_key = collection + "_id"
        dbcol = db[collection]
        if update_mode and data_object.is_defined(arrival_id_key):
            arrival_id = data_object[arrival_id_key]
            filt = {"_id": arrival_id}
            update_clause = {"$set": snrdata}
            dbcol.update_one(filt, update_clause)
        else:
            if update_mode:
                data_object.elog.log_error(
                    "arrival_snr_QC",
                    "Running in update mode but arrival id key="
                    + arrival_id_key
                    + " is not defined\n"
                    + "Inserting computed snr data as a new document in collection="
                    + collection,
                    ErrorSeverity.Complaint,
                )
            arrival_id = dbcol.insert_one(snrdata).inserted_id
            data_object[arrival_id_key] = arrival_id
    return data_object