def test_Ensemble(Ensemble):
    """Check ensemble-to-member metadata synchronization.

    ``Ensemble`` is the ensemble class under test; the member type
    (TimeSeries or Seismogram) is chosen from the instance it creates.
    """
    header = Metadata()
    header['double'] = 3.14
    header['bool'] = True
    header['long'] = 7
    es = Ensemble(header, 3)

    # Build one constant-valued datum of the matching member type.
    if isinstance(es, TimeSeriesEnsemble):
        datum = make_constant_data_ts(TimeSeries(10))
    else:
        datum = make_constant_data_seis(Seismogram(10))
    for _ in range(3):
        es.member.append(datum)

    # With a key list only 'bool' is expected to reach the members.
    es.sync_metadata(['double', 'long'])
    assert es.member[0].is_defined('bool')
    assert es.member[0]['bool'] == True
    assert not es.member[0].is_defined('double')
    assert not es.member[0].is_defined('long')

    # Without arguments every ensemble key is expected in the members.
    es.sync_metadata()
    assert es.member[1].is_defined('double')
    assert es.member[1].is_defined('long')
    assert es.member[1]['double'] == 3.14
    assert es.member[1]['long'] == 7

    es.update_metadata(Metadata({'k': 'v'}))
    assert es['k'] == 'v'
def _load_md(rec, keys):
    """
    Helper for load ensemble.  Copies the entries of rec named in the
    keys list into a new Metadata container and returns that container.

    No error handling is done: a key missing from rec raises whatever
    rec's indexing raises.
    """
    extracted = Metadata()
    for name in keys:
        extracted.put(name, rec[name])
    return extracted
def change_parameters(self, md):
    """
    Replace the internal parameter set of the processor.

    It can be used, for example, to switch from the damped least
    square method to the water level method.  The input must be a
    complete definition of the parameter set for a particular
    algorithm; this is not an update method, it reinitializes the
    stored parameters.

    :param md: is a mspass.Metadata object containing required parameters
      for the alternative algorithm.
    """
    # Store a copy built through the Metadata constructor.
    replacement = Metadata(md)
    self.md = replacement
def __init__(self, alg="LeastSquares", pf="RFdeconProcessor.pf"):
    """
    Load the parameter branch for algorithm alg from parameter file pf.

    :param alg: algorithm name; one of LeastSquares, WaterLevel,
      MultiTaperXcor or MultiTaperSpecDiv.
    :param pf: name of the parameter file to read.
    :raises RuntimeError: for GeneralizedIterative (not yet supported)
      and for any other unrecognized algorithm name.
    """
    self.algorithm = alg
    self.pf = pf
    pfhandle = AntelopePf(pf)
    # (algorithm name, pf branch name, algorithm uses noise data)
    known = (
        ("LeastSquares", "LeastSquare", False),
        ("WaterLevel", "WaterLevel", False),
        ("MultiTaperXcor", "MultiTaperXcor", True),
        ("MultiTaperSpecDiv", "MultiTaperSpecDiv", True),
    )
    for name, branch, needs_noise in known:
        if alg == name:
            self.md = pfhandle.get_branch(branch)
            self.__uses_noise = needs_noise
            break
    else:
        if alg == "GeneralizedIterative":
            raise RuntimeError(
                "Generalized Iterative method not yet supported")
        raise RuntimeError("Illegal value for alg=" + alg)
    # below is needed because AntelopePf cannot be serialized.
    self.md = Metadata(self.md)
def dict2Metadata(dic):
    """
    Function to convert Python dict data to Metadata.

    pymongo returns a Python dict container from find queries to any
    collection.  This function passes such a dict to the Metadata
    constructor and returns the result.

    :param dic: Python dict to convert
    :type dic: dict
    :return: Metadata object built from dic
    :rtype: :class:`~mspasspy.ccore.Metadata`
    """
    converted = Metadata(dic)
    return converted
def test_Ensemble(Ensemble):
    """Test metadata sync, live/dead state, error log and pickling.

    ``Ensemble`` is the ensemble class under test; the member type
    (TimeSeries or Seismogram) is chosen from the instance it creates.
    """
    md = Metadata()
    md["double"] = 3.14
    md["bool"] = True
    md["long"] = 7
    es = Ensemble(md, 3)
    if isinstance(es, TimeSeriesEnsemble):
        d = make_constant_data_ts(TimeSeries(10))
    else:
        d = make_constant_data_seis(Seismogram(10))
    for _ in range(3):
        es.member.append(d)
    # new method for LoggingEnsemble needed because default is dead
    es.set_live()

    es.sync_metadata(["double", "long"])
    assert es.member[0].is_defined("bool")
    assert es.member[0]["bool"] == True
    assert not es.member[0].is_defined("double")
    assert not es.member[0].is_defined("long")
    es.sync_metadata()
    assert es.member[1].is_defined("double")
    assert es.member[1].is_defined("long")
    assert es.member[1]["double"] == 3.14
    assert es.member[1]["long"] == 7
    es.update_metadata(Metadata({"k": "v"}))
    assert es["k"] == "v"

    # From here on we test features not in CoreEnsemble but only in
    # LoggingEnsemble.  Note that we use pybind11 aliasing to
    # define TimeSeriesEnsemble == LoggingEnsemble<TimeSeries> and
    # SeismogramEnsemble == LoggingEnsemble<Seismogram>.

    # Should be initially marked live
    assert es.live()
    es.elog.log_error("test_ensemble", "test complaint",
                      ErrorSeverity.Complaint)
    es.elog.log_error("test_ensemble", "test invalid", ErrorSeverity.Invalid)
    assert es.elog.size() == 2
    assert es.live()
    es.kill()
    assert es.dead()
    # resurrect es
    es.set_live()
    assert es.live()
    # validate checks for any live members - this tests that feature
    assert es.validate()
    # need this temporary copy for the next test
    if isinstance(es, TimeSeriesEnsemble):
        escopy = TimeSeriesEnsemble(es)
    else:
        escopy = SeismogramEnsemble(es)
    for d in escopy.member:
        d.kill()
    assert not escopy.validate()

    # Reuse escopy for pickle test
    escopy = pickle.loads(pickle.dumps(es))
    assert escopy.is_defined("bool")
    assert escopy["bool"] == True
    assert escopy.is_defined("double")
    assert escopy.is_defined("long")
    assert escopy["double"] == 3.14
    assert escopy["long"] == 7
    assert escopy.live()
    assert escopy.elog.size() == 2
    assert escopy.member[0].is_defined("bool")
    assert escopy.member[0]["bool"] == True
    assert escopy.member[0].is_defined("double")
    assert escopy.member[0].is_defined("long")
    # Check the unpickled copy here.  The original es.member[1] was
    # already verified above, before the round trip, so re-checking it
    # would not exercise serialization at all.
    assert escopy.member[1].is_defined("double")
    assert escopy.member[1].is_defined("long")
    assert escopy.member[1]["double"] == 3.14
    assert escopy.member[1]["long"] == 7
    if isinstance(es, TimeSeriesEnsemble):
        assert es.member[1].data == escopy.member[1].data
    else:
        assert (es.member[1].data[:] == escopy.member[1].data[:]).all()
def test_CoreSeismogram():
    """Test how _CoreSeismogram reads and writes the tmatrix attribute."""
    md = Metadata()
    md["delta"] = 0.01
    md["starttime"] = 0.0
    md["npts"] = 100

    def build():
        # Construct from the current state of md.
        return _CoreSeismogram(md, False)

    def rejects(pattern):
        # Construction from the current md must fail with this message.
        with pytest.raises(MsPASSError, match=pattern):
            build()

    # test metadata constructor
    md["tmatrix"] = np.random.rand(3, 3)
    cseis = build()
    assert (cseis.tmatrix == md["tmatrix"]).all()

    md["tmatrix"] = dmatrix(np.random.rand(3, 3))
    cseis = build()
    assert (cseis.tmatrix == md["tmatrix"]).all()

    # ndarray inputs holding 9 values in other layouts
    for shape in ((9,), (1, 9), (9, 1)):
        md["tmatrix"] = np.random.rand(*shape)
        cseis = build()
        assert (cseis.tmatrix == md["tmatrix"].reshape(3, 3)).all()

    # plain Python list inputs, nested and flat
    for shape in ((3, 3), (9,)):
        md["tmatrix"] = np.random.rand(*shape).tolist()
        cseis = build()
        assert np.isclose(cseis.tmatrix,
                          np.array(md["tmatrix"]).reshape(3, 3)).all()

    # test whether the setter of tmatrix updates metadata correctly
    tm = np.random.rand(1, 9)
    cseis.tmatrix = tm
    assert (cseis.tmatrix == tm.reshape(3, 3)).all()
    assert np.isclose(cseis.tmatrix,
                      np.array(cseis["tmatrix"]).reshape(3, 3)).all()

    tm = np.random.rand(9).tolist()
    cseis.tmatrix = tm
    assert np.isclose(cseis.tmatrix, np.array(tm).reshape(3, 3)).all()
    assert np.isclose(cseis.tmatrix,
                      np.array(cseis["tmatrix"]).reshape(3, 3)).all()

    # test exceptions
    md["tmatrix"] = np.random.rand(4, 2)
    rejects("should be a 3x3 matrix")

    md["tmatrix"] = dmatrix(np.random.rand(2, 4))
    rejects("should be a 3x3 matrix")

    md["tmatrix"] = 42
    rejects("not recognized")

    md.erase("tmatrix")
    # tmatrix not defined is taken to default to tmatrix being an identity
    # matrix.  We test that condition here
    cseis = build()
    assert np.isclose(cseis.tmatrix, np.eye(3)).all()

    md["tmatrix"] = {4: 2}
    rejects("type is not recognized")

    md["tmatrix"] = np.random.rand(9).tolist()
    md["tmatrix"][3] = "str"
    rejects("should be float")

    md["tmatrix"] = np.random.rand(3, 4).tolist()
    rejects("should be a 3x3 list of list")

    md["tmatrix"] = [1, 2, 3]
    rejects("should be a 3x3 list of list")

    md["tmatrix"] = np.random.rand(2, 2).tolist()
    rejects("should be a list of 9 floats or a 3x3 list of list")

    md["tmatrix"] = np.random.rand(3, 3).tolist()
    md["tmatrix"][1][1] = "str"
    rejects("should be float")
def test_Metadata():
    """Test Metadata item access, type reporting, pickling and merging."""

    def assert_same_values(left, right, array_key=None):
        # Compare two containers key by key; the entry named array_key
        # holds an ndarray and needs an elementwise comparison.
        for key in left:
            if key == array_key:
                assert (left[key] == right[key]).all()
            else:
                assert left[key] == right[key]

    md = Metadata()
    assert repr(md) == "Metadata({})"

    # Changes made through any handle on the stored dict must be visible
    # through the container, even after the local names are deleted.
    shared = {1: 1}
    md.put("dict", shared)
    alias = md.get("dict")
    alias[2] = 2
    del alias
    shared[3] = 3
    del shared
    md["dict"][4] = 4
    assert md["dict"] == {1: 1, 2: 2, 3: 3, 4: 4}

    md = Metadata({"array": np.array([3, 4])})
    md["dict"] = {1: 1, 2: 2}
    md["str'i\"ng"] = "str'i\"ng"
    md["str'ing"] = "str'ing"
    md["double"] = 3.14
    md["bool"] = True
    md["int"] = 7
    md["string"] = "str\0ing"
    md["string"] = "str\ning"
    md["str\ting"] = "str\ting"
    md["str\0ing"] = "str\0ing"
    md["str\\0ing"] = "str\\0ing"
    md_copy = pickle.loads(pickle.dumps(md))
    assert_same_values(md, md_copy, array_key="array")
    md_copy2 = Metadata(dict(md))
    assert not md_copy2.modified()
    assert md.modified() == md_copy.modified()

    # Each key below is the string md.type() is expected to report.
    ndarray_key = "<class 'numpy.ndarray'>"
    md = Metadata({
        "<class 'numpy.ndarray'>": np.array([3, 4]),
        "<class 'dict'>": {
            1: 1,
            2: 2
        },
        "string": "string",
        "double": 3.14,
        "bool": True,
        "long": 7,
        "<class 'bytes'>": b"\xba\xd0\xba\xd0",
        "<class 'NoneType'>": None,
    })
    for key in md:
        assert md.type(key) == key

    md[b"\xba\xd0"] = b"\xba\xd0"
    md_copy = pickle.loads(pickle.dumps(md))
    assert_same_values(md, md_copy, array_key=ndarray_key)

    # Both deletion spellings must remove the key.
    del md["<class 'numpy.ndarray'>"]
    md_copy.erase("<class 'numpy.ndarray'>")
    assert ndarray_key not in md
    assert ndarray_key not in md_copy
    assert md.keys() == md_copy.keys()

    with pytest.raises(TypeError, match="Metadata"):
        reversed(md)

    md = Metadata({1: 1, 3: 3})
    md_copy = Metadata({2: 2, 3: 30})
    md += md_copy
    assert repr(md) == "Metadata({'1': 1, '2': 2, '3': 30})"

    # Test with real data
    real_header = {
        "_format": "MSEED",
        "arrival.time": 1356901212.242550,
        "calib": 1.000000,
        "chan": "BHZ",
        "delta": 0.025000,
        "deltim": -1.000000,
        "endtime": 1356904168.544538,
        "iphase": "P",
        "loc": "",
        "mseed": {
            "dataquality": "D",
            "number_of_records": 36,
            "encoding": "STEIM2",
            "byteorder": ">",
            "record_length": 4096,
            "filesize": 726344704,
        },
        "net": "CI",
        "npts": 144000,
        "phase": "P",
        "sampling_rate": 40.000000,
        "site.elev": 0.258000,
        "site.lat": 35.126900,
        "site.lon": -118.830090,
        "site_id": "5fb6a67b37f8eef2f0658e9a",
        "sta": "ARV",
        "starttime": 1356900568.569538,
    }
    md = Metadata(real_header)
    md["mod"] = "mod"
    md_copy = pickle.loads(pickle.dumps(md))
    assert_same_values(md, md_copy)
    assert md.modified() == md_copy.modified()
def setup_function(function):
    """Attach obspy and mspass fixture objects to the test function.

    The fixtures are stored as attributes of ``function``: three header
    dicts, three obspy traces and a stream built from them, a Metadata
    container, a TimeSeries and a Seismogram.
    """
    ts_size = 255
    sampling_rate = 20.0
    dt = 1 / sampling_rate

    def stamp():
        # A fresh time object per use so no header entries share one.
        return obspy.UTCDateTime(2019, 12, 31, 23, 59, 59, 915000)

    def header(channel):
        return {
            "network": "IU",
            "station": "ANMO",
            "starttime": stamp(),
            "npts": ts_size,
            "sampling_rate": sampling_rate,
            "channel": channel,
        }

    def random_trace(stats):
        return obspy.Trace(data=np.random.randint(0, 1000, ts_size),
                           header=stats)

    first_header = header("BHE")
    first_header["live"] = True
    first_header["_id"] = bson.objectid.ObjectId()
    first_header["jdate"] = stamp()
    first_header["date_str"] = stamp()
    first_header["not_defined_date"] = stamp()
    function.dict1 = first_header
    function.dict2 = header("BHN")
    function.dict3 = header("BHZ")

    function.tr1 = random_trace(function.dict1)
    function.tr2 = random_trace(function.dict2)
    function.tr3 = random_trace(function.dict3)
    function.stream = obspy.Stream(
        traces=[function.tr1, function.tr2, function.tr3])

    md1 = Metadata()
    md1.put("network", "IU")
    md1.put("npts", ts_size)
    md1.put("sampling_rate", sampling_rate)
    md1.put("live", True)
    function.md1 = md1

    ts1 = TimeSeries()
    ts1.data = DoubleVector(np.random.rand(ts_size))
    ts1.live = True
    ts1.dt = dt
    ts1.t0 = 0
    ts1.npts = ts_size
    # TODO: need to bind the constructor that can do TimeSeries(md1)
    ts1.put("net", "IU")
    ts1.put("npts", ts_size)
    ts1.put("sampling_rate", sampling_rate)
    function.ts1 = ts1

    seis = Seismogram()
    # TODO: the default of seismogram.tref is UTC which is inconsistent with the default
    # for TimeSeries()
    # TODO: It would be nice to have dmatrix support numpy.ndarray as input
    seis.data = dmatrix(3, ts_size)
    for row in range(3):
        for col in range(ts_size):
            # Go through the attribute on every write, as the original does.
            seis.data[row, col] = np.random.rand()
    seis.live = True
    seis.dt = dt
    seis.t0 = 0
    seis.npts = ts_size
    # FIXME: if the following key is network, the Seismogram2Stream will error out
    # when calling TimeSeries2Trace internally due to the issue when mdef.is_defined(k)
    # returns True but k is an alias, the mdef.type(k) will error out.
    seis.put("net", "IU")
    seis.put("npts", ts_size)
    seis.put("sampling_rate", sampling_rate)
    function.seismogram = seis
class RFdeconProcessor:
    """
    This class is a wrapper for the suite of receiver function deconvolution
    methods we call scalar methods.  That is, the operation is reducible to
    two time series:  wavelet signal and the data (TimeSeries) signal.
    That is in contrast to three component methods that always treat the
    data as vector samples.  The class should be created as a global
    processor object to be used in a spark job.  The design assumes the
    processor object will be passed as an argument to the RFdecon
    function that should appear as a function in a spark map call.
    """

    def __repr__(self) -> str:
        repr_str = "{type}(alg='{alg}', pf='{pf}')".format(
            type=str(self.__class__), alg=self.algorithm, pf=self.pf)
        return repr_str

    def __str__(self) -> str:
        md_str = str(Metadata2dict(self.md))
        processor_str = "{type}(alg='{alg}', pf='{pf}', md={md})".format(
            type=str(self.__class__),
            alg=self.algorithm,
            pf=self.pf,
            md=md_str)
        return processor_str

    def __init__(self, alg="LeastSquares", pf="RFdeconProcessor.pf"):
        """
        Load the parameter branch for algorithm alg from parameter file pf.

        :param alg: algorithm name; one of LeastSquares, WaterLevel,
          MultiTaperXcor or MultiTaperSpecDiv.
        :param pf: name of the parameter file to read.
        :raises RuntimeError: for GeneralizedIterative (not yet supported)
          and for any other unrecognized algorithm name.
        """
        self.algorithm = alg
        self.pf = pf
        pfhandle = AntelopePf(pf)
        if alg == "LeastSquares":
            # Note the pf branch name has no trailing "s".
            self.md = pfhandle.get_branch("LeastSquare")
            self.__uses_noise = False
        elif alg == "WaterLevel":
            self.md = pfhandle.get_branch("WaterLevel")
            self.__uses_noise = False
        elif alg == "MultiTaperXcor":
            self.md = pfhandle.get_branch("MultiTaperXcor")
            self.__uses_noise = True
        elif alg == "MultiTaperSpecDiv":
            self.md = pfhandle.get_branch("MultiTaperSpecDiv")
            self.__uses_noise = True
        elif alg == "GeneralizedIterative":
            raise RuntimeError(
                "Generalized Iterative method not yet supported")
        else:
            raise RuntimeError("Illegal value for alg=" + str(alg))
        # below is needed because AntelopePf cannot be serialized.
        self.md = Metadata(self.md)

    @staticmethod
    def _check_load_args(method, dtype, window):
        """Validate the dtype/window combination shared by the load methods."""
        if dtype == "raw_vector" and window:
            raise RuntimeError(
                "RFdeconProcessor." + method + ": " +
                "Illegal argument combination\nwindow cannot be true with raw_vector input"
            )
        if dtype not in ("Seismogram", "TimeSeries", "raw_vector"):
            raise RuntimeError("RFdeconProcessor." + method + ": " +
                               " Illegal dtype parameter=" + str(dtype))

    @staticmethod
    def _as_vector(d, dtype, component, window, tstart=None, tend=None):
        """Return the sample vector of d as an ndarray, windowed on request."""
        if dtype == "raw_vector":
            samples = d
        else:
            if dtype == "Seismogram":
                ts = ExtractComponent(d, component)
            else:
                # TimeSeries input is used as received.
                ts = d
            if window:
                ts = WindowData(ts, tstart, tend)
            samples = ts.data
        # Have to explicitly convert to ndarray because DoubleVector cannot
        # be serialized.
        return np.array(samples)

    def _loaded_engine(self):
        """Build the decon operator for self.algorithm and load stored vectors."""
        if self.algorithm == "LeastSquares":
            engine = LeastSquareDecon(self.md)
        elif self.algorithm == "WaterLevel":
            engine = WaterLevelDecon(self.md)
        elif self.algorithm == "MultiTaperXcor":
            engine = MultiTaperXcorDecon(self.md)
        elif self.algorithm == "MultiTaperSpecDiv":
            engine = MultiTaperSpecDivDecon(self.md)
        else:
            # Reachable only if self.algorithm was altered after construction.
            raise RuntimeError("Illegal value for alg=" + str(self.algorithm))
        if hasattr(self, "dvector"):
            engine.loaddata(DoubleVector(self.dvector))
        if hasattr(self, "wvector"):
            engine.loadwavelet(DoubleVector(self.wvector))
        if self.__uses_noise and hasattr(self, "nvector"):
            engine.loadnoise(DoubleVector(self.nvector))
        return engine

    def loaddata(self, d, dtype="Seismogram", component=0, window=False):
        """
        Loads data for processing.  When window is set true
        use the internal pf definition of data time window
        and window the data.  The dtype parameter changes the
        behavior of this algorithm significantly depending on
        the setting.   It can be one of the following:
        Seismogram, TimeSeries, or raw_vector.   For the first
        two the data to process will be extracted in a
        pf specified window if window is True.  If window is
        False TimeSeries data will be passed directly and
        Seismogram data will have the data defined by the
        component parameter copied to the internal data
        vector workspace.   If dtype is set to raw_vector
        d is assumed to be a raw numpy vector of doubles or
        the aliased std::vector used in ccore, for example,
        in the TimeSeries object's data vector.  Setting dtype to
        raw_vector and window True will result in this method
        throwing a RuntimeError exception as the combination
        is not possible since raw_vector data have no time base.

        :param d: input data (contents expected depend upon
          value of dtype parameter).
        :param dtype: string defining the form d is expected to be
          (see details above)
        :param component: component of Seismogram data to load as
          data vector.  Ignored if dtype is raw_vector or TimeSeries.
        :param window: boolean controlling internally
          defined windowing.  (see details above)
        :raises RuntimeError: for an unknown dtype or for raw_vector
          combined with window True.

        :return:  Nothing (not None nothing) is returned
        """
        # First basic sanity checks
        self._check_load_args("loaddata", dtype, window)
        tstart = tend = None
        if window:
            win = self.dwin
            tstart, tend = win.start, win.end
        self.dvector = self._as_vector(d, dtype, component, window, tstart,
                                       tend)

    def loadwavelet(self, w, dtype="Seismogram", component=2, window=False):
        """
        Loads the wavelet for processing.  Arguments have the same
        meaning as in loaddata; when window is True the wavelet is cut
        with the same deconvolution data window used by loaddata.

        :param w: input wavelet (form depends on dtype).
        :param dtype: one of Seismogram, TimeSeries, or raw_vector.
        :param component: component of Seismogram input to use.
        :param window: boolean controlling internally defined windowing.
        :raises RuntimeError: for an unknown dtype or for raw_vector
          combined with window True.
        """
        self._check_load_args("loadwavelet", dtype, window)
        tstart = tend = None
        if window:
            win = self.dwin
            tstart, tend = win.start, win.end
        self.wvector = self._as_vector(w, dtype, component, window, tstart,
                                       tend)

    def loadnoise(self, n, dtype="Seismogram", component=2, window=False):
        """
        Loads noise data for algorithms that use it.  Returns immediately
        without storing anything when the algorithm is LeastSquares or
        WaterLevel.

        :param n: input noise data (form depends on dtype).
        :param dtype: one of Seismogram, TimeSeries, or raw_vector.
        :param component: component of Seismogram input to use.
        :param window: when True cut the input with the
          noise_window_start and noise_window_end parameters.
        :raises RuntimeError: for an unknown dtype or for raw_vector
          combined with window True.
        """
        # Return immediately for methods that ignore noise.
        # Note we do this silently assuming the function wrapper below
        # will post an error to elog for the output to handle this nonfatal error
        if self.algorithm == "LeastSquares" or self.algorithm == "WaterLevel":
            return
        self._check_load_args("loadnoise", dtype, window)
        tws = twe = None
        # IMPORTANT these two parameters are not required by the
        # ScalarDecon C code but need to be inserted in pf for any algorithm
        # that requires noise data (i.e. multitaper) and the window
        # options is desired
        if window:
            tws = self.md.get_double("noise_window_start")
            twe = self.md.get_double("noise_window_end")
        self.nvector = self._as_vector(n, dtype, component, window, tws, twe)

    def apply(self):
        """
        Compute the RF estimate using the algorithm defined internally.

        :return: vector of data that are the RF estimate computed from
          previously loaded data.
        """
        engine = self._loaded_engine()
        engine.process()
        return engine.getresult()

    def actual_output(self):
        """
        The actual output of a decon operator is the inverse filter applied
        to the wavelet.  By design it is an approximation of the shaping
        wavelet defined for this operator.

        :return: Actual output of the operator as a ccore.TimeSeries object.
          The Metadata of the return is bare bones.  The most important
          factor about this result is that because actual output waveforms
          are normally a zero phase wavelet of some kind the result is
          time shifted to be centered (i.e. t0 is rounded n/2 where n is
          the length of the vector returned).
        """
        return self._loaded_engine().actual_output()

    def ideal_output(self):
        """
        The ideal output of a decon operator is the same thing we call a
        shaping wavelet.  This method returns the ideal output=shaping wavelet
        as a TimeSeries object.   Like the actual output method the return
        function is circular shifted so the function peaks at 0 time located
        at n/2 samples from the start sample.  Graphic displays will then show
        the wavelet centered and peaked at time 0.   The prediction error
        can be computed as the difference between the actual_output and
        ideal_output TimeSeries objects.   The norm of the prediction error
        is a helpful metric to display the stability and accuracy of the
        inverse.
        """
        return self._loaded_engine().ideal_output()

    def inverse_filter(self):
        """
        This method returns the actual inverse filter that if convolved with
        the original data will produce the RF estimate.  Note the filter is
        meaningful only if the source wavelet is minimum phase.  A standard
        theorem from time series analysis shows that the inverse of a
        mixed phase wavelet is usually unstable and a maximum phase wavelet
        is always unstable.   Fourier-based methods can still compute a
        stable solution even with a mixed phase wavelet because of the
        implied circular convolution.

        The result is returned as TimeSeries object.
        """
        return self._loaded_engine().inverse_filter()

    def QCMetrics(self):
        """
        All decon algorithms compute a set of algorithm dependent quality
        control metrics.  This method returns the metrics as a set of fixed
        name:value pairs in a mspass.Metadata object.   The details are
        algorithm dependent.  See related documentation for metrics computed
        by different algorithms.
        """
        return self._loaded_engine().QCMetrics()

    def change_parameters(self, md):
        """
        Use this method to change the internal parameter setting of the
        processor.  It can be used, for example, to switch from the damped
        least square method to the water level method.  Note the input
        must be a complete definition for a parameter set defining a
        particular algorithm.  i.e. this is not an update method but
        it reinitializes the processor.

        :param md: is a mspass.Metadata object containing required parameters
          for the alternative algorithm.
        """
        self.md = Metadata(md)

    @property
    def uses_noise(self):
        return self.__uses_noise

    @property
    def dwin(self):
        tws = self.md.get_double("deconvolution_data_window_start")
        twe = self.md.get_double("deconvolution_data_window_end")
        return TimeWindow(tws, twe)

    @property
    def nwin(self):
        if self.__uses_noise:
            tws = self.md.get_double("noise_window_start")
            twe = self.md.get_double("noise_window_end")
            return TimeWindow(tws, twe)
        else:
            # Return a default-constructed instance (not the class itself)
            # so callers always get a usable TimeWindow object.
            return TimeWindow()  # always initialize even if not used
def load_one_ensemble(
    doc,
    create_history=False,
    jobname="Default job",
    jobid="99999",
    algid="99999",
    ensemble_mdkeys=None,  # default is to load nothing for ensemble
    apply_calib=False,
    verbose=False,
):
    """
    This function can be used to load a full ensemble indexed in the
    collection import_miniseed_ensemble.  It uses a large memory model
    that reads the entire file using obspy's miniseed reader.   It contains
    some relics of early ideas of potentially having the function
    utilize the history mechanism.  Those may not work, but were retained.

    Error handling is best effort:  any exception raised while loading is
    reported on stdout and the function returns None instead of raising.

    :param doc: is one record in the import_miniseed_ensemble collection
    :param create_history: if true each member of the ensemble will be
      defined in the history chain as an origin and jobname and jobid will
      be used to construct the ProcessingHistory object.
    :param jobname: as used in ProcessingHistory (default "Default job")
    :param jobid: as used in processingHistory
    :param algid: as used in processingHistory
    :param ensemble_mdkeys:  list of keys to copy from doc to the ensemble
      Metadata (no type checking is done).  When None or empty every
      key of doc other than "members" is copied.
    :param apply_calib:  if True tells obspy's reader to apply the calibration
      factor to convert the data to ground motion units.  Default is false.
    :param verbose:  write informational messages while processing
    :return: TimeSeriesEnsemble on success, None if any step failed.
    """
    try:
        ensemblemd = Metadata()
        if create_history:
            his = ProcessingHistory(jobname, jobid)
        form = doc["format"]
        mover = doc["mover"]
        if form != "mseed":
            raise MsPASSError(
                "Cannot handle this ensemble - ensemble format="
                + form
                + "\nCan only be mseed for this reader"
            )
        if mover != "obspy_seed_ensemble_reader":
            raise MsPASSError(
                "Cannot handle this ensemble - ensemble mover parameter="
                + mover
                + " which is not supported"
            )
        # Local names avoid shadowing the builtin dir().
        dirname = doc["dir"]
        dfile = doc["dfile"]
        fname = dirname + "/" + dfile
        # Note this algorithm actually should work with any format
        # supported by obspy's read function - should generalize it for release
        dseis = read(fname, format="mseed", apply_calib=apply_calib)
        if ensemble_mdkeys is not None and len(ensemble_mdkeys) > 0:
            ensemblemd = _load_md(doc, ensemble_mdkeys)
        else:
            # default is to load everything != members
            members_key = "members"
            for k in doc:
                if k != members_key:
                    ensemblemd[k] = doc[k]
        # There is a Stream2TimeSeriesEnsemble function
        # but we don't use it here because we need some functionality
        # not found in that simple function
        nseis = len(dseis)
        result = TimeSeriesEnsemble(ensemblemd, nseis)
        # Secondary files get handled almost the same except for
        # a warning.   The warning message (hopefully) explains the
        # problem but our documentation must warn about this if this
        # prototype algorithm becomes the release version
        for count, d in enumerate(dseis):
            if verbose:
                print("load_one_ensemble: working on data object number", count)
            dts = Trace2TimeSeries(d)
            if create_history:
                # This should just define jobname and jobid
                dts.load_history(his)
                # NOTE(review): d comes from obspy's read; confirm that
                # indexing it with a string key is valid here.
                seedid = d["seed_file_id"]
                dts.set_as_origin(
                    "load_ensemble", algid, seedid, AtomicType.TIMESERIES, True
                )
            result.member.append(dts)
        return result
    except Exception as err:
        # Catch Exception rather than everything so KeyboardInterrupt and
        # SystemExit still propagate, and report what actually went wrong.
        print("something threw an exception - needs more complete error handlers")
        print("load_one_ensemble failed with " + type(err).__name__ + ": " + str(err))
        return None
def test_CoreSeismogram():
    """Test how _CoreSeismogram handles the transformation matrix."""
    md = Metadata()
    md['delta'] = 0.01
    md['starttime'] = 0.0
    md['npts'] = 100

    def build():
        # Construct from the current state of md.
        return _CoreSeismogram(md, False)

    def rejects(pattern):
        # Construction from the current md must fail with this message.
        with pytest.raises(MsPASSError, match=pattern):
            build()

    # test metadata constructor
    md['tmatrix'] = np.random.rand(3, 3)
    cseis = build()
    assert (cseis.transformation_matrix == md['tmatrix']).all()

    md['tmatrix'] = dmatrix(np.random.rand(3, 3))
    cseis = build()
    assert (cseis.transformation_matrix == md['tmatrix']).all()

    # ndarray inputs holding 9 values in other layouts
    for shape in ((9,), (1, 9), (9, 1)):
        md['tmatrix'] = np.random.rand(*shape)
        cseis = build()
        assert (cseis.transformation_matrix == md['tmatrix'].reshape(
            3, 3)).all()

    # plain Python list inputs, nested and flat
    for shape in ((3, 3), (9,)):
        md['tmatrix'] = np.random.rand(*shape).tolist()
        cseis = build()
        assert np.isclose(cseis.transformation_matrix,
                          np.array(md['tmatrix']).reshape(3, 3)).all()

    # test whether the setter of transformation_matrix updates metadata correctly
    tm = np.random.rand(1, 9)
    cseis.transformation_matrix = tm
    assert (cseis.transformation_matrix == tm.reshape(3, 3)).all()
    assert np.isclose(cseis.transformation_matrix,
                      np.array(cseis['tmatrix']).reshape(3, 3)).all()

    tm = np.random.rand(9).tolist()
    cseis.transformation_matrix = tm
    assert np.isclose(cseis.transformation_matrix,
                      np.array(tm).reshape(3, 3)).all()
    assert np.isclose(cseis.transformation_matrix,
                      np.array(cseis['tmatrix']).reshape(3, 3)).all()

    # test exceptions
    md['tmatrix'] = np.random.rand(4, 2)
    rejects("should be a 3x3 matrix")

    md['tmatrix'] = dmatrix(np.random.rand(2, 4))
    rejects("should be a 3x3 matrix")

    md['tmatrix'] = 42
    rejects("not recognized")

    # In this variant a missing tmatrix is expected to be an error.
    md.erase('tmatrix')
    rejects("Error trying to extract")

    md['tmatrix'] = {4: 2}
    rejects("type is not recognized")

    md['tmatrix'] = np.random.rand(9).tolist()
    md['tmatrix'][3] = 'str'
    rejects("should be float")

    md['tmatrix'] = np.random.rand(3, 4).tolist()
    rejects("should be a 3x3 list of list")

    md['tmatrix'] = [1, 2, 3]
    rejects("should be a 3x3 list of list")

    md['tmatrix'] = np.random.rand(2, 2).tolist()
    rejects("should be a list of 9 floats or a 3x3 list of list")

    md['tmatrix'] = np.random.rand(3, 3).tolist()
    md['tmatrix'][1][1] = 'str'
    rejects("should be float")
def test_Metadata():
    """
    Exercise the python interface of Metadata: repr, storing python
    objects, pickle round trips, type reporting, key removal, iteration
    related protocols, and the += operator.

    The statement order matters in several places (see the inline
    comments), so the sequence below should not be rearranged.
    """
    md = Metadata()
    # An empty container is expected to print as an empty dict wrapper.
    assert repr(md) == 'Metadata({})'
    # A dict stored with put is modified through three different handles:
    # the object returned by get, the original python dict, and item
    # access on the container.  Both local names are deleted before the
    # final check, and the assertion requires all three modifications to
    # be visible through md['dict'].
    dic = {1: 1}
    md.put('dict', dic)
    val = md.get('dict')
    val[2] = 2
    del val
    dic[3] = 3
    del dic
    md['dict'][4] = 4
    assert md['dict'] == {1: 1, 2: 2, 3: 3, 4: 4}

    # Pickle round trip with a mix of value types and with keys/values
    # containing quotes, escape sequences and an embedded NUL character.
    md = Metadata({'array': np.array([3, 4])})
    md['dict'] = {1: 1, 2: 2}
    md['str\'i"ng'] = 'str\'i"ng'
    md["str'ing"] = "str'ing"
    md['double'] = 3.14
    md['bool'] = True
    md['int'] = 7
    # The key "string" is assigned twice; the second assignment
    # overwrites the value containing the NUL character.
    md["string"] = "str\0ing"
    md["string"] = "str\ning"
    md["str\ting"] = "str\ting"
    md["str\0ing"] = "str\0ing"
    md["str\\0ing"] = "str\\0ing"
    md_copy = pickle.loads(pickle.dumps(md))
    for i in md:
        if i == 'array':
            # numpy arrays compare elementwise, so reduce with all().
            assert (md[i] == md_copy[i]).all()
        else:
            assert md[i] == md_copy[i]

    # Each key below is chosen to be the string that md.type is expected
    # to return for the value stored under it.
    md = Metadata({
        "<class 'numpy.ndarray'>": np.array([3, 4]),
        "<class 'dict'>": {
            1: 1,
            2: 2
        },
        'string': 'string',
        'double': 3.14,
        'bool': True,
        'long': 7,
        "<class 'bytes'>": b'\xba\xd0\xba\xd0',
        "<class 'NoneType'>": None
    })
    for i in md:
        assert md.type(i) == i

    # A bytes object is used as the key here.  How Metadata converts such
    # a key is not visible in this test -- TODO confirm against the
    # binding code.
    md[b'\xba\xd0'] = b'\xba\xd0'
    md_copy = pickle.loads(pickle.dumps(md))
    for i in md:
        if i == "<class 'numpy.ndarray'>":
            assert (md[i] == md_copy[i]).all()
        else:
            assert md[i] == md_copy[i]

    # Remove the same key with the two available spellings (del and
    # erase) and verify both containers end up with the same keys.
    del md["<class 'numpy.ndarray'>"]
    md_copy.erase("<class 'numpy.ndarray'>")
    assert not "<class 'numpy.ndarray'>" in md
    assert not "<class 'numpy.ndarray'>" in md_copy
    assert md.keys() == md_copy.keys()

    # reversed() on a Metadata is expected to raise TypeError with a
    # message that mentions Metadata.
    with pytest.raises(TypeError, match='Metadata'):
        reversed(md)

    # += merges the right hand side into the left.  The expected repr
    # shows the integer keys rendered as strings and the right hand
    # value (30) winning for the key present in both.
    md = Metadata({1: 1, 3: 3})
    md_copy = Metadata({2: 2, 3: 30})
    md += md_copy
    assert md.__repr__() == "Metadata({'1': 1, '2': 2, '3': 30})"
def post_ensemble_metadata(ens,
                           keys=None,
                           check_all_members=False,
                           clean_members=False):
    """
    Copy selected member metadata to the ensemble metadata container.

    It may be necessary to call this function after conversion from an
    obspy Stream to one of the mspass Ensemble classes.  A mspass
    Ensemble has a concept that is not part of the obspy Stream object:
    a global Metadata container that is expected to hold Metadata common
    to all members of the ensemble.  For example, for data from a single
    earthquake it would be sensible to post the source location
    information in the ensemble metadata container rather than having
    duplicates in each member.

    Two different approaches can be used to do this copy.  The faster,
    but less reliable, method is the default: the values associated with
    keys are simply copied from the live members without comparing them.
    Every live member must define every requested key, and if members
    hold different values the value of the last live member is the one
    posted.  That approach is reliable when used after a conversion from
    an obspy Stream but ONLY if the data began life as a mspass ensemble
    with exactly the same keys set as global.  The type example of that
    is after an obspy algorithm is applied to a mspass ensemble via the
    mspass decorators.

    A more cautious algorithm can be enabled by setting
    check_all_members True.  In that mode each key is additionally
    tested for equality across the ensemble members.  Note we do not do
    anything fancy with floating point data to allow for finite
    precision.  The reason is Metadata float values are normally
    expected to be constant data, in which case a != test between two
    copies yields False.  The test may fail, however, if used with
    computed floating point numbers.  An example where that is possible
    would be spatial gathers like PP data assembled by midpoint
    coordinates.  If you need to build gathers in such a context we
    recommend you use an integer image point tied to a specialized
    document collection in MongoDB that defines the geometry of that
    point.  There may be other examples, but the point is don't trust
    computed floating point values to work.  It will also not work if
    the values of a key-value pair don't support a != comparison.  That
    could be common if the value requested for copy was a python object.

    :param ens: ensemble data to be processed.  The function will throw
      a MsPASSError exception if ens is not either a TimeSeriesEnsemble
      or a SeismogramEnsemble.
    :param keys: is expected to be a list of metadata keys (required to
      be strings) that are to be copied from member metadata to ensemble
      metadata.  The default (None) is treated as an empty list, in
      which case nothing is copied.
    :param check_all_members: switch controlling method used to extract
      metadata that is to be copied (see above for details).  Default is
      False.
    :param clean_members: when true data copied to ensemble metadata
      will be removed from all members.  This option is only allowed if
      check_all_members is set True.  It will be silently ignored if
      check_all_members is False.
    :raises MsPASSError: if ens is not an ensemble, if a live member
      lacks one of the requested keys, or (check_all_members mode only)
      if the members do not all match for one of the keys.
    """
    alg = "post_ensemble_metadata"
    if not isinstance(ens, (TimeSeriesEnsemble, SeismogramEnsemble)):
        raise MsPASSError(
            alg +
            ": Illegal data received. This function runs only on mspass ensemble objects",
            "Invalid",
        )

    # None replaces the former mutable default ([]).  The list is
    # materialized because keys is traversed several times below and a
    # one-shot iterable would otherwise be exhausted by the first member.
    keys = [] if keys is None else list(keys)

    md = Metadata()
    for d in ens.member:
        if not d.live:
            continue
        for k in keys:
            if k not in d:
                raise MsPASSError(
                    alg + ": no data matching requested key=" + k +
                    " Cannot post to ensemble",
                    "Invalid",
                )
            md[k] = d[k]

    if check_all_members:
        # The match test takes the whole ensemble, so it is run once per
        # key rather than once per member per key.  The length guard
        # keeps the earlier behavior of skipping the test for an
        # ensemble with no members.
        if len(ens.member) > 0:
            for k in keys:
                if not _all_members_match(ens, k):
                    raise MsPASSError(
                        alg + ": Data mismatch data members with key=" + k +
                        "\n In check_all_members mode all values associated with this key must match",
                        "Invalid",
                    )
        # Member cleanup is only honored in check_all_members mode.
        if clean_members:
            for d in ens.member:
                for k in keys:
                    d.erase(k)

    ens.update_metadata(md)