Python Metadata Examples, mspasspy.ccore.utility.Metadata Python Examples

Example #1

0

Show file

def test_Ensemble(Ensemble):
    """
    Exercise metadata synchronization between an ensemble and its members.

    :param Ensemble: ensemble class (or factory) under test.  It is called
      as ``Ensemble(md, 3)`` and the result is compared against
      TimeSeriesEnsemble to decide which member type to build.
    """
    header = Metadata()
    for key, value in (("double", 3.14), ("bool", True), ("long", 7)):
        header[key] = value
    es = Ensemble(header, 3)

    # Build one constant-valued datum of the matching type ...
    if isinstance(es, TimeSeriesEnsemble):
        datum = make_constant_data_ts(TimeSeries(10))
    else:
        datum = make_constant_data_seis(Seismogram(10))
    # ... and append that same object three times.
    for _ in range(3):
        es.member.append(datum)

    # Keys listed in the call are the ones NOT copied to the members.
    es.sync_metadata(["double", "long"])
    first = es.member[0]
    assert first.is_defined("bool")
    assert first["bool"] == True
    assert not first.is_defined("double")
    assert not first.is_defined("long")

    # With no argument every ensemble key reaches the members.
    es.sync_metadata()
    second = es.member[1]
    assert second.is_defined("double")
    assert second.is_defined("long")
    assert second["double"] == 3.14
    assert second["long"] == 7

    # update_metadata posts new keys on the ensemble itself.
    es.update_metadata(Metadata({"k": "v"}))
    assert es["k"] == "v"

Example #2

0

Show file

def _load_md(rec, keys):
    """
    Helper for load ensemble.  Copies the entries of rec named in keys
    into a new Metadata container and returns that container.

    :param rec: container indexable by each entry of keys (``rec[k]``).
    :param keys: iterable of keys to copy.
    :return: Metadata holding one entry per key.
    """
    # No error handling for now:  a key missing from rec propagates
    # whatever exception rec[k] raises.
    md = Metadata()
    for key in keys:
        md.put(key, rec[key])
    return md

Example #3

0

Show file

    def change_parameters(self, md):
        """
        Use this method to change the internal parameter setting of the
        processor.  It can be used, for example, to switch from the damped
        least square method to the water level method.  Note the input
        must be a complete definition for a parameter set defining a
        particular algorithm.  i.e. this is not an update method;
        it reinitializes the processor's parameters by replacing
        self.md with ``Metadata(md)``.

        :param md: is a mspass.Metadata object containing required parameters
          for the alternative algorithm.
        """
        self.md = Metadata(md)

Example #4

0

Show file

 def __init__(self, alg="LeastSquares", pf="RFdeconProcessor.pf"):
     """
     Load the parameter branch for the requested algorithm from a pf file.

     :param alg: algorithm name.  Accepted values are "LeastSquares",
       "WaterLevel", "MultiTaperXcor" and "MultiTaperSpecDiv".
     :param pf: name of the parameter file handed to AntelopePf.
     :raises RuntimeError: for "GeneralizedIterative" (not yet supported)
       and for any other unrecognized value of alg.
     """
     self.algorithm = alg
     self.pf = pf
     pf_tree = AntelopePf(pf)
     # (algorithm name, pf branch name, algorithm uses noise data)
     supported = (
         ("LeastSquares", "LeastSquare", False),
         ("WaterLevel", "WaterLevel", False),
         ("MultiTaperXcor", "MultiTaperXcor", True),
         ("MultiTaperSpecDiv", "MultiTaperSpecDiv", True),
     )
     for name, branch, needs_noise in supported:
         if alg == name:
             self.md = pf_tree.get_branch(branch)
             self.__uses_noise = needs_noise
             break
     else:
         # Reached only when no supported name matched.
         if alg == "GeneralizedIterative":
             raise RuntimeError(
                 "Generalized Iterative method not yet supported")
         raise RuntimeError("Illegal value for alg=" + alg)
     # below is needed because AntelopePf cannot be serialized.
     self.md = Metadata(self.md)

Example #5

0

Show file

def dict2Metadata(dic):
    """
    Function to convert Python dict data to Metadata.

    pymongo returns a Python dict container from find queries to any collection.
    Simple types in returned documents can be converted to Metadata
    that are used as headers in the C++ components of mspass.

    :param dic: Python dict to convert
    :type dic: dict
    :return: Metadata object constructed from dic
    :rtype: :class:`~mspasspy.ccore.Metadata`
    """
    return Metadata(dic)

Example #6

0

Show file

def test_Ensemble(Ensemble):
    """
    Test metadata synchronization, live/dead state, error logging,
    validation, copy construction and pickling of an ensemble.

    :param Ensemble: ensemble class (or factory) under test.  It is called
      as ``Ensemble(md, 3)`` and the result is compared against
      TimeSeriesEnsemble to decide which member type to build.
    """
    md = Metadata()
    md["double"] = 3.14
    md["bool"] = True
    md["long"] = 7
    es = Ensemble(md, 3)
    # The same datum object is appended three times in either branch.
    if isinstance(es, TimeSeriesEnsemble):
        d = TimeSeries(10)
        d = make_constant_data_ts(d)
        es.member.append(d)
        es.member.append(d)
        es.member.append(d)
    else:
        d = Seismogram(10)
        d = make_constant_data_seis(d)
        es.member.append(d)
        es.member.append(d)
        es.member.append(d)
    es.set_live(
    )  # new method for LoggingEnsemble needed because default is dead
    # Keys passed to sync_metadata are excluded from the copy to members.
    es.sync_metadata(["double", "long"])
    assert es.member[0].is_defined("bool")
    assert es.member[0]["bool"] == True
    assert not es.member[0].is_defined("double")
    assert not es.member[0].is_defined("long")
    # With no argument all ensemble keys are copied to the members.
    es.sync_metadata()
    assert es.member[1].is_defined("double")
    assert es.member[1].is_defined("long")
    assert es.member[1]["double"] == 3.14
    assert es.member[1]["long"] == 7
    es.update_metadata(Metadata({"k": "v"}))
    assert es["k"] == "v"
    # From here on we test features not in CoreEnsemble but only in
    # LoggingEnsemble.   Note that we use pybind11 aliasing to
    # define TimeSeriesEnsemble == LoggingEnsemble<TimeSeries> and
    # SeismogramEnsemble == LoggingEnsemble<Seismogram>.
    # Should be initially marked live
    assert es.live()
    es.elog.log_error("test_ensemble", "test complaint",
                      ErrorSeverity.Complaint)
    es.elog.log_error("test_ensemble", "test invalid", ErrorSeverity.Invalid)
    assert es.elog.size() == 2
    # Logging an Invalid-severity entry does not by itself kill the ensemble.
    assert es.live()
    es.kill()
    assert es.dead()
    # resurrect es
    es.set_live()
    assert es.live()
    # validate checks for any live members - this tests that feature
    assert es.validate()
    # need this temporary copy for the next test
    if isinstance(es, TimeSeriesEnsemble):
        escopy = TimeSeriesEnsemble(es)
    else:
        escopy = SeismogramEnsemble(es)
    # Killing every member of the copy must make validate() fail on it.
    for d in escopy.member:
        d.kill()
    assert not escopy.validate()
    # Reuse escopy for pickle test
    escopy = pickle.loads(pickle.dumps(es))
    assert escopy.is_defined("bool")
    assert escopy["bool"] == True
    assert escopy.is_defined("double")
    assert escopy.is_defined("long")
    assert escopy["double"] == 3.14
    assert escopy["long"] == 7
    assert escopy.live()
    assert escopy.elog.size() == 2
    assert escopy.member[0].is_defined("bool")
    assert escopy.member[0]["bool"] == True
    assert escopy.member[0].is_defined("double")
    assert escopy.member[0].is_defined("long")
    # NOTE(review): the next four assertions inspect es, not escopy, so
    # they repeat the earlier checks on the original instead of verifying
    # the unpickled copy - presumably escopy was intended; confirm.
    assert es.member[1].is_defined("double")
    assert es.member[1].is_defined("long")
    assert es.member[1]["double"] == 3.14
    assert es.member[1]["long"] == 7
    # Sample data must survive the pickle round trip.
    if isinstance(es, TimeSeriesEnsemble):
        assert es.member[1].data == escopy.member[1].data
    else:
        assert (es.member[1].data[:] == escopy.member[1].data[:]).all()

Example #7

0

Show file

def test_CoreSeismogram():
    """
    Test how the _CoreSeismogram Metadata constructor and the tmatrix
    setter handle the "tmatrix" attribute in its accepted forms, and the
    MsPASSError messages raised for rejected forms.
    """
    md = Metadata()
    md["delta"] = 0.01
    md["starttime"] = 0.0
    md["npts"] = 100
    # test metadata constructor
    # Accepted array forms: 3x3 ndarray, dmatrix, and 9-element arrays
    # of shape (9,), (1, 9) and (9, 1).
    md["tmatrix"] = np.random.rand(3, 3)
    cseis = _CoreSeismogram(md, False)
    assert (cseis.tmatrix == md["tmatrix"]).all()
    md["tmatrix"] = dmatrix(np.random.rand(3, 3))
    cseis = _CoreSeismogram(md, False)
    assert (cseis.tmatrix == md["tmatrix"]).all()
    md["tmatrix"] = np.random.rand(9)
    cseis = _CoreSeismogram(md, False)
    assert (cseis.tmatrix == md["tmatrix"].reshape(3, 3)).all()
    md["tmatrix"] = np.random.rand(1, 9)
    cseis = _CoreSeismogram(md, False)
    assert (cseis.tmatrix == md["tmatrix"].reshape(3, 3)).all()
    md["tmatrix"] = np.random.rand(9, 1)
    cseis = _CoreSeismogram(md, False)
    assert (cseis.tmatrix == md["tmatrix"].reshape(3, 3)).all()

    # Accepted list forms: 3x3 list of lists and flat list of 9 values.
    md["tmatrix"] = np.random.rand(3, 3).tolist()
    cseis = _CoreSeismogram(md, False)
    assert np.isclose(cseis.tmatrix,
                      np.array(md["tmatrix"]).reshape(3, 3)).all()
    md["tmatrix"] = np.random.rand(9).tolist()
    cseis = _CoreSeismogram(md, False)
    assert np.isclose(cseis.tmatrix,
                      np.array(md["tmatrix"]).reshape(3, 3)).all()

    # test whether the setter of tmatrix updates metadata correctly
    tm = np.random.rand(1, 9)
    cseis.tmatrix = tm
    assert (cseis.tmatrix == tm.reshape(3, 3)).all()
    assert np.isclose(cseis.tmatrix,
                      np.array(cseis["tmatrix"]).reshape(3, 3)).all()
    tm = np.random.rand(9).tolist()
    cseis.tmatrix = tm
    assert np.isclose(cseis.tmatrix, np.array(tm).reshape(3, 3)).all()
    assert np.isclose(cseis.tmatrix,
                      np.array(cseis["tmatrix"]).reshape(3, 3)).all()

    # test exceptions
    # Arrays with the wrong shape and a bare scalar are rejected.
    md["tmatrix"] = np.random.rand(4, 2)
    with pytest.raises(MsPASSError, match="should be a 3x3 matrix"):
        _CoreSeismogram(md, False)
    md["tmatrix"] = dmatrix(np.random.rand(2, 4))
    with pytest.raises(MsPASSError, match="should be a 3x3 matrix"):
        _CoreSeismogram(md, False)
    md["tmatrix"] = 42
    with pytest.raises(MsPASSError, match="not recognized"):
        _CoreSeismogram(md, False)
    md.erase("tmatrix")
    # tmatrix not defined is taken to default to tmatrix being an identity
    # matrix.  We test that condition here
    cseis = _CoreSeismogram(md, False)
    assert np.isclose(cseis.tmatrix, np.eye(3)).all()
    md["tmatrix"] = {4: 2}
    with pytest.raises(MsPASSError, match="type is not recognized"):
        _CoreSeismogram(md, False)

    # List forms with a non-float element or the wrong dimensions.  The
    # element is replaced through md["tmatrix"][...], i.e. on the object
    # read back from md, so these cases rely on md returning the stored
    # list itself rather than a copy.
    md["tmatrix"] = np.random.rand(9).tolist()
    md["tmatrix"][3] = "str"
    with pytest.raises(MsPASSError, match="should be float"):
        _CoreSeismogram(md, False)
    md["tmatrix"] = np.random.rand(3, 4).tolist()
    with pytest.raises(MsPASSError, match="should be a 3x3 list of list"):
        _CoreSeismogram(md, False)
    md["tmatrix"] = [1, 2, 3]
    with pytest.raises(MsPASSError, match="should be a 3x3 list of list"):
        _CoreSeismogram(md, False)
    md["tmatrix"] = np.random.rand(2, 2).tolist()
    with pytest.raises(
            MsPASSError,
            match="should be a list of 9 floats or a 3x3 list of list"):
        _CoreSeismogram(md, False)
    md["tmatrix"] = np.random.rand(3, 3).tolist()
    md["tmatrix"][1][1] = "str"
    with pytest.raises(MsPASSError, match="should be float"):
        _CoreSeismogram(md, False)

Example #8

0

Show file

def test_Metadata():
    """
    Test the Python behavior of Metadata: repr, storage of Python objects,
    pickling, type reporting, key deletion, iteration, in-place merge and
    the modified-key bookkeeping.
    """
    md = Metadata()
    assert repr(md) == "Metadata({})"
    # The dict is mutated through the value returned by get(), through the
    # original name and through md[...]; all three changes must be visible,
    # and the local names are deleted along the way so md is the only
    # remaining handle on the dict.
    dic = {1: 1}
    md.put("dict", dic)
    val = md.get("dict")
    val[2] = 2
    del val
    dic[3] = 3
    del dic
    md["dict"][4] = 4
    assert md["dict"] == {1: 1, 2: 2, 3: 3, 4: 4}

    # Pickle round trip with mixed value types and keys/values containing
    # quotes, escapes and embedded NUL characters.
    md = Metadata({"array": np.array([3, 4])})
    md["dict"] = {1: 1, 2: 2}
    md["str'i\"ng"] = "str'i\"ng"
    md["str'ing"] = "str'ing"
    md["double"] = 3.14
    md["bool"] = True
    md["int"] = 7
    # The second assignment to "string" overwrites the first.
    md["string"] = "str\0ing"
    md["string"] = "str\ning"
    md["str\ting"] = "str\ting"
    md["str\0ing"] = "str\0ing"
    md["str\\0ing"] = "str\\0ing"
    md_copy = pickle.loads(pickle.dumps(md))
    for i in md:
        # ndarray equality is elementwise, so it needs .all()
        if i == "array":
            assert (md[i] == md_copy[i]).all()
        else:
            assert md[i] == md_copy[i]
    # Constructing from a dict leaves the modified set empty, while the
    # pickle round trip preserves it.
    md_copy2 = Metadata(dict(md))
    assert not md_copy2.modified()
    assert md.modified() == md_copy.modified()

    # Each key is the string that md.type() is expected to return for the
    # value stored under it.
    md = Metadata({
        "<class 'numpy.ndarray'>": np.array([3, 4]),
        "<class 'dict'>": {
            1: 1,
            2: 2
        },
        "string": "string",
        "double": 3.14,
        "bool": True,
        "long": 7,
        "<class 'bytes'>": b"\xba\xd0\xba\xd0",
        "<class 'NoneType'>": None,
    })
    for i in md:
        assert md.type(i) == i

    # A bytes object used as a key must also survive pickling.
    md[b"\xba\xd0"] = b"\xba\xd0"
    md_copy = pickle.loads(pickle.dumps(md))
    for i in md:
        if i == "<class 'numpy.ndarray'>":
            assert (md[i] == md_copy[i]).all()
        else:
            assert md[i] == md_copy[i]

    # del and erase() are both checked for removing a key.
    del md["<class 'numpy.ndarray'>"]
    md_copy.erase("<class 'numpy.ndarray'>")
    assert not "<class 'numpy.ndarray'>" in md
    assert not "<class 'numpy.ndarray'>" in md_copy
    assert md.keys() == md_copy.keys()

    # Metadata is expected to reject reversed() with a TypeError.
    with pytest.raises(TypeError, match="Metadata"):
        reversed(md)

    # += merges the right operand into md; on a shared key the right-hand
    # value wins, and the integer keys show up as strings in the repr.
    md = Metadata({1: 1, 3: 3})
    md_copy = Metadata({2: 2, 3: 30})
    md += md_copy
    assert md.__repr__() == "Metadata({'1': 1, '2': 2, '3': 30})"

    # Test with real data
    dic = {
        "_format": "MSEED",
        "arrival.time": 1356901212.242550,
        "calib": 1.000000,
        "chan": "BHZ",
        "delta": 0.025000,
        "deltim": -1.000000,
        "endtime": 1356904168.544538,
        "iphase": "P",
        "loc": "",
        "mseed": {
            "dataquality": "D",
            "number_of_records": 36,
            "encoding": "STEIM2",
            "byteorder": ">",
            "record_length": 4096,
            "filesize": 726344704,
        },
        "net": "CI",
        "npts": 144000,
        "phase": "P",
        "sampling_rate": 40.000000,
        "site.elev": 0.258000,
        "site.lat": 35.126900,
        "site.lon": -118.830090,
        "site_id": "5fb6a67b37f8eef2f0658e9a",
        "sta": "ARV",
        "starttime": 1356900568.569538,
    }
    md = Metadata(dic)
    # One key is set after construction before the pickle round trip;
    # the modified sets of original and copy are then compared.
    md["mod"] = "mod"
    md_copy = pickle.loads(pickle.dumps(md))
    for i in md:
        assert md[i] == md_copy[i]
    assert md.modified() == md_copy.modified()

Example #9

0

Show file

def setup_function(function):
    """
    Attach freshly built test fixtures to ``function`` as attributes.

    Creates three header dicts (dict1..dict3), the matching obspy traces
    (tr1..tr3) and a stream holding them, a Metadata object (md1), a
    TimeSeries (ts1) and a Seismogram (seismogram), all filled with
    random sample data.

    :param function: object that receives the fixtures as attributes.
    """
    ts_size = 255
    sampling_rate = 20.0

    def start_time():
        # a new UTCDateTime object on every call, as each header entry
        # holds its own instance
        return obspy.UTCDateTime(2019, 12, 31, 23, 59, 59, 915000)

    def base_header(channel):
        return {
            "network": "IU",
            "station": "ANMO",
            "starttime": start_time(),
            "npts": ts_size,
            "sampling_rate": sampling_rate,
            "channel": channel,
        }

    # dict1 carries extra keys beyond the basic header.
    header1 = base_header("BHE")
    header1["live"] = True
    header1["_id"] = bson.objectid.ObjectId()
    header1["jdate"] = start_time()
    header1["date_str"] = start_time()
    header1["not_defined_date"] = start_time()
    function.dict1 = header1
    function.dict2 = base_header("BHN")
    function.dict3 = base_header("BHZ")

    traces = [
        obspy.Trace(data=np.random.randint(0, 1000, ts_size), header=hdr)
        for hdr in (function.dict1, function.dict2, function.dict3)
    ]
    function.tr1, function.tr2, function.tr3 = traces
    function.stream = obspy.Stream(
        traces=[function.tr1, function.tr2, function.tr3])

    md1 = Metadata()
    for key, value in (
        ("network", "IU"),
        ("npts", ts_size),
        ("sampling_rate", sampling_rate),
        ("live", True),
    ):
        md1.put(key, value)
    function.md1 = md1

    ts = TimeSeries()
    function.ts1 = ts
    ts.data = DoubleVector(np.random.rand(ts_size))
    ts.live = True
    ts.dt = 1 / sampling_rate
    ts.t0 = 0
    ts.npts = ts_size
    # TODO: need to bind the constructor that can do TimeSeries(md1)
    ts.put("net", "IU")
    ts.put("npts", ts_size)
    ts.put("sampling_rate", sampling_rate)

    seis = Seismogram()
    function.seismogram = seis
    # TODO: the default of seismogram.tref is UTC which is inconsistent with the default
    # for TimeSeries()
    # TODO: It would be nice to have dmatrix support numpy.ndarray as input
    seis.data = dmatrix(3, ts_size)
    for row in range(3):
        for col in range(ts_size):
            function.seismogram.data[row, col] = np.random.rand()

    seis.live = True
    seis.dt = 1 / sampling_rate
    seis.t0 = 0
    seis.npts = ts_size
    # FIXME: if the following key is network, the Seismogram2Stream will error out
    # when calling TimeSeries2Trace internally due to the issue when mdef.is_defined(k)
    # returns True but k is an alias, the mdef.type(k) will error out.
    seis.put("net", "IU")
    seis.put("npts", ts_size)
    seis.put("sampling_rate", sampling_rate)

Example #10

0

Show file

class RFdeconProcessor:
    """
    This class is a wrapper for the suite of receiver function deconvolution
    methods we call scalar methods.  That is, the operation is reducible to
    two time series:   wavelet signal and the data (TimeSeries) signal.
    That is in contrast to three component methods that always treat the
    data as vector samples.   The class should be created as a global
    processor object to be used in a spark job.  The design assumes the
    processor object will be passed as an argument to the RFdecon
    function that should appear as a function in a spark map call.
    """
    def __repr__(self) -> str:
        return "{type}(alg='{alg}', pf='{pf}')".format(
            type=str(self.__class__), alg=self.algorithm, pf=self.pf)

    def __str__(self) -> str:
        return "{type}(alg='{alg}', pf='{pf}', md={md})".format(
            type=str(self.__class__),
            alg=self.algorithm,
            pf=self.pf,
            md=str(Metadata2dict(self.md)))

    def __init__(self, alg="LeastSquares", pf="RFdeconProcessor.pf"):
        """
        Load the parameter branch for the requested algorithm from a pf file.

        :param alg: algorithm name.  Accepted values are "LeastSquares",
          "WaterLevel", "MultiTaperXcor" and "MultiTaperSpecDiv".
        :param pf: name of the parameter file handed to AntelopePf.
        :raises RuntimeError: for "GeneralizedIterative" (not yet
          supported) and for any other unrecognized value of alg.
        """
        self.algorithm = alg
        self.pf = pf
        pfhandle = AntelopePf(pf)
        if self.algorithm == "LeastSquares":
            self.md = pfhandle.get_branch("LeastSquare")
            self.__uses_noise = False
        elif alg == "WaterLevel":
            self.md = pfhandle.get_branch("WaterLevel")
            self.__uses_noise = False
        elif alg == "MultiTaperXcor":
            self.md = pfhandle.get_branch("MultiTaperXcor")
            self.__uses_noise = True
        elif alg == "MultiTaperSpecDiv":
            self.md = pfhandle.get_branch("MultiTaperSpecDiv")
            self.__uses_noise = True
        elif alg == "GeneralizedIterative":
            raise RuntimeError(
                "Generalized Iterative method not yet supported")
        else:
            raise RuntimeError("Illegal value for alg=" + alg)
        # below is needed because AntelopePf cannot be serialized.
        self.md = Metadata(self.md)

    @staticmethod
    def _check_load_args(method, dtype, window):
        """Validate the dtype/window combination shared by the load methods."""
        prefix = "RFdeconProcessor." + method + ":  "
        if dtype == "raw_vector" and window:
            raise RuntimeError(
                prefix +
                "Illegal argument combination\nwindow cannot be true with raw_vector input"
            )
        if dtype not in ("Seismogram", "TimeSeries", "raw_vector"):
            raise RuntimeError(prefix + " Illegal dtype parameter=" +
                               str(dtype))

    @staticmethod
    def _extract_vector(x, dtype, component, twin):
        """
        Return the sample vector of x for an already validated dtype.

        twin is None for no windowing, otherwise a (start, end) pair
        passed to WindowData.
        """
        if dtype == "raw_vector":
            return x
        ts = ExtractComponent(x, component) if dtype == "Seismogram" else x
        if twin is not None:
            ts = WindowData(ts, twin[0], twin[1])
        return ts.data

    def _build_processor(self):
        """Create the C++ operator for self.algorithm and load stored vectors."""
        if self.algorithm == "LeastSquares":
            processor = LeastSquareDecon(self.md)
        elif self.algorithm == "WaterLevel":
            processor = WaterLevelDecon(self.md)
        elif self.algorithm == "MultiTaperXcor":
            processor = MultiTaperXcorDecon(self.md)
        elif self.algorithm == "MultiTaperSpecDiv":
            processor = MultiTaperSpecDivDecon(self.md)
        else:
            # Only reachable if self.algorithm was altered after construction.
            raise RuntimeError("RFdeconProcessor:  unsupported algorithm=" +
                               str(self.algorithm))
        # Vectors are stored as ndarrays (see the load methods) and are
        # converted back to DoubleVector for the operator.
        if hasattr(self, "dvector"):
            processor.loaddata(DoubleVector(self.dvector))
        if hasattr(self, "wvector"):
            processor.loadwavelet(DoubleVector(self.wvector))
        if self.__uses_noise and hasattr(self, "nvector"):
            processor.loadnoise(DoubleVector(self.nvector))
        return processor

    def loaddata(self, d, dtype="Seismogram", component=0, window=False):
        """
        Loads data for processing.  When window is set true
        use the internal pf definition of data time window
        and window the data.  The dtype parameter changes the
        behavior of this algorithm significantly depending on
        the setting.   It can be one of the following:
        Seismogram, TimeSeries, or raw_vector.   For the first
        two the data to process will be extracted in a
        pf specified window if window is True.  If window is
        False TimeSeries data will be passed directly and
        Seismogram data will have the data defined by the
        component parameter copied to the internal data
        vector workspace.   If dtype is set to raw_vector
        d is assumed to be a raw numpy vector of doubles or
        the aliased std::vector used in ccore, for example,
        in the TimeSeries object's vector.  Setting dtype
        to raw_vector and window True will result in this
        method throwing a RuntimeError exception as the
        combination is not possible since raw_vector data
        have no time base.

        :param d: input data (contents expected depend upon
          value of dtype parameter).
        :param dtype: string defining the form d is expected
          to be (see details above)
        :param component: component of Seismogram data to
          load as data vector.  Ignored if dtype is raw_vector
          or TimeSeries.
        :param window: boolean controlling internally
          defined windowing.  (see details above)

        :return:  Nothing (not None nothing) is returned
        :raises RuntimeError: for an illegal dtype or for raw_vector
          input combined with window=True.
        """
        self._check_load_args("loaddata", dtype, window)
        twin = None
        if window:
            dwin = self.dwin
            twin = (dwin.start, dwin.end)
        # Have to explicitly convert to ndarray because DoubleVector cannot be serialized.
        self.dvector = np.array(
            self._extract_vector(d, dtype, component, twin))

    def loadwavelet(self, w, dtype="Seismogram", component=2, window=False):
        """
        Load the wavelet signal.  Arguments have the same meaning as in
        loaddata except that component defaults to 2.  When window is
        True the same data window (self.dwin) used by loaddata is
        applied to the wavelet.

        :raises RuntimeError: for an illegal dtype or for raw_vector
          input combined with window=True.
        """
        self._check_load_args("loadwavelet", dtype, window)
        twin = None
        if window:
            dwin = self.dwin
            twin = (dwin.start, dwin.end)
        # Have to explicitly convert to ndarray because DoubleVector cannot be serialized.
        self.wvector = np.array(
            self._extract_vector(w, dtype, component, twin))

    def loadnoise(self, n, dtype="Seismogram", component=2, window=False):
        """
        Load the noise signal.  Arguments have the same meaning as in
        loaddata except that component defaults to 2 and the window is
        read from the noise_window_start and noise_window_end parameters.
        The call is silently ignored when the algorithm is LeastSquares
        or WaterLevel.

        :raises RuntimeError: for an illegal dtype or for raw_vector
          input combined with window=True.
        """
        # Return immediately for methods that ignore noise.
        # Note we do this silently assuming the function wrapper below
        # will post an error to elog for the output to handle this nonfatal error
        if self.algorithm == "LeastSquares" or self.algorithm == "WaterLevel":
            return
        self._check_load_args("loadnoise", dtype, window)
        twin = None
        # IMPORTANT  these two parameters are not required by the
        # ScalarDecon C code but need to be inserted in pf for any algorithm
        # that requires noise data (i.e. multitaper) and the window
        # options is desired
        if window:
            twin = (self.md.get_double("noise_window_start"),
                    self.md.get_double("noise_window_end"))
        # Have to explicitly convert to ndarray because DoubleVector cannot be serialized.
        self.nvector = np.array(
            self._extract_vector(n, dtype, component, twin))

    def apply(self):
        """
        Compute the RF estimate using the algorithm defined internally.

        :return: vector of data that are the RF estimate computed from previously loaded data.
        """
        processor = self._build_processor()
        processor.process()
        return processor.getresult()

    def actual_output(self):
        """
        The actual output of a decon operator is the inverse filter applied to
        the wavelet.  By design it is an approximation of the shaping wavelet
        defined for this operator.

        :return: Actual output of the operator as a ccore.TimeSeries object.
          The Metadata of the return is bare bones.  The most important factor
          about this result is that because actual output waveforms are normally
          a zero phase wavelet of some kind the result is time shifted to be
          centered (i.e. t0 is rounded n/2 where n is the length of the vector
          returned).
        """
        return self._build_processor().actual_output()

    def ideal_output(self):
        """
        The ideal output of a decon operator is the same thing we call a
        shaping wavelet.  This method returns the ideal output=shaping wavelet
        as a TimeSeries object.   Like the actual output method the return
        function is circular shifted so the function peaks at 0 time located
        at n/2 samples from the start sample.  Graphic displays will then show
        the wavelet centered and peaked at time 0.   The prediction error
        can be computed as the difference between the actual_output and
        ideal_output TimeSeries objects.   The norm of the prediction error
        is a helpful metric to display the stability and accuracy of the
        inverse.
        """
        return self._build_processor().ideal_output()

    def inverse_filter(self):
        """
        This method returns the actual inverse filter that if convolved with
        the original data will produce the RF estimate.  Note the filter is
        meaningful only if the source wavelet is minimum phase.  A standard
        theorem from time series analysis shows that the inverse of mixed
        phase wavelet is usually unstable and a maximum phase wavelet is always
        unstable.   Fourier-based methods can still compute a stable solution
        even with a mixed phase wavelet because of the implied circular
        convolution.

        The result is returned as a TimeSeries object.
        """
        return self._build_processor().inverse_filter()

    def QCMetrics(self):
        """
        All decon algorithms compute a set of algorithm dependent quality
        control metrics.  This method returns the metrics as a set of fixed
        name:value pairs in a mspass.Metadata object.   The details are
        algorithm dependent.  See related documentation for metrics computed
        by different algorithms.
        """
        return self._build_processor().QCMetrics()

    def change_parameters(self, md):
        """
        Use this method to change the internal parameter setting of the
        processor.  It can be used, for example, to switch from the damped
        least square method to the water level method.  Note the input
        must be a complete definition for a parameter set defining a
        particular algorithm.  i.e. this is not an update method;
        it reinitializes the processor's parameters.

        :param md: is a mspass.Metadata object containing required parameters
          for the alternative algorithm.
        """
        self.md = Metadata(md)

    @property
    def uses_noise(self):
        """True when the configured algorithm uses noise data."""
        return self.__uses_noise

    @property
    def dwin(self):
        """TimeWindow built from the deconvolution data window parameters."""
        tws = self.md.get_double("deconvolution_data_window_start")
        twe = self.md.get_double("deconvolution_data_window_end")
        return TimeWindow(tws, twe)

    @property
    def nwin(self):
        """
        TimeWindow built from the noise window parameters, or a default
        constructed TimeWindow when the algorithm does not use noise.
        """
        if self.__uses_noise:
            tws = self.md.get_double("noise_window_start")
            twe = self.md.get_double("noise_window_end")
            return TimeWindow(tws, twe)
        # always return an initialized instance even if not used
        return TimeWindow()

Example #11

0

Show file

def load_one_ensemble(
    doc,
    create_history=False,
    jobname="Default job",
    jobid="99999",
    algid="99999",
    ensemble_mdkeys=None,  # None or empty: copy every doc key except "members"
    apply_calib=False,
    verbose=False,
):
    """
    Load a full ensemble indexed in the collection import_miniseed_ensemble.

    This uses a large memory model that eats up the entire file with
    obspy's miniseed reader.  It contains some relics of early ideas of
    having the function utilize the history mechanism.  Those may not
    work, but were retained.

    Error handling is best-effort: any exception raised while loading
    (including the MsPASSError raised for an unsupported format or mover)
    is caught, reported with print, and None is returned.

    :param doc: is one record in the import_miniseed_ensemble collection.
      The keys "format", "mover", "dir" and "dfile" are read from it.
    :param create_history:  if true each member of the ensemble will be
      defined in the history chain as an origin and jobname and jobid will
      be used to construct the ProcessingHistory object.
    :param jobname: as used in ProcessingHistory (default "Default job")
    :param jobid: as used in ProcessingHistory
    :param algid: as used in ProcessingHistory
    :param ensemble_mdkeys:  list of keys to copy from doc to the ensemble
      Metadata (no type checking is done).  When None or empty (the
      default) every key of doc except "members" is copied.
    :param apply_calib:  if True tells obspy's reader to apply the calibration
      factor to convert the data to ground motion units.  Default is false.
    :param verbose:  write informational messages while processing
      (accepted for interface compatibility; currently not used).
    :return: TimeSeriesEnsemble with one member per trace read from the
      file, or None if anything failed.
    """
    try:
        ensemblemd = Metadata()
        if create_history:
            his = ProcessingHistory(jobname, jobid)
        form = doc["format"]
        mover = doc["mover"]
        if form != "mseed":
            raise MsPASSError(
                "Cannot handle this ensemble - ensemble format="
                + form
                + "\nCan only be mseed for this reader"
            )
        if mover != "obspy_seed_ensemble_reader":
            raise MsPASSError(
                "Cannot handle this ensemble - ensemble mover parameter="
                + mover
                + " which is not supported"
            )
        dirname = doc["dir"]
        dfile = doc["dfile"]
        fname = dirname + "/" + dfile
        # Note this algorithm actually should work with any format
        # supported by obspy's read function - should generalize it for release
        dseis = read(fname, format="mseed", apply_calib=apply_calib)
        if ensemble_mdkeys:
            ensemblemd = _load_md(doc, ensemble_mdkeys)
        else:
            # default is to load everything except the members list
            for k in doc:
                if k != "members":
                    ensemblemd[k] = doc[k]
        # There is a Stream2TimeSeriesEnsemble function
        # but we don't use it here because we need some functionality
        # not found in that simple function
        result = TimeSeriesEnsemble(ensemblemd, len(dseis))
        for d in dseis:
            dts = Trace2TimeSeries(d)
            if create_history:
                # This should just define jobname and jobid
                dts.load_history(his)
                # NOTE(review): d is whatever read() yields per trace; confirm
                # it supports lookup by the string key "seed_file_id".
                seedid = d["seed_file_id"]
                dts.set_as_origin(
                    "load_ensemble", algid, seedid, AtomicType.TIMESERIES, True
                )
            result.member.append(dts)
        return result
    except Exception as err:
        # Keep the original best-effort contract (report and return None),
        # but let KeyboardInterrupt/SystemExit propagate and show the cause.
        print(
            "something threw an exception - needs more complete error handlers: "
            + repr(err)
        )
        return None

Example #12

0

Show file

def test_CoreSeismogram():
    """
    Check how _CoreSeismogram handles the 'tmatrix' Metadata entry:
    accepted input forms in the constructor, the transformation_matrix
    setter, and the error raised for each rejected form.
    """
    md = Metadata()
    md['delta'] = 0.01
    md['starttime'] = 0.0
    md['npts'] = 100

    def build():
        # every case constructs from the shared md
        return _CoreSeismogram(md, False)

    def expect_failure(pattern):
        with pytest.raises(MsPASSError, match=pattern):
            build()

    # test metadata constructor: 3x3 inputs compare directly
    for square in (np.random.rand(3, 3), dmatrix(np.random.rand(3, 3))):
        md['tmatrix'] = square
        cseis = build()
        assert (cseis.transformation_matrix == md['tmatrix']).all()

    # nine-element arrays in any layout compare after reshaping to 3x3
    for shape in ((9,), (1, 9), (9, 1)):
        md['tmatrix'] = np.random.rand(*shape)
        cseis = build()
        assert (cseis.transformation_matrix == md['tmatrix'].reshape(
            3, 3)).all()

    # plain python lists, nested or flat
    for shape in ((3, 3), (9,)):
        md['tmatrix'] = np.random.rand(*shape).tolist()
        cseis = build()
        expected = np.array(md['tmatrix']).reshape(3, 3)
        assert np.isclose(cseis.transformation_matrix, expected).all()

    # test whether the setter of transformation_matrix updates metadata correctly
    row_vector = np.random.rand(1, 9)
    cseis.transformation_matrix = row_vector
    assert (cseis.transformation_matrix == row_vector.reshape(3, 3)).all()
    stored = np.array(cseis['tmatrix']).reshape(3, 3)
    assert np.isclose(cseis.transformation_matrix, stored).all()

    flat_list = np.random.rand(9).tolist()
    cseis.transformation_matrix = flat_list
    assert np.isclose(cseis.transformation_matrix,
                      np.array(flat_list).reshape(3, 3)).all()
    stored = np.array(cseis['tmatrix']).reshape(3, 3)
    assert np.isclose(cseis.transformation_matrix, stored).all()

    # test exceptions
    for bad_value, pattern in (
        (np.random.rand(4, 2), "should be a 3x3 matrix"),
        (dmatrix(np.random.rand(2, 4)), "should be a 3x3 matrix"),
        (42, "not recognized"),
    ):
        md['tmatrix'] = bad_value
        expect_failure(pattern)

    md.erase('tmatrix')
    expect_failure("Error trying to extract")

    md['tmatrix'] = {4: 2}
    expect_failure("type is not recognized")

    # a non-float entry in a flat list
    md['tmatrix'] = np.random.rand(9).tolist()
    md['tmatrix'][3] = 'str'
    expect_failure("should be float")

    for bad_value, pattern in (
        (np.random.rand(3, 4).tolist(), "should be a 3x3 list of list"),
        ([1, 2, 3], "should be a 3x3 list of list"),
        (np.random.rand(2, 2).tolist(),
         "should be a list of 9 floats or a 3x3 list of list"),
    ):
        md['tmatrix'] = bad_value
        expect_failure(pattern)

    # a non-float entry in a nested list
    md['tmatrix'] = np.random.rand(3, 3).tolist()
    md['tmatrix'][1][1] = 'str'
    expect_failure("should be float")

Example #13

0

Show file

def test_Metadata():
    """
    Exercise the Metadata python bindings: repr, storage of python
    objects, pickle round trips, type reporting, deletion, the lack of
    reversed() support and the += operator.
    """
    ndarray_key = "<class 'numpy.ndarray'>"

    def assert_same_content(left, right, array_key):
        # numpy compares elementwise, so the array entry needs .all()
        for key in left:
            if key == array_key:
                assert (left[key] == right[key]).all()
            else:
                assert left[key] == right[key]

    md = Metadata()
    assert repr(md) == 'Metadata({})'

    # The stored dict is expected to reflect changes made through every
    # handle to it, including after those handles are deleted.
    source = {1: 1}
    md.put('dict', source)
    fetched = md.get('dict')
    fetched[2] = 2
    del fetched
    source[3] = 3
    del source
    md['dict'][4] = 4
    assert md['dict'] == {1: 1, 2: 2, 3: 3, 4: 4}

    # pickle round trip with awkward keys and values
    md = Metadata({'array': np.array([3, 4])})
    for key, value in (
        ('dict', {1: 1, 2: 2}),
        ('str\'i"ng', 'str\'i"ng'),
        ("str'ing", "str'ing"),
        ('double', 3.14),
        ('bool', True),
        ('int', 7),
        ("string", "str\0ing"),
        ("string", "str\ning"),
        ("str\ting", "str\ting"),
        ("str\0ing", "str\0ing"),
        ("str\\0ing", "str\\0ing"),
    ):
        md[key] = value
    md_copy = pickle.loads(pickle.dumps(md))
    assert_same_content(md, md_copy, 'array')

    # each key here is the type name Metadata.type is expected to report
    md = Metadata({
        "<class 'numpy.ndarray'>": np.array([3, 4]),
        "<class 'dict'>": {
            1: 1,
            2: 2
        },
        'string': 'string',
        'double': 3.14,
        'bool': True,
        'long': 7,
        "<class 'bytes'>": b'\xba\xd0\xba\xd0',
        "<class 'NoneType'>": None
    })
    for type_name in md:
        assert md.type(type_name) == type_name

    md[b'\xba\xd0'] = b'\xba\xd0'
    md_copy = pickle.loads(pickle.dumps(md))
    assert_same_content(md, md_copy, ndarray_key)

    # both deletion spellings remove the entry
    del md[ndarray_key]
    md_copy.erase(ndarray_key)
    assert ndarray_key not in md
    assert ndarray_key not in md_copy
    assert md.keys() == md_copy.keys()

    with pytest.raises(TypeError, match='Metadata'):
        reversed(md)

    # += merges; the right-hand side wins on a shared key
    md = Metadata({1: 1, 3: 3})
    md_copy = Metadata({2: 2, 3: 30})
    md += md_copy
    assert repr(md) == "Metadata({'1': 1, '2': 2, '3': 30})"

Example #14

0

Show file

def post_ensemble_metadata(ens,
                           keys=None,
                           check_all_members=False,
                           clean_members=False):
    """
    Copy selected member Metadata values to the ensemble's Metadata.

    It may be necessary to call this function after conversion from
    an obspy Stream to one of the mspass Ensemble classes.  This function
    is necessary because a mspass Ensemble has a concept not part of the
    obspy Stream object.  That is, mspass ensembles have a global
    Metadata container.  That container is expected to contain Metadata
    common to all members of the ensemble.  For example, for data from
    a single earthquake it would be sensible to post the source location
    information in the ensemble metadata container rather than
    having duplicates in each member.

    Two different approaches can be used to do this copy.  The faster,
    but least reliable method simply copies the values found in the
    members without comparing them.  That approach is enabled by default.
    As implemented, every live member is visited, each one must define
    every requested key, and when members disagree the value from the
    last live member is the one posted.  It is completely reliable when
    used after a conversion from an obspy Stream but ONLY if the data
    began life as a mspass ensemble with exactly the same keys set as
    global.  The type example of that is after an obspy algorithm is
    applied to a mspass ensemble via the mspass decorators.

    A more cautious algorithm can be enabled by setting check_all_members
    True.  In that mode each key is additionally tested for a mismatch
    across the members.  Note we do not do anything fancy with floating
    point data to allow for finite precision.  The reason is Metadata
    float values are normally expected to be constant data.  In that case
    an != test will yield false when the comparison is between two copies.
    The not equal test may fail, however, if used with computed floating
    point numbers.  An example where that is possible would be spatial
    gathers like PP data assembled by midpoint coordinates.  If you need
    to build gathers in such a context we recommend you use an integer
    image point tied to a specialized document collection in MongoDB that
    defines the geometry of that point.  There may be other examples, but
    the point is don't trust computed floating point values to work.
    It will also not work if the values of a key-value pair don't support
    an != comparison.  That could be common if the value requested for
    copy was a python object.

    :param ens: ensemble data to be processed.  The function will throw
      a MsPASSError exception if ens is not either a TimeSeriesEnsemble or
      a SeismogramEnsemble.
    :param keys: is expected to be a list of metadata keys (required to be
      strings) that are to be copied from member metadata to ensemble
      metadata.  None (the default) is treated as an empty list.
    :param check_all_members: switch controlling method used to extract
      metadata that is to be copied (see above for details).  Default is
      False.
    :param clean_members: when true data copied to ensemble metadata
      will be removed from all members.  This option is only allowed
      if check_all_members is set True.  It will be silently ignored if
      check_all_members is False.
    :raises MsPASSError: if ens is not an ensemble, if a live member lacks
      one of the keys, or (check_all_members mode) if members do not match.
    """
    alg = "post_ensemble_metadata"
    if keys is None:
        keys = []

    if not isinstance(ens, (TimeSeriesEnsemble, SeismogramEnsemble)):
        raise MsPASSError(
            alg +
            ": Illegal data received. This function runs only on mspass ensemble objects",
            "Invalid",
        )

    md = Metadata()
    for d in ens.member:
        if d.live:
            for k in keys:
                if k not in d:
                    raise MsPASSError(
                        alg + ": no data matching requested key=" + k +
                        " Cannot post to ensemble",
                        "Invalid",
                    )
                md[k] = d[k]
    if check_all_members:
        # The match test looks at the whole ensemble for a key, so it is run
        # once per key rather than once per (member, key) pair.  The guard
        # keeps the empty-ensemble case identical to the per-member loop,
        # which never ran the test when there were no members.
        if len(ens.member) > 0:
            for k in keys:
                if not _all_members_match(ens, k):
                    raise MsPASSError(
                        alg + ": Data mismatch data members with key=" +
                        k +
                        "\n In check_all_members mode all values associated with this key must match",
                        "Invalid",
                    )
        if clean_members:
            # Erase from every member, live or not.
            for d in ens.member:
                for k in keys:
                    d.erase(k)
    ens.update_metadata(md)