def makets():
    """
    Build the TimeSeries object used in this tutorial.
    """
    d = TimeSeries()
    setbasics(d, 1000)
    y = rickerwave(2.0, 0.005)
    ny = len(y)
    for i in range(min(ny, 1000)):
        d.data[i] = y[i]
    return d

def test_PowerSpectrum():
    ts = TimeSeries(100)
    ts.data[0] = 1.0  # delta function - spectrum will be flat
    ts.live = True
    engine = MTPowerSpectrumEngine(100, 5, 10)
    spec = engine.apply(ts)
    assert spec.Nyquist() == spec.f0 + spec.df * spec.nf()
    spec_copy = pickle.loads(pickle.dumps(spec))
    assert spec.df == spec_copy.df
    assert spec.f0 == spec_copy.f0
    assert spec.spectrum_type == spec_copy.spectrum_type
    assert np.allclose(spec.spectrum, spec_copy.spectrum)

def test_Ensemble(Ensemble):
    md = Metadata()
    md['double'] = 3.14
    md['bool'] = True
    md['long'] = 7
    es = Ensemble(md, 3)
    if isinstance(es, TimeSeriesEnsemble):
        d = TimeSeries(10)
        d = make_constant_data_ts(d)
        es.member.append(d)
        es.member.append(d)
        es.member.append(d)
    else:
        d = Seismogram(10)
        d = make_constant_data_seis(d)
        es.member.append(d)
        es.member.append(d)
        es.member.append(d)
    es.sync_metadata(['double', 'long'])
    assert es.member[0].is_defined('bool')
    assert es.member[0]['bool'] == True
    assert not es.member[0].is_defined('double')
    assert not es.member[0].is_defined('long')
    es.sync_metadata()
    assert es.member[1].is_defined('double')
    assert es.member[1].is_defined('long')
    assert es.member[1]['double'] == 3.14
    assert es.member[1]['long'] == 7
    es.update_metadata(Metadata({'k': 'v'}))
    assert es['k'] == 'v'

def test_windowdata():
    npts = 1000
    ts = TimeSeries()
    setbasics(ts, npts)
    for i in range(npts):
        ts.data[i] = float(i)
    t3c = Seismogram()
    setbasics(t3c, npts)
    for k in range(3):
        for i in range(npts):
            t3c.data[k, i] = 100 * (k + 1) + float(i)
    win = TimeWindow(2, 3)
    d = WindowData(ts, win)
    print('t y')
    for j in range(d.npts):
        print(d.time(j), d.data[j])
    assert len(d.data) == 101
    assert d.t0 == 2.0
    assert d.endtime() == 3.0
    d = WindowData(t3c, win)
    print('t x0 x1 x2')
    for j in range(d.npts):
        print(d.time(j), d.data[0, j], d.data[1, j], d.data[2, j])
    assert d.data.columns() == 101
    assert d.t0 == 2.0
    assert d.endtime() == 3.0
    print('testing error handling')
    t3c.kill()
    d = WindowData(t3c, win)
    assert d.npts == 1000 and (not d.live)
    d = WindowData(ts, win, preserve_history=True)
    print('Error message posted')
    print(d.elog.get_error_log())
    assert d.elog.size() == 1
    # this still throws an error but the message will be different
    d = WindowData(ts, win, preserve_history=True, instance='0')
    print('Error message posted')
    print(d.elog.get_error_log())
    assert d.elog.size() == 1

def test_map_spark_and_dask():
    l = [get_live_timeseries() for i in range(5)]
    spark_res = spark_map(l)
    dask_res = dask_map(l)
    ts_cp = TimeSeries(l[0])
    res = signals.filter(ts_cp, "bandpass", freqmin=1, freqmax=5,
                         preserve_history=True, instance='0')
    assert np.isclose(spark_res[0].data, ts_cp.data).all()
    assert np.isclose(dask_res[0].data, ts_cp.data).all()

def maketsens(d, n=20, moveout=True, moveout_dt=0.05):
    """
    Makes a TimeSeries ensemble as copies of d.  If moveout is True,
    applies a linear moveout to the members using moveout_dt times the
    member's position (count) in the ensemble.
    """
    # If python had templates this would be one because this and the
    # function below are identical except for types
    result = TimeSeriesEnsemble()
    for i in range(n):
        y = TimeSeries(d)  # this makes a required deep copy
        if moveout:
            y.t0 += float(i) * moveout_dt
        result.member.append(y)
    return result

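# Illustrative usage sketch for the two helpers above (hypothetical driver,
# not part of the original code).  It assumes makets() and maketsens() are
# defined as shown and that the mspasspy TimeSeries bindings are imported
# by the surrounding module.
def _example_maketsens_usage():
    d = makets()  # Ricker-wavelet TimeSeries built by the helper above
    ens = maketsens(d, n=20, moveout=True, moveout_dt=0.05)
    assert len(ens.member) == 20
    # with moveout enabled, member i starts i*moveout_dt seconds later than d
    assert abs(ens.member[5].t0 - (d.t0 + 5 * 0.05)) < 1e-12
    return ens
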
def _deepcopy(self, d):
    """
    Private helper method for immediately above.  Necessary because
    copy.deepcopy doesn't work with our pybind11 wrappers.  There may be a
    fix, but for now we have to use copy constructors specific to each
    object type.
    """
    if isinstance(d, TimeSeries):
        return TimeSeries(d)
    elif isinstance(d, Seismogram):
        return Seismogram(d)
    elif isinstance(d, TimeSeriesEnsemble):
        return TimeSeriesEnsemble(d)
    elif isinstance(d, SeismogramEnsemble):
        return SeismogramEnsemble(d)
    else:
        raise RuntimeError(
            "SeismicPlotter._deepcopy:  received an unsupported data type=",
            type(d))

def test_map_spark_and_dask(spark_context):
    l = [get_live_timeseries() for i in range(5)]
    # add net, sta, chan, loc to avoid metadata serialization problem
    for i in range(5):
        l[i]["chan"] = "HHZ"
        l[i]["loc"] = "test_loc"
        l[i]["net"] = "test_net"
        l[i]["sta"] = "test_sta"
    spark_res = spark_map(l, spark_context)
    dask_res = dask_map(l)
    ts_cp = TimeSeries(l[0])
    res = signals.filter(ts_cp, "bandpass", freqmin=1, freqmax=5,
                         object_history=True, alg_id="0")
    assert np.isclose(spark_res[0].data, ts_cp.data).all()
    assert np.isclose(dask_res[0].data, ts_cp.data).all()

def make_wavelet_noise_data(nscale=0.1, ns=2048, padlength=512,
                            dt=0.05, npoles=3, corners=[0.08, 0.8]):
    wn = TimeSeries(ns)
    wn.t0 = 0.0
    wn.dt = dt
    wn.tref = TimeReferenceType.Relative
    wn.live = True
    nd = ns + 2 * padlength
    y = nscale * randn(nd)
    sos = signal.butter(npoles, corners, btype='bandpass',
                        output='sos', fs=1.0 / dt)
    y = signal.sosfilt(sos, y)
    for i in range(ns):
        wn.data[i] = y[i + padlength]
    return wn

def test_operators():
    d = _CoreTimeSeries(10)
    d1 = make_constant_data_ts(d, nsamp=10)
    dsave = _CoreTimeSeries(d1)
    d = _CoreTimeSeries(6)
    d2 = make_constant_data_ts(d, t0=-0.2, nsamp=6, val=2.0)
    dsave = _CoreTimeSeries(d1)
    d1 += d2
    assert np.allclose(d1.data, [3, 3, 3, 3, 1, 1, 1, 1, 1, 1])
    d1 = _CoreTimeSeries(dsave)
    d = d1 + d2
    assert np.allclose(d.data, [3, 3, 3, 3, 1, 1, 1, 1, 1, 1])
    d1 = _CoreTimeSeries(dsave)
    d1 *= 2.5
    assert np.allclose(d1.data,
                       [2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5])
    d3 = TimeSeries(10)
    d4 = TimeSeries(6)
    d3 = make_constant_data_ts(d3, nsamp=10)
    d4 = make_constant_data_ts(d4, t0=-0.2, nsamp=6, val=2.0)
    dsave = _CoreTimeSeries(d3)
    d3 = TimeSeries(dsave)
    d3 += d4
    assert np.allclose(d3.data, [3, 3, 3, 3, 1, 1, 1, 1, 1, 1])
    d3 = TimeSeries(dsave)
    d = d3 + d4
    assert np.allclose(d.data, [3, 3, 3, 3, 1, 1, 1, 1, 1, 1])
    d1 = _CoreTimeSeries(dsave)
    d3 = TimeSeries(dsave)
    d3 *= 2.5
    assert np.allclose(d3.data,
                       [2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5])
    x = np.linspace(-0.7, 1.2, 20)
    for t in x:
        d3 = TimeSeries(dsave)
        d4.t0 = t
        d3 += d4
    # These are selected asserts of the incremental test above.
    # Visually d4 moves through d3 as the t0 value advances.  Asserts
    # test the end members:  skewed left, inside, and skewed right
    d3 = TimeSeries(dsave)
    d4.t0 = -0.7  # no overlap test
    d3 += d4
    assert np.allclose(d3.data, [1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
    d3 = TimeSeries(dsave)
    d4.t0 = -0.3  # overlap left
    d3 += d4
    assert np.allclose(d3.data, [3, 3, 3, 1, 1, 1, 1, 1, 1, 1])
    d3 = TimeSeries(dsave)
    d4.t0 = 0.3  # d4 inside d3 test
    d3 += d4
    assert np.allclose(d3.data, [1, 1, 1, 3, 3, 3, 3, 3, 3, 1])
    d3 = TimeSeries(dsave)
    d4.t0 = 0.7  # partial overlap right
    d3 += d4
    assert np.allclose(d3.data, [1, 1, 1, 1, 1, 1, 1, 3, 3, 3])
    d3 = TimeSeries(dsave)
    d4.t0 = 1.0  # no overlap test right
    d3 += d4
    assert np.allclose(d3.data, [1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
    # Repeat the same test for Seismogram objects
    # This section is edited cut-paste of above
    # Intentionally do not test _CoreSeismogram directly because
    # currently if it works for Seismogram it will for _CoreSeismogram
    d = _CoreSeismogram(10)
    d1 = make_constant_data_seis(d, nsamp=10)
    dsave = _CoreSeismogram(d1)
    d = _CoreSeismogram(6)
    d2 = make_constant_data_seis(d, t0=-0.2, nsamp=6, val=2.0)
    dsave = _CoreSeismogram(d1)
    d1 += d2
    assert np.allclose(
        d1.data,
        np.array([
            [3.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            [3.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            [3.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
        ]),
    )
    d1 = _CoreSeismogram(dsave)
    d = d1 + d2
    assert np.allclose(
        d.data,
        np.array([
            [3.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            [3.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            [3.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
        ]),
    )
    d1 = _CoreSeismogram(dsave)
    d1 *= 2.5
    assert np.allclose(
        d1.data,
        np.array([
            [2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5],
            [2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5],
            [2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5],
        ]),
    )
    d3 = Seismogram(10)
    d4 = Seismogram(6)
    d3 = make_constant_data_seis(d3, nsamp=10)
    d4 = make_constant_data_seis(d4, t0=-0.2, nsamp=6, val=2.0)
    dsave = Seismogram(d3)
    d3 += d4
    assert np.allclose(
        d3.data,
        np.array([
            [3.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            [3.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            [3.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
        ]),
    )
    d3 = Seismogram(dsave)
    d = d3 + d4
    assert np.allclose(
        d.data,
        np.array([
            [3.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            [3.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            [3.0, 3.0, 3.0, 3.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
        ]),
    )
    d3 = Seismogram(dsave)
    d3 *= 2.5
    assert np.allclose(
        d3.data,
        np.array([
            [2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5],
            [2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5],
            [2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5],
        ]),
    )
    x = np.linspace(-0.7, 1.2, 20)
    for t in x:
        d3 = Seismogram(dsave)
        d4.t0 = t
        d3 += d4
    # These are selected asserts of the incremental test above.
    # Visually d4 moves through d3 as the t0 value advances.  Asserts
    # test the end members:  skewed left, inside, and skewed right
    d3 = Seismogram(dsave)
    d4.t0 = -0.7  # no overlap test
    d3 += d4
    assert np.allclose(
        d3.data,
        np.array([
            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        ]),
    )
    d3 = Seismogram(dsave)
    d4.t0 = -0.3  # overlap left
    d3 += d4
    assert np.allclose(
        d3.data,
        np.array([
            [3, 3, 3, 1, 1, 1, 1, 1, 1, 1],
            [3, 3, 3, 1, 1, 1, 1, 1, 1, 1],
            [3, 3, 3, 1, 1, 1, 1, 1, 1, 1],
        ]),
    )
    d3 = Seismogram(dsave)
    d4.t0 = 0.3  # d4 inside d3 test
    d3 += d4
    assert np.allclose(
        d3.data,
        np.array([
            [1, 1, 1, 3, 3, 3, 3, 3, 3, 1],
            [1, 1, 1, 3, 3, 3, 3, 3, 3, 1],
            [1, 1, 1, 3, 3, 3, 3, 3, 3, 1],
        ]),
    )
    d3 = Seismogram(dsave)
    d4.t0 = 0.7  # partial overlap right
    d3 += d4
    assert np.allclose(
        d3.data,
        np.array([
            [1, 1, 1, 1, 1, 1, 1, 3, 3, 3],
            [1, 1, 1, 1, 1, 1, 1, 3, 3, 3],
            [1, 1, 1, 1, 1, 1, 1, 3, 3, 3],
        ]),
    )
    d3 = Seismogram(dsave)
    d4.t0 = 1.0  # no overlap test right
    d3 += d4
    assert np.allclose(
        d3.data,
        np.array([
            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        ]),
    )
    # Repeat exactly for the - test but different numeric results;
    # just omit the *= tests
    d = _CoreTimeSeries(10)
    d1 = make_constant_data_ts(d, nsamp=10)
    dsave = _CoreTimeSeries(d1)
    d = _CoreTimeSeries(6)
    d2 = make_constant_data_ts(d, t0=-0.2, nsamp=6, val=2.0)
    dsave = _CoreTimeSeries(d1)
    d1 -= d2
    assert np.allclose(d1.data, [-1, -1, -1, -1, 1, 1, 1, 1, 1, 1])
    d1 = _CoreTimeSeries(dsave)
    d = d1 - d2
    assert np.allclose(d.data, [-1, -1, -1, -1, 1, 1, 1, 1, 1, 1])
    d3 = TimeSeries(10)
    d4 = TimeSeries(6)
    d3 = make_constant_data_ts(d3, nsamp=10)
    d4 = make_constant_data_ts(d4, t0=-0.2, nsamp=6, val=2.0)
    dsave = _CoreTimeSeries(d3)
    d3 = TimeSeries(dsave)
    d3 -= d4
    assert np.allclose(d3.data, [-1, -1, -1, -1, 1, 1, 1, 1, 1, 1])
    d3 = TimeSeries(dsave)
    d = d3 - d4
    assert np.allclose(d.data, [-1, -1, -1, -1, 1, 1, 1, 1, 1, 1])
    x = np.linspace(-0.7, 1.2, 20)
    for t in x:
        d3 = TimeSeries(dsave)
        d4.t0 = t
        d3 -= d4
    # These are selected asserts of the incremental test above.
    # Visually d4 moves through d3 as the t0 value advances.  Asserts
    # test the end members:  skewed left, inside, and skewed right
    d3 = TimeSeries(dsave)
    d4.t0 = -0.7  # no overlap test
    d3 -= d4
    assert np.allclose(d3.data, [1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
    d3 = TimeSeries(dsave)
    d4.t0 = -0.3  # overlap left
    d3 -= d4
    assert np.allclose(d3.data, [-1, -1, -1, 1, 1, 1, 1, 1, 1, 1])
    d3 = TimeSeries(dsave)
    d4.t0 = 0.3  # d4 inside d3 test
    d3 -= d4
    assert np.allclose(d3.data, [1, 1, 1, -1, -1, -1, -1, -1, -1, 1])
    d3 = TimeSeries(dsave)
    d4.t0 = 0.7  # partial overlap right
    d3 -= d4
    assert np.allclose(d3.data, [1, 1, 1, 1, 1, 1, 1, -1, -1, -1])
    d3 = TimeSeries(dsave)
    d4.t0 = 1.0  # no overlap test right
    d3 -= d4
    assert np.allclose(d3.data, [1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
    # Repeat the same test for Seismogram objects
    # This section is edited cut-paste of above
    # Intentionally do not test _CoreSeismogram directly because
    # currently if it works for Seismogram it will for _CoreSeismogram
    d = _CoreSeismogram(10)
    d1 = make_constant_data_seis(d, nsamp=10)
    dsave = _CoreSeismogram(d1)
    d = _CoreSeismogram(6)
    d2 = make_constant_data_seis(d, t0=-0.2, nsamp=6, val=2.0)
    dsave = _CoreSeismogram(d1)
    d1 -= d2
    assert np.allclose(
        d1.data,
        np.array([
            [-1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            [-1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            [-1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
        ]),
    )
    d1 = _CoreSeismogram(dsave)
    d = d1 - d2
    assert np.allclose(
        d.data,
        np.array([
            [-1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            [-1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            [-1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
        ]),
    )
    d3 = Seismogram(10)
    d4 = Seismogram(6)
    d3 = make_constant_data_seis(d3, nsamp=10)
    d4 = make_constant_data_seis(d4, t0=-0.2, nsamp=6, val=2.0)
    dsave = Seismogram(d3)
    d3 -= d4
    assert np.allclose(
        d3.data,
        np.array([
            [-1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            [-1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            [-1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
        ]),
    )
    d3 = Seismogram(dsave)
    d = d3 - d4
    assert np.allclose(
        d.data,
        np.array([
            [-1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            [-1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            [-1.0, -1.0, -1.0, -1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
        ]),
    )
    x = np.linspace(-0.7, 1.2, 20)
    for t in x:
        d3 = Seismogram(dsave)
        d4.t0 = t
        d3 -= d4
    # These are selected asserts of the incremental test above.
    # Visually d4 moves through d3 as the t0 value advances.  Asserts
    # test the end members:  skewed left, inside, and skewed right
    d3 = Seismogram(dsave)
    d4.t0 = -0.7  # no overlap test
    d3 -= d4
    assert np.allclose(
        d3.data,
        np.array([
            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        ]),
    )
    d3 = Seismogram(dsave)
    d4.t0 = -0.3  # overlap left
    d3 -= d4
    assert np.allclose(
        d3.data,
        np.array([
            [-1, -1, -1, 1, 1, 1, 1, 1, 1, 1],
            [-1, -1, -1, 1, 1, 1, 1, 1, 1, 1],
            [-1, -1, -1, 1, 1, 1, 1, 1, 1, 1],
        ]),
    )
    d3 = Seismogram(dsave)
    d4.t0 = 0.3  # d4 inside d3 test
    d3 -= d4
    assert np.allclose(
        d3.data,
        np.array([
            [1, 1, 1, -1, -1, -1, -1, -1, -1, 1],
            [1, 1, 1, -1, -1, -1, -1, -1, -1, 1],
            [1, 1, 1, -1, -1, -1, -1, -1, -1, 1],
        ]),
    )
    d3 = Seismogram(dsave)
    d4.t0 = 0.7  # partial overlap right
    d3 -= d4
    assert np.allclose(
        d3.data,
        np.array([
            [1, 1, 1, 1, 1, 1, 1, -1, -1, -1],
            [1, 1, 1, 1, 1, 1, 1, -1, -1, -1],
            [1, 1, 1, 1, 1, 1, 1, -1, -1, -1],
        ]),
    )
    d3 = Seismogram(dsave)
    d4.t0 = 1.0  # no overlap test right
    d3 -= d4
    assert np.allclose(
        d3.data,
        np.array([
            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        ]),
    )

def test_Ensemble(Ensemble):
    md = Metadata()
    md["double"] = 3.14
    md["bool"] = True
    md["long"] = 7
    es = Ensemble(md, 3)
    if isinstance(es, TimeSeriesEnsemble):
        d = TimeSeries(10)
        d = make_constant_data_ts(d)
        es.member.append(d)
        es.member.append(d)
        es.member.append(d)
    else:
        d = Seismogram(10)
        d = make_constant_data_seis(d)
        es.member.append(d)
        es.member.append(d)
        es.member.append(d)
    es.set_live()  # new method for LoggingEnsemble needed because default is dead
    es.sync_metadata(["double", "long"])
    assert es.member[0].is_defined("bool")
    assert es.member[0]["bool"] == True
    assert not es.member[0].is_defined("double")
    assert not es.member[0].is_defined("long")
    es.sync_metadata()
    assert es.member[1].is_defined("double")
    assert es.member[1].is_defined("long")
    assert es.member[1]["double"] == 3.14
    assert es.member[1]["long"] == 7
    es.update_metadata(Metadata({"k": "v"}))
    assert es["k"] == "v"
    # From here on we test features not in CoreEnsemble but only in
    # LoggingEnsemble.  Note that we use pybind11 aliasing to
    # define TimeSeriesEnsemble == LoggingEnsemble<TimeSeries> and
    # SeismogramEnsemble == LoggingEnsemble<Seismogram>.
    # Should be initially marked live
    assert es.live()
    es.elog.log_error("test_ensemble", "test complaint", ErrorSeverity.Complaint)
    es.elog.log_error("test_ensemble", "test invalid", ErrorSeverity.Invalid)
    assert es.elog.size() == 2
    assert es.live()
    es.kill()
    assert es.dead()
    # resurrect es
    es.set_live()
    assert es.live()
    # validate checks for any live members - this tests that feature
    assert es.validate()
    # need this temporary copy for the next test
    if isinstance(es, TimeSeriesEnsemble):
        escopy = TimeSeriesEnsemble(es)
    else:
        escopy = SeismogramEnsemble(es)
    for d in escopy.member:
        d.kill()
    assert not escopy.validate()
    # Reuse escopy for pickle test
    escopy = pickle.loads(pickle.dumps(es))
    assert escopy.is_defined("bool")
    assert escopy["bool"] == True
    assert escopy.is_defined("double")
    assert escopy.is_defined("long")
    assert escopy["double"] == 3.14
    assert escopy["long"] == 7
    assert escopy.live()
    assert escopy.elog.size() == 2
    assert escopy.member[0].is_defined("bool")
    assert escopy.member[0]["bool"] == True
    assert escopy.member[0].is_defined("double")
    assert escopy.member[0].is_defined("long")
    assert es.member[1].is_defined("double")
    assert es.member[1].is_defined("long")
    assert es.member[1]["double"] == 3.14
    assert es.member[1]["long"] == 7
    if isinstance(es, TimeSeriesEnsemble):
        assert es.member[1].data == escopy.member[1].data
    else:
        assert (es.member[1].data[:] == escopy.member[1].data[:]).all()

def test_TimeSeries():
    ts = TimeSeries()
    ts.npts = 100
    ts.t0 = 0.0
    ts.dt = 0.001
    ts.live = 1
    ts.tref = TimeReferenceType.Relative
    ts.data.append(1.0)
    ts.data.append(2.0)
    ts.data.append(3.0)
    ts.data.append(4.0)
    ts.sync_npts()
    assert ts.npts == 104
    assert ts.npts == ts["npts"]
    ts += ts
    for i in range(4):
        ts.data[i] = i * 0.5
    ts_copy = pickle.loads(pickle.dumps(ts))
    assert ts.data == ts_copy.data
    assert ts.data[3] == 1.5
    assert ts.data[103] == 8
    assert ts.time(100) == 0.1
    assert ts.sample_number(0.0998) == 100

def test_scale():
    dts = _CoreTimeSeries(9)
    setbasics(dts, 9)
    d3c = _CoreSeismogram(5)
    setbasics(d3c, 5)
    dts.data[0] = 3.0
    dts.data[1] = 2.0
    dts.data[2] = -4.0
    dts.data[3] = 1.0
    dts.data[4] = -100.0
    dts.data[5] = -1.0
    dts.data[6] = 5.0
    dts.data[7] = 1.0
    dts.data[8] = -6.0
    # MAD of above should be 2
    # perf of 0.8 should be 4
    # rms should be just over 10=10.010993957
    print('Starting tests for time series data of amplitude functions')
    ampmad = MADAmplitude(dts)
    print('MAD amplitude estimate=', ampmad)
    assert ampmad == 3.0
    amprms = RMSAmplitude(dts)
    print('RMS amplitude estimate=', amprms)
    assert round(amprms, 2) == 100.46
    amppeak = PeakAmplitude(dts)
    ampperf80 = PerfAmplitude(dts, 0.8)
    print('Peak amplitude=', amppeak)
    print('80% clip level amplitude=', ampperf80)
    assert amppeak == 100.0
    assert ampperf80 == 6.0
    print('Starting comparable tests for 3c data')
    d3c.data[0, 0] = 3.0
    d3c.data[0, 1] = 2.0
    d3c.data[1, 2] = -4.0
    d3c.data[2, 3] = 1.0
    d3c.data[0, 4] = np.sqrt(2) * (100.0)
    d3c.data[1, 4] = -np.sqrt(2) * (100.0)
    ampmad = MADAmplitude(d3c)
    print('MAD amplitude estimate=', ampmad)
    amprms = RMSAmplitude(d3c)
    print('RMS amplitude estimate=', amprms)
    amppeak = PeakAmplitude(d3c)
    ampperf60 = PerfAmplitude(d3c, 0.6)
    print('Peak amplitude=', amppeak)
    print('60% clip level amplitude=', ampperf60)
    assert amppeak == 200.0
    assert ampperf60 == 4.0
    assert ampmad == 3.0
    amptest = round(amprms, 2)
    assert amptest == 89.48
    print('Trying scaling functions for TimeSeries')
    # we need a deep copy here since scaling changes the data
    d2 = TimeSeries(dts)
    amp = _scale(d2, ScalingMethod.Peak, 1.0)
    print('Computed peak amplitude=', amp)
    print(d2.data)
    d2 = TimeSeries(dts)
    amp = _scale(d2, ScalingMethod.Peak, 10.0)
    print('Computed peak amplitude with peak set to 10=', amp)
    print(d2.data)
    assert amp == 100.0
    assert d2.data[4] == -10.0
    print('verifying scale has modified and set calib correctly')
    calib = d2.get_double('calib')
    assert calib == 10.0
    d2 = TimeSeries(dts)
    d2.put('calib', 6.0)
    print('test 2 with MAD metric and initial calib of 6')
    amp = _scale(d2, ScalingMethod.MAD, 1.0)
    calib = d2.get_double('calib')
    print('New calib value set=', calib)
    assert calib == 18.0
    print('Testing 3C scale functions')
    d = Seismogram(d3c)
    amp = _scale(d, ScalingMethod.Peak, 1.0)
    print('Peak amplitude returned by scale function=', amp)
    calib = d.get_double('calib')
    print('Calib value retrieved (assumed initial 1.0)=', calib)
    print('Testing python scale function wrapper - first on a TimeSeries with defaults')
    d2 = TimeSeries(dts)
    amp = scale(d2)
    print('peak amplitude returned =', amp[0])
    assert amp[0] == 100.0
    d = Seismogram(d3c)
    amp = scale(d)
    print('peak amplitude returned test Seismogram=', amp[0])
    assert amp[0] == 200.0
    print('starting tests of scale on ensembles')
    print('first test TimeSeriesEnsemble with 5 scaled copies of same vector used earlier in this test')
    ens = TimeSeriesEnsemble()
    scls = [2.0, 4.0, 1.0, 10.0, 5.0]  # note 4 is the median of this vector
    npts = dts.npts
    for i in range(5):
        d = TimeSeries(dts)
        for k in range(npts):
            d.data[k] *= scls[i]
        d.put('calib', 1.0)
        ens.member.append(d)
    # work on a copy because scaling alters data in place
    enscpy = TimeSeriesEnsemble(ens)
    amps = scale(enscpy)
    print('returned amplitudes for members scaled individually')
    for i in range(5):
        print(amps[i])
        assert amps[i] == 100.0 * scls[i]
    enscpy = TimeSeriesEnsemble(ens)
    amp = scale(enscpy, scale_by_section=True)
    print('average amplitude=', amp[0])
    # assert(amp[0]==4.0)
    avgamp = amp[0]
    for i in range(5):
        calib = enscpy.member[i].get_double("calib")
        print('member number ', i, ' calib is ', calib)
        assert round(calib) == 400.0
        # print(enscpy.member[i].data)
    # similar test for SeismogramEnsemble
    npts = d3c.npts
    ens = SeismogramEnsemble()
    for i in range(5):
        d = Seismogram(d3c)
        for k in range(3):
            for j in range(npts):
                d.data[k, j] *= scls[i]
        d.put('calib', 1.0)
        ens.member.append(d)
    print('Running comparable tests on SeismogramEnsemble')
    enscpy = SeismogramEnsemble(ens)
    amps = scale(enscpy)
    print('returned amplitudes for members scaled individually')
    for i in range(5):
        print(amps[i])
        assert round(amps[i]) == round(200.0 * scls[i])
    print('Trying section scaling of same data')
    enscpy = SeismogramEnsemble(ens)
    amp = scale(enscpy, scale_by_section=True)
    print('average amplitude=', amp[0])
    assert round(amp[0]) == 800.0
    avgamp = amp[0]
    for i in range(5):
        calib = enscpy.member[i].get_double("calib")
        print('member number ', i, ' calib is ', calib)
        assert round(calib) == 800.0

def setup_function(function):
    ts_size = 255
    sampling_rate = 20.0
    function.dict1 = {
        "network": "IU",
        "station": "ANMO",
        "starttime": obspy.UTCDateTime(2019, 12, 31, 23, 59, 59, 915000),
        "npts": ts_size,
        "sampling_rate": sampling_rate,
        "channel": "BHE",
        "live": True,
        "_id": bson.objectid.ObjectId(),
        "jdate": obspy.UTCDateTime(2019, 12, 31, 23, 59, 59, 915000),
        "date_str": obspy.UTCDateTime(2019, 12, 31, 23, 59, 59, 915000),
        "not_defined_date": obspy.UTCDateTime(2019, 12, 31, 23, 59, 59, 915000),
    }
    function.dict2 = {
        "network": "IU",
        "station": "ANMO",
        "starttime": obspy.UTCDateTime(2019, 12, 31, 23, 59, 59, 915000),
        "npts": ts_size,
        "sampling_rate": sampling_rate,
        "channel": "BHN",
    }
    function.dict3 = {
        "network": "IU",
        "station": "ANMO",
        "starttime": obspy.UTCDateTime(2019, 12, 31, 23, 59, 59, 915000),
        "npts": ts_size,
        "sampling_rate": sampling_rate,
        "channel": "BHZ",
    }
    function.tr1 = obspy.Trace(data=np.random.randint(0, 1000, ts_size),
                               header=function.dict1)
    function.tr2 = obspy.Trace(data=np.random.randint(0, 1000, ts_size),
                               header=function.dict2)
    function.tr3 = obspy.Trace(data=np.random.randint(0, 1000, ts_size),
                               header=function.dict3)
    function.stream = obspy.Stream(
        traces=[function.tr1, function.tr2, function.tr3])
    function.md1 = Metadata()
    function.md1.put("network", "IU")
    function.md1.put("npts", ts_size)
    function.md1.put("sampling_rate", sampling_rate)
    function.md1.put("live", True)
    function.ts1 = TimeSeries()
    function.ts1.data = DoubleVector(np.random.rand(ts_size))
    function.ts1.live = True
    function.ts1.dt = 1 / sampling_rate
    function.ts1.t0 = 0
    function.ts1.npts = ts_size
    # TODO: need to bind the constructor that can do TimeSeries(md1)
    function.ts1.put("net", "IU")
    function.ts1.put("npts", ts_size)
    function.ts1.put("sampling_rate", sampling_rate)
    function.seismogram = Seismogram()
    # TODO: the default of seismogram.tref is UTC which is inconsistent with
    # the default for TimeSeries()
    # TODO: It would be nice to have dmatrix support numpy.ndarray as input
    function.seismogram.data = dmatrix(3, ts_size)
    for i in range(3):
        for j in range(ts_size):
            function.seismogram.data[i, j] = np.random.rand()
    function.seismogram.live = True
    function.seismogram.dt = 1 / sampling_rate
    function.seismogram.t0 = 0
    function.seismogram.npts = ts_size
    # FIXME: if the following key is network, the Seismogram2Stream will error
    # out when calling TimeSeries2Trace internally due to the issue when
    # mdef.is_defined(k) returns True but k is an alias, the mdef.type(k)
    # will error out.
    function.seismogram.put("net", "IU")
    function.seismogram.put("npts", ts_size)
    function.seismogram.put("sampling_rate", sampling_rate)

def make_simulation_wavelet(n=100, dt=0.05, t0=-1.0,
                            imp=(20.0, -15.0, 4.0, -1.0),
                            lag=(20, 24, 35, 45),
                            npoles=3, corners=[2.0, 6.0]):
    dvec = make_impulse_vector(lag, imp, n)
    fsampling = int(1.0 / dt)
    sos = signal.butter(npoles, corners, btype='bandpass',
                        output='sos', fs=fsampling)
    f = signal.sosfilt(sos, dvec)
    wavelet = TimeSeries(n)
    wavelet.set_t0(t0)
    wavelet.set_dt(dt)
    # This isn't necessary at the moment because relative is the default
    # wavelet.set_tref(TimeReferenceType.Relative)
    wavelet.set_npts(n)
    wavelet.set_live()
    for i in range(n):
        wavelet.data[i] = f[i]
    return wavelet

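# Hypothetical illustration (not in the original source) of how the two
# generators above can be combined: a clean simulated source wavelet plus
# band-limited noise scaled by nscale.  Assumes make_wavelet_noise_data()
# from earlier in this file and the TimeSeries copy constructor.
def _example_noisy_wavelet(nscale=0.1):
    w = make_simulation_wavelet(n=100, dt=0.05)
    noise = make_wavelet_noise_data(nscale=nscale, ns=100, dt=0.05)
    noisy = TimeSeries(w)  # deep copy so the clean wavelet is preserved
    for i in range(noisy.npts):
        noisy.data[i] += noise.data[i]
    return noisy
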
def get_live_timeseries():
    ts = TimeSeries()
    ts.set_live()
    ts.dt = 1 / sampling_rate
    ts.npts = ts_size
    # ts.put('net', 'IU')
    ts.put('npts', ts_size)
    ts.put('sampling_rate', sampling_rate)
    ts.tref = TimeReferenceType.UTC
    ts.t0 = datetime.utcnow().timestamp()
    ts['delta'] = 0.1
    ts['calib'] = 0.1
    ts['site_id'] = bson.objectid.ObjectId()
    ts['channel_id'] = bson.objectid.ObjectId()
    ts['source_id'] = bson.objectid.ObjectId()
    ts.set_as_origin('test', '0', '0', AtomicType.TIMESERIES)
    ts.data = DoubleVector(np.random.rand(ts_size))
    return ts

def get_sin_timeseries():
    ts = TimeSeries()
    ts.set_live()
    ts.dt = 1 / sampling_rate
    ts.npts = ts_size
    # ts.put('net', 'IU')
    ts.put("npts", ts_size)
    ts.put("sampling_rate", sampling_rate)
    ts.tref = TimeReferenceType.UTC
    ts.t0 = datetime.utcnow().timestamp()
    ts["delta"] = 0.1
    ts["calib"] = 0.1
    ts["site_id"] = bson.objectid.ObjectId()
    ts["channel_id"] = bson.objectid.ObjectId()
    ts["source_id"] = bson.objectid.ObjectId()
    ts.set_as_origin("test", "0", "0", AtomicType.TIMESERIES)
    curve = np.linspace(0, 2 * np.pi, ts.npts)
    curve = np.sin(curve) + 0.2 * np.sin(10 * curve)
    ts.data = DoubleVector(curve)
    return ts

def read_data(d):
    di = d.get_string("dir")
    dfile = d.get_string("dfile")
    foff = d.get("foff")
    fname = os.path.join(di, dfile)
    with open(fname, mode="rb") as fh:
        fh.seek(foff)
        float_array = array("d")
        float_array.frombytes(fh.read(d.get("nofbytes")))
        d.data = DoubleVector(float_array)


if __name__ == "__main__":
    s = TimeSeries()
    s.data = DoubleVector(np.random.rand(255))
    s["dir"] = "./"
    s["dfile"] = "test_op"
    save_data(s)
    s2 = TimeSeries()
    for k in s:
        s2[k] = s[k]
    s2.data = DoubleVector([])
    print(len(s2.data))
    read_data(s2)
    print(len(s2.data))
    assert all(a == b for a, b in zip(s.data, s2.data))
    # client = MongoClient('localhost', 27017)
    # db = client.mspass

def Trace2TimeSeries(trace, history=None):
    """
    Convert an obspy Trace object to a TimeSeries object.

    An obspy Trace object mostly maps directly into the mspass TimeSeries
    object with the stats of Trace mapping (almost) directly to the
    TimeSeries Metadata object that is a base class to TimeSeries.  A deep
    copy of the data vector in the original Trace is made to the result.
    That copy is done in C++ for speed (we found a 100+ fold speedup using
    that mechanism instead of a simple python loop).

    There is one important type collision in copying obspy starttime and
    endtime stats fields.  obspy uses their UTCDateTime object to hold time
    but TimeSeries only supports an epoch time (UTCDateTime.timestamp), so
    the code here has to convert from the UTCDateTime to epoch time in the
    TimeSeries.  Note in a TimeSeries starttime is the t0 attribute.

    The biggest mismatch in Trace and TimeSeries is that Trace has no
    concept of object level history as used in mspass.  That history must
    be maintained outside obspy.  To maintain full history the user must
    pass the history maintained externally through the optional history
    parameter.  The contents of history will be loaded directly into the
    result with no sanity checks.

    :param trace: obspy trace object to convert
    :type trace: :class:`~obspy.core.trace.Trace`
    :param history: mspass ProcessingHistory object to post to result.
    :return: TimeSeries object derived from obspy input Trace object
    :rtype: :class:`~mspasspy.ccore.TimeSeries`
    """
    # The obspy trace object stats attribute only acts like a dictionary.
    # We can't use it directly but this trick simplifies the copy to
    # mesh with py::dict for pybind11 - needed in TimeSeries constructor below
    h = dict(trace.stats)
    # These tests are excessively paranoid since starttime and endtime
    # are required attributes in Trace, but better safe in case
    # someone creates one outside obspy
    if Keywords.starttime in trace.stats:
        t = h[Keywords.starttime]
        h[Keywords.starttime] = t.timestamp
    else:
        # We have to set this to something if it isn't set or
        # the TimeSeries constructor may abort
        h[Keywords.starttime] = 0.0
    # we don't require endtime in TimeSeries so ignore it if it is not set
    if "endtime" in trace.stats:
        t = h["endtime"]
        h["endtime"] = t.timestamp
    #
    # these define a map of aliases to apply when we convert to mspass
    # metadata from trace - we redefined these names but others could
    # surface as obspy evolves independently from mspass
    #
    mspass_aliases = dict()
    mspass_aliases["station"] = Keywords.sta
    mspass_aliases["network"] = Keywords.net
    mspass_aliases["location"] = Keywords.loc
    mspass_aliases["channel"] = Keywords.chan
    for k in mspass_aliases:
        if k in h:
            x = h.pop(k)
            alias_key = mspass_aliases[k]
            h[alias_key] = x
    dout = TimeSeries(h, trace.data)
    if history is not None:
        dout.load_history(history)
    dout.set_live()
    # The following dead_mspass attribute is used by our decorator API
    # to determine whether an object was dead before the conversion.
    try:
        if trace.dead_mspass:
            dout.live = False
    except AttributeError:
        pass
    return dout

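# Hedged usage sketch for Trace2TimeSeries (illustration only, not part of
# the original module).  obspy.read() with no arguments loads obspy's bundled
# example Stream; each Trace can then be converted to a MsPASS TimeSeries.
# Assumes obspy is importable and this converter is in scope.
def _example_trace_conversion():
    import obspy

    st = obspy.read()  # example Stream with three Traces
    ts = Trace2TimeSeries(st[0])  # stats -> Metadata, data vector deep copied
    # starttime is now the epoch-time t0; the station code lands under the
    # mspass "sta" key via the alias map above
    print(ts.npts, ts.t0, ts["sta"])
    return ts
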
def Seismogram2Stream(sg, chanmap=["E", "N", "Z"],
                      hang=[90.0, 0.0, 0.0], vang=[90.0, 90.0, 0.0]):
    # fixme hang and vang parameters
    """
    Convert a mspass::Seismogram object to an obspy::Stream with 3
    components split apart.

    mspass and obspy have completely incompatible approaches to handling
    three component data.  obspy uses a Stream object that is a wrapper
    around a list of Trace objects.  mspass stores 3C data bundled into a
    matrix container.  This function takes the matrix container apart and
    produces the three Trace objects obspy wants to define 3C data.  The
    caller is responsible for how they handle bundling the output.

    A very dark side of this function is that any error log entries in the
    parent mspass Seismogram object will be lost in this conversion as
    obspy does not implement that concept.  If you need to save the error
    log you will need to save the input of this function to MongoDB to
    preserve the errorlog it may contain.

    :param sg: is the Seismogram object to be converted
    :type sg: :class:`~mspasspy.ccore.Seismogram`
    :param chanmap:  3 element list of channel names to be assigned components
    :type chanmap: list
    :param hang:  3 element list of horizontal angle attributes (azimuth in
      degrees) to be set in Stats array of output for each component.
      (default is for cardinal directions)
    :type hang: list
    :param vang:  3 element list of vertical angle (theta of spherical
      coordinates) to be set in Stats array of output for each component.
      (default is for cardinal directions)
    :type vang: list
    :return: obspy Stream object containing a list of 3 Trace objects in
      mspass component order.  Presently the data are ALWAYS returned to
      cardinal directions (see above).  It will be empty if sg was marked dead.
    :rtype: :class:`obspy.core.stream.Stream`
    """
    dresult = obspy.core.Stream()
    dresult.dead_mspass = True
    # Note this logic will silently return an empty Stream object if the
    # data are marked dead
    if sg.live:
        dresult.dead_mspass = False
        uuids = sg.id()
        logstuff = sg.elog
        for i in range(3):
            ts = ExtractComponent(sg, i)
            ts.put_string(Keywords.chan, chanmap[i])
            ts.put_double(Keywords.channel_hang, hang[i])
            ts.put_double(Keywords.channel_vang, vang[i])
            # ts is a CoreTimeSeries but we need to add a few things to
            # make it mesh with TimeSeries2Trace
            tsex = TimeSeries(ts, uuids)
            tsex.elog = logstuff
            dobspy = TimeSeries2Trace(tsex)
            dresult.append(dobspy)
    else:
        for i in range(3):
            tc = obspy.core.Trace()
            tc.dead_mspass = True
            dresult.append(tc)
    return dresult

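# Hedged round-trip sketch for Seismogram2Stream (illustration only, not part
# of the original module).  It assumes `sg` is a live Seismogram, e.g. one
# built by a fixture like setup_function above, and that TimeSeries2Trace is
# available as used in the function body.
def _example_seismogram_to_stream(sg):
    st = Seismogram2Stream(sg, chanmap=["BHE", "BHN", "BHZ"])
    assert len(st) == 3
    for tr in st:
        print(tr.stats.channel, tr.stats.npts)
    return st
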
def arrival_snr_QC(
    data_object,
    noise_window=TimeWindow(-130.0, -5.0),
    noise_spectrum_engine=None,
    signal_window=TimeWindow(-5.0, 120.0),
    signal_spectrum_engine=None,
    band_cutoff_snr=2.0,
    # check these are reasonable - don't remember the formula when writing this
    tbp=5.0,
    ntapers=10,
    high_frequency_search_start=5.0,
    poles=3,
    perc=95.0,
    phase_name="P",
    metadata_key="Parrival",
    optional_metrics=[
        "snr_stats",
        "filtered_envelope",
        "filtered_L2",
        "filtered_Linf",
        "filtered_MAD",
        "filtered_perc",
    ],
    save_spectra=False,
    db=None,
    collection="arrival",
    use_measured_arrival_time=False,
    measured_arrival_time_key="Ptime",
    taup_model=None,
    update_mode=False,
    component=2,
    source_collection="source",
    receiver_collection=None,
):
    """
    Compute a series of metrics that can be used for quality control
    filtering of seismic phase data.

    This is the highest level function in this module for computing
    signal-to-noise ratio metrics for processing signals that can be defined
    by a computable or measurable "phase".  Features this function adds over
    lower level functions in this module are:

    1.  An option to save computed metrics to a MongoDB collection
        (defaults as "arrival").  If the update_mode argument is set True
        (default is False) the function expects the data_object to contain
        the attribute "arrival_id" that references the ObjectID of an
        existing entry in the collection where the data this function
        computes is to be saved (default is "arrival").
    2.  Adds an option to use a computed or measured arrival as the time
        reference for all windowing.  The lower level snr functions in this
        module require the user to do what this function does prior to
        calling the function.  Note one or the other is required (i.e.
        either the computed or measured time will define t0 of the
        processing).

    The input of arg 0 (data_object) can be either a TimeSeries or a
    Seismogram object.  If a Seismogram object is passed, the "component"
    argument is used to extract the specified single channel from the
    Seismogram object and that component is used for processing.  That is
    necessary because all the algorithms used are single channel
    algorithms.  To use this function on all components use a loop over
    components BUT make sure you use a unique value for the argument
    "metadata_key" for each component.  Note this will also produce
    multiple documents per input datum.

    The type of the data_object also has a more subtle implication the
    user must be aware of.  That is, in the MsPASS schema we store receiver
    coordinates in one of two different collections:  "channel" for
    TimeSeries data and "site" for Seismogram data.  When such data are
    loaded the generic keys like lat are always converted to names like
    channel_lat or site_lat for TimeSeries and Seismogram data
    respectively.  This function uses the data type to set that naming,
    i.e. if the input is a TimeSeries it tries to fetch the latitude data
    as channel_lat while if the input is a Seismogram it tries to fetch
    site_lat.  That is true of all coordinate data loaded by normalization
    from a source and receiver collection.

    The following args are passed directly to the function arrival_snr:
    noise_window, signal_window, band_cutoff_snr, tbp, ntapers, poles,
    perc, phase_name, metadata_key, and optional_metrics.  See the
    docstring for arrival_snr and FD_snr_estimator for descriptions of how
    these arguments should be used.  This top level function adds the
    arguments described below.

    :param db:  mspass Database object that is used as a handle to MongoDB.
      Default is None, which the function takes to mean you don't want to
      save the computed values to MongoDB.  In this mode the computed
      metrics will all be posted to a python dict that can be found under
      the key defined by the "metadata_key" argument.  When db is defined
      the contents of that same python dict will be saved to MongoDB in the
      collection defined by the "collection" argument.  If db is run as the
      default None the user is responsible for saving and managing the
      computed snr data.  Be aware a simple later call to db.save_data will
      not produce the same normalized data with the (default) arrival
      collection.
    :param collection:  MongoDB collection name where the results of this
      function will be saved.  If the "update_mode" argument is also set
      True the update section will reference this collection.  Default is
      "arrival".
    :param use_measured_arrival_time:  boolean defining the method used to
      define the time reference for windowing used for snr calculations.
      When True the function will attempt to fetch a phase arrival time
      with the key defined by the "measured_arrival_time_key" argument.  In
      that mode if the fetch fails the data_object will be killed and an
      error posted to elog.  That somewhat brutal choice was intentional as
      the expectation is if you want to use measured arrival times you
      don't want data where there are no picks.  The default is True to
      make the defaults consistent.  The reason is that the tau-p
      calculator handle is passed to the function when using model-based
      travel times.  There is no way to default that so it defaults to
      None.
    :param measured_arrival_time_key:  is the key used to fetch a measured
      arrival time.  This parameter is ignored if
      use_measured_arrival_time is False.
    :param taup_model:  when use_measured_arrival_time is False this
      argument is required.  It defaults as None because there is no way
      the author knows to initialize it to anything valid.  If set it MUST
      be an instance of the obspy class TauPyModel
      (https://docs.obspy.org/packages/autogen/obspy.taup.tau.TauPyModel.html#obspy.taup.tau.TauPyModel).
      Mistakes in use of this argument can cause a MsPASSError exception to
      be thrown (not logged, thrown as a fatal error) in one of two ways:
      (1) if use_measured_arrival_time is False this argument must be
      defined, and (2) if it is defined it MUST be an instance of
      TauPyModel.
    :param update_mode:  When True the function will attempt to extract a
      MongoDB ObjectID from data_object's Metadata using the (currently
      fixed) key "arrival_id".  If found it will add the computed data to
      an existing document in the collection defined by the collection
      argument.  Otherwise it will simply add a new entry and post the
      ObjectID of the new document with the (same fixed) key arrival_id.
      When False no attempt to fetch the arrival id is made and we simply
      add a record.  This parameter is completely ignored unless the db
      argument defines a valid Database class.
    :param component:  integer (0, 1, or 2) defining which component of a
      Seismogram object to use to compute the requested snr metrics.  This
      parameter is ignored if the input is a TimeSeries.
    :param source_collection:  normalization collection for source data.
      The default is the MsPASS name "source" which means the function will
      try to load the source hypocenter coordinates (when required) as
      source_lat, source_lon, source_depth, and source_time.
    :param receiver_collection:  when set this name will override the
      automatic setting of the expected normalization collection naming for
      receiver functions (see above).  The default is None which causes the
      automatic switching to be invoked.  If it is any other string the
      automatic naming will be overridden.
    :return:  the data_object modified by insertion of the snr QC data in
      the object's Metadata
    """
    if data_object.dead():
        return data_object
    if isinstance(data_object, TimeSeries):
        # We need to make a copy of a TimeSeries object to assure the only
        # thing we change is the Metadata we add to the return
        data_to_process = TimeSeries(data_object)
        if receiver_collection:
            rcol = receiver_collection
        else:
            rcol = "channel"
    elif isinstance(data_object, Seismogram):
        if component < 0 or component > 2:
            raise MsPASSError(
                "arrival_snr_QC:  usage error.  "
                + "component parameter passed with illegal value={n}\n".format(
                    n=component)
                + "Must be 0, 1, or 2",
                ErrorSeverity.Fatal,
            )
        data_to_process = ExtractComponent(data_object, component)
        if receiver_collection:
            rcol = receiver_collection
        else:
            rcol = "site"
    else:
        raise MsPASSError(
            "arrival_snr_QC:  received invalid input data\n"
            + "Input must be either TimeSeries or a Seismogram object",
            ErrorSeverity.Fatal,
        )
    if use_measured_arrival_time:
        arrival_time = data_object[measured_arrival_time_key]
    else:
        # This test is essential or python will throw a more obscure,
        # generic exception
        if taup_model is None:
            raise MsPASSError(
                "arrival_snr_QC:  usage error.  "
                + "taup_model parameter is set None but use_measured_arrival_time is False\n"
                + "This gives no way to define processing windows.  See docstring",
                ErrorSeverity.Fatal,
            )
        source_lat = data_object[source_collection + "_lat"]
        source_lon = data_object[source_collection + "_lon"]
        source_depth = data_object[source_collection + "_depth"]
        source_time = data_object[source_collection + "_time"]
        receiver_lat = data_object[rcol + "_lat"]
        receiver_lon = data_object[rcol + "_lon"]
        delta = locations2degrees(source_lat, source_lon,
                                  receiver_lat, receiver_lon)
        arrival = taup_model.get_travel_times(
            source_depth_in_km=source_depth,
            distance_in_degree=delta,
            phase_list=[phase_name],
        )
        arrival_time = source_time + arrival[0].time
        taup_arrival_phase = arrival[0].phase.name
        # not sure if this will happen but worth trapping it as a warning
        # if it does
        if phase_name != taup_arrival_phase:
            data_object.elog.log_error(
                "arrival_snr_QC",
                "Requested phase name="
                + phase_name
                + " does not match phase name tag returned by obspy taup calculator="
                + taup_arrival_phase,
                "Complaint",
            )
    if data_to_process.time_is_UTC():
        data_to_process.ator(arrival_time)
    [snrdata, elog] = FD_snr_estimator(
        data_to_process,
        noise_window,
        noise_spectrum_engine,
        signal_window,
        signal_spectrum_engine,
        band_cutoff_snr,
        tbp,
        ntapers,
        high_frequency_search_start,
        poles,
        perc,
        optional_metrics,
        save_spectra=save_spectra,
    )
    if elog.size() > 0:
        data_object.elog += elog
    snrdata["phase"] = phase_name
    snrdata["snr_arrival_time"] = arrival_time
    snrdata["snr_signal_window_start"] = arrival_time + signal_window.start
    snrdata["snr_signal_window_end"] = arrival_time + signal_window.end
    snrdata["snr_noise_window_start"] = arrival_time + noise_window.start
    snrdata["snr_noise_window_end"] = arrival_time + noise_window.end
    # These cross-referencing keys may not always be defined when a phase
    # time is based on a pick so we add these cautiously
    scol_id_key = source_collection + "_id"
    rcol_id_key = rcol + "_id"
    if data_object.is_defined(scol_id_key):
        snrdata[scol_id_key] = data_object[scol_id_key]
    if data_object.is_defined(rcol_id_key):
        snrdata[rcol_id_key] = data_object[rcol_id_key]
    # Note we add this result to data_object NOT data_to_process because
    # that is not always the same thing - for a TimeSeries input it is a
    # copy of the original but it may have been altered, while for a
    # Seismogram it is an extracted component
    data_object[metadata_key] = snrdata
    if db:
        arrival_id_key = collection + "_id"
        dbcol = db[collection]
        if update_mode:
            if data_object.is_defined(arrival_id_key):
                arrival_id = data_object[arrival_id_key]
                filt = {"_id": arrival_id}
                update_clause = {"$set": snrdata}
                dbcol.update_one(filt, update_clause)
            else:
                data_object.elog.log_error(
                    "arrival_snr_QC",
                    "Running in update mode but arrival id key="
                    + arrival_id_key
                    + " is not defined\n"
                    + "Inserting computed snr data as a new document in collection="
                    + collection,
                    "Complaint",
                )
                arrival_id = dbcol.insert_one(snrdata).inserted_id
                data_object[arrival_id_key] = arrival_id
        else:
            arrival_id = dbcol.insert_one(snrdata).inserted_id
            data_object[arrival_id_key] = arrival_id
    return data_object

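# Hedged usage sketch for arrival_snr_QC using model-based arrival times
# (illustration only, not part of the original module).  It assumes `d` is a
# live Seismogram with source_* and site_* Metadata loaded by normalization,
# and with db=None the results are only posted to d's Metadata.
def _example_arrival_snr_QC(d):
    from obspy.taup import TauPyModel

    model = TauPyModel(model="iasp91")
    d = arrival_snr_QC(
        d,
        use_measured_arrival_time=False,
        taup_model=model,
        phase_name="P",
        metadata_key="Parrival",
        component=2,
    )
    if d.live:
        print(d["Parrival"])  # python dict of computed snr metrics
    return d
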
def FD_snr_estimator(
    data_object,
    noise_window=TimeWindow(-130.0, -5.0),
    noise_spectrum_engine=None,
    signal_window=TimeWindow(-5.0, 120.0),
    signal_spectrum_engine=None,
    band_cutoff_snr=2.0,
    # check these are reasonable - don't remember the formula when writing this
    tbp=2.5,
    ntapers=4,
    high_frequency_search_start=5.0,
    poles=3,
    perc=95.0,
    optional_metrics=None,
    save_spectra=False,
):
    # optional_metrics=['snr_stats','filtered_envelope','filtered_L2',
    #                   'filtered_Linf','filtered_MAD','filtered_perc']
    """
    Estimates one or more metrics of signal-to-noise from a TimeSeries
    object.  An implicit assumption is that the analysis is centered on a
    timeable "phase" like P, PP, etc.

    This is a python function that can be used to compute one or several
    signal-to-noise ratio estimates based on an estimated bandwidth using
    the C++ function EstimateBandwidth.  The function has a fair number of
    options, but the core metrics computed are the bandwidth estimates
    computed by that function.  It uses a fairly simple search algorithm
    that functions well for most earthquake sources.  For the low end the
    algorithm searches from the first frequency indistinguishable from DC
    to find the lowest frequency for which the snr exceeds a threshold
    specified by the input parameter 'band_cutoff_snr'.  It does a similar
    search from the high end from a point 80% of Nyquist - a good choice
    for all modern digital data that use FIR antialias filters.  Neither
    search is defined by just the first frequency that satisfies the snr
    threshold criterion.  Only when a group of frequencies more than 2
    times the time-bandwidth product exceeds the threshold is the band edge
    defined.  The actual band edge is then defined as the first frequency
    exceeding the threshold.  That more elaborate algorithm was used to
    prevent pure lines in either the signal or noise spectrum from
    corrupting the estimates.

    A set of optional metrics can be computed.  All optional metrics use
    the bandwidth estimates in one way or another.  Optional metrics are
    defined by the following keywords passed through a list (actually any
    iterable container will work) of strings defining one or more of the
    keywords.  The metrics and a brief description of each follow:

    *snr_stats* computes what are commonly plotted in box plots for the snr
    estimates within the estimated bandwidth:  minimum, maximum, 0.25 (1/4)
    point, 0.75 (3/4) point, and the median.  These are set with the
    following dict keys:  'snr_band_maximum', 'snr_band_minimum',
    'snr_band_1/4', 'snr_band_3/4', and 'snr_band_median' respectively.

    *filtered_envelope*, *filtered_L2*, *filtered_Linf*, *filtered_perc*,
    and *filtered_MAD*:  All of these optional metrics first copy the
    data_object and then filter the copy with a Butterworth bandpass filter
    with the number of poles specified by the poles argument and corners at
    the band edges estimated by the EstimateBandwidth function.  The
    metrics computed are time domain snr estimates computed with the
    filtered data.  They are actually computed from functions in this same
    module that can be used independently and have their own docstring
    description.  The functions called have the following names in order of
    the keyword list above:  *snr_envelope*, *snr_L2*, *snr_Linf*, and
    *snr_MAD*.  When computed they are set in the output dictionary with
    the following (again in order) keys:  'snr_envelope', 'snr_L2',
    'snr_Linf', and 'snr_MAD'.

    :param data_object:  TimeSeries object to be processed.  For Seismogram
      objects the assumption is the algorithm would be used for a single
      component (e.g. longitudinal or vertical for a P phase).
    :param noise_window:  defines the time window to use for computing the
      spectrum considered noise.  The time span can be either relative or
      UTC (absolute) time but we do not check for consistency.  This low
      level function assumes they are consistent.  If not, the calculations
      are nearly guaranteed to fail.  Type must be
      mspasspy.ccore.TimeWindow.
    :param signal_window:  defines the time window to use that defines what
      you consider "the signal".  The time span can be either relative or
      UTC (absolute) time but we do not check for consistency.  This low
      level function assumes they are consistent.  If not, the calculations
      are nearly guaranteed to fail.  Type must be
      mspasspy.ccore.TimeWindow.
    :param noise_spectrum_engine:  is expected to be either a None type or
      an instance of a ccore object called an MTPowerSpectralEngine.  When
      None an instance of MTPowerSpectralEngine is computed for each call
      to this function.  That is a convenience for small jobs or when
      called with data from mixed sample rates and/or variable length time
      windows.  It is very inefficient to use the default approach for
      processing large data sets and really for any use in a map operation
      with dask or spark.  Normal use should be for the user to predefine
      an MTPowerSpectralEngine from the expected window size for a given
      data sample rate and include it in the function call.
    :param signal_spectrum_engine:  is the comparable MTPowerSpectralEngine
      to use to compute the signal power spectrum.  Default is None with
      the same caveat as above for the noise_spectrum_engine.
    :param tbp:  time-bandwidth product to use for computing the set of
      Slepian functions used for the multitaper estimator.  This parameter
      is used only if the noise_spectrum_engine or signal_spectrum_engine
      arguments are set as None.  The default is 2.5.
    :param ntapers:  is the number of Slepian functions (tapers) to compute
      for the multitaper estimators.  Like tbp it is referenced only if
      noise_spectrum_engine or signal_spectrum_engine are set to None.
      Note the function will throw an exception if the ntapers parameter is
      not consistent with the time-bandwidth product.  That is, the maximum
      number of tapers is round(2*tbp-1).  Default is 4 which is consistent
      with the default tbp=2.5.
    :param high_frequency_search_start:  Used to specify the upper
      frequency used to start the search for the upper end of the bandwidth
      by the function EstimateBandwidth.  Default is 5.0 which is
      reasonable for teleseismic P wave data.  Should be changed for usage
      other than analysis of teleseismic P phases or the bandwidth may be
      grossly underestimated.
    :param poles:  defines the number of poles to use for the Butterworth
      bandpass applied for the "filtered" metrics (see above).  Default is 3.
    :param perc:  used only if 'filtered_perc' is in the optional metrics
      list.  Specifies the perc parameter as used in seismic unix.  Uses
      the percentage point specified of the sorted abs of all amplitudes.
      (Note perc=50.0 is identical to MAD)  Default is 95.0 which is 2
      sigma for Gaussian noise.
    :param optional_metrics:  is an iterable container containing one or
      more of the optional snr metrics discussed above.
    :param store_as_subdocument:  This parameter is included for
      flexibility but should not normally be changed by the user.  As noted
      earlier the outputs of this function are best abstracted as Metadata.
      When this parameter is False the Metadata members are all posted
      directly to data_object's Metadata container.  If set True the
      internally generated python dict is copied and stored with a key
      defined through the subdocument_key argument.  See use below in
      function arrival_snr.
    :param subdocument_key:  key for storing results as a subdocument.
      This parameter is ignored unless store_as_subdocument is True.
      Default is "snr_data".
    :param save_spectra:  If set True (default is False) the function will
      pickle the computed noise and signal spectra and save the strings
      created along with a set of related metadata defining the time range
      to the output python dict (these will be saved in MongoDB when db is
      defined - see below).  This option should ONLY be used for spot
      checking, discovery of why an snr metric has unexpected results using
      graphics, or a research topic where the spectra would be of interest.
      It is a very bad idea to turn this option on if you are processing a
      large quantity of data and saving the results to MongoDB as it will
      bloat the arrival collection.  Consider a different strategy if that
      is essential for your work.
    :return:  python tuple with two components.  0 is a python dict with
      the computed metrics associated with keys defined above.  1 is a
      mspass.ccore.ErrorLogger object.  Any errors in computing any of the
      metrics will be posted to this logger.  Users should then test this
      object using its size() method and if the log is not empty
      (size > 0) the caller should handle that condition.  For normal use
      that means pushing any messages the log contains to the original data
      object's error log.
    """
    algname = "FD_snr_estimator"
    my_logger = ErrorLogger()
    # For this algorithm we dogmatically demand the input be a TimeSeries
    if not isinstance(data_object, TimeSeries):
        raise MsPASSError(
            "FD_snr_estimator:  Received invalid data object - arg0 data must be a TimeSeries",
            ErrorSeverity.Invalid,
        )
    # MTPowerSpectrum at the moment has an issue with how it handles
    # a user error in specifying time-bandwidth product and number of tapers.
    # We put in an explicit trap here and abort if the user makes a mistake
    # to avoid a huge spray of error messages
    if ntapers > round(2 * tbp):
        message = (
            algname
            + "(Fatal Error):  ntapers={ntapers} inconsistent with tbp={tbp}\n".format(
                ntapers=ntapers, tbp=tbp)
        )
        message += "ntapers must be <= round(2*tbp)"
        raise MsPASSError(message, ErrorSeverity.Fatal)
    if data_object.dead():
        my_logger.log_error(
            algname,
            "Datum received was set dead - cannot compute anything",
            ErrorSeverity.Invalid,
        )
        return [dict(), my_logger]
    # We enclose all the main code here in a try block and catch any
    # MsPASSErrors; they will be posted as log messages.  Others will not
    # be handled, intentionally letting python's error mechanism handle
    # them as unexpected exceptions - MsPASSError can be anticipated for
    # data problems
    snrdata = dict()
    try:
        # First extract the required windows and compute the power spectra
        n = WindowData(data_object, noise_window.start, noise_window.end)
        s = WindowData(data_object, signal_window.start, signal_window.end)
        if noise_spectrum_engine:
            nengine = noise_spectrum_engine
        else:
            nengine = MTPowerSpectrumEngine(n.npts, tbp, ntapers)
        if signal_spectrum_engine:
            sengine = signal_spectrum_engine
        else:
            sengine = MTPowerSpectrumEngine(s.npts, tbp, ntapers)
        N = nengine.apply(n)
        S = sengine.apply(s)
        bwd = EstimateBandwidth(S.df, S, N, band_cutoff_snr, tbp,
                                high_frequency_search_start)
        # These estimates are always computed and posted
        snrdata["low_f_band_edge"] = bwd.low_edge_f
        snrdata["high_f_band_edge"] = bwd.high_edge_f
        snrdata["low_f_band_edge_snr"] = bwd.low_edge_snr
        snrdata["high_f_band_edge_snr"] = bwd.high_edge_snr
        snrdata["spectrum_frequency_range"] = bwd.f_range
        snrdata["bandwidth_fraction"] = bwd.bandwidth_fraction()
        snrdata["bandwidth"] = bwd.bandwidth()
        if save_spectra:
            snrdata["signal_spectrum"] = pickle.dumps(S)
            snrdata["noise_spectrum"] = pickle.dumps(N)
            snrdata["signal_window_start_time"] = signal_window.start
            snrdata["signal_window_end_time"] = signal_window.end
            snrdata["noise_window_start_time"] = noise_window.start
            snrdata["noise_window_end_time"] = noise_window.end
    except MsPASSError as err:
        newmessage = _reformat_mspass_error(
            err,
            "Spectrum calculation and EstimateBandwidth function section failed with the following message\n",
            "No SNR metrics can be computed for this datum",
        )
        my_logger.log_error(algname, newmessage, ErrorSeverity.Invalid)
        return [snrdata, my_logger]
    # For the current implementation all the optional metrics require
    # computing a filtered version of the data.  If a new option is
    # desired that does not require filtering the data the logic
    # here will need to be changed to create a more exclusive test
    if optional_metrics:
        # use the mspass butterworth filter for speed - obspy
        # version requires a conversion to Trace objects
        BWfilt = Butterworth(
            False,
            True,
            True,
            poles,
            bwd.low_edge_f,
            poles,
            bwd.high_edge_f,
            data_object.dt,
        )
        filtered_data = TimeSeries(data_object)
        BWfilt.apply(filtered_data)
        nfilt = WindowData(filtered_data, noise_window.start, noise_window.end)
        sfilt = WindowData(filtered_data, signal_window.start, signal_window.end)
        # In this implementation we don't need this any longer so we
        # delete it here.  If options are added beware
        del filtered_data
        # Some minor efficiency would be possible if we avoided
        # duplication of computations when multiple optional metrics
        # are requested, but the fragility that adds to maintenance
        # is not justified
        for metric in optional_metrics:
            if metric == "snr_stats":
                try:
                    stats = BandwidthStatistics(S, N, bwd)
                    # stats is a Metadata container - copy to snrdata
                    for k in stats.keys():
                        snrdata[k] = stats[k]
                except MsPASSError as err:
                    newmessage = _reformat_mspass_error(
                        err,
                        "BandwidthStatistics threw the following error\n",
                        "Five snr_stats attributes were not computed",
                    )
                    my_logger.log_error(algname, newmessage, err.severity)
            elif metric == "filtered_envelope":
                try:
                    analytic_nfilt = hilbert(nfilt.data)
                    analytic_sfilt = hilbert(sfilt.data)
                    nampvector = np.abs(analytic_nfilt)
                    sampvector = np.abs(analytic_sfilt)
                    namp = np.median(nampvector)
                    samp = np.max(sampvector)
                    snrdata["snr_envelope_Linf_over_L1"] = _safe_snr_calculation(
                        samp, namp)
                except Exception:
                    my_logger.log_error(
                        algname,
                        "Error computing filtered_envelope metrics:  snr_envelope_Linf_over_L1 not computed",
                        ErrorSeverity.Complaint,
                    )
            elif metric == "filtered_L2":
                try:
                    namp = RMSAmplitude(nfilt)
                    samp = RMSAmplitude(sfilt)
                    snrvalue = _safe_snr_calculation(samp, namp)
                    snrdata["snr_L2"] = snrvalue
                except MsPASSError as err:
                    newmessage = _reformat_mspass_error(
                        err,
                        "Error computing filtered_L2 metric",
                        "snr_L2 attribute was not computed",
                    )
                    my_logger.log_error(algname, newmessage, err.severity)
            elif metric == "filtered_MAD":
                try:
                    namp = MADAmplitude(nfilt)
                    samp = MADAmplitude(sfilt)
                    snrvalue = _safe_snr_calculation(samp, namp)
                    snrdata["snr_MAD"] = snrvalue
                except MsPASSError as err:
                    newmessage = _reformat_mspass_error(
                        err,
                        "Error computing filtered_MAD metric",
                        "snr_MAD attribute was not computed",
                    )
                    my_logger.log_error(algname, newmessage, err.severity)
            elif metric == "filtered_Linf":
                try:
                    # the C function expects a fraction - for users a
                    # percentage is clearer
                    namp = PercAmplitude(nfilt, perc / 100.0)
                    samp = PeakAmplitude(sfilt)
                    snrvalue = _safe_snr_calculation(samp, namp)
                    snrdata["snr_Linf"] = snrvalue
                    snrdata["snr_perc"] = perc
                except MsPASSError as err:
                    newmessage = _reformat_mspass_error(
                        err,
                        "Error computing filtered_Linf metric",
                        "snr_Linf attribute was not computed",
                    )
                    my_logger.log_error(algname, newmessage, err.severity)
            elif metric == "filtered_perc":
                try:
                    namp = MADAmplitude(nfilt)
                    samp = PercAmplitude(sfilt, perc / 100.0)
                    snrvalue = _safe_snr_calculation(samp, namp)
                    snrdata["snr_perc"] = snrvalue
                    # redundant if filtered_Linf is also run but tiny cost
                    snrdata["snr_perc"] = perc
                except MsPASSError as err:
                    newmessage = _reformat_mspass_error(
                        err,
                        "Error computing filtered_perc metric",
                        "snr_perc metric was not computed",
                    )
                    my_logger.log_error(algname, newmessage, err.severity)
            else:
                message = "Illegal optional_metrics keyword=" + metric + "\n"
                message += (
                    "If that is a typo expect some metrics will be missing from output"
                )
                my_logger.log_error(algname, message, ErrorSeverity.Complaint)
    return [snrdata, my_logger]

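# Hedged usage sketch for FD_snr_estimator (illustration only, not part of
# the original module).  It assumes `ts` is a TimeSeries in relative time
# with t0 at the phase arrival (e.g. after calling ator), since the default
# noise and signal windows are defined relative to that time.
def _example_FD_snr_estimator(ts):
    [snrdata, elog] = FD_snr_estimator(
        ts,
        band_cutoff_snr=2.0,
        tbp=2.5,
        ntapers=4,
        optional_metrics=["snr_stats", "filtered_L2", "filtered_MAD"],
    )
    if elog.size() > 0:
        ts.elog += elog  # push any complaints back to the datum's error log
    print(snrdata.get("low_f_band_edge"), snrdata.get("high_f_band_edge"))
    return snrdata
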