def test_reduce_stack():
    seis1 = get_live_seismogram()
    seis2 = get_live_seismogram()
    seis_cp = np.array(seis1.data)
    stack(seis1, seis2)
    res = np.add(np.array(seis_cp), np.array(seis2.data))
    for i in range(3):
        assert np.isclose(seis1.data[i], res[i]).all()  # fixme

    ts1 = get_live_timeseries()
    ts2 = get_live_timeseries()
    ts1_cp = np.array(ts1.data)
    stack(ts1, ts2)
    assert np.isclose(ts1.data, (np.array(ts1_cp) + np.array(ts2.data))).all()

    tse1 = get_live_timeseries_ensemble(2)
    tse2 = get_live_timeseries_ensemble(2)
    tse1_cp = TimeSeriesEnsemble(tse1)
    stack(tse1, tse2)
    for i in range(2):
        assert np.isclose(
            tse1.member[i].data,
            np.add(np.array(tse1_cp.member[i].data), np.array(tse2.member[i].data)),
        ).all()

    seis_e1 = get_live_seismogram_ensemble(2)
    seis_e2 = get_live_seismogram_ensemble(2)
    seis_e1_cp = SeismogramEnsemble(seis_e1)
    stack(seis_e1, seis_e2)
    for i in range(2):
        res = np.add(
            np.array(seis_e1_cp.member[i].data), np.array(seis_e2.member[i].data)
        )
        for j in range(3):
            assert np.isclose(seis_e1.member[i].data[j], res[j]).all()  # fixme
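
# The assertions above reconstruct the expected output of stack() with plain
# numpy: after the call, the first input is asserted to hold the elementwise
# sum of both inputs' samples.  A self-contained sketch of that check pattern;
# the arrays below are stand-ins for the fixture data, not MsPASS objects.
def _stack_expectation_sketch():
    d1 = np.arange(5.0)
    d2 = np.ones(5)
    expected = d1 + d2
    d1 += d2  # stand-in for what stack() is asserted to do to its first argument
    assert np.isclose(d1, expected).all()
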
def test_detrend():
    ts = get_live_timeseries()
    seis = get_live_seismogram()
    tse = get_live_timeseries_ensemble(3)
    seis_e = get_live_seismogram_ensemble(3)
    detrend(ts, object_history=True, alg_id="0")
    detrend(seis, object_history=True, alg_id="0")
    detrend(tse, object_history=True, alg_id="0")
    detrend(seis_e, object_history=True, alg_id="0")
    detrend(ts, type="linear", object_history=True, alg_id="0")
    detrend(ts, type="constant", object_history=True, alg_id="0")
    detrend(ts, type="polynomial", order=2, object_history=True, alg_id="0")
    detrend(ts, type="spline", order=2, dspline=1000, object_history=True, alg_id="0")

    # functionality verification testing
    ts = get_live_timeseries()
    tr = obspy.Trace()
    tr.data = np.array(ts.data)
    copy = np.array(ts.data)
    tr.stats.sampling_rate = 20
    tr.detrend(type="simple")
    detrend(ts, "simple", object_history=True, alg_id="0")
    assert all(abs(a - b) < 0.001 for a, b in zip(ts.data, tr.data))
    assert not all(abs(a - b) < 0.001 for a, b in zip(ts.data, copy))
def test_correlate_template():
    ts1 = get_live_timeseries()
    ts2 = get_live_timeseries()
    tr1 = ts1.toTrace()
    tr2 = ts2.toTrace()
    res1 = correlate_template(ts1, ts2, object_history=True, alg_id="0")
    res2 = obspy.signal.cross_correlation.correlate_template(tr1, tr2)
    assert all(abs(a - b) < 0.001 for a, b in zip(res1, res2))
def test_correlate():
    ts1 = get_live_timeseries()
    ts2 = get_live_timeseries()
    tr1 = ts1.toTrace()
    tr2 = ts2.toTrace()
    res1 = correlate(ts1, ts2, 2, object_history=True, alg_id="0")
    res2 = obspy.signal.cross_correlation.correlate(tr1, tr2, 2)
    assert all(abs(a - b) < 0.001 for a, b in zip(res1, res2))
def test_timeseries_as_trace():
    ts = get_live_timeseries()
    ts2 = get_live_timeseries()
    cp = np.array(ts.data)
    cp2 = np.array(ts2.data)
    dummy_func_timeseries_as_trace(ts, ts2)
    assert len(cp) != len(ts.data)
    assert len(cp2) != len(ts2.data)
    np.isclose([0, 1, 2], ts.data).all()
    np.isclose([2, 3, 4], ts2.data).all()
    assert ts["chan"] == "Z"
def test_filter():
    ts = get_live_timeseries()
    seis = get_live_seismogram()
    tse = get_live_timeseries_ensemble(3)
    seis_e = get_live_seismogram_ensemble(3)
    filter(ts, "bandpass", freqmin=1, freqmax=5, object_history=True, alg_id="0")
    filter(seis, "bandpass", freqmin=1, freqmax=5, object_history=True, alg_id="0")
    filter(tse, "bandpass", freqmin=1, freqmax=5, object_history=True, alg_id="0")
    filter(seis_e, "bandpass", freqmin=1, freqmax=5, object_history=True, alg_id="0")
    filter(ts, "bandstop", freqmin=1, freqmax=5)
    filter(ts, "lowpass", freq=1)
    filter(ts, "highpass", freq=1)
    filter(ts, "lowpass_cheby_2", freq=1)  # fixme fix testing warning
    # these two types are not supported:
    # filter(ts, "lowpass_fir", freq=10)
    # filter(ts, "remez_fir", freqmin=10, freqmax=20)

    # functionality verification testing
    ts = get_live_timeseries()
    tr = obspy.Trace()
    tr.data = np.array(ts.data)
    copy = np.array(ts.data)
    tr.stats.sampling_rate = 20
    tr.filter("bandpass", freqmin=1, freqmax=5)
    filter(ts, "bandpass", freqmin=1, freqmax=5, object_history=True, alg_id="0")
    assert all(abs(a - b) < 0.001 for a, b in zip(ts.data, tr.data))
    assert not all(abs(a - b) < 0.001 for a, b in zip(ts.data, copy))
def test_copy_helpers():
    ts1 = get_live_timeseries()
    assert ts1.dt != 1 / 255
    ts2 = get_live_timeseries()
    ts2.dt = 1 / 255
    timeseries_copy_helper(ts1, ts2)
    assert ts1.dt == 1 / 255

    seis1 = get_live_seismogram()
    assert seis1.dt != 1 / 255
    seis2 = get_live_seismogram()
    seis2.dt = 1 / 255
    seismogram_copy_helper(seis1, seis2)
    assert seis1.dt == 1 / 255
def test_topMute():
    t0 = 4
    t1 = 14
    lmute = _TopMute(t0, t1, "linear")
    assert math.isclose(t0, lmute.get_t0())
    assert math.isclose(t1, lmute.get_t1())
    assert "linear" == lmute.taper_type()
    ldata = pickle.dumps(lmute)
    lcopy = pickle.loads(ldata)
    assert math.isclose(lcopy.get_t0(), lmute.get_t0())
    assert math.isclose(lcopy.get_t1(), lmute.get_t1())
    assert "linear" == lcopy.taper_type()

    cmute = _TopMute(t0, t1, "cosine")
    assert math.isclose(t0, cmute.get_t0())
    assert math.isclose(t1, cmute.get_t1())
    assert "cosine" == cmute.taper_type()
    cdata = pickle.dumps(cmute)
    ccopy = pickle.loads(cdata)
    assert math.isclose(ccopy.get_t0(), cmute.get_t0())
    assert math.isclose(ccopy.get_t1(), cmute.get_t1())
    assert "cosine" == ccopy.taper_type()

    ts_l = get_live_timeseries()
    ts_l.t0 = 0
    ts_l.dt = 1
    ts_l.npts = 200
    ts_l.data += 1
    lmute.apply(ts_l)
    assert ts_l.data[4] == 0
    assert ts_l.data[9] == 0.5
    assert ts_l.data[14] == 1

    ts_c = get_live_timeseries()
    ts_c.t0 = 0
    ts_c.dt = 1
    ts_c.npts = 200
    ts_c.data += 1
    cmute.apply(ts_c)
    assert ts_c.data[4] == 0
    assert ts_c.data[9] == 0.5
    assert ts_c.data[14] == 1
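
# Illustrative sketch of the weighting the three data assertions above encode
# for the linear mute with t0=4 and t1=14: zero before t0, a linear ramp from
# t0 to t1, and unity after t1.  Plain numpy only; the index grid below is a
# stand-in for sample times with dt=1, not the MsPASS TimeSeries used above.
def _top_mute_ramp_sketch():
    t = np.arange(20.0)
    w = np.clip((t - 4.0) / (14.0 - 4.0), 0.0, 1.0)
    assert w[4] == 0.0 and w[9] == 0.5 and w[14] == 1.0
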
def test_info_new_map():
    # Seismogram and TimeSeries
    seis = get_live_seismogram()
    assert seis.number_of_stages() == 0
    logging_helper.info(seis, 'dummy_func', '1')
    assert seis.number_of_stages() == 1

    ts = get_live_timeseries()
    assert ts.number_of_stages() == 0
    logging_helper.info(ts, 'dummy_func', '1')
    assert ts.number_of_stages() == 1

    # ensemble
    seis_e = get_live_seismogram_ensemble(3)
    logging_helper.info(seis_e, 'dummy_func', '0')
    for i in range(3):
        assert seis_e.member[i].number_of_stages() == 1

    seis_e = get_live_seismogram_ensemble(3)
    logging_helper.info(seis_e, 'dummy_func', '0', 0)
    assert seis_e.member[0].number_of_stages() == 1

    tse = get_live_timeseries_ensemble(3)
    logging_helper.info(tse, 'dummy_func', '0', 0)
    assert tse.member[0].number_of_stages() == 1
def setup_class(self):
    client = Client('localhost')
    self.db = Database(client, 'test_dbclean')
    self.test_ts = get_live_timeseries()
    site_id = ObjectId()
    channel_id = ObjectId()
    source_id = ObjectId()
    self.db['site'].insert_one({
        '_id': site_id,
        'net': 'net',
        'sta': 'sta',
        'loc': 'loc',
        'lat': 1.0,
        'lon': 1.0,
        'elev': 2.0,
        'starttime': datetime.utcnow().timestamp(),
        'endtime': datetime.utcnow().timestamp()
    })
    self.db['channel'].insert_one({
        '_id': channel_id,
        'net': 'net1',
        'sta': 'sta1',
        'loc': 'loc1',
        'chan': 'chan',
        'lat': 1.1,
        'lon': 1.1,
        'elev': 2.1,
        'starttime': datetime.utcnow().timestamp(),
        'endtime': datetime.utcnow().timestamp(),
        'edepth': 3.0,
        'vang': 1.0,
        'hang': 1.0
    })
    self.db['source'].insert_one({
        '_id': source_id,
        'lat': 1.2,
        'lon': 1.2,
        'time': datetime.utcnow().timestamp(),
        'depth': 3.1,
        'magnitude': 1.0
    })
    self.test_ts['site_id'] = site_id
    self.test_ts['source_id'] = source_id
    self.test_ts['channel_id'] = channel_id
def setup_class(self):
    client = DBClient("localhost")
    self.db = Database(client, "test_dbclean")
    self.test_ts = get_live_timeseries()
    site_id = ObjectId()
    channel_id = ObjectId()
    source_id = ObjectId()
    self.db["site"].insert_one({
        "_id": site_id,
        "net": "net",
        "sta": "sta",
        "loc": "loc",
        "lat": 1.0,
        "lon": 1.0,
        "elev": 2.0,
        "starttime": datetime.utcnow().timestamp(),
        "endtime": datetime.utcnow().timestamp(),
    })
    self.db["channel"].insert_one({
        "_id": channel_id,
        "net": "net1",
        "sta": "sta1",
        "loc": "loc1",
        "chan": "chan",
        "lat": 1.1,
        "lon": 1.1,
        "elev": 2.1,
        "starttime": datetime.utcnow().timestamp(),
        "endtime": datetime.utcnow().timestamp(),
        "edepth": 3.0,
        "vang": 1.0,
        "hang": 1.0,
    })
    self.db["source"].insert_one({
        "_id": source_id,
        "lat": 1.2,
        "lon": 1.2,
        "time": datetime.utcnow().timestamp(),
        "depth": 3.1,
        "magnitude": 1.0,
    })
    self.test_ts["site_id"] = site_id
    self.test_ts["source_id"] = source_id
    self.test_ts["channel_id"] = channel_id
def test_all_decorators():
    # test mspass_func_wrapper
    with pytest.raises(TypeError) as err:
        dummy_func_2(1)
    assert (
        str(err.value) == "mspass_func_wrapper only accepts mspass object as data input"
    )

    with pytest.raises(ValueError) as err:
        seis = get_live_seismogram()
        dummy_func_2(seis, object_history=True)
    assert (
        str(err.value) == "dummy_func_2: object_history was true but alg_id not defined"
    )

    assert "OK" == dummy_func_2(seis, dryrun=True)
    assert seis.number_of_stages() == 0
    dummy_func_2(seis, object_history=True, alg_id="0")
    assert seis.number_of_stages() == 1

    # test timeseries_as_trace
    ts = get_live_timeseries()
    cp = np.array(ts.data)
    dummy_func_2(ts, object_history=True, alg_id="0")
    assert len(cp) != len(ts.data)
    np.isclose([0, 1, 2], ts.data).all()
    assert ts.number_of_stages() == 1

    # test seismogram_as_stream
    seis1 = get_live_seismogram()
    cp1 = np.array(seis1.data[0])
    dummy_func_2(seis1, object_history=True, alg_id="0")
    assert cp1[0] != seis1.data[0, 0]
    assert seis1.data[0, 0] == -1
    assert seis1.number_of_stages() == 1

    # test timeseries_ensemble_as_stream
    tse = get_live_timeseries_ensemble(2)
    cp = TimeSeriesEnsemble(tse)
    dummy_func_2(tse, object_history=True, alg_id="0")
    assert tse.member[0].data[0] == -1
    assert tse.member[0].data[0] != cp.member[0].data[0]
    assert tse.member[0].number_of_stages() == 1

    # test seismogram_ensemble_as_stream
    seis_e = get_live_seismogram_ensemble(2)
    cp = SeismogramEnsemble(seis_e)
    dummy_func_2(seis_e, object_history=True, alg_id="0")
    assert seis_e.member[0].data[0, 0] == -1
    assert seis_e.member[0].data[0, 0] != cp.member[0].data[0, 0]
    assert seis_e.member[0].number_of_stages() == 1

    # test inplace return
    seis1 = get_live_seismogram()
    # upgrade of decorator -> should explicitly pass the positional arguments
    ret = dummy_func_2(seis1, object_history=True, alg_id="0")
    assert seis1 == ret
def test_ator_rtoa():
    ts = get_live_timeseries()
    original_t0 = ts.t0
    ts_new = ator(ts, 1)
    assert ts_new.time_is_relative()
    assert ts_new.t0 == original_t0 - 1
    ts_new2 = rtoa(ts_new)
    assert ts_new2.time_is_UTC()
    assert ts_new2.t0 == original_t0
def test_reduce_dask_spark(spark_context):
    l = [get_live_timeseries() for i in range(5)]
    res = np.zeros(255)
    for i in range(5):
        for j in range(255):
            res[j] = res[j] + l[i].data[j]
    spark_res = spark_reduce(l, spark_context)
    dask_res = dask_reduce(l)
    assert np.isclose(res, dask_res.data).all()
    assert np.isclose(res, spark_res.data).all()
    assert len(res) == len(spark_res.data)
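
# The double loop above builds the expected reduction result sample by sample.
# An equivalent, vectorized way to state the same expectation; random arrays
# stand in for the fixture data here, this is illustrative only.
def _reduce_expected_sketch():
    data_matrix = np.random.rand(5, 255)
    looped = np.zeros(255)
    for row in data_matrix:
        looped += row
    assert np.allclose(looped, data_matrix.sum(axis=0))
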
def test_interpolate():
    ts = get_live_timeseries()
    seis = get_live_seismogram()
    tse = get_live_timeseries_ensemble(3)
    seis_e = get_live_seismogram_ensemble(3)
    interpolate(ts, 255, object_history=True, alg_id="0")
    interpolate(seis, 255, object_history=True, alg_id="0")
    interpolate(tse, 255, object_history=True, alg_id="0")
    interpolate(seis_e, 255, object_history=True, alg_id="0")
    interpolate(ts, 255, method="lanczos", a=20, object_history=True, alg_id="0")
    ts = get_live_timeseries()
    interpolate(ts, 25, method="slinear", object_history=True, alg_id="0")
    ts = get_live_timeseries()
    interpolate(ts, 255, method="linear", object_history=True, alg_id="0")
    ts = get_live_timeseries()
    interpolate(ts, 255, method="nearest", object_history=True, alg_id="0")
    ts = get_live_timeseries()
    interpolate(ts, 255, method="zero", object_history=True, alg_id="0")

    # functionality verification testing
    ts = get_sin_timeseries()
    tr = obspy.Trace()
    tr.data = np.array(ts.data)
    copy = np.array(ts.data)
    tr.stats.sampling_rate = 20
    tr.interpolate(40, method="linear", npts=500)
    interpolate(ts, 40, method="linear", npts=500, object_history=True, alg_id="0")
    assert len(ts.data) == len(tr.data)
    assert all(abs(a - b) < 0.001 for a, b in zip(ts.data, tr.data))
    assert not all(abs(a - b) < 0.001 for a, b in zip(ts.data, copy))
    assert ts.dt == 1 / 40
def test_reduce_error():
    tse = get_live_timeseries_ensemble(3)
    tse2 = get_live_timeseries_ensemble(2)
    with pytest.raises(IndexError) as err:
        logging_helper.reduce(tse, tse2, 'dummy_func', '0')
    assert (
        str(err.value)
        == "logging_helper.reduce: data1 and data2 have different sizes of member"
    )

    tse3 = get_live_timeseries_ensemble(3)
    ts = get_live_timeseries()
    with pytest.raises(TypeError) as ex:
        logging_helper.reduce(ts, tse3, 'dummy_func', '0')
    assert str(ex.value) == "logging_helper.reduce: data2 has a different type as data1"
def test_reduce_functionality():
    # Seismogram and TimeSeries
    seis = get_live_seismogram()
    assert seis.number_of_stages() == 0
    logging_helper.info(seis, 'dummy_func', '1')
    logging_helper.info(seis, 'dummy_func_2', '2')
    assert seis.number_of_stages() == 2
    seis2 = get_live_seismogram()
    assert seis2.number_of_stages() == 0
    logging_helper.reduce(seis2, seis, 'reduce', '3')
    assert len(seis2.get_nodes()) == 3

    ts = get_live_timeseries()
    ts2 = get_live_timeseries()
    assert ts.number_of_stages() == 0
    logging_helper.info(ts, 'dummy_func', '1')
    logging_helper.info(ts, 'dummy_func', '2')
    assert ts.number_of_stages() == 2
    logging_helper.reduce(ts2, ts, 'reduce', '3')
    assert len(ts2.get_nodes()) == 3

    # ensemble
    seis_e = get_live_seismogram_ensemble(3)
    seis_e2 = get_live_seismogram_ensemble(3)
    logging_helper.info(seis_e, 'dummy_func', '0')
    logging_helper.info(seis_e, 'dummy_func', '1')
    logging_helper.info(seis_e, 'dummy_func', '2')
    logging_helper.reduce(seis_e2, seis_e, "reduce", "3")
    for i in range(3):
        assert len(seis_e2.member[i].get_nodes()) == 4

    tse = get_live_timeseries_ensemble(3)
    tse2 = get_live_timeseries_ensemble(3)
    logging_helper.info(tse, 'dummy_func', '0')
    logging_helper.info(tse, 'dummy_func', '1')
    logging_helper.info(tse, 'dummy_func', '2')
    logging_helper.reduce(tse2, tse, "reduce", "3")
    for i in range(3):
        assert len(tse2.member[i].get_nodes()) == 4
def test_mspass_reduce_func_wrapper():
    ts1 = get_live_timeseries()
    ts1.data[0] = 1
    ts2 = get_live_timeseries()
    logging_helper.info(ts2, "dummy_func", "1")
    logging_helper.info(ts2, "dummy_func_2", "2")
    assert len(ts1.get_nodes()) == 0
    dummy_reduce_func(ts1, ts2, object_history=True, alg_id="3")
    assert ts1.data[0] == -1
    assert len(ts1.get_nodes()) == 3

    with pytest.raises(TypeError) as err:
        dummy_reduce_func([0], [1], object_history=True, alg_id="3")
    assert str(err.value) == "only mspass objects are supported in reduce wrapped methods"

    with pytest.raises(TypeError) as err:
        dummy_reduce_func(ts1, get_live_seismogram(), object_history=True, alg_id="3")
    assert str(err.value) == "data2 has a different type as data1"

    with pytest.raises(ValueError) as err:
        seis1 = get_live_seismogram()
        seis2 = get_live_seismogram()
        dummy_reduce_func(seis1, seis2, object_history=True)
    assert (
        str(err.value)
        == "dummy_reduce_func: object_history was true but alg_id not defined"
    )

    assert "OK" == dummy_reduce_func(seis1, seis2, dryrun=True)

    ts1 = get_live_timeseries()
    ts2 = get_live_timeseries()
    assert len(ts1.elog.get_error_log()) == 0
    dummy_reduce_func_runtime(ts1, ts2, object_history=True, alg_id="3")
    assert len(ts1.elog.get_error_log()) == 1
    assert len(ts2.elog.get_error_log()) == 1

    ts1 = get_live_timeseries()
    ts2 = get_live_timeseries()
    assert len(ts1.elog.get_error_log()) == 0
    with pytest.raises(MsPASSError) as err:
        dummy_reduce_func_mspasserror(ts1, ts2, object_history=True, alg_id="3")
    assert str(err.value) == "test"
def test_map_spark_and_dask(spark_context):
    l = [get_live_timeseries() for i in range(5)]
    # add net, sta, chan, loc to avoid metadata serialization problem
    for i in range(5):
        l[i]["chan"] = "HHZ"
        l[i]["loc"] = "test_loc"
        l[i]["net"] = "test_net"
        l[i]["sta"] = "test_sta"
    spark_res = spark_map(l, spark_context)
    dask_res = dask_map(l)
    ts_cp = TimeSeries(l[0])
    res = signals.filter(
        ts_cp, "bandpass", freqmin=1, freqmax=5, object_history=True, alg_id="0"
    )
    assert np.isclose(spark_res[0].data, ts_cp.data).all()
    assert np.isclose(dask_res[0].data, ts_cp.data).all()
def test_vectorTaper():
    vTaperData = []
    for i in range(200):
        vTaperData.append(random.random())
    vtaper = VectorTaper(vTaperData)
    v_data = pickle.dumps(vtaper)
    v_copy = pickle.loads(v_data)

    ts = get_live_timeseries()
    ts.t0 = 0
    ts.dt = 1
    ts.npts = 200
    ts.data += 1
    ts_copy = copy.deepcopy(ts)
    vtaper.apply(ts)
    v_copy.apply(ts_copy)
    assert len(ts.data) == len(ts_copy.data)
    for i in range(len(ts.data)):
        assert math.isclose(ts.data[i], ts_copy.data[i])
def test_taper_wrapper():
    ts = get_live_timeseries()
    ts.t0 = 0
    ts.dt = 1
    ts.npts = 200
    ts.data += 1
    ts_l = linear_taper(ts, 4, 14, 170, 180)
    assert ts_l.data[4] == 0
    assert ts_l.data[9] == 0.5
    assert ts_l.data[14] == 1
    assert ts_l.data[170] == 1
    assert ts_l.data[175] == 0.5
    assert ts_l.data[180] == 0

    ts.npts = 200
    ts.data += 1
    ts_c = cosine_taper(ts, 4, 14, 170, 180)
    assert ts_c.data[4] == 0
    assert ts_c.data[9] == 0.5
    assert ts_c.data[14] == 1
    assert ts_c.data[170] == 1
    assert ts_c.data[175] == 0.5
    assert ts_c.data[180] == 0

    ts.npts = 200
    ts.data += 1
    vtaper = np.zeros(200)
    vtaper += 0.5
    ts_v = vector_taper(ts, vtaper)
    assert ts_v.data[4] == 0.5
    assert ts_v.data[9] == 0.5
    assert ts_v.data[14] == 0.5
    assert ts_v.data[170] == 0.5
    assert ts_v.data[175] == 0.5
    assert ts_v.data[180] == 0.5
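
# Illustrative sketch of the full weight profile the linear_taper assertions
# above encode: a head ramp from sample 4 to 14, unity through sample 170, and
# a tail ramp down from 170 to 180.  Plain numpy only; the index grid stands
# in for sample times with dt=1 and is not the library implementation.
def _linear_taper_profile_sketch():
    t = np.arange(200.0)
    w = np.interp(t, [4, 14, 170, 180], [0.0, 1.0, 1.0, 0.0])
    assert w[4] == 0.0 and w[9] == 0.5 and w[14] == 1.0
    assert w[170] == 1.0 and w[175] == 0.5 and w[180] == 0.0
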
def test_is_input_dead():
    seis = get_live_seismogram()
    assert False == is_input_dead(seis)
    assert False == is_input_dead(any=seis)
    seis.kill()
    assert True == is_input_dead(seis)
    assert True == is_input_dead(any=seis)

    ts = get_live_timeseries()
    assert False == is_input_dead(ts)
    assert False == is_input_dead(any=ts)
    ts.kill()
    assert True == is_input_dead(ts)
    assert True == is_input_dead(any=ts)

    seis_e = get_live_seismogram_ensemble(3)
    assert False == is_input_dead(seis_e)
    assert False == is_input_dead(any=seis_e)
    seis_e.member[0].kill()
    assert False == is_input_dead(seis_e)
    assert False == is_input_dead(any=seis_e)
    seis_e.member[1].kill()
    seis_e.member[2].kill()
    assert True == is_input_dead(seis_e)
    assert True == is_input_dead(any=seis_e)

    tse = get_live_timeseries_ensemble(3)
    assert False == is_input_dead(tse)
    assert False == is_input_dead(any=tse)
    tse.member[0].kill()
    assert False == is_input_dead(tse)
    assert False == is_input_dead(any=tse)
    tse.member[1].kill()
    tse.member[2].kill()
    assert True == is_input_dead(tse)
    assert True == is_input_dead(any=tse)
def test_object_history(self, spark_context):
    manager_db = Database(self.client, "test_manager")
    manager_db["history_global"].delete_many({})
    manager_db["history_object"].delete_many({})
    l = [get_live_timeseries() for i in range(2)]
    # add net, sta, chan, loc to avoid metadata serialization problem
    for i in range(2):
        l[i]["chan"] = "HHZ"
        l[i]["loc"] = "test_loc"
        l[i]["net"] = "test_net"
        l[i]["sta"] = "test_sta"
    spark_res = spark_map(l, self.manager, spark_context)

    assert manager_db["history_global"].count_documents({"alg_name": "filter"}) == 1
    res = manager_db["history_global"].find_one({"alg_name": "filter"})
    alg_id = res["alg_id"]

    # check status of the mspass objects
    for ts in spark_res:
        assert ts.number_of_stages() == 1
        assert ts.current_nodedata().algorithm == "filter"
        assert ts.current_nodedata().algid == str(alg_id)
        assert ts.is_volatile()

    save_res = manager_db.save_data(spark_res[0], alg_name="filter", alg_id=str(alg_id))
    # hardcode net, sta, chan, loc to avoid serialization problem here;
    # they are readonly metadata keys -> non fatal keys = 4
    assert save_res.live
    assert manager_db["history_object"].count_documents({"alg_name": "filter"}) == 1
    doc = manager_db["history_object"].find_one({"alg_name": "filter"})
    assert doc
    assert doc["_id"] == spark_res[0].current_nodedata().uuid
    assert doc["wf_TimeSeries_id"] == spark_res[0]["_id"]
    assert doc["alg_id"] == str(alg_id)
    assert doc["alg_name"] == "filter"
def test_mspass_reduce(self, spark_context):
    manager_db = Database(self.client, "test_manager")
    manager_db["history_global"].delete_many({})
    l = [get_live_timeseries() for i in range(5)]

    # test mspass_reduce for spark
    spark_res = spark_reduce(l, self.manager, spark_context)
    assert (
        manager_db["history_global"].count_documents(
            {"job_name": self.manager.job_name}
        )
        == 1
    )
    assert manager_db["history_global"].count_documents({"alg_name": "stack"}) == 1
    res = manager_db["history_global"].find_one({"alg_name": "stack"})
    assert res["job_id"] == self.manager.job_id
    assert res["job_name"] == self.manager.job_name
    assert res["alg_name"] == "stack"
    assert res["parameters"] == '{"object_history": "True", "alg_id": "2"}'
    spark_alg_id = res["alg_id"]

    # test mspass_reduce for dask
    dask_res = dask_reduce(l, self.manager)
    assert (
        manager_db["history_global"].count_documents(
            {"job_name": self.manager.job_name}
        )
        == 2
    )
    assert manager_db["history_global"].count_documents({"alg_name": "stack"}) == 2
    assert manager_db["history_global"].count_documents({"alg_id": spark_alg_id}) == 1
    docs = manager_db["history_global"].find({"alg_name": "stack"})
    for doc in docs:
        if doc["alg_id"] == spark_alg_id:
            continue
        res = doc
    assert res["job_id"] == self.manager.job_id
    assert res["job_name"] == self.manager.job_name
    assert res["alg_name"] == "stack"
    assert res["parameters"] == '{"object_history": "True", "alg_id": "3"}'
    # different alg -> different alg_id
    assert not res["alg_id"] == spark_alg_id
    dask_alg_id = res["alg_id"]

    # same alg + parameters combination -> same alg_id
    dask_res = dask_reduce(l, self.manager)
    assert (
        manager_db["history_global"].count_documents(
            {"job_name": self.manager.job_name}
        )
        == 3
    )
    assert manager_db["history_global"].count_documents({"alg_name": "stack"}) == 3
    assert manager_db["history_global"].count_documents({"alg_id": dask_alg_id}) == 2
    docs = manager_db["history_global"].find({"alg_id": dask_alg_id})
    doc1 = docs[0]
    doc2 = docs[1]
    assert not doc1["time"] == doc2["time"]
    assert doc1["job_id"] == doc2["job_id"]
    assert doc1["job_name"] == doc2["job_name"]
    assert doc1["alg_name"] == doc2["alg_name"]
    assert doc1["parameters"] == doc2["parameters"]

    # SPARK test user provided alg_name and parameter(exist)
    spark_alg_name = "stack"
    spark_alg_parameters = "object_history=True,alg_id=2"
    spark_res = spark_reduce(
        l,
        self.manager,
        spark_context,
        alg_name=spark_alg_name,
        parameters=spark_alg_parameters,
    )
    assert (
        manager_db["history_global"].count_documents(
            {"job_name": self.manager.job_name}
        )
        == 4
    )
    assert manager_db["history_global"].count_documents({"alg_name": "stack"}) == 4
    assert (
        manager_db["history_global"].count_documents(
            {
                "alg_name": "stack",
                "parameters": '{"object_history": "True", "alg_id": "3"}',
            }
        )
        == 2
    )

    # SPARK test user provided alg_name and parameter(new)
    spark_alg_name = "new_stack"
    spark_alg_parameters = "object_history=True,alg_id=2"
    spark_res = spark_reduce(
        l,
        self.manager,
        spark_context,
        alg_name=spark_alg_name,
        parameters=spark_alg_parameters,
    )
    assert (
        manager_db["history_global"].count_documents(
            {"job_name": self.manager.job_name}
        )
        == 5
    )
    assert manager_db["history_global"].count_documents({"alg_name": "new_stack"}) == 1
    res = manager_db["history_global"].find_one({"alg_name": "new_stack"})
    assert res["job_id"] == self.manager.job_id
    assert res["job_name"] == self.manager.job_name
    assert res["alg_name"] == "new_stack"
    assert res["parameters"] == '{"object_history": "True", "alg_id": "2"}'

    # DASK test user provided alg_name and parameter(exist)
    dask_alg_name = "stack"
    dask_alg_parameters = "object_history=True,alg_id=3"
    dask_res = dask_map(
        l, self.manager, alg_name=dask_alg_name, parameters=dask_alg_parameters
    )
    assert (
        manager_db["history_global"].count_documents(
            {"job_name": self.manager.job_name}
        )
        == 6
    )
    assert manager_db["history_global"].count_documents({"alg_name": "stack"}) == 5
    assert (
        manager_db["history_global"].count_documents(
            {
                "alg_name": "stack",
                "parameters": '{"object_history": "True", "alg_id": "3"}',
            }
        )
        == 3
    )

    # DASK test user provided alg_name and parameter(new)
    dask_alg_name = "new_stack"
    dask_alg_parameters = "object_history=True,alg_id=3"
    dask_res = dask_map(
        l, self.manager, alg_name=dask_alg_name, parameters=dask_alg_parameters
    )
    assert (
        manager_db["history_global"].count_documents(
            {"job_name": self.manager.job_name}
        )
        == 7
    )
    assert (
        manager_db["history_global"].count_documents(
            {
                "alg_name": "new_stack",
                "parameters": '{"object_history": "True", "alg_id": "3"}',
            }
        )
        == 1
    )
    res = manager_db["history_global"].find_one(
        {
            "alg_name": "new_stack",
            "parameters": '{"object_history": "True", "alg_id": "3"}',
        }
    )
    assert res["job_id"] == self.manager.job_id
    assert res["job_name"] == self.manager.job_name
    assert res["alg_name"] == "new_stack"
    assert res["parameters"] == '{"object_history": "True", "alg_id": "3"}'
def test_xcorr_max():
    ts1 = get_live_timeseries()
    tr1 = ts1.toTrace()
    res1 = xcorr_max(ts1)
    res2 = obspy.signal.cross_correlation.xcorr_max(tr1)
    assert res1 == res2
def test_mspass_map(self, spark_context):
    l = [get_live_timeseries() for i in range(5)]
    # add net, sta, chan, loc to avoid metadata serialization problem
    for i in range(5):
        l[i]["chan"] = "HHZ"
        l[i]["loc"] = "test_loc"
        l[i]["net"] = "test_net"
        l[i]["sta"] = "test_sta"
        l[i].set_as_origin("test", "0", str(i), AtomicType.TIMESERIES)

    # test mspass_map for spark
    spark_res = spark_map(l, self.manager, spark_context)
    manager_db = Database(self.client, "test_manager")
    assert (
        manager_db["history_global"].count_documents(
            {"job_name": self.manager.job_name}
        )
        == 1
    )
    res = manager_db["history_global"].find_one({"job_name": self.manager.job_name})
    assert res["job_id"] == self.manager.job_id
    assert res["job_name"] == self.manager.job_name
    assert res["alg_name"] == "filter"
    assert (
        res["parameters"]
        == '{"arg_0": "bandpass", "freqmin": "1", "freqmax": "5", "object_history": "True"}'
    )
    spark_alg_id = res["alg_id"]

    # test mspass_map for dask
    dask_res = dask_map(l, self.manager)
    assert (
        manager_db["history_global"].count_documents(
            {"job_name": self.manager.job_name}
        )
        == 2
    )
    assert manager_db["history_global"].count_documents({"alg_id": spark_alg_id}) == 2
    docs = manager_db["history_global"].find({"alg_id": spark_alg_id})
    assert docs[0]["job_id"] == docs[1]["job_id"] == self.manager.job_id
    assert docs[0]["job_name"] == docs[1]["job_name"] == self.manager.job_name
    assert docs[0]["alg_name"] == docs[1]["alg_name"] == "filter"
    assert (
        docs[0]["parameters"]
        == docs[1]["parameters"]
        == '{"arg_0": "bandpass", "freqmin": "1", "freqmax": "5", "object_history": "True"}'
    )
    assert not docs[0]["time"] == docs[1]["time"]

    # same alg + parameters combination -> same alg_id
    dask_res = dask_map(l, self.manager)
    assert (
        manager_db["history_global"].count_documents(
            {"job_name": self.manager.job_name}
        )
        == 3
    )
    assert manager_db["history_global"].count_documents({"alg_id": spark_alg_id}) == 3

    # SPARK test user provided alg_name and parameter(exist)
    spark_alg_name = "filter"
    spark_alg_parameters = "bandpass,freqmin=1,freqmax=5,object_history=True"
    spark_res = spark_map(
        l,
        self.manager,
        spark_context,
        alg_name=spark_alg_name,
        parameters=spark_alg_parameters,
    )
    assert (
        manager_db["history_global"].count_documents(
            {"job_name": self.manager.job_name}
        )
        == 4
    )
    assert manager_db["history_global"].count_documents({"alg_id": spark_alg_id}) == 4

    # SPARK test user provided alg_name and parameter(new)
    spark_alg_name = "new_filter"
    spark_alg_parameters = "bandpass,freqmin=1,freqmax=5,object_history=True"
    spark_res = spark_map(
        l,
        self.manager,
        spark_context,
        alg_name=spark_alg_name,
        parameters=spark_alg_parameters,
    )
    assert (
        manager_db["history_global"].count_documents(
            {"job_name": self.manager.job_name}
        )
        == 5
    )
    assert manager_db["history_global"].count_documents({"alg_name": "new_filter"}) == 1
    res = manager_db["history_global"].find_one({"alg_name": "new_filter"})
    assert res["job_id"] == self.manager.job_id
    assert res["job_name"] == self.manager.job_name
    assert res["alg_name"] == "new_filter"
    assert (
        res["parameters"]
        == '{"arg_0": "bandpass", "freqmin": "1", "freqmax": "5", "object_history": "True"}'
    )
    new_spark_alg_id = res["alg_id"]
    assert (
        manager_db["history_global"].count_documents({"alg_id": new_spark_alg_id}) == 1
    )

    # DASK test user provided alg_name and parameter(exist)
    dask_alg_name = "filter"
    dask_alg_parameters = "bandpass,freqmin=1,freqmax=5,object_history=True"
    dask_res = dask_map(
        l, self.manager, alg_name=dask_alg_name, parameters=dask_alg_parameters
    )
    assert (
        manager_db["history_global"].count_documents(
            {"job_name": self.manager.job_name}
        )
        == 6
    )
    assert manager_db["history_global"].count_documents({"alg_id": spark_alg_id}) == 5

    # DASK test user provided alg_name and parameter(new)
    dask_alg_name = "new_filter_2"
    dask_alg_parameters = "bandpass,freqmin=1,freqmax=5,object_history=True"
    dask_res = dask_map(
        l, self.manager, alg_name=dask_alg_name, parameters=dask_alg_parameters
    )
    assert (
        manager_db["history_global"].count_documents(
            {"job_name": self.manager.job_name}
        )
        == 7
    )
    assert (
        manager_db["history_global"].count_documents({"alg_name": "new_filter_2"}) == 1
    )
    res = manager_db["history_global"].find_one({"alg_name": "new_filter_2"})
    assert res["job_id"] == self.manager.job_id
    assert res["job_name"] == self.manager.job_name
    assert res["alg_name"] == "new_filter_2"
    assert (
        res["parameters"]
        == '{"arg_0": "bandpass", "freqmin": "1", "freqmax": "5", "object_history": "True"}'
    )
    new_dask_alg_id = res["alg_id"]
    assert (
        manager_db["history_global"].count_documents({"alg_id": new_dask_alg_id}) == 1
    )

    manager_db["history_object"].delete_many({})

    # test spark mspass_map for save_data
    data = spark_context.parallelize(l)
    data_map = data.mspass_map(manager_db.save_data, global_history=self.manager)
    save_list = data_map.collect()
    assert (
        manager_db["history_global"].count_documents(
            {"job_name": self.manager.job_name}
        )
        == 8
    )
    assert manager_db["history_global"].count_documents({"alg_name": "save_data"}) == 1

    # check object history after save_data
    manager_db["history_object"].count_documents({}) == 5
    manager_db["wf_TimeSeries"].count_documents({}) == 5
    history_object_docs = manager_db["history_object"].find({})
    idx = 0
    doc_alg_id = None
    doc_ids = []
    for doc in history_object_docs:
        if not doc_alg_id:
            doc_alg_id = doc["alg_id"]
        else:
            assert doc_alg_id == doc["alg_id"]
        doc_ids.append(doc["_id"])
        assert doc["alg_name"] == "save_data"
        idx += 1
    assert sorted(doc_ids) == ["0", "1", "2", "3", "4"]

    # test spark mspass_map for read_data
    save_l = [res[1] for res in save_list]
    data = spark_context.parallelize(save_l)
    data_map = data.mspass_map(manager_db.read_data, global_history=self.manager)
    read_list = data_map.collect()
    assert (
        manager_db["history_global"].count_documents(
            {"job_name": self.manager.job_name}
        )
        == 9
    )
    assert manager_db["history_global"].count_documents({"alg_name": "read_data"}) == 1

    manager_db["history_object"].delete_many({})
    manager_db["wf_TimeSeries"].delete_many({})

    # test dask mspass_map for save_data
    data = daskbag.from_sequence(l)
    data_map = data.mspass_map(manager_db.save_data, global_history=self.manager)
    save_list = data_map.compute()
    assert (
        manager_db["history_global"].count_documents(
            {"job_name": self.manager.job_name}
        )
        == 10
    )
    assert manager_db["history_global"].count_documents({"alg_name": "save_data"}) == 2
    res = manager_db["history_global"].find({"alg_name": "save_data"})
    assert res[0]["job_id"] == res[1]["job_id"] == self.manager.job_id
    assert res[0]["job_name"] == res[1]["job_name"] == self.manager.job_name
    assert res[0]["alg_name"] == res[1]["alg_name"] == "save_data"
    assert res[0]["parameters"] == res[1]["parameters"] == '{"object_history": "False"}'
    assert res[0]["alg_id"] == res[1]["alg_id"]

    # check object history after save_data
    manager_db["history_object"].count_documents({}) == 5
    manager_db["wf_TimeSeries"].count_documents({}) == 5
    history_object_docs = manager_db["history_object"].find({})
    idx = 0
    doc_alg_id = None
    doc_ids = []
    for doc in history_object_docs:
        if not doc_alg_id:
            doc_alg_id = doc["alg_id"]
        else:
            assert doc_alg_id == doc["alg_id"]
        doc_ids.append(doc["_id"])
        assert doc["alg_name"] == "save_data"
        idx += 1
    assert sorted(doc_ids) == ["0", "1", "2", "3", "4"]

    # test dask mspass_map for read_data
    save_l = [res[1] for res in save_list]
    data = daskbag.from_sequence(save_l)
    data_map = data.mspass_map(manager_db.read_data, global_history=self.manager)
    read_list = data_map.compute()
    assert (
        manager_db["history_global"].count_documents(
            {"job_name": self.manager.job_name}
        )
        == 11
    )
    assert manager_db["history_global"].count_documents({"alg_name": "read_data"}) == 2
    res = manager_db["history_global"].find({"alg_name": "read_data"})
    assert res[0]["job_id"] == res[1]["job_id"] == self.manager.job_id
    assert res[0]["job_name"] == res[1]["job_name"] == self.manager.job_name
    assert res[0]["alg_name"] == res[1]["alg_name"] == "read_data"
    assert res[0]["parameters"] == res[1]["parameters"] == '{"object_history": "False"}'
    assert res[0]["alg_id"] == res[1]["alg_id"]
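
# The tests above build their inputs with helper fixtures such as
# get_live_timeseries(), which live in this suite's helper module.  The sketch
# below only illustrates what such a fixture roughly does (a TimeSeries filled
# with random samples and marked live); the function name, sample count, and
# sample interval here are assumptions for illustration, not the helper
# actually used by these tests.
def make_live_timeseries(npts=255, dt=0.05):
    # assumed import path; adjust to the local mspasspy installation if needed
    from mspasspy.ccore.seismic import TimeSeries, DoubleVector

    ts = TimeSeries()
    ts.npts = npts
    ts.dt = dt
    ts.t0 = 0.0
    # DoubleVector is the C++-backed sample container behind TimeSeries.data
    ts.data = DoubleVector(np.random.rand(npts))
    ts.set_live()
    return ts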