#!/usr/bin/env python

# Tests for gmprocess stream processing, metrics, and the ASDF
# StreamWorkspace container.

import logging
import os
import shutil
import tempfile
import time
import warnings

import numpy as np
import pandas as pd
import pytest
from yaml import YAMLError

# The gmprocess helpers used below (read_data_dir, read_data,
# StreamCollection, StationSummary, StreamWorkspace, write_asdf,
# process_streams, update_config, get_rupture_file, compare_streams)
# and h5py's H5pyDeprecationWarning live in modules whose paths vary
# across gmprocess/h5py versions; import them to match your install.

# `datadir` is assumed to point at the test-data directory holding the
# config files referenced below; adjust for your repository layout.
datadir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "data", "testdata")


def test_process_streams():
    # GeoNet strong-motion data for the 2016 M7.8 Kaikoura, New Zealand
    # earthquake (us1000778i).
    data_files, origin = read_data_dir("geonet", "us1000778i", "*.V1A")
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)
    sc.describe()

    config = update_config(os.path.join(datadir, "config_min_freq_0p2.yml"))

    test = process_streams(sc, origin, config=config)

    logging.info(f"Testing trace: {test[0][1]}")

    assert len(test) == 3
    assert len(test[0]) == 3
    assert len(test[1]) == 3
    assert len(test[2]) == 3

    # The traces can come back in a different order on the Travis Linux
    # container than on a local mac, so checks on individual traces must
    # not depend on trace order; sort the peak values before comparing.
    trace_maxes = np.sort(
        [np.max(np.abs(t.data)) for t in test.select(station="HSES")[0]]
    )
    np.testing.assert_allclose(
        trace_maxes,
        np.array([157.812449, 240.379521, 263.601519]),
        rtol=1e-5,
    )
def generate_workspace():
    """Generate simple HDF5 with ASDF layout for testing."""
    PCOMMANDS = [
        "assemble",
        "process",
    ]
    EVENTID = "us1000778i"
    LABEL = "ptest"
    datafiles, event = read_data_dir("geonet", EVENTID, "*.V1A")

    tdir = tempfile.mkdtemp()
    tfilename = os.path.join(tdir, "workspace.h5")

    raw_data = []
    for dfile in datafiles:
        raw_data += read_data(dfile)
    write_asdf(tfilename, raw_data, event, label="unprocessed")
    del raw_data

    config = update_config(os.path.join(datadir, "config_min_freq_0p2.yml"))
    workspace = StreamWorkspace.open(tfilename)
    raw_streams = workspace.getStreams(EVENTID, labels=["unprocessed"], config=config)
    pstreams = process_streams(raw_streams, event, config=config)
    workspace.addStreams(event, pstreams, label=LABEL)
    workspace.calcMetrics(event.id, labels=[LABEL], config=config)

    return tfilename
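# Hypothetical usage sketch for generate_workspace(): the function returns
# the path to a temporary workspace file, and the caller owns cleanup of
# the enclosing temp directory. The assertion mirrors getEventIds() as
# exercised in _test_workspace() below; the function name is illustrative.
def _example_generate_workspace_use():
    tfilename = generate_workspace()
    try:
        workspace = StreamWorkspace.open(tfilename)
        assert workspace.getEventIds() == ["us1000778i"]
        workspace.close()
    finally:
        shutil.rmtree(os.path.dirname(tfilename))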
def test_metrics():
    eventid = "usb000syza"
    datafiles, event = read_data_dir("knet", eventid, "*")
    # Use the knet data directory, which also contains the test config.
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, "config_min_freq_0p2.yml"))
    # Turn off the sta/lta and SNR checks:
    # newconfig = drop_processing(config, ["check_sta_lta", "compute_snr"])
    # processed_streams = process_streams(raw_streams, event, config=newconfig)
    newconfig = config.copy()
    newconfig["processing"].append(
        {"NNet_QA": {"acceptance_threshold": 0.5, "model_name": "CantWell"}}
    )
    processed_streams = process_streams(raw_streams.copy(), event, config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, "test.hdf")
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, raw_streams, label="raw")
        workspace.addStreams(event, processed_streams, label="processed")
        stream1 = raw_streams[0]

        # Get metrics from station summary for raw streams
        summary1 = StationSummary.from_config(stream1)
        s1_df_in = summary1.pgms.sort_values(["IMT", "IMC"])
        array1 = s1_df_in["Result"].to_numpy()

        # Compare to metrics from getStreamMetrics for raw streams
        workspace.calcMetrics(eventid, labels=["raw"])
        summary1_a = workspace.getStreamMetrics(
            event.id,
            stream1[0].stats.network,
            stream1[0].stats.station,
            "raw",
        )
        s1_df_out = summary1_a.pgms.sort_values(["IMT", "IMC"])
        array2 = s1_df_out["Result"].to_numpy()

        np.testing.assert_allclose(array1, array2, atol=1e-6, rtol=1e-6)
        workspace.close()
    finally:
        shutil.rmtree(tdir)
def test_check_instrument():
    data_files, origin = read_data_dir("fdsn", "nc51194936", "*.mseed")
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)
    sc.describe()

    config = update_config(os.path.join(datadir, "config_test_check_instr.yml"))
    test = process_streams(sc, origin, config=config)

    for sta, expected in [("CVS", True), ("GASB", True), ("SBT", False)]:
        st = test.select(station=sta)[0]
        logging.info(f"Testing stream: {st}")
        assert st.passed == expected
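# `drop_processing` is called below (and in the commented-out lines of
# test_metrics) but is not defined in this section. A minimal sketch is
# given here, assuming each entry of config["processing"] is a single-key
# dict naming one processing step; treat it as an illustrative
# reconstruction rather than the project's own helper.
def drop_processing(config, keys):
    """Return a copy of config with the named processing steps removed."""
    newconfig = config.copy()
    newconfig["processing"] = [
        step for step in newconfig["processing"]
        if not any(key in step for key in keys)
    ]
    return newconfig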
def _test_metrics2():
    eventid = "usb000syza"
    datafiles, event = read_data_dir("knet", eventid, "*")
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, "config_min_freq_0p2.yml"))
    config["metrics"]["output_imts"].append("Arias")
    config["metrics"]["output_imcs"].append("arithmetic_mean")

    # Adjust checks so that streams pass checks for this test
    newconfig = drop_processing(config, ["check_sta_lta"])
    csnr = [s for s in newconfig["processing"] if "compute_snr" in s.keys()][0]
    csnr["compute_snr"]["check"]["threshold"] = -10.0

    processed_streams = process_streams(raw_streams, event, config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, "test.hdf")
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, processed_streams, label="processed")
        workspace.calcMetrics(event.id, labels=["processed"])
        etable, imc_tables1, readmes1 = workspace.getTables("processed")
        assert "ARITHMETIC_MEAN" not in imc_tables1
        assert "ARITHMETIC_MEAN" not in readmes1

        # Recompute metrics with the updated config and confirm the new
        # IMC/IMT combinations appear in the tables.
        del workspace.dataset.auxiliary_data.WaveFormMetrics
        del workspace.dataset.auxiliary_data.StationMetrics
        workspace.calcMetrics(event.id, labels=["processed"], config=config)
        etable2, imc_tables2, readmes2 = workspace.getTables("processed")
        assert "ARITHMETIC_MEAN" in imc_tables2
        assert "ARITHMETIC_MEAN" in readmes2
        assert "ARIAS" in imc_tables2["ARITHMETIC_MEAN"]

        testarray = readmes2["ARITHMETIC_MEAN"]["Column header"].to_numpy()
        assert "ARIAS" in testarray
        workspace.close()
    finally:
        shutil.rmtree(tdir)
def _test_workspace():
    eventid = "us1000778i"
    datafiles, event = read_data_dir("geonet", eventid, "*.V1A")
    tdir = tempfile.mkdtemp()
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=H5pyDeprecationWarning)
            warnings.filterwarnings("ignore", category=YAMLError)
            warnings.filterwarnings("ignore", category=FutureWarning)
            config = update_config(os.path.join(datadir, "config_min_freq_0p2.yml"))
            tfile = os.path.join(tdir, "test.hdf")
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace(tfile)
            t1 = time.time()
            workspace.addStreams(event, raw_streams, label="raw")
            t2 = time.time()
            print("Adding %i streams took %.2f seconds" % (len(raw_streams), t2 - t1))

            str_repr = repr(workspace)
            assert str_repr == "Events: 1 Stations: 3 Streams: 3"

            eventobj = workspace.getEvent(eventid)
            assert eventobj.origins[0].latitude == event.origins[0].latitude
            assert eventobj.magnitudes[0].mag == event.magnitudes[0].mag

            stations = workspace.getStations()
            assert sorted(stations) == ["HSES", "THZ", "WTMC"]

            # Test retrieving an event that doesn't exist
            with pytest.raises(KeyError):
                workspace.getEvent("foo")

            instream = None
            for stream in raw_streams:
                if stream[0].stats.station.lower() == "hses":
                    instream = stream
                    break
            if instream is None:
                raise ValueError("Instream should not be none.")
            outstream = workspace.getStreams(
                eventid, stations=["HSES"], labels=["raw"]
            )[0]
            compare_streams(instream, outstream)

            label_summary = workspace.summarizeLabels()
            assert label_summary.iloc[0]["Label"] == "raw"
            assert label_summary.iloc[0]["Software"] == "gmprocess"

            sc = StreamCollection(raw_streams)
            processed_streams = process_streams(sc, event, config=config)
            workspace.addStreams(event, processed_streams, "processed")

            idlist = workspace.getEventIds()
            assert idlist[0] == eventid

            outstream = workspace.getStreams(
                eventid, stations=["HSES"], labels=["processed"]
            )[0]

            provenance = workspace.getProvenance(eventid, labels=["processed"])
            first_row = pd.Series(
                {
                    "Record": "NZ.HSES.--.HN1_us1000778i_processed",
                    "Processing Step": "Remove Response",
                    "Step Attribute": "input_units",
                    "Attribute Value": "counts",
                }
            )
            last_row = pd.Series(
                {
                    "Record": "NZ.WTMC.--.HNZ_us1000778i_processed",
                    "Processing Step": "Lowpass Filter",
                    "Step Attribute": "number_of_passes",
                    "Attribute Value": 2,
                }
            )
            assert provenance.iloc[0].equals(first_row)
            assert provenance.iloc[-1].equals(last_row)

            # Compare the parameters from the input processed stream
            # to its output equivalent
            instream = None
            for stream in processed_streams:
                if stream[0].stats.station.lower() == "hses":
                    instream = stream
                    break
            if instream is None:
                raise ValueError("Instream should not be none.")
            compare_streams(instream, outstream)
            workspace.close()

            # Read in data from a second event and stash it in the workspace
            eventid = "nz2018p115908"
            datafiles, event = read_data_dir("geonet", eventid, "*.V2A")
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace.open(tfile)
            workspace.addStreams(event, raw_streams, label="foo")

            stations = workspace.getStations()
            eventids = workspace.getEventIds()
            assert eventids == ["us1000778i", "nz2018p115908"]
            instation = raw_streams[0][0].stats.station
            this_stream = workspace.getStreams(
                eventid, stations=[instation], labels=["foo"]
            )[0]
            assert instation == this_stream[0].stats.station
            usid = "us1000778i"
            inventory = workspace.getInventory(usid)
            workspace.close()
            codes = [station.code for station in inventory.networks[0].stations]
            assert sorted(set(codes)) == ["HSES", "THZ", "WPWS", "WTMC"]
    finally:
        shutil.rmtree(tdir)
def _test_vs30_dist_metrics():
    KNOWN_DISTANCES = {
        "epicentral": 5.1,
        "hypocentral": 10.2,
        "rupture": 2.21,
        "rupture_var": np.nan,
        "joyner_boore": 2.21,
        "joyner_boore_var": np.nan,
        "gc2_rx": 2.66,
        "gc2_ry": 3.49,
        "gc2_ry0": 0.00,
        "gc2_U": 34.34,
        "gc2_T": 2.66,
    }
    KNOWN_BAZ = 239.46
    KNOWN_VS30 = 331.47
    eventid = "ci38457511"
    datafiles, event = read_data_dir("fdsn", eventid, "*")
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, "config_min_freq_0p2.yml"))
    processed_streams = process_streams(raw_streams, event, config=config)
    rupture_file = get_rupture_file(datadir)
    grid_file = os.path.join(datadir, "test_grid.grd")
    config["metrics"]["vs30"] = {
        "vs30": {
            "file": grid_file,
            "column_header": "GlobalVs30",
            "readme_entry": "GlobalVs30",
            "units": "m/s",
        }
    }

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, "test.hdf")
        ws = StreamWorkspace(tfile)
        ws.addEvent(event)
        ws.addStreams(event, raw_streams, label="raw")
        ws.addStreams(event, processed_streams, label="processed")
        ws.calcMetrics(
            event.id, rupture_file=rupture_file, labels=["processed"], config=config
        )
        sta_sum = ws.getStreamMetrics(event.id, "CI", "CLC", "processed")

        for dist in sta_sum.distances:
            np.testing.assert_allclose(
                sta_sum.distances[dist], KNOWN_DISTANCES[dist], rtol=0.01
            )
        np.testing.assert_allclose(sta_sum._back_azimuth, KNOWN_BAZ, rtol=0.01)
        np.testing.assert_allclose(sta_sum._vs30["vs30"]["value"], KNOWN_VS30, rtol=0.01)

        event_df, imc_tables, readme_tables = ws.getTables("processed")
        ws.close()
        check_cols = set(
            [
                "EpicentralDistance",
                "HypocentralDistance",
                "RuptureDistance",
                "RuptureDistanceVar",
                "JoynerBooreDistance",
                "JoynerBooreDistanceVar",
                "GC2_rx",
                "GC2_ry",
                "GC2_ry0",
                "GC2_U",
                "GC2_T",
                "GlobalVs30",
                "BackAzimuth",
            ]
        )
        assert check_cols.issubset(set(readme_tables["Z"]["Column header"]))
        assert check_cols.issubset(set(imc_tables["Z"].columns))
    finally:
        shutil.rmtree(tdir)
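# Allow the module to be run directly; the leading underscore keeps the
# disabled tests out of pytest collection, so only the active tests run
# here. Setting CALLED_FROM_PYTEST mirrors a convention used in other
# gmprocess test modules and is an assumption here.
if __name__ == "__main__":
    os.environ["CALLED_FROM_PYTEST"] = "True"
    test_process_streams()
    test_metrics()
    test_check_instrument()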