def generate_workspace():
    """Generate simple HDF5 with ASDF layout for testing."""
    PCOMMANDS = [
        "assemble",
        "process",
    ]
    EVENTID = "us1000778i"
    LABEL = "ptest"
    datafiles, event = read_data_dir("geonet", EVENTID, "*.V1A")

    tdir = tempfile.mkdtemp()
    tfilename = os.path.join(tdir, "workspace.h5")

    raw_data = []
    for dfile in datafiles:
        raw_data += read_data(dfile)
    write_asdf(tfilename, raw_data, event, label="unprocessed")
    del raw_data

    config = update_config(os.path.join(datadir, "config_min_freq_0p2.yml"))

    workspace = StreamWorkspace.open(tfilename)
    raw_streams = workspace.getStreams(
        EVENTID, labels=["unprocessed"], config=config)
    pstreams = process_streams(raw_streams, event, config=config)
    workspace.addStreams(event, pstreams, label=LABEL)
    workspace.calcMetrics(event.id, labels=[LABEL], config=config)

    return tfilename
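# A minimal sketch (not part of the original module) of how a caller might
# consume the workspace file returned by generate_workspace() and clean up
# afterwards. The function name below is hypothetical; the "ptest" label and
# event id are the ones written by generate_workspace() above.
def _example_use_of_generated_workspace():
    tfilename = generate_workspace()
    try:
        workspace = StreamWorkspace.open(tfilename)
        # Retrieve the processed streams written under the "ptest" label.
        pstreams = workspace.getStreams("us1000778i", labels=["ptest"])
        assert len(pstreams) > 0
        workspace.close()
    finally:
        # generate_workspace() creates the file in a fresh temp directory,
        # so remove that directory when done.
        shutil.rmtree(os.path.dirname(tfilename))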
def test_zero_crossings():
    datapath = os.path.join("data", "testdata", "zero_crossings")
    datadir = pkg_resources.resource_filename("gmprocess", datapath)
    sc = StreamCollection.from_directory(datadir)
    sc.describe()

    conf = get_config()
    update = {
        "processing": [
            {"detrend": {"detrending_method": "demean"}},
            {"check_zero_crossings": {"min_crossings": 1}},
        ]
    }
    update_dict(conf, update)

    edict = {
        "id": "ak20419010",
        "time": UTCDateTime("2018-11-30T17:29:29"),
        "lat": 61.346,
        "lon": -149.955,
        "depth": 46.7,
        "magnitude": 7.1,
    }
    event = get_event_object(edict)
    test = process_streams(sc, event, conf)
    for st in test:
        for tr in st:
            assert tr.hasParameter("ZeroCrossingRate")
    np.testing.assert_allclose(
        test[0][0].getParameter("ZeroCrossingRate")["crossing_rate"],
        0.008888888888888889,
        atol=1e-5,
    )
def test_process_streams():
    # Loma Prieta test station (nc216859)
    data_files, origin = read_data_dir('geonet', 'us1000778i', '*.V1A')
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)
    sc.describe()

    config = update_config(os.path.join(datadir, 'config_min_freq_0p2.yml'))

    test = process_streams(sc, origin, config=config)

    logging.info('Testing trace: %s' % test[0][1])

    assert len(test) == 3
    assert len(test[0]) == 3
    assert len(test[1]) == 3
    assert len(test[2]) == 3

    # The traces can end up in a different order on the Travis linux
    # container than on a local mac, so tests of individual traces must not
    # depend on trace order.
    trace_maxes = np.sort(
        [np.max(np.abs(t.data)) for t in test.select(station='HSES')[0]])

    np.testing.assert_allclose(
        trace_maxes,
        np.array([157.81975508, 240.33718094, 263.67804256]),
        rtol=1e-5)
def test_allow_nans():
    dpath = os.path.join("data", "testdata", "fdsn", "uu60363602")
    datadir = pkg_resources.resource_filename("gmprocess", dpath)
    sc = StreamCollection.from_directory(datadir)
    origin = read_event_json_files([os.path.join(datadir, "event.json")])[0]
    psc = process_streams(sc, origin)
    st = psc[0]

    ss = StationSummary.from_stream(
        st,
        components=["quadratic_mean"],
        imts=["FAS(4.0)"],
        bandwidth=300,
        allow_nans=True,
    )
    assert np.isnan(ss.pgms.Result).all()

    ss = StationSummary.from_stream(
        st,
        components=["quadratic_mean"],
        imts=["FAS(4.0)"],
        bandwidth=189,
        allow_nans=False,
    )
    assert ~np.isnan(ss.pgms.Result).all()
def _test_colocated():
    eventid = "ci38445975"
    datafiles, event = read_data_dir("fdsn", eventid, "*")
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config_file = os.path.join(datadir, "test_config.yml")
    with open(config_file, "r", encoding="utf-8") as f:
        yaml = YAML()
        yaml.preserve_quotes = True
        config = yaml.load(f)
    processed_streams = process_streams(raw_streams, event, config=config)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, "test.hdf")
        ws = StreamWorkspace(tfile)
        ws.addEvent(event)
        ws.addStreams(event, raw_streams, label="raw")
        ws.addStreams(event, processed_streams, label="processed")
        ws.calcMetrics(eventid, labels=["processed"], config=config)
        stasum = ws.getStreamMetrics(eventid, "CI", "MIKB", "processed")
        np.testing.assert_allclose(
            stasum.get_pgm("duration", "geometric_mean"), 38.94480068)
        ws.close()
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)
def _process_event(self, event):
    event_dir = os.path.join(self.gmrecords.data_path, event.id)
    workname = os.path.join(event_dir, WORKSPACE_NAME)
    if not os.path.isfile(workname):
        logging.info('No workspace file found for event %s. Please run '
                     'subcommand \'assemble\' to generate workspace file.'
                     % event.id)
        logging.info('Continuing to next event.')
        return event.id

    workspace = StreamWorkspace.open(workname)
    rstreams = workspace.getStreams(event.id,
                                    labels=['unprocessed'],
                                    config=self.gmrecords.conf)

    if len(rstreams):
        logging.info('Processing \'%s\' streams for event %s...'
                     % ('unprocessed', event.id))
        pstreams = process_streams(rstreams, event,
                                   config=self.gmrecords.conf)
        workspace.addStreams(event, pstreams, label=self.process_tag)
    else:
        logging.info('No streams found. Nothing to do. Goodbye.')

    workspace.close()
    return event.id
def generate_workspace():
    """Generate simple HDF5 with ASDF layout for testing.
    """
    PCOMMANDS = [
        'assemble',
        'process',
    ]
    EVENTID = 'us1000778i'
    LABEL = 'ptest'
    datafiles, event = read_data_dir('geonet', EVENTID, '*.V1A')

    tdir = tempfile.mkdtemp()
    tfilename = os.path.join(tdir, 'workspace.h5')

    raw_data = []
    for dfile in datafiles:
        raw_data += read_data(dfile)
    write_asdf(tfilename, raw_data, event, label="unprocessed")
    del raw_data

    config = update_config(os.path.join(datadir, 'config_min_freq_0p2.yml'))

    workspace = StreamWorkspace.open(tfilename)
    raw_streams = workspace.getStreams(EVENTID, labels=['unprocessed'])
    pstreams = process_streams(raw_streams, event, config=config)
    workspace.addStreams(event, pstreams, label=LABEL)
    workspace.calcMetrics(event.id, labels=[LABEL], config=config)

    return tfilename
def test_process_streams():
    # Loma Prieta test station (nc216859)
    data_files, origin = read_data_dir("geonet", "us1000778i", "*.V1A")
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)
    sc.describe()

    config = update_config(os.path.join(datadir, "config_min_freq_0p2.yml"))

    test = process_streams(sc, origin, config=config)

    logging.info(f"Testing trace: {test[0][1]}")

    assert len(test) == 3
    assert len(test[0]) == 3
    assert len(test[1]) == 3
    assert len(test[2]) == 3

    # The traces can end up in a different order on the Travis linux
    # container than on a local mac, so tests of individual traces must not
    # depend on trace order.
    trace_maxes = np.sort(
        [np.max(np.abs(t.data)) for t in test.select(station="HSES")[0]])

    np.testing.assert_allclose(
        trace_maxes,
        np.array([157.812449, 240.379521, 263.601519]),
        rtol=1e-5)
def _test_colocated():
    eventid = 'ci38445975'
    datafiles, event = read_data_dir('fdsn', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config_file = os.path.join(datadir, 'test_config.yml')
    with open(config_file, 'r', encoding='utf-8') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
    processed_streams = process_streams(raw_streams, event, config=config)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        ws = StreamWorkspace(tfile)
        ws.addEvent(event)
        ws.addStreams(event, raw_streams, label='raw')
        ws.addStreams(event, processed_streams, label='processed')
        ws.calcMetrics(eventid, labels=['processed'], config=config)
        stasum = ws.getStreamMetrics(eventid, 'CI', 'MIKB', 'processed')
        np.testing.assert_allclose(
            stasum.get_pgm('duration', 'geometric_mean'), 38.94480068)
        ws.close()
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)
def test():
    # Read in data with only one stationxml entry
    data_files, origin = read_data_dir("station_xml_epochs",
                                       "nc73631381", "*.mseed")
    test_root = os.path.normpath(os.path.join(data_files[0], os.pardir))
    sc = StreamCollection.from_directory(test_root)
    psc = process_streams(sc, origin)

    # Read in data with all dates in stationxml
    data_files, origin = read_data_dir("station_xml_epochs",
                                       "nc73631381_ad", "*.mseed")
    test_root = os.path.normpath(os.path.join(data_files[0], os.pardir))
    sc_ad = StreamCollection.from_directory(test_root)
    psc_ad = process_streams(sc_ad, origin)

    single_maxes = np.sort([np.max(tr.data) for tr in psc[0]])
    alldates_maxes = np.sort([np.max(tr.data) for tr in psc_ad[0]])
    assert_allclose(single_maxes, alldates_maxes)
def test_weird_sensitivity():
    datafiles, origin = read_data_dir("fdsn", "us70008dx7", "SL.KOGS*.mseed")
    streams = []
    for datafile in datafiles:
        streams += read_obspy(datafile)
    sc = StreamCollection(streams)
    psc = process_streams(sc, origin)
    channel = psc[0].select(component="E")[0]
    assert_almost_equal(channel.data.max(), 62900.197618074293)
def test_weird_sensitivity():
    datafiles, origin = read_data_dir('fdsn', 'us70008dx7', 'SL.KOGS*.mseed')
    streams = []
    for datafile in datafiles:
        streams += read_obspy(datafile)
    sc = StreamCollection(streams)
    psc = process_streams(sc, origin)
    channel = psc[0].select(component='E')[0]
    assert_almost_equal(channel.data.max(), 62900.191900393373)
def test_metrics():
    eventid = "usb000syza"
    datafiles, event = read_data_dir("knet", eventid, "*")
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, "config_min_freq_0p2.yml"))
    # turn off sta/lta check and snr checks
    # newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr'])
    # processed_streams = process_streams(raw_streams, event,
    #                                     config=newconfig)
    newconfig = config.copy()
    newconfig["processing"].append(
        {"NNet_QA": {
            "acceptance_threshold": 0.5,
            "model_name": "CantWell"
        }})
    processed_streams = process_streams(raw_streams.copy(), event,
                                        config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, "test.hdf")
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, raw_streams, label="raw")
        workspace.addStreams(event, processed_streams, label="processed")
        stream1 = raw_streams[0]

        # Get metrics from station summary for raw streams
        summary1 = StationSummary.from_config(stream1)
        s1_df_in = summary1.pgms.sort_values(["IMT", "IMC"])
        array1 = s1_df_in["Result"].to_numpy()

        # Compare to metrics from getStreamMetrics for raw streams
        workspace.calcMetrics(eventid, labels=["raw"])
        summary1_a = workspace.getStreamMetrics(event.id,
                                                stream1[0].stats.network,
                                                stream1[0].stats.station,
                                                "raw")
        s1_df_out = summary1_a.pgms.sort_values(["IMT", "IMC"])
        array2 = s1_df_out["Result"].to_numpy()

        np.testing.assert_allclose(array1, array2, atol=1e-6, rtol=1e-6)
        workspace.close()
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)
def test_nnet():
    conf = get_config()

    update = {
        "processing": [
            {"detrend": {"detrending_method": "demean"}},
            {"detrend": {"detrending_method": "linear"}},
            {"compute_snr": {
                "bandwidth": 20.0,
                "check": {
                    "max_freq": 5.0,
                    "min_freq": 0.2,
                    "threshold": 3.0
                },
            }},
            {"NNet_QA": {
                "acceptance_threshold": 0.5,
                "model_name": "CantWell"
            }},
        ]
    }
    update_dict(conf, update)

    data_files, origin = read_data_dir("geonet", "us1000778i", "*.V1A")
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)
    test = process_streams(sc, origin, conf)
    tstream = test.select(station="HSES")[0]
    nnet_dict = tstream.getStreamParam("nnet_qa")
    np.testing.assert_allclose(nnet_dict["score_HQ"],
                               0.99321798811740059,
                               rtol=1e-3)
def test_metrics():
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, 'config_min_freq_0p2.yml'))
    # turn off sta/lta check and snr checks
    # newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr'])
    # processed_streams = process_streams(raw_streams, event,
    #                                     config=newconfig)
    newconfig = config.copy()
    newconfig['processing'].append(
        {'NNet_QA': {
            'acceptance_threshold': 0.5,
            'model_name': 'CantWell'
        }})
    processed_streams = process_streams(raw_streams.copy(), event,
                                        config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, raw_streams, label='raw')
        workspace.addStreams(event, processed_streams, label='processed')
        stream1 = raw_streams[0]

        # Get metrics from station summary for raw streams
        summary1 = StationSummary.from_config(stream1)
        s1_df_in = summary1.pgms.sort_values(['IMT', 'IMC'])
        array1 = s1_df_in['Result'].to_numpy()

        # Compare to metrics from getStreamMetrics for raw streams
        workspace.calcMetrics(eventid, labels=['raw'])
        summary1_a = workspace.getStreamMetrics(event.id,
                                                stream1[0].stats.network,
                                                stream1[0].stats.station,
                                                'raw')
        s1_df_out = summary1_a.pgms.sort_values(['IMT', 'IMC'])
        array2 = s1_df_out['Result'].to_numpy()

        np.testing.assert_allclose(array1, array2, atol=1e-6, rtol=1e-6)
        workspace.close()
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)
def test_check_instrument():
    data_files, origin = read_data_dir("fdsn", "nc51194936", "*.mseed")
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)
    sc.describe()

    config = update_config(os.path.join(datadir,
                                        "config_test_check_instr.yml"))
    test = process_streams(sc, origin, config=config)

    for sta, expected in [("CVS", True), ("GASB", True), ("SBT", False)]:
        st = test.select(station=sta)[0]
        logging.info(f"Testing stream: {st}")
        assert st.passed == expected
def test_check_instrument():
    data_files, origin = read_data_dir('fdsn', 'nc51194936', '*.mseed')
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)
    sc.describe()

    config = update_config(os.path.join(datadir,
                                        'config_test_check_instr.yml'))
    test = process_streams(sc, origin, config=config)

    for sta, expected in [('CVS', True), ('GASB', True), ('SBT', False)]:
        st = test.select(station=sta)[0]
        logging.info('Testing stream: %s' % st)
        assert st.passed == expected
def test_nnet():
    conf = get_config()

    update = {
        'processing': [
            {'detrend': {'detrending_method': 'demean'}},
            {'detrend': {'detrending_method': 'linear'}},
            {'compute_snr': {
                'bandwidth': 20.0,
                'check': {
                    'max_freq': 5.0,
                    'min_freq': 0.2,
                    'threshold': 3.0
                }
            }},
            {'NNet_QA': {
                'acceptance_threshold': 0.5,
                'model_name': 'CantWell'
            }}
        ]
    }
    update_dict(conf, update)

    data_files, origin = read_data_dir('geonet', 'us1000778i', '*.V1A')
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)
    test = process_streams(sc, origin, conf)
    tstream = test.select(station='HSES')[0]
    nnet_dict = tstream.getStreamParam('nnet_qa')
    np.testing.assert_allclose(nnet_dict['score_HQ'],
                               0.99321798811740059,
                               rtol=1e-3)
def test_allow_nans():
    dpath = os.path.join('data', 'testdata', 'fdsn', 'uu60363602')
    datadir = pkg_resources.resource_filename('gmprocess', dpath)
    sc = StreamCollection.from_directory(datadir)
    origin = read_event_json_files([os.path.join(datadir, 'event.json')])[0]
    psc = process_streams(sc, origin)
    st = psc[0]

    ss = StationSummary.from_stream(st,
                                    components=['quadratic_mean'],
                                    imts=['FAS(4.0)'],
                                    bandwidth=189,
                                    allow_nans=True)
    assert np.isnan(ss.pgms.Result).all()

    ss = StationSummary.from_stream(st,
                                    components=['quadratic_mean'],
                                    imts=['FAS(4.0)'],
                                    bandwidth=189,
                                    allow_nans=False)
    assert ~np.isnan(ss.pgms.Result).all()
def _test_metrics2():
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, 'config_min_freq_0p2.yml'))
    config['metrics']['output_imts'].append('Arias')
    config['metrics']['output_imcs'].append('arithmetic_mean')

    # Adjust checks so that streams pass checks for this test
    newconfig = drop_processing(config, ['check_sta_lta'])
    csnr = [s for s in newconfig['processing']
            if 'compute_snr' in s.keys()][0]
    csnr['compute_snr']['check']['threshold'] = -10.0

    processed_streams = process_streams(raw_streams, event, config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, processed_streams, label='processed')
        workspace.calcMetrics(event.id, labels=['processed'])
        etable, imc_tables1, readmes1 = workspace.getTables('processed')
        assert 'ARITHMETIC_MEAN' not in imc_tables1
        assert 'ARITHMETIC_MEAN' not in readmes1

        del workspace.dataset.auxiliary_data.WaveFormMetrics
        del workspace.dataset.auxiliary_data.StationMetrics
        workspace.calcMetrics(event.id, labels=['processed'], config=config)
        etable2, imc_tables2, readmes2 = workspace.getTables('processed')
        assert 'ARITHMETIC_MEAN' in imc_tables2
        assert 'ARITHMETIC_MEAN' in readmes2
        assert 'ARIAS' in imc_tables2['ARITHMETIC_MEAN']
        testarray = readmes2['ARITHMETIC_MEAN']['Column header'].to_numpy()
        assert 'ARIAS' in testarray
        workspace.close()
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)
def test_zero_crossings():
    datapath = os.path.join('data', 'testdata', 'zero_crossings')
    datadir = pkg_resources.resource_filename('gmprocess', datapath)
    sc = StreamCollection.from_directory(datadir)
    sc.describe()

    conf = get_config()
    update = {
        'processing': [
            {'detrend': {'detrending_method': 'demean'}},
            {'check_zero_crossings': {'min_crossings': 1}}
        ]
    }
    update_dict(conf, update)

    edict = {
        'id': 'ak20419010',
        'time': UTCDateTime('2018-11-30T17:29:29'),
        'lat': 61.346,
        'lon': -149.955,
        'depth': 46.7,
        'magnitude': 7.1
    }
    event = get_event_object(edict)
    test = process_streams(sc, event, conf)
    for st in test:
        for tr in st:
            assert tr.hasParameter('ZeroCrossingRate')
    np.testing.assert_allclose(
        test[0][0].getParameter('ZeroCrossingRate')['crossing_rate'],
        0.008888888888888889,
        atol=1e-5)
def _test_metrics2():
    eventid = "usb000syza"
    datafiles, event = read_data_dir("knet", eventid, "*")
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, "config_min_freq_0p2.yml"))
    config["metrics"]["output_imts"].append("Arias")
    config["metrics"]["output_imcs"].append("arithmetic_mean")

    # Adjust checks so that streams pass checks for this test
    newconfig = drop_processing(config, ["check_sta_lta"])
    csnr = [s for s in newconfig["processing"]
            if "compute_snr" in s.keys()][0]
    csnr["compute_snr"]["check"]["threshold"] = -10.0

    processed_streams = process_streams(raw_streams, event, config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, "test.hdf")
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, processed_streams, label="processed")
        workspace.calcMetrics(event.id, labels=["processed"])
        etable, imc_tables1, readmes1 = workspace.getTables("processed")
        assert "ARITHMETIC_MEAN" not in imc_tables1
        assert "ARITHMETIC_MEAN" not in readmes1

        del workspace.dataset.auxiliary_data.WaveFormMetrics
        del workspace.dataset.auxiliary_data.StationMetrics
        workspace.calcMetrics(event.id, labels=["processed"], config=config)
        etable2, imc_tables2, readmes2 = workspace.getTables("processed")
        assert "ARITHMETIC_MEAN" in imc_tables2
        assert "ARITHMETIC_MEAN" in readmes2
        assert "ARIAS" in imc_tables2["ARITHMETIC_MEAN"]
        testarray = readmes2["ARITHMETIC_MEAN"]["Column header"].to_numpy()
        assert "ARIAS" in testarray
        workspace.close()
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)
def test_free_field():
    data_files, origin = read_data_dir("kiknet", "usp000hzq8")
    raw_streams = []
    for dfile in data_files:
        raw_streams += read_data(dfile)

    sc = StreamCollection(raw_streams)
    processed_streams = process_streams(sc, origin)

    # all of these streams should have failed for different reasons
    npassed = np.sum([pstream.passed for pstream in processed_streams])
    assert npassed == 0

    for pstream in processed_streams:
        is_free = pstream[0].free_field
        reason = ""
        for trace in pstream:
            if trace.hasParameter("failure"):
                reason = trace.getParameter("failure")["reason"]
                break
        if is_free:
            assert reason.startswith("Failed")
        else:
            assert reason == "Failed free field sensor check."
def test_lowpass_max():
    datapath = os.path.join('data', 'testdata', 'lowpass_max')
    datadir = pkg_resources.resource_filename('gmprocess', datapath)
    sc = StreamCollection.from_directory(datadir)
    sc.describe()

    conf = get_config()
    update = {
        'processing': [
            {'detrend': {'detrending_method': 'demean'}},
            {'remove_response': {
                'f1': 0.001,
                'f2': 0.005,
                'f3': None,
                'f4': None,
                'output': 'ACC',
                'water_level': 60
            }},
            # {'detrend': {'detrending_method': 'linear'}},
            # {'detrend': {'detrending_method': 'demean'}},
            {'get_corner_frequencies': {
                'constant': {
                    'highpass': 0.08,
                    'lowpass': 20.0
                },
                'method': 'constant',
                'snr': {'same_horiz': True}
            }},
            {'lowpass_max_frequency': {'fn_fac': 0.9}}
        ]
    }
    update_dict(conf, update)

    update = {
        'windows': {
            'signal_end': {
                'method': 'model',
                'vmin': 1.0,
                'floor': 120,
                'model': 'AS16',
                'epsilon': 2.0
            },
            'window_checks': {
                'do_check': False,
                'min_noise_duration': 1.0,
                'min_signal_duration': 1.0
            }
        }
    }
    update_dict(conf, update)

    edict = {
        'id': 'ci38038071',
        'time': UTCDateTime('2018-08-30 02:35:36'),
        'lat': 34.136,
        'lon': -117.775,
        'depth': 5.5,
        'magnitude': 4.4
    }
    event = get_event_object(edict)
    test = process_streams(sc, event, conf)
    for st in test:
        for tr in st:
            freq_dict = tr.getParameter('corner_frequencies')
            np.testing.assert_allclose(freq_dict['lowpass'], 18.0)
def process_event(event, outdir, pcommands, config, input_directory,
                  process_tag, files_created, output_format, status,
                  recompute_metrics, export_dir=None):

    # setup logging to write to the input logfile
    argthing = namedtuple('args', ['debug', 'quiet'])
    args = argthing(debug=True, quiet=False)
    setup_logger(args)

    logger = logging.getLogger()
    stream_handler = logger.handlers[0]
    logfile = os.path.join(outdir, '%s.log' % event.id)
    fhandler = logging.FileHandler(logfile)
    logger.removeHandler(stream_handler)
    logger.addHandler(fhandler)

    event_dir = os.path.join(outdir, event.id)
    if not os.path.exists(event_dir):
        os.makedirs(event_dir)

    workname = os.path.join(event_dir, WORKSPACE_NAME)
    workspace_exists = os.path.isfile(workname)
    workspace_has_processed = False
    workspace = None
    processing_done = False

    if workspace_exists:
        workspace = StreamWorkspace.open(workname)
        labels = workspace.getLabels()
        if len(labels):
            labels.remove('unprocessed')
        elif 'assemble' not in pcommands:
            print('No data in workspace. Please run assemble.')
            sys.exit(1)

        if len(labels) == 1:
            process_tag = labels[0]
            workspace_has_processed = True
        else:
            if 'process' not in pcommands:
                fmt = '\nThere are %i sets of processed data in %s.'
                tpl = (len(labels), workname)
                print(fmt % tpl)
                print(('This software can only handle one set of '
                       'processed data. Exiting.\n'))
                sys.exit(1)

    download_done = False

    # Need to initialize rstreams/pstreams
    rstreams = []
    pstreams = []

    rupture_file = None
    if 'assemble' in pcommands:
        logging.info('Downloading/loading raw streams...')
        workspace, workspace_file, rstreams, rupture_file = download(
            event, event_dir, config, input_directory)

        download_done = True
        append_file(files_created, 'Workspace', workname)
    else:
        if not workspace_exists:
            print('\nYou opted not to download or process from input.')
            print('No previous HDF workspace file could be found.')
            print('Try re-running with the assemble command with or ')
            print('without the --directory option.\n')
            sys.exit(1)
        if 'process' in pcommands:
            logging.info('Getting raw streams from workspace...')
            with warnings.catch_warnings():
                warnings.simplefilter("ignore",
                                      category=H5pyDeprecationWarning)
                rstreams = workspace.getStreams(event.id,
                                                labels=['unprocessed'])
            download_done = True
        else:
            need_processed = set(['report', 'shakemap'])
            need_pstreams = len(need_processed.intersection(pcommands))
            if workspace_has_processed:
                if need_pstreams:
                    logging.info(
                        'Getting processed streams from workspace...')
                    with warnings.catch_warnings():
                        warnings.simplefilter(
                            "ignore", category=H5pyDeprecationWarning)
                        pstreams = workspace.getStreams(
                            event.id, labels=[process_tag])
                download_done = True
                processing_done = True

    if ('process' in pcommands and download_done and not processing_done
            and len(rstreams)):
        logging.info('Processing raw streams for event %s...' % event.id)
        pstreams = process_streams(rstreams, event, config=config)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=H5pyDeprecationWarning)
            workspace.addStreams(event, pstreams, label=process_tag)
            workspace.calcMetrics(event.id,
                                  labels=[process_tag],
                                  config=config,
                                  streams=pstreams,
                                  stream_label=process_tag,
                                  rupture_file=rupture_file)
        processing_done = True

    if 'export' in pcommands:
        if export_dir is not None:
            if not os.path.isdir(export_dir):
                os.makedirs(export_dir)
            outdir = export_dir

        labels = workspace.getLabels()
        if 'unprocessed' not in labels:
            fmt = ('Workspace file "%s" appears to have no unprocessed '
                   'data. Skipping.')
            logging.info(fmt % workspace_file)
        else:
            labels.remove('unprocessed')
            if not labels:
                fmt = ('Workspace file "%s" appears to have no processed '
                       'data. Skipping.')
                logging.info(fmt % workspace_file)
            else:
                logging.info('Creating tables for event %s...', event.id)
                with warnings.catch_warnings():
                    warnings.simplefilter(
                        "ignore", category=H5pyDeprecationWarning)
                    if recompute_metrics:
                        del workspace.dataset.auxiliary_data.WaveFormMetrics
                        del workspace.dataset.auxiliary_data.StationMetrics
                        workspace.calcMetrics(event.id,
                                              labels=labels,
                                              config=config,
                                              rupture_file=rupture_file)
                    event_table, imc_tables, readmes = workspace.getTables(
                        labels[0], streams=pstreams,
                        stream_label=process_tag)
                    ev_fit_spec, fit_readme = workspace.getFitSpectraTable(
                        event.id, labels[0], pstreams)

                # Set the precisions for the imc tables, event table, and
                # fit_spectra table before writing
                imc_tables_formatted = {}
                for imc, imc_table in imc_tables.items():
                    imc_tables_formatted[imc] = set_precisions(imc_table)
                event_table_formatted = set_precisions(event_table)
                df_fit_spectra_formatted = set_precisions(ev_fit_spec)

                if not os.path.isdir(outdir):
                    os.makedirs(outdir)

                filenames = ['events'] + \
                    [imc.lower() for imc in imc_tables_formatted.keys()] + \
                    [imc.lower() + '_README' for imc in readmes.keys()] + \
                    ['fit_spectra_parameters',
                     'fit_spectra_parameters_README']

                files = [event_table_formatted] + list(
                    imc_tables_formatted.values()) + list(
                        readmes.values()) + [
                            df_fit_spectra_formatted, fit_readme]

                if output_format != 'csv':
                    output_format = 'xlsx'

                for filename, df in dict(zip(filenames, files)).items():
                    filepath = os.path.join(
                        outdir, filename + '.%s' % output_format)
                    if os.path.exists(filepath):
                        if 'README' in filename:
                            continue
                        else:
                            mode = 'a'
                            header = False
                    else:
                        mode = 'w'
                        header = True
                    append_file(files_created, 'Tables', filepath)
                    if output_format == 'csv':
                        df.to_csv(filepath, index=False,
                                  float_format=DEFAULT_FLOAT_FORMAT,
                                  na_rep=DEFAULT_NA_REP,
                                  mode=mode, header=header)
                    else:
                        df.to_excel(filepath, index=False,
                                    float_format=DEFAULT_FLOAT_FORMAT,
                                    na_rep=DEFAULT_NA_REP,
                                    mode=mode, header=header)

    if ('report' in pcommands and processing_done and len(pstreams)):
        logging.info('Creating diagnostic plots for event %s...' % event.id)
        plot_dir = os.path.join(event_dir, 'plots')
        if not os.path.isdir(plot_dir):
            os.makedirs(plot_dir)
        for stream in pstreams:
            summary_plots(stream, plot_dir, event)

        mapfile = draw_stations_map(pstreams, event, event_dir)
        plot_moveout(pstreams, event.latitude, event.longitude,
                     file=os.path.join(event_dir, 'moveout_plot.png'))
        append_file(files_created, 'Station map', mapfile)
        append_file(files_created, 'Moveout plot', 'moveout_plot.png')

        logging.info(
            'Creating diagnostic report for event %s...' % event.id)

        # Build the summary report?
        build_conf = config['build_report']
        report_format = build_conf['format']
        if report_format == 'latex':
            report_file, success = build_report_latex(pstreams,
                                                      event_dir,
                                                      event,
                                                      config=config)
        else:
            report_file = ''
            success = False
        if os.path.isfile(report_file) and success:
            append_file(files_created, 'Summary report', report_file)

    if 'provenance' in pcommands and processing_done and len(pstreams):
        logging.info(
            'Creating provenance table for event %s...' % event.id)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=H5pyDeprecationWarning)
            provdata = workspace.getProvenance(event.id,
                                               labels=[process_tag])
        if output_format == 'csv':
            csvfile = os.path.join(event_dir, 'provenance.csv')
            append_file(files_created, 'Provenance', csvfile)
            provdata.to_csv(csvfile)
        else:
            excelfile = os.path.join(event_dir, 'provenance.xlsx')
            append_file(files_created, 'Provenance', excelfile)
            provdata.to_excel(excelfile, index=False)

    if 'shakemap' in pcommands and processing_done and len(pstreams):
        logging.info('Creating shakemap table for event %s...' % event.id)
        shakemap_file, jsonfile = save_shakemap_amps(pstreams, event,
                                                     event_dir)
        append_file(files_created, 'shakemap', shakemap_file)
        append_file(files_created, 'shakemap', jsonfile)

    if status and processing_done and len(pstreams):
        if status == 'short':
            index = 'Failure reason'
            col = ['Number of records']
        elif status == 'long':
            index = 'Station ID'
            col = ['Failure reason']
        elif status == 'net':
            index = 'Network'
            col = ['Number of passed records', 'Number of failed records']

        status_info = pstreams.get_status(status)
        status_info.to_csv(os.path.join(event_dir, 'status.csv'),
                           header=col, index_label=index)

    # since we don't know how many events users will be processing,
    # let's guard against memory issues by clearing out the big data
    # structures
    workspace.close()

    logging.info('Finishing event %s' % event.id)

    return workname
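# Illustrative only: a minimal sketch of driving process_event() above for a
# single event, reusing get_event_object() the same way the tests do. The
# event dictionary, output directory, command list, and the empty
# files_created container are hypothetical, not taken from the original
# script.
def _example_process_event_call():
    edict = {
        'id': 'ci38038071',
        'time': UTCDateTime('2018-08-30 02:35:36'),
        'lat': 34.136,
        'lon': -117.775,
        'depth': 5.5,
        'magnitude': 4.4,
    }
    event = get_event_object(edict)
    outdir = tempfile.mkdtemp()
    files_created = {}
    try:
        process_event(event, outdir,
                      pcommands=['assemble', 'process', 'export'],
                      config=get_config(),
                      input_directory=None,
                      process_tag='processed',
                      files_created=files_created,
                      output_format='csv',
                      status=None,
                      recompute_metrics=False)
    finally:
        shutil.rmtree(outdir)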
def _test_workspace():
    eventid = "us1000778i"
    datafiles, event = read_data_dir("geonet", eventid, "*.V1A")
    tdir = tempfile.mkdtemp()
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore",
                                    category=H5pyDeprecationWarning)
            warnings.filterwarnings("ignore", category=YAMLError)
            warnings.filterwarnings("ignore", category=FutureWarning)
            config = update_config(
                os.path.join(datadir, "config_min_freq_0p2.yml"))
            tfile = os.path.join(tdir, "test.hdf")
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace(tfile)
            t1 = time.time()
            workspace.addStreams(event, raw_streams, label="raw")
            t2 = time.time()
            print("Adding %i streams took %.2f seconds" %
                  (len(raw_streams), (t2 - t1)))

            str_repr = workspace.__repr__()
            assert str_repr == "Events: 1 Stations: 3 Streams: 3"

            eventobj = workspace.getEvent(eventid)
            assert eventobj.origins[0].latitude == \
                event.origins[0].latitude
            assert eventobj.magnitudes[0].mag == event.magnitudes[0].mag

            stations = workspace.getStations()
            assert sorted(stations) == ["HSES", "THZ", "WTMC"]

            stations = workspace.getStations()
            assert sorted(stations) == ["HSES", "THZ", "WTMC"]

            # test retrieving event that doesn't exist
            with pytest.raises(KeyError):
                workspace.getEvent("foo")

            instream = None
            for stream in raw_streams:
                if stream[0].stats.station.lower() == "hses":
                    instream = stream
                    break
            if instream is None:
                raise ValueError("Instream should not be none.")
            outstream = workspace.getStreams(eventid,
                                             stations=["HSES"],
                                             labels=["raw"])[0]
            compare_streams(instream, outstream)

            label_summary = workspace.summarizeLabels()
            assert label_summary.iloc[0]["Label"] == "raw"
            assert label_summary.iloc[0]["Software"] == "gmprocess"

            sc = StreamCollection(raw_streams)
            processed_streams = process_streams(sc, event, config=config)
            workspace.addStreams(event, processed_streams, "processed")

            idlist = workspace.getEventIds()
            assert idlist[0] == eventid

            outstream = workspace.getStreams(eventid,
                                             stations=["HSES"],
                                             labels=["processed"])[0]

            provenance = workspace.getProvenance(eventid,
                                                 labels=["processed"])
            first_row = pd.Series({
                "Record": "NZ.HSES.--.HN1_us1000778i_processed",
                "Processing Step": "Remove Response",
                "Step Attribute": "input_units",
                "Attribute Value": "counts",
            })
            last_row = pd.Series({
                "Record": "NZ.WTMC.--.HNZ_us1000778i_processed",
                "Processing Step": "Lowpass Filter",
                "Step Attribute": "number_of_passes",
                "Attribute Value": 2,
            })
            assert provenance.iloc[0].equals(first_row)
            assert provenance.iloc[-1].equals(last_row)

            # compare the parameters from the input processed stream
            # to its output equivalent
            instream = None
            for stream in processed_streams:
                if stream[0].stats.station.lower() == "hses":
                    instream = stream
                    break
            if instream is None:
                raise ValueError("Instream should not be none.")
            compare_streams(instream, outstream)
            workspace.close()

            # read in data from a second event and stash it in the workspace
            eventid = "nz2018p115908"
            datafiles, event = read_data_dir("geonet", eventid, "*.V2A")
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace.open(tfile)
            workspace.addStreams(event, raw_streams, label="foo")

            stations = workspace.getStations()
            eventids = workspace.getEventIds()
            assert eventids == ["us1000778i", "nz2018p115908"]
            instation = raw_streams[0][0].stats.station
            this_stream = workspace.getStreams(eventid,
                                               stations=[instation],
                                               labels=["foo"])[0]
            assert instation == this_stream[0].stats.station
            usid = "us1000778i"
            inventory = workspace.getInventory(usid)
            workspace.close()
            codes = [
                station.code
                for station in inventory.networks[0].stations
            ]
            assert sorted(set(codes)) == ["HSES", "THZ", "WPWS", "WTMC"]
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)
def _test_vs30_dist_metrics():
    KNOWN_DISTANCES = {
        "epicentral": 5.1,
        "hypocentral": 10.2,
        "rupture": 2.21,
        "rupture_var": np.nan,
        "joyner_boore": 2.21,
        "joyner_boore_var": np.nan,
        "gc2_rx": 2.66,
        "gc2_ry": 3.49,
        "gc2_ry0": 0.00,
        "gc2_U": 34.34,
        "gc2_T": 2.66,
    }
    KNOWN_BAZ = 239.46
    KNOWN_VS30 = 331.47

    eventid = "ci38457511"
    datafiles, event = read_data_dir("fdsn", eventid, "*")
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, "config_min_freq_0p2.yml"))
    processed_streams = process_streams(raw_streams, event, config=config)
    rupture_file = get_rupture_file(datadir)
    grid_file = os.path.join(datadir, "test_grid.grd")
    config["metrics"]["vs30"] = {
        "vs30": {
            "file": grid_file,
            "column_header": "GlobalVs30",
            "readme_entry": "GlobalVs30",
            "units": "m/s",
        }
    }

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, "test.hdf")
        ws = StreamWorkspace(tfile)
        ws.addEvent(event)
        ws.addStreams(event, raw_streams, label="raw")
        ws.addStreams(event, processed_streams, label="processed")
        ws.calcMetrics(event.id,
                       rupture_file=rupture_file,
                       labels=["processed"],
                       config=config)
        sta_sum = ws.getStreamMetrics(event.id, "CI", "CLC", "processed")

        for dist in sta_sum.distances:
            np.testing.assert_allclose(sta_sum.distances[dist],
                                       KNOWN_DISTANCES[dist],
                                       rtol=0.01)
        np.testing.assert_allclose(sta_sum._back_azimuth,
                                   KNOWN_BAZ, rtol=0.01)
        np.testing.assert_allclose(sta_sum._vs30["vs30"]["value"],
                                   KNOWN_VS30, rtol=0.01)

        event_df, imc_tables, readme_tables = ws.getTables("processed")
        ws.close()
        check_cols = set([
            "EpicentralDistance",
            "HypocentralDistance",
            "RuptureDistance",
            "RuptureDistanceVar",
            "JoynerBooreDistance",
            "JoynerBooreDistanceVar",
            "GC2_rx",
            "GC2_ry",
            "GC2_ry0",
            "GC2_U",
            "GC2_T",
            "GlobalVs30",
            "BackAzimuth",
        ])
        assert check_cols.issubset(set(readme_tables["Z"]["Column header"]))
        assert check_cols.issubset(set(imc_tables["Z"].columns))
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)
def test_lowpass_max():
    datapath = os.path.join("data", "testdata", "lowpass_max")
    datadir = pkg_resources.resource_filename("gmprocess", datapath)
    sc = StreamCollection.from_directory(datadir)
    sc.describe()

    conf = get_config()
    update = {
        "processing": [
            {"detrend": {"detrending_method": "demean"}},
            {"remove_response": {
                "f1": 0.001,
                "f2": 0.005,
                "f3": None,
                "f4": None,
                "water_level": 60,
            }},
            # {'detrend': {'detrending_method': 'linear'}},
            # {'detrend': {'detrending_method': 'demean'}},
            {"get_corner_frequencies": {
                "constant": {
                    "highpass": 0.08,
                    "lowpass": 20.0
                },
                "method": "constant",
                "snr": {"same_horiz": True},
            }},
            {"lowpass_max_frequency": {"fn_fac": 0.9}},
        ]
    }
    update_dict(conf, update)

    update = {
        "windows": {
            "signal_end": {
                "method": "model",
                "vmin": 1.0,
                "floor": 120,
                "model": "AS16",
                "epsilon": 2.0,
            },
            "window_checks": {
                "do_check": False,
                "min_noise_duration": 1.0,
                "min_signal_duration": 1.0,
            },
        }
    }
    update_dict(conf, update)

    edict = {
        "id": "ci38038071",
        "time": UTCDateTime("2018-08-30 02:35:36"),
        "lat": 34.136,
        "lon": -117.775,
        "depth": 5.5,
        "magnitude": 4.4,
    }
    event = get_event_object(edict)
    test = process_streams(sc, event, conf)
    for st in test:
        for tr in st:
            freq_dict = tr.getParameter("corner_frequencies")
            np.testing.assert_allclose(freq_dict["lowpass"], 18.0)
def _test_vs30_dist_metrics():
    KNOWN_DISTANCES = {
        'epicentral': 5.1,
        'hypocentral': 10.2,
        'rupture': 2.21,
        'rupture_var': np.nan,
        'joyner_boore': 2.21,
        'joyner_boore_var': np.nan,
        'gc2_rx': 2.66,
        'gc2_ry': 3.49,
        'gc2_ry0': 0.00,
        'gc2_U': 34.34,
        'gc2_T': 2.66
    }
    KNOWN_BAZ = 239.46
    KNOWN_VS30 = 331.47

    eventid = 'ci38457511'
    datafiles, event = read_data_dir('fdsn', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, 'config_min_freq_0p2.yml'))
    processed_streams = process_streams(raw_streams, event, config=config)
    rupture_file = get_rupture_file(datadir)
    grid_file = os.path.join(datadir, 'test_grid.grd')
    config['metrics']['vs30'] = {
        'vs30': {
            'file': grid_file,
            'column_header': 'GlobalVs30',
            'readme_entry': 'GlobalVs30',
            'units': 'm/s'
        }
    }

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        ws = StreamWorkspace(tfile)
        ws.addEvent(event)
        ws.addStreams(event, raw_streams, label='raw')
        ws.addStreams(event, processed_streams, label='processed')
        ws.calcMetrics(event.id,
                       rupture_file=rupture_file,
                       labels=['processed'],
                       config=config)
        sta_sum = ws.getStreamMetrics(event.id, 'CI', 'CLC', 'processed')

        for dist in sta_sum.distances:
            np.testing.assert_allclose(sta_sum.distances[dist],
                                       KNOWN_DISTANCES[dist],
                                       rtol=0.01)
        np.testing.assert_allclose(sta_sum._back_azimuth,
                                   KNOWN_BAZ, rtol=0.01)
        np.testing.assert_allclose(sta_sum._vs30['vs30']['value'],
                                   KNOWN_VS30, rtol=0.01)

        event_df, imc_tables, readme_tables = ws.getTables('processed')
        ws.close()
        check_cols = set([
            'EpicentralDistance', 'HypocentralDistance', 'RuptureDistance',
            'RuptureDistanceVar', 'JoynerBooreDistance',
            'JoynerBooreDistanceVar', 'GC2_rx', 'GC2_ry', 'GC2_ry0',
            'GC2_U', 'GC2_T', 'GlobalVs30', 'BackAzimuth'
        ])
        assert check_cols.issubset(set(readme_tables['Z']['Column header']))
        assert check_cols.issubset(set(imc_tables['Z'].columns))
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)
def _test_workspace():
    eventid = 'us1000778i'
    datafiles, event = read_data_dir('geonet', eventid, '*.V1A')
    tdir = tempfile.mkdtemp()
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore",
                                    category=H5pyDeprecationWarning)
            warnings.filterwarnings("ignore", category=YAMLLoadWarning)
            warnings.filterwarnings("ignore", category=FutureWarning)
            config = update_config(
                os.path.join(datadir, 'config_min_freq_0p2.yml'))
            tfile = os.path.join(tdir, 'test.hdf')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace(tfile)
            t1 = time.time()
            workspace.addStreams(event, raw_streams, label='raw')
            t2 = time.time()
            print('Adding %i streams took %.2f seconds' %
                  (len(raw_streams), (t2 - t1)))

            str_repr = workspace.__repr__()
            assert str_repr == 'Events: 1 Stations: 3 Streams: 3'

            eventobj = workspace.getEvent(eventid)
            assert eventobj.origins[0].latitude == \
                event.origins[0].latitude
            assert eventobj.magnitudes[0].mag == event.magnitudes[0].mag

            stations = workspace.getStations()
            assert sorted(stations) == ['HSES', 'THZ', 'WTMC']

            stations = workspace.getStations(eventid=eventid)
            assert sorted(stations) == ['HSES', 'THZ', 'WTMC']

            # test retrieving event that doesn't exist
            with pytest.raises(KeyError):
                workspace.getEvent('foo')

            instream = None
            for stream in raw_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            if instream is None:
                raise ValueError('Instream should not be none.')
            outstream = workspace.getStreams(eventid,
                                             stations=['HSES'],
                                             labels=['raw'])[0]
            compare_streams(instream, outstream)

            label_summary = workspace.summarizeLabels()
            assert label_summary.iloc[0]['Label'] == 'raw'
            assert label_summary.iloc[0]['Software'] == 'gmprocess'

            sc = StreamCollection(raw_streams)
            processed_streams = process_streams(sc, event, config=config)
            workspace.addStreams(event, processed_streams, 'processed')

            idlist = workspace.getEventIds()
            assert idlist[0] == eventid

            outstream = workspace.getStreams(eventid,
                                             stations=['HSES'],
                                             labels=['processed'])[0]

            provenance = workspace.getProvenance(eventid,
                                                 labels=['processed'])
            first_row = pd.Series({
                'Record': 'NZ.HSES.--.HN1_us1000778i_processed',
                'Processing Step': 'Remove Response',
                'Step Attribute': 'input_units',
                'Attribute Value': 'counts'
            })
            last_row = pd.Series({
                'Record': 'NZ.WTMC.--.HNZ_us1000778i_processed',
                'Processing Step': 'Lowpass Filter',
                'Step Attribute': 'number_of_passes',
                'Attribute Value': 2
            })
            assert provenance.iloc[0].equals(first_row)
            assert provenance.iloc[-1].equals(last_row)

            # compare the parameters from the input processed stream
            # to its output equivalent
            instream = None
            for stream in processed_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            if instream is None:
                raise ValueError('Instream should not be none.')
            compare_streams(instream, outstream)
            workspace.close()

            # read in data from a second event and stash it in the workspace
            eventid = 'nz2018p115908'
            datafiles, event = read_data_dir('geonet', eventid, '*.V2A')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace.open(tfile)
            workspace.addStreams(event, raw_streams, label='foo')

            stations = workspace.getStations(eventid)
            eventids = workspace.getEventIds()
            assert eventids == ['us1000778i', 'nz2018p115908']
            instation = raw_streams[0][0].stats.station
            this_stream = workspace.getStreams(eventid,
                                               stations=[instation],
                                               labels=['foo'])[0]
            assert instation == this_stream[0].stats.station
            usid = 'us1000778i'
            inventory = workspace.getInventory(usid)
            workspace.close()
            codes = [
                station.code
                for station in inventory.networks[0].stations
            ]
            assert sorted(set(codes)) == ['HSES', 'THZ', 'WPWS', 'WTMC']
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)
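# gmprocess test modules conventionally end with a small runner block so the
# tests can also be executed directly as a script. A sketch of that pattern
# follows; the CALLED_FROM_PYTEST environment flag and the particular set of
# tests invoked here are assumptions, not taken from the original files.
if __name__ == '__main__':
    os.environ['CALLED_FROM_PYTEST'] = 'True'
    test_process_streams()
    test_free_field()
    test_check_instrument()
    test_allow_nans()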