def _test_colocated():
    eventid = 'ci38445975'
    datafiles, event = read_data_dir('fdsn', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config_file = os.path.join(datadir, 'test_config.yml')
    with open(config_file, 'r', encoding='utf-8') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
    processed_streams = process_streams(raw_streams, event, config=config)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        ws = StreamWorkspace(tfile)
        ws.addEvent(event)
        ws.addStreams(event, raw_streams, label='raw')
        ws.addStreams(event, processed_streams, label='processed')
        ws.calcMetrics(eventid, labels=['processed'], config=config)
        stasum = ws.getStreamMetrics(eventid, 'CI', 'MIKB', 'processed')
        np.testing.assert_allclose(
            stasum.get_pgm('duration', 'geometric_mean'), 38.94480068)
        ws.close()
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)

def test_metrics2():
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, 'config_min_freq_0p2.yml'))
    config['metrics']['output_imts'].append('Arias')
    config['metrics']['output_imcs'].append('arithmetic_mean')
    # turn off sta/lta check and snr checks
    newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr'])
    processed_streams = process_streams(raw_streams, event, config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, processed_streams, label='processed')
        workspace.calcMetrics(event.id, labels=['processed'])
        etable, imc_tables1, readmes1 = workspace.getTables('processed')
        assert 'ARITHMETIC_MEAN' not in imc_tables1
        assert 'ARITHMETIC_MEAN' not in readmes1

        del workspace.dataset.auxiliary_data.WaveFormMetrics
        del workspace.dataset.auxiliary_data.StationMetrics
        workspace.calcMetrics(event.id, labels=['processed'], config=config)
        etable2, imc_tables2, readmes2 = workspace.getTables('processed')
        assert 'ARITHMETIC_MEAN' in imc_tables2
        assert 'ARITHMETIC_MEAN' in readmes2
        assert 'ARIAS' in imc_tables2['ARITHMETIC_MEAN']
        testarray = readmes2['ARITHMETIC_MEAN']['Column header'].to_numpy()
        assert 'ARIAS' in testarray
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)

def test_metrics2():
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = get_config()
    config['metrics']['output_imts'].append('Arias')
    config['metrics']['output_imcs'].append('arithmetic_mean')
    # turn off sta/lta check and snr checks
    newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr'])
    processed_streams = process_streams(raw_streams, event, config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, processed_streams, label='processed')
        workspace.calcMetrics(event.id, labels=['processed'])
        etable, imc_tables1 = workspace.getTables('processed')
        etable2, imc_tables2 = workspace.getTables('processed', config=config)
        assert 'ARITHMETIC_MEAN' not in imc_tables1
        assert 'ARITHMETIC_MEAN' in imc_tables2
        assert 'ARIAS' in imc_tables2['ARITHMETIC_MEAN']
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)

def test_stream_params():
    eventid = 'us1000778i'
    datafiles, event = read_data_dir(
        'geonet', eventid, '20161113_110259_WTMC_20.V1A'
    )
    tdir = tempfile.mkdtemp()
    streams = []
    try:
        streams += read_data(datafiles[0])
        statsdict = {'name': 'Fred', 'age': 34}
        streams[0].setStreamParam('stats', statsdict)
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, streams, label='stats')
        outstreams = workspace.getStreams(event.id, labels=['stats'])
        cmpdict = outstreams[0].getStreamParam('stats')
        assert cmpdict == statsdict
        workspace.close()
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)

def _test_colocated():
    eventid = "ci38445975"
    datafiles, event = read_data_dir("fdsn", eventid, "*")
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config_file = os.path.join(datadir, "test_config.yml")
    with open(config_file, "r", encoding="utf-8") as f:
        yaml = YAML()
        yaml.preserve_quotes = True
        config = yaml.load(f)
    processed_streams = process_streams(raw_streams, event, config=config)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, "test.hdf")
        ws = StreamWorkspace(tfile)
        ws.addEvent(event)
        ws.addStreams(event, raw_streams, label="raw")
        ws.addStreams(event, processed_streams, label="processed")
        ws.calcMetrics(eventid, labels=["processed"], config=config)
        stasum = ws.getStreamMetrics(eventid, "CI", "MIKB", "processed")
        np.testing.assert_allclose(
            stasum.get_pgm("duration", "geometric_mean"), 38.94480068)
        ws.close()
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)

def test_raw():
    msg = "dataset.value has been deprecated. Use dataset[()] instead."
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=H5pyDeprecationWarning)
        warnings.filterwarnings("ignore", category=YAMLLoadWarning)
        warnings.filterwarnings("ignore", category=FutureWarning)
        raw_streams, inv = request_raw_waveforms(
            fdsn_client='IRIS',
            org_time='2018-11-30T17-29-29.330Z',
            lat=61.3464,
            lon=-149.9552,
            before_time=120,
            after_time=120,
            dist_min=0,
            dist_max=0.135,
            networks='*',
            stations='*',
            channels=['?N?'],
            access_restricted=False)

    tdir = tempfile.mkdtemp()
    try:
        edict = get_event_dict('ak20419010')
        origin = get_event_object('ak20419010')
        tfile = os.path.join(tdir, 'test.hdf')
        sc1 = StreamCollection(raw_streams)
        workspace = StreamWorkspace(tfile)
        workspace.addStreams(origin, sc1, label='raw')
        tstreams = workspace.getStreams(edict['id'])
        assert len(tstreams) == 0

        imclist = [
            'greater_of_two_horizontals',
            'channels',
            'rotd50',
            'rotd100'
        ]
        imtlist = ['sa1.0', 'PGA', 'pgv', 'fas2.0', 'arias']
        # this shouldn't do anything
        workspace.setStreamMetrics(edict['id'],
                                   imclist=imclist, imtlist=imtlist)

        processed_streams = process_streams(sc1, edict)
        workspace.addStreams(origin, processed_streams, 'processed')
        labels = workspace.getLabels()
        tags = workspace.getStreamTags(edict['id'])
        out_raw_streams = workspace.getStreams(edict['id'], get_raw=True)
        assert len(out_raw_streams) == len(sc1)

        # this should only work on processed data
        workspace.setStreamMetrics(edict['id'],
                                   imclist=imclist, imtlist=imtlist)
        df = workspace.summarizeLabels()
        x = 1
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)

def download(event, event_dir, config, directory):
    """Download data or load data from local directory, turn into Streams.

    Args:
        event (ScalarEvent):
            Object containing basic event hypocenter, origin time, magnitude.
        event_dir (str):
            Path where raw directory should be created (if downloading).
        config (dict):
            Dictionary with gmprocess configuration information.
        directory (str):
            Path where raw data already exists.

    Returns:
        tuple:
            - StreamWorkspace: Contains the event and raw streams.
            - str: Name of workspace HDF file.
            - StreamCollection: Raw data StationStreams.
    """
    # generate the raw directory
    rawdir = get_rawdir(event_dir)

    if directory is None:
        tcollection, terrors = fetch_data(
            event.time.datetime,
            event.latitude,
            event.longitude,
            event.depth_km,
            event.magnitude,
            config=config,
            rawdir=rawdir)
        # create an event.json file in each event directory,
        # in case user is simply downloading for now
        create_event_file(event, event_dir)
    else:
        streams, bad, terrors = directory_to_streams(directory)
        tcollection = StreamCollection(streams)

    # plot the raw waveforms
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        pngfiles = glob.glob(os.path.join(rawdir, '*.png'))
        if not len(pngfiles):
            plot_raw(rawdir, tcollection, event)

    # create the workspace file and put the unprocessed waveforms in it
    workname = os.path.join(event_dir, 'workspace.hdf')
    if os.path.isfile(workname):
        os.remove(workname)

    workspace = StreamWorkspace(workname)
    workspace.addEvent(event)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=H5pyDeprecationWarning)
        workspace.addStreams(event, tcollection, label='unprocessed')

    return (workspace, workname, tcollection)

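# A minimal usage sketch for download(), kept as a hedged example rather than
# a definitive recipe: the event id, the event directory argument, and the use
# of get_event_object()/get_config() here are illustrative assumptions only.
def _example_download(tmp_event_dir):
    event = get_event_object('ci38445975')  # hypothetical event id
    config = get_config()
    # directory=None means data are fetched rather than read from disk
    workspace, workname, collection = download(
        event, event_dir=tmp_event_dir, config=config, directory=None)
    print('workspace file:', workname, 'number of streams:', len(collection))
    workspace.close()
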
def test_metrics():
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = get_config()
    # turn off sta/lta check and snr checks
    newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr'])
    processed_streams = process_streams(raw_streams, event, config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, processed_streams, label='processed')
        stream1 = processed_streams[0]
        stream2 = processed_streams[1]
        summary1 = StationSummary.from_config(stream1)
        summary2 = StationSummary.from_config(stream2)
        workspace.setStreamMetrics(event.id, 'processed', summary1)
        workspace.setStreamMetrics(event.id, 'processed', summary2)
        workspace.calcStationMetrics(event.id, labels=['processed'])
        summary1_a = workspace.getStreamMetrics(event.id,
                                                stream1[0].stats.station,
                                                'processed')
        s1_df_in = summary1.pgms.sort_values(['IMT', 'IMC'])
        s1_df_out = summary1_a.pgms.sort_values(['IMT', 'IMC'])
        array1 = s1_df_in['Result'].as_matrix()
        array2 = s1_df_out['Result'].as_matrix()
        np.testing.assert_almost_equal(array1, array2, decimal=4)

        df = workspace.getMetricsTable(event.id)
        cmp_series = {
            'GREATER_OF_TWO_HORIZONTALS': 0.6787,
            'H1': 0.3869,
            'H2': 0.6787,
            'Z': 0.7663
        }
        pga_dict = df.iloc[0]['PGA'].to_dict()
        for key, value in pga_dict.items():
            value2 = cmp_series[key]
            np.testing.assert_almost_equal(value, value2, decimal=4)

        workspace.close()
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)

def test_metrics():
    eventid = "usb000syza"
    datafiles, event = read_data_dir("knet", eventid, "*")
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, "config_min_freq_0p2.yml"))
    # turn off sta/lta check and snr checks
    # newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr'])
    # processed_streams = process_streams(raw_streams, event, config=newconfig)
    newconfig = config.copy()
    newconfig["processing"].append(
        {"NNet_QA": {
            "acceptance_threshold": 0.5,
            "model_name": "CantWell"
        }})
    processed_streams = process_streams(raw_streams.copy(), event,
                                        config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, "test.hdf")
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, raw_streams, label="raw")
        workspace.addStreams(event, processed_streams, label="processed")
        stream1 = raw_streams[0]

        # Get metrics from station summary for raw streams
        summary1 = StationSummary.from_config(stream1)
        s1_df_in = summary1.pgms.sort_values(["IMT", "IMC"])
        array1 = s1_df_in["Result"].to_numpy()

        # Compare to metrics from getStreamMetrics for raw streams
        workspace.calcMetrics(eventid, labels=["raw"])
        summary1_a = workspace.getStreamMetrics(event.id,
                                                stream1[0].stats.network,
                                                stream1[0].stats.station,
                                                "raw")
        s1_df_out = summary1_a.pgms.sort_values(["IMT", "IMC"])
        array2 = s1_df_out["Result"].to_numpy()
        np.testing.assert_allclose(array1, array2, atol=1e-6, rtol=1e-6)

        workspace.close()
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)

def test_metrics():
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, 'config_min_freq_0p2.yml'))
    # turn off sta/lta check and snr checks
    # newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr'])
    # processed_streams = process_streams(raw_streams, event, config=newconfig)
    newconfig = config.copy()
    newconfig['processing'].append(
        {'NNet_QA': {
            'acceptance_threshold': 0.5,
            'model_name': 'CantWell'
        }})
    processed_streams = process_streams(raw_streams.copy(), event,
                                        config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, raw_streams, label='raw')
        workspace.addStreams(event, processed_streams, label='processed')
        stream1 = raw_streams[0]

        # Get metrics from station summary for raw streams
        summary1 = StationSummary.from_config(stream1)
        s1_df_in = summary1.pgms.sort_values(['IMT', 'IMC'])
        array1 = s1_df_in['Result'].to_numpy()

        # Compare to metrics from getStreamMetrics for raw streams
        workspace.calcMetrics(eventid, labels=['raw'])
        summary1_a = workspace.getStreamMetrics(event.id,
                                                stream1[0].stats.network,
                                                stream1[0].stats.station,
                                                'raw')
        s1_df_out = summary1_a.pgms.sort_values(['IMT', 'IMC'])
        array2 = s1_df_out['Result'].to_numpy()
        np.testing.assert_allclose(array1, array2, atol=1e-6, rtol=1e-6)

        workspace.close()
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)

def test_metrics():
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = get_config()
    # turn off sta/lta check and snr checks
    newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr'])
    processed_streams = process_streams(raw_streams, event, config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, processed_streams, label='processed')
        stream1 = processed_streams[0]
        stream2 = processed_streams[1]
        summary1 = StationSummary.from_config(stream1)
        summary2 = StationSummary.from_config(stream2)
        workspace.setStreamMetrics(event.id, 'processed', summary1)
        workspace.setStreamMetrics(event.id, 'processed', summary2)
        summary1_a = workspace.getStreamMetrics(event.id,
                                                stream1[0].stats.station,
                                                'processed')
        s1_df_in = summary1.pgms.sort_values(['IMT', 'IMC'])
        s1_df_out = summary1_a.pgms.sort_values(['IMT', 'IMC'])
        array1 = s1_df_in['Result'].as_matrix()
        array2 = s1_df_out['Result'].as_matrix()
        np.testing.assert_almost_equal(array1, array2, decimal=4)

        df = workspace.getMetricsTable(event.id)
        cmp_series = {'GREATER_OF_TWO_HORIZONTALS': 0.6787,
                      'HN1': 0.3869,
                      'HN2': 0.6787,
                      'HNZ': 0.7663}
        pga_dict = df.iloc[0]['PGA'].to_dict()
        for key, value in pga_dict.items():
            value2 = cmp_series[key]
            np.testing.assert_almost_equal(value, value2, decimal=4)

        workspace.close()
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)

def test_metrics():
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = get_config()
    # turn off sta/lta check and snr checks
    # newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr'])
    # processed_streams = process_streams(raw_streams, event, config=newconfig)
    newconfig = config.copy()
    newconfig['processing'].append(
        {'NNet_QA': {
            'acceptance_threshold': 0.5,
            'model_name': 'CantWell'
        }})
    processed_streams = process_streams(raw_streams, event, config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, raw_streams, label='raw')
        workspace.addStreams(event, processed_streams, label='processed')
        stream1 = raw_streams[0]
        summary1 = StationSummary.from_config(stream1)
        s1_df_in = summary1.pgms.sort_values(['IMT', 'IMC'])
        array1 = s1_df_in['Result'].as_matrix()

        workspace.calcStreamMetrics(eventid, labels=['raw'])
        workspace.calcStationMetrics(event.id, labels=['raw'])
        pstreams2 = workspace.getStreams(event.id, labels=['processed'])
        assert pstreams2[0].getStreamParamKeys() == ['nnet_qa']
        summary1_a = workspace.getStreamMetrics(event.id,
                                                stream1[0].stats.network,
                                                stream1[0].stats.station,
                                                'raw')
        s1_df_out = summary1_a.pgms.sort_values(['IMT', 'IMC'])
        array2 = s1_df_out['Result'].as_matrix()
        np.testing.assert_almost_equal(array1, array2, decimal=4)

        workspace.close()
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)

def _test_stream_params():
    eventid = "us1000778i"
    datafiles, event = read_data_dir("geonet", eventid,
                                     "20161113_110259_WTMC_20.V1A")
    tdir = tempfile.mkdtemp()
    streams = []
    try:
        streams += read_data(datafiles[0])
        statsdict = {"name": "Fred", "age": 34}
        streams[0].setStreamParam("stats", statsdict)
        tfile = os.path.join(tdir, "test.hdf")
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, streams, label="stats")
        outstreams = workspace.getStreams(event.id, labels=["stats"])
        cmpdict = outstreams[0].getStreamParam("stats")
        assert cmpdict == statsdict
        workspace.close()
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)

def _test_metrics2():
    eventid = "usb000syza"
    datafiles, event = read_data_dir("knet", eventid, "*")
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, "config_min_freq_0p2.yml"))
    config["metrics"]["output_imts"].append("Arias")
    config["metrics"]["output_imcs"].append("arithmetic_mean")
    # Adjust checks so that streams pass checks for this test
    newconfig = drop_processing(config, ["check_sta_lta"])
    csnr = [s for s in newconfig["processing"] if "compute_snr" in s.keys()][0]
    csnr["compute_snr"]["check"]["threshold"] = -10.0
    processed_streams = process_streams(raw_streams, event, config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, "test.hdf")
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, processed_streams, label="processed")
        workspace.calcMetrics(event.id, labels=["processed"])
        etable, imc_tables1, readmes1 = workspace.getTables("processed")
        assert "ARITHMETIC_MEAN" not in imc_tables1
        assert "ARITHMETIC_MEAN" not in readmes1

        del workspace.dataset.auxiliary_data.WaveFormMetrics
        del workspace.dataset.auxiliary_data.StationMetrics
        workspace.calcMetrics(event.id, labels=["processed"], config=config)
        etable2, imc_tables2, readmes2 = workspace.getTables("processed")
        assert "ARITHMETIC_MEAN" in imc_tables2
        assert "ARITHMETIC_MEAN" in readmes2
        assert "ARIAS" in imc_tables2["ARITHMETIC_MEAN"]
        testarray = readmes2["ARITHMETIC_MEAN"]["Column header"].to_numpy()
        assert "ARIAS" in testarray
        workspace.close()
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)

def test_stream_params():
    eventid = 'us1000778i'
    datafiles, event = read_data_dir('geonet', eventid,
                                     '20161113_110259_WTMC_20.V1A')
    tdir = tempfile.mkdtemp()
    streams = []
    try:
        streams += read_data(datafiles[0])
        statsdict = {'name': 'Fred', 'age': 34}
        streams[0].setStreamParam('stats', statsdict)
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, streams, label='stats')
        outstreams = workspace.getStreams(event.id, labels=['stats'])
        cmpdict = outstreams[0].getStreamParam('stats')
        assert cmpdict == statsdict
        workspace.close()
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)

def _test_vs30_dist_metrics():
    KNOWN_DISTANCES = {
        'epicentral': 5.1,
        'hypocentral': 10.2,
        'rupture': 2.21,
        'rupture_var': np.nan,
        'joyner_boore': 2.21,
        'joyner_boore_var': np.nan,
        'gc2_rx': 2.66,
        'gc2_ry': 3.49,
        'gc2_ry0': 0.00,
        'gc2_U': 34.34,
        'gc2_T': 2.66
    }
    KNOWN_BAZ = 239.46
    KNOWN_VS30 = 331.47

    eventid = 'ci38457511'
    datafiles, event = read_data_dir('fdsn', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, 'config_min_freq_0p2.yml'))
    processed_streams = process_streams(raw_streams, event, config=config)
    rupture_file = get_rupture_file(datadir)
    grid_file = os.path.join(datadir, 'test_grid.grd')
    config['metrics']['vs30'] = {
        'vs30': {
            'file': grid_file,
            'column_header': 'GlobalVs30',
            'readme_entry': 'GlobalVs30',
            'units': 'm/s'
        }
    }

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        ws = StreamWorkspace(tfile)
        ws.addEvent(event)
        ws.addStreams(event, raw_streams, label='raw')
        ws.addStreams(event, processed_streams, label='processed')
        ws.calcMetrics(event.id, rupture_file=rupture_file,
                       labels=['processed'], config=config)
        sta_sum = ws.getStreamMetrics(event.id, 'CI', 'CLC', 'processed')

        for dist in sta_sum.distances:
            np.testing.assert_allclose(sta_sum.distances[dist],
                                       KNOWN_DISTANCES[dist], rtol=0.01)
        np.testing.assert_allclose(sta_sum._back_azimuth, KNOWN_BAZ, rtol=0.01)
        np.testing.assert_allclose(sta_sum._vs30['vs30']['value'],
                                   KNOWN_VS30, rtol=0.01)

        event_df, imc_tables, readme_tables = ws.getTables('processed')
        ws.close()
        check_cols = set([
            'EpicentralDistance', 'HypocentralDistance', 'RuptureDistance',
            'RuptureDistanceVar', 'JoynerBooreDistance',
            'JoynerBooreDistanceVar', 'GC2_rx', 'GC2_ry', 'GC2_ry0', 'GC2_U',
            'GC2_T', 'GlobalVs30', 'BackAzimuth'
        ])
        assert check_cols.issubset(set(readme_tables['Z']['Column header']))
        assert check_cols.issubset(set(imc_tables['Z'].columns))
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)

def assemble(event, config, directory, gmprocess_version):
    """Download data or load data from local directory, turn into Streams.

    Args:
        event (ScalarEvent):
            Object containing basic event hypocenter, origin time, magnitude.
        config (dict):
            Dictionary with gmprocess configuration information.
        directory (str):
            Path where data already exists. Must be organized in a 'raw'
            directory, within directories with names as the event ids. For
            example, if `directory` is 'proj_dir' and you have data for
            event id 'abc123' then the raw data to be read in should be
            located in `proj_dir/abc123/raw/`.
        gmprocess_version (str):
            Software version for gmprocess.

    Returns:
        tuple:
            - StreamWorkspace: Contains the event and raw streams.
            - str: Name of workspace HDF file.
            - StreamCollection: Raw data StationStreams.
            - str: Path to the rupture file.
    """
    # Make raw directory
    in_event_dir = os.path.join(directory, event.id)
    in_raw_dir = get_rawdir(in_event_dir)
    logging.debug(f"in_raw_dir: {in_raw_dir}")
    streams, unprocessed_files, unprocessed_file_errors = directory_to_streams(
        in_raw_dir, config=config)

    # Write errors to a csv file
    failures_file = Path(in_raw_dir) / "read_failures.csv"
    colnames = ["File", "Failure"]
    with open(failures_file, "w", newline="") as f:
        writer = csv.writer(f, delimiter=",", quoting=csv.QUOTE_MINIMAL)
        writer.writerow(colnames)
        for ufile, uerror in zip(unprocessed_files, unprocessed_file_errors):
            writer.writerow([ufile, uerror])

    logging.debug("streams:")
    logging.debug(streams)

    if config["read"]["use_streamcollection"]:
        stream_array = StreamCollection(streams, **config["duplicate"])
    else:
        stream_array = StreamArray(streams)

    logging.info("stream_array.describe_string():")
    logging.info(stream_array.describe_string())

    # Create the workspace file and put the unprocessed waveforms in it
    workname = os.path.join(in_event_dir, WORKSPACE_NAME)

    # Remove any existing workspace file
    if os.path.isfile(workname):
        os.remove(workname)

    workspace = StreamWorkspace(workname)
    workspace.addEvent(event)
    logging.debug("workspace.dataset.events:")
    logging.debug(workspace.dataset.events)
    workspace.addGmprocessVersion(gmprocess_version)
    workspace.addConfig()
    workspace.addStreams(event, stream_array, label="unprocessed",
                         gmprocess_version=gmprocess_version)
    logging.debug("workspace.dataset.waveforms.list():")
    logging.debug(workspace.dataset.waveforms.list())
    logging.debug("workspace.dataset.config")

    return workspace

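# A minimal usage sketch for assemble(), assuming raw files already sit in
# <project_dir>/<event id>/raw/ as the docstring above describes. The project
# path, event id, and version string below are hypothetical placeholders.
def _example_assemble(project_dir):
    event = get_event_object('ci38457511')  # hypothetical event id
    config = get_config()
    workspace = assemble(event, config, directory=project_dir,
                         gmprocess_version='0.0.0')  # placeholder version
    # The returned workspace holds the event plus the 'unprocessed' streams.
    print(workspace.dataset.waveforms.list())
    workspace.close()
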
def test_workspace():
    eventid = 'us1000778i'
    datafiles, event = read_data_dir('geonet', eventid, '*.V1A')
    tdir = tempfile.mkdtemp()
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=H5pyDeprecationWarning)
            warnings.filterwarnings("ignore", category=YAMLLoadWarning)
            warnings.filterwarnings("ignore", category=FutureWarning)
            config = get_config()
            tfile = os.path.join(tdir, 'test.hdf')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace(tfile)
            t1 = time.time()
            workspace.addStreams(event, raw_streams, label='raw')
            t2 = time.time()
            print('Adding %i streams took %.2f seconds' %
                  (len(raw_streams), (t2 - t1)))

            str_repr = workspace.__repr__()
            assert str_repr == 'Events: 1 Stations: 3 Streams: 3'

            eventobj = workspace.getEvent(eventid)
            assert eventobj.origins[0].latitude == event.origins[0].latitude
            assert eventobj.magnitudes[0].mag == event.magnitudes[0].mag

            stations = workspace.getStations()
            assert sorted(stations) == ['hses', 'thz', 'wtmc']

            stations = workspace.getStations(eventid=eventid)
            assert sorted(stations) == ['hses', 'thz', 'wtmc']

            # test retrieving tags for an event that doesn't exist
            try:
                workspace.getStreamTags('foo')
            except KeyError:
                assert 1 == 1

            # test retrieving event that doesn't exist
            try:
                workspace.getEvent('foo')
            except KeyError:
                assert 1 == 1

            instream = None
            for stream in raw_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            if instream is None:
                assert 1 == 2
            outstream = workspace.getStreams(eventid,
                                             stations=['hses'],
                                             labels=['raw'])[0]
            compare_streams(instream, outstream)

            label_summary = workspace.summarizeLabels()
            assert label_summary.iloc[0]['Label'] == 'raw'
            assert label_summary.iloc[0]['Software'] == 'gmprocess'

            sc = StreamCollection(raw_streams)
            processed_streams = process_streams(sc, event, config=config)
            workspace.addStreams(event, processed_streams, 'processed')

            idlist = workspace.getEventIds()
            assert idlist[0] == eventid

            event_tags = workspace.getStreamTags(eventid)
            assert sorted(event_tags) == ['hses_processed', 'hses_raw',
                                          'thz_processed', 'thz_raw',
                                          'wtmc_processed', 'wtmc_raw']
            outstream = workspace.getStreams(eventid,
                                             stations=['hses'],
                                             labels=['processed'])[0]

            provenance = workspace.getProvenance(eventid, labels=['processed'])
            first_row = pd.Series({'Record': 'NZ.HSES.HN1',
                                   'Processing Step': 'Remove Response',
                                   'Step Attribute': 'input_units',
                                   'Attribute Value': 'counts'})
            last_row = pd.Series({'Record': 'NZ.WTMC.HNZ',
                                  'Processing Step': 'Lowpass Filter',
                                  'Step Attribute': 'number_of_passes',
                                  'Attribute Value': 2})
            assert provenance.iloc[0].equals(first_row)
            assert provenance.iloc[-1].equals(last_row)

            # compare the parameters from the input processed stream
            # to its output equivalent
            instream = None
            for stream in processed_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            if instream is None:
                assert 1 == 2
            compare_streams(instream, outstream)
            workspace.close()

            # read in data from a second event and stash it in the workspace
            eventid = 'nz2018p115908'
            datafiles, event = read_data_dir('geonet', eventid, '*.V2A')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace.open(tfile)
            workspace.addStreams(event, raw_streams, label='foo')

            stations = workspace.getStations(eventid)
            eventids = workspace.getEventIds()
            assert eventids == ['us1000778i', 'nz2018p115908']
            instation = raw_streams[0][0].stats.station
            this_stream = workspace.getStreams(eventid,
                                               stations=[instation],
                                               labels=['foo'])[0]
            assert instation == this_stream[0].stats.station

            usid = 'us1000778i'
            inventory = workspace.getInventory(usid)
            codes = [station.code
                     for station in inventory.networks[0].stations]
            assert sorted(codes) == ['HSES', 'THZ', 'WPWS', 'WTMC']
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)

def _test_workspace():
    eventid = "us1000778i"
    datafiles, event = read_data_dir("geonet", eventid, "*.V1A")
    tdir = tempfile.mkdtemp()
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=H5pyDeprecationWarning)
            warnings.filterwarnings("ignore", category=YAMLError)
            warnings.filterwarnings("ignore", category=FutureWarning)
            config = update_config(
                os.path.join(datadir, "config_min_freq_0p2.yml"))
            tfile = os.path.join(tdir, "test.hdf")
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace(tfile)
            t1 = time.time()
            workspace.addStreams(event, raw_streams, label="raw")
            t2 = time.time()
            print("Adding %i streams took %.2f seconds" %
                  (len(raw_streams), (t2 - t1)))

            str_repr = workspace.__repr__()
            assert str_repr == "Events: 1 Stations: 3 Streams: 3"

            eventobj = workspace.getEvent(eventid)
            assert eventobj.origins[0].latitude == event.origins[0].latitude
            assert eventobj.magnitudes[0].mag == event.magnitudes[0].mag

            stations = workspace.getStations()
            assert sorted(stations) == ["HSES", "THZ", "WTMC"]

            stations = workspace.getStations()
            assert sorted(stations) == ["HSES", "THZ", "WTMC"]

            # test retrieving event that doesn't exist
            with pytest.raises(KeyError):
                workspace.getEvent("foo")

            instream = None
            for stream in raw_streams:
                if stream[0].stats.station.lower() == "hses":
                    instream = stream
                    break
            if instream is None:
                raise ValueError("Instream should not be none.")
            outstream = workspace.getStreams(eventid,
                                             stations=["HSES"],
                                             labels=["raw"])[0]
            compare_streams(instream, outstream)

            label_summary = workspace.summarizeLabels()
            assert label_summary.iloc[0]["Label"] == "raw"
            assert label_summary.iloc[0]["Software"] == "gmprocess"

            sc = StreamCollection(raw_streams)
            processed_streams = process_streams(sc, event, config=config)
            workspace.addStreams(event, processed_streams, "processed")

            idlist = workspace.getEventIds()
            assert idlist[0] == eventid

            outstream = workspace.getStreams(eventid,
                                             stations=["HSES"],
                                             labels=["processed"])[0]

            provenance = workspace.getProvenance(eventid, labels=["processed"])
            first_row = pd.Series({
                "Record": "NZ.HSES.--.HN1_us1000778i_processed",
                "Processing Step": "Remove Response",
                "Step Attribute": "input_units",
                "Attribute Value": "counts",
            })
            last_row = pd.Series({
                "Record": "NZ.WTMC.--.HNZ_us1000778i_processed",
                "Processing Step": "Lowpass Filter",
                "Step Attribute": "number_of_passes",
                "Attribute Value": 2,
            })
            assert provenance.iloc[0].equals(first_row)
            assert provenance.iloc[-1].equals(last_row)

            # compare the parameters from the input processed stream
            # to its output equivalent
            instream = None
            for stream in processed_streams:
                if stream[0].stats.station.lower() == "hses":
                    instream = stream
                    break
            if instream is None:
                raise ValueError("Instream should not be none.")
            compare_streams(instream, outstream)
            workspace.close()

            # read in data from a second event and stash it in the workspace
            eventid = "nz2018p115908"
            datafiles, event = read_data_dir("geonet", eventid, "*.V2A")
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace.open(tfile)
            workspace.addStreams(event, raw_streams, label="foo")

            stations = workspace.getStations()
            eventids = workspace.getEventIds()
            assert eventids == ["us1000778i", "nz2018p115908"]
            instation = raw_streams[0][0].stats.station
            this_stream = workspace.getStreams(eventid,
                                               stations=[instation],
                                               labels=["foo"])[0]
            assert instation == this_stream[0].stats.station

            usid = "us1000778i"
            inventory = workspace.getInventory(usid)
            workspace.close()
            codes = [
                station.code for station in inventory.networks[0].stations
            ]
            assert sorted(set(codes)) == ["HSES", "THZ", "WPWS", "WTMC"]
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)

def test_workspace():
    eventid = 'us1000778i'
    datafiles, origin = read_data_dir('geonet', eventid, '*.V1A')
    event = get_event_object(origin)
    tdir = tempfile.mkdtemp()
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=H5pyDeprecationWarning)
            warnings.filterwarnings("ignore", category=YAMLLoadWarning)
            warnings.filterwarnings("ignore", category=FutureWarning)
            config = get_config()
            tfile = os.path.join(tdir, 'test.hdf')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace(tfile)
            t1 = time.time()
            workspace.addStreams(event, raw_streams, label='raw')
            t2 = time.time()
            print('Adding %i streams took %.2f seconds' %
                  (len(raw_streams), (t2 - t1)))

            str_repr = workspace.__repr__()
            assert str_repr == 'Events: 1 Stations: 3 Streams: 3'

            eventobj = workspace.getEvent(eventid)
            assert eventobj.origins[0].latitude == event.origins[0].latitude
            assert eventobj.magnitudes[0].mag == event.magnitudes[0].mag

            stations = workspace.getStations()
            assert sorted(stations) == ['hses', 'thz', 'wtmc']

            stations = workspace.getStations(eventid=eventid)
            assert sorted(stations) == ['hses', 'thz', 'wtmc']

            # test retrieving tags for an event that doesn't exist
            try:
                workspace.getStreamTags('foo')
            except KeyError:
                assert 1 == 1

            # test retrieving event that doesn't exist
            try:
                workspace.getEvent('foo')
            except KeyError:
                assert 1 == 1

            instream = None
            for stream in raw_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            if instream is None:
                assert 1 == 2
            outstream = workspace.getStreams(eventid,
                                             stations=['hses'],
                                             labels=['raw'])[0]
            compare_streams(instream, outstream)

            label_summary = workspace.summarizeLabels()
            assert label_summary.iloc[0]['Label'] == 'raw'
            assert label_summary.iloc[0]['Software'] == 'gmprocess'

            sc = StreamCollection(raw_streams)
            processed_streams = process_streams(sc, origin, config=config)
            workspace.addStreams(event, processed_streams, 'processed')

            idlist = workspace.getEventIds()
            assert idlist[0] == eventid

            event_tags = workspace.getStreamTags(eventid)
            assert sorted(event_tags) == [
                'hses_processed', 'hses_raw', 'thz_processed', 'thz_raw',
                'wtmc_processed', 'wtmc_raw'
            ]
            outstream = workspace.getStreams(eventid,
                                             stations=['hses'],
                                             labels=['processed'])[0]

            provenance = workspace.getProvenance(eventid, labels=['processed'])
            first_row = pd.Series({
                'Record': 'NZ.HSES.HN1',
                'Processing Step': 'Remove Response',
                'Step Attribute': 'input_units',
                'Attribute Value': 'counts'
            })
            last_row = pd.Series({
                'Record': 'NZ.WTMC.HNZ',
                'Processing Step': 'Detrend',
                'Step Attribute': 'detrending_method',
                'Attribute Value': 'baseline_sixth_order'
            })
            assert provenance.iloc[0].equals(first_row)
            assert provenance.iloc[-1].equals(last_row)

            # compare the parameters from the input processed stream
            # to its output equivalent
            instream = None
            for stream in processed_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            if instream is None:
                assert 1 == 2
            compare_streams(instream, outstream)
            workspace.close()

            # read in data from a second event and stash it in the workspace
            eventid = 'nz2018p115908'
            datafiles, origin = read_data_dir('geonet', eventid, '*.V2A')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            event = get_event_object(origin)
            workspace = StreamWorkspace.open(tfile)
            workspace.addStreams(event, raw_streams, label='foo')

            stations = workspace.getStations(eventid)
            eventids = workspace.getEventIds()
            assert eventids == ['us1000778i', 'nz2018p115908']
            instation = raw_streams[0][0].stats.station
            this_stream = workspace.getStreams(eventid,
                                               stations=[instation],
                                               labels=['foo'])[0]
            assert instation == this_stream[0].stats.station

            # set and retrieve waveform metrics in the file
            imclist = [
                'greater_of_two_horizontals', 'channels', 'rotd50', 'rotd100'
            ]
            imtlist = ['sa1.0', 'PGA', 'pgv', 'fas2.0', 'arias']
            usid = 'us1000778i'
            tags = workspace.getStreamTags(usid)
            workspace.setStreamMetrics(eventid,
                                       labels=['foo'],
                                       imclist=imclist,
                                       imtlist=imtlist)
            summary = workspace.getStreamMetrics(eventid, instation, 'foo')
            summary_series = summary.toSeries()['ARIAS']
            cmpseries = pd.Series({
                'GEOMETRIC_MEAN': np.NaN,
                'GREATER_OF_TWO_HORIZONTALS': 0.0005,
                'HN1': 0.0001,
                'HN2': 0.0005,
                'HNZ': 0.0000,
                'ROTD100.0': 0.0005,
                'ROTD50.0': 0.0003
            })
            assert cmpseries.equals(summary_series)

            workspace.setStreamMetrics(usid, labels=['processed'])
            df = workspace.getMetricsTable(usid, labels=['processed'])
            cmpdict = {
                'GREATER_OF_TWO_HORIZONTALS': [26.8906, 4.9415, 94.6646],
                'HN1': [24.5105, 4.9415, 94.6646],
                'HN2': [26.8906, 4.0758, 86.7877],
                'HNZ': [16.0941, 2.5401, 136.7054]
            }
            cmpframe = pd.DataFrame(cmpdict)
            assert df['PGA'].equals(cmpframe)

            inventory = workspace.getInventory(usid)
            codes = [
                station.code for station in inventory.networks[0].stations
            ]
            assert sorted(codes) == ['HSES', 'THZ', 'WPWS', 'WTMC']
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)

def _test_vs30_dist_metrics():
    KNOWN_DISTANCES = {
        "epicentral": 5.1,
        "hypocentral": 10.2,
        "rupture": 2.21,
        "rupture_var": np.nan,
        "joyner_boore": 2.21,
        "joyner_boore_var": np.nan,
        "gc2_rx": 2.66,
        "gc2_ry": 3.49,
        "gc2_ry0": 0.00,
        "gc2_U": 34.34,
        "gc2_T": 2.66,
    }
    KNOWN_BAZ = 239.46
    KNOWN_VS30 = 331.47

    eventid = "ci38457511"
    datafiles, event = read_data_dir("fdsn", eventid, "*")
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = update_config(os.path.join(datadir, "config_min_freq_0p2.yml"))
    processed_streams = process_streams(raw_streams, event, config=config)
    rupture_file = get_rupture_file(datadir)
    grid_file = os.path.join(datadir, "test_grid.grd")
    config["metrics"]["vs30"] = {
        "vs30": {
            "file": grid_file,
            "column_header": "GlobalVs30",
            "readme_entry": "GlobalVs30",
            "units": "m/s",
        }
    }

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, "test.hdf")
        ws = StreamWorkspace(tfile)
        ws.addEvent(event)
        ws.addStreams(event, raw_streams, label="raw")
        ws.addStreams(event, processed_streams, label="processed")
        ws.calcMetrics(event.id, rupture_file=rupture_file,
                       labels=["processed"], config=config)
        sta_sum = ws.getStreamMetrics(event.id, "CI", "CLC", "processed")

        for dist in sta_sum.distances:
            np.testing.assert_allclose(sta_sum.distances[dist],
                                       KNOWN_DISTANCES[dist], rtol=0.01)
        np.testing.assert_allclose(sta_sum._back_azimuth, KNOWN_BAZ, rtol=0.01)
        np.testing.assert_allclose(sta_sum._vs30["vs30"]["value"],
                                   KNOWN_VS30, rtol=0.01)

        event_df, imc_tables, readme_tables = ws.getTables("processed")
        ws.close()
        check_cols = set([
            "EpicentralDistance", "HypocentralDistance", "RuptureDistance",
            "RuptureDistanceVar", "JoynerBooreDistance",
            "JoynerBooreDistanceVar", "GC2_rx", "GC2_ry", "GC2_ry0", "GC2_U",
            "GC2_T", "GlobalVs30", "BackAzimuth",
        ])
        assert check_cols.issubset(set(readme_tables["Z"]["Column header"]))
        assert check_cols.issubset(set(imc_tables["Z"].columns))
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)

def download(event, event_dir, config, directory):
    """Download data or load data from local directory, turn into Streams.

    Args:
        event (ScalarEvent):
            Object containing basic event hypocenter, origin time, magnitude.
        event_dir (str):
            Path where raw directory should be created (if downloading).
        config (dict):
            Dictionary with gmprocess configuration information.
        directory (str):
            Path where data already exists. Must be organized in a 'raw'
            directory, within directories with names as the event ids. For
            example, if `directory` is 'proj_dir' and you have data for
            event id 'abc123' then the raw data to be read in should be
            located in `proj_dir/abc123/raw/`.

    Returns:
        tuple:
            - StreamWorkspace: Contains the event and raw streams.
            - str: Name of workspace HDF file.
            - StreamCollection: Raw data StationStreams.
    """
    # Make raw directory
    rawdir = get_rawdir(event_dir)

    if directory is None:
        tcollection, terrors = fetch_data(event.time.datetime,
                                          event.latitude,
                                          event.longitude,
                                          event.depth_km,
                                          event.magnitude,
                                          config=config,
                                          rawdir=rawdir)
        # create an event.json file in each event directory,
        # in case user is simply downloading for now
        create_event_file(event, event_dir)
    else:
        # Make raw directory
        in_event_dir = os.path.join(directory, event.id)
        in_raw_dir = get_rawdir(in_event_dir)
        streams, bad, terrors = directory_to_streams(in_raw_dir)
        tcollection = StreamCollection(streams, **config['duplicate'])
        create_event_file(event, event_dir)

    # Plot the raw waveforms
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        pngfiles = glob.glob(os.path.join(rawdir, '*.png'))
        if not len(pngfiles):
            plot_raw(rawdir, tcollection, event)

    # Create the workspace file and put the unprocessed waveforms in it
    workname = os.path.join(event_dir, 'workspace.hdf')

    # Remove any existing workspace file
    if os.path.isfile(workname):
        os.remove(workname)

    workspace = StreamWorkspace(workname)
    workspace.addEvent(event)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=H5pyDeprecationWarning)
        workspace.addStreams(event, tcollection, label='unprocessed')

    return (workspace, workname, tcollection)

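# Expected on-disk layout for the `directory` argument when loading local data
# with download(), per the docstring above (paths here are illustrative; only
# the <event id>/raw/ structure is required):
#
#   proj_dir/
#       abc123/          <- directory named for the event id
#           raw/         <- raw files read by directory_to_streams()
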
def _test_workspace():
    eventid = 'us1000778i'
    datafiles, event = read_data_dir('geonet', eventid, '*.V1A')
    tdir = tempfile.mkdtemp()
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=H5pyDeprecationWarning)
            warnings.filterwarnings("ignore", category=YAMLLoadWarning)
            warnings.filterwarnings("ignore", category=FutureWarning)
            config = update_config(
                os.path.join(datadir, 'config_min_freq_0p2.yml'))
            tfile = os.path.join(tdir, 'test.hdf')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace(tfile)
            t1 = time.time()
            workspace.addStreams(event, raw_streams, label='raw')
            t2 = time.time()
            print('Adding %i streams took %.2f seconds' %
                  (len(raw_streams), (t2 - t1)))

            str_repr = workspace.__repr__()
            assert str_repr == 'Events: 1 Stations: 3 Streams: 3'

            eventobj = workspace.getEvent(eventid)
            assert eventobj.origins[0].latitude == event.origins[0].latitude
            assert eventobj.magnitudes[0].mag == event.magnitudes[0].mag

            stations = workspace.getStations()
            assert sorted(stations) == ['HSES', 'THZ', 'WTMC']

            stations = workspace.getStations(eventid=eventid)
            assert sorted(stations) == ['HSES', 'THZ', 'WTMC']

            # test retrieving event that doesn't exist
            with pytest.raises(KeyError):
                workspace.getEvent('foo')

            instream = None
            for stream in raw_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            if instream is None:
                raise ValueError('Instream should not be none.')
            outstream = workspace.getStreams(eventid,
                                             stations=['HSES'],
                                             labels=['raw'])[0]
            compare_streams(instream, outstream)

            label_summary = workspace.summarizeLabels()
            assert label_summary.iloc[0]['Label'] == 'raw'
            assert label_summary.iloc[0]['Software'] == 'gmprocess'

            sc = StreamCollection(raw_streams)
            processed_streams = process_streams(sc, event, config=config)
            workspace.addStreams(event, processed_streams, 'processed')

            idlist = workspace.getEventIds()
            assert idlist[0] == eventid

            outstream = workspace.getStreams(eventid,
                                             stations=['HSES'],
                                             labels=['processed'])[0]

            provenance = workspace.getProvenance(eventid, labels=['processed'])
            first_row = pd.Series({
                'Record': 'NZ.HSES.--.HN1_us1000778i_processed',
                'Processing Step': 'Remove Response',
                'Step Attribute': 'input_units',
                'Attribute Value': 'counts'
            })
            last_row = pd.Series({
                'Record': 'NZ.WTMC.--.HNZ_us1000778i_processed',
                'Processing Step': 'Lowpass Filter',
                'Step Attribute': 'number_of_passes',
                'Attribute Value': 2
            })
            assert provenance.iloc[0].equals(first_row)
            assert provenance.iloc[-1].equals(last_row)

            # compare the parameters from the input processed stream
            # to its output equivalent
            instream = None
            for stream in processed_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            if instream is None:
                raise ValueError('Instream should not be none.')
            compare_streams(instream, outstream)
            workspace.close()

            # read in data from a second event and stash it in the workspace
            eventid = 'nz2018p115908'
            datafiles, event = read_data_dir('geonet', eventid, '*.V2A')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace.open(tfile)
            workspace.addStreams(event, raw_streams, label='foo')

            stations = workspace.getStations(eventid)
            eventids = workspace.getEventIds()
            assert eventids == ['us1000778i', 'nz2018p115908']
            instation = raw_streams[0][0].stats.station
            this_stream = workspace.getStreams(eventid,
                                               stations=[instation],
                                               labels=['foo'])[0]
            assert instation == this_stream[0].stats.station

            usid = 'us1000778i'
            inventory = workspace.getInventory(usid)
            workspace.close()
            codes = [
                station.code for station in inventory.networks[0].stations
            ]
            assert sorted(set(codes)) == ['HSES', 'THZ', 'WPWS', 'WTMC']
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)