def get_events(eventids, textfile, eventinfo, directory):
    """Return a list of events from one of the four inputs:

    Args:
        eventids (list or None):
            List of ComCat event IDs.
        textfile (str or None):
            Path to text file containing event IDs or info.
        eventinfo (list or None):
            List containing:
                - id Any string, no spaces.
                - time Any ISO-compatible date/time string.
                - latitude Latitude in decimal degrees.
                - longitude Longitude in decimal degrees.
                - depth Depth in kilometers.
                - magnitude Earthquake magnitude.
        directory (str):
            Path to a directory containing event subdirectories, each
            containing an event.json file, where the ID in the json file
            matches the subdirectory containing it.

    Returns:
        list: ScalarEvent objects.
    """
    events = []
    if eventids is not None:
        for eventid in eventids:
            event = get_event_object(eventid)
            events.append(event)
    elif textfile is not None:
        events = parse_event_file(textfile)
    elif eventinfo is not None:
        eid = eventinfo[0]
        time = eventinfo[1]
        lat = float(eventinfo[2])
        lon = float(eventinfo[3])
        dep = float(eventinfo[4])
        mag = float(eventinfo[5])
        event = ScalarEvent()
        event.fromParams(eid, time, lat, lon, dep, mag)
        events = [event]
    elif directory is not None:
        eventfiles = get_event_files(directory)
        if not len(eventfiles):
            eventids = os.listdir(directory)
            try:
                for eventid in eventids:
                    event = get_event_object(eventid)
                    events.append(event)
            except Exception:
                events = []
        else:
            events = read_event_json_files(eventfiles)
    return events
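# Usage sketch for get_events (hypothetical values): exactly one of the four
# sources is consulted, in the order eventids, textfile, eventinfo, directory.
events = get_events(eventids=['us1000778i'], textfile=None,
                    eventinfo=None, directory=None)
# Or build an event locally from explicit parameters, bypassing ComCat:
events = get_events(eventids=None, textfile=None,
                    eventinfo=['eq001', '2019-03-01T08:50:42',
                               '-14.7007', '-70.1516', '267.0', '7.0'],
                    directory=None)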
def test_asdf():
    eventid = 'us1000778i'
    datafiles, origin = read_data_dir('geonet', eventid, '*.V1A')
    event = get_event_object(origin)
    tdir = tempfile.mkdtemp()
    try:
        config = get_config()
        tfile = os.path.join(tdir, 'test.hdf')
        raw_streams = []
        for dfile in datafiles:
            raw_streams += read_data(dfile)

        write_asdf(tfile, raw_streams, event)
        assert is_asdf(tfile)
        assert not is_asdf(datafiles[0])

        outstreams = read_asdf(tfile)
        assert len(outstreams) == len(raw_streams)

        write_asdf(tfile, raw_streams, event, label='foo')
        outstreams2 = read_asdf(tfile, label='foo')
        assert len(outstreams2) == len(raw_streams)
    except Exception:
        assert 1 == 2
    finally:
        shutil.rmtree(tdir)
def test_fdsnfetch():
    # homedir = os.path.dirname(os.path.abspath(__file__))
    # fdsnfetch = os.path.join(homedir, '..', '..', 'bin', 'fdsnfetch')
    # datadir = os.path.join(homedir, '..', 'data', 'fdsnfetch')

    # parameters = '2001-02-28T18:54:32 47.149 -122.7266667 '
    # cmd_input = '%s %s' % (datadir, parameters)
    # cmd = '%s %s' % (fdsnfetch, cmd_input)
    # res, stdout, stderr = get_command_output(cmd)
    # print(stdout.decode('utf-8').strip())
    # print(stderr.decode('utf-8').strip())

    # Confirm that we got the three ALCT files as expected
    streams, inv = request_raw_waveforms(
        'IRIS', '2001-02-28T18:54:32', 47.149, -122.7266667,
        before_time=120, after_time=120, dist_max=1.0,
        stations=['ALCT'], networks=['UW'], channels=['EN*'])
    assert len(streams) == 3

    # Test writing out the streams in ASDF format
    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        event = get_event_object('uw10530748')
        write_asdf(tfile, streams, event, label='foo')
    finally:
        shutil.rmtree(tdir)
def test_event():
    subdir = os.path.join('data', 'testdata', 'vcr_event_test.yaml')
    tape_file = pkg_resources.resource_filename('gmprocess', subdir)
    with vcr.use_cassette(tape_file):
        eid = 'us1000j96d'  # M7.0 Peru Mar 1 2019
        edict = get_event_dict(eid)
        tdict = {
            'id': 'us1000j96d',
            'time': UTCDateTime(2019, 3, 1, 8, 50, 42, 615000),
            'lat': -14.7007,
            'lon': -70.1516,
            'depth': 267,
            'magnitude': 7.0
        }
        for key, value in tdict.items():
            v1 = edict[key]
            assert value == v1

        event = get_event_object(eid)
        assert event.id == eid
        assert event.magnitude == tdict['magnitude']
        assert event.time == tdict['time']
        assert event.latitude == tdict['lat']
        assert event.longitude == tdict['lon']
        assert event.depth == tdict['depth'] * 1000
        assert event.depth_km == tdict['depth']
def test_fit_spectra():
    config = get_config()
    datapath = os.path.join('data', 'testdata', 'demo', 'ci38457511', 'raw')
    datadir = pkg_resources.resource_filename('gmprocess', datapath)
    event = get_event_object('ci38457511')
    sc = StreamCollection.from_directory(datadir)
    for st in sc:
        st = signal_split(st, event)
        end_conf = config['windows']['signal_end']
        st = signal_end(st,
                        event_time=event.time,
                        event_lon=event.longitude,
                        event_lat=event.latitude,
                        event_mag=event.magnitude,
                        **end_conf)
        st = compute_snr(st, 30)
        st = get_corner_frequencies(st,
                                    method='constant',
                                    constant={
                                        'highpass': 0.08,
                                        'lowpass': 20.0
                                    })

    for st in sc:
        spectrum.fit_spectra(st, event)
def test_lowpass_max():
    datapath = os.path.join('data', 'testdata', 'lowpass_max')
    datadir = pkg_resources.resource_filename('gmprocess', datapath)
    sc = StreamCollection.from_directory(datadir)
    sc.describe()

    conf = get_config()
    update = {
        'processing': [
            {'detrend': {'detrending_method': 'demean'}},
            {'remove_response': {
                'f1': 0.001, 'f2': 0.005, 'f3': None, 'f4': None,
                'output': 'ACC', 'water_level': 60}
             },
            # {'detrend': {'detrending_method': 'linear'}},
            # {'detrend': {'detrending_method': 'demean'}},
            {'get_corner_frequencies': {
                'constant': {
                    'highpass': 0.08,
                    'lowpass': 20.0
                },
                'method': 'constant',
                'snr': {'same_horiz': True}}
             },
            {'lowpass_max_frequency': {'fn_fac': 0.9}}
        ]
    }
    update_dict(conf, update)
    update = {
        'windows': {
            'signal_end': {
                'method': 'model',
                'vmin': 1.0,
                'floor': 120,
                'model': 'AS16',
                'epsilon': 2.0
            },
            'window_checks': {
                'do_check': False,
                'min_noise_duration': 1.0,
                'min_signal_duration': 1.0
            }
        }
    }
    update_dict(conf, update)
    edict = {
        'id': 'ci38038071',
        'time': UTCDateTime('2018-08-30 02:35:36'),
        'lat': 34.136,
        'lon': -117.775,
        'depth': 5.5,
        'magnitude': 4.4
    }
    event = get_event_object(edict)
    test = process_streams(sc, event, conf)
    for st in test:
        for tr in st:
            freq_dict = tr.getParameter('corner_frequencies')
            np.testing.assert_allclose(freq_dict['lowpass'], 18.0)
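# A minimal sketch of the recursive merge that update_dict is assumed to
# perform above (merge_config is an illustrative stand-in, not the gmprocess
# helper): values in `update` override those in `base`, while nested dicts
# are merged key by key rather than replaced wholesale.
def merge_config(base, update):
    for key, value in update.items():
        if isinstance(value, dict) and isinstance(base.get(key), dict):
            merge_config(base[key], value)
        else:
            base[key] = value
    return base

assert merge_config({'a': {'b': 1, 'c': 2}}, {'a': {'c': 3}}) == \
    {'a': {'b': 1, 'c': 3}}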
def test_raw():
    msg = "dataset.value has been deprecated. Use dataset[()] instead."
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=H5pyDeprecationWarning)
        warnings.filterwarnings("ignore", category=YAMLLoadWarning)
        warnings.filterwarnings("ignore", category=FutureWarning)
        raw_streams, inv = request_raw_waveforms(
            fdsn_client='IRIS',
            org_time='2018-11-30T17-29-29.330Z',
            lat=61.3464,
            lon=-149.9552,
            before_time=120,
            after_time=120,
            dist_min=0,
            dist_max=0.135,
            networks='*',
            stations='*',
            channels=['?N?'],
            access_restricted=False)
        tdir = tempfile.mkdtemp()
        try:
            edict = get_event_dict('ak20419010')
            origin = get_event_object('ak20419010')
            tfile = os.path.join(tdir, 'test.hdf')
            sc1 = StreamCollection(raw_streams)
            workspace = StreamWorkspace(tfile)
            workspace.addStreams(origin, sc1, label='raw')
            tstreams = workspace.getStreams(edict['id'])
            assert len(tstreams) == 0

            imclist = [
                'greater_of_two_horizontals', 'channels', 'rotd50',
                'rotd100'
            ]
            imtlist = ['sa1.0', 'PGA', 'pgv', 'fas2.0', 'arias']
            # this shouldn't do anything
            workspace.setStreamMetrics(edict['id'],
                                       imclist=imclist,
                                       imtlist=imtlist)

            processed_streams = process_streams(sc1, edict)
            workspace.addStreams(origin, processed_streams, 'processed')
            labels = workspace.getLabels()
            tags = workspace.getStreamTags(edict['id'])
            out_raw_streams = workspace.getStreams(edict['id'], get_raw=True)
            assert len(out_raw_streams) == len(sc1)

            # this should only work on processed data
            workspace.setStreamMetrics(edict['id'],
                                       imclist=imclist,
                                       imtlist=imtlist)
            df = workspace.summarizeLabels()
            x = 1
        except Exception as e:
            raise e
        finally:
            shutil.rmtree(tdir)
def parse_event_file(eventfile):
    """Parse text file containing basic event information.

    Files can contain:
        - One column, in which case that column contains ComCat event IDs.
        - Seven columns, in which case those columns should be:
            - id: any string (no spaces)
            - time: Any ISO standard for date/time.
            - lat: Earthquake latitude in decimal degrees.
            - lon: Earthquake longitude in decimal degrees.
            - depth: Earthquake depth in kilometers.
            - magnitude: Earthquake magnitude.
            - magnitude_type: Earthquake magnitude type.

    NB: THERE SHOULD NOT BE ANY HEADERS ON THIS FILE!

    Args:
        eventfile (str):
            Path to event text file.

    Returns:
        list: ScalarEvent objects constructed from list of event information.
    """
    df = pd.read_csv(eventfile, sep=',', header=None)
    nrows, ncols = df.shape
    events = []
    if ncols == 1:
        df.columns = ['eventid']
        for idx, row in df.iterrows():
            event = get_event_object(row['eventid'])
            events.append(event)
    elif ncols == 7:
        df.columns = [
            'id', 'time', 'lat', 'lon', 'depth', 'magnitude',
            'magnitude_type'
        ]
        df['time'] = pd.to_datetime(df['time'])
        for idx, row in df.iterrows():
            rowdict = row.to_dict()
            event = get_event_object(rowdict)
            events.append(event)
    else:
        return None
    return events
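# Example input files for parse_event_file (hypothetical contents;
# 'events.csv' is a stand-in path).
# One-column format, ComCat IDs only:
#     us1000778i
#     ci38457511
# Seven-column format, no header row:
#     eq001,2019-03-01T08:50:42,-14.7007,-70.1516,267.0,7.0,mww
events = parse_event_file('events.csv')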
def read_event_json_files(eventfiles):
    """Read event.json files and return a list of ScalarEvent objects.

    Args:
        eventfiles (list):
            Event.json files to be read.

    Returns:
        list: ScalarEvent objects.
    """
    events = []
    for eventfile in eventfiles:
        with open(eventfile, 'rt') as f:
            eventdict = json.load(f)
            # eventdict['depth'] *= 1000
            event = get_event_object(eventdict)
            events.append(event)
    return events
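# A minimal event.json of the shape read_event_json_files expects (field
# names inferred from the dictionary form accepted by get_event_object;
# values are illustrative only):
import json

eventdict = {
    'id': 'ci38457511',
    'time': '2019-07-06T03:19:53',
    'lat': 35.7695,
    'lon': -117.5993,
    'depth': 8.0,
    'magnitude': 7.1
}
with open('event.json', 'w') as f:
    json.dump(eventdict, f)

events = read_event_json_files(['event.json'])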
def test_event():
    subdir = os.path.join('data', 'testdata', 'vcr_event_test.yaml')
    tape_file = pkg_resources.resource_filename('gmprocess', subdir)
    with vcr.use_cassette(tape_file):
        eid = 'us1000j96d'  # M7.0 Peru Mar 1 2019
        edict = get_event_dict(eid)
        tdict = {
            'id': 'us1000j96d',
            'time': UTCDateTime(2019, 3, 1, 8, 50, 42, 570000),
            'lat': -14.7132,
            'lon': -70.1375,
            'depth': 267,
            'magnitude': 7
        }
        assert edict == tdict

        event = get_event_object(eid)
        assert event.resource_id.id == 'us1000j96d'
        assert event.magnitudes[0].mag == 7.0
def test_zero_crossings():
    datapath = os.path.join('data', 'testdata', 'zero_crossings')
    datadir = pkg_resources.resource_filename('gmprocess', datapath)
    sc = StreamCollection.from_directory(datadir)
    sc.describe()

    conf = get_config()
    update = {
        'processing': [
            {'detrend': {'detrending_method': 'demean'}},
            {'check_zero_crossings': {'min_crossings': 1}}
        ]
    }
    update_dict(conf, update)

    edict = {
        'id': 'ak20419010',
        'time': UTCDateTime('2018-11-30T17:29:29'),
        'lat': 61.346,
        'lon': -149.955,
        'depth': 46.7,
        'magnitude': 7.1
    }
    event = get_event_object(edict)
    test = process_streams(sc, event, conf)
    for st in test:
        for tr in st:
            assert tr.hasParameter('ZeroCrossingRate')
    np.testing.assert_allclose(
        test[0][0].getParameter('ZeroCrossingRate')['crossing_rate'],
        0.008888888888888889,
        atol=1e-5)
def test_trim_multiple_events():
    datapath = os.path.join('data', 'testdata', 'multiple_events')
    datadir = pkg_resources.resource_filename('gmprocess', datapath)
    sc = StreamCollection.from_directory(os.path.join(datadir, 'ci38457511'))
    origin = get_event_object('ci38457511')
    df, catalog = create_travel_time_dataframe(
        sc, os.path.join(datadir, 'catalog.csv'), 5, 0.1, 'iasp91')
    for st in sc:
        st.detrend('demean')
        remove_response(st, None, None)
        st = corner_frequencies.constant(st)
        lowpass_filter(st)
        highpass_filter(st)
        signal_split(st, origin)
        signal_end(st, origin.time, origin.longitude, origin.latitude,
                   origin.magnitude, method='model', model='AS16')
        cut(st, 2)
        trim_multiple_events(st, origin, catalog, df, 0.2, 0.7, 'B14',
                             {'vs30': 760}, {'rake': 0})

    num_failures = sum([1 if not st.passed else 0 for st in sc])
    assert num_failures == 1

    failure = sc.select(station='WRV2')[0][0].getParameter('failure')
    assert failure['module'] == 'trim_multiple_events'
    assert failure['reason'] == ('A significant arrival from another event '
                                 'occurs within the first 70.0 percent of the '
                                 'signal window')

    for tr in sc.select(station='JRC2')[0]:
        np.testing.assert_almost_equal(
            tr.stats.endtime,
            UTCDateTime('2019-07-06T03:20:38.7983Z'))
def get_events(eventids, textfile, eventinfo, directory, outdir=None):
    """Find the list of events.

    Args:
        eventids (list or None):
            List of ComCat event IDs.
        textfile (str or None):
            Path to text file containing event IDs or info.
        eventinfo (list or None):
            List containing:
                - id Any string, no spaces.
                - time Any ISO-compatible date/time string.
                - latitude Latitude in decimal degrees.
                - longitude Longitude in decimal degrees.
                - depth Depth in kilometers.
                - magnitude Earthquake magnitude.
                - magnitude_type Earthquake magnitude type.
        directory (str):
            Path to a directory containing event subdirectories, each
            containing an event.json file, where the ID in the json file
            matches the subdirectory containing it.
        outdir (str):
            Output directory.

    Returns:
        list: ScalarEvent objects.
    """
    events = []
    if eventids is not None:
        for eventid in eventids:
            event = get_event_object(eventid)
            events.append(event)
    elif textfile is not None:
        events = parse_event_file(textfile)
    elif eventinfo is not None:
        eid = eventinfo[0]
        time = eventinfo[1]
        lat = float(eventinfo[2])
        lon = float(eventinfo[3])
        dep = float(eventinfo[4])
        mag = float(eventinfo[5])
        mag_type = str(eventinfo[6])
        event = ScalarEvent()
        event.fromParams(eid, time, lat, lon, dep, mag, mag_type)
        events = [event]
    elif directory is not None:
        eventfiles = get_event_files(directory)
        if not len(eventfiles):
            eventids = [
                f for f in os.listdir(directory) if not f.startswith('.')
            ]
            for eventid in eventids:
                try:
                    event = get_event_object(eventid)
                    events.append(event)
                    # If the event ID has been updated, make sure to rename
                    # the source folder and issue a warning to the user
                    if event.id != eventid:
                        old_dir = os.path.join(directory, eventid)
                        new_dir = os.path.join(directory, event.id)
                        os.rename(old_dir, new_dir)
                        logging.warning('Directory %s has been renamed to %s.'
                                        % (old_dir, new_dir))
                except Exception:
                    logging.warning('Could not get info for event id: %s'
                                    % eventid)
        else:
            events = read_event_json_files(eventfiles)
    elif outdir is not None:
        eventfiles = get_event_files(outdir)
        if not len(eventfiles):
            eventids = os.listdir(outdir)
            for eventid in eventids:
                try:
                    event = get_event_object(eventid)
                    events.append(event)
                except Exception:
                    logging.warning('Could not get info for event id: %s'
                                    % eventid)
        else:
            events = read_event_json_files(eventfiles)
    return events
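# Usage sketch for this variant (hypothetical values): eventinfo now carries
# a seventh element, the magnitude type.
events = get_events(eventids=None, textfile=None,
                    eventinfo=['eq001', '2019-03-01T08:50:42', '-14.7007',
                               '-70.1516', '267.0', '7.0', 'mww'],
                    directory=None)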
# data = "/gmprocess/data/testdata/knet/us2000cnnl/"
data = "/data/testdata/demo/ci38457511/raw/"
output = "/Users/gabriel/groundmotion-processing/output/"

# Path to example data
datapath = os.path.join('data', 'testdata', 'demo', 'ci38457511', 'raw')
datadir = pkg_resources.resource_filename('gmprocess', datapath)
sc = StreamCollection.from_directory(datadir)

# Includes 3 StationStreams, each with 3 StationTraces
sc.describe()

# Get the default config file
conf = get_config()

# Get event object
event = get_event_object('ci38457511')

# Process the streams
psc = process_streams(sc, event, conf)
psc.describe()

# Save plots of processed records, creating the plot directory if needed
plotdir = os.path.join(output, 'test')
if not os.path.exists(plotdir):
    os.mkdir(plotdir)
for st in psc:
    st.plot(outfile=os.path.join(plotdir, '%s.png' % st.get_id()))
def test_workspace():
    eventid = 'us1000778i'
    datafiles, origin = read_data_dir('geonet', eventid, '*.V1A')
    event = get_event_object(origin)
    tdir = tempfile.mkdtemp()
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=H5pyDeprecationWarning)
            warnings.filterwarnings("ignore", category=YAMLLoadWarning)
            warnings.filterwarnings("ignore", category=FutureWarning)
            config = get_config()
            tfile = os.path.join(tdir, 'test.hdf')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace(tfile)
            t1 = time.time()
            workspace.addStreams(event, raw_streams, label='raw')
            t2 = time.time()
            print('Adding %i streams took %.2f seconds' %
                  (len(raw_streams), (t2 - t1)))

            str_repr = workspace.__repr__()
            assert str_repr == 'Events: 1 Stations: 3 Streams: 3'

            eventobj = workspace.getEvent(eventid)
            assert eventobj.origins[0].latitude == event.origins[0].latitude
            assert eventobj.magnitudes[0].mag == event.magnitudes[0].mag

            stations = workspace.getStations()
            assert sorted(stations) == ['hses', 'thz', 'wtmc']

            stations = workspace.getStations(eventid=eventid)
            assert sorted(stations) == ['hses', 'thz', 'wtmc']

            # test retrieving tags for an event that doesn't exist
            try:
                workspace.getStreamTags('foo')
            except KeyError:
                assert 1 == 1

            # test retrieving event that doesn't exist
            try:
                workspace.getEvent('foo')
            except KeyError:
                assert 1 == 1

            instream = None
            for stream in raw_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            if instream is None:
                assert 1 == 2
            outstream = workspace.getStreams(eventid,
                                             stations=['hses'],
                                             labels=['raw'])[0]
            compare_streams(instream, outstream)

            label_summary = workspace.summarizeLabels()
            assert label_summary.iloc[0]['Label'] == 'raw'
            assert label_summary.iloc[0]['Software'] == 'gmprocess'

            sc = StreamCollection(raw_streams)
            processed_streams = process_streams(sc, origin, config=config)
            workspace.addStreams(event, processed_streams, 'processed')

            idlist = workspace.getEventIds()
            assert idlist[0] == eventid

            event_tags = workspace.getStreamTags(eventid)
            assert sorted(event_tags) == [
                'hses_processed', 'hses_raw', 'thz_processed', 'thz_raw',
                'wtmc_processed', 'wtmc_raw'
            ]
            outstream = workspace.getStreams(eventid,
                                             stations=['hses'],
                                             labels=['processed'])[0]

            provenance = workspace.getProvenance(eventid,
                                                 labels=['processed'])
            first_row = pd.Series({
                'Record': 'NZ.HSES.HN1',
                'Processing Step': 'Remove Response',
                'Step Attribute': 'input_units',
                'Attribute Value': 'counts'
            })
            last_row = pd.Series({
                'Record': 'NZ.WTMC.HNZ',
                'Processing Step': 'Detrend',
                'Step Attribute': 'detrending_method',
                'Attribute Value': 'baseline_sixth_order'
            })
            assert provenance.iloc[0].equals(first_row)
            assert provenance.iloc[-1].equals(last_row)

            # compare the parameters from the input processed stream
            # to its output equivalent
            instream = None
            for stream in processed_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            if instream is None:
                assert 1 == 2
            compare_streams(instream, outstream)
            workspace.close()

            # read in data from a second event and stash it in the workspace
            eventid = 'nz2018p115908'
            datafiles, origin = read_data_dir('geonet', eventid, '*.V2A')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            event = get_event_object(origin)
            workspace = StreamWorkspace.open(tfile)
            workspace.addStreams(event, raw_streams, label='foo')

            stations = workspace.getStations(eventid)
            eventids = workspace.getEventIds()
            assert eventids == ['us1000778i', 'nz2018p115908']
            instation = raw_streams[0][0].stats.station
            this_stream = workspace.getStreams(eventid,
                                               stations=[instation],
                                               labels=['foo'])[0]
            assert instation == this_stream[0].stats.station

            # set and retrieve waveform metrics in the file
            imclist = [
                'greater_of_two_horizontals', 'channels', 'rotd50',
                'rotd100'
            ]
            imtlist = ['sa1.0', 'PGA', 'pgv', 'fas2.0', 'arias']
            usid = 'us1000778i'
            tags = workspace.getStreamTags(usid)
            workspace.setStreamMetrics(eventid,
                                       labels=['foo'],
                                       imclist=imclist,
                                       imtlist=imtlist)
            summary = workspace.getStreamMetrics(eventid, instation, 'foo')
            summary_series = summary.toSeries()['ARIAS']
            cmpseries = pd.Series({
                'GEOMETRIC_MEAN': np.NaN,
                'GREATER_OF_TWO_HORIZONTALS': 0.0005,
                'HN1': 0.0001,
                'HN2': 0.0005,
                'HNZ': 0.0000,
                'ROTD100.0': 0.0005,
                'ROTD50.0': 0.0003
            })
            assert cmpseries.equals(summary_series)

            workspace.setStreamMetrics(usid, labels=['processed'])
            df = workspace.getMetricsTable(usid, labels=['processed'])
            cmpdict = {
                'GREATER_OF_TWO_HORIZONTALS': [26.8906, 4.9415, 94.6646],
                'HN1': [24.5105, 4.9415, 94.6646],
                'HN2': [26.8906, 4.0758, 86.7877],
                'HNZ': [16.0941, 2.5401, 136.7054]
            }
            cmpframe = pd.DataFrame(cmpdict)
            assert df['PGA'].equals(cmpframe)

            inventory = workspace.getInventory(usid)
            codes = [
                station.code for station in inventory.networks[0].stations
            ]
            assert sorted(codes) == ['HSES', 'THZ', 'WPWS', 'WTMC']
    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)