def _get_person_agent(pr):
    '''Get the seis-prov agent for the person running this software.

    Args:
        pr (prov.model.ProvDocument):
            Existing ProvDocument.

    Returns:
        prov.model.ProvDocument:
            Provenance document updated with the user's name and email.
    '''
    username = getpass.getuser()
    config = get_config()
    fullname = ''
    email = ''
    if 'user' in config:
        if 'name' in config['user']:
            fullname = config['user']['name']
        if 'email' in config['user']:
            email = config['user']['email']
    hashstr = '0000001'
    person_id = "seis_prov:sp001_pp_%s" % hashstr
    pr.agent(person_id, other_attributes=((
        ("prov:label", username),
        ("prov:type", prov.identifier.QualifiedName(
            prov.constants.PROV, "Person")),
        ("seis_prov:name", fullname),
        ("seis_prov:email", email)
    )))
    return pr
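# Hedged usage sketch (not from the original source): how _get_person_agent
# might be exercised with an empty ProvDocument from the `prov` package. The
# seis_prov namespace URI below is an assumption for illustration only.
import getpass

import prov.constants
import prov.identifier
from prov.model import ProvDocument

from gmprocess.config import get_config

doc = ProvDocument()
doc.add_namespace('seis_prov', 'http://seisprov.org/seis_prov/0.1/#')
doc = _get_person_agent(doc)
print(doc.serialize(indent=2))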
def _test_horizontal_frequencies():
    config = get_config()
    event_time = UTCDateTime('2001-02-28T18:54:32')
    ALCT_tr1 = read(os.path.join(datadir, 'ALCTENE.UW..sac'))[0]
    ALCT_tr2 = read(os.path.join(datadir, 'ALCTENN.UW..sac'))[0]
    stream = [ALCT_tr1, ALCT_tr2]
    ALCT_dist = 75.9559
    processed = process.process_config(
        stream, config=config,
        event_time=event_time, epi_dist=ALCT_dist)
    for trace in processed:
        corners = trace.stats.processing_parameters.corners
        # assert corners['default_high_frequency'] == 50
        np.testing.assert_allclose([corners['default_high_frequency']], [50.0])
        # assert corners['default_low_frequency'] == 0.018310546875
        assert corners['default_low_frequency'] == 0.01595909725588508

    stream[0].stats.channel = 'Z'
    processed = process.process_config(
        stream, config=config,
        event_time=event_time, epi_dist=ALCT_dist)
    corners1 = processed[0].stats.processing_parameters.corners
    high1 = corners1['default_high_frequency']
    low1 = corners1['default_low_frequency']
    assert np.allclose([high1], [50.0])
    # assert low1 == 0.0244140625
    assert low1 == 0.02155036612037732
    corners2 = processed[1].stats.processing_parameters.corners
    high2 = corners2['default_high_frequency']
    low2 = corners2['default_low_frequency']
    # assert high2 == 48.4619140625
    assert high2 == 48.52051157467704
    # assert low2 == 0.018310546875
    assert low2 == 0.01595909725588508
def test_metrics2():
    eventid = 'usb000syza'
    datafiles, event = read_data_dir('knet', eventid, '*')
    datadir = os.path.split(datafiles[0])[0]
    raw_streams = StreamCollection.from_directory(datadir)
    config = get_config()
    config['metrics']['output_imts'].append('Arias')
    config['metrics']['output_imcs'].append('arithmetic_mean')
    # turn off sta/lta check and snr checks
    newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr'])
    processed_streams = process_streams(raw_streams, event, config=newconfig)

    tdir = tempfile.mkdtemp()
    try:
        tfile = os.path.join(tdir, 'test.hdf')
        workspace = StreamWorkspace(tfile)
        workspace.addEvent(event)
        workspace.addStreams(event, processed_streams, label='processed')
        workspace.calcMetrics(event.id, labels=['processed'])
        etable, imc_tables1 = workspace.getTables('processed')
        etable2, imc_tables2 = workspace.getTables('processed', config=config)
        assert 'ARITHMETIC_MEAN' not in imc_tables1
        assert 'ARITHMETIC_MEAN' in imc_tables2
        assert 'ARIAS' in imc_tables2['ARITHMETIC_MEAN']
    except Exception as e:
        raise (e)
    finally:
        shutil.rmtree(tdir)
def test_asdf():
    eventid = 'us1000778i'
    datafiles, origin = read_data_dir('geonet', eventid, '*.V1A')
    event = get_event_object(origin)
    tdir = tempfile.mkdtemp()
    try:
        config = get_config()
        tfile = os.path.join(tdir, 'test.hdf')
        raw_streams = []
        for dfile in datafiles:
            raw_streams += read_data(dfile)

        write_asdf(tfile, raw_streams, event)

        assert is_asdf(tfile)
        assert not is_asdf(datafiles[0])

        outstreams = read_asdf(tfile)
        assert len(outstreams) == len(raw_streams)

        write_asdf(tfile, raw_streams, event, label='foo')
        outstreams2 = read_asdf(tfile, label='foo')
        assert len(outstreams2) == len(raw_streams)
    except Exception:
        assert 1 == 2
    finally:
        shutil.rmtree(tdir)
def test_nnet():
    conf = get_config()
    update = {
        'processing': [
            {'detrend': {'detrending_method': 'demean'}},
            # {'check_zero_crossings': {'min_crossings': 10}},
            {'detrend': {'detrending_method': 'linear'}},
            {'compute_snr': {'bandwidth': 20.0,
                             'check': {'max_freq': 5.0,
                                       'min_freq': 0.2,
                                       'threshold': 3.0}}},
            {'NNet_QA': {'acceptance_threshold': 0.5,
                         'model_name': 'CantWell'}}
        ]
    }
    update_dict(conf, update)

    data_files, origin = read_data_dir('geonet', 'us1000778i', '*.V1A')
    streams = []
    for f in data_files:
        streams += read_data(f)

    sc = StreamCollection(streams)
    test = process_streams(sc, origin, conf)
    tstream = test.select(station='HSES')[0]
    allparams = tstream.getStreamParamKeys()
    nnet_dict = tstream.getStreamParam('nnet_qa')
    np.testing.assert_allclose(
        nnet_dict['score_HQ'], 0.99321798811740059, rtol=1e-3)
def test_asdf():
    eventid = 'us1000778i'
    datafiles, event = read_data_dir('geonet', eventid, '*.V1A')
    tdir = tempfile.mkdtemp()
    try:
        config = get_config()
        tfile = os.path.join(tdir, 'test.hdf')
        raw_streams = []
        for dfile in datafiles:
            raw_streams += read_data(dfile)

        write_asdf(tfile, raw_streams, event)

        assert is_asdf(tfile)
        assert not is_asdf(datafiles[0])

        outstreams = read_asdf(tfile)
        assert len(outstreams) == len(raw_streams)

        write_asdf(tfile, raw_streams, event, label='foo')
        outstreams2 = read_asdf(tfile, label='foo')
        assert len(outstreams2) == len(raw_streams)
    except Exception:
        assert 1 == 2
    finally:
        shutil.rmtree(tdir)
def generate_workspace():
    """Generate simple HDF5 with ASDF layout for testing.
    """
    PCOMMANDS = [
        'assemble',
        'process',
    ]
    EVENTID = 'us1000778i'
    LABEL = 'ptest'
    datafiles, event = read_data_dir('geonet', EVENTID, '*.V1A')

    tdir = tempfile.mkdtemp()
    tfilename = os.path.join(tdir, 'workspace.h5')

    raw_data = []
    for dfile in datafiles:
        raw_data += read_data(dfile)
    write_asdf(tfilename, raw_data, event, label="unprocessed")
    del raw_data

    config = get_config()
    workspace = StreamWorkspace.open(tfilename)
    raw_streams = workspace.getStreams(EVENTID, labels=['unprocessed'])
    pstreams = process_streams(raw_streams, event, config=config)
    workspace.addStreams(event, pstreams, label=LABEL)
    workspace.calcMetrics(event.id, labels=[LABEL], config=config)

    return tfilename
def test_fit_spectra():
    config = get_config()
    datapath = os.path.join('data', 'testdata', 'demo', 'ci38457511', 'raw')
    datadir = pkg_resources.resource_filename('gmprocess', datapath)
    event = get_event_object('ci38457511')
    sc = StreamCollection.from_directory(datadir)
    for st in sc:
        st = signal_split(st, event)
        end_conf = config['windows']['signal_end']
        st = signal_end(st,
                        event_time=event.time,
                        event_lon=event.longitude,
                        event_lat=event.latitude,
                        event_mag=event.magnitude,
                        **end_conf)
        st = compute_snr(st, 30)
        st = get_corner_frequencies(st,
                                    method='constant',
                                    constant={'highpass': 0.08,
                                              'lowpass': 20.0})

    for st in sc:
        spectrum.fit_spectra(st, event)
def test_all_pickers(): streams = get_streams() picker_config = get_config(section='pickers') methods = ['ar', 'baer', 'power', 'kalkan'] columns = ['Stream', 'Method', 'Pick_Time', 'Mean_SNR'] df = pd.DataFrame(columns=columns) for stream in streams: print(stream.get_id()) for method in methods: try: if method == 'ar': loc, mean_snr = pick_ar(stream, picker_config=picker_config) elif method == 'baer': loc, mean_snr = pick_baer(stream, picker_config=picker_config) elif method == 'power': loc, mean_snr = pick_power(stream, picker_config=picker_config) elif method == 'kalkan': loc, mean_snr = pick_kalkan(stream, picker_config=picker_config) elif method == 'yeck': loc, mean_snr = pick_yeck(stream) except GMProcessException: loc = -1 mean_snr = np.nan row = { 'Stream': stream.get_id(), 'Method': method, 'Pick_Time': loc, 'Mean_SNR': mean_snr } df = df.append(row, ignore_index=True) stations = df['Stream'].unique() cmpdict = { 'TW.ECU.BN': 'kalkan', 'TW.ELD.BN': 'power', 'TW.EGF.BN': 'ar', 'TW.EAS.BN': 'ar', 'TW.EDH.BN': 'ar', 'TK.4304.HN': 'ar', 'TK.0921.HN': 'ar', 'TK.5405.HN': 'ar', 'NZ.HSES.HN': 'baer', 'NZ.WTMC.HN': 'baer', 'NZ.THZ.HN': 'power' } for station in stations: station_df = df[df['Stream'] == station] max_snr = station_df['Mean_SNR'].max() maxrow = station_df[station_df['Mean_SNR'] == max_snr].iloc[0] method = maxrow['Method'] try: assert cmpdict[station] == method except Exception as e: x = 1
def from_stream(cls, stream, components, imts, event=None, damping=None, smoothing=None, bandwidth=None, config=None): """ Args: stream (obspy.core.stream.Stream): Strong motion timeseries for one station. components (list): List of requested components (str). imts (list): List of requested imts (str). event (ScalarEvent): Origin/magnitude for the event containing time, latitude, longitude, depth, and magnitude. damping (float): Damping of oscillator. Default is None. smoothing (float): Smoothing method. Default is None. bandwidth (float): Bandwidth of smoothing. Default is None. config (dictionary): Configuration dictionary. Note: Assumes a processed stream with units of gal (1 cm/s^2). No processing is done by this class. """ if config is None: config = get_config() station = cls() imts = np.sort(imts) components = np.sort(components) if damping is None: damping = config['metrics']['sa']['damping'] if smoothing is None: smoothing = config['metrics']['fas']['smoothing'] if bandwidth is None: bandwidth = config['metrics']['fas']['bandwidth'] station._damping = damping station._smoothing = smoothing station._bandwidth = bandwidth station._stream = stream station.event = event station.set_metadata() metrics = MetricsController(imts, components, stream, bandwidth=bandwidth, damping=damping, event=event, smooth_type=smoothing) pgms = metrics.pgms if pgms is None: station._components = metrics.imcs station._imts = metrics.imts station.pgms = pd.DataFrame.from_dict({ 'IMT': [], 'IMC': [], 'Result': [] }) else: station._components = set(pgms['IMC'].tolist()) station._imts = set(pgms['IMT'].tolist()) station.pgms = pgms station._summary = station.get_summary() return station
def test_lowpass_max(): datapath = os.path.join('data', 'testdata', 'lowpass_max') datadir = pkg_resources.resource_filename('gmprocess', datapath) sc = StreamCollection.from_directory(datadir) sc.describe() conf = get_config() update = { 'processing': [ {'detrend': {'detrending_method': 'demean'}}, {'remove_response': { 'f1': 0.001, 'f2': 0.005, 'f3': None, 'f4': None, 'output': 'ACC', 'water_level': 60} }, # {'detrend': {'detrending_method': 'linear'}}, # {'detrend': {'detrending_method': 'demean'}}, {'get_corner_frequencies': { 'constant': { 'highpass': 0.08, 'lowpass': 20.0 }, 'method': 'constant', 'snr': {'same_horiz': True}} }, {'lowpass_max_frequency': {'fn_fac': 0.9}} ] } update_dict(conf, update) update = { 'windows': { 'signal_end': { 'method': 'model', 'vmin': 1.0, 'floor': 120, 'model': 'AS16', 'epsilon': 2.0 }, 'window_checks': { 'do_check': False, 'min_noise_duration': 1.0, 'min_signal_duration': 1.0 } } } update_dict(conf, update) edict = { 'id': 'ci38038071', 'time': UTCDateTime('2018-08-30 02:35:36'), 'lat': 34.136, 'lon': -117.775, 'depth': 5.5, 'magnitude': 4.4 } event = get_event_object(edict) test = process_streams(sc, event, conf) for st in test: for tr in st: freq_dict = tr.getParameter('corner_frequencies') np.testing.assert_allclose(freq_dict['lowpass'], 18.0)
def __init__(self, imts, imcs, timeseries, bandwidth=None, damping=None,
             event=None, smooth_type=None):
    """
    Args:
        imts (list): Intensity measurement types (string) to calculate.
        imcs (list): Intensity measurement components (string) to
            calculate.
        timeseries (StationStream): Stream of the timeseries data.
        event (ScalarEvent): Defines the focal time, geographic location,
            and magnitude of an earthquake hypocenter. Default is None.
        damping (float): Damping for the oscillator calculation.
        bandwidth (float): Bandwidth for the smoothing calculation.
        smooth_type (string): Currently not used, as konno_ohmachi is the
            only smoothing type.
    """
    if not isinstance(imts, (list, np.ndarray)):
        imts = [imts]
    if not isinstance(imcs, (list, np.ndarray)):
        imcs = [imcs]
    self.imts = set(np.sort([imt.lower() for imt in imts]))
    self.imcs = set(np.sort([imc.lower() for imc in imcs]))
    if 'radial_transverse' in self.imcs and event is None:
        raise PGMException('MetricsController: Event is required for '
                           'radial_transverse imc')
    self.timeseries = timeseries
    self.validate_stream()
    self.event = event
    self.config = get_config()
    self.damping = damping
    self.smooth_type = smooth_type
    self.bandwidth = bandwidth
    if damping is None:
        self.damping = self.config['metrics']['sa']['damping']
    if smooth_type is None:
        self.smooth_type = self.config['metrics']['fas']['smoothing']
    if bandwidth is None:
        self.bandwidth = self.config['metrics']['fas']['bandwidth']
    self._available_imts, self._available_imcs = gather_pgms()
    self._step_sets = self.get_steps()
    imtstr = '_'.join(imts)
    if '_sa' in imtstr or imtstr.startswith('sa'):
        self._times = self._get_horizontal_time()
    else:
        self._times = None
    self.pgms = self.execute_steps()
def pick_ar(stream, picker_config=None, config=None):
    """Wrapper around the AR P-phase picker.

    Args:
        stream (StationStream):
            Stream containing waveforms that need to be picked.
        picker_config (dict):
            Dictionary with parameters for AR P-phase picker.
            See picker.yml.
        config (dict):
            Configuration dictionary. Key value here is:
                windows:
                    window_checks:
                        min_noise_duration

    Returns:
        tuple:
            - Best estimate for p-wave arrival time (s since start of
              trace).
            - Mean signal to noise ratio based on the pick.
    """
    if picker_config is None:
        picker_config = get_config(section='pickers')
    if config is None:
        config = get_config()
    min_noise_dur = config['windows']['window_checks']['min_noise_duration']
    params = picker_config['ar']

    # Get the east, north, and vertical components from the stream
    st_e = stream.select(channel='??[E1]')
    st_n = stream.select(channel='??[N2]')
    st_z = stream.select(channel='??[Z3]')

    # Check if we found one of each component
    # If not, use the next picker in the order of preference
    if len(st_e) != 1 or len(st_n) != 1 or len(st_z) != 1:
        raise GMProcessException('Unable to perform AR picker.')

    minloc = ar_pick(st_z[0].data, st_n[0].data, st_e[0].data,
                     st_z[0].stats.sampling_rate, **params)[0]
    if minloc < min_noise_dur:
        fmt = 'Noise window (%.1f s) less than minimum (%.1f)'
        tpl = (minloc, min_noise_dur)
        raise GMProcessException(fmt % tpl)
    mean_snr = calc_snr(stream, minloc)
    return (minloc, mean_snr)
def pick_baer(stream, picker_config=None, config=None):
    """Wrapper around the Baer P-phase picker.

    Args:
        stream (StationStream):
            Stream containing waveforms that need to be picked.
        picker_config (dict):
            Dictionary with parameters for Baer P-phase picker.
            See picker.yml.
        config (dict):
            Configuration dictionary. Key value here is:
                windows:
                    window_checks:
                        min_noise_duration

    Returns:
        tuple:
            - Best estimate for p-wave arrival time (s since start of
              trace).
            - Mean signal to noise ratio based on the pick.
    """
    if picker_config is None:
        picker_config = get_config(section='pickers')
    if config is None:
        config = get_config()
    min_noise_dur = config['windows']['window_checks']['min_noise_duration']
    params = picker_config['baer']

    locs = []
    for trace in stream:
        pick_sample = pk_baer(trace.data, trace.stats.sampling_rate,
                              **params)[0]
        loc = pick_sample * trace.stats.delta
        locs.append(loc)

    locs = np.array(locs)
    if np.any(locs >= 0):
        minloc = np.min(locs[locs >= 0])
    else:
        minloc = -1
    if minloc < min_noise_dur:
        fmt = 'Noise window (%.1f s) less than minimum (%.1f)'
        tpl = (minloc, min_noise_dur)
        raise GMProcessException(fmt % tpl)
    mean_snr = calc_snr(stream, minloc)
    return (minloc, mean_snr)
def test_all_pickers(): streams = get_streams() picker_config = get_config(section='pickers') methods = ['ar', 'baer', 'power', 'kalkan'] columns = ['Stream', 'Method', 'Pick_Time', 'Mean_SNR'] df = pd.DataFrame(columns=columns) for stream in streams: print(stream.get_id()) for method in methods: try: if method == 'ar': loc, mean_snr = pick_ar( stream, picker_config=picker_config) elif method == 'baer': loc, mean_snr = pick_baer( stream, picker_config=picker_config) elif method == 'power': loc, mean_snr = pick_power( stream, picker_config=picker_config) elif method == 'kalkan': loc, mean_snr = pick_kalkan(stream, picker_config=picker_config) elif method == 'yeck': loc, mean_snr = pick_yeck(stream) except GMProcessException: loc = -1 mean_snr = np.nan row = {'Stream': stream.get_id(), 'Method': method, 'Pick_Time': loc, 'Mean_SNR': mean_snr} df = df.append(row, ignore_index=True) stations = df['Stream'].unique() cmpdict = {'TW.ECU.BN': 'kalkan', 'TW.ELD.BN': 'ar', 'TW.EGF.BN': 'ar', 'TW.EAS.BN': 'ar', 'TW.EDH.BN': 'ar', 'TK.4304.HN': 'ar', 'TK.0921.HN': 'ar', 'TK.5405.HN': 'ar', 'NZ.HSES.HN': 'baer', 'NZ.WTMC.HN': 'baer', 'NZ.THZ.HN': 'power'} for station in stations: station_df = df[df['Stream'] == station] max_snr = station_df['Mean_SNR'].max() maxrow = station_df[station_df['Mean_SNR'] == max_snr].iloc[0] method = maxrow['Method'] assert cmpdict[station] == method
def test_metrics(): eventid = 'usb000syza' datafiles, event = read_data_dir('knet', eventid, '*') datadir = os.path.split(datafiles[0])[0] raw_streams = StreamCollection.from_directory(datadir) config = get_config() # turn off sta/lta check and snr checks newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr']) processed_streams = process_streams(raw_streams, event, config=newconfig) tdir = tempfile.mkdtemp() try: tfile = os.path.join(tdir, 'test.hdf') workspace = StreamWorkspace(tfile) workspace.addEvent(event) workspace.addStreams(event, processed_streams, label='processed') stream1 = processed_streams[0] stream2 = processed_streams[1] summary1 = StationSummary.from_config(stream1) summary2 = StationSummary.from_config(stream2) workspace.setStreamMetrics(event.id, 'processed', summary1) workspace.setStreamMetrics(event.id, 'processed', summary2) workspace.calcStationMetrics(event.id, labels=['processed']) summary1_a = workspace.getStreamMetrics(event.id, stream1[0].stats.station, 'processed') s1_df_in = summary1.pgms.sort_values(['IMT', 'IMC']) s1_df_out = summary1_a.pgms.sort_values(['IMT', 'IMC']) array1 = s1_df_in['Result'].as_matrix() array2 = s1_df_out['Result'].as_matrix() np.testing.assert_almost_equal(array1, array2, decimal=4) df = workspace.getMetricsTable(event.id) cmp_series = { 'GREATER_OF_TWO_HORIZONTALS': 0.6787, 'H1': 0.3869, 'H2': 0.6787, 'Z': 0.7663 } pga_dict = df.iloc[0]['PGA'].to_dict() for key, value in pga_dict.items(): value2 = cmp_series[key] np.testing.assert_almost_equal(value, value2, decimal=4) workspace.close() except Exception as e: raise(e) finally: shutil.rmtree(tdir)
def pick_travel(stream, origin, model=None, picker_config=None):
    '''Use TauP travel time model to find P-Phase arrival time.

    Args:
        stream (StationStream):
            StationStream containing 1 or more channels of waveforms.
        origin (ScalarEvent):
            Event origin/magnitude information.
        model (TauPyModel):
            TauPyModel object for computing travel times.

    Returns:
        tuple:
            - Best estimate for p-wave arrival time (s since start of
              trace).
            - Mean signal to noise ratio based on the pick.
    '''
    if model is None:
        if picker_config is None:
            picker_config = get_config(section='pickers')
        model = TauPyModel(picker_config['travel_time']['model'])
    if stream[0].stats.starttime == NAN_TIME:
        return (-1, 0)
    lat = origin.latitude
    lon = origin.longitude
    depth = origin.depth_km
    if depth < 0:
        depth = 0
    etime = origin.time
    slat = stream[0].stats.coordinates.latitude
    slon = stream[0].stats.coordinates.longitude

    dist_deg = locations2degrees(lat, lon, slat, slon)
    try:
        arrivals = model.get_travel_times(source_depth_in_km=depth,
                                          distance_in_degree=dist_deg,
                                          phase_list=['P', 'p', 'Pn'])
    except Exception as e:
        fmt = ('Exception "%s" generated by get_travel_times() '
               'dist=%.3f depth=%.1f')
        logging.warning(fmt % (str(e), dist_deg, depth))
        arrivals = []
    if not len(arrivals):
        return (-1, 0)

    # arrival time is time since origin
    arrival = arrivals[0]
    # we need time since start of the record
    minloc = arrival.time + (etime - stream[0].stats.starttime)
    mean_snr = calc_snr(stream, minloc)
    return (minloc, mean_snr)
def test_metrics(): eventid = 'usb000syza' datafiles, event = read_data_dir('knet', eventid, '*') datadir = os.path.split(datafiles[0])[0] raw_streams = StreamCollection.from_directory(datadir) config = get_config() # turn off sta/lta check and snr checks newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr']) processed_streams = process_streams(raw_streams, event, config=newconfig) tdir = tempfile.mkdtemp() try: tfile = os.path.join(tdir, 'test.hdf') workspace = StreamWorkspace(tfile) workspace.addEvent(event) workspace.addStreams(event, processed_streams, label='processed') stream1 = processed_streams[0] stream2 = processed_streams[1] summary1 = StationSummary.from_config(stream1) summary2 = StationSummary.from_config(stream2) workspace.setStreamMetrics(event.id, 'processed', summary1) workspace.setStreamMetrics(event.id, 'processed', summary2) summary1_a = workspace.getStreamMetrics(event.id, stream1[0].stats.station, 'processed') s1_df_in = summary1.pgms.sort_values(['IMT', 'IMC']) s1_df_out = summary1_a.pgms.sort_values(['IMT', 'IMC']) array1 = s1_df_in['Result'].as_matrix() array2 = s1_df_out['Result'].as_matrix() np.testing.assert_almost_equal(array1, array2, decimal=4) df = workspace.getMetricsTable(event.id) cmp_series = {'GREATER_OF_TWO_HORIZONTALS': 0.6787, 'HN1': 0.3869, 'HN2': 0.6787, 'HNZ': 0.7663} pga_dict = df.iloc[0]['PGA'].to_dict() for key, value in pga_dict.items(): value2 = cmp_series[key] np.testing.assert_almost_equal(value, value2, decimal=4) workspace.close() except Exception as e: raise(e) finally: shutil.rmtree(tdir)
def from_config(cls, stream, config=None, event=None):
    """
    Args:
        stream (obspy.core.stream.Stream): Strong motion timeseries
            for one station.
        event (ScalarEvent): Object containing latitude, longitude,
            depth, and magnitude.
        config (dictionary): Configuration dictionary.

    Note:
        Assumes a processed stream with units of gal (1 cm/s^2).
        No processing is done by this class.
    """
    if config is None:
        config = get_config()
    station = cls()

    damping = config['metrics']['sa']['damping']
    smoothing = config['metrics']['fas']['smoothing']
    bandwidth = config['metrics']['fas']['bandwidth']

    station._damping = damping
    station._smoothing = smoothing
    station._bandwidth = bandwidth
    station._stream = stream
    station.event = event
    station.set_metadata()

    metrics = MetricsController.from_config(stream, config=config,
                                            event=event)
    pgms = metrics.pgms
    if pgms is None:
        station._components = metrics.imcs
        station._imts = metrics.imts
        station.pgms = pd.DataFrame.from_dict({
            'IMT': [],
            'IMC': [],
            'Result': []
        })
    else:
        station._components = set(pgms['IMC'].tolist())
        station._imts = set(pgms['IMT'].tolist())
        station.pgms = pgms
    station._summary = station.get_summary()
    return station
def test_metrics(): eventid = 'usb000syza' datafiles, event = read_data_dir('knet', eventid, '*') datadir = os.path.split(datafiles[0])[0] raw_streams = StreamCollection.from_directory(datadir) config = get_config() # turn off sta/lta check and snr checks # newconfig = drop_processing(config, ['check_sta_lta', 'compute_snr']) # processed_streams = process_streams(raw_streams, event, config=newconfig) newconfig = config.copy() newconfig['processing'].append( {'NNet_QA': { 'acceptance_threshold': 0.5, 'model_name': 'CantWell' }}) processed_streams = process_streams(raw_streams, event, config=newconfig) tdir = tempfile.mkdtemp() try: tfile = os.path.join(tdir, 'test.hdf') workspace = StreamWorkspace(tfile) workspace.addEvent(event) workspace.addStreams(event, raw_streams, label='raw') workspace.addStreams(event, processed_streams, label='processed') stream1 = raw_streams[0] summary1 = StationSummary.from_config(stream1) s1_df_in = summary1.pgms.sort_values(['IMT', 'IMC']) array1 = s1_df_in['Result'].as_matrix() workspace.calcStreamMetrics(eventid, labels=['raw']) workspace.calcStationMetrics(event.id, labels=['raw']) pstreams2 = workspace.getStreams(event.id, labels=['processed']) assert pstreams2[0].getStreamParamKeys() == ['nnet_qa'] summary1_a = workspace.getStreamMetrics(event.id, stream1[0].stats.network, stream1[0].stats.station, 'raw') s1_df_out = summary1_a.pgms.sort_values(['IMT', 'IMC']) array2 = s1_df_out['Result'].as_matrix() np.testing.assert_almost_equal(array1, array2, decimal=4) workspace.close() except Exception as e: raise (e) finally: shutil.rmtree(tdir)
def pick_travel(stream, origin, picker_config=None):
    '''Use TauP travel time model to find P-Phase arrival time.

    Args:
        stream (StationStream):
            StationStream containing 1 or more channels of waveforms.
        origin (ScalarEvent):
            Event origin/magnitude information.
        picker_config (dict):
            Dictionary containing picker configuration.

    Returns:
        tuple:
            - Best estimate for p-wave arrival time (s since start of
              trace).
            - Mean signal to noise ratio based on the pick.
    '''
    if picker_config is None:
        picker_config = get_config(section='pickers')
    model = picker_config['travel_time']['model']
    model = TauPyModel(model=model)
    if stream[0].stats.starttime == NAN_TIME:
        return (-1, 0)
    lat = origin.latitude
    lon = origin.longitude
    depth = origin.depth_km
    if depth < 0:
        depth = 0
    etime = origin.time
    slat = stream[0].stats.coordinates.latitude
    slon = stream[0].stats.coordinates.longitude

    dist_deg = locations2degrees(lat, lon, slat, slon)
    arrivals = model.get_travel_times(source_depth_in_km=int(depth),
                                      distance_in_degree=dist_deg,
                                      phase_list=['P', 'p', 'Pn'])
    if not len(arrivals):
        return (-1, 0)

    # arrival time is time since origin
    arrival = arrivals[0]
    # we need time since start of the record
    minloc = arrival.time + (etime - stream[0].stats.starttime)
    mean_snr = calc_snr(stream, minloc)
    return (minloc, mean_snr)
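# Hedged usage sketch (not from the original source): the obspy TauPyModel
# call that pick_travel wraps, run standalone. The 'iasp91' velocity model
# and the depth/distance values are illustrative assumptions.
from obspy.taup import TauPyModel

model = TauPyModel(model='iasp91')
arrivals = model.get_travel_times(source_depth_in_km=10.0,
                                  distance_in_degree=2.5,
                                  phase_list=['P', 'p', 'Pn'])
if len(arrivals):
    # Travel time (s) of the first matching phase after the origin time.
    print(arrivals[0].name, arrivals[0].time)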
def pick_yeck(stream):
    """IN DEVELOPMENT! SNR based P-phase picker.

    Args:
        stream (StationStream):
            Stream containing waveforms that need to be picked.

    Returns:
        tuple:
            - Best estimate for p-wave arrival time (s since start of
              trace).
            - Mean signal to noise ratio based on the pick.
    """
    min_window = 5.0  # put into config
    config = get_config()
    min_noise_dur = config['windows']['window_checks']['min_noise_duration']

    locs = []
    for trace in stream:
        data = trace.data
        sr = trace.stats.sampling_rate
        pidx_start = int(min_window * sr)
        snr = np.zeros(len(data))
        for pidx in range(pidx_start, len(data) - pidx_start):
            snr_i = sub_calc_snr(data, pidx)
            snr[pidx] = snr_i
        snr = np.array(snr)
        pidx = snr.argmax()
        loc = pidx / sr
        locs.append(loc)

    locs = np.array(locs)
    if np.any(locs >= 0):
        minloc = np.min(locs[locs >= 0])
    else:
        minloc = -1
    if minloc < min_noise_dur:
        fmt = 'Noise window (%.1f s) less than minimum (%.1f)'
        tpl = (minloc, min_noise_dur)
        raise GMProcessException(fmt % tpl)
    mean_snr = calc_snr(stream, minloc)
    return (minloc, mean_snr)
def test_zero_crossings():
    datapath = os.path.join('data', 'testdata', 'zero_crossings')
    datadir = pkg_resources.resource_filename('gmprocess', datapath)
    sc = StreamCollection.from_directory(datadir)
    sc.describe()

    conf = get_config()
    update = {
        'processing': [
            {'detrend': {'detrending_method': 'demean'}},
            {'check_zero_crossings': {'min_crossings': 1}}
        ]
    }
    update_dict(conf, update)

    edict = {
        'id': 'ak20419010',
        'time': UTCDateTime('2018-11-30T17:29:29'),
        'lat': 61.346,
        'lon': -149.955,
        'depth': 46.7,
        'magnitude': 7.1
    }
    event = get_event_object(edict)

    test = process_streams(sc, event, conf)
    for st in test:
        for tr in st:
            assert tr.hasParameter('ZeroCrossingRate')
    np.testing.assert_allclose(
        test[0][0].getParameter('ZeroCrossingRate')['crossing_rate'],
        0.008888888888888889,
        atol=1e-5)
def pick_travel(stream, origin, picker_config=None): '''Use TauP travel time model to find P-Phase arrival time. Args: stream (StationStream): StationStream containing 1 or more channels of waveforms. origin (ScalarEvent): Event origin/magnitude information. picker_config (dict): Dictionary containing picker configuration. Returns: tuple: - Best estimate for p-wave arrival time (s since start of trace). - Mean signal to noise ratio based on the pick. ''' if picker_config is None: picker_config = get_config(section='pickers') model = picker_config['travel_time']['model'] model = TauPyModel(model=model) if stream[0].stats.starttime == NAN_TIME: return (-1, 0) lat = origin.latitude lon = origin.longitude depth = origin.depth_km etime = origin.time slat = stream[0].stats.coordinates.latitude slon = stream[0].stats.coordinates.longitude dist_deg = locations2degrees(lat, lon, slat, slon) arrivals = model.get_travel_times(source_depth_in_km=int(depth), distance_in_degree=dist_deg, phase_list=['P', 'p', 'Pn']) if not len(arrivals): return (-1, 0) arrival = arrivals[0] minloc = arrival.time + (etime - stream[0].stats.starttime) mean_snr = calc_snr(stream, minloc) return (minloc, mean_snr)
def __disp_checks(tr, max_final_displacement=0.025,
                  max_displacment_ratio=0.2):
    # Need to find the high/low pass filtering steps in the config
    # to ensure that filtering here is done with the same options
    config = get_config()
    processing_steps = config['processing']
    ps_names = [list(ps.keys())[0] for ps in processing_steps]
    ind = int(np.where(np.array(ps_names) == 'highpass_filter')[0][0])
    hp_args = processing_steps[ind]['highpass_filter']
    ind = int(np.where(np.array(ps_names) == 'lowpass_filter')[0][0])
    lp_args = processing_steps[ind]['lowpass_filter']

    # Make a copy of the trace so we don't modify it in place with
    # filtering or integration
    trdis = tr.copy()

    # Filter
    trdis = lowpass_filter_trace(trdis, **lp_args)
    trdis = highpass_filter_trace(trdis, **hp_args)

    # Integrate to displacment
    trdis.integrate()
    trdis.integrate()

    # Checks
    ok = True
    max_displacment = np.max(np.abs(trdis.data))
    final_displacement = np.abs(trdis.data[-1])
    disp_ratio = final_displacement / max_displacment
    if final_displacement > max_final_displacement:
        ok = False
    if disp_ratio > max_displacment_ratio:
        ok = False
    return ok
def update_config(custom_cfg_file):
    """Merge custom config with default.

    Args:
        custom_cfg_file (str): Path to custom config.

    Returns:
        dict: Merged config dictionary.
    """
    config = get_config()
    if not os.path.isfile(custom_cfg_file):
        return config
    try:
        with open(custom_cfg_file, 'rt') as f:
            custom_cfg = yaml.load(f, Loader=yaml.FullLoader)
            update_dict(config, custom_cfg)
    except yaml.parser.ParserError as pe:
        return None
    return config
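# Hedged usage sketch (not from the original source): merging a small custom
# YAML file into the default config via update_config. Assumes the module
# imports used by update_config (os, yaml, update_dict, get_config) are in
# place; the file name and the overridden value are illustrative only.
import os
import tempfile

tmpdir = tempfile.mkdtemp()
custom_path = os.path.join(tmpdir, 'custom_config.yml')
with open(custom_path, 'wt') as f:
    f.write('metrics:\n  sa:\n    damping: 0.10\n')

merged = update_config(custom_path)
if merged is not None:
    # The custom damping should override the default metrics:sa:damping.
    print(merged['metrics']['sa']['damping'])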
import os
import zipfile
import logging

import numpy as np

# local imports
from gmprocess.config import get_config

CONFIG = get_config()
DUPLICATE_MARKER = '1'


def is_evenly_spaced(times, rtol=1e-6, atol=1e-8):
    """
    Checks whether times are evenly spaced.

    Args:
        times (array):
            Array of floats of times in seconds.
        rtol (float):
            The relative tolerance parameter. See numpy.allclose.
        atol (float):
            The absolute tolerance parameter. See numpy.allclose.

    Returns:
        bool: True if times are evenly spaced. False otherwise.
    """
    diff_times = np.diff(times)
    return np.all(
        np.isclose(diff_times[0], diff_times, rtol=rtol, atol=atol))
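# Hedged usage sketch (not from the original source): exercising
# is_evenly_spaced with synthetic time arrays; no gmprocess data is needed.
import numpy as np

evenly = np.arange(0.0, 10.0, 0.01)        # constant 0.01 s spacing
gappy = np.concatenate([evenly, [10.5]])   # one irregular sample at the end

print(is_evenly_spaced(evenly))  # True
print(is_evenly_spaced(gappy))   # False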
def process_streams(streams, origin, config=None): """ Run processing steps from the config file. This method looks in the 'processing' config section and loops over those steps and hands off the config options to the appropriate prcessing method. Streams that fail any of the tests are kepth in the StreamCollection but the parameter 'passed_checks' is set to False and subsequent processing steps are not applied once a check has failed. Args: streams (list): A StreamCollection object. origin (ScalarEvent): ScalarEvent object. config (dict): Configuration dictionary (or None). See get_config(). Returns: A StreamCollection object. """ if not isinstance(streams, StreamCollection): raise ValueError('streams must be a StreamCollection instance.') if config is None: config = get_config() logging.info('Processing streams...') event_time = origin.time event_lon = origin.longitude event_lat = origin.latitude # ------------------------------------------------------------------------- # Begin noise/signal window steps logging.info('Windowing noise and signal...') window_conf = config['windows'] processed_streams = streams.copy() for st in processed_streams: logging.info('Checking stream %s...' % st.get_id()) # Estimate noise/signal split time st = signal_split( st, origin) # Estimate end of signal end_conf = window_conf['signal_end'] event_mag = origin.magnitude st = signal_end( st, event_time=event_time, event_lon=event_lon, event_lat=event_lat, event_mag=event_mag, **end_conf ) wcheck_conf = window_conf['window_checks'] if wcheck_conf['do_check']: st = window_checks( st, min_noise_duration=wcheck_conf['min_noise_duration'], min_signal_duration=wcheck_conf['min_signal_duration'] ) # ------------------------------------------------------------------------- # Begin processing steps logging.info('Starting processing...') processing_steps = config['processing'] # Loop over streams for stream in processed_streams: logging.info('Stream: %s' % stream.get_id()) for processing_step_dict in processing_steps: key_list = list(processing_step_dict.keys()) if len(key_list) != 1: raise ValueError( 'Each processing step must contain exactly one key.') step_name = key_list[0] logging.info('Processing step: %s' % step_name) step_args = processing_step_dict[step_name] # Using globals doesn't seem like a great solution here, but it # works. if step_name not in globals(): raise ValueError( 'Processing step %s is not valid.' % step_name) # Origin is required by some steps and has to be handled specially. # There must be a better solution for this... if step_name == 'fit_spectra': step_args = { 'origin': origin } elif step_name in REQ_ORIGIN: step_args['origin'] = origin if step_args is None: stream = globals()[step_name](stream) else: stream = globals()[step_name](stream, **step_args) # Build the summary report? build_conf = config['build_report'] if build_conf['run']: build_report(processed_streams, build_conf['directory'], origin, config=config) logging.info('Finished processing streams.') return processed_streams
def __init__(self, time, lat, lon, depth, magnitude, user=None, password=None, radius=None, dt=None, ddepth=None, dmag=None, rawdir=None, config=None, drop_non_free=True): """Create a KNETFetcher instance. Download KNET/KikNet data from the Japanese NIED site: http://www.kyoshin.bosai.go.jp/cgi-bin/kyoshin/quick/list_eqid_en.cgi Args: time (datetime): Origin time. lat (float): Origin latitude. lon (float): Origin longitude. depth (float): Origin depth. magnitude (float): Origin magnitude. user (str): username for KNET/KikNET site. password (str): (Optional) password for site. radius (float): Search radius (km). dt (float): Search time window (sec). ddepth (float): Search depth window (km). dmag (float): Search magnitude window (magnitude units). rawdir (str): Path to location where raw data will be stored. If not specified, raw data will be deleted. config (dict): Dictionary containing configuration. If None, retrieve global config. drop_non_free (bool): Option to ignore non-free-field (borehole, sensors on structures, etc.) """ # what values do we use for search thresholds? # In order of priority: # 1) Not-None values passed in constructor # 2) Configured values # 3) DEFAULT values at top of the module if config is None: config = get_config() cfg_radius = None cfg_dt = None cfg_ddepth = None cfg_dmag = None cfg_user = None cfg_password = None if 'fetchers' in config: if 'KNETFetcher' in config['fetchers']: fetch_cfg = config['fetchers']['KNETFetcher'] if 'radius' in fetch_cfg: cfg_radius = float(fetch_cfg['radius']) if 'dt' in fetch_cfg: cfg_dt = float(fetch_cfg['dt']) if 'ddepth' in fetch_cfg: cfg_ddepth = float(fetch_cfg['ddepth']) if 'dmag' in fetch_cfg: cfg_dmag = float(fetch_cfg['dmag']) if 'user' in fetch_cfg: cfg_user = fetch_cfg['user'] if 'password' in fetch_cfg: cfg_password = fetch_cfg['password'] radius = _get_first_value(radius, cfg_radius, RADIUS) dt = _get_first_value(dt, cfg_dt, DT) ddepth = _get_first_value(ddepth, cfg_ddepth, DDEPTH) dmag = _get_first_value(dmag, cfg_dmag, DMAG) # for knet/kiknet, username/password is required if user is None or password is None: # check to see if those values are configured if cfg_user and cfg_password: user = cfg_user password = cfg_password else: fmt = 'Username/password are required to retrieve KNET/KikNET data.' raise Exception(fmt) if user == 'USERNAME' or password == 'PASSWORD': fmt = ('Username/password are required to retrieve KNET/KikNET\n' 'data. This tool can download data from the Japanese NIED\n' 'website. However, for this to work you will first need \n' 'to obtain a username and password from this website:\n' 'https://hinetwww11.bosai.go.jp/nied/registration/?LANG=en\n' 'Then create a custom config file by running the gmsetup\n' 'program, and edit the fetchers:KNETFetcher section\n' 'to use your username and password.') raise Exception(fmt) self.user = user self.password = password tz = pytz.UTC self.time = tz.localize(time) self.lat = lat self.lon = lon self.radius = radius self.dt = dt self.rawdir = rawdir self.depth = depth self.magnitude = magnitude self.ddepth = ddepth self.dmag = dmag self.jptime = self.time + timedelta(seconds=JST_OFFSET) xmin = 127.705 xmax = 147.393 ymin = 29.428 ymax = 46.109 # this announces to the world the valid bounds for this fetcher. self.BOUNDS = [xmin, xmax, ymin, ymax] self.drop_non_free = drop_non_free
def test_corner_frequencies(): # Default config has 'constant' corner frequency method, so the need # here is to force the 'snr' method. data_files, origin = read_data_dir('geonet', 'us1000778i', '*.V1A') streams = [] for f in data_files: streams += read_data(f) sc = StreamCollection(streams) config = get_config() window_conf = config['windows'] processed_streams = sc.copy() for st in processed_streams: if st.passed: # Estimate noise/signal split time event_time = origin.time event_lon = origin.longitude event_lat = origin.latitude st = signal_split(st, origin) # Estimate end of signal end_conf = window_conf['signal_end'] event_mag = origin.magnitude print(st) st = signal_end( st, event_time=event_time, event_lon=event_lon, event_lat=event_lat, event_mag=event_mag, **end_conf ) wcheck_conf = window_conf['window_checks'] st = window_checks( st, min_noise_duration=wcheck_conf['min_noise_duration'], min_signal_duration=wcheck_conf['min_signal_duration'] ) pconfig = config['processing'] # Run SNR check # I think we don't do this anymore. test = [ d for d in pconfig if list(d.keys())[0] == 'compute_snr' ] snr_config = test[0]['compute_snr'] for stream in processed_streams: stream = compute_snr( stream, **snr_config ) # Run get_corner_frequencies test = [ d for d in pconfig if list(d.keys())[0] == 'get_corner_frequencies' ] cf_config = test[0]['get_corner_frequencies'] snr_config = cf_config['snr'] lp = [] hp = [] for stream in processed_streams: if not stream.passed: continue stream = get_corner_frequencies( stream, method="snr", snr=snr_config ) if stream[0].hasParameter('corner_frequencies'): cfdict = stream[0].getParameter('corner_frequencies') lp.append(cfdict['lowpass']) hp.append(cfdict['highpass']) np.testing.assert_allclose( np.sort(hp), [0.00751431, 0.01354455, 0.04250735], atol=1e-6 )
def __handle_duplicates(self, max_dist_tolerance, process_level_preference, format_preference): """ Removes duplicate data from the StreamCollection, based on the process level and format preferences. Args: max_dist_tolerance (float): Maximum distance tolerance for determining whether two streams are at the same location (in meters). process_level_preference (list): A list containing 'V0', 'V1', 'V2', with the order determining which process level is the most preferred (most preferred goes first in the list). format_preference (list): A list continaing strings of the file source formats (found in gmprocess.io). Does not need to list all of the formats. Example: ['cosmos', 'dmg'] indicates that cosmos files are preferred over dmg files. """ # If arguments are None, check the config # If not in the config, use the default values at top of the file preferences = { 'max_dist_tolerance': max_dist_tolerance, 'process_level_preference': process_level_preference, 'format_preference': format_preference } default_config = None for key, val in preferences.items(): if val is None: if default_config is None: default_config = get_config() preferences[key] = default_config['duplicate'][key] stream_params = gather_stream_parameters(self.streams) traces = [] for st in self.streams: for tr in st: traces.append(tr) preferred_traces = [] for tr_to_add in traces: is_duplicate = False for tr_pref in preferred_traces: if are_duplicates(tr_to_add, tr_pref, preferences['max_dist_tolerance']): is_duplicate = True break if is_duplicate: if choose_preferred( tr_to_add, tr_pref, preferences['process_level_preference'], preferences['format_preference']) == tr_to_add: preferred_traces.remove(tr_pref) logging.info( 'Trace %s (%s) is a duplicate and ' 'has been removed from the StreamCollection.' % (tr_pref.id, tr_pref.stats.standard.source_file)) preferred_traces.append(tr_to_add) else: logging.info( 'Trace %s (%s) is a duplicate and ' 'has been removed from the StreamCollection.' % (tr_to_add.id, tr_to_add.stats.standard.source_file)) else: preferred_traces.append(tr_to_add) streams = [StationStream([tr]) for tr in preferred_traces] streams = insert_stream_parameters(streams, stream_params) self.streams = streams
def fetch_data(time, lat, lon, depth, magnitude, config=None, rawdir=None, drop_non_free=True): """Retrieve data using any DataFetcher subclass. Args: time (datetime): Origin time. lat (float): Origin latitude. lon (float): Origin longitude. depth (float): Origin depth. magnitude (float): Origin magnitude. radius (float): Search radius (km). dt (float): Search time window (sec). ddepth (float): Search depth window (km). dmag (float): Search magnitude window (magnitude units). rawdir (str): Path to location where raw data will be stored. If not specified, raw data will be deleted. drop_non_free (bool): Option to ignore non-free-field (borehole, sensors on structures, etc.) Returns: StreamCollection: StreamCollection object. """ if config is None: config = get_config() fetchers = find_fetchers(lat, lon) instances = [] errors = [] for fetchname, fetcher in fetchers.items(): try: fetchinst = fetcher(time, lat, lon, depth, magnitude, config=config, rawdir=rawdir, drop_non_free=drop_non_free) except Exception as e: fmt = 'Could not instantiate Fetcher %s, due to error\n "%s"' tpl = (fetchname, str(e)) msg = fmt % tpl logging.warn(msg) errors.append(msg) continue xmin, xmax, ymin, ymax = fetchinst.BOUNDS if (xmin < lon < xmax) and (ymin < lat < ymax): instances.append(fetchinst) efmt = '%s M%.1f (%.4f,%.4f)' etpl = (time, magnitude, lat, lon) esummary = efmt % etpl streams = [] for fetcher in instances: if 'FDSN' in str(fetcher): tstreams = fetcher.retrieveData() if len(streams): streams = streams + tstreams else: streams = tstreams else: events = fetcher.getMatchingEvents(solve=True) if not len(events): msg = 'No event matching %s found by class %s' logging.warn(msg % (esummary, str(fetcher))) continue tstreams = fetcher.retrieveData(events[0]) if len(streams): streams = streams + tstreams else: streams = tstreams if streams is None: streams = [] return (streams, errors)
def __init__(self, time, lat, lon, depth, magnitude, user=None, password=None, radius=None, dt=None, ddepth=None, dmag=None, rawdir=None, config=None, drop_non_free=True): """Create a GeoNetFetcher instance. Args: time (datetime): Origin time. lat (float): Origin latitude. lon (float): Origin longitude. depth (float): Origin depth. magnitude (float): Origin magnitude. user (str): (Optional) username for site. password (str): (Optional) password for site. radius (float): Search radius (km). dt (float): Search time window (sec). ddepth (float): Search depth window (km). dmag (float): Search magnitude window (magnitude units). rawdir (str): Path to location where raw data will be stored. If not specified, raw data will be deleted. config (dict): Dictionary containing configuration. If None, retrieve global config. drop_non_free (bool): Option to ignore non-free-field (borehole, sensors on structures, etc.) """ # what values do we use for search thresholds? # In order of priority: # 1) Not-None values passed in constructor # 2) Configured values # 3) DEFAULT values at top of the module if config is None: config = get_config() cfg_radius = None cfg_dt = None cfg_ddepth = None cfg_dmag = None if 'fetchers' in config: if 'GeoNetFetcher' in config['fetchers']: fetch_cfg = config['fetchers']['GeoNetFetcher'] if 'radius' in fetch_cfg: cfg_radius = float(fetch_cfg['radius']) if 'dt' in fetch_cfg: cfg_dt = float(fetch_cfg['dt']) if 'ddepth' in fetch_cfg: cfg_ddepth = float(fetch_cfg['ddepth']) if 'dmag' in fetch_cfg: cfg_dmag = float(fetch_cfg['dmag']) radius = _get_first_value(radius, cfg_radius, RADIUS) dt = _get_first_value(dt, cfg_dt, DT) ddepth = _get_first_value(ddepth, cfg_ddepth, DDEPTH) dmag = _get_first_value(dmag, cfg_dmag, DMAG) tz = pytz.UTC if isinstance(time, UTCDateTime): time = time.datetime self.time = tz.localize(time) self.lat = lat self.lon = lon self.radius = radius self.dt = dt self.rawdir = rawdir self.depth = depth self.magnitude = magnitude self.ddepth = ddepth self.dmag = dmag xmin = 158.555 xmax = 192.656 ymin = -51.553 ymax = -26.809 # this announces to the world the valid bounds for this fetcher. self.BOUNDS = [xmin, xmax, ymin, ymax] self.drop_non_free = drop_non_free
def from_config(cls, timeseries, config=None, event=None): """ Create class instance from a config. Can be a custom config or the default config found in ~/.gmprocess/config.yml. Args: timeseries (StationStream): Stream of the timeseries data. config (dictionary): Custom config. Default is None, and the default config will be used. event (ScalarEvent): Defines the focal time, geographic location and magnitude of an earthquake hypocenter. Default is None. Notes: Custom configs must be in the following format: {'metrics': 'output_imcs': <list>, 'output_imts': <list>, 'sa':{ 'damping': <float>, 'periods': { 'start': <float>, 'stop': <float>, 'num': <float>, 'spacing': <string>, 'use_array': <bool>, 'defined_periods': <list>, } }, 'fas':{ 'smoothing': <float>, 'bandwidth': <float>, 'periods': { 'start': <float>, 'stop': <float>, 'num': <float>, 'spacing': <string>, 'use_array': <bool>, 'defined_periods': <list>, } } } Currently the only acceptied smoothing type is 'konno_ohmachi', and the options for spacing are 'linspace' or 'logspace'. """ if config is None: config = get_config() metrics = config['metrics'] config_imts = [imt.lower() for imt in metrics['output_imts']] imcs = [imc.lower() for imc in metrics['output_imcs']] # append periods imts = [] for imt in config_imts: if imt == 'sa': if metrics['sa']['periods']['use_array']: start = metrics['sa']['periods']['start'] stop = metrics['sa']['periods']['stop'] num = metrics['sa']['periods']['num'] if metrics['sa']['periods']['spacing'] == 'logspace': periods = np.logspace(start, stop, num=num) else: periods = np.linspace(start, stop, num=num) for period in periods: imts += ['sa' + str(period)] else: for period in metrics['sa']['periods']['defined_periods']: imts += ['sa' + str(period)] elif imt == 'fas': if metrics['fas']['periods']['use_array']: start = metrics['fas']['periods']['start'] stop = metrics['fas']['periods']['stop'] num = metrics['fas']['periods']['num'] if metrics['fas']['periods']['spacing'] == 'logspace': periods = np.logspace(start, stop, num=num) else: periods = np.linspace(start, stop, num=num) for period in periods: imts += ['fas' + str(period)] else: for period in metrics['fas']['periods']['defined_periods']: imts += ['fas' + str(period)] else: imts += [imt] damping = metrics['sa']['damping'] smoothing = metrics['fas']['smoothing'] bandwidth = metrics['fas']['bandwidth'] allow_nans = metrics['fas']['allow_nans'] controller = cls(imts, imcs, timeseries, bandwidth=bandwidth, damping=damping, event=event, smooth_type=smoothing, allow_nans=allow_nans) return controller
#!/usr/bin/env python3

import os

import pkg_resources
from obspy import UTCDateTime

from gmprocess.io.read import read_data
from gmprocess.windows import signal_split
from gmprocess.config import get_config
from gmprocess.io.test_utils import read_data_dir
from gmprocess.streamcollection import StreamCollection

PICKER_CONFIG = get_config(section='pickers')

knet_data = os.path.join('data', 'testdata', 'process')
data_path = pkg_resources.resource_filename('gmprocess', knet_data)


def _test_signal_split():
    st1 = read_data(os.path.join(data_path, 'AOM0170806140843.EW'))[0]
    st2 = read_data(os.path.join(data_path, 'AOM0170806140843.NS'))[0]
    st3 = read_data(os.path.join(data_path, 'AOM0170806140843.UD'))[0]
    st = st1 + st2 + st3

    # Test the AR pick
    PICKER_CONFIG['order_of_preference'] = ['ar', 'baer', 'cwb']
    signal_split(st, method='p_arrival', picker_config=PICKER_CONFIG)

    known_arrival = UTCDateTime(2008, 6, 13, 23, 44, 17)
    for tr in st:
def __init__(self, time, lat, lon, depth, magnitude, email=None,
             process_type='raw', station_type='Ground', eq_radius=None,
             eq_dt=None, station_radius=None, rawdir=None, config=None,
             drop_non_free=True):
    """Create a CESMDFetcher instance.

    Download strong motion records from the CESMD site:
    https://strongmotioncenter.org/wserv/records/builder/

    Args:
        time (datetime): Origin time.
        lat (float): Origin latitude.
        lon (float): Origin longitude.
        depth (float): Origin depth.
        magnitude (float): Origin magnitude.
        email (str): email address for CESMD site.
        process_type (str): One of 'raw' or 'processed'.
        station_type (str): One of "Array", "Ground", "Building",
            "Bridge", "Dam", "Tunnel", "Wharf", "Other"
        eq_radius (float): Earthquake search radius (km).
        eq_dt (float): Earthquake search time window (sec).
        station_radius (float): Station search radius (km).
        rawdir (str): Path to location where raw data will be stored.
            If not specified, raw data will be deleted.
        config (dict): Dictionary containing configuration. If None,
            retrieve global config.
        drop_non_free (bool): Option to ignore non-free-field (borehole,
            sensors on structures, etc.)
    """
    # what values do we use for search thresholds?
    # In order of priority:
    # 1) Not-None values passed in constructor
    # 2) Configured values
    # 3) DEFAULT values at top of the module
    if config is None:
        config = get_config()
    cfg_eq_radius = None
    cfg_station_radius = None
    cfg_eq_dt = None
    cfg_email = None
    cfg_station_type = None
    cfg_process_type = None
    if 'fetchers' in config:
        if 'CESMDFetcher' in config['fetchers']:
            fetch_cfg = config['fetchers']['CESMDFetcher']
            if 'eq_radius' in fetch_cfg:
                cfg_eq_radius = float(fetch_cfg['eq_radius'])
            if 'station_radius' in fetch_cfg:
                cfg_station_radius = float(fetch_cfg['station_radius'])
            if 'dt' in fetch_cfg:
                cfg_eq_dt = float(fetch_cfg['eq_dt'])
            if 'email' in fetch_cfg:
                cfg_email = fetch_cfg['email']
            if 'process_type' in fetch_cfg:
                cfg_process_type = fetch_cfg['process_type']
            if 'station_type' in fetch_cfg:
                cfg_station_type = fetch_cfg['station_type']

    radius = _get_first_value(eq_radius, cfg_eq_radius, EQ_RADIUS)
    station_radius = _get_first_value(station_radius, cfg_station_radius,
                                      STATION_RADIUS)
    eq_dt = _get_first_value(eq_dt, cfg_eq_dt, EQ_DT)
    station_type = _get_first_value(station_type, cfg_station_type,
                                    STATION_TYPE)
    process_type = _get_first_value(process_type, cfg_process_type,
                                    PROCESS_TYPE)

    # for CESMD, user (email address) is required
    if email is None:
        # check to see if those values are configured
        if cfg_email:
            email = cfg_email
        else:
            fmt = 'Email address is required to retrieve CESMD data.'
            raise Exception(fmt)
    if email == 'EMAIL':
        fmt = ('Email address is required to retrieve CESMD\n'
               'data. This tool can download data from the CESMD\n'
               'website. However, for this to work you will first need \n'
               'to register your email address using this website:\n'
               'https://strongmotioncenter.org/cgi-bin/CESMD/register.pl\n'
               'Then create a custom config file by running the gmsetup\n'
               'program, and edit the fetchers:CESMDFetcher section\n'
               'to use your email address.')
        raise Exception(fmt)

    self.metadata = None
    self.email = email
    self.process_type = process_type
    self.station_type = station_type
    tz = pytz.UTC
    if isinstance(time, UTCDateTime):
        time = time.datetime
    self.time = tz.localize(time)
    self.lat = lat
    self.lon = lon
    self.radius = radius
    self.station_radius = station_radius
    self.eq_dt = eq_dt
    self.rawdir = rawdir
    self.depth = depth
    self.magnitude = magnitude
    xmin = -199.528
    xmax = -63.473
    ymin = 17.44
    ymax = 73.571
    # this announces to the world the valid bounds for this fetcher.
    self.BOUNDS = [xmin, xmax, ymin, ymax]
    self.drop_non_free = drop_non_free
def __init__(self, time, lat, lon, depth, magnitude,
             user=None, password=None, radius=100, dt=16, ddepth=30,
             dmag=0.3, rawdir=None, config=None, drop_non_free=True):
    """Create a TurkeyFetcher instance.

    Download Turkish strong motion data from the Turkish NSMN site:
    http://kyhdata.deprem.gov.tr/2K/kyhdata_v4.php

    Args:
        time (datetime): Origin time.
        lat (float): Origin latitude.
        lon (float): Origin longitude.
        depth (float): Origin depth.
        magnitude (float): Origin magnitude.
        radius (float): Search radius (km).
        dt (float): Search time window (sec).
        ddepth (float): Search depth window (km).
        dmag (float): Search magnitude window (magnitude units).
        rawdir (str): Path to location where raw data will be stored.
            If not specified, raw data will be deleted.
        config (dict): Dictionary containing configuration. If None,
            retrieve global config.
        drop_non_free (bool): Option to ignore non-free-field (borehole,
            sensors on structures, etc.)
    """
    # what values do we use for search thresholds?
    # In order of priority:
    # 1) Not-None values passed in constructor
    # 2) Configured values
    # 3) DEFAULT values at top of the module
    if config is None:
        config = get_config()
    cfg_radius = None
    cfg_dt = None
    cfg_ddepth = None
    cfg_dmag = None

    if 'fetchers' in config:
        if 'TurkeyFetcher' in config['fetchers']:
            fetch_cfg = config['fetchers']['TurkeyFetcher']
            if 'radius' in fetch_cfg:
                cfg_radius = float(fetch_cfg['radius'])
            if 'dt' in fetch_cfg:
                cfg_dt = float(fetch_cfg['dt'])
            if 'ddepth' in fetch_cfg:
                cfg_ddepth = float(fetch_cfg['ddepth'])
            if 'dmag' in fetch_cfg:
                cfg_dmag = float(fetch_cfg['dmag'])

    radius = _get_first_value(radius, cfg_radius, RADIUS)
    dt = _get_first_value(dt, cfg_dt, DT)
    ddepth = _get_first_value(ddepth, cfg_ddepth, DDEPTH)
    dmag = _get_first_value(dmag, cfg_dmag, DMAG)

    tz = pytz.UTC
    self.time = tz.localize(time)
    self.lat = lat
    self.lon = lon
    self.radius = radius
    self.dt = dt
    self.rawdir = rawdir
    self.depth = depth
    self.magnitude = magnitude
    self.ddepth = ddepth
    self.dmag = dmag
    xmin = 25.664
    xmax = 46.67
    ymin = 34.132
    ymax = 43.555
    # this announces to the world the valid bounds for this fetcher.
    self.BOUNDS = [xmin, xmax, ymin, ymax]
    self.drop_non_free = drop_non_free
def test_corner_frequencies(): # Default config has 'constant' corner frequency method, so the need # here is to force the 'snr' method. data_files, origin = read_data_dir('geonet', 'us1000778i', '*.V1A') streams = [] for f in data_files: streams += read_data(f) sc = StreamCollection(streams) config = get_config() window_conf = config['windows'] processed_streams = sc.copy() for st in processed_streams: if st.passed: # Estimate noise/signal split time event_time = origin.time event_lon = origin.longitude event_lat = origin.latitude st = signal_split(st, origin) # Estimate end of signal end_conf = window_conf['signal_end'] event_mag = origin.magnitude print(st) st = signal_end(st, event_time=event_time, event_lon=event_lon, event_lat=event_lat, event_mag=event_mag, **end_conf) wcheck_conf = window_conf['window_checks'] st = window_checks( st, min_noise_duration=wcheck_conf['min_noise_duration'], min_signal_duration=wcheck_conf['min_signal_duration']) pconfig = config['processing'] # Run SNR check # I think we don't do this anymore. test = [d for d in pconfig if list(d.keys())[0] == 'compute_snr'] snr_config = test[0]['compute_snr'] for stream in processed_streams: stream = compute_snr(stream, **snr_config) # Run get_corner_frequencies test = [ d for d in pconfig if list(d.keys())[0] == 'get_corner_frequencies' ] cf_config = test[0]['get_corner_frequencies'] snr_config = cf_config['snr'] # With same_horiz False snr_config['same_horiz'] = False lp = [] hp = [] for stream in processed_streams: if not stream.passed: continue stream = get_corner_frequencies(stream, method="snr", snr=snr_config) if stream[0].hasParameter('corner_frequencies'): cfdict = stream[0].getParameter('corner_frequencies') lp.append(cfdict['lowpass']) hp.append(cfdict['highpass']) np.testing.assert_allclose(np.sort(hp), [0.00751431, 0.01354455, 0.04250735], atol=1e-6) st = processed_streams.select(station='HSES')[0] lps = [tr.getParameter('corner_frequencies')['lowpass'] for tr in st] hps = [tr.getParameter('corner_frequencies')['highpass'] for tr in st] np.testing.assert_allclose(np.sort(lps), [100., 100., 100.], atol=1e-6) np.testing.assert_allclose(np.sort(hps), [0.00305176, 0.00751431, 0.02527502], atol=1e-6) # With same_horiz True snr_config['same_horiz'] = True lp = [] hp = [] for stream in processed_streams: if not stream.passed: continue stream = get_corner_frequencies(stream, method="snr", snr=snr_config) if stream[0].hasParameter('corner_frequencies'): cfdict = stream[0].getParameter('corner_frequencies') lp.append(cfdict['lowpass']) hp.append(cfdict['highpass']) np.testing.assert_allclose(np.sort(hp), [0.00751431, 0.01354455, 0.04882812], atol=1e-6) st = processed_streams.select(station='HSES')[0] lps = [tr.getParameter('corner_frequencies')['lowpass'] for tr in st] hps = [tr.getParameter('corner_frequencies')['highpass'] for tr in st] np.testing.assert_allclose(np.sort(lps), [100., 100., 100.], atol=1e-6) np.testing.assert_allclose(np.sort(hps), [0.00751431, 0.00751431, 0.02527502], atol=1e-6)
def process_streams(streams, origin, config=None):
    """
    Run processing steps from the config file.

    This method looks in the 'processing' config section and loops over those
    steps and hands off the config options to the appropriate processing
    method. Streams that fail any of the tests are kept in the
    StreamCollection but the parameter 'passed_checks' is set to False and
    subsequent processing steps are not applied once a check has failed.

    Args:
        streams (StreamCollection): A StreamCollection object.
        origin (ScalarEvent): ScalarEvent object.
        config (dict): Configuration dictionary (or None). See get_config().

    Returns:
        A StreamCollection object.
    """
    if not isinstance(streams, StreamCollection):
        raise ValueError('streams must be a StreamCollection instance.')

    if config is None:
        config = get_config()

    logging.info('Processing streams...')
    event_time = origin.time
    event_lon = origin.longitude
    event_lat = origin.latitude

    # -------------------------------------------------------------------------
    # Begin noise/signal window steps

    logging.info('Windowing noise and signal...')
    window_conf = config['windows']

    processed_streams = streams.copy()
    for st in processed_streams:
        logging.info('Checking stream %s...' % st.get_id())
        # Estimate noise/signal split time
        st = signal_split(st, origin)

        # Estimate end of signal
        end_conf = window_conf['signal_end']
        event_mag = origin.magnitude
        st = signal_end(st,
                        event_time=event_time,
                        event_lon=event_lon,
                        event_lat=event_lat,
                        event_mag=event_mag,
                        **end_conf)
        wcheck_conf = window_conf['window_checks']
        if wcheck_conf['do_check']:
            st = window_checks(
                st,
                min_noise_duration=wcheck_conf['min_noise_duration'],
                min_signal_duration=wcheck_conf['min_signal_duration'])

    # -------------------------------------------------------------------------
    # Begin processing steps
    logging.info('Starting processing...')
    processing_steps = config['processing']

    # Loop over streams
    for stream in processed_streams:
        logging.info('Stream: %s' % stream.get_id())
        for processing_step_dict in processing_steps:

            key_list = list(processing_step_dict.keys())
            if len(key_list) != 1:
                raise ValueError(
                    'Each processing step must contain exactly one key.')
            step_name = key_list[0]
            logging.info('Processing step: %s' % step_name)
            step_args = processing_step_dict[step_name]
            # Using globals doesn't seem like a great solution here, but it
            # works.
            if step_name not in globals():
                raise ValueError(
                    'Processing step %s is not valid.' % step_name)

            # Origin is required by some steps and has to be handled specially.
            # There must be a better solution for this...
            if step_name == 'fit_spectra':
                step_args = {'origin': origin}
            elif step_name in REQ_ORIGIN:
                step_args['origin'] = origin

            if step_args is None:
                stream = globals()[step_name](stream)
            else:
                stream = globals()[step_name](stream, **step_args)

    # Build the summary report?
    build_conf = config['build_report']
    if build_conf['run']:
        build_report(processed_streams,
                     build_conf['directory'],
                     origin, config=config)

    logging.info('Finished processing streams.')
    return processed_streams
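# --- Usage sketch (illustrative, not part of the library): run the configured
# --- processing chain on the GeoNet test data used elsewhere in these tests.
# --- read_data_dir/read_data/StreamCollection/get_config are used exactly as
# --- in the tests above; the event id is the one from those tests.
def example_process_streams():
    datafiles, event = read_data_dir('geonet', 'us1000778i', '*.V1A')
    raw_streams = []
    for dfile in datafiles:
        raw_streams += read_data(dfile)
    sc = StreamCollection(raw_streams)
    processed = process_streams(sc, event, config=get_config())
    # Streams that fail a check stay in the collection with passed == False.
    for st in processed:
        print(st.get_id(), st.passed)
    return processed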
def signal_split(st, origin, picker_config=None, config=None):
    """
    This method tries to identify the boundary between the noise and signal
    for the waveform. The split time is stored as the 'signal_split'
    parameter on each trace.

    The P-wave arrival is used as the split between the noise and signal
    windows. Multiple picker methods are supported and can be configured in
    the config file '~/.gmprocess/picker.yml'.

    Args:
        st (StationStream): Stream of data.
        origin (ScalarEvent): ScalarEvent object.
        picker_config (dict): Dictionary containing picker configuration
            information.
        config (dict): Dictionary containing system configuration information.

    Returns:
        StationStream with the 'signal_split' parameter set on each trace,
        containing the split time, method, and picker type (retrievable with
        getParameter('signal_split')).
    """
    if picker_config is None:
        picker_config = get_config(section='pickers')
    if config is None:
        config = get_config()

    loc, mean_snr = pick_travel(st, origin, picker_config=picker_config)
    if loc > 0:
        tsplit = st[0].stats.starttime + loc
        preferred_picker = 'travel_time'
    else:
        pick_methods = ['ar', 'baer', 'power', 'kalkan']
        columns = ['Stream', 'Method', 'Pick_Time', 'Mean_SNR']
        df = pd.DataFrame(columns=columns)
        for pick_method in pick_methods:
            try:
                if pick_method == 'ar':
                    loc, mean_snr = pick_ar(
                        st, picker_config=picker_config, config=config)
                elif pick_method == 'baer':
                    loc, mean_snr = pick_baer(
                        st, picker_config=picker_config, config=config)
                elif pick_method == 'power':
                    loc, mean_snr = pick_power(
                        st, picker_config=picker_config, config=config)
                elif pick_method == 'kalkan':
                    loc, mean_snr = pick_kalkan(
                        st, picker_config=picker_config, config=config)
                elif pick_method == 'yeck':
                    # Only tried if 'yeck' is added to pick_methods above.
                    loc, mean_snr = pick_yeck(st)
            except Exception:
                loc = -1
                mean_snr = np.nan
            row = {'Stream': st.get_id(),
                   'Method': pick_method,
                   'Pick_Time': loc,
                   'Mean_SNR': mean_snr}
            df = df.append(row, ignore_index=True)
        max_snr = df['Mean_SNR'].max()
        if not np.isnan(max_snr):
            maxrow = df[df['Mean_SNR'] == max_snr].iloc[0]
            tsplit = st[0].stats.starttime + maxrow['Pick_Time']
            preferred_picker = maxrow['Method']
        else:
            tsplit = -1

    if tsplit >= st[0].stats.starttime:
        # Update trace params
        split_params = {
            'split_time': tsplit,
            'method': 'p_arrival',
            'picker_type': preferred_picker
        }
        for tr in st:
            tr.setParameter('signal_split', split_params)

    return st
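# --- Illustrative helper (not part of the library): after signal_split() has
# --- run, each trace carries a 'signal_split' parameter whose keys mirror the
# --- split_params dict built above.
def print_split_times(st):
    for tr in st:
        if tr.hasParameter('signal_split'):
            split = tr.getParameter('signal_split')
            print('%s: split at %s (picker: %s)'
                  % (tr.id, split['split_time'], split['picker_type']))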
def process_streams(streams, origin, config=None):
    """
    Run processing steps from the config file.

    This method looks in the 'processing' config section and loops over those
    steps and hands off the config options to the appropriate processing
    method. Streams that fail any of the tests are kept in the
    StreamCollection but the parameter 'passed_checks' is set to False and
    subsequent processing steps are not applied once a check has failed.

    Args:
        streams (StreamCollection): A StreamCollection object.
        origin (ScalarEvent): ScalarEvent object.
        config (dict): Configuration dictionary (or None). See get_config().

    Returns:
        A StreamCollection object.
    """
    if not isinstance(streams, StreamCollection):
        raise ValueError('streams must be a StreamCollection instance.')

    if config is None:
        config = get_config()

    logging.info('Processing streams...')
    event_time = origin.time
    event_lon = origin.longitude
    event_lat = origin.latitude

    # -------------------------------------------------------------------------
    # Compute a travel-time matrix for interpolation later in the
    # trim_multiple_events step
    if any('trim_multiple_events' in step for step in config['processing']):
        travel_time_df, catalog = create_travel_time_dataframe(
            streams, **config['travel_time'])

    # -------------------------------------------------------------------------
    # Begin noise/signal window steps

    logging.info('Windowing noise and signal...')
    window_conf = config['windows']
    model = TauPyModel(config['pickers']['travel_time']['model'])

    for st in streams:
        logging.info('Checking stream %s...' % st.get_id())
        # Estimate noise/signal split time
        st = signal_split(st, origin, model,
                          picker_config=config['pickers'],
                          config=config)

        # Estimate end of signal
        end_conf = window_conf['signal_end']
        event_mag = origin.magnitude
        st = signal_end(st,
                        event_time=event_time,
                        event_lon=event_lon,
                        event_lat=event_lat,
                        event_mag=event_mag,
                        **end_conf)
        wcheck_conf = window_conf['window_checks']
        if wcheck_conf['do_check']:
            st = window_checks(
                st,
                min_noise_duration=wcheck_conf['min_noise_duration'],
                min_signal_duration=wcheck_conf['min_signal_duration'])

    # -------------------------------------------------------------------------
    # Begin processing steps
    logging.info('Starting processing...')
    processing_steps = config['processing']

    # Loop over streams
    for stream in streams:
        logging.info('Stream: %s' % stream.get_id())
        for processing_step_dict in processing_steps:

            key_list = list(processing_step_dict.keys())
            if len(key_list) != 1:
                raise ValueError(
                    'Each processing step must contain exactly one key.')
            step_name = key_list[0]
            logging.info('Processing step: %s' % step_name)
            step_args = processing_step_dict[step_name]
            # Using globals doesn't seem like a great solution here, but it
            # works.
            if step_name not in globals():
                raise ValueError(
                    'Processing step %s is not valid.' % step_name)

            # Origin is required by some steps and has to be handled specially.
            # There must be a better solution for this...
            if step_name == 'fit_spectra':
                step_args = {'origin': origin}
            elif step_name in REQ_ORIGIN:
                step_args['origin'] = origin
            elif step_name == 'trim_multiple_events':
                step_args['catalog'] = catalog
                step_args['travel_time_df'] = travel_time_df
            elif step_name == 'compute_snr':
                step_args['mag'] = origin.magnitude

            if step_args is None:
                stream = globals()[step_name](stream)
            else:
                stream = globals()[step_name](stream, **step_args)

    # -------------------------------------------------------------------------
    # Begin colocated instrument selection
    colocated_conf = config['colocated']
    streams.select_colocated(**colocated_conf)

    for st in streams:
        for tr in st:
            tr.stats.standard.process_level = PROCESS_LEVELS['V2']

    logging.info('Finished processing streams.')
    return streams
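# --- Illustrative sketch (not the shipped config): how entries in
# --- config['processing'] map onto function calls in process_streams above.
# --- Each list item is a one-key dict; the key names a processing function in
# --- this module's namespace and the value (or None) supplies keyword args.
# --- The step names and argument names below are assumptions for illustration.
example_processing_steps = [
    {'detrend': {'detrending_method': 'linear'}},
    {'compute_snr': {'bandwidth': 20.0}},  # 'mag' gets injected from origin
    {'fit_spectra': None},                 # 'origin' gets injected
]


def run_steps(stream, steps):
    # Mirrors the dispatch loop in process_streams above (without the
    # origin-specific argument injection).
    for step in steps:
        (name, args), = step.items()
        func = globals()[name]
        stream = func(stream) if args is None else func(stream, **args)
    return stream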
def __init__(self, time, lat, lon, depth, magnitude,
             user=None, password=None, radius=None, dt=None, ddepth=None,
             dmag=None, rawdir=None, config=None, drop_non_free=True):
    """Create a KNETFetcher instance.

    Download KNET/KikNet data from the Japanese NIED site:
    http://www.kyoshin.bosai.go.jp/cgi-bin/kyoshin/quick/list_eqid_en.cgi

    Args:
        time (datetime): Origin time.
        lat (float): Origin latitude.
        lon (float): Origin longitude.
        depth (float): Origin depth.
        magnitude (float): Origin magnitude.
        user (str): Username for the KNET/KikNet site. Required here or in
            the fetchers:KNETFetcher section of the config.
        password (str): Password for the KNET/KikNet site. Required here or
            in the fetchers:KNETFetcher section of the config.
        radius (float): Search radius (km).
        dt (float): Search time window (sec).
        ddepth (float): Search depth window (km).
        dmag (float): Search magnitude window (magnitude units).
        rawdir (str): Path to location where raw data will be stored.
            If not specified, raw data will be deleted.
        config (dict): Dictionary containing configuration.
            If None, retrieve global config.
        drop_non_free (bool): Option to ignore non-free-field (borehole,
            sensors on structures, etc.)
    """
    # What values do we use for search thresholds?
    # In order of priority:
    # 1) Not-None values passed in constructor
    # 2) Configured values
    # 3) DEFAULT values at top of the module
    if config is None:
        config = get_config()
    cfg_radius = None
    cfg_dt = None
    cfg_ddepth = None
    cfg_dmag = None
    cfg_user = None
    cfg_password = None
    if 'fetchers' in config:
        if 'KNETFetcher' in config['fetchers']:
            fetch_cfg = config['fetchers']['KNETFetcher']
            if 'radius' in fetch_cfg:
                cfg_radius = float(fetch_cfg['radius'])
            if 'dt' in fetch_cfg:
                cfg_dt = float(fetch_cfg['dt'])
            if 'ddepth' in fetch_cfg:
                cfg_ddepth = float(fetch_cfg['ddepth'])
            if 'dmag' in fetch_cfg:
                cfg_dmag = float(fetch_cfg['dmag'])
            if 'user' in fetch_cfg:
                cfg_user = fetch_cfg['user']
            if 'password' in fetch_cfg:
                cfg_password = fetch_cfg['password']

    radius = _get_first_value(radius, cfg_radius, RADIUS)
    dt = _get_first_value(dt, cfg_dt, DT)
    ddepth = _get_first_value(ddepth, cfg_ddepth, DDEPTH)
    dmag = _get_first_value(dmag, cfg_dmag, DMAG)

    # for knet/kiknet, username/password is required
    if user is None or password is None:
        # check to see if those values are configured
        if cfg_user and cfg_password:
            user = cfg_user
            password = cfg_password
        else:
            msg = ('Username/password are required to retrieve '
                   'KNET/KikNet data.')
            raise Exception(msg)

    if user == 'USERNAME' or password == 'PASSWORD':
        msg = ('Username/password are required to retrieve KNET/KikNet\n'
               'data. This tool can download data from the Japanese NIED\n'
               'website. However, for this to work you will first need\n'
               'to obtain a username and password from this website:\n'
               'https://hinetwww11.bosai.go.jp/nied/registration/?LANG=en\n'
               'Then create a custom config file by running the gmsetup\n'
               'program, and edit the fetchers:KNETFetcher section\n'
               'to use your username and password.')
        raise Exception(msg)

    # Allow the user to turn station restriction on or off. Restricting
    # saves time and is unlikely to exclude significant data.
    self.restrict_stations = \
        config['fetchers']['KNETFetcher']['restrict_stations']

    self.user = user
    self.password = password
    tz = pytz.UTC
    if isinstance(time, UTCDateTime):
        time = time.datetime
    self.time = tz.localize(time)
    self.lat = lat
    self.lon = lon
    self.radius = radius
    self.dt = dt
    self.rawdir = rawdir
    self.depth = depth
    self.magnitude = magnitude
    self.ddepth = ddepth
    self.dmag = dmag
    self.jptime = self.time + timedelta(seconds=JST_OFFSET)
    xmin = 127.705
    xmax = 147.393
    ymin = 29.428
    ymax = 46.109
    # this announces to the world the valid bounds for this fetcher.
    self.BOUNDS = [xmin, xmax, ymin, ymax]
    self.drop_non_free = drop_non_free
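# --- Usage sketch (illustrative): fetching K-NET/KiK-net records for an event.
# --- Credentials and event values are placeholders; getMatchingEvents() and
# --- retrieveData() are assumed fetcher methods, consistent with how the
# --- fetchers in this package are typically driven.
from datetime import datetime


def fetch_knet_example():
    origin_time = datetime(2016, 4, 15, 16, 25, 6)  # placeholder event
    fetcher = KNETFetcher(origin_time, lat=32.79, lon=130.75,
                          depth=10.0, magnitude=7.0,
                          user='my_nied_user', password='my_nied_password',
                          rawdir='/tmp/knet_raw')
    events = fetcher.getMatchingEvents(solve=True)  # assumed fetcher API
    if not events:
        return None
    return fetcher.retrieveData(events[0])          # assumed fetcher API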
def test_workspace():
    eventid = 'us1000778i'
    datafiles, event = read_data_dir('geonet', eventid, '*.V1A')
    tdir = tempfile.mkdtemp()
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=H5pyDeprecationWarning)
            warnings.filterwarnings("ignore", category=YAMLLoadWarning)
            warnings.filterwarnings("ignore", category=FutureWarning)
            config = get_config()
            tfile = os.path.join(tdir, 'test.hdf')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace(tfile)
            t1 = time.time()
            workspace.addStreams(event, raw_streams, label='raw')
            t2 = time.time()
            print('Adding %i streams took %.2f seconds' %
                  (len(raw_streams), (t2 - t1)))

            str_repr = workspace.__repr__()
            assert str_repr == 'Events: 1 Stations: 3 Streams: 3'

            eventobj = workspace.getEvent(eventid)
            assert eventobj.origins[0].latitude == event.origins[0].latitude
            assert eventobj.magnitudes[0].mag == event.magnitudes[0].mag

            stations = workspace.getStations()
            assert sorted(stations) == ['hses', 'thz', 'wtmc']

            stations = workspace.getStations(eventid=eventid)
            assert sorted(stations) == ['hses', 'thz', 'wtmc']

            # test retrieving tags for an event that doesn't exist
            try:
                workspace.getStreamTags('foo')
            except KeyError:
                assert 1 == 1

            # test retrieving event that doesn't exist
            try:
                workspace.getEvent('foo')
            except KeyError:
                assert 1 == 1

            instream = None
            for stream in raw_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            assert instream is not None

            outstream = workspace.getStreams(eventid,
                                             stations=['hses'],
                                             labels=['raw'])[0]
            compare_streams(instream, outstream)

            label_summary = workspace.summarizeLabels()
            assert label_summary.iloc[0]['Label'] == 'raw'
            assert label_summary.iloc[0]['Software'] == 'gmprocess'

            sc = StreamCollection(raw_streams)
            processed_streams = process_streams(sc, event, config=config)
            workspace.addStreams(event, processed_streams, 'processed')

            idlist = workspace.getEventIds()
            assert idlist[0] == eventid

            event_tags = workspace.getStreamTags(eventid)
            assert sorted(event_tags) == ['hses_processed', 'hses_raw',
                                          'thz_processed', 'thz_raw',
                                          'wtmc_processed', 'wtmc_raw']
            outstream = workspace.getStreams(eventid,
                                             stations=['hses'],
                                             labels=['processed'])[0]

            provenance = workspace.getProvenance(eventid,
                                                 labels=['processed'])
            first_row = pd.Series({'Record': 'NZ.HSES.HN1',
                                   'Processing Step': 'Remove Response',
                                   'Step Attribute': 'input_units',
                                   'Attribute Value': 'counts'})

            last_row = pd.Series({'Record': 'NZ.WTMC.HNZ',
                                  'Processing Step': 'Lowpass Filter',
                                  'Step Attribute': 'number_of_passes',
                                  'Attribute Value': 2})
            assert provenance.iloc[0].equals(first_row)
            assert provenance.iloc[-1].equals(last_row)

            # compare the parameters from the input processed stream
            # to its output equivalent
            instream = None
            for stream in processed_streams:
                if stream[0].stats.station.lower() == 'hses':
                    instream = stream
                    break
            assert instream is not None
            compare_streams(instream, outstream)

            workspace.close()

            # read in data from a second event and stash it in the workspace
            eventid = 'nz2018p115908'
            datafiles, event = read_data_dir('geonet', eventid, '*.V2A')
            raw_streams = []
            for dfile in datafiles:
                raw_streams += read_data(dfile)

            workspace = StreamWorkspace.open(tfile)
            workspace.addStreams(event, raw_streams, label='foo')

            stations = workspace.getStations(eventid)

            eventids = workspace.getEventIds()
            assert eventids == ['us1000778i', 'nz2018p115908']
            instation = raw_streams[0][0].stats.station
            this_stream = workspace.getStreams(eventid,
                                               stations=[instation],
                                               labels=['foo'])[0]
            assert instation == this_stream[0].stats.station
            usid = 'us1000778i'
            inventory = workspace.getInventory(usid)
            codes = [station.code
                     for station in inventory.networks[0].stations]
            assert sorted(codes) == ['HSES', 'THZ', 'WPWS', 'WTMC']

    except Exception as e:
        raise e
    finally:
        shutil.rmtree(tdir)
def from_config(cls, timeseries, config=None, event=None):
    """
    Create class instance from a config. Can be a custom config or the
    default config found in ~/.gmprocess/config.yml.

    Args:
        timeseries (StationStream): Stream of the timeseries data.
        config (dictionary): Custom config. Default is None, and the default
            config will be used.
        event (ScalarEvent): Defines the focal time, geographic location
            and magnitude of an earthquake hypocenter. Default is None.

    Returns:
        Class instance constructed with the IMTs/IMCs and spectral options
        from the config.

    Notes:
        Custom configs must be in the following format:
            {'metrics': {
                'output_imcs': <list>,
                'output_imts': <list>,
                'sa': {
                    'damping': <float>,
                    'periods': {
                        'start': <float>,
                        'stop': <float>,
                        'num': <int>,
                        'spacing': <string>,
                        'use_array': <bool>,
                        'defined_periods': <list>,
                    }
                },
                'fas': {
                    'smoothing': <string>,
                    'bandwidth': <float>,
                    'periods': {
                        'start': <float>,
                        'stop': <float>,
                        'num': <int>,
                        'spacing': <string>,
                        'use_array': <bool>,
                        'defined_periods': <list>,
                    }
                }
            }}
        Currently the only accepted smoothing type is 'konno_ohmachi',
        and the options for spacing are 'linspace' or 'logspace'.
    """
    if config is None:
        config = get_config()
    metrics = config['metrics']
    config_imts = [imt.lower() for imt in metrics['output_imts']]
    imcs = [imc.lower() for imc in metrics['output_imcs']]
    # append periods
    imts = []
    for imt in config_imts:
        if imt == 'sa':
            if metrics['sa']['periods']['use_array']:
                start = metrics['sa']['periods']['start']
                stop = metrics['sa']['periods']['stop']
                num = metrics['sa']['periods']['num']
                if metrics['sa']['periods']['spacing'] == 'logspace':
                    periods = np.logspace(start, stop, num=num)
                else:
                    periods = np.linspace(start, stop, num=num)
                for period in periods:
                    imts += ['sa' + str(period)]
            for period in metrics['sa']['periods']['defined_periods']:
                imts += ['sa' + str(period)]
        elif imt == 'fas':
            if metrics['fas']['periods']['use_array']:
                start = metrics['fas']['periods']['start']
                stop = metrics['fas']['periods']['stop']
                num = metrics['fas']['periods']['num']
                if metrics['fas']['periods']['spacing'] == 'logspace':
                    periods = np.logspace(start, stop, num=num)
                else:
                    periods = np.linspace(start, stop, num=num)
                for period in periods:
                    imts += ['fas' + str(period)]
            for period in metrics['fas']['periods']['defined_periods']:
                imts += ['fas' + str(period)]
        else:
            imts += [imt]
    damping = metrics['sa']['damping']
    smoothing = metrics['fas']['smoothing']
    bandwidth = metrics['fas']['bandwidth']
    controller = cls(imts, imcs, timeseries,
                     bandwidth=bandwidth, damping=damping,
                     event=event, smooth_type=smoothing)
    return controller
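# --- Usage sketch (illustrative): building a metrics controller from a custom
# --- config. 'MetricsController' is an assumed name for the class that
# --- defines from_config above; 'stream' would be a processed StationStream
# --- and 'event' a ScalarEvent.
example_metrics_config = {
    'metrics': {
        'output_imcs': ['greater_of_two_horizontals', 'channels'],
        'output_imts': ['PGA', 'PGV', 'SA'],
        'sa': {
            'damping': 0.05,
            'periods': {
                'start': 1.0, 'stop': 3.0, 'num': 3,
                'spacing': 'linspace', 'use_array': False,
                'defined_periods': [0.3, 1.0, 3.0],
            },
        },
        'fas': {
            'smoothing': 'konno_ohmachi',
            'bandwidth': 20.0,
            'periods': {
                'start': 1.0, 'stop': 3.0, 'num': 3,
                'spacing': 'logspace', 'use_array': False,
                'defined_periods': [1.0],
            },
        },
    }
}


def example_metrics_from_config(stream, event):
    return MetricsController.from_config(stream,
                                         config=example_metrics_config,
                                         event=event)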
def __init__(self, time, lat, lon, depth, magnitude,
             radius=None, time_before=None, time_after=None, channels=None,
             rawdir=None, config=None, drop_non_free=True):
    """Create an FDSNFetcher instance.

    Download waveform data from all available FDSN sites using the Obspy
    mass downloader functionality.

    Args:
        time (datetime): Origin time.
        lat (float): Origin latitude.
        lon (float): Origin longitude.
        depth (float): Origin depth.
        magnitude (float): Origin magnitude.
        radius (float): Search radius (km).
        time_before (float): Seconds before arrival time (sec).
        time_after (float): Seconds after arrival time (sec).
        channels (list): List of channel patterns to restrict the search to.
        rawdir (str): Path to location where raw data will be stored.
            If not specified, raw data will be deleted.
        config (dict): Dictionary containing configuration.
            If None, retrieve global config.
        drop_non_free (bool): Option to ignore non-free-field (borehole,
            sensors on structures, etc.)
    """
    # What values do we use for search thresholds?
    # In order of priority:
    # 1) Not-None values passed in constructor
    # 2) Configured values
    # 3) DEFAULT values at top of the module
    if config is None:
        config = get_config()
    cfg_radius = None
    cfg_time_before = None
    cfg_time_after = None
    cfg_channels = None
    exclude_networks = EXCLUDE_NETWORKS
    exclude_stations = EXCLUDE_STATIONS
    reject_channels_with_gaps = REJECT_CHANNELS_WITH_GAPS
    minimum_length = MINIMUM_LENGTH
    sanitize = SANITIZE
    minimum_interstation_distance_in_m = MINIMUM_INTERSTATION_DISTANCE_IN_M
    network = NETWORK

    if 'fetchers' in config:
        if 'FDSNFetcher' in config['fetchers']:
            fetch_cfg = config['fetchers']['FDSNFetcher']
            if 'radius' in fetch_cfg:
                cfg_radius = float(fetch_cfg['radius'])
            if 'time_before' in fetch_cfg:
                cfg_time_before = float(fetch_cfg['time_before'])
            if 'time_after' in fetch_cfg:
                cfg_time_after = float(fetch_cfg['time_after'])
            if 'channels' in fetch_cfg:
                cfg_channels = fetch_cfg['channels']
            if 'exclude_networks' in fetch_cfg:
                exclude_networks = fetch_cfg['exclude_networks']
            if 'exclude_stations' in fetch_cfg:
                exclude_stations = fetch_cfg['exclude_stations']
            if 'reject_channels_with_gaps' in fetch_cfg:
                reject_channels_with_gaps = \
                    fetch_cfg['reject_channels_with_gaps']
            if 'minimum_length' in fetch_cfg:
                minimum_length = fetch_cfg['minimum_length']
            if 'sanitize' in fetch_cfg:
                sanitize = fetch_cfg['sanitize']
            if 'minimum_interstation_distance_in_m' in fetch_cfg:
                minimum_interstation_distance_in_m = \
                    fetch_cfg['minimum_interstation_distance_in_m']
            if 'network' in fetch_cfg:
                network = fetch_cfg['network']

    radius = _get_first_value(radius, cfg_radius, RADIUS)
    time_before = _get_first_value(time_before, cfg_time_before, TIME_BEFORE)
    time_after = _get_first_value(time_after, cfg_time_after, TIME_AFTER)
    channels = _get_first_value(channels, cfg_channels, CHANNELS)

    tz = pytz.UTC
    self.time = tz.localize(time)
    self.lat = lat
    self.lon = lon
    self.radius = radius
    self.time_before = time_before
    self.time_after = time_after
    self.rawdir = rawdir
    self.depth = depth
    self.magnitude = magnitude
    self.channels = channels
    self.network = network
    self.exclude_networks = exclude_networks
    self.exclude_stations = exclude_stations
    self.reject_channels_with_gaps = reject_channels_with_gaps
    self.minimum_length = minimum_length
    self.sanitize = sanitize
    self.minimum_interstation_distance_in_m = \
        minimum_interstation_distance_in_m
    self.drop_non_free = drop_non_free
    self.BOUNDS = [-180, 180, -90, 90]
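# --- Usage sketch (illustrative): FDSN retrieval around an origin. The event
# --- values are placeholders, and retrieveData() plus the event-dict keys are
# --- assumptions consistent with how the other fetchers in this package are
# --- driven.
from datetime import datetime


def fetch_fdsn_example():
    origin_time = datetime(2014, 8, 24, 10, 20, 44)  # placeholder event
    fetcher = FDSNFetcher(origin_time, lat=38.215, lon=-122.312,
                          depth=11.1, magnitude=6.0,
                          rawdir='/tmp/fdsn_raw')
    event_dict = {'time': fetcher.time, 'lat': fetcher.lat,
                  'lon': fetcher.lon, 'depth': fetcher.depth,
                  'mag': fetcher.magnitude}  # assumed key names
    return fetcher.retrieveData(event_dict)  # assumed fetcher API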