# Imports required by the code below. It is assumed that dataset2trace and
# is_obspyh5 come from the obspyh5 package and RFStream from the rf package.
import logging
import os

import h5py
import numpy as np
import obspy
import obspyh5
from obspyh5 import dataset2trace, is_obspyh5
from rf import RFStream

import yam.stack
import yam.stretch


def _stretch_wrapper(groupnames, fname, outkey, filter=None, **kwargs):
    """
    Wrapper around `~yam.stretch.stretch()`

    :param groupnames: groups to load the correlations from
    :param fname: file to load correlations from
    :param outkey: key to write stretch results to
    :param filter: filter correlations before stretching
        (bandpass, tuple with min and max frequency)
    :param \*\*kwargs: all other kwargs are passed to
        `~yam.stretch.stretch()` function
    """
    with h5py.File(fname, 'r') as f:
        traces = [obspyh5.dataset2trace(f[g]) for g in groupnames]
    stream = obspy.Stream(traces)
    # Stretching requires floating point data.
    for tr in stream:
        tr.data = np.require(tr.data, float)
    if filter:
        _filter(stream, filter)
    stretchres = yam.stretch.stretch(stream, **kwargs)
    if stretchres is not None:
        stretchres['attrs']['key'] = outkey
    return stretchres
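
# Minimal sketch (assumption, not part of the original module) of the _filter
# helper called above: the docstring of _stretch_wrapper suggests it applies a
# bandpass with the given (min, max) frequency tuple to each trace in place.
def _filter(stream, filter_):
    freqmin, freqmax = filter_
    stream.filter('bandpass', freqmin=freqmin, freqmax=freqmax)
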
def iter_h5_stream(src_file, headonly=False):
    """
    Iterate over hdf5 file containing streams in obspyh5 format.

    :param src_file: Path to file to read
    :type src_file: str or pathlib.Path
    :param headonly: Only read trace stats, do not read actual time series data
    :type headonly: bool
    :yield: obspy.Stream containing traces for a single seismic event.
    """
    assert is_obspyh5(src_file), '{} is not an obspyh5 file'.format(src_file)
    logger = logging.getLogger(__name__)
    fname = os.path.split(src_file)[-1]
    with h5py.File(src_file, mode='r') as h5f:
        root = h5f['/waveforms']
        for seedid, station_grp in root.items():
            logger.info('{}: Group {}'.format(fname, seedid))
            num_events = len(station_grp)
            for i, (_src_event_time, event_grp) in enumerate(station_grp.items()):
                traces = []
                for _trace_id, channel in event_grp.items():
                    traces.append(dataset2trace(channel, headonly=headonly))
                # end for
                # All traces of one event group must share the same event id.
                evid = traces[0].stats.event_id
                for tr in traces[1:]:
                    assert tr.stats.event_id == evid
                # end for
                logger.info('Event {} ({}/{})'.format(evid, i + 1, num_events))
                yield seedid, evid, obspy.Stream(traces)
            # end for
        # end for
    # end with
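
# Usage sketch for iter_h5_stream (the file name is a hypothetical example):
# headonly=True skips the waveform payload, which makes a quick inventory of
# stations and events cheap.
def _print_event_inventory(src_file='extracted_events.h5'):
    for seedid, evid, stream in iter_h5_stream(src_file, headonly=True):
        print('{}: event {} with {} traces'.format(seedid, evid, len(stream)))
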
def _stack_wrapper(groupnames, fname, outkey, **kwargs):
    """
    Wrapper around `~yam.stack.stack()`

    :param groupnames: groups to load the correlations from
    :param fname: file to load correlations from
    :param outkey: key to write stacked correlations to
    :param \*\*kwargs: all other kwargs are passed to
        `~yam.stack.stack()` function
    """
    with h5py.File(fname, 'r') as f:
        traces = [obspyh5.dataset2trace(f[g]) for g in groupnames]
    stream = obspy.Stream(traces)
    stack_stream = yam.stack.stack(stream, **kwargs)
    for tr in stack_stream:
        tr.stats.key = outkey
    return stack_stream
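
# Usage sketch for _stack_wrapper (hypothetical file, group path, and key
# names; the real values depend on the correlation configuration): load one
# correlation group, stack it, and tag the stack for writing under a new key.
def _example_stack():
    groups = ['waveforms/CX.PATCX-CX.PB01/ZZ']  # hypothetical HDF5 group path
    return _stack_wrapper(groups, 'corr.h5', outkey='c1_s1d')
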
def __iter__(self):
    """
    Iterate over stations in the source file, yielding
    (station_id, station_stream3c), where station_stream3c is a list of
    3-channel RFStream instances, one per event.
    """
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    logger.info("Scanning station groups from file {}".format(self.h5_filename))
    with self._open_source_file() as f:
        wf_data = f['waveforms']
        num_stations = len(wf_data)
        count = 0
        event_count = 0
        for station_id in wf_data:
            count += 1
            logger.info("Station {} {}/{}".format(station_id, count, num_stations))
            station_data = wf_data[station_id]
            station_stream3c = []
            for event_time in station_data:
                event_traces = station_data[event_time]
                # if len(event_traces) != self.num_components:
                #     logger.warning("Incorrect number of traces ({}) for stn {} event {}, skipping"
                #                    .format(len(event_traces), station_id, event_time))
                #     continue
                traces = []
                for trace_id in event_traces:
                    traces.append(dataset2trace(event_traces[trace_id]))
                # end for
                event_count += 1
                station_stream3c.append(RFStream(traces=traces).sort())
            # end for
            # Yield the results with 3-channel trace triplets grouped together
            # in RFStream instances.
            yield station_id, station_stream3c
        # end for
    # end with
    logger.info("Yielded {} event traces to process".format(event_count))
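
# Minimal sketch (assumption) of the _open_source_file helper used by the
# iterators here; the real class presumably just wraps h5py.File around the
# stored source file name.
def _open_source_file(self):
    return h5py.File(self.h5_filename, mode='r')
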
def __iter__(self):
    """
    Iterate over events in the source file, yielding
    (station_id, event_id, event_time, stream) with stream traces in
    strict ZNE order.
    """
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)
    logger.info("Scanning jobs metadata from file {}".format(self.h5_filename))
    with self._open_source_file() as f:
        wf_data = f['waveforms']
        num_stations = len(wf_data)
        count = 0
        event_count = 0
        create_event_id = False
        first_loop = True
        for station_id in wf_data:
            count += 1
            logger.info("Station {} {}/{}".format(station_id, count, num_stations))
            station_data = wf_data[station_id]
            for event_time in station_data:
                event_traces = station_data[event_time]
                if not event_traces:
                    continue
                # On the first event, detect whether event ids are stored in
                # the file; if not, synthesize them from the event counter.
                if first_loop:
                    first_loop = False
                    tmp = list(event_traces.keys())[0]
                    create_event_id = ('event_id' not in event_traces[tmp].attrs)
                traces = []
                for trace_id in event_traces:
                    traces.append(dataset2trace(event_traces[trace_id]))
                # end for
                stream = RFStream(traces=traces)
                if len(stream) != self.num_components and self.channel_pattern is not None:
                    for ch_mask in self.channel_pattern.split(','):
                        _stream = stream.select(channel=ch_mask)
                        logger.info("Tried channel mask {}, got {} channels".format(ch_mask, len(_stream)))
                        if len(_stream) == self.num_components:
                            stream = _stream
                            break
                    # end for
                # end if
                if len(stream) != self.num_components:
                    logger.warning("Incorrect number of traces ({}) for stn {} event {}, skipping"
                                   .format(len(stream), station_id, event_time))
                    continue
                # end if
                # Force order of traces to ZNE ordering.
                stream.traces = sorted(stream.traces, key=zne_order)
                # Strongly assert expected ordering of traces. This must be
                # respected so that RF normalization works properly.
                assert stream.traces[0].stats.channel[-1] == 'Z'
                assert stream.traces[1].stats.channel[-1] == 'N'
                assert stream.traces[2].stats.channel[-1] == 'E'
                event_count += 1
                if create_event_id:
                    event_id = event_count
                else:
                    event_id = traces[0].stats.event_id
                    assert np.all([tr.stats.event_id == event_id for tr in traces])
                # end if
                yield station_id, event_id, event_time, stream
            # end for
        # end for
    # end with
    logger.info("Yielded {} event traces to process".format(event_count))
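
# Hypothetical sketch of the zne_order sort key assumed above: rank traces by
# the last letter of their channel code so that sorting yields Z, N, E order.
# The real project may define it differently.
def zne_order(tr):
    return {'Z': 0, 'N': 1, 'E': 2}.get(tr.stats.channel[-1].upper(), 3)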