def stack(stream, length=None, move=None):
    """
    Stack traces in stream by correlation id

    :param stream: |Stream| object with correlations
    :param length: time span of one trace in the stack in seconds
        (alternatively a string consisting of a number and a unit --
        ``'d'`` for days and ``'h'`` for hours -- can be specified,
        e.g. ``'3d'`` stacks together all traces inside a three-day time
        window, default: None, which stacks together all traces)
    :param move: define a moving stack, float or string, default: None --
        no moving stack, if specified, move is usually smaller than length
        to get an overlap in the stacked traces
    :return: |Stream| object with stacked correlations
    """
    stream.sort()
    stream_stack = obspy.Stream()
    ids = {_corr_id(tr) for tr in stream}
    ids.discard(None)
    for id_ in ids:
        traces = [tr for tr in stream if _corr_id(tr) == id_]
        if length is None:
            # stack all traces with this correlation id into a single trace
            data = np.mean([tr.data for tr in traces], dtype=float, axis=0)
            tr_stack = obspy.Trace(data, header=traces[0].stats)
            tr_stack.stats.key = tr_stack.stats.key + '_s'
            if 'num' in traces[0].stats:
                tr_stack.stats.num = sum(tr.stats.num for tr in traces)
            else:
                tr_stack.stats.num = len(traces)
            stream_stack.append(tr_stack)
        else:
            t1 = traces[0].stats.starttime
            lensec = _time2sec(length)
            movesec = _time2sec(move) if move else lensec
            # snap the start of the first window to the day or hour boundary
            if (lensec % (24 * 3600) == 0 or
                    (isinstance(length, str) and 'd' in length)):
                t1 = UTC(t1.year, t1.month, t1.day)
            elif (lensec % 3600 == 0 or
                    (isinstance(length, str) and 'h' in length)):
                t1 = UTC(t1.year, t1.month, t1.day, t1.hour)
            t2 = max(t1, traces[-1].stats.endtime - lensec)
            for t in IterTime(t1, t2, dt=movesec):
                # select all traces starting inside the current window
                sel = [tr for tr in traces
                       if -0.1 <= tr.stats.starttime - t <= lensec + 0.1]
                if len(sel) == 0:
                    continue
                data = np.mean([tr.data for tr in sel], dtype=float, axis=0)
                tr_stack = obspy.Trace(data, header=sel[0].stats)
                key_add = ('_s%s' % length +
                           ('m%s' % move if move is not None else ''))
                tr_stack.stats.key = tr_stack.stats.key + key_add
                tr_stack.stats.starttime = t
                if 'num' in traces[0].stats:
                    tr_stack.stats.num = sum(tr.stats.num for tr in sel)
                else:
                    tr_stack.stats.num = len(sel)
                stream_stack.append(tr_stack)
    return stream_stack
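
# Usage sketch (illustration only; ``corr_stream`` is a hypothetical obspy
# Stream of correlation traces, e.g. hourly correlations produced by yam's
# correlate step -- it is not defined in this module):
#
#     stacked_all = stack(corr_stream)                     # key suffix '_s'
#     daily = stack(corr_stream, length='1d')              # key suffix '_s1d'
#     moving = stack(corr_stream, length='5d', move='1d')  # suffix '_s5dm1d'
#
# The moving stack averages all traces inside each five-day window and
# advances the window by one day, so consecutive stacked traces overlap.
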
def _slide_and_correlate_traces(day, next_day, length, overlap, discard,
                                max_lag, outkey, demean_window, task):
    """Helper function for parallel correlating"""
    tr1, tr2, dist, azi, baz = task
    sr = tr1.stats.sampling_rate
    sr2 = tr2.stats.sampling_rate
    if sr != sr2:
        msg = 'Traces have different sampling rate (%s != %s)' % (sr, sr2)
        raise ValueError(msg)
    xstream = obspy.Stream()
    # slide a window of length seconds over the day with the given overlap
    for t1 in IterTime(day, next_day - length + overlap,
                       dt=length - overlap):
        sub = obspy.Stream([tr1, tr2]).slice(t1, t1 + length)
        if len(sub) < 2:
            continue
        st = [tr.stats.starttime for tr in sub]
        et = [tr.stats.endtime for tr in sub]
        if max(st) > min(et):  # this should not happen
            continue
        sub.trim(max(st), min(et))
        _make_same_length(sub[0], sub[1])
        # availability: fraction of non-masked samples in the window
        avail = min((tr.data.count() if hasattr(tr.data, 'count')
                     else len(tr)) / sr / length for tr in sub)
        if discard is not None and avail < discard:
            msg = ('discard trace combination %s-%s for time %s '
                   '(availability %.1f%% < %.1f%% desired)')
            log.debug(msg, sub[0].id, sub[1].id, str(max(st))[:19],
                      100 * avail, 100 * discard)
            continue
        # fill masked gaps with zeros before correlating
        for tr in sub:
            _fill_array(tr.data, fill_value=0)
            tr.data = np.ma.getdata(tr.data)
        xtr = correlate_traces(sub[0], sub[1], max_lag, demean=demean_window)
        xtr.stats.starttime = t1
        xtr.stats.key = outkey
        xtr.stats.dist = dist
        xtr.stats.azi = azi
        xtr.stats.baz = baz
        xtr.stats.avail = avail
        xstream += xtr
    return xstream
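
# A minimal sketch of how this helper can be driven (illustration only:
# the file names, coordinates, and parameter values below are hypothetical).
# ``task`` bundles the two day-long traces with their station geometry:
#
#     import obspy
#     from obspy.geodetics import gps2dist_azimuth
#
#     tr1 = obspy.read('stationA_day.mseed')[0]
#     tr2 = obspy.read('stationB_day.mseed')[0]
#     dist, azi, baz = gps2dist_azimuth(50.0, 12.0, 50.5, 12.5)
#     day = tr1.stats.starttime
#     xstream = _slide_and_correlate_traces(
#         day, day + 24 * 3600, 3600, 1800, 0.5, 100, 'xcorr', True,
#         (tr1, tr2, dist, azi, baz))
#
# i.e. hour-long windows with half an hour of overlap, windows with less
# than 50% data availability are discarded, correlations kept to 100 s lag.
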
def start_correlate(io,
                    filter_inventory=None,
                    startdate='1990-01-01', enddate='2020-01-01',
                    njobs=None,
                    parallel_inner_loop=False,
                    keep_correlations=False,
                    stack='1d',
                    dataset_kwargs=None,
                    **kwargs):
    """
    Start correlation

    :param io: |io|
    :param filter_inventory: filter inventory with its select method,
        specified dict is passed to |Inventory.select|
    :param str startdate,enddate: start and end date as strings
    :param njobs: number of cores to use for computation, days are computed
        in parallel, this might consume a lot of memory, default: None --
        use all available cores
    :param parallel_inner_loop: run inner loops in parallel instead of the
        outer loop (preprocessing of different stations and correlation of
        different pairs versus processing of different days),
        useful for a dataset with many stations
    :param dataset_kwargs: options passed to obspyh5 resp. h5py when creating
        a new dataset, e.g. ``dataset_kwargs={'compression': 'gzip'}``,
        see ``create_dataset`` in h5py for more options,
        by default the dtype is set to ``'float16'`` (half precision)
    :param keep_correlations,stack,\*\*kwargs: all other kwargs are passed to
        `~yam.correlate.correlate()` function
    """
    if dataset_kwargs is None:
        dataset_kwargs = {}
    if filter_inventory:
        log.debug('filter inventory')
        io['inventory'] = io['inventory'].select(**filter_inventory)
    log.info('start preprocessing and correlation')
    tasks = [str(t)[:10] for t in IterTime(UTC(startdate), UTC(enddate))]
    # collect days that are already processed and can be skipped
    done_tasks = None
    if stack is not None:
        key2 = kwargs['outkey'] + '_s' + stack
        done_tasks = [t[-16:-6] for t in _get_existent(io['stack'], key2, 4)]
    if keep_correlations:
        key2 = kwargs['outkey']
        done_tasks2 = [t[-16:-6] for t in _get_existent(io['corr'], key2, 4)]
        if done_tasks is None:
            done_tasks = done_tasks2
        else:
            done_tasks = [t for t in done_tasks if t in done_tasks2]
    tasks = _todo_tasks(tasks, done_tasks)
    tasks = [UTC(t) for t in tasks]
    kwargs.update({'keep_correlations': keep_correlations, 'stack': stack})
    dir_corr = 'correlation'
    if os.path.exists(dir_corr):
        shutil.rmtree(dir_corr)
    os.makedirs(dir_corr)
    if parallel_inner_loop:
        kwargs['njobs'] = njobs
        njobs = 1
    do_work = functools.partial(correlate, io, **kwargs)
    if njobs == 1:
        log.info('do work sequentially')
        for task in tqdm.tqdm(tasks, total=len(tasks)):
            result = do_work(task)
            # _write_corr(result, io, **dataset_kwargs)
    else:
        pool = multiprocessing.Pool(njobs)
        log.info('do work parallel (%d cores)', pool._processes)
        for result in tqdm.tqdm(pool.imap_unordered(do_work, tasks),
                                total=len(tasks)):
            continue  # _write_corr(result, io, **dataset_kwargs)
        pool.close()
        pool.join()
    log.info('finished preprocessing and correlation')
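
# Hypothetical invocation (sketch; the io mapping below only shows the keys
# read directly in this function -- 'inventory', 'corr', 'stack' -- a real
# yam io object holds more entries, e.g. where to find the waveform data):
#
#     io = {'inventory': obspy.read_inventory('stations.xml'),
#           'corr': 'corr.h5', 'stack': 'stack.h5'}
#     start_correlate(io, startdate='2010-01-01', enddate='2010-02-01',
#                     njobs=4, stack='1d', outkey='xcorr', max_lag=300)
#
# Note that kwargs must contain 'outkey'; everything not consumed here
# (e.g. max_lag) is handed through to yam.correlate.correlate().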