Example 1
def stack(stream, length=None, move=None):
    """
    Stack traces in stream by correlation id

    :param stream: |Stream| object with correlations
    :param length: time span of one trace in the stack in seconds
        (alternatively a string consisting of a number and a unit
        -- ``'d'`` for days and ``'h'`` for hours -- can be specified,
        e.g. ``'3d'`` stacks together all traces inside a three-day time
        window; default: None, which stacks together all traces)
    :param move: define a moving stack, float or string,
        default: None -- no moving stack;
        if specified, move is usually smaller than length to get an
        overlap between the stacked traces
    :return: |Stream| object with stacked correlations
    """
    stream.sort()
    stream_stack = obspy.Stream()
    ids = {_corr_id(tr) for tr in stream}
    ids.discard(None)
    for id_ in ids:
        traces = [tr for tr in stream if _corr_id(tr) == id_]
        if length is None:
            data = np.mean([tr.data for tr in traces], dtype=float, axis=0)
            tr_stack = obspy.Trace(data, header=traces[0].stats)
            tr_stack.stats.key = tr_stack.stats.key + '_s'
            if 'num' in traces[0].stats:
                tr_stack.stats.num = sum(tr.stats.num for tr in traces)
            else:
                tr_stack.stats.num = len(traces)
            stream_stack.append(tr_stack)
        else:
            t1 = traces[0].stats.starttime
            lensec = _time2sec(length)
            movesec = _time2sec(move) if move else lensec
            if (lensec % (24 * 3600) == 0
                    or isinstance(length, str) and 'd' in length):
                t1 = UTC(t1.year, t1.month, t1.day)
            elif (lensec % 3600 == 0
                  or isinstance(length, str) and 'h' in length):
                t1 = UTC(t1.year, t1.month, t1.day, t1.hour)
            t2 = max(t1, traces[-1].stats.endtime - lensec)
            for t in IterTime(t1, t2, dt=movesec):
                sel = [
                    tr for tr in traces
                    if -0.1 <= tr.stats.starttime - t <= lensec + 0.1
                ]
                if len(sel) == 0:
                    continue
                data = np.mean([tr.data for tr in sel], dtype=float, axis=0)
                tr_stack = obspy.Trace(data, header=sel[0].stats)
                key_add = '_s%s' % length
                if move is not None:
                    key_add += 'm%s' % move
                tr_stack.stats.key = tr_stack.stats.key + key_add
                tr_stack.stats.starttime = t
                if 'num' in traces[0].stats:
                    tr_stack.stats.num = sum(tr.stats.num for tr in sel)
                else:
                    tr_stack.stats.num = len(sel)
                stream_stack.append(tr_stack)
    return stream_stack
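
A minimal usage sketch for the function above, assuming `stream` is an obspy Stream of correlation traces as produced by yam (each trace carrying a `stats.key` and a valid correlation id); the variable names are placeholders:

# hypothetical usage of stack()
total = stack(stream)                           # one total stack per correlation id
daily = stack(stream, length='1d')              # daily stacks
moving = stack(stream, length='3d', move='1d')  # 3-day windows moved by 1 day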
Example 2
def _slide_and_correlate_traces(day, next_day, length, overlap, discard,
                                max_lag, outkey, task):
    """Helper function for parallel correlating"""
    tr1, tr2, dist, azi, baz = task
    xstream = obspy.Stream()
    for t1 in IterTime(day, next_day - length + overlap, dt=length - overlap):
        sub = obspy.Stream([tr1, tr2]).slice(t1, t1 + length)
        if len(sub) < 2:
            continue
        st = [tr.stats.starttime for tr in sub]
        et = [tr.stats.endtime for tr in sub]
        if max(st) > min(et):  # this should not happen
            continue
        sub.trim(max(st), min(et))
        if discard:
            avail = min(
                (tr.data.count() if hasattr(tr.data, 'count') else len(tr)) /
                tr.stats.sampling_rate / length for tr in sub)
            if avail < discard:
                msg = ('discard trace combination %s-%s for time %s '
                       '(availability %.1f%% < %.1f%% desired)')
                log.debug(msg, sub[0].id, sub[1].id,
                          str(max(st))[:19], 100 * avail, 100 * discard)
                continue
        for tr in sub:
            _fill_array(tr.data, fill_value=0.)
            tr.data = np.ma.getdata(tr.data)
        xtr = correlate_traces(sub[0], sub[1], max_lag)
        xtr.stats.starttime = t1
        xtr.stats.key = outkey
        xtr.stats.dist = dist
        xtr.stats.azi = azi
        xtr.stats.baz = baz
        xstream += xtr
    return xstream
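
The loop above steps from `day` to `next_day - length + overlap` in increments of `length - overlap`, so consecutive windows share `overlap` seconds of data. A self-contained sketch of that arithmetic, assuming `IterTime` yields start times inclusively:

from obspy import UTCDateTime as UTC

# standalone re-creation of the window generation (hypothetical values)
length, overlap = 3600.0, 1800.0              # 1 h windows, 30 min overlap
day, next_day = UTC('2020-01-01'), UTC('2020-01-02')
t1 = day
while t1 <= next_day - length + overlap:      # same bounds as IterTime above
    print(t1, '->', t1 + length)              # each window overlaps the next
    t1 += length - overlap                    # step = length - overlap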
Example 3
def _slide_and_correlate_traces(day, next_day, length, overlap, discard,
                                max_lag, outkey, demean_window, task):
    """Helper function for parallel correlating"""
    tr1, tr2, dist, azi, baz = task
    sr = tr1.stats.sampling_rate
    sr2 = tr2.stats.sampling_rate
    if sr != sr2:
        msg = 'Traces have different sampling rate (%s != %s)' % (sr, sr2)
        raise ValueError(msg)
    xstream = obspy.Stream()
    for t1 in IterTime(day, next_day - length + overlap, dt=length - overlap):
        sub = obspy.Stream([tr1, tr2]).slice(t1, t1 + length)
        if len(sub) < 2:
            continue
        st = [tr.stats.starttime for tr in sub]
        et = [tr.stats.endtime for tr in sub]
        if max(st) > min(et):  # this should not happen
            continue
        sub.trim(max(st), min(et))
        _make_same_length(sub[0], sub[1])
        avail = min(
            (tr.data.count() if hasattr(tr.data, 'count') else len(tr)) / sr /
            length for tr in sub)
        if discard is not None and avail < discard:
            msg = ('discard trace combination %s-%s for time %s '
                   '(availability %.1f%% < %.1f%% desired)')
            log.debug(msg, sub[0].id, sub[1].id,
                      str(max(st))[:19], 100 * avail, 100 * discard)
            continue
        for tr in sub:
            _fill_array(tr.data, fill_value=0)
            tr.data = np.ma.getdata(tr.data)
        xtr = correlate_traces(sub[0], sub[1], max_lag, demean=demean_window)
        xtr.stats.starttime = t1
        xtr.stats.key = outkey
        xtr.stats.dist = dist
        xtr.stats.azi = azi
        xtr.stats.baz = baz
        xtr.stats.avail = avail
        xstream += xtr
    return xstream
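
The availability check and zero-filling above rely on numpy masked arrays, which obspy uses for traces merged across data gaps; `.count()` returns the number of unmasked samples. A self-contained sketch of that mechanism (the array values are made up):

import numpy as np

data = np.ma.masked_array([1., 2., 4., 5.],
                          mask=[False, False, True, False])  # one gap sample
avail = data.count() / len(data)   # fraction of available samples: 0.75
filled = data.filled(0.)           # zero-fill the gap, like _fill_array above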
Example 4
def start_correlate(io,
                    filter_inventory=None,
                    startdate='1990-01-01', enddate='2020-01-01',
                    njobs=None,
                    parallel_inner_loop=False,
                    keep_correlations=False,
                    stack='1d',
                    dataset_kwargs=None,
                    **kwargs):
    """
    Start correlation

    :param io: |io|
    :param filter_inventory: filter the inventory with its select method,
        the specified dict is passed to |Inventory.select|
    :param str startdate,enddate: start and end date as strings
    :param njobs: number of cores to use for computation, days are computed
        in parallel; this may consume a lot of memory, default: None -- use
        all available cores
    :param parallel_inner_loop: run inner loops in parallel instead of the
        outer loop (preprocessing of different stations and correlation of
        different pairs versus processing of different days).
        Useful for a dataset with many stations.
    :param dataset_kwargs: options passed to obspyh5 or h5py when creating
        a new dataset,
        e.g. `dataset_kwargs={'compression': 'gzip'}`.
        See create_dataset in h5py for more options.
        By default the dtype is set to `'float16'` (half precision).
    :param keep_correlations,stack,\*\*kwargs: all other kwargs are passed to
        the `~yam.correlate.correlate()` function
    """
    if dataset_kwargs is None:
        dataset_kwargs = {}
    if filter_inventory:
        log.debug('filter inventory')
        io['inventory'] = io['inventory'].select(**filter_inventory)
    log.info('start preprocessing and correlation')
    tasks = [str(t)[:10] for t in IterTime(UTC(startdate), UTC(enddate))]
    done_tasks = None
    if stack is not None:
        key2 = kwargs['outkey'] + '_s' + stack
        done_tasks = [t[-16:-6] for t in _get_existent(io['stack'], key2, 4)]
    if keep_correlations:
        key2 = kwargs['outkey']
        done_tasks2 = [t[-16:-6] for t in _get_existent(io['corr'], key2, 4)]
        if done_tasks is None:
            done_tasks = done_tasks2
        else:
            done_tasks = [t for t in done_tasks if t in done_tasks2]
    tasks = _todo_tasks(tasks, done_tasks)
    tasks = [UTC(t) for t in tasks]
    kwargs.update({'keep_correlations': keep_correlations, 'stack': stack})

    # recreate the 'correlation' output directory, removing previous contents
    dir_corr = 'correlation'
    if os.path.exists(dir_corr):
        shutil.rmtree(dir_corr)
    os.makedirs(dir_corr)

    if parallel_inner_loop:
        kwargs['njobs'] = njobs
        njobs = 1
    do_work = functools.partial(correlate, io, **kwargs)
    if njobs == 1:
        log.info('do work sequentially')
        for task in tqdm.tqdm(tasks, total=len(tasks)):
            result = do_work(task)
            _write_corr(result, io, **dataset_kwargs)
    else:
        pool = multiprocessing.Pool(njobs)
        log.info('do work parallel (%d cores)', pool._processes)
        for result in tqdm.tqdm(pool.imap_unordered(do_work, tasks),
                                total=len(tasks)):
            _write_corr(result, io, **dataset_kwargs)
        pool.close()
        pool.join()

    log.info('finished preprocessing and correlation')
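
A hypothetical invocation, assuming `io` is the dict yam builds from its configuration (with 'inventory', 'corr' and 'stack' entries) and that `outkey` and any further kwargs are consumed by correlate() as in the body above:

# placeholder arguments, not taken from the examples above
start_correlate(io,
                startdate='2010-01-01', enddate='2010-06-01',
                njobs=4, keep_correlations=True, stack='1d',
                outkey='corr_key')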