Example #1
def _iter_h5(io, key, level=3):
    """Iterator yielding streams or stretching results, depending on key"""
    is_stretch = 't' in _analyze_key(key)
    fname = _get_fname(io, key)
    iter_ = _iter_dicts if is_stretch else _iter_streams
    for obj in iter_(fname, key, level=level):
        yield obj
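A minimal usage sketch (hypothetical: the io dict contents and the key 'c1_s1d' are made up for illustration; _iter_h5 yields the (task, obj) pairs produced by _iter_streams or _iter_dicts):

# io dict and key are hypothetical; in yam they come from the parsed config
io = {'corr': 'corr.h5', 'stack': 'stack.h5', 'stretch': 'stretch.h5'}
for task, stream in _iter_h5(io, 'c1_s1d'):
    # task is the HDF5 group path, stream holds the correlations stored there
    print(task, len(stream))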
Example #2
def plot(io, key, plottype=None, seedid=None, day=None, prep_kw={},
         corrid=None, show=False,
         **kwargs):
    """
    Plot stations, raw or preprocessed data, correlations or stretching results

    :param io: |io|
    :param key: key of objects to plot, or one of stations, data, prepdata
    :param plottype: plot type to use
        (non default values are ``'vs_dist'`` and ``'wiggle'`` for
        correlation plots, ``'velocity'`` for plots of stretching results)
    :param seedid: seed id of a channel (for data or prepdata)
    :param day: |UTC| object with day (for data or prepdata)
    :param dict prep_kw: options passed to preprocess (for prepdata only)
    :param corrid: correlation configuration (for prepdata only)
    :param show: show interactive plot
    :param \*\*kwargs: all other kwargs are passed to
        the corresponding plot function in the `~yam.imaging` module
    """
    import yam.imaging
    path = io['plot']
    if not os.path.exists(path):
        os.mkdir(path)
    if key in ('stations', 'data', 'prepdata'):
        pt = key
    else:
        is_corr = 't' not in _analyze_key(key)
        if is_corr and plottype == 'vs_dist':
            pt = 'corr_vs_dist'
        elif is_corr and plottype == 'wiggle':
            pt = 'corr_vs_time_wiggle'
        elif is_corr and plottype is None:
            pt = 'corr_vs_time'
        elif not is_corr and plottype is None:
            pt = 'sim_mat'
        elif not is_corr and plottype == 'velocity':
            pt = 'velocity_change'
        else:
            raise ParseError('Combination of key and plottype not supported')

    kw = kwargs.get('plot_%s_options' % pt, {})
    kw.update(kwargs.get('plot_options', {}))
    bname = os.path.join(path, pt)
    if key == 'stations':
        yam.imaging.plot_stations(io['inventory'], bname, **kw)
    elif key in ('data', 'prepdata'):
        data = load(io, key, do='return', seedid=seedid, day=day,
                    prep_kw=prep_kw)
        fname = bname + '_%s_%s' % (seedid, day)
        if key == 'prepdata':
            fname = fname + '_c' + corrid
        yam.imaging.plot_data(data, fname, show=show, **kw)
    else:
        plot_ = getattr(yam.imaging, 'plot_' + pt)
        if pt == 'corr_vs_dist':
            fname2 = _get_fname(io, key)
            stream = obspy.read(fname2, 'H5', group=key)
            fname = bname + '_' + key.replace('/', '_')
            plot_(stream, fname, **kw)
        elif pt == 'velocity_change':
            results = [res for task, res in _iter_h5(io, key)]
            fname = bname + '_' + key.replace('/', '_')
            plot_(results, fname, **kw)
        else:
            for task, res in _iter_h5(io, key):
                fname = bname + task.replace('/', '_')
                plot_(res, fname, **kw)
    if show:
        from matplotlib.pyplot import show
        show()
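For context, a hedged sketch of how plot might be invoked (the key, seed id and day are assumptions, not values from the source):

from obspy import UTCDateTime as UTC

# plot stacked correlations versus inter-station distance (key is hypothetical)
plot(io, 'c1_s1d', plottype='vs_dist')
# plot one day of preprocessed data for a single channel
plot(io, 'prepdata', seedid='NET.STA..HHZ', day=UTC('2018-01-02'),
     corrid='c1', show=True)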
Example #3
def load(io, key, seedid=None, day=None, do='return', prep_kw={},
         fname=None, format=None):
    """
    Load an object and return, print or export it, or load it into IPython

    :param io: |io|
    :param key: key of object to load
        (key inside HDF5 file, or one of data, prepdata, stations)
    :param seedid: seed id of a channel (for data or prepdata)
    :param day: |UTC| object with day (for data or prepdata)
    :param do: specifies what to do with the object, default is ``'return'``
        which simply returns the object, other possible values are
        ``'print'`` -- print object (used by print command),
        ``'load'`` -- load object in IPython session (used by load command),
        ``'export'`` -- export correlations to different file format
        (used by export command)
    :param dict prep_kw: options passed to preprocess (for prepdata only)
    :param fname: file name (for export command)
    :param format: target format (for export command)
    """
    if key == 'stations':
        obj = io['inventory']
    elif key in ('data', 'prepdata'):
        if seedid is None or day is None:
            msg = 'seedid and day must be given for data or prepdata'
            raise ParseError(msg)
        if key == 'prepdata':
            prep_keys = ('remove_response', 'remove_response_options',
                         'demean', 'filter', 'normalization',
                         'time_norm_options', 'spectral_whitening_options',
                         'tolerance_shift',
                         'downsample')
            prep_kw = {k: prep_kw.get(k) for k in prep_keys}
        obj = _load_data(seedid, day, io['data'], io.get('data_format'),
                         key, inventory=io['inventory'], **prep_kw)
    else:
        is_stretch = 't' in _analyze_key(key)
        fname_in = _get_fname(io, key)
        if is_stretch:
            obj = read_dicts(fname_in, key)
            if do == 'print':
                obj = '\n\n'.join(str(o) for o in obj)
        else:
            obj = obspy.read(fname_in, 'H5', group=key, headonly=do == 'print')
            if do == 'print':
                obj = obj.__str__(extended=True)
    if do == 'print':
        print(obj)
    elif do == 'load':
        _start_ipy(obj)
    elif do == 'return':
        return obj
    elif do == 'export':
        print('export', obj)
        if format == 'H5':
            obspyh5.set_index()
        obj.write(fname, format)
        if format == 'H5':
            from yam.io import INDEX
            obspyh5.set_index(INDEX)
    else:
        raise ParseError('Unsupported value for do argument: %r' % do)
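Hedged examples of typical calls (the key names are assumptions):

# return stretching results as a list of dicts ('t' in the key selects stretching)
results = load(io, 't1/c1_s1d', do='return')
# print a summary of stacked correlations (header-only read, no waveform data)
load(io, 'c1_s1d', do='print')
# export correlations to any format supported by obspy.Stream.write
load(io, 'c1_s1d', do='export', fname='corr.mseed', format='MSEED')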
Example #4
def info(io, key=None, subkey='', config=None, **unused_kwargs):
    """
    Print information about the yam project

    :param io: |io|
    :param key: key to print infos about
        (key inside HDF5 file, or one of data, stations,
        default: None -- print overview)
    :param subkey: only print part of the HDF5 file
    :param config: list of configuration dictionaries
    """
    print2 = _get_print2()
    data_plugin = io.get('data_plugin')
    if key is None:
        print('Stations:')
        inventory = io['inventory']
        if inventory is None:
            print2('Not found')
        else:
            stations = inventory.get_contents()['stations']
            channels = inventory.get_contents()['channels']
            print2(' '.join(st.strip().split()[0] for st in stations))
            print2('%d stations, %d channels' % (len(stations), len(channels)))
        if data_plugin:
            print('Data plugin:')
            print2('%s' % data_plugin)
        else:
            print('Raw data (expression for day files):')
            print2(io['data'])
            print2('%d files found' % len(_get_data_files(io['data'])))
        print('Config ids:')

        def get_keys(d):
            if d is None or len(d) == 0:
                return 'None'
            else:
                return ', '.join(sorted(d.keys()))
        print2('c Corr: ' + get_keys(config[0]))
        print2('s Stack: ' + get_keys(config[1]))
        print2('t Stretch: ' + get_keys(config[2]))
        print('Correlations (channel combinations, correlations calculated):')
        _print_info_helper('corr', io)
        print('Stacks:')
        _print_info_helper('stack', io)
        print('Stretching matrices:')
        _print_info_helper('tstretch', io)
    elif key == 'stations':
        print(io['inventory'])
    elif key == 'data':
        if data_plugin:
            print('Data plugin:')
            print2('%s' % data_plugin)
        else:
            print('Raw data (expression for day files):')
            print2(io['data'])
            fnames = _get_data_files(io['data'])
            print2('%d files found' % len(fnames))
            for fname in sorted(fnames):
                print2(fname)
    else:
        is_stretch = 't' in _analyze_key(key)
        fname = _get_fname(io, key)
        level = 3 if is_stretch else 4
        for line in _get_existent(fname, key + subkey, level):
            print2(line)
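A hedged usage sketch; config is assumed to be the sequence of the three configuration dicts (correlation, stack, stretch) that the function indexes as config[0] to config[2]:

# overview of the whole project
info(io, config=(corr_conf, stack_conf, stretch_conf))
# list everything stored below one correlation key (key and subkey are hypothetical)
info(io, key='c1_s1d', subkey='/CX.PATCX-CX.PB01')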
Example #5
def start_stack(io, key, outkey, subkey='', njobs=None,
                starttime=None, endtime=None,
                dataset_kwargs=None,
                **kwargs):
    """
    Start stacking

    :param io: |io|
    :param key:  key to load correlations from
    :param outkey: key to write stacked correlations to
    :param subkey: only use a part of the correlations
    :param njobs: number of cores to use for computation,
        default: None -- use all available cores
    :param starttime,endtime: constrain start and end dates
    :param dataset_kwargs: options passed to obspyh5 and in turn to h5py when
        creating a new dataset,
        e.g. ``dataset_kwargs={'compression': 'gzip'}``.
        See ``create_dataset`` in h5py for more options.
        By default the dtype is set to ``'float16'``.
    :param \*\*kwargs: all other kwargs are passed to
        the `yam.stack.stack()` function
    """
    if dataset_kwargs is None:
        dataset_kwargs = {}
    dataset_kwargs.setdefault('dtype', 'float16')
    fname = io['stack'] if 's' in _analyze_key(key) else io['corr']
    tasks = _get_existent(fname, key + subkey, 3)
    done_tasks = [t.replace(outkey, key) for t in
                  _get_existent(io['stack'], outkey + subkey, 3)]
    tasks = _todo_tasks(tasks, done_tasks)
    length = kwargs.get('length')
    for task in tqdm.tqdm(tasks, total=len(tasks)):
        subtasks = [t for t in _get_existent(fname, task, 4) if
                    (starttime is None or t[-16:] >= starttime) and
                    (endtime is None or t[-16:] <= endtime)]
        if length is None and njobs != 1:
            step = 1000
            subtask_chunks = [tuple(subtasks[i:i + step]) for i in
                              range(0, len(subtasks), step)]
        else:
            subtask_chunks = [subtasks]
            # TODO: parallel stacking for arbitrary stack id
#            lensec = _time2sec(length)
#            if lensec >= 30 * 24 * 3600:
#                subtask_chunks = [subtasks]
#            else:
#                subtask_chunks = []
#                for i in range(0, len(subtasks), step):
#                    chunk = subtasks[i:i + step]
#                    t1 = UTC(subtasks[i + step - 1][-16:])
#                    j = 0
#                    while i + step + j < len(subtasks):
#                        t2 = UTC(subtasks[i + step + j][-16:])
#                        # assume lensec is always larger than movesec
#                        # not ideal, may load too much data
#                        # eg for stack over 1 year
#                        if t2 - t1 <= lensec:
#                            chunk.append(subtasks[i + step + j])
#                        else:
#                            break
#                        j += 1
#                    subtask_chunks.append(chunk)
        do_work = functools.partial(_stack_wrapper, fname=fname, outkey=outkey,
                                    **kwargs)
        results = []
        if njobs == 1 or len(subtask_chunks) == 1:
            log.debug('do work sequentially')
            for stask in tqdm.tqdm(subtask_chunks, total=len(subtask_chunks)):
                result = do_work(stask)
                results.append(result)
        else:
            pool = multiprocessing.Pool(njobs)
            log.debug('do work parallel (%d cores)', pool._processes)
            for result in tqdm.tqdm(
                    pool.imap(do_work, subtask_chunks),
                    total=len(subtask_chunks)):
                results.append(result)
            pool.close()
            pool.join()
        if length is None:
            for stream in results:
                assert len(stream) <= 1
            traces = [tr for stream in results for tr in stream]
            num = sum(tr.stats.num for tr in traces)
            data = np.sum([tr.data * (tr.stats.num / num) for tr in traces],
                          axis=0)
            tr_stack = obspy.Trace(data, header=traces[0].stats)
            tr_stack.stats.num = num
            tr_stack.write(io['stack'], 'H5', mode='a', **dataset_kwargs)
        else:
            for stack_stream in results:
                stack_stream.write(io['stack'], 'H5', mode='a',
                                   **dataset_kwargs)
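Finally, a hedged example call; the keys and the length keyword (read from kwargs above and forwarded to yam.stack.stack()) are assumptions based on the code:

# stack all correlations of configuration 'c1' into daily stacks on 4 cores
start_stack(io, 'c1', 'c1_s1d', njobs=4, length='1d',
            starttime='2018-01-01T00:00', endtime='2018-12-31T00:00')
# with length=None, a single weighted stack per channel combination is written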