Esempio n. 1
0
def _iter_h5(io, key, level=3):
    """Iterator yielding streams or stretching results, depending on key"""
    fname = _get_fname(io, key)
    if 't' in _analyze_key(key):
        # stretching results are stored as dicts
        yield from _iter_dicts(fname, key, level=level)
    else:
        # correlations are stored as obspy streams
        yield from _iter_streams(fname, key, level=level)
Esempio n. 2
0
def remove(io, keys):
    """
    Remove one or several keys from HDF5 file

    :param io: |io|
    :param keys: list of keys to remove
    """
    for key in keys:
        # Warn when the key addresses a nested (non top-level) group.
        # The original check compared the *list* returned by str.split
        # against '' (always True); compare the part after the first
        # slash instead.
        if '/' in key and key.split('/', 1)[1] != '':
            from warnings import warn
            warn('It is highly encouraged to delete only top level keys')
        fname = _get_fname(io, key)
        # open in append mode so other groups in the file are preserved
        with h5py.File(fname, 'a') as f:
            del f[key]
Esempio n. 3
0
def _print_info_helper(key, io):
    """Print combination/correlation counts for one top-level key"""
    print2 = _get_print2()
    is_stretch = key == 'tstretch'
    fname = _get_fname(io, key)
    top_keys = _get_existent(fname, '/', 1)  # 1, 3, 4
    if not top_keys:
        print2('None')
    for top in sorted(top_keys):
        combs = _get_existent(fname, top, 3)
        subkey = top.split('/')[-1]
        if is_stretch:
            msg = '%s: %d combs' % (subkey, len(combs))
        else:
            corrs = _get_existent(fname, top, 4)
            msg = ('%s: %d combs, %d corrs' %
                   (subkey, len(combs), len(corrs)))
        print2(msg)
Esempio n. 4
0
def plot(io, key, plottype=None, seedid=None, day=None, prep_kw=None,
         corrid=None, show=False,
         **kwargs):
    r"""
    Plot everything

    :param io: |io|
    :param key: key of objects to plot, or one of stations, data, prepdata
    :param plottype: plot type to use
        (non default values are ``'vs_dist'`` and ``'wiggle'`` for
        correlation plots, ``'velocity'`` for plots of stretching results)
    :param seedid: seed id of a channel (for data or prepdata)
    :param day: |UTC| object with day (for data or prepdata)
    :param dict prep_kw: options passed to preprocess (for prepdata only)
    :param corrid: correlation configuration (for prepdata only)
    :param show: show interactive plot
    :param \*\*kwargs: all other kwargs are passed to
        the corresponding plot function in `~yam.imaging` module
    """
    import yam.imaging
    # default is None instead of {} to avoid a shared mutable default
    if prep_kw is None:
        prep_kw = {}
    path = io['plot']
    if not os.path.exists(path):
        os.mkdir(path)
    if key in ('stations', 'data', 'prepdata'):
        pt = key
    else:
        # keys containing a stretch id ('t') hold stretching results,
        # everything else holds correlations
        is_corr = 't' not in _analyze_key(key)
        if is_corr and plottype == 'vs_dist':
            pt = 'corr_vs_dist'
        elif is_corr and plottype == 'wiggle':
            pt = 'corr_vs_time_wiggle'
        elif is_corr and plottype is None:
            pt = 'corr_vs_time'
        elif not is_corr and plottype is None:
            pt = 'sim_mat'
        elif not is_corr and plottype == 'velocity':
            pt = 'velocity_change'
        else:
            raise ParseError('Combination of key and plottype not supported')
    # copy before update() so the caller's options dict is never mutated
    kw = dict(kwargs.get('plot_%s_options' % pt, {}))
    kw.update(kwargs.get('plot_options', {}))
    bname = os.path.join(path, pt)
    if key == 'stations':
        yam.imaging.plot_stations(io['inventory'], bname, **kw)
    elif key in ('data', 'prepdata'):
        data = load(io, key, do='return', seedid=seedid, day=day,
                    prep_kw=prep_kw)
        fname = bname + '_%s_%s' % (seedid, day)
        if key == 'prepdata':
            fname = fname + '_c' + corrid
        yam.imaging.plot_data(data, fname, show=show, **kw)
    else:
        plot_ = getattr(yam.imaging, 'plot_' + pt)
        if pt == 'corr_vs_dist':
            fname2 = _get_fname(io, key)
            stream = obspy.read(fname2, 'H5', group=key)
            fname = bname + '_' + key.replace('/', '_')
            plot_(stream, fname, **kw)
        elif pt == 'velocity_change':
            results = [res for task, res in _iter_h5(io, key)]
            fname = bname + '_' + key.replace('/', '_')
            plot_(results, fname, **kw)
        else:
            # one figure per task (channel combination)
            for task, res in _iter_h5(io, key):
                fname = bname + task.replace('/', '_')
                plot_(res, fname, **kw)
    if show:
        # shadowing the truthy ``show`` parameter is harmless here
        from matplotlib.pyplot import show
        show()
Esempio n. 5
0
def load(io, key, seedid=None, day=None, do='return', prep_kw=None,
         fname=None, format=None):
    """
    Load object and do something with it

    :param io: io
    :param key: key of object to load
        (key inside HDF5 file, or one of data, prepdata, stations)
    :param seedid: seed id of a channel (for data or prepdata)
    :param day: |UTC| object with day (for data or prepdata)
    :param do: specifies what to do with the object, default is ``'return'``
        which simply returns the object, other possible values are
        ``'print'`` -- print object (used by print command),
        ``'load'`` -- load object in IPython session (used by load command),
        ``'export'`` -- export correlations to different file format
        (used by export command)
    :param dict prep_kw: options passed to preprocess (for prepdata only)
    :param fname: file name (for export command)
    :param format: target format (for export command)
    :raises ParseError: if required arguments are missing or *do* is unknown
    """
    # default is None instead of {} to avoid a shared mutable default
    if prep_kw is None:
        prep_kw = {}
    if key == 'stations':
        obj = io['inventory']
    elif key in ('data', 'prepdata'):
        if seedid is None or day is None:
            msg = 'seedid and day must be given for data or prepdata'
            raise ParseError(msg)
        if key == 'prepdata':
            # restrict preprocessing options to the whitelisted keys;
            # missing keys are passed as None
            prep_keys = ('remove_response', 'remove_response_options',
                         'demean', 'filter', 'normalization',
                         'time_norm_options', 'spectral_whitening_options',
                         'tolerance_shift',
                         'downsample')
            prep_kw = {k: prep_kw.get(k) for k in prep_keys}
        obj = _load_data(seedid, day, io['data'], io.get('data_format'),
                         key, inventory=io['inventory'], **prep_kw)
    else:
        is_stretch = 't' in _analyze_key(key)
        fname_in = _get_fname(io, key)
        if is_stretch:
            obj = read_dicts(fname_in, key)
            if do == 'print':
                obj = '\n\n'.join(str(o) for o in obj)
        else:
            # headonly avoids loading waveform data when only printing
            obj = obspy.read(fname_in, 'H5', group=key, headonly=do == 'print')
            if do == 'print':
                obj = obj.__str__(extended=True)
    if do == 'print':
        print(obj)
    elif do == 'load':
        _start_ipy(obj)
    elif do == 'return':
        return obj
    elif do == 'export':
        print('export', obj)
        if format == 'H5':
            obspyh5.set_index()
        obj.write(fname, format)
        if format == 'H5':
            # restore the project specific index afterwards
            from yam.io import INDEX
            obspyh5.set_index(INDEX)
    else:
        # was a bare ``raise`` (RuntimeError: no active exception);
        # raise an explicit, informative error instead
        raise ParseError('Unknown value for do: %r' % (do,))
Esempio n. 6
0
def info(io, key=None, subkey='', config=None, **unused_kwargs):
    """
    Print information about yam project

    :param io: |io|
    :param key: key to print infos about
        (key inside HDF5 file, or one of data, stations,
        default: None -- print overview)
    :param subkey: only print part of the HDF5 file
    :param config: list of configuration dictionaries
    """
    print2 = _get_print2()
    data_plugin = io.get('data_plugin')
    # specific keys are handled first; the overview is the fall-through
    if key == 'stations':
        print(io['inventory'])
        return
    if key == 'data':
        if data_plugin:
            print('Data plugin:')
            print2('%s' % data_plugin)
        else:
            print('Raw data (expression for day files):')
            print2(io['data'])
            fnames = _get_data_files(io['data'])
            print2('%d files found' % len(fnames))
            for fname in sorted(fnames):
                print2(fname)
        return
    if key is not None:
        is_stretch = 't' in _analyze_key(key)
        fname = _get_fname(io, key)
        level = 3 if is_stretch else 4
        for line in _get_existent(fname, key + subkey, level):
            print2(line)
        return
    # key is None -> print the project overview
    print('Stations:')
    inventory = io['inventory']
    if inventory is None:
        print2('Not found')
    else:
        contents = inventory.get_contents()
        stations = contents['stations']
        channels = contents['channels']
        print2(' '.join(st.strip().split()[0] for st in stations))
        print2('%d stations, %d channels' % (len(stations), len(channels)))
    if data_plugin:
        print('Data plugin:')
        print2('%s' % data_plugin)
    else:
        print('Raw data (expression for day files):')
        print2(io['data'])
        print2('%d files found' % len(_get_data_files(io['data'])))
    print('Config ids:')

    def get_keys(d):
        # comma separated sorted ids, or 'None' for empty/missing config
        if d is None or len(d) == 0:
            return 'None'
        return ', '.join(sorted(d.keys()))
    print2('c Corr: ' + get_keys(config[0]))
    print2('s Stack: ' + get_keys(config[1]))
    print2('t Stretch: ' + get_keys(config[2]))
    print('Correlations (channel combinations, correlations calculated):')
    _print_info_helper('corr', io)
    print('Stacks:')
    _print_info_helper('stack', io)
    print('Stretching matrices:')
    _print_info_helper('tstretch', io)
Esempio n. 7
0
def start_stretch(io, key, subkey='', njobs=None, reftrid=None,
                  starttime=None, endtime=None,
                  dataset_kwargs=None,
                  **kwargs):
    """
    Start stretching

    :param io: |io|
    :param key:  key to load correlations from
    :param subkey: only use a part of the correlations
    :param njobs: number of cores to use for computation,
        default: None -- use all available cores
    :param reftrid: Parallel processing is only possible when this parameter
        is specified. Key to load the reference trace from, e.g. `'c1_s'`,
        it can be created by a command similar to `yam stack c1 ''`.
    :param starttime,endtime: constrain start and end dates
    :param dataset_kwargs: options passed to obspyh5 resp. h5py when creating
         a new dataset,
         e.g. `dataset_kwargs={'compression':'gzip'}`.
         See create_dataset in h5py for more options.
         By default the dtype is set to `'float16'`.
    :param \*\*kwargs: all other kwargs are passed to
        `stretch_wrapper()` function
    """
    if dataset_kwargs is None:
        dataset_kwargs = {}
    fname = _get_fname(io, key)
    # outkey is mandatory: key under which results are written
    outkey = kwargs['outkey']
    # channel combinations present in the input file (HDF5 level 3)
    tasks = _get_existent(fname, key + subkey, 3)
    # combinations already present in the output file, mapped back to the
    # input key naming so they can be subtracted from the todo list
    done_tasks = [t.replace(outkey, key) for t in
                  _get_existent(io['stretch'], outkey + subkey, 3)]
    tasks = _todo_tasks(tasks, done_tasks)
    for task in tqdm.tqdm(tasks, total=len(tasks)):
        if reftrid is None:
            # no reference trace -> stretch against the stack of the data
            reftr = None
        else:
            fname_reftr = _get_fname(io, reftrid)
            group_reftr = task.replace(key, reftrid)
            reftr = obspy.read(fname_reftr, 'H5', group=group_reftr,
                               dtype=float)
            if len(reftr) != 1:
                raise NotImplementedError('Reference must be single trace')
            reftr = reftr[0]
        # filter single correlations by date; assumes the last 16 characters
        # of the group name encode the time and compare lexicographically
        # with starttime/endtime -- TODO confirm against _get_existent output
        subtasks = [t for t in _get_existent(fname, task, 4) if
                    (starttime is None or t[-16:] >= starttime) and
                    (endtime is None or t[-16:] <= endtime)]
        if reftr is None:
            # without a fixed reference everything must go in one chunk
            # (the reference depends on all data), so no parallelism
            subtask_chunks = [tuple(subtasks)]
        else:
            # with a fixed reference, chunks of 1000 can be processed
            # independently and in parallel
            step = 1000
            subtask_chunks = [tuple(subtasks[i:i + step]) for i in
                              range(0, len(subtasks), step)]
        do_work = functools.partial(_stretch_wrapper, fname=fname,
                                    reftr=reftr, **kwargs)
        results = []
        if njobs == 1 or len(subtask_chunks) == 1:
            log.debug('do work sequentially')
            for stask in tqdm.tqdm(subtask_chunks, total=len(subtask_chunks)):
                result = do_work(stask)
                results.append(result)
        else:
            pool = multiprocessing.Pool(njobs)
            # NOTE(review): _processes is a private Pool attribute, used
            # here only for logging
            log.debug('do work parallel (%d cores)', pool._processes)
            for result in tqdm.tqdm(
                    pool.imap(do_work, subtask_chunks),
                    total=len(subtask_chunks)):
                results.append(result)
            pool.close()
            pool.join()
        # merge the per-chunk dicts into one result for this combination
        result = yam.stretch.join_dicts(results)
        if result is not None:
            write_dict(result, io['stretch'], **dataset_kwargs)