def _iter_h5(io, key, level=3):
    """Iterator yielding streams or stretching results, depending on key"""
    is_stretch = 't' in _analyze_key(key)
    fname = _get_fname(io, key)
    iter_ = _iter_dicts if is_stretch else _iter_streams
    for obj in iter_(fname, key, level=level):
        yield obj


def plot(io, key, plottype=None, seedid=None, day=None, prep_kw={},
         corrid=None, show=False, **kwargs):
    """
    Plot everything

    :param io: |io|
    :param key: key of objects to plot, or one of stations, data, prepdata
    :param plottype: plot type to use
        (non default values are ``'vs_dist'`` and ``'wiggle'`` for
        correlation plots, ``'velocity'`` for plots of stretching results)
    :param seedid: seed id of a channel (for data or prepdata)
    :param day: |UTC| object with day (for data or prepdata)
    :param dict prep_kw: options passed to preprocess (for prepdata only)
    :param corrid: correlation configuration (for prepdata only)
    :param show: show interactive plot
    :param \*\*kwargs: all other kwargs are passed to the corresponding
        plot function in `~yam.imaging` module
    """
    import yam.imaging
    path = io['plot']
    if not os.path.exists(path):
        os.mkdir(path)
    if key in ('stations', 'data', 'prepdata'):
        pt = key
    else:
        is_corr = 't' not in _analyze_key(key)
        if is_corr and plottype == 'vs_dist':
            pt = 'corr_vs_dist'
        elif is_corr and plottype == 'wiggle':
            pt = 'corr_vs_time_wiggle'
        elif is_corr and plottype is None:
            pt = 'corr_vs_time'
        elif not is_corr and plottype is None:
            pt = 'sim_mat'
        elif not is_corr and plottype == 'velocity':
            pt = 'velocity_change'
        else:
            raise ParseError('Combination of key and plottype not supported')
    kw = kwargs.get('plot_%s_options' % pt, {})
    kw.update(kwargs.get('plot_options', {}))
    bname = os.path.join(path, pt)
    if key == 'stations':
        yam.imaging.plot_stations(io['inventory'], bname, **kw)
    elif key in ('data', 'prepdata'):
        data = load(io, key, do='return', seedid=seedid, day=day,
                    prep_kw=prep_kw)
        fname = bname + '_%s_%s' % (seedid, day)
        if key == 'prepdata':
            fname = fname + '_c' + corrid
        yam.imaging.plot_data(data, fname, show=show, **kw)
    else:
        plot_ = getattr(yam.imaging, 'plot_' + pt)
        if pt == 'corr_vs_dist':
            fname2 = _get_fname(io, key)
            stream = obspy.read(fname2, 'H5', group=key)
            fname = bname + '_' + key.replace('/', '_')
            plot_(stream, fname, **kw)
        elif pt == 'velocity_change':
            results = [res for task, res in _iter_h5(io, key)]
            fname = bname + '_' + key.replace('/', '_')
            plot_(results, fname, **kw)
        else:
            for task, res in _iter_h5(io, key):
                fname = bname + task.replace('/', '_')
                plot_(res, fname, **kw)
    if show:
        from matplotlib.pyplot import show
        show()
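
# Hedged usage sketch for ``plot`` (not part of yam's API). The ``io`` dict is
# normally assembled by yam from the project configuration; the keys shown
# below ('c1_s1d', 'c1_s1d_t1') are hypothetical example keys.
#
#     io = {'plot': 'plots', 'inventory': inventory,
#           'corr': 'corr.h5', 'stack': 'stack.h5',
#           'data': 'rawdata/{network}.{station}.*.{channel}__{t.year}.mseed'}
#     plot(io, 'stations')                           # -> plot_stations
#     plot(io, 'c1_s1d')                             # -> plot_corr_vs_time
#     plot(io, 'c1_s1d', plottype='vs_dist')         # -> plot_corr_vs_dist
#     plot(io, 'c1_s1d_t1', plottype='velocity')     # -> plot_velocity_change
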
def load(io, key, seedid=None, day=None, do='return', prep_kw={},
         fname=None, format=None):
    """
    Load object and do something with it

    :param io: |io|
    :param key: key of object to load
        (key inside HDF5 file, or one of data, prepdata, stations)
    :param seedid: seed id of a channel (for data or prepdata)
    :param day: |UTC| object with day (for data or prepdata)
    :param do: specifies what to do with the object, default is ``'return'``
        which simply returns the object, other possible values are
        ``'print'`` -- print object (used by print command),
        ``'load'`` -- load object in IPython session (used by load command),
        ``'export'`` -- export correlations to different file format
        (used by export command)
    :param dict prep_kw: options passed to preprocess (for prepdata only)
    :param fname: file name (for export command)
    :param format: target format (for export command)
    """
    if key == 'stations':
        obj = io['inventory']
    elif key in ('data', 'prepdata'):
        if seedid is None or day is None:
            msg = 'seedid and day must be given for data or prepdata'
            raise ParseError(msg)
        if key == 'prepdata':
            prep_keys = ('remove_response', 'remove_response_options',
                         'demean', 'filter', 'normalization',
                         'time_norm_options', 'spectral_whitening_options',
                         'tolerance_shift', 'downsample')
            prep_kw = {k: prep_kw.get(k) for k in prep_keys}
        obj = _load_data(seedid, day, io['data'], io.get('data_format'), key,
                         inventory=io['inventory'], **prep_kw)
    else:
        is_stretch = 't' in _analyze_key(key)
        fname_in = _get_fname(io, key)
        if is_stretch:
            obj = read_dicts(fname_in, key)
            if do == 'print':
                obj = '\n\n'.join(str(o) for o in obj)
        else:
            # read only headers if the object is just printed
            obj = obspy.read(fname_in, 'H5', group=key,
                             headonly=do == 'print')
            if do == 'print':
                obj = obj.__str__(extended=True)
    if do == 'print':
        print(obj)
    elif do == 'load':
        _start_ipy(obj)
    elif do == 'return':
        return obj
    elif do == 'export':
        print('export', obj)
        if format == 'H5':
            # temporarily switch to obspyh5's default index for writing,
            # then restore yam's custom index afterwards
            obspyh5.set_index()
        obj.write(fname, format)
        if format == 'H5':
            from yam.io import INDEX
            obspyh5.set_index(INDEX)
    else:
        raise ValueError('Unknown value for do: %s' % do)
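
# Hedged usage sketch for ``load`` (hypothetical keys and seed ids; ``io`` as
# in the sketch above, UTC is ObsPy's UTCDateTime):
#
#     stream = load(io, 'c1_s1d')                    # return correlations
#     load(io, 'c1_s1d', do='print')                 # print header info only
#     load(io, 'c1_s1d', do='export',
#          fname='corr_export.mseed', format='MSEED')
#     data = load(io, 'prepdata', seedid='CX.PATCX..HHZ',
#                 day=UTC('2010-02-05'), prep_kw={})  # preprocessed raw data
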
def info(io, key=None, subkey='', config=None, **unused_kwargs):
    """
    Print information about yam project

    :param io: |io|
    :param key: key to print infos about
        (key inside HDF5 file, or one of data, stations,
        default: None -- print overview)
    :param subkey: only print part of the HDF5 file
    :param config: list of configuration dictionaries
    """
    print2 = _get_print2()
    data_plugin = io.get('data_plugin')
    if key is None:
        print('Stations:')
        inventory = io['inventory']
        if inventory is None:
            print2('Not found')
        else:
            stations = inventory.get_contents()['stations']
            channels = inventory.get_contents()['channels']
            print2(' '.join(st.strip().split()[0] for st in stations))
            print2('%d stations, %d channels' % (len(stations),
                                                 len(channels)))
        if data_plugin:
            print('Data plugin:')
            print2('%s' % data_plugin)
        else:
            print('Raw data (expression for day files):')
            print2(io['data'])
            print2('%d files found' % len(_get_data_files(io['data'])))
        print('Config ids:')

        def get_keys(d):
            if d is None or len(d) == 0:
                return 'None'
            else:
                return ', '.join(sorted(d.keys()))
        print2('c Corr: ' + get_keys(config[0]))
        print2('s Stack: ' + get_keys(config[1]))
        print2('t Stretch: ' + get_keys(config[2]))
        print('Correlations (channel combinations, correlations calculated):')
        _print_info_helper('corr', io)
        print('Stacks:')
        _print_info_helper('stack', io)
        print('Stretching matrices:')
        _print_info_helper('tstretch', io)
    elif key == 'stations':
        print(io['inventory'])
    elif key == 'data':
        if data_plugin:
            print('Data plugin:')
            print2('%s' % data_plugin)
        else:
            print('Raw data (expression for day files):')
            print2(io['data'])
            fnames = _get_data_files(io['data'])
            print2('%d files found' % len(fnames))
            for fname in sorted(fnames):
                print2(fname)
    else:
        is_stretch = 't' in _analyze_key(key)
        fname = _get_fname(io, key)
        level = 3 if is_stretch else 4
        for line in _get_existent(fname, key + subkey, level):
            print2(line)
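
# Hedged sketch of how ``info`` expects its ``config`` argument: a list with
# the correlation, stack and stretch configuration dictionaries, in that
# order (names and keys below are hypothetical):
#
#     config = [corr_config, stack_config, stretch_config]
#     info(io, config=config)       # project overview including config ids
#     info(io, key='stations')      # print the inventory
#     info(io, key='data')          # list raw data files
#     info(io, key='c1_s1d', subkey='/CX.PATCX-CX.PB01')  # part of HDF5 file
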
def start_stack(io, key, outkey, subkey='', njobs=None,
                starttime=None, endtime=None, dataset_kwargs=None,
                **kwargs):
    """
    Start stacking

    :param io: |io|
    :param key: key to load correlations from
    :param outkey: key to write stacked correlations to
    :param subkey: only use a part of the correlations
    :param njobs: number of cores to use for computation,
        default: None -- use all available cores
    :param starttime,endtime: constrain start and end dates
    :param dataset_kwargs: options passed to obspyh5 and h5py when creating
        a new dataset, e.g. ``dataset_kwargs={'compression': 'gzip'}``.
        See create_dataset in h5py for more options.
        By default the dtype is set to ``'float16'``.
    :param \*\*kwargs: all other kwargs are passed to
        `yam.stack.stack()` function
    """
    if dataset_kwargs is None:
        dataset_kwargs = {}
    dataset_kwargs.setdefault('dtype', 'float16')
    fname = io['stack'] if 's' in _analyze_key(key) else io['corr']
    tasks = _get_existent(fname, key + subkey, 3)
    done_tasks = [t.replace(outkey, key) for t in
                  _get_existent(io['stack'], outkey + subkey, 3)]
    tasks = _todo_tasks(tasks, done_tasks)
    length = kwargs.get('length')
    for task in tqdm.tqdm(tasks, total=len(tasks)):
        subtasks = [t for t in _get_existent(fname, task, 4)
                    if (starttime is None or t[-16:] >= starttime) and
                    (endtime is None or t[-16:] <= endtime)]
        if length is None and njobs != 1:
            # split subtasks into chunks that can be distributed to workers
            step = 1000
            subtask_chunks = [tuple(subtasks[i:i + step])
                              for i in range(0, len(subtasks), step)]
        else:
            subtask_chunks = [subtasks]
            # TODO: parallel stacking for arbitrary stack id
            # lensec = _time2sec(length)
            # if lensec >= 30 * 24 * 3600:
            #     subtask_chunks = [subtasks]
            # else:
            #     subtask_chunks = []
            #     for i in range(0, len(subtasks), step):
            #         chunk = subtasks[i:i + step]
            #         t1 = UTC(subtasks[i + step - 1][-16:])
            #         j = 0
            #         while i + step + j < len(subtasks):
            #             t2 = UTC(subtasks[i + step + j][-16:])
            #             # assume lensec is always larger than movesec
            #             # not ideal, may load too much data,
            #             # e.g. for a stack over 1 year
            #             if t2 - t1 <= lensec:
            #                 chunk.append(subtasks[i + step + j])
            #             else:
            #                 break
            #             j += 1
            #         subtask_chunks.append(chunk)
        do_work = functools.partial(_stack_wrapper, fname=fname,
                                    outkey=outkey, **kwargs)
        results = []
        if njobs == 1 or len(subtask_chunks) == 1:
            log.debug('do work sequentially')
            for stask in tqdm.tqdm(subtask_chunks,
                                   total=len(subtask_chunks)):
                result = do_work(stask)
                results.append(result)
        else:
            pool = multiprocessing.Pool(njobs)
            log.debug('do work parallel (%d cores)', pool._processes)
            for result in tqdm.tqdm(pool.imap(do_work, subtask_chunks),
                                    total=len(subtask_chunks)):
                results.append(result)
            pool.close()
            pool.join()
        if length is None:
            # stack over all times: weighted mean, each partial stack
            # weighted by the number of correlations it contains (stats.num)
            for stream in results:
                assert len(stream) <= 1
            traces = [tr for stream in results for tr in stream]
            num = sum(tr.stats.num for tr in traces)
            data = np.sum([tr.data * (tr.stats.num / num) for tr in traces],
                          axis=0)
            tr_stack = obspy.Trace(data, header=traces[0].stats)
            tr_stack.stats.num = num
            tr_stack.write(io['stack'], 'H5', mode='a', **dataset_kwargs)
        else:
            for stack_stream in results:
                stack_stream.write(io['stack'], 'H5', mode='a',
                                   **dataset_kwargs)
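
# Hedged usage sketch for ``start_stack`` (hypothetical keys; ``length`` and
# ``move`` are forwarded to ``yam.stack.stack()`` -- the '5d'/'1d' time
# strings and the ``move`` parameter are assumptions, not confirmed here):
#
#     # stack all correlations of configuration c1 into a single trace
#     start_stack(io, 'c1', 'c1_s', njobs=2,
#                 dataset_kwargs={'compression': 'gzip'})
#     # moving 5-day stack, stepping by 1 day
#     start_stack(io, 'c1', 'c1_s5dm1d', length='5d', move='1d')
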