def check(self, show_progress=False):
    if show_progress:
        pbar = util.progressbar('checking store', self.config.nrecords)

    problems = 0
    for i, args in enumerate(self.config.iter_nodes()):
        tr = self.get(args)
        if tr and not tr.is_zero:
            if not tr.begin_value == tr.data[0]:
                logger.warn('wrong begin value for trace at %s '
                            '(data corruption?)' % str(args))
                problems += 1

            if not tr.end_value == tr.data[-1]:
                logger.warn('wrong end value for trace at %s '
                            '(data corruption?)' % str(args))
                problems += 1

            if not num.all(num.isfinite(tr.data)):
                logger.warn('nans or infs in trace at %s' % str(args))
                problems += 1

        if show_progress:
            pbar.update(i+1)

    if show_progress:
        pbar.finish()

    return problems

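# Minimal usage sketch for the check() method above, assuming it is the
# consistency check of pyrocko.gf.Store; 'gf_stores/my_store' is a
# hypothetical store directory.
from pyrocko import gf

store = gf.Store('gf_stores/my_store')
nproblems = store.check(show_progress=True)
if nproblems:
    print('found %i potential problem(s) in the store' % nproblems)
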
def dump_waveforms(self, engine, sources, path,
                   tmin=None, tmax=None, overwrite=False):
    path_waveforms = op.join(path, 'waveforms')
    # gf.store.remake_dir(path_waveforms, force=overwrite)

    path_traces = op.join(
        path_waveforms,
        '%(wmin_year)s', '%(wmin_month)s', '%(wmin_day)s',
        'waveform_%(network)s_%(station)s_'
        + '%(location)s_%(channel)s_%(tmin)s_%(tmax)s.mseed')

    tmin_all, tmax_all = self.get_time_range(sources)
    tmin = tmin if tmin is not None else tmin_all
    tmax = tmax if tmax is not None else tmax_all
    tts = util.time_to_str

    tinc = self.tinc or self.get_useful_time_increment(engine, sources)
    tmin = math.floor(tmin / tinc) * tinc
    tmax = math.ceil(tmax / tinc) * tinc
    nwin = int(round((tmax - tmin) / tinc))

    pbar = util.progressbar('Generating waveforms', nwin)
    for iwin in range(nwin):
        pbar.update(iwin)
        tmin_win = max(tmin, tmin + iwin * tinc)
        tmax_win = min(tmax, tmin + (iwin + 1) * tinc)

        if tmax_win <= tmin_win:
            continue

        trs = self.get_waveforms(engine, sources, tmin_win, tmax_win)

        try:
            io.save(
                trs, path_traces,
                additional=dict(
                    wmin_year=tts(tmin_win, format='%Y'),
                    wmin_month=tts(tmin_win, format='%m'),
                    wmin_day=tts(tmin_win, format='%d'),
                    wmin=tts(tmin_win, format='%Y-%m-%d_%H-%M-%S'),
                    wmax_year=tts(tmax_win, format='%Y'),
                    wmax_month=tts(tmax_win, format='%m'),
                    wmax_day=tts(tmax_win, format='%d'),
                    wmax=tts(tmax_win, format='%Y-%m-%d_%H-%M-%S')),
                overwrite=overwrite)
        except FileSaveError as e:
            logger.debug('Waveform exists %s' % e)

    pbar.finish()

    return [path_waveforms]

def __init__(self, label, n):
    self._label = label
    self._n = n
    self._bar = None
    if show_progress:
        self._bar = util.progressbar(label, self._n)

    if update_progress:
        update_progress(label, 0, self._n)

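# The __init__ above refers to `show_progress` and `update_progress`, which
# are not defined in the method itself; they appear to be module-level
# settings/hooks of the surrounding module. A minimal, self-contained sketch
# of the same pattern (class and variable names here are hypothetical, not
# the original module's API):

show_progress = True      # render a textual progress indicator
update_progress = None    # optional callback: (label, i, n) -> None


class SimpleProgress(object):
    def __init__(self, label, n):
        self._label = label
        self._n = n

    def update(self, i):
        if show_progress:
            print('%s: %i/%i' % (self._label, i, self._n))
        if update_progress:
            update_progress(self._label, i, self._n)

    def finish(self):
        self.update(self._n)
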
def dump_waveforms(self, engine, sources, path,
                   tmin=None, tmax=None, overwrite=False):
    path_waveforms = op.join(path, 'waveforms')
    gf.store.remake_dir(path_waveforms, force=overwrite)

    path_traces = op.join(
        path_waveforms,
        '%(wmin_year)s', '%(wmin_month)s', '%(wmin_day)s',
        'waveform_%(network)s_%(station)s_'
        + '%(location)s_%(channel)s_%(tmin)s_%(tmax)s.mseed')

    tmin_all, tmax_all = self.get_time_range(sources)
    tmin = tmin if tmin is not None else tmin_all
    tmax = tmax if tmax is not None else tmax_all
    tts = util.time_to_str

    tinc = self.tinc or self.get_useful_time_increment(engine, sources)
    tmin = math.floor(tmin / tinc) * tinc
    tmax = math.ceil(tmax / tinc) * tinc
    nwin = int(round((tmax - tmin) / tinc))

    pbar = util.progressbar('Generating waveforms', nwin)
    for iwin in range(nwin):
        pbar.update(iwin)
        tmin_win = max(tmin, tmin + iwin*tinc)
        tmax_win = min(tmax, tmin + (iwin+1)*tinc)

        if tmax_win <= tmin_win:
            continue

        trs = self.get_waveforms(engine, sources, tmin_win, tmax_win)

        try:
            io.save(
                trs, path_traces,
                additional=dict(
                    wmin_year=tts(tmin_win, format='%Y'),
                    wmin_month=tts(tmin_win, format='%m'),
                    wmin_day=tts(tmin_win, format='%d'),
                    wmin=tts(tmin_win, format='%Y-%m-%d_%H-%M-%S'),
                    wmax_year=tts(tmax_win, format='%Y'),
                    wmax_month=tts(tmax_win, format='%m'),
                    wmax_day=tts(tmax_win, format='%d'),
                    wmax=tts(tmax_win, format='%Y-%m-%d_%H-%M-%S')),
                overwrite=overwrite)
        except FileSaveError as e:
            logger.debug('Waveform exists %s' % e)

    pbar.finish()

    return [path_waveforms]

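# The path template used above is expanded by pyrocko's io.save() from each
# trace's metadata plus the `additional` dict; the expansion is essentially
# Python %-formatting with a mapping. A plain-Python illustration of the
# same expansion (all values below are made up for demonstration):
import os.path as op

path_traces = op.join(
    'waveforms',
    '%(wmin_year)s', '%(wmin_month)s', '%(wmin_day)s',
    'waveform_%(network)s_%(station)s_'
    '%(location)s_%(channel)s_%(tmin)s_%(tmax)s.mseed')

example = path_traces % dict(
    wmin_year='2023', wmin_month='05', wmin_day='17',
    network='GE', station='APE', location='', channel='BHZ',
    tmin='2023-05-17_00-00-00', tmax='2023-05-17_01-00-00')

print(example)
# on POSIX:
# waveforms/2023/05/17/waveform_GE_APE__BHZ_2023-05-17_00-00-00_2023-05-17_01-00-00.mseed
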
def command_redeploy(args):
    parser, options, args = cl_parse('redeploy', args)
    if not len(args) == 2:
        parser.print_help()
        sys.exit(1)

    source_store_dir, dest_store_dir = args

    try:
        source = gf.Store(source_store_dir)
    except gf.StoreError as e:
        die(e)

    try:
        gf.store.Store.create_dependants(dest_store_dir)
    except gf.StoreError:
        pass

    try:
        dest = gf.Store(dest_store_dir, 'w')
    except gf.StoreError as e:
        die(e)

    show_progress = True

    if show_progress:
        pbar = util.progressbar('redeploying', dest.config.nrecords)

    for i, args in enumerate(dest.config.iter_nodes()):
        try:
            tr = source.get(args, interpolation='off')
            dest.put(args, tr)

        except (gf.meta.OutOfBounds, gf.store.NotAllowedToInterpolate) as e:
            logger.debug('skipping %s, (%s)' % (sindex(args), e))

        except gf.store.StoreError as e:
            logger.warn('cannot insert %s, (%s)' % (sindex(args), e))

        if show_progress:
            pbar.update(i + 1)

    if show_progress:
        pbar.finish()

def command_redeploy(args):
    parser, options, args = cl_parse('redeploy', args)
    if not len(args) == 2:
        parser.print_help()
        sys.exit(1)

    source_store_dir, dest_store_dir = args

    try:
        source = gf.Store(source_store_dir)
    except gf.StoreError as e:
        die(e)

    try:
        gf.store.Store.create_dependants(dest_store_dir)
    except gf.StoreError:
        pass

    try:
        dest = gf.Store(dest_store_dir, 'w')
    except gf.StoreError as e:
        die(e)

    show_progress = True

    if show_progress:
        pbar = util.progressbar('redeploying', dest.config.nrecords)

    for i, args in enumerate(dest.config.iter_nodes()):
        try:
            tr = source.get(args, interpolation='off')
            dest.put(args, tr)

        except (gf.meta.OutOfBounds, gf.store.NotAllowedToInterpolate) as e:
            logger.debug('skipping %s, (%s)' % (sindex(args), e))

        except gf.store.StoreError as e:
            logger.warn('cannot insert %s, (%s)' % (sindex(args), e))

        if show_progress:
            pbar.update(i+1)

    if show_progress:
        pbar.finish()

def chopper_grouped(self, gather, progress=None, *args, **kwargs):
    keys = self.gather_keys(gather)
    if len(keys) == 0:
        return

    outer_group_selector = None
    if 'group_selector' in kwargs:
        outer_group_selector = kwargs['group_selector']

    outer_trace_selector = None
    if 'trace_selector' in kwargs:
        outer_trace_selector = kwargs['trace_selector']

    # the use of this gather-cache makes it impossible to modify the pile
    # during chopping
    gather_cache = {}
    pbar = None
    if progress is not None:
        pbar = util.progressbar(progress, len(keys))

    for ikey, key in enumerate(keys):
        def tsel(tr):
            return gather(tr) == key and (
                outer_trace_selector is None or outer_trace_selector(tr))

        def gsel(gr):
            if gr not in gather_cache:
                gather_cache[gr] = gr.gather_keys(gather)

            return key in gather_cache[gr] and (
                outer_group_selector is None or outer_group_selector(gr))

        kwargs['trace_selector'] = tsel
        kwargs['group_selector'] = gsel

        for traces in self.chopper(*args, **kwargs):
            yield traces

        if pbar:
            pbar.update(ikey + 1)

    if pbar:
        pbar.finish()

def chopper_grouped(self, gather, progress=None, *args, **kwargs):
    keys = self.gather_keys(gather)
    if len(keys) == 0:
        return

    outer_group_selector = None
    if 'group_selector' in kwargs:
        outer_group_selector = kwargs['group_selector']

    outer_trace_selector = None
    if 'trace_selector' in kwargs:
        outer_trace_selector = kwargs['trace_selector']

    # the use of this gather-cache makes it impossible to modify the pile
    # during chopping
    gather_cache = {}
    pbar = None
    if progress is not None:
        pbar = util.progressbar(progress, len(keys))

    for ikey, key in enumerate(keys):
        def tsel(tr):
            return gather(tr) == key and (
                outer_trace_selector is None or outer_trace_selector(tr))

        def gsel(gr):
            if gr not in gather_cache:
                gather_cache[gr] = gr.gather_keys(gather)

            return key in gather_cache[gr] and (
                outer_group_selector is None or outer_group_selector(gr))

        kwargs['trace_selector'] = tsel
        kwargs['group_selector'] = gsel

        for traces in self.chopper(*args, **kwargs):
            yield traces

        if pbar:
            pbar.update(ikey+1)

    if pbar:
        pbar.finish()

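# Usage sketch for chopper_grouped() above, assuming it is the method of a
# pyrocko pile (pyrocko.pile.Pile); 'data/mseed' is a hypothetical data
# directory. Keyword arguments not consumed here (e.g. tinc) are passed on
# to the underlying chopper().
from pyrocko import pile

p = pile.make_pile('data/mseed')
for traces in p.chopper_grouped(
        gather=lambda tr: (tr.network, tr.station),
        progress='processing stations',
        tinc=3600.):
    # `traces` holds the traces of one (network, station) group for one
    # one-hour window
    pass
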
def make_decimated(self, decimate, config=None, force=False,
                   show_progress=False):

    '''Create decimated version of GF store.

    Create a downsampled version of the GF store. Downsampling is done for
    the integer factor `decimate` which should be in the range [2,8]. If
    `config` is ``None``, all traces of the GF store are decimated and held
    available (i.e. the index mapping of the original store is used),
    otherwise, a different spatial stepping can be specified by giving a
    modified GF store configuration in `config` (see :py:meth:`create`).
    Decimated GF sub-stores are created under the `decimated` subdirectory
    within the GF store directory. Holding available decimated versions of
    the GF store can save computation time and IO bandwidth, or decrease
    the memory footprint, at the cost of increased disk space usage, when
    computations are done for lower frequency signals.
    '''

    if not self._f_index:
        self.open()

    if not (2 <= decimate <= 8):
        raise StoreError('decimate argument must be in the range [2,8]')

    assert self.mode == 'r'

    if config is None:
        config = self.config

    config = copy.deepcopy(config)
    config.sample_rate = self.config.sample_rate / decimate

    if decimate in self._decimated:
        del self._decimated[decimate]

    store_dir = self._decimated_store_dir(decimate)
    if os.path.exists(store_dir):
        if force:
            shutil.rmtree(store_dir)
        else:
            raise CannotCreate('store already exists at %s' % store_dir)

    store_dir_incomplete = store_dir + '-incomplete'
    Store.create(store_dir_incomplete, config, force=force)

    decimated = Store(store_dir_incomplete, 'w')
    if show_progress:
        pbar = util.progressbar('decimating store', self.config.nrecords)

    for i, args in enumerate(decimated.config.iter_nodes()):
        tr = self.get(args, decimate=decimate)
        decimated.put(args, tr)

        if show_progress:
            pbar.update(i+1)

    if show_progress:
        pbar.finish()

    decimated.close()

    shutil.move(store_dir_incomplete, store_dir)

    self._decimated[decimate] = None

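# Usage sketch for make_decimated() above, assuming pyrocko.gf.Store;
# 'gf_stores/my_store' is a hypothetical store directory. This creates a
# sub-store with half the original sample rate under the store's
# 'decimated' subdirectory.
from pyrocko import gf

store = gf.Store('gf_stores/my_store')
store.make_decimated(2, show_progress=True)
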
def command_export(args):
    from subprocess import Popen, PIPE

    try:
        from tunguska import gfdb
    except ImportError as err:
        die('the kiwi tools must be installed to use this feature', err)

    def setup(parser):
        parser.add_option(
            '--nchunks', dest='nchunks', type='int', default=1, metavar='N',
            help='split output gfdb into N chunks')

    parser, options, args = cl_parse('export', args, setup=setup)

    show_progress = True

    if len(args) not in (1, 2):
        parser.print_help()
        sys.exit(1)

    target_path = args.pop()
    if os.path.isdir(target_path):
        target_path = os.path.join(target_path, 'kiwi_gfdb')
        logger.warn('exported gfdb will be named as "%s.*"' % target_path)

    source_store_dir = get_store_dir(args)

    source = gf.Store(source_store_dir, 'r')
    config = source.config

    if not isinstance(config, gf.meta.ConfigTypeA):
        die('only stores of type A can be exported to Kiwi format')

    if os.path.isfile(target_path + '.index'):
        die('destination already exists')

    cmd = [str(x) for x in [
        'gfdb_build',
        target_path,
        options.nchunks,
        config.ndistances,
        config.nsource_depths,
        config.ncomponents,
        config.deltat,
        config.distance_delta,
        config.source_depth_delta,
        config.distance_min,
        config.source_depth_min]]

    p = Popen(cmd, stdin=PIPE)
    p.communicate()

    out_db = gfdb.Gfdb(target_path)

    if show_progress:
        pbar = util.progressbar(
            'exporting', config.nrecords / config.ncomponents)

    for i, (z, x) in enumerate(config.iter_nodes(level=-1)):
        data_out = []
        for ig in range(config.ncomponents):
            try:
                tr = source.get((z, x, ig), interpolation='off')
                data_out.append((tr.t, tr.data * config.factor))

            except gf.store.StoreError as e:
                logger.warn('cannot get %s, (%s)' % (sindex((z, x, ig)), e))
                data_out.append(None)

        # put a zero-valued sample into empty (no-data) traces at a
        # compatible time
        tmins = [
            entry[0][0]
            for entry in data_out
            if entry is not None and entry[0].size != 0]

        if tmins:
            tmin = min(tmins)
            for entry in data_out:
                if entry is not None and entry[0].size == 0:
                    entry[0].resize(1)
                    entry[1].resize(1)
                    entry[0][0] = tmin
                    entry[1][0] = 0.0

        out_db.put_traces_slow(x, z, data_out)

        if show_progress:
            pbar.update(i + 1)

    if show_progress:
        pbar.finish()

    source.close()

def command_import(args):
    try:
        from tunguska import gfdb
    except ImportError:
        die('the kiwi tools must be installed to use this feature')

    parser, options, args = cl_parse('import', args)

    show_progress = True

    if not len(args) == 2:
        parser.print_help()
        sys.exit(1)

    source_path, dest_store_dir = args

    if os.path.isdir(source_path):
        source_path = pjoin(source_path, 'db')

    source_path = re.sub(r'(\.\d+\.chunk|\.index)$', '', source_path)

    db = gfdb.Gfdb(source_path)

    config = gf.meta.ConfigTypeA(
        id='imported_gfs',
        distance_min=db.firstx,
        distance_max=db.firstx + (db.nx - 1) * db.dx,
        distance_delta=db.dx,
        source_depth_min=db.firstz,
        source_depth_max=db.firstz + (db.nz - 1) * db.dz,
        source_depth_delta=db.dz,
        sample_rate=1.0 / db.dt,
        ncomponents=db.ng)

    try:
        gf.store.Store.create(dest_store_dir, config=config)
        dest = gf.Store(dest_store_dir, 'w')
        if show_progress:
            pbar = util.progressbar(
                'importing', dest.config.nrecords / dest.config.ncomponents)

        for i, args in enumerate(dest.config.iter_nodes(level=-1)):
            source_depth, distance = [float(x) for x in args]
            traces = db.get_traces_pyrocko(distance, source_depth)
            ig_to_trace = dict((tr.meta['ig'] - 1, tr) for tr in traces)
            for ig in range(db.ng):
                if ig in ig_to_trace:
                    tr = ig_to_trace[ig]
                    gf_tr = gf.store.GFTrace(
                        tr.get_ydata(),
                        int(round(tr.tmin / tr.deltat)),
                        tr.deltat)

                else:
                    gf_tr = gf.store.Zero

                dest.put((source_depth, distance, ig), gf_tr)

            if show_progress:
                pbar.update(i + 1)

        if show_progress:
            pbar.finish()

        dest.close()

    except gf.StoreError as e:
        die(e)

def command_export(args):
    from subprocess import Popen, PIPE

    try:
        from tunguska import gfdb
    except ImportError as err:
        die('the kiwi tools must be installed to use this feature', err)

    def setup(parser):
        parser.add_option(
            '--nchunks', dest='nchunks', type='int', default=1, metavar='N',
            help='split output gfdb into N chunks')

    parser, options, args = cl_parse('export', args, setup=setup)

    show_progress = True

    if len(args) not in (1, 2):
        parser.print_help()
        sys.exit(1)

    target_path = args.pop()
    if op.isdir(target_path):
        target_path = op.join(target_path, 'kiwi_gfdb')
        logger.warn('exported gfdb will be named as "%s.*"' % target_path)

    source_store_dir = get_store_dir(args)

    source = gf.Store(source_store_dir, 'r')
    config = source.config

    if not isinstance(config, gf.meta.ConfigTypeA):
        die('only stores of type A can be exported to Kiwi format')

    if op.isfile(target_path + '.index'):
        die('destination already exists')

    cmd = [str(x) for x in [
        'gfdb_build',
        target_path,
        options.nchunks,
        config.ndistances,
        config.nsource_depths,
        config.ncomponents,
        config.deltat,
        config.distance_delta,
        config.source_depth_delta,
        config.distance_min,
        config.source_depth_min]]

    p = Popen(cmd, stdin=PIPE)
    p.communicate()

    out_db = gfdb.Gfdb(target_path)

    if show_progress:
        pbar = util.progressbar(
            'exporting', config.nrecords/config.ncomponents)

    for i, (z, x) in enumerate(config.iter_nodes(level=-1)):
        data_out = []
        for ig in range(config.ncomponents):
            try:
                tr = source.get((z, x, ig), interpolation='off')
                data_out.append((tr.t, tr.data * config.factor))

            except gf.store.StoreError as e:
                logger.warn('cannot get %s, (%s)' % (sindex((z, x, ig)), e))
                data_out.append(None)

        # put a zero-valued sample into empty (no-data) traces at a
        # compatible time
        tmins = [
            entry[0][0]
            for entry in data_out
            if entry is not None and entry[0].size != 0]

        if tmins:
            tmin = min(tmins)
            for entry in data_out:
                if entry is not None and entry[0].size == 0:
                    entry[0].resize(1)
                    entry[1].resize(1)
                    entry[0][0] = tmin
                    entry[1][0] = 0.0

        out_db.put_traces_slow(x, z, data_out)

        if show_progress:
            pbar.update(i+1)

    if show_progress:
        pbar.finish()

    source.close()

def command_import(args):
    try:
        from tunguska import gfdb
    except ImportError:
        die('the kiwi tools must be installed to use this feature')

    parser, options, args = cl_parse('import', args)

    show_progress = True

    if not len(args) == 2:
        parser.print_help()
        sys.exit(1)

    source_path, dest_store_dir = args

    if op.isdir(source_path):
        source_path = op.join(source_path, 'db')

    source_path = re.sub(r'(\.\d+\.chunk|\.index)$', '', source_path)

    db = gfdb.Gfdb(source_path)

    config = gf.meta.ConfigTypeA(
        id='imported_gfs',
        distance_min=db.firstx,
        distance_max=db.firstx + (db.nx-1) * db.dx,
        distance_delta=db.dx,
        source_depth_min=db.firstz,
        source_depth_max=db.firstz + (db.nz-1) * db.dz,
        source_depth_delta=db.dz,
        sample_rate=1.0/db.dt,
        ncomponents=db.ng)

    try:
        gf.store.Store.create(dest_store_dir, config=config)
        dest = gf.Store(dest_store_dir, 'w')
        if show_progress:
            pbar = util.progressbar(
                'importing', dest.config.nrecords/dest.config.ncomponents)

        for i, args in enumerate(dest.config.iter_nodes(level=-1)):
            source_depth, distance = [float(x) for x in args]
            traces = db.get_traces_pyrocko(distance, source_depth)
            ig_to_trace = dict((tr.meta['ig']-1, tr) for tr in traces)
            for ig in range(db.ng):
                if ig in ig_to_trace:
                    tr = ig_to_trace[ig]
                    gf_tr = gf.store.GFTrace(
                        tr.get_ydata(),
                        int(round(tr.tmin / tr.deltat)),
                        tr.deltat)

                else:
                    gf_tr = gf.store.Zero

                dest.put((source_depth, distance, ig), gf_tr)

            if show_progress:
                pbar.update(i+1)

        if show_progress:
            pbar.finish()

        dest.close()

    except gf.StoreError as e:
        die(e)

def ensure_waveforms(self, engine, sources, path, tmin=None, tmax=None):
    path_waveforms = op.join(path, 'waveforms')
    util.ensuredir(path_waveforms)

    p = self._get_pile(path_waveforms)

    nslc_ids = set(target.codes for target in self.get_targets())

    def have_waveforms(tmin, tmax):
        trs_have = p.all(
            tmin=tmin, tmax=tmax,
            load_data=False, degap=False,
            trace_selector=lambda tr: tr.nslc_id in nslc_ids)

        return any(tr.data_len() > 0 for tr in trs_have)

    def add_files(paths):
        p.load_files(paths, fileformat='mseed', show_progress=False)

    path_traces = op.join(
        path_waveforms,
        '%(wmin_year)s', '%(wmin_month)s', '%(wmin_day)s',
        'waveform_%(network)s_%(station)s_'
        + '%(location)s_%(channel)s_%(tmin)s_%(tmax)s.mseed')

    tmin_all, tmax_all = self.get_time_range(sources)

    tmin = tmin if tmin is not None else tmin_all
    tmax = tmax if tmax is not None else tmax_all

    tts = util.time_to_str

    tinc = self.tinc or self.get_useful_time_increment(engine, sources)
    tmin = math.floor(tmin / tinc) * tinc
    tmax = math.ceil(tmax / tinc) * tinc
    nwin = int(round((tmax - tmin) / tinc))

    pbar = None
    for iwin in range(nwin):
        tmin_win = tmin + iwin*tinc
        tmax_win = tmin + (iwin+1)*tinc

        if have_waveforms(tmin_win, tmax_win):
            continue

        if pbar is None:
            pbar = util.progressbar('Generating waveforms', (nwin-iwin))

        pbar.update(iwin)

        trs = self.get_waveforms(engine, sources, tmin_win, tmax_win)

        try:
            wpaths = io.save(
                trs, path_traces,
                additional=dict(
                    wmin_year=tts(tmin_win, format='%Y'),
                    wmin_month=tts(tmin_win, format='%m'),
                    wmin_day=tts(tmin_win, format='%d'),
                    wmin=tts(tmin_win, format='%Y-%m-%d_%H-%M-%S'),
                    wmax_year=tts(tmax_win, format='%Y'),
                    wmax_month=tts(tmax_win, format='%m'),
                    wmax_day=tts(tmax_win, format='%d'),
                    wmax=tts(tmax_win, format='%Y-%m-%d_%H-%M-%S')))

            for wpath in wpaths:
                logger.debug('Generated file: %s' % wpath)

            add_files(wpaths)

        except FileSaveError as e:
            raise ScenarioError(str(e))

    if pbar is not None:
        pbar.finish()

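# The window bookkeeping used above (and in dump_waveforms) snaps the
# requested time span to multiples of the increment `tinc` and derives the
# number of windows from it. A small numeric illustration with made-up
# values:
import math

tmin, tmax, tinc = 1010.0, 4990.0, 1000.0

tmin_snapped = math.floor(tmin / tinc) * tinc            # 1000.0
tmax_snapped = math.ceil(tmax / tinc) * tinc             # 5000.0
nwin = int(round((tmax_snapped - tmin_snapped) / tinc))  # 4 windows

for iwin in range(nwin):
    print(tmin_snapped + iwin * tinc, tmin_snapped + (iwin + 1) * tinc)
# 1000.0 2000.0
# 2000.0 3000.0
# 3000.0 4000.0
# 4000.0 5000.0
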