def new_func(*args, **kwargs):
    # Decide whether to run this task: ``run`` may be a callable, True, False, or None
    runval = run(func.__name__) if isinstance(run, collections.abc.Callable) else run
    if runval is False:
        return
    elif runval is True:
        pass
    elif runval is None:
        # Default behavior: skip the task if a previous task already failed
        if status['fail']:
            return
    else:
        raise ValueError('run value = %s but must be True, False, or None' % runval)

    logger.verbose('')
    logger.verbose('-' * 60)
    logger.info(' Running task: %s at %s' % (func.__name__, time.ctime()))
    logger.verbose('-' * 60)

    try:
        func(*args, **kwargs)
        pyyaks.context.store_context(status.get('context_file'))
    except KeyboardInterrupt:
        raise
    except TaskSkip:
        pass
    except:
        # Record the first failure and keep going so later tasks can decide what to do
        if status['fail'] is False:
            logger.error('%s: %s\n\n' % (func.__name__, traceback.format_exc()))
            status['fail'] = True
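# ``new_func`` above appears to be a closure built inside a task decorator: ``func``,
# ``run``, ``status`` and ``logger`` come from the enclosing scope.  A minimal,
# self-contained sketch of that pattern (illustrative only, not pyyaks' actual
# implementation), showing how a run flag and shared fail status control execution:
import functools


def task(run=True, status=None):
    status = status if status is not None else {'fail': False}

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # Skip entirely when run is False, or when run is None and a prior task failed
            runval = run(func.__name__) if callable(run) else run
            if runval is False or (runval is None and status['fail']):
                return
            try:
                return func(*args, **kwargs)
            except Exception:
                status['fail'] = True
                raise
        return wrapper
    return decorator

# Usage sketch:
#     @task(run=True)
#     def my_step():
#         ...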
def _sync_stat_archive(opt, msid_files, logger, content, stat, index_tbl):
    """
    Actual worker for syncing the stat archive for ``content``.
    """
    # Get the last row of data from the length of the TIME.col (or archfiles?)
    ft = fetch.ft
    ft['content'] = content
    ft['interval'] = stat

    stats_dir = Path(msid_files['statsdir'].abs)
    if not stats_dir.exists():
        logger.debug(f'Skipping {stat} data for {content}: no directory')
        return

    logger.info('')
    logger.info(f'Processing {stat} data for {content}')

    # Get the MSIDs that are in client archive
    msids = [str(fn.name)[:-3] for fn in stats_dir.glob('*.h5')]
    if not msids:
        logger.debug(f'Skipping {stat} data for {content}: no stats h5 files')
        return
    else:
        logger.debug(f'Stat msids are {msids}')

    last_date_id, last_date_id_file = get_last_date_id(
        msid_files, msids, stat, logger)
    logger.verbose(f'Got {last_date_id} as last date_id that was applied to archive')

    # Get list of applicable dat objects (new data, before opt.date_stop).  Also
    # return ``date_id`` which is the date_id of the final data set in the list.
    # This will be written as the new ``last_date_id``.
    try:
        dats, date_id = get_stat_data_sets(ft, index_tbl, last_date_id, logger, opt)
    except urllib.error.URLError as err:
        if 'timed out' in str(err):
            msg = f' ERROR: timed out getting {stat} data for {content}'
            logger.error(msg)
            process_errors.append(msg)
            return
        else:
            raise

    if not dats:
        return

    dat, msids = concat_data_sets(dats, ['data'])
    with DelayedKeyboardInterrupt(logger):
        with timing_logger(logger, f'Applying updates to {len(msids)} h5 files'):
            for msid in msids:
                fetch.ft['msid'] = msid
                stat_file = msid_files['stats'].abs
                if os.path.exists(stat_file):
                    append_stat_col(dat, stat_file, msid, date_id, opt, logger)

            logger.debug(f'Updating {last_date_id_file} with {date_id}')
            if not opt.dry_run:
                with open(last_date_id_file, 'w') as fh:
                    fh.write(f'{date_id}')
def sync_full_archive(opt, msid_files, logger, content, index_tbl):
    """
    Sync the full-resolution archive for ``content``.

    :param opt: processing options
    :param msid_files: MSID file path context (e.g. ``fetch.msid_files``)
    :param logger: output logger
    :param content: content type to sync
    :param index_tbl: index of sync file entries
    :return: None
    """
    # Get the last row of data from the length of the TIME.col (or archfiles?)
    ft = fetch.ft
    ft['content'] = content
    ft['msid'] = 'TIME'
    ft['interval'] = 'full'

    # If no TIME.h5 file then no point in going further
    time_file = Path(msid_files['msid'].abs)
    if not time_file.exists():
        logger.debug(f'Skipping full data for {content}: no {time_file} file')
        return

    logger.info('')
    logger.info(f'Processing full data for {content}')

    # Get the 0-based index of last available full data row
    with tables.open_file(str(time_file), 'r') as h5:
        last_row_idx = len(h5.root.data) - 1

    # Look for index table rows that have new data => the row ends after the last existing
    # data.  Note: row0 and row1 correspond to the slice row0:row1, so up to but
    # not including the row indexed row1 (0-based).  So for 3 existing rows,
    # last_row_idx=2 so to get the new row with index=3 you need row1=4, or equivalently
    # row1 > n_rows.  By def'n we know that row0 <= 3 at this point.
    ok = index_tbl['row1'] > last_row_idx + 1

    if np.count_nonzero(ok) == 0:
        logger.info(f'No new sync data for {content}: no new rows in index table')

    index_tbl = index_tbl[ok]

    try:
        dats = get_full_data_sets(ft, index_tbl, logger, opt)
    except urllib.error.URLError as err:
        if 'timed out' in str(err):
            msg = f' ERROR: timed out getting full data for {content}'
            logger.error(msg)
            process_errors.append(msg)
            dats = []
        else:
            raise

    if dats:
        dat, msids = concat_data_sets(dats, ['data', 'quality'])
        with DelayedKeyboardInterrupt(logger):
            update_full_h5_files(dat, logger, msid_files, msids, opt)
            update_full_archfiles_db3(dat, logger, msid_files, opt)
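# A small worked example (made-up numbers) of the ``row1 > last_row_idx + 1`` test
# above: with 3 existing rows (last_row_idx=2), only index entries whose row0:row1
# slice extends past row index 2 contain new data.
def _example_new_row_selection():
    import numpy as np
    from astropy.table import Table

    index_tbl = Table({'row0': [0, 2, 3], 'row1': [2, 3, 5]})
    last_row_idx = 2                                    # 3 rows already in the archive
    ok = index_tbl['row1'] > last_row_idx + 1
    assert list(ok) == [False, False, True]             # only the row0=3, row1=5 slice is new
    return index_tbl[ok]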
def _append_h5_col_tlm(msid, epoch, times=None, values=None, apply_direct=False):
    """Append new values to an HDF5 MSID data table.

    Parameters
    ----------
    msid : str
        MSID with data buffered for appending to the archive.
    epoch
        Epoch passed through to ``_update_index_file`` when updating the index.
    times, values : array-like, optional
        Data to append directly when ``apply_direct`` is True.
    apply_direct : bool
        If True, append ``times``/``values`` instead of the buffered
        ``_times[msid]``/``_values[msid]`` data.
    """
    global _times
    global _values

    ft['msid'] = msid

    values_h5 = tables.open_file(
        f"{TELEMETRY_ARCHIVE}/data/tlm/{msid}/values.h5",
        mode='a'
    )
    if '/values' not in values_h5:
        raise IOError(f"Archive file for {msid} value data does not contain root '/values'")

    times_h5 = tables.open_file(
        f"{TELEMETRY_ARCHIVE}/data/tlm/{msid}/times.h5",
        mode='a'
    )
    if '/times' not in times_h5:
        raise IOError(f"Archive file for {msid} time data does not contain root '/times'")

    # Index should point to current number of rows
    index = values_h5.root.values.nrows

    # TODO: Filter interval before append
    # if index != 0:
    #     last_time = times_h5.root.times[index - 1]
    #     tstart = np.atleast_1d(_times[msid])[0]
    #     tstop = np.atleast_1d(_times[msid])[-1]

    try:
        if apply_direct:
            values_h5.root.values.append(np.atleast_1d(values))
            times_h5.root.times.append(np.atleast_1d(times))
        else:
            values_h5.root.values.append(np.atleast_1d(_values[msid]))
            times_h5.root.times.append(np.atleast_1d(_times[msid]))
    except Exception as err:
        logger.error(f"{msid} could not append the normal way: {err} | "
                     f"{type(_values[msid])} | {_values[msid]}")

    _update_index_file(msid, epoch, index)

    values_h5.close()
    times_h5.close()

    return 0
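# The appends above assume extendible ``/values`` and ``/times`` nodes already exist
# in the per-MSID HDF5 files.  A minimal sketch (hypothetical helper, assumed float
# dtype; not the project's actual file-creation code) of initializing such a node
# with a PyTables EArray:
import tables


def _create_extendible_h5(path, node_name):
    """Create an HDF5 file with an extendible 1-d float array node (illustrative only)."""
    with tables.open_file(path, mode='w') as h5:
        h5.create_earray(h5.root, node_name,
                         atom=tables.Float64Atom(),  # assumed dtype; real archive types vary
                         shape=(0,))                 # zero-length first axis => appendable

# e.g. _create_extendible_h5('values.h5', 'values') produces a node that supports
# h5.root.values.append(np.atleast_1d(new_values)) as used in _append_h5_col_tlm above.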
def new_func(*args, **kwargs):
    try:
        self.setup()
        func(*args, **kwargs)
    except (KeyboardInterrupt, TaskSkip):
        raise
    except:
        if status['fail'] is False:
            logger.error('%s: %s\n\n' % (func.__name__, traceback.format_exc()))
            status['fail'] = True
        raise
    finally:
        self.teardown()
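# This second ``new_func`` wraps ``func`` with setup/teardown hooks from an enclosing
# object.  A minimal, self-contained sketch of that pattern (illustrative only, not
# the project's actual implementation), with teardown guaranteed by ``finally``:
import functools


class TaskRunner:
    def setup(self):
        pass        # e.g. change directory, open resources

    def teardown(self):
        pass        # always runs, even on error or task skip

    def __call__(self, func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            try:
                self.setup()
                return func(*args, **kwargs)
            finally:
                self.teardown()
        return wrapper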
def process_obsids(start, stop):
    obsids = events.obsids.filter(start, stop)
    for obsid_event in obsids:
        obsid = obsid_event.obsid
        filename = os.path.join('data', str(obsid) + '.pkl')
        if os.path.exists(filename):
            logger.info('Skipping obsid {}, file exists'.format(obsid))
            continue
        else:
            logger.info('**********************************')
            logger.info('Processing obsid {}'.format(obsid))
            logger.info('**********************************')

        try:
            dat = get_obsid(obsid)
        except Exception as err:
            logger.error('ERROR in obsid {}: {}\n'.format(obsid, err))
            # Touch a marker file so the failed obsid is visible on disk
            open(filename + '.ERR', 'w').close()
        else:
            with open(filename, 'wb') as fh:
                pickle.dump(dat, fh, protocol=-1)
            logger.info('Success for {}\n'.format(obsid))
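# A minimal sketch (hypothetical obsid number) of reading back one of the pickle
# files written by process_obsids above:
import pickle


def _load_obsid_dat(obsid=12345):          # 12345 is a made-up obsid for illustration
    with open('data/{}.pkl'.format(obsid), 'rb') as fh:
        return pickle.load(fh)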
def main(args=None):
    global fetch  # fetch module, see below

    # Setup for updating the sync repository
    opt = get_options(args)

    # Set up logging
    loglevel = int(opt.log_level)
    logger = pyyaks.logger.get_logger(name='cheta_update_client_archive', level=loglevel,
                                      format="%(asctime)s %(message)s")

    # If --data-root is supplied then set the fetch msid_files basedir via ENG_ARCHIVE
    # prior to importing fetch.  This ensures that ``content`` is consistent with
    # the destination archive.
    if opt.data_root is not None:
        if not Path(opt.data_root).exists():
            raise FileNotFoundError(
                f'local cheta archive directory {Path(opt.data_root).absolute()} not found')
        os.environ['ENG_ARCHIVE'] = opt.data_root

    fetch = importlib.import_module('.fetch', __package__)

    # Turn things around and define data_root based on fetch, relying on it to
    # find the archive if --data-root is not specified.
    opt.data_root = fetch.msid_files.basedir

    logger.info(f'Running cheta_update_client_archive version {__version__}')
    logger.info(f' {__file__}')
    logger.info('')
    logger.info(f'Updating client archive at {fetch.msid_files.basedir}')

    if opt.add_msids:
        add_msids(opt, logger)
        return

    if opt.content:
        contents = opt.content
    else:
        # fetch.content is a dict of {MSID: content_type} values
        contents = set(fetch.content.values())

    # Global list of timeout errors
    process_errors.clear()

    for content in sorted(contents):
        fetch.ft['content'] = content
        index_tbl = get_index_tbl(content, logger, opt)
        if index_tbl is not None:
            sync_full_archive(opt, fetch.msid_files, logger, content, index_tbl)
            for stat in STATS_DT:
                sync_stat_archive(opt, fetch.msid_files, logger, content, stat, index_tbl)

    if process_errors:
        logger.error('')
        logger.error('PROCESS ERRORS or WARNINGS:')
        for process_error in process_errors:
            logger.error(process_error)
def update_index_file(index_file, opt, logger):
    """Update the top-level index file of data available in the sync archive.

    :param index_file: Path of index ECSV file
    :param opt: options
    :param logger: output logger
    :return: index table (astropy Table)
    """
    if index_file.exists():
        # Start time of last update contained in the sync repo (if it exists), but do not look
        # back more than max_lookback days.  This is relevant for rarely sampled
        # content like cpe1eng.
        filetime0 = (DateTime(opt.date_stop) - opt.max_lookback).secs

        index_tbl = Table.read(index_file)
        if len(index_tbl) == 0:
            # Need to start with a fresh index_tbl since the string column will end up
            # with a length=1 string (date_id) and add_row later will give the wrong result.
            index_tbl = None
        else:
            filetime0 = max(filetime0, index_tbl['filetime1'][-1])
    else:
        # For initial index file creation use the --date-start option
        index_tbl = None
        filetime0 = DateTime(opt.date_start).secs

    max_secs = int(opt.max_days * 86400)
    time_stop = DateTime(opt.date_stop).secs

    # Step through the archfile files entries and collect them into groups of up
    # to --max-days based on file time stamp (which is an integer in CXC secs).
    rows = []
    filename = fetch.msid_files['archfiles'].abs
    logger.verbose(f'Opening archfiles {filename}')

    with DBI(dbi='sqlite', server=filename) as dbi:
        while True:
            filetime1 = min(filetime0 + max_secs, time_stop)

            logger.verbose(f'select from archfiles '
                           f'filetime > {DateTime(filetime0).fits[:-4]} {filetime0} '
                           f'filetime <= {DateTime(filetime1).fits[:-4]} {filetime1} ')
            archfiles = dbi.fetchall(f'select * from archfiles '
                                     f'where filetime > {filetime0} '
                                     f'and filetime <= {filetime1} '
                                     f'order by filetime ')

            # Found new archfiles?  If so get a new index table row for them.
            if len(archfiles) > 0:
                rows.append(get_row_from_archfiles(archfiles))
                filedates = DateTime(archfiles['filetime']).fits
                logger.verbose(f'Got {len(archfiles)} archfiles rows from '
                               f'{filedates[0]} to {filedates[-1]}')

            filetime0 = filetime1

            # Stop if already queried out to the end of desired time range
            if filetime1 >= time_stop:
                break

    if not rows:
        logger.info(f'No updates available for content {fetch.ft["content"]}')
        return index_tbl

    # Create table from scratch or add new rows.  In normal processing there
    # will just be one row per run.
    if index_tbl is None:
        index_tbl = Table(rows)
    else:
        for row in rows:
            index_tbl.add_row(row)

    if not index_file.parent.exists():
        logger.info(f'Making directory {index_file.parent}')
        index_file.parent.mkdir(exist_ok=True, parents=True)

    msg = check_index_tbl_consistency(index_tbl)
    if msg:
        msg += '\n'
        msg += '\n'.join(index_tbl.pformat(max_lines=-1, max_width=-1))
        logger.error(f'Index table inconsistency: {msg}')
        return None

    logger.info(f'Writing {len(rows)} row(s) to index file {index_file}')
    index_tbl.write(index_file, format='ascii.ecsv', overwrite=True)

    return index_tbl
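# A minimal standalone sketch (made-up numbers, hypothetical helper) of the
# time-window stepping in the while loop above: the range (filetime0, time_stop]
# is split into chunks of at most ``max_secs`` seconds, with the final chunk
# ending exactly at ``time_stop``.
def _example_time_windows(filetime0, time_stop, max_secs):
    windows = []
    while True:
        filetime1 = min(filetime0 + max_secs, time_stop)
        windows.append((filetime0, filetime1))
        filetime0 = filetime1
        if filetime1 >= time_stop:
            break
    return windows

# e.g. _example_time_windows(0, 250, 100) -> [(0, 100), (100, 200), (200, 250)]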
def main():
    global logger

    opt = get_opt()

    logger = pyyaks.logger.get_logger(name='kadi', level=opt.log_level,
                                      format="%(asctime)s %(message)s")
    log_run_info(logger.info, opt)

    # Set the global root data directory.  This gets used in the django
    # setup to find the sqlite3 database file.
    os.environ['KADI'] = os.path.abspath(opt.data_root)
    from .paths import EVENTS_DB_PATH

    logger.info('Event database : {}'.format(EVENTS_DB_PATH()))
    logger.info('')

    from .events import models

    # Allow for a cmd line option --start.  If supplied then loop the
    # effective value of opt.stop from start to the cmd line --stop
    # in steps of opt.loop_days days.
    if opt.start is None:
        date_stops = [opt.stop]
    else:
        t_starts = np.arange(DateTime(opt.start).secs,
                             DateTime(opt.stop).secs,
                             opt.loop_days * 86400.)
        date_stops = [DateTime(t).date for t in t_starts]
        date_stops.append(opt.stop)

    # Get the event classes in models module
    EventModels = [Model for name, Model in vars(models).items()
                   if (isinstance(Model, type)                  # is a class
                       and issubclass(Model, models.BaseEvent)  # is a BaseEvent subclass
                       and 'Meta' not in Model.__dict__         # is not a base class
                       and hasattr(Model, 'get_events'))]       # can get events

    # Filter on --model command line arg(s)
    if opt.models:
        EventModels = [x for x in EventModels
                       if any(re.match(y, x.__name__) for y in opt.models)]

    # Update priority (higher priority value means earlier in processing)
    EventModels = sorted(EventModels, key=lambda x: x.update_priority, reverse=True)

    for EventModel in EventModels:
        try:
            if opt.delete_from_start and opt.start is not None:
                delete_from_date(EventModel, opt.start)

            for date_stop in date_stops:
                update(EventModel, date_stop)
        except Exception:
            # Something went wrong, but press on with processing other EventModels
            import traceback
            logger.error(f'ERROR in processing {EventModel}')
            logger.error(f'Traceback:\n{traceback.format_exc()}')