Example #1
        def new_func(*args, **kwargs):
            runval = run(func.__name__) if callable(run) else run
            if runval is False:
                return
            elif runval is True:
                pass
            elif runval is None:
                if status['fail']:
                    return
            else:
                raise ValueError('run value = %s but must be True, False, or None' % runval)

            logger.verbose('')
            logger.verbose('-' * 60)
            logger.info(' Running task: %s at %s' % (func.__name__, time.ctime()))
            logger.verbose('-' * 60)

            try:
                func(*args, **kwargs)
                pyyaks.context.store_context(status.get('context_file'))
            except KeyboardInterrupt:
                raise
            except TaskSkip:
                pass
            except Exception:
                if status['fail'] is False:
                    logger.error('%s: %s\n\n' % (func.__name__, traceback.format_exc()))
                    status['fail'] = True
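
The wrapper above is the body of a task-runner decorator: run may be True, False, None, or a callable that receives the task name and decides at call time, and a module-level status['fail'] flag makes later tasks skip themselves after a failure. Below is a minimal sketch of how such a decorator factory could produce this wrapper; the task name and the simplified status dict are assumptions for illustration, not the original API.

# Minimal sketch (assumed names, not the original API) of a decorator factory
# that produces a wrapper like new_func above.
import functools

status = {'fail': False}  # assumed module-level pipeline state

def task(run=None):
    def decorator(func):
        @functools.wraps(func)
        def new_func(*args, **kwargs):
            runval = run(func.__name__) if callable(run) else run
            if runval is False:
                return                      # task explicitly disabled
            if runval is None and status['fail']:
                return                      # default: skip after an earlier failure
            return func(*args, **kwargs)
        return new_func
    return decorator

@task(run=True)
def always_run():
    print('running')

always_run()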
Example #2
def _sync_stat_archive(opt, msid_files, logger, content, stat, index_tbl):
    """
    Actual worker for syncing the stat archive for ``content``.
    """
    # Set the fetch context for this content type and stat interval
    ft = fetch.ft
    ft['content'] = content
    ft['interval'] = stat

    stats_dir = Path(msid_files['statsdir'].abs)
    if not stats_dir.exists():
        logger.debug(f'Skipping {stat} data for {content}: no directory')
        return

    logger.info('')
    logger.info(f'Processing {stat} data for {content}')

    # Get the MSIDs that are in the client archive
    msids = [str(fn.name)[:-3] for fn in stats_dir.glob('*.h5')]
    if not msids:
        logger.debug(f'Skipping {stat} data for {content}: no stats h5 files')
        return
    else:
        logger.debug(f'Stat msids are {msids}')

    last_date_id, last_date_id_file = get_last_date_id(
        msid_files, msids, stat, logger)
    logger.verbose(f'Got {last_date_id} as last date_id that was applied to archive')

    # Get list of applicable dat objects (new data, before opt.date_stop).  Also
    # return ``date_id`` which is the date_id of the final data set in the list.
    # This will be written as the new ``last_date_id``.
    try:
        dats, date_id = get_stat_data_sets(ft, index_tbl, last_date_id, logger, opt)
    except urllib.error.URLError as err:
        if 'timed out' in str(err):
            msg = f'  ERROR: timed out getting {stat} data for {content}'
            logger.error(msg)
            process_errors.append(msg)
            return
        else:
            raise

    if not dats:
        return

    dat, msids = concat_data_sets(dats, ['data'])
    with DelayedKeyboardInterrupt(logger):
        with timing_logger(logger, f'Applying updates to {len(msids)} h5 files'):
            for msid in msids:
                fetch.ft['msid'] = msid
                stat_file = msid_files['stats'].abs
                if os.path.exists(stat_file):
                    append_stat_col(dat, stat_file, msid, date_id, opt, logger)

            logger.debug(f'Updating {last_date_id_file} with {date_id}')
            if not opt.dry_run:
                with open(last_date_id_file, 'w') as fh:
                    fh.write(f'{date_id}')
Example #3
def sync_full_archive(opt, msid_files, logger, content, index_tbl):
    """
    Sync the archive for ``content``.

    :param opt: processing options (from the command-line parser)
    :param msid_files: dict of archive file paths (e.g. fetch.msid_files)
    :param logger: output logger
    :param content: content type to sync
    :param index_tbl: index of sync file entries
    :return: None
    """
    # Get the last row of data from the length of the TIME.col (or archfiles?)
    ft = fetch.ft
    ft['content'] = content
    ft['msid'] = 'TIME'
    ft['interval'] = 'full'

    # If no TIME.h5 file then no point in going further
    time_file = Path(msid_files['msid'].abs)
    if not time_file.exists():
        logger.debug(f'Skipping full data for {content}: no {time_file} file')
        return

    logger.info('')
    logger.info(f'Processing full data for {content}')

    # Get the 0-based index of last available full data row
    with tables.open_file(str(time_file), 'r') as h5:
        last_row_idx = len(h5.root.data) - 1

    # Look for index table rows that have new data => the row ends after the last existing
    # data.  Note: row0 and row1 correspond to the slice row0:row1, so up to but
    # not including the row indexed row1 (0-based).  So for 3 existing rows,
    # last_row_idx=2 so to get the new row with index=3 you need row1=4, or equivalently
    # row1 > n_rows. By def'n we know that row0 <= 3 at this point.
    ok = index_tbl['row1'] > last_row_idx + 1

    if np.count_nonzero(ok) == 0:
        logger.info(f'No new sync data for {content}: no new rows in index table')

    index_tbl = index_tbl[ok]

    try:
        dats = get_full_data_sets(ft, index_tbl, logger, opt)
    except urllib.error.URLError as err:
        if 'timed out' in str(err):
            msg = f'  ERROR: timed out getting full data for {content}'
            logger.error(msg)
            process_errors.append(msg)
            dats = []
        else:
            raise

    if dats:
        dat, msids = concat_data_sets(dats, ['data', 'quality'])
        with DelayedKeyboardInterrupt(logger):
            update_full_h5_files(dat, logger, msid_files, msids, opt)
            update_full_archfiles_db3(dat, logger, msid_files, opt)
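
The ok mask in sync_full_archive keeps only index-table rows whose row1 extends past the data already on disk. Here is a toy illustration of that selection with made-up row numbers (standalone, not the real index table):

# Toy illustration (made-up numbers) of the row-selection mask above: keep
# only index rows whose row1 reaches past the rows already in the archive.
from astropy.table import Table

index_tbl = Table({'row0': [0, 100, 200], 'row1': [100, 200, 300]})
last_row_idx = 199                           # rows 0..199 already on disk

ok = index_tbl['row1'] > last_row_idx + 1    # row1 must exceed n_rows (= 200)
print(index_tbl[ok])                         # only the row0=200, row1=300 entry remains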
Example #4
def _append_h5_col_tlm(msid, epoch, times=None, values=None, apply_direct=False):
    """Append new values to an HDF5 MSID data table.

    Parameters
    ----------
    msid : str
        MSID whose buffered data are appended to the archive.
    epoch :
        Epoch passed through to ``_update_index_file`` for ``msid``.
    times, values : array-like, optional
        Data to append directly when ``apply_direct`` is True; otherwise the
        module-level ``_times[msid]`` / ``_values[msid]`` buffers are used.
    apply_direct : bool
        Append ``times``/``values`` directly instead of the buffered data.
    """

    global _times
    global _values

    ft['msid'] = msid

    values_h5 = tables.open_file(
        f"{TELEMETRY_ARCHIVE}/data/tlm/{msid}/values.h5",
        mode='a'
    )

    if '/values' not in values_h5:
        raise IOError(f"Archive file for {msid} value data does not contain root '/values'")

    times_h5 = tables.open_file(
        f"{TELEMETRY_ARCHIVE}/data/tlm/{msid}/times.h5",
        mode='a'
    )

    if '/times' not in times_h5:
        raise IOError(f"Archive file for {msid} time data does not contain root '/times'")

    # Index should point to current number of rows
    index = values_h5.root.values.nrows
    # TODO: Filter interval before append
    # if index != 0:
    #     last_time = times_h5.root.times[index -  1]
    #     tstart = np.atleast_1d(_times[msid])[0]
    #     tstop = np.atleast_1d(_times[msid])[-1]

    try:
        if apply_direct:
            values_h5.root.values.append(np.atleast_1d(values))
            times_h5.root.times.append(np.atleast_1d(times))
        else:
            values_h5.root.values.append(np.atleast_1d(_values[msid]))
            times_h5.root.times.append(np.atleast_1d(_times[msid]))
    except Exception as err:
        logger.error(
            f"{msid} could not be appended ({err}); "
            f"buffered type={type(_values[msid])}, values={_values[msid]}"
        )


    _update_index_file(msid, epoch, index)

    values_h5.close()
    times_h5.close()

    return 0
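
Note that the two PyTables handles in _append_h5_col_tlm are only closed if execution reaches the end of the function; the early IOError raises, for example, leave them open. Below is a sketch of the same append using context managers (same assumed file layout) so the files are closed either way:

# Sketch (same assumed file layout as above): context managers close the
# HDF5 handles even if a check or the append raises.
import numpy as np
import tables

def append_msid(msid, times, values, archive_root):
    with tables.open_file(f"{archive_root}/data/tlm/{msid}/values.h5", mode='a') as values_h5, \
         tables.open_file(f"{archive_root}/data/tlm/{msid}/times.h5", mode='a') as times_h5:
        if '/values' not in values_h5 or '/times' not in times_h5:
            raise IOError(f"Archive files for {msid} are missing '/values' or '/times'")
        index = values_h5.root.values.nrows  # number of rows before the append
        values_h5.root.values.append(np.atleast_1d(values))
        times_h5.root.times.append(np.atleast_1d(times))
    return index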
Example #5
def new_func(*args, **kwargs):
    try:
        self.setup()
        func(*args, **kwargs)
    except (KeyboardInterrupt, TaskSkip):
        raise
    except Exception:
        if status['fail'] is False:
            logger.error('%s: %s\n\n' % (func.__name__, traceback.format_exc()))
            status['fail'] = True
        raise
    finally:
        self.teardown()
Example #6
def process_obsids(start, stop):
    obsids = events.obsids.filter(start, stop)
    for obsid_event in obsids:
        obsid = obsid_event.obsid
        filename = os.path.join('data', str(obsid) + '.pkl')
        if os.path.exists(filename):
            logger.info('Skipping obsid {}, file exists'.format(obsid))
            continue
        else:
            logger.info('**********************************')
            logger.info('Processing obsid {}'.format(obsid))
            logger.info('**********************************')

        try:
            dat = get_obsid(obsid)
        except Exception as err:
            logger.error('ERROR in obsid {}: {}\n'.format(obsid, err))
            # Leave an empty .ERR marker file recording the failure
            open(filename + '.ERR', 'w').close()
        else:
            with open(filename, 'wb') as fh:
                pickle.dump(dat, fh, protocol=-1)
            logger.info('Success for {}\n'.format(obsid))
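
Pickle data is binary, so the file is written with 'wb' above and must be read back with 'rb'; a small round-trip sketch with an assumed filename:

# Round-trip sketch (assumed filename): pickle files are binary on both ends.
import pickle

with open('data/12345.pkl', 'rb') as fh:
    dat = pickle.load(fh)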
Example #7
def main(args=None):
    global fetch  # fetch module, see below

    # Setup for updating the sync repository
    opt = get_options(args)

    # Set up logging
    loglevel = int(opt.log_level)
    logger = pyyaks.logger.get_logger(name='cheta_update_client_archive', level=loglevel,
                                      format="%(asctime)s %(message)s")

    # If --data-root is supplied then set the fetch msid_files basedir via ENG_ARCHIVE
    # prior to importing fetch.  This ensures that ``content`` is consistent with
    # the destination archive.
    if opt.data_root is not None:
        if not Path(opt.data_root).exists():
            raise FileNotFoundError(
                f'local cheta archive directory {Path(opt.data_root).absolute()} not found')
        os.environ['ENG_ARCHIVE'] = opt.data_root

    fetch = importlib.import_module('.fetch', __package__)

    # Turn things around and define data_root based on fetch, relying on it to
    # find the archive if --data-root is not specified.
    opt.data_root = fetch.msid_files.basedir

    logger.info(f'Running cheta_update_client_archive version {__version__}')
    logger.info(f'  {__file__}')
    logger.info('')
    logger.info(f'Updating client archive at {fetch.msid_files.basedir}')

    if opt.add_msids:
        add_msids(opt, logger)
        return

    if opt.content:
        contents = opt.content
    else:
        # fetch.content is a dict of {MSID: content_type} values
        contents = set(fetch.content.values())

    # Global list of timeout errors
    process_errors.clear()

    for content in sorted(contents):
        fetch.ft['content'] = content
        index_tbl = get_index_tbl(content, logger, opt)
        if index_tbl is not None:
            sync_full_archive(opt, fetch.msid_files, logger, content, index_tbl)
            for stat in STATS_DT:
                sync_stat_archive(opt, fetch.msid_files, logger, content, stat, index_tbl)

    if process_errors:
        logger.error('')
        logger.error('PROCESS ERRORS or WARNINGS:')

    for process_error in process_errors:
        logger.error(process_error)
Example #8
def update_index_file(index_file, opt, logger):
    """Update the top-level index file of data available in the sync archive

    :param index_file: Path of index ECSV file
    :param opt: options
    :param logger: output logger
    :return: index table (astropy Table)
    """
    if index_file.exists():
        # Start time of last update contained in the sync repo (if it exists), but do not look
        # back more than max_lookback days.  This is relevant for rarely sampled
        # content like cpe1eng.
        filetime0 = (DateTime(opt.date_stop) - opt.max_lookback).secs

        index_tbl = Table.read(index_file)
        if len(index_tbl) == 0:
            # Need to start with a fresh index_tbl since the string column will end up
            # with a length=1 string (date_id) and add_row later will give the wrong result.
            index_tbl = None
        else:
            filetime0 = max(filetime0, index_tbl['filetime1'][-1])
    else:
        # For initial index file creation use the --date-start option
        index_tbl = None
        filetime0 = DateTime(opt.date_start).secs

    max_secs = int(opt.max_days * 86400)
    time_stop = DateTime(opt.date_stop).secs

    # Step through the archfiles entries and collect them into groups of up
    # to --max-days based on file time stamp (which is an integer in CXC secs).
    rows = []
    filename = fetch.msid_files['archfiles'].abs
    logger.verbose(f'Opening archfiles {filename}')
    with DBI(dbi='sqlite', server=filename) as dbi:
        while True:
            filetime1 = min(filetime0 + max_secs, time_stop)
            logger.verbose(
                f'select from archfiles '
                f'filetime > {DateTime(filetime0).fits[:-4]} {filetime0} '
                f'filetime <= {DateTime(filetime1).fits[:-4]} {filetime1} ')
            archfiles = dbi.fetchall(f'select * from archfiles '
                                     f'where filetime > {filetime0} '
                                     f'and filetime <= {filetime1} '
                                     f'order by filetime ')

            # Found new archfiles?  If so get a new index table row for them.
            if len(archfiles) > 0:
                rows.append(get_row_from_archfiles(archfiles))
                filedates = DateTime(archfiles['filetime']).fits
                logger.verbose(f'Got {len(archfiles)} archfiles rows from '
                               f'{filedates[0]} to {filedates[-1]}')

            filetime0 = filetime1

            # Stop if already queried out to the end of desired time range
            if filetime1 >= time_stop:
                break

    if not rows:
        logger.info(f'No updates available for content {fetch.ft["content"]}')
        return index_tbl

    # Create table from scratch or add new rows.  In normal processing there
    # will just be one row per run.
    if index_tbl is None:
        index_tbl = Table(rows)
    else:
        for row in rows:
            index_tbl.add_row(row)

    if not index_file.parent.exists():
        logger.info(f'Making directory {index_file.parent}')
        index_file.parent.mkdir(exist_ok=True, parents=True)

    msg = check_index_tbl_consistency(index_tbl)
    if msg:
        msg += '\n'
        msg += '\n'.join(index_tbl.pformat(max_lines=-1, max_width=-1))
        logger.error(f'Index table inconsistency: {msg}')
        return None

    logger.info(f'Writing {len(rows)} row(s) to index file {index_file}')
    index_tbl.write(index_file, format='ascii.ecsv')

    return index_tbl
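
The while loop in update_index_file walks the range from filetime0 to time_stop in windows of at most max_secs, issuing one archfiles query per window. A stripped-down sketch of just that windowing with made-up bounds and no database:

# Stripped-down sketch of the time-windowing loop above (made-up bounds,
# no database query): step through [filetime0, time_stop] in max_secs chunks.
filetime0 = 0.0
time_stop = 250_000.0
max_secs = 100_000

while True:
    filetime1 = min(filetime0 + max_secs, time_stop)
    print(f'would query archfiles with {filetime0} < filetime <= {filetime1}')
    filetime0 = filetime1
    if filetime1 >= time_stop:
        break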
Example #9
def main():
    global logger

    opt = get_opt()

    logger = pyyaks.logger.get_logger(name='kadi',
                                      level=opt.log_level,
                                      format="%(asctime)s %(message)s")
    log_run_info(logger.info, opt)

    # Set the global root data directory.  This gets used in the django
    # setup to find the sqlite3 database file.
    os.environ['KADI'] = os.path.abspath(opt.data_root)
    from .paths import EVENTS_DB_PATH

    logger.info('Event database : {}'.format(EVENTS_DB_PATH()))
    logger.info('')

    from .events import models

    # Allow for a cmd line option --start.  If supplied then loop the
    # effective value of opt.stop from start to the cmd line
    # --stop in steps of --loop-days
    if opt.start is None:
        date_stops = [opt.stop]
    else:
        t_starts = np.arange(
            DateTime(opt.start).secs,
            DateTime(opt.stop).secs, opt.loop_days * 86400.)
        date_stops = [DateTime(t).date for t in t_starts]
        date_stops.append(opt.stop)

    # Get the event classes in models module
    EventModels = [
        Model for name, Model in vars(models).items()
        if (isinstance(Model, type)  # is a class
            and issubclass(Model, models.BaseEvent)  # is a BaseEvent subclass
            and 'Meta' not in Model.__dict__  # is not a base class
            and hasattr(Model, 'get_events')  # can get events
            )
    ]

    # Filter on --model command line arg(s)
    if opt.models:
        EventModels = [
            x for x in EventModels if any(
                re.match(y, x.__name__) for y in opt.models)
        ]

    # Update priority (higher priority value means earlier in processing)
    EventModels = sorted(EventModels,
                         key=lambda x: x.update_priority,
                         reverse=True)

    for EventModel in EventModels:
        try:
            if opt.delete_from_start and opt.start is not None:
                delete_from_date(EventModel, opt.start)

            for date_stop in date_stops:
                update(EventModel, date_stop)
        except Exception:
            # Something went wrong, but press on with processing other EventModels
            import traceback
            logger.error(f'ERROR in processing {EventModel}')
            logger.error(f'Traceback:\n{traceback.format_exc()}')
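
The EventModels discovery above selects classes out of the models module namespace by type and inheritance checks. A toy illustration with made-up classes (the Django-specific 'Meta' check is replaced here by a simple exclusion of the base class itself):

# Toy illustration (made-up classes) of the model-discovery filter above.
class BaseEvent:
    pass

class Obsid(BaseEvent):
    update_priority = 10

    @classmethod
    def get_events(cls):
        return []

class Helper:
    pass

namespace = {'BaseEvent': BaseEvent, 'Obsid': Obsid, 'Helper': Helper}
EventModels = [
    Model for name, Model in namespace.items()
    if isinstance(Model, type)            # is a class
    and issubclass(Model, BaseEvent)      # is a BaseEvent subclass
    and Model is not BaseEvent            # is not the base class itself
    and hasattr(Model, 'get_events')      # can get events
]
print(EventModels)                        # [<class '__main__.Obsid'>]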