Example #1
def update_full_archfiles_db3(dat, logger, msid_files, opt):
    # Update the archfiles.db3 database to include the associated archive files
    server_file = msid_files['archfiles'].abs
    logger.debug(f'Updating {server_file}')

    def as_python(val):
        try:
            return val.item()
        except AttributeError:
            return val

    with timing_logger(logger, f'Updating {server_file}', 'info', 'info'):
        with DBI(dbi='sqlite', server=server_file) as db:
            for archfile in dat['archfiles']:
                vals = {name: as_python(archfile[name]) for name in archfile.dtype.names}
                logger.debug(f'Inserting {vals["filename"]}')
                if not opt.dry_run:
                    try:
                        db.insert(vals, 'archfiles')
                    except sqlite3.IntegrityError as err:
                        # Expected exception for archfiles already in the table
                        assert 'UNIQUE constraint failed: archfiles.filename' in str(err)

            if not opt.dry_run:
                db.commit()
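
A minimal, self-contained sketch of the same insert-or-skip pattern using the standard sqlite3 module directly; the table layout, the values, and the assumption that the DBI wrapper behaves like a plain sqlite3 connection are illustrative, not taken from the archive code.

import sqlite3

import numpy as np


def as_python(val):
    """Convert a numpy scalar to a plain Python value; pass anything else through."""
    try:
        return val.item()
    except AttributeError:
        return val


with sqlite3.connect(':memory:') as con:
    con.execute('CREATE TABLE archfiles (filename TEXT UNIQUE, filetime INTEGER)')
    rows = [{'filename': np.str_('f1.fits'), 'filetime': np.int64(100)},
            {'filename': np.str_('f1.fits'), 'filetime': np.int64(100)}]  # duplicate row
    for row in rows:
        vals = {name: as_python(val) for name, val in row.items()}
        try:
            con.execute('INSERT INTO archfiles VALUES (:filename, :filetime)', vals)
        except sqlite3.IntegrityError as err:
            # Expected when the file is already in the table; anything else is a real error
            assert 'UNIQUE constraint failed' in str(err)
    con.commit()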
Example #2
    def add_cmd(self, **cmd):
        """
        Add command in correct order to the commands list.

        TO DO: use scs and step for further sorting??
        """
        cmd_date = cmd["date"]

        logger.debug("Adding command %s", cmd)

        # Prevent adding command before current command since the command
        # interpreter is a one-pass process.
        if cmd_date < self.date:
            raise ValueError("cannot insert command {} prior to current command {}".format(cmd, self.curr_cmd))

        # Insert command at first place where new command date is strictly
        # less than existing command date.  This implementation is linear, and
        # could be improved, though in practice commands are often inserted
        # close to the original.
        cmds = self.cmds
        for i_cmd in xrange(self.i_cmd + 1, len(cmds)):
            if cmd_date < cmds[i_cmd]["date"]:
                cmds.insert(i_cmd, cmd)
                break
        else:
            cmds.append(cmd)
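
The for/else idiom above is easy to misread; here is a small standalone sketch (with made-up dates and a plain list) showing how the insertion keeps the list ordered and falls back to appending at the end.

cmds = [{'date': '2023:001'}, {'date': '2023:005'}, {'date': '2023:010'}]
new_cmd = {'date': '2023:007'}

for i_cmd in range(len(cmds)):
    if new_cmd['date'] < cmds[i_cmd]['date']:
        cmds.insert(i_cmd, new_cmd)
        break
else:
    # Runs only if the loop finished without a break: the new command goes at the end
    cmds.append(new_cmd)

print([cmd['date'] for cmd in cmds])  # ['2023:001', '2023:005', '2023:007', '2023:010']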
Example #3
def _get_stat_data_from_archive(filename, stat, tstart, tstop, last_row1, logger):
    """
    Return stat table rows in the range tstart <= time < tstop.

    Also returns the corresponding table row indexes.

    :param filename: HDF5 file to read
    :param stat: stat (5min or daily)
    :param tstart: min time
    :param tstop: max time
    :param last_row1: row1 for previous index table entry
    :param logger: logger
    :return:
    """
    dt = STATS_DT[stat]

    logger.debug(f'_get_stat_data({filename}, {stat}, {DateTime(tstart).fits}, '
                 f'{DateTime(tstop).fits}, {last_row1})')

    with tables.open_file(filename, 'r') as h5:
        # Check if tstart is beyond the end of the table.  If so, return an empty table
        table = h5.root.data
        last_index = table[-1]['index']
        last_time = (last_index + 0.5) * dt
        if tstart > last_time:
            logger.debug(f'No available stats data {DateTime(tstart).fits} > '
                         f'{DateTime(last_time).fits} (returning empty table)')
            row0 = row1 = len(table)
            table_rows = table[row0:row1]
        else:
            # Compute approx number of rows from the end for tstart.  Normally the index value
            # goes in lock step with row, but it can happen that an index is missed because of
            # missing data.  But if we back up by delta_rows, we are guaranteed to get to at
            # least the row corresponding to tstart.
            delta_rows = int((last_time - tstart) / dt) + 10
            times = (table[-delta_rows:]['index'] + 0.5) * dt

            # In the worst case of starting to sync a client archive for a rarely-sampled
            # content like cpe1eng or pcad7eng (AOSPASA2CV), we need to include an extra ``dt``
            # on both ends to ensure that the first / last rows are caught. If the last
            # full-res sample is either before or after the stat mid-point timestamp then the
            # stat sample may get dropped. This happened in real life for AOSPASA2CV.
            # Having extra rows on front is OK because they just get clipped, and an extra
            # row on back is OK because of clipping on the next update (and in normal
            # processing we always want the sync archive to have all recent data).
            sub_row0, sub_row1 = np.searchsorted(times, [tstart - dt, tstop + dt])
            sub_row_offset = len(table) - delta_rows

            row0 = sub_row0 + sub_row_offset
            row1 = sub_row1 + sub_row_offset

            # If we have the last value of row1 (from previous sync entry) then use
            # that instead of computed value for row0.
            if last_row1 is not None:
                row0 = last_row1

            table_rows = table[row0:row1]  # returns np.ndarray (structured array)

    return table_rows, row0, row1
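
A rough sketch of the index arithmetic used above, with an assumed 5-minute bin width and made-up table sizes: stat rows carry an integer 'index' whose bin midpoint time is (index + 0.5) * dt, and np.searchsorted on the midpoint times of the trailing delta_rows rows gives sub-array positions that are shifted back to absolute row numbers.

import numpy as np

dt = 328.0                       # assumed 5-minute stat bin width in seconds
n_table = 50_000                 # pretend total number of rows in the stat table
delta_rows = 100
index = np.arange(10_000, 10_000 + delta_rows)   # 'index' column of the trailing rows
times = (index + 0.5) * dt                       # bin midpoint times

tstart, tstop = 10_010.2 * dt, 10_020.7 * dt
# Pad by dt on both ends so edge samples are not dropped, then map the
# sub-array positions back to absolute row numbers in the full table.
sub_row0, sub_row1 = np.searchsorted(times, [tstart - dt, tstop + dt])
sub_row_offset = n_table - delta_rows
row0, row1 = sub_row0 + sub_row_offset, sub_row1 + sub_row_offset
print(row0, row1)                # 49909 49922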
Example #4
def sync_full_archive(opt, msid_files, logger, content, index_tbl):
    """
    Sync the archive for ``content``.

    :param opt:
    :param msid_files:
    :param logger:
    :param content:
    :param index_tbl: index of sync file entries
    :return:
    """
    # Get the last row of data from the length of the TIME.col (or archfiles?)
    ft = fetch.ft
    ft['content'] = content
    ft['msid'] = 'TIME'
    ft['interval'] = 'full'

    # If no TIME.h5 file then no point in going further
    time_file = Path(msid_files['msid'].abs)
    if not time_file.exists():
        logger.debug(f'Skipping full data for {content}: no {time_file} file')
        return

    logger.info('')
    logger.info(f'Processing full data for {content}')

    # Get the 0-based index of last available full data row
    with tables.open_file(str(time_file), 'r') as h5:
        last_row_idx = len(h5.root.data) - 1

    # Look for index table rows that have new data => the row ends after the last existing
    # data.  Note: row0 and row1 correspond to the slice row0:row1, so up to but
    # not including the row indexed row1 (0-based).  So for 3 existing rows,
    # last_row_idx=2 and to get the new row with index=3 you need row1 >= 4, or equivalently
    # row1 > last_row_idx + 1.  By definition we know that row0 <= 3 at this point.
    ok = index_tbl['row1'] > last_row_idx + 1

    if np.count_nonzero(ok) == 0:
        logger.info(f'No new sync data for {content}: no new rows in index table')

    index_tbl = index_tbl[ok]

    try:
        dats = get_full_data_sets(ft, index_tbl, logger, opt)
    except urllib.error.URLError as err:
        if 'timed out' in str(err):
            msg = f'  ERROR: timed out getting full data for {content}'
            logger.error(msg)
            process_errors.append(msg)
            dats = []
        else:
            raise

    if dats:
        dat, msids = concat_data_sets(dats, ['data', 'quality'])
        with DelayedKeyboardInterrupt(logger):
            update_full_h5_files(dat, logger, msid_files, msids, opt)
            update_full_archfiles_db3(dat, logger, msid_files, opt)
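
The "new rows" test is the subtle part; a tiny sketch with made-up numbers:

import numpy as np

last_row_idx = 2                   # TIME.h5 already holds rows 0, 1, 2
row1 = np.array([2, 3, 4, 6])      # 'row1' (exclusive slice end) of candidate sync entries
ok = row1 > last_row_idx + 1       # an entry is new only if its slice reaches past row 2
print(ok)                          # [False False  True  True]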
Example #5
def main():
    global opt, ft, msid_files, logger

    opt, args = get_options()
    ft = fetch.ft
    msid_files = pyyaks.context.ContextDict('add_derived.msid_files',
                                            basedir=opt.data_root)
    msid_files.update(file_defs.msid_files)
    logger = pyyaks.logger.get_logger(name='engarchive',
                                      level=pyyaks.logger.VERBOSE,
                                      format="%(asctime)s %(message)s")

    # Get the derived parameter classes
    dp_classes = (getattr(derived, x) for x in dir(derived)
                  if x.startswith('DP_'))
    dp_classes = [
        x for x in dp_classes
        if hasattr(x, '__base__') and issubclass(x, derived.DerivedParameter)
    ]
    content_defs = {}
    for dp_class in dp_classes:
        colname = dp_class.__name__.upper()
        dp = dp_class()
        content = dp.content
        if opt.content == [] or any(
                re.match(x + r'\d+', content) for x in opt.content):
            dpd = content_defs.setdefault(content, {})
            dpd.setdefault('classes', {'TIME': None})
            dpd['content'] = content
            dpd['classes'][colname] = dp_class
            dpd['mnf_step'] = dp.mnf_step
            dpd['time_step'] = dp.time_step

    for content, content_def in content_defs.items():
        ft['content'] = content
        logger.info('CONTENT = {}'.format(content))

        # Make content directory
        if not os.path.exists(msid_files['contentdir'].rel):
            logger.info('Making directory {}'.format(
                msid_files['contentdir'].rel))
            os.mkdir(msid_files['contentdir'].rel)

        # Make the archfiles.db3 file (if needed)
        make_archfiles_db(msid_files['archfiles'].abs, content_def)

        for colname in content_def['classes']:
            ft['msid'] = colname
            logger.debug('MSID = {}'.format(colname))
            # Create colnames and colnames_all pickle files (if needed) and add colname
            add_colname(msid_files['colnames'].rel, colname)
            add_colname(msid_files['colnames_all'].rel, colname)

            make_msid_file(colname, content, content_def)

        add_colname(msid_files['colnames_all'].rel, 'QUALITY')
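
A stripped-down sketch of the class-discovery idiom above; the module and class names are stand-ins, and isinstance(cls, type) is used here in place of the hasattr(x, '__base__') test.

import types


class DerivedParameter:
    pass


class DP_PITCH(DerivedParameter):
    content = 'pcad4eng'   # illustrative content name


derived = types.SimpleNamespace(DerivedParameter=DerivedParameter, DP_PITCH=DP_PITCH)

dp_classes = (getattr(derived, name) for name in dir(derived) if name.startswith('DP_'))
dp_classes = [cls for cls in dp_classes
              if isinstance(cls, type) and issubclass(cls, derived.DerivedParameter)]
print([cls.__name__ for cls in dp_classes])   # ['DP_PITCH']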
Example #6
    def __set__(self, SC, value):
        date = SC.date

        logger.debug("%s %s=%s", date, self.name, value)

        self.value = value
        self.values.append(value)
        self.dates.resize(len(self.values))
        self.dates[-1] = date

        SC.set_state_value(date, self.name, value)
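
This __set__ belongs to a data descriptor that records every assignment along with the owner's current date. A minimal sketch of the same pattern, with made-up class names and plain lists standing in for the resizable arrays used above:

class TrackedState:
    def __init__(self):
        self.values = []
        self.dates = []

    def __get__(self, sc, owner=None):
        if sc is None:
            return self                # class-level access returns the descriptor itself
        return self.values[-1] if self.values else None

    def __set__(self, sc, value):
        self.values.append(value)
        self.dates.append(sc.date)     # record the owner's current date with the value


class SC:
    pitch = TrackedState()

    def __init__(self, date):
        self.date = date


sc = SC('2023:001:00:00:00')
sc.pitch = 150.0
sc.date = '2023:001:00:05:00'
sc.pitch = 155.0
print(SC.pitch.dates, sc.pitch)   # ['2023:001:00:00:00', '2023:001:00:05:00'] 155.0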
Example #7
def _sync_stat_archive(opt, msid_files, logger, content, stat, index_tbl):
    """
    Actual worker for syncing the stat archive for ``content``.
    """
    # Get the last row of data from the length of the TIME.col (or archfiles?)
    ft = fetch.ft
    ft['content'] = content
    ft['interval'] = stat

    stats_dir = Path(msid_files['statsdir'].abs)
    if not stats_dir.exists():
        logger.debug(f'Skipping {stat} data for {content}: no directory')
        return

    logger.info('')
    logger.info(f'Processing {stat} data for {content}')

    # Get the MSIDs that are in client archive
    msids = [str(fn.name)[:-3] for fn in stats_dir.glob('*.h5')]
    if not msids:
        logger.debug(f'Skipping {stat} data for {content}: no stats h5 files')
        return
    else:
        logger.debug(f'Stat msids are {msids}')

    last_date_id, last_date_id_file = get_last_date_id(
        msid_files, msids, stat, logger)
    logger.verbose(f'Got {last_date_id} as last date_id that was applied to archive')

    # Get list of applicable dat objects (new data, before opt.date_stop).  Also
    # return ``date_id`` which is the date_id of the final data set in the list.
    # This will be written as the new ``last_date_id``.
    try:
        dats, date_id = get_stat_data_sets(ft, index_tbl, last_date_id, logger, opt)
    except urllib.error.URLError as err:
        if 'timed out' in str(err):
            msg = f'  ERROR: timed out getting {stat} data for {content}'
            logger.error(msg)
            process_errors.append(msg)
            return
        else:
            raise

    if not dats:
        return

    dat, msids = concat_data_sets(dats, ['data'])
    with DelayedKeyboardInterrupt(logger):
        with timing_logger(logger, f'Applying updates to {len(msids)} h5 files'):
            for msid in msids:
                fetch.ft['msid'] = msid
                stat_file = msid_files['stats'].abs
                if os.path.exists(stat_file):
                    append_stat_col(dat, stat_file, msid, date_id, opt, logger)

            logger.debug(f'Updating {last_date_id_file} with {date_id}')
            if not opt.dry_run:
                with open(last_date_id_file, 'w') as fh:
                    fh.write(f'{date_id}')
Example #8
def func_depend(dep, deptype):
    """
    For a ``dep`` given as (func, args, kwargs), func(*args, **kwargs) is
    evaluated in boolean context.  For the ``depends`` list a func() return of
    False raises an exception indicating that the task dependencies are not
    met.  For ``targets`` a func() return of False results in check_depend
    returning False.
    """
    if isinstance(dep, (list, tuple)):
        func, args, kwargs = dep
        if func(*args, **kwargs):
            logger.debug('Func %s succeeded' % func.__name__)
        else:
            logger.debug('Func %s failed' % func.__name__)
            if deptype == 'depends':
                raise DependFuncFailure('Depend function %s false' % func.__name__)
            else:
                return False
Example #9
def get_last_date_id(msid_files, msids, stat, logger):
    """
    Get the last date_id used for syncing the client archive.  First try the
    last_date_id file.  If this does not exist then infer a reasonable value
    by looking at stat data for ``msids``.

    :param msid_files:
    :param msids:
    :param stat:
    :param logger:
    :return:
    """
    last_date_id_file = msid_files['last_date_id'].abs

    if Path(last_date_id_file).exists():
        logger.verbose(f'Reading {last_date_id_file} to get last update time')
        with open(last_date_id_file, 'r') as fh:
            last_date_id = fh.read()
    else:
        logger.verbose(f'Reading stat h5 files to get last update time')
        times = []
        for msid in msids:
            fetch.ft['msid'] = msid
            filename = msid_files['stats'].abs
            logger.debug(f'Reading {filename} to check stat times')
            with tables.open_file(filename, 'r') as h5:
                index = h5.root.data.cols.index[-1]
                times.append((index + 0.5) * STATS_DT[stat])

        # Get the least recent stats data available and then go back 5 days to be
        # sure nothing gets missed.  Except for ephemeris files that are weird:
        # when they appear in the archive they include weeks of data in the past
        # and possibly future data.
        last_time = min(times)
        lookback = 30 if re.search(r'ephem[01]$', fetch.ft['content'].val) else 5
        last_date_id = get_date_id(DateTime(last_time - lookback * 86400).fits)

    return last_date_id, last_date_id_file
Example #10
def get_cmds(timeline_loads, mp_dir='/data/mpcrit1/mplogs'):
    """
    Get backstop commands corresponding to the supplied timeline load segments.
    The timeline load segments must be ordered by 'id'.

    Return cmds in the format defined by Ska.ParseCM.read_backstop().
    """
    if np.min(np.diff(timeline_loads['id'])) < 1:
        raise ValueError('Timeline loads id not monotonically increasing')

    cmds = []
    for tl in timeline_loads:
        bs_file = Ska.File.get_globfiles(os.path.join(mp_dir + tl.mp_dir,
                                                      '*.backstop'))[0]
        if bs_file not in BACKSTOP_CACHE:
            bs_cmds = read_backstop(bs_file)
            logger.info('Read {} commands from {}'.format(len(bs_cmds), bs_file))
            BACKSTOP_CACHE[bs_file] = bs_cmds
        else:
            bs_cmds = BACKSTOP_CACHE[bs_file]

        # Only store commands for this timeline (match SCS and date)
        bs_cmds = [x for x in bs_cmds
                   if tl['datestart'] <= x['date'] <= tl['datestop']
                   and x['scs'] == tl['scs']]

        for bs_cmd in bs_cmds:
            bs_cmd['timeline_id'] = tl['id']

        logger.info('  Got {} backstop commands for timeline_id={} and SCS={}'
                    .format(len(bs_cmds), tl['id'], tl['scs']))
        cmds.extend(bs_cmds)

    # Sort by date and SCS step number.
    cmds = sorted(cmds, key=lambda y: (y['date'], y['step']))
    logger.debug('Read total of {} commands'.format(len(cmds)))

    return cmds
Example #11
def append_stat_col(dat, stat_file, msid, date_id, opt, logger):
    """
    Append ``dat`` to the appropriate stats h5 file.

    :param dat:
    :param stat_file:
    :param msid:
    :param date_id:
    :param opt:
    :param logger:
    :return: None
    """
    vals = {key: dat[f'{msid}.{key}'] for key in ('data', 'row0', 'row1')}
    logger.debug(f'append_stat_col msid={msid} date_id={date_id}, '
                 f'row0,1 = {vals["row0"]} {vals["row1"]}')

    mode = 'r' if opt.dry_run else 'a'
    with tables.open_file(stat_file, mode=mode) as h5:
        last_row_idx = len(h5.root.data) - 1

        # Check if there is any new data in this chunk
        if vals['row1'] - 1 <= last_row_idx:
            logger.debug(f'Skipping {date_id} for {msid}: no new data '
                         f'row1={vals["row1"]} last_row_idx={last_row_idx}')
            return

        # If this row begins before the end of the current data then chop the
        # beginning of the data for this row.
        if vals['row0'] <= last_row_idx:
            idx0 = last_row_idx + 1 - vals['row0']
            logger.debug(f'Chopping {idx0 + 1} rows from data')
            vals['data'] = vals['data'][idx0:]
            vals['row0'] += idx0

        if vals['row0'] != len(h5.root.data):
            raise RowMismatchError(
                f'ERROR: unexpected discontinuity for stat msid={msid} '
                f'content={fetch.ft["content"]}\n'
                f'Looks like your archive is in a bad state, CONTACT '
                f'your local Ska expert with this info:\n'
                f'  First row0 in new data {vals["row0"]} != '
                f'length of existing data {len(h5.root.data)}')

        logger.debug(f'Appending {len(vals["data"])} rows to {stat_file}')
        if not opt.dry_run:
            h5.root.data.append(vals['data'])
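
The overlap-chopping arithmetic above (also used by the full-resolution appender) is easiest to see with made-up row numbers:

import numpy as np

# The existing file already has rows 0..9 (10 rows); the incoming chunk claims
# to cover rows 8..14, so its first two rows duplicate data already on disk.
last_row_idx = 9
vals = {'row0': 8, 'row1': 15, 'data': np.arange(8, 15)}

if vals['row0'] <= last_row_idx:
    idx0 = last_row_idx + 1 - vals['row0']   # number of overlapping rows to drop (2)
    vals['data'] = vals['data'][idx0:]
    vals['row0'] += idx0

assert vals['row0'] == last_row_idx + 1 == 10
assert list(vals['data']) == [10, 11, 12, 13, 14]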
Example #12
def check_depend(depends=None, targets=None):
    """Check that dependencies are satisfied.

    A dependency in the ``depends`` or ``targets`` list can be either a file
    name as a string or a renderable object (file or value) with an mtime
    attribute.

    A file name is treated in the usual sense of depend and target files.  A
    missing depend file raises an exception and a missing target means
    check_depend returns False.  In addition all targets must be newer
    than all depends.

    :param depends: list of file or value dependencies
    :param targets: list of file or value targets

    :returns: dependencies_satisfied, info_message
    """
    # Lists of mod time for depend and target files.  Seed the list with a
    # fake very OLD and NEW file (respectively) so the final min/max comparison
    # always works.
    mtimes = dict(depends = [1],
                  targets = [2**31])
    deptypes = dict(depends=depends,
                    targets=targets)
    statuses = {}

    # Step through all depends and targets and determine existence and mod. time.
    # Collect this status and informational messages in statuses[deptype]
    for deptype in ('depends', 'targets'):
        statuses[deptype] = []
        deps = deptypes[deptype]
        if not deps:
            continue

        for dep in deps:
            # Check if dep is not a ContextValue.  If so interpret as a filename
            if not hasattr(dep, 'mtime'):
                dep = pyyaks.context.ContextValue(val=dep, name=dep,
                                                  parent=pyyaks.context.ContextDict(basedir='.'))
                
            mtime = dep.mtime                
            info = '%s %s %s = %s' % (deptype.title()[:-1], dep.type, dep.fullname, dep.abs)
            if mtime is None:
                statuses[deptype].append((False, info + ' does not exist'))
            else:
                statuses[deptype].append((True, info + ' (%s)' % time.ctime(mtime)))
                mtimes[deptype].append(mtime)

    # Do all depends exist?  If not raise an exception which will trigger task failure
    if not all(x[0] for x in statuses['depends']):
        msg = 'Dependencies missing:\n' + '\n'.join(x[1] for x in statuses['depends'])
        logger.debug(msg)
        raise DependMissing(msg)

    # Do all targets exist?  If not return False.  This is a normal situation
    # before the task is run but will raise an exception after the task is run.
    if not all(x[0] for x in statuses['targets']):
        msg = 'Targets missing:\n' + '\n'.join(x[1] for x in statuses['targets'])
        logger.debug(msg)
        return False, msg

    # Are all targets at least as new as all depends?  Allow for equality since target
    # files could be created within the same second (particularly for "touch" files).
    min_targets = min(mtimes['targets'])
    max_depends = max(mtimes['depends'])
    ok = min_targets >= max_depends
    msg = 'Depends and targets info:\n' if ok else 'Depend(s) are newer than target(s):\n'
    msg += '\n'.join(x[1] for x in (statuses['depends'] + statuses['targets']))
    logger.debug(msg)
    return ok, msg
Example #13
    def teardown(self):
        for envvar in self.env:
            del os.environ[envvar]
        os.environ.update(self.origenv)
        logger.debug('Restored local environment')
Example #14
    def teardown(self):
        os.chdir(self.origdir)
        logger.debug('Restored directory to "%s"' % self.origdir)
Example #15
    def setup(self):
        self.origenv = os.environ.copy()
        os.environ.update(self.env)
        logger.debug('Updated local environment')
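
The environment setup/teardown pair above saves os.environ, applies overrides, and later removes them and restores the originals. The same idea as a standalone context manager (a sketch only, not the fixture class these methods belong to):

import contextlib
import os


@contextlib.contextmanager
def local_env(**env):
    origenv = os.environ.copy()
    os.environ.update(env)
    try:
        yield
    finally:
        # Drop the overrides, then restore anything they shadowed
        for envvar in env:
            del os.environ[envvar]
        os.environ.update(origenv)


with local_env(SKA='/tmp/ska'):
    assert os.environ['SKA'] == '/tmp/ska'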
Example #16
def update_sync_data_stat(content, logger, row, stat):
    """
    Update stats (5min, daily) sync data for index table ``row``

    :param content: content name (e.g. acis4eng)
    :param logger: logger
    :param row: one row of the full-res index table
    :param stat: stat interval (5min or daily)
    :return:
    """
    ft = fetch.ft
    ft['interval'] = stat

    outfile = Path(sync_files['data'].abs)
    if outfile.exists():
        logger.verbose(f'Skipping {outfile}, already exists')
        return

    # First get the times corresponding to row0 and row1 in the full resolution archive
    ft['msid'] = 'TIME'
    with tables.open_file(fetch.msid_files['msid'].abs, 'r') as h5:
        table = h5.root.data
        tstart = table[row['row0']]
        # Ensure that table row1 (for tstop) doesn't fall off the edge since the last
        # index file row will have row1 exactly equal to the table length.
        row1 = min(row['row1'], len(table) - 1)
        tstop = table[row1]

    out = {}
    msids = list(fetch.all_colnames[content] - set(fetch.IGNORE_COLNAMES))

    # Get dict of last sync repo row for each MSID.  This is keyed as {msid: last_row1},
    # where row1 is (as always) the slice row1.
    last_rows_filename = sync_files['last_rows'].abs
    if Path(last_rows_filename).exists():
        logger.verbose(f'Reading {last_rows_filename}')
        last_rows = pickle.load(open(last_rows_filename, 'rb'))
    else:
        last_rows = {}

    # Go through each MSID and get the raw HDF5 table data corresponding to the
    # time range tstart:tstop found above.
    n_rows_set = set()
    n_msids = 0
    for msid in msids:
        last_row1 = last_rows.get(msid)
        ft['msid'] = msid
        filename = fetch.msid_files['stats'].abs
        if not Path(filename).exists():
            logger.debug(f'No {stat} stat data for {msid} - skipping')
            continue

        n_msids += 1
        stat_rows, row0, row1 = _get_stat_data_from_archive(
            filename, stat, tstart, tstop, last_row1, logger)
        logger.verbose(f'Got stat rows {row0} {row1} for stat {stat} {msid}')
        n_rows_set.add(row1 - row0)
        if row1 > row0:
            out[f'{msid}.data'] = stat_rows
            out[f'{msid}.row0'] = row0
            out[f'{msid}.row1'] = row1
            last_rows[msid] = row1

    n_rows = n_rows_set.pop() if len(n_rows_set) == 1 else n_rows_set

    outfile.parent.mkdir(exist_ok=True, parents=True)
    # TODO: increase compression to max (gzip?)
    logger.info(
        f'Writing {outfile} with {n_rows} rows of data and {n_msids} msids')
    with gzip.open(outfile, 'wb') as fh:
        pickle.dump(out, fh)

    # Save the row1 value for each MSID to use as row0 for the next update
    logger.verbose(f'Writing {last_rows_filename}')
    with open(last_rows_filename, 'wb') as fh:
        pickle.dump(last_rows, fh)
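
A quick round-trip sketch of the gzipped-pickle convention used for the sync data files (the file name and payload below are made up):

import gzip
import pickle
import tempfile
from pathlib import Path

out = {'TEPHIN.data': [1, 2, 3], 'TEPHIN.row0': 100, 'TEPHIN.row1': 103}

outfile = Path(tempfile.mkdtemp()) / 'sync_example.pkl.gz'
with gzip.open(outfile, 'wb') as fh:
    pickle.dump(out, fh)

with gzip.open(outfile, 'rb') as fh:
    assert pickle.load(fh) == out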
Example #17
def append_h5_col(opt, msid, vals, logger, msid_files):
    """Append new values to an HDF5 MSID data table.

    :param opt:
    :param msid:
    :param vals: dict with `data`, `quality`, `row0` and `row1` keys
    :param logger:
    :param msid_files:
    """
    fetch.ft['msid'] = msid

    msid_file = Path(msid_files['msid'].abs)
    if not msid_file.exists():
        logger.debug(f'Skipping MSID update no {msid_file}')
        return

    mode = 'r' if opt.dry_run else 'a'
    with tables.open_file(str(msid_file), mode=mode) as h5:
        # If the vals[] data begins before the end of current data then chop the
        # beginning of data for this row.
        last_row_idx = len(h5.root.data) - 1
        if vals['row0'] <= last_row_idx:
            idx0 = last_row_idx + 1 - vals['row0']
            logger.debug(f'Chopping {idx0 + 1} rows from data')
            for key in ('data', 'quality'):
                vals[key] = vals[key][idx0:]
            vals['row0'] += idx0

        n_vals = len(vals['data'])
        logger.verbose(f'Appending {n_vals} rows to {msid_file}')

        # Normally at this point there is always data to append since we got here
        # by virtue of the TIME.h5 file being incomplete relative to available sync
        # data.  However, user might have manually rsynced a file as part of adding
        # a new MSID, in which case it might be up to date and there is no req'd action.
        if n_vals == 0:
            return

        if vals['row0'] != len(h5.root.data):
            raise RowMismatchError(
                f'ERROR: unexpected discontinuity for full msid={msid} '
                f'content={fetch.ft["content"]}\n'
                f'Looks like your archive is in a bad state, CONTACT '
                f'your local Ska expert with this info:\n'
                f'  First row0 in new data {vals["row0"]} != '
                f'length of existing data {len(h5.root.data)}')

        # For the TIME column include special processing to effectively remove
        # existing rows that are superseded by new rows in time.  This is done by
        # marking the TIME value as bad quality.  This process happens regularly
        # for ephemeris content, which gets updated once weekly and has substantial
        # overlaps in the archive data.  Here we only worry about the beginning of
        # new data because anything in the middle will have already been marked
        # bad by update_archive.py.
        if msid == 'TIME':
            time0 = vals['data'][0]
            idx1 = len(h5.root.data) - 1
            ii = 0
            while h5.root.data[idx1 - ii] - time0 > -0.0001:
                h5.root.quality[idx1 - ii] = True
                ii += 1
            if ii > 0:
                logger.verbose(f'Excluded {ii} rows due to overlap')

        if not opt.dry_run:
            h5.root.data.append(vals['data'])
            h5.root.quality.append(vals['quality'])
Example #18
def update_sync_data_full(content, logger, row):
    """
    Update full-resolution sync data including archfiles for index table ``row``

    This generates a gzipped pickle file with a dict that has sync update values
    for all available  MSIDs in this chunk of ``content`` telemetry.  This has
    `archfiles` (structured ndarray of rows) to store archfiles rows and then
    {msid}.quality, {msid}.data, {msid}.row0 and {msid}.row1.

    :param content: content type
    :param logger: global logger
    :param row: archfile row
    :return: None
    """
    ft = fetch.ft
    ft['interval'] = 'full'

    outfile = Path(sync_files['data'].abs)
    if outfile.exists():
        logger.verbose(f'Skipping {outfile}, already exists')
        return

    out = {}
    msids = list(fetch.all_colnames[content]) + ['TIME']

    # row['filetime0'] and row['filetime1'] are the *inclusive* `filetime` stamps
    # for the archfiles to be included in this row.  Rows do not overlap, so the
    # selection below must include both endpoints (>= and <=).
    with DBI(dbi='sqlite', server=fetch.msid_files['archfiles'].abs) as dbi:
        query = (f'select * from archfiles '
                 f'where filetime >= {row["filetime0"]} '
                 f'and filetime <= {row["filetime1"]} '
                 f'order by filetime ')
        archfiles = dbi.fetchall(query)
        out['archfiles'] = archfiles

    # Row slice indexes into full-resolution MSID h5 files.  All MSIDs share the
    # same row0:row1 range.
    row0 = row['row0']
    row1 = row['row1']

    # Go through each MSID and collect values
    n_msids = 0
    for msid in msids:
        ft['msid'] = msid
        filename = fetch.msid_files['msid'].abs
        if not Path(filename).exists():
            logger.debug(f'No MSID file for {msid} - skipping')
            continue

        n_msids += 1
        with tables.open_file(filename, 'r') as h5:
            out[f'{msid}.quality'] = h5.root.quality[row0:row1]
            out[f'{msid}.data'] = h5.root.data[row0:row1]
            out[f'{msid}.row0'] = row0
            out[f'{msid}.row1'] = row1

    n_rows = row1 - row0
    logger.info(
        f'Writing {outfile} with {n_rows} rows of data and {n_msids} msids')

    outfile.parent.mkdir(exist_ok=True, parents=True)
    # TODO: increase compression to max (gzip?)
    with gzip.open(outfile, 'wb') as fh:
        pickle.dump(out, fh)
Example #19
def get_cmds(start, stop, mp_dir='/data/mpcrit1/mplogs'):
    """
    Get backstop commands corresponding to the supplied timeline load segments.
    The timeline load segments must be ordered by 'id'.

    Return cmds in the format defined by Ska.ParseCM.read_backstop().
    """
    # Get timeline_loads within date range.  Also get non-load commands
    # within the date range covered by the timelines.
    server = os.path.join(os.environ['SKA'], 'data', 'cmd_states', 'cmd_states.db3')
    with Ska.DBI.DBI(dbi='sqlite', server=server) as db:
        timeline_loads = db.fetchall("""SELECT * from timeline_loads
                                        WHERE datestop > '{}' AND datestart < '{}'
                                        ORDER BY id"""
                                     .format(start.date, stop.date))

        # Get non-load commands (from autonomous or ground SCS107, NSM, etc) in the
        # time range that the timelines span.
        tl_datestart = min(timeline_loads['datestart'])
        nl_cmds = db.fetchall('SELECT * from cmds where timeline_id IS NULL and '
                              'date >= "{}" and date <= "{}"'
                              .format(tl_datestart, stop.date))

        # Private method from cmd_states.py fetches the actual int/float param values
        # and returns list of dict.
        nl_cmds = _tl_to_bs_cmds(nl_cmds, None, db)
        nl_cmds = fix_nonload_cmds(nl_cmds)
        logger.info(f'Found {len(nl_cmds)} non-load commands between {tl_datestart} : {stop.date}')

    logger.info('Found {} timelines included within {} to {}'
                .format(len(timeline_loads), start.date, stop.date))

    if np.min(np.diff(timeline_loads['id'])) < 1:
        raise ValueError('Timeline loads id not monotonically increasing')

    cmds = []
    orbit_cmds = []
    orbit_cmd_files = set()

    for tl in timeline_loads:
        bs_file = Ska.File.get_globfiles(os.path.join(mp_dir + tl.mp_dir,
                                                      '*.backstop'))[0]
        if bs_file not in BACKSTOP_CACHE:
            bs_cmds = read_backstop(bs_file)
            logger.info('Read {} commands from {}'.format(len(bs_cmds), bs_file))
            BACKSTOP_CACHE[bs_file] = bs_cmds
        else:
            bs_cmds = BACKSTOP_CACHE[bs_file]

        # Process ORBPOINT (orbit event) pseudo-commands in backstop.  These
        # have scs=0 and need to be treated separately since during a replan
        # or shutdown we still want these ORBPOINT to be in the cmds archive
        # and not be excluded by timeline intervals.
        if bs_file not in orbit_cmd_files:
            bs_orbit_cmds = [x for x in bs_cmds if x['type'] == 'ORBPOINT']
            for orbit_cmd in bs_orbit_cmds:
                orbit_cmd['timeline_id'] = tl['id']
                if 'EVENT_TYPE' not in orbit_cmd['params']:
                    orbit_cmd['params']['EVENT_TYPE'] = orbit_cmd['params']['TYPE']
                    del orbit_cmd['params']['TYPE']
            orbit_cmds.extend(bs_orbit_cmds)
            orbit_cmd_files.add(bs_file)

        # Only store commands for this timeline (match SCS and date)
        bs_cmds = [x for x in bs_cmds
                   if tl['datestart'] <= x['date'] <= tl['datestop']
                   and x['scs'] == tl['scs']]

        for bs_cmd in bs_cmds:
            bs_cmd['timeline_id'] = tl['id']

        logger.info('  Got {} backstop commands for timeline_id={} and SCS={}'
                    .format(len(bs_cmds), tl['id'], tl['scs']))
        cmds.extend(bs_cmds)

    orbit_cmds = get_unique_orbit_cmds(orbit_cmds)
    logger.debug('Read total of {} orbit commands'
                 .format(len(orbit_cmds)))

    cmds.extend(nl_cmds)
    cmds.extend(orbit_cmds)

    # Sort by date and SCS step number.
    cmds = sorted(cmds, key=lambda y: (y['date'], y['step']))
    logger.debug('Read total of {} commands ({} non-load commands)'
                 .format(len(cmds), len(nl_cmds)))

    return cmds
Example #20
def update_index_file(index_file, opt, logger):
    """Update the top-level index file of data available in the sync archive

    :param index_file: Path of index ECSV file
    :param opt: options
    :param logger: output logger
    :return: index table (astropy Table)
    """
    if index_file.exists():
        # Start time of last update contained in the sync repo (if it exists), but do not look
        # back more than max_lookback days.  This is relevant for rarely sampled
        # content like cpe1eng.
        filetime0 = (DateTime(opt.date_stop) - opt.max_lookback).secs

        index_tbl = Table.read(index_file)
        if len(index_tbl) == 0:
            # Need to start with a fresh index_tbl since the string column will end up
            # with a length=1 string (date_id) and add_row later will give the wrong result.
            index_tbl = None
        else:
            filetime0 = max(filetime0, index_tbl['filetime1'][-1])
    else:
        # For initial index file creation use the --date-start option
        index_tbl = None
        filetime0 = DateTime(opt.date_start).secs

    max_secs = int(opt.max_days * 86400)
    time_stop = DateTime(opt.date_stop).secs

    # Step through the archfile files entries and collect them into groups of up
    # to --max-days based on file time stamp (which is an integer in CXC secs).
    rows = []
    filename = fetch.msid_files['archfiles'].abs
    logger.debug(f'Opening archfiles {filename}')
    with DBI(dbi='sqlite', server=filename) as dbi:
        while True:
            filetime1 = min(filetime0 + max_secs, time_stop)
            logger.verbose(f'select from archfiles '
                           f'filetime > {DateTime(filetime0).fits[:-4]} {filetime0} '
                           f'filetime <= {DateTime(filetime1).fits[:-4]} {filetime1} '
                           )
            archfiles = dbi.fetchall(f'select * from archfiles '
                                     f'where filetime > {filetime0} '
                                     f'and filetime <= {filetime1} '
                                     f'order by filetime ')

            # Found new archfiles?  If so get a new index table row for them.
            if len(archfiles) > 0:
                rows.append(get_row_from_archfiles(archfiles))
                filedates = DateTime(archfiles['filetime']).fits
                logger.verbose(f'Got {len(archfiles)} archfiles rows from '
                               f'{filedates[0]} to {filedates[-1]}')

            filetime0 = filetime1

            # Stop if already queried out to the end of desired time range
            if filetime1 >= time_stop:
                break

    if not rows:
        logger.info(f'No updates available for content {fetch.ft["content"]}')
        return index_tbl

    # Create table from scratch or add new rows.  In normal processing there
    # will just be one row per run.
    if index_tbl is None:
        index_tbl = Table(rows)
    else:
        for row in rows:
            index_tbl.add_row(row)

    if not index_file.parent.exists():
        logger.info(f'Making directory {index_file.parent}')
        index_file.parent.mkdir(exist_ok=True, parents=True)

    msg = check_index_tbl_consistency(index_tbl)
    if msg:
        msg += '\n'
        msg += '\n'.join(index_tbl.pformat(max_lines=-1, max_width=-1))
        logger.error(f'Index table inconsistency: {msg}')
        return None

    logger.info(f'Writing {len(rows)} row(s) to index file {index_file}')
    index_tbl.write(index_file, format='ascii.ecsv')

    return index_tbl
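
A sketch of the chunking loop above with small made-up numbers: step from filetime0 to time_stop in blocks of at most max_secs, with the final block ending exactly at time_stop.

max_secs = 5 * 86400
filetime0, time_stop = 0, 12 * 86400

chunks = []
while True:
    filetime1 = min(filetime0 + max_secs, time_stop)
    chunks.append((filetime0, filetime1))     # query window: filetime0 < filetime <= filetime1
    filetime0 = filetime1
    if filetime1 >= time_stop:
        break

print(chunks)   # [(0, 432000), (432000, 864000), (864000, 1036800)]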
Example #21
def add_h5_cmds(h5file, idx_cmds):
    """
    Add `idx_cmds` to HDF5 file `h5file` of indexed spacecraft commands.
    If file does not exist then create it.
    """
    # Note: reading this file uncompressed is about 5 times faster, so sacrifice file size
    # for read speed and do not use compression.
    h5 = tables.open_file(h5file, mode='a')

    # Convert cmds (list of tuples) to numpy structured array.  This also works for an
    # existing structured array.
    cmds = np.array(idx_cmds, dtype=CMDS_DTYPE)

    # TODO: make sure that changes in non-load commands trigger an update

    try:
        h5d = h5.root.data
        logger.info('Opened h5 cmds table {}'.format(h5file))
    except tables.NoSuchNodeError:
        h5.create_table(h5.root, 'data', cmds, "cmds", expectedrows=2e6)
        logger.info('Created h5 cmds table {}'.format(h5file))
    else:
        date0 = min(idx_cmd[1] for idx_cmd in idx_cmds)
        h5_date = h5d.cols.date[:]
        idx_recent = np.searchsorted(h5_date, date0)
        logger.info('Selecting commands from h5d[{}:]'.format(idx_recent))
        logger.info('  {}'.format(str(h5d[idx_recent])))
        h5d_recent = h5d[idx_recent:]  # recent h5d entries

        # Define the column names that specify a complete and unique row
        key_names = ('date', 'type', 'tlmsid', 'scs', 'step', 'timeline_id', 'vcdu')

        h5d_recent_vals = [tuple(
            row[x].decode('ascii') if isinstance(row[x], bytes) else str(row[x])
            for x in key_names)
            for row in h5d_recent]
        idx_cmds_vals = [tuple(str(x) for x in row[1:]) for row in idx_cmds]

        diff = difflib.SequenceMatcher(a=h5d_recent_vals, b=idx_cmds_vals, autojunk=False)
        blocks = diff.get_matching_blocks()
        logger.info('Matching blocks for existing HDF5 and timeline commands')
        for block in blocks:
            logger.info('  {}'.format(block))
        opcodes = diff.get_opcodes()
        logger.info('Diffs between existing HDF5 and timeline commands')
        for opcode in opcodes:
            logger.info('  {}'.format(opcode))
        # Find the first matching block that is sufficiently long
        for block in blocks:
            if block.size > MIN_MATCHING_BLOCK_SIZE:
                break
        else:
            raise ValueError('No matching blocks at least {} long'
                             .format(MIN_MATCHING_BLOCK_SIZE))

        # Index into idx_cmds at the end of the large matching block.  block.b is the
        # beginning of the match.
        idx_cmds_idx = block.b + block.size

        if idx_cmds_idx < len(cmds):
            # Index into h5d at the point of the first diff after the large matching block
            h5d_idx = block.a + block.size + idx_recent

            if h5d_idx < len(h5d):
                logger.debug('Deleted relative cmds indexes {} .. {}'.format(h5d_idx - idx_recent,
                                                                             len(h5d) - idx_recent))
                logger.debug('Deleted cmds indexes {} .. {}'.format(h5d_idx, len(h5d)))
                h5d.truncate(h5d_idx)

            h5d.append(cmds[idx_cmds_idx:])
            logger.info('Added {} commands to HDF5 cmds table'.format(len(cmds[idx_cmds_idx:])))
        else:
            logger.info('No new timeline commands, HDF5 cmds table not updated')

    h5.flush()
    logger.info('Updated HDF5 cmds table {}'.format(h5file))
    h5.close()
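
The core of the update logic is the SequenceMatcher step: find the matching blocks between the tail of the stored commands and the newly generated ones, then append everything in the new list that follows the first sufficiently long match. A toy sketch with short strings standing in for the command key tuples:

import difflib

existing = ['a', 'b', 'c', 'd', 'e']           # stands in for h5d_recent_vals
new = ['c', 'd', 'e', 'f', 'g']                # stands in for idx_cmds_vals
MIN_MATCHING_BLOCK_SIZE = 2                    # illustrative value

diff = difflib.SequenceMatcher(a=existing, b=new, autojunk=False)
for block in diff.get_matching_blocks():
    if block.size > MIN_MATCHING_BLOCK_SIZE:
        break
else:
    raise ValueError('No matching blocks long enough')

idx_new = block.b + block.size                 # first genuinely new entry in `new`
print(new[idx_new:])                           # ['f', 'g']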
Example #22
def add_h5_cmds(h5file, idx_cmds):
    """
    Add `idx_cmds` to HDF5 file `h5file` of indexed spacecraft commands.
    If file does not exist then create it.
    """
    # Note: reading this file uncompressed is about 5 times faster, so sacrifice file size
    # for read speed and do not use compression.
    h5 = tables.openFile(h5file, mode='a')

    # Convert cmds (list of tuples) to numpy structured array.  This also works for an
    # existing structured array.
    cmds = np.array(idx_cmds, dtype=CMDS_DTYPE)

    try:
        h5d = h5.root.data
        logger.info('Opened h5 cmds table {}'.format(h5file))
    except tables.NoSuchNodeError:
        h5.createTable(h5.root, 'data', cmds, "cmds", expectedrows=2e6)
        logger.info('Created h5 cmds table {}'.format(h5file))
    else:
        date0 = min(idx_cmd[1] for idx_cmd in idx_cmds)
        h5_date = h5d.cols.date[:]
        idx_recent = np.searchsorted(h5_date, date0)
        logger.info('Selecting commands from h5d[{}:]'.format(idx_recent))
        logger.info('  {}'.format(str(h5d[idx_recent])))
        h5d_recent = h5d[idx_recent:]  # recent h5d entries

        # Define the column names that specify a complete and unique row
        key_names = ('date', 'type', 'tlmsid', 'scs', 'step', 'timeline_id')

        h5d_recent_vals = [tuple(str(row[x]) for x in key_names) for row in h5d_recent]
        idx_cmds_vals = [tuple(str(x) for x in row[1:]) for row in idx_cmds]

        diff = difflib.SequenceMatcher(a=h5d_recent_vals, b=idx_cmds_vals, autojunk=False)
        blocks = diff.get_matching_blocks()
        logger.info('Matching blocks for existing HDF5 and timeline commands')
        for block in blocks:
            logger.info('  {}'.format(block))
        opcodes = diff.get_opcodes()
        logger.info('Diffs between existing HDF5 and timeline commands')
        for opcode in opcodes:
            logger.info('  {}'.format(opcode))
        # Find the first matching block that is sufficiently long
        for block in blocks:
            if block.size > MIN_MATCHING_BLOCK_SIZE:
                break
        else:
            raise ValueError('No matching blocks at least {} long'
                             .format(MIN_MATCHING_BLOCK_SIZE))

        # Index into idx_cmds at the end of the large matching block.  block.b is the
        # beginning of the match.
        idx_cmds_idx = block.b + block.size

        if idx_cmds_idx < len(cmds):
            # Index into h5d at the point of the first diff after the large matching block
            h5d_idx = block.a + block.size + idx_recent

            if h5d_idx < len(h5d):
                logger.debug('Deleted relative cmds indexes {} .. {}'.format(h5d_idx - idx_recent,
                                                                             len(h5d) - idx_recent))
                logger.debug('Deleted cmds indexes {} .. {}'.format(h5d_idx, len(h5d)))
                h5d.truncate(h5d_idx)

            h5d.append(cmds[idx_cmds_idx:])
            logger.info('Added {} commands to HDF5 cmds table'.format(len(cmds[idx_cmds_idx:])))
        else:
            logger.info('No new timeline commands, HDF5 cmds table not updated')

    h5.flush()
    logger.info('Updated HDF5 cmds table {}'.format(h5file))
    h5.close()